compress videos bigger than 10 MB before sending

author: Christian Schneppe <christian@pix-art.de> 2016-08-26 23:48:48 +0200
committer: Christian Schneppe <christian@pix-art.de> 2016-08-28 21:33:19 +0200
commit: b3b3475e93a9b08f9e35edbf74673728b560ad3b (patch)
tree: fc72bfce668b358310061c0a94736a0bd14e8b5d
parent: 1f7f535d37b844dbd87447e1872c270edbca1302 (diff)
547 files changed, 354504 insertions, 18 deletions
diff --git a/art/ic_send_video_away.svg b/art/ic_send_video_away.svg
new file mode 100644
index 000000000..1e4384440
--- /dev/null
+++ b/art/ic_send_video_away.svg
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   fill="#FF9800"
+   fill-opacity="0.627451"
+   height="48"
+   viewBox="0 0 24 24"
+   width="48"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="ic_send_video_away.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="640"
+     inkscape:window-height="480"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="4.9166667"
+     inkscape:cx="24"
+     inkscape:cy="24"
+     inkscape:window-x="0"
+     inkscape:window-y="27"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="svg2" />
+  <path
+     d="M0 0h24v24H0z"
+     fill="none"
+     id="path4" />
+  <path
+     d="M17 10.5V7c0-.55-.45-1-1-1H4c-.55 0-1 .45-1 1v10c0 .55.45 1 1 1h12c.55 0 1-.45 1-1v-3.5l4 4v-11l-4 4z"
+     id="path6" />
+</svg>
diff --git a/art/ic_send_video_dnd.svg b/art/ic_send_video_dnd.svg
new file mode 100644
index 000000000..08489f717
--- /dev/null
+++ b/art/ic_send_video_dnd.svg
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   fill="#F44336"
+   fill-opacity="0.627451"
+   height="48"
+   viewBox="0 0 24 24"
+   width="48"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="ic_send_video_dnd.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="640"
+     inkscape:window-height="480"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="4.9166667"
+     inkscape:cx="24"
+     inkscape:cy="23.59322"
+     inkscape:window-x="0"
+     inkscape:window-y="27"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="svg2" />
+  <path
+     d="M0 0h24v24H0z"
+     fill="none"
+     id="path4" />
+  <path
+     d="M17 10.5V7c0-.55-.45-1-1-1H4c-.55 0-1 .45-1 1v10c0 .55.45 1 1 1h12c.55 0 1-.45 1-1v-3.5l4 4v-11l-4 4z"
+     id="path6" />
+</svg>
diff --git a/art/ic_send_video_offline.svg b/art/ic_send_video_offline.svg
new file mode 100644
index 000000000..9a548f5f3
--- /dev/null
+++ b/art/ic_send_video_offline.svg
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   fill="#000000"
+   fill-opacity="0.627451"
+   height="48"
+   viewBox="0 0 24 24"
+   width="48"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="ic_send_video_offline.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="640"
+     inkscape:window-height="480"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="4.9166667"
+     inkscape:cx="24"
+     inkscape:cy="24"
+     inkscape:window-x="0"
+     inkscape:window-y="27"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="svg2" />
+  <path
+     d="M0 0h24v24H0z"
+     fill="none"
+     id="path4" />
+  <path
+     d="M17 10.5V7c0-.55-.45-1-1-1H4c-.55 0-1 .45-1 1v10c0 .55.45 1 1 1h12c.55 0 1-.45 1-1v-3.5l4 4v-11l-4 4z"
+     id="path6" />
+</svg>
diff --git a/art/ic_send_video_online.svg b/art/ic_send_video_online.svg
new file mode 100644
index 000000000..ba4e47a45
--- /dev/null
+++ b/art/ic_send_video_online.svg
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   fill="#259B24"
+   fill-opacity="0.627451"
+   height="48"
+   viewBox="0 0 24 24"
+   width="48"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="ic_send_video_online.svg">
+  <metadata
+     id="metadata12">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs10" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="640"
+     inkscape:window-height="480"
+     id="namedview8"
+     showgrid="false"
+     inkscape:zoom="4.9166667"
+     inkscape:cx="24"
+     inkscape:cy="23.59322"
+     inkscape:window-x="0"
+     inkscape:window-y="27"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="svg2" />
+  <path
+     d="M0 0h24v24H0z"
+     fill="none"
+     id="path4" />
+  <path
+     d="M17 10.5V7c0-.55-.45-1-1-1H4c-.55 0-1 .45-1 1v10c0 .55.45 1 1 1h12c.55 0 1-.45 1-1v-3.5l4 4v-11l-4 4z"
+     id="path6" />
+</svg>
diff --git a/build.gradle b/build.gradle
index 74d561104..46c0c3b6f 100644
--- a/build.gradle
+++ b/build.gradle
@@ -48,7 +48,7 @@ dependencies {
 	compile 'jetty:javax.servlet:5.1.12'
 	compile 'com.google.code.gson:gson:2.4'
 	compile 'org.jbundle.util.osgi.wrapped:org.jbundle.util.osgi.wrapped.org.apache.http.client:4.1.2'
-	compile 'com.android.support:appcompat-v7:24.1.1'
+	compile 'com.android.support:appcompat-v7:24.2.0'
 	compile 'com.android.support:multidex:1.0.1'
 	compile 'com.github.bumptech.glide:glide:3.7.0'
 	compile 'com.github.chrisbanes:PhotoView:1.3.0'
@@ -57,6 +57,8 @@ dependencies {
 	compile 'com.google.android.gms:play-services-maps:9.4.0'
 	compile 'pub.devrel:easypermissions:0.1.9'
     compile "com.wefika:flowlayout:0.4.1"
+    compile files('libs/aspectjrt-1.7.3.jar')
+    compile files('libs/isoparser-1.0.6.jar')
 	playstoreCompile 'com.google.android.gms:play-services-gcm:9.4.0'
 }
 
@@ -74,8 +76,8 @@ android {
 	defaultConfig {
 		minSdkVersion 14
 		targetSdkVersion 24
-		versionCode 160
-		versionName "1.13.9"
+		versionCode 161
+		versionName "1.14.0-beta1"
 		archivesBaseName += "-$versionName"
 		applicationId "de.pixart.messenger"
 		multiDexEnabled true
@@ -113,6 +115,11 @@ android {
 		buildTypes.release.signingConfig = null
 	}
 
+    sourceSets.main {
+        jniLibs.srcDir 'libs'
+        jni.srcDirs = [] //disable automatic ndk-build call
+    }
+
 	lintOptions {
 		disable 'ExtraTranslation', 'MissingTranslation', 'InvalidPackage', 'MissingQuantity', 'AppCompatResource'
 	}
diff --git a/libs/aspectjrt-1.7.3.jar b/libs/aspectjrt-1.7.3.jar
new file mode 100644
index 000000000..ef9fe4bf7
--- /dev/null
+++ b/libs/aspectjrt-1.7.3.jar
diff --git a/libs/isoparser-1.0.6.jar b/libs/isoparser-1.0.6.jar
new file mode 100644
index 000000000..f0142bb37
--- /dev/null
+++ b/libs/isoparser-1.0.6.jar
diff --git a/src/main/java/de/pixart/messenger/Config.java b/src/main/java/de/pixart/messenger/Config.java
index dbf2d6cbe..01bec77cf 100644
--- a/src/main/java/de/pixart/messenger/Config.java
+++ b/src/main/java/de/pixart/messenger/Config.java
@@ -81,7 +81,7 @@ public final class Config {
 	public static final Bitmap.CompressFormat IMAGE_FORMAT = Bitmap.CompressFormat.JPEG;
 	public static final int IMAGE_QUALITY = 75;
 	public static final int IMAGE_MAX_SIZE = 524288; //512 KiB
-
+    public static final int VIDEO_MAX_SIZE = 10485760; //10 MiB
 	public static final int FILE_MAX_SIZE = 1048576; //1 MiB
 
 	public static final int DEFAULT_ZOOM = 15; //for locations
diff --git a/src/main/java/de/pixart/messenger/persistance/FileBackend.java b/src/main/java/de/pixart/messenger/persistance/FileBackend.java
index d1e18f313..2972c71de 100644
--- a/src/main/java/de/pixart/messenger/persistance/FileBackend.java
+++ b/src/main/java/de/pixart/messenger/persistance/FileBackend.java
@@ -274,17 +274,16 @@ public class FileBackend {
 		Log.d(Config.LOGTAG, "output file name " + file.getAbsolutePath());
 	}
 
-	public void copyFileToPrivateStorage(Message message, Uri uri) throws FileCopyException {
-		String mime = mXmppConnectionService.getContentResolver().getType(uri);
-		Log.d(Config.LOGTAG, "copy " + uri.toString() + " to private storage (mime="+mime+")");
-		String extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mime);
-		if (extension == null) {
-			extension = getExtensionFromUri(uri);
-		}
-		String filename = fileDateFormat.format(new Date(message.getTimeSent()))+"_"+message.getUuid().substring(0,4);
-		message.setRelativeFilePath(filename + "." + extension);
-		copyFileToPrivateStorage(mXmppConnectionService.getFileBackend().getFile(message), uri);
-	}
+    public void copyFileToPrivateStorage(Message message, Uri uri) throws FileCopyException {
+        String mime = mXmppConnectionService.getContentResolver().getType(uri);
+        Log.d(Config.LOGTAG, "copy " + uri.toString() + " to private storage (mime="+mime+")");
+        String extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mime);
+        if (extension == null) {
+            extension = getExtensionFromUri(uri);
+        }
+        message.setRelativeFilePath(message.getUuid() + "." + extension);
+        copyFileToPrivateStorage(mXmppConnectionService.getFileBackend().getFile(message), uri);
+    }
 
 	private String getExtensionFromUri(Uri uri) {
 		String[] projection = {MediaStore.MediaColumns.DATA};
diff --git a/src/main/java/de/pixart/messenger/services/XmppConnectionService.java b/src/main/java/de/pixart/messenger/services/XmppConnectionService.java
index d49d1f68b..26c5e6678 100644
--- a/src/main/java/de/pixart/messenger/services/XmppConnectionService.java
+++ b/src/main/java/de/pixart/messenger/services/XmppConnectionService.java
@@ -15,6 +15,7 @@ import android.media.AudioManager;
 import android.net.ConnectivityManager;
 import android.net.NetworkInfo;
 import android.net.Uri;
+import android.os.AsyncTask;
 import android.os.Binder;
 import android.os.Build;
 import android.os.Bundle;
@@ -43,15 +44,18 @@ import org.openintents.openpgp.IOpenPgpService2;
 import org.openintents.openpgp.util.OpenPgpApi;
 import org.openintents.openpgp.util.OpenPgpServiceConnection;
 
+import java.io.File;
 import java.math.BigInteger;
 import java.security.SecureRandom;
 import java.security.cert.CertificateException;
 import java.security.cert.X509Certificate;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
@@ -60,6 +64,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.ExecutionException;
 
 import de.duenndns.ssl.MemorizingTrustManager;
 import de.pixart.messenger.Config;
@@ -98,12 +103,14 @@ import de.pixart.messenger.ui.UiCallback;
 import de.pixart.messenger.utils.ConversationsFileObserver;
 import de.pixart.messenger.utils.CryptoHelper;
 import de.pixart.messenger.utils.ExceptionHelper;
+import de.pixart.messenger.utils.FileUtils;
 import de.pixart.messenger.utils.OnPhoneContactsLoadedListener;
 import de.pixart.messenger.utils.PRNGFixes;
 import de.pixart.messenger.utils.PhoneHelper;
 import de.pixart.messenger.utils.ReplacingSerialSingleThreadExecutor;
 import de.pixart.messenger.utils.SerialSingleThreadExecutor;
 import de.pixart.messenger.utils.Xmlns;
+import de.pixart.messenger.utils.video.MediaController;
 import de.pixart.messenger.xml.Element;
 import de.pixart.messenger.xmpp.OnBindListener;
 import de.pixart.messenger.xmpp.OnContactStatusChanged;
@@ -440,6 +447,7 @@ public class XmppConnectionService extends Service {
 		message.setCounterpart(conversation.getNextCounterpart());
 		message.setType(Message.TYPE_FILE);
 		final String path = getFileBackend().getOriginalPath(uri);
+        Log.d(Config.LOGTAG,"File path = " + path);
 		mFileAddingExecutor.execute(new Runnable() {
 			@Override
 			public void run() {
@@ -517,6 +525,75 @@ public class XmppConnectionService extends Service {
 		});
 	}
 
+    public void attachVideoToConversation(final Conversation conversation, final Uri uri, final UiCallback<Message> callback) {
+        if (FileBackend.weOwnFile(this, uri)) {
+            Log.d(Config.LOGTAG,"trying to attach video that belonged to us");
+            callback.error(R.string.security_error_invalid_file_access, null);
+            return;
+        }
+        File f = new File(FileUtils.getPath(this, uri));
+        long filesize = f.length();
+        String path = f.toString();
+        Log.d(Config.LOGTAG,"Video file (size) :" + f.toString() + "("+filesize/1024/1024+"MB)");
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd_HHmmssSSS", Locale.US);
+        File compressed_file = new File(FileBackend.getConversationsVideoDirectory() + "/"
+                + dateFormat.format(new Date())
+                + "_komp.mp4");
+        final String compressed_path = compressed_file.toString();
+        final Uri compressed_uri = Uri.fromFile(compressed_file);
+        if (filesize > 0 && filesize <= Config.VIDEO_MAX_SIZE) {
+            Log.d(Config.LOGTAG,conversation.getAccount().getJid().toBareJid()+ ": not compressing video. sending as file");
+            attachFileToConversation(conversation, uri, callback);
+        } else {
+            VideoCompressor CompressVideo = new VideoCompressor(path, compressed_path,  new Interface() {
+                @Override
+                public void videocompressed(boolean result) {
+                    if (result) {
+                        Log.d(Config.LOGTAG, conversation.getAccount().getJid().toBareJid() + ": sending compressed video.");
+                        attachFileToConversation(conversation, compressed_uri, callback);
+                    }
+                }
+            });
+            CompressVideo.execute();
+        }
+    }
+
+    public interface Interface {
+        void videocompressed(boolean result);
+    }
+
+    class VideoCompressor extends AsyncTask<String, Void, Boolean> {
+        private String originalpath;
+        private String compressedpath;
+        private Interface mListener;
+
+        public VideoCompressor(String path, String compressed_path, Interface mListener) {
+            originalpath = path;
+            compressedpath = compressed_path;
+            this.mListener  = mListener;
+        }
+
+        @Override
+        protected void onPreExecute() {
+            super.onPreExecute();
+            Log.d(Config.LOGTAG,"Start video compression");
+        }
+
+        @Override
+        protected Boolean doInBackground(String... params) {
+            return MediaController.getInstance().convertVideo(originalpath, compressedpath);
+        }
+
+        @Override
+        protected void onPostExecute(Boolean compressed) {
+            super.onPostExecute(compressed);
+            if (mListener != null) {
+                mListener.videocompressed(compressed);
+                Log.d(Config.LOGTAG, "Compression successfully!");
+            }
+        }
+    }
+
 	public Conversation find(Bookmark bookmark) {
 		return find(bookmark.getAccount(), bookmark.getJid());
 	}
diff --git a/src/main/java/de/pixart/messenger/ui/ConversationActivity.java b/src/main/java/de/pixart/messenger/ui/ConversationActivity.java
index 6442acaa3..bcb9e81fd 100644
--- a/src/main/java/de/pixart/messenger/ui/ConversationActivity.java
+++ b/src/main/java/de/pixart/messenger/ui/ConversationActivity.java
@@ -104,12 +104,14 @@ public class ConversationActivity extends XmppActivity
 	public static final int ATTACHMENT_CHOICE_CHOOSE_FILE = 0x0303;
 	public static final int ATTACHMENT_CHOICE_RECORD_VOICE = 0x0304;
 	public static final int ATTACHMENT_CHOICE_LOCATION = 0x0305;
-	public static final int ATTACHMENT_CHOICE_INVALID = 0x0306;
+    public static final int ATTACHMENT_CHOICE_CHOOSE_VIDEO = 0x0306;
+    public static final int ATTACHMENT_CHOICE_INVALID = 0x0399;
 	private static final String STATE_OPEN_CONVERSATION = "state_open_conversation";
 	private static final String STATE_PANEL_OPEN = "state_panel_open";
 	private static final String STATE_PENDING_URI = "state_pending_uri";
 	final private List<Uri> mPendingImageUris = new ArrayList<>();
 	final private List<Uri> mPendingFileUris = new ArrayList<>();
+    final private List<Uri> mPendingVideoUris = new ArrayList<>();
 	private String mOpenConverstaion = null;
 	private boolean mPanelOpen = true;
 	private Uri mPendingGeoUri = null;
@@ -610,6 +612,11 @@ public class ConversationActivity extends XmppActivity
 						intent.setType("image/*");
 						chooser = true;
 						break;
+                    case ATTACHMENT_CHOICE_CHOOSE_VIDEO:
+                        intent.setAction(Intent.ACTION_GET_CONTENT);
+                        intent.setType("video/*");
+                        chooser = true;
+                        break;
 					case ATTACHMENT_CHOICE_TAKE_PHOTO:
 						Uri uri = xmppConnectionService.getFileBackend().getTakePhotoUri();
 						intent.setAction(MediaStore.ACTION_IMAGE_CAPTURE);
@@ -692,6 +699,9 @@ public class ConversationActivity extends XmppActivity
 			case ATTACHMENT_CHOICE_CHOOSE_IMAGE:
 				getPreferences().edit().putString("recently_used_quick_action", "picture").apply();
 				break;
+            case ATTACHMENT_CHOICE_CHOOSE_VIDEO:
+                getPreferences().edit().putString("recently_used_quick_action", "video").apply();
+                break;
 		}
 		final Conversation conversation = getSelectedConversation();
 		final int encryption = conversation.getNextEncryption();
@@ -932,6 +942,9 @@ public class ConversationActivity extends XmppActivity
 					case R.id.attach_take_picture:
 						attachFile(ATTACHMENT_CHOICE_TAKE_PHOTO);
 						break;
+                    case R.id.attach_choose_video:
+                        attachFile(ATTACHMENT_CHOICE_CHOOSE_VIDEO);
+                        break;
 					case R.id.attach_choose_file:
 						attachFile(ATTACHMENT_CHOICE_CHOOSE_FILE);
 						break;
@@ -1413,6 +1426,10 @@ public class ConversationActivity extends XmppActivity
                 }
             }
 
+            for (Iterator<Uri> i = mPendingVideoUris.iterator(); i.hasNext(); i.remove()) {
+                attachVideoToConversation(getSelectedConversation(), i.next());
+            }
+
 			for (Iterator<Uri> i = mPendingFileUris.iterator(); i.hasNext(); i.remove()) {
 				attachFileToConversation(getSelectedConversation(), i.next());
 			}
@@ -1569,6 +1586,28 @@ public class ConversationActivity extends XmppActivity
 				} else {
 					selectPresence(c, callback);
 				}
+            } else if (requestCode == ATTACHMENT_CHOICE_CHOOSE_VIDEO) {
+                final List<Uri> uris = extractUriFromIntent(data);
+                final Conversation c = getSelectedConversation();
+                final OnPresenceSelected callback = new OnPresenceSelected() {
+                    @Override
+                    public void onPresenceSelected() {
+                        mPendingVideoUris.clear();
+                        mPendingVideoUris.addAll(uris);
+                        if (xmppConnectionServiceBound) {
+                            for (Iterator<Uri> i = mPendingVideoUris.iterator(); i.hasNext(); i.remove()) {
+                                attachVideoToConversation(c, i.next());
+                            }
+                        }
+                    }
+                };
+                if (c == null || c.getMode() == Conversation.MODE_MULTI
+                        || FileBackend.allFilesUnderSize(this, uris, getMaxHttpUploadSize(c))
+                        || c.getNextEncryption() == Message.ENCRYPTION_OTR) {
+                    callback.onPresenceSelected();
+                } else {
+                    selectPresence(c, callback);
+                }
 			} else if (requestCode == ATTACHMENT_CHOICE_TAKE_PHOTO) {
 				if (mPendingImageUris.size() == 1) {
 					Uri uri = mPendingImageUris.get(0);
@@ -1582,7 +1621,7 @@ public class ConversationActivity extends XmppActivity
 				} else {
 					mPendingImageUris.clear();
 				}
-			} else if (requestCode == ATTACHMENT_CHOICE_LOCATION) {
+            } else if (requestCode == ATTACHMENT_CHOICE_LOCATION) {
 				double latitude = data.getDoubleExtra("latitude", 0);
 				double longitude = data.getDoubleExtra("longitude", 0);
 				this.mPendingGeoUri = Uri.parse("geo:" + String.valueOf(latitude) + "," + String.valueOf(longitude));
@@ -1717,6 +1756,38 @@ public class ConversationActivity extends XmppActivity
 		});
 	}
 
+    private void attachVideoToConversation(Conversation conversation, Uri uri) {
+        if (conversation == null) {
+            return;
+        }
+        final Toast prepareFileToast = Toast.makeText(getApplicationContext(),getText(R.string.preparing_video), Toast.LENGTH_LONG);
+        prepareFileToast.show();
+        xmppConnectionService.attachVideoToConversation(conversation, uri, new UiCallback<Message>() {
+            @Override
+            public void success(Message message) {
+                hidePrepareFileToast(prepareFileToast);
+                xmppConnectionService.sendMessage(message);
+            }
+
+            @Override
+            public void error(final int errorCode, Message message) {
+                hidePrepareFileToast(prepareFileToast);
+                runOnUiThread(new Runnable() {
+                    @Override
+                    public void run() {
+                        replaceToast(getString(errorCode));
+                    }
+                });
+
+            }
+
+            @Override
+            public void userInputRequried(PendingIntent pi, Message message) {
+                hidePrepareFileToast(prepareFileToast);
+            }
+        });
+    }
+
 	private void attachImagesToConversation(Conversation conversation, Uri uri) {
 		if (conversation == null) {
 			return;
diff --git a/src/main/java/de/pixart/messenger/ui/ConversationFragment.java b/src/main/java/de/pixart/messenger/ui/ConversationFragment.java
index b89e8351c..9bee75b5e 100644
--- a/src/main/java/de/pixart/messenger/ui/ConversationFragment.java
+++ b/src/main/java/de/pixart/messenger/ui/ConversationFragment.java
@@ -276,6 +276,9 @@ public class ConversationFragment extends Fragment implements EditMessage.Keyboa
 					case CHOOSE_PICTURE:
 						activity.attachFile(ConversationActivity.ATTACHMENT_CHOICE_CHOOSE_IMAGE);
 						break;
+                    case CHOOSE_VIDEO:
+                        activity.attachFile(ConversationActivity.ATTACHMENT_CHOICE_CHOOSE_VIDEO);
+                        break;
 					case CANCEL:
 						if (conversation != null) {
 							if (conversation.getCorrectingMessage() != null) {
@@ -949,7 +952,7 @@ public class ConversationFragment extends Fragment implements EditMessage.Keyboa
 		mEditMessage.requestFocus();
 	}
 
-	enum SendButtonAction {TEXT, TAKE_PHOTO, SEND_LOCATION, RECORD_VOICE, CANCEL, CHOOSE_PICTURE}
+	enum SendButtonAction {TEXT, TAKE_PHOTO, SEND_LOCATION, RECORD_VOICE, CANCEL, CHOOSE_PICTURE, CHOOSE_VIDEO}
 
 	private int getSendButtonImageResource(SendButtonAction action, Presence.Status status) {
 		switch (action) {
@@ -1031,6 +1034,19 @@ public class ConversationFragment extends Fragment implements EditMessage.Keyboa
 					default:
 						return R.drawable.ic_send_picture_offline;
 				}
+            case CHOOSE_VIDEO:
+                switch (status) {
+                    case CHAT:
+                    case ONLINE:
+                        return R.drawable.ic_send_video_online;
+                    case AWAY:
+                        return R.drawable.ic_send_video_away;
+                    case XA:
+                    case DND:
+                        return R.drawable.ic_send_video_dnd;
+                    default:
+                        return R.drawable.ic_send_video_offline;
+                }
 		}
 		return R.drawable.ic_send_text_offline;
 	}
@@ -1074,6 +1090,9 @@ public class ConversationFragment extends Fragment implements EditMessage.Keyboa
 						case "picture":
 							action = SendButtonAction.CHOOSE_PICTURE;
 							break;
+                        case "video":
+                            action = SendButtonAction.CHOOSE_VIDEO;
+                            break;
 						default:
 							action = SendButtonAction.TEXT;
 							break;
diff --git a/src/main/java/de/pixart/messenger/utils/video/InputSurface.java b/src/main/java/de/pixart/messenger/utils/video/InputSurface.java
new file mode 100644
index 000000000..3376b4b87
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/InputSurface.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.opengl.EGL14;
+import android.opengl.EGLConfig;
+import android.opengl.EGLContext;
+import android.opengl.EGLDisplay;
+import android.opengl.EGLExt;
+import android.opengl.EGLSurface;
+import android.os.Build;
+import android.view.Surface;
+
+@TargetApi(Build.VERSION_CODES.JELLY_BEAN_MR2)
+public class InputSurface {
+    private static final boolean VERBOSE = false;
+    private static final int EGL_RECORDABLE_ANDROID = 0x3142;
+    private static final int EGL_OPENGL_ES2_BIT = 4;
+    private EGLDisplay mEGLDisplay;
+    private EGLContext mEGLContext;
+    private EGLSurface mEGLSurface;
+    private Surface mSurface;
+
+    public InputSurface(Surface surface) {
+        if (surface == null) {
+            throw new NullPointerException();
+        }
+        mSurface = surface;
+        eglSetup();
+    }
+
+    private void eglSetup() {
+        mEGLDisplay = EGL14.eglGetDisplay(EGL14.EGL_DEFAULT_DISPLAY);
+        if (mEGLDisplay == EGL14.EGL_NO_DISPLAY) {
+            throw new RuntimeException("unable to get EGL14 display");
+        }
+        int[] version = new int[2];
+        if (!EGL14.eglInitialize(mEGLDisplay, version, 0, version, 1)) {
+            mEGLDisplay = null;
+            throw new RuntimeException("unable to initialize EGL14");
+        }
+
+        int[] attribList = {
+                EGL14.EGL_RED_SIZE, 8,
+                EGL14.EGL_GREEN_SIZE, 8,
+                EGL14.EGL_BLUE_SIZE, 8,
+                EGL14.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
+                EGL_RECORDABLE_ANDROID, 1,
+                EGL14.EGL_NONE
+        };
+        EGLConfig[] configs = new EGLConfig[1];
+        int[] numConfigs = new int[1];
+        if (!EGL14.eglChooseConfig(mEGLDisplay, attribList, 0, configs, 0, configs.length,
+                numConfigs, 0)) {
+            throw new RuntimeException("unable to find RGB888+recordable ES2 EGL config");
+        }
+
+        int[] attrib_list = {
+                EGL14.EGL_CONTEXT_CLIENT_VERSION, 2,
+                EGL14.EGL_NONE
+        };
+
+        mEGLContext = EGL14.eglCreateContext(mEGLDisplay, configs[0], EGL14.EGL_NO_CONTEXT, attrib_list, 0);
+        checkEglError("eglCreateContext");
+        if (mEGLContext == null) {
+            throw new RuntimeException("null context");
+        }
+
+        int[] surfaceAttribs = {
+                EGL14.EGL_NONE
+        };
+        mEGLSurface = EGL14.eglCreateWindowSurface(mEGLDisplay, configs[0], mSurface,
+                surfaceAttribs, 0);
+        checkEglError("eglCreateWindowSurface");
+        if (mEGLSurface == null) {
+            throw new RuntimeException("surface was null");
+        }
+    }
+
+    public void release() {
+        if (EGL14.eglGetCurrentContext().equals(mEGLContext)) {
+            EGL14.eglMakeCurrent(mEGLDisplay, EGL14.EGL_NO_SURFACE, EGL14.EGL_NO_SURFACE, EGL14.EGL_NO_CONTEXT);
+        }
+        EGL14.eglDestroySurface(mEGLDisplay, mEGLSurface);
+        EGL14.eglDestroyContext(mEGLDisplay, mEGLContext);
+        mSurface.release();
+        mEGLDisplay = null;
+        mEGLContext = null;
+        mEGLSurface = null;
+        mSurface = null;
+    }
+
+    public void makeCurrent() {
+        if (!EGL14.eglMakeCurrent(mEGLDisplay, mEGLSurface, mEGLSurface, mEGLContext)) {
+            throw new RuntimeException("eglMakeCurrent failed");
+        }
+    }
+
+    public boolean swapBuffers() {
+        return EGL14.eglSwapBuffers(mEGLDisplay, mEGLSurface);
+    }
+
+    public Surface getSurface() {
+        return mSurface;
+    }
+
+    public void setPresentationTime(long nsecs) {
+        EGLExt.eglPresentationTimeANDROID(mEGLDisplay, mEGLSurface, nsecs);
+    }
+
+    private void checkEglError(String msg) {
+        boolean failed = false;
+        int error;
+        while ((error = EGL14.eglGetError()) != EGL14.EGL_SUCCESS) {
+            failed = true;
+        }
+        if (failed) {
+            throw new RuntimeException("EGL error encountered (see log)");
+        }
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/MP4Builder.java b/src/main/java/de/pixart/messenger/utils/video/MP4Builder.java
new file mode 100644
index 000000000..ee8b83e27
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/MP4Builder.java
@@ -0,0 +1,445 @@
+/*
+ * This is the source code of Telegram for Android v. 1.7.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2014.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.media.MediaCodec;
+import android.media.MediaFormat;
+
+import com.coremedia.iso.BoxParser;
+import com.coremedia.iso.IsoFile;
+import com.coremedia.iso.IsoTypeWriter;
+import com.coremedia.iso.boxes.Box;
+import com.coremedia.iso.boxes.Container;
+import com.coremedia.iso.boxes.DataEntryUrlBox;
+import com.coremedia.iso.boxes.DataInformationBox;
+import com.coremedia.iso.boxes.DataReferenceBox;
+import com.coremedia.iso.boxes.FileTypeBox;
+import com.coremedia.iso.boxes.HandlerBox;
+import com.coremedia.iso.boxes.MediaBox;
+import com.coremedia.iso.boxes.MediaHeaderBox;
+import com.coremedia.iso.boxes.MediaInformationBox;
+import com.coremedia.iso.boxes.MovieBox;
+import com.coremedia.iso.boxes.MovieHeaderBox;
+import com.coremedia.iso.boxes.SampleSizeBox;
+import com.coremedia.iso.boxes.SampleTableBox;
+import com.coremedia.iso.boxes.SampleToChunkBox;
+import com.coremedia.iso.boxes.StaticChunkOffsetBox;
+import com.coremedia.iso.boxes.SyncSampleBox;
+import com.coremedia.iso.boxes.TimeToSampleBox;
+import com.coremedia.iso.boxes.TrackBox;
+import com.coremedia.iso.boxes.TrackHeaderBox;
+import com.googlecode.mp4parser.DataSource;
+import com.googlecode.mp4parser.util.Matrix;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+
+@TargetApi(16)
+public class MP4Builder {
+
+    private InterleaveChunkMdat mdat = null;
+    private Mp4Movie currentMp4Movie = null;
+    private FileOutputStream fos = null;
+    private FileChannel fc = null;
+    private long dataOffset = 0;
+    private long writedSinceLastMdat = 0;
+    private boolean writeNewMdat = true;
+    private HashMap<Track, long[]> track2SampleSizes = new HashMap<>();
+    private ByteBuffer sizeBuffer = null;
+
+    public MP4Builder createMovie(Mp4Movie mp4Movie) throws Exception {
+        currentMp4Movie = mp4Movie;
+
+        fos = new FileOutputStream(mp4Movie.getCacheFile());
+        fc = fos.getChannel();
+
+        FileTypeBox fileTypeBox = createFileTypeBox();
+        fileTypeBox.getBox(fc);
+        dataOffset += fileTypeBox.getSize();
+        writedSinceLastMdat += dataOffset;
+
+        mdat = new InterleaveChunkMdat();
+
+        sizeBuffer = ByteBuffer.allocateDirect(4);
+
+        return this;
+    }
+
+    private void flushCurrentMdat() throws Exception {
+        long oldPosition = fc.position();
+        fc.position(mdat.getOffset());
+        mdat.getBox(fc);
+        fc.position(oldPosition);
+        mdat.setDataOffset(0);
+        mdat.setContentSize(0);
+        fos.flush();
+    }
+
+    public boolean writeSampleData(int trackIndex, ByteBuffer byteBuf, MediaCodec.BufferInfo bufferInfo, boolean isAudio) throws Exception {
+        if (writeNewMdat) {
+            mdat.setContentSize(0);
+            mdat.getBox(fc);
+            mdat.setDataOffset(dataOffset);
+            dataOffset += 16;
+            writedSinceLastMdat += 16;
+            writeNewMdat = false;
+        }
+
+        mdat.setContentSize(mdat.getContentSize() + bufferInfo.size);
+        writedSinceLastMdat += bufferInfo.size;
+
+        boolean flush = false;
+        if (writedSinceLastMdat >= 32 * 1024) {
+            flushCurrentMdat();
+            writeNewMdat = true;
+            flush = true;
+            writedSinceLastMdat -= 32 * 1024;
+        }
+
+        currentMp4Movie.addSample(trackIndex, dataOffset, bufferInfo);
+        byteBuf.position(bufferInfo.offset + (isAudio ? 0 : 4));
+        byteBuf.limit(bufferInfo.offset + bufferInfo.size);
+
+        if (!isAudio) {
+            sizeBuffer.position(0);
+            sizeBuffer.putInt(bufferInfo.size - 4);
+            sizeBuffer.position(0);
+            fc.write(sizeBuffer);
+        }
+
+        fc.write(byteBuf);
+        dataOffset += bufferInfo.size;
+
+        if (flush) {
+            fos.flush();
+        }
+        return flush;
+    }
+
+    public int addTrack(MediaFormat mediaFormat, boolean isAudio) throws Exception {
+        return currentMp4Movie.addTrack(mediaFormat, isAudio);
+    }
+
+    public void finishMovie(boolean error) throws Exception {
+        if (mdat.getContentSize() != 0) {
+            flushCurrentMdat();
+        }
+
+        for (Track track : currentMp4Movie.getTracks()) {
+            List<Sample> samples = track.getSamples();
+            long[] sizes = new long[samples.size()];
+            for (int i = 0; i < sizes.length; i++) {
+                sizes[i] = samples.get(i).getSize();
+            }
+            track2SampleSizes.put(track, sizes);
+        }
+
+        Box moov = createMovieBox(currentMp4Movie);
+        moov.getBox(fc);
+        fos.flush();
+
+        fc.close();
+        fos.close();
+    }
+
+    protected FileTypeBox createFileTypeBox() {
+        LinkedList<String> minorBrands = new LinkedList<>();
+        minorBrands.add("isom");
+        minorBrands.add("3gp4");
+        return new FileTypeBox("isom", 0, minorBrands);
+    }
+
+    private class InterleaveChunkMdat implements Box {
+        private Container parent;
+        private long contentSize = 1024 * 1024 * 1024;
+        private long dataOffset = 0;
+
+        public Container getParent() {
+            return parent;
+        }
+
+        public long getOffset() {
+            return dataOffset;
+        }
+
+        public void setDataOffset(long offset) {
+            dataOffset = offset;
+        }
+
+        public void setParent(Container parent) {
+            this.parent = parent;
+        }
+
+        public void setContentSize(long contentSize) {
+            this.contentSize = contentSize;
+        }
+
+        public long getContentSize() {
+            return contentSize;
+        }
+
+        public String getType() {
+            return "mdat";
+        }
+
+        public long getSize() {
+            return 16 + contentSize;
+        }
+
+        private boolean isSmallBox(long contentSize) {
+            return (contentSize + 8) < 4294967296L;
+        }
+
+        @Override
+        public void parse(DataSource dataSource, ByteBuffer header, long contentSize, BoxParser boxParser) throws IOException {
+
+        }
+
+        public void getBox(WritableByteChannel writableByteChannel) throws IOException {
+            ByteBuffer bb = ByteBuffer.allocate(16);
+            long size = getSize();
+            if (isSmallBox(size)) {
+                IsoTypeWriter.writeUInt32(bb, size);
+            } else {
+                IsoTypeWriter.writeUInt32(bb, 1);
+            }
+            bb.put(IsoFile.fourCCtoBytes("mdat"));
+            if (isSmallBox(size)) {
+                bb.put(new byte[8]);
+            } else {
+                IsoTypeWriter.writeUInt64(bb, size);
+            }
+            bb.rewind();
+            writableByteChannel.write(bb);
+        }
+    }
+
+    public static long gcd(long a, long b) {
+        if (b == 0) {
+            return a;
+        }
+        return gcd(b, a % b);
+    }
+
+    public long getTimescale(Mp4Movie mp4Movie) {
+        long timescale = 0;
+        if (!mp4Movie.getTracks().isEmpty()) {
+            timescale = mp4Movie.getTracks().iterator().next().getTimeScale();
+        }
+        for (Track track : mp4Movie.getTracks()) {
+            timescale = gcd(track.getTimeScale(), timescale);
+        }
+        return timescale;
+    }
+
+    protected MovieBox createMovieBox(Mp4Movie movie) {
+        MovieBox movieBox = new MovieBox();
+        MovieHeaderBox mvhd = new MovieHeaderBox();
+
+        mvhd.setCreationTime(new Date());
+        mvhd.setModificationTime(new Date());
+        mvhd.setMatrix(Matrix.ROTATE_0);
+        long movieTimeScale = getTimescale(movie);
+        long duration = 0;
+
+        for (Track track : movie.getTracks()) {
+            long tracksDuration = track.getDuration() * movieTimeScale / track.getTimeScale();
+            if (tracksDuration > duration) {
+                duration = tracksDuration;
+            }
+        }
+
+        mvhd.setDuration(duration);
+        mvhd.setTimescale(movieTimeScale);
+        mvhd.setNextTrackId(movie.getTracks().size() + 1);
+
+        movieBox.addBox(mvhd);
+        for (Track track : movie.getTracks()) {
+            movieBox.addBox(createTrackBox(track, movie));
+        }
+        return movieBox;
+    }
+
+    protected TrackBox createTrackBox(Track track, Mp4Movie movie) {
+        TrackBox trackBox = new TrackBox();
+        TrackHeaderBox tkhd = new TrackHeaderBox();
+
+        tkhd.setEnabled(true);
+        tkhd.setInMovie(true);
+        tkhd.setInPreview(true);
+        if (track.isAudio()) {
+            tkhd.setMatrix(Matrix.ROTATE_0);
+        } else {
+            tkhd.setMatrix(movie.getMatrix());
+        }
+        tkhd.setAlternateGroup(0);
+        tkhd.setCreationTime(track.getCreationTime());
+        tkhd.setDuration(track.getDuration() * getTimescale(movie) / track.getTimeScale());
+        tkhd.setHeight(track.getHeight());
+        tkhd.setWidth(track.getWidth());
+        tkhd.setLayer(0);
+        tkhd.setModificationTime(new Date());
+        tkhd.setTrackId(track.getTrackId() + 1);
+        tkhd.setVolume(track.getVolume());
+
+        trackBox.addBox(tkhd);
+
+        MediaBox mdia = new MediaBox();
+        trackBox.addBox(mdia);
+        MediaHeaderBox mdhd = new MediaHeaderBox();
+        mdhd.setCreationTime(track.getCreationTime());
+        mdhd.setDuration(track.getDuration());
+        mdhd.setTimescale(track.getTimeScale());
+        mdhd.setLanguage("eng");
+        mdia.addBox(mdhd);
+        HandlerBox hdlr = new HandlerBox();
+        hdlr.setName(track.isAudio() ? "SoundHandle" : "VideoHandle");
+        hdlr.setHandlerType(track.getHandler());
+
+        mdia.addBox(hdlr);
+
+        MediaInformationBox minf = new MediaInformationBox();
+        minf.addBox(track.getMediaHeaderBox());
+
+        DataInformationBox dinf = new DataInformationBox();
+        DataReferenceBox dref = new DataReferenceBox();
+        dinf.addBox(dref);
+        DataEntryUrlBox url = new DataEntryUrlBox();
+        url.setFlags(1);
+        dref.addBox(url);
+        minf.addBox(dinf);
+
+        Box stbl = createStbl(track);
+        minf.addBox(stbl);
+        mdia.addBox(minf);
+
+        return trackBox;
+    }
+
+    protected Box createStbl(Track track) {
+        SampleTableBox stbl = new SampleTableBox();
+
+        createStsd(track, stbl);
+        createStts(track, stbl);
+        createStss(track, stbl);
+        createStsc(track, stbl);
+        createStsz(track, stbl);
+        createStco(track, stbl);
+
+        return stbl;
+    }
+
+    protected void createStsd(Track track, SampleTableBox stbl) {
+        stbl.addBox(track.getSampleDescriptionBox());
+    }
+
+    protected void createStts(Track track, SampleTableBox stbl) {
+        TimeToSampleBox.Entry lastEntry = null;
+        List<TimeToSampleBox.Entry> entries = new ArrayList<>();
+
+        for (long delta : track.getSampleDurations()) {
+            if (lastEntry != null && lastEntry.getDelta() == delta) {
+                lastEntry.setCount(lastEntry.getCount() + 1);
+            } else {
+                lastEntry = new TimeToSampleBox.Entry(1, delta);
+                entries.add(lastEntry);
+            }
+        }
+        TimeToSampleBox stts = new TimeToSampleBox();
+        stts.setEntries(entries);
+        stbl.addBox(stts);
+    }
+
+    protected void createStss(Track track, SampleTableBox stbl) {
+        long[] syncSamples = track.getSyncSamples();
+        if (syncSamples != null && syncSamples.length > 0) {
+            SyncSampleBox stss = new SyncSampleBox();
+            stss.setSampleNumber(syncSamples);
+            stbl.addBox(stss);
+        }
+    }
+
+    protected void createStsc(Track track, SampleTableBox stbl) {
+        SampleToChunkBox stsc = new SampleToChunkBox();
+        stsc.setEntries(new LinkedList<SampleToChunkBox.Entry>());
+
+        long lastOffset = -1;
+        int lastChunkNumber = 1;
+        int lastSampleCount = 0;
+
+        int previousWritedChunkCount = -1;
+
+        int samplesCount = track.getSamples().size();
+        for (int a = 0; a < samplesCount; a++) {
+            Sample sample = track.getSamples().get(a);
+            long offset = sample.getOffset();
+            long size = sample.getSize();
+
+            lastOffset = offset + size;
+            lastSampleCount++;
+
+            boolean write = false;
+            if (a != samplesCount - 1) {
+                Sample nextSample = track.getSamples().get(a + 1);
+                if (lastOffset != nextSample.getOffset()) {
+                    write = true;
+                }
+            } else {
+                write = true;
+            }
+            if (write) {
+                if (previousWritedChunkCount != lastSampleCount) {
+                    stsc.getEntries().add(new SampleToChunkBox.Entry(lastChunkNumber, lastSampleCount, 1));
+                    previousWritedChunkCount = lastSampleCount;
+                }
+                lastSampleCount = 0;
+                lastChunkNumber++;
+            }
+        }
+        stbl.addBox(stsc);
+    }
+
+    protected void createStsz(Track track, SampleTableBox stbl) {
+        SampleSizeBox stsz = new SampleSizeBox();
+        stsz.setSampleSizes(track2SampleSizes.get(track));
+        stbl.addBox(stsz);
+    }
+
+    protected void createStco(Track track, SampleTableBox stbl) {
+        ArrayList<Long> chunksOffsets = new ArrayList<>();
+        long lastOffset = -1;
+        for (Sample sample : track.getSamples()) {
+            long offset = sample.getOffset();
+            if (lastOffset != -1 && lastOffset != offset) {
+                lastOffset = -1;
+            }
+            if (lastOffset == -1) {
+                chunksOffsets.add(offset);
+            }
+            lastOffset = offset + sample.getSize();
+        }
+        long[] chunkOffsetsLong = new long[chunksOffsets.size()];
+        for (int a = 0; a < chunksOffsets.size(); a++) {
+            chunkOffsetsLong[a] = chunksOffsets.get(a);
+        }
+
+        StaticChunkOffsetBox stco = new StaticChunkOffsetBox();
+        stco.setChunkOffsets(chunkOffsetsLong);
+        stbl.addBox(stco);
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/MediaController.java b/src/main/java/de/pixart/messenger/utils/video/MediaController.java
new file mode 100644
index 000000000..2f60310a8
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/MediaController.java
@@ -0,0 +1,633 @@
+package de.pixart.messenger.utils.video;
+
+import android.annotation.SuppressLint;
+import android.annotation.TargetApi;
+import android.media.MediaCodec;
+import android.media.MediaCodecInfo;
+import android.media.MediaCodecList;
+import android.media.MediaExtractor;
+import android.media.MediaFormat;
+import android.media.MediaMetadataRetriever;
+import android.os.Build;
+import android.util.Log;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+
+import de.pixart.messenger.Config;
+
+@SuppressLint("NewApi")
+public class MediaController {
+
+    public final static String MIME_TYPE = "video/avc";
+    private final static int PROCESSOR_TYPE_OTHER = 0;
+    private final static int PROCESSOR_TYPE_QCOM = 1;
+    private final static int PROCESSOR_TYPE_INTEL = 2;
+    private final static int PROCESSOR_TYPE_MTK = 3;
+    private final static int PROCESSOR_TYPE_SEC = 4;
+    private final static int PROCESSOR_TYPE_TI = 5;
+    private static volatile MediaController Instance = null;
+    private boolean videoConvertFirstWrite = true;
+
+    public static MediaController getInstance() {
+        MediaController localInstance = Instance;
+        if (localInstance == null) {
+            synchronized (MediaController.class) {
+                localInstance = Instance;
+                if (localInstance == null) {
+                    Instance = localInstance = new MediaController();
+                }
+            }
+        }
+        return localInstance;
+    }
+
+    @SuppressLint("NewApi")
+    public static int selectColorFormat(MediaCodecInfo codecInfo, String mimeType) {
+        MediaCodecInfo.CodecCapabilities capabilities = codecInfo.getCapabilitiesForType(mimeType);
+        int lastColorFormat = 0;
+        for (int i = 0; i < capabilities.colorFormats.length; i++) {
+            int colorFormat = capabilities.colorFormats[i];
+            if (isRecognizedFormat(colorFormat)) {
+                lastColorFormat = colorFormat;
+                if (!(codecInfo.getName().equals("OMX.SEC.AVC.Encoder") && colorFormat == 19)) {
+                    return colorFormat;
+                }
+            }
+        }
+        return lastColorFormat;
+    }
+
+    private static boolean isRecognizedFormat(int colorFormat) {
+        switch (colorFormat) {
+            case MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420Planar:
+            case MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420PackedPlanar:
+            case MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420SemiPlanar:
+            case MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420PackedSemiPlanar:
+            case MediaCodecInfo.CodecCapabilities.COLOR_TI_FormatYUV420PackedSemiPlanar:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    public native static int convertVideoFrame(ByteBuffer src, ByteBuffer dest, int destFormat, int width, int height, int padding, int swap);
+
+    private void didWriteData(final boolean last, final boolean error) {
+        final boolean firstWrite = videoConvertFirstWrite;
+        if (firstWrite) {
+            videoConvertFirstWrite = false;
+        }
+    }
+
+    public static MediaCodecInfo selectCodec(String mimeType) {
+        int numCodecs = MediaCodecList.getCodecCount();
+        MediaCodecInfo lastCodecInfo = null;
+        for (int i = 0; i < numCodecs; i++) {
+            MediaCodecInfo codecInfo = MediaCodecList.getCodecInfoAt(i);
+            if (!codecInfo.isEncoder()) {
+                continue;
+            }
+            String[] types = codecInfo.getSupportedTypes();
+            for (String type : types) {
+                if (type.equalsIgnoreCase(mimeType)) {
+                    lastCodecInfo = codecInfo;
+                    if (!lastCodecInfo.getName().equals("OMX.SEC.avc.enc")) {
+                        return lastCodecInfo;
+                    } else if (lastCodecInfo.getName().equals("OMX.SEC.AVC.Encoder")) {
+                        return lastCodecInfo;
+                    }
+                }
+            }
+        }
+        return lastCodecInfo;
+    }
+
+    @TargetApi(16)
+    private long readAndWriteTrack(MediaExtractor extractor, MP4Builder mediaMuxer, MediaCodec.BufferInfo info, long start, long end, File file, boolean isAudio) throws Exception {
+        int trackIndex = selectTrack(extractor, isAudio);
+        if (trackIndex >= 0) {
+            extractor.selectTrack(trackIndex);
+            MediaFormat trackFormat = extractor.getTrackFormat(trackIndex);
+            int muxerTrackIndex = mediaMuxer.addTrack(trackFormat, isAudio);
+            int maxBufferSize = trackFormat.getInteger(MediaFormat.KEY_MAX_INPUT_SIZE);
+            boolean inputDone = false;
+            if (start > 0) {
+                extractor.seekTo(start, MediaExtractor.SEEK_TO_PREVIOUS_SYNC);
+            } else {
+                extractor.seekTo(0, MediaExtractor.SEEK_TO_PREVIOUS_SYNC);
+            }
+            ByteBuffer buffer = ByteBuffer.allocateDirect(maxBufferSize);
+            long startTime = -1;
+
+            while (!inputDone) {
+
+                boolean eof = false;
+                int index = extractor.getSampleTrackIndex();
+                if (index == trackIndex) {
+                    info.size = extractor.readSampleData(buffer, 0);
+
+                    if (info.size < 0) {
+                        info.size = 0;
+                        eof = true;
+                    } else {
+                        info.presentationTimeUs = extractor.getSampleTime();
+                        if (start > 0 && startTime == -1) {
+                            startTime = info.presentationTimeUs;
+                        }
+                        if (end < 0 || info.presentationTimeUs < end) {
+                            info.offset = 0;
+                            info.flags = extractor.getSampleFlags();
+                            if (mediaMuxer.writeSampleData(muxerTrackIndex, buffer, info, isAudio)) {
+                                // didWriteData(messageObject, file, false, false);
+                            }
+                            extractor.advance();
+                        } else {
+                            eof = true;
+                        }
+                    }
+                } else if (index == -1) {
+                    eof = true;
+                }
+                if (eof) {
+                    inputDone = true;
+                }
+            }
+
+            extractor.unselectTrack(trackIndex);
+            return startTime;
+        }
+        return -1;
+    }
+
+    @TargetApi(16)
+    private int selectTrack(MediaExtractor extractor, boolean audio) {
+        int numTracks = extractor.getTrackCount();
+        for (int i = 0; i < numTracks; i++) {
+            MediaFormat format = extractor.getTrackFormat(i);
+            String mime = format.getString(MediaFormat.KEY_MIME);
+            if (audio) {
+                if (mime.startsWith("audio/")) {
+                    return i;
+                }
+            } else {
+                if (mime.startsWith("video/")) {
+                    return i;
+                }
+            }
+        }
+        return -5;
+    }
+
+    @TargetApi(16)
+    public boolean convertVideo(final String path, final String compressed_path) {
+
+        MediaMetadataRetriever retriever = new MediaMetadataRetriever();
+        retriever.setDataSource(path);
+        String height = retriever.extractMetadata(MediaMetadataRetriever.METADATA_KEY_VIDEO_HEIGHT);
+        String width = retriever.extractMetadata(MediaMetadataRetriever.METADATA_KEY_VIDEO_WIDTH);
+        String rotation = retriever.extractMetadata(MediaMetadataRetriever.METADATA_KEY_VIDEO_ROTATION);
+
+        int video_height = Integer.parseInt(height);
+        int video_width = Integer.parseInt(width);
+
+        Log.d(Config.LOGTAG, "Video dimensions: height: " + height + " width: " + width + "rotation: " + rotation);
+
+        long startTime = -1;
+        long endTime = -1;
+
+        int resultWidth = 640;
+        int resultHeight = 360;
+
+        int rotationValue = Integer.valueOf(rotation);
+        int originalWidth = Integer.valueOf(width);
+        int originalHeight = Integer.valueOf(height);
+        double ratio = video_width/video_height;
+
+        int bitrate = 50000; //450000
+        int rotateRender = 0;
+
+        File cacheFile = new File(compressed_path);
+
+        if (Build.VERSION.SDK_INT < 18 && resultHeight > resultWidth && resultWidth != originalWidth && resultHeight != originalHeight) {
+            int temp = resultHeight;
+            resultHeight = resultWidth;
+            resultWidth = temp;
+            rotationValue = 90;
+            rotateRender = 270;
+        } else if (Build.VERSION.SDK_INT > 20) {
+            if (rotationValue == 90) {
+                int temp = resultHeight;
+                resultHeight = resultWidth;
+                resultWidth = temp;
+                rotationValue = 0;
+                rotateRender = 270;
+            } else if (rotationValue == 180) {
+                rotateRender = 180;
+                rotationValue = 0;
+            } else if (rotationValue == 270) {
+                int temp = resultHeight;
+                resultHeight = resultWidth;
+                resultWidth = temp;
+                rotationValue = 0;
+                rotateRender = 90;
+            }
+        }
+
+
+        File inputFile = new File(path);
+        Log.d(Config.LOGTAG, "Input file is: " + inputFile.toString());
+        if (!inputFile.canRead()) {
+            didWriteData(true, true);
+            Log.d(Config.LOGTAG, "Compression failed. Input file could not be read.");
+            return false;
+        }
+
+        videoConvertFirstWrite = true;
+        boolean error = false;
+        long videoStartTime = startTime;
+
+        long time = System.currentTimeMillis();
+
+        if (resultWidth != 0 && resultHeight != 0) {
+            MP4Builder mediaMuxer = null;
+            MediaExtractor extractor = null;
+
+            try {
+                MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
+                Mp4Movie movie = new Mp4Movie();
+                movie.setCacheFile(cacheFile);
+                movie.setRotation(rotationValue);
+                movie.setSize(resultWidth, resultHeight);
+                mediaMuxer = new MP4Builder().createMovie(movie);
+                extractor = new MediaExtractor();
+                extractor.setDataSource(inputFile.toString());
+
+
+                if (resultWidth != originalWidth || resultHeight != originalHeight) {
+                    int videoIndex;
+                    videoIndex = selectTrack(extractor, false);
+
+                    if (videoIndex >= 0) {
+                        MediaCodec decoder = null;
+                        MediaCodec encoder = null;
+                        InputSurface inputSurface = null;
+                        OutputSurface outputSurface = null;
+
+                        try {
+                            long videoTime = -1;
+                            boolean outputDone = false;
+                            boolean inputDone = false;
+                            boolean decoderDone = false;
+                            int swapUV = 0;
+                            int videoTrackIndex = -5;
+
+                            int colorFormat;
+                            int processorType = PROCESSOR_TYPE_OTHER;
+                            String manufacturer = Build.MANUFACTURER.toLowerCase();
+                            if (Build.VERSION.SDK_INT < 18) {
+                                MediaCodecInfo codecInfo = selectCodec(MIME_TYPE);
+                                colorFormat = selectColorFormat(codecInfo, MIME_TYPE);
+                                if (colorFormat == 0) {
+                                    throw new RuntimeException("no supported color format");
+                                }
+                                String codecName = codecInfo.getName();
+                                if (codecName.contains("OMX.qcom.")) {
+                                    processorType = PROCESSOR_TYPE_QCOM;
+                                    if (Build.VERSION.SDK_INT == 16) {
+                                        if (manufacturer.equals("lge") || manufacturer.equals("nokia")) {
+                                            swapUV = 1;
+                                        }
+                                    }
+                                } else if (codecName.contains("OMX.Intel.")) {
+                                    processorType = PROCESSOR_TYPE_INTEL;
+                                } else if (codecName.equals("OMX.MTK.VIDEO.ENCODER.AVC")) {
+                                    processorType = PROCESSOR_TYPE_MTK;
+                                } else if (codecName.equals("OMX.SEC.AVC.Encoder")) {
+                                    processorType = PROCESSOR_TYPE_SEC;
+                                    swapUV = 1;
+                                } else if (codecName.equals("OMX.TI.DUCATI1.VIDEO.H264E")) {
+                                    processorType = PROCESSOR_TYPE_TI;
+                                }
+                                Log.d(Config.LOGTAG, "codec = " + codecInfo.getName() + " manufacturer = " + manufacturer + "device = " + Build.MODEL);
+                            } else {
+                                colorFormat = MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface;
+                            }
+                            Log.d(Config.LOGTAG, "colorFormat = " + colorFormat);
+
+                            int resultHeightAligned = resultHeight;
+                            int padding = 0;
+                            int bufferSize = resultWidth * resultHeight * 3 / 2;
+                            if (processorType == PROCESSOR_TYPE_OTHER) {
+                                if (resultHeight % 16 != 0) {
+                                    resultHeightAligned += (16 - (resultHeight % 16));
+                                    padding = resultWidth * (resultHeightAligned - resultHeight);
+                                    bufferSize += padding * 5 / 4;
+                                }
+                            } else if (processorType == PROCESSOR_TYPE_QCOM) {
+                                if (!manufacturer.toLowerCase().equals("lge")) {
+                                    int uvoffset = (resultWidth * resultHeight + 2047) & ~2047;
+                                    padding = uvoffset - (resultWidth * resultHeight);
+                                    bufferSize += padding;
+                                }
+                            } else if (processorType == PROCESSOR_TYPE_TI) {
+                                resultHeightAligned = 368;
+                                bufferSize = resultWidth * resultHeightAligned * 3 / 2;
+                                resultHeightAligned += (16 - (resultHeight % 16));
+                                padding = resultWidth * (resultHeightAligned - resultHeight);
+                                bufferSize += padding * 5 / 4;
+                            } else if (processorType == PROCESSOR_TYPE_MTK) {
+                                if (manufacturer.equals("baidu")) {
+                                    resultHeightAligned += (16 - (resultHeight % 16));
+                                    padding = resultWidth * (resultHeightAligned - resultHeight);
+                                    bufferSize += padding * 5 / 4;
+                                }
+                            }
+
+                            extractor.selectTrack(videoIndex);
+                            if (startTime > 0) {
+                                extractor.seekTo(startTime, MediaExtractor.SEEK_TO_PREVIOUS_SYNC);
+                            } else {
+                                extractor.seekTo(0, MediaExtractor.SEEK_TO_PREVIOUS_SYNC);
+                            }
+                            MediaFormat inputFormat = extractor.getTrackFormat(videoIndex);
+
+                            MediaFormat outputFormat = MediaFormat.createVideoFormat(MIME_TYPE, resultWidth, resultHeight);
+                            outputFormat.setInteger(MediaFormat.KEY_COLOR_FORMAT, colorFormat);
+                            outputFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitrate != 0 ? bitrate : 921600);
+                            outputFormat.setInteger(MediaFormat.KEY_FRAME_RATE, 25);
+                            outputFormat.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, 10);
+                            if (Build.VERSION.SDK_INT < 18) {
+                                outputFormat.setInteger("stride", resultWidth + 32);
+                                outputFormat.setInteger("slice-height", resultHeight);
+                            }
+
+                            encoder = MediaCodec.createEncoderByType(MIME_TYPE);
+                            encoder.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
+                            if (Build.VERSION.SDK_INT >= 18) {
+                                inputSurface = new InputSurface(encoder.createInputSurface());
+                                inputSurface.makeCurrent();
+                            }
+                            encoder.start();
+
+                            decoder = MediaCodec.createDecoderByType(inputFormat.getString(MediaFormat.KEY_MIME));
+                            if (Build.VERSION.SDK_INT >= 18) {
+                                outputSurface = new OutputSurface();
+                            } else {
+                                outputSurface = new OutputSurface(resultWidth, resultHeight, rotateRender);
+                            }
+                            decoder.configure(inputFormat, outputSurface.getSurface(), null, 0);
+                            decoder.start();
+
+                            final int TIMEOUT_USEC = 2500;
+                            ByteBuffer[] decoderInputBuffers = null;
+                            ByteBuffer[] encoderOutputBuffers = null;
+                            ByteBuffer[] encoderInputBuffers = null;
+                            if (Build.VERSION.SDK_INT < 21) {
+                                decoderInputBuffers = decoder.getInputBuffers();
+                                encoderOutputBuffers = encoder.getOutputBuffers();
+                                if (Build.VERSION.SDK_INT < 18) {
+                                    encoderInputBuffers = encoder.getInputBuffers();
+                                }
+                            }
+
+                            while (!outputDone) {
+                                if (!inputDone) {
+                                    boolean eof = false;
+                                    int index = extractor.getSampleTrackIndex();
+                                    if (index == videoIndex) {
+                                        int inputBufIndex = decoder.dequeueInputBuffer(TIMEOUT_USEC);
+                                        if (inputBufIndex >= 0) {
+                                            ByteBuffer inputBuf;
+                                            if (Build.VERSION.SDK_INT < 21) {
+                                                inputBuf = decoderInputBuffers[inputBufIndex];
+                                            } else {
+                                                inputBuf = decoder.getInputBuffer(inputBufIndex);
+                                            }
+                                            int chunkSize = extractor.readSampleData(inputBuf, 0);
+                                            if (chunkSize < 0) {
+                                                decoder.queueInputBuffer(inputBufIndex, 0, 0, 0L, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
+                                                inputDone = true;
+                                            } else {
+                                                decoder.queueInputBuffer(inputBufIndex, 0, chunkSize, extractor.getSampleTime(), 0);
+                                                extractor.advance();
+                                            }
+                                        }
+                                    } else if (index == -1) {
+                                        eof = true;
+                                    }
+                                    if (eof) {
+                                        int inputBufIndex = decoder.dequeueInputBuffer(TIMEOUT_USEC);
+                                        if (inputBufIndex >= 0) {
+                                            decoder.queueInputBuffer(inputBufIndex, 0, 0, 0L, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
+                                            inputDone = true;
+                                        }
+                                    }
+                                }
+
+                                boolean decoderOutputAvailable = !decoderDone;
+                                boolean encoderOutputAvailable = true;
+                                while (decoderOutputAvailable || encoderOutputAvailable) {
+                                    int encoderStatus = encoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
+                                    if (encoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER) {
+                                        encoderOutputAvailable = false;
+                                    } else if (encoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
+                                        if (Build.VERSION.SDK_INT < 21) {
+                                            encoderOutputBuffers = encoder.getOutputBuffers();
+                                        }
+                                    } else if (encoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
+                                        MediaFormat newFormat = encoder.getOutputFormat();
+                                        if (videoTrackIndex == -5) {
+                                            videoTrackIndex = mediaMuxer.addTrack(newFormat, false);
+                                        }
+                                    } else if (encoderStatus < 0) {
+                                        throw new RuntimeException("unexpected result from encoder.dequeueOutputBuffer: " + encoderStatus);
+                                    } else {
+                                        ByteBuffer encodedData;
+                                        if (Build.VERSION.SDK_INT < 21) {
+                                            encodedData = encoderOutputBuffers[encoderStatus];
+                                        } else {
+                                            encodedData = encoder.getOutputBuffer(encoderStatus);
+                                        }
+                                        if (encodedData == null) {
+                                            throw new RuntimeException("encoderOutputBuffer " + encoderStatus + " was null");
+                                        }
+                                        if (info.size > 1) {
+                                            if ((info.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) == 0) {
+                                                if (mediaMuxer.writeSampleData(videoTrackIndex, encodedData, info, false)) {
+                                                    didWriteData(false, false);
+                                                }
+                                            } else if (videoTrackIndex == -5) {
+                                                byte[] csd = new byte[info.size];
+                                                encodedData.limit(info.offset + info.size);
+                                                encodedData.position(info.offset);
+                                                encodedData.get(csd);
+                                                ByteBuffer sps = null;
+                                                ByteBuffer pps = null;
+                                                for (int a = info.size - 1; a >= 0; a--) {
+                                                    if (a > 3) {
+                                                        if (csd[a] == 1 && csd[a - 1] == 0 && csd[a - 2] == 0 && csd[a - 3] == 0) {
+                                                            sps = ByteBuffer.allocate(a - 3);
+                                                            pps = ByteBuffer.allocate(info.size - (a - 3));
+                                                            sps.put(csd, 0, a - 3).position(0);
+                                                            pps.put(csd, a - 3, info.size - (a - 3)).position(0);
+                                                            break;
+                                                        }
+                                                    } else {
+                                                        break;
+                                                    }
+                                                }
+
+                                                MediaFormat newFormat = MediaFormat.createVideoFormat(MIME_TYPE, resultWidth, resultHeight);
+                                                if (sps != null && pps != null) {
+                                                    newFormat.setByteBuffer("csd-0", sps);
+                                                    newFormat.setByteBuffer("csd-1", pps);
+                                                }
+                                                videoTrackIndex = mediaMuxer.addTrack(newFormat, false);
+                                            }
+                                        }
+                                        outputDone = (info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0;
+                                        encoder.releaseOutputBuffer(encoderStatus, false);
+                                    }
+                                    if (encoderStatus != MediaCodec.INFO_TRY_AGAIN_LATER) {
+                                        continue;
+                                    }
+
+                                    if (!decoderDone) {
+                                        int decoderStatus = decoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
+                                        if (decoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER) {
+                                            decoderOutputAvailable = false;
+                                        } else if (decoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
+
+                                        } else if (decoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
+                                            MediaFormat newFormat = decoder.getOutputFormat();
+                                            Log.d(Config.LOGTAG, "newFormat = " + newFormat);
+                                        } else if (decoderStatus < 0) {
+                                            throw new RuntimeException("unexpected result from decoder.dequeueOutputBuffer: " + decoderStatus);
+                                        } else {
+                                            boolean doRender;
+                                            if (Build.VERSION.SDK_INT >= 18) {
+                                                doRender = info.size != 0;
+                                            } else {
+                                                doRender = info.size != 0 || info.presentationTimeUs != 0;
+                                            }
+                                            if (endTime > 0 && info.presentationTimeUs >= endTime) {
+                                                inputDone = true;
+                                                decoderDone = true;
+                                                doRender = false;
+                                                info.flags |= MediaCodec.BUFFER_FLAG_END_OF_STREAM;
+                                            }
+                                            if (startTime > 0 && videoTime == -1) {
+                                                if (info.presentationTimeUs < startTime) {
+                                                    doRender = false;
+                                                    Log.d(Config.LOGTAG, "drop frame startTime = " + startTime + " present time = " + info.presentationTimeUs);
+                                                } else {
+                                                    videoTime = info.presentationTimeUs;
+                                                }
+                                            }
+                                            decoder.releaseOutputBuffer(decoderStatus, doRender);
+                                            if (doRender) {
+                                                boolean errorWait = false;
+                                                try {
+                                                    outputSurface.awaitNewImage();
+                                                } catch (Exception e) {
+                                                    errorWait = true;
+                                                    Log.d(Config.LOGTAG, e.getMessage());
+                                                }
+                                                if (!errorWait) {
+                                                    if (Build.VERSION.SDK_INT >= 18) {
+                                                        outputSurface.drawImage(false);
+                                                        inputSurface.setPresentationTime(info.presentationTimeUs * 1000);
+                                                        inputSurface.swapBuffers();
+                                                    } else {
+                                                        int inputBufIndex = encoder.dequeueInputBuffer(TIMEOUT_USEC);
+                                                        if (inputBufIndex >= 0) {
+                                                            outputSurface.drawImage(true);
+                                                            ByteBuffer rgbBuf = outputSurface.getFrame();
+                                                            ByteBuffer yuvBuf = encoderInputBuffers[inputBufIndex];
+                                                            yuvBuf.clear();
+                                                            convertVideoFrame(rgbBuf, yuvBuf, colorFormat, resultWidth, resultHeight, padding, swapUV);
+                                                            encoder.queueInputBuffer(inputBufIndex, 0, bufferSize, info.presentationTimeUs, 0);
+                                                        } else {
+                                                            Log.d(Config.LOGTAG, "input buffer not available");
+                                                        }
+                                                    }
+                                                }
+                                            }
+                                            if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
+                                                decoderOutputAvailable = false;
+                                                Log.d(Config.LOGTAG, "decoder stream end");
+                                                if (Build.VERSION.SDK_INT >= 18) {
+                                                    encoder.signalEndOfInputStream();
+                                                } else {
+                                                    int inputBufIndex = encoder.dequeueInputBuffer(TIMEOUT_USEC);
+                                                    if (inputBufIndex >= 0) {
+                                                        encoder.queueInputBuffer(inputBufIndex, 0, 1, info.presentationTimeUs, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                            if (videoTime != -1) {
+                                videoStartTime = videoTime;
+                            }
+                        } catch (Exception e) {
+                            Log.d(Config.LOGTAG, e.getMessage());
+                            error = true;
+                        }
+
+                        extractor.unselectTrack(videoIndex);
+
+                        if (outputSurface != null) {
+                            outputSurface.release();
+                        }
+                        if (inputSurface != null) {
+                            inputSurface.release();
+                        }
+                        if (decoder != null) {
+                            decoder.stop();
+                            decoder.release();
+                        }
+                        if (encoder != null) {
+                            encoder.stop();
+                            encoder.release();
+                        }
+                    }
+                } else {
+                    long videoTime = readAndWriteTrack(extractor, mediaMuxer, info, startTime, endTime, cacheFile, false);
+                    if (videoTime != -1) {
+                        videoStartTime = videoTime;
+                    }
+                }
+                if (!error) {
+                    readAndWriteTrack(extractor, mediaMuxer, info, videoStartTime, endTime, cacheFile, true);
+                }
+            } catch (Exception e) {
+                error = true;
+                Log.d(Config.LOGTAG, e.getMessage());
+            } finally {
+                if (extractor != null) {
+                    extractor.release();
+                }
+                if (mediaMuxer != null) {
+                    try {
+                        mediaMuxer.finishMovie(false);
+                    } catch (Exception e) {
+                        Log.d(Config.LOGTAG, e.getMessage());
+                    }
+                }
+                Log.d(Config.LOGTAG, "time = " + (System.currentTimeMillis() - time));
+            }
+        } else {
+            didWriteData(true, true);
+            Log.d(Config.LOGTAG, "Compression failed.");
+            return false;
+        }
+        didWriteData(true, error);
+        Log.d(Config.LOGTAG, "Compression succeed. Save compressed video to " + cacheFile.toString());
+        //inputFile.delete();
+        return true;
+    }
+}
+\ No newline at end of file
diff --git a/src/main/java/de/pixart/messenger/utils/video/Mp4Movie.java b/src/main/java/de/pixart/messenger/utils/video/Mp4Movie.java
new file mode 100644
index 000000000..4cf206d26
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/Mp4Movie.java
@@ -0,0 +1,81 @@
+/*
+ * This is the source code of Telegram for Android v. 1.7.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2014.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.media.MediaCodec;
+import android.media.MediaFormat;
+
+import com.googlecode.mp4parser.util.Matrix;
+
+import java.io.File;
+import java.util.ArrayList;
+
+@TargetApi(16)
+public class Mp4Movie {
+    private Matrix matrix = Matrix.ROTATE_0;
+    private ArrayList<Track> tracks = new ArrayList<Track>();
+    private File cacheFile;
+    private int width;
+    private int height;
+
+    public Matrix getMatrix() {
+        return matrix;
+    }
+
+    public int getWidth() {
+        return width;
+    }
+
+    public int getHeight() {
+        return height;
+    }
+
+    public void setCacheFile(File file) {
+        cacheFile = file;
+    }
+
+    public void setRotation(int angle) {
+        if (angle == 0) {
+            matrix = Matrix.ROTATE_0;
+        } else if (angle == 90) {
+            matrix = Matrix.ROTATE_90;
+        } else if (angle == 180) {
+            matrix = Matrix.ROTATE_180;
+        } else if (angle == 270) {
+            matrix = Matrix.ROTATE_270;
+        }
+    }
+
+    public void setSize(int w, int h) {
+        width = w;
+        height = h;
+    }
+
+    public ArrayList<Track> getTracks() {
+        return tracks;
+    }
+
+    public File getCacheFile() {
+        return cacheFile;
+    }
+
+    public void addSample(int trackIndex, long offset, MediaCodec.BufferInfo bufferInfo) throws Exception {
+        if (trackIndex < 0 || trackIndex >= tracks.size()) {
+            return;
+        }
+        Track track = tracks.get(trackIndex);
+        track.addSample(offset, bufferInfo);
+    }
+
+    public int addTrack(MediaFormat mediaFormat, boolean isAudio) throws Exception {
+        tracks.add(new Track(tracks.size(), mediaFormat, isAudio));
+        return tracks.size() - 1;
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/OutputSurface.java b/src/main/java/de/pixart/messenger/utils/video/OutputSurface.java
new file mode 100644
index 000000000..75ceb46ac
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/OutputSurface.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.graphics.SurfaceTexture;
+import android.opengl.GLES20;
+import android.view.Surface;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import javax.microedition.khronos.egl.EGL10;
+import javax.microedition.khronos.egl.EGLConfig;
+import javax.microedition.khronos.egl.EGLContext;
+import javax.microedition.khronos.egl.EGLDisplay;
+import javax.microedition.khronos.egl.EGLSurface;
+
+@TargetApi(16)
+public class OutputSurface implements SurfaceTexture.OnFrameAvailableListener {
+
+    private static final int EGL_OPENGL_ES2_BIT = 4;
+    private static final int EGL_CONTEXT_CLIENT_VERSION = 0x3098;
+    private EGL10 mEGL;
+    private EGLDisplay mEGLDisplay = null;
+    private EGLContext mEGLContext = null;
+    private EGLSurface mEGLSurface = null;
+    private SurfaceTexture mSurfaceTexture;
+    private Surface mSurface;
+    private final Object mFrameSyncObject = new Object();
+    private boolean mFrameAvailable;
+    private TextureRenderer mTextureRender;
+    private int mWidth;
+    private int mHeight;
+    private int rotateRender = 0;
+    private ByteBuffer mPixelBuf;
+
+    public OutputSurface(int width, int height, int rotate) {
+        if (width <= 0 || height <= 0) {
+            throw new IllegalArgumentException();
+        }
+        mWidth = width;
+        mHeight = height;
+        rotateRender = rotate;
+        mPixelBuf = ByteBuffer.allocateDirect(mWidth * mHeight * 4);
+        mPixelBuf.order(ByteOrder.LITTLE_ENDIAN);
+        eglSetup(width, height);
+        makeCurrent();
+        setup();
+    }
+
+    public OutputSurface() {
+        setup();
+    }
+
+    private void setup() {
+        mTextureRender = new TextureRenderer(rotateRender);
+        mTextureRender.surfaceCreated();
+        mSurfaceTexture = new SurfaceTexture(mTextureRender.getTextureId());
+        mSurfaceTexture.setOnFrameAvailableListener(this);
+        mSurface = new Surface(mSurfaceTexture);
+    }
+
+    private void eglSetup(int width, int height) {
+        mEGL = (EGL10) EGLContext.getEGL();
+        mEGLDisplay = mEGL.eglGetDisplay(EGL10.EGL_DEFAULT_DISPLAY);
+
+        if (mEGLDisplay == EGL10.EGL_NO_DISPLAY) {
+            throw new RuntimeException("unable to get EGL10 display");
+        }
+
+        if (!mEGL.eglInitialize(mEGLDisplay, null)) {
+            mEGLDisplay = null;
+            throw new RuntimeException("unable to initialize EGL10");
+        }
+
+        int[] attribList = {
+                EGL10.EGL_RED_SIZE, 8,
+                EGL10.EGL_GREEN_SIZE, 8,
+                EGL10.EGL_BLUE_SIZE, 8,
+                EGL10.EGL_ALPHA_SIZE, 8,
+                EGL10.EGL_SURFACE_TYPE, EGL10.EGL_PBUFFER_BIT,
+                EGL10.EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
+                EGL10.EGL_NONE
+        };
+        EGLConfig[] configs = new EGLConfig[1];
+        int[] numConfigs = new int[1];
+        if (!mEGL.eglChooseConfig(mEGLDisplay, attribList, configs, configs.length, numConfigs)) {
+            throw new RuntimeException("unable to find RGB888+pbuffer EGL config");
+        }
+        int[] attrib_list = {
+                EGL_CONTEXT_CLIENT_VERSION, 2,
+                EGL10.EGL_NONE
+        };
+        mEGLContext = mEGL.eglCreateContext(mEGLDisplay, configs[0], EGL10.EGL_NO_CONTEXT, attrib_list);
+        checkEglError("eglCreateContext");
+        if (mEGLContext == null) {
+            throw new RuntimeException("null context");
+        }
+        int[] surfaceAttribs = {
+                EGL10.EGL_WIDTH, width,
+                EGL10.EGL_HEIGHT, height,
+                EGL10.EGL_NONE
+        };
+        mEGLSurface = mEGL.eglCreatePbufferSurface(mEGLDisplay, configs[0], surfaceAttribs);
+        checkEglError("eglCreatePbufferSurface");
+        if (mEGLSurface == null) {
+            throw new RuntimeException("surface was null");
+        }
+    }
+
+    public void release() {
+        if (mEGL != null) {
+            if (mEGL.eglGetCurrentContext().equals(mEGLContext)) {
+                mEGL.eglMakeCurrent(mEGLDisplay, EGL10.EGL_NO_SURFACE, EGL10.EGL_NO_SURFACE, EGL10.EGL_NO_CONTEXT);
+            }
+            mEGL.eglDestroySurface(mEGLDisplay, mEGLSurface);
+            mEGL.eglDestroyContext(mEGLDisplay, mEGLContext);
+        }
+        mSurface.release();
+        mEGLDisplay = null;
+        mEGLContext = null;
+        mEGLSurface = null;
+        mEGL = null;
+        mTextureRender = null;
+        mSurface = null;
+        mSurfaceTexture = null;
+    }
+
+    public void makeCurrent() {
+        if (mEGL == null) {
+            throw new RuntimeException("not configured for makeCurrent");
+        }
+        checkEglError("before makeCurrent");
+        if (!mEGL.eglMakeCurrent(mEGLDisplay, mEGLSurface, mEGLSurface, mEGLContext)) {
+            throw new RuntimeException("eglMakeCurrent failed");
+        }
+    }
+
+    public Surface getSurface() {
+        return mSurface;
+    }
+
+    public void changeFragmentShader(String fragmentShader) {
+        mTextureRender.changeFragmentShader(fragmentShader);
+    }
+
+    public void awaitNewImage() {
+        final int TIMEOUT_MS = 5000;
+        synchronized (mFrameSyncObject) {
+            while (!mFrameAvailable) {
+                try {
+                    mFrameSyncObject.wait(TIMEOUT_MS);
+                    if (!mFrameAvailable) {
+                        throw new RuntimeException("Surface frame wait timed out");
+                    }
+                } catch (InterruptedException ie) {
+                    throw new RuntimeException(ie);
+                }
+            }
+            mFrameAvailable = false;
+        }
+        mTextureRender.checkGlError("before updateTexImage");
+        mSurfaceTexture.updateTexImage();
+    }
+
+    public void drawImage(boolean invert) {
+        mTextureRender.drawFrame(mSurfaceTexture, invert);
+    }
+
+    @Override
+    public void onFrameAvailable(SurfaceTexture st) {
+        synchronized (mFrameSyncObject) {
+            if (mFrameAvailable) {
+                throw new RuntimeException("mFrameAvailable already set, frame could be dropped");
+            }
+            mFrameAvailable = true;
+            mFrameSyncObject.notifyAll();
+        }
+    }
+
+    public ByteBuffer getFrame() {
+        mPixelBuf.rewind();
+        GLES20.glReadPixels(0, 0, mWidth, mHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, mPixelBuf);
+        return mPixelBuf;
+    }
+
+    private void checkEglError(String msg) {
+        if (mEGL.eglGetError() != EGL10.EGL_SUCCESS) {
+            throw new RuntimeException("EGL error encountered (see log)");
+        }
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/Sample.java b/src/main/java/de/pixart/messenger/utils/video/Sample.java
new file mode 100644
index 000000000..f00ee5311
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/Sample.java
@@ -0,0 +1,27 @@
+/*
+ * This is the source code of Telegram for Android v. 1.7.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2014.
+ */
+
+package de.pixart.messenger.utils.video;
+
+public class Sample {
+    private long offset = 0;
+    private long size = 0;
+
+    public Sample(long offset, long size) {
+        this.offset = offset;
+        this.size = size;
+    }
+
+    public long getOffset() {
+        return offset;
+    }
+
+    public long getSize() {
+        return size;
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/TextureRenderer.java b/src/main/java/de/pixart/messenger/utils/video/TextureRenderer.java
new file mode 100644
index 000000000..aa4542fa3
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/TextureRenderer.java
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.graphics.SurfaceTexture;
+import android.opengl.GLES11Ext;
+import android.opengl.GLES20;
+import android.opengl.Matrix;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+
+@TargetApi(16)
+public class TextureRenderer {
+
+    private static final int FLOAT_SIZE_BYTES = 4;
+    private static final int TRIANGLE_VERTICES_DATA_STRIDE_BYTES = 5 * FLOAT_SIZE_BYTES;
+    private static final int TRIANGLE_VERTICES_DATA_POS_OFFSET = 0;
+    private static final int TRIANGLE_VERTICES_DATA_UV_OFFSET = 3;
+    private static final float[] mTriangleVerticesData = {
+            -1.0f, -1.0f, 0, 0.f, 0.f,
+            1.0f, -1.0f, 0, 1.f, 0.f,
+            -1.0f, 1.0f, 0, 0.f, 1.f,
+            1.0f, 1.0f, 0, 1.f, 1.f,
+    };
+    private FloatBuffer mTriangleVertices;
+
+    private static final String VERTEX_SHADER =
+            "uniform mat4 uMVPMatrix;\n" +
+            "uniform mat4 uSTMatrix;\n" +
+            "attribute vec4 aPosition;\n" +
+            "attribute vec4 aTextureCoord;\n" +
+            "varying vec2 vTextureCoord;\n" +
+            "void main() {\n" +
+            "  gl_Position = uMVPMatrix * aPosition;\n" +
+            "  vTextureCoord = (uSTMatrix * aTextureCoord).xy;\n" +
+            "}\n";
+
+    private static final String FRAGMENT_SHADER =
+            "#extension GL_OES_EGL_image_external : require\n" +
+            "precision mediump float;\n" +
+            "varying vec2 vTextureCoord;\n" +
+            "uniform samplerExternalOES sTexture;\n" +
+            "void main() {\n" +
+            "  gl_FragColor = texture2D(sTexture, vTextureCoord);\n" +
+            "}\n";
+
+    private float[] mMVPMatrix = new float[16];
+    private float[] mSTMatrix = new float[16];
+    private int mProgram;
+    private int mTextureID = -12345;
+    private int muMVPMatrixHandle;
+    private int muSTMatrixHandle;
+    private int maPositionHandle;
+    private int maTextureHandle;
+    private int rotationAngle = 0;
+
+    public TextureRenderer(int rotation) {
+        rotationAngle = rotation;
+        mTriangleVertices = ByteBuffer.allocateDirect(mTriangleVerticesData.length * FLOAT_SIZE_BYTES).order(ByteOrder.nativeOrder()).asFloatBuffer();
+        mTriangleVertices.put(mTriangleVerticesData).position(0);
+        Matrix.setIdentityM(mSTMatrix, 0);
+    }
+
+    public int getTextureId() {
+        return mTextureID;
+    }
+
+    public void drawFrame(SurfaceTexture st, boolean invert) {
+        checkGlError("onDrawFrame start");
+        st.getTransformMatrix(mSTMatrix);
+
+        if (invert) {
+            mSTMatrix[5] = -mSTMatrix[5];
+            mSTMatrix[13] = 1.0f - mSTMatrix[13];
+        }
+
+        GLES20.glUseProgram(mProgram);
+        checkGlError("glUseProgram");
+        GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
+        GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, mTextureID);
+        mTriangleVertices.position(TRIANGLE_VERTICES_DATA_POS_OFFSET);
+        GLES20.glVertexAttribPointer(maPositionHandle, 3, GLES20.GL_FLOAT, false, TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
+        checkGlError("glVertexAttribPointer maPosition");
+        GLES20.glEnableVertexAttribArray(maPositionHandle);
+        checkGlError("glEnableVertexAttribArray maPositionHandle");
+        mTriangleVertices.position(TRIANGLE_VERTICES_DATA_UV_OFFSET);
+        GLES20.glVertexAttribPointer(maTextureHandle, 2, GLES20.GL_FLOAT, false, TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
+        checkGlError("glVertexAttribPointer maTextureHandle");
+        GLES20.glEnableVertexAttribArray(maTextureHandle);
+        checkGlError("glEnableVertexAttribArray maTextureHandle");
+        GLES20.glUniformMatrix4fv(muSTMatrixHandle, 1, false, mSTMatrix, 0);
+        GLES20.glUniformMatrix4fv(muMVPMatrixHandle, 1, false, mMVPMatrix, 0);
+        GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
+        checkGlError("glDrawArrays");
+        GLES20.glFinish();
+    }
+
+    public void surfaceCreated() {
+        mProgram = createProgram(VERTEX_SHADER, FRAGMENT_SHADER);
+        if (mProgram == 0) {
+            throw new RuntimeException("failed creating program");
+        }
+        maPositionHandle = GLES20.glGetAttribLocation(mProgram, "aPosition");
+        checkGlError("glGetAttribLocation aPosition");
+        if (maPositionHandle == -1) {
+            throw new RuntimeException("Could not get attrib location for aPosition");
+        }
+        maTextureHandle = GLES20.glGetAttribLocation(mProgram, "aTextureCoord");
+        checkGlError("glGetAttribLocation aTextureCoord");
+        if (maTextureHandle == -1) {
+            throw new RuntimeException("Could not get attrib location for aTextureCoord");
+        }
+        muMVPMatrixHandle = GLES20.glGetUniformLocation(mProgram, "uMVPMatrix");
+        checkGlError("glGetUniformLocation uMVPMatrix");
+        if (muMVPMatrixHandle == -1) {
+            throw new RuntimeException("Could not get attrib location for uMVPMatrix");
+        }
+        muSTMatrixHandle = GLES20.glGetUniformLocation(mProgram, "uSTMatrix");
+        checkGlError("glGetUniformLocation uSTMatrix");
+        if (muSTMatrixHandle == -1) {
+            throw new RuntimeException("Could not get attrib location for uSTMatrix");
+        }
+        int[] textures = new int[1];
+        GLES20.glGenTextures(1, textures, 0);
+        mTextureID = textures[0];
+        GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, mTextureID);
+        checkGlError("glBindTexture mTextureID");
+        GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_MIN_FILTER, GLES20.GL_NEAREST);
+        GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_MAG_FILTER, GLES20.GL_LINEAR);
+        GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_WRAP_S, GLES20.GL_CLAMP_TO_EDGE);
+        GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_WRAP_T, GLES20.GL_CLAMP_TO_EDGE);
+        checkGlError("glTexParameter");
+
+        Matrix.setIdentityM(mMVPMatrix, 0);
+        if (rotationAngle != 0) {
+            Matrix.rotateM(mMVPMatrix, 0, rotationAngle, 0, 0, 1);
+        }
+    }
+
+    public void changeFragmentShader(String fragmentShader) {
+        GLES20.glDeleteProgram(mProgram);
+        mProgram = createProgram(VERTEX_SHADER, fragmentShader);
+        if (mProgram == 0) {
+            throw new RuntimeException("failed creating program");
+        }
+    }
+
+    private int loadShader(int shaderType, String source) {
+        int shader = GLES20.glCreateShader(shaderType);
+        checkGlError("glCreateShader type=" + shaderType);
+        GLES20.glShaderSource(shader, source);
+        GLES20.glCompileShader(shader);
+        int[] compiled = new int[1];
+        GLES20.glGetShaderiv(shader, GLES20.GL_COMPILE_STATUS, compiled, 0);
+        if (compiled[0] == 0) {
+            GLES20.glDeleteShader(shader);
+            shader = 0;
+        }
+        return shader;
+    }
+
+    private int createProgram(String vertexSource, String fragmentSource) {
+        int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, vertexSource);
+        if (vertexShader == 0) {
+            return 0;
+        }
+        int pixelShader = loadShader(GLES20.GL_FRAGMENT_SHADER, fragmentSource);
+        if (pixelShader == 0) {
+            return 0;
+        }
+        int program = GLES20.glCreateProgram();
+        checkGlError("glCreateProgram");
+        if (program == 0) {
+            return 0;
+        }
+        GLES20.glAttachShader(program, vertexShader);
+        checkGlError("glAttachShader");
+        GLES20.glAttachShader(program, pixelShader);
+        checkGlError("glAttachShader");
+        GLES20.glLinkProgram(program);
+        int[] linkStatus = new int[1];
+        GLES20.glGetProgramiv(program, GLES20.GL_LINK_STATUS, linkStatus, 0);
+        if (linkStatus[0] != GLES20.GL_TRUE) {
+            GLES20.glDeleteProgram(program);
+            program = 0;
+        }
+        return program;
+    }
+
+    public void checkGlError(String op) {
+        int error;
+        if ((error = GLES20.glGetError()) != GLES20.GL_NO_ERROR) {
+            throw new RuntimeException(op + ": glError " + error);
+        }
+    }
+}
diff --git a/src/main/java/de/pixart/messenger/utils/video/Track.java b/src/main/java/de/pixart/messenger/utils/video/Track.java
new file mode 100644
index 000000000..347c65490
--- /dev/null
+++ b/src/main/java/de/pixart/messenger/utils/video/Track.java
@@ -0,0 +1,263 @@
+/*
+ * This is the source code of Telegram for Android v. 1.7.x.
+ * It is licensed under GNU GPL v. 2 or later.
+ * You should have received a copy of the license in this archive (see LICENSE).
+ *
+ * Copyright Nikolai Kudashov, 2013-2014.
+ */
+
+package de.pixart.messenger.utils.video;
+
+import android.annotation.TargetApi;
+import android.media.MediaCodec;
+import android.media.MediaFormat;
+
+import com.coremedia.iso.boxes.AbstractMediaHeaderBox;
+import com.coremedia.iso.boxes.SampleDescriptionBox;
+import com.coremedia.iso.boxes.SoundMediaHeaderBox;
+import com.coremedia.iso.boxes.VideoMediaHeaderBox;
+import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry;
+import com.coremedia.iso.boxes.sampleentry.VisualSampleEntry;
+import com.googlecode.mp4parser.boxes.mp4.ESDescriptorBox;
+import com.googlecode.mp4parser.boxes.mp4.objectdescriptors.AudioSpecificConfig;
+import com.googlecode.mp4parser.boxes.mp4.objectdescriptors.DecoderConfigDescriptor;
+import com.googlecode.mp4parser.boxes.mp4.objectdescriptors.ESDescriptor;
+import com.googlecode.mp4parser.boxes.mp4.objectdescriptors.SLConfigDescriptor;
+import com.mp4parser.iso14496.part15.AvcConfigurationBox;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+@TargetApi(16)
+public class Track {
+    private long trackId = 0;
+    private ArrayList<Sample> samples = new ArrayList<Sample>();
+    private long duration = 0;
+    private String handler;
+    private AbstractMediaHeaderBox headerBox = null;
+    private SampleDescriptionBox sampleDescriptionBox = null;
+    private LinkedList<Integer> syncSamples = null;
+    private int timeScale;
+    private Date creationTime = new Date();
+    private int height;
+    private int width;
+    private float volume = 0;
+    private ArrayList<Long> sampleDurations = new ArrayList<Long>();
+    private boolean isAudio = false;
+    private static Map<Integer, Integer> samplingFrequencyIndexMap = new HashMap<Integer, Integer>();
+    private long lastPresentationTimeUs = 0;
+    private boolean first = true;
+
+    static {
+        samplingFrequencyIndexMap.put(96000, 0x0);
+        samplingFrequencyIndexMap.put(88200, 0x1);
+        samplingFrequencyIndexMap.put(64000, 0x2);
+        samplingFrequencyIndexMap.put(48000, 0x3);
+        samplingFrequencyIndexMap.put(44100, 0x4);
+        samplingFrequencyIndexMap.put(32000, 0x5);
+        samplingFrequencyIndexMap.put(24000, 0x6);
+        samplingFrequencyIndexMap.put(22050, 0x7);
+        samplingFrequencyIndexMap.put(16000, 0x8);
+        samplingFrequencyIndexMap.put(12000, 0x9);
+        samplingFrequencyIndexMap.put(11025, 0xa);
+        samplingFrequencyIndexMap.put(8000, 0xb);
+    }
+
+    public Track(int id, MediaFormat format, boolean isAudio) throws Exception {
+        trackId = id;
+        if (!isAudio) {
+            sampleDurations.add((long)3015);
+            duration = 3015;
+            width = format.getInteger(MediaFormat.KEY_WIDTH);
+            height = format.getInteger(MediaFormat.KEY_HEIGHT);
+            timeScale = 90000;
+            syncSamples = new LinkedList<Integer>();
+            handler = "vide";
+            headerBox = new VideoMediaHeaderBox();
+            sampleDescriptionBox = new SampleDescriptionBox();
+            String mime = format.getString(MediaFormat.KEY_MIME);
+            if (mime.equals("video/avc")) {
+                VisualSampleEntry visualSampleEntry = new VisualSampleEntry("avc1");
+                visualSampleEntry.setDataReferenceIndex(1);
+                visualSampleEntry.setDepth(24);
+                visualSampleEntry.setFrameCount(1);
+                visualSampleEntry.setHorizresolution(72);
+                visualSampleEntry.setVertresolution(72);
+                visualSampleEntry.setWidth(width);
+                visualSampleEntry.setHeight(height);
+
+                AvcConfigurationBox avcConfigurationBox = new AvcConfigurationBox();
+
+                if (format.getByteBuffer("csd-0") != null) {
+                    ArrayList<byte[]> spsArray = new ArrayList<byte[]>();
+                    ByteBuffer spsBuff = format.getByteBuffer("csd-0");
+                    spsBuff.position(4);
+                    byte[] spsBytes = new byte[spsBuff.remaining()];
+                    spsBuff.get(spsBytes);
+                    spsArray.add(spsBytes);
+
+                    ArrayList<byte[]> ppsArray = new ArrayList<byte[]>();
+                    ByteBuffer ppsBuff = format.getByteBuffer("csd-1");
+                    ppsBuff.position(4);
+                    byte[] ppsBytes = new byte[ppsBuff.remaining()];
+                    ppsBuff.get(ppsBytes);
+                    ppsArray.add(ppsBytes);
+                    avcConfigurationBox.setSequenceParameterSets(spsArray);
+                    avcConfigurationBox.setPictureParameterSets(ppsArray);
+                }
+                //ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(spsBytes);
+                //SeqParameterSet seqParameterSet = SeqParameterSet.read(byteArrayInputStream);
+
+                avcConfigurationBox.setAvcLevelIndication(13);
+                avcConfigurationBox.setAvcProfileIndication(100);
+                avcConfigurationBox.setBitDepthLumaMinus8(-1);
+                avcConfigurationBox.setBitDepthChromaMinus8(-1);
+                avcConfigurationBox.setChromaFormat(-1);
+                avcConfigurationBox.setConfigurationVersion(1);
+                avcConfigurationBox.setLengthSizeMinusOne(3);
+                avcConfigurationBox.setProfileCompatibility(0);
+
+                visualSampleEntry.addBox(avcConfigurationBox);
+                sampleDescriptionBox.addBox(visualSampleEntry);
+            } else if (mime.equals("video/mp4v")) {
+                VisualSampleEntry visualSampleEntry = new VisualSampleEntry("mp4v");
+                visualSampleEntry.setDataReferenceIndex(1);
+                visualSampleEntry.setDepth(24);
+                visualSampleEntry.setFrameCount(1);
+                visualSampleEntry.setHorizresolution(72);
+                visualSampleEntry.setVertresolution(72);
+                visualSampleEntry.setWidth(width);
+                visualSampleEntry.setHeight(height);
+
+                sampleDescriptionBox.addBox(visualSampleEntry);
+            }
+        } else {
+            sampleDurations.add((long)1024);
+            duration = 1024;
+            isAudio = true;
+            volume = 1;
+            timeScale = format.getInteger(MediaFormat.KEY_SAMPLE_RATE);
+            handler = "soun";
+            headerBox = new SoundMediaHeaderBox();
+            sampleDescriptionBox = new SampleDescriptionBox();
+            AudioSampleEntry audioSampleEntry = new AudioSampleEntry("mp4a");
+            audioSampleEntry.setChannelCount(format.getInteger(MediaFormat.KEY_CHANNEL_COUNT));
+            audioSampleEntry.setSampleRate(format.getInteger(MediaFormat.KEY_SAMPLE_RATE));
+            audioSampleEntry.setDataReferenceIndex(1);
+            audioSampleEntry.setSampleSize(16);
+
+            ESDescriptorBox esds = new ESDescriptorBox();
+            ESDescriptor descriptor = new ESDescriptor();
+            descriptor.setEsId(0);
+
+            SLConfigDescriptor slConfigDescriptor = new SLConfigDescriptor();
+            slConfigDescriptor.setPredefined(2);
+            descriptor.setSlConfigDescriptor(slConfigDescriptor);
+
+            DecoderConfigDescriptor decoderConfigDescriptor = new DecoderConfigDescriptor();
+            decoderConfigDescriptor.setObjectTypeIndication(0x40);
+            decoderConfigDescriptor.setStreamType(5);
+            decoderConfigDescriptor.setBufferSizeDB(1536);
+            decoderConfigDescriptor.setMaxBitRate(96000);
+            decoderConfigDescriptor.setAvgBitRate(96000);
+
+            AudioSpecificConfig audioSpecificConfig = new AudioSpecificConfig();
+            audioSpecificConfig.setAudioObjectType(2);
+            audioSpecificConfig.setSamplingFrequencyIndex(samplingFrequencyIndexMap.get((int)audioSampleEntry.getSampleRate()));
+            audioSpecificConfig.setChannelConfiguration(audioSampleEntry.getChannelCount());
+            decoderConfigDescriptor.setAudioSpecificInfo(audioSpecificConfig);
+
+            descriptor.setDecoderConfigDescriptor(decoderConfigDescriptor);
+
+            ByteBuffer data = descriptor.serialize();
+            esds.setEsDescriptor(descriptor);
+            esds.setData(data);
+            audioSampleEntry.addBox(esds);
+            sampleDescriptionBox.addBox(audioSampleEntry);
+        }
+    }
+
+    public long getTrackId() {
+        return trackId;
+    }
+
+    public void addSample(long offset, MediaCodec.BufferInfo bufferInfo) {
+        boolean isSyncFrame = !isAudio && (bufferInfo.flags & MediaCodec.BUFFER_FLAG_SYNC_FRAME) != 0;
+        samples.add(new Sample(offset, bufferInfo.size));
+        if (syncSamples != null && isSyncFrame) {
+            syncSamples.add(samples.size());
+        }
+
+        long delta = bufferInfo.presentationTimeUs - lastPresentationTimeUs;
+        lastPresentationTimeUs = bufferInfo.presentationTimeUs;
+        delta = (delta * timeScale + 500000L) / 1000000L;
+        if (!first) {
+            sampleDurations.add(sampleDurations.size() - 1, delta);
+            duration += delta;
+        }
+        first = false;
+    }
+
+    public ArrayList<Sample> getSamples() {
+        return samples;
+    }
+
+    public long getDuration() {
+        return duration;
+    }
+
+    public String getHandler() {
+        return handler;
+    }
+
+    public AbstractMediaHeaderBox getMediaHeaderBox() {
+        return headerBox;
+    }
+
+    public SampleDescriptionBox getSampleDescriptionBox() {
+        return sampleDescriptionBox;
+    }
+
+    public long[] getSyncSamples() {
+        if (syncSamples == null || syncSamples.isEmpty()) {
+            return null;
+        }
+        long[] returns = new long[syncSamples.size()];
+        for (int i = 0; i < syncSamples.size(); i++) {
+            returns[i] = syncSamples.get(i);
+        }
+        return returns;
+    }
+
+    public int getTimeScale() {
+        return timeScale;
+    }
+
+    public Date getCreationTime() {
+        return creationTime;
+    }
+
+    public int getWidth() {
+        return width;
+    }
+
+    public int getHeight() {
+        return height;
+    }
+
+    public float getVolume() {
+        return volume;
+    }
+
+    public ArrayList<Long> getSampleDurations() {
+        return sampleDurations;
+    }
+
+    public boolean isAudio() {
+        return isAudio;
+    }
+}
diff --git a/src/main/jni/Android.mk b/src/main/jni/Android.mk
new file mode 100644
index 000000000..f9b84ba96
--- /dev/null
+++ b/src/main/jni/Android.mk
@@ -0,0 +1,400 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD -finline-functions -ffast-math -ffunction-sections -fdata-sections -O2
+LOCAL_C_INCLUDES += ./libwebp/src
+LOCAL_ARM_MODE := arm
+LOCAL_STATIC_LIBRARIES := cpufeatures
+LOCAL_MODULE := webp
+
+ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
+  # Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
+  # instructions to be generated for armv7a code. Instead target the neon code
+  # specifically.
+  NEON := c.neon
+else
+  NEON := c
+endif
+
+LOCAL_SRC_FILES := \
+./libwebp/dec/alpha.c \
+./libwebp/dec/buffer.c \
+./libwebp/dec/frame.c \
+./libwebp/dec/idec.c \
+./libwebp/dec/io.c \
+./libwebp/dec/quant.c \
+./libwebp/dec/tree.c \
+./libwebp/dec/vp8.c \
+./libwebp/dec/vp8l.c \
+./libwebp/dec/webp.c \
+./libwebp/dsp/alpha_processing.c \
+./libwebp/dsp/alpha_processing_sse2.c \
+./libwebp/dsp/cpu.c \
+./libwebp/dsp/dec.c \
+./libwebp/dsp/dec_clip_tables.c \
+./libwebp/dsp/dec_mips32.c \
+./libwebp/dsp/dec_neon.$(NEON) \
+./libwebp/dsp/dec_sse2.c \
+./libwebp/dsp/enc.c \
+./libwebp/dsp/enc_avx2.c \
+./libwebp/dsp/enc_mips32.c \
+./libwebp/dsp/enc_neon.$(NEON) \
+./libwebp/dsp/enc_sse2.c \
+./libwebp/dsp/lossless.c \
+./libwebp/dsp/lossless_mips32.c \
+./libwebp/dsp/lossless_neon.$(NEON) \
+./libwebp/dsp/lossless_sse2.c \
+./libwebp/dsp/upsampling.c \
+./libwebp/dsp/upsampling_neon.$(NEON) \
+./libwebp/dsp/upsampling_sse2.c \
+./libwebp/dsp/yuv.c \
+./libwebp/dsp/yuv_mips32.c \
+./libwebp/dsp/yuv_sse2.c \
+./libwebp/enc/alpha.c \
+./libwebp/enc/analysis.c \
+./libwebp/enc/backward_references.c \
+./libwebp/enc/config.c \
+./libwebp/enc/cost.c \
+./libwebp/enc/filter.c \
+./libwebp/enc/frame.c \
+./libwebp/enc/histogram.c \
+./libwebp/enc/iterator.c \
+./libwebp/enc/picture.c \
+./libwebp/enc/picture_csp.c \
+./libwebp/enc/picture_psnr.c \
+./libwebp/enc/picture_rescale.c \
+./libwebp/enc/picture_tools.c \
+./libwebp/enc/quant.c \
+./libwebp/enc/syntax.c \
+./libwebp/enc/token.c \
+./libwebp/enc/tree.c \
+./libwebp/enc/vp8l.c \
+./libwebp/enc/webpenc.c \
+./libwebp/utils/bit_reader.c \
+./libwebp/utils/bit_writer.c \
+./libwebp/utils/color_cache.c \
+./libwebp/utils/filters.c \
+./libwebp/utils/huffman.c \
+./libwebp/utils/huffman_encode.c \
+./libwebp/utils/quant_levels.c \
+./libwebp/utils/quant_levels_dec.c \
+./libwebp/utils/random.c \
+./libwebp/utils/rescaler.c \
+./libwebp/utils/thread.c \
+./libwebp/utils/utils.c \
+
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+ifeq ($(TARGET_ARCH_ABI),armeabi)
+	LOCAL_ARM_MODE  := thumb
+else
+	LOCAL_ARM_MODE  := arm
+endif
+LOCAL_MODULE := sqlite
+LOCAL_CFLAGS 	:= -w -std=gnu99 -O2 -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+LOCAL_CFLAGS 	+= -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -DHAVE_STRCHRNUL=0
+
+LOCAL_SRC_FILES     := \
+./sqlite/sqlite3.c
+
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+LOCAL_PRELINK_MODULE := false
+LOCAL_STATIC_LIBRARIES := webp sqlite
+LOCAL_MODULE 	:= tmessages.7
+LOCAL_CFLAGS 	:= -w -std=gnu99 -O2 -DNULL=0 -DSOCKLEN_T=socklen_t -DLOCALE_NOT_USED -D_LARGEFILE_SOURCE=1 -D_FILE_OFFSET_BITS=64
+LOCAL_CFLAGS 	+= -Drestrict='' -D__EMX__ -DOPUS_BUILD -DFIXED_POINT -DUSE_ALLOCA -DHAVE_LRINT -DHAVE_LRINTF -fno-math-errno
+LOCAL_CFLAGS 	+= -DANDROID_NDK -DDISABLE_IMPORTGL -fno-strict-aliasing -fprefetch-loop-arrays -DAVOID_TABLES -DANDROID_TILE_BASED_DECODE -DANDROID_ARMV6_IDCT -ffast-math
+LOCAL_CPPFLAGS 	:= -DBSD=1 -ffast-math -O2 -funroll-loops
+LOCAL_LDLIBS 	:= -ljnigraphics -llog
+ifeq ($(TARGET_ARCH_ABI),armeabi)
+	LOCAL_ARM_MODE  := thumb
+else
+	LOCAL_ARM_MODE  := arm
+endif
+
+LOCAL_SRC_FILES     := \
+./opus/src/opus.c \
+./opus/src/opus_decoder.c \
+./opus/src/opus_encoder.c \
+./opus/src/opus_multistream.c \
+./opus/src/opus_multistream_encoder.c \
+./opus/src/opus_multistream_decoder.c \
+./opus/src/repacketizer.c \
+./opus/src/analysis.c \
+./opus/src/mlp.c \
+./opus/src/mlp_data.c
+
+LOCAL_SRC_FILES     += \
+./opus/silk/CNG.c \
+./opus/silk/code_signs.c \
+./opus/silk/init_decoder.c \
+./opus/silk/decode_core.c \
+./opus/silk/decode_frame.c \
+./opus/silk/decode_parameters.c \
+./opus/silk/decode_indices.c \
+./opus/silk/decode_pulses.c \
+./opus/silk/decoder_set_fs.c \
+./opus/silk/dec_API.c \
+./opus/silk/enc_API.c \
+./opus/silk/encode_indices.c \
+./opus/silk/encode_pulses.c \
+./opus/silk/gain_quant.c \
+./opus/silk/interpolate.c \
+./opus/silk/LP_variable_cutoff.c \
+./opus/silk/NLSF_decode.c \
+./opus/silk/NSQ.c \
+./opus/silk/NSQ_del_dec.c \
+./opus/silk/PLC.c \
+./opus/silk/shell_coder.c \
+./opus/silk/tables_gain.c \
+./opus/silk/tables_LTP.c \
+./opus/silk/tables_NLSF_CB_NB_MB.c \
+./opus/silk/tables_NLSF_CB_WB.c \
+./opus/silk/tables_other.c \
+./opus/silk/tables_pitch_lag.c \
+./opus/silk/tables_pulses_per_block.c \
+./opus/silk/VAD.c \
+./opus/silk/control_audio_bandwidth.c \
+./opus/silk/quant_LTP_gains.c \
+./opus/silk/VQ_WMat_EC.c \
+./opus/silk/HP_variable_cutoff.c \
+./opus/silk/NLSF_encode.c \
+./opus/silk/NLSF_VQ.c \
+./opus/silk/NLSF_unpack.c \
+./opus/silk/NLSF_del_dec_quant.c \
+./opus/silk/process_NLSFs.c \
+./opus/silk/stereo_LR_to_MS.c \
+./opus/silk/stereo_MS_to_LR.c \
+./opus/silk/check_control_input.c \
+./opus/silk/control_SNR.c \
+./opus/silk/init_encoder.c \
+./opus/silk/control_codec.c \
+./opus/silk/A2NLSF.c \
+./opus/silk/ana_filt_bank_1.c \
+./opus/silk/biquad_alt.c \
+./opus/silk/bwexpander_32.c \
+./opus/silk/bwexpander.c \
+./opus/silk/debug.c \
+./opus/silk/decode_pitch.c \
+./opus/silk/inner_prod_aligned.c \
+./opus/silk/lin2log.c \
+./opus/silk/log2lin.c \
+./opus/silk/LPC_analysis_filter.c \
+./opus/silk/LPC_inv_pred_gain.c \
+./opus/silk/table_LSF_cos.c \
+./opus/silk/NLSF2A.c \
+./opus/silk/NLSF_stabilize.c \
+./opus/silk/NLSF_VQ_weights_laroia.c \
+./opus/silk/pitch_est_tables.c \
+./opus/silk/resampler.c \
+./opus/silk/resampler_down2_3.c \
+./opus/silk/resampler_down2.c \
+./opus/silk/resampler_private_AR2.c \
+./opus/silk/resampler_private_down_FIR.c \
+./opus/silk/resampler_private_IIR_FIR.c \
+./opus/silk/resampler_private_up2_HQ.c \
+./opus/silk/resampler_rom.c \
+./opus/silk/sigm_Q15.c \
+./opus/silk/sort.c \
+./opus/silk/sum_sqr_shift.c \
+./opus/silk/stereo_decode_pred.c \
+./opus/silk/stereo_encode_pred.c \
+./opus/silk/stereo_find_predictor.c \
+./opus/silk/stereo_quant_pred.c
+
+LOCAL_SRC_FILES     += \
+./opus/silk/fixed/LTP_analysis_filter_FIX.c \
+./opus/silk/fixed/LTP_scale_ctrl_FIX.c \
+./opus/silk/fixed/corrMatrix_FIX.c \
+./opus/silk/fixed/encode_frame_FIX.c \
+./opus/silk/fixed/find_LPC_FIX.c \
+./opus/silk/fixed/find_LTP_FIX.c \
+./opus/silk/fixed/find_pitch_lags_FIX.c \
+./opus/silk/fixed/find_pred_coefs_FIX.c \
+./opus/silk/fixed/noise_shape_analysis_FIX.c \
+./opus/silk/fixed/prefilter_FIX.c \
+./opus/silk/fixed/process_gains_FIX.c \
+./opus/silk/fixed/regularize_correlations_FIX.c \
+./opus/silk/fixed/residual_energy16_FIX.c \
+./opus/silk/fixed/residual_energy_FIX.c \
+./opus/silk/fixed/solve_LS_FIX.c \
+./opus/silk/fixed/warped_autocorrelation_FIX.c \
+./opus/silk/fixed/apply_sine_window_FIX.c \
+./opus/silk/fixed/autocorr_FIX.c \
+./opus/silk/fixed/burg_modified_FIX.c \
+./opus/silk/fixed/k2a_FIX.c \
+./opus/silk/fixed/k2a_Q16_FIX.c \
+./opus/silk/fixed/pitch_analysis_core_FIX.c \
+./opus/silk/fixed/vector_ops_FIX.c \
+./opus/silk/fixed/schur64_FIX.c \
+./opus/silk/fixed/schur_FIX.c
+
+LOCAL_SRC_FILES     += \
+./opus/celt/bands.c \
+./opus/celt/celt.c \
+./opus/celt/celt_encoder.c \
+./opus/celt/celt_decoder.c \
+./opus/celt/cwrs.c \
+./opus/celt/entcode.c \
+./opus/celt/entdec.c \
+./opus/celt/entenc.c \
+./opus/celt/kiss_fft.c \
+./opus/celt/laplace.c \
+./opus/celt/mathops.c \
+./opus/celt/mdct.c \
+./opus/celt/modes.c \
+./opus/celt/pitch.c \
+./opus/celt/celt_lpc.c \
+./opus/celt/quant_bands.c \
+./opus/celt/rate.c \
+./opus/celt/vq.c \
+./opus/celt/arm/armcpu.c \
+./opus/celt/arm/arm_celt_map.c
+
+LOCAL_SRC_FILES     += \
+./opus/ogg/bitwise.c \
+./opus/ogg/framing.c \
+./opus/opusfile/info.c \
+./opus/opusfile/internal.c \
+./opus/opusfile/opusfile.c \
+./opus/opusfile/stream.c
+
+LOCAL_SRC_FILES     += \
+./giflib/dgif_lib.c \
+./giflib/gifalloc.c
+
+LOCAL_SRC_FILES     += \
+./aes/aes_ige.c \
+./aes/aes_misc.c
+
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+    LOCAL_SRC_FILES += ./aes/aes_arm.S
+else
+    ifeq ($(TARGET_ARCH_ABI),armeabi)
+        LOCAL_SRC_FILES += ./aes/aes_arm.S
+    else
+        ifeq ($(TARGET_ARCH_ABI),x86)
+            LOCAL_SRC_FILES += ./aes/aes_core.c
+        else
+            LOCAL_SRC_FILES += ./aes/aes_core.c
+        endif
+    endif
+endif
+
+LOCAL_C_INCLUDES    := \
+./opus/include \
+./opus/silk \
+./opus/silk/fixed \
+./opus/celt \
+./opus/ \
+./opus/opusfile \
+./libyuv/include
+
+LOCAL_SRC_FILES     += \
+./libjpeg/jcapimin.c \
+./libjpeg/jcapistd.c \
+./libjpeg/armv6_idct.S \
+./libjpeg/jccoefct.c \
+./libjpeg/jccolor.c \
+./libjpeg/jcdctmgr.c \
+./libjpeg/jchuff.c \
+./libjpeg/jcinit.c \
+./libjpeg/jcmainct.c \
+./libjpeg/jcmarker.c \
+./libjpeg/jcmaster.c \
+./libjpeg/jcomapi.c \
+./libjpeg/jcparam.c \
+./libjpeg/jcphuff.c \
+./libjpeg/jcprepct.c \
+./libjpeg/jcsample.c \
+./libjpeg/jctrans.c \
+./libjpeg/jdapimin.c \
+./libjpeg/jdapistd.c \
+./libjpeg/jdatadst.c \
+./libjpeg/jdatasrc.c \
+./libjpeg/jdcoefct.c \
+./libjpeg/jdcolor.c \
+./libjpeg/jddctmgr.c \
+./libjpeg/jdhuff.c \
+./libjpeg/jdinput.c \
+./libjpeg/jdmainct.c \
+./libjpeg/jdmarker.c \
+./libjpeg/jdmaster.c \
+./libjpeg/jdmerge.c \
+./libjpeg/jdphuff.c \
+./libjpeg/jdpostct.c \
+./libjpeg/jdsample.c \
+./libjpeg/jdtrans.c \
+./libjpeg/jerror.c \
+./libjpeg/jfdctflt.c \
+./libjpeg/jfdctfst.c \
+./libjpeg/jfdctint.c \
+./libjpeg/jidctflt.c \
+./libjpeg/jidctfst.c \
+./libjpeg/jidctint.c \
+./libjpeg/jidctred.c \
+./libjpeg/jmemmgr.c \
+./libjpeg/jmemnobs.c \
+./libjpeg/jquant1.c \
+./libjpeg/jquant2.c \
+./libjpeg/jutils.c
+
+LOCAL_SRC_FILES     += \
+./libyuv/source/compare_common.cc \
+./libyuv/source/compare_neon.cc \
+./libyuv/source/compare_posix.cc \
+./libyuv/source/compare_win.cc \
+./libyuv/source/compare.cc \
+./libyuv/source/convert_argb.cc \
+./libyuv/source/convert_from_argb.cc \
+./libyuv/source/convert_from.cc \
+./libyuv/source/convert_jpeg.cc \
+./libyuv/source/convert_to_argb.cc \
+./libyuv/source/convert_to_i420.cc \
+./libyuv/source/convert.cc \
+./libyuv/source/cpu_id.cc \
+./libyuv/source/format_conversion.cc \
+./libyuv/source/mjpeg_decoder.cc \
+./libyuv/source/mjpeg_validate.cc \
+./libyuv/source/planar_functions.cc \
+./libyuv/source/rotate_argb.cc \
+./libyuv/source/rotate_mips.cc \
+./libyuv/source/rotate_neon.cc \
+./libyuv/source/rotate_neon64.cc \
+./libyuv/source/rotate.cc \
+./libyuv/source/row_any.cc \
+./libyuv/source/row_common.cc \
+./libyuv/source/row_mips.cc \
+./libyuv/source/row_neon.cc \
+./libyuv/source/row_neon64.cc \
+./libyuv/source/row_posix.cc \
+./libyuv/source/row_win.cc \
+./libyuv/source/scale_argb.cc \
+./libyuv/source/scale_common.cc \
+./libyuv/source/scale_mips.cc \
+./libyuv/source/scale_neon.cc \
+./libyuv/source/scale_neon64.cc \
+./libyuv/source/scale_posix.cc \
+./libyuv/source/scale_win.cc \
+./libyuv/source/scale.cc \
+./libyuv/source/video_common.cc
+
+LOCAL_SRC_FILES     += \
+./jni.c \
+./sqlite_cursor.c \
+./sqlite_database.c \
+./sqlite_statement.c \
+./sqlite.c \
+./audio.c \
+./gif.c \
+./utils.c \
+./image.c \
+./video.c
+
+include $(BUILD_SHARED_LIBRARY)
+
+$(call import-module,android/cpufeatures)
+\ No newline at end of file
diff --git a/src/main/jni/Application.mk b/src/main/jni/Application.mk
new file mode 100644
index 000000000..61a377ba9
--- /dev/null
+++ b/src/main/jni/Application.mk
@@ -0,0 +1,2 @@
+APP_PLATFORM := android-9
+APP_ABI := armeabi armeabi-v7a x86
+\ No newline at end of file
diff --git a/src/main/jni/aes/aes.h b/src/main/jni/aes/aes.h
new file mode 100644
index 000000000..9eb0f69b1
--- /dev/null
+++ b/src/main/jni/aes/aes.h
@@ -0,0 +1,147 @@
+/* crypto/aes/aes.h -*- mode:C; c-file-style: "eay" -*- */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef HEADER_AES_H
+#define HEADER_AES_H
+
+//#include <openssl/opensslconf.h>
+
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stddef.h>
+
+#define AES_ENCRYPT	1
+#define AES_DECRYPT	0
+
+/* Because array size can't be a const in C, the following two are macros.
+   Both sizes are in bytes. */
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/* This should be a hidden type, but EVP requires that the size be known */
+struct aes_key_st {
+#ifdef AES_LONG
+    unsigned long rd_key[4 *(AES_MAXNR + 1)];
+#else
+    unsigned int rd_key[4 *(AES_MAXNR + 1)];
+#endif
+    int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+const char *AES_options(void);
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+
+int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+	AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+	const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+	const AES_KEY *key);
+
+void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+	const AES_KEY *key, const int enc);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, const int enc);
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, int *num, const int enc);
+void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, int *num, const int enc);
+void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, int *num, const int enc);
+void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char *ivec, int *num);
+void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+	size_t length, const AES_KEY *key,
+	unsigned char ivec[AES_BLOCK_SIZE],
+	unsigned char ecount_buf[AES_BLOCK_SIZE],
+	unsigned int *num);
+/* NB: the IV is _two_ blocks long */
+void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
+		     size_t length, const AES_KEY *key,
+		     unsigned char *ivec, const int enc);
+/* NB: the IV is _four_ blocks long */
+void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
+			size_t length, const AES_KEY *key,
+			const AES_KEY *key2, const unsigned char *ivec,
+			const int enc);
+
+int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
+		unsigned char *out,
+		const unsigned char *in, unsigned int inlen);
+int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
+		unsigned char *out,
+		const unsigned char *in, unsigned int inlen);
+
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* !HEADER_AES_H */
diff --git a/src/main/jni/aes/aes_arm.S b/src/main/jni/aes/aes_arm.S
new file mode 100644
index 000000000..2697d4ce4
--- /dev/null
+++ b/src/main/jni/aes/aes_arm.S
@@ -0,0 +1,1071 @@
+#include "arm_arch.h"
+.text
+.code	32
+
+.type	AES_Te,%object
+.align	5
+AES_Te:
+.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
+.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256]
+.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[]
+.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size	AES_Te,.-AES_Te
+
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ 		 const AES_KEY *key) {
+.global AES_encrypt
+.type   AES_encrypt,%function
+.align	5
+AES_encrypt:
+	sub	r3,pc,#8		@ AES_encrypt
+	stmdb   sp!,{r1,r4-r12,lr}
+	mov	r12,r0		@ inp
+	mov	r11,r2
+	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	orr	r3,r3,r5,lsl#16
+	orr	r3,r3,r6,lsl#24
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#endif
+	bl	_armv4_AES_encrypt
+
+	ldr	r12,[sp],#4		@ pop out
+#if __ARM_ARCH__>=7
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r12,#0]
+	str	r1,[r12,#4]
+	str	r2,[r12,#8]
+	str	r3,[r12,#12]
+#else
+	mov	r4,r0,lsr#24		@ write output in endian-neutral
+	mov	r5,r0,lsr#16		@ manner...
+	mov	r6,r0,lsr#8
+	strb	r4,[r12,#0]
+	strb	r5,[r12,#1]
+	mov	r4,r1,lsr#24
+	strb	r6,[r12,#2]
+	mov	r5,r1,lsr#16
+	strb	r0,[r12,#3]
+	mov	r6,r1,lsr#8
+	strb	r4,[r12,#4]
+	strb	r5,[r12,#5]
+	mov	r4,r2,lsr#24
+	strb	r6,[r12,#6]
+	mov	r5,r2,lsr#16
+	strb	r1,[r12,#7]
+	mov	r6,r2,lsr#8
+	strb	r4,[r12,#8]
+	strb	r5,[r12,#9]
+	mov	r4,r3,lsr#24
+	strb	r6,[r12,#10]
+	mov	r5,r3,lsr#16
+	strb	r2,[r12,#11]
+	mov	r6,r3,lsr#8
+	strb	r4,[r12,#12]
+	strb	r5,[r12,#13]
+	strb	r6,[r12,#14]
+	strb	r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	AES_encrypt,.-AES_encrypt
+
+.type   _armv4_AES_encrypt,%function
+.align	2
+_armv4_AES_encrypt:
+	str	lr,[sp,#-4]!		@ push lr
+	ldmia	r11!,{r4-r7}
+	eor	r0,r0,r4
+	ldr	r12,[r11,#240-16]
+	eor	r1,r1,r5
+	eor	r2,r2,r6
+	eor	r3,r3,r7
+	sub	r12,r12,#1
+	mov	lr,#255
+
+	and	r7,lr,r0
+	and	r8,lr,r0,lsr#8
+	and	r9,lr,r0,lsr#16
+	mov	r0,r0,lsr#24
+.Lenc_loop:
+	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
+	and	r7,lr,r1,lsr#16	@ i0
+	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
+	and	r8,lr,r1
+	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
+	and	r9,lr,r1,lsr#8
+	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
+	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
+	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
+	eor	r0,r0,r7,ror#8
+	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r5,r8,ror#8
+	and	r8,lr,r2,lsr#16	@ i1
+	eor	r6,r6,r9,ror#8
+	and	r9,lr,r2
+	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
+	eor	r1,r1,r4,ror#24
+	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
+	mov	r2,r2,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
+	eor	r0,r0,r7,ror#16
+	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
+	and	r7,lr,r3		@ i0
+	eor	r1,r1,r8,ror#8
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r6,r9,ror#16
+	and	r9,lr,r3,lsr#16	@ i2
+	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
+	eor	r2,r2,r5,ror#16
+	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
+	eor	r0,r0,r7,ror#24
+	ldr	r7,[r11],#16
+	eor	r1,r1,r8,ror#16
+	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
+	eor	r2,r2,r9,ror#8
+	ldr	r4,[r11,#-12]
+	eor	r3,r3,r6,ror#8
+
+	ldr	r5,[r11,#-8]
+	eor	r0,r0,r7
+	ldr	r6,[r11,#-4]
+	and	r7,lr,r0
+	eor	r1,r1,r4
+	and	r8,lr,r0,lsr#8
+	eor	r2,r2,r5
+	and	r9,lr,r0,lsr#16
+	eor	r3,r3,r6
+	mov	r0,r0,lsr#24
+
+	subs	r12,r12,#1
+	bne	.Lenc_loop
+
+	add	r10,r10,#2
+
+	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
+	and	r7,lr,r1,lsr#16	@ i0
+	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
+	and	r8,lr,r1
+	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
+	and	r9,lr,r1,lsr#8
+	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
+	eor	r0,r7,r0,lsl#8
+	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r8,r5,lsl#8
+	and	r8,lr,r2,lsr#16	@ i1
+	eor	r6,r9,r6,lsl#8
+	and	r9,lr,r2
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
+	eor	r1,r4,r1,lsl#24
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
+	mov	r2,r2,lsr#24
+
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
+	eor	r0,r7,r0,lsl#8
+	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
+	and	r7,lr,r3		@ i0
+	eor	r1,r1,r8,lsl#16
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r9,r6,lsl#8
+	and	r9,lr,r3,lsr#16	@ i2
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
+	eor	r2,r5,r2,lsl#24
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
+	eor	r0,r7,r0,lsl#8
+	ldr	r7,[r11,#0]
+	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
+	eor	r1,r1,r8,lsl#8
+	ldr	r4,[r11,#4]
+	eor	r2,r2,r9,lsl#16
+	ldr	r5,[r11,#8]
+	eor	r3,r6,r3,lsl#24
+	ldr	r6,[r11,#12]
+
+	eor	r0,r0,r7
+	eor	r1,r1,r4
+	eor	r2,r2,r5
+	eor	r3,r3,r6
+
+	sub	r10,r10,#2
+	ldr	pc,[sp],#4		@ pop and return
+.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
+
+.global private_AES_set_encrypt_key
+.type   private_AES_set_encrypt_key,%function
+.align	5
+private_AES_set_encrypt_key:
+_armv4_AES_set_encrypt_key:
+	sub	r3,pc,#8		@ AES_set_encrypt_key
+	teq	r0,#0
+	moveq	r0,#-1
+	beq	.Labrt
+	teq	r2,#0
+	moveq	r0,#-1
+	beq	.Labrt
+
+	teq	r1,#128
+	beq	.Lok
+	teq	r1,#192
+	beq	.Lok
+	teq	r1,#256
+	movne	r0,#-1
+	bne	.Labrt
+
+.Lok:	stmdb   sp!,{r4-r12,lr}
+	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
+
+	mov	r12,r0		@ inp
+	mov	lr,r1			@ bits
+	mov	r11,r2			@ key
+
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	str	r0,[r11],#16
+	orr	r3,r3,r5,lsl#16
+	str	r1,[r11,#-12]
+	orr	r3,r3,r6,lsl#24
+	str	r2,[r11,#-8]
+	str	r3,[r11,#-4]
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r11],#16
+	str	r1,[r11,#-12]
+	str	r2,[r11,#-8]
+	str	r3,[r11,#-4]
+#endif
+
+	teq	lr,#128
+	bne	.Lnot128
+	mov	r12,#10
+	str	r12,[r11,#240-16]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+
+.L128_loop:
+	and	r5,lr,r3,lsr#24
+	and	r7,lr,r3,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r3,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r3
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r5,r5,r4
+	eor	r0,r0,r5			@ rk[4]=rk[0]^...
+	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
+	str	r0,[r11],#16
+	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
+	str	r1,[r11,#-12]
+	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
+	str	r2,[r11,#-8]
+	subs	r12,r12,#1
+	str	r3,[r11,#-4]
+	bne	.L128_loop
+	sub	r2,r11,#176
+	b	.Ldone
+
+.Lnot128:
+#if __ARM_ARCH__<7
+	ldrb	r8,[r12,#19]
+	ldrb	r4,[r12,#18]
+	ldrb	r5,[r12,#17]
+	ldrb	r6,[r12,#16]
+	orr	r8,r8,r4,lsl#8
+	ldrb	r9,[r12,#23]
+	orr	r8,r8,r5,lsl#16
+	ldrb	r4,[r12,#22]
+	orr	r8,r8,r6,lsl#24
+	ldrb	r5,[r12,#21]
+	ldrb	r6,[r12,#20]
+	orr	r9,r9,r4,lsl#8
+	orr	r9,r9,r5,lsl#16
+	str	r8,[r11],#8
+	orr	r9,r9,r6,lsl#24
+	str	r9,[r11,#-4]
+#else
+	ldr	r8,[r12,#16]
+	ldr	r9,[r12,#20]
+#ifdef __ARMEL__
+	rev	r8,r8
+	rev	r9,r9
+#endif
+	str	r8,[r11],#8
+	str	r9,[r11,#-4]
+#endif
+
+	teq	lr,#192
+	bne	.Lnot192
+	mov	r12,#12
+	str	r12,[r11,#240-24]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+	mov	r12,#8
+
+.L192_loop:
+	and	r5,lr,r9,lsr#24
+	and	r7,lr,r9,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r9,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r9
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r9,r5,r4
+	eor	r0,r0,r9			@ rk[6]=rk[0]^...
+	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
+	str	r0,[r11],#24
+	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
+	str	r1,[r11,#-20]
+	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
+	str	r2,[r11,#-16]
+	subs	r12,r12,#1
+	str	r3,[r11,#-12]
+	subeq	r2,r11,#216
+	beq	.Ldone
+
+	ldr	r7,[r11,#-32]
+	ldr	r8,[r11,#-28]
+	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
+	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
+	str	r7,[r11,#-8]
+	str	r9,[r11,#-4]
+	b	.L192_loop
+
+.Lnot192:
+#if __ARM_ARCH__<7
+	ldrb	r8,[r12,#27]
+	ldrb	r4,[r12,#26]
+	ldrb	r5,[r12,#25]
+	ldrb	r6,[r12,#24]
+	orr	r8,r8,r4,lsl#8
+	ldrb	r9,[r12,#31]
+	orr	r8,r8,r5,lsl#16
+	ldrb	r4,[r12,#30]
+	orr	r8,r8,r6,lsl#24
+	ldrb	r5,[r12,#29]
+	ldrb	r6,[r12,#28]
+	orr	r9,r9,r4,lsl#8
+	orr	r9,r9,r5,lsl#16
+	str	r8,[r11],#8
+	orr	r9,r9,r6,lsl#24
+	str	r9,[r11,#-4]
+#else
+	ldr	r8,[r12,#24]
+	ldr	r9,[r12,#28]
+#ifdef __ARMEL__
+	rev	r8,r8
+	rev	r9,r9
+#endif
+	str	r8,[r11],#8
+	str	r9,[r11,#-4]
+#endif
+
+	mov	r12,#14
+	str	r12,[r11,#240-32]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+	mov	r12,#7
+
+.L256_loop:
+	and	r5,lr,r9,lsr#24
+	and	r7,lr,r9,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r9,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r9
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r9,r5,r4
+	eor	r0,r0,r9			@ rk[8]=rk[0]^...
+	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
+	str	r0,[r11],#32
+	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
+	str	r1,[r11,#-28]
+	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
+	str	r2,[r11,#-24]
+	subs	r12,r12,#1
+	str	r3,[r11,#-20]
+	subeq	r2,r11,#256
+	beq	.Ldone
+
+	and	r5,lr,r3
+	and	r7,lr,r3,lsr#8
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r3,lsr#16
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r3,lsr#24
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#8
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r11,#-48]
+	orr	r5,r5,r9,lsl#24
+
+	ldr	r7,[r11,#-44]
+	ldr	r8,[r11,#-40]
+	eor	r4,r4,r5			@ rk[12]=rk[4]^...
+	ldr	r9,[r11,#-36]
+	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
+	str	r4,[r11,#-16]
+	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
+	str	r7,[r11,#-12]
+	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
+	str	r8,[r11,#-8]
+	str	r9,[r11,#-4]
+	b	.L256_loop
+
+.Ldone:	mov	r0,#0
+	ldmia   sp!,{r4-r12,lr}
+.Labrt:	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+
+.global private_AES_set_decrypt_key
+.type   private_AES_set_decrypt_key,%function
+.align	5
+private_AES_set_decrypt_key:
+	str	lr,[sp,#-4]!            @ push lr
+	bl	_armv4_AES_set_encrypt_key
+	teq	r0,#0
+	ldrne	lr,[sp],#4              @ pop lr
+	bne	.Labrt
+
+	stmdb   sp!,{r4-r12}
+
+	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2,
+	mov	r11,r2			@ which is AES_KEY *key
+	mov	r7,r2
+	add	r8,r2,r12,lsl#4
+
+.Linv:	ldr	r0,[r7]
+	ldr	r1,[r7,#4]
+	ldr	r2,[r7,#8]
+	ldr	r3,[r7,#12]
+	ldr	r4,[r8]
+	ldr	r5,[r8,#4]
+	ldr	r6,[r8,#8]
+	ldr	r9,[r8,#12]
+	str	r0,[r8],#-16
+	str	r1,[r8,#16+4]
+	str	r2,[r8,#16+8]
+	str	r3,[r8,#16+12]
+	str	r4,[r7],#16
+	str	r5,[r7,#-12]
+	str	r6,[r7,#-8]
+	str	r9,[r7,#-4]
+	teq	r7,r8
+	bne	.Linv
+	ldr	r0,[r11,#16]!		@ prefetch tp1
+	mov	r7,#0x80
+	mov	r8,#0x1b
+	orr	r7,r7,#0x8000
+	orr	r8,r8,#0x1b00
+	orr	r7,r7,r7,lsl#16
+	orr	r8,r8,r8,lsl#16
+	sub	r12,r12,#1
+	mvn	r9,r7
+	mov	r12,r12,lsl#2	@ (rounds-1)*4
+
+.Lmix:	and	r4,r0,r7
+	and	r1,r0,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r1,r4,r1,lsl#1	@ tp2
+
+	and	r4,r1,r7
+	and	r2,r1,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r2,r4,r2,lsl#1	@ tp4
+
+	and	r4,r2,r7
+	and	r3,r2,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r3,r4,r3,lsl#1	@ tp8
+
+	eor	r4,r1,r2
+	eor	r5,r0,r3		@ tp9
+	eor	r4,r4,r3		@ tpe
+	eor	r4,r4,r1,ror#24
+	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
+	eor	r4,r4,r2,ror#16
+	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
+	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)
+
+	ldr	r0,[r11,#4]		@ prefetch tp1
+	str	r4,[r11],#4
+	subs	r12,r12,#1
+	bne	.Lmix
+
+	mov	r0,#0
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+
+.type	AES_Td,%object
+.align	5
+AES_Td:
+.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
+.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256]
+.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size	AES_Td,.-AES_Td
+
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ 		 const AES_KEY *key) {
+.global AES_decrypt
+.type   AES_decrypt,%function
+.align	5
+AES_decrypt:
+	sub	r3,pc,#8		@ AES_decrypt
+	stmdb   sp!,{r1,r4-r12,lr}
+	mov	r12,r0		@ inp
+	mov	r11,r2
+	sub	r10,r3,#AES_decrypt-AES_Td		@ Td
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	orr	r3,r3,r5,lsl#16
+	orr	r3,r3,r6,lsl#24
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#endif
+	bl	_armv4_AES_decrypt
+
+	ldr	r12,[sp],#4		@ pop out
+#if __ARM_ARCH__>=7
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r12,#0]
+	str	r1,[r12,#4]
+	str	r2,[r12,#8]
+	str	r3,[r12,#12]
+#else
+	mov	r4,r0,lsr#24		@ write output in endian-neutral
+	mov	r5,r0,lsr#16		@ manner...
+	mov	r6,r0,lsr#8
+	strb	r4,[r12,#0]
+	strb	r5,[r12,#1]
+	mov	r4,r1,lsr#24
+	strb	r6,[r12,#2]
+	mov	r5,r1,lsr#16
+	strb	r0,[r12,#3]
+	mov	r6,r1,lsr#8
+	strb	r4,[r12,#4]
+	strb	r5,[r12,#5]
+	mov	r4,r2,lsr#24
+	strb	r6,[r12,#6]
+	mov	r5,r2,lsr#16
+	strb	r1,[r12,#7]
+	mov	r6,r2,lsr#8
+	strb	r4,[r12,#8]
+	strb	r5,[r12,#9]
+	mov	r4,r3,lsr#24
+	strb	r6,[r12,#10]
+	mov	r5,r3,lsr#16
+	strb	r2,[r12,#11]
+	mov	r6,r3,lsr#8
+	strb	r4,[r12,#12]
+	strb	r5,[r12,#13]
+	strb	r6,[r12,#14]
+	strb	r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	AES_decrypt,.-AES_decrypt
+
+.type   _armv4_AES_decrypt,%function
+.align	2
+_armv4_AES_decrypt:
+	str	lr,[sp,#-4]!		@ push lr
+	ldmia	r11!,{r4-r7}
+	eor	r0,r0,r4
+	ldr	r12,[r11,#240-16]
+	eor	r1,r1,r5
+	eor	r2,r2,r6
+	eor	r3,r3,r7
+	sub	r12,r12,#1
+	mov	lr,#255
+
+	and	r7,lr,r0,lsr#16
+	and	r8,lr,r0,lsr#8
+	and	r9,lr,r0
+	mov	r0,r0,lsr#24
+.Ldec_loop:
+	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
+	and	r7,lr,r1		@ i0
+	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
+	and	r8,lr,r1,lsr#16
+	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
+	and	r9,lr,r1,lsr#8
+	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
+	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
+	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
+	eor	r0,r0,r7,ror#24
+	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r8,r5,ror#8
+	and	r8,lr,r2		@ i1
+	eor	r6,r9,r6,ror#8
+	and	r9,lr,r2,lsr#16
+	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
+	eor	r1,r1,r4,ror#8
+	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
+	mov	r2,r2,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
+	eor	r0,r0,r7,ror#16
+	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
+	and	r7,lr,r3,lsr#16	@ i0
+	eor	r1,r1,r8,ror#24
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r9,r6,ror#8
+	and	r9,lr,r3		@ i2
+	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
+	eor	r2,r2,r5,ror#8
+	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
+	eor	r0,r0,r7,ror#8
+	ldr	r7,[r11],#16
+	eor	r1,r1,r8,ror#16
+	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
+	eor	r2,r2,r9,ror#24
+
+	ldr	r4,[r11,#-12]
+	eor	r0,r0,r7
+	ldr	r5,[r11,#-8]
+	eor	r3,r3,r6,ror#8
+	ldr	r6,[r11,#-4]
+	and	r7,lr,r0,lsr#16
+	eor	r1,r1,r4
+	and	r8,lr,r0,lsr#8
+	eor	r2,r2,r5
+	and	r9,lr,r0
+	eor	r3,r3,r6
+	mov	r0,r0,lsr#24
+
+	subs	r12,r12,#1
+	bne	.Ldec_loop
+
+	add	r10,r10,#1024
+
+	ldr	r5,[r10,#0]		@ prefetch Td4
+	ldr	r6,[r10,#32]
+	ldr	r4,[r10,#64]
+	ldr	r5,[r10,#96]
+	ldr	r6,[r10,#128]
+	ldr	r4,[r10,#160]
+	ldr	r5,[r10,#192]
+	ldr	r6,[r10,#224]
+
+	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
+	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
+	and	r7,lr,r1		@ i0
+	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
+	and	r8,lr,r1,lsr#16
+	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
+	and	r9,lr,r1,lsr#8
+
+	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
+	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
+	eor	r0,r7,r0,lsl#24
+	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
+	eor	r1,r4,r1,lsl#8
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r5,r8,lsl#8
+	and	r8,lr,r2		@ i1
+	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
+	eor	r6,r6,r9,lsl#8
+	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
+	and	r9,lr,r2,lsr#16
+
+	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+	eor	r0,r0,r7,lsl#8
+	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
+	eor	r1,r8,r1,lsl#16
+	and	r7,lr,r3,lsr#16	@ i0
+	eor	r2,r5,r2,lsl#16
+	and	r8,lr,r3,lsr#8	@ i1
+	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
+	eor	r6,r6,r9,lsl#16
+	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
+	and	r9,lr,r3		@ i2
+
+	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
+	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+	eor	r0,r0,r7,lsl#16
+	ldr	r7,[r11,#0]
+	eor	r1,r1,r8,lsl#8
+	ldr	r4,[r11,#4]
+	eor	r2,r9,r2,lsl#8
+	ldr	r5,[r11,#8]
+	eor	r3,r6,r3,lsl#24
+	ldr	r6,[r11,#12]
+
+	eor	r0,r0,r7
+	eor	r1,r1,r4
+	eor	r2,r2,r5
+	eor	r3,r3,r6
+
+	sub	r10,r10,#1024
+	ldr	pc,[sp],#4		@ pop and return
+.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
diff --git a/src/main/jni/aes/aes_core.c b/src/main/jni/aes/aes_core.c
new file mode 100644
index 000000000..4898bbdfb
--- /dev/null
+++ b/src/main/jni/aes/aes_core.c
@@ -0,0 +1,1358 @@
+/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Note: rewritten a little bit to provide error control and an OpenSSL-
+   compatible API */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+#  define NDEBUG
+# endif
+#endif
+#include <assert.h>
+
+#include <stdlib.h>
+#include "aes.h"
+#include "aes_locl.h"
+
+#ifndef AES_ASM
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01];
+*/
+
+static const u32 Te0[256] = {
+    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 Te1[256] = {
+    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 Te2[256] = {
+    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 Te3[256] = {
+    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+
+static const u32 Td0[256] = {
+    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+static const u32 Td1[256] = {
+    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+static const u32 Td2[256] = {
+    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+static const u32 Td3[256] = {
+    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+static const u8 Td4[256] = {
+    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+static const u32 rcon[] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key) {
+
+	u32 *rk;
+   	int i = 0;
+	u32 temp;
+
+	if (!userKey || !key)
+		return -1;
+	if (bits != 128 && bits != 192 && bits != 256)
+		return -2;
+
+	rk = key->rd_key;
+
+	if (bits==128)
+		key->rounds = 10;
+	else if (bits==192)
+		key->rounds = 12;
+	else
+		key->rounds = 14;
+
+	rk[0] = GETU32(userKey     );
+	rk[1] = GETU32(userKey +  4);
+	rk[2] = GETU32(userKey +  8);
+	rk[3] = GETU32(userKey + 12);
+	if (bits == 128) {
+		while (1) {
+			temp  = rk[3];
+			rk[4] = rk[0] ^
+				(Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+				(Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+				(Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+				(Te1[(temp >> 24)       ] & 0x000000ff) ^
+				rcon[i];
+			rk[5] = rk[1] ^ rk[4];
+			rk[6] = rk[2] ^ rk[5];
+			rk[7] = rk[3] ^ rk[6];
+			if (++i == 10) {
+				return 0;
+			}
+			rk += 4;
+		}
+	}
+	rk[4] = GETU32(userKey + 16);
+	rk[5] = GETU32(userKey + 20);
+	if (bits == 192) {
+		while (1) {
+			temp = rk[ 5];
+			rk[ 6] = rk[ 0] ^
+				(Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+				(Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+				(Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+				(Te1[(temp >> 24)       ] & 0x000000ff) ^
+				rcon[i];
+			rk[ 7] = rk[ 1] ^ rk[ 6];
+			rk[ 8] = rk[ 2] ^ rk[ 7];
+			rk[ 9] = rk[ 3] ^ rk[ 8];
+			if (++i == 8) {
+				return 0;
+			}
+			rk[10] = rk[ 4] ^ rk[ 9];
+			rk[11] = rk[ 5] ^ rk[10];
+			rk += 6;
+		}
+	}
+	rk[6] = GETU32(userKey + 24);
+	rk[7] = GETU32(userKey + 28);
+	if (bits == 256) {
+		while (1) {
+			temp = rk[ 7];
+			rk[ 8] = rk[ 0] ^
+				(Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+				(Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
+				(Te0[(temp      ) & 0xff] & 0x0000ff00) ^
+				(Te1[(temp >> 24)       ] & 0x000000ff) ^
+				rcon[i];
+			rk[ 9] = rk[ 1] ^ rk[ 8];
+			rk[10] = rk[ 2] ^ rk[ 9];
+			rk[11] = rk[ 3] ^ rk[10];
+			if (++i == 7) {
+				return 0;
+			}
+			temp = rk[11];
+			rk[12] = rk[ 4] ^
+				(Te2[(temp >> 24)       ] & 0xff000000) ^
+				(Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
+				(Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
+				(Te1[(temp      ) & 0xff] & 0x000000ff);
+			rk[13] = rk[ 5] ^ rk[12];
+			rk[14] = rk[ 6] ^ rk[13];
+			rk[15] = rk[ 7] ^ rk[14];
+
+			rk += 8;
+        	}
+	}
+	return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+			 AES_KEY *key) {
+
+        u32 *rk;
+	int i, j, status;
+	u32 temp;
+
+	/* first, start with an encryption schedule */
+	status = private_AES_set_encrypt_key(userKey, bits, key);
+	if (status < 0)
+		return status;
+
+	rk = key->rd_key;
+
+	/* invert the order of the round keys: */
+	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
+		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+	}
+	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
+	for (i = 1; i < (key->rounds); i++) {
+		rk += 4;
+		rk[0] =
+			Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
+			Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
+			Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
+			Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
+		rk[1] =
+			Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
+			Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
+			Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
+			Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
+		rk[2] =
+			Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
+			Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
+			Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
+			Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
+		rk[3] =
+			Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
+			Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
+			Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
+			Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
+	}
+	return 0;
+}
+
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+		 const AES_KEY *key) {
+
+	const u32 *rk;
+	u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+	int r;
+#endif /* ?FULL_UNROLL */
+
+	assert(in && out && key);
+	rk = key->rd_key;
+
+	/*
+	 * map byte array block to cipher state
+	 * and add initial round key:
+	 */
+	s0 = GETU32(in     ) ^ rk[0];
+	s1 = GETU32(in +  4) ^ rk[1];
+	s2 = GETU32(in +  8) ^ rk[2];
+	s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+	/* round 1: */
+   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+   	/* round 2: */
+   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+	/* round 3: */
+   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+   	/* round 4: */
+   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+	/* round 5: */
+   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+   	/* round 6: */
+   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+	/* round 7: */
+   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+   	/* round 8: */
+   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+	/* round 9: */
+   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+    if (key->rounds > 10) {
+        /* round 10: */
+        s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+        s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+        s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+        s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+        /* round 11: */
+        t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+        t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+        t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+        t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+        if (key->rounds > 12) {
+            /* round 12: */
+            s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+            s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+            s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+            s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+            /* round 13: */
+            t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+            t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+            t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+            t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+        }
+    }
+    rk += key->rounds << 2;
+#else  /* !FULL_UNROLL */
+    /*
+     * Nr - 1 full rounds:
+     */
+    r = key->rounds >> 1;
+    for (;;) {
+        t0 =
+            Te0[(s0 >> 24)       ] ^
+            Te1[(s1 >> 16) & 0xff] ^
+            Te2[(s2 >>  8) & 0xff] ^
+            Te3[(s3      ) & 0xff] ^
+            rk[4];
+        t1 =
+            Te0[(s1 >> 24)       ] ^
+            Te1[(s2 >> 16) & 0xff] ^
+            Te2[(s3 >>  8) & 0xff] ^
+            Te3[(s0      ) & 0xff] ^
+            rk[5];
+        t2 =
+            Te0[(s2 >> 24)       ] ^
+            Te1[(s3 >> 16) & 0xff] ^
+            Te2[(s0 >>  8) & 0xff] ^
+            Te3[(s1      ) & 0xff] ^
+            rk[6];
+        t3 =
+            Te0[(s3 >> 24)       ] ^
+            Te1[(s0 >> 16) & 0xff] ^
+            Te2[(s1 >>  8) & 0xff] ^
+            Te3[(s2      ) & 0xff] ^
+            rk[7];
+
+        rk += 8;
+        if (--r == 0) {
+            break;
+        }
+
+        s0 =
+            Te0[(t0 >> 24)       ] ^
+            Te1[(t1 >> 16) & 0xff] ^
+            Te2[(t2 >>  8) & 0xff] ^
+            Te3[(t3      ) & 0xff] ^
+            rk[0];
+        s1 =
+            Te0[(t1 >> 24)       ] ^
+            Te1[(t2 >> 16) & 0xff] ^
+            Te2[(t3 >>  8) & 0xff] ^
+            Te3[(t0      ) & 0xff] ^
+            rk[1];
+        s2 =
+            Te0[(t2 >> 24)       ] ^
+            Te1[(t3 >> 16) & 0xff] ^
+            Te2[(t0 >>  8) & 0xff] ^
+            Te3[(t1      ) & 0xff] ^
+            rk[2];
+        s3 =
+            Te0[(t3 >> 24)       ] ^
+            Te1[(t0 >> 16) & 0xff] ^
+            Te2[(t1 >>  8) & 0xff] ^
+            Te3[(t2      ) & 0xff] ^
+            rk[3];
+    }
+#endif /* ?FULL_UNROLL */
+    /*
+	 * apply last round and
+	 * map cipher state to byte array block:
+	 */
+	s0 =
+		(Te2[(t0 >> 24)       ] & 0xff000000) ^
+		(Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+		(Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
+		(Te1[(t3      ) & 0xff] & 0x000000ff) ^
+		rk[0];
+	PUTU32(out     , s0);
+	s1 =
+		(Te2[(t1 >> 24)       ] & 0xff000000) ^
+		(Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+		(Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
+		(Te1[(t0      ) & 0xff] & 0x000000ff) ^
+		rk[1];
+	PUTU32(out +  4, s1);
+	s2 =
+		(Te2[(t2 >> 24)       ] & 0xff000000) ^
+		(Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+		(Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
+		(Te1[(t1      ) & 0xff] & 0x000000ff) ^
+		rk[2];
+	PUTU32(out +  8, s2);
+	s3 =
+		(Te2[(t3 >> 24)       ] & 0xff000000) ^
+		(Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+		(Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
+		(Te1[(t2      ) & 0xff] & 0x000000ff) ^
+		rk[3];
+	PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+		 const AES_KEY *key) {
+
+	const u32 *rk;
+	u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+	int r;
+#endif /* ?FULL_UNROLL */
+
+	assert(in && out && key);
+	rk = key->rd_key;
+
+	/*
+	 * map byte array block to cipher state
+	 * and add initial round key:
+	 */
+    s0 = GETU32(in     ) ^ rk[0];
+    s1 = GETU32(in +  4) ^ rk[1];
+    s2 = GETU32(in +  8) ^ rk[2];
+    s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+    /* round 1: */
+    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+    /* round 2: */
+    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+    /* round 3: */
+    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+    /* round 4: */
+    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+    /* round 5: */
+    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+    /* round 6: */
+    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+    /* round 7: */
+    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+    /* round 8: */
+    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+    /* round 9: */
+    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+    if (key->rounds > 10) {
+        /* round 10: */
+        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+        /* round 11: */
+        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+        if (key->rounds > 12) {
+            /* round 12: */
+            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+            /* round 13: */
+            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+        }
+    }
+	rk += key->rounds << 2;
+#else  /* !FULL_UNROLL */
+    /*
+     * Nr - 1 full rounds:
+     */
+    r = key->rounds >> 1;
+    for (;;) {
+        t0 =
+            Td0[(s0 >> 24)       ] ^
+            Td1[(s3 >> 16) & 0xff] ^
+            Td2[(s2 >>  8) & 0xff] ^
+            Td3[(s1      ) & 0xff] ^
+            rk[4];
+        t1 =
+            Td0[(s1 >> 24)       ] ^
+            Td1[(s0 >> 16) & 0xff] ^
+            Td2[(s3 >>  8) & 0xff] ^
+            Td3[(s2      ) & 0xff] ^
+            rk[5];
+        t2 =
+            Td0[(s2 >> 24)       ] ^
+            Td1[(s1 >> 16) & 0xff] ^
+            Td2[(s0 >>  8) & 0xff] ^
+            Td3[(s3      ) & 0xff] ^
+            rk[6];
+        t3 =
+            Td0[(s3 >> 24)       ] ^
+            Td1[(s2 >> 16) & 0xff] ^
+            Td2[(s1 >>  8) & 0xff] ^
+            Td3[(s0      ) & 0xff] ^
+            rk[7];
+
+        rk += 8;
+        if (--r == 0) {
+            break;
+        }
+
+        s0 =
+            Td0[(t0 >> 24)       ] ^
+            Td1[(t3 >> 16) & 0xff] ^
+            Td2[(t2 >>  8) & 0xff] ^
+            Td3[(t1      ) & 0xff] ^
+            rk[0];
+        s1 =
+            Td0[(t1 >> 24)       ] ^
+            Td1[(t0 >> 16) & 0xff] ^
+            Td2[(t3 >>  8) & 0xff] ^
+            Td3[(t2      ) & 0xff] ^
+            rk[1];
+        s2 =
+            Td0[(t2 >> 24)       ] ^
+            Td1[(t1 >> 16) & 0xff] ^
+            Td2[(t0 >>  8) & 0xff] ^
+            Td3[(t3      ) & 0xff] ^
+            rk[2];
+        s3 =
+            Td0[(t3 >> 24)       ] ^
+            Td1[(t2 >> 16) & 0xff] ^
+            Td2[(t1 >>  8) & 0xff] ^
+            Td3[(t0      ) & 0xff] ^
+            rk[3];
+    }
+#endif /* ?FULL_UNROLL */
+    /*
+	 * apply last round and
+	 * map cipher state to byte array block:
+	 */
+   	s0 =
+   		(Td4[(t0 >> 24)       ] << 24) ^
+   		(Td4[(t3 >> 16) & 0xff] << 16) ^
+   		(Td4[(t2 >>  8) & 0xff] <<  8) ^
+   		(Td4[(t1      ) & 0xff])       ^
+   		rk[0];
+	PUTU32(out     , s0);
+   	s1 =
+   		(Td4[(t1 >> 24)       ] << 24) ^
+   		(Td4[(t0 >> 16) & 0xff] << 16) ^
+   		(Td4[(t3 >>  8) & 0xff] <<  8) ^
+   		(Td4[(t2      ) & 0xff])       ^
+   		rk[1];
+	PUTU32(out +  4, s1);
+   	s2 =
+   		(Td4[(t2 >> 24)       ] << 24) ^
+   		(Td4[(t1 >> 16) & 0xff] << 16) ^
+   		(Td4[(t0 >>  8) & 0xff] <<  8) ^
+   		(Td4[(t3      ) & 0xff])       ^
+   		rk[2];
+	PUTU32(out +  8, s2);
+   	s3 =
+   		(Td4[(t3 >> 24)       ] << 24) ^
+   		(Td4[(t2 >> 16) & 0xff] << 16) ^
+   		(Td4[(t1 >>  8) & 0xff] <<  8) ^
+   		(Td4[(t0      ) & 0xff])       ^
+   		rk[3];
+	PUTU32(out + 12, s3);
+}
+
+#else /* AES_ASM */
+
+static const u8 Te4[256] = {
+    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+static const u32 rcon[] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key) {
+	u32 *rk;
+   	int i = 0;
+	u32 temp;
+
+	if (!userKey || !key)
+		return -1;
+	if (bits != 128 && bits != 192 && bits != 256)
+		return -2;
+
+	rk = key->rd_key;
+
+	if (bits==128)
+		key->rounds = 10;
+	else if (bits==192)
+		key->rounds = 12;
+	else
+		key->rounds = 14;
+
+	rk[0] = GETU32(userKey     );
+	rk[1] = GETU32(userKey +  4);
+	rk[2] = GETU32(userKey +  8);
+	rk[3] = GETU32(userKey + 12);
+	if (bits == 128) {
+		while (1) {
+			temp  = rk[3];
+			rk[4] = rk[0] ^
+				(Te4[(temp >> 16) & 0xff] << 24) ^
+				(Te4[(temp >>  8) & 0xff] << 16) ^
+				(Te4[(temp      ) & 0xff] << 8) ^
+				(Te4[(temp >> 24)       ]) ^
+				rcon[i];
+			rk[5] = rk[1] ^ rk[4];
+			rk[6] = rk[2] ^ rk[5];
+			rk[7] = rk[3] ^ rk[6];
+			if (++i == 10) {
+				return 0;
+			}
+			rk += 4;
+		}
+	}
+	rk[4] = GETU32(userKey + 16);
+	rk[5] = GETU32(userKey + 20);
+	if (bits == 192) {
+		while (1) {
+			temp = rk[ 5];
+			rk[ 6] = rk[ 0] ^
+				(Te4[(temp >> 16) & 0xff] << 24) ^
+				(Te4[(temp >>  8) & 0xff] << 16) ^
+				(Te4[(temp      ) & 0xff] << 8) ^
+				(Te4[(temp >> 24)       ]) ^
+				rcon[i];
+			rk[ 7] = rk[ 1] ^ rk[ 6];
+			rk[ 8] = rk[ 2] ^ rk[ 7];
+			rk[ 9] = rk[ 3] ^ rk[ 8];
+			if (++i == 8) {
+				return 0;
+			}
+			rk[10] = rk[ 4] ^ rk[ 9];
+			rk[11] = rk[ 5] ^ rk[10];
+			rk += 6;
+		}
+	}
+	rk[6] = GETU32(userKey + 24);
+	rk[7] = GETU32(userKey + 28);
+	if (bits == 256) {
+		while (1) {
+			temp = rk[ 7];
+			rk[ 8] = rk[ 0] ^
+				(Te4[(temp >> 16) & 0xff] << 24) ^
+				(Te4[(temp >>  8) & 0xff] << 16) ^
+				(Te4[(temp      ) & 0xff] << 8) ^
+				(Te4[(temp >> 24)       ]) ^
+				rcon[i];
+			rk[ 9] = rk[ 1] ^ rk[ 8];
+			rk[10] = rk[ 2] ^ rk[ 9];
+			rk[11] = rk[ 3] ^ rk[10];
+			if (++i == 7) {
+				return 0;
+			}
+			temp = rk[11];
+			rk[12] = rk[ 4] ^
+				(Te4[(temp >> 24)       ] << 24) ^
+				(Te4[(temp >> 16) & 0xff] << 16) ^
+				(Te4[(temp >>  8) & 0xff] << 8) ^
+				(Te4[(temp      ) & 0xff]);
+			rk[13] = rk[ 5] ^ rk[12];
+			rk[14] = rk[ 6] ^ rk[13];
+			rk[15] = rk[ 7] ^ rk[14];
+
+			rk += 8;
+        	}
+	}
+	return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+			 AES_KEY *key) {
+
+        u32 *rk;
+	int i, j, status;
+	u32 temp;
+
+	/* first, start with an encryption schedule */
+	status = private_AES_set_encrypt_key(userKey, bits, key);
+	if (status < 0)
+		return status;
+
+	rk = key->rd_key;
+
+	/* invert the order of the round keys: */
+	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
+		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+	}
+	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
+	for (i = 1; i < (key->rounds); i++) {
+		rk += 4;
+		for (j = 0; j < 4; j++) {
+			u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+			tp1 = rk[j];
+			m = tp1 & 0x80808080;
+			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+				((m - (m >> 7)) & 0x1b1b1b1b);
+			m = tp2 & 0x80808080;
+			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+				((m - (m >> 7)) & 0x1b1b1b1b);
+			m = tp4 & 0x80808080;
+			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+				((m - (m >> 7)) & 0x1b1b1b1b);
+			tp9 = tp8 ^ tp1;
+			tpb = tp9 ^ tp2;
+			tpd = tp9 ^ tp4;
+			tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+			rk[j] = tpe ^ ROTATE(tpd,16) ^
+				ROTATE(tp9,24) ^ ROTATE(tpb,8);
+#else
+			rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 
+				(tp9 >> 8) ^ (tp9 << 24) ^
+				(tpb >> 24) ^ (tpb << 8);
+#endif
+		}
+	}
+	return 0;
+}
+
+#endif /* AES_ASM */
diff --git a/src/main/jni/aes/aes_ige.c b/src/main/jni/aes/aes_ige.c
new file mode 100644
index 000000000..f48f1f9f5
--- /dev/null
+++ b/src/main/jni/aes/aes_ige.c
@@ -0,0 +1,325 @@
+/* crypto/aes/aes_ige.c -*- mode:C; c-file-style: "eay" -*- */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+//#include "cryptlib.h"
+
+#include "aes.h"
+#include "aes_locl.h"
+#include <assert.h>
+#define OPENSSL_assert assert
+
+#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
+typedef struct {
+        unsigned long data[N_WORDS];
+} aes_block_t;
+
+/* XXX: probably some better way to do this */
+#if defined(__i386__) || defined(__x86_64__)
+#define UNALIGNED_MEMOPS_ARE_FAST 1
+#else
+#define UNALIGNED_MEMOPS_ARE_FAST 0
+#endif
+
+#if UNALIGNED_MEMOPS_ARE_FAST
+#define load_block(d, s)        (d) = *(const aes_block_t *)(s)
+#define store_block(d, s)       *(aes_block_t *)(d) = (s)
+#else
+#define load_block(d, s)        memcpy((d).data, (s), AES_BLOCK_SIZE)
+#define store_block(d, s)       memcpy((d), (s).data, AES_BLOCK_SIZE)
+#endif
+
+/* N.B. The IV for this mode is _twice_ the block size */
+
+void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
+					 size_t length, const AES_KEY *key,
+					 unsigned char *ivec, const int enc)
+	{
+	size_t n;
+	size_t len = length;
+
+	OPENSSL_assert(in && out && key && ivec);
+	OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
+	OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
+
+	len = length / AES_BLOCK_SIZE;
+
+	if (AES_ENCRYPT == enc)
+		{
+		if (in != out &&
+		    (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
+			{
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len)
+				{
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				for(n=0 ; n < N_WORDS; ++n)
+					outp->data[n] = inp->data[n] ^ ivp->data[n];
+				AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
+				for(n=0 ; n < N_WORDS; ++n)
+					outp->data[n] ^= iv2p->data[n];
+				ivp = outp;
+				iv2p = inp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+				}
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+			}
+		else
+			{
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len)
+				{
+				load_block(tmp, in);
+				for(n=0 ; n < N_WORDS; ++n)
+					tmp2.data[n] = tmp.data[n] ^ iv.data[n];
+				AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
+				for(n=0 ; n < N_WORDS; ++n)
+					tmp2.data[n] ^= iv2.data[n];
+				store_block(out, tmp2);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+				}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+			}
+		}
+	else
+		{
+		if (in != out &&
+		    (UNALIGNED_MEMOPS_ARE_FAST || ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(long)==0))
+			{
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len)
+				{
+				aes_block_t tmp;
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				for(n=0 ; n < N_WORDS; ++n)
+					tmp.data[n] = inp->data[n] ^ iv2p->data[n];
+				AES_decrypt((unsigned char *)tmp.data, (unsigned char *)outp->data, key);
+				for(n=0 ; n < N_WORDS; ++n)
+					outp->data[n] ^= ivp->data[n];
+				ivp = inp;
+				iv2p = outp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+				}
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+			}
+		else
+			{
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len)
+				{
+				load_block(tmp, in);
+				tmp2 = tmp;
+				for(n=0 ; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv2.data[n];
+				AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
+				for(n=0 ; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv.data[n];
+				store_block(out, tmp);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+				}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+			}
+		}
+	}
+
+/*
+ * Note that its effectively impossible to do biIGE in anything other
+ * than a single pass, so no provision is made for chaining.
+ */
+
+/* N.B. The IV for this mode is _four times_ the block size */
+
+void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
+						size_t length, const AES_KEY *key,
+						const AES_KEY *key2, const unsigned char *ivec,
+						const int enc)
+	{
+	size_t n;
+	size_t len = length;
+	unsigned char tmp[AES_BLOCK_SIZE];
+	unsigned char tmp2[AES_BLOCK_SIZE];
+	unsigned char tmp3[AES_BLOCK_SIZE];
+	unsigned char prev[AES_BLOCK_SIZE];
+	const unsigned char *iv;
+	const unsigned char *iv2;
+
+	OPENSSL_assert(in && out && key && ivec);
+	OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
+	OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
+
+	if (AES_ENCRYPT == enc)
+		{
+		/* XXX: Do a separate case for when in != out (strictly should
+		   check for overlap, too) */
+
+		/* First the forward pass */ 
+		iv = ivec;
+		iv2 = ivec + AES_BLOCK_SIZE;
+		while (len >= AES_BLOCK_SIZE)
+			{
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] = in[n] ^ iv[n];
+			AES_encrypt(out, out, key);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] ^= iv2[n];
+			iv = out;
+			memcpy(prev, in, AES_BLOCK_SIZE);
+			iv2 = prev;
+			len -= AES_BLOCK_SIZE;
+			in += AES_BLOCK_SIZE;
+			out += AES_BLOCK_SIZE;
+			}
+
+		/* And now backwards */
+		iv = ivec + AES_BLOCK_SIZE*2;
+		iv2 = ivec + AES_BLOCK_SIZE*3;
+		len = length;
+		while(len >= AES_BLOCK_SIZE)
+			{
+			out -= AES_BLOCK_SIZE;
+			/* XXX: reduce copies by alternating between buffers */
+			memcpy(tmp, out, AES_BLOCK_SIZE);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] ^= iv[n];
+			/*			hexdump(stdout, "out ^ iv", out, AES_BLOCK_SIZE); */
+			AES_encrypt(out, out, key);
+			/*			hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
+			/*			hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] ^= iv2[n];
+			/*			hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
+			iv = out;
+			memcpy(prev, tmp, AES_BLOCK_SIZE);
+			iv2 = prev;
+			len -= AES_BLOCK_SIZE;
+			}
+		}
+	else
+		{
+		/* First backwards */
+		iv = ivec + AES_BLOCK_SIZE*2;
+		iv2 = ivec + AES_BLOCK_SIZE*3;
+		in += length;
+		out += length;
+		while (len >= AES_BLOCK_SIZE)
+			{
+			in -= AES_BLOCK_SIZE;
+			out -= AES_BLOCK_SIZE;
+			memcpy(tmp, in, AES_BLOCK_SIZE);
+			memcpy(tmp2, in, AES_BLOCK_SIZE);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				tmp[n] ^= iv2[n];
+			AES_decrypt(tmp, out, key);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] ^= iv[n];
+			memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
+			iv = tmp3;
+			iv2 = out;
+			len -= AES_BLOCK_SIZE;
+			}
+
+		/* And now forwards */
+		iv = ivec;
+		iv2 = ivec + AES_BLOCK_SIZE;
+		len = length;
+		while (len >= AES_BLOCK_SIZE)
+			{
+			memcpy(tmp, out, AES_BLOCK_SIZE);
+			memcpy(tmp2, out, AES_BLOCK_SIZE);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				tmp[n] ^= iv2[n];
+			AES_decrypt(tmp, out, key);
+			for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
+				out[n] ^= iv[n];
+			memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
+			iv = tmp3;
+			iv2 = out;
+			len -= AES_BLOCK_SIZE;
+			in += AES_BLOCK_SIZE;
+			out += AES_BLOCK_SIZE;
+			}
+		}
+	}
diff --git a/src/main/jni/aes/aes_locl.h b/src/main/jni/aes/aes_locl.h
new file mode 100644
index 000000000..ec61d465d
--- /dev/null
+++ b/src/main/jni/aes/aes_locl.h
@@ -0,0 +1,89 @@
+/* crypto/aes/aes.h -*- mode:C; c-file-style: "eay" -*- */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef HEADER_AES_LOCL_H
+#define HEADER_AES_LOCL_H
+
+//#include <openssl/e_os2.h>
+
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
+# define GETU32(p) SWAP(*((u32 *)(p)))
+# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
+#else
+# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
+# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
+#endif
+
+#ifdef AES_LONG
+typedef unsigned long u32;
+#else
+typedef unsigned int u32;
+#endif
+typedef unsigned short u16;
+typedef unsigned char u8;
+
+#define MAXKC   (256/32)
+#define MAXKB   (256/8)
+#define MAXNR   14
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+
+#endif /* !HEADER_AES_LOCL_H */
diff --git a/src/main/jni/aes/aes_misc.c b/src/main/jni/aes/aes_misc.c
new file mode 100644
index 000000000..640b0af95
--- /dev/null
+++ b/src/main/jni/aes/aes_misc.c
@@ -0,0 +1,85 @@
+/* crypto/aes/aes_misc.c -*- mode:C; c-file-style: "eay" -*- */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+//#include <openssl/opensslv.h>
+//#include <openssl/crypto.h>
+#include "aes.h"
+#include "aes_locl.h"
+
+const char AES_version[]="AES" ;//OPENSSL_VERSION_PTEXT;
+
+const char *AES_options(void) {
+#ifdef FULL_UNROLL
+        return "aes(full)";
+#else   
+        return "aes(partial)";
+#endif
+}
+
+/* FIPS wrapper functions to block low level AES calls in FIPS mode */
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key)
+	{
+#ifdef OPENSSL_FIPS
+	fips_cipher_abort(AES);
+#endif
+	return private_AES_set_encrypt_key(userKey, bits, key);
+	}
+
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+			AES_KEY *key)
+	{
+#ifdef OPENSSL_FIPS
+	fips_cipher_abort(AES);
+#endif
+	return private_AES_set_decrypt_key(userKey, bits, key);
+	}
diff --git a/src/main/jni/aes/arm_arch.h b/src/main/jni/aes/arm_arch.h
new file mode 100644
index 000000000..5a8310768
--- /dev/null
+++ b/src/main/jni/aes/arm_arch.h
@@ -0,0 +1,51 @@
+#ifndef __ARM_ARCH_H__
+#define __ARM_ARCH_H__
+
+#if !defined(__ARM_ARCH__)
+# if defined(__CC_ARM)
+#  define __ARM_ARCH__ __TARGET_ARCH_ARM
+#  if defined(__BIG_ENDIAN)
+#   define __ARMEB__
+#  else
+#   define __ARMEL__
+#  endif
+# elif defined(__GNUC__)
+  /*
+   * Why doesn't gcc define __ARM_ARCH__? Instead it defines
+   * bunch of below macros. See all_architectires[] table in
+   * gcc/config/arm/arm.c. On a side note it defines
+   * __ARMEL__/__ARMEB__ for little-/big-endian.
+   */
+#  if	defined(__ARM_ARCH_7__)	|| defined(__ARM_ARCH_7A__)	|| \
+	defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)	|| \
+	defined(__ARM_ARCH_7EM__)
+#   define __ARM_ARCH__ 7
+#  elif	defined(__ARM_ARCH_6__)	|| defined(__ARM_ARCH_6J__)	|| \
+	defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__)	|| \
+	defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__)	|| \
+	defined(__ARM_ARCH_6T2__)
+#   define __ARM_ARCH__ 6
+#  elif	defined(__ARM_ARCH_5__)	|| defined(__ARM_ARCH_5T__)	|| \
+	defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__)	|| \
+	defined(__ARM_ARCH_5TEJ__)
+#   define __ARM_ARCH__ 5
+#  elif	defined(__ARM_ARCH_4__)	|| defined(__ARM_ARCH_4T__)
+#   define __ARM_ARCH__ 4
+#  else
+#   error "unsupported ARM architecture"
+#  endif
+# endif
+#endif
+
+#ifdef OPENSSL_FIPSCANISTER
+#include <openssl/fipssyms.h>
+#endif
+
+#if !__ASSEMBLER__
+extern unsigned int OPENSSL_armcap_P;
+                                     
+#define ARMV7_NEON      (1<<0)
+#define ARMV7_TICK      (1<<1)
+#endif
+
+#endif
diff --git a/src/main/jni/audio.c b/src/main/jni/audio.c
new file mode 100644
index 000000000..3ca0f1fb2
--- /dev/null
+++ b/src/main/jni/audio.c
@@ -0,0 +1,671 @@
+#include <jni.h>
+#include <ogg/ogg.h>
+#include <stdio.h>
+#include <opus.h>
+#include <stdlib.h>
+#include <time.h>
+#include <opusfile.h>
+#include "utils.h"
+
+typedef struct {
+    int version;
+    int channels; /* Number of channels: 1..255 */
+    int preskip;
+    ogg_uint32_t input_sample_rate;
+    int gain; /* in dB S7.8 should be zero whenever possible */
+    int channel_mapping;
+    /* The rest is only used if channel_mapping != 0 */
+    int nb_streams;
+    int nb_coupled;
+    unsigned char stream_map[255];
+} OpusHeader;
+
+typedef struct {
+    unsigned char *data;
+    int maxlen;
+    int pos;
+} Packet;
+
+typedef struct {
+    const unsigned char *data;
+    int maxlen;
+    int pos;
+} ROPacket;
+
+typedef struct {
+    void *readdata;
+    opus_int64 total_samples_per_channel;
+    int rawmode;
+    int channels;
+    long rate;
+    int gain;
+    int samplesize;
+    int endianness;
+    char *infilename;
+    int ignorelength;
+    int skip;
+    int extraout;
+    char *comments;
+    int comments_length;
+    int copy_comments;
+} oe_enc_opt;
+
+static int write_uint32(Packet *p, ogg_uint32_t val) {
+    if (p->pos > p->maxlen - 4) {
+        return 0;
+    }
+    p->data[p->pos  ] = (val    ) & 0xFF;
+    p->data[p->pos+1] = (val>> 8) & 0xFF;
+    p->data[p->pos+2] = (val>>16) & 0xFF;
+    p->data[p->pos+3] = (val>>24) & 0xFF;
+    p->pos += 4;
+    return 1;
+}
+
+static int write_uint16(Packet *p, ogg_uint16_t val) {
+    if (p->pos > p->maxlen-2) {
+        return 0;
+    }
+    p->data[p->pos  ] = (val    ) & 0xFF;
+    p->data[p->pos+1] = (val>> 8) & 0xFF;
+    p->pos += 2;
+    return 1;
+}
+
+static int write_chars(Packet *p, const unsigned char *str, int nb_chars)
+{
+    int i;
+    if (p->pos>p->maxlen-nb_chars)
+        return 0;
+    for (i=0;i<nb_chars;i++)
+        p->data[p->pos++] = str[i];
+    return 1;
+}
+
+static int read_uint32(ROPacket *p, ogg_uint32_t *val)
+{
+    if (p->pos>p->maxlen-4)
+        return 0;
+    *val =  (ogg_uint32_t)p->data[p->pos  ];
+    *val |= (ogg_uint32_t)p->data[p->pos+1]<< 8;
+    *val |= (ogg_uint32_t)p->data[p->pos+2]<<16;
+    *val |= (ogg_uint32_t)p->data[p->pos+3]<<24;
+    p->pos += 4;
+    return 1;
+}
+
+static int read_uint16(ROPacket *p, ogg_uint16_t *val)
+{
+    if (p->pos>p->maxlen-2)
+        return 0;
+    *val =  (ogg_uint16_t)p->data[p->pos  ];
+    *val |= (ogg_uint16_t)p->data[p->pos+1]<<8;
+    p->pos += 2;
+    return 1;
+}
+
+static int read_chars(ROPacket *p, unsigned char *str, int nb_chars)
+{
+    int i;
+    if (p->pos>p->maxlen-nb_chars)
+        return 0;
+    for (i=0;i<nb_chars;i++)
+        str[i] = p->data[p->pos++];
+    return 1;
+}
+
+int opus_header_to_packet(const OpusHeader *h, unsigned char *packet, int len) {
+    int i;
+    Packet p;
+    unsigned char ch;
+    
+    p.data = packet;
+    p.maxlen = len;
+    p.pos = 0;
+    if (len < 19) {
+        return 0;
+    }
+    if (!write_chars(&p, (const unsigned char *)"OpusHead", 8)) {
+        return 0;
+    }
+
+    ch = 1;
+    if (!write_chars(&p, &ch, 1)) {
+        return 0;
+    }
+    
+    ch = h->channels;
+    if (!write_chars(&p, &ch, 1)) {
+        return 0;
+    }
+    
+    if (!write_uint16(&p, h->preskip)) {
+        return 0;
+    }
+    
+    if (!write_uint32(&p, h->input_sample_rate)) {
+        return 0;
+    }
+    
+    if (!write_uint16(&p, h->gain)) {
+        return 0;
+    }
+    
+    ch = h->channel_mapping;
+    if (!write_chars(&p, &ch, 1)) {
+        return 0;
+    }
+    
+    if (h->channel_mapping != 0) {
+        ch = h->nb_streams;
+        if (!write_chars(&p, &ch, 1)) {
+            return 0;
+        }
+        
+        ch = h->nb_coupled;
+        if (!write_chars(&p, &ch, 1)) {
+            return 0;
+        }
+        
+        /* Multi-stream support */
+        for (i = 0; i < h->channels; i++) {
+            if (!write_chars(&p, &h->stream_map[i], 1)) {
+                return 0;
+            }
+        }
+    }
+    
+    return p.pos;
+}
+
+#define writeint(buf, base, val) do { buf[base + 3] = ((val) >> 24) & 0xff; \
+buf[base + 2]=((val) >> 16) & 0xff; \
+buf[base + 1]=((val) >> 8) & 0xff; \
+buf[base] = (val) & 0xff; \
+} while(0)
+
+static void comment_init(char **comments, int *length, const char *vendor_string) {
+    // The 'vendor' field should be the actual encoding library used
+    int vendor_length = strlen(vendor_string);
+    int user_comment_list_length = 0;
+    int len = 8 + 4 + vendor_length + 4;
+    char *p = (char *)malloc(len);
+    memcpy(p, "OpusTags", 8);
+    writeint(p, 8, vendor_length);
+    memcpy(p + 12, vendor_string, vendor_length);
+    writeint(p, 12 + vendor_length, user_comment_list_length);
+    *length = len;
+    *comments = p;
+}
+
+static void comment_pad(char **comments, int* length, int amount) {
+    if (amount > 0) {
+        char *p = *comments;
+        // Make sure there is at least amount worth of padding free, and round up to the maximum that fits in the current ogg segments
+        int newlen = (*length + amount + 255) / 255 * 255 - 1;
+        p = realloc(p, newlen);
+        for (int i = *length; i < newlen; i++) {
+            p[i] = 0;
+        }
+        *comments = p;
+        *length = newlen;
+    }
+}
+
+static int writeOggPage(ogg_page *page, FILE *os) {
+    int written = fwrite(page->header, sizeof(unsigned char), page->header_len, os);
+    written += fwrite(page->body, sizeof(unsigned char), page->body_len, os);
+    return written;
+}
+
+const opus_int32 bitrate = 16000;
+const opus_int32 rate = 16000;
+const opus_int32 frame_size = 960;
+const int with_cvbr = 1;
+const int max_ogg_delay = 0;
+const int comment_padding = 512;
+
+opus_int32 coding_rate = 16000;
+ogg_int32_t _packetId;
+OpusEncoder *_encoder = 0;
+uint8_t *_packet = 0;
+ogg_stream_state os;
+FILE *_fileOs = 0;
+oe_enc_opt inopt;
+OpusHeader header;
+opus_int32 min_bytes;
+int max_frame_bytes;
+ogg_packet op;
+ogg_page og;
+opus_int64 bytes_written;
+opus_int64 pages_out;
+opus_int64 total_samples;
+ogg_int64_t enc_granulepos;
+ogg_int64_t last_granulepos;
+int size_segments;
+int last_segments;
+
+void cleanupRecorder() {
+    
+    ogg_stream_flush(&os, &og);
+    
+    if (_encoder) {
+        opus_encoder_destroy(_encoder);
+        _encoder = 0;
+    }
+    
+    ogg_stream_clear(&os);
+    
+    if (_packet) {
+        free(_packet);
+        _packet = 0;
+    }
+    
+    if (_fileOs) {
+        fclose(_fileOs);
+        _fileOs = 0;
+    }
+    
+    _packetId = -1;
+    bytes_written = 0;
+    pages_out = 0;
+    total_samples = 0;
+    enc_granulepos = 0;
+    size_segments = 0;
+    last_segments = 0;
+    last_granulepos = 0;
+    memset(&os, 0, sizeof(ogg_stream_state));
+    memset(&inopt, 0, sizeof(oe_enc_opt));
+    memset(&header, 0, sizeof(OpusHeader));
+    memset(&op, 0, sizeof(ogg_packet));
+    memset(&og, 0, sizeof(ogg_page));
+}
+
+int initRecorder(const char *path) {
+    cleanupRecorder();
+    
+    if (!path) {
+        return 0;
+    }
+    
+    _fileOs = fopen(path, "wb");
+    if (!_fileOs) {
+        return 0;
+    }
+    
+    inopt.rate = rate;
+    inopt.gain = 0;
+    inopt.endianness = 0;
+    inopt.copy_comments = 0;
+    inopt.rawmode = 1;
+    inopt.ignorelength = 1;
+    inopt.samplesize = 16;
+    inopt.channels = 1;
+    inopt.skip = 0;
+    
+    comment_init(&inopt.comments, &inopt.comments_length, opus_get_version_string());
+    
+    if (rate > 24000) {
+        coding_rate = 48000;
+    } else if (rate > 16000) {
+        coding_rate = 24000;
+    } else if (rate > 12000) {
+        coding_rate = 16000;
+    } else if (rate > 8000) {
+        coding_rate = 12000;
+    } else {
+        coding_rate = 8000;
+    }
+    
+    if (rate != coding_rate) {
+        LOGE("Invalid rate");
+        return 0;
+    }
+    
+    header.channels = 1;
+    header.channel_mapping = 0;
+    header.input_sample_rate = rate;
+    header.gain = inopt.gain;
+    header.nb_streams = 1;
+    
+    int result = OPUS_OK;
+    _encoder = opus_encoder_create(coding_rate, 1, OPUS_APPLICATION_AUDIO, &result);
+    if (result != OPUS_OK) {
+        LOGE("Error cannot create encoder: %s", opus_strerror(result));
+        return 0;
+    }
+    
+    min_bytes = max_frame_bytes = (1275 * 3 + 7) * header.nb_streams;
+    _packet = malloc(max_frame_bytes);
+    
+    result = opus_encoder_ctl(_encoder, OPUS_SET_BITRATE(bitrate));
+    if (result != OPUS_OK) {
+        LOGE("Error OPUS_SET_BITRATE returned: %s", opus_strerror(result));
+        return 0;
+    }
+    
+#ifdef OPUS_SET_LSB_DEPTH
+    result = opus_encoder_ctl(_encoder, OPUS_SET_LSB_DEPTH(max(8, min(24, inopt.samplesize))));
+    if (result != OPUS_OK) {
+        LOGE("Warning OPUS_SET_LSB_DEPTH returned: %s", opus_strerror(result));
+    }
+#endif
+    
+    opus_int32 lookahead;
+    result = opus_encoder_ctl(_encoder, OPUS_GET_LOOKAHEAD(&lookahead));
+    if (result != OPUS_OK) {
+        LOGE("Error OPUS_GET_LOOKAHEAD returned: %s", opus_strerror(result));
+        return 0;
+    }
+    
+    inopt.skip += lookahead;
+    header.preskip = (int)(inopt.skip * (48000.0 / coding_rate));
+    inopt.extraout = (int)(header.preskip * (rate / 48000.0));
+    
+    if (ogg_stream_init(&os, rand()) == -1) {
+        LOGE("Error: stream init failed");
+        return 0;
+    }
+    
+    unsigned char header_data[100];
+    int packet_size = opus_header_to_packet(&header, header_data, 100);
+    op.packet = header_data;
+    op.bytes = packet_size;
+    op.b_o_s = 1;
+    op.e_o_s = 0;
+    op.granulepos = 0;
+    op.packetno = 0;
+    ogg_stream_packetin(&os, &op);
+    
+    while ((result = ogg_stream_flush(&os, &og))) {
+        if (!result) {
+            break;
+        }
+        
+        int pageBytesWritten = writeOggPage(&og, _fileOs);
+        if (pageBytesWritten != og.header_len + og.body_len) {
+            LOGE("Error: failed writing header to output stream");
+            return 0;
+        }
+        bytes_written += pageBytesWritten;
+        pages_out++;
+    }
+    
+    comment_pad(&inopt.comments, &inopt.comments_length, comment_padding);
+    op.packet = (unsigned char *)inopt.comments;
+    op.bytes = inopt.comments_length;
+    op.b_o_s = 0;
+    op.e_o_s = 0;
+    op.granulepos = 0;
+    op.packetno = 1;
+    ogg_stream_packetin(&os, &op);
+    
+    while ((result = ogg_stream_flush(&os, &og))) {
+        if (result == 0) {
+            break;
+        }
+        
+        int writtenPageBytes = writeOggPage(&og, _fileOs);
+        if (writtenPageBytes != og.header_len + og.body_len) {
+            LOGE("Error: failed writing header to output stream");
+            return 0;
+        }
+        
+        bytes_written += writtenPageBytes;
+        pages_out++;
+    }
+    
+    free(inopt.comments);
+    
+    return 1;
+}
+
+int writeFrame(uint8_t *framePcmBytes, unsigned int frameByteCount) {
+    int cur_frame_size = frame_size;
+    _packetId++;
+    
+    opus_int32 nb_samples = frameByteCount / 2;
+    total_samples += nb_samples;
+    if (nb_samples < frame_size) {
+        op.e_o_s = 1;
+    } else {
+        op.e_o_s = 0;
+    }
+    
+    int nbBytes = 0;
+    
+    if (nb_samples != 0) {
+        uint8_t *paddedFrameBytes = framePcmBytes;
+        int freePaddedFrameBytes = 0;
+        
+        if (nb_samples < cur_frame_size) {
+            paddedFrameBytes = malloc(cur_frame_size * 2);
+            freePaddedFrameBytes = 1;
+            memcpy(paddedFrameBytes, framePcmBytes, frameByteCount);
+            memset(paddedFrameBytes + nb_samples * 2, 0, cur_frame_size * 2 - nb_samples * 2);
+        }
+        
+        nbBytes = opus_encode(_encoder, (opus_int16 *)paddedFrameBytes, cur_frame_size, _packet, max_frame_bytes / 10);
+        if (freePaddedFrameBytes) {
+            free(paddedFrameBytes);
+            paddedFrameBytes = NULL;
+        }
+        
+        if (nbBytes < 0) {
+            LOGE("Encoding failed: %s. Aborting.", opus_strerror(nbBytes));
+            return 0;
+        }
+        
+        enc_granulepos += cur_frame_size * 48000 / coding_rate;
+        size_segments = (nbBytes + 255) / 255;
+        min_bytes = min(nbBytes, min_bytes);
+    }
+    
+    while ((((size_segments <= 255) && (last_segments + size_segments > 255)) || (enc_granulepos - last_granulepos > max_ogg_delay)) && ogg_stream_flush_fill(&os, &og, 255 * 255)) {
+        if (ogg_page_packets(&og) != 0) {
+            last_granulepos = ogg_page_granulepos(&og);
+        }
+        
+        last_segments -= og.header[26];
+        int writtenPageBytes = writeOggPage(&og, _fileOs);
+        if (writtenPageBytes != og.header_len + og.body_len) {
+            LOGE("Error: failed writing data to output stream");
+            return 0;
+        }
+        bytes_written += writtenPageBytes;
+        pages_out++;
+    }
+    
+    op.packet = (unsigned char *)_packet;
+    op.bytes = nbBytes;
+    op.b_o_s = 0;
+    op.granulepos = enc_granulepos;
+    if (op.e_o_s) {
+        op.granulepos = ((total_samples * 48000 + rate - 1) / rate) + header.preskip;
+    }
+    op.packetno = 2 + _packetId;
+    ogg_stream_packetin(&os, &op);
+    last_segments += size_segments;
+    
+    while ((op.e_o_s || (enc_granulepos + (frame_size * 48000 / coding_rate) - last_granulepos > max_ogg_delay) || (last_segments >= 255)) ? ogg_stream_flush_fill(&os, &og, 255 * 255) : ogg_stream_pageout_fill(&os, &og, 255 * 255)) {
+        if (ogg_page_packets(&og) != 0) {
+            last_granulepos = ogg_page_granulepos(&og);
+        }
+        last_segments -= og.header[26];
+        int writtenPageBytes = writeOggPage(&og, _fileOs);
+        if (writtenPageBytes != og.header_len + og.body_len) {
+            LOGE("Error: failed writing data to output stream");
+            return 0;
+        }
+        bytes_written += writtenPageBytes;
+        pages_out++;
+    }
+    
+    return 1;
+}
+
+JNIEXPORT int Java_org_telegram_android_MediaController_startRecord(JNIEnv *env, jclass class, jstring path) {
+    const char *pathStr = (*env)->GetStringUTFChars(env, path, 0);
+    
+    int result = initRecorder(pathStr);
+    
+    if (pathStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, path, pathStr);
+    }
+    
+    return result;
+}
+
+JNIEXPORT int Java_org_telegram_android_MediaController_writeFrame(JNIEnv *env, jclass class, jobject frame, jint len) {
+    jbyte *frameBytes = (*env)->GetDirectBufferAddress(env, frame);
+    return writeFrame(frameBytes, len);
+}
+
+JNIEXPORT void Java_org_telegram_android_MediaController_stopRecord(JNIEnv *env, jclass class) {
+    cleanupRecorder();
+}
+
+//player
+OggOpusFile *_opusFile;
+int _isSeekable = 0;
+int64_t _totalPcmDuration = 0;
+int64_t _currentPcmOffset = 0;
+int _finished = 0;
+static const int playerBuffersCount = 3;
+static const int playerSampleRate = 48000;
+
+void cleanupPlayer() {
+    if (_opusFile) {
+        op_free(_opusFile);
+        _opusFile = 0;
+    }
+    _isSeekable = 0;
+    _totalPcmDuration = 0;
+    _currentPcmOffset = 0;
+    _finished = 0;
+}
+
+int seekPlayer(float position) {
+    if (!_opusFile || !_isSeekable || position < 0) {
+        return 0;
+    }
+    int result = op_pcm_seek(_opusFile, (ogg_int64_t)(position * _totalPcmDuration));
+    if (result != OPUS_OK) {
+        LOGE("op_pcm_seek failed: %d", result);
+    }
+    ogg_int64_t pcmPosition = op_pcm_tell(_opusFile);
+    _currentPcmOffset = pcmPosition;
+    return result == OPUS_OK;
+}
+
+int initPlayer(const char *path) {
+    cleanupPlayer();
+    
+    int openError = OPUS_OK;
+    _opusFile = op_open_file(path, &openError);
+    if (!_opusFile || openError != OPUS_OK) {
+        LOGE("op_open_file failed: %d", openError);
+        cleanupPlayer();
+        return 0;
+    }
+    
+    _isSeekable = op_seekable(_opusFile);
+    _totalPcmDuration = op_pcm_total(_opusFile, -1);
+        
+    return 1;
+}
+
+void fillBuffer(uint8_t *buffer, int capacity, int *args) {
+    if (_opusFile) {
+        args[1] = max(0, op_pcm_tell(_opusFile));
+        
+        if (_finished) {
+            args[0] = 0;
+            args[1] = 0;
+            args[2] = 1;
+            return;
+        } else {
+            int writtenOutputBytes = 0;
+            int endOfFileReached = 0;
+            
+            while (writtenOutputBytes < capacity) {
+                int readSamples = op_read(_opusFile, (opus_int16 *)(buffer + writtenOutputBytes), (capacity - writtenOutputBytes) / 2, NULL);
+                
+                if (readSamples > 0) {
+                    writtenOutputBytes += readSamples * 2;
+                } else {
+                    if (readSamples < 0) {
+                        LOGE("op_read failed: %d", readSamples);
+                    }
+                    endOfFileReached = 1;
+                    break;
+                }
+            }
+            
+            args[0] = writtenOutputBytes;
+            
+            if (endOfFileReached || args[1] + args[0] == _totalPcmDuration) {
+                _finished = 1;
+                args[2] = 1;
+            } else {
+                args[2] = 0;
+            }
+        }
+    } else {
+        memset(buffer, 0, capacity);
+        args[0] = capacity;
+        args[1] = _totalPcmDuration;
+    }
+}
+
+JNIEXPORT jlong Java_org_telegram_android_MediaController_getTotalPcmDuration(JNIEnv *env, jclass class) {
+    return _totalPcmDuration;
+}
+
+JNIEXPORT void Java_org_telegram_android_MediaController_readOpusFile(JNIEnv *env, jclass class, jobject buffer, jint capacity, jintArray args) {
+    jint *argsArr = (*env)->GetIntArrayElements(env, args, 0);
+    jbyte *bufferBytes = (*env)->GetDirectBufferAddress(env, buffer);
+    fillBuffer(bufferBytes, capacity, argsArr);
+    (*env)->ReleaseIntArrayElements(env, args, argsArr, 0);
+}
+
+JNIEXPORT int Java_org_telegram_android_MediaController_seekOpusFile(JNIEnv *env, jclass class, jfloat position) {
+    return seekPlayer(position);
+}
+
+JNIEXPORT int Java_org_telegram_android_MediaController_openOpusFile(JNIEnv *env, jclass class, jstring path) {
+    const char *pathStr = (*env)->GetStringUTFChars(env, path, 0);
+    
+    int result = initPlayer(pathStr);
+    
+    if (pathStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, path, pathStr);
+    }
+    
+    return result;
+}
+
+JNIEXPORT void Java_org_telegram_android_MediaController_closeOpusFile(JNIEnv *env, jclass class) {
+    cleanupPlayer();
+}
+
+JNIEXPORT int Java_org_telegram_android_MediaController_isOpusFile(JNIEnv *env, jclass class, jstring path) {
+    const char *pathStr = (*env)->GetStringUTFChars(env, path, 0);
+    
+    int result = 0;
+    
+    int error = OPUS_OK;
+    OggOpusFile *file = op_test_file(pathStr, &error);
+    if (file != NULL) {
+        int error = op_test_open(file);
+        op_free(file);
+        
+        result = error == OPUS_OK;
+    }
+    
+    if (pathStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, path, pathStr);
+    }
+    
+    return result;
+}
diff --git a/src/main/jni/empty.c b/src/main/jni/empty.c
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/main/jni/empty.c
diff --git a/src/main/jni/gif.c b/src/main/jni/gif.c
new file mode 100644
index 000000000..64dda4793
--- /dev/null
+++ b/src/main/jni/gif.c
@@ -0,0 +1,847 @@
+//thanks to https://github.com/koral--/android-gif-drawable
+/*
+ MIT License
+ Copyright (c)
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ 
+ // Copyright (c) 2011 Google Inc. All rights reserved.
+ //
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+ //
+ //    * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions and the following disclaimer.
+ //    * Redistributions in binary form must reproduce the above
+ // copyright notice, this list of conditions and the following disclaimer
+ // in the documentation and/or other materials provided with the
+ // distribution.
+ //    * Neither the name of Google Inc. nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+ //
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ 
+ The GIFLIB distribution is Copyright (c) 1997  Eric S. Raymond
+ */
+
+#include <jni.h>
+#include <stdio.h>
+#include <time.h>
+#include <limits.h>
+#include "gif.h"
+#include "giflib/gif_lib.h"
+
+#define D_GIF_ERR_NO_FRAMES     	1000
+#define D_GIF_ERR_INVALID_SCR_DIMS 	1001
+#define D_GIF_ERR_INVALID_IMG_DIMS 	1002
+#define D_GIF_ERR_IMG_NOT_CONFINED 	1003
+
+typedef struct {
+	uint8_t blue;
+	uint8_t green;
+	uint8_t red;
+	uint8_t alpha;
+} argb;
+
+typedef struct {
+	unsigned int duration;
+	int transpIndex;
+	unsigned char disposalMethod;
+} FrameInfo;
+
+typedef struct {
+	GifFileType *gifFilePtr;
+	unsigned long lastFrameReaminder;
+	unsigned long nextStartTime;
+	int currentIndex;
+	unsigned int lastDrawIndex;
+	FrameInfo *infos;
+	argb *backupPtr;
+	int startPos;
+	unsigned char *rasterBits;
+	char *comment;
+	unsigned short loopCount;
+	int currentLoop;
+	jfloat speedFactor;
+} GifInfo;
+
+static ColorMapObject *defaultCmap = NULL;
+
+static ColorMapObject *genDefColorMap(void) {
+	ColorMapObject *cmap = GifMakeMapObject(256, NULL);
+	if (cmap != NULL) {
+		int iColor;
+		for (iColor = 0; iColor < 256; iColor++) {
+			cmap->Colors[iColor].Red = (GifByteType) iColor;
+			cmap->Colors[iColor].Green = (GifByteType) iColor;
+			cmap->Colors[iColor].Blue = (GifByteType) iColor;
+		}
+	}
+	return cmap;
+}
+
+jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
+    defaultCmap = genDefColorMap();
+	if (defaultCmap == NULL) {
+        return -1;
+    }
+    return JNI_VERSION_1_6;
+}
+
+void gifOnJNIUnload(JavaVM *vm, void *reserved) {
+    GifFreeMapObject(defaultCmap);
+}
+
+static int fileReadFunc(GifFileType *gif, GifByteType *bytes, int size) {
+    FILE *file = (FILE *)gif->UserData;
+	return fread(bytes, 1, size, file);
+}
+
+static int fileRewindFun(GifInfo *info) {
+    return fseek(info->gifFilePtr->UserData, info->startPos, SEEK_SET);
+}
+
+static unsigned long getRealTime() {
+	struct timespec ts;
+	const clockid_t id = CLOCK_MONOTONIC;
+	if (id != (clockid_t) - 1 && clock_gettime(id, &ts) != -1) {
+        return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
+    }
+	return -1;
+}
+
+static void cleanUp(GifInfo *info) {
+    if (info->backupPtr) {
+        free(info->backupPtr);
+        info->backupPtr = NULL;
+    }
+    if (info->infos) {
+        free(info->infos);
+        info->infos = NULL;
+    }
+    if (info->rasterBits) {
+        free(info->rasterBits);
+        info->rasterBits = NULL;
+    }
+    if (info->comment) {
+        free(info->comment);
+        info->comment = NULL;
+    }
+    
+	GifFileType *GifFile = info->gifFilePtr;
+	if (GifFile->SColorMap == defaultCmap) {
+        GifFile->SColorMap = NULL;
+    }
+	if (GifFile->SavedImages != NULL) {
+		SavedImage *sp;
+		for (sp = GifFile->SavedImages; sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
+			if (sp->ImageDesc.ColorMap != NULL) {
+				GifFreeMapObject(sp->ImageDesc.ColorMap);
+				sp->ImageDesc.ColorMap = NULL;
+			}
+		}
+		free(GifFile->SavedImages);
+		GifFile->SavedImages = NULL;
+	}
+	DGifCloseFile(GifFile);
+	free(info);
+}
+
+static int getComment(GifByteType *Bytes, char **cmt) {
+	unsigned int len = (unsigned int) Bytes[0];
+	unsigned int offset = *cmt != NULL ? strlen(*cmt) : 0;
+	char *ret = realloc(*cmt, (len + offset + 1) * sizeof(char));
+	if (ret != NULL) {
+		memcpy(ret + offset, &Bytes[1], len);
+		ret[len + offset] = 0;
+		*cmt = ret;
+		return GIF_OK;
+	}
+	return GIF_ERROR;
+}
+
+static void packARGB32(argb *pixel, GifByteType alpha, GifByteType red, GifByteType green, GifByteType blue) {
+	pixel->alpha = alpha;
+	pixel->red = red;
+	pixel->green = green;
+	pixel->blue = blue;
+}
+
+static void getColorFromTable(int idx, argb *dst, const ColorMapObject *cmap) {
+    int colIdx = (idx >= cmap->ColorCount) ? 0 : idx;
+	GifColorType *col = &cmap->Colors[colIdx];
+	packARGB32(dst, 0xFF, col->Red, col->Green, col->Blue);
+}
+
+static void eraseColor(argb *bm, int w, int h, argb color) {
+	int i;
+	for (i = 0; i < w * h; i++) {
+        *(bm + i) = color;
+    }
+}
+
+static inline bool setupBackupBmp(GifInfo *info, short transpIndex) {
+	GifFileType *fGIF = info->gifFilePtr;
+	info->backupPtr = calloc(fGIF->SWidth * fGIF->SHeight, sizeof(argb));
+	if (!info->backupPtr) {
+		info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+		return false;
+	}
+	argb paintingColor;
+	if (transpIndex == -1) {
+        getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
+    } else {
+        packARGB32(&paintingColor, 0, 0, 0, 0);
+    }
+	eraseColor(info->backupPtr, fGIF->SWidth, fGIF->SHeight, paintingColor);
+	return true;
+}
+
+static int readExtensions(int ExtFunction, GifByteType *ExtData, GifInfo *info) {
+	if (ExtData == NULL) {
+        return GIF_OK;
+    }
+	if (ExtFunction == GRAPHICS_EXT_FUNC_CODE && ExtData[0] == 4) {
+		FrameInfo *fi = &info->infos[info->gifFilePtr->ImageCount];
+		fi->transpIndex = -1;
+		char *b = (char*) ExtData + 1;
+		short delay = ((b[2] << 8) | b[1]);
+		fi->duration = delay > 1 ? delay * 10 : 100;
+		fi->disposalMethod = ((b[0] >> 2) & 7);
+		if (ExtData[1] & 1) {
+            fi->transpIndex = 0xff & b[3];
+        }
+		if (fi->disposalMethod == 3 && info->backupPtr == NULL) {
+			if (!setupBackupBmp(info, fi->transpIndex)) {
+                return GIF_ERROR;
+            }
+		}
+	} else if (ExtFunction == COMMENT_EXT_FUNC_CODE) {
+		if (getComment(ExtData, &info->comment) == GIF_ERROR) {
+			info->gifFilePtr->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+			return GIF_ERROR;
+		}
+	} else if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 11) {
+		if (strncmp("NETSCAPE2.0", &ExtData[1], 11) == 0 || strncmp("ANIMEXTS1.0", &ExtData[1], 11) == 0) {
+			if (DGifGetExtensionNext(info->gifFilePtr, &ExtData, &ExtFunction) == GIF_ERROR) {
+                return GIF_ERROR;
+            }
+			if (ExtFunction == APPLICATION_EXT_FUNC_CODE && ExtData[0] == 3 && ExtData[1] == 1) {
+				info->loopCount = (unsigned short) (ExtData[2] + (ExtData[3] << 8));
+			}
+		}
+	}
+	return GIF_OK;
+}
+
+static int DDGifSlurp(GifFileType *GifFile, GifInfo* info, bool shouldDecode) {
+	GifRecordType RecordType;
+	GifByteType *ExtData;
+	int codeSize;
+	int ExtFunction;
+	size_t ImageSize;
+	do {
+		if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR) {
+            return (GIF_ERROR);
+        }
+		switch (RecordType) {
+            case IMAGE_DESC_RECORD_TYPE:
+            
+			if (DGifGetImageDesc(GifFile, !shouldDecode) == GIF_ERROR) {
+                return (GIF_ERROR);
+            }
+			int i = shouldDecode ? info->currentIndex : GifFile->ImageCount - 1;
+			SavedImage *sp = &GifFile->SavedImages[i];
+			ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
+            
+			if (sp->ImageDesc.Width < 1 || sp->ImageDesc.Height < 1 || ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
+				GifFile->Error = D_GIF_ERR_INVALID_IMG_DIMS;
+				return GIF_ERROR;
+			}
+			if (sp->ImageDesc.Width > GifFile->SWidth || sp->ImageDesc.Height > GifFile->SHeight) {
+				GifFile->Error = D_GIF_ERR_IMG_NOT_CONFINED;
+				return GIF_ERROR;
+			}
+			if (shouldDecode) {
+				sp->RasterBits = info->rasterBits;
+				if (sp->ImageDesc.Interlace) {
+					int i, j;
+					int InterlacedOffset[] = { 0, 4, 2, 1 };
+					int InterlacedJumps[] = { 8, 8, 4, 2 };
+					for (i = 0; i < 4; i++) {
+                        for (j = InterlacedOffset[i]; j < sp->ImageDesc.Height; j += InterlacedJumps[i]) {
+                            if (DGifGetLine(GifFile, sp->RasterBits + j * sp->ImageDesc.Width, sp->ImageDesc.Width) == GIF_ERROR) {
+                                return GIF_ERROR;
+                            }
+                        }
+                    }
+				} else {
+					if (DGifGetLine(GifFile, sp->RasterBits, ImageSize) == GIF_ERROR) {
+                        return (GIF_ERROR);
+                    }
+				}
+				if (info->currentIndex >= GifFile->ImageCount - 1) {
+					if (info->loopCount > 0) {
+                        info->currentLoop++;
+                    }
+					if (fileRewindFun(info) != 0) {
+						info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
+						return GIF_ERROR;
+					}
+				}
+				return GIF_OK;
+			} else {
+				if (DGifGetCode(GifFile, &codeSize, &ExtData) == GIF_ERROR) {
+                    return (GIF_ERROR);
+                }
+				while (ExtData != NULL) {
+					if (DGifGetCodeNext(GifFile, &ExtData) == GIF_ERROR) {
+                        return (GIF_ERROR);
+                    }
+				}
+			}
+			break;
+            
+            case EXTENSION_RECORD_TYPE:
+			if (DGifGetExtension(GifFile, &ExtFunction, &ExtData) == GIF_ERROR) {
+                return (GIF_ERROR);
+            }
+            
+			if (!shouldDecode) {
+				FrameInfo *tmpInfos = realloc(info->infos, (GifFile->ImageCount + 1) * sizeof(FrameInfo));
+                if (tmpInfos == NULL) {
+                    return GIF_ERROR;
+                }
+                info->infos = tmpInfos;
+				if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
+                    return GIF_ERROR;
+                }
+			}
+			while (ExtData != NULL) {
+				if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR) {
+                    return (GIF_ERROR);
+                }
+				if (!shouldDecode) {
+					if (readExtensions(ExtFunction, ExtData, info) == GIF_ERROR) {
+                        return GIF_ERROR;
+                    }
+				}
+			}
+			break;
+            
+            case TERMINATE_RECORD_TYPE:
+			break;
+            
+            default:
+			break;
+		}
+	} while (RecordType != TERMINATE_RECORD_TYPE);
+	bool ok = true;
+	if (shouldDecode) {
+		ok = (fileRewindFun(info) == 0);
+	}
+	if (ok) {
+        return (GIF_OK);
+    } else {
+		info->gifFilePtr->Error = D_GIF_ERR_READ_FAILED;
+		return (GIF_ERROR);
+	}
+}
+
+static void copyLine(argb *dst, const unsigned char *src, const ColorMapObject *cmap, int transparent, int width) {
+	for (; width > 0; width--, src++, dst++) {
+		if (*src != transparent) {
+            getColorFromTable(*src, dst, cmap);
+        }
+	}
+}
+
+static argb *getAddr(argb *bm, int width, int left, int top) {
+	return bm + top * width + left;
+}
+
+static void blitNormal(argb *bm, int width, int height, const SavedImage *frame, const ColorMapObject *cmap, int transparent) {
+	const unsigned char* src = (unsigned char*) frame->RasterBits;
+	argb *dst = getAddr(bm, width, frame->ImageDesc.Left, frame->ImageDesc.Top);
+	GifWord copyWidth = frame->ImageDesc.Width;
+	if (frame->ImageDesc.Left + copyWidth > width) {
+		copyWidth = width - frame->ImageDesc.Left;
+	}
+    
+	GifWord copyHeight = frame->ImageDesc.Height;
+	if (frame->ImageDesc.Top + copyHeight > height) {
+		copyHeight = height - frame->ImageDesc.Top;
+	}
+    
+	for (; copyHeight > 0; copyHeight--) {
+		copyLine(dst, src, cmap, transparent, copyWidth);
+		src += frame->ImageDesc.Width;
+		dst += width;
+	}
+}
+
+static void fillRect(argb *bm, int bmWidth, int bmHeight, GifWord left, GifWord top, GifWord width, GifWord height, argb col) {
+	uint32_t* dst = (uint32_t*) getAddr(bm, bmWidth, left, top);
+	GifWord copyWidth = width;
+	if (left + copyWidth > bmWidth) {
+		copyWidth = bmWidth - left;
+	}
+    
+	GifWord copyHeight = height;
+	if (top + copyHeight > bmHeight) {
+		copyHeight = bmHeight - top;
+	}
+	uint32_t* pColor = (uint32_t *) (&col);
+	for (; copyHeight > 0; copyHeight--) {
+		memset(dst, *pColor, copyWidth * sizeof(argb));
+		dst += bmWidth;
+	}
+}
+
+static void drawFrame(argb *bm, int bmWidth, int bmHeight, const SavedImage *frame, const ColorMapObject *cmap, short transpIndex) {
+	if (frame->ImageDesc.ColorMap != NULL) {
+		cmap = frame->ImageDesc.ColorMap;
+		if (cmap->ColorCount != (1 << cmap->BitsPerPixel)) {
+            cmap = defaultCmap;
+        }
+	}
+	blitNormal(bm, bmWidth, bmHeight, frame, cmap, transpIndex);
+}
+
+static bool checkIfCover(const SavedImage *target, const SavedImage *covered) {
+	if (target->ImageDesc.Left <= covered->ImageDesc.Left
+        && covered->ImageDesc.Left + covered->ImageDesc.Width
+        <= target->ImageDesc.Left + target->ImageDesc.Width
+        && target->ImageDesc.Top <= covered->ImageDesc.Top
+        && covered->ImageDesc.Top + covered->ImageDesc.Height
+        <= target->ImageDesc.Top + target->ImageDesc.Height) {
+		return true;
+	}
+	return false;
+}
+
+static inline void disposeFrameIfNeeded(argb *bm, GifInfo *info, unsigned int idx) {
+	argb* backup = info->backupPtr;
+	argb color;
+	packARGB32(&color, 0, 0, 0, 0);
+	GifFileType *fGif = info->gifFilePtr;
+	SavedImage* cur = &fGif->SavedImages[idx - 1];
+	SavedImage* next = &fGif->SavedImages[idx];
+	bool curTrans = info->infos[idx - 1].transpIndex != -1;
+	int curDisposal = info->infos[idx - 1].disposalMethod;
+	bool nextTrans = info->infos[idx].transpIndex != -1;
+	int nextDisposal = info->infos[idx].disposalMethod;
+	argb *tmp;
+	if ((curDisposal == 2 || curDisposal == 3) && (nextTrans || !checkIfCover(next, cur))) {
+		switch (curDisposal) {
+            case 2:
+            
+			fillRect(bm, fGif->SWidth, fGif->SHeight, cur->ImageDesc.Left, cur->ImageDesc.Top, cur->ImageDesc.Width, cur->ImageDesc.Height, color);
+			break;
+            
+            case 3:
+			tmp = bm;
+			bm = backup;
+			backup = tmp;
+			break;
+		}
+	}
+    
+	if (nextDisposal == 3) {
+        memcpy(backup, bm, fGif->SWidth * fGif->SHeight * sizeof(argb));
+    }
+}
+
+static void reset(GifInfo *info) {
+	if (fileRewindFun(info) != 0) {
+        return;
+    }
+	info->nextStartTime = 0;
+	info->currentLoop = -1;
+	info->currentIndex = -1;
+}
+
+static void getBitmap(argb *bm, GifInfo *info) {
+	GifFileType* fGIF = info->gifFilePtr;
+    
+	argb paintingColor;
+	int i = info->currentIndex;
+	if (DDGifSlurp(fGIF, info, true) == GIF_ERROR) {
+        return;
+    }
+	SavedImage* cur = &fGIF->SavedImages[i];
+	int transpIndex = info->infos[i].transpIndex;
+	if (i == 0) {
+		if (transpIndex == -1) {
+            getColorFromTable(fGIF->SBackGroundColor, &paintingColor, fGIF->SColorMap);
+        } else {
+            packARGB32(&paintingColor, 0, 0, 0, 0);
+        }
+		eraseColor(bm, fGIF->SWidth, fGIF->SHeight, paintingColor);
+	} else {
+		disposeFrameIfNeeded(bm, info, i);
+	}
+	drawFrame(bm, fGIF->SWidth, fGIF->SHeight, cur, fGIF->SColorMap, transpIndex);
+}
+
+static void setMetaData(int width, int height, int ImageCount, int errorCode, JNIEnv *env, jintArray metaData) {
+	jint *const ints = (*env)->GetIntArrayElements(env, metaData, 0);
+	if (ints == NULL) {
+        return;
+    }
+	ints[0] = width;
+	ints[1] = height;
+	ints[2] = ImageCount;
+	ints[3] = errorCode;
+	(*env)->ReleaseIntArrayElements(env, metaData, ints, 0);
+}
+
+static jint open(GifFileType *GifFileIn, int Error, int startPos, JNIEnv *env, jintArray metaData) {
+	if (startPos < 0) {
+		Error = D_GIF_ERR_NOT_READABLE;
+		DGifCloseFile(GifFileIn);
+	}
+	if (Error != 0 || GifFileIn == NULL) {
+		setMetaData(0, 0, 0, Error, env, metaData);
+		return (jint) NULL;
+	}
+	int width = GifFileIn->SWidth, height = GifFileIn->SHeight;
+	unsigned int wxh = width * height;
+	if (wxh < 1 || wxh > INT_MAX) {
+		DGifCloseFile(GifFileIn);
+		setMetaData(width, height, 0, D_GIF_ERR_INVALID_SCR_DIMS, env, metaData);
+		return (jint) NULL;
+	}
+	GifInfo *info = malloc(sizeof(GifInfo));
+	if (info == NULL) {
+		DGifCloseFile(GifFileIn);
+		setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
+		return (jint) NULL;
+	}
+	info->gifFilePtr = GifFileIn;
+	info->startPos = startPos;
+	info->currentIndex = -1;
+	info->nextStartTime = 0;
+	info->lastFrameReaminder = ULONG_MAX;
+	info->comment = NULL;
+	info->loopCount = 0;
+	info->currentLoop = -1;
+	info->speedFactor = 1.0;
+	info->rasterBits = calloc(GifFileIn->SHeight * GifFileIn->SWidth, sizeof(GifPixelType));
+	info->infos = malloc(sizeof(FrameInfo));
+	info->backupPtr = NULL;
+    
+	if (info->rasterBits == NULL || info->infos == NULL) {
+		cleanUp(info);
+		setMetaData(width, height, 0, D_GIF_ERR_NOT_ENOUGH_MEM, env, metaData);
+		return (jint) NULL;
+	}
+	info->infos->duration = 0;
+	info->infos->disposalMethod = 0;
+	info->infos->transpIndex = -1;
+	if (GifFileIn->SColorMap == NULL || GifFileIn->SColorMap->ColorCount != (1 << GifFileIn->SColorMap->BitsPerPixel)) {
+		GifFreeMapObject(GifFileIn->SColorMap);
+		GifFileIn->SColorMap = defaultCmap;
+	}
+
+	DDGifSlurp(GifFileIn, info, false);
+
+	int imgCount = GifFileIn->ImageCount;
+
+	if (imgCount < 1) {
+        Error = D_GIF_ERR_NO_FRAMES;
+    }
+	if (fileRewindFun(info) != 0) {
+        Error = D_GIF_ERR_READ_FAILED;
+    }
+	if (Error != 0) {
+        cleanUp(info);
+    }
+	setMetaData(width, height, imgCount, Error, env, metaData);
+	return (jint)(Error == 0 ? info : NULL);
+}
+
+JNIEXPORT jlong JNICALL Java_org_telegram_ui_Components_GifDrawable_getAllocationByteCount(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return 0;
+    }
+	unsigned int pxCount = info->gifFilePtr->SWidth + info->gifFilePtr->SHeight;
+	jlong sum = pxCount * sizeof(char);
+	if (info->backupPtr != NULL) {
+        sum += pxCount * sizeof(argb);
+    }
+	return sum;
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_reset(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return;
+    }
+	reset(info);
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_setSpeedFactor(JNIEnv *env, jclass class, jobject gifInfo, jfloat factor) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return;
+    }
+	info->speedFactor = factor;
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToTime(JNIEnv *env, jclass class, jobject gifInfo, jint desiredPos, jintArray jPixels) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL || jPixels == NULL) {
+        return;
+    }
+	int imgCount = info->gifFilePtr->ImageCount;
+	if (imgCount <= 1) {
+        return;
+    }
+    
+	unsigned long sum = 0;
+	int i;
+	for (i = 0; i < imgCount; i++) {
+		unsigned long newSum = sum + info->infos[i].duration;
+		if (newSum >= desiredPos) {
+            break;
+        }
+		sum = newSum;
+	}
+	if (i < info->currentIndex) {
+        return;
+    }
+    
+	unsigned long lastFrameRemainder = desiredPos - sum;
+	if (i == imgCount - 1 && lastFrameRemainder > info->infos[i].duration) {
+        lastFrameRemainder = info->infos[i].duration;
+    }
+	if (i > info->currentIndex) {
+		jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
+		if (pixels == NULL) {
+            return;
+        }
+		while (info->currentIndex <= i) {
+			info->currentIndex++;
+			getBitmap((argb*) pixels, info);
+		}
+		(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
+	}
+	info->lastFrameReaminder = lastFrameRemainder;
+    
+	if (info->speedFactor == 1.0) {
+        info->nextStartTime = getRealTime() + lastFrameRemainder;
+	} else {
+        info->nextStartTime = getRealTime() + lastFrameRemainder * info->speedFactor;
+    }
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_seekToFrame(JNIEnv *env, jclass class, jobject gifInfo, jint desiredIdx, jintArray jPixels) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL|| jPixels==NULL) {
+        return;
+    }
+	if (desiredIdx <= info->currentIndex) {
+        return;
+    }
+    
+	int imgCount = info->gifFilePtr->ImageCount;
+	if (imgCount <= 1) {
+        return;
+    }
+    
+	jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
+	if (pixels == NULL) {
+        return;
+    }
+    
+	info->lastFrameReaminder = 0;
+	if (desiredIdx >= imgCount) {
+        desiredIdx = imgCount - 1;
+    }
+    
+	while (info->currentIndex < desiredIdx) {
+		info->currentIndex++;
+		getBitmap((argb *) pixels, info);
+	}
+	(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
+	if (info->speedFactor == 1.0) {
+        info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration;
+    } else {
+        info->nextStartTime = getRealTime() + info->infos[info->currentIndex].duration * info->speedFactor;
+    }
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_renderFrame(JNIEnv *env, jclass class, jintArray jPixels, jobject gifInfo, jintArray metaData) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL || jPixels == NULL) {
+        return;
+    }
+	bool needRedraw = false;
+	unsigned long rt = getRealTime();
+    
+	if (rt >= info->nextStartTime && info->currentLoop < info->loopCount) {
+		if (++info->currentIndex >= info->gifFilePtr->ImageCount) {
+            info->currentIndex = 0;
+        }
+		needRedraw = true;
+	}
+	jint *const rawMetaData = (*env)->GetIntArrayElements(env, metaData, 0);
+	if (rawMetaData == NULL) {
+        return;
+    }
+    
+ 	if (needRedraw) {
+		jint *const pixels = (*env)->GetIntArrayElements(env, jPixels, 0);
+		if (pixels == NULL) {
+		    (*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
+		    return;
+		}
+		getBitmap((argb *)pixels, info);
+		rawMetaData[3] = info->gifFilePtr->Error;
+        
+		(*env)->ReleaseIntArrayElements(env, jPixels, pixels, 0);
+		unsigned int scaledDuration = info->infos[info->currentIndex].duration;
+		if (info->speedFactor != 1.0) {
+			scaledDuration /= info->speedFactor;
+			if (scaledDuration<=0) {
+                scaledDuration=1;
+            } else if (scaledDuration > INT_MAX) {
+                scaledDuration = INT_MAX;
+            }
+		}
+		info->nextStartTime = rt + scaledDuration;
+		rawMetaData[4] = scaledDuration;
+	} else {
+	    long delay = info->nextStartTime-rt;
+	    if (delay < 0) {
+            rawMetaData[4] = -1;
+        } else {
+            rawMetaData[4] = (int) delay;
+        }
+	}
+	(*env)->ReleaseIntArrayElements(env, metaData, rawMetaData, 0);
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_free(JNIEnv *env, jclass class, jobject gifInfo) {
+	if (gifInfo == NULL) {
+        return;
+    }
+	GifInfo *info = (GifInfo *)gifInfo;
+    FILE *file = info->gifFilePtr->UserData;
+    if (file) {
+        fclose(file);
+    }
+	info->gifFilePtr->UserData = NULL;
+	cleanUp(info);
+}
+
+JNIEXPORT jstring JNICALL Java_org_telegram_ui_Components_GifDrawable_getComment(JNIEnv *env, jclass class, jobject gifInfo) {
+	if (gifInfo == NULL) {
+        return NULL;
+    }
+	GifInfo *info = (GifInfo *)gifInfo;
+	return (*env)->NewStringUTF(env, info->comment);
+}
+
+JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getLoopCount(JNIEnv *env, jclass class, jobject gifInfo) {
+	if (gifInfo == NULL) {
+        return 0;
+    }
+	return ((GifInfo *)gifInfo)->loopCount;
+}
+
+JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getDuration(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return 0;
+    }
+	int i;
+	unsigned long sum = 0;
+	for (i = 0; i < info->gifFilePtr->ImageCount; i++) {
+        sum += info->infos[i].duration;
+    }
+	return sum;
+}
+
+JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_getCurrentPosition(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return 0;
+    }
+	int idx = info->currentIndex;
+	if (idx < 0 || info->gifFilePtr->ImageCount <= 1) {
+        return 0;
+    }
+	int i;
+	unsigned int sum = 0;
+	for (i = 0; i < idx; i++) {
+        sum += info->infos[i].duration;
+    }
+	unsigned long remainder = info->lastFrameReaminder == ULONG_MAX ? getRealTime() - info->nextStartTime : info->lastFrameReaminder;
+	return (int) (sum + remainder);
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_saveRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL) {
+        return;
+    }
+	info->lastFrameReaminder = getRealTime() - info->nextStartTime;
+}
+
+JNIEXPORT void JNICALL Java_org_telegram_ui_Components_GifDrawable_restoreRemainder(JNIEnv *env, jclass class, jobject gifInfo) {
+	GifInfo *info = (GifInfo *)gifInfo;
+	if (info == NULL || info->lastFrameReaminder == ULONG_MAX) {
+        return;
+    }
+	info->nextStartTime = getRealTime() + info->lastFrameReaminder;
+	info->lastFrameReaminder = ULONG_MAX;
+}
+
+JNIEXPORT jint JNICALL Java_org_telegram_ui_Components_GifDrawable_openFile(JNIEnv *env, jclass class, jintArray metaData, jstring jfname) {
+	if (jfname == NULL) {
+		setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
+		return (jint) NULL;
+	}
+    
+	const char *const fname = (*env)->GetStringUTFChars(env, jfname, 0);
+	FILE *file = fopen(fname, "rb");
+	(*env)->ReleaseStringUTFChars(env, jfname, fname);
+	if (file == NULL) {
+		setMetaData(0, 0, 0, D_GIF_ERR_OPEN_FAILED, env, metaData);
+		return (jint) NULL;
+	}
+	int Error = 0;
+	GifFileType *GifFileIn = DGifOpen(file, &fileReadFunc, &Error);
+	return open(GifFileIn, Error, ftell(file), env, metaData);
+}
diff --git a/src/main/jni/gif.h b/src/main/jni/gif.h
new file mode 100644
index 000000000..96409be19
--- /dev/null
+++ b/src/main/jni/gif.h
@@ -0,0 +1,7 @@
+#ifndef gif_h
+#define gif_h
+
+jint gifOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
+void gifOnJNIUnload(JavaVM *vm, void *reserved);
+
+#endif
+\ No newline at end of file
diff --git a/src/main/jni/giflib/config.h b/src/main/jni/giflib/config.h
new file mode 100644
index 000000000..e68b0d9ee
--- /dev/null
+++ b/src/main/jni/giflib/config.h
@@ -0,0 +1,13 @@
+
+// giflib config.h
+
+#ifndef GIF_CONFIG_H_DEFINED
+#define GIF_CONFIG_H_DEFINED
+
+#include <sys/types.h>
+#define HAVE_STDINT_H
+#define HAVE_FCNTL_H
+
+typedef uint32_t UINT32;
+
+#endif
diff --git a/src/main/jni/giflib/dgif_lib.c b/src/main/jni/giflib/dgif_lib.c
new file mode 100644
index 000000000..92442408b
--- /dev/null
+++ b/src/main/jni/giflib/dgif_lib.c
@@ -0,0 +1,1167 @@
+/******************************************************************************
+
+dgif_lib.c - GIF decoding
+
+The functions here and in egif_lib.c are partitioned carefully so that
+if you only require one of read and write capability, only one of these
+two modules will be linked.  Preserve this property!
+
+*****************************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef _WIN32
+#include <io.h>
+#endif /* _WIN32 */
+
+#include "gif_lib.h"
+#include "gif_lib_private.h"
+
+/* compose unsigned little endian value */
+#define UNSIGNED_LITTLE_ENDIAN(lo, hi)	((lo) | ((hi) << 8))
+
+/* avoid extra function call in case we use fread (TVT) */
+#define READ(_gif,_buf,_len)                                     \
+  (((GifFilePrivateType*)_gif->Private)->Read ?                   \
+    ((GifFilePrivateType*)_gif->Private)->Read(_gif,_buf,_len) : \
+    fread(_buf,1,_len,((GifFilePrivateType*)_gif->Private)->File))
+
+static int DGifGetWord(GifFileType *GifFile, GifWord *Word);
+static int DGifSetupDecompress(GifFileType *GifFile);
+static int DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line,
+                              int LineLen);
+static int DGifGetPrefixChar(GifPrefixType *Prefix, int Code, int ClearCode);
+static int DGifDecompressInput(GifFileType *GifFile, int *Code);
+static int DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf,
+                             GifByteType *NextByte);
+
+/******************************************************************************
+ Open a new GIF file for read, given by its name.
+ Returns dynamically allocated GifFileType pointer which serves as the GIF
+ info record.
+******************************************************************************/
+GifFileType *
+DGifOpenFileName(const char *FileName, int *Error)
+{
+    int FileHandle;
+    GifFileType *GifFile;
+
+    if ((FileHandle = open(FileName, O_RDONLY)) == -1) {
+	if (Error != NULL)
+	    *Error = D_GIF_ERR_OPEN_FAILED;
+        return NULL;
+    }
+    GifFile = DGifOpenFileHandle(FileHandle, Error);
+    return GifFile;
+}
+
+/******************************************************************************
+ Update a new GIF file, given its file handle.
+ Returns dynamically allocated GifFileType pointer which serves as the GIF
+ info record.
+******************************************************************************/
+GifFileType *
+DGifOpenFileHandle(int FileHandle, int *Error)
+{
+    char Buf[GIF_STAMP_LEN + 1];
+    GifFileType *GifFile;
+    GifFilePrivateType *Private;
+    FILE *f;
+
+    GifFile = (GifFileType *)malloc(sizeof(GifFileType));
+    if (GifFile == NULL) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+        (void)close(FileHandle);
+        return NULL;
+    }
+
+    /*@i1@*/memset(GifFile, '\0', sizeof(GifFileType));
+
+    /* Belt and suspenders, in case the null pointer isn't zero */
+    GifFile->SavedImages = NULL;
+    GifFile->SColorMap = NULL;
+
+    Private = (GifFilePrivateType *)malloc(sizeof(GifFilePrivateType));
+    if (Private == NULL) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+        (void)close(FileHandle);
+        free((char *)GifFile);
+        return NULL;
+    }
+#ifdef _WIN32
+    _setmode(FileHandle, O_BINARY);    /* Make sure it is in binary mode. */
+#endif /* _WIN32 */
+
+    f = fdopen(FileHandle, "rb");    /* Make it into a stream: */
+
+    /*@-mustfreeonly@*/
+    GifFile->Private = (void *)Private;
+    Private->FileHandle = FileHandle;
+    Private->File = f;
+    Private->FileState = FILE_STATE_READ;
+    Private->Read = NULL;        /* don't use alternate input method (TVT) */
+    GifFile->UserData = NULL;    /* TVT */
+    /*@=mustfreeonly@*/
+
+    /* Let's see if this is a GIF file: */
+    if (READ(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != GIF_STAMP_LEN) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_READ_FAILED;
+        (void)fclose(f);
+        free((char *)Private);
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    /* Check for GIF prefix at start of file */
+    Buf[GIF_STAMP_LEN] = 0;
+    if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_GIF_FILE;
+        (void)fclose(f);
+        free((char *)Private);
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    if (DGifGetScreenDesc(GifFile) == GIF_ERROR) {
+        (void)fclose(f);
+        free((char *)Private);
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    GifFile->Error = 0;
+
+    /* What version of GIF? */
+    Private->gif89 = (Buf[GIF_VERSION_POS] == '9');
+
+    return GifFile;
+}
+
+/******************************************************************************
+ GifFileType constructor with user supplied input function (TVT)
+******************************************************************************/
+GifFileType *
+DGifOpen(void *userData, InputFunc readFunc, int *Error)
+{
+    char Buf[GIF_STAMP_LEN + 1];
+    GifFileType *GifFile;
+    GifFilePrivateType *Private;
+
+    GifFile = (GifFileType *)malloc(sizeof(GifFileType));
+    if (GifFile == NULL) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+        return NULL;
+    }
+
+    memset(GifFile, '\0', sizeof(GifFileType));
+
+    /* Belt and suspenders, in case the null pointer isn't zero */
+    GifFile->SavedImages = NULL;
+    GifFile->SColorMap = NULL;
+
+    Private = (GifFilePrivateType *)malloc(sizeof(GifFilePrivateType));
+    if (!Private) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    GifFile->Private = (void *)Private;
+    Private->FileHandle = 0;
+    Private->File = NULL;
+    Private->FileState = FILE_STATE_READ;
+
+    Private->Read = readFunc;    /* TVT */
+    GifFile->UserData = userData;    /* TVT */
+
+    /* Lets see if this is a GIF file: */
+    if (READ(GifFile, (unsigned char *)Buf, GIF_STAMP_LEN) != GIF_STAMP_LEN) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_READ_FAILED;
+        free((char *)Private);
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    /* Check for GIF prefix at start of file */
+    Buf[GIF_STAMP_LEN] = '\0';
+    if (strncmp(GIF_STAMP, Buf, GIF_VERSION_POS) != 0) {
+        if (Error != NULL)
+	    *Error = D_GIF_ERR_NOT_GIF_FILE;
+        free((char *)Private);
+        free((char *)GifFile);
+        return NULL;
+    }
+
+    if (DGifGetScreenDesc(GifFile) == GIF_ERROR) {
+        free((char *)Private);
+        free((char *)GifFile);
+	*Error = D_GIF_ERR_NO_SCRN_DSCR;
+        return NULL;
+    }
+
+    GifFile->Error = 0;
+
+    /* What version of GIF? */
+    Private->gif89 = (Buf[GIF_VERSION_POS] == '9');
+
+    return GifFile;
+}
+
+/******************************************************************************
+ This routine should be called before any other DGif calls. Note that
+ this routine is called automatically from DGif file open routines.
+******************************************************************************/
+int
+DGifGetScreenDesc(GifFileType *GifFile)
+{
+    int BitsPerPixel;
+    bool SortFlag;
+    GifByteType Buf[3];
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    /* Put the screen descriptor into the file: */
+    if (DGifGetWord(GifFile, &GifFile->SWidth) == GIF_ERROR ||
+        DGifGetWord(GifFile, &GifFile->SHeight) == GIF_ERROR)
+        return GIF_ERROR;
+
+    if (READ(GifFile, Buf, 3) != 3) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+	GifFreeMapObject(GifFile->SColorMap);
+	GifFile->SColorMap = NULL;
+        return GIF_ERROR;
+    }
+    GifFile->SColorResolution = (((Buf[0] & 0x70) + 1) >> 4) + 1;
+    SortFlag = (Buf[0] & 0x08) != 0;
+    BitsPerPixel = (Buf[0] & 0x07) + 1;
+    GifFile->SBackGroundColor = Buf[1];
+    GifFile->AspectByte = Buf[2]; 
+    if (Buf[0] & 0x80) {    /* Do we have global color map? */
+	int i;
+
+        GifFile->SColorMap = GifMakeMapObject(1 << BitsPerPixel, NULL);
+        if (GifFile->SColorMap == NULL) {
+            GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+            return GIF_ERROR;
+        }
+
+        /* Get the global color map: */
+	GifFile->SColorMap->SortFlag = SortFlag;
+        for (i = 0; i < GifFile->SColorMap->ColorCount; i++) {
+            if (READ(GifFile, Buf, 3) != 3) {
+                GifFreeMapObject(GifFile->SColorMap);
+                GifFile->SColorMap = NULL;
+                GifFile->Error = D_GIF_ERR_READ_FAILED;
+                return GIF_ERROR;
+            }
+            GifFile->SColorMap->Colors[i].Red = Buf[0];
+            GifFile->SColorMap->Colors[i].Green = Buf[1];
+            GifFile->SColorMap->Colors[i].Blue = Buf[2];
+        }
+    } else {
+        GifFile->SColorMap = NULL;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routine should be called before any attempt to read an image.
+******************************************************************************/
+int
+DGifGetRecordType(GifFileType *GifFile, GifRecordType* Type)
+{
+    GifByteType Buf;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (READ(GifFile, &Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+
+    switch (Buf) {
+      case DESCRIPTOR_INTRODUCER:
+          *Type = IMAGE_DESC_RECORD_TYPE;
+          break;
+      case EXTENSION_INTRODUCER:
+          *Type = EXTENSION_RECORD_TYPE;
+          break;
+      case TERMINATOR_INTRODUCER:
+          *Type = TERMINATE_RECORD_TYPE;
+          break;
+      default:
+          *Type = UNDEFINED_RECORD_TYPE;
+          GifFile->Error = D_GIF_ERR_WRONG_RECORD;
+          return GIF_ERROR;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routine should be called before any attempt to read an image.
+ Note it is assumed the Image desc. header has been read.
+******************************************************************************/
+int
+DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount)
+{
+    unsigned int BitsPerPixel;
+    GifByteType Buf[3];
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+    SavedImage *sp;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (DGifGetWord(GifFile, &GifFile->Image.Left) == GIF_ERROR ||
+        DGifGetWord(GifFile, &GifFile->Image.Top) == GIF_ERROR ||
+        DGifGetWord(GifFile, &GifFile->Image.Width) == GIF_ERROR ||
+        DGifGetWord(GifFile, &GifFile->Image.Height) == GIF_ERROR)
+        return GIF_ERROR;
+    if (READ(GifFile, Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+	GifFreeMapObject(GifFile->Image.ColorMap);
+	GifFile->Image.ColorMap = NULL;
+        return GIF_ERROR;
+    }
+    BitsPerPixel = (Buf[0] & 0x07) + 1;
+    GifFile->Image.Interlace = (Buf[0] & 0x40) ? true : false;
+
+    /* Setup the colormap */
+    if (GifFile->Image.ColorMap) {
+        GifFreeMapObject(GifFile->Image.ColorMap);
+        GifFile->Image.ColorMap = NULL;
+    }
+    /* Does this image have local color map? */
+    if (Buf[0] & 0x80) {
+	unsigned int i;
+
+        GifFile->Image.ColorMap = GifMakeMapObject(1 << BitsPerPixel, NULL);
+        if (GifFile->Image.ColorMap == NULL) {
+            GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+            return GIF_ERROR;
+        }
+
+        /* Get the image local color map: */
+        for (i = 0; i < GifFile->Image.ColorMap->ColorCount; i++) {
+            if (READ(GifFile, Buf, 3) != 3) {
+                GifFreeMapObject(GifFile->Image.ColorMap);
+                GifFile->Error = D_GIF_ERR_READ_FAILED;
+                GifFile->Image.ColorMap = NULL;
+                return GIF_ERROR;
+            }
+            GifFile->Image.ColorMap->Colors[i].Red = Buf[0];
+            GifFile->Image.ColorMap->Colors[i].Green = Buf[1];
+            GifFile->Image.ColorMap->Colors[i].Blue = Buf[2];
+        }
+    }
+   // if (changeImageCount)
+    {
+		if (GifFile->SavedImages) {
+			if ((GifFile->SavedImages = (SavedImage *)realloc(GifFile->SavedImages,
+										  sizeof(SavedImage) *
+										  (GifFile->ImageCount + 1))) == NULL) {
+				GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+				return GIF_ERROR;
+			}
+		} else {
+			if ((GifFile->SavedImages =
+				 (SavedImage *) malloc(sizeof(SavedImage))) == NULL) {
+				GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+				return GIF_ERROR;
+			}
+		}
+    }
+    sp = &GifFile->SavedImages[GifFile->ImageCount];
+    memcpy(&sp->ImageDesc, &GifFile->Image, sizeof(GifImageDesc));
+    if (GifFile->Image.ColorMap != NULL) {
+        sp->ImageDesc.ColorMap = GifMakeMapObject(
+                                 GifFile->Image.ColorMap->ColorCount,
+                                 GifFile->Image.ColorMap->Colors);
+        if (sp->ImageDesc.ColorMap == NULL) {
+            GifFile->Error = D_GIF_ERR_NOT_ENOUGH_MEM;
+            return GIF_ERROR;
+        }
+    }
+    sp->RasterBits = (unsigned char *)NULL;
+    sp->ExtensionBlockCount = 0;
+    sp->ExtensionBlocks = (ExtensionBlock *) NULL;
+    if (changeImageCount)
+    	GifFile->ImageCount++;
+
+    Private->PixelCount = (long)GifFile->Image.Width *
+       (long)GifFile->Image.Height;
+
+    /* Reset decompress algorithm parameters. */
+    (void)DGifSetupDecompress(GifFile);
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Get one full scanned line (Line) of length LineLen from GIF file.
+******************************************************************************/
+int
+DGifGetLine(GifFileType *GifFile, GifPixelType *Line, int LineLen)
+{
+    GifByteType *Dummy;
+    GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (!LineLen)
+        LineLen = GifFile->Image.Width;
+
+    if ((Private->PixelCount -= LineLen) > 0xffff0000UL) {
+        GifFile->Error = D_GIF_ERR_DATA_TOO_BIG;
+        return GIF_ERROR;
+    }
+
+    if (DGifDecompressLine(GifFile, Line, LineLen) == GIF_OK) {
+        if (Private->PixelCount == 0) {
+            /* We probably won't be called any more, so let's clean up
+             * everything before we return: need to flush out all the
+             * rest of image until an empty block (size 0)
+             * detected. We use GetCodeNext.
+	     */
+            do
+                if (DGifGetCodeNext(GifFile, &Dummy) == GIF_ERROR)
+                    return GIF_ERROR;
+            while (Dummy != NULL) ;
+        }
+        return GIF_OK;
+    } else
+        return GIF_ERROR;
+}
+
+/******************************************************************************
+ Put one pixel (Pixel) into GIF file.
+******************************************************************************/
+int
+DGifGetPixel(GifFileType *GifFile, GifPixelType Pixel)
+{
+    GifByteType *Dummy;
+    GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+    if (--Private->PixelCount > 0xffff0000UL)
+    {
+        GifFile->Error = D_GIF_ERR_DATA_TOO_BIG;
+        return GIF_ERROR;
+    }
+
+    if (DGifDecompressLine(GifFile, &Pixel, 1) == GIF_OK) {
+        if (Private->PixelCount == 0) {
+            /* We probably won't be called any more, so let's clean up
+             * everything before we return: need to flush out all the
+             * rest of image until an empty block (size 0)
+             * detected. We use GetCodeNext.
+	     */
+            do
+                if (DGifGetCodeNext(GifFile, &Dummy) == GIF_ERROR)
+                    return GIF_ERROR;
+            while (Dummy != NULL) ;
+        }
+        return GIF_OK;
+    } else
+        return GIF_ERROR;
+}
+
+/******************************************************************************
+ Get an extension block (see GIF manual) from GIF file. This routine only
+ returns the first data block, and DGifGetExtensionNext should be called
+ after this one until NULL extension is returned.
+ The Extension should NOT be freed by the user (not dynamically allocated).
+ Note it is assumed the Extension description header has been read.
+******************************************************************************/
+int
+DGifGetExtension(GifFileType *GifFile, int *ExtCode, GifByteType **Extension)
+{
+    GifByteType Buf;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (READ(GifFile, &Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+    *ExtCode = Buf;
+
+    return DGifGetExtensionNext(GifFile, Extension, ExtCode);
+}
+
+/******************************************************************************
+ Get a following extension block (see GIF manual) from GIF file. This
+ routine should be called until NULL Extension is returned.
+ The Extension should NOT be freed by the user (not dynamically allocated).
+******************************************************************************/
+int
+DGifGetExtensionNext(GifFileType *GifFile, GifByteType ** Extension, int* ExtCode)
+{
+    GifByteType Buf;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (READ(GifFile, &Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+    if (Buf > 0)
+    {
+    	if (*ExtCode==GRAPHICS_EXT_FUNC_CODE)
+    		Buf=4;
+        *Extension = Private->Buf;    /* Use private unused buffer. */
+        (*Extension)[0] = Buf;  /* Pascal strings notation (pos. 0 is len.). */
+	/* coverity[tainted_data] */
+        if (READ(GifFile, &((*Extension)[1]), Buf) != Buf) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+    } else
+        *Extension = NULL;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Extract a Graphics Control Block from raw extension data
+******************************************************************************/
+
+int DGifExtensionToGCB(const size_t GifExtensionLength,
+		       const GifByteType *GifExtension,
+		       GraphicsControlBlock *GCB)
+{
+    if (GifExtensionLength != 4) {
+	return GIF_ERROR;
+    }
+
+    GCB->DisposalMode = (GifExtension[0] >> 2) & 0x07;
+    GCB->UserInputFlag = (GifExtension[0] & 0x02) != 0;
+    GCB->DelayTime = UNSIGNED_LITTLE_ENDIAN(GifExtension[1], GifExtension[2]);
+    if (GifExtension[0] & 0x01)
+	GCB->TransparentColor = (int)GifExtension[3];
+    else
+	GCB->TransparentColor = NO_TRANSPARENT_COLOR;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Extract the Graphics Control Block for a saved image, if it exists.
+******************************************************************************/
+
+int DGifSavedExtensionToGCB(GifFileType *GifFile,
+			    int ImageIndex, GraphicsControlBlock *GCB)
+{
+    int i;
+
+    if (ImageIndex < 0 || ImageIndex > GifFile->ImageCount - 1)
+	return GIF_ERROR;
+
+    GCB->DisposalMode = DISPOSAL_UNSPECIFIED;
+    GCB->UserInputFlag = false;
+    GCB->DelayTime = 0;
+    GCB->TransparentColor = NO_TRANSPARENT_COLOR;
+
+    for (i = 0; i < GifFile->SavedImages[ImageIndex].ExtensionBlockCount; i++) {
+	ExtensionBlock *ep = &GifFile->SavedImages[ImageIndex].ExtensionBlocks[i];
+	if (ep->Function == GRAPHICS_EXT_FUNC_CODE)
+	    return DGifExtensionToGCB(ep->ByteCount, ep->Bytes, GCB);
+    }
+
+    return GIF_ERROR;
+}
+
+/******************************************************************************
+ This routine should be called last, to close the GIF file.
+******************************************************************************/
+int
+DGifCloseFile(GifFileType *GifFile)
+{
+    GifFilePrivateType *Private;
+
+    if (GifFile == NULL || GifFile->Private == NULL)
+        return GIF_ERROR;
+
+    if (GifFile->Image.ColorMap) {
+        GifFreeMapObject(GifFile->Image.ColorMap);
+        GifFile->Image.ColorMap = NULL;
+    }
+
+    if (GifFile->SColorMap) {
+        GifFreeMapObject(GifFile->SColorMap);
+        GifFile->SColorMap = NULL;
+    }
+
+    if (GifFile->SavedImages) {
+        GifFreeSavedImages(GifFile);
+        GifFile->SavedImages = NULL;
+    }
+
+    GifFreeExtensions(&GifFile->ExtensionBlockCount, &GifFile->ExtensionBlocks);
+
+    Private = (GifFilePrivateType *) GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (Private->File && (fclose(Private->File) != 0)) {
+        GifFile->Error = D_GIF_ERR_CLOSE_FAILED;
+        return GIF_ERROR;
+    }
+
+    free((char *)GifFile->Private);
+
+    /* 
+     * Without the #ifndef, we get spurious warnings because Coverity mistakenly
+     * thinks the GIF structure is freed on an error return. 
+     */
+#ifndef __COVERITY__
+    free(GifFile);
+#endif /* __COVERITY__ */
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Get 2 bytes (word) from the given file:
+******************************************************************************/
+static int
+DGifGetWord(GifFileType *GifFile, GifWord *Word)
+{
+    unsigned char c[2];
+
+    if (READ(GifFile, c, 2) != 2) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+
+    *Word = (GifWord)UNSIGNED_LITTLE_ENDIAN(c[0], c[1]);
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Get the image code in compressed form.  This routine can be called if the
+ information needed to be piped out as is. Obviously this is much faster
+ than decoding and encoding again. This routine should be followed by calls
+ to DGifGetCodeNext, until NULL block is returned.
+ The block should NOT be freed by the user (not dynamically allocated).
+******************************************************************************/
+int
+DGifGetCode(GifFileType *GifFile, int *CodeSize, GifByteType **CodeBlock)
+{
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    *CodeSize = Private->BitsPerPixel;
+
+    return DGifGetCodeNext(GifFile, CodeBlock);
+}
+
+/******************************************************************************
+ Continue to get the image code in compressed form. This routine should be
+ called until NULL block is returned.
+ The block should NOT be freed by the user (not dynamically allocated).
+******************************************************************************/
+int
+DGifGetCodeNext(GifFileType *GifFile, GifByteType **CodeBlock)
+{
+    GifByteType Buf;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    /* coverity[tainted_data_argument] */
+    if (READ(GifFile, &Buf, 1) != 1) {
+        GifFile->Error = D_GIF_ERR_READ_FAILED;
+        return GIF_ERROR;
+    }
+
+    /* coverity[lower_bounds] */
+    if (Buf > 0) {
+        *CodeBlock = Private->Buf;    /* Use private unused buffer. */
+        (*CodeBlock)[0] = Buf;  /* Pascal strings notation (pos. 0 is len.). */
+	/* coverity[tainted_data] */
+        if (READ(GifFile, &((*CodeBlock)[1]), Buf) != Buf) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+    } else {
+        *CodeBlock = NULL;
+        Private->Buf[0] = 0;    /* Make sure the buffer is empty! */
+        Private->PixelCount = 0;    /* And local info. indicate image read. */
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Setup the LZ decompression for this image:
+******************************************************************************/
+static int
+DGifSetupDecompress(GifFileType *GifFile)
+{
+    int i, BitsPerPixel;
+    GifByteType CodeSize;
+    GifPrefixType *Prefix;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    READ(GifFile, &CodeSize, 1);    /* Read Code size from file. */
+    BitsPerPixel = CodeSize;
+
+    Private->Buf[0] = 0;    /* Input Buffer empty. */
+    Private->BitsPerPixel = BitsPerPixel;
+    Private->ClearCode = (1 << BitsPerPixel);
+    Private->EOFCode = Private->ClearCode + 1;
+    Private->RunningCode = Private->EOFCode + 1;
+    Private->RunningBits = BitsPerPixel + 1;    /* Number of bits per code. */
+    Private->MaxCode1 = 1 << Private->RunningBits;    /* Max. code + 1. */
+    Private->StackPtr = 0;    /* No pixels on the pixel stack. */
+    Private->LastCode = NO_SUCH_CODE;
+    Private->CrntShiftState = 0;    /* No information in CrntShiftDWord. */
+    Private->CrntShiftDWord = 0;
+
+    Prefix = Private->Prefix;
+    for (i = 0; i <= LZ_MAX_CODE; i++)
+        Prefix[i] = NO_SUCH_CODE;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ The LZ decompression routine:
+ This version decompress the given GIF file into Line of length LineLen.
+ This routine can be called few times (one per scan line, for example), in
+ order the complete the whole image.
+******************************************************************************/
+static int
+DGifDecompressLine(GifFileType *GifFile, GifPixelType *Line, int LineLen)
+{
+    int i = 0;
+    int j, CrntCode, EOFCode, ClearCode, CrntPrefix, LastCode, StackPtr;
+    GifByteType *Stack, *Suffix;
+    GifPrefixType *Prefix;
+    GifFilePrivateType *Private = (GifFilePrivateType *) GifFile->Private;
+
+    StackPtr = Private->StackPtr;
+    Prefix = Private->Prefix;
+    Suffix = Private->Suffix;
+    Stack = Private->Stack;
+    EOFCode = Private->EOFCode;
+    ClearCode = Private->ClearCode;
+    LastCode = Private->LastCode;
+
+    if (StackPtr > LZ_MAX_CODE) {
+        return GIF_ERROR;
+    }
+
+    if (StackPtr != 0) {
+        /* Let pop the stack off before continueing to read the GIF file: */
+        while (StackPtr != 0 && i < LineLen)
+            Line[i++] = Stack[--StackPtr];
+    }
+
+    while (i < LineLen) {    /* Decode LineLen items. */
+        if (DGifDecompressInput(GifFile, &CrntCode) == GIF_ERROR)
+            return GIF_ERROR;
+
+        if (CrntCode == EOFCode) {
+            /* Note however that usually we will not be here as we will stop
+             * decoding as soon as we got all the pixel, or EOF code will
+             * not be read at all, and DGifGetLine/Pixel clean everything.  */
+	    GifFile->Error = D_GIF_ERR_EOF_TOO_SOON;
+	    return GIF_ERROR;
+        } else if (CrntCode == ClearCode) {
+            /* We need to start over again: */
+            for (j = 0; j <= LZ_MAX_CODE; j++)
+                Prefix[j] = NO_SUCH_CODE;
+            Private->RunningCode = Private->EOFCode + 1;
+            Private->RunningBits = Private->BitsPerPixel + 1;
+            Private->MaxCode1 = 1 << Private->RunningBits;
+            LastCode = Private->LastCode = NO_SUCH_CODE;
+        } else {
+            /* Its regular code - if in pixel range simply add it to output
+             * stream, otherwise trace to codes linked list until the prefix
+             * is in pixel range: */
+            if (CrntCode < ClearCode) {
+                /* This is simple - its pixel scalar, so add it to output: */
+                Line[i++] = CrntCode;
+            } else {
+                /* Its a code to needed to be traced: trace the linked list
+                 * until the prefix is a pixel, while pushing the suffix
+                 * pixels on our stack. If we done, pop the stack in reverse
+                 * (thats what stack is good for!) order to output.  */
+                if (Prefix[CrntCode] == NO_SUCH_CODE) {
+                    /* Only allowed if CrntCode is exactly the running code:
+                     * In that case CrntCode = XXXCode, CrntCode or the
+                     * prefix code is last code and the suffix char is
+                     * exactly the prefix of last code! */
+                    if (CrntCode == Private->RunningCode - 2) {
+                        CrntPrefix = LastCode;
+                        Suffix[Private->RunningCode - 2] =
+                           Stack[StackPtr++] = DGifGetPrefixChar(Prefix,
+                                                                 LastCode,
+                                                                 ClearCode);
+                    } else {
+                        GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+                        return GIF_ERROR;
+                    }
+                } else
+                    CrntPrefix = CrntCode;
+
+                /* Now (if image is O.K.) we should not get a NO_SUCH_CODE
+                 * during the trace. As we might loop forever, in case of
+                 * defective image, we use StackPtr as loop counter and stop
+                 * before overflowing Stack[]. */
+                while (StackPtr < LZ_MAX_CODE &&
+                       CrntPrefix > ClearCode && CrntPrefix <= LZ_MAX_CODE) {
+                    Stack[StackPtr++] = Suffix[CrntPrefix];
+                    CrntPrefix = Prefix[CrntPrefix];
+                }
+                if (StackPtr >= LZ_MAX_CODE || CrntPrefix > LZ_MAX_CODE) {
+                    GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+                    return GIF_ERROR;
+                }
+                /* Push the last character on stack: */
+                Stack[StackPtr++] = CrntPrefix;
+
+                /* Now lets pop all the stack into output: */
+                while (StackPtr != 0 && i < LineLen)
+                    Line[i++] = Stack[--StackPtr];
+            }
+            if (LastCode != NO_SUCH_CODE && Prefix[Private->RunningCode - 2] == NO_SUCH_CODE) {
+                Prefix[Private->RunningCode - 2] = LastCode;
+
+                if (CrntCode == Private->RunningCode - 2) {
+                    /* Only allowed if CrntCode is exactly the running code:
+                     * In that case CrntCode = XXXCode, CrntCode or the
+                     * prefix code is last code and the suffix char is
+                     * exactly the prefix of last code! */
+                    Suffix[Private->RunningCode - 2] =
+                       DGifGetPrefixChar(Prefix, LastCode, ClearCode);
+                } else {
+                    Suffix[Private->RunningCode - 2] =
+                       DGifGetPrefixChar(Prefix, CrntCode, ClearCode);
+                }
+            }
+            LastCode = CrntCode;
+        }
+    }
+
+    Private->LastCode = LastCode;
+    Private->StackPtr = StackPtr;
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ Routine to trace the Prefixes linked list until we get a prefix which is
+ not code, but a pixel value (less than ClearCode). Returns that pixel value.
+ If image is defective, we might loop here forever, so we limit the loops to
+ the maximum possible if image O.k. - LZ_MAX_CODE times.
+******************************************************************************/
+static int
+DGifGetPrefixChar(GifPrefixType *Prefix, int Code, int ClearCode)
+{
+    int i = 0;
+
+    while (Code > ClearCode && i++ <= LZ_MAX_CODE) {
+        if (Code > LZ_MAX_CODE) {
+            return NO_SUCH_CODE;
+        }
+        Code = Prefix[Code];
+    }
+    return Code;
+}
+
+/******************************************************************************
+ Interface for accessing the LZ codes directly. Set Code to the real code
+ (12bits), or to -1 if EOF code is returned.
+******************************************************************************/
+int
+DGifGetLZCodes(GifFileType *GifFile, int *Code)
+{
+    GifByteType *CodeBlock;
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    if (!IS_READABLE(Private)) {
+        /* This file was NOT open for reading: */
+        GifFile->Error = D_GIF_ERR_NOT_READABLE;
+        return GIF_ERROR;
+    }
+
+    if (DGifDecompressInput(GifFile, Code) == GIF_ERROR)
+        return GIF_ERROR;
+
+    if (*Code == Private->EOFCode) {
+        /* Skip rest of codes (hopefully only NULL terminating block): */
+        do {
+            if (DGifGetCodeNext(GifFile, &CodeBlock) == GIF_ERROR)
+                return GIF_ERROR;
+        } while (CodeBlock != NULL) ;
+
+        *Code = -1;
+    } else if (*Code == Private->ClearCode) {
+        /* We need to start over again: */
+        Private->RunningCode = Private->EOFCode + 1;
+        Private->RunningBits = Private->BitsPerPixel + 1;
+        Private->MaxCode1 = 1 << Private->RunningBits;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ The LZ decompression input routine:
+ This routine is responsable for the decompression of the bit stream from
+ 8 bits (bytes) packets, into the real codes.
+ Returns GIF_OK if read successfully.
+******************************************************************************/
+static int
+DGifDecompressInput(GifFileType *GifFile, int *Code)
+{
+    static const unsigned short CodeMasks[] = {
+	0x0000, 0x0001, 0x0003, 0x0007,
+	0x000f, 0x001f, 0x003f, 0x007f,
+	0x00ff, 0x01ff, 0x03ff, 0x07ff,
+	0x0fff
+    };
+
+    GifFilePrivateType *Private = (GifFilePrivateType *)GifFile->Private;
+
+    GifByteType NextByte;
+
+    /* The image can't contain more than LZ_BITS per code. */
+    if (Private->RunningBits > LZ_BITS) {
+        GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+        return GIF_ERROR;
+    }
+    
+    while (Private->CrntShiftState < Private->RunningBits) {
+        /* Needs to get more bytes from input stream for next code: */
+        if (DGifBufferedInput(GifFile, Private->Buf, &NextByte) == GIF_ERROR) {
+            return GIF_ERROR;
+        }
+        Private->CrntShiftDWord |=
+	    ((unsigned long)NextByte) << Private->CrntShiftState;
+        Private->CrntShiftState += 8;
+    }
+    *Code = Private->CrntShiftDWord & CodeMasks[Private->RunningBits];
+
+    Private->CrntShiftDWord >>= Private->RunningBits;
+    Private->CrntShiftState -= Private->RunningBits;
+
+    /* If code cannot fit into RunningBits bits, must raise its size. Note
+     * however that codes above 4095 are used for special signaling.
+     * If we're using LZ_BITS bits already and we're at the max code, just
+     * keep using the table as it is, don't increment Private->RunningCode.
+     */
+    if (Private->RunningCode < LZ_MAX_CODE + 2 &&
+	++Private->RunningCode > Private->MaxCode1 &&
+	Private->RunningBits < LZ_BITS) {
+        Private->MaxCode1 <<= 1;
+        Private->RunningBits++;
+    }
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routines read one GIF data block at a time and buffers it internally
+ so that the decompression routine could access it.
+ The routine returns the next byte from its internal buffer (or read next
+ block in if buffer empty) and returns GIF_OK if succesful.
+******************************************************************************/
+static int
+DGifBufferedInput(GifFileType *GifFile, GifByteType *Buf, GifByteType *NextByte)
+{
+    if (Buf[0] == 0) {
+        /* Needs to read the next buffer - this one is empty: */
+        if (READ(GifFile, Buf, 1) != 1) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+        /* There shouldn't be any empty data blocks here as the LZW spec
+         * says the LZW termination code should come first.  Therefore we
+         * shouldn't be inside this routine at that point.
+         */
+        if (Buf[0] == 0) {
+            GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+            return GIF_ERROR;
+        }
+        /* There shouldn't be any empty data blocks here as the LZW spec
+         * says the LZW termination code should come first.  Therefore we
+         * shouldn't be inside this routine at that point.
+         */
+        if (Buf[0] == 0) {
+            GifFile->Error = D_GIF_ERR_IMAGE_DEFECT;
+            return GIF_ERROR;
+        }
+        if (READ(GifFile, &Buf[1], Buf[0]) != Buf[0]) {
+            GifFile->Error = D_GIF_ERR_READ_FAILED;
+            return GIF_ERROR;
+        }
+        *NextByte = Buf[1];
+        Buf[1] = 2;    /* We use now the second place as last char read! */
+        Buf[0]--;
+    } else {
+        *NextByte = Buf[Buf[1]++];
+        Buf[0]--;
+    }
+
+    return GIF_OK;
+}
+
+/******************************************************************************
+ This routine reads an entire GIF into core, hanging all its state info off
+ the GifFileType pointer.  Call DGifOpenFileName() or DGifOpenFileHandle()
+ first to initialize I/O.  Its inverse is EGifSpew().
+
+int
+DGifSlurp(GifFileType *GifFile)
+{
+    size_t ImageSize;
+    GifRecordType RecordType;
+    SavedImage *sp;
+    GifByteType *ExtData;
+    int ExtFunction;
+
+    GifFile->ExtensionBlocks = NULL;
+    GifFile->ExtensionBlockCount = 0;
+
+    do {
+        if (DGifGetRecordType(GifFile, &RecordType) == GIF_ERROR)
+            return (GIF_ERROR);
+        switch (RecordType) {
+          case IMAGE_DESC_RECORD_TYPE:
+              if (DGifGetImageDesc(GifFile) == GIF_ERROR)
+                  return (GIF_ERROR);
+
+              sp = &GifFile->SavedImages[GifFile->ImageCount - 1];
+              if (GifFile->ExtensionBlocks) {
+                  sp->ExtensionBlocks = GifFile->ExtensionBlocks;
+                  sp->ExtensionBlockCount = GifFile->ExtensionBlockCount;
+
+                  GifFile->ExtensionBlocks = NULL;
+                  GifFile->ExtensionBlockCount = 0;
+              }
+              // Allocate memory for the image
+              if (sp->ImageDesc.Width < 0 && sp->ImageDesc.Height < 0 &&
+                      sp->ImageDesc.Width > (INT_MAX / sp->ImageDesc.Height)) {
+                  return GIF_ERROR;
+              }
+              ImageSize = sp->ImageDesc.Width * sp->ImageDesc.Height;
+
+              if (ImageSize > (SIZE_MAX / sizeof(GifPixelType))) {
+                  return GIF_ERROR;
+              }
+              sp->RasterBits = (unsigned char *)malloc(ImageSize *
+                      sizeof(GifPixelType));
+
+              if (sp->RasterBits == NULL) {
+                  return GIF_ERROR;
+              }
+
+	      if (sp->ImageDesc.Interlace)
+	      {
+			  int i, j;
+			   // The way an interlaced image should be read -				* offsets and jumps...
+
+			  int InterlacedOffset[] = { 0, 4, 2, 1 };
+			  int InterlacedJumps[] = { 8, 8, 4, 2 };
+			  // Need to perform 4 passes on the image
+			  for (i = 0; i < 4; i++)
+				  for (j = InterlacedOffset[i];
+				   j < sp->ImageDesc.Height;
+				   j += InterlacedJumps[i]) {
+				  if (DGifGetLine(GifFile,
+						  sp->RasterBits+j*sp->ImageDesc.Width,
+						  sp->ImageDesc.Width) == GIF_ERROR)
+					  return GIF_ERROR;
+				  }
+	      }
+	      else
+	      {
+		  if (DGifGetLine(GifFile,sp->RasterBits,ImageSize)==GIF_ERROR)
+		      return (GIF_ERROR);
+	      }
+          break;
+
+          case EXTENSION_RECORD_TYPE:
+              if (DGifGetExtension(GifFile,&ExtFunction,&ExtData) == GIF_ERROR)
+                  return (GIF_ERROR);
+	      // Create an extension block with our data
+	      if (GifAddExtensionBlock(&GifFile->ExtensionBlockCount,
+				       &GifFile->ExtensionBlocks, 
+				       ExtFunction, ExtData[0], &ExtData[1])
+		  == GIF_ERROR)
+		  return (GIF_ERROR);
+              while (ExtData != NULL) {
+                  if (DGifGetExtensionNext(GifFile, &ExtData, &ExtFunction) == GIF_ERROR)
+                      return (GIF_ERROR);
+                  // Continue the extension block /
+		  if (ExtData != NULL)
+		      if (GifAddExtensionBlock(&GifFile->ExtensionBlockCount,
+					       &GifFile->ExtensionBlocks,
+					       CONTINUE_EXT_FUNC_CODE, 
+					       ExtData[0], &ExtData[1]) == GIF_ERROR)
+                      return (GIF_ERROR);
+              }
+              break;
+
+          case TERMINATE_RECORD_TYPE:
+              break;
+
+          default:    // Should be trapped by DGifGetRecordType
+              break;
+        }
+    } while (RecordType != TERMINATE_RECORD_TYPE);
+
+    return (GIF_OK);
+}
+
+ end */
diff --git a/src/main/jni/giflib/gif_hash.c b/src/main/jni/giflib/gif_hash.c
new file mode 100644
index 000000000..61a4d139c
--- /dev/null
+++ b/src/main/jni/giflib/gif_hash.c
@@ -0,0 +1,132 @@
+/*****************************************************************************
+
+gif_hash.c -- module to support the following operations:
+
+1. InitHashTable - initialize hash table.
+2. ClearHashTable - clear the hash table to an empty state.
+2. InsertHashTable - insert one item into data structure.
+3. ExistsHashTable - test if item exists in data structure.
+
+This module is used to hash the GIF codes during encoding.
+
+*****************************************************************************/
+
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gif_lib.h"
+#include "gif_hash.h"
+#include "gif_lib_private.h"
+
+/* #define  DEBUG_HIT_RATE    Debug number of misses per hash Insert/Exists. */
+
+#ifdef	DEBUG_HIT_RATE
+static long NumberOfTests = 0,
+	    NumberOfMisses = 0;
+#endif	/* DEBUG_HIT_RATE */
+
+static int KeyItem(uint32_t Item);
+
+/******************************************************************************
+ Initialize HashTable - allocate the memory needed and clear it.	      *
+******************************************************************************/
+GifHashTableType *_InitHashTable(void)
+{
+    GifHashTableType *HashTable;
+
+    if ((HashTable = (GifHashTableType *) malloc(sizeof(GifHashTableType)))
+	== NULL)
+	return NULL;
+
+    _ClearHashTable(HashTable);
+
+    return HashTable;
+}
+
+/******************************************************************************
+ Routine to clear the HashTable to an empty state.			      *
+ This part is a little machine depended. Use the commented part otherwise.   *
+******************************************************************************/
+void _ClearHashTable(GifHashTableType *HashTable)
+{
+    memset(HashTable -> HTable, 0xFF, HT_SIZE * sizeof(uint32_t));
+}
+
+/******************************************************************************
+ Routine to insert a new Item into the HashTable. The data is assumed to be  *
+ new one.								      *
+******************************************************************************/
+void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code)
+{
+    int HKey = KeyItem(Key);
+    uint32_t *HTable = HashTable -> HTable;
+
+#ifdef DEBUG_HIT_RATE
+	NumberOfTests++;
+	NumberOfMisses++;
+#endif /* DEBUG_HIT_RATE */
+
+    while (HT_GET_KEY(HTable[HKey]) != 0xFFFFFL) {
+#ifdef DEBUG_HIT_RATE
+	    NumberOfMisses++;
+#endif /* DEBUG_HIT_RATE */
+	HKey = (HKey + 1) & HT_KEY_MASK;
+    }
+    HTable[HKey] = HT_PUT_KEY(Key) | HT_PUT_CODE(Code);
+}
+
+/******************************************************************************
+ Routine to test if given Key exists in HashTable and if so returns its code *
+ Returns the Code if key was found, -1 if not.				      *
+******************************************************************************/
+int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key)
+{
+    int HKey = KeyItem(Key);
+    uint32_t *HTable = HashTable -> HTable, HTKey;
+
+#ifdef DEBUG_HIT_RATE
+	NumberOfTests++;
+	NumberOfMisses++;
+#endif /* DEBUG_HIT_RATE */
+
+    while ((HTKey = HT_GET_KEY(HTable[HKey])) != 0xFFFFFL) {
+#ifdef DEBUG_HIT_RATE
+	    NumberOfMisses++;
+#endif /* DEBUG_HIT_RATE */
+	if (Key == HTKey) return HT_GET_CODE(HTable[HKey]);
+	HKey = (HKey + 1) & HT_KEY_MASK;
+    }
+
+    return -1;
+}
+
+/******************************************************************************
+ Routine to generate an HKey for the hashtable out of the given unique key.  *
+ The given Key is assumed to be 20 bits as follows: lower 8 bits are the     *
+ new postfix character, while the upper 12 bits are the prefix code.	      *
+ Because the average hit ratio is only 2 (2 hash references per entry),      *
+ evaluating more complex keys (such as twin prime keys) does not worth it!   *
+******************************************************************************/
+static int KeyItem(uint32_t Item)
+{
+    return ((Item >> 12) ^ Item) & HT_KEY_MASK;
+}
+
+#ifdef	DEBUG_HIT_RATE
+/******************************************************************************
+ Debugging routine to print the hit ratio - number of times the hash table   *
+ was tested per operation. This routine was used to test the KeyItem routine *
+******************************************************************************/
+void HashTablePrintHitRatio(void)
+{
+    printf("Hash Table Hit Ratio is %ld/%ld = %ld%%.\n",
+	NumberOfMisses, NumberOfTests,
+	NumberOfMisses * 100 / NumberOfTests);
+}
+#endif	/* DEBUG_HIT_RATE */
+
+/* end */
diff --git a/src/main/jni/giflib/gif_hash.h b/src/main/jni/giflib/gif_hash.h
new file mode 100644
index 000000000..ac20a43cb
--- /dev/null
+++ b/src/main/jni/giflib/gif_hash.h
@@ -0,0 +1,39 @@
+/******************************************************************************
+
+gif_hash.h - magfic constants and declarations for GIF LZW
+
+******************************************************************************/
+
+#ifndef _GIF_HASH_H_
+#define _GIF_HASH_H_
+
+#include <unistd.h>
+#include <stdint.h>
+
+#define HT_SIZE			8192	   /* 12bits = 4096 or twice as big! */
+#define HT_KEY_MASK		0x1FFF			      /* 13bits keys */
+#define HT_KEY_NUM_BITS		13			      /* 13bits keys */
+#define HT_MAX_KEY		8191	/* 13bits - 1, maximal code possible */
+#define HT_MAX_CODE		4095	/* Biggest code possible in 12 bits. */
+
+/* The 32 bits of the long are divided into two parts for the key & code:   */
+/* 1. The code is 12 bits as our compression algorithm is limited to 12bits */
+/* 2. The key is 12 bits Prefix code + 8 bit new char or 20 bits.	    */
+/* The key is the upper 20 bits.  The code is the lower 12. */
+#define HT_GET_KEY(l)	(l >> 12)
+#define HT_GET_CODE(l)	(l & 0x0FFF)
+#define HT_PUT_KEY(l)	(l << 12)
+#define HT_PUT_CODE(l)	(l & 0x0FFF)
+
+typedef struct GifHashTableType {
+    uint32_t HTable[HT_SIZE];
+} GifHashTableType;
+
+GifHashTableType *_InitHashTable(void);
+void _ClearHashTable(GifHashTableType *HashTable);
+void _InsertHashTable(GifHashTableType *HashTable, uint32_t Key, int Code);
+int _ExistsHashTable(GifHashTableType *HashTable, uint32_t Key);
+
+#endif /* _GIF_HASH_H_ */
+
+/* end */
diff --git a/src/main/jni/giflib/gif_lib.h b/src/main/jni/giflib/gif_lib.h
new file mode 100644
index 000000000..b7aa9301d
--- /dev/null
+++ b/src/main/jni/giflib/gif_lib.h
@@ -0,0 +1,307 @@
+/******************************************************************************
+ 
+gif_lib.h - service library for decoding and encoding GIF images
+                                                                             
+*****************************************************************************/
+
+#ifndef _GIF_LIB_H_
+#define _GIF_LIB_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define GIFLIB_MAJOR 5
+#define GIFLIB_MINOR 0
+#define GIFLIB_RELEASE 5
+
+#define GIF_ERROR   0
+#define GIF_OK      1
+
+#include <stddef.h>
+#include <stdbool.h>
+
+#define GIF_STAMP "GIFVER"          /* First chars in file - GIF stamp.  */
+#define GIF_STAMP_LEN sizeof(GIF_STAMP) - 1
+#define GIF_VERSION_POS 3           /* Version first character in stamp. */
+#define GIF87_STAMP "GIF87a"        /* First chars in file - GIF stamp.  */
+#define GIF89_STAMP "GIF89a"        /* First chars in file - GIF stamp.  */
+
+typedef unsigned char GifPixelType;
+typedef unsigned char *GifRowType;
+typedef unsigned char GifByteType;
+typedef unsigned int GifPrefixType;
+typedef int GifWord;
+
+typedef struct GifColorType {
+    GifByteType Red, Green, Blue;
+} GifColorType;
+
+typedef struct ColorMapObject {
+    int ColorCount;
+    int BitsPerPixel;
+    bool SortFlag;
+    GifColorType *Colors;    /* on malloc(3) heap */
+} ColorMapObject;
+
+typedef struct GifImageDesc {
+    GifWord Left, Top, Width, Height;   /* Current image dimensions. */
+    bool Interlace;                     /* Sequential/Interlaced lines. */
+    ColorMapObject *ColorMap;           /* The local color map */
+} GifImageDesc;
+
+typedef struct ExtensionBlock {
+    int ByteCount;
+    GifByteType *Bytes; /* on malloc(3) heap */
+    int Function;       /* The block function code */
+#define CONTINUE_EXT_FUNC_CODE    0x00    /* continuation subblock */
+#define COMMENT_EXT_FUNC_CODE     0xfe    /* comment */
+#define GRAPHICS_EXT_FUNC_CODE    0xf9    /* graphics control (GIF89) */
+#define PLAINTEXT_EXT_FUNC_CODE   0x01    /* plaintext */
+#define APPLICATION_EXT_FUNC_CODE 0xff    /* application block */
+} ExtensionBlock;
+
+typedef struct SavedImage {
+    GifImageDesc ImageDesc;
+    GifByteType *RasterBits;         /* on malloc(3) heap */
+    int ExtensionBlockCount;         /* Count of extensions before image */    
+    ExtensionBlock *ExtensionBlocks; /* Extensions before image */    
+} SavedImage;
+
+typedef struct GifFileType {
+    GifWord SWidth, SHeight;         /* Size of virtual canvas */
+    GifWord SColorResolution;        /* How many colors can we generate? */
+    GifWord SBackGroundColor;        /* Background color for virtual canvas */
+    GifByteType AspectByte;	     /* Used to compute pixel aspect ratio */
+    ColorMapObject *SColorMap;       /* Global colormap, NULL if nonexistent. */
+    int ImageCount;                  /* Number of current image (both APIs) */
+    GifImageDesc Image;              /* Current image (low-level API) */
+    SavedImage *SavedImages;         /* Image sequence (high-level API) */
+    int ExtensionBlockCount;         /* Count extensions past last image */
+    ExtensionBlock *ExtensionBlocks; /* Extensions past last image */    
+    int Error;			     /* Last error condition reported */
+    void *UserData;                  /* hook to attach user data (TVT) */
+    void *Private;                   /* Don't mess with this! */
+} GifFileType;
+
+#define GIF_ASPECT_RATIO(n)	((n)+15.0/64.0)
+
+typedef enum {
+    UNDEFINED_RECORD_TYPE,
+    SCREEN_DESC_RECORD_TYPE,
+    IMAGE_DESC_RECORD_TYPE, /* Begin with ',' */
+    EXTENSION_RECORD_TYPE,  /* Begin with '!' */
+    TERMINATE_RECORD_TYPE   /* Begin with ';' */
+} GifRecordType;
+
+/* func type to read gif data from arbitrary sources (TVT) */
+typedef int (*InputFunc) (GifFileType *, GifByteType *, int);
+
+/* func type to write gif data to arbitrary targets.
+ * Returns count of bytes written. (MRB)
+ */
+typedef int (*OutputFunc) (GifFileType *, const GifByteType *, int);
+
+/******************************************************************************
+ GIF89 structures
+******************************************************************************/
+
+typedef struct GraphicsControlBlock {
+    int DisposalMode;
+#define DISPOSAL_UNSPECIFIED      0       /* No disposal specified. */
+#define DISPOSE_DO_NOT            1       /* Leave image in place */
+#define DISPOSE_BACKGROUND        2       /* Set area too background color */
+#define DISPOSE_PREVIOUS          3       /* Restore to previous content */
+    bool UserInputFlag;      /* User confirmation required before disposal */
+    int DelayTime;           /* pre-display delay in 0.01sec units */
+    int TransparentColor;    /* Palette index for transparency, -1 if none */
+#define NO_TRANSPARENT_COLOR	-1
+} GraphicsControlBlock;
+
+/******************************************************************************
+ GIF encoding routines
+******************************************************************************/
+
+/* Main entry points */
+GifFileType *EGifOpenFileName(const char *GifFileName,
+                              const bool GifTestExistence, int *Error);
+GifFileType *EGifOpenFileHandle(const int GifFileHandle, int *Error);
+GifFileType *EGifOpen(void *userPtr, OutputFunc writeFunc, int *Error);
+int EGifSpew(GifFileType * GifFile);
+char *EGifGetGifVersion(GifFileType *GifFile); /* new in 5.x */
+int EGifCloseFile(GifFileType * GifFile);
+
+#define E_GIF_ERR_OPEN_FAILED    1    /* And EGif possible errors. */
+#define E_GIF_ERR_WRITE_FAILED   2
+#define E_GIF_ERR_HAS_SCRN_DSCR  3
+#define E_GIF_ERR_HAS_IMAG_DSCR  4
+#define E_GIF_ERR_NO_COLOR_MAP   5
+#define E_GIF_ERR_DATA_TOO_BIG   6
+#define E_GIF_ERR_NOT_ENOUGH_MEM 7
+#define E_GIF_ERR_DISK_IS_FULL   8
+#define E_GIF_ERR_CLOSE_FAILED   9
+#define E_GIF_ERR_NOT_WRITEABLE  10
+
+/* These are legacy.  You probably do not want to call them directly */
+int EGifPutScreenDesc(GifFileType *GifFile,
+                      const int GifWidth, const int GifHeight, 
+		      const int GifColorRes,
+                      const int GifBackGround,
+                      const ColorMapObject *GifColorMap);
+int EGifPutImageDesc(GifFileType *GifFile, 
+		     const int GifLeft, const int GifTop,
+                     const int GifWidth, const int GifHeight, 
+		     const bool GifInterlace,
+                     const ColorMapObject *GifColorMap);
+void EGifSetGifVersion(GifFileType *GifFile, const bool gif89);
+int EGifPutLine(GifFileType *GifFile, GifPixelType *GifLine,
+                int GifLineLen);
+int EGifPutPixel(GifFileType *GifFile, const GifPixelType GifPixel);
+int EGifPutComment(GifFileType *GifFile, const char *GifComment);
+int EGifPutExtensionLeader(GifFileType *GifFile, const int GifExtCode);
+int EGifPutExtensionBlock(GifFileType *GifFile,
+                         const int GifExtLen, const void *GifExtension);
+int EGifPutExtensionTrailer(GifFileType *GifFile);
+int EGifPutExtension(GifFileType *GifFile, const int GifExtCode, 
+		     const int GifExtLen,
+                     const void *GifExtension);
+int EGifPutCode(GifFileType *GifFile, int GifCodeSize,
+                const GifByteType *GifCodeBlock);
+int EGifPutCodeNext(GifFileType *GifFile,
+                    const GifByteType *GifCodeBlock);
+
+/******************************************************************************
+ GIF decoding routines
+******************************************************************************/
+
+/* Main entry points */
+GifFileType *DGifOpenFileName(const char *GifFileName, int *Error);
+GifFileType *DGifOpenFileHandle(int GifFileHandle, int *Error);
+int DGifSlurp(GifFileType * GifFile);
+GifFileType *DGifOpen(void *userPtr, InputFunc readFunc, int *Error);    /* new one (TVT) */
+int DGifCloseFile(GifFileType * GifFile);
+
+#define D_GIF_ERR_OPEN_FAILED    101    /* And DGif possible errors. */
+#define D_GIF_ERR_READ_FAILED    102
+#define D_GIF_ERR_NOT_GIF_FILE   103
+#define D_GIF_ERR_NO_SCRN_DSCR   104
+#define D_GIF_ERR_NO_IMAG_DSCR   105
+#define D_GIF_ERR_NO_COLOR_MAP   106
+#define D_GIF_ERR_WRONG_RECORD   107
+#define D_GIF_ERR_DATA_TOO_BIG   108
+#define D_GIF_ERR_NOT_ENOUGH_MEM 109
+#define D_GIF_ERR_CLOSE_FAILED   110
+#define D_GIF_ERR_NOT_READABLE   111
+#define D_GIF_ERR_IMAGE_DEFECT   112
+#define D_GIF_ERR_EOF_TOO_SOON   113
+
+/* These are legacy.  You probably do not want to call them directly */
+int DGifGetScreenDesc(GifFileType *GifFile);
+int DGifGetRecordType(GifFileType *GifFile, GifRecordType *GifType);
+int DGifGetImageDesc(GifFileType *GifFile, bool changeImageCount);
+int DGifGetLine(GifFileType *GifFile, GifPixelType *GifLine, int GifLineLen);
+int DGifGetPixel(GifFileType *GifFile, GifPixelType GifPixel);
+int DGifGetComment(GifFileType *GifFile, char *GifComment);
+int DGifGetExtension(GifFileType *GifFile, int *GifExtCode,
+                     GifByteType **GifExtension);
+int DGifGetExtensionNext(GifFileType *GifFile, GifByteType **GifExtension,int* ExtCode);
+int DGifGetCode(GifFileType *GifFile, int *GifCodeSize,
+                GifByteType **GifCodeBlock);
+int DGifGetCodeNext(GifFileType *GifFile, GifByteType **GifCodeBlock);
+int DGifGetLZCodes(GifFileType *GifFile, int *GifCode);
+
+
+/******************************************************************************
+ Color table quantization (deprecated)
+******************************************************************************/
+int GifQuantizeBuffer(unsigned int Width, unsigned int Height,
+                   int *ColorMapSize, GifByteType * RedInput,
+                   GifByteType * GreenInput, GifByteType * BlueInput,
+                   GifByteType * OutputBuffer,
+                   GifColorType * OutputColorMap);
+
+/******************************************************************************
+ Error handling and reporting.
+******************************************************************************/
+extern char *GifErrorString(int ErrorCode);     /* new in 2012 - ESR */
+
+/*****************************************************************************
+ Everything below this point is new after version 1.2, supporting `slurp
+ mode' for doing I/O in two big belts with all the image-bashing in core.
+******************************************************************************/
+
+/******************************************************************************
+ Color map handling from gif_alloc.c
+******************************************************************************/
+
+extern ColorMapObject *GifMakeMapObject(int ColorCount,
+                                     const GifColorType *ColorMap);
+extern void GifFreeMapObject(ColorMapObject *Object);
+extern ColorMapObject *GifUnionColorMap(const ColorMapObject *ColorIn1,
+                                     const ColorMapObject *ColorIn2,
+                                     GifPixelType ColorTransIn2[]);
+extern int GifBitSize(int n);
+
+/******************************************************************************
+ Support for the in-core structures allocation (slurp mode).              
+******************************************************************************/
+
+extern void GifApplyTranslation(SavedImage *Image, GifPixelType Translation[]);
+extern int GifAddExtensionBlock(int *ExtensionBlock_Count,
+				ExtensionBlock **ExtensionBlocks, 
+				int Function, 
+				unsigned int Len, unsigned char ExtData[]);
+extern void GifFreeExtensions(int *ExtensionBlock_Count,
+			      ExtensionBlock **ExtensionBlocks);
+extern SavedImage *GifMakeSavedImage(GifFileType *GifFile,
+                                  const SavedImage *CopyFrom);
+extern void GifFreeSavedImages(GifFileType *GifFile);
+
+/******************************************************************************
+ 5.x functions for GIF89 graphics control blocks
+******************************************************************************/
+
+int DGifExtensionToGCB(const size_t GifExtensionLength,
+		       const GifByteType *GifExtension,
+		       GraphicsControlBlock *GCB);
+size_t EGifGCBToExtension(const GraphicsControlBlock *GCB,
+		       GifByteType *GifExtension);
+
+int DGifSavedExtensionToGCB(GifFileType *GifFile, 
+			    int ImageIndex, 
+			    GraphicsControlBlock *GCB);
+int EGifGCBToSavedExtension(const GraphicsControlBlock *GCB, 
+			    GifFileType *GifFile, 
+			    int ImageIndex);
+
+/******************************************************************************
+ The library's internal utility font                          
+******************************************************************************/
+
+#define GIF_FONT_WIDTH  8
+#define GIF_FONT_HEIGHT 8
+extern const unsigned char GifAsciiTable8x8[][GIF_FONT_WIDTH];
+
+extern void GifDrawText8x8(SavedImage *Image,
+                     const int x, const int y,
+                     const char *legend, const int color);
+
+extern void GifDrawBox(SavedImage *Image,
+                    const int x, const int y,
+                    const int w, const int d, const int color);
+
+extern void GifDrawRectangle(SavedImage *Image,
+                   const int x, const int y,
+                   const int w, const int d, const int color);
+
+extern void GifDrawBoxedText8x8(SavedImage *Image,
+                          const int x, const int y,
+                          const char *legend,
+                          const int border, const int bg, const int fg);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _GIF_LIB_H */
+
+/* end */
diff --git a/src/main/jni/giflib/gif_lib_private.h b/src/main/jni/giflib/gif_lib_private.h
new file mode 100644
index 000000000..adaf5571e
--- /dev/null
+++ b/src/main/jni/giflib/gif_lib_private.h
@@ -0,0 +1,59 @@
+/****************************************************************************
+
+gif_lib_private.h - internal giflib routines and structures
+
+****************************************************************************/
+
+#ifndef _GIF_LIB_PRIVATE_H
+#define _GIF_LIB_PRIVATE_H
+
+#include "gif_lib.h"
+#include "gif_hash.h"
+
+#define EXTENSION_INTRODUCER      0x21
+#define DESCRIPTOR_INTRODUCER     0x2c
+#define TERMINATOR_INTRODUCER     0x3b
+
+#define LZ_MAX_CODE         4095    /* Biggest code possible in 12 bits. */
+#define LZ_BITS             12
+
+#define FLUSH_OUTPUT        4096    /* Impossible code, to signal flush. */
+#define FIRST_CODE          4097    /* Impossible code, to signal first. */
+#define NO_SUCH_CODE        4098    /* Impossible code, to signal empty. */
+
+#define FILE_STATE_WRITE    0x01
+#define FILE_STATE_SCREEN   0x02
+#define FILE_STATE_IMAGE    0x04
+#define FILE_STATE_READ     0x08
+
+#define IS_READABLE(Private)    (Private->FileState & FILE_STATE_READ)
+#define IS_WRITEABLE(Private)   (Private->FileState & FILE_STATE_WRITE)
+
+typedef struct GifFilePrivateType {
+    GifWord FileState, FileHandle,  /* Where all this data goes to! */
+      BitsPerPixel,     /* Bits per pixel (Codes uses at least this + 1). */
+      ClearCode,   /* The CLEAR LZ code. */
+      EOFCode,     /* The EOF LZ code. */
+      RunningCode, /* The next code algorithm can generate. */
+      RunningBits, /* The number of bits required to represent RunningCode. */
+      MaxCode1,    /* 1 bigger than max. possible code, in RunningBits bits. */
+      LastCode,    /* The code before the current code. */
+      CrntCode,    /* Current algorithm code. */
+      StackPtr,    /* For character stack (see below). */
+      CrntShiftState;    /* Number of bits in CrntShiftDWord. */
+    unsigned long CrntShiftDWord;   /* For bytes decomposition into codes. */
+    unsigned long PixelCount;   /* Number of pixels in image. */
+    FILE *File;    /* File as stream. */
+    InputFunc Read;     /* function to read gif input (TVT) */
+    OutputFunc Write;   /* function to write gif output (MRB) */
+    GifByteType Buf[256];   /* Compressed input is buffered here. */
+    GifByteType Stack[LZ_MAX_CODE]; /* Decoded pixels are stacked here. */
+    GifByteType Suffix[LZ_MAX_CODE + 1];    /* So we can trace the codes. */
+    GifPrefixType Prefix[LZ_MAX_CODE + 1];
+    GifHashTableType *HashTable;
+    bool gif89;
+} GifFilePrivateType;
+
+#endif /* _GIF_LIB_PRIVATE_H */
+
+/* end */
diff --git a/src/main/jni/giflib/gifalloc.c b/src/main/jni/giflib/gifalloc.c
new file mode 100644
index 000000000..5726e7681
--- /dev/null
+++ b/src/main/jni/giflib/gifalloc.c
@@ -0,0 +1,400 @@
+/*****************************************************************************
+
+ GIF construction tools
+
+****************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gif_lib.h"
+
+#define MAX(x, y)    (((x) > (y)) ? (x) : (y))
+
+/******************************************************************************
+ Miscellaneous utility functions                          
+******************************************************************************/
+
+/* return smallest bitfield size n will fit in */
+int
+GifBitSize(int n)
+{
+    register int i;
+
+    for (i = 1; i <= 8; i++)
+        if ((1 << i) >= n)
+            break;
+    return (i);
+}
+
+/******************************************************************************
+  Color map object functions                              
+******************************************************************************/
+
+/*
+ * Allocate a color map of given size; initialize with contents of
+ * ColorMap if that pointer is non-NULL.
+ */
+ColorMapObject *
+GifMakeMapObject(int ColorCount, const GifColorType *ColorMap)
+{
+    ColorMapObject *Object;
+
+    /*** FIXME: Our ColorCount has to be a power of two.  Is it necessary to
+     * make the user know that or should we automatically round up instead? */
+    if (ColorCount != (1 << GifBitSize(ColorCount))) {
+        return ((ColorMapObject *) NULL);
+    }
+    
+    Object = (ColorMapObject *)malloc(sizeof(ColorMapObject));
+    if (Object == (ColorMapObject *) NULL) {
+        return ((ColorMapObject *) NULL);
+    }
+
+    Object->Colors = (GifColorType *)calloc(ColorCount, sizeof(GifColorType));
+    if (Object->Colors == (GifColorType *) NULL) {
+	free(Object);
+        return ((ColorMapObject *) NULL);
+    }
+
+    Object->ColorCount = ColorCount;
+    Object->BitsPerPixel = GifBitSize(ColorCount);
+
+    if (ColorMap != NULL) {
+        memcpy((char *)Object->Colors,
+               (char *)ColorMap, ColorCount * sizeof(GifColorType));
+    }
+
+    return (Object);
+}
+
+/*******************************************************************************
+Free a color map object
+*******************************************************************************/
+void
+GifFreeMapObject(ColorMapObject *Object)
+{
+    if (Object != NULL) {
+        (void)free(Object->Colors);
+        (void)free(Object);
+    }
+}
+
+#ifdef DEBUG
+void
+DumpColorMap(ColorMapObject *Object,
+             FILE * fp)
+{
+    if (Object != NULL) {
+        int i, j, Len = Object->ColorCount;
+
+        for (i = 0; i < Len; i += 4) {
+            for (j = 0; j < 4 && j < Len; j++) {
+                (void)fprintf(fp, "%3d: %02x %02x %02x   ", i + j,
+			      Object->Colors[i + j].Red,
+			      Object->Colors[i + j].Green,
+			      Object->Colors[i + j].Blue);
+            }
+            (void)fprintf(fp, "\n");
+        }
+    }
+}
+#endif /* DEBUG */
+
+/*******************************************************************************
+ Compute the union of two given color maps and return it.  If result can't 
+ fit into 256 colors, NULL is returned, the allocated union otherwise.
+ ColorIn1 is copied as is to ColorUnion, while colors from ColorIn2 are
+ copied iff they didn't exist before.  ColorTransIn2 maps the old
+ ColorIn2 into the ColorUnion color map table./
+*******************************************************************************/
+ColorMapObject *
+GifUnionColorMap(const ColorMapObject *ColorIn1,
+              const ColorMapObject *ColorIn2,
+              GifPixelType ColorTransIn2[])
+{
+    int i, j, CrntSlot, RoundUpTo, NewGifBitSize;
+    ColorMapObject *ColorUnion;
+
+    /*
+     * We don't worry about duplicates within either color map; if
+     * the caller wants to resolve those, he can perform unions
+     * with an empty color map.
+     */
+
+    /* Allocate table which will hold the result for sure. */
+    ColorUnion = GifMakeMapObject(MAX(ColorIn1->ColorCount,
+                               ColorIn2->ColorCount) * 2, NULL);
+
+    if (ColorUnion == NULL)
+        return (NULL);
+
+    /* 
+     * Copy ColorIn1 to ColorUnion.
+     */
+    for (i = 0; i < ColorIn1->ColorCount; i++)
+        ColorUnion->Colors[i] = ColorIn1->Colors[i];
+    CrntSlot = ColorIn1->ColorCount;
+
+    /* 
+     * Potentially obnoxious hack:
+     *
+     * Back CrntSlot down past all contiguous {0, 0, 0} slots at the end
+     * of table 1.  This is very useful if your display is limited to
+     * 16 colors.
+     */
+    while (ColorIn1->Colors[CrntSlot - 1].Red == 0
+           && ColorIn1->Colors[CrntSlot - 1].Green == 0
+           && ColorIn1->Colors[CrntSlot - 1].Blue == 0)
+        CrntSlot--;
+
+    /* Copy ColorIn2 to ColorUnion (use old colors if they exist): */
+    for (i = 0; i < ColorIn2->ColorCount && CrntSlot <= 256; i++) {
+        /* Let's see if this color already exists: */
+        for (j = 0; j < ColorIn1->ColorCount; j++)
+            if (memcmp (&ColorIn1->Colors[j], &ColorIn2->Colors[i], 
+                        sizeof(GifColorType)) == 0)
+                break;
+
+        if (j < ColorIn1->ColorCount)
+            ColorTransIn2[i] = j;    /* color exists in Color1 */
+        else {
+            /* Color is new - copy it to a new slot: */
+            ColorUnion->Colors[CrntSlot] = ColorIn2->Colors[i];
+            ColorTransIn2[i] = CrntSlot++;
+        }
+    }
+
+    if (CrntSlot > 256) {
+        GifFreeMapObject(ColorUnion);
+        return ((ColorMapObject *) NULL);
+    }
+
+    NewGifBitSize = GifBitSize(CrntSlot);
+    RoundUpTo = (1 << NewGifBitSize);
+
+    if (RoundUpTo != ColorUnion->ColorCount) {
+        register GifColorType *Map = ColorUnion->Colors;
+
+        /* 
+         * Zero out slots up to next power of 2.
+         * We know these slots exist because of the way ColorUnion's
+         * start dimension was computed.
+         */
+        for (j = CrntSlot; j < RoundUpTo; j++)
+            Map[j].Red = Map[j].Green = Map[j].Blue = 0;
+
+        /* perhaps we can shrink the map? */
+        if (RoundUpTo < ColorUnion->ColorCount)
+            ColorUnion->Colors = (GifColorType *)realloc(Map,
+                                 sizeof(GifColorType) * RoundUpTo);
+    }
+
+    ColorUnion->ColorCount = RoundUpTo;
+    ColorUnion->BitsPerPixel = NewGifBitSize;
+
+    return (ColorUnion);
+}
+
+/*******************************************************************************
+ Apply a given color translation to the raster bits of an image
+*******************************************************************************/
+void
+GifApplyTranslation(SavedImage *Image, GifPixelType Translation[])
+{
+    register int i;
+    register int RasterSize = Image->ImageDesc.Height * Image->ImageDesc.Width;
+
+    for (i = 0; i < RasterSize; i++)
+        Image->RasterBits[i] = Translation[Image->RasterBits[i]];
+}
+
+/******************************************************************************
+ Extension record functions                              
+******************************************************************************/
+int
+GifAddExtensionBlock(int *ExtensionBlockCount,
+		     ExtensionBlock **ExtensionBlocks,
+		     int Function,
+		     unsigned int Len,
+		     unsigned char ExtData[])
+{
+    ExtensionBlock *ep;
+
+    if (*ExtensionBlocks == NULL)
+        *ExtensionBlocks=(ExtensionBlock *)malloc(sizeof(ExtensionBlock));
+    else
+        *ExtensionBlocks = (ExtensionBlock *)realloc(*ExtensionBlocks,
+                                      sizeof(ExtensionBlock) *
+                                      (*ExtensionBlockCount + 1));
+
+    if (*ExtensionBlocks == NULL)
+        return (GIF_ERROR);
+
+    ep = &(*ExtensionBlocks)[(*ExtensionBlockCount)++];
+
+    ep->Function = Function;
+    ep->ByteCount=Len;
+    ep->Bytes = (GifByteType *)malloc(ep->ByteCount);
+    if (ep->Bytes == NULL)
+        return (GIF_ERROR);
+
+    if (ExtData != NULL) {
+        memcpy(ep->Bytes, ExtData, Len);
+    }
+
+    return (GIF_OK);
+}
+
+void
+GifFreeExtensions(int *ExtensionBlockCount,
+		  ExtensionBlock **ExtensionBlocks)
+{
+    ExtensionBlock *ep;
+
+    if (*ExtensionBlocks == NULL)
+        return;
+
+    for (ep = *ExtensionBlocks;
+	 ep < (*ExtensionBlocks + *ExtensionBlockCount); 
+	 ep++)
+        (void)free((char *)ep->Bytes);
+    (void)free((char *)*ExtensionBlocks);
+    *ExtensionBlocks = NULL;
+    *ExtensionBlockCount = 0;
+}
+
+/******************************************************************************
+ Image block allocation functions                          
+******************************************************************************/
+
+/* Private Function:
+ * Frees the last image in the GifFile->SavedImages array
+ */
+void
+FreeLastSavedImage(GifFileType *GifFile)
+{
+    SavedImage *sp;
+    
+    if ((GifFile == NULL) || (GifFile->SavedImages == NULL))
+        return;
+
+    /* Remove one SavedImage from the GifFile */
+    GifFile->ImageCount--;
+    sp = &GifFile->SavedImages[GifFile->ImageCount];
+
+    /* Deallocate its Colormap */
+    if (sp->ImageDesc.ColorMap != NULL) {
+        GifFreeMapObject(sp->ImageDesc.ColorMap);
+        sp->ImageDesc.ColorMap = NULL;
+    }
+
+    /* Deallocate the image data */
+    if (sp->RasterBits != NULL)
+        free((char *)sp->RasterBits);
+
+    /* Deallocate any extensions */
+    GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
+
+    /*** FIXME: We could realloc the GifFile->SavedImages structure but is
+     * there a point to it? Saves some memory but we'd have to do it every
+     * time.  If this is used in GifFreeSavedImages then it would be inefficient
+     * (The whole array is going to be deallocated.)  If we just use it when
+     * we want to free the last Image it's convenient to do it here.
+     */
+}
+
+/*
+ * Append an image block to the SavedImages array  
+ */
+SavedImage *
+GifMakeSavedImage(GifFileType *GifFile, const SavedImage *CopyFrom)
+{
+    if (GifFile->SavedImages == NULL)
+        GifFile->SavedImages = (SavedImage *)malloc(sizeof(SavedImage));
+    else
+        GifFile->SavedImages = (SavedImage *)realloc(GifFile->SavedImages,
+                               sizeof(SavedImage) * (GifFile->ImageCount + 1));
+
+    if (GifFile->SavedImages == NULL)
+        return ((SavedImage *)NULL);
+    else {
+        SavedImage *sp = &GifFile->SavedImages[GifFile->ImageCount++];
+        memset((char *)sp, '\0', sizeof(SavedImage));
+
+        if (CopyFrom != NULL) {
+            memcpy((char *)sp, CopyFrom, sizeof(SavedImage));
+
+            /* 
+             * Make our own allocated copies of the heap fields in the
+             * copied record.  This guards against potential aliasing
+             * problems.
+             */
+
+            /* first, the local color map */
+            if (sp->ImageDesc.ColorMap != NULL) {
+                sp->ImageDesc.ColorMap = GifMakeMapObject(
+                                         CopyFrom->ImageDesc.ColorMap->ColorCount,
+                                         CopyFrom->ImageDesc.ColorMap->Colors);
+                if (sp->ImageDesc.ColorMap == NULL) {
+                    FreeLastSavedImage(GifFile);
+                    return (SavedImage *)(NULL);
+                }
+            }
+
+            /* next, the raster */
+            sp->RasterBits = (unsigned char *)malloc(sizeof(GifPixelType) *
+                                                   CopyFrom->ImageDesc.Height *
+                                                   CopyFrom->ImageDesc.Width);
+            if (sp->RasterBits == NULL) {
+                FreeLastSavedImage(GifFile);
+                return (SavedImage *)(NULL);
+            }
+            memcpy(sp->RasterBits, CopyFrom->RasterBits,
+                   sizeof(GifPixelType) * CopyFrom->ImageDesc.Height *
+                   CopyFrom->ImageDesc.Width);
+
+            /* finally, the extension blocks */
+            if (sp->ExtensionBlocks != NULL) {
+                sp->ExtensionBlocks = (ExtensionBlock *)malloc(
+                                      sizeof(ExtensionBlock) *
+                                      CopyFrom->ExtensionBlockCount);
+                if (sp->ExtensionBlocks == NULL) {
+                    FreeLastSavedImage(GifFile);
+                    return (SavedImage *)(NULL);
+                }
+                memcpy(sp->ExtensionBlocks, CopyFrom->ExtensionBlocks,
+                       sizeof(ExtensionBlock) * CopyFrom->ExtensionBlockCount);
+            }
+        }
+
+        return (sp);
+    }
+}
+
+void
+GifFreeSavedImages(GifFileType *GifFile)
+{
+    SavedImage *sp;
+
+    if ((GifFile == NULL) || (GifFile->SavedImages == NULL)) {
+        return;
+    }
+    for (sp = GifFile->SavedImages;
+         sp < GifFile->SavedImages + GifFile->ImageCount; sp++) {
+        if (sp->ImageDesc.ColorMap != NULL) {
+            GifFreeMapObject(sp->ImageDesc.ColorMap);
+            sp->ImageDesc.ColorMap = NULL;
+        }
+
+        if (sp->RasterBits != NULL)
+            free((char *)sp->RasterBits);
+	
+	GifFreeExtensions(&sp->ExtensionBlockCount, &sp->ExtensionBlocks);
+    }
+    free((char *)GifFile->SavedImages);
+    GifFile->SavedImages = NULL;
+}
+
+/* end */
diff --git a/src/main/jni/image.c b/src/main/jni/image.c
new file mode 100644
index 000000000..85a859007
--- /dev/null
+++ b/src/main/jni/image.c
@@ -0,0 +1,569 @@
+#include <jni.h>
+#include <stdio.h>
+#include <setjmp.h>
+#include <libjpeg/jpeglib.h>
+#include <android/bitmap.h>
+#include <libwebp/webp/decode.h>
+#include <libwebp/webp/encode.h>
+#include "utils.h"
+#include "image.h"
+
+jclass jclass_NullPointerException;
+jclass jclass_RuntimeException;
+
+jclass jclass_Options;
+jfieldID jclass_Options_inJustDecodeBounds;
+jfieldID jclass_Options_outHeight;
+jfieldID jclass_Options_outWidth;
+
+jclass jclass_Bitmap;
+jmethodID jclass_Bitmap_createBitmap;
+jclass jclass_Config;
+jfieldID jclass_Config_ARGB_8888;
+
+const uint32_t PGPhotoEnhanceHistogramBins = 256;
+const uint32_t PGPhotoEnhanceSegments = 4;
+
+jclass createGlobarRef(JNIEnv *env, jclass class) {
+    if (class) {
+        return (*env)->NewGlobalRef(env, class);
+    }
+    return 0;
+}
+
+jint imageOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
+    jclass_NullPointerException = createGlobarRef(env, (*env)->FindClass(env, "java/lang/NullPointerException"));
+    if (jclass_NullPointerException == 0) {
+        return -1;
+    }
+    jclass_RuntimeException = createGlobarRef(env, (*env)->FindClass(env, "java/lang/RuntimeException"));
+    if (jclass_RuntimeException == 0) {
+        return -1;
+    }
+    
+    jclass_Options = createGlobarRef(env, (*env)->FindClass(env, "android/graphics/BitmapFactory$Options"));
+    if (jclass_Options == 0) {
+        return -1;
+    }
+    jclass_Options_inJustDecodeBounds = (*env)->GetFieldID(env, jclass_Options, "inJustDecodeBounds", "Z");
+    if (jclass_Options_inJustDecodeBounds == 0) {
+        return -1;
+    }
+    jclass_Options_outHeight = (*env)->GetFieldID(env, jclass_Options, "outHeight", "I");
+    if (jclass_Options_outHeight == 0) {
+        return -1;
+    }
+    jclass_Options_outWidth = (*env)->GetFieldID(env, jclass_Options, "outWidth", "I");
+    if (jclass_Options_outWidth == 0) {
+        return -1;
+    }
+    
+    jclass_Bitmap = createGlobarRef(env, (*env)->FindClass(env, "android/graphics/Bitmap"));
+    if (jclass_Bitmap == 0) {
+        return -1;
+    }
+    jclass_Bitmap_createBitmap = (*env)->GetStaticMethodID(env, jclass_Bitmap, "createBitmap", "(IILandroid/graphics/Bitmap$Config;)Landroid/graphics/Bitmap;");
+    if (jclass_Bitmap_createBitmap == 0) {
+        return -1;
+    }
+    
+    jclass_Config = createGlobarRef(env, (*env)->FindClass(env, "android/graphics/Bitmap$Config"));
+    if (jclass_Config == 0) {
+        return -1;
+    }
+    jclass_Config_ARGB_8888 = (*env)->GetStaticFieldID(env, jclass_Config, "ARGB_8888", "Landroid/graphics/Bitmap$Config;");
+    if (jclass_Config_ARGB_8888 == 0) {
+        return -1;
+    }
+    
+    return JNI_VERSION_1_6;
+}
+
+static inline uint64_t get_colors (const uint8_t *p) {
+    return p[0] + (p[1] << 16) + ((uint64_t)p[2] << 32);
+}
+
+static void fastBlurMore(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) {
+    uint8_t *pix = (uint8_t *)pixels;
+    const int w = imageWidth;
+    const int h = imageHeight;
+    const int stride = imageStride;
+    const int r1 = radius + 1;
+    const int div = radius * 2 + 1;
+    
+    if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
+        return;
+    }
+    
+    uint64_t *rgb = malloc(imageWidth * imageHeight * sizeof(uint64_t));
+    if (rgb == NULL) {
+        return;
+    }
+    
+    int x, y, i;
+    
+    int yw = 0;
+    const int we = w - r1;
+    for (y = 0; y < h; y++) {
+        uint64_t cur = get_colors (&pix[yw]);
+        uint64_t rgballsum = -radius * cur;
+        uint64_t rgbsum = cur * ((r1 * (r1 + 1)) >> 1);
+        
+        for (i = 1; i <= radius; i++) {
+            uint64_t cur = get_colors (&pix[yw + i * 4]);
+            rgbsum += cur * (r1 - i);
+            rgballsum += cur;
+        }
+        
+        x = 0;
+        
+    #define update(start, middle, end) \
+            rgb[y * w + x] = (rgbsum >> 6) & 0x00FF00FF00FF00FF; \
+            rgballsum += get_colors (&pix[yw + (start) * 4]) - 2 * get_colors (&pix[yw + (middle) * 4]) + get_colors (&pix[yw + (end) * 4]); \
+            rgbsum += rgballsum; \
+            x++; \
+
+        while (x < r1) {
+            update (0, x, x + r1);
+        }
+        while (x < we) {
+            update (x - r1, x, x + r1);
+        }
+        while (x < w) {
+            update (x - r1, x, w - 1);
+        }
+    #undef update
+        
+        yw += stride;
+    }
+    
+    const int he = h - r1;
+    for (x = 0; x < w; x++) {
+        uint64_t rgballsum = -radius * rgb[x];
+        uint64_t rgbsum = rgb[x] * ((r1 * (r1 + 1)) >> 1);
+        for (i = 1; i <= radius; i++) {
+            rgbsum += rgb[i * w + x] * (r1 - i);
+            rgballsum += rgb[i * w + x];
+        }
+        
+        y = 0;
+        int yi = x * 4;
+        
+    #define update(start, middle, end) \
+            int64_t res = rgbsum >> 6; \
+            pix[yi] = res; \
+            pix[yi + 1] = res >> 16; \
+            pix[yi + 2] = res >> 32; \
+            rgballsum += rgb[x + (start) * w] - 2 * rgb[x + (middle) * w] + rgb[x + (end) * w]; \
+            rgbsum += rgballsum; \
+            y++; \
+            yi += stride;
+        
+        while (y < r1) {
+            update (0, y, y + r1);
+        }
+        while (y < he) {
+            update (y - r1, y, y + r1);
+        }
+        while (y < h) {
+            update (y - r1, y, h - 1);
+        }
+    #undef update
+    }
+}
+
+static void fastBlur(int imageWidth, int imageHeight, int imageStride, void *pixels, int radius) {
+    uint8_t *pix = (uint8_t *)pixels;
+    const int w = imageWidth;
+    const int h = imageHeight;
+    const int stride = imageStride;
+    const int r1 = radius + 1;
+    const int div = radius * 2 + 1;
+    int shift;
+    if (radius == 1) {
+        shift = 2;
+    } else if (radius == 3) {
+        shift = 4;
+    } else if (radius == 7) {
+        shift = 6;
+    } else if (radius == 15) {
+        shift = 8;
+    } else {
+        return;
+    }
+    
+    if (radius > 15 || div >= w || div >= h || w * h > 128 * 128 || imageStride > imageWidth * 4) {
+        return;
+    }
+    
+    uint64_t *rgb = malloc(imageWidth * imageHeight * sizeof(uint64_t));
+    if (rgb == NULL) {
+        return;
+    }
+    
+    int x, y, i;
+    
+    int yw = 0;
+    const int we = w - r1;
+    for (y = 0; y < h; y++) {
+        uint64_t cur = get_colors (&pix[yw]);
+        uint64_t rgballsum = -radius * cur;
+        uint64_t rgbsum = cur * ((r1 * (r1 + 1)) >> 1);
+        
+        for (i = 1; i <= radius; i++) {
+            uint64_t cur = get_colors (&pix[yw + i * 4]);
+            rgbsum += cur * (r1 - i);
+            rgballsum += cur;
+        }
+        
+        x = 0;
+        
+        #define update(start, middle, end)  \
+                rgb[y * w + x] = (rgbsum >> shift) & 0x00FF00FF00FF00FFLL; \
+                rgballsum += get_colors (&pix[yw + (start) * 4]) - 2 * get_colors (&pix[yw + (middle) * 4]) + get_colors (&pix[yw + (end) * 4]); \
+                rgbsum += rgballsum;        \
+                x++;                        \
+
+        while (x < r1) {
+            update (0, x, x + r1);
+        }
+        while (x < we) {
+            update (x - r1, x, x + r1);
+        }
+        while (x < w) {
+            update (x - r1, x, w - 1);
+        }
+        
+        #undef update
+        
+        yw += stride;
+    }
+    
+    const int he = h - r1;
+    for (x = 0; x < w; x++) {
+        uint64_t rgballsum = -radius * rgb[x];
+        uint64_t rgbsum = rgb[x] * ((r1 * (r1 + 1)) >> 1);
+        for (i = 1; i <= radius; i++) {
+            rgbsum += rgb[i * w + x] * (r1 - i);
+            rgballsum += rgb[i * w + x];
+        }
+        
+        y = 0;
+        int yi = x * 4;
+        
+        #define update(start, middle, end)  \
+                int64_t res = rgbsum >> shift;   \
+                pix[yi] = res;              \
+                pix[yi + 1] = res >> 16;    \
+                pix[yi + 2] = res >> 32;    \
+                rgballsum += rgb[x + (start) * w] - 2 * rgb[x + (middle) * w] + rgb[x + (end) * w]; \
+                rgbsum += rgballsum;        \
+                y++;                        \
+                yi += stride;
+        
+        while (y < r1) {
+            update (0, y, y + r1);
+        }
+        while (y < he) {
+            update (y - r1, y, y + r1);
+        }
+        while (y < h) {
+            update (y - r1, y, h - 1);
+        }
+        #undef update
+    }
+    
+    free(rgb);
+}
+
+typedef struct my_error_mgr {
+    struct jpeg_error_mgr pub;
+    jmp_buf setjmp_buffer;
+} *my_error_ptr;
+
+
+METHODDEF(void) my_error_exit(j_common_ptr cinfo) {
+    my_error_ptr myerr = (my_error_ptr) cinfo->err;
+    (*cinfo->err->output_message) (cinfo);
+    longjmp(myerr->setjmp_buffer, 1);
+}
+
+JNIEXPORT void Java_org_telegram_messenger_Utilities_blurBitmap(JNIEnv *env, jclass class, jobject bitmap, int radius) {
+    if (!bitmap) {
+        return;
+    }
+    
+    AndroidBitmapInfo info;
+    
+    if (AndroidBitmap_getInfo(env, bitmap, &info) < 0) {
+        return;
+    }
+    
+    if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888 || !info.width || !info.height || !info.stride) {
+        return;
+    }
+    
+    void *pixels = 0;
+    if (AndroidBitmap_lockPixels(env, bitmap, &pixels) < 0) {
+        return;
+    }
+    if (radius <= 3) {
+        fastBlur(info.width, info.height, info.stride, pixels, radius);
+    } else {
+        fastBlurMore(info.width, info.height, info.stride, pixels, radius);
+    }
+    AndroidBitmap_unlockPixels(env, bitmap);
+}
+
+JNIEXPORT void Java_org_telegram_messenger_Utilities_calcCDT(JNIEnv *env, jclass class, jobject hsvBuffer, int width, int height, jobject buffer) {
+    float imageWidth = width;
+    float imageHeight = height;
+    float _clipLimit = 1.25f;
+
+    uint32_t totalSegments = PGPhotoEnhanceSegments * PGPhotoEnhanceSegments;
+    uint32_t tileArea = (uint32_t)(floorf(imageWidth / PGPhotoEnhanceSegments) * floorf(imageHeight / PGPhotoEnhanceSegments));
+    uint32_t clipLimit = (uint32_t)max(1, _clipLimit * tileArea / (float) PGPhotoEnhanceHistogramBins);
+    float scale = 255.0f / (float) tileArea;
+
+
+    unsigned char *bytes = (*env)->GetDirectBufferAddress(env, hsvBuffer);
+
+    uint32_t **hist = calloc(totalSegments, sizeof(uint32_t *));
+    uint32_t **cdfs = calloc(totalSegments, sizeof(uint32_t *));
+    uint32_t *cdfsMin = calloc(totalSegments, sizeof(uint32_t));
+    uint32_t *cdfsMax = calloc(totalSegments, sizeof(uint32_t));
+    
+    for (uint32_t a = 0; a < totalSegments; a++) {
+        hist[a] = calloc(PGPhotoEnhanceHistogramBins, sizeof(uint32_t));
+        cdfs[a] = calloc(PGPhotoEnhanceHistogramBins, sizeof(uint32_t));
+    }
+    
+    float xMul = PGPhotoEnhanceSegments / imageWidth;
+    float yMul = PGPhotoEnhanceSegments / imageHeight;
+    
+    for (uint32_t y = 0; y < imageHeight; y++) {
+        uint32_t yOffset = y * width * 4;
+        for (uint32_t x = 0; x < imageWidth; x++) {
+            uint32_t index = x * 4 + yOffset;
+            
+            uint32_t tx = (uint32_t)(x * xMul);
+            uint32_t ty = (uint32_t)(y * yMul);
+            uint32_t t = ty * PGPhotoEnhanceSegments + tx;
+            
+            hist[t][bytes[index + 2]]++;
+        }
+    }
+    
+    for (uint32_t i = 0; i < totalSegments; i++) {
+        if (clipLimit > 0) {
+            uint32_t clipped = 0;
+            for (uint32_t j = 0; j < PGPhotoEnhanceHistogramBins; ++j) {
+                if (hist[i][j] > clipLimit) {
+                    clipped += hist[i][j] - clipLimit;
+                    hist[i][j] = clipLimit;
+                }
+            }
+            
+            uint32_t redistBatch = clipped / PGPhotoEnhanceHistogramBins;
+            uint32_t residual = clipped - redistBatch * PGPhotoEnhanceHistogramBins;
+            
+            for (uint32_t j = 0; j < PGPhotoEnhanceHistogramBins; ++j) {
+                hist[i][j] += redistBatch;
+            }
+            
+            for (uint32_t j = 0; j < residual; ++j) {
+                hist[i][j]++;
+            }
+        }
+        memcpy(cdfs[i], hist[i], PGPhotoEnhanceHistogramBins * sizeof(uint32_t));
+        
+        uint32_t hMin = PGPhotoEnhanceHistogramBins - 1;
+        for (uint32_t j = 0; j < hMin; ++j) {
+            if (cdfs[j] != 0) {
+                hMin = j;
+            }
+        }
+        
+        uint32_t cdf = 0;
+        for (uint32_t j = hMin; j < PGPhotoEnhanceHistogramBins; ++j) {
+            cdf += cdfs[i][j];
+            cdfs[i][j] = (uint8_t) min(255, cdf * scale);
+        }
+        
+        cdfsMin[i] = cdfs[i][hMin];
+        cdfsMax[i] = cdfs[i][PGPhotoEnhanceHistogramBins - 1];
+    }
+    
+    uint32_t resultSize = 4 * PGPhotoEnhanceHistogramBins * totalSegments;
+    uint32_t resultBytesPerRow = 4 * PGPhotoEnhanceHistogramBins;
+    
+    unsigned char *result = (*env)->GetDirectBufferAddress(env, buffer);
+    for (uint32_t tile = 0; tile < totalSegments; tile++) {
+        uint32_t yOffset = tile * resultBytesPerRow;
+        for (uint32_t i = 0; i < PGPhotoEnhanceHistogramBins; i++) {
+            uint32_t index = i * 4 + yOffset;
+            result[index] = (uint8_t)cdfs[tile][i];
+            result[index + 1] = (uint8_t)cdfsMin[tile];
+            result[index + 2] = (uint8_t)cdfsMax[tile];
+            result[index + 3] = 255;
+        }
+    }
+
+    for (uint32_t a = 0; a < totalSegments; a++) {
+        free(hist[a]);
+        free(cdfs[a]);
+    }
+    free(hist);
+    free(cdfs);
+    free(cdfsMax);
+    free(cdfsMin);
+}
+
+JNIEXPORT int Java_org_telegram_messenger_Utilities_pinBitmap(JNIEnv *env, jclass class, jobject bitmap) {
+    unsigned char *pixels;
+    return AndroidBitmap_lockPixels(env, bitmap, &pixels) >= 0 ? 1 : 0;
+}
+
+JNIEXPORT void Java_org_telegram_messenger_Utilities_loadBitmap(JNIEnv *env, jclass class, jstring path, jobject bitmap, int scale, int width, int height, int stride) {
+    
+    AndroidBitmapInfo info;
+    int i;
+    
+    if ((i = AndroidBitmap_getInfo(env, bitmap, &info)) >= 0) {
+        char *fileName = (*env)->GetStringUTFChars(env, path, NULL);
+        FILE *infile;
+        
+        if ((infile = fopen(fileName, "rb"))) {
+            struct my_error_mgr jerr;
+            struct jpeg_decompress_struct cinfo;
+            
+            cinfo.err = jpeg_std_error(&jerr.pub);
+            jerr.pub.error_exit = my_error_exit;
+            
+            if (!setjmp(jerr.setjmp_buffer)) {
+                jpeg_create_decompress(&cinfo);
+                jpeg_stdio_src(&cinfo, infile);
+                
+                jpeg_read_header(&cinfo, TRUE);
+                
+                cinfo.scale_denom = scale;
+                cinfo.scale_num = 1;
+                
+                jpeg_start_decompress(&cinfo);
+                int row_stride = cinfo.output_width * cinfo.output_components;
+                JSAMPARRAY buffer = (*cinfo.mem->alloc_sarray) ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
+
+                unsigned char *pixels;
+                if ((i = AndroidBitmap_lockPixels(env, bitmap, &pixels)) >= 0) {
+                    int rowCount = min(cinfo.output_height, height);
+                    int colCount = min(cinfo.output_width, width);
+                    
+                    while (cinfo.output_scanline < rowCount) {
+                        jpeg_read_scanlines(&cinfo, buffer, 1);
+                        
+                        //if (info.format == ANDROID_BITMAP_FORMAT_RGBA_8888) {
+                            if (cinfo.out_color_space == JCS_GRAYSCALE) {
+                                for (i = 0; i < colCount; i++) {
+                                    float alpha = buffer[0][i] / 255.0f;
+                                    pixels[i * 4] *= alpha;
+                                    pixels[i * 4 + 1] *= alpha;
+                                    pixels[i * 4 + 2] *= alpha;
+                                    pixels[i * 4 + 3] = buffer[0][i];
+                                }
+                            } else {
+                                int c = 0;
+                                for (i = 0; i < colCount; i++) {
+                                    pixels[i * 4] = buffer[0][i * 3];
+                                    pixels[i * 4 + 1] = buffer[0][i * 3 + 1];
+                                    pixels[i * 4 + 2] = buffer[0][i * 3 + 2];
+                                    pixels[i * 4 + 3] = 255;
+                                    c += 4;
+                                }
+                            }
+                        //} else if (info.format == ANDROID_BITMAP_FORMAT_RGB_565) {
+                            
+                        //}
+                        
+                        pixels += stride;
+                    }
+                    
+                    AndroidBitmap_unlockPixels(env, bitmap);
+                } else {
+                    throwException(env, "AndroidBitmap_lockPixels() failed ! error=%d", i);
+                }
+                
+                jpeg_finish_decompress(&cinfo);
+            } else {
+                throwException(env, "the JPEG code has signaled an error");
+            }
+            
+            jpeg_destroy_decompress(&cinfo);
+            fclose(infile);
+        } else {
+            throwException(env, "can't open %s", fileName);
+        }
+        
+        (*env)->ReleaseStringUTFChars(env, path, fileName);
+    } else {
+        throwException(env, "AndroidBitmap_getInfo() failed ! error=%d", i);
+    }
+}
+
+JNIEXPORT jobject Java_org_telegram_messenger_Utilities_loadWebpImage(JNIEnv *env, jclass class, jobject buffer, int len, jobject options) {
+    if (!buffer) {
+        (*env)->ThrowNew(env, jclass_NullPointerException, "Input buffer can not be null");
+        return 0;
+    }
+    
+    jbyte *inputBuffer = (*env)->GetDirectBufferAddress(env, buffer);
+    
+    int bitmapWidth = 0;
+    int bitmapHeight = 0;
+    if (!WebPGetInfo((uint8_t*)inputBuffer, len, &bitmapWidth, &bitmapHeight)) {
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Invalid WebP format");
+        return 0;
+    }
+    
+    if (options && (*env)->GetBooleanField(env, options, jclass_Options_inJustDecodeBounds) == JNI_TRUE) {
+        (*env)->SetIntField(env, options, jclass_Options_outWidth, bitmapWidth);
+        (*env)->SetIntField(env, options, jclass_Options_outHeight, bitmapHeight);
+        return 0;
+    }
+
+    jobject value__ARGB_8888 = (*env)->GetStaticObjectField(env, jclass_Config, jclass_Config_ARGB_8888);
+    jobject outputBitmap = (*env)->CallStaticObjectMethod(env, jclass_Bitmap, jclass_Bitmap_createBitmap, (jint)bitmapWidth, (jint)bitmapHeight, value__ARGB_8888);
+    if (!outputBitmap) {
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Failed to allocate Bitmap");
+        return 0;
+    }
+    outputBitmap = (*env)->NewLocalRef(env, outputBitmap);
+    
+    AndroidBitmapInfo bitmapInfo;
+    if (AndroidBitmap_getInfo(env, outputBitmap, &bitmapInfo) != ANDROID_BITMAP_RESUT_SUCCESS) {
+        (*env)->DeleteLocalRef(env, outputBitmap);
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Failed to get Bitmap information");
+        return 0;
+    }
+    
+    void *bitmapPixels = 0;
+    if (AndroidBitmap_lockPixels(env, outputBitmap, &bitmapPixels) != ANDROID_BITMAP_RESUT_SUCCESS) {
+        (*env)->DeleteLocalRef(env, outputBitmap);
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Failed to lock Bitmap pixels");
+        return 0;
+    }
+    
+    if (!WebPDecodeRGBAInto((uint8_t*)inputBuffer, len, (uint8_t*)bitmapPixels, bitmapInfo.height * bitmapInfo.stride, bitmapInfo.stride)) {
+        AndroidBitmap_unlockPixels(env, outputBitmap);
+        (*env)->DeleteLocalRef(env, outputBitmap);
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Failed to unlock Bitmap pixels");
+        return 0;
+    }
+    
+    if (AndroidBitmap_unlockPixels(env, outputBitmap) != ANDROID_BITMAP_RESUT_SUCCESS) {
+        (*env)->DeleteLocalRef(env, outputBitmap);
+        (*env)->ThrowNew(env, jclass_RuntimeException, "Failed to unlock Bitmap pixels");
+        return 0;
+    }
+    
+    return outputBitmap;
+}
diff --git a/src/main/jni/image.h b/src/main/jni/image.h
new file mode 100644
index 000000000..58dd5385b
--- /dev/null
+++ b/src/main/jni/image.h
@@ -0,0 +1,8 @@
+#ifndef image_h
+#define image_h
+
+#include <jni.h>
+
+jint imageOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
+
+#endif
diff --git a/src/main/jni/jni.c b/src/main/jni/jni.c
new file mode 100644
index 000000000..ff600f003
--- /dev/null
+++ b/src/main/jni/jni.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <string.h>
+#include <jni.h>
+#include <sys/types.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "aes/aes.h"
+#include "utils.h"
+#include "sqlite.h"
+#include "gif.h"
+#include "image.h"
+
+jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+	JNIEnv *env = 0;
+    srand(time(NULL));
+    
+	if ((*vm)->GetEnv(vm, (void **) &env, JNI_VERSION_1_6) != JNI_OK) {
+		return -1;
+	}
+    
+    if (sqliteOnJNILoad(vm, reserved, env) == -1) {
+        return -1;
+    }
+    
+    if (imageOnJNILoad(vm, reserved, env) == -1) {
+        return -1;
+    }
+    
+    gifOnJNILoad(vm, reserved, env);
+    
+	return JNI_VERSION_1_6;
+}
+
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+    gifOnJNIUnload(vm, reserved);
+}
+
+JNIEXPORT void Java_org_telegram_messenger_Utilities_aesIgeEncryption(JNIEnv *env, jclass class, jobject buffer, jbyteArray key, jbyteArray iv, jboolean encrypt, int offset, int length) {
+    jbyte *what = (*env)->GetDirectBufferAddress(env, buffer) + offset;
+    unsigned char *keyBuff = (unsigned char *)(*env)->GetByteArrayElements(env, key, NULL);
+    unsigned char *ivBuff = (unsigned char *)(*env)->GetByteArrayElements(env, iv, NULL);
+    
+    AES_KEY akey;
+    if (!encrypt) {
+        AES_set_decrypt_key(keyBuff, 32 * 8, &akey);
+        AES_ige_encrypt(what, what, length, &akey, ivBuff, AES_DECRYPT);
+    } else {
+        AES_set_encrypt_key(keyBuff, 32 * 8, &akey);
+        AES_ige_encrypt(what, what, length, &akey, ivBuff, AES_ENCRYPT);
+    }
+    (*env)->ReleaseByteArrayElements(env, key, keyBuff, JNI_ABORT);
+    (*env)->ReleaseByteArrayElements(env, iv, ivBuff, 0);
+}
+
+uint64_t gcd(uint64_t a, uint64_t b){
+    while(a != 0 && b != 0) {
+        while((b & 1) == 0) b >>= 1;
+        while((a & 1) == 0) a >>= 1;
+        if(a > b) a -= b; else b -= a;
+    }
+    return b == 0 ? a : b;
+}
+
+JNIEXPORT jlong Java_org_telegram_messenger_Utilities_doPQNative(JNIEnv* env, jclass class, jlong _what) {
+    uint64_t what = _what;
+    int it = 0, i, j;
+    uint64_t g = 0;
+    for (i = 0; i < 3 || it < 1000; i++){
+        int q = ((lrand48() & 15) + 17) % what;
+        uint64_t x = (long long)lrand48() % (what - 1) + 1, y = x;
+        int lim = 1 << (i + 18), j;
+        for(j = 1; j < lim; j++){
+            ++it;
+            uint64_t a = x, b = x, c = q;
+            while(b){
+                if(b & 1){
+                    c += a;
+                    if(c >= what) c -= what;
+                }
+                a += a;
+                if(a >= what) a -= what;
+                b >>= 1;
+            }
+            x = c;
+            uint64_t z = x < y ? what + x - y : x - y;
+            g = gcd(z, what);
+            if(g != 1) break;
+            if(!(j & (j - 1))) y = x;
+        }
+        if(g > 1 && g < what) break;
+    }
+    return g;
+}
diff --git a/src/main/jni/libjpeg/armv6_idct.S b/src/main/jni/libjpeg/armv6_idct.S
new file mode 100644
index 000000000..18e4e8a18
--- /dev/null
+++ b/src/main/jni/libjpeg/armv6_idct.S
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is a fast-and-accurate implementation of inverse Discrete Cosine
+ * Transform (IDCT) for ARMv6+. It also performs dequantization of the input
+ * coefficients just like other methods.
+ *
+ * This implementation is based on the scaled 1-D DCT algorithm proposed by
+ * Arai, Agui, and Nakajima. The following code is based on the figure 4-8
+ * on page 52 of the JPEG textbook by Pennebaker and Mitchell. Coefficients
+ * are (almost) directly mapped into registers.
+ *
+ * The accuracy is achieved by using SMULWy and SMLAWy instructions. Both
+ * multiply 32 bits by 16 bits and store the top 32 bits of the result. It
+ * makes 32-bit fixed-point arithmetic possible without overflow. That is
+ * why jpeg_idct_ifast(), which is written in C, cannot be improved.
+ *
+ * More tricks are used to gain more speed. First of all, we use as many
+ * registers as possible. ARM processor has 16 registers including sp (r13)
+ * and pc (r15), so only 14 registers can be used without limitations. In
+ * general, we let r0 to r7 hold the coefficients; r10 and r11 hold four
+ * 16-bit constants; r12 and r14 hold two of the four arguments; and r8 hold
+ * intermediate value. In the second pass, r9 is the loop counter. In the
+ * first pass, r8 to r11 are used to hold quantization values, so the loop
+ * counter is held by sp. Yes, the stack pointer. Since it must be aligned
+ * to 4-byte boundary all the time, we align it to 32-byte boundary and use
+ * bit 3 to bit 5. As the result, we actually use 14.1 registers. :-)
+ *
+ * Second, we rearrange quantization values to access them sequentially. The
+ * table is first transposed, and the new columns are placed in the order of
+ * 7, 5, 1, 3, 0, 2, 4, 6. Thus we can use LDMDB to load four values at a
+ * time. Rearranging coefficients also helps, but that requires to change a
+ * dozen of files, which seems not worth it. In addition, we choose to scale
+ * up quantization values by 13 bits, so the coefficients are scaled up by
+ * 16 bits after both passes. Then we can pack and saturate them two at a
+ * time using PKHTB and USAT16 instructions.
+ *
+ * Third, we reorder the instructions to avoid bubbles in the pipeline. This
+ * is done by hand accroding to the cycle timings and the interlock behavior
+ * described in the technical reference manual of ARM1136JF-S. We also take
+ * advantage of dual issue processors by interleaving instructions with
+ * dependencies. It has been benchmarked on four devices and all the results
+ * showed distinguishable improvements. Note that PLD instructions actually
+ * slow things down, so they are removed at the last minute. In the future,
+ * this might be futher improved using a system profiler.
+ */
+
+#ifdef __arm__
+#include <machine/cpu-features.h>
+#endif
+
+#if __ARM_ARCH__ >= 6
+
+// void armv6_idct(short *coefs, int *quans, unsigned char *rows, int col)
+    .arm
+    .text
+    .align
+    .global armv6_idct
+    .func   armv6_idct
+
+armv6_idct:
+    // Push everything except sp (r13) and pc (r15).
+    stmdb   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+
+    // r12 = quans, r14 = coefs.
+    sub     r4, sp, #236
+    bic     sp, r4, #31
+    add     r5, sp, #224
+    add     r12, r1, #256
+    stm     r5, {r2, r3, r4}
+    add     r14, r0, #16
+
+pass1_head:
+    // Load quantization values. (q[0, 2, 4, 6])
+    ldmdb   r12!, {r8, r9, r10, r11}
+
+    // Load coefficients. (c[4, 1, 2, 3, 0, 5, 6, 7])
+    ldrsh   r4, [r14, #-2] !
+    ldrsh   r1, [r14, #16]
+    ldrsh   r2, [r14, #32]
+    ldrsh   r3, [r14, #48]
+    ldrsh   r0, [r14, #64]
+    ldrsh   r5, [r14, #80]
+    ldrsh   r6, [r14, #96]
+    ldrsh   r7, [r14, #112]
+
+    // r4 = q[0] * c[0];
+    mul     r4, r8, r4
+
+    // Check if ACs are all zero.
+    cmp     r0, #0
+    orreqs  r8, r1, r2
+    orreqs  r8, r3, r5
+    orreqs  r8, r6, r7
+    beq     pass1_zero
+
+    // Step 1: Dequantizations.
+
+    // r2 = q[2] * c[2];
+    // r0 = q[4] * c[4] + r4;
+    // r6 = q[6] * c[6] + r2;
+    mul     r2, r9, r2
+    mla     r0, r10, r0, r4
+    mla     r6, r11, r6, r2
+
+    // Load quantization values. (q[7, 5, 1, 3])
+    ldmdb   r12!, {r8, r9, r10, r11}
+
+    // r4 = r4 * 2 - r0 = -(r0 - r4 * 2);
+    // r2 = r2 * 2 - r6 = -(r6 - r2 * 2);
+    rsb     r4, r0, r4, lsl #1
+    rsb     r2, r6, r2, lsl #1
+
+    // r7 = q[7] * c[7];
+    // r5 = q[5] * c[5];
+    // r1 = q[1] * c[1] + r7;
+    // r3 = q[3] * c[3] + r5;
+    mul     r7, r8, r7
+    mul     r5, r9, r5
+    mla     r1, r10, r1, r7
+    mla     r3, r11, r3, r5
+
+    // Load constants.
+    ldrd    r10, constants
+
+    // Step 2: Rotations and Butterflies.
+
+    // r7 = r1 - r7 * 2;
+    // r1 = r1 - r3;
+    // r5 = r5 * 2 - r3 = -(r3 - r5 * 2);
+    // r3 = r1 + r3 * 2;
+    // r8 = r5 + r7;
+    sub     r7, r1, r7, lsl #1
+    sub     r1, r1, r3
+    rsb     r5, r3, r5, lsl #1
+    add     r3, r1, r3, lsl #1
+    add     r8, r5, r7
+
+    // r2 = r2 * 1.41421 = r2 * 27146 / 65536 + r2;
+    // r8 = r8 * 1.84776 / 8 = r8 * 15137 / 65536;
+    // r1 = r1 * 1.41421 = r1 * 27146 / 65536 + r1;
+    smlawt  r2, r2, r10, r2
+    smulwb  r8, r8, r10
+    smlawt  r1, r1, r10, r1
+
+    // r0 = r0 + r6;
+    // r2 = r2 - r6;
+    // r6 = r0 - r6 * 2;
+    add     r0, r0, r6
+    sub     r2, r2, r6
+    sub     r6, r0, r6, lsl #1
+
+    // r5 = r5 * -2.61313 / 8 + r8 = r5 * -21407 / 65536 + r8;
+    // r8 = r7 * -1.08239 / 8 + r8 = r7 * -8867 / 65536 + r8;
+    smlawt  r5, r5, r11, r8
+    smlawb  r8, r7, r11, r8
+
+    // r4 = r4 + r2;
+    // r0 = r0 + r3;
+    // r2 = r4 - r2 * 2;
+    add     r4, r4, r2
+    add     r0, r0, r3
+    sub     r2, r4, r2, lsl #1
+
+    // r7 = r5 * 8 - r3 = -(r3 - r5 * 8);
+    // r3 = r0 - r3 * 2;
+    // r1 = r1 - r7;
+    // r4 = r4 + r7;
+    // r5 = r8 * 8 - r1 = -(r1 - r8 * 8);
+    // r7 = r4 - r7 * 2;
+    rsb     r7, r3, r5, lsl #3
+    sub     r3, r0, r3, lsl #1
+    sub     r1, r1, r7
+    add     r4, r4, r7
+    rsb     r5, r1, r8, lsl #3
+    sub     r7, r4, r7, lsl #1
+
+    // r2 = r2 + r1;
+    // r6 = r6 + r5;
+    // r1 = r2 - r1 * 2;
+    // r5 = r6 - r5 * 2;
+    add     r2, r2, r1
+    add     r6, r6, r5
+    sub     r1, r2, r1, lsl #1
+    sub     r5, r6, r5, lsl #1
+
+    // Step 3: Reorder and Save.
+
+    str     r0, [sp, #-4] !
+    str     r4, [sp, #32]
+    str     r2, [sp, #64]
+    str     r6, [sp, #96]
+    str     r5, [sp, #128]
+    str     r1, [sp, #160]
+    str     r7, [sp, #192]
+    str     r3, [sp, #224]
+    b       pass1_tail
+
+    // Precomputed 16-bit constants: 27146, 15137, -21407, -8867.
+    // Put them in the middle since LDRD only accepts offsets from -255 to 255.
+    .align  3
+constants:
+    .word   0x6a0a3b21
+    .word   0xac61dd5d
+
+pass1_zero:
+    str     r4, [sp, #-4] !
+    str     r4, [sp, #32]
+    str     r4, [sp, #64]
+    str     r4, [sp, #96]
+    str     r4, [sp, #128]
+    str     r4, [sp, #160]
+    str     r4, [sp, #192]
+    str     r4, [sp, #224]
+    sub     r12, r12, #16
+
+pass1_tail:
+    ands    r9, sp, #31
+    bne     pass1_head
+
+    // r12 = rows, r14 = col.
+    ldr     r12, [sp, #256]
+    ldr     r14, [sp, #260]
+
+    // Load constants.
+    ldrd    r10, constants
+
+pass2_head:
+    // Load coefficients. (c[0, 1, 2, 3, 4, 5, 6, 7])
+    ldmia   sp!, {r0, r1, r2, r3, r4, r5, r6, r7}
+
+    // r0 = r0 + 0x00808000;
+    add     r0, r0, #0x00800000
+    add     r0, r0, #0x00008000
+
+    // Step 1: Analog to the first pass.
+
+    // r0 = r0 + r4;
+    // r6 = r6 + r2;
+    add     r0, r0, r4
+    add     r6, r6, r2
+
+    // r4 = r0 - r4 * 2;
+    // r2 = r2 * 2 - r6 = -(r6 - r2 * 2);
+    sub     r4, r0, r4, lsl #1
+    rsb     r2, r6, r2, lsl #1
+
+    // r1 = r1 + r7;
+    // r3 = r3 + r5;
+    add     r1, r1, r7
+    add     r3, r3, r5
+
+    // Step 2: Rotations and Butterflies.
+
+    // r7 = r1 - r7 * 2;
+    // r1 = r1 - r3;
+    // r5 = r5 * 2 - r3 = -(r3 - r5 * 2);
+    // r3 = r1 + r3 * 2;
+    // r8 = r5 + r7;
+    sub     r7, r1, r7, lsl #1
+    sub     r1, r1, r3
+    rsb     r5, r3, r5, lsl #1
+    add     r3, r1, r3, lsl #1
+    add     r8, r5, r7
+
+    // r2 = r2 * 1.41421 = r2 * 27146 / 65536 + r2;
+    // r8 = r8 * 1.84776 / 8 = r8 * 15137 / 65536;
+    // r1 = r1 * 1.41421 = r1 * 27146 / 65536 + r1;
+    smlawt  r2, r2, r10, r2
+    smulwb  r8, r8, r10
+    smlawt  r1, r1, r10, r1
+
+    // r0 = r0 + r6;
+    // r2 = r2 - r6;
+    // r6 = r0 - r6 * 2;
+    add     r0, r0, r6
+    sub     r2, r2, r6
+    sub     r6, r0, r6, lsl #1
+
+    // r5 = r5 * -2.61313 / 8 + r8 = r5 * -21407 / 65536 + r8;
+    // r8 = r7 * -1.08239 / 8 + r8 = r7 * -8867 / 65536 + r8;
+    smlawt  r5, r5, r11, r8
+    smlawb  r8, r7, r11, r8
+
+    // r4 = r4 + r2;
+    // r0 = r0 + r3;
+    // r2 = r4 - r2 * 2;
+    add     r4, r4, r2
+    add     r0, r0, r3
+    sub     r2, r4, r2, lsl #1
+
+    // r7 = r5 * 8 - r3 = -(r3 - r5 * 8);
+    // r3 = r0 - r3 * 2;
+    // r1 = r1 - r7;
+    // r4 = r4 + r7;
+    // r5 = r8 * 8 - r1 = -(r1 - r8 * 8);
+    // r7 = r4 - r7 * 2;
+    rsb     r7, r3, r5, lsl #3
+    sub     r3, r0, r3, lsl #1
+    sub     r1, r1, r7
+    add     r4, r4, r7
+    rsb     r5, r1, r8, lsl #3
+    sub     r7, r4, r7, lsl #1
+
+    // r2 = r2 + r1;
+    // r6 = r6 + r5;
+    // r1 = r2 - r1 * 2;
+    // r5 = r6 - r5 * 2;
+    add     r2, r2, r1
+    add     r6, r6, r5
+    sub     r1, r2, r1, lsl #1
+    sub     r5, r6, r5, lsl #1
+
+    // Step 3: Reorder and Save.
+
+    // Load output pointer.
+    ldr     r8, [r12], #4
+
+    // For little endian: r6, r2, r4, r0, r3, r7, r1, r5.
+    pkhtb   r6, r6, r4, asr #16
+    pkhtb   r2, r2, r0, asr #16
+    pkhtb   r3, r3, r1, asr #16
+    pkhtb   r7, r7, r5, asr #16
+    usat16  r6, #8, r6
+    usat16  r2, #8, r2
+    usat16  r3, #8, r3
+    usat16  r7, #8, r7
+    orr     r0, r2, r6, lsl #8
+    orr     r1, r7, r3, lsl #8
+
+#ifdef __ARMEB__
+    // Reverse bytes for big endian.
+    rev     r0, r0
+    rev     r1, r1
+#endif
+
+    // Use STR instead of STRD to support unaligned access.
+    str     r0, [r8, r14] !
+    str     r1, [r8, #4]
+
+pass2_tail:
+    adds    r9, r9, #0x10000000
+    bpl     pass2_head
+
+    ldr     sp, [sp, #8]
+    add     sp, sp, #236
+
+    ldmia   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+    bx      lr
+    .endfunc
+
+#endif
diff --git a/src/main/jni/libjpeg/jcapimin.c b/src/main/jni/libjpeg/jcapimin.c
new file mode 100644
index 000000000..54fb8c58c
--- /dev/null
+++ b/src/main/jni/libjpeg/jcapimin.c
@@ -0,0 +1,280 @@
+/*
+ * jcapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG compression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;	/* in case application forgets */
+
+  /* OK, I'm ready */
+  cinfo->global_state = CSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG compression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress (j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG compression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress (j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Forcibly suppress or un-suppress all quantization and Huffman tables.
+ * Marks all currently defined tables as already written (if suppress)
+ * or not written (if !suppress).  This will control whether they get emitted
+ * by a subsequent jpeg_start_compress call.
+ *
+ * This routine is exported for use by applications that want to produce
+ * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
+ * since it is called by jpeg_start_compress, we put it here --- otherwise
+ * jcparam.o would be linked whether the application used it or not.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
+{
+  int i;
+  JQUANT_TBL * qtbl;
+  JHUFF_TBL * htbl;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
+      qtbl->sent_table = suppress;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression.
+ *
+ * If a multipass operating mode was selected, this may do a great deal of
+ * work including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress (j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Terminate first pass */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any remaining passes */
+  while (! cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) iMCU_row;
+	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* We bypass the main controller and invoke coef controller directly;
+       * all work is being done from the coefficient buffer.
+       */
+      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
+	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Write EOI, do final cleanup */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+}
+
+
+/*
+ * Write a special marker.
+ * This is only recommended for writing COM or APPn markers.
+ * Must be called after jpeg_start_compress() and before
+ * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
+ */
+
+GLOBAL(void)
+jpeg_write_marker (j_compress_ptr cinfo, int marker,
+		   const JOCTET *dataptr, unsigned int datalen)
+{
+  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
+
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  while (datalen--) {
+    (*write_marker_byte) (cinfo, *dataptr);
+    dataptr++;
+  }
+}
+
+/* Same, but piecemeal. */
+
+GLOBAL(void)
+jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte (j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val);
+}
+
+
+/*
+ * Alternate compression function: just write an abbreviated table file.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *		initialize JPEG object
+ *		set JPEG parameters
+ *		set destination to table file
+ *		jpeg_write_tables(cinfo);
+ *		set destination to image file
+ *		jpeg_start_compress(cinfo, FALSE);
+ *		write data...
+ *		jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Initialize the marker writer ... bit of a crock to do it here. */
+  jinit_marker_writer(cinfo);
+  /* Write them tables! */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* And clean up. */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, we called jpeg_abort() here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now we do nothing.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/src/main/jni/libjpeg/jcapistd.c b/src/main/jni/libjpeg/jcapistd.c
new file mode 100644
index 000000000..c0320b1b1
--- /dev/null
+++ b/src/main/jni/libjpeg/jcapistd.c
@@ -0,0 +1,161 @@
+/*
+ * jcapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (write_all_tables)
+    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  jinit_compress_master(cinfo);
+  /* Set up for the first pass */
+  (*cinfo->master->prepare_for_pass) (cinfo);
+  /* Ready for application to drive first pass through jpeg_write_scanlines
+   * or jpeg_write_raw_data.
+   */
+  cinfo->next_scanline = 0;
+  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
+}
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
+		      JDIMENSION num_lines)
+{
+  JDIMENSION row_ctr, rows_left;
+
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_scanlines.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Ignore any extra scanlines at bottom of image. */
+  rows_left = cinfo->image_height - cinfo->next_scanline;
+  if (num_lines > rows_left)
+    num_lines = rows_left;
+
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  cinfo->next_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to write raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+		     JDIMENSION num_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != CSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Verify that at least one iMCU row has been passed. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  if (num_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Directly compress the row. */
+  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
+    /* If compressor did not consume the whole row, suspend processing. */
+    return 0;
+  }
+
+  /* OK, we processed one iMCU row. */
+  cinfo->next_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
diff --git a/src/main/jni/libjpeg/jccoefct.c b/src/main/jni/libjpeg/jccoefct.c
new file mode 100644
index 000000000..1963ddb61
--- /dev/null
+++ b/src/main/jni/libjpeg/jccoefct.c
@@ -0,0 +1,449 @@
+/*
+ * jccoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).  We allocate a
+   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
+   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
+   * it's not really very big; this is to keep the module interfaces unchanged
+   * when a large coefficient buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+METHODDEF(boolean) compress_output
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (coef->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, bi, ci, yindex, yoffset, blockcnt;
+  JDIMENSION ypos, xpos;
+  jpeg_component_info *compptr;
+
+  /* Loop to write as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks
+       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
+       * sequentially.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkn = 0;
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	xpos = MCU_col_num * compptr->MCU_sample_width;
+	ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+					 input_buf[compptr->component_index],
+					 coef->MCU_buffer[blkn],
+					 ypos, xpos, (JDIMENSION) blockcnt);
+	    if (blockcnt < compptr->MCU_width) {
+	      /* Create some dummy blocks at the right edge of the image. */
+	      jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
+			(compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
+	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+	      }
+	    }
+	  } else {
+	    /* Create a row of dummy blocks at the bottom of the image. */
+	    jzero_far((void FAR *) coef->MCU_buffer[blkn],
+		      compptr->MCU_width * SIZEOF(JBLOCK));
+	    for (bi = 0; bi < compptr->MCU_width; bi++) {
+	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  ypos += DCTSIZE;
+	}
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW thisblockrow, lastblockrow;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int) (blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+				   input_buf[ci], thisblockrow,
+				   (JDIMENSION) (block_row * DCTSIZE),
+				   (JDIMENSION) 0, blocks_across);
+      if (ndummy > 0) {
+	/* Create dummy blocks at the right edge of the image. */
+	thisblockrow += blocks_across; /* => first dummy block */
+	jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+	lastDC = thisblockrow[-1][0];
+	for (bi = 0; bi < ndummy; bi++) {
+	  thisblockrow[bi][0] = lastDC;
+	}
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (coef->iMCU_row_num == last_iMCU_row) {
+      blocks_across += ndummy;	/* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      for (block_row = block_rows; block_row < compptr->v_samp_factor;
+	   block_row++) {
+	thisblockrow = buffer[block_row];
+	lastblockrow = buffer[block_row-1];
+	jzero_far((void FAR *) thisblockrow,
+		  (size_t) (blocks_across * SIZEOF(JBLOCK)));
+	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+	  lastDC = lastblockrow[h_samp_factor-1][0];
+	  for (bi = 0; bi < h_samp_factor; bi++) {
+	    thisblockrow[bi][0] = lastDC;
+	  }
+	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
+	  lastblockrow += h_samp_factor;
+	}
+      }
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    int ci;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/src/main/jni/libjpeg/jccolor.c b/src/main/jni/libjpeg/jccolor.c
new file mode 100644
index 000000000..57a76c36b
--- /dev/null
+++ b/src/main/jni/libjpeg/jccolor.c
@@ -0,0 +1,527 @@
+/*
+ * jccolor.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+// this enables unrolling null_convert's loop, and reading/write ints for speed
+#define ENABLE_ANDROID_NULL_CONVERT
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields */
+
+  /* Private state for RGB->YCC conversion */
+  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
+} my_color_converter;
+
+typedef my_color_converter * my_cconvert_ptr;
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
+ * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table and divide it up into eight parts, instead of
+ * doing eight alloc_small requests.  This lets us use a single table base
+ * address, which can be held in a register in the inner loops on many
+ * machines (more than can hold all eight addresses, anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
+#define R_CB_OFF	(3*(MAXJSAMPLE+1))
+#define G_CB_OFF	(4*(MAXJSAMPLE+1))
+#define B_CB_OFF	(5*(MAXJSAMPLE+1))
+#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF	(6*(MAXJSAMPLE+1))
+#define B_CR_OFF	(7*(MAXJSAMPLE+1))
+#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
+
+
+/*
+ * Initialize for RGB->YCC colorspace conversion.
+ */
+
+METHODDEF(void)
+rgb_ycc_start (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * rgb_ycc_tab;
+  INT32 i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(TABLE_SIZE * SIZEOF(INT32)));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+     * This ensures that the maximum output will round to MAXJSAMPLE
+     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     */
+    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+/*  B=>Cb and R=>Cr tables are the same
+    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+*/
+    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.
+ * The input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The caller
+ * can easily adjust the passed input_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert (j_compress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		 JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* Y */
+      outptr[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles Adobe-style CMYK->YCCK conversion,
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    outptr3 = output_buf[3][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
+      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
+      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+      /* K passes through as-is */
+      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
+      inptr += 4;
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles grayscale output with no conversion.
+ * The source can be either plain grayscale or YCbCr (since Y == gray).
+ */
+
+METHODDEF(void)
+grayscale_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+  int instride = cinfo->input_components;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      outptr[col] = inptr[0];	/* don't need GETJSAMPLE() here */
+      inptr += instride;
+    }
+  }
+}
+
+#ifdef ENABLE_ANDROID_NULL_CONVERT
+
+typedef unsigned long UINT32;
+
+#define B0(n)   ((n) & 0xFF)
+#define B1(n)   (((n) >> 8) & 0xFF)
+#define B2(n)   (((n) >> 16) & 0xFF)
+#define B3(n)   ((n) >> 24)
+
+#define PACK(a, b, c, d)    ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24))
+
+static int ptr_is_quad(const void* p)
+{
+    return (((const char*)p - (const char*)0) & 3) == 0;
+}
+
+static void copyquads(const UINT32 in[], UINT32 out0[], UINT32 out1[], UINT32 out2[], int col4)
+{
+    do {
+        UINT32 src0 = *in++;
+        UINT32 src1 = *in++;
+        UINT32 src2 = *in++;
+        // LEndian
+        *out0++ = PACK(B0(src0), B3(src0), B2(src1), B1(src2));
+        *out1++ = PACK(B1(src0), B0(src1), B3(src1), B2(src2));
+        *out2++ = PACK(B2(src0), B1(src1), B0(src2), B3(src2));
+    } while (--col4 != 0);
+}
+
+#endif
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles multi-component colorspaces without conversion.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert (j_compress_ptr cinfo,
+	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	      JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  register int ci;
+  int nc = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->image_width;
+
+#ifdef ENABLE_ANDROID_NULL_CONVERT
+    if (1 == num_rows && 3 == nc && num_cols > 0) {
+        JSAMPROW inptr = *input_buf;
+        JSAMPROW outptr0 = output_buf[0][output_row];
+        JSAMPROW outptr1 = output_buf[1][output_row];
+        JSAMPROW outptr2 = output_buf[2][output_row];
+        
+        int col = num_cols;
+        int col4 = col >> 2;
+        if (col4 > 0 && ptr_is_quad(inptr) && ptr_is_quad(outptr0) &&
+                        ptr_is_quad(outptr1) && ptr_is_quad(outptr2)) {
+            
+            const UINT32* in = (const UINT32*)inptr;
+            UINT32* out0 = (UINT32*)outptr0;
+            UINT32* out1 = (UINT32*)outptr1;
+            UINT32* out2 = (UINT32*)outptr2;
+            copyquads(in, out0, out1, out2, col4);
+            col &= 3;
+            if (0 == col)
+                return;
+            col4 <<= 2;
+            inptr += col4 * 3;  /* we read this 3 times per in copyquads */
+            outptr0 += col4;
+            outptr1 += col4;
+            outptr2 += col4;
+            /* fall through to while-loop */
+        }
+        do {
+            *outptr0++ = *inptr++;
+            *outptr1++ = *inptr++;
+            *outptr2++ = *inptr++;
+        } while (--col != 0);
+        return;
+    }
+SLOW:
+#endif
+  while (--num_rows >= 0) {
+    /* It seems fastest to make a separate pass for each component. */
+    for (ci = 0; ci < nc; ci++) {
+      inptr = *input_buf;
+      outptr = output_buf[ci][output_row];
+      for (col = 0; col < num_cols; col++) {
+	outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+	inptr += nc;
+      }
+    }
+    input_buf++;
+    output_row++;
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+null_method (j_compress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for input colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_converter (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_converter));
+  cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3
+    if (cinfo->input_components != RGB_PIXELSIZE)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+#endif /* else share code with YCbCr */
+
+  case JCS_YCbCr:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+  }
+
+  /* Check num_components, set conversion method based on requested space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_GRAYSCALE)
+      cconvert->pub.color_convert = grayscale_convert;
+    else if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_gray_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = grayscale_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = cmyk_ycck_convert;
+    } else if (cinfo->in_color_space == JCS_YCCK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:			/* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+	cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+    break;
+  }
+}
diff --git a/src/main/jni/libjpeg/jcdctmgr.c b/src/main/jni/libjpeg/jcdctmgr.c
new file mode 100644
index 000000000..61fa79b9e
--- /dev/null
+++ b/src/main/jni/libjpeg/jcdctmgr.c
@@ -0,0 +1,387 @@
+/*
+ * jcdctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_forward_dct pub;	/* public fields */
+
+  /* Pointer to the DCT routine actually in use */
+  forward_DCT_method_ptr do_dct;
+
+  /* The actual post-DCT divisors --- not identical to the quant table
+   * entries, because of scaling (especially for an unnormalized DCT).
+   * Each table is given in normal array order.
+   */
+  DCTELEM * divisors[NUM_QUANT_TBLS];
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case. */
+  float_DCT_method_ptr do_float_dct;
+  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller * my_fdct_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up
+ * the divisor table for each one.
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+  DCTELEM * dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Compute divisors for this quant table */
+    /* We may do this more than once for same table, but it's not a big deal */
+    switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+    case JDCT_ISLOW:
+      /* For LL&M IDCT method, divisors are equal to raw quantization
+       * coefficients multiplied by 8 (to counteract scaling).
+       */
+      if (fdct->divisors[qtblno] == NULL) {
+	fdct->divisors[qtblno] = (DCTELEM *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      DCTSIZE2 * SIZEOF(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 */
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	if (fdct->divisors[qtblno] == NULL) {
+	  fdct->divisors[qtblno] = (DCTELEM *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(DCTELEM));
+	}
+	dtbl = fdct->divisors[qtblno];
+	for (i = 0; i < DCTSIZE2; i++) {
+	  dtbl[i] = (DCTELEM)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-3);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 * What's actually stored is 1/divisor so that the inner loop can
+	 * use a multiplication rather than a division.
+	 */
+	FAST_FLOAT * fdtbl;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	if (fdct->float_divisors[qtblno] == NULL) {
+	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(FAST_FLOAT));
+	}
+	fdtbl = fdct->float_divisors[qtblno];
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fdtbl[i] = (FAST_FLOAT)
+	      (1.0 / (((double) qtbl->quantval[i] *
+		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ *
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_row/start_col, and moving to the right for any additional
+ * blocks. The quantized coefficients are returned in coef_blocks[].
+ */
+
+METHODDEF(void)
+forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+	     JDIMENSION start_row, JDIMENSION start_col,
+	     JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  forward_DCT_method_ptr do_dct = fdct->do_dct;
+  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register DCTELEM *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register DCTELEM temp, qval;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	qval = divisors[i];
+	temp = workspace[i];
+	/* Divide the coefficient value by qval, ensuring proper rounding.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 *
+	 * In most files, at least half of the output values will be zero
+	 * (at default quantization settings, more like three-quarters...)
+	 * so we should ensure that this case is fast.  On many machines,
+	 * a comparison is enough cheaper than a divide to make a special test
+	 * a win.  Since both inputs will be nonnegative, we need only test
+	 * for a < b to discover whether a/b is 0.
+	 * If your machine's division is fast enough, define FAST_DIVIDE.
+	 */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)	a /= b
+#else
+#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
+#endif
+	if (temp < 0) {
+	  temp = -temp;
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	  temp = -temp;
+	} else {
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	}
+	output_ptr[i] = (JCOEF) temp;
+      }
+    }
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		   JDIMENSION start_row, JDIMENSION start_col,
+		   JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  float_DCT_method_ptr do_dct = fdct->do_float_dct;
+  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register FAST_FLOAT *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = (FAST_FLOAT)
+	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register FAST_FLOAT temp;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	/* Apply the quantization and scaling factor */
+	temp = workspace[i] * divisors[i];
+	/* Round to nearest integer.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 * The maximum coefficient size is +-16K (for 12-bit data), so this
+	 * code should work for either 16-bit or 32-bit ints.
+	 */
+	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+      }
+    }
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize FDCT manager.
+ */
+
+GLOBAL(void)
+jinit_forward_dct (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct;
+  int i;
+
+  fdct = (my_fdct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_fdct_controller));
+  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
+  fdct->pub.start_pass = start_pass_fdctmgr;
+
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+    fdct->pub.forward_DCT = forward_DCT;
+    fdct->do_dct = jpeg_fdct_islow;
+    break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+    fdct->pub.forward_DCT = forward_DCT;
+    fdct->do_dct = jpeg_fdct_ifast;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+    fdct->pub.forward_DCT = forward_DCT_float;
+    fdct->do_float_dct = jpeg_fdct_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* Mark divisor tables unallocated */
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    fdct->divisors[i] = NULL;
+#ifdef DCT_FLOAT_SUPPORTED
+    fdct->float_divisors[i] = NULL;
+#endif
+  }
+}
diff --git a/src/main/jni/libjpeg/jchuff.c b/src/main/jni/libjpeg/jchuff.c
new file mode 100644
index 000000000..f23525054
--- /dev/null
+++ b/src/main/jni/libjpeg/jchuff.c
@@ -0,0 +1,909 @@
+/*
+ * jchuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jcphuff.c */
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).put_buffer = (src).put_buffer, \
+	 (dest).put_bits = (src).put_bits, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+#ifdef ENTROPY_OPT_SUPPORTED	/* Statistics tables for optimization */
+  long * dc_count_ptrs[NUM_HUFF_TBLS];
+  long * ac_count_ptrs[NUM_HUFF_TBLS];
+#endif
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder * huff_entropy_ptr;
+
+/* Working state while writing an MCU.
+ * This struct contains all the fields that are needed by subroutines.
+ */
+
+typedef struct {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  savable_state cur;		/* Current bit buffer & DC state */
+  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+} working_state;
+
+
+/* Forward declarations */
+METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
+					JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
+#ifdef ENTROPY_OPT_SUPPORTED
+METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
+					  JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
+#endif
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, dctbl, actbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->pub.encode_mcu = encode_mcu_gather;
+    entropy->pub.finish_pass = finish_pass_gather;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    entropy->pub.encode_mcu = encode_mcu_huff;
+    entropy->pub.finish_pass = finish_pass_huff;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+      /* Check for invalid table indexes */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+      if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->dc_count_ptrs[dctbl] == NULL)
+	entropy->dc_count_ptrs[dctbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long));
+      if (entropy->ac_count_ptrs[actbl] == NULL)
+	entropy->ac_count_ptrs[actbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long));
+#endif
+    } else {
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
+			      & entropy->dc_derived_tbls[dctbl]);
+      jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
+			      & entropy->ac_derived_tbls[actbl]);
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bit buffer to empty */
+  entropy->saved.put_buffer = 0;
+  entropy->saved.put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ *
+ * Note this is also used by jcphuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
+			 c_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  c_derived_tbl *dtbl;
+  int p, i, l, lastp, si, maxsymbol;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (c_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(c_derived_tbl));
+  dtbl = *pdtbl;
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  lastp = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+  
+  /* Figure C.3: generate encoding tables */
+  /* These are code and size indexed by symbol value */
+
+  /* Set all codeless symbols to have code length 0;
+   * this lets us detect duplicate VAL entries here, and later
+   * allows emit_bits to detect any attempt to emit such symbols.
+   */
+  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+
+  /* This is also a convenient place to check for out-of-range
+   * and duplicated VAL entries.  We allow 0..255 for AC symbols
+   * but only 0..15 for DC.  (We could constrain them further
+   * based on data depth and mode, but this seems enough.)
+   */
+  maxsymbol = isDC ? 15 : 255;
+
+  for (p = 0; p < lastp; p++) {
+    i = htbl->huffval[p];
+    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    dtbl->ehufco[i] = huffcode[p];
+    dtbl->ehufsi[i] = huffsize[p];
+  }
+}
+
+
+/* Outputting bytes to the file */
+
+/* Emit a byte, taking 'action' if must suspend. */
+#define emit_byte(state,val,action)  \
+	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(state)->free_in_buffer == 0)  \
+	    if (! dump_buffer(state))  \
+	      { action; } }
+
+
+LOCAL(boolean)
+dump_buffer (working_state * state)
+/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
+{
+  struct jpeg_destination_mgr * dest = state->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (state->cinfo))
+    return FALSE;
+  /* After a successful buffer dump, must reset buffer pointers */
+  state->next_output_byte = dest->next_output_byte;
+  state->free_in_buffer = dest->free_in_buffer;
+  return TRUE;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(boolean)
+emit_bits (working_state * state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = state->cur.put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
+  
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+    
+    emit_byte(state, c, return FALSE);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte(state, 0, return FALSE);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  state->cur.put_buffer = put_buffer; /* update state variables */
+  state->cur.put_bits = put_bits;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+flush_bits (working_state * state)
+{
+  if (! emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
+    return FALSE;
+  state->cur.put_buffer = 0;	/* and reset bit-buffer to empty */
+  state->cur.put_bits = 0;
+  return TRUE;
+}
+
+
+/* Encode a single block's worth of coefficients */
+
+LOCAL(boolean)
+encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
+		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  register int temp, temp2;
+  register int nbits;
+  register int k, r, i;
+  
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+  
+  temp = temp2 = block[0] - last_dc_val;
+
+  if (temp < 0) {
+    temp = -temp;		/* temp is abs value of input */
+    /* For a negative input, want temp2 = bitwise complement of abs(input) */
+    /* This code assumes we are on a two's complement machine */
+    temp2--;
+  }
+  
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+  
+  /* Emit the Huffman-coded symbol for the number of bits */
+  if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+    return FALSE;
+
+  /* Emit that number of bits of the value, if positive, */
+  /* or the complement of its magnitude, if negative. */
+  if (nbits)			/* emit_bits rejects calls with size 0 */
+    if (! emit_bits(state, (unsigned int) temp2, nbits))
+      return FALSE;
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+  
+  r = 0;			/* r = run length of zeros */
+  
+  for (k = 1; k < DCTSIZE2; k++) {
+    if ((temp = block[jpeg_natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
+	  return FALSE;
+	r -= 16;
+      }
+
+      temp2 = temp;
+      if (temp < 0) {
+	temp = -temp;		/* temp is abs value of input */
+	/* This code assumes we are on a two's complement machine */
+	temp2--;
+      }
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+      
+      /* Emit Huffman symbol for run length / number of bits */
+      i = (r << 4) + nbits;
+      if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
+	return FALSE;
+
+      /* Emit that number of bits of the value, if positive, */
+      /* or the complement of its magnitude, if negative. */
+      if (! emit_bits(state, (unsigned int) temp2, nbits))
+	return FALSE;
+      
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
+      return FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(boolean)
+emit_restart (working_state * state, int restart_num)
+{
+  int ci;
+
+  if (! flush_bits(state))
+    return FALSE;
+
+  emit_byte(state, 0xFF, return FALSE);
+  emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
+    state->cur.last_dc_val[ci] = 0;
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of Huffman-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! emit_restart(&state, entropy->next_restart_num))
+	return FALSE;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    if (! encode_one_block(&state,
+			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
+			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
+			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+      return FALSE;
+    /* Update last_dc_val */
+    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  /* Completed MCU, so update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+
+  /* Load up working state ... flush_bits needs it */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Flush out the last data */
+  if (! flush_bits(&state))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+#ifdef ENTROPY_OPT_SUPPORTED
+
+
+/* Process a single block's worth of coefficients */
+
+LOCAL(void)
+htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+		 long dc_counts[], long ac_counts[])
+{
+  register int temp;
+  register int nbits;
+  register int k, r;
+  
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+  
+  temp = block[0] - last_dc_val;
+  if (temp < 0)
+    temp = -temp;
+  
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+  /* Count the Huffman symbol for the number of bits */
+  dc_counts[nbits]++;
+  
+  /* Encode the AC coefficients per section F.1.2.2 */
+  
+  r = 0;			/* r = run length of zeros */
+  
+  for (k = 1; k < DCTSIZE2; k++) {
+    if ((temp = block[jpeg_natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	ac_counts[0xF0]++;
+	r -= 16;
+      }
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      if (temp < 0)
+	temp = -temp;
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+      
+      /* Count Huffman symbol for run length / number of bits */
+      ac_counts[(r << 4) + nbits]++;
+      
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    ac_counts[0]++;
+}
+
+
+/*
+ * Trial-encode one MCU's worth of Huffman-compressed coefficients.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(boolean)
+encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Re-initialize DC predictions to 0 */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+	entropy->saved.last_dc_val[ci] = 0;
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Generate the best Huffman code table for the given counts, fill htbl.
+ * Note this is also used by jcphuff.c.
+ *
+ * The JPEG standard requires that no symbol be assigned a codeword of all
+ * one bits (so that padding bits added at the end of a compressed segment
+ * can't look like a valid code).  Because of the canonical ordering of
+ * codewords, this just means that there must be an unused slot in the
+ * longest codeword length category.  Section K.2 of the JPEG spec suggests
+ * reserving such a slot by pretending that symbol 256 is a valid symbol
+ * with count 1.  In theory that's not optimal; giving it count zero but
+ * including it in the symbol set anyway should give a better Huffman code.
+ * But the theoretically better code actually seems to come out worse in
+ * practice, because it produces more all-ones bytes (which incur stuffed
+ * zero bytes in the final file).  In any case the difference is tiny.
+ *
+ * The JPEG standard requires Huffman codes to be no more than 16 bits long.
+ * If some symbols have a very small but nonzero probability, the Huffman tree
+ * must be adjusted to meet the code length restriction.  We currently use
+ * the adjustment method suggested in JPEG section K.2.  This method is *not*
+ * optimal; it may not choose the best possible limited-length code.  But
+ * typically only very-low-frequency symbols will be given less-than-optimal
+ * lengths, so the code is almost optimal.  Experimental comparisons against
+ * an optimal limited-length-code algorithm indicate that the difference is
+ * microscopic --- usually less than a hundredth of a percent of total size.
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+ */
+
+GLOBAL(void)
+jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
+{
+#define MAX_CLEN 32		/* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
+  int codesize[257];		/* codesize[k] = code length of symbol k */
+  int others[257];		/* next symbol in current branch of tree */
+  int c1, c2;
+  int p, i, j;
+  long v;
+
+  /* This algorithm is explained in section K.2 of the JPEG standard */
+
+  MEMZERO(bits, SIZEOF(bits));
+  MEMZERO(codesize, SIZEOF(codesize));
+  for (i = 0; i < 257; i++)
+    others[i] = -1;		/* init links to empty */
+  
+  freq[256] = 1;		/* make sure 256 has a nonzero count */
+  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+   * that no real symbol is given code-value of all ones, because 256
+   * will be placed last in the largest codeword category.
+   */
+
+  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
+
+  for (;;) {
+    /* Find the smallest nonzero frequency, set c1 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c1 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v) {
+	v = freq[i];
+	c1 = i;
+      }
+    }
+
+    /* Find the next smallest nonzero frequency, set c2 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c2 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v && i != c1) {
+	v = freq[i];
+	c2 = i;
+      }
+    }
+
+    /* Done if we've merged everything into one frequency */
+    if (c2 < 0)
+      break;
+    
+    /* Else merge the two counts/trees */
+    freq[c1] += freq[c2];
+    freq[c2] = 0;
+
+    /* Increment the codesize of everything in c1's tree branch */
+    codesize[c1]++;
+    while (others[c1] >= 0) {
+      c1 = others[c1];
+      codesize[c1]++;
+    }
+    
+    others[c1] = c2;		/* chain c2 onto c1's tree branch */
+    
+    /* Increment the codesize of everything in c2's tree branch */
+    codesize[c2]++;
+    while (others[c2] >= 0) {
+      c2 = others[c2];
+      codesize[c2]++;
+    }
+  }
+
+  /* Now count the number of symbols of each code length */
+  for (i = 0; i <= 256; i++) {
+    if (codesize[i]) {
+      /* The JPEG standard seems to think that this can't happen, */
+      /* but I'm paranoid... */
+      if (codesize[i] > MAX_CLEN)
+	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+
+      bits[codesize[i]]++;
+    }
+  }
+
+  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+   * Huffman procedure assigned any such lengths, we must adjust the coding.
+   * Here is what the JPEG spec says about how this next bit works:
+   * Since symbols are paired for the longest Huffman code, the symbols are
+   * removed from this length category two at a time.  The prefix for the pair
+   * (which is one bit shorter) is allocated to one of the pair; then,
+   * skipping the BITS entry for that prefix length, a code word from the next
+   * shortest nonzero BITS entry is converted into a prefix for two code words
+   * one bit longer.
+   */
+  
+  for (i = MAX_CLEN; i > 16; i--) {
+    while (bits[i] > 0) {
+      j = i - 2;		/* find length of new prefix to be used */
+      while (bits[j] == 0)
+	j--;
+      
+      bits[i] -= 2;		/* remove two symbols */
+      bits[i-1]++;		/* one goes in this length */
+      bits[j+1] += 2;		/* two new symbols in this length */
+      bits[j]--;		/* symbol of this length is now a prefix */
+    }
+  }
+
+  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
+  while (bits[i] == 0)		/* find largest codelength still in use */
+    i--;
+  bits[i]--;
+  
+  /* Return final symbol counts (only for lengths 0..16) */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+  
+  /* Return a list of the symbols sorted by code length */
+  /* It's not real clear to me why we don't need to consider the codelength
+   * changes made above, but the JPEG spec seems to think this works.
+   */
+  p = 0;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    for (j = 0; j <= 255; j++) {
+      if (codesize[j] == i) {
+	htbl->huffval[p] = (UINT8) j;
+	p++;
+      }
+    }
+  }
+
+  /* Set sent_table FALSE so updated table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, dctbl, actbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];
+  boolean did_ac[NUM_HUFF_TBLS];
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did_dc, SIZEOF(did_dc));
+  MEMZERO(did_ac, SIZEOF(did_ac));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (! did_dc[dctbl]) {
+      htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl];
+      if (*htblptr == NULL)
+	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
+      did_dc[dctbl] = TRUE;
+    }
+    if (! did_ac[actbl]) {
+      htblptr = & cinfo->ac_huff_tbl_ptrs[actbl];
+      if (*htblptr == NULL)
+	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
+      did_ac[actbl] = TRUE;
+    }
+  }
+}
+
+
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_huff_encoder (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
+  entropy->pub.start_pass = start_pass_huff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
+#endif
+  }
+}
diff --git a/src/main/jni/libjpeg/jchuff.h b/src/main/jni/libjpeg/jchuff.h
new file mode 100644
index 000000000..a9599fc1e
--- /dev/null
+++ b/src/main/jni/libjpeg/jchuff.h
@@ -0,0 +1,47 @@
+/*
+ * jchuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy encoding routines
+ * that are shared between the sequential encoder (jchuff.c) and the
+ * progressive encoder (jcphuff.c).  No other modules need to see these.
+ */
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit data;
+ * -16384 .. +16383 for 12-bit data.
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MAX_COEF_BITS 10
+#else
+#define MAX_COEF_BITS 14
+#endif
+
+/* Derived data constructed for each Huffman table */
+
+typedef struct {
+  unsigned int ehufco[256];	/* code for each symbol */
+  char ehufsi[256];		/* length of code for each symbol */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_c_derived_tbl	jMkCDerived
+#define jpeg_gen_optimal_table	jGenOptTbl
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_c_derived_tbl
+	JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
+	     c_derived_tbl ** pdtbl));
+
+/* Generate an optimal table definition given the specified counts */
+EXTERN(void) jpeg_gen_optimal_table
+	JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
diff --git a/src/main/jni/libjpeg/jcinit.c b/src/main/jni/libjpeg/jcinit.c
new file mode 100644
index 000000000..5efffe331
--- /dev/null
+++ b/src/main/jni/libjpeg/jcinit.c
@@ -0,0 +1,72 @@
+/*
+ * jcinit.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master (j_compress_ptr cinfo)
+{
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* Preprocessing */
+  if (! cinfo->raw_data_in) {
+    jinit_color_converter(cinfo);
+    jinit_downsampler(cinfo);
+    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+  }
+  /* Forward DCT */
+  jinit_forward_dct(cinfo);
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* Need a full-image coefficient buffer in any multi-pass mode. */
+  jinit_c_coef_controller(cinfo,
+		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/src/main/jni/libjpeg/jcmainct.c b/src/main/jni/libjpeg/jcmainct.c
new file mode 100644
index 000000000..e0279a7e0
--- /dev/null
+++ b/src/main/jni/libjpeg/jcmainct.c
@@ -0,0 +1,293 @@
+/*
+ * jcmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Note: currently, there is no operating mode in which a full-image buffer
+ * is needed at this step.  If there were, that mode could not be used with
+ * "raw data" input, since this module is bypassed in that case.  However,
+ * we've left the code here for possible use in special applications.
+ */
+#undef FULL_MAIN_BUFFER_SUPPORTED
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields */
+
+  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
+  boolean suspended;		/* remember if we suspended output */
+  J_BUF_MODE pass_mode;		/* current operating mode */
+
+  /* If using just a strip buffer, this points to the entire set of buffers
+   * (we allocate one for each component).  In the full-image case, this
+   * points to the currently accessible strips of the virtual arrays.
+   */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  /* If using full-image storage, this array holds pointers to virtual-array
+   * control blocks for each component.  Unused if not full-image storage.
+   */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+METHODDEF(void) process_data_buffer_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  /* Do nothing in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  main->cur_iMCU_row = 0;	/* initialize counters */
+  main->rowgroup_ctr = 0;
+  main->suspended = FALSE;
+  main->pass_mode = pass_mode;	/* save mode for use by process_data */
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    if (main->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+    main->pub.process_data = process_data_simple_main;
+    break;
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  case JBUF_SAVE_SOURCE:
+  case JBUF_CRANK_DEST:
+  case JBUF_SAVE_AND_PASS:
+    if (main->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    main->pub.process_data = process_data_buffer_main;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Read input data if we haven't filled the main buffer yet */
+    if (main->rowgroup_ctr < DCTSIZE)
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					main->buffer, &main->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+
+    /* If we don't have a full iMCU row buffered, return to application for
+     * more data.  Note that preprocessor will always pad to fill the iMCU row
+     * at the bottom of the image.
+     */
+    if (main->rowgroup_ctr != DCTSIZE)
+      return;
+
+    /* Send the completed row to the compressor */
+    if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
+      /* If compressor did not consume the whole row, then we must need to
+       * suspend processing and return to the application.  In this situation
+       * we pretend we didn't yet consume the last input row; otherwise, if
+       * it happened to be the last row of the image, the application would
+       * think we were done.
+       */
+      if (! main->suspended) {
+	(*in_row_ctr)--;
+	main->suspended = TRUE;
+      }
+      return;
+    }
+    /* We did finish the row.  Undo our little suspension hack if a previous
+     * call suspended; then mark the main buffer empty.
+     */
+    if (main->suspended) {
+      (*in_row_ctr)++;
+      main->suspended = FALSE;
+    }
+    main->rowgroup_ctr = 0;
+    main->cur_iMCU_row++;
+  }
+}
+
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+
+/*
+ * Process some data.
+ * This routine handles all of the modes that use a full-size buffer.
+ */
+
+METHODDEF(void)
+process_data_buffer_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean writing = (main->pass_mode != JBUF_CRANK_DEST);
+
+  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Realign the virtual buffers if at the start of an iMCU row. */
+    if (main->rowgroup_ctr == 0) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	main->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, main->whole_image[ci],
+	   main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
+	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
+      }
+      /* In a read pass, pretend we just read some source data. */
+      if (! writing) {
+	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
+	main->rowgroup_ctr = DCTSIZE;
+      }
+    }
+
+    /* If a write pass, read input data until the current iMCU row is full. */
+    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
+    if (writing) {
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					main->buffer, &main->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+      /* Return to application if we need more data to fill the iMCU row. */
+      if (main->rowgroup_ctr < DCTSIZE)
+	return;
+    }
+
+    /* Emit data, unless this is a sink-only pass. */
+    if (main->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
+	/* If compressor did not consume the whole row, then we must need to
+	 * suspend processing and return to the application.  In this situation
+	 * we pretend we didn't yet consume the last input row; otherwise, if
+	 * it happened to be the last row of the image, the application would
+	 * think we were done.
+	 */
+	if (! main->suspended) {
+	  (*in_row_ctr)--;
+	  main->suspended = TRUE;
+	}
+	return;
+      }
+      /* We did finish the row.  Undo our little suspension hack if a previous
+       * call suspended; then mark the main buffer empty.
+       */
+      if (main->suspended) {
+	(*in_row_ctr)++;
+	main->suspended = FALSE;
+      }
+    }
+
+    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
+    main->rowgroup_ctr = 0;
+    main->cur_iMCU_row++;
+  }
+}
+
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr main;
+  int ci;
+  jpeg_component_info *compptr;
+
+  main = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = (struct jpeg_c_main_controller *) main;
+  main->pub.start_pass = start_pass_main;
+
+  /* We don't need to create a buffer in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  /* Create the buffer.  It holds downsampled data, so each component
+   * may be of a different size.
+   */
+  if (need_full_buffer) {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component */
+    /* Note we pad the bottom to a multiple of the iMCU height */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 compptr->width_in_blocks * DCTSIZE,
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor) * DCTSIZE,
+	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    main->whole_image[0] = NULL; /* flag for no virtual arrays */
+#endif
+    /* Allocate a strip buffer for each component */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      main->buffer[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 compptr->width_in_blocks * DCTSIZE,
+	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+    }
+  }
+}
diff --git a/src/main/jni/libjpeg/jcmarker.c b/src/main/jni/libjpeg/jcmarker.c
new file mode 100644
index 000000000..3d1e6c6d5
--- /dev/null
+++ b/src/main/jni/libjpeg/jcmarker.c
@@ -0,0 +1,664 @@
+/*
+ * jcmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+  
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  
+  M_DHT   = 0xc4,
+  
+  M_DAC   = 0xcc,
+  
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+  
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  
+  M_TEM   = 0x01,
+  
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer * my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte (j_compress_ptr cinfo, int val)
+/* Emit a byte */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *(dest->next_output_byte)++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0) {
+    if (! (*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  }
+}
+
+
+LOCAL(void)
+emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker code */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int) mark);
+}
+
+
+LOCAL(void)
+emit_2bytes (j_compress_ptr cinfo, int value)
+/* Emit a 2-byte integer; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);
+  emit_byte(cinfo, value & 0xFF);
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt (j_compress_ptr cinfo, int index)
+/* Emit a DQT marker */
+/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
+{
+  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
+  int prec;
+  int i;
+
+  if (qtbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  prec = 0;
+  for (i = 0; i < DCTSIZE2; i++) {
+    if (qtbl->quantval[i] > 255)
+      prec = 1;
+  }
+
+  if (! qtbl->sent_table) {
+    emit_marker(cinfo, M_DQT);
+
+    emit_2bytes(cinfo, prec ? DCTSIZE2*2 + 1 + 2 : DCTSIZE2 + 1 + 2);
+
+    emit_byte(cinfo, index + (prec<<4));
+
+    for (i = 0; i < DCTSIZE2; i++) {
+      /* The table entries must be emitted in zigzag order. */
+      unsigned int qval = qtbl->quantval[jpeg_natural_order[i]];
+      if (prec)
+	emit_byte(cinfo, (int) (qval >> 8));
+      emit_byte(cinfo, (int) (qval & 0xFF));
+    }
+
+    qtbl->sent_table = TRUE;
+  }
+
+  return prec;
+}
+
+
+LOCAL(void)
+emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
+/* Emit a DHT marker */
+{
+  JHUFF_TBL * htbl;
+  int length, i;
+  
+  if (is_ac) {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;		/* output index has AC bit set */
+  } else {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
+  
+  if (! htbl->sent_table) {
+    emit_marker(cinfo, M_DHT);
+    
+    length = 0;
+    for (i = 1; i <= 16; i++)
+      length += htbl->bits[i];
+    
+    emit_2bytes(cinfo, length + 2 + 1 + 16);
+    emit_byte(cinfo, index);
+    
+    for (i = 1; i <= 16; i++)
+      emit_byte(cinfo, htbl->bits[i]);
+    
+    for (i = 0; i < length; i++)
+      emit_byte(cinfo, htbl->huffval[i]);
+    
+    htbl->sent_table = TRUE;
+  }
+}
+
+
+LOCAL(void)
+emit_dac (j_compress_ptr cinfo)
+/* Emit a DAC marker */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];
+  char ac_in_use[NUM_ARITH_TBLS];
+  int length, i;
+  jpeg_component_info *compptr;
+  
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    dc_in_use[compptr->dc_tbl_no] = 1;
+    ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+  
+  length = 0;
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+  
+  emit_marker(cinfo, M_DAC);
+  
+  emit_2bytes(cinfo, length*2 + 2);
+  
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    if (dc_in_use[i]) {
+      emit_byte(cinfo, i);
+      emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+    }
+    if (ac_in_use[i]) {
+      emit_byte(cinfo, i + 0x10);
+      emit_byte(cinfo, cinfo->arith_ac_K[i]);
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri (j_compress_ptr cinfo)
+/* Emit a DRI marker */
+{
+  emit_marker(cinfo, M_DRI);
+  
+  emit_2bytes(cinfo, 4);	/* fixed length */
+
+  emit_2bytes(cinfo, (int) cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
+/* Emit a SOF marker */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, code);
+  
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Make sure image isn't bigger than SOF field can handle */
+  if ((long) cinfo->image_height > 65535L ||
+      (long) cinfo->image_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int) cinfo->image_height);
+  emit_2bytes(cinfo, (int) cinfo->image_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos (j_compress_ptr cinfo)
+/* Emit a SOS marker */
+{
+  int i, td, ta;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, M_SOS);
+  
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, cinfo->comps_in_scan);
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    emit_byte(cinfo, compptr->component_id);
+    td = compptr->dc_tbl_no;
+    ta = compptr->ac_tbl_no;
+    if (cinfo->progressive_mode) {
+      /* Progressive mode: only DC or only AC tables are used in one scan;
+       * furthermore, Huffman coding of DC refinement uses no table at all.
+       * We emit 0 for unused field(s); this is recommended by the P&M text
+       * but does not seem to be specified in the standard.
+       */
+      if (cinfo->Ss == 0) {
+	ta = 0;			/* DC scan */
+	if (cinfo->Ah != 0 && !cinfo->arith_code)
+	  td = 0;		/* no DC table either */
+      } else {
+	td = 0;			/* AC scan */
+      }
+    }
+    emit_byte(cinfo, (td << 4) + ta);
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_jfif_app0 (j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker */
+{
+  /*
+   * Length of APP0 block	(2 bytes)
+   * Block ID			(4 bytes - ASCII "JFIF")
+   * Zero byte			(1 byte to terminate the ID string)
+   * Version Major, Minor	(2 bytes - major first)
+   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu			(2 bytes - dots per unit horizontal)
+   * Ydpu			(2 bytes - dots per unit vertical)
+   * Thumbnail X size		(1 byte)
+   * Thumbnail Y size		(1 byte)
+   */
+  
+  emit_marker(cinfo, M_APP0);
+  
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
+
+  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int) cinfo->X_density);
+  emit_2bytes(cinfo, (int) cinfo->Y_density);
+  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);
+}
+
+
+LOCAL(void)
+emit_adobe_app14 (j_compress_ptr cinfo)
+/* Emit an Adobe APP14 marker */
+{
+  /*
+   * Length of APP14 block	(2 bytes)
+   * Block ID			(5 bytes - ASCII "Adobe")
+   * Version Number		(2 bytes - currently 100)
+   * Flags0			(2 bytes - currently 0)
+   * Flags1			(2 bytes - currently 0)
+   * Color transform		(1 byte)
+   *
+   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
+   * now in circulation seem to use Version = 100, so that's what we write.
+   *
+   * We write the color transform byte as 1 if the JPEG color space is
+   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
+   * whether the encoder performed a transformation, which is pretty useless.
+   */
+  
+  emit_marker(cinfo, M_APP14);
+  
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x64);
+  emit_byte(cinfo, 0x6F);
+  emit_byte(cinfo, 0x62);
+  emit_byte(cinfo, 0x65);
+  emit_2bytes(cinfo, 100);	/* Version */
+  emit_2bytes(cinfo, 0);	/* Flags0 */
+  emit_2bytes(cinfo, 0);	/* Flags1 */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_YCbCr:
+    emit_byte(cinfo, 1);	/* Color transform = 1 */
+    break;
+  case JCS_YCCK:
+    emit_byte(cinfo, 2);	/* Color transform = 2 */
+    break;
+  default:
+    emit_byte(cinfo, 0);	/* Color transform = 0 */
+    break;
+  }
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker header */
+{
+  if (datalen > (unsigned int) 65533)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER) marker);
+
+  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
+}
+
+METHODDEF(void)
+write_marker_byte (j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker should NOT
+ * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  emit_marker(cinfo, M_SOI);	/* first the SOI */
+
+  /* SOI is defined to reset restart interval to 0 */
+  marker->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header (j_compress_ptr cinfo)
+{
+  int ci, prec;
+  boolean is_baseline;
+  jpeg_component_info *compptr;
+  
+  /* Emit DQT for each quantization table.
+   * Note that emit_dqt() suppresses any duplicate tables.
+   */
+  prec = 0;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  }
+  /* now prec is nonzero iff there are any 16-bit quant tables. */
+
+  /* Check for a non-baseline specification.
+   * Note we assume that Huffman table numbers won't be changed later.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->data_precision != 8) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+	is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* If it's baseline except for quantizer size, warn the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Emit the proper SOF marker */
+  if (cinfo->arith_code) {
+    emit_sof(cinfo, M_SOF9);	/* SOF code for arithmetic coding */
+  } else {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+    else if (is_baseline)
+      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+    else
+      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+  }
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  int i;
+  jpeg_component_info *compptr;
+
+  if (cinfo->arith_code) {
+    /* Emit arith conditioning info.  We may have some duplication
+     * if the file has multiple scans, but it's so small it's hardly
+     * worth worrying about.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Emit Huffman tables.
+     * Note that emit_dht() suppresses any duplicate tables.
+     */
+    for (i = 0; i < cinfo->comps_in_scan; i++) {
+      compptr = cinfo->cur_comp_info[i];
+      if (cinfo->progressive_mode) {
+	/* Progressive mode: only DC or only AC tables are used in one scan */
+	if (cinfo->Ss == 0) {
+	  if (cinfo->Ah == 0)	/* DC needs no table for refinement scan */
+	    emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+	} else {
+	  emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+	}
+      } else {
+	/* Sequential mode: need both DC and AC tables */
+	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+      }
+    }
+  }
+
+  /* Emit DRI if required --- note that DRI value could change for each scan.
+   * We avoid wasting space with unnecessary DRIs, however.
+   */
+  if (cinfo->restart_interval != marker->last_restart_interval) {
+    emit_dri(cinfo);
+    marker->last_restart_interval = cinfo->restart_interval;
+  }
+
+  emit_sos(cinfo);
+}
+
+
+/*
+ * Write datastream trailer.
+ */
+
+METHODDEF(void)
+write_file_trailer (j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only (j_compress_ptr cinfo)
+{
+  int i;
+
+  emit_marker(cinfo, M_SOI);
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if (cinfo->quant_tbl_ptrs[i] != NULL)
+      (void) emit_dqt(cinfo, i);
+  }
+
+  if (! cinfo->arith_code) {
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker;
+
+  /* Create the subobject */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_marker_writer));
+  cinfo->marker = (struct jpeg_marker_writer *) marker;
+  /* Initialize method pointers */
+  marker->pub.write_file_header = write_file_header;
+  marker->pub.write_frame_header = write_frame_header;
+  marker->pub.write_scan_header = write_scan_header;
+  marker->pub.write_file_trailer = write_file_trailer;
+  marker->pub.write_tables_only = write_tables_only;
+  marker->pub.write_marker_header = write_marker_header;
+  marker->pub.write_marker_byte = write_marker_byte;
+  /* Initialize private state */
+  marker->last_restart_interval = 0;
+}
diff --git a/src/main/jni/libjpeg/jcmaster.c b/src/main/jni/libjpeg/jcmaster.c
new file mode 100644
index 000000000..aab4020b8
--- /dev/null
+++ b/src/main/jni/libjpeg/jcmaster.c
@@ -0,0 +1,590 @@
+/*
+ * jcmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work 
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef enum {
+	main_pass,		/* input data, also do first output step */
+	huff_opt_pass,		/* Huffman code optimization pass */
+	output_pass		/* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;	/* public fields */
+
+  c_pass_type pass_type;	/* the type of the current pass */
+
+  int pass_number;		/* # of passes completed */
+  int total_passes;		/* total # of passes needed */
+
+  int scan_number;		/* current index in scan_info[] */
+} my_comp_master;
+
+typedef my_comp_master * my_master_ptr;
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+LOCAL(void)
+initial_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Sanity check on image dimensions */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0 || cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* For compression, we never do DCT scaling. */
+    compptr->DCT_scaled_size = DCTSIZE;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed (this flag isn't actually used for compression) */
+    compptr->component_needed = TRUE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(void)
+validate_script (j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info * scanptr;
+  int scanno, ncomps, ci, coefi, thisi;
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS];
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int * last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   */
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    last_bitpos_ptr = & last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+	*last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
+       * seems wrong: the upper bound ought to depend on data precision.
+       * Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+#if BITS_IN_JSAMPLE == 8
+#define MAX_AH_AL 10
+#else
+#define MAX_AH_AL 13
+#endif
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+	if (Se != 0)		/* DC and AC together not OK */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+	if (ncomps != 1)	/* AC scans must be for only one component */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	for (coefi = Ss; coefi <= Se; coefi++) {
+	  if (last_bitpos_ptr[coefi] < 0) {
+	    /* first scan of this coefficient */
+	    if (Ah != 0)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  } else {
+	    /* not first scan */
+	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  }
+	  last_bitpos_ptr[coefi] = Al;
+	}
+      }
+#endif
+    } else {
+      /* For sequential JPEG, all progression parameters must be these: */
+      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+	thisi = scanptr->component_index[ci];
+	if (component_sent[thisi])
+	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+	component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (! component_sent[ci])
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+LOCAL(void)
+select_scan_parameters (j_compress_ptr cinfo)
+/* Set up the scan parameters for the current scan */
+{
+  int ci;
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr) cinfo->master;
+    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+	&cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    cinfo->Ss = scanptr->Ss;
+    cinfo->Se = scanptr->Se;
+    cinfo->Ah = scanptr->Ah;
+    cinfo->Al = scanptr->Al;
+  }
+  else
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+    cinfo->Ss = 0;
+    cinfo->Se = DCTSIZE2-1;
+    cinfo->Ah = 0;
+    cinfo->Al = 0;
+  }
+}
+
+
+LOCAL(void)
+per_scan_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = DCTSIZE;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+    
+    cinfo->blocks_in_MCU = 0;
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+    
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: will collect input data, and do either Huffman
+     * optimization or data output for the first scan.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (! cinfo->raw_data_in) {
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+				(master->total_passes > 1 ?
+				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No immediate data output; postpone writing frame/scan headers */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Will write frame/scan headers at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Special case: Huffman DC refinement scans need no Huffman table
+     * and therefore we can skip the optimization pass for them.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;
+    /*FALLTHROUGH*/
+#endif
+  case output_pass:
+    /* Do a data-output pass. */
+    /* We need not repeat per-scan setup if prior optimization pass did it. */
+    if (! cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* We emit frame/scan headers now */
+    if (master->scan_number == 0)
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * In multi-pass processing, this routine is not used.
+ */
+
+METHODDEF(void)
+pass_startup (j_compress_ptr cinfo)
+{
+  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
+
+  (*cinfo->marker->write_frame_header) (cinfo);
+  (*cinfo->marker->write_scan_header) (cinfo);
+}
+
+
+/*
+ * Finish up at end of pass.
+ */
+
+METHODDEF(void)
+finish_pass_master (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* The entropy coder always needs an end-of-pass call,
+   * either to analyze statistics or to flush its output buffer.
+   */
+  (*cinfo->entropy->finish_pass) (cinfo);
+
+  /* Update state for next pass */
+  switch (master->pass_type) {
+  case main_pass:
+    /* next pass is either output of scan 0 (after optimization)
+     * or output of scan 1 (if no optimization).
+     */
+    master->pass_type = output_pass;
+    if (! cinfo->optimize_coding)
+      master->scan_number++;
+    break;
+  case huff_opt_pass:
+    /* next pass is always output of current scan */
+    master->pass_type = output_pass;
+    break;
+  case output_pass:
+    /* next pass is either optimization or output of next scan */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    master->scan_number++;
+    break;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control.
+ */
+
+GLOBAL(void)
+jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_comp_master));
+  cinfo->master = (struct jpeg_comp_master *) master;
+  master->pub.prepare_for_pass = prepare_for_pass;
+  master->pub.pass_startup = pass_startup;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.is_last_pass = FALSE;
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo);
+
+  if (cinfo->scan_info != NULL) {
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    validate_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    cinfo->num_scans = 1;
+  }
+
+  if (cinfo->progressive_mode)	/*  TEMPORARY HACK ??? */
+    cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
+
+  /* Initialize my private state */
+  if (transcode_only) {
+    /* no main pass in transcoding */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    else
+      master->pass_type = output_pass;
+  } else {
+    /* for normal compression, first pass is always this type: */
+    master->pass_type = main_pass;
+  }
+  master->scan_number = 0;
+  master->pass_number = 0;
+  if (cinfo->optimize_coding)
+    master->total_passes = cinfo->num_scans * 2;
+  else
+    master->total_passes = cinfo->num_scans;
+}
diff --git a/src/main/jni/libjpeg/jcomapi.c b/src/main/jni/libjpeg/jcomapi.c
new file mode 100644
index 000000000..9b1fa7568
--- /dev/null
+++ b/src/main/jni/libjpeg/jcomapi.c
@@ -0,0 +1,106 @@
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;	/* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  JQUANT_TBL *tbl;
+
+  tbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  JHUFF_TBL *tbl;
+
+  tbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
diff --git a/src/main/jni/libjpeg/jconfig.h b/src/main/jni/libjpeg/jconfig.h
new file mode 100644
index 000000000..15a98177b
--- /dev/null
+++ b/src/main/jni/libjpeg/jconfig.h
@@ -0,0 +1,156 @@
+/* android jconfig.h */
+/*
+ * jconfig.doc
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file documents the configuration options that are required to
+ * customize the JPEG software for a particular system.
+ *
+ * The actual configuration options for a particular installation are stored
+ * in jconfig.h.  On many machines, jconfig.h can be generated automatically
+ * or copied from one of the "canned" jconfig files that we supply.  But if
+ * you need to generate a jconfig.h file by hand, this file tells you how.
+ *
+ * DO NOT EDIT THIS FILE --- IT WON'T ACCOMPLISH ANYTHING.
+ * EDIT A COPY NAMED JCONFIG.H.
+ */
+
+
+/*
+ * These symbols indicate the properties of your machine or compiler.
+ * #define the symbol if yes, #undef it if no.
+ */
+
+/* Does your compiler support function prototypes?
+ * (If not, you also need to use ansi2knr, see install.doc)
+ */
+#define HAVE_PROTOTYPES
+
+/* Does your compiler support the declaration "unsigned char" ?
+ * How about "unsigned short" ?
+ */
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+
+/* Define "void" as "char" if your compiler doesn't know about type void.
+ * NOTE: be sure to define void such that "void *" represents the most general
+ * pointer type, e.g., that returned by malloc().
+ */
+/* #define void char */
+
+/* Define "const" as empty if your compiler doesn't know the "const" keyword.
+ */
+/* #define const */
+
+/* Define this if an ordinary "char" type is unsigned.
+ * If you're not sure, leaving it undefined will work at some cost in speed.
+ * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal.
+ */
+#undef CHAR_IS_UNSIGNED
+
+/* Define this if your system has an ANSI-conforming <stddef.h> file.
+ */
+#define HAVE_STDDEF_H
+
+/* Define this if your system has an ANSI-conforming <stdlib.h> file.
+ */
+#define HAVE_STDLIB_H
+
+/* Define this if your system does not have an ANSI/SysV <string.h>,
+ * but does have a BSD-style <strings.h>.
+ */
+#undef NEED_BSD_STRINGS
+
+/* Define this if your system does not provide typedef size_t in any of the
+ * ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in
+ * <sys/types.h> instead.
+ */
+#undef NEED_SYS_TYPES_H
+
+/* For 80x86 machines, you need to define NEED_FAR_POINTERS,
+ * unless you are using a large-data memory model or 80386 flat-memory mode.
+ * On less brain-damaged CPUs this symbol must not be defined.
+ * (Defining this symbol causes large data structures to be referenced through
+ * "far" pointers and to be allocated with a special version of malloc.)
+ */
+#undef NEED_FAR_POINTERS
+
+/* Define this if your linker needs global names to be unique in less
+ * than the first 15 characters.
+ */
+#undef NEED_SHORT_EXTERNAL_NAMES
+
+/* Although a real ANSI C compiler can deal perfectly well with pointers to
+ * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI
+ * and pseudo-ANSI compilers get confused.  To keep one of these bozos happy,
+ * define INCOMPLETE_TYPES_BROKEN.  This is not recommended unless you
+ * actually get "missing structure definition" warnings or errors while
+ * compiling the JPEG code.
+ */
+#undef INCOMPLETE_TYPES_BROKEN
+
+
+/*
+ * The following options affect code selection within the JPEG library,
+ * but they don't need to be visible to applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+
+/* Define this if your compiler implements ">>" on signed values as a logical
+ * (unsigned) shift; leave it undefined if ">>" is a signed (arithmetic) shift,
+ * which is the normal and rational definition.
+ */
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+
+#endif /* JPEG_INTERNALS */
+
+
+/*
+ * The remaining options do not affect the JPEG library proper,
+ * but only the sample applications cjpeg/djpeg (see cjpeg.c, djpeg.c).
+ * Other applications can ignore these.
+ */
+
+#ifdef JPEG_CJPEG_DJPEG
+
+/* These defines indicate which image (non-JPEG) file formats are allowed. */
+
+#define BMP_SUPPORTED		/* BMP image file format */
+#define GIF_SUPPORTED		/* GIF image file format */
+#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED		/* Utah RLE image file format */
+#define TARGA_SUPPORTED		/* Targa image file format */
+
+/* Define this if you want to name both input and output files on the command
+ * line, rather than using stdout and optionally stdin.  You MUST do this if
+ * your system can't cope with binary I/O to stdin/stdout.  See comments at
+ * head of cjpeg.c or djpeg.c.
+ */
+#undef TWO_FILE_COMMANDLINE
+
+/* Define this if your system needs explicit cleanup of temporary files.
+ * This is crucial under MS-DOS, where the temporary "files" may be areas
+ * of extended memory; on most other systems it's not as important.
+ */
+#undef NEED_SIGNAL_CATCHER
+
+/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb").
+ * This is necessary on systems that distinguish text files from binary files,
+ * and is harmless on most systems that don't.  If you have one of the rare
+ * systems that complains about the "b" spec, define this symbol.
+ */
+#undef DONT_USE_B_MODE
+
+/* Define this if you want percent-done progress reports from cjpeg/djpeg.
+ */
+#undef PROGRESS_REPORT
+
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/src/main/jni/libjpeg/jcparam.c b/src/main/jni/libjpeg/jcparam.c
new file mode 100644
index 000000000..6fc48f536
--- /dev/null
+++ b/src/main/jni/libjpeg/jcparam.c
@@ -0,0 +1,610 @@
+/*
+ * jcparam.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+		      const unsigned int *basic_table,
+		      int scale_factor, boolean force_baseline)
+/* Define a quantization table equal to the basic_table times
+ * a scale factor (given as a percentage).
+ * If force_baseline is TRUE, the computed quantization table entries
+ * are limited to 1..255 for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL ** qtblptr;
+  int i;
+  long temp;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
+
+  if (*qtblptr == NULL)
+    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
+    /* limit the values to the valid range */
+    if (temp <= 0L) temp = 1L;
+    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
+    if (force_baseline && temp > 255L)
+      temp = 255L;		/* limit to baseline range if requested */
+    (*qtblptr)->quantval[i] = (UINT16) temp;
+  }
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*qtblptr)->sent_table = FALSE;
+}
+
+
+GLOBAL(void)
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+			 boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and a straight percentage-scaling quality scale.  In most cases it's better
+ * to use jpeg_set_quality (below); this entry point is provided for
+ * applications that insist on a linear percentage scaling.
+ */
+{
+  /* These are the sample quantization tables given in JPEG spec section K.1.
+   * The spec says that the values given produce "good" quality, and
+   * when divided by 2, "very good" quality.
+   */
+  static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+  };
+  static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+  };
+
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling (int quality)
+/* Convert a user-specified quality rating to a percentage scaling factor
+ * for an underlying quantization table, using our recommended scaling curve.
+ * The input 'quality' factor should be 0 (terrible) to 100 (very good).
+ */
+{
+  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
+  if (quality <= 0) quality = 1;
+  if (quality > 100) quality = 100;
+
+  /* The basic table is used as-is (scaling 100) for a quality of 50.
+   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
+   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
+   * to make all the table entries 1 (hence, minimum quantization loss).
+   * Qualities 1..50 are converted to scaling percentage 5000/Q.
+   */
+  if (quality < 50)
+    quality = 5000 / quality;
+  else
+    quality = 200 - quality*2;
+
+  return quality;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables.
+ * This is the standard quality-adjusting entry point for typical user
+ * interfaces; only those who want detailed control over quantization tables
+ * would use the preceding three routines directly.
+ */
+{
+  /* Convert user 0-100 rating to percentage scaling */
+  quality = jpeg_quality_scaling(quality);
+
+  /* Set up standard quality tables */
+  jpeg_set_linear_quality(cinfo, quality, force_baseline);
+}
+
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_compress_ptr cinfo,
+		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Define a Huffman table */
+{
+  int nsymbols, len;
+
+  if (*htblptr == NULL)
+    *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+
+  /* Copy the number-of-symbols-of-each-code-length counts */
+  MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+
+  /* Validate the counts.  We do this here mainly so we can copy the right
+   * number of symbols from the val[] array, without risking marching off
+   * the end of memory.  jchuff.c will do a more thorough test later.
+   */
+  nsymbols = 0;
+  for (len = 1; len <= 16; len++)
+    nsymbols += bits[len];
+  if (nsymbols < 1 || nsymbols > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+  MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*htblptr)->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables (j_compress_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+{
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
+		 bits_dc_luminance, val_dc_luminance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
+		 bits_ac_luminance, val_ac_luminance);
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
+		 bits_dc_chrominance, val_dc_chrominance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
+		 bits_ac_chrominance, val_ac_chrominance);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults (j_compress_ptr cinfo)
+{
+  int i;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Allocate comp_info array large enough for maximum component count.
+   * Array is made permanent in case application wants to compress
+   * multiple images at same param settings.
+   */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+
+  /* Initialize everything not dependent on the color space */
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  /* Set up two quantization tables using default quality of 75 */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* Set up two Huffman tables */
+  std_huff_tables(cinfo);
+
+  /* Initialize default arithmetic coding conditioning */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+
+  /* Default is no multiple-scan output */
+  cinfo->scan_info = NULL;
+  cinfo->num_scans = 0;
+
+  /* Expect normal source image, not raw downsampled data */
+  cinfo->raw_data_in = FALSE;
+
+  /* Use Huffman coding, not arithmetic coding, by default */
+  cinfo->arith_code = FALSE;
+
+  /* By default, don't do extra passes to optimize entropy coding */
+  cinfo->optimize_coding = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, force optimization on so that usable
+   * tables will be computed.  This test can be removed if default tables
+   * are supplied that are valid for the desired precision.
+   */
+  if (cinfo->data_precision > 8)
+    cinfo->optimize_coding = TRUE;
+
+  /* By default, use the simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* DCT algorithm preference */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers */
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  /* Fill in default JFIF marker parameters.  Note that whether the marker
+   * will actually be written is determined by jpeg_set_colorspace.
+   *
+   * By default, the library emits JFIF version code 1.01.
+   * An application that wants to emit JFIF 1.02 extension markers should set
+   * JFIF_minor_version to 2.  We could probably get away with just defaulting
+   * to 1.02, but there may still be some decoders in use that will complain
+   * about that; saying 1.01 should minimize compatibility problems.
+   */
+  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
+  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->Y_density = 1;
+
+  /* Choose JPEG colorspace based on input space, set defaults accordingly */
+
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace (j_compress_ptr cinfo)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+    break;
+  case JCS_RGB:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_YCbCr:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_CMYK:
+    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
+    break;
+  case JCS_YCCK:
+    jpeg_set_colorspace(cinfo, JCS_YCCK);
+    break;
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+  }
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info * compptr;
+  int ci;
+
+#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
+   * tables 1 for chrominance components.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 1, 1,1, 0, 0,0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
+    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(3, 4, 2,2, 0, 0,0);
+    break;
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan (jpeg_scan_info * scanptr, int ci,
+	     int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for specified component */
+{
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  scanptr++;
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans (jpeg_scan_info * scanptr, int ncomps,
+	    int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for each component */
+{
+  int ci;
+
+  for (ci = 0; ci < ncomps; ci++) {
+    scanptr->comps_in_scan = 1;
+    scanptr->component_index[0] = ci;
+    scanptr->Ss = Ss;
+    scanptr->Se = Se;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  }
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
+/* Support routine: generate interleaved DC scan if possible, else N scans */
+{
+  int ci;
+
+  if (ncomps <= MAX_COMPS_IN_SCAN) {
+    /* Single interleaved DC scan */
+    scanptr->comps_in_scan = ncomps;
+    for (ci = 0; ci < ncomps; ci++)
+      scanptr->component_index[ci] = ci;
+    scanptr->Ss = scanptr->Se = 0;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  } else {
+    /* Noninterleaved DC scan for each component */
+    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+  }
+  return scanptr;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression (j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  int nscans;
+  jpeg_scan_info * scanptr;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Figure space needed for script.  Calculation must match code below! */
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    nscans = 10;
+  } else {
+    /* All-purpose script for other color spaces. */
+    if (ncomps > MAX_COMPS_IN_SCAN)
+      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+    else
+      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+  }
+
+  /* Allocate space for script.
+   * We need to put it in the permanent pool in case the application performs
+   * multiple compressions without changing the settings.  To avoid a memory
+   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
+   * object, we try to re-use previously allocated space, and we allocate
+   * enough space to handle YCbCr even if initially asked for grayscale.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+  }
+  scanptr = cinfo->script_space;
+  cinfo->scan_info = scanptr;
+  cinfo->num_scans = nscans;
+
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    /* Initial DC scan */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    /* Initial AC scan: get some luma data out in a hurry */
+    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
+    /* Chroma data is too small to be worth expending many scans on */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
+    /* Complete spectral selection for luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
+    /* Refine next bit of luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
+    /* Finish DC successive approximation */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    /* Finish AC successive approximation */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
+    /* Luma bottom bit comes last since it's usually largest scan */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
+  } else {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
+    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
+  }
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jcphuff.c b/src/main/jni/libjpeg/jcphuff.c
new file mode 100644
index 000000000..07f9178b0
--- /dev/null
+++ b/src/main/jni/libjpeg/jcphuff.c
@@ -0,0 +1,833 @@
+/*
+ * jcphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines for progressive JPEG.
+ *
+ * We do not support output suspension in this module, since the library
+ * currently does not allow multiple-scan files to be written with output
+ * suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jchuff.c */
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+/* Expanded entropy encoder object for progressive Huffman encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  /* Mode flag: TRUE for optimization, FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* Bit-level coding status.
+   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for DC components */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;		/* the table number of the single component */
+  unsigned int EOBRUN;		/* run length of EOBs */
+  unsigned int BE;		/* # of buffered correction bits before MCU */
+  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan).
+   * Since any one scan codes only DC or only AC, we only need one set
+   * of tables, not one for DC and one for AC.
+   */
+  c_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+
+  /* Statistics tables for optimization; again, one set is enough */
+  long * count_ptrs[NUM_HUFF_TBLS];
+} phuff_entropy_encoder;
+
+typedef phuff_entropy_encoder * phuff_entropy_ptr;
+
+/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
+ * buffer can hold.  Larger sizes may slightly improve compression, but
+ * 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits.
+ */
+
+#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+/* Forward declarations */
+METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo));
+METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo));
+
+
+/*
+ * Initialize for a Huffman-compressed scan using progressive JPEG.
+ */
+
+METHODDEF(void)
+start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics)
+{  
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  entropy->cinfo = cinfo;
+  entropy->gather_statistics = gather_statistics;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* We assume jcmaster.c already validated the scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_first;
+    else
+      entropy->pub.encode_mcu = encode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_refine;
+    else {
+      entropy->pub.encode_mcu = encode_mcu_AC_refine;
+      /* AC refinement needs a correction bit buffer */
+      if (entropy->bit_buffer == NULL)
+	entropy->bit_buffer = (char *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      MAX_CORR_BITS * SIZEOF(char));
+    }
+  }
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather_phuff;
+  else
+    entropy->pub.finish_pass = finish_pass_phuff;
+
+  /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
+   * for AC coefficients.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Initialize DC predictions to 0 */
+    entropy->last_dc_val[ci] = 0;
+    /* Get table index */
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
+    }
+    if (gather_statistics) {
+      /* Check for invalid table index */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->count_ptrs[tbl] == NULL)
+	entropy->count_ptrs[tbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long));
+    } else {
+      /* Compute derived values for Huffman table */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
+			      & entropy->derived_tbls[tbl]);
+    }
+  }
+
+  /* Initialize AC stuff */
+  entropy->EOBRUN = 0;
+  entropy->BE = 0;
+
+  /* Initialize bit buffer to empty */
+  entropy->put_buffer = 0;
+  entropy->put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte */
+#define emit_byte(entropy,val)  \
+	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(entropy)->free_in_buffer == 0)  \
+	    dump_buffer(entropy); }
+
+
+LOCAL(void)
+dump_buffer (phuff_entropy_ptr entropy)
+/* Empty the output buffer; we do not support suspension in this module. */
+{
+  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (entropy->cinfo))
+    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
+  /* After a successful buffer dump, must reset buffer pointers */
+  entropy->next_output_byte = dest->next_output_byte;
+  entropy->free_in_buffer = dest->free_in_buffer;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(void)
+emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit some bits, unless we are in gather mode */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = entropy->put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;			/* do nothing if we're only getting stats */
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
+
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+    
+    emit_byte(entropy, c);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->put_buffer = put_buffer; /* update variables */
+  entropy->put_bits = put_bits;
+}
+
+
+LOCAL(void)
+flush_bits (phuff_entropy_ptr entropy)
+{
+  emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
+  entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
+  entropy->put_bits = 0;
+}
+
+
+/*
+ * Emit (or just count) a Huffman symbol.
+ */
+
+INLINE
+LOCAL(void)
+emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl * tbl = entropy->derived_tbls[tbl_no];
+    emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+/*
+ * Emit bits from a correction bit buffer.
+ */
+
+LOCAL(void)
+emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart,
+		    unsigned int nbits)
+{
+  if (entropy->gather_statistics)
+    return;			/* no real work */
+
+  while (nbits > 0) {
+    emit_bits(entropy, (unsigned int) (*bufstart), 1);
+    bufstart++;
+    nbits--;
+  }
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol.
+ */
+
+LOCAL(void)
+emit_eobrun (phuff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = 0;
+    while ((temp >>= 1))
+      nbits++;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
+    if (nbits)
+      emit_bits(entropy, entropy->EOBRUN, nbits);
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart (phuff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);
+
+  if (! entropy->gather_statistics) {
+    flush_bits(entropy);
+    emit_byte(entropy, 0xFF);
+    emit_byte(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  int blkn, ci;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  jpeg_component_info * compptr;
+  ISHIFT_TEMPS
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    temp = temp2 - entropy->last_dc_val[ci];
+    entropy->last_dc_val[ci] = temp2;
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+    
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    while (temp) {
+      nbits++;
+      temp >>= 1;
+    }
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > MAX_COEF_BITS+1)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+    
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_symbol(entropy, compptr->dc_tbl_no, nbits);
+    
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, (unsigned int) temp2, nbits);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+  
+  r = 0;			/* r = run length of zeros */
+   
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = (*block)[jpeg_natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value; so the code is
+     * interwoven with finding the abs value (temp) and output bits (temp2).
+     */
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      temp >>= Al;		/* apply the point transform */
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;		/* apply the point transform */
+      temp2 = temp;
+    }
+    /* Watch out for case that nonzero coef is zero after point transform */
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any pending EOBRUN */
+    if (entropy->EOBRUN > 0)
+      emit_eobrun(entropy);
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 1;			/* there must be at least one 1 bit */
+    while ((temp >>= 1))
+      nbits++;
+    /* Check for out-of-range coefficient values */
+    if (nbits > MAX_COEF_BITS)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits(entropy, (unsigned int) temp2, nbits);
+
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0) {			/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);	/* force it out to avoid overflow */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  int blkn;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    temp = (*block)[0];
+    emit_bits(entropy, (unsigned int) (temp >> Al), 1);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  register int r, k;
+  int EOB;
+  char *BR_buffer;
+  unsigned int BR;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  int absvalues[DCTSIZE2];
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* It is convenient to make a pre-pass to determine the transformed
+   * coefficients' absolute values and the EOB position.
+   */
+  EOB = 0;
+  for (k = cinfo->Ss; k <= Se; k++) {
+    temp = (*block)[jpeg_natural_order[k]];
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+    temp >>= Al;		/* apply the point transform */
+    absvalues[k] = temp;	/* save abs value for main pass */
+    if (temp == 1)
+      EOB = k;			/* EOB = index of last newly-nonzero coef */
+  }
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+  
+  r = 0;			/* r = run length of zeros */
+  BR = 0;			/* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = absvalues[k]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any required ZRLs, but not if they can be folded into EOB */
+    while (r > 15 && k <= EOB) {
+      /* emit any pending EOBRUN and the BE correction bits */
+      emit_eobrun(entropy);
+      /* Emit ZRL */
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+      /* Emit buffered correction bits that must be associated with ZRL */
+      emit_buffered_bits(entropy, BR_buffer, BR);
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+      BR = 0;
+    }
+
+    /* If the coef was previously nonzero, it only needs a correction bit.
+     * NOTE: a straight translation of the spec's figure G.7 would suggest
+     * that we also need to test r > 15.  But if r > 15, we can only get here
+     * if k > EOB, which implies that this coefficient is not 1.
+     */
+    if (temp > 1) {
+      /* The correction bit is the next bit of the absolute value. */
+      BR_buffer[BR++] = (char) (temp & 1);
+      continue;
+    }
+
+    /* Emit any pending EOBRUN and the BE correction bits */
+    emit_eobrun(entropy);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
+
+    /* Emit output bit for newly-nonzero coef */
+    temp = ((*block)[jpeg_natural_order[k]] < 0) ? 0 : 1;
+    emit_bits(entropy, (unsigned int) temp, 1);
+
+    /* Emit buffered correction bits that must be associated with this code */
+    emit_buffered_bits(entropy, BR_buffer, BR);
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+    BR = 0;
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    entropy->BE += BR;		/* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
+      emit_eobrun(entropy);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed progressive scan.
+ */
+
+METHODDEF(void)
+finish_pass_phuff (j_compress_ptr cinfo)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Flush out any buffered data */
+  emit_eobrun(entropy);
+  flush_bits(entropy);
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather_phuff (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did[NUM_HUFF_TBLS];
+
+  /* Flush out buffered data (all we care about is counting the EOB symbol) */
+  emit_eobrun(entropy);
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did, SIZEOF(did));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      tbl = compptr->ac_tbl_no;
+    }
+    if (! did[tbl]) {
+      if (is_DC_band)
+        htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
+      else
+        htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
+      if (*htblptr == NULL)
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
+      did[tbl] = TRUE;
+    }
+  }
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_encoder (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy;
+  int i;
+
+  entropy = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
+  entropy->pub.start_pass = start_pass_phuff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+    entropy->count_ptrs[i] = NULL;
+  }
+  entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jcprepct.c b/src/main/jni/libjpeg/jcprepct.c
new file mode 100644
index 000000000..fa93333db
--- /dev/null
+++ b/src/main/jni/libjpeg/jcprepct.c
@@ -0,0 +1,354 @@
+/*
+ * jcprepct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED
+#endif
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
+  int next_buf_row;		/* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
+  int this_row_group;		/* starting row index of group to process */
+  int next_buf_stop;		/* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller * my_prep_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Initialize total-height counter for detecting bottom of image */
+  prep->rows_to_go = cinfo->image_height;
+  /* Mark the conversion buffer empty */
+  prep->next_buf_row = 0;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Preset additional state variables for context mode.
+   * These aren't used in non-context mode, so we needn't test which mode.
+   */
+  prep->this_row_group = 0;
+  /* Set next_buf_stop to stop after two row groups have been read in. */
+  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
+#endif
+}
+
+
+/*
+ * Expand an image vertically from height input_rows to height output_rows,
+ * by duplicating the bottom row.
+ */
+
+LOCAL(void)
+expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
+		    int input_rows, int output_rows)
+{
+  register int row;
+
+  for (row = input_rows; row < output_rows; row++) {
+    jcopy_sample_rows(image_data, input_rows-1, image_data, row,
+		      1, num_cols);
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group
+ * is defined to be v_samp_factor sample rows of each component.
+ * Downsampling will produce this much data from each max_v_samp_factor
+ * input rows.
+ */
+
+METHODDEF(void)
+pre_process_data (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		  JDIMENSION in_rows_avail,
+		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		  JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info * compptr;
+
+  while (*in_row_ctr < in_rows_avail &&
+	 *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
+    numrows = (int) MIN((JDIMENSION) numrows, inrows);
+    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+				       prep->color_buf,
+				       (JDIMENSION) prep->next_buf_row,
+				       numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+	prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			   prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf, (JDIMENSION) 0,
+					output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 &&
+	*out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	expand_bottom_edge(output_buf[ci],
+			   compptr->width_in_blocks * DCTSIZE,
+			   (int) (*out_row_group_ctr * compptr->v_samp_factor),
+			   (int) (out_row_groups_avail * compptr->v_samp_factor));
+      }
+      *out_row_group_ctr = out_row_groups_avail;
+      break;			/* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case.
+ */
+
+METHODDEF(void)
+pre_process_context (j_compress_ptr cinfo,
+		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		     JDIMENSION in_rows_avail,
+		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		     JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  int buf_height = cinfo->max_v_samp_factor * 3;
+  JDIMENSION inrows;
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Do color conversion to fill the conversion buffer. */
+      inrows = in_rows_avail - *in_row_ctr;
+      numrows = prep->next_buf_stop - prep->next_buf_row;
+      numrows = (int) MIN((JDIMENSION) numrows, inrows);
+      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+					 prep->color_buf,
+					 (JDIMENSION) prep->next_buf_row,
+					 numrows);
+      /* Pad at top of image, if first time through */
+      if (prep->rows_to_go == cinfo->image_height) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  int row;
+	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+	    jcopy_sample_rows(prep->color_buf[ci], 0,
+			      prep->color_buf[ci], -row,
+			      1, cinfo->image_width);
+	  }
+	}
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+	break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			     prep->next_buf_row, prep->next_buf_stop);
+	}
+	prep->next_buf_row = prep->next_buf_stop;
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf,
+					(JDIMENSION) prep->this_row_group,
+					output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+	prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+	prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer (j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int rgroup_height = cinfo->max_v_samp_factor;
+  int ci, i;
+  jpeg_component_info * compptr;
+  JSAMPARRAY true_buffer, fake_buffer;
+
+  /* Grab enough space for fake row pointers for all the components;
+   * we need five row groups' worth of pointers for each component.
+   */
+  fake_buffer = (JSAMPARRAY)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(cinfo->num_components * 5 * rgroup_height) *
+				SIZEOF(JSAMPROW));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate the actual buffer space (3 row groups) for this component.
+     * We make the buffer wide enough to allow the downsampler to edge-expand
+     * horizontally within the buffer, if it so chooses.
+     */
+    true_buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+       (JDIMENSION) (3 * rgroup_height));
+    /* Copy true buffer row pointers into the middle of the fake row array */
+    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
+	    3 * rgroup_height * SIZEOF(JSAMPROW));
+    /* Fill in the above and below wraparound pointers */
+    for (i = 0; i < rgroup_height; i++) {
+      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
+      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
+    }
+    prep->color_buf[ci] = fake_buffer + rgroup_height;
+    fake_buffer += 5 * rgroup_height; /* point to space for next component */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int ci;
+  jpeg_component_info * compptr;
+
+  if (need_full_buffer)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_prep_controller));
+  cinfo->prep = (struct jpeg_c_prep_controller *) prep;
+  prep->pub.start_pass = start_pass_prep;
+
+  /* Allocate the color conversion buffer.
+   * We make the buffer wide enough to allow the downsampler to edge-expand
+   * horizontally within the buffer, if it so chooses.
+   */
+  if (cinfo->downsample->need_context_rows) {
+    /* Set up to provide context rows */
+#ifdef CONTEXT_ROWS_SUPPORTED
+    prep->pub.pre_process_data = pre_process_context;
+    create_context_buffer(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* No context, just make it tall enough for one row group */
+    prep->pub.pre_process_data = pre_process_data;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/src/main/jni/libjpeg/jcsample.c b/src/main/jni/libjpeg/jcsample.c
new file mode 100644
index 000000000..212ec8757
--- /dev/null
+++ b/src/main/jni/libjpeg/jcsample.c
@@ -0,0 +1,519 @@
+/*
+ * jcsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to downsample a single component */
+typedef JMETHOD(void, downsample1_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_downsampler pub;	/* public fields */
+
+  /* Downsampling method pointers, one per component */
+  downsample1_ptr methods[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler * my_downsample_ptr;
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample (j_compress_ptr cinfo)
+{
+  /* no work for now */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge (JSAMPARRAY image_data, int num_rows,
+		   JDIMENSION input_cols, JDIMENSION output_cols)
+{
+  register JSAMPROW ptr;
+  register JSAMPLE pixval;
+  register int count;
+  int row;
+  int numcols = (int) (output_cols - input_cols);
+
+  if (numcols > 0) {
+    for (row = 0; row < num_rows; row++) {
+      ptr = image_data[row] + input_cols;
+      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
+      for (count = numcols; count > 0; count--)
+	*ptr++ = pixval;
+    }
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample (j_compress_ptr cinfo,
+		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JSAMPARRAY in_ptr, out_ptr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    in_ptr = input_buf[ci] + in_row_index;
+    out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
+    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
+  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  JSAMPROW inptr, outptr;
+  INT32 outvalue;
+
+  h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
+  v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
+  numpix = h_expand * v_expand;
+  numpix2 = numpix/2;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * h_expand);
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    for (outcol = 0, outcol_h = 0; outcol < output_cols;
+	 outcol++, outcol_h += h_expand) {
+      outvalue = 0;
+      for (v = 0; v < v_expand; v++) {
+	inptr = input_data[inrow+v] + outcol_h;
+	for (h = 0; h < h_expand; h++) {
+	  outvalue += (INT32) GETJSAMPLE(*inptr++);
+	}
+      }
+      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
+    }
+    inrow += v_expand;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Copy the data */
+  jcopy_sample_rows(input_data, 0, output_data, 0,
+		    cinfo->max_v_samp_factor, cinfo->image_width);
+  /* Edge-expand */
+  expand_right_edge(output_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
+			      + bias) >> 1);
+      bias ^= 1;		/* 0=>1, 1=>0 */
+      inptr += 2;
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr0, inptr1, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+			      + bias) >> 2);
+      bias ^= 3;		/* 1=>2, 2=>1 */
+      inptr0 += 2; inptr1 += 2;
+    }
+    inrow += 2;
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+			JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    above_ptr = input_data[inrow-1];
+    below_ptr = input_data[inrow+2];
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      /* sum of pixels directly mapped to this output element */
+      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      /* sum of edge-neighbor pixels */
+      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+    inrow += 2;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
+			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    above_ptr = input_data[outrow-1];
+    below_ptr = input_data[outrow+1];
+
+    /* Special case for first column */
+    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+	     GETJSAMPLE(*inptr);
+    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		 GETJSAMPLE(*inptr);
+    neighsum = colsum + (colsum - membersum) + nextcolsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    lastcolsum = colsum; colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      membersum = GETJSAMPLE(*inptr++);
+      above_ptr++; below_ptr++;
+      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		   GETJSAMPLE(*inptr);
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      lastcolsum = colsum; colsum = nextcolsum;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr);
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+jinit_downsampler (j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean smoothok = TRUE;
+
+  downsample = (my_downsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_downsampler));
+  cinfo->downsample = (struct jpeg_downsampler *) downsample;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub.downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, and set up method pointers */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
+	compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = fullsize_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = fullsize_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+      smoothok = FALSE;
+      downsample->methods[ci] = h2v1_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = h2v2_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = h2v2_downsample;
+    } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
+	       (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+      smoothok = FALSE;
+      downsample->methods[ci] = int_downsample;
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && !smoothok)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
diff --git a/src/main/jni/libjpeg/jctrans.c b/src/main/jni/libjpeg/jctrans.c
new file mode 100644
index 000000000..0e6d70769
--- /dev/null
+++ b/src/main/jni/libjpeg/jctrans.c
@@ -0,0 +1,388 @@
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   */
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's Huffman table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* Although we don't actually use input_components for transcoding,
+   * jcmaster.c's initial_setup will complain if input_components is 0.
+   */
+  cinfo->input_components = 1;
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller. */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component. */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yindex+yoffset < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	    for (xindex = 0; xindex < blockcnt; xindex++)
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = 0;
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  for (; xindex < compptr->MCU_width; xindex++) {
+	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
+	    blkn++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr coef;
+  JBLOCKROW buffer;
+  int i;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+
+  /* Save pointer to virtual arrays */
+  coef->whole_image = coef_arrays;
+
+  /* Allocate and pre-zero space for dummy DCT blocks. */
+  buffer = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+    coef->dummy_buffer[i] = buffer + i;
+  }
+}
diff --git a/src/main/jni/libjpeg/jdapimin.c b/src/main/jni/libjpeg/jdapimin.c
new file mode 100644
index 000000000..5c9607ed5
--- /dev/null
+++ b/src/main/jni/libjpeg/jdapimin.c
@@ -0,0 +1,401 @@
+/*
+ * jdapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG decompression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = TRUE;
+  cinfo->tile_decode = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->src = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Initialize marker processor so application can override methods
+   * for COM, APPn markers before calling jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* And initialize the overall input controller. */
+  jinit_input_controller(cinfo);
+
+  /* OK, I'm ready */
+  cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG decompression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Set default decompression parameters.
+ */
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+  /* Guess the input colorspace, and set output colorspace accordingly. */
+  /* (Wish JPEG committee had provided a real way to specify this...) */
+  /* Note application may override our guesses. */
+  switch (cinfo->num_components) {
+  case 1:
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+    
+  case 3:
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
+    } else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_RGB;
+	break;
+      case 1:
+	cinfo->jpeg_color_space = JCS_YCbCr;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+	break;
+      }
+    } else {
+      /* Saw no special markers, try to guess from the component IDs */
+      int cid0 = cinfo->comp_info[0].component_id;
+      int cid1 = cinfo->comp_info[1].component_id;
+      int cid2 = cinfo->comp_info[2].component_id;
+
+      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+      else {
+	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+      }
+    }
+    /* Always guess RGB is proper output colorspace. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+    
+  case 4:
+    if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_CMYK;
+	break;
+      case 2:
+	cinfo->jpeg_color_space = JCS_YCCK;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+	break;
+      }
+    } else {
+      /* No special markers, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+    
+  default:
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* Set defaults for other decompression parameters. */
+  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_denom = 1;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* We set these in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+  int retcode;
+
+  if (cinfo->global_state != DSTATE_START &&
+      cinfo->global_state != DSTATE_INHEADER)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  retcode = jpeg_consume_input(cinfo);
+
+  switch (retcode) {
+  case JPEG_REACHED_SOS:
+    retcode = JPEG_HEADER_OK;
+    break;
+  case JPEG_REACHED_EOI:
+    if (require_image)		/* Complain if application wanted an image */
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Reset to start state; it would be safer to require the application to
+     * call jpeg_abort, but we can't change it now for compatibility reasons.
+     * A side effect is to free any temporary memory (there shouldn't be any).
+     */
+    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+    retcode = JPEG_HEADER_TABLES_ONLY;
+    break;
+  case JPEG_SUSPENDED:
+    /* no work */
+    break;
+  }
+
+  return retcode;
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* Start-of-datastream actions: reset appropriate modules */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);
+    cinfo->global_state = DSTATE_INHEADER;
+    /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+  /* Check for valid jpeg object */
+  if (cinfo->global_state < DSTATE_START ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+  /* Only valid after jpeg_read_header completes */
+  if (cinfo->global_state < DSTATE_READY ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
+    /* Terminate final pass of non-buffered mode */
+#ifdef ANDROID_TILE_BASED_DECODE
+    cinfo->output_scanline = cinfo->output_height;
+#endif
+    if (cinfo->output_scanline < cinfo->output_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* Finishing after a buffered-image operation */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* STOPPING = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read until EOI */
+#ifndef ANDROID_TILE_BASED_DECODE
+  while (! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+#endif
+  /* Do final cleanup */
+  (*cinfo->src->term_source) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+  return TRUE;
+}
diff --git a/src/main/jni/libjpeg/jdapistd.c b/src/main/jni/libjpeg/jdapistd.c
new file mode 100644
index 000000000..e1233df36
--- /dev/null
+++ b/src/main/jni/libjpeg/jdapistd.c
@@ -0,0 +1,397 @@
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)
+	  return FALSE;
+	if (retcode == JPEG_REACHED_EOI)
+	  break;
+	/* Advance progress counter if appropriate */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+/*
+ * Tile decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ */
+
+GLOBAL(boolean)
+jpeg_start_tile_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    cinfo->tile_decode = TRUE;
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call: do pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data */
+      last_scanline = cinfo->output_scanline;
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines or jpeg_read_raw_data.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION row_ctr;
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Process some data */
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  cinfo->output_scanline += row_ctr;
+  return row_ctr;
+}
+/*
+ * Initialize the jpeg decoder to decompressing a rectangle with size of (width, height)
+ * and its upper-left corner located at (start_x, start_y).
+ * Align start_x and start_y to multiplies of iMCU width and height, respectively.
+ * Also, the new reader position and sampled image size will be returned in
+ * (start_x, start_y) and (width, height), respectively.
+ */
+
+GLOBAL(void)
+jpeg_init_read_tile_scanline(j_decompress_ptr cinfo, huffman_index *index,
+		     int *start_x, int *start_y, int *width, int *height)
+{
+  // Calculates the boundary of iMCU
+  int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
+  int row_offset = *start_y / lines_per_iMCU_row;
+  int col_left_boundary = ((*start_x / lines_per_iMCU_col)
+            / index->MCU_sample_size) * index->MCU_sample_size;
+  int col_right_boundary =
+                  jdiv_round_up(*start_x + *width, lines_per_iMCU_col);
+
+  cinfo->coef->MCU_columns_to_skip =
+      *start_x / lines_per_iMCU_col - col_left_boundary;
+
+  *height = (*start_y - row_offset * lines_per_iMCU_row) + *height;
+  *start_x = col_left_boundary * lines_per_iMCU_col;
+  *start_y = row_offset * lines_per_iMCU_row;
+  cinfo->image_width = jmin(cinfo->original_image_width,
+          col_right_boundary * lines_per_iMCU_col) -
+          col_left_boundary * lines_per_iMCU_col;
+  cinfo->input_iMCU_row = row_offset;
+  cinfo->output_iMCU_row = row_offset;
+
+  // Updates JPEG decoder parameter
+  jinit_color_deconverter(cinfo);
+  jpeg_calc_output_dimensions(cinfo);
+  jinit_upsampler(cinfo);
+  (*cinfo->master->prepare_for_output_pass) (cinfo);
+  if (cinfo->progressive_mode)
+    (*cinfo->entropy->start_pass) (cinfo);
+  else
+    jpeg_decompress_per_scan_setup(cinfo);
+
+  int sample_size = DCTSIZE / cinfo->min_DCT_scaled_size;
+
+  *height = jdiv_round_up(*height, sample_size);
+  *width = cinfo->output_width;
+  cinfo->output_scanline = lines_per_iMCU_row * row_offset / sample_size;
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+  cinfo->inputctl->consume_input_build_huffman_index =
+      cinfo->coef->consume_data_build_huffman_index;
+  cinfo->entropy->index = index;
+  cinfo->input_iMCU_row = row_offset;
+  cinfo->output_iMCU_row = row_offset;
+  cinfo->coef->MCU_column_left_boundary = col_left_boundary;
+  cinfo->coef->MCU_column_right_boundary = col_right_boundary;
+  cinfo->coef->column_left_boundary =
+      col_left_boundary / index->MCU_sample_size;
+  cinfo->coef->column_right_boundary =
+      jdiv_round_up(col_right_boundary, index->MCU_sample_size);
+}
+
+/*
+ * Read a scanline from the current position.
+ *
+ * Return the number of lines actually read.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_tile_scanline (j_decompress_ptr cinfo, huffman_index *index,
+        JSAMPARRAY scanlines)
+{
+  // Calculates the boundary of iMCU
+  int lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  int lines_per_iMCU_col = cinfo->max_h_samp_factor * DCTSIZE;
+  int sample_size = DCTSIZE / cinfo->min_DCT_scaled_size;
+  JDIMENSION row_ctr = 0;
+
+  if (cinfo->progressive_mode) {
+    (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, 1);
+  } else {
+    if (cinfo->output_scanline % (lines_per_iMCU_row / sample_size) == 0) {
+      // Set the read head to the next iMCU row
+      int iMCU_row_offset = cinfo->output_scanline /
+            (lines_per_iMCU_row / sample_size);
+      int offset_data_col_position = cinfo->coef->MCU_column_left_boundary /
+            index->MCU_sample_size;
+      huffman_offset_data offset_data =
+          index->scan[0].offset[iMCU_row_offset][offset_data_col_position];
+      (*cinfo->entropy->configure_huffman_decoder) (cinfo, offset_data);
+    }
+    (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, 1);
+  }
+
+  cinfo->output_scanline += row_ctr;
+  return row_ctr;
+}
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Verify that at least one iMCU row can be returned. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
+  if (max_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decompress directly into user's buffer. */
+  if (! (*cinfo->coef->decompress_data) (cinfo, data))
+    return 0;			/* suspension forced, can do nothing more */
+
+  /* OK, we processed one iMCU row. */
+  cinfo->output_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  if (cinfo->global_state != DSTATE_BUFIMAGE &&
+      cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Limit scan number to valid range */
+  if (scan_number <= 0)
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached &&
+      scan_number > cinfo->input_scan_number)
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Perform any dummy output passes, and set up for the real pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
+    /* Terminate this pass. */
+    /* We do not require the whole pass to have been completed. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* BUFPOST = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read markers looking for SOS or EOI */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jdatadst.c b/src/main/jni/libjpeg/jdatadst.c
new file mode 100644
index 000000000..a8f6fb0e0
--- /dev/null
+++ b/src/main/jni/libjpeg/jdatadst.c
@@ -0,0 +1,151 @@
+/*
+ * jdatadst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data destination object for stdio output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  FILE * outfile;		/* target stream */
+  JOCTET * buffer;		/* start of buffer */
+} my_destination_mgr;
+
+typedef my_destination_mgr * my_dest_ptr;
+
+#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  /* Allocate the output buffer --- it will be released when done with image */
+  dest->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
+      (size_t) OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
+
+  /* Write any data remaining in the buffer */
+  if (datacount > 0) {
+    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
+      ERREXIT(cinfo, JERR_FILE_WRITE);
+  }
+  fflush(dest->outfile);
+  /* Make sure we wrote the output file OK */
+  if (ferror(dest->outfile))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
+{
+  my_dest_ptr dest;
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same file without re-executing jpeg_stdio_dest.
+   * This makes it dangerous to use this manager and a different destination
+   * manager serially with the same JPEG object, because their private object
+   * sizes may be different.  Caveat programmer.
+   */
+  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_destination_mgr));
+  }
+
+  dest = (my_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_destination;
+  dest->pub.empty_output_buffer = empty_output_buffer;
+  dest->pub.term_destination = term_destination;
+  dest->outfile = outfile;
+}
diff --git a/src/main/jni/libjpeg/jdatasrc.c b/src/main/jni/libjpeg/jdatasrc.c
new file mode 100644
index 000000000..edc752bf5
--- /dev/null
+++ b/src/main/jni/libjpeg/jdatasrc.c
@@ -0,0 +1,212 @@
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields */
+
+  FILE * infile;		/* source stream */
+  JOCTET * buffer;		/* start of buffer */
+  boolean start_of_file;	/* have we gotten any data yet? */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+
+  /* We reset the empty-input-file flag for each image,
+   * but we don't clear the input buffer.
+   * This is correct behavior for reading a series of images from one source.
+   */
+  src->start_of_file = TRUE;
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+  size_t nbytes;
+
+  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
+
+  if (nbytes <= 0) {
+    if (src->start_of_file)	/* Treat empty input file as fatal error */
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Insert a fake EOI marker */
+    src->buffer[0] = (JOCTET) 0xFF;
+    src->buffer[1] = (JOCTET) JPEG_EOI;
+    nbytes = 2;
+  }
+
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = nbytes;
+  src->start_of_file = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long) src->pub.bytes_in_buffer) {
+      num_bytes -= (long) src->pub.bytes_in_buffer;
+      (void) fill_input_buffer(cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->pub.next_input_byte += (size_t) num_bytes;
+    src->pub.bytes_in_buffer -= (size_t) num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr src;
+
+  /* The source object and input buffer are made permanent so that a series
+   * of JPEG images can be read from the same file by calling jpeg_stdio_src
+   * only before the first one.  (If we discarded the buffer at the end of
+   * one image, we'd likely lose the start of the next one.)
+   * This makes it unsafe to use this manager and a different source
+   * manager serially with the same JPEG object.  Caveat programmer.
+   */
+  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_source_mgr));
+    src = (my_src_ptr) cinfo->src;
+    src->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+  }
+
+  src = (my_src_ptr) cinfo->src;
+  src->pub.init_source = init_source;
+  src->pub.fill_input_buffer = fill_input_buffer;
+  src->pub.skip_input_data = skip_input_data;
+  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->pub.term_source = term_source;
+  src->infile = infile;
+  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
+  src->pub.next_input_byte = NULL; /* until buffer loaded */
+}
diff --git a/src/main/jni/libjpeg/jdcoefct.c b/src/main/jni/libjpeg/jdcoefct.c
new file mode 100644
index 000000000..e6e95062c
--- /dev/null
+++ b/src/main/jni/libjpeg/jdcoefct.c
@@ -0,0 +1,1038 @@
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * (On 80x86, the workspace is FAR even though it's not really very big;
+   * this is to keep the module interfaces unchanged when a large coefficient
+   * buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch;
+#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+/* Forward declarations */
+METHODDEF(int) decompress_onepass
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+METHODDEF(int) decompress_smooth_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (input side) */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* If multipass, check to see whether to use block smoothing on this pass */
+  if (coef->pub.coef_arrays != NULL) {
+    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+      coef->pub.decompress_data = decompress_smooth_data;
+    else
+      coef->pub.decompress_data = decompress_data;
+  }
+#endif
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    last_MCU_col =
+        (cinfo->coef->MCU_column_right_boundary -
+         cinfo->coef->MCU_column_left_boundary) - 1;
+  }
+#endif
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      if (MCU_col_num < coef->pub.MCU_columns_to_skip) {
+        (*cinfo->entropy->decode_mcu_discard_coef) (cinfo);
+        continue;
+      } else {
+        jzero_far((void FAR *) coef->MCU_buffer[0],
+		(size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+        if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	  /* Suspension forced; update state counters and exit */
+	  coef->MCU_vert_offset = yoffset;
+	  coef->MCU_ctr = MCU_col_num;
+	  return JPEG_SUSPENDED;
+        }
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkn gets
+       * incremented past them!).  Note the inner loop relies on having
+       * allocated the MCU_buffer[] blocks sequentially.
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component. */
+	if (! compptr->component_needed) {
+	  blkn += compptr->MCU_blocks;
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_scaled_size;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr,
+		        (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+		        output_ptr, output_col);
+	      output_col += compptr->DCT_scaled_size;
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  output_ptr += compptr->DCT_scaled_size;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++;
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+}
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->tile_decode ? 0 : cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+  unsigned int MCUs_per_row = cinfo->MCUs_per_row;
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    int iMCU_width_To_MCU_width;
+    if (cinfo->comps_in_scan > 1) {
+      // Interleaved
+      iMCU_width_To_MCU_width = 1;
+    } else {
+      // Non-intervleaved
+      iMCU_width_To_MCU_width = cinfo->cur_comp_info[0]->h_samp_factor;
+    }
+    MCUs_per_row = jmin(MCUs_per_row,
+        (cinfo->coef->column_right_boundary - cinfo->coef->column_left_boundary)
+        * cinfo->entropy->index->MCU_sample_size * iMCU_width_To_MCU_width);
+  }
+#endif
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+   // configure huffman decoder
+#ifdef ANDROID_TILE_BASED_DECODE
+    if (cinfo->tile_decode) {
+      huffman_scan_header scan_header =
+            cinfo->entropy->index->scan[cinfo->input_scan_number];
+      int col_offset = cinfo->coef->column_left_boundary;
+      (*cinfo->entropy->configure_huffman_decoder) (cinfo,
+              scan_header.offset[cinfo->input_iMCU_row]
+              [col_offset + yoffset * scan_header.MCUs_per_row]);
+    }
+#endif
+
+    // zero all blocks
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < MCUs_per_row;
+          MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+#ifdef ANDROID_TILE_BASED_DECODE
+            if (cinfo->tile_decode && cinfo->input_scan_number == 0) {
+              // need to do pre-zero ourselves.
+              jzero_far((void FAR *) coef->MCU_buffer[blkn-1],
+                        (size_t) (SIZEOF(JBLOCK)));
+            }
+#endif
+          }
+        }
+      }
+
+
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+/*
+ * Consume input data and store it in the coefficient buffer.
+ * Read one fully interleaved MCU row ("iMCU" row) per call.
+ */
+
+METHODDEF(int)
+consume_data_multi_scan (j_decompress_ptr cinfo)
+{
+  huffman_index *index = cinfo->entropy->index;
+  int i, retcode, ci;
+  int mcu = cinfo->input_iMCU_row;
+  jinit_phuff_decoder(cinfo);
+  for (i = 0; i < index->scan_count; i++) {
+    (*cinfo->inputctl->finish_input_pass) (cinfo);
+    jset_input_stream_position(cinfo, index->scan[i].bitstream_offset);
+    cinfo->output_iMCU_row = mcu;
+    cinfo->unread_marker = 0;
+    // Consume SOS and DHT headers
+    retcode = (*cinfo->inputctl->consume_markers) (cinfo, index, i);
+    cinfo->input_iMCU_row = mcu;
+    cinfo->input_scan_number = i;
+    cinfo->entropy->index = index;
+    // Consume scan block data
+    consume_data(cinfo);
+  }
+  cinfo->input_iMCU_row = mcu + 1;
+  cinfo->input_scan_number = 0;
+  cinfo->output_scan_number = 0;
+  return JPEG_ROW_COMPLETED;
+}
+
+/*
+ * Same as consume_data, expect for saving the Huffman decode information
+ * - bitstream offset and DC coefficient to index.
+ */
+
+METHODDEF(int)
+consume_data_build_huffman_index_baseline (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKROW buffer_ptr;
+
+  huffman_scan_header *scan_header = index->scan + current_scan;
+  scan_header->MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
+
+  size_t allocate_size = coef->MCU_rows_per_iMCU_row
+      * jdiv_round_up(cinfo->MCUs_per_row, index->MCU_sample_size)
+      * sizeof(huffman_offset_data);
+  scan_header->offset[cinfo->input_iMCU_row] =
+        (huffman_offset_data*)malloc(allocate_size);
+  index->mem_used += allocate_size;
+
+  huffman_offset_data *offset_data = scan_header->offset[cinfo->input_iMCU_row];
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      // Record huffman bit offset
+      if (MCU_col_num % index->MCU_sample_size == 0) {
+        (*cinfo->entropy->get_huffman_decoder_configuration)
+                (cinfo, offset_data);
+        ++offset_data;
+      }
+
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu_discard_coef) (cinfo)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+/*
+ * Same as consume_data, expect for saving the Huffman decode information
+ * - bitstream offset and DC coefficient to index.
+ */
+
+METHODDEF(int)
+consume_data_build_huffman_index_progressive (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  int factor = 4; // maximum factor is 4.
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    factor = jmin(factor, cinfo->cur_comp_info[ci]->h_samp_factor);
+
+  int sample_size = index->MCU_sample_size * factor;
+  huffman_scan_header *scan_header = index->scan + current_scan;
+  scan_header->MCU_rows_per_iMCU_row = coef->MCU_rows_per_iMCU_row;
+  scan_header->MCUs_per_row = jdiv_round_up(cinfo->MCUs_per_row, sample_size);
+  scan_header->comps_in_scan = cinfo->comps_in_scan;
+
+  size_t allocate_size = coef->MCU_rows_per_iMCU_row
+      * scan_header->MCUs_per_row * sizeof(huffman_offset_data);
+  scan_header->offset[cinfo->input_iMCU_row] =
+        (huffman_offset_data*)malloc(allocate_size);
+  index->mem_used += allocate_size;
+
+  huffman_offset_data *offset_data = scan_header->offset[cinfo->input_iMCU_row];
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       0, // Only need one row buffer
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+  }
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* For each MCU, we loop through different color components.
+       * Then, for each color component we will get a list of pointers to DCT
+       * blocks in the virtual buffer.
+       */
+      blkn = 0; /* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        /* Get the list of pointers to DCT blocks in
+         * the virtual buffer in a color component of the MCU.
+         */
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+            if (cinfo->input_scan_number == 0) {
+              // need to do pre-zero by ourself.
+              jzero_far((void FAR *) coef->MCU_buffer[blkn-1],
+                        (size_t) (SIZEOF(JBLOCK)));
+            }
+          }
+        }
+      }
+      // Record huffman bit offset
+      if (MCU_col_num % sample_size == 0) {
+        (*cinfo->entropy->get_huffman_decoder_configuration)
+                (cinfo, offset_data);
+        ++offset_data;
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  (*cinfo->entropy->get_huffman_decoder_configuration)
+        (cinfo, &scan_header->prev_MCU_offset);
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+	 (cinfo->input_scan_number == cinfo->output_scan_number &&
+	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       cinfo->tile_decode ? 0 : cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    int width_in_blocks = compptr->width_in_blocks;
+    int start_block = 0;
+#if ANDROID_TILE_BASED_DECODE
+    if (cinfo->tile_decode) {
+      // width_in_blocks for a component depends on its h_samp_factor.
+      width_in_blocks = jmin(width_in_blocks,
+        (cinfo->coef->MCU_column_right_boundary -
+         cinfo->coef->MCU_column_left_boundary) *
+         compptr->h_samp_factor);
+      start_block = coef->pub.MCU_columns_to_skip *
+         compptr->h_samp_factor;
+   }
+#endif
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      output_col = start_block * compptr->DCT_scaled_size;
+      buffer_ptr += start_block;
+      for (block_num = start_block; block_num < width_in_blocks; block_num++) {
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+			output_ptr, output_col);
+	buffer_ptr++;
+	output_col += compptr->DCT_scaled_size;
+      }
+      output_ptr += compptr->DCT_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order coefficients */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;
+  int * coef_bits_latch;
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components *
+				  (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) {
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;
+  JBLOCK workspace;
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr;
+      else
+	prev_block_row = buffer[block_row-1];
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr;
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values */
+	if (block_num < last_block_column) {
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_scaled_size;
+      }
+      output_ptr += compptr->DCT_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+  coef->pub.column_left_boundary = 0;
+  coef->pub.column_right_boundary = 0;
+  coef->pub.MCU_columns_to_skip = 0;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;
+#endif
+
+#ifdef ANDROID_TILE_BASED_DECODE
+  if (cinfo->tile_decode) {
+    if (cinfo->progressive_mode) {
+      /* Allocate one iMCU row virtual array, coef->whole_image[ci],
+       * for each color component, padded to a multiple of h_samp_factor
+       * DCT blocks in the horizontal direction.
+       */
+      int ci, access_rows;
+      jpeg_component_info *compptr;
+
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+        access_rows = compptr->v_samp_factor;
+        coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	   (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	   (JDIMENSION) compptr->v_samp_factor, // one iMCU row
+	   (JDIMENSION) access_rows);
+      }
+      coef->pub.consume_data_build_huffman_index =
+            consume_data_build_huffman_index_progressive;
+      coef->pub.consume_data = consume_data_multi_scan;
+      coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+      coef->pub.decompress_data = decompress_onepass;
+    } else {
+      /* We only need a single-MCU buffer. */
+      JBLOCKROW buffer;
+      int i;
+
+      buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+      for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+        coef->MCU_buffer[i] = buffer + i;
+      }
+      coef->pub.consume_data_build_huffman_index =
+            consume_data_build_huffman_index_baseline;
+      coef->pub.consume_data = dummy_consume_data;
+      coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+      coef->pub.decompress_data = decompress_onepass;
+    }
+    return;
+  }
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3;
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data;
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/src/main/jni/libjpeg/jdcolor.c b/src/main/jni/libjpeg/jdcolor.c
new file mode 100644
index 000000000..d9048ebc7
--- /dev/null
+++ b/src/main/jni/libjpeg/jdcolor.c
@@ -0,0 +1,899 @@
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#ifdef NV_ARM_NEON
+#include "jsimd_neon.h"
+#endif
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields */
+
+  /* Private state for YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;
+
+
+#ifdef ANDROID_RGB
+
+/* Declarations for ordered dithering.
+ * 
+ * We use 4x4 ordered dither array packed into 32 bits. This array is
+ * sufficent for dithering RGB_888 to RGB_565.
+ */
+
+#define DITHER_MASK         0x3
+#define DITHER_ROTATE(x)    (((x)<<24) | (((x)>>8)&0x00FFFFFF))
+static const INT32 dither_matrix[4] = {
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+#endif
+
+
+/**************** YCbCr -> RGB conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	R = Y                + 1.40200 * Cr
+ *	G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *	B = Y + 1.77200 * Cb
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    cconvert->Cr_r_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    cconvert->Cb_b_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+#ifdef ANDROID_RGB
+METHODDEF(void)
+ycc_rgba_8888_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr[RGB_ALPHA] =  0xFF;
+      outptr += 4;
+    }
+  }
+}
+
+METHODDEF(void)
+ycc_rgb_565_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        y  = GETJSAMPLE(*inptr0++);
+        cb = GETJSAMPLE(*inptr1++);
+        cr = GETJSAMPLE(*inptr2++);
+        r = range_limit[y + Crrtab[cr]];
+        g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS))];
+        b = range_limit[y + Cbbtab[cb]];
+        rgb = PACK_SHORT_565(r,g,b);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r,g,b);
+
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      y  = GETJSAMPLE(*inptr0);
+      cb = GETJSAMPLE(*inptr1);
+      cr = GETJSAMPLE(*inptr2);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r,g,b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+METHODDEF(void)
+ycc_rgb_565D_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        y  = GETJSAMPLE(*inptr0++);
+        cb = GETJSAMPLE(*inptr1++);
+        cr = GETJSAMPLE(*inptr2++);
+        r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+        g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS)), d0)];
+        b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+        rgb = PACK_SHORT_565(r,g,b);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r,g,b);
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      y  = GETJSAMPLE(*inptr0);
+      cb = GETJSAMPLE(*inptr1);
+      cr = GETJSAMPLE(*inptr2);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb]+Crgtab[cr], SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      rgb = PACK_SHORT_565(r,g,b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+#endif
+
+/**************** Cases other than YCbCr -> RGB(A) **************/
+
+#ifdef ANDROID_RGB
+METHODDEF(void)
+rgb_rgba_8888_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      *outptr++ = *inptr0++;
+      *outptr++ = *inptr1++;
+      *outptr++ = *inptr2++;
+      *outptr++ = 0xFF;
+    }
+  }
+}
+
+METHODDEF(void)
+rgb_rgb_565_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        r = GETJSAMPLE(*inptr0++);
+        g = GETJSAMPLE(*inptr1++);
+        b = GETJSAMPLE(*inptr2++);
+        rgb = PACK_SHORT_565(r,g,b);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      r = GETJSAMPLE(*inptr0++);
+      g = GETJSAMPLE(*inptr1++);
+      b = GETJSAMPLE(*inptr2++);
+      rgb = PACK_SHORT_565(r,g,b);
+      r = GETJSAMPLE(*inptr0++);
+      g = GETJSAMPLE(*inptr1++);
+      b = GETJSAMPLE(*inptr2++);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      r = GETJSAMPLE(*inptr0);
+      g = GETJSAMPLE(*inptr1);
+      b = GETJSAMPLE(*inptr2);
+      rgb = PACK_SHORT_565(r,g,b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+rgb_rgb_565D_convert (j_decompress_ptr cinfo,
+         JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  JDIMENSION num_cols = cinfo->output_width;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+        g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+        b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+        rgb = PACK_SHORT_565(r,g,b);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r,g,b);
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
+      rgb = PACK_SHORT_565(r,g,b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+#endif
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION count;
+  register int num_components = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->output_width;
+  int ci;
+
+  while (--num_rows >= 0) {
+    for (ci = 0; ci < num_components; ci++) {
+      inptr = input_buf[ci][input_row];
+      outptr = output_buf[0] + ci;
+      for (count = num_cols; count > 0; count--) {
+	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+	outptr += num_components;
+      }
+    }
+    input_row++;
+    output_buf++;
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCbCr -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
+		    num_rows, cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+#ifdef ANDROID_RGB
+METHODDEF(void)
+gray_rgba_8888_convert (j_decompress_ptr cinfo,
+          JSAMPIMAGE input_buf, JDIMENSION input_row,
+          JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr[RGB_ALPHA] = 0xff;
+      outptr += 4;
+    }
+  }
+}
+
+METHODDEF(void)
+gray_rgb_565_convert (j_decompress_ptr cinfo,
+          JSAMPIMAGE input_buf, JDIMENSION input_row,
+          JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int g;
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        g = *inptr++;
+        rgb = PACK_SHORT_565(g, g, g);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      g = *inptr++;
+      rgb = PACK_SHORT_565(g, g, g);
+      g = *inptr++;
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      g = *inptr;
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+METHODDEF(void)
+gray_rgb_565D_convert (j_decompress_ptr cinfo,
+          JSAMPIMAGE input_buf, JDIMENSION input_row,
+          JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  JDIMENSION num_cols = cinfo->output_width;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int g;
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+        g = *inptr++;
+        g = range_limit[DITHER_565_R(g, d0)];
+        rgb = PACK_SHORT_565(g, g, g);
+        *(INT16*)outptr = rgb;
+        outptr += 2;
+        num_cols--;
+    }
+    for (col = 0; col < (num_cols>>1); col++) {
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      d0 = DITHER_ROTATE(d0);
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      d0 = DITHER_ROTATE(d0);
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols&1) {
+      g = *inptr;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+#endif
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 4;
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_deconverter));
+  cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also clear the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
+	cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub.color_convert = gray_rgb_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+#ifdef ANDROID_RGB
+  case JCS_RGBA_8888:
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
+      if (cap_neon_ycc_rgb()) {
+        cconvert->pub.color_convert = jsimd_ycc_rgba8888_convert;
+      } else {
+        cconvert->pub.color_convert = ycc_rgba_8888_convert;
+      }
+#else
+      cconvert->pub.color_convert = ycc_rgba_8888_convert;
+#endif
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub.color_convert = gray_rgba_8888_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      cconvert->pub.color_convert = rgb_rgba_8888_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB_565:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->dither_mode == JDITHER_NONE) {
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
+        if (cap_neon_ycc_rgb())  {
+          cconvert->pub.color_convert = jsimd_ycc_rgb565_convert;
+        } else {
+          cconvert->pub.color_convert = ycc_rgb_565_convert;
+        }
+#else
+        cconvert->pub.color_convert = ycc_rgb_565_convert;
+#endif
+        build_ycc_rgb_table(cinfo);
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub.color_convert = gray_rgb_565_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub.color_convert = rgb_rgb_565_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    } else {
+      /* only ordered dither is supported */
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+        cconvert->pub.color_convert = ycc_rgb_565D_convert;
+        build_ycc_rgb_table(cinfo);
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub.color_convert = gray_rgb_565D_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub.color_convert = rgb_rgb_565D_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+#endif
+    
+  case JCS_CMYK:
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCCK) {
+      cconvert->pub.color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:
+    /* Permit null conversion to same output space */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub.color_convert = null_convert;
+    } else			/* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/src/main/jni/libjpeg/jdct.h b/src/main/jni/libjpeg/jdct.h
new file mode 100644
index 000000000..d5d868f16
--- /dev/null
+++ b/src/main/jni/libjpeg/jdct.h
@@ -0,0 +1,180 @@
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+ * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
+ * for 8-bit samples, INT32 for 12-bit samples.  (NOTE: Floating-point DCT
+ * implementations use an array of type FAST_FLOAT, instead.)
+ * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#ifdef ANDROID_MIPS_IDCT
+typedef short DCTELEM;		/* 16 or 32 bits is fine */
+#else
+typedef int DCTELEM;		/* 16 or 32 bits is fine */
+#endif
+#else
+typedef INT32 DCTELEM;		/* must have 32 bits */
+#endif
+
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_scaled_size * DCT_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
+
+#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
+
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+
+#ifndef MULTIPLY16C16		/* default definition */
+#define MULTIPLY16C16(var,const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
+#endif
+
+#ifndef MULTIPLY16V16		/* default definition */
+#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
+#endif
diff --git a/src/main/jni/libjpeg/jddctmgr.c b/src/main/jni/libjpeg/jddctmgr.c
new file mode 100644
index 000000000..980f8f316
--- /dev/null
+++ b/src/main/jni/libjpeg/jddctmgr.c
@@ -0,0 +1,383 @@
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef ANDROID_ARMV6_IDCT
+  #undef ANDROID_ARMV6_IDCT
+  #ifdef __arm__
+    #include <machine/cpu-features.h>
+    #if __ARM_ARCH__ >= 6
+      #define ANDROID_ARMV6_IDCT
+    #else
+      #warning "ANDROID_ARMV6_IDCT is disabled"
+    #endif
+  #endif
+#endif
+
+#ifdef NV_ARM_NEON
+#include "jsimd_neon.h"
+#endif
+
+#ifdef ANDROID_ARMV6_IDCT
+
+/* Intentionally declare the prototype with arguments of primitive types instead
+ * of type-defined ones. This will at least generate some warnings if jmorecfg.h
+ * is changed and becomes incompatible with the assembly code.
+ */
+extern void armv6_idct(short *coefs, int *quans, unsigned char **rows, int col);
+
+void jpeg_idct_armv6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  IFAST_MULT_TYPE *dct_table = (IFAST_MULT_TYPE *)compptr->dct_table;
+  armv6_idct(coef_block, dct_table, output_buf, output_col);
+}
+
+#endif
+
+#ifdef ANDROID_INTELSSE2_IDCT
+extern short __attribute__((aligned(16))) quantptrSSE[DCTSIZE2];
+extern void jpeg_idct_intelsse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col);
+#endif
+
+#ifdef ANDROID_MIPS_IDCT
+extern void jpeg_idct_mips(j_decompress_ptr, jpeg_component_info *, JCOEFPTR, JSAMPARRAY, JDIMENSION);
+#endif
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr;
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];
+#endif
+} multiplier_table;
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef IDCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Prepare for an output pass.
+ * Here we select the proper IDCT routine for each component and build
+ * a matching multiplier table.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch (compptr->DCT_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case 1:
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 2:
+#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
+      if (cap_neon_idct_2x2()) {
+        method_ptr = jsimd_idct_2x2;
+      } else {
+        method_ptr = jpeg_idct_2x2;
+      }
+#else
+      method_ptr = jpeg_idct_2x2;
+#endif
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 4:
+#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
+	  if (cap_neon_idct_4x4()) {
+        method_ptr = jsimd_idct_4x4;
+      } else {
+        method_ptr = jpeg_idct_4x4;
+      }
+#else
+      method_ptr = jpeg_idct_4x4;
+#endif
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+#endif
+    case DCTSIZE:
+      switch (cinfo->dct_method) {
+#ifdef ANDROID_ARMV6_IDCT
+      case JDCT_ISLOW:
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_armv6;
+	method = JDCT_IFAST;
+	break;
+#else /* ANDROID_ARMV6_IDCT */
+#ifdef ANDROID_INTELSSE2_IDCT
+      case JDCT_ISLOW:
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_intelsse;
+	method = JDCT_ISLOW; /* Use quant table of ISLOW.*/
+	break;
+#else /* ANDROID_INTELSSE2_IDCT */
+#ifdef ANDROID_MIPS_IDCT
+      case JDCT_ISLOW:
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_mips;
+	method = JDCT_IFAST;
+	break;
+#else /* ANDROID_MIPS_IDCT */
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
+        if (cap_neon_idct_ifast()) {
+          method_ptr = jsimd_idct_ifast;
+        } else {
+          method_ptr = jpeg_idct_ifast;
+        }
+#else
+        method_ptr = jpeg_idct_ifast;
+#endif
+	method = JDCT_IFAST;
+	break;
+#endif
+#endif /* ANDROID_MIPS_IDCT */
+#endif /* ANDROID_INTELSSE2_IDCT*/
+#endif /* ANDROID_ARMV6_IDCT */
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:
+      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, but are stored as ints to ensure access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the multiplier table is to be scaled by
+	 * IFAST_SCALE_BITS.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#ifdef ANDROID_ARMV6_IDCT
+	/* Precomputed values scaled up by 15 bits. */
+	static const unsigned short scales[DCTSIZE2] = {
+	  32768, 45451, 42813, 38531, 32768, 25746, 17734,  9041,
+	  45451, 63042, 59384, 53444, 45451, 35710, 24598, 12540,
+	  42813, 59384, 55938, 50343, 42813, 33638, 23170, 11812,
+	  38531, 53444, 50343, 45308, 38531, 30274, 20853, 10631,
+	  32768, 45451, 42813, 38531, 32768, 25746, 17734,  9041,
+	  25746, 35710, 33638, 30274, 25746, 20228, 13933,  7103,
+	  17734, 24598, 23170, 20853, 17734, 13933,  9598,  4893,
+	   9041, 12540, 11812, 10631,  9041,  7103,  4893,  2494,
+	};
+	/* Inverse map of [7, 5, 1, 3, 0, 2, 4, 6]. */
+	static const char orders[DCTSIZE] = {4, 2, 5, 3, 6, 1, 7, 0};
+	/* Reorder the columns after transposing. */
+	for (i = 0; i < DCTSIZE2; ++i) {
+	  int j = ((i & 7) << 3) + orders[i >> 3];
+	  ifmtbl[j] = (qtbl->quantval[i] * scales[i] + 2) >> 2;
+	}
+#else /* ANDROID_ARMV6_IDCT */
+
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+#endif /* ANDROID_ARMV6_IDCT */
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col]);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  idct = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_idct_controller));
+  cinfo->idct = (struct jpeg_inverse_dct *) idct;
+  idct->pub.start_pass = start_pass;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate and pre-zero a multiplier table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(multiplier_table));
+    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
+    /* Mark multiplier table not yet set up for any method */
+    idct->cur_method[ci] = -1;
+  }
+}
diff --git a/src/main/jni/libjpeg/jdhuff.c b/src/main/jni/libjpeg/jdhuff.c
new file mode 100644
index 000000000..bc5d4fdd2
--- /dev/null
+++ b/src/main/jni/libjpeg/jdhuff.c
@@ -0,0 +1,894 @@
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+
+LOCAL(boolean) process_restart (j_decompress_ptr cinfo);
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
+  boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr;
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, blkn, dctbl, actbl;
+  jpeg_component_info * compptr;
+
+  /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+   * This ought to be an error condition, but we make it a warning because
+   * there are some baseline files out there with all zeroes in these bytes.
+   */
+  if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2-1 ||
+      cinfo->Ah != 0 || cinfo->Al != 0)
+    WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    /* Compute derived values for Huffman tables */
+    /* We may do this more than once for a table, but it's not expensive */
+    jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
+			    & entropy->dc_derived_tbls[dctbl]);
+    jpeg_make_d_derived_tbl(cinfo, FALSE, actbl,
+			    & entropy->ac_derived_tbls[actbl]);
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Precalculate decoding info for each block in an MCU of this scan */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    /* Precalculate which table to use for each block */
+    entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
+    entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
+    /* Decide whether we really care about the coefficient values */
+    if (compptr->component_needed) {
+      entropy->dc_needed[blkn] = TRUE;
+      /* we don't need the ACs if producing a 1/8th-size image */
+      entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
+    } else {
+      entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
+    }
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ *
+ * Note this is also used by jdphuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;		/* fill in back link */
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  numsymbols = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+  
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+    }
+  }
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
+	dtbl->look_nbits[lookbits] = l;
+	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching (shared with jdphuff.c).
+ * See jdhuff.h for info about usage.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
+#endif
+
+
+GLOBAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! cinfo->entropy->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	cinfo->entropy->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ * See jdhuff.h for info about usage.
+ */
+
+GLOBAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int l = min_bits;
+  register INT32 code;
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16 in the JPEG spec. */
+
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;			/* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
+
+#else
+
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
+  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
+    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
+    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
+    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+/*
+ * Save the current Huffman deocde position and the DC coefficients
+ * for each component into bitstream_offset and dc_info[], respectively.
+ */
+METHODDEF(void)
+get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  short int *dc_info = offset->prev_dc;
+  int i;
+  jpeg_get_huffman_decoder_configuration(cinfo, offset);
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    dc_info[i] = entropy->saved.last_dc_val[i];
+  }
+}
+
+/*
+ * Save the current Huffman decoder position and the bit buffer
+ * into bitstream_offset and get_buffer, respectively.
+ */
+GLOBAL(void)
+jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  if (cinfo->restart_interval) {
+    // We are at the end of a data segment
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return;
+  }
+
+  // Save restarts_to_go and next_restart_num
+  offset->restarts_to_go = (unsigned short) entropy->restarts_to_go;
+  offset->next_restart_num = cinfo->marker->next_restart_num;
+
+  offset->bitstream_offset =
+      (jget_input_stream_position(cinfo) << LOG_TWO_BIT_BUF_SIZE)
+      + entropy->bitstate.bits_left;
+
+  offset->get_buffer = entropy->bitstate.get_buffer;
+}
+
+/*
+ * Configure the Huffman decoder to decode the image
+ * starting from the bitstream position recorded in offset.
+ */
+METHODDEF(void)
+configure_huffman_decoder(j_decompress_ptr cinfo, huffman_offset_data offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  short int *dc_info = offset.prev_dc;
+  int i;
+  jpeg_configure_huffman_decoder(cinfo, offset);
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    entropy->saved.last_dc_val[i] = dc_info[i];
+  }
+}
+
+/*
+ * Configure the Huffman decoder reader position and bit buffer.
+ */
+GLOBAL(void)
+jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
+        huffman_offset_data offset)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  // Restore restarts_to_go and next_restart_num
+  cinfo->unread_marker = 0;
+  entropy->restarts_to_go = offset.restarts_to_go;
+  cinfo->marker->next_restart_num = offset.next_restart_num;
+
+  unsigned int bitstream_offset = offset.bitstream_offset;
+  int blkn, i;
+
+  unsigned int byte_offset = bitstream_offset >> LOG_TWO_BIT_BUF_SIZE;
+  unsigned int bit_in_bit_buffer =
+      bitstream_offset & ((1 << LOG_TWO_BIT_BUF_SIZE) - 1);
+
+  jset_input_stream_position_bit(cinfo, byte_offset,
+          bit_in_bit_buffer, offset.get_buffer);
+}
+
+/*
+ * Decode and return one MCU's worth of Huffman-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * Returns FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * this module, since we'll just re-assign them on the next call.)
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+      register int s, k, r;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      if (entropy->dc_needed[blkn]) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	int ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+	(*block)[0] = (JCOEF) s;
+      }
+
+      if (entropy->ac_needed[blkn]) {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+      
+	  r = s >> 4;
+	  s &= 15;
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      break;
+	    k += 15;
+	  }
+	}
+
+      } else {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* In this path we just discard the values */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+      
+	  r = s >> 4;
+	  s &= 15;
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    DROP_BITS(s);
+	  } else {
+	    if (r != 15)
+	      break;
+	    k += 15;
+	  }
+	}
+
+      }
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients.
+ * The propose of this method is to calculate the
+ * data length of one MCU in Huffman-coded format.
+ * Therefore, all coefficients are discarded.
+ */
+
+METHODDEF(boolean)
+decode_mcu_discard_coef (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+      register int s, k, r;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* discard all coefficients */
+      if (entropy->dc_needed[blkn]) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	int ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+      }
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          DROP_BITS(s);
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.decode_mcu = decode_mcu;
+  entropy->pub.decode_mcu_discard_coef = decode_mcu_discard_coef;
+  entropy->pub.configure_huffman_decoder = configure_huffman_decoder;
+  entropy->pub.get_huffman_decoder_configuration =
+        get_huffman_decoder_configuration;
+  entropy->pub.index = NULL;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+  }
+}
+
+/*
+ * Call after jpeg_read_header
+ */
+GLOBAL(void)
+jpeg_create_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+{
+  int i, s;
+  index->scan_count = 1;
+  index->total_iMCU_rows = cinfo->total_iMCU_rows;
+  index->scan = (huffman_scan_header*)malloc(index->scan_count
+          * sizeof(huffman_scan_header));
+  index->scan[0].offset = (huffman_offset_data**)malloc(cinfo->total_iMCU_rows
+          * sizeof(huffman_offset_data*));
+  index->scan[0].prev_MCU_offset.bitstream_offset = 0;
+  index->MCU_sample_size = DEFAULT_MCU_SAMPLE_SIZE;
+
+  index->mem_used = sizeof(huffman_scan_header)
+      + cinfo->total_iMCU_rows * sizeof(huffman_offset_data*);
+}
+
+GLOBAL(void)
+jpeg_destroy_huffman_index(huffman_index *index)
+{
+    int i, j;
+    for (i = 0; i < index->scan_count; i++) {
+        for(j = 0; j < index->total_iMCU_rows; j++) {
+            free(index->scan[i].offset[j]);
+        }
+        free(index->scan[i].offset);
+    }
+    free(index->scan);
+}
+
+/*
+ * Set the reader byte position to offset
+ */
+GLOBAL(void)
+jset_input_stream_position(j_decompress_ptr cinfo, int offset)
+{
+  if (cinfo->src->seek_input_data) {
+    cinfo->src->seek_input_data(cinfo, offset);
+  } else {
+    cinfo->src->bytes_in_buffer = cinfo->src->current_offset - offset;
+    cinfo->src->next_input_byte = cinfo->src->start_input_byte + offset;
+  }
+}
+
+/*
+ * Set the reader byte position to offset and bit position to bit_left
+ * with bit buffer set to buf.
+ */
+GLOBAL(void)
+jset_input_stream_position_bit(j_decompress_ptr cinfo,
+        int byte_offset, int bit_left, INT32 buf)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  entropy->bitstate.bits_left = bit_left;
+  entropy->bitstate.get_buffer = buf;
+
+  jset_input_stream_position(cinfo, byte_offset);
+}
+
+/*
+ * Get the current reader byte position.
+ */
+GLOBAL(int)
+jget_input_stream_position(j_decompress_ptr cinfo)
+{
+  return cinfo->src->current_offset - cinfo->src->bytes_in_buffer;
+}
diff --git a/src/main/jni/libjpeg/jdhuff.h b/src/main/jni/libjpeg/jdhuff.h
new file mode 100644
index 000000000..5760a134e
--- /dev/null
+++ b/src/main/jni/libjpeg/jdhuff.h
@@ -0,0 +1,202 @@
+/*
+ * jdhuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy decoding routines
+ * that are shared between the sequential decoder (jdhuff.c) and the
+ * progressive decoder (jdphuff.c).  No other modules need to see these.
+ */
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_d_derived_tbl	jMkDDerived
+#define jpeg_fill_bit_buffer	jFilBitBuf
+#define jpeg_huff_decode	jHufDecode
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+} d_derived_tbl;
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_d_derived_tbl
+	JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
+	     d_derived_tbl ** pdtbl));
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+#define LOG_TWO_BIT_BUF_SIZE  5	/* log_2(BIT_BUF_SIZE) */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.
+   */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = cinfop; \
+	br_state.next_input_byte = cinfop->src->next_input_byte; \
+	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+	get_buffer = permstate.get_buffer; \
+	bits_left = permstate.bits_left;
+
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	cinfop->src->next_input_byte = br_state.next_input_byte; \
+	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	permstate.get_buffer = get_buffer; \
+	permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
+
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
+
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+/* Load up the bit buffer to a depth of at least nbits */
+EXTERN(boolean) jpeg_fill_bit_buffer
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, int nbits));
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = htbl->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = htbl->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1; \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+  } \
+}
+
+/* Out-of-line case for Huffman code fetching */
+EXTERN(int) jpeg_huff_decode
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, d_derived_tbl * htbl, int min_bits));
diff --git a/src/main/jni/libjpeg/jdinput.c b/src/main/jni/libjpeg/jdinput.c
new file mode 100644
index 000000000..4261c1a1c
--- /dev/null
+++ b/src/main/jni/libjpeg/jdinput.c
@@ -0,0 +1,415 @@
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+
+  boolean inheaders;		/* TRUE until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+METHODDEF(int) consume_markers_with_huffman_index JPP((j_decompress_ptr cinfo,
+                    huffman_index *index, int current_scan));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* Called once, when first SOS marker is reached */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
+   * In the full decompressor, this will be overridden by jdmaster.c;
+   * but in the transcoder, jdmaster.c is not used, so we must do it here.
+   */
+  cinfo->min_DCT_scaled_size = DCTSIZE;
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_scaled_size = DCTSIZE;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* downsampled_width and downsampled_height will also be overridden by
+     * jdmaster.c if we are doing full decompression.  The transcoder library
+     * doesn't use these values, but the calling application might.
+     */
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed, until color conversion says otherwise */
+    compptr->component_needed = TRUE;
+    /* Mark no quantization table yet saved for component */
+    compptr->quant_table = NULL;
+  }
+
+  /* Compute number of fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
+  /* Decide whether file contains multiple scans */
+  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    cinfo->inputctl->has_multiple_scans = TRUE;
+  else
+    cinfo->inputctl->has_multiple_scans = FALSE;
+  cinfo->original_image_width = cinfo->image_width;
+}
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+    
+    cinfo->blocks_in_MCU = 0;
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+#ifdef ANDROID_TILE_BASED_DECODE
+      if (cinfo->tile_decode) {
+        tmp = (int) (jdiv_round_up(cinfo->image_width, 8)
+                % compptr->MCU_width);
+        if (tmp == 0) tmp = compptr->MCU_width;
+        compptr->last_col_width = tmp;
+      }
+#endif
+
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+    
+  }
+}
+
+GLOBAL(void)
+jpeg_decompress_per_scan_setup(j_decompress_ptr cinfo)
+{
+    per_scan_setup(cinfo);
+}
+
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  int ci, qtblno;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* No work if we already saved Q-table for this component */
+    if (compptr->quant_table != NULL)
+      continue;
+    /* Make sure specified quantization table is present */
+    qtblno = compptr->quant_tbl_no;
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    /* OK, save away the quantization table */
+    qtbl = (JQUANT_TBL *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(JQUANT_TBL));
+    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
+    compptr->quant_table = qtbl;
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);
+  latch_quant_tables(cinfo);
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+  cinfo->inputctl->consume_input_build_huffman_index =
+        cinfo->coef->consume_data_build_huffman_index;
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->inputctl->consume_input = consume_markers;
+  cinfo->inputctl->consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
+}
+
+
+METHODDEF(int)
+consume_markers_with_huffman_index (j_decompress_ptr cinfo,
+        huffman_index *index, int current_scan)
+{
+    return consume_markers(cinfo);
+}
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+  int val;
+
+  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
+    return JPEG_REACHED_EOI;
+
+  val = (*cinfo->marker->read_markers) (cinfo);
+
+  switch (val) {
+  case JPEG_REACHED_SOS:	/* Found SOS */
+    if (inputctl->inheaders) {	/* 1st SOS */
+      initial_setup(cinfo);
+      inputctl->inheaders = FALSE;
+      /* Note: start_input_pass must be called by jdmaster.c
+       * before any more input can be consumed.  jdapimin.c is
+       * responsible for enforcing this sequencing.
+       */
+    } else {			/* 2nd or later SOS marker */
+      if (! inputctl->pub.has_multiple_scans)
+	ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+      start_input_pass(cinfo);
+    }
+    break;
+  case JPEG_REACHED_EOI:	/* Found EOI */
+    inputctl->pub.eoi_reached = TRUE;
+    if (inputctl->inheaders) {	/* Tables-only datastream, apparently */
+      if (cinfo->marker->saw_SOF)
+	ERREXIT(cinfo, JERR_SOF_NO_SOS);
+    } else {
+      /* Prevent infinite loop in coef ctlr's decompress_data routine
+       * if user set output_scan_number larger than number of scans.
+       */
+      if (cinfo->output_scan_number > cinfo->input_scan_number)
+	cinfo->output_scan_number = cinfo->input_scan_number;
+    }
+    break;
+  case JPEG_SUSPENDED:
+    break;
+  }
+
+  return val;
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = TRUE;
+  /* Reset other modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Reset progression state -- would be cleaner if entropy decoder did this */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl;
+
+  /* Create subobject in permanent pool */
+  inputctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_input_controller));
+  cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
+  /* Initialize method pointers */
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.reset_input_controller = reset_input_controller;
+  inputctl->pub.start_input_pass = start_input_pass;
+  inputctl->pub.finish_input_pass = finish_input_pass;
+
+  inputctl->pub.consume_markers = consume_markers_with_huffman_index;
+  inputctl->pub.consume_input_build_huffman_index =
+        consume_markers_with_huffman_index;
+  /* Initialize state: can't use reset_input_controller since we don't
+   * want to try to reset other modules yet.
+   */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = TRUE;
+}
diff --git a/src/main/jni/libjpeg/jdmainct.c b/src/main/jni/libjpeg/jdmainct.c
new file mode 100644
index 000000000..13c956f5d
--- /dev/null
+++ b/src/main/jni/libjpeg/jdmainct.c
@@ -0,0 +1,512 @@
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
+ * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields */
+
+  /* Pointer to allocated workspace (M or M+2 row groups). */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  /* These are the master pointers to the funny-order pointer lists. */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* indicates which pointer set is now in use */
+  int context_state;		/* process_data state machine status */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+/* context_state values: */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Allocate space for the funny pointer lists.
+ * This is done only once, not once per pass.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, rgroup;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  /* Get top-level space for component array pointers.
+   * We alloc both arrays with one call to save a few cycles.
+   */
+  main->xbuffer[0] = (JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+  main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    /* Get space for pointer lists --- M+4 row groups in each list.
+     * We alloc both pointer lists with one call to save a few cycles.
+     */
+    xbuf = (JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
+    xbuf += rgroup;		/* want one row group at negative offsets */
+    main->xbuffer[0][ci] = xbuf;
+    xbuf += rgroup * (M + 4);
+    main->xbuffer[1][ci] = xbuf;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in main->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main->xbuffer[0][ci];
+    xbuf1 = main->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is */
+    buf = main->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
+ * This changes the pointer list state from top-of-image to the normal state.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main->xbuffer[0][ci];
+    xbuf1 = main->xbuffer[1][ci];
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
+      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
+      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
+      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = main->xbuffer[main->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1];
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->upsample->need_context_rows) {
+      main->pub.process_data = process_data_context_main;
+      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
+      main->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      main->context_state = CTX_PREPARE_FOR_IMCU;
+      main->iMCU_row_ctr = 0;
+    } else {
+      /* Simple case with no context needed */
+      main->pub.process_data = process_data_simple_main;
+    }
+    main->buffer_full = FALSE;	/* Mark buffer empty */
+    main->rowgroup_ctr = 0;
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* For last pass of 2-pass quantization, just crank the postprocessor */
+    main->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo,
+			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			  JDIMENSION out_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  JDIMENSION rowgroups_avail;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! main->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer))
+      return;			/* suspension forced, can do nothing more */
+    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+  }
+
+  /* There are always min_DCT_scaled_size row groups in an iMCU row. */
+  rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size;
+  /* Note: at the bottom of the image, we may pass extra garbage row groups
+   * to the postprocessor.  The postprocessor has to check for bottom
+   * of image anyway (at row resolution), so no point in us doing it too.
+   */
+
+  /* Feed the postprocessor */
+  (*cinfo->post->post_process_data) (cinfo, main->buffer,
+				     &main->rowgroup_ctr, rowgroups_avail,
+				     output_buf, out_row_ctr, out_rows_avail);
+
+  /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
+  if (main->rowgroup_ctr >= rowgroups_avail) {
+    main->buffer_full = FALSE;
+    main->rowgroup_ctr = 0;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo,
+			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! main->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   main->xbuffer[main->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    main->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (main->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend */
+    main->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    main->rowgroup_ctr = 0;
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    main->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (main->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    main->buffer_full = FALSE;
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
+    main->context_state = CTX_POSTPONED_ROW;
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo,
+			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			 JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
+				     (JDIMENSION *) NULL, (JDIMENSION) 0,
+				     output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr main;
+  int ci, rgroup, ngroups;
+  jpeg_component_info *compptr;
+
+  main = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = (struct jpeg_d_main_controller *) main;
+  main->pub.start_pass = start_pass_main;
+
+  if (need_full_buffer)		/* shouldn't happen */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Allocate the workspace.
+   * ngroups is the number of row groups we need.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
+    ngroups = cinfo->min_DCT_scaled_size + 2;
+  } else {
+    ngroups = cinfo->min_DCT_scaled_size;
+  }
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    main->buffer[ci] = (*cinfo->mem->alloc_sarray)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 compptr->width_in_blocks * compptr->DCT_scaled_size,
+			 (JDIMENSION) (rgroup * ngroups));
+  }
+}
diff --git a/src/main/jni/libjpeg/jdmarker.c b/src/main/jni/libjpeg/jdmarker.c
new file mode 100644
index 000000000..6978049d4
--- /dev/null
+++ b/src/main/jni/libjpeg/jdmarker.c
@@ -0,0 +1,1410 @@
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+  
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  
+  M_DHT   = 0xc4,
+  
+  M_DAC   = 0xcc,
+  
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+  
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  
+  M_TEM   = 0x01,
+  
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];
+
+  /* Status of COM/APPn marker saving */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare and initialize local copies of input pointer/count */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Unload the local copies --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
+ * but we must reload the local copies after a successful fill.
+ */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Process an SOI marker */
+{
+  int i;
+  
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Reset all parameters that are defined to be reset by SOI */
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+  cinfo->restart_interval = 0;
+
+  /* Set initial assumptions for colorspace etc */
+
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
+/* Process a SOFn marker */
+{
+  INT32 length;
+  int c, ci;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  cinfo->progressive_mode = is_prog;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3))
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 cinfo->num_components * SIZEOF(jpeg_component_info));
+  
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, compptr->component_id, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;
+    compptr->v_samp_factor = (c     ) & 15;
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     compptr->component_id, compptr->h_samp_factor,
+	     compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker */
+{
+  INT32 length;
+  int i, ci, n, c, cc, pi;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOS_NO_SOF);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n < 1 || n > MAX_COMPS_IN_SCAN)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters */
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, cc, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+    
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (cc == compptr->component_id)
+	goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+
+  id_found:
+
+    cinfo->cur_comp_info[i] = compptr;
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+    
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+
+    /* This CSi (cc) should differ from the previous CSi */
+    for (pi = 0; pi < i; pi++) {
+      if (cinfo->cur_comp_info[pi] == compptr)
+        ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+    }
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another SOS marker */
+  cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker */
+{
+  INT32 length;
+  int index, val;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 0) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    INPUT_BYTE(cinfo, val, return FALSE);
+
+    length -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
+
+    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
+
+    if (index >= NUM_ARITH_TBLS) { /* define AC table */
+      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
+    } else {			/* define DC table */
+      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
+      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
+      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+    }
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+      
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+
+    MEMZERO(&huffval[count], (256 - count) * SIZEOF(UINT8));
+
+    length -= count;
+
+    if (index & 0x10) {		/* AC table definition */
+      index -= 0x10;
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    } else {			/* DC table definition */
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+    }
+
+    if (index < 0 || index >= NUM_HUFF_TBLS)
+      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+  
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker */
+{
+  INT32 length;
+  int n, i, prec;
+  unsigned int tmp;
+  JQUANT_TBL *quant_ptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    INPUT_BYTE(cinfo, n, return FALSE);
+    prec = n >> 4;
+    n &= 0x0F;
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
+
+    if (n >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
+      
+    if (cinfo->quant_tbl_ptrs[n] == NULL)
+      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    quant_ptr = cinfo->quant_tbl_ptrs[n];
+
+    for (i = 0; i < DCTSIZE2; i++) {
+      if (prec)
+	INPUT_2BYTES(cinfo, tmp, return FALSE);
+      else
+	INPUT_BYTE(cinfo, tmp, return FALSE);
+      /* We convert the zigzag-order table to natural array order. */
+      quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (i = 0; i < DCTSIZE2; i += 8) {
+	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+      }
+    }
+
+    length -= DCTSIZE2+1;
+    if (prec) length -= DCTSIZE2;
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker */
+{
+  INT32 length;
+  unsigned int tmp;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  
+  if (length != 4)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+
+  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
+
+  cinfo->restart_interval = tmp;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
+#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
+#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP0.
+ * Take appropriate action if it is a JFIF marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  INT32 totallen = (INT32) datalen + remaining;
+
+  if (datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x49 &&
+      GETJOCTET(data[3]) == 0x46 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    /* Check version.
+     * Major version must be 1, anything else signals an incompatible change.
+     * (We used to treat this as an error, but now it's a nonfatal warning,
+     * because some bozo at Hijaak couldn't read the spec.)
+     * Minor version should be 0..2, but process anyway if newer.
+     */
+    if (cinfo->JFIF_major_version != 1)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    /* Generate trace messages */
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Validate thumbnail dimensions and issue appropriate messages */
+    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
+	       GETJOCTET(data[12]), GETJOCTET(data[13]));
+    totallen -= APP0_DATA_LEN;
+    if (totallen !=
+	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
+  } else if (datalen >= 6 &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x58 &&
+      GETJOCTET(data[3]) == 0x58 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF "JFXX" extension APP0 marker */
+    /* The library doesn't actually do anything with these,
+     * but we try to produce a helpful trace message.
+     */
+    switch (GETJOCTET(data[5])) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+	       GETJOCTET(data[5]), (int) totallen);
+      break;
+    }
+  } else {
+    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
+  }
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP14.
+ * Take appropriate action if it is an Adobe marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+
+  if (datalen >= APP14_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x41 &&
+      GETJOCTET(data[1]) == 0x64 &&
+      GETJOCTET(data[2]) == 0x6F &&
+      GETJOCTET(data[3]) == 0x62 &&
+      GETJOCTET(data[4]) == 0x65) {
+    /* Found Adobe APP14 marker */
+    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+    transform = GETJOCTET(data[11]);
+    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+    cinfo->saw_Adobe_marker = TRUE;
+    cinfo->Adobe_transform = (UINT8) transform;
+  } else {
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+  }
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it */
+{
+  INT32 length;
+  JOCTET b[APPN_DATA_LEN];
+  unsigned int i, numtoread;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* get the interesting part of the marker data */
+  if (length >= APPN_DATA_LEN)
+    numtoread = APPN_DATA_LEN;
+  else if (length > 0)
+    numtoread = (unsigned int) length;
+  else
+    numtoread = 0;
+  for (i = 0; i < numtoread; i++)
+    INPUT_BYTE(cinfo, b[i], return FALSE);
+  length -= numtoread;
+
+  /* process it */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  default:
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  INT32 length;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
+
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Like next_marker, but used to obtain the initial SOI marker. */
+/* For this marker, we do not allow preceding garbage or fill; otherwise,
+ * we might well scan an entire input file before realizing it ain't JPEG.
+ * If an application wants to process non-JFIF files, it must seek to the
+ * SOI before calling the JPEG library.
+ */
+{
+  int c, c2;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, c, return FALSE);
+  INPUT_BYTE(cinfo, c2, return FALSE);
+  if (c != 0xFF || c2 != (int) M_SOI)
+    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
+
+  cinfo->unread_marker = c2;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+
+    /*
+     * Save the position of the fist marker after SOF.
+     */
+    if (cinfo->marker->current_sos_marker_position == -1)
+      cinfo->marker->current_sos_marker_position =
+          jget_input_stream_position(cinfo) - 2;
+
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF0:		/* Baseline */
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      cinfo->marker->current_sos_marker_position = -1;
+      if (! get_sof(cinfo, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+    
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+      
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+{
+  /* Obtain a marker unless we already did. */
+  /* Note that next_marker will complain if it skips any data. */
+  if (cinfo->unread_marker == 0) {
+    if (! next_marker(cinfo))
+      return FALSE;
+  }
+
+  if (cinfo->unread_marker ==
+      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
+    /* Normal case --- swallow the marker and let entropy decoder continue */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  } else {
+    /* Uh-oh, the restart markers have been messed up. */
+    /* Let the data source manager determine how to resync. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+					    cinfo->marker->next_restart_num))
+      return FALSE;
+  }
+
+  /* Update next-restart state */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+{
+  int marker = cinfo->unread_marker;
+  int action = 1;
+  
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+  
+  /* Outer loop handles repeated decision after scanning forward. */
+  for (;;) {
+    if (marker < (int) M_SOF0)
+      action = 2;		/* invalid marker */
+    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
+      action = 3;		/* valid non-restart marker */
+    else {
+      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
+	  marker == ((int) M_RST0 + ((desired+2) & 7)))
+	action = 3;		/* one of the next two expected restarts */
+      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
+	       marker == ((int) M_RST0 + ((desired-2) & 7)))
+	action = 2;		/* a prior restart, so advance */
+      else
+	action = 1;		/* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+    switch (action) {
+    case 1:
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    case 2:
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (! next_marker(cinfo))
+	return FALSE;
+      marker = cinfo->unread_marker;
+      break;
+    case 3:
+      /* Return without advancing past this marker. */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+  } /* end loop */
+}
+
+/*
+ * Get the position for all SOS markers in the image.
+ */
+
+METHODDEF(void)
+get_sos_marker_position(j_decompress_ptr cinfo, huffman_index *index)
+{
+  unsigned char *head;
+  int count = 0;
+  int retcode = JPEG_REACHED_SOS;
+
+  while (cinfo->src->bytes_in_buffer > 0) {
+    if (retcode == JPEG_REACHED_SOS) {
+      jpeg_configure_huffman_index_scan(cinfo, index, count++,
+              cinfo->marker->current_sos_marker_position);
+      // Skips scan content to the next non-RST JPEG marker.
+      while(next_marker(cinfo) &&
+              cinfo->unread_marker >= M_RST0 && cinfo->unread_marker <= M_RST7)
+          ;
+      cinfo->marker->current_sos_marker_position =
+        jget_input_stream_position(cinfo) - 2;
+      retcode = read_markers(cinfo);
+    } else {
+      break;
+    }
+  }
+}
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  cinfo->comp_info = NULL;		/* until allocated by get_sof */
+  cinfo->input_scan_number = 0;		/* no SOS seen yet */
+  cinfo->unread_marker = 0;		/* no pending marker */
+  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  marker->pub.saw_SOF = FALSE;
+  marker->pub.discarded_bytes = 0;
+  marker->cur_marker = NULL;
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker;
+  int i;
+
+  /* Create subobject in permanent pool */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_marker_reader));
+  cinfo->marker = (struct jpeg_marker_reader *) marker;
+  /* Initialize public method pointers */
+  marker->pub.reset_marker_reader = reset_marker_reader;
+  marker->pub.read_markers = read_markers;
+  marker->pub.read_restart_marker = read_restart_marker;
+  marker->pub.get_sos_marker_position = get_sos_marker_position;
+
+  // Initialize the SOS marker position to avoid underdefined behavior due to
+  // using a undefined field.
+  marker->pub.current_sos_marker_position = 0;
+
+  /* Initialize COM/APPn processing.
+   * By default, we examine and then discard APP0 and APP14,
+   * but simply discard COM and all other APPn.
+   */
+  marker->process_COM = skip_variable;
+  marker->length_limit_COM = 0;
+  for (i = 0; i < 16; i++) {
+    marker->process_APPn[i] = skip_variable;
+    marker->length_limit_APPn[i] = 0;
+  }
+  marker->process_APPn[0] = get_interesting_appn;
+  marker->process_APPn[14] = get_interesting_appn;
+  /* Reset marker processing state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  long maxlength;
+  jpeg_marker_parser_method processor;
+
+  /* Length limit mustn't be larger than what we can allocate
+   * (should only be a concern in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > maxlength)
+    length_limit = (unsigned int) maxlength;
+
+  /* Choose processor routine to use.
+   * APP0/APP14 have special requirements.
+   */
+  if (length_limit) {
+    processor = save_marker;
+    /* If saving APP0/APP14, save at least enough for our internal use. */
+    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  } else {
+    processor = skip_variable;
+    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      processor = get_interesting_appn;
+  }
+
+  if (marker_code == (int) M_COM) {
+    marker->process_COM = processor;
+    marker->length_limit_COM = length_limit;
+  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    marker->process_APPn[marker_code - (int) M_APP0] = processor;
+    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
+  } else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code == (int) M_COM)
+    marker->process_COM = routine;
+  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
+    marker->process_APPn[marker_code - (int) M_APP0] = routine;
+  else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/src/main/jni/libjpeg/jdmaster.c b/src/main/jni/libjpeg/jdmaster.c
new file mode 100644
index 000000000..e3da758b1
--- /dev/null
+++ b/src/main/jni/libjpeg/jdmaster.c
@@ -0,0 +1,580 @@
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields */
+
+  int pass_number;		/* # of passes completed */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules,
+   * in case we need to switch modes.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr;
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Merging is the equivalent of plain box-filter upsampling */
+  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
+    return FALSE;
+
+#ifdef ANDROID_RGB
+  /* jdmerge.c only supports YCC=>RGB565 and YCC=>RGB color conversion */
+  if (cinfo->jpeg_color_space != JCS_YCbCr || 
+      cinfo->num_components != 3 ||
+      cinfo->out_color_components != 3 ||
+      (cinfo->out_color_space != JCS_RGB_565 && 
+         cinfo->out_color_space != JCS_RGB)) {
+    return FALSE;
+  }
+#else
+  /* jdmerge.c only supports YCC=>RGB color conversion */
+  if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
+      cinfo->out_color_space != JCS_RGB ||
+      cinfo->out_color_components != RGB_PIXELSIZE)
+    return FALSE;
+#endif
+
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 ||
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
+      cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
+      cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;			/* by golly, it'll work... */
+#else
+  return FALSE;
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+#endif
+
+  /* Prevent application from calling me at wrong times */
+#if ANDROID_TILE_BASED_DECODE
+  // Tile based decoding may call this function several times.
+  if (!cinfo->tile_decode)
+#endif
+    if (cinfo->global_state != DSTATE_READY)
+      ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* Compute actual output image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
+    /* Provide 1/8 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 8L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 8L);
+    cinfo->min_DCT_scaled_size = 1;
+  } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
+    /* Provide 1/4 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 4L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 4L);
+    cinfo->min_DCT_scaled_size = 2;
+  } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
+    /* Provide 1/2 scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 2L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 2L);
+    cinfo->min_DCT_scaled_size = 4;
+  } else {
+    /* Provide 1/1 scaling */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    cinfo->min_DCT_scaled_size = DCTSIZE;
+  }
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note this code assumes that the supported DCT scalings are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = cinfo->min_DCT_scaled_size;
+    while (ssize < DCTSIZE &&
+	   (compptr->h_samp_factor * ssize * 2 <=
+	    cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
+	   (compptr->v_samp_factor * ssize * 2 <=
+	    cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_scaled_size = ssize;
+  }
+
+  /* Recompute downsampled dimensions of components;
+   * application needs to know these if using raw downsampled data.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+#endif /* else share code with YCbCr */
+#ifdef ANDROID_RGB
+  case JCS_RGB_565:
+#endif
+  case JCS_YCbCr:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+#ifdef ANDROID_RGB
+  case JCS_RGBA_8888:
+#endif
+    cinfo->out_color_components = 4;
+    break;
+  default:			/* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 :
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
+ * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
+ * limiting step (just after the IDCT), a wildly out-of-range value is
+ * possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = range_limit[x & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * So the post-IDCT limiting table ends up looking like this:
+ *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
+ *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0,1,...,CENTERJSAMPLE-1
+ * Negative inputs select values from the upper half of the table after
+ * masking.
+ *
+ * We can save some space by overlapping the start of the post-IDCT table
+ * with the simpler range limiting table.  The post-IDCT table begins at
+ * sample_range_limit + CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{
+  JSAMPLE * table;
+  int i;
+
+  table = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+  cinfo->sample_range_limit = table;
+  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  /* Main part of "simple" table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  /* End of simple table, rest of first half of post-IDCT table */
+  for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
+    table[i] = MAXJSAMPLE;
+  /* Second half of post-IDCT table */
+  MEMZERO(table + (2 * (MAXJSAMPLE+1)),
+	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
+	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE;
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE);
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample)
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+				    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors)
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++;
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_decomp_master));
+  cinfo->master = (struct jpeg_decomp_master *) master;
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE;
+
+  master_selection(cinfo);
+}
diff --git a/src/main/jni/libjpeg/jdmerge.c b/src/main/jni/libjpeg/jdmerge.c
new file mode 100644
index 000000000..77f33083a
--- /dev/null
+++ b/src/main/jni/libjpeg/jdmerge.c
@@ -0,0 +1,757 @@
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCbCr => RGB color conversion only.
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+#ifdef ANDROID_RGB
+
+/* Declarations for ordered dithering.
+ * 
+ * We use 4x4 ordered dither array packed into 32 bits. This array is
+ * sufficent for dithering RGB_888 to RGB_565.
+ */
+
+#define DITHER_MASK         0x3
+#define DITHER_ROTATE(x)    (((x)<<24) | (((x)>>8)&0x00FFFFFF))
+static const INT32 dither_matrix[4] = {
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+#endif
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    upsample->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    upsample->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+      JDIMENSION size = upsample->out_row_width;
+#ifdef ANDROID_RGB
+    if (cinfo->out_color_space == JCS_RGB_565)
+      size = cinfo->output_width*2;
+#endif
+    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+		      1, size);
+
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row;
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+			 output_buf + *out_row_ctr);
+  /* Adjust counts */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED] = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] = range_limit[y + cblue];
+  }
+}
+
+
+#ifdef ANDROID_RGB
+METHODDEF(void)
+h2v1_merged_upsample_565 (j_decompress_ptr cinfo,
+              JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+              JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr, rgb);
+    outptr += 4;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r,g,b);
+    *(INT16*)outptr = rgb;
+  }
+}
+
+
+METHODDEF(void)
+h2v1_merged_upsample_565D (j_decompress_ptr cinfo,
+              JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+              JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JDIMENSION col_index = 0;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr, rgb);
+    outptr += 4;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r,g,b);
+    *(INT16*)outptr = rgb;
+  }
+}
+
+
+#endif
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED] = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] = range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED] = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] = range_limit[y + cblue];
+  }
+}
+
+
+#ifdef ANDROID_RGB
+
+METHODDEF(void)
+h2v2_merged_upsample_565 (j_decompress_ptr cinfo,
+              JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+              JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr0, rgb);
+    outptr0 += 4;
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr1, rgb);
+    outptr1 += 4;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r,g,b);
+    *(INT16*)outptr0 = rgb;
+   y  = GETJSAMPLE(*inptr01);
+   r = range_limit[y + cred];
+   g = range_limit[y + cgreen];
+   b = range_limit[y + cblue];
+   rgb = PACK_SHORT_565(r,g,b);
+   *(INT16*)outptr1 = rgb;
+  }
+}
+
+
+
+METHODDEF(void)
+h2v2_merged_upsample_565D (j_decompress_ptr cinfo,
+              JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+              JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JDIMENSION col_index = 0;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK];
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */    
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr0, rgb);
+    outptr0 += 4;
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r,g,b);
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r,g,b));
+    WRITE_TWO_PIXELS(outptr1, rgb);
+    outptr1 += 4;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r,g,b);
+    *(INT16*)outptr0 = rgb;
+   y  = GETJSAMPLE(*inptr01);
+   r = range_limit[DITHER_565_R(y + cred, d1)];
+   g = range_limit[DITHER_565_G(y + cgreen, d1)];
+   b = range_limit[DITHER_565_B(y + cblue, d1)];
+   rgb = PACK_SHORT_565(r,g,b);
+   *(INT16*)outptr1 = rgb;
+  }
+}
+
+#endif
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+  
+  if (cinfo->max_v_samp_factor == 2) {
+    upsample->pub.upsample = merged_2v_upsample;
+    upsample->upmethod = h2v2_merged_upsample;
+#ifdef ANDROID_RGB
+    if (cinfo->out_color_space == JCS_RGB_565) {
+        if (cinfo->dither_mode == JDITHER_NONE) {
+            upsample->upmethod = h2v2_merged_upsample_565;
+        } else {
+            upsample->upmethod = h2v2_merged_upsample_565D;
+        }
+    }
+#endif
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+  } else {
+    upsample->pub.upsample = merged_1v_upsample;
+    upsample->upmethod = h2v1_merged_upsample;
+#ifdef ANDROID_RGB
+    if (cinfo->out_color_space == JCS_RGB_565) {
+        if (cinfo->dither_mode == JDITHER_NONE) {
+            upsample->upmethod = h2v1_merged_upsample_565;
+        } else {
+            upsample->upmethod = h2v1_merged_upsample_565D;
+        }
+    }
+#endif
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jdphuff.c b/src/main/jni/libjpeg/jdphuff.c
new file mode 100644
index 000000000..2f856e07b
--- /dev/null
+++ b/src/main/jni/libjpeg/jdphuff.c
@@ -0,0 +1,770 @@
+/*
+ * jdphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines for progressive JPEG.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdhuff.c */
+
+
+#ifdef D_PROGRESSIVE_SUPPORTED
+
+/*
+ * Expanded entropy decoder object for progressive Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+} phuff_entropy_decoder;
+
+typedef phuff_entropy_decoder * phuff_entropy_ptr;
+
+/* Forward declarations */
+METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_phuff_decoder (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band, bad;
+  int ci, coefi, tbl;
+  int *coef_bit_ptr;
+  jpeg_component_info * compptr;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* Validate scan parameters */
+  bad = FALSE;
+  if (is_DC_band) {
+    if (cinfo->Se != 0)
+      bad = TRUE;
+  } else {
+    /* need not check Ss/Se < 0 since they came from unsigned bytes */
+    if (cinfo->Ss > cinfo->Se || cinfo->Se >= DCTSIZE2)
+      bad = TRUE;
+    /* AC scans may have only one component */
+    if (cinfo->comps_in_scan != 1)
+      bad = TRUE;
+  }
+  if (cinfo->Ah != 0) {
+    /* Successive approximation refinement scan: must have Al = Ah-1. */
+    if (cinfo->Al != cinfo->Ah-1)
+      bad = TRUE;
+  }
+  if (cinfo->Al > 13)		/* need not check for < 0 */
+    bad = TRUE;
+  /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+   * but the spec doesn't say so, and we try to be liberal about what we
+   * accept.  Note: large Al values could result in out-of-range DC
+   * coefficients during early scans, leading to bizarre displays due to
+   * overflows in the IDCT math.  But we won't crash.
+   */
+  if (bad)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	     cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+  /* Update progression status, and verify that scan order is legal.
+   * Note that inter-scan inconsistencies are treated as warnings
+   * not fatal errors ... not clear if this is right way to behave.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    int cindex = cinfo->cur_comp_info[ci]->component_index;
+    coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+    if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+      WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+    for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+      int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+      if (cinfo->Ah != expected)
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+      coef_bit_ptr[coefi] = cinfo->Al;
+    }
+  }
+
+  /* Select MCU decoding routine */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_first;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_refine;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_refine;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Make sure requested tables are present, and compute derived tables.
+     * We may build same derived table more than once, but it's not expensive.
+     */
+    if (is_DC_band) {
+      if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	tbl = compptr->dc_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				& entropy->derived_tbls[tbl]);
+      }
+    } else {
+      tbl = compptr->ac_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+			      & entropy->derived_tbls[tbl]);
+      /* remember the single active table */
+      entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize private state variables */
+  entropy->saved.EOBRUN = 0;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
+
+#else
+
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
+  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
+    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
+    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
+    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int ci;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+  /* Re-init EOB run count, too */
+  entropy->saved.EOBRUN = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
+      (*block)[0] = (JCOEF) (s << Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN > 0)		/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale and output coefficient in natural (dezigzagged) order */
+	  (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
+	} else {
+	  if (r == 15) {	/* ZRL */
+	    k += 15;		/* skip 15 zeroes in band */
+	  } else {		/* EOBr, run length is 2^r + appended bits */
+	    EOBRUN = 1 << r;
+	    if (r) {		/* EOBr, r > 0 */
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    EOBRUN--;		/* this band is processed at this moment */
+	    break;		/* force end-of-band */
+	  }
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int blkn;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* Not worth the cycles to check insufficient_data here,
+   * since we will not change the data anyway if we read zeroes.
+   */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      (*block)[0] |= p1;
+    /* Note: since we use |=, repeating the assignment later is safe */
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;
+  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;
+  int newnz_pos[DCTSIZE2];
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + jpeg_natural_order[k];
+	  if (*thiscoef != 0) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = jpeg_natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+      }
+    }
+
+    if (EOBRUN > 0) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      for (; k <= Se; k++) {
+	thiscoef = *block + jpeg_natural_order[k];
+	if (*thiscoef != 0) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+      }
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz > 0)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+/*
+ * Save the current Huffman decoder position and the bit buffer
+ * into bitstream_offset and get_buffer, respectively.
+ */
+GLOBAL(void)
+jpeg_get_huffman_decoder_configuration_progressive(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+
+  if (cinfo->restart_interval) {
+    // We are at the end of a data segment
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return;
+  }
+
+  // Save restarts_to_go and next_restart_num.
+  offset->restarts_to_go = (unsigned short) entropy->restarts_to_go;
+  offset->next_restart_num = cinfo->marker->next_restart_num;
+
+  offset->bitstream_offset =
+      (jget_input_stream_position(cinfo) << LOG_TWO_BIT_BUF_SIZE)
+      + entropy->bitstate.bits_left;
+
+  offset->get_buffer = entropy->bitstate.get_buffer;
+}
+
+/*
+ * Save the current Huffman deocde position and the DC coefficients
+ * for each component into bitstream_offset and dc_info[], respectively.
+ */
+METHODDEF(void)
+get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+        huffman_offset_data *offset)
+{
+  int i;
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  jpeg_get_huffman_decoder_configuration_progressive(cinfo, offset);
+  offset->EOBRUN = entropy->saved.EOBRUN;
+  for (i = 0; i < cinfo->comps_in_scan; i++)
+    offset->prev_dc[i] = entropy->saved.last_dc_val[i];
+}
+
+/*
+ * Configure the Huffman decoder reader position and bit buffer.
+ */
+GLOBAL(void)
+jpeg_configure_huffman_decoder_progressive(j_decompress_ptr cinfo,
+        huffman_offset_data offset)
+{
+	phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+
+  // Restore restarts_to_go and next_restart_num
+  cinfo->unread_marker = 0;
+  entropy->restarts_to_go = offset.restarts_to_go;
+  cinfo->marker->next_restart_num = offset.next_restart_num;
+
+  unsigned int bitstream_offset = offset.bitstream_offset;
+  int blkn, i;
+
+  unsigned int byte_offset = bitstream_offset >> LOG_TWO_BIT_BUF_SIZE;
+  unsigned int bit_in_bit_buffer =
+      bitstream_offset & ((1 << LOG_TWO_BIT_BUF_SIZE) - 1);
+
+  jset_input_stream_position_bit(cinfo, byte_offset,
+          bit_in_bit_buffer, offset.get_buffer);
+}
+
+/*
+ * Configure the Huffman decoder to decode the image
+ * starting from (iMCU_row_offset, iMCU_col_offset).
+ */
+METHODDEF(void)
+configure_huffman_decoder(j_decompress_ptr cinfo, huffman_offset_data offset)
+{
+  int i;
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  jpeg_configure_huffman_decoder_progressive(cinfo, offset);
+  entropy->saved.EOBRUN = offset.EOBRUN;
+  for (i = 0; i < cinfo->comps_in_scan; i++)
+    entropy->saved.last_dc_val[i] = offset.prev_dc[i];
+}
+
+GLOBAL(void)
+jpeg_configure_huffman_index_scan(j_decompress_ptr cinfo,
+        huffman_index *index, int scan_no, int offset)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  if (scan_no >= index->scan_count) {
+    index->scan = realloc(index->scan,
+                    (scan_no + 1) * sizeof(huffman_scan_header));
+    index->mem_used += (scan_no - index->scan_count + 1)
+      * (sizeof(huffman_scan_header) + cinfo->total_iMCU_rows
+      * sizeof(huffman_offset_data*));
+    index->scan_count = scan_no + 1;
+  }
+  index->scan[scan_no].offset = (huffman_offset_data**)malloc(
+          cinfo->total_iMCU_rows * sizeof(huffman_offset_data*));
+  index->scan[scan_no].bitstream_offset = offset;
+}
+
+/*
+ * Module initialization routine for progressive Huffman entropy decoding.
+ */
+GLOBAL(void)
+jinit_phuff_decoder (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy;
+  int *coef_bit_ptr;
+  int ci, i;
+
+  entropy = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
+  entropy->pub.start_pass = start_pass_phuff_decoder;
+  entropy->pub.configure_huffman_decoder = configure_huffman_decoder;
+  entropy->pub.get_huffman_decoder_configuration =
+        get_huffman_decoder_configuration;
+
+  /* Mark derived tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+  }
+
+  /* Create progression status table */
+  cinfo->coef_bits = (int (*)[DCTSIZE2])
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components*DCTSIZE2*SIZEOF(int));
+  coef_bit_ptr = & cinfo->coef_bits[0][0];
+  for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (i = 0; i < DCTSIZE2; i++)
+      *coef_bit_ptr++ = -1;
+}
+
+#endif /* D_PROGRESSIVE_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jdpostct.c b/src/main/jni/libjpeg/jdpostct.c
new file mode 100644
index 000000000..571563d72
--- /dev/null
+++ b/src/main/jni/libjpeg/jdpostct.c
@@ -0,0 +1,290 @@
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;	/* buffer size in rows */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub.post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+	post->buffer = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, post->whole_image,
+	   (JDIMENSION) 0, post->strip_height, TRUE);
+      }
+    } else {
+      /* For single-pass processing without color quantization,
+       * I have no work to do; just call the upsampler directly.
+       */
+      post->pub.post_process_data = cinfo->upsample->upsample;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0;
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Fill the buffer, but not more than what we can dump out in one go. */
+  /* Note we rely on the upsampler to detect bottom of image. */
+  max_rows = out_rows_avail - *out_row_ctr;
+  if (max_rows > post->strip_height)
+    max_rows = post->strip_height;
+  num_rows = 0;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &num_rows, max_rows);
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+  *out_row_ctr += num_rows;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION old_next_row, num_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, TRUE);
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  old_next_row = post->next_row;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted, */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
+					 (JSAMPARRAY) NULL, (int) num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance if we filled the strip. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, FALSE);
+  }
+
+  /* Determine number of rows to emit. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row;
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer + post->next_row, output_buf + *out_row_ctr,
+		(int) num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance if we filled the strip. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *) post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;	/* flag for no virtual arrays */
+  post->buffer = NULL;		/* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed */
+  if (cinfo->quantize_colors) {
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) post->strip_height),
+	 post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 post->strip_height);
+    }
+  }
+}
diff --git a/src/main/jni/libjpeg/jdsample.c b/src/main/jni/libjpeg/jdsample.c
new file mode 100644
index 000000000..80ffefb2a
--- /dev/null
+++ b/src/main/jni/libjpeg/jdsample.c
@@ -0,0 +1,478 @@
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to upsample a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * ie do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_expand need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the conversion buffer empty */
+  upsample->next_row_out = cinfo->max_v_samp_factor;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion a row at a time.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JDIMENSION num_rows;
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+	upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go) 
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr;
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
+				     (JDIMENSION) upsample->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = input_data;
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components.
+ * These components will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = NULL;	/* safety check */
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  register int h;
+  JSAMPROW outend;
+  int h_expand, v_expand;
+  int inrow, outrow;
+
+  h_expand = upsample->h_expand[compptr->component_index];
+  v_expand = upsample->v_expand[compptr->component_index];
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    /* Generate one output row with proper horizontal expansion */
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      for (h = h_expand; h > 0; h--) {
+	*outptr++ = invalue;
+      }
+    }
+    /* Generate any additional output rows by duplicating the first one */
+    if (v_expand > 1) {
+      jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
+			v_expand-1, cinfo->output_width);
+    }
+    inrow++;
+    outrow += v_expand;
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+  int inrow, outrow;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+    jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
+		      1, cinfo->output_width);
+    inrow++;
+    outrow += 2;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+ *
+ * The upsampling algorithm is linear interpolation between pixel centers,
+ * also known as a "triangle filter".  This is a good compromise between
+ * speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+ * of the way between input pixel centers.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register int invalue;
+  register JDIMENSION colctr;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    /* Special case for first column */
+    invalue = GETJSAMPLE(*inptr++);
+    *outptr++ = (JSAMPLE) invalue;
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+
+    for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+      /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
+      invalue = GETJSAMPLE(*inptr++) * 3;
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2);
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(*inptr) + 2) >> 2);
+    }
+
+    /* Special case for last column */
+    invalue = GETJSAMPLE(*inptr);
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+    *outptr++ = (JSAMPLE) invalue;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * Again a triangle filter; see comments for h2v1 case, above.
+ *
+ * It is OK for us to reference the adjacent input rows because we demanded
+ * context from the main buffer controller (see initialization code).
+ */
+
+METHODDEF(void)
+h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr0, inptr1, outptr;
+#if BITS_IN_JSAMPLE == 8
+  register int thiscolsum, lastcolsum, nextcolsum;
+#else
+  register INT32 thiscolsum, lastcolsum, nextcolsum;
+#endif
+  register JDIMENSION colctr;
+  int inrow, outrow, v;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    for (v = 0; v < 2; v++) {
+      /* inptr0 points to nearest input row, inptr1 points to next nearest */
+      inptr0 = input_data[inrow];
+      if (v == 0)		/* next nearest is row above */
+	inptr1 = input_data[inrow-1];
+      else			/* next nearest is row below */
+	inptr1 = input_data[inrow+1];
+      outptr = output_data[outrow++];
+
+      /* Special case for first column */
+      thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+      lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+
+      for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+	/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+	/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+	nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+	lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+      }
+
+      /* Special case for last column */
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 7) >> 4);
+    }
+    inrow++;
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean need_buffer, do_fancy;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
+   * so don't ask for it.
+   */
+  do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    need_buffer = TRUE;
+    if (! compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group == v_out_group) {
+      /* Special cases for 2h1v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2)
+	upsample->methods[ci] = h2v1_fancy_upsample;
+      else
+	upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special cases for 2h2v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) {
+	upsample->methods[ci] = h2v2_fancy_upsample;
+	upsample->pub.need_context_rows = TRUE;
+      } else
+	upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    if (need_buffer) {
+      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) jround_up((long) cinfo->output_width,
+				(long) cinfo->max_h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/src/main/jni/libjpeg/jdtrans.c b/src/main/jni/libjpeg/jdtrans.c
new file mode 100644
index 000000000..586909cb6
--- /dev/null
+++ b/src/main/jni/libjpeg/jdtrans.c
@@ -0,0 +1,270 @@
+/*
+ * jdtrans.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+        return NULL;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+        cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;			/* keep compiler happy */
+}
+
+LOCAL(boolean)
+jpeg_build_huffman_index_progressive(j_decompress_ptr cinfo,
+        huffman_index *index)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    printf("Progressive Mode\n");
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    int mcu, i;
+    cinfo->marker->get_sos_marker_position(cinfo, index);
+
+    /* Absorb whole file into the coef buffer */
+    for (mcu = 0; mcu < cinfo->total_iMCU_rows; mcu++) {
+      int retcode = 0;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      jinit_phuff_decoder(cinfo);
+      for (i = 0; i < index->scan_count; i++) {
+        (*cinfo->inputctl->finish_input_pass) (cinfo);
+        jset_input_stream_position(cinfo, index->scan[i].bitstream_offset);
+        cinfo->unread_marker = 0;
+        retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, i);
+        if (retcode == JPEG_REACHED_EOI)
+          break;
+        cinfo->input_iMCU_row = mcu;
+        if (mcu != 0)
+          (*cinfo->entropy->configure_huffman_decoder)
+                (cinfo, index->scan[i].prev_MCU_offset);
+        cinfo->input_scan_number = i;
+        retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, i);
+      }
+      if (retcode == JPEG_SUSPENDED)
+        return FALSE;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+          cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return TRUE;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return FALSE;			/* keep compiler happy */
+}
+
+LOCAL(boolean)
+jpeg_build_huffman_index_baseline(j_decompress_ptr cinfo, huffman_index *index)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    printf("Baseline Mode\n");
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input_build_huffman_index)
+                    (cinfo, index, 0);
+      if (retcode == JPEG_SUSPENDED)
+        return FALSE;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      if (retcode == JPEG_SCAN_COMPLETED)
+        break;
+
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+        cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return TRUE;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return FALSE;			/* keep compiler happy */
+}
+
+GLOBAL(boolean)
+jpeg_build_huffman_index(j_decompress_ptr cinfo, huffman_index *index)
+{
+    cinfo->tile_decode = TRUE;
+    if (cinfo->progressive_mode)
+      return jpeg_build_huffman_index_progressive(cinfo, index);
+    else
+      return jpeg_build_huffman_index_baseline(cinfo, index);
+}
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ */
+
+LOCAL(void)
+transdecode_master_selection (j_decompress_ptr cinfo)
+{
+  /* This is effectively a buffered-image operation. */
+  cinfo->buffered_image = TRUE;
+
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_huff_decoder(cinfo);
+    }
+  }
+
+  /* Always get a full-image coefficient buffer. */
+  jinit_d_coef_controller(cinfo, TRUE);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Initialize progress monitoring. */
+  if (cinfo->progress != NULL) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else if (cinfo->inputctl->has_multiple_scans) {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    } else {
+      nscans = 1;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = 1;
+  }
+}
diff --git a/src/main/jni/libjpeg/jerror.c b/src/main/jni/libjpeg/jerror.c
new file mode 100644
index 000000000..3da7be86a
--- /dev/null
+++ b/src/main/jni/libjpeg/jerror.c
@@ -0,0 +1,252 @@
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ * The message table is made an external symbol just in case any applications
+ * want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,
+
+const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(void)
+error_exit (j_common_ptr cinfo)
+{
+  /* Always display the message */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  /* Create the message */
+  (*cinfo->err->format_message) (cinfo, buffer);
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+  /* Display it in a message dialog box */
+  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#else
+  /* Send it to stderr, adding a newline */
+  fprintf(stderr, "%s\n", buffer);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+
+  if (msg_level < 0) {
+    /* It's a warning message.  Since corrupt files may generate many warnings,
+     * the policy implemented here is to show only the first warning,
+     * unless trace_level >= 3.
+     */
+    if (err->num_warnings == 0 || err->trace_level >= 3)
+      (*err->output_message) (cinfo);
+    /* Always count warnings in num_warnings. */
+    err->num_warnings++;
+  } else {
+    /* It's a trace message.  Show it if trace_level >= msg_level. */
+    if (err->trace_level >= msg_level)
+      (*err->output_message) (cinfo);
+  }
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
+ * characters.  Note that no '\n' character is added to the string.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter, as indicated by %s in the message text */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer */
+  if (isstring)
+    sprintf(buffer, msgtext, err->msg_parm.s);
+  else
+    sprintf(buffer, msgtext,
+	    err->msg_parm.i[0], err->msg_parm.i[1],
+	    err->msg_parm.i[2], err->msg_parm.i[3],
+	    err->msg_parm.i[4], err->msg_parm.i[5],
+	    err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  cinfo->err->num_warnings = 0;
+  /* trace_level is not reset since it is an application-supplied parameter */
+  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  err->trace_level = 0;		/* default = no tracing */
+  err->num_warnings = 0;	/* no warnings emitted yet */
+  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+
+  /* Initialize message table pointers */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+
+  err->addon_message_table = NULL;
+  err->first_addon_message = 0;	/* for safety */
+  err->last_addon_message = 0;
+
+  return err;
+}
diff --git a/src/main/jni/libjpeg/jerror.h b/src/main/jni/libjpeg/jerror.h
new file mode 100644
index 000000000..fc2fffeac
--- /dev/null
+++ b/src/main/jni/libjpeg/jerror.h
@@ -0,0 +1,291 @@
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros are defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+JMESSAGE(JERR_ARITH_NOTIMPL,
+	 "Sorry, there are legal restrictions on arithmetic coding")
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_SOS_NO_SOF, "Invalid JPEG file structure: SOS before SOF")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer */
+
+/* Fatal errors (print message and exit) */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)
+
+/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Informational/debugging messages */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/src/main/jni/libjpeg/jfdctflt.c b/src/main/jni/libjpeg/jfdctflt.c
new file mode 100644
index 000000000..79d7a0078
--- /dev/null
+++ b/src/main/jni/libjpeg/jfdctflt.c
@@ -0,0 +1,168 @@
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_float (FAST_FLOAT * data)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  int ctr;
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jfdctfst.c b/src/main/jni/libjpeg/jfdctfst.c
new file mode 100644
index 000000000..ccb378a3b
--- /dev/null
+++ b/src/main/jni/libjpeg/jfdctfst.c
@@ -0,0 +1,224 @@
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jfdctint.c b/src/main/jni/libjpeg/jfdctint.c
new file mode 100644
index 000000000..0a78b64ae
--- /dev/null
+++ b/src/main/jni/libjpeg/jfdctint.c
@@ -0,0 +1,283 @@
+/*
+ * jfdctint.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is INT32 anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_islow (DCTELEM * data)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+				   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+				   CONST_BITS-PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+    
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+					   CONST_BITS+PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
+					   CONST_BITS+PASS1_BITS);
+    
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jidctflt.c b/src/main/jni/libjpeg/jidctflt.c
new file mode 100644
index 000000000..0188ce3df
--- /dev/null
+++ b/src/main/jni/libjpeg/jidctflt.c
@@ -0,0 +1,242 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a float result.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  FLOAT_MULT_TYPE * quantptr;
+  FAST_FLOAT * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*4] = tmp3 + tmp4;
+    wsptr[DCTSIZE*3] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+    
+    /* Even part */
+
+    tmp10 = wsptr[0] + wsptr[4];
+    tmp11 = wsptr[0] - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+    outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jidctfst.c b/src/main/jni/libjpeg/jidctfst.c
new file mode 100644
index 000000000..dba4216fb
--- /dev/null
+++ b/src/main/jni/libjpeg/jidctfst.c
@@ -0,0 +1,368 @@
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  8
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif
+
+
+/* Like DESCALE, but applies to a DCTELEM and produces an int.
+ * We assume that int right shift is unsigned if INT32 right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+#ifdef USE_ACCURATE_ROUNDING
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
+#else
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
+#endif
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  IFAST_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IDESCALE */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
+    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+    wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+    wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+
+    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+
+    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
+	    - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jidctint.c b/src/main/jni/libjpeg/jidctint.c
new file mode 100644
index 000000000..a72b3207c
--- /dev/null
+++ b/src/main/jni/libjpeg/jidctint.c
@@ -0,0 +1,389 @@
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    
+    tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
+    tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jidctred.c b/src/main/jni/libjpeg/jidctred.c
new file mode 100644
index 000000000..421f3c7ca
--- /dev/null
+++ b/src/main/jni/libjpeg/jidctred.c
@@ -0,0 +1,398 @@
+/*
+ * jidctred.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains inverse-DCT routines that produce reduced-size output:
+ * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+ *
+ * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
+ * algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
+ * with an 8-to-4 step that produces the four averages of two adjacent outputs
+ * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
+ * These steps were derived by computing the corresponding values at the end
+ * of the normal LL&M code, then simplifying as much as possible.
+ *
+ * 1x1 is trivial: just take the DC coefficient divided by 8.
+ *
+ * See jidctint.c for additional comments.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling is the same as in jidctint.c. */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_211164243  ((INT32)  1730)	/* FIX(0.211164243) */
+#define FIX_0_509795579  ((INT32)  4176)	/* FIX(0.509795579) */
+#define FIX_0_601344887  ((INT32)  4926)	/* FIX(0.601344887) */
+#define FIX_0_720959822  ((INT32)  5906)	/* FIX(0.720959822) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_850430095  ((INT32)  6967)	/* FIX(0.850430095) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_061594337  ((INT32)  8697)	/* FIX(1.061594337) */
+#define FIX_1_272758580  ((INT32)  10426)	/* FIX(1.272758580) */
+#define FIX_1_451774981  ((INT32)  11893)	/* FIX(1.451774981) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_2_172734803  ((INT32)  17799)	/* FIX(2.172734803) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_624509785  ((INT32)  29692)	/* FIX(3.624509785) */
+#else
+#define FIX_0_211164243  FIX(0.211164243)
+#define FIX_0_509795579  FIX(0.509795579)
+#define FIX_0_601344887  FIX(0.601344887)
+#define FIX_0_720959822  FIX(0.720959822)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_850430095  FIX(0.850430095)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_061594337  FIX(1.061594337)
+#define FIX_1_272758580  FIX(1.272758580)
+#define FIX_1_451774981  FIX(1.451774981)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_172734803  FIX(2.172734803)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_624509785  FIX(3.624509785)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block.
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process column 4, because second pass won't use it */
+    if (ctr == DCTSIZE-4)
+      continue;
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
+	inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine term 4 for 4x4 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= (CONST_BITS+1);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
+  }
+  
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
+    
+    tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+	 + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = (INT32) wsptr[7];
+    z2 = (INT32) wsptr[5];
+    z3 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[1];
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp10, z1;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process columns 2,4,6 */
+    if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6)
+      continue;
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 = z1 << (CONST_BITS+2);
+    
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
+  }
+  
+  /* Pass 2: process 2 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
+    
+    /* Odd part */
+
+    tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
+	 + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+	 + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+	 + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block.
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int dcval;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* We hardly need an inverse DCT routine for this: just take the
+   * average pixel value, which is one-eighth of the DC coefficient.
+   */
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
+  dcval = (int) DESCALE((INT32) dcval, 3);
+
+  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jinclude.h b/src/main/jni/libjpeg/jinclude.h
new file mode 100644
index 000000000..0a4f15146
--- /dev/null
+++ b/src/main/jni/libjpeg/jinclude.h
@@ -0,0 +1,91 @@
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ *
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ */
+
+#define SIZEOF(object)	((size_t) sizeof(object))
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ */
+
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
diff --git a/src/main/jni/libjpeg/jmemmgr.c b/src/main/jni/libjpeg/jmemmgr.c
new file mode 100644
index 000000000..d801b322d
--- /dev/null
+++ b/src/main/jni/libjpeg/jmemmgr.c
@@ -0,0 +1,1118 @@
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));
+#endif
+#endif
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of sizeof(ALIGN_TYPE).
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double
+#endif
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr;
+
+typedef union small_pool_struct {
+  struct {
+    small_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr;
+
+typedef union large_pool_struct {
+  struct {
+    large_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  long total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+
+  /* Since this is only a debugging stub, we can cheat a little by using
+   * fprintf directly rather than going through the trace message code.
+   * This is helpful because message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, mem->total_space_allocated);
+
+  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
+       lhdr_ptr = lhdr_ptr->hdr.next) {
+    fprintf(stderr, "  Large chunk used %ld\n",
+	    (long) lhdr_ptr->hdr.bytes_used);
+  }
+
+  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
+       shdr_ptr = shdr_ptr->hdr.next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) shdr_ptr->hdr.bytes_used,
+	    (long) shdr_ptr->hdr.bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(void)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Report an out-of-memory error and stop execution */
+/* If we compiled MEM_STATS support, report alloc requests before dying */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	1600,			/* first PERMANENT pool */
+	16000			/* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	0,			/* additional PERMANENT pools */
+	5000			/* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr hdr_ptr, prev_hdr_ptr;
+  char * data_ptr;
+  size_t odd_bytes, min_request, slop;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
+    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* See if space is available in any existing pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+  prev_hdr_ptr = NULL;
+  hdr_ptr = mem->small_list[pool_id];
+  while (hdr_ptr != NULL) {
+    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
+      break;			/* found pool with enough space */
+    prev_hdr_ptr = hdr_ptr;
+    hdr_ptr = hdr_ptr->hdr.next;
+  }
+
+  /* Time to make a new pool? */
+  if (hdr_ptr == NULL) {
+    /* min_request is what we need now, slop is what will be leftover */
+    min_request = sizeofobject + SIZEOF(small_pool_hdr);
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      slop = first_pool_slop[pool_id];
+    else
+      slop = extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t) (MAX_ALLOC_CHUNK-min_request))
+      slop = (size_t) (MAX_ALLOC_CHUNK-min_request);
+    /* Try to get space, if fail reduce slop and try again */
+    for (;;) {
+      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
+      if (hdr_ptr != NULL)
+	break;
+      slop /= 2;
+      if (slop < MIN_SLOP)	/* give up when it gets real small */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += min_request + slop;
+    /* Success, initialize the new pool header and add to end of list */
+    hdr_ptr->hdr.next = NULL;
+    hdr_ptr->hdr.bytes_used = 0;
+    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      mem->small_list[pool_id] = hdr_ptr;
+    else
+      prev_hdr_ptr->hdr.next = hdr_ptr;
+  }
+
+  /* OK, allocate the object from the current pool */
+  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
+  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
+  hdr_ptr->hdr.bytes_used += sizeofobject;
+  hdr_ptr->hdr.bytes_left -= sizeofobject;
+
+  return (void *) data_ptr;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr hdr_ptr;
+  size_t odd_bytes;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
+    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* Always make a new pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
+					    SIZEOF(large_pool_hdr));
+  if (hdr_ptr == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
+
+  /* Success, initialize the new pool header and add to list */
+  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  /* We maintain space counts in each pool header for statistical purposes,
+   * even though they are not needed for allocation.
+   */
+  hdr_ptr->hdr.bytes_used = sizeofobject;
+  hdr_ptr->hdr.bytes_left = 0;
+  mem->large_list[pool_id] = hdr_ptr;
+
+  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Allocate a 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY result;
+  JSAMPROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				    (size_t) (numrows * SIZEOF(JSAMPROW)));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+		  * SIZEOF(JSAMPLE)));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += samplesperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Allocate a 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY result;
+  JBLOCKROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+		  * SIZEOF(JBLOCK)));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += blocksperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_sarray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->samplesperrow = samplesperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
+  mem->virt_sarray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_barray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->blocksperrow = blocksperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
+  mem->virt_barray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long space_per_minheight, maximum_space, avail_mem;
+  long minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * These may be of use to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) sptr->maxaccess *
+			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      maximum_space += (long) sptr->rows_in_array *
+		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) bptr->maxaccess *
+			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      maximum_space += (long) bptr->rows_in_array *
+		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk;
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk;
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual sample array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual coefficient-block array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual sample array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual block array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+  size_t space_freed;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
+#endif
+
+  /* If freeing IMAGE pool, close any virtual arrays first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sptr;
+    jvirt_barray_ptr bptr;
+
+    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+      if (sptr->b_s_open) {	/* there may be no backing store */
+	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      }
+    }
+    mem->virt_sarray_list = NULL;
+    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+      if (bptr->b_s_open) {	/* there may be no backing store */
+	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      }
+    }
+    mem->virt_barray_list = NULL;
+  }
+
+  /* Release large objects */
+  lhdr_ptr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  while (lhdr_ptr != NULL) {
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
+    space_freed = lhdr_ptr->hdr.bytes_used +
+		  lhdr_ptr->hdr.bytes_left +
+		  SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    lhdr_ptr = next_lhdr_ptr;
+  }
+
+  /* Release small objects */
+  shdr_ptr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  while (shdr_ptr != NULL) {
+    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
+    space_freed = shdr_ptr->hdr.bytes_used +
+		  shdr_ptr->hdr.bytes_left +
+		  SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    shdr_ptr = next_shdr_ptr;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Close all backing store, release all memory.
+   * Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    free_pool(cinfo, pool);
+  }
+
+  /* Release the memory manager control block too. */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* ensures I will be called only once */
+
+  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.
+   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, ie has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of SIZEOF(ALIGN_TYPE).
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK;
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = & mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable
+   * this feature.
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	if (ch == 'm' || ch == 'M')
+	  max_to_use *= 1000L;
+	mem->pub.max_memory_to_use = max_to_use * 1000L;
+      }
+    }
+  }
+#endif
+
+}
diff --git a/src/main/jni/libjpeg/jmemnobs.c b/src/main/jni/libjpeg/jmemnobs.c
new file mode 100644
index 000000000..eb8c33772
--- /dev/null
+++ b/src/main/jni/libjpeg/jmemnobs.c
@@ -0,0 +1,109 @@
+/*
+ * jmemnobs.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ * Note that the max_memory_to_use option is ignored by this implementation.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * Here we always say, "we got all you want bud!"
+ */
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return max_bytes_needed;
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return 0;			/* just set max_memory_to_use to 0 */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work */
+}
diff --git a/src/main/jni/libjpeg/jmemsys.h b/src/main/jni/libjpeg/jmemsys.h
new file mode 100644
index 000000000..2ed1c63a1
--- /dev/null
+++ b/src/main/jni/libjpeg/jmemsys.h
@@ -0,0 +1,204 @@
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Methods for reading/writing/closing this backing-store object */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields for system-dependent backing-store management */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+#ifdef USE_ANDROID_ASHMEM
+  short temp_file;		/* file reference number to temp file */
+  unsigned char* addr;  /* the memory address mapped to ashmem */
+  long size;            /* the requested ashmem size */
+#else
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif
+#endif
+#endif
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/src/main/jni/libjpeg/jmorecfg.h b/src/main/jni/libjpeg/jmorecfg.h
new file mode 100644
index 000000000..66116dbf7
--- /dev/null
+++ b/src/main/jni/libjpeg/jmorecfg.h
@@ -0,0 +1,395 @@
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+/*
+ * Define ANDROID_RGB to enable specific optimizations for Android
+ *   JCS_RGBA_8888 support
+ *   JCS_RGB_565 support
+ * 
+ */
+
+#define ANDROID_RGB
+
+#ifdef ANDROID_RGB
+#define PACK_SHORT_565(r,g,b)  ((((r)<<8)&0xf800)|(((g)<<3)&0x7E0)|((b)>>3))
+#define PACK_TWO_PIXELS(l,r)   ((r<<16) | l)
+#define PACK_NEED_ALIGNMENT(ptr) (((uintptr_t)(ptr))&3)
+#define WRITE_TWO_PIXELS(addr, pixels) do {     \
+         ((INT16*)(addr))[0] = (pixels);        \
+         ((INT16*)(addr))[1] = (pixels)>>16;    \
+    } while(0)
+#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(INT32*)(addr)) = pixels)
+#define DITHER_565_R(r, dither) ((r) + ((dither)&0xFF))
+#define DITHER_565_G(g, dither) ((g) + (((dither)&0xFF)>>1))
+#define DITHER_565_B(b, dither) ((b) + ((dither)&0xFF))
+#endif
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255
+#define CENTERJSAMPLE	128
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	4095
+#define CENTERJSAMPLE	2048
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+typedef long INT32;
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)		static type
+/* a function used only in its module: */
+#define LOCAL(type)		static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifdef NEED_FAR_POINTERS
+#define FAR  far
+#else
+#define FAR
+#endif
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifndef HAVE_BOOLEAN
+typedef int boolean;
+#endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Arithmetic coding is unsupported for legal reasons.  Complaints to IBM. */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#undef  C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected 12-bit data precision, it is dangerous to turn off
+ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
+ * precision, so jchuff.c normally uses entropy optimization to compute
+ * usable tables for higher precision.  If you don't want to do optimization,
+ * you'll have to supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#undef  D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * change these macros.  You can also deal with formats such as R,G,B,X
+ * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
+ * the offsets will also change the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not
+ *    useful if you are using JPEG color spaces other than YCbCr or grayscale.
+ * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).  So you
+ *    can't use color quantization if you change that value.
+ */
+
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#ifdef ANDROID_RGB
+#define RGB_ALPHA   3   /* Offset of Alpha */
+#endif
+#define RGB_PIXELSIZE   3   /* JSAMPLEs per RGB scanline element */
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#ifdef ANDROID_INTELSSE2_IDCT
+  #define MULTIPLIER short
+#elif ANDROID_MIPS_IDCT
+  #define MULTIPLIER  short
+#elif NV_ARM_NEON
+  #define MULTIPLIER short
+#else
+  #define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT
+#ifdef HAVE_PROTOTYPES
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/src/main/jni/libjpeg/jpegint.h b/src/main/jni/libjpeg/jpegint.h
new file mode 100644
index 000000000..3b5511e18
--- /dev/null
+++ b/src/main/jni/libjpeg/jpegint.h
@@ -0,0 +1,432 @@
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Operating modes for buffer controllers */
+	JBUF_PASS_THRU,		/* Plain stripwise operation */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Master control module */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if pass_startup must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control) */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Forward DCT (also controls coefficient quantization) */
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* perhaps this should be an array??? */
+  JMETHOD(void, forward_DCT, (j_compress_ptr cinfo,
+			      jpeg_component_info * compptr,
+			      JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+			      JDIMENSION start_row, JDIMENSION start_col,
+			      JDIMENSION num_blocks));
+};
+
+/* Entropy encoding */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* These routines are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_input_build_huffman_index, (j_decompress_ptr cinfo,
+                    huffman_index *index, int scan_count));
+  JMETHOD(int, consume_markers, (j_decompress_ptr cinfo,
+                    huffman_index *index, int scan_count));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data_build_huffman_index, (j_decompress_ptr cinfo,
+                    huffman_index* index, int scan_count));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays, or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+
+  /* column number of the first and last tile, respectively */
+  int column_left_boundary;
+  int column_right_boundary;
+
+  /* column number of the first and last MCU, respectively */
+  int MCU_column_left_boundary;
+  int MCU_column_right_boundary;
+
+  /* the number of MCU columns to skip from the indexed MCU, iM,
+   * to the requested MCU boundary, rM, where iM is the MCU that we sample
+   * into our index and is the nearest one to the left of rM.
+   */
+  int MCU_columns_to_skip;
+};
+
+/* Decompression postprocessing (color quantization buffer control) */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  JMETHOD(void, get_sos_marker_position, (j_decompress_ptr cinfo,
+                    huffman_index *index));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* found SOI? */
+  boolean saw_SOF;		/* found SOF? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  int current_sos_marker_position;
+  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
+				JBLOCKROW *MCU_data));
+  JMETHOD(boolean, decode_mcu_discard_coef, (j_decompress_ptr cinfo));
+  JMETHOD(void, configure_huffman_decoder, (j_decompress_ptr cinfo,
+                    huffman_offset_data offset));
+  JMETHOD(void, get_huffman_decoder_configuration, (j_decompress_ptr cinfo,
+                    huffman_offset_data *offset));
+
+  /* This is here to share code between baseline and progressive decoders; */
+  /* other modules probably should not use it */
+  boolean insufficient_data;	/* set TRUE after emitting warning */
+
+  huffman_index *index;
+};
+
+/* Inverse DCT (also performs dequantization) */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* It is useful to allow each component to have a separate IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that upsampler must also call color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Colorspace conversion */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Miscellaneous useful macros */
+
+#undef MAX
+#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#undef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS	INT32 shift_temp;
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_phuff_encoder	jIPHEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_phuff_decoder	jIPHDecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jzero_far		jZeroFar
+#define jpeg_zigzag_order	jZIGTable
+#define jpeg_natural_order	jZAGTable
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Compression module initialization routines */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder_no_data JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_decompress_per_scan_setup (j_decompress_ptr cinfo);
+/* Memory manager initialization */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines in jutils.c */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(long) jmin JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
+				    JSAMPARRAY output_array, int dest_row,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+
+EXTERN(void) jset_input_stream_position JPP((j_decompress_ptr cinfo,
+                    int offset));
+EXTERN(void) jset_input_stream_position_bit JPP((j_decompress_ptr cinfo,
+                    int byte_offset, int bit_left, INT32 buf));
+
+EXTERN(int) jget_input_stream_position JPP((j_decompress_ptr cinfo));
+/* Constant tables in jutils.c */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+
+/* Suppress undefined-structure complaints if necessary. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+#endif
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/src/main/jni/libjpeg/jpeglib.h b/src/main/jni/libjpeg/jpeglib.h
new file mode 100644
index 000000000..07e687294
--- /dev/null
+++ b/src/main/jni/libjpeg/jpeglib.h
@@ -0,0 +1,1184 @@
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+/* Version ID for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
+ */
+
+#define JPEG_LIB_VERSION  62	/* Version 6b */
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard, so don't change them
+ * if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 samples */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#endif
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory.
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables. */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits; bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+
+  /* Remaining fields should be treated as private by applications. */
+
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a DCT block in samples.  Always DCTSIZE for compression.
+   * For decompression this is the size of the output from one DCT block,
+   * reflecting any scaling we choose to apply during the IDCT step.
+   * Values of 1,2,4,8 are likely to be supported.  Note that different
+   * components may receive different IDCT scalings.
+   */
+  int DCT_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface), thus
+   * downsampled_width = ceil(image_width * Hi/Hmax)
+   * and similarly for height.  For decompression, IDCT scaling is included, so
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
+   */
+  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* This flag is used only for decompression.  In cases where some of the
+   * components will be ignored (eg grayscale output from YCbCr image),
+   * we can skip most computations for the unused components.
+   */
+  boolean component_needed;	/* do we need the value of this component? */
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* MCU_width * MCU_height */
+  int MCU_sample_width;		/* MCU width in samples, MCU_width*DCT_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these: */
+
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* the marker length word is not counted in data_length or original_length */
+};
+
+/* Known color spaces. */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK,		/* Y/Cb/Cr/K */
+#ifdef ANDROID_RGB
+    JCS_RGBA_8888,  /* red/green/blue/alpha */
+    JCS_RGB_565     /* red/green/blue in 565 format */
+#endif
+} J_COLOR_SPACE;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif
+
+/* Dithering options for decompression. */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Common fields between JPEG compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* For checking call sequence validity */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;
+typedef struct jpeg_compress_struct * j_compress_ptr;
+typedef struct jpeg_decompress_struct * j_decompress_ptr;
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCTSIZE sample rows of each component in an
+   * "iMCU" (interleaved MCU) row.
+   */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION original_image_width;	/* nominal image width (from SOF marker) */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker)
+                               may be changed by tile decode */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean tile_decode;         /* TRUE if using tile based decoding */
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_scaled_size;	/* smallest DCT_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+typedef struct {
+
+  // |--- byte_offset ---|- bit_left -|
+  //  \------ 27 -------/ \---- 5 ----/
+  unsigned int bitstream_offset;
+  short prev_dc[3];
+
+  // remaining EOBs in EOBRUN
+  unsigned short EOBRUN;
+
+  // save the decoder current bit buffer, entropy->bitstate.get_buffer.
+  INT32 get_buffer;
+
+  // save the restart info.
+  unsigned short restarts_to_go;
+  unsigned char next_restart_num;
+} huffman_offset_data;
+
+typedef struct {
+
+  // The header starting position of this scan
+  unsigned int bitstream_offset;
+
+  // Number of components in this scan
+  int comps_in_scan;
+
+  // Number of MCUs in each row
+  int MCUs_per_row;
+  int MCU_rows_per_iMCU_row;
+
+  // The last MCU position and its dc value in this scan
+  huffman_offset_data prev_MCU_offset;
+
+  huffman_offset_data **offset;
+} huffman_scan_header;
+
+#define DEFAULT_MCU_SAMPLE_SIZE 16
+
+typedef struct {
+
+  // The number of MCUs that we sample each time as an index point
+  int MCU_sample_size;
+
+  // Number of scan in this image
+  int scan_count;
+
+  // Number of iMCUs rows in this image
+  int total_iMCU_rows;
+
+  // Memory used by scan struct
+  size_t mem_used;
+  huffman_scan_header *scan;
+} huffman_index;
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(void, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;
+#define JMSG_STR_PARM_MAX  80
+  union {
+    int i[8];
+    char s[JMSG_STR_PARM_MAX];
+  } msg_parm;
+
+  /* Standard state variables for error facility */
+
+  int trace_level;		/* max msg_level that will be displayed */
+
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  const JOCTET * start_input_byte; /* => first byte to read from input */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+  size_t current_offset; /* current readed input offset */
+
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, seek_input_data, (j_decompress_ptr cinfo, long byte_offset));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2
+
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
+typedef struct jvirt_barray_control * jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods.
+ * Need not pass marker code since it is stored in cinfo->unread_marker.
+ */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * The JPP macro hides prototype parameters from compilers that can't cope.
+ * Note JPP requires double parentheses.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist
+#else
+#define JPP(arglist)	()
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in less than 15
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.doc concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_tile_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(JDIMENSION) jpeg_read_scanlines_from JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    int line_offset,
+					    JDIMENSION max_lines));
+EXTERN(JDIMENSION) jpeg_read_tile_scanline JPP((j_decompress_ptr cinfo,
+                        huffman_index *index,
+                        JSAMPARRAY scanlines));
+EXTERN(void) jpeg_init_read_tile_scanline JPP((j_decompress_ptr cinfo,
+                        huffman_index *index,
+		                int *start_x, int *start_y,
+                        int *width, int *height));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_build_huffman_index
+    JPP((j_decompress_ptr cinfo, huffman_index *index));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+EXTERN(void) jpeg_configure_huffman_decoder(j_decompress_ptr cinfo,
+                        huffman_offset_data offset);
+EXTERN(void) jpeg_get_huffman_decoder_configuration(j_decompress_ptr cinfo,
+                        huffman_offset_data *offset);
+EXTERN(void) jpeg_create_huffman_index(j_decompress_ptr cinfo,
+                        huffman_index *index);
+EXTERN(void) jpeg_configure_huffman_index_scan(j_decompress_ptr cinfo,
+                        huffman_index *index, int scan_no, int offset);
+EXTERN(void) jpeg_destroy_huffman_index(huffman_index *index);
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/src/main/jni/libjpeg/jquant1.c b/src/main/jni/libjpeg/jquant1.c
new file mode 100644
index 000000000..b2f96aa15
--- /dev/null
+++ b/src/main/jni/libjpeg/jquant1.c
@@ -0,0 +1,856 @@
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1.
+   */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Determine allocation of desired colors to components, */
+/* and fill in Ncolors[] array to indicate choice. */
+/* Return value is total number of colors (product of Ncolors[] values). */
+{
+  int nc = cinfo->out_color_components; /* number of color components */
+  int max_colors = cinfo->desired_number_of_colors;
+  int total_colors, iroot, i, j;
+  boolean changed;
+  long temp;
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+
+  /* We can allocate at least the nc'th root of max_colors per component. */
+  /* Compute floor(nc'th root of max_colors). */
+  iroot = 1;
+  do {
+    iroot++;
+    temp = iroot;		/* set temp = iroot ** nc */
+    for (i = 1; i < nc; i++)
+      temp *= iroot;
+  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
+  iroot--;			/* now iroot = floor(root) */
+
+  /* Must have at least 2 color values per component */
+  if (iroot < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
+
+  /* Initialize to iroot color values for each component */
+  total_colors = 1;
+  for (i = 0; i < nc; i++) {
+    Ncolors[i] = iroot;
+    total_colors *= iroot;
+  }
+  /* We may be able to increment the count for one or more components without
+   * exceeding max_colors, though we know not all can be incremented.
+   * Sometimes, the first component can be incremented more than once!
+   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
+   * In RGB colorspace, try to increment G first, then R, then B.
+   */
+  do {
+    changed = FALSE;
+    for (i = 0; i < nc; i++) {
+      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
+      /* calculate new total_colors if Ncolors[j] is incremented */
+      temp = total_colors / Ncolors[j];
+      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      if (temp > (long) max_colors)
+	break;			/* won't fit, done with this pass */
+      Ncolors[j]++;		/* OK, apply the increment */
+      total_colors = (int) temp;
+      changed = TRUE;
+    }
+  } while (changed);
+
+  return total_colors;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return j'th output value, where j will range from 0 to maxj */
+/* The output values must fall in 0..MAXJSAMPLE in increasing order */
+{
+  /* We always provide values 0 and MAXJSAMPLE for each component;
+   * any additional values are equally spaced between these limits.
+   * (Forcing the upper and lower values to the limits ensures that
+   * dithering can't produce a color outside the selected gamut.)
+   */
+  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return largest input value that should map to j'th output value */
+/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
+{
+  /* Breakpoints are halfway between values returned by output_value */
+  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colormap;		/* Created colormap */
+  int total_colors;		/* Number of distinct output colors */
+  int i,j,k, nci, blksize, blkdist, ptr, val;
+
+  /* Select number of colors for each component */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Report selected color counts */
+  if (cinfo->out_color_components == 3)
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+	     total_colors, cquantize->Ncolors[0],
+	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+  else
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+
+  /* Allocate and fill in the colormap. */
+  /* The colors are ordered in the map in standard row-major order, */
+  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
+
+  colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  /* blkdist is distance between groups of identical entries for a component */
+  blkdist = total_colors;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colormap entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      /* Compute j'th output value (out of nci) for component */
+      val = output_value(cinfo, i, j, nci-1);
+      /* Fill in all colormap entries that have this value of this component */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+	/* fill in blksize entries beginning at ptr */
+	for (k = 0; k < blksize; k++)
+	  colormap[i][ptr+k] = (JSAMPLE) val;
+      }
+    }
+    blkdist = blksize;		/* blksize of this color is blkdist of next */
+  }
+
+  /* Save the colormap in private storage,
+   * where it will survive color quantization mode changes.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW indexptr;
+  int i,j,k, nci, blksize, val, pad;
+
+  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
+   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
+   * This is not necessary in the other dithering modes.  However, we
+   * flag whether it was done in case user changes dithering mode.
+   */
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    pad = MAXJSAMPLE*2;
+    cquantize->is_padded = TRUE;
+  } else {
+    pad = 0;
+    cquantize->is_padded = FALSE;
+  }
+
+  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + pad),
+     (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colorindex entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blksize / nci;
+
+    /* adjust colorindex pointers to provide padding at negative indexes. */
+    if (pad)
+      cquantize->colorindex[i] += MAXJSAMPLE;
+
+    /* in loop, val = index of current output value, */
+    /* and k = largest j that maps to current val */
+    indexptr = cquantize->colorindex[i];
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci-1);
+    for (j = 0; j <= MAXJSAMPLE; j++) {
+      while (j > k)		/* advance val if past boundary */
+	k = largest_input_value(cinfo, i, ++val, nci-1);
+      /* premultiply so that no multiplication needed in main processing */
+      indexptr[j] = (JSAMPLE) (val * blksize);
+    }
+    /* Pad at both ends if necessary */
+    if (pad)
+      for (j = 1; j <= MAXJSAMPLE; j++) {
+	indexptr[-j] = indexptr[0];
+	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+      }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;
+  int j,k;
+  INT32 num,den;
+
+  odither = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(ODITHER_MATRIX));
+  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
+   * Hence the dither value for the matrix cell with fill order f
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
+   * On 16-bit-int machine, be careful to avoid overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (j = 0; j < ODITHER_SIZE; j++) {
+    for (k = 0; k < ODITHER_SIZE; k++) {
+      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
+	    * MAXJSAMPLE;
+      /* Ensure round towards zero despite C's lack of consistency
+       * about rounding negative values in integer division...
+       */
+      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  ODITHER_MATRIX_PTR odither;
+  int i, j, nci;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    odither = NULL;		/* search for matching prior component */
+    for (j = 0; j < i; j++) {
+      if (nci == cquantize->Ncolors[j]) {
+	odither = cquantize->odither[j];
+	break;
+      }
+    }
+    if (odither == NULL)	/* need a new table? */
+      odither = make_odither_array(cinfo, nci);
+    cquantize->odither[i] = odither;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		JSAMPARRAY output_buf, int num_rows)
+/* General case, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colorindex = cquantize->colorindex;
+  register int pixcode, ci;
+  register JSAMPROW ptrin, ptrout;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  register int nc = cinfo->out_color_components;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode = 0;
+      for (ci = 0; ci < nc; ci++) {
+	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+      }
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		 JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW ptrin, ptrout;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		     JSAMPARRAY output_buf, int num_rows)
+/* General case, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  int * dither;			/* points to active row of dither matrix */
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void FAR *) output_buf[row],
+	      (size_t) (width * SIZEOF(JSAMPLE)));
+    row_index = cquantize->row_index;
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      for (col = width; col > 0; col--) {
+	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+	 * select output value, accumulate into output code for this pixel.
+	 * Range-limiting need not be done explicitly, as we have extended
+	 * the colorindex table to produce the right answers for out-of-range
+	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+	 * required amount of padding.
+	 */
+	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+	input_ptr += nc;
+	output_ptr++;
+	col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Advance row index for next row */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		      JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int * dither0;		/* points to active row of dither matrix */
+  int * dither1;
+  int * dither2;
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    input_ptr = input_buf[row];
+    output_ptr = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+    col_index = 0;
+
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
+					dither0[col_index]]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
+					dither1[col_index]]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
+					dither2[col_index]]);
+      *output_ptr++ = (JSAMPLE) pixcode;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* General case, with Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  JSAMPROW colormap_ci;
+  int pixcode;
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void FAR *) output_buf[row],
+	      (size_t) (width * SIZEOF(JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* work right to left in this row */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* work left to right in this row */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Select output value, accumulate into output code for this pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    cquantize->fserrors[i] = (FSERRPTR)
+      (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+  }
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  /* Install my colormap. */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Initialize for desired dithering mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = color_quantize3;
+    else
+      cquantize->pub.color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub.color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables if we didn't already. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
+    /* Allocate Floyd-Steinberg workspace if didn't already. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Initialize the propagated errors to zero. */
+    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      jzero_far((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* no work in 1-pass case */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
+  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+
+  /* Make sure my internal arrays won't overflow */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Create the colormap and color index table. */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if requested.
+   * We do this now since it is FAR storage and may affect the memory
+   * manager's space calculations.  If the user changes to FS dither
+   * mode in a later pass, we will allocate the space then, and will
+   * possibly overrun the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jquant2.c b/src/main/jni/libjpeg/jquant2.c
new file mode 100644
index 000000000..af601e334
--- /dev/null
+++ b/src/main/jni/libjpeg/jquant2.c
@@ -0,0 +1,1310 @@
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* scale R distances by this much */
+#define G_SCALE 3		/* scale G distances by this much */
+#define B_SCALE 1		/* and B by this much */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
+
+
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
+typedef hist2d * hist3d;	/* type for top-level pointer */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW ptr;
+  register histptr histp;
+  register hist3d histogram = cquantize->histogram;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptr = input_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the histogram */
+      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+      /* increment, check for overflow and undo increment if so. */
+      if (++(*histp) <= 0)
+	(*histp)--;
+      ptr += 3;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest color population */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register long maxc = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->colorcount > maxc && boxp->volume > 0) {
+      which = boxp;
+      maxc = boxp->colorcount;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest (scaled) volume */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register INT32 maxv = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->volume > maxv) {
+      which = boxp;
+      maxv = boxp->volume;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
+/* and recompute its volume and population */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
+  long ccount;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0min = c0min = c0;
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0max = c0max = c0;
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1min = c1min = c1;
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1max = c1max = c1;
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2min = c2min = c2;
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2max = c2max = c2;
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+  
+  /* Now scan remaining volume of box and compute population */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;
+	}
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Repeatedly select and split the largest box until we have enough boxes */
+{
+  int n,lb;
+  int c0,c1,c2,cmax;
+  register boxptr b1,b2;
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* where new box will go */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
+    if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb+1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Update stats for boxes */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
+{
+  /* Current algorithm: mean weighted by pixels (not colors) */
+  /* Note it is important to get the rounding correct! */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  long count;
+  long total = 0;
+  long c0total = 0;
+  long c1total = 0;
+  long c2total = 0;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++) {
+	if ((count = *histp++) != 0) {
+	  total += count;
+	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+	}
+      }
+    }
+  
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Master routine for color selection */
+{
+  boxptr boxlist;
+  int numboxes;
+  int i;
+
+  /* Allocate workspace for box list */
+  boxlist = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+  /* Initialize one box containing whole space */
+  numboxes = 1;
+  boxlist[0].c0min = 0;
+  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
+  boxlist[0].c1min = 0;
+  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
+  boxlist[0].c2min = 0;
+  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
+  /* Shrink it to actually-used volume and set its statistics */
+  update_box(cinfo, & boxlist[0]);
+  /* Perform median-cut to produce final box list */
+  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
+  /* Compute the representative color for each box, fill colormap */
+  for (i = 0; i < numboxes; i++)
+    compute_color(cinfo, & boxlist[i], i);
+  cinfo->actual_number_of_colors = numboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[].
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register INT32 * bptr;	/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  register INT32 dist2;		/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  register INT32 xx2;
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;
+  
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;
+	  xx2 += 2 * STEP_C2 * STEP_C2;
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Fill the inverse-colormap entries in the update box that contains */
+/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
+/* we can fill as many others as we wish.) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int ic0, ic1, ic2;
+  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
+  register histptr cachep;	/* pointer into main cache array */
+  /* This array lists the candidate colormap indexes. */
+  JSAMPLE colorlist[MAXNUMCOLORS];
+  int numcolors;		/* number of candidate colors */
+  /* This array holds the actually closest colormap index for each cell. */
+  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Convert cell coordinates to update box ID */
+  c0 >>= BOX_C0_LOG;
+  c1 >>= BOX_C1_LOG;
+  c2 >>= BOX_C2_LOG;
+
+  /* Compute true coordinates of update box's origin corner.
+   * Actually we compute the coordinates of the center of the corner
+   * histogram cell, which are the lower bounds of the volume we care about.
+   */
+  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+  
+  /* Determine which colormap entries are close enough to be candidates
+   * for the nearest entry to some cell in the update box.
+   */
+  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
+
+  /* Determine the actually nearest colors. */
+  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
+		   bestcolor);
+
+  /* Save the best color numbers (plus 1) in the main cache array */
+  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c1 <<= BOX_C1_LOG;
+  c2 <<= BOX_C2_LOG;
+  cptr = bestcolor;
+  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
+    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
+      cachep = & histogram[c0+ic0][c1+ic1][c2];
+      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
+	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register JSAMPROW inptr, outptr;
+  register histptr cachep;
+  register int c0, c1, c2;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the cache */
+      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
+      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
+      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+      cachep = & histogram[c0][c1][c2];
+      /* If we have not seen this color before, find nearest colormap entry */
+      /* and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, c0,c1,c2);
+      /* Now emit the colormap index for this cell */
+      *outptr++ = (JSAMPLE) (*cachep - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell */
+      { register int pixcode = *cachep - 1;
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate and fill in the error_limiter table */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * table;
+  int in, out;
+
+  table = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = table;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
+  for (; in <= MAXJSAMPLE; in++) {
+    table[in] = out; table[-in] = -out;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Select the representative colors and fill in cinfo->colormap */
+  cinfo->colormap = cquantize->sv_colormap;
+  select_colors(cinfo, cquantize->desired);
+  /* Force next pass to zero the color index table */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub.color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub.color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* Make sure color count is acceptable */
+    i = cinfo->actual_number_of_colors;
+    if (i < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (i > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      size_t arraysize = (size_t) ((cinfo->output_width + 2) *
+				   (3 * SIZEOF(FSERROR)));
+      /* Allocate Floyd-Steinberg workspace if we didn't already. */
+      if (cquantize->fserrors == NULL)
+	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+      /* Initialize the propagated errors to zero. */
+      jzero_far((void FAR *) cquantize->fserrors, arraysize);
+      /* Make the error-limit table if we didn't already. */
+      if (cquantize->error_limiter == NULL)
+	init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;
+    }
+
+  }
+  /* Zero the histogram or inverse color map, if necessary */
+  if (cquantize->needs_zeroed) {
+    for (i = 0; i < HIST_C0_ELEMS; i++) {
+      jzero_far((void FAR *) histogram[i],
+		HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Reset the inverse color map */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap storage */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3);
+    cquantize->desired = desired;
+  } else
+    cquantize->sv_colormap = NULL;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it is FAR storage.
+   * Although we will cope with a later change in dither_mode,
+   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/src/main/jni/libjpeg/jutils.c b/src/main/jni/libjpeg/jutils.c
new file mode 100644
index 000000000..616ad0511
--- /dev/null
+++ b/src/main/jni/libjpeg/jutils.c
@@ -0,0 +1,185 @@
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
+/* Assumes a >= 0, b > 0 */
+{
+  return (a + b - 1L) / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
+/* Assumes a >= 0, b > 0 */
+{
+  a += b - 1L;
+  return a - (a % b);
+}
+
+GLOBAL(long)
+jmin (long a, long b)
+{
+  return a < b ? a : b;
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macros */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#endif
+#endif
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+		   JSAMPARRAY output_array, int dest_row,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy some rows of samples from one place to another.
+ * num_rows rows are copied from input_array[source_row++]
+ * to output_array[dest_row++]; these areas may overlap for duplication.
+ * The source and destination arrays must be at least as wide as num_cols.
+ */
+{
+  register JSAMPROW inptr, outptr;
+#ifdef FMEMCOPY
+  register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE));
+#else
+  register JDIMENSION count;
+#endif
+  register int row;
+
+  input_array += source_row;
+  output_array += dest_row;
+
+  for (row = num_rows; row > 0; row--) {
+    inptr = *input_array++;
+    outptr = *output_array++;
+#ifdef FMEMCOPY
+    FMEMCOPY(outptr, inptr, count);
+#else
+    for (count = num_cols; count > 0; count--)
+      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Copy a row of coefficient blocks from one place to another. */
+{
+#ifdef FMEMCOPY
+  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#else
+  register JCOEFPTR inptr, outptr;
+  register long count;
+
+  inptr = (JCOEFPTR) input_row;
+  outptr = (JCOEFPTR) output_row;
+  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
+    *outptr++ = *inptr++;
+  }
+#endif
+}
+
+
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Zero out a chunk of FAR memory. */
+/* This might be sample-array data, block-array data, or alloc_large data. */
+{
+#ifdef FMEMZERO
+  FMEMZERO(target, bytestozero);
+#else
+  register char FAR * ptr = (char FAR *) target;
+  register size_t count;
+
+  for (count = bytestozero; count > 0; count--) {
+    *ptr++ = 0;
+  }
+#endif
+}
diff --git a/src/main/jni/libjpeg/jversion.h b/src/main/jni/libjpeg/jversion.h
new file mode 100644
index 000000000..6472c58d3
--- /dev/null
+++ b/src/main/jni/libjpeg/jversion.h
@@ -0,0 +1,14 @@
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"6b  27-Mar-1998"
+
+#define JCOPYRIGHT	"Copyright (C) 1998, Thomas G. Lane"
diff --git a/src/main/jni/libwebp/dec/alpha.c b/src/main/jni/libwebp/dec/alpha.c
new file mode 100644
index 000000000..f23ba7d6a
--- /dev/null
+++ b/src/main/jni/libwebp/dec/alpha.c
@@ -0,0 +1,165 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane decompression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./alphai.h"
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "../utils/quant_levels_dec.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+//------------------------------------------------------------------------------
+// ALPHDecoder object.
+
+ALPHDecoder* ALPHNew(void) {
+  ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  return dec;
+}
+
+void ALPHDelete(ALPHDecoder* const dec) {
+  if (dec != NULL) {
+    VP8LDelete(dec->vp8l_dec_);
+    dec->vp8l_dec_ = NULL;
+    WebPSafeFree(dec);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Decoding.
+
+// Initialize alpha decoding by parsing the alpha header and decoding the image
+// header for alpha data stored using lossless compression.
+// Returns false in case of error in alpha header (data too short, invalid
+// compression method or filter, error in lossless header data etc).
+static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
+                    size_t data_size, int width, int height, uint8_t* output) {
+  int ok = 0;
+  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
+  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
+  int rsrv;
+
+  assert(width > 0 && height > 0);
+  assert(data != NULL && output != NULL);
+
+  dec->width_ = width;
+  dec->height_ = height;
+
+  if (data_size <= ALPHA_HEADER_LEN) {
+    return 0;
+  }
+
+  dec->method_ = (data[0] >> 0) & 0x03;
+  dec->filter_ = (data[0] >> 2) & 0x03;
+  dec->pre_processing_ = (data[0] >> 4) & 0x03;
+  rsrv = (data[0] >> 6) & 0x03;
+  if (dec->method_ < ALPHA_NO_COMPRESSION ||
+      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
+      dec->filter_ >= WEBP_FILTER_LAST ||
+      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
+      rsrv != 0) {
+    return 0;
+  }
+
+  if (dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t alpha_decoded_size = dec->width_ * dec->height_;
+    ok = (alpha_data_size >= alpha_decoded_size);
+  } else {
+    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
+  }
+  return ok;
+}
+
+// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
+// starting from row number 'row'. It assumes that rows up to (row - 1) have
+// already been decoded.
+// Returns false in case of bitstream error.
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
+  ALPHDecoder* const alph_dec = dec->alph_dec_;
+  const int width = alph_dec->width_;
+  const int height = alph_dec->height_;
+  WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];
+  uint8_t* const output = dec->alpha_plane_;
+  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t offset = row * width;
+    const size_t num_pixels = num_rows * width;
+    assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);
+    memcpy(dec->alpha_plane_ + offset,
+           dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);
+  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
+    assert(alph_dec->vp8l_dec_ != NULL);
+    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
+      return 0;
+    }
+  }
+
+  if (unfilter_func != NULL) {
+    unfilter_func(width, height, width, row, num_rows, output);
+  }
+
+  if (row + num_rows == dec->pic_hdr_.height_) {
+    dec->is_alpha_decoded_ = 1;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point.
+
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      int row, int num_rows) {
+  const int width = dec->pic_hdr_.width_;
+  const int height = dec->pic_hdr_.height_;
+
+  if (row < 0 || num_rows <= 0 || row + num_rows > height) {
+    return NULL;    // sanity check.
+  }
+
+  if (row == 0) {
+    // Initialize decoding.
+    assert(dec->alpha_plane_ != NULL);
+    dec->alph_dec_ = ALPHNew();
+    if (dec->alph_dec_ == NULL) return NULL;
+    if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
+                  width, height, dec->alpha_plane_)) {
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+      return NULL;
+    }
+    // if we allowed use of alpha dithering, check whether it's needed at all
+    if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
+      dec->alpha_dithering_ = 0;  // disable dithering
+    } else {
+      num_rows = height;          // decode everything in one pass
+    }
+  }
+
+  if (!dec->is_alpha_decoded_) {
+    int ok = 0;
+    assert(dec->alph_dec_ != NULL);
+    ok = ALPHDecode(dec, row, num_rows);
+    if (ok && dec->alpha_dithering_ > 0) {
+      ok = WebPDequantizeLevels(dec->alpha_plane_, width, height,
+                                dec->alpha_dithering_);
+    }
+    if (!ok || dec->is_alpha_decoded_) {
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+    }
+    if (!ok) return NULL;  // Error.
+  }
+
+  // Return a pointer to the current decoded row.
+  return dec->alpha_plane_ + row * width;
+}
diff --git a/src/main/jni/libwebp/dec/alphai.h b/src/main/jni/libwebp/dec/alphai.h
new file mode 100644
index 000000000..5fa230ca8
--- /dev/null
+++ b/src/main/jni/libwebp/dec/alphai.h
@@ -0,0 +1,55 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha decoder: internal header.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_DEC_ALPHAI_H_
+#define WEBP_DEC_ALPHAI_H_
+
+#include "./webpi.h"
+#include "../utils/filters.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP8LDecoder;  // Defined in dec/vp8li.h.
+
+typedef struct ALPHDecoder ALPHDecoder;
+struct ALPHDecoder {
+  int width_;
+  int height_;
+  int method_;
+  WEBP_FILTER_TYPE filter_;
+  int pre_processing_;
+  struct VP8LDecoder* vp8l_dec_;
+  VP8Io io_;
+  int use_8b_decode;  // Although alpha channel requires only 1 byte per
+                      // pixel, sometimes VP8LDecoder may need to allocate
+                      // 4 bytes per pixel internally during decode.
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Allocates a new alpha decoder instance.
+ALPHDecoder* ALPHNew(void);
+
+// Clears and deallocates an alpha decoder instance.
+void ALPHDelete(ALPHDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_ALPHAI_H_ */
diff --git a/src/main/jni/libwebp/dec/buffer.c b/src/main/jni/libwebp/dec/buffer.c
new file mode 100644
index 000000000..42feac74c
--- /dev/null
+++ b/src/main/jni/libwebp/dec/buffer.c
@@ -0,0 +1,251 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Everything about WebPDecBuffer
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./webpi.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer
+
+// Number of bytes per pixel for the different color-spaces.
+static const int kModeBpp[MODE_LAST] = {
+  3, 4, 3, 4, 4, 2, 2,
+  4, 4, 4, 2,    // pre-multiplied modes
+  1, 1 };
+
+// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
+// Convert to an integer to handle both the unsigned/signed enum cases
+// without the need for casting to remove type limit warnings.
+static int IsValidColorspace(int webp_csp_mode) {
+  return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
+}
+
+static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
+  int ok = 1;
+  const WEBP_CSP_MODE mode = buffer->colorspace;
+  const int width = buffer->width;
+  const int height = buffer->height;
+  if (!IsValidColorspace(mode)) {
+    ok = 0;
+  } else if (!WebPIsRGBMode(mode)) {   // YUV checks
+    const WebPYUVABuffer* const buf = &buffer->u.YUVA;
+    const int y_stride = abs(buf->y_stride);
+    const int u_stride = abs(buf->u_stride);
+    const int v_stride = abs(buf->v_stride);
+    const int a_stride = abs(buf->a_stride);
+    const uint64_t y_size = (uint64_t)y_stride * height;
+    const uint64_t u_size = (uint64_t)u_stride * ((height + 1) / 2);
+    const uint64_t v_size = (uint64_t)v_stride * ((height + 1) / 2);
+    const uint64_t a_size = (uint64_t)a_stride * height;
+    ok &= (y_size <= buf->y_size);
+    ok &= (u_size <= buf->u_size);
+    ok &= (v_size <= buf->v_size);
+    ok &= (y_stride >= width);
+    ok &= (u_stride >= (width + 1) / 2);
+    ok &= (v_stride >= (width + 1) / 2);
+    ok &= (buf->y != NULL);
+    ok &= (buf->u != NULL);
+    ok &= (buf->v != NULL);
+    if (mode == MODE_YUVA) {
+      ok &= (a_stride >= width);
+      ok &= (a_size <= buf->a_size);
+      ok &= (buf->a != NULL);
+    }
+  } else {    // RGB checks
+    const WebPRGBABuffer* const buf = &buffer->u.RGBA;
+    const int stride = abs(buf->stride);
+    const uint64_t size = (uint64_t)stride * height;
+    ok &= (size <= buf->size);
+    ok &= (stride >= width * kModeBpp[mode]);
+    ok &= (buf->rgba != NULL);
+  }
+  return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
+}
+
+static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
+  const int w = buffer->width;
+  const int h = buffer->height;
+  const WEBP_CSP_MODE mode = buffer->colorspace;
+
+  if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+
+  if (!buffer->is_external_memory && buffer->private_memory == NULL) {
+    uint8_t* output;
+    int uv_stride = 0, a_stride = 0;
+    uint64_t uv_size = 0, a_size = 0, total_size;
+    // We need memory and it hasn't been allocated yet.
+    // => initialize output buffer, now that dimensions are known.
+    const int stride = w * kModeBpp[mode];
+    const uint64_t size = (uint64_t)stride * h;
+
+    if (!WebPIsRGBMode(mode)) {
+      uv_stride = (w + 1) / 2;
+      uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
+      if (mode == MODE_YUVA) {
+        a_stride = w;
+        a_size = (uint64_t)a_stride * h;
+      }
+    }
+    total_size = size + 2 * uv_size + a_size;
+
+    // Security/sanity checks
+    output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
+    if (output == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    buffer->private_memory = output;
+
+    if (!WebPIsRGBMode(mode)) {   // YUVA initialization
+      WebPYUVABuffer* const buf = &buffer->u.YUVA;
+      buf->y = output;
+      buf->y_stride = stride;
+      buf->y_size = (size_t)size;
+      buf->u = output + size;
+      buf->u_stride = uv_stride;
+      buf->u_size = (size_t)uv_size;
+      buf->v = output + size + uv_size;
+      buf->v_stride = uv_stride;
+      buf->v_size = (size_t)uv_size;
+      if (mode == MODE_YUVA) {
+        buf->a = output + size + 2 * uv_size;
+      }
+      buf->a_size = (size_t)a_size;
+      buf->a_stride = a_stride;
+    } else {  // RGBA initialization
+      WebPRGBABuffer* const buf = &buffer->u.RGBA;
+      buf->rgba = output;
+      buf->stride = stride;
+      buf->size = (size_t)size;
+    }
+  }
+  return CheckDecBuffer(buffer);
+}
+
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
+  if (buffer == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  if (WebPIsRGBMode(buffer->colorspace)) {
+    WebPRGBABuffer* const buf = &buffer->u.RGBA;
+    buf->rgba += (buffer->height - 1) * buf->stride;
+    buf->stride = -buf->stride;
+  } else {
+    WebPYUVABuffer* const buf = &buffer->u.YUVA;
+    const int H = buffer->height;
+    buf->y += (H - 1) * buf->y_stride;
+    buf->y_stride = -buf->y_stride;
+    buf->u += ((H - 1) >> 1) * buf->u_stride;
+    buf->u_stride = -buf->u_stride;
+    buf->v += ((H - 1) >> 1) * buf->v_stride;
+    buf->v_stride = -buf->v_stride;
+    if (buf->a != NULL) {
+      buf->a += (H - 1) * buf->a_stride;
+      buf->a_stride = -buf->a_stride;
+    }
+  }
+  return VP8_STATUS_OK;
+}
+
+VP8StatusCode WebPAllocateDecBuffer(int w, int h,
+                                    const WebPDecoderOptions* const options,
+                                    WebPDecBuffer* const out) {
+  VP8StatusCode status;
+  if (out == NULL || w <= 0 || h <= 0) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  if (options != NULL) {    // First, apply options if there is any.
+    if (options->use_cropping) {
+      const int cw = options->crop_width;
+      const int ch = options->crop_height;
+      const int x = options->crop_left & ~1;
+      const int y = options->crop_top & ~1;
+      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
+        return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
+      }
+      w = cw;
+      h = ch;
+    }
+    if (options->use_scaling) {
+      if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+        return VP8_STATUS_INVALID_PARAM;
+      }
+      w = options->scaled_width;
+      h = options->scaled_height;
+    }
+  }
+  out->width = w;
+  out->height = h;
+
+  // Then, allocate buffer for real.
+  status = AllocateBuffer(out);
+  if (status != VP8_STATUS_OK) return status;
+
+#if WEBP_DECODER_ABI_VERSION > 0x0203
+  // Use the stride trick if vertical flip is needed.
+  if (options != NULL && options->flip) {
+    status = WebPFlipBuffer(out);
+  }
+#endif
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// constructors / destructors
+
+int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return 0;  // version mismatch
+  }
+  if (buffer == NULL) return 0;
+  memset(buffer, 0, sizeof(*buffer));
+  return 1;
+}
+
+void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
+  if (buffer != NULL) {
+    if (!buffer->is_external_memory) {
+      WebPSafeFree(buffer->private_memory);
+    }
+    buffer->private_memory = NULL;
+  }
+}
+
+void WebPCopyDecBuffer(const WebPDecBuffer* const src,
+                       WebPDecBuffer* const dst) {
+  if (src != NULL && dst != NULL) {
+    *dst = *src;
+    if (src->private_memory != NULL) {
+      dst->is_external_memory = 1;   // dst buffer doesn't own the memory.
+      dst->private_memory = NULL;
+    }
+  }
+}
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
+  if (src != NULL && dst != NULL) {
+    *dst = *src;
+    if (src->private_memory != NULL) {
+      src->is_external_memory = 1;   // src relinquishes ownership
+      src->private_memory = NULL;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/dec/decode_vp8.h b/src/main/jni/libwebp/dec/decode_vp8.h
new file mode 100644
index 000000000..b9337bbec
--- /dev/null
+++ b/src/main/jni/libwebp/dec/decode_vp8.h
@@ -0,0 +1,185 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Low-level API for VP8 decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_VP8_H_
+#define WEBP_WEBP_DECODE_VP8_H_
+
+#include "../webp/decode.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Lower-level API
+//
+// These functions provide fine-grained control of the decoding process.
+// The call flow should resemble:
+//
+//   VP8Io io;
+//   VP8InitIo(&io);
+//   io.data = data;
+//   io.data_size = size;
+//   /* customize io's functions (setup()/put()/teardown()) if needed. */
+//
+//   VP8Decoder* dec = VP8New();
+//   bool ok = VP8Decode(dec);
+//   if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
+//   VP8Delete(dec);
+//   return ok;
+
+// Input / Output
+typedef struct VP8Io VP8Io;
+typedef int (*VP8IoPutHook)(const VP8Io* io);
+typedef int (*VP8IoSetupHook)(VP8Io* io);
+typedef void (*VP8IoTeardownHook)(const VP8Io* io);
+
+struct VP8Io {
+  // set by VP8GetHeaders()
+  int width, height;         // picture dimensions, in pixels (invariable).
+                             // These are the original, uncropped dimensions.
+                             // The actual area passed to put() is stored
+                             // in mb_w / mb_h fields.
+
+  // set before calling put()
+  int mb_y;                  // position of the current rows (in pixels)
+  int mb_w;                  // number of columns in the sample
+  int mb_h;                  // number of rows in the sample
+  const uint8_t* y, *u, *v;  // rows to copy (in yuv420 format)
+  int y_stride;              // row stride for luma
+  int uv_stride;             // row stride for chroma
+
+  void* opaque;              // user data
+
+  // called when fresh samples are available. Currently, samples are in
+  // YUV420 format, and can be up to width x 24 in size (depending on the
+  // in-loop filtering level, e.g.). Should return false in case of error
+  // or abort request. The actual size of the area to update is mb_w x mb_h
+  // in size, taking cropping into account.
+  VP8IoPutHook put;
+
+  // called just before starting to decode the blocks.
+  // Must return false in case of setup error, true otherwise. If false is
+  // returned, teardown() will NOT be called. But if the setup succeeded
+  // and true is returned, then teardown() will always be called afterward.
+  VP8IoSetupHook setup;
+
+  // Called just after block decoding is finished (or when an error occurred
+  // during put()). Is NOT called if setup() failed.
+  VP8IoTeardownHook teardown;
+
+  // this is a recommendation for the user-side yuv->rgb converter. This flag
+  // is set when calling setup() hook and can be overwritten by it. It then
+  // can be taken into consideration during the put() method.
+  int fancy_upsampling;
+
+  // Input buffer.
+  size_t data_size;
+  const uint8_t* data;
+
+  // If true, in-loop filtering will not be performed even if present in the
+  // bitstream. Switching off filtering may speed up decoding at the expense
+  // of more visible blocking. Note that output will also be non-compliant
+  // with the VP8 specifications.
+  int bypass_filtering;
+
+  // Cropping parameters.
+  int use_cropping;
+  int crop_left, crop_right, crop_top, crop_bottom;
+
+  // Scaling parameters.
+  int use_scaling;
+  int scaled_width, scaled_height;
+
+  // If non NULL, pointer to the alpha data (if present) corresponding to the
+  // start of the current row (That is: it is pre-offset by mb_y and takes
+  // cropping into account).
+  const uint8_t* a;
+};
+
+// Internal, version-checked, entry point
+int VP8InitIoInternal(VP8Io* const, int);
+
+// Set the custom IO function pointers and user-data. The setter for IO hooks
+// should be called before initiating incremental decoding. Returns true if
+// WebPIDecoder object is successfully modified, false otherwise.
+int WebPISetIOHooks(WebPIDecoder* const idec,
+                    VP8IoPutHook put,
+                    VP8IoSetupHook setup,
+                    VP8IoTeardownHook teardown,
+                    void* user_data);
+
+// Main decoding object. This is an opaque structure.
+typedef struct VP8Decoder VP8Decoder;
+
+// Create a new decoder object.
+VP8Decoder* VP8New(void);
+
+// Must be called to make sure 'io' is initialized properly.
+// Returns false in case of version mismatch. Upon such failure, no other
+// decoding function should be called (VP8Decode, VP8GetHeaders, ...)
+static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
+  return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
+}
+
+// Decode the VP8 frame header. Returns true if ok.
+// Note: 'io->data' must be pointing to the start of the VP8 frame header.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
+
+// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
+// Returns false in case of error.
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io);
+
+// Return current status of the decoder:
+VP8StatusCode VP8Status(VP8Decoder* const dec);
+
+// return readable string corresponding to the last status.
+const char* VP8StatusMessage(VP8Decoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Not a mandatory call between calls to VP8Decode().
+void VP8Clear(VP8Decoder* const dec);
+
+// Destroy the decoder object.
+void VP8Delete(VP8Decoder* const dec);
+
+//------------------------------------------------------------------------------
+// Miscellaneous VP8/VP8L bitstream probing functions.
+
+// Returns true if the next 3 bytes in data contain the VP8 signature.
+WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
+
+// Validates the VP8 data-header and retrieves basic header information viz
+// width and height. Returns 0 in case of formatting error. *width/*height
+// can be passed NULL.
+WEBP_EXTERN(int) VP8GetInfo(
+    const uint8_t* data,
+    size_t data_size,    // data available so far
+    size_t chunk_size,   // total data size expected in the chunk
+    int* const width, int* const height);
+
+// Returns true if the next byte(s) in data is a VP8L signature.
+WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
+
+// Validates the VP8L data-header and retrieves basic header information viz
+// width, height and alpha. Returns 0 in case of formatting error.
+// width/height/has_alpha can be passed NULL.
+WEBP_EXTERN(int) VP8LGetInfo(
+    const uint8_t* data, size_t data_size,  // data available so far
+    int* const width, int* const height, int* const has_alpha);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DECODE_VP8_H_ */
diff --git a/src/main/jni/libwebp/dec/frame.c b/src/main/jni/libwebp/dec/frame.c
new file mode 100644
index 000000000..2359acc5b
--- /dev/null
+++ b/src/main/jni/libwebp/dec/frame.c
@@ -0,0 +1,828 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Frame-reconstruction function. Memory allocation.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./vp8i.h"
+#include "../utils/utils.h"
+
+#define ALIGN_MASK (32 - 1)
+
+static void ReconstructRow(const VP8Decoder* const dec,
+                           const VP8ThreadContext* ctx);  // TODO(skal): remove
+
+//------------------------------------------------------------------------------
+// Filtering
+
+// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
+// for caching, given a filtering level.
+// Simple filter:  up to 2 luma samples are read and 1 is written.
+// Complex filter: up to 4 luma samples are read and 3 are written. Same for
+//                 U/V, so it's 8 samples total (because of the 2x upsampling).
+static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
+
+static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
+  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int cache_id = ctx->id_;
+  const int y_bps = dec->cache_y_stride_;
+  const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
+  uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
+  const int ilevel = f_info->f_ilevel_;
+  const int limit = f_info->f_limit_;
+  if (limit == 0) {
+    return;
+  }
+  assert(limit >= 3);
+  if (dec->filter_type_ == 1) {   // simple
+    if (mb_x > 0) {
+      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
+    }
+    if (f_info->f_inner_) {
+      VP8SimpleHFilter16i(y_dst, y_bps, limit);
+    }
+    if (mb_y > 0) {
+      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
+    }
+    if (f_info->f_inner_) {
+      VP8SimpleVFilter16i(y_dst, y_bps, limit);
+    }
+  } else {    // complex
+    const int uv_bps = dec->cache_uv_stride_;
+    uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
+    uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
+    const int hev_thresh = f_info->hev_thresh_;
+    if (mb_x > 0) {
+      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+    }
+    if (f_info->f_inner_) {
+      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+    }
+    if (mb_y > 0) {
+      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+    }
+    if (f_info->f_inner_) {
+      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+    }
+  }
+}
+
+// Filter the decoded macroblock row (if needed)
+static void FilterRow(const VP8Decoder* const dec) {
+  int mb_x;
+  const int mb_y = dec->thread_ctx_.mb_y_;
+  assert(dec->thread_ctx_.filter_row_);
+  for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
+    DoFilter(dec, mb_x, mb_y);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
+
+static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
+  if (dec->filter_type_ > 0) {
+    int s;
+    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
+    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+      int i4x4;
+      // First, compute the initial level
+      int base_level;
+      if (dec->segment_hdr_.use_segment_) {
+        base_level = dec->segment_hdr_.filter_strength_[s];
+        if (!dec->segment_hdr_.absolute_delta_) {
+          base_level += hdr->level_;
+        }
+      } else {
+        base_level = hdr->level_;
+      }
+      for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
+        VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
+        int level = base_level;
+        if (hdr->use_lf_delta_) {
+          // TODO(skal): only CURRENT is handled for now.
+          level += hdr->ref_lf_delta_[0];
+          if (i4x4) {
+            level += hdr->mode_lf_delta_[0];
+          }
+        }
+        level = (level < 0) ? 0 : (level > 63) ? 63 : level;
+        if (level > 0) {
+          int ilevel = level;
+          if (hdr->sharpness_ > 0) {
+            if (hdr->sharpness_ > 4) {
+              ilevel >>= 2;
+            } else {
+              ilevel >>= 1;
+            }
+            if (ilevel > 9 - hdr->sharpness_) {
+              ilevel = 9 - hdr->sharpness_;
+            }
+          }
+          if (ilevel < 1) ilevel = 1;
+          info->f_ilevel_ = ilevel;
+          info->f_limit_ = 2 * level + ilevel;
+          info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
+        } else {
+          info->f_limit_ = 0;  // no filtering
+        }
+        info->f_inner_ = i4x4;
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Dithering
+
+#define DITHER_AMP_TAB_SIZE 12
+static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
+  // roughly, it's dqm->uv_mat_[1]
+  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
+};
+
+void VP8InitDithering(const WebPDecoderOptions* const options,
+                      VP8Decoder* const dec) {
+  assert(dec != NULL);
+  if (options != NULL) {
+    const int d = options->dithering_strength;
+    const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
+    const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
+    if (f > 0) {
+      int s;
+      int all_amp = 0;
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        VP8QuantMatrix* const dqm = &dec->dqm_[s];
+        if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
+          // TODO(skal): should we specially dither more for uv_quant_ < 0?
+          const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
+          dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
+        }
+        all_amp |= dqm->dither_;
+      }
+      if (all_amp != 0) {
+        VP8InitRandom(&dec->dithering_rg_, 1.0f);
+        dec->dither_ = 1;
+      }
+    }
+#if WEBP_DECODER_ABI_VERSION > 0x0204
+    // potentially allow alpha dithering
+    dec->alpha_dithering_ = options->alpha_dithering_strength;
+    if (dec->alpha_dithering_ > 100) {
+      dec->alpha_dithering_ = 100;
+    } else if (dec->alpha_dithering_ < 0) {
+      dec->alpha_dithering_ = 0;
+    }
+#endif
+  }
+}
+
+// minimal amp that will provide a non-zero dithering effect
+#define MIN_DITHER_AMP 4
+#define DITHER_DESCALE 4
+#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
+#define DITHER_AMP_BITS 8
+#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
+
+static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
+  int i, j;
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 8; ++i) {
+      // TODO: could be made faster with SSE2
+      const int bits =
+          VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
+      // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
+      const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
+      const int v = (int)dst[i] + delta;
+      dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
+    }
+    dst += bps;
+  }
+}
+
+static void DitherRow(VP8Decoder* const dec) {
+  int mb_x;
+  assert(dec->dither_);
+  for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
+    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const VP8MBData* const data = ctx->mb_data_ + mb_x;
+    const int cache_id = ctx->id_;
+    const int uv_bps = dec->cache_uv_stride_;
+    if (data->dither_ >= MIN_DITHER_AMP) {
+      uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
+      uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
+      Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
+      Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// This function is called after a row of macroblocks is finished decoding.
+// It also takes into account the following restrictions:
+//  * In case of in-loop filtering, we must hold off sending some of the bottom
+//    pixels as they are yet unfiltered. They will be when the next macroblock
+//    row is decoded. Meanwhile, we must preserve them by rotating them in the
+//    cache area. This doesn't hold for the very bottom row of the uncropped
+//    picture of course.
+//  * we must clip the remaining pixels against the cropping area. The VP8Io
+//    struct must have the following fields set correctly before calling put():
+
+#define MACROBLOCK_VPOS(mb_y)  ((mb_y) * 16)    // vertical position of a MB
+
+// Finalize and transmit a complete row. Return false in case of user-abort.
+static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int cache_id = ctx->id_;
+  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
+  const int ysize = extra_y_rows * dec->cache_y_stride_;
+  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
+  const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+  const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
+  uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
+  uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
+  uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
+  const int mb_y = ctx->mb_y_;
+  const int is_first_row = (mb_y == 0);
+  const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
+
+  if (dec->mt_method_ == 2) {
+    ReconstructRow(dec, ctx);
+  }
+
+  if (ctx->filter_row_) {
+    FilterRow(dec);
+  }
+
+  if (dec->dither_) {
+    DitherRow(dec);
+  }
+
+  if (io->put != NULL) {
+    int y_start = MACROBLOCK_VPOS(mb_y);
+    int y_end = MACROBLOCK_VPOS(mb_y + 1);
+    if (!is_first_row) {
+      y_start -= extra_y_rows;
+      io->y = ydst;
+      io->u = udst;
+      io->v = vdst;
+    } else {
+      io->y = dec->cache_y_ + y_offset;
+      io->u = dec->cache_u_ + uv_offset;
+      io->v = dec->cache_v_ + uv_offset;
+    }
+
+    if (!is_last_row) {
+      y_end -= extra_y_rows;
+    }
+    if (y_end > io->crop_bottom) {
+      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
+    }
+    io->a = NULL;
+    if (dec->alpha_data_ != NULL && y_start < y_end) {
+      // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
+      // good idea.
+      io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
+      if (io->a == NULL) {
+        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                           "Could not decode alpha data.");
+      }
+    }
+    if (y_start < io->crop_top) {
+      const int delta_y = io->crop_top - y_start;
+      y_start = io->crop_top;
+      assert(!(delta_y & 1));
+      io->y += dec->cache_y_stride_ * delta_y;
+      io->u += dec->cache_uv_stride_ * (delta_y >> 1);
+      io->v += dec->cache_uv_stride_ * (delta_y >> 1);
+      if (io->a != NULL) {
+        io->a += io->width * delta_y;
+      }
+    }
+    if (y_start < y_end) {
+      io->y += io->crop_left;
+      io->u += io->crop_left >> 1;
+      io->v += io->crop_left >> 1;
+      if (io->a != NULL) {
+        io->a += io->crop_left;
+      }
+      io->mb_y = y_start - io->crop_top;
+      io->mb_w = io->crop_right - io->crop_left;
+      io->mb_h = y_end - y_start;
+      ok = io->put(io);
+    }
+  }
+  // rotate top samples if needed
+  if (cache_id + 1 == dec->num_caches_) {
+    if (!is_last_row) {
+      memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
+      memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
+      memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
+    }
+  }
+
+  return ok;
+}
+
+#undef MACROBLOCK_VPOS
+
+//------------------------------------------------------------------------------
+
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int filter_row =
+      (dec->filter_type_ > 0) &&
+      (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+  if (dec->mt_method_ == 0) {
+    // ctx->id_ and ctx->f_info_ are already set
+    ctx->mb_y_ = dec->mb_y_;
+    ctx->filter_row_ = filter_row;
+    ReconstructRow(dec, ctx);
+    ok = FinishRow(dec, io);
+  } else {
+    WebPWorker* const worker = &dec->worker_;
+    // Finish previous job *before* updating context
+    ok &= WebPGetWorkerInterface()->Sync(worker);
+    assert(worker->status_ == OK);
+    if (ok) {   // spawn a new deblocking/output job
+      ctx->io_ = *io;
+      ctx->id_ = dec->cache_id_;
+      ctx->mb_y_ = dec->mb_y_;
+      ctx->filter_row_ = filter_row;
+      if (dec->mt_method_ == 2) {  // swap macroblock data
+        VP8MBData* const tmp = ctx->mb_data_;
+        ctx->mb_data_ = dec->mb_data_;
+        dec->mb_data_ = tmp;
+      } else {
+        // perform reconstruction directly in main thread
+        ReconstructRow(dec, ctx);
+      }
+      if (filter_row) {            // swap filter info
+        VP8FInfo* const tmp = ctx->f_info_;
+        ctx->f_info_ = dec->f_info_;
+        dec->f_info_ = tmp;
+      }
+      // (reconstruct)+filter in parallel
+      WebPGetWorkerInterface()->Launch(worker);
+      if (++dec->cache_id_ == dec->num_caches_) {
+        dec->cache_id_ = 0;
+      }
+    }
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// Finish setting up the decoding parameter once user's setup() is called.
+
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
+  // Call setup() first. This may trigger additional decoding features on 'io'.
+  // Note: Afterward, we must call teardown() no matter what.
+  if (io->setup != NULL && !io->setup(io)) {
+    VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
+    return dec->status_;
+  }
+
+  // Disable filtering per user request
+  if (io->bypass_filtering) {
+    dec->filter_type_ = 0;
+  }
+  // TODO(skal): filter type / strength / sharpness forcing
+
+  // Define the area where we can skip in-loop filtering, in case of cropping.
+  //
+  // 'Simple' filter reads two luma samples outside of the macroblock
+  // and filters one. It doesn't filter the chroma samples. Hence, we can
+  // avoid doing the in-loop filtering before crop_top/crop_left position.
+  // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
+  // Means: there's a dependency chain that goes all the way up to the
+  // top-left corner of the picture (MB #0). We must filter all the previous
+  // macroblocks.
+  // TODO(skal): add an 'approximate_decoding' option, that won't produce
+  // a 1:1 bit-exactness for complex filtering?
+  {
+    const int extra_pixels = kFilterExtraRows[dec->filter_type_];
+    if (dec->filter_type_ == 2) {
+      // For complex filter, we need to preserve the dependency chain.
+      dec->tl_mb_x_ = 0;
+      dec->tl_mb_y_ = 0;
+    } else {
+      // For simple filter, we can filter only the cropped region.
+      // We include 'extra_pixels' on the other side of the boundary, since
+      // vertical or horizontal filtering of the previous macroblock can
+      // modify some abutting pixels.
+      dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
+      dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
+      if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
+      if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
+    }
+    // We need some 'extra' pixels on the right/bottom.
+    dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
+    dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
+    if (dec->br_mb_x_ > dec->mb_w_) {
+      dec->br_mb_x_ = dec->mb_w_;
+    }
+    if (dec->br_mb_y_ > dec->mb_h_) {
+      dec->br_mb_y_ = dec->mb_h_;
+    }
+  }
+  PrecomputeFilterStrengths(dec);
+  return VP8_STATUS_OK;
+}
+
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  if (dec->mt_method_ > 0) {
+    ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
+  }
+
+  if (io->teardown != NULL) {
+    io->teardown(io);
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
+//
+// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
+// immediately, and needs to wait for first few rows of the next macroblock to
+// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
+// on strength).
+// With two threads, the vertical positions of the rows being decoded are:
+// Decode:  [ 0..15][16..31][32..47][48..63][64..79][...
+// Deblock:         [ 0..11][12..27][28..43][44..59][...
+// If we use two threads and two caches of 16 pixels, the sequence would be:
+// Decode:  [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
+// Deblock:         [ 0..11][12..27!!][-4..11][12..27][...
+// The problem occurs during row [12..15!!] that both the decoding and
+// deblocking threads are writing simultaneously.
+// With 3 cache lines, one get a safe write pattern:
+// Decode:  [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
+// Deblock:         [ 0..11][12..27][28..43][-4..11][12..27][28...
+// Note that multi-threaded output _without_ deblocking can make use of two
+// cache lines of 16 pixels only, since there's no lagging behind. The decoding
+// and output process have non-concurrent writing:
+// Decode:  [ 0..15][16..31][ 0..15][16..31][...
+// io->put:         [ 0..15][16..31][ 0..15][...
+
+#define MT_CACHE_LINES 3
+#define ST_CACHE_LINES 1   // 1 cache row only for single-threaded case
+
+// Initialize multi/single-thread worker
+static int InitThreadContext(VP8Decoder* const dec) {
+  dec->cache_id_ = 0;
+  if (dec->mt_method_ > 0) {
+    WebPWorker* const worker = &dec->worker_;
+    if (!WebPGetWorkerInterface()->Reset(worker)) {
+      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+                         "thread initialization failed.");
+    }
+    worker->data1 = dec;
+    worker->data2 = (void*)&dec->thread_ctx_.io_;
+    worker->hook = (WebPWorkerHook)FinishRow;
+    dec->num_caches_ =
+      (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
+  } else {
+    dec->num_caches_ = ST_CACHE_LINES;
+  }
+  return 1;
+}
+
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,
+                       const WebPHeaderStructure* const headers,
+                       int width, int height) {
+  if (options == NULL || options->use_threads == 0) {
+    return 0;
+  }
+  (void)headers;
+  (void)width;
+  (void)height;
+  assert(headers == NULL || !headers->is_lossless);
+#if defined(WEBP_USE_THREAD)
+  if (width < MIN_WIDTH_FOR_THREADS) return 0;
+  // TODO(skal): tune the heuristic further
+#if 0
+  if (height < 2 * width) return 2;
+#endif
+  return 2;
+#else   // !WEBP_USE_THREAD
+  return 0;
+#endif
+}
+
+#undef MT_CACHE_LINES
+#undef ST_CACHE_LINES
+
+//------------------------------------------------------------------------------
+// Memory setup
+
+static int AllocateMemory(VP8Decoder* const dec) {
+  const int num_caches = dec->num_caches_;
+  const int mb_w = dec->mb_w_;
+  // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
+  const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
+  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
+  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
+  const size_t f_info_size =
+      (dec->filter_type_ > 0) ?
+          mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
+        : 0;
+  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
+  const size_t mb_data_size =
+      (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
+  const size_t cache_height = (16 * num_caches
+                            + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+  const size_t cache_size = top_size * cache_height;
+  // alpha_size is the only one that scales as width x height.
+  const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
+      (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
+  const uint64_t needed = (uint64_t)intra_pred_mode_size
+                        + top_size + mb_info_size + f_info_size
+                        + yuv_size + mb_data_size
+                        + cache_size + alpha_size + ALIGN_MASK;
+  uint8_t* mem;
+
+  if (needed != (size_t)needed) return 0;  // check for overflow
+  if (needed > dec->mem_size_) {
+    WebPSafeFree(dec->mem_);
+    dec->mem_size_ = 0;
+    dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
+    if (dec->mem_ == NULL) {
+      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+                         "no memory during frame initialization.");
+    }
+    // down-cast is ok, thanks to WebPSafeAlloc() above.
+    dec->mem_size_ = (size_t)needed;
+  }
+
+  mem = (uint8_t*)dec->mem_;
+  dec->intra_t_ = (uint8_t*)mem;
+  mem += intra_pred_mode_size;
+
+  dec->yuv_t_ = (VP8TopSamples*)mem;
+  mem += top_size;
+
+  dec->mb_info_ = ((VP8MB*)mem) + 1;
+  mem += mb_info_size;
+
+  dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
+  mem += f_info_size;
+  dec->thread_ctx_.id_ = 0;
+  dec->thread_ctx_.f_info_ = dec->f_info_;
+  if (dec->mt_method_ > 0) {
+    // secondary cache line. The deblocking process need to make use of the
+    // filtering strength from previous macroblock row, while the new ones
+    // are being decoded in parallel. We'll just swap the pointers.
+    dec->thread_ctx_.f_info_ += mb_w;
+  }
+
+  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
+  assert((yuv_size & ALIGN_MASK) == 0);
+  dec->yuv_b_ = (uint8_t*)mem;
+  mem += yuv_size;
+
+  dec->mb_data_ = (VP8MBData*)mem;
+  dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
+  if (dec->mt_method_ == 2) {
+    dec->thread_ctx_.mb_data_ += mb_w;
+  }
+  mem += mb_data_size;
+
+  dec->cache_y_stride_ = 16 * mb_w;
+  dec->cache_uv_stride_ = 8 * mb_w;
+  {
+    const int extra_rows = kFilterExtraRows[dec->filter_type_];
+    const int extra_y = extra_rows * dec->cache_y_stride_;
+    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
+    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
+    dec->cache_u_ = dec->cache_y_
+                  + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
+    dec->cache_v_ = dec->cache_u_
+                  + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
+    dec->cache_id_ = 0;
+  }
+  mem += cache_size;
+
+  // alpha plane
+  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
+  mem += alpha_size;
+  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
+
+  // note: left/top-info is initialized once for all.
+  memset(dec->mb_info_ - 1, 0, mb_info_size);
+  VP8InitScanline(dec);   // initialize left too.
+
+  // initialize top
+  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
+
+  return 1;
+}
+
+static void InitIo(VP8Decoder* const dec, VP8Io* io) {
+  // prepare 'io'
+  io->mb_y = 0;
+  io->y = dec->cache_y_;
+  io->u = dec->cache_u_;
+  io->v = dec->cache_v_;
+  io->y_stride = dec->cache_y_stride_;
+  io->uv_stride = dec->cache_uv_stride_;
+  io->a = NULL;
+}
+
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
+  if (!InitThreadContext(dec)) return 0;  // call first. Sets dec->num_caches_.
+  if (!AllocateMemory(dec)) return 0;
+  InitIo(dec, io);
+  VP8DspInit();  // Init critical function pointers and look-up tables.
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main reconstruction function.
+
+static const int kScan[16] = {
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
+};
+
+static int CheckMode(int mb_x, int mb_y, int mode) {
+  if (mode == B_DC_PRED) {
+    if (mb_x == 0) {
+      return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
+    } else {
+      return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
+    }
+  }
+  return mode;
+}
+
+static void Copy32b(uint8_t* dst, uint8_t* src) {
+  memcpy(dst, src, 4);
+}
+
+static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
+                                    uint8_t* const dst) {
+  switch (bits >> 30) {
+    case 3:
+      VP8Transform(src, dst, 0);
+      break;
+    case 2:
+      VP8TransformAC3(src, dst);
+      break;
+    case 1:
+      VP8TransformDC(src, dst);
+      break;
+    default:
+      break;
+  }
+}
+
+static void DoUVTransform(uint32_t bits, const int16_t* const src,
+                          uint8_t* const dst) {
+  if (bits & 0xff) {    // any non-zero coeff at all?
+    if (bits & 0xaa) {  // any non-zero AC coefficient?
+      VP8TransformUV(src, dst);   // note we don't use the AC3 variant for U/V
+    } else {
+      VP8TransformDCUV(src, dst);
+    }
+  }
+}
+
+static void ReconstructRow(const VP8Decoder* const dec,
+                           const VP8ThreadContext* ctx) {
+  int j;
+  int mb_x;
+  const int mb_y = ctx->mb_y_;
+  const int cache_id = ctx->id_;
+  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
+  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
+  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+    const VP8MBData* const block = ctx->mb_data_ + mb_x;
+
+    // Rotate in the left samples from previously decoded block. We move four
+    // pixels at a time for alignment reason, and because of in-loop filter.
+    if (mb_x > 0) {
+      for (j = -1; j < 16; ++j) {
+        Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
+      }
+      for (j = -1; j < 8; ++j) {
+        Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
+        Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
+      }
+    } else {
+      for (j = 0; j < 16; ++j) {
+        y_dst[j * BPS - 1] = 129;
+      }
+      for (j = 0; j < 8; ++j) {
+        u_dst[j * BPS - 1] = 129;
+        v_dst[j * BPS - 1] = 129;
+      }
+      // Init top-left sample on left column too
+      if (mb_y > 0) {
+        y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
+      }
+    }
+    {
+      // bring top samples into the cache
+      VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
+      const int16_t* const coeffs = block->coeffs_;
+      uint32_t bits = block->non_zero_y_;
+      int n;
+
+      if (mb_y > 0) {
+        memcpy(y_dst - BPS, top_yuv[0].y, 16);
+        memcpy(u_dst - BPS, top_yuv[0].u, 8);
+        memcpy(v_dst - BPS, top_yuv[0].v, 8);
+      } else if (mb_x == 0) {
+        // we only need to do this init once at block (0,0).
+        // Afterward, it remains valid for the whole topmost row.
+        memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
+        memset(u_dst - BPS - 1, 127, 8 + 1);
+        memset(v_dst - BPS - 1, 127, 8 + 1);
+      }
+
+      // predict and add residuals
+      if (block->is_i4x4_) {   // 4x4
+        uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
+
+        if (mb_y > 0) {
+          if (mb_x >= dec->mb_w_ - 1) {    // on rightmost border
+            memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
+          } else {
+            memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
+          }
+        }
+        // replicate the top-right pixels below
+        top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
+
+        // predict and add residuals for all 4x4 blocks in turn.
+        for (n = 0; n < 16; ++n, bits <<= 2) {
+          uint8_t* const dst = y_dst + kScan[n];
+          VP8PredLuma4[block->imodes_[n]](dst);
+          DoTransform(bits, coeffs + n * 16, dst);
+        }
+      } else {    // 16x16
+        const int pred_func = CheckMode(mb_x, mb_y,
+                                        block->imodes_[0]);
+        VP8PredLuma16[pred_func](y_dst);
+        if (bits != 0) {
+          for (n = 0; n < 16; ++n, bits <<= 2) {
+            DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
+          }
+        }
+      }
+      {
+        // Chroma
+        const uint32_t bits_uv = block->non_zero_uv_;
+        const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
+        VP8PredChroma8[pred_func](u_dst);
+        VP8PredChroma8[pred_func](v_dst);
+        DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
+        DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
+      }
+
+      // stash away top samples for next block
+      if (mb_y < dec->mb_h_ - 1) {
+        memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
+        memcpy(top_yuv[0].u, u_dst +  7 * BPS,  8);
+        memcpy(top_yuv[0].v, v_dst +  7 * BPS,  8);
+      }
+    }
+    // Transfer reconstructed samples from yuv_b_ cache to final destination.
+    {
+      const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+      const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
+      uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
+      uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
+      uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
+      for (j = 0; j < 16; ++j) {
+        memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
+      }
+      for (j = 0; j < 8; ++j) {
+        memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
+        memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/dec/idec.c b/src/main/jni/libwebp/dec/idec.c
new file mode 100644
index 000000000..5d8bb0c29
--- /dev/null
+++ b/src/main/jni/libwebp/dec/idec.c
@@ -0,0 +1,857 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Incremental decoding
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./webpi.h"
+#include "./vp8i.h"
+#include "../utils/utils.h"
+
+// In append mode, buffer allocations increase as multiples of this value.
+// Needs to be a power of 2.
+#define CHUNK_SIZE 4096
+#define MAX_MB_SIZE 4096
+
+//------------------------------------------------------------------------------
+// Data structures for memory and states
+
+// Decoding states. State normally flows as:
+// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and
+// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image.
+// If there is any error the decoder goes into state ERROR.
+typedef enum {
+  STATE_WEBP_HEADER,  // All the data before that of the VP8/VP8L chunk.
+  STATE_VP8_HEADER,   // The VP8 Frame header (within the VP8 chunk).
+  STATE_VP8_PARTS0,
+  STATE_VP8_DATA,
+  STATE_VP8L_HEADER,
+  STATE_VP8L_DATA,
+  STATE_DONE,
+  STATE_ERROR
+} DecState;
+
+// Operating state for the MemBuffer
+typedef enum {
+  MEM_MODE_NONE = 0,
+  MEM_MODE_APPEND,
+  MEM_MODE_MAP
+} MemBufferMode;
+
+// storage for partition #0 and partial data (in a rolling fashion)
+typedef struct {
+  MemBufferMode mode_;  // Operation mode
+  size_t start_;        // start location of the data to be decoded
+  size_t end_;          // end location
+  size_t buf_size_;     // size of the allocated buffer
+  uint8_t* buf_;        // We don't own this buffer in case WebPIUpdate()
+
+  size_t part0_size_;         // size of partition #0
+  const uint8_t* part0_buf_;  // buffer to store partition #0
+} MemBuffer;
+
+struct WebPIDecoder {
+  DecState state_;         // current decoding state
+  WebPDecParams params_;   // Params to store output info
+  int is_lossless_;        // for down-casting 'dec_'.
+  void* dec_;              // either a VP8Decoder or a VP8LDecoder instance
+  VP8Io io_;
+
+  MemBuffer mem_;          // input memory buffer.
+  WebPDecBuffer output_;   // output buffer (when no external one is supplied)
+  size_t chunk_size_;      // Compressed VP8/VP8L size extracted from Header.
+
+  int last_mb_y_;          // last row reached for intra-mode decoding
+};
+
+// MB context to restore in case VP8DecodeMB() fails
+typedef struct {
+  VP8MB left_;
+  VP8MB info_;
+  VP8BitReader token_br_;
+} MBContext;
+
+//------------------------------------------------------------------------------
+// MemBuffer: incoming data handling
+
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
+  return (mem->end_ - mem->start_);
+}
+
+// Check if we need to preserve the compressed alpha data, as it may not have
+// been decoded yet.
+static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
+  if (idec->state_ == STATE_WEBP_HEADER) {
+    // We haven't parsed the headers yet, so we don't know whether the image is
+    // lossy or lossless. This also means that we haven't parsed the ALPH chunk.
+    return 0;
+  }
+  if (idec->is_lossless_) {
+    return 0;  // ALPH chunk is not present for lossless images.
+  } else {
+    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    assert(dec != NULL);  // Must be true as idec->state_ != STATE_WEBP_HEADER.
+    return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
+  }
+}
+
+static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* const new_base = mem->buf_ + mem->start_;
+  // note: for VP8, setting up idec->io_ is only really needed at the beginning
+  // of the decoding, till partition #0 is complete.
+  idec->io_.data = new_base;
+  idec->io_.data_size = MemDataSize(mem);
+
+  if (idec->dec_ != NULL) {
+    if (!idec->is_lossless_) {
+      VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+      const int last_part = dec->num_parts_ - 1;
+      if (offset != 0) {
+        int p;
+        for (p = 0; p <= last_part; ++p) {
+          VP8RemapBitReader(dec->parts_ + p, offset);
+        }
+        // Remap partition #0 data pointer to new offset, but only in MAP
+        // mode (in APPEND mode, partition #0 is copied into a fixed memory).
+        if (mem->mode_ == MEM_MODE_MAP) {
+          VP8RemapBitReader(&dec->br_, offset);
+        }
+      }
+      assert(last_part >= 0);
+      dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
+      if (NeedCompressedAlpha(idec)) {
+        ALPHDecoder* const alph_dec = dec->alph_dec_;
+        dec->alpha_data_ += offset;
+        if (alph_dec != NULL) {
+          if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
+            VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
+            assert(alph_vp8l_dec != NULL);
+            assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
+            VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
+                                   dec->alpha_data_ + ALPHA_HEADER_LEN,
+                                   dec->alpha_data_size_ - ALPHA_HEADER_LEN);
+          } else {  // alph_dec->method_ == ALPHA_NO_COMPRESSION
+            // Nothing special to do in this case.
+          }
+        }
+      }
+    } else {    // Resize lossless bitreader
+      VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+      VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
+    }
+  }
+}
+
+// Appends data to the end of MemBuffer->buf_. It expands the allocated memory
+// size if required and also updates VP8BitReader's if new memory is allocated.
+static int AppendToMemBuffer(WebPIDecoder* const idec,
+                             const uint8_t* const data, size_t data_size) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  MemBuffer* const mem = &idec->mem_;
+  const int need_compressed_alpha = NeedCompressedAlpha(idec);
+  const uint8_t* const old_start = mem->buf_ + mem->start_;
+  const uint8_t* const old_base =
+      need_compressed_alpha ? dec->alpha_data_ : old_start;
+  assert(mem->mode_ == MEM_MODE_APPEND);
+  if (data_size > MAX_CHUNK_PAYLOAD) {
+    // security safeguard: trying to allocate more than what the format
+    // allows for a chunk should be considered a smoke smell.
+    return 0;
+  }
+
+  if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
+    const size_t new_mem_start = old_start - old_base;
+    const size_t current_size = MemDataSize(mem) + new_mem_start;
+    const uint64_t new_size = (uint64_t)current_size + data_size;
+    const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
+    uint8_t* const new_buf =
+        (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
+    if (new_buf == NULL) return 0;
+    memcpy(new_buf, old_base, current_size);
+    WebPSafeFree(mem->buf_);
+    mem->buf_ = new_buf;
+    mem->buf_size_ = (size_t)extra_size;
+    mem->start_ = new_mem_start;
+    mem->end_ = current_size;
+  }
+
+  memcpy(mem->buf_ + mem->end_, data, data_size);
+  mem->end_ += data_size;
+  assert(mem->end_ <= mem->buf_size_);
+
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
+  return 1;
+}
+
+static int RemapMemBuffer(WebPIDecoder* const idec,
+                          const uint8_t* const data, size_t data_size) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* const old_buf = mem->buf_;
+  const uint8_t* const old_start = old_buf + mem->start_;
+  assert(mem->mode_ == MEM_MODE_MAP);
+
+  if (data_size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
+
+  mem->buf_ = (uint8_t*)data;
+  mem->end_ = mem->buf_size_ = data_size;
+
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
+  return 1;
+}
+
+static void InitMemBuffer(MemBuffer* const mem) {
+  mem->mode_       = MEM_MODE_NONE;
+  mem->buf_        = NULL;
+  mem->buf_size_   = 0;
+  mem->part0_buf_  = NULL;
+  mem->part0_size_ = 0;
+}
+
+static void ClearMemBuffer(MemBuffer* const mem) {
+  assert(mem);
+  if (mem->mode_ == MEM_MODE_APPEND) {
+    WebPSafeFree(mem->buf_);
+    WebPSafeFree((void*)mem->part0_buf_);
+  }
+}
+
+static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
+  if (mem->mode_ == MEM_MODE_NONE) {
+    mem->mode_ = expected;    // switch to the expected mode
+  } else if (mem->mode_ != expected) {
+    return 0;         // we mixed the modes => error
+  }
+  assert(mem->mode_ == expected);   // mode is ok
+  return 1;
+}
+
+// To be called last.
+static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
+#if WEBP_DECODER_ABI_VERSION > 0x0203
+  const WebPDecoderOptions* const options = idec->params_.options;
+  WebPDecBuffer* const output = idec->params_.output;
+
+  idec->state_ = STATE_DONE;
+  if (options != NULL && options->flip) {
+    return WebPFlipBuffer(output);
+  }
+#endif
+  idec->state_ = STATE_DONE;
+  return VP8_STATUS_OK;
+}
+
+//------------------------------------------------------------------------------
+// Macroblock-decoding contexts
+
+static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
+                        MBContext* const context) {
+  context->left_ = dec->mb_info_[-1];
+  context->info_ = dec->mb_info_[dec->mb_x_];
+  context->token_br_ = *token_br;
+}
+
+static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
+                           VP8BitReader* const token_br) {
+  dec->mb_info_[-1] = context->left_;
+  dec->mb_info_[dec->mb_x_] = context->info_;
+  *token_br = context->token_br_;
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
+  if (idec->state_ == STATE_VP8_DATA) {
+    VP8Io* const io = &idec->io_;
+    if (io->teardown != NULL) {
+      io->teardown(io);
+    }
+  }
+  idec->state_ = STATE_ERROR;
+  return error;
+}
+
+static void ChangeState(WebPIDecoder* const idec, DecState new_state,
+                        size_t consumed_bytes) {
+  MemBuffer* const mem = &idec->mem_;
+  idec->state_ = new_state;
+  mem->start_ += consumed_bytes;
+  assert(mem->start_ <= mem->end_);
+  idec->io_.data = mem->buf_ + mem->start_;
+  idec->io_.data_size = MemDataSize(mem);
+}
+
+// Headers
+static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* data = mem->buf_ + mem->start_;
+  size_t curr_size = MemDataSize(mem);
+  VP8StatusCode status;
+  WebPHeaderStructure headers;
+
+  headers.data = data;
+  headers.data_size = curr_size;
+  headers.have_all_data = 0;
+  status = WebPParseHeaders(&headers);
+  if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    return VP8_STATUS_SUSPENDED;  // We haven't found a VP8 chunk yet.
+  } else if (status != VP8_STATUS_OK) {
+    return IDecError(idec, status);
+  }
+
+  idec->chunk_size_ = headers.compressed_size;
+  idec->is_lossless_ = headers.is_lossless;
+  if (!idec->is_lossless_) {
+    VP8Decoder* const dec = VP8New();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    idec->dec_ = dec;
+    dec->alpha_data_ = headers.alpha_data;
+    dec->alpha_data_size_ = headers.alpha_data_size;
+    ChangeState(idec, STATE_VP8_HEADER, headers.offset);
+  } else {
+    VP8LDecoder* const dec = VP8LNew();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    idec->dec_ = dec;
+    ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
+  }
+  return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
+  const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
+  const size_t curr_size = MemDataSize(&idec->mem_);
+  int width, height;
+  uint32_t bits;
+
+  if (curr_size < VP8_FRAME_HEADER_SIZE) {
+    // Not enough data bytes to extract VP8 Frame Header.
+    return VP8_STATUS_SUSPENDED;
+  }
+  if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
+    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+  }
+
+  bits = data[0] | (data[1] << 8) | (data[2] << 16);
+  idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
+
+  idec->io_.data = data;
+  idec->io_.data_size = curr_size;
+  idec->state_ = STATE_VP8_PARTS0;
+  return VP8_STATUS_OK;
+}
+
+// Partition #0
+static int CopyParts0Data(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8BitReader* const br = &dec->br_;
+  const size_t psize = br->buf_end_ - br->buf_;
+  MemBuffer* const mem = &idec->mem_;
+  assert(!idec->is_lossless_);
+  assert(mem->part0_buf_ == NULL);
+  assert(psize > 0);
+  assert(psize <= mem->part0_size_);  // Format limit: no need for runtime check
+  if (mem->mode_ == MEM_MODE_APPEND) {
+    // We copy and grab ownership of the partition #0 data.
+    uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, psize);
+    if (part0_buf == NULL) {
+      return 0;
+    }
+    memcpy(part0_buf, br->buf_, psize);
+    mem->part0_buf_ = part0_buf;
+    br->buf_ = part0_buf;
+    br->buf_end_ = part0_buf + psize;
+  } else {
+    // Else: just keep pointers to the partition #0's data in dec_->br_.
+  }
+  mem->start_ += psize;
+  return 1;
+}
+
+static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8Io* const io = &idec->io_;
+  const WebPDecParams* const params = &idec->params_;
+  WebPDecBuffer* const output = params->output;
+
+  // Wait till we have enough data for the whole partition #0
+  if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
+    return VP8_STATUS_SUSPENDED;
+  }
+
+  if (!VP8GetHeaders(dec, io)) {
+    const VP8StatusCode status = dec->status_;
+    if (status == VP8_STATUS_SUSPENDED ||
+        status == VP8_STATUS_NOT_ENOUGH_DATA) {
+      // treating NOT_ENOUGH_DATA as SUSPENDED state
+      return VP8_STATUS_SUSPENDED;
+    }
+    return IDecError(idec, status);
+  }
+
+  // Allocate/Verify output buffer now
+  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                       output);
+  if (dec->status_ != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+  // This change must be done before calling VP8InitFrame()
+  dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
+                                       io->width, io->height);
+  VP8InitDithering(params->options, dec);
+  if (!CopyParts0Data(idec)) {
+    return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
+  }
+
+  // Finish setting up the decoding parameters. Will call io->setup().
+  if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+
+  // Note: past this point, teardown() must always be called
+  // in case of error.
+  idec->state_ = STATE_VP8_DATA;
+  // Allocate memory and prepare everything.
+  if (!VP8InitFrame(dec, io)) {
+    return IDecError(idec, dec->status_);
+  }
+  return VP8_STATUS_OK;
+}
+
+// Remaining partitions
+static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8Io* const io = &idec->io_;
+
+  assert(dec->ready_);
+  for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
+    if (idec->last_mb_y_ != dec->mb_y_) {
+      if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+        // note: normally, error shouldn't occur since we already have the whole
+        // partition0 available here in DecodeRemaining(). Reaching EOF while
+        // reading intra modes really means a BITSTREAM_ERROR.
+        return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+      }
+      idec->last_mb_y_ = dec->mb_y_;
+    }
+    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+      VP8BitReader* const token_br =
+          &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+      MBContext context;
+      SaveContext(dec, token_br, &context);
+      if (!VP8DecodeMB(dec, token_br)) {
+        // We shouldn't fail when MAX_MB data was available
+        if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
+          return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+        }
+        RestoreContext(&context, dec, token_br);
+        return VP8_STATUS_SUSPENDED;
+      }
+      // Release buffer only if there is only one partition
+      if (dec->num_parts_ == 1) {
+        idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
+        assert(idec->mem_.start_ <= idec->mem_.end_);
+      }
+    }
+    VP8InitScanline(dec);   // Prepare for next scanline
+
+    // Reconstruct, filter and emit the row.
+    if (!VP8ProcessRow(dec, io)) {
+      return IDecError(idec, VP8_STATUS_USER_ABORT);
+    }
+  }
+  // Synchronize the thread and check for errors.
+  if (!VP8ExitCritical(dec, io)) {
+    return IDecError(idec, VP8_STATUS_USER_ABORT);
+  }
+  dec->ready_ = 0;
+  return FinishDecoding(idec);
+}
+
+static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
+                                         VP8StatusCode status) {
+  if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    return VP8_STATUS_SUSPENDED;
+  }
+  return IDecError(idec, status);
+}
+
+static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
+  VP8Io* const io = &idec->io_;
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+  const WebPDecParams* const params = &idec->params_;
+  WebPDecBuffer* const output = params->output;
+  size_t curr_size = MemDataSize(&idec->mem_);
+  assert(idec->is_lossless_);
+
+  // Wait until there's enough data for decoding header.
+  if (curr_size < (idec->chunk_size_ >> 3)) {
+    return VP8_STATUS_SUSPENDED;
+  }
+  if (!VP8LDecodeHeader(dec, io)) {
+    return ErrorStatusLossless(idec, dec->status_);
+  }
+  // Allocate/verify output buffer now.
+  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                       output);
+  if (dec->status_ != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+
+  idec->state_ = STATE_VP8L_DATA;
+  return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+  const size_t curr_size = MemDataSize(&idec->mem_);
+  assert(idec->is_lossless_);
+
+  // At present Lossless decoder can't decode image incrementally. So wait till
+  // all the image data is aggregated before image can be decoded.
+  if (curr_size < idec->chunk_size_) {
+    return VP8_STATUS_SUSPENDED;
+  }
+
+  if (!VP8LDecodeImage(dec)) {
+    // The decoding is called after all the data-bytes are aggregated. Change
+    // the error to VP8_BITSTREAM_ERROR in case lossless decoder fails to decode
+    // all the pixels (VP8_STATUS_SUSPENDED).
+    if (dec->status_ == VP8_STATUS_SUSPENDED) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    }
+    return ErrorStatusLossless(idec, dec->status_);
+  }
+
+  return FinishDecoding(idec);
+}
+
+  // Main decoding loop
+static VP8StatusCode IDecode(WebPIDecoder* idec) {
+  VP8StatusCode status = VP8_STATUS_SUSPENDED;
+
+  if (idec->state_ == STATE_WEBP_HEADER) {
+    status = DecodeWebPHeaders(idec);
+  } else {
+    if (idec->dec_ == NULL) {
+      return VP8_STATUS_SUSPENDED;    // can't continue if we have no decoder.
+    }
+  }
+  if (idec->state_ == STATE_VP8_HEADER) {
+    status = DecodeVP8FrameHeader(idec);
+  }
+  if (idec->state_ == STATE_VP8_PARTS0) {
+    status = DecodePartition0(idec);
+  }
+  if (idec->state_ == STATE_VP8_DATA) {
+    status = DecodeRemaining(idec);
+  }
+  if (idec->state_ == STATE_VP8L_HEADER) {
+    status = DecodeVP8LHeader(idec);
+  }
+  if (idec->state_ == STATE_VP8L_DATA) {
+    status = DecodeVP8LData(idec);
+  }
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// Public functions
+
+WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
+  WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
+  if (idec == NULL) {
+    return NULL;
+  }
+
+  idec->state_ = STATE_WEBP_HEADER;
+  idec->chunk_size_ = 0;
+
+  idec->last_mb_y_ = -1;
+
+  InitMemBuffer(&idec->mem_);
+  WebPInitDecBuffer(&idec->output_);
+  VP8InitIo(&idec->io_);
+
+  WebPResetDecParams(&idec->params_);
+  idec->params_.output = (output_buffer != NULL) ? output_buffer
+                                                 : &idec->output_;
+  WebPInitCustomIo(&idec->params_, &idec->io_);  // Plug the I/O functions.
+
+  return idec;
+}
+
+WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
+                          WebPDecoderConfig* config) {
+  WebPIDecoder* idec;
+
+  // Parse the bitstream's features, if requested:
+  if (data != NULL && data_size > 0 && config != NULL) {
+    if (WebPGetFeatures(data, data_size, &config->input) != VP8_STATUS_OK) {
+      return NULL;
+    }
+  }
+  // Create an instance of the incremental decoder
+  idec = WebPINewDecoder(config ? &config->output : NULL);
+  if (idec == NULL) {
+    return NULL;
+  }
+  // Finish initialization
+  if (config != NULL) {
+    idec->params_.options = &config->options;
+  }
+  return idec;
+}
+
+void WebPIDelete(WebPIDecoder* idec) {
+  if (idec == NULL) return;
+  if (idec->dec_ != NULL) {
+    if (!idec->is_lossless_) {
+      if (idec->state_ == STATE_VP8_DATA) {
+        // Synchronize the thread, clean-up and check for errors.
+        VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+      }
+      VP8Delete((VP8Decoder*)idec->dec_);
+    } else {
+      VP8LDelete((VP8LDecoder*)idec->dec_);
+    }
+  }
+  ClearMemBuffer(&idec->mem_);
+  WebPFreeDecBuffer(&idec->output_);
+  WebPSafeFree(idec);
+}
+
+//------------------------------------------------------------------------------
+// Wrapper toward WebPINewDecoder
+
+WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
+                          size_t output_buffer_size, int output_stride) {
+  const int is_external_memory = (output_buffer != NULL);
+  WebPIDecoder* idec;
+
+  if (mode >= MODE_YUV) return NULL;
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    output_buffer_size = 0;
+    output_stride = 0;
+  } else {  // A buffer was passed. Validate the other params.
+    if (output_stride == 0 || output_buffer_size == 0) {
+      return NULL;   // invalid parameter.
+    }
+  }
+  idec = WebPINewDecoder(NULL);
+  if (idec == NULL) return NULL;
+  idec->output_.colorspace = mode;
+  idec->output_.is_external_memory = is_external_memory;
+  idec->output_.u.RGBA.rgba = output_buffer;
+  idec->output_.u.RGBA.stride = output_stride;
+  idec->output_.u.RGBA.size = output_buffer_size;
+  return idec;
+}
+
+WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
+                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* v, size_t v_size, int v_stride,
+                           uint8_t* a, size_t a_size, int a_stride) {
+  const int is_external_memory = (luma != NULL);
+  WebPIDecoder* idec;
+  WEBP_CSP_MODE colorspace;
+
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    luma_size = u_size = v_size = a_size = 0;
+    luma_stride = u_stride = v_stride = a_stride = 0;
+    u = v = a = NULL;
+    colorspace = MODE_YUVA;
+  } else {  // A luma buffer was passed. Validate the other parameters.
+    if (u == NULL || v == NULL) return NULL;
+    if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
+    if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
+    if (a != NULL) {
+      if (a_size == 0 || a_stride == 0) return NULL;
+    }
+    colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
+  }
+
+  idec = WebPINewDecoder(NULL);
+  if (idec == NULL) return NULL;
+
+  idec->output_.colorspace = colorspace;
+  idec->output_.is_external_memory = is_external_memory;
+  idec->output_.u.YUVA.y = luma;
+  idec->output_.u.YUVA.y_stride = luma_stride;
+  idec->output_.u.YUVA.y_size = luma_size;
+  idec->output_.u.YUVA.u = u;
+  idec->output_.u.YUVA.u_stride = u_stride;
+  idec->output_.u.YUVA.u_size = u_size;
+  idec->output_.u.YUVA.v = v;
+  idec->output_.u.YUVA.v_stride = v_stride;
+  idec->output_.u.YUVA.v_size = v_size;
+  idec->output_.u.YUVA.a = a;
+  idec->output_.u.YUVA.a_stride = a_stride;
+  idec->output_.u.YUVA.a_size = a_size;
+  return idec;
+}
+
+WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
+                          uint8_t* u, size_t u_size, int u_stride,
+                          uint8_t* v, size_t v_size, int v_stride) {
+  return WebPINewYUVA(luma, luma_size, luma_stride,
+                      u, u_size, u_stride,
+                      v, v_size, v_stride,
+                      NULL, 0, 0);
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
+  assert(idec);
+  if (idec->state_ == STATE_ERROR) {
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
+  if (idec->state_ == STATE_DONE) {
+    return VP8_STATUS_OK;
+  }
+  return VP8_STATUS_SUSPENDED;
+}
+
+VP8StatusCode WebPIAppend(WebPIDecoder* idec,
+                          const uint8_t* data, size_t data_size) {
+  VP8StatusCode status;
+  if (idec == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  status = IDecCheckStatus(idec);
+  if (status != VP8_STATUS_SUSPENDED) {
+    return status;
+  }
+  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  // Append data to memory buffer
+  if (!AppendToMemBuffer(idec, data, data_size)) {
+    return VP8_STATUS_OUT_OF_MEMORY;
+  }
+  return IDecode(idec);
+}
+
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
+                          const uint8_t* data, size_t data_size) {
+  VP8StatusCode status;
+  if (idec == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  status = IDecCheckStatus(idec);
+  if (status != VP8_STATUS_SUSPENDED) {
+    return status;
+  }
+  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  // Make the memory buffer point to the new buffer
+  if (!RemapMemBuffer(idec, data, data_size)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  return IDecode(idec);
+}
+
+//------------------------------------------------------------------------------
+
+static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
+  if (idec == NULL || idec->dec_ == NULL) {
+    return NULL;
+  }
+  if (idec->state_ <= STATE_VP8_PARTS0) {
+    return NULL;
+  }
+  return idec->params_.output;
+}
+
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
+                                      int* left, int* top,
+                                      int* width, int* height) {
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (left != NULL) *left = 0;
+  if (top != NULL) *top = 0;
+  // TODO(skal): later include handling of rotations.
+  if (src) {
+    if (width != NULL) *width = src->width;
+    if (height != NULL) *height = idec->params_.last_y;
+  } else {
+    if (width != NULL) *width = 0;
+    if (height != NULL) *height = 0;
+  }
+  return src;
+}
+
+uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
+                        int* width, int* height, int* stride) {
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (src == NULL) return NULL;
+  if (src->colorspace >= MODE_YUV) {
+    return NULL;
+  }
+
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.RGBA.stride;
+
+  return src->u.RGBA.rgba;
+}
+
+uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
+                         uint8_t** u, uint8_t** v, uint8_t** a,
+                         int* width, int* height,
+                         int* stride, int* uv_stride, int* a_stride) {
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (src == NULL) return NULL;
+  if (src->colorspace < MODE_YUV) {
+    return NULL;
+  }
+
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (u != NULL) *u = src->u.YUVA.u;
+  if (v != NULL) *v = src->u.YUVA.v;
+  if (a != NULL) *a = src->u.YUVA.a;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.YUVA.y_stride;
+  if (uv_stride != NULL) *uv_stride = src->u.YUVA.u_stride;
+  if (a_stride != NULL) *a_stride = src->u.YUVA.a_stride;
+
+  return src->u.YUVA.y;
+}
+
+int WebPISetIOHooks(WebPIDecoder* const idec,
+                    VP8IoPutHook put,
+                    VP8IoSetupHook setup,
+                    VP8IoTeardownHook teardown,
+                    void* user_data) {
+  if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
+    return 0;
+  }
+
+  idec->io_.put = put;
+  idec->io_.setup = setup;
+  idec->io_.teardown = teardown;
+  idec->io_.opaque = user_data;
+
+  return 1;
+}
+
diff --git a/src/main/jni/libwebp/dec/io.c b/src/main/jni/libwebp/dec/io.c
new file mode 100644
index 000000000..8094e44f6
--- /dev/null
+++ b/src/main/jni/libwebp/dec/io.c
@@ -0,0 +1,648 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// functions for sample output.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../dec/vp8i.h"
+#include "./webpi.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// Main YUV<->RGB conversion functions
+
+static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
+  WebPDecBuffer* output = p->output;
+  const WebPYUVABuffer* const buf = &output->u.YUVA;
+  uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride;
+  uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride;
+  uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  const int uv_w = (mb_w + 1) / 2;
+  const int uv_h = (mb_h + 1) / 2;
+  int j;
+  for (j = 0; j < mb_h; ++j) {
+    memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w);
+  }
+  for (j = 0; j < uv_h; ++j) {
+    memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w);
+    memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w);
+  }
+  return io->mb_h;
+}
+
+// Point-sampling U/V sampler.
+static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
+  WebPDecBuffer* const output = p->output;
+  WebPRGBABuffer* const buf = &output->u.RGBA;
+  uint8_t* const dst = buf->rgba + io->mb_y * buf->stride;
+  WebPSamplerProcessPlane(io->y, io->y_stride,
+                          io->u, io->v, io->uv_stride,
+                          dst, buf->stride, io->mb_w, io->mb_h,
+                          WebPSamplers[output->colorspace]);
+  return io->mb_h;
+}
+
+//------------------------------------------------------------------------------
+// YUV444 -> RGB conversion
+
+#if 0   // TODO(skal): this is for future rescaling.
+static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {
+  WebPDecBuffer* output = p->output;
+  const WebPRGBABuffer* const buf = &output->u.RGBA;
+  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+  const uint8_t* y_src = io->y;
+  const uint8_t* u_src = io->u;
+  const uint8_t* v_src = io->v;
+  const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace];
+  const int mb_w = io->mb_w;
+  const int last = io->mb_h;
+  int j;
+  for (j = 0; j < last; ++j) {
+    convert(y_src, u_src, v_src, dst, mb_w);
+    y_src += io->y_stride;
+    u_src += io->uv_stride;
+    v_src += io->uv_stride;
+    dst += buf->stride;
+  }
+  return io->mb_h;
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Fancy upsampling
+
+#ifdef FANCY_UPSAMPLING
+static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
+  int num_lines_out = io->mb_h;   // a priori guess
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+  WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
+  const uint8_t* cur_y = io->y;
+  const uint8_t* cur_u = io->u;
+  const uint8_t* cur_v = io->v;
+  const uint8_t* top_u = p->tmp_u;
+  const uint8_t* top_v = p->tmp_v;
+  int y = io->mb_y;
+  const int y_end = io->mb_y + io->mb_h;
+  const int mb_w = io->mb_w;
+  const int uv_w = (mb_w + 1) / 2;
+
+  if (y == 0) {
+    // First line is special cased. We mirror the u/v samples at boundary.
+    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
+  } else {
+    // We can finish the left-over line from previous call.
+    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
+             dst - buf->stride, dst, mb_w);
+    ++num_lines_out;
+  }
+  // Loop over each output pairs of row.
+  for (; y + 2 < y_end; y += 2) {
+    top_u = cur_u;
+    top_v = cur_v;
+    cur_u += io->uv_stride;
+    cur_v += io->uv_stride;
+    dst += 2 * buf->stride;
+    cur_y += 2 * io->y_stride;
+    upsample(cur_y - io->y_stride, cur_y,
+             top_u, top_v, cur_u, cur_v,
+             dst - buf->stride, dst, mb_w);
+  }
+  // move to last row
+  cur_y += io->y_stride;
+  if (io->crop_top + y_end < io->crop_bottom) {
+    // Save the unfinished samples for next call (as we're not done yet).
+    memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y));
+    memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u));
+    memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v));
+    // The fancy upsampler leaves a row unfinished behind
+    // (except for the very last row)
+    num_lines_out--;
+  } else {
+    // Process the very last row of even-sized picture
+    if (!(y_end & 1)) {
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
+               dst + buf->stride, NULL, mb_w);
+    }
+  }
+  return num_lines_out;
+}
+
+#endif    /* FANCY_UPSAMPLING */
+
+//------------------------------------------------------------------------------
+
+static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
+  const uint8_t* alpha = io->a;
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
+  int j;
+
+  if (alpha != NULL) {
+    for (j = 0; j < mb_h; ++j) {
+      memcpy(dst, alpha, mb_w * sizeof(*dst));
+      alpha += io->width;
+      dst += buf->a_stride;
+    }
+  } else if (buf->a != NULL) {
+    // the user requested alpha, but there is none, set it to opaque.
+    for (j = 0; j < mb_h; ++j) {
+      memset(dst, 0xff, mb_w * sizeof(*dst));
+      dst += buf->a_stride;
+    }
+  }
+  return 0;
+}
+
+static int GetAlphaSourceRow(const VP8Io* const io,
+                             const uint8_t** alpha, int* const num_rows) {
+  int start_y = io->mb_y;
+  *num_rows = io->mb_h;
+
+  // Compensate for the 1-line delay of the fancy upscaler.
+  // This is similar to EmitFancyRGB().
+  if (io->fancy_upsampling) {
+    if (start_y == 0) {
+      // We don't process the last row yet. It'll be done during the next call.
+      --*num_rows;
+    } else {
+      --start_y;
+      // Fortunately, *alpha data is persistent, so we can go back
+      // one row and finish alpha blending, now that the fancy upscaler
+      // completed the YUV->RGB interpolation.
+      *alpha -= io->width;
+    }
+    if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
+      // If it's the very last call, we process all the remaining rows!
+      *num_rows = io->crop_bottom - io->crop_top - start_y;
+    }
+  }
+  return start_y;
+}
+
+static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
+  const uint8_t* alpha = io->a;
+  if (alpha != NULL) {
+    const int mb_w = io->mb_w;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const int alpha_first =
+        (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
+    uint32_t alpha_mask = 0xff;
+    int i, j;
+
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        const uint32_t alpha_value = alpha[i];
+        dst[4 * i] = alpha_value;
+        alpha_mask &= alpha_value;
+      }
+      alpha += io->width;
+      dst += buf->stride;
+    }
+    // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
+    if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
+      WebPApplyAlphaMultiply(base_rgba, alpha_first,
+                             mb_w, num_rows, buf->stride);
+    }
+  }
+  return 0;
+}
+
+static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
+  const uint8_t* alpha = io->a;
+  if (alpha != NULL) {
+    const int mb_w = io->mb_w;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+    uint8_t* alpha_dst = base_rgba;
+#else
+    uint8_t* alpha_dst = base_rgba + 1;
+#endif
+    uint32_t alpha_mask = 0x0f;
+    int i, j;
+
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        // Fill in the alpha value (converted to 4 bits).
+        const uint32_t alpha_value = alpha[i] >> 4;
+        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+        alpha_mask &= alpha_value;
+      }
+      alpha += io->width;
+      alpha_dst += buf->stride;
+    }
+    if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
+      WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
+    }
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// YUV rescaling (no final RGB conversion needed)
+
+static int Rescale(const uint8_t* src, int src_stride,
+                   int new_lines, WebPRescaler* const wrk) {
+  int num_lines_out = 0;
+  while (new_lines > 0) {    // import new contributions of source rows.
+    const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
+    src += lines_in * src_stride;
+    new_lines -= lines_in;
+    num_lines_out += WebPRescalerExport(wrk);    // emit output row(s)
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
+  const int mb_h = io->mb_h;
+  const int uv_mb_h = (mb_h + 1) >> 1;
+  WebPRescaler* const scaler = &p->scaler_y;
+  int num_lines_out = 0;
+  if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
+    // Before rescaling, we premultiply the luma directly into the io->y
+    // internal buffer. This is OK since these samples are not used for
+    // intra-prediction (the top samples are saved in cache_y_/u_/v_).
+    // But we need to cast the const away, though.
+    WebPMultRows((uint8_t*)io->y, io->y_stride,
+                 io->a, io->width, io->mb_w, mb_h, 0);
+  }
+  num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
+  Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);
+  Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
+  return num_lines_out;
+}
+
+static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
+  if (io->a != NULL) {
+    const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+    uint8_t* dst_y = buf->y + p->last_y * buf->y_stride;
+    const uint8_t* src_a = buf->a + p->last_y * buf->a_stride;
+    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
+    if (num_lines_out > 0) {   // unmultiply the Y
+      WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride,
+                   p->scaler_a.dst_width, num_lines_out, 1);
+    }
+  }
+  return 0;
+}
+
+static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
+  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  const int out_width  = io->scaled_width;
+  const int out_height = io->scaled_height;
+  const int uv_out_width  = (out_width + 1) >> 1;
+  const int uv_out_height = (out_height + 1) >> 1;
+  const int uv_in_width  = (io->mb_w + 1) >> 1;
+  const int uv_in_height = (io->mb_h + 1) >> 1;
+  const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
+  const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
+  size_t tmp_size;
+  int32_t* work;
+
+  tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
+  if (has_alpha) {
+    tmp_size += work_size * sizeof(*work);
+  }
+  p->memory = WebPSafeCalloc(1ULL, tmp_size);
+  if (p->memory == NULL) {
+    return 0;   // memory error
+  }
+  work = (int32_t*)p->memory;
+  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+                   buf->y, out_width, out_height, buf->y_stride, 1,
+                   io->mb_w, out_width, io->mb_h, out_height,
+                   work);
+  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+                   buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
+                   uv_in_width, uv_out_width,
+                   uv_in_height, uv_out_height,
+                   work + work_size);
+  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+                   buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+                   uv_in_width, uv_out_width,
+                   uv_in_height, uv_out_height,
+                   work + work_size + uv_work_size);
+  p->emit = EmitRescaledYUV;
+
+  if (has_alpha) {
+    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+                     buf->a, out_width, out_height, buf->a_stride, 1,
+                     io->mb_w, out_width, io->mb_h, out_height,
+                     work + work_size + 2 * uv_work_size);
+    p->emit_alpha = EmitRescaledAlphaYUV;
+    WebPInitAlphaProcessing();
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// RGBA rescaling
+
+static int ExportRGB(WebPDecParams* const p, int y_pos) {
+  const WebPYUV444Converter convert =
+      WebPYUV444Converters[p->output->colorspace];
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
+  int num_lines_out = 0;
+  // For RGB rescaling, because of the YUV420, current scan position
+  // U/V can be +1/-1 line from the Y one.  Hence the double test.
+  while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
+         WebPRescalerHasPendingOutput(&p->scaler_u)) {
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
+    WebPRescalerExportRow(&p->scaler_y, 0);
+    WebPRescalerExportRow(&p->scaler_u, 0);
+    WebPRescalerExportRow(&p->scaler_v, 0);
+    convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
+            dst, p->scaler_y.dst_width);
+    dst += buf->stride;
+    ++num_lines_out;
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
+  const int mb_h = io->mb_h;
+  const int uv_mb_h = (mb_h + 1) >> 1;
+  int j = 0, uv_j = 0;
+  int num_lines_out = 0;
+  while (j < mb_h) {
+    const int y_lines_in =
+        WebPRescalerImport(&p->scaler_y, mb_h - j,
+                           io->y + j * io->y_stride, io->y_stride);
+    const int u_lines_in =
+        WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
+                           io->u + uv_j * io->uv_stride, io->uv_stride);
+    const int v_lines_in =
+        WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
+                           io->v + uv_j * io->uv_stride, io->uv_stride);
+    (void)v_lines_in;   // remove a gcc warning
+    assert(u_lines_in == v_lines_in);
+    j += y_lines_in;
+    uv_j += u_lines_in;
+    num_lines_out += ExportRGB(p, num_lines_out);
+  }
+  return num_lines_out;
+}
+
+static int ExportAlpha(WebPDecParams* const p, int y_pos) {
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int alpha_first =
+      (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+  uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
+  int num_lines_out = 0;
+  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0xff;
+  const int width = p->scaler_a.dst_width;
+
+  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+    int i;
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    WebPRescalerExportRow(&p->scaler_a, 0);
+    for (i = 0; i < width; ++i) {
+      const uint32_t alpha_value = p->scaler_a.dst[i];
+      dst[4 * i] = alpha_value;
+      alpha_mask &= alpha_value;
+    }
+    dst += buf->stride;
+    ++num_lines_out;
+  }
+  if (is_premult_alpha && alpha_mask != 0xff) {
+    WebPApplyAlphaMultiply(base_rgba, alpha_first,
+                           width, num_lines_out, buf->stride);
+  }
+  return num_lines_out;
+}
+
+static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+  uint8_t* alpha_dst = base_rgba;
+#else
+  uint8_t* alpha_dst = base_rgba + 1;
+#endif
+  int num_lines_out = 0;
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int width = p->scaler_a.dst_width;
+  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0x0f;
+
+  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+    int i;
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    WebPRescalerExportRow(&p->scaler_a, 0);
+    for (i = 0; i < width; ++i) {
+      // Fill in the alpha value (converted to 4 bits).
+      const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
+      alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
+      alpha_mask &= alpha_value;
+    }
+    alpha_dst += buf->stride;
+    ++num_lines_out;
+  }
+  if (is_premult_alpha && alpha_mask != 0x0f) {
+    WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
+  if (io->a != NULL) {
+    WebPRescaler* const scaler = &p->scaler_a;
+    int j = 0;
+    int pos = 0;
+    while (j < io->mb_h) {
+      j += WebPRescalerImport(scaler, io->mb_h - j,
+                              io->a + j * io->width, io->width);
+      pos += p->emit_alpha_row(p, pos);
+    }
+  }
+  return 0;
+}
+
+static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
+  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+  const int out_width  = io->scaled_width;
+  const int out_height = io->scaled_height;
+  const int uv_in_width  = (io->mb_w + 1) >> 1;
+  const int uv_in_height = (io->mb_h + 1) >> 1;
+  const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
+  int32_t* work;  // rescalers work area
+  uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
+  size_t tmp_size1, tmp_size2, total_size;
+
+  tmp_size1 = 3 * work_size;
+  tmp_size2 = 3 * out_width;
+  if (has_alpha) {
+    tmp_size1 += work_size;
+    tmp_size2 += out_width;
+  }
+  total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
+  p->memory = WebPSafeCalloc(1ULL, total_size);
+  if (p->memory == NULL) {
+    return 0;   // memory error
+  }
+  work = (int32_t*)p->memory;
+  tmp = (uint8_t*)(work + tmp_size1);
+  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+                   tmp + 0 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, out_width, io->mb_h, out_height,
+                   work + 0 * work_size);
+  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+                   tmp + 1 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
+                   work + 1 * work_size);
+  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+                   tmp + 2 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
+                   work + 2 * work_size);
+  p->emit = EmitRescaledRGB;
+
+  if (has_alpha) {
+    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+                     tmp + 3 * out_width, out_width, out_height, 0, 1,
+                     io->mb_w, out_width, io->mb_h, out_height,
+                     work + 3 * work_size);
+    p->emit_alpha = EmitRescaledAlphaRGB;
+    if (p->output->colorspace == MODE_RGBA_4444 ||
+        p->output->colorspace == MODE_rgbA_4444) {
+      p->emit_alpha_row = ExportAlphaRGBA4444;
+    } else {
+      p->emit_alpha_row = ExportAlpha;
+    }
+    WebPInitAlphaProcessing();
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Default custom functions
+
+static int CustomSetup(VP8Io* io) {
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int is_rgb = WebPIsRGBMode(colorspace);
+  const int is_alpha = WebPIsAlphaMode(colorspace);
+
+  p->memory = NULL;
+  p->emit = NULL;
+  p->emit_alpha = NULL;
+  p->emit_alpha_row = NULL;
+  if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {
+    return 0;
+  }
+  if (is_alpha && WebPIsPremultipliedMode(colorspace)) {
+    WebPInitUpsamplers();
+  }
+  if (io->use_scaling) {
+    const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
+    if (!ok) {
+      return 0;    // memory error
+    }
+  } else {
+    if (is_rgb) {
+      p->emit = EmitSampledRGB;   // default
+      if (io->fancy_upsampling) {
+#ifdef FANCY_UPSAMPLING
+        const int uv_width = (io->mb_w + 1) >> 1;
+        p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
+        if (p->memory == NULL) {
+          return 0;   // memory error.
+        }
+        p->tmp_y = (uint8_t*)p->memory;
+        p->tmp_u = p->tmp_y + io->mb_w;
+        p->tmp_v = p->tmp_u + uv_width;
+        p->emit = EmitFancyRGB;
+        WebPInitUpsamplers();
+#endif
+      } else {
+        WebPInitSamplers();
+      }
+    } else {
+      p->emit = EmitYUV;
+    }
+    if (is_alpha) {  // need transparency output
+      p->emit_alpha =
+          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
+              EmitAlphaRGBA4444
+          : is_rgb ? EmitAlphaRGB
+          : EmitAlphaYUV;
+      if (is_rgb) {
+        WebPInitAlphaProcessing();
+      }
+    }
+  }
+
+  if (is_rgb) {
+    VP8YUVInit();
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static int CustomPut(const VP8Io* io) {
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  int num_lines_out;
+  assert(!(io->mb_y & 1));
+
+  if (mb_w <= 0 || mb_h <= 0) {
+    return 0;
+  }
+  num_lines_out = p->emit(io, p);
+  if (p->emit_alpha != NULL) {
+    p->emit_alpha(io, p);
+  }
+  p->last_y += num_lines_out;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static void CustomTeardown(const VP8Io* io) {
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
+  WebPSafeFree(p->memory);
+  p->memory = NULL;
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
+  io->put      = CustomPut;
+  io->setup    = CustomSetup;
+  io->teardown = CustomTeardown;
+  io->opaque   = params;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dec/quant.c b/src/main/jni/libwebp/dec/quant.c
new file mode 100644
index 000000000..5b648f942
--- /dev/null
+++ b/src/main/jni/libwebp/dec/quant.c
@@ -0,0 +1,110 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Quantizer initialization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8i.h"
+
+static WEBP_INLINE int clip(int v, int M) {
+  return v < 0 ? 0 : v > M ? M : v;
+}
+
+// Paragraph 14.1
+static const uint8_t kDcTable[128] = {
+  4,     5,   6,   7,   8,   9,  10,  10,
+  11,   12,  13,  14,  15,  16,  17,  17,
+  18,   19,  20,  20,  21,  21,  22,  22,
+  23,   23,  24,  25,  25,  26,  27,  28,
+  29,   30,  31,  32,  33,  34,  35,  36,
+  37,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  46,  47,  48,  49,  50,
+  51,   52,  53,  54,  55,  56,  57,  58,
+  59,   60,  61,  62,  63,  64,  65,  66,
+  67,   68,  69,  70,  71,  72,  73,  74,
+  75,   76,  76,  77,  78,  79,  80,  81,
+  82,   83,  84,  85,  86,  87,  88,  89,
+  91,   93,  95,  96,  98, 100, 101, 102,
+  104, 106, 108, 110, 112, 114, 116, 118,
+  122, 124, 126, 128, 130, 132, 134, 136,
+  138, 140, 143, 145, 148, 151, 154, 157
+};
+
+static const uint16_t kAcTable[128] = {
+  4,     5,   6,   7,   8,   9,  10,  11,
+  12,   13,  14,  15,  16,  17,  18,  19,
+  20,   21,  22,  23,  24,  25,  26,  27,
+  28,   29,  30,  31,  32,  33,  34,  35,
+  36,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  47,  48,  49,  50,  51,
+  52,   53,  54,  55,  56,  57,  58,  60,
+  62,   64,  66,  68,  70,  72,  74,  76,
+  78,   80,  82,  84,  86,  88,  90,  92,
+  94,   96,  98, 100, 102, 104, 106, 108,
+  110, 112, 114, 116, 119, 122, 125, 128,
+  131, 134, 137, 140, 143, 146, 149, 152,
+  155, 158, 161, 164, 167, 170, 173, 177,
+  181, 185, 189, 193, 197, 201, 205, 209,
+  213, 217, 221, 225, 229, 234, 239, 245,
+  249, 254, 259, 264, 269, 274, 279, 284
+};
+
+//------------------------------------------------------------------------------
+// Paragraph 9.6
+
+void VP8ParseQuant(VP8Decoder* const dec) {
+  VP8BitReader* const br = &dec->br_;
+  const int base_q0 = VP8GetValue(br, 7);
+  const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+
+  const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
+  int i;
+
+  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+    int q;
+    if (hdr->use_segment_) {
+      q = hdr->quantizer_[i];
+      if (!hdr->absolute_delta_) {
+        q += base_q0;
+      }
+    } else {
+      if (i > 0) {
+        dec->dqm_[i] = dec->dqm_[0];
+        continue;
+      } else {
+        q = base_q0;
+      }
+    }
+    {
+      VP8QuantMatrix* const m = &dec->dqm_[i];
+      m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];
+      m->y1_mat_[1] = kAcTable[clip(q + 0,       127)];
+
+      m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
+      // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+      // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+      // word size.
+      m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
+      if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
+
+      m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
+      m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
+
+      m->uv_quant_ = q + dquv_ac;   // for dithering strength evaluation
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/dec/tree.c b/src/main/jni/libwebp/dec/tree.c
new file mode 100644
index 000000000..31208d9d4
--- /dev/null
+++ b/src/main/jni/libwebp/dec/tree.c
@@ -0,0 +1,516 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding trees and probas
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8i.h"
+#include "../utils/bit_reader_inl.h"
+
+#define USE_GENERIC_TREE
+
+#ifdef USE_GENERIC_TREE
+static const int8_t kYModesIntra4[18] = {
+  -B_DC_PRED, 1,
+    -B_TM_PRED, 2,
+      -B_VE_PRED, 3,
+        4, 6,
+          -B_HE_PRED, 5,
+            -B_RD_PRED, -B_VR_PRED,
+        -B_LD_PRED, 7,
+          -B_VL_PRED, 8,
+            -B_HD_PRED, -B_HU_PRED
+};
+#endif
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Paragraph 13.5
+static const uint8_t
+  CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+    },
+    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+    },
+    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+    },
+    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+    },
+    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+    },
+    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+    },
+    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+    },
+    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+    },
+    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+    },
+    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+    },
+    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+    },
+    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+    },
+    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+    },
+    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+    },
+    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  }
+};
+
+// Paragraph 11.5
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
+  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
+  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
+  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
+  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
+  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
+  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
+  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
+  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
+  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
+};
+
+void VP8ResetProba(VP8Proba* const proba) {
+  memset(proba->segments_, 255u, sizeof(proba->segments_));
+  // proba->bands_[][] is initialized later
+}
+
+static void ParseIntraMode(VP8BitReader* const br,
+                           VP8Decoder* const dec, int mb_x) {
+  uint8_t* const top = dec->intra_t_ + 4 * mb_x;
+  uint8_t* const left = dec->intra_l_;
+  VP8MBData* const block = dec->mb_data_ + mb_x;
+
+  // Note: we don't save segment map (yet), as we don't expect
+  // to decode more than 1 keyframe.
+  if (dec->segment_hdr_.update_map_) {
+    // Hardcoded tree parsing
+    block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0])
+                    ? VP8GetBit(br, dec->proba_.segments_[1])
+                    : 2 + VP8GetBit(br, dec->proba_.segments_[2]);
+  } else {
+    block->segment_ = 0;  // default for intra
+  }
+  if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_);
+
+  block->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
+  if (!block->is_i4x4_) {
+    // Hardcoded 16x16 intra-mode decision tree.
+    const int ymode =
+        VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
+                           : (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
+    block->imodes_[0] = ymode;
+    memset(top, ymode, 4 * sizeof(*top));
+    memset(left, ymode, 4 * sizeof(*left));
+  } else {
+    uint8_t* modes = block->imodes_;
+    int y;
+    for (y = 0; y < 4; ++y) {
+      int ymode = left[y];
+      int x;
+      for (x = 0; x < 4; ++x) {
+        const uint8_t* const prob = kBModesProba[top[x]][ymode];
+#ifdef USE_GENERIC_TREE
+        // Generic tree-parsing
+        int i = kYModesIntra4[VP8GetBit(br, prob[0])];
+        while (i > 0) {
+          i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];
+        }
+        ymode = -i;
+#else
+        // Hardcoded tree parsing
+        ymode = !VP8GetBit(br, prob[0]) ? B_DC_PRED :
+                  !VP8GetBit(br, prob[1]) ? B_TM_PRED :
+                    !VP8GetBit(br, prob[2]) ? B_VE_PRED :
+                      !VP8GetBit(br, prob[3]) ?
+                        (!VP8GetBit(br, prob[4]) ? B_HE_PRED :
+                          (!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED)) :
+                        (!VP8GetBit(br, prob[6]) ? B_LD_PRED :
+                          (!VP8GetBit(br, prob[7]) ? B_VL_PRED :
+                            (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
+#endif    // USE_GENERIC_TREE
+        top[x] = ymode;
+      }
+      memcpy(modes, top, 4 * sizeof(*top));
+      modes += 4;
+      left[y] = ymode;
+    }
+  }
+  // Hardcoded UVMode decision tree
+  block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
+                 : !VP8GetBit(br, 114) ? V_PRED
+                 : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+}
+
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
+  int mb_x;
+  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+    ParseIntraMode(br, dec, mb_x);
+  }
+  return !dec->br_.eof_;
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 13
+
+static const uint8_t
+    CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  }
+};
+
+// Paragraph 9.9
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
+  VP8Proba* const proba = &dec->proba_;
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ?
+                        VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p];
+          proba->bands_[t][b].probas_[c][p] = v;
+        }
+      }
+    }
+  }
+  dec->use_skip_proba_ = VP8Get(br);
+  if (dec->use_skip_proba_) {
+    dec->skip_p_ = VP8GetValue(br, 8);
+  }
+}
+
diff --git a/src/main/jni/libwebp/dec/vp8.c b/src/main/jni/libwebp/dec/vp8.c
new file mode 100644
index 000000000..47249d64b
--- /dev/null
+++ b/src/main/jni/libwebp/dec/vp8.c
@@ -0,0 +1,668 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../utils/bit_reader_inl.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+
+int WebPGetDecoderVersion(void) {
+  return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
+}
+
+//------------------------------------------------------------------------------
+// VP8Decoder
+
+static void SetOk(VP8Decoder* const dec) {
+  dec->status_ = VP8_STATUS_OK;
+  dec->error_msg_ = "OK";
+}
+
+int VP8InitIoInternal(VP8Io* const io, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return 0;  // mismatch error
+  }
+  if (io != NULL) {
+    memset(io, 0, sizeof(*io));
+  }
+  return 1;
+}
+
+VP8Decoder* VP8New(void) {
+  VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  if (dec != NULL) {
+    SetOk(dec);
+    WebPGetWorkerInterface()->Init(&dec->worker_);
+    dec->ready_ = 0;
+    dec->num_parts_ = 1;
+  }
+  return dec;
+}
+
+VP8StatusCode VP8Status(VP8Decoder* const dec) {
+  if (!dec) return VP8_STATUS_INVALID_PARAM;
+  return dec->status_;
+}
+
+const char* VP8StatusMessage(VP8Decoder* const dec) {
+  if (dec == NULL) return "no object";
+  if (!dec->error_msg_) return "OK";
+  return dec->error_msg_;
+}
+
+void VP8Delete(VP8Decoder* const dec) {
+  if (dec != NULL) {
+    VP8Clear(dec);
+    WebPSafeFree(dec);
+  }
+}
+
+int VP8SetError(VP8Decoder* const dec,
+                VP8StatusCode error, const char* const msg) {
+  // TODO This check would be unnecessary if alpha decompression was separated
+  // from VP8ProcessRow/FinishRow. This avoids setting 'dec->status_' to
+  // something other than VP8_STATUS_BITSTREAM_ERROR on alpha decompression
+  // failure.
+  if (dec->status_ == VP8_STATUS_OK) {
+    dec->status_ = error;
+    dec->error_msg_ = msg;
+    dec->ready_ = 0;
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+
+int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
+  return (data_size >= 3 &&
+          data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
+}
+
+int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
+               int* const width, int* const height) {
+  if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
+    return 0;         // not enough data
+  }
+  // check signature
+  if (!VP8CheckSignature(data + 3, data_size - 3)) {
+    return 0;         // Wrong signature.
+  } else {
+    const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16);
+    const int key_frame = !(bits & 1);
+    const int w = ((data[7] << 8) | data[6]) & 0x3fff;
+    const int h = ((data[9] << 8) | data[8]) & 0x3fff;
+
+    if (!key_frame) {   // Not a keyframe.
+      return 0;
+    }
+
+    if (((bits >> 1) & 7) > 3) {
+      return 0;         // unknown profile
+    }
+    if (!((bits >> 4) & 1)) {
+      return 0;         // first frame is invisible!
+    }
+    if (((bits >> 5)) >= chunk_size) {  // partition_length
+      return 0;         // inconsistent size information.
+    }
+    if (w == 0 || h == 0) {
+      return 0;         // We don't support both width and height to be zero.
+    }
+
+    if (width) {
+      *width = w;
+    }
+    if (height) {
+      *height = h;
+    }
+
+    return 1;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Header parsing
+
+static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
+  assert(hdr != NULL);
+  hdr->use_segment_ = 0;
+  hdr->update_map_ = 0;
+  hdr->absolute_delta_ = 1;
+  memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
+  memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
+}
+
+// Paragraph 9.3
+static int ParseSegmentHeader(VP8BitReader* br,
+                              VP8SegmentHeader* hdr, VP8Proba* proba) {
+  assert(br != NULL);
+  assert(hdr != NULL);
+  hdr->use_segment_ = VP8Get(br);
+  if (hdr->use_segment_) {
+    hdr->update_map_ = VP8Get(br);
+    if (VP8Get(br)) {   // update data
+      int s;
+      hdr->absolute_delta_ = VP8Get(br);
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        hdr->quantizer_[s] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0;
+      }
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        hdr->filter_strength_[s] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0;
+      }
+    }
+    if (hdr->update_map_) {
+      int s;
+      for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
+        proba->segments_[s] = VP8Get(br) ? VP8GetValue(br, 8) : 255u;
+      }
+    }
+  } else {
+    hdr->update_map_ = 0;
+  }
+  return !br->eof_;
+}
+
+// Paragraph 9.5
+// This function returns VP8_STATUS_SUSPENDED if we don't have all the
+// necessary data in 'buf'.
+// This case is not necessarily an error (for incremental decoding).
+// Still, no bitreader is ever initialized to make it possible to read
+// unavailable memory.
+// If we don't even have the partitions' sizes, than VP8_STATUS_NOT_ENOUGH_DATA
+// is returned, and this is an unrecoverable error.
+// If the partitions were positioned ok, VP8_STATUS_OK is returned.
+static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
+                                     const uint8_t* buf, size_t size) {
+  VP8BitReader* const br = &dec->br_;
+  const uint8_t* sz = buf;
+  const uint8_t* buf_end = buf + size;
+  const uint8_t* part_start;
+  int last_part;
+  int p;
+
+  dec->num_parts_ = 1 << VP8GetValue(br, 2);
+  last_part = dec->num_parts_ - 1;
+  part_start = buf + last_part * 3;
+  if (buf_end < part_start) {
+    // we can't even read the sizes with sz[]! That's a failure.
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  for (p = 0; p < last_part; ++p) {
+    const uint32_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
+    const uint8_t* part_end = part_start + psize;
+    if (part_end > buf_end) part_end = buf_end;
+    VP8InitBitReader(dec->parts_ + p, part_start, part_end);
+    part_start = part_end;
+    sz += 3;
+  }
+  VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end);
+  return (part_start < buf_end) ? VP8_STATUS_OK :
+           VP8_STATUS_SUSPENDED;   // Init is ok, but there's not enough data
+}
+
+// Paragraph 9.4
+static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
+  VP8FilterHeader* const hdr = &dec->filter_hdr_;
+  hdr->simple_    = VP8Get(br);
+  hdr->level_     = VP8GetValue(br, 6);
+  hdr->sharpness_ = VP8GetValue(br, 3);
+  hdr->use_lf_delta_ = VP8Get(br);
+  if (hdr->use_lf_delta_) {
+    if (VP8Get(br)) {   // update lf-delta?
+      int i;
+      for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
+        if (VP8Get(br)) {
+          hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6);
+        }
+      }
+      for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
+        if (VP8Get(br)) {
+          hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6);
+        }
+      }
+    }
+  }
+  dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
+  return !br->eof_;
+}
+
+// Topmost call
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
+  const uint8_t* buf;
+  size_t buf_size;
+  VP8FrameHeader* frm_hdr;
+  VP8PictureHeader* pic_hdr;
+  VP8BitReader* br;
+  VP8StatusCode status;
+
+  if (dec == NULL) {
+    return 0;
+  }
+  SetOk(dec);
+  if (io == NULL) {
+    return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
+                       "null VP8Io passed to VP8GetHeaders()");
+  }
+  buf = io->data;
+  buf_size = io->data_size;
+  if (buf_size < 4) {
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                       "Truncated header.");
+  }
+
+  // Paragraph 9.1
+  {
+    const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16);
+    frm_hdr = &dec->frm_hdr_;
+    frm_hdr->key_frame_ = !(bits & 1);
+    frm_hdr->profile_ = (bits >> 1) & 7;
+    frm_hdr->show_ = (bits >> 4) & 1;
+    frm_hdr->partition_length_ = (bits >> 5);
+    if (frm_hdr->profile_ > 3)
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                         "Incorrect keyframe parameters.");
+    if (!frm_hdr->show_)
+      return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
+                         "Frame not displayable.");
+    buf += 3;
+    buf_size -= 3;
+  }
+
+  pic_hdr = &dec->pic_hdr_;
+  if (frm_hdr->key_frame_) {
+    // Paragraph 9.2
+    if (buf_size < 7) {
+      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                         "cannot parse picture header");
+    }
+    if (!VP8CheckSignature(buf, buf_size)) {
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                         "Bad code word");
+    }
+    pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
+    pic_hdr->xscale_ = buf[4] >> 6;   // ratio: 1, 5/4 5/3 or 2
+    pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
+    pic_hdr->yscale_ = buf[6] >> 6;
+    buf += 7;
+    buf_size -= 7;
+
+    dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
+    dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
+    // Setup default output area (can be later modified during io->setup())
+    io->width = pic_hdr->width_;
+    io->height = pic_hdr->height_;
+    io->use_scaling  = 0;
+    io->use_cropping = 0;
+    io->crop_top  = 0;
+    io->crop_left = 0;
+    io->crop_right  = io->width;
+    io->crop_bottom = io->height;
+    io->mb_w = io->width;   // sanity check
+    io->mb_h = io->height;  // ditto
+
+    VP8ResetProba(&dec->proba_);
+    ResetSegmentHeader(&dec->segment_hdr_);
+  }
+
+  // Check if we have all the partition #0 available, and initialize dec->br_
+  // to read this partition (and this partition only).
+  if (frm_hdr->partition_length_ > buf_size) {
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                       "bad partition length");
+  }
+
+  br = &dec->br_;
+  VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_);
+  buf += frm_hdr->partition_length_;
+  buf_size -= frm_hdr->partition_length_;
+
+  if (frm_hdr->key_frame_) {
+    pic_hdr->colorspace_ = VP8Get(br);
+    pic_hdr->clamp_type_ = VP8Get(br);
+  }
+  if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
+    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                       "cannot parse segment header");
+  }
+  // Filter specs
+  if (!ParseFilterHeader(br, dec)) {
+    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                       "cannot parse filter header");
+  }
+  status = ParsePartitions(dec, buf, buf_size);
+  if (status != VP8_STATUS_OK) {
+    return VP8SetError(dec, status, "cannot parse partitions");
+  }
+
+  // quantizer change
+  VP8ParseQuant(dec);
+
+  // Frame buffer marking
+  if (!frm_hdr->key_frame_) {
+    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
+                       "Not a key frame.");
+  }
+
+  VP8Get(br);   // ignore the value of update_proba_
+
+  VP8ParseProba(br, dec);
+
+  // sanitized state
+  dec->ready_ = 1;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Residual decoding (Paragraph 13.2 / 13.3)
+
+static const int kBands[16 + 1] = {
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // extra entry as sentinel
+};
+
+static const uint8_t kCat3[] = { 173, 148, 140, 0 };
+static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat6[] =
+  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t kZigzag[16] = {
+  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
+};
+
+// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
+static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
+  int v;
+  if (!VP8GetBit(br, p[3])) {
+    if (!VP8GetBit(br, p[4])) {
+      v = 2;
+    } else {
+      v = 3 + VP8GetBit(br, p[5]);
+    }
+  } else {
+    if (!VP8GetBit(br, p[6])) {
+      if (!VP8GetBit(br, p[7])) {
+        v = 5 + VP8GetBit(br, 159);
+      } else {
+        v = 7 + 2 * VP8GetBit(br, 165);
+        v += VP8GetBit(br, 145);
+      }
+    } else {
+      const uint8_t* tab;
+      const int bit1 = VP8GetBit(br, p[8]);
+      const int bit0 = VP8GetBit(br, p[9 + bit1]);
+      const int cat = 2 * bit1 + bit0;
+      v = 0;
+      for (tab = kCat3456[cat]; *tab; ++tab) {
+        v += v + VP8GetBit(br, *tab);
+      }
+      v += 3 + (8 << cat);
+    }
+  }
+  return v;
+}
+
+// Returns the position of the last non-zero coeff plus one
+static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob,
+                     int ctx, const quant_t dq, int n, int16_t* out) {
+  // n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'.
+  const uint8_t* p = prob[n].probas_[ctx];
+  for (; n < 16; ++n) {
+    if (!VP8GetBit(br, p[0])) {
+      return n;  // previous coeff was last non-zero coeff
+    }
+    while (!VP8GetBit(br, p[1])) {       // sequence of zero coeffs
+      p = prob[kBands[++n]].probas_[0];
+      if (n == 16) return 16;
+    }
+    {        // non zero coeff
+      const VP8ProbaArray* const p_ctx = &prob[kBands[n + 1]].probas_[0];
+      int v;
+      if (!VP8GetBit(br, p[2])) {
+        v = 1;
+        p = p_ctx[1];
+      } else {
+        v = GetLargeValue(br, p);
+        p = p_ctx[2];
+      }
+      out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
+    }
+  }
+  return 16;
+}
+
+static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
+  nz_coeffs <<= 2;
+  nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
+  return nz_coeffs;
+}
+
+static int ParseResiduals(VP8Decoder* const dec,
+                          VP8MB* const mb, VP8BitReader* const token_br) {
+  VP8BandProbas (* const bands)[NUM_BANDS] = dec->proba_.bands_;
+  const VP8BandProbas* ac_proba;
+  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+  const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
+  int16_t* dst = block->coeffs_;
+  VP8MB* const left_mb = dec->mb_info_ - 1;
+  uint8_t tnz, lnz;
+  uint32_t non_zero_y = 0;
+  uint32_t non_zero_uv = 0;
+  int x, y, ch;
+  uint32_t out_t_nz, out_l_nz;
+  int first;
+
+  memset(dst, 0, 384 * sizeof(*dst));
+  if (!block->is_i4x4_) {    // parse DC
+    int16_t dc[16] = { 0 };
+    const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
+    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
+    mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
+    if (nz > 1) {   // more than just the DC -> perform the full transform
+      VP8TransformWHT(dc, dst);
+    } else {        // only DC is non-zero -> inlined simplified transform
+      int i;
+      const int dc0 = (dc[0] + 3) >> 3;
+      for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
+    }
+    first = 1;
+    ac_proba = bands[0];
+  } else {
+    first = 0;
+    ac_proba = bands[3];
+  }
+
+  tnz = mb->nz_ & 0x0f;
+  lnz = left_mb->nz_ & 0x0f;
+  for (y = 0; y < 4; ++y) {
+    int l = lnz & 1;
+    uint32_t nz_coeffs = 0;
+    for (x = 0; x < 4; ++x) {
+      const int ctx = l + (tnz & 1);
+      const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
+      l = (nz > first);
+      tnz = (tnz >> 1) | (l << 7);
+      nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
+      dst += 16;
+    }
+    tnz >>= 4;
+    lnz = (lnz >> 1) | (l << 7);
+    non_zero_y = (non_zero_y << 8) | nz_coeffs;
+  }
+  out_t_nz = tnz;
+  out_l_nz = lnz >> 4;
+
+  for (ch = 0; ch < 4; ch += 2) {
+    uint32_t nz_coeffs = 0;
+    tnz = mb->nz_ >> (4 + ch);
+    lnz = left_mb->nz_ >> (4 + ch);
+    for (y = 0; y < 2; ++y) {
+      int l = lnz & 1;
+      for (x = 0; x < 2; ++x) {
+        const int ctx = l + (tnz & 1);
+        const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
+        l = (nz > 0);
+        tnz = (tnz >> 1) | (l << 3);
+        nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
+        dst += 16;
+      }
+      tnz >>= 2;
+      lnz = (lnz >> 1) | (l << 5);
+    }
+    // Note: we don't really need the per-4x4 details for U/V blocks.
+    non_zero_uv |= nz_coeffs << (4 * ch);
+    out_t_nz |= (tnz << 4) << ch;
+    out_l_nz |= (lnz & 0xf0) << ch;
+  }
+  mb->nz_ = out_t_nz;
+  left_mb->nz_ = out_l_nz;
+
+  block->non_zero_y_ = non_zero_y;
+  block->non_zero_uv_ = non_zero_uv;
+
+  // We look at the mode-code of each block and check if some blocks have less
+  // than three non-zero coeffs (code < 2). This is to avoid dithering flat and
+  // empty blocks.
+  block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
+
+  return !(non_zero_y | non_zero_uv);  // will be used for further optimization
+}
+
+//------------------------------------------------------------------------------
+// Main loop
+
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
+  VP8MB* const left = dec->mb_info_ - 1;
+  VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
+  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+  int skip = dec->use_skip_proba_ ? block->skip_ : 0;
+
+  if (!skip) {
+    skip = ParseResiduals(dec, mb, token_br);
+  } else {
+    left->nz_ = mb->nz_ = 0;
+    if (!block->is_i4x4_) {
+      left->nz_dc_ = mb->nz_dc_ = 0;
+    }
+    block->non_zero_y_ = 0;
+    block->non_zero_uv_ = 0;
+  }
+
+  if (dec->filter_type_ > 0) {  // store filter info
+    VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
+    *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
+    finfo->f_inner_ |= !skip;
+  }
+
+  return !token_br->eof_;
+}
+
+void VP8InitScanline(VP8Decoder* const dec) {
+  VP8MB* const left = dec->mb_info_ - 1;
+  left->nz_ = 0;
+  left->nz_dc_ = 0;
+  memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
+  dec->mb_x_ = 0;
+}
+
+static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
+  for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
+    // Parse bitstream for this row.
+    VP8BitReader* const token_br =
+        &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+    if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                         "Premature end-of-partition0 encountered.");
+    }
+    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+      if (!VP8DecodeMB(dec, token_br)) {
+        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                           "Premature end-of-file encountered.");
+      }
+    }
+    VP8InitScanline(dec);   // Prepare for next scanline
+
+    // Reconstruct, filter and emit the row.
+    if (!VP8ProcessRow(dec, io)) {
+      return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
+    }
+  }
+  if (dec->mt_method_ > 0) {
+    if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
+  }
+
+  return 1;
+}
+
+// Main entry point
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 0;
+  if (dec == NULL) {
+    return 0;
+  }
+  if (io == NULL) {
+    return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
+                       "NULL VP8Io parameter in VP8Decode().");
+  }
+
+  if (!dec->ready_) {
+    if (!VP8GetHeaders(dec, io)) {
+      return 0;
+    }
+  }
+  assert(dec->ready_);
+
+  // Finish setting up the decoding parameter. Will call io->setup().
+  ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
+  if (ok) {   // good to go.
+    // Will allocate memory and prepare everything.
+    if (ok) ok = VP8InitFrame(dec, io);
+
+    // Main decoding loop
+    if (ok) ok = ParseFrame(dec, io);
+
+    // Exit.
+    ok &= VP8ExitCritical(dec, io);
+  }
+
+  if (!ok) {
+    VP8Clear(dec);
+    return 0;
+  }
+
+  dec->ready_ = 0;
+  return ok;
+}
+
+void VP8Clear(VP8Decoder* const dec) {
+  if (dec == NULL) {
+    return;
+  }
+  WebPGetWorkerInterface()->End(&dec->worker_);
+  ALPHDelete(dec->alph_dec_);
+  dec->alph_dec_ = NULL;
+  WebPSafeFree(dec->mem_);
+  dec->mem_ = NULL;
+  dec->mem_size_ = 0;
+  memset(&dec->br_, 0, sizeof(dec->br_));
+  dec->ready_ = 0;
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/dec/vp8i.h b/src/main/jni/libwebp/dec/vp8i.h
new file mode 100644
index 000000000..29701be77
--- /dev/null
+++ b/src/main/jni/libwebp/dec/vp8i.h
@@ -0,0 +1,354 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// VP8 decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DEC_VP8I_H_
+#define WEBP_DEC_VP8I_H_
+
+#include <string.h>     // for memcpy()
+#include "./vp8li.h"
+#include "../utils/bit_reader.h"
+#include "../utils/random.h"
+#include "../utils/thread.h"
+#include "../dsp/dsp.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define DEC_MAJ_VERSION 0
+#define DEC_MIN_VERSION 4
+#define DEC_REV_VERSION 2
+
+// intra prediction modes
+enum { B_DC_PRED = 0,   // 4x4 modes
+       B_TM_PRED,
+       B_VE_PRED,
+       B_HE_PRED,
+       B_RD_PRED,
+       B_VR_PRED,
+       B_LD_PRED,
+       B_VL_PRED,
+       B_HD_PRED,
+       B_HU_PRED,
+       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+
+       // Luma16 or UV modes
+       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
+       B_PRED = NUM_BMODES,   // refined I4x4 mode
+
+       // special modes
+       B_DC_PRED_NOTOP = 4,
+       B_DC_PRED_NOLEFT = 5,
+       B_DC_PRED_NOTOPLEFT = 6,
+       NUM_B_DC_MODES = 7 };
+
+enum { MB_FEATURE_TREE_PROBS = 3,
+       NUM_MB_SEGMENTS = 4,
+       NUM_REF_LF_DELTAS = 4,
+       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
+       MAX_NUM_PARTITIONS = 8,
+       // Probabilities
+       NUM_TYPES = 4,
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11,
+       NUM_MV_PROBAS = 19 };
+
+// YUV-cache parameters.
+// Constraints are: We need to store one 16x16 block of luma samples (y),
+// and two 8x8 chroma blocks (u/v). These are better be 16-bytes aligned,
+// in order to be SIMD-friendly. We also need to store the top, left and
+// top-left samples (from previously decoded blocks), along with four
+// extra top-right samples for luma (intra4x4 prediction only).
+// One possible layout is, using 32 * (17 + 9) bytes:
+//
+//   .+------   <- only 1 pixel high
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyy..
+//   .+--.+--   <- only 1 pixel high
+//   .|uu.|vv
+//   .|uu.|vv
+//
+// Every character is a 4x4 block, with legend:
+//  '.' = unused
+//  'y' = y-samples   'u' = u-samples     'v' = u-samples
+//  '|' = left sample,   '-' = top sample,    '+' = top-left sample
+//  't' = extra top-right sample for 4x4 modes
+// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size.
+#define BPS       32    // this is the common stride used by yuv[]
+#define YUV_SIZE (BPS * 17 + BPS * 9)
+#define Y_SIZE   (BPS * 17)
+#define Y_OFF    (BPS * 1 + 8)
+#define U_OFF    (Y_OFF + BPS * 16 + BPS)
+#define V_OFF    (U_OFF + 16)
+
+// minimal width under which lossy multi-threading is always disabled
+#define MIN_WIDTH_FOR_THREADS 512
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef struct {
+  uint8_t key_frame_;
+  uint8_t profile_;
+  uint8_t show_;
+  uint32_t partition_length_;
+} VP8FrameHeader;
+
+typedef struct {
+  uint16_t width_;
+  uint16_t height_;
+  uint8_t xscale_;
+  uint8_t yscale_;
+  uint8_t colorspace_;   // 0 = YCbCr
+  uint8_t clamp_type_;
+} VP8PictureHeader;
+
+// segment features
+typedef struct {
+  int use_segment_;
+  int update_map_;        // whether to update the segment map or not
+  int absolute_delta_;    // absolute or delta values for quantizer and filter
+  int8_t quantizer_[NUM_MB_SEGMENTS];        // quantization changes
+  int8_t filter_strength_[NUM_MB_SEGMENTS];  // filter strength for segments
+} VP8SegmentHeader;
+
+
+// probas associated to one of the contexts
+typedef uint8_t VP8ProbaArray[NUM_PROBAS];
+
+typedef struct {   // all the probas associated to one band
+  VP8ProbaArray probas_[NUM_CTX];
+} VP8BandProbas;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+  uint8_t segments_[MB_FEATURE_TREE_PROBS];
+  // Type: 0:Intra16-AC  1:Intra16-DC   2:Chroma   3:Intra4
+  VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
+} VP8Proba;
+
+// Filter parameters
+typedef struct {
+  int simple_;                  // 0=complex, 1=simple
+  int level_;                   // [0..63]
+  int sharpness_;               // [0..7]
+  int use_lf_delta_;
+  int ref_lf_delta_[NUM_REF_LF_DELTAS];
+  int mode_lf_delta_[NUM_MODE_LF_DELTAS];
+} VP8FilterHeader;
+
+//------------------------------------------------------------------------------
+// Informations about the macroblocks.
+
+typedef struct {  // filter specs
+  uint8_t f_limit_;      // filter limit in [3..189], or 0 if no filtering
+  uint8_t f_ilevel_;     // inner limit in [1..63]
+  uint8_t f_inner_;      // do inner filtering?
+  uint8_t hev_thresh_;   // high edge variance threshold in [0..2]
+} VP8FInfo;
+
+typedef struct {  // Top/Left Contexts used for syntax-parsing
+  uint8_t nz_;        // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
+  uint8_t nz_dc_;     // non-zero DC coeff (1bit)
+} VP8MB;
+
+// Dequantization matrices
+typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
+typedef struct {
+  quant_t y1_mat_, y2_mat_, uv_mat_;
+
+  int uv_quant_;   // U/V quantizer value
+  int dither_;     // dithering amplitude (0 = off, max=255)
+} VP8QuantMatrix;
+
+// Data needed to reconstruct a macroblock
+typedef struct {
+  int16_t coeffs_[384];   // 384 coeffs = (16+4+4) * 4*4
+  uint8_t is_i4x4_;       // true if intra4x4
+  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode_;        // chroma prediction mode
+  // bit-wise info about the content of each sub-4x4 blocks (in decoding order).
+  // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
+  //   code=0 -> no coefficient
+  //   code=1 -> only DC
+  //   code=2 -> first three coefficients are non-zero
+  //   code=3 -> more than three coefficients are non-zero
+  // This allows to call specialized transform functions.
+  uint32_t non_zero_y_;
+  uint32_t non_zero_uv_;
+  uint8_t dither_;      // local dithering strength (deduced from non_zero_*)
+  uint8_t skip_;
+  uint8_t segment_;
+} VP8MBData;
+
+// Persistent information needed by the parallel processing
+typedef struct {
+  int id_;              // cache row to process (in [0..2])
+  int mb_y_;            // macroblock position of the row
+  int filter_row_;      // true if row-filtering is needed
+  VP8FInfo* f_info_;    // filter strengths (swapped with dec->f_info_)
+  VP8MBData* mb_data_;  // reconstruction data (swapped with dec->mb_data_)
+  VP8Io io_;            // copy of the VP8Io to pass to put()
+} VP8ThreadContext;
+
+// Saved top samples, per macroblock. Fits into a cache-line.
+typedef struct {
+  uint8_t y[16], u[8], v[8];
+} VP8TopSamples;
+
+//------------------------------------------------------------------------------
+// VP8Decoder: the main opaque structure handed over to user
+
+struct VP8Decoder {
+  VP8StatusCode status_;
+  int ready_;     // true if ready to decode a picture with VP8Decode()
+  const char* error_msg_;  // set when status_ is not OK.
+
+  // Main data source
+  VP8BitReader br_;
+
+  // headers
+  VP8FrameHeader   frm_hdr_;
+  VP8PictureHeader pic_hdr_;
+  VP8FilterHeader  filter_hdr_;
+  VP8SegmentHeader segment_hdr_;
+
+  // Worker
+  WebPWorker worker_;
+  int mt_method_;      // multi-thread method: 0=off, 1=[parse+recon][filter]
+                       // 2=[parse][recon+filter]
+  int cache_id_;       // current cache row
+  int num_caches_;     // number of cached rows of 16 pixels (1, 2 or 3)
+  VP8ThreadContext thread_ctx_;  // Thread context
+
+  // dimension, in macroblock units.
+  int mb_w_, mb_h_;
+
+  // Macroblock to process/filter, depending on cropping and filter_type.
+  int tl_mb_x_, tl_mb_y_;  // top-left MB that must be in-loop filtered
+  int br_mb_x_, br_mb_y_;  // last bottom-right MB that must be decoded
+
+  // number of partitions.
+  int num_parts_;
+  // per-partition boolean decoders.
+  VP8BitReader parts_[MAX_NUM_PARTITIONS];
+
+  // Dithering strength, deduced from decoding options
+  int dither_;                // whether to use dithering or not
+  VP8Random dithering_rg_;    // random generator for dithering
+
+  // dequantization (one set of DC/AC dequant factor per segment)
+  VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
+
+  // probabilities
+  VP8Proba proba_;
+  int use_skip_proba_;
+  uint8_t skip_p_;
+
+  // Boundary data cache and persistent buffers.
+  uint8_t* intra_t_;      // top intra modes values: 4 * mb_w_
+  uint8_t  intra_l_[4];   // left intra modes values
+
+  VP8TopSamples* yuv_t_;  // top y/u/v samples
+
+  VP8MB* mb_info_;        // contextual macroblock info (mb_w_ + 1)
+  VP8FInfo* f_info_;      // filter strength info
+  uint8_t* yuv_b_;        // main block for Y/U/V (size = YUV_SIZE)
+
+  uint8_t* cache_y_;      // macroblock row for storing unfiltered samples
+  uint8_t* cache_u_;
+  uint8_t* cache_v_;
+  int cache_y_stride_;
+  int cache_uv_stride_;
+
+  // main memory chunk for the above data. Persistent.
+  void* mem_;
+  size_t mem_size_;
+
+  // Per macroblock non-persistent infos.
+  int mb_x_, mb_y_;       // current position, in macroblock units
+  VP8MBData* mb_data_;    // parsed reconstruction data
+
+  // Filtering side-info
+  int filter_type_;                          // 0=off, 1=simple, 2=complex
+  VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type
+
+  // Alpha
+  struct ALPHDecoder* alph_dec_;  // alpha-plane decoder object
+  const uint8_t* alpha_data_;     // compressed alpha data (if present)
+  size_t alpha_data_size_;
+  int is_alpha_decoded_;  // true if alpha_data_ is decoded in alpha_plane_
+  uint8_t* alpha_plane_;  // output. Persistent, contains the whole data.
+  int alpha_dithering_;   // derived from decoding options (0=off, 100=full).
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in vp8.c
+int VP8SetError(VP8Decoder* const dec,
+                VP8StatusCode error, const char* const msg);
+
+// in tree.c
+void VP8ResetProba(VP8Proba* const proba);
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
+// parses one row of intra mode data in partition 0, returns !eof
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec);
+
+// in quant.c
+void VP8ParseQuant(VP8Decoder* const dec);
+
+// in frame.c
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
+// Call io->setup() and finish setting up scan parameters.
+// After this call returns, one must always call VP8ExitCritical() with the
+// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
+// if ok, otherwise sets and returns the error status on *dec.
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
+// Must always be called in pair with VP8EnterCritical().
+// Returns false in case of error.
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
+// Return the multi-threading method to use (0=off), depending
+// on options and bitstream size. Only for lossy decoding.
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,
+                       const WebPHeaderStructure* const headers,
+                       int width, int height);
+// Initialize dithering post-process if needed.
+void VP8InitDithering(const WebPDecoderOptions* const options,
+                      VP8Decoder* const dec);
+// Process the last decoded row (filtering + output).
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
+// To be called at the start of a new scanline, to initialize predictors.
+void VP8InitScanline(VP8Decoder* const dec);
+// Decode one macroblock. Returns false if there is not enough data.
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
+
+// in alpha.c
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      int row, int num_rows);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_VP8I_H_ */
diff --git a/src/main/jni/libwebp/dec/vp8l.c b/src/main/jni/libwebp/dec/vp8l.c
new file mode 100644
index 000000000..a7e7e2522
--- /dev/null
+++ b/src/main/jni/libwebp/dec/vp8l.c
@@ -0,0 +1,1404 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#include <stdlib.h>
+
+#include "./alphai.h"
+#include "./vp8li.h"
+#include "../dsp/dsp.h"
+#include "../dsp/lossless.h"
+#include "../dsp/yuv.h"
+#include "../utils/huffman.h"
+#include "../utils/utils.h"
+
+#define NUM_ARGB_CACHE_ROWS          16
+
+static const int kCodeLengthLiterals = 16;
+static const int kCodeLengthRepeatCode = 16;
+static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
+static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
+
+// -----------------------------------------------------------------------------
+//  Five Huffman codes are used at each meta code:
+//  1. green + length prefix codes + color cache codes,
+//  2. alpha,
+//  3. red,
+//  4. blue, and,
+//  5. distance prefix codes.
+typedef enum {
+  GREEN = 0,
+  RED   = 1,
+  BLUE  = 2,
+  ALPHA = 3,
+  DIST  = 4
+} HuffIndex;
+
+static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
+  NUM_LITERAL_CODES + NUM_LENGTH_CODES,
+  NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,
+  NUM_DISTANCE_CODES
+};
+
+
+#define NUM_CODE_LENGTH_CODES       19
+static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
+  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
+#define CODE_TO_PLANE_CODES        120
+static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
+  0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+  0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+  0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+  0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+  0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+  0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+  0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+  0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+  0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+  0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+  0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
+  0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+};
+
+static int DecodeImageStream(int xsize, int ysize,
+                             int is_level0,
+                             VP8LDecoder* const dec,
+                             uint32_t** const decoded_data);
+
+//------------------------------------------------------------------------------
+
+int VP8LCheckSignature(const uint8_t* const data, size_t size) {
+  return (size >= VP8L_FRAME_HEADER_SIZE &&
+          data[0] == VP8L_MAGIC_BYTE &&
+          (data[4] >> 5) == 0);  // version
+}
+
+static int ReadImageInfo(VP8LBitReader* const br,
+                         int* const width, int* const height,
+                         int* const has_alpha) {
+  if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0;
+  *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+  *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+  *has_alpha = VP8LReadBits(br, 1);
+  if (VP8LReadBits(br, VP8L_VERSION_BITS) != 0) return 0;
+  return 1;
+}
+
+int VP8LGetInfo(const uint8_t* data, size_t data_size,
+                int* const width, int* const height, int* const has_alpha) {
+  if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
+    return 0;         // not enough data
+  } else if (!VP8LCheckSignature(data, data_size)) {
+    return 0;         // bad signature
+  } else {
+    int w, h, a;
+    VP8LBitReader br;
+    VP8LInitBitReader(&br, data, data_size);
+    if (!ReadImageInfo(&br, &w, &h, &a)) {
+      return 0;
+    }
+    if (width != NULL) *width = w;
+    if (height != NULL) *height = h;
+    if (has_alpha != NULL) *has_alpha = a;
+    return 1;
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetCopyDistance(int distance_symbol,
+                                       VP8LBitReader* const br) {
+  int extra_bits, offset;
+  if (distance_symbol < 4) {
+    return distance_symbol + 1;
+  }
+  extra_bits = (distance_symbol - 2) >> 1;
+  offset = (2 + (distance_symbol & 1)) << extra_bits;
+  return offset + VP8LReadBits(br, extra_bits) + 1;
+}
+
+static WEBP_INLINE int GetCopyLength(int length_symbol,
+                                     VP8LBitReader* const br) {
+  // Length and distance prefixes are encoded the same way.
+  return GetCopyDistance(length_symbol, br);
+}
+
+static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
+  if (plane_code > CODE_TO_PLANE_CODES) {
+    return plane_code - CODE_TO_PLANE_CODES;
+  } else {
+    const int dist_code = kCodeToPlane[plane_code - 1];
+    const int yoffset = dist_code >> 4;
+    const int xoffset = 8 - (dist_code & 0xf);
+    const int dist = yoffset * xsize + xoffset;
+    return (dist >= 1) ? dist : 1;  // dist<1 can happen if xsize is very small
+  }
+}
+
+//------------------------------------------------------------------------------
+// Decodes the next Huffman code from bit-stream.
+// FillBitWindow(br) needs to be called at minimum every second call
+// to ReadSymbol, in order to pre-fetch enough bits.
+static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
+                                  VP8LBitReader* const br) {
+  const HuffmanTreeNode* node = tree->root_;
+  uint32_t bits = VP8LPrefetchBits(br);
+  int bitpos = br->bit_pos_;
+  // Check if we find the bit combination from the Huffman lookup table.
+  const int lut_ix = bits & (HUFF_LUT - 1);
+  const int lut_bits = tree->lut_bits_[lut_ix];
+  if (lut_bits <= HUFF_LUT_BITS) {
+    VP8LSetBitPos(br, bitpos + lut_bits);
+    return tree->lut_symbol_[lut_ix];
+  }
+  node += tree->lut_jump_[lut_ix];
+  bitpos += HUFF_LUT_BITS;
+  bits >>= HUFF_LUT_BITS;
+
+  // Decode the value from a binary tree.
+  assert(node != NULL);
+  do {
+    node = HuffmanTreeNextNode(node, bits & 1);
+    bits >>= 1;
+    ++bitpos;
+  } while (HuffmanTreeNodeIsNotLeaf(node));
+  VP8LSetBitPos(br, bitpos);
+  return node->symbol_;
+}
+
+static int ReadHuffmanCodeLengths(
+    VP8LDecoder* const dec, const int* const code_length_code_lengths,
+    int num_symbols, int* const code_lengths) {
+  int ok = 0;
+  VP8LBitReader* const br = &dec->br_;
+  int symbol;
+  int max_symbol;
+  int prev_code_len = DEFAULT_CODE_LENGTH;
+  HuffmanTree tree;
+  int huff_codes[NUM_CODE_LENGTH_CODES] = { 0 };
+
+  if (!VP8LHuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
+                                    huff_codes, NUM_CODE_LENGTH_CODES)) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return 0;
+  }
+
+  if (VP8LReadBits(br, 1)) {    // use length
+    const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
+    max_symbol = 2 + VP8LReadBits(br, length_nbits);
+    if (max_symbol > num_symbols) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto End;
+    }
+  } else {
+    max_symbol = num_symbols;
+  }
+
+  symbol = 0;
+  while (symbol < num_symbols) {
+    int code_len;
+    if (max_symbol-- == 0) break;
+    VP8LFillBitWindow(br);
+    code_len = ReadSymbol(&tree, br);
+    if (code_len < kCodeLengthLiterals) {
+      code_lengths[symbol++] = code_len;
+      if (code_len != 0) prev_code_len = code_len;
+    } else {
+      const int use_prev = (code_len == kCodeLengthRepeatCode);
+      const int slot = code_len - kCodeLengthLiterals;
+      const int extra_bits = kCodeLengthExtraBits[slot];
+      const int repeat_offset = kCodeLengthRepeatOffsets[slot];
+      int repeat = VP8LReadBits(br, extra_bits) + repeat_offset;
+      if (symbol + repeat > num_symbols) {
+        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+        goto End;
+      } else {
+        const int length = use_prev ? prev_code_len : 0;
+        while (repeat-- > 0) code_lengths[symbol++] = length;
+      }
+    }
+  }
+  ok = 1;
+
+ End:
+  VP8LHuffmanTreeFree(&tree);
+  if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+  return ok;
+}
+
+// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
+// tree.
+static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
+                           int* const code_lengths, int* const huff_codes,
+                           HuffmanTree* const tree) {
+  int ok = 0;
+  VP8LBitReader* const br = &dec->br_;
+  const int simple_code = VP8LReadBits(br, 1);
+
+  if (simple_code) {  // Read symbols, codes & code lengths directly.
+    int symbols[2];
+    int codes[2];
+    const int num_symbols = VP8LReadBits(br, 1) + 1;
+    const int first_symbol_len_code = VP8LReadBits(br, 1);
+    // The first code is either 1 bit or 8 bit code.
+    symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
+    codes[0] = 0;
+    code_lengths[0] = num_symbols - 1;
+    // The second code (if present), is always 8 bit long.
+    if (num_symbols == 2) {
+      symbols[1] = VP8LReadBits(br, 8);
+      codes[1] = 1;
+      code_lengths[1] = num_symbols - 1;
+    }
+    ok = VP8LHuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols,
+                                      alphabet_size, num_symbols);
+  } else {  // Decode Huffman-coded code lengths.
+    int i;
+    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
+    const int num_codes = VP8LReadBits(br, 4) + 4;
+    if (num_codes > NUM_CODE_LENGTH_CODES) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      return 0;
+    }
+
+    memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths));
+
+    for (i = 0; i < num_codes; ++i) {
+      code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
+    }
+    ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size,
+                                code_lengths);
+    ok = ok && VP8LHuffmanTreeBuildImplicit(tree, code_lengths, huff_codes,
+                                            alphabet_size);
+  }
+  ok = ok && !br->error_;
+  if (!ok) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return 0;
+  }
+  return 1;
+}
+
+static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
+                            int color_cache_bits, int allow_recursion) {
+  int i, j;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  uint32_t* huffman_image = NULL;
+  HTreeGroup* htree_groups = NULL;
+  int num_htree_groups = 1;
+  int max_alphabet_size = 0;
+  int* code_lengths = NULL;
+  int* huff_codes = NULL;
+
+  if (allow_recursion && VP8LReadBits(br, 1)) {
+    // use meta Huffman codes.
+    const int huffman_precision = VP8LReadBits(br, 3) + 2;
+    const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
+    const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
+    const int huffman_pixs = huffman_xsize * huffman_ysize;
+    if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
+                           &huffman_image)) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto Error;
+    }
+    hdr->huffman_subsample_bits_ = huffman_precision;
+    for (i = 0; i < huffman_pixs; ++i) {
+      // The huffman data is stored in red and green bytes.
+      const int group = (huffman_image[i] >> 8) & 0xffff;
+      huffman_image[i] = group;
+      if (group >= num_htree_groups) {
+        num_htree_groups = group + 1;
+      }
+    }
+  }
+
+  if (br->error_) goto Error;
+
+  // Find maximum alphabet size for the htree group.
+  for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+    int alphabet_size = kAlphabetSize[j];
+    if (j == 0 && color_cache_bits > 0) {
+      alphabet_size += 1 << color_cache_bits;
+    }
+    if (max_alphabet_size < alphabet_size) {
+      max_alphabet_size = alphabet_size;
+    }
+  }
+
+  htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
+  code_lengths =
+      (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths));
+  huff_codes =
+      (int*)WebPSafeMalloc((uint64_t)max_alphabet_size, sizeof(*huff_codes));
+
+  if (htree_groups == NULL || code_lengths == NULL || huff_codes == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  for (i = 0; i < num_htree_groups; ++i) {
+    HuffmanTree* const htrees = htree_groups[i].htrees_;
+    for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+      int alphabet_size = kAlphabetSize[j];
+      HuffmanTree* const htree = htrees + j;
+      if (j == 0 && color_cache_bits > 0) {
+        alphabet_size += 1 << color_cache_bits;
+      }
+      if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, huff_codes,
+                           htree)) {
+        goto Error;
+      }
+    }
+  }
+  WebPSafeFree(huff_codes);
+  WebPSafeFree(code_lengths);
+
+  // All OK. Finalize pointers and return.
+  hdr->huffman_image_ = huffman_image;
+  hdr->num_htree_groups_ = num_htree_groups;
+  hdr->htree_groups_ = htree_groups;
+  return 1;
+
+ Error:
+  WebPSafeFree(huff_codes);
+  WebPSafeFree(code_lengths);
+  WebPSafeFree(huffman_image);
+  VP8LHtreeGroupsFree(htree_groups, num_htree_groups);
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// Scaling.
+
+static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
+  const int num_channels = 4;
+  const int in_width = io->mb_w;
+  const int out_width = io->scaled_width;
+  const int in_height = io->mb_h;
+  const int out_height = io->scaled_height;
+  const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
+  int32_t* work;        // Rescaler work area.
+  const uint64_t scaled_data_size = num_channels * (uint64_t)out_width;
+  uint32_t* scaled_data;  // Temporary storage for scaled BGRA data.
+  const uint64_t memory_size = sizeof(*dec->rescaler) +
+                               work_size * sizeof(*work) +
+                               scaled_data_size * sizeof(*scaled_data);
+  uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
+  if (memory == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
+  assert(dec->rescaler_memory == NULL);
+  dec->rescaler_memory = memory;
+
+  dec->rescaler = (WebPRescaler*)memory;
+  memory += sizeof(*dec->rescaler);
+  work = (int32_t*)memory;
+  memory += work_size * sizeof(*work);
+  scaled_data = (uint32_t*)memory;
+
+  WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
+                   out_width, out_height, 0, num_channels,
+                   in_width, out_width, in_height, out_height, work);
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Export to ARGB
+
+// We have special "export" function since we need to convert from BGRA
+static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
+                  int rgba_stride, uint8_t* const rgba) {
+  uint32_t* const src = (uint32_t*)rescaler->dst;
+  const int dst_width = rescaler->dst_width;
+  int num_lines_out = 0;
+  while (WebPRescalerHasPendingOutput(rescaler)) {
+    uint8_t* const dst = rgba + num_lines_out * rgba_stride;
+    WebPRescalerExportRow(rescaler, 0);
+    WebPMultARGBRow(src, dst_width, 1);
+    VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
+    ++num_lines_out;
+  }
+  return num_lines_out;
+}
+
+// Emit scaled rows.
+static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
+                                uint8_t* in, int in_stride, int mb_h,
+                                uint8_t* const out, int out_stride) {
+  const WEBP_CSP_MODE colorspace = dec->output_->colorspace;
+  int num_lines_in = 0;
+  int num_lines_out = 0;
+  while (num_lines_in < mb_h) {
+    uint8_t* const row_in = in + num_lines_in * in_stride;
+    uint8_t* const row_out = out + num_lines_out * out_stride;
+    const int lines_left = mb_h - num_lines_in;
+    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    assert(needed_lines > 0 && needed_lines <= lines_left);
+    WebPMultARGBRows(row_in, in_stride,
+                     dec->rescaler->src_width, needed_lines, 0);
+    WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
+    num_lines_in += needed_lines;
+    num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out);
+  }
+  return num_lines_out;
+}
+
+// Emit rows without any scaling.
+static int EmitRows(WEBP_CSP_MODE colorspace,
+                    const uint8_t* row_in, int in_stride,
+                    int mb_w, int mb_h,
+                    uint8_t* const out, int out_stride) {
+  int lines = mb_h;
+  uint8_t* row_out = out;
+  while (lines-- > 0) {
+    VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out);
+    row_in += in_stride;
+    row_out += out_stride;
+  }
+  return mb_h;  // Num rows out == num rows in.
+}
+
+//------------------------------------------------------------------------------
+// Export to YUVA
+
+// TODO(skal): should be in yuv.c
+static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos,
+                          const WebPDecBuffer* const output) {
+  const WebPYUVABuffer* const buf = &output->u.YUVA;
+  // first, the luma plane
+  {
+    int i;
+    uint8_t* const y = buf->y + y_pos * buf->y_stride;
+    for (i = 0; i < width; ++i) {
+      const uint32_t p = src[i];
+      y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff,
+                       YUV_HALF);
+    }
+  }
+
+  // then U/V planes
+  {
+    uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride;
+    uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride;
+    const int uv_width = width >> 1;
+    int i;
+    for (i = 0; i < uv_width; ++i) {
+      const uint32_t v0 = src[2 * i + 0];
+      const uint32_t v1 = src[2 * i + 1];
+      // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+      // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
+      const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+      const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
+      const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
+      if (!(y_pos & 1)) {  // even lines: store values
+        u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+      } else {             // odd lines: average with previous values
+        const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+        // Approximated average-of-four. But it's an acceptable diff.
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+    if (width & 1) {       // last pixel
+      const uint32_t v0 = src[2 * i + 0];
+      const int r = (v0 >> 14) & 0x3fc;
+      const int g = (v0 >>  6) & 0x3fc;
+      const int b = (v0 <<  2) & 0x3fc;
+      if (!(y_pos & 1)) {  // even lines
+        u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+      } else {             // odd lines (note: we could just skip this)
+        const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+        const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+  }
+  // Lastly, store alpha if needed.
+  if (buf->a != NULL) {
+    int i;
+    uint8_t* const a = buf->a + y_pos * buf->a_stride;
+    for (i = 0; i < width; ++i) a[i] = (src[i] >> 24);
+  }
+}
+
+static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
+  WebPRescaler* const rescaler = dec->rescaler;
+  uint32_t* const src = (uint32_t*)rescaler->dst;
+  const int dst_width = rescaler->dst_width;
+  int num_lines_out = 0;
+  while (WebPRescalerHasPendingOutput(rescaler)) {
+    WebPRescalerExportRow(rescaler, 0);
+    WebPMultARGBRow(src, dst_width, 1);
+    ConvertToYUVA(src, dst_width, y_pos, dec->output_);
+    ++y_pos;
+    ++num_lines_out;
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
+                                uint8_t* in, int in_stride, int mb_h) {
+  int num_lines_in = 0;
+  int y_pos = dec->last_out_row_;
+  while (num_lines_in < mb_h) {
+    const int lines_left = mb_h - num_lines_in;
+    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0);
+    WebPRescalerImport(dec->rescaler, lines_left, in, in_stride);
+    num_lines_in += needed_lines;
+    in += needed_lines * in_stride;
+    y_pos += ExportYUVA(dec, y_pos);
+  }
+  return y_pos;
+}
+
+static int EmitRowsYUVA(const VP8LDecoder* const dec,
+                        const uint8_t* in, int in_stride,
+                        int mb_w, int num_rows) {
+  int y_pos = dec->last_out_row_;
+  while (num_rows-- > 0) {
+    ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output_);
+    in += in_stride;
+    ++y_pos;
+  }
+  return y_pos;
+}
+
+//------------------------------------------------------------------------------
+// Cropping.
+
+// Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and
+// crop options. Also updates the input data pointer, so that it points to the
+// start of the cropped window. Note that pixels are in ARGB format even if
+// 'in_data' is uint8_t*.
+// Returns true if the crop window is not empty.
+static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
+                         uint8_t** const in_data, int pixel_stride) {
+  assert(y_start < y_end);
+  assert(io->crop_left < io->crop_right);
+  if (y_end > io->crop_bottom) {
+    y_end = io->crop_bottom;  // make sure we don't overflow on last row.
+  }
+  if (y_start < io->crop_top) {
+    const int delta = io->crop_top - y_start;
+    y_start = io->crop_top;
+    *in_data += delta * pixel_stride;
+  }
+  if (y_start >= y_end) return 0;  // Crop window is empty.
+
+  *in_data += io->crop_left * sizeof(uint32_t);
+
+  io->mb_y = y_start - io->crop_top;
+  io->mb_w = io->crop_right - io->crop_left;
+  io->mb_h = y_end - y_start;
+  return 1;  // Non-empty crop window.
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetMetaIndex(
+    const uint32_t* const image, int xsize, int bits, int x, int y) {
+  if (bits == 0) return 0;
+  return image[xsize * (y >> bits) + (x >> bits)];
+}
+
+static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
+                                                   int x, int y) {
+  const int meta_index = GetMetaIndex(hdr->huffman_image_, hdr->huffman_xsize_,
+                                      hdr->huffman_subsample_bits_, x, y);
+  assert(meta_index < hdr->num_htree_groups_);
+  return hdr->htree_groups_ + meta_index;
+}
+
+//------------------------------------------------------------------------------
+// Main loop, with custom row-processing function
+
+typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
+
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
+                                   const uint32_t* const rows) {
+  int n = dec->next_transform_;
+  const int cache_pixs = dec->width_ * num_rows;
+  const int start_row = dec->last_row_;
+  const int end_row = start_row + num_rows;
+  const uint32_t* rows_in = rows;
+  uint32_t* const rows_out = dec->argb_cache_;
+
+  // Inverse transforms.
+  // TODO: most transforms only need to operate on the cropped region only.
+  memcpy(rows_out, rows_in, cache_pixs * sizeof(*rows_out));
+  while (n-- > 0) {
+    VP8LTransform* const transform = &dec->transforms_[n];
+    VP8LInverseTransform(transform, start_row, end_row, rows_in, rows_out);
+    rows_in = rows_out;
+  }
+}
+
+// Special method for paletted alpha data.
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
+                                        const uint8_t* const rows) {
+  const int start_row = dec->last_row_;
+  const int end_row = start_row + num_rows;
+  const uint8_t* rows_in = rows;
+  uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
+  VP8LTransform* const transform = &dec->transforms_[0];
+  assert(dec->next_transform_ == 1);
+  assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
+  VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
+                                      rows_out);
+}
+
+// Processes (transforms, scales & color-converts) the rows decoded after the
+// last call.
+static void ProcessRows(VP8LDecoder* const dec, int row) {
+  const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
+  const int num_rows = row - dec->last_row_;
+
+  if (num_rows <= 0) return;  // Nothing to be done.
+  ApplyInverseTransforms(dec, num_rows, rows);
+
+  // Emit output.
+  {
+    VP8Io* const io = dec->io_;
+    uint8_t* rows_data = (uint8_t*)dec->argb_cache_;
+    const int in_stride = io->width * sizeof(uint32_t);  // in unit of RGBA
+    if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
+      // Nothing to output (this time).
+    } else {
+      const WebPDecBuffer* const output = dec->output_;
+      if (output->colorspace < MODE_YUV) {  // convert to RGBA
+        const WebPRGBABuffer* const buf = &output->u.RGBA;
+        uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+        const int num_rows_out = io->use_scaling ?
+            EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
+                                 rgba, buf->stride) :
+            EmitRows(output->colorspace, rows_data, in_stride,
+                     io->mb_w, io->mb_h, rgba, buf->stride);
+        // Update 'last_out_row_'.
+        dec->last_out_row_ += num_rows_out;
+      } else {                              // convert to YUVA
+        dec->last_out_row_ = io->use_scaling ?
+            EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
+            EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+      }
+      assert(dec->last_out_row_ <= output->height);
+    }
+  }
+
+  // Update 'last_row_'.
+  dec->last_row_ = row;
+  assert(dec->last_row_ <= dec->height_);
+}
+
+// Row-processing for the special case when alpha data contains only one
+// transform (color indexing), and trivial non-green literals.
+static int Is8bOptimizable(const VP8LMetadata* const hdr) {
+  int i;
+  if (hdr->color_cache_size_ > 0) return 0;
+  // When the Huffman tree contains only one symbol, we can skip the
+  // call to ReadSymbol() for red/blue/alpha channels.
+  for (i = 0; i < hdr->num_htree_groups_; ++i) {
+    const HuffmanTree* const htrees = hdr->htree_groups_[i].htrees_;
+    if (htrees[RED].num_nodes_ > 1) return 0;
+    if (htrees[BLUE].num_nodes_ > 1) return 0;
+    if (htrees[ALPHA].num_nodes_ > 1) return 0;
+  }
+  return 1;
+}
+
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
+  const int num_rows = row - dec->last_row_;
+  const uint8_t* const in =
+      (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
+  if (num_rows > 0) {
+    ApplyInverseTransformsAlpha(dec, num_rows, in);
+  }
+  dec->last_row_ = dec->last_out_row_ = row;
+}
+
+static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
+                           int width, int height, int last_row) {
+  int ok = 1;
+  int row = dec->last_pixel_ / width;
+  int col = dec->last_pixel_ % width;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+  int pos = dec->last_pixel_;         // current position
+  const int end = width * height;     // End of data
+  const int last = width * last_row;  // Last pixel to decode
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+  const int mask = hdr->huffman_mask_;
+  assert(htree_group != NULL);
+  assert(pos < end);
+  assert(last_row <= height);
+  assert(Is8bOptimizable(hdr));
+
+  while (!br->eos_ && pos < last) {
+    int code;
+    // Only update when changing tile.
+    if ((col & mask) == 0) {
+      htree_group = GetHtreeGroupForPos(hdr, col, row);
+    }
+    VP8LFillBitWindow(br);
+    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
+    if (code < NUM_LITERAL_CODES) {  // Literal
+      data[pos] = code;
+      ++pos;
+      ++col;
+      if (col >= width) {
+        col = 0;
+        ++row;
+        if (row % NUM_ARGB_CACHE_ROWS == 0) {
+          ExtractPalettedAlphaRows(dec, row);
+        }
+      }
+    } else if (code < len_code_limit) {  // Backward reference
+      int dist_code, dist;
+      const int length_sym = code - NUM_LITERAL_CODES;
+      const int length = GetCopyLength(length_sym, br);
+      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
+      VP8LFillBitWindow(br);
+      dist_code = GetCopyDistance(dist_symbol, br);
+      dist = PlaneCodeToDistance(width, dist_code);
+      if (pos >= dist && end - pos >= length) {
+        int i;
+        for (i = 0; i < length; ++i) data[pos + i] = data[pos + i - dist];
+      } else {
+        ok = 0;
+        goto End;
+      }
+      pos += length;
+      col += length;
+      while (col >= width) {
+        col -= width;
+        ++row;
+        if (row % NUM_ARGB_CACHE_ROWS == 0) {
+          ExtractPalettedAlphaRows(dec, row);
+        }
+      }
+      if (pos < last && (col & mask)) {
+        htree_group = GetHtreeGroupForPos(hdr, col, row);
+      }
+    } else {  // Not reached
+      ok = 0;
+      goto End;
+    }
+    assert(br->eos_ == VP8LIsEndOfStream(br));
+    ok = !br->error_;
+    if (!ok) goto End;
+  }
+  // Process the remaining rows corresponding to last row-block.
+  ExtractPalettedAlphaRows(dec, row);
+
+ End:
+  if (br->error_ || !ok || (br->eos_ && pos < end)) {
+    ok = 0;
+    dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
+                            : VP8_STATUS_BITSTREAM_ERROR;
+  } else {
+    dec->last_pixel_ = (int)pos;
+    if (pos == end) dec->state_ = READ_DATA;
+  }
+  return ok;
+}
+
+static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
+                           int width, int height, int last_row,
+                           ProcessRowsFunc process_func) {
+  int ok = 1;
+  int row = dec->last_pixel_ / width;
+  int col = dec->last_pixel_ % width;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+  uint32_t* src = data + dec->last_pixel_;
+  uint32_t* last_cached = src;
+  uint32_t* const src_end = data + width * height;     // End of data
+  uint32_t* const src_last = data + width * last_row;  // Last pixel to decode
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
+  VP8LColorCache* const color_cache =
+      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
+  const int mask = hdr->huffman_mask_;
+  assert(htree_group != NULL);
+  assert(src < src_end);
+  assert(src_last <= src_end);
+
+  while (!br->eos_ && src < src_last) {
+    int code;
+    // Only update when changing tile. Note we could use this test:
+    // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
+    // but that's actually slower and needs storing the previous col/row.
+    if ((col & mask) == 0) {
+      htree_group = GetHtreeGroupForPos(hdr, col, row);
+    }
+    VP8LFillBitWindow(br);
+    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
+    if (code < NUM_LITERAL_CODES) {  // Literal
+      int red, green, blue, alpha;
+      red = ReadSymbol(&htree_group->htrees_[RED], br);
+      green = code;
+      VP8LFillBitWindow(br);
+      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
+      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
+      *src = ((uint32_t)alpha << 24) | (red << 16) | (green << 8) | blue;
+    AdvanceByOne:
+      ++src;
+      ++col;
+      if (col >= width) {
+        col = 0;
+        ++row;
+        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
+          process_func(dec, row);
+        }
+        if (color_cache != NULL) {
+          while (last_cached < src) {
+            VP8LColorCacheInsert(color_cache, *last_cached++);
+          }
+        }
+      }
+    } else if (code < len_code_limit) {  // Backward reference
+      int dist_code, dist;
+      const int length_sym = code - NUM_LITERAL_CODES;
+      const int length = GetCopyLength(length_sym, br);
+      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
+      VP8LFillBitWindow(br);
+      dist_code = GetCopyDistance(dist_symbol, br);
+      dist = PlaneCodeToDistance(width, dist_code);
+      if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
+        ok = 0;
+        goto End;
+      } else {
+        int i;
+        for (i = 0; i < length; ++i) src[i] = src[i - dist];
+        src += length;
+      }
+      col += length;
+      while (col >= width) {
+        col -= width;
+        ++row;
+        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
+          process_func(dec, row);
+        }
+      }
+      if (src < src_last) {
+        if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row);
+        if (color_cache != NULL) {
+          while (last_cached < src) {
+            VP8LColorCacheInsert(color_cache, *last_cached++);
+          }
+        }
+      }
+    } else if (code < color_cache_limit) {  // Color cache
+      const int key = code - len_code_limit;
+      assert(color_cache != NULL);
+      while (last_cached < src) {
+        VP8LColorCacheInsert(color_cache, *last_cached++);
+      }
+      *src = VP8LColorCacheLookup(color_cache, key);
+      goto AdvanceByOne;
+    } else {  // Not reached
+      ok = 0;
+      goto End;
+    }
+    assert(br->eos_ == VP8LIsEndOfStream(br));
+    ok = !br->error_;
+    if (!ok) goto End;
+  }
+  // Process the remaining rows corresponding to last row-block.
+  if (process_func != NULL) process_func(dec, row);
+
+ End:
+  if (br->error_ || !ok || (br->eos_ && src < src_end)) {
+    ok = 0;
+    dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
+                            : VP8_STATUS_BITSTREAM_ERROR;
+  } else {
+    dec->last_pixel_ = (int)(src - data);
+    if (src == src_end) dec->state_ = READ_DATA;
+  }
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LTransform
+
+static void ClearTransform(VP8LTransform* const transform) {
+  WebPSafeFree(transform->data_);
+  transform->data_ = NULL;
+}
+
+// For security reason, we need to remap the color map to span
+// the total possible bundled values, and not just the num_colors.
+static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
+  int i;
+  const int final_num_colors = 1 << (8 >> transform->bits_);
+  uint32_t* const new_color_map =
+      (uint32_t*)WebPSafeMalloc((uint64_t)final_num_colors,
+                                sizeof(*new_color_map));
+  if (new_color_map == NULL) {
+    return 0;
+  } else {
+    uint8_t* const data = (uint8_t*)transform->data_;
+    uint8_t* const new_data = (uint8_t*)new_color_map;
+    new_color_map[0] = transform->data_[0];
+    for (i = 4; i < 4 * num_colors; ++i) {
+      // Equivalent to AddPixelEq(), on a byte-basis.
+      new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
+    }
+    for (; i < 4 * final_num_colors; ++i)
+      new_data[i] = 0;  // black tail.
+    WebPSafeFree(transform->data_);
+    transform->data_ = new_color_map;
+  }
+  return 1;
+}
+
+static int ReadTransform(int* const xsize, int const* ysize,
+                         VP8LDecoder* const dec) {
+  int ok = 1;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LTransform* transform = &dec->transforms_[dec->next_transform_];
+  const VP8LImageTransformType type =
+      (VP8LImageTransformType)VP8LReadBits(br, 2);
+
+  // Each transform type can only be present once in the stream.
+  if (dec->transforms_seen_ & (1U << type)) {
+    return 0;  // Already there, let's not accept the second same transform.
+  }
+  dec->transforms_seen_ |= (1U << type);
+
+  transform->type_ = type;
+  transform->xsize_ = *xsize;
+  transform->ysize_ = *ysize;
+  transform->data_ = NULL;
+  ++dec->next_transform_;
+  assert(dec->next_transform_ <= NUM_TRANSFORMS);
+
+  switch (type) {
+    case PREDICTOR_TRANSFORM:
+    case CROSS_COLOR_TRANSFORM:
+      transform->bits_ = VP8LReadBits(br, 3) + 2;
+      ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
+                                               transform->bits_),
+                             VP8LSubSampleSize(transform->ysize_,
+                                               transform->bits_),
+                             0, dec, &transform->data_);
+      break;
+    case COLOR_INDEXING_TRANSFORM: {
+       const int num_colors = VP8LReadBits(br, 8) + 1;
+       const int bits = (num_colors > 16) ? 0
+                      : (num_colors > 4) ? 1
+                      : (num_colors > 2) ? 2
+                      : 3;
+       *xsize = VP8LSubSampleSize(transform->xsize_, bits);
+       transform->bits_ = bits;
+       ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_);
+       ok = ok && ExpandColorMap(num_colors, transform);
+      break;
+    }
+    case SUBTRACT_GREEN:
+      break;
+    default:
+      assert(0);    // can't happen
+      break;
+  }
+
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LMetadata
+
+static void InitMetadata(VP8LMetadata* const hdr) {
+  assert(hdr);
+  memset(hdr, 0, sizeof(*hdr));
+}
+
+static void ClearMetadata(VP8LMetadata* const hdr) {
+  assert(hdr);
+
+  WebPSafeFree(hdr->huffman_image_);
+  VP8LHtreeGroupsFree(hdr->htree_groups_, hdr->num_htree_groups_);
+  VP8LColorCacheClear(&hdr->color_cache_);
+  InitMetadata(hdr);
+}
+
+// -----------------------------------------------------------------------------
+// VP8LDecoder
+
+VP8LDecoder* VP8LNew(void) {
+  VP8LDecoder* const dec = (VP8LDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  if (dec == NULL) return NULL;
+  dec->status_ = VP8_STATUS_OK;
+  dec->action_ = READ_DIM;
+  dec->state_ = READ_DIM;
+
+  VP8LDspInit();  // Init critical function pointers.
+
+  return dec;
+}
+
+void VP8LClear(VP8LDecoder* const dec) {
+  int i;
+  if (dec == NULL) return;
+  ClearMetadata(&dec->hdr_);
+
+  WebPSafeFree(dec->pixels_);
+  dec->pixels_ = NULL;
+  for (i = 0; i < dec->next_transform_; ++i) {
+    ClearTransform(&dec->transforms_[i]);
+  }
+  dec->next_transform_ = 0;
+  dec->transforms_seen_ = 0;
+
+  WebPSafeFree(dec->rescaler_memory);
+  dec->rescaler_memory = NULL;
+
+  dec->output_ = NULL;   // leave no trace behind
+}
+
+void VP8LDelete(VP8LDecoder* const dec) {
+  if (dec != NULL) {
+    VP8LClear(dec);
+    WebPSafeFree(dec);
+  }
+}
+
+static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
+  VP8LMetadata* const hdr = &dec->hdr_;
+  const int num_bits = hdr->huffman_subsample_bits_;
+  dec->width_ = width;
+  dec->height_ = height;
+
+  hdr->huffman_xsize_ = VP8LSubSampleSize(width, num_bits);
+  hdr->huffman_mask_ = (num_bits == 0) ? ~0 : (1 << num_bits) - 1;
+}
+
+static int DecodeImageStream(int xsize, int ysize,
+                             int is_level0,
+                             VP8LDecoder* const dec,
+                             uint32_t** const decoded_data) {
+  int ok = 1;
+  int transform_xsize = xsize;
+  int transform_ysize = ysize;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  uint32_t* data = NULL;
+  int color_cache_bits = 0;
+
+  // Read the transforms (may recurse).
+  if (is_level0) {
+    while (ok && VP8LReadBits(br, 1)) {
+      ok = ReadTransform(&transform_xsize, &transform_ysize, dec);
+    }
+  }
+
+  // Color cache
+  if (ok && VP8LReadBits(br, 1)) {
+    color_cache_bits = VP8LReadBits(br, 4);
+    ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
+    if (!ok) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto End;
+    }
+  }
+
+  // Read the Huffman codes (may recurse).
+  ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize,
+                              color_cache_bits, is_level0);
+  if (!ok) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    goto End;
+  }
+
+  // Finish setting up the color-cache
+  if (color_cache_bits > 0) {
+    hdr->color_cache_size_ = 1 << color_cache_bits;
+    if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
+      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+      ok = 0;
+      goto End;
+    }
+  } else {
+    hdr->color_cache_size_ = 0;
+  }
+  UpdateDecoder(dec, transform_xsize, transform_ysize);
+
+  if (is_level0) {   // level 0 complete
+    dec->state_ = READ_HDR;
+    goto End;
+  }
+
+  {
+    const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
+    data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
+    if (data == NULL) {
+      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+      ok = 0;
+      goto End;
+    }
+  }
+
+  // Use the Huffman trees to decode the LZ77 encoded data.
+  ok = DecodeImageData(dec, data, transform_xsize, transform_ysize,
+                       transform_ysize, NULL);
+  ok = ok && !br->error_;
+
+ End:
+
+  if (!ok) {
+    WebPSafeFree(data);
+    ClearMetadata(hdr);
+    // If not enough data (br.eos_) resulted in BIT_STREAM_ERROR, update the
+    // status appropriately.
+    if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) {
+      dec->status_ = VP8_STATUS_SUSPENDED;
+    }
+  } else {
+    if (decoded_data != NULL) {
+      *decoded_data = data;
+    } else {
+      // We allocate image data in this function only for transforms. At level 0
+      // (that is: not the transforms), we shouldn't have allocated anything.
+      assert(data == NULL);
+      assert(is_level0);
+    }
+    dec->last_pixel_ = 0;  // Reset for future DECODE_DATA_FUNC() calls.
+    if (!is_level0) ClearMetadata(hdr);  // Clean up temporary data behind.
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// Allocate internal buffers dec->pixels_ and dec->argb_cache_.
+static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
+  const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
+  // Scratch buffer corresponding to top-prediction row for transforming the
+  // first row in the row-blocks. Not needed for paletted alpha.
+  const uint64_t cache_top_pixels = (uint16_t)final_width;
+  // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
+  const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+  const uint64_t total_num_pixels =
+      num_pixels + cache_top_pixels + cache_pixels;
+
+  assert(dec->width_ <= final_width);
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
+  if (dec->pixels_ == NULL) {
+    dec->argb_cache_ = NULL;    // for sanity check
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
+  dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels;
+  return 1;
+}
+
+static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
+  const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_;
+  dec->argb_cache_ = NULL;    // for sanity check
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));
+  if (dec->pixels_ == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+// Special row-processing that only stores the alpha data.
+static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
+  const int num_rows = row - dec->last_row_;
+  const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
+
+  if (num_rows <= 0) return;  // Nothing to be done.
+  ApplyInverseTransforms(dec, num_rows, in);
+
+  // Extract alpha (which is stored in the green plane).
+  {
+    const int width = dec->io_->width;      // the final width (!= dec->width_)
+    const int cache_pixs = width * num_rows;
+    uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
+    const uint32_t* const src = dec->argb_cache_;
+    int i;
+    for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
+  }
+  dec->last_row_ = dec->last_out_row_ = row;
+}
+
+int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output) {
+  int ok = 0;
+  VP8LDecoder* dec;
+  VP8Io* io;
+  assert(alph_dec != NULL);
+  alph_dec->vp8l_dec_ = VP8LNew();
+  if (alph_dec->vp8l_dec_ == NULL) return 0;
+  dec = alph_dec->vp8l_dec_;
+
+  dec->width_ = alph_dec->width_;
+  dec->height_ = alph_dec->height_;
+  dec->io_ = &alph_dec->io_;
+  io = dec->io_;
+
+  VP8InitIo(io);
+  WebPInitCustomIo(NULL, io);  // Just a sanity Init. io won't be used.
+  io->opaque = output;
+  io->width = alph_dec->width_;
+  io->height = alph_dec->height_;
+
+  dec->status_ = VP8_STATUS_OK;
+  VP8LInitBitReader(&dec->br_, data, data_size);
+
+  dec->action_ = READ_HDR;
+  if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
+    goto Err;
+  }
+
+  // Special case: if alpha data uses only the color indexing transform and
+  // doesn't use color cache (a frequent case), we will use DecodeAlphaData()
+  // method that only needs allocation of 1 byte per pixel (alpha channel).
+  if (dec->next_transform_ == 1 &&
+      dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
+      Is8bOptimizable(&dec->hdr_)) {
+    alph_dec->use_8b_decode = 1;
+    ok = AllocateInternalBuffers8b(dec);
+  } else {
+    // Allocate internal buffers (note that dec->width_ may have changed here).
+    alph_dec->use_8b_decode = 0;
+    ok = AllocateInternalBuffers32b(dec, alph_dec->width_);
+  }
+
+  if (!ok) goto Err;
+
+  dec->action_ = READ_DATA;
+  return 1;
+
+ Err:
+  VP8LDelete(alph_dec->vp8l_dec_);
+  alph_dec->vp8l_dec_ = NULL;
+  return 0;
+}
+
+int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
+  VP8LDecoder* const dec = alph_dec->vp8l_dec_;
+  assert(dec != NULL);
+  assert(dec->action_ == READ_DATA);
+  assert(last_row <= dec->height_);
+
+  if (dec->last_pixel_ == dec->width_ * dec->height_) {
+    return 1;  // done
+  }
+
+  // Decode (with special row processing).
+  return alph_dec->use_8b_decode ?
+      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
+                      last_row) :
+      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                      last_row, ExtractAlphaRows);
+}
+
+//------------------------------------------------------------------------------
+
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
+  int width, height, has_alpha;
+
+  if (dec == NULL) return 0;
+  if (io == NULL) {
+    dec->status_ = VP8_STATUS_INVALID_PARAM;
+    return 0;
+  }
+
+  dec->io_ = io;
+  dec->status_ = VP8_STATUS_OK;
+  VP8LInitBitReader(&dec->br_, io->data, io->data_size);
+  if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    goto Error;
+  }
+  dec->state_ = READ_DIM;
+  io->width = width;
+  io->height = height;
+
+  dec->action_ = READ_HDR;
+  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
+  return 1;
+
+ Error:
+  VP8LClear(dec);
+  assert(dec->status_ != VP8_STATUS_OK);
+  return 0;
+}
+
+int VP8LDecodeImage(VP8LDecoder* const dec) {
+  VP8Io* io = NULL;
+  WebPDecParams* params = NULL;
+
+  // Sanity checks.
+  if (dec == NULL) return 0;
+
+  dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+  assert(dec->hdr_.htree_groups_ != NULL);
+  assert(dec->hdr_.num_htree_groups_ > 0);
+
+  io = dec->io_;
+  assert(io != NULL);
+  params = (WebPDecParams*)io->opaque;
+  assert(params != NULL);
+  dec->output_ = params->output;
+  assert(dec->output_ != NULL);
+
+  // Initialization.
+  if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
+    dec->status_ = VP8_STATUS_INVALID_PARAM;
+    goto Err;
+  }
+
+  if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
+
+  if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+
+  if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
+    // need the alpha-multiply functions for premultiplied output or rescaling
+    WebPInitAlphaProcessing();
+  }
+
+  // Decode.
+  dec->action_ = READ_DATA;
+  if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                       dec->height_, ProcessRows)) {
+    goto Err;
+  }
+
+  // Cleanup.
+  params->last_y = dec->last_out_row_;
+  VP8LClear(dec);
+  return 1;
+
+ Err:
+  VP8LClear(dec);
+  assert(dec->status_ != VP8_STATUS_OK);
+  return 0;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dec/vp8li.h b/src/main/jni/libwebp/dec/vp8li.h
new file mode 100644
index 000000000..21c593feb
--- /dev/null
+++ b/src/main/jni/libwebp/dec/vp8li.h
@@ -0,0 +1,132 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Lossless decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora(vikaas.arora@gmail.com)
+
+#ifndef WEBP_DEC_VP8LI_H_
+#define WEBP_DEC_VP8LI_H_
+
+#include <string.h>     // for memcpy()
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
+#include "../utils/color_cache.h"
+#include "../utils/huffman.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+  READ_DATA = 0,
+  READ_HDR = 1,
+  READ_DIM = 2
+} VP8LDecodeState;
+
+typedef struct VP8LTransform VP8LTransform;
+struct VP8LTransform {
+  VP8LImageTransformType type_;   // transform type.
+  int                    bits_;   // subsampling bits defining transform window.
+  int                    xsize_;  // transform window X index.
+  int                    ysize_;  // transform window Y index.
+  uint32_t              *data_;   // transform data.
+};
+
+typedef struct {
+  int             color_cache_size_;
+  VP8LColorCache  color_cache_;
+
+  int             huffman_mask_;
+  int             huffman_subsample_bits_;
+  int             huffman_xsize_;
+  uint32_t       *huffman_image_;
+  int             num_htree_groups_;
+  HTreeGroup     *htree_groups_;
+} VP8LMetadata;
+
+typedef struct VP8LDecoder VP8LDecoder;
+struct VP8LDecoder {
+  VP8StatusCode    status_;
+  VP8LDecodeState  action_;
+  VP8LDecodeState  state_;
+  VP8Io           *io_;
+
+  const WebPDecBuffer *output_;    // shortcut to io->opaque->output
+
+  uint32_t        *pixels_;        // Internal data: either uint8_t* for alpha
+                                   // or uint32_t* for BGRA.
+  uint32_t        *argb_cache_;    // Scratch buffer for temporary BGRA storage.
+
+  VP8LBitReader    br_;
+
+  int              width_;
+  int              height_;
+  int              last_row_;      // last input row decoded so far.
+  int              last_pixel_;    // last pixel decoded so far. However, it may
+                                   // not be transformed, scaled and
+                                   // color-converted yet.
+  int              last_out_row_;  // last row output so far.
+
+  VP8LMetadata     hdr_;
+
+  int              next_transform_;
+  VP8LTransform    transforms_[NUM_TRANSFORMS];
+  // or'd bitset storing the transforms types.
+  uint32_t         transforms_seen_;
+
+  uint8_t         *rescaler_memory;  // Working memory for rescaling work.
+  WebPRescaler    *rescaler;         // Common rescaler for all channels.
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+struct ALPHDecoder;  // Defined in dec/alphai.h.
+
+// in vp8l.c
+
+// Decodes image header for alpha data stored using lossless compression.
+// Returns false in case of error.
+int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output);
+
+// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
+// already decoded in previous call(s), it will resume decoding from where it
+// was paused.
+// Returns false in case of bitstream error.
+int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec,
+                               int last_row);
+
+// Allocates and initialize a new lossless decoder instance.
+VP8LDecoder* VP8LNew(void);
+
+// Decodes the image header. Returns false in case of error.
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
+
+// Decodes an image. It's required to decode the lossless header before calling
+// this function. Returns false in case of error, with updated dec->status_.
+int VP8LDecodeImage(VP8LDecoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Preserves the dec->status_ value.
+void VP8LClear(VP8LDecoder* const dec);
+
+// Clears and deallocate a lossless decoder instance.
+void VP8LDelete(VP8LDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_VP8LI_H_ */
diff --git a/src/main/jni/libwebp/dec/webp.c b/src/main/jni/libwebp/dec/webp.c
new file mode 100644
index 000000000..33872ddad
--- /dev/null
+++ b/src/main/jni/libwebp/dec/webp.c
@@ -0,0 +1,836 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Main decoding functions for WEBP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../webp/mux_types.h"  // ALPHA_FLAG
+
+//------------------------------------------------------------------------------
+// RIFF layout is:
+//   Offset  tag
+//   0...3   "RIFF" 4-byte tag
+//   4...7   size of image data (including metadata) starting at offset 8
+//   8...11  "WEBP"   our form-type signature
+// The RIFF container (12 bytes) is followed by appropriate chunks:
+//   12..15  "VP8 ": 4-bytes tags, signaling the use of VP8 video format
+//   16..19  size of the raw VP8 image data, starting at offset 20
+//   20....  the VP8 bytes
+// Or,
+//   12..15  "VP8L": 4-bytes tags, signaling the use of VP8L lossless format
+//   16..19  size of the raw VP8L image data, starting at offset 20
+//   20....  the VP8L bytes
+// Or,
+//   12..15  "VP8X": 4-bytes tags, describing the extended-VP8 chunk.
+//   16..19  size of the VP8X chunk starting at offset 20.
+//   20..23  VP8X flags bit-map corresponding to the chunk-types present.
+//   24..26  Width of the Canvas Image.
+//   27..29  Height of the Canvas Image.
+// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
+// VP8L, XMP, EXIF  ...)
+// All sizes are in little-endian order.
+// Note: chunk data size must be padded to multiple of 2 when written.
+
+static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) {
+  return data[0] | (data[1] << 8) | (data[2] << 16);
+}
+
+static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) {
+  return (uint32_t)get_le24(data) | (data[3] << 24);
+}
+
+// Validates the RIFF container (if detected) and skips over it.
+// If a RIFF container is detected, returns:
+//     VP8_STATUS_BITSTREAM_ERROR for invalid header,
+//     VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true,
+// and VP8_STATUS_OK otherwise.
+// In case there are not enough bytes (partial RIFF container), return 0 for
+// *riff_size. Else return the RIFF size extracted from the header.
+static VP8StatusCode ParseRIFF(const uint8_t** const data,
+                               size_t* const data_size, int have_all_data,
+                               size_t* const riff_size) {
+  assert(data != NULL);
+  assert(data_size != NULL);
+  assert(riff_size != NULL);
+
+  *riff_size = 0;  // Default: no RIFF present.
+  if (*data_size >= RIFF_HEADER_SIZE && !memcmp(*data, "RIFF", TAG_SIZE)) {
+    if (memcmp(*data + 8, "WEBP", TAG_SIZE)) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Wrong image file signature.
+    } else {
+      const uint32_t size = get_le32(*data + TAG_SIZE);
+      // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn").
+      if (size < TAG_SIZE + CHUNK_HEADER_SIZE) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+      }
+      if (size > MAX_CHUNK_PAYLOAD) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+      }
+      if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
+        return VP8_STATUS_NOT_ENOUGH_DATA;  // Truncated bitstream.
+      }
+      // We have a RIFF container. Skip it.
+      *riff_size = size;
+      *data += RIFF_HEADER_SIZE;
+      *data_size -= RIFF_HEADER_SIZE;
+    }
+  }
+  return VP8_STATUS_OK;
+}
+
+// Validates the VP8X header and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If a VP8X chunk is found, found_vp8x is set to true and *width_ptr,
+// *height_ptr and *flags_ptr are set to the corresponding values extracted
+// from the VP8X chunk.
+static VP8StatusCode ParseVP8X(const uint8_t** const data,
+                               size_t* const data_size,
+                               int* const found_vp8x,
+                               int* const width_ptr, int* const height_ptr,
+                               uint32_t* const flags_ptr) {
+  const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+  assert(data != NULL);
+  assert(data_size != NULL);
+  assert(found_vp8x != NULL);
+
+  *found_vp8x = 0;
+
+  if (*data_size < CHUNK_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;  // Insufficient data.
+  }
+
+  if (!memcmp(*data, "VP8X", TAG_SIZE)) {
+    int width, height;
+    uint32_t flags;
+    const uint32_t chunk_size = get_le32(*data + TAG_SIZE);
+    if (chunk_size != VP8X_CHUNK_SIZE) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Wrong chunk size.
+    }
+
+    // Verify if enough data is available to validate the VP8X chunk.
+    if (*data_size < vp8x_size) {
+      return VP8_STATUS_NOT_ENOUGH_DATA;  // Insufficient data.
+    }
+    flags = get_le32(*data + 8);
+    width = 1 + get_le24(*data + 12);
+    height = 1 + get_le24(*data + 15);
+    if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // image is too large
+    }
+
+    if (flags_ptr != NULL) *flags_ptr = flags;
+    if (width_ptr != NULL) *width_ptr = width;
+    if (height_ptr != NULL) *height_ptr = height;
+    // Skip over VP8X header bytes.
+    *data += vp8x_size;
+    *data_size -= vp8x_size;
+    *found_vp8x = 1;
+  }
+  return VP8_STATUS_OK;
+}
+
+// Skips to the next VP8/VP8L chunk header in the data given the size of the
+// RIFF chunk 'riff_size'.
+// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If an alpha chunk is found, *alpha_data and *alpha_size are set
+// appropriately.
+static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
+                                         size_t* const data_size,
+                                         size_t const riff_size,
+                                         const uint8_t** const alpha_data,
+                                         size_t* const alpha_size) {
+  const uint8_t* buf;
+  size_t buf_size;
+  uint32_t total_size = TAG_SIZE +           // "WEBP".
+                        CHUNK_HEADER_SIZE +  // "VP8Xnnnn".
+                        VP8X_CHUNK_SIZE;     // data.
+  assert(data != NULL);
+  assert(data_size != NULL);
+  buf = *data;
+  buf_size = *data_size;
+
+  assert(alpha_data != NULL);
+  assert(alpha_size != NULL);
+  *alpha_data = NULL;
+  *alpha_size = 0;
+
+  while (1) {
+    uint32_t chunk_size;
+    uint32_t disk_chunk_size;   // chunk_size with padding
+
+    *data = buf;
+    *data_size = buf_size;
+
+    if (buf_size < CHUNK_HEADER_SIZE) {  // Insufficient data.
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+
+    chunk_size = get_le32(buf + TAG_SIZE);
+    if (chunk_size > MAX_CHUNK_PAYLOAD) {
+      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+    }
+    // For odd-sized chunk-payload, there's one byte padding at the end.
+    disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
+    total_size += disk_chunk_size;
+
+    // Check that total bytes skipped so far does not exceed riff_size.
+    if (riff_size > 0 && (total_size > riff_size)) {
+      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+    }
+
+    // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
+    // parsed all the optional chunks.
+    // Note: This check must occur before the check 'buf_size < disk_chunk_size'
+    // below to allow incomplete VP8/VP8L chunks.
+    if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+        !memcmp(buf, "VP8L", TAG_SIZE)) {
+      return VP8_STATUS_OK;
+    }
+
+    if (buf_size < disk_chunk_size) {             // Insufficient data.
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+
+    if (!memcmp(buf, "ALPH", TAG_SIZE)) {         // A valid ALPH header.
+      *alpha_data = buf + CHUNK_HEADER_SIZE;
+      *alpha_size = chunk_size;
+    }
+
+    // We have a full and valid chunk; skip it.
+    buf += disk_chunk_size;
+    buf_size -= disk_chunk_size;
+  }
+}
+
+// Validates the VP8/VP8L Header ("VP8 nnnn" or "VP8L nnnn") and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (chunk larger than
+//         riff_size) VP8/VP8L header,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If a VP8/VP8L chunk is found, *chunk_size is set to the total number of bytes
+// extracted from the VP8/VP8L chunk header.
+// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
+static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
+                                    size_t* const data_size, int have_all_data,
+                                    size_t riff_size, size_t* const chunk_size,
+                                    int* const is_lossless) {
+  const uint8_t* const data = *data_ptr;
+  const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
+  const int is_vp8l = !memcmp(data, "VP8L", TAG_SIZE);
+  const uint32_t minimal_size =
+      TAG_SIZE + CHUNK_HEADER_SIZE;  // "WEBP" + "VP8 nnnn" OR
+                                     // "WEBP" + "VP8Lnnnn"
+  assert(data != NULL);
+  assert(data_size != NULL);
+  assert(chunk_size != NULL);
+  assert(is_lossless != NULL);
+
+  if (*data_size < CHUNK_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;  // Insufficient data.
+  }
+
+  if (is_vp8 || is_vp8l) {
+    // Bitstream contains VP8/VP8L header.
+    const uint32_t size = get_le32(data + TAG_SIZE);
+    if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Inconsistent size information.
+    }
+    if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
+      return VP8_STATUS_NOT_ENOUGH_DATA;  // Truncated bitstream.
+    }
+    // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
+    *chunk_size = size;
+    *data_ptr += CHUNK_HEADER_SIZE;
+    *data_size -= CHUNK_HEADER_SIZE;
+    *is_lossless = is_vp8l;
+  } else {
+    // Raw VP8/VP8L bitstream (no header).
+    *is_lossless = VP8LCheckSignature(data, *data_size);
+    *chunk_size = *data_size;
+  }
+
+  return VP8_STATUS_OK;
+}
+
+//------------------------------------------------------------------------------
+
+// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on
+// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the
+// minimal amount will be read to fetch the remaining parameters.
+// If 'headers' is non-NULL this function will attempt to locate both alpha
+// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L).
+// Note: The following chunk sequences (before the raw VP8/VP8L data) are
+// considered valid by this function:
+// RIFF + VP8(L)
+// RIFF + VP8X + (optional chunks) + VP8(L)
+// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
+// VP8(L)     <-- Not a valid WebP format: only allowed for internal purpose.
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
+                                          size_t data_size,
+                                          int* const width,
+                                          int* const height,
+                                          int* const has_alpha,
+                                          int* const has_animation,
+                                          int* const format,
+                                          WebPHeaderStructure* const headers) {
+  int canvas_width = 0;
+  int canvas_height = 0;
+  int image_width = 0;
+  int image_height = 0;
+  int found_riff = 0;
+  int found_vp8x = 0;
+  int animation_present = 0;
+  int fragments_present = 0;
+  const int have_all_data = (headers != NULL) ? headers->have_all_data : 0;
+
+  VP8StatusCode status;
+  WebPHeaderStructure hdrs;
+
+  if (data == NULL || data_size < RIFF_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  memset(&hdrs, 0, sizeof(hdrs));
+  hdrs.data = data;
+  hdrs.data_size = data_size;
+
+  // Skip over RIFF header.
+  status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
+  if (status != VP8_STATUS_OK) {
+    return status;   // Wrong RIFF header / insufficient data.
+  }
+  found_riff = (hdrs.riff_size > 0);
+
+  // Skip over VP8X.
+  {
+    uint32_t flags = 0;
+    status = ParseVP8X(&data, &data_size, &found_vp8x,
+                       &canvas_width, &canvas_height, &flags);
+    if (status != VP8_STATUS_OK) {
+      return status;  // Wrong VP8X / insufficient data.
+    }
+    animation_present = !!(flags & ANIMATION_FLAG);
+    fragments_present = !!(flags & FRAGMENTS_FLAG);
+    if (!found_riff && found_vp8x) {
+      // Note: This restriction may be removed in the future, if it becomes
+      // necessary to send VP8X chunk to the decoder.
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
+    if (has_animation != NULL) *has_animation = animation_present;
+    if (format != NULL) *format = 0;   // default = undefined
+
+    image_width = canvas_width;
+    image_height = canvas_height;
+    if (found_vp8x && (animation_present || fragments_present) &&
+        headers == NULL) {
+      status = VP8_STATUS_OK;
+      goto ReturnWidthHeight;  // Just return features from VP8X header.
+    }
+  }
+
+  if (data_size < TAG_SIZE) {
+    status = VP8_STATUS_NOT_ENOUGH_DATA;
+    goto ReturnWidthHeight;
+  }
+
+  // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
+  if ((found_riff && found_vp8x) ||
+      (!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) {
+    status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
+                                 &hdrs.alpha_data, &hdrs.alpha_data_size);
+    if (status != VP8_STATUS_OK) {
+      goto ReturnWidthHeight;  // Invalid chunk size / insufficient data.
+    }
+  }
+
+  // Skip over VP8/VP8L header.
+  status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size,
+                          &hdrs.compressed_size, &hdrs.is_lossless);
+  if (status != VP8_STATUS_OK) {
+    goto ReturnWidthHeight;  // Wrong VP8/VP8L chunk-header / insufficient data.
+  }
+  if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
+
+  if (format != NULL && !(animation_present || fragments_present)) {
+    *format = hdrs.is_lossless ? 2 : 1;
+  }
+
+  if (!hdrs.is_lossless) {
+    if (data_size < VP8_FRAME_HEADER_SIZE) {
+      status = VP8_STATUS_NOT_ENOUGH_DATA;
+      goto ReturnWidthHeight;
+    }
+    // Validates raw VP8 data.
+    if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size,
+                    &image_width, &image_height)) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+  } else {
+    if (data_size < VP8L_FRAME_HEADER_SIZE) {
+      status = VP8_STATUS_NOT_ENOUGH_DATA;
+      goto ReturnWidthHeight;
+    }
+    // Validates raw VP8L data.
+    if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+  }
+  // Validates image size coherency.
+  if (found_vp8x) {
+    if (canvas_width != image_width || canvas_height != image_height) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+  }
+  if (headers != NULL) {
+    *headers = hdrs;
+    headers->offset = data - headers->data;
+    assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
+    assert(headers->offset == headers->data_size - data_size);
+  }
+ ReturnWidthHeight:
+  if (status == VP8_STATUS_OK ||
+      (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
+    if (has_alpha != NULL) {
+      // If the data did not contain a VP8X/VP8L chunk the only definitive way
+      // to set this is by looking for alpha data (from an ALPH chunk).
+      *has_alpha |= (hdrs.alpha_data != NULL);
+    }
+    if (width != NULL) *width = image_width;
+    if (height != NULL) *height = image_height;
+    return VP8_STATUS_OK;
+  } else {
+    return status;
+  }
+}
+
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+  VP8StatusCode status;
+  int has_animation = 0;
+  assert(headers != NULL);
+  // fill out headers, ignore width/height/has_alpha.
+  status = ParseHeadersInternal(headers->data, headers->data_size,
+                                NULL, NULL, NULL, &has_animation,
+                                NULL, headers);
+  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    // TODO(jzern): full support of animation frames will require API additions.
+    if (has_animation) {
+      status = VP8_STATUS_UNSUPPORTED_FEATURE;
+    }
+  }
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// WebPDecParams
+
+void WebPResetDecParams(WebPDecParams* const params) {
+  if (params != NULL) {
+    memset(params, 0, sizeof(*params));
+  }
+}
+
+//------------------------------------------------------------------------------
+// "Into" decoding variants
+
+// Main flow
+static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
+                                WebPDecParams* const params) {
+  VP8StatusCode status;
+  VP8Io io;
+  WebPHeaderStructure headers;
+
+  headers.data = data;
+  headers.data_size = data_size;
+  headers.have_all_data = 1;
+  status = WebPParseHeaders(&headers);   // Process Pre-VP8 chunks.
+  if (status != VP8_STATUS_OK) {
+    return status;
+  }
+
+  assert(params != NULL);
+  VP8InitIo(&io);
+  io.data = headers.data + headers.offset;
+  io.data_size = headers.data_size - headers.offset;
+  WebPInitCustomIo(params, &io);  // Plug the I/O functions.
+
+  if (!headers.is_lossless) {
+    VP8Decoder* const dec = VP8New();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    dec->alpha_data_ = headers.alpha_data;
+    dec->alpha_data_size_ = headers.alpha_data_size;
+
+    // Decode bitstream header, update io->width/io->height.
+    if (!VP8GetHeaders(dec, &io)) {
+      status = dec->status_;   // An error occurred. Grab error status.
+    } else {
+      // Allocate/check output buffers.
+      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+                                     params->output);
+      if (status == VP8_STATUS_OK) {  // Decode
+        // This change must be done before calling VP8Decode()
+        dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
+                                             io.width, io.height);
+        VP8InitDithering(params->options, dec);
+        if (!VP8Decode(dec, &io)) {
+          status = dec->status_;
+        }
+      }
+    }
+    VP8Delete(dec);
+  } else {
+    VP8LDecoder* const dec = VP8LNew();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    if (!VP8LDecodeHeader(dec, &io)) {
+      status = dec->status_;   // An error occurred. Grab error status.
+    } else {
+      // Allocate/check output buffers.
+      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+                                     params->output);
+      if (status == VP8_STATUS_OK) {  // Decode
+        if (!VP8LDecodeImage(dec)) {
+          status = dec->status_;
+        }
+      }
+    }
+    VP8LDelete(dec);
+  }
+
+  if (status != VP8_STATUS_OK) {
+    WebPFreeDecBuffer(params->output);
+  }
+
+#if WEBP_DECODER_ABI_VERSION > 0x0203
+  if (params->options != NULL && params->options->flip) {
+    status = WebPFlipBuffer(params->output);
+  }
+#endif
+  return status;
+}
+
+// Helpers
+static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
+                                     const uint8_t* const data,
+                                     size_t data_size,
+                                     uint8_t* const rgba,
+                                     int stride, size_t size) {
+  WebPDecParams params;
+  WebPDecBuffer buf;
+  if (rgba == NULL) {
+    return NULL;
+  }
+  WebPInitDecBuffer(&buf);
+  WebPResetDecParams(&params);
+  params.output = &buf;
+  buf.colorspace    = colorspace;
+  buf.u.RGBA.rgba   = rgba;
+  buf.u.RGBA.stride = stride;
+  buf.u.RGBA.size   = size;
+  buf.is_external_memory = 1;
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  return rgba;
+}
+
+uint8_t* WebPDecodeRGBInto(const uint8_t* data, size_t data_size,
+                           uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeRGBAInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_rgbA, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeARGBInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeBGRInto(const uint8_t* data, size_t data_size,
+                           uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
+                           uint8_t* luma, size_t luma_size, int luma_stride,
+                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* v, size_t v_size, int v_stride) {
+  WebPDecParams params;
+  WebPDecBuffer output;
+  if (luma == NULL) return NULL;
+  WebPInitDecBuffer(&output);
+  WebPResetDecParams(&params);
+  params.output = &output;
+  output.colorspace      = MODE_YUV;
+  output.u.YUVA.y        = luma;
+  output.u.YUVA.y_stride = luma_stride;
+  output.u.YUVA.y_size   = luma_size;
+  output.u.YUVA.u        = u;
+  output.u.YUVA.u_stride = u_stride;
+  output.u.YUVA.u_size   = u_size;
+  output.u.YUVA.v        = v;
+  output.u.YUVA.v_stride = v_stride;
+  output.u.YUVA.v_size   = v_size;
+  output.is_external_memory = 1;
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  return luma;
+}
+
+//------------------------------------------------------------------------------
+
+static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* const data,
+                       size_t data_size, int* const width, int* const height,
+                       WebPDecBuffer* const keep_info) {
+  WebPDecParams params;
+  WebPDecBuffer output;
+
+  WebPInitDecBuffer(&output);
+  WebPResetDecParams(&params);
+  params.output = &output;
+  output.colorspace = mode;
+
+  // Retrieve (and report back) the required dimensions from bitstream.
+  if (!WebPGetInfo(data, data_size, &output.width, &output.height)) {
+    return NULL;
+  }
+  if (width != NULL) *width = output.width;
+  if (height != NULL) *height = output.height;
+
+  // Decode
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  if (keep_info != NULL) {    // keep track of the side-info
+    WebPCopyDecBuffer(&output, keep_info);
+  }
+  // return decoded samples (don't clear 'output'!)
+  return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
+}
+
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                       int* width, int* height) {
+  return Decode(MODE_RGB, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_RGBA, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_ARGB, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
+                       int* width, int* height) {
+  return Decode(MODE_BGR, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_BGRA, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
+                       int* width, int* height, uint8_t** u, uint8_t** v,
+                       int* stride, int* uv_stride) {
+  WebPDecBuffer output;   // only to preserve the side-infos
+  uint8_t* const out = Decode(MODE_YUV, data, data_size,
+                              width, height, &output);
+
+  if (out != NULL) {
+    const WebPYUVABuffer* const buf = &output.u.YUVA;
+    *u = buf->u;
+    *v = buf->v;
+    *stride = buf->y_stride;
+    *uv_stride = buf->u_stride;
+    assert(buf->u_stride == buf->v_stride);
+  }
+  return out;
+}
+
+static void DefaultFeatures(WebPBitstreamFeatures* const features) {
+  assert(features != NULL);
+  memset(features, 0, sizeof(*features));
+}
+
+static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
+                                 WebPBitstreamFeatures* const features) {
+  if (features == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  DefaultFeatures(features);
+
+  // Only parse enough of the data to retrieve the features.
+  return ParseHeadersInternal(data, data_size,
+                              &features->width, &features->height,
+                              &features->has_alpha, &features->has_animation,
+                              &features->format, NULL);
+}
+
+//------------------------------------------------------------------------------
+// WebPGetInfo()
+
+int WebPGetInfo(const uint8_t* data, size_t data_size,
+                int* width, int* height) {
+  WebPBitstreamFeatures features;
+
+  if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
+    return 0;
+  }
+
+  if (width != NULL) {
+    *width  = features.width;
+  }
+  if (height != NULL) {
+    *height = features.height;
+  }
+
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Advance decoding API
+
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
+                                  int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return 0;   // version mismatch
+  }
+  if (config == NULL) {
+    return 0;
+  }
+  memset(config, 0, sizeof(*config));
+  DefaultFeatures(&config->input);
+  WebPInitDecBuffer(&config->output);
+  return 1;
+}
+
+VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
+                                      WebPBitstreamFeatures* features,
+                                      int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return VP8_STATUS_INVALID_PARAM;   // version mismatch
+  }
+  if (features == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  return GetFeatures(data, data_size, features);
+}
+
+VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
+                         WebPDecoderConfig* config) {
+  WebPDecParams params;
+  VP8StatusCode status;
+
+  if (config == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+
+  status = GetFeatures(data, data_size, &config->input);
+  if (status != VP8_STATUS_OK) {
+    if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Not-enough-data treated as error.
+    }
+    return status;
+  }
+
+  WebPResetDecParams(&params);
+  params.output = &config->output;
+  params.options = &config->options;
+  status = DecodeInto(data, data_size, &params);
+
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// Cropping and rescaling.
+
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+                          VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
+  const int W = io->width;
+  const int H = io->height;
+  int x = 0, y = 0, w = W, h = H;
+
+  // Cropping
+  io->use_cropping = (options != NULL) && (options->use_cropping > 0);
+  if (io->use_cropping) {
+    w = options->crop_width;
+    h = options->crop_height;
+    x = options->crop_left;
+    y = options->crop_top;
+    if (!WebPIsRGBMode(src_colorspace)) {   // only snap for YUV420
+      x &= ~1;
+      y &= ~1;
+    }
+    if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
+      return 0;  // out of frame boundary error
+    }
+  }
+  io->crop_left   = x;
+  io->crop_top    = y;
+  io->crop_right  = x + w;
+  io->crop_bottom = y + h;
+  io->mb_w = w;
+  io->mb_h = h;
+
+  // Scaling
+  io->use_scaling = (options != NULL) && (options->use_scaling > 0);
+  if (io->use_scaling) {
+    if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+      return 0;
+    }
+    io->scaled_width = options->scaled_width;
+    io->scaled_height = options->scaled_height;
+  }
+
+  // Filter
+  io->bypass_filtering = options && options->bypass_filtering;
+
+  // Fancy upsampler
+#ifdef FANCY_UPSAMPLING
+  io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
+#endif
+
+  if (io->use_scaling) {
+    // disable filter (only for large downscaling ratio).
+    io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
+                           (io->scaled_height < H * 3 / 4);
+    io->fancy_upsampling = 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/dec/webpi.h b/src/main/jni/libwebp/dec/webpi.h
new file mode 100644
index 000000000..457c72eda
--- /dev/null
+++ b/src/main/jni/libwebp/dec/webpi.h
@@ -0,0 +1,120 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Internal header: WebP decoding parameters and custom IO on buffer
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#ifndef WEBP_DEC_WEBPI_H_
+#define WEBP_DEC_WEBPI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../utils/rescaler.h"
+#include "./decode_vp8.h"
+
+//------------------------------------------------------------------------------
+// WebPDecParams: Decoding output parameters. Transient internal object.
+
+typedef struct WebPDecParams WebPDecParams;
+typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
+typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos);
+
+struct WebPDecParams {
+  WebPDecBuffer* output;             // output buffer.
+  uint8_t* tmp_y, *tmp_u, *tmp_v;    // cache for the fancy upsampler
+                                     // or used for tmp rescaling
+
+  int last_y;                 // coordinate of the line that was last output
+  const WebPDecoderOptions* options;  // if not NULL, use alt decoding features
+  // rescalers
+  WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a;
+  void* memory;                  // overall scratch memory for the output work.
+
+  OutputFunc emit;               // output RGB or YUV samples
+  OutputFunc emit_alpha;         // output alpha channel
+  OutputRowFunc emit_alpha_row;  // output one line of rescaled alpha values
+};
+
+// Should be called first, before any use of the WebPDecParams object.
+void WebPResetDecParams(WebPDecParams* const params);
+
+//------------------------------------------------------------------------------
+// Header parsing helpers
+
+// Structure storing a description of the RIFF headers.
+typedef struct {
+  const uint8_t* data;         // input buffer
+  size_t data_size;            // input buffer size
+  int have_all_data;           // true if all data is known to be available
+  size_t offset;               // offset to main data chunk (VP8 or VP8L)
+  const uint8_t* alpha_data;   // points to alpha chunk (if present)
+  size_t alpha_data_size;      // alpha chunk size
+  size_t compressed_size;      // VP8/VP8L compressed data size
+  size_t riff_size;            // size of the riff payload (or 0 if absent)
+  int is_lossless;             // true if a VP8L chunk is present
+} WebPHeaderStructure;
+
+// Skips over all valid chunks prior to the first VP8/VP8L frame header.
+// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
+// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
+// in the case of non-decodable features (animation for instance).
+// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
+// fields are updated appropriately upon success.
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
+
+//------------------------------------------------------------------------------
+// Misc utils
+
+// Initializes VP8Io with custom setup, io and teardown functions. The default
+// hooks will use the supplied 'params' as io->opaque handle.
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
+
+// Setup crop_xxx fields, mb_w and mb_h in io. 'src_colorspace' refers
+// to the *compressed* format, not the output one.
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+                          VP8Io* const io, WEBP_CSP_MODE src_colorspace);
+
+//------------------------------------------------------------------------------
+// Internal functions regarding WebPDecBuffer memory (in buffer.c).
+// Don't really need to be externally visible for now.
+
+// Prepare 'buffer' with the requested initial dimensions width/height.
+// If no external storage is supplied, initializes buffer by allocating output
+// memory and setting up the stride information. Validate the parameters. Return
+// an error code in case of problem (no memory, or invalid stride / size /
+// dimension / etc.). If *options is not NULL, also verify that the options'
+// parameters are valid and apply them to the width/height dimensions of the
+// output buffer. This takes cropping / scaling / rotation into account.
+// Also incorporates the options->flip flag to flip the buffer parameters if
+// needed.
+VP8StatusCode WebPAllocateDecBuffer(int width, int height,
+                                    const WebPDecoderOptions* const options,
+                                    WebPDecBuffer* const buffer);
+
+// Flip buffer vertically by negating the various strides.
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
+
+// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
+// memory (still held by 'src').
+void WebPCopyDecBuffer(const WebPDecBuffer* const src,
+                       WebPDecBuffer* const dst);
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_WEBPI_H_ */
diff --git a/src/main/jni/libwebp/dsp/alpha_processing.c b/src/main/jni/libwebp/dsp/alpha_processing.c
new file mode 100644
index 000000000..d0f7a6cca
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/alpha_processing.c
@@ -0,0 +1,329 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include "./dsp.h"
+
+// Tables can be faster on some platform but incur some extra binary size (~2k).
+// #define USE_TABLES_FOR_ALPHA_MULT
+
+// -----------------------------------------------------------------------------
+
+#define MFIX 24    // 24bit fixed-point arithmetic
+#define HALF ((1u << MFIX) >> 1)
+#define KINV_255 ((1u << MFIX) / 255u)
+
+static uint32_t Mult(uint8_t x, uint32_t mult) {
+  const uint32_t v = (x * mult + HALF) >> MFIX;
+  assert(v <= 255);  // <- 24bit precision is enough to ensure that.
+  return v;
+}
+
+#ifdef USE_TABLES_FOR_ALPHA_MULT
+
+static const uint32_t kMultTables[2][256] = {
+  {    // (255u << MFIX) / alpha
+    0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
+    0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
+    0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
+    0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
+    0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
+    0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
+    0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
+    0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
+    0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
+    0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
+    0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
+    0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
+    0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
+    0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
+    0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
+    0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
+    0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
+    0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
+    0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
+    0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
+    0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
+    0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
+    0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
+    0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
+    0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
+    0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
+    0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
+    0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
+    0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
+    0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
+    0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
+    0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
+    0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
+    0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
+    0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
+    0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
+    0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
+    0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
+    0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
+    0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
+    0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
+    0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
+    0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
+  {   // alpha * KINV_255
+    0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
+    0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
+    0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
+    0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
+    0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
+    0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
+    0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
+    0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
+    0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
+    0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
+    0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
+    0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
+    0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
+    0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
+    0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
+    0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
+    0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
+    0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
+    0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
+    0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
+    0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
+    0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
+    0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
+    0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
+    0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
+    0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
+    0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
+    0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
+    0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
+    0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
+    0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
+    0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
+    0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
+    0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
+    0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
+    0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
+    0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
+    0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
+    0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
+    0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
+    0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
+    0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
+    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
+};
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
+  return kMultTables[!inverse][a];
+}
+
+#else
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
+  return inverse ? (255u << MFIX) / a : a * KINV_255;
+}
+
+#endif    // USE_TABLES_FOR_ALPHA_MULT
+
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    const uint32_t argb = ptr[x];
+    if (argb < 0xff000000u) {      // alpha < 255
+      if (argb <= 0x00ffffffu) {   // alpha == 0
+        ptr[x] = 0;
+      } else {
+        const uint32_t alpha = (argb >> 24) & 0xff;
+        const uint32_t scale = GetScale(alpha, inverse);
+        uint32_t out = argb & 0xff000000u;
+        out |= Mult(argb >>  0, scale) <<  0;
+        out |= Mult(argb >>  8, scale) <<  8;
+        out |= Mult(argb >> 16, scale) << 16;
+        ptr[x] = out;
+      }
+    }
+  }
+}
+
+static void MultRow(uint8_t* const ptr, const uint8_t* const alpha,
+                    int width, int inverse) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    const uint32_t a = alpha[x];
+    if (a != 255) {
+      if (a == 0) {
+        ptr[x] = 0;
+      } else {
+        const uint32_t scale = GetScale(a, inverse);
+        ptr[x] = Mult(ptr[x], scale);
+      }
+    }
+  }
+}
+
+#undef KINV_255
+#undef HALF
+#undef MFIX
+
+void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
+                    int width, int inverse);
+
+//------------------------------------------------------------------------------
+// Generic per-plane calls
+
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
+                      int inverse) {
+  int n;
+  for (n = 0; n < num_rows; ++n) {
+    WebPMultARGBRow((uint32_t*)ptr, width, inverse);
+    ptr += stride;
+  }
+}
+
+void WebPMultRows(uint8_t* ptr, int stride,
+                  const uint8_t* alpha, int alpha_stride,
+                  int width, int num_rows, int inverse) {
+  int n;
+  for (n = 0; n < num_rows; ++n) {
+    WebPMultRow(ptr, alpha, width, inverse);
+    ptr += stride;
+    alpha += alpha_stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Premultiplied modes
+
+// non dithered-modes
+
+// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
+// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
+// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
+#if 1     // (int)(x * a / 255.)
+#define MULTIPLIER(a)   ((a) * 32897U)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+#else     // (int)(x * a / 255. + .5)
+#define MULTIPLIER(a) ((a) * 65793U)
+#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
+#endif
+
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+                               int w, int h, int stride) {
+  while (h-- > 0) {
+    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+    int i;
+    for (i = 0; i < w; ++i) {
+      const uint32_t a = alpha[4 * i];
+      if (a != 0xff) {
+        const uint32_t mult = MULTIPLIER(a);
+        rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+        rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+        rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+      }
+    }
+    rgba += stride;
+  }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// rgbA4444
+
+#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
+
+static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
+  return (x & 0xf0) | (x >> 4);
+}
+
+static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
+  return (x & 0x0f) | (x << 4);
+}
+
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+  return (x * m) >> 16;
+}
+
+static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
+                                               int w, int h, int stride,
+                                               int rg_byte_pos /* 0 or 1 */) {
+  while (h-- > 0) {
+    int i;
+    for (i = 0; i < w; ++i) {
+      const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
+      const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
+      const uint8_t a = ba & 0x0f;
+      const uint32_t mult = MULTIPLIER(a);
+      const uint8_t r = multiply(dither_hi(rg), mult);
+      const uint8_t g = multiply(dither_lo(rg), mult);
+      const uint8_t b = multiply(dither_hi(ba), mult);
+      rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
+      rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
+    }
+    rgba4444 += stride;
+  }
+}
+#undef MULTIPLIER
+
+static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
+                                   int w, int h, int stride) {
+#ifdef WEBP_SWAP_16BIT_CSP
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
+#else
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
+#endif
+}
+
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  uint8_t alpha_mask = 0xff;
+  int i, j;
+
+  for (j = 0; j < height; ++j) {
+    for (i = 0; i < width; ++i) {
+      const uint8_t alpha_value = argb[4 * i];
+      alpha[i] = alpha_value;
+      alpha_mask &= alpha_value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  return (alpha_mask == 0xff);
+}
+
+void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
+void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
+int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
+
+//------------------------------------------------------------------------------
+// Init function
+
+extern void WebPInitAlphaProcessingSSE2(void);
+
+void WebPInitAlphaProcessing(void) {
+  WebPMultARGBRow = MultARGBRow;
+  WebPMultRow = MultRow;
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
+  WebPExtractAlpha = ExtractAlpha;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitAlphaProcessingSSE2();
+    }
+#endif
+  }
+}
diff --git a/src/main/jni/libwebp/dsp/alpha_processing_sse2.c b/src/main/jni/libwebp/dsp/alpha_processing_sse2.c
new file mode 100644
index 000000000..3d0a9b579
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/alpha_processing_sse2.c
@@ -0,0 +1,77 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+//------------------------------------------------------------------------------
+
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  // alpha_and stores an 'and' operation of all the alpha[] values. The final
+  // value is not 0xff if any of the alpha[] is not equal to 0xff.
+  uint32_t alpha_and = 0xff;
+  int i, j;
+  const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha
+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
+  __m128i all_alphas = all_0xff;
+
+  // We must be able to access 3 extra bytes after the last written byte
+  // 'src[4 * width - 4]', because we don't know if alpha is the first or the
+  // last byte of the quadruplet.
+  const int limit = (width - 1) & ~7;
+
+  for (j = 0; j < height; ++j) {
+    const __m128i* src = (const __m128i*)argb;
+    for (i = 0; i < limit; i += 8) {
+      // load 32 argb bytes
+      const __m128i a0 = _mm_loadu_si128(src + 0);
+      const __m128i a1 = _mm_loadu_si128(src + 1);
+      const __m128i b0 = _mm_and_si128(a0, a_mask);
+      const __m128i b1 = _mm_and_si128(a1, a_mask);
+      const __m128i c0 = _mm_packs_epi32(b0, b1);
+      const __m128i d0 = _mm_packus_epi16(c0, c0);
+      // store
+      _mm_storel_epi64((__m128i*)&alpha[i], d0);
+      // accumulate eight alpha 'and' in parallel
+      all_alphas = _mm_and_si128(all_alphas, d0);
+      src += 2;
+    }
+    for (; i < width; ++i) {
+      const uint32_t alpha_value = argb[4 * i];
+      alpha[i] = alpha_value;
+      alpha_and &= alpha_value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  // Combine the eight alpha 'and' into a 8-bit mask.
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+  return (alpha_and == 0xff);
+}
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// Init function
+
+extern void WebPInitAlphaProcessingSSE2(void);
+
+void WebPInitAlphaProcessingSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  WebPExtractAlpha = ExtractAlpha;
+#endif
+}
diff --git a/src/main/jni/libwebp/dsp/cpu.c b/src/main/jni/libwebp/dsp/cpu.c
new file mode 100644
index 000000000..8754f8746
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/cpu.c
@@ -0,0 +1,130 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// CPU detection
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(__ANDROID__)
+#include <cpu-features.h>
+#endif
+
+//------------------------------------------------------------------------------
+// SSE2 detection.
+//
+
+// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
+#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "mov %%ebx, %%edi\n"
+    "cpuid\n"
+    "xchg %%edi, %%ebx\n"
+    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#elif defined(__i386__) || defined(__x86_64__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "cpuid\n"
+    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
+#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
+#elif defined(WEBP_MSC_SSE2)
+#define GetCPUInfo __cpuid
+#endif
+
+// NaCl has no support for xgetbv or the raw opcode.
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+static WEBP_INLINE uint64_t xgetbv(void) {
+  const uint32_t ecx = 0;
+  uint32_t eax, edx;
+  // Use the raw opcode for xgetbv for compatibility with older toolchains.
+  __asm__ volatile (
+    ".byte 0x0f, 0x01, 0xd0\n"
+    : "=a"(eax), "=d"(edx) : "c" (ecx));
+  return ((uint64_t)edx << 32) | eax;
+}
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+#define xgetbv() _xgetbv(0)
+#elif defined(_MSC_VER) && defined(_M_IX86)
+static WEBP_INLINE uint64_t xgetbv(void) {
+  uint32_t eax_, edx_;
+  __asm {
+    xor ecx, ecx  // ecx = 0
+    // Use the raw opcode for xgetbv for compatibility with older toolchains.
+    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+    mov eax_, eax
+    mov edx_, edx
+  }
+  return ((uint64_t)edx_ << 32) | eax_;
+}
+#else
+#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+static int x86CPUInfo(CPUFeature feature) {
+  int cpu_info[4];
+  GetCPUInfo(cpu_info, 1);
+  if (feature == kSSE2) {
+    return 0 != (cpu_info[3] & 0x04000000);
+  }
+  if (feature == kSSE3) {
+    return 0 != (cpu_info[2] & 0x00000001);
+  }
+  if (feature == kAVX) {
+    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
+    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
+      // XMM state and YMM state enabled by the OS.
+      return (xgetbv() & 0x6) == 0x6;
+    }
+  }
+  if (feature == kAVX2) {
+    if (x86CPUInfo(kAVX)) {
+      GetCPUInfo(cpu_info, 7);
+      return ((cpu_info[1] & 0x00000020) == 0x00000020);
+    }
+  }
+  return 0;
+}
+VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
+#elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
+static int AndroidCPUInfo(CPUFeature feature) {
+  const AndroidCpuFamily cpu_family = android_getCpuFamily();
+  const uint64_t cpu_features = android_getCpuFeatures();
+  if (feature == kNEON) {
+    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
+            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
+  }
+  return 0;
+}
+VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
+#elif defined(WEBP_USE_NEON)
+// define a dummy function to enable turning off NEON at runtime by setting
+// VP8DecGetCPUInfo = NULL
+static int armCPUInfo(CPUFeature feature) {
+  (void)feature;
+  return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#elif defined(WEBP_USE_MIPS32)
+static int mipsCPUInfo(CPUFeature feature) {
+  (void)feature;
+  return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
+#else
+VP8CPUInfo VP8GetCPUInfo = NULL;
+#endif
+
diff --git a/src/main/jni/libwebp/dsp/dec.c b/src/main/jni/libwebp/dsp/dec.c
new file mode 100644
index 000000000..65a2a885b
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dec.c
@@ -0,0 +1,731 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical decoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+#define STORE(x, y, v) \
+  dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
+
+#define STORE2(y, dc, d, c) do {    \
+  const int DC = (dc);              \
+  STORE(0, y, DC + (d));            \
+  STORE(1, y, DC + (c));            \
+  STORE(2, y, DC - (c));            \
+  STORE(3, y, DC - (d));            \
+} while (0)
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+#define MUL(a, b) (((a) * (b)) >> 16)
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  int C[4 * 4], *tmp;
+  int i;
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // vertical pass
+    const int a = in[0] + in[8];    // [-4096, 4094]
+    const int b = in[0] - in[8];    // [-4095, 4095]
+    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);   // [-3783, 3783]
+    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);   // [-3785, 3781]
+    tmp[0] = a + d;   // [-7881, 7875]
+    tmp[1] = b + c;   // [-7878, 7878]
+    tmp[2] = b - c;   // [-7878, 7878]
+    tmp[3] = a - d;   // [-7877, 7879]
+    tmp += 4;
+    in++;
+  }
+  // Each pass is expanding the dynamic range by ~3.85 (upper bound).
+  // The exact value is (2. + (kC1 + kC2) / 65536).
+  // After the second pass, maximum interval is [-3794, 3794], assuming
+  // an input in [-2048, 2047] interval. We then need to add a dst value
+  // in the [0, 255] range.
+  // In the worst case scenario, the input to clip_8b() can be as large as
+  // [-60713, 60968].
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // horizontal pass
+    const int dc = tmp[0] + 4;
+    const int a =  dc +  tmp[8];
+    const int b =  dc -  tmp[8];
+    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
+    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    STORE(0, 0, a + d);
+    STORE(1, 0, b + c);
+    STORE(2, 0, b - c);
+    STORE(3, 0, a - d);
+    tmp++;
+    dst += BPS;
+  }
+}
+
+// Simplified transform when only in[0], in[1] and in[4] are non-zero
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  const int a = in[0] + 4;
+  const int c4 = MUL(in[4], kC2);
+  const int d4 = MUL(in[4], kC1);
+  const int c1 = MUL(in[1], kC2);
+  const int d1 = MUL(in[1], kC1);
+  STORE2(0, a + d4, d1, c1);
+  STORE2(1, a + c4, d1, c1);
+  STORE2(2, a - c4, d1, c1);
+  STORE2(3, a - d4, d1, c1);
+}
+#undef MUL
+#undef STORE2
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
+  if (do_two) {
+    TransformOne(in + 16, dst + 4);
+  }
+}
+
+static void TransformUV(const int16_t* in, uint8_t* dst) {
+  VP8Transform(in + 0 * 16, dst, 1);
+  VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
+}
+
+static void TransformDC(const int16_t *in, uint8_t* dst) {
+  const int DC = in[0] + 4;
+  int i, j;
+  for (j = 0; j < 4; ++j) {
+    for (i = 0; i < 4; ++i) {
+      STORE(i, j, DC);
+    }
+  }
+}
+
+static void TransformDCUV(const int16_t* in, uint8_t* dst) {
+  if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
+  if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
+  if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
+  if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
+}
+
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Paragraph 14.3
+
+static void TransformWHT(const int16_t* in, int16_t* out) {
+  int tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i) {
+    const int a0 = in[0 + i] + in[12 + i];
+    const int a1 = in[4 + i] + in[ 8 + i];
+    const int a2 = in[4 + i] - in[ 8 + i];
+    const int a3 = in[0 + i] - in[12 + i];
+    tmp[0  + i] = a0 + a1;
+    tmp[8  + i] = a0 - a1;
+    tmp[4  + i] = a3 + a2;
+    tmp[12 + i] = a3 - a2;
+  }
+  for (i = 0; i < 4; ++i) {
+    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
+    const int a0 = dc             + tmp[3 + i * 4];
+    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
+    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
+    const int a3 = dc             - tmp[3 + i * 4];
+    out[ 0] = (a0 + a1) >> 3;
+    out[16] = (a3 + a2) >> 3;
+    out[32] = (a0 - a1) >> 3;
+    out[48] = (a3 - a2) >> 3;
+    out += 64;
+  }
+}
+
+void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+
+static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
+  const uint8_t* top = dst - BPS;
+  const uint8_t* const clip0 = VP8kclip1 - top[-1];
+  int y;
+  for (y = 0; y < size; ++y) {
+    const uint8_t* const clip = clip0 + dst[-1];
+    int x;
+    for (x = 0; x < size; ++x) {
+      dst[x] = clip[top[x]];
+    }
+    dst += BPS;
+  }
+}
+static void TM4(uint8_t *dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t *dst)  { TrueMotion(dst, 16); }
+
+//------------------------------------------------------------------------------
+// 16x16
+
+static void VE16(uint8_t *dst) {     // vertical
+  int j;
+  for (j = 0; j < 16; ++j) {
+    memcpy(dst + j * BPS, dst - BPS, 16);
+  }
+}
+
+static void HE16(uint8_t *dst) {     // horizontal
+  int j;
+  for (j = 16; j > 0; --j) {
+    memset(dst, dst[-1], 16);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void Put16(int v, uint8_t* dst) {
+  int j;
+  for (j = 0; j < 16; ++j) {
+    memset(dst + j * BPS, v, 16);
+  }
+}
+
+static void DC16(uint8_t *dst) {    // DC
+  int DC = 16;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    DC += dst[-1 + j * BPS] + dst[j - BPS];
+  }
+  Put16(DC >> 5, dst);
+}
+
+static void DC16NoTop(uint8_t *dst) {   // DC with top samples not available
+  int DC = 8;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    DC += dst[-1 + j * BPS];
+  }
+  Put16(DC >> 4, dst);
+}
+
+static void DC16NoLeft(uint8_t *dst) {  // DC with left samples not available
+  int DC = 8;
+  int i;
+  for (i = 0; i < 16; ++i) {
+    DC += dst[i - BPS];
+  }
+  Put16(DC >> 4, dst);
+}
+
+static void DC16NoTopLeft(uint8_t *dst) {  // DC with no top and left samples
+  Put16(0x80, dst);
+}
+
+//------------------------------------------------------------------------------
+// 4x4
+
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+static void VE4(uint8_t *dst) {    // vertical
+  const uint8_t* top = dst - BPS;
+  const uint8_t vals[4] = {
+    AVG3(top[-1], top[0], top[1]),
+    AVG3(top[ 0], top[1], top[2]),
+    AVG3(top[ 1], top[2], top[3]),
+    AVG3(top[ 2], top[3], top[4])
+  };
+  int i;
+  for (i = 0; i < 4; ++i) {
+    memcpy(dst + i * BPS, vals, sizeof(vals));
+  }
+}
+
+static void HE4(uint8_t *dst) {    // horizontal
+  const int A = dst[-1 - BPS];
+  const int B = dst[-1];
+  const int C = dst[-1 + BPS];
+  const int D = dst[-1 + 2 * BPS];
+  const int E = dst[-1 + 3 * BPS];
+  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C);
+  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D);
+  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E);
+  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E);
+}
+
+static void DC4(uint8_t *dst) {   // DC
+  uint32_t dc = 4;
+  int i;
+  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
+  dc >>= 3;
+  for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
+}
+
+static void RD4(uint8_t *dst) {   // Down-right
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  DST(0, 3)                                     = AVG3(J, K, L);
+  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
+  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
+  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
+  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
+  DST(3, 0)                                     = AVG3(D, C, B);
+}
+
+static void LD4(uint8_t *dst) {   // Down-Left
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  const int E = dst[4 - BPS];
+  const int F = dst[5 - BPS];
+  const int G = dst[6 - BPS];
+  const int H = dst[7 - BPS];
+  DST(0, 0)                                     = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
+  DST(3, 3)                                     = AVG3(G, H, H);
+}
+
+static void VR4(uint8_t *dst) {   // Vertical-Right
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  DST(0, 0) = DST(1, 2) = AVG2(X, A);
+  DST(1, 0) = DST(2, 2) = AVG2(A, B);
+  DST(2, 0) = DST(3, 2) = AVG2(B, C);
+  DST(3, 0)             = AVG2(C, D);
+
+  DST(0, 3) =             AVG3(K, J, I);
+  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+  DST(3, 1) =             AVG3(B, C, D);
+}
+
+static void VL4(uint8_t *dst) {   // Vertical-Left
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  const int E = dst[4 - BPS];
+  const int F = dst[5 - BPS];
+  const int G = dst[6 - BPS];
+  const int H = dst[7 - BPS];
+  DST(0, 0) =             AVG2(A, B);
+  DST(1, 0) = DST(0, 2) = AVG2(B, C);
+  DST(2, 0) = DST(1, 2) = AVG2(C, D);
+  DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+  DST(0, 1) =             AVG3(A, B, C);
+  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+              DST(3, 2) = AVG3(E, F, G);
+              DST(3, 3) = AVG3(F, G, H);
+}
+
+static void HU4(uint8_t *dst) {   // Horizontal-Up
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  DST(0, 0) =             AVG2(I, J);
+  DST(2, 0) = DST(0, 1) = AVG2(J, K);
+  DST(2, 1) = DST(0, 2) = AVG2(K, L);
+  DST(1, 0) =             AVG3(I, J, K);
+  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+  DST(3, 2) = DST(2, 2) =
+    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+static void HD4(uint8_t *dst) {  // Horizontal-Down
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+
+  DST(0, 0) = DST(2, 1) = AVG2(I, X);
+  DST(0, 1) = DST(2, 2) = AVG2(J, I);
+  DST(0, 2) = DST(2, 3) = AVG2(K, J);
+  DST(0, 3)             = AVG2(L, K);
+
+  DST(3, 0)             = AVG3(A, B, C);
+  DST(2, 0)             = AVG3(X, A, B);
+  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+  DST(1, 3)             = AVG3(L, K, J);
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t *dst) {    // vertical
+  int j;
+  for (j = 0; j < 8; ++j) {
+    memcpy(dst + j * BPS, dst - BPS, 8);
+  }
+}
+
+static void HE8uv(uint8_t *dst) {    // horizontal
+  int j;
+  for (j = 0; j < 8; ++j) {
+    memset(dst, dst[-1], 8);
+    dst += BPS;
+  }
+}
+
+// helper for chroma-DC predictions
+static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
+  int j;
+  for (j = 0; j < 8; ++j) {
+    memset(dst + j * BPS, value, 8);
+  }
+}
+
+static void DC8uv(uint8_t *dst) {     // DC
+  int dc0 = 8;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[i - BPS] + dst[-1 + i * BPS];
+  }
+  Put8x8uv(dc0 >> 4, dst);
+}
+
+static void DC8uvNoLeft(uint8_t *dst) {   // DC with no left samples
+  int dc0 = 4;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[i - BPS];
+  }
+  Put8x8uv(dc0 >> 3, dst);
+}
+
+static void DC8uvNoTop(uint8_t *dst) {  // DC with no top samples
+  int dc0 = 4;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[-1 + i * BPS];
+  }
+  Put8x8uv(dc0 >> 3, dst);
+}
+
+static void DC8uvNoTopLeft(uint8_t *dst) {    // DC with nothing
+  Put8x8uv(0x80, dst);
+}
+
+//------------------------------------------------------------------------------
+// default C implementations
+
+const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
+  DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
+};
+
+const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
+  DC16, TM16, VE16, HE16,
+  DC16NoTop, DC16NoLeft, DC16NoTopLeft
+};
+
+const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
+  DC8uv, TM8uv, VE8uv, HE8uv,
+  DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
+};
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// 4 pixels in, 2 pixels out
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
+  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
+  p[-step] = VP8kclip1[p0 + a2];
+  p[    0] = VP8kclip1[q0 - a1];
+}
+
+// 4 pixels in, 4 pixels out
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0);
+  const int a1 = VP8ksclip2[(a + 4) >> 3];
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
+  const int a3 = (a1 + 1) >> 1;
+  p[-2*step] = VP8kclip1[p1 + a3];
+  p[-  step] = VP8kclip1[p0 + a2];
+  p[      0] = VP8kclip1[q0 - a1];
+  p[   step] = VP8kclip1[q1 - a3];
+}
+
+// 6 pixels in, 6 pixels out
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
+  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
+  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
+  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
+  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+  p[-3*step] = VP8kclip1[p2 + a3];
+  p[-2*step] = VP8kclip1[p1 + a2];
+  p[-  step] = VP8kclip1[p0 + a1];
+  p[      0] = VP8kclip1[q0 - a1];
+  p[   step] = VP8kclip1[q1 - a2];
+  p[ 2*step] = VP8kclip1[q2 - a3];
+}
+
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
+}
+
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+                                     int step, int t, int it) {
+  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
+  const int p0 = p[-step], q0 = p[0];
+  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
+  if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
+  return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
+         VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
+         VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  const int thresh2 = 2 * thresh + 1;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i, stride, thresh2)) {
+      do_filter2(p + i, stride);
+    }
+  }
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  const int thresh2 = 2 * thresh + 1;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i * stride, 1, thresh2)) {
+      do_filter2(p + i * stride, 1);
+    }
+  }
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter6(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter4(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+//------------------------------------------------------------------------------
+
+VP8DecIdct2 VP8Transform;
+VP8DecIdct VP8TransformAC3;
+VP8DecIdct VP8TransformUV;
+VP8DecIdct VP8TransformDC;
+VP8DecIdct VP8TransformDCUV;
+
+VP8LumaFilterFunc VP8VFilter16;
+VP8LumaFilterFunc VP8HFilter16;
+VP8ChromaFilterFunc VP8VFilter8;
+VP8ChromaFilterFunc VP8HFilter8;
+VP8LumaFilterFunc VP8VFilter16i;
+VP8LumaFilterFunc VP8HFilter16i;
+VP8ChromaFilterFunc VP8VFilter8i;
+VP8ChromaFilterFunc VP8HFilter8i;
+VP8SimpleFilterFunc VP8SimpleVFilter16;
+VP8SimpleFilterFunc VP8SimpleHFilter16;
+VP8SimpleFilterFunc VP8SimpleVFilter16i;
+VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+extern void VP8DspInitSSE2(void);
+extern void VP8DspInitNEON(void);
+extern void VP8DspInitMIPS32(void);
+
+void VP8DspInit(void) {
+  VP8InitClipTables();
+
+  VP8TransformWHT = TransformWHT;
+  VP8Transform = TransformTwo;
+  VP8TransformUV = TransformUV;
+  VP8TransformDC = TransformDC;
+  VP8TransformDCUV = TransformDCUV;
+  VP8TransformAC3 = TransformAC3;
+
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8DspInitSSE2();
+    }
+#elif defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8DspInitNEON();
+    }
+#elif defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8DspInitMIPS32();
+    }
+#endif
+  }
+}
+
diff --git a/src/main/jni/libwebp/dsp/dec_clip_tables.c b/src/main/jni/libwebp/dsp/dec_clip_tables.c
new file mode 100644
index 000000000..eec5a6d1a
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dec_clip_tables.c
@@ -0,0 +1,366 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Clipping tables for filtering
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#define USE_STATIC_TABLES     // undefine to have run-time table initialization
+
+#ifdef USE_STATIC_TABLES
+
+static const uint8_t abs0[255 + 255 + 1] = {
+  0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
+  0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
+  0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
+  0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
+  0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
+  0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
+  0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
+  0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
+  0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
+  0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
+  0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
+  0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
+  0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
+  0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
+  0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
+  0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
+  0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
+  0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
+  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
+  0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+  0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
+  0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+static const int8_t sclip1[1020 + 1020 + 1] = {
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
+  0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
+  0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+  0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
+  0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+  0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
+  0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
+  0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
+  0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
+  0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+};
+
+static const int8_t sclip2[112 + 112 + 1] = {
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
+  0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
+};
+
+static const uint8_t clip1[255 + 511 + 1] = {
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+#else
+
+// uninitialized tables
+static uint8_t abs0[255 + 255 + 1];
+static int8_t sclip1[1020 + 1020 + 1];
+static int8_t sclip2[112 + 112 + 1];
+static uint8_t clip1[255 + 511 + 1];
+
+// We declare this variable 'volatile' to prevent instruction reordering
+// and make sure it's set to true _last_ (so as to be thread-safe)
+static volatile int tables_ok = 0;
+
+#endif
+
+const int8_t* const VP8ksclip1 = &sclip1[1020];
+const int8_t* const VP8ksclip2 = &sclip2[112];
+const uint8_t* const VP8kclip1 = &clip1[255];
+const uint8_t* const VP8kabs0 = &abs0[255];
+
+void VP8InitClipTables(void) {
+#if !defined(USE_STATIC_TABLES)
+  int i;
+  if (!tables_ok) {
+    for (i = -255; i <= 255; ++i) {
+      abs0[255 + i] = (i < 0) ? -i : i;
+    }
+    for (i = -1020; i <= 1020; ++i) {
+      sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
+    }
+    for (i = -112; i <= 112; ++i) {
+      sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
+    }
+    for (i = -255; i <= 255 + 255; ++i) {
+      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+    }
+    tables_ok = 1;
+  }
+#endif    // USE_STATIC_TABLES
+}
diff --git a/src/main/jni/libwebp/dsp/dec_mips32.c b/src/main/jni/libwebp/dsp/dec_mips32.c
new file mode 100644
index 000000000..3e89ed37a
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dec_mips32.c
@@ -0,0 +1,578 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of dsp functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+
+static WEBP_INLINE int abs_mips32(int x) {
+  const int sign = x >> 31;
+  return (x ^ sign) - sign;
+}
+
+// 4 pixels in, 2 pixels out
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
+  const int a1 = VP8ksclip2[(a + 4) >> 3];
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
+  p[-step] = VP8kclip1[p0 + a2];
+  p[    0] = VP8kclip1[q0 - a1];
+}
+
+// 4 pixels in, 4 pixels out
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0);
+  const int a1 = VP8ksclip2[(a + 4) >> 3];
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
+  const int a3 = (a1 + 1) >> 1;
+  p[-2 * step] = VP8kclip1[p1 + a3];
+  p[-    step] = VP8kclip1[p0 + a2];
+  p[        0] = VP8kclip1[q0 - a1];
+  p[     step] = VP8kclip1[q1 - a3];
+}
+
+// 6 pixels in, 6 pixels out
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
+  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
+  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
+  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+  p[-3 * step] = VP8kclip1[p2 + a3];
+  p[-2 * step] = VP8kclip1[p1 + a2];
+  p[-    step] = VP8kclip1[p0 + a1];
+  p[        0] = VP8kclip1[q0 - a1];
+  p[     step] = VP8kclip1[q1 - a2];
+  p[ 2 * step] = VP8kclip1[q2 - a3];
+}
+
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) <= thresh);
+}
+
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+                                     int step, int t, int it) {
+  const int p3 = p[-4 * step], p2 = p[-3 * step];
+  const int p1 = p[-2 * step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
+  if ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) > t) {
+    return 0;
+  }
+  return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
+         abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
+         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
+}
+
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter6(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter4(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i, stride, thresh)) {
+      do_filter2(p + i, stride);
+    }
+  }
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i * stride, 1, thresh)) {
+      do_filter2(p + i * stride, 1);
+    }
+  }
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  int temp10, temp11, temp12, temp13, temp14;
+  int temp15, temp16, temp17, temp18;
+  int16_t* p_in = (int16_t*)in;
+
+  // loops unrolled and merged to avoid usage of tmp buffer
+  // and to reduce number of stalls. MUL macro is written
+  // in assembler and inlined
+  __asm__ volatile(
+    "lh       %[temp0],  0(%[in])                      \n\t"
+    "lh       %[temp8],  16(%[in])                     \n\t"
+    "lh       %[temp4],  8(%[in])                      \n\t"
+    "lh       %[temp12], 24(%[in])                     \n\t"
+    "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
+    "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp12], %[kC1]             \n\t"
+    "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
+    "mul      %[temp12], %[temp12], %[kC2]             \n\t"
+    "lh       %[temp1],  2(%[in])                      \n\t"
+    "lh       %[temp5],  10(%[in])                     \n\t"
+    "lh       %[temp9],  18(%[in])                     \n\t"
+    "lh       %[temp13], 26(%[in])                     \n\t"
+    "sra      %[temp8],  %[temp8],  16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp4],  %[temp4],  16                 \n\t"
+    "sra      %[temp12], %[temp12], 16                 \n\t"
+    "lh       %[temp2],  4(%[in])                      \n\t"
+    "lh       %[temp6],  12(%[in])                     \n\t"
+    "lh       %[temp10], 20(%[in])                     \n\t"
+    "lh       %[temp14], 28(%[in])                     \n\t"
+    "subu     %[temp17], %[temp8],  %[temp17]          \n\t"
+    "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"
+    "addu     %[temp8],  %[temp16], %[temp4]           \n\t"
+    "subu     %[temp4],  %[temp16], %[temp4]           \n\t"
+    "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
+    "lh       %[temp3],  6(%[in])                      \n\t"
+    "lh       %[temp7],  14(%[in])                     \n\t"
+    "lh       %[temp11], 22(%[in])                     \n\t"
+    "lh       %[temp15], 30(%[in])                     \n\t"
+    "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
+    "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC2]             \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp14], %[kC1]             \n\t"
+    "mul      %[temp14], %[temp14], %[kC2]             \n\t"
+    "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
+    "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
+    "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
+    "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
+    "addu     %[temp10], %[temp16], %[temp6]           \n\t"
+    "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
+    "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp15], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
+    "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
+    "addiu    %[temp8],  %[temp8],  4                  \n\t"
+    "addiu    %[temp12], %[temp12], 4                  \n\t"
+    "addiu    %[temp0],  %[temp0],  4                  \n\t"
+    "addiu    %[temp4],  %[temp4],  4                  \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp17], %[temp11], %[temp17]          \n\t"
+    "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
+    "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
+    "addu     %[temp11], %[temp16], %[temp7]           \n\t"
+    "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
+    "addu     %[temp16], %[temp8],  %[temp10]          \n\t"
+    "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp11], %[kC1]             \n\t"
+    "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
+    "mul      %[temp11], %[temp11], %[kC2]             \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
+    "addu     %[temp10], %[temp12], %[temp14]          \n\t"
+    "subu     %[temp12], %[temp12], %[temp14]          \n\t"
+    "mul      %[temp14], %[temp13], %[kC2]             \n\t"
+    "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
+    "addu     %[temp15], %[temp13], %[temp15]          \n\t"
+    "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
+    "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
+    "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
+    "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
+    "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
+    "sra      %[temp2],  %[temp2],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp3],  %[temp3],  16                 \n\t"
+    "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
+    "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
+    "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
+    "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
+    "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
+    "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
+    "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
+    "subu     %[temp16], %[temp16], %[temp11]          \n\t"
+    "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
+    "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  3                  \n\t"
+    "sra      %[temp16], %[temp16], 3                  \n\t"
+    "sra      %[temp11], %[temp11], 3                  \n\t"
+    "sra      %[temp8],  %[temp8],  3                  \n\t"
+    "addu     %[temp17], %[temp10], %[temp15]          \n\t"
+    "subu     %[temp10], %[temp10], %[temp15]          \n\t"
+    "addu     %[temp15], %[temp12], %[temp9]           \n\t"
+    "subu     %[temp12], %[temp12], %[temp9]           \n\t"
+    "sra      %[temp17], %[temp17], 3                  \n\t"
+    "sra      %[temp10], %[temp10], 3                  \n\t"
+    "sra      %[temp15], %[temp15], 3                  \n\t"
+    "sra      %[temp12], %[temp12], 3                  \n\t"
+    "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
+    "subu     %[temp14], %[temp14], %[temp3]           \n\t"
+    "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
+    "sra      %[temp9],  %[temp9],  3                  \n\t"
+    "sra      %[temp14], %[temp14], 3                  \n\t"
+    "sra      %[temp3],  %[temp3],  3                  \n\t"
+    "sra      %[temp0],  %[temp0],  3                  \n\t"
+    "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
+    "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
+    "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
+    "sra      %[temp13], %[temp13], 3                  \n\t"
+    "sra      %[temp2],  %[temp2],  3                  \n\t"
+    "sra      %[temp7],  %[temp7],  3                  \n\t"
+    "sra      %[temp4],  %[temp4],  3                  \n\t"
+    "addiu    %[temp6],  $zero,     255                \n\t"
+    "lbu      %[temp1],  0(%[dst])                     \n\t"
+    "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"
+    "sra      %[temp5],  %[temp1],  8                  \n\t"
+    "sra      %[temp18], %[temp1],  31                 \n\t"
+    "beqz     %[temp5],  1f                            \n\t"
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
+  "1:                                                  \n\t"
+    "lbu      %[temp18], 1(%[dst])                     \n\t"
+    "sb       %[temp1],  0(%[dst])                     \n\t"
+    "addu     %[temp18], %[temp18], %[temp11]          \n\t"
+    "sra      %[temp11], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp11], 2f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "2:                                                  \n\t"
+    "lbu      %[temp1],  2(%[dst])                     \n\t"
+    "sb       %[temp18], 1(%[dst])                     \n\t"
+    "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
+    "sra      %[temp8],  %[temp1],  8                  \n\t"
+    "sra      %[temp18], %[temp1],  31                 \n\t"
+    "beqz     %[temp8],  3f                            \n\t"
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
+  "3:                                                  \n\t"
+    "lbu      %[temp18], 3(%[dst])                     \n\t"
+    "sb       %[temp1],  2(%[dst])                     \n\t"
+    "addu     %[temp18], %[temp18], %[temp16]          \n\t"
+    "sra      %[temp16], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp16], 4f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "4:                                                  \n\t"
+    "sb       %[temp18], 3(%[dst])                     \n\t"
+    "lbu      %[temp5],  32(%[dst])                    \n\t"
+    "lbu      %[temp8],  33(%[dst])                    \n\t"
+    "lbu      %[temp11], 34(%[dst])                    \n\t"
+    "lbu      %[temp16], 35(%[dst])                    \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
+    "addu     %[temp11], %[temp11], %[temp12]          \n\t"
+    "addu     %[temp16], %[temp16], %[temp10]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "beqz     %[temp18], 5f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "5:                                                  \n\t"
+    "sra      %[temp18], %[temp8],  8                  \n\t"
+    "sra      %[temp1],  %[temp8],  31                 \n\t"
+    "beqz     %[temp18], 6f                            \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
+  "6:                                                  \n\t"
+    "sra      %[temp18], %[temp11], 8                  \n\t"
+    "sra      %[temp1],  %[temp11], 31                 \n\t"
+    "sra      %[temp17], %[temp16], 8                  \n\t"
+    "sra      %[temp15], %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 7f                            \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
+  "7:                                                  \n\t"
+    "beqz     %[temp17], 8f                            \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
+  "8:                                                  \n\t"
+    "sb       %[temp5],  32(%[dst])                    \n\t"
+    "sb       %[temp8],  33(%[dst])                    \n\t"
+    "sb       %[temp11], 34(%[dst])                    \n\t"
+    "sb       %[temp16], 35(%[dst])                    \n\t"
+    "lbu      %[temp5],  64(%[dst])                    \n\t"
+    "lbu      %[temp8],  65(%[dst])                    \n\t"
+    "lbu      %[temp11], 66(%[dst])                    \n\t"
+    "lbu      %[temp16], 67(%[dst])                    \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp0]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp14]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 9f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "9:                                                  \n\t"
+    "beqz     %[temp17], 10f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "10:                                                 \n\t"
+    "beqz     %[temp12], 11f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "11:                                                 \n\t"
+    "beqz     %[temp9],  12f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "12:                                                 \n\t"
+    "sb       %[temp5],  64(%[dst])                    \n\t"
+    "sb       %[temp8],  65(%[dst])                    \n\t"
+    "sb       %[temp11], 66(%[dst])                    \n\t"
+    "sb       %[temp16], 67(%[dst])                    \n\t"
+    "lbu      %[temp5],  96(%[dst])                    \n\t"
+    "lbu      %[temp8],  97(%[dst])                    \n\t"
+    "lbu      %[temp11], 98(%[dst])                    \n\t"
+    "lbu      %[temp16], 99(%[dst])                    \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp4]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp2]           \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 13f                           \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "13:                                                 \n\t"
+    "beqz     %[temp17], 14f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "14:                                                 \n\t"
+    "beqz     %[temp12], 15f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "15:                                                 \n\t"
+    "beqz     %[temp9],  16f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "16:                                                 \n\t"
+    "sb       %[temp5],  96(%[dst])                    \n\t"
+    "sb       %[temp8],  97(%[dst])                    \n\t"
+    "sb       %[temp11], 98(%[dst])                    \n\t"
+    "sb       %[temp16], 99(%[dst])                    \n\t"
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18)
+    : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
+    : "memory", "hi", "lo"
+  );
+}
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
+  if (do_two) {
+    TransformOne(in + 16, dst + 4);
+  }
+}
+
+#endif  // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitMIPS32(void);
+
+void VP8DspInitMIPS32(void) {
+#if defined(WEBP_USE_MIPS32)
+  VP8InitClipTables();
+
+  VP8Transform = TransformTwo;
+
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+#endif  // WEBP_USE_MIPS32
+}
diff --git a/src/main/jni/libwebp/dsp/dec_neon.c b/src/main/jni/libwebp/dsp/dec_neon.c
new file mode 100644
index 000000000..9c5bc1c7d
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dec_neon.c
@@ -0,0 +1,1292 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of dsp functions and loop filtering.
+//
+// Authors: Somnath Banerjee (somnath@google.com)
+//          Johann Koenig (johannkoenig@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include "./neon.h"
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+// NxM Loading functions
+
+// Load/Store vertical edge
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
+  "vld4.8   {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
+
+#define STORE8x2(c1, c2, p, stride)                                            \
+  "vst2.8   {" #c1"[0], " #c2"[0]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[1], " #c2"[1]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[2], " #c2"[2]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[3], " #c2"[3]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[4], " #c2"[4]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[5], " #c2"[5]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[6], " #c2"[6]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
+
+#if !defined(WORK_AROUND_GCC)
+
+// This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation
+// (register alloc, probably). The variants somewhat mitigate the problem, but
+// not quite. HFilter16i() remains problematic.
+static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {
+  const uint8x8_t zero = vdup_n_u8(0);
+  uint8x8x4_t out;
+  INIT_VECTOR4(out, zero, zero, zero, zero);
+  out = vld4_lane_u8(src + 0 * stride, out, 0);
+  out = vld4_lane_u8(src + 1 * stride, out, 1);
+  out = vld4_lane_u8(src + 2 * stride, out, 2);
+  out = vld4_lane_u8(src + 3 * stride, out, 3);
+  out = vld4_lane_u8(src + 4 * stride, out, 4);
+  out = vld4_lane_u8(src + 5 * stride, out, 5);
+  out = vld4_lane_u8(src + 6 * stride, out, 6);
+  out = vld4_lane_u8(src + 7 * stride, out, 7);
+  return out;
+}
+
+static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+  // row0 = p1[0..7]|p0[0..7]|q0[0..7]|q1[0..7]
+  // row8 = p1[8..15]|p0[8..15]|q0[8..15]|q1[8..15]
+  const uint8x8x4_t row0 = Load4x8(src - 2 + 0 * stride, stride);
+  const uint8x8x4_t row8 = Load4x8(src - 2 + 8 * stride, stride);
+  *p1 = vcombine_u8(row0.val[0], row8.val[0]);
+  *p0 = vcombine_u8(row0.val[1], row8.val[1]);
+  *q0 = vcombine_u8(row0.val[2], row8.val[2]);
+  *q1 = vcombine_u8(row0.val[3], row8.val[3]);
+}
+
+#else  // WORK_AROUND_GCC
+
+#define LOADQ_LANE_32b(VALUE, LANE) do {                             \
+  (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE));   \
+  src += stride;                                                     \
+} while (0)
+
+static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+  const uint32x4_t zero = vdupq_n_u32(0);
+  uint32x4x4_t in;
+  INIT_VECTOR4(in, zero, zero, zero, zero);
+  src -= 2;
+  LOADQ_LANE_32b(in.val[0], 0);
+  LOADQ_LANE_32b(in.val[1], 0);
+  LOADQ_LANE_32b(in.val[2], 0);
+  LOADQ_LANE_32b(in.val[3], 0);
+  LOADQ_LANE_32b(in.val[0], 1);
+  LOADQ_LANE_32b(in.val[1], 1);
+  LOADQ_LANE_32b(in.val[2], 1);
+  LOADQ_LANE_32b(in.val[3], 1);
+  LOADQ_LANE_32b(in.val[0], 2);
+  LOADQ_LANE_32b(in.val[1], 2);
+  LOADQ_LANE_32b(in.val[2], 2);
+  LOADQ_LANE_32b(in.val[3], 2);
+  LOADQ_LANE_32b(in.val[0], 3);
+  LOADQ_LANE_32b(in.val[1], 3);
+  LOADQ_LANE_32b(in.val[2], 3);
+  LOADQ_LANE_32b(in.val[3], 3);
+  // Transpose four 4x4 parts:
+  {
+    const uint8x16x2_t row01 = vtrnq_u8(vreinterpretq_u8_u32(in.val[0]),
+                                        vreinterpretq_u8_u32(in.val[1]));
+    const uint8x16x2_t row23 = vtrnq_u8(vreinterpretq_u8_u32(in.val[2]),
+                                        vreinterpretq_u8_u32(in.val[3]));
+    const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
+                                         vreinterpretq_u16_u8(row23.val[0]));
+    const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
+                                         vreinterpretq_u16_u8(row23.val[1]));
+    *p1 = vreinterpretq_u8_u16(row02.val[0]);
+    *p0 = vreinterpretq_u8_u16(row13.val[0]);
+    *q0 = vreinterpretq_u8_u16(row02.val[1]);
+    *q1 = vreinterpretq_u8_u16(row13.val[1]);
+  }
+}
+#undef LOADQ_LANE_32b
+
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Load8x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {
+  Load4x16(src - 2, stride, p3, p2, p1, p0);
+  Load4x16(src + 2, stride, q0, q1, q2, q3);
+}
+
+static WEBP_INLINE void Load16x4(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {
+  *p1 = vld1q_u8(src - 2 * stride);
+  *p0 = vld1q_u8(src - 1 * stride);
+  *q0 = vld1q_u8(src + 0 * stride);
+  *q1 = vld1q_u8(src + 1 * stride);
+}
+
+static WEBP_INLINE void Load16x8(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {
+  Load16x4(src - 2  * stride, stride, p3, p2, p1, p0);
+  Load16x4(src + 2  * stride, stride, q0, q1, q2, q3);
+}
+
+static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
+                                  const uint8_t* const v,
+                                  int stride,
+                                  uint8x16_t* const p3, uint8x16_t* const p2,
+                                  uint8x16_t* const p1, uint8x16_t* const p0,
+                                  uint8x16_t* const q0, uint8x16_t* const q1,
+                                  uint8x16_t* const q2, uint8x16_t* const q3) {
+  // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+  // and the v-samples on the higher half.
+  *p3 = vcombine_u8(vld1_u8(u - 4 * stride), vld1_u8(v - 4 * stride));
+  *p2 = vcombine_u8(vld1_u8(u - 3 * stride), vld1_u8(v - 3 * stride));
+  *p1 = vcombine_u8(vld1_u8(u - 2 * stride), vld1_u8(v - 2 * stride));
+  *p0 = vcombine_u8(vld1_u8(u - 1 * stride), vld1_u8(v - 1 * stride));
+  *q0 = vcombine_u8(vld1_u8(u + 0 * stride), vld1_u8(v + 0 * stride));
+  *q1 = vcombine_u8(vld1_u8(u + 1 * stride), vld1_u8(v + 1 * stride));
+  *q2 = vcombine_u8(vld1_u8(u + 2 * stride), vld1_u8(v + 2 * stride));
+  *q3 = vcombine_u8(vld1_u8(u + 3 * stride), vld1_u8(v + 3 * stride));
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+#define LOAD_UV_8(ROW) \
+  vcombine_u8(vld1_u8(u - 4 + (ROW) * stride), vld1_u8(v - 4 + (ROW) * stride))
+
+static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
+                                   const uint8_t* const v,
+                                   int stride,
+                                   uint8x16_t* const p3, uint8x16_t* const p2,
+                                   uint8x16_t* const p1, uint8x16_t* const p0,
+                                   uint8x16_t* const q0, uint8x16_t* const q1,
+                                   uint8x16_t* const q2, uint8x16_t* const q3) {
+  // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+  // and the v-samples on the higher half.
+  const uint8x16_t row0 = LOAD_UV_8(0);
+  const uint8x16_t row1 = LOAD_UV_8(1);
+  const uint8x16_t row2 = LOAD_UV_8(2);
+  const uint8x16_t row3 = LOAD_UV_8(3);
+  const uint8x16_t row4 = LOAD_UV_8(4);
+  const uint8x16_t row5 = LOAD_UV_8(5);
+  const uint8x16_t row6 = LOAD_UV_8(6);
+  const uint8x16_t row7 = LOAD_UV_8(7);
+  // Perform two side-by-side 8x8 transposes
+  // u00 u01 u02 u03 u04 u05 u06 u07 | v00 v01 v02 v03 v04 v05 v06 v07
+  // u10 u11 u12 u13 u14 u15 u16 u17 | v10 v11 v12 ...
+  // u20 u21 u22 u23 u24 u25 u26 u27 | v20 v21 ...
+  // u30 u31 u32 u33 u34 u35 u36 u37 | ...
+  // u40 u41 u42 u43 u44 u45 u46 u47 | ...
+  // u50 u51 u52 u53 u54 u55 u56 u57 | ...
+  // u60 u61 u62 u63 u64 u65 u66 u67 | v60 ...
+  // u70 u71 u72 u73 u74 u75 u76 u77 | v70 v71 v72 ...
+  const uint8x16x2_t row01 = vtrnq_u8(row0, row1);  // u00 u10 u02 u12 ...
+                                                    // u01 u11 u03 u13 ...
+  const uint8x16x2_t row23 = vtrnq_u8(row2, row3);  // u20 u30 u22 u32 ...
+                                                    // u21 u31 u23 u33 ...
+  const uint8x16x2_t row45 = vtrnq_u8(row4, row5);  // ...
+  const uint8x16x2_t row67 = vtrnq_u8(row6, row7);  // ...
+  const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
+                                       vreinterpretq_u16_u8(row23.val[0]));
+  const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
+                                       vreinterpretq_u16_u8(row23.val[1]));
+  const uint16x8x2_t row46 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[0]),
+                                       vreinterpretq_u16_u8(row67.val[0]));
+  const uint16x8x2_t row57 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[1]),
+                                       vreinterpretq_u16_u8(row67.val[1]));
+  const uint32x4x2_t row04 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[0]),
+                                       vreinterpretq_u32_u16(row46.val[0]));
+  const uint32x4x2_t row26 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[1]),
+                                       vreinterpretq_u32_u16(row46.val[1]));
+  const uint32x4x2_t row15 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[0]),
+                                       vreinterpretq_u32_u16(row57.val[0]));
+  const uint32x4x2_t row37 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[1]),
+                                       vreinterpretq_u32_u16(row57.val[1]));
+  *p3 = vreinterpretq_u8_u32(row04.val[0]);
+  *p2 = vreinterpretq_u8_u32(row15.val[0]);
+  *p1 = vreinterpretq_u8_u32(row26.val[0]);
+  *p0 = vreinterpretq_u8_u32(row37.val[0]);
+  *q0 = vreinterpretq_u8_u32(row04.val[1]);
+  *q1 = vreinterpretq_u8_u32(row15.val[1]);
+  *q2 = vreinterpretq_u8_u32(row26.val[1]);
+  *q3 = vreinterpretq_u8_u32(row37.val[1]);
+}
+#undef LOAD_UV_8
+
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Store2x8(const uint8x8x2_t v,
+                                 uint8_t* const dst, int stride) {
+  vst2_lane_u8(dst + 0 * stride, v, 0);
+  vst2_lane_u8(dst + 1 * stride, v, 1);
+  vst2_lane_u8(dst + 2 * stride, v, 2);
+  vst2_lane_u8(dst + 3 * stride, v, 3);
+  vst2_lane_u8(dst + 4 * stride, v, 4);
+  vst2_lane_u8(dst + 5 * stride, v, 5);
+  vst2_lane_u8(dst + 6 * stride, v, 6);
+  vst2_lane_u8(dst + 7 * stride, v, 7);
+}
+
+static WEBP_INLINE void Store2x16(const uint8x16_t p0, const uint8x16_t q0,
+                                  uint8_t* const dst, int stride) {
+  uint8x8x2_t lo, hi;
+  lo.val[0] = vget_low_u8(p0);
+  lo.val[1] = vget_low_u8(q0);
+  hi.val[0] = vget_high_u8(p0);
+  hi.val[1] = vget_high_u8(q0);
+  Store2x8(lo, dst - 1 + 0 * stride, stride);
+  Store2x8(hi, dst - 1 + 8 * stride, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+static WEBP_INLINE void Store4x8(const uint8x8x4_t v,
+                                 uint8_t* const dst, int stride) {
+  vst4_lane_u8(dst + 0 * stride, v, 0);
+  vst4_lane_u8(dst + 1 * stride, v, 1);
+  vst4_lane_u8(dst + 2 * stride, v, 2);
+  vst4_lane_u8(dst + 3 * stride, v, 3);
+  vst4_lane_u8(dst + 4 * stride, v, 4);
+  vst4_lane_u8(dst + 5 * stride, v, 5);
+  vst4_lane_u8(dst + 6 * stride, v, 6);
+  vst4_lane_u8(dst + 7 * stride, v, 7);
+}
+
+static WEBP_INLINE void Store4x16(const uint8x16_t p1, const uint8x16_t p0,
+                                  const uint8x16_t q0, const uint8x16_t q1,
+                                  uint8_t* const dst, int stride) {
+  uint8x8x4_t lo, hi;
+  INIT_VECTOR4(lo,
+               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(hi,
+               vget_high_u8(p1), vget_high_u8(p0),
+               vget_high_u8(q0), vget_high_u8(q1));
+  Store4x8(lo, dst - 2 + 0 * stride, stride);
+  Store4x8(hi, dst - 2 + 8 * stride, stride);
+}
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Store16x2(const uint8x16_t p0, const uint8x16_t q0,
+                                  uint8_t* const dst, int stride) {
+  vst1q_u8(dst - stride, p0);
+  vst1q_u8(dst, q0);
+}
+
+static WEBP_INLINE void Store16x4(const uint8x16_t p1, const uint8x16_t p0,
+                                  const uint8x16_t q0, const uint8x16_t q1,
+                                  uint8_t* const dst, int stride) {
+  Store16x2(p1, p0, dst - stride, stride);
+  Store16x2(q0, q1, dst + stride, stride);
+}
+
+static WEBP_INLINE void Store8x2x2(const uint8x16_t p0, const uint8x16_t q0,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  // p0 and q0 contain the u+v samples packed in low/high halves.
+  vst1_u8(u - stride, vget_low_u8(p0));
+  vst1_u8(u,          vget_low_u8(q0));
+  vst1_u8(v - stride, vget_high_u8(p0));
+  vst1_u8(v,          vget_high_u8(q0));
+}
+
+static WEBP_INLINE void Store8x4x2(const uint8x16_t p1, const uint8x16_t p0,
+                                   const uint8x16_t q0, const uint8x16_t q1,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  // The p1...q1 registers contain the u+v samples packed in low/high halves.
+  Store8x2x2(p1, p0, u - stride, v - stride, stride);
+  Store8x2x2(q0, q1, u + stride, v + stride, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+#define STORE6_LANE(DST, VAL0, VAL1, LANE) do {   \
+  vst3_lane_u8((DST) - 3, (VAL0), (LANE));        \
+  vst3_lane_u8((DST) + 0, (VAL1), (LANE));        \
+  (DST) += stride;                                \
+} while (0)
+
+static WEBP_INLINE void Store6x8x2(const uint8x16_t p2, const uint8x16_t p1,
+                                   const uint8x16_t p0, const uint8x16_t q0,
+                                   const uint8x16_t q1, const uint8x16_t q2,
+                                   uint8_t* u, uint8_t* v,
+                                   int stride) {
+  uint8x8x3_t u0, u1, v0, v1;
+  INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
+  INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
+  INIT_VECTOR3(v0, vget_high_u8(p2), vget_high_u8(p1), vget_high_u8(p0));
+  INIT_VECTOR3(v1, vget_high_u8(q0), vget_high_u8(q1), vget_high_u8(q2));
+  STORE6_LANE(u, u0, u1, 0);
+  STORE6_LANE(u, u0, u1, 1);
+  STORE6_LANE(u, u0, u1, 2);
+  STORE6_LANE(u, u0, u1, 3);
+  STORE6_LANE(u, u0, u1, 4);
+  STORE6_LANE(u, u0, u1, 5);
+  STORE6_LANE(u, u0, u1, 6);
+  STORE6_LANE(u, u0, u1, 7);
+  STORE6_LANE(v, v0, v1, 0);
+  STORE6_LANE(v, v0, v1, 1);
+  STORE6_LANE(v, v0, v1, 2);
+  STORE6_LANE(v, v0, v1, 3);
+  STORE6_LANE(v, v0, v1, 4);
+  STORE6_LANE(v, v0, v1, 5);
+  STORE6_LANE(v, v0, v1, 6);
+  STORE6_LANE(v, v0, v1, 7);
+}
+#undef STORE6_LANE
+
+static WEBP_INLINE void Store4x8x2(const uint8x16_t p1, const uint8x16_t p0,
+                                   const uint8x16_t q0, const uint8x16_t q1,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  uint8x8x4_t u0, v0;
+  INIT_VECTOR4(u0,
+               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(v0,
+               vget_high_u8(p1), vget_high_u8(p0),
+               vget_high_u8(q0), vget_high_u8(q1));
+  vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
+  vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
+  vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
+  vst4_lane_u8(u - 2 + 3 * stride, u0, 3);
+  vst4_lane_u8(u - 2 + 4 * stride, u0, 4);
+  vst4_lane_u8(u - 2 + 5 * stride, u0, 5);
+  vst4_lane_u8(u - 2 + 6 * stride, u0, 6);
+  vst4_lane_u8(u - 2 + 7 * stride, u0, 7);
+  vst4_lane_u8(v - 2 + 0 * stride, v0, 0);
+  vst4_lane_u8(v - 2 + 1 * stride, v0, 1);
+  vst4_lane_u8(v - 2 + 2 * stride, v0, 2);
+  vst4_lane_u8(v - 2 + 3 * stride, v0, 3);
+  vst4_lane_u8(v - 2 + 4 * stride, v0, 4);
+  vst4_lane_u8(v - 2 + 5 * stride, v0, 5);
+  vst4_lane_u8(v - 2 + 6 * stride, v0, 6);
+  vst4_lane_u8(v - 2 + 7 * stride, v0, 7);
+}
+
+#endif  // !WORK_AROUND_GCC
+
+// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
+  return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
+}
+
+// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
+// to the corresponding rows of 'dst'.
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
+                                            const int16x8_t dst01,
+                                            const int16x8_t dst23) {
+  // Unsigned saturate to 8b.
+  const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
+  const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
+
+  // Store the results.
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
+}
+
+static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
+                               uint8_t* const dst) {
+  uint32x2_t dst01 = vdup_n_u32(0);
+  uint32x2_t dst23 = vdup_n_u32(0);
+
+  // Load the source pixels.
+  dst01 = vld1_lane_u32((uint32_t*)(dst + 0 * BPS), dst01, 0);
+  dst23 = vld1_lane_u32((uint32_t*)(dst + 2 * BPS), dst23, 0);
+  dst01 = vld1_lane_u32((uint32_t*)(dst + 1 * BPS), dst01, 1);
+  dst23 = vld1_lane_u32((uint32_t*)(dst + 3 * BPS), dst23, 1);
+
+  {
+    // Convert to 16b.
+    const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
+    const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
+
+    // Descale with rounding.
+    const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
+    const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
+    // Add the inverse transform.
+    SaturateAndStore4x4(dst, out01, out23);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static uint8x16_t NeedsFilter(const uint8x16_t p1, const uint8x16_t p0,
+                              const uint8x16_t q0, const uint8x16_t q1,
+                              int thresh) {
+  const uint8x16_t thresh_v = vdupq_n_u8((uint8_t)thresh);
+  const uint8x16_t a_p0_q0 = vabdq_u8(p0, q0);               // abs(p0-q0)
+  const uint8x16_t a_p1_q1 = vabdq_u8(p1, q1);               // abs(p1-q1)
+  const uint8x16_t a_p0_q0_2 = vqaddq_u8(a_p0_q0, a_p0_q0);  // 2 * abs(p0-q0)
+  const uint8x16_t a_p1_q1_2 = vshrq_n_u8(a_p1_q1, 1);       // abs(p1-q1) / 2
+  const uint8x16_t sum = vqaddq_u8(a_p0_q0_2, a_p1_q1_2);
+  const uint8x16_t mask = vcgeq_u8(thresh_v, sum);
+  return mask;
+}
+
+static int8x16_t FlipSign(const uint8x16_t v) {
+  const uint8x16_t sign_bit = vdupq_n_u8(0x80);
+  return vreinterpretq_s8_u8(veorq_u8(v, sign_bit));
+}
+
+static uint8x16_t FlipSignBack(const int8x16_t v) {
+  const int8x16_t sign_bit = vdupq_n_s8(0x80);
+  return vreinterpretq_u8_s8(veorq_s8(v, sign_bit));
+}
+
+static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0,
+                              const int8x16_t q0, const int8x16_t q1) {
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
+  const int8x16_t p1_q1 = vqsubq_s8(p1, q1);      // (p1-q1)
+  const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);   // (p1-q1) + 1 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // (p1-q1) + 2 * (q0 - p0)
+  const int8x16_t s3 = vqaddq_s8(q0_p0, s2);      // (p1-q1) + 3 * (q0 - p0)
+  return s3;
+}
+
+static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
+  const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);   // 2 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // 3 * (q0 - p0)
+  return s2;
+}
+
+//------------------------------------------------------------------------------
+
+static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s,
+                         const int8x16_t delta,
+                         uint8x16_t* const op0, uint8x16_t* const oq0) {
+  const int8x16_t kCst3 = vdupq_n_s8(0x03);
+  const int8x16_t kCst4 = vdupq_n_s8(0x04);
+  const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
+  const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4);
+  const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3);
+  const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3);
+  const int8x16_t sp0 = vqaddq_s8(p0s, delta3);
+  const int8x16_t sq0 = vqsubq_s8(q0s, delta4);
+  *op0 = FlipSignBack(sp0);
+  *oq0 = FlipSignBack(sq0);
+}
+
+#if defined(USE_INTRINSICS)
+
+static void DoFilter2(const uint8x16_t p1, const uint8x16_t p0,
+                      const uint8x16_t q0, const uint8x16_t q1,
+                      const uint8x16_t mask,
+                      uint8x16_t* const op0, uint8x16_t* const oq0) {
+  const int8x16_t p1s = FlipSign(p1);
+  const int8x16_t p0s = FlipSign(p0);
+  const int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+  const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask));
+  ApplyFilter2(p0s, q0s, delta1, op0, oq0);
+}
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  uint8x16_t p1, p0, q0, q1, op0, oq0;
+  Load16x4(p, stride, &p1, &p0, &q0, &q1);
+  {
+    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
+    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+  }
+  Store16x2(op0, oq0, p, stride);
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  uint8x16_t p1, p0, q0, q1, oq0, op0;
+  Load4x16(p, stride, &p1, &p0, &q0, &q1);
+  {
+    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
+    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+  }
+  Store2x16(op0, oq0, p, stride);
+}
+
+#else
+
+#define QRegs "q0", "q1", "q2", "q3",                                          \
+              "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+
+#define FLIP_SIGN_BIT2(a, b, s)                                                \
+  "veor     " #a "," #a "," #s "               \n"                             \
+  "veor     " #b "," #b "," #s "               \n"                             \
+
+#define FLIP_SIGN_BIT4(a, b, c, d, s)                                          \
+  FLIP_SIGN_BIT2(a, b, s)                                                      \
+  FLIP_SIGN_BIT2(c, d, s)                                                      \
+
+#define NEEDS_FILTER(p1, p0, q0, q1, thresh, mask)                             \
+  "vabd.u8    q15," #p0 "," #q0 "         \n"  /* abs(p0 - q0) */              \
+  "vabd.u8    q14," #p1 "," #q1 "         \n"  /* abs(p1 - q1) */              \
+  "vqadd.u8   q15, q15, q15               \n"  /* abs(p0 - q0) * 2 */          \
+  "vshr.u8    q14, q14, #1                \n"  /* abs(p1 - q1) / 2 */          \
+  "vqadd.u8   q15, q15, q14     \n"  /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
+  "vdup.8     q14, " #thresh "            \n"                                  \
+  "vcge.u8   " #mask ", q14, q15          \n"  /* mask <= thresh */
+
+#define GET_BASE_DELTA(p1, p0, q0, q1, o)                                      \
+  "vqsub.s8   q15," #q0 "," #p0 "         \n"  /* (q0 - p0) */                 \
+  "vqsub.s8  " #o "," #p1 "," #q1 "       \n"  /* (p1 - q1) */                 \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 1 * (p0 - q0) */ \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 2 * (p0 - q0) */ \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 3 * (p0 - q0) */
+
+#define DO_SIMPLE_FILTER(p0, q0, fl)                                           \
+  "vmov.i8    q15, #0x03                  \n"                                  \
+  "vqadd.s8   q15, q15, " #fl "           \n"  /* filter1 = filter + 3 */      \
+  "vshr.s8    q15, q15, #3                \n"  /* filter1 >> 3 */              \
+  "vqadd.s8  " #p0 "," #p0 ", q15         \n"  /* p0 += filter1 */             \
+                                                                               \
+  "vmov.i8    q15, #0x04                  \n"                                  \
+  "vqadd.s8   q15, q15, " #fl "           \n"  /* filter1 = filter + 4 */      \
+  "vshr.s8    q15, q15, #3                \n"  /* filter2 >> 3 */              \
+  "vqsub.s8  " #q0 "," #q0 ", q15         \n"  /* q0 -= filter2 */
+
+// Applies filter on 2 pixels (p0 and q0)
+#define DO_FILTER2(p1, p0, q0, q1, thresh)                                     \
+  NEEDS_FILTER(p1, p0, q0, q1, thresh, q9)     /* filter mask in q9 */         \
+  "vmov.i8    q10, #0x80                  \n"  /* sign bit */                  \
+  FLIP_SIGN_BIT4(p1, p0, q0, q1, q10)          /* convert to signed value */   \
+  GET_BASE_DELTA(p1, p0, q0, q1, q11)          /* get filter level  */         \
+  "vand       q9, q9, q11                 \n"  /* apply filter mask */         \
+  DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
+  FLIP_SIGN_BIT2(p0, q0, q10)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  __asm__ volatile (
+    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+
+    "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
+    "vld1.u8    {q2}, [%[p]], %[stride]        \n"  // p0
+    "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
+    "vld1.u8    {q12}, [%[p]]                  \n"  // q1
+
+    DO_FILTER2(q1, q2, q3, q12, %[thresh])
+
+    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+
+    "vst1.u8    {q2}, [%[p]], %[stride]        \n"  // store op0
+    "vst1.u8    {q3}, [%[p]]                   \n"  // store oq0
+    : [p] "+r"(p)
+    : [stride] "r"(stride), [thresh] "r"(thresh)
+    : "memory", QRegs
+  );
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  __asm__ volatile (
+    "sub        r4, %[p], #2                   \n"  // base1 = p - 2
+    "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
+    "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
+
+    LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
+    LOAD8x4(d24, d25, d26, d27, [r4], [r5], r6)
+    "vswp       d3, d24                        \n"  // p1:q1 p0:q3
+    "vswp       d5, d26                        \n"  // q0:q2 q1:q4
+    "vswp       q2, q12                        \n"  // p1:q1 p0:q2 q0:q3 q1:q4
+
+    DO_FILTER2(q1, q2, q12, q13, %[thresh])
+
+    "sub        %[p], %[p], #1                 \n"  // p - 1
+
+    "vswp        d5, d24                       \n"
+    STORE8x2(d4, d5, [%[p]], %[stride])
+    STORE8x2(d24, d25, [%[p]], %[stride])
+
+    : [p] "+r"(p)
+    : [stride] "r"(stride), [thresh] "r"(thresh)
+    : "memory", "r4", "r5", "r6", QRegs
+  );
+}
+
+#endif    // USE_INTRINSICS
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  uint32_t k;
+  for (k = 3; k != 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  uint32_t k;
+  for (k = 3; k != 0; --k) {
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+static uint8x16_t NeedsHev(const uint8x16_t p1, const uint8x16_t p0,
+                           const uint8x16_t q0, const uint8x16_t q1,
+                           int hev_thresh) {
+  const uint8x16_t hev_thresh_v = vdupq_n_u8((uint8_t)hev_thresh);
+  const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0);  // abs(p1 - p0)
+  const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0);  // abs(q1 - q0)
+  const uint8x16_t mask1 = vcgtq_u8(a_p1_p0, hev_thresh_v);
+  const uint8x16_t mask2 = vcgtq_u8(a_q1_q0, hev_thresh_v);
+  const uint8x16_t mask = vorrq_u8(mask1, mask2);
+  return mask;
+}
+
+static uint8x16_t NeedsFilter2(const uint8x16_t p3, const uint8x16_t p2,
+                               const uint8x16_t p1, const uint8x16_t p0,
+                               const uint8x16_t q0, const uint8x16_t q1,
+                               const uint8x16_t q2, const uint8x16_t q3,
+                               int ithresh, int thresh) {
+  const uint8x16_t ithresh_v = vdupq_n_u8((uint8_t)ithresh);
+  const uint8x16_t a_p3_p2 = vabdq_u8(p3, p2);  // abs(p3 - p2)
+  const uint8x16_t a_p2_p1 = vabdq_u8(p2, p1);  // abs(p2 - p1)
+  const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0);  // abs(p1 - p0)
+  const uint8x16_t a_q3_q2 = vabdq_u8(q3, q2);  // abs(q3 - q2)
+  const uint8x16_t a_q2_q1 = vabdq_u8(q2, q1);  // abs(q2 - q1)
+  const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0);  // abs(q1 - q0)
+  const uint8x16_t max1 = vmaxq_u8(a_p3_p2, a_p2_p1);
+  const uint8x16_t max2 = vmaxq_u8(a_p1_p0, a_q3_q2);
+  const uint8x16_t max3 = vmaxq_u8(a_q2_q1, a_q1_q0);
+  const uint8x16_t max12 = vmaxq_u8(max1, max2);
+  const uint8x16_t max123 = vmaxq_u8(max12, max3);
+  const uint8x16_t mask2 = vcgeq_u8(ithresh_v, max123);
+  const uint8x16_t mask1 = NeedsFilter(p1, p0, q0, q1, thresh);
+  const uint8x16_t mask = vandq_u8(mask1, mask2);
+  return mask;
+}
+
+//  4-points filter
+
+static void ApplyFilter4(
+    const int8x16_t p1, const int8x16_t p0,
+    const int8x16_t q0, const int8x16_t q1,
+    const int8x16_t delta0,
+    uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+  const int8x16_t kCst3 = vdupq_n_s8(0x03);
+  const int8x16_t kCst4 = vdupq_n_s8(0x04);
+  const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);
+  const int8x16_t delta2 = vqaddq_s8(delta0, kCst3);
+  const int8x16_t a1 = vshrq_n_s8(delta1, 3);
+  const int8x16_t a2 = vshrq_n_s8(delta2, 3);
+  const int8x16_t a3 = vrshrq_n_s8(a1, 1);   // a3 = (a1 + 1) >> 1
+  *op0 = FlipSignBack(vqaddq_s8(p0, a2));  // clip(p0 + a2)
+  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - a1)
+  *op1 = FlipSignBack(vqaddq_s8(p1, a3));  // clip(p1 + a3)
+  *oq1 = FlipSignBack(vqsubq_s8(q1, a3));  // clip(q1 - a3)
+}
+
+static void DoFilter4(
+    const uint8x16_t p1, const uint8x16_t p0,
+    const uint8x16_t q0, const uint8x16_t q1,
+    const uint8x16_t mask, const uint8x16_t hev_mask,
+    uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+  // This is a fused version of DoFilter2() calling ApplyFilter2 directly
+  const int8x16_t p1s = FlipSign(p1);
+  int8x16_t p0s = FlipSign(p0);
+  int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
+
+  // do_filter2 part (simple loopfilter on pixels with hev)
+  {
+    const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s);
+    const int8x16_t simple_lf_delta =
+        vandq_s8(delta, vreinterpretq_s8_u8(simple_lf_mask));
+    uint8x16_t tmp_p0, tmp_q0;
+    ApplyFilter2(p0s, q0s, simple_lf_delta, &tmp_p0, &tmp_q0);
+    // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here
+    p0s = FlipSign(tmp_p0);
+    q0s = FlipSign(tmp_q0);
+  }
+
+  // do_filter4 part (complex loopfilter on pixels without hev)
+  {
+    const int8x16_t delta0 = GetBaseDelta0(p0s, q0s);
+    // we use: (mask & hev_mask) ^ mask = mask & !hev_mask
+    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
+    const int8x16_t complex_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
+    ApplyFilter4(p1s, p0s, q0s, q1s, complex_lf_delta, op1, op0, oq0, oq1);
+  }
+}
+
+//  6-points filter
+
+static void ApplyFilter6(
+    const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
+    const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
+    const int8x16_t delta,
+    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+  const int16x8_t kCst63 = vdupq_n_s16(63);
+  const int8x8_t kCst27 = vdup_n_s8(27);
+  const int8x8_t kCst18 = vdup_n_s8(18);
+  const int8x8_t kCst9 = vdup_n_s8(9);
+  const int8x8_t delta_lo = vget_low_s8(delta);
+  const int8x8_t delta_hi = vget_high_s8(delta);
+  const int16x8_t s1_lo = vmlal_s8(kCst63, kCst27, delta_lo);  // 63 + 27 * a
+  const int16x8_t s1_hi = vmlal_s8(kCst63, kCst27, delta_hi);  // 63 + 27 * a
+  const int16x8_t s2_lo = vmlal_s8(kCst63, kCst18, delta_lo);  // 63 + 18 * a
+  const int16x8_t s2_hi = vmlal_s8(kCst63, kCst18, delta_hi);  // 63 + 18 * a
+  const int16x8_t s3_lo = vmlal_s8(kCst63, kCst9, delta_lo);   // 63 + 9 * a
+  const int16x8_t s3_hi = vmlal_s8(kCst63, kCst9, delta_hi);   // 63 + 9 * a
+  const int8x8_t a1_lo = vqshrn_n_s16(s1_lo, 7);
+  const int8x8_t a1_hi = vqshrn_n_s16(s1_hi, 7);
+  const int8x8_t a2_lo = vqshrn_n_s16(s2_lo, 7);
+  const int8x8_t a2_hi = vqshrn_n_s16(s2_hi, 7);
+  const int8x8_t a3_lo = vqshrn_n_s16(s3_lo, 7);
+  const int8x8_t a3_hi = vqshrn_n_s16(s3_hi, 7);
+  const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
+  const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
+  const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi);
+
+  *op0 = FlipSignBack(vqaddq_s8(p0, a1));  // clip(p0 + a1)
+  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - q1)
+  *oq1 = FlipSignBack(vqsubq_s8(q1, a2));  // clip(q1 - a2)
+  *op1 = FlipSignBack(vqaddq_s8(p1, a2));  // clip(p1 + a2)
+  *oq2 = FlipSignBack(vqsubq_s8(q2, a3));  // clip(q2 - a3)
+  *op2 = FlipSignBack(vqaddq_s8(p2, a3));  // clip(p2 + a3)
+}
+
+static void DoFilter6(
+    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
+    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
+    const uint8x16_t mask, const uint8x16_t hev_mask,
+    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+  // This is a fused version of DoFilter2() calling ApplyFilter2 directly
+  const int8x16_t p2s = FlipSign(p2);
+  const int8x16_t p1s = FlipSign(p1);
+  int8x16_t p0s = FlipSign(p0);
+  int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const int8x16_t q2s = FlipSign(q2);
+  const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
+  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+
+  // do_filter2 part (simple loopfilter on pixels with hev)
+  {
+    const int8x16_t simple_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(simple_lf_mask));
+    uint8x16_t tmp_p0, tmp_q0;
+    ApplyFilter2(p0s, q0s, simple_lf_delta, &tmp_p0, &tmp_q0);
+    // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here
+    p0s = FlipSign(tmp_p0);
+    q0s = FlipSign(tmp_q0);
+  }
+
+  // do_filter6 part (complex loopfilter on pixels without hev)
+  {
+    // we use: (mask & hev_mask) ^ mask = mask & !hev_mask
+    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
+    const int8x16_t complex_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
+    ApplyFilter6(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
+                 op2, op1, op0, oq0, oq1, oq2);
+  }
+}
+
+// on macroblock edges
+
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load16x8(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store16x2(op2, op1, p - 2 * stride, stride);
+    Store16x2(op0, oq0, p + 0 * stride, stride);
+    Store16x2(oq1, oq2, p + 2 * stride, stride);
+  }
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x16(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store2x16(op2, op1, p - 2, stride);
+    Store2x16(op0, oq0, p + 0, stride);
+    Store2x16(oq1, oq2, p + 2, stride);
+  }
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  uint32_t k;
+  uint8x16_t p3, p2, p1, p0;
+  Load16x4(p + 2  * stride, stride, &p3, &p2, &p1, &p0);
+  for (k = 3; k != 0; --k) {
+    uint8x16_t q0, q1, q2, q3;
+    p += 4 * stride;
+    Load16x4(p + 2  * stride, stride, &q0, &q1, &q2, &q3);
+    {
+      const uint8x16_t mask =
+          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+      // p3 and p2 are not just temporary variables here: they will be
+      // re-used for next span. And q2/q3 will become p1/p0 accordingly.
+      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store16x4(p1, p0, p3, p2, p, stride);
+      p1 = q2;
+      p0 = q3;
+    }
+  }
+}
+
+#if !defined(WORK_AROUND_GCC)
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  uint32_t k;
+  uint8x16_t p3, p2, p1, p0;
+  Load4x16(p + 2, stride, &p3, &p2, &p1, &p0);
+  for (k = 3; k != 0; --k) {
+    uint8x16_t q0, q1, q2, q3;
+    p += 4;
+    Load4x16(p + 2, stride, &q0, &q1, &q2, &q3);
+    {
+      const uint8x16_t mask =
+          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store4x16(p1, p0, p3, p2, p, stride);
+      p1 = q2;
+      p0 = q3;
+    }
+  }
+}
+#endif  // !WORK_AROUND_GCC
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store8x2x2(op2, op1, u - 2 * stride, v - 2 * stride, stride);
+    Store8x2x2(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
+    Store8x2x2(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
+  }
+}
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  u += 4 * stride;
+  v += 4 * stride;
+  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op1, op0, oq0, oq1;
+    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store8x4x2(op1, op0, oq0, oq1, u, v, stride);
+  }
+}
+
+#if !defined(WORK_AROUND_GCC)
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store6x8x2(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
+  }
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  u += 4;
+  v += 4;
+  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op1, op0, oq0, oq1;
+    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store4x8x2(op1, op0, oq0, oq1, u, v, stride);
+  }
+}
+#endif  // !WORK_AROUND_GCC
+
+//-----------------------------------------------------------------------------
+// Inverse transforms (Paragraph 14.4)
+
+// Technically these are unsigned but vqdmulh is only available in signed.
+// vqdmulh returns high half (effectively >> 16) but also doubles the value,
+// changing the >> 16 to >> 15 and requiring an additional >> 1.
+// We use this to our advantage with kC2. The canonical value is 35468.
+// However, the high bit is set so treating it as signed will give incorrect
+// results. We avoid this by down shifting by 1 here to clear the highest bit.
+// Combined with the doubling effect of vqdmulh we get >> 16.
+// This can not be applied to kC1 because the lowest bit is set. Down shifting
+// the constant would reduce precision.
+
+// libwebp uses a trick to avoid some extra addition that libvpx does.
+// Instead of:
+// temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+// libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
+// same issue with kC1 and vqdmulh that we work around by down shifting kC2
+
+static const int16_t kC1 = 20091;
+static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
+
+#if defined(USE_INTRINSICS)
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
+                                     int16x8x2_t* const out) {
+  // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
+  // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
+  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
+                                                  // b0 d0 b1 d1 b2 d2 ...
+  *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
+}
+
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+  // {rows} = in0 | in4
+  //          in8 | in12
+  // B1 = in4 | in12
+  const int16x8_t B1 =
+      vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+  // C0 = kC1 * in4 | kC1 * in12
+  // C1 = kC2 * in4 | kC2 * in12
+  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
+  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
+  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 + in8
+  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 - in8
+  // c = kC2 * in4 - kC1 * in12
+  // d = kC1 * in4 + kC2 * in12
+  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
+  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
+  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
+  Transpose8x2(E0, E1, rows);
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  int16x8x2_t rows;
+  INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
+  TransformPass(&rows);
+  TransformPass(&rows);
+  Add4x4(rows.val[0], rows.val[1], dst);
+}
+
+#else
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  const int kBPS = BPS;
+  // kC1, kC2. Padded because vld1.16 loads 8 bytes
+  const int16_t constants[4] = { kC1, kC2, 0, 0 };
+  /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
+  __asm__ volatile (
+    "vld1.16         {q1, q2}, [%[in]]           \n"
+    "vld1.16         {d0}, [%[constants]]        \n"
+
+    /* d2: in[0]
+     * d3: in[8]
+     * d4: in[4]
+     * d5: in[12]
+     */
+    "vswp            d3, d4                      \n"
+
+    /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
+     * q9 = {in[4], in[12]} * kC2 >> 16
+     */
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    /* d22 = a = in[0] + in[8]
+     * d23 = b = in[0] - in[8]
+     */
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    /* The multiplication should be x * kC1 >> 16
+     * However, with vqdmulh we get x * kC1 * 2 >> 16
+     * (multiply, double, return high half)
+     * We avoided this in kC2 by pre-shifting the constant.
+     * q8 = in[4]/[12] * kC1 >> 16
+     */
+    "vshr.s16        q8, q8, #1                  \n"
+
+    /* Add {in[4], in[12]} back after the multiplication. This is handled by
+     * adding 1 << 16 to kC1 in the libwebp C code.
+     */
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    /* d20 = c = in[4]*kC2 - in[12]*kC1
+     * d21 = d = in[4]*kC1 + in[12]*kC2
+     */
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    /* d2 = tmp[0] = a + d
+     * d3 = tmp[1] = b + c
+     * d4 = tmp[2] = b - c
+     * d5 = tmp[3] = a - d
+     */
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    "vswp            d3, d4                      \n"
+
+    /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+     * q9 = {tmp[4], tmp[12]} * kC2 >> 16
+     */
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    /* d22 = a = tmp[0] + tmp[8]
+     * d23 = b = tmp[0] - tmp[8]
+     */
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    /* See long winded explanations prior */
+    "vshr.s16        q8, q8, #1                  \n"
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    /* d20 = c = in[4]*kC2 - in[12]*kC1
+     * d21 = d = in[4]*kC1 + in[12]*kC2
+     */
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    /* d2 = tmp[0] = a + d
+     * d3 = tmp[1] = b + c
+     * d4 = tmp[2] = b - c
+     * d5 = tmp[3] = a - d
+     */
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vld1.32         d6[0], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d6[1], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d7[0], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d7[1], [%[dst]], %[kBPS]    \n"
+
+    "sub         %[dst], %[dst], %[kBPS], lsl #2 \n"
+
+    /* (val) + 4 >> 3 */
+    "vrshr.s16       d2, d2, #3                  \n"
+    "vrshr.s16       d3, d3, #3                  \n"
+    "vrshr.s16       d4, d4, #3                  \n"
+    "vrshr.s16       d5, d5, #3                  \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    /* Must accumulate before saturating */
+    "vmovl.u8        q8, d6                      \n"
+    "vmovl.u8        q9, d7                      \n"
+
+    "vqadd.s16       q1, q1, q8                  \n"
+    "vqadd.s16       q2, q2, q9                  \n"
+
+    "vqmovun.s16     d0, q1                      \n"
+    "vqmovun.s16     d1, q2                      \n"
+
+    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[1], [%[dst]]             \n"
+
+    : [in] "+r"(in), [dst] "+r"(dst)  /* modified registers */
+    : [kBPS] "r"(kBPS), [constants] "r"(constants)  /* constants */
+    : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11"  /* clobbered */
+  );
+}
+
+#endif    // USE_INTRINSICS
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
+  if (do_two) {
+    TransformOne(in + 16, dst + 4);
+  }
+}
+
+static void TransformDC(const int16_t* in, uint8_t* dst) {
+  const int16x8_t DC = vdupq_n_s16(in[0]);
+  Add4x4(DC, DC, dst);
+}
+
+//------------------------------------------------------------------------------
+
+#define STORE_WHT(dst, col, rows) do {                  \
+  *dst = vgetq_lane_s32(rows.val[0], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[1], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[2], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
+} while (0)
+
+static void TransformWHT(const int16_t* in, int16_t* out) {
+  int32x4x4_t tmp;
+
+  {
+    // Load the source.
+    const int16x4_t in00_03 = vld1_s16(in + 0);
+    const int16x4_t in04_07 = vld1_s16(in + 4);
+    const int16x4_t in08_11 = vld1_s16(in + 8);
+    const int16x4_t in12_15 = vld1_s16(in + 12);
+    const int32x4_t a0 = vaddl_s16(in00_03, in12_15);  // in[0..3] + in[12..15]
+    const int32x4_t a1 = vaddl_s16(in04_07, in08_11);  // in[4..7] + in[8..11]
+    const int32x4_t a2 = vsubl_s16(in04_07, in08_11);  // in[4..7] - in[8..11]
+    const int32x4_t a3 = vsubl_s16(in00_03, in12_15);  // in[0..3] - in[12..15]
+    tmp.val[0] = vaddq_s32(a0, a1);
+    tmp.val[1] = vaddq_s32(a3, a2);
+    tmp.val[2] = vsubq_s32(a0, a1);
+    tmp.val[3] = vsubq_s32(a3, a2);
+    // Arrange the temporary results column-wise.
+    tmp = Transpose4x4(tmp);
+  }
+
+  {
+    const int32x4_t kCst3 = vdupq_n_s32(3);
+    const int32x4_t dc = vaddq_s32(tmp.val[0], kCst3);  // add rounder
+    const int32x4_t a0 = vaddq_s32(dc, tmp.val[3]);
+    const int32x4_t a1 = vaddq_s32(tmp.val[1], tmp.val[2]);
+    const int32x4_t a2 = vsubq_s32(tmp.val[1], tmp.val[2]);
+    const int32x4_t a3 = vsubq_s32(dc, tmp.val[3]);
+
+    tmp.val[0] = vaddq_s32(a0, a1);
+    tmp.val[1] = vaddq_s32(a3, a2);
+    tmp.val[2] = vsubq_s32(a0, a1);
+    tmp.val[3] = vsubq_s32(a3, a2);
+
+    // right shift the results by 3.
+    tmp.val[0] = vshrq_n_s32(tmp.val[0], 3);
+    tmp.val[1] = vshrq_n_s32(tmp.val[1], 3);
+    tmp.val[2] = vshrq_n_s32(tmp.val[2], 3);
+    tmp.val[3] = vshrq_n_s32(tmp.val[3], 3);
+
+    STORE_WHT(out, 0, tmp);
+    STORE_WHT(out, 1, tmp);
+    STORE_WHT(out, 2, tmp);
+    STORE_WHT(out, 3, tmp);
+  }
+}
+
+#undef STORE_WHT
+
+//------------------------------------------------------------------------------
+
+#define MUL(a, b) (((a) * (b)) >> 16)
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  static const int kC1_full = 20091 + (1 << 16);
+  static const int kC2_full = 35468;
+  const int16x4_t A = vdup_n_s16(in[0]);
+  const int16x4_t c4 = vdup_n_s16(MUL(in[4], kC2_full));
+  const int16x4_t d4 = vdup_n_s16(MUL(in[4], kC1_full));
+  const int c1 = MUL(in[1], kC2_full);
+  const int d1 = MUL(in[1], kC1_full);
+  const uint64_t cd = (uint64_t)( d1 & 0xffff) <<  0 |
+                      (uint64_t)( c1 & 0xffff) << 16 |
+                      (uint64_t)(-c1 & 0xffff) << 32 |
+                      (uint64_t)(-d1 & 0xffff) << 48;
+  const int16x4_t CD = vcreate_s16(cd);
+  const int16x4_t B = vqadd_s16(A, CD);
+  const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
+  const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4));
+  Add4x4(m0_m1, m2_m3, dst);
+}
+#undef MUL
+
+#endif   // WEBP_USE_NEON
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitNEON(void);
+
+void VP8DspInitNEON(void) {
+#if defined(WEBP_USE_NEON)
+  VP8Transform = TransformTwo;
+  VP8TransformAC3 = TransformAC3;
+  VP8TransformDC = TransformDC;
+  VP8TransformWHT = TransformWHT;
+
+  VP8VFilter16 = VFilter16;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16 = HFilter16;
+#if !defined(WORK_AROUND_GCC)
+  VP8HFilter16i = HFilter16i;
+#endif
+  VP8VFilter8 = VFilter8;
+  VP8VFilter8i = VFilter8i;
+#if !defined(WORK_AROUND_GCC)
+  VP8HFilter8 = HFilter8;
+  VP8HFilter8i = HFilter8i;
+#endif
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+#endif   // WEBP_USE_NEON
+}
diff --git a/src/main/jni/libwebp/dsp/dec_sse2.c b/src/main/jni/libwebp/dsp/dec_sse2.c
new file mode 100644
index 000000000..c37a637f5
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dec_sse2.c
@@ -0,0 +1,978 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of some decoding functions (idct, loop filtering).
+//
+// Author: somnath@google.com (Somnath Banerjee)
+//         cduvivier@google.com (Christian Duvivier)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
+// one it seems => disable it by default. Uncomment the following to enable:
+// #define USE_TRANSFORM_AC3
+
+#include <emmintrin.h>
+#include "../dec/vp8i.h"
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
+  // This implementation makes use of 16-bit fixed point versions of two
+  // multiply constants:
+  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+  //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+  //
+  // To be able to use signed 16-bit integers, we use the following trick to
+  // have constants within range:
+  // - Associated constants are obtained by subtracting the 16-bit fixed point
+  //   version of one:
+  //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+  //      K1 = 85267  =>  k1 =  20091
+  //      K2 = 35468  =>  k2 = -30068
+  // - The multiplication of a variable by a constant become the sum of the
+  //   variable and the multiplication of that variable by the associated
+  //   constant:
+  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+  const __m128i k1 = _mm_set1_epi16(20091);
+  const __m128i k2 = _mm_set1_epi16(-30068);
+  __m128i T0, T1, T2, T3;
+
+  // Load and concatenate the transform coefficients (we'll do two transforms
+  // in parallel). In the case of only one transform, the second half of the
+  // vectors will just contain random value we'll never use nor store.
+  __m128i in0, in1, in2, in3;
+  {
+    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    // a00 a10 a20 a30   x x x x
+    // a01 a11 a21 a31   x x x x
+    // a02 a12 a22 a32   x x x x
+    // a03 a13 a23 a33   x x x x
+    if (do_two) {
+      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      in0 = _mm_unpacklo_epi64(in0, inB0);
+      in1 = _mm_unpacklo_epi64(in1, inB1);
+      in2 = _mm_unpacklo_epi64(in2, inB2);
+      in3 = _mm_unpacklo_epi64(in3, inB3);
+      // a00 a10 a20 a30   b00 b10 b20 b30
+      // a01 a11 a21 a31   b01 b11 b21 b31
+      // a02 a12 a22 a32   b02 b12 b22 b32
+      // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+  }
+
+  // Vertical pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i a = _mm_add_epi16(in0, in2);
+    const __m128i b = _mm_sub_epi16(in0, in2);
+    // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+    const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+    const __m128i c3 = _mm_sub_epi16(in1, in3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+    const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+    const __m128i d3 = _mm_add_epi16(in1, in3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 a22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i four = _mm_set1_epi16(4);
+    const __m128i dc = _mm_add_epi16(T0, four);
+    const __m128i a =  _mm_add_epi16(dc, T2);
+    const __m128i b =  _mm_sub_epi16(dc, T2);
+    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+    const __m128i c3 = _mm_sub_epi16(T1, T3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+    const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+    const __m128i d3 = _mm_add_epi16(T1, T3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+    const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+    const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+    const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+    const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 a22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Add inverse transform to 'dst' and store.
+  {
+    const __m128i zero = _mm_setzero_si128();
+    // Load the reference(s).
+    __m128i dst0, dst1, dst2, dst3;
+    if (do_two) {
+      // Load eight bytes/pixels per line.
+      dst0 = _mm_loadl_epi64((__m128i*)(dst + 0 * BPS));
+      dst1 = _mm_loadl_epi64((__m128i*)(dst + 1 * BPS));
+      dst2 = _mm_loadl_epi64((__m128i*)(dst + 2 * BPS));
+      dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
+    } else {
+      // Load four bytes/pixels per line.
+      dst0 = _mm_cvtsi32_si128(*(int*)(dst + 0 * BPS));
+      dst1 = _mm_cvtsi32_si128(*(int*)(dst + 1 * BPS));
+      dst2 = _mm_cvtsi32_si128(*(int*)(dst + 2 * BPS));
+      dst3 = _mm_cvtsi32_si128(*(int*)(dst + 3 * BPS));
+    }
+    // Convert to 16b.
+    dst0 = _mm_unpacklo_epi8(dst0, zero);
+    dst1 = _mm_unpacklo_epi8(dst1, zero);
+    dst2 = _mm_unpacklo_epi8(dst2, zero);
+    dst3 = _mm_unpacklo_epi8(dst3, zero);
+    // Add the inverse transform(s).
+    dst0 = _mm_add_epi16(dst0, T0);
+    dst1 = _mm_add_epi16(dst1, T1);
+    dst2 = _mm_add_epi16(dst2, T2);
+    dst3 = _mm_add_epi16(dst3, T3);
+    // Unsigned saturate to 8b.
+    dst0 = _mm_packus_epi16(dst0, dst0);
+    dst1 = _mm_packus_epi16(dst1, dst1);
+    dst2 = _mm_packus_epi16(dst2, dst2);
+    dst3 = _mm_packus_epi16(dst3, dst3);
+    // Store the results.
+    if (do_two) {
+      // Store eight bytes/pixels per line.
+      _mm_storel_epi64((__m128i*)(dst + 0 * BPS), dst0);
+      _mm_storel_epi64((__m128i*)(dst + 1 * BPS), dst1);
+      _mm_storel_epi64((__m128i*)(dst + 2 * BPS), dst2);
+      _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
+    } else {
+      // Store four bytes/pixels per line.
+      *(int*)(dst + 0 * BPS) = _mm_cvtsi128_si32(dst0);
+      *(int*)(dst + 1 * BPS) = _mm_cvtsi128_si32(dst1);
+      *(int*)(dst + 2 * BPS) = _mm_cvtsi128_si32(dst2);
+      *(int*)(dst + 3 * BPS) = _mm_cvtsi128_si32(dst3);
+    }
+  }
+}
+
+#if defined(USE_TRANSFORM_AC3)
+#define MUL(a, b) (((a) * (b)) >> 16)
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  static const int kC1 = 20091 + (1 << 16);
+  static const int kC2 = 35468;
+  const __m128i A = _mm_set1_epi16(in[0] + 4);
+  const __m128i c4 = _mm_set1_epi16(MUL(in[4], kC2));
+  const __m128i d4 = _mm_set1_epi16(MUL(in[4], kC1));
+  const int c1 = MUL(in[1], kC2);
+  const int d1 = MUL(in[1], kC1);
+  const __m128i CD = _mm_set_epi16(0, 0, 0, 0, -d1, -c1, c1, d1);
+  const __m128i B = _mm_adds_epi16(A, CD);
+  const __m128i m0 = _mm_adds_epi16(B, d4);
+  const __m128i m1 = _mm_adds_epi16(B, c4);
+  const __m128i m2 = _mm_subs_epi16(B, c4);
+  const __m128i m3 = _mm_subs_epi16(B, d4);
+  const __m128i zero = _mm_setzero_si128();
+  // Load the source pixels.
+  __m128i dst0 = _mm_cvtsi32_si128(*(int*)(dst + 0 * BPS));
+  __m128i dst1 = _mm_cvtsi32_si128(*(int*)(dst + 1 * BPS));
+  __m128i dst2 = _mm_cvtsi32_si128(*(int*)(dst + 2 * BPS));
+  __m128i dst3 = _mm_cvtsi32_si128(*(int*)(dst + 3 * BPS));
+  // Convert to 16b.
+  dst0 = _mm_unpacklo_epi8(dst0, zero);
+  dst1 = _mm_unpacklo_epi8(dst1, zero);
+  dst2 = _mm_unpacklo_epi8(dst2, zero);
+  dst3 = _mm_unpacklo_epi8(dst3, zero);
+  // Add the inverse transform.
+  dst0 = _mm_adds_epi16(dst0, _mm_srai_epi16(m0, 3));
+  dst1 = _mm_adds_epi16(dst1, _mm_srai_epi16(m1, 3));
+  dst2 = _mm_adds_epi16(dst2, _mm_srai_epi16(m2, 3));
+  dst3 = _mm_adds_epi16(dst3, _mm_srai_epi16(m3, 3));
+  // Unsigned saturate to 8b.
+  dst0 = _mm_packus_epi16(dst0, dst0);
+  dst1 = _mm_packus_epi16(dst1, dst1);
+  dst2 = _mm_packus_epi16(dst2, dst2);
+  dst3 = _mm_packus_epi16(dst3, dst3);
+  // Store the results.
+  *(int*)(dst + 0 * BPS) = _mm_cvtsi128_si32(dst0);
+  *(int*)(dst + 1 * BPS) = _mm_cvtsi128_si32(dst1);
+  *(int*)(dst + 2 * BPS) = _mm_cvtsi128_si32(dst2);
+  *(int*)(dst + 3 * BPS) = _mm_cvtsi128_si32(dst3);
+}
+#undef MUL
+#endif   // USE_TRANSFORM_AC3
+
+//------------------------------------------------------------------------------
+// Loop Filter (Paragraph 15)
+
+// Compute abs(p - q) = subs(p - q) OR subs(q - p)
+#define MM_ABS(p, q)  _mm_or_si128(                                            \
+    _mm_subs_epu8((q), (p)),                                                   \
+    _mm_subs_epu8((p), (q)))
+
+// Shift each byte of "x" by 3 bits while preserving by the sign bit.
+static WEBP_INLINE void SignedShift8b(__m128i* const x) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i signs = _mm_cmpgt_epi8(zero, *x);
+  const __m128i lo_0 = _mm_unpacklo_epi8(*x, signs);  // s8 -> s16 sign extend
+  const __m128i hi_0 = _mm_unpackhi_epi8(*x, signs);
+  const __m128i lo_1 = _mm_srai_epi16(lo_0, 3);
+  const __m128i hi_1 = _mm_srai_epi16(hi_0, 3);
+  *x = _mm_packs_epi16(lo_1, hi_1);
+}
+
+#define FLIP_SIGN_BIT2(a, b) {                                                 \
+  a = _mm_xor_si128(a, sign_bit);                                              \
+  b = _mm_xor_si128(b, sign_bit);                                              \
+}
+
+#define FLIP_SIGN_BIT4(a, b, c, d) {                                           \
+  FLIP_SIGN_BIT2(a, b);                                                        \
+  FLIP_SIGN_BIT2(c, d);                                                        \
+}
+
+// input/output is uint8_t
+static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  int hev_thresh, __m128i* const not_hev) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i t_1 = MM_ABS(*p1, *p0);
+  const __m128i t_2 = MM_ABS(*q1, *q0);
+
+  const __m128i h = _mm_set1_epi8(hev_thresh);
+  const __m128i t_3 = _mm_subs_epu8(t_1, h);  // abs(p1 - p0) - hev_tresh
+  const __m128i t_4 = _mm_subs_epu8(t_2, h);  // abs(q1 - q0) - hev_tresh
+
+  *not_hev = _mm_or_si128(t_3, t_4);
+  *not_hev = _mm_cmpeq_epi8(*not_hev, zero);  // not_hev <= t1 && not_hev <= t2
+}
+
+// input pixels are int8_t
+static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
+                                     const __m128i* const p0,
+                                     const __m128i* const q0,
+                                     const __m128i* const q1,
+                                     __m128i* const delta) {
+  // beware of addition order, for saturation!
+  const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1);   // p1 - q1
+  const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0);   // q0 - p0
+  const __m128i s1 = _mm_adds_epi8(p1_q1, q0_p0);  // p1 - q1 + 1 * (q0 - p0)
+  const __m128i s2 = _mm_adds_epi8(q0_p0, s1);     // p1 - q1 + 2 * (q0 - p0)
+  const __m128i s3 = _mm_adds_epi8(q0_p0, s2);     // p1 - q1 + 3 * (q0 - p0)
+  *delta = s3;
+}
+
+// input and output are int8_t
+static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
+                                       const __m128i* const fl) {
+  const __m128i k3 = _mm_set1_epi8(3);
+  const __m128i k4 = _mm_set1_epi8(4);
+  __m128i v3 = _mm_adds_epi8(*fl, k3);
+  __m128i v4 = _mm_adds_epi8(*fl, k4);
+
+  SignedShift8b(&v4);                  // v4 >> 3
+  SignedShift8b(&v3);                  // v3 >> 3
+  *q0 = _mm_subs_epi8(*q0, v4);        // q0 -= v4
+  *p0 = _mm_adds_epi8(*p0, v3);        // p0 += v3
+}
+
+// Updates values of 2 pixels at MB edge during complex filtering.
+// Update operations:
+// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
+// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
+static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
+                                      const __m128i* const a0_lo,
+                                      const __m128i* const a0_hi) {
+  const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
+  const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
+  const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  *pi = _mm_adds_epi8(*pi, delta);
+  *qi = _mm_subs_epi8(*qi, delta);
+  FLIP_SIGN_BIT2(*pi, *qi);
+}
+
+// input pixels are uint8_t
+static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, __m128i* const mask) {
+  const __m128i m_thresh = _mm_set1_epi8(thresh);
+  const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
+  const __m128i kFE = _mm_set1_epi8(0xFE);
+  const __m128i t2 = _mm_and_si128(t1, kFE);  // set lsb of each byte to zero
+  const __m128i t3 = _mm_srli_epi16(t2, 1);   // abs(p1 - q1) / 2
+
+  const __m128i t4 = MM_ABS(*p0, *q0);        // abs(p0 - q0)
+  const __m128i t5 = _mm_adds_epu8(t4, t4);   // abs(p0 - q0) * 2
+  const __m128i t6 = _mm_adds_epu8(t5, t3);   // abs(p0-q0)*2 + abs(p1-q1)/2
+
+  const __m128i t7 = _mm_subs_epu8(t6, m_thresh);  // mask <= m_thresh
+  *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());
+}
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// Applies filter on 2 pixels (p0 and q0)
+static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  int thresh) {
+  __m128i a, mask;
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  // convert p1/q1 to int8_t (for GetBaseDelta)
+  const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
+  const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
+
+  NeedsFilter(p1, p0, q0, q1, thresh, &mask);
+
+  FLIP_SIGN_BIT2(*p0, *q0);
+  GetBaseDelta(&p1s, p0, q0, &q1s, &a);
+  a = _mm_and_si128(a, mask);     // mask filter values we don't care about
+  DoSimpleFilter(p0, q0, &a);
+  FLIP_SIGN_BIT2(*p0, *q0);
+}
+
+// Applies filter on 4 pixels (p1, p0, q0 and q1)
+static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  const __m128i* const mask, int hev_thresh) {
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  const __m128i k64 = _mm_set1_epi8(0x40);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i not_hev;
+  __m128i t1, t2, t3;
+
+  // compute hev mask
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+
+  // convert to signed values
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+
+  t1 = _mm_subs_epi8(*p1, *q1);        // p1 - q1
+  t1 = _mm_andnot_si128(not_hev, t1);  // hev(p1 - q1)
+  t2 = _mm_subs_epi8(*q0, *p0);        // q0 - p0
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 1 * (q0 - p0)
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 2 * (q0 - p0)
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
+  t1 = _mm_and_si128(t1, *mask);       // mask filter values we don't care about
+
+  t2 = _mm_set1_epi8(3);
+  t3 = _mm_set1_epi8(4);
+  t2 = _mm_adds_epi8(t1, t2);        // 3 * (q0 - p0) + (p1 - q1) + 3
+  t3 = _mm_adds_epi8(t1, t3);        // 3 * (q0 - p0) + (p1 - q1) + 4
+  SignedShift8b(&t2);                // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  SignedShift8b(&t3);                // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+  *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
+  *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
+  FLIP_SIGN_BIT2(*p0, *q0);
+
+  // this is equivalent to signed (a + 1) >> 1 calculation
+  t2 = _mm_add_epi8(t3, sign_bit);
+  t3 = _mm_avg_epu8(t2, zero);
+  t3 = _mm_sub_epi8(t3, k64);
+
+  t3 = _mm_and_si128(not_hev, t3);   // if !hev
+  *q1 = _mm_subs_epi8(*q1, t3);      // q1 -= t3
+  *p1 = _mm_adds_epi8(*p1, t3);      // p1 += t3
+  FLIP_SIGN_BIT2(*p1, *q1);
+}
+
+// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
+static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
+                                  __m128i* const p0, __m128i* const q0,
+                                  __m128i* const q1, __m128i* const q2,
+                                  const __m128i* const mask, int hev_thresh) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  __m128i a, not_hev;
+
+  // compute hev mask
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
+
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+  FLIP_SIGN_BIT2(*p2, *q2);
+  GetBaseDelta(p1, p0, q0, q1, &a);
+
+  { // do simple filter on pixels with hev
+    const __m128i m = _mm_andnot_si128(not_hev, *mask);
+    const __m128i f = _mm_and_si128(a, m);
+    DoSimpleFilter(p0, q0, &f);
+  }
+
+  { // do strong filter on pixels with not hev
+    const __m128i k9 = _mm_set1_epi16(0x0900);
+    const __m128i k63 = _mm_set1_epi16(63);
+
+    const __m128i m = _mm_and_si128(not_hev, *mask);
+    const __m128i f = _mm_and_si128(a, m);
+
+    const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
+    const __m128i f_hi = _mm_unpackhi_epi8(zero, f);
+
+    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9);    // Filter (lo) * 9
+    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9);    // Filter (hi) * 9
+
+    const __m128i a2_lo = _mm_add_epi16(f9_lo, k63);    // Filter * 9 + 63
+    const __m128i a2_hi = _mm_add_epi16(f9_hi, k63);    // Filter * 9 + 63
+
+    const __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo);  // Filter * 18 + 63
+    const __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi);  // Filter * 18 + 63
+
+    const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo);  // Filter * 27 + 63
+    const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi);  // Filter * 27 + 63
+
+    Update2Pixels(p2, q2, &a2_lo, &a2_hi);
+    Update2Pixels(p1, q1, &a1_lo, &a1_hi);
+    Update2Pixels(p0, q0, &a0_lo, &a0_hi);
+  }
+}
+
+// reads 8 rows across a vertical edge.
+//
+// TODO(somnath): Investigate _mm_shuffle* also see if it can be broken into
+// two Load4x4() to avoid code duplication.
+static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
+                                __m128i* const p, __m128i* const q) {
+  __m128i t1, t2;
+
+  // Load 0th, 1st, 4th and 5th rows
+  __m128i r0 =  _mm_cvtsi32_si128(*((int*)&b[0 * stride]));  // 03 02 01 00
+  __m128i r1 =  _mm_cvtsi32_si128(*((int*)&b[1 * stride]));  // 13 12 11 10
+  __m128i r4 =  _mm_cvtsi32_si128(*((int*)&b[4 * stride]));  // 43 42 41 40
+  __m128i r5 =  _mm_cvtsi32_si128(*((int*)&b[5 * stride]));  // 53 52 51 50
+
+  r0 = _mm_unpacklo_epi32(r0, r4);               // 43 42 41 40 03 02 01 00
+  r1 = _mm_unpacklo_epi32(r1, r5);               // 53 52 51 50 13 12 11 10
+
+  // t1 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
+  t1 = _mm_unpacklo_epi8(r0, r1);
+
+  // Load 2nd, 3rd, 6th and 7th rows
+  r0 =  _mm_cvtsi32_si128(*((int*)&b[2 * stride]));          // 23 22 21 22
+  r1 =  _mm_cvtsi32_si128(*((int*)&b[3 * stride]));          // 33 32 31 30
+  r4 =  _mm_cvtsi32_si128(*((int*)&b[6 * stride]));          // 63 62 61 60
+  r5 =  _mm_cvtsi32_si128(*((int*)&b[7 * stride]));          // 73 72 71 70
+
+  r0 = _mm_unpacklo_epi32(r0, r4);               // 63 62 61 60 23 22 21 20
+  r1 = _mm_unpacklo_epi32(r1, r5);               // 73 72 71 70 33 32 31 30
+
+  // t2 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
+  t2 = _mm_unpacklo_epi8(r0, r1);
+
+  // t1 = 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+  // t2 = 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+  r0 = t1;
+  t1 = _mm_unpacklo_epi16(t1, t2);
+  t2 = _mm_unpackhi_epi16(r0, t2);
+
+  // *p = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+  // *q = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+  *p = _mm_unpacklo_epi32(t1, t2);
+  *q = _mm_unpackhi_epi32(t1, t2);
+}
+
+static WEBP_INLINE void Load16x4(const uint8_t* const r0,
+                                 const uint8_t* const r8,
+                                 int stride,
+                                 __m128i* const p1, __m128i* const p0,
+                                 __m128i* const q0, __m128i* const q1) {
+  __m128i t1, t2;
+  // Assume the pixels around the edge (|) are numbered as follows
+  //                00 01 | 02 03
+  //                10 11 | 12 13
+  //                 ...  |  ...
+  //                e0 e1 | e2 e3
+  //                f0 f1 | f2 f3
+  //
+  // r0 is pointing to the 0th row (00)
+  // r8 is pointing to the 8th row (80)
+
+  // Load
+  // p1 = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+  // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+  // p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+  // q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+  Load8x4(r0, stride, p1, q0);
+  Load8x4(r8, stride, p0, q1);
+
+  t1 = *p1;
+  t2 = *q0;
+  // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+  // p0 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+  // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+  // q1 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+  *p1 = _mm_unpacklo_epi64(t1, *p0);
+  *p0 = _mm_unpackhi_epi64(t1, *p0);
+  *q0 = _mm_unpacklo_epi64(t2, *q1);
+  *q1 = _mm_unpackhi_epi64(t2, *q1);
+}
+
+static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
+  int i;
+  for (i = 0; i < 4; ++i, dst += stride) {
+    *((int32_t*)dst) = _mm_cvtsi128_si32(*x);
+    *x = _mm_srli_si128(*x, 4);
+  }
+}
+
+// Transpose back and store
+static WEBP_INLINE void Store16x4(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  uint8_t* r0, uint8_t* r8,
+                                  int stride) {
+  __m128i t1, p1_s, p0_s, q0_s, q1_s;
+
+  // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+  // p1 = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+  t1 = *p0;
+  p0_s = _mm_unpacklo_epi8(*p1, t1);
+  p1_s = _mm_unpackhi_epi8(*p1, t1);
+
+  // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+  // q1 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+  t1 = *q0;
+  q0_s = _mm_unpacklo_epi8(t1, *q1);
+  q1_s = _mm_unpackhi_epi8(t1, *q1);
+
+  // p0 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+  // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+  t1 = p0_s;
+  p0_s = _mm_unpacklo_epi16(t1, q0_s);
+  q0_s = _mm_unpackhi_epi16(t1, q0_s);
+
+  // p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+  // q1 = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+  t1 = p1_s;
+  p1_s = _mm_unpacklo_epi16(t1, q1_s);
+  q1_s = _mm_unpackhi_epi16(t1, q1_s);
+
+  Store4x4(&p0_s, r0, stride);
+  r0 += 4 * stride;
+  Store4x4(&q0_s, r0, stride);
+
+  Store4x4(&p1_s, r8, stride);
+  r8 += 4 * stride;
+  Store4x4(&q1_s, r8, stride);
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  // Load
+  __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
+  __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
+  __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
+  __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
+
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+
+  // Store
+  _mm_storeu_si128((__m128i*)&p[-stride], p0);
+  _mm_storeu_si128((__m128i*)&p[0], q0);
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  __m128i p1, p0, q0, q1;
+
+  p -= 2;  // beginning of p1
+
+  Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+  Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+#define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
+  m = MM_ABS(p1, p0);                                                          \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+} while (0)
+
+#define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
+  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+} while (0)
+
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
+  e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]);                            \
+  e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]);                            \
+  e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]);                            \
+  e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]);                            \
+}
+
+#define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
+  const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
+  const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]);                 \
+  p = _mm_unpacklo_epi64(U, V);                                                \
+} while (0)
+
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
+  LOADUV_H_EDGE(e1, u, v, 0 * stride);                                         \
+  LOADUV_H_EDGE(e2, u, v, 1 * stride);                                         \
+  LOADUV_H_EDGE(e3, u, v, 2 * stride);                                         \
+  LOADUV_H_EDGE(e4, u, v, 3 * stride);                                         \
+}
+
+#define STOREUV(p, u, v, stride) {                                             \
+  _mm_storel_epi64((__m128i*)&u[(stride)], p);                                 \
+  p = _mm_srli_si128(p, 8);                                                    \
+  _mm_storel_epi64((__m128i*)&v[(stride)], p);                                 \
+}
+
+static WEBP_INLINE void ComplexMask(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, int ithresh,
+                                    __m128i* const mask) {
+  const __m128i it = _mm_set1_epi8(ithresh);
+  const __m128i diff = _mm_subs_epu8(*mask, it);
+  const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
+  __m128i filter_mask;
+  NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
+  *mask = _mm_and_si128(thresh_mask, filter_mask);
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  __m128i t1;
+  __m128i mask;
+  __m128i p2, p1, p0, q0, q1, q2;
+
+  // Load p3, p2, p1, p0
+  LOAD_H_EDGES4(p - 4 * stride, stride, t1, p2, p1, p0);
+  MAX_DIFF1(t1, p2, p1, p0, mask);
+
+  // Load q0, q1, q2, q3
+  LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
+  MAX_DIFF2(t1, q2, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  // Store
+  _mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
+  _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
+  _mm_storeu_si128((__m128i*)&p[-1 * stride], p0);
+  _mm_storeu_si128((__m128i*)&p[+0 * stride], q0);
+  _mm_storeu_si128((__m128i*)&p[+1 * stride], q1);
+  _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+
+  uint8_t* const b = p - 4;
+  Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  MAX_DIFF1(p3, p2, p1, p0, mask);
+
+  Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
+  MAX_DIFF2(q3, q2, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
+  Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  __m128i p3, p2, p1, p0;   // loop invariants
+
+  LOAD_H_EDGES4(p, stride, p3, p2, p1, p0);  // prologue
+
+  for (k = 3; k > 0; --k) {
+    __m128i mask, tmp1, tmp2;
+    uint8_t* const b = p + 2 * stride;   // beginning of p1
+    p += 4 * stride;
+
+    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    LOAD_H_EDGES4(p, stride, p3, p2, tmp1, tmp2);
+    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
+
+    // p3 and p2 are not just temporary variables here: they will be
+    // re-used for next span. And q2/q3 will become p1/p0 accordingly.
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+
+    // Store
+    _mm_storeu_si128((__m128i*)&b[0 * stride], p1);
+    _mm_storeu_si128((__m128i*)&b[1 * stride], p0);
+    _mm_storeu_si128((__m128i*)&b[2 * stride], p3);
+    _mm_storeu_si128((__m128i*)&b[3 * stride], p2);
+
+    // rotate samples
+    p1 = tmp1;
+    p0 = tmp2;
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  __m128i p3, p2, p1, p0;   // loop invariants
+
+  Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue
+
+  for (k = 3; k > 0; --k) {
+    __m128i mask, tmp1, tmp2;
+    uint8_t* const b = p + 2;   // beginning of p1
+
+    p += 4;  // beginning of q0 (and next span)
+
+    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
+    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
+
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
+
+    Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
+
+    // rotate samples
+    p1 = tmp1;
+    p0 = tmp2;
+  }
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, p2, p1, p0, q0, q1, q2;
+
+  // Load p3, p2, p1, p0
+  LOADUV_H_EDGES4(u - 4 * stride, v - 4 * stride, stride, t1, p2, p1, p0);
+  MAX_DIFF1(t1, p2, p1, p0, mask);
+
+  // Load q0, q1, q2, q3
+  LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
+  MAX_DIFF2(t1, q2, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  // Store
+  STOREUV(p2, u, v, -3 * stride);
+  STOREUV(p1, u, v, -2 * stride);
+  STOREUV(p0, u, v, -1 * stride);
+  STOREUV(q0, u, v, 0 * stride);
+  STOREUV(q1, u, v, 1 * stride);
+  STOREUV(q2, u, v, 2 * stride);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+
+  uint8_t* const tu = u - 4;
+  uint8_t* const tv = v - 4;
+  Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  MAX_DIFF1(p3, p2, p1, p0, mask);
+
+  Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
+  MAX_DIFF2(q3, q2, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
+  Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+
+  // Load p3, p2, p1, p0
+  LOADUV_H_EDGES4(u, v, stride, t2, t1, p1, p0);
+  MAX_DIFF1(t2, t1, p1, p0, mask);
+
+  u += 4 * stride;
+  v += 4 * stride;
+
+  // Load q0, q1, q2, q3
+  LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
+  MAX_DIFF2(t2, t1, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+  // Store
+  STOREUV(p1, u, v, -2 * stride);
+  STOREUV(p0, u, v, -1 * stride);
+  STOREUV(q0, u, v, 0 * stride);
+  STOREUV(q1, u, v, 1 * stride);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+  Load16x4(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
+  MAX_DIFF1(t2, t1, p1, p0, mask);
+
+  u += 4;  // beginning of q0
+  v += 4;
+  Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
+  MAX_DIFF2(t2, t1, q1, q0, mask);
+
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+  u -= 2;  // beginning of p1
+  v -= 2;
+  Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
+}
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitSSE2(void);
+
+void VP8DspInitSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  VP8Transform = Transform;
+#if defined(USE_TRANSFORM_AC3)
+  VP8TransformAC3 = TransformAC3;
+#endif
+
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+#endif   // WEBP_USE_SSE2
+}
diff --git a/src/main/jni/libwebp/dsp/dsp.h b/src/main/jni/libwebp/dsp/dsp.h
new file mode 100644
index 000000000..52c44b2dc
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/dsp.h
@@ -0,0 +1,293 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   Speed-critical functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_DSP_H_
+#define WEBP_DSP_DSP_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// CPU detection
+
+#if defined(__GNUC__)
+# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+# define LOCAL_GCC_PREREQ(maj, min) \
+    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_GCC_VERSION 0
+# define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#ifdef __clang__
+# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
+# define LOCAL_CLANG_PREREQ(maj, min) \
+    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_CLANG_VERSION 0
+# define LOCAL_CLANG_PREREQ(maj, min) 0
+#endif  // __clang__
+
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
+#define WEBP_USE_AVX2
+#endif
+
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+#define WEBP_ANDROID_NEON  // Android targets that might support NEON
+#endif
+
+#if defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || defined(__aarch64__)
+#define WEBP_USE_NEON
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#endif
+#endif
+
+typedef enum {
+  kSSE2,
+  kSSE3,
+  kAVX,
+  kAVX2,
+  kNEON,
+  kMIPS32
+} CPUFeature;
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+extern VP8CPUInfo VP8GetCPUInfo;
+
+//------------------------------------------------------------------------------
+// Encoding
+
+// Transforms
+// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
+//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
+typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                        int do_two);
+typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
+typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
+extern VP8Idct VP8ITransform;
+extern VP8Fdct VP8FTransform;
+extern VP8WHT VP8FTransformWHT;
+// Predictions
+// *dst is the destination block. *top and *left can be NULL.
+typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
+                              const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
+extern VP8Intra4Preds VP8EncPredLuma4;
+extern VP8IntraPreds VP8EncPredLuma16;
+extern VP8IntraPreds VP8EncPredChroma8;
+
+typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
+extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
+typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
+                          const uint16_t* const weights);
+extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
+
+typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
+extern VP8BlockCopy VP8Copy4x4;
+// Quantization
+struct VP8Matrix;   // forward declaration
+typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
+                                const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlock VP8EncQuantizeBlock;
+
+// specific to 2nd transform:
+typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
+                                   const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
+
+// Collect histogram for susceptibility calculation and accumulate in histo[].
+struct VP8Histogram;
+typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+                          int start_block, int end_block,
+                          struct VP8Histogram* const histo);
+extern const int VP8DspScan[16 + 4 + 4];
+extern VP8CHisto VP8CollectHistogram;
+
+void VP8EncDspInit(void);   // must be called before using any of the above
+
+//------------------------------------------------------------------------------
+// Decoding
+
+typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
+// when doing two transforms, coeffs is actually int16_t[2][16].
+typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
+extern VP8DecIdct2 VP8Transform;
+extern VP8DecIdct VP8TransformAC3;
+extern VP8DecIdct VP8TransformUV;
+extern VP8DecIdct VP8TransformDC;
+extern VP8DecIdct VP8TransformDCUV;
+extern VP8WHT VP8TransformWHT;
+
+// *dst is the destination block, with stride BPS. Boundary samples are
+// assumed accessible when needed.
+typedef void (*VP8PredFunc)(uint8_t* dst);
+extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
+
+// clipping tables (for filtering)
+extern const int8_t* const VP8ksclip1;  // clips [-1020, 1020] to [-128, 127]
+extern const int8_t* const VP8ksclip2;  // clips [-112, 112] to [-16, 15]
+extern const uint8_t* const VP8kclip1;  // clips [-255,511] to [0,255]
+extern const uint8_t* const VP8kabs0;   // abs(x) for x in [-255,255]
+void VP8InitClipTables(void);           // must be called first
+
+// simple filter (only for luma)
+typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
+extern VP8SimpleFilterFunc VP8SimpleVFilter16;
+extern VP8SimpleFilterFunc VP8SimpleHFilter16;
+extern VP8SimpleFilterFunc VP8SimpleVFilter16i;  // filter 3 inner edges
+extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+// regular filter (on both macroblock edges and inner edges)
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
+                                  int thresh, int ithresh, int hev_t);
+typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
+                                    int thresh, int ithresh, int hev_t);
+// on outer edge
+extern VP8LumaFilterFunc VP8VFilter16;
+extern VP8LumaFilterFunc VP8HFilter16;
+extern VP8ChromaFilterFunc VP8VFilter8;
+extern VP8ChromaFilterFunc VP8HFilter8;
+
+// on inner edge
+extern VP8LumaFilterFunc VP8VFilter16i;   // filtering 3 inner edges altogether
+extern VP8LumaFilterFunc VP8HFilter16i;
+extern VP8ChromaFilterFunc VP8VFilter8i;  // filtering u and v altogether
+extern VP8ChromaFilterFunc VP8HFilter8i;
+
+// must be called before anything using the above
+void VP8DspInit(void);
+
+//------------------------------------------------------------------------------
+// WebP I/O
+
+#define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support
+
+// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
+// bottom_y can be NULL if only one line of output is needed (at top/bottom).
+typedef void (*WebPUpsampleLinePairFunc)(
+    const uint8_t* top_y, const uint8_t* bottom_y,
+    const uint8_t* top_u, const uint8_t* top_v,
+    const uint8_t* cur_u, const uint8_t* cur_v,
+    uint8_t* top_dst, uint8_t* bottom_dst, int len);
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB(A) modes
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+#endif    // FANCY_UPSAMPLING
+
+// Per-row point-sampling methods.
+typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
+                                   const uint8_t* u, const uint8_t* v,
+                                   uint8_t* dst, int len);
+// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
+void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
+                             const uint8_t* u, const uint8_t* v, int uv_stride,
+                             uint8_t* dst, int dst_stride,
+                             int width, int height, WebPSamplerRowFunc func);
+
+// Sampling functions to convert rows of YUV to RGB(A)
+extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
+
+// General function for converting two lines of ARGB or RGBA.
+// 'alpha_is_last' should be true if 0xff000000 is stored in memory as
+// as 0x00, 0x00, 0x00, 0xff (little endian).
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
+
+// YUV444->RGB converters
+typedef void (*WebPYUV444Converter)(const uint8_t* y,
+                                    const uint8_t* u, const uint8_t* v,
+                                    uint8_t* dst, int len);
+
+extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+
+// Must be called before using the WebPUpsamplers[] (and for premultiplied
+// colorspaces like rgbA, rgbA4444, etc)
+void WebPInitUpsamplers(void);
+// Must be called before using WebPSamplers[]
+void WebPInitSamplers(void);
+
+//------------------------------------------------------------------------------
+// Utilities for processing transparent channel.
+
+// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
+// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
+extern void (*WebPApplyAlphaMultiply)(
+    uint8_t* rgba, int alpha_first, int w, int h, int stride);
+
+// Same, buf specifically for RGBA4444 format
+extern void (*WebPApplyAlphaMultiply4444)(
+    uint8_t* rgba4444, int w, int h, int stride);
+
+// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
+// (this is the opposite of WebPDispatchAlpha).
+// Returns true if there's only trivial 0xff alpha values.
+extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
+                               int width, int height,
+                               uint8_t* alpha, int alpha_stride);
+
+// Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B).
+// Un-Multiply operation transforms x into x * 255 / A.
+
+// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
+extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+
+// Same a WebPMultARGBRow(), but for several rows.
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
+                      int inverse);
+
+// Same for a row of single values, with side alpha values.
+extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
+                           int width, int inverse);
+
+// Same a WebPMultRow(), but for several 'num_rows' rows.
+void WebPMultRows(uint8_t* ptr, int stride,
+                  const uint8_t* alpha, int alpha_stride,
+                  int width, int num_rows, int inverse);
+
+// To be called first before using the above.
+void WebPInitAlphaProcessing(void);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DSP_DSP_H_ */
diff --git a/src/main/jni/libwebp/dsp/enc.c b/src/main/jni/libwebp/dsp/enc.c
new file mode 100644
index 000000000..e4ea8cb8a
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/enc.c
@@ -0,0 +1,741 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Speed-critical encoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>  // for abs()
+
+#include "./dsp.h"
+#include "../enc/vp8enci.h"
+
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int clip_max(int v, int max) {
+  return (v > max) ? max : v;
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+const int VP8DspScan[16 + 4 + 4] = {
+  // Luma
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+
+  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
+  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+};
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  int j;
+  for (j = start_block; j < end_block; ++j) {
+    int k;
+    int16_t out[16];
+
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin.
+    for (k = 0; k < 16; ++k) {
+      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
+      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
+      histo->distribution[clipped_value]++;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// run-time tables (~4k)
+
+static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
+
+// We declare this variable 'volatile' to prevent instruction reordering
+// and make sure it's set to true _last_ (so as to be thread-safe)
+static volatile int tables_ok = 0;
+
+static void InitTables(void) {
+  if (!tables_ok) {
+    int i;
+    for (i = -255; i <= 255 + 255; ++i) {
+      clip1[255 + i] = clip_8b(i);
+    }
+    tables_ok = 1;
+  }
+}
+
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+#define STORE(x, y, v) \
+  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+#define MUL(a, b) (((a) * (b)) >> 16)
+
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {
+  int C[4 * 4], *tmp;
+  int i;
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // vertical pass
+    const int a = in[0] + in[8];
+    const int b = in[0] - in[8];
+    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
+    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
+    tmp[0] = a + d;
+    tmp[1] = b + c;
+    tmp[2] = b - c;
+    tmp[3] = a - d;
+    tmp += 4;
+    in++;
+  }
+
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // horizontal pass
+    const int dc = tmp[0] + 4;
+    const int a =  dc +  tmp[8];
+    const int b =  dc -  tmp[8];
+    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
+    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    STORE(0, i, a + d);
+    STORE(1, i, b + c);
+    STORE(2, i, b - c);
+    STORE(3, i, a - d);
+    tmp++;
+  }
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {
+  ITransformOne(ref, in, dst);
+  if (do_two) {
+    ITransformOne(ref + 4, in + 16, dst + 4);
+  }
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  int i;
+  int tmp[16];
+  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
+    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
+    const int d1 = src[1] - ref[1];
+    const int d2 = src[2] - ref[2];
+    const int d3 = src[3] - ref[3];
+    const int a0 = (d0 + d3);         // 10b                      [-510,510]
+    const int a1 = (d1 + d2);
+    const int a2 = (d1 - d2);
+    const int a3 = (d0 - d3);
+    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
+    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
+    tmp[2 + i * 4] = (a0 - a1) * 8;
+    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
+  }
+  for (i = 0; i < 4; ++i) {
+    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
+    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
+    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
+    const int a3 = (tmp[0 + i] - tmp[12 + i]);
+    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
+    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
+    out[8 + i] = (a0 - a1 + 7) >> 4;
+    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
+  }
+}
+
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+  // input is 12b signed
+  int32_t tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i, in += 64) {
+    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
+    const int a1 = (in[1 * 16] + in[3 * 16]);
+    const int a2 = (in[1 * 16] - in[3 * 16]);
+    const int a3 = (in[0 * 16] - in[2 * 16]);
+    tmp[0 + i * 4] = a0 + a1;   // 14b
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  for (i = 0; i < 4; ++i) {
+    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
+    const int a1 = (tmp[4 + i] + tmp[12+ i]);
+    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a3 = (tmp[0 + i] - tmp[8 + i]);
+    const int b0 = a0 + a1;    // 16b
+    const int b1 = a3 + a2;
+    const int b2 = a3 - a2;
+    const int b3 = a0 - a1;
+    out[ 0 + i] = b0 >> 1;     // 15b
+    out[ 4 + i] = b1 >> 1;
+    out[ 8 + i] = b2 >> 1;
+    out[12 + i] = b3 >> 1;
+  }
+}
+
+#undef MUL
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
+  int j;
+  for (j = 0; j < size; ++j) {
+    memset(dst + j * BPS, value, size);
+  }
+}
+
+static WEBP_INLINE void VerticalPred(uint8_t* dst,
+                                     const uint8_t* top, int size) {
+  int j;
+  if (top) {
+    for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
+  } else {
+    Fill(dst, 127, size);
+  }
+}
+
+static WEBP_INLINE void HorizontalPred(uint8_t* dst,
+                                       const uint8_t* left, int size) {
+  if (left) {
+    int j;
+    for (j = 0; j < size; ++j) {
+      memset(dst + j * BPS, left[j], size);
+    }
+  } else {
+    Fill(dst, 129, size);
+  }
+}
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
+                                   const uint8_t* top, int size) {
+  int y;
+  if (left) {
+    if (top) {
+      const uint8_t* const clip = clip1 + 255 - left[-1];
+      for (y = 0; y < size; ++y) {
+        const uint8_t* const clip_table = clip + left[y];
+        int x;
+        for (x = 0; x < size; ++x) {
+          dst[x] = clip_table[top[x]];
+        }
+        dst += BPS;
+      }
+    } else {
+      HorizontalPred(dst, left, size);
+    }
+  } else {
+    // true motion without left samples (hence: with default 129 value)
+    // is equivalent to VE prediction where you just copy the top samples.
+    // Note that if top samples are not available, the default value is
+    // then 129, and not 127 as in the VerticalPred case.
+    if (top) {
+      VerticalPred(dst, top, size);
+    } else {
+      Fill(dst, 129, size);
+    }
+  }
+}
+
+static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
+                               const uint8_t* top,
+                               int size, int round, int shift) {
+  int DC = 0;
+  int j;
+  if (top) {
+    for (j = 0; j < size; ++j) DC += top[j];
+    if (left) {   // top and left present
+      for (j = 0; j < size; ++j) DC += left[j];
+    } else {      // top, but no left
+      DC += DC;
+    }
+    DC = (DC + round) >> shift;
+  } else if (left) {   // left but no top
+    for (j = 0; j < size; ++j) DC += left[j];
+    DC += DC;
+    DC = (DC + round) >> shift;
+  } else {   // no top, no left, nothing.
+    DC = 0x80;
+  }
+  Fill(dst, DC, size);
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {
+  // U block
+  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+  // V block
+  dst += 8;
+  if (top) top += 8;
+  if (left) left += 16;
+  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+static void Intra16Preds(uint8_t* dst,
+                         const uint8_t* left, const uint8_t* top) {
+  DCMode(I16DC16 + dst, left, top, 16, 16, 5);
+  VerticalPred(I16VE16 + dst, top, 16);
+  HorizontalPred(I16HE16 + dst, left, 16);
+  TrueMotion(I16TM16 + dst, left, top, 16);
+}
+
+//------------------------------------------------------------------------------
+// luma 4x4 prediction
+
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+static void VE4(uint8_t* dst, const uint8_t* top) {    // vertical
+  const uint8_t vals[4] = {
+    AVG3(top[-1], top[0], top[1]),
+    AVG3(top[ 0], top[1], top[2]),
+    AVG3(top[ 1], top[2], top[3]),
+    AVG3(top[ 2], top[3], top[4])
+  };
+  int i;
+  for (i = 0; i < 4; ++i) {
+    memcpy(dst + i * BPS, vals, 4);
+  }
+}
+
+static void HE4(uint8_t* dst, const uint8_t* top) {    // horizontal
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);
+  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);
+  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);
+  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);
+}
+
+static void DC4(uint8_t* dst, const uint8_t* top) {
+  uint32_t dc = 4;
+  int i;
+  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
+  Fill(dst, dc >> 3, 4);
+}
+
+static void RD4(uint8_t* dst, const uint8_t* top) {
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  DST(0, 3)                                     = AVG3(J, K, L);
+  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
+  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
+  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
+  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
+  DST(3, 0)                                     = AVG3(D, C, B);
+}
+
+static void LD4(uint8_t* dst, const uint8_t* top) {
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  const int E = top[4];
+  const int F = top[5];
+  const int G = top[6];
+  const int H = top[7];
+  DST(0, 0)                                     = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
+  DST(3, 3)                                     = AVG3(G, H, H);
+}
+
+static void VR4(uint8_t* dst, const uint8_t* top) {
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  DST(0, 0) = DST(1, 2) = AVG2(X, A);
+  DST(1, 0) = DST(2, 2) = AVG2(A, B);
+  DST(2, 0) = DST(3, 2) = AVG2(B, C);
+  DST(3, 0)             = AVG2(C, D);
+
+  DST(0, 3) =             AVG3(K, J, I);
+  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+  DST(3, 1) =             AVG3(B, C, D);
+}
+
+static void VL4(uint8_t* dst, const uint8_t* top) {
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  const int E = top[4];
+  const int F = top[5];
+  const int G = top[6];
+  const int H = top[7];
+  DST(0, 0) =             AVG2(A, B);
+  DST(1, 0) = DST(0, 2) = AVG2(B, C);
+  DST(2, 0) = DST(1, 2) = AVG2(C, D);
+  DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+  DST(0, 1) =             AVG3(A, B, C);
+  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+              DST(3, 2) = AVG3(E, F, G);
+              DST(3, 3) = AVG3(F, G, H);
+}
+
+static void HU4(uint8_t* dst, const uint8_t* top) {
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  DST(0, 0) =             AVG2(I, J);
+  DST(2, 0) = DST(0, 1) = AVG2(J, K);
+  DST(2, 1) = DST(0, 2) = AVG2(K, L);
+  DST(1, 0) =             AVG3(I, J, K);
+  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+  DST(3, 2) = DST(2, 2) =
+  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+static void HD4(uint8_t* dst, const uint8_t* top) {
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+
+  DST(0, 0) = DST(2, 1) = AVG2(I, X);
+  DST(0, 1) = DST(2, 2) = AVG2(J, I);
+  DST(0, 2) = DST(2, 3) = AVG2(K, J);
+  DST(0, 3)             = AVG2(L, K);
+
+  DST(3, 0)             = AVG3(A, B, C);
+  DST(2, 0)             = AVG3(X, A, B);
+  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+  DST(1, 3)             = AVG3(L, K, J);
+}
+
+static void TM4(uint8_t* dst, const uint8_t* top) {
+  int x, y;
+  const uint8_t* const clip = clip1 + 255 - top[-1];
+  for (y = 0; y < 4; ++y) {
+    const uint8_t* const clip_table = clip + top[-2 - y];
+    for (x = 0; x < 4; ++x) {
+      dst[x] = clip_table[top[x]];
+    }
+    dst += BPS;
+  }
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+// Left samples are top[-5 .. -2], top_left is top[-1], top are
+// located at top[0..3], and top right is top[4..7]
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+  DC4(I4DC4 + dst, top);
+  TM4(I4TM4 + dst, top);
+  VE4(I4VE4 + dst, top);
+  HE4(I4HE4 + dst, top);
+  RD4(I4RD4 + dst, top);
+  VR4(I4VR4 + dst, top);
+  LD4(I4LD4 + dst, top);
+  VL4(I4VL4 + dst, top);
+  HD4(I4HD4 + dst, top);
+  HU4(I4HU4 + dst, top);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
+                              int w, int h) {
+  int count = 0;
+  int y, x;
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      const int diff = (int)a[x] - b[x];
+      count += diff * diff;
+    }
+    a += BPS;
+    b += BPS;
+  }
+  return count;
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  return GetSSE(a, b, 16, 16);
+}
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  return GetSSE(a, b, 16, 8);
+}
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  return GetSSE(a, b, 8, 8);
+}
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  return GetSSE(a, b, 4, 4);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int TTransform(const uint8_t* in, const uint16_t* w) {
+  int sum = 0;
+  int tmp[16];
+  int i;
+  // horizontal pass
+  for (i = 0; i < 4; ++i, in += BPS) {
+    const int a0 = in[0] + in[2];
+    const int a1 = in[1] + in[3];
+    const int a2 = in[1] - in[3];
+    const int a3 = in[0] - in[2];
+    tmp[0 + i * 4] = a0 + a1;
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  // vertical pass
+  for (i = 0; i < 4; ++i, ++w) {
+    const int a0 = tmp[0 + i] + tmp[8 + i];
+    const int a1 = tmp[4 + i] + tmp[12+ i];
+    const int a2 = tmp[4 + i] - tmp[12+ i];
+    const int a3 = tmp[0 + i] - tmp[8 + i];
+    const int b0 = a0 + a1;
+    const int b1 = a3 + a2;
+    const int b2 = a3 - a2;
+    const int b3 = a0 - a1;
+
+    sum += w[ 0] * abs(b0);
+    sum += w[ 4] * abs(b1);
+    sum += w[ 8] * abs(b2);
+    sum += w[12] * abs(b3);
+  }
+  return sum;
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  const int sum1 = TTransform(a, w);
+  const int sum2 = TTransform(b, w);
+  return abs(sum2 - sum1) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+static const uint8_t kZigzag[16] = {
+  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+// Simple quantization
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  int last = -1;
+  int n;
+  for (n = 0; n < 16; ++n) {
+    const int j = kZigzag[n];
+    const int sign = (in[j] < 0);
+    const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    if (coeff > mtx->zthresh_[j]) {
+      const uint32_t Q = mtx->q_[j];
+      const uint32_t iQ = mtx->iq_[j];
+      const uint32_t B = mtx->bias_[j];
+      int level = QUANTDIV(coeff, iQ, B);
+      if (level > MAX_LEVEL) level = MAX_LEVEL;
+      if (sign) level = -level;
+      in[j] = level * Q;
+      out[n] = level;
+      if (level) last = n;
+    } else {
+      out[n] = 0;
+      in[j] = 0;
+    }
+  }
+  return (last >= 0);
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
+                            const VP8Matrix* const mtx) {
+  int n, last = -1;
+  for (n = 0; n < 16; ++n) {
+    const int j = kZigzag[n];
+    const int sign = (in[j] < 0);
+    const uint32_t coeff = sign ? -in[j] : in[j];
+    assert(mtx->sharpen_[j] == 0);
+    if (coeff > mtx->zthresh_[j]) {
+      const uint32_t Q = mtx->q_[j];
+      const uint32_t iQ = mtx->iq_[j];
+      const uint32_t B = mtx->bias_[j];
+      int level = QUANTDIV(coeff, iQ, B);
+      if (level > MAX_LEVEL) level = MAX_LEVEL;
+      if (sign) level = -level;
+      in[j] = level * Q;
+      out[n] = level;
+      if (level) last = n;
+    } else {
+      out[n] = 0;
+      in[j] = 0;
+    }
+  }
+  return (last >= 0);
+}
+
+//------------------------------------------------------------------------------
+// Block copy
+
+static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
+  int y;
+  for (y = 0; y < size; ++y) {
+    memcpy(dst, src, size);
+    src += BPS;
+    dst += BPS;
+  }
+}
+
+static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
+
+//------------------------------------------------------------------------------
+// Initialization
+
+// Speed-critical function pointers. We have to initialize them to the default
+// implementations within VP8EncDspInit().
+VP8CHisto VP8CollectHistogram;
+VP8Idct VP8ITransform;
+VP8Fdct VP8FTransform;
+VP8WHT VP8FTransformWHT;
+VP8Intra4Preds VP8EncPredLuma4;
+VP8IntraPreds VP8EncPredLuma16;
+VP8IntraPreds VP8EncPredChroma8;
+VP8Metric VP8SSE16x16;
+VP8Metric VP8SSE8x8;
+VP8Metric VP8SSE16x8;
+VP8Metric VP8SSE4x4;
+VP8WMetric VP8TDisto4x4;
+VP8WMetric VP8TDisto16x16;
+VP8QuantizeBlock VP8EncQuantizeBlock;
+VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
+VP8BlockCopy VP8Copy4x4;
+
+extern void VP8EncDspInitSSE2(void);
+extern void VP8EncDspInitAVX2(void);
+extern void VP8EncDspInitNEON(void);
+extern void VP8EncDspInitMIPS32(void);
+
+void VP8EncDspInit(void) {
+  VP8DspInit();  // common inverse transforms
+  InitTables();
+
+  // default C implementations
+  VP8CollectHistogram = CollectHistogram;
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+  VP8FTransformWHT = FTransformWHT;
+  VP8EncPredLuma4 = Intra4Preds;
+  VP8EncPredLuma16 = Intra16Preds;
+  VP8EncPredChroma8 = IntraChromaPreds;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE4x4 = SSE4x4;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+  VP8Copy4x4 = Copy4x4;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_AVX2)
+    if (VP8GetCPUInfo(kAVX2)) {
+      VP8EncDspInitAVX2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8EncDspInitNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8EncDspInitMIPS32();
+    }
+#endif
+  }
+}
+
diff --git a/src/main/jni/libwebp/dsp/enc_avx2.c b/src/main/jni/libwebp/dsp/enc_avx2.c
new file mode 100644
index 000000000..372e6169d
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/enc_avx2.c
@@ -0,0 +1,24 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// AVX2 version of speed-critical encoding functions.
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_AVX2)
+
+#endif  // WEBP_USE_AVX2
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitAVX2(void);
+
+void VP8EncDspInitAVX2(void) {
+}
diff --git a/src/main/jni/libwebp/dsp/enc_mips32.c b/src/main/jni/libwebp/dsp/enc_mips32.c
new file mode 100644
index 000000000..def9a1697
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/enc_mips32.c
@@ -0,0 +1,776 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of speed-critical encoding functions.
+//
+// Author(s): Djordje Pesut    (djordje.pesut@imgtec.com)
+//            Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+//            Slobodan Prijic  (slobodan.prijic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "../enc/vp8enci.h"
+#include "../enc/cost.h"
+
+#if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
+#define WORK_AROUND_GCC
+#endif
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+
+// macro for one vertical pass in ITransformOne
+// MUL macro inlined
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A..D - offsets in bytes to load from in buffer
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+// TEMP4..TEMP5 - temporary registers
+#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3)        \
+  "lh      %[temp16],      "#A"(%[temp20])                 \n\t"            \
+  "lh      %[temp18],      "#B"(%[temp20])                 \n\t"            \
+  "lh      %[temp17],      "#C"(%[temp20])                 \n\t"            \
+  "lh      %[temp19],      "#D"(%[temp20])                 \n\t"            \
+  "addu    %["#TEMP4"],    %[temp16],      %[temp18]       \n\t"            \
+  "subu    %[temp16],      %[temp16],      %[temp18]       \n\t"            \
+  "mul     %["#TEMP0"],    %[temp17],      %[kC2]          \n\t"            \
+  "mul     %[temp18],      %[temp19],      %[kC1]          \n\t"            \
+  "mul     %[temp17],      %[temp17],      %[kC1]          \n\t"            \
+  "mul     %[temp19],      %[temp19],      %[kC2]          \n\t"            \
+  "sra     %["#TEMP0"],    %["#TEMP0"],    16              \n\n"            \
+  "sra     %[temp18],      %[temp18],      16              \n\n"            \
+  "sra     %[temp17],      %[temp17],      16              \n\n"            \
+  "sra     %[temp19],      %[temp19],      16              \n\n"            \
+  "subu    %["#TEMP2"],    %["#TEMP0"],    %[temp18]       \n\t"            \
+  "addu    %["#TEMP3"],    %[temp17],      %[temp19]       \n\t"            \
+  "addu    %["#TEMP0"],    %["#TEMP4"],    %["#TEMP3"]     \n\t"            \
+  "addu    %["#TEMP1"],    %[temp16],      %["#TEMP2"]     \n\t"            \
+  "subu    %["#TEMP2"],    %[temp16],      %["#TEMP2"]     \n\t"            \
+  "subu    %["#TEMP3"],    %["#TEMP4"],    %["#TEMP3"]     \n\t"
+
+// macro for one horizontal pass in ITransformOne
+// MUL and STORE macros inlined
+// a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255)
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A..D - offsets in bytes to load from ref and store to dst buffer
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)            \
+  "addiu   %["#TEMP0"],    %["#TEMP0"],    4               \n\t"            \
+  "addu    %[temp16],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
+  "subu    %[temp17],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
+  "mul     %["#TEMP0"],    %["#TEMP4"],    %[kC2]          \n\t"            \
+  "mul     %["#TEMP8"],    %["#TEMP12"],   %[kC1]          \n\t"            \
+  "mul     %["#TEMP4"],    %["#TEMP4"],    %[kC1]          \n\t"            \
+  "mul     %["#TEMP12"],   %["#TEMP12"],   %[kC2]          \n\t"            \
+  "sra     %["#TEMP0"],    %["#TEMP0"],    16              \n\t"            \
+  "sra     %["#TEMP8"],    %["#TEMP8"],    16              \n\t"            \
+  "sra     %["#TEMP4"],    %["#TEMP4"],    16              \n\t"            \
+  "sra     %["#TEMP12"],   %["#TEMP12"],   16              \n\t"            \
+  "subu    %[temp18],      %["#TEMP0"],    %["#TEMP8"]     \n\t"            \
+  "addu    %[temp19],      %["#TEMP4"],    %["#TEMP12"]    \n\t"            \
+  "addu    %["#TEMP0"],    %[temp16],      %[temp19]       \n\t"            \
+  "addu    %["#TEMP4"],    %[temp17],      %[temp18]       \n\t"            \
+  "subu    %["#TEMP8"],    %[temp17],      %[temp18]       \n\t"            \
+  "subu    %["#TEMP12"],   %[temp16],      %[temp19]       \n\t"            \
+  "lw      %[temp20],      0(%[args])                      \n\t"            \
+  "sra     %["#TEMP0"],    %["#TEMP0"],    3               \n\t"            \
+  "sra     %["#TEMP4"],    %["#TEMP4"],    3               \n\t"            \
+  "sra     %["#TEMP8"],    %["#TEMP8"],    3               \n\t"            \
+  "sra     %["#TEMP12"],   %["#TEMP12"],   3               \n\t"            \
+  "lbu     %[temp16],      "#A"(%[temp20])                 \n\t"            \
+  "lbu     %[temp17],      "#B"(%[temp20])                 \n\t"            \
+  "lbu     %[temp18],      "#C"(%[temp20])                 \n\t"            \
+  "lbu     %[temp19],      "#D"(%[temp20])                 \n\t"            \
+  "addu    %["#TEMP0"],    %[temp16],      %["#TEMP0"]     \n\t"            \
+  "addu    %["#TEMP4"],    %[temp17],      %["#TEMP4"]     \n\t"            \
+  "addu    %["#TEMP8"],    %[temp18],      %["#TEMP8"]     \n\t"            \
+  "addu    %["#TEMP12"],   %[temp19],      %["#TEMP12"]    \n\t"            \
+  "slt     %[temp16],      %["#TEMP0"],    $zero           \n\t"            \
+  "slt     %[temp17],      %["#TEMP4"],    $zero           \n\t"            \
+  "slt     %[temp18],      %["#TEMP8"],    $zero           \n\t"            \
+  "slt     %[temp19],      %["#TEMP12"],   $zero           \n\t"            \
+  "movn    %["#TEMP0"],    $zero,          %[temp16]       \n\t"            \
+  "movn    %["#TEMP4"],    $zero,          %[temp17]       \n\t"            \
+  "movn    %["#TEMP8"],    $zero,          %[temp18]       \n\t"            \
+  "movn    %["#TEMP12"],   $zero,          %[temp19]       \n\t"            \
+  "addiu   %[temp20],      $zero,          255             \n\t"            \
+  "slt     %[temp16],      %["#TEMP0"],    %[temp20]       \n\t"            \
+  "slt     %[temp17],      %["#TEMP4"],    %[temp20]       \n\t"            \
+  "slt     %[temp18],      %["#TEMP8"],    %[temp20]       \n\t"            \
+  "slt     %[temp19],      %["#TEMP12"],   %[temp20]       \n\t"            \
+  "movz    %["#TEMP0"],    %[temp20],      %[temp16]       \n\t"            \
+  "movz    %["#TEMP4"],    %[temp20],      %[temp17]       \n\t"            \
+  "lw      %[temp16],      8(%[args])                      \n\t"            \
+  "movz    %["#TEMP8"],    %[temp20],      %[temp18]       \n\t"            \
+  "movz    %["#TEMP12"],   %[temp20],      %[temp19]       \n\t"            \
+  "sb      %["#TEMP0"],    "#A"(%[temp16])                 \n\t"            \
+  "sb      %["#TEMP4"],    "#B"(%[temp16])                 \n\t"            \
+  "sb      %["#TEMP8"],    "#C"(%[temp16])                 \n\t"            \
+  "sb      %["#TEMP12"],   "#D"(%[temp16])                 \n\t"
+
+// Does one or two inverse transforms.
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+  int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
+  int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
+  const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
+
+  __asm__ volatile(
+    "lw      %[temp20],      4(%[args])                      \n\t"
+    VERTICAL_PASS(0, 16,  8, 24, temp4,  temp0,  temp1,  temp2,  temp3)
+    VERTICAL_PASS(2, 18, 10, 26, temp8,  temp4,  temp5,  temp6,  temp7)
+    VERTICAL_PASS(4, 20, 12, 28, temp12, temp8,  temp9,  temp10, temp11)
+    VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
+
+    HORIZONTAL_PASS( 0,  1,  2,  3, temp0, temp4, temp8,  temp12)
+    HORIZONTAL_PASS(16, 17, 18, 19, temp1, temp5, temp9,  temp13)
+    HORIZONTAL_PASS(32, 33, 34, 35, temp2, temp6, temp10, temp14)
+    HORIZONTAL_PASS(48, 49, 50, 51, temp3, temp7, temp11, temp15)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+    : [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
+    : "memory", "hi", "lo"
+  );
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in,
+                       uint8_t* dst, int do_two) {
+  ITransformOne(ref, in, dst);
+  if (do_two) {
+    ITransformOne(ref + 4, in + 16, dst + 4);
+  }
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+// macro for one pass through for loop in QuantizeBlock
+// QUANTDIV macro inlined
+// J - offset in bytes (kZigzag[n] * 2)
+// K - offset in bytes (kZigzag[n] * 4)
+// N - offset in bytes (n * 2)
+#define QUANTIZE_ONE(J, K, N)                                               \
+  "lh           %[temp0],       "#J"(%[ppin])                       \n\t"   \
+  "lhu          %[temp1],       "#J"(%[ppsharpen])                  \n\t"   \
+  "lw           %[temp2],       "#K"(%[ppzthresh])                  \n\t"   \
+  "sra          %[sign],        %[temp0],           15              \n\t"   \
+  "xor          %[coeff],       %[temp0],           %[sign]         \n\t"   \
+  "subu         %[coeff],       %[coeff],           %[sign]         \n\t"   \
+  "addu         %[coeff],       %[coeff],           %[temp1]        \n\t"   \
+  "slt          %[temp4],       %[temp2],           %[coeff]        \n\t"   \
+  "addiu        %[temp5],       $zero,              0               \n\t"   \
+  "addiu        %[level],       $zero,              0               \n\t"   \
+  "beqz         %[temp4],       2f                                  \n\t"   \
+  "lhu          %[temp1],       "#J"(%[ppiq])                       \n\t"   \
+  "lw           %[temp2],       "#K"(%[ppbias])                     \n\t"   \
+  "lhu          %[temp3],       "#J"(%[ppq])                        \n\t"   \
+  "mul          %[level],       %[coeff],           %[temp1]        \n\t"   \
+  "addu         %[level],       %[level],           %[temp2]        \n\t"   \
+  "sra          %[level],       %[level],           17              \n\t"   \
+  "slt          %[temp4],       %[max_level],       %[level]        \n\t"   \
+  "movn         %[level],       %[max_level],       %[temp4]        \n\t"   \
+  "xor          %[level],       %[level],           %[sign]         \n\t"   \
+  "subu         %[level],       %[level],           %[sign]         \n\t"   \
+  "mul          %[temp5],       %[level],           %[temp3]        \n\t"   \
+"2:                                                                 \n\t"   \
+  "sh           %[temp5],       "#J"(%[ppin])                       \n\t"   \
+  "sh           %[level],       "#N"(%[pout])                       \n\t"
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  int sign, coeff, level, i;
+  int max_level = MAX_LEVEL;
+
+  int16_t* ppin             = &in[0];
+  int16_t* pout             = &out[0];
+  const uint16_t* ppsharpen = &mtx->sharpen_[0];
+  const uint32_t* ppzthresh = &mtx->zthresh_[0];
+  const uint16_t* ppq       = &mtx->q_[0];
+  const uint16_t* ppiq      = &mtx->iq_[0];
+  const uint32_t* ppbias    = &mtx->bias_[0];
+
+  __asm__ volatile(
+    QUANTIZE_ONE( 0,  0,  0)
+    QUANTIZE_ONE( 2,  4,  2)
+    QUANTIZE_ONE( 8, 16,  4)
+    QUANTIZE_ONE(16, 32,  6)
+    QUANTIZE_ONE(10, 20,  8)
+    QUANTIZE_ONE( 4,  8, 10)
+    QUANTIZE_ONE( 6, 12, 12)
+    QUANTIZE_ONE(12, 24, 14)
+    QUANTIZE_ONE(18, 36, 16)
+    QUANTIZE_ONE(24, 48, 18)
+    QUANTIZE_ONE(26, 52, 20)
+    QUANTIZE_ONE(20, 40, 22)
+    QUANTIZE_ONE(14, 28, 24)
+    QUANTIZE_ONE(22, 44, 26)
+    QUANTIZE_ONE(28, 56, 28)
+    QUANTIZE_ONE(30, 60, 30)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
+      [level]"=&r"(level)
+    : [pout]"r"(pout), [ppin]"r"(ppin),
+      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
+      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
+      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
+    : "memory", "hi", "lo"
+  );
+
+  // moved out from macro to increase possibility for earlier breaking
+  for (i = 15; i >= 0; i--) {
+    if (out[i]) return 1;
+  }
+  return 0;
+}
+
+#undef QUANTIZE_ONE
+
+// macro for one horizontal pass in Disto4x4 (TTransform)
+// two calls of function TTransform are merged into single one
+// A..D - offsets in bytes to load from a and b buffers
+// E..H - offsets in bytes to store first results to tmp buffer
+// E1..H1 - offsets in bytes to store second results to tmp buffer
+#define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1)   \
+  "lbu    %[temp0],  "#A"(%[a])              \n\t"                \
+  "lbu    %[temp1],  "#B"(%[a])              \n\t"                \
+  "lbu    %[temp2],  "#C"(%[a])              \n\t"                \
+  "lbu    %[temp3],  "#D"(%[a])              \n\t"                \
+  "lbu    %[temp4],  "#A"(%[b])              \n\t"                \
+  "lbu    %[temp5],  "#B"(%[b])              \n\t"                \
+  "lbu    %[temp6],  "#C"(%[b])              \n\t"                \
+  "lbu    %[temp7],  "#D"(%[b])              \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"                \
+  "addu   %[temp2],  %[temp1],    %[temp3]   \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp3]   \n\t"                \
+  "addu   %[temp3],  %[temp4],    %[temp6]   \n\t"                \
+  "subu   %[temp4],  %[temp4],    %[temp6]   \n\t"                \
+  "addu   %[temp6],  %[temp5],    %[temp7]   \n\t"                \
+  "subu   %[temp5],  %[temp5],    %[temp7]   \n\t"                \
+  "addu   %[temp7],  %[temp8],    %[temp2]   \n\t"                \
+  "subu   %[temp2],  %[temp8],    %[temp2]   \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp3],    %[temp6]   \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp6]   \n\t"                \
+  "addu   %[temp6],  %[temp4],    %[temp5]   \n\t"                \
+  "subu   %[temp4],  %[temp4],    %[temp5]   \n\t"                \
+  "sw     %[temp7],  "#E"(%[tmp])            \n\t"                \
+  "sw     %[temp2],  "#H"(%[tmp])            \n\t"                \
+  "sw     %[temp8],  "#F"(%[tmp])            \n\t"                \
+  "sw     %[temp0],  "#G"(%[tmp])            \n\t"                \
+  "sw     %[temp1],  "#E1"(%[tmp])           \n\t"                \
+  "sw     %[temp3],  "#H1"(%[tmp])           \n\t"                \
+  "sw     %[temp6],  "#F1"(%[tmp])           \n\t"                \
+  "sw     %[temp4],  "#G1"(%[tmp])           \n\t"
+
+// macro for one vertical pass in Disto4x4 (TTransform)
+// two calls of function TTransform are merged into single one
+// since only one accu is available in mips32r1 instruction set
+//   first is done second call of function TTransform and after
+//   that first one.
+//   const int sum1 = TTransform(a, w);
+//   const int sum2 = TTransform(b, w);
+//   return abs(sum2 - sum1) >> 5;
+//   (sum2 - sum1) is calculated with madds (sub2) and msubs (sub1)
+// A..D - offsets in bytes to load first results from tmp buffer
+// A1..D1 - offsets in bytes to load second results from tmp buffer
+// E..H - offsets in bytes to load from w buffer
+#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H)     \
+  "lw     %[temp0],  "#A1"(%[tmp])           \n\t"                \
+  "lw     %[temp1],  "#C1"(%[tmp])           \n\t"                \
+  "lw     %[temp2],  "#B1"(%[tmp])           \n\t"                \
+  "lw     %[temp3],  "#D1"(%[tmp])           \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
+  "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"                \
+  "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp0],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"                \
+  "sra    %[temp4],  %[temp3],    31         \n\t"                \
+  "sra    %[temp5],  %[temp1],    31         \n\t"                \
+  "sra    %[temp6],  %[temp0],    31         \n\t"                \
+  "sra    %[temp7],  %[temp8],    31         \n\t"                \
+  "xor    %[temp3],  %[temp3],    %[temp4]   \n\t"                \
+  "xor    %[temp1],  %[temp1],    %[temp5]   \n\t"                \
+  "xor    %[temp0],  %[temp0],    %[temp6]   \n\t"                \
+  "xor    %[temp8],  %[temp8],    %[temp7]   \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp4]   \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp5]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp6]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp7]   \n\t"                \
+  "lhu    %[temp4],  "#E"(%[w])              \n\t"                \
+  "lhu    %[temp5],  "#F"(%[w])              \n\t"                \
+  "lhu    %[temp6],  "#G"(%[w])              \n\t"                \
+  "lhu    %[temp7],  "#H"(%[w])              \n\t"                \
+  "madd   %[temp4],  %[temp3]                \n\t"                \
+  "madd   %[temp5],  %[temp1]                \n\t"                \
+  "madd   %[temp6],  %[temp0]                \n\t"                \
+  "madd   %[temp7],  %[temp8]                \n\t"                \
+  "lw     %[temp0],  "#A"(%[tmp])            \n\t"                \
+  "lw     %[temp1],  "#C"(%[tmp])            \n\t"                \
+  "lw     %[temp2],  "#B"(%[tmp])            \n\t"                \
+  "lw     %[temp3],  "#D"(%[tmp])            \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
+  "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"                \
+  "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"                \
+  "subu   %[temp1],  %[temp8],    %[temp1]   \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"                \
+  "sra    %[temp2],  %[temp3],    31         \n\t"                \
+  "xor    %[temp3],  %[temp3],    %[temp2]   \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp2]   \n\t"                \
+  "msub   %[temp4],  %[temp3]                \n\t"                \
+  "sra    %[temp2],  %[temp8],    31         \n\t"                \
+  "sra    %[temp3],  %[temp0],    31         \n\t"                \
+  "sra    %[temp4],  %[temp1],    31         \n\t"                \
+  "xor    %[temp8],  %[temp8],    %[temp2]   \n\t"                \
+  "xor    %[temp0],  %[temp0],    %[temp3]   \n\t"                \
+  "xor    %[temp1],  %[temp1],    %[temp4]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp3]   \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp4]   \n\t"                \
+  "msub   %[temp5],  %[temp8]                \n\t"                \
+  "msub   %[temp6],  %[temp0]                \n\t"                \
+  "msub   %[temp7],  %[temp1]                \n\t"
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  int tmp[32];
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+
+  __asm__ volatile(
+    HORIZONTAL_PASS( 0,  1,  2,  3,    0,  4,  8, 12,    64,  68,  72,  76)
+    HORIZONTAL_PASS(16, 17, 18, 19,   16, 20, 24, 28,    80,  84,  88,  92)
+    HORIZONTAL_PASS(32, 33, 34, 35,   32, 36, 40, 44,    96, 100, 104, 108)
+    HORIZONTAL_PASS(48, 49, 50, 51,   48, 52, 56, 60,   112, 116, 120, 124)
+    "mthi   $zero                             \n\t"
+    "mtlo   $zero                             \n\t"
+    VERTICAL_PASS( 0, 16, 32, 48,     64, 80,  96, 112,   0,  8, 16, 24)
+    VERTICAL_PASS( 4, 20, 36, 52,     68, 84, 100, 116,   2, 10, 18, 26)
+    VERTICAL_PASS( 8, 24, 40, 56,     72, 88, 104, 120,   4, 12, 20, 28)
+    VERTICAL_PASS(12, 28, 44, 60,     76, 92, 108, 124,   6, 14, 22, 30)
+    "mflo   %[temp0]                          \n\t"
+    "sra    %[temp1],  %[temp0],  31          \n\t"
+    "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"
+    "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"
+    "sra    %[temp0],  %[temp0],  5           \n\t"
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
+    : "memory", "hi", "lo"
+  );
+
+  return temp0;
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+// macro for one horizontal pass in FTransform
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A..D - offsets in bytes to load from src and ref buffers
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
+  "lw     %["#TEMP1"],  0(%[args])                     \n\t"    \
+  "lw     %["#TEMP2"],  4(%[args])                     \n\t"    \
+  "lbu    %[temp16],    "#A"(%["#TEMP1"])              \n\t"    \
+  "lbu    %[temp17],    "#A"(%["#TEMP2"])              \n\t"    \
+  "lbu    %[temp18],    "#B"(%["#TEMP1"])              \n\t"    \
+  "lbu    %[temp19],    "#B"(%["#TEMP2"])              \n\t"    \
+  "subu   %[temp20],    %[temp16],    %[temp17]        \n\t"    \
+  "lbu    %[temp16],    "#C"(%["#TEMP1"])              \n\t"    \
+  "lbu    %[temp17],    "#C"(%["#TEMP2"])              \n\t"    \
+  "subu   %["#TEMP0"],  %[temp18],    %[temp19]        \n\t"    \
+  "lbu    %[temp18],    "#D"(%["#TEMP1"])              \n\t"    \
+  "lbu    %[temp19],    "#D"(%["#TEMP2"])              \n\t"    \
+  "subu   %["#TEMP1"],  %[temp16],    %[temp17]        \n\t"    \
+  "subu   %["#TEMP2"],  %[temp18],    %[temp19]        \n\t"    \
+  "addu   %["#TEMP3"],  %[temp20],    %["#TEMP2"]      \n\t"    \
+  "subu   %["#TEMP2"],  %[temp20],    %["#TEMP2"]      \n\t"    \
+  "addu   %[temp20],    %["#TEMP0"],  %["#TEMP1"]      \n\t"    \
+  "subu   %["#TEMP0"],  %["#TEMP0"],  %["#TEMP1"]      \n\t"    \
+  "mul    %[temp16],    %["#TEMP2"],  %[c5352]         \n\t"    \
+  "mul    %[temp17],    %["#TEMP2"],  %[c2217]         \n\t"    \
+  "mul    %[temp18],    %["#TEMP0"],  %[c5352]         \n\t"    \
+  "mul    %[temp19],    %["#TEMP0"],  %[c2217]         \n\t"    \
+  "addu   %["#TEMP1"],  %["#TEMP3"],  %[temp20]        \n\t"    \
+  "subu   %[temp20],    %["#TEMP3"],  %[temp20]        \n\t"    \
+  "sll    %["#TEMP0"],  %["#TEMP1"],  3                \n\t"    \
+  "sll    %["#TEMP2"],  %[temp20],    3                \n\t"    \
+  "addiu  %[temp16],    %[temp16],    1812             \n\t"    \
+  "addiu  %[temp17],    %[temp17],    937              \n\t"    \
+  "addu   %[temp16],    %[temp16],    %[temp19]        \n\t"    \
+  "subu   %[temp17],    %[temp17],    %[temp18]        \n\t"    \
+  "sra    %["#TEMP1"],  %[temp16],    9                \n\t"    \
+  "sra    %["#TEMP3"],  %[temp17],    9                \n\t"
+
+// macro for one vertical pass in FTransform
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A..D - offsets in bytes to store to out buffer
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)  \
+  "addu   %[temp16],    %["#TEMP0"],  %["#TEMP12"]     \n\t"    \
+  "subu   %[temp19],    %["#TEMP0"],  %["#TEMP12"]     \n\t"    \
+  "addu   %[temp17],    %["#TEMP4"],  %["#TEMP8"]      \n\t"    \
+  "subu   %[temp18],    %["#TEMP4"],  %["#TEMP8"]      \n\t"    \
+  "mul    %["#TEMP8"],  %[temp19],    %[c2217]         \n\t"    \
+  "mul    %["#TEMP12"], %[temp18],    %[c2217]         \n\t"    \
+  "mul    %["#TEMP4"],  %[temp19],    %[c5352]         \n\t"    \
+  "mul    %[temp18],    %[temp18],    %[c5352]         \n\t"    \
+  "addiu  %[temp16],    %[temp16],    7                \n\t"    \
+  "addu   %["#TEMP0"],  %[temp16],    %[temp17]        \n\t"    \
+  "sra    %["#TEMP0"],  %["#TEMP0"],  4                \n\t"    \
+  "addu   %["#TEMP12"], %["#TEMP12"], %["#TEMP4"]      \n\t"    \
+  "subu   %["#TEMP4"],  %[temp16],    %[temp17]        \n\t"    \
+  "sra    %["#TEMP4"],  %["#TEMP4"],  4                \n\t"    \
+  "addiu  %["#TEMP8"],  %["#TEMP8"],  30000            \n\t"    \
+  "addiu  %["#TEMP12"], %["#TEMP12"], 12000            \n\t"    \
+  "addiu  %["#TEMP8"],  %["#TEMP8"],  21000            \n\t"    \
+  "subu   %["#TEMP8"],  %["#TEMP8"],  %[temp18]        \n\t"    \
+  "sra    %["#TEMP12"], %["#TEMP12"], 16               \n\t"    \
+  "sra    %["#TEMP8"],  %["#TEMP8"],  16               \n\t"    \
+  "addiu  %[temp16],    %["#TEMP12"], 1                \n\t"    \
+  "movn   %["#TEMP12"], %[temp16],    %[temp19]        \n\t"    \
+  "sh     %["#TEMP0"],  "#A"(%[temp20])                \n\t"    \
+  "sh     %["#TEMP4"],  "#C"(%[temp20])                \n\t"    \
+  "sh     %["#TEMP8"],  "#D"(%[temp20])                \n\t"    \
+  "sh     %["#TEMP12"], "#B"(%[temp20])                \n\t"
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+  int temp17, temp18, temp19, temp20;
+  const int c2217 = 2217;
+  const int c5352 = 5352;
+  const int* const args[3] =
+      { (const int*)src, (const int*)ref, (const int*)out };
+
+  __asm__ volatile(
+    HORIZONTAL_PASS( 0,  1,  2,  3, temp0,  temp1,  temp2,  temp3)
+    HORIZONTAL_PASS(16, 17, 18, 19, temp4,  temp5,  temp6,  temp7)
+    HORIZONTAL_PASS(32, 33, 34, 35, temp8,  temp9,  temp10, temp11)
+    HORIZONTAL_PASS(48, 49, 50, 51, temp12, temp13, temp14, temp15)
+    "lw   %[temp20],    8(%[args])                     \n\t"
+    VERTICAL_PASS(0,  8, 16, 24, temp0, temp4, temp8,  temp12)
+    VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9,  temp13)
+    VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
+    VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+    : [args]"r"(args), [c2217]"r"(c2217), [c5352]"r"(c5352)
+    : "memory", "hi", "lo"
+  );
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+// Forward declaration.
+extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
+
+int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  int p0 = res->prob[n][ctx0][0];
+  const uint16_t* t = res->cost[n][ctx0];
+  int cost;
+  const int const_2 = 2;
+  const int const_255 = 255;
+  const int const_max_level = MAX_VARIABLE_LEVEL;
+  int res_cost;
+  int res_prob;
+  int res_coeffs;
+  int res_last;
+  int v_reg;
+  int b_reg;
+  int ctx_reg;
+  int cost_add, temp_1, temp_2, temp_3;
+
+  if (res->last < 0) {
+    return VP8BitCost(0, p0);
+  }
+
+  cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+
+  res_cost = (int)res->cost;
+  res_prob = (int)res->prob;
+  res_coeffs = (int)res->coeffs;
+  res_last = (int)res->last;
+
+  __asm__ volatile(
+    ".set   push                                                           \n\t"
+    ".set   noreorder                                                      \n\t"
+
+    "sll    %[temp_1],     %[n],              1                            \n\t"
+    "addu   %[res_coeffs], %[res_coeffs],     %[temp_1]                    \n\t"
+    "slt    %[temp_2],     %[n],              %[res_last]                  \n\t"
+    "bnez   %[temp_2],     1f                                              \n\t"
+    " li    %[cost_add],   0                                               \n\t"
+    "b      2f                                                             \n\t"
+    " nop                                                                  \n\t"
+  "1:                                                                      \n\t"
+    "lh     %[v_reg],      0(%[res_coeffs])                                \n\t"
+    "addu   %[b_reg],      %[n],              %[VP8EncBands]               \n\t"
+    "move   %[temp_1],     %[const_max_level]                              \n\t"
+    "addu   %[cost],       %[cost],           %[cost_add]                  \n\t"
+    "negu   %[temp_2],     %[v_reg]                                        \n\t"
+    "slti   %[temp_3],     %[v_reg],          0                            \n\t"
+    "movn   %[v_reg],      %[temp_2],         %[temp_3]                    \n\t"
+    "lbu    %[b_reg],      1(%[b_reg])                                     \n\t"
+    "li     %[cost_add],   0                                               \n\t"
+
+    "sltiu  %[temp_3],     %[v_reg],          2                            \n\t"
+    "move   %[ctx_reg],    %[v_reg]                                        \n\t"
+    "movz   %[ctx_reg],    %[const_2],        %[temp_3]                    \n\t"
+    //  cost += VP8LevelCost(t, v);
+    "slt    %[temp_3],     %[v_reg],          %[const_max_level]           \n\t"
+    "movn   %[temp_1],     %[v_reg],          %[temp_3]                    \n\t"
+    "sll    %[temp_2],     %[v_reg],          1                            \n\t"
+    "addu   %[temp_2],     %[temp_2],         %[VP8LevelFixedCosts]        \n\t"
+    "lhu    %[temp_2],     0(%[temp_2])                                    \n\t"
+    "sll    %[temp_1],     %[temp_1],         1                            \n\t"
+    "addu   %[temp_1],     %[temp_1],         %[t]                         \n\t"
+    "lhu    %[temp_3],     0(%[temp_1])                                    \n\t"
+    "addu   %[cost],       %[cost],           %[temp_2]                    \n\t"
+
+    //  t = res->cost[b][ctx];
+    "sll    %[temp_1],     %[ctx_reg],        7                            \n\t"
+    "sll    %[temp_2],     %[ctx_reg],        3                            \n\t"
+    "addu   %[cost],       %[cost],           %[temp_3]                    \n\t"
+    "addu   %[temp_1],     %[temp_1],         %[temp_2]                    \n\t"
+    "sll    %[temp_2],     %[b_reg],          3                            \n\t"
+    "sll    %[temp_3],     %[b_reg],          5                            \n\t"
+    "sub    %[temp_2],     %[temp_3],         %[temp_2]                    \n\t"
+    "sll    %[temp_3],     %[temp_2],         4                            \n\t"
+    "addu   %[temp_1],     %[temp_1],         %[temp_3]                    \n\t"
+    "addu   %[temp_2],     %[temp_2],         %[res_cost]                  \n\t"
+    "addiu  %[n],          %[n],              1                            \n\t"
+    "addu   %[t],          %[temp_1],         %[temp_2]                    \n\t"
+    "slt    %[temp_1],     %[n],              %[res_last]                  \n\t"
+    "bnez   %[temp_1],     1b                                              \n\t"
+    " addiu %[res_coeffs], %[res_coeffs],     2                            \n\t"
+   "2:                                                                     \n\t"
+
+    ".set   pop                                                            \n\t"
+    : [cost]"+r"(cost), [t]"+r"(t), [n]"+r"(n), [v_reg]"=&r"(v_reg),
+      [ctx_reg]"=&r"(ctx_reg), [b_reg]"=&r"(b_reg), [cost_add]"=&r"(cost_add),
+      [temp_1]"=&r"(temp_1), [temp_2]"=&r"(temp_2), [temp_3]"=&r"(temp_3)
+    : [const_2]"r"(const_2), [const_255]"r"(const_255), [res_last]"r"(res_last),
+      [VP8EntropyCost]"r"(VP8EntropyCost), [VP8EncBands]"r"(VP8EncBands),
+      [const_max_level]"r"(const_max_level), [res_prob]"r"(res_prob),
+      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_coeffs]"r"(res_coeffs),
+      [res_cost]"r"(res_cost)
+    : "memory"
+  );
+
+  // Last coefficient is always non-zero
+  {
+    const int v = abs(res->coeffs[n]);
+    assert(v != 0);
+    cost += VP8LevelCost(t, v);
+    if (n < 15) {
+      const int b = VP8EncBands[n + 1];
+      const int ctx = (v == 1) ? 1 : 2;
+      const int last_p0 = res->prob[b][ctx][0];
+      cost += VP8BitCost(0, last_p0);
+    }
+  }
+  return cost;
+}
+
+#define GET_SSE_INNER(A, B, C, D)                               \
+  "lbu     %[temp0],    "#A"(%[a])                   \n\t"      \
+  "lbu     %[temp1],    "#A"(%[b])                   \n\t"      \
+  "lbu     %[temp2],    "#B"(%[a])                   \n\t"      \
+  "lbu     %[temp3],    "#B"(%[b])                   \n\t"      \
+  "lbu     %[temp4],    "#C"(%[a])                   \n\t"      \
+  "lbu     %[temp5],    "#C"(%[b])                   \n\t"      \
+  "lbu     %[temp6],    "#D"(%[a])                   \n\t"      \
+  "lbu     %[temp7],    "#D"(%[b])                   \n\t"      \
+  "subu    %[temp0],    %[temp0],     %[temp1]       \n\t"      \
+  "subu    %[temp2],    %[temp2],     %[temp3]       \n\t"      \
+  "subu    %[temp4],    %[temp4],     %[temp5]       \n\t"      \
+  "subu    %[temp6],    %[temp6],     %[temp7]       \n\t"      \
+  "madd    %[temp0],    %[temp0]                     \n\t"      \
+  "madd    %[temp2],    %[temp2]                     \n\t"      \
+  "madd    %[temp4],    %[temp4]                     \n\t"      \
+  "madd    %[temp6],    %[temp6]                     \n\t"
+
+#define GET_SSE(A, B, C, D)               \
+  GET_SSE_INNER(A, A + 1, A + 2, A + 3)   \
+  GET_SSE_INNER(B, B + 1, B + 2, B + 3)   \
+  GET_SSE_INNER(C, C + 1, C + 2, C + 3)   \
+  GET_SSE_INNER(D, D + 1, D + 2, D + 3)
+
+#if !defined(WORK_AROUND_GCC)
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE(  0,   4,   8,  12)
+     GET_SSE( 16,  20,  24,  28)
+     GET_SSE( 32,  36,  40,  44)
+     GET_SSE( 48,  52,  56,  60)
+     GET_SSE( 64,  68,  72,  76)
+     GET_SSE( 80,  84,  88,  92)
+     GET_SSE( 96, 100, 104, 108)
+     GET_SSE(112, 116, 120, 124)
+     GET_SSE(128, 132, 136, 140)
+     GET_SSE(144, 148, 152, 156)
+     GET_SSE(160, 164, 168, 172)
+     GET_SSE(176, 180, 184, 188)
+     GET_SSE(192, 196, 200, 204)
+     GET_SSE(208, 212, 216, 220)
+     GET_SSE(224, 228, 232, 236)
+     GET_SSE(240, 244, 248, 252)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi" , "lo"
+  );
+  return count;
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE(  0,   4,   8,  12)
+     GET_SSE( 16,  20,  24,  28)
+     GET_SSE( 32,  36,  40,  44)
+     GET_SSE( 48,  52,  56,  60)
+     GET_SSE( 64,  68,  72,  76)
+     GET_SSE( 80,  84,  88,  92)
+     GET_SSE( 96, 100, 104, 108)
+     GET_SSE(112, 116, 120, 124)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi" , "lo"
+  );
+  return count;
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE( 0,   4,  16,  20)
+     GET_SSE(32,  36,  48,  52)
+     GET_SSE(64,  68,  80,  84)
+     GET_SSE(96, 100, 112, 116)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi" , "lo"
+  );
+  return count;
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE(0, 16, 32, 48)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi" , "lo"
+  );
+  return count;
+}
+
+#endif  // WORK_AROUND_GCC
+
+#undef GET_SSE_MIPS32
+#undef GET_SSE_MIPS32_INNER
+
+#endif  // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitMIPS32(void);
+
+void VP8EncDspInitMIPS32(void) {
+#if defined(WEBP_USE_MIPS32)
+  VP8ITransform = ITransform;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8FTransform = FTransform;
+#if !defined(WORK_AROUND_GCC)
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE4x4 = SSE4x4;
+#endif
+#endif  // WEBP_USE_MIPS32
+}
diff --git a/src/main/jni/libwebp/dsp/enc_neon.c b/src/main/jni/libwebp/dsp/enc_neon.c
new file mode 100644
index 000000000..42041f73e
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/enc_neon.c
@@ -0,0 +1,1077 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of speed-critical encoding functions.
+//
+// adapted from libvpx (http://www.webmproject.org/code/)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+
+#include "./neon.h"
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Inverse transform.
+// This code is pretty much the same as TransformOne in the dec_neon.c, except
+// for subtraction to *ref. See the comments there for algorithmic explanations.
+
+static const int16_t kC1 = 20091;
+static const int16_t kC2 = 17734;  // half of kC2, actually. See comment above.
+
+// This code works but is *slower* than the inlined-asm version below
+// (with gcc-4.6). So we disable it for now. Later, it'll be conditional to
+// USE_INTRINSICS define.
+// With gcc-4.8, it's a little faster speed than inlined-assembly.
+#if defined(USE_INTRINSICS)
+
+// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
+  return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
+}
+
+// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
+// to the corresponding rows of 'dst'.
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
+                                            const int16x8_t dst01,
+                                            const int16x8_t dst23) {
+  // Unsigned saturate to 8b.
+  const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
+  const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
+
+  // Store the results.
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
+}
+
+static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
+                               const uint8_t* const ref, uint8_t* const dst) {
+  uint32x2_t dst01 = vdup_n_u32(0);
+  uint32x2_t dst23 = vdup_n_u32(0);
+
+  // Load the source pixels.
+  dst01 = vld1_lane_u32((uint32_t*)(ref + 0 * BPS), dst01, 0);
+  dst23 = vld1_lane_u32((uint32_t*)(ref + 2 * BPS), dst23, 0);
+  dst01 = vld1_lane_u32((uint32_t*)(ref + 1 * BPS), dst01, 1);
+  dst23 = vld1_lane_u32((uint32_t*)(ref + 3 * BPS), dst23, 1);
+
+  {
+    // Convert to 16b.
+    const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
+    const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
+
+    // Descale with rounding.
+    const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
+    const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
+    // Add the inverse transform.
+    SaturateAndStore4x4(dst, out01, out23);
+  }
+}
+
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
+                                     int16x8x2_t* const out) {
+  // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
+  // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
+  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
+                                                  // b0 d0 b1 d1 b2 d2 ...
+  *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
+}
+
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+  // {rows} = in0 | in4
+  //          in8 | in12
+  // B1 = in4 | in12
+  const int16x8_t B1 =
+      vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+  // C0 = kC1 * in4 | kC1 * in12
+  // C1 = kC2 * in4 | kC2 * in12
+  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
+  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
+  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 + in8
+  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 - in8
+  // c = kC2 * in4 - kC1 * in12
+  // d = kC1 * in4 + kC2 * in12
+  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
+  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
+  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
+  Transpose8x2(E0, E1, rows);
+}
+
+static void ITransformOne(const uint8_t* ref,
+                          const int16_t* in, uint8_t* dst) {
+  int16x8x2_t rows;
+  INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
+  TransformPass(&rows);
+  TransformPass(&rows);
+  Add4x4(rows.val[0], rows.val[1], ref, dst);
+}
+
+#else
+
+static void ITransformOne(const uint8_t* ref,
+                          const int16_t* in, uint8_t* dst) {
+  const int kBPS = BPS;
+  const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
+
+  __asm__ volatile (
+    "vld1.16         {q1, q2}, [%[in]]           \n"
+    "vld1.16         {d0}, [%[kC1C2]]            \n"
+
+    // d2: in[0]
+    // d3: in[8]
+    // d4: in[4]
+    // d5: in[12]
+    "vswp            d3, d4                      \n"
+
+    // q8 = {in[4], in[12]} * kC1 * 2 >> 16
+    // q9 = {in[4], in[12]} * kC2 >> 16
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    // d22 = a = in[0] + in[8]
+    // d23 = b = in[0] - in[8]
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    //  q8 = in[4]/[12] * kC1 >> 16
+    "vshr.s16        q8, q8, #1                  \n"
+
+    // Add {in[4], in[12]} back after the multiplication.
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    // d20 = c = in[4]*kC2 - in[12]*kC1
+    // d21 = d = in[4]*kC1 + in[12]*kC2
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    // d2 = tmp[0] = a + d
+    // d3 = tmp[1] = b + c
+    // d4 = tmp[2] = b - c
+    // d5 = tmp[3] = a - d
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    "vswp            d3, d4                      \n"
+
+    // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+    // q9 = {tmp[4], tmp[12]} * kC2 >> 16
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    // d22 = a = tmp[0] + tmp[8]
+    // d23 = b = tmp[0] - tmp[8]
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    "vshr.s16        q8, q8, #1                  \n"
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    // d20 = c = in[4]*kC2 - in[12]*kC1
+    // d21 = d = in[4]*kC1 + in[12]*kC2
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    // d2 = tmp[0] = a + d
+    // d3 = tmp[1] = b + c
+    // d4 = tmp[2] = b - c
+    // d5 = tmp[3] = a - d
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vld1.32         d6[0], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d6[1], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d7[0], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d7[1], [%[ref]], %[kBPS]    \n"
+
+    "sub         %[ref], %[ref], %[kBPS], lsl #2 \n"
+
+    // (val) + 4 >> 3
+    "vrshr.s16       d2, d2, #3                  \n"
+    "vrshr.s16       d3, d3, #3                  \n"
+    "vrshr.s16       d4, d4, #3                  \n"
+    "vrshr.s16       d5, d5, #3                  \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    // Must accumulate before saturating
+    "vmovl.u8        q8, d6                      \n"
+    "vmovl.u8        q9, d7                      \n"
+
+    "vqadd.s16       q1, q1, q8                  \n"
+    "vqadd.s16       q2, q2, q9                  \n"
+
+    "vqmovun.s16     d0, q1                      \n"
+    "vqmovun.s16     d1, q2                      \n"
+
+    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[1], [%[dst]]             \n"
+
+    : [in] "+r"(in), [dst] "+r"(dst)               // modified registers
+    : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref)  // constants
+    : "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11"  // clobbered
+  );
+}
+
+#endif    // USE_INTRINSICS
+
+static void ITransform(const uint8_t* ref,
+                       const int16_t* in, uint8_t* dst, int do_two) {
+  ITransformOne(ref, in, dst);
+  if (do_two) {
+    ITransformOne(ref + 4, in + 16, dst + 4);
+  }
+}
+
+// Load all 4x4 pixels into a single uint8x16_t variable.
+static uint8x16_t Load4x4(const uint8_t* src) {
+  uint32x4_t out = vdupq_n_u32(0);
+  out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0);
+  out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1);
+  out = vld1q_lane_u32((const uint32_t*)(src + 2 * BPS), out, 2);
+  out = vld1q_lane_u32((const uint32_t*)(src + 3 * BPS), out, 3);
+  return vreinterpretq_u8_u32(out);
+}
+
+// Forward transform.
+
+#if defined(USE_INTRINSICS)
+
+static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
+                                         const int16x4_t C, const int16x4_t D,
+                                         int16x8_t* const out01,
+                                         int16x8_t* const out32) {
+  const int16x4x2_t AB = vtrn_s16(A, B);
+  const int16x4x2_t CD = vtrn_s16(C, D);
+  const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
+                                     vreinterpret_s32_s16(CD.val[0]));
+  const int32x2x2_t tmp13 = vtrn_s32(vreinterpret_s32_s16(AB.val[1]),
+                                     vreinterpret_s32_s16(CD.val[1]));
+  *out01 = vreinterpretq_s16_s64(
+      vcombine_s64(vreinterpret_s64_s32(tmp02.val[0]),
+                   vreinterpret_s64_s32(tmp13.val[0])));
+  *out32 = vreinterpretq_s16_s64(
+      vcombine_s64(vreinterpret_s64_s32(tmp13.val[1]),
+                   vreinterpret_s64_s32(tmp02.val[1])));
+}
+
+static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
+                                         const uint8x8_t b) {
+  return vreinterpretq_s16_u16(vsubl_u8(a, b));
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref,
+                       int16_t* out) {
+  int16x8_t d0d1, d3d2;   // working 4x4 int16 variables
+  {
+    const uint8x16_t S0 = Load4x4(src);
+    const uint8x16_t R0 = Load4x4(ref);
+    const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
+    const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
+    const int16x4_t D0 = vget_low_s16(D0D1);
+    const int16x4_t D1 = vget_high_s16(D0D1);
+    const int16x4_t D2 = vget_low_s16(D2D3);
+    const int16x4_t D3 = vget_high_s16(D2D3);
+    Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
+  }
+  {    // 1rst pass
+    const int32x4_t kCst937 = vdupq_n_s32(937);
+    const int32x4_t kCst1812 = vdupq_n_s32(1812);
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3);
+    const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2),
+                                    vget_high_s16(a0a1_2));
+    const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2),
+                                    vget_high_s16(a0a1_2));
+    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
+    const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
+    const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
+    const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
+    Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
+  }
+  {    // 2nd pass
+    // the (1<<16) addition is for the replacement: a3!=0  <-> 1-(a3==0)
+    const int32x4_t kCst12000 = vdupq_n_s32(12000 + (1 << 16));
+    const int32x4_t kCst51000 = vdupq_n_s32(51000);
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x4_t a0_k7 = vadd_s16(vget_low_s16(a0a1), vdup_n_s16(7));
+    const int16x4_t out0 = vshr_n_s16(vadd_s16(a0_k7, vget_high_s16(a0a1)), 4);
+    const int16x4_t out2 = vshr_n_s16(vsub_s16(a0_k7, vget_high_s16(a0a1)), 4);
+    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
+    const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
+    const int16x4_t tmp1 = vaddhn_s32(a2_p_a3, kCst12000);
+    const int16x4_t out3 = vaddhn_s32(a3_m_a2, kCst51000);
+    const int16x4_t a3_eq_0 =
+        vreinterpret_s16_u16(vceq_s16(vget_low_s16(a3a2), vdup_n_s16(0)));
+    const int16x4_t out1 = vadd_s16(tmp1, a3_eq_0);
+    vst1_s16(out +  0, out0);
+    vst1_s16(out +  4, out1);
+    vst1_s16(out +  8, out2);
+    vst1_s16(out + 12, out3);
+  }
+}
+
+#else
+
+// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
+static const int16_t kCoeff16[] = {
+  5352,  5352,  5352, 5352, 2217,  2217,  2217, 2217
+};
+static const int32_t kCoeff32[] = {
+   1812,  1812,  1812,  1812,
+    937,   937,   937,   937,
+  12000, 12000, 12000, 12000,
+  51000, 51000, 51000, 51000
+};
+
+static void FTransform(const uint8_t* src, const uint8_t* ref,
+                       int16_t* out) {
+  const int kBPS = BPS;
+  const uint8_t* src_ptr = src;
+  const uint8_t* ref_ptr = ref;
+  const int16_t* coeff16 = kCoeff16;
+  const int32_t* coeff32 = kCoeff32;
+
+  __asm__ volatile (
+    // load src into q4, q5 in high half
+    "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d9},  [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d11}, [%[src_ptr]]               \n"
+
+    // load ref into q6, q7 in high half
+    "vld1.8 {d12}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d14}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d13}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d15}, [%[ref_ptr]]               \n"
+
+    // Pack the high values in to q4 and q6
+    "vtrn.32     q4, q5                       \n"
+    "vtrn.32     q6, q7                       \n"
+
+    // d[0-3] = src - ref
+    "vsubl.u8    q0, d8, d12                  \n"
+    "vsubl.u8    q1, d9, d13                  \n"
+
+    // load coeff16 into q8(d16=5352, d17=2217)
+    "vld1.16     {q8}, [%[coeff16]]           \n"
+
+    // load coeff32 high half into q9 = 1812, q10 = 937
+    "vld1.32     {q9, q10}, [%[coeff32]]!     \n"
+
+    // load coeff32 low half into q11=12000, q12=51000
+    "vld1.32     {q11,q12}, [%[coeff32]]      \n"
+
+    // part 1
+    // Transpose. Register dN is the same as dN in C
+    "vtrn.32         d0, d2                   \n"
+    "vtrn.32         d1, d3                   \n"
+    "vtrn.16         d0, d1                   \n"
+    "vtrn.16         d2, d3                   \n"
+
+    "vadd.s16        d4, d0, d3               \n" // a0 = d0 + d3
+    "vadd.s16        d5, d1, d2               \n" // a1 = d1 + d2
+    "vsub.s16        d6, d1, d2               \n" // a2 = d1 - d2
+    "vsub.s16        d7, d0, d3               \n" // a3 = d0 - d3
+
+    "vadd.s16        d0, d4, d5               \n" // a0 + a1
+    "vshl.s16        d0, d0, #3               \n" // temp[0+i*4] = (a0+a1) << 3
+    "vsub.s16        d2, d4, d5               \n" // a0 - a1
+    "vshl.s16        d2, d2, #3               \n" // (temp[2+i*4] = (a0-a1) << 3
+
+    "vmlal.s16       q9, d7, d16              \n" // a3*5352 + 1812
+    "vmlal.s16       q10, d7, d17             \n" // a3*2217 + 937
+    "vmlal.s16       q9, d6, d17              \n" // a2*2217 + a3*5352 + 1812
+    "vmlsl.s16       q10, d6, d16             \n" // a3*2217 + 937 - a2*5352
+
+    // temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
+    // temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
+    "vshrn.s32       d1, q9, #9               \n"
+    "vshrn.s32       d3, q10, #9              \n"
+
+    // part 2
+    // transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
+    "vtrn.32         d0, d2                   \n"
+    "vtrn.32         d1, d3                   \n"
+    "vtrn.16         d0, d1                   \n"
+    "vtrn.16         d2, d3                   \n"
+
+    "vmov.s16        d26, #7                  \n"
+
+    "vadd.s16        d4, d0, d3               \n" // a1 = ip[0] + ip[12]
+    "vadd.s16        d5, d1, d2               \n" // b1 = ip[4] + ip[8]
+    "vsub.s16        d6, d1, d2               \n" // c1 = ip[4] - ip[8]
+    "vadd.s16        d4, d4, d26              \n" // a1 + 7
+    "vsub.s16        d7, d0, d3               \n" // d1 = ip[0] - ip[12]
+
+    "vadd.s16        d0, d4, d5               \n" // op[0] = a1 + b1 + 7
+    "vsub.s16        d2, d4, d5               \n" // op[8] = a1 - b1 + 7
+
+    "vmlal.s16       q11, d7, d16             \n" // d1*5352 + 12000
+    "vmlal.s16       q12, d7, d17             \n" // d1*2217 + 51000
+
+    "vceq.s16        d4, d7, #0               \n"
+
+    "vshr.s16        d0, d0, #4               \n"
+    "vshr.s16        d2, d2, #4               \n"
+
+    "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
+    "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
+
+    "vmvn            d4, d4                   \n" // !(d1 == 0)
+    // op[4] = (c1*2217 + d1*5352 + 12000)>>16
+    "vshrn.s32       d1, q11, #16             \n"
+    // op[4] += (d1!=0)
+    "vsub.s16        d1, d1, d4               \n"
+    // op[12]= (d1*2217 - c1*5352 + 51000)>>16
+    "vshrn.s32       d3, q12, #16             \n"
+
+    // set result to out array
+    "vst1.16         {q0, q1}, [%[out]]   \n"
+    : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
+      [coeff32] "+r"(coeff32)          // modified registers
+    : [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
+      [out] "r"(out)                   // constants
+    : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+      "q10", "q11", "q12", "q13"       // clobbered
+  );
+}
+
+#endif
+
+#define LOAD_LANE_16b(VALUE, LANE) do {             \
+  (VALUE) = vld1_lane_s16(src, (VALUE), (LANE));    \
+  src += stride;                                    \
+} while (0)
+
+static void FTransformWHT(const int16_t* src, int16_t* out) {
+  const int stride = 16;
+  const int16x4_t zero = vdup_n_s16(0);
+  int32x4x4_t tmp0;
+  int16x4x4_t in;
+  INIT_VECTOR4(in, zero, zero, zero, zero);
+  LOAD_LANE_16b(in.val[0], 0);
+  LOAD_LANE_16b(in.val[1], 0);
+  LOAD_LANE_16b(in.val[2], 0);
+  LOAD_LANE_16b(in.val[3], 0);
+  LOAD_LANE_16b(in.val[0], 1);
+  LOAD_LANE_16b(in.val[1], 1);
+  LOAD_LANE_16b(in.val[2], 1);
+  LOAD_LANE_16b(in.val[3], 1);
+  LOAD_LANE_16b(in.val[0], 2);
+  LOAD_LANE_16b(in.val[1], 2);
+  LOAD_LANE_16b(in.val[2], 2);
+  LOAD_LANE_16b(in.val[3], 2);
+  LOAD_LANE_16b(in.val[0], 3);
+  LOAD_LANE_16b(in.val[1], 3);
+  LOAD_LANE_16b(in.val[2], 3);
+  LOAD_LANE_16b(in.val[3], 3);
+
+  {
+    // a0 = in[0 * 16] + in[2 * 16]
+    // a1 = in[1 * 16] + in[3 * 16]
+    // a2 = in[1 * 16] - in[3 * 16]
+    // a3 = in[0 * 16] - in[2 * 16]
+    const int32x4_t a0 = vaddl_s16(in.val[0], in.val[2]);
+    const int32x4_t a1 = vaddl_s16(in.val[1], in.val[3]);
+    const int32x4_t a2 = vsubl_s16(in.val[1], in.val[3]);
+    const int32x4_t a3 = vsubl_s16(in.val[0], in.val[2]);
+    tmp0.val[0] = vaddq_s32(a0, a1);
+    tmp0.val[1] = vaddq_s32(a3, a2);
+    tmp0.val[2] = vsubq_s32(a3, a2);
+    tmp0.val[3] = vsubq_s32(a0, a1);
+  }
+  {
+    const int32x4x4_t tmp1 = Transpose4x4(tmp0);
+    // a0 = tmp[0 + i] + tmp[ 8 + i]
+    // a1 = tmp[4 + i] + tmp[12 + i]
+    // a2 = tmp[4 + i] - tmp[12 + i]
+    // a3 = tmp[0 + i] - tmp[ 8 + i]
+    const int32x4_t a0 = vaddq_s32(tmp1.val[0], tmp1.val[2]);
+    const int32x4_t a1 = vaddq_s32(tmp1.val[1], tmp1.val[3]);
+    const int32x4_t a2 = vsubq_s32(tmp1.val[1], tmp1.val[3]);
+    const int32x4_t a3 = vsubq_s32(tmp1.val[0], tmp1.val[2]);
+    const int32x4_t b0 = vhaddq_s32(a0, a1);  // (a0 + a1) >> 1
+    const int32x4_t b1 = vhaddq_s32(a3, a2);  // (a3 + a2) >> 1
+    const int32x4_t b2 = vhsubq_s32(a3, a2);  // (a3 - a2) >> 1
+    const int32x4_t b3 = vhsubq_s32(a0, a1);  // (a0 - a1) >> 1
+    const int16x4_t out0 = vmovn_s32(b0);
+    const int16x4_t out1 = vmovn_s32(b1);
+    const int16x4_t out2 = vmovn_s32(b2);
+    const int16x4_t out3 = vmovn_s32(b3);
+
+    vst1_s16(out +  0, out0);
+    vst1_s16(out +  4, out1);
+    vst1_s16(out +  8, out2);
+    vst1_s16(out + 12, out3);
+  }
+}
+#undef LOAD_LANE_16b
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// This code works but is *slower* than the inlined-asm version below
+// (with gcc-4.6). So we disable it for now. Later, it'll be conditional to
+// USE_INTRINSICS define.
+// With gcc-4.8, it's only slightly slower than the inlined.
+#if defined(USE_INTRINSICS)
+
+// Zero extend an uint16x4_t 'v' to an int32x4_t.
+static WEBP_INLINE int32x4_t ConvertU16ToS32(uint16x4_t v) {
+  return vreinterpretq_s32_u32(vmovl_u16(v));
+}
+
+// Does a regular 4x4 transpose followed by an adjustment of the upper columns
+// in the inner rows to restore the source order of differences,
+// i.e., a0 - a1 | a3 - a2.
+static WEBP_INLINE int32x4x4_t DistoTranspose4x4(const int32x4x4_t rows) {
+  int32x4x4_t out = Transpose4x4(rows);
+  // restore source order in the columns containing differences.
+  const int32x2_t r1h = vget_high_s32(out.val[1]);
+  const int32x2_t r2h = vget_high_s32(out.val[2]);
+  out.val[1] = vcombine_s32(vget_low_s32(out.val[1]), r2h);
+  out.val[2] = vcombine_s32(vget_low_s32(out.val[2]), r1h);
+  return out;
+}
+
+static WEBP_INLINE int32x4x4_t DistoHorizontalPass(const uint8x8_t r0r1,
+                                                   const uint8x8_t r2r3) {
+  // a0 = in[0] + in[2] | a1 = in[1] + in[3]
+  const uint16x8_t a0a1 = vaddl_u8(r0r1, r2r3);
+  // a3 = in[0] - in[2] | a2 = in[1] - in[3]
+  const uint16x8_t a3a2 = vsubl_u8(r0r1, r2r3);
+  const int32x4_t tmp0 = vpaddlq_s16(vreinterpretq_s16_u16(a0a1));  // a0 + a1
+  const int32x4_t tmp1 = vpaddlq_s16(vreinterpretq_s16_u16(a3a2));  // a3 + a2
+  // no pairwise subtraction; reorder to perform tmp[2]/tmp[3] calculations.
+  // a0a0 a3a3 a0a0 a3a3 a0a0 a3a3 a0a0 a3a3
+  // a1a1 a2a2 a1a1 a2a2 a1a1 a2a2 a1a1 a2a2
+  const int16x8x2_t transpose =
+      vtrnq_s16(vreinterpretq_s16_u16(a0a1), vreinterpretq_s16_u16(a3a2));
+  // tmp[3] = a0 - a1 | tmp[2] = a3 - a2
+  const int32x4_t tmp32_1 = vsubl_s16(vget_low_s16(transpose.val[0]),
+                                      vget_low_s16(transpose.val[1]));
+  const int32x4_t tmp32_2 = vsubl_s16(vget_high_s16(transpose.val[0]),
+                                      vget_high_s16(transpose.val[1]));
+  // [0]: tmp[3] [1]: tmp[2]
+  const int32x4x2_t split = vtrnq_s32(tmp32_1, tmp32_2);
+  const int32x4x4_t res = { { tmp0, tmp1, split.val[1], split.val[0] } };
+  return res;
+}
+
+static WEBP_INLINE int32x4x4_t DistoVerticalPass(const int32x4x4_t rows) {
+  // a0 = tmp[0 + i] + tmp[8 + i];
+  const int32x4_t a0 = vaddq_s32(rows.val[0], rows.val[1]);
+  // a1 = tmp[4 + i] + tmp[12+ i];
+  const int32x4_t a1 = vaddq_s32(rows.val[2], rows.val[3]);
+  // a2 = tmp[4 + i] - tmp[12+ i];
+  const int32x4_t a2 = vsubq_s32(rows.val[2], rows.val[3]);
+  // a3 = tmp[0 + i] - tmp[8 + i];
+  const int32x4_t a3 = vsubq_s32(rows.val[0], rows.val[1]);
+  const int32x4_t b0 = vqabsq_s32(vaddq_s32(a0, a1));  // abs(a0 + a1)
+  const int32x4_t b1 = vqabsq_s32(vaddq_s32(a3, a2));  // abs(a3 + a2)
+  const int32x4_t b2 = vabdq_s32(a3, a2);              // abs(a3 - a2)
+  const int32x4_t b3 = vabdq_s32(a0, a1);              // abs(a0 - a1)
+  const int32x4x4_t res = { { b0, b1, b2, b3 } };
+  return res;
+}
+
+// Calculate the weighted sum of the rows in 'b'.
+static WEBP_INLINE int64x1_t DistoSum(const int32x4x4_t b,
+                                      const int32x4_t w0, const int32x4_t w1,
+                                      const int32x4_t w2, const int32x4_t w3) {
+  const int32x4_t s0 = vmulq_s32(w0, b.val[0]);
+  const int32x4_t s1 = vmlaq_s32(s0, w1, b.val[1]);
+  const int32x4_t s2 = vmlaq_s32(s1, w2, b.val[2]);
+  const int32x4_t s3 = vmlaq_s32(s2, w3, b.val[3]);
+  const int64x2_t sum1 = vpaddlq_s32(s3);
+  const int64x1_t sum2 = vadd_s64(vget_low_s64(sum1), vget_high_s64(sum1));
+  return sum2;
+}
+
+#define LOAD_LANE_32b(src, VALUE, LANE) \
+    (VALUE) = vld1q_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  uint32x4_t d0d1 = { 0, 0, 0, 0 };
+  uint32x4_t d2d3 = { 0, 0, 0, 0 };
+  LOAD_LANE_32b(a + 0 * BPS, d0d1, 0);  // a00 a01 a02 a03
+  LOAD_LANE_32b(a + 1 * BPS, d0d1, 1);  // a10 a11 a12 a13
+  LOAD_LANE_32b(b + 0 * BPS, d0d1, 2);  // b00 b01 b02 b03
+  LOAD_LANE_32b(b + 1 * BPS, d0d1, 3);  // b10 b11 b12 b13
+  LOAD_LANE_32b(a + 2 * BPS, d2d3, 0);  // a20 a21 a22 a23
+  LOAD_LANE_32b(a + 3 * BPS, d2d3, 1);  // a30 a31 a32 a33
+  LOAD_LANE_32b(b + 2 * BPS, d2d3, 2);  // b20 b21 b22 b23
+  LOAD_LANE_32b(b + 3 * BPS, d2d3, 3);  // b30 b31 b32 b33
+
+  {
+    // a00 a01 a20 a21 a10 a11 a30 a31 b00 b01 b20 b21 b10 b11 b30 b31
+    // a02 a03 a22 a23 a12 a13 a32 a33 b02 b03 b22 b23 b12 b13 b32 b33
+    const uint16x8x2_t tmp =
+        vtrnq_u16(vreinterpretq_u16_u32(d0d1), vreinterpretq_u16_u32(d2d3));
+    const uint8x16_t d0d1u8 = vreinterpretq_u8_u16(tmp.val[0]);
+    const uint8x16_t d2d3u8 = vreinterpretq_u8_u16(tmp.val[1]);
+    const int32x4x4_t hpass_a = DistoHorizontalPass(vget_low_u8(d0d1u8),
+                                                    vget_low_u8(d2d3u8));
+    const int32x4x4_t hpass_b = DistoHorizontalPass(vget_high_u8(d0d1u8),
+                                                    vget_high_u8(d2d3u8));
+    const int32x4x4_t tmp_a = DistoTranspose4x4(hpass_a);
+    const int32x4x4_t tmp_b = DistoTranspose4x4(hpass_b);
+    const int32x4x4_t vpass_a = DistoVerticalPass(tmp_a);
+    const int32x4x4_t vpass_b = DistoVerticalPass(tmp_b);
+    const int32x4_t w0 = ConvertU16ToS32(vld1_u16(w + 0));
+    const int32x4_t w1 = ConvertU16ToS32(vld1_u16(w + 4));
+    const int32x4_t w2 = ConvertU16ToS32(vld1_u16(w + 8));
+    const int32x4_t w3 = ConvertU16ToS32(vld1_u16(w + 12));
+    const int64x1_t sum1 = DistoSum(vpass_a, w0, w1, w2, w3);
+    const int64x1_t sum2 = DistoSum(vpass_b, w0, w1, w2, w3);
+    const int32x2_t diff = vabd_s32(vreinterpret_s32_s64(sum1),
+                                    vreinterpret_s32_s64(sum2));
+    const int32x2_t res = vshr_n_s32(diff, 5);
+    return vget_lane_s32(res, 0);
+  }
+}
+
+#undef LOAD_LANE_32b
+
+#else
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  const int kBPS = BPS;
+  const uint8_t* A = a;
+  const uint8_t* B = b;
+  const uint16_t* W = w;
+  int sum;
+  __asm__ volatile (
+    "vld1.32         d0[0], [%[a]], %[kBPS]   \n"
+    "vld1.32         d0[1], [%[a]], %[kBPS]   \n"
+    "vld1.32         d2[0], [%[a]], %[kBPS]   \n"
+    "vld1.32         d2[1], [%[a]]            \n"
+
+    "vld1.32         d1[0], [%[b]], %[kBPS]   \n"
+    "vld1.32         d1[1], [%[b]], %[kBPS]   \n"
+    "vld1.32         d3[0], [%[b]], %[kBPS]   \n"
+    "vld1.32         d3[1], [%[b]]            \n"
+
+    // a d0/d2, b d1/d3
+    // d0/d1: 01 01 01 01
+    // d2/d3: 23 23 23 23
+    // But: it goes 01 45 23 67
+    // Notice the middle values are transposed
+    "vtrn.16         q0, q1                   \n"
+
+    // {a0, a1} = {in[0] + in[2], in[1] + in[3]}
+    "vaddl.u8        q2, d0, d2               \n"
+    "vaddl.u8        q10, d1, d3              \n"
+    // {a3, a2} = {in[0] - in[2], in[1] - in[3]}
+    "vsubl.u8        q3, d0, d2               \n"
+    "vsubl.u8        q11, d1, d3              \n"
+
+    // tmp[0] = a0 + a1
+    "vpaddl.s16      q0, q2                   \n"
+    "vpaddl.s16      q8, q10                  \n"
+
+    // tmp[1] = a3 + a2
+    "vpaddl.s16      q1, q3                   \n"
+    "vpaddl.s16      q9, q11                  \n"
+
+    // No pair subtract
+    // q2 = {a0, a3}
+    // q3 = {a1, a2}
+    "vtrn.16         q2, q3                   \n"
+    "vtrn.16         q10, q11                 \n"
+
+    // {tmp[3], tmp[2]} = {a0 - a1, a3 - a2}
+    "vsubl.s16       q12, d4, d6              \n"
+    "vsubl.s16       q13, d5, d7              \n"
+    "vsubl.s16       q14, d20, d22            \n"
+    "vsubl.s16       q15, d21, d23            \n"
+
+    // separate tmp[3] and tmp[2]
+    // q12 = tmp[3]
+    // q13 = tmp[2]
+    "vtrn.32         q12, q13                 \n"
+    "vtrn.32         q14, q15                 \n"
+
+    // Transpose tmp for a
+    "vswp            d1, d26                  \n" // vtrn.64
+    "vswp            d3, d24                  \n" // vtrn.64
+    "vtrn.32         q0, q1                   \n"
+    "vtrn.32         q13, q12                 \n"
+
+    // Transpose tmp for b
+    "vswp            d17, d30                 \n" // vtrn.64
+    "vswp            d19, d28                 \n" // vtrn.64
+    "vtrn.32         q8, q9                   \n"
+    "vtrn.32         q15, q14                 \n"
+
+    // The first Q register is a, the second b.
+    // q0/8 tmp[0-3]
+    // q13/15 tmp[4-7]
+    // q1/9 tmp[8-11]
+    // q12/14 tmp[12-15]
+
+    // These are still in 01 45 23 67 order. We fix it easily in the addition
+    // case but the subtraction propagates them.
+    "vswp            d3, d27                  \n"
+    "vswp            d19, d31                 \n"
+
+    // a0 = tmp[0] + tmp[8]
+    "vadd.s32        q2, q0, q1               \n"
+    "vadd.s32        q3, q8, q9               \n"
+
+    // a1 = tmp[4] + tmp[12]
+    "vadd.s32        q10, q13, q12            \n"
+    "vadd.s32        q11, q15, q14            \n"
+
+    // a2 = tmp[4] - tmp[12]
+    "vsub.s32        q13, q13, q12            \n"
+    "vsub.s32        q15, q15, q14            \n"
+
+    // a3 = tmp[0] - tmp[8]
+    "vsub.s32        q0, q0, q1               \n"
+    "vsub.s32        q8, q8, q9               \n"
+
+    // b0 = a0 + a1
+    "vadd.s32        q1, q2, q10              \n"
+    "vadd.s32        q9, q3, q11              \n"
+
+    // b1 = a3 + a2
+    "vadd.s32        q12, q0, q13             \n"
+    "vadd.s32        q14, q8, q15             \n"
+
+    // b2 = a3 - a2
+    "vsub.s32        q0, q0, q13              \n"
+    "vsub.s32        q8, q8, q15              \n"
+
+    // b3 = a0 - a1
+    "vsub.s32        q2, q2, q10              \n"
+    "vsub.s32        q3, q3, q11              \n"
+
+    "vld1.64         {q10, q11}, [%[w]]       \n"
+
+    // abs(b0)
+    "vabs.s32        q1, q1                   \n"
+    "vabs.s32        q9, q9                   \n"
+    // abs(b1)
+    "vabs.s32        q12, q12                 \n"
+    "vabs.s32        q14, q14                 \n"
+    // abs(b2)
+    "vabs.s32        q0, q0                   \n"
+    "vabs.s32        q8, q8                   \n"
+    // abs(b3)
+    "vabs.s32        q2, q2                   \n"
+    "vabs.s32        q3, q3                   \n"
+
+    // expand w before using.
+    "vmovl.u16       q13, d20                 \n"
+    "vmovl.u16       q15, d21                 \n"
+
+    // w[0] * abs(b0)
+    "vmul.u32        q1, q1, q13              \n"
+    "vmul.u32        q9, q9, q13              \n"
+
+    // w[4] * abs(b1)
+    "vmla.u32        q1, q12, q15             \n"
+    "vmla.u32        q9, q14, q15             \n"
+
+    // expand w before using.
+    "vmovl.u16       q13, d22                 \n"
+    "vmovl.u16       q15, d23                 \n"
+
+    // w[8] * abs(b1)
+    "vmla.u32        q1, q0, q13              \n"
+    "vmla.u32        q9, q8, q13              \n"
+
+    // w[12] * abs(b1)
+    "vmla.u32        q1, q2, q15              \n"
+    "vmla.u32        q9, q3, q15              \n"
+
+    // Sum the arrays
+    "vpaddl.u32      q1, q1                   \n"
+    "vpaddl.u32      q9, q9                   \n"
+    "vadd.u64        d2, d3                   \n"
+    "vadd.u64        d18, d19                 \n"
+
+    // Hadamard transform needs 4 bits of extra precision (2 bits in each
+    // direction) for dynamic raw. Weights w[] are 16bits at max, so the maximum
+    // precision for coeff is 8bit of input + 4bits of Hadamard transform +
+    // 16bits for w[] + 2 bits of abs() summation.
+    //
+    // This uses a maximum of 31 bits (signed). Discarding the top 32 bits is
+    // A-OK.
+
+    // sum2 - sum1
+    "vsub.u32        d0, d2, d18              \n"
+    // abs(sum2 - sum1)
+    "vabs.s32        d0, d0                   \n"
+    // abs(sum2 - sum1) >> 5
+    "vshr.u32        d0, #5                   \n"
+
+    // It would be better to move the value straight into r0 but I'm not
+    // entirely sure how this works with inline assembly.
+    "vmov.32         %[sum], d0[0]            \n"
+
+    : [sum] "=r"(sum), [a] "+r"(A), [b] "+r"(B), [w] "+r"(W)
+    : [kBPS] "r"(kBPS)
+    : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+      "q10", "q11", "q12", "q13", "q14", "q15"  // clobbered
+  ) ;
+
+  return sum;
+}
+
+#endif  // USE_INTRINSICS
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
+  int j;
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    {
+      int k;
+      const int16x8_t a0 = vld1q_s16(out + 0);
+      const int16x8_t b0 = vld1q_s16(out + 8);
+      const uint16x8_t a1 = vreinterpretq_u16_s16(vabsq_s16(a0));
+      const uint16x8_t b1 = vreinterpretq_u16_s16(vabsq_s16(b0));
+      const uint16x8_t a2 = vshrq_n_u16(a1, 3);
+      const uint16x8_t b2 = vshrq_n_u16(b1, 3);
+      const uint16x8_t a3 = vminq_u16(a2, max_coeff_thresh);
+      const uint16x8_t b3 = vminq_u16(b2, max_coeff_thresh);
+      vst1q_s16(out + 0, vreinterpretq_s16_u16(a3));
+      vst1q_s16(out + 8, vreinterpretq_s16_u16(b3));
+      // Convert coefficients to bin.
+      for (k = 0; k < 16; ++k) {
+        histo->distribution[out[k]]++;
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
+                                        const uint8_t* const b,
+                                        uint32x4_t* const sum) {
+  const uint8x16_t a0 = vld1q_u8(a);
+  const uint8x16_t b0 = vld1q_u8(b);
+  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
+  uint16x8_t prod = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+  prod = vmlal_u8(prod, vget_high_u8(abs_diff), vget_high_u8(abs_diff));
+  *sum = vpadalq_u16(*sum, prod);      // pair-wise add and accumulate
+}
+
+// Horizontal sum of all four uint32_t values in 'sum'.
+static int SumToInt(uint32x4_t sum) {
+  const uint64x2_t sum2 = vpaddlq_u32(sum);
+  const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
+  return (int)sum3;
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 16; ++y) {
+    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+  }
+  return SumToInt(sum);
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 8; ++y) {
+    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+  }
+  return SumToInt(sum);
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 8; ++y) {
+    const uint8x8_t a0 = vld1_u8(a + y * BPS);
+    const uint8x8_t b0 = vld1_u8(b + y * BPS);
+    const uint8x8_t abs_diff = vabd_u8(a0, b0);
+    const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);
+    sum = vpadalq_u16(sum, prod);
+  }
+  return SumToInt(sum);
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  const uint8x16_t a0 = Load4x4(a);
+  const uint8x16_t b0 = Load4x4(b);
+  const uint8x16_t abs_diff = vabdq_u8(a0, b0);
+  uint16x8_t prod = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+  prod = vmlal_u8(prod, vget_high_u8(abs_diff), vget_high_u8(abs_diff));
+  return SumToInt(vpaddlq_u16(prod));
+}
+
+//------------------------------------------------------------------------------
+
+// Compilation with gcc-4.6.x is problematic for now.
+#if !defined(WORK_AROUND_GCC)
+
+static int16x8_t Quantize(int16_t* const in,
+                          const VP8Matrix* const mtx, int offset) {
+  const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
+  const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
+  const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
+  const uint32x4_t bias0 = vld1q_u32(&mtx->bias_[offset + 0]);
+  const uint32x4_t bias1 = vld1q_u32(&mtx->bias_[offset + 4]);
+
+  const int16x8_t a = vld1q_s16(in + offset);                // in
+  const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a));  // coeff = abs(in)
+  const int16x8_t sign = vshrq_n_s16(a, 15);                 // sign
+  const uint16x8_t c = vaddq_u16(b, sharp);                  // + sharpen
+  const uint32x4_t m0 = vmull_u16(vget_low_u16(c), vget_low_u16(iq));
+  const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
+  const uint32x4_t m2 = vhaddq_u32(m0, bias0);
+  const uint32x4_t m3 = vhaddq_u32(m1, bias1);     // (coeff * iQ + bias) >> 1
+  const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16),
+                                     vshrn_n_u32(m3, 16));   // QFIX=17 = 16+1
+  const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL));
+  const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
+  const int16x8_t c3 = vsubq_s16(c2, sign);                  // restore sign
+  const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q));
+  vst1q_s16(in + offset, c4);
+  assert(QFIX == 17);  // this function can't work as is if QFIX != 16+1
+  return c3;
+}
+
+static const uint8_t kShuffles[4][8] = {
+  { 0,   1,  2,  3,  8,  9, 16, 17 },
+  { 10, 11,  4,  5,  6,  7, 12, 13 },
+  { 18, 19, 24, 25, 26, 27, 20, 21 },
+  { 14, 15, 22, 23, 28, 29, 30, 31 }
+};
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  const int16x8_t out0 = Quantize(in, mtx, 0);
+  const int16x8_t out1 = Quantize(in, mtx, 8);
+  uint8x8x4_t shuffles;
+  // vtbl4_u8 is marked unavailable for iOS arm64, use wider versions there.
+#if defined(__APPLE__) && defined(__aarch64__)
+  uint8x16x2_t all_out;
+  INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
+  INIT_VECTOR4(shuffles,
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[2])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
+#else
+  uint8x8x4_t all_out;
+  INIT_VECTOR4(all_out,
+               vreinterpret_u8_s16(vget_low_s16(out0)),
+               vreinterpret_u8_s16(vget_high_s16(out0)),
+               vreinterpret_u8_s16(vget_low_s16(out1)),
+               vreinterpret_u8_s16(vget_high_s16(out1)));
+  INIT_VECTOR4(shuffles,
+               vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[2])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
+#endif
+  // Zigzag reordering
+  vst1_u8((uint8_t*)(out +  0), shuffles.val[0]);
+  vst1_u8((uint8_t*)(out +  4), shuffles.val[1]);
+  vst1_u8((uint8_t*)(out +  8), shuffles.val[2]);
+  vst1_u8((uint8_t*)(out + 12), shuffles.val[3]);
+  // test zeros
+  if (*(uint64_t*)(out +  0) != 0) return 1;
+  if (*(uint64_t*)(out +  4) != 0) return 1;
+  if (*(uint64_t*)(out +  8) != 0) return 1;
+  if (*(uint64_t*)(out + 12) != 0) return 1;
+  return 0;
+}
+
+#endif   // !WORK_AROUND_GCC
+
+#endif   // WEBP_USE_NEON
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitNEON(void);
+
+void VP8EncDspInitNEON(void) {
+#if defined(WEBP_USE_NEON)
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+
+  VP8FTransformWHT = FTransformWHT;
+
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8CollectHistogram = CollectHistogram;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE4x4 = SSE4x4;
+#if !defined(WORK_AROUND_GCC)
+  VP8EncQuantizeBlock = QuantizeBlock;
+#endif
+#endif   // WEBP_USE_NEON
+}
diff --git a/src/main/jni/libwebp/dsp/enc_sse2.c b/src/main/jni/libwebp/dsp/enc_sse2.c
new file mode 100644
index 000000000..9958d9f6f
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/enc_sse2.c
@@ -0,0 +1,982 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of speed-critical encoding functions.
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <stdlib.h>  // for abs()
+#include <emmintrin.h>
+
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// Quite useful macro for debugging. Left here for convenience.
+
+#if 0
+#include <stdio.h>
+static void PrintReg(const __m128i r, const char* const name, int size) {
+  int n;
+  union {
+    __m128i r;
+    uint8_t i8[16];
+    uint16_t i16[8];
+    uint32_t i32[4];
+    uint64_t i64[2];
+  } tmp;
+  tmp.r = r;
+  printf("%s\t: ", name);
+  if (size == 8) {
+    for (n = 0; n < 16; ++n) printf("%.2x ", tmp.i8[n]);
+  } else if (size == 16) {
+    for (n = 0; n < 8; ++n) printf("%.4x ", tmp.i16[n]);
+  } else if (size == 32) {
+    for (n = 0; n < 4; ++n) printf("%.8x ", tmp.i32[n]);
+  } else {
+    for (n = 0; n < 2; ++n) printf("%.16lx ", tmp.i64[n]);
+  }
+  printf("\n");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
+  int j;
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    int k;
+
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin (within out[]).
+    {
+      // Load.
+      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
+      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
+      // sign(out) = out >> 15  (0x0000 if positive, 0xffff if negative)
+      const __m128i sign0 = _mm_srai_epi16(out0, 15);
+      const __m128i sign1 = _mm_srai_epi16(out1, 15);
+      // abs(out) = (out ^ sign) - sign
+      const __m128i xor0 = _mm_xor_si128(out0, sign0);
+      const __m128i xor1 = _mm_xor_si128(out1, sign1);
+      const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
+      const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
+      // v = abs(out) >> 3
+      const __m128i v0 = _mm_srai_epi16(abs0, 3);
+      const __m128i v1 = _mm_srai_epi16(abs1, 3);
+      // bin = min(v, MAX_COEFF_THRESH)
+      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
+      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
+      // Store.
+      _mm_storeu_si128((__m128i*)&out[0], bin0);
+      _mm_storeu_si128((__m128i*)&out[8], bin1);
+    }
+
+    // Convert coefficients to bin.
+    for (k = 0; k < 16; ++k) {
+      histo->distribution[out[k]]++;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Does one or two inverse transforms.
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {
+  // This implementation makes use of 16-bit fixed point versions of two
+  // multiply constants:
+  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+  //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+  //
+  // To be able to use signed 16-bit integers, we use the following trick to
+  // have constants within range:
+  // - Associated constants are obtained by subtracting the 16-bit fixed point
+  //   version of one:
+  //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+  //      K1 = 85267  =>  k1 =  20091
+  //      K2 = 35468  =>  k2 = -30068
+  // - The multiplication of a variable by a constant become the sum of the
+  //   variable and the multiplication of that variable by the associated
+  //   constant:
+  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+  const __m128i k1 = _mm_set1_epi16(20091);
+  const __m128i k2 = _mm_set1_epi16(-30068);
+  __m128i T0, T1, T2, T3;
+
+  // Load and concatenate the transform coefficients (we'll do two inverse
+  // transforms in parallel). In the case of only one inverse transform, the
+  // second half of the vectors will just contain random value we'll never
+  // use nor store.
+  __m128i in0, in1, in2, in3;
+  {
+    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    // a00 a10 a20 a30   x x x x
+    // a01 a11 a21 a31   x x x x
+    // a02 a12 a22 a32   x x x x
+    // a03 a13 a23 a33   x x x x
+    if (do_two) {
+      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      in0 = _mm_unpacklo_epi64(in0, inB0);
+      in1 = _mm_unpacklo_epi64(in1, inB1);
+      in2 = _mm_unpacklo_epi64(in2, inB2);
+      in3 = _mm_unpacklo_epi64(in3, inB3);
+      // a00 a10 a20 a30   b00 b10 b20 b30
+      // a01 a11 a21 a31   b01 b11 b21 b31
+      // a02 a12 a22 a32   b02 b12 b22 b32
+      // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+  }
+
+  // Vertical pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i a = _mm_add_epi16(in0, in2);
+    const __m128i b = _mm_sub_epi16(in0, in2);
+    // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+    const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+    const __m128i c3 = _mm_sub_epi16(in1, in3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+    const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+    const __m128i d3 = _mm_add_epi16(in1, in3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 a22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i four = _mm_set1_epi16(4);
+    const __m128i dc = _mm_add_epi16(T0, four);
+    const __m128i a =  _mm_add_epi16(dc, T2);
+    const __m128i b =  _mm_sub_epi16(dc, T2);
+    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+    const __m128i c3 = _mm_sub_epi16(T1, T3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+    const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+    const __m128i d3 = _mm_add_epi16(T1, T3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+    const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+    const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+    const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+    const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 a22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Add inverse transform to 'ref' and store.
+  {
+    const __m128i zero = _mm_setzero_si128();
+    // Load the reference(s).
+    __m128i ref0, ref1, ref2, ref3;
+    if (do_two) {
+      // Load eight bytes/pixels per line.
+      ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
+      ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
+      ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
+      ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
+    } else {
+      // Load four bytes/pixels per line.
+      ref0 = _mm_cvtsi32_si128(*(int*)&ref[0 * BPS]);
+      ref1 = _mm_cvtsi32_si128(*(int*)&ref[1 * BPS]);
+      ref2 = _mm_cvtsi32_si128(*(int*)&ref[2 * BPS]);
+      ref3 = _mm_cvtsi32_si128(*(int*)&ref[3 * BPS]);
+    }
+    // Convert to 16b.
+    ref0 = _mm_unpacklo_epi8(ref0, zero);
+    ref1 = _mm_unpacklo_epi8(ref1, zero);
+    ref2 = _mm_unpacklo_epi8(ref2, zero);
+    ref3 = _mm_unpacklo_epi8(ref3, zero);
+    // Add the inverse transform(s).
+    ref0 = _mm_add_epi16(ref0, T0);
+    ref1 = _mm_add_epi16(ref1, T1);
+    ref2 = _mm_add_epi16(ref2, T2);
+    ref3 = _mm_add_epi16(ref3, T3);
+    // Unsigned saturate to 8b.
+    ref0 = _mm_packus_epi16(ref0, ref0);
+    ref1 = _mm_packus_epi16(ref1, ref1);
+    ref2 = _mm_packus_epi16(ref2, ref2);
+    ref3 = _mm_packus_epi16(ref3, ref3);
+    // Store the results.
+    if (do_two) {
+      // Store eight bytes/pixels per line.
+      _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
+      _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
+      _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
+      _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
+    } else {
+      // Store four bytes/pixels per line.
+      *((int32_t *)&dst[0 * BPS]) = _mm_cvtsi128_si32(ref0);
+      *((int32_t *)&dst[1 * BPS]) = _mm_cvtsi128_si32(ref1);
+      *((int32_t *)&dst[2 * BPS]) = _mm_cvtsi128_si32(ref2);
+      *((int32_t *)&dst[3 * BPS]) = _mm_cvtsi128_si32(ref3);
+    }
+  }
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i seven = _mm_set1_epi16(7);
+  const __m128i k937 = _mm_set1_epi32(937);
+  const __m128i k1812 = _mm_set1_epi32(1812);
+  const __m128i k51000 = _mm_set1_epi32(51000);
+  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
+  const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
+                                           5352,  2217, 5352,  2217);
+  const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
+                                           2217, -5352, 2217, -5352);
+  const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
+  const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
+  const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
+                                            2217, 5352, 2217, 5352);
+  const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
+                                            -5352, 2217, -5352, 2217);
+  __m128i v01, v32;
+
+
+  // Difference between src and ref and initial transpose.
+  {
+    // Load src and convert to 16b.
+    const __m128i src0 = _mm_loadl_epi64((__m128i*)&src[0 * BPS]);
+    const __m128i src1 = _mm_loadl_epi64((__m128i*)&src[1 * BPS]);
+    const __m128i src2 = _mm_loadl_epi64((__m128i*)&src[2 * BPS]);
+    const __m128i src3 = _mm_loadl_epi64((__m128i*)&src[3 * BPS]);
+    const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
+    const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
+    const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
+    const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
+    // Load ref and convert to 16b.
+    const __m128i ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
+    const __m128i ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
+    const __m128i ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
+    const __m128i ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
+    const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
+    const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
+    const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
+    const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
+    // Compute difference. -> 00 01 02 03 00 00 00 00
+    const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
+    const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
+    const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
+    const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
+
+
+    // Unpack and shuffle
+    // 00 01 02 03   0 0 0 0
+    // 10 11 12 13   0 0 0 0
+    // 20 21 22 23   0 0 0 0
+    // 30 31 32 33   0 0 0 0
+    const __m128i shuf01 = _mm_unpacklo_epi32(diff0, diff1);
+    const __m128i shuf23 = _mm_unpacklo_epi32(diff2, diff3);
+    // 00 01 10 11 02 03 12 13
+    // 20 21 30 31 22 23 32 33
+    const __m128i shuf01_p =
+        _mm_shufflehi_epi16(shuf01, _MM_SHUFFLE(2, 3, 0, 1));
+    const __m128i shuf23_p =
+        _mm_shufflehi_epi16(shuf23, _MM_SHUFFLE(2, 3, 0, 1));
+    // 00 01 10 11 03 02 13 12
+    // 20 21 30 31 23 22 33 32
+    const __m128i s01 = _mm_unpacklo_epi64(shuf01_p, shuf23_p);
+    const __m128i s32 = _mm_unpackhi_epi64(shuf01_p, shuf23_p);
+    // 00 01 10 11 20 21 30 31
+    // 03 02 13 12 23 22 33 32
+    const __m128i a01 = _mm_add_epi16(s01, s32);
+    const __m128i a32 = _mm_sub_epi16(s01, s32);
+    // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
+    // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
+
+    const __m128i tmp0 = _mm_madd_epi16(a01, k88p);  // [ (a0 + a1) << 3, ... ]
+    const __m128i tmp2 = _mm_madd_epi16(a01, k88m);  // [ (a0 - a1) << 3, ... ]
+    const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
+    const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
+    const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
+    const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
+    const __m128i tmp1   = _mm_srai_epi32(tmp1_2, 9);
+    const __m128i tmp3   = _mm_srai_epi32(tmp3_2, 9);
+    const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
+    const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
+    const __m128i s_lo = _mm_unpacklo_epi16(s03, s12);   // 0 1 0 1 0 1...
+    const __m128i s_hi = _mm_unpackhi_epi16(s03, s12);   // 2 3 2 3 2 3
+    const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
+    v01 = _mm_unpacklo_epi32(s_lo, s_hi);
+    v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));  // 3 2 3 2 3 2..
+  }
+
+  // Second pass
+  {
+    // Same operations are done on the (0,3) and (1,2) pairs.
+    // a0 = v0 + v3
+    // a1 = v1 + v2
+    // a3 = v0 - v3
+    // a2 = v1 - v2
+    const __m128i a01 = _mm_add_epi16(v01, v32);
+    const __m128i a32 = _mm_sub_epi16(v01, v32);
+    const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
+    const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
+    const __m128i a01_plus_7 = _mm_add_epi16(a01, seven);
+
+    // d0 = (a0 + a1 + 7) >> 4;
+    // d2 = (a0 - a1 + 7) >> 4;
+    const __m128i c0 = _mm_add_epi16(a01_plus_7, a11);
+    const __m128i c2 = _mm_sub_epi16(a01_plus_7, a11);
+    const __m128i d0 = _mm_srai_epi16(c0, 4);
+    const __m128i d2 = _mm_srai_epi16(c2, 4);
+
+    // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16)
+    // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16)
+    const __m128i b23 = _mm_unpacklo_epi16(a22, a32);
+    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
+    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
+    const __m128i d1 = _mm_add_epi32(c1, k12000_plus_one);
+    const __m128i d3 = _mm_add_epi32(c3, k51000);
+    const __m128i e1 = _mm_srai_epi32(d1, 16);
+    const __m128i e3 = _mm_srai_epi32(d3, 16);
+    const __m128i f1 = _mm_packs_epi32(e1, e1);
+    const __m128i f3 = _mm_packs_epi32(e3, e3);
+    // f1 = f1 + (a3 != 0);
+    // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
+    // desired (0, 1), we add one earlier through k12000_plus_one.
+    // -> f1 = f1 + 1 - (a3 == 0)
+    const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
+
+    const __m128i d0_g1 = _mm_unpacklo_epi64(d0, g1);
+    const __m128i d2_f3 = _mm_unpacklo_epi64(d2, f3);
+    _mm_storeu_si128((__m128i*)&out[0], d0_g1);
+    _mm_storeu_si128((__m128i*)&out[8], d2_f3);
+  }
+}
+
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+  int32_t tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i, in += 64) {
+    const int a0 = (in[0 * 16] + in[2 * 16]);
+    const int a1 = (in[1 * 16] + in[3 * 16]);
+    const int a2 = (in[1 * 16] - in[3 * 16]);
+    const int a3 = (in[0 * 16] - in[2 * 16]);
+    tmp[0 + i * 4] = a0 + a1;
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  {
+    const __m128i src0 = _mm_loadu_si128((__m128i*)&tmp[0]);
+    const __m128i src1 = _mm_loadu_si128((__m128i*)&tmp[4]);
+    const __m128i src2 = _mm_loadu_si128((__m128i*)&tmp[8]);
+    const __m128i src3 = _mm_loadu_si128((__m128i*)&tmp[12]);
+    const __m128i a0 = _mm_add_epi32(src0, src2);
+    const __m128i a1 = _mm_add_epi32(src1, src3);
+    const __m128i a2 = _mm_sub_epi32(src1, src3);
+    const __m128i a3 = _mm_sub_epi32(src0, src2);
+    const __m128i b0 = _mm_srai_epi32(_mm_add_epi32(a0, a1), 1);
+    const __m128i b1 = _mm_srai_epi32(_mm_add_epi32(a3, a2), 1);
+    const __m128i b2 = _mm_srai_epi32(_mm_sub_epi32(a3, a2), 1);
+    const __m128i b3 = _mm_srai_epi32(_mm_sub_epi32(a0, a1), 1);
+    const __m128i out0 = _mm_packs_epi32(b0, b1);
+    const __m128i out1 = _mm_packs_epi32(b2, b3);
+    _mm_storeu_si128((__m128i*)&out[0], out0);
+    _mm_storeu_si128((__m128i*)&out[8], out1);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+static int SSE_Nx4(const uint8_t* a, const uint8_t* b,
+                   int num_quads, int do_16) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum1 = zero;
+  __m128i sum2 = zero;
+
+  while (num_quads-- > 0) {
+    // Note: for the !do_16 case, we read 16 pixels instead of 8 but that's ok,
+    // thanks to buffer over-allocation to that effect.
+    const __m128i a0 = _mm_loadu_si128((__m128i*)&a[BPS * 0]);
+    const __m128i a1 = _mm_loadu_si128((__m128i*)&a[BPS * 1]);
+    const __m128i a2 = _mm_loadu_si128((__m128i*)&a[BPS * 2]);
+    const __m128i a3 = _mm_loadu_si128((__m128i*)&a[BPS * 3]);
+    const __m128i b0 = _mm_loadu_si128((__m128i*)&b[BPS * 0]);
+    const __m128i b1 = _mm_loadu_si128((__m128i*)&b[BPS * 1]);
+    const __m128i b2 = _mm_loadu_si128((__m128i*)&b[BPS * 2]);
+    const __m128i b3 = _mm_loadu_si128((__m128i*)&b[BPS * 3]);
+
+    // compute clip0(a-b) and clip0(b-a)
+    const __m128i a0p = _mm_subs_epu8(a0, b0);
+    const __m128i a0m = _mm_subs_epu8(b0, a0);
+    const __m128i a1p = _mm_subs_epu8(a1, b1);
+    const __m128i a1m = _mm_subs_epu8(b1, a1);
+    const __m128i a2p = _mm_subs_epu8(a2, b2);
+    const __m128i a2m = _mm_subs_epu8(b2, a2);
+    const __m128i a3p = _mm_subs_epu8(a3, b3);
+    const __m128i a3m = _mm_subs_epu8(b3, a3);
+
+    // compute |a-b| with 8b arithmetic as clip0(a-b) | clip0(b-a)
+    const __m128i diff0 = _mm_or_si128(a0p, a0m);
+    const __m128i diff1 = _mm_or_si128(a1p, a1m);
+    const __m128i diff2 = _mm_or_si128(a2p, a2m);
+    const __m128i diff3 = _mm_or_si128(a3p, a3m);
+
+    // unpack (only four operations, instead of eight)
+    const __m128i low0 = _mm_unpacklo_epi8(diff0, zero);
+    const __m128i low1 = _mm_unpacklo_epi8(diff1, zero);
+    const __m128i low2 = _mm_unpacklo_epi8(diff2, zero);
+    const __m128i low3 = _mm_unpacklo_epi8(diff3, zero);
+
+    // multiply with self
+    const __m128i low_madd0 = _mm_madd_epi16(low0, low0);
+    const __m128i low_madd1 = _mm_madd_epi16(low1, low1);
+    const __m128i low_madd2 = _mm_madd_epi16(low2, low2);
+    const __m128i low_madd3 = _mm_madd_epi16(low3, low3);
+
+    // collect in a cascading way
+    const __m128i low_sum0 = _mm_add_epi32(low_madd0, low_madd1);
+    const __m128i low_sum1 = _mm_add_epi32(low_madd2, low_madd3);
+    sum1 = _mm_add_epi32(sum1, low_sum0);
+    sum2 = _mm_add_epi32(sum2, low_sum1);
+
+    if (do_16) {  // if necessary, process the higher 8 bytes similarly
+      const __m128i hi0 = _mm_unpackhi_epi8(diff0, zero);
+      const __m128i hi1 = _mm_unpackhi_epi8(diff1, zero);
+      const __m128i hi2 = _mm_unpackhi_epi8(diff2, zero);
+      const __m128i hi3 = _mm_unpackhi_epi8(diff3, zero);
+
+      const __m128i hi_madd0 = _mm_madd_epi16(hi0, hi0);
+      const __m128i hi_madd1 = _mm_madd_epi16(hi1, hi1);
+      const __m128i hi_madd2 = _mm_madd_epi16(hi2, hi2);
+      const __m128i hi_madd3 = _mm_madd_epi16(hi3, hi3);
+      const __m128i hi_sum0 = _mm_add_epi32(hi_madd0, hi_madd1);
+      const __m128i hi_sum1 = _mm_add_epi32(hi_madd2, hi_madd3);
+      sum1 = _mm_add_epi32(sum1, hi_sum0);
+      sum2 = _mm_add_epi32(sum2, hi_sum1);
+    }
+    a += 4 * BPS;
+    b += 4 * BPS;
+  }
+  {
+    int32_t tmp[4];
+    const __m128i sum = _mm_add_epi32(sum1, sum2);
+    _mm_storeu_si128((__m128i*)tmp, sum);
+    return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+  }
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  return SSE_Nx4(a, b, 4, 1);
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  return SSE_Nx4(a, b, 2, 1);
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  return SSE_Nx4(a, b, 2, 0);
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  const __m128i zero = _mm_setzero_si128();
+
+  // Load values. Note that we read 8 pixels instead of 4,
+  // but the a/b buffers are over-allocated to that effect.
+  const __m128i a0 = _mm_loadl_epi64((__m128i*)&a[BPS * 0]);
+  const __m128i a1 = _mm_loadl_epi64((__m128i*)&a[BPS * 1]);
+  const __m128i a2 = _mm_loadl_epi64((__m128i*)&a[BPS * 2]);
+  const __m128i a3 = _mm_loadl_epi64((__m128i*)&a[BPS * 3]);
+  const __m128i b0 = _mm_loadl_epi64((__m128i*)&b[BPS * 0]);
+  const __m128i b1 = _mm_loadl_epi64((__m128i*)&b[BPS * 1]);
+  const __m128i b2 = _mm_loadl_epi64((__m128i*)&b[BPS * 2]);
+  const __m128i b3 = _mm_loadl_epi64((__m128i*)&b[BPS * 3]);
+
+  // Combine pair of lines and convert to 16b.
+  const __m128i a01 = _mm_unpacklo_epi32(a0, a1);
+  const __m128i a23 = _mm_unpacklo_epi32(a2, a3);
+  const __m128i b01 = _mm_unpacklo_epi32(b0, b1);
+  const __m128i b23 = _mm_unpacklo_epi32(b2, b3);
+  const __m128i a01s = _mm_unpacklo_epi8(a01, zero);
+  const __m128i a23s = _mm_unpacklo_epi8(a23, zero);
+  const __m128i b01s = _mm_unpacklo_epi8(b01, zero);
+  const __m128i b23s = _mm_unpacklo_epi8(b23, zero);
+
+  // Compute differences; (a-b)^2 = (abs(a-b))^2 = (sat8(a-b) + sat8(b-a))^2
+  // TODO(cduvivier): Dissassemble and figure out why this is fastest. We don't
+  //                  need absolute values, there is no need to do calculation
+  //                  in 8bit as we are already in 16bit, ... Yet this is what
+  //                  benchmarks the fastest!
+  const __m128i d0 = _mm_subs_epu8(a01s, b01s);
+  const __m128i d1 = _mm_subs_epu8(b01s, a01s);
+  const __m128i d2 = _mm_subs_epu8(a23s, b23s);
+  const __m128i d3 = _mm_subs_epu8(b23s, a23s);
+
+  // Square and add them all together.
+  const __m128i madd0 = _mm_madd_epi16(d0, d0);
+  const __m128i madd1 = _mm_madd_epi16(d1, d1);
+  const __m128i madd2 = _mm_madd_epi16(d2, d2);
+  const __m128i madd3 = _mm_madd_epi16(d3, d3);
+  const __m128i sum0 = _mm_add_epi32(madd0, madd1);
+  const __m128i sum1 = _mm_add_epi32(madd2, madd3);
+  const __m128i sum2 = _mm_add_epi32(sum0, sum1);
+
+  int32_t tmp[4];
+  _mm_storeu_si128((__m128i*)tmp, sum2);
+  return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform
+// Returns the difference between the weighted sum of the absolute value of
+// transformed coefficients.
+static int TTransform(const uint8_t* inA, const uint8_t* inB,
+                      const uint16_t* const w) {
+  int32_t sum[4];
+  __m128i tmp_0, tmp_1, tmp_2, tmp_3;
+  const __m128i zero = _mm_setzero_si128();
+
+  // Load, combine and transpose inputs.
+  {
+    const __m128i inA_0 = _mm_loadl_epi64((__m128i*)&inA[BPS * 0]);
+    const __m128i inA_1 = _mm_loadl_epi64((__m128i*)&inA[BPS * 1]);
+    const __m128i inA_2 = _mm_loadl_epi64((__m128i*)&inA[BPS * 2]);
+    const __m128i inA_3 = _mm_loadl_epi64((__m128i*)&inA[BPS * 3]);
+    const __m128i inB_0 = _mm_loadl_epi64((__m128i*)&inB[BPS * 0]);
+    const __m128i inB_1 = _mm_loadl_epi64((__m128i*)&inB[BPS * 1]);
+    const __m128i inB_2 = _mm_loadl_epi64((__m128i*)&inB[BPS * 2]);
+    const __m128i inB_3 = _mm_loadl_epi64((__m128i*)&inB[BPS * 3]);
+
+    // Combine inA and inB (we'll do two transforms in parallel).
+    const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
+    const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
+    const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
+    const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
+    // a00 b00 a01 b01 a02 b03 a03 b03   0 0 0 0 0 0 0 0
+    // a10 b10 a11 b11 a12 b12 a13 b13   0 0 0 0 0 0 0 0
+    // a20 b20 a21 b21 a22 b22 a23 b23   0 0 0 0 0 0 0 0
+    // a30 b30 a31 b31 a32 b32 a33 b33   0 0 0 0 0 0 0 0
+
+    // Transpose the two 4x4, discarding the filling zeroes.
+    const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
+    const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
+    // a00 a20  b00 b20  a01 a21  b01 b21  a02 a22  b02 b22  a03 a23  b03 b23
+    // a10 a30  b10 b30  a11 a31  b11 b31  a12 a32  b12 b32  a13 a33  b13 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+    // a00 a10 a20 a30  b00 b10 b20 b30  a01 a11 a21 a31  b01 b11 b21 b31
+    // a02 a12 a22 a32  b02 b12 b22 b32  a03 a13 a23 a33  b03 b13 b23 b33
+
+    // Convert to 16b.
+    tmp_0 = _mm_unpacklo_epi8(transpose1_0, zero);
+    tmp_1 = _mm_unpackhi_epi8(transpose1_0, zero);
+    tmp_2 = _mm_unpacklo_epi8(transpose1_1, zero);
+    tmp_3 = _mm_unpackhi_epi8(transpose1_1, zero);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+
+    // Transpose the two 4x4.
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 a22 b32 b03 b13 b23 b33
+    tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Vertical pass and difference of weighted sums.
+  {
+    // Load all inputs.
+    // TODO(cduvivier): Make variable declarations and allocations aligned so
+    //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
+    const __m128i w_0 = _mm_loadu_si128((__m128i*)&w[0]);
+    const __m128i w_8 = _mm_loadu_si128((__m128i*)&w[8]);
+
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+
+    // Separate the transforms of inA and inB.
+    __m128i A_b0 = _mm_unpacklo_epi64(b0, b1);
+    __m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
+    __m128i B_b0 = _mm_unpackhi_epi64(b0, b1);
+    __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
+
+    {
+      // sign(b) = b >> 15  (0x0000 if positive, 0xffff if negative)
+      const __m128i sign_A_b0 = _mm_srai_epi16(A_b0, 15);
+      const __m128i sign_A_b2 = _mm_srai_epi16(A_b2, 15);
+      const __m128i sign_B_b0 = _mm_srai_epi16(B_b0, 15);
+      const __m128i sign_B_b2 = _mm_srai_epi16(B_b2, 15);
+
+      // b = abs(b) = (b ^ sign) - sign
+      A_b0 = _mm_xor_si128(A_b0, sign_A_b0);
+      A_b2 = _mm_xor_si128(A_b2, sign_A_b2);
+      B_b0 = _mm_xor_si128(B_b0, sign_B_b0);
+      B_b2 = _mm_xor_si128(B_b2, sign_B_b2);
+      A_b0 = _mm_sub_epi16(A_b0, sign_A_b0);
+      A_b2 = _mm_sub_epi16(A_b2, sign_A_b2);
+      B_b0 = _mm_sub_epi16(B_b0, sign_B_b0);
+      B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
+    }
+
+    // weighted sums
+    A_b0 = _mm_madd_epi16(A_b0, w_0);
+    A_b2 = _mm_madd_epi16(A_b2, w_8);
+    B_b0 = _mm_madd_epi16(B_b0, w_0);
+    B_b2 = _mm_madd_epi16(B_b2, w_8);
+    A_b0 = _mm_add_epi32(A_b0, A_b2);
+    B_b0 = _mm_add_epi32(B_b0, B_b2);
+
+    // difference of weighted sums
+    A_b0 = _mm_sub_epi32(A_b0, B_b0);
+    _mm_storeu_si128((__m128i*)&sum[0], A_b0);
+  }
+  return sum[0] + sum[1] + sum[2] + sum[3];
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  const int diff_sum = TTransform(a, b, w);
+  return abs(diff_sum) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
+                                       const uint16_t* const sharpen,
+                                       const VP8Matrix* const mtx) {
+  const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i coeff0, coeff8;
+  __m128i out0, out8;
+  __m128i packed_out;
+
+  // Load all inputs.
+  // TODO(cduvivier): Make variable declarations and allocations aligned so that
+  //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
+  __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
+  __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
+  const __m128i iq0 = _mm_loadu_si128((__m128i*)&mtx->iq_[0]);
+  const __m128i iq8 = _mm_loadu_si128((__m128i*)&mtx->iq_[8]);
+  const __m128i q0 = _mm_loadu_si128((__m128i*)&mtx->q_[0]);
+  const __m128i q8 = _mm_loadu_si128((__m128i*)&mtx->q_[8]);
+
+  // extract sign(in)  (0x0000 if positive, 0xffff if negative)
+  const __m128i sign0 = _mm_cmpgt_epi16(zero, in0);
+  const __m128i sign8 = _mm_cmpgt_epi16(zero, in8);
+
+  // coeff = abs(in) = (in ^ sign) - sign
+  coeff0 = _mm_xor_si128(in0, sign0);
+  coeff8 = _mm_xor_si128(in8, sign8);
+  coeff0 = _mm_sub_epi16(coeff0, sign0);
+  coeff8 = _mm_sub_epi16(coeff8, sign8);
+
+  // coeff = abs(in) + sharpen
+  if (sharpen != NULL) {
+    const __m128i sharpen0 = _mm_loadu_si128((__m128i*)&sharpen[0]);
+    const __m128i sharpen8 = _mm_loadu_si128((__m128i*)&sharpen[8]);
+    coeff0 = _mm_add_epi16(coeff0, sharpen0);
+    coeff8 = _mm_add_epi16(coeff8, sharpen8);
+  }
+
+  // out = (coeff * iQ + B) >> QFIX
+  {
+    // doing calculations with 32b precision (QFIX=17)
+    // out = (coeff * iQ)
+    const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
+    const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
+    const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+    const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+    __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
+    __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
+    // out = (coeff * iQ + B)
+    const __m128i bias_00 = _mm_loadu_si128((__m128i*)&mtx->bias_[0]);
+    const __m128i bias_04 = _mm_loadu_si128((__m128i*)&mtx->bias_[4]);
+    const __m128i bias_08 = _mm_loadu_si128((__m128i*)&mtx->bias_[8]);
+    const __m128i bias_12 = _mm_loadu_si128((__m128i*)&mtx->bias_[12]);
+    out_00 = _mm_add_epi32(out_00, bias_00);
+    out_04 = _mm_add_epi32(out_04, bias_04);
+    out_08 = _mm_add_epi32(out_08, bias_08);
+    out_12 = _mm_add_epi32(out_12, bias_12);
+    // out = QUANTDIV(coeff, iQ, B, QFIX)
+    out_00 = _mm_srai_epi32(out_00, QFIX);
+    out_04 = _mm_srai_epi32(out_04, QFIX);
+    out_08 = _mm_srai_epi32(out_08, QFIX);
+    out_12 = _mm_srai_epi32(out_12, QFIX);
+
+    // pack result as 16b
+    out0 = _mm_packs_epi32(out_00, out_04);
+    out8 = _mm_packs_epi32(out_08, out_12);
+
+    // if (coeff > 2047) coeff = 2047
+    out0 = _mm_min_epi16(out0, max_coeff_2047);
+    out8 = _mm_min_epi16(out8, max_coeff_2047);
+  }
+
+  // get sign back (if (sign[j]) out_n = -out_n)
+  out0 = _mm_xor_si128(out0, sign0);
+  out8 = _mm_xor_si128(out8, sign8);
+  out0 = _mm_sub_epi16(out0, sign0);
+  out8 = _mm_sub_epi16(out8, sign8);
+
+  // in = out * Q
+  in0 = _mm_mullo_epi16(out0, q0);
+  in8 = _mm_mullo_epi16(out8, q8);
+
+  _mm_storeu_si128((__m128i*)&in[0], in0);
+  _mm_storeu_si128((__m128i*)&in[8], in8);
+
+  // zigzag the output before storing it.
+  //
+  // The zigzag pattern can almost be reproduced with a small sequence of
+  // shuffles. After it, we only need to swap the 7th (ending up in third
+  // position instead of twelfth) and 8th values.
+  {
+    __m128i outZ0, outZ8;
+    outZ0 = _mm_shufflehi_epi16(out0,  _MM_SHUFFLE(2, 1, 3, 0));
+    outZ0 = _mm_shuffle_epi32  (outZ0, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
+    outZ8 = _mm_shufflelo_epi16(out8,  _MM_SHUFFLE(3, 0, 2, 1));
+    outZ8 = _mm_shuffle_epi32  (outZ8, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
+    _mm_storeu_si128((__m128i*)&out[0], outZ0);
+    _mm_storeu_si128((__m128i*)&out[8], outZ8);
+    packed_out = _mm_packs_epi16(outZ0, outZ8);
+  }
+  {
+    const int16_t outZ_12 = out[12];
+    const int16_t outZ_3 = out[3];
+    out[3] = outZ_12;
+    out[12] = outZ_3;
+  }
+
+  // detect if all 'out' values are zeroes or not
+  return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
+}
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
+                            const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, NULL, mtx);
+}
+
+// Forward declaration.
+void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
+                              VP8Residual* const res);
+
+void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
+                              VP8Residual* const res) {
+  const __m128i c0 = _mm_loadu_si128((const __m128i*)coeffs);
+  const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
+  // Use SSE to compare 8 values with a single instruction.
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i m0 = _mm_cmpeq_epi16(c0, zero);
+  const __m128i m1 = _mm_cmpeq_epi16(c1, zero);
+  // Get the comparison results as a bitmask, consisting of two times 16 bits:
+  // two identical bits for each result. Concatenate both bitmasks to get a
+  // single 32 bit value. Negate the mask to get the position of entries that
+  // are not equal to zero. We don't need to mask out least significant bits
+  // according to res->first, since coeffs[0] is 0 if res->first > 0
+  const uint32_t mask =
+      ~(((uint32_t)_mm_movemask_epi8(m1) << 16) | _mm_movemask_epi8(m0));
+  // The position of the most significant non-zero bit indicates the position of
+  // the last non-zero value. Divide the result by two because __movemask_epi8
+  // operates on 8 bit values instead of 16 bit values.
+  assert(res->first == 0 || coeffs[0] == 0);
+  res->last = mask ? (BitsLog2Floor(mask) >> 1) : -1;
+  res->coeffs = coeffs;
+}
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitSSE2(void);
+
+void VP8EncDspInitSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  VP8CollectHistogram = CollectHistogram;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+  VP8FTransformWHT = FTransformWHT;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE4x4 = SSE4x4;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+#endif   // WEBP_USE_SSE2
+}
+
diff --git a/src/main/jni/libwebp/dsp/lossless.c b/src/main/jni/libwebp/dsp/lossless.c
new file mode 100644
index 000000000..a1bf3584b
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/lossless.c
@@ -0,0 +1,1639 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#include "./dsp.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include "../dec/vp8li.h"
+#include "../utils/endian_inl.h"
+#include "./lossless.h"
+#include "./yuv.h"
+
+#define MAX_DIFF_COST (1e30f)
+
+// lookup table for small values of log2(int)
+const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.0000000000000000f, 0.0000000000000000f,
+  1.0000000000000000f, 1.5849625007211560f,
+  2.0000000000000000f, 2.3219280948873621f,
+  2.5849625007211560f, 2.8073549220576041f,
+  3.0000000000000000f, 3.1699250014423121f,
+  3.3219280948873621f, 3.4594316186372973f,
+  3.5849625007211560f, 3.7004397181410921f,
+  3.8073549220576041f, 3.9068905956085187f,
+  4.0000000000000000f, 4.0874628412503390f,
+  4.1699250014423121f, 4.2479275134435852f,
+  4.3219280948873626f, 4.3923174227787606f,
+  4.4594316186372973f, 4.5235619560570130f,
+  4.5849625007211560f, 4.6438561897747243f,
+  4.7004397181410917f, 4.7548875021634682f,
+  4.8073549220576037f, 4.8579809951275718f,
+  4.9068905956085187f, 4.9541963103868749f,
+  5.0000000000000000f, 5.0443941193584533f,
+  5.0874628412503390f, 5.1292830169449663f,
+  5.1699250014423121f, 5.2094533656289501f,
+  5.2479275134435852f, 5.2854022188622487f,
+  5.3219280948873626f, 5.3575520046180837f,
+  5.3923174227787606f, 5.4262647547020979f,
+  5.4594316186372973f, 5.4918530963296747f,
+  5.5235619560570130f, 5.5545888516776376f,
+  5.5849625007211560f, 5.6147098441152083f,
+  5.6438561897747243f, 5.6724253419714951f,
+  5.7004397181410917f, 5.7279204545631987f,
+  5.7548875021634682f, 5.7813597135246599f,
+  5.8073549220576037f, 5.8328900141647412f,
+  5.8579809951275718f, 5.8826430493618415f,
+  5.9068905956085187f, 5.9307373375628866f,
+  5.9541963103868749f, 5.9772799234999167f,
+  6.0000000000000000f, 6.0223678130284543f,
+  6.0443941193584533f, 6.0660891904577720f,
+  6.0874628412503390f, 6.1085244567781691f,
+  6.1292830169449663f, 6.1497471195046822f,
+  6.1699250014423121f, 6.1898245588800175f,
+  6.2094533656289501f, 6.2288186904958804f,
+  6.2479275134435852f, 6.2667865406949010f,
+  6.2854022188622487f, 6.3037807481771030f,
+  6.3219280948873626f, 6.3398500028846243f,
+  6.3575520046180837f, 6.3750394313469245f,
+  6.3923174227787606f, 6.4093909361377017f,
+  6.4262647547020979f, 6.4429434958487279f,
+  6.4594316186372973f, 6.4757334309663976f,
+  6.4918530963296747f, 6.5077946401986963f,
+  6.5235619560570130f, 6.5391588111080309f,
+  6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211560f, 6.5999128421871278f,
+  6.6147098441152083f, 6.6293566200796094f,
+  6.6438561897747243f, 6.6582114827517946f,
+  6.6724253419714951f, 6.6865005271832185f,
+  6.7004397181410917f, 6.7142455176661224f,
+  6.7279204545631987f, 6.7414669864011464f,
+  6.7548875021634682f, 6.7681843247769259f,
+  6.7813597135246599f, 6.7944158663501061f,
+  6.8073549220576037f, 6.8201789624151878f,
+  6.8328900141647412f, 6.8454900509443747f,
+  6.8579809951275718f, 6.8703647195834047f,
+  6.8826430493618415f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745946f,
+  6.9307373375628866f, 6.9425145053392398f,
+  6.9541963103868749f, 6.9657842846620869f,
+  6.9772799234999167f, 6.9886846867721654f,
+  7.0000000000000000f, 7.0112272554232539f,
+  7.0223678130284543f, 7.0334230015374501f,
+  7.0443941193584533f, 7.0552824355011898f,
+  7.0660891904577720f, 7.0768155970508308f,
+  7.0874628412503390f, 7.0980320829605263f,
+  7.1085244567781691f, 7.1189410727235076f,
+  7.1292830169449663f, 7.1395513523987936f,
+  7.1497471195046822f, 7.1598713367783890f,
+  7.1699250014423121f, 7.1799090900149344f,
+  7.1898245588800175f, 7.1996723448363644f,
+  7.2094533656289501f, 7.2191685204621611f,
+  7.2288186904958804f, 7.2384047393250785f,
+  7.2479275134435852f, 7.2573878426926521f,
+  7.2667865406949010f, 7.2761244052742375f,
+  7.2854022188622487f, 7.2946207488916270f,
+  7.3037807481771030f, 7.3128829552843557f,
+  7.3219280948873626f, 7.3309168781146167f,
+  7.3398500028846243f, 7.3487281542310771f,
+  7.3575520046180837f, 7.3663222142458160f,
+  7.3750394313469245f, 7.3837042924740519f,
+  7.3923174227787606f, 7.4008794362821843f,
+  7.4093909361377017f, 7.4178525148858982f,
+  7.4262647547020979f, 7.4346282276367245f,
+  7.4429434958487279f, 7.4512111118323289f,
+  7.4594316186372973f, 7.4676055500829976f,
+  7.4757334309663976f, 7.4838157772642563f,
+  7.4918530963296747f, 7.4998458870832056f,
+  7.5077946401986963f, 7.5156998382840427f,
+  7.5235619560570130f, 7.5313814605163118f,
+  7.5391588111080309f, 7.5468944598876364f,
+  7.5545888516776376f, 7.5622424242210728f,
+  7.5698556083309478f, 7.5774288280357486f,
+  7.5849625007211560f, 7.5924570372680806f,
+  7.5999128421871278f, 7.6073303137496104f,
+  7.6147098441152083f, 7.6220518194563764f,
+  7.6293566200796094f, 7.6366246205436487f,
+  7.6438561897747243f, 7.6510516911789281f,
+  7.6582114827517946f, 7.6653359171851764f,
+  7.6724253419714951f, 7.6794800995054464f,
+  7.6865005271832185f, 7.6934869574993252f,
+  7.7004397181410917f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071855f,
+  7.7279204545631987f, 7.7347096202258383f,
+  7.7414669864011464f, 7.7481928495894605f,
+  7.7548875021634682f, 7.7615512324444795f,
+  7.7681843247769259f, 7.7747870596011736f,
+  7.7813597135246599f, 7.7879025593914317f,
+  7.7944158663501061f, 7.8008998999203047f,
+  7.8073549220576037f, 7.8137811912170374f,
+  7.8201789624151878f, 7.8265484872909150f,
+  7.8328900141647412f, 7.8392037880969436f,
+  7.8454900509443747f, 7.8517490414160571f,
+  7.8579809951275718f, 7.8641861446542797f,
+  7.8703647195834047f, 7.8765169465649993f,
+  7.8826430493618415f, 7.8887432488982591f,
+  7.8948177633079437f, 7.9008668079807486f,
+  7.9068905956085187f, 7.9128893362299619f,
+  7.9188632372745946f, 7.9248125036057812f,
+  7.9307373375628866f, 7.9366379390025709f,
+  7.9425145053392398f, 7.9483672315846778f,
+  7.9541963103868749f, 7.9600019320680805f,
+  7.9657842846620869f, 7.9715435539507719f,
+  7.9772799234999167f, 7.9829935746943103f,
+  7.9886846867721654f, 7.9943534368588577f
+};
+
+const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
+  8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
+  24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
+  43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
+  64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
+  86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
+  110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
+  134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
+  160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
+  186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
+  212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
+  240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
+  268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
+  296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
+  325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
+  354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
+  384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
+  413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
+  444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
+  474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
+  505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
+  536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
+  568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
+  600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
+  632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
+  664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
+  696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
+  729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
+  762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
+  795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
+  828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
+  862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
+  896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
+  929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
+  963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
+  998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
+  1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
+  1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
+  1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
+  1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
+  1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
+  1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
+  1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
+  1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
+  1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
+  1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
+  1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
+  1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
+  1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
+  1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
+  1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
+  1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
+  1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
+  1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
+  1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
+  1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
+  1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
+  1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
+  1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
+  1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
+  1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
+  1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
+  1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
+  2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
+};
+
+const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
+  { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
+  { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
+  { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
+  { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
+  { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+  {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
+  {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
+  {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+};
+
+const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
+   0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
+   0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+  127,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
+};
+
+// The threshold till approximate version of log_2 can be used.
+// Practically, we can get rid of the call to log() as the two values match to
+// very high degree (the ratio of these two is 0.99999x).
+// Keeping a high threshold for now.
+#define APPROX_LOG_WITH_CORRECTION_MAX  65536
+#define APPROX_LOG_MAX                   4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+static float FastSLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    int log_cnt = 0;
+    uint32_t y = 1;
+    int correction = 0;
+    const float v_f = (float)v;
+    const uint32_t orig_v = v;
+    do {
+      ++log_cnt;
+      v = v >> 1;
+      y = y << 1;
+    } while (v >= LOG_LOOKUP_IDX_MAX);
+    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+    // Xf = floor(Xf) * (1 + (v % y) / v)
+    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+    // The correction factor: log(1 + d) ~ d; for very small d values, so
+    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+    // LOG_2_RECIPROCAL ~ 23/16
+    correction = (23 * (orig_v & (y - 1))) >> 4;
+    return v_f * (kLog2Table[v] + log_cnt) + correction;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+  }
+}
+
+static float FastLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    int log_cnt = 0;
+    uint32_t y = 1;
+    const uint32_t orig_v = v;
+    double log_2;
+    do {
+      ++log_cnt;
+      v = v >> 1;
+      y = y << 1;
+    } while (v >= LOG_LOOKUP_IDX_MAX);
+    log_2 = kLog2Table[v] + log_cnt;
+    if (orig_v >= APPROX_LOG_MAX) {
+      // Since the division is still expensive, add this correction factor only
+      // for large values of 'v'.
+      const int correction = (23 * (orig_v & (y - 1))) >> 4;
+      log_2 += (double)correction / orig_v;
+    }
+    return (float)log_2;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * log((double)v));
+  }
+}
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+// Mostly used to reduce code size + readability
+static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
+
+// In-place sum of each component with mod 256.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
+  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
+  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+  return Average2(Average2(a0, a2), a1);
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+                                     uint32_t a2, uint32_t a3) {
+  return Average2(Average2(a0, a1), Average2(a2, a3));
+}
+
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+  if (a < 256) {
+    return a;
+  }
+  // return 0, when a is a negative integer.
+  // return 255, when a is positive.
+  return ~a >> 24;
+}
+
+static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
+  return Clip255(a + b - c);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
+  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
+                                         (c1 >> 16) & 0xff,
+                                         (c2 >> 16) & 0xff);
+  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
+                                         (c1 >> 8) & 0xff,
+                                         (c2 >> 8) & 0xff);
+  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
+}
+
+static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
+  return Clip255(a + (a - b) / 2);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const uint32_t ave = Average2(c0, c1);
+  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
+  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
+  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
+  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
+}
+
+// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
+#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
+# define LOCAL_INLINE __attribute__ ((noinline))
+#else
+# define LOCAL_INLINE WEBP_INLINE
+#endif
+
+static LOCAL_INLINE int Sub3(int a, int b, int c) {
+  const int pb = b - c;
+  const int pa = a - c;
+  return abs(pb) - abs(pa);
+}
+
+#undef LOCAL_INLINE
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+  const int pa_minus_pb =
+      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
+      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
+      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
+      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
+  return (pa_minus_pb <= 0) ? a : b;
+}
+
+//------------------------------------------------------------------------------
+// Predictors
+
+static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
+  (void)top;
+  (void)left;
+  return ARGB_BLACK;
+}
+static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
+  (void)top;
+  return left;
+}
+static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
+  (void)left;
+  return top[0];
+}
+static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
+  (void)left;
+  return top[1];
+}
+static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
+  (void)left;
+  return top[-1];
+}
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average3(left, top[0], top[1]);
+  return pred;
+}
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[-1]);
+  return pred;
+}
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[0]);
+  return pred;
+}
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(top[-1], top[0]);
+  (void)left;
+  return pred;
+}
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(top[0], top[1]);
+  (void)left;
+  return pred;
+}
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
+  return pred;
+}
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Select(top[0], left, top[-1]);
+  return pred;
+}
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
+  return pred;
+}
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
+  return pred;
+}
+
+static const VP8LPredictorFunc kPredictorsC[16] = {
+  Predictor0, Predictor1, Predictor2, Predictor3,
+  Predictor4, Predictor5, Predictor6, Predictor7,
+  Predictor8, Predictor9, Predictor10, Predictor11,
+  Predictor12, Predictor13,
+  Predictor0, Predictor0    // <- padding security sentinels
+};
+
+static float PredictionCostSpatial(const int counts[256], int weight_0,
+                                   double exp_val) {
+  const int significant_symbols = 256 >> 4;
+  const double exp_decay_factor = 0.6;
+  double bits = weight_0 * counts[0];
+  int i;
+  for (i = 1; i < significant_symbols; ++i) {
+    bits += exp_val * (counts[i] + counts[256 - i]);
+    exp_val *= exp_decay_factor;
+  }
+  return (float)(-0.1 * bits);
+}
+
+// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
+static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
+  int i;
+  double retval = 0.;
+  int sumX = 0, sumXY = 0;
+  for (i = 0; i < 256; ++i) {
+    const int x = X[i];
+    const int xy = x + Y[i];
+    if (x != 0) {
+      sumX += x;
+      retval -= VP8LFastSLog2(x);
+      sumXY += xy;
+      retval -= VP8LFastSLog2(xy);
+    } else if (xy != 0) {
+      sumXY += xy;
+      retval -= VP8LFastSLog2(xy);
+    }
+  }
+  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
+  return (float)retval;
+}
+
+static float PredictionCostSpatialHistogram(const int accumulated[4][256],
+                                            const int tile[4][256]) {
+  int i;
+  double retval = 0;
+  for (i = 0; i < 4; ++i) {
+    const double kExpValue = 0.94;
+    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
+    retval += CombinedShannonEntropy(tile[i], accumulated[i]);
+  }
+  return (float)retval;
+}
+
+static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
+  ++histo_argb[0][argb >> 24];
+  ++histo_argb[1][(argb >> 16) & 0xff];
+  ++histo_argb[2][(argb >> 8) & 0xff];
+  ++histo_argb[3][argb & 0xff];
+}
+
+static int GetBestPredictorForTile(int width, int height,
+                                   int tile_x, int tile_y, int bits,
+                                   const int accumulated[4][256],
+                                   const uint32_t* const argb_scratch) {
+  const int kNumPredModes = 14;
+  const int col_start = tile_x << bits;
+  const int row_start = tile_y << bits;
+  const int tile_size = 1 << bits;
+  const int max_y = GetMin(tile_size, height - row_start);
+  const int max_x = GetMin(tile_size, width - col_start);
+  float best_diff = MAX_DIFF_COST;
+  int best_mode = 0;
+  int mode;
+  for (mode = 0; mode < kNumPredModes; ++mode) {
+    const uint32_t* current_row = argb_scratch;
+    const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
+    float cur_diff;
+    int y;
+    int histo_argb[4][256];
+    memset(histo_argb, 0, sizeof(histo_argb));
+    for (y = 0; y < max_y; ++y) {
+      int x;
+      const int row = row_start + y;
+      const uint32_t* const upper_row = current_row;
+      current_row = upper_row + width;
+      for (x = 0; x < max_x; ++x) {
+        const int col = col_start + x;
+        uint32_t predict;
+        if (row == 0) {
+          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
+        } else if (col == 0) {
+          predict = upper_row[col];  // Top.
+        } else {
+          predict = pred_func(current_row[col - 1], upper_row + col);
+        }
+        UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
+      }
+    }
+    cur_diff = PredictionCostSpatialHistogram(
+        accumulated, (const int (*)[256])histo_argb);
+    if (cur_diff < best_diff) {
+      best_diff = cur_diff;
+      best_mode = mode;
+    }
+  }
+
+  return best_mode;
+}
+
+static void CopyTileWithPrediction(int width, int height,
+                                   int tile_x, int tile_y, int bits, int mode,
+                                   const uint32_t* const argb_scratch,
+                                   uint32_t* const argb) {
+  const int col_start = tile_x << bits;
+  const int row_start = tile_y << bits;
+  const int tile_size = 1 << bits;
+  const int max_y = GetMin(tile_size, height - row_start);
+  const int max_x = GetMin(tile_size, width - col_start);
+  const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
+  const uint32_t* current_row = argb_scratch;
+
+  int y;
+  for (y = 0; y < max_y; ++y) {
+    int x;
+    const int row = row_start + y;
+    const uint32_t* const upper_row = current_row;
+    current_row = upper_row + width;
+    for (x = 0; x < max_x; ++x) {
+      const int col = col_start + x;
+      const int pix = row * width + col;
+      uint32_t predict;
+      if (row == 0) {
+        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
+      } else if (col == 0) {
+        predict = upper_row[col];  // Top.
+      } else {
+        predict = pred_func(current_row[col - 1], upper_row + col);
+      }
+      argb[pix] = VP8LSubPixels(current_row[col], predict);
+    }
+  }
+}
+
+void VP8LResidualImage(int width, int height, int bits,
+                       uint32_t* const argb, uint32_t* const argb_scratch,
+                       uint32_t* const image) {
+  const int max_tile_size = 1 << bits;
+  const int tiles_per_row = VP8LSubSampleSize(width, bits);
+  const int tiles_per_col = VP8LSubSampleSize(height, bits);
+  uint32_t* const upper_row = argb_scratch;
+  uint32_t* const current_tile_rows = argb_scratch + width;
+  int tile_y;
+  int histo[4][256];
+  memset(histo, 0, sizeof(histo));
+  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+    const int tile_y_offset = tile_y * max_tile_size;
+    const int this_tile_height =
+        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
+    int tile_x;
+    if (tile_y > 0) {
+      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
+             width * sizeof(*upper_row));
+    }
+    memcpy(current_tile_rows, &argb[tile_y_offset * width],
+           this_tile_height * width * sizeof(*current_tile_rows));
+    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+      int pred;
+      int y;
+      const int tile_x_offset = tile_x * max_tile_size;
+      int all_x_max = tile_x_offset + max_tile_size;
+      if (all_x_max > width) {
+        all_x_max = width;
+      }
+      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
+                                     (const int (*)[256])histo,
+                                     argb_scratch);
+      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
+      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
+                             argb_scratch, argb);
+      for (y = 0; y < max_tile_size; ++y) {
+        int ix;
+        int all_x;
+        int all_y = tile_y_offset + y;
+        if (all_y >= height) {
+          break;
+        }
+        ix = all_y * width + tile_x_offset;
+        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+          UpdateHisto(histo, argb[ix]);
+        }
+      }
+    }
+  }
+}
+
+// Inverse prediction.
+static void PredictorInverseTransform(const VP8LTransform* const transform,
+                                      int y_start, int y_end, uint32_t* data) {
+  const int width = transform->xsize_;
+  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
+    int x;
+    const uint32_t pred0 = Predictor0(data[-1], NULL);
+    AddPixelsEq(data, pred0);
+    for (x = 1; x < width; ++x) {
+      const uint32_t pred1 = Predictor1(data[x - 1], NULL);
+      AddPixelsEq(data + x, pred1);
+    }
+    data += width;
+    ++y_start;
+  }
+
+  {
+    int y = y_start;
+    const int tile_width = 1 << transform->bits_;
+    const int mask = tile_width - 1;
+    const int safe_width = width & ~mask;
+    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+    const uint32_t* pred_mode_base =
+        transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+    while (y < y_end) {
+      const uint32_t pred2 = Predictor2(data[-1], data - width);
+      const uint32_t* pred_mode_src = pred_mode_base;
+      VP8LPredictorFunc pred_func;
+      int x = 1;
+      int t = 1;
+      // First pixel follows the T (mode=2) mode.
+      AddPixelsEq(data, pred2);
+      // .. the rest:
+      while (x < safe_width) {
+        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+        for (; t < tile_width; ++t, ++x) {
+          const uint32_t pred = pred_func(data[x - 1], data + x - width);
+          AddPixelsEq(data + x, pred);
+        }
+        t = 0;
+      }
+      if (x < width) {
+        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+        for (; x < width; ++x) {
+          const uint32_t pred = pred_func(data[x - 1], data + x - width);
+          AddPixelsEq(data + x, pred);
+        }
+      }
+      data += width;
+      ++y;
+      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
+        pred_mode_base += tiles_per_row;
+      }
+    }
+  }
+}
+
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = argb_data[i];
+    const uint32_t green = (argb >> 8) & 0xff;
+    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
+    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
+    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
+  }
+}
+
+// Add green to blue and red channels (i.e. perform the inverse transform of
+// 'subtract green').
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = data[i];
+    const uint32_t green = ((argb >> 8) & 0xff);
+    uint32_t red_blue = (argb & 0x00ff00ffu);
+    red_blue += (green << 16) | green;
+    red_blue &= 0x00ff00ffu;
+    data[i] = (argb & 0xff00ff00u) | red_blue;
+  }
+}
+
+static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
+  m->green_to_red_ = 0;
+  m->green_to_blue_ = 0;
+  m->red_to_blue_ = 0;
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
+                                                int8_t color) {
+  return (uint32_t)((int)(color_pred) * color) >> 5;
+}
+
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
+                                               VP8LMultipliers* const m) {
+  m->green_to_red_  = (color_code >>  0) & 0xff;
+  m->green_to_blue_ = (color_code >>  8) & 0xff;
+  m->red_to_blue_   = (color_code >> 16) & 0xff;
+}
+
+static WEBP_INLINE uint32_t MultipliersToColorCode(
+    const VP8LMultipliers* const m) {
+  return 0xff000000u |
+         ((uint32_t)(m->red_to_blue_) << 16) |
+         ((uint32_t)(m->green_to_blue_) << 8) |
+         m->green_to_red_;
+}
+
+void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
+                          int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = data[i];
+    const uint32_t green = argb >> 8;
+    const uint32_t red = argb >> 16;
+    uint32_t new_red = red;
+    uint32_t new_blue = argb;
+    new_red -= ColorTransformDelta(m->green_to_red_, green);
+    new_red &= 0xff;
+    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+    new_blue &= 0xff;
+    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
+  }
+}
+
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
+                                 int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = data[i];
+    const uint32_t green = argb >> 8;
+    const uint32_t red = argb >> 16;
+    uint32_t new_red = red;
+    uint32_t new_blue = argb;
+    new_red += ColorTransformDelta(m->green_to_red_, green);
+    new_red &= 0xff;
+    new_blue += ColorTransformDelta(m->green_to_blue_, green);
+    new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
+    new_blue &= 0xff;
+    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
+  }
+}
+
+static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
+                                             uint32_t argb) {
+  const uint32_t green = argb >> 8;
+  uint32_t new_red = argb >> 16;
+  new_red -= ColorTransformDelta(green_to_red, green);
+  return (new_red & 0xff);
+}
+
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
+                                              uint8_t red_to_blue,
+                                              uint32_t argb) {
+  const uint32_t green = argb >> 8;
+  const uint32_t red = argb >> 16;
+  uint8_t new_blue = argb;
+  new_blue -= ColorTransformDelta(green_to_blue, green);
+  new_blue -= ColorTransformDelta(red_to_blue, red);
+  return (new_blue & 0xff);
+}
+
+static float PredictionCostCrossColor(const int accumulated[256],
+                                      const int counts[256]) {
+  // Favor low entropy, locally and globally.
+  // Favor small absolute values for PredictionCostSpatial
+  static const double kExpValue = 2.4;
+  return CombinedShannonEntropy(counts, accumulated) +
+         PredictionCostSpatial(counts, 3, kExpValue);
+}
+
+static float GetPredictionCostCrossColorRed(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
+    const int accumulated_red_histo[256], const uint32_t* const argb) {
+  int all_y;
+  int histo[256] = { 0 };
+  float cur_diff;
+  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+    int ix = all_y * xsize + tile_x_offset;
+    int all_x;
+    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+      ++histo[TransformColorRed(green_to_red, argb[ix])];  // red.
+    }
+  }
+  cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
+  if ((uint8_t)green_to_red == prev_x.green_to_red_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if ((uint8_t)green_to_red == prev_y.green_to_red_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if (green_to_red == 0) {
+    cur_diff -= 3;
+  }
+  return cur_diff;
+}
+
+static void GetBestGreenToRed(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+    const int accumulated_red_histo[256], const uint32_t* const argb,
+    VP8LMultipliers* const best_tx) {
+  int min_green_to_red = -64;
+  int max_green_to_red = 64;
+  int green_to_red = 0;
+  int eval_min = 1;
+  int eval_max = 1;
+  float cur_diff_min = MAX_DIFF_COST;
+  float cur_diff_max = MAX_DIFF_COST;
+  // Do a binary search to find the optimal green_to_red color transform.
+  while (max_green_to_red - min_green_to_red > 2) {
+    if (eval_min) {
+      cur_diff_min = GetPredictionCostCrossColorRed(
+          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+          prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
+      eval_min = 0;
+    }
+    if (eval_max) {
+      cur_diff_max = GetPredictionCostCrossColorRed(
+          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+          prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
+      eval_max = 0;
+    }
+    if (cur_diff_min < cur_diff_max) {
+      green_to_red = min_green_to_red;
+      max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
+      eval_max = 1;
+    } else {
+      green_to_red = max_green_to_red;
+      min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
+      eval_min = 1;
+    }
+  }
+  best_tx->green_to_red_ = green_to_red;
+}
+
+static float GetPredictionCostCrossColorBlue(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
+    const uint32_t* const argb) {
+  int all_y;
+  int histo[256] = { 0 };
+  float cur_diff;
+  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+    int all_x;
+    int ix = all_y * xsize + tile_x_offset;
+    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+      ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
+    }
+  }
+  cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
+  if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
+    cur_diff -= 3;  // favor keeping the areas locally similar
+  }
+  if (green_to_blue == 0) {
+    cur_diff -= 3;
+  }
+  if (red_to_blue == 0) {
+    cur_diff -= 3;
+  }
+  return cur_diff;
+}
+
+static void GetBestGreenRedToBlue(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+    const int accumulated_blue_histo[256], const uint32_t* const argb,
+    VP8LMultipliers* const best_tx) {
+  float best_diff = MAX_DIFF_COST;
+  float cur_diff;
+  const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
+  const int min_green_to_blue = -32;
+  const int max_green_to_blue = 32;
+  const int min_red_to_blue = -32;
+  const int max_red_to_blue = 32;
+  const int num_iters =
+      (1 + (max_green_to_blue - min_green_to_blue) / step) *
+      (1 + (max_red_to_blue - min_red_to_blue) / step);
+  // Number of tries to get optimal green_to_blue & red_to_blue color transforms
+  // after finding a local minima.
+  const int max_tries_after_min = 4 + (num_iters >> 2);
+  int num_tries_after_min = 0;
+  int green_to_blue;
+  for (green_to_blue = min_green_to_blue;
+       green_to_blue <= max_green_to_blue &&
+       num_tries_after_min < max_tries_after_min;
+       green_to_blue += step) {
+    int red_to_blue;
+    for (red_to_blue = min_red_to_blue;
+         red_to_blue <= max_red_to_blue &&
+         num_tries_after_min < max_tries_after_min;
+         red_to_blue += step) {
+      cur_diff = GetPredictionCostCrossColorBlue(
+          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
+          prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
+      if (cur_diff < best_diff) {
+        best_diff = cur_diff;
+        best_tx->green_to_blue_ = green_to_blue;
+        best_tx->red_to_blue_ = red_to_blue;
+        num_tries_after_min = 0;
+      } else {
+        ++num_tries_after_min;
+      }
+    }
+  }
+}
+
+static VP8LMultipliers GetBestColorTransformForTile(
+    int tile_x, int tile_y, int bits,
+    VP8LMultipliers prev_x,
+    VP8LMultipliers prev_y,
+    int quality, int xsize, int ysize,
+    const int accumulated_red_histo[256],
+    const int accumulated_blue_histo[256],
+    const uint32_t* const argb) {
+  const int max_tile_size = 1 << bits;
+  const int tile_y_offset = tile_y * max_tile_size;
+  const int tile_x_offset = tile_x * max_tile_size;
+  const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
+  const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
+  VP8LMultipliers best_tx;
+  MultipliersClear(&best_tx);
+
+  GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
+                    prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
+  GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
+                        xsize, prev_x, prev_y, quality, accumulated_blue_histo,
+                        argb, &best_tx);
+  return best_tx;
+}
+
+static void CopyTileWithColorTransform(int xsize, int ysize,
+                                       int tile_x, int tile_y,
+                                       int max_tile_size,
+                                       VP8LMultipliers color_transform,
+                                       uint32_t* argb) {
+  const int xscan = GetMin(max_tile_size, xsize - tile_x);
+  int yscan = GetMin(max_tile_size, ysize - tile_y);
+  argb += tile_y * xsize + tile_x;
+  while (yscan-- > 0) {
+    VP8LTransformColor(&color_transform, argb, xscan);
+    argb += xsize;
+  }
+}
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                             uint32_t* const argb, uint32_t* image) {
+  const int max_tile_size = 1 << bits;
+  const int tile_xsize = VP8LSubSampleSize(width, bits);
+  const int tile_ysize = VP8LSubSampleSize(height, bits);
+  int accumulated_red_histo[256] = { 0 };
+  int accumulated_blue_histo[256] = { 0 };
+  int tile_x, tile_y;
+  VP8LMultipliers prev_x, prev_y;
+  MultipliersClear(&prev_y);
+  MultipliersClear(&prev_x);
+  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+      int y;
+      const int tile_x_offset = tile_x * max_tile_size;
+      const int tile_y_offset = tile_y * max_tile_size;
+      const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
+      const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
+      const int offset = tile_y * tile_xsize + tile_x;
+      if (tile_y != 0) {
+        ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
+      }
+      prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
+                                            prev_x, prev_y,
+                                            quality, width, height,
+                                            accumulated_red_histo,
+                                            accumulated_blue_histo,
+                                            argb);
+      image[offset] = MultipliersToColorCode(&prev_x);
+      CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
+                                 max_tile_size, prev_x, argb);
+
+      // Gather accumulated histogram data.
+      for (y = tile_y_offset; y < all_y_max; ++y) {
+        int ix = y * width + tile_x_offset;
+        const int ix_end = ix + all_x_max - tile_x_offset;
+        for (; ix < ix_end; ++ix) {
+          const uint32_t pix = argb[ix];
+          if (ix >= 2 &&
+              pix == argb[ix - 2] &&
+              pix == argb[ix - 1]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          if (ix >= width + 2 &&
+              argb[ix - 2] == argb[ix - width - 2] &&
+              argb[ix - 1] == argb[ix - width - 1] &&
+              pix == argb[ix - width]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          ++accumulated_red_histo[(pix >> 16) & 0xff];
+          ++accumulated_blue_histo[(pix >> 0) & 0xff];
+        }
+      }
+    }
+  }
+}
+
+// Color space inverse transform.
+static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
+                                       int y_start, int y_end, uint32_t* data) {
+  const int width = transform->xsize_;
+  const int tile_width = 1 << transform->bits_;
+  const int mask = tile_width - 1;
+  const int safe_width = width & ~mask;
+  const int remaining_width = width - safe_width;
+  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+  int y = y_start;
+  const uint32_t* pred_row =
+      transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+  while (y < y_end) {
+    const uint32_t* pred = pred_row;
+    VP8LMultipliers m = { 0, 0, 0 };
+    const uint32_t* const data_safe_end = data + safe_width;
+    const uint32_t* const data_end = data + width;
+    while (data < data_safe_end) {
+      ColorCodeToMultipliers(*pred++, &m);
+      VP8LTransformColorInverse(&m, data, tile_width);
+      data += tile_width;
+    }
+    if (data < data_end) {  // Left-overs using C-version.
+      ColorCodeToMultipliers(*pred++, &m);
+      VP8LTransformColorInverse(&m, data, remaining_width);
+      data += remaining_width;
+    }
+    ++y;
+    if ((y & mask) == 0) pred_row += tiles_per_row;
+  }
+}
+
+// Separate out pixels packed together using pixel-bundling.
+// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
+#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
+void FUNC_NAME(const VP8LTransform* const transform,                           \
+               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
+  int y;                                                                       \
+  const int bits_per_pixel = 8 >> transform->bits_;                            \
+  const int width = transform->xsize_;                                         \
+  const uint32_t* const color_map = transform->data_;                          \
+  if (bits_per_pixel < 8) {                                                    \
+    const int pixels_per_byte = 1 << transform->bits_;                         \
+    const int count_mask = pixels_per_byte - 1;                                \
+    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      uint32_t packed_pixels = 0;                                              \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        /* We need to load fresh 'packed_pixels' once every                */  \
+        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
+        /* is a power of 2, so can just use a mask for that, instead of    */  \
+        /* decrementing a counter.                                         */  \
+        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
+        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
+        packed_pixels >>= bits_per_pixel;                                      \
+      }                                                                        \
+    }                                                                          \
+  } else {                                                                     \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
+      }                                                                        \
+    }                                                                          \
+  }                                                                            \
+}
+
+static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
+  return (idx >> 8) & 0xff;
+}
+
+static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
+  return idx;
+}
+
+static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
+  return val;
+}
+
+static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
+  return (val >> 8) & 0xff;
+}
+
+static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
+                           GetARGBValue)
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
+                    GetAlphaValue)
+
+#undef COLOR_INDEX_INVERSE
+
+void VP8LInverseTransform(const VP8LTransform* const transform,
+                          int row_start, int row_end,
+                          const uint32_t* const in, uint32_t* const out) {
+  const int width = transform->xsize_;
+  assert(row_start < row_end);
+  assert(row_end <= transform->ysize_);
+  switch (transform->type_) {
+    case SUBTRACT_GREEN:
+      VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
+      break;
+    case PREDICTOR_TRANSFORM:
+      PredictorInverseTransform(transform, row_start, row_end, out);
+      if (row_end != transform->ysize_) {
+        // The last predicted row in this iteration will be the top-pred row
+        // for the first row in next iteration.
+        memcpy(out - width, out + (row_end - row_start - 1) * width,
+               width * sizeof(*out));
+      }
+      break;
+    case CROSS_COLOR_TRANSFORM:
+      ColorSpaceInverseTransform(transform, row_start, row_end, out);
+      break;
+    case COLOR_INDEXING_TRANSFORM:
+      if (in == out && transform->bits_ > 0) {
+        // Move packed pixels to the end of unpacked region, so that unpacking
+        // can occur seamlessly.
+        // Also, note that this is the only transform that applies on
+        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+        // transforms work on effective width of xsize_.
+        const int out_stride = (row_end - row_start) * width;
+        const int in_stride = (row_end - row_start) *
+            VP8LSubSampleSize(transform->xsize_, transform->bits_);
+        uint32_t* const src = out + out_stride - in_stride;
+        memmove(src, out, in_stride * sizeof(*src));
+        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+      } else {
+        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+      }
+      break;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+static int is_big_endian(void) {
+  static const union {
+    uint16_t w;
+    uint8_t b[2];
+  } tmp = { 1 };
+  return (tmp.b[0] != 1);
+}
+
+void VP8LConvertBGRAToRGB_C(const uint32_t* src,
+                            int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {
+    const uint32_t argb = *src++;
+    *dst++ = (argb >> 16) & 0xff;
+    *dst++ = (argb >>  8) & 0xff;
+    *dst++ = (argb >>  0) & 0xff;
+  }
+}
+
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {
+    const uint32_t argb = *src++;
+    *dst++ = (argb >> 16) & 0xff;
+    *dst++ = (argb >>  8) & 0xff;
+    *dst++ = (argb >>  0) & 0xff;
+    *dst++ = (argb >> 24) & 0xff;
+  }
+}
+
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {
+    const uint32_t argb = *src++;
+    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
+    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
+#ifdef WEBP_SWAP_16BIT_CSP
+    *dst++ = ba;
+    *dst++ = rg;
+#else
+    *dst++ = rg;
+    *dst++ = ba;
+#endif
+  }
+}
+
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
+                               int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {
+    const uint32_t argb = *src++;
+    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
+    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
+#ifdef WEBP_SWAP_16BIT_CSP
+    *dst++ = gb;
+    *dst++ = rg;
+#else
+    *dst++ = rg;
+    *dst++ = gb;
+#endif
+  }
+}
+
+void VP8LConvertBGRAToBGR_C(const uint32_t* src,
+                            int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {
+    const uint32_t argb = *src++;
+    *dst++ = (argb >>  0) & 0xff;
+    *dst++ = (argb >>  8) & 0xff;
+    *dst++ = (argb >> 16) & 0xff;
+  }
+}
+
+static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
+                       int swap_on_big_endian) {
+  if (is_big_endian() == swap_on_big_endian) {
+    const uint32_t* const src_end = src + num_pixels;
+    while (src < src_end) {
+      const uint32_t argb = *src++;
+
+#if !defined(WORDS_BIGENDIAN)
+#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
+      *(uint32_t*)dst = BSwap32(argb);
+#else  // WEBP_REFERENCE_IMPLEMENTATION
+      dst[0] = (argb >> 24) & 0xff;
+      dst[1] = (argb >> 16) & 0xff;
+      dst[2] = (argb >>  8) & 0xff;
+      dst[3] = (argb >>  0) & 0xff;
+#endif
+#else  // WORDS_BIGENDIAN
+      dst[0] = (argb >>  0) & 0xff;
+      dst[1] = (argb >>  8) & 0xff;
+      dst[2] = (argb >> 16) & 0xff;
+      dst[3] = (argb >> 24) & 0xff;
+#endif
+      dst += sizeof(argb);
+    }
+  } else {
+    memcpy(dst, src, num_pixels * sizeof(*src));
+  }
+}
+
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
+  switch (out_colorspace) {
+    case MODE_RGB:
+      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
+      break;
+    case MODE_RGBA:
+      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      break;
+    case MODE_rgbA:
+      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+      break;
+    case MODE_BGR:
+      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
+      break;
+    case MODE_BGRA:
+      CopyOrSwap(in_data, num_pixels, rgba, 1);
+      break;
+    case MODE_bgrA:
+      CopyOrSwap(in_data, num_pixels, rgba, 1);
+      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
+      break;
+    case MODE_ARGB:
+      CopyOrSwap(in_data, num_pixels, rgba, 0);
+      break;
+    case MODE_Argb:
+      CopyOrSwap(in_data, num_pixels, rgba, 0);
+      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
+      break;
+    case MODE_RGBA_4444:
+      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      break;
+    case MODE_rgbA_4444:
+      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
+      break;
+    case MODE_RGB_565:
+      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
+      break;
+    default:
+      assert(0);          // Code flow should not reach here.
+  }
+}
+
+//------------------------------------------------------------------------------
+// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst) {
+  int x;
+  if (xbits > 0) {
+    const int bit_depth = 1 << (3 - xbits);
+    const int mask = (1 << xbits) - 1;
+    uint32_t code = 0xff000000;
+    for (x = 0; x < width; ++x) {
+      const int xsub = x & mask;
+      if (xsub == 0) {
+        code = 0xff000000;
+      }
+      code |= row[x] << (8 + bit_depth * xsub);
+      dst[x >> xbits] = code;
+    }
+  } else {
+    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static double ExtraCost(const uint32_t* population, int length) {
+  int i;
+  double cost = 0.;
+  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
+  return cost;
+}
+
+static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
+                                int length) {
+  int i;
+  double cost = 0.;
+  for (i = 2; i < length - 2; ++i) {
+    const int xy = X[i + 2] + Y[i + 2];
+    cost += (i >> 1) * xy;
+  }
+  return cost;
+}
+
+// Returns the various RLE counts
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    ++streak;
+    if (population[i] == population[i + 1]) {
+      continue;
+    }
+    stats.counts[population[i] != 0] += (streak > 3);
+    stats.streaks[population[i] != 0][(streak > 3)] += streak;
+    streak = 0;
+  }
+  ++streak;
+  stats.counts[population[i] != 0] += (streak > 3);
+  stats.streaks[population[i] != 0][(streak > 3)] += streak;
+  return stats;
+}
+
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+                                            const uint32_t* Y, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    const int xy = X[i] + Y[i];
+    const int xy_next = X[i + 1] + Y[i + 1];
+    ++streak;
+    if (xy == xy_next) {
+      continue;
+    }
+    stats.counts[xy != 0] += (streak > 3);
+    stats.streaks[xy != 0][(streak > 3)] += streak;
+    streak = 0;
+  }
+  {
+    const int xy = X[i] + Y[i];
+    ++streak;
+    stats.counts[xy != 0] += (streak > 3);
+    stats.streaks[xy != 0][(streak > 3)] += streak;
+  }
+  return stats;
+}
+
+//------------------------------------------------------------------------------
+
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  int i;
+  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b != out) {
+    for (i = 0; i < literal_size; ++i) {
+      out->literal_[i] = a->literal_[i] + b->literal_[i];
+    }
+    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+      out->distance_[i] = a->distance_[i] + b->distance_[i];
+    }
+    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+      out->red_[i] = a->red_[i] + b->red_[i];
+      out->blue_[i] = a->blue_[i] + b->blue_[i];
+      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
+    }
+  } else {
+    for (i = 0; i < literal_size; ++i) {
+      out->literal_[i] += a->literal_[i];
+    }
+    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+      out->distance_[i] += a->distance_[i];
+    }
+    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+      out->red_[i] += a->red_[i];
+      out->blue_[i] += a->blue_[i];
+      out->alpha_[i] += a->alpha_[i];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LPredictorFunc VP8LPredictors[16];
+
+VP8LTransformColorFunc VP8LTransformColor;
+VP8LTransformColorFunc VP8LTransformColorInverse;
+
+VP8LConvertFunc VP8LConvertBGRAToRGB;
+VP8LConvertFunc VP8LConvertBGRAToRGBA;
+VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+VP8LConvertFunc VP8LConvertBGRAToRGB565;
+VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+VP8LCostFunc VP8LExtraCost;
+VP8LCostCombinedFunc VP8LExtraCostCombined;
+
+VP8LCostCountFunc VP8LHuffmanCostCount;
+VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
+
+VP8LHistogramAddFunc VP8LHistogramAdd;
+
+extern void VP8LDspInitSSE2(void);
+extern void VP8LDspInitNEON(void);
+extern void VP8LDspInitMIPS32(void);
+
+void VP8LDspInit(void) {
+  memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
+
+  VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
+  VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
+
+  VP8LTransformColor = VP8LTransformColor_C;
+  VP8LTransformColorInverse = VP8LTransformColorInverse_C;
+
+  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+  VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+  VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
+  VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
+  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+
+  VP8LFastLog2Slow = FastLog2Slow;
+  VP8LFastSLog2Slow = FastSLog2Slow;
+
+  VP8LExtraCost = ExtraCost;
+  VP8LExtraCostCombined = ExtraCostCombined;
+
+  VP8LHuffmanCostCount = HuffmanCostCount;
+  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
+
+  VP8LHistogramAdd = HistogramAdd;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8LDspInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8LDspInitNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8LDspInitMIPS32();
+    }
+#endif
+  }
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dsp/lossless.h b/src/main/jni/libwebp/dsp/lossless.h
new file mode 100644
index 000000000..8c7551c9c
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/lossless.h
@@ -0,0 +1,249 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#ifndef WEBP_DSP_LOSSLESS_H_
+#define WEBP_DSP_LOSSLESS_H_
+
+#include "../webp/types.h"
+#include "../webp/decode.h"
+
+#include "../enc/histogram.h"
+#include "../utils/utils.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Signatures and generic function-pointers
+
+typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
+extern VP8LPredictorFunc VP8LPredictors[16];
+
+typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
+extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+
+typedef struct {
+  // Note: the members are uint8_t, so that any negative values are
+  // automatically converted to "mod 256" values.
+  uint8_t green_to_red_;
+  uint8_t green_to_blue_;
+  uint8_t red_to_blue_;
+} VP8LMultipliers;
+typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
+                                       uint32_t* argb_data, int num_pixels);
+extern VP8LTransformColorFunc VP8LTransformColor;
+extern VP8LTransformColorFunc VP8LTransformColorInverse;
+
+typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
+                                uint8_t* dst);
+extern VP8LConvertFunc VP8LConvertBGRAToRGB;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
+extern VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+// Expose some C-only fallback functions
+void VP8LTransformColor_C(const VP8LMultipliers* const m,
+                          uint32_t* data, int num_pixels);
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
+                                 uint32_t* data, int num_pixels);
+
+void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
+                               int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels);
+
+// Must be called before calling any of the above methods.
+void VP8LDspInit(void);
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+struct VP8LTransform;  // Defined in dec/vp8li.h.
+
+// Performs inverse transform of data given transform information, start and end
+// rows. Transform will be applied to rows [row_start, row_end[.
+// The *in and *out pointers refer to source and destination data respectively
+// corresponding to the intermediate row (row_start).
+void VP8LInverseTransform(const struct VP8LTransform* const transform,
+                          int row_start, int row_end,
+                          const uint32_t* const in, uint32_t* const out);
+
+// Similar to the static method ColorIndexInverseTransform() that is part of
+// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
+// uint32_t) arguments for 'src' and 'dst'.
+void VP8LColorIndexInverseTransformAlpha(
+    const struct VP8LTransform* const transform, int y_start, int y_end,
+    const uint8_t* src, uint8_t* dst);
+
+void VP8LResidualImage(int width, int height, int bits,
+                       uint32_t* const argb, uint32_t* const argb_scratch,
+                       uint32_t* const image);
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                             uint32_t* const argb, uint32_t* image);
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+// Converts from BGRA to other color spaces.
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
+
+//------------------------------------------------------------------------------
+// Misc methods.
+
+// Computes sampled size of 'size' when sampling using 'sampling bits'.
+static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
+                                              uint32_t sampling_bits) {
+  return (size + (1 << sampling_bits) - 1) >> sampling_bits;
+}
+
+// -----------------------------------------------------------------------------
+// Faster logarithm for integers. Small values use a look-up table.
+#define LOG_LOOKUP_IDX_MAX 256
+extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
+extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
+typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
+
+extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
+}
+// Fast calculation of v * log2(v) for integer input.
+static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
+}
+
+// -----------------------------------------------------------------------------
+// Huffman-cost related functions.
+
+typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                       int length);
+
+extern VP8LCostFunc VP8LExtraCost;
+extern VP8LCostCombinedFunc VP8LExtraCostCombined;
+
+typedef struct {        // small struct to hold counters
+  int counts[2];        // index: 0=zero steak, 1=non-zero streak
+  int streaks[2][2];    // [zero/non-zero][streak<3 / streak>=3]
+} VP8LStreaks;
+
+typedef VP8LStreaks (*VP8LCostCountFunc)(const uint32_t* population,
+                                         int length);
+typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X,
+                                                 const uint32_t* Y, int length);
+
+extern VP8LCostCountFunc VP8LHuffmanCostCount;
+extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
+
+typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
+                                     const VP8LHistogram* const b,
+                                     VP8LHistogram* const out);
+extern VP8LHistogramAddFunc VP8LHistogramAdd;
+
+// -----------------------------------------------------------------------------
+// PrefixEncode()
+
+static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
+  const int log_floor = BitsLog2Floor(n);
+  if (n == (n & ~(n - 1)))  // zero or a power of two.
+    return log_floor;
+  else
+    return log_floor + 1;
+}
+
+// Splitting of distance and length codes into prefixes and
+// extra bits. The prefixes are encoded with an entropy code
+// while the extra bits are stored just as normal bits.
+static WEBP_INLINE void VP8LPrefixEncodeBitsNoLUT(int distance, int* const code,
+                                                  int* const extra_bits) {
+  const int highest_bit = BitsLog2Floor(--distance);
+  const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
+  *extra_bits = highest_bit - 1;
+  *code = 2 * highest_bit + second_highest_bit;
+}
+
+static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code,
+                                              int* const extra_bits,
+                                              int* const extra_bits_value) {
+  const int highest_bit = BitsLog2Floor(--distance);
+  const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
+  *extra_bits = highest_bit - 1;
+  *extra_bits_value = distance & ((1 << *extra_bits) - 1);
+  *code = 2 * highest_bit + second_highest_bit;
+}
+
+#define PREFIX_LOOKUP_IDX_MAX   512
+typedef struct {
+  int8_t code_;
+  int8_t extra_bits_;
+} VP8LPrefixCode;
+
+// These tables are derived using VP8LPrefixEncodeNoLUT.
+extern const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX];
+extern const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX];
+static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code,
+                                             int* const extra_bits) {
+  if (distance < PREFIX_LOOKUP_IDX_MAX) {
+    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
+    *code = prefix_code.code_;
+    *extra_bits = prefix_code.extra_bits_;
+  } else {
+    VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
+  }
+}
+
+static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
+                                         int* const extra_bits,
+                                         int* const extra_bits_value) {
+  if (distance < PREFIX_LOOKUP_IDX_MAX) {
+    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
+    *code = prefix_code.code_;
+    *extra_bits = prefix_code.extra_bits_;
+    *extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
+  } else {
+    VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);
+  }
+}
+
+// In-place difference of each component with mod 256.
+static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
+  const uint32_t alpha_and_green =
+      0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
+  const uint32_t red_and_blue =
+      0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu);
+  return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  // WEBP_DSP_LOSSLESS_H_
diff --git a/src/main/jni/libwebp/dsp/lossless_mips32.c b/src/main/jni/libwebp/dsp/lossless_mips32.c
new file mode 100644
index 000000000..130858079
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/lossless_mips32.c
@@ -0,0 +1,416 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of lossless functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+#include "./lossless.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define APPROX_LOG_WITH_CORRECTION_MAX  65536
+#define APPROX_LOG_MAX                   4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+
+static float FastSLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    uint32_t log_cnt, y, correction;
+    const int c24 = 24;
+    const float v_f = (float)v;
+    uint32_t temp;
+
+    // Xf = 256 = 2^8
+    // log_cnt is index of leading one in upper 24 bits
+    __asm__ volatile(
+      "clz      %[log_cnt], %[v]                      \n\t"
+      "addiu    %[y],       $zero,        1           \n\t"
+      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
+        [temp]"=r"(temp)
+      : [c24]"r"(c24), [v]"r"(v)
+    );
+
+    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+    // Xf = floor(Xf) * (1 + (v % y) / v)
+    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+    // The correction factor: log(1 + d) ~ d; for very small d values, so
+    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+    // LOG_2_RECIPROCAL ~ 23/16
+
+    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
+    correction = (23 * (v & (y - 1))) >> 4;
+    return v_f * (kLog2Table[temp] + log_cnt) + correction;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
+  }
+}
+
+static float FastLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    uint32_t log_cnt, y;
+    const int c24 = 24;
+    double log_2;
+    uint32_t temp;
+
+    __asm__ volatile(
+      "clz      %[log_cnt], %[v]                      \n\t"
+      "addiu    %[y],       $zero,        1           \n\t"
+      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
+        [temp]"=r"(temp)
+      : [c24]"r"(c24), [v]"r"(v)
+    );
+
+    log_2 = kLog2Table[temp] + log_cnt;
+    if (v >= APPROX_LOG_MAX) {
+      // Since the division is still expensive, add this correction factor only
+      // for large values of 'v'.
+
+      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
+      log_2 += (double)correction / v;
+    }
+    return (float)log_2;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * log((double)v));
+  }
+}
+
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pop = &population[4];
+//   const uint32_t* LoopEnd = &population[length];
+//   while (pop != LoopEnd) {
+//     ++i;
+//     cost += i * *pop;
+//     cost += i * *(pop + 1);
+//     pop += 2;
+//   }
+//   return (double)cost;
+static double ExtraCost(const uint32_t* const population, int length) {
+  int i, temp0, temp1;
+  const uint32_t* pop = &population[4];
+  const uint32_t* const LoopEnd = &population[length];
+
+  __asm__ volatile(
+    "mult   $zero,    $zero                  \n\t"
+    "xor    %[i],     %[i],       %[i]       \n\t"
+    "beq    %[pop],   %[LoopEnd], 2f         \n\t"
+  "1:                                        \n\t"
+    "lw     %[temp0], 0(%[pop])              \n\t"
+    "lw     %[temp1], 4(%[pop])              \n\t"
+    "addiu  %[i],     %[i],       1          \n\t"
+    "addiu  %[pop],   %[pop],     8          \n\t"
+    "madd   %[i],     %[temp0]               \n\t"
+    "madd   %[i],     %[temp1]               \n\t"
+    "bne    %[pop],   %[LoopEnd], 1b         \n\t"
+  "2:                                        \n\t"
+    "mfhi   %[temp0]                         \n\t"
+    "mflo   %[temp1]                         \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [i]"=&r"(i), [pop]"+r"(pop)
+    : [LoopEnd]"r"(LoopEnd)
+    : "memory", "hi", "lo"
+  );
+
+  return (double)((int64_t)temp0 << 32 | temp1);
+}
+
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pX = &X[4];
+//   const uint32_t* pY = &Y[4];
+//   const uint32_t* LoopEnd = &X[length];
+//   while (pX != LoopEnd) {
+//     const uint32_t xy0 = *pX + *pY;
+//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
+//     ++i;
+//     cost += i * xy0;
+//     cost += i * xy1;
+//     pX += 2;
+//     pY += 2;
+//   }
+//   return (double)cost;
+static double ExtraCostCombined(const uint32_t* const X,
+                                const uint32_t* const Y, int length) {
+  int i, temp0, temp1, temp2, temp3;
+  const uint32_t* pX = &X[4];
+  const uint32_t* pY = &Y[4];
+  const uint32_t* const LoopEnd = &X[length];
+
+  __asm__ volatile(
+    "mult   $zero,    $zero                  \n\t"
+    "xor    %[i],     %[i],       %[i]       \n\t"
+    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
+  "1:                                        \n\t"
+    "lw     %[temp0], 0(%[pX])               \n\t"
+    "lw     %[temp1], 0(%[pY])               \n\t"
+    "lw     %[temp2], 4(%[pX])               \n\t"
+    "lw     %[temp3], 4(%[pY])               \n\t"
+    "addiu  %[i],     %[i],       1          \n\t"
+    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
+    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
+    "addiu  %[pX],    %[pX],      8          \n\t"
+    "addiu  %[pY],    %[pY],      8          \n\t"
+    "madd   %[i],     %[temp0]               \n\t"
+    "madd   %[i],     %[temp2]               \n\t"
+    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
+  "2:                                        \n\t"
+    "mfhi   %[temp0]                         \n\t"
+    "mflo   %[temp1]                         \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
+    : [LoopEnd]"r"(LoopEnd)
+    : "memory", "hi", "lo"
+  );
+
+  return (double)((int64_t)temp0 << 32 | temp1);
+}
+
+#define HUFFMAN_COST_PASS                                 \
+  __asm__ volatile(                                       \
+    "sll   %[temp1],  %[temp0],    3           \n\t"      \
+    "addiu %[temp3],  %[streak],   -3          \n\t"      \
+    "addu  %[temp2],  %[pstreaks], %[temp1]    \n\t"      \
+    "blez  %[temp3],  1f                       \n\t"      \
+    "srl   %[temp1],  %[temp1],    1           \n\t"      \
+    "addu  %[temp3],  %[pcnts],    %[temp1]    \n\t"      \
+    "lw    %[temp0],  4(%[temp2])              \n\t"      \
+    "lw    %[temp1],  0(%[temp3])              \n\t"      \
+    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
+    "addiu %[temp1],  %[temp1],    1           \n\t"      \
+    "sw    %[temp0],  4(%[temp2])              \n\t"      \
+    "sw    %[temp1],  0(%[temp3])              \n\t"      \
+    "b     2f                                  \n\t"      \
+  "1:                                          \n\t"      \
+    "lw    %[temp0],  0(%[temp2])              \n\t"      \
+    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
+    "sw    %[temp0],  0(%[temp2])              \n\t"      \
+  "2:                                          \n\t"      \
+    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),           \
+      [temp3]"=&r"(temp3), [temp0]"+r"(temp0)             \
+    : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts),         \
+      [streak]"r"(streak)                                 \
+    : "memory"                                            \
+  );
+
+// Returns the various RLE counts
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  int* const pstreaks = &stats.streaks[0][0];
+  int* const pcnts = &stats.counts[0];
+  int temp0, temp1, temp2, temp3;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    ++streak;
+    if (population[i] == population[i + 1]) {
+      continue;
+    }
+    temp0 = (population[i] != 0);
+    HUFFMAN_COST_PASS
+    streak = 0;
+  }
+  ++streak;
+  temp0 = (population[i] != 0);
+  HUFFMAN_COST_PASS
+
+  return stats;
+}
+
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+                                            const uint32_t* Y, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  int* const pstreaks = &stats.streaks[0][0];
+  int* const pcnts = &stats.counts[0];
+  int temp0, temp1, temp2, temp3;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    const uint32_t xy = X[i] + Y[i];
+    const uint32_t xy_next = X[i + 1] + Y[i + 1];
+    ++streak;
+    if (xy == xy_next) {
+      continue;
+    }
+    temp0 = (xy != 0);
+    HUFFMAN_COST_PASS
+    streak = 0;
+  }
+  {
+    const uint32_t xy = X[i] + Y[i];
+    ++streak;
+    temp0 = (xy != 0);
+    HUFFMAN_COST_PASS
+  }
+
+  return stats;
+}
+
+#define ASM_START                                       \
+  __asm__ volatile(                                     \
+    ".set   push                            \n\t"       \
+    ".set   at                              \n\t"       \
+    ".set   macro                           \n\t"       \
+  "1:                                       \n\t"
+
+// P2 = P0 + P1
+// A..D - offsets
+// E - temp variable to tell macro
+//     if pointer should be incremented
+// literal_ and successive histograms could be unaligned
+// so we must use ulw and usw
+#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
+    "ulw    %[temp0], "#A"(%["#P0"])        \n\t"       \
+    "ulw    %[temp1], "#B"(%["#P0"])        \n\t"       \
+    "ulw    %[temp2], "#C"(%["#P0"])        \n\t"       \
+    "ulw    %[temp3], "#D"(%["#P0"])        \n\t"       \
+    "ulw    %[temp4], "#A"(%["#P1"])        \n\t"       \
+    "ulw    %[temp5], "#B"(%["#P1"])        \n\t"       \
+    "ulw    %[temp6], "#C"(%["#P1"])        \n\t"       \
+    "ulw    %[temp7], "#D"(%["#P1"])        \n\t"       \
+    "addu   %[temp4], %[temp4],   %[temp0]  \n\t"       \
+    "addu   %[temp5], %[temp5],   %[temp1]  \n\t"       \
+    "addu   %[temp6], %[temp6],   %[temp2]  \n\t"       \
+    "addu   %[temp7], %[temp7],   %[temp3]  \n\t"       \
+    "addiu  %["#P0"],  %["#P0"],  16        \n\t"       \
+  ".if "#E" == 1                            \n\t"       \
+    "addiu  %["#P1"],  %["#P1"],  16        \n\t"       \
+  ".endif                                   \n\t"       \
+    "usw    %[temp4], "#A"(%["#P2"])        \n\t"       \
+    "usw    %[temp5], "#B"(%["#P2"])        \n\t"       \
+    "usw    %[temp6], "#C"(%["#P2"])        \n\t"       \
+    "usw    %[temp7], "#D"(%["#P2"])        \n\t"       \
+    "addiu  %["#P2"], %["#P2"],   16        \n\t"       \
+    "bne    %["#P0"], %[LoopEnd], 1b        \n\t"       \
+    ".set   pop                             \n\t"       \
+
+#define ASM_END_COMMON_0                                \
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),         \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),         \
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),         \
+      [pa]"+r"(pa), [pout]"+r"(pout)
+
+#define ASM_END_COMMON_1                                \
+    : [LoopEnd]"r"(LoopEnd)                             \
+    : "memory", "at"                                    \
+  );
+
+#define ASM_END_0                                       \
+    ASM_END_COMMON_0                                    \
+      , [pb]"+r"(pb)                                    \
+    ASM_END_COMMON_1
+
+#define ASM_END_1                                       \
+    ASM_END_COMMON_0                                    \
+    ASM_END_COMMON_1
+
+#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE)  do {   \
+  const uint32_t* pa = (const uint32_t*)(A);            \
+  const uint32_t* pb = (const uint32_t*)(B);            \
+  uint32_t* pout = (uint32_t*)(OUT);                    \
+  const uint32_t* const LoopEnd = pa + (SIZE);          \
+  assert((SIZE) % 4 == 0);                              \
+  ASM_START                                             \
+  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)              \
+  ASM_END_0                                             \
+  if ((EXTRA_SIZE) > 0) {                               \
+    const int last = (EXTRA_SIZE);                      \
+    int i;                                              \
+    for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
+  }                                                     \
+} while (0)
+
+#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE)  do {   \
+  const uint32_t* pa = (const uint32_t*)(A);            \
+  uint32_t* pout = (uint32_t*)(OUT);                    \
+  const uint32_t* const LoopEnd = pa + (SIZE);          \
+  assert((SIZE) % 4 == 0);                              \
+  ASM_START                                             \
+  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)            \
+  ASM_END_1                                             \
+  if ((EXTRA_SIZE) > 0) {                               \
+    const int last = (EXTRA_SIZE);                      \
+    int i;                                              \
+    for (i = 0; i < last; ++i) pout[i] += pa[i];        \
+  }                                                     \
+} while (0)
+
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
+                             - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+
+  if (b != out) {
+    ADD_VECTOR(a->literal_, b->literal_, out->literal_,
+               NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+    ADD_VECTOR(a->distance_, b->distance_, out->distance_,
+               NUM_DISTANCE_CODES, 0);
+    ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+  } else {
+    ADD_VECTOR_EQ(a->literal_, out->literal_,
+                  NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+    ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
+    ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+  }
+}
+
+#undef ADD_VECTOR_EQ
+#undef ADD_VECTOR
+#undef ASM_END_1
+#undef ASM_END_0
+#undef ASM_END_COMMON_1
+#undef ASM_END_COMMON_0
+#undef ADD_TO_OUT
+#undef ASM_START
+
+#endif  // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitMIPS32(void);
+
+void VP8LDspInitMIPS32(void) {
+#if defined(WEBP_USE_MIPS32)
+  VP8LFastSLog2Slow = FastSLog2Slow;
+  VP8LFastLog2Slow = FastLog2Slow;
+  VP8LExtraCost = ExtraCost;
+  VP8LExtraCostCombined = ExtraCostCombined;
+  VP8LHuffmanCostCount = HuffmanCostCount;
+  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
+  VP8LHistogramAdd = HistogramAdd;
+#endif  // WEBP_USE_MIPS32
+}
diff --git a/src/main/jni/libwebp/dsp/lossless_neon.c b/src/main/jni/libwebp/dsp/lossless_neon.c
new file mode 100644
index 000000000..987767b54
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/lossless_neon.c
@@ -0,0 +1,332 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
+#include "./lossless.h"
+#include "./neon.h"
+
+//------------------------------------------------------------------------------
+// Colorspace conversion functions
+
+#if !defined(WORK_AROUND_GCC)
+// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
+// gcc-4.8.x at least.
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~15);
+  for (; src < end; src += 16) {
+    uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
+    // swap B and R. (VSWP d0,d2 has no intrinsics equivalent!)
+    const uint8x16_t tmp = pixel.val[0];
+    pixel.val[0] = pixel.val[2];
+    pixel.val[2] = tmp;
+    vst4q_u8(dst, pixel);
+    dst += 64;
+  }
+  VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst);  // left-overs
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~15);
+  for (; src < end; src += 16) {
+    const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
+    const uint8x16x3_t tmp = { { pixel.val[0], pixel.val[1], pixel.val[2] } };
+    vst3q_u8(dst, tmp);
+    dst += 48;
+  }
+  VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst);  // left-overs
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~15);
+  for (; src < end; src += 16) {
+    const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
+    const uint8x16x3_t tmp = { { pixel.val[2], pixel.val[1], pixel.val[0] } };
+    vst3q_u8(dst, tmp);
+    dst += 48;
+  }
+  VP8LConvertBGRAToRGB_C(src, num_pixels & 15, dst);  // left-overs
+}
+
+#else  // WORK_AROUND_GCC
+
+// gcc-4.6.0 fallback
+
+static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
+
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~1);
+  const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
+  for (; src < end; src += 2) {
+    const uint8x8_t pixels = vld1_u8((uint8_t*)src);
+    vst1_u8(dst, vtbl1_u8(pixels, shuffle));
+    dst += 8;
+  }
+  VP8LConvertBGRAToRGBA_C(src, num_pixels & 1, dst);  // left-overs
+}
+
+static const uint8_t kBGRShuffle[3][8] = {
+  {  0,  1,  2,  4,  5,  6,  8,  9 },
+  { 10, 12, 13, 14, 16, 17, 18, 20 },
+  { 21, 22, 24, 25, 26, 28, 29, 30 }
+};
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~7);
+  const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
+  const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
+  const uint8x8_t shuffle2 = vld1_u8(kBGRShuffle[2]);
+  for (; src < end; src += 8) {
+    uint8x8x4_t pixels;
+    INIT_VECTOR4(pixels,
+                 vld1_u8((const uint8_t*)(src + 0)),
+                 vld1_u8((const uint8_t*)(src + 2)),
+                 vld1_u8((const uint8_t*)(src + 4)),
+                 vld1_u8((const uint8_t*)(src + 6)));
+    vst1_u8(dst +  0, vtbl4_u8(pixels, shuffle0));
+    vst1_u8(dst +  8, vtbl4_u8(pixels, shuffle1));
+    vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2));
+    dst += 8 * 3;
+  }
+  VP8LConvertBGRAToBGR_C(src, num_pixels & 7, dst);  // left-overs
+}
+
+static const uint8_t kRGBShuffle[3][8] = {
+  {  2,  1,  0,  6,  5,  4, 10,  9 },
+  {  8, 14, 13, 12, 18, 17, 16, 22 },
+  { 21, 20, 26, 25, 24, 30, 29, 28 }
+};
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const end = src + (num_pixels & ~7);
+  const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
+  const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
+  const uint8x8_t shuffle2 = vld1_u8(kRGBShuffle[2]);
+  for (; src < end; src += 8) {
+    uint8x8x4_t pixels;
+    INIT_VECTOR4(pixels,
+                 vld1_u8((const uint8_t*)(src + 0)),
+                 vld1_u8((const uint8_t*)(src + 2)),
+                 vld1_u8((const uint8_t*)(src + 4)),
+                 vld1_u8((const uint8_t*)(src + 6)));
+    vst1_u8(dst +  0, vtbl4_u8(pixels, shuffle0));
+    vst1_u8(dst +  8, vtbl4_u8(pixels, shuffle1));
+    vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2));
+    dst += 8 * 3;
+  }
+  VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst);  // left-overs
+}
+
+#endif   // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+
+#ifdef USE_INTRINSICS
+
+static WEBP_INLINE uint32_t Average2(const uint32_t* const a,
+                                     const uint32_t* const b) {
+  const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
+  const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
+  const uint8x8_t avg = vhadd_u8(a0, b0);
+  return vget_lane_u32(vreinterpret_u32_u8(avg), 0);
+}
+
+static WEBP_INLINE uint32_t Average3(const uint32_t* const a,
+                                     const uint32_t* const b,
+                                     const uint32_t* const c) {
+  const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
+  const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
+  const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c));
+  const uint8x8_t avg1 = vhadd_u8(a0, c0);
+  const uint8x8_t avg2 = vhadd_u8(avg1, b0);
+  return vget_lane_u32(vreinterpret_u32_u8(avg2), 0);
+}
+
+static WEBP_INLINE uint32_t Average4(const uint32_t* const a,
+                                     const uint32_t* const b,
+                                     const uint32_t* const c,
+                                     const uint32_t* const d) {
+  const uint8x8_t a0 = vreinterpret_u8_u64(vcreate_u64(*a));
+  const uint8x8_t b0 = vreinterpret_u8_u64(vcreate_u64(*b));
+  const uint8x8_t c0 = vreinterpret_u8_u64(vcreate_u64(*c));
+  const uint8x8_t d0 = vreinterpret_u8_u64(vcreate_u64(*d));
+  const uint8x8_t avg1 = vhadd_u8(a0, b0);
+  const uint8x8_t avg2 = vhadd_u8(c0, d0);
+  const uint8x8_t avg3 = vhadd_u8(avg1, avg2);
+  return vget_lane_u32(vreinterpret_u32_u8(avg3), 0);
+}
+
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+  return Average3(&left, top + 0, top + 1);
+}
+
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+  return Average2(&left, top - 1);
+}
+
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+  return Average2(&left, top + 0);
+}
+
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+  (void)left;
+  return Average2(top - 1, top + 0);
+}
+
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+  (void)left;
+  return Average2(top + 0, top + 1);
+}
+
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+  return Average4(&left, top - 1, top + 0, top + 1);
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE uint32_t Select(const uint32_t* const c0,
+                                   const uint32_t* const c1,
+                                   const uint32_t* const c2) {
+  const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
+  const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
+  const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
+  const uint8x8_t bc = vabd_u8(p1, p2);   // |b-c|
+  const uint8x8_t ac = vabd_u8(p0, p2);   // |a-c|
+  const int16x4_t sum_bc = vreinterpret_s16_u16(vpaddl_u8(bc));
+  const int16x4_t sum_ac = vreinterpret_s16_u16(vpaddl_u8(ac));
+  const int32x2_t diff = vpaddl_s16(vsub_s16(sum_bc, sum_ac));
+  const int32_t pa_minus_pb = vget_lane_s32(diff, 0);
+  return (pa_minus_pb <= 0) ? *c0 : *c1;
+}
+
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+  return Select(top + 0, &left, top - 1);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0,
+                                                   const uint32_t* const c1,
+                                                   const uint32_t* const c2) {
+  const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
+  const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
+  const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
+  const uint16x8_t sum0 = vaddl_u8(p0, p1);                // add and widen
+  const uint16x8_t sum1 = vqsubq_u16(sum0, vmovl_u8(p2));  // widen and subtract
+  const uint8x8_t out = vqmovn_u16(sum1);                  // narrow and clamp
+  return vget_lane_u32(vreinterpret_u32_u8(out), 0);
+}
+
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractFull(&left, top + 0, top - 1);
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(const uint32_t* const c0,
+                                                   const uint32_t* const c1,
+                                                   const uint32_t* const c2) {
+  const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
+  const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
+  const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
+  const uint8x8_t avg = vhadd_u8(p0, p1);                  // Average(c0,c1)
+  const uint8x8_t ab = vshr_n_u8(vqsub_u8(avg, p2), 1);    // (a-b)>>1 saturated
+  const uint8x8_t ba = vshr_n_u8(vqsub_u8(p2, avg), 1);    // (b-a)>>1 saturated
+  const uint8x8_t out = vqsub_u8(vqadd_u8(avg, ab), ba);
+  return vget_lane_u32(vreinterpret_u32_u8(out), 0);
+}
+
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractHalf(&left, top + 0, top - 1);
+}
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// vtbl? are unavailable in iOS/arm64 builds.
+#if !defined(__aarch64__)
+
+// 255 = byte will be zero'd
+static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255  };
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  const uint32_t* const end = argb_data + (num_pixels & ~3);
+  const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
+  for (; argb_data < end; argb_data += 4) {
+    const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
+    const uint8x16_t greens =
+        vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
+                    vtbl1_u8(vget_high_u8(argb), shuffle));
+    vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
+  }
+  // fallthrough and finish off with plain-C
+  VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3);
+}
+
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  const uint32_t* const end = argb_data + (num_pixels & ~3);
+  const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
+  for (; argb_data < end; argb_data += 4) {
+    const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
+    const uint8x16_t greens =
+        vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
+                    vtbl1_u8(vget_high_u8(argb), shuffle));
+    vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
+  }
+  // fallthrough and finish off with plain-C
+  VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
+}
+
+#endif   // !__aarch64__
+
+#endif   // USE_INTRINSICS
+
+#endif   // WEBP_USE_NEON
+
+//------------------------------------------------------------------------------
+
+extern void VP8LDspInitNEON(void);
+
+void VP8LDspInitNEON(void) {
+#if defined(WEBP_USE_NEON)
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
+
+#ifdef USE_INTRINSICS
+  VP8LPredictors[5] = Predictor5;
+  VP8LPredictors[6] = Predictor6;
+  VP8LPredictors[7] = Predictor7;
+  VP8LPredictors[8] = Predictor8;
+  VP8LPredictors[9] = Predictor9;
+  VP8LPredictors[10] = Predictor10;
+  VP8LPredictors[11] = Predictor11;
+  VP8LPredictors[12] = Predictor12;
+  VP8LPredictors[13] = Predictor13;
+
+#if !defined(__aarch64__)
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+#endif
+#endif
+
+#endif   // WEBP_USE_NEON
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dsp/lossless_sse2.c b/src/main/jni/libwebp/dsp/lossless_sse2.c
new file mode 100644
index 000000000..713090980
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/lossless_sse2.c
@@ -0,0 +1,535 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#include <assert.h>
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+#include "./lossless.h"
+
+//------------------------------------------------------------------------------
+// Predictor Transform
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
+  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
+  const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
+  const __m128i V1 = _mm_add_epi16(C0, C1);
+  const __m128i V2 = _mm_sub_epi16(V1, C2);
+  const __m128i b = _mm_packus_epi16(V2, V2);
+  const uint32_t output = _mm_cvtsi128_si32(b);
+  return output;
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
+  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
+  const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
+  const __m128i avg = _mm_add_epi16(C1, C0);
+  const __m128i A0 = _mm_srli_epi16(avg, 1);
+  const __m128i A1 = _mm_sub_epi16(A0, B0);
+  const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
+  const __m128i A2 = _mm_sub_epi16(A1, BgtA);
+  const __m128i A3 = _mm_srai_epi16(A2, 1);
+  const __m128i A4 = _mm_add_epi16(A0, A3);
+  const __m128i A5 = _mm_packus_epi16(A4, A4);
+  const uint32_t output = _mm_cvtsi128_si32(A5);
+  return output;
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+  int pa_minus_pb;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i A0 = _mm_cvtsi32_si128(a);
+  const __m128i B0 = _mm_cvtsi32_si128(b);
+  const __m128i C0 = _mm_cvtsi32_si128(c);
+  const __m128i AC0 = _mm_subs_epu8(A0, C0);
+  const __m128i CA0 = _mm_subs_epu8(C0, A0);
+  const __m128i BC0 = _mm_subs_epu8(B0, C0);
+  const __m128i CB0 = _mm_subs_epu8(C0, B0);
+  const __m128i AC = _mm_or_si128(AC0, CA0);
+  const __m128i BC = _mm_or_si128(BC0, CB0);
+  const __m128i pa = _mm_unpacklo_epi8(AC, zero);  // |a - c|
+  const __m128i pb = _mm_unpacklo_epi8(BC, zero);  // |b - c|
+  const __m128i diff = _mm_sub_epi16(pb, pa);
+  {
+    int16_t out[8];
+    _mm_storeu_si128((__m128i*)out, diff);
+    pa_minus_pb = out[0] + out[1] + out[2] + out[3];
+  }
+  return (pa_minus_pb <= 0) ? a : b;
+}
+
+static WEBP_INLINE __m128i Average2_128i(uint32_t a0, uint32_t a1) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
+  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
+  const __m128i sum = _mm_add_epi16(A1, A0);
+  const __m128i avg = _mm_srli_epi16(sum, 1);
+  return avg;
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+  const __m128i avg = Average2_128i(a0, a1);
+  const __m128i A2 = _mm_packus_epi16(avg, avg);
+  const uint32_t output = _mm_cvtsi128_si32(A2);
+  return output;
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i avg1 = Average2_128i(a0, a2);
+  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
+  const __m128i sum = _mm_add_epi16(avg1, A1);
+  const __m128i avg2 = _mm_srli_epi16(sum, 1);
+  const __m128i A2 = _mm_packus_epi16(avg2, avg2);
+  const uint32_t output = _mm_cvtsi128_si32(A2);
+  return output;
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+                                     uint32_t a2, uint32_t a3) {
+  const __m128i avg1 = Average2_128i(a0, a1);
+  const __m128i avg2 = Average2_128i(a2, a3);
+  const __m128i sum = _mm_add_epi16(avg2, avg1);
+  const __m128i avg3 = _mm_srli_epi16(sum, 1);
+  const __m128i A0 = _mm_packus_epi16(avg3, avg3);
+  const uint32_t output = _mm_cvtsi128_si32(A0);
+  return output;
+}
+
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average3(left, top[0], top[1]);
+  return pred;
+}
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[-1]);
+  return pred;
+}
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(left, top[0]);
+  return pred;
+}
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(top[-1], top[0]);
+  (void)left;
+  return pred;
+}
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average2(top[0], top[1]);
+  (void)left;
+  return pred;
+}
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
+  return pred;
+}
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = Select(top[0], left, top[-1]);
+  return pred;
+}
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
+  return pred;
+}
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
+  return pred;
+}
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  const __m128i mask = _mm_set1_epi32(0x0000ff00);
+  int i;
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
+    const __m128i in_00g0 = _mm_and_si128(in, mask);     // 00g0|00g0|...
+    const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8);  // 0g00|0g00|...
+    const __m128i in_000g = _mm_srli_epi32(in_00g0, 8);  // 000g|000g|...
+    const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
+    const __m128i out = _mm_sub_epi8(in, in_0g0g);
+    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+  }
+  // fallthrough and finish off with plain-C
+  VP8LSubtractGreenFromBlueAndRed_C(argb_data + i, num_pixels - i);
+}
+
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  const __m128i mask = _mm_set1_epi32(0x0000ff00);
+  int i;
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
+    const __m128i in_00g0 = _mm_and_si128(in, mask);     // 00g0|00g0|...
+    const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8);  // 0g00|0g00|...
+    const __m128i in_000g = _mm_srli_epi32(in_00g0, 8);  // 000g|000g|...
+    const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
+    const __m128i out = _mm_add_epi8(in, in_0g0g);
+    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+  }
+  // fallthrough and finish off with plain-C
+  VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i);
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static WEBP_INLINE __m128i ColorTransformDelta(__m128i color_pred,
+                                               __m128i color) {
+  // We simulate signed 8-bit multiplication as:
+  // * Left shift the two (8-bit) numbers by 8 bits,
+  // * Perform a 16-bit signed multiplication and retain the higher 16-bits.
+  const __m128i color_pred_shifted = _mm_slli_epi32(color_pred, 8);
+  const __m128i color_shifted = _mm_slli_epi32(color, 8);
+  // Note: This performs multiplication on 8 packed 16-bit numbers, 4 of which
+  // happen to be zeroes.
+  const __m128i signed_mult =
+      _mm_mulhi_epi16(color_pred_shifted, color_shifted);
+  return _mm_srli_epi32(signed_mult, 5);
+}
+
+static WEBP_INLINE void TransformColor(const VP8LMultipliers* const m,
+                                       uint32_t* argb_data,
+                                       int num_pixels) {
+  const __m128i g_to_r = _mm_set1_epi32(m->green_to_red_);       // multipliers
+  const __m128i g_to_b = _mm_set1_epi32(m->green_to_blue_);
+  const __m128i r_to_b = _mm_set1_epi32(m->red_to_blue_);
+
+  int i;
+
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
+    const __m128i alpha_green_mask = _mm_set1_epi32(0xff00ff00);  // masks
+    const __m128i red_mask = _mm_set1_epi32(0x00ff0000);
+    const __m128i green_mask = _mm_set1_epi32(0x0000ff00);
+    const __m128i lower_8bit_mask  = _mm_set1_epi32(0x000000ff);
+    const __m128i ag = _mm_and_si128(in, alpha_green_mask);      // alpha, green
+    const __m128i r = _mm_srli_epi32(_mm_and_si128(in, red_mask), 16);
+    const __m128i g = _mm_srli_epi32(_mm_and_si128(in, green_mask), 8);
+    const __m128i b = in;
+
+    const __m128i r_delta = ColorTransformDelta(g_to_r, g);      // red
+    const __m128i r_new =
+        _mm_and_si128(_mm_sub_epi32(r, r_delta), lower_8bit_mask);
+    const __m128i r_new_shifted = _mm_slli_epi32(r_new, 16);
+
+    const __m128i b_delta_1 = ColorTransformDelta(g_to_b, g);    // blue
+    const __m128i b_delta_2 = ColorTransformDelta(r_to_b, r);
+    const __m128i b_delta = _mm_add_epi32(b_delta_1, b_delta_2);
+    const __m128i b_new =
+        _mm_and_si128(_mm_sub_epi32(b, b_delta), lower_8bit_mask);
+
+    const __m128i out = _mm_or_si128(_mm_or_si128(ag, r_new_shifted), b_new);
+    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+  }
+
+  // Fall-back to C-version for left-overs.
+  VP8LTransformColor_C(m, argb_data + i, num_pixels - i);
+}
+
+static WEBP_INLINE void TransformColorInverse(const VP8LMultipliers* const m,
+                                              uint32_t* argb_data,
+                                              int num_pixels) {
+  const __m128i g_to_r = _mm_set1_epi32(m->green_to_red_);       // multipliers
+  const __m128i g_to_b = _mm_set1_epi32(m->green_to_blue_);
+  const __m128i r_to_b = _mm_set1_epi32(m->red_to_blue_);
+
+  int i;
+
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
+    const __m128i alpha_green_mask = _mm_set1_epi32(0xff00ff00);  // masks
+    const __m128i red_mask = _mm_set1_epi32(0x00ff0000);
+    const __m128i green_mask = _mm_set1_epi32(0x0000ff00);
+    const __m128i lower_8bit_mask  = _mm_set1_epi32(0x000000ff);
+    const __m128i ag = _mm_and_si128(in, alpha_green_mask);      // alpha, green
+    const __m128i r = _mm_srli_epi32(_mm_and_si128(in, red_mask), 16);
+    const __m128i g = _mm_srli_epi32(_mm_and_si128(in, green_mask), 8);
+    const __m128i b = in;
+
+    const __m128i r_delta = ColorTransformDelta(g_to_r, g);      // red
+    const __m128i r_new =
+        _mm_and_si128(_mm_add_epi32(r, r_delta), lower_8bit_mask);
+    const __m128i r_new_shifted = _mm_slli_epi32(r_new, 16);
+
+    const __m128i b_delta_1 = ColorTransformDelta(g_to_b, g);    // blue
+    const __m128i b_delta_2 = ColorTransformDelta(r_to_b, r_new);
+    const __m128i b_delta = _mm_add_epi32(b_delta_1, b_delta_2);
+    const __m128i b_new =
+        _mm_and_si128(_mm_add_epi32(b, b_delta), lower_8bit_mask);
+
+    const __m128i out = _mm_or_si128(_mm_or_si128(ag, r_new_shifted), b_new);
+    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+  }
+
+  // Fall-back to C-version for left-overs.
+  VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+}
+
+//------------------------------------------------------------------------------
+// Color-space conversion functions
+
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);   // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);   // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);   // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);   // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);  // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);  // r0...r7 | b0...b7
+    const __m128i rg0 = _mm_unpacklo_epi8(rb0, ga0);   // r0g0r1g1 ... r6g6r7g7
+    const __m128i ba0 = _mm_unpackhi_epi8(rb0, ga0);   // b0a0b1a1 ... b6a6b7a7
+    const __m128i rgba0 = _mm_unpacklo_epi16(rg0, ba0);  // rgba0|rgba1...
+    const __m128i rgba4 = _mm_unpackhi_epi16(rg0, ba0);  // rgba4|rgba5...
+    _mm_storeu_si128(out++, rgba0);
+    _mm_storeu_si128(out++, rgba4);
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
+  const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
+  const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);    // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);    // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);    // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);    // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);   // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);   // r0...r7 | b0...b7
+    const __m128i ga1 = _mm_srli_epi16(ga0, 4);         // g0-|g1-|...|a6-|a7-
+    const __m128i rb1 = _mm_and_si128(rb0, mask_0xf0);  // -r0|-r1|...|-b6|-a7
+    const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);  // g0-|g1-|...|a6-|a7-
+    const __m128i rgba0 = _mm_or_si128(ga2, rb1);       // rg0..rg7 | ba0..ba7
+    const __m128i rgba1 = _mm_srli_si128(rgba0, 8);     // ba0..ba7 | 0
+#ifdef WEBP_SWAP_16BIT_CSP
+    const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0);  // barg0...barg7
+#else
+    const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1);  // rgba0...rgba7
+#endif
+    _mm_storeu_si128(out++, rgba);
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src,
+                                int num_pixels, uint8_t* dst) {
+  const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
+  const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
+  const __m128i mask_0x07 = _mm_set1_epi8(0x07);
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);      // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);      // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);      // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);      // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);     // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);     // r0...r7 | b0...b7
+    const __m128i rb1 = _mm_and_si128(rb0, mask_0xf8);    // -r0..-r7|-b0..-b7
+    const __m128i g_lo1 = _mm_srli_epi16(ga0, 5);
+    const __m128i g_lo2 = _mm_and_si128(g_lo1, mask_0x07);  // g0-...g7-|xx (3b)
+    const __m128i g_hi1 = _mm_slli_epi16(ga0, 3);
+    const __m128i g_hi2 = _mm_and_si128(g_hi1, mask_0xe0);  // -g0...-g7|xx (3b)
+    const __m128i b0 = _mm_srli_si128(rb1, 8);              // -b0...-b7|0
+    const __m128i rg1 = _mm_or_si128(rb1, g_lo2);           // gr0...gr7|xx
+    const __m128i b1 = _mm_srli_epi16(b0, 3);
+    const __m128i gb1 = _mm_or_si128(b1, g_hi2);            // bg0...bg7|xx
+#ifdef WEBP_SWAP_16BIT_CSP
+    const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1);     // rggb0...rggb7
+#else
+    const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bgrb0...bgrb7
+#endif
+    _mm_storeu_si128(out++, rgba);
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToRGB565_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
+  const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
+  const __m128i* in = (const __m128i*)src;
+  const uint8_t* const end = dst + num_pixels * 3;
+  // the last storel_epi64 below writes 8 bytes starting at offset 18
+  while (dst + 26 <= end) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i a0l = _mm_and_si128(bgra0, mask_l);   // bgr0|0|bgr0|0
+    const __m128i a4l = _mm_and_si128(bgra4, mask_l);   // bgr0|0|bgr0|0
+    const __m128i a0h = _mm_and_si128(bgra0, mask_h);   // 0|bgr0|0|bgr0
+    const __m128i a4h = _mm_and_si128(bgra4, mask_h);   // 0|bgr0|0|bgr0
+    const __m128i b0h = _mm_srli_epi64(a0h, 8);         // 000b|gr00|000b|gr00
+    const __m128i b4h = _mm_srli_epi64(a4h, 8);         // 000b|gr00|000b|gr00
+    const __m128i c0 = _mm_or_si128(a0l, b0h);          // rgbrgb00|rgbrgb00
+    const __m128i c4 = _mm_or_si128(a4l, b4h);          // rgbrgb00|rgbrgb00
+    const __m128i c2 = _mm_srli_si128(c0, 8);
+    const __m128i c6 = _mm_srli_si128(c4, 8);
+    _mm_storel_epi64((__m128i*)(dst +   0), c0);
+    _mm_storel_epi64((__m128i*)(dst +   6), c2);
+    _mm_storel_epi64((__m128i*)(dst +  12), c4);
+    _mm_storel_epi64((__m128i*)(dst +  18), c6);
+    dst += 24;
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst);
+}
+
+//------------------------------------------------------------------------------
+
+#define LINE_SIZE 16    // 8 or 16
+static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out,
+                      int size) {
+  int i;
+  assert(size % LINE_SIZE == 0);
+  for (i = 0; i < size; i += LINE_SIZE) {
+    const __m128i a0 = _mm_loadu_si128((__m128i*)&a[i +  0]);
+    const __m128i a1 = _mm_loadu_si128((__m128i*)&a[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i a2 = _mm_loadu_si128((__m128i*)&a[i +  8]);
+    const __m128i a3 = _mm_loadu_si128((__m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((__m128i*)&b[i +  0]);
+    const __m128i b1 = _mm_loadu_si128((__m128i*)&b[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((__m128i*)&b[i +  8]);
+    const __m128i b3 = _mm_loadu_si128((__m128i*)&b[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i +  0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i +  4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i +  8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+
+static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
+  int i;
+  assert(size % LINE_SIZE == 0);
+  for (i = 0; i < size; i += LINE_SIZE) {
+    const __m128i a0 = _mm_loadu_si128((__m128i*)&a[i +  0]);
+    const __m128i a1 = _mm_loadu_si128((__m128i*)&a[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i a2 = _mm_loadu_si128((__m128i*)&a[i +  8]);
+    const __m128i a3 = _mm_loadu_si128((__m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((__m128i*)&out[i +  0]);
+    const __m128i b1 = _mm_loadu_si128((__m128i*)&out[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((__m128i*)&out[i +  8]);
+    const __m128i b3 = _mm_loadu_si128((__m128i*)&out[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i +  0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i +  4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i +  8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+#undef LINE_SIZE
+
+// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
+// that's ok since the histogram values are less than 1<<28 (max picture size).
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  int i;
+  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b != out) {
+    AddVector(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+    AddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  } else {
+    AddVectorEq(a->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
+    AddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  }
+  for (i = NUM_LITERAL_CODES; i < literal_size; ++i) {
+    out->literal_[i] = a->literal_[i] + b->literal_[i];
+  }
+  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+    out->distance_[i] = a->distance_[i] + b->distance_[i];
+  }
+}
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+
+extern void VP8LDspInitSSE2(void);
+
+void VP8LDspInitSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  VP8LPredictors[5] = Predictor5;
+  VP8LPredictors[6] = Predictor6;
+  VP8LPredictors[7] = Predictor7;
+  VP8LPredictors[8] = Predictor8;
+  VP8LPredictors[9] = Predictor9;
+  VP8LPredictors[10] = Predictor10;
+  VP8LPredictors[11] = Predictor11;
+  VP8LPredictors[12] = Predictor12;
+  VP8LPredictors[13] = Predictor13;
+
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+
+  VP8LTransformColor = TransformColor;
+  VP8LTransformColorInverse = TransformColorInverse;
+
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
+  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+
+  VP8LHistogramAdd = HistogramAdd;
+#endif   // WEBP_USE_SSE2
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dsp/neon.h b/src/main/jni/libwebp/dsp/neon.h
new file mode 100644
index 000000000..7e06eaeef
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/neon.h
@@ -0,0 +1,82 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  NEON common code.
+
+#ifndef WEBP_DSP_NEON_H_
+#define WEBP_DSP_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./dsp.h"
+
+// Right now, some intrinsics functions seem slower, so we disable them
+// everywhere except aarch64 where the inline assembly is incompatible.
+#if defined(__aarch64__)
+#define USE_INTRINSICS   // use intrinsics when possible
+#endif
+
+#define INIT_VECTOR2(v, a, b) do {  \
+  v.val[0] = a;                     \
+  v.val[1] = b;                     \
+} while (0)
+
+#define INIT_VECTOR3(v, a, b, c) do {  \
+  v.val[0] = a;                        \
+  v.val[1] = b;                        \
+  v.val[2] = c;                        \
+} while (0)
+
+#define INIT_VECTOR4(v, a, b, c, d) do {  \
+  v.val[0] = a;                           \
+  v.val[1] = b;                           \
+  v.val[2] = c;                           \
+  v.val[3] = d;                           \
+} while (0)
+
+// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
+// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
+// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
+#if !(LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#define WORK_AROUND_GCC
+#endif
+
+static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) {
+  uint64x2x2_t row01, row23;
+
+  row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
+  row01.val[1] = vreinterpretq_u64_s32(rows.val[1]);
+  row23.val[0] = vreinterpretq_u64_s32(rows.val[2]);
+  row23.val[1] = vreinterpretq_u64_s32(rows.val[3]);
+  // Transpose 64-bit values (there's no vswp equivalent)
+  {
+    const uint64x1_t row0h = vget_high_u64(row01.val[0]);
+    const uint64x1_t row2l = vget_low_u64(row23.val[0]);
+    const uint64x1_t row1h = vget_high_u64(row01.val[1]);
+    const uint64x1_t row3l = vget_low_u64(row23.val[1]);
+    row01.val[0] = vcombine_u64(vget_low_u64(row01.val[0]), row2l);
+    row23.val[0] = vcombine_u64(row0h, vget_high_u64(row23.val[0]));
+    row01.val[1] = vcombine_u64(vget_low_u64(row01.val[1]), row3l);
+    row23.val[1] = vcombine_u64(row1h, vget_high_u64(row23.val[1]));
+  }
+  {
+    const int32x4x2_t out01 = vtrnq_s32(vreinterpretq_s32_u64(row01.val[0]),
+                                        vreinterpretq_s32_u64(row01.val[1]));
+    const int32x4x2_t out23 = vtrnq_s32(vreinterpretq_s32_u64(row23.val[0]),
+                                        vreinterpretq_s32_u64(row23.val[1]));
+    int32x4x4_t out;
+    out.val[0] = out01.val[0];
+    out.val[1] = out01.val[1];
+    out.val[2] = out23.val[0];
+    out.val[3] = out23.val[1];
+    return out;
+  }
+}
+
+#endif  // WEBP_DSP_NEON_H_
diff --git a/src/main/jni/libwebp/dsp/upsampling.c b/src/main/jni/libwebp/dsp/upsampling.c
new file mode 100644
index 000000000..2b1656bf9
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/upsampling.c
@@ -0,0 +1,222 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+#include "./yuv.h"
+
+#include <assert.h>
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB
+WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
+
+// Given samples laid out in a square as:
+//  [a b]
+//  [c d]
+// we interpolate u/v as:
+//  ([9*a + 3*b + 3*c +   d    3*a + 9*b + 3*c +   d] + [8 8]) / 16
+//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d]   [8 8]) / 16
+
+// We process u and v together stashed into 32bit (16bit each).
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
+
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                  \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int x;                                                                       \
+  const int last_pixel_pair = (len - 1) >> 1;                                  \
+  uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */        \
+  uint32_t l_uv  = LOAD_UV(cur_u[0], cur_v[0]);   /* left-sample */            \
+  assert(top_y != NULL);                                                       \
+  {                                                                            \
+    const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                \
+    FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                          \
+  }                                                                            \
+  if (bottom_y != NULL) {                                                      \
+    const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                \
+    FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                    \
+  }                                                                            \
+  for (x = 1; x <= last_pixel_pair; ++x) {                                     \
+    const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]);  /* top sample */       \
+    const uint32_t uv   = LOAD_UV(cur_u[x], cur_v[x]);  /* sample */           \
+    /* precompute invariant values associated with first and second diagonals*/\
+    const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;               \
+    const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                   \
+    const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                    \
+    {                                                                          \
+      const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
+      const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
+      FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
+           top_dst + (2 * x - 1) * XSTEP);                                     \
+      FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
+           top_dst + (2 * x - 0) * XSTEP);                                     \
+    }                                                                          \
+    if (bottom_y != NULL) {                                                    \
+      const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
+      const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
+      FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
+           bottom_dst + (2 * x - 1) * XSTEP);                                  \
+      FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16),                       \
+           bottom_dst + (2 * x + 0) * XSTEP);                                  \
+    }                                                                          \
+    tl_uv = t_uv;                                                              \
+    l_uv = uv;                                                                 \
+  }                                                                            \
+  if (!(len & 1)) {                                                            \
+    {                                                                          \
+      const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
+      FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
+           top_dst + (len - 1) * XSTEP);                                       \
+    }                                                                          \
+    if (bottom_y != NULL) {                                                    \
+      const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
+      FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
+           bottom_dst + (len - 1) * XSTEP);                                    \
+    }                                                                          \
+  }                                                                            \
+}
+
+// All variants implemented.
+UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair,  VP8YuvToRgb565,  2)
+
+#undef LOAD_UV
+#undef UPSAMPLE_FUNC
+
+#endif  // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+
+#if !defined(FANCY_UPSAMPLING)
+#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC)                                      \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y,              \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* bot_u, const uint8_t* bot_v,              \
+                      uint8_t* top_dst, uint8_t* bot_dst, int len) {           \
+  const int half_len = len >> 1;                                               \
+  int x;                                                                       \
+  assert(top_dst != NULL);                                                     \
+  {                                                                            \
+    for (x = 0; x < half_len; ++x) {                                           \
+      FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0);         \
+      FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4);         \
+    }                                                                          \
+    if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x);  \
+  }                                                                            \
+  if (bot_dst != NULL) {                                                       \
+    for (x = 0; x < half_len; ++x) {                                           \
+      FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0);         \
+      FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4);         \
+    }                                                                          \
+    if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x);  \
+  }                                                                            \
+}
+
+DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra)
+DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
+#undef DUAL_SAMPLE_FUNC
+
+#endif  // !FANCY_UPSAMPLING
+
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
+  WebPInitUpsamplers();
+  VP8YUVInit();
+#ifdef FANCY_UPSAMPLING
+  return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
+#else
+  return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
+static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
+                      uint8_t* dst, int len) {                                 \
+  int i;                                                                       \
+  for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
+}
+
+YUV444_FUNC(Yuv444ToRgb,      VP8YuvToRgb,  3)
+YUV444_FUNC(Yuv444ToBgr,      VP8YuvToBgr,  3)
+YUV444_FUNC(Yuv444ToRgba,     VP8YuvToRgba, 4)
+YUV444_FUNC(Yuv444ToBgra,     VP8YuvToBgra, 4)
+YUV444_FUNC(Yuv444ToArgb,     VP8YuvToArgb, 4)
+YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
+YUV444_FUNC(Yuv444ToRgb565,   VP8YuvToRgb565, 2)
+
+#undef YUV444_FUNC
+
+const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = {
+  Yuv444ToRgb,       // MODE_RGB
+  Yuv444ToRgba,      // MODE_RGBA
+  Yuv444ToBgr,       // MODE_BGR
+  Yuv444ToBgra,      // MODE_BGRA
+  Yuv444ToArgb,      // MODE_ARGB
+  Yuv444ToRgba4444,  // MODE_RGBA_4444
+  Yuv444ToRgb565,    // MODE_RGB_565
+  Yuv444ToRgba,      // MODE_rgbA
+  Yuv444ToBgra,      // MODE_bgrA
+  Yuv444ToArgb,      // MODE_Argb
+  Yuv444ToRgba4444   // MODE_rgbA_4444
+};
+
+//------------------------------------------------------------------------------
+// Main calls
+
+extern void WebPInitUpsamplersSSE2(void);
+extern void WebPInitUpsamplersNEON(void);
+
+void WebPInitUpsamplers(void) {
+#ifdef FANCY_UPSAMPLING
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitUpsamplersSSE2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      WebPInitUpsamplersNEON();
+    }
+#endif
+  }
+#endif  // FANCY_UPSAMPLING
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dsp/upsampling_neon.c b/src/main/jni/libwebp/dsp/upsampling_neon.c
new file mode 100644
index 000000000..d31ed4d6a
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/upsampling_neon.c
@@ -0,0 +1,267 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON version of YUV to RGB upsampling functions.
+//
+// Author: mans@mansr.com (Mans Rullgard)
+// Based on SSE code by: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include <arm_neon.h>
+#include <string.h>
+#include "./neon.h"
+#include "./yuv.h"
+
+#ifdef FANCY_UPSAMPLING
+
+//-----------------------------------------------------------------------------
+// U/V upsampling
+
+// Loads 9 pixels each from rows r1 and r2 and generates 16 pixels.
+#define UPSAMPLE_16PIXELS(r1, r2, out) {                                \
+  uint8x8_t a = vld1_u8(r1);                                            \
+  uint8x8_t b = vld1_u8(r1 + 1);                                        \
+  uint8x8_t c = vld1_u8(r2);                                            \
+  uint8x8_t d = vld1_u8(r2 + 1);                                        \
+                                                                        \
+  uint16x8_t al = vshll_n_u8(a, 1);                                     \
+  uint16x8_t bl = vshll_n_u8(b, 1);                                     \
+  uint16x8_t cl = vshll_n_u8(c, 1);                                     \
+  uint16x8_t dl = vshll_n_u8(d, 1);                                     \
+                                                                        \
+  uint8x8_t diag1, diag2;                                               \
+  uint16x8_t sl;                                                        \
+                                                                        \
+  /* a + b + c + d */                                                   \
+  sl = vaddl_u8(a,  b);                                                 \
+  sl = vaddw_u8(sl, c);                                                 \
+  sl = vaddw_u8(sl, d);                                                 \
+                                                                        \
+  al = vaddq_u16(sl, al); /* 3a +  b +  c +  d */                       \
+  bl = vaddq_u16(sl, bl); /*  a + 3b +  c +  d */                       \
+                                                                        \
+  al = vaddq_u16(al, dl); /* 3a +  b +  c + 3d */                       \
+  bl = vaddq_u16(bl, cl); /*  a + 3b + 3c +  d */                       \
+                                                                        \
+  diag2 = vshrn_n_u16(al, 3);                                           \
+  diag1 = vshrn_n_u16(bl, 3);                                           \
+                                                                        \
+  a = vrhadd_u8(a, diag1);                                              \
+  b = vrhadd_u8(b, diag2);                                              \
+  c = vrhadd_u8(c, diag2);                                              \
+  d = vrhadd_u8(d, diag1);                                              \
+                                                                        \
+  {                                                                     \
+    uint8x8x2_t a_b, c_d;                                               \
+    INIT_VECTOR2(a_b, a, b);                                            \
+    INIT_VECTOR2(c_d, c, d);                                            \
+    vst2_u8(out,      a_b);                                             \
+    vst2_u8(out + 32, c_d);                                             \
+  }                                                                     \
+}
+
+// Turn the macro into a function for reducing code-size when non-critical
+static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
+                             uint8_t *out) {
+  UPSAMPLE_16PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) {                  \
+  uint8_t r1[9], r2[9];                                                 \
+  memcpy(r1, (tb), (num_pixels));                                       \
+  memcpy(r2, (bb), (num_pixels));                                       \
+  /* replicate last byte */                                             \
+  memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels));    \
+  memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels));    \
+  Upsample16Pixels(r1, r2, out);                                        \
+}
+
+//-----------------------------------------------------------------------------
+// YUV->RGB conversion
+
+static const int16_t kCoeffs[4] = { kYScale, kVToR, kUToG, kVToG };
+
+#define v255 vdup_n_u8(255)
+
+#define STORE_Rgb(out, r, g, b) do {                                    \
+  uint8x8x3_t r_g_b;                                                    \
+  INIT_VECTOR3(r_g_b, r, g, b);                                         \
+  vst3_u8(out, r_g_b);                                                  \
+} while (0)
+
+#define STORE_Bgr(out, r, g, b) do {                                    \
+  uint8x8x3_t b_g_r;                                                    \
+  INIT_VECTOR3(b_g_r, b, g, r);                                         \
+  vst3_u8(out, b_g_r);                                                  \
+} while (0)
+
+#define STORE_Rgba(out, r, g, b) do {                                   \
+  uint8x8x4_t r_g_b_v255;                                               \
+  INIT_VECTOR4(r_g_b_v255, r, g, b, v255);                              \
+  vst4_u8(out, r_g_b_v255);                                             \
+} while (0)
+
+#define STORE_Bgra(out, r, g, b) do {                                   \
+  uint8x8x4_t b_g_r_v255;                                               \
+  INIT_VECTOR4(b_g_r_v255, b, g, r, v255);                              \
+  vst4_u8(out, b_g_r_v255);                                             \
+} while (0)
+
+#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) {            \
+  int i;                                                                \
+  for (i = 0; i < N; i += 8) {                                          \
+    const int off = ((cur_x) + i) * XSTEP;                              \
+    uint8x8_t y  = vld1_u8((src_y) + (cur_x)  + i);                     \
+    uint8x8_t u  = vld1_u8((src_uv) + i);                               \
+    uint8x8_t v  = vld1_u8((src_uv) + i + 16);                          \
+    const int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16));       \
+    const int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128));      \
+    const int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128));      \
+    int32x4_t yl = vmull_lane_s16(vget_low_s16(yy),  cf16, 0);          \
+    int32x4_t yh = vmull_lane_s16(vget_high_s16(yy), cf16, 0);          \
+    const int32x4_t rl = vmlal_lane_s16(yl, vget_low_s16(vv),  cf16, 1);\
+    const int32x4_t rh = vmlal_lane_s16(yh, vget_high_s16(vv), cf16, 1);\
+    int32x4_t gl = vmlsl_lane_s16(yl, vget_low_s16(uu),  cf16, 2);      \
+    int32x4_t gh = vmlsl_lane_s16(yh, vget_high_s16(uu), cf16, 2);      \
+    const int32x4_t bl = vmovl_s16(vget_low_s16(uu));                   \
+    const int32x4_t bh = vmovl_s16(vget_high_s16(uu));                  \
+    gl = vmlsl_lane_s16(gl, vget_low_s16(vv),  cf16, 3);                \
+    gh = vmlsl_lane_s16(gh, vget_high_s16(vv), cf16, 3);                \
+    yl = vmlaq_lane_s32(yl, bl, cf32, 0);                               \
+    yh = vmlaq_lane_s32(yh, bh, cf32, 0);                               \
+    /* vrshrn_n_s32() already incorporates the rounding constant */     \
+    y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, YUV_FIX2),            \
+                                 vrshrn_n_s32(rh, YUV_FIX2)));          \
+    u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, YUV_FIX2),            \
+                                 vrshrn_n_s32(gh, YUV_FIX2)));          \
+    v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(yl, YUV_FIX2),            \
+                                 vrshrn_n_s32(yh, YUV_FIX2)));          \
+    STORE_ ## FMT(out + off, y, u, v);                                  \
+  }                                                                     \
+}
+
+#define CONVERT1(FUNC, XSTEP, N, src_y, src_uv, rgb, cur_x) {           \
+  int i;                                                                \
+  for (i = 0; i < N; i++) {                                             \
+    const int off = ((cur_x) + i) * XSTEP;                              \
+    const int y = src_y[(cur_x) + i];                                   \
+    const int u = (src_uv)[i];                                          \
+    const int v = (src_uv)[i + 16];                                     \
+    FUNC(y, u, v, rgb + off);                                           \
+  }                                                                     \
+}
+
+#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv,                  \
+                      top_dst, bottom_dst, cur_x, len) {                \
+  CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x)                  \
+  if (bottom_y != NULL) {                                               \
+    CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x)   \
+  }                                                                     \
+}
+
+#define CONVERT2RGB_1(FUNC, XSTEP, top_y, bottom_y, uv,                 \
+                      top_dst, bottom_dst, cur_x, len) {                \
+  CONVERT1(FUNC, XSTEP, len, top_y, uv, top_dst, cur_x);                \
+  if (bottom_y != NULL) {                                               \
+    CONVERT1(FUNC, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \
+  }                                                                     \
+}
+
+#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                       \
+static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y,    \
+                      const uint8_t *top_u, const uint8_t *top_v,       \
+                      const uint8_t *cur_u, const uint8_t *cur_v,       \
+                      uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
+  int block;                                                            \
+  /* 16 byte aligned array to cache reconstructed u and v */            \
+  uint8_t uv_buf[2 * 32 + 15];                                          \
+  uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);     \
+  const int uv_len = (len + 1) >> 1;                                    \
+  /* 9 pixels must be read-able for each block */                       \
+  const int num_blocks = (uv_len - 1) >> 3;                             \
+  const int leftover = uv_len - num_blocks * 8;                         \
+  const int last_pos = 1 + 16 * num_blocks;                             \
+                                                                        \
+  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                  \
+  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                  \
+                                                                        \
+  const int16x4_t cf16 = vld1_s16(kCoeffs);                             \
+  const int32x2_t cf32 = vdup_n_s32(kUToB);                             \
+  const uint8x8_t u16  = vdup_n_u8(16);                                 \
+  const uint8x8_t u128 = vdup_n_u8(128);                                \
+                                                                        \
+  /* Treat the first pixel in regular way */                            \
+  assert(top_y != NULL);                                                \
+  {                                                                     \
+    const int u0 = (top_u[0] + u_diag) >> 1;                            \
+    const int v0 = (top_v[0] + v_diag) >> 1;                            \
+    VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst);                         \
+  }                                                                     \
+  if (bottom_y != NULL) {                                               \
+    const int u0 = (cur_u[0] + u_diag) >> 1;                            \
+    const int v0 = (cur_v[0] + v_diag) >> 1;                            \
+    VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst);                   \
+  }                                                                     \
+                                                                        \
+  for (block = 0; block < num_blocks; ++block) {                        \
+    UPSAMPLE_16PIXELS(top_u, cur_u, r_uv);                              \
+    UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16);                         \
+    CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv,                    \
+                  top_dst, bottom_dst, 16 * block + 1, 16);             \
+    top_u += 8;                                                         \
+    cur_u += 8;                                                         \
+    top_v += 8;                                                         \
+    cur_v += 8;                                                         \
+  }                                                                     \
+                                                                        \
+  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv);                    \
+  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16);               \
+  CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv,          \
+                top_dst, bottom_dst, last_pos, len - last_pos);         \
+}
+
+// NEON variants of the fancy upsampler.
+NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair,  Rgb,  3)
+NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair,  Bgr,  3)
+NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair, Rgba, 4)
+NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair, Bgra, 4)
+
+#endif  // FANCY_UPSAMPLING
+
+#endif   // WEBP_USE_NEON
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitUpsamplersNEON(void);
+
+#ifdef FANCY_UPSAMPLING
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+void WebPInitUpsamplersNEON(void) {
+#if defined(WEBP_USE_NEON)
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+#endif   // WEBP_USE_NEON
+}
+
+#else
+
+// this empty function is to avoid an empty .o
+void WebPInitUpsamplersNEON(void) {}
+
+#endif  // FANCY_UPSAMPLING
diff --git a/src/main/jni/libwebp/dsp/upsampling_sse2.c b/src/main/jni/libwebp/dsp/upsampling_sse2.c
new file mode 100644
index 000000000..45cf0906e
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/upsampling_sse2.c
@@ -0,0 +1,214 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+#include "./yuv.h"
+
+#ifdef FANCY_UPSAMPLING
+
+// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
+// u = (9*a + 3*b + 3*c + d + 8) / 16
+//   = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
+//   = (a + m + 1) / 2
+// where m = (a + 3*b + 3*c + d) / 8
+//         = ((a + b + c + d) / 2 + b + c) / 4
+//
+// Let's say  k = (a + b + c + d) / 4.
+// We can compute k as
+// k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1
+// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
+//
+// Then m can be written as
+// m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1
+
+// Computes out = (k + in + 1) / 2 - ((ij & (s^t)) | (k^in)) & 1
+#define GET_M(ij, in, out) do {                                                \
+  const __m128i tmp0 = _mm_avg_epu8(k, (in));     /* (k + in + 1) / 2 */       \
+  const __m128i tmp1 = _mm_and_si128((ij), st);   /* (ij) & (s^t) */           \
+  const __m128i tmp2 = _mm_xor_si128(k, (in));    /* (k^in) */                 \
+  const __m128i tmp3 = _mm_or_si128(tmp1, tmp2);  /* ((ij) & (s^t)) | (k^in) */\
+  const __m128i tmp4 = _mm_and_si128(tmp3, one);  /* & 1 -> lsb_correction */  \
+  (out) = _mm_sub_epi8(tmp0, tmp4);    /* (k + in + 1) / 2 - lsb_correction */ \
+} while (0)
+
+// pack and store two alternating pixel rows
+#define PACK_AND_STORE(a, b, da, db, out) do {                                 \
+  const __m128i t_a = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */ \
+  const __m128i t_b = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */ \
+  const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b);                             \
+  const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b);                             \
+  _mm_store_si128(((__m128i*)(out)) + 0, t_1);                                 \
+  _mm_store_si128(((__m128i*)(out)) + 1, t_2);                                 \
+} while (0)
+
+// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
+#define UPSAMPLE_32PIXELS(r1, r2, out) {                                       \
+  const __m128i one = _mm_set1_epi8(1);                                        \
+  const __m128i a = _mm_loadu_si128((__m128i*)&(r1)[0]);                       \
+  const __m128i b = _mm_loadu_si128((__m128i*)&(r1)[1]);                       \
+  const __m128i c = _mm_loadu_si128((__m128i*)&(r2)[0]);                       \
+  const __m128i d = _mm_loadu_si128((__m128i*)&(r2)[1]);                       \
+                                                                               \
+  const __m128i s = _mm_avg_epu8(a, d);        /* s = (a + d + 1) / 2 */       \
+  const __m128i t = _mm_avg_epu8(b, c);        /* t = (b + c + 1) / 2 */       \
+  const __m128i st = _mm_xor_si128(s, t);      /* st = s^t */                  \
+                                                                               \
+  const __m128i ad = _mm_xor_si128(a, d);      /* ad = a^d */                  \
+  const __m128i bc = _mm_xor_si128(b, c);      /* bc = b^c */                  \
+                                                                               \
+  const __m128i t1 = _mm_or_si128(ad, bc);     /* (a^d) | (b^c) */             \
+  const __m128i t2 = _mm_or_si128(t1, st);     /* (a^d) | (b^c) | (s^t) */     \
+  const __m128i t3 = _mm_and_si128(t2, one);   /* (a^d) | (b^c) | (s^t) & 1 */ \
+  const __m128i t4 = _mm_avg_epu8(s, t);                                       \
+  const __m128i k = _mm_sub_epi8(t4, t3);      /* k = (a + b + c + d) / 4 */   \
+  __m128i diag1, diag2;                                                        \
+                                                                               \
+  GET_M(bc, t, diag1);                  /* diag1 = (a + 3b + 3c + d) / 8 */    \
+  GET_M(ad, s, diag2);                  /* diag2 = (3a + b + c + 3d) / 8 */    \
+                                                                               \
+  /* pack the alternate pixels */                                              \
+  PACK_AND_STORE(a, b, diag1, diag2, out +      0);  /* store top */           \
+  PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32);  /* store bottom */        \
+}
+
+// Turn the macro into a function for reducing code-size when non-critical
+static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
+                             uint8_t* const out) {
+  UPSAMPLE_32PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) {                         \
+  uint8_t r1[17], r2[17];                                                      \
+  memcpy(r1, (tb), (num_pixels));                                              \
+  memcpy(r2, (bb), (num_pixels));                                              \
+  /* replicate last byte */                                                    \
+  memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels));          \
+  memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels));          \
+  /* using the shared function instead of the macro saves ~3k code size */     \
+  Upsample32Pixels(r1, r2, out);                                               \
+}
+
+#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y,                              \
+                    top_dst, bottom_dst, cur_x, num_pixels) {                  \
+  int n;                                                                       \
+  for (n = 0; n < (num_pixels); ++n) {                                         \
+    FUNC(top_y[(cur_x) + n], r_u[n], r_v[n],                                   \
+         top_dst + ((cur_x) + n) * XSTEP);                                     \
+  }                                                                            \
+  if (bottom_y != NULL) {                                                      \
+    for (n = 0; n < (num_pixels); ++n) {                                       \
+      FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n],                    \
+           bottom_dst + ((cur_x) + n) * XSTEP);                                \
+    }                                                                          \
+  }                                                                            \
+}
+
+#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y,                           \
+                       top_dst, bottom_dst, cur_x) do {                        \
+  FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP);              \
+  if (bottom_y != NULL) {                                                      \
+    FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64,                           \
+             bottom_dst + (cur_x) * XSTEP);                                    \
+  }                                                                            \
+} while (0)
+
+#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                             \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int uv_pos, pos;                                                             \
+  /* 16byte-aligned array to cache reconstructed u and v */                    \
+  uint8_t uv_buf[4 * 32 + 15];                                                 \
+  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);             \
+  uint8_t* const r_v = r_u + 32;                                               \
+                                                                               \
+  assert(top_y != NULL);                                                       \
+  {   /* Treat the first pixel in regular way */                               \
+    const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                       \
+    const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                       \
+    const int u0_t = (top_u[0] + u_diag) >> 1;                                 \
+    const int v0_t = (top_v[0] + v_diag) >> 1;                                 \
+    FUNC(top_y[0], u0_t, v0_t, top_dst);                                       \
+    if (bottom_y != NULL) {                                                    \
+      const int u0_b = (cur_u[0] + u_diag) >> 1;                               \
+      const int v0_b = (cur_v[0] + v_diag) >> 1;                               \
+      FUNC(bottom_y[0], u0_b, v0_b, bottom_dst);                               \
+    }                                                                          \
+  }                                                                            \
+  /* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */  \
+  for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) {    \
+    UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u);                    \
+    UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v);                    \
+    CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos);    \
+  }                                                                            \
+  if (len > 1) {                                                               \
+    const int left_over = ((len + 1) >> 1) - (pos >> 1);                       \
+    assert(left_over > 0);                                                     \
+    UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u);       \
+    UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v);       \
+    CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst,             \
+                pos, len - pos);                                               \
+  }                                                                            \
+}
+
+// SSE2 variants of the fancy upsampler.
+SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+
+#undef GET_M
+#undef PACK_AND_STORE
+#undef UPSAMPLE_32PIXELS
+#undef UPSAMPLE_LAST_BLOCK
+#undef CONVERT2RGB
+#undef CONVERT2RGB_32
+#undef SSE2_UPSAMPLE_FUNC
+
+#endif  // FANCY_UPSAMPLING
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitUpsamplersSSE2(void);
+
+#ifdef FANCY_UPSAMPLING
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+void WebPInitUpsamplersSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  VP8YUVInitSSE2();
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
+#endif   // WEBP_USE_SSE2
+}
+
+#else
+
+// this empty function is to avoid an empty .o
+void WebPInitUpsamplersSSE2(void) {}
+
+#endif  // FANCY_UPSAMPLING
diff --git a/src/main/jni/libwebp/dsp/yuv.c b/src/main/jni/libwebp/dsp/yuv.c
new file mode 100644
index 000000000..d7cb4ebcb
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/yuv.c
@@ -0,0 +1,154 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(WEBP_YUV_USE_TABLE)
+
+static int done = 0;
+
+static WEBP_INLINE uint8_t clip(int v, int max_value) {
+  return v < 0 ? 0 : v > max_value ? max_value : v;
+}
+
+int16_t VP8kVToR[256], VP8kUToB[256];
+int32_t VP8kVToG[256], VP8kUToG[256];
+uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+void VP8YUVInit(void) {
+  int i;
+  if (done) {
+    return;
+  }
+#ifndef USE_YUVj
+  for (i = 0; i < 256; ++i) {
+    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
+    VP8kVToG[i] = -45773 * (i - 128);
+    VP8kUToB[i] = (113618 * (i - 128) + YUV_HALF) >> YUV_FIX;
+  }
+  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+    const int k = ((i - 16) * 76283 + YUV_HALF) >> YUV_FIX;
+    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+  }
+#else
+  for (i = 0; i < 256; ++i) {
+    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
+    VP8kVToG[i] = -46802 * (i - 128);
+    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
+  }
+  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+    const int k = i;
+    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+  }
+#endif
+
+  done = 1;
+}
+
+#else
+
+void VP8YUVInit(void) {}
+
+#endif  // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Plain-C version
+
+#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                       \
+static void FUNC_NAME(const uint8_t* y,                                        \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* dst, int len) {                                 \
+  const uint8_t* const end = dst + (len & ~1) * XSTEP;                         \
+  while (dst != end) {                                                         \
+    FUNC(y[0], u[0], v[0], dst);                                               \
+    FUNC(y[1], u[0], v[0], dst + XSTEP);                                       \
+    y += 2;                                                                    \
+    ++u;                                                                       \
+    ++v;                                                                       \
+    dst += 2 * XSTEP;                                                          \
+  }                                                                            \
+  if (len & 1) {                                                               \
+    FUNC(y[0], u[0], v[0], dst);                                               \
+  }                                                                            \
+}                                                                              \
+
+// All variants implemented.
+ROW_FUNC(YuvToRgbRow,      VP8YuvToRgb,  3)
+ROW_FUNC(YuvToBgrRow,      VP8YuvToBgr,  3)
+ROW_FUNC(YuvToRgbaRow,     VP8YuvToRgba, 4)
+ROW_FUNC(YuvToBgraRow,     VP8YuvToBgra, 4)
+ROW_FUNC(YuvToArgbRow,     VP8YuvToArgb, 4)
+ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2)
+ROW_FUNC(YuvToRgb565Row,   VP8YuvToRgb565, 2)
+
+#undef ROW_FUNC
+
+// Main call for processing a plane with a WebPSamplerRowFunc function:
+void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
+                             const uint8_t* u, const uint8_t* v, int uv_stride,
+                             uint8_t* dst, int dst_stride,
+                             int width, int height, WebPSamplerRowFunc func) {
+  int j;
+  for (j = 0; j < height; ++j) {
+    func(y, u, v, dst, width);
+    y += y_stride;
+    if (j & 1) {
+      u += uv_stride;
+      v += uv_stride;
+    }
+    dst += dst_stride;
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Main call
+
+WebPSamplerRowFunc WebPSamplers[MODE_LAST];
+
+extern void WebPInitSamplersSSE2(void);
+extern void WebPInitSamplersMIPS32(void);
+
+void WebPInitSamplers(void) {
+  WebPSamplers[MODE_RGB]       = YuvToRgbRow;
+  WebPSamplers[MODE_RGBA]      = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]       = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA]      = YuvToBgraRow;
+  WebPSamplers[MODE_ARGB]      = YuvToArgbRow;
+  WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row;
+  WebPSamplers[MODE_RGB_565]   = YuvToRgb565Row;
+  WebPSamplers[MODE_rgbA]      = YuvToRgbaRow;
+  WebPSamplers[MODE_bgrA]      = YuvToBgraRow;
+  WebPSamplers[MODE_Argb]      = YuvToArgbRow;
+  WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitSamplersSSE2();
+    }
+#endif  // WEBP_USE_SSE2
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      WebPInitSamplersMIPS32();
+    }
+#endif  // WEBP_USE_MIPS32
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/dsp/yuv.h b/src/main/jni/libwebp/dsp/yuv.h
new file mode 100644
index 000000000..8a47edd82
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/yuv.h
@@ -0,0 +1,321 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// inline YUV<->RGB conversion function
+//
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX).
+//
+// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
+//   R = 1.164 * (Y-16) + 1.596 * (V-128)
+//   G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
+//   B = 1.164 * (Y-16)                   + 2.018 * (U-128)
+// where Y is in the [16,235] range, and U/V in the [16,240] range.
+// In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor
+// "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table.
+// So in this case the formulae should read:
+//   R = 1.164 * [Y + 1.371 * (V-128)                  ] - 18.624
+//   G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624
+//   B = 1.164 * [Y                   + 1.733 * (U-128)] - 18.624
+// once factorized.
+// For YUV->RGB conversion, only 14bit fixed precision is used (YUV_FIX2).
+// That's the maximum possible for a convenient ARM implementation.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_YUV_H_
+#define WEBP_DSP_YUV_H_
+
+#include "./dsp.h"
+#include "../dec/decode_vp8.h"
+
+// Define the following to use the LUT-based code:
+// #define WEBP_YUV_USE_TABLE
+
+#if defined(WEBP_EXPERIMENTAL_FEATURES)
+// Do NOT activate this feature for real compression. This is only experimental!
+// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
+// This colorspace is close to Rec.601's Y'CbCr model with the notable
+// difference of allowing larger range for luma/chroma.
+// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
+// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+// #define USE_YUVj
+#endif
+
+//------------------------------------------------------------------------------
+// YUV -> RGB conversion
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+  YUV_FIX = 16,                    // fixed-point precision for RGB->YUV
+  YUV_HALF = 1 << (YUV_FIX - 1),
+  YUV_MASK = (256 << YUV_FIX) - 1,
+  YUV_RANGE_MIN = -227,            // min value of r/g/b output
+  YUV_RANGE_MAX = 256 + 226,       // max value of r/g/b output
+
+  YUV_FIX2 = 14,                   // fixed-point precision for YUV->RGB
+  YUV_HALF2 = 1 << (YUV_FIX2 - 1),
+  YUV_MASK2 = (256 << YUV_FIX2) - 1
+};
+
+// These constants are 14b fixed-point version of ITU-R BT.601 constants.
+#define kYScale 19077    // 1.164 = 255 / 219
+#define kVToR   26149    // 1.596 = 255 / 112 * 0.701
+#define kUToG   6419     // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587
+#define kVToG   13320    // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587
+#define kUToB   33050    // 2.018 = 255 / 112 * 0.886
+#define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF2)
+#define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF2)
+#define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF2)
+
+//------------------------------------------------------------------------------
+
+#if !defined(WEBP_YUV_USE_TABLE)
+
+// slower on x86 by ~7-8%, but bit-exact with the SSE2 version
+
+static WEBP_INLINE int VP8Clip8(int v) {
+  return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int VP8YUVToR(int y, int v) {
+  return VP8Clip8(kYScale * y + kVToR * v + kRCst);
+}
+
+static WEBP_INLINE int VP8YUVToG(int y, int u, int v) {
+  return VP8Clip8(kYScale * y - kUToG * u - kVToG * v + kGCst);
+}
+
+static WEBP_INLINE int VP8YUVToB(int y, int u) {
+  return VP8Clip8(kYScale * y + kUToB * u + kBCst);
+}
+
+static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v,
+                                    uint8_t* const rgb) {
+  rgb[0] = VP8YUVToR(y, v);
+  rgb[1] = VP8YUVToG(y, u, v);
+  rgb[2] = VP8YUVToB(y, u);
+}
+
+static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v,
+                                    uint8_t* const bgr) {
+  bgr[0] = VP8YUVToB(y, u);
+  bgr[1] = VP8YUVToG(y, u, v);
+  bgr[2] = VP8YUVToR(y, v);
+}
+
+static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
+                                       uint8_t* const rgb) {
+  const int r = VP8YUVToR(y, v);      // 5 usable bits
+  const int g = VP8YUVToG(y, u, v);   // 6 usable bits
+  const int b = VP8YUVToB(y, u);      // 5 usable bits
+  const int rg = (r & 0xf8) | (g >> 5);
+  const int gb = ((g << 3) & 0xe0) | (b >> 3);
+#ifdef WEBP_SWAP_16BIT_CSP
+  rgb[0] = gb;
+  rgb[1] = rg;
+#else
+  rgb[0] = rg;
+  rgb[1] = gb;
+#endif
+}
+
+static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
+                                         uint8_t* const argb) {
+  const int r = VP8YUVToR(y, v);        // 4 usable bits
+  const int g = VP8YUVToG(y, u, v);     // 4 usable bits
+  const int b = VP8YUVToB(y, u);        // 4 usable bits
+  const int rg = (r & 0xf0) | (g >> 4);
+  const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
+#ifdef WEBP_SWAP_16BIT_CSP
+  argb[0] = ba;
+  argb[1] = rg;
+#else
+  argb[0] = rg;
+  argb[1] = ba;
+#endif
+}
+
+#else
+
+// Table-based version, not totally equivalent to the SSE2 version.
+// Rounding diff is only +/-1 though.
+
+extern int16_t VP8kVToR[256], VP8kUToB[256];
+extern int32_t VP8kVToG[256], VP8kUToG[256];
+extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v,
+                                    uint8_t* const rgb) {
+  const int r_off = VP8kVToR[v];
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const int b_off = VP8kUToB[u];
+  rgb[0] = VP8kClip[y + r_off - YUV_RANGE_MIN];
+  rgb[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
+  rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN];
+}
+
+static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v,
+                                    uint8_t* const bgr) {
+  const int r_off = VP8kVToR[v];
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const int b_off = VP8kUToB[u];
+  bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN];
+  bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
+  bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
+}
+
+static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
+                                       uint8_t* const rgb) {
+  const int r_off = VP8kVToR[v];
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const int b_off = VP8kUToB[u];
+  const int rg = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
+                  (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
+  const int gb = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
+                   (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
+#ifdef WEBP_SWAP_16BIT_CSP
+  rgb[0] = gb;
+  rgb[1] = rg;
+#else
+  rgb[0] = rg;
+  rgb[1] = gb;
+#endif
+}
+
+static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
+                                         uint8_t* const argb) {
+  const int r_off = VP8kVToR[v];
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const int b_off = VP8kUToB[u];
+  const int rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
+                   VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
+  const int ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f;
+#ifdef WEBP_SWAP_16BIT_CSP
+  argb[0] = ba;
+  argb[1] = rg;
+#else
+  argb[0] = rg;
+  argb[1] = ba;
+#endif
+}
+
+#endif  // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Alpha handling variants
+
+static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const argb) {
+  argb[0] = 0xff;
+  VP8YuvToRgb(y, u, v, argb + 1);
+}
+
+static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const bgra) {
+  VP8YuvToBgr(y, u, v, bgra);
+  bgra[3] = 0xff;
+}
+
+static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const rgba) {
+  VP8YuvToRgb(y, u, v, rgba);
+  rgba[3] = 0xff;
+}
+
+// Must be called before everything, to initialize the tables.
+void VP8YUVInit(void);
+
+//-----------------------------------------------------------------------------
+// SSE2 extra functions (mostly for upsampling_sse2.c)
+
+#if defined(WEBP_USE_SSE2)
+
+// When the following is defined, tables are initialized statically, adding ~12k
+// to the binary size. Otherwise, they are initialized at run-time (small cost).
+#define WEBP_YUV_USE_SSE2_TABLES
+
+#if defined(FANCY_UPSAMPLING)
+// Process 32 pixels and store the result (24b or 32b per pixel) in *dst.
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst);
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst);
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst);
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst);
+#endif  // FANCY_UPSAMPLING
+
+// Must be called to initialize tables before using the functions.
+void VP8YUVInitSSE2(void);
+
+#endif    // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+// Stub functions that can be called with various rounding values:
+static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
+  uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2);
+  return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
+}
+
+#ifndef USE_YUVj
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
+  const int luma = 16839 * r + 33059 * g + 6420 * b;
+  return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX;  // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+  const int u = -9719 * r - 19081 * g + 28800 * b;
+  return VP8ClipUV(u, rounding);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+  const int v = +28800 * r - 24116 * g - 4684 * b;
+  return VP8ClipUV(v, rounding);
+}
+
+#else
+
+// This JPEG-YUV colorspace, only for comparison!
+// These are also 16bit precision coefficients from Rec.601, but with full
+// [0..255] output range.
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
+  const int luma = 19595 * r + 38470 * g + 7471 * b;
+  return (luma + rounding) >> YUV_FIX;  // no need to clip
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+  const int u = -11058 * r - 21710 * g + 32768 * b;
+  return VP8ClipUV(u, rounding);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+  const int v = 32768 * r - 27439 * g - 5329 * b;
+  return VP8ClipUV(v, rounding);
+}
+
+#endif    // USE_YUVj
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DSP_YUV_H_ */
diff --git a/src/main/jni/libwebp/dsp/yuv_mips32.c b/src/main/jni/libwebp/dsp/yuv_mips32.c
new file mode 100644
index 000000000..c82b4dfdd
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/yuv_mips32.c
@@ -0,0 +1,100 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of YUV to RGB upsampling functions.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./yuv.h"
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A)                                 \
+static void FUNC_NAME(const uint8_t* y,                                        \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* dst, int len) {                                 \
+  int i, r, g, b;                                                              \
+  int temp0, temp1, temp2, temp3, temp4;                                       \
+  for (i = 0; i < (len >> 1); i++) {                                           \
+    temp1 = kVToR * v[0];                                                      \
+    temp3 = kVToG * v[0];                                                      \
+    temp2 = kUToG * u[0];                                                      \
+    temp4 = kUToB * u[0];                                                      \
+    temp0 = kYScale * y[0];                                                    \
+    temp1 += kRCst;                                                            \
+    temp3 -= kGCst;                                                            \
+    temp2 += temp3;                                                            \
+    temp4 += kBCst;                                                            \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    temp0 = kYScale * y[1];                                                    \
+    dst[R] = r;                                                                \
+    dst[G] = g;                                                                \
+    dst[B] = b;                                                                \
+    if (A) dst[A] = 0xff;                                                      \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    dst[R + XSTEP] = r;                                                        \
+    dst[G + XSTEP] = g;                                                        \
+    dst[B + XSTEP] = b;                                                        \
+    if (A) dst[A + XSTEP] = 0xff;                                              \
+    y += 2;                                                                    \
+    ++u;                                                                       \
+    ++v;                                                                       \
+    dst += 2 * XSTEP;                                                          \
+  }                                                                            \
+  if (len & 1) {                                                               \
+    temp1 = kVToR * v[0];                                                      \
+    temp3 = kVToG * v[0];                                                      \
+    temp2 = kUToG * u[0];                                                      \
+    temp4 = kUToB * u[0];                                                      \
+    temp0 = kYScale * y[0];                                                    \
+    temp1 += kRCst;                                                            \
+    temp3 -= kGCst;                                                            \
+    temp2 += temp3;                                                            \
+    temp4 += kBCst;                                                            \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    dst[R] = r;                                                                \
+    dst[G] = g;                                                                \
+    dst[B] = b;                                                                \
+    if (A) dst[A] = 0xff;                                                      \
+  }                                                                            \
+}
+
+ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0)
+ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3)
+ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0)
+ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
+
+#undef ROW_FUNC
+
+#endif   // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitSamplersMIPS32(void);
+
+void WebPInitSamplersMIPS32(void) {
+#if defined(WEBP_USE_MIPS32)
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+#endif  // WEBP_USE_MIPS32
+}
diff --git a/src/main/jni/libwebp/dsp/yuv_sse2.c b/src/main/jni/libwebp/dsp/yuv_sse2.c
new file mode 100644
index 000000000..6fe0f3b0d
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/yuv_sse2.c
@@ -0,0 +1,322 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <emmintrin.h>
+#include <string.h>   // for memcpy
+
+typedef union {   // handy struct for converting SSE2 registers
+  int32_t i32[4];
+  uint8_t u8[16];
+  __m128i m;
+} VP8kCstSSE2;
+
+#if defined(WEBP_YUV_USE_SSE2_TABLES)
+
+#include "./yuv_tables_sse2.h"
+
+void VP8YUVInitSSE2(void) {}
+
+#else
+
+static int done_sse2 = 0;
+static VP8kCstSSE2 VP8kUtoRGBA[256], VP8kVtoRGBA[256], VP8kYtoRGBA[256];
+
+void VP8YUVInitSSE2(void) {
+  if (!done_sse2) {
+    int i;
+    for (i = 0; i < 256; ++i) {
+      VP8kYtoRGBA[i].i32[0] =
+        VP8kYtoRGBA[i].i32[1] =
+        VP8kYtoRGBA[i].i32[2] = (i - 16) * kYScale + YUV_HALF2;
+      VP8kYtoRGBA[i].i32[3] = 0xff << YUV_FIX2;
+
+      VP8kUtoRGBA[i].i32[0] = 0;
+      VP8kUtoRGBA[i].i32[1] = -kUToG * (i - 128);
+      VP8kUtoRGBA[i].i32[2] =  kUToB * (i - 128);
+      VP8kUtoRGBA[i].i32[3] = 0;
+
+      VP8kVtoRGBA[i].i32[0] =  kVToR * (i - 128);
+      VP8kVtoRGBA[i].i32[1] = -kVToG * (i - 128);
+      VP8kVtoRGBA[i].i32[2] = 0;
+      VP8kVtoRGBA[i].i32[3] = 0;
+    }
+    done_sse2 = 1;
+
+#if 0   // code used to generate 'yuv_tables_sse2.h'
+    printf("static const VP8kCstSSE2 VP8kYtoRGBA[256] = {\n");
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x}},\n",
+             VP8kYtoRGBA[i].i32[0], VP8kYtoRGBA[i].i32[1],
+             VP8kYtoRGBA[i].i32[2], VP8kYtoRGBA[i].i32[3]);
+    }
+    printf("};\n\n");
+    printf("static const VP8kCstSSE2 VP8kUtoRGBA[256] = {\n");
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0, 0x%.8x, 0x%.8x, 0}},\n",
+             VP8kUtoRGBA[i].i32[1], VP8kUtoRGBA[i].i32[2]);
+    }
+    printf("};\n\n");
+    printf("static VP8kCstSSE2 VP8kVtoRGBA[256] = {\n");
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0x%.8x, 0x%.8x, 0, 0}},\n",
+             VP8kVtoRGBA[i].i32[0], VP8kVtoRGBA[i].i32[1]);
+    }
+    printf("};\n\n");
+#endif
+  }
+}
+
+#endif  // WEBP_YUV_USE_SSE2_TABLES
+
+//-----------------------------------------------------------------------------
+
+static WEBP_INLINE __m128i LoadUVPart(int u, int v) {
+  const __m128i u_part = _mm_loadu_si128(&VP8kUtoRGBA[u].m);
+  const __m128i v_part = _mm_loadu_si128(&VP8kVtoRGBA[v].m);
+  const __m128i uv_part = _mm_add_epi32(u_part, v_part);
+  return uv_part;
+}
+
+static WEBP_INLINE __m128i GetRGBA32bWithUV(int y, const __m128i uv_part) {
+  const __m128i y_part = _mm_loadu_si128(&VP8kYtoRGBA[y].m);
+  const __m128i rgba1 = _mm_add_epi32(y_part, uv_part);
+  const __m128i rgba2 = _mm_srai_epi32(rgba1, YUV_FIX2);
+  return rgba2;
+}
+
+static WEBP_INLINE __m128i GetRGBA32b(int y, int u, int v) {
+  const __m128i uv_part = LoadUVPart(u, v);
+  return GetRGBA32bWithUV(y, uv_part);
+}
+
+static WEBP_INLINE void YuvToRgbSSE2(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const rgb) {
+  const __m128i tmp0 = GetRGBA32b(y, u, v);
+  const __m128i tmp1 = _mm_packs_epi32(tmp0, tmp0);
+  const __m128i tmp2 = _mm_packus_epi16(tmp1, tmp1);
+  // Note: we store 8 bytes at a time, not 3 bytes! -> memory stomp
+  _mm_storel_epi64((__m128i*)rgb, tmp2);
+}
+
+static WEBP_INLINE void YuvToBgrSSE2(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const bgr) {
+  const __m128i tmp0 = GetRGBA32b(y, u, v);
+  const __m128i tmp1 = _mm_shuffle_epi32(tmp0, _MM_SHUFFLE(3, 0, 1, 2));
+  const __m128i tmp2 = _mm_packs_epi32(tmp1, tmp1);
+  const __m128i tmp3 = _mm_packus_epi16(tmp2, tmp2);
+  // Note: we store 8 bytes at a time, not 3 bytes! -> memory stomp
+  _mm_storel_epi64((__m128i*)bgr, tmp3);
+}
+
+//-----------------------------------------------------------------------------
+// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
+
+#ifdef FANCY_UPSAMPLING
+
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst) {
+  int n;
+  for (n = 0; n < 32; n += 4) {
+    const __m128i tmp0_1 = GetRGBA32b(y[n + 0], u[n + 0], v[n + 0]);
+    const __m128i tmp0_2 = GetRGBA32b(y[n + 1], u[n + 1], v[n + 1]);
+    const __m128i tmp0_3 = GetRGBA32b(y[n + 2], u[n + 2], v[n + 2]);
+    const __m128i tmp0_4 = GetRGBA32b(y[n + 3], u[n + 3], v[n + 3]);
+    const __m128i tmp1_1 = _mm_packs_epi32(tmp0_1, tmp0_2);
+    const __m128i tmp1_2 = _mm_packs_epi32(tmp0_3, tmp0_4);
+    const __m128i tmp2 = _mm_packus_epi16(tmp1_1, tmp1_2);
+    _mm_storeu_si128((__m128i*)dst, tmp2);
+    dst += 4 * 4;
+  }
+}
+
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst) {
+  int n;
+  for (n = 0; n < 32; n += 2) {
+    const __m128i tmp0_1 = GetRGBA32b(y[n + 0], u[n + 0], v[n + 0]);
+    const __m128i tmp0_2 = GetRGBA32b(y[n + 1], u[n + 1], v[n + 1]);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+  }
+}
+
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst) {
+  int n;
+  uint8_t tmp0[2 * 3 + 5 + 15];
+  uint8_t* const tmp = (uint8_t*)((uintptr_t)(tmp0 + 15) & ~15);  // align
+  for (n = 0; n < 30; ++n) {   // we directly stomp the *dst memory
+    YuvToRgbSSE2(y[n], u[n], v[n], dst + n * 3);
+  }
+  // Last two pixels are special: we write in a tmp buffer before sending
+  // to dst.
+  YuvToRgbSSE2(y[n + 0], u[n + 0], v[n + 0], tmp + 0);
+  YuvToRgbSSE2(y[n + 1], u[n + 1], v[n + 1], tmp + 3);
+  memcpy(dst + n * 3, tmp, 2 * 3);
+}
+
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst) {
+  int n;
+  uint8_t tmp0[2 * 3 + 5 + 15];
+  uint8_t* const tmp = (uint8_t*)((uintptr_t)(tmp0 + 15) & ~15);  // align
+  for (n = 0; n < 30; ++n) {
+    YuvToBgrSSE2(y[n], u[n], v[n], dst + n * 3);
+  }
+  YuvToBgrSSE2(y[n + 0], u[n + 0], v[n + 0], tmp + 0);
+  YuvToBgrSSE2(y[n + 1], u[n + 1], v[n + 1], tmp + 3);
+  memcpy(dst + n * 3, tmp, 2 * 3);
+}
+
+#endif  // FANCY_UPSAMPLING
+
+//-----------------------------------------------------------------------------
+// Arbitrary-length row conversion functions
+
+static void YuvToRgbaRowSSE2(const uint8_t* y,
+                             const uint8_t* u, const uint8_t* v,
+                             uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 4 <= len; n += 4) {
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);
+    const __m128i uv_1 = LoadUVPart(u[1], v[1]);
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp0_3 = GetRGBA32bWithUV(y[2], uv_1);
+    const __m128i tmp0_4 = GetRGBA32bWithUV(y[3], uv_1);
+    const __m128i tmp1_1 = _mm_packs_epi32(tmp0_1, tmp0_2);
+    const __m128i tmp1_2 = _mm_packs_epi32(tmp0_3, tmp0_4);
+    const __m128i tmp2 = _mm_packus_epi16(tmp1_1, tmp1_2);
+    _mm_storeu_si128((__m128i*)dst, tmp2);
+    dst += 4 * 4;
+    y += 4;
+    u += 2;
+    v += 2;
+  }
+  // Finish off
+  while (n < len) {
+    VP8YuvToRgba(y[0], u[0], v[0], dst);
+    dst += 4;
+    ++y;
+    u += (n & 1);
+    v += (n & 1);
+    ++n;
+  }
+}
+
+static void YuvToBgraRowSSE2(const uint8_t* y,
+                             const uint8_t* u, const uint8_t* v,
+                             uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 <= len; n += 2) {
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+    y += 2;
+    ++u;
+    ++v;
+  }
+  // Finish off
+  if (len & 1) {
+    VP8YuvToBgra(y[0], u[0], v[0], dst);
+  }
+}
+
+static void YuvToArgbRowSSE2(const uint8_t* y,
+                             const uint8_t* u, const uint8_t* v,
+                             uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 <= len; n += 2) {
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(2, 1, 0, 3));
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(2, 1, 0, 3));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+    y += 2;
+    ++u;
+    ++v;
+  }
+  // Finish off
+  if (len & 1) {
+    VP8YuvToArgb(y[0], u[0], v[0], dst);
+  }
+}
+
+static void YuvToRgbRowSSE2(const uint8_t* y,
+                            const uint8_t* u, const uint8_t* v,
+                            uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 < len; ++n) {   // we directly stomp the *dst memory
+    YuvToRgbSSE2(y[0], u[0], v[0], dst);  // stomps 8 bytes
+    dst += 3;
+    ++y;
+    u += (n & 1);
+    v += (n & 1);
+  }
+  VP8YuvToRgb(y[0], u[0], v[0], dst);
+  if (len > 1) {
+    VP8YuvToRgb(y[1], u[n & 1], v[n & 1], dst + 3);
+  }
+}
+
+static void YuvToBgrRowSSE2(const uint8_t* y,
+                            const uint8_t* u, const uint8_t* v,
+                            uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 < len; ++n) {   // we directly stomp the *dst memory
+    YuvToBgrSSE2(y[0], u[0], v[0], dst);  // stomps 8 bytes
+    dst += 3;
+    ++y;
+    u += (n & 1);
+    v += (n & 1);
+  }
+  VP8YuvToBgr(y[0], u[0], v[0], dst + 0);
+  if (len > 1) {
+    VP8YuvToBgr(y[1], u[n & 1], v[n & 1], dst + 3);
+  }
+}
+
+#endif  // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersSSE2(void);
+
+void WebPInitSamplersSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  WebPSamplers[MODE_RGB]  = YuvToRgbRowSSE2;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRowSSE2;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRowSSE2;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRowSSE2;
+  WebPSamplers[MODE_ARGB] = YuvToArgbRowSSE2;
+#endif  // WEBP_USE_SSE2
+}
diff --git a/src/main/jni/libwebp/dsp/yuv_tables_sse2.h b/src/main/jni/libwebp/dsp/yuv_tables_sse2.h
new file mode 100644
index 000000000..2b0f05751
--- /dev/null
+++ b/src/main/jni/libwebp/dsp/yuv_tables_sse2.h
@@ -0,0 +1,536 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 tables for YUV->RGB conversion (12kB overall)
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+// This file is not compiled, but #include'd directly from yuv.c
+// Only used if WEBP_YUV_USE_SSE2_TABLES is defined.
+
+static const VP8kCstSSE2 VP8kYtoRGBA[256] = {
+  {{0xfffb77b0, 0xfffb77b0, 0xfffb77b0, 0x003fc000}},
+  {{0xfffbc235, 0xfffbc235, 0xfffbc235, 0x003fc000}},
+  {{0xfffc0cba, 0xfffc0cba, 0xfffc0cba, 0x003fc000}},
+  {{0xfffc573f, 0xfffc573f, 0xfffc573f, 0x003fc000}},
+  {{0xfffca1c4, 0xfffca1c4, 0xfffca1c4, 0x003fc000}},
+  {{0xfffcec49, 0xfffcec49, 0xfffcec49, 0x003fc000}},
+  {{0xfffd36ce, 0xfffd36ce, 0xfffd36ce, 0x003fc000}},
+  {{0xfffd8153, 0xfffd8153, 0xfffd8153, 0x003fc000}},
+  {{0xfffdcbd8, 0xfffdcbd8, 0xfffdcbd8, 0x003fc000}},
+  {{0xfffe165d, 0xfffe165d, 0xfffe165d, 0x003fc000}},
+  {{0xfffe60e2, 0xfffe60e2, 0xfffe60e2, 0x003fc000}},
+  {{0xfffeab67, 0xfffeab67, 0xfffeab67, 0x003fc000}},
+  {{0xfffef5ec, 0xfffef5ec, 0xfffef5ec, 0x003fc000}},
+  {{0xffff4071, 0xffff4071, 0xffff4071, 0x003fc000}},
+  {{0xffff8af6, 0xffff8af6, 0xffff8af6, 0x003fc000}},
+  {{0xffffd57b, 0xffffd57b, 0xffffd57b, 0x003fc000}},
+  {{0x00002000, 0x00002000, 0x00002000, 0x003fc000}},
+  {{0x00006a85, 0x00006a85, 0x00006a85, 0x003fc000}},
+  {{0x0000b50a, 0x0000b50a, 0x0000b50a, 0x003fc000}},
+  {{0x0000ff8f, 0x0000ff8f, 0x0000ff8f, 0x003fc000}},
+  {{0x00014a14, 0x00014a14, 0x00014a14, 0x003fc000}},
+  {{0x00019499, 0x00019499, 0x00019499, 0x003fc000}},
+  {{0x0001df1e, 0x0001df1e, 0x0001df1e, 0x003fc000}},
+  {{0x000229a3, 0x000229a3, 0x000229a3, 0x003fc000}},
+  {{0x00027428, 0x00027428, 0x00027428, 0x003fc000}},
+  {{0x0002bead, 0x0002bead, 0x0002bead, 0x003fc000}},
+  {{0x00030932, 0x00030932, 0x00030932, 0x003fc000}},
+  {{0x000353b7, 0x000353b7, 0x000353b7, 0x003fc000}},
+  {{0x00039e3c, 0x00039e3c, 0x00039e3c, 0x003fc000}},
+  {{0x0003e8c1, 0x0003e8c1, 0x0003e8c1, 0x003fc000}},
+  {{0x00043346, 0x00043346, 0x00043346, 0x003fc000}},
+  {{0x00047dcb, 0x00047dcb, 0x00047dcb, 0x003fc000}},
+  {{0x0004c850, 0x0004c850, 0x0004c850, 0x003fc000}},
+  {{0x000512d5, 0x000512d5, 0x000512d5, 0x003fc000}},
+  {{0x00055d5a, 0x00055d5a, 0x00055d5a, 0x003fc000}},
+  {{0x0005a7df, 0x0005a7df, 0x0005a7df, 0x003fc000}},
+  {{0x0005f264, 0x0005f264, 0x0005f264, 0x003fc000}},
+  {{0x00063ce9, 0x00063ce9, 0x00063ce9, 0x003fc000}},
+  {{0x0006876e, 0x0006876e, 0x0006876e, 0x003fc000}},
+  {{0x0006d1f3, 0x0006d1f3, 0x0006d1f3, 0x003fc000}},
+  {{0x00071c78, 0x00071c78, 0x00071c78, 0x003fc000}},
+  {{0x000766fd, 0x000766fd, 0x000766fd, 0x003fc000}},
+  {{0x0007b182, 0x0007b182, 0x0007b182, 0x003fc000}},
+  {{0x0007fc07, 0x0007fc07, 0x0007fc07, 0x003fc000}},
+  {{0x0008468c, 0x0008468c, 0x0008468c, 0x003fc000}},
+  {{0x00089111, 0x00089111, 0x00089111, 0x003fc000}},
+  {{0x0008db96, 0x0008db96, 0x0008db96, 0x003fc000}},
+  {{0x0009261b, 0x0009261b, 0x0009261b, 0x003fc000}},
+  {{0x000970a0, 0x000970a0, 0x000970a0, 0x003fc000}},
+  {{0x0009bb25, 0x0009bb25, 0x0009bb25, 0x003fc000}},
+  {{0x000a05aa, 0x000a05aa, 0x000a05aa, 0x003fc000}},
+  {{0x000a502f, 0x000a502f, 0x000a502f, 0x003fc000}},
+  {{0x000a9ab4, 0x000a9ab4, 0x000a9ab4, 0x003fc000}},
+  {{0x000ae539, 0x000ae539, 0x000ae539, 0x003fc000}},
+  {{0x000b2fbe, 0x000b2fbe, 0x000b2fbe, 0x003fc000}},
+  {{0x000b7a43, 0x000b7a43, 0x000b7a43, 0x003fc000}},
+  {{0x000bc4c8, 0x000bc4c8, 0x000bc4c8, 0x003fc000}},
+  {{0x000c0f4d, 0x000c0f4d, 0x000c0f4d, 0x003fc000}},
+  {{0x000c59d2, 0x000c59d2, 0x000c59d2, 0x003fc000}},
+  {{0x000ca457, 0x000ca457, 0x000ca457, 0x003fc000}},
+  {{0x000ceedc, 0x000ceedc, 0x000ceedc, 0x003fc000}},
+  {{0x000d3961, 0x000d3961, 0x000d3961, 0x003fc000}},
+  {{0x000d83e6, 0x000d83e6, 0x000d83e6, 0x003fc000}},
+  {{0x000dce6b, 0x000dce6b, 0x000dce6b, 0x003fc000}},
+  {{0x000e18f0, 0x000e18f0, 0x000e18f0, 0x003fc000}},
+  {{0x000e6375, 0x000e6375, 0x000e6375, 0x003fc000}},
+  {{0x000eadfa, 0x000eadfa, 0x000eadfa, 0x003fc000}},
+  {{0x000ef87f, 0x000ef87f, 0x000ef87f, 0x003fc000}},
+  {{0x000f4304, 0x000f4304, 0x000f4304, 0x003fc000}},
+  {{0x000f8d89, 0x000f8d89, 0x000f8d89, 0x003fc000}},
+  {{0x000fd80e, 0x000fd80e, 0x000fd80e, 0x003fc000}},
+  {{0x00102293, 0x00102293, 0x00102293, 0x003fc000}},
+  {{0x00106d18, 0x00106d18, 0x00106d18, 0x003fc000}},
+  {{0x0010b79d, 0x0010b79d, 0x0010b79d, 0x003fc000}},
+  {{0x00110222, 0x00110222, 0x00110222, 0x003fc000}},
+  {{0x00114ca7, 0x00114ca7, 0x00114ca7, 0x003fc000}},
+  {{0x0011972c, 0x0011972c, 0x0011972c, 0x003fc000}},
+  {{0x0011e1b1, 0x0011e1b1, 0x0011e1b1, 0x003fc000}},
+  {{0x00122c36, 0x00122c36, 0x00122c36, 0x003fc000}},
+  {{0x001276bb, 0x001276bb, 0x001276bb, 0x003fc000}},
+  {{0x0012c140, 0x0012c140, 0x0012c140, 0x003fc000}},
+  {{0x00130bc5, 0x00130bc5, 0x00130bc5, 0x003fc000}},
+  {{0x0013564a, 0x0013564a, 0x0013564a, 0x003fc000}},
+  {{0x0013a0cf, 0x0013a0cf, 0x0013a0cf, 0x003fc000}},
+  {{0x0013eb54, 0x0013eb54, 0x0013eb54, 0x003fc000}},
+  {{0x001435d9, 0x001435d9, 0x001435d9, 0x003fc000}},
+  {{0x0014805e, 0x0014805e, 0x0014805e, 0x003fc000}},
+  {{0x0014cae3, 0x0014cae3, 0x0014cae3, 0x003fc000}},
+  {{0x00151568, 0x00151568, 0x00151568, 0x003fc000}},
+  {{0x00155fed, 0x00155fed, 0x00155fed, 0x003fc000}},
+  {{0x0015aa72, 0x0015aa72, 0x0015aa72, 0x003fc000}},
+  {{0x0015f4f7, 0x0015f4f7, 0x0015f4f7, 0x003fc000}},
+  {{0x00163f7c, 0x00163f7c, 0x00163f7c, 0x003fc000}},
+  {{0x00168a01, 0x00168a01, 0x00168a01, 0x003fc000}},
+  {{0x0016d486, 0x0016d486, 0x0016d486, 0x003fc000}},
+  {{0x00171f0b, 0x00171f0b, 0x00171f0b, 0x003fc000}},
+  {{0x00176990, 0x00176990, 0x00176990, 0x003fc000}},
+  {{0x0017b415, 0x0017b415, 0x0017b415, 0x003fc000}},
+  {{0x0017fe9a, 0x0017fe9a, 0x0017fe9a, 0x003fc000}},
+  {{0x0018491f, 0x0018491f, 0x0018491f, 0x003fc000}},
+  {{0x001893a4, 0x001893a4, 0x001893a4, 0x003fc000}},
+  {{0x0018de29, 0x0018de29, 0x0018de29, 0x003fc000}},
+  {{0x001928ae, 0x001928ae, 0x001928ae, 0x003fc000}},
+  {{0x00197333, 0x00197333, 0x00197333, 0x003fc000}},
+  {{0x0019bdb8, 0x0019bdb8, 0x0019bdb8, 0x003fc000}},
+  {{0x001a083d, 0x001a083d, 0x001a083d, 0x003fc000}},
+  {{0x001a52c2, 0x001a52c2, 0x001a52c2, 0x003fc000}},
+  {{0x001a9d47, 0x001a9d47, 0x001a9d47, 0x003fc000}},
+  {{0x001ae7cc, 0x001ae7cc, 0x001ae7cc, 0x003fc000}},
+  {{0x001b3251, 0x001b3251, 0x001b3251, 0x003fc000}},
+  {{0x001b7cd6, 0x001b7cd6, 0x001b7cd6, 0x003fc000}},
+  {{0x001bc75b, 0x001bc75b, 0x001bc75b, 0x003fc000}},
+  {{0x001c11e0, 0x001c11e0, 0x001c11e0, 0x003fc000}},
+  {{0x001c5c65, 0x001c5c65, 0x001c5c65, 0x003fc000}},
+  {{0x001ca6ea, 0x001ca6ea, 0x001ca6ea, 0x003fc000}},
+  {{0x001cf16f, 0x001cf16f, 0x001cf16f, 0x003fc000}},
+  {{0x001d3bf4, 0x001d3bf4, 0x001d3bf4, 0x003fc000}},
+  {{0x001d8679, 0x001d8679, 0x001d8679, 0x003fc000}},
+  {{0x001dd0fe, 0x001dd0fe, 0x001dd0fe, 0x003fc000}},
+  {{0x001e1b83, 0x001e1b83, 0x001e1b83, 0x003fc000}},
+  {{0x001e6608, 0x001e6608, 0x001e6608, 0x003fc000}},
+  {{0x001eb08d, 0x001eb08d, 0x001eb08d, 0x003fc000}},
+  {{0x001efb12, 0x001efb12, 0x001efb12, 0x003fc000}},
+  {{0x001f4597, 0x001f4597, 0x001f4597, 0x003fc000}},
+  {{0x001f901c, 0x001f901c, 0x001f901c, 0x003fc000}},
+  {{0x001fdaa1, 0x001fdaa1, 0x001fdaa1, 0x003fc000}},
+  {{0x00202526, 0x00202526, 0x00202526, 0x003fc000}},
+  {{0x00206fab, 0x00206fab, 0x00206fab, 0x003fc000}},
+  {{0x0020ba30, 0x0020ba30, 0x0020ba30, 0x003fc000}},
+  {{0x002104b5, 0x002104b5, 0x002104b5, 0x003fc000}},
+  {{0x00214f3a, 0x00214f3a, 0x00214f3a, 0x003fc000}},
+  {{0x002199bf, 0x002199bf, 0x002199bf, 0x003fc000}},
+  {{0x0021e444, 0x0021e444, 0x0021e444, 0x003fc000}},
+  {{0x00222ec9, 0x00222ec9, 0x00222ec9, 0x003fc000}},
+  {{0x0022794e, 0x0022794e, 0x0022794e, 0x003fc000}},
+  {{0x0022c3d3, 0x0022c3d3, 0x0022c3d3, 0x003fc000}},
+  {{0x00230e58, 0x00230e58, 0x00230e58, 0x003fc000}},
+  {{0x002358dd, 0x002358dd, 0x002358dd, 0x003fc000}},
+  {{0x0023a362, 0x0023a362, 0x0023a362, 0x003fc000}},
+  {{0x0023ede7, 0x0023ede7, 0x0023ede7, 0x003fc000}},
+  {{0x0024386c, 0x0024386c, 0x0024386c, 0x003fc000}},
+  {{0x002482f1, 0x002482f1, 0x002482f1, 0x003fc000}},
+  {{0x0024cd76, 0x0024cd76, 0x0024cd76, 0x003fc000}},
+  {{0x002517fb, 0x002517fb, 0x002517fb, 0x003fc000}},
+  {{0x00256280, 0x00256280, 0x00256280, 0x003fc000}},
+  {{0x0025ad05, 0x0025ad05, 0x0025ad05, 0x003fc000}},
+  {{0x0025f78a, 0x0025f78a, 0x0025f78a, 0x003fc000}},
+  {{0x0026420f, 0x0026420f, 0x0026420f, 0x003fc000}},
+  {{0x00268c94, 0x00268c94, 0x00268c94, 0x003fc000}},
+  {{0x0026d719, 0x0026d719, 0x0026d719, 0x003fc000}},
+  {{0x0027219e, 0x0027219e, 0x0027219e, 0x003fc000}},
+  {{0x00276c23, 0x00276c23, 0x00276c23, 0x003fc000}},
+  {{0x0027b6a8, 0x0027b6a8, 0x0027b6a8, 0x003fc000}},
+  {{0x0028012d, 0x0028012d, 0x0028012d, 0x003fc000}},
+  {{0x00284bb2, 0x00284bb2, 0x00284bb2, 0x003fc000}},
+  {{0x00289637, 0x00289637, 0x00289637, 0x003fc000}},
+  {{0x0028e0bc, 0x0028e0bc, 0x0028e0bc, 0x003fc000}},
+  {{0x00292b41, 0x00292b41, 0x00292b41, 0x003fc000}},
+  {{0x002975c6, 0x002975c6, 0x002975c6, 0x003fc000}},
+  {{0x0029c04b, 0x0029c04b, 0x0029c04b, 0x003fc000}},
+  {{0x002a0ad0, 0x002a0ad0, 0x002a0ad0, 0x003fc000}},
+  {{0x002a5555, 0x002a5555, 0x002a5555, 0x003fc000}},
+  {{0x002a9fda, 0x002a9fda, 0x002a9fda, 0x003fc000}},
+  {{0x002aea5f, 0x002aea5f, 0x002aea5f, 0x003fc000}},
+  {{0x002b34e4, 0x002b34e4, 0x002b34e4, 0x003fc000}},
+  {{0x002b7f69, 0x002b7f69, 0x002b7f69, 0x003fc000}},
+  {{0x002bc9ee, 0x002bc9ee, 0x002bc9ee, 0x003fc000}},
+  {{0x002c1473, 0x002c1473, 0x002c1473, 0x003fc000}},
+  {{0x002c5ef8, 0x002c5ef8, 0x002c5ef8, 0x003fc000}},
+  {{0x002ca97d, 0x002ca97d, 0x002ca97d, 0x003fc000}},
+  {{0x002cf402, 0x002cf402, 0x002cf402, 0x003fc000}},
+  {{0x002d3e87, 0x002d3e87, 0x002d3e87, 0x003fc000}},
+  {{0x002d890c, 0x002d890c, 0x002d890c, 0x003fc000}},
+  {{0x002dd391, 0x002dd391, 0x002dd391, 0x003fc000}},
+  {{0x002e1e16, 0x002e1e16, 0x002e1e16, 0x003fc000}},
+  {{0x002e689b, 0x002e689b, 0x002e689b, 0x003fc000}},
+  {{0x002eb320, 0x002eb320, 0x002eb320, 0x003fc000}},
+  {{0x002efda5, 0x002efda5, 0x002efda5, 0x003fc000}},
+  {{0x002f482a, 0x002f482a, 0x002f482a, 0x003fc000}},
+  {{0x002f92af, 0x002f92af, 0x002f92af, 0x003fc000}},
+  {{0x002fdd34, 0x002fdd34, 0x002fdd34, 0x003fc000}},
+  {{0x003027b9, 0x003027b9, 0x003027b9, 0x003fc000}},
+  {{0x0030723e, 0x0030723e, 0x0030723e, 0x003fc000}},
+  {{0x0030bcc3, 0x0030bcc3, 0x0030bcc3, 0x003fc000}},
+  {{0x00310748, 0x00310748, 0x00310748, 0x003fc000}},
+  {{0x003151cd, 0x003151cd, 0x003151cd, 0x003fc000}},
+  {{0x00319c52, 0x00319c52, 0x00319c52, 0x003fc000}},
+  {{0x0031e6d7, 0x0031e6d7, 0x0031e6d7, 0x003fc000}},
+  {{0x0032315c, 0x0032315c, 0x0032315c, 0x003fc000}},
+  {{0x00327be1, 0x00327be1, 0x00327be1, 0x003fc000}},
+  {{0x0032c666, 0x0032c666, 0x0032c666, 0x003fc000}},
+  {{0x003310eb, 0x003310eb, 0x003310eb, 0x003fc000}},
+  {{0x00335b70, 0x00335b70, 0x00335b70, 0x003fc000}},
+  {{0x0033a5f5, 0x0033a5f5, 0x0033a5f5, 0x003fc000}},
+  {{0x0033f07a, 0x0033f07a, 0x0033f07a, 0x003fc000}},
+  {{0x00343aff, 0x00343aff, 0x00343aff, 0x003fc000}},
+  {{0x00348584, 0x00348584, 0x00348584, 0x003fc000}},
+  {{0x0034d009, 0x0034d009, 0x0034d009, 0x003fc000}},
+  {{0x00351a8e, 0x00351a8e, 0x00351a8e, 0x003fc000}},
+  {{0x00356513, 0x00356513, 0x00356513, 0x003fc000}},
+  {{0x0035af98, 0x0035af98, 0x0035af98, 0x003fc000}},
+  {{0x0035fa1d, 0x0035fa1d, 0x0035fa1d, 0x003fc000}},
+  {{0x003644a2, 0x003644a2, 0x003644a2, 0x003fc000}},
+  {{0x00368f27, 0x00368f27, 0x00368f27, 0x003fc000}},
+  {{0x0036d9ac, 0x0036d9ac, 0x0036d9ac, 0x003fc000}},
+  {{0x00372431, 0x00372431, 0x00372431, 0x003fc000}},
+  {{0x00376eb6, 0x00376eb6, 0x00376eb6, 0x003fc000}},
+  {{0x0037b93b, 0x0037b93b, 0x0037b93b, 0x003fc000}},
+  {{0x003803c0, 0x003803c0, 0x003803c0, 0x003fc000}},
+  {{0x00384e45, 0x00384e45, 0x00384e45, 0x003fc000}},
+  {{0x003898ca, 0x003898ca, 0x003898ca, 0x003fc000}},
+  {{0x0038e34f, 0x0038e34f, 0x0038e34f, 0x003fc000}},
+  {{0x00392dd4, 0x00392dd4, 0x00392dd4, 0x003fc000}},
+  {{0x00397859, 0x00397859, 0x00397859, 0x003fc000}},
+  {{0x0039c2de, 0x0039c2de, 0x0039c2de, 0x003fc000}},
+  {{0x003a0d63, 0x003a0d63, 0x003a0d63, 0x003fc000}},
+  {{0x003a57e8, 0x003a57e8, 0x003a57e8, 0x003fc000}},
+  {{0x003aa26d, 0x003aa26d, 0x003aa26d, 0x003fc000}},
+  {{0x003aecf2, 0x003aecf2, 0x003aecf2, 0x003fc000}},
+  {{0x003b3777, 0x003b3777, 0x003b3777, 0x003fc000}},
+  {{0x003b81fc, 0x003b81fc, 0x003b81fc, 0x003fc000}},
+  {{0x003bcc81, 0x003bcc81, 0x003bcc81, 0x003fc000}},
+  {{0x003c1706, 0x003c1706, 0x003c1706, 0x003fc000}},
+  {{0x003c618b, 0x003c618b, 0x003c618b, 0x003fc000}},
+  {{0x003cac10, 0x003cac10, 0x003cac10, 0x003fc000}},
+  {{0x003cf695, 0x003cf695, 0x003cf695, 0x003fc000}},
+  {{0x003d411a, 0x003d411a, 0x003d411a, 0x003fc000}},
+  {{0x003d8b9f, 0x003d8b9f, 0x003d8b9f, 0x003fc000}},
+  {{0x003dd624, 0x003dd624, 0x003dd624, 0x003fc000}},
+  {{0x003e20a9, 0x003e20a9, 0x003e20a9, 0x003fc000}},
+  {{0x003e6b2e, 0x003e6b2e, 0x003e6b2e, 0x003fc000}},
+  {{0x003eb5b3, 0x003eb5b3, 0x003eb5b3, 0x003fc000}},
+  {{0x003f0038, 0x003f0038, 0x003f0038, 0x003fc000}},
+  {{0x003f4abd, 0x003f4abd, 0x003f4abd, 0x003fc000}},
+  {{0x003f9542, 0x003f9542, 0x003f9542, 0x003fc000}},
+  {{0x003fdfc7, 0x003fdfc7, 0x003fdfc7, 0x003fc000}},
+  {{0x00402a4c, 0x00402a4c, 0x00402a4c, 0x003fc000}},
+  {{0x004074d1, 0x004074d1, 0x004074d1, 0x003fc000}},
+  {{0x0040bf56, 0x0040bf56, 0x0040bf56, 0x003fc000}},
+  {{0x004109db, 0x004109db, 0x004109db, 0x003fc000}},
+  {{0x00415460, 0x00415460, 0x00415460, 0x003fc000}},
+  {{0x00419ee5, 0x00419ee5, 0x00419ee5, 0x003fc000}},
+  {{0x0041e96a, 0x0041e96a, 0x0041e96a, 0x003fc000}},
+  {{0x004233ef, 0x004233ef, 0x004233ef, 0x003fc000}},
+  {{0x00427e74, 0x00427e74, 0x00427e74, 0x003fc000}},
+  {{0x0042c8f9, 0x0042c8f9, 0x0042c8f9, 0x003fc000}},
+  {{0x0043137e, 0x0043137e, 0x0043137e, 0x003fc000}},
+  {{0x00435e03, 0x00435e03, 0x00435e03, 0x003fc000}},
+  {{0x0043a888, 0x0043a888, 0x0043a888, 0x003fc000}},
+  {{0x0043f30d, 0x0043f30d, 0x0043f30d, 0x003fc000}},
+  {{0x00443d92, 0x00443d92, 0x00443d92, 0x003fc000}},
+  {{0x00448817, 0x00448817, 0x00448817, 0x003fc000}},
+  {{0x0044d29c, 0x0044d29c, 0x0044d29c, 0x003fc000}},
+  {{0x00451d21, 0x00451d21, 0x00451d21, 0x003fc000}},
+  {{0x004567a6, 0x004567a6, 0x004567a6, 0x003fc000}},
+  {{0x0045b22b, 0x0045b22b, 0x0045b22b, 0x003fc000}}
+};
+
+static const VP8kCstSSE2 VP8kUtoRGBA[256] = {
+  {{0, 0x000c8980, 0xffbf7300, 0}}, {{0, 0x000c706d, 0xffbff41a, 0}},
+  {{0, 0x000c575a, 0xffc07534, 0}}, {{0, 0x000c3e47, 0xffc0f64e, 0}},
+  {{0, 0x000c2534, 0xffc17768, 0}}, {{0, 0x000c0c21, 0xffc1f882, 0}},
+  {{0, 0x000bf30e, 0xffc2799c, 0}}, {{0, 0x000bd9fb, 0xffc2fab6, 0}},
+  {{0, 0x000bc0e8, 0xffc37bd0, 0}}, {{0, 0x000ba7d5, 0xffc3fcea, 0}},
+  {{0, 0x000b8ec2, 0xffc47e04, 0}}, {{0, 0x000b75af, 0xffc4ff1e, 0}},
+  {{0, 0x000b5c9c, 0xffc58038, 0}}, {{0, 0x000b4389, 0xffc60152, 0}},
+  {{0, 0x000b2a76, 0xffc6826c, 0}}, {{0, 0x000b1163, 0xffc70386, 0}},
+  {{0, 0x000af850, 0xffc784a0, 0}}, {{0, 0x000adf3d, 0xffc805ba, 0}},
+  {{0, 0x000ac62a, 0xffc886d4, 0}}, {{0, 0x000aad17, 0xffc907ee, 0}},
+  {{0, 0x000a9404, 0xffc98908, 0}}, {{0, 0x000a7af1, 0xffca0a22, 0}},
+  {{0, 0x000a61de, 0xffca8b3c, 0}}, {{0, 0x000a48cb, 0xffcb0c56, 0}},
+  {{0, 0x000a2fb8, 0xffcb8d70, 0}}, {{0, 0x000a16a5, 0xffcc0e8a, 0}},
+  {{0, 0x0009fd92, 0xffcc8fa4, 0}}, {{0, 0x0009e47f, 0xffcd10be, 0}},
+  {{0, 0x0009cb6c, 0xffcd91d8, 0}}, {{0, 0x0009b259, 0xffce12f2, 0}},
+  {{0, 0x00099946, 0xffce940c, 0}}, {{0, 0x00098033, 0xffcf1526, 0}},
+  {{0, 0x00096720, 0xffcf9640, 0}}, {{0, 0x00094e0d, 0xffd0175a, 0}},
+  {{0, 0x000934fa, 0xffd09874, 0}}, {{0, 0x00091be7, 0xffd1198e, 0}},
+  {{0, 0x000902d4, 0xffd19aa8, 0}}, {{0, 0x0008e9c1, 0xffd21bc2, 0}},
+  {{0, 0x0008d0ae, 0xffd29cdc, 0}}, {{0, 0x0008b79b, 0xffd31df6, 0}},
+  {{0, 0x00089e88, 0xffd39f10, 0}}, {{0, 0x00088575, 0xffd4202a, 0}},
+  {{0, 0x00086c62, 0xffd4a144, 0}}, {{0, 0x0008534f, 0xffd5225e, 0}},
+  {{0, 0x00083a3c, 0xffd5a378, 0}}, {{0, 0x00082129, 0xffd62492, 0}},
+  {{0, 0x00080816, 0xffd6a5ac, 0}}, {{0, 0x0007ef03, 0xffd726c6, 0}},
+  {{0, 0x0007d5f0, 0xffd7a7e0, 0}}, {{0, 0x0007bcdd, 0xffd828fa, 0}},
+  {{0, 0x0007a3ca, 0xffd8aa14, 0}}, {{0, 0x00078ab7, 0xffd92b2e, 0}},
+  {{0, 0x000771a4, 0xffd9ac48, 0}}, {{0, 0x00075891, 0xffda2d62, 0}},
+  {{0, 0x00073f7e, 0xffdaae7c, 0}}, {{0, 0x0007266b, 0xffdb2f96, 0}},
+  {{0, 0x00070d58, 0xffdbb0b0, 0}}, {{0, 0x0006f445, 0xffdc31ca, 0}},
+  {{0, 0x0006db32, 0xffdcb2e4, 0}}, {{0, 0x0006c21f, 0xffdd33fe, 0}},
+  {{0, 0x0006a90c, 0xffddb518, 0}}, {{0, 0x00068ff9, 0xffde3632, 0}},
+  {{0, 0x000676e6, 0xffdeb74c, 0}}, {{0, 0x00065dd3, 0xffdf3866, 0}},
+  {{0, 0x000644c0, 0xffdfb980, 0}}, {{0, 0x00062bad, 0xffe03a9a, 0}},
+  {{0, 0x0006129a, 0xffe0bbb4, 0}}, {{0, 0x0005f987, 0xffe13cce, 0}},
+  {{0, 0x0005e074, 0xffe1bde8, 0}}, {{0, 0x0005c761, 0xffe23f02, 0}},
+  {{0, 0x0005ae4e, 0xffe2c01c, 0}}, {{0, 0x0005953b, 0xffe34136, 0}},
+  {{0, 0x00057c28, 0xffe3c250, 0}}, {{0, 0x00056315, 0xffe4436a, 0}},
+  {{0, 0x00054a02, 0xffe4c484, 0}}, {{0, 0x000530ef, 0xffe5459e, 0}},
+  {{0, 0x000517dc, 0xffe5c6b8, 0}}, {{0, 0x0004fec9, 0xffe647d2, 0}},
+  {{0, 0x0004e5b6, 0xffe6c8ec, 0}}, {{0, 0x0004cca3, 0xffe74a06, 0}},
+  {{0, 0x0004b390, 0xffe7cb20, 0}}, {{0, 0x00049a7d, 0xffe84c3a, 0}},
+  {{0, 0x0004816a, 0xffe8cd54, 0}}, {{0, 0x00046857, 0xffe94e6e, 0}},
+  {{0, 0x00044f44, 0xffe9cf88, 0}}, {{0, 0x00043631, 0xffea50a2, 0}},
+  {{0, 0x00041d1e, 0xffead1bc, 0}}, {{0, 0x0004040b, 0xffeb52d6, 0}},
+  {{0, 0x0003eaf8, 0xffebd3f0, 0}}, {{0, 0x0003d1e5, 0xffec550a, 0}},
+  {{0, 0x0003b8d2, 0xffecd624, 0}}, {{0, 0x00039fbf, 0xffed573e, 0}},
+  {{0, 0x000386ac, 0xffedd858, 0}}, {{0, 0x00036d99, 0xffee5972, 0}},
+  {{0, 0x00035486, 0xffeeda8c, 0}}, {{0, 0x00033b73, 0xffef5ba6, 0}},
+  {{0, 0x00032260, 0xffefdcc0, 0}}, {{0, 0x0003094d, 0xfff05dda, 0}},
+  {{0, 0x0002f03a, 0xfff0def4, 0}}, {{0, 0x0002d727, 0xfff1600e, 0}},
+  {{0, 0x0002be14, 0xfff1e128, 0}}, {{0, 0x0002a501, 0xfff26242, 0}},
+  {{0, 0x00028bee, 0xfff2e35c, 0}}, {{0, 0x000272db, 0xfff36476, 0}},
+  {{0, 0x000259c8, 0xfff3e590, 0}}, {{0, 0x000240b5, 0xfff466aa, 0}},
+  {{0, 0x000227a2, 0xfff4e7c4, 0}}, {{0, 0x00020e8f, 0xfff568de, 0}},
+  {{0, 0x0001f57c, 0xfff5e9f8, 0}}, {{0, 0x0001dc69, 0xfff66b12, 0}},
+  {{0, 0x0001c356, 0xfff6ec2c, 0}}, {{0, 0x0001aa43, 0xfff76d46, 0}},
+  {{0, 0x00019130, 0xfff7ee60, 0}}, {{0, 0x0001781d, 0xfff86f7a, 0}},
+  {{0, 0x00015f0a, 0xfff8f094, 0}}, {{0, 0x000145f7, 0xfff971ae, 0}},
+  {{0, 0x00012ce4, 0xfff9f2c8, 0}}, {{0, 0x000113d1, 0xfffa73e2, 0}},
+  {{0, 0x0000fabe, 0xfffaf4fc, 0}}, {{0, 0x0000e1ab, 0xfffb7616, 0}},
+  {{0, 0x0000c898, 0xfffbf730, 0}}, {{0, 0x0000af85, 0xfffc784a, 0}},
+  {{0, 0x00009672, 0xfffcf964, 0}}, {{0, 0x00007d5f, 0xfffd7a7e, 0}},
+  {{0, 0x0000644c, 0xfffdfb98, 0}}, {{0, 0x00004b39, 0xfffe7cb2, 0}},
+  {{0, 0x00003226, 0xfffefdcc, 0}}, {{0, 0x00001913, 0xffff7ee6, 0}},
+  {{0, 0x00000000, 0x00000000, 0}}, {{0, 0xffffe6ed, 0x0000811a, 0}},
+  {{0, 0xffffcdda, 0x00010234, 0}}, {{0, 0xffffb4c7, 0x0001834e, 0}},
+  {{0, 0xffff9bb4, 0x00020468, 0}}, {{0, 0xffff82a1, 0x00028582, 0}},
+  {{0, 0xffff698e, 0x0003069c, 0}}, {{0, 0xffff507b, 0x000387b6, 0}},
+  {{0, 0xffff3768, 0x000408d0, 0}}, {{0, 0xffff1e55, 0x000489ea, 0}},
+  {{0, 0xffff0542, 0x00050b04, 0}}, {{0, 0xfffeec2f, 0x00058c1e, 0}},
+  {{0, 0xfffed31c, 0x00060d38, 0}}, {{0, 0xfffeba09, 0x00068e52, 0}},
+  {{0, 0xfffea0f6, 0x00070f6c, 0}}, {{0, 0xfffe87e3, 0x00079086, 0}},
+  {{0, 0xfffe6ed0, 0x000811a0, 0}}, {{0, 0xfffe55bd, 0x000892ba, 0}},
+  {{0, 0xfffe3caa, 0x000913d4, 0}}, {{0, 0xfffe2397, 0x000994ee, 0}},
+  {{0, 0xfffe0a84, 0x000a1608, 0}}, {{0, 0xfffdf171, 0x000a9722, 0}},
+  {{0, 0xfffdd85e, 0x000b183c, 0}}, {{0, 0xfffdbf4b, 0x000b9956, 0}},
+  {{0, 0xfffda638, 0x000c1a70, 0}}, {{0, 0xfffd8d25, 0x000c9b8a, 0}},
+  {{0, 0xfffd7412, 0x000d1ca4, 0}}, {{0, 0xfffd5aff, 0x000d9dbe, 0}},
+  {{0, 0xfffd41ec, 0x000e1ed8, 0}}, {{0, 0xfffd28d9, 0x000e9ff2, 0}},
+  {{0, 0xfffd0fc6, 0x000f210c, 0}}, {{0, 0xfffcf6b3, 0x000fa226, 0}},
+  {{0, 0xfffcdda0, 0x00102340, 0}}, {{0, 0xfffcc48d, 0x0010a45a, 0}},
+  {{0, 0xfffcab7a, 0x00112574, 0}}, {{0, 0xfffc9267, 0x0011a68e, 0}},
+  {{0, 0xfffc7954, 0x001227a8, 0}}, {{0, 0xfffc6041, 0x0012a8c2, 0}},
+  {{0, 0xfffc472e, 0x001329dc, 0}}, {{0, 0xfffc2e1b, 0x0013aaf6, 0}},
+  {{0, 0xfffc1508, 0x00142c10, 0}}, {{0, 0xfffbfbf5, 0x0014ad2a, 0}},
+  {{0, 0xfffbe2e2, 0x00152e44, 0}}, {{0, 0xfffbc9cf, 0x0015af5e, 0}},
+  {{0, 0xfffbb0bc, 0x00163078, 0}}, {{0, 0xfffb97a9, 0x0016b192, 0}},
+  {{0, 0xfffb7e96, 0x001732ac, 0}}, {{0, 0xfffb6583, 0x0017b3c6, 0}},
+  {{0, 0xfffb4c70, 0x001834e0, 0}}, {{0, 0xfffb335d, 0x0018b5fa, 0}},
+  {{0, 0xfffb1a4a, 0x00193714, 0}}, {{0, 0xfffb0137, 0x0019b82e, 0}},
+  {{0, 0xfffae824, 0x001a3948, 0}}, {{0, 0xfffacf11, 0x001aba62, 0}},
+  {{0, 0xfffab5fe, 0x001b3b7c, 0}}, {{0, 0xfffa9ceb, 0x001bbc96, 0}},
+  {{0, 0xfffa83d8, 0x001c3db0, 0}}, {{0, 0xfffa6ac5, 0x001cbeca, 0}},
+  {{0, 0xfffa51b2, 0x001d3fe4, 0}}, {{0, 0xfffa389f, 0x001dc0fe, 0}},
+  {{0, 0xfffa1f8c, 0x001e4218, 0}}, {{0, 0xfffa0679, 0x001ec332, 0}},
+  {{0, 0xfff9ed66, 0x001f444c, 0}}, {{0, 0xfff9d453, 0x001fc566, 0}},
+  {{0, 0xfff9bb40, 0x00204680, 0}}, {{0, 0xfff9a22d, 0x0020c79a, 0}},
+  {{0, 0xfff9891a, 0x002148b4, 0}}, {{0, 0xfff97007, 0x0021c9ce, 0}},
+  {{0, 0xfff956f4, 0x00224ae8, 0}}, {{0, 0xfff93de1, 0x0022cc02, 0}},
+  {{0, 0xfff924ce, 0x00234d1c, 0}}, {{0, 0xfff90bbb, 0x0023ce36, 0}},
+  {{0, 0xfff8f2a8, 0x00244f50, 0}}, {{0, 0xfff8d995, 0x0024d06a, 0}},
+  {{0, 0xfff8c082, 0x00255184, 0}}, {{0, 0xfff8a76f, 0x0025d29e, 0}},
+  {{0, 0xfff88e5c, 0x002653b8, 0}}, {{0, 0xfff87549, 0x0026d4d2, 0}},
+  {{0, 0xfff85c36, 0x002755ec, 0}}, {{0, 0xfff84323, 0x0027d706, 0}},
+  {{0, 0xfff82a10, 0x00285820, 0}}, {{0, 0xfff810fd, 0x0028d93a, 0}},
+  {{0, 0xfff7f7ea, 0x00295a54, 0}}, {{0, 0xfff7ded7, 0x0029db6e, 0}},
+  {{0, 0xfff7c5c4, 0x002a5c88, 0}}, {{0, 0xfff7acb1, 0x002adda2, 0}},
+  {{0, 0xfff7939e, 0x002b5ebc, 0}}, {{0, 0xfff77a8b, 0x002bdfd6, 0}},
+  {{0, 0xfff76178, 0x002c60f0, 0}}, {{0, 0xfff74865, 0x002ce20a, 0}},
+  {{0, 0xfff72f52, 0x002d6324, 0}}, {{0, 0xfff7163f, 0x002de43e, 0}},
+  {{0, 0xfff6fd2c, 0x002e6558, 0}}, {{0, 0xfff6e419, 0x002ee672, 0}},
+  {{0, 0xfff6cb06, 0x002f678c, 0}}, {{0, 0xfff6b1f3, 0x002fe8a6, 0}},
+  {{0, 0xfff698e0, 0x003069c0, 0}}, {{0, 0xfff67fcd, 0x0030eada, 0}},
+  {{0, 0xfff666ba, 0x00316bf4, 0}}, {{0, 0xfff64da7, 0x0031ed0e, 0}},
+  {{0, 0xfff63494, 0x00326e28, 0}}, {{0, 0xfff61b81, 0x0032ef42, 0}},
+  {{0, 0xfff6026e, 0x0033705c, 0}}, {{0, 0xfff5e95b, 0x0033f176, 0}},
+  {{0, 0xfff5d048, 0x00347290, 0}}, {{0, 0xfff5b735, 0x0034f3aa, 0}},
+  {{0, 0xfff59e22, 0x003574c4, 0}}, {{0, 0xfff5850f, 0x0035f5de, 0}},
+  {{0, 0xfff56bfc, 0x003676f8, 0}}, {{0, 0xfff552e9, 0x0036f812, 0}},
+  {{0, 0xfff539d6, 0x0037792c, 0}}, {{0, 0xfff520c3, 0x0037fa46, 0}},
+  {{0, 0xfff507b0, 0x00387b60, 0}}, {{0, 0xfff4ee9d, 0x0038fc7a, 0}},
+  {{0, 0xfff4d58a, 0x00397d94, 0}}, {{0, 0xfff4bc77, 0x0039feae, 0}},
+  {{0, 0xfff4a364, 0x003a7fc8, 0}}, {{0, 0xfff48a51, 0x003b00e2, 0}},
+  {{0, 0xfff4713e, 0x003b81fc, 0}}, {{0, 0xfff4582b, 0x003c0316, 0}},
+  {{0, 0xfff43f18, 0x003c8430, 0}}, {{0, 0xfff42605, 0x003d054a, 0}},
+  {{0, 0xfff40cf2, 0x003d8664, 0}}, {{0, 0xfff3f3df, 0x003e077e, 0}},
+  {{0, 0xfff3dacc, 0x003e8898, 0}}, {{0, 0xfff3c1b9, 0x003f09b2, 0}},
+  {{0, 0xfff3a8a6, 0x003f8acc, 0}}, {{0, 0xfff38f93, 0x00400be6, 0}}
+};
+
+static VP8kCstSSE2 VP8kVtoRGBA[256] = {
+  {{0xffcced80, 0x001a0400, 0, 0}}, {{0xffcd53a5, 0x0019cff8, 0, 0}},
+  {{0xffcdb9ca, 0x00199bf0, 0, 0}}, {{0xffce1fef, 0x001967e8, 0, 0}},
+  {{0xffce8614, 0x001933e0, 0, 0}}, {{0xffceec39, 0x0018ffd8, 0, 0}},
+  {{0xffcf525e, 0x0018cbd0, 0, 0}}, {{0xffcfb883, 0x001897c8, 0, 0}},
+  {{0xffd01ea8, 0x001863c0, 0, 0}}, {{0xffd084cd, 0x00182fb8, 0, 0}},
+  {{0xffd0eaf2, 0x0017fbb0, 0, 0}}, {{0xffd15117, 0x0017c7a8, 0, 0}},
+  {{0xffd1b73c, 0x001793a0, 0, 0}}, {{0xffd21d61, 0x00175f98, 0, 0}},
+  {{0xffd28386, 0x00172b90, 0, 0}}, {{0xffd2e9ab, 0x0016f788, 0, 0}},
+  {{0xffd34fd0, 0x0016c380, 0, 0}}, {{0xffd3b5f5, 0x00168f78, 0, 0}},
+  {{0xffd41c1a, 0x00165b70, 0, 0}}, {{0xffd4823f, 0x00162768, 0, 0}},
+  {{0xffd4e864, 0x0015f360, 0, 0}}, {{0xffd54e89, 0x0015bf58, 0, 0}},
+  {{0xffd5b4ae, 0x00158b50, 0, 0}}, {{0xffd61ad3, 0x00155748, 0, 0}},
+  {{0xffd680f8, 0x00152340, 0, 0}}, {{0xffd6e71d, 0x0014ef38, 0, 0}},
+  {{0xffd74d42, 0x0014bb30, 0, 0}}, {{0xffd7b367, 0x00148728, 0, 0}},
+  {{0xffd8198c, 0x00145320, 0, 0}}, {{0xffd87fb1, 0x00141f18, 0, 0}},
+  {{0xffd8e5d6, 0x0013eb10, 0, 0}}, {{0xffd94bfb, 0x0013b708, 0, 0}},
+  {{0xffd9b220, 0x00138300, 0, 0}}, {{0xffda1845, 0x00134ef8, 0, 0}},
+  {{0xffda7e6a, 0x00131af0, 0, 0}}, {{0xffdae48f, 0x0012e6e8, 0, 0}},
+  {{0xffdb4ab4, 0x0012b2e0, 0, 0}}, {{0xffdbb0d9, 0x00127ed8, 0, 0}},
+  {{0xffdc16fe, 0x00124ad0, 0, 0}}, {{0xffdc7d23, 0x001216c8, 0, 0}},
+  {{0xffdce348, 0x0011e2c0, 0, 0}}, {{0xffdd496d, 0x0011aeb8, 0, 0}},
+  {{0xffddaf92, 0x00117ab0, 0, 0}}, {{0xffde15b7, 0x001146a8, 0, 0}},
+  {{0xffde7bdc, 0x001112a0, 0, 0}}, {{0xffdee201, 0x0010de98, 0, 0}},
+  {{0xffdf4826, 0x0010aa90, 0, 0}}, {{0xffdfae4b, 0x00107688, 0, 0}},
+  {{0xffe01470, 0x00104280, 0, 0}}, {{0xffe07a95, 0x00100e78, 0, 0}},
+  {{0xffe0e0ba, 0x000fda70, 0, 0}}, {{0xffe146df, 0x000fa668, 0, 0}},
+  {{0xffe1ad04, 0x000f7260, 0, 0}}, {{0xffe21329, 0x000f3e58, 0, 0}},
+  {{0xffe2794e, 0x000f0a50, 0, 0}}, {{0xffe2df73, 0x000ed648, 0, 0}},
+  {{0xffe34598, 0x000ea240, 0, 0}}, {{0xffe3abbd, 0x000e6e38, 0, 0}},
+  {{0xffe411e2, 0x000e3a30, 0, 0}}, {{0xffe47807, 0x000e0628, 0, 0}},
+  {{0xffe4de2c, 0x000dd220, 0, 0}}, {{0xffe54451, 0x000d9e18, 0, 0}},
+  {{0xffe5aa76, 0x000d6a10, 0, 0}}, {{0xffe6109b, 0x000d3608, 0, 0}},
+  {{0xffe676c0, 0x000d0200, 0, 0}}, {{0xffe6dce5, 0x000ccdf8, 0, 0}},
+  {{0xffe7430a, 0x000c99f0, 0, 0}}, {{0xffe7a92f, 0x000c65e8, 0, 0}},
+  {{0xffe80f54, 0x000c31e0, 0, 0}}, {{0xffe87579, 0x000bfdd8, 0, 0}},
+  {{0xffe8db9e, 0x000bc9d0, 0, 0}}, {{0xffe941c3, 0x000b95c8, 0, 0}},
+  {{0xffe9a7e8, 0x000b61c0, 0, 0}}, {{0xffea0e0d, 0x000b2db8, 0, 0}},
+  {{0xffea7432, 0x000af9b0, 0, 0}}, {{0xffeada57, 0x000ac5a8, 0, 0}},
+  {{0xffeb407c, 0x000a91a0, 0, 0}}, {{0xffeba6a1, 0x000a5d98, 0, 0}},
+  {{0xffec0cc6, 0x000a2990, 0, 0}}, {{0xffec72eb, 0x0009f588, 0, 0}},
+  {{0xffecd910, 0x0009c180, 0, 0}}, {{0xffed3f35, 0x00098d78, 0, 0}},
+  {{0xffeda55a, 0x00095970, 0, 0}}, {{0xffee0b7f, 0x00092568, 0, 0}},
+  {{0xffee71a4, 0x0008f160, 0, 0}}, {{0xffeed7c9, 0x0008bd58, 0, 0}},
+  {{0xffef3dee, 0x00088950, 0, 0}}, {{0xffefa413, 0x00085548, 0, 0}},
+  {{0xfff00a38, 0x00082140, 0, 0}}, {{0xfff0705d, 0x0007ed38, 0, 0}},
+  {{0xfff0d682, 0x0007b930, 0, 0}}, {{0xfff13ca7, 0x00078528, 0, 0}},
+  {{0xfff1a2cc, 0x00075120, 0, 0}}, {{0xfff208f1, 0x00071d18, 0, 0}},
+  {{0xfff26f16, 0x0006e910, 0, 0}}, {{0xfff2d53b, 0x0006b508, 0, 0}},
+  {{0xfff33b60, 0x00068100, 0, 0}}, {{0xfff3a185, 0x00064cf8, 0, 0}},
+  {{0xfff407aa, 0x000618f0, 0, 0}}, {{0xfff46dcf, 0x0005e4e8, 0, 0}},
+  {{0xfff4d3f4, 0x0005b0e0, 0, 0}}, {{0xfff53a19, 0x00057cd8, 0, 0}},
+  {{0xfff5a03e, 0x000548d0, 0, 0}}, {{0xfff60663, 0x000514c8, 0, 0}},
+  {{0xfff66c88, 0x0004e0c0, 0, 0}}, {{0xfff6d2ad, 0x0004acb8, 0, 0}},
+  {{0xfff738d2, 0x000478b0, 0, 0}}, {{0xfff79ef7, 0x000444a8, 0, 0}},
+  {{0xfff8051c, 0x000410a0, 0, 0}}, {{0xfff86b41, 0x0003dc98, 0, 0}},
+  {{0xfff8d166, 0x0003a890, 0, 0}}, {{0xfff9378b, 0x00037488, 0, 0}},
+  {{0xfff99db0, 0x00034080, 0, 0}}, {{0xfffa03d5, 0x00030c78, 0, 0}},
+  {{0xfffa69fa, 0x0002d870, 0, 0}}, {{0xfffad01f, 0x0002a468, 0, 0}},
+  {{0xfffb3644, 0x00027060, 0, 0}}, {{0xfffb9c69, 0x00023c58, 0, 0}},
+  {{0xfffc028e, 0x00020850, 0, 0}}, {{0xfffc68b3, 0x0001d448, 0, 0}},
+  {{0xfffcced8, 0x0001a040, 0, 0}}, {{0xfffd34fd, 0x00016c38, 0, 0}},
+  {{0xfffd9b22, 0x00013830, 0, 0}}, {{0xfffe0147, 0x00010428, 0, 0}},
+  {{0xfffe676c, 0x0000d020, 0, 0}}, {{0xfffecd91, 0x00009c18, 0, 0}},
+  {{0xffff33b6, 0x00006810, 0, 0}}, {{0xffff99db, 0x00003408, 0, 0}},
+  {{0x00000000, 0x00000000, 0, 0}}, {{0x00006625, 0xffffcbf8, 0, 0}},
+  {{0x0000cc4a, 0xffff97f0, 0, 0}}, {{0x0001326f, 0xffff63e8, 0, 0}},
+  {{0x00019894, 0xffff2fe0, 0, 0}}, {{0x0001feb9, 0xfffefbd8, 0, 0}},
+  {{0x000264de, 0xfffec7d0, 0, 0}}, {{0x0002cb03, 0xfffe93c8, 0, 0}},
+  {{0x00033128, 0xfffe5fc0, 0, 0}}, {{0x0003974d, 0xfffe2bb8, 0, 0}},
+  {{0x0003fd72, 0xfffdf7b0, 0, 0}}, {{0x00046397, 0xfffdc3a8, 0, 0}},
+  {{0x0004c9bc, 0xfffd8fa0, 0, 0}}, {{0x00052fe1, 0xfffd5b98, 0, 0}},
+  {{0x00059606, 0xfffd2790, 0, 0}}, {{0x0005fc2b, 0xfffcf388, 0, 0}},
+  {{0x00066250, 0xfffcbf80, 0, 0}}, {{0x0006c875, 0xfffc8b78, 0, 0}},
+  {{0x00072e9a, 0xfffc5770, 0, 0}}, {{0x000794bf, 0xfffc2368, 0, 0}},
+  {{0x0007fae4, 0xfffbef60, 0, 0}}, {{0x00086109, 0xfffbbb58, 0, 0}},
+  {{0x0008c72e, 0xfffb8750, 0, 0}}, {{0x00092d53, 0xfffb5348, 0, 0}},
+  {{0x00099378, 0xfffb1f40, 0, 0}}, {{0x0009f99d, 0xfffaeb38, 0, 0}},
+  {{0x000a5fc2, 0xfffab730, 0, 0}}, {{0x000ac5e7, 0xfffa8328, 0, 0}},
+  {{0x000b2c0c, 0xfffa4f20, 0, 0}}, {{0x000b9231, 0xfffa1b18, 0, 0}},
+  {{0x000bf856, 0xfff9e710, 0, 0}}, {{0x000c5e7b, 0xfff9b308, 0, 0}},
+  {{0x000cc4a0, 0xfff97f00, 0, 0}}, {{0x000d2ac5, 0xfff94af8, 0, 0}},
+  {{0x000d90ea, 0xfff916f0, 0, 0}}, {{0x000df70f, 0xfff8e2e8, 0, 0}},
+  {{0x000e5d34, 0xfff8aee0, 0, 0}}, {{0x000ec359, 0xfff87ad8, 0, 0}},
+  {{0x000f297e, 0xfff846d0, 0, 0}}, {{0x000f8fa3, 0xfff812c8, 0, 0}},
+  {{0x000ff5c8, 0xfff7dec0, 0, 0}}, {{0x00105bed, 0xfff7aab8, 0, 0}},
+  {{0x0010c212, 0xfff776b0, 0, 0}}, {{0x00112837, 0xfff742a8, 0, 0}},
+  {{0x00118e5c, 0xfff70ea0, 0, 0}}, {{0x0011f481, 0xfff6da98, 0, 0}},
+  {{0x00125aa6, 0xfff6a690, 0, 0}}, {{0x0012c0cb, 0xfff67288, 0, 0}},
+  {{0x001326f0, 0xfff63e80, 0, 0}}, {{0x00138d15, 0xfff60a78, 0, 0}},
+  {{0x0013f33a, 0xfff5d670, 0, 0}}, {{0x0014595f, 0xfff5a268, 0, 0}},
+  {{0x0014bf84, 0xfff56e60, 0, 0}}, {{0x001525a9, 0xfff53a58, 0, 0}},
+  {{0x00158bce, 0xfff50650, 0, 0}}, {{0x0015f1f3, 0xfff4d248, 0, 0}},
+  {{0x00165818, 0xfff49e40, 0, 0}}, {{0x0016be3d, 0xfff46a38, 0, 0}},
+  {{0x00172462, 0xfff43630, 0, 0}}, {{0x00178a87, 0xfff40228, 0, 0}},
+  {{0x0017f0ac, 0xfff3ce20, 0, 0}}, {{0x001856d1, 0xfff39a18, 0, 0}},
+  {{0x0018bcf6, 0xfff36610, 0, 0}}, {{0x0019231b, 0xfff33208, 0, 0}},
+  {{0x00198940, 0xfff2fe00, 0, 0}}, {{0x0019ef65, 0xfff2c9f8, 0, 0}},
+  {{0x001a558a, 0xfff295f0, 0, 0}}, {{0x001abbaf, 0xfff261e8, 0, 0}},
+  {{0x001b21d4, 0xfff22de0, 0, 0}}, {{0x001b87f9, 0xfff1f9d8, 0, 0}},
+  {{0x001bee1e, 0xfff1c5d0, 0, 0}}, {{0x001c5443, 0xfff191c8, 0, 0}},
+  {{0x001cba68, 0xfff15dc0, 0, 0}}, {{0x001d208d, 0xfff129b8, 0, 0}},
+  {{0x001d86b2, 0xfff0f5b0, 0, 0}}, {{0x001decd7, 0xfff0c1a8, 0, 0}},
+  {{0x001e52fc, 0xfff08da0, 0, 0}}, {{0x001eb921, 0xfff05998, 0, 0}},
+  {{0x001f1f46, 0xfff02590, 0, 0}}, {{0x001f856b, 0xffeff188, 0, 0}},
+  {{0x001feb90, 0xffefbd80, 0, 0}}, {{0x002051b5, 0xffef8978, 0, 0}},
+  {{0x0020b7da, 0xffef5570, 0, 0}}, {{0x00211dff, 0xffef2168, 0, 0}},
+  {{0x00218424, 0xffeeed60, 0, 0}}, {{0x0021ea49, 0xffeeb958, 0, 0}},
+  {{0x0022506e, 0xffee8550, 0, 0}}, {{0x0022b693, 0xffee5148, 0, 0}},
+  {{0x00231cb8, 0xffee1d40, 0, 0}}, {{0x002382dd, 0xffede938, 0, 0}},
+  {{0x0023e902, 0xffedb530, 0, 0}}, {{0x00244f27, 0xffed8128, 0, 0}},
+  {{0x0024b54c, 0xffed4d20, 0, 0}}, {{0x00251b71, 0xffed1918, 0, 0}},
+  {{0x00258196, 0xffece510, 0, 0}}, {{0x0025e7bb, 0xffecb108, 0, 0}},
+  {{0x00264de0, 0xffec7d00, 0, 0}}, {{0x0026b405, 0xffec48f8, 0, 0}},
+  {{0x00271a2a, 0xffec14f0, 0, 0}}, {{0x0027804f, 0xffebe0e8, 0, 0}},
+  {{0x0027e674, 0xffebace0, 0, 0}}, {{0x00284c99, 0xffeb78d8, 0, 0}},
+  {{0x0028b2be, 0xffeb44d0, 0, 0}}, {{0x002918e3, 0xffeb10c8, 0, 0}},
+  {{0x00297f08, 0xffeadcc0, 0, 0}}, {{0x0029e52d, 0xffeaa8b8, 0, 0}},
+  {{0x002a4b52, 0xffea74b0, 0, 0}}, {{0x002ab177, 0xffea40a8, 0, 0}},
+  {{0x002b179c, 0xffea0ca0, 0, 0}}, {{0x002b7dc1, 0xffe9d898, 0, 0}},
+  {{0x002be3e6, 0xffe9a490, 0, 0}}, {{0x002c4a0b, 0xffe97088, 0, 0}},
+  {{0x002cb030, 0xffe93c80, 0, 0}}, {{0x002d1655, 0xffe90878, 0, 0}},
+  {{0x002d7c7a, 0xffe8d470, 0, 0}}, {{0x002de29f, 0xffe8a068, 0, 0}},
+  {{0x002e48c4, 0xffe86c60, 0, 0}}, {{0x002eaee9, 0xffe83858, 0, 0}},
+  {{0x002f150e, 0xffe80450, 0, 0}}, {{0x002f7b33, 0xffe7d048, 0, 0}},
+  {{0x002fe158, 0xffe79c40, 0, 0}}, {{0x0030477d, 0xffe76838, 0, 0}},
+  {{0x0030ada2, 0xffe73430, 0, 0}}, {{0x003113c7, 0xffe70028, 0, 0}},
+  {{0x003179ec, 0xffe6cc20, 0, 0}}, {{0x0031e011, 0xffe69818, 0, 0}},
+  {{0x00324636, 0xffe66410, 0, 0}}, {{0x0032ac5b, 0xffe63008, 0, 0}}
+};
diff --git a/src/main/jni/libwebp/enc/alpha.c b/src/main/jni/libwebp/enc/alpha.c
new file mode 100644
index 000000000..79cb94dbd
--- /dev/null
+++ b/src/main/jni/libwebp/enc/alpha.c
@@ -0,0 +1,433 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane compression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Encodes the given alpha data via specified compression method 'method'.
+// The pre-processing (quantization) is performed if 'quality' is less than 100.
+// For such cases, the encoding is lossy. The valid range is [0, 100] for
+// 'quality' and [0, 1] for 'method':
+//   'method = 0' - No compression;
+//   'method = 1' - Use lossless coder on the alpha plane only
+// 'filter' values [0, 4] correspond to prediction modes none, horizontal,
+// vertical & gradient filters. The prediction mode 4 will try all the
+// prediction modes 0 to 3 and pick the best one.
+// 'effort_level': specifies how much effort must be spent to try and reduce
+//  the compressed output size. In range 0 (quick) to 6 (slow).
+//
+// 'output' corresponds to the buffer containing compressed alpha data.
+//          This buffer is allocated by this method and caller should call
+//          WebPSafeFree(*output) when done.
+// 'output_size' corresponds to size of this compressed alpha buffer.
+//
+// Returns 1 on successfully encoding the alpha and
+//         0 if either:
+//           invalid quality or method, or
+//           memory allocation for the compressed data fails.
+
+#include "../enc/vp8li.h"
+
+static int EncodeLossless(const uint8_t* const data, int width, int height,
+                          int effort_level,  // in [0..6] range
+                          VP8LBitWriter* const bw,
+                          WebPAuxStats* const stats) {
+  int ok = 0;
+  WebPConfig config;
+  WebPPicture picture;
+
+  WebPPictureInit(&picture);
+  picture.width = width;
+  picture.height = height;
+  picture.use_argb = 1;
+  picture.stats = stats;
+  if (!WebPPictureAlloc(&picture)) return 0;
+
+  // Transfer the alpha values to the green channel.
+  {
+    int i, j;
+    uint32_t* dst = picture.argb;
+    const uint8_t* src = data;
+    for (j = 0; j < picture.height; ++j) {
+      for (i = 0; i < picture.width; ++i) {
+        dst[i] = src[i] << 8;  // we leave A/R/B channels zero'd.
+      }
+      src += width;
+      dst += picture.argb_stride;
+    }
+  }
+
+  WebPConfigInit(&config);
+  config.lossless = 1;
+  config.method = effort_level;  // impact is very small
+  // Set a low default quality for encoding alpha. Ensure that Alpha quality at
+  // lower methods (3 and below) is less than the threshold for triggering
+  // costly 'BackwardReferencesTraceBackwards'.
+  config.quality = 8.f * effort_level;
+  assert(config.quality >= 0 && config.quality <= 100.f);
+
+  ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK);
+  WebPPictureFree(&picture);
+  ok = ok && !bw->error_;
+  if (!ok) {
+    VP8LBitWriterDestroy(bw);
+    return 0;
+  }
+  return 1;
+
+}
+
+// -----------------------------------------------------------------------------
+
+// Small struct to hold the result of a filter mode compression attempt.
+typedef struct {
+  size_t score;
+  VP8BitWriter bw;
+  WebPAuxStats stats;
+} FilterTrial;
+
+// This function always returns an initialized 'bw' object, even upon error.
+static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
+                               int method, int filter, int reduce_levels,
+                               int effort_level,  // in [0..6] range
+                               uint8_t* const tmp_alpha,
+                               FilterTrial* result) {
+  int ok = 0;
+  const uint8_t* alpha_src;
+  WebPFilterFunc filter_func;
+  uint8_t header;
+  const size_t data_size = width * height;
+  const uint8_t* output = NULL;
+  size_t output_size = 0;
+  VP8LBitWriter tmp_bw;
+
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
+  assert(filter >= 0 && filter < WEBP_FILTER_LAST);
+  assert(method >= ALPHA_NO_COMPRESSION);
+  assert(method <= ALPHA_LOSSLESS_COMPRESSION);
+  assert(sizeof(header) == ALPHA_HEADER_LEN);
+  // TODO(skal): have a common function and #define's to validate alpha params.
+
+  filter_func = WebPFilters[filter];
+  if (filter_func != NULL) {
+    filter_func(data, width, height, width, tmp_alpha);
+    alpha_src = tmp_alpha;
+  }  else {
+    alpha_src = data;
+  }
+
+  if (method != ALPHA_NO_COMPRESSION) {
+    ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3);
+    ok = ok && EncodeLossless(alpha_src, width, height, effort_level,
+                              &tmp_bw, &result->stats);
+    if (ok) {
+      output = VP8LBitWriterFinish(&tmp_bw);
+      output_size = VP8LBitWriterNumBytes(&tmp_bw);
+      if (output_size > data_size) {
+        // compressed size is larger than source! Revert to uncompressed mode.
+        method = ALPHA_NO_COMPRESSION;
+        VP8LBitWriterDestroy(&tmp_bw);
+      }
+    } else {
+      VP8LBitWriterDestroy(&tmp_bw);
+      return 0;
+    }
+  }
+
+  if (method == ALPHA_NO_COMPRESSION) {
+    output = alpha_src;
+    output_size = data_size;
+    ok = 1;
+  }
+
+  // Emit final result.
+  header = method | (filter << 2);
+  if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
+
+  VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
+  ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
+  ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
+
+  if (method != ALPHA_NO_COMPRESSION) {
+    VP8LBitWriterDestroy(&tmp_bw);
+  }
+  ok = ok && !result->bw.error_;
+  result->score = VP8BitWriterSize(&result->bw);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// TODO(skal): move to dsp/ ?
+static void CopyPlane(const uint8_t* src, int src_stride,
+                      uint8_t* dst, int dst_stride, int width, int height) {
+  while (height-- > 0) {
+    memcpy(dst, src, width);
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+static int GetNumColors(const uint8_t* data, int width, int height,
+                        int stride) {
+  int j;
+  int colors = 0;
+  uint8_t color[256] = { 0 };
+
+  for (j = 0; j < height; ++j) {
+    int i;
+    const uint8_t* const p = data + j * stride;
+    for (i = 0; i < width; ++i) {
+      color[p[i]] = 1;
+    }
+  }
+  for (j = 0; j < 256; ++j) {
+    if (color[j] > 0) ++colors;
+  }
+  return colors;
+}
+
+#define FILTER_TRY_NONE (1 << WEBP_FILTER_NONE)
+#define FILTER_TRY_ALL ((1 << WEBP_FILTER_LAST) - 1)
+
+// Given the input 'filter' option, return an OR'd bit-set of filters to try.
+static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height,
+                             int filter, int effort_level) {
+  uint32_t bit_map = 0U;
+  if (filter == WEBP_FILTER_FAST) {
+    // Quick estimate of the best candidate.
+    int try_filter_none = (effort_level > 3);
+    const int kMinColorsForFilterNone = 16;
+    const int kMaxColorsForFilterNone = 192;
+    const int num_colors = GetNumColors(alpha, width, height, width);
+    // For low number of colors, NONE yields better compression.
+    filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE :
+             EstimateBestFilter(alpha, width, height, width);
+    bit_map |= 1 << filter;
+    // For large number of colors, try FILTER_NONE in addition to the best
+    // filter as well.
+    if (try_filter_none || num_colors > kMaxColorsForFilterNone) {
+      bit_map |= FILTER_TRY_NONE;
+    }
+  } else if (filter == WEBP_FILTER_NONE) {
+    bit_map = FILTER_TRY_NONE;
+  } else {  // WEBP_FILTER_BEST -> try all
+    bit_map = FILTER_TRY_ALL;
+  }
+  return bit_map;
+}
+
+static void InitFilterTrial(FilterTrial* const score) {
+  score->score = (size_t)~0U;
+  VP8BitWriterInit(&score->bw, 0);
+}
+
+static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
+                                 size_t data_size, int method, int filter,
+                                 int reduce_levels, int effort_level,
+                                 uint8_t** const output,
+                                 size_t* const output_size,
+                                 WebPAuxStats* const stats) {
+  int ok = 1;
+  FilterTrial best;
+  uint32_t try_map =
+      GetFilterMap(alpha, width, height, filter, effort_level);
+  InitFilterTrial(&best);
+  if (try_map != FILTER_TRY_NONE) {
+    uint8_t* filtered_alpha =  (uint8_t*)WebPSafeMalloc(1ULL, data_size);
+    if (filtered_alpha == NULL) return 0;
+
+    for (filter = WEBP_FILTER_NONE; ok && try_map; ++filter, try_map >>= 1) {
+      if (try_map & 1) {
+        FilterTrial trial;
+        ok = EncodeAlphaInternal(alpha, width, height, method, filter,
+                                 reduce_levels, effort_level, filtered_alpha,
+                                 &trial);
+        if (ok && trial.score < best.score) {
+          VP8BitWriterWipeOut(&best.bw);
+          best = trial;
+        } else {
+          VP8BitWriterWipeOut(&trial.bw);
+        }
+      }
+    }
+    WebPSafeFree(filtered_alpha);
+  } else {
+    ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE,
+                             reduce_levels, effort_level, NULL, &best);
+  }
+  if (ok) {
+    if (stats != NULL) *stats = best.stats;
+    *output_size = VP8BitWriterSize(&best.bw);
+    *output = VP8BitWriterBuf(&best.bw);
+  } else {
+    VP8BitWriterWipeOut(&best.bw);
+  }
+  return ok;
+}
+
+static int EncodeAlpha(VP8Encoder* const enc,
+                       int quality, int method, int filter,
+                       int effort_level,
+                       uint8_t** const output, size_t* const output_size) {
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+
+  uint8_t* quant_alpha = NULL;
+  const size_t data_size = width * height;
+  uint64_t sse = 0;
+  int ok = 1;
+  const int reduce_levels = (quality < 100);
+
+  // quick sanity checks
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
+  assert(enc != NULL && pic != NULL && pic->a != NULL);
+  assert(output != NULL && output_size != NULL);
+  assert(width > 0 && height > 0);
+  assert(pic->a_stride >= width);
+  assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
+
+  if (quality < 0 || quality > 100) {
+    return 0;
+  }
+
+  if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
+    return 0;
+  }
+
+  if (method == ALPHA_NO_COMPRESSION) {
+    // Don't filter, as filtering will make no impact on compressed size.
+    filter = WEBP_FILTER_NONE;
+  }
+
+  quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
+  if (quant_alpha == NULL) {
+    return 0;
+  }
+
+  // Extract alpha data (width x height) from raw_data (stride x height).
+  CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+
+  if (reduce_levels) {  // No Quantization required for 'quality = 100'.
+    // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
+    // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16]
+    // and Quality:]70, 100] -> Levels:]16, 256].
+    const int alpha_levels = (quality <= 70) ? (2 + quality / 5)
+                                             : (16 + (quality - 70) * 8);
+    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse);
+  }
+
+  if (ok) {
+    ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
+                               filter, reduce_levels, effort_level, output,
+                               output_size, pic->stats);
+    if (pic->stats != NULL) {  // need stats?
+      pic->stats->coded_size += (int)(*output_size);
+      enc->sse_[3] = sse;
+    }
+  }
+
+  WebPSafeFree(quant_alpha);
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// Main calls
+
+static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
+  const WebPConfig* config = enc->config_;
+  uint8_t* alpha_data = NULL;
+  size_t alpha_size = 0;
+  const int effort_level = config->method;  // maps to [0..6]
+  const WEBP_FILTER_TYPE filter =
+      (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+                                       WEBP_FILTER_BEST;
+  if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+                   filter, effort_level, &alpha_data, &alpha_size)) {
+    return 0;
+  }
+  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
+    WebPSafeFree(alpha_data);
+    return 0;
+  }
+  enc->alpha_data_size_ = (uint32_t)alpha_size;
+  enc->alpha_data_ = alpha_data;
+  (void)dummy;
+  return 1;
+}
+
+void VP8EncInitAlpha(VP8Encoder* const enc) {
+  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
+  enc->alpha_data_ = NULL;
+  enc->alpha_data_size_ = 0;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPGetWorkerInterface()->Init(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = (WebPWorkerHook)CompressAlphaJob;
+  }
+}
+
+int VP8EncStartAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      // Makes sure worker is good to go.
+      if (!WebPGetWorkerInterface()->Reset(worker)) {
+        return 0;
+      }
+      WebPGetWorkerInterface()->Launch(worker);
+      return 1;
+    } else {
+      return CompressAlphaJob(enc, NULL);   // just do the job right away
+    }
+  }
+  return 1;
+}
+
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPGetWorkerInterface()->Sync(worker)) return 0;  // error
+    }
+  }
+  return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  int ok = 1;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    // finish anything left in flight
+    ok = WebPGetWorkerInterface()->Sync(worker);
+    // still need to end the worker, even if !ok
+    WebPGetWorkerInterface()->End(worker);
+  }
+  WebPSafeFree(enc->alpha_data_);
+  enc->alpha_data_ = NULL;
+  enc->alpha_data_size_ = 0;
+  enc->has_alpha_ = 0;
+  return ok;
+}
+
diff --git a/src/main/jni/libwebp/enc/analysis.c b/src/main/jni/libwebp/enc/analysis.c
new file mode 100644
index 000000000..e019465bb
--- /dev/null
+++ b/src/main/jni/libwebp/enc/analysis.c
@@ -0,0 +1,498 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Macroblock analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../utils/utils.h"
+
+#define MAX_ITERS_K_MEANS  6
+
+//------------------------------------------------------------------------------
+// Smooth the segment map by replacing isolated block by the majority of its
+// neighbours.
+
+static void SmoothSegmentMap(VP8Encoder* const enc) {
+  int n, x, y;
+  const int w = enc->mb_w_;
+  const int h = enc->mb_h_;
+  const int majority_cnt_3_x_3_grid = 5;
+  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
+  assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
+
+  if (tmp == NULL) return;
+  for (y = 1; y < h - 1; ++y) {
+    for (x = 1; x < w - 1; ++x) {
+      int cnt[NUM_MB_SEGMENTS] = { 0 };
+      const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
+      int majority_seg = mb->segment_;
+      // Check the 8 neighbouring segment values.
+      cnt[mb[-w - 1].segment_]++;  // top-left
+      cnt[mb[-w + 0].segment_]++;  // top
+      cnt[mb[-w + 1].segment_]++;  // top-right
+      cnt[mb[   - 1].segment_]++;  // left
+      cnt[mb[   + 1].segment_]++;  // right
+      cnt[mb[ w - 1].segment_]++;  // bottom-left
+      cnt[mb[ w + 0].segment_]++;  // bottom
+      cnt[mb[ w + 1].segment_]++;  // bottom-right
+      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
+        if (cnt[n] >= majority_cnt_3_x_3_grid) {
+          majority_seg = n;
+          break;
+        }
+      }
+      tmp[x + y * w] = majority_seg;
+    }
+  }
+  for (y = 1; y < h - 1; ++y) {
+    for (x = 1; x < w - 1; ++x) {
+      VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
+      mb->segment_ = tmp[x + y * w];
+    }
+  }
+  WebPSafeFree(tmp);
+}
+
+//------------------------------------------------------------------------------
+// set segment susceptibility alpha_ / beta_
+
+static WEBP_INLINE int clip(int v, int m, int M) {
+  return (v < m) ? m : (v > M) ? M : v;
+}
+
+static void SetSegmentAlphas(VP8Encoder* const enc,
+                             const int centers[NUM_MB_SEGMENTS],
+                             int mid) {
+  const int nb = enc->segment_hdr_.num_segments_;
+  int min = centers[0], max = centers[0];
+  int n;
+
+  if (nb > 1) {
+    for (n = 0; n < nb; ++n) {
+      if (min > centers[n]) min = centers[n];
+      if (max < centers[n]) max = centers[n];
+    }
+  }
+  if (max == min) max = min + 1;
+  assert(mid <= max && mid >= min);
+  for (n = 0; n < nb; ++n) {
+    const int alpha = 255 * (centers[n] - mid) / (max - min);
+    const int beta = 255 * (centers[n] - min) / (max - min);
+    enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
+    enc->dqm_[n].beta_ = clip(beta, 0, 255);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
+#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
+#define DEFAULT_ALPHA (-1)
+#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
+
+static int FinalAlphaValue(int alpha) {
+  alpha = MAX_ALPHA - alpha;
+  return clip(alpha, 0, MAX_ALPHA);
+}
+
+static int GetAlpha(const VP8Histogram* const histo) {
+  int max_value = 0, last_non_zero = 1;
+  int k;
+  int alpha;
+  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
+    const int value = histo->distribution[k];
+    if (value > 0) {
+      if (value > max_value) max_value = value;
+      last_non_zero = k;
+    }
+  }
+  // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
+  // values which happen to be mostly noise. This leaves the maximum precision
+  // for handling the useful small values which contribute most.
+  alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
+  return alpha;
+}
+
+static void MergeHistograms(const VP8Histogram* const in,
+                            VP8Histogram* const out) {
+  int i;
+  for (i = 0; i <= MAX_COEFF_THRESH; ++i) {
+    out->distribution[i] += in->distribution[i];
+  }
+}
+
+//------------------------------------------------------------------------------
+// Simplified k-Means, to assign Nb segments based on alpha-histogram
+
+static void AssignSegments(VP8Encoder* const enc,
+                           const int alphas[MAX_ALPHA + 1]) {
+  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
+  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
+  const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
+                 enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
+  int centers[NUM_MB_SEGMENTS];
+  int weighted_average = 0;
+  int map[MAX_ALPHA + 1];
+  int a, n, k;
+  int min_a = 0, max_a = MAX_ALPHA, range_a;
+  // 'int' type is ok for histo, and won't overflow
+  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
+
+  assert(nb >= 1);
+  assert(nb <= NUM_MB_SEGMENTS);
+
+  // bracket the input
+  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
+  min_a = n;
+  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
+  max_a = n;
+  range_a = max_a - min_a;
+
+  // Spread initial centers evenly
+  for (k = 0, n = 1; k < nb; ++k, n += 2) {
+    assert(n < 2 * nb);
+    centers[k] = min_a + (n * range_a) / (2 * nb);
+  }
+
+  for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {     // few iters are enough
+    int total_weight;
+    int displaced;
+    // Reset stats
+    for (n = 0; n < nb; ++n) {
+      accum[n] = 0;
+      dist_accum[n] = 0;
+    }
+    // Assign nearest center for each 'a'
+    n = 0;    // track the nearest center for current 'a'
+    for (a = min_a; a <= max_a; ++a) {
+      if (alphas[a]) {
+        while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
+          n++;
+        }
+        map[a] = n;
+        // accumulate contribution into best centroid
+        dist_accum[n] += a * alphas[a];
+        accum[n] += alphas[a];
+      }
+    }
+    // All point are classified. Move the centroids to the
+    // center of their respective cloud.
+    displaced = 0;
+    weighted_average = 0;
+    total_weight = 0;
+    for (n = 0; n < nb; ++n) {
+      if (accum[n]) {
+        const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
+        displaced += abs(centers[n] - new_center);
+        centers[n] = new_center;
+        weighted_average += new_center * accum[n];
+        total_weight += accum[n];
+      }
+    }
+    weighted_average = (weighted_average + total_weight / 2) / total_weight;
+    if (displaced < 5) break;   // no need to keep on looping...
+  }
+
+  // Map each original value to the closest centroid
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    VP8MBInfo* const mb = &enc->mb_info_[n];
+    const int alpha = mb->alpha_;
+    mb->segment_ = map[alpha];
+    mb->alpha_ = centers[map[alpha]];  // for the record.
+  }
+
+  if (nb > 1) {
+    const int smooth = (enc->config_->preprocessing & 1);
+    if (smooth) SmoothSegmentMap(enc);
+  }
+
+  SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
+}
+
+//------------------------------------------------------------------------------
+// Macroblock analysis: collect histogram for each mode, deduce the maximal
+// susceptibility and set best modes for this macroblock.
+// Segment assignment is done later.
+
+// Number of modes to inspect for alpha_ evaluation. We don't need to test all
+// the possible modes during the analysis phase: we risk falling into a local
+// optimum, or be subject to boundary effect
+#define MAX_INTRA16_MODE 2
+#define MAX_INTRA4_MODE  2
+#define MAX_UV_MODE      2
+
+static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
+  const int max_mode = MAX_INTRA16_MODE;
+  int mode;
+  int best_alpha = DEFAULT_ALPHA;
+  int best_mode = 0;
+
+  VP8MakeLuma16Preds(it);
+  for (mode = 0; mode < max_mode; ++mode) {
+    VP8Histogram histo = { { 0 } };
+    int alpha;
+
+    VP8CollectHistogram(it->yuv_in_ + Y_OFF,
+                        it->yuv_p_ + VP8I16ModeOffsets[mode],
+                        0, 16, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
+      best_alpha = alpha;
+      best_mode = mode;
+    }
+  }
+  VP8SetIntra16Mode(it, best_mode);
+  return best_alpha;
+}
+
+static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
+                                   int best_alpha) {
+  uint8_t modes[16];
+  const int max_mode = MAX_INTRA4_MODE;
+  int i4_alpha;
+  VP8Histogram total_histo = { { 0 } };
+  int cur_histo = 0;
+
+  VP8IteratorStartI4(it);
+  do {
+    int mode;
+    int best_mode_alpha = DEFAULT_ALPHA;
+    VP8Histogram histos[2];
+    const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+
+    VP8MakeIntra4Preds(it);
+    for (mode = 0; mode < max_mode; ++mode) {
+      int alpha;
+
+      memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
+      VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
+                          0, 1, &histos[cur_histo]);
+      alpha = GetAlpha(&histos[cur_histo]);
+      if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) {
+        best_mode_alpha = alpha;
+        modes[it->i4_] = mode;
+        cur_histo ^= 1;   // keep track of best histo so far.
+      }
+    }
+    // accumulate best histogram
+    MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
+    // Note: we reuse the original samples for predictors
+  } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
+
+  i4_alpha = GetAlpha(&total_histo);
+  if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
+    VP8SetIntra4Mode(it, modes);
+    best_alpha = i4_alpha;
+  }
+  return best_alpha;
+}
+
+static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
+  int best_alpha = DEFAULT_ALPHA;
+  int best_mode = 0;
+  const int max_mode = MAX_UV_MODE;
+  int mode;
+
+  VP8MakeChroma8Preds(it);
+  for (mode = 0; mode < max_mode; ++mode) {
+    VP8Histogram histo = { { 0 } };
+    int alpha;
+    VP8CollectHistogram(it->yuv_in_ + U_OFF,
+                        it->yuv_p_ + VP8UVModeOffsets[mode],
+                        16, 16 + 4 + 4, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
+      best_alpha = alpha;
+      best_mode = mode;
+    }
+  }
+  VP8SetIntraUVMode(it, best_mode);
+  return best_alpha;
+}
+
+static void MBAnalyze(VP8EncIterator* const it,
+                      int alphas[MAX_ALPHA + 1],
+                      int* const alpha, int* const uv_alpha) {
+  const VP8Encoder* const enc = it->enc_;
+  int best_alpha, best_uv_alpha;
+
+  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
+  VP8SetSkip(it, 0);         // not skipped
+  VP8SetSegment(it, 0);      // default segment, spec-wise.
+
+  best_alpha = MBAnalyzeBestIntra16Mode(it);
+  if (enc->method_ >= 5) {
+    // We go and make a fast decision for intra4/intra16.
+    // It's usually not a good and definitive pick, but helps seeding the stats
+    // about level bit-cost.
+    // TODO(skal): improve criterion.
+    best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha);
+  }
+  best_uv_alpha = MBAnalyzeBestUVMode(it);
+
+  // Final susceptibility mix
+  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
+  best_alpha = FinalAlphaValue(best_alpha);
+  alphas[best_alpha]++;
+  it->mb_->alpha_ = best_alpha;   // for later remapping.
+
+  // Accumulate for later complexity analysis.
+  *alpha += best_alpha;   // mixed susceptibility (not just luma)
+  *uv_alpha += best_uv_alpha;
+}
+
+static void DefaultMBInfo(VP8MBInfo* const mb) {
+  mb->type_ = 1;     // I16x16
+  mb->uv_mode_ = 0;
+  mb->skip_ = 0;     // not skipped
+  mb->segment_ = 0;  // default segment
+  mb->alpha_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Main analysis loop:
+// Collect all susceptibilities for each macroblock and record their
+// distribution in alphas[]. Segments is assigned a-posteriori, based on
+// this histogram.
+// We also pick an intra16 prediction mode, which shouldn't be considered
+// final except for fast-encode settings. We can also pick some intra4 modes
+// and decide intra4/intra16, but that's usually almost always a bad choice at
+// this stage.
+
+static void ResetAllMBInfo(VP8Encoder* const enc) {
+  int n;
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    DefaultMBInfo(&enc->mb_info_[n]);
+  }
+  // Default susceptibilities.
+  enc->dqm_[0].alpha_ = 0;
+  enc->dqm_[0].beta_ = 0;
+  // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
+  enc->alpha_ = 0;
+  enc->uv_alpha_ = 0;
+  WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+// struct used to collect job result
+typedef struct {
+  WebPWorker worker;
+  int alphas[MAX_ALPHA + 1];
+  int alpha, uv_alpha;
+  VP8EncIterator it;
+  int delta_progress;
+} SegmentJob;
+
+// main work call
+static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) {
+  int ok = 1;
+  if (!VP8IteratorIsDone(it)) {
+    uint8_t tmp[32 + ALIGN_CST];
+    uint8_t* const scratch = (uint8_t*)DO_ALIGN(tmp);
+    do {
+      // Let's pretend we have perfect lossless reconstruction.
+      VP8IteratorImport(it, scratch);
+      MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
+      ok = VP8IteratorProgress(it, job->delta_progress);
+    } while (ok && VP8IteratorNext(it));
+  }
+  return ok;
+}
+
+static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
+  int i;
+  for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
+  dst->alpha += src->alpha;
+  dst->uv_alpha += src->uv_alpha;
+}
+
+// initialize the job struct with some TODOs
+static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
+                           int start_row, int end_row) {
+  WebPGetWorkerInterface()->Init(&job->worker);
+  job->worker.data1 = job;
+  job->worker.data2 = &job->it;
+  job->worker.hook = (WebPWorkerHook)DoSegmentsJob;
+  VP8IteratorInit(enc, &job->it);
+  VP8IteratorSetRow(&job->it, start_row);
+  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
+  memset(job->alphas, 0, sizeof(job->alphas));
+  job->alpha = 0;
+  job->uv_alpha = 0;
+  // only one of both jobs can record the progress, since we don't
+  // expect the user's hook to be multi-thread safe
+  job->delta_progress = (start_row == 0) ? 20 : 0;
+}
+
+// main entry point
+int VP8EncAnalyze(VP8Encoder* const enc) {
+  int ok = 1;
+  const int do_segments =
+      enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
+      (enc->segment_hdr_.num_segments_ > 1) ||
+      (enc->method_ == 0);  // for method 0, we need preds_[] to be filled.
+  if (do_segments) {
+    const int last_row = enc->mb_h_;
+    // We give a little more than a half work to the main thread.
+    const int split_row = (9 * last_row + 15) >> 4;
+    const int total_mb = last_row * enc->mb_w_;
+#ifdef WEBP_USE_THREAD
+    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
+    const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
+#else
+    const int do_mt = 0;
+#endif
+    const WebPWorkerInterface* const worker_interface =
+        WebPGetWorkerInterface();
+    SegmentJob main_job;
+    if (do_mt) {
+      SegmentJob side_job;
+      // Note the use of '&' instead of '&&' because we must call the functions
+      // no matter what.
+      InitSegmentJob(enc, &main_job, 0, split_row);
+      InitSegmentJob(enc, &side_job, split_row, last_row);
+      // we don't need to call Reset() on main_job.worker, since we're calling
+      // WebPWorkerExecute() on it
+      ok &= worker_interface->Reset(&side_job.worker);
+      // launch the two jobs in parallel
+      if (ok) {
+        worker_interface->Launch(&side_job.worker);
+        worker_interface->Execute(&main_job.worker);
+        ok &= worker_interface->Sync(&side_job.worker);
+        ok &= worker_interface->Sync(&main_job.worker);
+      }
+      worker_interface->End(&side_job.worker);
+      if (ok) MergeJobs(&side_job, &main_job);  // merge results together
+    } else {
+      // Even for single-thread case, we use the generic Worker tools.
+      InitSegmentJob(enc, &main_job, 0, last_row);
+      worker_interface->Execute(&main_job.worker);
+      ok &= worker_interface->Sync(&main_job.worker);
+    }
+    worker_interface->End(&main_job.worker);
+    if (ok) {
+      enc->alpha_ = main_job.alpha / total_mb;
+      enc->uv_alpha_ = main_job.uv_alpha / total_mb;
+      AssignSegments(enc, main_job.alphas);
+    }
+  } else {   // Use only one default segment.
+    ResetAllMBInfo(enc);
+  }
+  return ok;
+}
+
diff --git a/src/main/jni/libwebp/enc/backward_references.c b/src/main/jni/libwebp/enc/backward_references.c
new file mode 100644
index 000000000..a3c30aa07
--- /dev/null
+++ b/src/main/jni/libwebp/enc/backward_references.c
@@ -0,0 +1,975 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#include <assert.h>
+#include <math.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/color_cache.h"
+#include "../utils/utils.h"
+
+#define VALUES_IN_BYTE 256
+
+#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)
+
+#define MIN_BLOCK_SIZE 256  // minimum block size for backward references
+
+#define MAX_ENTROPY    (1e30f)
+
+// 1M window (4M bytes) minus 120 special codes for short distances.
+#define WINDOW_SIZE ((1 << 20) - 120)
+
+// Bounds for the match length.
+#define MIN_LENGTH 2
+#define MAX_LENGTH 4096
+
+// -----------------------------------------------------------------------------
+
+static const uint8_t plane_to_code_lut[128] = {
+ 96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
+ 101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
+ 102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
+ 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
+ 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
+ 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
+ 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
+ 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
+};
+
+static int DistanceToPlaneCode(int xsize, int dist) {
+  const int yoffset = dist / xsize;
+  const int xoffset = dist - yoffset * xsize;
+  if (xoffset <= 8 && yoffset < 8) {
+    return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
+  } else if (xoffset > xsize - 8 && yoffset < 7) {
+    return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
+  }
+  return dist + 120;
+}
+
+static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
+                                       const uint32_t* const array2,
+                                       const int max_limit) {
+  int match_len = 0;
+  while (match_len < max_limit && array1[match_len] == array2[match_len]) {
+    ++match_len;
+  }
+  return match_len;
+}
+
+// -----------------------------------------------------------------------------
+//  VP8LBackwardRefs
+
+struct PixOrCopyBlock {
+  PixOrCopyBlock* next_;   // next block (or NULL)
+  PixOrCopy* start_;       // data start
+  int size_;               // currently used size
+};
+
+static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
+  assert(refs != NULL);
+  if (refs->tail_ != NULL) {
+    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
+  }
+  refs->free_blocks_ = refs->refs_;
+  refs->tail_ = &refs->refs_;
+  refs->last_block_ = NULL;
+  refs->refs_ = NULL;
+}
+
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
+  assert(refs != NULL);
+  ClearBackwardRefs(refs);
+  while (refs->free_blocks_ != NULL) {
+    PixOrCopyBlock* const next = refs->free_blocks_->next_;
+    WebPSafeFree(refs->free_blocks_);
+    refs->free_blocks_ = next;
+  }
+}
+
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
+  assert(refs != NULL);
+  memset(refs, 0, sizeof(*refs));
+  refs->tail_ = &refs->refs_;
+  refs->block_size_ =
+      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
+}
+
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor c;
+  c.cur_block_ = refs->refs_;
+  if (refs->refs_ != NULL) {
+    c.cur_pos = c.cur_block_->start_;
+    c.last_pos_ = c.cur_pos + c.cur_block_->size_;
+  } else {
+    c.cur_pos = NULL;
+    c.last_pos_ = NULL;
+  }
+  return c;
+}
+
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
+  PixOrCopyBlock* const b = c->cur_block_->next_;
+  c->cur_pos = (b == NULL) ? NULL : b->start_;
+  c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
+  c->cur_block_ = b;
+}
+
+// Create a new block, either from the free list or allocated
+static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
+  PixOrCopyBlock* b = refs->free_blocks_;
+  if (b == NULL) {   // allocate new memory chunk
+    const size_t total_size =
+        sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
+    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
+    if (b == NULL) {
+      refs->error_ |= 1;
+      return NULL;
+    }
+    b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
+  } else {  // recycle from free-list
+    refs->free_blocks_ = b->next_;
+  }
+  *refs->tail_ = b;
+  refs->tail_ = &b->next_;
+  refs->last_block_ = b;
+  b->next_ = NULL;
+  b->size_ = 0;
+  return b;
+}
+
+static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
+                                              const PixOrCopy v) {
+  PixOrCopyBlock* b = refs->last_block_;
+  if (b == NULL || b->size_ == refs->block_size_) {
+    b = BackwardRefsNewBlock(refs);
+    if (b == NULL) return;   // refs->error_ is set
+  }
+  b->start_[b->size_++] = v;
+}
+
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
+                         VP8LBackwardRefs* const dst) {
+  const PixOrCopyBlock* b = src->refs_;
+  ClearBackwardRefs(dst);
+  assert(src->block_size_ == dst->block_size_);
+  while (b != NULL) {
+    PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
+    if (new_b == NULL) return 0;   // dst->error_ is set
+    memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
+    new_b->size_ = b->size_;
+    b = b->next_;
+  }
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Hash chains
+
+// initialize as empty
+static void HashChainInit(VP8LHashChain* const p) {
+  int i;
+  assert(p != NULL);
+  for (i = 0; i < p->size_; ++i) {
+    p->chain_[i] = -1;
+  }
+  for (i = 0; i < HASH_SIZE; ++i) {
+    p->hash_to_first_index_[i] = -1;
+  }
+}
+
+int VP8LHashChainInit(VP8LHashChain* const p, int size) {
+  assert(p->size_ == 0);
+  assert(p->chain_ == NULL);
+  assert(size > 0);
+  p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
+  if (p->chain_ == NULL) return 0;
+  p->size_ = size;
+  HashChainInit(p);
+  return 1;
+}
+
+void VP8LHashChainClear(VP8LHashChain* const p) {
+  assert(p != NULL);
+  WebPSafeFree(p->chain_);
+  p->size_ = 0;
+  p->chain_ = NULL;
+}
+
+// -----------------------------------------------------------------------------
+
+static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
+  uint64_t key = ((uint64_t)argb[1] << 32) | argb[0];
+  key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
+  return key;
+}
+
+// Insertion of two pixels at a time.
+static void HashChainInsert(VP8LHashChain* const p,
+                            const uint32_t* const argb, int pos) {
+  const uint64_t hash_code = GetPixPairHash64(argb);
+  p->chain_[pos] = p->hash_to_first_index_[hash_code];
+  p->hash_to_first_index_[hash_code] = pos;
+}
+
+static void GetParamsForHashChainFindCopy(int quality, int xsize,
+                                          int cache_bits, int* window_size,
+                                          int* iter_pos, int* iter_limit) {
+  const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
+  const int iter_neg = -iter_mult * (quality >> 1);
+  // Limit the backward-ref window size for lower qualities.
+  const int max_window_size = (quality > 50) ? WINDOW_SIZE
+                            : (quality > 25) ? (xsize << 8)
+                            : (xsize << 4);
+  assert(xsize > 0);
+  *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
+               : max_window_size;
+  *iter_pos = 8 + (quality >> 3);
+  // For lower entropy images, the rigorous search loop in HashChainFindCopy
+  // can be relaxed.
+  *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
+}
+
+static int HashChainFindCopy(const VP8LHashChain* const p,
+                             int base_position, int xsize_signed,
+                             const uint32_t* const argb, int max_len,
+                             int window_size, int iter_pos, int iter_limit,
+                             int* const distance_ptr,
+                             int* const length_ptr) {
+  const uint32_t* const argb_start = argb + base_position;
+  uint64_t best_val = 0;
+  uint32_t best_length = 1;
+  uint32_t best_distance = 0;
+  const uint32_t xsize = (uint32_t)xsize_signed;
+  const int min_pos =
+      (base_position > window_size) ? base_position - window_size : 0;
+  int pos;
+  assert(xsize > 0);
+  if (max_len > MAX_LENGTH) {
+    max_len = MAX_LENGTH;
+  }
+  for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
+       pos >= min_pos;
+       pos = p->chain_[pos]) {
+    uint64_t val;
+    uint32_t curr_length;
+    uint32_t distance;
+    const uint32_t* const ptr1 = (argb + pos + best_length - 1);
+    const uint32_t* const ptr2 = (argb_start + best_length - 1);
+
+    if (iter_pos < 0) {
+      if (iter_pos < iter_limit || best_val >= 0xff0000) {
+        break;
+      }
+    }
+    --iter_pos;
+
+    // Before 'expensive' linear match, check if the two arrays match at the
+    // current best length index and also for the succeeding elements.
+    if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue;
+
+    curr_length = FindMatchLength(argb + pos, argb_start, max_len);
+    if (curr_length < best_length) continue;
+
+    distance = (uint32_t)(base_position - pos);
+    val = curr_length << 16;
+    // Favoring 2d locality here gives savings for certain images.
+    if (distance < 9 * xsize) {
+      const uint32_t y = distance / xsize;
+      uint32_t x = distance % xsize;
+      if (x > (xsize >> 1)) {
+        x = xsize - x;
+      }
+      if (x <= 7) {
+        val += 9 * 9 + 9 * 9;
+        val -= y * y + x * x;
+      }
+    }
+    if (best_val < val) {
+      best_val = val;
+      best_length = curr_length;
+      best_distance = distance;
+      if (curr_length >= (uint32_t)max_len) {
+        break;
+      }
+      if ((best_distance == 1 || distance == xsize) &&
+          best_length >= 128) {
+        break;
+      }
+    }
+  }
+  *distance_ptr = (int)best_distance;
+  *length_ptr = best_length;
+  return (best_length >= MIN_LENGTH);
+}
+
+static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
+  while (length >= MAX_LENGTH) {
+    BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH));
+    length -= MAX_LENGTH;
+  }
+  if (length > 0) {
+    BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length));
+  }
+}
+
+static int BackwardReferencesRle(int xsize, int ysize,
+                                 const uint32_t* const argb,
+                                 VP8LBackwardRefs* const refs) {
+  const int pix_count = xsize * ysize;
+  int match_len = 0;
+  int i;
+  ClearBackwardRefs(refs);
+  PushBackCopy(refs, match_len);    // i=0 case
+  BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0]));
+  for (i = 1; i < pix_count; ++i) {
+    if (argb[i] == argb[i - 1]) {
+      ++match_len;
+    } else {
+      PushBackCopy(refs, match_len);
+      match_len = 0;
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i]));
+    }
+  }
+  PushBackCopy(refs, match_len);
+  return !refs->error_;
+}
+
+static int BackwardReferencesHashChain(int xsize, int ysize,
+                                       const uint32_t* const argb,
+                                       int cache_bits, int quality,
+                                       VP8LHashChain* const hash_chain,
+                                       VP8LBackwardRefs* const refs) {
+  int i;
+  int ok = 0;
+  int cc_init = 0;
+  const int use_color_cache = (cache_bits > 0);
+  const int pix_count = xsize * ysize;
+  VP8LColorCache hashers;
+  int window_size = WINDOW_SIZE;
+  int iter_pos = 1;
+  int iter_limit = -1;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  ClearBackwardRefs(refs);
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
+  HashChainInit(hash_chain);
+  for (i = 0; i < pix_count; ) {
+    // Alternative#1: Code the pixels starting at 'i' using backward reference.
+    int offset = 0;
+    int len = 0;
+    if (i < pix_count - 1) {  // FindCopy(i,..) reads pixels at [i] and [i + 1].
+      int max_len = pix_count - i;
+      HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
+                        window_size, iter_pos, iter_limit,
+                        &offset, &len);
+    }
+    if (len >= MIN_LENGTH) {
+      // Alternative#2: Insert the pixel at 'i' as literal, and code the
+      // pixels starting at 'i + 1' using backward reference.
+      int offset2 = 0;
+      int len2 = 0;
+      int k;
+      HashChainInsert(hash_chain, &argb[i], i);
+      if (i < pix_count - 2) {  // FindCopy(i+1,..) reads [i + 1] and [i + 2].
+        int max_len = pix_count - (i + 1);
+        HashChainFindCopy(hash_chain, i + 1, xsize, argb, max_len,
+                          window_size, iter_pos, iter_limit,
+                          &offset2, &len2);
+        if (len2 > len + 1) {
+          const uint32_t pixel = argb[i];
+          // Alternative#2 is a better match. So push pixel at 'i' as literal.
+          PixOrCopy v;
+          if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+            const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+            v = PixOrCopyCreateCacheIdx(ix);
+          } else {
+            if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+            v = PixOrCopyCreateLiteral(pixel);
+          }
+          BackwardRefsCursorAdd(refs, v);
+          i++;  // Backward reference to be done for next pixel.
+          len = len2;
+          offset = offset2;
+        }
+      }
+      if (len >= MAX_LENGTH) {
+        len = MAX_LENGTH - 1;
+      }
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+      if (use_color_cache) {
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      // Add to the hash_chain (but cannot add the last pixel).
+      {
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 1; k < last; ++k) {
+          HashChainInsert(hash_chain, &argb[i + k], i + k);
+        }
+      }
+      i += len;
+    } else {
+      const uint32_t pixel = argb[i];
+      PixOrCopy v;
+      if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+        // push pixel as a PixOrCopyCreateCacheIdx pixel
+        const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+        v = PixOrCopyCreateCacheIdx(ix);
+      } else {
+        if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+        v = PixOrCopyCreateLiteral(pixel);
+      }
+      BackwardRefsCursorAdd(refs, v);
+      if (i + 1 < pix_count) {
+        HashChainInsert(hash_chain, &argb[i], i);
+      }
+      ++i;
+    }
+  }
+  ok = !refs->error_;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+typedef struct {
+  double alpha_[VALUES_IN_BYTE];
+  double red_[VALUES_IN_BYTE];
+  double literal_[PIX_OR_COPY_CODES_MAX];
+  double blue_[VALUES_IN_BYTE];
+  double distance_[NUM_DISTANCE_CODES];
+} CostModel;
+
+static int BackwardReferencesTraceBackwards(
+    int xsize, int ysize, int recursive_cost_model,
+    const uint32_t* const argb, int quality, int cache_bits,
+    VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs);
+
+static void ConvertPopulationCountTableToBitEstimates(
+    int num_symbols, const uint32_t population_counts[], double output[]) {
+  uint32_t sum = 0;
+  int nonzeros = 0;
+  int i;
+  for (i = 0; i < num_symbols; ++i) {
+    sum += population_counts[i];
+    if (population_counts[i] > 0) {
+      ++nonzeros;
+    }
+  }
+  if (nonzeros <= 1) {
+    memset(output, 0, num_symbols * sizeof(*output));
+  } else {
+    const double logsum = VP8LFastLog2(sum);
+    for (i = 0; i < num_symbols; ++i) {
+      output[i] = logsum - VP8LFastLog2(population_counts[i]);
+    }
+  }
+}
+
+static int CostModelBuild(CostModel* const m, int xsize, int ysize,
+                          int recursion_level, const uint32_t* const argb,
+                          int quality, int cache_bits,
+                          VP8LHashChain* const hash_chain,
+                          VP8LBackwardRefs* const refs) {
+  int ok = 0;
+  VP8LHistogram* histo = NULL;
+
+  ClearBackwardRefs(refs);
+  if (recursion_level > 0) {
+    if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
+                                          argb, quality, cache_bits, hash_chain,
+                                          refs)) {
+      goto Error;
+    }
+  } else {
+    if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
+                                     hash_chain, refs)) {
+      goto Error;
+    }
+  }
+  histo = VP8LAllocateHistogram(cache_bits);
+  if (histo == NULL) goto Error;
+
+  VP8LHistogramCreate(histo, refs, cache_bits);
+
+  ConvertPopulationCountTableToBitEstimates(
+      VP8LHistogramNumCodes(histo->palette_code_bits_),
+      histo->literal_, m->literal_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo->red_, m->red_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo->blue_, m->blue_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo->alpha_, m->alpha_);
+  ConvertPopulationCountTableToBitEstimates(
+      NUM_DISTANCE_CODES, histo->distance_, m->distance_);
+  ok = 1;
+
+ Error:
+  VP8LFreeHistogram(histo);
+  return ok;
+}
+
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+  return m->alpha_[v >> 24] +
+         m->red_[(v >> 16) & 0xff] +
+         m->literal_[(v >> 8) & 0xff] +
+         m->blue_[v & 0xff];
+}
+
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+  const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
+  return m->literal_[literal_idx];
+}
+
+static WEBP_INLINE double GetLengthCost(const CostModel* const m,
+                                        uint32_t length) {
+  int code, extra_bits;
+  VP8LPrefixEncodeBits(length, &code, &extra_bits);
+  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
+}
+
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
+                                          uint32_t distance) {
+  int code, extra_bits;
+  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
+  return m->distance_[code] + extra_bits;
+}
+
+static int BackwardReferencesHashChainDistanceOnly(
+    int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
+    int quality, int cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs, uint32_t* const dist_array) {
+  int i;
+  int ok = 0;
+  int cc_init = 0;
+  const int pix_count = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  float* const cost =
+      (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
+  CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model));
+  VP8LColorCache hashers;
+  const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
+  const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
+  const int min_distance_code = 2;  // TODO(vikasa): tune as function of quality
+  int window_size = WINDOW_SIZE;
+  int iter_pos = 1;
+  int iter_limit = -1;
+
+  if (cost == NULL || cost_model == NULL) goto Error;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
+                      quality, cache_bits, hash_chain, refs)) {
+    goto Error;
+  }
+
+  for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;
+
+  // We loop one pixel at a time, but store all currently best points to
+  // non-processed locations from this point.
+  dist_array[0] = 0;
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
+  HashChainInit(hash_chain);
+  for (i = 0; i < pix_count; ++i) {
+    double prev_cost = 0.0;
+    int shortmax;
+    if (i > 0) {
+      prev_cost = cost[i - 1];
+    }
+    for (shortmax = 0; shortmax < 2; ++shortmax) {
+      int offset = 0;
+      int len = 0;
+      if (i < pix_count - 1) {  // FindCopy reads pixels at [i] and [i + 1].
+        int max_len = shortmax ? 2 : pix_count - i;
+        HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
+                          window_size, iter_pos, iter_limit,
+                          &offset, &len);
+      }
+      if (len >= MIN_LENGTH) {
+        const int code = DistanceToPlaneCode(xsize, offset);
+        const double distance_cost =
+            prev_cost + GetDistanceCost(cost_model, code);
+        int k;
+        for (k = 1; k < len; ++k) {
+          const double cost_val = distance_cost + GetLengthCost(cost_model, k);
+          if (cost[i + k] > cost_val) {
+            cost[i + k] = (float)cost_val;
+            dist_array[i + k] = k + 1;
+          }
+        }
+        // This if is for speedup only. It roughly doubles the speed, and
+        // makes compression worse by .1 %.
+        if (len >= 128 && code <= min_distance_code) {
+          // Long copy for short distances, let's skip the middle
+          // lookups for better copies.
+          // 1) insert the hashes.
+          if (use_color_cache) {
+            for (k = 0; k < len; ++k) {
+              VP8LColorCacheInsert(&hashers, argb[i + k]);
+            }
+          }
+          // 2) Add to the hash_chain (but cannot add the last pixel)
+          {
+            const int last = (len + i < pix_count - 1) ? len + i
+                                                       : pix_count - 1;
+            for (k = i; k < last; ++k) {
+              HashChainInsert(hash_chain, &argb[k], k);
+            }
+          }
+          // 3) jump.
+          i += len - 1;  // for loop does ++i, thus -1 here.
+          goto next_symbol;
+        }
+      }
+    }
+    if (i < pix_count - 1) {
+      HashChainInsert(hash_chain, &argb[i], i);
+    }
+    {
+      // inserting a literal pixel
+      double cost_val = prev_cost;
+      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+        const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
+        cost_val += GetCacheCost(cost_model, ix) * mul0;
+      } else {
+        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+        cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
+      }
+      if (cost[i] > cost_val) {
+        cost[i] = (float)cost_val;
+        dist_array[i] = 1;  // only one is inserted.
+      }
+    }
+ next_symbol: ;
+  }
+  // Last pixel still to do, it can only be a single step if not reached
+  // through cheaper means already.
+  ok = !refs->error_;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  WebPSafeFree(cost_model);
+  WebPSafeFree(cost);
+  return ok;
+}
+
+// We pack the path at the end of *dist_array and return
+// a pointer to this part of the array. Example:
+// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
+static void TraceBackwards(uint32_t* const dist_array,
+                           int dist_array_size,
+                           uint32_t** const chosen_path,
+                           int* const chosen_path_size) {
+  uint32_t* path = dist_array + dist_array_size;
+  uint32_t* cur = dist_array + dist_array_size - 1;
+  while (cur >= dist_array) {
+    const int k = *cur;
+    --path;
+    *path = k;
+    cur -= k;
+  }
+  *chosen_path = path;
+  *chosen_path_size = (int)(dist_array + dist_array_size - path);
+}
+
+static int BackwardReferencesHashChainFollowChosenPath(
+    int xsize, int ysize, const uint32_t* const argb,
+    int quality, int cache_bits,
+    const uint32_t* const chosen_path, int chosen_path_size,
+    VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs) {
+  const int pix_count = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  int size = 0;
+  int i = 0;
+  int k;
+  int ix;
+  int ok = 0;
+  int cc_init = 0;
+  int window_size = WINDOW_SIZE;
+  int iter_pos = 1;
+  int iter_limit = -1;
+  VP8LColorCache hashers;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  ClearBackwardRefs(refs);
+  GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
+                                &window_size, &iter_pos, &iter_limit);
+  HashChainInit(hash_chain);
+  for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
+    int offset = 0;
+    int len = 0;
+    int max_len = chosen_path[ix];
+    if (max_len != 1) {
+      HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
+                        window_size, iter_pos, iter_limit,
+                        &offset, &len);
+      assert(len == max_len);
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
+      if (use_color_cache) {
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      {
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 0; k < last; ++k) {
+          HashChainInsert(hash_chain, &argb[i + k], i + k);
+        }
+      }
+      i += len;
+    } else {
+      PixOrCopy v;
+      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+        // push pixel as a color cache index
+        const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
+        v = PixOrCopyCreateCacheIdx(idx);
+      } else {
+        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+        v = PixOrCopyCreateLiteral(argb[i]);
+      }
+      BackwardRefsCursorAdd(refs, v);
+      if (i + 1 < pix_count) {
+        HashChainInsert(hash_chain, &argb[i], i);
+      }
+      ++i;
+    }
+  }
+  ok = !refs->error_;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  return ok;
+}
+
+// Returns 1 on success.
+static int BackwardReferencesTraceBackwards(int xsize, int ysize,
+                                            int recursive_cost_model,
+                                            const uint32_t* const argb,
+                                            int quality, int cache_bits,
+                                            VP8LHashChain* const hash_chain,
+                                            VP8LBackwardRefs* const refs) {
+  int ok = 0;
+  const int dist_array_size = xsize * ysize;
+  uint32_t* chosen_path = NULL;
+  int chosen_path_size = 0;
+  uint32_t* dist_array =
+      (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
+
+  if (dist_array == NULL) goto Error;
+
+  if (!BackwardReferencesHashChainDistanceOnly(
+      xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain,
+      refs, dist_array)) {
+    goto Error;
+  }
+  TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
+  if (!BackwardReferencesHashChainFollowChosenPath(
+      xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
+      hash_chain, refs)) {
+    goto Error;
+  }
+  ok = 1;
+ Error:
+  WebPSafeFree(dist_array);
+  return ok;
+}
+
+static void BackwardReferences2DLocality(int xsize,
+                                         const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    if (PixOrCopyIsCopy(c.cur_pos)) {
+      const int dist = c.cur_pos->argb_or_distance;
+      const int transformed_dist = DistanceToPlaneCode(xsize, dist);
+      c.cur_pos->argb_or_distance = transformed_dist;
+    }
+    VP8LRefsCursorNext(&c);
+  }
+}
+
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs_array[2]) {
+  int lz77_is_useful;
+  const int num_pix = width * height;
+  VP8LBackwardRefs* best = NULL;
+  VP8LBackwardRefs* const refs_lz77 = &refs_array[0];
+  VP8LBackwardRefs* const refs_rle = &refs_array[1];
+
+  if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
+                                   hash_chain, refs_lz77)) {
+    return NULL;
+  }
+  if (!BackwardReferencesRle(width, height, argb, refs_rle)) {
+    return NULL;
+  }
+
+  {
+    double bit_cost_lz77, bit_cost_rle;
+    VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
+    if (histo == NULL) return NULL;
+    // Evaluate LZ77 coding.
+    VP8LHistogramCreate(histo, refs_lz77, cache_bits);
+    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+    // Evaluate RLE coding.
+    VP8LHistogramCreate(histo, refs_rle, cache_bits);
+    bit_cost_rle = VP8LHistogramEstimateBits(histo);
+    // Decide if LZ77 is useful.
+    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+    VP8LFreeHistogram(histo);
+  }
+
+  // Choose appropriate backward reference.
+  if (lz77_is_useful) {
+    // TraceBackwards is costly. Don't execute it at lower quality.
+    const int try_lz77_trace_backwards = (quality >= 25);
+    best = refs_lz77;   // default guess: lz77 is better
+    if (try_lz77_trace_backwards) {
+      // Set recursion level for large images using a color cache.
+      const int recursion_level =
+          (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
+      VP8LBackwardRefs* const refs_trace = &refs_array[1];
+      ClearBackwardRefs(refs_trace);
+      if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
+                                           quality, cache_bits, hash_chain,
+                                           refs_trace)) {
+        best = refs_trace;
+      }
+    }
+  } else {
+    best = refs_rle;
+  }
+
+  if (use_2d_locality) BackwardReferences2DLocality(width, best);
+
+  return best;
+}
+
+// Returns entropy for the given cache bits.
+static double ComputeCacheEntropy(const uint32_t* const argb,
+                                  int xsize, int ysize,
+                                  const VP8LBackwardRefs* const refs,
+                                  int cache_bits) {
+  int pixel_index = 0;
+  uint32_t k;
+  const int use_color_cache = (cache_bits > 0);
+  int cc_init = 0;
+  double entropy = MAX_ENTROPY;
+  const double kSmallPenaltyForLargeCache = 4.0;
+  VP8LColorCache hashers;
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
+  if (histo == NULL) goto Error;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
+    if (PixOrCopyIsLiteral(v)) {
+      if (use_color_cache &&
+          VP8LColorCacheContains(&hashers, argb[pixel_index])) {
+        // push pixel as a cache index
+        const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
+        const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
+        VP8LHistogramAddSinglePixOrCopy(histo, &token);
+      } else {
+        VP8LHistogramAddSinglePixOrCopy(histo, v);
+      }
+    } else {
+      VP8LHistogramAddSinglePixOrCopy(histo, v);
+    }
+    if (use_color_cache) {
+      for (k = 0; k < PixOrCopyLength(v); ++k) {
+        VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+      }
+    }
+    pixel_index += PixOrCopyLength(v);
+    VP8LRefsCursorNext(&c);
+  }
+  assert(pixel_index == xsize * ysize);
+  (void)xsize;  // xsize is not used in non-debug compilations otherwise.
+  (void)ysize;  // ysize is not used in non-debug compilations otherwise.
+  entropy = VP8LHistogramEstimateBits(histo) +
+      kSmallPenaltyForLargeCache * cache_bits;
+ Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  VP8LFreeHistogram(histo);
+  return entropy;
+}
+
+// *best_cache_bits will contain how many bits are to be used for a color cache.
+// Returns 0 in case of memory error.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+                                      int xsize, int ysize, int quality,
+                                      VP8LHashChain* const hash_chain,
+                                      VP8LBackwardRefs* const refs,
+                                      int* const best_cache_bits) {
+  int eval_low = 1;
+  int eval_high = 1;
+  double entropy_low = MAX_ENTROPY;
+  double entropy_high = MAX_ENTROPY;
+  int cache_bits_low = 0;
+  int cache_bits_high = MAX_COLOR_CACHE_BITS;
+
+  if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain,
+                                   refs)) {
+    return 0;
+  }
+  // Do a binary search to find the optimal entropy for cache_bits.
+  while (cache_bits_high - cache_bits_low > 1) {
+    if (eval_low) {
+      entropy_low =
+          ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low);
+      eval_low = 0;
+    }
+    if (eval_high) {
+      entropy_high =
+          ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high);
+      eval_high = 0;
+    }
+    if (entropy_high < entropy_low) {
+      *best_cache_bits = cache_bits_high;
+      cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
+      eval_low = 1;
+    } else {
+      *best_cache_bits = cache_bits_low;
+      cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
+      eval_high = 1;
+    }
+  }
+  return 1;
+}
diff --git a/src/main/jni/libwebp/enc/backward_references.h b/src/main/jni/libwebp/enc/backward_references.h
new file mode 100644
index 000000000..c2c81c56e
--- /dev/null
+++ b/src/main/jni/libwebp/enc/backward_references.h
@@ -0,0 +1,212 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
+#define WEBP_ENC_BACKWARD_REFERENCES_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../webp/types.h"
+#include "../webp/format_constants.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The spec allows 11, we use 9 bits to reduce memory consumption in encoding.
+// Having 9 instead of 11 only removes about 0.25 % of compression density.
+#define MAX_COLOR_CACHE_BITS 9
+
+// Max ever number of codes we'll use:
+#define PIX_OR_COPY_CODES_MAX \
+    (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS))
+
+// -----------------------------------------------------------------------------
+// PixOrCopy
+
+enum Mode {
+  kLiteral,
+  kCacheIdx,
+  kCopy,
+  kNone
+};
+
+typedef struct {
+  // mode as uint8_t to make the memory layout to be exactly 8 bytes.
+  uint8_t mode;
+  uint16_t len;
+  uint32_t argb_or_distance;
+} PixOrCopy;
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCopy(uint32_t distance,
+                                                 uint16_t len) {
+  PixOrCopy retval;
+  retval.mode = kCopy;
+  retval.argb_or_distance = distance;
+  retval.len = len;
+  return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCacheIdx(int idx) {
+  PixOrCopy retval;
+  assert(idx >= 0);
+  assert(idx < (1 << MAX_COLOR_CACHE_BITS));
+  retval.mode = kCacheIdx;
+  retval.argb_or_distance = idx;
+  retval.len = 1;
+  return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateLiteral(uint32_t argb) {
+  PixOrCopy retval;
+  retval.mode = kLiteral;
+  retval.argb_or_distance = argb;
+  retval.len = 1;
+  return retval;
+}
+
+static WEBP_INLINE int PixOrCopyIsLiteral(const PixOrCopy* const p) {
+  return (p->mode == kLiteral);
+}
+
+static WEBP_INLINE int PixOrCopyIsCacheIdx(const PixOrCopy* const p) {
+  return (p->mode == kCacheIdx);
+}
+
+static WEBP_INLINE int PixOrCopyIsCopy(const PixOrCopy* const p) {
+  return (p->mode == kCopy);
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLiteral(const PixOrCopy* const p,
+                                             int component) {
+  assert(p->mode == kLiteral);
+  return (p->argb_or_distance >> (component * 8)) & 0xff;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
+  return p->len;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
+  assert(p->mode == kLiteral);
+  return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
+  assert(p->mode == kCacheIdx);
+  assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
+  return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
+  assert(p->mode == kCopy);
+  return p->argb_or_distance;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LHashChain
+
+#define HASH_BITS 18
+#define HASH_SIZE (1 << HASH_BITS)
+
+typedef struct VP8LHashChain VP8LHashChain;
+struct VP8LHashChain {
+  // Stores the most recently added position with the given hash value.
+  int32_t hash_to_first_index_[HASH_SIZE];
+  // chain_[pos] stores the previous position with the same hash value
+  // for every pixel in the image.
+  int32_t* chain_;
+  // This is the maximum size of the hash_chain that can be constructed.
+  // Typically this is the pixel count (width x height) for a given image.
+  int size_;
+};
+
+// Must be called first, to set size.
+int VP8LHashChainInit(VP8LHashChain* const p, int size);
+void VP8LHashChainClear(VP8LHashChain* const p);  // release memory
+
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs (block-based backward-references storage)
+
+// maximum number of reference blocks the image will be segmented into
+#define MAX_REFS_BLOCK_PER_IMAGE 16
+
+typedef struct PixOrCopyBlock PixOrCopyBlock;   // forward declaration
+typedef struct VP8LBackwardRefs VP8LBackwardRefs;
+
+// Container for blocks chain
+struct VP8LBackwardRefs {
+  int block_size_;               // common block-size
+  int error_;                    // set to true if some memory error occurred
+  PixOrCopyBlock* refs_;         // list of currently used blocks
+  PixOrCopyBlock** tail_;        // for list recycling
+  PixOrCopyBlock* free_blocks_;  // free-list
+  PixOrCopyBlock* last_block_;   // used for adding new refs (internal)
+};
+
+// Initialize the object. 'block_size' is the common block size to store
+// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE).
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size);
+// Release memory for backward references.
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs);
+// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error.
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
+                         VP8LBackwardRefs* const dst);
+
+// Cursor for iterating on references content
+typedef struct {
+  // public:
+  PixOrCopy* cur_pos;           // current position
+  // private:
+  PixOrCopyBlock* cur_block_;   // current block in the refs list
+  const PixOrCopy* last_pos_;   // sentinel for switching to next block
+} VP8LRefsCursor;
+
+// Returns a cursor positioned at the beginning of the references list.
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs);
+// Returns true if cursor is pointing at a valid position.
+static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) {
+  return (c->cur_pos != NULL);
+}
+// Move to next block of references. Internal, not to be called directly.
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c);
+// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk().
+static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
+  assert(c != NULL);
+  assert(VP8LRefsCursorOk(c));
+  if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c);
+}
+
+// -----------------------------------------------------------------------------
+// Main entry points
+
+// Evaluates best possible backward references for specified quality.
+// Further optimize for 2D locality if use_2d_locality flag is set.
+// The return value is the pointer to the best of the two backward refs viz,
+// refs[0] or refs[1].
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs[2]);
+
+// Produce an estimate for a good color cache size for the image.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+                                      int xsize, int ysize, int quality,
+                                      VP8LHashChain* const hash_chain,
+                                      VP8LBackwardRefs* const ref,
+                                      int* const best_cache_bits);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBP_ENC_BACKWARD_REFERENCES_H_
diff --git a/src/main/jni/libwebp/enc/config.c b/src/main/jni/libwebp/enc/config.c
new file mode 100644
index 000000000..53a3bb2e7
--- /dev/null
+++ b/src/main/jni/libwebp/enc/config.c
@@ -0,0 +1,166 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding tools configuration
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../webp/encode.h"
+
+//------------------------------------------------------------------------------
+// WebPConfig
+//------------------------------------------------------------------------------
+
+int WebPConfigInitInternal(WebPConfig* config,
+                           WebPPreset preset, float quality, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+    return 0;   // caller/system version mismatch!
+  }
+  if (config == NULL) return 0;
+
+  config->quality = quality;
+  config->target_size = 0;
+  config->target_PSNR = 0.;
+  config->method = 4;
+  config->sns_strength = 50;
+  config->filter_strength = 60;   // mid-filtering
+  config->filter_sharpness = 0;
+  config->filter_type = 1;        // default: strong (so U/V is filtered too)
+  config->partitions = 0;
+  config->segments = 4;
+  config->pass = 1;
+  config->show_compressed = 0;
+  config->preprocessing = 0;
+  config->autofilter = 0;
+  config->partition_limit = 0;
+  config->alpha_compression = 1;
+  config->alpha_filtering = 1;
+  config->alpha_quality = 100;
+  config->lossless = 0;
+  config->image_hint = WEBP_HINT_DEFAULT;
+  config->emulate_jpeg_size = 0;
+  config->thread_level = 0;
+  config->low_memory = 0;
+
+  // TODO(skal): tune.
+  switch (preset) {
+    case WEBP_PRESET_PICTURE:
+      config->sns_strength = 80;
+      config->filter_sharpness = 4;
+      config->filter_strength = 35;
+      config->preprocessing &= ~2;   // no dithering
+      break;
+    case WEBP_PRESET_PHOTO:
+      config->sns_strength = 80;
+      config->filter_sharpness = 3;
+      config->filter_strength = 30;
+      config->preprocessing |= 2;
+      break;
+    case WEBP_PRESET_DRAWING:
+      config->sns_strength = 25;
+      config->filter_sharpness = 6;
+      config->filter_strength = 10;
+      break;
+    case WEBP_PRESET_ICON:
+      config->sns_strength = 0;
+      config->filter_strength = 0;   // disable filtering to retain sharpness
+      config->preprocessing &= ~2;   // no dithering
+      break;
+    case WEBP_PRESET_TEXT:
+      config->sns_strength = 0;
+      config->filter_strength = 0;   // disable filtering to retain sharpness
+      config->preprocessing &= ~2;   // no dithering
+      config->segments = 2;
+      break;
+    case WEBP_PRESET_DEFAULT:
+    default:
+      break;
+  }
+  return WebPValidateConfig(config);
+}
+
+int WebPValidateConfig(const WebPConfig* config) {
+  if (config == NULL) return 0;
+  if (config->quality < 0 || config->quality > 100)
+    return 0;
+  if (config->target_size < 0)
+    return 0;
+  if (config->target_PSNR < 0)
+    return 0;
+  if (config->method < 0 || config->method > 6)
+    return 0;
+  if (config->segments < 1 || config->segments > 4)
+    return 0;
+  if (config->sns_strength < 0 || config->sns_strength > 100)
+    return 0;
+  if (config->filter_strength < 0 || config->filter_strength > 100)
+    return 0;
+  if (config->filter_sharpness < 0 || config->filter_sharpness > 7)
+    return 0;
+  if (config->filter_type < 0 || config->filter_type > 1)
+    return 0;
+  if (config->autofilter < 0 || config->autofilter > 1)
+    return 0;
+  if (config->pass < 1 || config->pass > 10)
+    return 0;
+  if (config->show_compressed < 0 || config->show_compressed > 1)
+    return 0;
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+  if (config->preprocessing < 0 || config->preprocessing > 7)
+#else
+  if (config->preprocessing < 0 || config->preprocessing > 3)
+#endif
+    return 0;
+  if (config->partitions < 0 || config->partitions > 3)
+    return 0;
+  if (config->partition_limit < 0 || config->partition_limit > 100)
+    return 0;
+  if (config->alpha_compression < 0)
+    return 0;
+  if (config->alpha_filtering < 0)
+    return 0;
+  if (config->alpha_quality < 0 || config->alpha_quality > 100)
+    return 0;
+  if (config->lossless < 0 || config->lossless > 1)
+    return 0;
+  if (config->image_hint >= WEBP_HINT_LAST)
+    return 0;
+  if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
+    return 0;
+  if (config->thread_level < 0 || config->thread_level > 1)
+    return 0;
+  if (config->low_memory < 0 || config->low_memory > 1)
+    return 0;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0202
+#define MAX_LEVEL 9
+
+// Mapping between -z level and -m / -q parameter settings.
+static const struct {
+  uint8_t method_;
+  uint8_t quality_;
+} kLosslessPresets[MAX_LEVEL + 1] = {
+  { 0,  0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 },
+  { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 }
+};
+
+int WebPConfigLosslessPreset(WebPConfig* config, int level) {
+  if (config == NULL || level < 0 || level > MAX_LEVEL) return 0;
+  config->lossless = 1;
+  config->method = kLosslessPresets[level].method_;
+  config->quality = kLosslessPresets[level].quality_;
+  return 1;
+}
+#endif
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/cost.c b/src/main/jni/libwebp/enc/cost.c
new file mode 100644
index 000000000..9d2cc0170
--- /dev/null
+++ b/src/main/jni/libwebp/enc/cost.c
@@ -0,0 +1,735 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./cost.h"
+
+//------------------------------------------------------------------------------
+// Boolean-cost cost table
+
+const uint16_t VP8EntropyCost[256] = {
+  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
+  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
+   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
+   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
+   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
+   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
+   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
+   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
+   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
+   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
+   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
+   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
+   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
+   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
+   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
+   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
+   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
+   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
+   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
+   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
+    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
+    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
+    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
+    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
+    10,    9,    7,    6,    4,    3
+};
+
+//------------------------------------------------------------------------------
+// Level cost tables
+
+// For each given level, the following table gives the pattern of contexts to
+// use for coding it (in [][0]) as well as the bit value to use for each
+// context (in [][1]).
+const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
+                  {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005},
+  {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
+  {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013},
+  {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013},
+  {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}
+};
+
+// fixed costs for coding levels, deduce from the coding tree.
+// This is only the part that doesn't depend on the probability state.
+const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
+     0,  256,  256,  256,  256,  432,  618,  630,
+   731,  640,  640,  828,  901,  948, 1021, 1101,
+  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
+  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
+  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
+  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
+  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
+  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
+  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
+  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
+  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
+  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
+  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
+  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
+  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
+  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
+  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
+  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
+  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
+  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
+  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
+  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
+  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
+  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
+  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
+  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
+  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
+  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
+  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
+  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
+  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
+  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
+  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
+  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
+  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
+  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
+  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
+  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
+  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
+  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
+  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
+  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
+  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
+  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
+  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
+  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
+  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
+  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
+  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
+  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
+  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
+  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
+  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
+  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
+  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
+  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
+  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
+  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
+  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
+  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
+  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
+  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
+  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
+  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
+  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
+  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
+  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
+  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
+  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
+  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
+  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
+  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
+  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
+  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
+  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
+  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
+  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
+  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
+  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
+  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
+  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
+  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
+  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
+  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
+  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
+  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
+  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
+  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
+  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
+  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
+  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
+  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
+  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
+  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
+  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
+  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
+  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
+  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
+  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
+  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
+  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
+  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
+  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
+  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
+  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
+  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
+  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
+  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
+  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
+  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
+  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
+  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
+  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
+  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
+  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
+  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
+  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
+  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
+  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
+  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
+  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
+  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
+  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
+  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
+  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
+  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
+  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
+};
+
+static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
+  int pattern = VP8LevelCodes[level - 1][0];
+  int bits = VP8LevelCodes[level - 1][1];
+  int cost = 0;
+  int i;
+  for (i = 2; pattern; ++i) {
+    if (pattern & 1) {
+      cost += VP8BitCost(bits & 1, probas[i]);
+    }
+    bits >>= 1;
+    pattern >>= 1;
+  }
+  return cost;
+}
+
+//------------------------------------------------------------------------------
+// Pre-calc level costs once for all
+
+void VP8CalculateLevelCosts(VP8Proba* const proba) {
+  int ctype, band, ctx;
+
+  if (!proba->dirty_) return;  // nothing to do.
+
+  for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
+    for (band = 0; band < NUM_BANDS; ++band) {
+      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+        const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
+        uint16_t* const table = proba->level_cost_[ctype][band][ctx];
+        const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0;
+        const int cost_base = VP8BitCost(1, p[1]) + cost0;
+        int v;
+        table[0] = VP8BitCost(0, p[1]) + cost0;
+        for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) {
+          table[v] = cost_base + VariableLevelCost(v, p);
+        }
+        // Starting at level 67 and up, the variable part of the cost is
+        // actually constant.
+      }
+    }
+  }
+  proba->dirty_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Mode cost tables.
+
+// These are the fixed probabilities (in the coding trees) turned into bit-cost
+// by calling VP8BitCost().
+const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 };
+// note: these values include the fixed VP8BitCost(1, 145) mode selection cost.
+const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 };
+const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
+  { {   40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137 },
+    {  192,  469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522 },
+    {  142,  910,  762, 1684, 1849, 1576, 1460, 1305, 1801, 1657 },
+    {  559,  641, 1370,  421, 1182, 1569, 1612, 1725,  863, 1007 },
+    {  299, 1059, 1256, 1108,  636, 1068, 1581, 1883,  869, 1142 },
+    {  277, 1111,  707, 1362, 1089,  672, 1603, 1541, 1545, 1291 },
+    {  214,  781, 1609, 1303, 1632, 2229,  726, 1560, 1713,  918 },
+    {  152, 1037, 1046, 1759, 1983, 2174, 1358,  742, 1740, 1390 },
+    {  512, 1046, 1420,  753,  752, 1297, 1486, 1613,  460, 1207 },
+    {  424,  827, 1362,  719, 1462, 1202, 1199, 1476, 1199,  538 } },
+  { {  240,  402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099 },
+    {  467,  242,  960, 1232, 1714, 1620, 1834, 1570, 1676, 1391 },
+    {  500,  455,  463, 1507, 1699, 1282, 1564,  982, 2114, 2114 },
+    {  672,  643, 1372,  331, 1589, 1667, 1453, 1938,  996,  876 },
+    {  458,  783, 1037,  911,  738,  968, 1165, 1518,  859, 1033 },
+    {  504,  815,  504, 1139, 1219,  719, 1506, 1085, 1268, 1268 },
+    {  333,  630, 1445, 1239, 1883, 3672,  799, 1548, 1865,  598 },
+    {  399,  644,  746, 1342, 1856, 1350, 1493,  613, 1855, 1015 },
+    {  622,  749, 1205,  608, 1066, 1408, 1290, 1406,  546,  971 },
+    {  500,  753, 1041,  668, 1230, 1617, 1297, 1425, 1383,  523 } },
+  { {  394,  553,  523, 1502, 1536,  981, 1608, 1142, 1666, 2181 },
+    {  655,  430,  375, 1411, 1861, 1220, 1677, 1135, 1978, 1553 },
+    {  690,  640,  245, 1954, 2070, 1194, 1528,  982, 1972, 2232 },
+    {  559,  834,  741,  867, 1131,  980, 1225,  852, 1092,  784 },
+    {  690,  875,  516,  959,  673,  894, 1056, 1190, 1528, 1126 },
+    {  740,  951,  384, 1277, 1177,  492, 1579, 1155, 1846, 1513 },
+    {  323,  775, 1062, 1776, 3062, 1274,  813, 1188, 1372,  655 },
+    {  488,  971,  484, 1767, 1515, 1775, 1115,  503, 1539, 1461 },
+    {  740, 1006,  998,  709,  851, 1230, 1337,  788,  741,  721 },
+    {  522, 1073,  573, 1045, 1346,  887, 1046, 1146, 1203,  697 } },
+  { {  105,  864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579 },
+    {  534,  305, 1193,  683, 1388, 2164, 1802, 1894, 1264, 1170 },
+    {  305,  518,  877, 1108, 1426, 3215, 1425, 1064, 1320, 1242 },
+    {  683,  732, 1927,  257, 1493, 2048, 1858, 1552, 1055,  947 },
+    {  394,  814, 1024,  660,  959, 1556, 1282, 1289,  893, 1047 },
+    {  528,  615,  996,  940, 1201,  635, 1094, 2515,  803, 1358 },
+    {  347,  614, 1609, 1187, 3133, 1345, 1007, 1339, 1017,  667 },
+    {  218,  740,  878, 1605, 3650, 3650, 1345,  758, 1357, 1617 },
+    {  672,  750, 1541,  558, 1257, 1599, 1870, 2135,  402, 1087 },
+    {  592,  684, 1161,  430, 1092, 1497, 1475, 1489, 1095,  822 } },
+  { {  228, 1056, 1059, 1368,  752,  982, 1512, 1518,  987, 1782 },
+    {  494,  514,  818,  942,  965,  892, 1610, 1356, 1048, 1363 },
+    {  512,  648,  591, 1042,  761,  991, 1196, 1454, 1309, 1463 },
+    {  683,  749, 1043,  676,  841, 1396, 1133, 1138,  654,  939 },
+    {  622, 1101, 1126,  994,  361, 1077, 1203, 1318,  877, 1219 },
+    {  631, 1068,  857, 1650,  651,  477, 1650, 1419,  828, 1170 },
+    {  555,  727, 1068, 1335, 3127, 1339,  820, 1331, 1077,  429 },
+    {  504,  879,  624, 1398,  889,  889, 1392,  808,  891, 1406 },
+    {  683, 1602, 1289,  977,  578,  983, 1280, 1708,  406, 1122 },
+    {  399,  865, 1433, 1070, 1072,  764,  968, 1477, 1223,  678 } },
+  { {  333,  760,  935, 1638, 1010,  529, 1646, 1410, 1472, 2219 },
+    {  512,  494,  750, 1160, 1215,  610, 1870, 1868, 1628, 1169 },
+    {  572,  646,  492, 1934, 1208,  603, 1580, 1099, 1398, 1995 },
+    {  786,  789,  942,  581, 1018,  951, 1599, 1207,  731,  768 },
+    {  690, 1015,  672, 1078,  582,  504, 1693, 1438, 1108, 2897 },
+    {  768, 1267,  571, 2005, 1243,  244, 2881, 1380, 1786, 1453 },
+    {  452,  899, 1293,  903, 1311, 3100,  465, 1311, 1319,  813 },
+    {  394,  927,  942, 1103, 1358, 1104,  946,  593, 1363, 1109 },
+    {  559, 1005, 1007, 1016,  658, 1173, 1021, 1164,  623, 1028 },
+    {  564,  796,  632, 1005, 1014,  863, 2316, 1268,  938,  764 } },
+  { {  266,  606, 1098, 1228, 1497, 1243,  948, 1030, 1734, 1461 },
+    {  366,  585,  901, 1060, 1407, 1247,  876, 1134, 1620, 1054 },
+    {  452,  565,  542, 1729, 1479, 1479, 1016,  886, 2938, 1150 },
+    {  555, 1088, 1533,  950, 1354,  895,  834, 1019, 1021,  496 },
+    {  704,  815, 1193,  971,  973,  640, 1217, 2214,  832,  578 },
+    {  672, 1245,  579,  871,  875,  774,  872, 1273, 1027,  949 },
+    {  296, 1134, 2050, 1784, 1636, 3425,  442, 1550, 2076,  722 },
+    {  342,  982, 1259, 1846, 1848, 1848,  622,  568, 1847, 1052 },
+    {  555, 1064, 1304,  828,  746, 1343, 1075, 1329, 1078,  494 },
+    {  288, 1167, 1285, 1174, 1639, 1639,  833, 2254, 1304,  509 } },
+  { {  342,  719,  767, 1866, 1757, 1270, 1246,  550, 1746, 2151 },
+    {  483,  653,  694, 1509, 1459, 1410, 1218,  507, 1914, 1266 },
+    {  488,  757,  447, 2979, 1813, 1268, 1654,  539, 1849, 2109 },
+    {  522, 1097, 1085,  851, 1365, 1111,  851,  901,  961,  605 },
+    {  709,  716,  841,  728,  736,  945,  941,  862, 2845, 1057 },
+    {  512, 1323,  500, 1336, 1083,  681, 1342,  717, 1604, 1350 },
+    {  452, 1155, 1372, 1900, 1501, 3290,  311,  944, 1919,  922 },
+    {  403, 1520,  977, 2132, 1733, 3522, 1076,  276, 3335, 1547 },
+    {  559, 1374, 1101,  615,  673, 2462,  974,  795,  984,  984 },
+    {  547, 1122, 1062,  812, 1410,  951, 1140,  622, 1268,  651 } },
+  { {  165,  982, 1235,  938, 1334, 1366, 1659, 1578,  964, 1612 },
+    {  592,  422,  925,  847, 1139, 1112, 1387, 2036,  861, 1041 },
+    {  403,  837,  732,  770,  941, 1658, 1250,  809, 1407, 1407 },
+    {  896,  874, 1071,  381, 1568, 1722, 1437, 2192,  480, 1035 },
+    {  640, 1098, 1012, 1032,  684, 1382, 1581, 2106,  416,  865 },
+    {  559, 1005,  819,  914,  710,  770, 1418,  920,  838, 1435 },
+    {  415, 1258, 1245,  870, 1278, 3067,  770, 1021, 1287,  522 },
+    {  406,  990,  601, 1009, 1265, 1265, 1267,  759, 1017, 1277 },
+    {  968, 1182, 1329,  788, 1032, 1292, 1705, 1714,  203, 1403 },
+    {  732,  877, 1279,  471,  901, 1161, 1545, 1294,  755,  755 } },
+  { {  111,  931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307 },
+    {  406,  414, 1030, 1023, 1910, 1404, 1313, 1647, 1509,  793 },
+    {  342,  640,  575, 1088, 1241, 1349, 1161, 1350, 1756, 1502 },
+    {  559,  766, 1185,  357, 1682, 1428, 1329, 1897, 1219,  802 },
+    {  473,  909, 1164,  771,  719, 2508, 1427, 1432,  722,  782 },
+    {  342,  892,  785, 1145, 1150,  794, 1296, 1550,  973, 1057 },
+    {  208, 1036, 1326, 1343, 1606, 3395,  815, 1455, 1618,  712 },
+    {  228,  928,  890, 1046, 3499, 1711,  994,  829, 1720, 1318 },
+    {  768,  724, 1058,  636,  991, 1075, 1319, 1324,  616,  825 },
+    {  305, 1167, 1358,  899, 1587, 1587,  987, 1988, 1332,  501 } }
+};
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  const int p0 = res->prob[n][ctx0][0];
+  const uint16_t* t = res->cost[n][ctx0];
+  // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+  // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+  // be missing during the loop.
+  int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+
+  if (res->last < 0) {
+    return VP8BitCost(0, p0);
+  }
+  for (; n < res->last; ++n) {
+    const int v = abs(res->coeffs[n]);
+    const int b = VP8EncBands[n + 1];
+    const int ctx = (v >= 2) ? 2 : v;
+    cost += VP8LevelCost(t, v);
+    t = res->cost[b][ctx];
+  }
+  // Last coefficient is always non-zero
+  {
+    const int v = abs(res->coeffs[n]);
+    assert(v != 0);
+    cost += VP8LevelCost(t, v);
+    if (n < 15) {
+      const int b = VP8EncBands[n + 1];
+      const int ctx = (v == 1) ? 1 : 2;
+      const int last_p0 = res->prob[b][ctx][0];
+      cost += VP8BitCost(0, last_p0);
+    }
+  }
+  return cost;
+}
+
+//------------------------------------------------------------------------------
+// init function
+
+#if defined(WEBP_USE_MIPS32)
+extern int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res);
+#endif  // WEBP_USE_MIPS32
+
+// TODO(skal): this, and GetResidualCost(), should probably go somewhere
+// under src/dsp/ at some point.
+VP8GetResidualCostFunc VP8GetResidualCost;
+
+void VP8GetResidualCostInit(void) {
+  VP8GetResidualCost = GetResidualCost;
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8GetResidualCost = VP8GetResidualCostMIPS32;
+    }
+#endif
+  }
+}
+
+//------------------------------------------------------------------------------
+// helper functions for residuals struct VP8Residual.
+
+void VP8InitResidual(int first, int coeff_type,
+                     VP8Encoder* const enc, VP8Residual* const res) {
+  res->coeff_type = coeff_type;
+  res->prob  = enc->proba_.coeffs_[coeff_type];
+  res->stats = enc->proba_.stats_[coeff_type];
+  res->cost  = enc->proba_.level_cost_[coeff_type];
+  res->first = first;
+}
+
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
+  int n;
+  res->last = -1;
+  assert(res->first == 0 || coeffs[0] == 0);
+  for (n = 15; n >= 0; --n) {
+    if (coeffs[n]) {
+      res->last = n;
+      break;
+    }
+  }
+  res->coeffs = coeffs;
+}
+
+//------------------------------------------------------------------------------
+// init function
+
+#if defined(WEBP_USE_SSE2)
+extern void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
+                                     VP8Residual* const res);
+#endif  // WEBP_USE_SSE2
+
+VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
+
+void VP8SetResidualCoeffsInit(void) {
+  VP8SetResidualCoeffs = SetResidualCoeffs;
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8SetResidualCoeffs = VP8SetResidualCoeffsSSE2;
+    }
+#endif
+  }
+}
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
+  const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int R = 0;
+  int ctx;
+
+  VP8InitResidual(0, 3, enc, &res);
+  ctx = it->top_nz_[x] + it->left_nz_[y];
+  VP8SetResidualCoeffs(levels, &res);
+  R += VP8GetResidualCost(ctx, &res);
+  return R;
+}
+
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int x, y;
+  int R = 0;
+
+  VP8IteratorNzToBytes(it);   // re-import the non-zero context
+
+  // DC
+  VP8InitResidual(0, 1, enc, &res);
+  VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+  R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
+
+  // AC
+  VP8InitResidual(1, 0, enc, &res);
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      R += VP8GetResidualCost(ctx, &res);
+      it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);
+    }
+  }
+  return R;
+}
+
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int ch, x, y;
+  int R = 0;
+
+  VP8IteratorNzToBytes(it);  // re-import the non-zero context
+
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        R += VP8GetResidualCost(ctx, &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
+      }
+    }
+  }
+  return R;
+}
+
+
+//------------------------------------------------------------------------------
+// Recording of token probabilities.
+
+// Record proba context used
+static int Record(int bit, proba_t* const stats) {
+  proba_t p = *stats;
+  if (p >= 0xffff0000u) {               // an overflow is inbound.
+    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
+  }
+  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+  p += 0x00010000u + bit;
+  *stats = p;
+  return bit;
+}
+
+// We keep the table-free variant around for reference, in case.
+#define USE_LEVEL_CODE_TABLE
+
+// Simulate block coding, but only record statistics.
+// Note: no need to record the fixed probas.
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res) {
+  int n = res->first;
+  // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  proba_t* s = res->stats[n][ctx];
+  if (res->last  < 0) {
+    Record(0, s + 0);
+    return 0;
+  }
+  while (n <= res->last) {
+    int v;
+    Record(1, s + 0);  // order of record doesn't matter
+    while ((v = res->coeffs[n++]) == 0) {
+      Record(0, s + 1);
+      s = res->stats[VP8EncBands[n]][0];
+    }
+    Record(1, s + 1);
+    if (!Record(2u < (unsigned int)(v + 1), s + 2)) {  // v = -1 or 1
+      s = res->stats[VP8EncBands[n]][1];
+    } else {
+      v = abs(v);
+#if !defined(USE_LEVEL_CODE_TABLE)
+      if (!Record(v > 4, s + 3)) {
+        if (Record(v != 2, s + 4))
+          Record(v == 4, s + 5);
+      } else if (!Record(v > 10, s + 6)) {
+        Record(v > 6, s + 7);
+      } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
+        Record((v >= 3 + (8 << 1)), s + 9);
+      } else {
+        Record((v >= 3 + (8 << 3)), s + 10);
+      }
+#else
+      if (v > MAX_VARIABLE_LEVEL) {
+        v = MAX_VARIABLE_LEVEL;
+      }
+
+      {
+        const int bits = VP8LevelCodes[v - 1][1];
+        int pattern = VP8LevelCodes[v - 1][0];
+        int i;
+        for (i = 0; (pattern >>= 1) != 0; ++i) {
+          const int mask = 2 << i;
+          if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
+        }
+      }
+#endif
+      s = res->stats[VP8EncBands[n]][2];
+    }
+  }
+  if (n < 16) Record(0, s + 0);
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/cost.h b/src/main/jni/libwebp/enc/cost.h
new file mode 100644
index 000000000..4e5589521
--- /dev/null
+++ b/src/main/jni/libwebp/enc/cost.h
@@ -0,0 +1,83 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_COST_H_
+#define WEBP_ENC_COST_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./vp8enci.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// On-the-fly info about the current set of residuals. Handy to avoid
+// passing zillions of params.
+typedef struct {
+  int first;
+  int last;
+  const int16_t* coeffs;
+
+  int coeff_type;
+  ProbaArray* prob;
+  StatsArray* stats;
+  CostArray*  cost;
+} VP8Residual;
+
+void VP8InitResidual(int first, int coeff_type,
+                     VP8Encoder* const enc, VP8Residual* const res);
+
+typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
+                                         VP8Residual* const res);
+extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
+
+void VP8SetResidualCoeffsInit(void);  // must be called first
+
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
+
+// approximate cost per level:
+extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
+extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
+
+// Cost of coding one event with probability 'proba'.
+static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
+  return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
+}
+
+// Cost calculation function.
+typedef int (*VP8GetResidualCostFunc)(int ctx0, const VP8Residual* const res);
+extern VP8GetResidualCostFunc VP8GetResidualCost;
+
+void VP8GetResidualCostInit(void);  // must be called first
+
+// Level cost calculations
+extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
+void VP8CalculateLevelCosts(VP8Proba* const proba);
+static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
+  return VP8LevelFixedCosts[level]
+       + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level];
+}
+
+// Mode costs
+extern const uint16_t VP8FixedCostsUV[4];
+extern const uint16_t VP8FixedCostsI16[4];
+extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_COST_H_ */
diff --git a/src/main/jni/libwebp/enc/filter.c b/src/main/jni/libwebp/enc/filter.c
new file mode 100644
index 000000000..11db4bd8c
--- /dev/null
+++ b/src/main/jni/libwebp/enc/filter.c
@@ -0,0 +1,296 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Selecting filter level
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include <assert.h>
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+
+// This table gives, for a given sharpness, the filtering strength to be
+// used (at least) in order to filter a given edge step delta.
+// This is constructed by brute force inspection: for all delta, we iterate
+// over all possible filtering strength / thresh until needs_filter() returns
+// true.
+#define MAX_DELTA_SIZE 64
+static const uint8_t kLevelsFromDelta[8][MAX_DELTA_SIZE] = {
+  { 0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+  { 0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 14, 15, 17, 18,
+    20, 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42,
+    44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 14, 16, 17, 19,
+    20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43,
+    44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 15, 16, 18, 19,
+    21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43,
+    45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 14, 15, 17, 18, 20,
+    21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44,
+    45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 13, 15, 16, 17, 19, 20,
+    22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44,
+    46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 13, 15, 16, 18, 19, 21,
+    22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45,
+    46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 14, 15, 17, 18, 20, 21,
+    23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45,
+    47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }
+};
+
+int VP8FilterStrengthFromDelta(int sharpness, int delta) {
+  const int pos = (delta < MAX_DELTA_SIZE) ? delta : MAX_DELTA_SIZE - 1;
+  assert(sharpness >= 0 && sharpness <= 7);
+  return kLevelsFromDelta[sharpness][pos];
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 15.4: compute the inner-edge filtering strength
+
+static int GetILevel(int sharpness, int level) {
+  if (sharpness > 0) {
+    if (sharpness > 4) {
+      level >>= 2;
+    } else {
+      level >>= 1;
+    }
+    if (level > 9 - sharpness) {
+      level = 9 - sharpness;
+    }
+  }
+  if (level < 1) level = 1;
+  return level;
+}
+
+static void DoFilter(const VP8EncIterator* const it, int level) {
+  const VP8Encoder* const enc = it->enc_;
+  const int ilevel = GetILevel(enc->config_->filter_sharpness, level);
+  const int limit = 2 * level + ilevel;
+
+  uint8_t* const y_dst = it->yuv_out2_ + Y_OFF;
+  uint8_t* const u_dst = it->yuv_out2_ + U_OFF;
+  uint8_t* const v_dst = it->yuv_out2_ + V_OFF;
+
+  // copy current block to yuv_out2_
+  memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t));
+
+  if (enc->filter_hdr_.simple_ == 1) {   // simple
+    VP8SimpleHFilter16i(y_dst, BPS, limit);
+    VP8SimpleVFilter16i(y_dst, BPS, limit);
+  } else {    // complex
+    const int hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
+    VP8HFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
+    VP8HFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
+    VP8VFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
+    VP8VFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// SSIM metric
+
+enum { KERNEL = 3 };
+static const double kMinValue = 1.e-10;  // minimal threshold
+
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) {
+  dst->w   += src->w;
+  dst->xm  += src->xm;
+  dst->ym  += src->ym;
+  dst->xxm += src->xxm;
+  dst->xym += src->xym;
+  dst->yym += src->yym;
+}
+
+static void VP8SSIMAccumulate(const uint8_t* src1, int stride1,
+                              const uint8_t* src2, int stride2,
+                              int xo, int yo, int W, int H,
+                              DistoStats* const stats) {
+  const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
+  const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
+  const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
+  const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
+  int x, y;
+  src1 += ymin * stride1;
+  src2 += ymin * stride2;
+  for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
+    for (x = xmin; x <= xmax; ++x) {
+      const int s1 = src1[x];
+      const int s2 = src2[x];
+      stats->w   += 1;
+      stats->xm  += s1;
+      stats->ym  += s2;
+      stats->xxm += s1 * s1;
+      stats->xym += s1 * s2;
+      stats->yym += s2 * s2;
+    }
+  }
+}
+
+double VP8SSIMGet(const DistoStats* const stats) {
+  const double xmxm = stats->xm * stats->xm;
+  const double ymym = stats->ym * stats->ym;
+  const double xmym = stats->xm * stats->ym;
+  const double w2 = stats->w * stats->w;
+  double sxx = stats->xxm * stats->w - xmxm;
+  double syy = stats->yym * stats->w - ymym;
+  double sxy = stats->xym * stats->w - xmym;
+  double C1, C2;
+  double fnum;
+  double fden;
+  // small errors are possible, due to rounding. Clamp to zero.
+  if (sxx < 0.) sxx = 0.;
+  if (syy < 0.) syy = 0.;
+  C1 = 6.5025 * w2;
+  C2 = 58.5225 * w2;
+  fnum = (2 * xmym + C1) * (2 * sxy + C2);
+  fden = (xmxm + ymym + C1) * (sxx + syy + C2);
+  return (fden != 0.) ? fnum / fden : kMinValue;
+}
+
+double VP8SSIMGetSquaredError(const DistoStats* const s) {
+  if (s->w > 0.) {
+    const double iw2 = 1. / (s->w * s->w);
+    const double sxx = s->xxm * s->w - s->xm * s->xm;
+    const double syy = s->yym * s->w - s->ym * s->ym;
+    const double sxy = s->xym * s->w - s->xm * s->ym;
+    const double SSE = iw2 * (sxx + syy - 2. * sxy);
+    if (SSE > kMinValue) return SSE;
+  }
+  return kMinValue;
+}
+
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+                            const uint8_t* src2, int stride2,
+                            int W, int H, DistoStats* const stats) {
+  int x, y;
+  for (y = 0; y < H; ++y) {
+    for (x = 0; x < W; ++x) {
+      VP8SSIMAccumulate(src1, stride1, src2, stride2, x, y, W, H, stats);
+    }
+  }
+}
+
+static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
+  int x, y;
+  DistoStats s = { .0, .0, .0, .0, .0, .0 };
+
+  // compute SSIM in a 10 x 10 window
+  for (x = 3; x < 13; x++) {
+    for (y = 3; y < 13; y++) {
+      VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
+    }
+  }
+  for (x = 1; x < 7; x++) {
+    for (y = 1; y < 7; y++) {
+      VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
+      VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
+    }
+  }
+  return VP8SSIMGet(&s);
+}
+
+//------------------------------------------------------------------------------
+// Exposed APIs: Encoder should call the following 3 functions to adjust
+// loop filter strength
+
+void VP8InitFilter(VP8EncIterator* const it) {
+  if (it->lf_stats_ != NULL) {
+    int s, i;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      for (i = 0; i < MAX_LF_LEVELS; i++) {
+        (*it->lf_stats_)[s][i] = 0;
+      }
+    }
+  }
+}
+
+void VP8StoreFilterStats(VP8EncIterator* const it) {
+  int d;
+  VP8Encoder* const enc = it->enc_;
+  const int s = it->mb_->segment_;
+  const int level0 = enc->dqm_[s].fstrength_;  // TODO: ref_lf_delta[]
+
+  // explore +/-quant range of values around level0
+  const int delta_min = -enc->dqm_[s].quant_;
+  const int delta_max = enc->dqm_[s].quant_;
+  const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;
+
+  if (it->lf_stats_ == NULL) return;
+
+  // NOTE: Currently we are applying filter only across the sublock edges
+  // There are two reasons for that.
+  // 1. Applying filter on macro block edges will change the pixels in
+  // the left and top macro blocks. That will be hard to restore
+  // 2. Macro Blocks on the bottom and right are not yet compressed. So we
+  // cannot apply filter on the right and bottom macro block edges.
+  if (it->mb_->type_ == 1 && it->mb_->skip_) return;
+
+  // Always try filter level  zero
+  (*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_);
+
+  for (d = delta_min; d <= delta_max; d += step_size) {
+    const int level = level0 + d;
+    if (level <= 0 || level >= MAX_LF_LEVELS) {
+      continue;
+    }
+    DoFilter(it, level);
+    (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
+  }
+}
+
+void VP8AdjustFilterStrength(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  if (it->lf_stats_ != NULL) {
+    int s;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      int i, best_level = 0;
+      // Improvement over filter level 0 should be at least 1e-5 (relatively)
+      double best_v = 1.00001 * (*it->lf_stats_)[s][0];
+      for (i = 1; i < MAX_LF_LEVELS; i++) {
+        const double v = (*it->lf_stats_)[s][i];
+        if (v > best_v) {
+          best_v = v;
+          best_level = i;
+        }
+      }
+      enc->dqm_[s].fstrength_ = best_level;
+    }
+  } else if (enc->config_->filter_strength > 0) {
+    int max_level = 0;
+    int s;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      VP8SegmentInfo* const dqm = &enc->dqm_[s];
+      // this '>> 3' accounts for some inverse WHT scaling
+      const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3;
+      const int level =
+          VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta);
+      if (level > dqm->fstrength_) {
+        dqm->fstrength_ = level;
+      }
+      if (max_level < dqm->fstrength_) {
+        max_level = dqm->fstrength_;
+      }
+    }
+    enc->filter_hdr_.level_ = max_level;
+  }
+}
+
+// -----------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/frame.c b/src/main/jni/libwebp/enc/frame.c
new file mode 100644
index 000000000..cdf1dabfc
--- /dev/null
+++ b/src/main/jni/libwebp/enc/frame.c
@@ -0,0 +1,854 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   frame coding and analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../webp/format_constants.h"  // RIFF constants
+
+#define SEGMENT_VISU 0
+#define DEBUG_SEARCH 0    // useful to track search convergence
+
+//------------------------------------------------------------------------------
+// multi-pass convergence
+
+#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE +  \
+                              VP8_FRAME_HEADER_SIZE)
+#define DQ_LIMIT 0.4  // convergence is considered reached if dq < DQ_LIMIT
+// we allow 2k of extra head-room in PARTITION0 limit.
+#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11)
+
+typedef struct {  // struct for organizing convergence in either size or PSNR
+  int is_first;
+  float dq;
+  float q, last_q;
+  double value, last_value;   // PSNR or size
+  double target;
+  int do_size_search;
+} PassStats;
+
+static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
+  const uint64_t target_size = (uint64_t)enc->config_->target_size;
+  const int do_size_search = (target_size != 0);
+  const float target_PSNR = enc->config_->target_PSNR;
+
+  s->is_first = 1;
+  s->dq = 10.f;
+  s->q = s->last_q = enc->config_->quality;
+  s->target = do_size_search ? (double)target_size
+            : (target_PSNR > 0.) ? target_PSNR
+            : 40.;   // default, just in case
+  s->value = s->last_value = 0.;
+  s->do_size_search = do_size_search;
+  return do_size_search;
+}
+
+static float Clamp(float v, float min, float max) {
+  return (v < min) ? min : (v > max) ? max : v;
+}
+
+static float ComputeNextQ(PassStats* const s) {
+  float dq;
+  if (s->is_first) {
+    dq = (s->value > s->target) ? -s->dq : s->dq;
+    s->is_first = 0;
+  } else if (s->value != s->last_value) {
+    const double slope = (s->target - s->value) / (s->last_value - s->value);
+    dq = (float)(slope * (s->last_q - s->q));
+  } else {
+    dq = 0.;  // we're done?!
+  }
+  // Limit variable to avoid large swings.
+  s->dq = Clamp(dq, -30.f, 30.f);
+  s->last_q = s->q;
+  s->last_value = s->value;
+  s->q = Clamp(s->q + s->dq, 0.f, 100.f);
+  return s->q;
+}
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8EncBands[16 + 1] = {
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // sentinel
+};
+
+const uint8_t VP8Cat3[] = { 173, 148, 140 };
+const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
+const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
+const uint8_t VP8Cat6[] =
+    { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
+
+//------------------------------------------------------------------------------
+// Reset the statistics about: number of skips, token proba, level cost,...
+
+static void ResetStats(VP8Encoder* const enc) {
+  VP8Proba* const proba = &enc->proba_;
+  VP8CalculateLevelCosts(proba);
+  proba->nb_skip_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Skip decision probability
+
+#define SKIP_PROBA_THRESHOLD 250  // value below which using skip_proba is OK.
+
+static int CalcSkipProba(uint64_t nb, uint64_t total) {
+  return (int)(total ? (total - nb) * 255 / total : 255);
+}
+
+// Returns the bit-cost for coding the skip probability.
+static int FinalizeSkipProba(VP8Encoder* const enc) {
+  VP8Proba* const proba = &enc->proba_;
+  const int nb_mbs = enc->mb_w_ * enc->mb_h_;
+  const int nb_events = proba->nb_skip_;
+  int size;
+  proba->skip_proba_ = CalcSkipProba(nb_events, nb_mbs);
+  proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
+  size = 256;   // 'use_skip_proba' bit
+  if (proba->use_skip_proba_) {
+    size +=  nb_events * VP8BitCost(1, proba->skip_proba_)
+         + (nb_mbs - nb_events) * VP8BitCost(0, proba->skip_proba_);
+    size += 8 * 256;   // cost of signaling the skip_proba_ itself.
+  }
+  return size;
+}
+
+// Collect statistics and deduce probabilities for next coding pass.
+// Return the total bit-cost for coding the probability updates.
+static int CalcTokenProba(int nb, int total) {
+  assert(nb <= total);
+  return nb ? (255 - nb * 255 / total) : 255;
+}
+
+// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
+static int BranchCost(int nb, int total, int proba) {
+  return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
+}
+
+static void ResetTokenStats(VP8Encoder* const enc) {
+  VP8Proba* const proba = &enc->proba_;
+  memset(proba->stats_, 0, sizeof(proba->stats_));
+}
+
+static int FinalizeTokenProbas(VP8Proba* const proba) {
+  int has_changed = 0;
+  int size = 0;
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const proba_t stats = proba->stats_[t][b][c][p];
+          const int nb = (stats >> 0) & 0xffff;
+          const int total = (stats >> 16) & 0xffff;
+          const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
+          const int old_p = VP8CoeffsProba0[t][b][c][p];
+          const int new_p = CalcTokenProba(nb, total);
+          const int old_cost = BranchCost(nb, total, old_p)
+                             + VP8BitCost(0, update_proba);
+          const int new_cost = BranchCost(nb, total, new_p)
+                             + VP8BitCost(1, update_proba)
+                             + 8 * 256;
+          const int use_new_p = (old_cost > new_cost);
+          size += VP8BitCost(use_new_p, update_proba);
+          if (use_new_p) {  // only use proba that seem meaningful enough.
+            proba->coeffs_[t][b][c][p] = new_p;
+            has_changed |= (new_p != old_p);
+            size += 8 * 256;
+          } else {
+            proba->coeffs_[t][b][c][p] = old_p;
+          }
+        }
+      }
+    }
+  }
+  proba->dirty_ = has_changed;
+  return size;
+}
+
+//------------------------------------------------------------------------------
+// Finalize Segment probability based on the coding tree
+
+static int GetProba(int a, int b) {
+  const int total = a + b;
+  return (total == 0) ? 255     // that's the default probability.
+                      : (255 * a + total / 2) / total;  // rounded proba
+}
+
+static void SetSegmentProbas(VP8Encoder* const enc) {
+  int p[NUM_MB_SEGMENTS] = { 0 };
+  int n;
+
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    const VP8MBInfo* const mb = &enc->mb_info_[n];
+    p[mb->segment_]++;
+  }
+  if (enc->pic_->stats != NULL) {
+    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
+      enc->pic_->stats->segment_size[n] = p[n];
+    }
+  }
+  if (enc->segment_hdr_.num_segments_ > 1) {
+    uint8_t* const probas = enc->proba_.segments_;
+    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
+    probas[1] = GetProba(p[0], p[1]);
+    probas[2] = GetProba(p[2], p[3]);
+
+    enc->segment_hdr_.update_map_ =
+        (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
+    enc->segment_hdr_.size_ =
+        p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
+        p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
+        p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
+        p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
+  } else {
+    enc->segment_hdr_.update_map_ = 0;
+    enc->segment_hdr_.size_ = 0;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Coefficient coding
+
+static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  const uint8_t* p = res->prob[n][ctx];
+  if (!VP8PutBit(bw, res->last >= 0, p[0])) {
+    return 0;
+  }
+
+  while (n < 16) {
+    const int c = res->coeffs[n++];
+    const int sign = c < 0;
+    int v = sign ? -c : c;
+    if (!VP8PutBit(bw, v != 0, p[1])) {
+      p = res->prob[VP8EncBands[n]][0];
+      continue;
+    }
+    if (!VP8PutBit(bw, v > 1, p[2])) {
+      p = res->prob[VP8EncBands[n]][1];
+    } else {
+      if (!VP8PutBit(bw, v > 4, p[3])) {
+        if (VP8PutBit(bw, v != 2, p[4]))
+          VP8PutBit(bw, v == 4, p[5]);
+      } else if (!VP8PutBit(bw, v > 10, p[6])) {
+        if (!VP8PutBit(bw, v > 6, p[7])) {
+          VP8PutBit(bw, v == 6, 159);
+        } else {
+          VP8PutBit(bw, v >= 9, 165);
+          VP8PutBit(bw, !(v & 1), 145);
+        }
+      } else {
+        int mask;
+        const uint8_t* tab;
+        if (v < 3 + (8 << 1)) {          // VP8Cat3  (3b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 0, p[9]);
+          v -= 3 + (8 << 0);
+          mask = 1 << 2;
+          tab = VP8Cat3;
+        } else if (v < 3 + (8 << 2)) {   // VP8Cat4  (4b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 1, p[9]);
+          v -= 3 + (8 << 1);
+          mask = 1 << 3;
+          tab = VP8Cat4;
+        } else if (v < 3 + (8 << 3)) {   // VP8Cat5  (5b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 0, p[10]);
+          v -= 3 + (8 << 2);
+          mask = 1 << 4;
+          tab = VP8Cat5;
+        } else {                         // VP8Cat6 (11b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 1, p[10]);
+          v -= 3 + (8 << 3);
+          mask = 1 << 10;
+          tab = VP8Cat6;
+        }
+        while (mask) {
+          VP8PutBit(bw, !!(v & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      p = res->prob[VP8EncBands[n]][2];
+    }
+    VP8PutBitUniform(bw, sign);
+    if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
+                          const VP8ModeScore* const rd) {
+  int x, y, ch;
+  VP8Residual res;
+  uint64_t pos1, pos2, pos3;
+  const int i16 = (it->mb_->type_ == 1);
+  const int segment = it->mb_->segment_;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+
+  pos1 = VP8BitWriterPos(bw);
+  if (i16) {
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
+    VP8InitResidual(1, 0, enc, &res);
+  } else {
+    VP8InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
+    }
+  }
+  pos2 = VP8BitWriterPos(bw);
+
+  // U/V
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            PutCoeffs(bw, ctx, &res);
+      }
+    }
+  }
+  pos3 = VP8BitWriterPos(bw);
+  it->luma_bits_ = pos2 - pos1;
+  it->uv_bits_ = pos3 - pos2;
+  it->bit_count_[segment][i16] += it->luma_bits_;
+  it->bit_count_[segment][2] += it->uv_bits_;
+  VP8IteratorBytesToNz(it);
+}
+
+// Same as CodeResiduals, but doesn't actually write anything.
+// Instead, it just records the event distribution.
+static void RecordResiduals(VP8EncIterator* const it,
+                            const VP8ModeScore* const rd) {
+  int x, y, ch;
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+
+  if (it->mb_->type_ == 1) {   // i16x16
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
+    VP8InitResidual(1, 0, enc, &res);
+  } else {
+    VP8InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
+    }
+  }
+
+  // U/V
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            VP8RecordCoeffs(ctx, &res);
+      }
+    }
+  }
+
+  VP8IteratorBytesToNz(it);
+}
+
+//------------------------------------------------------------------------------
+// Token buffer
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
+                        VP8TBuffer* const tokens) {
+  int x, y, ch;
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+  if (it->mb_->type_ == 1) {   // i16x16
+    const int ctx = it->top_nz_[8] + it->left_nz_[8];
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+        VP8RecordCoeffTokens(ctx, 1,
+                             res.first, res.last, res.coeffs, tokens);
+    VP8RecordCoeffs(ctx, &res);
+    VP8InitResidual(1, 0, enc, &res);
+  } else {
+    VP8InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] =
+          VP8RecordCoeffTokens(ctx, res.coeff_type,
+                               res.first, res.last, res.coeffs, tokens);
+      VP8RecordCoeffs(ctx, &res);
+    }
+  }
+
+  // U/V
+  VP8InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            VP8RecordCoeffTokens(ctx, 2,
+                                 res.first, res.last, res.coeffs, tokens);
+        VP8RecordCoeffs(ctx, &res);
+      }
+    }
+  }
+  VP8IteratorBytesToNz(it);
+  return !tokens->error_;
+}
+
+#endif    // !DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// ExtraInfo map / Debug function
+
+#if SEGMENT_VISU
+static void SetBlock(uint8_t* p, int value, int size) {
+  int y;
+  for (y = 0; y < size; ++y) {
+    memset(p, value, size);
+    p += BPS;
+  }
+}
+#endif
+
+static void ResetSSE(VP8Encoder* const enc) {
+  enc->sse_[0] = 0;
+  enc->sse_[1] = 0;
+  enc->sse_[2] = 0;
+  // Note: enc->sse_[3] is managed by alpha.c
+  enc->sse_count_ = 0;
+}
+
+static void StoreSSE(const VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  const uint8_t* const in = it->yuv_in_;
+  const uint8_t* const out = it->yuv_out_;
+  // Note: not totally accurate at boundary. And doesn't include in-loop filter.
+  enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF);
+  enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF);
+  enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF);
+  enc->sse_count_ += 16 * 16;
+}
+
+static void StoreSideInfo(const VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  const VP8MBInfo* const mb = it->mb_;
+  WebPPicture* const pic = enc->pic_;
+
+  if (pic->stats != NULL) {
+    StoreSSE(it);
+    enc->block_count_[0] += (mb->type_ == 0);
+    enc->block_count_[1] += (mb->type_ == 1);
+    enc->block_count_[2] += (mb->skip_ != 0);
+  }
+
+  if (pic->extra_info != NULL) {
+    uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
+    switch (pic->extra_info_type) {
+      case 1: *info = mb->type_; break;
+      case 2: *info = mb->segment_; break;
+      case 3: *info = enc->dqm_[mb->segment_].quant_; break;
+      case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
+      case 5: *info = mb->uv_mode_; break;
+      case 6: {
+        const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
+        *info = (b > 255) ? 255 : b; break;
+      }
+      case 7: *info = mb->alpha_; break;
+      default: *info = 0; break;
+    }
+  }
+#if SEGMENT_VISU  // visualize segments and prediction modes
+  SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16);
+  SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8);
+  SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8);
+#endif
+}
+
+static double GetPSNR(uint64_t mse, uint64_t size) {
+  return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99;
+}
+
+//------------------------------------------------------------------------------
+//  StatLoop(): only collect statistics (number of skips, token usage, ...).
+//  This is used for deciding optimal probabilities. It also modifies the
+//  quantizer value if some target (size, PSNR) was specified.
+
+static void SetLoopParams(VP8Encoder* const enc, float q) {
+  // Make sure the quality parameter is inside valid bounds
+  q = Clamp(q, 0.f, 100.f);
+
+  VP8SetSegmentParams(enc, q);      // setup segment quantizations and filters
+  SetSegmentProbas(enc);            // compute segment probabilities
+
+  ResetStats(enc);
+  ResetSSE(enc);
+}
+
+static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
+                            int nb_mbs, int percent_delta,
+                            PassStats* const s) {
+  VP8EncIterator it;
+  uint64_t size = 0;
+  uint64_t size_p0 = 0;
+  uint64_t distortion = 0;
+  const uint64_t pixel_count = nb_mbs * 384;
+
+  VP8IteratorInit(enc, &it);
+  SetLoopParams(enc, s->q);
+  do {
+    VP8ModeScore info;
+    VP8IteratorImport(&it, NULL);
+    if (VP8Decimate(&it, &info, rd_opt)) {
+      // Just record the number of skips and act like skip_proba is not used.
+      enc->proba_.nb_skip_++;
+    }
+    RecordResiduals(&it, &info);
+    size += info.R + info.H;
+    size_p0 += info.H;
+    distortion += info.D;
+    if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
+      return 0;
+    VP8IteratorSaveBoundary(&it);
+  } while (VP8IteratorNext(&it) && --nb_mbs > 0);
+
+  size_p0 += enc->segment_hdr_.size_;
+  if (s->do_size_search) {
+    size += FinalizeSkipProba(enc);
+    size += FinalizeTokenProbas(&enc->proba_);
+    size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
+    s->value = (double)size;
+  } else {
+    s->value = GetPSNR(distortion, pixel_count);
+  }
+  return size_p0;
+}
+
+static int StatLoop(VP8Encoder* const enc) {
+  const int method = enc->method_;
+  const int do_search = enc->do_search_;
+  const int fast_probe = ((method == 0 || method == 3) && !do_search);
+  int num_pass_left = enc->config_->pass;
+  const int task_percent = 20;
+  const int percent_per_pass =
+      (task_percent + num_pass_left / 2) / num_pass_left;
+  const int final_percent = enc->percent_ + task_percent;
+  const VP8RDLevel rd_opt =
+      (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
+  int nb_mbs = enc->mb_w_ * enc->mb_h_;
+  PassStats stats;
+
+  InitPassStats(enc, &stats);
+  ResetTokenStats(enc);
+
+  // Fast mode: quick analysis pass over few mbs. Better than nothing.
+  if (fast_probe) {
+    if (method == 3) {  // we need more stats for method 3 to be reliable.
+      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
+    } else {
+      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
+    }
+  }
+
+  while (num_pass_left-- > 0) {
+    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+                             (num_pass_left == 0) ||
+                             (enc->max_i4_header_bits_ == 0);
+    const uint64_t size_p0 =
+        OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
+    if (size_p0 == 0) return 0;
+#if (DEBUG_SEARCH > 0)
+    printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n",
+           num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
+#endif
+    if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+      ++num_pass_left;
+      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      continue;                        // ...and start over
+    }
+    if (is_last_pass) {
+      break;
+    }
+    // If no target size: just do several pass without changing 'q'
+    if (do_search) {
+      ComputeNextQ(&stats);
+      if (fabs(stats.dq) <= DQ_LIMIT) break;
+    }
+  }
+  if (!do_search || !stats.do_size_search) {
+    // Need to finalize probas now, since it wasn't done during the search.
+    FinalizeSkipProba(enc);
+    FinalizeTokenProbas(&enc->proba_);
+  }
+  VP8CalculateLevelCosts(&enc->proba_);  // finalize costs
+  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+}
+
+//------------------------------------------------------------------------------
+// Main loops
+//
+
+static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
+
+static int PreLoopInitialize(VP8Encoder* const enc) {
+  int p;
+  int ok = 1;
+  const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
+  const int bytes_per_parts =
+      enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
+  // Initialize the bit-writers
+  for (p = 0; ok && p < enc->num_parts_; ++p) {
+    ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
+  }
+  if (!ok) {
+    VP8EncFreeBitWriters(enc);  // malloc error occurred
+    WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  return ok;
+}
+
+static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
+  VP8Encoder* const enc = it->enc_;
+  if (ok) {      // Finalize the partitions, check for extra errors.
+    int p;
+    for (p = 0; p < enc->num_parts_; ++p) {
+      VP8BitWriterFinish(enc->parts_ + p);
+      ok &= !enc->parts_[p].error_;
+    }
+  }
+
+  if (ok) {      // All good. Finish up.
+    if (enc->pic_->stats != NULL) {  // finalize byte counters...
+      int i, s;
+      for (i = 0; i <= 2; ++i) {
+        for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+          enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
+        }
+      }
+    }
+    VP8AdjustFilterStrength(it);     // ...and store filter stats.
+  } else {
+    // Something bad happened -> need to do some memory cleanup.
+    VP8EncFreeBitWriters(enc);
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+//  VP8EncLoop(): does the final bitstream coding.
+
+static void ResetAfterSkip(VP8EncIterator* const it) {
+  if (it->mb_->type_ == 1) {
+    *it->nz_ = 0;  // reset all predictors
+    it->left_nz_[8] = 0;
+  } else {
+    *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
+  }
+}
+
+int VP8EncLoop(VP8Encoder* const enc) {
+  VP8EncIterator it;
+  int ok = PreLoopInitialize(enc);
+  if (!ok) return 0;
+
+  StatLoop(enc);  // stats-collection loop
+
+  VP8IteratorInit(enc, &it);
+  VP8InitFilter(&it);
+  do {
+    VP8ModeScore info;
+    const int dont_use_skip = !enc->proba_.use_skip_proba_;
+    const VP8RDLevel rd_opt = enc->rd_opt_level_;
+
+    VP8IteratorImport(&it, NULL);
+    // Warning! order is important: first call VP8Decimate() and
+    // *then* decide how to code the skip decision if there's one.
+    if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
+      CodeResiduals(it.bw_, &it, &info);
+    } else {   // reset predictors after a skip
+      ResetAfterSkip(&it);
+    }
+    StoreSideInfo(&it);
+    VP8StoreFilterStats(&it);
+    VP8IteratorExport(&it);
+    ok = VP8IteratorProgress(&it, 20);
+    VP8IteratorSaveBoundary(&it);
+  } while (ok && VP8IteratorNext(&it));
+
+  return PostLoopFinalize(&it, ok);
+}
+
+//------------------------------------------------------------------------------
+// Single pass using Token Buffer.
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+#define MIN_COUNT 96  // minimum number of macroblocks before updating stats
+
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+  // Roughly refresh the proba eight times per pass
+  int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
+  int num_pass_left = enc->config_->pass;
+  const int do_search = enc->do_search_;
+  VP8EncIterator it;
+  VP8Proba* const proba = &enc->proba_;
+  const VP8RDLevel rd_opt = enc->rd_opt_level_;
+  const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
+  PassStats stats;
+  int ok;
+
+  InitPassStats(enc, &stats);
+  ok = PreLoopInitialize(enc);
+  if (!ok) return 0;
+
+  if (max_count < MIN_COUNT) max_count = MIN_COUNT;
+
+  assert(enc->num_parts_ == 1);
+  assert(enc->use_tokens_);
+  assert(proba->use_skip_proba_ == 0);
+  assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
+  assert(num_pass_left > 0);
+
+  while (ok && num_pass_left-- > 0) {
+    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+                             (num_pass_left == 0) ||
+                             (enc->max_i4_header_bits_ == 0);
+    uint64_t size_p0 = 0;
+    uint64_t distortion = 0;
+    int cnt = max_count;
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, stats.q);
+    if (is_last_pass) {
+      ResetTokenStats(enc);
+      VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
+    }
+    VP8TBufferClear(&enc->tokens_);
+    do {
+      VP8ModeScore info;
+      VP8IteratorImport(&it, NULL);
+      if (--cnt < 0) {
+        FinalizeTokenProbas(proba);
+        VP8CalculateLevelCosts(proba);  // refresh cost tables for rd-opt
+        cnt = max_count;
+      }
+      VP8Decimate(&it, &info, rd_opt);
+      ok = RecordTokens(&it, &info, &enc->tokens_);
+      if (!ok) {
+        WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        break;
+      }
+      size_p0 += info.H;
+      distortion += info.D;
+      if (is_last_pass) {
+        StoreSideInfo(&it);
+        VP8StoreFilterStats(&it);
+        VP8IteratorExport(&it);
+        ok = VP8IteratorProgress(&it, 20);
+      }
+      VP8IteratorSaveBoundary(&it);
+    } while (ok && VP8IteratorNext(&it));
+    if (!ok) break;
+
+    size_p0 += enc->segment_hdr_.size_;
+    if (stats.do_size_search) {
+      uint64_t size = FinalizeTokenProbas(&enc->proba_);
+      size += VP8EstimateTokenSize(&enc->tokens_,
+                                   (const uint8_t*)proba->coeffs_);
+      size = (size + size_p0 + 1024) >> 11;  // -> size in bytes
+      size += HEADER_SIZE_ESTIMATE;
+      stats.value = (double)size;
+    } else {  // compute and store PSNR
+      stats.value = GetPSNR(distortion, pixel_count);
+    }
+
+#if (DEBUG_SEARCH > 0)
+    printf("#%2d metric:%.1lf -> %.1lf   last_q=%.2lf q=%.2lf dq=%.2lf\n",
+           num_pass_left, stats.last_value, stats.value,
+           stats.last_q, stats.q, stats.dq);
+#endif
+    if (size_p0 > PARTITION0_SIZE_LIMIT) {
+      ++num_pass_left;
+      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      continue;                        // ...and start over
+    }
+    if (is_last_pass) {
+      break;   // done
+    }
+    if (do_search) {
+      ComputeNextQ(&stats);  // Adjust q
+    }
+  }
+  if (ok) {
+    if (!stats.do_size_search) {
+      FinalizeTokenProbas(&enc->proba_);
+    }
+    ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
+                       (const uint8_t*)proba->coeffs_, 1);
+  }
+  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+  return PostLoopFinalize(&it, ok);
+}
+
+#else
+
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+  (void)enc;
+  return 0;   // we shouldn't be here.
+}
+
+#endif    // DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/enc/histogram.c b/src/main/jni/libwebp/enc/histogram.c
new file mode 100644
index 000000000..7c6abb4d6
--- /dev/null
+++ b/src/main/jni/libwebp/enc/histogram.c
@@ -0,0 +1,741 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include <math.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+
+#define MAX_COST 1.e38
+
+// Number of partitions for the three dominant (literal, red and blue) symbol
+// costs.
+#define NUM_PARTITIONS 4
+// The size of the bin-hash corresponding to the three dominant costs.
+#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
+
+static void HistogramClear(VP8LHistogram* const p) {
+  uint32_t* const literal = p->literal_;
+  const int cache_bits = p->palette_code_bits_;
+  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  memset(p, 0, histo_size);
+  p->palette_code_bits_ = cache_bits;
+  p->literal_ = literal;
+}
+
+static void HistogramCopy(const VP8LHistogram* const src,
+                          VP8LHistogram* const dst) {
+  uint32_t* const dst_literal = dst->literal_;
+  const int dst_cache_bits = dst->palette_code_bits_;
+  const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
+  assert(src->palette_code_bits_ == dst_cache_bits);
+  memcpy(dst, src, histo_size);
+  dst->literal_ = dst_literal;
+}
+
+int VP8LGetHistogramSize(int cache_bits) {
+  const int literal_size = VP8LHistogramNumCodes(cache_bits);
+  const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
+  assert(total_size <= (size_t)0x7fffffff);
+  return (int)total_size;
+}
+
+void VP8LFreeHistogram(VP8LHistogram* const histo) {
+  WebPSafeFree(histo);
+}
+
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {
+  WebPSafeFree(histo);
+}
+
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+                            VP8LHistogram* const histo) {
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+    VP8LRefsCursorNext(&c);
+  }
+}
+
+void VP8LHistogramCreate(VP8LHistogram* const p,
+                         const VP8LBackwardRefs* const refs,
+                         int palette_code_bits) {
+  if (palette_code_bits >= 0) {
+    p->palette_code_bits_ = palette_code_bits;
+  }
+  HistogramClear(p);
+  VP8LHistogramStoreRefs(refs, p);
+}
+
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
+  p->palette_code_bits_ = palette_code_bits;
+  HistogramClear(p);
+}
+
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
+  VP8LHistogram* histo = NULL;
+  const int total_size = VP8LGetHistogramSize(cache_bits);
+  uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
+  if (memory == NULL) return NULL;
+  histo = (VP8LHistogram*)memory;
+  // literal_ won't necessary be aligned.
+  histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
+  VP8LHistogramInit(histo, cache_bits);
+  return histo;
+}
+
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
+  int i;
+  VP8LHistogramSet* set;
+  const size_t total_size = sizeof(*set)
+                            + sizeof(*set->histograms) * size
+                            + (size_t)VP8LGetHistogramSize(cache_bits) * size;
+  uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
+  if (memory == NULL) return NULL;
+
+  set = (VP8LHistogramSet*)memory;
+  memory += sizeof(*set);
+  set->histograms = (VP8LHistogram**)memory;
+  memory += size * sizeof(*set->histograms);
+  set->max_size = size;
+  set->size = size;
+  for (i = 0; i < size; ++i) {
+    set->histograms[i] = (VP8LHistogram*)memory;
+    // literal_ won't necessary be aligned.
+    set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
+    VP8LHistogramInit(set->histograms[i], cache_bits);
+    // There's no padding/alignment between successive histograms.
+    memory += VP8LGetHistogramSize(cache_bits);
+  }
+  return set;
+}
+
+// -----------------------------------------------------------------------------
+
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+                                     const PixOrCopy* const v) {
+  if (PixOrCopyIsLiteral(v)) {
+    ++histo->alpha_[PixOrCopyLiteral(v, 3)];
+    ++histo->red_[PixOrCopyLiteral(v, 2)];
+    ++histo->literal_[PixOrCopyLiteral(v, 1)];
+    ++histo->blue_[PixOrCopyLiteral(v, 0)];
+  } else if (PixOrCopyIsCacheIdx(v)) {
+    const int literal_ix =
+        NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
+    ++histo->literal_[literal_ix];
+  } else {
+    int code, extra_bits;
+    VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
+    ++histo->literal_[NUM_LITERAL_CODES + code];
+    VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+    ++histo->distance_[code];
+  }
+}
+
+static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
+                                            double retval) {
+  double mix;
+  if (nonzeros < 5) {
+    if (nonzeros <= 1) {
+      return 0;
+    }
+    // Two symbols, they will be 0 and 1 in a Huffman code.
+    // Let's mix in a bit of entropy to favor good clustering when
+    // distributions of these are combined.
+    if (nonzeros == 2) {
+      return 0.99 * sum + 0.01 * retval;
+    }
+    // No matter what the entropy says, we cannot be better than min_limit
+    // with Huffman coding. I am mixing a bit of entropy into the
+    // min_limit since it produces much better (~0.5 %) compression results
+    // perhaps because of better entropy clustering.
+    if (nonzeros == 3) {
+      mix = 0.95;
+    } else {
+      mix = 0.7;  // nonzeros == 4.
+    }
+  } else {
+    mix = 0.627;
+  }
+
+  {
+    double min_limit = 2 * sum - max_val;
+    min_limit = mix * min_limit + (1.0 - mix) * retval;
+    return (retval < min_limit) ? min_limit : retval;
+  }
+}
+
+static double BitsEntropy(const uint32_t* const array, int n) {
+  double retval = 0.;
+  uint32_t sum = 0;
+  int nonzeros = 0;
+  uint32_t max_val = 0;
+  int i;
+  for (i = 0; i < n; ++i) {
+    if (array[i] != 0) {
+      sum += array[i];
+      ++nonzeros;
+      retval -= VP8LFastSLog2(array[i]);
+      if (max_val < array[i]) {
+        max_val = array[i];
+      }
+    }
+  }
+  retval += VP8LFastSLog2(sum);
+  return BitsEntropyRefine(nonzeros, sum, max_val, retval);
+}
+
+static double BitsEntropyCombined(const uint32_t* const X,
+                                  const uint32_t* const Y, int n) {
+  double retval = 0.;
+  int sum = 0;
+  int nonzeros = 0;
+  int max_val = 0;
+  int i;
+  for (i = 0; i < n; ++i) {
+    const int xy = X[i] + Y[i];
+    if (xy != 0) {
+      sum += xy;
+      ++nonzeros;
+      retval -= VP8LFastSLog2(xy);
+      if (max_val < xy) {
+        max_val = xy;
+      }
+    }
+  }
+  retval += VP8LFastSLog2(sum);
+  return BitsEntropyRefine(nonzeros, sum, max_val, retval);
+}
+
+static double InitialHuffmanCost(void) {
+  // Small bias because Huffman code length is typically not stored in
+  // full length.
+  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
+  static const double kSmallBias = 9.1;
+  return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
+}
+
+// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
+static double FinalHuffmanCost(const VP8LStreaks* const stats) {
+  double retval = InitialHuffmanCost();
+  retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
+  retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
+  retval += 1.796875 * stats->streaks[0][0];
+  retval += 3.28125 * stats->streaks[1][0];
+  return retval;
+}
+
+// Trampolines
+static double HuffmanCost(const uint32_t* const population, int length) {
+  const VP8LStreaks stats = VP8LHuffmanCostCount(population, length);
+  return FinalHuffmanCost(&stats);
+}
+
+static double HuffmanCostCombined(const uint32_t* const X,
+                                  const uint32_t* const Y, int length) {
+  const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length);
+  return FinalHuffmanCost(&stats);
+}
+
+// Aggregated costs
+static double PopulationCost(const uint32_t* const population, int length) {
+  return BitsEntropy(population, length) + HuffmanCost(population, length);
+}
+
+static double GetCombinedEntropy(const uint32_t* const X,
+                                 const uint32_t* const Y, int length) {
+  return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
+}
+
+// Estimates the Entropy + Huffman + other block overhead size cost.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+  return
+      PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
+      + PopulationCost(p->red_, NUM_LITERAL_CODES)
+      + PopulationCost(p->blue_, NUM_LITERAL_CODES)
+      + PopulationCost(p->alpha_, NUM_LITERAL_CODES)
+      + PopulationCost(p->distance_, NUM_DISTANCE_CODES)
+      + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+      + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
+}
+
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
+  return
+      BitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
+      + BitsEntropy(p->red_, NUM_LITERAL_CODES)
+      + BitsEntropy(p->blue_, NUM_LITERAL_CODES)
+      + BitsEntropy(p->alpha_, NUM_LITERAL_CODES)
+      + BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
+      + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+      + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
+}
+
+// -----------------------------------------------------------------------------
+// Various histogram combine/cost-eval functions
+
+static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
+                                       const VP8LHistogram* const b,
+                                       double cost_threshold,
+                                       double* cost) {
+  const int palette_code_bits = a->palette_code_bits_;
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  *cost += GetCombinedEntropy(a->literal_, b->literal_,
+                              VP8LHistogramNumCodes(palette_code_bits));
+  *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
+                                 b->literal_ + NUM_LITERAL_CODES,
+                                 NUM_LENGTH_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
+  *cost += VP8LExtraCostCombined(a->distance_, b->distance_,
+                                 NUM_DISTANCE_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  return 1;
+}
+
+// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
+// to the threshold value 'cost_threshold'. The score returned is
+//  Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
+// Since the previous score passed is 'cost_threshold', we only need to compare
+// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
+// early.
+static double HistogramAddEval(const VP8LHistogram* const a,
+                               const VP8LHistogram* const b,
+                               VP8LHistogram* const out,
+                               double cost_threshold) {
+  double cost = 0;
+  const double sum_cost = a->bit_cost_ + b->bit_cost_;
+  cost_threshold += sum_cost;
+
+  if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
+    VP8LHistogramAdd(a, b, out);
+    out->bit_cost_ = cost;
+    out->palette_code_bits_ = a->palette_code_bits_;
+  }
+
+  return cost - sum_cost;
+}
+
+// Same as HistogramAddEval(), except that the resulting histogram
+// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
+// the term C(b) which is constant over all the evaluations.
+static double HistogramAddThresh(const VP8LHistogram* const a,
+                                 const VP8LHistogram* const b,
+                                 double cost_threshold) {
+  double cost = -a->bit_cost_;
+  GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
+  return cost;
+}
+
+// -----------------------------------------------------------------------------
+
+// The structure to keep track of cost range for the three dominant entropy
+// symbols.
+// TODO(skal): Evaluate if float can be used here instead of double for
+// representing the entropy costs.
+typedef struct {
+  double literal_max_;
+  double literal_min_;
+  double red_max_;
+  double red_min_;
+  double blue_max_;
+  double blue_min_;
+} DominantCostRange;
+
+static void DominantCostRangeInit(DominantCostRange* const c) {
+  c->literal_max_ = 0.;
+  c->literal_min_ = MAX_COST;
+  c->red_max_ = 0.;
+  c->red_min_ = MAX_COST;
+  c->blue_max_ = 0.;
+  c->blue_min_ = MAX_COST;
+}
+
+static void UpdateDominantCostRange(
+    const VP8LHistogram* const h, DominantCostRange* const c) {
+  if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;
+  if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;
+  if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;
+  if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;
+  if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;
+  if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;
+}
+
+static void UpdateHistogramCost(VP8LHistogram* const h) {
+  const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES);
+  const double distance_cost =
+      PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
+      VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
+  const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
+  h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
+                     VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
+                                   NUM_LENGTH_CODES);
+  h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES);
+  h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES);
+  h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
+                 alpha_cost + distance_cost;
+}
+
+static int GetBinIdForEntropy(double min, double max, double val) {
+  const double range = max - min + 1e-6;
+  const double delta = val - min;
+  return (int)(NUM_PARTITIONS * delta / range);
+}
+
+// TODO(vikasa): Evaluate, if there's any correlation between red & blue.
+static int GetHistoBinIndex(
+    const VP8LHistogram* const h, const DominantCostRange* const c) {
+  const int bin_id =
+      GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) +
+      NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_,
+                                          h->red_cost_) +
+      NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_,
+                                                           c->literal_max_,
+                                                           h->literal_cost_);
+  assert(bin_id < BIN_SIZE);
+  return bin_id;
+}
+
+// Construct the histograms from backward references.
+static void HistogramBuild(
+    int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,
+    VP8LHistogramSet* const image_histo) {
+  int x = 0, y = 0;
+  const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
+  VP8LHistogram** const histograms = image_histo->histograms;
+  VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
+  assert(histo_bits > 0);
+  // Construct the Histo from a given backward references.
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
+    const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
+    VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
+    x += PixOrCopyLength(v);
+    while (x >= xsize) {
+      x -= xsize;
+      ++y;
+    }
+    VP8LRefsCursorNext(&c);
+  }
+}
+
+// Copies the histograms and computes its bit_cost.
+static void HistogramCopyAndAnalyze(
+    VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
+  int i;
+  const int histo_size = orig_histo->size;
+  VP8LHistogram** const orig_histograms = orig_histo->histograms;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  for (i = 0; i < histo_size; ++i) {
+    VP8LHistogram* const histo = orig_histograms[i];
+    UpdateHistogramCost(histo);
+    // Copy histograms from orig_histo[] to image_histo[].
+    HistogramCopy(histo, histograms[i]);
+  }
+}
+
+// Partition histograms to different entropy bins for three dominant (literal,
+// red and blue) symbol costs and compute the histogram aggregate bit_cost.
+static void HistogramAnalyzeEntropyBin(
+    VP8LHistogramSet* const image_histo, int16_t* const bin_map) {
+  int i;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  const int histo_size = image_histo->size;
+  const int bin_depth = histo_size + 1;
+  DominantCostRange cost_range;
+  DominantCostRangeInit(&cost_range);
+
+  // Analyze the dominant (literal, red and blue) entropy costs.
+  for (i = 0; i < histo_size; ++i) {
+    VP8LHistogram* const histo = histograms[i];
+    UpdateDominantCostRange(histo, &cost_range);
+  }
+
+  // bin-hash histograms on three of the dominant (literal, red and blue)
+  // symbol costs.
+  for (i = 0; i < histo_size; ++i) {
+    int num_histos;
+    VP8LHistogram* const histo = histograms[i];
+    const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range);
+    const int bin_offset = bin_id * bin_depth;
+    // bin_map[n][0] for every bin 'n' maintains the counter for the number of
+    // histograms in that bin.
+    // Get and increment the num_histos in that bin.
+    num_histos = ++bin_map[bin_offset];
+    assert(bin_offset + num_histos < bin_depth * BIN_SIZE);
+    // Add histogram i'th index at num_histos (last) position in the bin_map.
+    bin_map[bin_offset + num_histos] = i;
+  }
+}
+
+// Compact the histogram set by moving the valid one left in the set to the
+// head and moving the ones that have been merged to other histograms towards
+// the end.
+// TODO(vikasa): Evaluate if this method can be avoided by altering the code
+// logic of HistogramCombineEntropyBin main loop.
+static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
+  int start = 0;
+  int end = image_histo->size - 1;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  while (start < end) {
+    while (start <= end && histograms[start] != NULL &&
+           histograms[start]->bit_cost_ != 0.) {
+      ++start;
+    }
+    while (start <= end && histograms[end]->bit_cost_ == 0.) {
+      histograms[end] = NULL;
+      --end;
+    }
+    if (start < end) {
+      assert(histograms[start] != NULL);
+      assert(histograms[end] != NULL);
+      HistogramCopy(histograms[end], histograms[start]);
+      histograms[end] = NULL;
+      --end;
+    }
+  }
+  image_histo->size = end + 1;
+}
+
+static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
+                                       VP8LHistogram* const histos,
+                                       int16_t* const bin_map, int bin_depth,
+                                       double combine_cost_factor) {
+  int bin_id;
+  VP8LHistogram* cur_combo = histos;
+  VP8LHistogram** const histograms = image_histo->histograms;
+
+  for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) {
+    const int bin_offset = bin_id * bin_depth;
+    const int num_histos = bin_map[bin_offset];
+    const int idx1 = bin_map[bin_offset + 1];
+    int n;
+    for (n = 2; n <= num_histos; ++n) {
+      const int idx2 = bin_map[bin_offset + n];
+      const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
+      if (bit_cost_idx2 > 0.) {
+        const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
+        const double curr_cost_diff =
+            HistogramAddEval(histograms[idx1], histograms[idx2],
+                             cur_combo, bit_cost_thresh);
+        if (curr_cost_diff < bit_cost_thresh) {
+          HistogramCopy(cur_combo, histograms[idx1]);
+          histograms[idx2]->bit_cost_ = 0.;
+        }
+      }
+    }
+  }
+  HistogramCompactBins(image_histo);
+}
+
+static uint32_t MyRand(uint32_t *seed) {
+  *seed *= 16807U;
+  if (*seed == 0) {
+    *seed = 1;
+  }
+  return *seed;
+}
+
+static void HistogramCombine(VP8LHistogramSet* const image_histo,
+                             VP8LHistogramSet* const histos, int quality) {
+  int iter;
+  uint32_t seed = 0;
+  int tries_with_no_success = 0;
+  int image_histo_size = image_histo->size;
+  const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8;
+  const int outer_iters = image_histo_size * iter_mult;
+  const int num_pairs = image_histo_size / 2;
+  const int num_tries_no_success = outer_iters / 2;
+  const int min_cluster_size = 2;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  VP8LHistogram* cur_combo = histos->histograms[0];   // trial histogram
+  VP8LHistogram* best_combo = histos->histograms[1];  // best histogram so far
+
+  // Collapse similar histograms in 'image_histo'.
+  for (iter = 0;
+       iter < outer_iters && image_histo_size >= min_cluster_size;
+       ++iter) {
+    double best_cost_diff = 0.;
+    int best_idx1 = -1, best_idx2 = 1;
+    int j;
+    const int num_tries =
+        (num_pairs < image_histo_size) ? num_pairs : image_histo_size;
+    seed += iter;
+    for (j = 0; j < num_tries; ++j) {
+      double curr_cost_diff;
+      // Choose two histograms at random and try to combine them.
+      const uint32_t idx1 = MyRand(&seed) % image_histo_size;
+      const uint32_t tmp = (j & 7) + 1;
+      const uint32_t diff =
+          (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1);
+      const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size;
+      if (idx1 == idx2) {
+        continue;
+      }
+
+      // Calculate cost reduction on combining.
+      curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
+                                        cur_combo, best_cost_diff);
+      if (curr_cost_diff < best_cost_diff) {    // found a better pair?
+        {     // swap cur/best combo histograms
+          VP8LHistogram* const tmp_histo = cur_combo;
+          cur_combo = best_combo;
+          best_combo = tmp_histo;
+        }
+        best_cost_diff = curr_cost_diff;
+        best_idx1 = idx1;
+        best_idx2 = idx2;
+      }
+    }
+
+    if (best_idx1 >= 0) {
+      HistogramCopy(best_combo, histograms[best_idx1]);
+      // swap best_idx2 slot with last one (which is now unused)
+      --image_histo_size;
+      if (best_idx2 != image_histo_size) {
+        HistogramCopy(histograms[image_histo_size], histograms[best_idx2]);
+        histograms[image_histo_size] = NULL;
+      }
+      tries_with_no_success = 0;
+    }
+    if (++tries_with_no_success >= num_tries_no_success) {
+      break;
+    }
+  }
+  image_histo->size = image_histo_size;
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
+                           const VP8LHistogramSet* const image_histo,
+                           uint16_t* const symbols) {
+  int i;
+  VP8LHistogram** const orig_histograms = orig_histo->histograms;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  for (i = 0; i < orig_histo->size; ++i) {
+    int best_out = 0;
+    double best_bits =
+        HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
+    int k;
+    for (k = 1; k < image_histo->size; ++k) {
+      const double cur_bits =
+          HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
+      if (cur_bits < best_bits) {
+        best_bits = cur_bits;
+        best_out = k;
+      }
+    }
+    symbols[i] = best_out;
+  }
+
+  // Recompute each out based on raw and symbols.
+  for (i = 0; i < image_histo->size; ++i) {
+    HistogramClear(histograms[i]);
+  }
+
+  for (i = 0; i < orig_histo->size; ++i) {
+    const int idx = symbols[i];
+    VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]);
+  }
+}
+
+static double GetCombineCostFactor(int histo_size, int quality) {
+  double combine_cost_factor = 0.16;
+  if (histo_size > 256) combine_cost_factor /= 2.;
+  if (histo_size > 512) combine_cost_factor /= 2.;
+  if (histo_size > 1024) combine_cost_factor /= 2.;
+  if (quality <= 50) combine_cost_factor /= 2.;
+  return combine_cost_factor;
+}
+
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality, int histo_bits, int cache_bits,
+                             VP8LHistogramSet* const image_histo,
+                             uint16_t* const histogram_symbols) {
+  int ok = 0;
+  const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
+  const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
+  const int image_histo_raw_size = histo_xsize * histo_ysize;
+
+  // The bin_map for every bin follows following semantics:
+  // bin_map[n][0] = num_histo; // The number of histograms in that bin.
+  // bin_map[n][1] = index of first histogram in that bin;
+  // bin_map[n][num_histo] = index of last histogram in that bin;
+  // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices.
+  const int bin_depth = image_histo_raw_size + 1;
+  int16_t* bin_map = NULL;
+  VP8LHistogramSet* const histos = VP8LAllocateHistogramSet(2, cache_bits);
+  VP8LHistogramSet* const orig_histo =
+      VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
+
+  if (orig_histo == NULL || histos == NULL) {
+    goto Error;
+  }
+
+  // Don't attempt linear bin-partition heuristic for:
+  // histograms of small sizes, as bin_map will be very sparse and;
+  // Higher qualities (> 90), to preserve the compression gains at those
+  // quality settings.
+  if (orig_histo->size > 2 * BIN_SIZE && quality < 90) {
+    const int bin_map_size = bin_depth * BIN_SIZE;
+    bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
+    if (bin_map == NULL) goto Error;
+  }
+
+  // Construct the histograms from backward references.
+  HistogramBuild(xsize, histo_bits, refs, orig_histo);
+  // Copies the histograms and computes its bit_cost.
+  HistogramCopyAndAnalyze(orig_histo, image_histo);
+
+  if (bin_map != NULL) {
+    const double combine_cost_factor =
+        GetCombineCostFactor(image_histo_raw_size, quality);
+    HistogramAnalyzeEntropyBin(orig_histo, bin_map);
+    // Collapse histograms with similar entropy.
+    HistogramCombineEntropyBin(image_histo, histos->histograms[0],
+                               bin_map, bin_depth, combine_cost_factor);
+  }
+
+  // Collapse similar histograms by random histogram-pair compares.
+  HistogramCombine(image_histo, histos, quality);
+
+  // Find the optimal map from original histograms to the final ones.
+  HistogramRemap(orig_histo, image_histo, histogram_symbols);
+
+  ok = 1;
+
+ Error:
+  WebPSafeFree(bin_map);
+  VP8LFreeHistogramSet(orig_histo);
+  VP8LFreeHistogramSet(histos);
+  return ok;
+}
diff --git a/src/main/jni/libwebp/enc/histogram.h b/src/main/jni/libwebp/enc/histogram.h
new file mode 100644
index 000000000..1cf4c5474
--- /dev/null
+++ b/src/main/jni/libwebp/enc/histogram.h
@@ -0,0 +1,118 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Models the histograms of literal and distance codes.
+
+#ifndef WEBP_ENC_HISTOGRAM_H_
+#define WEBP_ENC_HISTOGRAM_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "./backward_references.h"
+#include "../webp/format_constants.h"
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A simple container for histograms of data.
+typedef struct {
+  // literal_ contains green literal, palette-code and
+  // copy-length-prefix histogram
+  uint32_t* literal_;         // Pointer to the allocated buffer for literal.
+  uint32_t red_[NUM_LITERAL_CODES];
+  uint32_t blue_[NUM_LITERAL_CODES];
+  uint32_t alpha_[NUM_LITERAL_CODES];
+  // Backward reference prefix-code histogram.
+  uint32_t distance_[NUM_DISTANCE_CODES];
+  int palette_code_bits_;
+  double bit_cost_;      // cached value of VP8LHistogramEstimateBits(this)
+  double literal_cost_;  // Cached values of dominant entropy costs:
+  double red_cost_;      //   literal, red & blue.
+  double blue_cost_;
+} VP8LHistogram;
+
+// Collection of histograms with fixed capacity, allocated as one
+// big memory chunk. Can be destroyed by calling WebPSafeFree().
+typedef struct {
+  int size;         // number of slots currently in use
+  int max_size;     // maximum capacity
+  VP8LHistogram** histograms;
+} VP8LHistogramSet;
+
+// Create the histogram.
+//
+// The input data is the PixOrCopy data, which models the literals, stop
+// codes and backward references (both distances and lengths).  Also: if
+// palette_code_bits is >= 0, initialize the histogram with this value.
+void VP8LHistogramCreate(VP8LHistogram* const p,
+                         const VP8LBackwardRefs* const refs,
+                         int palette_code_bits);
+
+// Return the size of the histogram for a given palette_code_bits.
+int VP8LGetHistogramSize(int palette_code_bits);
+
+// Set the palette_code_bits and reset the stats.
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
+
+// Collect all the references into a histogram (without reset)
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+                            VP8LHistogram* const histo);
+
+// Free the memory allocated for the histogram.
+void VP8LFreeHistogram(VP8LHistogram* const histo);
+
+// Free the memory allocated for the histogram set.
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
+
+// Allocate an array of pointer to histograms, allocated and initialized
+// using 'cache_bits'. Return NULL in case of memory error.
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
+
+// Allocate and initialize histogram object with specified 'cache_bits'.
+// Returns NULL in case of memory error.
+// Special case of VP8LAllocateHistogramSet, with size equals 1.
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
+
+// Accumulate a token 'v' into a histogram.
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+                                     const PixOrCopy* const v);
+
+// Estimate how many bits the combined entropy of literals and distance
+// approximately maps to.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
+// This function estimates the cost in bits excluding the bits needed to
+// represent the entropy code itself.
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
+
+static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
+  return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
+      ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
+}
+
+// Builds the histogram image.
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality, int histogram_bits, int cache_bits,
+                             VP8LHistogramSet* const image_in,
+                             uint16_t* const histogram_symbols);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBP_ENC_HISTOGRAM_H_
diff --git a/src/main/jni/libwebp/enc/iterator.c b/src/main/jni/libwebp/enc/iterator.c
new file mode 100644
index 000000000..e42ad001a
--- /dev/null
+++ b/src/main/jni/libwebp/enc/iterator.c
@@ -0,0 +1,456 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// VP8Iterator: block iterator
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// VP8Iterator
+//------------------------------------------------------------------------------
+
+static void InitLeft(VP8EncIterator* const it) {
+  it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] =
+      (it->y_ > 0) ? 129 : 127;
+  memset(it->y_left_, 129, 16);
+  memset(it->u_left_, 129, 8);
+  memset(it->v_left_, 129, 8);
+  it->left_nz_[8] = 0;
+}
+
+static void InitTop(VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  const size_t top_size = enc->mb_w_ * 16;
+  memset(enc->y_top_, 127, 2 * top_size);
+  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
+}
+
+void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
+  VP8Encoder* const enc = it->enc_;
+  it->x_ = 0;
+  it->y_ = y;
+  it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];
+  it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
+  it->nz_ = enc->nz_;
+  it->mb_ = enc->mb_info_ + y * enc->mb_w_;
+  it->y_top_ = enc->y_top_;
+  it->uv_top_ = enc->uv_top_;
+  InitLeft(it);
+}
+
+void VP8IteratorReset(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  VP8IteratorSetRow(it, 0);
+  VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_);  // default
+  InitTop(it);
+  InitLeft(it);
+  memset(it->bit_count_, 0, sizeof(it->bit_count_));
+  it->do_trellis_ = 0;
+}
+
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
+  it->count_down_ = it->count_down0_ = count_down;
+}
+
+int VP8IteratorIsDone(const VP8EncIterator* const it) {
+  return (it->count_down_ <= 0);
+}
+
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
+  it->enc_ = enc;
+  it->y_stride_  = enc->pic_->y_stride;
+  it->uv_stride_ = enc->pic_->uv_stride;
+  it->yuv_in_   = (uint8_t*)DO_ALIGN(it->yuv_mem_);
+  it->yuv_out_  = it->yuv_in_ + YUV_SIZE;
+  it->yuv_out2_ = it->yuv_out_ + YUV_SIZE;
+  it->yuv_p_    = it->yuv_out2_ + YUV_SIZE;
+  it->lf_stats_ = enc->lf_stats_;
+  it->percent0_ = enc->percent_;
+  it->y_left_ = (uint8_t*)DO_ALIGN(it->yuv_left_mem_ + 1);
+  it->u_left_ = it->y_left_ + 16 + 16;
+  it->v_left_ = it->u_left_ + 16;
+  VP8IteratorReset(it);
+}
+
+int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
+  VP8Encoder* const enc = it->enc_;
+  if (delta && enc->pic_->progress_hook != NULL) {
+    const int done = it->count_down0_ - it->count_down_;
+    const int percent = (it->count_down0_ <= 0)
+                      ? it->percent0_
+                      : it->percent0_ + delta * done / it->count_down0_;
+    return WebPReportProgress(enc->pic_, percent, &enc->percent_);
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Import the source samples into the cache. Takes care of replicating
+// boundary pixels if necessary.
+
+static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; }
+
+static void ImportBlock(const uint8_t* src, int src_stride,
+                        uint8_t* dst, int w, int h, int size) {
+  int i;
+  for (i = 0; i < h; ++i) {
+    memcpy(dst, src, w);
+    if (w < size) {
+      memset(dst + w, dst[w - 1], size - w);
+    }
+    dst += BPS;
+    src += src_stride;
+  }
+  for (i = h; i < size; ++i) {
+    memcpy(dst, dst - BPS, size);
+    dst += BPS;
+  }
+}
+
+static void ImportLine(const uint8_t* src, int src_stride,
+                       uint8_t* dst, int len, int total_len) {
+  int i;
+  for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src;
+  for (; i < total_len; ++i) dst[i] = dst[len - 1];
+}
+
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) {
+  const VP8Encoder* const enc = it->enc_;
+  const int x = it->x_, y = it->y_;
+  const WebPPicture* const pic = enc->pic_;
+  const uint8_t* const ysrc = pic->y + (y * pic->y_stride  + x) * 16;
+  const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
+  const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
+  const int w = MinSize(pic->width - x * 16, 16);
+  const int h = MinSize(pic->height - y * 16, 16);
+  const int uv_w = (w + 1) >> 1;
+  const int uv_h = (h + 1) >> 1;
+
+  ImportBlock(ysrc, pic->y_stride,  it->yuv_in_ + Y_OFF, w, h, 16);
+  ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF, uv_w, uv_h, 8);
+  ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF, uv_w, uv_h, 8);
+
+  if (tmp_32 == NULL) return;
+
+  // Import source (uncompressed) samples into boundary.
+  if (x == 0) {
+    InitLeft(it);
+  } else {
+    if (y == 0) {
+      it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
+    } else {
+      it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
+      it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
+      it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
+    }
+    ImportLine(ysrc - 1, pic->y_stride,  it->y_left_, h,   16);
+    ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
+    ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
+  }
+
+  it->y_top_  = tmp_32 + 0;
+  it->uv_top_ = tmp_32 + 16;
+  if (y == 0) {
+    memset(tmp_32, 127, 32 * sizeof(*tmp_32));
+  } else {
+    ImportLine(ysrc - pic->y_stride,  1, tmp_32,          w,   16);
+    ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16,     uv_w, 8);
+    ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Copy back the compressed samples into user space if requested.
+
+static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
+                        int w, int h) {
+  while (h-- > 0) {
+    memcpy(dst, src, w);
+    dst += dst_stride;
+    src += BPS;
+  }
+}
+
+void VP8IteratorExport(const VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  if (enc->config_->show_compressed) {
+    const int x = it->x_, y = it->y_;
+    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
+    const uint8_t* const usrc = it->yuv_out_ + U_OFF;
+    const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
+    const WebPPicture* const pic = enc->pic_;
+    uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
+    uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
+    uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
+    int w = (pic->width - x * 16);
+    int h = (pic->height - y * 16);
+
+    if (w > 16) w = 16;
+    if (h > 16) h = 16;
+
+    // Luma plane
+    ExportBlock(ysrc, ydst, pic->y_stride, w, h);
+
+    {   // U/V planes
+      const int uv_w = (w + 1) >> 1;
+      const int uv_h = (h + 1) >> 1;
+      ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
+      ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Non-zero contexts setup/teardown
+
+// Nz bits:
+//  0  1  2  3  Y
+//  4  5  6  7
+//  8  9 10 11
+// 12 13 14 15
+// 16 17        U
+// 18 19
+// 20 21        V
+// 22 23
+// 24           DC-intra16
+
+// Convert packed context to byte array
+#define BIT(nz, n) (!!((nz) & (1 << (n))))
+
+void VP8IteratorNzToBytes(VP8EncIterator* const it) {
+  const int tnz = it->nz_[0], lnz = it->nz_[-1];
+  int* const top_nz = it->top_nz_;
+  int* const left_nz = it->left_nz_;
+
+  // Top-Y
+  top_nz[0] = BIT(tnz, 12);
+  top_nz[1] = BIT(tnz, 13);
+  top_nz[2] = BIT(tnz, 14);
+  top_nz[3] = BIT(tnz, 15);
+  // Top-U
+  top_nz[4] = BIT(tnz, 18);
+  top_nz[5] = BIT(tnz, 19);
+  // Top-V
+  top_nz[6] = BIT(tnz, 22);
+  top_nz[7] = BIT(tnz, 23);
+  // DC
+  top_nz[8] = BIT(tnz, 24);
+
+  // left-Y
+  left_nz[0] = BIT(lnz,  3);
+  left_nz[1] = BIT(lnz,  7);
+  left_nz[2] = BIT(lnz, 11);
+  left_nz[3] = BIT(lnz, 15);
+  // left-U
+  left_nz[4] = BIT(lnz, 17);
+  left_nz[5] = BIT(lnz, 19);
+  // left-V
+  left_nz[6] = BIT(lnz, 21);
+  left_nz[7] = BIT(lnz, 23);
+  // left-DC is special, iterated separately
+}
+
+void VP8IteratorBytesToNz(VP8EncIterator* const it) {
+  uint32_t nz = 0;
+  const int* const top_nz = it->top_nz_;
+  const int* const left_nz = it->left_nz_;
+  // top
+  nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
+  nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
+  nz |= (top_nz[4] << 18) | (top_nz[5] << 19);
+  nz |= (top_nz[6] << 22) | (top_nz[7] << 23);
+  nz |= (top_nz[8] << 24);  // we propagate the _top_ bit, esp. for intra4
+  // left
+  nz |= (left_nz[0] << 3) | (left_nz[1] << 7);
+  nz |= (left_nz[2] << 11);
+  nz |= (left_nz[4] << 17) | (left_nz[6] << 21);
+
+  *it->nz_ = nz;
+}
+
+#undef BIT
+
+//------------------------------------------------------------------------------
+// Advance to the next position, doing the bookkeeping.
+
+void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  const int x = it->x_, y = it->y_;
+  const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
+  const uint8_t* const uvsrc = it->yuv_out_ + U_OFF;
+  if (x < enc->mb_w_ - 1) {   // left
+    int i;
+    for (i = 0; i < 16; ++i) {
+      it->y_left_[i] = ysrc[15 + i * BPS];
+    }
+    for (i = 0; i < 8; ++i) {
+      it->u_left_[i] = uvsrc[7 + i * BPS];
+      it->v_left_[i] = uvsrc[15 + i * BPS];
+    }
+    // top-left (before 'top'!)
+    it->y_left_[-1] = it->y_top_[15];
+    it->u_left_[-1] = it->uv_top_[0 + 7];
+    it->v_left_[-1] = it->uv_top_[8 + 7];
+  }
+  if (y < enc->mb_h_ - 1) {  // top
+    memcpy(it->y_top_, ysrc + 15 * BPS, 16);
+    memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
+  }
+}
+
+int VP8IteratorNext(VP8EncIterator* const it) {
+  it->preds_ += 4;
+  it->mb_ += 1;
+  it->nz_ += 1;
+  it->y_top_ += 16;
+  it->uv_top_ += 16;
+  it->x_ += 1;
+  if (it->x_ == it->enc_->mb_w_) {
+    VP8IteratorSetRow(it, ++it->y_);
+  }
+  return (0 < --it->count_down_);
+}
+
+//------------------------------------------------------------------------------
+// Helper function to set mode properties
+
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
+  uint8_t* preds = it->preds_;
+  int y;
+  for (y = 0; y < 4; ++y) {
+    memset(preds, mode, 4);
+    preds += it->enc_->preds_w_;
+  }
+  it->mb_->type_ = 1;
+}
+
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
+  uint8_t* preds = it->preds_;
+  int y;
+  for (y = 4; y > 0; --y) {
+    memcpy(preds, modes, 4 * sizeof(*modes));
+    preds += it->enc_->preds_w_;
+    modes += 4;
+  }
+  it->mb_->type_ = 0;
+}
+
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
+  it->mb_->uv_mode_ = mode;
+}
+
+void VP8SetSkip(const VP8EncIterator* const it, int skip) {
+  it->mb_->skip_ = skip;
+}
+
+void VP8SetSegment(const VP8EncIterator* const it, int segment) {
+  it->mb_->segment_ = segment;
+}
+
+//------------------------------------------------------------------------------
+// Intra4x4 sub-blocks iteration
+//
+//  We store and update the boundary samples into an array of 37 pixels. They
+//  are updated as we iterate and reconstructs each intra4x4 blocks in turn.
+//  The position of the samples has the following snake pattern:
+//
+// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
+// --+-----------+-----------+-----------+-----------+
+// 15|         19|         23|         27|         31|
+// 14|         18|         22|         26|         30|
+// 13|         17|         21|         25|         29|
+// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
+// --+-----------+-----------+-----------+-----------+
+// 11|         15|         19|         23|         27|
+// 10|         14|         18|         22|         26|
+//  9|         13|         17|         21|         25|
+//  8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
+// --+-----------+-----------+-----------+-----------+
+//  7|         11|         15|         19|         23|
+//  6|         10|         14|         18|         22|
+//  5|          9|         13|         17|         21|
+//  4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
+// --+-----------+-----------+-----------+-----------+
+//  3|          7|         11|         15|         19|
+//  2|          6|         10|         14|         18|
+//  1|          5|          9|         13|         17|
+//  0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
+// --+-----------+-----------+-----------+-----------+
+
+// Array to record the position of the top sample to pass to the prediction
+// functions in dsp.c.
+static const uint8_t VP8TopLeftI4[16] = {
+  17, 21, 25, 29,
+  13, 17, 21, 25,
+  9,  13, 17, 21,
+  5,   9, 13, 17
+};
+
+void VP8IteratorStartI4(VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  int i;
+
+  it->i4_ = 0;    // first 4x4 sub-block
+  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];
+
+  // Import the boundary samples
+  for (i = 0; i < 17; ++i) {    // left
+    it->i4_boundary_[i] = it->y_left_[15 - i];
+  }
+  for (i = 0; i < 16; ++i) {    // top
+    it->i4_boundary_[17 + i] = it->y_top_[i];
+  }
+  // top-right samples have a special case on the far right of the picture
+  if (it->x_ < enc->mb_w_ - 1) {
+    for (i = 16; i < 16 + 4; ++i) {
+      it->i4_boundary_[17 + i] = it->y_top_[i];
+    }
+  } else {    // else, replicate the last valid pixel four times
+    for (i = 16; i < 16 + 4; ++i) {
+      it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
+    }
+  }
+  VP8IteratorNzToBytes(it);  // import the non-zero context
+}
+
+int VP8IteratorRotateI4(VP8EncIterator* const it,
+                        const uint8_t* const yuv_out) {
+  const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
+  uint8_t* const top = it->i4_top_;
+  int i;
+
+  // Update the cache with 7 fresh samples
+  for (i = 0; i <= 3; ++i) {
+    top[-4 + i] = blk[i + 3 * BPS];   // store future top samples
+  }
+  if ((it->i4_ & 3) != 3) {  // if not on the right sub-blocks #3, #7, #11, #15
+    for (i = 0; i <= 2; ++i) {        // store future left samples
+      top[i] = blk[3 + (2 - i) * BPS];
+    }
+  } else {  // else replicate top-right samples, as says the specs.
+    for (i = 0; i <= 3; ++i) {
+      top[i] = top[i + 4];
+    }
+  }
+  // move pointers to next sub-block
+  ++it->i4_;
+  if (it->i4_ == 16) {    // we're done
+    return 0;
+  }
+
+  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/enc/picture.c b/src/main/jni/libwebp/enc/picture.c
new file mode 100644
index 000000000..9a66fbe74
--- /dev/null
+++ b/src/main/jni/libwebp/enc/picture.c
@@ -0,0 +1,289 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture class basis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// WebPPicture
+//------------------------------------------------------------------------------
+
+static int DummyWriter(const uint8_t* data, size_t data_size,
+                       const WebPPicture* const picture) {
+  // The following are to prevent 'unused variable' error message.
+  (void)data;
+  (void)data_size;
+  (void)picture;
+  return 1;
+}
+
+int WebPPictureInitInternal(WebPPicture* picture, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+    return 0;   // caller/system version mismatch!
+  }
+  if (picture != NULL) {
+    memset(picture, 0, sizeof(*picture));
+    picture->writer = DummyWriter;
+    WebPEncodingSetError(picture, VP8_ENC_OK);
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
+  picture->memory_argb_ = NULL;
+  picture->argb = NULL;
+  picture->argb_stride = 0;
+}
+
+static void WebPPictureResetBufferYUVA(WebPPicture* const picture) {
+  picture->memory_ = NULL;
+  picture->y = picture->u = picture->v = picture->a = NULL;
+  picture->y_stride = picture->uv_stride = 0;
+  picture->a_stride = 0;
+}
+
+void WebPPictureResetBuffers(WebPPicture* const picture) {
+  WebPPictureResetBufferARGB(picture);
+  WebPPictureResetBufferYUVA(picture);
+}
+
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
+  void* memory;
+  const uint64_t argb_size = (uint64_t)width * height;
+
+  assert(picture != NULL);
+
+  WebPSafeFree(picture->memory_argb_);
+  WebPPictureResetBufferARGB(picture);
+
+  if (width <= 0 || height <= 0) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+  }
+  // allocate a new buffer.
+  memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
+  if (memory == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  // TODO(skal): align plane to cache line?
+  picture->memory_argb_ = memory;
+  picture->argb = (uint32_t*)memory;
+  picture->argb_stride = width;
+  return 1;
+}
+
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
+  const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
+  const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
+  const int y_stride = width;
+  const int uv_width = (width + 1) >> 1;
+  const int uv_height = (height + 1) >> 1;
+  const int uv_stride = uv_width;
+  int a_width, a_stride;
+  uint64_t y_size, uv_size, a_size, total_size;
+  uint8_t* mem;
+
+  assert(picture != NULL);
+
+  WebPSafeFree(picture->memory_);
+  WebPPictureResetBufferYUVA(picture);
+
+  if (uv_csp != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+
+  // alpha
+  a_width = has_alpha ? width : 0;
+  a_stride = a_width;
+  y_size = (uint64_t)y_stride * height;
+  uv_size = (uint64_t)uv_stride * uv_height;
+  a_size =  (uint64_t)a_stride * height;
+
+  total_size = y_size + a_size + 2 * uv_size;
+
+  // Security and validation checks
+  if (width <= 0 || height <= 0 ||         // luma/alpha param error
+      uv_width < 0 || uv_height < 0) {     // u/v param error
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
+  }
+  // allocate a new buffer.
+  mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+  if (mem == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+
+  // From now on, we're in the clear, we can no longer fail...
+  picture->memory_ = (void*)mem;
+  picture->y_stride  = y_stride;
+  picture->uv_stride = uv_stride;
+  picture->a_stride  = a_stride;
+
+  // TODO(skal): we could align the y/u/v planes and adjust stride.
+  picture->y = mem;
+  mem += y_size;
+
+  picture->u = mem;
+  mem += uv_size;
+  picture->v = mem;
+  mem += uv_size;
+
+  if (a_size > 0) {
+    picture->a = mem;
+    mem += a_size;
+  }
+  (void)mem;  // makes the static analyzer happy
+  return 1;
+}
+
+int WebPPictureAlloc(WebPPicture* picture) {
+  if (picture != NULL) {
+    const int width = picture->width;
+    const int height = picture->height;
+
+    WebPPictureFree(picture);   // erase previous buffer
+
+    if (!picture->use_argb) {
+      return WebPPictureAllocYUVA(picture, width, height);
+    } else {
+      return WebPPictureAllocARGB(picture, width, height);
+    }
+  }
+  return 1;
+}
+
+void WebPPictureFree(WebPPicture* picture) {
+  if (picture != NULL) {
+    WebPSafeFree(picture->memory_);
+    WebPSafeFree(picture->memory_argb_);
+    WebPPictureResetBuffers(picture);
+  }
+}
+
+//------------------------------------------------------------------------------
+// WebPMemoryWriter: Write-to-memory
+
+void WebPMemoryWriterInit(WebPMemoryWriter* writer) {
+  writer->mem = NULL;
+  writer->size = 0;
+  writer->max_size = 0;
+}
+
+int WebPMemoryWrite(const uint8_t* data, size_t data_size,
+                    const WebPPicture* picture) {
+  WebPMemoryWriter* const w = (WebPMemoryWriter*)picture->custom_ptr;
+  uint64_t next_size;
+  if (w == NULL) {
+    return 1;
+  }
+  next_size = (uint64_t)w->size + data_size;
+  if (next_size > w->max_size) {
+    uint8_t* new_mem;
+    uint64_t next_max_size = 2ULL * w->max_size;
+    if (next_max_size < next_size) next_max_size = next_size;
+    if (next_max_size < 8192ULL) next_max_size = 8192ULL;
+    new_mem = (uint8_t*)WebPSafeMalloc(next_max_size, 1);
+    if (new_mem == NULL) {
+      return 0;
+    }
+    if (w->size > 0) {
+      memcpy(new_mem, w->mem, w->size);
+    }
+    WebPSafeFree(w->mem);
+    w->mem = new_mem;
+    // down-cast is ok, thanks to WebPSafeMalloc
+    w->max_size = (size_t)next_max_size;
+  }
+  if (data_size > 0) {
+    memcpy(w->mem + w->size, data, data_size);
+    w->size += data_size;
+  }
+  return 1;
+}
+
+void WebPMemoryWriterClear(WebPMemoryWriter* writer) {
+  if (writer != NULL) {
+    WebPSafeFree(writer->mem);
+    writer->mem = NULL;
+    writer->size = 0;
+    writer->max_size = 0;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Simplest high-level calls:
+
+typedef int (*Importer)(WebPPicture* const, const uint8_t* const, int);
+
+static size_t Encode(const uint8_t* rgba, int width, int height, int stride,
+                     Importer import, float quality_factor, int lossless,
+                     uint8_t** output) {
+  WebPPicture pic;
+  WebPConfig config;
+  WebPMemoryWriter wrt;
+  int ok;
+
+  if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) ||
+      !WebPPictureInit(&pic)) {
+    return 0;  // shouldn't happen, except if system installation is broken
+  }
+
+  config.lossless = !!lossless;
+  pic.use_argb = !!lossless;
+  pic.width = width;
+  pic.height = height;
+  pic.writer = WebPMemoryWrite;
+  pic.custom_ptr = &wrt;
+  WebPMemoryWriterInit(&wrt);
+
+  ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic);
+  WebPPictureFree(&pic);
+  if (!ok) {
+    WebPMemoryWriterClear(&wrt);
+    *output = NULL;
+    return 0;
+  }
+  *output = wrt.mem;
+  return wrt.size;
+}
+
+#define ENCODE_FUNC(NAME, IMPORTER)                                     \
+size_t NAME(const uint8_t* in, int w, int h, int bps, float q,          \
+            uint8_t** out) {                                            \
+  return Encode(in, w, h, bps, IMPORTER, q, 0, out);                    \
+}
+
+ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB)
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
+ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA)
+ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA)
+
+#undef ENCODE_FUNC
+
+#define LOSSLESS_DEFAULT_QUALITY 70.
+#define LOSSLESS_ENCODE_FUNC(NAME, IMPORTER)                                 \
+size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {       \
+  return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out);  \
+}
+
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA)
+
+#undef LOSSLESS_ENCODE_FUNC
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/picture_csp.c b/src/main/jni/libwebp/enc/picture_csp.c
new file mode 100644
index 000000000..7875f625b
--- /dev/null
+++ b/src/main/jni/libwebp/enc/picture_csp.c
@@ -0,0 +1,1114 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture utils for colorspace conversion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "../utils/random.h"
+#include "../utils/utils.h"
+#include "../dsp/yuv.h"
+
+// Uncomment to disable gamma-compression during RGB->U/V averaging
+#define USE_GAMMA_COMPRESSION
+
+// If defined, use table to compute x / alpha.
+#define USE_INVERSE_ALPHA_TABLE
+
+static const union {
+  uint32_t argb;
+  uint8_t  bytes[4];
+} test_endian = { 0xff000000u };
+#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
+}
+
+//------------------------------------------------------------------------------
+// Detection of non-trivial transparency
+
+// Returns true if alpha[] has non-0xff values.
+static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
+                          int x_step, int y_step) {
+  if (alpha == NULL) return 0;
+  while (height-- > 0) {
+    int x;
+    for (x = 0; x < width * x_step; x += x_step) {
+      if (alpha[x] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
+    }
+    alpha += y_step;
+  }
+  return 0;
+}
+
+// Checking for the presence of non-opaque alpha.
+int WebPPictureHasTransparency(const WebPPicture* picture) {
+  if (picture == NULL) return 0;
+  if (!picture->use_argb) {
+    return CheckNonOpaque(picture->a, picture->width, picture->height,
+                          1, picture->a_stride);
+  } else {
+    int x, y;
+    const uint32_t* argb = picture->argb;
+    if (argb == NULL) return 0;
+    for (y = 0; y < picture->height; ++y) {
+      for (x = 0; x < picture->width; ++x) {
+        if (argb[x] < 0xff000000u) return 1;   // test any alpha values != 0xff
+      }
+      argb += picture->argb_stride;
+    }
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// Code for gamma correction
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// gamma-compensates loss of resolution during chroma subsampling
+#define kGamma 0.80      // for now we use a different gamma value than kGammaF
+#define kGammaFix 12     // fixed-point precision for linear values
+#define kGammaScale ((1 << kGammaFix) - 1)
+#define kGammaTabFix 7   // fixed-point fractional bits precision
+#define kGammaTabScale (1 << kGammaTabFix)
+#define kGammaTabRounder (kGammaTabScale >> 1)
+#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
+
+static int kLinearToGammaTab[kGammaTabSize + 1];
+static uint16_t kGammaToLinearTab[256];
+static int kGammaTablesOk = 0;
+
+static void InitGammaTables(void) {
+  if (!kGammaTablesOk) {
+    int v;
+    const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
+    const double norm = 1. / 255.;
+    for (v = 0; v <= 255; ++v) {
+      kGammaToLinearTab[v] =
+          (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
+    }
+    for (v = 0; v <= kGammaTabSize; ++v) {
+      kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
+    }
+    kGammaTablesOk = 1;
+  }
+}
+
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
+  return kGammaToLinearTab[v];
+}
+
+static WEBP_INLINE int Interpolate(int v) {
+  const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
+  const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
+  const int v0 = kLinearToGammaTab[tab_pos];
+  const int v1 = kLinearToGammaTab[tab_pos + 1];
+  const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
+  assert(tab_pos + 1 < kGammaTabSize + 1);
+  return y;
+}
+
+// Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
+// U/V value, suitable for RGBToU/V calls.
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  const int y = Interpolate(base_value << shift);   // final uplifted value
+  return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
+}
+
+#else
+
+static void InitGammaTables(void) {}
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  return (int)(base_value << shift);
+}
+
+#endif    // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+static int RGBToY(int r, int g, int b, VP8Random* const rg) {
+  return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
+                      : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
+}
+
+static int RGBToU(int r, int g, int b, VP8Random* const rg) {
+  return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
+                      : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
+}
+
+static int RGBToV(int r, int g, int b, VP8Random* const rg) {
+  return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
+                      : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
+}
+
+//------------------------------------------------------------------------------
+// Smart RGB->YUV conversion
+
+static const int kNumIterations = 6;
+static const int kMinDimensionIterativeConversion = 4;
+
+// We use a-priori a different precision for storing RGB and Y/W components
+// We could use YFIX=0 and only uint8_t for fixed_y_t, but it produces some
+// banding sometimes. Better use extra precision.
+// TODO(skal): cleanup once TFIX/YFIX values are fixed.
+
+typedef int16_t fixed_t;      // signed type with extra TFIX precision for UV
+typedef uint16_t fixed_y_t;   // unsigned type with extra YFIX precision for W
+#define TFIX 6   // fixed-point precision of RGB
+#define YFIX 2   // fixed point precision for Y/W
+
+#define THALF ((1 << TFIX) >> 1)
+#define MAX_Y_T ((256 << YFIX) - 1)
+#define TROUNDER (1 << (YUV_FIX + TFIX - 1))
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// float variant of gamma-correction
+// We use tables of different size and precision, along with a 'real-world'
+// Gamma value close to ~2.
+#define kGammaF 2.2
+static float kGammaToLinearTabF[MAX_Y_T + 1];   // size scales with Y_FIX
+static float kLinearToGammaTabF[kGammaTabSize + 2];
+static int kGammaTablesFOk = 0;
+
+static void InitGammaTablesF(void) {
+  if (!kGammaTablesFOk) {
+    int v;
+    const double norm = 1. / MAX_Y_T;
+    const double scale = 1. / kGammaTabSize;
+    for (v = 0; v <= MAX_Y_T; ++v) {
+      kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF);
+    }
+    for (v = 0; v <= kGammaTabSize; ++v) {
+      kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF));
+    }
+    // to prevent small rounding errors to cause read-overflow:
+    kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize];
+    kGammaTablesFOk = 1;
+  }
+}
+
+static WEBP_INLINE float GammaToLinearF(int v) {
+  return kGammaToLinearTabF[v];
+}
+
+static WEBP_INLINE float LinearToGammaF(float value) {
+  const float v = value * kGammaTabSize;
+  const int tab_pos = (int)v;
+  const float x = v - (float)tab_pos;      // fractional part
+  const float v0 = kLinearToGammaTabF[tab_pos + 0];
+  const float v1 = kLinearToGammaTabF[tab_pos + 1];
+  const float y = v1 * x + v0 * (1.f - x);  // interpolate
+  return y;
+}
+
+#else
+
+static void InitGammaTablesF(void) {}
+static WEBP_INLINE float GammaToLinearF(int v) {
+  const float norm = 1.f / MAX_Y_T;
+  return norm * v;
+}
+static WEBP_INLINE float LinearToGammaF(float value) {
+  return MAX_Y_T * value;
+}
+
+#endif    // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+
+// precision: YFIX -> TFIX
+static WEBP_INLINE int FixedYToW(int v) {
+#if TFIX == YFIX
+  return v;
+#elif TFIX >= YFIX
+  return v << (TFIX - YFIX);
+#else
+  return v >> (YFIX - TFIX);
+#endif
+}
+
+static WEBP_INLINE int FixedWToY(int v) {
+#if TFIX == YFIX
+  return v;
+#elif YFIX >= TFIX
+  return v << (YFIX - TFIX);
+#else
+  return v >> (TFIX - YFIX);
+#endif
+}
+
+static uint8_t clip_8b(fixed_t v) {
+  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
+}
+
+static fixed_y_t clip_y(int y) {
+  return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
+}
+
+// precision: TFIX -> YFIX
+static fixed_y_t clip_fixed_t(fixed_t v) {
+  const int y = FixedWToY(v);
+  const fixed_y_t w = clip_y(y);
+  return w;
+}
+
+//------------------------------------------------------------------------------
+
+static int RGBToGray(int r, int g, int b) {
+  const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF;
+  return (luma >> YUV_FIX);
+}
+
+static float RGBToGrayF(float r, float g, float b) {
+  return 0.299f * r + 0.587f * g + 0.114f * b;
+}
+
+static float ScaleDown(int a, int b, int c, int d) {
+  const float A = GammaToLinearF(a);
+  const float B = GammaToLinearF(b);
+  const float C = GammaToLinearF(c);
+  const float D = GammaToLinearF(d);
+  return LinearToGammaF(0.25f * (A + B + C + D));
+}
+
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) {
+  while (len-- > 0) {
+    const float R = GammaToLinearF(src[0]);
+    const float G = GammaToLinearF(src[1]);
+    const float B = GammaToLinearF(src[2]);
+    const float Y = RGBToGrayF(R, G, B);
+    *dst++ = (fixed_y_t)(LinearToGammaF(Y) + .5);
+    src += 3;
+  }
+}
+
+static WEBP_INLINE void UpdateChroma(const fixed_y_t* src1,
+                                     const fixed_y_t* src2,
+                                     fixed_t* dst, fixed_y_t* tmp, int len) {
+  while (len--> 0) {
+    const float r = ScaleDown(src1[0], src1[3], src2[0], src2[3]);
+    const float g = ScaleDown(src1[1], src1[4], src2[1], src2[4]);
+    const float b = ScaleDown(src1[2], src1[5], src2[2], src2[5]);
+    const float W = RGBToGrayF(r, g, b);
+    dst[0] = (fixed_t)FixedYToW((int)(r - W));
+    dst[1] = (fixed_t)FixedYToW((int)(g - W));
+    dst[2] = (fixed_t)FixedYToW((int)(b - W));
+    dst += 3;
+    src1 += 6;
+    src2 += 6;
+    if (tmp != NULL) {
+      tmp[0] = tmp[1] = clip_y((int)(W + .5));
+      tmp += 2;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B,
+                              int rightwise) {
+  int v;
+  if (!rightwise) {
+    v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]);
+  } else {
+    v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]);
+  }
+  return (v + 8) >> 4;
+}
+
+static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; }
+
+//------------------------------------------------------------------------------
+
+// 8bit -> YFIX
+static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {
+  return ((fixed_y_t)a << YFIX) | (1 << (YFIX - 1));
+}
+
+static void ImportOneRow(const uint8_t* const r_ptr,
+                         const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr,
+                         int step,
+                         int pic_width,
+                         fixed_y_t* const dst) {
+  int i;
+  for (i = 0; i < pic_width; ++i) {
+    const int off = i * step;
+    dst[3 * i + 0] = UpLift(r_ptr[off]);
+    dst[3 * i + 1] = UpLift(g_ptr[off]);
+    dst[3 * i + 2] = UpLift(b_ptr[off]);
+  }
+  if (pic_width & 1) {  // replicate rightmost pixel
+    memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst));
+  }
+}
+
+static void InterpolateTwoRows(const fixed_y_t* const best_y,
+                               const fixed_t* const prev_uv,
+                               const fixed_t* const cur_uv,
+                               const fixed_t* const next_uv,
+                               int w,
+                               fixed_y_t* const out1,
+                               fixed_y_t* const out2) {
+  int i, k;
+  {  // special boundary case for i==0
+    const int W0 = FixedYToW(best_y[0]);
+    const int W1 = FixedYToW(best_y[w]);
+    for (k = 0; k <= 2; ++k) {
+      out1[k] = clip_fixed_t(Filter2(cur_uv[k], prev_uv[k]) + W0);
+      out2[k] = clip_fixed_t(Filter2(cur_uv[k], next_uv[k]) + W1);
+    }
+  }
+  for (i = 1; i < w - 1; ++i) {
+    const int W0 = FixedYToW(best_y[i + 0]);
+    const int W1 = FixedYToW(best_y[i + w]);
+    const int off = 3 * (i >> 1);
+    for (k = 0; k <= 2; ++k) {
+      const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1);
+      const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1);
+      out1[3 * i + k] = clip_fixed_t(tmp0 + W0);
+      out2[3 * i + k] = clip_fixed_t(tmp1 + W1);
+    }
+  }
+  {  // special boundary case for i == w - 1
+    const int W0 = FixedYToW(best_y[i + 0]);
+    const int W1 = FixedYToW(best_y[i + w]);
+    const int off = 3 * (i >> 1);
+    for (k = 0; k <= 2; ++k) {
+      out1[3 * i + k] =
+          clip_fixed_t(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0);
+      out2[3 * i + k] =
+          clip_fixed_t(Filter2(cur_uv[off + k], next_uv[off + k]) + W1);
+    }
+  }
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
+  const int luma = 16839 * r + 33059 * g + 6420 * b + TROUNDER;
+  return clip_8b(16 + (luma >> (YUV_FIX + TFIX)));
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
+  const int u =  -9719 * r - 19081 * g + 28800 * b + TROUNDER;
+  return clip_8b(128 + (u >> (YUV_FIX + TFIX)));
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
+  const int v = +28800 * r - 24116 * g -  4684 * b + TROUNDER;
+  return clip_8b(128 + (v >> (YUV_FIX + TFIX)));
+}
+
+static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
+                            const fixed_t* const best_uv,
+                            WebPPicture* const picture) {
+  int i, j;
+  const int w = (picture->width + 1) & ~1;
+  const int h = (picture->height + 1) & ~1;
+  const int uv_w = w >> 1;
+  const int uv_h = h >> 1;
+  for (j = 0; j < picture->height; ++j) {
+    for (i = 0; i < picture->width; ++i) {
+      const int off = 3 * ((i >> 1) + (j >> 1) * uv_w);
+      const int off2 = i + j * picture->y_stride;
+      const int W = FixedYToW(best_y[i + j * w]);
+      const int r = best_uv[off + 0] + W;
+      const int g = best_uv[off + 1] + W;
+      const int b = best_uv[off + 2] + W;
+      picture->y[off2] = ConvertRGBToY(r, g, b);
+    }
+  }
+  for (j = 0; j < uv_h; ++j) {
+    uint8_t* const dst_u = picture->u + j * picture->uv_stride;
+    uint8_t* const dst_v = picture->v + j * picture->uv_stride;
+    for (i = 0; i < uv_w; ++i) {
+      const int off = 3 * (i + j * uv_w);
+      const int r = best_uv[off + 0];
+      const int g = best_uv[off + 1];
+      const int b = best_uv[off + 2];
+      dst_u[i] = ConvertRGBToU(r, g, b);
+      dst_v[i] = ConvertRGBToV(r, g, b);
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main function
+
+#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
+
+static int PreprocessARGB(const uint8_t* const r_ptr,
+                          const uint8_t* const g_ptr,
+                          const uint8_t* const b_ptr,
+                          int step, int rgb_stride,
+                          WebPPicture* const picture) {
+  // we expand the right/bottom border if needed
+  const int w = (picture->width + 1) & ~1;
+  const int h = (picture->height + 1) & ~1;
+  const int uv_w = w >> 1;
+  const int uv_h = h >> 1;
+  int i, j, iter;
+
+  // TODO(skal): allocate one big memory chunk. But for now, it's easier
+  // for valgrind debugging to have several chunks.
+  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
+  fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t);
+  fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t);
+  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
+  fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+  fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
+  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
+  int ok;
+
+  if (best_y == NULL || best_uv == NULL ||
+      target_y == NULL || target_uv == NULL ||
+      best_rgb_y == NULL || best_rgb_uv == NULL ||
+      tmp_buffer == NULL) {
+    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    goto End;
+  }
+  assert(picture->width >= kMinDimensionIterativeConversion);
+  assert(picture->height >= kMinDimensionIterativeConversion);
+
+  // Import RGB samples to W/RGB representation.
+  for (j = 0; j < picture->height; j += 2) {
+    const int is_last_row = (j == picture->height - 1);
+    fixed_y_t* const src1 = tmp_buffer;
+    fixed_y_t* const src2 = tmp_buffer + 3 * w;
+    const int off1 = j * rgb_stride;
+    const int off2 = off1 + rgb_stride;
+    const int uv_off = (j >> 1) * 3 * uv_w;
+    fixed_y_t* const dst_y = best_y + j * w;
+
+    // prepare two rows of input
+    ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                 step, picture->width, src1);
+    if (!is_last_row) {
+      ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2,
+                   step, picture->width, src2);
+    } else {
+      memcpy(src2, src1, 3 * w * sizeof(*src2));
+    }
+    UpdateW(src1, target_y + (j + 0) * w, w);
+    UpdateW(src2, target_y + (j + 1) * w, w);
+    UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
+    memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv));
+    memcpy(dst_y + w, dst_y, w * sizeof(*dst_y));
+  }
+
+  // Iterate and resolve clipping conflicts.
+  for (iter = 0; iter < kNumIterations; ++iter) {
+    int k;
+    const fixed_t* cur_uv = best_uv;
+    const fixed_t* prev_uv = best_uv;
+    for (j = 0; j < h; j += 2) {
+      fixed_y_t* const src1 = tmp_buffer;
+      fixed_y_t* const src2 = tmp_buffer + 3 * w;
+
+      {
+        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
+        InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv,
+                           w, src1, src2);
+        prev_uv = cur_uv;
+        cur_uv = next_uv;
+      }
+
+      UpdateW(src1, best_rgb_y + 0 * w, w);
+      UpdateW(src2, best_rgb_y + 1 * w, w);
+      UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
+
+      // update two rows of Y and one row of RGB
+      for (i = 0; i < 2 * w; ++i) {
+        const int off = i + j * w;
+        const int diff_y = target_y[off] - best_rgb_y[i];
+        const int new_y = (int)best_y[off] + diff_y;
+        best_y[off] = clip_y(new_y);
+      }
+      for (i = 0; i < uv_w; ++i) {
+        const int off = 3 * (i + (j >> 1) * uv_w);
+        int W;
+        for (k = 0; k <= 2; ++k) {
+          const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k];
+          best_uv[off + k] += diff_uv;
+        }
+        W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]);
+        for (k = 0; k <= 2; ++k) {
+          best_uv[off + k] -= W;
+        }
+      }
+    }
+    // TODO(skal): add early-termination criterion
+  }
+
+  // final reconstruction
+  ok = ConvertWRGBToYUV(best_y, best_uv, picture);
+
+ End:
+  WebPSafeFree(best_y);
+  WebPSafeFree(best_uv);
+  WebPSafeFree(target_y);
+  WebPSafeFree(target_uv);
+  WebPSafeFree(best_rgb_y);
+  WebPSafeFree(best_rgb_uv);
+  WebPSafeFree(tmp_buffer);
+  return ok;
+}
+#undef SAFE_ALLOC
+
+//------------------------------------------------------------------------------
+// "Fast" regular RGB->YUV
+
+#define SUM4(ptr, step) LinearToGamma(                     \
+    GammaToLinear((ptr)[0]) +                              \
+    GammaToLinear((ptr)[(step)]) +                         \
+    GammaToLinear((ptr)[rgb_stride]) +                     \
+    GammaToLinear((ptr)[rgb_stride + (step)]), 0)          \
+
+#define SUM2(ptr) \
+    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
+
+#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
+#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
+
+#if defined(USE_INVERSE_ALPHA_TABLE)
+
+static const int kAlphaFix = 19;
+// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
+// formula is then equal to v / a in most (99.6%) cases. Note that this table
+// and constant are adjusted very tightly to fit 32b arithmetic.
+// In particular, they use the fact that the operands for 'v / a' are actually
+// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
+// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
+// overflow is: kGammaFix + kAlphaFix <= 31.
+static const uint32_t kInvAlpha[4 * 0xff + 1] = {
+  0,  /* alpha = 0 */
+  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
+  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
+  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
+  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
+  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
+  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
+  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
+  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
+  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
+  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
+  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
+  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
+  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
+  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
+  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
+  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
+  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
+  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
+  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
+  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
+  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
+  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
+  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
+  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
+  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
+  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
+  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
+  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
+  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
+  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
+  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
+  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
+  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
+  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
+  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
+  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
+  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
+  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
+  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
+  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
+  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
+  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
+  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
+  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
+  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
+  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
+  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
+  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
+  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
+  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
+  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
+  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
+  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
+  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
+  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
+  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
+  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
+  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
+  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
+  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
+  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
+  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
+  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
+  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
+  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
+  1006, 1004, 1002, 1000, 998, 996, 994, 992,
+  991, 989, 987, 985, 983, 981, 979, 978,
+  976, 974, 972, 970, 969, 967, 965, 963,
+  961, 960, 958, 956, 954, 953, 951, 949,
+  948, 946, 944, 942, 941, 939, 937, 936,
+  934, 932, 931, 929, 927, 926, 924, 923,
+  921, 919, 918, 916, 914, 913, 911, 910,
+  908, 907, 905, 903, 902, 900, 899, 897,
+  896, 894, 893, 891, 890, 888, 887, 885,
+  884, 882, 881, 879, 878, 876, 875, 873,
+  872, 870, 869, 868, 866, 865, 863, 862,
+  860, 859, 858, 856, 855, 853, 852, 851,
+  849, 848, 846, 845, 844, 842, 841, 840,
+  838, 837, 836, 834, 833, 832, 830, 829,
+  828, 826, 825, 824, 823, 821, 820, 819,
+  817, 816, 815, 814, 812, 811, 810, 809,
+  807, 806, 805, 804, 802, 801, 800, 799,
+  798, 796, 795, 794, 793, 791, 790, 789,
+  788, 787, 786, 784, 783, 782, 781, 780,
+  779, 777, 776, 775, 774, 773, 772, 771,
+  769, 768, 767, 766, 765, 764, 763, 762,
+  760, 759, 758, 757, 756, 755, 754, 753,
+  752, 751, 750, 748, 747, 746, 745, 744,
+  743, 742, 741, 740, 739, 738, 737, 736,
+  735, 734, 733, 732, 731, 730, 729, 728,
+  727, 726, 725, 724, 723, 722, 721, 720,
+  719, 718, 717, 716, 715, 714, 713, 712,
+  711, 710, 709, 708, 707, 706, 705, 704,
+  703, 702, 701, 700, 699, 699, 698, 697,
+  696, 695, 694, 693, 692, 691, 690, 689,
+  688, 688, 687, 686, 685, 684, 683, 682,
+  681, 680, 680, 679, 678, 677, 676, 675,
+  674, 673, 673, 672, 671, 670, 669, 668,
+  667, 667, 666, 665, 664, 663, 662, 661,
+  661, 660, 659, 658, 657, 657, 656, 655,
+  654, 653, 652, 652, 651, 650, 649, 648,
+  648, 647, 646, 645, 644, 644, 643, 642,
+  641, 640, 640, 639, 638, 637, 637, 636,
+  635, 634, 633, 633, 632, 631, 630, 630,
+  629, 628, 627, 627, 626, 625, 624, 624,
+  623, 622, 621, 621, 620, 619, 618, 618,
+  617, 616, 616, 615, 614, 613, 613, 612,
+  611, 611, 610, 609, 608, 608, 607, 606,
+  606, 605, 604, 604, 603, 602, 601, 601,
+  600, 599, 599, 598, 597, 597, 596, 595,
+  595, 594, 593, 593, 592, 591, 591, 590,
+  589, 589, 588, 587, 587, 586, 585, 585,
+  584, 583, 583, 582, 581, 581, 580, 579,
+  579, 578, 578, 577, 576, 576, 575, 574,
+  574, 573, 572, 572, 571, 571, 570, 569,
+  569, 568, 568, 567, 566, 566, 565, 564,
+  564, 563, 563, 562, 561, 561, 560, 560,
+  559, 558, 558, 557, 557, 556, 555, 555,
+  554, 554, 553, 553, 552, 551, 551, 550,
+  550, 549, 548, 548, 547, 547, 546, 546,
+  545, 544, 544, 543, 543, 542, 542, 541,
+  541, 540, 539, 539, 538, 538, 537, 537,
+  536, 536, 535, 534, 534, 533, 533, 532,
+  532, 531, 531, 530, 530, 529, 529, 528,
+  527, 527, 526, 526, 525, 525, 524, 524,
+  523, 523, 522, 522, 521, 521, 520, 520,
+  519, 519, 518, 518, 517, 517, 516, 516,
+  515, 515, 514, 514
+};
+
+// Note that LinearToGamma() expects the values to be premultiplied by 4,
+// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
+#define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
+
+#else
+
+#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
+
+#endif  // USE_INVERSE_ALPHA_TABLE
+
+static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
+                                             const uint8_t* a_ptr,
+                                             uint32_t total_a, int step,
+                                             int rgb_stride) {
+  const uint32_t sum =
+      a_ptr[0] * GammaToLinear(src[0]) +
+      a_ptr[step] * GammaToLinear(src[step]) +
+      a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
+      a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
+  assert(total_a > 0 && total_a <= 4 * 0xff);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
+#endif
+  return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
+}
+
+static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
+                                      const uint8_t* const g_ptr,
+                                      const uint8_t* const b_ptr,
+                                      int step,
+                                      uint8_t* const dst_y,
+                                      int width,
+                                      VP8Random* const rg) {
+  int i, j;
+  for (i = 0, j = 0; i < width; ++i, j += step) {
+    dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
+  }
+}
+
+static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr,
+                                                 const uint8_t* const g_ptr,
+                                                 const uint8_t* const b_ptr,
+                                                 const uint8_t* const a_ptr,
+                                                 int rgb_stride,
+                                                 uint8_t* const dst_u,
+                                                 uint8_t* const dst_v,
+                                                 int width,
+                                                 VP8Random* const rg) {
+  int i, j;
+  // we loop over 2x2 blocks and produce one U/V value for each.
+  for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) {
+    const uint32_t a = SUM4ALPHA(a_ptr + j);
+    int r, g, b;
+    if (a == 4 * 0xff || a == 0) {
+      r = SUM4(r_ptr + j, 4);
+      g = SUM4(g_ptr + j, 4);
+      b = SUM4(b_ptr + j, 4);
+    } else {
+      r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
+    }
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+  if (width & 1) {
+    const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
+    int r, g, b;
+    if (a == 4 * 0xff || a == 0) {
+      r = SUM2(r_ptr + j);
+      g = SUM2(g_ptr + j);
+      b = SUM2(b_ptr + j);
+    } else {
+      r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
+    }
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+}
+
+static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
+                                        const uint8_t* const g_ptr,
+                                        const uint8_t* const b_ptr,
+                                        int step, int rgb_stride,
+                                        uint8_t* const dst_u,
+                                        uint8_t* const dst_v,
+                                        int width,
+                                        VP8Random* const rg) {
+  int i, j;
+  for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) {
+    const int r = SUM4(r_ptr + j, step);
+    const int g = SUM4(g_ptr + j, step);
+    const int b = SUM4(b_ptr + j, step);
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+  if (width & 1) {
+    const int r = SUM2(r_ptr + j);
+    const int g = SUM2(g_ptr + j);
+    const int b = SUM2(b_ptr + j);
+    dst_u[i] = RGBToU(r, g, b, rg);
+    dst_v[i] = RGBToV(r, g, b, rg);
+  }
+}
+
+static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
+                              const uint8_t* const g_ptr,
+                              const uint8_t* const b_ptr,
+                              const uint8_t* const a_ptr,
+                              int step,         // bytes per pixel
+                              int rgb_stride,   // bytes per scanline
+                              float dithering,
+                              int use_iterative_conversion,
+                              WebPPicture* const picture) {
+  int y;
+  const int width = picture->width;
+  const int height = picture->height;
+  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+
+  picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
+  picture->use_argb = 0;
+
+  // disable smart conversion if source is too small (overkill).
+  if (width < kMinDimensionIterativeConversion ||
+      height < kMinDimensionIterativeConversion) {
+    use_iterative_conversion = 0;
+  }
+
+  if (!WebPPictureAllocYUVA(picture, width, height)) {
+    return 0;
+  }
+  if (has_alpha) {
+    WebPInitAlphaProcessing();
+    assert(step == 4);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+    assert(kAlphaFix + kGammaFix <= 31);
+#endif
+  }
+
+  if (use_iterative_conversion) {
+    InitGammaTablesF();
+    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
+      return 0;
+    }
+    if (has_alpha) {
+      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
+                       picture->a, picture->a_stride);
+    }
+  } else {
+    uint8_t* dst_y = picture->y;
+    uint8_t* dst_u = picture->u;
+    uint8_t* dst_v = picture->v;
+    uint8_t* dst_a = picture->a;
+
+    VP8Random base_rg;
+    VP8Random* rg = NULL;
+    if (dithering > 0.) {
+      VP8InitRandom(&base_rg, dithering);
+      rg = &base_rg;
+    }
+
+    InitGammaTables();
+
+    // Downsample Y/U/V planes, two rows at a time
+    for (y = 0; y < (height >> 1); ++y) {
+      int rows_have_alpha = has_alpha;
+      const int off1 = (2 * y + 0) * rgb_stride;
+      const int off2 = (2 * y + 1) * rgb_stride;
+      ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
+                    dst_y, width, rg);
+      ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
+                    dst_y + picture->y_stride, width, rg);
+      dst_y += 2 * picture->y_stride;
+      if (has_alpha) {
+        rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
+                                             width, 2,
+                                             dst_a, picture->a_stride);
+        dst_a += 2 * picture->a_stride;
+      }
+      if (!rows_have_alpha) {
+        ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                        step, rgb_stride, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                                 a_ptr + off1, rgb_stride,
+                                 dst_u, dst_v, width, rg);
+      }
+      dst_u += picture->uv_stride;
+      dst_v += picture->uv_stride;
+    }
+    if (height & 1) {    // extra last row
+      const int off = 2 * y * rgb_stride;
+      int row_has_alpha = has_alpha;
+      ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
+                    dst_y, width, rg);
+      if (row_has_alpha) {
+        row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
+      }
+      if (!row_has_alpha) {
+        ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
+                        step, 0, dst_u, dst_v, width, rg);
+      } else {
+        ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off,
+                                 a_ptr + off, 0,
+                                 dst_u, dst_v, width, rg);
+      }
+    }
+  }
+  return 1;
+}
+
+#undef SUM4
+#undef SUM2
+#undef SUM4ALPHA
+#undef SUM2ALPHA
+
+//------------------------------------------------------------------------------
+// call for ARGB->YUVA conversion
+
+static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
+                             float dithering, int use_iterative_conversion) {
+  if (picture == NULL) return 0;
+  if (picture->argb == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  } else {
+    const uint8_t* const argb = (const uint8_t*)picture->argb;
+    const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
+    const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
+    const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
+    const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
+
+    picture->colorspace = WEBP_YUV420;
+    return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
+                              dithering, use_iterative_conversion, picture);
+  }
+}
+
+int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
+                                  float dithering) {
+  return PictureARGBToYUVA(picture, colorspace, dithering, 0);
+}
+
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+  return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
+}
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
+  return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
+}
+#endif
+
+//------------------------------------------------------------------------------
+// call for YUVA -> ARGB conversion
+
+int WebPPictureYUVAToARGB(WebPPicture* picture) {
+  if (picture == NULL) return 0;
+  if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+  // Allocate a new argb buffer (discarding the previous one).
+  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
+  picture->use_argb = 1;
+
+  // Convert
+  {
+    int y;
+    const int width = picture->width;
+    const int height = picture->height;
+    const int argb_stride = 4 * picture->argb_stride;
+    uint8_t* dst = (uint8_t*)picture->argb;
+    const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
+    WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
+
+    // First row, with replicated top samples.
+    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+    cur_y += picture->y_stride;
+    dst += argb_stride;
+    // Center rows.
+    for (y = 1; y + 1 < height; y += 2) {
+      const uint8_t* const top_u = cur_u;
+      const uint8_t* const top_v = cur_v;
+      cur_u += picture->uv_stride;
+      cur_v += picture->uv_stride;
+      upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
+               dst, dst + argb_stride, width);
+      cur_y += 2 * picture->y_stride;
+      dst += 2 * argb_stride;
+    }
+    // Last row (if needed), with replicated bottom samples.
+    if (height > 1 && !(height & 1)) {
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+    }
+    // Insert alpha values if needed, in replacement for the default 0xff ones.
+    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
+      for (y = 0; y < height; ++y) {
+        uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
+        const uint8_t* const src = picture->a + y * picture->a_stride;
+        int x;
+        for (x = 0; x < width; ++x) {
+          argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
+        }
+      }
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// automatic import / conversion
+
+static int Import(WebPPicture* const picture,
+                  const uint8_t* const rgb, int rgb_stride,
+                  int step, int swap_rb, int import_alpha) {
+  int y;
+  const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);
+  const uint8_t* const g_ptr = rgb + 1;
+  const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
+  const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
+  const int width = picture->width;
+  const int height = picture->height;
+
+  if (!picture->use_argb) {
+    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
+                              0.f /* no dithering */, 0, picture);
+  }
+  if (!WebPPictureAlloc(picture)) return 0;
+
+  assert(step >= (import_alpha ? 4 : 3));
+  for (y = 0; y < height; ++y) {
+    uint32_t* const dst = &picture->argb[y * picture->argb_stride];
+    int x;
+    for (x = 0; x < width; ++x) {
+      const int offset = step * x + y * rgb_stride;
+      dst[x] = MakeARGB32(import_alpha ? a_ptr[offset] : 0xff,
+                          r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+    }
+  }
+  return 1;
+}
+
+// Public API
+
+int WebPPictureImportRGB(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {
+  return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0;
+}
+
+int WebPPictureImportBGR(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {
+  return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0;
+}
+
+int WebPPictureImportRGBA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0;
+}
+
+int WebPPictureImportBGRA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0;
+}
+
+int WebPPictureImportRGBX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0;
+}
+
+int WebPPictureImportBGRX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/picture_psnr.c b/src/main/jni/libwebp/enc/picture_psnr.c
new file mode 100644
index 000000000..2254b7e58
--- /dev/null
+++ b/src/main/jni/libwebp/enc/picture_psnr.c
@@ -0,0 +1,150 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools for measuring distortion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <math.h>
+
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// local-min distortion
+//
+// For every pixel in the *reference* picture, we search for the local best
+// match in the compressed image. This is not a symmetrical measure.
+
+#define RADIUS 2  // search radius. Shouldn't be too large.
+
+static float AccumulateLSIM(const uint8_t* src, int src_stride,
+                            const uint8_t* ref, int ref_stride,
+                            int w, int h) {
+  int x, y;
+  double total_sse = 0.;
+  for (y = 0; y < h; ++y) {
+    const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS;
+    const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1;
+    for (x = 0; x < w; ++x) {
+      const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS;
+      const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1;
+      double best_sse = 255. * 255.;
+      const double value = (double)ref[y * ref_stride + x];
+      int i, j;
+      for (j = y_0; j < y_1; ++j) {
+        const uint8_t* s = src + j * src_stride;
+        for (i = x_0; i < x_1; ++i) {
+          const double sse = (double)(s[i] - value) * (s[i] - value);
+          if (sse < best_sse) best_sse = sse;
+        }
+      }
+      total_sse += best_sse;
+    }
+  }
+  return (float)total_sse;
+}
+#undef RADIUS
+
+//------------------------------------------------------------------------------
+// Distortion
+
+// Max value returned in case of exact similarity.
+static const double kMinDistortion_dB = 99.;
+static float GetPSNR(const double v) {
+  return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
+                          : kMinDistortion_dB);
+}
+
+int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
+                          int type, float result[5]) {
+  DistoStats stats[5];
+  int has_alpha;
+  int uv_w, uv_h;
+
+  if (src == NULL || ref == NULL ||
+      src->width != ref->width || src->height != ref->height ||
+      src->y == NULL || ref->y == NULL ||
+      src->u == NULL || ref->u == NULL ||
+      src->v == NULL || ref->v == NULL ||
+      result == NULL) {
+    return 0;
+  }
+  // TODO(skal): provide distortion for ARGB too.
+  if (src->use_argb == 1 || src->use_argb != ref->use_argb) {
+    return 0;
+  }
+
+  has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
+  if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
+      (has_alpha && (src->a == NULL || ref->a == NULL))) {
+    return 0;
+  }
+
+  memset(stats, 0, sizeof(stats));
+
+  uv_w = (src->width + 1) >> 1;
+  uv_h = (src->height + 1) >> 1;
+  if (type >= 2) {
+    float sse[4];
+    sse[0] = AccumulateLSIM(src->y, src->y_stride,
+                            ref->y, ref->y_stride, src->width, src->height);
+    sse[1] = AccumulateLSIM(src->u, src->uv_stride,
+                            ref->u, ref->uv_stride, uv_w, uv_h);
+    sse[2] = AccumulateLSIM(src->v, src->uv_stride,
+                            ref->v, ref->uv_stride, uv_w, uv_h);
+    sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride,
+                                        ref->a, ref->a_stride,
+                                        src->width, src->height)
+                       : 0.f;
+    result[0] = GetPSNR(sse[0] / (src->width * src->height));
+    result[1] = GetPSNR(sse[1] / (uv_w * uv_h));
+    result[2] = GetPSNR(sse[2] / (uv_w * uv_h));
+    result[3] = GetPSNR(sse[3] / (src->width * src->height));
+    {
+      double total_sse = sse[0] + sse[1] + sse[2];
+      int total_pixels = src->width * src->height + 2 * uv_w * uv_h;
+      if (has_alpha) {
+        total_pixels += src->width * src->height;
+        total_sse += sse[3];
+      }
+      result[4] = GetPSNR(total_sse / total_pixels);
+    }
+  } else {
+    int c;
+    VP8SSIMAccumulatePlane(src->y, src->y_stride,
+                           ref->y, ref->y_stride,
+                           src->width, src->height, &stats[0]);
+    VP8SSIMAccumulatePlane(src->u, src->uv_stride,
+                           ref->u, ref->uv_stride,
+                           uv_w, uv_h, &stats[1]);
+    VP8SSIMAccumulatePlane(src->v, src->uv_stride,
+                           ref->v, ref->uv_stride,
+                           uv_w, uv_h, &stats[2]);
+    if (has_alpha) {
+      VP8SSIMAccumulatePlane(src->a, src->a_stride,
+                             ref->a, ref->a_stride,
+                             src->width, src->height, &stats[3]);
+    }
+    for (c = 0; c <= 4; ++c) {
+      if (type == 1) {
+        const double v = VP8SSIMGet(&stats[c]);
+        result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
+                                     : kMinDistortion_dB);
+      } else {
+        const double v = VP8SSIMGetSquaredError(&stats[c]);
+        result[c] = GetPSNR(v);
+      }
+      // Accumulate forward
+      if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/picture_rescale.c b/src/main/jni/libwebp/enc/picture_rescale.c
new file mode 100644
index 000000000..de52848ce
--- /dev/null
+++ b/src/main/jni/libwebp/enc/picture_rescale.c
@@ -0,0 +1,285 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: copy, crop, rescaling and view.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/rescaler.h"
+#include "../utils/utils.h"
+
+#define HALVE(x) (((x) + 1) >> 1)
+
+// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them
+// into 'dst'. Mark 'dst' as not owning any memory.
+static void PictureGrabSpecs(const WebPPicture* const src,
+                             WebPPicture* const dst) {
+  assert(src != NULL && dst != NULL);
+  *dst = *src;
+  WebPPictureResetBuffers(dst);
+}
+
+//------------------------------------------------------------------------------
+// Picture copying
+
+static void CopyPlane(const uint8_t* src, int src_stride,
+                      uint8_t* dst, int dst_stride, int width, int height) {
+  while (height-- > 0) {
+    memcpy(dst, src, width);
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+// Adjust top-left corner to chroma sample position.
+static void SnapTopLeftPosition(const WebPPicture* const pic,
+                                int* const left, int* const top) {
+  if (!pic->use_argb) {
+    *left &= ~1;
+    *top &= ~1;
+  }
+}
+
+// Adjust top-left corner and verify that the sub-rectangle is valid.
+static int AdjustAndCheckRectangle(const WebPPicture* const pic,
+                                   int* const left, int* const top,
+                                   int width, int height) {
+  SnapTopLeftPosition(pic, left, top);
+  if ((*left) < 0 || (*top) < 0) return 0;
+  if (width <= 0 || height <= 0) return 0;
+  if ((*left) + width > pic->width) return 0;
+  if ((*top) + height > pic->height) return 0;
+  return 1;
+}
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  if (src == dst) return 1;
+
+  PictureGrabSpecs(src, dst);
+  if (!WebPPictureAlloc(dst)) return 0;
+
+  if (!src->use_argb) {
+    CopyPlane(src->y, src->y_stride,
+              dst->y, dst->y_stride, dst->width, dst->height);
+    CopyPlane(src->u, src->uv_stride,
+              dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+    CopyPlane(src->v, src->uv_stride,
+              dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+    if (dst->a != NULL)  {
+      CopyPlane(src->a, src->a_stride,
+                dst->a, dst->a_stride, dst->width, dst->height);
+    }
+  } else {
+    CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
+              (uint8_t*)dst->argb, 4 * dst->argb_stride,
+              4 * dst->width, dst->height);
+  }
+  return 1;
+}
+
+int WebPPictureIsView(const WebPPicture* picture) {
+  if (picture == NULL) return 0;
+  if (picture->use_argb) {
+    return (picture->memory_argb_ == NULL);
+  }
+  return (picture->memory_ == NULL);
+}
+
+int WebPPictureView(const WebPPicture* src,
+                    int left, int top, int width, int height,
+                    WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+
+  // verify rectangle position.
+  if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+
+  if (src != dst) {  // beware of aliasing! We don't want to leak 'memory_'.
+    PictureGrabSpecs(src, dst);
+  }
+  dst->width = width;
+  dst->height = height;
+  if (!src->use_argb) {
+    dst->y = src->y + top * src->y_stride + left;
+    dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
+    dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
+    dst->y_stride = src->y_stride;
+    dst->uv_stride = src->uv_stride;
+    if (src->a != NULL) {
+      dst->a = src->a + top * src->a_stride + left;
+      dst->a_stride = src->a_stride;
+    }
+  } else {
+    dst->argb = src->argb + top * src->argb_stride + left;
+    dst->argb_stride = src->argb_stride;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Picture cropping
+
+int WebPPictureCrop(WebPPicture* pic,
+                    int left, int top, int width, int height) {
+  WebPPicture tmp;
+
+  if (pic == NULL) return 0;
+  if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
+
+  PictureGrabSpecs(pic, &tmp);
+  tmp.width = width;
+  tmp.height = height;
+  if (!WebPPictureAlloc(&tmp)) return 0;
+
+  if (!pic->use_argb) {
+    const int y_offset = top * pic->y_stride + left;
+    const int uv_offset = (top / 2) * pic->uv_stride + left / 2;
+    CopyPlane(pic->y + y_offset, pic->y_stride,
+              tmp.y, tmp.y_stride, width, height);
+    CopyPlane(pic->u + uv_offset, pic->uv_stride,
+              tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
+    CopyPlane(pic->v + uv_offset, pic->uv_stride,
+              tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
+
+    if (tmp.a != NULL) {
+      const int a_offset = top * pic->a_stride + left;
+      CopyPlane(pic->a + a_offset, pic->a_stride,
+                tmp.a, tmp.a_stride, width, height);
+    }
+  } else {
+    const uint8_t* const src =
+        (const uint8_t*)(pic->argb + top * pic->argb_stride + left);
+    CopyPlane(src, pic->argb_stride * 4,
+              (uint8_t*)tmp.argb, tmp.argb_stride * 4,
+              width * 4, height);
+  }
+  WebPPictureFree(pic);
+  *pic = tmp;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple picture rescaler
+
+static void RescalePlane(const uint8_t* src,
+                         int src_width, int src_height, int src_stride,
+                         uint8_t* dst,
+                         int dst_width, int dst_height, int dst_stride,
+                         int32_t* const work,
+                         int num_channels) {
+  WebPRescaler rescaler;
+  int y = 0;
+  WebPRescalerInit(&rescaler, src_width, src_height,
+                   dst, dst_width, dst_height, dst_stride,
+                   num_channels,
+                   src_width, dst_width,
+                   src_height, dst_height,
+                   work);
+  memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
+  while (y < src_height) {
+    y += WebPRescalerImport(&rescaler, src_height - y,
+                            src + y * src_stride, src_stride);
+    WebPRescalerExport(&rescaler);
+  }
+}
+
+static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) {
+  assert(pic->argb != NULL);
+  WebPMultARGBRows((uint8_t*)pic->argb, pic->argb_stride * sizeof(*pic->argb),
+                   pic->width, pic->height, inverse);
+}
+
+static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
+  if (pic->a != NULL) {
+    WebPMultRows(pic->y, pic->y_stride, pic->a, pic->a_stride,
+                 pic->width, pic->height, inverse);
+  }
+}
+
+int WebPPictureRescale(WebPPicture* pic, int width, int height) {
+  WebPPicture tmp;
+  int prev_width, prev_height;
+  int32_t* work;
+
+  if (pic == NULL) return 0;
+  prev_width = pic->width;
+  prev_height = pic->height;
+  // if width is unspecified, scale original proportionally to height ratio.
+  if (width == 0) {
+    width = (prev_width * height + prev_height / 2) / prev_height;
+  }
+  // if height is unspecified, scale original proportionally to width ratio.
+  if (height == 0) {
+    height = (prev_height * width + prev_width / 2) / prev_width;
+  }
+  // Check if the overall dimensions still make sense.
+  if (width <= 0 || height <= 0) return 0;
+
+  PictureGrabSpecs(pic, &tmp);
+  tmp.width = width;
+  tmp.height = height;
+  if (!WebPPictureAlloc(&tmp)) return 0;
+
+  if (!pic->use_argb) {
+    work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);
+      return 0;
+    }
+    // If present, we need to rescale alpha first (for AlphaMultiplyY).
+    if (pic->a != NULL) {
+      WebPInitAlphaProcessing();
+      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+                   tmp.a, width, height, tmp.a_stride, work, 1);
+    }
+
+    // We take transparency into account on the luma plane only. That's not
+    // totally exact blending, but still is a good approximation.
+    AlphaMultiplyY(pic, 0);
+    RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
+                 tmp.y, width, height, tmp.y_stride, work, 1);
+    AlphaMultiplyY(&tmp, 1);
+
+    RescalePlane(pic->u,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.u,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+    RescalePlane(pic->v,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.v,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+  } else {
+    work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);
+      return 0;
+    }
+    // In order to correctly interpolate colors, we need to apply the alpha
+    // weighting first (black-matting), scale the RGB values, and remove
+    // the premultiplication afterward (while preserving the alpha channel).
+    WebPInitAlphaProcessing();
+    AlphaMultiplyARGB(pic, 0);
+    RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
+                 pic->argb_stride * 4,
+                 (uint8_t*)tmp.argb, width, height,
+                 tmp.argb_stride * 4,
+                 work, 4);
+    AlphaMultiplyARGB(&tmp, 1);
+  }
+  WebPPictureFree(pic);
+  WebPSafeFree(work);
+  *pic = tmp;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/picture_tools.c b/src/main/jni/libwebp/enc/picture_tools.c
new file mode 100644
index 000000000..7c7364639
--- /dev/null
+++ b/src/main/jni/libwebp/enc/picture_tools.c
@@ -0,0 +1,206 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: alpha handling, etc.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8enci.h"
+#include "../dsp/yuv.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
+  return (0xff000000u | (r << 16) | (g << 8) | b);
+}
+
+//------------------------------------------------------------------------------
+// Helper: clean up fully transparent area to help compressibility.
+
+#define SIZE 8
+#define SIZE2 (SIZE / 2)
+static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
+  int y, x;
+  for (y = 0; y < size; ++y) {
+    for (x = 0; x < size; ++x) {
+      if (ptr[x]) {
+        return 0;
+      }
+    }
+    ptr += stride;
+  }
+  return 1;
+}
+
+static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) {
+  int y, x;
+  for (y = 0; y < size; ++y) {
+    for (x = 0; x < size; ++x) {
+      if (ptr[x] & 0xff000000u) {
+        return 0;
+      }
+    }
+    ptr += stride;
+  }
+  return 1;
+}
+
+static void flatten(uint8_t* ptr, int v, int stride, int size) {
+  int y;
+  for (y = 0; y < size; ++y) {
+    memset(ptr, v, size);
+    ptr += stride;
+  }
+}
+
+static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) {
+  int x, y;
+  for (y = 0; y < size; ++y) {
+    for (x = 0; x < size; ++x) ptr[x] = v;
+    ptr += stride;
+  }
+}
+
+void WebPCleanupTransparentArea(WebPPicture* pic) {
+  int x, y, w, h;
+  if (pic == NULL) return;
+  w = pic->width / SIZE;
+  h = pic->height / SIZE;
+
+  // note: we ignore the left-overs on right/bottom
+  if (pic->use_argb) {
+    uint32_t argb_value = 0;
+    for (y = 0; y < h; ++y) {
+      int need_reset = 1;
+      for (x = 0; x < w; ++x) {
+        const int off = (y * pic->argb_stride + x) * SIZE;
+        if (is_transparent_argb_area(pic->argb + off, pic->argb_stride, SIZE)) {
+          if (need_reset) {
+            argb_value = pic->argb[off];
+            need_reset = 0;
+          }
+          flatten_argb(pic->argb + off, argb_value, pic->argb_stride, SIZE);
+        } else {
+          need_reset = 1;
+        }
+      }
+    }
+  } else {
+    const uint8_t* const a_ptr = pic->a;
+    int values[3] = { 0 };
+    if (a_ptr == NULL) return;    // nothing to do
+    for (y = 0; y < h; ++y) {
+      int need_reset = 1;
+      for (x = 0; x < w; ++x) {
+        const int off_a = (y * pic->a_stride + x) * SIZE;
+        const int off_y = (y * pic->y_stride + x) * SIZE;
+        const int off_uv = (y * pic->uv_stride + x) * SIZE2;
+        if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
+          if (need_reset) {
+            values[0] = pic->y[off_y];
+            values[1] = pic->u[off_uv];
+            values[2] = pic->v[off_uv];
+            need_reset = 0;
+          }
+          flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
+          flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
+          flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
+        } else {
+          need_reset = 1;
+        }
+      }
+    }
+  }
+}
+
+#undef SIZE
+#undef SIZE2
+
+//------------------------------------------------------------------------------
+// Blend color and remove transparency info
+
+#define BLEND(V0, V1, ALPHA) \
+    ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16)
+#define BLEND_10BIT(V0, V1, ALPHA) \
+    ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18)
+
+void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
+  const int red = (background_rgb >> 16) & 0xff;
+  const int green = (background_rgb >> 8) & 0xff;
+  const int blue = (background_rgb >> 0) & 0xff;
+  int x, y;
+  if (pic == NULL) return;
+  if (!pic->use_argb) {
+    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
+    const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
+    // VP8RGBToU/V expects the u/v values summed over four pixels
+    const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+    const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
+    if (!has_alpha || pic->a == NULL) return;    // nothing to do
+    for (y = 0; y < pic->height; ++y) {
+      // Luma blending
+      uint8_t* const y_ptr = pic->y + y * pic->y_stride;
+      uint8_t* const a_ptr = pic->a + y * pic->a_stride;
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = a_ptr[x];
+        if (alpha < 0xff) {
+          y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
+        }
+      }
+      // Chroma blending every even line
+      if ((y & 1) == 0) {
+        uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
+        uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
+        uint8_t* const a_ptr2 =
+            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
+        for (x = 0; x < uv_width; ++x) {
+          // Average four alpha values into a single blending weight.
+          // TODO(skal): might lead to visible contouring. Can we do better?
+          const int alpha =
+              a_ptr[2 * x + 0] + a_ptr[2 * x + 1] +
+              a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+        if (pic->width & 1) {   // rightmost pixel
+          const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+      }
+      memset(a_ptr, 0xff, pic->width);
+    }
+  } else {
+    uint32_t* argb = pic->argb;
+    const uint32_t background = MakeARGB32(red, green, blue);
+    for (y = 0; y < pic->height; ++y) {
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = (argb[x] >> 24) & 0xff;
+        if (alpha != 0xff) {
+          if (alpha > 0) {
+            int r = (argb[x] >> 16) & 0xff;
+            int g = (argb[x] >>  8) & 0xff;
+            int b = (argb[x] >>  0) & 0xff;
+            r = BLEND(red, r, alpha);
+            g = BLEND(green, g, alpha);
+            b = BLEND(blue, b, alpha);
+            argb[x] = MakeARGB32(r, g, b);
+          } else {
+            argb[x] = background;
+          }
+        }
+      }
+      argb += pic->argb_stride;
+    }
+  }
+}
+
+#undef BLEND
+#undef BLEND_10BIT
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/quant.c b/src/main/jni/libwebp/enc/quant.c
new file mode 100644
index 000000000..9130a4160
--- /dev/null
+++ b/src/main/jni/libwebp/enc/quant.c
@@ -0,0 +1,1170 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   Quantization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>  // for abs()
+
+#include "./vp8enci.h"
+#include "./cost.h"
+
+#define DO_TRELLIS_I4  1
+#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
+#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth.
+#define USE_TDISTO 1
+
+#define MID_ALPHA 64      // neutral value for susceptibility
+#define MIN_ALPHA 30      // lowest usable value for susceptibility
+#define MAX_ALPHA 100     // higher meaningful value for susceptibility
+
+#define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
+                          // power-law modulation. Must be strictly less than 1.
+
+#define I4_PENALTY 4000   // Rate-penalty for quick i4/i16 decision
+
+// number of non-zero coeffs below which we consider the block very flat
+// (and apply a penalty to complex predictions)
+#define FLATNESS_LIMIT_I16 10      // I16 mode
+#define FLATNESS_LIMIT_I4  3       // I4 mode
+#define FLATNESS_LIMIT_UV  2       // UV mode
+#define FLATNESS_PENALTY   140     // roughly ~1bit per block
+
+#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
+
+// #define DEBUG_BLOCK
+
+//------------------------------------------------------------------------------
+
+#if defined(DEBUG_BLOCK)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static void PrintBlockInfo(const VP8EncIterator* const it,
+                           const VP8ModeScore* const rd) {
+  int i, j;
+  const int is_i16 = (it->mb_->type_ == 1);
+  printf("SOURCE / OUTPUT / ABS DELTA\n");
+  for (j = 0; j < 24; ++j) {
+    if (j == 16) printf("\n");   // newline before the U/V block
+    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
+    printf("     ");
+    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
+    printf("     ");
+    for (i = 0; i < 16; ++i) {
+      printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
+    }
+    printf("\n");
+  }
+  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
+    (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
+    (int)rd->score);
+  if (is_i16) {
+    printf("Mode: %d\n", rd->mode_i16);
+    printf("y_dc_levels:");
+    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
+    printf("\n");
+  } else {
+    printf("Modes[16]: ");
+    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
+    printf("\n");
+  }
+  printf("y_ac_levels:\n");
+  for (j = 0; j < 16; ++j) {
+    for (i = is_i16 ? 1 : 0; i < 16; ++i) {
+      printf("%4d ", rd->y_ac_levels[j][i]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+  printf("uv_levels (mode=%d):\n", rd->mode_uv);
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 16; ++i) {
+      printf("%4d ", rd->uv_levels[j][i]);
+    }
+    printf("\n");
+  }
+}
+
+#endif   // DEBUG_BLOCK
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int clip(int v, int m, int M) {
+  return v < m ? m : v > M ? M : v;
+}
+
+static const uint8_t kZigzag[16] = {
+  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+static const uint8_t kDcTable[128] = {
+  4,     5,   6,   7,   8,   9,  10,  10,
+  11,   12,  13,  14,  15,  16,  17,  17,
+  18,   19,  20,  20,  21,  21,  22,  22,
+  23,   23,  24,  25,  25,  26,  27,  28,
+  29,   30,  31,  32,  33,  34,  35,  36,
+  37,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  46,  47,  48,  49,  50,
+  51,   52,  53,  54,  55,  56,  57,  58,
+  59,   60,  61,  62,  63,  64,  65,  66,
+  67,   68,  69,  70,  71,  72,  73,  74,
+  75,   76,  76,  77,  78,  79,  80,  81,
+  82,   83,  84,  85,  86,  87,  88,  89,
+  91,   93,  95,  96,  98, 100, 101, 102,
+  104, 106, 108, 110, 112, 114, 116, 118,
+  122, 124, 126, 128, 130, 132, 134, 136,
+  138, 140, 143, 145, 148, 151, 154, 157
+};
+
+static const uint16_t kAcTable[128] = {
+  4,     5,   6,   7,   8,   9,  10,  11,
+  12,   13,  14,  15,  16,  17,  18,  19,
+  20,   21,  22,  23,  24,  25,  26,  27,
+  28,   29,  30,  31,  32,  33,  34,  35,
+  36,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  47,  48,  49,  50,  51,
+  52,   53,  54,  55,  56,  57,  58,  60,
+  62,   64,  66,  68,  70,  72,  74,  76,
+  78,   80,  82,  84,  86,  88,  90,  92,
+  94,   96,  98, 100, 102, 104, 106, 108,
+  110, 112, 114, 116, 119, 122, 125, 128,
+  131, 134, 137, 140, 143, 146, 149, 152,
+  155, 158, 161, 164, 167, 170, 173, 177,
+  181, 185, 189, 193, 197, 201, 205, 209,
+  213, 217, 221, 225, 229, 234, 239, 245,
+  249, 254, 259, 264, 269, 274, 279, 284
+};
+
+static const uint16_t kAcTable2[128] = {
+  8,     8,   9,  10,  12,  13,  15,  17,
+  18,   20,  21,  23,  24,  26,  27,  29,
+  31,   32,  34,  35,  37,  38,  40,  41,
+  43,   44,  46,  48,  49,  51,  52,  54,
+  55,   57,  58,  60,  62,  63,  65,  66,
+  68,   69,  71,  72,  74,  75,  77,  79,
+  80,   82,  83,  85,  86,  88,  89,  93,
+  96,   99, 102, 105, 108, 111, 114, 117,
+  120, 124, 127, 130, 133, 136, 139, 142,
+  145, 148, 151, 155, 158, 161, 164, 167,
+  170, 173, 176, 179, 184, 189, 193, 198,
+  203, 207, 212, 217, 221, 226, 230, 235,
+  240, 244, 249, 254, 258, 263, 268, 274,
+  280, 286, 292, 299, 305, 311, 317, 323,
+  330, 336, 342, 348, 354, 362, 370, 379,
+  385, 393, 401, 409, 416, 424, 432, 440
+};
+
+static const uint8_t kBiasMatrices[3][2] = {  // [luma-ac,luma-dc,chroma][dc,ac]
+  { 96, 110 }, { 96, 108 }, { 110, 115 }
+};
+
+// Sharpening by (slightly) raising the hi-frequency coeffs.
+// Hack-ish but helpful for mid-bitrate range. Use with care.
+#define SHARPEN_BITS 11  // number of descaling bits for sharpening bias
+static const uint8_t kFreqSharpening[16] = {
+  0,  30, 60, 90,
+  30, 60, 90, 90,
+  60, 90, 90, 90,
+  90, 90, 90, 90
+};
+
+//------------------------------------------------------------------------------
+// Initialize quantization parameters in VP8Matrix
+
+// Returns the average quantizer
+static int ExpandMatrix(VP8Matrix* const m, int type) {
+  int i, sum;
+  for (i = 0; i < 2; ++i) {
+    const int is_ac_coeff = (i > 0);
+    const int bias = kBiasMatrices[type][is_ac_coeff];
+    m->iq_[i] = (1 << QFIX) / m->q_[i];
+    m->bias_[i] = BIAS(bias);
+    // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
+    //   * zero if coeff <= zthresh
+    //   * non-zero if coeff > zthresh
+    m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
+  }
+  for (i = 2; i < 16; ++i) {
+    m->q_[i] = m->q_[1];
+    m->iq_[i] = m->iq_[1];
+    m->bias_[i] = m->bias_[1];
+    m->zthresh_[i] = m->zthresh_[1];
+  }
+  for (sum = 0, i = 0; i < 16; ++i) {
+    if (type == 0) {  // we only use sharpening for AC luma coeffs
+      m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
+    } else {
+      m->sharpen_[i] = 0;
+    }
+    sum += m->q_[i];
+  }
+  return (sum + 8) >> 4;
+}
+
+static void SetupMatrices(VP8Encoder* enc) {
+  int i;
+  const int tlambda_scale =
+    (enc->method_ >= 4) ? enc->config_->sns_strength
+                        : 0;
+  const int num_segments = enc->segment_hdr_.num_segments_;
+  for (i = 0; i < num_segments; ++i) {
+    VP8SegmentInfo* const m = &enc->dqm_[i];
+    const int q = m->quant_;
+    int q4, q16, quv;
+    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
+    m->y1_.q_[1] = kAcTable[clip(q,                  0, 127)];
+
+    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
+    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
+
+    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
+    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
+
+    q4  = ExpandMatrix(&m->y1_, 0);
+    q16 = ExpandMatrix(&m->y2_, 1);
+    quv = ExpandMatrix(&m->uv_, 2);
+
+    m->lambda_i4_          = (3 * q4 * q4) >> 7;
+    m->lambda_i16_         = (3 * q16 * q16);
+    m->lambda_uv_          = (3 * quv * quv) >> 6;
+    m->lambda_mode_        = (1 * q4 * q4) >> 7;
+    m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
+    m->lambda_trellis_i16_ = (q16 * q16) >> 2;
+    m->lambda_trellis_uv_  = (quv *quv) << 1;
+    m->tlambda_            = (tlambda_scale * q4) >> 5;
+
+    m->min_disto_ = 10 * m->y1_.q_[0];   // quantization-aware min disto
+    m->max_edge_  = 0;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Initialize filtering parameters
+
+// Very small filter-strength values have close to no visual effect. So we can
+// save a little decoding-CPU by turning filtering off for these.
+#define FSTRENGTH_CUTOFF 2
+
+static void SetupFilterStrength(VP8Encoder* const enc) {
+  int i;
+  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
+  const int level0 = 5 * enc->config_->filter_strength;
+  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+    VP8SegmentInfo* const m = &enc->dqm_[i];
+    // We focus on the quantization of AC coeffs.
+    const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
+    const int base_strength =
+        VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
+    // Segments with lower complexity ('beta') will be less filtered.
+    const int f = base_strength * level0 / (256 + m->beta_);
+    m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
+  }
+  // We record the initial strength (mainly for the case of 1-segment only).
+  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
+  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
+  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
+}
+
+//------------------------------------------------------------------------------
+
+// Note: if you change the values below, remember that the max range
+// allowed by the syntax for DQ_UV is [-16,16].
+#define MAX_DQ_UV (6)
+#define MIN_DQ_UV (-4)
+
+// We want to emulate jpeg-like behaviour where the expected "good" quality
+// is around q=75. Internally, our "good" middle is around c=50. So we
+// map accordingly using linear piece-wise function
+static double QualityToCompression(double c) {
+  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
+  // The file size roughly scales as pow(quantizer, 3.). Actually, the
+  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
+  // in the mid-quant range. So we scale the compressibility inversely to
+  // this power-law: quant ~= compression ^ 1/3. This law holds well for
+  // low quant. Finer modeling for high-quant would make use of kAcTable[]
+  // more explicitly.
+  const double v = pow(linear_c, 1 / 3.);
+  return v;
+}
+
+static double QualityToJPEGCompression(double c, double alpha) {
+  // We map the complexity 'alpha' and quality setting 'c' to a compression
+  // exponent empirically matched to the compression curve of libjpeg6b.
+  // On average, the WebP output size will be roughly similar to that of a
+  // JPEG file compressed with same quality factor.
+  const double amin = 0.30;
+  const double amax = 0.85;
+  const double exp_min = 0.4;
+  const double exp_max = 0.9;
+  const double slope = (exp_min - exp_max) / (amax - amin);
+  // Linearly interpolate 'expn' from exp_min to exp_max
+  // in the [amin, amax] range.
+  const double expn = (alpha > amax) ? exp_min
+                    : (alpha < amin) ? exp_max
+                    : exp_max + slope * (alpha - amin);
+  const double v = pow(c, expn);
+  return v;
+}
+
+static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
+                                 const VP8SegmentInfo* const S2) {
+  return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
+}
+
+static void SimplifySegments(VP8Encoder* const enc) {
+  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
+  const int num_segments = enc->segment_hdr_.num_segments_;
+  int num_final_segments = 1;
+  int s1, s2;
+  for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
+    const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
+    int found = 0;
+    // check if we already have similar segment
+    for (s2 = 0; s2 < num_final_segments; ++s2) {
+      const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
+      if (SegmentsAreEquivalent(S1, S2)) {
+        found = 1;
+        break;
+      }
+    }
+    map[s1] = s2;
+    if (!found) {
+      if (num_final_segments != s1) {
+        enc->dqm_[num_final_segments] = enc->dqm_[s1];
+      }
+      ++num_final_segments;
+    }
+  }
+  if (num_final_segments < num_segments) {  // Remap
+    int i = enc->mb_w_ * enc->mb_h_;
+    while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
+    enc->segment_hdr_.num_segments_ = num_final_segments;
+    // Replicate the trailing segment infos (it's mostly cosmetics)
+    for (i = num_final_segments; i < num_segments; ++i) {
+      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
+    }
+  }
+}
+
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
+  int i;
+  int dq_uv_ac, dq_uv_dc;
+  const int num_segments = enc->segment_hdr_.num_segments_;
+  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
+  const double Q = quality / 100.;
+  const double c_base = enc->config_->emulate_jpeg_size ?
+      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
+      QualityToCompression(Q);
+  for (i = 0; i < num_segments; ++i) {
+    // We modulate the base coefficient to accommodate for the quantization
+    // susceptibility and allow denser segments to be quantized more.
+    const double expn = 1. - amp * enc->dqm_[i].alpha_;
+    const double c = pow(c_base, expn);
+    const int q = (int)(127. * (1. - c));
+    assert(expn > 0.);
+    enc->dqm_[i].quant_ = clip(q, 0, 127);
+  }
+
+  // purely indicative in the bitstream (except for the 1-segment case)
+  enc->base_quant_ = enc->dqm_[0].quant_;
+
+  // fill-in values for the unused segments (required by the syntax)
+  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
+    enc->dqm_[i].quant_ = enc->base_quant_;
+  }
+
+  // uv_alpha_ is normally spread around ~60. The useful range is
+  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
+  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
+  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
+                                          / (MAX_ALPHA - MIN_ALPHA);
+  // we rescale by the user-defined strength of adaptation
+  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
+  // and make it safe.
+  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
+  // We also boost the dc-uv-quant a little, based on sns-strength, since
+  // U/V channels are quite more reactive to high quants (flat DC-blocks
+  // tend to appear, and are unpleasant).
+  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
+  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed
+
+  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
+  enc->dq_y2_dc_ = 0;
+  enc->dq_y2_ac_ = 0;
+  enc->dq_uv_dc_ = dq_uv_dc;
+  enc->dq_uv_ac_ = dq_uv_ac;
+
+  SetupFilterStrength(enc);   // initialize segments' filtering, eventually
+
+  if (num_segments > 1) SimplifySegments(enc);
+
+  SetupMatrices(enc);         // finalize quantization matrices
+}
+
+//------------------------------------------------------------------------------
+// Form the predictions in cache
+
+// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
+const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
+const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
+
+// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
+const int VP8I4ModeOffsets[NUM_BMODES] = {
+  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
+};
+
+void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
+  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
+  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
+  VP8EncPredLuma16(it->yuv_p_, left, top);
+}
+
+void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
+  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
+  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
+  VP8EncPredChroma8(it->yuv_p_, left, top);
+}
+
+void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
+  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
+}
+
+//------------------------------------------------------------------------------
+// Quantize
+
+// Layout:
+// +----+
+// |YYYY| 0
+// |YYYY| 4
+// |YYYY| 8
+// |YYYY| 12
+// +----+
+// |UUVV| 16
+// |UUVV| 20
+// +----+
+
+const int VP8Scan[16] = {  // Luma
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+};
+
+static const int VP8ScanUV[4 + 4] = {
+  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
+  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+};
+
+//------------------------------------------------------------------------------
+// Distortion measurement
+
+static const uint16_t kWeightY[16] = {
+  38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
+};
+
+static const uint16_t kWeightTrellis[16] = {
+#if USE_TDISTO == 0
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+#else
+  30, 27, 19, 11,
+  27, 24, 17, 10,
+  19, 17, 12,  8,
+  11, 10,  8,  6
+#endif
+};
+
+// Init/Copy the common fields in score.
+static void InitScore(VP8ModeScore* const rd) {
+  rd->D  = 0;
+  rd->SD = 0;
+  rd->R  = 0;
+  rd->H  = 0;
+  rd->nz = 0;
+  rd->score = MAX_COST;
+}
+
+static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+  dst->D  = src->D;
+  dst->SD = src->SD;
+  dst->R  = src->R;
+  dst->H  = src->H;
+  dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
+  dst->score = src->score;
+}
+
+static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+  dst->D  += src->D;
+  dst->SD += src->SD;
+  dst->R  += src->R;
+  dst->H  += src->H;
+  dst->nz |= src->nz;     // here, new nz bits are accumulated.
+  dst->score += src->score;
+}
+
+//------------------------------------------------------------------------------
+// Performs trellis-optimized quantization.
+
+// Trellis node
+typedef struct {
+  int8_t prev;            // best previous node
+  int8_t sign;            // sign of coeff_i
+  int16_t level;          // level
+} Node;
+
+// Score state
+typedef struct {
+  score_t score;          // partial RD score
+  const uint16_t* costs;  // shortcut to cost tables
+} ScoreState;
+
+// If a coefficient was quantized to a value Q (using a neutral bias),
+// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
+// We don't test negative values though.
+#define MIN_DELTA 0   // how much lower level to try
+#define MAX_DELTA 1   // how much higher
+#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
+#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
+#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
+
+static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
+  // TODO: incorporate the "* 256" in the tables?
+  rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
+}
+
+static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
+                                          score_t distortion) {
+  return rate * lambda + 256 * distortion;
+}
+
+static int TrellisQuantizeBlock(const VP8Encoder* const enc,
+                                int16_t in[16], int16_t out[16],
+                                int ctx0, int coeff_type,
+                                const VP8Matrix* const mtx,
+                                int lambda) {
+  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
+  const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
+  const int first = (coeff_type == 0) ? 1 : 0;
+  Node nodes[16][NUM_NODES];
+  ScoreState score_states[2][NUM_NODES];
+  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
+  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
+  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
+  score_t best_score;
+  int n, m, p, last;
+
+  {
+    score_t cost;
+    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
+    const int last_proba = probas[VP8EncBands[first]][ctx0][0];
+
+    // compute the position of the last interesting coefficient
+    last = first - 1;
+    for (n = 15; n >= first; --n) {
+      const int j = kZigzag[n];
+      const int err = in[j] * in[j];
+      if (err > thresh) {
+        last = n;
+        break;
+      }
+    }
+    // we don't need to go inspect up to n = 16 coeffs. We can just go up
+    // to last + 1 (inclusive) without losing much.
+    if (last < 15) ++last;
+
+    // compute 'skip' score. This is the max score one can do.
+    cost = VP8BitCost(0, last_proba);
+    best_score = RDScoreTrellis(lambda, cost, 0);
+
+    // initialize source node.
+    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
+      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
+      ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
+    }
+  }
+
+  // traverse trellis.
+  for (n = first; n <= last; ++n) {
+    const int j = kZigzag[n];
+    const uint32_t Q  = mtx->q_[j];
+    const uint32_t iQ = mtx->iq_[j];
+    const uint32_t B = BIAS(0x00);     // neutral bias
+    // note: it's important to take sign of the _original_ coeff,
+    // so we don't have to consider level < 0 afterward.
+    const int sign = (in[j] < 0);
+    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    int level0 = QUANTDIV(coeff0, iQ, B);
+    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
+
+    {   // Swap current and previous score states
+      ScoreState* const tmp = ss_cur;
+      ss_cur = ss_prev;
+      ss_prev = tmp;
+    }
+
+    // test all alternate level values around level0.
+    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+      Node* const cur = &NODE(n, m);
+      int level = level0 + m;
+      const int ctx = (level > 2) ? 2 : level;
+      const int band = VP8EncBands[n + 1];
+      score_t base_score, last_pos_score;
+      score_t best_cur_score = MAX_COST;
+      int best_prev = 0;   // default, in case
+
+      ss_cur[m].score = MAX_COST;
+      ss_cur[m].costs = costs[band][ctx];
+      if (level > MAX_LEVEL || level < 0) {   // node is dead?
+        continue;
+      }
+
+      // Compute extra rate cost if last coeff's position is < 15
+      {
+        const score_t last_pos_cost =
+            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
+        last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
+      }
+
+      {
+        // Compute delta_error = how much coding this level will
+        // subtract to max_error as distortion.
+        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
+        const int new_error = coeff0 - level * Q;
+        const int delta_error =
+            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
+        base_score = RDScoreTrellis(lambda, 0, delta_error);
+      }
+
+      // Inspect all possible non-dead predecessors. Retain only the best one.
+      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
+        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
+        // eliminated since their score can't be better than the current best.
+        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
+        // Examine node assuming it's a non-terminal one.
+        const score_t score =
+            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
+        if (score < best_cur_score) {
+          best_cur_score = score;
+          best_prev = p;
+        }
+      }
+      // Store best finding in current node.
+      cur->sign = sign;
+      cur->level = level;
+      cur->prev = best_prev;
+      ss_cur[m].score = best_cur_score;
+
+      // Now, record best terminal node (and thus best entry in the graph).
+      if (level != 0) {
+        const score_t score = best_cur_score + last_pos_score;
+        if (score < best_score) {
+          best_score = score;
+          best_path[0] = n;                     // best eob position
+          best_path[1] = m;                     // best node index
+          best_path[2] = best_prev;             // best predecessor
+        }
+      }
+    }
+  }
+
+  // Fresh start
+  memset(in + first, 0, (16 - first) * sizeof(*in));
+  memset(out + first, 0, (16 - first) * sizeof(*out));
+  if (best_path[0] == -1) {
+    return 0;   // skip!
+  }
+
+  {
+    // Unwind the best path.
+    // Note: best-prev on terminal node is not necessarily equal to the
+    // best_prev for non-terminal. So we patch best_path[2] in.
+    int nz = 0;
+    int best_node = best_path[1];
+    n = best_path[0];
+    NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
+
+    for (; n >= first; --n) {
+      const Node* const node = &NODE(n, best_node);
+      const int j = kZigzag[n];
+      out[n] = node->sign ? -node->level : node->level;
+      nz |= node->level;
+      in[j] = out[n] * mtx->q_[j];
+      best_node = node->prev;
+    }
+    return (nz != 0);
+  }
+}
+
+#undef NODE
+
+//------------------------------------------------------------------------------
+// Performs: difference, transform, quantize, back-transform, add
+// all at once. Output is the reconstructed block in *yuv_out, and the
+// quantized levels in *levels.
+
+static int ReconstructIntra16(VP8EncIterator* const it,
+                              VP8ModeScore* const rd,
+                              uint8_t* const yuv_out,
+                              int mode) {
+  const VP8Encoder* const enc = it->enc_;
+  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in_ + Y_OFF;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  int nz = 0;
+  int n;
+  int16_t tmp[16][16], dc_tmp[16];
+
+  for (n = 0; n < 16; ++n) {
+    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+  }
+  VP8FTransformWHT(tmp[0], dc_tmp);
+  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
+
+  if (DO_TRELLIS_I16 && it->do_trellis_) {
+    int x, y;
+    VP8IteratorNzToBytes(it);
+    for (y = 0, n = 0; y < 4; ++y) {
+      for (x = 0; x < 4; ++x, ++n) {
+        const int ctx = it->top_nz_[x] + it->left_nz_[y];
+        const int non_zero =
+            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
+                                 &dqm->y1_, dqm->lambda_trellis_i16_);
+        it->top_nz_[x] = it->left_nz_[y] = non_zero;
+        rd->y_ac_levels[n][0] = 0;
+        nz |= non_zero << n;
+      }
+    }
+  } else {
+    for (n = 0; n < 16; ++n) {
+      // Zero-out the first coeff, so that: a) nz is correct below, and
+      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
+      tmp[n][0] = 0;
+      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+      assert(rd->y_ac_levels[n][0] == 0);
+    }
+  }
+
+  // Transform back
+  VP8TransformWHT(dc_tmp, tmp[0]);
+  for (n = 0; n < 16; n += 2) {
+    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
+  }
+
+  return nz;
+}
+
+static int ReconstructIntra4(VP8EncIterator* const it,
+                             int16_t levels[16],
+                             const uint8_t* const src,
+                             uint8_t* const yuv_out,
+                             int mode) {
+  const VP8Encoder* const enc = it->enc_;
+  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  int nz = 0;
+  int16_t tmp[16];
+
+  VP8FTransform(src, ref, tmp);
+  if (DO_TRELLIS_I4 && it->do_trellis_) {
+    const int x = it->i4_ & 3, y = it->i4_ >> 2;
+    const int ctx = it->top_nz_[x] + it->left_nz_[y];
+    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
+                              dqm->lambda_trellis_i4_);
+  } else {
+    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
+  }
+  VP8ITransform(ref, tmp, yuv_out, 0);
+  return nz;
+}
+
+static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
+                         uint8_t* const yuv_out, int mode) {
+  const VP8Encoder* const enc = it->enc_;
+  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in_ + U_OFF;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  int nz = 0;
+  int n;
+  int16_t tmp[8][16];
+
+  for (n = 0; n < 8; ++n) {
+    VP8FTransform(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
+  }
+  if (DO_TRELLIS_UV && it->do_trellis_) {
+    int ch, x, y;
+    for (ch = 0, n = 0; ch <= 2; ch += 2) {
+      for (y = 0; y < 2; ++y) {
+        for (x = 0; x < 2; ++x, ++n) {
+          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+          const int non_zero =
+              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
+                                   &dqm->uv_, dqm->lambda_trellis_uv_);
+          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
+          nz |= non_zero << n;
+        }
+      }
+    }
+  } else {
+    for (n = 0; n < 8; ++n) {
+      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+    }
+  }
+
+  for (n = 0; n < 8; n += 2) {
+    VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
+  }
+  return (nz << 16);
+}
+
+//------------------------------------------------------------------------------
+// RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
+// Pick the mode is lower RD-cost = Rate + lambda * Distortion.
+
+static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
+  // We look at the first three AC coefficients to determine what is the average
+  // delta between each sub-4x4 block.
+  const int v0 = abs(DCs[1]);
+  const int v1 = abs(DCs[4]);
+  const int v2 = abs(DCs[5]);
+  int max_v = (v0 > v1) ? v1 : v0;
+  max_v = (v2 > max_v) ? v2 : max_v;
+  if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
+}
+
+static void SwapPtr(uint8_t** a, uint8_t** b) {
+  uint8_t* const tmp = *a;
+  *a = *b;
+  *b = tmp;
+}
+
+static void SwapOut(VP8EncIterator* const it) {
+  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
+}
+
+static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
+  score_t score = 0;
+  while (num_blocks-- > 0) {      // TODO(skal): refine positional scoring?
+    int i;
+    for (i = 1; i < 16; ++i) {    // omit DC, we're only interested in AC
+      score += (levels[i] != 0);
+      if (score > thresh) return 0;
+    }
+    levels += 16;
+  }
+  return 1;
+}
+
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const int kNumBlocks = 16;
+  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_i16_;
+  const int tlambda = dqm->tlambda_;
+  const uint8_t* const src = it->yuv_in_ + Y_OFF;
+  VP8ModeScore rd16;
+  int mode;
+
+  rd->mode_i16 = -1;
+  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;  // scratch buffer
+    int nz;
+
+    // Reconstruct
+    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
+
+    // Measure RD-score
+    rd16.D = VP8SSE16x16(src, tmp_dst);
+    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
+            : 0;
+    rd16.H = VP8FixedCostsI16[mode];
+    rd16.R = VP8GetCostLuma16(it, &rd16);
+    if (mode > 0 &&
+        IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
+      // penalty to avoid flat area to be mispredicted by complex mode
+      rd16.R += FLATNESS_PENALTY * kNumBlocks;
+    }
+
+    // Since we always examine Intra16 first, we can overwrite *rd directly.
+    SetRDScore(lambda, &rd16);
+    if (mode == 0 || rd16.score < rd->score) {
+      CopyScore(rd, &rd16);
+      rd->mode_i16 = mode;
+      rd->nz = nz;
+      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
+      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
+      SwapOut(it);
+    }
+  }
+  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
+  VP8SetIntra16Mode(it, rd->mode_i16);
+
+  // we have a blocky macroblock (only DCs are non-zero) with fairly high
+  // distortion, record max delta so we can later adjust the minimal filtering
+  // strength needed to smooth these blocks out.
+  if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
+    StoreMaxDelta(dqm, rd->y_dc_levels);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+// return the cost array corresponding to the surrounding prediction modes.
+static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
+                                     const uint8_t modes[16]) {
+  const int preds_w = it->enc_->preds_w_;
+  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
+  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
+  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
+  return VP8FixedCostsI4[top][left];
+}
+
+static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_i4_;
+  const int tlambda = dqm->tlambda_;
+  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
+  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+  int total_header_bits = 0;
+  VP8ModeScore rd_best;
+
+  if (enc->max_i4_header_bits_ == 0) {
+    return 0;
+  }
+
+  InitScore(&rd_best);
+  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
+  SetRDScore(dqm->lambda_mode_, &rd_best);
+  VP8IteratorStartI4(it);
+  do {
+    const int kNumBlocks = 1;
+    VP8ModeScore rd_i4;
+    int mode;
+    int best_mode = -1;
+    const uint8_t* const src = src0 + VP8Scan[it->i4_];
+    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
+    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
+    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.
+
+    InitScore(&rd_i4);
+    VP8MakeIntra4Preds(it);
+    for (mode = 0; mode < NUM_BMODES; ++mode) {
+      VP8ModeScore rd_tmp;
+      int16_t tmp_levels[16];
+
+      // Reconstruct
+      rd_tmp.nz =
+          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
+
+      // Compute RD-score
+      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
+      rd_tmp.SD =
+          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
+                  : 0;
+      rd_tmp.H = mode_costs[mode];
+      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
+      if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
+        rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
+      }
+
+      SetRDScore(lambda, &rd_tmp);
+      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
+        CopyScore(&rd_i4, &rd_tmp);
+        best_mode = mode;
+        SwapPtr(&tmp_dst, &best_block);
+        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
+      }
+    }
+    SetRDScore(dqm->lambda_mode_, &rd_i4);
+    AddScore(&rd_best, &rd_i4);
+    if (rd_best.score >= rd->score) {
+      return 0;
+    }
+    total_header_bits += (int)rd_i4.H;   // <- equal to mode_costs[best_mode];
+    if (total_header_bits > enc->max_i4_header_bits_) {
+      return 0;
+    }
+    // Copy selected samples if not in the right place already.
+    if (best_block != best_blocks + VP8Scan[it->i4_]) {
+      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
+    }
+    rd->modes_i4[it->i4_] = best_mode;
+    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
+  } while (VP8IteratorRotateI4(it, best_blocks));
+
+  // finalize state
+  CopyScore(rd, &rd_best);
+  VP8SetIntra4Mode(it, rd->modes_i4);
+  SwapOut(it);
+  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
+  return 1;   // select intra4x4 over intra16x16
+}
+
+//------------------------------------------------------------------------------
+
+static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const int kNumBlocks = 8;
+  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_uv_;
+  const uint8_t* const src = it->yuv_in_ + U_OFF;
+  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;  // scratch buffer
+  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
+  VP8ModeScore rd_best;
+  int mode;
+
+  rd->mode_uv = -1;
+  InitScore(&rd_best);
+  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+    VP8ModeScore rd_uv;
+
+    // Reconstruct
+    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
+
+    // Compute RD-score
+    rd_uv.D  = VP8SSE16x8(src, tmp_dst);
+    rd_uv.SD = 0;    // TODO: should we call TDisto? it tends to flatten areas.
+    rd_uv.H  = VP8FixedCostsUV[mode];
+    rd_uv.R  = VP8GetCostUV(it, &rd_uv);
+    if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
+      rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
+    }
+
+    SetRDScore(lambda, &rd_uv);
+    if (mode == 0 || rd_uv.score < rd_best.score) {
+      CopyScore(&rd_best, &rd_uv);
+      rd->mode_uv = mode;
+      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
+      memcpy(dst0, tmp_dst, UV_SIZE);   //  TODO: SwapUVOut() ?
+    }
+  }
+  VP8SetIntraUVMode(it, rd->mode_uv);
+  AddScore(rd, &rd_best);
+}
+
+//------------------------------------------------------------------------------
+// Final reconstruction and quantization.
+
+static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const int is_i16 = (it->mb_->type_ == 1);
+  int nz = 0;
+
+  if (is_i16) {
+    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
+  } else {
+    VP8IteratorStartI4(it);
+    do {
+      const int mode =
+          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
+      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
+      VP8MakeIntra4Preds(it);
+      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
+                              src, dst, mode) << it->i4_;
+    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
+  }
+
+  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
+  rd->nz = nz;
+}
+
+// Refine intra16/intra4 sub-modes based on distortion only (not rate).
+static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
+  const int is_i16 = (it->mb_->type_ == 1);
+  score_t best_score = MAX_COST;
+
+  if (try_both_i4_i16 || is_i16) {
+    int mode;
+    int best_mode = -1;
+    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+      const uint8_t* const src = it->yuv_in_ + Y_OFF;
+      const score_t score = VP8SSE16x16(src, ref);
+      if (score < best_score) {
+        best_mode = mode;
+        best_score = score;
+      }
+    }
+    VP8SetIntra16Mode(it, best_mode);
+  }
+  if (try_both_i4_i16 || !is_i16) {
+    uint8_t modes_i4[16];
+    // We don't evaluate the rate here, but just account for it through a
+    // constant penalty (i4 mode usually needs more bits compared to i16).
+    score_t score_i4 = (score_t)I4_PENALTY;
+
+    VP8IteratorStartI4(it);
+    do {
+      int mode;
+      int best_sub_mode = -1;
+      score_t best_sub_score = MAX_COST;
+      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+
+      // TODO(skal): we don't really need the prediction pixels here,
+      // but just the distortion against 'src'.
+      VP8MakeIntra4Preds(it);
+      for (mode = 0; mode < NUM_BMODES; ++mode) {
+        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
+        const score_t score = VP8SSE4x4(src, ref);
+        if (score < best_sub_score) {
+          best_sub_mode = mode;
+          best_sub_score = score;
+        }
+      }
+      modes_i4[it->i4_] = best_sub_mode;
+      score_i4 += best_sub_score;
+      if (score_i4 >= best_score) break;
+    } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
+    if (score_i4 < best_score) {
+      VP8SetIntra4Mode(it, modes_i4);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+                VP8RDLevel rd_opt) {
+  int is_skipped;
+  const int method = it->enc_->method_;
+
+  InitScore(rd);
+
+  // We can perform predictions for Luma16x16 and Chroma8x8 already.
+  // Luma4x4 predictions needs to be done as-we-go.
+  VP8MakeLuma16Preds(it);
+  VP8MakeChroma8Preds(it);
+
+  if (rd_opt > RD_OPT_NONE) {
+    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
+    PickBestIntra16(it, rd);
+    if (method >= 2) {
+      PickBestIntra4(it, rd);
+    }
+    PickBestUV(it, rd);
+    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
+      it->do_trellis_ = 1;
+      SimpleQuantize(it, rd);
+    }
+  } else {
+    // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
+    // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
+    DistoRefine(it, (method >= 2));
+    SimpleQuantize(it, rd);
+  }
+  is_skipped = (rd->nz == 0);
+  VP8SetSkip(it, is_skipped);
+  return is_skipped;
+}
+
diff --git a/src/main/jni/libwebp/enc/syntax.c b/src/main/jni/libwebp/enc/syntax.c
new file mode 100644
index 000000000..d1ff0a53c
--- /dev/null
+++ b/src/main/jni/libwebp/enc/syntax.c
@@ -0,0 +1,383 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Header syntax writing
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"  // RIFF constants
+#include "../webp/mux_types.h"         // ALPHA_FLAG
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Helper functions
+
+static int IsVP8XNeeded(const VP8Encoder* const enc) {
+  return !!enc->has_alpha_;  // Currently the only case when VP8X is needed.
+                             // This could change in the future.
+}
+
+static int PutPaddingByte(const WebPPicture* const pic) {
+  const uint8_t pad_byte[1] = { 0 };
+  return !!pic->writer(pad_byte, 1, pic);
+}
+
+//------------------------------------------------------------------------------
+// Writers for header's various pieces (in order of appearance)
+
+static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
+                                       size_t riff_size) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t riff[RIFF_HEADER_SIZE] = {
+    'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'
+  };
+  assert(riff_size == (uint32_t)riff_size);
+  PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+  if (!pic->writer(riff, sizeof(riff), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {
+    'V', 'P', '8', 'X'
+  };
+  uint32_t flags = 0;
+
+  assert(IsVP8XNeeded(enc));
+  assert(pic->width >= 1 && pic->height >= 1);
+  assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
+
+  if (enc->has_alpha_) {
+    flags |= ALPHA_FLAG;
+  }
+
+  PutLE32(vp8x + TAG_SIZE,              VP8X_CHUNK_SIZE);
+  PutLE32(vp8x + CHUNK_HEADER_SIZE,     flags);
+  PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
+  PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
+  if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = {
+    'A', 'L', 'P', 'H'
+  };
+
+  assert(enc->has_alpha_);
+
+  // Alpha chunk header.
+  PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_);
+  if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+
+  // Alpha chunk data.
+  if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+
+  // Padding.
+  if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8Header(const WebPPicture* const pic,
+                                      size_t vp8_size) {
+  uint8_t vp8_chunk_hdr[CHUNK_HEADER_SIZE] = {
+    'V', 'P', '8', ' '
+  };
+  assert(vp8_size == (uint32_t)vp8_size);
+  PutLE32(vp8_chunk_hdr + TAG_SIZE, (uint32_t)vp8_size);
+  if (!pic->writer(vp8_chunk_hdr, sizeof(vp8_chunk_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic,
+                                           int profile, size_t size0) {
+  uint8_t vp8_frm_hdr[VP8_FRAME_HEADER_SIZE];
+  uint32_t bits;
+
+  if (size0 >= VP8_MAX_PARTITION0_SIZE) {  // partition #0 is too big to fit
+    return VP8_ENC_ERROR_PARTITION0_OVERFLOW;
+  }
+
+  // Paragraph 9.1.
+  bits = 0                         // keyframe (1b)
+       | (profile << 1)            // profile (3b)
+       | (1 << 4)                  // visible (1b)
+       | ((uint32_t)size0 << 5);   // partition length (19b)
+  vp8_frm_hdr[0] = (bits >>  0) & 0xff;
+  vp8_frm_hdr[1] = (bits >>  8) & 0xff;
+  vp8_frm_hdr[2] = (bits >> 16) & 0xff;
+  // signature
+  vp8_frm_hdr[3] = (VP8_SIGNATURE >> 16) & 0xff;
+  vp8_frm_hdr[4] = (VP8_SIGNATURE >>  8) & 0xff;
+  vp8_frm_hdr[5] = (VP8_SIGNATURE >>  0) & 0xff;
+  // dimensions
+  vp8_frm_hdr[6] = pic->width & 0xff;
+  vp8_frm_hdr[7] = pic->width >> 8;
+  vp8_frm_hdr[8] = pic->height & 0xff;
+  vp8_frm_hdr[9] = pic->height >> 8;
+
+  if (!pic->writer(vp8_frm_hdr, sizeof(vp8_frm_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+// WebP Headers.
+static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
+                          size_t vp8_size, size_t riff_size) {
+  WebPPicture* const pic = enc->pic_;
+  WebPEncodingError err = VP8_ENC_OK;
+
+  // RIFF header.
+  err = PutRIFFHeader(enc, riff_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // VP8X.
+  if (IsVP8XNeeded(enc)) {
+    err = PutVP8XHeader(enc);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  // Alpha.
+  if (enc->has_alpha_) {
+    err = PutAlphaChunk(enc);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  // VP8 header.
+  err = PutVP8Header(pic, vp8_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // VP8 frame header.
+  err = PutVP8FrameHeader(pic, enc->profile_, size0);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // All OK.
+  return 1;
+
+  // Error.
+ Error:
+  return WebPEncodingSetError(pic, err);
+}
+
+// Segmentation header
+static void PutSegmentHeader(VP8BitWriter* const bw,
+                             const VP8Encoder* const enc) {
+  const VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+  const VP8Proba* const proba = &enc->proba_;
+  if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
+    // We always 'update' the quant and filter strength values
+    const int update_data = 1;
+    int s;
+    VP8PutBitUniform(bw, hdr->update_map_);
+    if (VP8PutBitUniform(bw, update_data)) {
+      // we always use absolute values, not relative ones
+      VP8PutBitUniform(bw, 1);   // (segment_feature_mode = 1. Paragraph 9.3.)
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7);
+      }
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6);
+      }
+    }
+    if (hdr->update_map_) {
+      for (s = 0; s < 3; ++s) {
+        if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
+          VP8PutValue(bw, proba->segments_[s], 8);
+        }
+      }
+    }
+  }
+}
+
+// Filtering parameters header
+static void PutFilterHeader(VP8BitWriter* const bw,
+                            const VP8FilterHeader* const hdr) {
+  const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
+  VP8PutBitUniform(bw, hdr->simple_);
+  VP8PutValue(bw, hdr->level_, 6);
+  VP8PutValue(bw, hdr->sharpness_, 3);
+  if (VP8PutBitUniform(bw, use_lf_delta)) {
+    // '0' is the default value for i4x4_lf_delta_ at frame #0.
+    const int need_update = (hdr->i4x4_lf_delta_ != 0);
+    if (VP8PutBitUniform(bw, need_update)) {
+      // we don't use ref_lf_delta => emit four 0 bits
+      VP8PutValue(bw, 0, 4);
+      // we use mode_lf_delta for i4x4
+      VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6);
+      VP8PutValue(bw, 0, 3);    // all others unused
+    }
+  }
+}
+
+// Nominal quantization parameters
+static void PutQuant(VP8BitWriter* const bw,
+                     const VP8Encoder* const enc) {
+  VP8PutValue(bw, enc->base_quant_, 7);
+  VP8PutSignedValue(bw, enc->dq_y1_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_y2_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_y2_ac_, 4);
+  VP8PutSignedValue(bw, enc->dq_uv_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_uv_ac_, 4);
+}
+
+// Partition sizes
+static int EmitPartitionsSize(const VP8Encoder* const enc,
+                              WebPPicture* const pic) {
+  uint8_t buf[3 * (MAX_NUM_PARTITIONS - 1)];
+  int p;
+  for (p = 0; p < enc->num_parts_ - 1; ++p) {
+    const size_t part_size = VP8BitWriterSize(enc->parts_ + p);
+    if (part_size >= VP8_MAX_PARTITION_SIZE) {
+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
+    }
+    buf[3 * p + 0] = (part_size >>  0) & 0xff;
+    buf[3 * p + 1] = (part_size >>  8) & 0xff;
+    buf[3 * p + 2] = (part_size >> 16) & 0xff;
+  }
+  return p ? pic->writer(buf, 3 * p, pic) : 1;
+}
+
+//------------------------------------------------------------------------------
+
+static int GeneratePartition0(VP8Encoder* const enc) {
+  VP8BitWriter* const bw = &enc->bw_;
+  const int mb_size = enc->mb_w_ * enc->mb_h_;
+  uint64_t pos1, pos2, pos3;
+
+  pos1 = VP8BitWriterPos(bw);
+  if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) {        // ~7 bits per macroblock
+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  VP8PutBitUniform(bw, 0);   // colorspace
+  VP8PutBitUniform(bw, 0);   // clamp type
+
+  PutSegmentHeader(bw, enc);
+  PutFilterHeader(bw, &enc->filter_hdr_);
+  VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
+                  enc->num_parts_ == 4 ? 2 :
+                  enc->num_parts_ == 2 ? 1 : 0, 2);
+  PutQuant(bw, enc);
+  VP8PutBitUniform(bw, 0);   // no proba update
+  VP8WriteProbas(bw, &enc->proba_);
+  pos2 = VP8BitWriterPos(bw);
+  VP8CodeIntraModes(enc);
+  VP8BitWriterFinish(bw);
+
+  pos3 = VP8BitWriterPos(bw);
+
+  if (enc->pic_->stats) {
+    enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
+    enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
+    enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
+  }
+  if (bw->error_) {
+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  return 1;
+}
+
+void VP8EncFreeBitWriters(VP8Encoder* const enc) {
+  int p;
+  VP8BitWriterWipeOut(&enc->bw_);
+  for (p = 0; p < enc->num_parts_; ++p) {
+    VP8BitWriterWipeOut(enc->parts_ + p);
+  }
+}
+
+int VP8EncWrite(VP8Encoder* const enc) {
+  WebPPicture* const pic = enc->pic_;
+  VP8BitWriter* const bw = &enc->bw_;
+  const int task_percent = 19;
+  const int percent_per_part = task_percent / enc->num_parts_;
+  const int final_percent = enc->percent_ + task_percent;
+  int ok = 0;
+  size_t vp8_size, pad, riff_size;
+  int p;
+
+  // Partition #0 with header and partition sizes
+  ok = GeneratePartition0(enc);
+  if (!ok) return 0;
+
+  // Compute VP8 size
+  vp8_size = VP8_FRAME_HEADER_SIZE +
+             VP8BitWriterSize(bw) +
+             3 * (enc->num_parts_ - 1);
+  for (p = 0; p < enc->num_parts_; ++p) {
+    vp8_size += VP8BitWriterSize(enc->parts_ + p);
+  }
+  pad = vp8_size & 1;
+  vp8_size += pad;
+
+  // Compute RIFF size
+  // At the minimum it is: "WEBPVP8 nnnn" + VP8 data size.
+  riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8_size;
+  if (IsVP8XNeeded(enc)) {  // Add size for: VP8X header + data.
+    riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+  }
+  if (enc->has_alpha_) {  // Add size for: ALPH header + data.
+    const uint32_t padded_alpha_size = enc->alpha_data_size_ +
+                                       (enc->alpha_data_size_ & 1);
+    riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
+  }
+  // Sanity check.
+  if (riff_size > 0xfffffffeU) {
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
+  }
+
+  // Emit headers and partition #0
+  {
+    const uint8_t* const part0 = VP8BitWriterBuf(bw);
+    const size_t size0 = VP8BitWriterSize(bw);
+    ok = ok && PutWebPHeaders(enc, size0, vp8_size, riff_size)
+            && pic->writer(part0, size0, pic)
+            && EmitPartitionsSize(enc, pic);
+    VP8BitWriterWipeOut(bw);    // will free the internal buffer.
+  }
+
+  // Token partitions
+  for (p = 0; p < enc->num_parts_; ++p) {
+    const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p);
+    const size_t size = VP8BitWriterSize(enc->parts_ + p);
+    if (size)
+      ok = ok && pic->writer(buf, size, pic);
+    VP8BitWriterWipeOut(enc->parts_ + p);    // will free the internal buffer.
+    ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part,
+                                  &enc->percent_);
+  }
+
+  // Padding byte
+  if (ok && pad) {
+    ok = PutPaddingByte(pic);
+  }
+
+  enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
+  ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/enc/token.c b/src/main/jni/libwebp/enc/token.c
new file mode 100644
index 000000000..8af13a082
--- /dev/null
+++ b/src/main/jni/libwebp/enc/token.c
@@ -0,0 +1,286 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Paginated token buffer
+//
+//  A 'token' is a bit value associated with a probability, either fixed
+// or a later-to-be-determined after statistics have been collected.
+// For dynamic probability, we just record the slot id (idx) for the probability
+// value in the final probability array (uint8_t* probas in VP8EmitTokens).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// we use pages to reduce the number of memcpy()
+#define MIN_PAGE_SIZE 8192          // minimum number of token per page
+#define FIXED_PROBA_BIT (1u << 14)
+
+typedef uint16_t token_t;  // bit#15: bit
+                           // bit #14: constant proba or idx
+                           // bits 0..13: slot or constant proba
+struct VP8Tokens {
+  VP8Tokens* next_;        // pointer to next page
+};
+// Token data is located in memory just after the next_ field.
+// This macro is used to return their address and hide the trick.
+#define TOKEN_DATA(p) ((token_t*)&(p)[1])
+
+//------------------------------------------------------------------------------
+
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {
+  b->tokens_ = NULL;
+  b->pages_ = NULL;
+  b->last_page_ = &b->pages_;
+  b->left_ = 0;
+  b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size;
+  b->error_ = 0;
+}
+
+void VP8TBufferClear(VP8TBuffer* const b) {
+  if (b != NULL) {
+    const VP8Tokens* p = b->pages_;
+    while (p != NULL) {
+      const VP8Tokens* const next = p->next_;
+      WebPSafeFree((void*)p);
+      p = next;
+    }
+    VP8TBufferInit(b, b->page_size_);
+  }
+}
+
+static int TBufferNewPage(VP8TBuffer* const b) {
+  VP8Tokens* page = NULL;
+  const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t);
+  if (!b->error_) {
+    page = (VP8Tokens*)WebPSafeMalloc(1ULL, size);
+  }
+  if (page == NULL) {
+    b->error_ = 1;
+    return 0;
+  }
+  page->next_ = NULL;
+
+  *b->last_page_ = page;
+  b->last_page_ = &page->next_;
+  b->left_ = b->page_size_;
+  b->tokens_ = TOKEN_DATA(page);
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#define TOKEN_ID(t, b, ctx, p) \
+    ((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
+
+static WEBP_INLINE int AddToken(VP8TBuffer* const b,
+                                int bit, uint32_t proba_idx) {
+  assert(proba_idx < FIXED_PROBA_BIT);
+  assert(bit == 0 || bit == 1);
+  if (b->left_ > 0 || TBufferNewPage(b)) {
+    const int slot = --b->left_;
+    b->tokens_[slot] = (bit << 15) | proba_idx;
+  }
+  return bit;
+}
+
+static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
+                                         int bit, int proba) {
+  assert(proba < 256);
+  assert(bit == 0 || bit == 1);
+  if (b->left_ > 0 || TBufferNewPage(b)) {
+    const int slot = --b->left_;
+    b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
+  }
+}
+
+int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
+                         const int16_t* const coeffs,
+                         VP8TBuffer* const tokens) {
+  int n = first;
+  uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0);
+  if (!AddToken(tokens, last >= 0, base_id + 0)) {
+    return 0;
+  }
+
+  while (n < 16) {
+    const int c = coeffs[n++];
+    const int sign = c < 0;
+    int v = sign ? -c : c;
+    if (!AddToken(tokens, v != 0, base_id + 1)) {
+      ctx = 0;
+      base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
+      continue;
+    }
+    if (!AddToken(tokens, v > 1, base_id + 2)) {
+      ctx = 1;
+    } else {
+      if (!AddToken(tokens, v > 4, base_id + 3)) {
+        if (AddToken(tokens, v != 2, base_id + 4))
+          AddToken(tokens, v == 4, base_id + 5);
+      } else if (!AddToken(tokens, v > 10, base_id + 6)) {
+        if (!AddToken(tokens, v > 6, base_id + 7)) {
+          AddConstantToken(tokens, v == 6, 159);
+        } else {
+          AddConstantToken(tokens, v >= 9, 165);
+          AddConstantToken(tokens, !(v & 1), 145);
+        }
+      } else {
+        int mask;
+        const uint8_t* tab;
+        if (v < 3 + (8 << 1)) {          // VP8Cat3  (3b)
+          AddToken(tokens, 0, base_id + 8);
+          AddToken(tokens, 0, base_id + 9);
+          v -= 3 + (8 << 0);
+          mask = 1 << 2;
+          tab = VP8Cat3;
+        } else if (v < 3 + (8 << 2)) {   // VP8Cat4  (4b)
+          AddToken(tokens, 0, base_id + 8);
+          AddToken(tokens, 1, base_id + 9);
+          v -= 3 + (8 << 1);
+          mask = 1 << 3;
+          tab = VP8Cat4;
+        } else if (v < 3 + (8 << 3)) {   // VP8Cat5  (5b)
+          AddToken(tokens, 1, base_id + 8);
+          AddToken(tokens, 0, base_id + 10);
+          v -= 3 + (8 << 2);
+          mask = 1 << 4;
+          tab = VP8Cat5;
+        } else {                         // VP8Cat6 (11b)
+          AddToken(tokens, 1, base_id + 8);
+          AddToken(tokens, 1, base_id + 10);
+          v -= 3 + (8 << 3);
+          mask = 1 << 10;
+          tab = VP8Cat6;
+        }
+        while (mask) {
+          AddConstantToken(tokens, !!(v & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      ctx = 2;
+    }
+    AddConstantToken(tokens, sign, 128);
+    base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
+    if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+#undef TOKEN_ID
+
+//------------------------------------------------------------------------------
+// This function works, but isn't currently used. Saved for later.
+
+#if 0
+
+static void Record(int bit, proba_t* const stats) {
+  proba_t p = *stats;
+  if (p >= 0xffff0000u) {               // an overflow is inbound.
+    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
+  }
+  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+  p += 0x00010000u + bit;
+  *stats = p;
+}
+
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
+  const VP8Tokens* p = b->pages_;
+  while (p != NULL) {
+    const int N = (p->next_ == NULL) ? b->left_ : 0;
+    int n = MAX_NUM_TOKEN;
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      if (!(token & FIXED_PROBA_BIT)) {
+        Record((token >> 15) & 1, stats + (token & 0x3fffu));
+      }
+    }
+    p = p->next_;
+  }
+}
+
+#endif   // 0
+
+//------------------------------------------------------------------------------
+// Final coding pass, with known probabilities
+
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas, int final_pass) {
+  const VP8Tokens* p = b->pages_;
+  (void)final_pass;
+  assert(!b->error_);
+  while (p != NULL) {
+    const VP8Tokens* const next = p->next_;
+    const int N = (next == NULL) ? b->left_ : 0;
+    int n = b->page_size_;
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      const int bit = (token >> 15) & 1;
+      if (token & FIXED_PROBA_BIT) {
+        VP8PutBit(bw, bit, token & 0xffu);  // constant proba
+      } else {
+        VP8PutBit(bw, bit, probas[token & 0x3fffu]);
+      }
+    }
+    if (final_pass) WebPSafeFree((void*)p);
+    p = next;
+  }
+  if (final_pass) b->pages_ = NULL;
+  return 1;
+}
+
+// Size estimation
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) {
+  size_t size = 0;
+  const VP8Tokens* p = b->pages_;
+  assert(!b->error_);
+  while (p != NULL) {
+    const VP8Tokens* const next = p->next_;
+    const int N = (next == NULL) ? b->left_ : 0;
+    int n = b->page_size_;
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      const int bit = token & (1 << 15);
+      if (token & FIXED_PROBA_BIT) {
+        size += VP8BitCost(bit, token & 0xffu);
+      } else {
+        size += VP8BitCost(bit, probas[token & 0x3fffu]);
+      }
+    }
+    p = next;
+  }
+  return size;
+}
+
+//------------------------------------------------------------------------------
+
+#else     // DISABLE_TOKEN_BUFFER
+
+void VP8TBufferInit(VP8TBuffer* const b) {
+  (void)b;
+}
+void VP8TBufferClear(VP8TBuffer* const b) {
+  (void)b;
+}
+
+#endif    // !DISABLE_TOKEN_BUFFER
+
diff --git a/src/main/jni/libwebp/enc/tree.c b/src/main/jni/libwebp/enc/tree.c
new file mode 100644
index 000000000..e5d05e522
--- /dev/null
+++ b/src/main/jni/libwebp/enc/tree.c
@@ -0,0 +1,504 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Coding of token probabilities, intra modes and segments.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Paragraph 13.5
+const uint8_t
+  VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+    },
+    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+    },
+    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+    },
+    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+    },
+    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+    },
+    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+    },
+    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+    },
+    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+    },
+    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+    },
+    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+    },
+    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+    },
+    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+    },
+    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+    },
+    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+    },
+    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  }
+};
+
+void VP8DefaultProbas(VP8Encoder* const enc) {
+  VP8Proba* const probas = &enc->proba_;
+  probas->use_skip_proba_ = 0;
+  memset(probas->segments_, 255u, sizeof(probas->segments_));
+  memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
+  // Note: we could hard-code the level_costs_ corresponding to VP8CoeffsProba0,
+  // but that's ~11k of static data. Better call VP8CalculateLevelCosts() later.
+  probas->dirty_ = 1;
+}
+
+// Paragraph 11.5.  900bytes.
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
+  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
+  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
+  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
+  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
+  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
+  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
+  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
+  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
+  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
+};
+
+static int PutI4Mode(VP8BitWriter* const bw, int mode,
+                     const uint8_t* const prob) {
+  if (VP8PutBit(bw, mode != B_DC_PRED, prob[0])) {
+    if (VP8PutBit(bw, mode != B_TM_PRED, prob[1])) {
+      if (VP8PutBit(bw, mode != B_VE_PRED, prob[2])) {
+        if (!VP8PutBit(bw, mode >= B_LD_PRED, prob[3])) {
+          if (VP8PutBit(bw, mode != B_HE_PRED, prob[4])) {
+            VP8PutBit(bw, mode != B_RD_PRED, prob[5]);
+          }
+        } else {
+          if (VP8PutBit(bw, mode != B_LD_PRED, prob[6])) {
+            if (VP8PutBit(bw, mode != B_VL_PRED, prob[7])) {
+              VP8PutBit(bw, mode != B_HD_PRED, prob[8]);
+            }
+          }
+        }
+      }
+    }
+  }
+  return mode;
+}
+
+static void PutI16Mode(VP8BitWriter* const bw, int mode) {
+  if (VP8PutBit(bw, (mode == TM_PRED || mode == H_PRED), 156)) {
+    VP8PutBit(bw, mode == TM_PRED, 128);    // TM or HE
+  } else {
+    VP8PutBit(bw, mode == V_PRED, 163);     // VE or DC
+  }
+}
+
+static void PutUVMode(VP8BitWriter* const bw, int uv_mode) {
+  if (VP8PutBit(bw, uv_mode != DC_PRED, 142)) {
+    if (VP8PutBit(bw, uv_mode != V_PRED, 114)) {
+      VP8PutBit(bw, uv_mode != H_PRED, 183);    // else: TM_PRED
+    }
+  }
+}
+
+static void PutSegment(VP8BitWriter* const bw, int s, const uint8_t* p) {
+  if (VP8PutBit(bw, s >= 2, p[0])) p += 1;
+  VP8PutBit(bw, s & 1, p[1]);
+}
+
+void VP8CodeIntraModes(VP8Encoder* const enc) {
+  VP8BitWriter* const bw = &enc->bw_;
+  VP8EncIterator it;
+  VP8IteratorInit(enc, &it);
+  do {
+    const VP8MBInfo* const mb = it.mb_;
+    const uint8_t* preds = it.preds_;
+    if (enc->segment_hdr_.update_map_) {
+      PutSegment(bw, mb->segment_, enc->proba_.segments_);
+    }
+    if (enc->proba_.use_skip_proba_) {
+      VP8PutBit(bw, mb->skip_, enc->proba_.skip_proba_);
+    }
+    if (VP8PutBit(bw, (mb->type_ != 0), 145)) {  // i16x16
+      PutI16Mode(bw, preds[0]);
+    } else {
+      const int preds_w = enc->preds_w_;
+      const uint8_t* top_pred = preds - preds_w;
+      int x, y;
+      for (y = 0; y < 4; ++y) {
+        int left = preds[-1];
+        for (x = 0; x < 4; ++x) {
+          const uint8_t* const probas = kBModesProba[top_pred[x]][left];
+          left = PutI4Mode(bw, preds[x], probas);
+        }
+        top_pred = preds;
+        preds += preds_w;
+      }
+    }
+    PutUVMode(bw, mb->uv_mode_);
+  } while (VP8IteratorNext(&it));
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 13
+
+const uint8_t
+    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  }
+};
+
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const uint8_t p0 = probas->coeffs_[t][b][c][p];
+          const int update = (p0 != VP8CoeffsProba0[t][b][c][p]);
+          if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) {
+            VP8PutValue(bw, p0, 8);
+          }
+        }
+      }
+    }
+  }
+  if (VP8PutBitUniform(bw, probas->use_skip_proba_)) {
+    VP8PutValue(bw, probas->skip_proba_, 8);
+  }
+}
+
diff --git a/src/main/jni/libwebp/enc/vp8enci.h b/src/main/jni/libwebp/enc/vp8enci.h
new file mode 100644
index 000000000..10c8fb0a1
--- /dev/null
+++ b/src/main/jni/libwebp/enc/vp8enci.h
@@ -0,0 +1,582 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_VP8ENCI_H_
+#define WEBP_ENC_VP8ENCI_H_
+
+#include <string.h>     // for memcpy()
+#include "../webp/encode.h"
+#include "../dsp/dsp.h"
+#include "../utils/bit_writer.h"
+#include "../utils/thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define ENC_MAJ_VERSION 0
+#define ENC_MIN_VERSION 4
+#define ENC_REV_VERSION 2
+
+// intra prediction modes
+enum { B_DC_PRED = 0,   // 4x4 modes
+       B_TM_PRED = 1,
+       B_VE_PRED = 2,
+       B_HE_PRED = 3,
+       B_RD_PRED = 4,
+       B_VR_PRED = 5,
+       B_LD_PRED = 6,
+       B_VL_PRED = 7,
+       B_HD_PRED = 8,
+       B_HU_PRED = 9,
+       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+
+       // Luma16 or UV modes
+       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
+       NUM_PRED_MODES = 4
+     };
+
+enum { NUM_MB_SEGMENTS = 4,
+       MAX_NUM_PARTITIONS = 8,
+       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11,
+       MAX_LF_LEVELS = 64,       // Maximum loop filter level
+       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
+       MAX_LEVEL = 2047          // max level (note: max codable is 2047 + 67)
+     };
+
+typedef enum {   // Rate-distortion optimization levels
+  RD_OPT_NONE        = 0,  // no rd-opt
+  RD_OPT_BASIC       = 1,  // basic scoring (no trellis)
+  RD_OPT_TRELLIS     = 2,  // perform trellis-quant on the final decision only
+  RD_OPT_TRELLIS_ALL = 3   // trellis-quant for every scoring (much slower)
+} VP8RDLevel;
+
+// YUV-cache parameters. Cache is 16-pixels wide.
+// The original or reconstructed samples can be accessed using VP8Scan[]
+// The predicted blocks can be accessed using offsets to yuv_p_ and
+// the arrays VP8*ModeOffsets[];
+//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
+//  Y_OFF  |YYYY| <- original samples  ('yuv_in_')
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
+//         |UUVV|
+//         +----+
+//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
+//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
+//         |YYYY|
+//         |YYYY|
+//  U_OFF  |UUVV| V_OFF
+//         |UUVV|
+//          x2 (for yuv_out2_)
+//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
+// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16TM16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16VE16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16HE16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         +----+  Chroma U/V predictions (16x8 block each)
+// C8DC8   |UUVV|
+//         |UUVV|
+// C8TM8   |UUVV|
+//         |UUVV|
+// C8VE8   |UUVV|
+//         |UUVV|
+// C8HE8   |UUVV|
+//         |UUVV|
+//         +----+  Intra 4x4 predictions (4x4 block each)
+//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
+//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
+//         |YY..| I4HD4 I4HU4 I4TMP
+//         +----+
+#define BPS       16   // this is the common stride
+#define Y_SIZE   (BPS * 16)
+#define UV_SIZE  (BPS * 8)
+#define YUV_SIZE (Y_SIZE + UV_SIZE)
+#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
+#define Y_OFF    (0)
+#define U_OFF    (Y_SIZE)
+#define V_OFF    (U_OFF + 8)
+#define ALIGN_CST 15
+#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
+
+extern const int VP8Scan[16];           // in quant.c
+extern const int VP8UVModeOffsets[4];   // in analyze.c
+extern const int VP8I16ModeOffsets[4];
+extern const int VP8I4ModeOffsets[NUM_BMODES];
+
+// Layout of prediction blocks
+// intra 16x16
+#define I16DC16 (0 * 16 * BPS)
+#define I16TM16 (1 * 16 * BPS)
+#define I16VE16 (2 * 16 * BPS)
+#define I16HE16 (3 * 16 * BPS)
+// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
+#define C8DC8 (4 * 16 * BPS)
+#define C8TM8 (4 * 16 * BPS + 8 * BPS)
+#define C8VE8 (5 * 16 * BPS)
+#define C8HE8 (5 * 16 * BPS + 8 * BPS)
+// intra 4x4
+#define I4DC4 (6 * 16 * BPS +  0)
+#define I4TM4 (6 * 16 * BPS +  4)
+#define I4VE4 (6 * 16 * BPS +  8)
+#define I4HE4 (6 * 16 * BPS + 12)
+#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
+#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
+#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
+#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
+#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
+#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
+#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)
+
+typedef int64_t score_t;     // type used for scores, rate, distortion
+// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
+// in order to allow overflowing computations.
+#define MAX_COST ((score_t)0x7fffffffffffffLL)
+
+#define QFIX 17
+#define BIAS(b)  ((b) << (QFIX - 8))
+// Fun fact: this is the _only_ line where we're actually being lossy and
+// discarding bits.
+static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
+  return (int)((n * iQ + B) >> QFIX);
+}
+
+// size of histogram used by CollectHistogram.
+#define MAX_COEFF_THRESH   31
+typedef struct VP8Histogram VP8Histogram;
+struct VP8Histogram {
+  // TODO(skal): we only need to store the max_value and last_non_zero actually.
+  int distribution[MAX_COEFF_THRESH + 1];
+};
+
+// Uncomment the following to remove token-buffer code:
+// #define DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef uint32_t proba_t;   // 16b + 16b
+typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
+typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
+typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
+
+typedef struct VP8Encoder VP8Encoder;
+
+// segment features
+typedef struct {
+  int num_segments_;      // Actual number of segments. 1 segment only = unused.
+  int update_map_;        // whether to update the segment map or not.
+                          // must be 0 if there's only 1 segment.
+  int size_;              // bit-cost for transmitting the segment map
+} VP8SegmentHeader;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+  uint8_t segments_[3];     // probabilities for segment tree
+  uint8_t skip_proba_;      // final probability of being skipped.
+  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 1056 bytes
+  StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 4224 bytes
+  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 13056 bytes
+  int dirty_;               // if true, need to call VP8CalculateLevelCosts()
+  int use_skip_proba_;      // Note: we always use skip_proba for now.
+  int nb_skip_;             // number of skipped blocks
+} VP8Proba;
+
+// Filter parameters. Not actually used in the code (we don't perform
+// the in-loop filtering), but filled from user's config
+typedef struct {
+  int simple_;             // filtering type: 0=complex, 1=simple
+  int level_;              // base filter level [0..63]
+  int sharpness_;          // [0..7]
+  int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
+} VP8FilterHeader;
+
+//------------------------------------------------------------------------------
+// Informations about the macroblocks.
+
+typedef struct {
+  // block type
+  unsigned int type_:2;     // 0=i4x4, 1=i16x16
+  unsigned int uv_mode_:2;
+  unsigned int skip_:1;
+  unsigned int segment_:2;
+  uint8_t alpha_;      // quantization-susceptibility
+} VP8MBInfo;
+
+typedef struct VP8Matrix {
+  uint16_t q_[16];        // quantizer steps
+  uint16_t iq_[16];       // reciprocals, fixed point.
+  uint32_t bias_[16];     // rounding bias
+  uint32_t zthresh_[16];  // value below which a coefficient is zeroed
+  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
+} VP8Matrix;
+
+typedef struct {
+  VP8Matrix y1_, y2_, uv_;  // quantization matrices
+  int alpha_;      // quant-susceptibility, range [-127,127]. Zero is neutral.
+                   // Lower values indicate a lower risk of blurriness.
+  int beta_;       // filter-susceptibility, range [0,255].
+  int quant_;      // final segment quantizer.
+  int fstrength_;  // final in-loop filtering strength
+  int max_edge_;   // max edge delta (for filtering strength)
+  int min_disto_;  // minimum distortion required to trigger filtering record
+  // reactivities
+  int lambda_i16_, lambda_i4_, lambda_uv_;
+  int lambda_mode_, lambda_trellis_, tlambda_;
+  int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
+} VP8SegmentInfo;
+
+// Handy transient struct to accumulate score and info during RD-optimization
+// and mode evaluation.
+typedef struct {
+  score_t D, SD;              // Distortion, spectral distortion
+  score_t H, R, score;        // header bits, rate, score.
+  int16_t y_dc_levels[16];    // Quantized levels for luma-DC, luma-AC, chroma.
+  int16_t y_ac_levels[16][16];
+  int16_t uv_levels[4 + 4][16];
+  int mode_i16;               // mode number for intra16 prediction
+  uint8_t modes_i4[16];       // mode numbers for intra4 predictions
+  int mode_uv;                // mode number of chroma prediction
+  uint32_t nz;                // non-zero blocks
+} VP8ModeScore;
+
+// Iterator structure to iterate through macroblocks, pointing to the
+// right neighbouring data (samples, predictions, contexts, ...)
+typedef struct {
+  int x_, y_;                      // current macroblock
+  int y_stride_, uv_stride_;       // respective strides
+  uint8_t*      yuv_in_;           // input samples
+  uint8_t*      yuv_out_;          // output samples
+  uint8_t*      yuv_out2_;         // secondary buffer swapped with yuv_out_.
+  uint8_t*      yuv_p_;            // scratch buffer for prediction
+  VP8Encoder*   enc_;              // back-pointer
+  VP8MBInfo*    mb_;               // current macroblock
+  VP8BitWriter* bw_;               // current bit-writer
+  uint8_t*      preds_;            // intra mode predictors (4x4 blocks)
+  uint32_t*     nz_;               // non-zero pattern
+  uint8_t       i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
+  uint8_t*      i4_top_;           // pointer to the current top boundary sample
+  int           i4_;               // current intra4x4 mode being tested
+  int           top_nz_[9];        // top-non-zero context.
+  int           left_nz_[9];       // left-non-zero. left_nz[8] is independent.
+  uint64_t      bit_count_[4][3];  // bit counters for coded levels.
+  uint64_t      luma_bits_;        // macroblock bit-cost for luma
+  uint64_t      uv_bits_;          // macroblock bit-cost for chroma
+  LFStats*      lf_stats_;         // filter stats (borrowed from enc_)
+  int           do_trellis_;       // if true, perform extra level optimisation
+  int           count_down_;       // number of mb still to be processed
+  int           count_down0_;      // starting counter value (for progress)
+  int           percent0_;         // saved initial progress percent
+
+  uint8_t* y_left_;    // left luma samples (addressable from index -1 to 15).
+  uint8_t* u_left_;    // left u samples (addressable from index -1 to 7)
+  uint8_t* v_left_;    // left v samples (addressable from index -1 to 7)
+
+  uint8_t* y_top_;     // top luma samples at position 'x_'
+  uint8_t* uv_top_;    // top u/v samples at position 'x_', packed as 16 bytes
+
+  // memory for storing y/u/v_left_ and yuv_in_/out_*
+  uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + ALIGN_CST];     // memory for *_left_
+  uint8_t yuv_mem_[3 * YUV_SIZE + PRED_SIZE + ALIGN_CST];  // memory for yuv_*
+} VP8EncIterator;
+
+  // in iterator.c
+// must be called first
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
+// restart a scan
+void VP8IteratorReset(VP8EncIterator* const it);
+// reset iterator position to row 'y'
+void VP8IteratorSetRow(VP8EncIterator* const it, int y);
+// set count down (=number of iterations to go)
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);
+// return true if iteration is finished
+int VP8IteratorIsDone(const VP8EncIterator* const it);
+// Import uncompressed samples from source.
+// If tmp_32 is not NULL, import boundary samples too.
+// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
+// export decimated samples
+void VP8IteratorExport(const VP8EncIterator* const it);
+// go to next macroblock. Returns false if not finished.
+int VP8IteratorNext(VP8EncIterator* const it);
+// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
+void VP8IteratorSaveBoundary(VP8EncIterator* const it);
+// Report progression based on macroblock rows. Return 0 for user-abort request.
+int VP8IteratorProgress(const VP8EncIterator* const it,
+                        int final_delta_percent);
+// Intra4x4 iterations
+void VP8IteratorStartI4(VP8EncIterator* const it);
+// returns true if not done.
+int VP8IteratorRotateI4(VP8EncIterator* const it,
+                        const uint8_t* const yuv_out);
+
+// Non-zero context setup/teardown
+void VP8IteratorNzToBytes(VP8EncIterator* const it);
+void VP8IteratorBytesToNz(VP8EncIterator* const it);
+
+// Helper functions to set mode properties
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
+void VP8SetSkip(const VP8EncIterator* const it, int skip);
+void VP8SetSegment(const VP8EncIterator* const it, int segment);
+
+//------------------------------------------------------------------------------
+// Paginated token buffer
+
+typedef struct VP8Tokens VP8Tokens;  // struct details in token.c
+
+typedef struct {
+#if !defined(DISABLE_TOKEN_BUFFER)
+  VP8Tokens* pages_;        // first page
+  VP8Tokens** last_page_;   // last page
+  uint16_t* tokens_;        // set to (*last_page_)->tokens_
+  int left_;                // how many free tokens left before the page is full
+  int page_size_;           // number of tokens per page
+#endif
+  int error_;         // true in case of malloc error
+} VP8TBuffer;
+
+// initialize an empty buffer
+void VP8TBufferInit(VP8TBuffer* const b, int page_size);
+void VP8TBufferClear(VP8TBuffer* const b);   // de-allocate pages memory
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// Finalizes bitstream when probabilities are known.
+// Deletes the allocated token memory if final_pass is true.
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas, int final_pass);
+
+// record the coding of coefficients without knowing the probabilities yet
+int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
+                         const int16_t* const coeffs,
+                         VP8TBuffer* const tokens);
+
+// Estimate the final coded size given a set of 'probas'.
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
+
+// unused for now
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
+
+#endif  // !DISABLE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+
+struct VP8Encoder {
+  const WebPConfig* config_;    // user configuration and parameters
+  WebPPicture* pic_;            // input / output picture
+
+  // headers
+  VP8FilterHeader   filter_hdr_;     // filtering information
+  VP8SegmentHeader  segment_hdr_;    // segment information
+
+  int profile_;                      // VP8's profile, deduced from Config.
+
+  // dimension, in macroblock units.
+  int mb_w_, mb_h_;
+  int preds_w_;   // stride of the *preds_ prediction plane (=4*mb_w + 1)
+
+  // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
+  int num_parts_;
+
+  // per-partition boolean decoders.
+  VP8BitWriter bw_;                         // part0
+  VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions
+  VP8TBuffer tokens_;                       // token buffer
+
+  int percent_;                             // for progress
+
+  // transparency blob
+  int has_alpha_;
+  uint8_t* alpha_data_;       // non-NULL if transparency is present
+  uint32_t alpha_data_size_;
+  WebPWorker alpha_worker_;
+
+  // quantization info (one set of DC/AC dequant factor per segment)
+  VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
+  int base_quant_;                 // nominal quantizer value. Only used
+                                   // for relative coding of segments' quant.
+  int alpha_;                      // global susceptibility (<=> complexity)
+  int uv_alpha_;                   // U/V quantization susceptibility
+  // global offset of quantizers, shared by all segments
+  int dq_y1_dc_;
+  int dq_y2_dc_, dq_y2_ac_;
+  int dq_uv_dc_, dq_uv_ac_;
+
+  // probabilities and statistics
+  VP8Proba proba_;
+  uint64_t sse_[4];        // sum of Y/U/V/A squared errors for all macroblocks
+  uint64_t sse_count_;     // pixel count for the sse_[] stats
+  int      coded_size_;
+  int      residual_bytes_[3][4];
+  int      block_count_[3];
+
+  // quality/speed settings
+  int method_;               // 0=fastest, 6=best/slowest.
+  VP8RDLevel rd_opt_level_;  // Deduced from method_.
+  int max_i4_header_bits_;   // partition #0 safeness factor
+  int thread_level_;         // derived from config->thread_level
+  int do_search_;            // derived from config->target_XXX
+  int use_tokens_;           // if true, use token buffer
+
+  // Memory
+  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
+  uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
+  uint32_t*  nz_;        // non-zero bit context: mb_w+1
+  uint8_t*   y_top_;     // top luma samples.
+  uint8_t*   uv_top_;    // top u/v samples.
+                         // U and V are packed into 16 bytes (8 U + 8 V)
+  LFStats*   lf_stats_;  // autofilter stats (if NULL, autofilter is off)
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+  // in tree.c
+extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+extern const uint8_t
+    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+// Reset the token probabilities to their initial (default) values
+void VP8DefaultProbas(VP8Encoder* const enc);
+// Write the token probabilities
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
+// Writes the partition #0 modes (that is: all intra modes)
+void VP8CodeIntraModes(VP8Encoder* const enc);
+
+  // in syntax.c
+// Generates the final bitstream by coding the partition0 and headers,
+// and appending an assembly of all the pre-coded token partitions.
+// Return true if everything is ok.
+int VP8EncWrite(VP8Encoder* const enc);
+// Release memory allocated for bit-writing in VP8EncLoop & seq.
+void VP8EncFreeBitWriters(VP8Encoder* const enc);
+
+  // in frame.c
+extern const uint8_t VP8EncBands[16 + 1];
+extern const uint8_t VP8Cat3[];
+extern const uint8_t VP8Cat4[];
+extern const uint8_t VP8Cat5[];
+extern const uint8_t VP8Cat6[];
+
+// Form all the four Intra16x16 predictions in the yuv_p_ cache
+void VP8MakeLuma16Preds(const VP8EncIterator* const it);
+// Form all the four Chroma8x8 predictions in the yuv_p_ cache
+void VP8MakeChroma8Preds(const VP8EncIterator* const it);
+// Form all the ten Intra4x4 predictions in the yuv_p_ cache
+// for the 4x4 block it->i4_
+void VP8MakeIntra4Preds(const VP8EncIterator* const it);
+// Rate calculation
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
+// Main coding calls
+int VP8EncLoop(VP8Encoder* const enc);
+int VP8EncTokenLoop(VP8Encoder* const enc);
+
+  // in webpenc.c
+// Assign an error code to a picture. Return false for convenience.
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
+int WebPReportProgress(const WebPPicture* const pic,
+                       int percent, int* const percent_store);
+
+  // in analysis.c
+// Main analysis loop. Decides the segmentations and complexity.
+// Assigns a first guess for Intra16 and uvmode_ prediction modes.
+int VP8EncAnalyze(VP8Encoder* const enc);
+
+  // in quant.c
+// Sets up segment's quantization values, base_quant_ and filter strengths.
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
+// Pick best modes and fills the levels. Returns true if skipped.
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+                VP8RDLevel rd_opt);
+
+  // in alpha.c
+void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);    // start alpha coding process
+int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
+int VP8EncDeleteAlpha(VP8Encoder* const enc);   // delete compressed data
+
+  // in filter.c
+
+// SSIM utils
+typedef struct {
+  double w, xm, ym, xxm, xym, yym;
+} DistoStats;
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+                            const uint8_t* src2, int stride2,
+                            int W, int H, DistoStats* const stats);
+double VP8SSIMGet(const DistoStats* const stats);
+double VP8SSIMGetSquaredError(const DistoStats* const stats);
+
+// autofilter
+void VP8InitFilter(VP8EncIterator* const it);
+void VP8StoreFilterStats(VP8EncIterator* const it);
+void VP8AdjustFilterStrength(VP8EncIterator* const it);
+
+// returns the approximate filtering strength needed to smooth a edge
+// step of 'delta', given a sharpness parameter 'sharpness'.
+int VP8FilterStrengthFromDelta(int sharpness, int delta);
+
+  // misc utils for picture_*.c:
+
+// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
+void WebPPictureResetBuffers(WebPPicture* const picture);
+
+// Allocates ARGB buffer of given dimension (previous one is always free'd).
+// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
+// out-of-memory).
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
+
+// Allocates YUVA buffer of given dimension (previous one is always free'd).
+// Uses picture->csp to determine whether an alpha buffer is needed.
+// Preserves the ARGB buffer.
+// Returns false in case of error (invalid param, out-of-memory).
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+
+//------------------------------------------------------------------------------
+
+#if WEBP_ENCODER_ABI_VERSION <= 0x0203
+void WebPMemoryWriterClear(WebPMemoryWriter* writer);
+#endif
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_VP8ENCI_H_ */
diff --git a/src/main/jni/libwebp/enc/vp8l.c b/src/main/jni/libwebp/enc/vp8l.c
new file mode 100644
index 000000000..891dd01bf
--- /dev/null
+++ b/src/main/jni/libwebp/enc/vp8l.c
@@ -0,0 +1,1244 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// main entry for the lossless encoder.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+//
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./backward_references.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../utils/bit_writer.h"
+#include "../utils/huffman_encode.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+#define PALETTE_KEY_RIGHT_SHIFT   22  // Key for 1K buffer.
+#define MAX_HUFF_IMAGE_SIZE       (16 * 1024 * 1024)
+#define MAX_COLORS_FOR_GRAPH      64
+
+// -----------------------------------------------------------------------------
+// Palette
+
+static int CompareColors(const void* p1, const void* p2) {
+  const uint32_t a = *(const uint32_t*)p1;
+  const uint32_t b = *(const uint32_t*)p2;
+  assert(a != b);
+  return (a < b) ? -1 : 1;
+}
+
+// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
+// creates a palette and returns true, else returns false.
+static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+                                   uint32_t palette[MAX_PALETTE_SIZE],
+                                   int* const palette_size) {
+  int i, x, y, key;
+  int num_colors = 0;
+  uint8_t in_use[MAX_PALETTE_SIZE * 4] = { 0 };
+  uint32_t colors[MAX_PALETTE_SIZE * 4];
+  static const uint32_t kHashMul = 0x1e35a7bd;
+  const uint32_t* argb = pic->argb;
+  const int width = pic->width;
+  const int height = pic->height;
+  uint32_t last_pix = ~argb[0];   // so we're sure that last_pix != argb[0]
+
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      if (argb[x] == last_pix) {
+        continue;
+      }
+      last_pix = argb[x];
+      key = (kHashMul * last_pix) >> PALETTE_KEY_RIGHT_SHIFT;
+      while (1) {
+        if (!in_use[key]) {
+          colors[key] = last_pix;
+          in_use[key] = 1;
+          ++num_colors;
+          if (num_colors > MAX_PALETTE_SIZE) {
+            return 0;
+          }
+          break;
+        } else if (colors[key] == last_pix) {
+          // The color is already there.
+          break;
+        } else {
+          // Some other color sits there.
+          // Do linear conflict resolution.
+          ++key;
+          key &= (MAX_PALETTE_SIZE * 4 - 1);  // key mask for 1K buffer.
+        }
+      }
+    }
+    argb += pic->argb_stride;
+  }
+
+  // TODO(skal): could we reuse in_use[] to speed up EncodePalette()?
+  num_colors = 0;
+  for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
+    if (in_use[i]) {
+      palette[num_colors] = colors[i];
+      ++num_colors;
+    }
+  }
+
+  qsort(palette, num_colors, sizeof(*palette), CompareColors);
+  *palette_size = num_colors;
+  return 1;
+}
+
+static int AnalyzeEntropy(const uint32_t* argb,
+                          int width, int height, int argb_stride,
+                          double* const nonpredicted_bits,
+                          double* const predicted_bits) {
+  int x, y;
+  const uint32_t* last_line = NULL;
+  uint32_t last_pix = argb[0];    // so we're sure that pix_diff == 0
+
+  VP8LHistogramSet* const histo_set = VP8LAllocateHistogramSet(2, 0);
+  if (histo_set == NULL) return 0;
+
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const uint32_t pix = argb[x];
+      const uint32_t pix_diff = VP8LSubPixels(pix, last_pix);
+      if (pix_diff == 0) continue;
+      if (last_line != NULL && pix == last_line[x]) {
+        continue;
+      }
+      last_pix = pix;
+      {
+        const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix);
+        const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff);
+        VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[0], &pix_token);
+        VP8LHistogramAddSinglePixOrCopy(histo_set->histograms[1],
+                                        &pix_diff_token);
+      }
+    }
+    last_line = argb;
+    argb += argb_stride;
+  }
+  *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[0]);
+  *predicted_bits = VP8LHistogramEstimateBitsBulk(histo_set->histograms[1]);
+  VP8LFreeHistogramSet(histo_set);
+  return 1;
+}
+
+static int AnalyzeAndInit(VP8LEncoder* const enc, WebPImageHint image_hint) {
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+  const int pix_cnt = width * height;
+  // we round the block size up, so we're guaranteed to have
+  // at max MAX_REFS_BLOCK_PER_IMAGE blocks used:
+  int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
+  assert(pic != NULL && pic->argb != NULL);
+
+  enc->use_palette_ =
+      AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
+
+  if (image_hint == WEBP_HINT_GRAPH) {
+    if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
+      enc->use_palette_ = 0;
+    }
+  }
+
+  if (!enc->use_palette_) {
+    if (image_hint == WEBP_HINT_PHOTO) {
+      enc->use_predict_ = 1;
+      enc->use_cross_color_ = 1;
+    } else {
+      double non_pred_entropy, pred_entropy;
+      if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride,
+                          &non_pred_entropy, &pred_entropy)) {
+        return 0;
+      }
+      if (pred_entropy < 0.95 * non_pred_entropy) {
+        enc->use_predict_ = 1;
+        enc->use_cross_color_ = 1;
+      }
+    }
+  }
+  if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
+
+  // palette-friendly input typically uses less literals
+  //  -> reduce block size a bit
+  if (enc->use_palette_) refs_block_size /= 2;
+  VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size);
+  VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size);
+
+  return 1;
+}
+
+// Returns false in case of memory error.
+static int GetHuffBitLengthsAndCodes(
+    const VP8LHistogramSet* const histogram_image,
+    HuffmanTreeCode* const huffman_codes) {
+  int i, k;
+  int ok = 0;
+  uint64_t total_length_size = 0;
+  uint8_t* mem_buf = NULL;
+  const int histogram_image_size = histogram_image->size;
+  int max_num_symbols = 0;
+  uint8_t* buf_rle = NULL;
+  HuffmanTree* huff_tree = NULL;
+
+  // Iterate over all histograms and get the aggregate number of codes used.
+  for (i = 0; i < histogram_image_size; ++i) {
+    const VP8LHistogram* const histo = histogram_image->histograms[i];
+    HuffmanTreeCode* const codes = &huffman_codes[5 * i];
+    for (k = 0; k < 5; ++k) {
+      const int num_symbols =
+          (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) :
+          (k == 4) ? NUM_DISTANCE_CODES : 256;
+      codes[k].num_symbols = num_symbols;
+      total_length_size += num_symbols;
+    }
+  }
+
+  // Allocate and Set Huffman codes.
+  {
+    uint16_t* codes;
+    uint8_t* lengths;
+    mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size,
+                                       sizeof(*lengths) + sizeof(*codes));
+    if (mem_buf == NULL) goto End;
+
+    codes = (uint16_t*)mem_buf;
+    lengths = (uint8_t*)&codes[total_length_size];
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      const int bit_length = huffman_codes[i].num_symbols;
+      huffman_codes[i].codes = codes;
+      huffman_codes[i].code_lengths = lengths;
+      codes += bit_length;
+      lengths += bit_length;
+      if (max_num_symbols < bit_length) {
+        max_num_symbols = bit_length;
+      }
+    }
+  }
+
+  buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols);
+  huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols,
+                                           sizeof(*huff_tree));
+  if (buf_rle == NULL || huff_tree == NULL) goto End;
+
+  // Create Huffman trees.
+  for (i = 0; i < histogram_image_size; ++i) {
+    HuffmanTreeCode* const codes = &huffman_codes[5 * i];
+    VP8LHistogram* const histo = histogram_image->histograms[i];
+    VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0);
+    VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1);
+    VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2);
+    VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3);
+    VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4);
+  }
+  ok = 1;
+ End:
+  WebPSafeFree(huff_tree);
+  WebPSafeFree(buf_rle);
+  if (!ok) {
+    WebPSafeFree(mem_buf);
+    memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes));
+  }
+  return ok;
+}
+
+static void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+    VP8LBitWriter* const bw, const uint8_t* code_length_bitdepth) {
+  // RFC 1951 will calm you down if you are worried about this funny sequence.
+  // This sequence is tuned from that, but more weighted for lower symbol count,
+  // and more spiking histograms.
+  static const uint8_t kStorageOrder[CODE_LENGTH_CODES] = {
+    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  };
+  int i;
+  // Throw away trailing zeros:
+  int codes_to_store = CODE_LENGTH_CODES;
+  for (; codes_to_store > 4; --codes_to_store) {
+    if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+      break;
+    }
+  }
+  VP8LWriteBits(bw, 4, codes_to_store - 4);
+  for (i = 0; i < codes_to_store; ++i) {
+    VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]);
+  }
+}
+
+static void ClearHuffmanTreeIfOnlyOneSymbol(
+    HuffmanTreeCode* const huffman_code) {
+  int k;
+  int count = 0;
+  for (k = 0; k < huffman_code->num_symbols; ++k) {
+    if (huffman_code->code_lengths[k] != 0) {
+      ++count;
+      if (count > 1) return;
+    }
+  }
+  for (k = 0; k < huffman_code->num_symbols; ++k) {
+    huffman_code->code_lengths[k] = 0;
+    huffman_code->codes[k] = 0;
+  }
+}
+
+static void StoreHuffmanTreeToBitMask(
+    VP8LBitWriter* const bw,
+    const HuffmanTreeToken* const tokens, const int num_tokens,
+    const HuffmanTreeCode* const huffman_code) {
+  int i;
+  for (i = 0; i < num_tokens; ++i) {
+    const int ix = tokens[i].code;
+    const int extra_bits = tokens[i].extra_bits;
+    VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]);
+    switch (ix) {
+      case 16:
+        VP8LWriteBits(bw, 2, extra_bits);
+        break;
+      case 17:
+        VP8LWriteBits(bw, 3, extra_bits);
+        break;
+      case 18:
+        VP8LWriteBits(bw, 7, extra_bits);
+        break;
+    }
+  }
+}
+
+// 'huff_tree' and 'tokens' are pre-alloacted buffers.
+static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
+                                 HuffmanTree* const huff_tree,
+                                 HuffmanTreeToken* const tokens,
+                                 const HuffmanTreeCode* const tree) {
+  uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 };
+  uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 };
+  const int max_tokens = tree->num_symbols;
+  int num_tokens;
+  HuffmanTreeCode huffman_code;
+  huffman_code.num_symbols = CODE_LENGTH_CODES;
+  huffman_code.code_lengths = code_length_bitdepth;
+  huffman_code.codes = code_length_bitdepth_symbols;
+
+  VP8LWriteBits(bw, 1, 0);
+  num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
+  {
+    uint32_t histogram[CODE_LENGTH_CODES] = { 0 };
+    uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 };
+    int i;
+    for (i = 0; i < num_tokens; ++i) {
+      ++histogram[tokens[i].code];
+    }
+
+    VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code);
+  }
+
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth);
+  ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code);
+  {
+    int trailing_zero_bits = 0;
+    int trimmed_length = num_tokens;
+    int write_trimmed_length;
+    int length;
+    int i = num_tokens;
+    while (i-- > 0) {
+      const int ix = tokens[i].code;
+      if (ix == 0 || ix == 17 || ix == 18) {
+        --trimmed_length;   // discount trailing zeros
+        trailing_zero_bits += code_length_bitdepth[ix];
+        if (ix == 17) {
+          trailing_zero_bits += 3;
+        } else if (ix == 18) {
+          trailing_zero_bits += 7;
+        }
+      } else {
+        break;
+      }
+    }
+    write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
+    length = write_trimmed_length ? trimmed_length : num_tokens;
+    VP8LWriteBits(bw, 1, write_trimmed_length);
+    if (write_trimmed_length) {
+      const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
+      const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+      VP8LWriteBits(bw, 3, nbitpairs - 1);
+      assert(trimmed_length >= 2);
+      VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2);
+    }
+    StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
+  }
+}
+
+// 'huff_tree' and 'tokens' are pre-alloacted buffers.
+static void StoreHuffmanCode(VP8LBitWriter* const bw,
+                             HuffmanTree* const huff_tree,
+                             HuffmanTreeToken* const tokens,
+                             const HuffmanTreeCode* const huffman_code) {
+  int i;
+  int count = 0;
+  int symbols[2] = { 0, 0 };
+  const int kMaxBits = 8;
+  const int kMaxSymbol = 1 << kMaxBits;
+
+  // Check whether it's a small tree.
+  for (i = 0; i < huffman_code->num_symbols && count < 3; ++i) {
+    if (huffman_code->code_lengths[i] != 0) {
+      if (count < 2) symbols[count] = i;
+      ++count;
+    }
+  }
+
+  if (count == 0) {   // emit minimal tree for empty cases
+    // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
+    VP8LWriteBits(bw, 4, 0x01);
+  } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) {
+    VP8LWriteBits(bw, 1, 1);  // Small tree marker to encode 1 or 2 symbols.
+    VP8LWriteBits(bw, 1, count - 1);
+    if (symbols[0] <= 1) {
+      VP8LWriteBits(bw, 1, 0);  // Code bit for small (1 bit) symbol value.
+      VP8LWriteBits(bw, 1, symbols[0]);
+    } else {
+      VP8LWriteBits(bw, 1, 1);
+      VP8LWriteBits(bw, 8, symbols[0]);
+    }
+    if (count == 2) {
+      VP8LWriteBits(bw, 8, symbols[1]);
+    }
+  } else {
+    StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code);
+  }
+}
+
+static void WriteHuffmanCode(VP8LBitWriter* const bw,
+                             const HuffmanTreeCode* const code,
+                             int code_index) {
+  const int depth = code->code_lengths[code_index];
+  const int symbol = code->codes[code_index];
+  VP8LWriteBits(bw, depth, symbol);
+}
+
+static WebPEncodingError StoreImageToBitMask(
+    VP8LBitWriter* const bw, int width, int histo_bits,
+    VP8LBackwardRefs* const refs,
+    const uint16_t* histogram_symbols,
+    const HuffmanTreeCode* const huffman_codes) {
+  // x and y trace the position in the image.
+  int x = 0;
+  int y = 0;
+  const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
+    const int histogram_ix = histogram_symbols[histo_bits ?
+                                               (y >> histo_bits) * histo_xsize +
+                                               (x >> histo_bits) : 0];
+    const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix;
+    if (PixOrCopyIsCacheIdx(v)) {
+      const int code = PixOrCopyCacheIdx(v);
+      const int literal_ix = 256 + NUM_LENGTH_CODES + code;
+      WriteHuffmanCode(bw, codes, literal_ix);
+    } else if (PixOrCopyIsLiteral(v)) {
+      static const int order[] = { 1, 2, 0, 3 };
+      int k;
+      for (k = 0; k < 4; ++k) {
+        const int code = PixOrCopyLiteral(v, order[k]);
+        WriteHuffmanCode(bw, codes + k, code);
+      }
+    } else {
+      int bits, n_bits;
+      int code, distance;
+
+      VP8LPrefixEncode(v->len, &code, &n_bits, &bits);
+      WriteHuffmanCode(bw, codes, 256 + code);
+      VP8LWriteBits(bw, n_bits, bits);
+
+      distance = PixOrCopyDistance(v);
+      VP8LPrefixEncode(distance, &code, &n_bits, &bits);
+      WriteHuffmanCode(bw, codes + 4, code);
+      VP8LWriteBits(bw, n_bits, bits);
+    }
+    x += PixOrCopyLength(v);
+    while (x >= width) {
+      x -= width;
+      ++y;
+    }
+    VP8LRefsCursorNext(&c);
+  }
+  return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK;
+}
+
+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
+static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
+                                              const uint32_t* const argb,
+                                              VP8LHashChain* const hash_chain,
+                                              VP8LBackwardRefs refs_array[2],
+                                              int width, int height,
+                                              int quality) {
+  int i;
+  int max_tokens = 0;
+  WebPEncodingError err = VP8_ENC_OK;
+  VP8LBackwardRefs* refs;
+  HuffmanTreeToken* tokens = NULL;
+  HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };
+  const uint16_t histogram_symbols[1] = { 0 };    // only one tree, one symbol
+  VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0);
+  HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
+        3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
+  if (histogram_image == NULL || huff_tree == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  // Calculate backward references from ARGB image.
+  refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, 1,
+                                   hash_chain, refs_array);
+  if (refs == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+  // Build histogram image and symbols from backward references.
+  VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);
+
+  // Create Huffman bit lengths and codes for each histogram image.
+  assert(histogram_image->size == 1);
+  if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  // No color cache, no Huffman image.
+  VP8LWriteBits(bw, 1, 0);
+
+  // Find maximum number of symbols for the huffman tree-set.
+  for (i = 0; i < 5; ++i) {
+    HuffmanTreeCode* const codes = &huffman_codes[i];
+    if (max_tokens < codes->num_symbols) {
+      max_tokens = codes->num_symbols;
+    }
+  }
+
+  tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
+  if (tokens == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  // Store Huffman codes.
+  for (i = 0; i < 5; ++i) {
+    HuffmanTreeCode* const codes = &huffman_codes[i];
+    StoreHuffmanCode(bw, huff_tree, tokens, codes);
+    ClearHuffmanTreeIfOnlyOneSymbol(codes);
+  }
+
+  // Store actual literals.
+  err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols,
+                            huffman_codes);
+
+ Error:
+  WebPSafeFree(tokens);
+  WebPSafeFree(huff_tree);
+  VP8LFreeHistogramSet(histogram_image);
+  WebPSafeFree(huffman_codes[0].codes);
+  return err;
+}
+
+static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
+                                             const uint32_t* const argb,
+                                             VP8LHashChain* const hash_chain,
+                                             VP8LBackwardRefs refs_array[2],
+                                             int width, int height, int quality,
+                                             int cache_bits,
+                                             int histogram_bits) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const int use_2d_locality = 1;
+  const int use_color_cache = (cache_bits > 0);
+  const uint32_t histogram_image_xysize =
+      VP8LSubSampleSize(width, histogram_bits) *
+      VP8LSubSampleSize(height, histogram_bits);
+  VP8LHistogramSet* histogram_image =
+      VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits);
+  int histogram_image_size = 0;
+  size_t bit_array_size = 0;
+  HuffmanTree* huff_tree = NULL;
+  HuffmanTreeToken* tokens = NULL;
+  HuffmanTreeCode* huffman_codes = NULL;
+  VP8LBackwardRefs refs;
+  VP8LBackwardRefs* best_refs;
+  uint16_t* const histogram_symbols =
+      (uint16_t*)WebPSafeMalloc(histogram_image_xysize,
+                                sizeof(*histogram_symbols));
+  assert(histogram_bits >= MIN_HUFFMAN_BITS);
+  assert(histogram_bits <= MAX_HUFFMAN_BITS);
+
+  VP8LBackwardRefsInit(&refs, refs_array[0].block_size_);
+  if (histogram_image == NULL || histogram_symbols == NULL) {
+    VP8LFreeHistogramSet(histogram_image);
+    WebPSafeFree(histogram_symbols);
+    return 0;
+  }
+
+  // 'best_refs' is the reference to the best backward refs and points to one
+  // of refs_array[0] or refs_array[1].
+  // Calculate backward references from ARGB image.
+  best_refs = VP8LGetBackwardReferences(width, height, argb, quality,
+                                        cache_bits, use_2d_locality,
+                                        hash_chain, refs_array);
+  if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) {
+    goto Error;
+  }
+  // Build histogram image and symbols from backward references.
+  if (!VP8LGetHistoImageSymbols(width, height, &refs,
+                                quality, histogram_bits, cache_bits,
+                                histogram_image,
+                                histogram_symbols)) {
+    goto Error;
+  }
+  // Create Huffman bit lengths and codes for each histogram image.
+  histogram_image_size = histogram_image->size;
+  bit_array_size = 5 * histogram_image_size;
+  huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
+                                                   sizeof(*huffman_codes));
+  if (huffman_codes == NULL ||
+      !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    goto Error;
+  }
+  // Free combined histograms.
+  VP8LFreeHistogramSet(histogram_image);
+  histogram_image = NULL;
+
+  // Color Cache parameters.
+  VP8LWriteBits(bw, 1, use_color_cache);
+  if (use_color_cache) {
+    VP8LWriteBits(bw, 4, cache_bits);
+  }
+
+  // Huffman image + meta huffman.
+  {
+    const int write_histogram_image = (histogram_image_size > 1);
+    VP8LWriteBits(bw, 1, write_histogram_image);
+    if (write_histogram_image) {
+      uint32_t* const histogram_argb =
+          (uint32_t*)WebPSafeMalloc(histogram_image_xysize,
+                                    sizeof(*histogram_argb));
+      int max_index = 0;
+      uint32_t i;
+      if (histogram_argb == NULL) goto Error;
+      for (i = 0; i < histogram_image_xysize; ++i) {
+        const int symbol_index = histogram_symbols[i] & 0xffff;
+        histogram_argb[i] = 0xff000000 | (symbol_index << 8);
+        if (symbol_index >= max_index) {
+          max_index = symbol_index + 1;
+        }
+      }
+      histogram_image_size = max_index;
+
+      VP8LWriteBits(bw, 3, histogram_bits - 2);
+      err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array,
+                                 VP8LSubSampleSize(width, histogram_bits),
+                                 VP8LSubSampleSize(height, histogram_bits),
+                                 quality);
+      WebPSafeFree(histogram_argb);
+      if (err != VP8_ENC_OK) goto Error;
+    }
+  }
+
+  // Store Huffman codes.
+  {
+    int i;
+    int max_tokens = 0;
+    huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES,
+                                             sizeof(*huff_tree));
+    if (huff_tree == NULL) goto Error;
+    // Find maximum number of symbols for the huffman tree-set.
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      HuffmanTreeCode* const codes = &huffman_codes[i];
+      if (max_tokens < codes->num_symbols) {
+        max_tokens = codes->num_symbols;
+      }
+    }
+    tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens,
+                                               sizeof(*tokens));
+    if (tokens == NULL) goto Error;
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      HuffmanTreeCode* const codes = &huffman_codes[i];
+      StoreHuffmanCode(bw, huff_tree, tokens, codes);
+      ClearHuffmanTreeIfOnlyOneSymbol(codes);
+    }
+  }
+
+  // Store actual literals.
+  err = StoreImageToBitMask(bw, width, histogram_bits, &refs,
+                            histogram_symbols, huffman_codes);
+
+ Error:
+  WebPSafeFree(tokens);
+  WebPSafeFree(huff_tree);
+  VP8LFreeHistogramSet(histogram_image);
+  VP8LBackwardRefsClear(&refs);
+  if (huffman_codes != NULL) {
+    WebPSafeFree(huffman_codes->codes);
+    WebPSafeFree(huffman_codes);
+  }
+  WebPSafeFree(histogram_symbols);
+  return err;
+}
+
+// -----------------------------------------------------------------------------
+// Transforms
+
+// Check if it would be a good idea to subtract green from red and blue. We
+// only impact entropy in red/blue components, don't bother to look at others.
+static WebPEncodingError EvalAndApplySubtractGreen(VP8LEncoder* const enc,
+                                                   int width, int height,
+                                                   VP8LBitWriter* const bw) {
+  if (!enc->use_palette_) {
+    int i;
+    const uint32_t* const argb = enc->argb_;
+    double bit_cost_before, bit_cost_after;
+    // Allocate histogram with cache_bits = 1.
+    VP8LHistogram* const histo = VP8LAllocateHistogram(1);
+    if (histo == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+    for (i = 0; i < width * height; ++i) {
+      const uint32_t c = argb[i];
+      ++histo->red_[(c >> 16) & 0xff];
+      ++histo->blue_[(c >> 0) & 0xff];
+    }
+    bit_cost_before = VP8LHistogramEstimateBits(histo);
+
+    VP8LHistogramInit(histo, 1);
+    for (i = 0; i < width * height; ++i) {
+      const uint32_t c = argb[i];
+      const int green = (c >> 8) & 0xff;
+      ++histo->red_[((c >> 16) - green) & 0xff];
+      ++histo->blue_[((c >> 0) - green) & 0xff];
+    }
+    bit_cost_after = VP8LHistogramEstimateBits(histo);
+    VP8LFreeHistogram(histo);
+
+    // Check if subtracting green yields low entropy.
+    enc->use_subtract_green_ = (bit_cost_after < bit_cost_before);
+    if (enc->use_subtract_green_) {
+      VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+      VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
+      VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
+    }
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
+                                            int width, int height, int quality,
+                                            VP8LBitWriter* const bw) {
+  const int pred_bits = enc->transform_bits_;
+  const int transform_width = VP8LSubSampleSize(width, pred_bits);
+  const int transform_height = VP8LSubSampleSize(height, pred_bits);
+
+  VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_,
+                    enc->transform_data_);
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM);
+  assert(pred_bits >= 2);
+  VP8LWriteBits(bw, 3, pred_bits - 2);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              (VP8LHashChain*)&enc->hash_chain_,
+                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
+                              transform_width, transform_height,
+                              quality);
+}
+
+static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
+                                               int width, int height,
+                                               int quality,
+                                               VP8LBitWriter* const bw) {
+  const int ccolor_transform_bits = enc->transform_bits_;
+  const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
+  const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
+
+  VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
+                          enc->argb_, enc->transform_data_);
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
+  assert(ccolor_transform_bits >= 2);
+  VP8LWriteBits(bw, 3, ccolor_transform_bits - 2);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              (VP8LHashChain*)&enc->hash_chain_,
+                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
+                              transform_width, transform_height,
+                              quality);
+}
+
+// -----------------------------------------------------------------------------
+
+static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
+                                         size_t riff_size, size_t vp8l_size) {
+  uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
+    'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P',
+    'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE,
+  };
+  PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+  PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size);
+  if (!pic->writer(riff, sizeof(riff), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static int WriteImageSize(const WebPPicture* const pic,
+                          VP8LBitWriter* const bw) {
+  const int width = pic->width - 1;
+  const int height = pic->height - 1;
+  assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
+
+  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width);
+  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height);
+  return !bw->error_;
+}
+
+static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
+  VP8LWriteBits(bw, 1, has_alpha);
+  VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION);
+  return !bw->error_;
+}
+
+static WebPEncodingError WriteImage(const WebPPicture* const pic,
+                                    VP8LBitWriter* const bw,
+                                    size_t* const coded_size) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const uint8_t* const webpll_data = VP8LBitWriterFinish(bw);
+  const size_t webpll_size = VP8LBitWriterNumBytes(bw);
+  const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size;
+  const size_t pad = vp8l_size & 1;
+  const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
+
+  err = WriteRiffHeader(pic, riff_size, vp8l_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  if (!pic->writer(webpll_data, webpll_size, pic)) {
+    err = VP8_ENC_ERROR_BAD_WRITE;
+    goto Error;
+  }
+
+  if (pad) {
+    const uint8_t pad_byte[1] = { 0 };
+    if (!pic->writer(pad_byte, 1, pic)) {
+      err = VP8_ENC_ERROR_BAD_WRITE;
+      goto Error;
+    }
+  }
+  *coded_size = CHUNK_HEADER_SIZE + riff_size;
+  return VP8_ENC_OK;
+
+ Error:
+  return err;
+}
+
+// -----------------------------------------------------------------------------
+
+// Allocates the memory for argb (W x H) buffer, 2 rows of context for
+// prediction and transform data.
+static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
+                                                 int width, int height) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const int tile_size = 1 << enc->transform_bits_;
+  const uint64_t image_size = width * height;
+  const uint64_t argb_scratch_size = tile_size * width + width;
+  const int transform_data_size =
+      VP8LSubSampleSize(width, enc->transform_bits_) *
+      VP8LSubSampleSize(height, enc->transform_bits_);
+  const uint64_t total_size =
+      image_size + argb_scratch_size + (uint64_t)transform_data_size;
+  uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+  if (mem == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+  enc->argb_ = mem;
+  mem += image_size;
+  enc->argb_scratch_ = mem;
+  mem += argb_scratch_size;
+  enc->transform_data_ = mem;
+  enc->current_width_ = width;
+
+ Error:
+  return err;
+}
+
+static void ApplyPalette(uint32_t* src, uint32_t* dst,
+                         uint32_t src_stride, uint32_t dst_stride,
+                         const uint32_t* palette, int palette_size,
+                         int width, int height, int xbits, uint8_t* row) {
+  int i, x, y;
+  int use_LUT = 1;
+  for (i = 0; i < palette_size; ++i) {
+    if ((palette[i] & 0xffff00ffu) != 0) {
+      use_LUT = 0;
+      break;
+    }
+  }
+
+  if (use_LUT) {
+    uint8_t inv_palette[MAX_PALETTE_SIZE] = { 0 };
+    for (i = 0; i < palette_size; ++i) {
+      const int color = (palette[i] >> 8) & 0xff;
+      inv_palette[color] = i;
+    }
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const int color = (src[x] >> 8) & 0xff;
+        row[x] = inv_palette[color];
+      }
+      VP8LBundleColorMap(row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    // Use 1 pixel cache for ARGB pixels.
+    uint32_t last_pix = palette[0];
+    int last_idx = 0;
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const uint32_t pix = src[x];
+        if (pix != last_pix) {
+          for (i = 0; i < palette_size; ++i) {
+            if (pix == palette[i]) {
+              last_idx = i;
+              last_pix = pix;
+              break;
+            }
+          }
+        }
+        row[x] = last_idx;
+      }
+      VP8LBundleColorMap(row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
+    }
+  }
+}
+
+// Note: Expects "enc->palette_" to be set properly.
+// Also, "enc->palette_" will be modified after this call and should not be used
+// later.
+static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
+                                       VP8LEncoder* const enc, int quality) {
+  WebPEncodingError err = VP8_ENC_OK;
+  int i;
+  const WebPPicture* const pic = enc->pic_;
+  uint32_t* src = pic->argb;
+  uint32_t* dst;
+  const int width = pic->width;
+  const int height = pic->height;
+  uint32_t* const palette = enc->palette_;
+  const int palette_size = enc->palette_size_;
+  uint8_t* row = NULL;
+  int xbits;
+
+  // Replace each input pixel by corresponding palette index.
+  // This is done line by line.
+  if (palette_size <= 4) {
+    xbits = (palette_size <= 2) ? 3 : 2;
+  } else {
+    xbits = (palette_size <= 16) ? 1 : 0;
+  }
+
+  err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
+  if (err != VP8_ENC_OK) goto Error;
+  dst = enc->argb_;
+
+  row = (uint8_t*)WebPSafeMalloc(width, sizeof(*row));
+  if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+
+  ApplyPalette(src, dst, pic->argb_stride, enc->current_width_,
+               palette, palette_size, width, height, xbits, row);
+
+  // Save palette to bitstream.
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
+  assert(palette_size >= 1);
+  VP8LWriteBits(bw, 8, palette_size - 1);
+  for (i = palette_size - 1; i >= 1; --i) {
+    palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+  }
+  err = EncodeImageNoHuffman(bw, palette, &enc->hash_chain_, enc->refs_,
+                             palette_size, 1, quality);
+
+ Error:
+  WebPSafeFree(row);
+  return err;
+}
+
+// -----------------------------------------------------------------------------
+
+static int GetHistoBits(int method, int use_palette, int width, int height) {
+  const int hist_size = VP8LGetHistogramSize(MAX_COLOR_CACHE_BITS);
+  // Make tile size a function of encoding method (Range: 0 to 6).
+  int histo_bits = (use_palette ? 9 : 7) - method;
+  while (1) {
+    const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
+                                VP8LSubSampleSize(height, histo_bits);
+    if ((uint64_t)huff_image_size * hist_size <= MAX_HUFF_IMAGE_SIZE) break;
+    ++histo_bits;
+  }
+  return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
+         (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
+}
+
+static int GetTransformBits(int method, int histo_bits) {
+  const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5;
+  return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits;
+}
+
+static int GetCacheBits(float quality) {
+  return (quality <= 25.f) ? 0 : 7;
+}
+
+static void FinishEncParams(VP8LEncoder* const enc) {
+  const WebPConfig* const config = enc->config_;
+  const WebPPicture* const pic = enc->pic_;
+  const int method = config->method;
+  const float quality = config->quality;
+  const int use_palette = enc->use_palette_;
+  enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height);
+  enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
+  enc->cache_bits_ = GetCacheBits(quality);
+}
+
+// -----------------------------------------------------------------------------
+// VP8LEncoder
+
+static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
+                                   const WebPPicture* const picture) {
+  VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc));
+  if (enc == NULL) {
+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return NULL;
+  }
+  enc->config_ = config;
+  enc->pic_ = picture;
+
+  VP8LDspInit();
+
+  return enc;
+}
+
+static void VP8LEncoderDelete(VP8LEncoder* enc) {
+  if (enc != NULL) {
+    VP8LHashChainClear(&enc->hash_chain_);
+    VP8LBackwardRefsClear(&enc->refs_[0]);
+    VP8LBackwardRefsClear(&enc->refs_[1]);
+    WebPSafeFree(enc->argb_);
+    WebPSafeFree(enc);
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Main call
+
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const int quality = (int)config->quality;
+  const int width = picture->width;
+  const int height = picture->height;
+  VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
+  const size_t byte_position = VP8LBitWriterNumBytes(bw);
+
+  if (enc == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Analyze image (entropy, num_palettes etc)
+
+  if (!AnalyzeAndInit(enc, config->image_hint)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  FinishEncParams(enc);
+
+  if (enc->use_palette_) {
+    err = EncodePalette(bw, enc, quality);
+    if (err != VP8_ENC_OK) goto Error;
+    // Color cache is disabled for palette.
+    enc->cache_bits_ = 0;
+  }
+
+  // In case image is not packed.
+  if (enc->argb_ == NULL) {
+    int y;
+    err = AllocateTransformBuffer(enc, width, height);
+    if (err != VP8_ENC_OK) goto Error;
+    for (y = 0; y < height; ++y) {
+      memcpy(enc->argb_ + y * width,
+             picture->argb + y * picture->argb_stride,
+             width * sizeof(*enc->argb_));
+    }
+    enc->current_width_ = width;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Apply transforms and write transform data.
+
+  err = EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw);
+  if (err != VP8_ENC_OK) goto Error;
+
+  if (enc->use_predict_) {
+    err = ApplyPredictFilter(enc, enc->current_width_, height, quality, bw);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  if (enc->use_cross_color_) {
+    err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT);  // No more transforms.
+
+  // ---------------------------------------------------------------------------
+  // Estimate the color cache size.
+
+  if (enc->cache_bits_ > 0) {
+    if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_,
+                                           height, quality, &enc->hash_chain_,
+                                           &enc->refs_[0], &enc->cache_bits_)) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Encode and write the transformed image.
+
+  err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
+                            enc->current_width_, height, quality,
+                            enc->cache_bits_, enc->histo_bits_);
+  if (err != VP8_ENC_OK) goto Error;
+
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    stats->lossless_features = 0;
+    if (enc->use_predict_) stats->lossless_features |= 1;
+    if (enc->use_cross_color_) stats->lossless_features |= 2;
+    if (enc->use_subtract_green_) stats->lossless_features |= 4;
+    if (enc->use_palette_) stats->lossless_features |= 8;
+    stats->histogram_bits = enc->histo_bits_;
+    stats->transform_bits = enc->transform_bits_;
+    stats->cache_bits = enc->cache_bits_;
+    stats->palette_size = enc->palette_size_;
+    stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
+  }
+
+ Error:
+  VP8LEncoderDelete(enc);
+  return err;
+}
+
+int VP8LEncodeImage(const WebPConfig* const config,
+                    const WebPPicture* const picture) {
+  int width, height;
+  int has_alpha;
+  size_t coded_size;
+  int percent = 0;
+  int initial_size;
+  WebPEncodingError err = VP8_ENC_OK;
+  VP8LBitWriter bw;
+
+  if (picture == NULL) return 0;
+
+  if (config == NULL || picture->argb == NULL) {
+    err = VP8_ENC_ERROR_NULL_PARAMETER;
+    WebPEncodingSetError(picture, err);
+    return 0;
+  }
+
+  width = picture->width;
+  height = picture->height;
+  // Initialize BitWriter with size corresponding to 16 bpp to photo images and
+  // 8 bpp for graphical images.
+  initial_size = (config->image_hint == WEBP_HINT_GRAPH) ?
+                 width * height : width * height * 2;
+  if (!VP8LBitWriterInit(&bw, initial_size)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (!WebPReportProgress(picture, 1, &percent)) {
+ UserAbort:
+    err = VP8_ENC_ERROR_USER_ABORT;
+    goto Error;
+  }
+  // Reset stats (for pure lossless coding)
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    memset(stats, 0, sizeof(*stats));
+    stats->PSNR[0] = 99.f;
+    stats->PSNR[1] = 99.f;
+    stats->PSNR[2] = 99.f;
+    stats->PSNR[3] = 99.f;
+    stats->PSNR[4] = 99.f;
+  }
+
+  // Write image size.
+  if (!WriteImageSize(picture, &bw)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  has_alpha = WebPPictureHasTransparency(picture);
+  // Write the non-trivial Alpha flag and lossless version.
+  if (!WriteRealAlphaAndVersion(&bw, has_alpha)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort;
+
+  // Encode main image stream.
+  err = VP8LEncodeStream(config, picture, &bw);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // TODO(skal): have a fine-grained progress report in VP8LEncodeStream().
+  if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort;
+
+  // Finish the RIFF chunk.
+  err = WriteImage(picture, &bw, &coded_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
+
+  // Save size.
+  if (picture->stats != NULL) {
+    picture->stats->coded_size += (int)coded_size;
+    picture->stats->lossless_size = (int)coded_size;
+  }
+
+  if (picture->extra_info != NULL) {
+    const int mb_w = (width + 15) >> 4;
+    const int mb_h = (height + 15) >> 4;
+    memset(picture->extra_info, 0, mb_w * mb_h * sizeof(*picture->extra_info));
+  }
+
+ Error:
+  if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+  VP8LBitWriterDestroy(&bw);
+  if (err != VP8_ENC_OK) {
+    WebPEncodingSetError(picture, err);
+    return 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/enc/vp8li.h b/src/main/jni/libwebp/enc/vp8li.h
new file mode 100644
index 000000000..6b6db127d
--- /dev/null
+++ b/src/main/jni/libwebp/enc/vp8li.h
@@ -0,0 +1,77 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Lossless encoder: internal header.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_ENC_VP8LI_H_
+#define WEBP_ENC_VP8LI_H_
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../utils/bit_writer.h"
+#include "../webp/encode.h"
+#include "../webp/format_constants.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  const WebPConfig* config_;    // user configuration and parameters
+  const WebPPicture* pic_;      // input picture.
+
+  uint32_t* argb_;              // Transformed argb image data.
+  uint32_t* argb_scratch_;      // Scratch memory for argb rows
+                                // (used for prediction).
+  uint32_t* transform_data_;    // Scratch memory for transform data.
+  int       current_width_;     // Corresponds to packed image width.
+
+  // Encoding parameters derived from quality parameter.
+  int histo_bits_;
+  int transform_bits_;
+  int cache_bits_;        // If equal to 0, don't use color cache.
+
+  // Encoding parameters derived from image characteristics.
+  int use_cross_color_;
+  int use_subtract_green_;
+  int use_predict_;
+  int use_palette_;
+  int palette_size_;
+  uint32_t palette_[MAX_PALETTE_SIZE];
+
+  // Some 'scratch' (potentially large) objects.
+  struct VP8LBackwardRefs refs_[2];  // Backward Refs array corresponding to
+                                     // LZ77 & RLE coding.
+  VP8LHashChain hash_chain_;         // HashChain data for constructing
+                                     // backward references.
+} VP8LEncoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Encodes the picture.
+// Returns 0 if config or picture is NULL or picture doesn't have valid argb
+// input.
+int VP8LEncodeImage(const WebPConfig* const config,
+                    const WebPPicture* const picture);
+
+// Encodes the main image stream using the supplied bit writer.
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_VP8LI_H_ */
diff --git a/src/main/jni/libwebp/enc/webpenc.c b/src/main/jni/libwebp/enc/webpenc.c
new file mode 100644
index 000000000..0cb83f125
--- /dev/null
+++ b/src/main/jni/libwebp/enc/webpenc.c
@@ -0,0 +1,382 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebP encoder: main entry point
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "./cost.h"
+#include "../utils/utils.h"
+
+// #define PRINT_MEMORY_INFO
+
+#ifdef PRINT_MEMORY_INFO
+#include <stdio.h>
+#endif
+
+//------------------------------------------------------------------------------
+
+int WebPGetEncoderVersion(void) {
+  return (ENC_MAJ_VERSION << 16) | (ENC_MIN_VERSION << 8) | ENC_REV_VERSION;
+}
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+//------------------------------------------------------------------------------
+
+static void ResetSegmentHeader(VP8Encoder* const enc) {
+  VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+  hdr->num_segments_ = enc->config_->segments;
+  hdr->update_map_  = (hdr->num_segments_ > 1);
+  hdr->size_ = 0;
+}
+
+static void ResetFilterHeader(VP8Encoder* const enc) {
+  VP8FilterHeader* const hdr = &enc->filter_hdr_;
+  hdr->simple_ = 1;
+  hdr->level_ = 0;
+  hdr->sharpness_ = 0;
+  hdr->i4x4_lf_delta_ = 0;
+}
+
+static void ResetBoundaryPredictions(VP8Encoder* const enc) {
+  // init boundary values once for all
+  // Note: actually, initializing the preds_[] is only needed for intra4.
+  int i;
+  uint8_t* const top = enc->preds_ - enc->preds_w_;
+  uint8_t* const left = enc->preds_ - 1;
+  for (i = -1; i < 4 * enc->mb_w_; ++i) {
+    top[i] = B_DC_PRED;
+  }
+  for (i = 0; i < 4 * enc->mb_h_; ++i) {
+    left[i * enc->preds_w_] = B_DC_PRED;
+  }
+  enc->nz_[-1] = 0;   // constant
+}
+
+// Mapping from config->method_ to coding tools used.
+//-------------------+---+---+---+---+---+---+---+
+//   Method          | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
+//-------------------+---+---+---+---+---+---+---+
+// fast probe        | x |   |   | x |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// dynamic proba     | ~ | x | x | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// fast mode analysis|   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// basic rd-opt      |   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// disto-score i4/16 |   |   | x |   |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// rd-opt i4/16      |   |   | ~ | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// token buffer (opt)|   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// Trellis           |   |   |   |   |   | x |Ful|
+//-------------------+---+---+---+---+---+---+---+
+// full-SNS          |   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+
+static void MapConfigToTools(VP8Encoder* const enc) {
+  const WebPConfig* const config = enc->config_;
+  const int method = config->method;
+  const int limit = 100 - config->partition_limit;
+  enc->method_ = method;
+  enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL
+                     : (method >= 5) ? RD_OPT_TRELLIS
+                     : (method >= 3) ? RD_OPT_BASIC
+                     : RD_OPT_NONE;
+  enc->max_i4_header_bits_ =
+      256 * 16 * 16 *                 // upper bound: up to 16bit per 4x4 block
+      (limit * limit) / (100 * 100);  // ... modulated with a quadratic curve.
+
+  enc->thread_level_ = config->thread_level;
+
+  enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0);
+  if (!config->low_memory) {
+#if !defined(DISABLE_TOKEN_BUFFER)
+    enc->use_tokens_ = (enc->rd_opt_level_ >= RD_OPT_BASIC);  // need rd stats
+#endif
+    if (enc->use_tokens_) {
+      enc->num_parts_ = 1;   // doesn't work with multi-partition
+    }
+  }
+}
+
+// Memory scaling with dimensions:
+//  memory (bytes) ~= 2.25 * w + 0.0625 * w * h
+//
+// Typical memory footprint (614x440 picture)
+//              encoder: 22111
+//                 info: 4368
+//                preds: 17741
+//          top samples: 1263
+//             non-zero: 175
+//             lf-stats: 0
+//                total: 45658
+// Transient object sizes:
+//       VP8EncIterator: 3360
+//         VP8ModeScore: 872
+//       VP8SegmentInfo: 732
+//             VP8Proba: 18352
+//              LFStats: 2048
+// Picture size (yuv): 419328
+
+static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
+                                  WebPPicture* const picture) {
+  const int use_filter =
+      (config->filter_strength > 0) || (config->autofilter > 0);
+  const int mb_w = (picture->width + 15) >> 4;
+  const int mb_h = (picture->height + 15) >> 4;
+  const int preds_w = 4 * mb_w + 1;
+  const int preds_h = 4 * mb_h + 1;
+  const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
+  const int top_stride = mb_w * 16;
+  const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + ALIGN_CST;
+  const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
+  const size_t samples_size = 2 * top_stride * sizeof(uint8_t)  // top-luma/u/v
+                            + ALIGN_CST;                        // align all
+  const size_t lf_stats_size =
+      config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
+  VP8Encoder* enc;
+  uint8_t* mem;
+  const uint64_t size = (uint64_t)sizeof(VP8Encoder)   // main struct
+                      + ALIGN_CST                      // cache alignment
+                      + info_size                      // modes info
+                      + preds_size                     // prediction modes
+                      + samples_size                   // top/left samples
+                      + nz_size                        // coeff context bits
+                      + lf_stats_size;                 // autofilter stats
+
+#ifdef PRINT_MEMORY_INFO
+  printf("===================================\n");
+  printf("Memory used:\n"
+         "             encoder: %ld\n"
+         "                info: %ld\n"
+         "               preds: %ld\n"
+         "         top samples: %ld\n"
+         "            non-zero: %ld\n"
+         "            lf-stats: %ld\n"
+         "               total: %ld\n",
+         sizeof(VP8Encoder) + ALIGN_CST, info_size,
+         preds_size, samples_size, nz_size, lf_stats_size, size);
+  printf("Transient object sizes:\n"
+         "      VP8EncIterator: %ld\n"
+         "        VP8ModeScore: %ld\n"
+         "      VP8SegmentInfo: %ld\n"
+         "            VP8Proba: %ld\n"
+         "             LFStats: %ld\n",
+         sizeof(VP8EncIterator), sizeof(VP8ModeScore),
+         sizeof(VP8SegmentInfo), sizeof(VP8Proba),
+         sizeof(LFStats));
+  printf("Picture size (yuv): %ld\n",
+         mb_w * mb_h * 384 * sizeof(uint8_t));
+  printf("===================================\n");
+#endif
+  mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem));
+  if (mem == NULL) {
+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return NULL;
+  }
+  enc = (VP8Encoder*)mem;
+  mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc));
+  memset(enc, 0, sizeof(*enc));
+  enc->num_parts_ = 1 << config->partitions;
+  enc->mb_w_ = mb_w;
+  enc->mb_h_ = mb_h;
+  enc->preds_w_ = preds_w;
+  enc->mb_info_ = (VP8MBInfo*)mem;
+  mem += info_size;
+  enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
+  mem += preds_w * preds_h * sizeof(uint8_t);
+  enc->nz_ = 1 + (uint32_t*)DO_ALIGN(mem);
+  mem += nz_size;
+  enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL;
+  mem += lf_stats_size;
+
+  // top samples (all 16-aligned)
+  mem = (uint8_t*)DO_ALIGN(mem);
+  enc->y_top_ = (uint8_t*)mem;
+  enc->uv_top_ = enc->y_top_ + top_stride;
+  mem += 2 * top_stride;
+  assert(mem <= (uint8_t*)enc + size);
+
+  enc->config_ = config;
+  enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2;
+  enc->pic_ = picture;
+  enc->percent_ = 0;
+
+  MapConfigToTools(enc);
+  VP8EncDspInit();
+  VP8DefaultProbas(enc);
+  ResetSegmentHeader(enc);
+  ResetFilterHeader(enc);
+  ResetBoundaryPredictions(enc);
+  VP8GetResidualCostInit();
+  VP8SetResidualCoeffsInit();
+  VP8EncInitAlpha(enc);
+
+  // lower quality means smaller output -> we modulate a little the page
+  // size based on quality. This is just a crude 1rst-order prediction.
+  {
+    const float scale = 1.f + config->quality * 5.f / 100.f;  // in [1,6]
+    VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale));
+  }
+  return enc;
+}
+
+static int DeleteVP8Encoder(VP8Encoder* enc) {
+  int ok = 1;
+  if (enc != NULL) {
+    ok = VP8EncDeleteAlpha(enc);
+    VP8TBufferClear(&enc->tokens_);
+    WebPSafeFree(enc);
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+
+static double GetPSNR(uint64_t err, uint64_t size) {
+  return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.;
+}
+
+static void FinalizePSNR(const VP8Encoder* const enc) {
+  WebPAuxStats* stats = enc->pic_->stats;
+  const uint64_t size = enc->sse_count_;
+  const uint64_t* const sse = enc->sse_;
+  stats->PSNR[0] = (float)GetPSNR(sse[0], size);
+  stats->PSNR[1] = (float)GetPSNR(sse[1], size / 4);
+  stats->PSNR[2] = (float)GetPSNR(sse[2], size / 4);
+  stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2);
+  stats->PSNR[4] = (float)GetPSNR(sse[3], size);
+}
+
+static void StoreStats(VP8Encoder* const enc) {
+  WebPAuxStats* const stats = enc->pic_->stats;
+  if (stats != NULL) {
+    int i, s;
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+      stats->segment_level[i] = enc->dqm_[i].fstrength_;
+      stats->segment_quant[i] = enc->dqm_[i].quant_;
+      for (s = 0; s <= 2; ++s) {
+        stats->residual_bytes[s][i] = enc->residual_bytes_[s][i];
+      }
+    }
+    FinalizePSNR(enc);
+    stats->coded_size = enc->coded_size_;
+    for (i = 0; i < 3; ++i) {
+      stats->block_count[i] = enc->block_count_[i];
+    }
+  }
+  WebPReportProgress(enc->pic_, 100, &enc->percent_);  // done!
+}
+
+int WebPEncodingSetError(const WebPPicture* const pic,
+                         WebPEncodingError error) {
+  assert((int)error < VP8_ENC_ERROR_LAST);
+  assert((int)error >= VP8_ENC_OK);
+  ((WebPPicture*)pic)->error_code = error;
+  return 0;
+}
+
+int WebPReportProgress(const WebPPicture* const pic,
+                       int percent, int* const percent_store) {
+  if (percent_store != NULL && percent != *percent_store) {
+    *percent_store = percent;
+    if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
+      // user abort requested
+      WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
+      return 0;
+    }
+  }
+  return 1;  // ok
+}
+//------------------------------------------------------------------------------
+
+int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
+  int ok = 0;
+
+  if (pic == NULL)
+    return 0;
+  WebPEncodingSetError(pic, VP8_ENC_OK);  // all ok so far
+  if (config == NULL)  // bad params
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+  if (!WebPValidateConfig(config))
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  if (pic->width <= 0 || pic->height <= 0)
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
+  if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION)
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
+
+  if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats));
+
+  if (!config->lossless) {
+    VP8Encoder* enc = NULL;
+    if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
+      // Make sure we have YUVA samples.
+      if (config->preprocessing & 4) {
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+        if (!WebPPictureSmartARGBToYUVA(pic)) {
+          return 0;
+        }
+#endif
+      } else {
+        float dithering = 0.f;
+        if (config->preprocessing & 2) {
+          const float x = config->quality / 100.f;
+          const float x2 = x * x;
+          // slowly decreasing from max dithering at low quality (q->0)
+          // to 0.5 dithering amplitude at high quality (q->100)
+          dithering = 1.0f + (0.5f - 1.0f) * x2 * x2;
+        }
+        if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) {
+          return 0;
+        }
+      }
+    }
+
+    enc = InitVP8Encoder(config, pic);
+    if (enc == NULL) return 0;  // pic->error is already set.
+    // Note: each of the tasks below account for 20% in the progress report.
+    ok = VP8EncAnalyze(enc);
+
+    // Analysis is done, proceed to actual coding.
+    ok = ok && VP8EncStartAlpha(enc);   // possibly done in parallel
+    if (!enc->use_tokens_) {
+      ok = ok && VP8EncLoop(enc);
+    } else {
+      ok = ok && VP8EncTokenLoop(enc);
+    }
+    ok = ok && VP8EncFinishAlpha(enc);
+
+    ok = ok && VP8EncWrite(enc);
+    StoreStats(enc);
+    if (!ok) {
+      VP8EncFreeBitWriters(enc);
+    }
+    ok &= DeleteVP8Encoder(enc);  // must always be called, even if !ok
+  } else {
+    // Make sure we have ARGB samples.
+    if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) {
+      return 0;
+    }
+
+    ok = VP8LEncodeImage(config, pic);  // Sets pic->error in case of problem.
+  }
+
+  return ok;
+}
diff --git a/src/main/jni/libwebp/utils/bit_reader.c b/src/main/jni/libwebp/utils/bit_reader.c
new file mode 100644
index 000000000..64503e6b9
--- /dev/null
+++ b/src/main/jni/libwebp/utils/bit_reader.c
@@ -0,0 +1,210 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder non-inlined methods
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "./bit_reader_inl.h"
+
+//------------------------------------------------------------------------------
+// VP8BitReader
+
+void VP8InitBitReader(VP8BitReader* const br,
+                      const uint8_t* const start, const uint8_t* const end) {
+  assert(br != NULL);
+  assert(start != NULL);
+  assert(start <= end);
+  br->range_   = 255 - 1;
+  br->buf_     = start;
+  br->buf_end_ = end;
+  br->value_   = 0;
+  br->bits_    = -8;   // to load the very first 8bits
+  br->eof_     = 0;
+  VP8LoadNewBytes(br);
+}
+
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+  if (br->buf_ != NULL) {
+    br->buf_ += offset;
+    br->buf_end_ += offset;
+  }
+}
+
+const uint8_t kVP8Log2Range[128] = {
+     7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0
+};
+
+// range = ((range - 1) << kVP8Log2Range[range]) + 1
+const range_t kVP8NewRange[128] = {
+  127, 127, 191, 127, 159, 191, 223, 127,
+  143, 159, 175, 191, 207, 223, 239, 127,
+  135, 143, 151, 159, 167, 175, 183, 191,
+  199, 207, 215, 223, 231, 239, 247, 127,
+  131, 135, 139, 143, 147, 151, 155, 159,
+  163, 167, 171, 175, 179, 183, 187, 191,
+  195, 199, 203, 207, 211, 215, 219, 223,
+  227, 231, 235, 239, 243, 247, 251, 127,
+  129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159,
+  161, 163, 165, 167, 169, 171, 173, 175,
+  177, 179, 181, 183, 185, 187, 189, 191,
+  193, 195, 197, 199, 201, 203, 205, 207,
+  209, 211, 213, 215, 217, 219, 221, 223,
+  225, 227, 229, 231, 233, 235, 237, 239,
+  241, 243, 245, 247, 249, 251, 253, 127
+};
+
+void VP8LoadFinalBytes(VP8BitReader* const br) {
+  assert(br != NULL && br->buf_ != NULL);
+  // Only read 8bits at a time
+  if (br->buf_ < br->buf_end_) {
+    br->bits_ += 8;
+    br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
+  } else if (!br->eof_) {
+    br->value_ <<= 8;
+    br->bits_ += 8;
+    br->eof_ = 1;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Higher-level calls
+
+uint32_t VP8GetValue(VP8BitReader* const br, int bits) {
+  uint32_t v = 0;
+  while (bits-- > 0) {
+    v |= VP8GetBit(br, 0x80) << bits;
+  }
+  return v;
+}
+
+int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
+  const int value = VP8GetValue(br, bits);
+  return VP8Get(br) ? -value : value;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitReader
+
+#define VP8L_LOG8_WBITS 4  // Number of bytes needed to store VP8L_WBITS bits.
+
+#if !defined(WEBP_FORCE_ALIGNED) && \
+    (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
+     defined(__i386__) || defined(_M_IX86) || \
+     defined(__x86_64__) || defined(_M_X64))
+#define VP8L_USE_UNALIGNED_LOAD
+#endif
+
+static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
+  0,
+  0x000001, 0x000003, 0x000007, 0x00000f,
+  0x00001f, 0x00003f, 0x00007f, 0x0000ff,
+  0x0001ff, 0x0003ff, 0x0007ff, 0x000fff,
+  0x001fff, 0x003fff, 0x007fff, 0x00ffff,
+  0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff,
+  0x1fffff, 0x3fffff, 0x7fffff, 0xffffff
+};
+
+void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start,
+                       size_t length) {
+  size_t i;
+  vp8l_val_t value = 0;
+  assert(br != NULL);
+  assert(start != NULL);
+  assert(length < 0xfffffff8u);   // can't happen with a RIFF chunk.
+
+  br->len_ = length;
+  br->val_ = 0;
+  br->bit_pos_ = 0;
+  br->eos_ = 0;
+  br->error_ = 0;
+
+  if (length > sizeof(br->val_)) {
+    length = sizeof(br->val_);
+  }
+  for (i = 0; i < length; ++i) {
+    value |= (vp8l_val_t)start[i] << (8 * i);
+  }
+  br->val_ = value;
+  br->pos_ = length;
+  br->buf_ = start;
+}
+
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+                            const uint8_t* const buf, size_t len) {
+  assert(br != NULL);
+  assert(buf != NULL);
+  assert(len < 0xfffffff8u);   // can't happen with a RIFF chunk.
+  br->buf_ = buf;
+  br->len_ = len;
+  // pos_ > len_ should be considered a param error.
+  br->error_ = (br->pos_ > br->len_);
+  br->eos_ = br->error_ || VP8LIsEndOfStream(br);
+}
+
+// If not at EOS, reload up to VP8L_LBITS byte-by-byte
+static void ShiftBytes(VP8LBitReader* const br) {
+  while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
+    br->val_ >>= 8;
+    br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8);
+    ++br->pos_;
+    br->bit_pos_ -= 8;
+  }
+  br->eos_ = VP8LIsEndOfStream(br);
+}
+
+void VP8LDoFillBitWindow(VP8LBitReader* const br) {
+  assert(br->bit_pos_ >= VP8L_WBITS);
+  // TODO(jzern): given the fixed read size it may be possible to force
+  //              alignment in this block.
+#if defined(VP8L_USE_UNALIGNED_LOAD)
+  if (br->pos_ + sizeof(br->val_) < br->len_) {
+    br->val_ >>= VP8L_WBITS;
+    br->bit_pos_ -= VP8L_WBITS;
+    // The expression below needs a little-endian arch to work correctly.
+    // This gives a large speedup for decoding speed.
+    br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
+                (VP8L_LBITS - VP8L_WBITS);
+    br->pos_ += VP8L_LOG8_WBITS;
+    return;
+  }
+#endif
+  ShiftBytes(br);       // Slow path.
+}
+
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
+  assert(n_bits >= 0);
+  // Flag an error if end_of_stream or n_bits is more than allowed limit.
+  if (!br->eos_ && n_bits <= VP8L_MAX_NUM_BIT_READ) {
+    const uint32_t val =
+        (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+    const int new_bits = br->bit_pos_ + n_bits;
+    br->bit_pos_ = new_bits;
+    ShiftBytes(br);
+    return val;
+  } else {
+    br->error_ = 1;
+    return 0;
+  }
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/utils/bit_reader.h b/src/main/jni/libwebp/utils/bit_reader.h
new file mode 100644
index 000000000..f569734f5
--- /dev/null
+++ b/src/main/jni/libwebp/utils/bit_reader.h
@@ -0,0 +1,169 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_H_
+#define WEBP_UTILS_BIT_READER_H_
+
+#include <assert.h>
+#ifdef _MSC_VER
+#include <stdlib.h>  // _byteswap_ulong
+#endif
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The Boolean decoder needs to maintain infinite precision on the value_ field.
+// However, since range_ is only 8bit, we only need an active window of 8 bits
+// for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls
+// below 128, range_ is updated, and fresh bits read from the bitstream are
+// brought in as LSB. To avoid reading the fresh bits one by one (slow), we
+// cache BITS of them ahead. The total of (BITS + 8) bits must fit into a
+// natural register (with type bit_t). To fetch BITS bits from bitstream we
+// use a type lbit_t.
+//
+// BITS can be any multiple of 8 from 8 to 56 (inclusive).
+// Pick values that fit natural register size.
+
+#if defined(__i386__) || defined(_M_IX86)      // x86 32bit
+#define BITS 24
+#elif defined(__x86_64__) || defined(_M_X64)   // x86 64bit
+#define BITS 56
+#elif defined(__arm__) || defined(_M_ARM)      // ARM
+#define BITS 24
+#elif defined(__mips__)                        // MIPS
+#define BITS 24
+#else                                          // reasonable default
+#define BITS 24  // TODO(skal): test aarch64 and find the proper BITS value.
+#endif
+
+//------------------------------------------------------------------------------
+// Derived types and constants:
+//   bit_t = natural register type for storing 'value_' (which is BITS+8 bits)
+//   range_t = register for 'range_' (which is 8bits only)
+
+#if (BITS > 24)
+typedef uint64_t bit_t;
+#else
+typedef uint32_t bit_t;
+#endif
+
+typedef uint32_t range_t;
+
+//------------------------------------------------------------------------------
+// Bitreader
+
+typedef struct VP8BitReader VP8BitReader;
+struct VP8BitReader {
+  // boolean decoder  (keep the field ordering as is!)
+  bit_t value_;               // current value
+  range_t range_;             // current range minus 1. In [127, 254] interval.
+  int bits_;                  // number of valid bits left
+  // read buffer
+  const uint8_t* buf_;        // next byte to be read
+  const uint8_t* buf_end_;    // end of read buffer
+  int eof_;                   // true if input is exhausted
+};
+
+// Initialize the bit reader and the boolean decoder.
+void VP8InitBitReader(VP8BitReader* const br,
+                      const uint8_t* const start, const uint8_t* const end);
+
+// Update internal pointers to displace the byte buffer by the
+// relative offset 'offset'.
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset);
+
+// return the next value made of 'num_bits' bits
+uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
+static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
+  return VP8GetValue(br, 1);
+}
+
+// return the next value with sign-extension.
+int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
+
+// bit_reader_inl.h will implement the following methods:
+//   static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob)
+//   static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v)
+// and should be included by the .c files that actually need them.
+// This is to avoid recompiling the whole library whenever this file is touched,
+// and also allowing platform-specific ad-hoc hacks.
+
+// -----------------------------------------------------------------------------
+// Bitreader for lossless format
+
+// maximum number of bits (inclusive) the bit-reader can handle:
+#define VP8L_MAX_NUM_BIT_READ 24
+
+#define VP8L_LBITS 64  // Number of bits prefetched.
+#define VP8L_WBITS 32  // Minimum number of bytes ready after VP8LFillBitWindow.
+
+typedef uint64_t vp8l_val_t;  // right now, this bit-reader can only use 64bit.
+
+typedef struct {
+  vp8l_val_t     val_;        // pre-fetched bits
+  const uint8_t* buf_;        // input byte buffer
+  size_t         len_;        // buffer length
+  size_t         pos_;        // byte position in buf_
+  int            bit_pos_;    // current bit-reading position in val_
+  int            eos_;        // bitstream is finished
+  int            error_;      // an error occurred (buffer overflow attempt...)
+} VP8LBitReader;
+
+void VP8LInitBitReader(VP8LBitReader* const br,
+                       const uint8_t* const start,
+                       size_t length);
+
+//  Sets a new data buffer.
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+                            const uint8_t* const buffer, size_t length);
+
+// Reads the specified number of bits from read buffer.
+// Flags an error in case end_of_stream or n_bits is more than the allowed limit
+// of VP8L_MAX_NUM_BIT_READ (inclusive).
+// Flags eos_ if this read attempt is going to cross the read buffer.
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
+
+// Return the prefetched bits, so they can be looked up.
+static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
+  return (uint32_t)(br->val_ >> br->bit_pos_);
+}
+
+// Returns true if there was an attempt at reading bit past the end of
+// the buffer. Doesn't set br->eos_ flag.
+static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
+  assert(br->pos_ <= br->len_);
+  return (br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS);
+}
+
+// For jumping over a number of bits in the bit stream when accessed with
+// VP8LPrefetchBits and VP8LFillBitWindow.
+static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
+  br->bit_pos_ = val;
+  br->eos_ = VP8LIsEndOfStream(br);
+}
+
+// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
+// Speed critical, but infrequent part of the code can be non-inlined.
+extern void VP8LDoFillBitWindow(VP8LBitReader* const br);
+static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
+  if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br);
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_BIT_READER_H_ */
diff --git a/src/main/jni/libwebp/utils/bit_reader_inl.h b/src/main/jni/libwebp/utils/bit_reader_inl.h
new file mode 100644
index 000000000..81427c625
--- /dev/null
+++ b/src/main/jni/libwebp/utils/bit_reader_inl.h
@@ -0,0 +1,172 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Specific inlined methods for boolean decoder [VP8GetBit() ...]
+// This file should be included by the .c sources that actually need to call
+// these methods.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_INL_H_
+#define WEBP_UTILS_BIT_READER_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#ifdef WEBP_FORCE_ALIGNED
+#include <string.h>  // memcpy
+#endif
+
+#include "../dsp/dsp.h"
+#include "./bit_reader.h"
+#include "./endian_inl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Derived type lbit_t = natural type for memory I/O
+
+#if   (BITS > 32)
+typedef uint64_t lbit_t;
+#elif (BITS > 16)
+typedef uint32_t lbit_t;
+#elif (BITS >  8)
+typedef uint16_t lbit_t;
+#else
+typedef uint8_t lbit_t;
+#endif
+
+extern const uint8_t kVP8Log2Range[128];
+extern const range_t kVP8NewRange[128];
+
+// special case for the tail byte-reading
+void VP8LoadFinalBytes(VP8BitReader* const br);
+
+//------------------------------------------------------------------------------
+// Inlined critical functions
+
+// makes sure br->value_ has at least BITS bits worth of data
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+  assert(br != NULL && br->buf_ != NULL);
+  // Read 'BITS' bits at a time if possible.
+  if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
+    // convert memory type to register type (with some zero'ing!)
+    bit_t bits;
+#if defined(WEBP_FORCE_ALIGNED)
+    lbit_t in_bits;
+    memcpy(&in_bits, br->buf_, sizeof(in_bits));
+#elif defined(WEBP_USE_MIPS32)
+    // This is needed because of un-aligned read.
+    lbit_t in_bits;
+    lbit_t* p_buf_ = (lbit_t*)br->buf_;
+    __asm__ volatile(
+      ".set   push                             \n\t"
+      ".set   at                               \n\t"
+      ".set   macro                            \n\t"
+      "ulw    %[in_bits], 0(%[p_buf_])         \n\t"
+      ".set   pop                              \n\t"
+      : [in_bits]"=r"(in_bits)
+      : [p_buf_]"r"(p_buf_)
+      : "memory", "at"
+    );
+#else
+    const lbit_t in_bits = *(const lbit_t*)br->buf_;
+#endif
+    br->buf_ += BITS >> 3;
+#if !defined(WORDS_BIGENDIAN)
+#if (BITS > 32)
+    bits = BSwap64(in_bits);
+    bits >>= 64 - BITS;
+#elif (BITS >= 24)
+    bits = BSwap32(in_bits);
+    bits >>= (32 - BITS);
+#elif (BITS == 16)
+    bits = BSwap16(in_bits);
+#else   // BITS == 8
+    bits = (bit_t)in_bits;
+#endif  // BITS > 32
+#else    // WORDS_BIGENDIAN
+    bits = (bit_t)in_bits;
+    if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
+#endif
+    br->value_ = bits | (br->value_ << BITS);
+    br->bits_ += BITS;
+  } else {
+    VP8LoadFinalBytes(br);    // no need to be inlined
+  }
+}
+
+// Read a bit with proba 'prob'. Speed-critical function!
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+  // Don't move this declaration! It makes a big speed difference to store
+  // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
+  // alter br->range_ value.
+  range_t range = br->range_;
+  if (br->bits_ < 0) {
+    VP8LoadNewBytes(br);
+  }
+  {
+    const int pos = br->bits_;
+    const range_t split = (range * prob) >> 8;
+    const range_t value = (range_t)(br->value_ >> pos);
+#if defined(__arm__) || defined(_M_ARM)      // ARM-specific
+    const int bit = ((int)(split - value) >> 31) & 1;
+    if (value > split) {
+      range -= split + 1;
+      br->value_ -= (bit_t)(split + 1) << pos;
+    } else {
+      range = split;
+    }
+#else  // faster version on x86
+    int bit;  // Don't use 'const int bit = (value > split);", it's slower.
+    if (value > split) {
+      range -= split + 1;
+      br->value_ -= (bit_t)(split + 1) << pos;
+      bit = 1;
+    } else {
+      range = split;
+      bit = 0;
+    }
+#endif
+    if (range <= (range_t)0x7e) {
+      const int shift = kVP8Log2Range[range];
+      range = kVP8NewRange[range];
+      br->bits_ -= shift;
+    }
+    br->range_ = range;
+    return bit;
+  }
+}
+
+// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+  if (br->bits_ < 0) {
+    VP8LoadNewBytes(br);
+  }
+  {
+    const int pos = br->bits_;
+    const range_t split = br->range_ >> 1;
+    const range_t value = (range_t)(br->value_ >> pos);
+    const int32_t mask = (int32_t)(split - value) >> 31;  // -1 or 0
+    br->bits_ -= 1;
+    br->range_ += mask;
+    br->range_ |= 1;
+    br->value_ -= (bit_t)((split + 1) & mask) << pos;
+    return (v ^ mask) - mask;
+  }
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif   // WEBP_UTILS_BIT_READER_INL_H_
diff --git a/src/main/jni/libwebp/utils/bit_writer.c b/src/main/jni/libwebp/utils/bit_writer.c
new file mode 100644
index 000000000..9875ca662
--- /dev/null
+++ b/src/main/jni/libwebp/utils/bit_writer.c
@@ -0,0 +1,307 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#include <assert.h>
+#include <string.h>   // for memcpy()
+#include <stdlib.h>
+
+#include "./bit_writer.h"
+#include "./endian_inl.h"
+#include "./utils.h"
+
+//------------------------------------------------------------------------------
+// VP8BitWriter
+
+static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
+  uint8_t* new_buf;
+  size_t new_size;
+  const uint64_t needed_size_64b = (uint64_t)bw->pos_ + extra_size;
+  const size_t needed_size = (size_t)needed_size_64b;
+  if (needed_size_64b != needed_size) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (needed_size <= bw->max_pos_) return 1;
+  // If the following line wraps over 32bit, the test just after will catch it.
+  new_size = 2 * bw->max_pos_;
+  if (new_size < needed_size) new_size = needed_size;
+  if (new_size < 1024) new_size = 1024;
+  new_buf = (uint8_t*)WebPSafeMalloc(1ULL, new_size);
+  if (new_buf == NULL) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (bw->pos_ > 0) {
+    assert(bw->buf_ != NULL);
+    memcpy(new_buf, bw->buf_, bw->pos_);
+  }
+  WebPSafeFree(bw->buf_);
+  bw->buf_ = new_buf;
+  bw->max_pos_ = new_size;
+  return 1;
+}
+
+static void Flush(VP8BitWriter* const bw) {
+  const int s = 8 + bw->nb_bits_;
+  const int32_t bits = bw->value_ >> s;
+  assert(bw->nb_bits_ >= 0);
+  bw->value_ -= bits << s;
+  bw->nb_bits_ -= 8;
+  if ((bits & 0xff) != 0xff) {
+    size_t pos = bw->pos_;
+    if (!BitWriterResize(bw, bw->run_ + 1)) {
+      return;
+    }
+    if (bits & 0x100) {  // overflow -> propagate carry over pending 0xff's
+      if (pos > 0) bw->buf_[pos - 1]++;
+    }
+    if (bw->run_ > 0) {
+      const int value = (bits & 0x100) ? 0x00 : 0xff;
+      for (; bw->run_ > 0; --bw->run_) bw->buf_[pos++] = value;
+    }
+    bw->buf_[pos++] = bits;
+    bw->pos_ = pos;
+  } else {
+    bw->run_++;   // delay writing of bytes 0xff, pending eventual carry.
+  }
+}
+
+//------------------------------------------------------------------------------
+// renormalization
+
+static const uint8_t kNorm[128] = {  // renorm_sizes[i] = 8 - log2(i)
+     7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0
+};
+
+// range = ((range + 1) << kVP8Log2Range[range]) - 1
+static const uint8_t kNewRange[128] = {
+  127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
+  127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239,
+  247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179,
+  183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
+  243, 247, 251, 127, 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149,
+  151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175, 177, 179,
+  181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209,
+  211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239,
+  241, 243, 245, 247, 249, 251, 253, 127
+};
+
+int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) {
+  const int split = (bw->range_ * prob) >> 8;
+  if (bit) {
+    bw->value_ += split + 1;
+    bw->range_ -= split + 1;
+  } else {
+    bw->range_ = split;
+  }
+  if (bw->range_ < 127) {   // emit 'shift' bits out and renormalize
+    const int shift = kNorm[bw->range_];
+    bw->range_ = kNewRange[bw->range_];
+    bw->value_ <<= shift;
+    bw->nb_bits_ += shift;
+    if (bw->nb_bits_ > 0) Flush(bw);
+  }
+  return bit;
+}
+
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit) {
+  const int split = bw->range_ >> 1;
+  if (bit) {
+    bw->value_ += split + 1;
+    bw->range_ -= split + 1;
+  } else {
+    bw->range_ = split;
+  }
+  if (bw->range_ < 127) {
+    bw->range_ = kNewRange[bw->range_];
+    bw->value_ <<= 1;
+    bw->nb_bits_ += 1;
+    if (bw->nb_bits_ > 0) Flush(bw);
+  }
+  return bit;
+}
+
+void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits) {
+  int mask;
+  for (mask = 1 << (nb_bits - 1); mask; mask >>= 1)
+    VP8PutBitUniform(bw, value & mask);
+}
+
+void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits) {
+  if (!VP8PutBitUniform(bw, value != 0))
+    return;
+  if (value < 0) {
+    VP8PutValue(bw, ((-value) << 1) | 1, nb_bits + 1);
+  } else {
+    VP8PutValue(bw, value << 1, nb_bits + 1);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
+  bw->range_   = 255 - 1;
+  bw->value_   = 0;
+  bw->run_     = 0;
+  bw->nb_bits_ = -8;
+  bw->pos_     = 0;
+  bw->max_pos_ = 0;
+  bw->error_   = 0;
+  bw->buf_     = NULL;
+  return (expected_size > 0) ? BitWriterResize(bw, expected_size) : 1;
+}
+
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
+  VP8PutValue(bw, 0, 9 - bw->nb_bits_);
+  bw->nb_bits_ = 0;   // pad with zeroes
+  Flush(bw);
+  return bw->buf_;
+}
+
+int VP8BitWriterAppend(VP8BitWriter* const bw,
+                       const uint8_t* data, size_t size) {
+  assert(data != NULL);
+  if (bw->nb_bits_ != -8) return 0;   // Flush() must have been called
+  if (!BitWriterResize(bw, size)) return 0;
+  memcpy(bw->buf_ + bw->pos_, data, size);
+  bw->pos_ += size;
+  return 1;
+}
+
+void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
+  if (bw != NULL) {
+    WebPSafeFree(bw->buf_);
+    memset(bw, 0, sizeof(*bw));
+  }
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+// This is the minimum amount of size the memory buffer is guaranteed to grow
+// when extra space is needed.
+#define MIN_EXTRA_SIZE  (32768ULL)
+
+#define VP8L_WRITER_BYTES ((int)sizeof(vp8l_wtype_t))
+#define VP8L_WRITER_BITS (VP8L_WRITER_BYTES * 8)
+#define VP8L_WRITER_MAX_BITS (8 * (int)sizeof(vp8l_atype_t))
+
+// Returns 1 on success.
+static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
+  uint8_t* allocated_buf;
+  size_t allocated_size;
+  const size_t max_bytes = bw->end_ - bw->buf_;
+  const size_t current_size = bw->cur_ - bw->buf_;
+  const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
+  const size_t size_required = (size_t)size_required_64b;
+  if (size_required != size_required_64b) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (max_bytes > 0 && size_required <= max_bytes) return 1;
+  allocated_size = (3 * max_bytes) >> 1;
+  if (allocated_size < size_required) allocated_size = size_required;
+  // make allocated size multiple of 1k
+  allocated_size = (((allocated_size >> 10) + 1) << 10);
+  allocated_buf = (uint8_t*)WebPSafeMalloc(1ULL, allocated_size);
+  if (allocated_buf == NULL) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (current_size > 0) {
+    memcpy(allocated_buf, bw->buf_, current_size);
+  }
+  WebPSafeFree(bw->buf_);
+  bw->buf_ = allocated_buf;
+  bw->cur_ = bw->buf_ + current_size;
+  bw->end_ = bw->buf_ + allocated_size;
+  return 1;
+}
+
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
+  memset(bw, 0, sizeof(*bw));
+  return VP8LBitWriterResize(bw, expected_size);
+}
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
+  if (bw != NULL) {
+    WebPSafeFree(bw->buf_);
+    memset(bw, 0, sizeof(*bw));
+  }
+}
+
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
+  assert(n_bits <= 32);
+  // That's the max we can handle:
+  assert(bw->used_ + n_bits <= 2 * VP8L_WRITER_MAX_BITS);
+  if (n_bits > 0) {
+    // Local field copy.
+    vp8l_atype_t lbits = bw->bits_;
+    int used = bw->used_;
+    // Special case of overflow handling for 32bit accumulator (2-steps flush).
+    if (VP8L_WRITER_BITS == 16) {
+      if (used + n_bits >= VP8L_WRITER_MAX_BITS) {
+        // Fill up all the VP8L_WRITER_MAX_BITS so it can be flushed out below.
+        const int shift = VP8L_WRITER_MAX_BITS - used;
+        lbits |= (vp8l_atype_t)bits << used;
+        used = VP8L_WRITER_MAX_BITS;
+        n_bits -= shift;
+        bits >>= shift;
+        assert(n_bits <= VP8L_WRITER_MAX_BITS);
+      }
+    }
+    // If needed, make some room by flushing some bits out.
+    while (used >= VP8L_WRITER_BITS) {
+      if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+        const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+        if (extra_size != (size_t)extra_size ||
+            !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+          bw->cur_ = bw->buf_;
+          bw->error_ = 1;
+          return;
+        }
+      }
+      *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)lbits);
+      bw->cur_ += VP8L_WRITER_BYTES;
+      lbits >>= VP8L_WRITER_BITS;
+      used -= VP8L_WRITER_BITS;
+    }
+    // Eventually, insert new bits.
+    bw->bits_ = lbits | ((vp8l_atype_t)bits << used);
+    bw->used_ = used + n_bits;
+  }
+}
+
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
+  // flush leftover bits
+  if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) {
+    while (bw->used_ > 0) {
+      *bw->cur_++ = (uint8_t)bw->bits_;
+      bw->bits_ >>= 8;
+      bw->used_ -= 8;
+    }
+    bw->used_ = 0;
+  }
+  return bw->buf_;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/utils/bit_writer.h b/src/main/jni/libwebp/utils/bit_writer.h
new file mode 100644
index 000000000..c80d22ae9
--- /dev/null
+++ b/src/main/jni/libwebp/utils/bit_writer.h
@@ -0,0 +1,120 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_WRITER_H_
+#define WEBP_UTILS_BIT_WRITER_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Bit-writing
+
+typedef struct VP8BitWriter VP8BitWriter;
+struct VP8BitWriter {
+  int32_t  range_;      // range-1
+  int32_t  value_;
+  int      run_;        // number of outstanding bits
+  int      nb_bits_;    // number of pending bits
+  uint8_t* buf_;        // internal buffer. Re-allocated regularly. Not owned.
+  size_t   pos_;
+  size_t   max_pos_;
+  int      error_;      // true in case of error
+};
+
+// Initialize the object. Allocates some initial memory based on expected_size.
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw);
+// Release any pending memory and zeroes the object. Not a mandatory call.
+// Only useful in case of error, when the internal buffer hasn't been grabbed!
+void VP8BitWriterWipeOut(VP8BitWriter* const bw);
+
+int VP8PutBit(VP8BitWriter* const bw, int bit, int prob);
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
+void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits);
+void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits);
+
+// Appends some bytes to the internal buffer. Data is copied.
+int VP8BitWriterAppend(VP8BitWriter* const bw,
+                       const uint8_t* data, size_t size);
+
+// return approximate write position (in bits)
+static WEBP_INLINE uint64_t VP8BitWriterPos(const VP8BitWriter* const bw) {
+  return (uint64_t)(bw->pos_ + bw->run_) * 8 + 8 + bw->nb_bits_;
+}
+
+// Returns a pointer to the internal buffer.
+static WEBP_INLINE uint8_t* VP8BitWriterBuf(const VP8BitWriter* const bw) {
+  return bw->buf_;
+}
+// Returns the size of the internal buffer.
+static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
+  return bw->pos_;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+#if defined(__x86_64__) || defined(_M_X64)   // 64bit
+typedef uint64_t vp8l_atype_t;   // accumulator type
+typedef uint32_t vp8l_wtype_t;   // writing type
+#define WSWAP HToLE32
+#else
+typedef uint32_t vp8l_atype_t;
+typedef uint16_t vp8l_wtype_t;
+#define WSWAP HToLE16
+#endif
+
+typedef struct {
+  vp8l_atype_t bits_;   // bit accumulator
+  int          used_;   // number of bits used in accumulator
+  uint8_t*     buf_;    // start of buffer
+  uint8_t*     cur_;    // current write position
+  uint8_t*     end_;    // end of buffer
+
+  // After all bits are written (VP8LBitWriterFinish()), the caller must observe
+  // the state of error_. A value of 1 indicates that a memory allocation
+  // failure has happened during bit writing. A value of 0 indicates successful
+  // writing of bits.
+  int error_;
+} VP8LBitWriter;
+
+static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
+  return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
+}
+
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
+
+// Returns 0 in case of memory allocation error.
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
+
+// This function writes bits into bytes in increasing addresses (little endian),
+// and within a byte least-significant-bit first.
+// This function can write up to 32 bits in one go, but VP8LBitReader can only
+// read 24 bits max (VP8L_MAX_NUM_BIT_READ).
+// VP8LBitWriter's error_ flag is set in case of  memory allocation error.
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_BIT_WRITER_H_ */
diff --git a/src/main/jni/libwebp/utils/color_cache.c b/src/main/jni/libwebp/utils/color_cache.c
new file mode 100644
index 000000000..8a88f08b7
--- /dev/null
+++ b/src/main/jni/libwebp/utils/color_cache.c
@@ -0,0 +1,39 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./color_cache.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// VP8LColorCache.
+
+int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
+  const int hash_size = 1 << hash_bits;
+  assert(cc != NULL);
+  assert(hash_bits > 0);
+  cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
+                                          sizeof(*cc->colors_));
+  if (cc->colors_ == NULL) return 0;
+  cc->hash_shift_ = 32 - hash_bits;
+  return 1;
+}
+
+void VP8LColorCacheClear(VP8LColorCache* const cc) {
+  if (cc != NULL) {
+    WebPSafeFree(cc->colors_);
+    cc->colors_ = NULL;
+  }
+}
+
diff --git a/src/main/jni/libwebp/utils/color_cache.h b/src/main/jni/libwebp/utils/color_cache.h
new file mode 100644
index 000000000..0f824ed45
--- /dev/null
+++ b/src/main/jni/libwebp/utils/color_cache.h
@@ -0,0 +1,70 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Authors: Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_COLOR_CACHE_H_
+#define WEBP_UTILS_COLOR_CACHE_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Main color cache struct.
+typedef struct {
+  uint32_t *colors_;  // color entries
+  int hash_shift_;    // Hash shift: 32 - hash_bits.
+} VP8LColorCache;
+
+static const uint32_t kHashMul = 0x1e35a7bd;
+
+static WEBP_INLINE uint32_t VP8LColorCacheLookup(
+    const VP8LColorCache* const cc, uint32_t key) {
+  assert(key <= (~0U >> cc->hash_shift_));
+  return cc->colors_[key];
+}
+
+static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
+                                             uint32_t argb) {
+  const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+  cc->colors_[key] = argb;
+}
+
+static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
+                                              uint32_t argb) {
+  return (kHashMul * argb) >> cc->hash_shift_;
+}
+
+static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
+                                              uint32_t argb) {
+  const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+  return cc->colors_[key] == argb;
+}
+
+//------------------------------------------------------------------------------
+
+// Initializes the color cache with 'hash_bits' bits for the keys.
+// Returns false in case of memory error.
+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits);
+
+// Delete the memory associated to color cache.
+void VP8LColorCacheClear(VP8LColorCache* const color_cache);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBP_UTILS_COLOR_CACHE_H_
diff --git a/src/main/jni/libwebp/utils/endian_inl.h b/src/main/jni/libwebp/utils/endian_inl.h
new file mode 100644
index 000000000..cd56c37f4
--- /dev/null
+++ b/src/main/jni/libwebp/utils/endian_inl.h
@@ -0,0 +1,100 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Endian related functions.
+
+#ifndef WEBP_UTILS_ENDIAN_INL_H_
+#define WEBP_UTILS_ENDIAN_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../dsp/dsp.h"
+#include "../webp/types.h"
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && \
+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+#if defined(WORDS_BIGENDIAN)
+#define HToLE32 BSwap32
+#define HToLE16 BSwap16
+#else
+#define HToLE32(x) (x)
+#define HToLE16(x) (x)
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
+#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3)
+#define HAVE_BUILTIN_BSWAP32
+#define HAVE_BUILTIN_BSWAP64
+#endif
+// clang-3.3 and gcc-4.8 have a builtin function for swap16
+#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3)
+#define HAVE_BUILTIN_BSWAP16
+#endif
+#endif  // !HAVE_CONFIG_H
+
+static WEBP_INLINE uint16_t BSwap16(uint16_t x) {
+#if defined(HAVE_BUILTIN_BSWAP16)
+  return __builtin_bswap16(x);
+#elif defined(_MSC_VER)
+  return _byteswap_ushort(x);
+#else
+  // gcc will recognize a 'rorw $8, ...' here:
+  return (x >> 8) | ((x & 0xff) << 8);
+#endif  // HAVE_BUILTIN_BSWAP16
+}
+
+static WEBP_INLINE uint32_t BSwap32(uint32_t x) {
+#if defined(WEBP_USE_MIPS32_R2)
+  uint32_t ret;
+  __asm__ volatile (
+    "wsbh   %[ret], %[x]          \n\t"
+    "rotr   %[ret], %[ret],  16   \n\t"
+    : [ret]"=r"(ret)
+    : [x]"r"(x)
+  );
+  return ret;
+#elif defined(HAVE_BUILTIN_BSWAP32)
+  return __builtin_bswap32(x);
+#elif defined(__i386__) || defined(__x86_64__)
+  uint32_t swapped_bytes;
+  __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x));
+  return swapped_bytes;
+#elif defined(_MSC_VER)
+  return (uint32_t)_byteswap_ulong(x);
+#else
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+#endif  // HAVE_BUILTIN_BSWAP32
+}
+
+static WEBP_INLINE uint64_t BSwap64(uint64_t x) {
+#if defined(HAVE_BUILTIN_BSWAP64)
+  return __builtin_bswap64(x);
+#elif defined(__x86_64__)
+  uint64_t swapped_bytes;
+  __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x));
+  return swapped_bytes;
+#elif defined(_MSC_VER)
+  return (uint64_t)_byteswap_uint64(x);
+#else  // generic code for swapping 64-bit values (suggested by bdb@)
+  x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32);
+  x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16);
+  x = ((x & 0xff00ff00ff00ff00ull) >>  8) | ((x & 0x00ff00ff00ff00ffull) <<  8);
+  return x;
+#endif  // HAVE_BUILTIN_BSWAP64
+}
+
+#endif  // WEBP_UTILS_ENDIAN_INL_H_
diff --git a/src/main/jni/libwebp/utils/filters.c b/src/main/jni/libwebp/utils/filters.c
new file mode 100644
index 000000000..2d15bd0e4
--- /dev/null
+++ b/src/main/jni/libwebp/utils/filters.c
@@ -0,0 +1,266 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./filters.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro.
+
+# define SANITY_CHECK(in, out)                                                 \
+  assert(in != NULL);                                                          \
+  assert(out != NULL);                                                         \
+  assert(width > 0);                                                           \
+  assert(height > 0);                                                          \
+  assert(stride >= width);                                                     \
+  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
+  (void)height;  // Silence unused warning.
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
+                                    uint8_t* dst, int length, int inverse) {
+  int i;
+  if (inverse) {
+    for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
+  } else {
+    for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
+  }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  if (row == 0) {
+    // Leftmost pixel is the same as input for topmost scanline.
+    out[0] = in[0];
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    // Leftmost pixel is predicted from above.
+    PredictLine(in, preds - stride, out, 1, inverse);
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row,
+                               int num_rows, uint8_t* data) {
+  DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  if (row == 0) {
+    // Very first top-left pixel is copied.
+    out[0] = in[0];
+    // Rest of top scan-line is left-predicted.
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    in += stride;
+    out += stride;
+  } else {
+    // We are starting from in-between. Make sure 'preds' points to prev row.
+    preds -= stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    PredictLine(in, preds, out, width, inverse);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+static void VerticalUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {
+  DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+  const int g = a + b - c;
+  return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
+}
+
+static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  // left prediction for top scan-line
+  if (row == 0) {
+    out[0] = in[0];
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    int w;
+    // leftmost pixel: predict from above.
+    PredictLine(in, preds - stride, out, 1, inverse);
+    for (w = 1; w < width; ++w) {
+      const int pred = GradientPredictor(preds[w - 1],
+                                         preds[w - stride],
+                                         preds[w - stride - 1]);
+      out[w] = in[w] + (inverse ? pred : -pred);
+    }
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {
+  DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+#undef SANITY_CHECK
+
+// -----------------------------------------------------------------------------
+// Quick estimate of a potentially interesting filter mode to try.
+
+#define SMAX 16
+#define SDIFF(a, b) (abs((a) - (b)) >> 4)   // Scoring diff, in [0..SMAX)
+
+WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+                                    int width, int height, int stride) {
+  int i, j;
+  int bins[WEBP_FILTER_LAST][SMAX];
+  memset(bins, 0, sizeof(bins));
+
+  // We only sample every other pixels. That's enough.
+  for (j = 2; j < height - 1; j += 2) {
+    const uint8_t* const p = data + j * stride;
+    int mean = p[0];
+    for (i = 2; i < width - 1; i += 2) {
+      const int diff0 = SDIFF(p[i], mean);
+      const int diff1 = SDIFF(p[i], p[i - 1]);
+      const int diff2 = SDIFF(p[i], p[i - width]);
+      const int grad_pred =
+          GradientPredictor(p[i - 1], p[i - width], p[i - width - 1]);
+      const int diff3 = SDIFF(p[i], grad_pred);
+      bins[WEBP_FILTER_NONE][diff0] = 1;
+      bins[WEBP_FILTER_HORIZONTAL][diff1] = 1;
+      bins[WEBP_FILTER_VERTICAL][diff2] = 1;
+      bins[WEBP_FILTER_GRADIENT][diff3] = 1;
+      mean = (3 * mean + p[i] + 2) >> 2;
+    }
+  }
+  {
+    int filter;
+    WEBP_FILTER_TYPE best_filter = WEBP_FILTER_NONE;
+    int best_score = 0x7fffffff;
+    for (filter = WEBP_FILTER_NONE; filter < WEBP_FILTER_LAST; ++filter) {
+      int score = 0;
+      for (i = 0; i < SMAX; ++i) {
+        if (bins[filter][i] > 0) {
+          score += i;
+        }
+      }
+      if (score < best_score) {
+        best_score = score;
+        best_filter = (WEBP_FILTER_TYPE)filter;
+      }
+    }
+    return best_filter;
+  }
+}
+
+#undef SMAX
+#undef SDIFF
+
+//------------------------------------------------------------------------------
+
+const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST] = {
+  NULL,              // WEBP_FILTER_NONE
+  HorizontalFilter,  // WEBP_FILTER_HORIZONTAL
+  VerticalFilter,    // WEBP_FILTER_VERTICAL
+  GradientFilter     // WEBP_FILTER_GRADIENT
+};
+
+const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
+  NULL,                // WEBP_FILTER_NONE
+  HorizontalUnfilter,  // WEBP_FILTER_HORIZONTAL
+  VerticalUnfilter,    // WEBP_FILTER_VERTICAL
+  GradientUnfilter     // WEBP_FILTER_GRADIENT
+};
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/utils/filters.h b/src/main/jni/libwebp/utils/filters.h
new file mode 100644
index 000000000..dde39cb5c
--- /dev/null
+++ b/src/main/jni/libwebp/utils/filters.h
@@ -0,0 +1,59 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_FILTERS_H_
+#define WEBP_UTILS_FILTERS_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Filters.
+typedef enum {
+  WEBP_FILTER_NONE = 0,
+  WEBP_FILTER_HORIZONTAL,
+  WEBP_FILTER_VERTICAL,
+  WEBP_FILTER_GRADIENT,
+  WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
+  WEBP_FILTER_BEST,
+  WEBP_FILTER_FAST
+} WEBP_FILTER_TYPE;
+
+typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
+                               int stride, uint8_t* out);
+typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
+                                 int row, int num_rows, uint8_t* data);
+
+// Filter the given data using the given predictor.
+// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
+// in raster order.
+// 'stride' is number of bytes per scan line (with possible padding).
+// 'out' should be pre-allocated.
+extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+
+// In-place reconstruct the original data from the given filtered data.
+// The reconstruction will be done for 'num_rows' rows starting from 'row'
+// (assuming rows upto 'row - 1' are already reconstructed).
+extern const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+// Fast estimate of a potentially good filter.
+WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+                                    int width, int height, int stride);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_FILTERS_H_ */
diff --git a/src/main/jni/libwebp/utils/huffman.c b/src/main/jni/libwebp/utils/huffman.c
new file mode 100644
index 000000000..c4c16d9e6
--- /dev/null
+++ b/src/main/jni/libwebp/utils/huffman.c
@@ -0,0 +1,319 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// Uncomment the following to use look-up table for ReverseBits()
+// (might be faster on some platform)
+// #define USE_LUT_REVERSE_BITS
+
+// Huffman data read via DecodeImageStream is represented in two (red and green)
+// bytes.
+#define MAX_HTREE_GROUPS    0x10000
+#define NON_EXISTENT_SYMBOL (-1)
+
+static void TreeNodeInit(HuffmanTreeNode* const node) {
+  node->children_ = -1;   // means: 'unassigned so far'
+}
+
+static int NodeIsEmpty(const HuffmanTreeNode* const node) {
+  return (node->children_ < 0);
+}
+
+static int IsFull(const HuffmanTree* const tree) {
+  return (tree->num_nodes_ == tree->max_nodes_);
+}
+
+static void AssignChildren(HuffmanTree* const tree,
+                           HuffmanTreeNode* const node) {
+  HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;
+  node->children_ = (int)(children - node);
+  assert(children - node == (int)(children - node));
+  tree->num_nodes_ += 2;
+  TreeNodeInit(children + 0);
+  TreeNodeInit(children + 1);
+}
+
+// A Huffman tree is a full binary tree; and in a full binary tree with L
+// leaves, the total number of nodes N = 2 * L - 1.
+static int HuffmanTreeMaxNodes(int num_leaves) {
+  return (2 * num_leaves - 1);
+}
+
+static int HuffmanTreeAllocate(HuffmanTree* const tree, int num_nodes) {
+  assert(tree != NULL);
+  tree->root_ =
+      (HuffmanTreeNode*)WebPSafeMalloc(num_nodes, sizeof(*tree->root_));
+  return (tree->root_ != NULL);
+}
+
+static int TreeInit(HuffmanTree* const tree, int num_leaves) {
+  assert(tree != NULL);
+  if (num_leaves == 0) return 0;
+  tree->max_nodes_ = HuffmanTreeMaxNodes(num_leaves);
+  assert(tree->max_nodes_ < (1 << 16));   // limit for the lut_jump_ table
+  if (!HuffmanTreeAllocate(tree, tree->max_nodes_)) return 0;
+  TreeNodeInit(tree->root_);  // Initialize root.
+  tree->num_nodes_ = 1;
+  memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_));
+  memset(tree->lut_jump_, 0, sizeof(tree->lut_jump_));
+  return 1;
+}
+
+void VP8LHuffmanTreeFree(HuffmanTree* const tree) {
+  if (tree != NULL) {
+    WebPSafeFree(tree->root_);
+    tree->root_ = NULL;
+    tree->max_nodes_ = 0;
+    tree->num_nodes_ = 0;
+  }
+}
+
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) {
+  HTreeGroup* const htree_groups =
+      (HTreeGroup*)WebPSafeCalloc(num_htree_groups, sizeof(*htree_groups));
+  assert(num_htree_groups <= MAX_HTREE_GROUPS);
+  if (htree_groups == NULL) {
+    return NULL;
+  }
+  return htree_groups;
+}
+
+void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups) {
+  if (htree_groups != NULL) {
+    int i, j;
+    for (i = 0; i < num_htree_groups; ++i) {
+      HuffmanTree* const htrees = htree_groups[i].htrees_;
+      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+        VP8LHuffmanTreeFree(&htrees[j]);
+      }
+    }
+    WebPSafeFree(htree_groups);
+  }
+}
+
+int VP8LHuffmanCodeLengthsToCodes(
+    const int* const code_lengths, int code_lengths_size,
+    int* const huff_codes) {
+  int symbol;
+  int code_len;
+  int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int curr_code;
+  int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int max_code_length = 0;
+
+  assert(code_lengths != NULL);
+  assert(code_lengths_size > 0);
+  assert(huff_codes != NULL);
+
+  // Calculate max code length.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > max_code_length) {
+      max_code_length = code_lengths[symbol];
+    }
+  }
+  if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;
+
+  // Calculate code length histogram.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    ++code_length_hist[code_lengths[symbol]];
+  }
+  code_length_hist[0] = 0;
+
+  // Calculate the initial values of 'next_codes' for each code length.
+  // next_codes[code_len] denotes the code to be assigned to the next symbol
+  // of code length 'code_len'.
+  curr_code = 0;
+  next_codes[0] = -1;  // Unused, as code length = 0 implies code doesn't exist.
+  for (code_len = 1; code_len <= max_code_length; ++code_len) {
+    curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
+    next_codes[code_len] = curr_code;
+  }
+
+  // Get symbols.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
+    } else {
+      huff_codes[symbol] = NON_EXISTENT_SYMBOL;
+    }
+  }
+  return 1;
+}
+
+#ifndef USE_LUT_REVERSE_BITS
+
+static int ReverseBitsShort(int bits, int num_bits) {
+  int retval = 0;
+  int i;
+  assert(num_bits <= 8);   // Not a hard requirement, just for coherency.
+  for (i = 0; i < num_bits; ++i) {
+    retval <<= 1;
+    retval |= bits & 1;
+    bits >>= 1;
+  }
+  return retval;
+}
+
+#else
+
+static const uint8_t kReversedBits[16] = {  // Pre-reversed 4-bit values.
+  0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+  0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+};
+
+static int ReverseBitsShort(int bits, int num_bits) {
+  const uint8_t v = (kReversedBits[bits & 0xf] << 4) | kReversedBits[bits >> 4];
+  assert(num_bits <= 8);
+  return v >> (8 - num_bits);
+}
+
+#endif
+
+static int TreeAddSymbol(HuffmanTree* const tree,
+                         int symbol, int code, int code_length) {
+  int step = HUFF_LUT_BITS;
+  int base_code;
+  HuffmanTreeNode* node = tree->root_;
+  const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
+  assert(symbol == (int16_t)symbol);
+  if (code_length <= HUFF_LUT_BITS) {
+    int i;
+    base_code = ReverseBitsShort(code, code_length);
+    for (i = 0; i < (1 << (HUFF_LUT_BITS - code_length)); ++i) {
+      const int idx = base_code | (i << code_length);
+      tree->lut_symbol_[idx] = (int16_t)symbol;
+      tree->lut_bits_[idx] = code_length;
+    }
+  } else {
+    base_code = ReverseBitsShort((code >> (code_length - HUFF_LUT_BITS)),
+                                 HUFF_LUT_BITS);
+  }
+  while (code_length-- > 0) {
+    if (node >= max_node) {
+      return 0;
+    }
+    if (NodeIsEmpty(node)) {
+      if (IsFull(tree)) return 0;    // error: too many symbols.
+      AssignChildren(tree, node);
+    } else if (!HuffmanTreeNodeIsNotLeaf(node)) {
+      return 0;  // leaf is already occupied.
+    }
+    node += node->children_ + ((code >> code_length) & 1);
+    if (--step == 0) {
+      tree->lut_jump_[base_code] = (int16_t)(node - tree->root_);
+    }
+  }
+  if (NodeIsEmpty(node)) {
+    node->children_ = 0;      // turn newly created node into a leaf.
+  } else if (HuffmanTreeNodeIsNotLeaf(node)) {
+    return 0;   // trying to assign a symbol to already used code.
+  }
+  node->symbol_ = symbol;  // Add symbol in this node.
+  return 1;
+}
+
+int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                                 const int* const code_lengths,
+                                 int* const codes,
+                                 int code_lengths_size) {
+  int symbol;
+  int num_symbols = 0;
+  int root_symbol = 0;
+
+  assert(tree != NULL);
+  assert(code_lengths != NULL);
+
+  // Find out number of symbols and the root symbol.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      // Note: code length = 0 indicates non-existent symbol.
+      ++num_symbols;
+      root_symbol = symbol;
+    }
+  }
+
+  // Initialize the tree. Will fail for num_symbols = 0
+  if (!TreeInit(tree, num_symbols)) return 0;
+
+  // Build tree.
+  if (num_symbols == 1) {  // Trivial case.
+    const int max_symbol = code_lengths_size;
+    if (root_symbol < 0 || root_symbol >= max_symbol) {
+      VP8LHuffmanTreeFree(tree);
+      return 0;
+    }
+    return TreeAddSymbol(tree, root_symbol, 0, 0);
+  } else {  // Normal case.
+    int ok = 0;
+    memset(codes, 0, code_lengths_size * sizeof(*codes));
+
+    if (!VP8LHuffmanCodeLengthsToCodes(code_lengths, code_lengths_size,
+                                       codes)) {
+      goto End;
+    }
+
+    // Add symbols one-by-one.
+    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+      if (code_lengths[symbol] > 0) {
+        if (!TreeAddSymbol(tree, symbol, codes[symbol],
+                           code_lengths[symbol])) {
+          goto End;
+        }
+      }
+    }
+    ok = 1;
+ End:
+    ok = ok && IsFull(tree);
+    if (!ok) VP8LHuffmanTreeFree(tree);
+    return ok;
+  }
+}
+
+int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree,
+                                 const int* const code_lengths,
+                                 const int* const codes,
+                                 const int* const symbols, int max_symbol,
+                                 int num_symbols) {
+  int ok = 0;
+  int i;
+  assert(tree != NULL);
+  assert(code_lengths != NULL);
+  assert(codes != NULL);
+  assert(symbols != NULL);
+
+  // Initialize the tree. Will fail if num_symbols = 0.
+  if (!TreeInit(tree, num_symbols)) return 0;
+
+  // Add symbols one-by-one.
+  for (i = 0; i < num_symbols; ++i) {
+    if (codes[i] != NON_EXISTENT_SYMBOL) {
+      if (symbols[i] < 0 || symbols[i] >= max_symbol) {
+        goto End;
+      }
+      if (!TreeAddSymbol(tree, symbols[i], codes[i], code_lengths[i])) {
+        goto End;
+      }
+    }
+  }
+  ok = 1;
+ End:
+  ok = ok && IsFull(tree);
+  if (!ok) VP8LHuffmanTreeFree(tree);
+  return ok;
+}
diff --git a/src/main/jni/libwebp/utils/huffman.h b/src/main/jni/libwebp/utils/huffman.h
new file mode 100644
index 000000000..624bc1750
--- /dev/null
+++ b/src/main/jni/libwebp/utils/huffman.h
@@ -0,0 +1,102 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_HUFFMAN_H_
+#define WEBP_UTILS_HUFFMAN_H_
+
+#include <assert.h>
+#include "../webp/format_constants.h"
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A node of a Huffman tree.
+typedef struct {
+  int symbol_;
+  int children_;  // delta offset to both children (contiguous) or 0 if leaf.
+} HuffmanTreeNode;
+
+// Huffman Tree.
+#define HUFF_LUT_BITS 7
+#define HUFF_LUT (1U << HUFF_LUT_BITS)
+typedef struct HuffmanTree HuffmanTree;
+struct HuffmanTree {
+  // Fast lookup for short bit lengths.
+  uint8_t lut_bits_[HUFF_LUT];
+  int16_t lut_symbol_[HUFF_LUT];
+  int16_t lut_jump_[HUFF_LUT];
+  // Complete tree for lookups.
+  HuffmanTreeNode* root_;   // all the nodes, starting at root.
+  int max_nodes_;           // max number of nodes
+  int num_nodes_;           // number of currently occupied nodes
+};
+
+// Huffman Tree group.
+typedef struct HTreeGroup HTreeGroup;
+struct HTreeGroup {
+  HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
+};
+
+// Returns true if the given node is not a leaf of the Huffman tree.
+static WEBP_INLINE int HuffmanTreeNodeIsNotLeaf(
+    const HuffmanTreeNode* const node) {
+  return node->children_;
+}
+
+// Go down one level. Most critical function. 'right_child' must be 0 or 1.
+static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
+    const HuffmanTreeNode* node, int right_child) {
+  return node + node->children_ + right_child;
+}
+
+// Releases the nodes of the Huffman tree.
+// Note: It does NOT free 'tree' itself.
+void VP8LHuffmanTreeFree(HuffmanTree* const tree);
+
+// Creates the instance of HTreeGroup with specified number of tree-groups.
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups);
+
+// Releases the memory allocated for HTreeGroup.
+void VP8LHtreeGroupsFree(HTreeGroup* htree_groups, int num_htree_groups);
+
+// Builds Huffman tree assuming code lengths are implicitly in symbol order.
+// The 'huff_codes' and 'code_lengths' are pre-allocated temporary memory
+// buffers, used for creating the huffman tree.
+// Returns false in case of error (invalid tree or memory error).
+int VP8LHuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                                 const int* const code_lengths,
+                                 int* const huff_codes,
+                                 int code_lengths_size);
+
+// Build a Huffman tree with explicitly given lists of code lengths, codes
+// and symbols. Verifies that all symbols added are smaller than max_symbol.
+// Returns false in case of an invalid symbol, invalid tree or memory error.
+int VP8LHuffmanTreeBuildExplicit(HuffmanTree* const tree,
+                                 const int* const code_lengths,
+                                 const int* const codes,
+                                 const int* const symbols, int max_symbol,
+                                 int num_symbols);
+
+// Utility: converts Huffman code lengths to corresponding Huffman codes.
+// 'huff_codes' should be pre-allocated.
+// Returns false in case of error (memory allocation, invalid codes).
+int VP8LHuffmanCodeLengthsToCodes(const int* const code_lengths,
+                                  int code_lengths_size, int* const huff_codes);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_HUFFMAN_H_
diff --git a/src/main/jni/libwebp/utils/huffman_encode.c b/src/main/jni/libwebp/utils/huffman_encode.c
new file mode 100644
index 000000000..6421c2bee
--- /dev/null
+++ b/src/main/jni/libwebp/utils/huffman_encode.c
@@ -0,0 +1,417 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman_encode.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Util function to optimize the symbol map for RLE coding
+
+// Heuristics for selecting the stride ranges to collapse.
+static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+  return abs(a - b) < 4;
+}
+
+// Change the population counts in a way that the consequent
+// Huffman tree compression, especially its RLE-part, give smaller output.
+static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
+                                  uint32_t* const counts) {
+  // 1) Let's make the Huffman code more compatible with rle encoding.
+  int i;
+  for (; length >= 0; --length) {
+    if (length == 0) {
+      return;  // All zeros.
+    }
+    if (counts[length - 1] != 0) {
+      // Now counts[0..length - 1] does not have trailing zeros.
+      break;
+    }
+  }
+  // 2) Let's mark all population counts that already can be encoded
+  // with an rle code.
+  {
+    // Let's not spoil any of the existing good rle codes.
+    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
+    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
+    uint32_t symbol = counts[0];
+    int stride = 0;
+    for (i = 0; i < length + 1; ++i) {
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && stride >= 5) ||
+            (symbol != 0 && stride >= 7)) {
+          int k;
+          for (k = 0; k < stride; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        stride = 1;
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++stride;
+      }
+    }
+  }
+  // 3) Let's replace those population counts that lead to more rle codes.
+  {
+    uint32_t stride = 0;
+    uint32_t limit = counts[0];
+    uint32_t sum = 0;
+    for (i = 0; i < length + 1; ++i) {
+      if (i == length || good_for_rle[i] ||
+          (i != 0 && good_for_rle[i - 1]) ||
+          !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+        if (stride >= 4 || (stride >= 3 && sum == 0)) {
+          uint32_t k;
+          // The stride must end, collapse what we have, if we have enough (4).
+          uint32_t count = (sum + stride / 2) / stride;
+          if (count < 1) {
+            count = 1;
+          }
+          if (sum == 0) {
+            // Don't make an all zeros stride to be upgraded to ones.
+            count = 0;
+          }
+          for (k = 0; k < stride; ++k) {
+            // We don't want to change value at counts[i],
+            // that is already belonging to the next stride. Thus - 1.
+            counts[i - k - 1] = count;
+          }
+        }
+        stride = 0;
+        sum = 0;
+        if (i < length - 3) {
+          // All interesting strides have a count of at least 4,
+          // at least when non-zeros.
+          limit = (counts[i] + counts[i + 1] +
+                   counts[i + 2] + counts[i + 3] + 2) / 4;
+        } else if (i < length) {
+          limit = counts[i];
+        } else {
+          limit = 0;
+        }
+      }
+      ++stride;
+      if (i != length) {
+        sum += counts[i];
+        if (stride >= 4) {
+          limit = (sum + stride / 2) / stride;
+        }
+      }
+    }
+  }
+}
+
+// A comparer function for two Huffman trees: sorts first by 'total count'
+// (more comes first), and then by 'value' (more comes first).
+static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
+  const HuffmanTree* const t1 = (const HuffmanTree*)ptr1;
+  const HuffmanTree* const t2 = (const HuffmanTree*)ptr2;
+  if (t1->total_count_ > t2->total_count_) {
+    return -1;
+  } else if (t1->total_count_ < t2->total_count_) {
+    return 1;
+  } else {
+    assert(t1->value_ != t2->value_);
+    return (t1->value_ < t2->value_) ? -1 : 1;
+  }
+}
+
+static void SetBitDepths(const HuffmanTree* const tree,
+                         const HuffmanTree* const pool,
+                         uint8_t* const bit_depths, int level) {
+  if (tree->pool_index_left_ >= 0) {
+    SetBitDepths(&pool[tree->pool_index_left_], pool, bit_depths, level + 1);
+    SetBitDepths(&pool[tree->pool_index_right_], pool, bit_depths, level + 1);
+  } else {
+    bit_depths[tree->value_] = level;
+  }
+}
+
+// Create an optimal Huffman tree.
+//
+// (data,length): population counts.
+// tree_limit: maximum bit depth (inclusive) of the codes.
+// bit_depths[]: how many bits are used for the symbol.
+//
+// Returns 0 when an error has occurred.
+//
+// The catch here is that the tree cannot be arbitrarily deep
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+static void GenerateOptimalTree(const uint32_t* const histogram,
+                                int histogram_size,
+                                HuffmanTree* tree, int tree_depth_limit,
+                                uint8_t* const bit_depths) {
+  uint32_t count_min;
+  HuffmanTree* tree_pool;
+  int tree_size_orig = 0;
+  int i;
+
+  for (i = 0; i < histogram_size; ++i) {
+    if (histogram[i] != 0) {
+      ++tree_size_orig;
+    }
+  }
+
+  if (tree_size_orig == 0) {   // pretty optimal already!
+    return;
+  }
+
+  tree_pool = tree + tree_size_orig;
+
+  // For block sizes with less than 64k symbols we never need to do a
+  // second iteration of this loop.
+  // If we actually start running inside this loop a lot, we would perhaps
+  // be better off with the Katajainen algorithm.
+  assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
+  for (count_min = 1; ; count_min *= 2) {
+    int tree_size = tree_size_orig;
+    // We need to pack the Huffman tree in tree_depth_limit bits.
+    // So, we try by faking histogram entries to be at least 'count_min'.
+    int idx = 0;
+    int j;
+    for (j = 0; j < histogram_size; ++j) {
+      if (histogram[j] != 0) {
+        const uint32_t count =
+            (histogram[j] < count_min) ? count_min : histogram[j];
+        tree[idx].total_count_ = count;
+        tree[idx].value_ = j;
+        tree[idx].pool_index_left_ = -1;
+        tree[idx].pool_index_right_ = -1;
+        ++idx;
+      }
+    }
+
+    // Build the Huffman tree.
+    qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);
+
+    if (tree_size > 1) {  // Normal case.
+      int tree_pool_size = 0;
+      while (tree_size > 1) {  // Finish when we have only one root.
+        uint32_t count;
+        tree_pool[tree_pool_size++] = tree[tree_size - 1];
+        tree_pool[tree_pool_size++] = tree[tree_size - 2];
+        count = tree_pool[tree_pool_size - 1].total_count_ +
+                tree_pool[tree_pool_size - 2].total_count_;
+        tree_size -= 2;
+        {
+          // Search for the insertion point.
+          int k;
+          for (k = 0; k < tree_size; ++k) {
+            if (tree[k].total_count_ <= count) {
+              break;
+            }
+          }
+          memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
+          tree[k].total_count_ = count;
+          tree[k].value_ = -1;
+
+          tree[k].pool_index_left_ = tree_pool_size - 1;
+          tree[k].pool_index_right_ = tree_pool_size - 2;
+          tree_size = tree_size + 1;
+        }
+      }
+      SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
+    } else if (tree_size == 1) {  // Trivial case: only one element.
+      bit_depths[tree[0].value_] = 1;
+    }
+
+    {
+      // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
+      int max_depth = bit_depths[0];
+      for (j = 1; j < histogram_size; ++j) {
+        if (max_depth < bit_depths[j]) {
+          max_depth = bit_depths[j];
+        }
+      }
+      if (max_depth <= tree_depth_limit) {
+        break;
+      }
+    }
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Coding of the Huffman tree values
+
+static HuffmanTreeToken* CodeRepeatedValues(int repetitions,
+                                            HuffmanTreeToken* tokens,
+                                            int value, int prev_value) {
+  assert(value <= MAX_ALLOWED_CODE_LENGTH);
+  if (value != prev_value) {
+    tokens->code = value;
+    tokens->extra_bits = 0;
+    ++tokens;
+    --repetitions;
+  }
+  while (repetitions >= 1) {
+    if (repetitions < 3) {
+      int i;
+      for (i = 0; i < repetitions; ++i) {
+        tokens->code = value;
+        tokens->extra_bits = 0;
+        ++tokens;
+      }
+      break;
+    } else if (repetitions < 7) {
+      tokens->code = 16;
+      tokens->extra_bits = repetitions - 3;
+      ++tokens;
+      break;
+    } else {
+      tokens->code = 16;
+      tokens->extra_bits = 3;
+      ++tokens;
+      repetitions -= 6;
+    }
+  }
+  return tokens;
+}
+
+static HuffmanTreeToken* CodeRepeatedZeros(int repetitions,
+                                           HuffmanTreeToken* tokens) {
+  while (repetitions >= 1) {
+    if (repetitions < 3) {
+      int i;
+      for (i = 0; i < repetitions; ++i) {
+        tokens->code = 0;   // 0-value
+        tokens->extra_bits = 0;
+        ++tokens;
+      }
+      break;
+    } else if (repetitions < 11) {
+      tokens->code = 17;
+      tokens->extra_bits = repetitions - 3;
+      ++tokens;
+      break;
+    } else if (repetitions < 139) {
+      tokens->code = 18;
+      tokens->extra_bits = repetitions - 11;
+      ++tokens;
+      break;
+    } else {
+      tokens->code = 18;
+      tokens->extra_bits = 0x7f;  // 138 repeated 0s
+      ++tokens;
+      repetitions -= 138;
+    }
+  }
+  return tokens;
+}
+
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+                                    HuffmanTreeToken* tokens, int max_tokens) {
+  HuffmanTreeToken* const starting_token = tokens;
+  HuffmanTreeToken* const ending_token = tokens + max_tokens;
+  const int depth_size = tree->num_symbols;
+  int prev_value = 8;  // 8 is the initial value for rle.
+  int i = 0;
+  assert(tokens != NULL);
+  while (i < depth_size) {
+    const int value = tree->code_lengths[i];
+    int k = i + 1;
+    int runs;
+    while (k < depth_size && tree->code_lengths[k] == value) ++k;
+    runs = k - i;
+    if (value == 0) {
+      tokens = CodeRepeatedZeros(runs, tokens);
+    } else {
+      tokens = CodeRepeatedValues(runs, tokens, value, prev_value);
+      prev_value = value;
+    }
+    i += runs;
+    assert(tokens <= ending_token);
+  }
+  (void)ending_token;    // suppress 'unused variable' warning
+  return (int)(tokens - starting_token);
+}
+
+// -----------------------------------------------------------------------------
+
+// Pre-reversed 4-bit values.
+static const uint8_t kReversedBits[16] = {
+  0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+  0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+};
+
+static uint32_t ReverseBits(int num_bits, uint32_t bits) {
+  uint32_t retval = 0;
+  int i = 0;
+  while (i < num_bits) {
+    i += 4;
+    retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i);
+    bits >>= 4;
+  }
+  retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits);
+  return retval;
+}
+
+// Get the actual bit values for a tree of bit depths.
+static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
+  // 0 bit-depth means that the symbol does not exist.
+  int i;
+  int len;
+  uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
+  int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+
+  assert(tree != NULL);
+  len = tree->num_symbols;
+  for (i = 0; i < len; ++i) {
+    const int code_length = tree->code_lengths[i];
+    assert(code_length <= MAX_ALLOWED_CODE_LENGTH);
+    ++depth_count[code_length];
+  }
+  depth_count[0] = 0;  // ignore unused symbol
+  next_code[0] = 0;
+  {
+    uint32_t code = 0;
+    for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) {
+      code = (code + depth_count[i - 1]) << 1;
+      next_code[i] = code;
+    }
+  }
+  for (i = 0; i < len; ++i) {
+    const int code_length = tree->code_lengths[i];
+    tree->codes[i] = ReverseBits(code_length, next_code[code_length]++);
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Main entry point
+
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+                           uint8_t* const buf_rle,
+                           HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const huff_code) {
+  const int num_symbols = huff_code->num_symbols;
+  memset(buf_rle, 0, num_symbols * sizeof(*buf_rle));
+  OptimizeHuffmanForRle(num_symbols, buf_rle, histogram);
+  GenerateOptimalTree(histogram, num_symbols, huff_tree, tree_depth_limit,
+                      huff_code->code_lengths);
+  // Create the actual bit codes for the bit lengths.
+  ConvertBitDepthsToSymbols(huff_code);
+}
diff --git a/src/main/jni/libwebp/utils/huffman_encode.h b/src/main/jni/libwebp/utils/huffman_encode.h
new file mode 100644
index 000000000..91aa18f46
--- /dev/null
+++ b/src/main/jni/libwebp/utils/huffman_encode.h
@@ -0,0 +1,61 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless
+
+#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
+#define WEBP_UTILS_HUFFMAN_ENCODE_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Struct for holding the tree header in coded form.
+typedef struct {
+  uint8_t code;         // value (0..15) or escape code (16,17,18)
+  uint8_t extra_bits;   // extra bits for escape codes
+} HuffmanTreeToken;
+
+// Struct to represent the tree codes (depth and bits array).
+typedef struct {
+  int       num_symbols;   // Number of symbols.
+  uint8_t*  code_lengths;  // Code lengths of the symbols.
+  uint16_t* codes;         // Symbol Codes.
+} HuffmanTreeCode;
+
+// Struct to represent the Huffman tree.
+// TODO(vikasa): Add comment for the fields of the Struct.
+typedef struct {
+  uint32_t total_count_;
+  int value_;
+  int pool_index_left_;    // Index for the left sub-tree.
+  int pool_index_right_;   // Index for the right sub-tree.
+} HuffmanTree;
+
+// Turn the Huffman tree into a token sequence.
+// Returns the number of tokens used.
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+                                    HuffmanTreeToken* tokens, int max_tokens);
+
+// Create an optimized tree, and tokenize it.
+// 'buf_rle' and 'huff_tree' are pre-allocated and the 'tree' is the constructed
+// huffman code tree.
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+                           uint8_t* const buf_rle, HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const tree);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBP_UTILS_HUFFMAN_ENCODE_H_
diff --git a/src/main/jni/libwebp/utils/quant_levels.c b/src/main/jni/libwebp/utils/quant_levels.c
new file mode 100644
index 000000000..d7c8aab92
--- /dev/null
+++ b/src/main/jni/libwebp/utils/quant_levels.c
@@ -0,0 +1,140 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for specified number of quantization-levels ([2, 256]).
+// Min and max values are preserved (usual 0 and 255 for alpha plane).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./quant_levels.h"
+
+#define NUM_SYMBOLS     256
+
+#define MAX_ITER  6             // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4    // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels.
+
+int QuantizeLevels(uint8_t* const data, int width, int height,
+                   int num_levels, uint64_t* const sse) {
+  int freq[NUM_SYMBOLS] = { 0 };
+  int q_level[NUM_SYMBOLS] = { 0 };
+  double inv_q_level[NUM_SYMBOLS] = { 0 };
+  int min_s = 255, max_s = 0;
+  const size_t data_size = height * width;
+  int i, num_levels_in, iter;
+  double last_err = 1.e38, err = 0.;
+  const double err_threshold = ERROR_THRESHOLD * data_size;
+
+  if (data == NULL) {
+    return 0;
+  }
+
+  if (width <= 0 || height <= 0) {
+    return 0;
+  }
+
+  if (num_levels < 2 || num_levels > 256) {
+    return 0;
+  }
+
+  {
+    size_t n;
+    num_levels_in = 0;
+    for (n = 0; n < data_size; ++n) {
+      num_levels_in += (freq[data[n]] == 0);
+      if (min_s > data[n]) min_s = data[n];
+      if (max_s < data[n]) max_s = data[n];
+      ++freq[data[n]];
+    }
+  }
+
+  if (num_levels_in <= num_levels) goto End;  // nothing to do!
+
+  // Start with uniformly spread centroids.
+  for (i = 0; i < num_levels; ++i) {
+    inv_q_level[i] = min_s + (double)(max_s - min_s) * i / (num_levels - 1);
+  }
+
+  // Fixed values. Won't be changed.
+  q_level[min_s] = 0;
+  q_level[max_s] = num_levels - 1;
+  assert(inv_q_level[0] == min_s);
+  assert(inv_q_level[num_levels - 1] == max_s);
+
+  // k-Means iterations.
+  for (iter = 0; iter < MAX_ITER; ++iter) {
+    double q_sum[NUM_SYMBOLS] = { 0 };
+    double q_count[NUM_SYMBOLS] = { 0 };
+    int s, slot = 0;
+
+    // Assign classes to representatives.
+    for (s = min_s; s <= max_s; ++s) {
+      // Keep track of the nearest neighbour 'slot'
+      while (slot < num_levels - 1 &&
+             2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+        ++slot;
+      }
+      if (freq[s] > 0) {
+        q_sum[slot] += s * freq[s];
+        q_count[slot] += freq[s];
+      }
+      q_level[s] = slot;
+    }
+
+    // Assign new representatives to classes.
+    if (num_levels > 2) {
+      for (slot = 1; slot < num_levels - 1; ++slot) {
+        const double count = q_count[slot];
+        if (count > 0.) {
+          inv_q_level[slot] = q_sum[slot] / count;
+        }
+      }
+    }
+
+    // Compute convergence error.
+    err = 0.;
+    for (s = min_s; s <= max_s; ++s) {
+      const double error = s - inv_q_level[q_level[s]];
+      err += freq[s] * error * error;
+    }
+
+    // Check for convergence: we stop as soon as the error is no
+    // longer improving.
+    if (last_err - err < err_threshold) break;
+    last_err = err;
+  }
+
+  // Remap the alpha plane to quantized values.
+  {
+    // double->int rounding operation can be costly, so we do it
+    // once for all before remapping. We also perform the data[] -> slot
+    // mapping, while at it (avoid one indirection in the final loop).
+    uint8_t map[NUM_SYMBOLS];
+    int s;
+    size_t n;
+    for (s = min_s; s <= max_s; ++s) {
+      const int slot = q_level[s];
+      map[s] = (uint8_t)(inv_q_level[slot] + .5);
+    }
+    // Final pass.
+    for (n = 0; n < data_size; ++n) {
+      data[n] = map[data[n]];
+    }
+  }
+ End:
+  // Store sum of squared error if needed.
+  if (sse != NULL) *sse = (uint64_t)err;
+
+  return 1;
+}
+
diff --git a/src/main/jni/libwebp/utils/quant_levels.h b/src/main/jni/libwebp/utils/quant_levels.h
new file mode 100644
index 000000000..1cb5a32ca
--- /dev/null
+++ b/src/main/jni/libwebp/utils/quant_levels.h
@@ -0,0 +1,36 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha plane quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_H_
+#define WEBP_UTILS_QUANT_LEVELS_H_
+
+#include <stdlib.h>
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Replace the input 'data' of size 'width'x'height' with 'num-levels'
+// quantized values. If not NULL, 'sse' will contain the sum of squared error.
+// Valid range for 'num_levels' is [2, 256].
+// Returns false in case of error (data is NULL, or parameters are invalid).
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
+                   uint64_t* const sse);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_QUANT_LEVELS_H_ */
diff --git a/src/main/jni/libwebp/utils/quant_levels_dec.c b/src/main/jni/libwebp/utils/quant_levels_dec.c
new file mode 100644
index 000000000..5b8b8b49e
--- /dev/null
+++ b/src/main/jni/libwebp/utils/quant_levels_dec.c
@@ -0,0 +1,279 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Implement gradient smoothing: we replace a current alpha value by its
+// surrounding average if it's close enough (that is: the change will be less
+// than the minimum distance between two quantized level).
+// We use sliding window for computing the 2d moving average.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./quant_levels_dec.h"
+
+#include <string.h>   // for memset
+
+#include "./utils.h"
+
+// #define USE_DITHERING   // uncomment to enable ordered dithering (not vital)
+
+#define FIX 16     // fix-point precision for averaging
+#define LFIX 2     // extra precision for look-up table
+#define LUT_SIZE ((1 << (8 + LFIX)) - 1)  // look-up table size
+
+#if defined(USE_DITHERING)
+
+#define DFIX 4           // extra precision for ordered dithering
+#define DSIZE 4          // dithering size (must be a power of two)
+// cf. http://en.wikipedia.org/wiki/Ordered_dithering
+static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
+  {  0,  8,  2, 10 },     // coefficients are in DFIX fixed-point precision
+  { 12,  4, 14,  6 },
+  {  3, 11,  1,  9 },
+  { 15,  7, 13,  5 }
+};
+
+#else
+#define DFIX 0
+#endif
+
+typedef struct {
+  int width_, height_;  // dimension
+  int row_;             // current input row being processed
+  uint8_t* src_;        // input pointer
+  uint8_t* dst_;        // output pointer
+
+  int radius_;          // filter radius (=delay)
+  int scale_;           // normalization factor, in FIX bits precision
+
+  void* mem_;           // all memory
+
+  // various scratch buffers
+  uint16_t* start_;
+  uint16_t* cur_;
+  uint16_t* end_;
+  uint16_t* top_;
+  uint16_t* average_;
+
+  // input levels distribution
+  int num_levels_;       // number of quantized levels
+  int min_, max_;        // min and max level values
+  int min_level_dist_;   // smallest distance between two consecutive levels
+
+  int16_t* correction_;  // size = 1 + 2*LUT_SIZE  -> ~4k memory
+} SmoothParams;
+
+//------------------------------------------------------------------------------
+
+#define CLIP_MASK (int)(~0U << (8 + DFIX))
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
+}
+
+// vertical accumulation
+static void VFilter(SmoothParams* const p) {
+  const uint8_t* src = p->src_;
+  const int w = p->width_;
+  uint16_t* const cur = p->cur_;
+  const uint16_t* const top = p->top_;
+  uint16_t* const out = p->end_;
+  uint16_t sum = 0;               // all arithmetic is modulo 16bit
+  int x;
+
+  for (x = 0; x < w; ++x) {
+    uint16_t new_value;
+    sum += src[x];
+    new_value = top[x] + sum;
+    out[x] = new_value - cur[x];  // vertical sum of 'r' pixels.
+    cur[x] = new_value;
+  }
+  // move input pointers one row down
+  p->top_ = p->cur_;
+  p->cur_ += w;
+  if (p->cur_ == p->end_) p->cur_ = p->start_;  // roll-over
+  // We replicate edges, as it's somewhat easier as a boundary condition.
+  // That's why we don't update the 'src' pointer on top/bottom area:
+  if (p->row_ >= 0 && p->row_ < p->height_ - 1) {
+    p->src_ += p->width_;
+  }
+}
+
+// horizontal accumulation. We use mirror replication of missing pixels, as it's
+// a little easier to implement (surprisingly).
+static void HFilter(SmoothParams* const p) {
+  const uint16_t* const in = p->end_;
+  uint16_t* const out = p->average_;
+  const uint32_t scale = p->scale_;
+  const int w = p->width_;
+  const int r = p->radius_;
+
+  int x;
+  for (x = 0; x <= r; ++x) {   // left mirroring
+    const uint16_t delta = in[x + r - 1] + in[r - x];
+    out[x] = (delta * scale) >> FIX;
+  }
+  for (; x < w - r; ++x) {     // bulk middle run
+    const uint16_t delta = in[x + r] - in[x - r - 1];
+    out[x] = (delta * scale) >> FIX;
+  }
+  for (; x < w; ++x) {         // right mirroring
+    const uint16_t delta =
+        2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1];
+    out[x] = (delta * scale) >> FIX;
+  }
+}
+
+// emit one filtered output row
+static void ApplyFilter(SmoothParams* const p) {
+  const uint16_t* const average = p->average_;
+  const int w = p->width_;
+  const int16_t* const correction = p->correction_;
+#if defined(USE_DITHERING)
+  const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE];
+#endif
+  uint8_t* const dst = p->dst_;
+  int x;
+  for (x = 0; x < w; ++x) {
+    const int v = dst[x];
+    if (v < p->max_ && v > p->min_) {
+      const int c = (v << DFIX) + correction[average[x] - (v << LFIX)];
+#if defined(USE_DITHERING)
+      dst[x] = clip_8b(c + dither[x % DSIZE]);
+#else
+      dst[x] = clip_8b(c);
+#endif
+    }
+  }
+  p->dst_ += w;  // advance output pointer
+}
+
+//------------------------------------------------------------------------------
+// Initialize correction table
+
+static void InitCorrectionLUT(int16_t* const lut, int min_dist) {
+  // The correction curve is:
+  //   f(x) = x for x <= threshold2
+  //   f(x) = 0 for x >= threshold1
+  // and a linear interpolation for range x=[threshold2, threshold1]
+  // (along with f(-x) = -f(x) symmetry).
+  // Note that: threshold2 = 3/4 * threshold1
+  const int threshold1 = min_dist << LFIX;
+  const int threshold2 = (3 * threshold1) >> 2;
+  const int max_threshold = threshold2 << DFIX;
+  const int delta = threshold1 - threshold2;
+  int i;
+  for (i = 1; i <= LUT_SIZE; ++i) {
+    int c = (i <= threshold2) ? (i << DFIX)
+          : (i < threshold1) ? max_threshold * (threshold1 - i) / delta
+          : 0;
+    c >>= LFIX;
+    lut[+i] = +c;
+    lut[-i] = -c;
+  }
+  lut[0] = 0;
+}
+
+static void CountLevels(const uint8_t* const data, int size,
+                        SmoothParams* const p) {
+  int i, last_level;
+  uint8_t used_levels[256] = { 0 };
+  p->min_ = 255;
+  p->max_ = 0;
+  for (i = 0; i < size; ++i) {
+    const int v = data[i];
+    if (v < p->min_) p->min_ = v;
+    if (v > p->max_) p->max_ = v;
+    used_levels[v] = 1;
+  }
+  // Compute the mininum distance between two non-zero levels.
+  p->min_level_dist_ = p->max_ - p->min_;
+  last_level = -1;
+  for (i = 0; i < 256; ++i) {
+    if (used_levels[i]) {
+      ++p->num_levels_;
+      if (last_level >= 0) {
+        const int level_dist = i - last_level;
+        if (level_dist < p->min_level_dist_) {
+          p->min_level_dist_ = level_dist;
+        }
+      }
+      last_level = i;
+    }
+  }
+}
+
+// Initialize all params.
+static int InitParams(uint8_t* const data, int width, int height,
+                      int radius, SmoothParams* const p) {
+  const int R = 2 * radius + 1;  // total size of the kernel
+
+  const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_);
+  const size_t size_m =  width * sizeof(*p->average_);
+  const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_);
+  const size_t total_size = size_scratch_m + size_m + size_lut;
+  uint8_t* mem = (uint8_t*)WebPSafeMalloc(1U, total_size);
+
+  if (mem == NULL) return 0;
+  p->mem_ = (void*)mem;
+
+  p->start_ = (uint16_t*)mem;
+  p->cur_ = p->start_;
+  p->end_ = p->start_ + R * width;
+  p->top_ = p->end_ - width;
+  memset(p->top_, 0, width * sizeof(*p->top_));
+  mem += size_scratch_m;
+
+  p->average_ = (uint16_t*)mem;
+  mem += size_m;
+
+  p->width_ = width;
+  p->height_ = height;
+  p->src_ = data;
+  p->dst_ = data;
+  p->radius_ = radius;
+  p->scale_ = (1 << (FIX + LFIX)) / (R * R);  // normalization constant
+  p->row_ = -radius;
+
+  // analyze the input distribution so we can best-fit the threshold
+  CountLevels(data, width * height, p);
+
+  // correction table
+  p->correction_ = ((int16_t*)mem) + LUT_SIZE;
+  InitCorrectionLUT(p->correction_, p->min_level_dist_);
+
+  return 1;
+}
+
+static void CleanupParams(SmoothParams* const p) {
+  WebPSafeFree(p->mem_);
+}
+
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+                         int strength) {
+  const int radius = 4 * strength / 100;
+  if (strength < 0 || strength > 100) return 0;
+  if (data == NULL || width <= 0 || height <= 0) return 0;  // bad params
+  if (radius > 0) {
+    SmoothParams p;
+    memset(&p, 0, sizeof(p));
+    if (!InitParams(data, width, height, radius, &p)) return 0;
+    if (p.num_levels_ > 2) {
+      for (; p.row_ < p.height_; ++p.row_) {
+        VFilter(&p);  // accumulate average of input
+        // Need to wait few rows in order to prime the filter,
+        // before emitting some output.
+        if (p.row_ >= p.radius_) {
+          HFilter(&p);
+          ApplyFilter(&p);
+        }
+      }
+    }
+    CleanupParams(&p);
+  }
+  return 1;
+}
diff --git a/src/main/jni/libwebp/utils/quant_levels_dec.h b/src/main/jni/libwebp/utils/quant_levels_dec.h
new file mode 100644
index 000000000..9aab06807
--- /dev/null
+++ b/src/main/jni/libwebp/utils/quant_levels_dec.h
@@ -0,0 +1,35 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha plane de-quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
+#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Apply post-processing to input 'data' of size 'width'x'height' assuming that
+// the source was quantized to a reduced number of levels.
+// Strength is in [0..100] and controls the amount of dithering applied.
+// Returns false in case of error (data is NULL, invalid parameters,
+// malloc failure, ...).
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+                         int strength);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
diff --git a/src/main/jni/libwebp/utils/random.c b/src/main/jni/libwebp/utils/random.c
new file mode 100644
index 000000000..24e96ad64
--- /dev/null
+++ b/src/main/jni/libwebp/utils/random.c
@@ -0,0 +1,43 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+#include "./random.h"
+
+//------------------------------------------------------------------------------
+
+// 31b-range values
+static const uint32_t kRandomTable[VP8_RANDOM_TABLE_SIZE] = {
+  0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828,
+  0x4a85fef8, 0x49ddb84b, 0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da,
+  0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69, 0x0c01ee65, 0x32a1755f,
+  0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2,
+  0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c,
+  0x73990b32, 0x26d7cb1c, 0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd,
+  0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67, 0x56b41583, 0x73e502e3,
+  0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b,
+  0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494,
+  0x27e5ed3c
+};
+
+void VP8InitRandom(VP8Random* const rg, float dithering) {
+  memcpy(rg->tab_, kRandomTable, sizeof(rg->tab_));
+  rg->index1_ = 0;
+  rg->index2_ = 31;
+  rg->amp_ = (dithering < 0.0) ? 0
+           : (dithering > 1.0) ? (1 << VP8_RANDOM_DITHER_FIX)
+           : (uint32_t)((1 << VP8_RANDOM_DITHER_FIX) * dithering);
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/src/main/jni/libwebp/utils/random.h b/src/main/jni/libwebp/utils/random.h
new file mode 100644
index 000000000..c392a615c
--- /dev/null
+++ b/src/main/jni/libwebp/utils/random.h
@@ -0,0 +1,63 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RANDOM_H_
+#define WEBP_UTILS_RANDOM_H_
+
+#include <assert.h>
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VP8_RANDOM_DITHER_FIX 8   // fixed-point precision for dithering
+#define VP8_RANDOM_TABLE_SIZE 55
+
+typedef struct {
+  int index1_, index2_;
+  uint32_t tab_[VP8_RANDOM_TABLE_SIZE];
+  int amp_;
+} VP8Random;
+
+// Initializes random generator with an amplitude 'dithering' in range [0..1].
+void VP8InitRandom(VP8Random* const rg, float dithering);
+
+// Returns a centered pseudo-random number with 'num_bits' amplitude.
+// (uses D.Knuth's Difference-based random generator).
+// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision.
+static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits,
+                                      int amp) {
+  int diff;
+  assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31);
+  diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_];
+  if (diff < 0) diff += (1u << 31);
+  rg->tab_[rg->index1_] = diff;
+  if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;
+  if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;
+  // sign-extend, 0-center
+  diff = (int)((uint32_t)diff << 1) >> (32 - num_bits);
+  diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX;  // restrict range
+  diff += 1 << (num_bits - 1);                   // shift back to 0.5-center
+  return diff;
+}
+
+static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
+  return VP8RandomBits2(rg, num_bits, rg->amp_);
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_RANDOM_H_ */
diff --git a/src/main/jni/libwebp/utils/rescaler.c b/src/main/jni/libwebp/utils/rescaler.c
new file mode 100644
index 000000000..fad9c6b0e
--- /dev/null
+++ b/src/main/jni/libwebp/utils/rescaler.c
@@ -0,0 +1,333 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./rescaler.h"
+#include "../dsp/dsp.h"
+
+//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
+                              const uint8_t* const src, int channel) = NULL;
+void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;
+
+#define RFIX 30
+#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
+
+static void ImportRowC(WebPRescaler* const wrk,
+                       const uint8_t* const src, int channel) {
+  const int x_stride = wrk->num_channels;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  int x_in = channel;
+  int x_out;
+  int accum = 0;
+  if (!wrk->x_expand) {
+    int sum = 0;
+    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+      accum += wrk->x_add;
+      for (; accum > 0; accum -= wrk->x_sub) {
+        sum += src[x_in];
+        x_in += x_stride;
+      }
+      {        // Emit next horizontal pixel.
+        const int32_t base = src[x_in];
+        const int32_t frac = base * (-accum);
+        x_in += x_stride;
+        wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
+        // fresh fractional start for next pixel
+        sum = (int)MULT_FIX(frac, wrk->fx_scale);
+      }
+    }
+  } else {        // simple bilinear interpolation
+    int left = src[channel], right = src[channel];
+    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+      if (accum < 0) {
+        left = right;
+        x_in += x_stride;
+        right = src[x_in];
+        accum += wrk->x_add;
+      }
+      wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
+      accum -= wrk->x_sub;
+    }
+  }
+  // Accumulate the contribution of the new row.
+  for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+    wrk->irow[x_out] += wrk->frow[x_out];
+  }
+}
+
+static void ExportRowC(WebPRescaler* const wrk, int x_out) {
+  if (wrk->y_accum <= 0) {
+    uint8_t* const dst = wrk->dst;
+    int32_t* const irow = wrk->irow;
+    const int32_t* const frow = wrk->frow;
+    const int yscale = wrk->fy_scale * (-wrk->y_accum);
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    for (; x_out < x_out_max; ++x_out) {
+      const int frac = (int)MULT_FIX(frow[x_out], yscale);
+      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
+      dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+      irow[x_out] = frac;   // new fractional start
+    }
+    wrk->y_accum += wrk->y_add;
+    wrk->dst += wrk->dst_stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// MIPS version
+
+#if defined(WEBP_USE_MIPS32)
+
+static void ImportRowMIPS(WebPRescaler* const wrk,
+                          const uint8_t* const src, int channel) {
+  const int x_stride = wrk->num_channels;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  const int fx_scale = wrk->fx_scale;
+  const int x_add = wrk->x_add;
+  const int x_sub = wrk->x_sub;
+  int* frow = wrk->frow + channel;
+  int* irow = wrk->irow + channel;
+  const uint8_t* src1 = src + channel;
+  int temp1, temp2, temp3;
+  int base, frac, sum;
+  int accum, accum1;
+  const int x_stride1 = x_stride << 2;
+  int loop_c = x_out_max - channel;
+
+  if (!wrk->x_expand) {
+    __asm__ volatile (
+      "li     %[temp1],   0x8000                    \n\t"
+      "li     %[temp2],   0x10000                   \n\t"
+      "li     %[sum],     0                         \n\t"
+      "li     %[accum],   0                         \n\t"
+    "1:                                             \n\t"
+      "addu   %[accum],   %[accum],   %[x_add]      \n\t"
+      "blez   %[accum],   3f                        \n\t"
+    "2:                                             \n\t"
+      "lbu    %[temp3],   0(%[src1])                \n\t"
+      "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
+      "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
+      "addu   %[sum],     %[sum],     %[temp3]      \n\t"
+      "bgtz   %[accum],   2b                        \n\t"
+    "3:                                             \n\t"
+      "lbu    %[base],    0(%[src1])                \n\t"
+      "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
+      "negu   %[accum1],  %[accum]                  \n\t"
+      "mul    %[frac],    %[base],    %[accum1]     \n\t"
+      "addu   %[temp3],   %[sum],     %[base]       \n\t"
+      "mul    %[temp3],   %[temp3],   %[x_sub]      \n\t"
+      "lw     %[base],    0(%[irow])                \n\t"
+      "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
+      "sll    %[accum1],  %[frac],    2             \n\t"
+      "mult   %[temp1],   %[temp2]                  \n\t"
+      "madd   %[accum1],  %[fx_scale]               \n\t"
+      "mfhi   %[sum]                                \n\t"
+      "subu   %[temp3],   %[temp3],   %[frac]       \n\t"
+      "sw     %[temp3],   0(%[frow])                \n\t"
+      "add    %[base],    %[base],    %[temp3]      \n\t"
+      "sw     %[base],    0(%[irow])                \n\t"
+      "addu   %[irow],    %[irow],    %[x_stride1]  \n\t"
+      "addu   %[frow],    %[frow],    %[x_stride1]  \n\t"
+      "bgtz   %[loop_c],  1b                        \n\t"
+
+      : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
+        [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
+        [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),
+        [temp2] "=&r" (temp2), [temp1] "=&r" (temp1)
+      : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
+        [x_sub] "r" (x_sub), [x_add] "r" (x_add),
+        [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)
+      : "memory", "hi", "lo"
+    );
+  } else {
+    __asm__ volatile (
+      "lbu    %[temp1],   0(%[src1])                \n\t"
+      "move   %[temp2],   %[temp1]                  \n\t"
+      "li     %[accum],   0                         \n\t"
+    "1:                                             \n\t"
+      "bgez   %[accum],   2f                        \n\t"
+      "move   %[temp2],   %[temp1]                  \n\t"
+      "addu   %[src1],    %[x_stride]               \n\t"
+      "lbu    %[temp1],   0(%[src1])                \n\t"
+      "addu   %[accum],   %[x_add]                  \n\t"
+    "2:                                             \n\t"
+      "subu   %[temp3],   %[temp2],   %[temp1]      \n\t"
+      "mul    %[temp3],   %[temp3],   %[accum]      \n\t"
+      "mul    %[base],    %[temp1],   %[x_add]      \n\t"
+      "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
+      "lw     %[frac],    0(%[irow])                \n\t"
+      "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
+      "addu   %[temp3],   %[base],    %[temp3]      \n\t"
+      "sw     %[temp3],   0(%[frow])                \n\t"
+      "addu   %[frow],    %[x_stride1]              \n\t"
+      "addu   %[frac],    %[temp3]                  \n\t"
+      "sw     %[frac],    0(%[irow])                \n\t"
+      "addu   %[irow],    %[x_stride1]              \n\t"
+      "bgtz   %[loop_c],  1b                        \n\t"
+
+      : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
+        [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),
+        [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow)
+      : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
+        [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c)
+      : "memory", "hi", "lo"
+    );
+  }
+}
+
+static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {
+  if (wrk->y_accum <= 0) {
+    uint8_t* const dst = wrk->dst;
+    int32_t* const irow = wrk->irow;
+    const int32_t* const frow = wrk->frow;
+    const int yscale = wrk->fy_scale * (-wrk->y_accum);
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+    // if wrk->fxy_scale can fit into 32 bits use optimized code,
+    // otherwise use C code
+    if ((wrk->fxy_scale >> 32) == 0) {
+      int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
+      const int temp2 = (int)(wrk->fxy_scale);
+      const int temp8 = x_out_max << 2;
+      uint8_t* dst_t = (uint8_t*)dst;
+      int32_t* irow_t = (int32_t*)irow;
+      const int32_t* frow_t = (const int32_t*)frow;
+
+      __asm__ volatile(
+        "addiu    %[temp6],    $zero,       -256          \n\t"
+        "addiu    %[temp7],    $zero,       255           \n\t"
+        "li       %[temp3],    0x10000                    \n\t"
+        "li       %[temp4],    0x8000                     \n\t"
+        "addu     %[loop_end], %[frow_t],   %[temp8]      \n\t"
+      "1:                                                 \n\t"
+        "lw       %[temp0],    0(%[frow_t])               \n\t"
+        "mult     %[temp3],    %[temp4]                   \n\t"
+        "addiu    %[frow_t],   %[frow_t],   4             \n\t"
+        "sll      %[temp0],    %[temp0],    2             \n\t"
+        "madd     %[temp0],    %[yscale]                  \n\t"
+        "mfhi     %[temp1]                                \n\t"
+        "lw       %[temp0],    0(%[irow_t])               \n\t"
+        "addiu    %[dst_t],    %[dst_t],    1             \n\t"
+        "addiu    %[irow_t],   %[irow_t],   4             \n\t"
+        "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"
+        "mult     %[temp3],    %[temp4]                   \n\t"
+        "sll      %[temp0],    %[temp0],    2             \n\t"
+        "madd     %[temp0],    %[temp2]                   \n\t"
+        "mfhi     %[temp5]                                \n\t"
+        "sw       %[temp1],    -4(%[irow_t])              \n\t"
+        "and      %[temp0],    %[temp5],    %[temp6]      \n\t"
+        "slti     %[temp1],    %[temp5],    0             \n\t"
+        "beqz     %[temp0],    2f                         \n\t"
+        "xor      %[temp5],    %[temp5],    %[temp5]      \n\t"
+        "movz     %[temp5],    %[temp7],    %[temp1]      \n\t"
+      "2:                                                 \n\t"
+        "sb       %[temp5],    -1(%[dst_t])               \n\t"
+        "bne      %[frow_t],   %[loop_end], 1b            \n\t"
+
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+          [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),
+          [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)
+        : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
+        : "memory", "hi", "lo"
+      );
+      wrk->y_accum += wrk->y_add;
+      wrk->dst += wrk->dst_stride;
+    } else {
+      ExportRowC(wrk, x_out);
+    }
+  }
+}
+#endif   // WEBP_USE_MIPS32
+
+//------------------------------------------------------------------------------
+
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+                      uint8_t* const dst, int dst_width, int dst_height,
+                      int dst_stride, int num_channels, int x_add, int x_sub,
+                      int y_add, int y_sub, int32_t* const work) {
+  wrk->x_expand = (src_width < dst_width);
+  wrk->src_width = src_width;
+  wrk->src_height = src_height;
+  wrk->dst_width = dst_width;
+  wrk->dst_height = dst_height;
+  wrk->dst = dst;
+  wrk->dst_stride = dst_stride;
+  wrk->num_channels = num_channels;
+  // for 'x_expand', we use bilinear interpolation
+  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
+  wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
+  wrk->y_accum = y_add;
+  wrk->y_add = y_add;
+  wrk->y_sub = y_sub;
+  wrk->fx_scale = (1 << RFIX) / x_sub;
+  wrk->fy_scale = (1 << RFIX) / y_sub;
+  wrk->fxy_scale = wrk->x_expand ?
+      ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
+      ((int64_t)dst_height << RFIX) / (x_add * src_height);
+  wrk->irow = work;
+  wrk->frow = work + num_channels * dst_width;
+
+  if (WebPRescalerImportRow == NULL) {
+    WebPRescalerImportRow = ImportRowC;
+    WebPRescalerExportRow = ExportRowC;
+    if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_MIPS32)
+      if (VP8GetCPUInfo(kMIPS32)) {
+        WebPRescalerImportRow = ImportRowMIPS;
+        WebPRescalerExportRow = ExportRowMIPS;
+      }
+#endif
+    }
+  }
+}
+
+#undef MULT_FIX
+#undef RFIX
+
+//------------------------------------------------------------------------------
+// all-in-one calls
+
+int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
+  const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;
+  return (num_lines > max_num_lines) ? max_num_lines : num_lines;
+}
+
+int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
+                       const uint8_t* src, int src_stride) {
+  int total_imported = 0;
+  while (total_imported < num_lines && wrk->y_accum > 0) {
+    int channel;
+    for (channel = 0; channel < wrk->num_channels; ++channel) {
+      WebPRescalerImportRow(wrk, src, channel);
+    }
+    src += src_stride;
+    ++total_imported;
+    wrk->y_accum -= wrk->y_sub;
+  }
+  return total_imported;
+}
+
+int WebPRescalerExport(WebPRescaler* const rescaler) {
+  int total_exported = 0;
+  while (WebPRescalerHasPendingOutput(rescaler)) {
+    WebPRescalerExportRow(rescaler, 0);
+    ++total_exported;
+  }
+  return total_exported;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/utils/rescaler.h b/src/main/jni/libwebp/utils/rescaler.h
new file mode 100644
index 000000000..a6f378712
--- /dev/null
+++ b/src/main/jni/libwebp/utils/rescaler.h
@@ -0,0 +1,82 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RESCALER_H_
+#define WEBP_UTILS_RESCALER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../webp/types.h"
+
+// Structure used for on-the-fly rescaling
+typedef struct {
+  int x_expand;               // true if we're expanding in the x direction
+  int num_channels;           // bytes to jump between pixels
+  int fy_scale, fx_scale;     // fixed-point scaling factor
+  int64_t fxy_scale;          // ''
+  // we need hpel-precise add/sub increments, for the downsampled U/V planes.
+  int y_accum;                // vertical accumulator
+  int y_add, y_sub;           // vertical increments (add ~= src, sub ~= dst)
+  int x_add, x_sub;           // horizontal increments (add ~= src, sub ~= dst)
+  int src_width, src_height;  // source dimensions
+  int dst_width, dst_height;  // destination dimensions
+  uint8_t* dst;
+  int dst_stride;
+  int32_t* irow, *frow;       // work buffer
+} WebPRescaler;
+
+// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
+void WebPRescalerInit(WebPRescaler* const rescaler,
+                      int src_width, int src_height,
+                      uint8_t* const dst,
+                      int dst_width, int dst_height, int dst_stride,
+                      int num_channels,
+                      int x_add, int x_sub,
+                      int y_add, int y_sub,
+                      int32_t* const work);
+
+// Returns the number of input lines needed next to produce one output line,
+// considering that the maximum available input lines are 'max_num_lines'.
+int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
+                           int max_num_lines);
+
+// Import multiple rows over all channels, until at least one row is ready to
+// be exported. Returns the actual number of lines that were imported.
+int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
+                       const uint8_t* src, int src_stride);
+
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported.
+extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
+                                     const uint8_t* const src, int channel);
+// Export one row (starting at x_out position) from rescaler.
+extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out);
+
+// Return true if there is pending output rows ready.
+static WEBP_INLINE
+int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
+  return (rescaler->y_accum <= 0);
+}
+
+// Export as many rows as possible. Return the numbers of rows written.
+int WebPRescalerExport(WebPRescaler* const rescaler);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_RESCALER_H_ */
diff --git a/src/main/jni/libwebp/utils/thread.c b/src/main/jni/libwebp/utils/thread.c
new file mode 100644
index 000000000..264210ba2
--- /dev/null
+++ b/src/main/jni/libwebp/utils/thread.c
@@ -0,0 +1,309 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <string.h>   // for memset()
+#include "./thread.h"
+#include "./utils.h"
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+  HANDLE waiting_sem_;
+  HANDLE received_sem_;
+  HANDLE signal_event_;
+} pthread_cond_t;
+
+#else  // !_WIN32
+
+#include <pthread.h>
+
+#endif  // _WIN32
+
+struct WebPWorkerImpl {
+  pthread_mutex_t mutex_;
+  pthread_cond_t  condition_;
+  pthread_t       thread_;
+};
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static int pthread_create(pthread_t* const thread, const void* attr,
+                          unsigned int (__stdcall *start)(void*), void* arg) {
+  (void)attr;
+  *thread = (pthread_t)_beginthreadex(NULL,   /* void *security */
+                                      0,      /* unsigned stack_size */
+                                      start,
+                                      arg,
+                                      0,      /* unsigned initflag */
+                                      NULL);  /* unsigned *thrdaddr */
+  if (*thread == NULL) return 1;
+  SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
+  return 0;
+}
+
+static int pthread_join(pthread_t thread, void** value_ptr) {
+  (void)value_ptr;
+  return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+          CloseHandle(thread) == 0);
+}
+
+// Mutex
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+  (void)mutexattr;
+  InitializeCriticalSection(mutex);
+  return 0;
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+  EnterCriticalSection(mutex);
+  return 0;
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+  LeaveCriticalSection(mutex);
+  return 0;
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+  DeleteCriticalSection(mutex);
+  return 0;
+}
+
+// Condition
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+  int ok = 1;
+  ok &= (CloseHandle(condition->waiting_sem_) != 0);
+  ok &= (CloseHandle(condition->received_sem_) != 0);
+  ok &= (CloseHandle(condition->signal_event_) != 0);
+  return !ok;
+}
+
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+  (void)cond_attr;
+  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
+  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
+  if (condition->waiting_sem_ == NULL ||
+      condition->received_sem_ == NULL ||
+      condition->signal_event_ == NULL) {
+    pthread_cond_destroy(condition);
+    return 1;
+  }
+  return 0;
+}
+
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+  int ok = 1;
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    ok = SetEvent(condition->signal_event_);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
+           WAIT_OBJECT_0);
+  }
+  return !ok;
+}
+
+static int pthread_cond_wait(pthread_cond_t* const condition,
+                             pthread_mutex_t* const mutex) {
+  int ok;
+  // note that there is a consumer available so the signal isn't dropped in
+  // pthread_cond_signal
+  if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+    return 1;
+  // now unlock the mutex so pthread_cond_signal may be issued
+  pthread_mutex_unlock(mutex);
+  ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+        WAIT_OBJECT_0);
+  ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
+  pthread_mutex_lock(mutex);
+  return !ok;
+}
+
+#else  // !_WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif  // _WIN32
+
+//------------------------------------------------------------------------------
+
+static void Execute(WebPWorker* const worker);  // Forward declaration.
+
+static THREADFN ThreadLoop(void* ptr) {
+  WebPWorker* const worker = (WebPWorker*)ptr;
+  int done = 0;
+  while (!done) {
+    pthread_mutex_lock(&worker->impl_->mutex_);
+    while (worker->status_ == OK) {   // wait in idling mode
+      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+    }
+    if (worker->status_ == WORK) {
+      Execute(worker);
+      worker->status_ = OK;
+    } else if (worker->status_ == NOT_OK) {   // finish the worker
+      done = 1;
+    }
+    // signal to the main thread that we're done (for Sync())
+    pthread_cond_signal(&worker->impl_->condition_);
+    pthread_mutex_unlock(&worker->impl_->mutex_);
+  }
+  return THREAD_RETURN(NULL);    // Thread is finished
+}
+
+// main thread state control
+static void ChangeState(WebPWorker* const worker,
+                        WebPWorkerStatus new_status) {
+  // No-op when attempting to change state on a thread that didn't come up.
+  // Checking status_ without acquiring the lock first would result in a data
+  // race.
+  if (worker->impl_ == NULL) return;
+
+  pthread_mutex_lock(&worker->impl_->mutex_);
+  if (worker->status_ >= OK) {
+    // wait for the worker to finish
+    while (worker->status_ != OK) {
+      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+    }
+    // assign new status and release the working thread if needed
+    if (new_status != OK) {
+      worker->status_ = new_status;
+      pthread_cond_signal(&worker->impl_->condition_);
+    }
+  }
+  pthread_mutex_unlock(&worker->impl_->mutex_);
+}
+
+#endif  // WEBP_USE_THREAD
+
+//------------------------------------------------------------------------------
+
+static void Init(WebPWorker* const worker) {
+  memset(worker, 0, sizeof(*worker));
+  worker->status_ = NOT_OK;
+}
+
+static int Sync(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+  ChangeState(worker, OK);
+#endif
+  assert(worker->status_ <= OK);
+  return !worker->had_error;
+}
+
+static int Reset(WebPWorker* const worker) {
+  int ok = 1;
+  worker->had_error = 0;
+  if (worker->status_ < OK) {
+#ifdef WEBP_USE_THREAD
+    worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
+    if (worker->impl_ == NULL) {
+      return 0;
+    }
+    if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+      goto Error;
+    }
+    if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+      pthread_mutex_destroy(&worker->impl_->mutex_);
+      goto Error;
+    }
+    pthread_mutex_lock(&worker->impl_->mutex_);
+    ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
+    if (ok) worker->status_ = OK;
+    pthread_mutex_unlock(&worker->impl_->mutex_);
+    if (!ok) {
+      pthread_mutex_destroy(&worker->impl_->mutex_);
+      pthread_cond_destroy(&worker->impl_->condition_);
+ Error:
+      WebPSafeFree(worker->impl_);
+      worker->impl_ = NULL;
+      return 0;
+    }
+#else
+    worker->status_ = OK;
+#endif
+  } else if (worker->status_ > OK) {
+    ok = Sync(worker);
+  }
+  assert(!ok || (worker->status_ == OK));
+  return ok;
+}
+
+static void Execute(WebPWorker* const worker) {
+  if (worker->hook != NULL) {
+    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  }
+}
+
+static void Launch(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+  ChangeState(worker, WORK);
+#else
+  Execute(worker);
+#endif
+}
+
+static void End(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+  if (worker->impl_ != NULL) {
+    ChangeState(worker, NOT_OK);
+    pthread_join(worker->impl_->thread_, NULL);
+    pthread_mutex_destroy(&worker->impl_->mutex_);
+    pthread_cond_destroy(&worker->impl_->condition_);
+    WebPSafeFree(worker->impl_);
+    worker->impl_ = NULL;
+  }
+#else
+  worker->status_ = NOT_OK;
+  assert(worker->impl_ == NULL);
+#endif
+  assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+static WebPWorkerInterface g_worker_interface = {
+  Init, Reset, Sync, Launch, Execute, End
+};
+
+int WebPSetWorkerInterface(const WebPWorkerInterface* const winterface) {
+  if (winterface == NULL ||
+      winterface->Init == NULL || winterface->Reset == NULL ||
+      winterface->Sync == NULL || winterface->Launch == NULL ||
+      winterface->Execute == NULL || winterface->End == NULL) {
+    return 0;
+  }
+  g_worker_interface = *winterface;
+  return 1;
+}
+
+const WebPWorkerInterface* WebPGetWorkerInterface(void) {
+  return &g_worker_interface;
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/utils/thread.h b/src/main/jni/libwebp/utils/thread.h
new file mode 100644
index 000000000..7bd451b12
--- /dev/null
+++ b/src/main/jni/libwebp/utils/thread.h
@@ -0,0 +1,93 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_THREAD_H_
+#define WEBP_UTILS_THREAD_H_
+
+#ifdef HAVE_CONFIG_H
+#include "../webp/config.h"
+#endif
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// State of the worker thread object
+typedef enum {
+  NOT_OK = 0,   // object is unusable
+  OK,           // ready to work
+  WORK          // busy finishing the current task
+} WebPWorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*WebPWorkerHook)(void*, void*);
+
+// Platform-dependent implementation details for the worker.
+typedef struct WebPWorkerImpl WebPWorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
+typedef struct {
+  WebPWorkerImpl* impl_;
+  WebPWorkerStatus status_;
+  WebPWorkerHook hook;    // hook to call
+  void* data1;            // first argument passed to 'hook'
+  void* data2;            // second argument passed to 'hook'
+  int had_error;          // return value of the last call to 'hook'
+} WebPWorker;
+
+// The interface for all thread-worker related functions. All these functions
+// must be implemented.
+typedef struct {
+  // Must be called first, before any other method.
+  void (*Init)(WebPWorker* const worker);
+  // Must be called to initialize the object and spawn the thread. Re-entrant.
+  // Will potentially launch the thread. Returns false in case of error.
+  int (*Reset)(WebPWorker* const worker);
+  // Makes sure the previous work is finished. Returns true if worker->had_error
+  // was not set and no error condition was triggered by the working thread.
+  int (*Sync)(WebPWorker* const worker);
+  // Triggers the thread to call hook() with data1 and data2 arguments. These
+  // hook/data1/data2 values can be changed at any time before calling this
+  // function, but not be changed afterward until the next call to Sync().
+  void (*Launch)(WebPWorker* const worker);
+  // This function is similar to Launch() except that it calls the
+  // hook directly instead of using a thread. Convenient to bypass the thread
+  // mechanism while still using the WebPWorker structs. Sync() must
+  // still be called afterward (for error reporting).
+  void (*Execute)(WebPWorker* const worker);
+  // Kill the thread and terminate the object. To use the object again, one
+  // must call Reset() again.
+  void (*End)(WebPWorker* const worker);
+} WebPWorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied, it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+WEBP_EXTERN(int) WebPSetWorkerInterface(
+    const WebPWorkerInterface* const interface);
+
+// Retrieve the currently set thread worker interface.
+WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_THREAD_H_ */
diff --git a/src/main/jni/libwebp/utils/utils.c b/src/main/jni/libwebp/utils/utils.c
new file mode 100644
index 000000000..8ff7f12fa
--- /dev/null
+++ b/src/main/jni/libwebp/utils/utils.c
@@ -0,0 +1,211 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./utils.h"
+
+// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
+// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
+// and not multi-thread safe!).
+// An interesting alternative is valgrind's 'massif' tool:
+//    http://valgrind.org/docs/manual/ms-manual.html
+// Here is an example command line:
+/*    valgrind --tool=massif --massif-out-file=massif.out \
+               --stacks=yes --alloc-fn=WebPSafeAlloc --alloc-fn=WebPSafeCalloc
+      ms_print massif.out
+*/
+// In addition:
+// * if PRINT_MEM_TRAFFIC is defined, all the details of the malloc/free cycles
+//   are printed.
+// * if MALLOC_FAIL_AT is defined, the global environment variable
+//   $MALLOC_FAIL_AT is used to simulate a memory error when calloc or malloc
+//   is called for the nth time. Example usage:
+//   export MALLOC_FAIL_AT=50 && ./examples/cwebp input.png
+// * if MALLOC_LIMIT is defined, the global environment variable $MALLOC_LIMIT
+//   sets the maximum amount of memory (in bytes) made available to libwebp.
+//   This can be used to emulate environment with very limited memory.
+//   Example: export MALLOC_LIMIT=64000000 && ./examples/dwebp picture.webp
+
+// #define PRINT_MEM_INFO
+// #define PRINT_MEM_TRAFFIC
+// #define MALLOC_FAIL_AT
+// #define MALLOC_LIMIT
+
+//------------------------------------------------------------------------------
+// Checked memory allocation
+
+#if defined(PRINT_MEM_INFO)
+
+#include <stdio.h>
+#include <stdlib.h>  // for abort()
+
+static int num_malloc_calls = 0;
+static int num_calloc_calls = 0;
+static int num_free_calls = 0;
+static int countdown_to_fail = 0;     // 0 = off
+
+typedef struct MemBlock MemBlock;
+struct MemBlock {
+  void* ptr_;
+  size_t size_;
+  MemBlock* next_;
+};
+
+static MemBlock* all_blocks = NULL;
+static size_t total_mem = 0;
+static size_t total_mem_allocated = 0;
+static size_t high_water_mark = 0;
+static size_t mem_limit = 0;
+
+static int exit_registered = 0;
+
+static void PrintMemInfo(void) {
+  fprintf(stderr, "\nMEMORY INFO:\n");
+  fprintf(stderr, "num calls to: malloc = %4d\n", num_malloc_calls);
+  fprintf(stderr, "              calloc = %4d\n", num_calloc_calls);
+  fprintf(stderr, "              free   = %4d\n", num_free_calls);
+  fprintf(stderr, "total_mem: %u\n", (uint32_t)total_mem);
+  fprintf(stderr, "total_mem allocated: %u\n", (uint32_t)total_mem_allocated);
+  fprintf(stderr, "high-water mark: %u\n", (uint32_t)high_water_mark);
+  while (all_blocks != NULL) {
+    MemBlock* b = all_blocks;
+    all_blocks = b->next_;
+    free(b);
+  }
+}
+
+static void Increment(int* const v) {
+  if (!exit_registered) {
+#if defined(MALLOC_FAIL_AT)
+    {
+      const char* const malloc_fail_at_str = getenv("MALLOC_FAIL_AT");
+      if (malloc_fail_at_str != NULL) {
+        countdown_to_fail = atoi(malloc_fail_at_str);
+      }
+    }
+#endif
+#if defined(MALLOC_LIMIT)
+    {
+      const char* const malloc_limit_str = getenv("MALLOC_LIMIT");
+      if (malloc_limit_str != NULL) {
+        mem_limit = atoi(malloc_limit_str);
+      }
+    }
+#endif
+    (void)countdown_to_fail;
+    (void)mem_limit;
+    atexit(PrintMemInfo);
+    exit_registered = 1;
+  }
+  ++*v;
+}
+
+static void AddMem(void* ptr, size_t size) {
+  if (ptr != NULL) {
+    MemBlock* const b = (MemBlock*)malloc(sizeof(*b));
+    if (b == NULL) abort();
+    b->next_ = all_blocks;
+    all_blocks = b;
+    b->ptr_ = ptr;
+    b->size_ = size;
+    total_mem += size;
+    total_mem_allocated += size;
+#if defined(PRINT_MEM_TRAFFIC)
+#if defined(MALLOC_FAIL_AT)
+    fprintf(stderr, "fail-count: %5d [mem=%u]\n",
+            num_malloc_calls + num_calloc_calls, (uint32_t)total_mem);
+#else
+    fprintf(stderr, "Mem: %u (+%u)\n", (uint32_t)total_mem, (uint32_t)size);
+#endif
+#endif
+    if (total_mem > high_water_mark) high_water_mark = total_mem;
+  }
+}
+
+static void SubMem(void* ptr) {
+  if (ptr != NULL) {
+    MemBlock** b = &all_blocks;
+    // Inefficient search, but that's just for debugging.
+    while (*b != NULL && (*b)->ptr_ != ptr) b = &(*b)->next_;
+    if (*b == NULL) {
+      fprintf(stderr, "Invalid pointer free! (%p)\n", ptr);
+      abort();
+    }
+    {
+      MemBlock* const block = *b;
+      *b = block->next_;
+      total_mem -= block->size_;
+#if defined(PRINT_MEM_TRAFFIC)
+      fprintf(stderr, "Mem: %u (-%u)\n",
+              (uint32_t)total_mem, (uint32_t)block->size_);
+#endif
+      free(block);
+    }
+  }
+}
+
+#else
+#define Increment(v) do {} while (0)
+#define AddMem(p, s) do {} while (0)
+#define SubMem(p)    do {} while (0)
+#endif
+
+// Returns 0 in case of overflow of nmemb * size.
+static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
+  const uint64_t total_size = nmemb * size;
+  if (nmemb == 0) return 1;
+  if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+  if (total_size != (size_t)total_size) return 0;
+#if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT)
+  if (countdown_to_fail > 0 && --countdown_to_fail == 0) {
+    return 0;    // fake fail!
+  }
+#endif
+#if defined(MALLOC_LIMIT)
+  if (mem_limit > 0 && total_mem + total_size >= mem_limit) {
+    return 0;   // fake fail!
+  }
+#endif
+
+  return 1;
+}
+
+void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
+  void* ptr;
+  Increment(&num_malloc_calls);
+  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+  assert(nmemb * size > 0);
+  ptr = malloc((size_t)(nmemb * size));
+  AddMem(ptr, (size_t)(nmemb * size));
+  return ptr;
+}
+
+void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
+  void* ptr;
+  Increment(&num_calloc_calls);
+  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+  assert(nmemb * size > 0);
+  ptr = calloc((size_t)nmemb, size);
+  AddMem(ptr, (size_t)(nmemb * size));
+  return ptr;
+}
+
+void WebPSafeFree(void* const ptr) {
+  if (ptr != NULL) {
+    Increment(&num_free_calls);
+    SubMem(ptr);
+  }
+  free(ptr);
+}
+
+//------------------------------------------------------------------------------
diff --git a/src/main/jni/libwebp/utils/utils.h b/src/main/jni/libwebp/utils/utils.h
new file mode 100644
index 000000000..f2c498a9d
--- /dev/null
+++ b/src/main/jni/libwebp/utils/utils.h
@@ -0,0 +1,121 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Authors: Skal (pascal.massimino@gmail.com)
+//          Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_UTILS_H_
+#define WEBP_UTILS_UTILS_H_
+
+#include <assert.h>
+
+#include "../webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Memory allocation
+
+// This is the maximum memory amount that libwebp will ever try to allocate.
+#define WEBP_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// size-checking safe malloc/calloc: verify that the requested size is not too
+// large, or return NULL. You don't need to call these for constructs like
+// malloc(sizeof(foo)), but only if there's picture-dependent size involved
+// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
+// safe malloc() borrows the signature from calloc(), pointing at the dangerous
+// underlying multiply involved.
+WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
+// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
+// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
+WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+// Companion deallocation function to the above allocations.
+WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
+
+//------------------------------------------------------------------------------
+// Reading/writing data.
+
+// Read 16, 24 or 32 bits stored in little-endian order.
+static WEBP_INLINE int GetLE16(const uint8_t* const data) {
+  return (int)(data[0] << 0) | (data[1] << 8);
+}
+
+static WEBP_INLINE int GetLE24(const uint8_t* const data) {
+  return GetLE16(data) | (data[2] << 16);
+}
+
+static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
+  return (uint32_t)GetLE16(data) | (GetLE16(data + 2) << 16);
+}
+
+// Store 16, 24 or 32 bits in little-endian order.
+static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
+  assert(val < (1 << 16));
+  data[0] = (val >> 0);
+  data[1] = (val >> 8);
+}
+
+static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
+  assert(val < (1 << 24));
+  PutLE16(data, val & 0xffff);
+  data[2] = (val >> 16);
+}
+
+static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
+  PutLE16(data, (int)(val & 0xffff));
+  PutLE16(data + 2, (int)(val >> 16));
+}
+
+// Returns (int)floor(log2(n)). n must be > 0.
+// use GNU builtins where available.
+#if defined(__GNUC__) && \
+    ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  return 31 ^ __builtin_clz(n);
+}
+#elif defined(_MSC_VER) && _MSC_VER > 1310 && \
+      (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  uint32_t first_set_bit;
+  _BitScanReverse(&first_set_bit, n);
+  return first_set_bit;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  int log = 0;
+  uint32_t value = n;
+  int i;
+
+  for (i = 4; i >= 0; --i) {
+    const int shift = (1 << i);
+    const uint32_t x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  return log;
+}
+#endif
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_UTILS_H_ */
diff --git a/src/main/jni/libwebp/webp/decode.h b/src/main/jni/libwebp/webp/decode.h
new file mode 100644
index 000000000..8d3f7be92
--- /dev/null
+++ b/src/main/jni/libwebp/webp/decode.h
@@ -0,0 +1,503 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Main decoding functions for WebP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_H_
+#define WEBP_WEBP_DECODE_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_DECODER_ABI_VERSION 0x0203    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum VP8StatusCode VP8StatusCode;
+// typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
+typedef struct WebPRGBABuffer WebPRGBABuffer;
+typedef struct WebPYUVABuffer WebPYUVABuffer;
+typedef struct WebPDecBuffer WebPDecBuffer;
+typedef struct WebPIDecoder WebPIDecoder;
+typedef struct WebPBitstreamFeatures WebPBitstreamFeatures;
+typedef struct WebPDecoderOptions WebPDecoderOptions;
+typedef struct WebPDecoderConfig WebPDecoderConfig;
+
+// Return the decoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDecoderVersion(void);
+
+// Retrieve basic header information: width, height.
+// This function will also validate the header and return 0 in
+// case of formatting error.
+// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
+WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
+                             int* width, int* height);
+
+// Decodes WebP images pointed to by 'data' and returns RGBA samples, along
+// with the dimensions in *width and *height. The ordering of samples in
+// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
+// The returned pointer should be deleted calling free().
+// Returns NULL in case of error.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
+// If the bitstream contains transparency, it is ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
+
+// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
+
+
+// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
+// returned is the Y samples buffer. Upon return, *u and *v will point to
+// the U and V chroma data. These U and V buffers need NOT be free()'d,
+// unlike the returned Y luma one. The dimension of the U and V planes
+// are both (*width + 1) / 2 and (*height + 1)/ 2.
+// Upon return, the Y buffer has a stride returned as '*stride', while U and V
+// have a common stride returned as '*uv_stride'.
+// Return NULL in case of error.
+// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
+WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
+                                    int* width, int* height,
+                                    uint8_t** u, uint8_t** v,
+                                    int* stride, int* uv_stride);
+
+// These five functions are variants of the above ones, that decode the image
+// directly into a pre-allocated buffer 'output_buffer'. The maximum storage
+// available in this buffer is indicated by 'output_buffer_size'. If this
+// storage is not sufficient (or an error occurred), NULL is returned.
+// Otherwise, output_buffer is returned, for convenience.
+// The parameter 'output_stride' specifies the distance (in bytes)
+// between scanlines. Hence, output_buffer_size is expected to be at least
+// output_stride x picture-height.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// RGB and BGR variants. Here too the transparency information, if present,
+// will be dropped and ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// WebPDecodeYUVInto() is a variant of WebPDecodeYUV() that operates directly
+// into pre-allocated luma/chroma plane buffers. This function requires the
+// strides to be passed: one for the luma plane and one for each of the
+// chroma ones. The size of each plane buffer is passed as 'luma_size',
+// 'u_size' and 'v_size' respectively.
+// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
+// during decoding (or because some buffers were found to be too small).
+WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride);
+
+//------------------------------------------------------------------------------
+// Output colorspaces and buffer
+
+// Colorspaces
+// Note: the naming describes the byte-ordering of packed samples in memory.
+// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
+// Non-capital names (e.g.:MODE_Argb) relates to pre-multiplied RGB channels.
+// RGBA-4444 and RGB-565 colorspaces are represented by following byte-order:
+// RGBA-4444: [r3 r2 r1 r0 g3 g2 g1 g0], [b3 b2 b1 b0 a3 a2 a1 a0], ...
+// RGB-565: [r4 r3 r2 r1 r0 g5 g4 g3], [g2 g1 g0 b4 b3 b2 b1 b0], ...
+// In the case WEBP_SWAP_16BITS_CSP is defined, the bytes are swapped for
+// these two modes:
+// RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ...
+// RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ...
+
+typedef enum WEBP_CSP_MODE {
+  MODE_RGB = 0, MODE_RGBA = 1,
+  MODE_BGR = 2, MODE_BGRA = 3,
+  MODE_ARGB = 4, MODE_RGBA_4444 = 5,
+  MODE_RGB_565 = 6,
+  // RGB-premultiplied transparent modes (alpha value is preserved)
+  MODE_rgbA = 7,
+  MODE_bgrA = 8,
+  MODE_Argb = 9,
+  MODE_rgbA_4444 = 10,
+  // YUV modes must come after RGB ones.
+  MODE_YUV = 11, MODE_YUVA = 12,  // yuv 4:2:0
+  MODE_LAST = 13
+} WEBP_CSP_MODE;
+
+// Some useful macros:
+static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
+  return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb ||
+          mode == MODE_rgbA_4444);
+}
+
+static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) {
+  return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB ||
+          mode == MODE_RGBA_4444 || mode == MODE_YUVA ||
+          WebPIsPremultipliedMode(mode));
+}
+
+static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
+  return (mode < MODE_YUV);
+}
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer: Generic structure for describing the output sample buffer.
+
+struct WebPRGBABuffer {    // view as RGBA
+  uint8_t* rgba;    // pointer to RGBA samples
+  int stride;       // stride in bytes from one scanline to the next.
+  size_t size;      // total size of the *rgba buffer.
+};
+
+struct WebPYUVABuffer {              // view as YUVA
+  uint8_t* y, *u, *v, *a;     // pointer to luma, chroma U/V, alpha samples
+  int y_stride;               // luma stride
+  int u_stride, v_stride;     // chroma strides
+  int a_stride;               // alpha stride
+  size_t y_size;              // luma plane size
+  size_t u_size, v_size;      // chroma planes size
+  size_t a_size;              // alpha-plane size
+};
+
+// Output buffer
+struct WebPDecBuffer {
+  WEBP_CSP_MODE colorspace;  // Colorspace.
+  int width, height;         // Dimensions.
+  int is_external_memory;    // If true, 'internal_memory' pointer is not used.
+  union {
+    WebPRGBABuffer RGBA;
+    WebPYUVABuffer YUVA;
+  } u;                       // Nameless union of buffer parameters.
+  uint32_t       pad[4];     // padding for later use
+
+  uint8_t* private_memory;   // Internally allocated memory (only when
+                             // is_external_memory is false). Should not be used
+                             // externally, but accessed via the buffer union.
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
+
+// Initialize the structure as empty. Must be called before any other use.
+// Returns false in case of version mismatch
+static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
+  return WebPInitDecBufferInternal(buffer, WEBP_DECODER_ABI_VERSION);
+}
+
+// Free any memory associated with the buffer. Must always be called last.
+// Note: doesn't free the 'buffer' structure itself.
+WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
+
+//------------------------------------------------------------------------------
+// Enumeration of the status codes
+
+typedef enum VP8StatusCode {
+  VP8_STATUS_OK = 0,
+  VP8_STATUS_OUT_OF_MEMORY,
+  VP8_STATUS_INVALID_PARAM,
+  VP8_STATUS_BITSTREAM_ERROR,
+  VP8_STATUS_UNSUPPORTED_FEATURE,
+  VP8_STATUS_SUSPENDED,
+  VP8_STATUS_USER_ABORT,
+  VP8_STATUS_NOT_ENOUGH_DATA
+} VP8StatusCode;
+
+//------------------------------------------------------------------------------
+// Incremental decoding
+//
+// This API allows streamlined decoding of partial data.
+// Picture can be incrementally decoded as data become available thanks to the
+// WebPIDecoder object. This object can be left in a SUSPENDED state if the
+// picture is only partially decoded, pending additional input.
+// Code example:
+//
+//   WebPInitDecBuffer(&buffer);
+//   buffer.colorspace = mode;
+//   ...
+//   WebPIDecoder* idec = WebPINewDecoder(&buffer);
+//   while (has_more_data) {
+//     // ... (get additional data)
+//     status = WebPIAppend(idec, new_data, new_data_size);
+//     if (status != VP8_STATUS_SUSPENDED ||
+//       break;
+//     }
+//
+//     // The above call decodes the current available buffer.
+//     // Part of the image can now be refreshed by calling to
+//     // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
+//   }
+//   WebPIDelete(idec);
+
+// Creates a new incremental decoder with the supplied buffer parameter.
+// This output_buffer can be passed NULL, in which case a default output buffer
+// is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
+// is kept, which means that the lifespan of 'output_buffer' must be larger than
+// that of the returned WebPIDecoder object.
+// The supplied 'output_buffer' content MUST NOT be changed between calls to
+// WebPIAppend() or WebPIUpdate() unless 'output_buffer.is_external_memory' is
+// set to 1. In such a case, it is allowed to modify the pointers, size and
+// stride of output_buffer.u.RGBA or output_buffer.u.YUVA, provided they remain
+// within valid bounds.
+// All other fields of WebPDecBuffer MUST remain constant between calls.
+// Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the RGB/A samples specified by 'csp' into a preallocated
+// buffer 'output_buffer'. The size of this buffer is at least
+// 'output_buffer_size' and the stride (distance in bytes between two scanlines)
+// is specified by 'output_stride'.
+// Additionally, output_buffer can be passed NULL in which case the output
+// buffer will be allocated automatically when the decoding starts. The
+// colorspace 'csp' is taken into account for allocating this buffer. All other
+// parameters are ignored.
+// Returns NULL if the allocation failed, or if some parameters are invalid.
+WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
+    WEBP_CSP_MODE csp,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the raw luma/chroma samples into a preallocated planes if
+// supplied. The luma plane is specified by its pointer 'luma', its size
+// 'luma_size' and its stride 'luma_stride'. Similarly, the chroma-u plane
+// is specified by the 'u', 'u_size' and 'u_stride' parameters, and the chroma-v
+// plane by 'v' and 'v_size'. And same for the alpha-plane. The 'a' pointer
+// can be pass NULL in case one is not interested in the transparency plane.
+// Conversely, 'luma' can be passed NULL if no preallocated planes are supplied.
+// In this case, the output buffer will be automatically allocated (using
+// MODE_YUVA) when decoding starts. All parameters are then ignored.
+// Returns NULL if the allocation failed or if a parameter is invalid.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride,
+    uint8_t* a, size_t a_size, int a_stride);
+
+// Deprecated version of the above, without the alpha plane.
+// Kept for backward compatibility.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride);
+
+// Deletes the WebPIDecoder object and associated memory. Must always be called
+// if WebPINewDecoder, WebPINewRGB or WebPINewYUV succeeded.
+WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
+
+// Copies and decodes the next available data. Returns VP8_STATUS_OK when
+// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
+// data is expected. Returns error in other cases.
+WEBP_EXTERN(VP8StatusCode) WebPIAppend(
+    WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// A variant of the above function to be used when data buffer contains
+// partial data from the beginning. In this case data buffer is not copied
+// to the internal memory.
+// Note that the value of the 'data' pointer can change between calls to
+// WebPIUpdate, for instance when the data buffer is resized to fit larger data.
+WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
+    WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// Returns the RGB/A image decoded so far. Returns NULL if output params
+// are not initialized yet. The RGB/A output type corresponds to the colorspace
+// specified during call to WebPINewDecoder() or WebPINewRGB().
+// *last_y is the index of last decoded row in raster scan order. Some pointers
+// (*last_y, *width etc.) can be NULL if corresponding information is not
+// needed.
+WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
+    const WebPIDecoder* idec, int* last_y,
+    int* width, int* height, int* stride);
+
+// Same as above function to get a YUVA image. Returns pointer to the luma
+// plane or NULL in case of error. If there is no alpha information
+// the alpha pointer '*a' will be returned NULL.
+WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
+    const WebPIDecoder* idec, int* last_y,
+    uint8_t** u, uint8_t** v, uint8_t** a,
+    int* width, int* height, int* stride, int* uv_stride, int* a_stride);
+
+// Deprecated alpha-less version of WebPIDecGetYUVA(): it will ignore the
+// alpha information (if present). Kept for backward compatibility.
+static WEBP_INLINE uint8_t* WebPIDecGetYUV(
+    const WebPIDecoder* idec, int* last_y, uint8_t** u, uint8_t** v,
+    int* width, int* height, int* stride, int* uv_stride) {
+  return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height,
+                         stride, uv_stride, NULL);
+}
+
+// Generic call to retrieve information about the displayable area.
+// If non NULL, the left/right/width/height pointers are filled with the visible
+// rectangular area so far.
+// Returns NULL in case the incremental decoder object is in an invalid state.
+// Otherwise returns the pointer to the internal representation. This structure
+// is read-only, tied to WebPIDecoder's lifespan and should not be modified.
+WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
+    const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
+
+//------------------------------------------------------------------------------
+// Advanced decoding parametrization
+//
+//  Code sample for using the advanced decoding API
+/*
+     // A) Init a configuration object
+     WebPDecoderConfig config;
+     CHECK(WebPInitDecoderConfig(&config));
+
+     // B) optional: retrieve the bitstream's features.
+     CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
+
+     // C) Adjust 'config', if needed
+     config.no_fancy_upsampling = 1;
+     config.output.colorspace = MODE_BGRA;
+     // etc.
+
+     // Note that you can also make config.output point to an externally
+     // supplied memory buffer, provided it's big enough to store the decoded
+     // picture. Otherwise, config.output will just be used to allocate memory
+     // and store the decoded picture.
+
+     // D) Decode!
+     CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
+
+     // E) Decoded image is now in config.output (and config.output.u.RGBA)
+
+     // F) Reclaim memory allocated in config's object. It's safe to call
+     // this function even if the memory is external and wasn't allocated
+     // by WebPDecode().
+     WebPFreeDecBuffer(&config.output);
+*/
+
+// Features gathered from the bitstream
+struct WebPBitstreamFeatures {
+  int width;          // Width in pixels, as read from the bitstream.
+  int height;         // Height in pixels, as read from the bitstream.
+  int has_alpha;      // True if the bitstream contains an alpha channel.
+  int has_animation;  // True if the bitstream is an animation.
+  int format;         // 0 = undefined (/mixed), 1 = lossy, 2 = lossless
+
+  // Unused for now:
+  int no_incremental_decoding;  // if true, using incremental decoding is not
+                                // recommended.
+  int rotate;                   // TODO(later)
+  int uv_sampling;              // should be 0 for now. TODO(later)
+  uint32_t pad[2];              // padding for later use
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
+    const uint8_t*, size_t, WebPBitstreamFeatures*, int);
+
+// Retrieve features from the bitstream. The *features structure is filled
+// with information gathered from the bitstream.
+// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
+// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
+// features from headers. Returns error in other cases.
+static WEBP_INLINE VP8StatusCode WebPGetFeatures(
+    const uint8_t* data, size_t data_size,
+    WebPBitstreamFeatures* features) {
+  return WebPGetFeaturesInternal(data, data_size, features,
+                                 WEBP_DECODER_ABI_VERSION);
+}
+
+// Decoding options
+struct WebPDecoderOptions {
+  int bypass_filtering;               // if true, skip the in-loop filtering
+  int no_fancy_upsampling;            // if true, use faster pointwise upsampler
+  int use_cropping;                   // if true, cropping is applied _first_
+  int crop_left, crop_top;            // top-left position for cropping.
+                                      // Will be snapped to even values.
+  int crop_width, crop_height;        // dimension of the cropping area
+  int use_scaling;                    // if true, scaling is applied _afterward_
+  int scaled_width, scaled_height;    // final resolution
+  int use_threads;                    // if true, use multi-threaded decoding
+  int dithering_strength;             // dithering strength (0=Off, 100=full)
+#if WEBP_DECODER_ABI_VERSION > 0x0203
+  int flip;                           // flip output vertically
+#endif
+#if WEBP_DECODER_ABI_VERSION > 0x0204
+  int alpha_dithering_strength;       // alpha dithering strength in [0..100]
+#endif
+
+  // Unused for now:
+  int force_rotation;                 // forced rotation (to be applied _last_)
+  int no_enhancement;                 // if true, discard enhancement layer
+#if WEBP_DECODER_ABI_VERSION < 0x0203
+  uint32_t pad[5];                    // padding for later use
+#elif WEBP_DECODER_ABI_VERSION < 0x0204
+  uint32_t pad[4];                    // padding for later use
+#else
+  uint32_t pad[3];                    // padding for later use
+#endif
+};
+
+// Main object storing the configuration for advanced decoding.
+struct WebPDecoderConfig {
+  WebPBitstreamFeatures input;  // Immutable bitstream features (optional)
+  WebPDecBuffer output;         // Output buffer (can point to external mem)
+  WebPDecoderOptions options;   // Decoding options
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
+
+// Initialize the configuration as empty. This function must always be
+// called first, unless WebPGetFeatures() is to be called.
+// Returns false in case of mismatched version.
+static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
+  return WebPInitDecoderConfigInternal(config, WEBP_DECODER_ABI_VERSION);
+}
+
+// Instantiate a new incremental decoder object with the requested
+// configuration. The bitstream can be passed using 'data' and 'data_size'
+// parameter, in which case the features will be parsed and stored into
+// config->input. Otherwise, 'data' can be NULL and no parsing will occur.
+// Note that 'config' can be NULL too, in which case a default configuration
+// is used.
+// The return WebPIDecoder object must always be deleted calling WebPIDelete().
+// Returns NULL in case of error (and config->status will then reflect
+// the error condition).
+WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
+                                       WebPDecoderConfig* config);
+
+// Non-incremental version. This version decodes the full data at once, taking
+// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
+// if the decoding was successful).
+WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
+                                      WebPDecoderConfig* config);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DECODE_H_ */
diff --git a/src/main/jni/libwebp/webp/demux.h b/src/main/jni/libwebp/webp/demux.h
new file mode 100644
index 000000000..2da3239dd
--- /dev/null
+++ b/src/main/jni/libwebp/webp/demux.h
@@ -0,0 +1,224 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Demux API.
+// Enables extraction of image and extended format data from WebP files.
+
+// Code Example: Demuxing WebP data to extract all the frames, ICC profile
+// and EXIF/XMP metadata.
+/*
+  WebPDemuxer* demux = WebPDemux(&webp_data);
+
+  uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
+  uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
+  // ... (Get information about the features present in the WebP file).
+  uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
+
+  // ... (Iterate over all frames).
+  WebPIterator iter;
+  if (WebPDemuxGetFrame(demux, 1, &iter)) {
+    do {
+      // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
+      // ... and get other frame properties like width, height, offsets etc.
+      // ... see 'struct WebPIterator' below for more info).
+    } while (WebPDemuxNextFrame(&iter));
+    WebPDemuxReleaseIterator(&iter);
+  }
+
+  // ... (Extract metadata).
+  WebPChunkIterator chunk_iter;
+  if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
+  // ... (Consume the ICC profile in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
+  // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
+  // ... (Consume the XMP metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  WebPDemuxDelete(demux);
+*/
+
+#ifndef WEBP_WEBP_DEMUX_H_
+#define WEBP_WEBP_DEMUX_H_
+
+#include "./mux_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_DEMUX_ABI_VERSION 0x0101    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPDemuxState WebPDemuxState;
+// typedef enum WebPFormatFeature WebPFormatFeature;
+typedef struct WebPDemuxer WebPDemuxer;
+typedef struct WebPIterator WebPIterator;
+typedef struct WebPChunkIterator WebPChunkIterator;
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the demux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDemuxVersion(void);
+
+//------------------------------------------------------------------------------
+// Life of a Demux object
+
+typedef enum WebPDemuxState {
+  WEBP_DEMUX_PARSE_ERROR    = -1,  // An error occurred while parsing.
+  WEBP_DEMUX_PARSING_HEADER =  0,  // Not enough data to parse full header.
+  WEBP_DEMUX_PARSED_HEADER  =  1,  // Header parsing complete,
+                                   // data may be available.
+  WEBP_DEMUX_DONE           =  2   // Entire file has been parsed.
+} WebPDemuxState;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
+    const WebPData*, int, WebPDemuxState*, int);
+
+// Parses the full WebP file given by 'data'.
+// Returns a WebPDemuxer object on successful parse, NULL otherwise.
+static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
+  return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Parses the possibly incomplete WebP file given by 'data'.
+// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
+// Returns NULL in case of error or if there isn't enough data to start parsing;
+// and a WebPDemuxer object on successful parse.
+// Note that WebPDemuxer keeps internal pointers to 'data' memory segment.
+// If this data is volatile, the demuxer object should be deleted (by calling
+// WebPDemuxDelete()) and WebPDemuxPartial() called again on the new data.
+// This is usually an inexpensive operation.
+static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
+    const WebPData* data, WebPDemuxState* state) {
+  return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Frees memory associated with 'dmux'.
+WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
+
+//------------------------------------------------------------------------------
+// Data/information extraction.
+
+typedef enum WebPFormatFeature {
+  WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
+  WEBP_FF_CANVAS_WIDTH,
+  WEBP_FF_CANVAS_HEIGHT,
+  WEBP_FF_LOOP_COUNT,
+  WEBP_FF_BACKGROUND_COLOR,
+  WEBP_FF_FRAME_COUNT    // Number of frames present in the demux object.
+                         // In case of a partial demux, this is the number of
+                         // frames seen so far, with the last frame possibly
+                         // being partial.
+} WebPFormatFeature;
+
+// Get the 'feature' value from the 'dmux'.
+// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
+// returned a state > WEBP_DEMUX_PARSING_HEADER.
+WEBP_EXTERN(uint32_t) WebPDemuxGetI(
+    const WebPDemuxer* dmux, WebPFormatFeature feature);
+
+//------------------------------------------------------------------------------
+// Frame iteration.
+
+struct WebPIterator {
+  int frame_num;
+  int num_frames;          // equivalent to WEBP_FF_FRAME_COUNT.
+  int fragment_num;
+  int num_fragments;
+  int x_offset, y_offset;  // offset relative to the canvas.
+  int width, height;       // dimensions of this frame or fragment.
+  int duration;            // display duration in milliseconds.
+  WebPMuxAnimDispose dispose_method;  // dispose method for the frame.
+  int complete;   // true if 'fragment' contains a full frame. partial images
+                  // may still be decoded with the WebP incremental decoder.
+  WebPData fragment;  // The frame or fragment given by 'frame_num' and
+                      // 'fragment_num'.
+  int has_alpha;      // True if the frame or fragment contains transparency.
+  WebPMuxAnimBlend blend_method;  // Blend operation for the frame.
+
+  uint32_t pad[2];         // padding for later use.
+  void* private_;          // for internal use only.
+};
+
+// Retrieves frame 'frame_number' from 'dmux'.
+// 'iter->fragment' points to the first fragment on return from this function.
+// Individual fragments may be extracted using WebPDemuxSelectFragment().
+// Setting 'frame_number' equal to 0 will return the last frame of the image.
+// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
+// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of 'iter'.
+WEBP_EXTERN(int) WebPDemuxGetFrame(
+    const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
+
+// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
+// previous ('iter->frame_num' - 1) frame. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
+
+// Sets 'iter->fragment' to reflect fragment number 'fragment_num'.
+// Returns true if fragment 'fragment_num' is present, false otherwise.
+WEBP_EXTERN(int) WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num);
+
+// Releases any memory associated with 'iter'.
+// Must be called before any subsequent calls to WebPDemuxGetChunk() on the same
+// iter. Also, must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+
+//------------------------------------------------------------------------------
+// Chunk iteration.
+
+struct WebPChunkIterator {
+  // The current and total number of chunks with the fourcc given to
+  // WebPDemuxGetChunk().
+  int chunk_num;
+  int num_chunks;
+  WebPData chunk;    // The payload of the chunk.
+
+  uint32_t pad[6];   // padding for later use
+  void* private_;
+};
+
+// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
+// 'dmux'.
+// 'fourcc' is a character array containing the fourcc of the chunk to return,
+// e.g., "ICCP", "XMP ", "EXIF", etc.
+// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
+// Returns true if the chunk is found, false otherwise. Image related chunk
+// payloads are accessed through WebPDemuxGetFrame() and related functions.
+// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of the iterator.
+WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                                   const char fourcc[4], int chunk_number,
+                                   WebPChunkIterator* iter);
+
+// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
+// ('iter->chunk_num' - 1) chunk. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
+
+// Releases any memory associated with 'iter'.
+// Must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DEMUX_H_ */
diff --git a/src/main/jni/libwebp/webp/encode.h b/src/main/jni/libwebp/webp/encode.h
new file mode 100644
index 000000000..3c2637489
--- /dev/null
+++ b/src/main/jni/libwebp/webp/encode.h
@@ -0,0 +1,518 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: main interface
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_ENCODE_H_
+#define WEBP_WEBP_ENCODE_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_ENCODER_ABI_VERSION 0x0202    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPImageHint WebPImageHint;
+// typedef enum WebPEncCSP WebPEncCSP;
+// typedef enum WebPPreset WebPPreset;
+// typedef enum WebPEncodingError WebPEncodingError;
+typedef struct WebPConfig WebPConfig;
+typedef struct WebPPicture WebPPicture;   // main structure for I/O
+typedef struct WebPAuxStats WebPAuxStats;
+typedef struct WebPMemoryWriter WebPMemoryWriter;
+
+// Return the encoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetEncoderVersion(void);
+
+//------------------------------------------------------------------------------
+// One-stop-shop call! No questions asked:
+
+// Returns the size of the compressed data (pointed to by *output), or 0 if
+// an error occurred. The compressed data must be released by the caller
+// using the call 'free(*output)'.
+// These functions compress using the lossy format, and the quality_factor
+// can go from 0 (smaller output, lower quality) to 100 (best quality,
+// larger output).
+WEBP_EXTERN(size_t) WebPEncodeRGB(const uint8_t* rgb,
+                                  int width, int height, int stride,
+                                  float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeBGR(const uint8_t* bgr,
+                                  int width, int height, int stride,
+                                  float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeRGBA(const uint8_t* rgba,
+                                   int width, int height, int stride,
+                                   float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeBGRA(const uint8_t* bgra,
+                                   int width, int height, int stride,
+                                   float quality_factor, uint8_t** output);
+
+// These functions are the equivalent of the above, but compressing in a
+// lossless manner. Files are usually larger than lossy format, but will
+// not suffer any compression loss.
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb,
+                                          int width, int height, int stride,
+                                          uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr,
+                                          int width, int height, int stride,
+                                          uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba,
+                                           int width, int height, int stride,
+                                           uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
+                                           int width, int height, int stride,
+                                           uint8_t** output);
+
+//------------------------------------------------------------------------------
+// Coding parameters
+
+// Image characteristics hint for the underlying encoder.
+typedef enum WebPImageHint {
+  WEBP_HINT_DEFAULT = 0,  // default preset.
+  WEBP_HINT_PICTURE,      // digital picture, like portrait, inner shot
+  WEBP_HINT_PHOTO,        // outdoor photograph, with natural lighting
+  WEBP_HINT_GRAPH,        // Discrete tone image (graph, map-tile etc).
+  WEBP_HINT_LAST
+} WebPImageHint;
+
+// Compression parameters.
+struct WebPConfig {
+  int lossless;           // Lossless encoding (0=lossy(default), 1=lossless).
+  float quality;          // between 0 (smallest file) and 100 (biggest)
+  int method;             // quality/speed trade-off (0=fast, 6=slower-better)
+
+  WebPImageHint image_hint;  // Hint for image type (lossless only for now).
+
+  // Parameters related to lossy compression only:
+  int target_size;        // if non-zero, set the desired target size in bytes.
+                          // Takes precedence over the 'compression' parameter.
+  float target_PSNR;      // if non-zero, specifies the minimal distortion to
+                          // try to achieve. Takes precedence over target_size.
+  int segments;           // maximum number of segments to use, in [1..4]
+  int sns_strength;       // Spatial Noise Shaping. 0=off, 100=maximum.
+  int filter_strength;    // range: [0 = off .. 100 = strongest]
+  int filter_sharpness;   // range: [0 = off .. 7 = least sharp]
+  int filter_type;        // filtering type: 0 = simple, 1 = strong (only used
+                          // if filter_strength > 0 or autofilter > 0)
+  int autofilter;         // Auto adjust filter's strength [0 = off, 1 = on]
+  int alpha_compression;  // Algorithm for encoding the alpha plane (0 = none,
+                          // 1 = compressed with WebP lossless). Default is 1.
+  int alpha_filtering;    // Predictive filtering method for alpha plane.
+                          //  0: none, 1: fast, 2: best. Default if 1.
+  int alpha_quality;      // Between 0 (smallest size) and 100 (lossless).
+                          // Default is 100.
+  int pass;               // number of entropy-analysis passes (in [1..10]).
+
+  int show_compressed;    // if true, export the compressed picture back.
+                          // In-loop filtering is not applied.
+  int preprocessing;      // preprocessing filter:
+                          // 0=none, 1=segment-smooth, 2=pseudo-random dithering
+  int partitions;         // log2(number of token partitions) in [0..3]. Default
+                          // is set to 0 for easier progressive decoding.
+  int partition_limit;    // quality degradation allowed to fit the 512k limit
+                          // on prediction modes coding (0: no degradation,
+                          // 100: maximum possible degradation).
+  int emulate_jpeg_size;  // If true, compression parameters will be remapped
+                          // to better match the expected output size from
+                          // JPEG compression. Generally, the output size will
+                          // be similar but the degradation will be lower.
+  int thread_level;       // If non-zero, try and use multi-threaded encoding.
+  int low_memory;         // If set, reduce memory usage (but increase CPU use).
+
+  uint32_t pad[5];        // padding for later use
+};
+
+// Enumerate some predefined settings for WebPConfig, depending on the type
+// of source picture. These presets are used when calling WebPConfigPreset().
+typedef enum WebPPreset {
+  WEBP_PRESET_DEFAULT = 0,  // default preset.
+  WEBP_PRESET_PICTURE,      // digital picture, like portrait, inner shot
+  WEBP_PRESET_PHOTO,        // outdoor photograph, with natural lighting
+  WEBP_PRESET_DRAWING,      // hand or line drawing, with high-contrast details
+  WEBP_PRESET_ICON,         // small-sized colorful images
+  WEBP_PRESET_TEXT          // text-like
+} WebPPreset;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
+
+// Should always be called, to initialize a fresh WebPConfig structure before
+// modification. Returns false in case of version mismatch. WebPConfigInit()
+// must have succeeded before using the 'config' object.
+// Note that the default values are lossless=0 and quality=75.
+static WEBP_INLINE int WebPConfigInit(WebPConfig* config) {
+  return WebPConfigInitInternal(config, WEBP_PRESET_DEFAULT, 75.f,
+                                WEBP_ENCODER_ABI_VERSION);
+}
+
+// This function will initialize the configuration according to a predefined
+// set of parameters (referred to by 'preset') and a given quality factor.
+// This function can be called as a replacement to WebPConfigInit(). Will
+// return false in case of error.
+static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
+                                        WebPPreset preset, float quality) {
+  return WebPConfigInitInternal(config, preset, quality,
+                                WEBP_ENCODER_ABI_VERSION);
+}
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0202
+// Activate the lossless compression mode with the desired efficiency level
+// between 0 (fastest, lowest compression) and 9 (slower, best compression).
+// A good default level is '6', providing a fair tradeoff between compression
+// speed and final compressed size.
+// This function will overwrite several fields from config: 'method', 'quality'
+// and 'lossless'. Returns false in case of parameter error.
+WEBP_EXTERN(int) WebPConfigLosslessPreset(WebPConfig* config, int level);
+#endif
+
+// Returns true if 'config' is non-NULL and all configuration parameters are
+// within their valid ranges.
+WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
+
+//------------------------------------------------------------------------------
+// Input / Output
+// Structure for storing auxiliary statistics (mostly for lossy encoding).
+
+struct WebPAuxStats {
+  int coded_size;         // final size
+
+  float PSNR[5];          // peak-signal-to-noise ratio for Y/U/V/All/Alpha
+  int block_count[3];     // number of intra4/intra16/skipped macroblocks
+  int header_bytes[2];    // approximate number of bytes spent for header
+                          // and mode-partition #0
+  int residual_bytes[3][4];  // approximate number of bytes spent for
+                             // DC/AC/uv coefficients for each (0..3) segments.
+  int segment_size[4];    // number of macroblocks in each segments
+  int segment_quant[4];   // quantizer values for each segments
+  int segment_level[4];   // filtering strength for each segments [0..63]
+
+  int alpha_data_size;    // size of the transparency data
+  int layer_data_size;    // size of the enhancement layer data
+
+  // lossless encoder statistics
+  uint32_t lossless_features;  // bit0:predictor bit1:cross-color transform
+                               // bit2:subtract-green bit3:color indexing
+  int histogram_bits;          // number of precision bits of histogram
+  int transform_bits;          // precision bits for transform
+  int cache_bits;              // number of bits for color cache lookup
+  int palette_size;            // number of color in palette, if used
+  int lossless_size;           // final lossless size
+
+  uint32_t pad[4];        // padding for later use
+};
+
+// Signature for output function. Should return true if writing was successful.
+// data/data_size is the segment of data to write, and 'picture' is for
+// reference (and so one can make use of picture->custom_ptr).
+typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size,
+                                  const WebPPicture* picture);
+
+// WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
+// the following WebPMemoryWriter object (to be set as a custom_ptr).
+struct WebPMemoryWriter {
+  uint8_t* mem;       // final buffer (of size 'max_size', larger than 'size').
+  size_t   size;      // final size
+  size_t   max_size;  // total capacity
+  uint32_t pad[1];    // padding for later use
+};
+
+// The following must be called first before any use.
+WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0203
+// The following must be called to deallocate writer->mem memory. The 'writer'
+// object itself is not deallocated.
+WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer);
+#endif
+// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
+// completion, writer.mem and writer.size will hold the coded data.
+#if WEBP_ENCODER_ABI_VERSION > 0x0203
+// writer.mem must be freed by calling WebPMemoryWriterClear.
+#else
+// writer.mem must be freed by calling 'free(writer.mem)'.
+#endif
+WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
+                                 const WebPPicture* picture);
+
+// Progress hook, called from time to time to report progress. It can return
+// false to request an abort of the encoding process, or true otherwise if
+// everything is OK.
+typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
+
+// Color spaces.
+typedef enum WebPEncCSP {
+  // chroma sampling
+  WEBP_YUV420  = 0,        // 4:2:0
+  WEBP_YUV420A = 4,        // alpha channel variant
+  WEBP_CSP_UV_MASK = 3,    // bit-mask to get the UV sampling factors
+  WEBP_CSP_ALPHA_BIT = 4   // bit that is set if alpha is present
+} WebPEncCSP;
+
+// Encoding error conditions.
+typedef enum WebPEncodingError {
+  VP8_ENC_OK = 0,
+  VP8_ENC_ERROR_OUT_OF_MEMORY,            // memory error allocating objects
+  VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY,  // memory error while flushing bits
+  VP8_ENC_ERROR_NULL_PARAMETER,           // a pointer parameter is NULL
+  VP8_ENC_ERROR_INVALID_CONFIGURATION,    // configuration is invalid
+  VP8_ENC_ERROR_BAD_DIMENSION,            // picture has invalid width/height
+  VP8_ENC_ERROR_PARTITION0_OVERFLOW,      // partition is bigger than 512k
+  VP8_ENC_ERROR_PARTITION_OVERFLOW,       // partition is bigger than 16M
+  VP8_ENC_ERROR_BAD_WRITE,                // error while flushing bytes
+  VP8_ENC_ERROR_FILE_TOO_BIG,             // file is bigger than 4G
+  VP8_ENC_ERROR_USER_ABORT,               // abort request by user
+  VP8_ENC_ERROR_LAST                      // list terminator. always last.
+} WebPEncodingError;
+
+// maximum width/height allowed (inclusive), in pixels
+#define WEBP_MAX_DIMENSION 16383
+
+// Main exchange structure (input samples, output bytes, statistics)
+struct WebPPicture {
+  //   INPUT
+  //////////////
+  // Main flag for encoder selecting between ARGB or YUV input.
+  // It is recommended to use ARGB input (*argb, argb_stride) for lossless
+  // compression, and YUV input (*y, *u, *v, etc.) for lossy compression
+  // since these are the respective native colorspace for these formats.
+  int use_argb;
+
+  // YUV input (mostly used for input to lossy compression)
+  WebPEncCSP colorspace;     // colorspace: should be YUV420 for now (=Y'CbCr).
+  int width, height;         // dimensions (less or equal to WEBP_MAX_DIMENSION)
+  uint8_t *y, *u, *v;        // pointers to luma/chroma planes.
+  int y_stride, uv_stride;   // luma/chroma strides.
+  uint8_t* a;                // pointer to the alpha plane
+  int a_stride;              // stride of the alpha plane
+  uint32_t pad1[2];          // padding for later use
+
+  // ARGB input (mostly used for input to lossless compression)
+  uint32_t* argb;            // Pointer to argb (32 bit) plane.
+  int argb_stride;           // This is stride in pixels units, not bytes.
+  uint32_t pad2[3];          // padding for later use
+
+  //   OUTPUT
+  ///////////////
+  // Byte-emission hook, to store compressed bytes as they are ready.
+  WebPWriterFunction writer;  // can be NULL
+  void* custom_ptr;           // can be used by the writer.
+
+  // map for extra information (only for lossy compression mode)
+  int extra_info_type;    // 1: intra type, 2: segment, 3: quant
+                          // 4: intra-16 prediction mode,
+                          // 5: chroma prediction mode,
+                          // 6: bit cost, 7: distortion
+  uint8_t* extra_info;    // if not NULL, points to an array of size
+                          // ((width + 15) / 16) * ((height + 15) / 16) that
+                          // will be filled with a macroblock map, depending
+                          // on extra_info_type.
+
+  //   STATS AND REPORTS
+  ///////////////////////////
+  // Pointer to side statistics (updated only if not NULL)
+  WebPAuxStats* stats;
+
+  // Error code for the latest error encountered during encoding
+  WebPEncodingError error_code;
+
+  // If not NULL, report progress during encoding.
+  WebPProgressHook progress_hook;
+
+  void* user_data;        // this field is free to be set to any value and
+                          // used during callbacks (like progress-report e.g.).
+
+  uint32_t pad3[3];       // padding for later use
+
+  // Unused for now
+  uint8_t *pad4, *pad5;
+  uint32_t pad6[8];       // padding for later use
+
+  // PRIVATE FIELDS
+  ////////////////////
+  void* memory_;          // row chunk of memory for yuva planes
+  void* memory_argb_;     // and for argb too.
+  void* pad7[2];          // padding for later use
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
+
+// Should always be called, to initialize the structure. Returns false in case
+// of version mismatch. WebPPictureInit() must have succeeded before using the
+// 'picture' object.
+// Note that, by default, use_argb is false and colorspace is WEBP_YUV420.
+static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
+  return WebPPictureInitInternal(picture, WEBP_ENCODER_ABI_VERSION);
+}
+
+//------------------------------------------------------------------------------
+// WebPPicture utils
+
+// Convenience allocation / deallocation based on picture->width/height:
+// Allocate y/u/v buffers as per colorspace/width/height specification.
+// Note! This function will free the previous buffer if needed.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
+
+// Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
+// Note that this function does _not_ free the memory used by the 'picture'
+// object itself.
+// Besides memory (which is reclaimed) all other fields of 'picture' are
+// preserved.
+WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
+
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
+// will fully own the copied pixels (this is not a view). The 'dst' picture need
+// not be initialized as its content is overwritten.
+// Returns false in case of memory allocation error.
+WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
+
+// Compute PSNR, SSIM or LSIM distortion metric between two pictures.
+// Result is in dB, stores in result[] in the Y/U/V/Alpha/All order.
+// Returns false in case of error (src and ref don't have same dimension, ...)
+// Warning: this function is rather CPU-intensive.
+WEBP_EXTERN(int) WebPPictureDistortion(
+    const WebPPicture* src, const WebPPicture* ref,
+    int metric_type,           // 0 = PSNR, 1 = SSIM, 2 = LSIM
+    float result[5]);
+
+// self-crops a picture to the rectangle defined by top/left/width/height.
+// Returns false in case of memory allocation error, or if the rectangle is
+// outside of the source picture.
+// The rectangle for the view is defined by the top-left corner pixel
+// coordinates (left, top) as well as its width and height. This rectangle
+// must be fully be comprised inside the 'src' source picture. If the source
+// picture uses the YUV420 colorspace, the top and left coordinates will be
+// snapped to even values.
+WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
+                                 int left, int top, int width, int height);
+
+// Extracts a view from 'src' picture into 'dst'. The rectangle for the view
+// is defined by the top-left corner pixel coordinates (left, top) as well
+// as its width and height. This rectangle must be fully be comprised inside
+// the 'src' source picture. If the source picture uses the YUV420 colorspace,
+// the top and left coordinates will be snapped to even values.
+// Picture 'src' must out-live 'dst' picture. Self-extraction of view is allowed
+// ('src' equal to 'dst') as a mean of fast-cropping (but note that doing so,
+// the original dimension will be lost). Picture 'dst' need not be initialized
+// with WebPPictureInit() if it is different from 'src', since its content will
+// be overwritten.
+// Returns false in case of memory allocation error or invalid parameters.
+WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
+                                 int left, int top, int width, int height,
+                                 WebPPicture* dst);
+
+// Returns true if the 'picture' is actually a view and therefore does
+// not own the memory for pixels.
+WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
+
+// Rescale a picture to new dimension width x height.
+// Now gamma correction is applied.
+// Returns false in case of error (invalid parameter or insufficient memory).
+WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
+
+// Colorspace conversion function to import RGB samples.
+// Previous buffer will be free'd, if any.
+// *rgb buffer should have a size of at least height * rgb_stride.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureImportRGB(
+    WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
+// Same, but for RGBA buffer.
+WEBP_EXTERN(int) WebPPictureImportRGBA(
+    WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
+// Same, but for RGBA buffer. Imports the RGB direct from the 32-bit format
+// input buffer ignoring the alpha channel. Avoids needing to copy the data
+// to a temporary 24-bit RGB buffer to import the RGB only.
+WEBP_EXTERN(int) WebPPictureImportRGBX(
+    WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
+
+// Variants of the above, but taking BGR(A|X) input.
+WEBP_EXTERN(int) WebPPictureImportBGR(
+    WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRA(
+    WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRX(
+    WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
+
+// Converts picture->argb data to the YUV420A format. The 'colorspace'
+// parameter is deprecated and should be equal to WEBP_YUV420.
+// Upon return, picture->use_argb is set to false. The presence of real
+// non-opaque transparent values is detected, and 'colorspace' will be
+// adjusted accordingly. Note that this method is lossy.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
+                                       WebPEncCSP /*colorspace = WEBP_YUV420*/);
+
+// Same as WebPPictureARGBToYUVA(), but the conversion is done using
+// pseudo-random dithering with a strength 'dithering' between
+// 0.0 (no dithering) and 1.0 (maximum dithering). This is useful
+// for photographic picture.
+WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
+    WebPPicture* picture, WebPEncCSP colorspace, float dithering);
+
+#if WEBP_ENCODER_ABI_VERSION > 0x0204
+// Performs 'smart' RGBA->YUVA420 downsampling and colorspace conversion.
+// Downsampling is handled with extra care in case of color clipping. This
+// method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better
+// YUV representation.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
+#endif
+
+// Converts picture->yuv to picture->argb and sets picture->use_argb to true.
+// The input format must be YUV_420 or YUV_420A.
+// Note that the use of this method is discouraged if one has access to the
+// raw ARGB samples, since using YUV420 is comparatively lossy. Also, the
+// conversion from YUV420 to ARGB incurs a small loss too.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
+
+// Helper function: given a width x height plane of RGBA or YUV(A) samples
+// clean-up the YUV or RGB samples under fully transparent area, to help
+// compressibility (no guarantee, though).
+WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
+
+// Scan the picture 'picture' for the presence of non fully opaque alpha values.
+// Returns true in such case. Otherwise returns false (indicating that the
+// alpha plane can be ignored altogether e.g.).
+WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
+
+// Remove the transparency information (if present) by blending the color with
+// the background color 'background_rgb' (specified as 24bit RGB triplet).
+// After this call, all alpha values are reset to 0xff.
+WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
+
+//------------------------------------------------------------------------------
+// Main call
+
+// Main encoding call, after config and picture have been initialized.
+// 'picture' must be less than 16384x16384 in dimension (cf WEBP_MAX_DIMENSION),
+// and the 'config' object must be a valid one.
+// Returns false in case of error, true otherwise.
+// In case of error, picture->error_code is updated accordingly.
+// 'picture' can hold the source samples in both YUV(A) or ARGB input, depending
+// on the value of 'picture->use_argb'. It is highly recommended to use
+// the former for lossy encoding, and the latter for lossless encoding
+// (when config.lossless is true). Automatic conversion from one format to
+// another is provided but they both incur some loss.
+WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_ENCODE_H_ */
diff --git a/src/main/jni/libwebp/webp/format_constants.h b/src/main/jni/libwebp/webp/format_constants.h
new file mode 100644
index 000000000..4c04b50c6
--- /dev/null
+++ b/src/main/jni/libwebp/webp/format_constants.h
@@ -0,0 +1,88 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Internal header for constants related to WebP file format.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
+#define WEBP_WEBP_FORMAT_CONSTANTS_H_
+
+// Create fourcc of the chunk from the chunk tag characters.
+#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
+
+// VP8 related constants.
+#define VP8_SIGNATURE 0x9d012a              // Signature in VP8 data.
+#define VP8_MAX_PARTITION0_SIZE (1 << 19)   // max size of mode partition
+#define VP8_MAX_PARTITION_SIZE  (1 << 24)   // max size for token partition
+#define VP8_FRAME_HEADER_SIZE 10  // Size of the frame header within VP8 data.
+
+// VP8L related constants.
+#define VP8L_SIGNATURE_SIZE          1      // VP8L signature size.
+#define VP8L_MAGIC_BYTE              0x2f   // VP8L signature byte.
+#define VP8L_IMAGE_SIZE_BITS         14     // Number of bits used to store
+                                            // width and height.
+#define VP8L_VERSION_BITS            3      // 3 bits reserved for version.
+#define VP8L_VERSION                 0      // version 0
+#define VP8L_FRAME_HEADER_SIZE       5      // Size of the VP8L frame header.
+
+#define MAX_PALETTE_SIZE             256
+#define MAX_CACHE_BITS               11
+#define HUFFMAN_CODES_PER_META_CODE  5
+#define ARGB_BLACK                   0xff000000
+
+#define DEFAULT_CODE_LENGTH          8
+#define MAX_ALLOWED_CODE_LENGTH      15
+
+#define NUM_LITERAL_CODES            256
+#define NUM_LENGTH_CODES             24
+#define NUM_DISTANCE_CODES           40
+#define CODE_LENGTH_CODES            19
+
+#define MIN_HUFFMAN_BITS             2  // min number of Huffman bits
+#define MAX_HUFFMAN_BITS             9  // max number of Huffman bits
+
+#define TRANSFORM_PRESENT            1  // The bit to be written when next data
+                                        // to be read is a transform.
+#define NUM_TRANSFORMS               4  // Maximum number of allowed transform
+                                        // in a bitstream.
+typedef enum {
+  PREDICTOR_TRANSFORM      = 0,
+  CROSS_COLOR_TRANSFORM    = 1,
+  SUBTRACT_GREEN           = 2,
+  COLOR_INDEXING_TRANSFORM = 3
+} VP8LImageTransformType;
+
+// Alpha related constants.
+#define ALPHA_HEADER_LEN            1
+#define ALPHA_NO_COMPRESSION        0
+#define ALPHA_LOSSLESS_COMPRESSION  1
+#define ALPHA_PREPROCESSED_LEVELS   1
+
+// Mux related constants.
+#define TAG_SIZE           4     // Size of a chunk tag (e.g. "VP8L").
+#define CHUNK_SIZE_BYTES   4     // Size needed to store chunk's size.
+#define CHUNK_HEADER_SIZE  8     // Size of a chunk header.
+#define RIFF_HEADER_SIZE   12    // Size of the RIFF header ("RIFFnnnnWEBP").
+#define ANMF_CHUNK_SIZE    16    // Size of an ANMF chunk.
+#define ANIM_CHUNK_SIZE    6     // Size of an ANIM chunk.
+#define FRGM_CHUNK_SIZE    6     // Size of a FRGM chunk.
+#define VP8X_CHUNK_SIZE    10    // Size of a VP8X chunk.
+
+#define MAX_CANVAS_SIZE     (1 << 24)     // 24-bit max for VP8X width/height.
+#define MAX_IMAGE_AREA      (1ULL << 32)  // 32-bit max for width x height.
+#define MAX_LOOP_COUNT      (1 << 16)     // maximum value for loop-count
+#define MAX_DURATION        (1 << 24)     // maximum duration
+#define MAX_POSITION_OFFSET (1 << 24)     // maximum frame/fragment x/y offset
+
+// Maximum chunk payload is such that adding the header and padding won't
+// overflow a uint32_t.
+#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1)
+
+#endif  /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */
diff --git a/src/main/jni/libwebp/webp/mux.h b/src/main/jni/libwebp/webp/mux.h
new file mode 100644
index 000000000..1ae03b348
--- /dev/null
+++ b/src/main/jni/libwebp/webp/mux.h
@@ -0,0 +1,399 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  RIFF container manipulation for WebP images.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+// This API allows manipulation of WebP container images containing features
+// like color profile, metadata, animation and fragmented images.
+//
+// Code Example#1: Create a WebPMux object with image data, color profile and
+// XMP metadata.
+/*
+  int copy_data = 0;
+  WebPMux* mux = WebPMuxNew();
+  // ... (Prepare image data).
+  WebPMuxSetImage(mux, &image, copy_data);
+  // ... (Prepare ICCP color profile data).
+  WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
+  // ... (Prepare XMP metadata).
+  WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
+  // Get data from mux in WebP RIFF format.
+  WebPMuxAssemble(mux, &output_data);
+  WebPMuxDelete(mux);
+  // ... (Consume output_data; e.g. write output_data.bytes to file).
+  WebPDataClear(&output_data);
+*/
+
+// Code Example#2: Get image and color profile data from a WebP file.
+/*
+  int copy_data = 0;
+  // ... (Read data from file).
+  WebPMux* mux = WebPMuxCreate(&data, copy_data);
+  WebPMuxGetFrame(mux, 1, &image);
+  // ... (Consume image; e.g. call WebPDecode() to decode the data).
+  WebPMuxGetChunk(mux, "ICCP", &icc_profile);
+  // ... (Consume icc_data).
+  WebPMuxDelete(mux);
+  free(data);
+*/
+
+#ifndef WEBP_WEBP_MUX_H_
+#define WEBP_WEBP_MUX_H_
+
+#include "./mux_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_MUX_ABI_VERSION 0x0101        // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPMuxError WebPMuxError;
+// typedef enum WebPChunkId WebPChunkId;
+typedef struct WebPMux WebPMux;   // main opaque object.
+typedef struct WebPMuxFrameInfo WebPMuxFrameInfo;
+typedef struct WebPMuxAnimParams WebPMuxAnimParams;
+
+// Error codes
+typedef enum WebPMuxError {
+  WEBP_MUX_OK                 =  1,
+  WEBP_MUX_NOT_FOUND          =  0,
+  WEBP_MUX_INVALID_ARGUMENT   = -1,
+  WEBP_MUX_BAD_DATA           = -2,
+  WEBP_MUX_MEMORY_ERROR       = -3,
+  WEBP_MUX_NOT_ENOUGH_DATA    = -4
+} WebPMuxError;
+
+// IDs for different types of chunks.
+typedef enum WebPChunkId {
+  WEBP_CHUNK_VP8X,     // VP8X
+  WEBP_CHUNK_ICCP,     // ICCP
+  WEBP_CHUNK_ANIM,     // ANIM
+  WEBP_CHUNK_ANMF,     // ANMF
+  WEBP_CHUNK_FRGM,     // FRGM
+  WEBP_CHUNK_ALPHA,    // ALPH
+  WEBP_CHUNK_IMAGE,    // VP8/VP8L
+  WEBP_CHUNK_EXIF,     // EXIF
+  WEBP_CHUNK_XMP,      // XMP
+  WEBP_CHUNK_UNKNOWN,  // Other chunks.
+  WEBP_CHUNK_NIL
+} WebPChunkId;
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the mux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetMuxVersion(void);
+
+//------------------------------------------------------------------------------
+// Life of a Mux object
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
+
+// Creates an empty mux object.
+// Returns:
+//   A pointer to the newly created empty mux object.
+//   Or NULL in case of memory error.
+static WEBP_INLINE WebPMux* WebPMuxNew(void) {
+  return WebPNewInternal(WEBP_MUX_ABI_VERSION);
+}
+
+// Deletes the mux object.
+// Parameters:
+//   mux - (in/out) object to be deleted
+WEBP_EXTERN(void) WebPMuxDelete(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// Mux creation.
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
+
+// Creates a mux object from raw data given in WebP RIFF format.
+// Parameters:
+//   bitstream - (in) the bitstream data in WebP RIFF format
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   A pointer to the mux object created from given data - on success.
+//   NULL - In case of invalid data or memory error.
+static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
+                                          int copy_data) {
+  return WebPMuxCreateInternal(bitstream, copy_data, WEBP_MUX_ABI_VERSION);
+}
+
+//------------------------------------------------------------------------------
+// Non-image chunks.
+
+// Note: Only non-image related chunks should be managed through chunk APIs.
+// (Image related chunks are: "ANMF", "FRGM", "VP8 ", "VP8L" and "ALPH").
+// To add, get and delete images, use WebPMuxSetImage(), WebPMuxPushFrame(),
+// WebPMuxGetFrame() and WebPMuxDeleteFrame().
+
+// Adds a chunk with id 'fourcc' and data 'chunk_data' in the mux object.
+// Any existing chunk(s) with the same id will be removed.
+// Parameters:
+//   mux - (in/out) object to which the chunk is to be added
+//   fourcc - (in) a character array containing the fourcc of the given chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (in) the chunk data to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
+    WebPMux* mux, const char fourcc[4], const WebPData* chunk_data,
+    int copy_data);
+
+// Gets a reference to the data of the chunk with id 'fourcc' in the mux object.
+// The caller should NOT free the returned data.
+// Parameters:
+//   mux - (in) object from which the chunk data is to be fetched
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (out) returned chunk data
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
+    const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
+
+// Deletes the chunk with the given 'fourcc' from the mux object.
+// Parameters:
+//   mux - (in/out) object from which the chunk is to be deleted
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or fourcc is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(
+    WebPMux* mux, const char fourcc[4]);
+
+//------------------------------------------------------------------------------
+// Images.
+
+// Encapsulates data about a single frame/fragment.
+struct WebPMuxFrameInfo {
+  WebPData    bitstream;  // image data: can be a raw VP8/VP8L bitstream
+                          // or a single-image WebP file.
+  int         x_offset;   // x-offset of the frame.
+  int         y_offset;   // y-offset of the frame.
+  int         duration;   // duration of the frame (in milliseconds).
+
+  WebPChunkId id;         // frame type: should be one of WEBP_CHUNK_ANMF,
+                          // WEBP_CHUNK_FRGM or WEBP_CHUNK_IMAGE
+  WebPMuxAnimDispose dispose_method;  // Disposal method for the frame.
+  WebPMuxAnimBlend   blend_method;    // Blend operation for the frame.
+  uint32_t    pad[1];     // padding for later use
+};
+
+// Sets the (non-animated and non-fragmented) image in the mux object.
+// Note: Any existing images (including frames/fragments) will be removed.
+// Parameters:
+//   mux - (in/out) object in which the image is to be set
+//   bitstream - (in) can be a raw VP8/VP8L bitstream or a single-image
+//               WebP file (non-animated and non-fragmented)
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
+    WebPMux* mux, const WebPData* bitstream, int copy_data);
+
+// Adds a frame at the end of the mux object.
+// Notes: (1) frame.id should be one of WEBP_CHUNK_ANMF or WEBP_CHUNK_FRGM
+//        (2) For setting a non-animated non-fragmented image, use
+//            WebPMuxSetImage() instead.
+//        (3) Type of frame being pushed must be same as the frames in mux.
+//        (4) As WebP only supports even offsets, any odd offset will be snapped
+//            to an even location using: offset &= ~1
+// Parameters:
+//   mux - (in/out) object to which the frame is to be added
+//   frame - (in) frame data.
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL
+//                               or if content of 'frame' is invalid.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
+    WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
+
+// Gets the nth frame from the mux object.
+// The content of 'frame->bitstream' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   nth - (in) index of the frame in the mux object
+//   frame - (out) data of the returned frame
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL.
+//   WEBP_MUX_NOT_FOUND - if there are less than nth frames in the mux object.
+//   WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
+    const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
+
+// Deletes a frame from the mux object.
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in/out) object from which a frame is to be deleted
+//   nth - (in) The position from which the frame is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL.
+//   WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
+//                        before deletion.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
+
+//------------------------------------------------------------------------------
+// Animation.
+
+// Animation parameters.
+struct WebPMuxAnimParams {
+  uint32_t bgcolor;  // Background color of the canvas stored (in MSB order) as:
+                     // Bits 00 to 07: Alpha.
+                     // Bits 08 to 15: Red.
+                     // Bits 16 to 23: Green.
+                     // Bits 24 to 31: Blue.
+  int loop_count;    // Number of times to repeat the animation [0 = infinite].
+};
+
+// Sets the animation parameters in the mux object. Any existing ANIM chunks
+// will be removed.
+// Parameters:
+//   mux - (in/out) object in which ANIM chunk is to be set/added
+//   params - (in) animation parameters.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
+    WebPMux* mux, const WebPMuxAnimParams* params);
+
+// Gets the animation parameters from the mux object.
+// Parameters:
+//   mux - (in) object from which the animation parameters to be fetched
+//   params - (out) animation parameters extracted from the ANIM chunk
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
+//   WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
+    const WebPMux* mux, WebPMuxAnimParams* params);
+
+//------------------------------------------------------------------------------
+// Misc Utilities.
+
+#if WEBP_MUX_ABI_VERSION > 0x0101
+// Sets the canvas size for the mux object. The width and height can be
+// specified explicitly or left as zero (0, 0).
+// * When width and height are specified explicitly, then this frame bound is
+//   enforced during subsequent calls to WebPMuxAssemble() and an error is
+//   reported if any animated frame does not completely fit within the canvas.
+// * When unspecified (0, 0), the constructed canvas will get the frame bounds
+//   from the bounding-box over all frames after calling WebPMuxAssemble().
+// Parameters:
+//   mux - (in) object to which the canvas size is to be set
+//   width - (in) canvas width
+//   height - (in) canvas height
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL; or
+//                               width or height are invalid or out of bounds
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
+                                               int width, int height);
+#endif
+
+// Gets the canvas size from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
+// Parameters:
+//   mux - (in) object from which the canvas size is to be fetched
+//   width - (out) canvas width
+//   height - (out) canvas height
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, width or height is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
+                                               int* width, int* height);
+
+// Gets the feature flags from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
+// Parameters:
+//   mux - (in) object from which the features are to be fetched
+//   flags - (out) the flags specifying which features are present in the
+//           mux object. This will be an OR of various flag values.
+//           Enum 'WebPFeatureFlags' can be used to test individual flag values.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
+                                             uint32_t* flags);
+
+// Gets number of chunks with the given 'id' in the mux object.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   id - (in) chunk id specifying the type of chunk
+//   num_elements - (out) number of chunks with the given chunk id
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, or num_elements is NULL.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
+                                           WebPChunkId id, int* num_elements);
+
+// Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
+// This function also validates the mux object.
+// Note: The content of 'assembled_data' will be ignored and overwritten.
+// Also, the content of 'assembled_data' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear(). It's always safe to call WebPDataClear() upon return,
+// even in case of error.
+// Parameters:
+//   mux - (in/out) object whose chunks are to be assembled
+//   assembled_data - (out) assembled WebP data
+// Returns:
+//   WEBP_MUX_BAD_DATA - if mux object is invalid.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or assembled_data is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
+                                          WebPData* assembled_data);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_MUX_H_ */
diff --git a/src/main/jni/libwebp/webp/mux_types.h b/src/main/jni/libwebp/webp/mux_types.h
new file mode 100644
index 000000000..c94043a3c
--- /dev/null
+++ b/src/main/jni/libwebp/webp/mux_types.h
@@ -0,0 +1,97 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Data-types common to the mux and demux libraries.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_MUX_TYPES_H_
+#define WEBP_WEBP_MUX_TYPES_H_
+
+#include <stdlib.h>  // free()
+#include <string.h>  // memset()
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPFeatureFlags WebPFeatureFlags;
+// typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
+// typedef enum WebPMuxAnimBlend WebPMuxAnimBlend;
+typedef struct WebPData WebPData;
+
+// VP8X Feature Flags.
+typedef enum WebPFeatureFlags {
+  FRAGMENTS_FLAG  = 0x00000001,
+  ANIMATION_FLAG  = 0x00000002,
+  XMP_FLAG        = 0x00000004,
+  EXIF_FLAG       = 0x00000008,
+  ALPHA_FLAG      = 0x00000010,
+  ICCP_FLAG       = 0x00000020
+} WebPFeatureFlags;
+
+// Dispose method (animation only). Indicates how the area used by the current
+// frame is to be treated before rendering the next frame on the canvas.
+typedef enum WebPMuxAnimDispose {
+  WEBP_MUX_DISPOSE_NONE,       // Do not dispose.
+  WEBP_MUX_DISPOSE_BACKGROUND  // Dispose to background color.
+} WebPMuxAnimDispose;
+
+// Blend operation (animation only). Indicates how transparent pixels of the
+// current frame are blended with those of the previous canvas.
+typedef enum WebPMuxAnimBlend {
+  WEBP_MUX_BLEND,              // Blend.
+  WEBP_MUX_NO_BLEND            // Do not blend.
+} WebPMuxAnimBlend;
+
+// Data type used to describe 'raw' data, e.g., chunk data
+// (ICC profile, metadata) and WebP compressed image data.
+struct WebPData {
+  const uint8_t* bytes;
+  size_t size;
+};
+
+// Initializes the contents of the 'webp_data' object with default values.
+static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
+  if (webp_data != NULL) {
+    memset(webp_data, 0, sizeof(*webp_data));
+  }
+}
+
+// Clears the contents of the 'webp_data' object by calling free(). Does not
+// deallocate the object itself.
+static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
+  if (webp_data != NULL) {
+    free((void*)webp_data->bytes);
+    WebPDataInit(webp_data);
+  }
+}
+
+// Allocates necessary storage for 'dst' and copies the contents of 'src'.
+// Returns true on success.
+static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  WebPDataInit(dst);
+  if (src->bytes != NULL && src->size != 0) {
+    dst->bytes = (uint8_t*)malloc(src->size);
+    if (dst->bytes == NULL) return 0;
+    memcpy((void*)dst->bytes, src->bytes, src->size);
+    dst->size = src->size;
+  }
+  return 1;
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_MUX_TYPES_H_ */
diff --git a/src/main/jni/libwebp/webp/types.h b/src/main/jni/libwebp/webp/types.h
new file mode 100644
index 000000000..568d1f263
--- /dev/null
+++ b/src/main/jni/libwebp/webp/types.h
@@ -0,0 +1,47 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  Common types
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_TYPES_H_
+#define WEBP_WEBP_TYPES_H_
+
+#include <stddef.h>  // for size_t
+
+#ifndef _MSC_VER
+#include <inttypes.h>
+#ifdef __STRICT_ANSI__
+#define WEBP_INLINE
+#else  /* __STRICT_ANSI__ */
+#define WEBP_INLINE inline
+#endif
+#else
+typedef signed   char int8_t;
+typedef unsigned char uint8_t;
+typedef signed   short int16_t;
+typedef unsigned short uint16_t;
+typedef signed   int int32_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+#define WEBP_INLINE __forceinline
+#endif  /* _MSC_VER */
+
+#ifndef WEBP_EXTERN
+// This explicitly marks library functions and allows for changing the
+// signature for e.g., Windows DLL builds.
+#define WEBP_EXTERN(type) extern type
+#endif  /* WEBP_EXTERN */
+
+// Macro to check ABI compatibility (same major revision number)
+#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
+
+#endif  /* WEBP_WEBP_TYPES_H_ */
diff --git a/src/main/jni/libyuv/include/libyuv.h b/src/main/jni/libyuv/include/libyuv.h
new file mode 100644
index 000000000..3bebe642c
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv.h
@@ -0,0 +1,33 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_H_  // NOLINT
+#define INCLUDE_LIBYUV_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#include "libyuv/mjpeg_decoder.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/rotate_argb.h"
+#include "libyuv/row.h"
+#include "libyuv/scale.h"
+#include "libyuv/scale_argb.h"
+#include "libyuv/scale_row.h"
+#include "libyuv/version.h"
+#include "libyuv/video_common.h"
+
+#endif  // INCLUDE_LIBYUV_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/basic_types.h b/src/main/jni/libyuv/include/libyuv/basic_types.h
new file mode 100644
index 000000000..beb750ba6
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/basic_types.h
@@ -0,0 +1,118 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_  // NOLINT
+#define INCLUDE_LIBYUV_BASIC_TYPES_H_
+
+#include <stddef.h>  // for NULL, size_t
+
+#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
+#include <sys/types.h>  // for uintptr_t on x86
+#else
+#include <stdint.h>  // for uintptr_t
+#endif
+
+#ifndef GG_LONGLONG
+#ifndef INT_TYPES_DEFINED
+#define INT_TYPES_DEFINED
+#ifdef COMPILER_MSVC
+typedef unsigned __int64 uint64;
+typedef __int64 int64;
+#ifndef INT64_C
+#define INT64_C(x) x ## I64
+#endif
+#ifndef UINT64_C
+#define UINT64_C(x) x ## UI64
+#endif
+#define INT64_F "I64"
+#else  // COMPILER_MSVC
+#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
+typedef unsigned long uint64;  // NOLINT
+typedef long int64;  // NOLINT
+#ifndef INT64_C
+#define INT64_C(x) x ## L
+#endif
+#ifndef UINT64_C
+#define UINT64_C(x) x ## UL
+#endif
+#define INT64_F "l"
+#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
+typedef unsigned long long uint64;  // NOLINT
+typedef long long int64;  // NOLINT
+#ifndef INT64_C
+#define INT64_C(x) x ## LL
+#endif
+#ifndef UINT64_C
+#define UINT64_C(x) x ## ULL
+#endif
+#define INT64_F "ll"
+#endif  // __LP64__
+#endif  // COMPILER_MSVC
+typedef unsigned int uint32;
+typedef int int32;
+typedef unsigned short uint16;  // NOLINT
+typedef short int16;  // NOLINT
+typedef unsigned char uint8;
+typedef signed char int8;
+#endif  // INT_TYPES_DEFINED
+#endif  // GG_LONGLONG
+
+// Detect compiler is for x86 or x64.
+#if defined(__x86_64__) || defined(_M_X64) || \
+    defined(__i386__) || defined(_M_IX86)
+#define CPU_X86 1
+#endif
+// Detect compiler is for ARM.
+#if defined(__arm__) || defined(_M_ARM)
+#define CPU_ARM 1
+#endif
+
+#ifndef ALIGNP
+#ifdef __cplusplus
+#define ALIGNP(p, t) \
+    (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
+    ((t) - 1)) & ~((t) - 1))))
+#else
+#define ALIGNP(p, t) \
+    ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1))))  /* NOLINT */
+#endif
+#endif
+
+#if !defined(LIBYUV_API)
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
+#define LIBYUV_API __declspec(dllexport)
+#elif defined(LIBYUV_USING_SHARED_LIBRARY)
+#define LIBYUV_API __declspec(dllimport)
+#else
+#define LIBYUV_API
+#endif  // LIBYUV_BUILDING_SHARED_LIBRARY
+#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
+    (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
+    defined(LIBYUV_USING_SHARED_LIBRARY))
+#define LIBYUV_API __attribute__ ((visibility ("default")))
+#else
+#define LIBYUV_API
+#endif  // __GNUC__
+#endif  // LIBYUV_API
+
+#define LIBYUV_BOOL int
+#define LIBYUV_FALSE 0
+#define LIBYUV_TRUE 1
+
+// Visual C x86 or GCC little endian.
+#if defined(__x86_64__) || defined(_M_X64) || \
+  defined(__i386__) || defined(_M_IX86) || \
+  defined(__arm__) || defined(_M_ARM) || \
+  (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define LIBYUV_LITTLE_ENDIAN
+#endif
+
+#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/compare.h b/src/main/jni/libyuv/include/libyuv/compare.h
new file mode 100644
index 000000000..5dfac7c86
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/compare.h
@@ -0,0 +1,73 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_COMPARE_H_  // NOLINT
+#define INCLUDE_LIBYUV_COMPARE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Compute a hash for specified memory. Seed of 5381 recommended.
+LIBYUV_API
+uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
+
+// Sum Square Error - used to compute Mean Square Error or PSNR.
+LIBYUV_API
+uint64 ComputeSumSquareError(const uint8* src_a,
+                             const uint8* src_b, int count);
+
+LIBYUV_API
+uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
+                                  const uint8* src_b, int stride_b,
+                                  int width, int height);
+
+static const int kMaxPsnr = 128;
+
+LIBYUV_API
+double SumSquareErrorToPsnr(uint64 sse, uint64 count);
+
+LIBYUV_API
+double CalcFramePsnr(const uint8* src_a, int stride_a,
+                     const uint8* src_b, int stride_b,
+                     int width, int height);
+
+LIBYUV_API
+double I420Psnr(const uint8* src_y_a, int stride_y_a,
+                const uint8* src_u_a, int stride_u_a,
+                const uint8* src_v_a, int stride_v_a,
+                const uint8* src_y_b, int stride_y_b,
+                const uint8* src_u_b, int stride_u_b,
+                const uint8* src_v_b, int stride_v_b,
+                int width, int height);
+
+LIBYUV_API
+double CalcFrameSsim(const uint8* src_a, int stride_a,
+                     const uint8* src_b, int stride_b,
+                     int width, int height);
+
+LIBYUV_API
+double I420Ssim(const uint8* src_y_a, int stride_y_a,
+                const uint8* src_u_a, int stride_u_a,
+                const uint8* src_v_a, int stride_v_a,
+                const uint8* src_y_b, int stride_y_b,
+                const uint8* src_u_b, int stride_u_b,
+                const uint8* src_v_b, int stride_v_b,
+                int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_COMPARE_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/convert.h b/src/main/jni/libyuv/include/libyuv/convert.h
new file mode 100644
index 000000000..1bd45c837
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/convert.h
@@ -0,0 +1,254 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_H_  // NOLINT
+#define INCLUDE_LIBYUV_CONVERT_H_
+
+#include "libyuv/basic_types.h"
+// TODO(fbarchard): Remove the following headers includes.
+#include "libyuv/convert_from.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert I444 to I420.
+LIBYUV_API
+int I444ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert I422 to I420.
+LIBYUV_API
+int I422ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert I411 to I420.
+LIBYUV_API
+int I411ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Copy I420 to I420.
+#define I420ToI420 I420Copy
+LIBYUV_API
+int I420Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height);
+
+// Convert I400 (grey) to I420.
+LIBYUV_API
+int I400ToI420(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert NV12 to I420.
+LIBYUV_API
+int NV12ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_uv, int src_stride_uv,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert NV21 to I420.
+LIBYUV_API
+int NV21ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_vu, int src_stride_vu,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert YUY2 to I420.
+LIBYUV_API
+int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert UYVY to I420.
+LIBYUV_API
+int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert M420 to I420.
+LIBYUV_API
+int M420ToI420(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert Q420 to I420.
+LIBYUV_API
+int Q420ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// ARGB little endian (bgra in memory) to I420.
+LIBYUV_API
+int ARGBToI420(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// BGRA little endian (argb in memory) to I420.
+LIBYUV_API
+int BGRAToI420(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// ABGR little endian (rgba in memory) to I420.
+LIBYUV_API
+int ABGRToI420(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// RGBA little endian (abgr in memory) to I420.
+LIBYUV_API
+int RGBAToI420(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// RGB little endian (bgr in memory) to I420.
+LIBYUV_API
+int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
+                uint8* dst_y, int dst_stride_y,
+                uint8* dst_u, int dst_stride_u,
+                uint8* dst_v, int dst_stride_v,
+                int width, int height);
+
+// RGB big endian (rgb in memory) to I420.
+LIBYUV_API
+int RAWToI420(const uint8* src_frame, int src_stride_frame,
+              uint8* dst_y, int dst_stride_y,
+              uint8* dst_u, int dst_stride_u,
+              uint8* dst_v, int dst_stride_v,
+              int width, int height);
+
+// RGB16 (RGBP fourcc) little endian to I420.
+LIBYUV_API
+int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
+                 uint8* dst_y, int dst_stride_y,
+                 uint8* dst_u, int dst_stride_u,
+                 uint8* dst_v, int dst_stride_v,
+                 int width, int height);
+
+// RGB15 (RGBO fourcc) little endian to I420.
+LIBYUV_API
+int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
+                   uint8* dst_y, int dst_stride_y,
+                   uint8* dst_u, int dst_stride_u,
+                   uint8* dst_v, int dst_stride_v,
+                   int width, int height);
+
+// RGB12 (R444 fourcc) little endian to I420.
+LIBYUV_API
+int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
+                   uint8* dst_y, int dst_stride_y,
+                   uint8* dst_u, int dst_stride_u,
+                   uint8* dst_v, int dst_stride_v,
+                   int width, int height);
+
+#ifdef HAVE_JPEG
+// src_width/height provided by capture.
+// dst_width/height for clipping determine final size.
+LIBYUV_API
+int MJPGToI420(const uint8* sample, size_t sample_size,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int src_width, int src_height,
+               int dst_width, int dst_height);
+
+// Query size of MJPG in pixels.
+LIBYUV_API
+int MJPGSize(const uint8* sample, size_t sample_size,
+             int* width, int* height);
+#endif
+
+// Note Bayer formats (BGGR) To I420 are in format_conversion.h
+
+// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// "src_size" is needed to parse MJPG.
+// "dst_stride_y" number of bytes in a row of the dst_y plane.
+//   Normally this would be the same as dst_width, with recommended alignment
+//   to 16 bytes for better efficiency.
+//   If rotation of 90 or 270 is used, stride is affected. The caller should
+//   allocate the I420 buffer according to rotation.
+// "dst_stride_u" number of bytes in a row of the dst_u plane.
+//   Normally this would be the same as (dst_width + 1) / 2, with
+//   recommended alignment to 16 bytes for better efficiency.
+//   If rotation of 90 or 270 is used, stride is affected.
+// "crop_x" and "crop_y" are starting position for cropping.
+//   To center, crop_x = (src_width - dst_width) / 2
+//              crop_y = (src_height - dst_height) / 2
+// "src_width" / "src_height" is size of src_frame in pixels.
+//   "src_height" can be negative indicating a vertically flipped image source.
+// "crop_width" / "crop_height" is the size to crop the src to.
+//    Must be less than or equal to src_width/src_height
+//    Cropping parameters are pre-rotation.
+// "rotation" can be 0, 90, 180 or 270.
+// "format" is a fourcc. ie 'I420', 'YUY2'
+// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
+LIBYUV_API
+int ConvertToI420(const uint8* src_frame, size_t src_size,
+                  uint8* dst_y, int dst_stride_y,
+                  uint8* dst_u, int dst_stride_u,
+                  uint8* dst_v, int dst_stride_v,
+                  int crop_x, int crop_y,
+                  int src_width, int src_height,
+                  int crop_width, int crop_height,
+                  enum RotationMode rotation,
+                  uint32 format);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/convert_argb.h b/src/main/jni/libyuv/include/libyuv/convert_argb.h
new file mode 100644
index 000000000..a18014ca2
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/convert_argb.h
@@ -0,0 +1,225 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_  // NOLINT
+#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
+
+#include "libyuv/basic_types.h"
+// TODO(fbarchard): Remove the following headers includes
+#include "libyuv/convert_from.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+
+// TODO(fbarchard): This set of functions should exactly match convert.h
+// Add missing Q420.
+// TODO(fbarchard): Add tests. Create random content of right size and convert
+// with C vs Opt and or to I420 and compare.
+// TODO(fbarchard): Some of these functions lack parameter setting.
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Alias.
+#define ARGBToARGB ARGBCopy
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int width, int height);
+
+// Convert I420 to ARGB.
+LIBYUV_API
+int I420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert I444 to ARGB.
+LIBYUV_API
+int I444ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert I411 to ARGB.
+LIBYUV_API
+int I411ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert I400 (grey) to ARGB.
+LIBYUV_API
+int I400ToARGB(const uint8* src_y, int src_stride_y,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Alias.
+#define YToARGB I400ToARGB_Reference
+
+// Convert I400 to ARGB. Reverse of ARGBToI400.
+LIBYUV_API
+int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
+                         uint8* dst_argb, int dst_stride_argb,
+                         int width, int height);
+
+// Convert NV12 to ARGB.
+LIBYUV_API
+int NV12ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_uv, int src_stride_uv,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert NV21 to ARGB.
+LIBYUV_API
+int NV21ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_vu, int src_stride_vu,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert M420 to ARGB.
+LIBYUV_API
+int M420ToARGB(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// TODO(fbarchard): Convert Q420 to ARGB.
+// LIBYUV_API
+// int Q420ToARGB(const uint8* src_y, int src_stride_y,
+//                const uint8* src_yuy2, int src_stride_yuy2,
+//                uint8* dst_argb, int dst_stride_argb,
+//                int width, int height);
+
+// Convert YUY2 to ARGB.
+LIBYUV_API
+int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert UYVY to ARGB.
+LIBYUV_API
+int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// BGRA little endian (argb in memory) to ARGB.
+LIBYUV_API
+int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// ABGR little endian (rgba in memory) to ARGB.
+LIBYUV_API
+int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// RGBA little endian (abgr in memory) to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Deprecated function name.
+#define BG24ToARGB RGB24ToARGB
+
+// RGB little endian (bgr in memory) to ARGB.
+LIBYUV_API
+int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height);
+
+// RGB big endian (rgb in memory) to ARGB.
+LIBYUV_API
+int RAWToARGB(const uint8* src_frame, int src_stride_frame,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height);
+
+// RGB16 (RGBP fourcc) little endian to ARGB.
+LIBYUV_API
+int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height);
+
+// RGB15 (RGBO fourcc) little endian to ARGB.
+LIBYUV_API
+int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
+                   uint8* dst_argb, int dst_stride_argb,
+                   int width, int height);
+
+// RGB12 (R444 fourcc) little endian to ARGB.
+LIBYUV_API
+int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
+                   uint8* dst_argb, int dst_stride_argb,
+                   int width, int height);
+
+#ifdef HAVE_JPEG
+// src_width/height provided by capture
+// dst_width/height for clipping determine final size.
+LIBYUV_API
+int MJPGToARGB(const uint8* sample, size_t sample_size,
+               uint8* dst_argb, int dst_stride_argb,
+               int src_width, int src_height,
+               int dst_width, int dst_height);
+#endif
+
+// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
+
+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
+// "src_size" is needed to parse MJPG.
+// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
+//   Normally this would be the same as dst_width, with recommended alignment
+//   to 16 bytes for better efficiency.
+//   If rotation of 90 or 270 is used, stride is affected. The caller should
+//   allocate the I420 buffer according to rotation.
+// "dst_stride_u" number of bytes in a row of the dst_u plane.
+//   Normally this would be the same as (dst_width + 1) / 2, with
+//   recommended alignment to 16 bytes for better efficiency.
+//   If rotation of 90 or 270 is used, stride is affected.
+// "crop_x" and "crop_y" are starting position for cropping.
+//   To center, crop_x = (src_width - dst_width) / 2
+//              crop_y = (src_height - dst_height) / 2
+// "src_width" / "src_height" is size of src_frame in pixels.
+//   "src_height" can be negative indicating a vertically flipped image source.
+// "crop_width" / "crop_height" is the size to crop the src to.
+//    Must be less than or equal to src_width/src_height
+//    Cropping parameters are pre-rotation.
+// "rotation" can be 0, 90, 180 or 270.
+// "format" is a fourcc. ie 'I420', 'YUY2'
+// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
+LIBYUV_API
+int ConvertToARGB(const uint8* src_frame, size_t src_size,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int crop_x, int crop_y,
+                  int src_width, int src_height,
+                  int crop_width, int crop_height,
+                  enum RotationMode rotation,
+                  uint32 format);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_ARGB_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/convert_from.h b/src/main/jni/libyuv/include/libyuv/convert_from.h
new file mode 100644
index 000000000..b1cf57f7d
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/convert_from.h
@@ -0,0 +1,173 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_  // NOLINT
+#define INCLUDE_LIBYUV_CONVERT_FROM_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/rotate.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// See Also convert.h for conversions from formats to I420.
+
+// I420Copy in convert to I420ToI420.
+
+LIBYUV_API
+int I420ToI422(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+LIBYUV_API
+int I420ToI444(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+LIBYUV_API
+int I420ToI411(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
+LIBYUV_API
+int I400Copy(const uint8* src_y, int src_stride_y,
+             uint8* dst_y, int dst_stride_y,
+             int width, int height);
+
+// TODO(fbarchard): I420ToM420
+// TODO(fbarchard): I420ToQ420
+
+LIBYUV_API
+int I420ToNV12(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height);
+
+LIBYUV_API
+int I420ToNV21(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_vu, int dst_stride_vu,
+               int width, int height);
+
+LIBYUV_API
+int I420ToYUY2(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_frame, int dst_stride_frame,
+               int width, int height);
+
+LIBYUV_API
+int I420ToUYVY(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_frame, int dst_stride_frame,
+               int width, int height);
+
+LIBYUV_API
+int I420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+LIBYUV_API
+int I420ToBGRA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+LIBYUV_API
+int I420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+LIBYUV_API
+int I420ToRGBA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height);
+
+LIBYUV_API
+int I420ToRGB24(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_frame, int dst_stride_frame,
+                int width, int height);
+
+LIBYUV_API
+int I420ToRAW(const uint8* src_y, int src_stride_y,
+              const uint8* src_u, int src_stride_u,
+              const uint8* src_v, int src_stride_v,
+              uint8* dst_frame, int dst_stride_frame,
+              int width, int height);
+
+LIBYUV_API
+int I420ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_u, int src_stride_u,
+                 const uint8* src_v, int src_stride_v,
+                 uint8* dst_frame, int dst_stride_frame,
+                 int width, int height);
+
+LIBYUV_API
+int I420ToARGB1555(const uint8* src_y, int src_stride_y,
+                   const uint8* src_u, int src_stride_u,
+                   const uint8* src_v, int src_stride_v,
+                   uint8* dst_frame, int dst_stride_frame,
+                   int width, int height);
+
+LIBYUV_API
+int I420ToARGB4444(const uint8* src_y, int src_stride_y,
+                   const uint8* src_u, int src_stride_u,
+                   const uint8* src_v, int src_stride_v,
+                   uint8* dst_frame, int dst_stride_frame,
+                   int width, int height);
+
+// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
+
+// Convert I420 to specified format.
+// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
+//    buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
+LIBYUV_API
+int ConvertFromI420(const uint8* y, int y_stride,
+                    const uint8* u, int u_stride,
+                    const uint8* v, int v_stride,
+                    uint8* dst_sample, int dst_sample_stride,
+                    int width, int height,
+                    uint32 format);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/convert_from_argb.h b/src/main/jni/libyuv/include/libyuv/convert_from_argb.h
new file mode 100644
index 000000000..90f43af04
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/convert_from_argb.h
@@ -0,0 +1,166 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  // NOLINT
+#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy ARGB to ARGB.
+#define ARGBToARGB ARGBCopy
+LIBYUV_API
+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int width, int height);
+
+// Convert ARGB To BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_bgra, int dst_stride_bgra,
+               int width, int height);
+
+// Convert ARGB To ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
+// Convert ARGB To RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height);
+
+// Convert ARGB To RGB24.
+LIBYUV_API
+int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_rgb24, int dst_stride_rgb24,
+                int width, int height);
+
+// Convert ARGB To RAW.
+LIBYUV_API
+int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_rgb, int dst_stride_rgb,
+              int width, int height);
+
+// Convert ARGB To RGB565.
+LIBYUV_API
+int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height);
+
+// Convert ARGB To ARGB1555.
+LIBYUV_API
+int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb1555, int dst_stride_argb1555,
+                   int width, int height);
+
+// Convert ARGB To ARGB4444.
+LIBYUV_API
+int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb4444, int dst_stride_argb4444,
+                   int width, int height);
+
+// Convert ARGB To I444.
+LIBYUV_API
+int ARGBToI444(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert ARGB To I422.
+LIBYUV_API
+int ARGBToI422(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert ARGB To I420. (also in convert.h)
+LIBYUV_API
+int ARGBToI420(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert ARGB to J420. (JPeg full range I420).
+LIBYUV_API
+int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yj, int dst_stride_yj,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert ARGB To I411.
+LIBYUV_API
+int ARGBToI411(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert ARGB to J400. (JPeg full range).
+LIBYUV_API
+int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yj, int dst_stride_yj,
+               int width, int height);
+
+// Convert ARGB to I400.
+LIBYUV_API
+int ARGBToI400(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+// Convert ARGB To NV12.
+LIBYUV_API
+int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height);
+
+// Convert ARGB To NV21.
+LIBYUV_API
+int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_vu, int dst_stride_vu,
+               int width, int height);
+
+// Convert ARGB To NV21.
+LIBYUV_API
+int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_vu, int dst_stride_vu,
+               int width, int height);
+
+// Convert ARGB To YUY2.
+LIBYUV_API
+int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yuy2, int dst_stride_yuy2,
+               int width, int height);
+
+// Convert ARGB To UYVY.
+LIBYUV_API
+int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/cpu_id.h b/src/main/jni/libyuv/include/libyuv/cpu_id.h
new file mode 100644
index 000000000..dc858a814
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/cpu_id.h
@@ -0,0 +1,81 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_CPU_ID_H_  // NOLINT
+#define INCLUDE_LIBYUV_CPU_ID_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// TODO(fbarchard): Consider overlapping bits for different architectures.
+// Internal flag to indicate cpuid requires initialization.
+#define kCpuInit 0x1
+
+// These flags are only valid on ARM processors.
+static const int kCpuHasARM = 0x2;
+static const int kCpuHasNEON = 0x4;
+// 0x8 reserved for future ARM flag.
+
+// These flags are only valid on x86 processors.
+static const int kCpuHasX86 = 0x10;
+static const int kCpuHasSSE2 = 0x20;
+static const int kCpuHasSSSE3 = 0x40;
+static const int kCpuHasSSE41 = 0x80;
+static const int kCpuHasSSE42 = 0x100;
+static const int kCpuHasAVX = 0x200;
+static const int kCpuHasAVX2 = 0x400;
+static const int kCpuHasERMS = 0x800;
+static const int kCpuHasFMA3 = 0x1000;
+// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
+
+// These flags are only valid on MIPS processors.
+static const int kCpuHasMIPS = 0x10000;
+static const int kCpuHasMIPS_DSP = 0x20000;
+static const int kCpuHasMIPS_DSPR2 = 0x40000;
+
+// Internal function used to auto-init.
+LIBYUV_API
+int InitCpuFlags(void);
+
+// Internal function for parsing /proc/cpuinfo.
+LIBYUV_API
+int ArmCpuCaps(const char* cpuinfo_name);
+
+// Detect CPU has SSE2 etc.
+// Test_flag parameter should be one of kCpuHas constants above.
+// returns non-zero if instruction set is detected
+static __inline int TestCpuFlag(int test_flag) {
+  LIBYUV_API extern int cpu_info_;
+  return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
+}
+
+// For testing, allow CPU flags to be disabled.
+// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
+// MaskCpuFlags(-1) to enable all cpu specific optimizations.
+// MaskCpuFlags(0) to disable all cpu specific optimizations.
+LIBYUV_API
+void MaskCpuFlags(int enable_flags);
+
+// Low level cpuid for X86. Returns zeros on other CPUs.
+// eax is the info type that you want.
+// ecx is typically the cpu number, and should normally be zero.
+LIBYUV_API
+void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CPU_ID_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/format_conversion.h b/src/main/jni/libyuv/include/libyuv/format_conversion.h
new file mode 100644
index 000000000..b18bf0534
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/format_conversion.h
@@ -0,0 +1,168 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_  // NOLINT
+#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert Bayer RGB formats to I420.
+LIBYUV_API
+int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+// Temporary API mapper.
+#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
+    BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
+
+LIBYUV_API
+int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_y, int dst_stride_y,
+                uint8* dst_u, int dst_stride_u,
+                uint8* dst_v, int dst_stride_v,
+                int width, int height,
+                uint32 src_fourcc_bayer);
+
+// Convert I420 to Bayer RGB formats.
+LIBYUV_API
+int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+// Temporary API mapper.
+#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
+    I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
+
+LIBYUV_API
+int I420ToBayer(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_frame, int dst_stride_frame,
+                int width, int height,
+                uint32 dst_fourcc_bayer);
+
+// Convert Bayer RGB formats to ARGB.
+LIBYUV_API
+int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+// Temporary API mapper.
+#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
+
+LIBYUV_API
+int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height,
+                uint32 src_fourcc_bayer);
+
+// Converts ARGB to Bayer RGB formats.
+LIBYUV_API
+int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+// Temporary API mapper.
+#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
+
+LIBYUV_API
+int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_bayer, int dst_stride_bayer,
+                int width, int height,
+                uint32 dst_fourcc_bayer);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_FORMATCONVERSION_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/mjpeg_decoder.h b/src/main/jni/libyuv/include/libyuv/mjpeg_decoder.h
new file mode 100644
index 000000000..8423121d1
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/mjpeg_decoder.h
@@ -0,0 +1,192 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_  // NOLINT
+#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+// NOTE: For a simplified public API use convert.h MJPGToI420().
+
+struct jpeg_common_struct;
+struct jpeg_decompress_struct;
+struct jpeg_source_mgr;
+
+namespace libyuv {
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+static const uint32 kUnknownDataSize = 0xFFFFFFFF;
+
+enum JpegSubsamplingType {
+  kJpegYuv420,
+  kJpegYuv422,
+  kJpegYuv411,
+  kJpegYuv444,
+  kJpegYuv400,
+  kJpegUnknown
+};
+
+struct Buffer {
+  const uint8* data;
+  int len;
+};
+
+struct BufferVector {
+  Buffer* buffers;
+  int len;
+  int pos;
+};
+
+struct SetJmpErrorMgr;
+
+// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
+// simply independent JPEG images with a fixed huffman table (which is omitted).
+// It is rarely used in video transmission, but is common as a camera capture
+// format, especially in Logitech devices. This class implements a decoder for
+// MJPEG frames.
+//
+// See http://tools.ietf.org/html/rfc2435
+class LIBYUV_API MJpegDecoder {
+ public:
+  typedef void (*CallbackFunction)(void* opaque,
+                                   const uint8* const* data,
+                                   const int* strides,
+                                   int rows);
+
+  static const int kColorSpaceUnknown;
+  static const int kColorSpaceGrayscale;
+  static const int kColorSpaceRgb;
+  static const int kColorSpaceYCbCr;
+  static const int kColorSpaceCMYK;
+  static const int kColorSpaceYCCK;
+
+  MJpegDecoder();
+  ~MJpegDecoder();
+
+  // Loads a new frame, reads its headers, and determines the uncompressed
+  // image format.
+  // Returns LIBYUV_TRUE if image looks valid and format is supported.
+  // If return value is LIBYUV_TRUE, then the values for all the following
+  // getters are populated.
+  // src_len is the size of the compressed mjpeg frame in bytes.
+  LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
+
+  // Returns width of the last loaded frame in pixels.
+  int GetWidth();
+
+  // Returns height of the last loaded frame in pixels.
+  int GetHeight();
+
+  // Returns format of the last loaded frame. The return value is one of the
+  // kColorSpace* constants.
+  int GetColorSpace();
+
+  // Number of color components in the color space.
+  int GetNumComponents();
+
+  // Sample factors of the n-th component.
+  int GetHorizSampFactor(int component);
+
+  int GetVertSampFactor(int component);
+
+  int GetHorizSubSampFactor(int component);
+
+  int GetVertSubSampFactor(int component);
+
+  // Public for testability.
+  int GetImageScanlinesPerImcuRow();
+
+  // Public for testability.
+  int GetComponentScanlinesPerImcuRow(int component);
+
+  // Width of a component in bytes.
+  int GetComponentWidth(int component);
+
+  // Height of a component.
+  int GetComponentHeight(int component);
+
+  // Width of a component in bytes with padding for DCTSIZE. Public for testing.
+  int GetComponentStride(int component);
+
+  // Size of a component in bytes.
+  int GetComponentSize(int component);
+
+  // Call this after LoadFrame() if you decide you don't want to decode it
+  // after all.
+  LIBYUV_BOOL UnloadFrame();
+
+  // Decodes the entire image into a one-buffer-per-color-component format.
+  // dst_width must match exactly. dst_height must be <= to image height; if
+  // less, the image is cropped. "planes" must have size equal to at least
+  // GetNumComponents() and they must point to non-overlapping buffers of size
+  // at least GetComponentSize(i). The pointers in planes are incremented
+  // to point to after the end of the written data.
+  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
+  LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
+
+  // Decodes the entire image and passes the data via repeated calls to a
+  // callback function. Each call will get the data for a whole number of
+  // image scanlines.
+  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
+  LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
+                        int dst_width, int dst_height);
+
+  // The helper function which recognizes the jpeg sub-sampling type.
+  static JpegSubsamplingType JpegSubsamplingTypeHelper(
+     int* subsample_x, int* subsample_y, int number_of_components);
+
+ private:
+  void AllocOutputBuffers(int num_outbufs);
+  void DestroyOutputBuffers();
+
+  LIBYUV_BOOL StartDecode();
+  LIBYUV_BOOL FinishDecode();
+
+  void SetScanlinePointers(uint8** data);
+  LIBYUV_BOOL DecodeImcuRow();
+
+  int GetComponentScanlinePadding(int component);
+
+  // A buffer holding the input data for a frame.
+  Buffer buf_;
+  BufferVector buf_vec_;
+
+  jpeg_decompress_struct* decompress_struct_;
+  jpeg_source_mgr* source_mgr_;
+  SetJmpErrorMgr* error_mgr_;
+
+  // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
+  // GetComponentScanlinePadding() != 0.)
+  LIBYUV_BOOL has_scanline_padding_;
+
+  // Temporaries used to point to scanline outputs.
+  int num_outbufs_;  // Outermost size of all arrays below.
+  uint8*** scanlines_;
+  int* scanlines_sizes_;
+  // Temporary buffer used for decoding when we can't decode directly to the
+  // output buffers. Large enough for just one iMCU row.
+  uint8** databuf_;
+  int* databuf_strides_;
+};
+
+}  // namespace libyuv
+
+#endif  //  __cplusplus
+#endif  // INCLUDE_LIBYUV_MJPEG_DECODER_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/planar_functions.h b/src/main/jni/libyuv/include/libyuv/planar_functions.h
new file mode 100644
index 000000000..d10a16985
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/planar_functions.h
@@ -0,0 +1,439 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  // NOLINT
+#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
+
+#include "libyuv/basic_types.h"
+
+// TODO(fbarchard): Remove the following headers includes.
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy a plane of data.
+LIBYUV_API
+void CopyPlane(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+LIBYUV_API
+void CopyPlane_16(const uint16* src_y, int src_stride_y,
+                  uint16* dst_y, int dst_stride_y,
+                  int width, int height);
+
+// Set a plane of data to a 32 bit value.
+LIBYUV_API
+void SetPlane(uint8* dst_y, int dst_stride_y,
+              int width, int height,
+              uint32 value);
+
+// Copy I400.  Supports inverting.
+LIBYUV_API
+int I400ToI400(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+
+// Copy I422 to I422.
+#define I422ToI422 I422Copy
+LIBYUV_API
+int I422Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height);
+
+// Copy I444 to I444.
+#define I444ToI444 I444Copy
+LIBYUV_API
+int I444Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height);
+
+// Convert YUY2 to I422.
+LIBYUV_API
+int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert UYVY to I422.
+LIBYUV_API
+int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Convert I420 to I400. (calls CopyPlane ignoring u/v).
+LIBYUV_API
+int I420ToI400(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+// Alias
+#define I420ToI420Mirror I420Mirror
+
+// I420 mirror.
+LIBYUV_API
+int I420Mirror(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
+// Alias
+#define I400ToI400Mirror I400Mirror
+
+// I400 mirror.  A single plane is mirrored horizontally.
+// Pass negative height to achieve 180 degree rotation.
+LIBYUV_API
+int I400Mirror(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+// Alias
+#define ARGBToARGBMirror ARGBMirror
+
+// ARGB mirror.
+LIBYUV_API
+int ARGBMirror(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert NV12 to RGB565.
+LIBYUV_API
+int NV12ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height);
+
+// Convert NV21 to RGB565.
+LIBYUV_API
+int NV21ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height);
+
+// I422ToARGB is in convert_argb.h
+// Convert I422 to BGRA.
+LIBYUV_API
+int I422ToBGRA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_bgra, int dst_stride_bgra,
+               int width, int height);
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
+// Convert I422 to RGBA.
+LIBYUV_API
+int I422ToRGBA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height);
+
+// Draw a rectangle into I420.
+LIBYUV_API
+int I420Rect(uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int x, int y, int width, int height,
+             int value_y, int value_u, int value_v);
+
+// Draw a rectangle into ARGB.
+LIBYUV_API
+int ARGBRect(uint8* dst_argb, int dst_stride_argb,
+             int x, int y, int width, int height, uint32 value);
+
+// Convert ARGB to gray scale ARGB.
+LIBYUV_API
+int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Make a rectangle of ARGB gray scale.
+LIBYUV_API
+int ARGBGray(uint8* dst_argb, int dst_stride_argb,
+             int x, int y, int width, int height);
+
+// Make a rectangle of ARGB Sepia tone.
+LIBYUV_API
+int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
+              int x, int y, int width, int height);
+
+// Apply a matrix rotation to each ARGB pixel.
+// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
+// The next 4 coefficients apply to B, G, R, A and produce R of the output.
+// The last 4 coefficients apply to B, G, R, A and produce A of the output.
+LIBYUV_API
+int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_argb, int dst_stride_argb,
+                    const int8* matrix_argb,
+                    int width, int height);
+
+// Deprecated. Use ARGBColorMatrix instead.
+// Apply a matrix rotation to each ARGB pixel.
+// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
+// The first 4 coefficients apply to B, G, R, A and produce B of the output.
+// The next 4 coefficients apply to B, G, R, A and produce G of the output.
+// The last 4 coefficients apply to B, G, R, A and produce R of the output.
+LIBYUV_API
+int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
+                   const int8* matrix_rgb,
+                   int x, int y, int width, int height);
+
+// Apply a color table each ARGB pixel.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
+                   const uint8* table_argb,
+                   int x, int y, int width, int height);
+
+// Apply a color table each ARGB pixel but preserve destination alpha.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
+                  const uint8* table_argb,
+                  int x, int y, int width, int height);
+
+// Apply a luma/color table each ARGB pixel but preserve destination alpha.
+// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from
+// RGB (YJ style) and C is an 8 bit color component (R, G or B).
+LIBYUV_API
+int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_argb, int dst_stride_argb,
+                       const uint8* luma_rgb_table,
+                       int width, int height);
+
+// Apply a 3 term polynomial to ARGB values.
+// poly points to a 4x4 matrix.  The first row is constants.  The 2nd row is
+// coefficients for b, g, r and a.  The 3rd row is coefficients for b squared,
+// g squared, r squared and a squared.  The 4rd row is coefficients for b to
+// the 3, g to the 3, r to the 3 and a to the 3.  The values are summed and
+// result clamped to 0 to 255.
+// A polynomial approximation can be dirived using software such as 'R'.
+
+LIBYUV_API
+int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb, int dst_stride_argb,
+                   const float* poly,
+                   int width, int height);
+
+// Quantize a rectangle of ARGB. Alpha unaffected.
+// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
+// interval_size should be a value between 1 and 255.
+// interval_offset should be a value between 0 and 255.
+LIBYUV_API
+int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
+                 int scale, int interval_size, int interval_offset,
+                 int x, int y, int width, int height);
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int width, int height);
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int width, int height);
+
+// Copy ARGB to ARGB.
+LIBYUV_API
+int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
+                     uint8* dst_argb, int dst_stride_argb,
+                     int width, int height);
+
+typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
+                             uint8* dst_argb, int width);
+
+// Get function to Alpha Blend ARGB pixels and store to destination.
+LIBYUV_API
+ARGBBlendRow GetARGBBlend();
+
+// Alpha Blend ARGB images and store to destination.
+// Alpha of destination is set to 255.
+LIBYUV_API
+int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
+              const uint8* src_argb1, int src_stride_argb1,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height);
+
+// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
+LIBYUV_API
+int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
+                 const uint8* src_argb1, int src_stride_argb1,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height);
+
+// Add ARGB image with ARGB image. Saturates to 255.
+LIBYUV_API
+int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
+            const uint8* src_argb1, int src_stride_argb1,
+            uint8* dst_argb, int dst_stride_argb,
+            int width, int height);
+
+// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
+LIBYUV_API
+int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
+                 const uint8* src_argb1, int src_stride_argb1,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height);
+
+// Convert I422 to YUY2.
+LIBYUV_API
+int I422ToYUY2(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_frame, int dst_stride_frame,
+               int width, int height);
+
+// Convert I422 to UYVY.
+LIBYUV_API
+int I422ToUYVY(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_frame, int dst_stride_frame,
+               int width, int height);
+
+// Convert unattentuated ARGB to preattenuated ARGB.
+LIBYUV_API
+int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int width, int height);
+
+// Convert preattentuated ARGB to unattenuated ARGB.
+LIBYUV_API
+int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+// Convert MJPG to ARGB.
+LIBYUV_API
+int MJPGToARGB(const uint8* sample, size_t sample_size,
+               uint8* argb, int argb_stride,
+               int w, int h, int dw, int dh);
+
+// Internal function - do not call directly.
+// Computes table of cumulative sum for image where the value is the sum
+// of all values above and to the left of the entry. Used by ARGBBlur.
+LIBYUV_API
+int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
+                             int32* dst_cumsum, int dst_stride32_cumsum,
+                             int width, int height);
+
+// Blur ARGB image.
+// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
+//   16 byte boundary.
+// dst_stride32_cumsum is number of ints in a row (width * 4).
+// radius is number of pixels around the center.  e.g. 1 = 3x3. 2=5x5.
+// Blur is optimized for radius of 5 (11x11) or less.
+LIBYUV_API
+int ARGBBlur(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int32* dst_cumsum, int dst_stride32_cumsum,
+             int width, int height, int radius);
+
+// Multiply ARGB image by ARGB value.
+LIBYUV_API
+int ARGBShade(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height, uint32 value);
+
+// Interpolate between two ARGB images using specified amount of interpolation
+// (0 to 255) and store to destination.
+// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
+// and 255 means 1% src_argb0 and 99% src_argb1.
+// Internally uses ARGBScale bilinear filtering.
+// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
+LIBYUV_API
+int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
+                    const uint8* src_argb1, int src_stride_argb1,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height, int interpolation);
+
+#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
+    defined(TARGET_IPHONE_SIMULATOR)
+#define LIBYUV_DISABLE_X86
+#endif
+
+// Row functions for copying a pixels from a source with a slope to a row
+// of destination. Useful for scaling, rotation, mirror, texture mapping.
+LIBYUV_API
+void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
+                     uint8* dst_argb, const float* uv_dudv, int width);
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
+                        uint8* dst_argb, const float* uv_dudv, int width);
+#define HAS_ARGBAFFINEROW_SSE2
+#endif  // LIBYUV_DISABLE_X86
+
+// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
+// shuffler is 16 bytes and must be aligned.
+LIBYUV_API
+int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
+                uint8* dst_argb, int dst_stride_argb,
+                const uint8* shuffler, int width, int height);
+
+// Sobel ARGB effect with planar output.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
+                     uint8* dst_y, int dst_stride_y,
+                     int width, int height);
+
+// Sobel ARGB effect.
+LIBYUV_API
+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height);
+
+// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
+LIBYUV_API
+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/rotate.h b/src/main/jni/libyuv/include/libyuv/rotate.h
new file mode 100644
index 000000000..8af60b895
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/rotate.h
@@ -0,0 +1,117 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_H_  // NOLINT
+#define INCLUDE_LIBYUV_ROTATE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Supported rotation.
+typedef enum RotationMode {
+  kRotate0 = 0,  // No rotation.
+  kRotate90 = 90,  // Rotate 90 degrees clockwise.
+  kRotate180 = 180,  // Rotate 180 degrees.
+  kRotate270 = 270,  // Rotate 270 degrees clockwise.
+
+  // Deprecated.
+  kRotateNone = 0,
+  kRotateClockwise = 90,
+  kRotateCounterClockwise = 270,
+} RotationModeEnum;
+
+// Rotate I420 frame.
+LIBYUV_API
+int I420Rotate(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int src_width, int src_height, enum RotationMode mode);
+
+// Rotate NV12 input and store in I420.
+LIBYUV_API
+int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
+                     const uint8* src_uv, int src_stride_uv,
+                     uint8* dst_y, int dst_stride_y,
+                     uint8* dst_u, int dst_stride_u,
+                     uint8* dst_v, int dst_stride_v,
+                     int src_width, int src_height, enum RotationMode mode);
+
+// Rotate a plane by 0, 90, 180, or 270.
+LIBYUV_API
+int RotatePlane(const uint8* src, int src_stride,
+                uint8* dst, int dst_stride,
+                int src_width, int src_height, enum RotationMode mode);
+
+// Rotate planes by 90, 180, 270. Deprecated.
+LIBYUV_API
+void RotatePlane90(const uint8* src, int src_stride,
+                   uint8* dst, int dst_stride,
+                   int width, int height);
+
+LIBYUV_API
+void RotatePlane180(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height);
+
+LIBYUV_API
+void RotatePlane270(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height);
+
+LIBYUV_API
+void RotateUV90(const uint8* src, int src_stride,
+                uint8* dst_a, int dst_stride_a,
+                uint8* dst_b, int dst_stride_b,
+                int width, int height);
+
+// Rotations for when U and V are interleaved.
+// These functions take one input pointer and
+// split the data into two buffers while
+// rotating them. Deprecated.
+LIBYUV_API
+void RotateUV180(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height);
+
+LIBYUV_API
+void RotateUV270(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height);
+
+// The 90 and 270 functions are based on transposes.
+// Doing a transpose with reversing the read/write
+// order will result in a rotation by +- 90 degrees.
+// Deprecated.
+LIBYUV_API
+void TransposePlane(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height);
+
+LIBYUV_API
+void TransposeUV(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_ROTATE_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/rotate_argb.h b/src/main/jni/libyuv/include/libyuv/rotate_argb.h
new file mode 100644
index 000000000..660ff5573
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/rotate_argb.h
@@ -0,0 +1,33 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_  // NOLINT
+#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/rotate.h"  // For RotationMode.
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Rotate ARGB frame
+LIBYUV_API
+int ARGBRotate(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int src_width, int src_height, enum RotationMode mode);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_ROTATE_ARGB_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/row.h b/src/main/jni/libyuv/include/libyuv/row.h
new file mode 100644
index 000000000..477b27447
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/row.h
@@ -0,0 +1,1821 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
+#define INCLUDE_LIBYUV_ROW_H_
+
+#include <stdlib.h>  // For malloc.
+
+#include "libyuv/basic_types.h"
+
+#if defined(__native_client__)
+#include "ppapi/c/pp_macros.h"  // For PPAPI_RELEASE
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
+
+#ifdef __cplusplus
+#define align_buffer_64(var, size)                                             \
+  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
+  uint8* var = reinterpret_cast<uint8*>                                        \
+      ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
+#else
+#define align_buffer_64(var, size)                                             \
+  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
+  uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
+#endif
+
+#define free_aligned_buffer_64(var) \
+  free(var##_mem);  \
+  var = 0
+
+#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
+    defined(TARGET_IPHONE_SIMULATOR) || \
+    (defined(_MSC_VER) && defined(__clang__))
+#define LIBYUV_DISABLE_X86
+#endif
+// True if compiling for SSSE3 as a requirement.
+#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
+#define LIBYUV_SSSE3_ONLY
+#endif
+
+// Enable for NaCL pepper 33 for bundle and AVX2 support.
+#if defined(__native_client__) && PPAPI_RELEASE >= 33
+#define NEW_BINUTILS
+#endif
+#if defined(__native_client__) && defined(__arm__) && PPAPI_RELEASE < 37
+#define LIBYUV_DISABLE_NEON
+#endif
+
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+// Effects:
+#define HAS_ARGBADDROW_SSE2
+#define HAS_ARGBAFFINEROW_SSE2
+#define HAS_ARGBATTENUATEROW_SSSE3
+#define HAS_ARGBBLENDROW_SSSE3
+#define HAS_ARGBCOLORMATRIXROW_SSSE3
+#define HAS_ARGBCOLORTABLEROW_X86
+#define HAS_ARGBCOPYALPHAROW_SSE2
+#define HAS_ARGBCOPYYTOALPHAROW_SSE2
+#define HAS_ARGBGRAYROW_SSSE3
+#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
+#define HAS_ARGBMIRRORROW_SSSE3
+#define HAS_ARGBMULTIPLYROW_SSE2
+#define HAS_ARGBPOLYNOMIALROW_SSE2
+#define HAS_ARGBQUANTIZEROW_SSE2
+#define HAS_ARGBSEPIAROW_SSSE3
+#define HAS_ARGBSHADEROW_SSE2
+#define HAS_ARGBSUBTRACTROW_SSE2
+#define HAS_ARGBTOUVROW_SSSE3
+#define HAS_ARGBUNATTENUATEROW_SSE2
+#define HAS_COMPUTECUMULATIVESUMROW_SSE2
+#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+#define HAS_INTERPOLATEROW_SSE2
+#define HAS_INTERPOLATEROW_SSSE3
+#define HAS_RGBCOLORTABLEROW_X86
+#define HAS_SOBELROW_SSE2
+#define HAS_SOBELTOPLANEROW_SSE2
+#define HAS_SOBELXROW_SSE2
+#define HAS_SOBELXYROW_SSE2
+#define HAS_SOBELYROW_SSE2
+
+// Conversions:
+#define HAS_ABGRTOUVROW_SSSE3
+#define HAS_ABGRTOYROW_SSSE3
+#define HAS_ARGB1555TOARGBROW_SSE2
+#define HAS_ARGB4444TOARGBROW_SSE2
+#define HAS_ARGBSHUFFLEROW_SSE2
+#define HAS_ARGBSHUFFLEROW_SSSE3
+#define HAS_ARGBTOARGB1555ROW_SSE2
+#define HAS_ARGBTOARGB4444ROW_SSE2
+#define HAS_ARGBTOBAYERGGROW_SSE2
+#define HAS_ARGBTOBAYERROW_SSSE3
+#define HAS_ARGBTORAWROW_SSSE3
+#define HAS_ARGBTORGB24ROW_SSSE3
+#define HAS_ARGBTORGB565ROW_SSE2
+#define HAS_ARGBTOUV422ROW_SSSE3
+#define HAS_ARGBTOUV444ROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
+#define HAS_ARGBTOYJROW_SSSE3
+#define HAS_ARGBTOYROW_SSSE3
+#define HAS_BGRATOUVROW_SSSE3
+#define HAS_BGRATOYROW_SSSE3
+#define HAS_COPYROW_ERMS
+#define HAS_COPYROW_SSE2
+#define HAS_COPYROW_X86
+#define HAS_HALFROW_SSE2
+#define HAS_I400TOARGBROW_SSE2
+#define HAS_I411TOARGBROW_SSSE3
+#define HAS_I422TOARGB1555ROW_SSSE3
+#define HAS_I422TOABGRROW_SSSE3
+#define HAS_I422TOARGB1555ROW_SSSE3
+#define HAS_I422TOARGB4444ROW_SSSE3
+#define HAS_I422TOARGBROW_SSSE3
+#define HAS_I422TOBGRAROW_SSSE3
+#define HAS_I422TORAWROW_SSSE3
+#define HAS_I422TORGB24ROW_SSSE3
+#define HAS_I422TORGB565ROW_SSSE3
+#define HAS_I422TORGBAROW_SSSE3
+#define HAS_I422TOUYVYROW_SSE2
+#define HAS_I422TOYUY2ROW_SSE2
+#define HAS_I444TOARGBROW_SSSE3
+#define HAS_MERGEUVROW_SSE2
+#define HAS_MIRRORROW_SSE2
+#define HAS_MIRRORROW_SSSE3
+#define HAS_MIRRORROW_UV_SSSE3
+#define HAS_MIRRORUVROW_SSSE3
+#define HAS_NV12TOARGBROW_SSSE3
+#define HAS_NV12TORGB565ROW_SSSE3
+#define HAS_NV21TOARGBROW_SSSE3
+#define HAS_NV21TORGB565ROW_SSSE3
+#define HAS_RAWTOARGBROW_SSSE3
+#define HAS_RAWTOYROW_SSSE3
+#define HAS_RGB24TOARGBROW_SSSE3
+#define HAS_RGB24TOYROW_SSSE3
+#define HAS_RGB565TOARGBROW_SSE2
+#define HAS_RGBATOUVROW_SSSE3
+#define HAS_RGBATOYROW_SSSE3
+#define HAS_SETROW_X86
+#define HAS_SPLITUVROW_SSE2
+#define HAS_UYVYTOARGBROW_SSSE3
+#define HAS_UYVYTOUV422ROW_SSE2
+#define HAS_UYVYTOUVROW_SSE2
+#define HAS_UYVYTOYROW_SSE2
+#define HAS_YTOARGBROW_SSE2
+#define HAS_YUY2TOARGBROW_SSSE3
+#define HAS_YUY2TOUV422ROW_SSE2
+#define HAS_YUY2TOUVROW_SSE2
+#define HAS_YUY2TOYROW_SSE2
+#endif
+
+// The following are available on x64 Visual C:
+#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64)
+#define HAS_I422TOARGBROW_SSSE3
+#endif
+
+// GCC >= 4.7.0 required for AVX2.
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
+#define GCC_HAS_AVX2 1
+#endif  // GNUC >= 4.7
+#endif  // __GNUC__
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif  // clang >= 3.4
+#endif  // __clang__
+
+// Visual C 2012 required for AVX2.
+#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
+#define VISUALC_HAS_AVX2 1
+#endif  // VisualStudio >= 2012
+
+// The following are available on all x86 platforms, but
+// require VS2012, clang 3.4 or gcc 4.7.
+// The code supports NaCL but requires a new compiler and validator.
+#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
+    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+// Effects:
+#define HAS_ARGBPOLYNOMIALROW_AVX2
+#define HAS_ARGBSHUFFLEROW_AVX2
+#define HAS_ARGBCOPYALPHAROW_AVX2
+#define HAS_ARGBCOPYYTOALPHAROW_AVX2
+#endif
+
+// The following are require VS2012.
+// TODO(fbarchard): Port to gcc.
+#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
+#define HAS_ARGBTOUVROW_AVX2
+#define HAS_ARGBTOYJROW_AVX2
+#define HAS_ARGBTOYROW_AVX2
+#define HAS_HALFROW_AVX2
+#define HAS_I422TOARGBROW_AVX2
+#define HAS_INTERPOLATEROW_AVX2
+#define HAS_MERGEUVROW_AVX2
+#define HAS_MIRRORROW_AVX2
+#define HAS_SPLITUVROW_AVX2
+#define HAS_UYVYTOUV422ROW_AVX2
+#define HAS_UYVYTOUVROW_AVX2
+#define HAS_UYVYTOYROW_AVX2
+#define HAS_YUY2TOUV422ROW_AVX2
+#define HAS_YUY2TOUVROW_AVX2
+#define HAS_YUY2TOYROW_AVX2
+
+// Effects:
+#define HAS_ARGBADDROW_AVX2
+#define HAS_ARGBATTENUATEROW_AVX2
+#define HAS_ARGBMIRRORROW_AVX2
+#define HAS_ARGBMULTIPLYROW_AVX2
+#define HAS_ARGBSUBTRACTROW_AVX2
+#define HAS_ARGBUNATTENUATEROW_AVX2
+#endif  // defined(VISUALC_HAS_AVX2)
+
+// The following are Yasm x86 only:
+// TODO(fbarchard): Port AVX2 to inline.
+#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
+    (defined(_M_IX86) || defined(_M_X64) || \
+    defined(__x86_64__) || defined(__i386__))
+#define HAS_MERGEUVROW_AVX2
+#define HAS_MERGEUVROW_MMX
+#define HAS_SPLITUVROW_AVX2
+#define HAS_SPLITUVROW_MMX
+#define HAS_UYVYTOYROW_AVX2
+#define HAS_UYVYTOYROW_MMX
+#define HAS_YUY2TOYROW_AVX2
+#define HAS_YUY2TOYROW_MMX
+#endif
+
+// The following are disabled when SSSE3 is available:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_SSSE3_ONLY)
+#define HAS_ARGBBLENDROW_SSE2
+#define HAS_ARGBATTENUATEROW_SSE2
+#define HAS_MIRRORROW_SSE2
+#endif
+
+// The following are available on arm64 platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+// #define HAS_I444TOARGBROW_NEON
+// #define HAS_I422TOARGBROW_NEON
+// #define HAS_I411TOARGBROW_NEON
+// #define HAS_I422TOBGRAROW_NEON
+// #define HAS_I422TOABGRROW_NEON
+// #define HAS_I422TORGBAROW_NEON
+// #define HAS_I422TORGB24ROW_NEON
+// #define HAS_I422TORAWROW_NEON
+// #define HAS_I422TORGB565ROW_NEON
+// #define HAS_I422TOARGB1555ROW_NEON
+// #define HAS_I422TOARGB4444ROW_NEON
+// #define HAS_YTOARGBROW_NEON
+// #define HAS_I400TOARGBROW_NEON
+// #define HAS_NV12TOARGBROW_NEON
+// #define HAS_NV21TOARGBROW_NEON
+// #define HAS_NV12TORGB565ROW_NEON
+// #define HAS_NV21TORGB565ROW_NEON
+// #define HAS_YUY2TOARGBROW_NEON
+// #define HAS_UYVYTOARGBROW_NEON
+#define HAS_SPLITUVROW_NEON
+#define HAS_MERGEUVROW_NEON
+#define HAS_COPYROW_NEON
+#define HAS_SETROW_NEON
+#define HAS_ARGBSETROWS_NEON
+#define HAS_MIRRORROW_NEON
+#define HAS_MIRRORUVROW_NEON
+#define HAS_ARGBMIRRORROW_NEON
+#define HAS_RGB24TOARGBROW_NEON
+#define HAS_RAWTOARGBROW_NEON
+// #define HAS_RGB565TOARGBROW_NEON
+// #define HAS_ARGB1555TOARGBROW_NEON
+// #define HAS_ARGB4444TOARGBROW_NEON
+#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORAWROW_NEON
+#define HAS_YUY2TOYROW_NEON
+#define HAS_UYVYTOYROW_NEON
+#define HAS_YUY2TOUV422ROW_NEON
+#define HAS_UYVYTOUV422ROW_NEON
+#define HAS_YUY2TOUVROW_NEON
+#define HAS_UYVYTOUVROW_NEON
+#define HAS_HALFROW_NEON
+#define HAS_ARGBTOBAYERROW_NEON
+#define HAS_ARGBTOBAYERGGROW_NEON
+#define HAS_ARGBSHUFFLEROW_NEON
+#define HAS_I422TOYUY2ROW_NEON
+#define HAS_I422TOUYVYROW_NEON
+// #define HAS_ARGBTORGB565ROW_NEON
+// #define HAS_ARGBTOARGB1555ROW_NEON
+// #define HAS_ARGBTOARGB4444ROW_NEON
+#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBTOYJROW_NEON
+#define HAS_ARGBTOUV444ROW_NEON
+#define HAS_ARGBTOUV422ROW_NEON
+#define HAS_ARGBTOUV411ROW_NEON
+// #define HAS_ARGBTOUVROW_NEON
+// #define HAS_ARGBTOUVJROW_NEON
+// #define HAS_BGRATOUVROW_NEON
+// #define HAS_ABGRTOUVROW_NEON
+// #define HAS_RGBATOUVROW_NEON
+// #define HAS_RGB24TOUVROW_NEON
+// #define HAS_RAWTOUVROW_NEON
+// #define HAS_RGB565TOUVROW_NEON
+// #define HAS_ARGB1555TOUVROW_NEON
+// #define HAS_ARGB4444TOUVROW_NEON
+// #define HAS_RGB565TOYROW_NEON
+// #define HAS_ARGB1555TOYROW_NEON
+// #define HAS_ARGB4444TOYROW_NEON
+#define HAS_BGRATOYROW_NEON
+#define HAS_ABGRTOYROW_NEON
+#define HAS_RGBATOYROW_NEON
+#define HAS_RGB24TOYROW_NEON
+#define HAS_RAWTOYROW_NEON
+#define HAS_INTERPOLATEROW_NEON
+#define HAS_ARGBBLENDROW_NEON
+#define HAS_ARGBATTENUATEROW_NEON
+#define HAS_ARGBQUANTIZEROW_NEON
+#define HAS_ARGBSHADEROW_NEON
+#define HAS_ARGBGRAYROW_NEON
+#define HAS_ARGBSEPIAROW_NEON
+#define HAS_ARGBCOLORMATRIXROW_NEON
+#define HAS_ARGBMULTIPLYROW_NEON
+#define HAS_ARGBADDROW_NEON
+#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
+#define HAS_SOBELXYROW_NEON
+#define HAS_SOBELXROW_NEON
+#define HAS_SOBELYROW_NEON
+#endif
+
+// The following are available on Neon platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+#define HAS_ABGRTOUVROW_NEON
+#define HAS_ABGRTOYROW_NEON
+#define HAS_ARGB1555TOARGBROW_NEON
+#define HAS_ARGB1555TOUVROW_NEON
+#define HAS_ARGB1555TOYROW_NEON
+#define HAS_ARGB4444TOARGBROW_NEON
+#define HAS_ARGB4444TOUVROW_NEON
+#define HAS_ARGB4444TOYROW_NEON
+#define HAS_ARGBTOARGB1555ROW_NEON
+#define HAS_ARGBTOARGB4444ROW_NEON
+#define HAS_ARGBTOBAYERROW_NEON
+#define HAS_ARGBTOBAYERGGROW_NEON
+#define HAS_ARGBTORAWROW_NEON
+#define HAS_ARGBTORGB24ROW_NEON
+#define HAS_ARGBTORGB565ROW_NEON
+#define HAS_ARGBTOUV411ROW_NEON
+#define HAS_ARGBTOUV422ROW_NEON
+#define HAS_ARGBTOUV444ROW_NEON
+#define HAS_ARGBTOUVROW_NEON
+#define HAS_ARGBTOUVJROW_NEON
+#define HAS_ARGBTOYROW_NEON
+#define HAS_ARGBTOYJROW_NEON
+#define HAS_BGRATOUVROW_NEON
+#define HAS_BGRATOYROW_NEON
+#define HAS_COPYROW_NEON
+#define HAS_HALFROW_NEON
+#define HAS_I400TOARGBROW_NEON
+#define HAS_I411TOARGBROW_NEON
+#define HAS_I422TOABGRROW_NEON
+#define HAS_I422TOARGB1555ROW_NEON
+#define HAS_I422TOARGB4444ROW_NEON
+#define HAS_I422TOARGBROW_NEON
+#define HAS_I422TOBGRAROW_NEON
+#define HAS_I422TORAWROW_NEON
+#define HAS_I422TORGB24ROW_NEON
+#define HAS_I422TORGB565ROW_NEON
+#define HAS_I422TORGBAROW_NEON
+#define HAS_I422TOUYVYROW_NEON
+#define HAS_I422TOYUY2ROW_NEON
+#define HAS_I444TOARGBROW_NEON
+#define HAS_MERGEUVROW_NEON
+#define HAS_MIRRORROW_NEON
+#define HAS_MIRRORUVROW_NEON
+#define HAS_NV12TOARGBROW_NEON
+#define HAS_NV12TORGB565ROW_NEON
+#define HAS_NV21TOARGBROW_NEON
+#define HAS_NV21TORGB565ROW_NEON
+#define HAS_RAWTOARGBROW_NEON
+#define HAS_RAWTOUVROW_NEON
+#define HAS_RAWTOYROW_NEON
+#define HAS_RGB24TOARGBROW_NEON
+#define HAS_RGB24TOUVROW_NEON
+#define HAS_RGB24TOYROW_NEON
+#define HAS_RGB565TOARGBROW_NEON
+#define HAS_RGB565TOUVROW_NEON
+#define HAS_RGB565TOYROW_NEON
+#define HAS_RGBATOUVROW_NEON
+#define HAS_RGBATOYROW_NEON
+#define HAS_SETROW_NEON
+#define HAS_SPLITUVROW_NEON
+#define HAS_UYVYTOARGBROW_NEON
+#define HAS_UYVYTOUV422ROW_NEON
+#define HAS_UYVYTOUVROW_NEON
+#define HAS_UYVYTOYROW_NEON
+#define HAS_YTOARGBROW_NEON
+#define HAS_YUY2TOARGBROW_NEON
+#define HAS_YUY2TOUV422ROW_NEON
+#define HAS_YUY2TOUVROW_NEON
+#define HAS_YUY2TOYROW_NEON
+
+// Effects:
+#define HAS_ARGBADDROW_NEON
+#define HAS_ARGBATTENUATEROW_NEON
+#define HAS_ARGBBLENDROW_NEON
+#define HAS_ARGBGRAYROW_NEON
+#define HAS_ARGBMIRRORROW_NEON
+#define HAS_ARGBMULTIPLYROW_NEON
+#define HAS_ARGBQUANTIZEROW_NEON
+#define HAS_ARGBSEPIAROW_NEON
+#define HAS_ARGBSHADEROW_NEON
+#define HAS_ARGBSUBTRACTROW_NEON
+#define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
+#define HAS_SOBELXYROW_NEON
+#define HAS_SOBELXROW_NEON
+#define HAS_SOBELYROW_NEON
+#define HAS_INTERPOLATEROW_NEON
+// TODO(fbarchard): Investigate neon unittest failure.
+// #define HAS_ARGBCOLORMATRIXROW_NEON
+#endif
+
+// The following are available on Mips platforms:
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
+#define HAS_COPYROW_MIPS
+#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+#define HAS_I422TOABGRROW_MIPS_DSPR2
+#define HAS_I422TOARGBROW_MIPS_DSPR2
+#define HAS_I422TOBGRAROW_MIPS_DSPR2
+#define HAS_INTERPOLATEROWS_MIPS_DSPR2
+#define HAS_MIRRORROW_MIPS_DSPR2
+#define HAS_MIRRORUVROW_MIPS_DSPR2
+#define HAS_SPLITUVROW_MIPS_DSPR2
+#endif
+#endif
+
+#if defined(_MSC_VER) && !defined(__CLR_VER)
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+typedef __declspec(align(16)) int16 vec16[8];
+typedef __declspec(align(16)) int32 vec32[4];
+typedef __declspec(align(16)) int8 vec8[16];
+typedef __declspec(align(16)) uint16 uvec16[8];
+typedef __declspec(align(16)) uint32 uvec32[4];
+typedef __declspec(align(16)) uint8 uvec8[16];
+typedef __declspec(align(32)) int16 lvec16[16];
+typedef __declspec(align(32)) int32 lvec32[8];
+typedef __declspec(align(32)) int8 lvec8[32];
+typedef __declspec(align(32)) uint16 ulvec16[16];
+typedef __declspec(align(32)) uint32 ulvec32[8];
+typedef __declspec(align(32)) uint8 ulvec8[32];
+
+#elif defined(__GNUC__)
+// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
+#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+typedef int16 __attribute__((vector_size(16))) vec16;
+typedef int32 __attribute__((vector_size(16))) vec32;
+typedef int8 __attribute__((vector_size(16))) vec8;
+typedef uint16 __attribute__((vector_size(16))) uvec16;
+typedef uint32 __attribute__((vector_size(16))) uvec32;
+typedef uint8 __attribute__((vector_size(16))) uvec8;
+#else
+#define SIMD_ALIGNED(var) var
+typedef int16 vec16[8];
+typedef int32 vec32[4];
+typedef int8 vec8[16];
+typedef uint16 uvec16[8];
+typedef uint32 uvec32[4];
+typedef uint8 uvec8[16];
+#endif
+
+#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
+#define OMITFP
+#else
+#define OMITFP __attribute__((optimize("omit-frame-pointer")))
+#endif
+
+// NaCL macros for GCC x86 and x64.
+
+// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
+// NEW_BINUTILS and remove all BUNDLEALIGN occurances.
+#if defined(__native_client__)
+#define LABELALIGN ".p2align 5\n"
+#else
+#define LABELALIGN ".p2align 2\n"
+#endif
+#if defined(__native_client__) && defined(__x86_64__)
+#if defined(NEW_BINUTILS)
+#define BUNDLELOCK ".bundle_lock\n"
+#define BUNDLEUNLOCK ".bundle_unlock\n"
+#define BUNDLEALIGN "\n"
+#else
+#define BUNDLELOCK "\n"
+#define BUNDLEUNLOCK "\n"
+#define BUNDLEALIGN ".p2align 5\n"
+#endif
+#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
+#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
+#define MEMLEA(offset, base) #offset "(%q" #base ")"
+#define MEMLEA3(offset, index, scale) \
+    #offset "(,%q" #index "," #scale ")"
+#define MEMLEA4(offset, base, index, scale) \
+    #offset "(%q" #base ",%q" #index "," #scale ")"
+#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
+#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
+#define MEMOPREG(opcode, offset, base, index, scale, reg) \
+    BUNDLELOCK \
+    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+    #opcode " (%%r15,%%r14),%%" #reg "\n" \
+    BUNDLEUNLOCK
+#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
+    BUNDLELOCK \
+    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+    #opcode " %%" #reg ",(%%r15,%%r14)\n" \
+    BUNDLEUNLOCK
+#define MEMOPARG(opcode, offset, base, index, scale, arg) \
+    BUNDLELOCK \
+    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+    #opcode " (%%r15,%%r14),%" #arg "\n" \
+    BUNDLEUNLOCK
+#else  // defined(__native_client__) && defined(__x86_64__)
+#define BUNDLEALIGN "\n"
+#define MEMACCESS(base) "(%" #base ")"
+#define MEMACCESS2(offset, base) #offset "(%" #base ")"
+#define MEMLEA(offset, base) #offset "(%" #base ")"
+#define MEMLEA3(offset, index, scale) \
+    #offset "(,%" #index "," #scale ")"
+#define MEMLEA4(offset, base, index, scale) \
+    #offset "(%" #base ",%" #index "," #scale ")"
+#define MEMMOVESTRING(s, d)
+#define MEMSTORESTRING(reg, d)
+#define MEMOPREG(opcode, offset, base, index, scale, reg) \
+    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
+#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
+    #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
+#define MEMOPARG(opcode, offset, base, index, scale, arg) \
+    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
+#endif  // defined(__native_client__) && defined(__x86_64__)
+
+#if defined(__arm__) || defined(__aarch64__)
+#undef MEMACCESS
+#if defined(__native_client__)
+#define MEMACCESS(base) ".p2align   3\nbic %" #base ", #0xc0000000\n"
+#else
+#define MEMACCESS(base) "\n"
+#endif
+#endif
+
+void I444ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
+void I422ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
+void I411ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
+void I422ToBGRARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_bgra,
+                        int width);
+void I422ToABGRRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_abgr,
+                        int width);
+void I422ToRGBARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_rgba,
+                        int width);
+void I422ToRGB24Row_NEON(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgb24,
+                         int width);
+void I422ToRAWRow_NEON(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_raw,
+                       int width);
+void I422ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_u,
+                          const uint8* src_v,
+                          uint8* dst_rgb565,
+                          int width);
+void I422ToARGB1555Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb1555,
+                            int width);
+void I422ToARGB4444Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb4444,
+                            int width);
+void NV12ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_uv,
+                        uint8* dst_argb,
+                        int width);
+void NV21ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_vu,
+                        uint8* dst_argb,
+                        int width);
+void NV12ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_uv,
+                          uint8* dst_rgb565,
+                          int width);
+void NV21ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_vu,
+                          uint8* dst_rgb565,
+                          int width);
+void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
+                        uint8* dst_argb,
+                        int width);
+void UYVYToARGBRow_NEON(const uint8* src_uyvy,
+                        uint8* dst_argb,
+                        int width);
+
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
+void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int pix);
+void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
+                       uint8* dst_u, uint8* dst_v, int pix);
+void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
+                     uint8* dst_u, uint8* dst_v, int pix);
+void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
+                        uint8* dst_u, uint8* dst_v, int pix);
+void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
+void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
+void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
+void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
+void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
+void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
+void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
+void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
+void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
+void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
+void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
+void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
+void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
+void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
+void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
+void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
+void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
+void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
+
+void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
+                          uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width);
+void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
+                       uint8* dst_u, uint8* dst_v, int width);
+void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
+                       uint8* dst_u, uint8* dst_v, int width);
+void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
+                       uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
+                                  uint8* dst_u, uint8* dst_v, int width);
+void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
+                                 uint8* dst_u, uint8* dst_v, int width);
+void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
+                                 uint8* dst_u, uint8* dst_v, int width);
+void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
+                                 uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
+                           uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
+                            uint8* dst_u, uint8* dst_v, int width);
+void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
+                           uint8* dst_u, uint8* dst_v, int width);
+void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
+                           uint8* dst_u, uint8* dst_v, int width);
+void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
+                           uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                             int pix);
+void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                             int pix);
+void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                             int pix);
+void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
+                           uint8* dst_u, uint8* dst_v, int pix);
+void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
+                           uint8* dst_u, uint8* dst_v, int pix);
+void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
+                            uint8* dst_u, uint8* dst_v, int pix);
+void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
+                              int src_stride_argb1555,
+                              uint8* dst_u, uint8* dst_v, int pix);
+void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
+                              int src_stride_argb4444,
+                              uint8* dst_u, uint8* dst_v, int pix);
+void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_u, uint8* dst_v, int width);
+void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
+                   uint8* dst_u, uint8* dst_v, int width);
+void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
+                   uint8* dst_u, uint8* dst_v, int width);
+void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
+                   uint8* dst_u, uint8* dst_v, int width);
+void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
+                    uint8* dst_u, uint8* dst_v, int width);
+void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
+                  uint8* dst_u, uint8* dst_v, int width);
+void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
+                     uint8* dst_u, uint8* dst_v, int width);
+void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
+                       uint8* dst_u, uint8* dst_v, int width);
+void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
+                       uint8* dst_u, uint8* dst_v, int width);
+
+void ARGBToUV444Row_SSSE3(const uint8* src_argb,
+                          uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
+                                    uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
+                              uint8* dst_u, uint8* dst_v, int width);
+
+void ARGBToUV422Row_SSSE3(const uint8* src_argb,
+                          uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
+                                    uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
+                              uint8* dst_u, uint8* dst_v, int width);
+
+void ARGBToUV444Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV422Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUV411Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width);
+
+void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
+void MirrorRow_C(const uint8* src, uint8* dst, int width);
+
+void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                       int width);
+void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                      int width);
+void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                            int width);
+void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                   int width);
+
+void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
+void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
+
+void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                           int pix);
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
+void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
+                                     uint8* dst_v, int pix);
+void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
+
+void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                  int width);
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width);
+void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
+void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
+void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
+
+void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
+void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
+void CopyRow_X86(const uint8* src, uint8* dst, int count);
+void CopyRow_NEON(const uint8* src, uint8* dst, int count);
+void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
+void CopyRow_C(const uint8* src, uint8* dst, int count);
+
+void CopyRow_16_C(const uint16* src, uint16* dst, int count);
+
+void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
+
+void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
+void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
+void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
+
+void SetRow_X86(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
+                     int dst_stride, int height);
+void SetRow_NEON(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
+                      int dst_stride, int height);
+void SetRow_C(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
+                   int height);
+
+// ARGBShufflers for BGRAToARGB etc.
+void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
+                      const uint8* shuffler, int pix);
+void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix);
+void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                          const uint8* shuffler, int pix);
+void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix);
+void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix);
+void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                    const uint8* shuffler, int pix);
+void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+                             const uint8* shuffler, int pix);
+void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                              const uint8* shuffler, int pix);
+void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+                             const uint8* shuffler, int pix);
+void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
+                             const uint8* shuffler, int pix);
+
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
+void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
+void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
+void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix);
+void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix);
+
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
+void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix);
+void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix);
+void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
+void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
+void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
+void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
+void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
+void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
+void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
+void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
+                              int pix);
+void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
+                                int pix);
+void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
+                                int pix);
+void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
+void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
+void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
+                              int pix);
+void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
+                                int pix);
+void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
+                                int pix);
+
+void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+
+void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
+
+void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
+void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
+void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
+void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
+void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
+void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
+
+void I444ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_argb,
+                     int width);
+void I422ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_argb,
+                     int width);
+void I411ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_argb,
+                     int width);
+void NV12ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_uv,
+                     uint8* dst_argb,
+                     int width);
+void NV21ToRGB565Row_C(const uint8* src_y,
+                       const uint8* src_vu,
+                       uint8* dst_argb,
+                       int width);
+void NV12ToRGB565Row_C(const uint8* src_y,
+                       const uint8* src_uv,
+                       uint8* dst_argb,
+                       int width);
+void NV21ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_vu,
+                     uint8* dst_argb,
+                     int width);
+void YUY2ToARGBRow_C(const uint8* src_yuy2,
+                     uint8* dst_argb,
+                     int width);
+void UYVYToARGBRow_C(const uint8* src_uyvy,
+                     uint8* dst_argb,
+                     int width);
+void I422ToBGRARow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_bgra,
+                     int width);
+void I422ToABGRRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_abgr,
+                     int width);
+void I422ToRGBARow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_rgba,
+                     int width);
+void I422ToRGB24Row_C(const uint8* src_y,
+                      const uint8* src_u,
+                      const uint8* src_v,
+                      uint8* dst_rgb24,
+                      int width);
+void I422ToRAWRow_C(const uint8* src_y,
+                    const uint8* src_u,
+                    const uint8* src_v,
+                    uint8* dst_raw,
+                    int width);
+void I422ToARGB4444Row_C(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb4444,
+                         int width);
+void I422ToARGB1555Row_C(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb4444,
+                         int width);
+void I422ToRGB565Row_C(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_rgb565,
+                       int width);
+void YToARGBRow_C(const uint8* src_y,
+                  uint8* dst_argb,
+                  int width);
+void I422ToARGBRow_AVX2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width);
+void I444ToARGBRow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb,
+                         int width);
+void I422ToARGBRow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb,
+                         int width);
+void I411ToARGBRow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb,
+                         int width);
+void NV12ToARGBRow_SSSE3(const uint8* src_y,
+                         const uint8* src_uv,
+                         uint8* dst_argb,
+                         int width);
+void NV21ToARGBRow_SSSE3(const uint8* src_y,
+                         const uint8* src_vu,
+                         uint8* dst_argb,
+                         int width);
+void NV12ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_uv,
+                           uint8* dst_argb,
+                           int width);
+void NV21ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_vu,
+                           uint8* dst_argb,
+                           int width);
+void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
+                         uint8* dst_argb,
+                         int width);
+void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
+                         uint8* dst_argb,
+                         int width);
+void I422ToBGRARow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_bgra,
+                         int width);
+void I422ToABGRRow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_abgr,
+                         int width);
+void I422ToRGBARow_SSSE3(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgba,
+                         int width);
+void I422ToARGB4444Row_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void I422ToARGB1555Row_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void I422ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_u,
+                           const uint8* src_v,
+                           uint8* dst_argb,
+                           int width);
+// RGB24/RAW are unaligned.
+void I422ToRGB24Row_SSSE3(const uint8* src_y,
+                          const uint8* src_u,
+                          const uint8* src_v,
+                          uint8* dst_rgb24,
+                          int width);
+void I422ToRAWRow_SSSE3(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_raw,
+                        int width);
+
+void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_argb,
+                                   int width);
+void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_argb,
+                                   int width);
+void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_argb,
+                                   int width);
+void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_uv,
+                                   uint8* dst_argb,
+                                   int width);
+void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_vu,
+                                   uint8* dst_argb,
+                                   int width);
+void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
+                                   uint8* dst_argb,
+                                   int width);
+void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
+                                   uint8* dst_argb,
+                                   int width);
+void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_bgra,
+                                   int width);
+void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_abgr,
+                                   int width);
+void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
+                                   const uint8* src_u,
+                                   const uint8* src_v,
+                                   uint8* dst_rgba,
+                                   int width);
+void I422ToARGBRow_Any_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_uv,
+                             uint8* dst_argb,
+                             int width);
+void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_vu,
+                             uint8* dst_argb,
+                             int width);
+void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
+                               const uint8* src_uv,
+                               uint8* dst_argb,
+                               int width);
+void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
+                               const uint8* src_vu,
+                               uint8* dst_argb,
+                               int width);
+void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
+                             uint8* dst_argb,
+                             int width);
+void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
+                             uint8* dst_argb,
+                             int width);
+void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_bgra,
+                             int width);
+void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_abgr,
+                             int width);
+void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_rgba,
+                             int width);
+void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
+                                 const uint8* src_u,
+                                 const uint8* src_v,
+                                 uint8* dst_rgba,
+                                 int width);
+void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
+                                 const uint8* src_u,
+                                 const uint8* src_v,
+                                 uint8* dst_rgba,
+                                 int width);
+void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
+                               const uint8* src_u,
+                               const uint8* src_v,
+                               uint8* dst_rgba,
+                               int width);
+// RGB24/RAW are unaligned.
+void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void YToARGBRow_SSE2(const uint8* src_y,
+                     uint8* dst_argb,
+                     int width);
+void YToARGBRow_NEON(const uint8* src_y,
+                     uint8* dst_argb,
+                     int width);
+void YToARGBRow_Any_SSE2(const uint8* src_y,
+                         uint8* dst_argb,
+                         int width);
+void YToARGBRow_Any_NEON(const uint8* src_y,
+                         uint8* dst_argb,
+                         int width);
+
+// ARGB preattenuated alpha blend.
+void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
+                        uint8* dst_argb, int width);
+void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width);
+void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width);
+void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
+                    uint8* dst_argb, int width);
+
+// ARGB multiply images. Same API as Blend, but these require
+// pointer and width alignment for SSE2.
+void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width);
+void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+
+// ARGB add images.
+void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
+                  uint8* dst_argb, int width);
+void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                     uint8* dst_argb, int width);
+void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                         uint8* dst_argb, int width);
+void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                     uint8* dst_argb, int width);
+void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                         uint8* dst_argb, int width);
+void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
+                     uint8* dst_argb, int width);
+void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
+                         uint8* dst_argb, int width);
+
+// ARGB subtract images. Same API as Blend, but these require
+// pointer and width alignment for SSE2.
+void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width);
+void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
+                          uint8* dst_argb, int width);
+void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
+                              uint8* dst_argb, int width);
+
+void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
+
+void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
+
+void I444ToARGBRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I422ToARGBRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I411ToARGBRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I422ToBGRARow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I422ToABGRRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I422ToRGBARow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb,
+                            int width);
+void I422ToRGB24Row_Any_NEON(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
+void I422ToRAWRow_Any_NEON(const uint8* src_y,
+                           const uint8* src_u,
+                           const uint8* src_v,
+                           uint8* dst_argb,
+                           int width);
+void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
+                                const uint8* src_u,
+                                const uint8* src_v,
+                                uint8* dst_argb,
+                                int width);
+void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
+                                const uint8* src_u,
+                                const uint8* src_v,
+                                uint8* dst_argb,
+                                int width);
+void I422ToRGB565Row_Any_NEON(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void NV12ToARGBRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_uv,
+                            uint8* dst_argb,
+                            int width);
+void NV21ToARGBRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_uv,
+                            uint8* dst_argb,
+                            int width);
+void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
+                              const uint8* src_uv,
+                              uint8* dst_argb,
+                              int width);
+void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
+                              const uint8* src_uv,
+                              uint8* dst_argb,
+                              int width);
+void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
+                            uint8* dst_argb,
+                            int width);
+void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
+                            uint8* dst_argb,
+                            int width);
+void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
+                              const uint8* src_u,
+                              const uint8* src_v,
+                              uint8* dst_argb,
+                              int width);
+
+void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
+                               uint8* dst_y, int pix);
+void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                                uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
+                                   uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
+                   uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_C(const uint8* src_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
+                             uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
+                             uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
+void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
+                             uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
+                               uint8* dst_y, int pix);
+void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                                uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
+                                   uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_NEON(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix);
+
+void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
+                   uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_C(const uint8* src_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
+                             uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
+                             uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
+void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
+                          uint8* dst_u, uint8* dst_v, int pix);
+void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
+                             uint8* dst_u, uint8* dst_v, int pix);
+
+void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+               uint8* dst_uv, int pix);
+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
+void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
+void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
+
+void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+                  uint16* dst_uv, int pix);
+
+void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
+                      uint32 selector, int pix);
+void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
+                          uint32 selector, int pix);
+void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix);
+void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
+                              uint32 selector, int pix);
+void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
+                             uint32 selector, int pix);
+void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
+                        uint32 /* selector */, int pix);
+void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 /* selector */, int pix);
+void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 /* selector */, int pix);
+void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                               uint32 /* selector */, int pix);
+void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
+                               uint32 /* selector */, int pix);
+
+void I422ToYUY2Row_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_yuy2, int width);
+void I422ToUYVYRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_uyvy, int width);
+void I422ToYUY2Row_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_yuy2, int width);
+void I422ToUYVYRow_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_uyvy, int width);
+void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_yuy2, int width);
+void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_uyvy, int width);
+void I422ToYUY2Row_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_yuy2, int width);
+void I422ToUYVYRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_uyvy, int width);
+void I422ToYUY2Row_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_yuy2, int width);
+void I422ToUYVYRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_uyvy, int width);
+
+// Effects related row functions.
+void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+                               int width);
+void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                int width);
+void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+                               int width);
+void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
+                               int width);
+
+// Inverse table for unattenuate, shared by C and SSE2.
+extern const uint32 fixed_invtbl8[256];
+void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+                                 int width);
+void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+                                 int width);
+
+void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
+void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
+
+void ARGBSepiaRow_C(uint8* dst_argb, int width);
+void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
+void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
+
+void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
+                          const int8* matrix_argb, int width);
+void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                              const int8* matrix_argb, int width);
+void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                             const int8* matrix_argb, int width);
+
+void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
+void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
+
+void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
+void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
+
+void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
+                       int interval_offset, int width);
+void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width);
+void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width);
+
+void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
+                    uint32 value);
+void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value);
+void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value);
+
+// Used for blur.
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst, int count);
+void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
+                                  const int32* previous_cumsum, int width);
+
+void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
+                                 int width, int area, uint8* dst, int count);
+void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
+                               const int32* previous_cumsum, int width);
+
+LIBYUV_API
+void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
+                     uint8* dst_argb, const float* uv_dudv, int width);
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
+                        uint8* dst_argb, const float* uv_dudv, int width);
+
+// Used for I420Scale, ARGBScale, and ARGBInterpolate.
+void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
+                      ptrdiff_t src_stride_ptr,
+                      int width, int source_y_fraction);
+void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride_ptr, int width,
+                         int source_y_fraction);
+void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                          ptrdiff_t src_stride_ptr, int width,
+                          int source_y_fraction);
+void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride_ptr, int width,
+                         int source_y_fraction);
+void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride_ptr, int width,
+                         int source_y_fraction);
+void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+                                ptrdiff_t src_stride_ptr, int width,
+                                int source_y_fraction);
+void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                                   ptrdiff_t src_stride_ptr, int width,
+                                   int source_y_fraction);
+void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                                    ptrdiff_t src_stride_ptr, int width,
+                                    int source_y_fraction);
+void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
+                             ptrdiff_t src_stride_ptr, int width,
+                             int source_y_fraction);
+void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                             ptrdiff_t src_stride_ptr, int width,
+                             int source_y_fraction);
+void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                              ptrdiff_t src_stride_ptr, int width,
+                              int source_y_fraction);
+void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
+                             ptrdiff_t src_stride_ptr, int width,
+                             int source_y_fraction);
+void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+                                    ptrdiff_t src_stride_ptr, int width,
+                                    int source_y_fraction);
+
+void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                         ptrdiff_t src_stride_ptr,
+                         int width, int source_y_fraction);
+
+// Sobel images.
+void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
+                 uint8* dst_sobelx, int width);
+void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width);
+void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width);
+void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
+                 uint8* dst_sobely, int width);
+void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width);
+void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width);
+void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                uint8* dst_argb, int width);
+void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width);
+void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width);
+void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_y, int width);
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width);
+void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width);
+void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                  uint8* dst_argb, int width);
+void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width);
+void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width);
+
+void ARGBPolynomialRow_C(const uint8* src_argb,
+                         uint8* dst_argb, const float* poly,
+                         int width);
+void ARGBPolynomialRow_SSE2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width);
+void ARGBPolynomialRow_AVX2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width);
+
+void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
+                             const uint8* luma, uint32 lumacoeff);
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                 int width,
+                                 const uint8* luma, uint32 lumacoeff);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/scale.h b/src/main/jni/libyuv/include/libyuv/scale.h
new file mode 100644
index 000000000..a3bc07e0f
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/scale.h
@@ -0,0 +1,102 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_H_  // NOLINT
+#define INCLUDE_LIBYUV_SCALE_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Supported filtering.
+typedef enum FilterMode {
+  kFilterNone = 0,  // Point sample; Fastest.
+  kFilterLinear = 1,  // Filter horizontally only.
+  kFilterBilinear = 2,  // Faster than box, but lower quality scaling down.
+  kFilterBox = 3  // Highest quality.
+} FilterModeEnum;
+
+// Scale a YUV plane.
+LIBYUV_API
+void ScalePlane(const uint8* src, int src_stride,
+                int src_width, int src_height,
+                uint8* dst, int dst_stride,
+                int dst_width, int dst_height,
+                enum FilterMode filtering);
+
+void ScalePlane_16(const uint16* src, int src_stride,
+                   int src_width, int src_height,
+                   uint16* dst, int dst_stride,
+                   int dst_width, int dst_height,
+                   enum FilterMode filtering);
+
+// Scales a YUV 4:2:0 image from the src width and height to the
+// dst width and height.
+// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
+// used. This produces basic (blocky) quality at the fastest speed.
+// If filtering is kFilterBilinear, interpolation is used to produce a better
+// quality image, at the expense of speed.
+// If filtering is kFilterBox, averaging is used to produce ever better
+// quality image, at further expense of speed.
+// Returns 0 if successful.
+
+LIBYUV_API
+int I420Scale(const uint8* src_y, int src_stride_y,
+              const uint8* src_u, int src_stride_u,
+              const uint8* src_v, int src_stride_v,
+              int src_width, int src_height,
+              uint8* dst_y, int dst_stride_y,
+              uint8* dst_u, int dst_stride_u,
+              uint8* dst_v, int dst_stride_v,
+              int dst_width, int dst_height,
+              enum FilterMode filtering);
+
+LIBYUV_API
+int I420Scale_16(const uint16* src_y, int src_stride_y,
+                 const uint16* src_u, int src_stride_u,
+                 const uint16* src_v, int src_stride_v,
+                 int src_width, int src_height,
+                 uint16* dst_y, int dst_stride_y,
+                 uint16* dst_u, int dst_stride_u,
+                 uint16* dst_v, int dst_stride_v,
+                 int dst_width, int dst_height,
+                 enum FilterMode filtering);
+
+#ifdef __cplusplus
+// Legacy API.  Deprecated.
+LIBYUV_API
+int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
+          int src_stride_y, int src_stride_u, int src_stride_v,
+          int src_width, int src_height,
+          uint8* dst_y, uint8* dst_u, uint8* dst_v,
+          int dst_stride_y, int dst_stride_u, int dst_stride_v,
+          int dst_width, int dst_height,
+          LIBYUV_BOOL interpolate);
+
+// Legacy API.  Deprecated.
+LIBYUV_API
+int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
+                uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
+                LIBYUV_BOOL interpolate);
+
+// For testing, allow disabling of specialized scalers.
+LIBYUV_API
+void SetUseReferenceImpl(LIBYUV_BOOL use);
+#endif  // __cplusplus
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_SCALE_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/scale_argb.h b/src/main/jni/libyuv/include/libyuv/scale_argb.h
new file mode 100644
index 000000000..0c9b36257
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/scale_argb.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_  // NOLINT
+#define INCLUDE_LIBYUV_SCALE_ARGB_H_
+
+#include "libyuv/basic_types.h"
+#include "libyuv/scale.h"  // For FilterMode
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+LIBYUV_API
+int ARGBScale(const uint8* src_argb, int src_stride_argb,
+              int src_width, int src_height,
+              uint8* dst_argb, int dst_stride_argb,
+              int dst_width, int dst_height,
+              enum FilterMode filtering);
+
+// Clipped scale takes destination rectangle coordinates for clip values.
+LIBYUV_API
+int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
+                  int src_width, int src_height,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int dst_width, int dst_height,
+                  int clip_x, int clip_y, int clip_width, int clip_height,
+                  enum FilterMode filtering);
+
+// TODO(fbarchard): Implement this.
+// Scale with YUV conversion to ARGB and clipping.
+LIBYUV_API
+int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
+                       const uint8* src_u, int src_stride_u,
+                       const uint8* src_v, int src_stride_v,
+                       uint32 src_fourcc,
+                       int src_width, int src_height,
+                       uint8* dst_argb, int dst_stride_argb,
+                       uint32 dst_fourcc,
+                       int dst_width, int dst_height,
+                       int clip_x, int clip_y, int clip_width, int clip_height,
+                       enum FilterMode filtering);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_SCALE_ARGB_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/scale_row.h b/src/main/jni/libyuv/include/libyuv/scale_row.h
new file mode 100644
index 000000000..70e6bc55b
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/scale_row.h
@@ -0,0 +1,349 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_  // NOLINT
+#define INCLUDE_LIBYUV_SCALE_ROW_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
+    defined(TARGET_IPHONE_SIMULATOR)
+#define LIBYUV_DISABLE_X86
+#endif
+
+// The following are available on all x86 platforms:
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_SCALEROWDOWN2_SSE2
+#define HAS_SCALEROWDOWN4_SSE2
+#define HAS_SCALEROWDOWN34_SSSE3
+#define HAS_SCALEROWDOWN38_SSSE3
+#define HAS_SCALEADDROWS_SSE2
+#define HAS_SCALEFILTERCOLS_SSSE3
+#define HAS_SCALECOLSUP2_SSE2
+#define HAS_SCALEARGBROWDOWN2_SSE2
+#define HAS_SCALEARGBROWDOWNEVEN_SSE2
+#define HAS_SCALEARGBCOLS_SSE2
+#define HAS_SCALEARGBFILTERCOLS_SSSE3
+#define HAS_SCALEARGBCOLSUP2_SSE2
+#define HAS_FIXEDDIV_X86
+#define HAS_FIXEDDIV1_X86
+#endif
+
+// The following are available on Neon platforms:
+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+#define HAS_SCALEROWDOWN2_NEON
+#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEROWDOWN34_NEON
+#define HAS_SCALEROWDOWN38_NEON
+#define HAS_SCALEARGBROWDOWNEVEN_NEON
+#define HAS_SCALEARGBROWDOWN2_NEON
+#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__aarch64__) || defined(LIBYUV_NEON))
+#define HAS_SCALEROWDOWN2_NEON
+#define HAS_SCALEROWDOWN4_NEON
+#define HAS_SCALEROWDOWN34_NEON
+#define HAS_SCALEROWDOWN38_NEON
+#define HAS_SCALEARGBROWDOWN2_NEON
+#define HAS_SCALEARGBROWDOWNEVEN_NEON
+#endif
+
+// The following are available on Mips platforms:
+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
+    defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+#define HAS_SCALEROWDOWN2_MIPS_DSPR2
+#define HAS_SCALEROWDOWN4_MIPS_DSPR2
+#define HAS_SCALEROWDOWN34_MIPS_DSPR2
+#define HAS_SCALEROWDOWN38_MIPS_DSPR2
+#endif
+
+// Scale ARGB vertically with bilinear interpolation.
+void ScalePlaneVertical(int src_height,
+                        int dst_width, int dst_height,
+                        int src_stride, int dst_stride,
+                        const uint8* src_argb, uint8* dst_argb,
+                        int x, int y, int dy,
+                        int bpp, enum FilterMode filtering);
+
+void ScalePlaneVertical_16(int src_height,
+                           int dst_width, int dst_height,
+                           int src_stride, int dst_stride,
+                           const uint16* src_argb, uint16* dst_argb,
+                           int x, int y, int dy,
+                           int wpp, enum FilterMode filtering);
+
+// Simplify the filtering based on scale factors.
+enum FilterMode ScaleFilterReduce(int src_width, int src_height,
+                                  int dst_width, int dst_height,
+                                  enum FilterMode filtering);
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_C(int num, int div);
+int FixedDiv_X86(int num, int div);
+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
+int FixedDiv1_C(int num, int div);
+int FixedDiv1_X86(int num, int div);
+#ifdef HAS_FIXEDDIV_X86
+#define FixedDiv FixedDiv_X86
+#define FixedDiv1 FixedDiv1_X86
+#else
+#define FixedDiv FixedDiv_C
+#define FixedDiv1 FixedDiv1_C
+#endif
+
+// Compute slope values for stepping.
+void ScaleSlope(int src_width, int src_height,
+                int dst_width, int dst_height,
+                enum FilterMode filtering,
+                int* x, int* y, int* dx, int* dy);
+
+void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                     uint8* dst, int dst_width);
+void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width);
+void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst, int dst_width);
+void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                              uint16* dst, int dst_width);
+void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width);
+void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width);
+void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                     uint8* dst, int dst_width);
+void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width);
+void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width);
+void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width);
+void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                      uint8* dst, int dst_width);
+void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width);
+void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* d, int dst_width);
+void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width);
+void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* d, int dst_width);
+void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width);
+void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
+                 int dst_width, int x, int dx);
+void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                    int dst_width, int x, int dx);
+void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
+                    int dst_width, int, int);
+void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                       int dst_width, int, int);
+void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
+                       int dst_width, int x, int dx);
+void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                          int dst_width, int x, int dx);
+void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
+                         int dst_width, int x, int dx);
+void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                            int dst_width, int x, int dx);
+void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                      uint8* dst, int dst_width);
+void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width);
+void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
+                            ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width);
+void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                    uint16* dst_ptr, int src_width, int src_height);
+void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                       uint32* dst_ptr, int src_width, int src_height);
+void ScaleARGBRowDown2_C(const uint8* src_argb,
+                         ptrdiff_t src_stride,
+                         uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
+                            int src_stepx,
+                            uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
+                     int dst_width, int x, int dx);
+void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
+                       int dst_width, int x, int dx);
+void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int, int);
+void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
+                           int dst_width, int x, int dx);
+void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
+                             int dst_width, int x, int dx);
+
+void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
+                                        ptrdiff_t src_stride,
+                                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                       uint16* dst_ptr, int src_width,
+                       int src_height);
+void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                           int dst_width, int x, int dx);
+void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                       int dst_width, int x, int dx);
+void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+                            ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width);
+void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int x, int dx);
+void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
+                               int dst_width, int x, int dx);
+void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
+                           int dst_width, int x, int dx);
+// Row functions.
+void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width);
+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width);
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
+
+// ScaleRowDown2Box also used by planar functions
+// NEON downscalers with interpolation.
+
+// Note - not static due to reuse in convert for 444 to 420.
+void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width);
+
+void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst, int dst_width);
+
+void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+//  to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width);
+// 32x3 -> 12x1
+void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+
+void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst, int dst_width);
+void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                 uint8* dst, int dst_width);
+void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst, int dst_width);
+void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                 uint8* dst, int dst_width);
+void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
+void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* d, int dst_width);
+void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* d, int dst_width);
+void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
+void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_SCALE_ROW_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/version.h b/src/main/jni/libyuv/include/libyuv/version.h
new file mode 100644
index 000000000..c6952040b
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/version.h
@@ -0,0 +1,16 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
+#define INCLUDE_LIBYUV_VERSION_H_
+
+#define LIBYUV_VERSION 1074
+
+#endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/src/main/jni/libyuv/include/libyuv/video_common.h b/src/main/jni/libyuv/include/libyuv/video_common.h
new file mode 100644
index 000000000..91acc2ffc
--- /dev/null
+++ b/src/main/jni/libyuv/include/libyuv/video_common.h
@@ -0,0 +1,182 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Common definitions for video, including fourcc and VideoFormat.
+
+#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_  // NOLINT
+#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// Definition of FourCC codes
+//////////////////////////////////////////////////////////////////////////////
+
+// Convert four characters to a FourCC code.
+// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
+// constants are used in a switch.
+#ifdef __cplusplus
+#define FOURCC(a, b, c, d) ( \
+    (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
+    (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
+#else
+#define FOURCC(a, b, c, d) ( \
+    ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
+    ((uint32)(c) << 16) | ((uint32)(d) << 24))  /* NOLINT */
+#endif
+
+// Some pages discussing FourCC codes:
+//   http://www.fourcc.org/yuv.php
+//   http://v4l2spec.bytesex.org/spec/book1.htm
+//   http://developer.apple.com/quicktime/icefloe/dispatch020.html
+//   http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
+//   http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
+
+// FourCC codes grouped according to implementation efficiency.
+// Primary formats should convert in 1 efficient step.
+// Secondary formats are converted in 2 steps.
+// Auxilliary formats call primary converters.
+enum FourCC {
+  // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
+  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
+  FOURCC_I422 = FOURCC('I', '4', '2', '2'),
+  FOURCC_I444 = FOURCC('I', '4', '4', '4'),
+  FOURCC_I411 = FOURCC('I', '4', '1', '1'),
+  FOURCC_I400 = FOURCC('I', '4', '0', '0'),
+  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
+  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
+  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
+  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
+
+  // 2 Secondary YUV formats: row biplanar.
+  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
+  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
+
+  // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
+  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
+  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
+  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
+  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
+  FOURCC_RAW  = FOURCC('r', 'a', 'w', ' '),
+  FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
+  FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
+  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
+  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.
+
+  // 4 Secondary RGB formats: 4 Bayer Patterns.
+  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
+  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
+  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
+  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
+
+  // 1 Primary Compressed YUV format.
+  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
+
+  // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
+  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
+  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
+  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
+  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
+  FOURCC_J420 = FOURCC('J', '4', '2', '0'),
+  FOURCC_J400 = FOURCC('J', '4', '0', '0'),
+
+  // 14 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc.
+  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
+  FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'),  // Alias for I422.
+  FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'),  // Alias for I444.
+  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2.
+  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac.
+  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY.
+  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY on Mac.
+  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG.
+  FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'),  // Alias for MJPG on Mac.
+  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR.
+  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW.
+  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG.
+  FOURCC_CM32 = FOURCC(0, 0, 0, 32),  // Alias for BGRA kCMPixelFormat_32ARGB
+  FOURCC_CM24 = FOURCC(0, 0, 0, 24),  // Alias for RAW kCMPixelFormat_24RGB
+  FOURCC_L555 = FOURCC('L', '5', '5', '5'),  // Alias for RGBO.
+  FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
+  FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.
+
+  // 1 Auxiliary compressed YUV format set aside for capturer.
+  FOURCC_H264 = FOURCC('H', '2', '6', '4'),
+
+  // Match any fourcc.
+  FOURCC_ANY = -1,
+};
+
+enum FourCCBpp {
+  // Canonical fourcc codes used in our code.
+  FOURCC_BPP_I420 = 12,
+  FOURCC_BPP_I422 = 16,
+  FOURCC_BPP_I444 = 24,
+  FOURCC_BPP_I411 = 12,
+  FOURCC_BPP_I400 = 8,
+  FOURCC_BPP_NV21 = 12,
+  FOURCC_BPP_NV12 = 12,
+  FOURCC_BPP_YUY2 = 16,
+  FOURCC_BPP_UYVY = 16,
+  FOURCC_BPP_M420 = 12,
+  FOURCC_BPP_Q420 = 12,
+  FOURCC_BPP_ARGB = 32,
+  FOURCC_BPP_BGRA = 32,
+  FOURCC_BPP_ABGR = 32,
+  FOURCC_BPP_RGBA = 32,
+  FOURCC_BPP_24BG = 24,
+  FOURCC_BPP_RAW  = 24,
+  FOURCC_BPP_RGBP = 16,
+  FOURCC_BPP_RGBO = 16,
+  FOURCC_BPP_R444 = 16,
+  FOURCC_BPP_RGGB = 8,
+  FOURCC_BPP_BGGR = 8,
+  FOURCC_BPP_GRBG = 8,
+  FOURCC_BPP_GBRG = 8,
+  FOURCC_BPP_YV12 = 12,
+  FOURCC_BPP_YV16 = 16,
+  FOURCC_BPP_YV24 = 24,
+  FOURCC_BPP_YU12 = 12,
+  FOURCC_BPP_J420 = 12,
+  FOURCC_BPP_J400 = 8,
+  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
+  FOURCC_BPP_H264 = 0,
+  FOURCC_BPP_IYUV = 12,
+  FOURCC_BPP_YU16 = 16,
+  FOURCC_BPP_YU24 = 24,
+  FOURCC_BPP_YUYV = 16,
+  FOURCC_BPP_YUVS = 16,
+  FOURCC_BPP_HDYC = 16,
+  FOURCC_BPP_2VUY = 16,
+  FOURCC_BPP_JPEG = 1,
+  FOURCC_BPP_DMB1 = 1,
+  FOURCC_BPP_BA81 = 8,
+  FOURCC_BPP_RGB3 = 24,
+  FOURCC_BPP_BGR3 = 24,
+  FOURCC_BPP_CM32 = 32,
+  FOURCC_BPP_CM24 = 24,
+
+  // Match any fourcc.
+  FOURCC_BPP_ANY  = 0,  // 0 means unknown.
+};
+
+// Converts fourcc aliases into canonical ones.
+LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_VIDEO_COMMON_H_  NOLINT
diff --git a/src/main/jni/libyuv/source/compare.cc b/src/main/jni/libyuv/source/compare.cc
new file mode 100644
index 000000000..dc715e019
--- /dev/null
+++ b/src/main/jni/libyuv/source/compare.cc
@@ -0,0 +1,325 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/compare.h"
+
+#include <float.h>
+#include <math.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// hash seed of 5381 recommended.
+// Internal C version of HashDjb2 with int sized count for efficiency.
+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
+
+// This module is for Visual C x86
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || \
+    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
+#define HAS_HASHDJB2_SSE41
+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
+
+#if _MSC_VER >= 1700
+#define HAS_HASHDJB2_AVX2
+uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
+#endif
+
+#endif  // HAS_HASHDJB2_SSE41
+
+// hash seed of 5381 recommended.
+LIBYUV_API
+uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
+  const int kBlockSize = 1 << 15;  // 32768;
+  int remainder;
+  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
+#if defined(HAS_HASHDJB2_SSE41)
+  if (TestCpuFlag(kCpuHasSSE41)) {
+    HashDjb2_SSE = HashDjb2_SSE41;
+  }
+#endif
+#if defined(HAS_HASHDJB2_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    HashDjb2_SSE = HashDjb2_AVX2;
+  }
+#endif
+
+  while (count >= (uint64)(kBlockSize)) {
+    seed = HashDjb2_SSE(src, kBlockSize, seed);
+    src += kBlockSize;
+    count -= kBlockSize;
+  }
+  remainder = (int)(count) & ~15;
+  if (remainder) {
+    seed = HashDjb2_SSE(src, remainder, seed);
+    src += remainder;
+    count -= remainder;
+  }
+  remainder = (int)(count) & 15;
+  if (remainder) {
+    seed = HashDjb2_C(src, remainder, seed);
+  }
+  return seed;
+}
+
+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
+#if !defined(LIBYUV_DISABLE_NEON) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+#define HAS_SUMSQUAREERROR_NEON
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
+#endif
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#define HAS_SUMSQUAREERROR_SSE2
+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
+#endif
+// Visual C 2012 required for AVX2.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
+#define HAS_SUMSQUAREERROR_AVX2
+uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
+#endif
+
+// TODO(fbarchard): Refactor into row function.
+LIBYUV_API
+uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
+                             int count) {
+  // SumSquareError returns values 0 to 65535 for each squared difference.
+  // Up to 65536 of those can be summed and remain within a uint32.
+  // After each block of 65536 pixels, accumulate into a uint64.
+  const int kBlockSize = 65536;
+  int remainder = count & (kBlockSize - 1) & ~31;
+  uint64 sse = 0;
+  int i;
+  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
+      SumSquareError_C;
+#if defined(HAS_SUMSQUAREERROR_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SumSquareError = SumSquareError_NEON;
+  }
+#endif
+#if defined(HAS_SUMSQUAREERROR_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
+    // Note only used for multiples of 16 so count is not checked.
+    SumSquareError = SumSquareError_SSE2;
+  }
+#endif
+#if defined(HAS_SUMSQUAREERROR_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    // Note only used for multiples of 32 so count is not checked.
+    SumSquareError = SumSquareError_AVX2;
+  }
+#endif
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+: sse)
+#endif
+  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
+    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
+  }
+  src_a += count & ~(kBlockSize - 1);
+  src_b += count & ~(kBlockSize - 1);
+  if (remainder) {
+    sse += SumSquareError(src_a, src_b, remainder);
+    src_a += remainder;
+    src_b += remainder;
+  }
+  remainder = count & 31;
+  if (remainder) {
+    sse += SumSquareError_C(src_a, src_b, remainder);
+  }
+  return sse;
+}
+
+LIBYUV_API
+uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
+                                  const uint8* src_b, int stride_b,
+                                  int width, int height) {
+  uint64 sse = 0;
+  int h;
+  // Coalesce rows.
+  if (stride_a == width &&
+      stride_b == width) {
+    width *= height;
+    height = 1;
+    stride_a = stride_b = 0;
+  }
+  for (h = 0; h < height; ++h) {
+    sse += ComputeSumSquareError(src_a, src_b, width);
+    src_a += stride_a;
+    src_b += stride_b;
+  }
+  return sse;
+}
+
+LIBYUV_API
+double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
+  double psnr;
+  if (sse > 0) {
+    double mse = (double)(count) / (double)(sse);
+    psnr = 10.0 * log10(255.0 * 255.0 * mse);
+  } else {
+    psnr = kMaxPsnr;      // Limit to prevent divide by 0
+  }
+
+  if (psnr > kMaxPsnr)
+    psnr = kMaxPsnr;
+
+  return psnr;
+}
+
+LIBYUV_API
+double CalcFramePsnr(const uint8* src_a, int stride_a,
+                     const uint8* src_b, int stride_b,
+                     int width, int height) {
+  const uint64 samples = width * height;
+  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
+                                                src_b, stride_b,
+                                                width, height);
+  return SumSquareErrorToPsnr(sse, samples);
+}
+
+LIBYUV_API
+double I420Psnr(const uint8* src_y_a, int stride_y_a,
+                const uint8* src_u_a, int stride_u_a,
+                const uint8* src_v_a, int stride_v_a,
+                const uint8* src_y_b, int stride_y_b,
+                const uint8* src_u_b, int stride_u_b,
+                const uint8* src_v_b, int stride_v_b,
+                int width, int height) {
+  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
+                                                  src_y_b, stride_y_b,
+                                                  width, height);
+  const int width_uv = (width + 1) >> 1;
+  const int height_uv = (height + 1) >> 1;
+  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
+                                                  src_u_b, stride_u_b,
+                                                  width_uv, height_uv);
+  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
+                                                  src_v_b, stride_v_b,
+                                                  width_uv, height_uv);
+  const uint64 samples = width * height + 2 * (width_uv * height_uv);
+  const uint64 sse = sse_y + sse_u + sse_v;
+  return SumSquareErrorToPsnr(sse, samples);
+}
+
+static const int64 cc1 =  26634;  // (64^2*(.01*255)^2
+static const int64 cc2 = 239708;  // (64^2*(.03*255)^2
+
+static double Ssim8x8_C(const uint8* src_a, int stride_a,
+                        const uint8* src_b, int stride_b) {
+  int64 sum_a = 0;
+  int64 sum_b = 0;
+  int64 sum_sq_a = 0;
+  int64 sum_sq_b = 0;
+  int64 sum_axb = 0;
+
+  int i;
+  for (i = 0; i < 8; ++i) {
+    int j;
+    for (j = 0; j < 8; ++j) {
+      sum_a += src_a[j];
+      sum_b += src_b[j];
+      sum_sq_a += src_a[j] * src_a[j];
+      sum_sq_b += src_b[j] * src_b[j];
+      sum_axb += src_a[j] * src_b[j];
+    }
+
+    src_a += stride_a;
+    src_b += stride_b;
+  }
+
+  {
+    const int64 count = 64;
+    // scale the constants by number of pixels
+    const int64 c1 = (cc1 * count * count) >> 12;
+    const int64 c2 = (cc2 * count * count) >> 12;
+
+    const int64 sum_a_x_sum_b = sum_a * sum_b;
+
+    const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
+                         (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
+
+    const int64 sum_a_sq = sum_a*sum_a;
+    const int64 sum_b_sq = sum_b*sum_b;
+
+    const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
+                         (count * sum_sq_a - sum_a_sq +
+                          count * sum_sq_b - sum_b_sq + c2);
+
+    if (ssim_d == 0.0) {
+      return DBL_MAX;
+    }
+    return ssim_n * 1.0 / ssim_d;
+  }
+}
+
+// We are using a 8x8 moving window with starting location of each 8x8 window
+// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
+// block boundaries to penalize blocking artifacts.
+LIBYUV_API
+double CalcFrameSsim(const uint8* src_a, int stride_a,
+                     const uint8* src_b, int stride_b,
+                     int width, int height) {
+  int samples = 0;
+  double ssim_total = 0;
+  double (*Ssim8x8)(const uint8* src_a, int stride_a,
+                    const uint8* src_b, int stride_b) = Ssim8x8_C;
+
+  // sample point start with each 4x4 location
+  int i;
+  for (i = 0; i < height - 8; i += 4) {
+    int j;
+    for (j = 0; j < width - 8; j += 4) {
+      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
+      samples++;
+    }
+
+    src_a += stride_a * 4;
+    src_b += stride_b * 4;
+  }
+
+  ssim_total /= samples;
+  return ssim_total;
+}
+
+LIBYUV_API
+double I420Ssim(const uint8* src_y_a, int stride_y_a,
+                const uint8* src_u_a, int stride_u_a,
+                const uint8* src_v_a, int stride_v_a,
+                const uint8* src_y_b, int stride_y_b,
+                const uint8* src_u_b, int stride_u_b,
+                const uint8* src_v_b, int stride_v_b,
+                int width, int height) {
+  const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
+                                      src_y_b, stride_y_b, width, height);
+  const int width_uv = (width + 1) >> 1;
+  const int height_uv = (height + 1) >> 1;
+  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
+                                      src_u_b, stride_u_b,
+                                      width_uv, height_uv);
+  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
+                                      src_v_b, stride_v_b,
+                                      width_uv, height_uv);
+  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/compare_common.cc b/src/main/jni/libyuv/source/compare_common.cc
new file mode 100644
index 000000000..c546b5182
--- /dev/null
+++ b/src/main/jni/libyuv/source/compare_common.cc
@@ -0,0 +1,42 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
+  uint32 sse = 0u;
+  int i;
+  for (i = 0; i < count; ++i) {
+    int diff = src_a[i] - src_b[i];
+    sse += (uint32)(diff * diff);
+  }
+  return sse;
+}
+
+// hash seed of 5381 recommended.
+// Internal C version of HashDjb2 with int sized count for efficiency.
+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
+  uint32 hash = seed;
+  int i;
+  for (i = 0; i < count; ++i) {
+    hash += (hash << 5) + src[i];
+  }
+  return hash;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/compare_neon.cc b/src/main/jni/libyuv/source/compare_neon.cc
new file mode 100644
index 000000000..55052c0ee
--- /dev/null
+++ b/src/main/jni/libyuv/source/compare_neon.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+  volatile uint32 sse;
+  asm volatile (
+    "vmov.u8    q8, #0                         \n"
+    "vmov.u8    q10, #0                        \n"
+    "vmov.u8    q9, #0                         \n"
+    "vmov.u8    q11, #0                        \n"
+
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"
+    MEMACCESS(1)
+    "vld1.8     {q1}, [%1]!                    \n"
+    "subs       %2, %2, #16                    \n"
+    "vsubl.u8   q2, d0, d2                     \n"
+    "vsubl.u8   q3, d1, d3                     \n"
+    "vmlal.s16  q8, d4, d4                     \n"
+    "vmlal.s16  q9, d6, d6                     \n"
+    "vmlal.s16  q10, d5, d5                    \n"
+    "vmlal.s16  q11, d7, d7                    \n"
+    "bgt        1b                             \n"
+
+    "vadd.u32   q8, q8, q9                     \n"
+    "vadd.u32   q10, q10, q11                  \n"
+    "vadd.u32   q11, q8, q10                   \n"
+    "vpaddl.u32 q1, q11                        \n"
+    "vadd.u64   d0, d2, d3                     \n"
+    "vmov.32    %3, d0[0]                      \n"
+    : "+r"(src_a),
+      "+r"(src_b),
+      "+r"(count),
+      "=r"(sse)
+    :
+    : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+  return sse;
+}
+
+#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+  volatile uint32 sse;
+  asm volatile (
+    "eor        v16.16b, v16.16b, v16.16b      \n"
+    "eor        v18.16b, v18.16b, v18.16b      \n"
+    "eor        v17.16b, v17.16b, v17.16b      \n"
+    "eor        v19.16b, v19.16b, v19.16b      \n"
+
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"
+    "subs       %2, %2, #16                    \n"
+    "usubl      v2.8h, v0.8b, v1.8b            \n"
+    "usubl2     v3.8h, v0.16b, v1.16b          \n"
+    "smlal      v16.4s, v2.4h, v2.4h           \n"
+    "smlal      v17.4s, v3.4h, v3.4h           \n"
+    "smlal2     v18.4s, v2.8h, v2.8h           \n"
+    "smlal2     v19.4s, v3.8h, v3.8h           \n"
+    "bgt        1b                             \n"
+
+    "add        v16.4s, v16.4s, v17.4s         \n"
+    "add        v18.4s, v18.4s, v19.4s         \n"
+    "add        v19.4s, v16.4s, v18.4s         \n"
+    "addv       s0, v19.4s                     \n"
+    "fmov       %w3, s0                        \n"
+    : "+r"(src_a),
+      "+r"(src_b),
+      "+r"(count),
+      "=r"(sse)
+    :
+    : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+  return sse;
+}
+
+#endif  // __ARM_NEON__
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/compare_posix.cc b/src/main/jni/libyuv/source/compare_posix.cc
new file mode 100644
index 000000000..ac361190e
--- /dev/null
+++ b/src/main/jni/libyuv/source/compare_posix.cc
@@ -0,0 +1,158 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+
+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
+  uint32 sse;
+  asm volatile (  // NOLINT
+    "pxor      %%xmm0,%%xmm0                   \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10, 0) ",%0          \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10, 1) ",%1          \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psubusb   %%xmm2,%%xmm1                   \n"
+    "psubusb   %%xmm3,%%xmm2                   \n"
+    "por       %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "punpckhbw %%xmm5,%%xmm2                   \n"
+    "pmaddwd   %%xmm1,%%xmm1                   \n"
+    "pmaddwd   %%xmm2,%%xmm2                   \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "paddd     %%xmm2,%%xmm0                   \n"
+    "jg        1b                              \n"
+
+    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "movd      %%xmm0,%3                       \n"
+
+  : "+r"(src_a),      // %0
+    "+r"(src_b),      // %1
+    "+r"(count),      // %2
+    "=g"(sse)         // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );  // NOLINT
+  return sse;
+}
+
+#endif  // defined(__x86_64__) || defined(__i386__)
+
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
+#define HAS_HASHDJB2_SSE41
+static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
+static uvec32 kHashMul0 = {
+  0x0c3525e1,  // 33 ^ 15
+  0xa3476dc1,  // 33 ^ 14
+  0x3b4039a1,  // 33 ^ 13
+  0x4f5f0981,  // 33 ^ 12
+};
+static uvec32 kHashMul1 = {
+  0x30f35d61,  // 33 ^ 11
+  0x855cb541,  // 33 ^ 10
+  0x040a9121,  // 33 ^ 9
+  0x747c7101,  // 33 ^ 8
+};
+static uvec32 kHashMul2 = {
+  0xec41d4e1,  // 33 ^ 7
+  0x4cfa3cc1,  // 33 ^ 6
+  0x025528a1,  // 33 ^ 5
+  0x00121881,  // 33 ^ 4
+};
+static uvec32 kHashMul3 = {
+  0x00008c61,  // 33 ^ 3
+  0x00000441,  // 33 ^ 2
+  0x00000021,  // 33 ^ 1
+  0x00000001,  // 33 ^ 0
+};
+
+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
+  uint32 hash;
+  asm volatile (  // NOLINT
+    "movd      %2,%%xmm0                       \n"
+    "pxor      %%xmm7,%%xmm7                   \n"
+    "movdqa    %4,%%xmm6                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10, 0) ",%0          \n"
+    "pmulld    %%xmm6,%%xmm0                   \n"
+    "movdqa    %5,%%xmm5                       \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "punpcklwd %%xmm7,%%xmm3                   \n"
+    "pmulld    %%xmm5,%%xmm3                   \n"
+    "movdqa    %6,%%xmm5                       \n"
+    "movdqa    %%xmm2,%%xmm4                   \n"
+    "punpckhwd %%xmm7,%%xmm4                   \n"
+    "pmulld    %%xmm5,%%xmm4                   \n"
+    "movdqa    %7,%%xmm5                       \n"
+    "punpckhbw %%xmm7,%%xmm1                   \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "punpcklwd %%xmm7,%%xmm2                   \n"
+    "pmulld    %%xmm5,%%xmm2                   \n"
+    "movdqa    %8,%%xmm5                       \n"
+    "punpckhwd %%xmm7,%%xmm1                   \n"
+    "pmulld    %%xmm5,%%xmm1                   \n"
+    "paddd     %%xmm4,%%xmm3                   \n"
+    "paddd     %%xmm2,%%xmm1                   \n"
+    "sub       $0x10,%1                        \n"
+    "paddd     %%xmm3,%%xmm1                   \n"
+    "pshufd    $0xe,%%xmm1,%%xmm2              \n"
+    "paddd     %%xmm2,%%xmm1                   \n"
+    "pshufd    $0x1,%%xmm1,%%xmm2              \n"
+    "paddd     %%xmm2,%%xmm1                   \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "jg        1b                              \n"
+    "movd      %%xmm0,%3                       \n"
+  : "+r"(src),        // %0
+    "+r"(count),      // %1
+    "+rm"(seed),      // %2
+    "=g"(hash)        // %3
+  : "m"(kHash16x33),  // %4
+    "m"(kHashMul0),   // %5
+    "m"(kHashMul1),   // %6
+    "m"(kHashMul2),   // %7
+    "m"(kHashMul3)    // %8
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );  // NOLINT
+  return hash;
+}
+#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
diff --git a/src/main/jni/libyuv/source/compare_win.cc b/src/main/jni/libyuv/source/compare_win.cc
new file mode 100644
index 000000000..99831651f
--- /dev/null
+++ b/src/main/jni/libyuv/source/compare_win.cc
@@ -0,0 +1,232 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+__declspec(naked) __declspec(align(16))
+uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
+  __asm {
+    mov        eax, [esp + 4]    // src_a
+    mov        edx, [esp + 8]    // src_b
+    mov        ecx, [esp + 12]   // count
+    pxor       xmm0, xmm0
+    pxor       xmm5, xmm5
+
+    align      4
+  wloop:
+    movdqa     xmm1, [eax]
+    lea        eax,  [eax + 16]
+    movdqa     xmm2, [edx]
+    lea        edx,  [edx + 16]
+    sub        ecx, 16
+    movdqa     xmm3, xmm1  // abs trick
+    psubusb    xmm1, xmm2
+    psubusb    xmm2, xmm3
+    por        xmm1, xmm2
+    movdqa     xmm2, xmm1
+    punpcklbw  xmm1, xmm5
+    punpckhbw  xmm2, xmm5
+    pmaddwd    xmm1, xmm1
+    pmaddwd    xmm2, xmm2
+    paddd      xmm0, xmm1
+    paddd      xmm0, xmm2
+    jg         wloop
+
+    pshufd     xmm1, xmm0, 0xee
+    paddd      xmm0, xmm1
+    pshufd     xmm1, xmm0, 0x01
+    paddd      xmm0, xmm1
+    movd       eax, xmm0
+    ret
+  }
+}
+
+// Visual C 2012 required for AVX2.
+#if _MSC_VER >= 1700
+// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
+#pragma warning(disable: 4752)
+__declspec(naked) __declspec(align(16))
+uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
+  __asm {
+    mov        eax, [esp + 4]    // src_a
+    mov        edx, [esp + 8]    // src_b
+    mov        ecx, [esp + 12]   // count
+    vpxor      ymm0, ymm0, ymm0  // sum
+    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
+    sub        edx, eax
+
+    align      4
+  wloop:
+    vmovdqu    ymm1, [eax]
+    vmovdqu    ymm2, [eax + edx]
+    lea        eax,  [eax + 32]
+    sub        ecx, 32
+    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
+    vpsubusb   ymm2, ymm2, ymm1
+    vpor       ymm1, ymm2, ymm3
+    vpunpcklbw ymm2, ymm1, ymm5  // u16.  mutates order.
+    vpunpckhbw ymm1, ymm1, ymm5
+    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
+    vpmaddwd   ymm1, ymm1, ymm1
+    vpaddd     ymm0, ymm0, ymm1
+    vpaddd     ymm0, ymm0, ymm2
+    jg         wloop
+
+    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
+    vpaddd     ymm0, ymm0, ymm1
+    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 both lanes.
+    vpaddd     ymm0, ymm0, ymm1
+    vpermq     ymm1, ymm0, 0x02  // high + low lane.
+    vpaddd     ymm0, ymm0, ymm1
+    vmovd      eax, xmm0
+    vzeroupper
+    ret
+  }
+}
+#endif  // _MSC_VER >= 1700
+
+#define HAS_HASHDJB2_SSE41
+static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
+static uvec32 kHashMul0 = {
+  0x0c3525e1,  // 33 ^ 15
+  0xa3476dc1,  // 33 ^ 14
+  0x3b4039a1,  // 33 ^ 13
+  0x4f5f0981,  // 33 ^ 12
+};
+static uvec32 kHashMul1 = {
+  0x30f35d61,  // 33 ^ 11
+  0x855cb541,  // 33 ^ 10
+  0x040a9121,  // 33 ^ 9
+  0x747c7101,  // 33 ^ 8
+};
+static uvec32 kHashMul2 = {
+  0xec41d4e1,  // 33 ^ 7
+  0x4cfa3cc1,  // 33 ^ 6
+  0x025528a1,  // 33 ^ 5
+  0x00121881,  // 33 ^ 4
+};
+static uvec32 kHashMul3 = {
+  0x00008c61,  // 33 ^ 3
+  0x00000441,  // 33 ^ 2
+  0x00000021,  // 33 ^ 1
+  0x00000001,  // 33 ^ 0
+};
+
+// 27: 66 0F 38 40 C6     pmulld      xmm0,xmm6
+// 44: 66 0F 38 40 DD     pmulld      xmm3,xmm5
+// 59: 66 0F 38 40 E5     pmulld      xmm4,xmm5
+// 72: 66 0F 38 40 D5     pmulld      xmm2,xmm5
+// 83: 66 0F 38 40 CD     pmulld      xmm1,xmm5
+#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
+    _asm _emit 0x40 _asm _emit reg
+
+__declspec(naked) __declspec(align(16))
+uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
+  __asm {
+    mov        eax, [esp + 4]    // src
+    mov        ecx, [esp + 8]    // count
+    movd       xmm0, [esp + 12]  // seed
+
+    pxor       xmm7, xmm7        // constant 0 for unpck
+    movdqa     xmm6, kHash16x33
+
+    align      4
+  wloop:
+    movdqu     xmm1, [eax]       // src[0-15]
+    lea        eax, [eax + 16]
+    pmulld(0xc6)                 // pmulld      xmm0,xmm6  hash *= 33 ^ 16
+    movdqa     xmm5, kHashMul0
+    movdqa     xmm2, xmm1
+    punpcklbw  xmm2, xmm7        // src[0-7]
+    movdqa     xmm3, xmm2
+    punpcklwd  xmm3, xmm7        // src[0-3]
+    pmulld(0xdd)                 // pmulld     xmm3, xmm5
+    movdqa     xmm5, kHashMul1
+    movdqa     xmm4, xmm2
+    punpckhwd  xmm4, xmm7        // src[4-7]
+    pmulld(0xe5)                 // pmulld     xmm4, xmm5
+    movdqa     xmm5, kHashMul2
+    punpckhbw  xmm1, xmm7        // src[8-15]
+    movdqa     xmm2, xmm1
+    punpcklwd  xmm2, xmm7        // src[8-11]
+    pmulld(0xd5)                 // pmulld     xmm2, xmm5
+    movdqa     xmm5, kHashMul3
+    punpckhwd  xmm1, xmm7        // src[12-15]
+    pmulld(0xcd)                 // pmulld     xmm1, xmm5
+    paddd      xmm3, xmm4        // add 16 results
+    paddd      xmm1, xmm2
+    sub        ecx, 16
+    paddd      xmm1, xmm3
+
+    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
+    paddd      xmm1, xmm2
+    pshufd     xmm2, xmm1, 0x01
+    paddd      xmm1, xmm2
+    paddd      xmm0, xmm1
+    jg         wloop
+
+    movd       eax, xmm0         // return hash
+    ret
+  }
+}
+
+// Visual C 2012 required for AVX2.
+#if _MSC_VER >= 1700
+__declspec(naked) __declspec(align(16))
+uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
+  __asm {
+    mov        eax, [esp + 4]    // src
+    mov        ecx, [esp + 8]    // count
+    movd       xmm0, [esp + 12]  // seed
+    movdqa     xmm6, kHash16x33
+
+    align      4
+  wloop:
+    vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3]
+    pmulld     xmm0, xmm6  // hash *= 33 ^ 16
+    vpmovzxbd  xmm4, dword ptr [eax + 4]  // src[4-7]
+    pmulld     xmm3, kHashMul0
+    vpmovzxbd  xmm2, dword ptr [eax + 8]  // src[8-11]
+    pmulld     xmm4, kHashMul1
+    vpmovzxbd  xmm1, dword ptr [eax + 12]  // src[12-15]
+    pmulld     xmm2, kHashMul2
+    lea        eax, [eax + 16]
+    pmulld     xmm1, kHashMul3
+    paddd      xmm3, xmm4        // add 16 results
+    paddd      xmm1, xmm2
+    sub        ecx, 16
+    paddd      xmm1, xmm3
+    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
+    paddd      xmm1, xmm2
+    pshufd     xmm2, xmm1, 0x01
+    paddd      xmm1, xmm2
+    paddd      xmm0, xmm1
+    jg         wloop
+
+    movd       eax, xmm0         // return hash
+    ret
+  }
+}
+#endif  // _MSC_VER >= 1700
+
+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert.cc b/src/main/jni/libyuv/source/convert.cc
new file mode 100644
index 000000000..c31ecf263
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert.cc
@@ -0,0 +1,1543 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/scale.h"  // For ScalePlane()
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+static __inline int Abs(int v) {
+  return v >= 0 ? v : -v;
+}
+
+// Any I4xx To I420 format with mirroring.
+static int I4xxToI420(const uint8* src_y, int src_stride_y,
+                      const uint8* src_u, int src_stride_u,
+                      const uint8* src_v, int src_stride_v,
+                      uint8* dst_y, int dst_stride_y,
+                      uint8* dst_u, int dst_stride_u,
+                      uint8* dst_v, int dst_stride_v,
+                      int src_y_width, int src_y_height,
+                      int src_uv_width, int src_uv_height) {
+  const int dst_y_width = Abs(src_y_width);
+  const int dst_y_height = Abs(src_y_height);
+  const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
+  const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
+  if (src_y_width == 0 || src_y_height == 0 ||
+      src_uv_width == 0 || src_uv_height == 0) {
+    return -1;
+  }
+  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
+             dst_y, dst_stride_y, dst_y_width, dst_y_height,
+             kFilterBilinear);
+  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
+             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
+             kFilterBilinear);
+  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
+             dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
+             kFilterBilinear);
+  return 0;
+}
+
+// Copy I420 with optional flipping
+// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
+// is does row coalescing.
+LIBYUV_API
+int I420Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  // Copy UV planes.
+  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
+// 422 chroma is 1/2 width, 1x height
+// 420 chroma is 1/2 width, 1/2 height
+LIBYUV_API
+int I422ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  const int src_uv_width = SUBSAMPLE(width, 1, 1);
+  return I4xxToI420(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    src_uv_width, height);
+}
+
+// 444 chroma is 1x width, 1x height
+// 420 chroma is 1/2 width, 1/2 height
+LIBYUV_API
+int I444ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  return I4xxToI420(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    width, height);
+}
+
+// 411 chroma is 1/4 width, 1x height
+// 420 chroma is 1/2 width, 1/2 height
+LIBYUV_API
+int I411ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  const int src_uv_width = SUBSAMPLE(width, 3, 2);
+  return I4xxToI420(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    src_uv_width, height);
+}
+
+// I400 is greyscale typically used in MJPG
+LIBYUV_API
+int I400ToI420(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
+  SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
+  return 0;
+}
+
+static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
+                       uint8* dst, int dst_stride,
+                       int width, int height) {
+  int y;
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src, 16) &&
+      IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+  // Copy plane
+  for (y = 0; y < height - 1; y += 2) {
+    CopyRow(src, dst, width);
+    CopyRow(src + src_stride_0, dst + dst_stride, width);
+    src += src_stride_0 + src_stride_1;
+    dst += dst_stride * 2;
+  }
+  if (height & 1) {
+    CopyRow(src, dst, width);
+  }
+}
+
+// Support converting from FOURCC_M420
+// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
+// easy conversion to I420.
+// M420 format description:
+// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
+// Chroma is half width / half height. (420)
+// src_stride_m420 is row planar. Normally this will be the width in pixels.
+//   The UV plane is half width, but 2 values, so src_stride_m420 applies to
+//   this as well as the two Y planes.
+static int X420ToI420(const uint8* src_y,
+                      int src_stride_y0, int src_stride_y1,
+                      const uint8* src_uv, int src_stride_uv,
+                      uint8* dst_y, int dst_stride_y,
+                      uint8* dst_u, int dst_stride_u,
+                      uint8* dst_v, int dst_stride_v,
+                      int width, int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
+      SplitUVRow_C;
+  if (!src_y || !src_uv ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
+    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
+    dst_stride_y = -dst_stride_y;
+    dst_stride_u = -dst_stride_u;
+    dst_stride_v = -dst_stride_v;
+  }
+  // Coalesce rows.
+  if (src_stride_y0 == width &&
+      src_stride_y1 == width &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_y0 = src_stride_y1 = dst_stride_y = 0;
+  }
+  // Coalesce rows.
+  if (src_stride_uv == halfwidth * 2 &&
+      dst_stride_u == halfwidth &&
+      dst_stride_v == halfwidth) {
+    halfwidth *= halfheight;
+    halfheight = 1;
+    src_stride_uv = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_SPLITUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+    SplitUVRow = SplitUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      SplitUVRow = SplitUVRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
+          IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
+          IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
+        SplitUVRow = SplitUVRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_SPLITUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+    SplitUVRow = SplitUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      SplitUVRow = SplitUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_SPLITUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+    SplitUVRow = SplitUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      SplitUVRow = SplitUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
+    SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
+      if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
+          IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
+          IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
+        SplitUVRow = SplitUVRow_MIPS_DSPR2;
+      }
+    }
+  }
+#endif
+
+  if (dst_y) {
+    if (src_stride_y0 == src_stride_y1) {
+      CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height);
+    } else {
+      CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
+                 width, height);
+    }
+  }
+
+  for (y = 0; y < halfheight; ++y) {
+    // Copy a row of UV.
+    SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+    src_uv += src_stride_uv;
+  }
+  return 0;
+}
+
+// Convert NV12 to I420.
+LIBYUV_API
+int NV12ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_uv, int src_stride_uv,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  return X420ToI420(src_y, src_stride_y, src_stride_y,
+                    src_uv, src_stride_uv,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height);
+}
+
+// Convert NV21 to I420.  Same as NV12 but u and v pointers swapped.
+LIBYUV_API
+int NV21ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_vu, int src_stride_vu,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  return X420ToI420(src_y, src_stride_y, src_stride_y,
+                    src_vu, src_stride_vu,
+                    dst_y, dst_stride_y,
+                    dst_v, dst_stride_v,
+                    dst_u, dst_stride_u,
+                    width, height);
+}
+
+// Convert M420 to I420.
+LIBYUV_API
+int M420ToI420(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
+                    src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height);
+}
+
+// Convert Q420 to I420.
+// Format is rows of YY/YUYV
+LIBYUV_API
+int Q420ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  int halfheight;
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+  void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
+      int pix) = YUY2ToUV422Row_C;
+  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+      YUY2ToYRow_C;
+  if (!src_y || !src_yuy2 ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
+    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
+    dst_stride_y = -dst_stride_y;
+    dst_stride_u = -dst_stride_u;
+    dst_stride_v = -dst_stride_v;
+  }
+  // CopyRow for rows of just Y in Q420 copied to Y plane of I420.
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_X86)
+  if (IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+#if defined(HAS_YUY2TOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
+    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
+      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
+        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          YUY2ToYRow = YUY2ToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
+    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
+      YUY2ToYRow = YUY2ToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YUY2ToYRow = YUY2ToYRow_Any_NEON;
+    if (width >= 16) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_NEON;
+      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    CopyRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+
+    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+    YUY2ToYRow(src_yuy2, dst_y, width);
+    src_yuy2 += src_stride_yuy2;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    CopyRow(src_y, dst_y, width);
+    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+  }
+  return 0;
+}
+
+// Convert YUY2 to I420.
+LIBYUV_API
+int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
+      uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
+  void (*YUY2ToYRow)(const uint8* src_yuy2,
+      uint8* dst_y, int pix) = YUY2ToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+    src_stride_yuy2 = -src_stride_yuy2;
+  }
+#if defined(HAS_YUY2TOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
+    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
+      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
+        YUY2ToUVRow = YUY2ToUVRow_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          YUY2ToYRow = YUY2ToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
+    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToUVRow = YUY2ToUVRow_AVX2;
+      YUY2ToYRow = YUY2ToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YUY2ToYRow = YUY2ToYRow_Any_NEON;
+    if (width >= 16) {
+      YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_NEON;
+      YUY2ToUVRow = YUY2ToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
+    YUY2ToYRow(src_yuy2, dst_y, width);
+    YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+    src_yuy2 += src_stride_yuy2 * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
+    YUY2ToYRow(src_yuy2, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert UYVY to I420.
+LIBYUV_API
+int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
+      uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
+  void (*UYVYToYRow)(const uint8* src_uyvy,
+      uint8* dst_y, int pix) = UYVYToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+    src_stride_uyvy = -src_stride_uyvy;
+  }
+#if defined(HAS_UYVYTOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    UYVYToUVRow = UYVYToUVRow_Any_SSE2;
+    UYVYToYRow = UYVYToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
+      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
+        UYVYToUVRow = UYVYToUVRow_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          UYVYToYRow = UYVYToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_UYVYTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    UYVYToUVRow = UYVYToUVRow_Any_AVX2;
+    UYVYToYRow = UYVYToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      UYVYToUVRow = UYVYToUVRow_AVX2;
+      UYVYToYRow = UYVYToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_UYVYTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    UYVYToYRow = UYVYToYRow_Any_NEON;
+    if (width >= 16) {
+      UYVYToUVRow = UYVYToUVRow_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      UYVYToYRow = UYVYToYRow_NEON;
+      UYVYToUVRow = UYVYToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
+    UYVYToYRow(src_uyvy, dst_y, width);
+    UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width);
+    src_uyvy += src_stride_uyvy * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
+    UYVYToYRow(src_uyvy, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert ARGB to I420.
+LIBYUV_API
+int ARGBToI420(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  if (!src_argb ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUVRow = ARGBToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ARGBToYRow = ARGBToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+    ARGBToYRow(src_argb, dst_y, width);
+    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+    src_argb += src_stride_argb * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+    ARGBToYRow(src_argb, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert BGRA to I420.
+LIBYUV_API
+int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
+      uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
+  void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
+      BGRAToYRow_C;
+  if (!src_bgra ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
+    src_stride_bgra = -src_stride_bgra;
+  }
+#if defined(HAS_BGRATOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
+    BGRAToYRow = BGRAToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
+      BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
+        BGRAToUVRow = BGRAToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          BGRAToYRow = BGRAToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#elif defined(HAS_BGRATOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    BGRAToYRow = BGRAToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      BGRAToYRow = BGRAToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_BGRATOUVROW_NEON)
+    if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+      BGRAToUVRow = BGRAToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        BGRAToUVRow = BGRAToUVRow_NEON;
+      }
+    }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
+    BGRAToYRow(src_bgra, dst_y, width);
+    BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width);
+    src_bgra += src_stride_bgra * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
+    BGRAToYRow(src_bgra, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert ABGR to I420.
+LIBYUV_API
+int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
+      uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
+  void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
+      ABGRToYRow_C;
+  if (!src_abgr ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_abgr = src_abgr + (height - 1) * src_stride_abgr;
+    src_stride_abgr = -src_stride_abgr;
+  }
+#if defined(HAS_ABGRTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
+    ABGRToYRow = ABGRToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
+      ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
+        ABGRToUVRow = ABGRToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ABGRToYRow = ABGRToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#elif defined(HAS_ABGRTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ABGRToYRow = ABGRToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ABGRToYRow = ABGRToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ABGRTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ABGRToUVRow = ABGRToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ABGRToUVRow = ABGRToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
+    ABGRToYRow(src_abgr, dst_y, width);
+    ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
+    src_abgr += src_stride_abgr * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
+    ABGRToYRow(src_abgr, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert RGBA to I420.
+LIBYUV_API
+int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
+      uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
+  void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
+      RGBAToYRow_C;
+  if (!src_rgba ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+    src_stride_rgba = -src_stride_rgba;
+  }
+#if defined(HAS_RGBATOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
+    RGBAToYRow = RGBAToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
+      RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
+        RGBAToUVRow = RGBAToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          RGBAToYRow = RGBAToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#elif defined(HAS_RGBATOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RGBAToYRow = RGBAToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGBAToYRow = RGBAToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RGBATOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    RGBAToUVRow = RGBAToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      RGBAToUVRow = RGBAToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
+    RGBAToYRow(src_rgba, dst_y, width);
+    RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width);
+    src_rgba += src_stride_rgba * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width);
+    RGBAToYRow(src_rgba, dst_y, width);
+  }
+  return 0;
+}
+
+// Convert RGB24 to I420.
+LIBYUV_API
+int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
+                uint8* dst_y, int dst_stride_y,
+                uint8* dst_u, int dst_stride_u,
+                uint8* dst_v, int dst_stride_v,
+                int width, int height) {
+  int y;
+#if defined(HAS_RGB24TOYROW_NEON)
+  void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
+      uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
+  void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
+      RGB24ToYRow_C;
+#else
+  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      RGB24ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#endif
+  if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+
+#if defined(HAS_RGB24TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RGB24ToYRow = RGB24ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToYRow = RGB24ToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RGB24TOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToUVRow = RGB24ToUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif  // HAS_ARGBTOUVROW_SSSE3
+
+  {
+#if !defined(HAS_RGB24TOYROW_NEON)
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB24TOYROW_NEON)
+      RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+      RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_rgb24 += src_stride_rgb24 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+#if defined(HAS_RGB24TOYROW_NEON)
+      RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
+      RGB24ToYRow(src_rgb24, dst_y, width);
+#else
+      RGB24ToARGBRow(src_rgb24, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !defined(HAS_RGB24TOYROW_NEON)
+    free_aligned_buffer_64(row);
+#endif
+  }
+  return 0;
+}
+
+// Convert RAW to I420.
+LIBYUV_API
+int RAWToI420(const uint8* src_raw, int src_stride_raw,
+              uint8* dst_y, int dst_stride_y,
+              uint8* dst_u, int dst_stride_u,
+              uint8* dst_v, int dst_stride_v,
+              int width, int height) {
+  int y;
+#if defined(HAS_RAWTOYROW_NEON)
+  void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
+      uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
+  void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
+      RAWToYRow_C;
+#else
+  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      RAWToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#endif
+  if (!src_raw || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_raw = src_raw + (height - 1) * src_stride_raw;
+    src_stride_raw = -src_stride_raw;
+  }
+
+#if defined(HAS_RAWTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RAWToYRow = RAWToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYRow = RAWToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RAWTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    RAWToUVRow = RAWToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToUVRow = RAWToUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToARGBRow = RAWToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif  // HAS_ARGBTOUVROW_SSSE3
+
+  {
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+
+    for (y = 0; y < height - 1; y += 2) {
+  #if defined(HAS_RAWTOYROW_NEON)
+      RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
+      RAWToYRow(src_raw, dst_y, width);
+      RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+  #else
+      RAWToARGBRow(src_raw, row, width);
+      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+  #endif
+      src_raw += src_stride_raw * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+  #if defined(HAS_RAWTOYROW_NEON)
+      RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
+      RAWToYRow(src_raw, dst_y, width);
+  #else
+      RAWToARGBRow(src_raw, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+  #endif
+    }
+  #if !defined(HAS_RAWTOYROW_NEON)
+    free_aligned_buffer_64(row);
+  #endif
+  }
+  return 0;
+}
+
+// Convert RGB565 to I420.
+LIBYUV_API
+int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
+                 uint8* dst_y, int dst_stride_y,
+                 uint8* dst_u, int dst_stride_u,
+                 uint8* dst_v, int dst_stride_v,
+                 int width, int height) {
+  int y;
+#if defined(HAS_RGB565TOYROW_NEON)
+  void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
+      uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
+  void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
+      RGB565ToYRow_C;
+#else
+  void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      RGB565ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#endif
+  if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
+    src_stride_rgb565 = -src_stride_rgb565;
+  }
+
+#if defined(HAS_RGB565TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RGB565ToYRow = RGB565ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB565ToYRow = RGB565ToYRow_NEON;
+    }
+    if (width >= 16) {
+      RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB565ToUVRow = RGB565ToUVRow_NEON;
+      }
+    }
+  }
+#else  // HAS_RGB565TOYROW_NEON
+
+#if defined(HAS_RGB565TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif  // HAS_ARGBTOUVROW_SSSE3
+#endif  // HAS_RGB565TOYROW_NEON
+
+  {
+#if !defined(HAS_RGB565TOYROW_NEON)
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_RGB565TOYROW_NEON)
+      RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
+      RGB565ToYRow(src_rgb565, dst_y, width);
+      RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
+#else
+      RGB565ToARGBRow(src_rgb565, row, width);
+      RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_rgb565 += src_stride_rgb565 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+#if defined(HAS_RGB565TOYROW_NEON)
+      RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
+      RGB565ToYRow(src_rgb565, dst_y, width);
+#else
+      RGB565ToARGBRow(src_rgb565, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !defined(HAS_RGB565TOYROW_NEON)
+    free_aligned_buffer_64(row);
+#endif
+  }
+  return 0;
+}
+
+// Convert ARGB1555 to I420.
+LIBYUV_API
+int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
+                   uint8* dst_y, int dst_stride_y,
+                   uint8* dst_u, int dst_stride_u,
+                   uint8* dst_v, int dst_stride_v,
+                   int width, int height) {
+  int y;
+#if defined(HAS_ARGB1555TOYROW_NEON)
+  void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
+      uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
+  void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
+      ARGB1555ToYRow_C;
+#else
+  void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      ARGB1555ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#endif
+  if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
+    src_stride_argb1555 = -src_stride_argb1555;
+  }
+
+#if defined(HAS_ARGB1555TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB1555ToYRow = ARGB1555ToYRow_NEON;
+    }
+    if (width >= 16) {
+      ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+      }
+    }
+  }
+#else  // HAS_ARGB1555TOYROW_NEON
+
+#if defined(HAS_ARGB1555TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif  // HAS_ARGBTOUVROW_SSSE3
+#endif  // HAS_ARGB1555TOYROW_NEON
+
+  {
+#if !defined(HAS_ARGB1555TOYROW_NEON)
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+    for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_ARGB1555TOYROW_NEON)
+      ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
+      ARGB1555ToYRow(src_argb1555, dst_y, width);
+      ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
+                     width);
+#else
+      ARGB1555ToARGBRow(src_argb1555, row, width);
+      ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
+                        width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_argb1555 += src_stride_argb1555 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+#if defined(HAS_ARGB1555TOYROW_NEON)
+      ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
+      ARGB1555ToYRow(src_argb1555, dst_y, width);
+#else
+      ARGB1555ToARGBRow(src_argb1555, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !defined(HAS_ARGB1555TOYROW_NEON)
+  free_aligned_buffer_64(row);
+#endif
+  }
+  return 0;
+}
+
+// Convert ARGB4444 to I420.
+LIBYUV_API
+int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
+                   uint8* dst_y, int dst_stride_y,
+                   uint8* dst_u, int dst_stride_u,
+                   uint8* dst_v, int dst_stride_v,
+                   int width, int height) {
+  int y;
+#if defined(HAS_ARGB4444TOYROW_NEON)
+  void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
+      uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
+  void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
+      ARGB4444ToYRow_C;
+#else
+  void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      ARGB4444ToARGBRow_C;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+#endif
+  if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
+    src_stride_argb4444 = -src_stride_argb4444;
+  }
+
+#if defined(HAS_ARGB4444TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB4444ToYRow = ARGB4444ToYRow_NEON;
+    }
+    if (width >= 16) {
+      ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+      }
+    }
+  }
+#else  // HAS_ARGB4444TOYROW_NEON
+
+#if defined(HAS_ARGB4444TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif  // HAS_ARGBTOUVROW_SSSE3
+#endif  // HAS_ARGB4444TOYROW_NEON
+
+  {
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+      ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
+      ARGB4444ToYRow(src_argb4444, dst_y, width);
+      ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
+                     width);
+#else
+      ARGB4444ToARGBRow(src_argb4444, row, width);
+      ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
+                        width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_argb4444 += src_stride_argb4444 * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+#if defined(HAS_ARGB4444TOYROW_NEON)
+      ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
+      ARGB4444ToYRow(src_argb4444, dst_y, width);
+#else
+      ARGB4444ToARGBRow(src_argb4444, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+#endif
+    }
+#if !defined(HAS_ARGB4444TOYROW_NEON)
+    free_aligned_buffer_64(row);
+#endif
+  }
+  return 0;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_argb.cc b/src/main/jni/libyuv/source/convert_argb.cc
new file mode 100644
index 000000000..ac0bc3d15
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_argb.cc
@@ -0,0 +1,938 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_argb.h"
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/rotate_argb.h"
+#include "libyuv/row.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy ARGB with optional flipping
+LIBYUV_API
+int ARGBCopy(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int width, int height) {
+  if (!src_argb || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+
+  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+            width * 4, height);
+  return 0;
+}
+
+// Convert I444 to ARGB.
+LIBYUV_API
+int I444ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*I444ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I444ToARGBRow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u == width &&
+      src_stride_v == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+  }
+#if defined(HAS_I444TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I444ToARGBRow = I444ToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I444TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I444ToARGBRow = I444ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I444ToARGBRow = I444ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I422ToARGBRow = I422ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I411 to ARGB.
+LIBYUV_API
+int I411ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*I411ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I411ToARGBRow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 4 == width &&
+      src_stride_v * 4 == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+  }
+#if defined(HAS_I411TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I411ToARGBRow = I411ToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I411TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I411ToARGBRow = I411ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I411ToARGBRow = I411ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I400 to ARGB.
+LIBYUV_API
+int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
+                         uint8* dst_argb, int dst_stride_argb,
+                         int width, int height) {
+  int y;
+  void (*YToARGBRow)(const uint8* y_buf,
+                     uint8* rgb_buf,
+                     int width) = YToARGBRow_C;
+  if (!src_y || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_argb = 0;
+  }
+#if defined(HAS_YTOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    YToARGBRow = YToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      YToARGBRow = YToARGBRow_SSE2;
+    }
+  }
+#elif defined(HAS_YTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YToARGBRow = YToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      YToARGBRow = YToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    YToARGBRow(src_y, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+  }
+  return 0;
+}
+
+// Convert I400 to ARGB.
+LIBYUV_API
+int I400ToARGB(const uint8* src_y, int src_stride_y,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
+      I400ToARGBRow_C;
+  if (!src_y || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_argb = 0;
+  }
+#if defined(HAS_I400TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
+    I400ToARGBRow = I400ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I400ToARGBRow = I400ToARGBRow_SSE2;
+      }
+    }
+  }
+#elif defined(HAS_I400TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I400ToARGBRow = I400ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I400ToARGBRow = I400ToARGBRow_NEON;
+    }
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    I400ToARGBRow(src_y, dst_argb, width);
+    src_y += src_stride_y;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Shuffle table for converting BGRA to ARGB.
+static uvec8 kShuffleMaskBGRAToARGB = {
+  3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
+};
+
+// Shuffle table for converting ABGR to ARGB.
+static uvec8 kShuffleMaskABGRToARGB = {
+  2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
+};
+
+// Shuffle table for converting RGBA to ARGB.
+static uvec8 kShuffleMaskRGBAToARGB = {
+  1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
+};
+
+// Convert BGRA to ARGB.
+LIBYUV_API
+int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return ARGBShuffle(src_bgra, src_stride_bgra,
+                     dst_argb, dst_stride_argb,
+                     (const uint8*)(&kShuffleMaskBGRAToARGB),
+                     width, height);
+}
+
+// Convert ARGB to BGRA (same as BGRAToARGB).
+LIBYUV_API
+int ARGBToBGRA(const uint8* src_bgra, int src_stride_bgra,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return ARGBShuffle(src_bgra, src_stride_bgra,
+                     dst_argb, dst_stride_argb,
+                     (const uint8*)(&kShuffleMaskBGRAToARGB),
+                     width, height);
+}
+
+// Convert ABGR to ARGB.
+LIBYUV_API
+int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return ARGBShuffle(src_abgr, src_stride_abgr,
+                     dst_argb, dst_stride_argb,
+                     (const uint8*)(&kShuffleMaskABGRToARGB),
+                     width, height);
+}
+
+// Convert ARGB to ABGR to (same as ABGRToARGB).
+LIBYUV_API
+int ARGBToABGR(const uint8* src_abgr, int src_stride_abgr,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return ARGBShuffle(src_abgr, src_stride_abgr,
+                     dst_argb, dst_stride_argb,
+                     (const uint8*)(&kShuffleMaskABGRToARGB),
+                     width, height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return ARGBShuffle(src_rgba, src_stride_rgba,
+                     dst_argb, dst_stride_argb,
+                     (const uint8*)(&kShuffleMaskRGBAToARGB),
+                     width, height);
+}
+
+// Convert RGB24 to ARGB.
+LIBYUV_API
+int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height) {
+  int y;
+  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      RGB24ToARGBRow_C;
+  if (!src_rgb24 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+    src_stride_rgb24 = -src_stride_rgb24;
+  }
+  // Coalesce rows.
+  if (src_stride_rgb24 == width * 3 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_rgb24 = dst_stride_argb = 0;
+  }
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+    }
+  }
+#elif defined(HAS_RGB24TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    RGB24ToARGBRow(src_rgb24, dst_argb, width);
+    src_rgb24 += src_stride_rgb24;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert RAW to ARGB.
+LIBYUV_API
+int RAWToARGB(const uint8* src_raw, int src_stride_raw,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height) {
+  int y;
+  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+      RAWToARGBRow_C;
+  if (!src_raw || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_raw = src_raw + (height - 1) * src_stride_raw;
+    src_stride_raw = -src_stride_raw;
+  }
+  // Coalesce rows.
+  if (src_stride_raw == width * 3 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_raw = dst_stride_argb = 0;
+  }
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToARGBRow = RAWToARGBRow_SSSE3;
+    }
+  }
+#elif defined(HAS_RAWTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RAWToARGBRow = RAWToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToARGBRow = RAWToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    RAWToARGBRow(src_raw, dst_argb, width);
+    src_raw += src_stride_raw;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert RGB565 to ARGB.
+LIBYUV_API
+int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height) {
+  int y;
+  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
+      RGB565ToARGBRow_C;
+  if (!src_rgb565 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
+    src_stride_rgb565 = -src_stride_rgb565;
+  }
+  // Coalesce rows.
+  if (src_stride_rgb565 == width * 2 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_rgb565 = dst_stride_argb = 0;
+  }
+#if defined(HAS_RGB565TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
+    }
+  }
+#elif defined(HAS_RGB565TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB565ToARGBRow = RGB565ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    RGB565ToARGBRow(src_rgb565, dst_argb, width);
+    src_rgb565 += src_stride_rgb565;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert ARGB1555 to ARGB.
+LIBYUV_API
+int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
+                   uint8* dst_argb, int dst_stride_argb,
+                   int width, int height) {
+  int y;
+  void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
+      int pix) = ARGB1555ToARGBRow_C;
+  if (!src_argb1555 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
+    src_stride_argb1555 = -src_stride_argb1555;
+  }
+  // Coalesce rows.
+  if (src_stride_argb1555 == width * 2 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb1555 = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGB1555TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
+    }
+  }
+#elif defined(HAS_ARGB1555TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
+    src_argb1555 += src_stride_argb1555;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert ARGB4444 to ARGB.
+LIBYUV_API
+int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
+                   uint8* dst_argb, int dst_stride_argb,
+                   int width, int height) {
+  int y;
+  void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
+      int pix) = ARGB4444ToARGBRow_C;
+  if (!src_argb4444 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
+    src_stride_argb4444 = -src_stride_argb4444;
+  }
+  // Coalesce rows.
+  if (src_stride_argb4444 == width * 2 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb4444 = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGB4444TOARGBROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
+    }
+  }
+#elif defined(HAS_ARGB4444TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
+    src_argb4444 += src_stride_argb4444;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert NV12 to ARGB.
+LIBYUV_API
+int NV12ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_uv, int src_stride_uv,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*NV12ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV12ToARGBRow_C;
+  if (!src_y || !src_uv || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_NV12TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_uv += src_stride_uv;
+    }
+  }
+  return 0;
+}
+
+// Convert NV21 to ARGB.
+LIBYUV_API
+int NV21ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_uv, int src_stride_uv,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*NV21ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV21ToARGBRow_C;
+  if (!src_y || !src_uv || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_NV21TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_NV21TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      NV21ToARGBRow = NV21ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    NV21ToARGBRow(src_y, src_uv, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_uv += src_stride_uv;
+    }
+  }
+  return 0;
+}
+
+// Convert M420 to ARGB.
+LIBYUV_API
+int M420ToARGB(const uint8* src_m420, int src_stride_m420,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*NV12ToARGBRow)(const uint8* y_buf,
+                        const uint8* uv_buf,
+                        uint8* rgb_buf,
+                        int width) = NV12ToARGBRow_C;
+  if (!src_m420 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_NV12TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_NV12TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToARGBRow = NV12ToARGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
+                  dst_argb + dst_stride_argb, width);
+    dst_argb += dst_stride_argb * 2;
+    src_m420 += src_stride_m420 * 3;
+  }
+  if (height & 1) {
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+  }
+  return 0;
+}
+
+// Convert YUY2 to ARGB.
+LIBYUV_API
+int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
+      YUY2ToARGBRow_C;
+  if (!src_yuy2 || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+    src_stride_yuy2 = -src_stride_yuy2;
+  }
+  // Coalesce rows.
+  if (src_stride_yuy2 == width * 2 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_yuy2 = dst_stride_argb = 0;
+  }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+  // Posix is 16, Windows is 8.
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_YUY2TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      YUY2ToARGBRow = YUY2ToARGBRow_NEON;
+    }
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    YUY2ToARGBRow(src_yuy2, dst_argb, width);
+    src_yuy2 += src_stride_yuy2;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert UYVY to ARGB.
+LIBYUV_API
+int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
+      UYVYToARGBRow_C;
+  if (!src_uyvy || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+    src_stride_uyvy = -src_stride_uyvy;
+  }
+  // Coalesce rows.
+  if (src_stride_uyvy == width * 2 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_uyvy = dst_stride_argb = 0;
+  }
+#if defined(HAS_UYVYTOARGBROW_SSSE3)
+  // Posix is 16, Windows is 8.
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        UYVYToARGBRow = UYVYToARGBRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_UYVYTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      UYVYToARGBRow = UYVYToARGBRow_NEON;
+    }
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    UYVYToARGBRow(src_uyvy, dst_argb, width);
+    src_uyvy += src_stride_uyvy;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_from.cc b/src/main/jni/libyuv/source/convert_from.cc
new file mode 100644
index 000000000..c1a2f62f0
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_from.cc
@@ -0,0 +1,1210 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_from.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/convert.h"  // For I420Copy
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "libyuv/scale.h"  // For ScalePlane()
+#include "libyuv/video_common.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+static __inline int Abs(int v) {
+  return v >= 0 ? v : -v;
+}
+
+// I420 To any I4xx YUV format with mirroring.
+static int I420ToI4xx(const uint8* src_y, int src_stride_y,
+                      const uint8* src_u, int src_stride_u,
+                      const uint8* src_v, int src_stride_v,
+                      uint8* dst_y, int dst_stride_y,
+                      uint8* dst_u, int dst_stride_u,
+                      uint8* dst_v, int dst_stride_v,
+                      int src_y_width, int src_y_height,
+                      int dst_uv_width, int dst_uv_height) {
+  const int dst_y_width = Abs(src_y_width);
+  const int dst_y_height = Abs(src_y_height);
+  const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
+  const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
+  if (src_y_width == 0 || src_y_height == 0 ||
+      dst_uv_width <= 0 || dst_uv_height <= 0) {
+    return -1;
+  }
+  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
+             dst_y, dst_stride_y, dst_y_width, dst_y_height,
+             kFilterBilinear);
+  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
+             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
+             kFilterBilinear);
+  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
+             dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
+             kFilterBilinear);
+  return 0;
+}
+
+// 420 chroma is 1/2 width, 1/2 height
+// 422 chroma is 1/2 width, 1x height
+LIBYUV_API
+int I420ToI422(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  const int dst_uv_width = (Abs(width) + 1) >> 1;
+  const int dst_uv_height = Abs(height);
+  return I420ToI4xx(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    dst_uv_width, dst_uv_height);
+}
+
+// 420 chroma is 1/2 width, 1/2 height
+// 444 chroma is 1x width, 1x height
+LIBYUV_API
+int I420ToI444(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  const int dst_uv_width = Abs(width);
+  const int dst_uv_height = Abs(height);
+  return I420ToI4xx(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    dst_uv_width, dst_uv_height);
+}
+
+// 420 chroma is 1/2 width, 1/2 height
+// 411 chroma is 1/4 width, 1x height
+LIBYUV_API
+int I420ToI411(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  const int dst_uv_width = (Abs(width) + 3) >> 2;
+  const int dst_uv_height = Abs(height);
+  return I420ToI4xx(src_y, src_stride_y,
+                    src_u, src_stride_u,
+                    src_v, src_stride_v,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_stride_u,
+                    dst_v, dst_stride_v,
+                    width, height,
+                    dst_uv_width, dst_uv_height);
+}
+
+// Copy to I400. Source can be I420,422,444,400,NV12,NV21
+LIBYUV_API
+int I400Copy(const uint8* src_y, int src_stride_y,
+             uint8* dst_y, int dst_stride_y,
+             int width, int height) {
+  if (!src_y || !dst_y ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  return 0;
+}
+
+LIBYUV_API
+int I422ToYUY2(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_yuy2, int dst_stride_yuy2,
+               int width, int height) {
+  int y;
+  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_yuy2, int width) =
+      I422ToYUY2Row_C;
+  if (!src_y || !src_u || !src_v || !dst_yuy2 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+    dst_stride_yuy2 = -dst_stride_yuy2;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_yuy2 == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
+  }
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOYUY2ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_yuy2 += dst_stride_yuy2;
+  }
+  return 0;
+}
+
+LIBYUV_API
+int I420ToYUY2(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_yuy2, int dst_stride_yuy2,
+               int width, int height) {
+  int y;
+  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_yuy2, int width) =
+      I422ToYUY2Row_C;
+  if (!src_y || !src_u || !src_v || !dst_yuy2 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+    dst_stride_yuy2 = -dst_stride_yuy2;
+  }
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOYUY2ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+    I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
+                  dst_yuy2 + dst_stride_yuy2, width);
+    src_y += src_stride_y * 2;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_yuy2 += dst_stride_yuy2 * 2;
+  }
+  if (height & 1) {
+    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
+  }
+  return 0;
+}
+
+LIBYUV_API
+int I422ToUYVY(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height) {
+  int y;
+  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_uyvy, int width) =
+      I422ToUYVYRow_C;
+  if (!src_y || !src_u || !src_v || !dst_uyvy ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+    dst_stride_uyvy = -dst_stride_uyvy;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_uyvy == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
+  }
+#if defined(HAS_I422TOUYVYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOUYVYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_uyvy += dst_stride_uyvy;
+  }
+  return 0;
+}
+
+LIBYUV_API
+int I420ToUYVY(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height) {
+  int y;
+  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_uyvy, int width) =
+      I422ToUYVYRow_C;
+  if (!src_y || !src_u || !src_v || !dst_uyvy ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+    dst_stride_uyvy = -dst_stride_uyvy;
+  }
+#if defined(HAS_I422TOUYVYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOUYVYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+    I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
+                  dst_uyvy + dst_stride_uyvy, width);
+    src_y += src_stride_y * 2;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_uyvy += dst_stride_uyvy * 2;
+  }
+  if (height & 1) {
+    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
+  }
+  return 0;
+}
+
+LIBYUV_API
+int I420ToNV12(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height) {
+  int y;
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+      int width) = MergeUVRow_C;
+  // Coalesce rows.
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv;
+    dst_stride_y = -dst_stride_y;
+    dst_stride_uv = -dst_stride_uv;
+  }
+  if (src_stride_y == width &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_y = 0;
+  }
+  // Coalesce rows.
+  if (src_stride_u == halfwidth &&
+      src_stride_v == halfwidth &&
+      dst_stride_uv == halfwidth * 2) {
+    halfwidth *= halfheight;
+    halfheight = 1;
+    src_stride_u = src_stride_v = dst_stride_uv = 0;
+  }
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
+          IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
+          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
+        MergeUVRow_ = MergeUVRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  for (y = 0; y < halfheight; ++y) {
+    // Merge a row of U and V into a row of UV.
+    MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_uv += dst_stride_uv;
+  }
+  return 0;
+}
+
+LIBYUV_API
+int I420ToNV21(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_vu, int dst_stride_vu,
+               int width, int height) {
+  return I420ToNV12(src_y, src_stride_y,
+                    src_v, src_stride_v,
+                    src_u, src_stride_u,
+                    dst_y, src_stride_y,
+                    dst_vu, dst_stride_vu,
+                    width, height);
+}
+
+// Convert I420 to ARGB.
+LIBYUV_API
+int I420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+  if (!src_y || !src_u || !src_v || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I422ToARGBRow = I422ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to BGRA.
+LIBYUV_API
+int I420ToBGRA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_bgra, int dst_stride_bgra,
+               int width, int height) {
+  int y;
+  void (*I422ToBGRARow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToBGRARow_C;
+  if (!src_y || !src_u || !src_v || !dst_bgra ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
+    dst_stride_bgra = -dst_stride_bgra;
+  }
+#if defined(HAS_I422TOBGRAROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
+        I422ToBGRARow = I422ToBGRARow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I422TOBGRAROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToBGRARow = I422ToBGRARow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToBGRARow = I422ToBGRARow_NEON;
+    }
+  }
+#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
+    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+    dst_bgra += dst_stride_bgra;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to ABGR.
+LIBYUV_API
+int I420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  int y;
+  void (*I422ToABGRRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToABGRRow_C;
+  if (!src_y || !src_u || !src_v || !dst_abgr ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
+    dst_stride_abgr = -dst_stride_abgr;
+  }
+#if defined(HAS_I422TOABGRROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
+        I422ToABGRRow = I422ToABGRRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I422TOABGRROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToABGRRow = I422ToABGRRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    dst_abgr += dst_stride_abgr;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to RGBA.
+LIBYUV_API
+int I420ToRGBA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height) {
+  int y;
+  void (*I422ToRGBARow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToRGBARow_C;
+  if (!src_y || !src_u || !src_v || !dst_rgba ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+    dst_stride_rgba = -dst_stride_rgba;
+  }
+#if defined(HAS_I422TORGBAROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
+        I422ToRGBARow = I422ToRGBARow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I422TORGBAROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToRGBARow = I422ToRGBARow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGBARow = I422ToRGBARow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+    dst_rgba += dst_stride_rgba;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to RGB24.
+LIBYUV_API
+int I420ToRGB24(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_rgb24, int dst_stride_rgb24,
+                int width, int height) {
+  int y;
+  void (*I422ToRGB24Row)(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* rgb_buf,
+                         int width) = I422ToRGB24Row_C;
+  if (!src_y || !src_u || !src_v || !dst_rgb24 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
+    dst_stride_rgb24 = -dst_stride_rgb24;
+  }
+#if defined(HAS_I422TORGB24ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGB24Row = I422ToRGB24Row_SSSE3;
+    }
+  }
+#elif defined(HAS_I422TORGB24ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGB24Row = I422ToRGB24Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
+    dst_rgb24 += dst_stride_rgb24;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to RAW.
+LIBYUV_API
+int I420ToRAW(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_raw, int dst_stride_raw,
+                int width, int height) {
+  int y;
+  void (*I422ToRAWRow)(const uint8* y_buf,
+                       const uint8* u_buf,
+                       const uint8* v_buf,
+                       uint8* rgb_buf,
+                       int width) = I422ToRAWRow_C;
+  if (!src_y || !src_u || !src_v || !dst_raw ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_raw = dst_raw + (height - 1) * dst_stride_raw;
+    dst_stride_raw = -dst_stride_raw;
+  }
+#if defined(HAS_I422TORAWROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRAWRow = I422ToRAWRow_SSSE3;
+    }
+  }
+#elif defined(HAS_I422TORAWROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToRAWRow = I422ToRAWRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRAWRow = I422ToRAWRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
+    dst_raw += dst_stride_raw;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to ARGB1555.
+LIBYUV_API
+int I420ToARGB1555(const uint8* src_y, int src_stride_y,
+                   const uint8* src_u, int src_stride_u,
+                   const uint8* src_v, int src_stride_v,
+                   uint8* dst_argb1555, int dst_stride_argb1555,
+                   int width, int height) {
+  int y;
+  void (*I422ToARGB1555Row)(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width) = I422ToARGB1555Row_C;
+  if (!src_y || !src_u || !src_v || !dst_argb1555 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555;
+    dst_stride_argb1555 = -dst_stride_argb1555;
+  }
+#if defined(HAS_I422TOARGB1555ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
+    }
+  }
+#elif defined(HAS_I422TOARGB1555ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGB1555Row = I422ToARGB1555Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
+    dst_argb1555 += dst_stride_argb1555;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+
+// Convert I420 to ARGB4444.
+LIBYUV_API
+int I420ToARGB4444(const uint8* src_y, int src_stride_y,
+                   const uint8* src_u, int src_stride_u,
+                   const uint8* src_v, int src_stride_v,
+                   uint8* dst_argb4444, int dst_stride_argb4444,
+                   int width, int height) {
+  int y;
+  void (*I422ToARGB4444Row)(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width) = I422ToARGB4444Row_C;
+  if (!src_y || !src_u || !src_v || !dst_argb4444 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444;
+    dst_stride_argb4444 = -dst_stride_argb4444;
+  }
+#if defined(HAS_I422TOARGB4444ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
+    }
+  }
+#elif defined(HAS_I422TOARGB4444ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGB4444Row = I422ToARGB4444Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
+    dst_argb4444 += dst_stride_argb4444;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to RGB565.
+LIBYUV_API
+int I420ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_u, int src_stride_u,
+                 const uint8* src_v, int src_stride_v,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  int y;
+  void (*I422ToRGB565Row)(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* rgb_buf,
+                          int width) = I422ToRGB565Row_C;
+  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+#if defined(HAS_I422TORGB565ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGB565Row = I422ToRGB565Row_SSSE3;
+    }
+  }
+#elif defined(HAS_I422TORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGB565Row = I422ToRGB565Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
+    dst_rgb565 += dst_stride_rgb565;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to specified format
+LIBYUV_API
+int ConvertFromI420(const uint8* y, int y_stride,
+                    const uint8* u, int u_stride,
+                    const uint8* v, int v_stride,
+                    uint8* dst_sample, int dst_sample_stride,
+                    int width, int height,
+                    uint32 fourcc) {
+  uint32 format = CanonicalFourCC(fourcc);
+  int r = 0;
+  if (!y || !u|| !v || !dst_sample ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  switch (format) {
+    // Single plane formats
+    case FOURCC_YUY2:
+      r = I420ToYUY2(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 2,
+                     width, height);
+      break;
+    case FOURCC_UYVY:
+      r = I420ToUYVY(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 2,
+                     width, height);
+      break;
+    case FOURCC_RGBP:
+      r = I420ToRGB565(y, y_stride,
+                       u, u_stride,
+                       v, v_stride,
+                       dst_sample,
+                       dst_sample_stride ? dst_sample_stride : width * 2,
+                       width, height);
+      break;
+    case FOURCC_RGBO:
+      r = I420ToARGB1555(y, y_stride,
+                         u, u_stride,
+                         v, v_stride,
+                         dst_sample,
+                         dst_sample_stride ? dst_sample_stride : width * 2,
+                         width, height);
+      break;
+    case FOURCC_R444:
+      r = I420ToARGB4444(y, y_stride,
+                         u, u_stride,
+                         v, v_stride,
+                         dst_sample,
+                         dst_sample_stride ? dst_sample_stride : width * 2,
+                         width, height);
+      break;
+    case FOURCC_24BG:
+      r = I420ToRGB24(y, y_stride,
+                      u, u_stride,
+                      v, v_stride,
+                      dst_sample,
+                      dst_sample_stride ? dst_sample_stride : width * 3,
+                      width, height);
+      break;
+    case FOURCC_RAW:
+      r = I420ToRAW(y, y_stride,
+                    u, u_stride,
+                    v, v_stride,
+                    dst_sample,
+                    dst_sample_stride ? dst_sample_stride : width * 3,
+                    width, height);
+      break;
+    case FOURCC_ARGB:
+      r = I420ToARGB(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4,
+                     width, height);
+      break;
+    case FOURCC_BGRA:
+      r = I420ToBGRA(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4,
+                     width, height);
+      break;
+    case FOURCC_ABGR:
+      r = I420ToABGR(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4,
+                     width, height);
+      break;
+    case FOURCC_RGBA:
+      r = I420ToRGBA(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4,
+                     width, height);
+      break;
+    case FOURCC_BGGR:
+      r = I420ToBayerBGGR(y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          dst_sample,
+                          dst_sample_stride ? dst_sample_stride : width,
+                          width, height);
+      break;
+    case FOURCC_GBRG:
+      r = I420ToBayerGBRG(y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          dst_sample,
+                          dst_sample_stride ? dst_sample_stride : width,
+                          width, height);
+      break;
+    case FOURCC_GRBG:
+      r = I420ToBayerGRBG(y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          dst_sample,
+                          dst_sample_stride ? dst_sample_stride : width,
+                          width, height);
+      break;
+    case FOURCC_RGGB:
+      r = I420ToBayerRGGB(y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          dst_sample,
+                          dst_sample_stride ? dst_sample_stride : width,
+                          width, height);
+      break;
+    case FOURCC_I400:
+      r = I400Copy(y, y_stride,
+                   dst_sample,
+                   dst_sample_stride ? dst_sample_stride : width,
+                   width, height);
+      break;
+    case FOURCC_NV12: {
+      uint8* dst_uv = dst_sample + width * height;
+      r = I420ToNV12(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width,
+                     dst_uv,
+                     dst_sample_stride ? dst_sample_stride : width,
+                     width, height);
+      break;
+    }
+    case FOURCC_NV21: {
+      uint8* dst_vu = dst_sample + width * height;
+      r = I420ToNV21(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width,
+                     dst_vu,
+                     dst_sample_stride ? dst_sample_stride : width,
+                     width, height);
+      break;
+    }
+    // TODO(fbarchard): Add M420 and Q420.
+    // Triplanar formats
+    // TODO(fbarchard): halfstride instead of halfwidth
+    case FOURCC_I420:
+    case FOURCC_YU12:
+    case FOURCC_YV12: {
+      int halfwidth = (width + 1) / 2;
+      int halfheight = (height + 1) / 2;
+      uint8* dst_u;
+      uint8* dst_v;
+      if (format == FOURCC_YV12) {
+        dst_v = dst_sample + width * height;
+        dst_u = dst_v + halfwidth * halfheight;
+      } else {
+        dst_u = dst_sample + width * height;
+        dst_v = dst_u + halfwidth * halfheight;
+      }
+      r = I420Copy(y, y_stride,
+                   u, u_stride,
+                   v, v_stride,
+                   dst_sample, width,
+                   dst_u, halfwidth,
+                   dst_v, halfwidth,
+                   width, height);
+      break;
+    }
+    case FOURCC_I422:
+    case FOURCC_YV16: {
+      int halfwidth = (width + 1) / 2;
+      uint8* dst_u;
+      uint8* dst_v;
+      if (format == FOURCC_YV16) {
+        dst_v = dst_sample + width * height;
+        dst_u = dst_v + halfwidth * height;
+      } else {
+        dst_u = dst_sample + width * height;
+        dst_v = dst_u + halfwidth * height;
+      }
+      r = I420ToI422(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample, width,
+                     dst_u, halfwidth,
+                     dst_v, halfwidth,
+                     width, height);
+      break;
+    }
+    case FOURCC_I444:
+    case FOURCC_YV24: {
+      uint8* dst_u;
+      uint8* dst_v;
+      if (format == FOURCC_YV24) {
+        dst_v = dst_sample + width * height;
+        dst_u = dst_v + width * height;
+      } else {
+        dst_u = dst_sample + width * height;
+        dst_v = dst_u + width * height;
+      }
+      r = I420ToI444(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample, width,
+                     dst_u, width,
+                     dst_v, width,
+                     width, height);
+      break;
+    }
+    case FOURCC_I411: {
+      int quarterwidth = (width + 3) / 4;
+      uint8* dst_u = dst_sample + width * height;
+      uint8* dst_v = dst_u + quarterwidth * height;
+      r = I420ToI411(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     dst_sample, width,
+                     dst_u, quarterwidth,
+                     dst_v, quarterwidth,
+                     width, height);
+      break;
+    }
+
+    // Formats not supported - MJPG, biplanar, some rgb formats.
+    default:
+      return -1;  // unknown fourcc - return failure code.
+  }
+  return r;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_from_argb.cc b/src/main/jni/libyuv/source/convert_from_argb.cc
new file mode 100644
index 000000000..de461ddb0
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_from_argb.cc
@@ -0,0 +1,1133 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_from_argb.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// ARGB little endian (bgra in memory) to I444
+LIBYUV_API
+int ARGBToI444(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+      int pix) = ARGBToUV444Row_C;
+  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_y == width &&
+      dst_stride_u == width &&
+      dst_stride_v == width) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_ARGBTOUV444ROW_SSSE3)
+    if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+      ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
+        if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+          ARGBToUV444Row = ARGBToUV444Row_SSSE3;
+        }
+      }
+  }
+#elif defined(HAS_ARGBTOUV444ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToUV444Row = ARGBToUV444Row_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToUV444Row(src_argb, dst_u, dst_v, width);
+    ARGBToYRow(src_argb, dst_y, width);
+    src_argb += src_stride_argb;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
+// ARGB little endian (bgra in memory) to I422
+LIBYUV_API
+int ARGBToI422(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+      int pix) = ARGBToUV422Row_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_y == width &&
+      dst_stride_u * 2 == width &&
+      dst_stride_v * 2 == width) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_ARGBTOUV422ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_NEON;
+    }
+  }
+#endif
+
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToUV422Row(src_argb, dst_u, dst_v, width);
+    ARGBToYRow(src_argb, dst_y, width);
+    src_argb += src_stride_argb;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
+// ARGB little endian (bgra in memory) to I411
+LIBYUV_API
+int ARGBToI411(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+      int pix) = ARGBToUV411Row_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_y == width &&
+      dst_stride_u * 4 == width &&
+      dst_stride_v * 4 == width) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUV411ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 32) {
+    ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUV411Row = ARGBToUV411Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToUV411Row(src_argb, dst_u, dst_v, width);
+    ARGBToYRow(src_argb, dst_y, width);
+    src_argb += src_stride_argb;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
+LIBYUV_API
+int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                      int width) = MergeUVRow_C;
+  if (!src_argb ||
+      !dst_y || !dst_uv ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUVRow = ARGBToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ARGBToYRow = ARGBToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
+        MergeUVRow_ = MergeUVRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+  {
+    // Allocate a rows of uv.
+    align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+    uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+
+    for (y = 0; y < height - 1; y += 2) {
+      ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(src_argb, dst_y, width);
+      ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+      src_argb += src_stride_argb * 2;
+      dst_y += dst_stride_y * 2;
+      dst_uv += dst_stride_uv;
+    }
+    if (height & 1) {
+      ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+      MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
+      ARGBToYRow(src_argb, dst_y, width);
+    }
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
+
+// Same as NV12 but U and V swapped.
+LIBYUV_API
+int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_uv, int dst_stride_uv,
+               int width, int height) {
+  int y;
+  int halfwidth = (width + 1) >> 1;
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                      int width) = MergeUVRow_C;
+  if (!src_argb ||
+      !dst_y || !dst_uv ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUVRow = ARGBToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ARGBToYRow = ARGBToYRow_SSSE3;
+        }
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
+        MergeUVRow_ = MergeUVRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
+    if (IS_ALIGNED(halfwidth, 32)) {
+      MergeUVRow_ = MergeUVRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_MERGEUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
+    MergeUVRow_ = MergeUVRow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      MergeUVRow_ = MergeUVRow_NEON;
+    }
+  }
+#endif
+  {
+    // Allocate a rows of uv.
+    align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
+    uint8* row_v = row_u + ((halfwidth + 15) & ~15);
+
+    for (y = 0; y < height - 1; y += 2) {
+      ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+      ARGBToYRow(src_argb, dst_y, width);
+      ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
+      src_argb += src_stride_argb * 2;
+      dst_y += dst_stride_y * 2;
+      dst_uv += dst_stride_uv;
+    }
+    if (height & 1) {
+      ARGBToUVRow(src_argb, 0, row_u, row_v, width);
+      MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
+      ARGBToYRow(src_argb, dst_y, width);
+    }
+    free_aligned_buffer_64(row_u);
+  }
+  return 0;
+}
+
+// Convert ARGB to YUY2.
+LIBYUV_API
+int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yuy2, int dst_stride_yuy2,
+               int width, int height) {
+  int y;
+  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+      int pix) = ARGBToUV422Row_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
+      const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
+
+  if (!src_argb || !dst_yuy2 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+    dst_stride_yuy2 = -dst_stride_yuy2;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_yuy2 == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_yuy2 = 0;
+  }
+#if defined(HAS_ARGBTOUV422ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+
+#if defined(HAS_I422TOYUY2ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOYUY2ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_NEON;
+    }
+  }
+#endif
+
+  {
+    // Allocate a rows of yuv.
+    align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+    uint8* row_u = row_y + ((width + 63) & ~63);
+    uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+
+    for (y = 0; y < height; ++y) {
+      ARGBToUV422Row(src_argb, row_u, row_v, width);
+      ARGBToYRow(src_argb, row_y, width);
+      I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
+      src_argb += src_stride_argb;
+      dst_yuy2 += dst_stride_yuy2;
+    }
+
+    free_aligned_buffer_64(row_y);
+  }
+  return 0;
+}
+
+// Convert ARGB to UYVY.
+LIBYUV_API
+int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height) {
+  int y;
+  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+      int pix) = ARGBToUV422Row_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
+      const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
+
+  if (!src_argb || !dst_uyvy ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
+    dst_stride_uyvy = -dst_stride_uyvy;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_uyvy == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_uyvy = 0;
+  }
+#if defined(HAS_ARGBTOUV422ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOUV422ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUV422Row = ARGBToUV422Row_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+
+#if defined(HAS_I422TOUYVYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    }
+  }
+#elif defined(HAS_I422TOUYVYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_NEON;
+    }
+  }
+#endif
+
+  {
+    // Allocate a rows of yuv.
+    align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+    uint8* row_u = row_y + ((width + 63) & ~63);
+    uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+
+    for (y = 0; y < height; ++y) {
+      ARGBToUV422Row(src_argb, row_u, row_v, width);
+      ARGBToYRow(src_argb, row_y, width);
+      I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
+      src_argb += src_stride_argb;
+      dst_uyvy += dst_stride_uyvy;
+    }
+
+    free_aligned_buffer_64(row_y);
+  }
+  return 0;
+}
+
+// Convert ARGB to I400.
+LIBYUV_API
+int ARGBToI400(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height) {
+  int y;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  if (!src_argb || !dst_y || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_y = 0;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    ARGBToYRow = ARGBToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYRow = ARGBToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToYRow(src_argb, dst_y, width);
+    src_argb += src_stride_argb;
+    dst_y += dst_stride_y;
+  }
+  return 0;
+}
+
+// Shuffle table for converting ARGB to RGBA.
+static uvec8 kShuffleMaskARGBToRGBA = {
+  3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u
+};
+
+// Convert ARGB to RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height) {
+  return ARGBShuffle(src_argb, src_stride_argb,
+                     dst_rgba, dst_stride_rgba,
+                     (const uint8*)(&kShuffleMaskARGBToRGBA),
+                     width, height);
+}
+
+// Convert ARGB To RGB24.
+LIBYUV_API
+int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_rgb24, int dst_stride_rgb24,
+                int width, int height) {
+  int y;
+  void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToRGB24Row_C;
+  if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_rgb24 == width * 3) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_rgb24 = 0;
+  }
+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
+    }
+  }
+#elif defined(HAS_ARGBTORGB24ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB24Row = ARGBToRGB24Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToRGB24Row(src_argb, dst_rgb24, width);
+    src_argb += src_stride_argb;
+    dst_rgb24 += dst_stride_rgb24;
+  }
+  return 0;
+}
+
+// Convert ARGB To RAW.
+LIBYUV_API
+int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_raw, int dst_stride_raw,
+              int width, int height) {
+  int y;
+  void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToRAWRow_C;
+  if (!src_argb || !dst_raw || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_raw == width * 3) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_raw = 0;
+  }
+#if defined(HAS_ARGBTORAWROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToRAWRow = ARGBToRAWRow_SSSE3;
+    }
+  }
+#elif defined(HAS_ARGBTORAWROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRAWRow = ARGBToRAWRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToRAWRow(src_argb, dst_raw, width);
+    src_argb += src_stride_argb;
+    dst_raw += dst_stride_raw;
+  }
+  return 0;
+}
+
+// Convert ARGB To RGB565.
+LIBYUV_API
+int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  int y;
+  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToRGB565Row_C;
+  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_rgb565 == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_rgb565 = 0;
+  }
+#if defined(HAS_ARGBTORGB565ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
+    }
+  }
+#elif defined(HAS_ARGBTORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToRGB565Row = ARGBToRGB565Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToRGB565Row(src_argb, dst_rgb565, width);
+    src_argb += src_stride_argb;
+    dst_rgb565 += dst_stride_rgb565;
+  }
+  return 0;
+}
+
+// Convert ARGB To ARGB1555.
+LIBYUV_API
+int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb1555, int dst_stride_argb1555,
+                   int width, int height) {
+  int y;
+  void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToARGB1555Row_C;
+  if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb1555 == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb1555 = 0;
+  }
+#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
+    }
+  }
+#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToARGB1555Row(src_argb, dst_argb1555, width);
+    src_argb += src_stride_argb;
+    dst_argb1555 += dst_stride_argb1555;
+  }
+  return 0;
+}
+
+// Convert ARGB To ARGB4444.
+LIBYUV_API
+int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb4444, int dst_stride_argb4444,
+                   int width, int height) {
+  int y;
+  void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+      ARGBToARGB4444Row_C;
+  if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb4444 == width * 2) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb4444 = 0;
+  }
+#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
+    }
+  }
+#elif defined(HAS_ARGBTOARGB4444ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToARGB4444Row(src_argb, dst_argb4444, width);
+    src_argb += src_stride_argb;
+    dst_argb4444 += dst_stride_argb4444;
+  }
+  return 0;
+}
+
+// Convert ARGB to J420. (JPeg full range I420).
+LIBYUV_API
+int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yj, int dst_stride_yj,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+      ARGBToYJRow_C;
+  if (!src_argb ||
+      !dst_yj || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
+      ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+        if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
+          ARGBToYJRow = ARGBToYJRow_SSSE3;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYJRow = ARGBToYJRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYJRow = ARGBToYJRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYJRow = ARGBToYJRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVJRow = ARGBToUVJRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+    ARGBToYJRow(src_argb, dst_yj, width);
+    ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
+    src_argb += src_stride_argb * 2;
+    dst_yj += dst_stride_yj * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+    ARGBToYJRow(src_argb, dst_yj, width);
+  }
+  return 0;
+}
+
+// Convert ARGB to J400.
+LIBYUV_API
+int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_yj, int dst_stride_yj,
+               int width, int height) {
+  int y;
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+      ARGBToYJRow_C;
+  if (!src_argb || !dst_yj || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_yj == width) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_yj = 0;
+  }
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+          IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
+        ARGBToYJRow = ARGBToYJRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToYJRow = ARGBToYJRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYJRow = ARGBToYJRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYJRow = ARGBToYJRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBToYJRow(src_argb, dst_yj, width);
+    src_argb += src_stride_argb;
+    dst_yj += dst_stride_yj;
+  }
+  return 0;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_jpeg.cc b/src/main/jni/libyuv/source/convert_jpeg.cc
new file mode 100644
index 000000000..bcb980f7f
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_jpeg.cc
@@ -0,0 +1,392 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert.h"
+
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#ifdef HAVE_JPEG
+struct I420Buffers {
+  uint8* y;
+  int y_stride;
+  uint8* u;
+  int u_stride;
+  uint8* v;
+  int v_stride;
+  int w;
+  int h;
+};
+
+static void JpegCopyI420(void* opaque,
+                         const uint8* const* data,
+                         const int* strides,
+                         int rows) {
+  I420Buffers* dest = (I420Buffers*)(opaque);
+  I420Copy(data[0], strides[0],
+           data[1], strides[1],
+           data[2], strides[2],
+           dest->y, dest->y_stride,
+           dest->u, dest->u_stride,
+           dest->v, dest->v_stride,
+           dest->w, rows);
+  dest->y += rows * dest->y_stride;
+  dest->u += ((rows + 1) >> 1) * dest->u_stride;
+  dest->v += ((rows + 1) >> 1) * dest->v_stride;
+  dest->h -= rows;
+}
+
+static void JpegI422ToI420(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  I420Buffers* dest = (I420Buffers*)(opaque);
+  I422ToI420(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->y, dest->y_stride,
+             dest->u, dest->u_stride,
+             dest->v, dest->v_stride,
+             dest->w, rows);
+  dest->y += rows * dest->y_stride;
+  dest->u += ((rows + 1) >> 1) * dest->u_stride;
+  dest->v += ((rows + 1) >> 1) * dest->v_stride;
+  dest->h -= rows;
+}
+
+static void JpegI444ToI420(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  I420Buffers* dest = (I420Buffers*)(opaque);
+  I444ToI420(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->y, dest->y_stride,
+             dest->u, dest->u_stride,
+             dest->v, dest->v_stride,
+             dest->w, rows);
+  dest->y += rows * dest->y_stride;
+  dest->u += ((rows + 1) >> 1) * dest->u_stride;
+  dest->v += ((rows + 1) >> 1) * dest->v_stride;
+  dest->h -= rows;
+}
+
+static void JpegI411ToI420(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  I420Buffers* dest = (I420Buffers*)(opaque);
+  I411ToI420(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->y, dest->y_stride,
+             dest->u, dest->u_stride,
+             dest->v, dest->v_stride,
+             dest->w, rows);
+  dest->y += rows * dest->y_stride;
+  dest->u += ((rows + 1) >> 1) * dest->u_stride;
+  dest->v += ((rows + 1) >> 1) * dest->v_stride;
+  dest->h -= rows;
+}
+
+static void JpegI400ToI420(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  I420Buffers* dest = (I420Buffers*)(opaque);
+  I400ToI420(data[0], strides[0],
+             dest->y, dest->y_stride,
+             dest->u, dest->u_stride,
+             dest->v, dest->v_stride,
+             dest->w, rows);
+  dest->y += rows * dest->y_stride;
+  dest->u += ((rows + 1) >> 1) * dest->u_stride;
+  dest->v += ((rows + 1) >> 1) * dest->v_stride;
+  dest->h -= rows;
+}
+
+// Query size of MJPG in pixels.
+LIBYUV_API
+int MJPGSize(const uint8* sample, size_t sample_size,
+             int* width, int* height) {
+  MJpegDecoder mjpeg_decoder;
+  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+  if (ret) {
+    *width = mjpeg_decoder.GetWidth();
+    *height = mjpeg_decoder.GetHeight();
+  }
+  mjpeg_decoder.UnloadFrame();
+  return ret ? 0 : -1;  // -1 for runtime failure.
+}
+
+// MJPG (Motion JPeg) to I420
+// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
+LIBYUV_API
+int MJPGToI420(const uint8* sample,
+               size_t sample_size,
+               uint8* y, int y_stride,
+               uint8* u, int u_stride,
+               uint8* v, int v_stride,
+               int w, int h,
+               int dw, int dh) {
+  if (sample_size == kUnknownDataSize) {
+    // ERROR: MJPEG frame size unknown
+    return -1;
+  }
+
+  // TODO(fbarchard): Port MJpeg to C.
+  MJpegDecoder mjpeg_decoder;
+  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+  if (ret && (mjpeg_decoder.GetWidth() != w ||
+              mjpeg_decoder.GetHeight() != h)) {
+    // ERROR: MJPEG frame has unexpected dimensions
+    mjpeg_decoder.UnloadFrame();
+    return 1;  // runtime failure
+  }
+  if (ret) {
+    I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
+    // YUV420
+    if (mjpeg_decoder.GetColorSpace() ==
+            MJpegDecoder::kColorSpaceYCbCr &&
+        mjpeg_decoder.GetNumComponents() == 3 &&
+        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
+    // YUV422
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
+    // YUV444
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
+    // YUV411
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
+    // YUV400
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceGrayscale &&
+               mjpeg_decoder.GetNumComponents() == 1 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
+    } else {
+      // TODO(fbarchard): Implement conversion for any other colorspace/sample
+      // factors that occur in practice. 411 is supported by libjpeg
+      // ERROR: Unable to convert MJPEG frame because format is not supported
+      mjpeg_decoder.UnloadFrame();
+      return 1;
+    }
+  }
+  return ret ? 0 : 1;
+}
+
+#ifdef HAVE_JPEG
+struct ARGBBuffers {
+  uint8* argb;
+  int argb_stride;
+  int w;
+  int h;
+};
+
+static void JpegI420ToARGB(void* opaque,
+                         const uint8* const* data,
+                         const int* strides,
+                         int rows) {
+  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+  I420ToARGB(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->argb, dest->argb_stride,
+             dest->w, rows);
+  dest->argb += rows * dest->argb_stride;
+  dest->h -= rows;
+}
+
+static void JpegI422ToARGB(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+  I422ToARGB(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->argb, dest->argb_stride,
+             dest->w, rows);
+  dest->argb += rows * dest->argb_stride;
+  dest->h -= rows;
+}
+
+static void JpegI444ToARGB(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+  I444ToARGB(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->argb, dest->argb_stride,
+             dest->w, rows);
+  dest->argb += rows * dest->argb_stride;
+  dest->h -= rows;
+}
+
+static void JpegI411ToARGB(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+  I411ToARGB(data[0], strides[0],
+             data[1], strides[1],
+             data[2], strides[2],
+             dest->argb, dest->argb_stride,
+             dest->w, rows);
+  dest->argb += rows * dest->argb_stride;
+  dest->h -= rows;
+}
+
+static void JpegI400ToARGB(void* opaque,
+                           const uint8* const* data,
+                           const int* strides,
+                           int rows) {
+  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
+  I400ToARGB(data[0], strides[0],
+             dest->argb, dest->argb_stride,
+             dest->w, rows);
+  dest->argb += rows * dest->argb_stride;
+  dest->h -= rows;
+}
+
+// MJPG (Motion JPeg) to ARGB
+// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
+LIBYUV_API
+int MJPGToARGB(const uint8* sample,
+               size_t sample_size,
+               uint8* argb, int argb_stride,
+               int w, int h,
+               int dw, int dh) {
+  if (sample_size == kUnknownDataSize) {
+    // ERROR: MJPEG frame size unknown
+    return -1;
+  }
+
+  // TODO(fbarchard): Port MJpeg to C.
+  MJpegDecoder mjpeg_decoder;
+  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+  if (ret && (mjpeg_decoder.GetWidth() != w ||
+              mjpeg_decoder.GetHeight() != h)) {
+    // ERROR: MJPEG frame has unexpected dimensions
+    mjpeg_decoder.UnloadFrame();
+    return 1;  // runtime failure
+  }
+  if (ret) {
+    ARGBBuffers bufs = { argb, argb_stride, dw, dh };
+    // YUV420
+    if (mjpeg_decoder.GetColorSpace() ==
+            MJpegDecoder::kColorSpaceYCbCr &&
+        mjpeg_decoder.GetNumComponents() == 3 &&
+        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
+        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
+    // YUV422
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
+    // YUV444
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
+    // YUV411
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceYCbCr &&
+               mjpeg_decoder.GetNumComponents() == 3 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
+               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
+               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
+    // YUV400
+    } else if (mjpeg_decoder.GetColorSpace() ==
+                   MJpegDecoder::kColorSpaceGrayscale &&
+               mjpeg_decoder.GetNumComponents() == 1 &&
+               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
+               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
+      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
+    } else {
+      // TODO(fbarchard): Implement conversion for any other colorspace/sample
+      // factors that occur in practice. 411 is supported by libjpeg
+      // ERROR: Unable to convert MJPEG frame because format is not supported
+      mjpeg_decoder.UnloadFrame();
+      return 1;
+    }
+  }
+  return ret ? 0 : 1;
+}
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_to_argb.cc b/src/main/jni/libyuv/source/convert_to_argb.cc
new file mode 100644
index 000000000..1b228a7b4
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_to_argb.cc
@@ -0,0 +1,327 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/convert_argb.h"
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/rotate_argb.h"
+#include "libyuv/row.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// src_width is used for source stride computation
+// src_height is used to compute location of planes, and indicate inversion
+// sample_size is measured in bytes and is the size of the frame.
+//   With MJPEG it is the compressed size of the frame.
+LIBYUV_API
+int ConvertToARGB(const uint8* sample, size_t sample_size,
+                  uint8* crop_argb, int argb_stride,
+                  int crop_x, int crop_y,
+                  int src_width, int src_height,
+                  int crop_width, int crop_height,
+                  enum RotationMode rotation,
+                  uint32 fourcc) {
+  uint32 format = CanonicalFourCC(fourcc);
+  int aligned_src_width = (src_width + 1) & ~1;
+  const uint8* src;
+  const uint8* src_uv;
+  int abs_src_height = (src_height < 0) ? -src_height : src_height;
+  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+  int r = 0;
+
+  // One pass rotation is available for some formats. For the rest, convert
+  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
+  // and then rotate the I420 to the final destination buffer.
+  // For in-place conversion, if destination crop_argb is same as source sample,
+  // also enable temporary buffer.
+  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
+      crop_argb == sample;
+  uint8* tmp_argb = crop_argb;
+  int tmp_argb_stride = argb_stride;
+  uint8* rotate_buffer = NULL;
+  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+
+  if (crop_argb == NULL || sample == NULL ||
+      src_width <= 0 || crop_width <= 0 ||
+      src_height == 0 || crop_height == 0) {
+    return -1;
+  }
+  if (src_height < 0) {
+    inv_crop_height = -inv_crop_height;
+  }
+
+  if (need_buf) {
+    int argb_size = crop_width * abs_crop_height * 4;
+    rotate_buffer = (uint8*)malloc(argb_size);
+    if (!rotate_buffer) {
+      return 1;  // Out of memory runtime error.
+    }
+    crop_argb = rotate_buffer;
+    argb_stride = crop_width;
+  }
+
+  switch (format) {
+    // Single plane formats
+    case FOURCC_YUY2:
+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+      r = YUY2ToARGB(src, aligned_src_width * 2,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_UYVY:
+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+      r = UYVYToARGB(src, aligned_src_width * 2,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_24BG:
+      src = sample + (src_width * crop_y + crop_x) * 3;
+      r = RGB24ToARGB(src, src_width * 3,
+                      crop_argb, argb_stride,
+                      crop_width, inv_crop_height);
+      break;
+    case FOURCC_RAW:
+      src = sample + (src_width * crop_y + crop_x) * 3;
+      r = RAWToARGB(src, src_width * 3,
+                    crop_argb, argb_stride,
+                    crop_width, inv_crop_height);
+      break;
+    case FOURCC_ARGB:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = ARGBToARGB(src, src_width * 4,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_BGRA:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = BGRAToARGB(src, src_width * 4,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_ABGR:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = ABGRToARGB(src, src_width * 4,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBA:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = RGBAToARGB(src, src_width * 4,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBP:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = RGB565ToARGB(src, src_width * 2,
+                       crop_argb, argb_stride,
+                       crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBO:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = ARGB1555ToARGB(src, src_width * 2,
+                         crop_argb, argb_stride,
+                         crop_width, inv_crop_height);
+      break;
+    case FOURCC_R444:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = ARGB4444ToARGB(src, src_width * 2,
+                         crop_argb, argb_stride,
+                         crop_width, inv_crop_height);
+      break;
+    // TODO(fbarchard): Support cropping Bayer by odd numbers
+    // by adjusting fourcc.
+    case FOURCC_BGGR:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerBGGRToARGB(src, src_width,
+                          crop_argb, argb_stride,
+                          crop_width, inv_crop_height);
+      break;
+
+    case FOURCC_GBRG:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerGBRGToARGB(src, src_width,
+                          crop_argb, argb_stride,
+                          crop_width, inv_crop_height);
+      break;
+
+    case FOURCC_GRBG:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerGRBGToARGB(src, src_width,
+                          crop_argb, argb_stride,
+                          crop_width, inv_crop_height);
+      break;
+
+    case FOURCC_RGGB:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerRGGBToARGB(src, src_width,
+                          crop_argb, argb_stride,
+                          crop_width, inv_crop_height);
+      break;
+
+    case FOURCC_I400:
+      src = sample + src_width * crop_y + crop_x;
+      r = I400ToARGB(src, src_width,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+
+    // Biplanar formats
+    case FOURCC_NV12:
+      src = sample + (src_width * crop_y + crop_x);
+      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+      r = NV12ToARGB(src, src_width,
+                     src_uv, aligned_src_width,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_NV21:
+      src = sample + (src_width * crop_y + crop_x);
+      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+      // Call NV12 but with u and v parameters swapped.
+      r = NV21ToARGB(src, src_width,
+                     src_uv, aligned_src_width,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_M420:
+      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
+      r = M420ToARGB(src, src_width,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+//    case FOURCC_Q420:
+//      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
+//      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
+//               src_width + crop_x * 2;
+//      r = Q420ToARGB(src, src_width * 3,
+//                    src_uv, src_width * 3,
+//                    crop_argb, argb_stride,
+//                    crop_width, inv_crop_height);
+//      break;
+    // Triplanar formats
+    case FOURCC_I420:
+    case FOURCC_YU12:
+    case FOURCC_YV12: {
+      const uint8* src_y = sample + (src_width * crop_y + crop_x);
+      const uint8* src_u;
+      const uint8* src_v;
+      int halfwidth = (src_width + 1) / 2;
+      int halfheight = (abs_src_height + 1) / 2;
+      if (format == FOURCC_YV12) {
+        src_v = sample + src_width * abs_src_height +
+            (halfwidth * crop_y + crop_x) / 2;
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+      } else {
+        src_u = sample + src_width * abs_src_height +
+            (halfwidth * crop_y + crop_x) / 2;
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+      }
+      r = I420ToARGB(src_y, src_width,
+                     src_u, halfwidth,
+                     src_v, halfwidth,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+    case FOURCC_I422:
+    case FOURCC_YV16: {
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u;
+      const uint8* src_v;
+      int halfwidth = (src_width + 1) / 2;
+      if (format == FOURCC_YV16) {
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * crop_y + crop_x / 2;
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+      } else {
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * crop_y + crop_x / 2;
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+      }
+      r = I422ToARGB(src_y, src_width,
+                     src_u, halfwidth,
+                     src_v, halfwidth,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+    case FOURCC_I444:
+    case FOURCC_YV24: {
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u;
+      const uint8* src_v;
+      if (format == FOURCC_YV24) {
+        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
+        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+      } else {
+        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
+        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+      }
+      r = I444ToARGB(src_y, src_width,
+                     src_u, src_width,
+                     src_v, src_width,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+    case FOURCC_I411: {
+      int quarterwidth = (src_width + 3) / 4;
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u = sample + src_width * abs_src_height +
+          quarterwidth * crop_y + crop_x / 4;
+      const uint8* src_v = sample + src_width * abs_src_height +
+          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
+      r = I411ToARGB(src_y, src_width,
+                     src_u, quarterwidth,
+                     src_v, quarterwidth,
+                     crop_argb, argb_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+#ifdef HAVE_JPEG
+    case FOURCC_MJPG:
+      r = MJPGToARGB(sample, sample_size,
+                     crop_argb, argb_stride,
+                     src_width, abs_src_height, crop_width, inv_crop_height);
+      break;
+#endif
+    default:
+      r = -1;  // unknown fourcc - return failure code.
+  }
+
+  if (need_buf) {
+    if (!r) {
+      r = ARGBRotate(crop_argb, argb_stride,
+                     tmp_argb, tmp_argb_stride,
+                     crop_width, abs_crop_height, rotation);
+    }
+    free(rotate_buffer);
+  }
+
+  return r;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/convert_to_i420.cc b/src/main/jni/libyuv/source/convert_to_i420.cc
new file mode 100644
index 000000000..7b194fff7
--- /dev/null
+++ b/src/main/jni/libyuv/source/convert_to_i420.cc
@@ -0,0 +1,383 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "libyuv/convert.h"
+
+#include "libyuv/format_conversion.h"
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// src_width is used for source stride computation
+// src_height is used to compute location of planes, and indicate inversion
+// sample_size is measured in bytes and is the size of the frame.
+//   With MJPEG it is the compressed size of the frame.
+LIBYUV_API
+int ConvertToI420(const uint8* sample,
+                  size_t sample_size,
+                  uint8* y, int y_stride,
+                  uint8* u, int u_stride,
+                  uint8* v, int v_stride,
+                  int crop_x, int crop_y,
+                  int src_width, int src_height,
+                  int crop_width, int crop_height,
+                  enum RotationMode rotation,
+                  uint32 fourcc) {
+  uint32 format = CanonicalFourCC(fourcc);
+  int aligned_src_width = (src_width + 1) & ~1;
+  const uint8* src;
+  const uint8* src_uv;
+  int abs_src_height = (src_height < 0) ? -src_height : src_height;
+  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+  int r = 0;
+  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
+      format != FOURCC_NV12 && format != FOURCC_NV21 &&
+      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
+  uint8* tmp_y = y;
+  uint8* tmp_u = u;
+  uint8* tmp_v = v;
+  int tmp_y_stride = y_stride;
+  int tmp_u_stride = u_stride;
+  int tmp_v_stride = v_stride;
+  uint8* rotate_buffer = NULL;
+  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+
+  if (!y || !u || !v || !sample ||
+      src_width <= 0 || crop_width <= 0  ||
+      src_height == 0 || crop_height == 0) {
+    return -1;
+  }
+  if (src_height < 0) {
+    inv_crop_height = -inv_crop_height;
+  }
+
+  // One pass rotation is available for some formats. For the rest, convert
+  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
+  // and then rotate the I420 to the final destination buffer.
+  // For in-place conversion, if destination y is same as source sample,
+  // also enable temporary buffer.
+  if (need_buf) {
+    int y_size = crop_width * abs_crop_height;
+    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
+    rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
+    if (!rotate_buffer) {
+      return 1;  // Out of memory runtime error.
+    }
+    y = rotate_buffer;
+    u = y + y_size;
+    v = u + uv_size;
+    y_stride = crop_width;
+    u_stride = v_stride = ((crop_width + 1) / 2);
+  }
+
+  switch (format) {
+    // Single plane formats
+    case FOURCC_YUY2:
+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+      r = YUY2ToI420(src, aligned_src_width * 2,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_UYVY:
+      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
+      r = UYVYToI420(src, aligned_src_width * 2,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBP:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = RGB565ToI420(src, src_width * 2,
+                       y, y_stride,
+                       u, u_stride,
+                       v, v_stride,
+                       crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBO:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = ARGB1555ToI420(src, src_width * 2,
+                         y, y_stride,
+                         u, u_stride,
+                         v, v_stride,
+                         crop_width, inv_crop_height);
+      break;
+    case FOURCC_R444:
+      src = sample + (src_width * crop_y + crop_x) * 2;
+      r = ARGB4444ToI420(src, src_width * 2,
+                         y, y_stride,
+                         u, u_stride,
+                         v, v_stride,
+                         crop_width, inv_crop_height);
+      break;
+    case FOURCC_24BG:
+      src = sample + (src_width * crop_y + crop_x) * 3;
+      r = RGB24ToI420(src, src_width * 3,
+                      y, y_stride,
+                      u, u_stride,
+                      v, v_stride,
+                      crop_width, inv_crop_height);
+      break;
+    case FOURCC_RAW:
+      src = sample + (src_width * crop_y + crop_x) * 3;
+      r = RAWToI420(src, src_width * 3,
+                    y, y_stride,
+                    u, u_stride,
+                    v, v_stride,
+                    crop_width, inv_crop_height);
+      break;
+    case FOURCC_ARGB:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = ARGBToI420(src, src_width * 4,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_BGRA:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = BGRAToI420(src, src_width * 4,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_ABGR:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = ABGRToI420(src, src_width * 4,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGBA:
+      src = sample + (src_width * crop_y + crop_x) * 4;
+      r = RGBAToI420(src, src_width * 4,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    // TODO(fbarchard): Support cropping Bayer by odd numbers
+    // by adjusting fourcc.
+    case FOURCC_BGGR:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerBGGRToI420(src, src_width,
+                          y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          crop_width, inv_crop_height);
+      break;
+    case FOURCC_GBRG:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerGBRGToI420(src, src_width,
+                          y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          crop_width, inv_crop_height);
+      break;
+    case FOURCC_GRBG:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerGRBGToI420(src, src_width,
+                          y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          crop_width, inv_crop_height);
+      break;
+    case FOURCC_RGGB:
+      src = sample + (src_width * crop_y + crop_x);
+      r = BayerRGGBToI420(src, src_width,
+                          y, y_stride,
+                          u, u_stride,
+                          v, v_stride,
+                          crop_width, inv_crop_height);
+      break;
+    case FOURCC_I400:
+      src = sample + src_width * crop_y + crop_x;
+      r = I400ToI420(src, src_width,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    // Biplanar formats
+    case FOURCC_NV12:
+      src = sample + (src_width * crop_y + crop_x);
+      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+      r = NV12ToI420Rotate(src, src_width,
+                           src_uv, aligned_src_width,
+                           y, y_stride,
+                           u, u_stride,
+                           v, v_stride,
+                           crop_width, inv_crop_height, rotation);
+      break;
+    case FOURCC_NV21:
+      src = sample + (src_width * crop_y + crop_x);
+      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
+      // Call NV12 but with u and v parameters swapped.
+      r = NV12ToI420Rotate(src, src_width,
+                           src_uv, aligned_src_width,
+                           y, y_stride,
+                           v, v_stride,
+                           u, u_stride,
+                           crop_width, inv_crop_height, rotation);
+      break;
+    case FOURCC_M420:
+      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
+      r = M420ToI420(src, src_width,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    case FOURCC_Q420:
+      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
+      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
+               src_width + crop_x * 2;
+      r = Q420ToI420(src, src_width * 3,
+                    src_uv, src_width * 3,
+                    y, y_stride,
+                    u, u_stride,
+                    v, v_stride,
+                    crop_width, inv_crop_height);
+      break;
+    // Triplanar formats
+    case FOURCC_I420:
+    case FOURCC_YU12:
+    case FOURCC_YV12: {
+      const uint8* src_y = sample + (src_width * crop_y + crop_x);
+      const uint8* src_u;
+      const uint8* src_v;
+      int halfwidth = (src_width + 1) / 2;
+      int halfheight = (abs_src_height + 1) / 2;
+      if (format == FOURCC_YV12) {
+        src_v = sample + src_width * abs_src_height +
+            (halfwidth * crop_y + crop_x) / 2;
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+      } else {
+        src_u = sample + src_width * abs_src_height +
+            (halfwidth * crop_y + crop_x) / 2;
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
+      }
+      r = I420Rotate(src_y, src_width,
+                     src_u, halfwidth,
+                     src_v, halfwidth,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height, rotation);
+      break;
+    }
+    case FOURCC_I422:
+    case FOURCC_YV16: {
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u;
+      const uint8* src_v;
+      int halfwidth = (src_width + 1) / 2;
+      if (format == FOURCC_YV16) {
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * crop_y + crop_x / 2;
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+      } else {
+        src_u = sample + src_width * abs_src_height +
+            halfwidth * crop_y + crop_x / 2;
+        src_v = sample + src_width * abs_src_height +
+            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
+      }
+      r = I422ToI420(src_y, src_width,
+                     src_u, halfwidth,
+                     src_v, halfwidth,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+    case FOURCC_I444:
+    case FOURCC_YV24: {
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u;
+      const uint8* src_v;
+      if (format == FOURCC_YV24) {
+        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
+        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+      } else {
+        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
+        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
+      }
+      r = I444ToI420(src_y, src_width,
+                     src_u, src_width,
+                     src_v, src_width,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+    case FOURCC_I411: {
+      int quarterwidth = (src_width + 3) / 4;
+      const uint8* src_y = sample + src_width * crop_y + crop_x;
+      const uint8* src_u = sample + src_width * abs_src_height +
+          quarterwidth * crop_y + crop_x / 4;
+      const uint8* src_v = sample + src_width * abs_src_height +
+          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
+      r = I411ToI420(src_y, src_width,
+                     src_u, quarterwidth,
+                     src_v, quarterwidth,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     crop_width, inv_crop_height);
+      break;
+    }
+#ifdef HAVE_JPEG
+    case FOURCC_MJPG:
+      r = MJPGToI420(sample, sample_size,
+                     y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     src_width, abs_src_height, crop_width, inv_crop_height);
+      break;
+#endif
+    default:
+      r = -1;  // unknown fourcc - return failure code.
+  }
+
+  if (need_buf) {
+    if (!r) {
+      r = I420Rotate(y, y_stride,
+                     u, u_stride,
+                     v, v_stride,
+                     tmp_y, tmp_y_stride,
+                     tmp_u, tmp_u_stride,
+                     tmp_v, tmp_v_stride,
+                     crop_width, abs_crop_height, rotation);
+    }
+    free(rotate_buffer);
+  }
+
+  return r;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/cpu_id.cc b/src/main/jni/libyuv/source/cpu_id.cc
new file mode 100644
index 000000000..deb4c4465
--- /dev/null
+++ b/src/main/jni/libyuv/source/cpu_id.cc
@@ -0,0 +1,293 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/cpu_id.h"
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>  // For __cpuidex()
+#endif
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+    !defined(__native_client__)  && \
+    defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+#include <immintrin.h>  // For _xgetbv()
+#endif
+
+#if !defined(__native_client__)
+#include <stdlib.h>  // For getenv()
+#endif
+
+// For ArmCpuCaps() but unittested on all platforms
+#include <stdio.h>
+#include <string.h>
+
+#include "libyuv/basic_types.h"  // For CPU_X86
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// For functions that use the stack and have runtime checks for overflow,
+// use SAFEBUFFERS to avoid additional check.
+#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+#define SAFEBUFFERS __declspec(safebuffers)
+#else
+#define SAFEBUFFERS
+#endif
+
+// Low level cpuid for X86. Returns zeros on other CPUs.
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+    (defined(_M_IX86) || defined(_M_X64) || \
+    defined(__i386__) || defined(__x86_64__))
+LIBYUV_API
+void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
+#if defined(_MSC_VER) && !defined(__clang__)
+#if (_MSC_FULL_VER >= 160040219)
+  __cpuidex((int*)(cpu_info), info_eax, info_ecx);
+#elif defined(_M_IX86)
+  __asm {
+    mov        eax, info_eax
+    mov        ecx, info_ecx
+    mov        edi, cpu_info
+    cpuid
+    mov        [edi], eax
+    mov        [edi + 4], ebx
+    mov        [edi + 8], ecx
+    mov        [edi + 12], edx
+  }
+#else
+  if (info_ecx == 0) {
+    __cpuid((int*)(cpu_info), info_eax);
+  } else {
+    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
+  }
+#endif
+#else  // defined(_MSC_VER)
+  uint32 info_ebx, info_edx;
+  asm volatile (  // NOLINT
+#if defined( __i386__) && defined(__PIC__)
+    // Preserve ebx for fpic 32 bit.
+    "mov %%ebx, %%edi                          \n"
+    "cpuid                                     \n"
+    "xchg %%edi, %%ebx                         \n"
+    : "=D" (info_ebx),
+#else
+    "cpuid                                     \n"
+    : "=b" (info_ebx),
+#endif  //  defined( __i386__) && defined(__PIC__)
+      "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
+  cpu_info[0] = info_eax;
+  cpu_info[1] = info_ebx;
+  cpu_info[2] = info_ecx;
+  cpu_info[3] = info_edx;
+#endif  // defined(_MSC_VER)
+}
+
+#if !defined(__native_client__)
+#define HAS_XGETBV
+// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
+int TestOsSaveYmm() {
+  uint32 xcr0 = 0u;
+#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
+  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
+#elif defined(_M_IX86) && defined(_MSC_VER)
+  __asm {
+    xor        ecx, ecx    // xcr 0
+    _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
+    mov        xcr0, eax
+  }
+#elif defined(__i386__) || defined(__x86_64__)
+  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
+#endif  // defined(_MSC_VER)
+  return((xcr0 & 6) == 6);  // Is ymm saved?
+}
+#endif  // !defined(__native_client__)
+#else
+LIBYUV_API
+void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
+  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
+}
+#endif
+
+// based on libvpx arm_cpudetect.c
+// For Arm, but public to allow testing on any CPU
+LIBYUV_API SAFEBUFFERS
+int ArmCpuCaps(const char* cpuinfo_name) {
+  char cpuinfo_line[512];
+  FILE* f = fopen(cpuinfo_name, "r");
+  if (!f) {
+    // Assume Neon if /proc/cpuinfo is unavailable.
+    // This will occur for Chrome sandbox for Pepper or Render process.
+    return kCpuHasNEON;
+  }
+  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
+    if (memcmp(cpuinfo_line, "Features", 8) == 0) {
+      char* p = strstr(cpuinfo_line, " neon");
+      if (p && (p[5] == ' ' || p[5] == '\n')) {
+        fclose(f);
+        return kCpuHasNEON;
+      }
+    }
+  }
+  fclose(f);
+  return 0;
+}
+
+#if defined(__mips__) && defined(__linux__)
+static int MipsCpuCaps(const char* search_string) {
+  char cpuinfo_line[512];
+  const char* file_name = "/proc/cpuinfo";
+  FILE* f = fopen(file_name, "r");
+  if (!f) {
+    // Assume DSP if /proc/cpuinfo is unavailable.
+    // This will occur for Chrome sandbox for Pepper or Render process.
+    return kCpuHasMIPS_DSP;
+  }
+  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
+    if (strstr(cpuinfo_line, search_string) != NULL) {
+      fclose(f);
+      return kCpuHasMIPS_DSP;
+    }
+  }
+  fclose(f);
+  return 0;
+}
+#endif
+
+// CPU detect function for SIMD instruction sets.
+LIBYUV_API
+int cpu_info_ = kCpuInit;  // cpu_info is not initialized yet.
+
+// Test environment variable for disabling CPU features. Any non-zero value
+// to disable. Zero ignored to make it easy to set the variable on/off.
+#if !defined(__native_client__) && !defined(_M_ARM)
+
+static LIBYUV_BOOL TestEnv(const char* name) {
+  const char* var = getenv(name);
+  if (var) {
+    if (var[0] != '0') {
+      return LIBYUV_TRUE;
+    }
+  }
+  return LIBYUV_FALSE;
+}
+#else  // nacl does not support getenv().
+static LIBYUV_BOOL TestEnv(const char*) {
+  return LIBYUV_FALSE;
+}
+#endif
+
+LIBYUV_API SAFEBUFFERS
+int InitCpuFlags(void) {
+#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
+
+  uint32 cpu_info0[4] = { 0, 0, 0, 0 };
+  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
+  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
+  CpuId(0, 0, cpu_info0);
+  CpuId(1, 0, cpu_info1);
+  if (cpu_info0[0] >= 7) {
+    CpuId(7, 0, cpu_info7);
+  }
+  cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
+              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
+              ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
+              ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
+              ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
+              ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
+              kCpuHasX86;
+
+#ifdef HAS_XGETBV
+  if ((cpu_info1[2] & 0x18000000) == 0x18000000 &&  // AVX and OSSave
+      TestOsSaveYmm()) {  // Saves YMM.
+    cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
+                 kCpuHasAVX;
+  }
+#endif
+  // Environment variable overrides for testing.
+  if (TestEnv("LIBYUV_DISABLE_X86")) {
+    cpu_info_ &= ~kCpuHasX86;
+  }
+  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
+    cpu_info_ &= ~kCpuHasSSE2;
+  }
+  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
+    cpu_info_ &= ~kCpuHasSSSE3;
+  }
+  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
+    cpu_info_ &= ~kCpuHasSSE41;
+  }
+  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
+    cpu_info_ &= ~kCpuHasSSE42;
+  }
+  if (TestEnv("LIBYUV_DISABLE_AVX")) {
+    cpu_info_ &= ~kCpuHasAVX;
+  }
+  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
+    cpu_info_ &= ~kCpuHasAVX2;
+  }
+  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
+    cpu_info_ &= ~kCpuHasERMS;
+  }
+  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
+    cpu_info_ &= ~kCpuHasFMA3;
+  }
+#elif defined(__mips__) && defined(__linux__)
+  // Linux mips parse text file for dsp detect.
+  cpu_info_ = MipsCpuCaps("dsp");  // set kCpuHasMIPS_DSP.
+#if defined(__mips_dspr2)
+  cpu_info_ |= kCpuHasMIPS_DSPR2;
+#endif
+  cpu_info_ |= kCpuHasMIPS;
+
+  if (getenv("LIBYUV_DISABLE_MIPS")) {
+    cpu_info_ &= ~kCpuHasMIPS;
+  }
+  if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
+    cpu_info_ &= ~kCpuHasMIPS_DSP;
+  }
+  if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
+    cpu_info_ &= ~kCpuHasMIPS_DSPR2;
+  }
+#elif defined(__arm__) || defined(__aarch64__)
+// gcc -mfpu=neon defines __ARM_NEON__
+// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
+// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
+#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
+  cpu_info_ = kCpuHasNEON;
+// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
+// flag in it.
+// So for aarch64, neon enabling is hard coded here.
+#elif defined(__aarch64__)
+  cpu_info_ = kCpuHasNEON;
+#else
+  // Linux arm parse text file for neon detect.
+  cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
+#endif
+  cpu_info_ |= kCpuHasARM;
+  if (TestEnv("LIBYUV_DISABLE_NEON")) {
+    cpu_info_ &= ~kCpuHasNEON;
+  }
+#endif  // __arm__
+  if (TestEnv("LIBYUV_DISABLE_ASM")) {
+    cpu_info_ = 0;
+  }
+  return cpu_info_;
+}
+
+LIBYUV_API
+void MaskCpuFlags(int enable_flags) {
+  cpu_info_ = InitCpuFlags() & enable_flags;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/format_conversion.cc b/src/main/jni/libyuv/source/format_conversion.cc
new file mode 100644
index 000000000..3c1737153
--- /dev/null
+++ b/src/main/jni/libyuv/source/format_conversion.cc
@@ -0,0 +1,554 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/format_conversion.h"
+
+#include "libyuv/basic_types.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/video_common.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// generate a selector mask useful for pshufb
+static uint32 GenerateSelector(int select0, int select1) {
+  return (uint32)(select0) |
+         (uint32)((select1 + 4) << 8) |
+         (uint32)((select0 + 8) << 16) |
+         (uint32)((select1 + 12) << 24);
+}
+
+static int MakeSelectors(const int blue_index,
+                         const int green_index,
+                         const int red_index,
+                         uint32 dst_fourcc_bayer,
+                         uint32* index_map) {
+  // Now build a lookup table containing the indices for the four pixels in each
+  // 2x2 Bayer grid.
+  switch (dst_fourcc_bayer) {
+    case FOURCC_BGGR:
+      index_map[0] = GenerateSelector(blue_index, green_index);
+      index_map[1] = GenerateSelector(green_index, red_index);
+      break;
+    case FOURCC_GBRG:
+      index_map[0] = GenerateSelector(green_index, blue_index);
+      index_map[1] = GenerateSelector(red_index, green_index);
+      break;
+    case FOURCC_RGGB:
+      index_map[0] = GenerateSelector(red_index, green_index);
+      index_map[1] = GenerateSelector(green_index, blue_index);
+      break;
+    case FOURCC_GRBG:
+      index_map[0] = GenerateSelector(green_index, red_index);
+      index_map[1] = GenerateSelector(blue_index, green_index);
+      break;
+    default:
+      return -1;  // Bad FourCC
+  }
+  return 0;
+}
+
+// Converts 32 bit ARGB to Bayer RGB formats.
+LIBYUV_API
+int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_bayer, int dst_stride_bayer,
+                int width, int height,
+                uint32 dst_fourcc_bayer) {
+  int y;
+  const int blue_index = 0;  // Offsets for ARGB format
+  const int green_index = 1;
+  const int red_index = 2;
+  uint32 index_map[2];
+  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix) = ARGBToBayerRow_C;
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+#if defined(HAS_ARGBTOBAYERROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
+    }
+  }
+#elif defined(HAS_ARGBTOBAYERROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerRow_NEON;
+    }
+  }
+#endif
+  if (MakeSelectors(blue_index, green_index, red_index,
+                    dst_fourcc_bayer, index_map)) {
+    return -1;  // Bad FourCC
+  }
+
+  for (y = 0; y < height; ++y) {
+    ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
+    src_argb += src_stride_argb;
+    dst_bayer += dst_stride_bayer;
+  }
+  return 0;
+}
+
+#define AVG(a, b) (((a) + (b)) >> 1)
+
+static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
+                       uint8* dst_argb, int pix) {
+  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
+  uint8 g = src_bayer0[1];
+  uint8 r = src_bayer1[1];
+  int x;
+  for (x = 0; x < pix - 2; x += 2) {
+    dst_argb[0] = src_bayer0[0];
+    dst_argb[1] = AVG(g, src_bayer0[1]);
+    dst_argb[2] = AVG(r, src_bayer1[1]);
+    dst_argb[3] = 255U;
+    dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
+    dst_argb[5] = src_bayer0[1];
+    dst_argb[6] = src_bayer1[1];
+    dst_argb[7] = 255U;
+    g = src_bayer0[1];
+    r = src_bayer1[1];
+    src_bayer0 += 2;
+    src_bayer1 += 2;
+    dst_argb += 8;
+  }
+  dst_argb[0] = src_bayer0[0];
+  dst_argb[1] = AVG(g, src_bayer0[1]);
+  dst_argb[2] = AVG(r, src_bayer1[1]);
+  dst_argb[3] = 255U;
+  if (!(pix & 1)) {
+    dst_argb[4] = src_bayer0[0];
+    dst_argb[5] = src_bayer0[1];
+    dst_argb[6] = src_bayer1[1];
+    dst_argb[7] = 255U;
+  }
+}
+
+static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
+                       uint8* dst_argb, int pix) {
+  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
+  uint8 g = src_bayer0[1];
+  uint8 b = src_bayer1[1];
+  int x;
+  for (x = 0; x < pix - 2; x += 2) {
+    dst_argb[0] = AVG(b, src_bayer1[1]);
+    dst_argb[1] = AVG(g, src_bayer0[1]);
+    dst_argb[2] = src_bayer0[0];
+    dst_argb[3] = 255U;
+    dst_argb[4] = src_bayer1[1];
+    dst_argb[5] = src_bayer0[1];
+    dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
+    dst_argb[7] = 255U;
+    g = src_bayer0[1];
+    b = src_bayer1[1];
+    src_bayer0 += 2;
+    src_bayer1 += 2;
+    dst_argb += 8;
+  }
+  dst_argb[0] = AVG(b, src_bayer1[1]);
+  dst_argb[1] = AVG(g, src_bayer0[1]);
+  dst_argb[2] = src_bayer0[0];
+  dst_argb[3] = 255U;
+  if (!(pix & 1)) {
+    dst_argb[4] = src_bayer1[1];
+    dst_argb[5] = src_bayer0[1];
+    dst_argb[6] = src_bayer0[0];
+    dst_argb[7] = 255U;
+  }
+}
+
+static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
+                       uint8* dst_argb, int pix) {
+  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
+  uint8 b = src_bayer0[1];
+  int x;
+  for (x = 0; x < pix - 2; x += 2) {
+    dst_argb[0] = AVG(b, src_bayer0[1]);
+    dst_argb[1] = src_bayer0[0];
+    dst_argb[2] = src_bayer1[0];
+    dst_argb[3] = 255U;
+    dst_argb[4] = src_bayer0[1];
+    dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
+    dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
+    dst_argb[7] = 255U;
+    b = src_bayer0[1];
+    src_bayer0 += 2;
+    src_bayer1 += 2;
+    dst_argb += 8;
+  }
+  dst_argb[0] = AVG(b, src_bayer0[1]);
+  dst_argb[1] = src_bayer0[0];
+  dst_argb[2] = src_bayer1[0];
+  dst_argb[3] = 255U;
+  if (!(pix & 1)) {
+    dst_argb[4] = src_bayer0[1];
+    dst_argb[5] = src_bayer0[0];
+    dst_argb[6] = src_bayer1[0];
+    dst_argb[7] = 255U;
+  }
+}
+
+static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
+                       uint8* dst_argb, int pix) {
+  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
+  uint8 r = src_bayer0[1];
+  int x;
+  for (x = 0; x < pix - 2; x += 2) {
+    dst_argb[0] = src_bayer1[0];
+    dst_argb[1] = src_bayer0[0];
+    dst_argb[2] = AVG(r, src_bayer0[1]);
+    dst_argb[3] = 255U;
+    dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
+    dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
+    dst_argb[6] = src_bayer0[1];
+    dst_argb[7] = 255U;
+    r = src_bayer0[1];
+    src_bayer0 += 2;
+    src_bayer1 += 2;
+    dst_argb += 8;
+  }
+  dst_argb[0] = src_bayer1[0];
+  dst_argb[1] = src_bayer0[0];
+  dst_argb[2] = AVG(r, src_bayer0[1]);
+  dst_argb[3] = 255U;
+  if (!(pix & 1)) {
+    dst_argb[4] = src_bayer1[0];
+    dst_argb[5] = src_bayer0[0];
+    dst_argb[6] = src_bayer0[1];
+    dst_argb[7] = 255U;
+  }
+}
+
+// Converts any Bayer RGB format to ARGB.
+LIBYUV_API
+int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height,
+                uint32 src_fourcc_bayer) {
+  int y;
+  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int pix);
+  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int pix);
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  switch (src_fourcc_bayer) {
+    case FOURCC_BGGR:
+      BayerRow0 = BayerRowBG;
+      BayerRow1 = BayerRowGR;
+      break;
+    case FOURCC_GBRG:
+      BayerRow0 = BayerRowGB;
+      BayerRow1 = BayerRowRG;
+      break;
+    case FOURCC_GRBG:
+      BayerRow0 = BayerRowGR;
+      BayerRow1 = BayerRowBG;
+      break;
+    case FOURCC_RGGB:
+      BayerRow0 = BayerRowRG;
+      BayerRow1 = BayerRowGB;
+      break;
+    default:
+      return -1;    // Bad FourCC
+  }
+
+  for (y = 0; y < height - 1; y += 2) {
+    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
+    BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
+              dst_argb + dst_stride_argb, width);
+    src_bayer += src_stride_bayer * 2;
+    dst_argb += dst_stride_argb * 2;
+  }
+  if (height & 1) {
+    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
+  }
+  return 0;
+}
+
+// Converts any Bayer RGB format to ARGB.
+LIBYUV_API
+int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_y, int dst_stride_y,
+                uint8* dst_u, int dst_stride_u,
+                uint8* dst_v, int dst_stride_v,
+                int width, int height,
+                uint32 src_fourcc_bayer) {
+  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int pix);
+  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int pix);
+
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      ARGBToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    int halfheight;
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
+    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
+    dst_stride_y = -dst_stride_y;
+    dst_stride_u = -dst_stride_u;
+    dst_stride_v = -dst_stride_v;
+  }
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToYRow = ARGBToYRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_ARGBTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
+
+  switch (src_fourcc_bayer) {
+    case FOURCC_BGGR:
+      BayerRow0 = BayerRowBG;
+      BayerRow1 = BayerRowGR;
+      break;
+    case FOURCC_GBRG:
+      BayerRow0 = BayerRowGB;
+      BayerRow1 = BayerRowRG;
+      break;
+    case FOURCC_GRBG:
+      BayerRow0 = BayerRowGR;
+      BayerRow1 = BayerRowBG;
+      break;
+    case FOURCC_RGGB:
+      BayerRow0 = BayerRowRG;
+      BayerRow1 = BayerRowGB;
+      break;
+    default:
+      return -1;  // Bad FourCC
+  }
+
+  {
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+    int y;
+    for (y = 0; y < height - 1; y += 2) {
+      BayerRow0(src_bayer, src_stride_bayer, row, width);
+      BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
+                row + kRowSize, width);
+      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
+      src_bayer += src_stride_bayer * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+      BayerRow0(src_bayer, src_stride_bayer, row, width);
+      ARGBToUVRow(row, 0, dst_u, dst_v, width);
+      ARGBToYRow(row, dst_y, width);
+    }
+    free_aligned_buffer_64(row);
+  }
+  return 0;
+}
+
+// Convert I420 to Bayer.
+LIBYUV_API
+int I420ToBayer(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_bayer, int dst_stride_bayer,
+                int width, int height,
+                uint32 dst_fourcc_bayer) {
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix) = ARGBToBayerRow_C;
+  const int blue_index = 0;  // Offsets for ARGB format
+  const int green_index = 1;
+  const int red_index = 2;
+  uint32 index_map[2];
+  // Negative height means invert the image.
+  if (height < 0) {
+    int halfheight;
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+
+#if defined(HAS_ARGBTOBAYERROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
+    }
+  }
+#elif defined(HAS_ARGBTOBAYERROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerRow_NEON;
+    }
+  }
+#endif
+
+  if (MakeSelectors(blue_index, green_index, red_index,
+                    dst_fourcc_bayer, index_map)) {
+    return -1;  // Bad FourCC
+  }
+  {
+    // Allocate a row of ARGB.
+    align_buffer_64(row, width * 4);
+    int y;
+    for (y = 0; y < height; ++y) {
+      I422ToARGBRow(src_y, src_u, src_v, row, width);
+      ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
+      dst_bayer += dst_stride_bayer;
+      src_y += src_stride_y;
+      if (y & 1) {
+        src_u += src_stride_u;
+        src_v += src_stride_v;
+      }
+    }
+    free_aligned_buffer_64(row);
+  }
+  return 0;
+}
+
+#define MAKEBAYERFOURCC(BAYER)                                                 \
+LIBYUV_API                                                                     \
+int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer,         \
+                         uint8* dst_y, int dst_stride_y,                       \
+                         uint8* dst_u, int dst_stride_u,                       \
+                         uint8* dst_v, int dst_stride_v,                       \
+                         int width, int height) {                              \
+  return BayerToI420(src_bayer, src_stride_bayer,                              \
+                     dst_y, dst_stride_y,                                      \
+                     dst_u, dst_stride_u,                                      \
+                     dst_v, dst_stride_v,                                      \
+                     width, height,                                            \
+                     FOURCC_##BAYER);                                          \
+}                                                                              \
+                                                                               \
+LIBYUV_API                                                                     \
+int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y,                   \
+                       const uint8* src_u, int src_stride_u,                   \
+                       const uint8* src_v, int src_stride_v,                   \
+                       uint8* dst_bayer, int dst_stride_bayer,                 \
+                       int width, int height) {                                \
+  return I420ToBayer(src_y, src_stride_y,                                      \
+                     src_u, src_stride_u,                                      \
+                     src_v, src_stride_v,                                      \
+                     dst_bayer, dst_stride_bayer,                              \
+                     width, height,                                            \
+                     FOURCC_##BAYER);                                          \
+}                                                                              \
+                                                                               \
+LIBYUV_API                                                                     \
+int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb,             \
+                       uint8* dst_bayer, int dst_stride_bayer,                 \
+                       int width, int height) {                                \
+  return ARGBToBayer(src_argb, src_stride_argb,                                \
+                     dst_bayer, dst_stride_bayer,                              \
+                     width, height,                                            \
+                     FOURCC_##BAYER);                                          \
+}                                                                              \
+                                                                               \
+LIBYUV_API                                                                     \
+int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer,         \
+                         uint8* dst_argb, int dst_stride_argb,                 \
+                         int width, int height) {                              \
+  return BayerToARGB(src_bayer, src_stride_bayer,                              \
+                     dst_argb, dst_stride_argb,                                \
+                     width, height,                                            \
+                     FOURCC_##BAYER);                                          \
+}
+
+MAKEBAYERFOURCC(BGGR)
+MAKEBAYERFOURCC(GBRG)
+MAKEBAYERFOURCC(GRBG)
+MAKEBAYERFOURCC(RGGB)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/mjpeg_decoder.cc b/src/main/jni/libyuv/source/mjpeg_decoder.cc
new file mode 100644
index 000000000..36028c3cc
--- /dev/null
+++ b/src/main/jni/libyuv/source/mjpeg_decoder.cc
@@ -0,0 +1,566 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/mjpeg_decoder.h"
+
+#ifdef HAVE_JPEG
+#include <assert.h>
+
+#if !defined(__pnacl__) && !defined(__CLR_VER) && \
+    !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
+// Must be included before jpeglib.
+#include <setjmp.h>
+#define HAVE_SETJMP
+#endif
+struct FILE;  // For jpeglib.h.
+
+// C++ build requires extern C for jpeg internals.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <jpeglib.h>
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#include "libyuv/planar_functions.h"  // For CopyPlane().
+
+namespace libyuv {
+
+#ifdef HAVE_SETJMP
+struct SetJmpErrorMgr {
+  jpeg_error_mgr base;  // Must be at the top
+  jmp_buf setjmp_buffer;
+};
+#endif
+
+const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
+const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
+const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
+const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
+const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
+const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
+
+// Methods that are passed to jpeglib.
+boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
+void init_source(jpeg_decompress_struct* cinfo);
+void skip_input_data(jpeg_decompress_struct* cinfo,
+                     long num_bytes);  // NOLINT
+void term_source(jpeg_decompress_struct* cinfo);
+void ErrorHandler(jpeg_common_struct* cinfo);
+
+MJpegDecoder::MJpegDecoder()
+    : has_scanline_padding_(LIBYUV_FALSE),
+      num_outbufs_(0),
+      scanlines_(NULL),
+      scanlines_sizes_(NULL),
+      databuf_(NULL),
+      databuf_strides_(NULL) {
+  decompress_struct_ = new jpeg_decompress_struct;
+  source_mgr_ = new jpeg_source_mgr;
+#ifdef HAVE_SETJMP
+  error_mgr_ = new SetJmpErrorMgr;
+  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
+  // Override standard exit()-based error handler.
+  error_mgr_->base.error_exit = &ErrorHandler;
+#endif
+  decompress_struct_->client_data = NULL;
+  source_mgr_->init_source = &init_source;
+  source_mgr_->fill_input_buffer = &fill_input_buffer;
+  source_mgr_->skip_input_data = &skip_input_data;
+  source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
+  source_mgr_->term_source = &term_source;
+  jpeg_create_decompress(decompress_struct_);
+  decompress_struct_->src = source_mgr_;
+  buf_vec_.buffers = &buf_;
+  buf_vec_.len = 1;
+}
+
+MJpegDecoder::~MJpegDecoder() {
+  jpeg_destroy_decompress(decompress_struct_);
+  delete decompress_struct_;
+  delete source_mgr_;
+#ifdef HAVE_SETJMP
+  delete error_mgr_;
+#endif
+  DestroyOutputBuffers();
+}
+
+LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
+  if (!ValidateJpeg(src, src_len)) {
+    return LIBYUV_FALSE;
+  }
+
+  buf_.data = src;
+  buf_.len = static_cast<int>(src_len);
+  buf_vec_.pos = 0;
+  decompress_struct_->client_data = &buf_vec_;
+#ifdef HAVE_SETJMP
+  if (setjmp(error_mgr_->setjmp_buffer)) {
+    // We called jpeg_read_header, it experienced an error, and we called
+    // longjmp() and rewound the stack to here. Return error.
+    return LIBYUV_FALSE;
+  }
+#endif
+  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
+    // ERROR: Bad MJPEG header
+    return LIBYUV_FALSE;
+  }
+  AllocOutputBuffers(GetNumComponents());
+  for (int i = 0; i < num_outbufs_; ++i) {
+    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
+    if (scanlines_sizes_[i] != scanlines_size) {
+      if (scanlines_[i]) {
+        delete scanlines_[i];
+      }
+      scanlines_[i] = new uint8* [scanlines_size];
+      scanlines_sizes_[i] = scanlines_size;
+    }
+
+    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
+    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
+    // the preceding scanlines, the padding is not needed/wanted because the
+    // following addresses will already be valid (they are the initial bytes of
+    // the next scanline) and will be overwritten when jpeglib writes out that
+    // next scanline.
+    int databuf_stride = GetComponentStride(i);
+    int databuf_size = scanlines_size * databuf_stride;
+    if (databuf_strides_[i] != databuf_stride) {
+      if (databuf_[i]) {
+        delete databuf_[i];
+      }
+      databuf_[i] = new uint8[databuf_size];
+      databuf_strides_[i] = databuf_stride;
+    }
+
+    if (GetComponentStride(i) != GetComponentWidth(i)) {
+      has_scanline_padding_ = LIBYUV_TRUE;
+    }
+  }
+  return LIBYUV_TRUE;
+}
+
+static int DivideAndRoundUp(int numerator, int denominator) {
+  return (numerator + denominator - 1) / denominator;
+}
+
+static int DivideAndRoundDown(int numerator, int denominator) {
+  return numerator / denominator;
+}
+
+// Returns width of the last loaded frame.
+int MJpegDecoder::GetWidth() {
+  return decompress_struct_->image_width;
+}
+
+// Returns height of the last loaded frame.
+int MJpegDecoder::GetHeight() {
+  return decompress_struct_->image_height;
+}
+
+// Returns format of the last loaded frame. The return value is one of the
+// kColorSpace* constants.
+int MJpegDecoder::GetColorSpace() {
+  return decompress_struct_->jpeg_color_space;
+}
+
+// Number of color components in the color space.
+int MJpegDecoder::GetNumComponents() {
+  return decompress_struct_->num_components;
+}
+
+// Sample factors of the n-th component.
+int MJpegDecoder::GetHorizSampFactor(int component) {
+  return decompress_struct_->comp_info[component].h_samp_factor;
+}
+
+int MJpegDecoder::GetVertSampFactor(int component) {
+  return decompress_struct_->comp_info[component].v_samp_factor;
+}
+
+int MJpegDecoder::GetHorizSubSampFactor(int component) {
+  return decompress_struct_->max_h_samp_factor /
+      GetHorizSampFactor(component);
+}
+
+int MJpegDecoder::GetVertSubSampFactor(int component) {
+  return decompress_struct_->max_v_samp_factor /
+      GetVertSampFactor(component);
+}
+
+int MJpegDecoder::GetImageScanlinesPerImcuRow() {
+  return decompress_struct_->max_v_samp_factor * DCTSIZE;
+}
+
+int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
+  int vs = GetVertSubSampFactor(component);
+  return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
+}
+
+int MJpegDecoder::GetComponentWidth(int component) {
+  int hs = GetHorizSubSampFactor(component);
+  return DivideAndRoundUp(GetWidth(), hs);
+}
+
+int MJpegDecoder::GetComponentHeight(int component) {
+  int vs = GetVertSubSampFactor(component);
+  return DivideAndRoundUp(GetHeight(), vs);
+}
+
+// Get width in bytes padded out to a multiple of DCTSIZE
+int MJpegDecoder::GetComponentStride(int component) {
+  return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
+}
+
+int MJpegDecoder::GetComponentSize(int component) {
+  return GetComponentWidth(component) * GetComponentHeight(component);
+}
+
+LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
+#ifdef HAVE_SETJMP
+  if (setjmp(error_mgr_->setjmp_buffer)) {
+    // We called jpeg_abort_decompress, it experienced an error, and we called
+    // longjmp() and rewound the stack to here. Return error.
+    return LIBYUV_FALSE;
+  }
+#endif
+  jpeg_abort_decompress(decompress_struct_);
+  return LIBYUV_TRUE;
+}
+
+// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
+LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
+    uint8** planes, int dst_width, int dst_height) {
+  if (dst_width != GetWidth() ||
+      dst_height > GetHeight()) {
+    // ERROR: Bad dimensions
+    return LIBYUV_FALSE;
+  }
+#ifdef HAVE_SETJMP
+  if (setjmp(error_mgr_->setjmp_buffer)) {
+    // We called into jpeglib, it experienced an error sometime during this
+    // function call, and we called longjmp() and rewound the stack to here.
+    // Return error.
+    return LIBYUV_FALSE;
+  }
+#endif
+  if (!StartDecode()) {
+    return LIBYUV_FALSE;
+  }
+  SetScanlinePointers(databuf_);
+  int lines_left = dst_height;
+  // Compute amount of lines to skip to implement vertical crop.
+  // TODO(fbarchard): Ensure skip is a multiple of maximum component
+  // subsample. ie 2
+  int skip = (GetHeight() - dst_height) / 2;
+  if (skip > 0) {
+    // There is no API to skip lines in the output data, so we read them
+    // into the temp buffer.
+    while (skip >= GetImageScanlinesPerImcuRow()) {
+      if (!DecodeImcuRow()) {
+        FinishDecode();
+        return LIBYUV_FALSE;
+      }
+      skip -= GetImageScanlinesPerImcuRow();
+    }
+    if (skip > 0) {
+      // Have a partial iMCU row left over to skip. Must read it and then
+      // copy the parts we want into the destination.
+      if (!DecodeImcuRow()) {
+        FinishDecode();
+        return LIBYUV_FALSE;
+      }
+      for (int i = 0; i < num_outbufs_; ++i) {
+        // TODO(fbarchard): Compute skip to avoid this
+        assert(skip % GetVertSubSampFactor(i) == 0);
+        int rows_to_skip =
+            DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+        int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
+                                rows_to_skip;
+        int data_to_skip = rows_to_skip * GetComponentStride(i);
+        CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
+                  planes[i], GetComponentWidth(i),
+                  GetComponentWidth(i), scanlines_to_copy);
+        planes[i] += scanlines_to_copy * GetComponentWidth(i);
+      }
+      lines_left -= (GetImageScanlinesPerImcuRow() - skip);
+    }
+  }
+
+  // Read full MCUs but cropped horizontally
+  for (; lines_left > GetImageScanlinesPerImcuRow();
+         lines_left -= GetImageScanlinesPerImcuRow()) {
+    if (!DecodeImcuRow()) {
+      FinishDecode();
+      return LIBYUV_FALSE;
+    }
+    for (int i = 0; i < num_outbufs_; ++i) {
+      int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
+      CopyPlane(databuf_[i], GetComponentStride(i),
+                planes[i], GetComponentWidth(i),
+                GetComponentWidth(i), scanlines_to_copy);
+      planes[i] += scanlines_to_copy * GetComponentWidth(i);
+    }
+  }
+
+  if (lines_left > 0) {
+    // Have a partial iMCU row left over to decode.
+    if (!DecodeImcuRow()) {
+      FinishDecode();
+      return LIBYUV_FALSE;
+    }
+    for (int i = 0; i < num_outbufs_; ++i) {
+      int scanlines_to_copy =
+          DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
+      CopyPlane(databuf_[i], GetComponentStride(i),
+                planes[i], GetComponentWidth(i),
+                GetComponentWidth(i), scanlines_to_copy);
+      planes[i] += scanlines_to_copy * GetComponentWidth(i);
+    }
+  }
+  return FinishDecode();
+}
+
+LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
+    int dst_width, int dst_height) {
+  if (dst_width != GetWidth() ||
+      dst_height > GetHeight()) {
+    // ERROR: Bad dimensions
+    return LIBYUV_FALSE;
+  }
+#ifdef HAVE_SETJMP
+  if (setjmp(error_mgr_->setjmp_buffer)) {
+    // We called into jpeglib, it experienced an error sometime during this
+    // function call, and we called longjmp() and rewound the stack to here.
+    // Return error.
+    return LIBYUV_FALSE;
+  }
+#endif
+  if (!StartDecode()) {
+    return LIBYUV_FALSE;
+  }
+  SetScanlinePointers(databuf_);
+  int lines_left = dst_height;
+  // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
+  int skip = (GetHeight() - dst_height) / 2;
+  if (skip > 0) {
+    while (skip >= GetImageScanlinesPerImcuRow()) {
+      if (!DecodeImcuRow()) {
+        FinishDecode();
+        return LIBYUV_FALSE;
+      }
+      skip -= GetImageScanlinesPerImcuRow();
+    }
+    if (skip > 0) {
+      // Have a partial iMCU row left over to skip.
+      if (!DecodeImcuRow()) {
+        FinishDecode();
+        return LIBYUV_FALSE;
+      }
+      for (int i = 0; i < num_outbufs_; ++i) {
+        // TODO(fbarchard): Compute skip to avoid this
+        assert(skip % GetVertSubSampFactor(i) == 0);
+        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+        int data_to_skip = rows_to_skip * GetComponentStride(i);
+        // Change our own data buffer pointers so we can pass them to the
+        // callback.
+        databuf_[i] += data_to_skip;
+      }
+      int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
+      (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
+      // Now change them back.
+      for (int i = 0; i < num_outbufs_; ++i) {
+        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
+        int data_to_skip = rows_to_skip * GetComponentStride(i);
+        databuf_[i] -= data_to_skip;
+      }
+      lines_left -= scanlines_to_copy;
+    }
+  }
+  // Read full MCUs until we get to the crop point.
+  for (; lines_left >= GetImageScanlinesPerImcuRow();
+         lines_left -= GetImageScanlinesPerImcuRow()) {
+    if (!DecodeImcuRow()) {
+      FinishDecode();
+      return LIBYUV_FALSE;
+    }
+    (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
+  }
+  if (lines_left > 0) {
+    // Have a partial iMCU row left over to decode.
+    if (!DecodeImcuRow()) {
+      FinishDecode();
+      return LIBYUV_FALSE;
+    }
+    (*fn)(opaque, databuf_, databuf_strides_, lines_left);
+  }
+  return FinishDecode();
+}
+
+void init_source(j_decompress_ptr cinfo) {
+  fill_input_buffer(cinfo);
+}
+
+boolean fill_input_buffer(j_decompress_ptr cinfo) {
+  BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
+  if (buf_vec->pos >= buf_vec->len) {
+    assert(0 && "No more data");
+    // ERROR: No more data
+    return FALSE;
+  }
+  cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
+  cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
+  ++buf_vec->pos;
+  return TRUE;
+}
+
+void skip_input_data(j_decompress_ptr cinfo,
+                     long num_bytes) {  // NOLINT
+  cinfo->src->next_input_byte += num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {
+  // Nothing to do.
+}
+
+#ifdef HAVE_SETJMP
+void ErrorHandler(j_common_ptr cinfo) {
+  // This is called when a jpeglib command experiences an error. Unfortunately
+  // jpeglib's error handling model is not very flexible, because it expects the
+  // error handler to not return--i.e., it wants the program to terminate. To
+  // recover from errors we use setjmp() as shown in their example. setjmp() is
+  // C's implementation for the "call with current continuation" functionality
+  // seen in some functional programming languages.
+  // A formatted message can be output, but is unsafe for release.
+#ifdef DEBUG
+  char buf[JMSG_LENGTH_MAX];
+  (*cinfo->err->format_message)(cinfo, buf);
+  // ERROR: Error in jpeglib: buf
+#endif
+
+  SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
+  // This rewinds the call stack to the point of the corresponding setjmp()
+  // and causes it to return (for a second time) with value 1.
+  longjmp(mgr->setjmp_buffer, 1);
+}
+#endif
+
+void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
+  if (num_outbufs != num_outbufs_) {
+    // We could perhaps optimize this case to resize the output buffers without
+    // necessarily having to delete and recreate each one, but it's not worth
+    // it.
+    DestroyOutputBuffers();
+
+    scanlines_ = new uint8** [num_outbufs];
+    scanlines_sizes_ = new int[num_outbufs];
+    databuf_ = new uint8* [num_outbufs];
+    databuf_strides_ = new int[num_outbufs];
+
+    for (int i = 0; i < num_outbufs; ++i) {
+      scanlines_[i] = NULL;
+      scanlines_sizes_[i] = 0;
+      databuf_[i] = NULL;
+      databuf_strides_[i] = 0;
+    }
+
+    num_outbufs_ = num_outbufs;
+  }
+}
+
+void MJpegDecoder::DestroyOutputBuffers() {
+  for (int i = 0; i < num_outbufs_; ++i) {
+    delete [] scanlines_[i];
+    delete [] databuf_[i];
+  }
+  delete [] scanlines_;
+  delete [] databuf_;
+  delete [] scanlines_sizes_;
+  delete [] databuf_strides_;
+  scanlines_ = NULL;
+  databuf_ = NULL;
+  scanlines_sizes_ = NULL;
+  databuf_strides_ = NULL;
+  num_outbufs_ = 0;
+}
+
+// JDCT_IFAST and do_block_smoothing improve performance substantially.
+LIBYUV_BOOL MJpegDecoder::StartDecode() {
+  decompress_struct_->raw_data_out = TRUE;
+  decompress_struct_->dct_method = JDCT_IFAST;  // JDCT_ISLOW is default
+  decompress_struct_->dither_mode = JDITHER_NONE;
+  // Not applicable to 'raw':
+  decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
+  // Only for buffered mode:
+  decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
+  // Blocky but fast:
+  decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
+
+  if (!jpeg_start_decompress(decompress_struct_)) {
+    // ERROR: Couldn't start JPEG decompressor";
+    return LIBYUV_FALSE;
+  }
+  return LIBYUV_TRUE;
+}
+
+LIBYUV_BOOL MJpegDecoder::FinishDecode() {
+  // jpeglib considers it an error if we finish without decoding the whole
+  // image, so we call "abort" rather than "finish".
+  jpeg_abort_decompress(decompress_struct_);
+  return LIBYUV_TRUE;
+}
+
+void MJpegDecoder::SetScanlinePointers(uint8** data) {
+  for (int i = 0; i < num_outbufs_; ++i) {
+    uint8* data_i = data[i];
+    for (int j = 0; j < scanlines_sizes_[i]; ++j) {
+      scanlines_[i][j] = data_i;
+      data_i += GetComponentStride(i);
+    }
+  }
+}
+
+inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
+  return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
+      jpeg_read_raw_data(decompress_struct_,
+                         scanlines_,
+                         GetImageScanlinesPerImcuRow());
+}
+
+// The helper function which recognizes the jpeg sub-sampling type.
+JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
+    int* subsample_x, int* subsample_y, int number_of_components) {
+  if (number_of_components == 3) {  // Color images.
+    if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
+        subsample_x[1] == 2 && subsample_y[1] == 2 &&
+        subsample_x[2] == 2 && subsample_y[2] == 2) {
+      return kJpegYuv420;
+    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
+        subsample_x[1] == 2 && subsample_y[1] == 1 &&
+        subsample_x[2] == 2 && subsample_y[2] == 1) {
+      return kJpegYuv422;
+    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
+        subsample_x[1] == 1 && subsample_y[1] == 1 &&
+        subsample_x[2] == 1 && subsample_y[2] == 1) {
+      return kJpegYuv444;
+    }
+  } else if (number_of_components == 1) {  // Grey-scale images.
+    if (subsample_x[0] == 1 && subsample_y[0] == 1) {
+      return kJpegYuv400;
+    }
+  }
+  return kJpegUnknown;
+}
+
+}  // namespace libyuv
+#endif  // HAVE_JPEG
+
diff --git a/src/main/jni/libyuv/source/mjpeg_validate.cc b/src/main/jni/libyuv/source/mjpeg_validate.cc
new file mode 100644
index 000000000..23d22d099
--- /dev/null
+++ b/src/main/jni/libyuv/source/mjpeg_validate.cc
@@ -0,0 +1,47 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/mjpeg_decoder.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Helper function to validate the jpeg appears intact.
+// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
+LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
+  size_t i;
+  if (sample_size < 64) {
+    // ERROR: Invalid jpeg size: sample_size
+    return LIBYUV_FALSE;
+  }
+  if (sample[0] != 0xff || sample[1] != 0xd8) {  // Start Of Image
+    // ERROR: Invalid jpeg initial start code
+    return LIBYUV_FALSE;
+  }
+  for (i = sample_size - 2; i > 1;) {
+    if (sample[i] != 0xd9) {
+      if (sample[i] == 0xff && sample[i + 1] == 0xd9) {  // End Of Image
+        return LIBYUV_TRUE;  // Success: Valid jpeg.
+      }
+      --i;
+    }
+    --i;
+  }
+  // ERROR: Invalid jpeg end code not found. Size sample_size
+  return LIBYUV_FALSE;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
diff --git a/src/main/jni/libyuv/source/planar_functions.cc b/src/main/jni/libyuv/source/planar_functions.cc
new file mode 100644
index 000000000..3857008ca
--- /dev/null
+++ b/src/main/jni/libyuv/source/planar_functions.cc
@@ -0,0 +1,2291 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/planar_functions.h"
+
+#include <string.h>  // for memset()
+
+#include "libyuv/cpu_id.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Copy a plane of data
+LIBYUV_API
+void CopyPlane(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height) {
+  int y;
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_y = 0;
+  }
+  // Nothing to do.
+  if (src_y == dst_y && src_stride_y == dst_stride_y) {
+    return;
+  }
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+  // Copy plane
+  for (y = 0; y < height; ++y) {
+    CopyRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+LIBYUV_API
+void CopyPlane_16(const uint16* src_y, int src_stride_y,
+                  uint16* dst_y, int dst_stride_y,
+                  int width, int height) {
+  int y;
+  void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_y = 0;
+  }
+#if defined(HAS_COPYROW_16_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_16_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    CopyRow = CopyRow_16_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_16_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_16_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_16_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_16_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_16_MIPS;
+  }
+#endif
+
+  // Copy plane
+  for (y = 0; y < height; ++y) {
+    CopyRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+// Copy I422.
+LIBYUV_API
+int I422Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height) {
+  int halfwidth = (width + 1) >> 1;
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (height - 1) * src_stride_u;
+    src_v = src_v + (height - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
+  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
+  return 0;
+}
+
+// Copy I444.
+LIBYUV_API
+int I444Copy(const uint8* src_y, int src_stride_y,
+             const uint8* src_u, int src_stride_u,
+             const uint8* src_v, int src_stride_v,
+             uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int width, int height) {
+  if (!src_y || !src_u || !src_v ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (height - 1) * src_stride_u;
+    src_v = src_v + (height - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+  return 0;
+}
+
+// Copy I400.
+LIBYUV_API
+int I400ToI400(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height) {
+  if (!src_y || !dst_y || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  return 0;
+}
+
+// Convert I420 to I400.
+LIBYUV_API
+int I420ToI400(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height) {
+  if (!src_y || !dst_y || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  return 0;
+}
+
+// Mirror a plane of data.
+void MirrorPlane(const uint8* src_y, int src_stride_y,
+                 uint8* dst_y, int dst_stride_y,
+                 int width, int height) {
+  int y;
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  }
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  }
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  }
+#endif
+#if defined(HAS_MIRRORROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
+    MirrorRow = MirrorRow_AVX2;
+  }
+#endif
+
+  // Mirror plane
+  for (y = 0; y < height; ++y) {
+    MirrorRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
+
+// Convert YUY2 to I422.
+LIBYUV_API
+int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix) =
+      YUY2ToUV422Row_C;
+  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+      YUY2ToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+    src_stride_yuy2 = -src_stride_yuy2;
+  }
+  // Coalesce rows.
+  if (src_stride_yuy2 == width * 2 &&
+      dst_stride_y == width &&
+      dst_stride_u * 2 == width &&
+      dst_stride_v * 2 == width) {
+    width *= height;
+    height = 1;
+    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_YUY2TOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
+    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
+      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
+        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          YUY2ToYRow = YUY2ToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
+    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
+      YUY2ToYRow = YUY2ToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YUY2ToYRow = YUY2ToYRow_Any_NEON;
+    if (width >= 16) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_NEON;
+      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+    YUY2ToYRow(src_yuy2, dst_y, width);
+    src_yuy2 += src_stride_yuy2;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
+// Convert UYVY to I422.
+LIBYUV_API
+int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  void (*UYVYToUV422Row)(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix) =
+      UYVYToUV422Row_C;
+  void (*UYVYToYRow)(const uint8* src_uyvy,
+                     uint8* dst_y, int pix) = UYVYToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
+    src_stride_uyvy = -src_stride_uyvy;
+  }
+  // Coalesce rows.
+  if (src_stride_uyvy == width * 2 &&
+      dst_stride_y == width &&
+      dst_stride_u * 2 == width &&
+      dst_stride_v * 2 == width) {
+    width *= height;
+    height = 1;
+    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+  }
+#if defined(HAS_UYVYTOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
+    UYVYToYRow = UYVYToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
+      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
+        UYVYToUV422Row = UYVYToUV422Row_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          UYVYToYRow = UYVYToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_UYVYTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
+    UYVYToYRow = UYVYToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      UYVYToUV422Row = UYVYToUV422Row_AVX2;
+      UYVYToYRow = UYVYToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_UYVYTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    UYVYToYRow = UYVYToYRow_Any_NEON;
+    if (width >= 16) {
+      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      UYVYToYRow = UYVYToYRow_NEON;
+      UYVYToUV422Row = UYVYToUV422Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
+    UYVYToYRow(src_uyvy, dst_y, width);
+    src_uyvy += src_stride_uyvy;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
+// Mirror I400 with optional flipping
+LIBYUV_API
+int I400Mirror(const uint8* src_y, int src_stride_y,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height) {
+  if (!src_y || !dst_y ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+
+  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  return 0;
+}
+
+// Mirror I420 with optional flipping
+LIBYUV_API
+int I420Mirror(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  if (dst_y) {
+    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+  return 0;
+}
+
+// ARGB mirror.
+LIBYUV_API
+int ARGBMirror(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
+      ARGBMirrorRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+
+#if defined(HAS_ARGBMIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
+  }
+#endif
+#if defined(HAS_ARGBMIRRORROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
+    ARGBMirrorRow = ARGBMirrorRow_AVX2;
+  }
+#endif
+#if defined(HAS_ARGBMIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
+    ARGBMirrorRow = ARGBMirrorRow_NEON;
+  }
+#endif
+
+  // Mirror plane
+  for (y = 0; y < height; ++y) {
+    ARGBMirrorRow(src_argb, dst_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Get a blender that optimized for the CPU, alignment and pixel count.
+// As there are 6 blenders to choose from, the caller should try to use
+// the same blend function for all pixels if possible.
+LIBYUV_API
+ARGBBlendRow GetARGBBlend() {
+  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width) = ARGBBlendRow_C;
+#if defined(HAS_ARGBBLENDROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBBlendRow = ARGBBlendRow_SSSE3;
+    return ARGBBlendRow;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBBlendRow = ARGBBlendRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBBLENDROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBBlendRow = ARGBBlendRow_NEON;
+  }
+#endif
+  return ARGBBlendRow;
+}
+
+// Alpha Blend 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
+              const uint8* src_argb1, int src_stride_argb1,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height) {
+  int y;
+  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
+                       uint8* dst_argb, int width) = GetARGBBlend();
+  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb0 == width * 4 &&
+      src_stride_argb1 == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+  }
+
+  for (y = 0; y < height; ++y) {
+    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
+    src_argb0 += src_stride_argb0;
+    src_argb1 += src_stride_argb1;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Multiply 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
+                 const uint8* src_argb1, int src_stride_argb1,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height) {
+  int y;
+  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
+                          int width) = ARGBMultiplyRow_C;
+  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb0 == width * 4 &&
+      src_stride_argb1 == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBMULTIPLYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBMULTIPLYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBMULTIPLYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
+    }
+  }
+#endif
+
+  // Multiply plane
+  for (y = 0; y < height; ++y) {
+    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
+    src_argb0 += src_stride_argb0;
+    src_argb1 += src_stride_argb1;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Add 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
+            const uint8* src_argb1, int src_stride_argb1,
+            uint8* dst_argb, int dst_stride_argb,
+            int width, int height) {
+  int y;
+  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
+                     int width) = ARGBAddRow_C;
+  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb0 == width * 4 &&
+      src_stride_argb1 == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBAddRow = ARGBAddRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    ARGBAddRow = ARGBAddRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAddRow = ARGBAddRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBADDROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    ARGBAddRow = ARGBAddRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAddRow = ARGBAddRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBADDROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBAddRow = ARGBAddRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAddRow = ARGBAddRow_NEON;
+    }
+  }
+#endif
+
+  // Add plane
+  for (y = 0; y < height; ++y) {
+    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
+    src_argb0 += src_stride_argb0;
+    src_argb1 += src_stride_argb1;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Subtract 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
+                 const uint8* src_argb1, int src_stride_argb1,
+                 uint8* dst_argb, int dst_stride_argb,
+                 int width, int height) {
+  int y;
+  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
+                          int width) = ARGBSubtractRow_C;
+  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb0 == width * 4 &&
+      src_stride_argb1 == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBSUBTRACTROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBSubtractRow = ARGBSubtractRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBSUBTRACTROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBSubtractRow = ARGBSubtractRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBSUBTRACTROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBSubtractRow = ARGBSubtractRow_NEON;
+    }
+  }
+#endif
+
+  // Subtract plane
+  for (y = 0; y < height; ++y) {
+    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
+    src_argb0 += src_stride_argb0;
+    src_argb1 += src_stride_argb1;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert I422 to BGRA.
+LIBYUV_API
+int I422ToBGRA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_bgra, int dst_stride_bgra,
+               int width, int height) {
+  int y;
+  void (*I422ToBGRARow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToBGRARow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_bgra ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
+    dst_stride_bgra = -dst_stride_bgra;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_bgra == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
+  }
+#if defined(HAS_I422TOBGRAROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToBGRARow = I422ToBGRARow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToBGRARow = I422ToBGRARow_NEON;
+    }
+  }
+#elif defined(HAS_I422TOBGRAROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
+        I422ToBGRARow = I422ToBGRARow_SSSE3;
+      }
+    }
+  }
+#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
+    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+    dst_bgra += dst_stride_bgra;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  int y;
+  void (*I422ToABGRRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToABGRRow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_abgr ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
+    dst_stride_abgr = -dst_stride_abgr;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_abgr == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
+  }
+#if defined(HAS_I422TOABGRROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToABGRRow = I422ToABGRRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToABGRRow = I422ToABGRRow_NEON;
+    }
+  }
+#elif defined(HAS_I422TOABGRROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
+        I422ToABGRRow = I422ToABGRRow_SSSE3;
+      }
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+    dst_abgr += dst_stride_abgr;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I422 to RGBA.
+LIBYUV_API
+int I422ToRGBA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height) {
+  int y;
+  void (*I422ToRGBARow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToRGBARow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_rgba ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
+    dst_stride_rgba = -dst_stride_rgba;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_rgba == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
+  }
+#if defined(HAS_I422TORGBAROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToRGBARow = I422ToRGBARow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToRGBARow = I422ToRGBARow_NEON;
+    }
+  }
+#elif defined(HAS_I422TORGBAROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
+        I422ToRGBARow = I422ToRGBARow_SSSE3;
+      }
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+    dst_rgba += dst_stride_rgba;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert NV12 to RGB565.
+LIBYUV_API
+int NV12ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  int y;
+  void (*NV12ToRGB565Row)(const uint8* y_buf,
+                          const uint8* uv_buf,
+                          uint8* rgb_buf,
+                          int width) = NV12ToRGB565Row_C;
+  if (!src_y || !src_uv || !dst_rgb565 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
+    }
+  }
+#elif defined(HAS_NV12TORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
+    dst_rgb565 += dst_stride_rgb565;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_uv += src_stride_uv;
+    }
+  }
+  return 0;
+}
+
+// Convert NV21 to RGB565.
+LIBYUV_API
+int NV21ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_vu, int src_stride_vu,
+                 uint8* dst_rgb565, int dst_stride_rgb565,
+                 int width, int height) {
+  int y;
+  void (*NV21ToRGB565Row)(const uint8* y_buf,
+                          const uint8* src_vu,
+                          uint8* rgb_buf,
+                          int width) = NV21ToRGB565Row_C;
+  if (!src_y || !src_vu || !dst_rgb565 ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
+    dst_stride_rgb565 = -dst_stride_rgb565;
+  }
+#if defined(HAS_NV21TORGB565ROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
+    }
+  }
+#elif defined(HAS_NV21TORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
+    dst_rgb565 += dst_stride_rgb565;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_vu += src_stride_vu;
+    }
+  }
+  return 0;
+}
+
+LIBYUV_API
+void SetPlane(uint8* dst_y, int dst_stride_y,
+              int width, int height,
+              uint32 value) {
+  int y;
+  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
+  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
+  // Coalesce rows.
+  if (dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    dst_stride_y = 0;
+  }
+#if defined(HAS_SETROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) &&
+      IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    SetRow = SetRow_NEON;
+  }
+#endif
+#if defined(HAS_SETROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    SetRow = SetRow_X86;
+  }
+#endif
+
+  // Set plane
+  for (y = 0; y < height; ++y) {
+    SetRow(dst_y, v32, width);
+    dst_y += dst_stride_y;
+  }
+}
+
+// Draw a rectangle into I420
+LIBYUV_API
+int I420Rect(uint8* dst_y, int dst_stride_y,
+             uint8* dst_u, int dst_stride_u,
+             uint8* dst_v, int dst_stride_v,
+             int x, int y,
+             int width, int height,
+             int value_y, int value_u, int value_v) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  uint8* start_y = dst_y + y * dst_stride_y + x;
+  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
+  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
+  if (!dst_y || !dst_u || !dst_v ||
+      width <= 0 || height <= 0 ||
+      x < 0 || y < 0 ||
+      value_y < 0 || value_y > 255 ||
+      value_u < 0 || value_u > 255 ||
+      value_v < 0 || value_v > 255) {
+    return -1;
+  }
+
+  SetPlane(start_y, dst_stride_y, width, height, value_y);
+  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
+  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
+  return 0;
+}
+
+// Draw a rectangle into ARGB
+LIBYUV_API
+int ARGBRect(uint8* dst_argb, int dst_stride_argb,
+             int dst_x, int dst_y,
+             int width, int height,
+             uint32 value) {
+  if (!dst_argb ||
+      width <= 0 || height <= 0 ||
+      dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_SETROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
+    return 0;
+  }
+#endif
+#if defined(HAS_SETROW_X86)
+  if (TestCpuFlag(kCpuHasX86)) {
+    ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
+    return 0;
+  }
+#endif
+  ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
+  return 0;
+}
+
+// Convert unattentuated ARGB to preattenuated ARGB.
+// An unattenutated ARGB alpha blend uses the formula
+// p = a * f + (1 - a) * b
+// where
+//   p is output pixel
+//   f is foreground pixel
+//   b is background pixel
+//   a is alpha value from foreground pixel
+// An preattenutated ARGB alpha blend uses the formula
+// p = f + (1 - a) * b
+// where
+//   f is foreground pixel premultiplied by alpha
+
+LIBYUV_API
+int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int width, int height) {
+  int y;
+  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
+                           int width) = ARGBAttenuateRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBATTENUATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBAttenuateRow(src_argb, dst_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert preattentuated ARGB to unattenuated ARGB.
+LIBYUV_API
+int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height) {
+  int y;
+  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
+                             int width) = ARGBUnattenuateRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
+    }
+  }
+#endif
+// TODO(fbarchard): Neon version.
+
+  for (y = 0; y < height; ++y) {
+    ARGBUnattenuateRow(src_argb, dst_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Convert ARGB to Grayed ARGB.
+LIBYUV_API
+int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  int y;
+  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
+                      int width) = ARGBGrayRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBGRAYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBGrayRow = ARGBGrayRow_SSSE3;
+  }
+#elif defined(HAS_ARGBGRAYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBGrayRow = ARGBGrayRow_NEON;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBGrayRow(src_argb, dst_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Make a rectangle of ARGB gray scale.
+LIBYUV_API
+int ARGBGray(uint8* dst_argb, int dst_stride_argb,
+             int dst_x, int dst_y,
+             int width, int height) {
+  int y;
+  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
+                      int width) = ARGBGrayRow_C;
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBGRAYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBGrayRow = ARGBGrayRow_SSSE3;
+  }
+#elif defined(HAS_ARGBGRAYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBGrayRow = ARGBGrayRow_NEON;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    ARGBGrayRow(dst, dst, width);
+    dst += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Make a rectangle of ARGB Sepia tone.
+LIBYUV_API
+int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
+              int dst_x, int dst_y, int width, int height) {
+  int y;
+  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBSEPIAROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
+  }
+#elif defined(HAS_ARGBSEPIAROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBSepiaRow = ARGBSepiaRow_NEON;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    ARGBSepiaRow(dst, width);
+    dst += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Apply a 4x4 matrix to each ARGB pixel.
+// Note: Normally for shading, but can be used to swizzle or invert.
+LIBYUV_API
+int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_argb, int dst_stride_argb,
+                    const int8* matrix_argb,
+                    int width, int height) {
+  int y;
+  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
+      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
+  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
+  }
+#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Apply a 4x3 matrix to each ARGB pixel.
+// Deprecated.
+LIBYUV_API
+int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
+                   const int8* matrix_rgb,
+                   int dst_x, int dst_y, int width, int height) {
+  SIMD_ALIGNED(int8 matrix_argb[16]);
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
+      dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+
+  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
+  matrix_argb[0] = matrix_rgb[0] / 2;
+  matrix_argb[1] = matrix_rgb[1] / 2;
+  matrix_argb[2] = matrix_rgb[2] / 2;
+  matrix_argb[3] = matrix_rgb[3] / 2;
+  matrix_argb[4] = matrix_rgb[4] / 2;
+  matrix_argb[5] = matrix_rgb[5] / 2;
+  matrix_argb[6] = matrix_rgb[6] / 2;
+  matrix_argb[7] = matrix_rgb[7] / 2;
+  matrix_argb[8] = matrix_rgb[8] / 2;
+  matrix_argb[9] = matrix_rgb[9] / 2;
+  matrix_argb[10] = matrix_rgb[10] / 2;
+  matrix_argb[11] = matrix_rgb[11] / 2;
+  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
+  matrix_argb[15] = 64;  // 1.0
+
+  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
+                         dst, dst_stride_argb,
+                         &matrix_argb[0], width, height);
+}
+
+// Apply a color table each ARGB pixel.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
+                   const uint8* table_argb,
+                   int dst_x, int dst_y, int width, int height) {
+  int y;
+  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
+                            int width) = ARGBColorTableRow_C;
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
+      dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBCOLORTABLEROW_X86)
+  if (TestCpuFlag(kCpuHasX86)) {
+    ARGBColorTableRow = ARGBColorTableRow_X86;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    ARGBColorTableRow(dst, table_argb, width);
+    dst += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Apply a color table each ARGB pixel but preserve destination alpha.
+// Table contains 256 ARGB values.
+LIBYUV_API
+int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
+                  const uint8* table_argb,
+                  int dst_x, int dst_y, int width, int height) {
+  int y;
+  void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
+                           int width) = RGBColorTableRow_C;
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
+      dst_x < 0 || dst_y < 0) {
+    return -1;
+  }
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_RGBCOLORTABLEROW_X86)
+  if (TestCpuFlag(kCpuHasX86)) {
+    RGBColorTableRow = RGBColorTableRow_X86;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    RGBColorTableRow(dst, table_argb, width);
+    dst += dst_stride_argb;
+  }
+  return 0;
+}
+
+// ARGBQuantize is used to posterize art.
+// e.g. rgb / qvalue * qvalue + qvalue / 2
+// But the low levels implement efficiently with 3 parameters, and could be
+// used for other high level operations.
+// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
+// where scale is 1 / interval_size as a fixed point value.
+// The divide is replaces with a multiply by reciprocal fixed point multiply.
+// Caveat - although SSE2 saturates, the C function does not and should be used
+// with care if doing anything but quantization.
+LIBYUV_API
+int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
+                 int scale, int interval_size, int interval_offset,
+                 int dst_x, int dst_y, int width, int height) {
+  int y;
+  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width) = ARGBQuantizeRow_C;
+  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
+      interval_size < 1 || interval_size > 255) {
+    return -1;
+  }
+  // Coalesce rows.
+  if (dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBQUANTIZEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
+  }
+#elif defined(HAS_ARGBQUANTIZEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
+    dst += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Computes table of cumulative sum for image where the value is the sum
+// of all values above and to the left of the entry. Used by ARGBBlur.
+LIBYUV_API
+int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
+                             int32* dst_cumsum, int dst_stride32_cumsum,
+                             int width, int height) {
+  int y;
+  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
+      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
+  int32* previous_cumsum = dst_cumsum;
+  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
+    return -1;
+  }
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
+  }
+#endif
+  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
+  for (y = 0; y < height; ++y) {
+    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
+    previous_cumsum = dst_cumsum;
+    dst_cumsum += dst_stride32_cumsum;
+    src_argb += src_stride_argb;
+  }
+  return 0;
+}
+
+// Blur ARGB image.
+// Caller should allocate CumulativeSum table of width * height * 16 bytes
+// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
+// as the buffer is treated as circular.
+LIBYUV_API
+int ARGBBlur(const uint8* src_argb, int src_stride_argb,
+             uint8* dst_argb, int dst_stride_argb,
+             int32* dst_cumsum, int dst_stride32_cumsum,
+             int width, int height, int radius) {
+  int y;
+  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
+      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
+  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
+      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
+  int32* cumsum_bot_row;
+  int32* max_cumsum_bot_row;
+  int32* cumsum_top_row;
+
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  if (radius > height) {
+    radius = height;
+  }
+  if (radius > (width / 2 - 1)) {
+    radius = width / 2 - 1;
+  }
+  if (radius <= 0) {
+    return -1;
+  }
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
+    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
+  }
+#endif
+  // Compute enough CumulativeSum for first row to be blurred. After this
+  // one row of CumulativeSum is updated at a time.
+  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
+                           dst_cumsum, dst_stride32_cumsum,
+                           width, radius);
+
+  src_argb = src_argb + radius * src_stride_argb;
+  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
+
+  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
+  cumsum_top_row = &dst_cumsum[0];
+
+  for (y = 0; y < height; ++y) {
+    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
+    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
+    int area = radius * (bot_y - top_y);
+    int boxwidth = radius * 4;
+    int x;
+    int n;
+
+    // Increment cumsum_top_row pointer with circular buffer wrap around.
+    if (top_y) {
+      cumsum_top_row += dst_stride32_cumsum;
+      if (cumsum_top_row >= max_cumsum_bot_row) {
+        cumsum_top_row = dst_cumsum;
+      }
+    }
+    // Increment cumsum_bot_row pointer with circular buffer wrap around and
+    // then fill in a row of CumulativeSum.
+    if ((y + radius) < height) {
+      const int32* prev_cumsum_bot_row = cumsum_bot_row;
+      cumsum_bot_row += dst_stride32_cumsum;
+      if (cumsum_bot_row >= max_cumsum_bot_row) {
+        cumsum_bot_row = dst_cumsum;
+      }
+      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
+                              width);
+      src_argb += src_stride_argb;
+    }
+
+    // Left clipped.
+    for (x = 0; x < radius + 1; ++x) {
+      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
+                                boxwidth, area, &dst_argb[x * 4], 1);
+      area += (bot_y - top_y);
+      boxwidth += 4;
+    }
+
+    // Middle unclipped.
+    n = (width - 1) - radius - x + 1;
+    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
+                              boxwidth, area, &dst_argb[x * 4], n);
+
+    // Right clipped.
+    for (x += n; x <= width - 1; ++x) {
+      area -= (bot_y - top_y);
+      boxwidth -= 4;
+      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
+                                cumsum_bot_row + (x - radius - 1) * 4,
+                                boxwidth, area, &dst_argb[x * 4], 1);
+    }
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Multiply ARGB image by a specified ARGB value.
+LIBYUV_API
+int ARGBShade(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height, uint32 value) {
+  int y;
+  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
+                       int width, uint32 value) = ARGBShadeRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
+    return -1;
+  }
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBSHADEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    ARGBShadeRow = ARGBShadeRow_SSE2;
+  }
+#elif defined(HAS_ARGBSHADEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    ARGBShadeRow = ARGBShadeRow_NEON;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBShadeRow(src_argb, dst_argb, width, value);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Interpolate 2 ARGB images by specified amount (0 to 255).
+LIBYUV_API
+int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
+                    const uint8* src_argb1, int src_stride_argb1,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height, int interpolation) {
+  int y;
+  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride, int dst_width,
+                         int source_y_fraction) = InterpolateRow_C;
+  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb0 == width * 4 &&
+      src_stride_argb1 == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+  }
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
+          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
+          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(width, 4)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
+      IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
+      IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
+                   width * 4, interpolation);
+    src_argb0 += src_stride_argb0;
+    src_argb1 += src_stride_argb1;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
+LIBYUV_API
+int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
+                uint8* dst_argb, int dst_stride_argb,
+                const uint8* shuffler, int width, int height) {
+  int y;
+  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
+                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
+  if (!src_bgra || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
+    src_stride_bgra = -src_stride_bgra;
+  }
+  // Coalesce rows.
+  if (src_stride_bgra == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_bgra = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBSHUFFLEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
+    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBShuffleRow = ARGBShuffleRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
+    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        ARGBShuffleRow = ARGBShuffleRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
+    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBShuffleRow = ARGBShuffleRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBSHUFFLEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
+    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBShuffleRow = ARGBShuffleRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
+    src_bgra += src_stride_bgra;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Sobel ARGB effect.
+static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
+                        uint8* dst_argb, int dst_stride_argb,
+                        int width, int height,
+                        void (*SobelRow)(const uint8* src_sobelx,
+                                         const uint8* src_sobely,
+                                         uint8* dst, int width)) {
+  int y;
+  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix) = ARGBToBayerGGRow_C;
+  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) = SobelYRow_C;
+  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobely, int width) =
+      SobelXRow_C;
+  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
+  if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb  = src_argb  + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // ARGBToBayer used to select G channel from ARGB.
+#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOBAYERROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOBAYERGGROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToBayerRow = ARGBToBayerGGRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_SOBELYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    SobelYRow = SobelYRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SobelYRow = SobelYRow_NEON;
+  }
+#endif
+#if defined(HAS_SOBELXROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    SobelXRow = SobelXRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELXROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SobelXRow = SobelXRow_NEON;
+  }
+#endif
+  {
+    // 3 rows with edges before/after.
+    const int kRowSize = (width + kEdge + 15) & ~15;
+    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
+    uint8* row_sobelx = rows;
+    uint8* row_sobely = rows + kRowSize;
+    uint8* row_y = rows + kRowSize * 2;
+
+    // Convert first row.
+    uint8* row_y0 = row_y + kEdge;
+    uint8* row_y1 = row_y0 + kRowSize;
+    uint8* row_y2 = row_y1 + kRowSize;
+    ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
+    row_y0[-1] = row_y0[0];
+    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
+    ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
+    row_y1[-1] = row_y1[0];
+    memset(row_y1 + width, row_y1[width - 1], 16);
+    memset(row_y2 + width, 0, 16);
+
+    for (y = 0; y < height; ++y) {
+      // Convert next row of ARGB to Y.
+      if (y < (height - 1)) {
+        src_argb += src_stride_argb;
+      }
+      ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
+      row_y2[-1] = row_y2[0];
+      row_y2[width] = row_y2[width - 1];
+
+      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
+      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
+      SobelRow(row_sobelx, row_sobely, dst_argb, width);
+
+      // Cycle thru circular queue of 3 row_y buffers.
+      {
+        uint8* row_yt = row_y0;
+        row_y0 = row_y1;
+        row_y1 = row_y2;
+        row_y2 = row_yt;
+      }
+
+      dst_argb += dst_stride_argb;
+    }
+    free_aligned_buffer_64(rows);
+  }
+  return 0;
+}
+
+// Sobel ARGB effect.
+LIBYUV_API
+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
+              uint8* dst_argb, int dst_stride_argb,
+              int width, int height) {
+  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width) = SobelRow_C;
+#if defined(HAS_SOBELROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    SobelRow = SobelRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    SobelRow = SobelRow_NEON;
+  }
+#endif
+  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+                      width, height, SobelRow);
+}
+
+// Sobel ARGB effect with planar output.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
+                     uint8* dst_y, int dst_stride_y,
+                     int width, int height) {
+  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_, int width) = SobelToPlaneRow_C;
+#if defined(HAS_SOBELTOPLANEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    SobelToPlaneRow = SobelToPlaneRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELTOPLANEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    SobelToPlaneRow = SobelToPlaneRow_NEON;
+  }
+#endif
+  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
+                      width, height, SobelToPlaneRow);
+}
+
+// SobelXY ARGB effect.
+// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
+LIBYUV_API
+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height) {
+  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) = SobelXYRow_C;
+#if defined(HAS_SOBELXYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    SobelXYRow = SobelXYRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELXYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    SobelXYRow = SobelXYRow_NEON;
+  }
+#endif
+  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+                      width, height, SobelXYRow);
+}
+
+// Apply a 4x4 polynomial to each ARGB pixel.
+LIBYUV_API
+int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
+                   uint8* dst_argb, int dst_stride_argb,
+                   const float* poly,
+                   int width, int height) {
+  int y;
+  void (*ARGBPolynomialRow)(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width) = ARGBPolynomialRow_C;
+  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb  = src_argb  + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
+    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
+      IS_ALIGNED(width, 2)) {
+    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Apply a lumacolortable to each ARGB pixel.
+LIBYUV_API
+int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_argb, int dst_stride_argb,
+                       const uint8* luma,
+                       int width, int height) {
+  int y;
+  void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
+      int width, const uint8* luma, const uint32 lumacoeff) =
+      ARGBLumaColorTableRow_C;
+  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb  = src_argb  + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
+    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Copy Alpha from one ARGB image to another.
+LIBYUV_API
+int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int width, int height) {
+  int y;
+  void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
+      ARGBCopyAlphaRow_C;
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_argb == width * 4 &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_argb = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
+      IS_ALIGNED(width, 8)) {
+    ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
+    ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBCopyAlphaRow(src_argb, dst_argb, width);
+    src_argb += src_stride_argb;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+// Copy a planar Y channel to the alpha channel of a destination ARGB image.
+LIBYUV_API
+int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
+                     uint8* dst_argb, int dst_stride_argb,
+                     int width, int height) {
+  int y;
+  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
+      ARGBCopyYToAlphaRow_C;
+  if (!src_y || !dst_argb || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_stride_y = -src_stride_y;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_argb = 0;
+  }
+#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
+      IS_ALIGNED(width, 8)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
+    src_y += src_stride_y;
+    dst_argb += dst_stride_argb;
+  }
+  return 0;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/rotate.cc b/src/main/jni/libyuv/source/rotate.cc
new file mode 100644
index 000000000..fe0e72b13
--- /dev/null
+++ b/src/main/jni/libyuv/source/rotate.cc
@@ -0,0 +1,1315 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/convert.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+#if defined(__APPLE__) && defined(__i386__)
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".private_extern _" #name "                \n"                             \
+    ".align 4,0x90                             \n"                             \
+"_" #name ":                                   \n"
+#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".align 4,0x90                             \n"                             \
+"_" #name ":                                   \n"
+#else
+#define DECLARE_FUNCTION(name)                                                 \
+    ".text                                     \n"                             \
+    ".align 4,0x90                             \n"                             \
+#name ":                                       \n"
+#endif
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+#define HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+#define HAS_MIRRORROW_UV_NEON
+void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+#define HAS_TRANSPOSE_WX8_NEON
+void TransposeWx8_NEON(const uint8* src, int src_stride,
+                       uint8* dst, int dst_stride, int width);
+#define HAS_TRANSPOSE_UVWX8_NEON
+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int width);
+#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__aarch64__) || defined(LIBYUV_NEON))
+// #define HAS_MIRRORROW_NEON
+// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
+// #define HAS_MIRRORROW_UV_NEON
+// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+// #define HAS_TRANSPOSE_WX8_NEON
+// void TransposeWx8_NEON(const uint8* src, int src_stride,
+//                        uint8* dst, int dst_stride, int width);
+// #define HAS_TRANSPOSE_UVWX8_NEON
+// void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+//                          uint8* dst_a, int dst_stride_a,
+//                          uint8* dst_b, int dst_stride_b,
+//                          int width);
+#endif  // defined(__ARM_NEON__)
+
+#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
+    defined(__mips__) && \
+    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
+void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                             uint8* dst, int dst_stride, int width);
+
+void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
+                                  uint8* dst, int dst_stride, int width);
+#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
+void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                               uint8* dst_a, int dst_stride_a,
+                               uint8* dst_b, int dst_stride_b,
+                               int width);
+#endif  // defined(__mips__)
+
+#if !defined(LIBYUV_DISABLE_X86) && \
+    defined(_M_IX86) && defined(_MSC_VER)
+#define HAS_TRANSPOSE_WX8_SSSE3
+__declspec(naked) __declspec(align(16))
+static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+                               uint8* dst, int dst_stride, int width) {
+  __asm {
+    push      edi
+    push      esi
+    push      ebp
+    mov       eax, [esp + 12 + 4]   // src
+    mov       edi, [esp + 12 + 8]   // src_stride
+    mov       edx, [esp + 12 + 12]  // dst
+    mov       esi, [esp + 12 + 16]  // dst_stride
+    mov       ecx, [esp + 12 + 20]  // width
+
+    // Read in the data from the source pointer.
+    // First round of bit swap.
+    align      4
+ convertloop:
+    movq      xmm0, qword ptr [eax]
+    lea       ebp, [eax + 8]
+    movq      xmm1, qword ptr [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    punpcklbw xmm0, xmm1
+    movq      xmm2, qword ptr [eax]
+    movdqa    xmm1, xmm0
+    palignr   xmm1, xmm1, 8
+    movq      xmm3, qword ptr [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    punpcklbw xmm2, xmm3
+    movdqa    xmm3, xmm2
+    movq      xmm4, qword ptr [eax]
+    palignr   xmm3, xmm3, 8
+    movq      xmm5, qword ptr [eax + edi]
+    punpcklbw xmm4, xmm5
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm5, xmm4
+    movq      xmm6, qword ptr [eax]
+    palignr   xmm5, xmm5, 8
+    movq      xmm7, qword ptr [eax + edi]
+    punpcklbw xmm6, xmm7
+    mov       eax, ebp
+    movdqa    xmm7, xmm6
+    palignr   xmm7, xmm7, 8
+    // Second round of bit swap.
+    punpcklwd xmm0, xmm2
+    punpcklwd xmm1, xmm3
+    movdqa    xmm2, xmm0
+    movdqa    xmm3, xmm1
+    palignr   xmm2, xmm2, 8
+    palignr   xmm3, xmm3, 8
+    punpcklwd xmm4, xmm6
+    punpcklwd xmm5, xmm7
+    movdqa    xmm6, xmm4
+    movdqa    xmm7, xmm5
+    palignr   xmm6, xmm6, 8
+    palignr   xmm7, xmm7, 8
+    // Third round of bit swap.
+    // Write to the destination pointer.
+    punpckldq xmm0, xmm4
+    movq      qword ptr [edx], xmm0
+    movdqa    xmm4, xmm0
+    palignr   xmm4, xmm4, 8
+    movq      qword ptr [edx + esi], xmm4
+    lea       edx, [edx + 2 * esi]
+    punpckldq xmm2, xmm6
+    movdqa    xmm6, xmm2
+    palignr   xmm6, xmm6, 8
+    movq      qword ptr [edx], xmm2
+    punpckldq xmm1, xmm5
+    movq      qword ptr [edx + esi], xmm6
+    lea       edx, [edx + 2 * esi]
+    movdqa    xmm5, xmm1
+    movq      qword ptr [edx], xmm1
+    palignr   xmm5, xmm5, 8
+    punpckldq xmm3, xmm7
+    movq      qword ptr [edx + esi], xmm5
+    lea       edx, [edx + 2 * esi]
+    movq      qword ptr [edx], xmm3
+    movdqa    xmm7, xmm3
+    palignr   xmm7, xmm7, 8
+    sub       ecx, 8
+    movq      qword ptr [edx + esi], xmm7
+    lea       edx, [edx + 2 * esi]
+    jg        convertloop
+
+    pop       ebp
+    pop       esi
+    pop       edi
+    ret
+  }
+}
+
+#define HAS_TRANSPOSE_UVWX8_SSE2
+__declspec(naked) __declspec(align(16))
+static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+                                uint8* dst_a, int dst_stride_a,
+                                uint8* dst_b, int dst_stride_b,
+                                int w) {
+  __asm {
+    push      ebx
+    push      esi
+    push      edi
+    push      ebp
+    mov       eax, [esp + 16 + 4]   // src
+    mov       edi, [esp + 16 + 8]   // src_stride
+    mov       edx, [esp + 16 + 12]  // dst_a
+    mov       esi, [esp + 16 + 16]  // dst_stride_a
+    mov       ebx, [esp + 16 + 20]  // dst_b
+    mov       ebp, [esp + 16 + 24]  // dst_stride_b
+    mov       ecx, esp
+    sub       esp, 4 + 16
+    and       esp, ~15
+    mov       [esp + 16], ecx
+    mov       ecx, [ecx + 16 + 28]  // w
+
+    align      4
+ convertloop:
+    // Read in the data from the source pointer.
+    // First round of bit swap.
+    movdqa    xmm0, [eax]
+    movdqa    xmm1, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm0  // use xmm7 as temp register.
+    punpcklbw xmm0, xmm1
+    punpckhbw xmm7, xmm1
+    movdqa    xmm1, xmm7
+    movdqa    xmm2, [eax]
+    movdqa    xmm3, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm2
+    punpcklbw xmm2, xmm3
+    punpckhbw xmm7, xmm3
+    movdqa    xmm3, xmm7
+    movdqa    xmm4, [eax]
+    movdqa    xmm5, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    xmm7, xmm4
+    punpcklbw xmm4, xmm5
+    punpckhbw xmm7, xmm5
+    movdqa    xmm5, xmm7
+    movdqa    xmm6, [eax]
+    movdqa    xmm7, [eax + edi]
+    lea       eax, [eax + 2 * edi]
+    movdqa    [esp], xmm5  // backup xmm5
+    neg       edi
+    movdqa    xmm5, xmm6   // use xmm5 as temp register.
+    punpcklbw xmm6, xmm7
+    punpckhbw xmm5, xmm7
+    movdqa    xmm7, xmm5
+    lea       eax, [eax + 8 * edi + 16]
+    neg       edi
+    // Second round of bit swap.
+    movdqa    xmm5, xmm0
+    punpcklwd xmm0, xmm2
+    punpckhwd xmm5, xmm2
+    movdqa    xmm2, xmm5
+    movdqa    xmm5, xmm1
+    punpcklwd xmm1, xmm3
+    punpckhwd xmm5, xmm3
+    movdqa    xmm3, xmm5
+    movdqa    xmm5, xmm4
+    punpcklwd xmm4, xmm6
+    punpckhwd xmm5, xmm6
+    movdqa    xmm6, xmm5
+    movdqa    xmm5, [esp]  // restore xmm5
+    movdqa    [esp], xmm6  // backup xmm6
+    movdqa    xmm6, xmm5    // use xmm6 as temp register.
+    punpcklwd xmm5, xmm7
+    punpckhwd xmm6, xmm7
+    movdqa    xmm7, xmm6
+    // Third round of bit swap.
+    // Write to the destination pointer.
+    movdqa    xmm6, xmm0
+    punpckldq xmm0, xmm4
+    punpckhdq xmm6, xmm4
+    movdqa    xmm4, xmm6
+    movdqa    xmm6, [esp]  // restore xmm6
+    movlpd    qword ptr [edx], xmm0
+    movhpd    qword ptr [ebx], xmm0
+    movlpd    qword ptr [edx + esi], xmm4
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm4
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm2   // use xmm0 as the temp register.
+    punpckldq xmm2, xmm6
+    movlpd    qword ptr [edx], xmm2
+    movhpd    qword ptr [ebx], xmm2
+    punpckhdq xmm0, xmm6
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm1   // use xmm0 as the temp register.
+    punpckldq xmm1, xmm5
+    movlpd    qword ptr [edx], xmm1
+    movhpd    qword ptr [ebx], xmm1
+    punpckhdq xmm0, xmm5
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    movdqa    xmm0, xmm3   // use xmm0 as the temp register.
+    punpckldq xmm3, xmm7
+    movlpd    qword ptr [edx], xmm3
+    movhpd    qword ptr [ebx], xmm3
+    punpckhdq xmm0, xmm7
+    sub       ecx, 8
+    movlpd    qword ptr [edx + esi], xmm0
+    lea       edx, [edx + 2 * esi]
+    movhpd    qword ptr [ebx + ebp], xmm0
+    lea       ebx, [ebx + 2 * ebp]
+    jg        convertloop
+
+    mov       esp, [esp + 16]
+    pop       ebp
+    pop       edi
+    pop       esi
+    pop       ebx
+    ret
+  }
+}
+#elif !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
+#define HAS_TRANSPOSE_WX8_SSSE3
+static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
+                               uint8* dst, int dst_stride, int width) {
+  asm volatile (
+    // Read in the data from the source pointer.
+    // First round of bit swap.
+    ".p2align  2                                 \n"
+  "1:                                            \n"
+    "movq       (%0),%%xmm0                      \n"
+    "movq       (%0,%3),%%xmm1                   \n"
+    "lea        (%0,%3,2),%0                     \n"
+    "punpcklbw  %%xmm1,%%xmm0                    \n"
+    "movq       (%0),%%xmm2                      \n"
+    "movdqa     %%xmm0,%%xmm1                    \n"
+    "palignr    $0x8,%%xmm1,%%xmm1               \n"
+    "movq       (%0,%3),%%xmm3                   \n"
+    "lea        (%0,%3,2),%0                     \n"
+    "punpcklbw  %%xmm3,%%xmm2                    \n"
+    "movdqa     %%xmm2,%%xmm3                    \n"
+    "movq       (%0),%%xmm4                      \n"
+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
+    "movq       (%0,%3),%%xmm5                   \n"
+    "lea        (%0,%3,2),%0                     \n"
+    "punpcklbw  %%xmm5,%%xmm4                    \n"
+    "movdqa     %%xmm4,%%xmm5                    \n"
+    "movq       (%0),%%xmm6                      \n"
+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
+    "movq       (%0,%3),%%xmm7                   \n"
+    "lea        (%0,%3,2),%0                     \n"
+    "punpcklbw  %%xmm7,%%xmm6                    \n"
+    "neg        %3                               \n"
+    "movdqa     %%xmm6,%%xmm7                    \n"
+    "lea        0x8(%0,%3,8),%0                  \n"
+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
+    "neg        %3                               \n"
+     // Second round of bit swap.
+    "punpcklwd  %%xmm2,%%xmm0                    \n"
+    "punpcklwd  %%xmm3,%%xmm1                    \n"
+    "movdqa     %%xmm0,%%xmm2                    \n"
+    "movdqa     %%xmm1,%%xmm3                    \n"
+    "palignr    $0x8,%%xmm2,%%xmm2               \n"
+    "palignr    $0x8,%%xmm3,%%xmm3               \n"
+    "punpcklwd  %%xmm6,%%xmm4                    \n"
+    "punpcklwd  %%xmm7,%%xmm5                    \n"
+    "movdqa     %%xmm4,%%xmm6                    \n"
+    "movdqa     %%xmm5,%%xmm7                    \n"
+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
+    // Third round of bit swap.
+    // Write to the destination pointer.
+    "punpckldq  %%xmm4,%%xmm0                    \n"
+    "movq       %%xmm0,(%1)                      \n"
+    "movdqa     %%xmm0,%%xmm4                    \n"
+    "palignr    $0x8,%%xmm4,%%xmm4               \n"
+    "movq       %%xmm4,(%1,%4)                   \n"
+    "lea        (%1,%4,2),%1                     \n"
+    "punpckldq  %%xmm6,%%xmm2                    \n"
+    "movdqa     %%xmm2,%%xmm6                    \n"
+    "movq       %%xmm2,(%1)                      \n"
+    "palignr    $0x8,%%xmm6,%%xmm6               \n"
+    "punpckldq  %%xmm5,%%xmm1                    \n"
+    "movq       %%xmm6,(%1,%4)                   \n"
+    "lea        (%1,%4,2),%1                     \n"
+    "movdqa     %%xmm1,%%xmm5                    \n"
+    "movq       %%xmm1,(%1)                      \n"
+    "palignr    $0x8,%%xmm5,%%xmm5               \n"
+    "movq       %%xmm5,(%1,%4)                   \n"
+    "lea        (%1,%4,2),%1                     \n"
+    "punpckldq  %%xmm7,%%xmm3                    \n"
+    "movq       %%xmm3,(%1)                      \n"
+    "movdqa     %%xmm3,%%xmm7                    \n"
+    "palignr    $0x8,%%xmm7,%%xmm7               \n"
+    "sub        $0x8,%2                          \n"
+    "movq       %%xmm7,(%1,%4)                   \n"
+    "lea        (%1,%4,2),%1                     \n"
+    "jg         1b                               \n"
+    : "+r"(src),    // %0
+      "+r"(dst),    // %1
+      "+r"(width)   // %2
+    : "r"((intptr_t)(src_stride)),  // %3
+      "r"((intptr_t)(dst_stride))   // %4
+    : "memory", "cc"
+  #if defined(__SSE2__)
+      , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+  #endif
+  );
+}
+
+#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
+#define HAS_TRANSPOSE_UVWX8_SSE2
+void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int w);
+  asm (
+    DECLARE_FUNCTION(TransposeUVWx8_SSE2)
+    "push   %ebx                               \n"
+    "push   %esi                               \n"
+    "push   %edi                               \n"
+    "push   %ebp                               \n"
+    "mov    0x14(%esp),%eax                    \n"
+    "mov    0x18(%esp),%edi                    \n"
+    "mov    0x1c(%esp),%edx                    \n"
+    "mov    0x20(%esp),%esi                    \n"
+    "mov    0x24(%esp),%ebx                    \n"
+    "mov    0x28(%esp),%ebp                    \n"
+    "mov    %esp,%ecx                          \n"
+    "sub    $0x14,%esp                         \n"
+    "and    $0xfffffff0,%esp                   \n"
+    "mov    %ecx,0x10(%esp)                    \n"
+    "mov    0x2c(%ecx),%ecx                    \n"
+
+"1:                                            \n"
+    "movdqa (%eax),%xmm0                       \n"
+    "movdqa (%eax,%edi,1),%xmm1                \n"
+    "lea    (%eax,%edi,2),%eax                 \n"
+    "movdqa %xmm0,%xmm7                        \n"
+    "punpcklbw %xmm1,%xmm0                     \n"
+    "punpckhbw %xmm1,%xmm7                     \n"
+    "movdqa %xmm7,%xmm1                        \n"
+    "movdqa (%eax),%xmm2                       \n"
+    "movdqa (%eax,%edi,1),%xmm3                \n"
+    "lea    (%eax,%edi,2),%eax                 \n"
+    "movdqa %xmm2,%xmm7                        \n"
+    "punpcklbw %xmm3,%xmm2                     \n"
+    "punpckhbw %xmm3,%xmm7                     \n"
+    "movdqa %xmm7,%xmm3                        \n"
+    "movdqa (%eax),%xmm4                       \n"
+    "movdqa (%eax,%edi,1),%xmm5                \n"
+    "lea    (%eax,%edi,2),%eax                 \n"
+    "movdqa %xmm4,%xmm7                        \n"
+    "punpcklbw %xmm5,%xmm4                     \n"
+    "punpckhbw %xmm5,%xmm7                     \n"
+    "movdqa %xmm7,%xmm5                        \n"
+    "movdqa (%eax),%xmm6                       \n"
+    "movdqa (%eax,%edi,1),%xmm7                \n"
+    "lea    (%eax,%edi,2),%eax                 \n"
+    "movdqa %xmm5,(%esp)                       \n"
+    "neg    %edi                               \n"
+    "movdqa %xmm6,%xmm5                        \n"
+    "punpcklbw %xmm7,%xmm6                     \n"
+    "punpckhbw %xmm7,%xmm5                     \n"
+    "movdqa %xmm5,%xmm7                        \n"
+    "lea    0x10(%eax,%edi,8),%eax             \n"
+    "neg    %edi                               \n"
+    "movdqa %xmm0,%xmm5                        \n"
+    "punpcklwd %xmm2,%xmm0                     \n"
+    "punpckhwd %xmm2,%xmm5                     \n"
+    "movdqa %xmm5,%xmm2                        \n"
+    "movdqa %xmm1,%xmm5                        \n"
+    "punpcklwd %xmm3,%xmm1                     \n"
+    "punpckhwd %xmm3,%xmm5                     \n"
+    "movdqa %xmm5,%xmm3                        \n"
+    "movdqa %xmm4,%xmm5                        \n"
+    "punpcklwd %xmm6,%xmm4                     \n"
+    "punpckhwd %xmm6,%xmm5                     \n"
+    "movdqa %xmm5,%xmm6                        \n"
+    "movdqa (%esp),%xmm5                       \n"
+    "movdqa %xmm6,(%esp)                       \n"
+    "movdqa %xmm5,%xmm6                        \n"
+    "punpcklwd %xmm7,%xmm5                     \n"
+    "punpckhwd %xmm7,%xmm6                     \n"
+    "movdqa %xmm6,%xmm7                        \n"
+    "movdqa %xmm0,%xmm6                        \n"
+    "punpckldq %xmm4,%xmm0                     \n"
+    "punpckhdq %xmm4,%xmm6                     \n"
+    "movdqa %xmm6,%xmm4                        \n"
+    "movdqa (%esp),%xmm6                       \n"
+    "movlpd %xmm0,(%edx)                       \n"
+    "movhpd %xmm0,(%ebx)                       \n"
+    "movlpd %xmm4,(%edx,%esi,1)                \n"
+    "lea    (%edx,%esi,2),%edx                 \n"
+    "movhpd %xmm4,(%ebx,%ebp,1)                \n"
+    "lea    (%ebx,%ebp,2),%ebx                 \n"
+    "movdqa %xmm2,%xmm0                        \n"
+    "punpckldq %xmm6,%xmm2                     \n"
+    "movlpd %xmm2,(%edx)                       \n"
+    "movhpd %xmm2,(%ebx)                       \n"
+    "punpckhdq %xmm6,%xmm0                     \n"
+    "movlpd %xmm0,(%edx,%esi,1)                \n"
+    "lea    (%edx,%esi,2),%edx                 \n"
+    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
+    "lea    (%ebx,%ebp,2),%ebx                 \n"
+    "movdqa %xmm1,%xmm0                        \n"
+    "punpckldq %xmm5,%xmm1                     \n"
+    "movlpd %xmm1,(%edx)                       \n"
+    "movhpd %xmm1,(%ebx)                       \n"
+    "punpckhdq %xmm5,%xmm0                     \n"
+    "movlpd %xmm0,(%edx,%esi,1)                \n"
+    "lea    (%edx,%esi,2),%edx                 \n"
+    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
+    "lea    (%ebx,%ebp,2),%ebx                 \n"
+    "movdqa %xmm3,%xmm0                        \n"
+    "punpckldq %xmm7,%xmm3                     \n"
+    "movlpd %xmm3,(%edx)                       \n"
+    "movhpd %xmm3,(%ebx)                       \n"
+    "punpckhdq %xmm7,%xmm0                     \n"
+    "sub    $0x8,%ecx                          \n"
+    "movlpd %xmm0,(%edx,%esi,1)                \n"
+    "lea    (%edx,%esi,2),%edx                 \n"
+    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
+    "lea    (%ebx,%ebp,2),%ebx                 \n"
+    "jg     1b                                 \n"
+    "mov    0x10(%esp),%esp                    \n"
+    "pop    %ebp                               \n"
+    "pop    %edi                               \n"
+    "pop    %esi                               \n"
+    "pop    %ebx                               \n"
+#if defined(__native_client__)
+    "pop    %ecx                               \n"
+    "and    $0xffffffe0,%ecx                   \n"
+    "jmp    *%ecx                              \n"
+#else
+    "ret                                       \n"
+#endif
+);
+#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
+    defined(__x86_64__)
+// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
+#define HAS_TRANSPOSE_WX8_FAST_SSSE3
+static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
+                                    uint8* dst, int dst_stride, int width) {
+  asm volatile (
+  // Read in the data from the source pointer.
+  // First round of bit swap.
+  ".p2align  2                                 \n"
+"1:                                            \n"
+  "movdqa     (%0),%%xmm0                      \n"
+  "movdqa     (%0,%3),%%xmm1                   \n"
+  "lea        (%0,%3,2),%0                     \n"
+  "movdqa     %%xmm0,%%xmm8                    \n"
+  "punpcklbw  %%xmm1,%%xmm0                    \n"
+  "punpckhbw  %%xmm1,%%xmm8                    \n"
+  "movdqa     (%0),%%xmm2                      \n"
+  "movdqa     %%xmm0,%%xmm1                    \n"
+  "movdqa     %%xmm8,%%xmm9                    \n"
+  "palignr    $0x8,%%xmm1,%%xmm1               \n"
+  "palignr    $0x8,%%xmm9,%%xmm9               \n"
+  "movdqa     (%0,%3),%%xmm3                   \n"
+  "lea        (%0,%3,2),%0                     \n"
+  "movdqa     %%xmm2,%%xmm10                   \n"
+  "punpcklbw  %%xmm3,%%xmm2                    \n"
+  "punpckhbw  %%xmm3,%%xmm10                   \n"
+  "movdqa     %%xmm2,%%xmm3                    \n"
+  "movdqa     %%xmm10,%%xmm11                  \n"
+  "movdqa     (%0),%%xmm4                      \n"
+  "palignr    $0x8,%%xmm3,%%xmm3               \n"
+  "palignr    $0x8,%%xmm11,%%xmm11             \n"
+  "movdqa     (%0,%3),%%xmm5                   \n"
+  "lea        (%0,%3,2),%0                     \n"
+  "movdqa     %%xmm4,%%xmm12                   \n"
+  "punpcklbw  %%xmm5,%%xmm4                    \n"
+  "punpckhbw  %%xmm5,%%xmm12                   \n"
+  "movdqa     %%xmm4,%%xmm5                    \n"
+  "movdqa     %%xmm12,%%xmm13                  \n"
+  "movdqa     (%0),%%xmm6                      \n"
+  "palignr    $0x8,%%xmm5,%%xmm5               \n"
+  "palignr    $0x8,%%xmm13,%%xmm13             \n"
+  "movdqa     (%0,%3),%%xmm7                   \n"
+  "lea        (%0,%3,2),%0                     \n"
+  "movdqa     %%xmm6,%%xmm14                   \n"
+  "punpcklbw  %%xmm7,%%xmm6                    \n"
+  "punpckhbw  %%xmm7,%%xmm14                   \n"
+  "neg        %3                               \n"
+  "movdqa     %%xmm6,%%xmm7                    \n"
+  "movdqa     %%xmm14,%%xmm15                  \n"
+  "lea        0x10(%0,%3,8),%0                 \n"
+  "palignr    $0x8,%%xmm7,%%xmm7               \n"
+  "palignr    $0x8,%%xmm15,%%xmm15             \n"
+  "neg        %3                               \n"
+   // Second round of bit swap.
+  "punpcklwd  %%xmm2,%%xmm0                    \n"
+  "punpcklwd  %%xmm3,%%xmm1                    \n"
+  "movdqa     %%xmm0,%%xmm2                    \n"
+  "movdqa     %%xmm1,%%xmm3                    \n"
+  "palignr    $0x8,%%xmm2,%%xmm2               \n"
+  "palignr    $0x8,%%xmm3,%%xmm3               \n"
+  "punpcklwd  %%xmm6,%%xmm4                    \n"
+  "punpcklwd  %%xmm7,%%xmm5                    \n"
+  "movdqa     %%xmm4,%%xmm6                    \n"
+  "movdqa     %%xmm5,%%xmm7                    \n"
+  "palignr    $0x8,%%xmm6,%%xmm6               \n"
+  "palignr    $0x8,%%xmm7,%%xmm7               \n"
+  "punpcklwd  %%xmm10,%%xmm8                   \n"
+  "punpcklwd  %%xmm11,%%xmm9                   \n"
+  "movdqa     %%xmm8,%%xmm10                   \n"
+  "movdqa     %%xmm9,%%xmm11                   \n"
+  "palignr    $0x8,%%xmm10,%%xmm10             \n"
+  "palignr    $0x8,%%xmm11,%%xmm11             \n"
+  "punpcklwd  %%xmm14,%%xmm12                  \n"
+  "punpcklwd  %%xmm15,%%xmm13                  \n"
+  "movdqa     %%xmm12,%%xmm14                  \n"
+  "movdqa     %%xmm13,%%xmm15                  \n"
+  "palignr    $0x8,%%xmm14,%%xmm14             \n"
+  "palignr    $0x8,%%xmm15,%%xmm15             \n"
+  // Third round of bit swap.
+  // Write to the destination pointer.
+  "punpckldq  %%xmm4,%%xmm0                    \n"
+  "movq       %%xmm0,(%1)                      \n"
+  "movdqa     %%xmm0,%%xmm4                    \n"
+  "palignr    $0x8,%%xmm4,%%xmm4               \n"
+  "movq       %%xmm4,(%1,%4)                   \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "punpckldq  %%xmm6,%%xmm2                    \n"
+  "movdqa     %%xmm2,%%xmm6                    \n"
+  "movq       %%xmm2,(%1)                      \n"
+  "palignr    $0x8,%%xmm6,%%xmm6               \n"
+  "punpckldq  %%xmm5,%%xmm1                    \n"
+  "movq       %%xmm6,(%1,%4)                   \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "movdqa     %%xmm1,%%xmm5                    \n"
+  "movq       %%xmm1,(%1)                      \n"
+  "palignr    $0x8,%%xmm5,%%xmm5               \n"
+  "movq       %%xmm5,(%1,%4)                   \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "punpckldq  %%xmm7,%%xmm3                    \n"
+  "movq       %%xmm3,(%1)                      \n"
+  "movdqa     %%xmm3,%%xmm7                    \n"
+  "palignr    $0x8,%%xmm7,%%xmm7               \n"
+  "movq       %%xmm7,(%1,%4)                   \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "punpckldq  %%xmm12,%%xmm8                   \n"
+  "movq       %%xmm8,(%1)                      \n"
+  "movdqa     %%xmm8,%%xmm12                   \n"
+  "palignr    $0x8,%%xmm12,%%xmm12             \n"
+  "movq       %%xmm12,(%1,%4)                  \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "punpckldq  %%xmm14,%%xmm10                  \n"
+  "movdqa     %%xmm10,%%xmm14                  \n"
+  "movq       %%xmm10,(%1)                     \n"
+  "palignr    $0x8,%%xmm14,%%xmm14             \n"
+  "punpckldq  %%xmm13,%%xmm9                   \n"
+  "movq       %%xmm14,(%1,%4)                  \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "movdqa     %%xmm9,%%xmm13                   \n"
+  "movq       %%xmm9,(%1)                      \n"
+  "palignr    $0x8,%%xmm13,%%xmm13             \n"
+  "movq       %%xmm13,(%1,%4)                  \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "punpckldq  %%xmm15,%%xmm11                  \n"
+  "movq       %%xmm11,(%1)                     \n"
+  "movdqa     %%xmm11,%%xmm15                  \n"
+  "palignr    $0x8,%%xmm15,%%xmm15             \n"
+  "sub        $0x10,%2                         \n"
+  "movq       %%xmm15,(%1,%4)                  \n"
+  "lea        (%1,%4,2),%1                     \n"
+  "jg         1b                               \n"
+  : "+r"(src),    // %0
+    "+r"(dst),    // %1
+    "+r"(width)   // %2
+  : "r"((intptr_t)(src_stride)),  // %3
+    "r"((intptr_t)(dst_stride))   // %4
+  : "memory", "cc",
+    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",  "xmm14",  "xmm15"
+);
+}
+
+#define HAS_TRANSPOSE_UVWX8_SSE2
+static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
+                                uint8* dst_a, int dst_stride_a,
+                                uint8* dst_b, int dst_stride_b,
+                                int w) {
+  asm volatile (
+  // Read in the data from the source pointer.
+  // First round of bit swap.
+  ".p2align  2                                 \n"
+"1:                                            \n"
+  "movdqa     (%0),%%xmm0                      \n"
+  "movdqa     (%0,%4),%%xmm1                   \n"
+  "lea        (%0,%4,2),%0                     \n"
+  "movdqa     %%xmm0,%%xmm8                    \n"
+  "punpcklbw  %%xmm1,%%xmm0                    \n"
+  "punpckhbw  %%xmm1,%%xmm8                    \n"
+  "movdqa     %%xmm8,%%xmm1                    \n"
+  "movdqa     (%0),%%xmm2                      \n"
+  "movdqa     (%0,%4),%%xmm3                   \n"
+  "lea        (%0,%4,2),%0                     \n"
+  "movdqa     %%xmm2,%%xmm8                    \n"
+  "punpcklbw  %%xmm3,%%xmm2                    \n"
+  "punpckhbw  %%xmm3,%%xmm8                    \n"
+  "movdqa     %%xmm8,%%xmm3                    \n"
+  "movdqa     (%0),%%xmm4                      \n"
+  "movdqa     (%0,%4),%%xmm5                   \n"
+  "lea        (%0,%4,2),%0                     \n"
+  "movdqa     %%xmm4,%%xmm8                    \n"
+  "punpcklbw  %%xmm5,%%xmm4                    \n"
+  "punpckhbw  %%xmm5,%%xmm8                    \n"
+  "movdqa     %%xmm8,%%xmm5                    \n"
+  "movdqa     (%0),%%xmm6                      \n"
+  "movdqa     (%0,%4),%%xmm7                   \n"
+  "lea        (%0,%4,2),%0                     \n"
+  "movdqa     %%xmm6,%%xmm8                    \n"
+  "punpcklbw  %%xmm7,%%xmm6                    \n"
+  "neg        %4                               \n"
+  "lea        0x10(%0,%4,8),%0                 \n"
+  "punpckhbw  %%xmm7,%%xmm8                    \n"
+  "movdqa     %%xmm8,%%xmm7                    \n"
+  "neg        %4                               \n"
+   // Second round of bit swap.
+  "movdqa     %%xmm0,%%xmm8                    \n"
+  "movdqa     %%xmm1,%%xmm9                    \n"
+  "punpckhwd  %%xmm2,%%xmm8                    \n"
+  "punpckhwd  %%xmm3,%%xmm9                    \n"
+  "punpcklwd  %%xmm2,%%xmm0                    \n"
+  "punpcklwd  %%xmm3,%%xmm1                    \n"
+  "movdqa     %%xmm8,%%xmm2                    \n"
+  "movdqa     %%xmm9,%%xmm3                    \n"
+  "movdqa     %%xmm4,%%xmm8                    \n"
+  "movdqa     %%xmm5,%%xmm9                    \n"
+  "punpckhwd  %%xmm6,%%xmm8                    \n"
+  "punpckhwd  %%xmm7,%%xmm9                    \n"
+  "punpcklwd  %%xmm6,%%xmm4                    \n"
+  "punpcklwd  %%xmm7,%%xmm5                    \n"
+  "movdqa     %%xmm8,%%xmm6                    \n"
+  "movdqa     %%xmm9,%%xmm7                    \n"
+  // Third round of bit swap.
+  // Write to the destination pointer.
+  "movdqa     %%xmm0,%%xmm8                    \n"
+  "punpckldq  %%xmm4,%%xmm0                    \n"
+  "movlpd     %%xmm0,(%1)                      \n"  // Write back U channel
+  "movhpd     %%xmm0,(%2)                      \n"  // Write back V channel
+  "punpckhdq  %%xmm4,%%xmm8                    \n"
+  "movlpd     %%xmm8,(%1,%5)                   \n"
+  "lea        (%1,%5,2),%1                     \n"
+  "movhpd     %%xmm8,(%2,%6)                   \n"
+  "lea        (%2,%6,2),%2                     \n"
+  "movdqa     %%xmm2,%%xmm8                    \n"
+  "punpckldq  %%xmm6,%%xmm2                    \n"
+  "movlpd     %%xmm2,(%1)                      \n"
+  "movhpd     %%xmm2,(%2)                      \n"
+  "punpckhdq  %%xmm6,%%xmm8                    \n"
+  "movlpd     %%xmm8,(%1,%5)                   \n"
+  "lea        (%1,%5,2),%1                     \n"
+  "movhpd     %%xmm8,(%2,%6)                   \n"
+  "lea        (%2,%6,2),%2                     \n"
+  "movdqa     %%xmm1,%%xmm8                    \n"
+  "punpckldq  %%xmm5,%%xmm1                    \n"
+  "movlpd     %%xmm1,(%1)                      \n"
+  "movhpd     %%xmm1,(%2)                      \n"
+  "punpckhdq  %%xmm5,%%xmm8                    \n"
+  "movlpd     %%xmm8,(%1,%5)                   \n"
+  "lea        (%1,%5,2),%1                     \n"
+  "movhpd     %%xmm8,(%2,%6)                   \n"
+  "lea        (%2,%6,2),%2                     \n"
+  "movdqa     %%xmm3,%%xmm8                    \n"
+  "punpckldq  %%xmm7,%%xmm3                    \n"
+  "movlpd     %%xmm3,(%1)                      \n"
+  "movhpd     %%xmm3,(%2)                      \n"
+  "punpckhdq  %%xmm7,%%xmm8                    \n"
+  "sub        $0x8,%3                          \n"
+  "movlpd     %%xmm8,(%1,%5)                   \n"
+  "lea        (%1,%5,2),%1                     \n"
+  "movhpd     %%xmm8,(%2,%6)                   \n"
+  "lea        (%2,%6,2),%2                     \n"
+  "jg         1b                               \n"
+  : "+r"(src),    // %0
+    "+r"(dst_a),  // %1
+    "+r"(dst_b),  // %2
+    "+r"(w)   // %3
+  : "r"((intptr_t)(src_stride)),    // %4
+    "r"((intptr_t)(dst_stride_a)),  // %5
+    "r"((intptr_t)(dst_stride_b))   // %6
+  : "memory", "cc",
+    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+    "xmm8", "xmm9"
+);
+}
+#endif
+#endif
+
+static void TransposeWx8_C(const uint8* src, int src_stride,
+                           uint8* dst, int dst_stride,
+                           int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    dst[0] = src[0 * src_stride];
+    dst[1] = src[1 * src_stride];
+    dst[2] = src[2 * src_stride];
+    dst[3] = src[3 * src_stride];
+    dst[4] = src[4 * src_stride];
+    dst[5] = src[5 * src_stride];
+    dst[6] = src[6 * src_stride];
+    dst[7] = src[7 * src_stride];
+    ++src;
+    dst += dst_stride;
+  }
+}
+
+static void TransposeWxH_C(const uint8* src, int src_stride,
+                           uint8* dst, int dst_stride,
+                           int width, int height) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int j;
+    for (j = 0; j < height; ++j) {
+      dst[i * dst_stride + j] = src[j * src_stride + i];
+    }
+  }
+}
+
+LIBYUV_API
+void TransposePlane(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height) {
+  int i = height;
+  void (*TransposeWx8)(const uint8* src, int src_stride,
+                       uint8* dst, int dst_stride,
+                       int width) = TransposeWx8_C;
+#if defined(HAS_TRANSPOSE_WX8_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    TransposeWx8 = TransposeWx8_NEON;
+  }
+#endif
+#if defined(HAS_TRANSPOSE_WX8_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
+    TransposeWx8 = TransposeWx8_SSSE3;
+  }
+#endif
+#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) &&
+      IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+    TransposeWx8 = TransposeWx8_FAST_SSSE3;
+  }
+#endif
+#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
+    if (IS_ALIGNED(width, 4) &&
+        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
+      TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
+    } else {
+      TransposeWx8 = TransposeWx8_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  // Work across the source in 8x8 tiles
+  while (i >= 8) {
+    TransposeWx8(src, src_stride, dst, dst_stride, width);
+    src += 8 * src_stride;    // Go down 8 rows.
+    dst += 8;                 // Move over 8 columns.
+    i -= 8;
+  }
+
+  TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
+}
+
+LIBYUV_API
+void RotatePlane90(const uint8* src, int src_stride,
+                   uint8* dst, int dst_stride,
+                   int width, int height) {
+  // Rotate by 90 is a transpose with the source read
+  // from bottom to top. So set the source pointer to the end
+  // of the buffer and flip the sign of the source stride.
+  src += src_stride * (height - 1);
+  src_stride = -src_stride;
+  TransposePlane(src, src_stride, dst, dst_stride, width, height);
+}
+
+LIBYUV_API
+void RotatePlane270(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height) {
+  // Rotate by 270 is a transpose with the destination written
+  // from bottom to top. So set the destination pointer to the end
+  // of the buffer and flip the sign of the destination stride.
+  dst += dst_stride * (width - 1);
+  dst_stride = -dst_stride;
+  TransposePlane(src, src_stride, dst, dst_stride, width, height);
+}
+
+LIBYUV_API
+void RotatePlane180(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height) {
+  // Swap first and last row and mirror the content. Uses a temporary row.
+  align_buffer_64(row, width);
+  const uint8* src_bot = src + src_stride * (height - 1);
+  uint8* dst_bot = dst + dst_stride * (height - 1);
+  int half_height = (height + 1) >> 1;
+  int y;
+  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+#if defined(HAS_MIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    MirrorRow = MirrorRow_NEON;
+  }
+#endif
+#if defined(HAS_MIRRORROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    MirrorRow = MirrorRow_SSE2;
+  }
+#endif
+#if defined(HAS_MIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    MirrorRow = MirrorRow_SSSE3;
+  }
+#endif
+#if defined(HAS_MIRRORROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
+    MirrorRow = MirrorRow_AVX2;
+  }
+#endif
+#if defined(HAS_MIRRORROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
+    MirrorRow = MirrorRow_MIPS_DSPR2;
+  }
+#endif
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+  // Odd height will harmlessly mirror the middle row twice.
+  for (y = 0; y < half_height; ++y) {
+    MirrorRow(src, row, width);  // Mirror first row into a buffer
+    src += src_stride;
+    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
+    dst += dst_stride;
+    CopyRow(row, dst_bot, width);  // Copy first mirrored row into last
+    src_bot -= src_stride;
+    dst_bot -= dst_stride;
+  }
+  free_aligned_buffer_64(row);
+}
+
+static void TransposeUVWx8_C(const uint8* src, int src_stride,
+                             uint8* dst_a, int dst_stride_a,
+                             uint8* dst_b, int dst_stride_b,
+                             int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    dst_a[0] = src[0 * src_stride + 0];
+    dst_b[0] = src[0 * src_stride + 1];
+    dst_a[1] = src[1 * src_stride + 0];
+    dst_b[1] = src[1 * src_stride + 1];
+    dst_a[2] = src[2 * src_stride + 0];
+    dst_b[2] = src[2 * src_stride + 1];
+    dst_a[3] = src[3 * src_stride + 0];
+    dst_b[3] = src[3 * src_stride + 1];
+    dst_a[4] = src[4 * src_stride + 0];
+    dst_b[4] = src[4 * src_stride + 1];
+    dst_a[5] = src[5 * src_stride + 0];
+    dst_b[5] = src[5 * src_stride + 1];
+    dst_a[6] = src[6 * src_stride + 0];
+    dst_b[6] = src[6 * src_stride + 1];
+    dst_a[7] = src[7 * src_stride + 0];
+    dst_b[7] = src[7 * src_stride + 1];
+    src += 2;
+    dst_a += dst_stride_a;
+    dst_b += dst_stride_b;
+  }
+}
+
+static void TransposeUVWxH_C(const uint8* src, int src_stride,
+                             uint8* dst_a, int dst_stride_a,
+                             uint8* dst_b, int dst_stride_b,
+                             int width, int height) {
+  int i;
+  for (i = 0; i < width * 2; i += 2) {
+    int j;
+    for (j = 0; j < height; ++j) {
+      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
+      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
+    }
+  }
+}
+
+LIBYUV_API
+void TransposeUV(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height) {
+  int i = height;
+  void (*TransposeUVWx8)(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int width) = TransposeUVWx8_C;
+#if defined(HAS_TRANSPOSE_UVWX8_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    TransposeUVWx8 = TransposeUVWx8_NEON;
+  }
+#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+    TransposeUVWx8 = TransposeUVWx8_SSE2;
+  }
+#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
+    TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
+  }
+#endif
+
+  // Work through the source in 8x8 tiles.
+  while (i >= 8) {
+    TransposeUVWx8(src, src_stride,
+                   dst_a, dst_stride_a,
+                   dst_b, dst_stride_b,
+                   width);
+    src += 8 * src_stride;    // Go down 8 rows.
+    dst_a += 8;               // Move over 8 columns.
+    dst_b += 8;               // Move over 8 columns.
+    i -= 8;
+  }
+
+  TransposeUVWxH_C(src, src_stride,
+                   dst_a, dst_stride_a,
+                   dst_b, dst_stride_b,
+                   width, i);
+}
+
+LIBYUV_API
+void RotateUV90(const uint8* src, int src_stride,
+                uint8* dst_a, int dst_stride_a,
+                uint8* dst_b, int dst_stride_b,
+                int width, int height) {
+  src += src_stride * (height - 1);
+  src_stride = -src_stride;
+
+  TransposeUV(src, src_stride,
+              dst_a, dst_stride_a,
+              dst_b, dst_stride_b,
+              width, height);
+}
+
+LIBYUV_API
+void RotateUV270(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height) {
+  dst_a += dst_stride_a * (width - 1);
+  dst_b += dst_stride_b * (width - 1);
+  dst_stride_a = -dst_stride_a;
+  dst_stride_b = -dst_stride_b;
+
+  TransposeUV(src, src_stride,
+              dst_a, dst_stride_a,
+              dst_b, dst_stride_b,
+              width, height);
+}
+
+// Rotate 180 is a horizontal and vertical flip.
+LIBYUV_API
+void RotateUV180(const uint8* src, int src_stride,
+                 uint8* dst_a, int dst_stride_a,
+                 uint8* dst_b, int dst_stride_b,
+                 int width, int height) {
+  int i;
+  void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
+      MirrorUVRow_C;
+#if defined(HAS_MIRRORUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    MirrorRowUV = MirrorUVRow_NEON;
+  }
+#elif defined(HAS_MIRRORROW_UV_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+    MirrorRowUV = MirrorUVRow_SSSE3;
+  }
+#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
+      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
+    MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
+  }
+#endif
+
+  dst_a += dst_stride_a * (height - 1);
+  dst_b += dst_stride_b * (height - 1);
+
+  for (i = 0; i < height; ++i) {
+    MirrorRowUV(src, dst_a, dst_b, width);
+    src += src_stride;
+    dst_a -= dst_stride_a;
+    dst_b -= dst_stride_b;
+  }
+}
+
+LIBYUV_API
+int RotatePlane(const uint8* src, int src_stride,
+                uint8* dst, int dst_stride,
+                int width, int height,
+                enum RotationMode mode) {
+  if (!src || width <= 0 || height == 0 || !dst) {
+    return -1;
+  }
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src = src + (height - 1) * src_stride;
+    src_stride = -src_stride;
+  }
+
+  switch (mode) {
+    case kRotate0:
+      // copy frame
+      CopyPlane(src, src_stride,
+                dst, dst_stride,
+                width, height);
+      return 0;
+    case kRotate90:
+      RotatePlane90(src, src_stride,
+                    dst, dst_stride,
+                    width, height);
+      return 0;
+    case kRotate270:
+      RotatePlane270(src, src_stride,
+                     dst, dst_stride,
+                     width, height);
+      return 0;
+    case kRotate180:
+      RotatePlane180(src, src_stride,
+                     dst, dst_stride,
+                     width, height);
+      return 0;
+    default:
+      break;
+  }
+  return -1;
+}
+
+LIBYUV_API
+int I420Rotate(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height,
+               enum RotationMode mode) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
+      !dst_y || !dst_u || !dst_v) {
+    return -1;
+  }
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  switch (mode) {
+    case kRotate0:
+      // copy frame
+      return I420Copy(src_y, src_stride_y,
+                      src_u, src_stride_u,
+                      src_v, src_stride_v,
+                      dst_y, dst_stride_y,
+                      dst_u, dst_stride_u,
+                      dst_v, dst_stride_v,
+                      width, height);
+    case kRotate90:
+      RotatePlane90(src_y, src_stride_y,
+                    dst_y, dst_stride_y,
+                    width, height);
+      RotatePlane90(src_u, src_stride_u,
+                    dst_u, dst_stride_u,
+                    halfwidth, halfheight);
+      RotatePlane90(src_v, src_stride_v,
+                    dst_v, dst_stride_v,
+                    halfwidth, halfheight);
+      return 0;
+    case kRotate270:
+      RotatePlane270(src_y, src_stride_y,
+                     dst_y, dst_stride_y,
+                     width, height);
+      RotatePlane270(src_u, src_stride_u,
+                     dst_u, dst_stride_u,
+                     halfwidth, halfheight);
+      RotatePlane270(src_v, src_stride_v,
+                     dst_v, dst_stride_v,
+                     halfwidth, halfheight);
+      return 0;
+    case kRotate180:
+      RotatePlane180(src_y, src_stride_y,
+                     dst_y, dst_stride_y,
+                     width, height);
+      RotatePlane180(src_u, src_stride_u,
+                     dst_u, dst_stride_u,
+                     halfwidth, halfheight);
+      RotatePlane180(src_v, src_stride_v,
+                     dst_v, dst_stride_v,
+                     halfwidth, halfheight);
+      return 0;
+    default:
+      break;
+  }
+  return -1;
+}
+
+LIBYUV_API
+int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
+                     const uint8* src_uv, int src_stride_uv,
+                     uint8* dst_y, int dst_stride_y,
+                     uint8* dst_u, int dst_stride_u,
+                     uint8* dst_v, int dst_stride_v,
+                     int width, int height,
+                     enum RotationMode mode) {
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_y || !src_uv || width <= 0 || height == 0 ||
+      !dst_y || !dst_u || !dst_v) {
+    return -1;
+  }
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
+    src_stride_y = -src_stride_y;
+    src_stride_uv = -src_stride_uv;
+  }
+
+  switch (mode) {
+    case kRotate0:
+      // copy frame
+      return NV12ToI420(src_y, src_stride_y,
+                        src_uv, src_stride_uv,
+                        dst_y, dst_stride_y,
+                        dst_u, dst_stride_u,
+                        dst_v, dst_stride_v,
+                        width, height);
+    case kRotate90:
+      RotatePlane90(src_y, src_stride_y,
+                    dst_y, dst_stride_y,
+                    width, height);
+      RotateUV90(src_uv, src_stride_uv,
+                 dst_u, dst_stride_u,
+                 dst_v, dst_stride_v,
+                 halfwidth, halfheight);
+      return 0;
+    case kRotate270:
+      RotatePlane270(src_y, src_stride_y,
+                     dst_y, dst_stride_y,
+                     width, height);
+      RotateUV270(src_uv, src_stride_uv,
+                  dst_u, dst_stride_u,
+                  dst_v, dst_stride_v,
+                  halfwidth, halfheight);
+      return 0;
+    case kRotate180:
+      RotatePlane180(src_y, src_stride_y,
+                     dst_y, dst_stride_y,
+                     width, height);
+      RotateUV180(src_uv, src_stride_uv,
+                  dst_u, dst_stride_u,
+                  dst_v, dst_stride_v,
+                  halfwidth, halfheight);
+      return 0;
+    default:
+      break;
+  }
+  return -1;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/rotate_argb.cc b/src/main/jni/libyuv/source/rotate_argb.cc
new file mode 100644
index 000000000..ab0f9ce07
--- /dev/null
+++ b/src/main/jni/libyuv/source/rotate_argb.cc
@@ -0,0 +1,209 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/rotate.h"
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/convert.h"
+#include "libyuv/planar_functions.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// ARGBScale has a function to copy pixels to a row, striding each source
+// pixel by a constant.
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(_M_IX86) || \
+    (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
+#define HAS_SCALEARGBROWDOWNEVEN_SSE2
+void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
+                               int src_stepx,
+                               uint8* dst_ptr, int dst_width);
+#endif
+#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+#define HAS_SCALEARGBROWDOWNEVEN_NEON
+void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
+                               int src_stepx,
+                               uint8* dst_ptr, int dst_width);
+#endif
+
+void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
+                            int src_stepx,
+                            uint8* dst_ptr, int dst_width);
+
+static void ARGBTranspose(const uint8* src, int src_stride,
+                          uint8* dst, int dst_stride,
+                          int width, int height) {
+  int i;
+  int src_pixel_step = src_stride >> 2;
+  void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
+      int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) &&  // Width of dest.
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
+  }
+#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) &&  // Width of dest.
+      IS_ALIGNED(src, 4)) {
+    ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
+  }
+#endif
+
+  for (i = 0; i < width; ++i) {  // column of source to row of dest.
+    ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
+    dst += dst_stride;
+    src += 4;
+  }
+}
+
+void ARGBRotate90(const uint8* src, int src_stride,
+                  uint8* dst, int dst_stride,
+                  int width, int height) {
+  // Rotate by 90 is a ARGBTranspose with the source read
+  // from bottom to top. So set the source pointer to the end
+  // of the buffer and flip the sign of the source stride.
+  src += src_stride * (height - 1);
+  src_stride = -src_stride;
+  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
+}
+
+void ARGBRotate270(const uint8* src, int src_stride,
+                    uint8* dst, int dst_stride,
+                    int width, int height) {
+  // Rotate by 270 is a ARGBTranspose with the destination written
+  // from bottom to top. So set the destination pointer to the end
+  // of the buffer and flip the sign of the destination stride.
+  dst += dst_stride * (width - 1);
+  dst_stride = -dst_stride;
+  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
+}
+
+void ARGBRotate180(const uint8* src, int src_stride,
+                   uint8* dst, int dst_stride,
+                   int width, int height) {
+  // Swap first and last row and mirror the content. Uses a temporary row.
+  align_buffer_64(row, width * 4);
+  const uint8* src_bot = src + src_stride * (height - 1);
+  uint8* dst_bot = dst + dst_stride * (height - 1);
+  int half_height = (height + 1) >> 1;
+  int y;
+  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
+      ARGBMirrorRow_C;
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+#if defined(HAS_ARGBMIRRORROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
+  }
+#endif
+#if defined(HAS_ARGBMIRRORROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
+    ARGBMirrorRow = ARGBMirrorRow_AVX2;
+  }
+#endif
+#if defined(HAS_ARGBMIRRORROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
+    ARGBMirrorRow = ARGBMirrorRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
+      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+  // Odd height will harmlessly mirror the middle row twice.
+  for (y = 0; y < half_height; ++y) {
+    ARGBMirrorRow(src, row, width);  // Mirror first row into a buffer
+    ARGBMirrorRow(src_bot, dst, width);  // Mirror last row into first row
+    CopyRow(row, dst_bot, width * 4);  // Copy first mirrored row into last
+    src += src_stride;
+    dst += dst_stride;
+    src_bot -= src_stride;
+    dst_bot -= dst_stride;
+  }
+  free_aligned_buffer_64(row);
+}
+
+LIBYUV_API
+int ARGBRotate(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height,
+               enum RotationMode mode) {
+  if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
+    return -1;
+  }
+
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_argb = src_argb + (height - 1) * src_stride_argb;
+    src_stride_argb = -src_stride_argb;
+  }
+
+  switch (mode) {
+    case kRotate0:
+      // copy frame
+      return ARGBCopy(src_argb, src_stride_argb,
+                      dst_argb, dst_stride_argb,
+                      width, height);
+    case kRotate90:
+      ARGBRotate90(src_argb, src_stride_argb,
+                   dst_argb, dst_stride_argb,
+                   width, height);
+      return 0;
+    case kRotate270:
+      ARGBRotate270(src_argb, src_stride_argb,
+                    dst_argb, dst_stride_argb,
+                    width, height);
+      return 0;
+    case kRotate180:
+      ARGBRotate180(src_argb, src_stride_argb,
+                    dst_argb, dst_stride_argb,
+                    width, height);
+      return 0;
+    default:
+      break;
+  }
+  return -1;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/rotate_mips.cc b/src/main/jni/libyuv/source/rotate_mips.cc
new file mode 100644
index 000000000..70770fd06
--- /dev/null
+++ b/src/main/jni/libyuv/source/rotate_mips.cc
@@ -0,0 +1,485 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && \
+    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32)
+
+void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                             uint8* dst, int dst_stride,
+                             int width) {
+   __asm__ __volatile__ (
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+      "sll              $t2, %[src_stride], 0x1          \n" // src_stride x 2
+      "sll              $t4, %[src_stride], 0x2          \n" // src_stride x 4
+      "sll              $t9, %[src_stride], 0x3          \n" // src_stride x 8
+      "addu             $t3, $t2, %[src_stride]          \n"
+      "addu             $t5, $t4, %[src_stride]          \n"
+      "addu             $t6, $t2, $t4                    \n"
+      "andi             $t0, %[dst], 0x3                 \n"
+      "andi             $t1, %[dst_stride], 0x3          \n"
+      "or               $t0, $t0, $t1                    \n"
+      "bnez             $t0, 11f                         \n"
+      " subu            $t7, $t9, %[src_stride]          \n"
+//dst + dst_stride word aligned
+    "1:                                                  \n"
+      "lbu              $t0, 0(%[src])                   \n"
+      "lbux             $t1, %[src_stride](%[src])       \n"
+      "lbux             $t8, $t2(%[src])                 \n"
+      "lbux             $t9, $t3(%[src])                 \n"
+      "sll              $t1, $t1, 16                     \n"
+      "sll              $t9, $t9, 16                     \n"
+      "or               $t0, $t0, $t1                    \n"
+      "or               $t8, $t8, $t9                    \n"
+      "precr.qb.ph      $s0, $t8, $t0                    \n"
+      "lbux             $t0, $t4(%[src])                 \n"
+      "lbux             $t1, $t5(%[src])                 \n"
+      "lbux             $t8, $t6(%[src])                 \n"
+      "lbux             $t9, $t7(%[src])                 \n"
+      "sll              $t1, $t1, 16                     \n"
+      "sll              $t9, $t9, 16                     \n"
+      "or               $t0, $t0, $t1                    \n"
+      "or               $t8, $t8, $t9                    \n"
+      "precr.qb.ph      $s1, $t8, $t0                    \n"
+      "sw               $s0, 0(%[dst])                   \n"
+      "addiu            %[width], -1                     \n"
+      "addiu            %[src], 1                        \n"
+      "sw               $s1, 4(%[dst])                   \n"
+      "bnez             %[width], 1b                     \n"
+      " addu            %[dst], %[dst], %[dst_stride]    \n"
+      "b                2f                               \n"
+//dst + dst_stride unaligned
+   "11:                                                  \n"
+      "lbu              $t0, 0(%[src])                   \n"
+      "lbux             $t1, %[src_stride](%[src])       \n"
+      "lbux             $t8, $t2(%[src])                 \n"
+      "lbux             $t9, $t3(%[src])                 \n"
+      "sll              $t1, $t1, 16                     \n"
+      "sll              $t9, $t9, 16                     \n"
+      "or               $t0, $t0, $t1                    \n"
+      "or               $t8, $t8, $t9                    \n"
+      "precr.qb.ph      $s0, $t8, $t0                    \n"
+      "lbux             $t0, $t4(%[src])                 \n"
+      "lbux             $t1, $t5(%[src])                 \n"
+      "lbux             $t8, $t6(%[src])                 \n"
+      "lbux             $t9, $t7(%[src])                 \n"
+      "sll              $t1, $t1, 16                     \n"
+      "sll              $t9, $t9, 16                     \n"
+      "or               $t0, $t0, $t1                    \n"
+      "or               $t8, $t8, $t9                    \n"
+      "precr.qb.ph      $s1, $t8, $t0                    \n"
+      "swr              $s0, 0(%[dst])                   \n"
+      "swl              $s0, 3(%[dst])                   \n"
+      "addiu            %[width], -1                     \n"
+      "addiu            %[src], 1                        \n"
+      "swr              $s1, 4(%[dst])                   \n"
+      "swl              $s1, 7(%[dst])                   \n"
+      "bnez             %[width], 11b                    \n"
+       "addu             %[dst], %[dst], %[dst_stride]   \n"
+    "2:                                                  \n"
+      ".set pop                                          \n"
+      :[src] "+r" (src),
+       [dst] "+r" (dst),
+       [width] "+r" (width)
+      :[src_stride] "r" (src_stride),
+       [dst_stride] "r" (dst_stride)
+      : "t0", "t1",  "t2", "t3", "t4", "t5",
+        "t6", "t7", "t8", "t9",
+        "s0", "s1"
+  );
+}
+
+void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
+                                  uint8* dst, int dst_stride,
+                                  int width) {
+  __asm__ __volatile__ (
+      ".set noat                                         \n"
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+      "beqz             %[width], 2f                     \n"
+      " sll             $t2, %[src_stride], 0x1          \n"  // src_stride x 2
+      "sll              $t4, %[src_stride], 0x2          \n"  // src_stride x 4
+      "sll              $t9, %[src_stride], 0x3          \n"  // src_stride x 8
+      "addu             $t3, $t2, %[src_stride]          \n"
+      "addu             $t5, $t4, %[src_stride]          \n"
+      "addu             $t6, $t2, $t4                    \n"
+
+      "srl              $AT, %[width], 0x2               \n"
+      "andi             $t0, %[dst], 0x3                 \n"
+      "andi             $t1, %[dst_stride], 0x3          \n"
+      "or               $t0, $t0, $t1                    \n"
+      "bnez             $t0, 11f                         \n"
+      " subu            $t7, $t9, %[src_stride]          \n"
+//dst + dst_stride word aligned
+      "1:                                                \n"
+      "lw               $t0, 0(%[src])                   \n"
+      "lwx              $t1, %[src_stride](%[src])       \n"
+      "lwx              $t8, $t2(%[src])                 \n"
+      "lwx              $t9, $t3(%[src])                 \n"
+
+// t0 = | 30 | 20 | 10 | 00 |
+// t1 = | 31 | 21 | 11 | 01 |
+// t8 = | 32 | 22 | 12 | 02 |
+// t9 = | 33 | 23 | 13 | 03 |
+
+      "precr.qb.ph     $s0, $t1, $t0                     \n"
+      "precr.qb.ph     $s1, $t9, $t8                     \n"
+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
+
+  // s0 = | 21 | 01 | 20 | 00 |
+  // s1 = | 23 | 03 | 22 | 02 |
+  // s2 = | 31 | 11 | 30 | 10 |
+  // s3 = | 33 | 13 | 32 | 12 |
+
+      "precr.qb.ph     $s4, $s1, $s0                     \n"
+      "precrq.qb.ph    $s5, $s1, $s0                     \n"
+      "precr.qb.ph     $s6, $s3, $s2                     \n"
+      "precrq.qb.ph    $s7, $s3, $s2                     \n"
+
+  // s4 = | 03 | 02 | 01 | 00 |
+  // s5 = | 23 | 22 | 21 | 20 |
+  // s6 = | 13 | 12 | 11 | 10 |
+  // s7 = | 33 | 32 | 31 | 30 |
+
+      "lwx              $t0, $t4(%[src])                 \n"
+      "lwx              $t1, $t5(%[src])                 \n"
+      "lwx              $t8, $t6(%[src])                 \n"
+      "lwx              $t9, $t7(%[src])                 \n"
+
+// t0 = | 34 | 24 | 14 | 04 |
+// t1 = | 35 | 25 | 15 | 05 |
+// t8 = | 36 | 26 | 16 | 06 |
+// t9 = | 37 | 27 | 17 | 07 |
+
+      "precr.qb.ph     $s0, $t1, $t0                     \n"
+      "precr.qb.ph     $s1, $t9, $t8                     \n"
+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
+
+  // s0 = | 25 | 05 | 24 | 04 |
+  // s1 = | 27 | 07 | 26 | 06 |
+  // s2 = | 35 | 15 | 34 | 14 |
+  // s3 = | 37 | 17 | 36 | 16 |
+
+      "precr.qb.ph     $t0, $s1, $s0                     \n"
+      "precrq.qb.ph    $t1, $s1, $s0                     \n"
+      "precr.qb.ph     $t8, $s3, $s2                     \n"
+      "precrq.qb.ph    $t9, $s3, $s2                     \n"
+
+  // t0 = | 07 | 06 | 05 | 04 |
+  // t1 = | 27 | 26 | 25 | 24 |
+  // t8 = | 17 | 16 | 15 | 14 |
+  // t9 = | 37 | 36 | 35 | 34 |
+
+      "addu            $s0, %[dst], %[dst_stride]        \n"
+      "addu            $s1, $s0, %[dst_stride]           \n"
+      "addu            $s2, $s1, %[dst_stride]           \n"
+
+      "sw              $s4, 0(%[dst])                    \n"
+      "sw              $t0, 4(%[dst])                    \n"
+      "sw              $s6, 0($s0)                       \n"
+      "sw              $t8, 4($s0)                       \n"
+      "sw              $s5, 0($s1)                       \n"
+      "sw              $t1, 4($s1)                       \n"
+      "sw              $s7, 0($s2)                       \n"
+      "sw              $t9, 4($s2)                       \n"
+
+      "addiu            $AT, -1                          \n"
+      "addiu            %[src], 4                        \n"
+
+      "bnez             $AT, 1b                          \n"
+      " addu            %[dst], $s2, %[dst_stride]       \n"
+      "b                2f                               \n"
+//dst + dst_stride unaligned
+      "11:                                               \n"
+      "lw               $t0, 0(%[src])                   \n"
+      "lwx              $t1, %[src_stride](%[src])       \n"
+      "lwx              $t8, $t2(%[src])                 \n"
+      "lwx              $t9, $t3(%[src])                 \n"
+
+// t0 = | 30 | 20 | 10 | 00 |
+// t1 = | 31 | 21 | 11 | 01 |
+// t8 = | 32 | 22 | 12 | 02 |
+// t9 = | 33 | 23 | 13 | 03 |
+
+      "precr.qb.ph     $s0, $t1, $t0                     \n"
+      "precr.qb.ph     $s1, $t9, $t8                     \n"
+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
+
+  // s0 = | 21 | 01 | 20 | 00 |
+  // s1 = | 23 | 03 | 22 | 02 |
+  // s2 = | 31 | 11 | 30 | 10 |
+  // s3 = | 33 | 13 | 32 | 12 |
+
+      "precr.qb.ph     $s4, $s1, $s0                     \n"
+      "precrq.qb.ph    $s5, $s1, $s0                     \n"
+      "precr.qb.ph     $s6, $s3, $s2                     \n"
+      "precrq.qb.ph    $s7, $s3, $s2                     \n"
+
+  // s4 = | 03 | 02 | 01 | 00 |
+  // s5 = | 23 | 22 | 21 | 20 |
+  // s6 = | 13 | 12 | 11 | 10 |
+  // s7 = | 33 | 32 | 31 | 30 |
+
+      "lwx              $t0, $t4(%[src])                 \n"
+      "lwx              $t1, $t5(%[src])                 \n"
+      "lwx              $t8, $t6(%[src])                 \n"
+      "lwx              $t9, $t7(%[src])                 \n"
+
+// t0 = | 34 | 24 | 14 | 04 |
+// t1 = | 35 | 25 | 15 | 05 |
+// t8 = | 36 | 26 | 16 | 06 |
+// t9 = | 37 | 27 | 17 | 07 |
+
+      "precr.qb.ph     $s0, $t1, $t0                     \n"
+      "precr.qb.ph     $s1, $t9, $t8                     \n"
+      "precrq.qb.ph    $s2, $t1, $t0                     \n"
+      "precrq.qb.ph    $s3, $t9, $t8                     \n"
+
+  // s0 = | 25 | 05 | 24 | 04 |
+  // s1 = | 27 | 07 | 26 | 06 |
+  // s2 = | 35 | 15 | 34 | 14 |
+  // s3 = | 37 | 17 | 36 | 16 |
+
+      "precr.qb.ph     $t0, $s1, $s0                     \n"
+      "precrq.qb.ph    $t1, $s1, $s0                     \n"
+      "precr.qb.ph     $t8, $s3, $s2                     \n"
+      "precrq.qb.ph    $t9, $s3, $s2                     \n"
+
+  // t0 = | 07 | 06 | 05 | 04 |
+  // t1 = | 27 | 26 | 25 | 24 |
+  // t8 = | 17 | 16 | 15 | 14 |
+  // t9 = | 37 | 36 | 35 | 34 |
+
+      "addu            $s0, %[dst], %[dst_stride]        \n"
+      "addu            $s1, $s0, %[dst_stride]           \n"
+      "addu            $s2, $s1, %[dst_stride]           \n"
+
+      "swr              $s4, 0(%[dst])                   \n"
+      "swl              $s4, 3(%[dst])                   \n"
+      "swr              $t0, 4(%[dst])                   \n"
+      "swl              $t0, 7(%[dst])                   \n"
+      "swr              $s6, 0($s0)                      \n"
+      "swl              $s6, 3($s0)                      \n"
+      "swr              $t8, 4($s0)                      \n"
+      "swl              $t8, 7($s0)                      \n"
+      "swr              $s5, 0($s1)                      \n"
+      "swl              $s5, 3($s1)                      \n"
+      "swr              $t1, 4($s1)                      \n"
+      "swl              $t1, 7($s1)                      \n"
+      "swr              $s7, 0($s2)                      \n"
+      "swl              $s7, 3($s2)                      \n"
+      "swr              $t9, 4($s2)                      \n"
+      "swl              $t9, 7($s2)                      \n"
+
+      "addiu            $AT, -1                          \n"
+      "addiu            %[src], 4                        \n"
+
+      "bnez             $AT, 11b                         \n"
+      " addu            %[dst], $s2, %[dst_stride]       \n"
+      "2:                                                \n"
+      ".set pop                                          \n"
+      ".set at                                           \n"
+      :[src] "+r" (src),
+       [dst] "+r" (dst),
+       [width] "+r" (width)
+      :[src_stride] "r" (src_stride),
+       [dst_stride] "r" (dst_stride)
+      : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
+        "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
+  );
+}
+
+void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                               uint8* dst_a, int dst_stride_a,
+                               uint8* dst_b, int dst_stride_b,
+                               int width) {
+  __asm__ __volatile__ (
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+      "beqz            %[width], 2f                      \n"
+      " sll            $t2, %[src_stride], 0x1           \n" // src_stride x 2
+      "sll             $t4, %[src_stride], 0x2           \n" // src_stride x 4
+      "sll             $t9, %[src_stride], 0x3           \n" // src_stride x 8
+      "addu            $t3, $t2, %[src_stride]           \n"
+      "addu            $t5, $t4, %[src_stride]           \n"
+      "addu            $t6, $t2, $t4                     \n"
+      "subu            $t7, $t9, %[src_stride]           \n"
+      "srl             $t1, %[width], 1                  \n"
+
+// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
+      "andi            $t0, %[dst_a], 0x3                \n"
+      "andi            $t8, %[dst_b], 0x3                \n"
+      "or              $t0, $t0, $t8                     \n"
+      "andi            $t8, %[dst_stride_a], 0x3         \n"
+      "andi            $s5, %[dst_stride_b], 0x3         \n"
+      "or              $t8, $t8, $s5                     \n"
+      "or              $t0, $t0, $t8                     \n"
+      "bnez            $t0, 11f                          \n"
+      " nop                                              \n"
+// dst + dst_stride word aligned (both, a & b dst addresses)
+    "1:                                                  \n"
+      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
+      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
+      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
+      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
+      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
+      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
+
+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
+
+      "sll             $t0, $t0, 16                      \n"
+      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
+      "sll             $t9, $t9, 16                      \n"
+      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
+
+      "sw              $s3, 0($s5)                       \n"
+      "sw              $s4, 0($s6)                       \n"
+
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
+
+      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
+      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
+      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
+      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
+      "sw              $s3, 0(%[dst_a])                  \n"
+      "sw              $s4, 0(%[dst_b])                  \n"
+
+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B6|A6|B7|A7|
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
+
+      "sll             $t0, $t0, 16                      \n"
+      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
+      "sll             $t9, $t9, 16                      \n"
+      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
+      "sw              $s3, 4($s5)                       \n"
+      "sw              $s4, 4($s6)                       \n"
+
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
+
+      "addiu           %[src], 4                         \n"
+      "addiu           $t1, -1                           \n"
+      "sll             $t0, %[dst_stride_a], 1           \n"
+      "sll             $t8, %[dst_stride_b], 1           \n"
+      "sw              $s3, 4(%[dst_a])                  \n"
+      "sw              $s4, 4(%[dst_b])                  \n"
+      "addu            %[dst_a], %[dst_a], $t0           \n"
+      "bnez            $t1, 1b                           \n"
+      " addu           %[dst_b], %[dst_b], $t8           \n"
+      "b               2f                                \n"
+      " nop                                              \n"
+
+// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
+   "11:                                                  \n"
+      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
+      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
+      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
+      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
+      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
+      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
+
+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
+
+      "sll             $t0, $t0, 16                      \n"
+      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
+      "sll             $t9, $t9, 16                      \n"
+      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
+
+      "swr             $s3, 0($s5)                       \n"
+      "swl             $s3, 3($s5)                       \n"
+      "swr             $s4, 0($s6)                       \n"
+      "swl             $s4, 3($s6)                       \n"
+
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
+
+      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
+      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
+      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
+      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
+      "swr             $s3, 0(%[dst_a])                  \n"
+      "swl             $s3, 3(%[dst_a])                  \n"
+      "swr             $s4, 0(%[dst_b])                  \n"
+      "swl             $s4, 3(%[dst_b])                  \n"
+
+      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
+      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B6|A6|B7|A7|
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
+
+      "sll             $t0, $t0, 16                      \n"
+      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
+      "sll             $t9, $t9, 16                      \n"
+      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
+
+      "swr             $s3, 4($s5)                       \n"
+      "swl             $s3, 7($s5)                       \n"
+      "swr             $s4, 4($s6)                       \n"
+      "swl             $s4, 7($s6)                       \n"
+
+      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
+      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
+
+      "addiu           %[src], 4                         \n"
+      "addiu           $t1, -1                           \n"
+      "sll             $t0, %[dst_stride_a], 1           \n"
+      "sll             $t8, %[dst_stride_b], 1           \n"
+      "swr             $s3, 4(%[dst_a])                  \n"
+      "swl             $s3, 7(%[dst_a])                  \n"
+      "swr             $s4, 4(%[dst_b])                  \n"
+      "swl             $s4, 7(%[dst_b])                  \n"
+      "addu            %[dst_a], %[dst_a], $t0           \n"
+      "bnez            $t1, 11b                          \n"
+      " addu           %[dst_b], %[dst_b], $t8           \n"
+
+      "2:                                                \n"
+      ".set pop                                          \n"
+      : [src] "+r" (src),
+        [dst_a] "+r" (dst_a),
+        [dst_b] "+r" (dst_b),
+        [width] "+r" (width),
+        [src_stride] "+r" (src_stride)
+      : [dst_stride_a] "r" (dst_stride_a),
+        [dst_stride_b] "r" (dst_stride_b)
+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
+        "t6", "t7", "t8", "t9",
+        "s0", "s1", "s2", "s3",
+        "s4", "s5", "s6"
+  );
+}
+
+#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/rotate_neon.cc b/src/main/jni/libyuv/source/rotate_neon.cc
new file mode 100644
index 000000000..d354e11fa
--- /dev/null
+++ b/src/main/jni/libyuv/source/rotate_neon.cc
@@ -0,0 +1,533 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+
+static uvec8 kVTbl4x4Transpose =
+  { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 };
+
+void TransposeWx8_NEON(const uint8* src, int src_stride,
+                       uint8* dst, int dst_stride,
+                       int width) {
+  const uint8* src_temp = NULL;
+  asm volatile (
+    // loops are on blocks of 8. loop will stop when
+    // counter gets to or below 0. starting the counter
+    // at w-8 allow for this
+    "sub         %5, #8                        \n"
+
+    // handle 8x8 blocks. this should be the majority of the plane
+    ".p2align  2                               \n"
+    "1:                                        \n"
+      "mov         %0, %1                      \n"
+
+      MEMACCESS(0)
+      "vld1.8      {d0}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d1}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d2}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d3}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d4}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d5}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d6}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d7}, [%0]                  \n"
+
+      "vtrn.8      d1, d0                      \n"
+      "vtrn.8      d3, d2                      \n"
+      "vtrn.8      d5, d4                      \n"
+      "vtrn.8      d7, d6                      \n"
+
+      "vtrn.16     d1, d3                      \n"
+      "vtrn.16     d0, d2                      \n"
+      "vtrn.16     d5, d7                      \n"
+      "vtrn.16     d4, d6                      \n"
+
+      "vtrn.32     d1, d5                      \n"
+      "vtrn.32     d0, d4                      \n"
+      "vtrn.32     d3, d7                      \n"
+      "vtrn.32     d2, d6                      \n"
+
+      "vrev16.8    q0, q0                      \n"
+      "vrev16.8    q1, q1                      \n"
+      "vrev16.8    q2, q2                      \n"
+      "vrev16.8    q3, q3                      \n"
+
+      "mov         %0, %3                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d1}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d0}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d3}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d2}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d5}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d4}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d7}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d6}, [%0]                  \n"
+
+      "add         %1, #8                      \n"  // src += 8
+      "add         %3, %3, %4, lsl #3          \n"  // dst += 8 * dst_stride
+      "subs        %5,  #8                     \n"  // w   -= 8
+      "bge         1b                          \n"
+
+    // add 8 back to counter. if the result is 0 there are
+    // no residuals.
+    "adds        %5, #8                        \n"
+    "beq         4f                            \n"
+
+    // some residual, so between 1 and 7 lines left to transpose
+    "cmp         %5, #2                        \n"
+    "blt         3f                            \n"
+
+    "cmp         %5, #4                        \n"
+    "blt         2f                            \n"
+
+    // 4x8 block
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.32     {d0[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d0[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d1[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d1[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d2[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d2[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d3[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d3[1]}, [%0]                 \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(6)
+    "vld1.8      {q3}, [%6]                    \n"
+
+    "vtbl.8      d4, {d0, d1}, d6              \n"
+    "vtbl.8      d5, {d0, d1}, d7              \n"
+    "vtbl.8      d0, {d2, d3}, d6              \n"
+    "vtbl.8      d1, {d2, d3}, d7              \n"
+
+    // TODO(frkoenig): Rework shuffle above to
+    // write out with 4 instead of 8 writes.
+    MEMACCESS(0)
+    "vst1.32     {d4[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d4[1]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d5[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d5[1]}, [%0]                 \n"
+
+    "add         %0, %3, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d0[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d0[1]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d1[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d1[1]}, [%0]                 \n"
+
+    "add         %1, #4                        \n"  // src += 4
+    "add         %3, %3, %4, lsl #2            \n"  // dst += 4 * dst_stride
+    "subs        %5,  #4                       \n"  // w   -= 4
+    "beq         4f                            \n"
+
+    // some residual, check to see if it includes a 2x8 block,
+    // or less
+    "cmp         %5, #2                        \n"
+    "blt         3f                            \n"
+
+    // 2x8 block
+    "2:                                        \n"
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[2]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[2]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[3]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[3]}, [%0]                 \n"
+
+    "vtrn.8      d0, d1                        \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d0}, [%0], %4                \n"
+    MEMACCESS(0)
+    "vst1.64     {d1}, [%0]                    \n"
+
+    "add         %1, #2                        \n"  // src += 2
+    "add         %3, %3, %4, lsl #1            \n"  // dst += 2 * dst_stride
+    "subs        %5,  #2                       \n"  // w   -= 2
+    "beq         4f                            \n"
+
+    // 1x8 block
+    "3:                                        \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[0]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[1]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[2]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[3]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[4]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[5]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[6]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[7]}, [%1]                 \n"
+
+    MEMACCESS(3)
+    "vst1.64     {d0}, [%3]                    \n"
+
+    "4:                                        \n"
+
+    : "+r"(src_temp),          // %0
+      "+r"(src),               // %1
+      "+r"(src_stride),        // %2
+      "+r"(dst),               // %3
+      "+r"(dst_stride),        // %4
+      "+r"(width)              // %5
+    : "r"(&kVTbl4x4Transpose)  // %6
+    : "memory", "cc", "q0", "q1", "q2", "q3"
+  );
+}
+
+static uvec8 kVTbl4x4TransposeDi =
+  { 0,  8,  1,  9,  2, 10,  3, 11,  4, 12,  5, 13,  6, 14,  7, 15 };
+
+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int width) {
+  const uint8* src_temp = NULL;
+  asm volatile (
+    // loops are on blocks of 8. loop will stop when
+    // counter gets to or below 0. starting the counter
+    // at w-8 allow for this
+    "sub         %7, #8                        \n"
+
+    // handle 8x8 blocks. this should be the majority of the plane
+    ".p2align  2                               \n"
+    "1:                                        \n"
+      "mov         %0, %1                      \n"
+
+      MEMACCESS(0)
+      "vld2.8      {d0,  d1},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d2,  d3},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d4,  d5},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d6,  d7},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d16, d17}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d18, d19}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d20, d21}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d22, d23}, [%0]            \n"
+
+      "vtrn.8      q1, q0                      \n"
+      "vtrn.8      q3, q2                      \n"
+      "vtrn.8      q9, q8                      \n"
+      "vtrn.8      q11, q10                    \n"
+
+      "vtrn.16     q1, q3                      \n"
+      "vtrn.16     q0, q2                      \n"
+      "vtrn.16     q9, q11                     \n"
+      "vtrn.16     q8, q10                     \n"
+
+      "vtrn.32     q1, q9                      \n"
+      "vtrn.32     q0, q8                      \n"
+      "vtrn.32     q3, q11                     \n"
+      "vtrn.32     q2, q10                     \n"
+
+      "vrev16.8    q0, q0                      \n"
+      "vrev16.8    q1, q1                      \n"
+      "vrev16.8    q2, q2                      \n"
+      "vrev16.8    q3, q3                      \n"
+      "vrev16.8    q8, q8                      \n"
+      "vrev16.8    q9, q9                      \n"
+      "vrev16.8    q10, q10                    \n"
+      "vrev16.8    q11, q11                    \n"
+
+      "mov         %0, %3                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d2},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d0},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d6},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d4},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d18}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d16}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d22}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d20}, [%0]                 \n"
+
+      "mov         %0, %5                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d3},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d1},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d7},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d5},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d19}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d17}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d23}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d21}, [%0]                 \n"
+
+      "add         %1, #8*2                    \n"  // src   += 8*2
+      "add         %3, %3, %4, lsl #3          \n"  // dst_a += 8 * dst_stride_a
+      "add         %5, %5, %6, lsl #3          \n"  // dst_b += 8 * dst_stride_b
+      "subs        %7,  #8                     \n"  // w     -= 8
+      "bge         1b                          \n"
+
+    // add 8 back to counter. if the result is 0 there are
+    // no residuals.
+    "adds        %7, #8                        \n"
+    "beq         4f                            \n"
+
+    // some residual, so between 1 and 7 lines left to transpose
+    "cmp         %7, #2                        \n"
+    "blt         3f                            \n"
+
+    "cmp         %7, #4                        \n"
+    "blt         2f                            \n"
+
+    // TODO(frkoenig): Clean this up
+    // 4x8 block
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.64     {d0}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d1}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d2}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d3}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d4}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d5}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d6}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d7}, [%0]                    \n"
+
+    MEMACCESS(8)
+    "vld1.8      {q15}, [%8]                   \n"
+
+    "vtrn.8      q0, q1                        \n"
+    "vtrn.8      q2, q3                        \n"
+
+    "vtbl.8      d16, {d0, d1}, d30            \n"
+    "vtbl.8      d17, {d0, d1}, d31            \n"
+    "vtbl.8      d18, {d2, d3}, d30            \n"
+    "vtbl.8      d19, {d2, d3}, d31            \n"
+    "vtbl.8      d20, {d4, d5}, d30            \n"
+    "vtbl.8      d21, {d4, d5}, d31            \n"
+    "vtbl.8      d22, {d6, d7}, d30            \n"
+    "vtbl.8      d23, {d6, d7}, d31            \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.32     {d16[0]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d16[1]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d17[0]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d17[1]},  [%0], %4           \n"
+
+    "add         %0, %3, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d20[0]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d20[1]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d21[0]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d21[1]}, [%0]                \n"
+
+    "mov         %0, %5                        \n"
+
+    MEMACCESS(0)
+    "vst1.32     {d18[0]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d18[1]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d19[0]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d19[1]}, [%0], %6            \n"
+
+    "add         %0, %5, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d22[0]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d22[1]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d23[0]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d23[1]},  [%0]               \n"
+
+    "add         %1, #4*2                      \n"  // src   += 4 * 2
+    "add         %3, %3, %4, lsl #2            \n"  // dst_a += 4 * dst_stride_a
+    "add         %5, %5, %6, lsl #2            \n"  // dst_b += 4 * dst_stride_b
+    "subs        %7,  #4                       \n"  // w     -= 4
+    "beq         4f                            \n"
+
+    // some residual, check to see if it includes a 2x8 block,
+    // or less
+    "cmp         %7, #2                        \n"
+    "blt         3f                            \n"
+
+    // 2x8 block
+    "2:                                        \n"
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[0], d2[0]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[0], d3[0]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[1], d2[1]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[1], d3[1]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[2], d2[2]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[2], d3[2]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[3], d2[3]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[3], d3[3]}, [%0]          \n"
+
+    "vtrn.8      d0, d1                        \n"
+    "vtrn.8      d2, d3                        \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d0}, [%0], %4                \n"
+    MEMACCESS(0)
+    "vst1.64     {d2}, [%0]                    \n"
+
+    "mov         %0, %5                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d1}, [%0], %6                \n"
+    MEMACCESS(0)
+    "vst1.64     {d3}, [%0]                    \n"
+
+    "add         %1, #2*2                      \n"  // src   += 2 * 2
+    "add         %3, %3, %4, lsl #1            \n"  // dst_a += 2 * dst_stride_a
+    "add         %5, %5, %6, lsl #1            \n"  // dst_b += 2 * dst_stride_b
+    "subs        %7,  #2                       \n"  // w     -= 2
+    "beq         4f                            \n"
+
+    // 1x8 block
+    "3:                                        \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[0], d1[0]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[1], d1[1]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[2], d1[2]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[3], d1[3]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[4], d1[4]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[5], d1[5]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[6], d1[6]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[7], d1[7]}, [%1]          \n"
+
+    MEMACCESS(3)
+    "vst1.64     {d0}, [%3]                    \n"
+    MEMACCESS(5)
+    "vst1.64     {d1}, [%5]                    \n"
+
+    "4:                                        \n"
+
+    : "+r"(src_temp),            // %0
+      "+r"(src),                 // %1
+      "+r"(src_stride),          // %2
+      "+r"(dst_a),               // %3
+      "+r"(dst_stride_a),        // %4
+      "+r"(dst_b),               // %5
+      "+r"(dst_stride_b),        // %6
+      "+r"(width)                // %7
+    : "r"(&kVTbl4x4TransposeDi)  // %8
+    : "memory", "cc",
+      "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
+  );
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/rotate_neon64.cc b/src/main/jni/libyuv/source/rotate_neon64.cc
new file mode 100644
index 000000000..b080a2c6a
--- /dev/null
+++ b/src/main/jni/libyuv/source/rotate_neon64.cc
@@ -0,0 +1,540 @@
+/*
+ *  Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+//this ifdef should be removed if TransposeWx8_NEON's aarch64 has
+//been done
+#ifdef HAS_TRANSPOSE_WX8_NEON
+static uvec8 kVTbl4x4Transpose =
+  { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 };
+
+void TransposeWx8_NEON(const uint8* src, int src_stride,
+                       uint8* dst, int dst_stride,
+                       int width) {
+  const uint8* src_temp = NULL;
+  asm volatile (
+    // loops are on blocks of 8. loop will stop when
+    // counter gets to or below 0. starting the counter
+    // at w-8 allow for this
+    "sub         %5, #8                        \n"
+
+    // handle 8x8 blocks. this should be the majority of the plane
+    ".p2align  2                               \n"
+    "1:                                        \n"
+      "mov         %0, %1                      \n"
+
+      MEMACCESS(0)
+      "vld1.8      {d0}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d1}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d2}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d3}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d4}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d5}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d6}, [%0], %2              \n"
+      MEMACCESS(0)
+      "vld1.8      {d7}, [%0]                  \n"
+
+      "vtrn.8      d1, d0                      \n"
+      "vtrn.8      d3, d2                      \n"
+      "vtrn.8      d5, d4                      \n"
+      "vtrn.8      d7, d6                      \n"
+
+      "vtrn.16     d1, d3                      \n"
+      "vtrn.16     d0, d2                      \n"
+      "vtrn.16     d5, d7                      \n"
+      "vtrn.16     d4, d6                      \n"
+
+      "vtrn.32     d1, d5                      \n"
+      "vtrn.32     d0, d4                      \n"
+      "vtrn.32     d3, d7                      \n"
+      "vtrn.32     d2, d6                      \n"
+
+      "vrev16.8    q0, q0                      \n"
+      "vrev16.8    q1, q1                      \n"
+      "vrev16.8    q2, q2                      \n"
+      "vrev16.8    q3, q3                      \n"
+
+      "mov         %0, %3                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d1}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d0}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d3}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d2}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d5}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d4}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d7}, [%0], %4              \n"
+    MEMACCESS(0)
+      "vst1.8      {d6}, [%0]                  \n"
+
+      "add         %1, #8                      \n"  // src += 8
+      "add         %3, %3, %4, lsl #3          \n"  // dst += 8 * dst_stride
+      "subs        %5,  #8                     \n"  // w   -= 8
+      "bge         1b                          \n"
+
+    // add 8 back to counter. if the result is 0 there are
+    // no residuals.
+    "adds        %5, #8                        \n"
+    "beq         4f                            \n"
+
+    // some residual, so between 1 and 7 lines left to transpose
+    "cmp         %5, #2                        \n"
+    "blt         3f                            \n"
+
+    "cmp         %5, #4                        \n"
+    "blt         2f                            \n"
+
+    // 4x8 block
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.32     {d0[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d0[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d1[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d1[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d2[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d2[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d3[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.32     {d3[1]}, [%0]                 \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(6)
+    "vld1.8      {q3}, [%6]                    \n"
+
+    "vtbl.8      d4, {d0, d1}, d6              \n"
+    "vtbl.8      d5, {d0, d1}, d7              \n"
+    "vtbl.8      d0, {d2, d3}, d6              \n"
+    "vtbl.8      d1, {d2, d3}, d7              \n"
+
+    // TODO(frkoenig): Rework shuffle above to
+    // write out with 4 instead of 8 writes.
+    MEMACCESS(0)
+    "vst1.32     {d4[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d4[1]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d5[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d5[1]}, [%0]                 \n"
+
+    "add         %0, %3, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d0[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d0[1]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d1[0]}, [%0], %4             \n"
+    MEMACCESS(0)
+    "vst1.32     {d1[1]}, [%0]                 \n"
+
+    "add         %1, #4                        \n"  // src += 4
+    "add         %3, %3, %4, lsl #2            \n"  // dst += 4 * dst_stride
+    "subs        %5,  #4                       \n"  // w   -= 4
+    "beq         4f                            \n"
+
+    // some residual, check to see if it includes a 2x8 block,
+    // or less
+    "cmp         %5, #2                        \n"
+    "blt         3f                            \n"
+
+    // 2x8 block
+    "2:                                        \n"
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[0]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[1]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[2]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[2]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d0[3]}, [%0], %2             \n"
+    MEMACCESS(0)
+    "vld1.16     {d1[3]}, [%0]                 \n"
+
+    "vtrn.8      d0, d1                        \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d0}, [%0], %4                \n"
+    MEMACCESS(0)
+    "vst1.64     {d1}, [%0]                    \n"
+
+    "add         %1, #2                        \n"  // src += 2
+    "add         %3, %3, %4, lsl #1            \n"  // dst += 2 * dst_stride
+    "subs        %5,  #2                       \n"  // w   -= 2
+    "beq         4f                            \n"
+
+    // 1x8 block
+    "3:                                        \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[0]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[1]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[2]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[3]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[4]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[5]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[6]}, [%1], %2             \n"
+    MEMACCESS(1)
+    "vld1.8      {d0[7]}, [%1]                 \n"
+
+    MEMACCESS(3)
+    "vst1.64     {d0}, [%3]                    \n"
+
+    "4:                                        \n"
+
+    : "+r"(src_temp),          // %0
+      "+r"(src),               // %1
+      "+r"(src_stride),        // %2
+      "+r"(dst),               // %3
+      "+r"(dst_stride),        // %4
+      "+r"(width)              // %5
+    : "r"(&kVTbl4x4Transpose)  // %6
+    : "memory", "cc", "q0", "q1", "q2", "q3"
+  );
+}
+#endif //HAS_TRANSPOSE_WX8_NEON
+
+//this ifdef should be removed if TransposeUVWx8_NEON's aarch64 has
+//been done
+#ifdef HAS_TRANSPOSE_UVWX8_NEON
+static uvec8 kVTbl4x4TransposeDi =
+  { 0,  8,  1,  9,  2, 10,  3, 11,  4, 12,  5, 13,  6, 14,  7, 15 };
+
+void TransposeUVWx8_NEON(const uint8* src, int src_stride,
+                         uint8* dst_a, int dst_stride_a,
+                         uint8* dst_b, int dst_stride_b,
+                         int width) {
+  const uint8* src_temp = NULL;
+  asm volatile (
+    // loops are on blocks of 8. loop will stop when
+    // counter gets to or below 0. starting the counter
+    // at w-8 allow for this
+    "sub         %7, #8                        \n"
+
+    // handle 8x8 blocks. this should be the majority of the plane
+    ".p2align  2                               \n"
+    "1:                                        \n"
+      "mov         %0, %1                      \n"
+
+      MEMACCESS(0)
+      "vld2.8      {d0,  d1},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d2,  d3},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d4,  d5},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d6,  d7},  [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d16, d17}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d18, d19}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d20, d21}, [%0], %2        \n"
+      MEMACCESS(0)
+      "vld2.8      {d22, d23}, [%0]            \n"
+
+      "vtrn.8      q1, q0                      \n"
+      "vtrn.8      q3, q2                      \n"
+      "vtrn.8      q9, q8                      \n"
+      "vtrn.8      q11, q10                    \n"
+
+      "vtrn.16     q1, q3                      \n"
+      "vtrn.16     q0, q2                      \n"
+      "vtrn.16     q9, q11                     \n"
+      "vtrn.16     q8, q10                     \n"
+
+      "vtrn.32     q1, q9                      \n"
+      "vtrn.32     q0, q8                      \n"
+      "vtrn.32     q3, q11                     \n"
+      "vtrn.32     q2, q10                     \n"
+
+      "vrev16.8    q0, q0                      \n"
+      "vrev16.8    q1, q1                      \n"
+      "vrev16.8    q2, q2                      \n"
+      "vrev16.8    q3, q3                      \n"
+      "vrev16.8    q8, q8                      \n"
+      "vrev16.8    q9, q9                      \n"
+      "vrev16.8    q10, q10                    \n"
+      "vrev16.8    q11, q11                    \n"
+
+      "mov         %0, %3                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d2},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d0},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d6},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d4},  [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d18}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d16}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d22}, [%0], %4             \n"
+    MEMACCESS(0)
+      "vst1.8      {d20}, [%0]                 \n"
+
+      "mov         %0, %5                      \n"
+
+    MEMACCESS(0)
+      "vst1.8      {d3},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d1},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d7},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d5},  [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d19}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d17}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d23}, [%0], %6             \n"
+    MEMACCESS(0)
+      "vst1.8      {d21}, [%0]                 \n"
+
+      "add         %1, #8*2                    \n"  // src   += 8*2
+      "add         %3, %3, %4, lsl #3          \n"  // dst_a += 8 * dst_stride_a
+      "add         %5, %5, %6, lsl #3          \n"  // dst_b += 8 * dst_stride_b
+      "subs        %7,  #8                     \n"  // w     -= 8
+      "bge         1b                          \n"
+
+    // add 8 back to counter. if the result is 0 there are
+    // no residuals.
+    "adds        %7, #8                        \n"
+    "beq         4f                            \n"
+
+    // some residual, so between 1 and 7 lines left to transpose
+    "cmp         %7, #2                        \n"
+    "blt         3f                            \n"
+
+    "cmp         %7, #4                        \n"
+    "blt         2f                            \n"
+
+    // TODO(frkoenig): Clean this up
+    // 4x8 block
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld1.64     {d0}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d1}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d2}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d3}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d4}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d5}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d6}, [%0], %2                \n"
+    MEMACCESS(0)
+    "vld1.64     {d7}, [%0]                    \n"
+
+    MEMACCESS(8)
+    "vld1.8      {q15}, [%8]                   \n"
+
+    "vtrn.8      q0, q1                        \n"
+    "vtrn.8      q2, q3                        \n"
+
+    "vtbl.8      d16, {d0, d1}, d30            \n"
+    "vtbl.8      d17, {d0, d1}, d31            \n"
+    "vtbl.8      d18, {d2, d3}, d30            \n"
+    "vtbl.8      d19, {d2, d3}, d31            \n"
+    "vtbl.8      d20, {d4, d5}, d30            \n"
+    "vtbl.8      d21, {d4, d5}, d31            \n"
+    "vtbl.8      d22, {d6, d7}, d30            \n"
+    "vtbl.8      d23, {d6, d7}, d31            \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.32     {d16[0]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d16[1]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d17[0]},  [%0], %4           \n"
+    MEMACCESS(0)
+    "vst1.32     {d17[1]},  [%0], %4           \n"
+
+    "add         %0, %3, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d20[0]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d20[1]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d21[0]}, [%0], %4            \n"
+    MEMACCESS(0)
+    "vst1.32     {d21[1]}, [%0]                \n"
+
+    "mov         %0, %5                        \n"
+
+    MEMACCESS(0)
+    "vst1.32     {d18[0]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d18[1]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d19[0]}, [%0], %6            \n"
+    MEMACCESS(0)
+    "vst1.32     {d19[1]}, [%0], %6            \n"
+
+    "add         %0, %5, #4                    \n"
+    MEMACCESS(0)
+    "vst1.32     {d22[0]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d22[1]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d23[0]},  [%0], %6           \n"
+    MEMACCESS(0)
+    "vst1.32     {d23[1]},  [%0]               \n"
+
+    "add         %1, #4*2                      \n"  // src   += 4 * 2
+    "add         %3, %3, %4, lsl #2            \n"  // dst_a += 4 * dst_stride_a
+    "add         %5, %5, %6, lsl #2            \n"  // dst_b += 4 * dst_stride_b
+    "subs        %7,  #4                       \n"  // w     -= 4
+    "beq         4f                            \n"
+
+    // some residual, check to see if it includes a 2x8 block,
+    // or less
+    "cmp         %7, #2                        \n"
+    "blt         3f                            \n"
+
+    // 2x8 block
+    "2:                                        \n"
+    "mov         %0, %1                        \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[0], d2[0]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[0], d3[0]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[1], d2[1]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[1], d3[1]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[2], d2[2]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[2], d3[2]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d0[3], d2[3]}, [%0], %2      \n"
+    MEMACCESS(0)
+    "vld2.16     {d1[3], d3[3]}, [%0]          \n"
+
+    "vtrn.8      d0, d1                        \n"
+    "vtrn.8      d2, d3                        \n"
+
+    "mov         %0, %3                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d0}, [%0], %4                \n"
+    MEMACCESS(0)
+    "vst1.64     {d2}, [%0]                    \n"
+
+    "mov         %0, %5                        \n"
+
+    MEMACCESS(0)
+    "vst1.64     {d1}, [%0], %6                \n"
+    MEMACCESS(0)
+    "vst1.64     {d3}, [%0]                    \n"
+
+    "add         %1, #2*2                      \n"  // src   += 2 * 2
+    "add         %3, %3, %4, lsl #1            \n"  // dst_a += 2 * dst_stride_a
+    "add         %5, %5, %6, lsl #1            \n"  // dst_b += 2 * dst_stride_b
+    "subs        %7,  #2                       \n"  // w     -= 2
+    "beq         4f                            \n"
+
+    // 1x8 block
+    "3:                                        \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[0], d1[0]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[1], d1[1]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[2], d1[2]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[3], d1[3]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[4], d1[4]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[5], d1[5]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[6], d1[6]}, [%1], %2      \n"
+    MEMACCESS(1)
+    "vld2.8      {d0[7], d1[7]}, [%1]          \n"
+
+    MEMACCESS(3)
+    "vst1.64     {d0}, [%3]                    \n"
+    MEMACCESS(5)
+    "vst1.64     {d1}, [%5]                    \n"
+
+    "4:                                        \n"
+
+    : "+r"(src_temp),            // %0
+      "+r"(src),                 // %1
+      "+r"(src_stride),          // %2
+      "+r"(dst_a),               // %3
+      "+r"(dst_stride_a),        // %4
+      "+r"(dst_b),               // %5
+      "+r"(dst_stride_b),        // %6
+      "+r"(width)                // %7
+    : "r"(&kVTbl4x4TransposeDi)  // %8
+    : "memory", "cc",
+      "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
+  );
+}
+#endif // HAS_TRANSPOSE_UVWX8_NEON
+#endif // __aarch64__
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_any.cc b/src/main/jni/libyuv/source/row_any.cc
new file mode 100644
index 000000000..aaa0378d7
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_any.cc
@@ -0,0 +1,602 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
+// TODO(fbarchard): Consider 'any' functions handling odd alignment.
+// YUV to RGB does multiple of 8 with SIMD and remainder with C.
+#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK)        \
+    void NAMEANY(const uint8* y_buf,                                           \
+                 const uint8* u_buf,                                           \
+                 const uint8* v_buf,                                           \
+                 uint8* rgb_buf,                                               \
+                 int width) {                                                  \
+      int n = width & ~MASK;                                                   \
+      I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n);                         \
+      I420TORGB_C(y_buf + n,                                                   \
+                  u_buf + (n >> UV_SHIFT),                                     \
+                  v_buf + (n >> UV_SHIFT),                                     \
+                  rgb_buf + n * BPP, width & MASK);                            \
+    }
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
+     1, 4, 7)
+#endif  // HAS_I422TOARGBROW_SSSE3
+#ifdef HAS_I444TOARGBROW_SSSE3
+YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
+     0, 4, 7)
+YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
+     2, 4, 7)
+YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
+     1, 4, 7)
+YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
+     1, 4, 7)
+YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
+     1, 4, 7)
+// I422ToRGB565Row_SSSE3 is unaligned.
+YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
+     1, 2, 7)
+YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C,
+     1, 2, 7)
+YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C,
+     1, 2, 7)
+// I422ToRGB24Row_SSSE3 is unaligned.
+YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
+YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
+YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
+YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
+#endif  // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I422TOARGBROW_AVX2
+YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
+#endif  // HAS_I422TOARGBROW_AVX2
+#ifdef HAS_I422TOARGBROW_NEON
+YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
+YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
+YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
+YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
+YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
+YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
+YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
+YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
+YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
+     1, 2, 7)
+YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
+     1, 2, 7)
+YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
+#endif  // HAS_I422TOARGBROW_NEON
+#ifdef HAS_I422TOYUY2ROW_NEON
+YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
+#endif  // HAS_I422TOYUY2ROW_NEON
+#ifdef HAS_I422TOUYVYROW_NEON
+YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
+#endif  // HAS_I422TOUYVYROW_NEON
+#undef YANY
+
+// Wrappers to handle odd width
+#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP)             \
+    void NAMEANY(const uint8* y_buf,                                           \
+                 const uint8* uv_buf,                                          \
+                 uint8* rgb_buf,                                               \
+                 int width) {                                                  \
+      int n = width & ~7;                                                      \
+      NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n);                               \
+      NV12TORGB_C(y_buf + n,                                                   \
+                  uv_buf + (n >> UV_SHIFT),                                    \
+                  rgb_buf + n * BPP, width & 7);                               \
+    }
+
+#ifdef HAS_NV12TOARGBROW_SSSE3
+NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
+      0, 4)
+NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
+      0, 4)
+#endif  // HAS_NV12TOARGBROW_SSSE3
+#ifdef HAS_NV12TOARGBROW_NEON
+NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
+NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
+#endif  // HAS_NV12TOARGBROW_NEON
+#ifdef HAS_NV12TORGB565ROW_SSSE3
+NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
+      0, 2)
+NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
+      0, 2)
+#endif  // HAS_NV12TORGB565ROW_SSSE3
+#ifdef HAS_NV12TORGB565ROW_NEON
+NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
+NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
+#endif  // HAS_NV12TORGB565ROW_NEON
+#undef NVANY
+
+#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)          \
+    void NAMEANY(const uint8* src,                                             \
+                 uint8* dst,                                                   \
+                 int width) {                                                  \
+      int n = width & ~MASK;                                                   \
+      ARGBTORGB_SIMD(src, dst, n);                                             \
+      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK);                \
+    }
+
+#if defined(HAS_ARGBTORGB24ROW_SSSE3)
+RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C,
+       15, 4, 3)
+RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C,
+       15, 4, 3)
+RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C,
+       3, 4, 2)
+RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C,
+       3, 4, 2)
+RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
+       3, 4, 2)
+#endif
+#if defined(HAS_I400TOARGBROW_SSE2)
+RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
+       7, 1, 4)
+#endif
+#if defined(HAS_YTOARGBROW_SSE2)
+RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
+       7, 1, 4)
+RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
+       15, 2, 4)
+RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
+       15, 2, 4)
+// These require alignment on ARGB, so C is used for remainder.
+RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
+       15, 3, 4)
+RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C,
+       15, 3, 4)
+RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
+       7, 2, 4)
+RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
+       7, 2, 4)
+RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
+       7, 2, 4)
+#endif
+#if defined(HAS_ARGBTORGB24ROW_NEON)
+RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
+RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
+RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
+       7, 4, 2)
+RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
+       7, 4, 2)
+RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
+       7, 4, 2)
+RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
+       7, 1, 4)
+RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C,
+       7, 1, 4)
+RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
+       7, 2, 4)
+RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
+       7, 2, 4)
+#endif
+#undef RGBANY
+
+// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
+#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)        \
+    void NAMEANY(const uint8* src,                                             \
+                 uint8* dst, uint32 selector,                                  \
+                 int width) {                                                  \
+      int n = width & ~MASK;                                                   \
+      ARGBTORGB_SIMD(src, dst, selector, n);                                   \
+      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK);      \
+    }
+
+#if defined(HAS_ARGBTOBAYERROW_SSSE3)
+BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
+         7, 4, 1)
+#endif
+#if defined(HAS_ARGBTOBAYERROW_NEON)
+BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
+         7, 4, 1)
+#endif
+#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
+BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
+         7, 4, 1)
+#endif
+#if defined(HAS_ARGBTOBAYERGGROW_NEON)
+BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
+         7, 4, 1)
+#endif
+
+#undef BAYERANY
+
+// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
+#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM)                            \
+    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
+      ARGBTOY_SIMD(src_argb, dst_y, width - NUM);                              \
+      ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP,                            \
+                   dst_y + (width - NUM) * BPP, NUM);                          \
+    }
+
+#ifdef HAS_ARGBTOYROW_AVX2
+YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
+YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
+YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
+YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
+#endif
+#ifdef HAS_ARGBTOYROW_SSSE3
+YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
+#endif
+#ifdef HAS_BGRATOYROW_SSSE3
+YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
+YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
+YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
+YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
+YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
+#endif
+#ifdef HAS_ARGBTOYJROW_SSSE3
+YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
+#endif
+#ifdef HAS_ARGBTOYROW_NEON
+YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
+#endif
+#ifdef HAS_ARGBTOYJROW_NEON
+YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
+#endif
+#ifdef HAS_BGRATOYROW_NEON
+YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
+#endif
+#ifdef HAS_ABGRTOYROW_NEON
+YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
+#endif
+#ifdef HAS_RGBATOYROW_NEON
+YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
+#endif
+#ifdef HAS_RGB24TOYROW_NEON
+YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
+#endif
+#ifdef HAS_RAWTOYROW_NEON
+YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
+#endif
+#ifdef HAS_RGB565TOYROW_NEON
+YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
+#endif
+#ifdef HAS_ARGB1555TOYROW_NEON
+YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
+#endif
+#ifdef HAS_ARGB4444TOYROW_NEON
+YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
+#endif
+#ifdef HAS_YUY2TOYROW_NEON
+YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_UYVYTOYROW_NEON
+YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
+#endif
+#ifdef HAS_RGB24TOARGBROW_NEON
+YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RAWTOARGBROW_NEON
+YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
+#endif
+#ifdef HAS_RGB565TOARGBROW_NEON
+YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB1555TOARGBROW_NEON
+YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
+#endif
+#ifdef HAS_ARGB4444TOARGBROW_NEON
+YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
+#endif
+#undef YANY
+
+#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
+    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
+      int n = width & ~MASK;                                                   \
+      ARGBTOY_SIMD(src_argb, dst_y, n);                                        \
+      ARGBTOY_C(src_argb + n * SBPP,                                           \
+                dst_y  + n * BPP, width & MASK);                               \
+    }
+
+// Attenuate is destructive so last16 method can not be used due to overlap.
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
+     4, 4, 3)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_SSE2
+YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
+     4, 4, 3)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
+     4, 4, 3)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
+     4, 4, 7)
+#endif
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
+     4, 4, 7)
+#endif
+#ifdef HAS_ARGBATTENUATEROW_NEON
+YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
+     4, 4, 7)
+#endif
+#undef YANY
+
+// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
+#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                     \
+    void NAMEANY(const uint8* src_argb, int src_stride_argb,                   \
+                 uint8* dst_u, uint8* dst_v, int width) {                      \
+      int n = width & ~MASK;                                                   \
+      ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n);                \
+      ANYTOUV_C(src_argb  + n * BPP, src_stride_argb,                          \
+                dst_u + (n >> 1),                                              \
+                dst_v + (n >> 1),                                              \
+                width & MASK);                                                 \
+    }
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
+UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
+UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
+#endif
+#ifdef HAS_ARGBTOUVROW_SSSE3
+UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
+UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
+      4, 15)
+UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
+UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
+UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
+UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
+UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_ARGBTOUVROW_NEON
+UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
+#endif
+#ifdef HAS_ARGBTOUVJROW_NEON
+UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
+#endif
+#ifdef HAS_BGRATOUVROW_NEON
+UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
+#endif
+#ifdef HAS_ABGRTOUVROW_NEON
+UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
+#endif
+#ifdef HAS_RGBATOUVROW_NEON
+UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
+#endif
+#ifdef HAS_RGB24TOUVROW_NEON
+UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
+#endif
+#ifdef HAS_RAWTOUVROW_NEON
+UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
+#endif
+#ifdef HAS_RGB565TOUVROW_NEON
+UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_ARGB1555TOUVROW_NEON
+UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_ARGB4444TOUVROW_NEON
+UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_YUY2TOUVROW_NEON
+UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
+#endif
+#ifdef HAS_UYVYTOUVROW_NEON
+UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
+#endif
+#undef UVANY
+
+#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT)           \
+    void NAMEANY(const uint8* src_uv,                                          \
+                 uint8* dst_u, uint8* dst_v, int width) {                      \
+      int n = width & ~MASK;                                                   \
+      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
+      ANYTOUV_C(src_uv  + n * BPP,                                             \
+                dst_u + (n >> SHIFT),                                          \
+                dst_v + (n >> SHIFT),                                          \
+                width & MASK);                                                 \
+    }
+
+#ifdef HAS_ARGBTOUV444ROW_SSSE3
+UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
+         ARGBToUV444Row_C, 4, 15, 0)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_AVX2
+UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
+         YUY2ToUV422Row_C, 2, 31, 1)
+UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
+         UYVYToUV422Row_C, 2, 31, 1)
+#endif
+#ifdef HAS_ARGBTOUVROW_SSSE3
+UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
+         ARGBToUV422Row_C, 4, 15, 1)
+UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
+         YUY2ToUV422Row_C, 2, 15, 1)
+UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
+         UYVYToUV422Row_C, 2, 15, 1)
+#endif
+#ifdef HAS_YUY2TOUV422ROW_NEON
+UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
+         ARGBToUV444Row_C, 4, 7, 0)
+UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
+         ARGBToUV422Row_C, 4, 15, 1)
+UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
+         ARGBToUV411Row_C, 4, 31, 2)
+UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
+         YUY2ToUV422Row_C, 2, 15, 1)
+UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
+         UYVYToUV422Row_C, 2, 15, 1)
+#endif
+#undef UV422ANY
+
+#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                  \
+    void NAMEANY(const uint8* src_uv,                                          \
+                 uint8* dst_u, uint8* dst_v, int width) {                      \
+      int n = width & ~MASK;                                                   \
+      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
+      ANYTOUV_C(src_uv + n * 2,                                                \
+                dst_u + n,                                                     \
+                dst_v + n,                                                     \
+                width & MASK);                                                 \
+    }
+
+#ifdef HAS_SPLITUVROW_SSE2
+SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
+#endif
+#ifdef HAS_SPLITUVROW_AVX2
+SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
+#endif
+#ifdef HAS_SPLITUVROW_NEON
+SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
+#endif
+#ifdef HAS_SPLITUVROW_MIPS_DSPR2
+SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
+              SplitUVRow_C, 15)
+#endif
+#undef SPLITUVROWANY
+
+#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                 \
+    void NAMEANY(const uint8* src_u, const uint8* src_v,                       \
+                 uint8* dst_uv, int width) {                                   \
+      int n = width & ~MASK;                                                   \
+      ANYTOUV_SIMD(src_u, src_v, dst_uv, n);                                   \
+      ANYTOUV_C(src_u + n,                                                     \
+                src_v + n,                                                     \
+                dst_uv + n * 2,                                                \
+                width & MASK);                                                 \
+    }
+
+#ifdef HAS_MERGEUVROW_SSE2
+MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
+#endif
+#ifdef HAS_MERGEUVROW_AVX2
+MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
+#endif
+#ifdef HAS_MERGEUVROW_NEON
+MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
+#endif
+#undef MERGEUVROW_ANY
+
+#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK)                  \
+    void NAMEANY(const uint8* src_argb0, const uint8* src_argb1,               \
+                 uint8* dst_argb, int width) {                                 \
+      int n = width & ~MASK;                                                   \
+      ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n);                        \
+      ARGBMATH_C(src_argb0 + n * 4,                                            \
+                 src_argb1 + n * 4,                                            \
+                 dst_argb + n * 4,                                             \
+                 width & MASK);                                                \
+    }
+
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
+            3)
+#endif
+#ifdef HAS_ARGBADDROW_SSE2
+MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
+            3)
+#endif
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
+            7)
+#endif
+#ifdef HAS_ARGBADDROW_AVX2
+MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
+            7)
+#endif
+#ifdef HAS_ARGBMULTIPLYROW_NEON
+MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
+            7)
+#endif
+#ifdef HAS_ARGBADDROW_NEON
+MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
+#endif
+#ifdef HAS_ARGBSUBTRACTROW_NEON
+MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
+            7)
+#endif
+#undef MATHROW_ANY
+
+// Shuffle may want to work in place, so last16 method can not be used.
+#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
+    void NAMEANY(const uint8* src_argb, uint8* dst_argb,                       \
+                 const uint8* shuffler, int width) {                           \
+      int n = width & ~MASK;                                                   \
+      ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n);                           \
+      ARGBTOY_C(src_argb + n * SBPP,                                           \
+                dst_argb  + n * BPP, shuffler, width & MASK);                  \
+    }
+
+#ifdef HAS_ARGBSHUFFLEROW_SSE2
+YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
+     ARGBShuffleRow_C, 4, 4, 3)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_SSSE3
+YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
+     ARGBShuffleRow_C, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
+     ARGBShuffleRow_C, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBSHUFFLEROW_NEON
+YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
+     ARGBShuffleRow_C, 4, 4, 3)
+#endif
+#undef YANY
+
+// Interpolate may want to work in place, so last16 method can not be used.
+#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK)                      \
+    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \
+                 ptrdiff_t src_stride_ptr, int width,                          \
+                 int source_y_fraction) {                                      \
+      int n = width & ~MASK;                                                   \
+      TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr,                              \
+                n, source_y_fraction);                                         \
+      TERP_C(dst_ptr + n * BPP,                                                \
+             src_ptr + n * SBPP, src_stride_ptr,                               \
+             width & MASK, source_y_fraction);                                 \
+    }
+
+#ifdef HAS_INTERPOLATEROW_AVX2
+NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
+     InterpolateRow_C, 1, 1, 32)
+#endif
+#ifdef HAS_INTERPOLATEROW_SSSE3
+NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
+     InterpolateRow_C, 1, 1, 15)
+#endif
+#ifdef HAS_INTERPOLATEROW_SSE2
+NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
+     InterpolateRow_C, 1, 1, 15)
+#endif
+#ifdef HAS_INTERPOLATEROW_NEON
+NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
+     InterpolateRow_C, 1, 1, 15)
+#endif
+#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
+NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
+     InterpolateRow_C, 1, 1, 3)
+#endif
+#undef NANY
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_common.cc b/src/main/jni/libyuv/source/row_common.cc
new file mode 100644
index 000000000..fa2b752a2
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_common.cc
@@ -0,0 +1,2286 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include <string.h>  // For memcpy and memset.
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// llvm x86 is poor at ternary operator, so use branchless min/max.
+
+#define USE_BRANCHLESS 1
+#if USE_BRANCHLESS
+static __inline int32 clamp0(int32 v) {
+  return ((-(v) >> 31) & (v));
+}
+
+static __inline int32 clamp255(int32 v) {
+  return (((255 - (v)) >> 31) | (v)) & 255;
+}
+
+static __inline uint32 Clamp(int32 val) {
+  int v = clamp0(val);
+  return (uint32)(clamp255(v));
+}
+
+static __inline uint32 Abs(int32 v) {
+  int m = v >> 31;
+  return (v + m) ^ m;
+}
+#else  // USE_BRANCHLESS
+static __inline int32 clamp0(int32 v) {
+  return (v < 0) ? 0 : v;
+}
+
+static __inline int32 clamp255(int32 v) {
+  return (v > 255) ? 255 : v;
+}
+
+static __inline uint32 Clamp(int32 val) {
+  int v = clamp0(val);
+  return (uint32)(clamp255(v));
+}
+
+static __inline uint32 Abs(int32 v) {
+  return (v < 0) ? -v : v;
+}
+#endif  // USE_BRANCHLESS
+
+#ifdef LIBYUV_LITTLE_ENDIAN
+#define WRITEWORD(p, v) *(uint32*)(p) = v
+#else
+static inline void WRITEWORD(uint8* p, uint32 v) {
+  p[0] = (uint8)(v & 255);
+  p[1] = (uint8)((v >> 8) & 255);
+  p[2] = (uint8)((v >> 16) & 255);
+  p[3] = (uint8)((v >> 24) & 255);
+}
+#endif
+
+void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_rgb24[0];
+    uint8 g = src_rgb24[1];
+    uint8 r = src_rgb24[2];
+    dst_argb[0] = b;
+    dst_argb[1] = g;
+    dst_argb[2] = r;
+    dst_argb[3] = 255u;
+    dst_argb += 4;
+    src_rgb24 += 3;
+  }
+}
+
+void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 r = src_raw[0];
+    uint8 g = src_raw[1];
+    uint8 b = src_raw[2];
+    dst_argb[0] = b;
+    dst_argb[1] = g;
+    dst_argb[2] = r;
+    dst_argb[3] = 255u;
+    dst_argb += 4;
+    src_raw += 3;
+  }
+}
+
+void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_rgb565[0] & 0x1f;
+    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+    uint8 r = src_rgb565[1] >> 3;
+    dst_argb[0] = (b << 3) | (b >> 2);
+    dst_argb[1] = (g << 2) | (g >> 4);
+    dst_argb[2] = (r << 3) | (r >> 2);
+    dst_argb[3] = 255u;
+    dst_argb += 4;
+    src_rgb565 += 2;
+  }
+}
+
+void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
+                         int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb1555[0] & 0x1f;
+    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
+    uint8 a = src_argb1555[1] >> 7;
+    dst_argb[0] = (b << 3) | (b >> 2);
+    dst_argb[1] = (g << 3) | (g >> 2);
+    dst_argb[2] = (r << 3) | (r >> 2);
+    dst_argb[3] = -a;
+    dst_argb += 4;
+    src_argb1555 += 2;
+  }
+}
+
+void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
+                         int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb4444[0] & 0x0f;
+    uint8 g = src_argb4444[0] >> 4;
+    uint8 r = src_argb4444[1] & 0x0f;
+    uint8 a = src_argb4444[1] >> 4;
+    dst_argb[0] = (b << 4) | b;
+    dst_argb[1] = (g << 4) | g;
+    dst_argb[2] = (r << 4) | r;
+    dst_argb[3] = (a << 4) | a;
+    dst_argb += 4;
+    src_argb4444 += 2;
+  }
+}
+
+void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb[0];
+    uint8 g = src_argb[1];
+    uint8 r = src_argb[2];
+    dst_rgb[0] = b;
+    dst_rgb[1] = g;
+    dst_rgb[2] = r;
+    dst_rgb += 3;
+    src_argb += 4;
+  }
+}
+
+void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb[0];
+    uint8 g = src_argb[1];
+    uint8 r = src_argb[2];
+    dst_rgb[0] = r;
+    dst_rgb[1] = g;
+    dst_rgb[2] = b;
+    dst_rgb += 3;
+    src_argb += 4;
+  }
+}
+
+void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_argb[0] >> 3;
+    uint8 g0 = src_argb[1] >> 2;
+    uint8 r0 = src_argb[2] >> 3;
+    uint8 b1 = src_argb[4] >> 3;
+    uint8 g1 = src_argb[5] >> 2;
+    uint8 r1 = src_argb[6] >> 3;
+    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
+              (b1 << 16) | (g1 << 21) | (r1 << 27));
+    dst_rgb += 4;
+    src_argb += 8;
+  }
+  if (width & 1) {
+    uint8 b0 = src_argb[0] >> 3;
+    uint8 g0 = src_argb[1] >> 2;
+    uint8 r0 = src_argb[2] >> 3;
+    *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
+  }
+}
+
+void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_argb[0] >> 3;
+    uint8 g0 = src_argb[1] >> 3;
+    uint8 r0 = src_argb[2] >> 3;
+    uint8 a0 = src_argb[3] >> 7;
+    uint8 b1 = src_argb[4] >> 3;
+    uint8 g1 = src_argb[5] >> 3;
+    uint8 r1 = src_argb[6] >> 3;
+    uint8 a1 = src_argb[7] >> 7;
+    *(uint32*)(dst_rgb) =
+        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
+        (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
+    dst_rgb += 4;
+    src_argb += 8;
+  }
+  if (width & 1) {
+    uint8 b0 = src_argb[0] >> 3;
+    uint8 g0 = src_argb[1] >> 3;
+    uint8 r0 = src_argb[2] >> 3;
+    uint8 a0 = src_argb[3] >> 7;
+    *(uint16*)(dst_rgb) =
+        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
+  }
+}
+
+void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_argb[0] >> 4;
+    uint8 g0 = src_argb[1] >> 4;
+    uint8 r0 = src_argb[2] >> 4;
+    uint8 a0 = src_argb[3] >> 4;
+    uint8 b1 = src_argb[4] >> 4;
+    uint8 g1 = src_argb[5] >> 4;
+    uint8 r1 = src_argb[6] >> 4;
+    uint8 a1 = src_argb[7] >> 4;
+    *(uint32*)(dst_rgb) =
+        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
+        (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
+    dst_rgb += 4;
+    src_argb += 8;
+  }
+  if (width & 1) {
+    uint8 b0 = src_argb[0] >> 4;
+    uint8 g0 = src_argb[1] >> 4;
+    uint8 r0 = src_argb[2] >> 4;
+    uint8 a0 = src_argb[3] >> 4;
+    *(uint16*)(dst_rgb) =
+        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
+  }
+}
+
+static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
+  return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
+}
+
+static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
+  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+}
+static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
+  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+}
+
+#define MAKEROWY(NAME, R, G, B, BPP) \
+void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
+  int x;                                                                       \
+  for (x = 0; x < width; ++x) {                                                \
+    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
+    src_argb0 += BPP;                                                          \
+    dst_y += 1;                                                                \
+  }                                                                            \
+}                                                                              \
+void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
+                       uint8* dst_u, uint8* dst_v, int width) {                \
+  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
+  int x;                                                                       \
+  for (x = 0; x < width - 1; x += 2) {                                         \
+    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
+               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
+    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
+               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
+    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
+               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
+    dst_u[0] = RGBToU(ar, ag, ab);                                             \
+    dst_v[0] = RGBToV(ar, ag, ab);                                             \
+    src_rgb0 += BPP * 2;                                                       \
+    src_rgb1 += BPP * 2;                                                       \
+    dst_u += 1;                                                                \
+    dst_v += 1;                                                                \
+  }                                                                            \
+  if (width & 1) {                                                             \
+    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
+    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
+    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
+    dst_u[0] = RGBToU(ar, ag, ab);                                             \
+    dst_v[0] = RGBToV(ar, ag, ab);                                             \
+  }                                                                            \
+}
+
+MAKEROWY(ARGB, 2, 1, 0, 4)
+MAKEROWY(BGRA, 1, 2, 3, 4)
+MAKEROWY(ABGR, 0, 1, 2, 4)
+MAKEROWY(RGBA, 3, 2, 1, 4)
+MAKEROWY(RGB24, 2, 1, 0, 3)
+MAKEROWY(RAW, 0, 1, 2, 3)
+#undef MAKEROWY
+
+// JPeg uses a variation on BT.601-1 full range
+// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
+// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
+// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
+// BT.601 Mpeg range uses:
+// b 0.1016 * 255 = 25.908 = 25
+// g 0.5078 * 255 = 129.489 = 129
+// r 0.2578 * 255 = 65.739 = 66
+// JPeg 8 bit Y (not used):
+// b 0.11400 * 256 = 29.184 = 29
+// g 0.58700 * 256 = 150.272 = 150
+// r 0.29900 * 256 = 76.544 = 77
+// JPeg 7 bit Y:
+// b 0.11400 * 128 = 14.592 = 15
+// g 0.58700 * 128 = 75.136 = 75
+// r 0.29900 * 128 = 38.272 = 38
+// JPeg 8 bit U:
+// b  0.50000 * 255 = 127.5 = 127
+// g -0.33126 * 255 = -84.4713 = -84
+// r -0.16874 * 255 = -43.0287 = -43
+// JPeg 8 bit V:
+// b -0.08131 * 255 = -20.73405 = -20
+// g -0.41869 * 255 = -106.76595 = -107
+// r  0.50000 * 255 = 127.5 = 127
+
+static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
+  return (38 * r + 75 * g +  15 * b + 64) >> 7;
+}
+
+static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
+  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
+}
+static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
+  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
+}
+
+#define AVGB(a, b) (((a) + (b) + 1) >> 1)
+
+#define MAKEROWYJ(NAME, R, G, B, BPP) \
+void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
+  int x;                                                                       \
+  for (x = 0; x < width; ++x) {                                                \
+    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
+    src_argb0 += BPP;                                                          \
+    dst_y += 1;                                                                \
+  }                                                                            \
+}                                                                              \
+void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
+                        uint8* dst_u, uint8* dst_v, int width) {               \
+  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
+  int x;                                                                       \
+  for (x = 0; x < width - 1; x += 2) {                                         \
+    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
+                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
+    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
+                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
+    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
+                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
+    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
+    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
+    src_rgb0 += BPP * 2;                                                       \
+    src_rgb1 += BPP * 2;                                                       \
+    dst_u += 1;                                                                \
+    dst_v += 1;                                                                \
+  }                                                                            \
+  if (width & 1) {                                                             \
+    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
+    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
+    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
+    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
+    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
+  }                                                                            \
+}
+
+MAKEROWYJ(ARGB, 2, 1, 0, 4)
+#undef MAKEROWYJ
+
+void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_rgb565[0] & 0x1f;
+    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+    uint8 r = src_rgb565[1] >> 3;
+    b = (b << 3) | (b >> 2);
+    g = (g << 2) | (g >> 4);
+    r = (r << 3) | (r >> 2);
+    dst_y[0] = RGBToY(r, g, b);
+    src_rgb565 += 2;
+    dst_y += 1;
+  }
+}
+
+void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb1555[0] & 0x1f;
+    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
+    b = (b << 3) | (b >> 2);
+    g = (g << 3) | (g >> 2);
+    r = (r << 3) | (r >> 2);
+    dst_y[0] = RGBToY(r, g, b);
+    src_argb1555 += 2;
+    dst_y += 1;
+  }
+}
+
+void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 b = src_argb4444[0] & 0x0f;
+    uint8 g = src_argb4444[0] >> 4;
+    uint8 r = src_argb4444[1] & 0x0f;
+    b = (b << 4) | b;
+    g = (g << 4) | g;
+    r = (r << 4) | r;
+    dst_y[0] = RGBToY(r, g, b);
+    src_argb4444 += 2;
+    dst_y += 1;
+  }
+}
+
+void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
+                     uint8* dst_u, uint8* dst_v, int width) {
+  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_rgb565[0] & 0x1f;
+    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+    uint8 r0 = src_rgb565[1] >> 3;
+    uint8 b1 = src_rgb565[2] & 0x1f;
+    uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
+    uint8 r1 = src_rgb565[3] >> 3;
+    uint8 b2 = next_rgb565[0] & 0x1f;
+    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+    uint8 r2 = next_rgb565[1] >> 3;
+    uint8 b3 = next_rgb565[2] & 0x1f;
+    uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
+    uint8 r3 = next_rgb565[3] >> 3;
+    uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
+    uint8 g = (g0 + g1 + g2 + g3);
+    uint8 r = (r0 + r1 + r2 + r3);
+    b = (b << 1) | (b >> 6);  // 787 -> 888.
+    r = (r << 1) | (r >> 6);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+    src_rgb565 += 4;
+    next_rgb565 += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+  if (width & 1) {
+    uint8 b0 = src_rgb565[0] & 0x1f;
+    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
+    uint8 r0 = src_rgb565[1] >> 3;
+    uint8 b2 = next_rgb565[0] & 0x1f;
+    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
+    uint8 r2 = next_rgb565[1] >> 3;
+    uint8 b = (b0 + b2);  // 565 * 2 = 676.
+    uint8 g = (g0 + g2);
+    uint8 r = (r0 + r2);
+    b = (b << 2) | (b >> 4);  // 676 -> 888
+    g = (g << 1) | (g >> 6);
+    r = (r << 2) | (r >> 4);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+  }
+}
+
+void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_argb1555[0] & 0x1f;
+    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
+    uint8 b1 = src_argb1555[2] & 0x1f;
+    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
+    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
+    uint8 b2 = next_argb1555[0] & 0x1f;
+    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
+    uint8 b3 = next_argb1555[2] & 0x1f;
+    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
+    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
+    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
+    uint8 g = (g0 + g1 + g2 + g3);
+    uint8 r = (r0 + r1 + r2 + r3);
+    b = (b << 1) | (b >> 6);  // 777 -> 888.
+    g = (g << 1) | (g >> 6);
+    r = (r << 1) | (r >> 6);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+    src_argb1555 += 4;
+    next_argb1555 += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+  if (width & 1) {
+    uint8 b0 = src_argb1555[0] & 0x1f;
+    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
+    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
+    uint8 b2 = next_argb1555[0] & 0x1f;
+    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
+    uint8 r2 = next_argb1555[1] >> 3;
+    uint8 b = (b0 + b2);  // 555 * 2 = 666.
+    uint8 g = (g0 + g2);
+    uint8 r = (r0 + r2);
+    b = (b << 2) | (b >> 4);  // 666 -> 888.
+    g = (g << 2) | (g >> 4);
+    r = (r << 2) | (r >> 4);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+  }
+}
+
+void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 b0 = src_argb4444[0] & 0x0f;
+    uint8 g0 = src_argb4444[0] >> 4;
+    uint8 r0 = src_argb4444[1] & 0x0f;
+    uint8 b1 = src_argb4444[2] & 0x0f;
+    uint8 g1 = src_argb4444[2] >> 4;
+    uint8 r1 = src_argb4444[3] & 0x0f;
+    uint8 b2 = next_argb4444[0] & 0x0f;
+    uint8 g2 = next_argb4444[0] >> 4;
+    uint8 r2 = next_argb4444[1] & 0x0f;
+    uint8 b3 = next_argb4444[2] & 0x0f;
+    uint8 g3 = next_argb4444[2] >> 4;
+    uint8 r3 = next_argb4444[3] & 0x0f;
+    uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
+    uint8 g = (g0 + g1 + g2 + g3);
+    uint8 r = (r0 + r1 + r2 + r3);
+    b = (b << 2) | (b >> 4);  // 666 -> 888.
+    g = (g << 2) | (g >> 4);
+    r = (r << 2) | (r >> 4);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+    src_argb4444 += 4;
+    next_argb4444 += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+  if (width & 1) {
+    uint8 b0 = src_argb4444[0] & 0x0f;
+    uint8 g0 = src_argb4444[0] >> 4;
+    uint8 r0 = src_argb4444[1] & 0x0f;
+    uint8 b2 = next_argb4444[0] & 0x0f;
+    uint8 g2 = next_argb4444[0] >> 4;
+    uint8 r2 = next_argb4444[1] & 0x0f;
+    uint8 b = (b0 + b2);  // 444 * 2 = 555.
+    uint8 g = (g0 + g2);
+    uint8 r = (r0 + r2);
+    b = (b << 3) | (b >> 2);  // 555 -> 888.
+    g = (g << 3) | (g >> 2);
+    r = (r << 3) | (r >> 2);
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
+  }
+}
+
+void ARGBToUV444Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 ab = src_argb[0];
+    uint8 ag = src_argb[1];
+    uint8 ar = src_argb[2];
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+    src_argb += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+}
+
+void ARGBToUV422Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
+    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
+    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+    src_argb += 8;
+    dst_u += 1;
+    dst_v += 1;
+  }
+  if (width & 1) {
+    uint8 ab = src_argb[0];
+    uint8 ag = src_argb[1];
+    uint8 ar = src_argb[2];
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+  }
+}
+
+void ARGBToUV411Row_C(const uint8* src_argb,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  int x;
+  for (x = 0; x < width - 3; x += 4) {
+    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
+    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
+    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+    src_argb += 16;
+    dst_u += 1;
+    dst_v += 1;
+  }
+  if ((width & 3) == 3) {
+    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
+    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
+    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+  } else if ((width & 3) == 2) {
+    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
+    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
+    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+  } else if ((width & 3) == 1) {
+    uint8 ab = src_argb[0];
+    uint8 ag = src_argb[1];
+    uint8 ar = src_argb[2];
+    dst_u[0] = RGBToU(ar, ag, ab);
+    dst_v[0] = RGBToV(ar, ag, ab);
+  }
+}
+
+void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
+    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
+    dst_argb[3] = src_argb[3];
+    dst_argb += 4;
+    src_argb += 4;
+  }
+}
+
+// Convert a row of image to Sepia tone.
+void ARGBSepiaRow_C(uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    int b = dst_argb[0];
+    int g = dst_argb[1];
+    int r = dst_argb[2];
+    int sb = (b * 17 + g * 68 + r * 35) >> 7;
+    int sg = (b * 22 + g * 88 + r * 45) >> 7;
+    int sr = (b * 24 + g * 98 + r * 50) >> 7;
+    // b does not over flow. a is preserved from original.
+    dst_argb[0] = sb;
+    dst_argb[1] = clamp255(sg);
+    dst_argb[2] = clamp255(sr);
+    dst_argb += 4;
+  }
+}
+
+// Apply color matrix to a row of image. Matrix is signed.
+// TODO(fbarchard): Consider adding rounding (+32).
+void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
+                          const int8* matrix_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    int b = src_argb[0];
+    int g = src_argb[1];
+    int r = src_argb[2];
+    int a = src_argb[3];
+    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
+              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
+    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
+              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
+    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
+              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
+    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
+              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
+    dst_argb[0] = Clamp(sb);
+    dst_argb[1] = Clamp(sg);
+    dst_argb[2] = Clamp(sr);
+    dst_argb[3] = Clamp(sa);
+    src_argb += 4;
+    dst_argb += 4;
+  }
+}
+
+// Apply color table to a row of image.
+void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    int b = dst_argb[0];
+    int g = dst_argb[1];
+    int r = dst_argb[2];
+    int a = dst_argb[3];
+    dst_argb[0] = table_argb[b * 4 + 0];
+    dst_argb[1] = table_argb[g * 4 + 1];
+    dst_argb[2] = table_argb[r * 4 + 2];
+    dst_argb[3] = table_argb[a * 4 + 3];
+    dst_argb += 4;
+  }
+}
+
+// Apply color table to a row of image.
+void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    int b = dst_argb[0];
+    int g = dst_argb[1];
+    int r = dst_argb[2];
+    dst_argb[0] = table_argb[b * 4 + 0];
+    dst_argb[1] = table_argb[g * 4 + 1];
+    dst_argb[2] = table_argb[r * 4 + 2];
+    dst_argb += 4;
+  }
+}
+
+void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
+                       int interval_offset, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    int b = dst_argb[0];
+    int g = dst_argb[1];
+    int r = dst_argb[2];
+    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
+    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
+    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
+    dst_argb += 4;
+  }
+}
+
+#define REPEAT8(v) (v) | ((v) << 8)
+#define SHADE(f, v) v * f >> 24
+
+void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
+                    uint32 value) {
+  const uint32 b_scale = REPEAT8(value & 0xff);
+  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
+  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
+  const uint32 a_scale = REPEAT8(value >> 24);
+
+  int i;
+  for (i = 0; i < width; ++i) {
+    const uint32 b = REPEAT8(src_argb[0]);
+    const uint32 g = REPEAT8(src_argb[1]);
+    const uint32 r = REPEAT8(src_argb[2]);
+    const uint32 a = REPEAT8(src_argb[3]);
+    dst_argb[0] = SHADE(b, b_scale);
+    dst_argb[1] = SHADE(g, g_scale);
+    dst_argb[2] = SHADE(r, r_scale);
+    dst_argb[3] = SHADE(a, a_scale);
+    src_argb += 4;
+    dst_argb += 4;
+  }
+}
+#undef REPEAT8
+#undef SHADE
+
+#define REPEAT8(v) (v) | ((v) << 8)
+#define SHADE(f, v) v * f >> 16
+
+void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    const uint32 b = REPEAT8(src_argb0[0]);
+    const uint32 g = REPEAT8(src_argb0[1]);
+    const uint32 r = REPEAT8(src_argb0[2]);
+    const uint32 a = REPEAT8(src_argb0[3]);
+    const uint32 b_scale = src_argb1[0];
+    const uint32 g_scale = src_argb1[1];
+    const uint32 r_scale = src_argb1[2];
+    const uint32 a_scale = src_argb1[3];
+    dst_argb[0] = SHADE(b, b_scale);
+    dst_argb[1] = SHADE(g, g_scale);
+    dst_argb[2] = SHADE(r, r_scale);
+    dst_argb[3] = SHADE(a, a_scale);
+    src_argb0 += 4;
+    src_argb1 += 4;
+    dst_argb += 4;
+  }
+}
+#undef REPEAT8
+#undef SHADE
+
+#define SHADE(f, v) clamp255(v + f)
+
+void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
+                  uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    const int b = src_argb0[0];
+    const int g = src_argb0[1];
+    const int r = src_argb0[2];
+    const int a = src_argb0[3];
+    const int b_add = src_argb1[0];
+    const int g_add = src_argb1[1];
+    const int r_add = src_argb1[2];
+    const int a_add = src_argb1[3];
+    dst_argb[0] = SHADE(b, b_add);
+    dst_argb[1] = SHADE(g, g_add);
+    dst_argb[2] = SHADE(r, r_add);
+    dst_argb[3] = SHADE(a, a_add);
+    src_argb0 += 4;
+    src_argb1 += 4;
+    dst_argb += 4;
+  }
+}
+#undef SHADE
+
+#define SHADE(f, v) clamp0(f - v)
+
+void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    const int b = src_argb0[0];
+    const int g = src_argb0[1];
+    const int r = src_argb0[2];
+    const int a = src_argb0[3];
+    const int b_sub = src_argb1[0];
+    const int g_sub = src_argb1[1];
+    const int r_sub = src_argb1[2];
+    const int a_sub = src_argb1[3];
+    dst_argb[0] = SHADE(b, b_sub);
+    dst_argb[1] = SHADE(g, g_sub);
+    dst_argb[2] = SHADE(r, r_sub);
+    dst_argb[3] = SHADE(a, a_sub);
+    src_argb0 += 4;
+    src_argb1 += 4;
+    dst_argb += 4;
+  }
+}
+#undef SHADE
+
+// Sobel functions which mimics SSSE3.
+void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
+                 uint8* dst_sobelx, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int a = src_y0[i];
+    int b = src_y1[i];
+    int c = src_y2[i];
+    int a_sub = src_y0[i + 2];
+    int b_sub = src_y1[i + 2];
+    int c_sub = src_y2[i + 2];
+    int a_diff = a - a_sub;
+    int b_diff = b - b_sub;
+    int c_diff = c - c_sub;
+    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
+    dst_sobelx[i] = (uint8)(clamp255(sobel));
+  }
+}
+
+void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
+                 uint8* dst_sobely, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int a = src_y0[i + 0];
+    int b = src_y0[i + 1];
+    int c = src_y0[i + 2];
+    int a_sub = src_y1[i + 0];
+    int b_sub = src_y1[i + 1];
+    int c_sub = src_y1[i + 2];
+    int a_diff = a - a_sub;
+    int b_diff = b - b_sub;
+    int c_diff = c - c_sub;
+    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
+    dst_sobely[i] = (uint8)(clamp255(sobel));
+  }
+}
+
+void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int r = src_sobelx[i];
+    int b = src_sobely[i];
+    int s = clamp255(r + b);
+    dst_argb[0] = (uint8)(s);
+    dst_argb[1] = (uint8)(s);
+    dst_argb[2] = (uint8)(s);
+    dst_argb[3] = (uint8)(255u);
+    dst_argb += 4;
+  }
+}
+
+void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_y, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int r = src_sobelx[i];
+    int b = src_sobely[i];
+    int s = clamp255(r + b);
+    dst_y[i] = (uint8)(s);
+  }
+}
+
+void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                  uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    int r = src_sobelx[i];
+    int b = src_sobely[i];
+    int g = clamp255(r + b);
+    dst_argb[0] = (uint8)(b);
+    dst_argb[1] = (uint8)(g);
+    dst_argb[2] = (uint8)(r);
+    dst_argb[3] = (uint8)(255u);
+    dst_argb += 4;
+  }
+}
+
+void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
+  // Copy a Y to RGB.
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8 y = src_y[0];
+    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
+    dst_argb[3] = 255u;
+    dst_argb += 4;
+    ++src_y;
+  }
+}
+
+// C reference code that mimics the YUV assembly.
+
+#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
+
+#define UB 127 /* min(63,(int8)(2.018 * 64)) */
+#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
+#define UR 0
+
+#define VB 0
+#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
+#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
+
+// Bias
+#define BB UB * 128 + VB * 128
+#define BG UG * 128 + VG * 128
+#define BR UR * 128 + VR * 128
+
+static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
+                              uint8* b, uint8* g, uint8* r) {
+  int32 y1 = ((int32)(y) - 16) * YG;
+  *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
+  *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
+  *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
+}
+
+#if !defined(LIBYUV_DISABLE_NEON) && \
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
+// C mimic assembly.
+// TODO(fbarchard): Remove subsampling from Neon.
+void I444ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
+    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
+    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    src_u += 2;
+    src_v += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+  }
+}
+#else
+void I444ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    src_y += 1;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 4;  // Advance 1 pixel.
+  }
+}
+#endif
+// Also used for 420
+void I422ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void I422ToRGB24Row_C(const uint8* src_y,
+                      const uint8* src_u,
+                      const uint8* src_v,
+                      uint8* rgb_buf,
+                      int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 6;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+  }
+}
+
+void I422ToRAWRow_C(const uint8* src_y,
+                    const uint8* src_u,
+                    const uint8* src_v,
+                    uint8* rgb_buf,
+                    int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 6;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+  }
+}
+
+void I422ToARGB4444Row_C(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb4444,
+                         int width) {
+  uint8 b0;
+  uint8 g0;
+  uint8 r0;
+  uint8 b1;
+  uint8 g1;
+  uint8 r1;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+    b0 = b0 >> 4;
+    g0 = g0 >> 4;
+    r0 = r0 >> 4;
+    b1 = b1 >> 4;
+    g1 = g1 >> 4;
+    r1 = r1 >> 4;
+    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
+        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    dst_argb4444 += 4;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    b0 = b0 >> 4;
+    g0 = g0 >> 4;
+    r0 = r0 >> 4;
+    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
+        0xf000;
+  }
+}
+
+void I422ToARGB1555Row_C(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_argb1555,
+                         int width) {
+  uint8 b0;
+  uint8 g0;
+  uint8 r0;
+  uint8 b1;
+  uint8 g1;
+  uint8 r1;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+    b0 = b0 >> 3;
+    g0 = g0 >> 3;
+    r0 = r0 >> 3;
+    b1 = b1 >> 3;
+    g1 = g1 >> 3;
+    r1 = r1 >> 3;
+    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
+        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    dst_argb1555 += 4;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    b0 = b0 >> 3;
+    g0 = g0 >> 3;
+    r0 = r0 >> 3;
+    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
+        0x8000;
+  }
+}
+
+void I422ToRGB565Row_C(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_rgb565,
+                       int width) {
+  uint8 b0;
+  uint8 g0;
+  uint8 r0;
+  uint8 b1;
+  uint8 g1;
+  uint8 r1;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    b1 = b1 >> 3;
+    g1 = g1 >> 2;
+    r1 = r1 >> 3;
+    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
+        (b1 << 16) | (g1 << 21) | (r1 << 27);
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    dst_rgb565 += 4;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+  }
+}
+
+void I411ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 3; x += 4) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    YuvPixel(src_y[2], src_u[0], src_v[0],
+             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
+    rgb_buf[11] = 255;
+    YuvPixel(src_y[3], src_u[0], src_v[0],
+             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
+    rgb_buf[15] = 255;
+    src_y += 4;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 16;  // Advance 4 pixels.
+  }
+  if (width & 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void NV12ToARGBRow_C(const uint8* src_y,
+                     const uint8* usrc_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    usrc_v += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void NV21ToARGBRow_C(const uint8* src_y,
+                     const uint8* src_vu,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_vu[1], src_vu[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+
+    YuvPixel(src_y[1], src_vu[1], src_vu[0],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+
+    src_y += 2;
+    src_vu += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_vu[1], src_vu[0],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void NV12ToRGB565Row_C(const uint8* src_y,
+                       const uint8* usrc_v,
+                       uint8* dst_rgb565,
+                       int width) {
+  uint8 b0;
+  uint8 g0;
+  uint8 r0;
+  uint8 b1;
+  uint8 g1;
+  uint8 r1;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
+    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    b1 = b1 >> 3;
+    g1 = g1 >> 2;
+    r1 = r1 >> 3;
+    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
+        (b1 << 16) | (g1 << 21) | (r1 << 27);
+    src_y += 2;
+    usrc_v += 2;
+    dst_rgb565 += 4;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+  }
+}
+
+void NV21ToRGB565Row_C(const uint8* src_y,
+                       const uint8* vsrc_u,
+                       uint8* dst_rgb565,
+                       int width) {
+  uint8 b0;
+  uint8 g0;
+  uint8 r0;
+  uint8 b1;
+  uint8 g1;
+  uint8 r1;
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
+    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    b1 = b1 >> 3;
+    g1 = g1 >> 2;
+    r1 = r1 >> 3;
+    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
+        (b1 << 16) | (g1 << 21) | (r1 << 27);
+    src_y += 2;
+    vsrc_u += 2;
+    dst_rgb565 += 4;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
+    b0 = b0 >> 3;
+    g0 = g0 >> 2;
+    r0 = r0 >> 3;
+    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
+  }
+}
+
+void YUY2ToARGBRow_C(const uint8* src_yuy2,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_yuy2 += 4;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void UYVYToARGBRow_C(const uint8* src_uyvy,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_uyvy += 4;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void I422ToBGRARow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
+    rgb_buf[0] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
+    rgb_buf[4] = 255;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
+    rgb_buf[0] = 255;
+  }
+}
+
+void I422ToABGRRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+    rgb_buf[3] = 255;
+  }
+}
+
+void I422ToRGBARow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* rgb_buf,
+                     int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+    rgb_buf[0] = 255;
+    YuvPixel(src_y[1], src_u[0], src_v[0],
+             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
+    rgb_buf[4] = 255;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], src_u[0], src_v[0],
+             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+    rgb_buf[0] = 255;
+  }
+}
+
+void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    YuvPixel(src_y[0], 128, 128,
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+    YuvPixel(src_y[1], 128, 128,
+             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+    rgb_buf[7] = 255;
+    src_y += 2;
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    YuvPixel(src_y[0], 128, 128,
+             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+    rgb_buf[3] = 255;
+  }
+}
+
+void MirrorRow_C(const uint8* src, uint8* dst, int width) {
+  int x;
+  src += width - 1;
+  for (x = 0; x < width - 1; x += 2) {
+    dst[x] = src[0];
+    dst[x + 1] = src[-1];
+    src -= 2;
+  }
+  if (width & 1) {
+    dst[width - 1] = src[0];
+  }
+}
+
+void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+  int x;
+  src_uv += (width - 1) << 1;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_u[x] = src_uv[0];
+    dst_u[x + 1] = src_uv[-2];
+    dst_v[x] = src_uv[1];
+    dst_v[x + 1] = src_uv[-2 + 1];
+    src_uv -= 4;
+  }
+  if (width & 1) {
+    dst_u[width - 1] = src_uv[0];
+    dst_v[width - 1] = src_uv[1];
+  }
+}
+
+void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
+  int x;
+  const uint32* src32 = (const uint32*)(src);
+  uint32* dst32 = (uint32*)(dst);
+  src32 += width - 1;
+  for (x = 0; x < width - 1; x += 2) {
+    dst32[x] = src32[0];
+    dst32[x + 1] = src32[-1];
+    src32 -= 2;
+  }
+  if (width & 1) {
+    dst32[width - 1] = src32[0];
+  }
+}
+
+void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_u[x] = src_uv[0];
+    dst_u[x + 1] = src_uv[2];
+    dst_v[x] = src_uv[1];
+    dst_v[x + 1] = src_uv[3];
+    src_uv += 4;
+  }
+  if (width & 1) {
+    dst_u[width - 1] = src_uv[0];
+    dst_v[width - 1] = src_uv[1];
+  }
+}
+
+void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                  int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_uv[0] = src_u[x];
+    dst_uv[1] = src_v[x];
+    dst_uv[2] = src_u[x + 1];
+    dst_uv[3] = src_v[x + 1];
+    dst_uv += 4;
+  }
+  if (width & 1) {
+    dst_uv[0] = src_u[width - 1];
+    dst_uv[1] = src_v[width - 1];
+  }
+}
+
+void CopyRow_C(const uint8* src, uint8* dst, int count) {
+  memcpy(dst, src, count);
+}
+
+void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
+  memcpy(dst, src, count * 2);
+}
+
+void SetRow_C(uint8* dst, uint32 v8, int count) {
+#ifdef _MSC_VER
+  // VC will generate rep stosb.
+  int x;
+  for (x = 0; x < count; ++x) {
+    dst[x] = v8;
+  }
+#else
+  memset(dst, v8, count);
+#endif
+}
+
+void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
+                 int dst_stride, int height) {
+  int y;
+  for (y = 0; y < height; ++y) {
+    uint32* d = (uint32*)(dst);
+    int x;
+    for (x = 0; x < width; ++x) {
+      d[x] = v32;
+    }
+    dst += dst_stride;
+  }
+}
+
+// Filter 2 rows of YUY2 UV's (422) into U and V (420).
+void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
+                   uint8* dst_u, uint8* dst_v, int width) {
+  // Output a row of UV values, filtering 2 rows of YUY2.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
+    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
+    src_yuy2 += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+}
+
+// Copy row of YUY2 UV's (422) into U and V (422).
+void YUY2ToUV422Row_C(const uint8* src_yuy2,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  // Output a row of UV values.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_u[0] = src_yuy2[1];
+    dst_v[0] = src_yuy2[3];
+    src_yuy2 += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+}
+
+// Copy row of YUY2 Y's (422) into Y (420/422).
+void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
+  // Output a row of Y values.
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_y[x] = src_yuy2[0];
+    dst_y[x + 1] = src_yuy2[2];
+    src_yuy2 += 4;
+  }
+  if (width & 1) {
+    dst_y[width - 1] = src_yuy2[0];
+  }
+}
+
+// Filter 2 rows of UYVY UV's (422) into U and V (420).
+void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
+                   uint8* dst_u, uint8* dst_v, int width) {
+  // Output a row of UV values.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
+    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
+    src_uyvy += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+}
+
+// Copy row of UYVY UV's (422) into U and V (422).
+void UYVYToUV422Row_C(const uint8* src_uyvy,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  // Output a row of UV values.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_u[0] = src_uyvy[0];
+    dst_v[0] = src_uyvy[2];
+    src_uyvy += 4;
+    dst_u += 1;
+    dst_v += 1;
+  }
+}
+
+// Copy row of UYVY Y's (422) into Y (420/422).
+void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
+  // Output a row of Y values.
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_y[x] = src_uyvy[1];
+    dst_y[x + 1] = src_uyvy[3];
+    src_uyvy += 4;
+  }
+  if (width & 1) {
+    dst_y[width - 1] = src_uyvy[1];
+  }
+}
+
+#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
+
+// Blend src_argb0 over src_argb1 and store to dst_argb.
+// dst_argb may be src_argb0 or src_argb1.
+// This code mimics the SSSE3 version for better testability.
+void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
+                    uint8* dst_argb, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    uint32 fb = src_argb0[0];
+    uint32 fg = src_argb0[1];
+    uint32 fr = src_argb0[2];
+    uint32 a = src_argb0[3];
+    uint32 bb = src_argb1[0];
+    uint32 bg = src_argb1[1];
+    uint32 br = src_argb1[2];
+    dst_argb[0] = BLEND(fb, bb, a);
+    dst_argb[1] = BLEND(fg, bg, a);
+    dst_argb[2] = BLEND(fr, br, a);
+    dst_argb[3] = 255u;
+
+    fb = src_argb0[4 + 0];
+    fg = src_argb0[4 + 1];
+    fr = src_argb0[4 + 2];
+    a = src_argb0[4 + 3];
+    bb = src_argb1[4 + 0];
+    bg = src_argb1[4 + 1];
+    br = src_argb1[4 + 2];
+    dst_argb[4 + 0] = BLEND(fb, bb, a);
+    dst_argb[4 + 1] = BLEND(fg, bg, a);
+    dst_argb[4 + 2] = BLEND(fr, br, a);
+    dst_argb[4 + 3] = 255u;
+    src_argb0 += 8;
+    src_argb1 += 8;
+    dst_argb += 8;
+  }
+
+  if (width & 1) {
+    uint32 fb = src_argb0[0];
+    uint32 fg = src_argb0[1];
+    uint32 fr = src_argb0[2];
+    uint32 a = src_argb0[3];
+    uint32 bb = src_argb1[0];
+    uint32 bg = src_argb1[1];
+    uint32 br = src_argb1[2];
+    dst_argb[0] = BLEND(fb, bb, a);
+    dst_argb[1] = BLEND(fg, bg, a);
+    dst_argb[2] = BLEND(fr, br, a);
+    dst_argb[3] = 255u;
+  }
+}
+#undef BLEND
+#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
+
+// Multiply source RGB by alpha and store to destination.
+// This code mimics the SSSE3 version for better testability.
+void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width - 1; i += 2) {
+    uint32 b = src_argb[0];
+    uint32 g = src_argb[1];
+    uint32 r = src_argb[2];
+    uint32 a = src_argb[3];
+    dst_argb[0] = ATTENUATE(b, a);
+    dst_argb[1] = ATTENUATE(g, a);
+    dst_argb[2] = ATTENUATE(r, a);
+    dst_argb[3] = a;
+    b = src_argb[4];
+    g = src_argb[5];
+    r = src_argb[6];
+    a = src_argb[7];
+    dst_argb[4] = ATTENUATE(b, a);
+    dst_argb[5] = ATTENUATE(g, a);
+    dst_argb[6] = ATTENUATE(r, a);
+    dst_argb[7] = a;
+    src_argb += 8;
+    dst_argb += 8;
+  }
+
+  if (width & 1) {
+    const uint32 b = src_argb[0];
+    const uint32 g = src_argb[1];
+    const uint32 r = src_argb[2];
+    const uint32 a = src_argb[3];
+    dst_argb[0] = ATTENUATE(b, a);
+    dst_argb[1] = ATTENUATE(g, a);
+    dst_argb[2] = ATTENUATE(r, a);
+    dst_argb[3] = a;
+  }
+}
+#undef ATTENUATE
+
+// Divide source RGB by alpha and store to destination.
+// b = (b * 255 + (a / 2)) / a;
+// g = (g * 255 + (a / 2)) / a;
+// r = (r * 255 + (a / 2)) / a;
+// Reciprocal method is off by 1 on some values. ie 125
+// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
+#define T(a) 0x01000000 + (0x10000 / a)
+const uint32 fixed_invtbl8[256] = {
+  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
+  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
+  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
+  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
+  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
+  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
+  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
+  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
+  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
+  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
+  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
+  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
+  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
+  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
+  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
+  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
+  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
+  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
+  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
+  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
+  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
+  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
+  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
+  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
+  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
+  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
+  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
+  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
+  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
+  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
+  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
+  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
+#undef T
+
+void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    uint32 b = src_argb[0];
+    uint32 g = src_argb[1];
+    uint32 r = src_argb[2];
+    const uint32 a = src_argb[3];
+    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
+    b = (b * ia) >> 8;
+    g = (g * ia) >> 8;
+    r = (r * ia) >> 8;
+    // Clamping should not be necessary but is free in assembly.
+    dst_argb[0] = clamp255(b);
+    dst_argb[1] = clamp255(g);
+    dst_argb[2] = clamp255(r);
+    dst_argb[3] = a;
+    src_argb += 4;
+    dst_argb += 4;
+  }
+}
+
+void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
+                               const int32* previous_cumsum, int width) {
+  int32 row_sum[4] = {0, 0, 0, 0};
+  int x;
+  for (x = 0; x < width; ++x) {
+    row_sum[0] += row[x * 4 + 0];
+    row_sum[1] += row[x * 4 + 1];
+    row_sum[2] += row[x * 4 + 2];
+    row_sum[3] += row[x * 4 + 3];
+    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
+    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
+    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
+    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
+  }
+}
+
+void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
+                                int w, int area, uint8* dst, int count) {
+  float ooa = 1.0f / area;
+  int i;
+  for (i = 0; i < count; ++i) {
+    dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
+    dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
+    dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
+    dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
+    dst += 4;
+    tl += 4;
+    bl += 4;
+  }
+}
+
+// Copy pixels from rotated source to destination row with a slope.
+LIBYUV_API
+void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
+                     uint8* dst_argb, const float* uv_dudv, int width) {
+  int i;
+  // Render a row of pixels from source into a buffer.
+  float uv[2];
+  uv[0] = uv_dudv[0];
+  uv[1] = uv_dudv[1];
+  for (i = 0; i < width; ++i) {
+    int x = (int)(uv[0]);
+    int y = (int)(uv[1]);
+    *(uint32*)(dst_argb) =
+        *(const uint32*)(src_argb + y * src_argb_stride +
+                                         x * 4);
+    dst_argb += 4;
+    uv[0] += uv_dudv[2];
+    uv[1] += uv_dudv[3];
+  }
+}
+
+// Blend 2 rows into 1 for conversions such as I422ToI420.
+void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+               uint8* dst_uv, int pix) {
+  int x;
+  for (x = 0; x < pix; ++x) {
+    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+  }
+}
+
+void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+                  uint16* dst_uv, int pix) {
+  int x;
+  for (x = 0; x < pix; ++x) {
+    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+  }
+}
+
+// C version 2x2 -> 2x1.
+void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
+                      ptrdiff_t src_stride,
+                      int width, int source_y_fraction) {
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
+  const uint8* src_ptr1 = src_ptr + src_stride;
+  int x;
+  if (source_y_fraction == 0) {
+    memcpy(dst_ptr, src_ptr, width);
+    return;
+  }
+  if (source_y_fraction == 128) {
+    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
+    return;
+  }
+  for (x = 0; x < width - 1; x += 2) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
+    src_ptr += 2;
+    src_ptr1 += 2;
+    dst_ptr += 2;
+  }
+  if (width & 1) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+  }
+}
+
+void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                         ptrdiff_t src_stride,
+                         int width, int source_y_fraction) {
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
+  const uint16* src_ptr1 = src_ptr + src_stride;
+  int x;
+  if (source_y_fraction == 0) {
+    memcpy(dst_ptr, src_ptr, width * 2);
+    return;
+  }
+  if (source_y_fraction == 128) {
+    HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
+    return;
+  }
+  for (x = 0; x < width - 1; x += 2) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
+    src_ptr += 2;
+    src_ptr1 += 2;
+    dst_ptr += 2;
+  }
+  if (width & 1) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+  }
+}
+
+// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
+void ARGBToBayerRow_C(const uint8* src_argb,
+                      uint8* dst_bayer, uint32 selector, int pix) {
+  int index0 = selector & 0xff;
+  int index1 = (selector >> 8) & 0xff;
+  // Copy a row of Bayer.
+  int x;
+  for (x = 0; x < pix - 1; x += 2) {
+    dst_bayer[0] = src_argb[index0];
+    dst_bayer[1] = src_argb[index1];
+    src_argb += 8;
+    dst_bayer += 2;
+  }
+  if (pix & 1) {
+    dst_bayer[0] = src_argb[index0];
+  }
+}
+
+// Select G channel from ARGB.  e.g.  GGGGGGGG
+void ARGBToBayerGGRow_C(const uint8* src_argb,
+                        uint8* dst_bayer, uint32 selector, int pix) {
+  // Copy a row of G.
+  int x;
+  for (x = 0; x < pix - 1; x += 2) {
+    dst_bayer[0] = src_argb[1];
+    dst_bayer[1] = src_argb[5];
+    src_argb += 8;
+    dst_bayer += 2;
+  }
+  if (pix & 1) {
+    dst_bayer[0] = src_argb[1];
+  }
+}
+
+// Use first 4 shuffler values to reorder ARGB channels.
+void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
+                      const uint8* shuffler, int pix) {
+  int index0 = shuffler[0];
+  int index1 = shuffler[1];
+  int index2 = shuffler[2];
+  int index3 = shuffler[3];
+  // Shuffle a row of ARGB.
+  int x;
+  for (x = 0; x < pix; ++x) {
+    // To support in-place conversion.
+    uint8 b = src_argb[index0];
+    uint8 g = src_argb[index1];
+    uint8 r = src_argb[index2];
+    uint8 a = src_argb[index3];
+    dst_argb[0] = b;
+    dst_argb[1] = g;
+    dst_argb[2] = r;
+    dst_argb[3] = a;
+    src_argb += 4;
+    dst_argb += 4;
+  }
+}
+
+void I422ToYUY2Row_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_frame, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_frame[0] = src_y[0];
+    dst_frame[1] = src_u[0];
+    dst_frame[2] = src_y[1];
+    dst_frame[3] = src_v[0];
+    dst_frame += 4;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+  }
+  if (width & 1) {
+    dst_frame[0] = src_y[0];
+    dst_frame[1] = src_u[0];
+    dst_frame[2] = src_y[0];  // duplicate last y
+    dst_frame[3] = src_v[0];
+  }
+}
+
+void I422ToUYVYRow_C(const uint8* src_y,
+                     const uint8* src_u,
+                     const uint8* src_v,
+                     uint8* dst_frame, int width) {
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_frame[0] = src_u[0];
+    dst_frame[1] = src_y[0];
+    dst_frame[2] = src_v[0];
+    dst_frame[3] = src_y[1];
+    dst_frame += 4;
+    src_y += 2;
+    src_u += 1;
+    src_v += 1;
+  }
+  if (width & 1) {
+    dst_frame[0] = src_u[0];
+    dst_frame[1] = src_y[0];
+    dst_frame[2] = src_v[0];
+    dst_frame[3] = src_y[0];  // duplicate last y
+  }
+}
+
+#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
+// row_win.cc has asm version, but GCC uses 2 step wrapper.
+#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
+void I422ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_u,
+                           const uint8* src_v,
+                           uint8* rgb_buf,
+                           int width) {
+  // Allocate a row of ARGB.
+  align_buffer_64(row, width * 4);
+  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
+  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
+  free_aligned_buffer_64(row);
+}
+#endif  // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
+
+#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
+void I422ToARGB1555Row_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* rgb_buf,
+                             int width) {
+  // Allocate a row of ARGB.
+  align_buffer_64(row, width * 4);
+  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
+  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
+  free_aligned_buffer_64(row);
+}
+
+void I422ToARGB4444Row_SSSE3(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* rgb_buf,
+                             int width) {
+  // Allocate a row of ARGB.
+  align_buffer_64(row, width * 4);
+  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
+  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
+  free_aligned_buffer_64(row);
+}
+
+void NV12ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_uv,
+                           uint8* dst_rgb565,
+                           int width) {
+  // Allocate a row of ARGB.
+  align_buffer_64(row, width * 4);
+  NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
+  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
+  free_aligned_buffer_64(row);
+}
+
+void NV21ToRGB565Row_SSSE3(const uint8* src_y,
+                           const uint8* src_vu,
+                           uint8* dst_rgb565,
+                           int width) {
+  // Allocate a row of ARGB.
+  align_buffer_64(row, width * 4);
+  NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
+  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
+  free_aligned_buffer_64(row);
+}
+
+void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
+                         uint8* dst_argb,
+                         int width) {
+  // Allocate a rows of yuv.
+  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+  uint8* row_u = row_y + ((width + 63) & ~63);
+  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
+  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
+  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
+  free_aligned_buffer_64(row_y);
+}
+
+void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
+                                   uint8* dst_argb,
+                                   int width) {
+  // Allocate a rows of yuv.
+  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+  uint8* row_u = row_y + ((width + 63) & ~63);
+  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
+  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
+  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
+  free_aligned_buffer_64(row_y);
+}
+
+void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
+                         uint8* dst_argb,
+                         int width) {
+  // Allocate a rows of yuv.
+  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+  uint8* row_u = row_y + ((width + 63) & ~63);
+  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
+  UYVYToYRow_SSE2(src_uyvy, row_y, width);
+  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
+  free_aligned_buffer_64(row_y);
+}
+
+void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
+                                   uint8* dst_argb,
+                                   int width) {
+  // Allocate a rows of yuv.
+  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
+  uint8* row_u = row_y + ((width + 63) & ~63);
+  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
+  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
+  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
+  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
+  free_aligned_buffer_64(row_y);
+}
+
+#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
+#endif  // !defined(LIBYUV_DISABLE_X86)
+
+void ARGBPolynomialRow_C(const uint8* src_argb,
+                         uint8* dst_argb, const float* poly,
+                         int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    float b = (float)(src_argb[0]);
+    float g = (float)(src_argb[1]);
+    float r = (float)(src_argb[2]);
+    float a = (float)(src_argb[3]);
+    float b2 = b * b;
+    float g2 = g * g;
+    float r2 = r * r;
+    float a2 = a * a;
+    float db = poly[0] + poly[4] * b;
+    float dg = poly[1] + poly[5] * g;
+    float dr = poly[2] + poly[6] * r;
+    float da = poly[3] + poly[7] * a;
+    float b3 = b2 * b;
+    float g3 = g2 * g;
+    float r3 = r2 * r;
+    float a3 = a2 * a;
+    db += poly[8] * b2;
+    dg += poly[9] * g2;
+    dr += poly[10] * r2;
+    da += poly[11] * a2;
+    db += poly[12] * b3;
+    dg += poly[13] * g3;
+    dr += poly[14] * r3;
+    da += poly[15] * a3;
+
+    dst_argb[0] = Clamp((int32)(db));
+    dst_argb[1] = Clamp((int32)(dg));
+    dst_argb[2] = Clamp((int32)(dr));
+    dst_argb[3] = Clamp((int32)(da));
+    src_argb += 4;
+    dst_argb += 4;
+  }
+}
+
+void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
+                             const uint8* luma, uint32 lumacoeff) {
+  uint32 bc = lumacoeff & 0xff;
+  uint32 gc = (lumacoeff >> 8) & 0xff;
+  uint32 rc = (lumacoeff >> 16) & 0xff;
+
+  int i;
+  for (i = 0; i < width - 1; i += 2) {
+    // Luminance in rows, color values in columns.
+    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
+                           src_argb[2] * rc) & 0x7F00u) + luma;
+    const uint8* luma1;
+    dst_argb[0] = luma0[src_argb[0]];
+    dst_argb[1] = luma0[src_argb[1]];
+    dst_argb[2] = luma0[src_argb[2]];
+    dst_argb[3] = src_argb[3];
+    luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
+              src_argb[6] * rc) & 0x7F00u) + luma;
+    dst_argb[4] = luma1[src_argb[4]];
+    dst_argb[5] = luma1[src_argb[5]];
+    dst_argb[6] = luma1[src_argb[6]];
+    dst_argb[7] = src_argb[7];
+    src_argb += 8;
+    dst_argb += 8;
+  }
+  if (width & 1) {
+    // Luminance in rows, color values in columns.
+    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
+                           src_argb[2] * rc) & 0x7F00u) + luma;
+    dst_argb[0] = luma0[src_argb[0]];
+    dst_argb[1] = luma0[src_argb[1]];
+    dst_argb[2] = luma0[src_argb[2]];
+    dst_argb[3] = src_argb[3];
+  }
+}
+
+void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
+  int i;
+  for (i = 0; i < width - 1; i += 2) {
+    dst[3] = src[3];
+    dst[7] = src[7];
+    dst += 8;
+    src += 8;
+  }
+  if (width & 1) {
+    dst[3] = src[3];
+  }
+}
+
+void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
+  int i;
+  for (i = 0; i < width - 1; i += 2) {
+    dst[3] = src[0];
+    dst[7] = src[1];
+    dst += 8;
+    src += 2;
+  }
+  if (width & 1) {
+    dst[3] = src[0];
+  }
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_mips.cc b/src/main/jni/libyuv/source/row_mips.cc
new file mode 100644
index 000000000..da7183bc1
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_mips.cc
@@ -0,0 +1,994 @@
+/*
+ *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// The following are available on Mips platforms:
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32)
+
+#ifdef HAS_COPYROW_MIPS
+void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
+  __asm__ __volatile__ (
+    ".set      noreorder                         \n"
+    ".set      noat                              \n"
+    "slti      $at, %[count], 8                  \n"
+    "bne       $at ,$zero, $last8                \n"
+    "xor       $t8, %[src], %[dst]               \n"
+    "andi      $t8, $t8, 0x3                     \n"
+
+    "bne       $t8, $zero, unaligned             \n"
+    "negu      $a3, %[dst]                       \n"
+    // make dst/src aligned
+    "andi      $a3, $a3, 0x3                     \n"
+    "beq       $a3, $zero, $chk16w               \n"
+    // word-aligned now count is the remining bytes count
+    "subu     %[count], %[count], $a3            \n"
+
+    "lwr       $t8, 0(%[src])                    \n"
+    "addu      %[src], %[src], $a3               \n"
+    "swr       $t8, 0(%[dst])                    \n"
+    "addu      %[dst], %[dst], $a3               \n"
+
+    // Now the dst/src are mutually word-aligned with word-aligned addresses
+    "$chk16w:                                    \n"
+    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
+    // t8 is the byte count after 64-byte chunks
+    "beq       %[count], $t8, chk8w              \n"
+    // There will be at most 1 32-byte chunk after it
+    "subu      $a3, %[count], $t8                \n"  // the reminder
+    // Here a3 counts bytes in 16w chunks
+    "addu      $a3, %[dst], $a3                  \n"
+    // Now a3 is the final dst after 64-byte chunks
+    "addu      $t0, %[dst], %[count]             \n"
+    // t0 is the "past the end" address
+
+    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
+    // the "t0-32" address
+    // This means: for x=128 the last "safe" a1 address is "t0-160"
+    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
+    // we will use "pref 30,128(a1)", so "t0-160" is the limit
+    "subu      $t9, $t0, 160                     \n"
+    // t9 is the "last safe pref 30,128(a1)" address
+    "pref      0, 0(%[src])                      \n"  // first line of src
+    "pref      0, 32(%[src])                     \n"  // second line of src
+    "pref      0, 64(%[src])                     \n"
+    "pref      30, 32(%[dst])                    \n"
+    // In case the a1 > t9 don't use "pref 30" at all
+    "sgtu      $v1, %[dst], $t9                  \n"
+    "bgtz      $v1, $loop16w                     \n"
+    "nop                                         \n"
+    // otherwise, start with using pref30
+    "pref      30, 64(%[dst])                    \n"
+    "$loop16w:                                    \n"
+    "pref      0, 96(%[src])                     \n"
+    "lw        $t0, 0(%[src])                    \n"
+    "bgtz      $v1, $skip_pref30_96              \n"  // skip
+    "lw        $t1, 4(%[src])                    \n"
+    "pref      30, 96(%[dst])                    \n"  // continue
+    "$skip_pref30_96:                            \n"
+    "lw        $t2, 8(%[src])                    \n"
+    "lw        $t3, 12(%[src])                   \n"
+    "lw        $t4, 16(%[src])                   \n"
+    "lw        $t5, 20(%[src])                   \n"
+    "lw        $t6, 24(%[src])                   \n"
+    "lw        $t7, 28(%[src])                   \n"
+    "pref      0, 128(%[src])                    \n"
+    //  bring the next lines of src, addr 128
+    "sw        $t0, 0(%[dst])                    \n"
+    "sw        $t1, 4(%[dst])                    \n"
+    "sw        $t2, 8(%[dst])                    \n"
+    "sw        $t3, 12(%[dst])                   \n"
+    "sw        $t4, 16(%[dst])                   \n"
+    "sw        $t5, 20(%[dst])                   \n"
+    "sw        $t6, 24(%[dst])                   \n"
+    "sw        $t7, 28(%[dst])                   \n"
+    "lw        $t0, 32(%[src])                   \n"
+    "bgtz      $v1, $skip_pref30_128             \n"  // skip pref 30,128(a1)
+    "lw        $t1, 36(%[src])                   \n"
+    "pref      30, 128(%[dst])                   \n"  // set dest, addr 128
+    "$skip_pref30_128:                           \n"
+    "lw        $t2, 40(%[src])                   \n"
+    "lw        $t3, 44(%[src])                   \n"
+    "lw        $t4, 48(%[src])                   \n"
+    "lw        $t5, 52(%[src])                   \n"
+    "lw        $t6, 56(%[src])                   \n"
+    "lw        $t7, 60(%[src])                   \n"
+    "pref      0, 160(%[src])                    \n"
+    // bring the next lines of src, addr 160
+    "sw        $t0, 32(%[dst])                   \n"
+    "sw        $t1, 36(%[dst])                   \n"
+    "sw        $t2, 40(%[dst])                   \n"
+    "sw        $t3, 44(%[dst])                   \n"
+    "sw        $t4, 48(%[dst])                   \n"
+    "sw        $t5, 52(%[dst])                   \n"
+    "sw        $t6, 56(%[dst])                   \n"
+    "sw        $t7, 60(%[dst])                   \n"
+
+    "addiu     %[dst], %[dst], 64                \n"  // adding 64 to dest
+    "sgtu      $v1, %[dst], $t9                  \n"
+    "bne       %[dst], $a3, $loop16w             \n"
+    " addiu    %[src], %[src], 64                \n"  // adding 64 to src
+    "move      %[count], $t8                     \n"
+
+    // Here we have src and dest word-aligned but less than 64-bytes to go
+
+    "chk8w:                                      \n"
+    "pref      0, 0x0(%[src])                    \n"
+    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
+    // the t8 is the reminder count past 32-bytes
+    "beq       %[count], $t8, chk1w              \n"
+    // count=t8,no 32-byte chunk
+    " nop                                        \n"
+
+    "lw        $t0, 0(%[src])                    \n"
+    "lw        $t1, 4(%[src])                    \n"
+    "lw        $t2, 8(%[src])                    \n"
+    "lw        $t3, 12(%[src])                   \n"
+    "lw        $t4, 16(%[src])                   \n"
+    "lw        $t5, 20(%[src])                   \n"
+    "lw        $t6, 24(%[src])                   \n"
+    "lw        $t7, 28(%[src])                   \n"
+    "addiu     %[src], %[src], 32                \n"
+
+    "sw        $t0, 0(%[dst])                    \n"
+    "sw        $t1, 4(%[dst])                    \n"
+    "sw        $t2, 8(%[dst])                    \n"
+    "sw        $t3, 12(%[dst])                   \n"
+    "sw        $t4, 16(%[dst])                   \n"
+    "sw        $t5, 20(%[dst])                   \n"
+    "sw        $t6, 24(%[dst])                   \n"
+    "sw        $t7, 28(%[dst])                   \n"
+    "addiu     %[dst], %[dst], 32                \n"
+
+    "chk1w:                                      \n"
+    "andi      %[count], $t8, 0x3                \n"
+    // now count is the reminder past 1w chunks
+    "beq       %[count], $t8, $last8             \n"
+    " subu     $a3, $t8, %[count]                \n"
+    // a3 is count of bytes in 1w chunks
+    "addu      $a3, %[dst], $a3                  \n"
+    // now a3 is the dst address past the 1w chunks
+    // copying in words (4-byte chunks)
+    "$wordCopy_loop:                             \n"
+    "lw        $t3, 0(%[src])                    \n"
+    // the first t3 may be equal t0 ... optimize?
+    "addiu     %[src], %[src],4                  \n"
+    "addiu     %[dst], %[dst],4                  \n"
+    "bne       %[dst], $a3,$wordCopy_loop        \n"
+    " sw       $t3, -4(%[dst])                   \n"
+
+    // For the last (<8) bytes
+    "$last8:                                     \n"
+    "blez      %[count], leave                   \n"
+    " addu     $a3, %[dst], %[count]             \n"  // a3 -last dst address
+    "$last8loop:                                 \n"
+    "lb        $v1, 0(%[src])                    \n"
+    "addiu     %[src], %[src], 1                 \n"
+    "addiu     %[dst], %[dst], 1                 \n"
+    "bne       %[dst], $a3, $last8loop           \n"
+    " sb       $v1, -1(%[dst])                   \n"
+
+    "leave:                                      \n"
+    "  j       $ra                               \n"
+    "  nop                                       \n"
+
+    //
+    // UNALIGNED case
+    //
+
+    "unaligned:                                  \n"
+    // got here with a3="negu a1"
+    "andi      $a3, $a3, 0x3                     \n"  // a1 is word aligned?
+    "beqz      $a3, $ua_chk16w                   \n"
+    " subu     %[count], %[count], $a3           \n"
+    // bytes left after initial a3 bytes
+    "lwr       $v1, 0(%[src])                    \n"
+    "lwl       $v1, 3(%[src])                    \n"
+    "addu      %[src], %[src], $a3               \n"  // a3 may be 1, 2 or 3
+    "swr       $v1, 0(%[dst])                    \n"
+    "addu      %[dst], %[dst], $a3               \n"
+    // below the dst will be word aligned (NOTE1)
+    "$ua_chk16w:                                 \n"
+    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
+    // t8 is the byte count after 64-byte chunks
+    "beq       %[count], $t8, ua_chk8w           \n"
+    // if a2==t8, no 64-byte chunks
+    // There will be at most 1 32-byte chunk after it
+    "subu      $a3, %[count], $t8                \n"  // the reminder
+    // Here a3 counts bytes in 16w chunks
+    "addu      $a3, %[dst], $a3                  \n"
+    // Now a3 is the final dst after 64-byte chunks
+    "addu      $t0, %[dst], %[count]             \n"  // t0 "past the end"
+    "subu      $t9, $t0, 160                     \n"
+    // t9 is the "last safe pref 30,128(a1)" address
+    "pref      0, 0(%[src])                      \n"  // first line of src
+    "pref      0, 32(%[src])                     \n"  // second line  addr 32
+    "pref      0, 64(%[src])                     \n"
+    "pref      30, 32(%[dst])                    \n"
+    // safe, as we have at least 64 bytes ahead
+    // In case the a1 > t9 don't use "pref 30" at all
+    "sgtu      $v1, %[dst], $t9                  \n"
+    "bgtz      $v1, $ua_loop16w                  \n"
+    // skip "pref 30,64(a1)" for too short arrays
+    " nop                                        \n"
+    // otherwise, start with using pref30
+    "pref      30, 64(%[dst])                    \n"
+    "$ua_loop16w:                                \n"
+    "pref      0, 96(%[src])                     \n"
+    "lwr       $t0, 0(%[src])                    \n"
+    "lwl       $t0, 3(%[src])                    \n"
+    "lwr       $t1, 4(%[src])                    \n"
+    "bgtz      $v1, $ua_skip_pref30_96           \n"
+    " lwl      $t1, 7(%[src])                    \n"
+    "pref      30, 96(%[dst])                    \n"
+    // continue setting up the dest, addr 96
+    "$ua_skip_pref30_96:                         \n"
+    "lwr       $t2, 8(%[src])                    \n"
+    "lwl       $t2, 11(%[src])                   \n"
+    "lwr       $t3, 12(%[src])                   \n"
+    "lwl       $t3, 15(%[src])                   \n"
+    "lwr       $t4, 16(%[src])                   \n"
+    "lwl       $t4, 19(%[src])                   \n"
+    "lwr       $t5, 20(%[src])                   \n"
+    "lwl       $t5, 23(%[src])                   \n"
+    "lwr       $t6, 24(%[src])                   \n"
+    "lwl       $t6, 27(%[src])                   \n"
+    "lwr       $t7, 28(%[src])                   \n"
+    "lwl       $t7, 31(%[src])                   \n"
+    "pref      0, 128(%[src])                    \n"
+    // bring the next lines of src, addr 128
+    "sw        $t0, 0(%[dst])                    \n"
+    "sw        $t1, 4(%[dst])                    \n"
+    "sw        $t2, 8(%[dst])                    \n"
+    "sw        $t3, 12(%[dst])                   \n"
+    "sw        $t4, 16(%[dst])                   \n"
+    "sw        $t5, 20(%[dst])                   \n"
+    "sw        $t6, 24(%[dst])                   \n"
+    "sw        $t7, 28(%[dst])                   \n"
+    "lwr       $t0, 32(%[src])                   \n"
+    "lwl       $t0, 35(%[src])                   \n"
+    "lwr       $t1, 36(%[src])                   \n"
+    "bgtz      $v1, ua_skip_pref30_128           \n"
+    " lwl      $t1, 39(%[src])                   \n"
+    "pref      30, 128(%[dst])                   \n"
+    // continue setting up the dest, addr 128
+    "ua_skip_pref30_128:                         \n"
+
+    "lwr       $t2, 40(%[src])                   \n"
+    "lwl       $t2, 43(%[src])                   \n"
+    "lwr       $t3, 44(%[src])                   \n"
+    "lwl       $t3, 47(%[src])                   \n"
+    "lwr       $t4, 48(%[src])                   \n"
+    "lwl       $t4, 51(%[src])                   \n"
+    "lwr       $t5, 52(%[src])                   \n"
+    "lwl       $t5, 55(%[src])                   \n"
+    "lwr       $t6, 56(%[src])                   \n"
+    "lwl       $t6, 59(%[src])                   \n"
+    "lwr       $t7, 60(%[src])                   \n"
+    "lwl       $t7, 63(%[src])                   \n"
+    "pref      0, 160(%[src])                    \n"
+    // bring the next lines of src, addr 160
+    "sw        $t0, 32(%[dst])                   \n"
+    "sw        $t1, 36(%[dst])                   \n"
+    "sw        $t2, 40(%[dst])                   \n"
+    "sw        $t3, 44(%[dst])                   \n"
+    "sw        $t4, 48(%[dst])                   \n"
+    "sw        $t5, 52(%[dst])                   \n"
+    "sw        $t6, 56(%[dst])                   \n"
+    "sw        $t7, 60(%[dst])                   \n"
+
+    "addiu     %[dst],%[dst],64                  \n"  // adding 64 to dest
+    "sgtu      $v1,%[dst],$t9                    \n"
+    "bne       %[dst],$a3,$ua_loop16w            \n"
+    " addiu    %[src],%[src],64                  \n"  // adding 64 to src
+    "move      %[count],$t8                      \n"
+
+    // Here we have src and dest word-aligned but less than 64-bytes to go
+
+    "ua_chk8w:                                   \n"
+    "pref      0, 0x0(%[src])                    \n"
+    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
+    // the t8 is the reminder count
+    "beq       %[count], $t8, $ua_chk1w          \n"
+    // when count==t8, no 32-byte chunk
+
+    "lwr       $t0, 0(%[src])                    \n"
+    "lwl       $t0, 3(%[src])                    \n"
+    "lwr       $t1, 4(%[src])                    \n"
+    "lwl       $t1, 7(%[src])                    \n"
+    "lwr       $t2, 8(%[src])                    \n"
+    "lwl       $t2, 11(%[src])                   \n"
+    "lwr       $t3, 12(%[src])                   \n"
+    "lwl       $t3, 15(%[src])                   \n"
+    "lwr       $t4, 16(%[src])                   \n"
+    "lwl       $t4, 19(%[src])                   \n"
+    "lwr       $t5, 20(%[src])                   \n"
+    "lwl       $t5, 23(%[src])                   \n"
+    "lwr       $t6, 24(%[src])                   \n"
+    "lwl       $t6, 27(%[src])                   \n"
+    "lwr       $t7, 28(%[src])                   \n"
+    "lwl       $t7, 31(%[src])                   \n"
+    "addiu     %[src], %[src], 32                \n"
+
+    "sw        $t0, 0(%[dst])                    \n"
+    "sw        $t1, 4(%[dst])                    \n"
+    "sw        $t2, 8(%[dst])                    \n"
+    "sw        $t3, 12(%[dst])                   \n"
+    "sw        $t4, 16(%[dst])                   \n"
+    "sw        $t5, 20(%[dst])                   \n"
+    "sw        $t6, 24(%[dst])                   \n"
+    "sw        $t7, 28(%[dst])                   \n"
+    "addiu     %[dst], %[dst], 32                \n"
+
+    "$ua_chk1w:                                  \n"
+    "andi      %[count], $t8, 0x3                \n"
+    // now count is the reminder past 1w chunks
+    "beq       %[count], $t8, ua_smallCopy       \n"
+    "subu      $a3, $t8, %[count]                \n"
+    // a3 is count of bytes in 1w chunks
+    "addu      $a3, %[dst], $a3                  \n"
+    // now a3 is the dst address past the 1w chunks
+
+    // copying in words (4-byte chunks)
+    "$ua_wordCopy_loop:                          \n"
+    "lwr       $v1, 0(%[src])                    \n"
+    "lwl       $v1, 3(%[src])                    \n"
+    "addiu     %[src], %[src], 4                 \n"
+    "addiu     %[dst], %[dst], 4                 \n"
+    // note: dst=a1 is word aligned here, see NOTE1
+    "bne       %[dst], $a3, $ua_wordCopy_loop    \n"
+    " sw       $v1,-4(%[dst])                    \n"
+
+    // Now less than 4 bytes (value in count) left to copy
+    "ua_smallCopy:                               \n"
+    "beqz      %[count], leave                   \n"
+    " addu     $a3, %[dst], %[count]             \n" // a3 = last dst address
+    "$ua_smallCopy_loop:                         \n"
+    "lb        $v1, 0(%[src])                    \n"
+    "addiu     %[src], %[src], 1                 \n"
+    "addiu     %[dst], %[dst], 1                 \n"
+    "bne       %[dst],$a3,$ua_smallCopy_loop     \n"
+    " sb       $v1, -1(%[dst])                   \n"
+
+    "j         $ra                               \n"
+    " nop                                        \n"
+    ".set      at                                \n"
+    ".set      reorder                           \n"
+       : [dst] "+r" (dst), [src] "+r" (src)
+       : [count] "r" (count)
+       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+       "t8", "t9", "a3", "v1", "at"
+  );
+}
+#endif  // HAS_COPYROW_MIPS
+
+// MIPS DSPR2 functions
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
+    (__mips_dsp_rev >= 2) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
+
+void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                           int width) {
+  __asm__ __volatile__ (
+    ".set push                                     \n"
+    ".set noreorder                                \n"
+    "srl             $t4, %[width], 4              \n"  // multiplies of 16
+    "blez            $t4, 2f                       \n"
+    " andi           %[width], %[width], 0xf       \n"  // residual
+
+    ".p2align        2                             \n"
+  "1:                                              \n"
+    "addiu           $t4, $t4, -1                  \n"
+    "lw              $t0, 0(%[src_uv])             \n"  // V1 | U1 | V0 | U0
+    "lw              $t1, 4(%[src_uv])             \n"  // V3 | U3 | V2 | U2
+    "lw              $t2, 8(%[src_uv])             \n"  // V5 | U5 | V4 | U4
+    "lw              $t3, 12(%[src_uv])            \n"  // V7 | U7 | V6 | U6
+    "lw              $t5, 16(%[src_uv])            \n"  // V9 | U9 | V8 | U8
+    "lw              $t6, 20(%[src_uv])            \n"  // V11 | U11 | V10 | U10
+    "lw              $t7, 24(%[src_uv])            \n"  // V13 | U13 | V12 | U12
+    "lw              $t8, 28(%[src_uv])            \n"  // V15 | U15 | V14 | U14
+    "addiu           %[src_uv], %[src_uv], 32      \n"
+    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
+    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
+    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
+    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
+    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
+    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
+    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
+    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
+    "sw              $t9, 0(%[dst_v])              \n"
+    "sw              $t0, 0(%[dst_u])              \n"
+    "sw              $t1, 4(%[dst_v])              \n"
+    "sw              $t2, 4(%[dst_u])              \n"
+    "sw              $t3, 8(%[dst_v])              \n"
+    "sw              $t5, 8(%[dst_u])              \n"
+    "sw              $t6, 12(%[dst_v])             \n"
+    "sw              $t7, 12(%[dst_u])             \n"
+    "addiu           %[dst_v], %[dst_v], 16        \n"
+    "bgtz            $t4, 1b                       \n"
+    " addiu          %[dst_u], %[dst_u], 16        \n"
+
+    "beqz            %[width], 3f                  \n"
+    " nop                                          \n"
+
+  "2:                                              \n"
+    "lbu             $t0, 0(%[src_uv])             \n"
+    "lbu             $t1, 1(%[src_uv])             \n"
+    "addiu           %[src_uv], %[src_uv], 2       \n"
+    "addiu           %[width], %[width], -1        \n"
+    "sb              $t0, 0(%[dst_u])              \n"
+    "sb              $t1, 0(%[dst_v])              \n"
+    "addiu           %[dst_u], %[dst_u], 1         \n"
+    "bgtz            %[width], 2b                  \n"
+    " addiu          %[dst_v], %[dst_v], 1         \n"
+
+  "3:                                              \n"
+    ".set pop                                      \n"
+     : [src_uv] "+r" (src_uv),
+       [width] "+r" (width),
+       [dst_u] "+r" (dst_u),
+       [dst_v] "+r" (dst_v)
+     :
+     : "t0", "t1", "t2", "t3",
+     "t4", "t5", "t6", "t7", "t8", "t9"
+  );
+}
+
+void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
+                                     uint8* dst_v, int width) {
+  __asm__ __volatile__ (
+    ".set push                                     \n"
+    ".set noreorder                                \n"
+    "srl             $t4, %[width], 4              \n"  // multiplies of 16
+    "blez            $t4, 2f                       \n"
+    " andi           %[width], %[width], 0xf       \n"  // residual
+
+    ".p2align        2                             \n"
+  "1:                                              \n"
+    "addiu           $t4, $t4, -1                  \n"
+    "lwr             $t0, 0(%[src_uv])             \n"
+    "lwl             $t0, 3(%[src_uv])             \n"  // V1 | U1 | V0 | U0
+    "lwr             $t1, 4(%[src_uv])             \n"
+    "lwl             $t1, 7(%[src_uv])             \n"  // V3 | U3 | V2 | U2
+    "lwr             $t2, 8(%[src_uv])             \n"
+    "lwl             $t2, 11(%[src_uv])            \n"  // V5 | U5 | V4 | U4
+    "lwr             $t3, 12(%[src_uv])            \n"
+    "lwl             $t3, 15(%[src_uv])            \n"  // V7 | U7 | V6 | U6
+    "lwr             $t5, 16(%[src_uv])            \n"
+    "lwl             $t5, 19(%[src_uv])            \n"  // V9 | U9 | V8 | U8
+    "lwr             $t6, 20(%[src_uv])            \n"
+    "lwl             $t6, 23(%[src_uv])            \n"  // V11 | U11 | V10 | U10
+    "lwr             $t7, 24(%[src_uv])            \n"
+    "lwl             $t7, 27(%[src_uv])            \n"  // V13 | U13 | V12 | U12
+    "lwr             $t8, 28(%[src_uv])            \n"
+    "lwl             $t8, 31(%[src_uv])            \n"  // V15 | U15 | V14 | U14
+    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
+    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
+    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
+    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
+    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
+    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
+    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
+    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
+    "addiu           %[src_uv], %[src_uv], 32      \n"
+    "swr             $t9, 0(%[dst_v])              \n"
+    "swl             $t9, 3(%[dst_v])              \n"
+    "swr             $t0, 0(%[dst_u])              \n"
+    "swl             $t0, 3(%[dst_u])              \n"
+    "swr             $t1, 4(%[dst_v])              \n"
+    "swl             $t1, 7(%[dst_v])              \n"
+    "swr             $t2, 4(%[dst_u])              \n"
+    "swl             $t2, 7(%[dst_u])              \n"
+    "swr             $t3, 8(%[dst_v])              \n"
+    "swl             $t3, 11(%[dst_v])             \n"
+    "swr             $t5, 8(%[dst_u])              \n"
+    "swl             $t5, 11(%[dst_u])             \n"
+    "swr             $t6, 12(%[dst_v])             \n"
+    "swl             $t6, 15(%[dst_v])             \n"
+    "swr             $t7, 12(%[dst_u])             \n"
+    "swl             $t7, 15(%[dst_u])             \n"
+    "addiu           %[dst_u], %[dst_u], 16        \n"
+    "bgtz            $t4, 1b                       \n"
+    " addiu          %[dst_v], %[dst_v], 16        \n"
+
+    "beqz            %[width], 3f                  \n"
+    " nop                                          \n"
+
+  "2:                                              \n"
+    "lbu             $t0, 0(%[src_uv])             \n"
+    "lbu             $t1, 1(%[src_uv])             \n"
+    "addiu           %[src_uv], %[src_uv], 2       \n"
+    "addiu           %[width], %[width], -1        \n"
+    "sb              $t0, 0(%[dst_u])              \n"
+    "sb              $t1, 0(%[dst_v])              \n"
+    "addiu           %[dst_u], %[dst_u], 1         \n"
+    "bgtz            %[width], 2b                  \n"
+    " addiu          %[dst_v], %[dst_v], 1         \n"
+
+  "3:                                              \n"
+    ".set pop                                      \n"
+     : [src_uv] "+r" (src_uv),
+       [width] "+r" (width),
+       [dst_u] "+r" (dst_u),
+       [dst_v] "+r" (dst_v)
+     :
+     : "t0", "t1", "t2", "t3",
+     "t4", "t5", "t6", "t7", "t8", "t9"
+  );
+}
+
+void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
+  __asm__ __volatile__ (
+    ".set push                             \n"
+    ".set noreorder                        \n"
+
+    "srl       $t4, %[width], 4            \n"  // multiplies of 16
+    "andi      $t5, %[width], 0xf          \n"
+    "blez      $t4, 2f                     \n"
+    " addu     %[src], %[src], %[width]    \n"  // src += width
+
+    ".p2align  2                           \n"
+   "1:                                     \n"
+    "lw        $t0, -16(%[src])            \n"  // |3|2|1|0|
+    "lw        $t1, -12(%[src])            \n"  // |7|6|5|4|
+    "lw        $t2, -8(%[src])             \n"  // |11|10|9|8|
+    "lw        $t3, -4(%[src])             \n"  // |15|14|13|12|
+    "wsbh      $t0, $t0                    \n"  // |2|3|0|1|
+    "wsbh      $t1, $t1                    \n"  // |6|7|4|5|
+    "wsbh      $t2, $t2                    \n"  // |10|11|8|9|
+    "wsbh      $t3, $t3                    \n"  // |14|15|12|13|
+    "rotr      $t0, $t0, 16                \n"  // |0|1|2|3|
+    "rotr      $t1, $t1, 16                \n"  // |4|5|6|7|
+    "rotr      $t2, $t2, 16                \n"  // |8|9|10|11|
+    "rotr      $t3, $t3, 16                \n"  // |12|13|14|15|
+    "addiu     %[src], %[src], -16         \n"
+    "addiu     $t4, $t4, -1                \n"
+    "sw        $t3, 0(%[dst])              \n"  // |15|14|13|12|
+    "sw        $t2, 4(%[dst])              \n"  // |11|10|9|8|
+    "sw        $t1, 8(%[dst])              \n"  // |7|6|5|4|
+    "sw        $t0, 12(%[dst])             \n"  // |3|2|1|0|
+    "bgtz      $t4, 1b                     \n"
+    " addiu    %[dst], %[dst], 16          \n"
+    "beqz      $t5, 3f                     \n"
+    " nop                                  \n"
+
+   "2:                                     \n"
+    "lbu       $t0, -1(%[src])             \n"
+    "addiu     $t5, $t5, -1                \n"
+    "addiu     %[src], %[src], -1          \n"
+    "sb        $t0, 0(%[dst])              \n"
+    "bgez      $t5, 2b                     \n"
+    " addiu    %[dst], %[dst], 1           \n"
+
+   "3:                                     \n"
+    ".set pop                              \n"
+      : [src] "+r" (src), [dst] "+r" (dst)
+      : [width] "r" (width)
+      : "t0", "t1", "t2", "t3", "t4", "t5"
+  );
+}
+
+void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                            int width) {
+  int x = 0;
+  int y = 0;
+  __asm__ __volatile__ (
+    ".set push                                    \n"
+    ".set noreorder                               \n"
+
+    "addu            $t4, %[width], %[width]      \n"
+    "srl             %[x], %[width], 4            \n"
+    "andi            %[y], %[width], 0xf          \n"
+    "blez            %[x], 2f                     \n"
+    " addu           %[src_uv], %[src_uv], $t4    \n"
+
+    ".p2align        2                            \n"
+   "1:                                            \n"
+    "lw              $t0, -32(%[src_uv])          \n"  // |3|2|1|0|
+    "lw              $t1, -28(%[src_uv])          \n"  // |7|6|5|4|
+    "lw              $t2, -24(%[src_uv])          \n"  // |11|10|9|8|
+    "lw              $t3, -20(%[src_uv])          \n"  // |15|14|13|12|
+    "lw              $t4, -16(%[src_uv])          \n"  // |19|18|17|16|
+    "lw              $t6, -12(%[src_uv])          \n"  // |23|22|21|20|
+    "lw              $t7, -8(%[src_uv])           \n"  // |27|26|25|24|
+    "lw              $t8, -4(%[src_uv])           \n"  // |31|30|29|28|
+
+    "rotr            $t0, $t0, 16                 \n"  // |1|0|3|2|
+    "rotr            $t1, $t1, 16                 \n"  // |5|4|7|6|
+    "rotr            $t2, $t2, 16                 \n"  // |9|8|11|10|
+    "rotr            $t3, $t3, 16                 \n"  // |13|12|15|14|
+    "rotr            $t4, $t4, 16                 \n"  // |17|16|19|18|
+    "rotr            $t6, $t6, 16                 \n"  // |21|20|23|22|
+    "rotr            $t7, $t7, 16                 \n"  // |25|24|27|26|
+    "rotr            $t8, $t8, 16                 \n"  // |29|28|31|30|
+    "precr.qb.ph     $t9, $t0, $t1                \n"  // |0|2|4|6|
+    "precrq.qb.ph    $t5, $t0, $t1                \n"  // |1|3|5|7|
+    "precr.qb.ph     $t0, $t2, $t3                \n"  // |8|10|12|14|
+    "precrq.qb.ph    $t1, $t2, $t3                \n"  // |9|11|13|15|
+    "precr.qb.ph     $t2, $t4, $t6                \n"  // |16|18|20|22|
+    "precrq.qb.ph    $t3, $t4, $t6                \n"  // |17|19|21|23|
+    "precr.qb.ph     $t4, $t7, $t8                \n"  // |24|26|28|30|
+    "precrq.qb.ph    $t6, $t7, $t8                \n"  // |25|27|29|31|
+    "addiu           %[src_uv], %[src_uv], -32    \n"
+    "addiu           %[x], %[x], -1               \n"
+    "swr             $t4, 0(%[dst_u])             \n"
+    "swl             $t4, 3(%[dst_u])             \n"  // |30|28|26|24|
+    "swr             $t6, 0(%[dst_v])             \n"
+    "swl             $t6, 3(%[dst_v])             \n"  // |31|29|27|25|
+    "swr             $t2, 4(%[dst_u])             \n"
+    "swl             $t2, 7(%[dst_u])             \n"  // |22|20|18|16|
+    "swr             $t3, 4(%[dst_v])             \n"
+    "swl             $t3, 7(%[dst_v])             \n"  // |23|21|19|17|
+    "swr             $t0, 8(%[dst_u])             \n"
+    "swl             $t0, 11(%[dst_u])            \n"  // |14|12|10|8|
+    "swr             $t1, 8(%[dst_v])             \n"
+    "swl             $t1, 11(%[dst_v])            \n"  // |15|13|11|9|
+    "swr             $t9, 12(%[dst_u])            \n"
+    "swl             $t9, 15(%[dst_u])            \n"  // |6|4|2|0|
+    "swr             $t5, 12(%[dst_v])            \n"
+    "swl             $t5, 15(%[dst_v])            \n"  // |7|5|3|1|
+    "addiu           %[dst_v], %[dst_v], 16       \n"
+    "bgtz            %[x], 1b                     \n"
+    " addiu          %[dst_u], %[dst_u], 16       \n"
+    "beqz            %[y], 3f                     \n"
+    " nop                                         \n"
+    "b               2f                           \n"
+    " nop                                         \n"
+
+   "2:                                            \n"
+    "lbu             $t0, -2(%[src_uv])           \n"
+    "lbu             $t1, -1(%[src_uv])           \n"
+    "addiu           %[src_uv], %[src_uv], -2     \n"
+    "addiu           %[y], %[y], -1               \n"
+    "sb              $t0, 0(%[dst_u])             \n"
+    "sb              $t1, 0(%[dst_v])             \n"
+    "addiu           %[dst_u], %[dst_u], 1        \n"
+    "bgtz            %[y], 2b                     \n"
+    " addiu          %[dst_v], %[dst_v], 1        \n"
+
+   "3:                                            \n"
+    ".set pop                                     \n"
+      : [src_uv] "+r" (src_uv),
+        [dst_u] "+r" (dst_u),
+        [dst_v] "+r" (dst_v),
+        [x] "=&r" (x),
+        [y] "+r" (y)
+      : [width] "r" (width)
+      : "t0", "t1", "t2", "t3", "t4",
+      "t5", "t7", "t8", "t9"
+  );
+}
+
+// Convert (4 Y and 2 VU) I422 and arrange RGB values into
+// t5 = | 0 | B0 | 0 | b0 |
+// t4 = | 0 | B1 | 0 | b1 |
+// t9 = | 0 | G0 | 0 | g0 |
+// t8 = | 0 | G1 | 0 | g1 |
+// t2 = | 0 | R0 | 0 | r0 |
+// t1 = | 0 | R1 | 0 | r1 |
+#define I422ToTransientMipsRGB                                                 \
+      "lw                $t0, 0(%[y_buf])       \n"                            \
+      "lhu               $t1, 0(%[u_buf])       \n"                            \
+      "lhu               $t2, 0(%[v_buf])       \n"                            \
+      "preceu.ph.qbr     $t1, $t1               \n"                            \
+      "preceu.ph.qbr     $t2, $t2               \n"                            \
+      "preceu.ph.qbra    $t3, $t0               \n"                            \
+      "preceu.ph.qbla    $t0, $t0               \n"                            \
+      "subu.ph           $t1, $t1, $s5          \n"                            \
+      "subu.ph           $t2, $t2, $s5          \n"                            \
+      "subu.ph           $t3, $t3, $s4          \n"                            \
+      "subu.ph           $t0, $t0, $s4          \n"                            \
+      "mul.ph            $t3, $t3, $s0          \n"                            \
+      "mul.ph            $t0, $t0, $s0          \n"                            \
+      "shll.ph           $t4, $t1, 0x7          \n"                            \
+      "subu.ph           $t4, $t4, $t1          \n"                            \
+      "mul.ph            $t6, $t1, $s1          \n"                            \
+      "mul.ph            $t1, $t2, $s2          \n"                            \
+      "addq_s.ph         $t5, $t4, $t3          \n"                            \
+      "addq_s.ph         $t4, $t4, $t0          \n"                            \
+      "shra.ph           $t5, $t5, 6            \n"                            \
+      "shra.ph           $t4, $t4, 6            \n"                            \
+      "addiu             %[u_buf], 2            \n"                            \
+      "addiu             %[v_buf], 2            \n"                            \
+      "addu.ph           $t6, $t6, $t1          \n"                            \
+      "mul.ph            $t1, $t2, $s3          \n"                            \
+      "addu.ph           $t9, $t6, $t3          \n"                            \
+      "addu.ph           $t8, $t6, $t0          \n"                            \
+      "shra.ph           $t9, $t9, 6            \n"                            \
+      "shra.ph           $t8, $t8, 6            \n"                            \
+      "addu.ph           $t2, $t1, $t3          \n"                            \
+      "addu.ph           $t1, $t1, $t0          \n"                            \
+      "shra.ph           $t2, $t2, 6            \n"                            \
+      "shra.ph           $t1, $t1, 6            \n"                            \
+      "subu.ph           $t5, $t5, $s5          \n"                            \
+      "subu.ph           $t4, $t4, $s5          \n"                            \
+      "subu.ph           $t9, $t9, $s5          \n"                            \
+      "subu.ph           $t8, $t8, $s5          \n"                            \
+      "subu.ph           $t2, $t2, $s5          \n"                            \
+      "subu.ph           $t1, $t1, $s5          \n"                            \
+      "shll_s.ph         $t5, $t5, 8            \n"                            \
+      "shll_s.ph         $t4, $t4, 8            \n"                            \
+      "shll_s.ph         $t9, $t9, 8            \n"                            \
+      "shll_s.ph         $t8, $t8, 8            \n"                            \
+      "shll_s.ph         $t2, $t2, 8            \n"                            \
+      "shll_s.ph         $t1, $t1, 8            \n"                            \
+      "shra.ph           $t5, $t5, 8            \n"                            \
+      "shra.ph           $t4, $t4, 8            \n"                            \
+      "shra.ph           $t9, $t9, 8            \n"                            \
+      "shra.ph           $t8, $t8, 8            \n"                            \
+      "shra.ph           $t2, $t2, 8            \n"                            \
+      "shra.ph           $t1, $t1, 8            \n"                            \
+      "addu.ph           $t5, $t5, $s5          \n"                            \
+      "addu.ph           $t4, $t4, $s5          \n"                            \
+      "addu.ph           $t9, $t9, $s5          \n"                            \
+      "addu.ph           $t8, $t8, $s5          \n"                            \
+      "addu.ph           $t2, $t2, $s5          \n"                            \
+      "addu.ph           $t1, $t1, $s5          \n"
+
+void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  __asm__ __volatile__ (
+    ".set push                                \n"
+    ".set noreorder                           \n"
+    "beqz              %[width], 2f           \n"
+    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74|
+    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
+    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
+    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
+    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
+    "repl.ph           $s5, 128               \n"  // |128|128| // clipping
+    "lui               $s6, 0xff00            \n"
+    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|ff|
+
+    ".p2align          2                      \n"
+   "1:                                        \n"
+      I422ToTransientMipsRGB
+// Arranging into argb format
+    "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1|
+    "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0|
+    "addiu             %[width], -4           \n"
+    "precrq.qb.ph      $t8, $t4, $t5          \n"  // |G1|B1|G0|B0|
+    "precr.qb.ph       $t9, $t4, $t5          \n"  // |g1|b1|g0|b0|
+    "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0|
+
+    "addiu             %[y_buf], 4            \n"
+    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0|
+    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0|
+    "or                $t1, $t1, $s6          \n"  // |ff|R1|ff|R0|
+    "or                $t2, $t2, $s6          \n"  // |ff|r1|ff|r0|
+    "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|r1|g1|b1|
+    "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|R1|G1|B1|
+    "sll               $t9, $t9, 16           \n"
+    "sll               $t8, $t8, 16           \n"
+    "packrl.ph         $t2, $t2, $t9          \n"  // |ff|r0|g0|b0|
+    "packrl.ph         $t1, $t1, $t8          \n"  // |ff|R0|G0|B0|
+// Store results.
+    "sw                $t2, 0(%[rgb_buf])     \n"
+    "sw                $t0, 4(%[rgb_buf])     \n"
+    "sw                $t1, 8(%[rgb_buf])     \n"
+    "sw                $t3, 12(%[rgb_buf])    \n"
+    "bnez              %[width], 1b           \n"
+    " addiu            %[rgb_buf], 16         \n"
+   "2:                                        \n"
+    ".set pop                                 \n"
+      :[y_buf] "+r" (y_buf),
+       [u_buf] "+r" (u_buf),
+       [v_buf] "+r" (v_buf),
+       [width] "+r" (width),
+       [rgb_buf] "+r" (rgb_buf)
+      :
+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
+      "t6", "t7", "t8", "t9",
+      "s0", "s1", "s2", "s3",
+      "s4", "s5", "s6"
+  );
+}
+
+void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  __asm__ __volatile__ (
+    ".set push                                \n"
+    ".set noreorder                           \n"
+    "beqz              %[width], 2f           \n"
+    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74|
+    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
+    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
+    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
+    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
+    "repl.ph           $s5, 128               \n"  // |128|128|
+    "lui               $s6, 0xff00            \n"
+    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|
+
+    ".p2align          2                       \n"
+   "1:                                         \n"
+      I422ToTransientMipsRGB
+// Arranging into abgr format
+    "precr.qb.ph      $t0, $t8, $t1           \n"  // |G1|g1|R1|r1|
+    "precr.qb.ph      $t3, $t9, $t2           \n"  // |G0|g0|R0|r0|
+    "precrq.qb.ph     $t8, $t0, $t3           \n"  // |G1|R1|G0|R0|
+    "precr.qb.ph      $t9, $t0, $t3           \n"  // |g1|r1|g0|r0|
+
+    "precr.qb.ph       $t2, $t4, $t5          \n"  // |B1|b1|B0|b0|
+    "addiu             %[width], -4           \n"
+    "addiu             %[y_buf], 4            \n"
+    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |B1|0 |B0|
+    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |b1|0 |b0|
+    "or                $t1, $t1, $s6          \n"  // |ff|B1|ff|B0|
+    "or                $t2, $t2, $s6          \n"  // |ff|b1|ff|b0|
+    "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|b1|g1|r1|
+    "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|B1|G1|R1|
+    "sll               $t9, $t9, 16           \n"
+    "sll               $t8, $t8, 16           \n"
+    "packrl.ph         $t2, $t2, $t9          \n"  // |ff|b0|g0|r0|
+    "packrl.ph         $t1, $t1, $t8          \n"  // |ff|B0|G0|R0|
+// Store results.
+    "sw                $t2, 0(%[rgb_buf])     \n"
+    "sw                $t0, 4(%[rgb_buf])     \n"
+    "sw                $t1, 8(%[rgb_buf])     \n"
+    "sw                $t3, 12(%[rgb_buf])    \n"
+    "bnez              %[width], 1b           \n"
+    " addiu            %[rgb_buf], 16         \n"
+   "2:                                        \n"
+    ".set pop                                 \n"
+      :[y_buf] "+r" (y_buf),
+       [u_buf] "+r" (u_buf),
+       [v_buf] "+r" (v_buf),
+       [width] "+r" (width),
+       [rgb_buf] "+r" (rgb_buf)
+      :
+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
+      "t6", "t7", "t8", "t9",
+      "s0", "s1", "s2", "s3",
+      "s4", "s5", "s6"
+  );
+}
+
+void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  __asm__ __volatile__ (
+    ".set push                                \n"
+    ".set noreorder                           \n"
+    "beqz              %[width], 2f           \n"
+    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74 |74 |
+    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
+    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
+    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
+    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
+    "repl.ph           $s5, 128               \n"  // |128|128|
+    "lui               $s6, 0xff              \n"
+    "ori               $s6, 0xff              \n"  // |00|ff|00|ff|
+
+    ".p2align          2                      \n"
+   "1:                                        \n"
+      I422ToTransientMipsRGB
+      // Arranging into bgra format
+    "precr.qb.ph       $t4, $t4, $t8          \n"  // |B1|b1|G1|g1|
+    "precr.qb.ph       $t5, $t5, $t9          \n"  // |B0|b0|G0|g0|
+    "precrq.qb.ph      $t8, $t4, $t5          \n"  // |B1|G1|B0|G0|
+    "precr.qb.ph       $t9, $t4, $t5          \n"  // |b1|g1|b0|g0|
+
+    "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0|
+    "addiu             %[width], -4           \n"
+    "addiu             %[y_buf], 4            \n"
+    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0|
+    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0|
+    "sll               $t1, $t1, 8            \n"  // |R1|0 |R0|0 |
+    "sll               $t2, $t2, 8            \n"  // |r1|0 |r0|0 |
+    "or                $t1, $t1, $s6          \n"  // |R1|ff|R0|ff|
+    "or                $t2, $t2, $s6          \n"  // |r1|ff|r0|ff|
+    "precrq.ph.w       $t0, $t9, $t2          \n"  // |b1|g1|r1|ff|
+    "precrq.ph.w       $t3, $t8, $t1          \n"  // |B1|G1|R1|ff|
+    "sll               $t1, $t1, 16           \n"
+    "sll               $t2, $t2, 16           \n"
+    "packrl.ph         $t2, $t9, $t2          \n"  // |b0|g0|r0|ff|
+    "packrl.ph         $t1, $t8, $t1          \n"  // |B0|G0|R0|ff|
+// Store results.
+    "sw                $t2, 0(%[rgb_buf])     \n"
+    "sw                $t0, 4(%[rgb_buf])     \n"
+    "sw                $t1, 8(%[rgb_buf])     \n"
+    "sw                $t3, 12(%[rgb_buf])    \n"
+    "bnez              %[width], 1b           \n"
+    " addiu            %[rgb_buf], 16         \n"
+   "2:                                        \n"
+    ".set pop                                 \n"
+      :[y_buf] "+r" (y_buf),
+       [u_buf] "+r" (u_buf),
+       [v_buf] "+r" (v_buf),
+       [width] "+r" (width),
+       [rgb_buf] "+r" (rgb_buf)
+      :
+      : "t0", "t1",  "t2", "t3",  "t4", "t5",
+      "t6", "t7", "t8", "t9",
+      "s0", "s1", "s2", "s3",
+      "s4", "s5", "s6"
+  );
+}
+
+// Bilinear filter 8x2 -> 8x1
+void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
+                                ptrdiff_t src_stride, int dst_width,
+                                int source_y_fraction) {
+    int y0_fraction = 256 - source_y_fraction;
+    const uint8* src_ptr1 = src_ptr + src_stride;
+
+  __asm__ __volatile__ (
+     ".set push                                           \n"
+     ".set noreorder                                      \n"
+
+     "replv.ph          $t0, %[y0_fraction]               \n"
+     "replv.ph          $t1, %[source_y_fraction]         \n"
+
+    ".p2align           2                                 \n"
+   "1:                                                    \n"
+     "lw                $t2, 0(%[src_ptr])                \n"
+     "lw                $t3, 0(%[src_ptr1])               \n"
+     "lw                $t4, 4(%[src_ptr])                \n"
+     "lw                $t5, 4(%[src_ptr1])               \n"
+     "muleu_s.ph.qbl    $t6, $t2, $t0                     \n"
+     "muleu_s.ph.qbr    $t7, $t2, $t0                     \n"
+     "muleu_s.ph.qbl    $t8, $t3, $t1                     \n"
+     "muleu_s.ph.qbr    $t9, $t3, $t1                     \n"
+     "muleu_s.ph.qbl    $t2, $t4, $t0                     \n"
+     "muleu_s.ph.qbr    $t3, $t4, $t0                     \n"
+     "muleu_s.ph.qbl    $t4, $t5, $t1                     \n"
+     "muleu_s.ph.qbr    $t5, $t5, $t1                     \n"
+     "addq.ph           $t6, $t6, $t8                     \n"
+     "addq.ph           $t7, $t7, $t9                     \n"
+     "addq.ph           $t2, $t2, $t4                     \n"
+     "addq.ph           $t3, $t3, $t5                     \n"
+     "shra.ph           $t6, $t6, 8                       \n"
+     "shra.ph           $t7, $t7, 8                       \n"
+     "shra.ph           $t2, $t2, 8                       \n"
+     "shra.ph           $t3, $t3, 8                       \n"
+     "precr.qb.ph       $t6, $t6, $t7                     \n"
+     "precr.qb.ph       $t2, $t2, $t3                     \n"
+     "addiu             %[src_ptr], %[src_ptr], 8         \n"
+     "addiu             %[src_ptr1], %[src_ptr1], 8       \n"
+     "addiu             %[dst_width], %[dst_width], -8    \n"
+     "sw                $t6, 0(%[dst_ptr])                \n"
+     "sw                $t2, 4(%[dst_ptr])                \n"
+     "bgtz              %[dst_width], 1b                  \n"
+     " addiu            %[dst_ptr], %[dst_ptr], 8         \n"
+
+     ".set pop                                            \n"
+  : [dst_ptr] "+r" (dst_ptr),
+    [src_ptr1] "+r" (src_ptr1),
+    [src_ptr] "+r" (src_ptr),
+    [dst_width] "+r" (dst_width)
+  : [source_y_fraction] "r" (source_y_fraction),
+    [y0_fraction] "r" (y0_fraction),
+    [src_stride] "r" (src_stride)
+  : "t0", "t1", "t2", "t3", "t4", "t5",
+    "t6", "t7", "t8", "t9"
+  );
+}
+#endif  // __mips_dsp_rev >= 2
+
+#endif  // defined(__mips__)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_neon.cc b/src/main/jni/libyuv/source/row_neon.cc
new file mode 100644
index 000000000..1392cf5fc
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_neon.cc
@@ -0,0 +1,3148 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+
+// Read 8 Y, 4 U and 4 V from 422
+#define READYUV422                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.32    {d2[1]}, [%2]!                 \n"
+
+// Read 8 Y, 2 U and 2 V from 422
+#define READYUV411                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vzip.u8    d2, d3                         \n"
+
+// Read 8 Y, 8 U and 8 V from 444
+#define READYUV444                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.8     {d3}, [%2]!                    \n"                             \
+    "vpaddl.u8  q1, q1                         \n"                             \
+    "vrshrn.u16 d2, q1, #1                     \n"
+
+// Read 8 Y, and set 4 U and 4 V to 128
+#define READYUV400                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    "vmov.u8    d2, #128                       \n"
+
+// Read 8 Y and 4 UV from NV12
+#define READNV12                                                               \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 Y and 4 VU from NV21
+#define READNV21                                                               \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
+    "vuzp.u8    d3, d2                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 YUY2
+#define READYUY2                                                               \
+    MEMACCESS(0)                                                               \
+    "vld2.8     {d0, d2}, [%0]!                \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 UYVY
+#define READUYVY                                                               \
+    MEMACCESS(0)                                                               \
+    "vld2.8     {d2, d3}, [%0]!                \n"                             \
+    "vmov.u8    d0, d3                         \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+#define YUV422TORGB                                                            \
+    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
+    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
+    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
+    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
+    "vtrn.u8    d0, d1                         \n"                             \
+    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
+    "vmul.s16   q0, q0, q14                    \n"                             \
+    "vadd.s16   d18, d19                       \n"                             \
+    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
+    "vqadd.s16  d21, d1, d16                   \n"                             \
+    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
+    "vqadd.s16  d23, d1, d17                   \n"                             \
+    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
+    "vqadd.s16  d17, d1, d18                   \n"                             \
+    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
+    "vqshrun.s16 d1, q11, #6                   \n" /* G */                     \
+    "vqshrun.s16 d2, q8, #6                    \n" /* R */                     \
+    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
+    "vmovl.u8   q11, d1                        \n"                             \
+    "vmovl.u8   q8, d2                         \n"                             \
+    "vtrn.u8    d20, d21                       \n"                             \
+    "vtrn.u8    d22, d23                       \n"                             \
+    "vtrn.u8    d16, d17                       \n"                             \
+    "vmov.u8    d21, d16                       \n"
+
+static vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
+                         0, 0, 0, 0, 0, 0, 0, 0 };
+static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
+                       0, 0, 0, 0, 0, 0, 0, 0 };
+
+void I444ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV444
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I411ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV411
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToBGRARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_bgra,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    "vmov.u8    d19, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_bgra),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToABGRRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_abgr,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_abgr),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToRGBARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_rgba,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d19, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_rgba),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToRGB24Row_NEON(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgb24,
+                         int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    MEMACCESS(3)
+    "vst3.8     {d20, d21, d22}, [%3]!         \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),      // %0
+      "+r"(src_u),      // %1
+      "+r"(src_v),      // %2
+      "+r"(dst_rgb24),  // %3
+      "+r"(width)       // %4
+    : "r"(&kUVToRB),    // %5
+      "r"(&kUVToG)      // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I422ToRAWRow_NEON(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_raw,
+                       int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    MEMACCESS(3)
+    "vst3.8     {d20, d21, d22}, [%3]!         \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_raw),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+#define ARGBTORGB565                                                           \
+    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
+    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
+    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
+    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
+    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
+    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
+    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
+    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
+    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
+    "vorr       q0, q0, q10                    \n"  /* BGR                  */
+
+void I422ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_u,
+                          const uint8* src_v,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_rgb565),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+#define ARGBTOARGB1555                                                         \
+    "vshr.u8    q10, q10, #3                   \n"  /* B                    */ \
+    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
+    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
+    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
+    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
+    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
+    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
+    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
+    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
+    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
+    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
+    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
+    "vorr       q0, q0, q1                     \n"  /* BGRA                 */
+
+void I422ToARGB1555Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb1555,
+                            int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    ARGBTOARGB1555
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_argb1555),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+#define ARGBTOARGB4444                                                         \
+    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
+    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
+    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
+    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
+    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
+    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
+    "vzip.u8    d0, d1                         \n"  /* BGRA                 */
+
+void I422ToARGB4444Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb4444,
+                            int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    ARGBTOARGB4444
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_argb4444),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void YToARGBRow_NEON(const uint8* src_y,
+                     uint8* dst_argb,
+                     int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV400
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void I400ToARGBRow_NEON(const uint8* src_y,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+    "vmov.u8    d23, #255                      \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d20}, [%0]!                   \n"
+    "vmov       d21, d20                       \n"
+    "vmov       d22, d20                       \n"
+    "subs       %2, %2, #8                     \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    :
+    : "cc", "memory", "d20", "d21", "d22", "d23"
+  );
+}
+
+void NV12ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_uv,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV12
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(2)
+    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_argb),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void NV21ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_uv,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV21
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(2)
+    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_argb),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void NV12ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_uv,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV12
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_rgb565),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void NV21ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_uv,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV21
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_rgb565),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUY2
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_yuy2),  // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void UYVYToARGBRow_NEON(const uint8* src_uyvy,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READUYVY
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_uyvy),  // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                     int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pairs of UV
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store U
+    MEMACCESS(2)
+    "vst1.8     {q1}, [%2]!                    \n"  // store V
+    "bgt        1b                             \n"
+    : "+r"(src_uv),  // %0
+      "+r"(dst_u),   // %1
+      "+r"(dst_v),   // %2
+      "+r"(width)    // %3  // Output registers
+    :                       // Input registers
+    : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// Reads 16 U's and V's and writes out 16 pairs of UV.
+void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load U
+    MEMACCESS(1)
+    "vld1.8     {q1}, [%1]!                    \n"  // load V
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(2)
+    "vst2.u8    {q0, q1}, [%2]!                \n"  // store 16 pairs of UV
+    "bgt        1b                             \n"
+    :
+      "+r"(src_u),   // %0
+      "+r"(src_v),   // %1
+      "+r"(dst_uv),  // %2
+      "+r"(width)    // %3  // Output registers
+    :                       // Input registers
+    : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// Copy multiple of 32.  vld4.8  allow unaligned and is fastest on a15.
+void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 32
+    "subs       %2, %2, #32                    \n"  // 32 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 32
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(count)  // %2  // Output registers
+  :                     // Input registers
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// SetRow8 writes 'count' bytes using a 32 bit value repeated.
+void SetRow_NEON(uint8* dst, uint32 v32, int count) {
+  asm volatile (
+    "vdup.u32  q0, %2                          \n"  // duplicate 4 ints
+    "1:                                        \n"
+    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
+    MEMACCESS(0)
+    "vst1.8    {q0}, [%0]!                     \n"  // store
+    "bgt       1b                              \n"
+  : "+r"(dst),   // %0
+    "+r"(count)  // %1
+  : "r"(v32)     // %2
+  : "cc", "memory", "q0"
+  );
+}
+
+// TODO(fbarchard): Make fully assembler
+// SetRow32 writes 'count' words using a 32 bit value repeated.
+void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
+                      int dst_stride, int height) {
+  for (int y = 0; y < height; ++y) {
+    SetRow_NEON(dst, v32, width << 2);
+    dst += dst_stride;
+  }
+}
+
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    // Start at end of source row.
+    "mov        r3, #-16                       \n"
+    "add        %0, %0, %2                     \n"
+    "sub        %0, #16                        \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
+    "subs       %2, #16                        \n"  // 16 pixels per loop.
+    "vrev64.8   q0, q0                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "cc", "memory", "r3", "q0"
+  );
+}
+
+void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                      int width) {
+  asm volatile (
+    // Start at end of source row.
+    "mov        r12, #-16                      \n"
+    "add        %0, %0, %3, lsl #1             \n"
+    "sub        %0, #16                        \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {d0, d1}, [%0], r12            \n"  // src -= 16
+    "subs       %3, #8                         \n"  // 8 pixels per loop.
+    "vrev64.8   q0, q0                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // dst += 8
+    MEMACCESS(2)
+    "vst1.8     {d1}, [%2]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_uv),  // %0
+    "+r"(dst_u),   // %1
+    "+r"(dst_v),   // %2
+    "+r"(width)    // %3
+  :
+  : "cc", "memory", "r12", "q0"
+  );
+}
+
+void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    // Start at end of source row.
+    "mov        r3, #-16                       \n"
+    "add        %0, %0, %2, lsl #2             \n"
+    "sub        %0, #16                        \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
+    "subs       %2, #4                         \n"  // 4 pixels per loop.
+    "vrev64.32  q0, q0                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "cc", "memory", "r3", "q0"
+  );
+}
+
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RGB24.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    MEMACCESS(1)
+    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(dst_argb),   // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
+  );
+}
+
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RAW.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vswp.u8    d1, d3                         \n"  // swap R, B
+    MEMACCESS(1)
+    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_raw),   // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
+  );
+}
+
+#define RGB565TOARGB                                                           \
+    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
+    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
+    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
+    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
+    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
+    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
+    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
+    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
+    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
+
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    RGB565TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+
+#define ARGB1555TOARGB                                                         \
+    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
+    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
+    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
+    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
+    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
+    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
+    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
+    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
+    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
+    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
+    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
+
+// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
+#define RGB555TOARGB                                                           \
+    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
+    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
+    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
+    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
+    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
+    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
+    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
+    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
+    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
+
+void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB1555TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+
+#define ARGB4444TOARGB                                                         \
+    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
+    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
+    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
+    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
+    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
+    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
+    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
+    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
+
+void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB4444TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
+  );
+}
+
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    MEMACCESS(1)
+    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RGB24.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_rgb24),  // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
+  );
+}
+
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vswp.u8    d1, d3                         \n"  // swap R, B
+    MEMACCESS(1)
+    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RAW.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_raw),   // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
+  );
+}
+
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of YUY2.
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 16 pixels of Y.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of UYVY.
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
+    MEMACCESS(1)
+    "vst1.8     {q1}, [%1]!                    \n"  // store 16 pixels of Y.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
+    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "vst1.8     {d1}, [%1]!                    \n"  // store 8 U.
+    MEMACCESS(2)
+    "vst1.8     {d3}, [%2]!                    \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
+  );
+}
+
+void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
+    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 U.
+    MEMACCESS(2)
+    "vst1.8     {d2}, [%2]!                    \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
+  );
+}
+
+void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // stride + src_yuy2
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
+    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row YUY2.
+    "vrhadd.u8  d1, d1, d5                     \n"  // average rows of U
+    "vrhadd.u8  d3, d3, d7                     \n"  // average rows of V
+    MEMACCESS(2)
+    "vst1.8     {d1}, [%2]!                    \n"  // store 8 U.
+    MEMACCESS(3)
+    "vst1.8     {d3}, [%3]!                    \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),     // %0
+    "+r"(stride_yuy2),  // %1
+    "+r"(dst_u),        // %2
+    "+r"(dst_v),        // %3
+    "+r"(pix)           // %4
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
+  );
+}
+
+void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // stride + src_uyvy
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
+    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row UYVY.
+    "vrhadd.u8  d0, d0, d4                     \n"  // average rows of U
+    "vrhadd.u8  d2, d2, d6                     \n"  // average rows of V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 U.
+    MEMACCESS(3)
+    "vst1.8     {d2}, [%3]!                    \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),     // %0
+    "+r"(stride_uyvy),  // %1
+    "+r"(dst_u),        // %2
+    "+r"(dst_v),        // %3
+    "+r"(pix)           // %4
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
+  );
+}
+
+void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %1, %0                         \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load row 1 16 pixels.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "vld1.8     {q1}, [%1]!                    \n"  // load row 2 16 pixels.
+    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_uv),         // %0
+    "+r"(src_uv_stride),  // %1
+    "+r"(dst_uv),         // %2
+    "+r"(pix)             // %3
+  :
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
+void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix) {
+  asm volatile (
+    "vmov.u32   d6[0], %3                      \n"  // selector
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 8 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    "vtbl.8     d4, {d0, d1}, d6               \n"  // look up 4 pixels
+    "vtbl.8     d5, {d2, d3}, d6               \n"  // look up 4 pixels
+    "vtrn.u32   d4, d5                         \n"  // combine 8 pixels
+    MEMACCESS(1)
+    "vst1.8     {d4}, [%1]!                    \n"  // store 8.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  : "r"(selector)     // %3
+  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+
+// Select G channels from ARGB.  e.g.  GGGGGGGG
+void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 /*selector*/, int pix) {
+  asm volatile (
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {q2}, [%3]                     \n"  // shuffler
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 4 pixels.
+    "subs       %2, %2, #4                     \n"  // 4 processed per loop
+    "vtbl.8     d2, {d0, d1}, d4               \n"  // look up 2 first pixels
+    "vtbl.8     d3, {d0, d1}, d5               \n"  // look up 2 next pixels
+    MEMACCESS(1)
+    "vst1.8     {q1}, [%1]!                    \n"  // store 4.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "r"(shuffler)    // %3
+  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
+  );
+}
+
+void I422ToYUY2Row_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_yuy2, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {d0, d2}, [%0]!                \n"  // load 16 Ys
+    MEMACCESS(1)
+    "vld1.8     {d1}, [%1]!                    \n"  // load 8 Us
+    MEMACCESS(2)
+    "vld1.8     {d3}, [%2]!                    \n"  // load 8 Vs
+    "subs       %4, %4, #16                    \n"  // 16 pixels
+    MEMACCESS(3)
+    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 YUY2/16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_y),     // %0
+    "+r"(src_u),     // %1
+    "+r"(src_v),     // %2
+    "+r"(dst_yuy2),  // %3
+    "+r"(width)      // %4
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3"
+  );
+}
+
+void I422ToUYVYRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_uyvy, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld2.8     {d1, d3}, [%0]!                \n"  // load 16 Ys
+    MEMACCESS(1)
+    "vld1.8     {d0}, [%1]!                    \n"  // load 8 Us
+    MEMACCESS(2)
+    "vld1.8     {d2}, [%2]!                    \n"  // load 8 Vs
+    "subs       %4, %4, #16                    \n"  // 16 pixels
+    MEMACCESS(3)
+    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 UYVY/16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_y),     // %0
+    "+r"(src_u),     // %1
+    "+r"(src_v),     // %2
+    "+r"(dst_uyvy),  // %3
+    "+r"(width)      // %4
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3"
+  );
+}
+
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTORGB565
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_rgb565),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
+                            int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTOARGB1555
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb1555),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+
+void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTOARGB4444
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),      // %0
+    "+r"(dst_argb4444),  // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+  );
+}
+
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
+    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit Y
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+  );
+}
+
+// 8x1 pixels.
+void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "vmov.u8    d24, #112                      \n"  // UB / VR 0.875 coefficient
+    "vmov.u8    d25, #74                       \n"  // UG -0.5781 coefficient
+    "vmov.u8    d26, #38                       \n"  // UR -0.2969 coefficient
+    "vmov.u8    d27, #18                       \n"  // VB -0.1406 coefficient
+    "vmov.u8    d28, #94                       \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlsl.u8   q2, d1, d25                    \n"  // G
+    "vmlsl.u8   q2, d2, d26                    \n"  // R
+    "vadd.u16   q2, q2, q15                    \n"  // +128 -> unsigned
+
+    "vmull.u8   q3, d2, d24                    \n"  // R
+    "vmlsl.u8   q3, d1, d28                    \n"  // G
+    "vmlsl.u8   q3, d0, d27                    \n"  // B
+    "vadd.u16   q3, q3, q15                    \n"  // +128 -> unsigned
+
+    "vqshrn.u16  d0, q2, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q3, #8                    \n"  // 16 bit to 8 bit V
+
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q0, q10                    \n"  // B
+    "vmls.s16   q8, q1, q11                    \n"  // G
+    "vmls.s16   q8, q2, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+
+    "vmul.s16   q9, q2, q10                    \n"  // R
+    "vmls.s16   q9, q1, q14                    \n"  // G
+    "vmls.s16   q9, q0, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
+void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(0)
+    "vld4.8     {d8, d10, d12, d14}, [%0]!     \n"  // load 8 more ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d9, d11, d13, d15}, [%0]!     \n"  // load last 8 ARGB pixels.
+    "vpaddl.u8  q4, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q5, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q6, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vpadd.u16  d0, d0, d1                     \n"  // B 16 shorts -> 8 shorts.
+    "vpadd.u16  d1, d8, d9                     \n"  // B
+    "vpadd.u16  d2, d2, d3                     \n"  // G 16 shorts -> 8 shorts.
+    "vpadd.u16  d3, d10, d11                   \n"  // G
+    "vpadd.u16  d4, d4, d5                     \n"  // R 16 shorts -> 8 shorts.
+    "vpadd.u16  d5, d12, d13                   \n"  // R
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
+    "vmul.s16   q8, q0, q10                    \n"  // B
+    "vmls.s16   q8, q1, q11                    \n"  // G
+    "vmls.s16   q8, q2, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q2, q10                    \n"  // R
+    "vmls.s16   q9, q1, q14                    \n"  // G
+    "vmls.s16   q9, q0, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#define RGBTOUV(QB, QG, QR) \
+    "vmul.s16   q8, " #QB ", q10               \n"  /* B                    */ \
+    "vmls.s16   q8, " #QG ", q11               \n"  /* G                    */ \
+    "vmls.s16   q8, " #QR ", q12               \n"  /* R                    */ \
+    "vadd.u16   q8, q8, q15                    \n"  /* +128 -> unsigned     */ \
+    "vmul.s16   q9, " #QR ", q10               \n"  /* R                    */ \
+    "vmls.s16   q9, " #QG ", q14               \n"  /* G                    */ \
+    "vmls.s16   q9, " #QB ", q13               \n"  /* B                    */ \
+    "vadd.u16   q9, q9, q15                    \n"  /* +128 -> unsigned     */ \
+    "vqshrn.u16  d0, q8, #8                    \n"  /* 16 bit to 8 bit U    */ \
+    "vqshrn.u16  d1, q9, #8                    \n"  /* 16 bit to 8 bit V    */
+
+// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
+void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_stride_argb),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// TODO(fbarchard): Subsample match C code.
+void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient
+    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
+    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
+    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
+    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_stride_argb),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_bgra
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 BGRA pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 BGRA pixels.
+    "vpaddl.u8  q3, q3                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more BGRA pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 BGRA pixels.
+    "vpadal.u8  q3, q7                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q1, q1, #1                     \n"  // 2x average
+    "vrshr.u16  q2, q2, #1                     \n"
+    "vrshr.u16  q3, q3, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q3, q2, q1)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(src_stride_bgra),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_abgr
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ABGR pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ABGR pixels.
+    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ABGR pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ABGR pixels.
+    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q2, q1, q0)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(src_stride_abgr),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_rgba
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 RGBA pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 RGBA pixels.
+    "vpaddl.u8  q0, q1                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q2                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q3                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more RGBA pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 RGBA pixels.
+    "vpadal.u8  q0, q5                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q6                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q7                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(src_stride_rgba),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
+                       uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_rgb24
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RGB24 pixels.
+    MEMACCESS(0)
+    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RGB24 pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RGB24 pixels.
+    MEMACCESS(1)
+    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RGB24 pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(src_stride_rgb24),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
+                     uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_raw
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RAW pixels.
+    MEMACCESS(0)
+    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RAW pixels.
+    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RAW pixels.
+    MEMACCESS(1)
+    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RAW pixels.
+    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q2, q1, q0)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_raw),  // %0
+    "+r"(src_stride_raw),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
+                        uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(src_stride_rgb565),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
+                        uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(src_stride_argb1555),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
+                          uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(src_stride_argb4444),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    RGB565TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(dst_y),       // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB1555TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(dst_y),         // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB4444TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(dst_y),         // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
+    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
+    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
+    "vmov.u8    d7, #16                        \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q8, d1, d4                     \n"  // R
+    "vmlal.u8   q8, d2, d5                     \n"  // G
+    "vmlal.u8   q8, d3, d6                     \n"  // B
+    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d7                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
+  );
+}
+
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
+    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
+    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
+    "vmov.u8    d7, #16                        \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q8, d0, d4                     \n"  // R
+    "vmlal.u8   q8, d1, d5                     \n"  // G
+    "vmlal.u8   q8, d2, d6                     \n"  // B
+    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d7                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(dst_y),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
+  );
+}
+
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
+    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
+    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
+    "vmov.u8    d7, #16                        \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q8, d1, d4                     \n"  // B
+    "vmlal.u8   q8, d2, d5                     \n"  // G
+    "vmlal.u8   q8, d3, d6                     \n"  // R
+    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d7                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(dst_y),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
+  );
+}
+
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
+    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
+    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
+    "vmov.u8    d7, #16                        \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RGB24.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q8, d0, d4                     \n"  // B
+    "vmlal.u8   q8, d1, d5                     \n"  // G
+    "vmlal.u8   q8, d2, d6                     \n"  // R
+    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d7                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(dst_y),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
+  );
+}
+
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
+    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
+    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
+    "vmov.u8    d7, #16                        \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RAW.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q8, d0, d4                     \n"  // B
+    "vmlal.u8   q8, d1, d5                     \n"  // G
+    "vmlal.u8   q8, d2, d6                     \n"  // R
+    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d7                         \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_raw),  // %0
+    "+r"(dst_y),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
+  );
+}
+
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_NEON(uint8* dst_ptr,
+                         const uint8* src_ptr, ptrdiff_t src_stride,
+                         int dst_width, int source_y_fraction) {
+  asm volatile (
+    "cmp        %4, #0                         \n"
+    "beq        100f                           \n"
+    "add        %2, %1                         \n"
+    "cmp        %4, #64                        \n"
+    "beq        75f                            \n"
+    "cmp        %4, #128                       \n"
+    "beq        50f                            \n"
+    "cmp        %4, #192                       \n"
+    "beq        25f                            \n"
+
+    "vdup.8     d5, %4                         \n"
+    "rsb        %4, #256                       \n"
+    "vdup.8     d4, %4                         \n"
+    // General purpose row blend.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"
+    MEMACCESS(2)
+    "vld1.8     {q1}, [%2]!                    \n"
+    "subs       %3, %3, #16                    \n"
+    "vmull.u8   q13, d0, d4                    \n"
+    "vmull.u8   q14, d1, d4                    \n"
+    "vmlal.u8   q13, d2, d5                    \n"
+    "vmlal.u8   q14, d3, d5                    \n"
+    "vrshrn.u16 d0, q13, #8                    \n"
+    "vrshrn.u16 d1, q14, #8                    \n"
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"
+    "bgt        1b                             \n"
+    "b          99f                            \n"
+
+    // Blend 25 / 75.
+  "25:                                         \n"
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"
+    MEMACCESS(2)
+    "vld1.8     {q1}, [%2]!                    \n"
+    "subs       %3, %3, #16                    \n"
+    "vrhadd.u8  q0, q1                         \n"
+    "vrhadd.u8  q0, q1                         \n"
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"
+    "bgt        25b                            \n"
+    "b          99f                            \n"
+
+    // Blend 50 / 50.
+  "50:                                         \n"
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"
+    MEMACCESS(2)
+    "vld1.8     {q1}, [%2]!                    \n"
+    "subs       %3, %3, #16                    \n"
+    "vrhadd.u8  q0, q1                         \n"
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"
+    "bgt        50b                            \n"
+    "b          99f                            \n"
+
+    // Blend 75 / 25.
+  "75:                                         \n"
+    MEMACCESS(1)
+    "vld1.8     {q1}, [%1]!                    \n"
+    MEMACCESS(2)
+    "vld1.8     {q0}, [%2]!                    \n"
+    "subs       %3, %3, #16                    \n"
+    "vrhadd.u8  q0, q1                         \n"
+    "vrhadd.u8  q0, q1                         \n"
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"
+    "bgt        75b                            \n"
+    "b          99f                            \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+  "100:                                        \n"
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"
+    "subs       %3, %3, #16                    \n"
+    MEMACCESS(0)
+    "vst1.8     {q0}, [%0]!                    \n"
+    "bgt        100b                           \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),          // %0
+    "+r"(src_ptr),          // %1
+    "+r"(src_stride),       // %2
+    "+r"(dst_width),        // %3
+    "+r"(source_y_fraction) // %4
+  :
+  : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
+  );
+}
+
+// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
+void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  asm volatile (
+    "subs       %3, #8                         \n"
+    "blt        89f                            \n"
+    // Blend 8 pixels.
+  "8:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB0.
+    MEMACCESS(1)
+    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 pixels of ARGB1.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q10, d4, d3                    \n"  // db * a
+    "vmull.u8   q11, d5, d3                    \n"  // dg * a
+    "vmull.u8   q12, d6, d3                    \n"  // dr * a
+    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
+    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
+    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
+    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
+    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
+    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
+    "vqadd.u8   d2, d2, d6                     \n"  // + sr
+    "vmov.u8    d3, #255                       \n"  // a = 255
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 pixels of ARGB.
+    "bge        8b                             \n"
+
+  "89:                                         \n"
+    "adds       %3, #8-1                       \n"
+    "blt        99f                            \n"
+
+    // Blend 1 pixels.
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n"  // load 1 pixel ARGB0.
+    MEMACCESS(1)
+    "vld4.8     {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n"  // load 1 pixel ARGB1.
+    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
+    "vmull.u8   q10, d4, d3                    \n"  // db * a
+    "vmull.u8   q11, d5, d3                    \n"  // dg * a
+    "vmull.u8   q12, d6, d3                    \n"  // dr * a
+    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
+    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
+    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
+    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
+    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
+    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
+    "vqadd.u8   d2, d2, d6                     \n"  // + sr
+    "vmov.u8    d3, #255                       \n"  // a = 255
+    MEMACCESS(2)
+    "vst4.8     {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n"  // store 1 pixel.
+    "bge        1b                             \n"
+
+  "99:                                         \n"
+
+  : "+r"(src_argb0),    // %0
+    "+r"(src_argb1),    // %1
+    "+r"(dst_argb),     // %2
+    "+r"(width)         // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
+  );
+}
+
+// Attenuate 8 pixels at a time.
+void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    // Attenuate 8 pixels.
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q10, d0, d3                    \n"  // b * a
+    "vmull.u8   q11, d1, d3                    \n"  // g * a
+    "vmull.u8   q12, d2, d3                    \n"  // r * a
+    "vqrshrn.u16 d0, q10, #8                   \n"  // b >>= 8
+    "vqrshrn.u16 d1, q11, #8                   \n"  // g >>= 8
+    "vqrshrn.u16 d2, q12, #8                   \n"  // r >>= 8
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(width)       // %2
+  :
+  : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
+  );
+}
+
+// Quantize 8 ARGB pixels (32 bytes).
+// dst = (dst * scale >> 16) * interval_size + interval_offset;
+void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width) {
+  asm volatile (
+    "vdup.u16   q8, %2                         \n"
+    "vshr.u16   q8, q8, #1                     \n"  // scale >>= 1
+    "vdup.u16   q9, %3                         \n"  // interval multiply.
+    "vdup.u16   q10, %4                        \n"  // interval add
+
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]         \n"  // load 8 pixels of ARGB.
+    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
+    "vmovl.u8   q0, d0                         \n"  // b (0 .. 255)
+    "vmovl.u8   q1, d2                         \n"
+    "vmovl.u8   q2, d4                         \n"
+    "vqdmulh.s16 q0, q0, q8                    \n"  // b * scale
+    "vqdmulh.s16 q1, q1, q8                    \n"  // g
+    "vqdmulh.s16 q2, q2, q8                    \n"  // r
+    "vmul.u16   q0, q0, q9                     \n"  // b * interval_size
+    "vmul.u16   q1, q1, q9                     \n"  // g
+    "vmul.u16   q2, q2, q9                     \n"  // r
+    "vadd.u16   q0, q0, q10                    \n"  // b + interval_offset
+    "vadd.u16   q1, q1, q10                    \n"  // g
+    "vadd.u16   q2, q2, q10                    \n"  // r
+    "vqmovn.u16 d0, q0                         \n"
+    "vqmovn.u16 d2, q1                         \n"
+    "vqmovn.u16 d4, q2                         \n"
+    MEMACCESS(0)
+    "vst4.8     {d0, d2, d4, d6}, [%0]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(dst_argb),       // %0
+    "+r"(width)           // %1
+  : "r"(scale),           // %2
+    "r"(interval_size),   // %3
+    "r"(interval_offset)  // %4
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
+  );
+}
+
+// Shade 8 pixels at a time by specified value.
+// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
+// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
+void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  asm volatile (
+    "vdup.u32   q0, %3                         \n"  // duplicate scale value.
+    "vzip.u8    d0, d1                         \n"  // d0 aarrggbb.
+    "vshr.u16   q0, q0, #1                     \n"  // scale / 2.
+
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d22, d24, d26}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmovl.u8   q10, d20                       \n"  // b (0 .. 255)
+    "vmovl.u8   q11, d22                       \n"
+    "vmovl.u8   q12, d24                       \n"
+    "vmovl.u8   q13, d26                       \n"
+    "vqrdmulh.s16 q10, q10, d0[0]              \n"  // b * scale * 2
+    "vqrdmulh.s16 q11, q11, d0[1]              \n"  // g
+    "vqrdmulh.s16 q12, q12, d0[2]              \n"  // r
+    "vqrdmulh.s16 q13, q13, d0[3]              \n"  // a
+    "vqmovn.u16 d20, q10                       \n"
+    "vqmovn.u16 d22, q11                       \n"
+    "vqmovn.u16 d24, q12                       \n"
+    "vqmovn.u16 d26, q13                       \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d22, d24, d26}, [%1]!    \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),       // %0
+    "+r"(dst_argb),       // %1
+    "+r"(width)           // %2
+  : "r"(value)            // %3
+  : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
+  );
+}
+
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+// Similar to ARGBToYJ but stores ARGB.
+// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
+void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
+    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit B
+    "vmov       d1, d0                         \n"  // G
+    "vmov       d2, d0                         \n"  // R
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
+  );
+}
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+//    b = (r * 35 + g * 68 + b * 17) >> 7
+//    g = (r * 45 + g * 88 + b * 22) >> 7
+//    r = (r * 50 + g * 98 + b * 24) >> 7
+void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
+  asm volatile (
+    "vmov.u8    d20, #17                       \n"  // BB coefficient
+    "vmov.u8    d21, #68                       \n"  // BG coefficient
+    "vmov.u8    d22, #35                       \n"  // BR coefficient
+    "vmov.u8    d24, #22                       \n"  // GB coefficient
+    "vmov.u8    d25, #88                       \n"  // GG coefficient
+    "vmov.u8    d26, #45                       \n"  // GR coefficient
+    "vmov.u8    d28, #24                       \n"  // BB coefficient
+    "vmov.u8    d29, #98                       \n"  // BG coefficient
+    "vmov.u8    d30, #50                       \n"  // BR coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]         \n"  // load 8 ARGB pixels.
+    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q2, d0, d20                    \n"  // B to Sepia B
+    "vmlal.u8   q2, d1, d21                    \n"  // G
+    "vmlal.u8   q2, d2, d22                    \n"  // R
+    "vmull.u8   q3, d0, d24                    \n"  // B to Sepia G
+    "vmlal.u8   q3, d1, d25                    \n"  // G
+    "vmlal.u8   q3, d2, d26                    \n"  // R
+    "vmull.u8   q8, d0, d28                    \n"  // B to Sepia R
+    "vmlal.u8   q8, d1, d29                    \n"  // G
+    "vmlal.u8   q8, d2, d30                    \n"  // R
+    "vqshrn.u16 d0, q2, #7                     \n"  // 16 bit to 8 bit B
+    "vqshrn.u16 d1, q3, #7                     \n"  // 16 bit to 8 bit G
+    "vqshrn.u16 d2, q8, #7                     \n"  // 16 bit to 8 bit R
+    MEMACCESS(0)
+    "vst4.8     {d0, d1, d2, d3}, [%0]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(dst_argb),  // %0
+    "+r"(width)      // %1
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3",
+    "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
+// needs to saturate.  Consider doing a non-saturating version.
+void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                             const int8* matrix_argb, int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {q2}, [%3]                     \n"  // load 3 ARGB vectors.
+    "vmovl.s8   q0, d4                         \n"  // B,G coefficients s16.
+    "vmovl.s8   q1, d5                         \n"  // R,A coefficients s16.
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d16, d18, d20, d22}, [%0]!    \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "vmovl.u8   q8, d16                        \n"  // b (0 .. 255) 16 bit
+    "vmovl.u8   q9, d18                        \n"  // g
+    "vmovl.u8   q10, d20                       \n"  // r
+    "vmovl.u8   q15, d22                       \n"  // a
+    "vmul.s16   q12, q8, d0[0]                 \n"  // B = B * Matrix B
+    "vmul.s16   q13, q8, d1[0]                 \n"  // G = B * Matrix G
+    "vmul.s16   q14, q8, d2[0]                 \n"  // R = B * Matrix R
+    "vmul.s16   q15, q8, d3[0]                 \n"  // A = B * Matrix A
+    "vmul.s16   q4, q9, d0[1]                  \n"  // B += G * Matrix B
+    "vmul.s16   q5, q9, d1[1]                  \n"  // G += G * Matrix G
+    "vmul.s16   q6, q9, d2[1]                  \n"  // R += G * Matrix R
+    "vmul.s16   q7, q9, d3[1]                  \n"  // A += G * Matrix A
+    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
+    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
+    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
+    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
+    "vmul.s16   q4, q10, d0[2]                 \n"  // B += R * Matrix B
+    "vmul.s16   q5, q10, d1[2]                 \n"  // G += R * Matrix G
+    "vmul.s16   q6, q10, d2[2]                 \n"  // R += R * Matrix R
+    "vmul.s16   q7, q10, d3[2]                 \n"  // A += R * Matrix A
+    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
+    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
+    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
+    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
+    "vmul.s16   q4, q15, d0[3]                 \n"  // B += A * Matrix B
+    "vmul.s16   q5, q15, d1[3]                 \n"  // G += A * Matrix G
+    "vmul.s16   q6, q15, d2[3]                 \n"  // R += A * Matrix R
+    "vmul.s16   q7, q15, d3[3]                 \n"  // A += A * Matrix A
+    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
+    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
+    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
+    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
+    "vqshrun.s16 d16, q12, #6                  \n"  // 16 bit to 8 bit B
+    "vqshrun.s16 d18, q13, #6                  \n"  // 16 bit to 8 bit G
+    "vqshrun.s16 d20, q14, #6                  \n"  // 16 bit to 8 bit R
+    "vqshrun.s16 d22, q15, #6                  \n"  // 16 bit to 8 bit A
+    MEMACCESS(1)
+    "vst4.8     {d16, d18, d20, d22}, [%1]!    \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(width)       // %2
+  : "r"(matrix_argb)  // %3
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+    "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
+// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
+#ifdef HAS_ARGBMULTIPLYROW_NEON
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d1, d3, d5, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vmull.u8   q0, d0, d1                     \n"  // multiply B
+    "vmull.u8   q1, d2, d3                     \n"  // multiply G
+    "vmull.u8   q2, d4, d5                     \n"  // multiply R
+    "vmull.u8   q3, d6, d7                     \n"  // multiply A
+    "vrshrn.u16 d0, q0, #8                     \n"  // 16 bit to 8 bit B
+    "vrshrn.u16 d1, q1, #8                     \n"  // 16 bit to 8 bit G
+    "vrshrn.u16 d2, q2, #8                     \n"  // 16 bit to 8 bit R
+    "vrshrn.u16 d3, q3, #8                     \n"  // 16 bit to 8 bit A
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"
+  );
+}
+#endif  // HAS_ARGBMULTIPLYROW_NEON
+
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vqadd.u8   q0, q0, q2                     \n"  // add B, G
+    "vqadd.u8   q1, q1, q3                     \n"  // add R, A
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"
+  );
+}
+
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vqsub.u8   q0, q0, q2                     \n"  // subtract B, G
+    "vqsub.u8   q1, q1, q3                     \n"  // subtract R, A
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"
+  );
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // alpha
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0}, [%0]!                    \n"  // load 8 sobelx.
+    MEMACCESS(1)
+    "vld1.8     {d1}, [%1]!                    \n"  // load 8 sobely.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vqadd.u8   d0, d0, d1                     \n"  // add
+    "vmov.u8    d1, d0                         \n"
+    "vmov.u8    d2, d0                         \n"
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "q0", "q1"
+  );
+}
+
+// Adds Sobel X and Sobel Y and stores Sobel into plane.
+void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  asm volatile (
+    // 16 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 16 sobelx.
+    MEMACCESS(1)
+    "vld1.8     {q1}, [%1]!                    \n"  // load 16 sobely.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
+    "vqadd.u8   q0, q0, q1                     \n"  // add
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"  // store 16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_y),       // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "q0", "q1"
+  );
+}
+
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // alpha
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d2}, [%0]!                    \n"  // load 8 sobelx.
+    MEMACCESS(1)
+    "vld1.8     {d0}, [%1]!                    \n"  // load 8 sobely.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vqadd.u8   d1, d0, d2                     \n"  // add
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "q0", "q1"
+  );
+}
+
+// SobelX as a matrix is
+// -1  0  1
+// -2  0  2
+// -1  0  1
+void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0}, [%0],%5                  \n"  // top
+    MEMACCESS(0)
+    "vld1.8     {d1}, [%0],%6                  \n"
+    "vsubl.u8   q0, d0, d1                     \n"
+    MEMACCESS(1)
+    "vld1.8     {d2}, [%1],%5                  \n"  // center * 2
+    MEMACCESS(1)
+    "vld1.8     {d3}, [%1],%6                  \n"
+    "vsubl.u8   q1, d2, d3                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    MEMACCESS(2)
+    "vld1.8     {d2}, [%2],%5                  \n"  // bottom
+    MEMACCESS(2)
+    "vld1.8     {d3}, [%2],%6                  \n"
+    "subs       %4, %4, #8                     \n"  // 8 pixels
+    "vsubl.u8   q1, d2, d3                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    "vabs.s16   q0, q0                         \n"
+    "vqmovn.u16 d0, q0                         \n"
+    MEMACCESS(3)
+    "vst1.8     {d0}, [%3]!                    \n"  // store 8 sobelx
+    "bgt        1b                             \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(src_y2),      // %2
+    "+r"(dst_sobelx),  // %3
+    "+r"(width)        // %4
+  : "r"(2),            // %5
+    "r"(6)             // %6
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
+// SobelY as a matrix is
+// -1 -2 -1
+//  0  0  0
+//  1  2  1
+void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0}, [%0],%4                  \n"  // left
+    MEMACCESS(1)
+    "vld1.8     {d1}, [%1],%4                  \n"
+    "vsubl.u8   q0, d0, d1                     \n"
+    MEMACCESS(0)
+    "vld1.8     {d2}, [%0],%4                  \n"  // center * 2
+    MEMACCESS(1)
+    "vld1.8     {d3}, [%1],%4                  \n"
+    "vsubl.u8   q1, d2, d3                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    MEMACCESS(0)
+    "vld1.8     {d2}, [%0],%5                  \n"  // right
+    MEMACCESS(1)
+    "vld1.8     {d3}, [%1],%5                  \n"
+    "subs       %3, %3, #8                     \n"  // 8 pixels
+    "vsubl.u8   q1, d2, d3                     \n"
+    "vadd.s16   q0, q0, q1                     \n"
+    "vabs.s16   q0, q0                         \n"
+    "vqmovn.u16 d0, q0                         \n"
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 sobely
+    "bgt        1b                             \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(dst_sobely),  // %2
+    "+r"(width)        // %3
+  : "r"(1),            // %4
+    "r"(6)             // %5
+  : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+#endif  // __ARM_NEON__
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_neon64.cc b/src/main/jni/libyuv/source/row_neon64.cc
new file mode 100644
index 000000000..952e10d73
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_neon64.cc
@@ -0,0 +1,3327 @@
+/*
+ *  Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+// Read 8 Y, 4 U and 4 V from 422
+#define READYUV422                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.32    {d2[1]}, [%2]!                 \n"
+
+// Read 8 Y, 2 U and 2 V from 422
+#define READYUV411                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vzip.u8    d2, d3                         \n"
+
+// Read 8 Y, 8 U and 8 V from 444
+#define READYUV444                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    MEMACCESS(2)                                                               \
+    "vld1.8     {d3}, [%2]!                    \n"                             \
+    "vpaddl.u8  q1, q1                         \n"                             \
+    "vrshrn.u16 d2, q1, #1                     \n"
+
+// Read 8 Y, and set 4 U and 4 V to 128
+#define READYUV400                                                             \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    "vmov.u8    d2, #128                       \n"
+
+// Read 8 Y and 4 UV from NV12
+#define READNV12                                                               \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 Y and 4 VU from NV21
+#define READNV21                                                               \
+    MEMACCESS(0)                                                               \
+    "vld1.8     {d0}, [%0]!                    \n"                             \
+    MEMACCESS(1)                                                               \
+    "vld1.8     {d2}, [%1]!                    \n"                             \
+    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
+    "vuzp.u8    d3, d2                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 YUY2
+#define READYUY2                                                               \
+    MEMACCESS(0)                                                               \
+    "vld2.8     {d0, d2}, [%0]!                \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+// Read 8 UYVY
+#define READUYVY                                                               \
+    MEMACCESS(0)                                                               \
+    "vld2.8     {d2, d3}, [%0]!                \n"                             \
+    "vmov.u8    d0, d3                         \n"                             \
+    "vmov.u8    d3, d2                         \n"                             \
+    "vuzp.u8    d2, d3                         \n"                             \
+    "vtrn.u32   d2, d3                         \n"
+
+#define YUV422TORGB                                                            \
+    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
+    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
+    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
+    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
+    "vtrn.u8    d0, d1                         \n"                             \
+    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
+    "vmul.s16   q0, q0, q14                    \n"                             \
+    "vadd.s16   d18, d19                       \n"                             \
+    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
+    "vqadd.s16  d21, d1, d16                   \n"                             \
+    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
+    "vqadd.s16  d23, d1, d17                   \n"                             \
+    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
+    "vqadd.s16  d17, d1, d18                   \n"                             \
+    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
+    "vqshrun.s16 d1, q11, #6                   \n" /* G */                     \
+    "vqshrun.s16 d2, q8, #6                    \n" /* R */                     \
+    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
+    "vmovl.u8   q11, d1                        \n"                             \
+    "vmovl.u8   q8, d2                         \n"                             \
+    "vtrn.u8    d20, d21                       \n"                             \
+    "vtrn.u8    d22, d23                       \n"                             \
+    "vtrn.u8    d16, d17                       \n"                             \
+    "vmov.u8    d21, d16                       \n"
+
+static vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
+                         0, 0, 0, 0, 0, 0, 0, 0 };
+static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
+                       0, 0, 0, 0, 0, 0, 0, 0 };
+
+#ifdef HAS_I444TOARGBROW_NEON
+void I444ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV444
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I444TOARGBROW_NEON
+
+#ifdef HAS_I422TOARGBROW_NEON
+void I422ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TOARGBROW_NEON
+
+#ifdef HAS_I411TOARGBROW_NEON
+void I411ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV411
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_argb),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I411TOARGBROW_NEON
+
+#ifdef HAS_I422TOBGRAROW_NEON
+void I422ToBGRARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_bgra,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    "vmov.u8    d19, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_bgra),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TOBGRAROW_NEON
+
+#ifdef HAS_I422TOABGRROW_NEON
+void I422ToABGRRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_abgr,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_abgr),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TOABGRROW_NEON
+
+#ifdef HAS_I422TORGBAROW_NEON
+void I422ToRGBARow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_rgba,
+                        int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d19, #255                      \n"
+    MEMACCESS(3)
+    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_u),     // %1
+      "+r"(src_v),     // %2
+      "+r"(dst_rgba),  // %3
+      "+r"(width)      // %4
+    : "r"(&kUVToRB),   // %5
+      "r"(&kUVToG)     // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TORGBAROW_NEON
+
+#ifdef HAS_I422TORGB24ROW_NEON
+void I422ToRGB24Row_NEON(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgb24,
+                         int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    MEMACCESS(3)
+    "vst3.8     {d20, d21, d22}, [%3]!         \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),      // %0
+      "+r"(src_u),      // %1
+      "+r"(src_v),      // %2
+      "+r"(dst_rgb24),  // %3
+      "+r"(width)       // %4
+    : "r"(&kUVToRB),    // %5
+      "r"(&kUVToG)      // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TORGB24ROW_NEON
+
+#ifdef HAS_I422TORAWROW_NEON
+void I422ToRAWRow_NEON(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_raw,
+                       int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vswp.u8    d20, d22                       \n"
+    MEMACCESS(3)
+    "vst3.8     {d20, d21, d22}, [%3]!         \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_raw),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TORAWROW_NEON
+
+#define ARGBTORGB565                                                           \
+    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
+    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
+    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
+    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
+    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
+    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
+    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
+    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
+    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
+    "vorr       q0, q0, q10                    \n"  /* BGR                  */
+
+#ifdef HAS_I422TORGB565ROW_NEON
+void I422ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_u,
+                          const uint8* src_v,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_rgb565),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TORGB565ROW_NEON
+
+#define ARGBTOARGB1555                                                         \
+    "vshr.u8    q10, q10, #3                   \n"  /* B                    */ \
+    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
+    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
+    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
+    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
+    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
+    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
+    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
+    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
+    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
+    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
+    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
+    "vorr       q0, q0, q1                     \n"  /* BGRA                 */
+
+#ifdef HAS_I422TOARGB1555ROW_NEON
+void I422ToARGB1555Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb1555,
+                            int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    ARGBTOARGB1555
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_argb1555),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TOARGB1555ROW_NEON
+
+#define ARGBTOARGB4444                                                         \
+    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
+    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
+    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
+    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
+    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
+    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
+    "vzip.u8    d0, d1                         \n"  /* BGRA                 */
+
+#ifdef HAS_I422TOARGB4444ROW_NEON
+void I422ToARGB4444Row_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_argb4444,
+                            int width) {
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.8     {d24}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {d25}, [%6]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV422
+    YUV422TORGB
+    "subs       %4, %4, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    ARGBTOARGB4444
+    MEMACCESS(3)
+    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
+    "bgt        1b                             \n"
+    : "+r"(src_y),    // %0
+      "+r"(src_u),    // %1
+      "+r"(src_v),    // %2
+      "+r"(dst_argb4444),  // %3
+      "+r"(width)     // %4
+    : "r"(&kUVToRB),  // %5
+      "r"(&kUVToG)    // %6
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_I422TOARGB4444ROW_NEON
+
+#ifdef HAS_YTOARGBROW_NEON
+void YToARGBRow_NEON(const uint8* src_y,
+                     uint8* dst_argb,
+                     int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUV400
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_YTOARGBROW_NEON
+
+#ifdef HAS_I400TOARGBROW_NEON
+void I400ToARGBRow_NEON(const uint8* src_y,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+    "vmov.u8    d23, #255                      \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d20}, [%0]!                   \n"
+    "vmov       d21, d20                       \n"
+    "vmov       d22, d20                       \n"
+    "subs       %2, %2, #8                     \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    :
+    : "cc", "memory", "d20", "d21", "d22", "d23"
+  );
+}
+#endif  // HAS_I400TOARGBROW_NEON
+
+#ifdef HAS_NV12TOARGBROW_NEON
+void NV12ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_uv,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV12
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(2)
+    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_argb),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_NV12TOARGBROW_NEON
+
+#ifdef HAS_NV21TOARGBROW_NEON
+void NV21ToARGBRow_NEON(const uint8* src_y,
+                        const uint8* src_uv,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV21
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(2)
+    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_argb),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_NV21TOARGBROW_NEON
+
+#ifdef HAS_NV12TORGB565ROW_NEON
+void NV12ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_uv,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV12
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_rgb565),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_NV12TORGB565ROW_NEON
+
+#ifdef HAS_NV21TORGB565ROW_NEON
+void NV21ToRGB565Row_NEON(const uint8* src_y,
+                          const uint8* src_uv,
+                          uint8* dst_rgb565,
+                          int width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.8     {d24}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {d25}, [%5]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READNV21
+    YUV422TORGB
+    "subs       %3, %3, #8                     \n"
+    ARGBTORGB565
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+    : "+r"(src_y),     // %0
+      "+r"(src_uv),    // %1
+      "+r"(dst_rgb565),  // %2
+      "+r"(width)      // %3
+    : "r"(&kUVToRB),   // %4
+      "r"(&kUVToG)     // %5
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_NV21TORGB565ROW_NEON
+
+#ifdef HAS_YUY2TOARGBROW_NEON
+void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READYUY2
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_yuy2),  // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_YUY2TOARGBROW_NEON
+
+#ifdef HAS_UYVYTOARGBROW_NEON
+void UYVYToARGBRow_NEON(const uint8* src_uyvy,
+                        uint8* dst_argb,
+                        int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {d24}, [%3]                    \n"
+    MEMACCESS(4)
+    "vld1.8     {d25}, [%4]                    \n"
+    "vmov.u8    d26, #128                      \n"
+    "vmov.u16   q14, #74                       \n"
+    "vmov.u16   q15, #16                       \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    READUYVY
+    YUV422TORGB
+    "subs       %2, %2, #8                     \n"
+    "vmov.u8    d23, #255                      \n"
+    MEMACCESS(1)
+    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
+    "bgt        1b                             \n"
+    : "+r"(src_uyvy),  // %0
+      "+r"(dst_argb),  // %1
+      "+r"(width)      // %2
+    : "r"(&kUVToRB),   // %3
+      "r"(&kUVToG)     // %4
+    : "cc", "memory", "q0", "q1", "q2", "q3",
+      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_UYVYTOARGBROW_NEON
+
+// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
+#ifdef HAS_SPLITUVROW_NEON
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                     int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pairs of UV
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "st1        {v0.16b}, [%1], #16            \n"  // store U
+    MEMACCESS(2)
+    "st1        {v1.16b}, [%2], #16            \n"  // store V
+    "bgt        1b                             \n"
+    : "+r"(src_uv),  // %0
+      "+r"(dst_u),   // %1
+      "+r"(dst_v),   // %2
+      "+r"(width)    // %3  // Output registers
+    :                       // Input registers
+    : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+#endif  // HAS_SPLITUVROW_NEON
+
+// Reads 16 U's and V's and writes out 16 pairs of UV.
+#ifdef HAS_MERGEUVROW_NEON
+void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"  // load U
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"  // load V
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(2)
+    "st2        {v0.16b, v1.16b}, [%2], #32    \n"  // store 16 pairs of UV
+    "bgt        1b                             \n"
+    :
+      "+r"(src_u),   // %0
+      "+r"(src_v),   // %1
+      "+r"(dst_uv),  // %2
+      "+r"(width)    // %3  // Output registers
+    :                       // Input registers
+    : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+#endif  // HAS_MERGEUVROW_NEON
+
+// Copy multiple of 32.  vld4.8  allow unaligned and is fastest on a15.
+#ifdef HAS_COPYROW_NEON
+void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.8b-v3.8b}, [%0], #32       \n"  // load 32
+    "subs       %2, %2, #32                    \n"  // 32 processed per loop
+    MEMACCESS(1)
+    "st1        {v0.8b-v3.8b}, [%1], #32       \n"  // store 32
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(count)  // %2  // Output registers
+  :                     // Input registers
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_COPYROW_NEON
+
+// SetRow8 writes 'count' bytes using a 32 bit value repeated.
+#ifdef HAS_SETROW_NEON
+void SetRow_NEON(uint8* dst, uint32 v32, int count) {
+  asm volatile (
+    "dup        v0.4s, %w2                     \n"  // duplicate 4 ints
+    "1:                                        \n"
+    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"  // store
+    "bgt       1b                              \n"
+  : "+r"(dst),   // %0
+    "+r"(count)  // %1
+  : "r"(v32)     // %2
+  : "cc", "memory", "v0"
+  );
+}
+#endif  // HAS_SETROW_NEON
+
+// TODO(fbarchard): Make fully assembler
+// SetRow32 writes 'count' words using a 32 bit value repeated.
+#ifdef HAS_ARGBSETROWS_NEON
+void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
+                      int dst_stride, int height) {
+  for (int y = 0; y < height; ++y) {
+    SetRow_NEON(dst, v32, width << 2);
+    dst += dst_stride;
+  }
+}
+#endif  // HAS_ARGBSETROWS_NEON
+
+#ifdef HAS_MIRRORROW_NEON
+void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    // Start at end of source row.
+    "add        %0, %0, %2                     \n"
+    "sub        %0, %0, #16                    \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
+    "subs       %2, %2, #16                    \n"  // 16 pixels per loop.
+    "rev64      v0.16b, v0.16b                 \n"
+    MEMACCESS(1)
+    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
+    MEMACCESS(1)
+    "st1        {v0.D}[0], [%1], #8            \n"
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  : "r"((ptrdiff_t)-16)    // %3
+  : "cc", "memory", "v0"
+  );
+}
+#endif  // HAS_MIRRORROW_NEON
+
+#ifdef HAS_MIRRORUVROW_NEON
+void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                      int width) {
+  asm volatile (
+    // Start at end of source row.
+    "add        %0, %0, %3, lsl #1             \n"
+    "sub        %0, %0, #16                    \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v0.8b, v1.8b}, [%0], %4       \n"  // src -= 16
+    "subs       %3, %3, #8                     \n"  // 8 pixels per loop.
+    "rev64      v0.8b, v0.8b                   \n"
+    "rev64      v1.8b, v1.8b                   \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8               \n"  // dst += 8
+    MEMACCESS(2)
+    "st1        {v1.8b}, [%2], #8               \n"
+    "bgt        1b                             \n"
+  : "+r"(src_uv),  // %0
+    "+r"(dst_u),   // %1
+    "+r"(dst_v),   // %2
+    "+r"(width)    // %3
+  : "r"((ptrdiff_t)-16)      // %4
+  : "cc", "memory", "v0", "v1"
+  );
+}
+#endif  // HAS_MIRRORUVROW_NEON
+
+#ifdef HAS_ARGBMIRRORROW_NEON
+void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    // Start at end of source row.
+    "add        %0, %0, %2, lsl #2             \n"
+    "sub        %0, %0, #16                    \n"
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
+    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
+    "rev64      v0.4s, v0.4s                   \n"
+    MEMACCESS(1)
+    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
+    MEMACCESS(1)
+    "st1        {v0.D}[0], [%1], #8            \n"
+    "bgt        1b                             \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  : "r"((ptrdiff_t)-16)    // %3
+  : "cc", "memory", "v0"
+  );
+}
+#endif  // HAS_ARGBMIRRORROW_NEON
+
+#ifdef HAS_RGB24TOARGBROW_NEON
+void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+  asm volatile (
+    "movi       v4.8b, #255                    \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld3        {v1.8b-v3.8b}, [%0], #24       \n"  // load 8 pixels of RGB24.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    MEMACCESS(1)
+    "st4        {v1.8b-v4.8b}, [%1], #32       \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(dst_argb),   // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
+  );
+}
+#endif  // HAS_RGB24TOARGBROW_NEON
+
+#ifdef HAS_RAWTOARGBROW_NEON
+void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
+  asm volatile (
+    "movi       v5.8b, #255                    \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld3        {v0.8b-v2.8b}, [%0], #24       \n"  // read r g b
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "mov        v3.8b, v1.8b                   \n"  // move g
+    "mov        v4.8b, v0.8b                   \n"  // move r
+    MEMACCESS(1)
+    "st4        {v2.8b-v5.8b}, [%1], #32       \n"  // store b g r a
+    "bgt        1b                             \n"
+  : "+r"(src_raw),   // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"  // Clobber List
+  );
+}
+#endif  // HAS_RAWTOARGBROW_NEON
+
+#define RGB565TOARGB                                                           \
+    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
+    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
+    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
+    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
+    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
+    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
+    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
+    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
+    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
+
+#ifdef HAS_RGB565TOARGBROW_NEON
+void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    RGB565TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+#endif  // HAS_RGB565TOARGBROW_NEON
+
+#define ARGB1555TOARGB                                                         \
+    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
+    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
+    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
+    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
+    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
+    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
+    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
+    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
+    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
+    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
+    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
+
+// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
+#define RGB555TOARGB                                                           \
+    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
+    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
+    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
+    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
+    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
+    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
+    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
+    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
+    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
+    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
+
+#ifdef HAS_ARGB1555TOARGBROW_NEON
+void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB1555TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+#endif  // HAS_ARGB1555TOARGBROW_NEON
+
+#define ARGB4444TOARGB                                                         \
+    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
+    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
+    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
+    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
+    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
+    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
+    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
+    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
+
+#ifdef HAS_ARGB4444TOARGBROW_NEON
+void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d3, #255                       \n"  // Alpha
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB4444TOARGB
+    MEMACCESS(1)
+    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(dst_argb),    // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
+  );
+}
+#endif  // HAS_ARGB4444TOARGBROW_NEON
+
+#ifdef HAS_ARGBTORGB24ROW_NEON
+void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v1.8b-v4.8b}, [%0], #32       \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    MEMACCESS(1)
+    "st3        {v1.8b-v3.8b}, [%1], #24       \n"  // store 8 pixels of RGB24.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_rgb24),  // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
+  );
+}
+#endif  // HAS_ARGBTORGB24ROW_NEON
+
+#ifdef HAS_ARGBTORAWROW_NEON
+void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v1.8b-v4.8b}, [%0], #32       \n"  // load b g r a
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "mov        v4.8b, v2.8b                   \n"  // mov g
+    "mov        v5.8b, v1.8b                   \n"  // mov b
+    MEMACCESS(1)
+    "st3        {v3.8b-v5.8b}, [%1], #24       \n"  // store r g b
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_raw),   // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v1", "v2", "v3", "v4", "v5"  // Clobber List
+  );
+}
+#endif  // HAS_ARGBTORAWROW_NEON
+
+#ifdef HAS_YUY2TOYROW_NEON
+void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pixels of YUY2.
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
+    MEMACCESS(1)
+    "st1        {v0.16b}, [%1], #16            \n"  // store 16 pixels of Y.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+#endif  // HAS_YUY2TOYROW_NEON
+
+#ifdef HAS_UYVYTOYROW_NEON
+void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pixels of UYVY.
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
+    MEMACCESS(1)
+    "st1        {v1.16b}, [%1], #16            \n"  // store 16 pixels of Y.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+#endif  // HAS_UYVYTOYROW_NEON
+
+#ifdef HAS_YUY2TOUV422ROW_NEON
+void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of YUY2.
+    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "st1        {v1.8b}, [%1], #8              \n"  // store 8 U.
+    MEMACCESS(2)
+    "st1        {v3.8b}, [%2], #8              \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_YUY2TOUV422ROW_NEON
+
+#ifdef HAS_UYVYTOUV422ROW_NEON
+void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of UYVY.
+    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 U.
+    MEMACCESS(2)
+    "st1        {v2.8b}, [%2], #8              \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_UYVYTOUV422ROW_NEON
+
+#ifdef HAS_YUY2TOUVROW_NEON
+void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %x1, %x0, %w1, sxtw            \n"  // stride + src_yuy2
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of YUY2.
+    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load next row YUY2.
+    "urhadd     v1.8b, v1.8b, v5.8b            \n"  // average rows of U
+    "urhadd     v3.8b, v3.8b, v7.8b            \n"  // average rows of V
+    MEMACCESS(2)
+    "st1        {v1.8b}, [%2], #8              \n"  // store 8 U.
+    MEMACCESS(3)
+    "st1        {v3.8b}, [%3], #8              \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_yuy2),     // %0
+    "+r"(stride_yuy2),  // %1
+    "+r"(dst_u),        // %2
+    "+r"(dst_v),        // %3
+    "+r"(pix)           // %4
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"  // Clobber List
+  );
+}
+#endif  // HAS_YUY2TOUVROW_NEON
+
+#ifdef HAS_UYVYTOUVROW_NEON
+void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %x1, %x0, %w1, sxtw            \n"  // stride + src_uyvy
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of UYVY.
+    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load next row UYVY.
+    "urhadd     v0.8b, v0.8b, v4.8b            \n"  // average rows of U
+    "urhadd     v2.8b, v2.8b, v6.8b            \n"  // average rows of V
+    MEMACCESS(2)
+    "st1        {v0.8b}, [%2], #8              \n"  // store 8 U.
+    MEMACCESS(3)
+    "st1        {v2.8b}, [%3], #8              \n"  // store 8 V.
+    "bgt        1b                             \n"
+  : "+r"(src_uyvy),     // %0
+    "+r"(stride_uyvy),  // %1
+    "+r"(dst_u),        // %2
+    "+r"(dst_v),        // %3
+    "+r"(pix)           // %4
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"  // Clobber List
+  );
+}
+#endif  // HAS_UYVYTOUVROW_NEON
+
+#ifdef HAS_HALFROW_NEON
+void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %x1, %x0, %w1, sxtw            \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"  // load row 1 16 pixels.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"  // load row 2 16 pixels.
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"  // average row 1 and 2
+    MEMACCESS(2)
+    "st1        {v0.16b}, [%2], #16            \n"
+    "bgt        1b                             \n"
+  : "+r"(src_uv),         // %0
+    "+r"(src_uv_stride),  // %1
+    "+r"(dst_uv),         // %2
+    "+r"(pix)             // %3
+  :
+  : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+#endif  // HAS_HALFROW_NEON
+
+// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
+#ifdef HAS_ARGBTOBAYERROW_NEON
+void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                         uint32 selector, int pix) {
+  asm volatile (
+    "mov        v2.s[0], %w3                   \n"  // selector
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b, v1.16b}, [%0], 32     \n"  // load row 8 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    "tbl        v4.8b, {v0.16b}, v2.8b         \n"  // look up 4 pixels
+    "tbl        v5.8b, {v1.16b}, v2.8b         \n"  // look up 4 pixels
+    "trn1       v4.4s, v4.4s, v5.4s            \n"  // combine 8 pixels
+    MEMACCESS(1)
+    "st1        {v4.8b}, [%1], #8              \n"  // store 8.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  : "r"(selector)     // %3
+  : "cc", "memory", "v0", "v1", "v2", "v4", "v5"   // Clobber List
+  );
+}
+#endif  // HAS_ARGBTOBAYERROW_NEON
+
+// Select G channels from ARGB.  e.g.  GGGGGGGG
+#ifdef HAS_ARGBTOBAYERGGROW_NEON
+void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 /*selector*/, int pix) {
+  asm volatile (
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load row 8 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    MEMACCESS(1)
+    "st1        {v1.8b}, [%1], #8              \n"  // store 8 G's.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_bayer),  // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_ARGBTOBAYERGGROW_NEON
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+#ifdef HAS_ARGBSHUFFLEROW_NEON
+void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  asm volatile (
+    MEMACCESS(3)
+    "ld1        {v2.16b}, [%3]                 \n"  // shuffler
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"  // load 4 pixels.
+    "subs       %2, %2, #4                     \n"  // 4 processed per loop
+    "tbl        v1.16b, {v0.16b}, v2.16b       \n"  // look up 4 pixels
+    MEMACCESS(1)
+    "st1        {v1.16b}, [%1], #16            \n"  // store 4.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "r"(shuffler)    // %3
+  : "cc", "memory", "v0", "v1", "v2"  // Clobber List
+  );
+}
+#endif  // HAS_ARGBSHUFFLEROW_NEON
+
+#ifdef HAS_I422TOYUY2ROW_NEON
+void I422ToYUY2Row_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_yuy2, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v0.8b, v1.8b}, [%0], #16      \n"  // load 16 Ys
+    "mov        v2.8b, v1.8b                   \n"
+    MEMACCESS(1)
+    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 Us
+    MEMACCESS(2)
+    "ld1        {v3.8b}, [%2], #8              \n"  // load 8 Vs
+    "subs       %4, %4, #16                    \n"  // 16 pixels
+    MEMACCESS(3)
+    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 YUY2/16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_y),     // %0
+    "+r"(src_u),     // %1
+    "+r"(src_v),     // %2
+    "+r"(dst_yuy2),  // %3
+    "+r"(width)      // %4
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"
+  );
+}
+#endif  // HAS_I422TOYUY2ROW_NEON
+
+#ifdef HAS_I422TOUYVYROW_NEON
+void I422ToUYVYRow_NEON(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_uyvy, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld2        {v1.8b, v2.8b}, [%0], #16      \n"  // load 16 Ys
+    "mov        v3.8b, v2.8b                   \n"
+    MEMACCESS(1)
+    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 Us
+    MEMACCESS(2)
+    "ld1        {v2.8b}, [%2], #8              \n"  // load 8 Vs
+    "subs       %4, %4, #16                    \n"  // 16 pixels
+    MEMACCESS(3)
+    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 UYVY/16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_y),     // %0
+    "+r"(src_u),     // %1
+    "+r"(src_v),     // %2
+    "+r"(dst_uyvy),  // %3
+    "+r"(width)      // %4
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"
+  );
+}
+#endif  // HAS_I422TOUYVYROW_NEON
+
+#ifdef HAS_ARGBTORGB565ROW_NEON
+void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTORGB565
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_rgb565),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+#endif  // HAS_ARGBTORGB565ROW_NEON
+
+#ifdef HAS_ARGBTOARGB1555ROW_NEON
+void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
+                            int pix) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTOARGB1555
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb1555),  // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+#endif  // HAS_ARGBTOARGB1555ROW_NEON
+
+#ifdef HAS_ARGBTOARGB4444ROW_NEON
+void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
+                            int pix) {
+  asm volatile (
+    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGBTOARGB4444
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),      // %0
+    "+r"(dst_argb4444),  // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
+  );
+}
+#endif  // HAS_ARGBTOARGB4444ROW_NEON
+
+#ifdef HAS_ARGBTOYROW_NEON
+void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
+    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
+    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
+    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
+  );
+}
+#endif  // HAS_ARGBTOYROW_NEON
+
+#ifdef HAS_ARGBTOYJROW_NEON
+void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #15                     \n"  // B * 0.11400 coefficient
+    "movi       v5.8b, #75                     \n"  // G * 0.58700 coefficient
+    "movi       v6.8b, #38                     \n"  // R * 0.29900 coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
+    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
+    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
+    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 15 bit to 8 bit Y
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
+  );
+}
+#endif  // HAS_ARGBTOYJROW_NEON
+
+// 8x1 pixels.
+#ifdef HAS_ARGBTOUV444ROW_NEON
+void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "movi       v24.8b, #112                   \n"  // UB / VR 0.875 coefficient
+    "movi       v25.8b, #74                    \n"  // UG -0.5781 coefficient
+    "movi       v26.8b, #38                    \n"  // UR -0.2969 coefficient
+    "movi       v27.8b, #18                    \n"  // VB -0.1406 coefficient
+    "movi       v28.8b, #94                    \n"  // VG -0.7344 coefficient
+    "movi       v29.16b,#0x80                  \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "umull      v4.8h, v0.8b, v24.8b           \n"  // B
+    "umlsl      v4.8h, v1.8b, v25.8b           \n"  // G
+    "umlsl      v4.8h, v2.8b, v26.8b           \n"  // R
+    "add        v4.8h, v4.8h, v29.8h           \n"  // +128 -> unsigned
+
+    "umull      v3.8h, v2.8b, v24.8b           \n"  // R
+    "umlsl      v3.8h, v1.8b, v28.8b           \n"  // G
+    "umlsl      v3.8h, v0.8b, v27.8b           \n"  // B
+    "add        v3.8h, v3.8h, v29.8h           \n"  // +128 -> unsigned
+
+    "uqshrn     v0.8b, v4.8h, #8               \n"  // 16 bit to 8 bit U
+    "uqshrn     v1.8b, v3.8h, #8               \n"  // 16 bit to 8 bit V
+
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
+    "v24", "v25", "v26", "v27", "v28", "v29"
+  );
+}
+#endif  // HAS_ARGBTOUV444ROW_NEON
+
+// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#ifdef HAS_ARGBTOUV422ROW_NEON
+void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "movi       v20.8h, #112 / 2               \n"  // UB / VR 0.875 coefficient
+    "movi       v21.8h, #74 / 2                \n"  // UG -0.5781 coefficient
+    "movi       v22.8h, #38 / 2                \n"  // UR -0.2969 coefficient
+    "movi       v23.8h, #18 / 2                \n"  // VB -0.1406 coefficient
+    "movi       v24.8h, #94 / 2                \n"  // VG -0.7344 coefficient
+    "movi       v25.16b, #0x80                 \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.16b-v3.16b}, [%0], #64     \n"  // load 16 ARGB pixels.
+
+    "uaddlp     v0.8h, v0.16b                  \n"  // B 16 bytes -> 8 shorts.
+    "uaddlp     v1.8h, v1.16b                  \n"  // G 16 bytes -> 8 shorts.
+    "uaddlp     v2.8h, v2.16b                  \n"  // R 16 bytes -> 8 shorts.
+
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
+    "mul        v3.8h, v0.8h, v20.8h           \n"  // B
+    "mls        v3.8h, v1.8h, v21.8h           \n"  // G
+    "mls        v3.8h, v2.8h, v22.8h           \n"  // R
+    "add        v3.8h, v3.8h, v25.8h           \n"  // +128 -> unsigned
+
+    "mul        v4.8h, v2.8h, v20.8h           \n"  // R
+    "mls        v4.8h, v1.8h, v24.8h           \n"  // G
+    "mls        v4.8h, v0.8h, v23.8h           \n"  // B
+    "add        v4.8h, v4.8h, v25.8h           \n"  // +128 -> unsigned
+
+    "uqshrn     v0.8b, v3.8h, #8               \n"  // 16 bit to 8 bit U
+    "uqshrn     v1.8b, v4.8h, #8               \n"  // 16 bit to 8 bit V
+
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v20", "v21", "v22", "v23", "v24", "v25"
+  );
+}
+#endif  // HAS_ARGBTOUV422ROW_NEON
+
+// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
+#ifdef HAS_ARGBTOUV411ROW_NEON
+void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                         int pix) {
+  asm volatile (
+    "movi       v20.8h, #112 / 2               \n"  // UB / VR 0.875 coefficient
+    "movi       v21.8h, #74 / 2                \n"  // UG -0.5781 coefficient
+    "movi       v22.8h, #38 / 2                \n"  // UR -0.2969 coefficient
+    "movi       v23.8h, #18 / 2                \n"  // VB -0.1406 coefficient
+    "movi       v24.8h, #94 / 2                \n"  // VG -0.7344 coefficient
+    "movi       v25.16b, #0x80                 \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.16b-v3.16b}, [%0], #64     \n"  // load 16 ARGB pixels.
+    "uaddlp     v0.8h, v0.16b                  \n"  // B 16 bytes -> 8 shorts.
+    "uaddlp     v1.8h, v1.16b                  \n"  // G 16 bytes -> 8 shorts.
+    "uaddlp     v2.8h, v2.16b                  \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(0)
+    "ld4        {v4.16b-v7.16b}, [%0], #64     \n"  // load next 16 ARGB pixels.
+    "uaddlp     v4.8h, v4.16b                  \n"  // B 16 bytes -> 8 shorts.
+    "uaddlp     v5.8h, v5.16b                  \n"  // G 16 bytes -> 8 shorts.
+    "uaddlp     v6.8h, v6.16b                  \n"  // R 16 bytes -> 8 shorts.
+
+    "addp       v0.8h, v0.8h, v4.8h            \n"  // B 16 shorts -> 8 shorts.
+    "addp       v1.8h, v1.8h, v5.8h            \n"  // G 16 shorts -> 8 shorts.
+    "addp       v2.8h, v2.8h, v6.8h            \n"  // R 16 shorts -> 8 shorts.
+
+    "urshr      v0.8h, v0.8h, #1               \n"  // 2x average
+    "urshr      v1.8h, v1.8h, #1               \n"
+    "urshr      v2.8h, v2.8h, #1               \n"
+
+    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
+    "mul        v3.8h, v0.8h, v20.8h           \n"  // B
+    "mls        v3.8h, v1.8h, v21.8h           \n"  // G
+    "mls        v3.8h, v2.8h, v22.8h           \n"  // R
+    "add        v3.8h, v3.8h, v25.8h           \n"  // +128 -> unsigned
+    "mul        v4.8h, v2.8h, v20.8h           \n"  // R
+    "mls        v4.8h, v1.8h, v24.8h           \n"  // G
+    "mls        v4.8h, v0.8h, v23.8h           \n"  // B
+    "add        v4.8h, v4.8h, v25.8h           \n"  // +128 -> unsigned
+    "uqshrn     v0.8b, v3.8h, #8               \n"  // 16 bit to 8 bit U
+    "uqshrn     v1.8b, v4.8h, #8               \n"  // 16 bit to 8 bit V
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
+    MEMACCESS(2)
+    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_u),     // %1
+    "+r"(dst_v),     // %2
+    "+r"(pix)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v20", "v21", "v22", "v23", "v24", "v25"
+  );
+}
+#endif  // HAS_ARGBTOUV411ROW_NEON
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#define RGBTOUV(QB, QG, QR) \
+    "vmul.s16   q8, " #QB ", q10               \n"  /* B                    */ \
+    "vmls.s16   q8, " #QG ", q11               \n"  /* G                    */ \
+    "vmls.s16   q8, " #QR ", q12               \n"  /* R                    */ \
+    "vadd.u16   q8, q8, q15                    \n"  /* +128 -> unsigned     */ \
+    "vmul.s16   q9, " #QR ", q10               \n"  /* R                    */ \
+    "vmls.s16   q9, " #QG ", q14               \n"  /* G                    */ \
+    "vmls.s16   q9, " #QB ", q13               \n"  /* B                    */ \
+    "vadd.u16   q9, q9, q15                    \n"  /* +128 -> unsigned     */ \
+    "vqshrn.u16  d0, q8, #8                    \n"  /* 16 bit to 8 bit U    */ \
+    "vqshrn.u16  d1, q9, #8                    \n"  /* 16 bit to 8 bit V    */
+
+// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
+#ifdef HAS_ARGBTOUVROW_NEON
+void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_stride_argb),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_ARGBTOUVROW_NEON
+
+// TODO(fbarchard): Subsample match C code.
+#ifdef HAS_ARGBTOUVJROW_NEON
+void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient
+    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
+    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
+    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
+    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_stride_argb),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_ARGBTOUVJROW_NEON
+
+#ifdef HAS_BGRATOUVROW_NEON
+void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_bgra
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 BGRA pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 BGRA pixels.
+    "vpaddl.u8  q3, q3                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more BGRA pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 BGRA pixels.
+    "vpadal.u8  q3, q7                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q1, q1, #1                     \n"  // 2x average
+    "vrshr.u16  q2, q2, #1                     \n"
+    "vrshr.u16  q3, q3, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q3, q2, q1)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(src_stride_bgra),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_BGRATOUVROW_NEON
+
+#ifdef HAS_ABGRTOUVROW_NEON
+void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_abgr
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ABGR pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ABGR pixels.
+    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ABGR pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ABGR pixels.
+    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q2, q1, q0)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(src_stride_abgr),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_ABGRTOUVROW_NEON
+
+#ifdef HAS_RGBATOUVROW_NEON
+void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_rgba
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 RGBA pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 RGBA pixels.
+    "vpaddl.u8  q0, q1                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q2                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q3                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more RGBA pixels.
+    MEMACCESS(1)
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 RGBA pixels.
+    "vpadal.u8  q0, q5                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q6                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q7                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(src_stride_rgba),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_RGBATOUVROW_NEON
+
+#ifdef HAS_RGB24TOUVROW_NEON
+void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
+                       uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_rgb24
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RGB24 pixels.
+    MEMACCESS(0)
+    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RGB24 pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RGB24 pixels.
+    MEMACCESS(1)
+    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RGB24 pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(src_stride_rgb24),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_RGB24TOUVROW_NEON
+
+#ifdef HAS_RAWTOUVROW_NEON
+void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
+                     uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_raw
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RAW pixels.
+    MEMACCESS(0)
+    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RAW pixels.
+    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RAW pixels.
+    MEMACCESS(1)
+    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RAW pixels.
+    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
+
+    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
+    "vrshr.u16  q1, q1, #1                     \n"
+    "vrshr.u16  q2, q2, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q2, q1, q0)
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_raw),  // %0
+    "+r"(src_stride_raw),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_RAWTOUVROW_NEON
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#ifdef HAS_RGB565TOUVROW_NEON
+void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
+                        uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 RGB565 pixels.
+    RGB565TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(src_stride_rgb565),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_RGB565TOUVROW_NEON
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#ifdef HAS_ARGB1555TOUVROW_NEON
+void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
+                        uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels.
+    RGB555TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(src_stride_argb1555),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_ARGB1555TOUVROW_NEON
+
+// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
+#ifdef HAS_ARGB4444TOUVROW_NEON
+void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
+                          uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
+    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
+    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
+    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
+    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
+    MEMACCESS(1)
+    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
+    ARGB4444TOARGB
+    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
+    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
+    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
+
+    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
+    "vrshr.u16  q5, q5, #1                     \n"
+    "vrshr.u16  q6, q6, #1                     \n"
+
+    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
+    "vmul.s16   q8, q4, q10                    \n"  // B
+    "vmls.s16   q8, q5, q11                    \n"  // G
+    "vmls.s16   q8, q6, q12                    \n"  // R
+    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
+    "vmul.s16   q9, q6, q10                    \n"  // R
+    "vmls.s16   q9, q5, q14                    \n"  // G
+    "vmls.s16   q9, q4, q13                    \n"  // B
+    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
+    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
+    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
+    MEMACCESS(2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    MEMACCESS(3)
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(src_stride_argb4444),  // %1
+    "+r"(dst_u),     // %2
+    "+r"(dst_v),     // %3
+    "+r"(pix)        // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+#endif  // HAS_ARGB4444TOUVROW_NEON
+
+#ifdef HAS_RGB565TOYROW_NEON
+void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    RGB565TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb565),  // %0
+    "+r"(dst_y),       // %1
+    "+r"(pix)          // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+#endif  // HAS_RGB565TOYROW_NEON
+
+#ifdef HAS_ARGB1555TOYROW_NEON
+void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB1555TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb1555),  // %0
+    "+r"(dst_y),         // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+#endif  // HAS_ARGB1555TOYROW_NEON
+
+#ifdef HAS_ARGB4444TOYROW_NEON
+void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
+  asm volatile (
+    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
+    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
+    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d27, #16                       \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    ARGB4444TOARGB
+    "vmull.u8   q2, d0, d24                    \n"  // B
+    "vmlal.u8   q2, d1, d25                    \n"  // G
+    "vmlal.u8   q2, d2, d26                    \n"  // R
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqadd.u8   d0, d27                        \n"
+    MEMACCESS(1)
+    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_argb4444),  // %0
+    "+r"(dst_y),         // %1
+    "+r"(pix)            // %2
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
+  );
+}
+#endif  // HAS_ARGB4444TOYROW_NEON
+
+#ifdef HAS_BGRATOYROW_NEON
+void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 pixels of BGRA.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v1.8b, v4.8b           \n"  // R
+    "umlal      v16.8h, v2.8b, v5.8b           \n"  // G
+    "umlal      v16.8h, v3.8b, v6.8b           \n"  // B
+    "sqrshrun   v0.8b, v16.8h, #7              \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_BGRATOYROW_NEON
+
+#ifdef HAS_ABGRTOYROW_NEON
+void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 pixels of ABGR.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v0.8b, v4.8b           \n"  // R
+    "umlal      v16.8h, v1.8b, v5.8b           \n"  // G
+    "umlal      v16.8h, v2.8b, v6.8b           \n"  // B
+    "sqrshrun   v0.8b, v16.8h, #7              \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_ABGRTOYROW_NEON
+
+#ifdef HAS_RGBATOYROW_NEON
+void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 pixels of RGBA.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v1.8b, v4.8b           \n"  // B
+    "umlal      v16.8h, v2.8b, v5.8b           \n"  // G
+    "umlal      v16.8h, v3.8b, v6.8b           \n"  // R
+    "sqrshrun   v0.8b, v16.8h, #7              \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_RGBATOYROW_NEON
+
+#ifdef HAS_RGB24TOYROW_NEON
+void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld3        {v0.8b-v2.8b}, [%0], #24       \n"  // load 8 pixels of RGB24.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v0.8b, v4.8b           \n"  // B
+    "umlal      v16.8h, v1.8b, v5.8b           \n"  // G
+    "umlal      v16.8h, v2.8b, v6.8b           \n"  // R
+    "sqrshrun   v0.8b, v16.8h, #7              \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(dst_y),      // %1
+    "+r"(pix)         // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_RGB24TOYROW_NEON
+
+#ifdef HAS_RAWTOYROW_NEON
+void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
+  asm volatile (
+    "movi       v4.8b, #33                     \n"  // R * 0.2578 coefficient
+    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
+    "movi       v6.8b, #13                     \n"  // B * 0.1016 coefficient
+    "movi       v7.8b, #16                     \n"  // Add 16 constant
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld3        {v0.8b-v2.8b}, [%0], #24       \n"  // load 8 pixels of RAW.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v0.8b, v4.8b           \n"  // B
+    "umlal      v16.8h, v1.8b, v5.8b           \n"  // G
+    "umlal      v16.8h, v2.8b, v6.8b           \n"  // R
+    "sqrshrun   v0.8b, v16.8h, #7              \n"  // 16 bit to 8 bit Y
+    "uqadd      v0.8b, v0.8b, v7.8b            \n"
+    MEMACCESS(1)
+    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
+    "bgt        1b                             \n"
+  : "+r"(src_raw),  // %0
+    "+r"(dst_y),    // %1
+    "+r"(pix)       // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_RAWTOYROW_NEON
+
+// Bilinear filter 16x2 -> 16x1
+#ifdef HAS_INTERPOLATEROW_NEON
+void InterpolateRow_NEON(uint8* dst_ptr,
+                         const uint8* src_ptr, ptrdiff_t src_stride,
+                         int dst_width, int source_y_fraction) {
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
+  const uint8* src_ptr1 = src_ptr + src_stride;
+  asm volatile (
+    "cmp        %4, #0                         \n"
+    "beq        100f                           \n"
+    "cmp        %4, #64                        \n"
+    "beq        75f                            \n"
+    "cmp        %4, #128                       \n"
+    "beq        50f                            \n"
+    "cmp        %4, #192                       \n"
+    "beq        25f                            \n"
+
+    "dup        v5.16b, %w4                    \n"
+    "dup        v4.16b, %w5                    \n"
+    // General purpose row blend.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "ld1        {v0.16b}, [%1], #16            \n"
+    MEMACCESS(2)
+    "ld1        {v1.16b}, [%2], #16            \n"
+    "subs       %3, %3, #16                    \n"
+    "umull      v2.8h, v0.8b,  v4.8b           \n"
+    "umull2     v3.8h, v0.16b, v4.16b          \n"
+    "umlal      v2.8h, v1.8b,  v5.8b           \n"
+    "umlal2     v3.8h, v1.16b, v5.16b          \n"
+    "rshrn      v0.8b,  v2.8h, #8              \n"
+    "rshrn2     v0.16b, v3.8h, #8              \n"
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"
+    "bgt        1b                             \n"
+    "b          99f                            \n"
+
+    // Blend 25 / 75.
+  "25:                                         \n"
+    MEMACCESS(1)
+    "ld1        {v0.16b}, [%1], #16            \n"
+    MEMACCESS(2)
+    "ld1        {v1.16b}, [%2], #16            \n"
+    "subs       %3, %3, #16                    \n"
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"
+    "bgt        25b                            \n"
+    "b          99f                            \n"
+
+    // Blend 50 / 50.
+  "50:                                         \n"
+    MEMACCESS(1)
+    "ld1        {v0.16b}, [%1], #16            \n"
+    MEMACCESS(2)
+    "ld1        {v1.16b}, [%2], #16            \n"
+    "subs       %3, %3, #16                    \n"
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"
+    "bgt        50b                            \n"
+    "b          99f                            \n"
+
+    // Blend 75 / 25.
+  "75:                                         \n"
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"
+    MEMACCESS(2)
+    "ld1        {v0.16b}, [%2], #16            \n"
+    "subs       %3, %3, #16                    \n"
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"
+    "urhadd     v0.16b, v0.16b, v1.16b         \n"
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"
+    "bgt        75b                            \n"
+    "b          99f                            \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+  "100:                                        \n"
+    MEMACCESS(1)
+    "ld1        {v0.16b}, [%1], #16            \n"
+    "subs       %3, %3, #16                    \n"
+    MEMACCESS(0)
+    "st1        {v0.16b}, [%0], #16            \n"
+    "bgt        100b                           \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),          // %0
+    "+r"(src_ptr),          // %1
+    "+r"(src_ptr1),         // %2
+    "+r"(dst_width),        // %3
+    "+r"(y1_fraction),      // %4
+    "+r"(y0_fraction)       // %5
+  :
+  : "cc", "memory", "v0", "v1", "v3", "v4", "v5"
+  );
+}
+#endif  // HAS_INTERPOLATEROW_NEON
+
+// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
+#ifdef HAS_ARGBBLENDROW_NEON
+void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  asm volatile (
+    "subs       %3, %3, #8                     \n"
+    "blt        89f                            \n"
+    // Blend 8 pixels.
+  "8:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 pixels of ARGB0.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 pixels of ARGB1.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "umull      v16.8h, v4.8b, v3.8b           \n"  // db * a
+    "umull      v17.8h, v5.8b, v3.8b           \n"  // dg * a
+    "umull      v18.8h, v6.8b, v3.8b           \n"  // dr * a
+    "uqrshrn    v16.8b, v16.8h, #8             \n"  // db >>= 8
+    "uqrshrn    v17.8b, v17.8h, #8             \n"  // dg >>= 8
+    "uqrshrn    v18.8b, v18.8h, #8             \n"  // dr >>= 8
+    "uqsub      v4.8b, v4.8b, v16.8b           \n"  // db - (db * a / 256)
+    "uqsub      v5.8b, v5.8b, v17.8b           \n"  // dg - (dg * a / 256)
+    "uqsub      v6.8b, v6.8b, v18.8b           \n"  // dr - (dr * a / 256)
+    "uqadd      v0.8b, v0.8b, v4.8b            \n"  // + sb
+    "uqadd      v1.8b, v1.8b, v5.8b            \n"  // + sg
+    "uqadd      v2.8b, v2.8b, v6.8b            \n"  // + sr
+    "movi       v3.8b, #255                    \n"  // a = 255
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 pixels of ARGB.
+    "bge        8b                             \n"
+
+  "89:                                         \n"
+    "adds       %3, %3, #8-1                   \n"
+    "blt        99f                            \n"
+
+    // Blend 1 pixels.
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.b-v3.b}[0], [%0], #4       \n"  // load 1 pixel ARGB0.
+    MEMACCESS(1)
+    "ld4        {v4.b-v7.b}[0], [%1], #4       \n"  // load 1 pixel ARGB1.
+    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
+    "umull      v16.8h, v4.8b, v3.8b           \n"  // db * a
+    "umull      v17.8h, v5.8b, v3.8b           \n"  // dg * a
+    "umull      v18.8h, v6.8b, v3.8b           \n"  // dr * a
+    "uqrshrn    v16.8b, v16.8h, #8             \n"  // db >>= 8
+    "uqrshrn    v17.8b, v17.8h, #8             \n"  // dg >>= 8
+    "uqrshrn    v18.8b, v18.8h, #8             \n"  // dr >>= 8
+    "uqsub      v4.8b, v4.8b, v16.8b           \n"  // db - (db * a / 256)
+    "uqsub      v5.8b, v5.8b, v17.8b           \n"  // dg - (dg * a / 256)
+    "uqsub      v6.8b, v6.8b, v18.8b           \n"  // dr - (dr * a / 256)
+    "uqadd      v0.8b, v0.8b, v4.8b            \n"  // + sb
+    "uqadd      v1.8b, v1.8b, v5.8b            \n"  // + sg
+    "uqadd      v2.8b, v2.8b, v6.8b            \n"  // + sr
+    "movi       v3.8b, #255                    \n"  // a = 255
+    MEMACCESS(2)
+    "st4        {v0.b-v3.b}[0], [%2], #4       \n"  // store 1 pixel.
+    "bge        1b                             \n"
+
+  "99:                                         \n"
+
+  : "+r"(src_argb0),    // %0
+    "+r"(src_argb1),    // %1
+    "+r"(dst_argb),     // %2
+    "+r"(width)         // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v16", "v17", "v18"
+  );
+}
+#endif  // HAS_ARGBBLENDROW_NEON
+
+// Attenuate 8 pixels at a time.
+#ifdef HAS_ARGBATTENUATEROW_NEON
+void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    // Attenuate 8 pixels.
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v4.8h, v0.8b, v3.8b            \n"  // b * a
+    "umull      v5.8h, v1.8b, v3.8b            \n"  // g * a
+    "umull      v6.8h, v2.8b, v3.8b            \n"  // r * a
+    "uqrshrn    v0.8b, v4.8h, #8               \n"  // b >>= 8
+    "uqrshrn    v1.8b, v5.8h, #8               \n"  // g >>= 8
+    "uqrshrn    v2.8b, v6.8h, #8               \n"  // r >>= 8
+    MEMACCESS(1)
+    "st4        {v0.8b-v3.8b}, [%1], #32       \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(width)       // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
+  );
+}
+#endif  // HAS_ARGBATTENUATEROW_NEON
+
+// Quantize 8 ARGB pixels (32 bytes).
+// dst = (dst * scale >> 16) * interval_size + interval_offset;
+#ifdef HAS_ARGBQUANTIZEROW_NEON
+void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width) {
+  asm volatile (
+    "dup        v4.8h, %w2                     \n"
+    "ushr       v4.8h, v4.8h, #1               \n"  // scale >>= 1
+    "dup        v5.8h, %w3                     \n"  // interval multiply.
+    "dup        v6.8h, %w4                     \n"  // interval add
+
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0]            \n"  // load 8 pixels of ARGB.
+    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
+    "uxtl       v0.8h, v0.8b                   \n"  // b (0 .. 255)
+    "uxtl       v1.8h, v1.8b                   \n"
+    "uxtl       v2.8h, v2.8b                   \n"
+    "sqdmulh    v0.8h, v0.8h, v4.8h            \n"  // b * scale
+    "sqdmulh    v1.8h, v1.8h, v4.8h            \n"  // g
+    "sqdmulh    v2.8h, v2.8h, v4.8h            \n"  // r
+    "mul        v0.8h, v0.8h, v5.8h            \n"  // b * interval_size
+    "mul        v1.8h, v1.8h, v5.8h            \n"  // g
+    "mul        v2.8h, v2.8h, v5.8h            \n"  // r
+    "add        v0.8h, v0.8h, v6.8h            \n"  // b + interval_offset
+    "add        v1.8h, v1.8h, v6.8h            \n"  // g
+    "add        v2.8h, v2.8h, v6.8h            \n"  // r
+    "uqxtn      v0.8b, v0.8h                   \n"
+    "uqxtn      v1.8b, v1.8h                   \n"
+    "uqxtn      v2.8b, v2.8h                   \n"
+    MEMACCESS(0)
+    "st4        {v0.8b-v3.8b}, [%0], #32       \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(dst_argb),       // %0
+    "+r"(width)           // %1
+  : "r"(scale),           // %2
+    "r"(interval_size),   // %3
+    "r"(interval_offset)  // %4
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
+  );
+}
+#endif  // HAS_ARGBQUANTIZEROW_NEON
+
+// Shade 8 pixels at a time by specified value.
+// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
+// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
+#ifdef HAS_ARGBSHADEROW_NEON
+void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  asm volatile (
+    "dup        v0.4s, %w3                     \n"  // duplicate scale value.
+    "zip1       v0.8b, v0.8b, v0.8b            \n"  // v0.8b aarrggbb.
+    "ushr       v0.8h, v0.8h, #1               \n"  // scale / 2.
+
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v4.8b-v7.8b}, [%0], #32       \n"  // load 8 pixels of ARGB.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "uxtl       v4.8h, v4.8b                   \n"  // b (0 .. 255)
+    "uxtl       v5.8h, v5.8b                   \n"
+    "uxtl       v6.8h, v6.8b                   \n"
+    "uxtl       v7.8h, v7.8b                   \n"
+    "sqrdmulh   v4.8h, v4.8h, v0.h[0]          \n"  // b * scale * 2
+    "sqrdmulh   v5.8h, v5.8h, v0.h[1]          \n"  // g
+    "sqrdmulh   v6.8h, v6.8h, v0.h[2]          \n"  // r
+    "sqrdmulh   v7.8h, v7.8h, v0.h[3]          \n"  // a
+    "uqxtn      v4.8b, v4.8h                   \n"
+    "uqxtn      v5.8b, v5.8h                   \n"
+    "uqxtn      v6.8b, v6.8h                   \n"
+    "uqxtn      v7.8b, v7.8h                   \n"
+    MEMACCESS(1)
+    "st4        {v4.8b-v7.8b}, [%1], #32       \n"  // store 8 pixels of ARGB.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),       // %0
+    "+r"(dst_argb),       // %1
+    "+r"(width)           // %2
+  : "r"(value)            // %3
+  : "cc", "memory", "v0", "v4", "v5", "v6", "v7"
+  );
+}
+#endif  // HAS_ARGBSHADEROW_NEON
+
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+// Similar to ARGBToYJ but stores ARGB.
+// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
+#ifdef HAS_ARGBGRAYROW_NEON
+void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "movi       v24.8b, #15                    \n"  // B * 0.11400 coefficient
+    "movi       v25.8b, #75                    \n"  // G * 0.58700 coefficient
+    "movi       v26.8b, #38                    \n"  // R * 0.29900 coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "umull      v4.8h, v0.8b, v24.8b           \n"  // B
+    "umlal      v4.8h, v1.8b, v25.8b           \n"  // G
+    "umlal      v4.8h, v2.8b, v26.8b           \n"  // R
+    "sqrshrun   v0.8b, v4.8h, #7               \n"  // 15 bit to 8 bit B
+    "mov        v1.8b, v0.8b                   \n"  // G
+    "mov        v2.8b, v0.8b                   \n"  // R
+    MEMACCESS(1)
+    "st4        {v0.8b-v3.8b}, [%1], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26"
+  );
+}
+#endif  // HAS_ARGBGRAYROW_NEON
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+//    b = (r * 35 + g * 68 + b * 17) >> 7
+//    g = (r * 45 + g * 88 + b * 22) >> 7
+//    r = (r * 50 + g * 98 + b * 24) >> 7
+
+#ifdef HAS_ARGBSEPIAROW_NEON
+void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
+  asm volatile (
+    "movi       v20.8b, #17                    \n"  // BB coefficient
+    "movi       v21.8b, #68                    \n"  // BG coefficient
+    "movi       v22.8b, #35                    \n"  // BR coefficient
+    "movi       v24.8b, #22                    \n"  // GB coefficient
+    "movi       v25.8b, #88                    \n"  // GG coefficient
+    "movi       v26.8b, #45                    \n"  // GR coefficient
+    "movi       v28.8b, #24                    \n"  // BB coefficient
+    "movi       v29.8b, #98                    \n"  // BG coefficient
+    "movi       v30.8b, #50                    \n"  // BR coefficient
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0]            \n"  // load 8 ARGB pixels.
+    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
+    "umull      v4.8h, v0.8b, v20.8b           \n"  // B to Sepia B
+    "umlal      v4.8h, v1.8b, v21.8b           \n"  // G
+    "umlal      v4.8h, v2.8b, v22.8b           \n"  // R
+    "umull      v5.8h, v0.8b, v24.8b           \n"  // B to Sepia G
+    "umlal      v5.8h, v1.8b, v25.8b           \n"  // G
+    "umlal      v5.8h, v2.8b, v26.8b           \n"  // R
+    "umull      v6.8h, v0.8b, v28.8b           \n"  // B to Sepia R
+    "umlal      v6.8h, v1.8b, v29.8b           \n"  // G
+    "umlal      v6.8h, v2.8b, v30.8b           \n"  // R
+    "uqshrn     v0.8b, v4.8h, #7               \n"  // 16 bit to 8 bit B
+    "uqshrn     v1.8b, v5.8h, #7               \n"  // 16 bit to 8 bit G
+    "uqshrn     v2.8b, v6.8h, #7               \n"  // 16 bit to 8 bit R
+    MEMACCESS(0)
+    "st4        {v0.8b-v3.8b}, [%0], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(dst_argb),  // %0
+    "+r"(width)      // %1
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+    "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30"
+  );
+}
+#endif  // HAS_ARGBSEPIAROW_NEON
+
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
+// needs to saturate.  Consider doing a non-saturating version.
+#ifdef HAS_ARGBCOLORMATRIXROW_NEON
+void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
+                             const int8* matrix_argb, int width) {
+  asm volatile (
+    MEMACCESS(3)
+    "ld1        {v2.16b}, [%3]                 \n"  // load 3 ARGB vectors.
+    "sxtl       v0.8h, v2.8b                   \n"  // B,G coefficients s16.
+    "sxtl2      v1.8h, v2.16b                  \n"  // R,A coefficients s16.
+
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v16.8b-v19.8b}, [%0], #32      \n"  // load 8 ARGB pixels.
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
+    "uxtl       v16.8h, v16.8b                 \n"  // b (0 .. 255) 16 bit
+    "uxtl       v17.8h, v17.8b                 \n"  // g
+    "uxtl       v18.8h, v18.8b                 \n"  // r
+    "uxtl       v19.8h, v19.8b                 \n"  // a
+    "mul        v22.8h, v16.8h, v0.h[0]        \n"  // B = B * Matrix B
+    "mul        v23.8h, v16.8h, v0.h[4]        \n"  // G = B * Matrix G
+    "mul        v24.8h, v16.8h, v1.h[0]        \n"  // R = B * Matrix R
+    "mul        v25.8h, v16.8h, v1.h[4]        \n"  // A = B * Matrix A
+    "mul        v4.8h, v17.8h, v0.h[1]         \n"  // B += G * Matrix B
+    "mul        v5.8h, v17.8h, v0.h[5]         \n"  // G += G * Matrix G
+    "mul        v6.8h, v17.8h, v1.h[1]         \n"  // R += G * Matrix R
+    "mul        v7.8h, v17.8h, v1.h[5]         \n"  // A += G * Matrix A
+    "sqadd      v22.8h, v22.8h, v4.8h          \n"  // Accumulate B
+    "sqadd      v23.8h, v23.8h, v5.8h          \n"  // Accumulate G
+    "sqadd      v24.8h, v24.8h, v6.8h          \n"  // Accumulate R
+    "sqadd      v25.8h, v25.8h, v7.8h          \n"  // Accumulate A
+    "mul        v4.8h, v18.8h, v0.h[2]         \n"  // B += R * Matrix B
+    "mul        v5.8h, v18.8h, v0.h[6]         \n"  // G += R * Matrix G
+    "mul        v6.8h, v18.8h, v1.h[2]         \n"  // R += R * Matrix R
+    "mul        v7.8h, v18.8h, v1.h[6]         \n"  // A += R * Matrix A
+    "sqadd      v22.8h, v22.8h, v4.8h          \n"  // Accumulate B
+    "sqadd      v23.8h, v23.8h, v5.8h          \n"  // Accumulate G
+    "sqadd      v24.8h, v24.8h, v6.8h          \n"  // Accumulate R
+    "sqadd      v25.8h, v25.8h, v7.8h          \n"  // Accumulate A
+    "mul        v4.8h, v19.8h, v0.h[3]         \n"  // B += A * Matrix B
+    "mul        v5.8h, v19.8h, v0.h[7]         \n"  // G += A * Matrix G
+    "mul        v6.8h, v19.8h, v1.h[3]         \n"  // R += A * Matrix R
+    "mul        v7.8h, v19.8h, v1.h[7]         \n"  // A += A * Matrix A
+    "sqadd      v22.8h, v22.8h, v4.8h          \n"  // Accumulate B
+    "sqadd      v23.8h, v23.8h, v5.8h          \n"  // Accumulate G
+    "sqadd      v24.8h, v24.8h, v6.8h          \n"  // Accumulate R
+    "sqadd      v25.8h, v25.8h, v7.8h          \n"  // Accumulate A
+    "sqshrun    v16.8b, v22.8h, #6             \n"  // 16 bit to 8 bit B
+    "sqshrun    v17.8b, v23.8h, #6             \n"  // 16 bit to 8 bit G
+    "sqshrun    v18.8b, v24.8h, #6             \n"  // 16 bit to 8 bit R
+    "sqshrun    v19.8b, v25.8h, #6             \n"  // 16 bit to 8 bit A
+    MEMACCESS(1)
+    "st4        {v16.8b-v19.8b}, [%1], #32     \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(width)       // %2
+  : "r"(matrix_argb)  // %3
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+    "v18", "v19", "v22", "v23", "v24", "v25"
+  );
+}
+#endif  // HAS_ARGBCOLORMATRIXROW_NEON
+
+// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+#ifdef HAS_ARGBMULTIPLYROW_NEON
+void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "umull      v0.8h, v0.8b, v4.8b            \n"  // multiply B
+    "umull      v1.8h, v1.8b, v5.8b            \n"  // multiply G
+    "umull      v2.8h, v2.8b, v6.8b            \n"  // multiply R
+    "umull      v3.8h, v3.8b, v7.8b            \n"  // multiply A
+    "rshrn      v0.8b, v0.8h, #8               \n"  // 16 bit to 8 bit B
+    "rshrn      v1.8b, v1.8h, #8               \n"  // 16 bit to 8 bit G
+    "rshrn      v2.8b, v2.8h, #8               \n"  // 16 bit to 8 bit R
+    "rshrn      v3.8b, v3.8h, #8               \n"  // 16 bit to 8 bit A
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
+  );
+}
+#endif  // HAS_ARGBMULTIPLYROW_NEON
+
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+#ifdef HAS_ARGBADDROW_NEON
+void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "uqadd      v0.8b, v0.8b, v4.8b            \n"
+    "uqadd      v1.8b, v1.8b, v5.8b            \n"
+    "uqadd      v2.8b, v2.8b, v6.8b            \n"
+    "uqadd      v3.8b, v3.8b, v7.8b            \n"
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
+  );
+}
+#endif  // HAS_ARGBADDROW_NEON
+
+// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
+#ifdef HAS_ARGBSUBTRACTROW_NEON
+void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
+    MEMACCESS(1)
+    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "uqsub      v0.8b, v0.8b, v4.8b            \n"
+    "uqsub      v1.8b, v1.8b, v5.8b            \n"
+    "uqsub      v2.8b, v2.8b, v6.8b            \n"
+    "uqsub      v3.8b, v3.8b, v7.8b            \n"
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
+  );
+}
+#endif  // HAS_ARGBSUBTRACTROW_NEON
+
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+#ifdef HAS_SOBELROW_NEON
+void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    "movi       v3.8b, #255                    \n"  // alpha
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.8b}, [%0], #8              \n"  // load 8 sobelx.
+    MEMACCESS(1)
+    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 sobely.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "uqadd      v0.8b, v0.8b, v1.8b            \n"  // add
+    "mov        v1.8b, v0.8b                   \n"
+    "mov        v2.8b, v0.8b                   \n"
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"
+  );
+}
+#endif  // HAS_SOBELROW_NEON
+
+// Adds Sobel X and Sobel Y and stores Sobel into plane.
+#ifdef HAS_SOBELTOPLANEROW_NEON
+void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  asm volatile (
+    // 16 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"  // load 16 sobelx.
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"  // load 16 sobely.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
+    "uqadd      v0.16b, v0.16b, v1.16b         \n"  // add
+    MEMACCESS(2)
+    "st1        {v0.16b}, [%2], #16            \n"  // store 16 pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_y),       // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "v0", "v1"
+  );
+}
+#endif  // HAS_SOBELTOPLANEROW_NEON
+
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+#ifdef HAS_SOBELXYROW_NEON
+void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    "movi       v3.8b, #255                    \n"  // alpha
+    // 8 pixel loop.
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v2.8b}, [%0], #8              \n"  // load 8 sobelx.
+    MEMACCESS(1)
+    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 sobely.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "uqadd      v1.8b, v0.8b, v2.8b            \n"  // add
+    MEMACCESS(2)
+    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
+    "bgt        1b                             \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "v0", "v1", "v2", "v3"
+  );
+}
+#endif  // HAS_SOBELXYROW_NEON
+
+// SobelX as a matrix is
+// -1  0  1
+// -2  0  2
+// -1  0  1
+#ifdef HAS_SOBELXROW_NEON
+void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.8b}, [%0],%5               \n"  // top
+    MEMACCESS(0)
+    "ld1        {v1.8b}, [%0],%6               \n"
+    "usubl      v0.8h, v0.8b, v1.8b            \n"
+    MEMACCESS(1)
+    "ld1        {v2.8b}, [%1],%5               \n"  // center * 2
+    MEMACCESS(1)
+    "ld1        {v3.8b}, [%1],%6               \n"
+    "usubl      v1.8h, v2.8b, v3.8b            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    MEMACCESS(2)
+    "ld1        {v2.8b}, [%2],%5               \n"  // bottom
+    MEMACCESS(2)
+    "ld1        {v3.8b}, [%2],%6               \n"
+    "subs       %4, %4, #8                     \n"  // 8 pixels
+    "usubl      v1.8h, v2.8b, v3.8b            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    "abs        v0.8h, v0.8h                   \n"
+    "uqxtn      v0.8b, v0.8h                   \n"
+    MEMACCESS(3)
+    "st1        {v0.8b}, [%3], #8              \n"  // store 8 sobelx
+    "bgt        1b                             \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(src_y2),      // %2
+    "+r"(dst_sobelx),  // %3
+    "+r"(width)        // %4
+  : "r"(2),            // %5
+    "r"(6)             // %6
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_SOBELXROW_NEON
+
+// SobelY as a matrix is
+// -1 -2 -1
+//  0  0  0
+//  1  2  1
+#ifdef HAS_SOBELYROW_NEON
+void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.8b}, [%0],%4               \n"  // left
+    MEMACCESS(1)
+    "ld1        {v1.8b}, [%1],%4               \n"
+    "usubl      v0.8h, v0.8b, v1.8b            \n"
+    MEMACCESS(0)
+    "ld1        {v2.8b}, [%0],%4               \n"  // center * 2
+    MEMACCESS(1)
+    "ld1        {v3.8b}, [%1],%4               \n"
+    "usubl      v1.8h, v2.8b, v3.8b            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    MEMACCESS(0)
+    "ld1        {v2.8b}, [%0],%5               \n"  // right
+    MEMACCESS(1)
+    "ld1        {v3.8b}, [%1],%5               \n"
+    "subs       %3, %3, #8                     \n"  // 8 pixels
+    "usubl      v1.8h, v2.8b, v3.8b            \n"
+    "add        v0.8h, v0.8h, v1.8h            \n"
+    "abs        v0.8h, v0.8h                   \n"
+    "uqxtn      v0.8b, v0.8h                   \n"
+    MEMACCESS(2)
+    "st1        {v0.8b}, [%2], #8              \n"  // store 8 sobely
+    "bgt        1b                             \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(dst_sobely),  // %2
+    "+r"(width)        // %3
+  : "r"(1),            // %4
+    "r"(6)             // %5
+  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif  // HAS_SOBELYROW_NEON
+#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_posix.cc b/src/main/jni/libyuv/source/row_posix.cc
new file mode 100644
index 000000000..106fda568
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_posix.cc
@@ -0,0 +1,6443 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+
+#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
+
+// Constants for ARGB
+static vec8 kARGBToY = {
+  13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
+};
+
+// JPeg full range.
+static vec8 kARGBToYJ = {
+  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
+};
+#endif  // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
+
+#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
+
+static vec8 kARGBToU = {
+  112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
+};
+
+static vec8 kARGBToUJ = {
+  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
+};
+
+static vec8 kARGBToV = {
+  -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
+};
+
+static vec8 kARGBToVJ = {
+  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
+};
+
+// Constants for BGRA
+static vec8 kBGRAToY = {
+  0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
+};
+
+static vec8 kBGRAToU = {
+  0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
+};
+
+static vec8 kBGRAToV = {
+  0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
+};
+
+// Constants for ABGR
+static vec8 kABGRToY = {
+  33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
+};
+
+static vec8 kABGRToU = {
+  -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
+};
+
+static vec8 kABGRToV = {
+  112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
+};
+
+// Constants for RGBA.
+static vec8 kRGBAToY = {
+  0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
+};
+
+static vec8 kRGBAToU = {
+  0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
+};
+
+static vec8 kRGBAToV = {
+  0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
+};
+
+static uvec8 kAddY16 = {
+  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
+};
+
+static vec16 kAddYJ64 = {
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
+static uvec8 kAddUV128 = {
+  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+static uvec16 kAddUVJ128 = {
+  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
+};
+#endif  // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
+
+#ifdef HAS_RGB24TOARGBROW_SSSE3
+
+// Shuffle table for converting RGB24 to ARGB.
+static uvec8 kShuffleMaskRGB24ToARGB = {
+  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
+};
+
+// Shuffle table for converting RAW to ARGB.
+static uvec8 kShuffleMaskRAWToARGB = {
+  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
+};
+
+// Shuffle table for converting ARGB to RGB24.
+static uvec8 kShuffleMaskARGBToRGB24 = {
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting ARGB to RAW.
+static uvec8 kShuffleMaskARGBToRAW = {
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting ARGBToRGB24 for I422ToRGB24.  First 8 + next 4
+static uvec8 kShuffleMaskARGBToRGB24_0 = {
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
+};
+
+// Shuffle table for converting ARGB to RAW.
+static uvec8 kShuffleMaskARGBToRAW_0 = {
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
+};
+#endif  // HAS_RGB24TOARGBROW_SSSE3
+
+#if defined(TESTING) && defined(__x86_64__)
+void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
+  asm volatile (
+    ".p2align  5                               \n"
+    "mov       %%eax,%%eax                     \n"
+    "mov       %%ebx,%%ebx                     \n"
+    "mov       %%ecx,%%ecx                     \n"
+    "mov       %%edx,%%edx                     \n"
+    "mov       %%esi,%%esi                     \n"
+    "mov       %%edi,%%edi                     \n"
+    "mov       %%ebp,%%ebp                     \n"
+    "mov       %%esp,%%esp                     \n"
+    ".p2align  5                               \n"
+    "mov       %%r8d,%%r8d                     \n"
+    "mov       %%r9d,%%r9d                     \n"
+    "mov       %%r10d,%%r10d                   \n"
+    "mov       %%r11d,%%r11d                   \n"
+    "mov       %%r12d,%%r12d                   \n"
+    "mov       %%r13d,%%r13d                   \n"
+    "mov       %%r14d,%%r14d                   \n"
+    "mov       %%r15d,%%r15d                   \n"
+    ".p2align  5                               \n"
+    "lea       (%%rax),%%eax                   \n"
+    "lea       (%%rbx),%%ebx                   \n"
+    "lea       (%%rcx),%%ecx                   \n"
+    "lea       (%%rdx),%%edx                   \n"
+    "lea       (%%rsi),%%esi                   \n"
+    "lea       (%%rdi),%%edi                   \n"
+    "lea       (%%rbp),%%ebp                   \n"
+    "lea       (%%rsp),%%esp                   \n"
+    ".p2align  5                               \n"
+    "lea       (%%r8),%%r8d                    \n"
+    "lea       (%%r9),%%r9d                    \n"
+    "lea       (%%r10),%%r10d                  \n"
+    "lea       (%%r11),%%r11d                  \n"
+    "lea       (%%r12),%%r12d                  \n"
+    "lea       (%%r13),%%r13d                  \n"
+    "lea       (%%r14),%%r14d                  \n"
+    "lea       (%%r15),%%r15d                  \n"
+
+    ".p2align  5                               \n"
+    "lea       0x10(%%rax),%%eax               \n"
+    "lea       0x10(%%rbx),%%ebx               \n"
+    "lea       0x10(%%rcx),%%ecx               \n"
+    "lea       0x10(%%rdx),%%edx               \n"
+    "lea       0x10(%%rsi),%%esi               \n"
+    "lea       0x10(%%rdi),%%edi               \n"
+    "lea       0x10(%%rbp),%%ebp               \n"
+    "lea       0x10(%%rsp),%%esp               \n"
+    ".p2align  5                               \n"
+    "lea       0x10(%%r8),%%r8d                \n"
+    "lea       0x10(%%r9),%%r9d                \n"
+    "lea       0x10(%%r10),%%r10d              \n"
+    "lea       0x10(%%r11),%%r11d              \n"
+    "lea       0x10(%%r12),%%r12d              \n"
+    "lea       0x10(%%r13),%%r13d              \n"
+    "lea       0x10(%%r14),%%r14d              \n"
+    "lea       0x10(%%r15),%%r15d              \n"
+
+    ".p2align  5                               \n"
+    "add       0x10,%%eax                      \n"
+    "add       0x10,%%ebx                      \n"
+    "add       0x10,%%ecx                      \n"
+    "add       0x10,%%edx                      \n"
+    "add       0x10,%%esi                      \n"
+    "add       0x10,%%edi                      \n"
+    "add       0x10,%%ebp                      \n"
+    "add       0x10,%%esp                      \n"
+    ".p2align  5                               \n"
+    "add       0x10,%%r8d                      \n"
+    "add       0x10,%%r9d                      \n"
+    "add       0x10,%%r10d                     \n"
+    "add       0x10,%%r11d                     \n"
+    "add       0x10,%%r12d                     \n"
+    "add       0x10,%%r13d                     \n"
+    "add       0x10,%%r14d                     \n"
+    "add       0x10,%%r15d                     \n"
+
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_y),     // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // TESTING
+
+#ifdef HAS_I400TOARGBROW_SSE2
+void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0x18,%%xmm5                    \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm0                   \n"
+    "punpckhwd %%xmm1,%%xmm1                   \n"
+    "por       %%xmm5,%%xmm0                   \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_y),     // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
+                                  int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0x18,%%xmm5                    \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm0                   \n"
+    "punpckhwd %%xmm1,%%xmm1                   \n"
+    "por       %%xmm5,%%xmm0                   \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_y),     // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_I400TOARGBROW_SSE2
+
+#ifdef HAS_RGB24TOARGBROW_SSSE3
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // generate mask 0xff000000
+    "pslld     $0x18,%%xmm5                    \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x30,0) ",%0           \n"
+    "movdqa    %%xmm3,%%xmm2                   \n"
+    "palignr   $0x8,%%xmm1,%%xmm2              \n"
+    "pshufb    %%xmm4,%%xmm2                   \n"
+    "por       %%xmm5,%%xmm2                   \n"
+    "palignr   $0xc,%%xmm0,%%xmm1              \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "movdqa    %%xmm2," MEMACCESS2(0x20,1) "   \n"
+    "por       %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm4,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "palignr   $0x4,%%xmm3,%%xmm3              \n"
+    "pshufb    %%xmm4,%%xmm3                   \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "por       %%xmm5,%%xmm3                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x30,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_rgb24),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "m"(kShuffleMaskRGB24ToARGB)  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // generate mask 0xff000000
+    "pslld     $0x18,%%xmm5                    \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x30,0) ",%0           \n"
+    "movdqa    %%xmm3,%%xmm2                   \n"
+    "palignr   $0x8,%%xmm1,%%xmm2              \n"
+    "pshufb    %%xmm4,%%xmm2                   \n"
+    "por       %%xmm5,%%xmm2                   \n"
+    "palignr   $0xc,%%xmm0,%%xmm1              \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "movdqa    %%xmm2," MEMACCESS2(0x20,1) "   \n"
+    "por       %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm4,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "palignr   $0x4,%%xmm3,%%xmm3              \n"
+    "pshufb    %%xmm4,%%xmm3                   \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "por       %%xmm5,%%xmm3                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x30,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_raw),   // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "m"(kShuffleMaskRAWToARGB)  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "mov       $0x1080108,%%eax                \n"
+    "movd      %%eax,%%xmm5                    \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "mov       $0x20802080,%%eax               \n"
+    "movd      %%eax,%%xmm6                    \n"
+    "pshufd    $0x0,%%xmm6,%%xmm6              \n"
+    "pcmpeqb   %%xmm3,%%xmm3                   \n"
+    "psllw     $0xb,%%xmm3                     \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "psllw     $0xa,%%xmm4                     \n"
+    "psrlw     $0x5,%%xmm4                     \n"
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "psllw     $0x8,%%xmm7                     \n"
+    "sub       %0,%1                           \n"
+    "sub       %0,%1                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "pand      %%xmm3,%%xmm1                   \n"
+    "psllw     $0xb,%%xmm2                     \n"
+    "pmulhuw   %%xmm5,%%xmm1                   \n"
+    "pmulhuw   %%xmm5,%%xmm2                   \n"
+    "psllw     $0x8,%%xmm1                     \n"
+    "por       %%xmm2,%%xmm1                   \n"
+    "pand      %%xmm4,%%xmm0                   \n"
+    "pmulhuw   %%xmm6,%%xmm0                   \n"
+    "por       %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm0,%%xmm1                   \n"
+    "punpckhbw %%xmm0,%%xmm2                   \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm1,0x00,1,0,2)           //  movdqa  %%xmm1,(%1,%0,2)
+    MEMOPMEM(movdqa,xmm2,0x10,1,0,2)           //  movdqa  %%xmm2,0x10(%1,%0,2)
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc", "eax"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "mov       $0x1080108,%%eax                \n"
+    "movd      %%eax,%%xmm5                    \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "mov       $0x42004200,%%eax               \n"
+    "movd      %%eax,%%xmm6                    \n"
+    "pshufd    $0x0,%%xmm6,%%xmm6              \n"
+    "pcmpeqb   %%xmm3,%%xmm3                   \n"
+    "psllw     $0xb,%%xmm3                     \n"
+    "movdqa    %%xmm3,%%xmm4                   \n"
+    "psrlw     $0x6,%%xmm4                     \n"
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "psllw     $0x8,%%xmm7                     \n"
+    "sub       %0,%1                           \n"
+    "sub       %0,%1                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psllw     $0x1,%%xmm1                     \n"
+    "psllw     $0xb,%%xmm2                     \n"
+    "pand      %%xmm3,%%xmm1                   \n"
+    "pmulhuw   %%xmm5,%%xmm2                   \n"
+    "pmulhuw   %%xmm5,%%xmm1                   \n"
+    "psllw     $0x8,%%xmm1                     \n"
+    "por       %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "pand      %%xmm4,%%xmm0                   \n"
+    "psraw     $0x8,%%xmm2                     \n"
+    "pmulhuw   %%xmm6,%%xmm0                   \n"
+    "pand      %%xmm7,%%xmm2                   \n"
+    "por       %%xmm2,%%xmm0                   \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm0,%%xmm1                   \n"
+    "punpckhbw %%xmm0,%%xmm2                   \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm1,0x00,1,0,2)           //  movdqa  %%xmm1,(%1,%0,2)
+    MEMOPMEM(movdqa,xmm2,0x10,1,0,2)           //  movdqa  %%xmm2,0x10(%1,%0,2)
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc", "eax"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "mov       $0xf0f0f0f,%%eax                \n"
+    "movd      %%eax,%%xmm4                    \n"
+    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
+    "movdqa    %%xmm4,%%xmm5                   \n"
+    "pslld     $0x4,%%xmm5                     \n"
+    "sub       %0,%1                           \n"
+    "sub       %0,%1                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "pand      %%xmm4,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "psllw     $0x4,%%xmm1                     \n"
+    "psrlw     $0x4,%%xmm3                     \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "por       %%xmm3,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm2,%%xmm0                   \n"
+    "punpckhbw %%xmm2,%%xmm1                   \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,2)           //  movdqa  %%xmm0,(%1,%0,2)
+    MEMOPMEM(movdqa,xmm1,0x10,1,0,2)           //  movdqa  %%xmm1,0x10(%1,%0,2)
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc", "eax"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "movdqa    %3,%%xmm6                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "pshufb    %%xmm6,%%xmm0                   \n"
+    "pshufb    %%xmm6,%%xmm1                   \n"
+    "pshufb    %%xmm6,%%xmm2                   \n"
+    "pshufb    %%xmm6,%%xmm3                   \n"
+    "movdqa    %%xmm1,%%xmm4                   \n"
+    "psrldq    $0x4,%%xmm1                     \n"
+    "pslldq    $0xc,%%xmm4                     \n"
+    "movdqa    %%xmm2,%%xmm5                   \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pslldq    $0x8,%%xmm5                     \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "psrldq    $0x8,%%xmm2                     \n"
+    "pslldq    $0x4,%%xmm3                     \n"
+    "por       %%xmm3,%%xmm2                   \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "movdqu    %%xmm2," MEMACCESS2(0x20,1) "   \n"
+    "lea       " MEMLEA(0x30,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  : "m"(kShuffleMaskARGBToRGB24)  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "movdqa    %3,%%xmm6                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "pshufb    %%xmm6,%%xmm0                   \n"
+    "pshufb    %%xmm6,%%xmm1                   \n"
+    "pshufb    %%xmm6,%%xmm2                   \n"
+    "pshufb    %%xmm6,%%xmm3                   \n"
+    "movdqa    %%xmm1,%%xmm4                   \n"
+    "psrldq    $0x4,%%xmm1                     \n"
+    "pslldq    $0xc,%%xmm4                     \n"
+    "movdqa    %%xmm2,%%xmm5                   \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pslldq    $0x8,%%xmm5                     \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "psrldq    $0x8,%%xmm2                     \n"
+    "pslldq    $0x4,%%xmm3                     \n"
+    "por       %%xmm3,%%xmm2                   \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "movdqu    %%xmm2," MEMACCESS2(0x20,1) "   \n"
+    "lea       " MEMLEA(0x30,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  : "m"(kShuffleMaskARGBToRAW)  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm3,%%xmm3                   \n"
+    "psrld     $0x1b,%%xmm3                    \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "psrld     $0x1a,%%xmm4                    \n"
+    "pslld     $0x5,%%xmm4                     \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0xb,%%xmm5                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "pslld     $0x8,%%xmm0                     \n"
+    "psrld     $0x3,%%xmm1                     \n"
+    "psrld     $0x5,%%xmm2                     \n"
+    "psrad     $0x10,%%xmm0                    \n"
+    "pand      %%xmm3,%%xmm1                   \n"
+    "pand      %%xmm4,%%xmm2                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "por       %%xmm2,%%xmm1                   \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "packssdw  %%xmm0,%%xmm0                   \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x4,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "psrld     $0x1b,%%xmm4                    \n"
+    "movdqa    %%xmm4,%%xmm5                   \n"
+    "pslld     $0x5,%%xmm5                     \n"
+    "movdqa    %%xmm4,%%xmm6                   \n"
+    "pslld     $0xa,%%xmm6                     \n"
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "pslld     $0xf,%%xmm7                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm3                   \n"
+    "psrad     $0x10,%%xmm0                    \n"
+    "psrld     $0x3,%%xmm1                     \n"
+    "psrld     $0x6,%%xmm2                     \n"
+    "psrld     $0x9,%%xmm3                     \n"
+    "pand      %%xmm7,%%xmm0                   \n"
+    "pand      %%xmm4,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "pand      %%xmm6,%%xmm3                   \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "por       %%xmm3,%%xmm2                   \n"
+    "por       %%xmm2,%%xmm0                   \n"
+    "packssdw  %%xmm0,%%xmm0                   \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMACCESS2(0x8,1) ",%1        \n"
+    "sub       $0x4,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "psllw     $0xc,%%xmm4                     \n"
+    "movdqa    %%xmm4,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm3                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm3,%%xmm0                   \n"
+    "pand      %%xmm4,%%xmm1                   \n"
+    "psrlq     $0x4,%%xmm0                     \n"
+    "psrlq     $0x8,%%xmm1                     \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x4,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(pix)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+#endif
+  );
+}
+#endif  // HAS_RGB24TOARGBROW_SSSE3
+
+#ifdef HAS_ARGBTOYROW_SSSE3
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kARGBToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kARGBToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOYROW_SSSE3
+
+#ifdef HAS_ARGBTOYJROW_SSSE3
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kARGBToYJ),  // %3
+    "m"(kAddYJ64)    // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kARGBToYJ),  // %3
+    "m"(kAddYJ64)    // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOYJROW_SSSE3
+
+#ifdef HAS_ARGBTOUVROW_SSSE3
+// TODO(fbarchard): pass xmm constants to single block of assembly.
+// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
+// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
+// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
+// and considered unsafe.
+void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),  // %0
+    "m"(kARGBToV),  // %1
+    "m"(kAddUV128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
+    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
+    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
+    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps    %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_argb)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
+void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToUJ),  // %0
+    "m"(kARGBToVJ),  // %1
+    "m"(kAddUVJ128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
+    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
+    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
+    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_argb)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),         // %0
+    "m"(kARGBToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_argb)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                  uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToUJ),         // %0
+    "m"(kARGBToVJ),         // %1
+    "m"(kAddUVJ128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_argb))
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
+                          int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),  // %0
+    "m"(kARGBToV),  // %1
+    "m"(kAddUV128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm6                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm2                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm2                     \n"
+    "packsswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "pmaddubsw %%xmm3,%%xmm0                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm2                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm2                     \n"
+    "packsswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,2,1)           //  movdqa  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),        // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6"
+#endif
+  );
+}
+
+void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
+                                    uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),  // %0
+    "m"(kARGBToV),  // %1
+    "m"(kAddUV128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm6                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm2                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm2                     \n"
+    "packsswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "pmaddubsw %%xmm3,%%xmm0                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm2                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm2                     \n"
+    "packsswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,2,1)           //  movdqu  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),        // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6"
+#endif
+  );
+}
+
+void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
+                          uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),  // %0
+    "m"(kARGBToV),  // %1
+    "m"(kAddUV128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
+                                    uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kARGBToU),  // %0
+    "m"(kARGBToV),  // %1
+    "m"(kAddUV128)  // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kBGRAToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_bgra),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kBGRAToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kBGRAToU),         // %0
+    "m"(kBGRAToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
+    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
+    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
+    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_bgra0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_bgra)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kBGRAToU),         // %0
+    "m"(kBGRAToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_bgra0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_bgra)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kABGRToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_abgr),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kABGRToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kRGBAToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
+  asm volatile (
+    "movdqa    %4,%%xmm5                       \n"
+    "movdqa    %3,%%xmm4                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm4,%%xmm3                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "phaddw    %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm2                     \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_rgba),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  : "m"(kRGBAToY),   // %3
+    "m"(kAddY16)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kABGRToU),         // %0
+    "m"(kABGRToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
+    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
+    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
+    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_abgr0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_abgr)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kABGRToU),         // %0
+    "m"(kABGRToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_abgr0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_abgr)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kRGBAToU),         // %0
+    "m"(kRGBAToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
+    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
+    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
+    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_rgba0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_rgba))
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kRGBAToU),         // %0
+    "m"(kRGBAToV),         // %1
+    "m"(kAddUV128)         // %2
+  );
+  asm volatile (
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm7                   \n"
+    "shufps    $0x88,%%xmm6,%%xmm2             \n"
+    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
+    "pavgb     %%xmm7,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm2                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "phaddw    %%xmm2,%%xmm0                   \n"
+    "phaddw    %%xmm6,%%xmm1                   \n"
+    "psraw     $0x8,%%xmm0                     \n"
+    "psraw     $0x8,%%xmm1                     \n"
+    "packsswb  %%xmm1,%%xmm0                   \n"
+    "paddb     %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movlps    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_rgba0),       // %0
+    "+r"(dst_u),           // %1
+    "+r"(dst_v),           // %2
+    "+rm"(width)           // %3
+  : "r"((intptr_t)(src_stride_rgba)) // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOUVROW_SSSE3
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+#define UB 127 /* min(63,(int8)(2.018 * 64)) */
+#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
+#define UR 0
+
+#define VB 0
+#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
+#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
+
+// Bias
+#define BB UB * 128 + VB * 128
+#define BG UG * 128 + VG * 128
+#define BR UR * 128 + VR * 128
+
+#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
+
+struct {
+  vec8 kUVToB;  // 0
+  vec8 kUVToG;  // 16
+  vec8 kUVToR;  // 32
+  vec16 kUVBiasB;  // 48
+  vec16 kUVBiasG;  // 64
+  vec16 kUVBiasR;  // 80
+  vec16 kYSub16;  // 96
+  vec16 kYToRgb;  // 112
+  vec8 kVUToB;  // 128
+  vec8 kVUToG;  // 144
+  vec8 kVUToR;  // 160
+} static SIMD_ALIGNED(kYuvConstants) = {
+  { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
+  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
+  { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR },
+  { BB, BB, BB, BB, BB, BB, BB, BB },
+  { BG, BG, BG, BG, BG, BG, BG, BG },
+  { BR, BR, BR, BR, BR, BR, BR, BR },
+  { 16, 16, 16, 16, 16, 16, 16, 16 },
+  { YG, YG, YG, YG, YG, YG, YG, YG },
+  { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB },
+  { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
+  { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR }
+};
+
+
+// Read 8 UV from 411
+#define READYUV444                                                             \
+    "movq       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
+    BUNDLEALIGN                                                                \
+    MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
+    "lea        " MEMLEA(0x8, [u_buf]) ",%[u_buf]               \n"            \
+    "punpcklbw  %%xmm1,%%xmm0                                   \n"
+
+// Read 4 UV from 422, upsample to 8 UV
+#define READYUV422                                                             \
+    "movd       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
+    BUNDLEALIGN                                                                \
+    MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
+    "lea        " MEMLEA(0x4, [u_buf]) ",%[u_buf]               \n"            \
+    "punpcklbw  %%xmm1,%%xmm0                                   \n"            \
+    "punpcklwd  %%xmm0,%%xmm0                                   \n"
+
+// Read 2 UV from 411, upsample to 8 UV
+#define READYUV411                                                             \
+    "movd       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
+    BUNDLEALIGN                                                                \
+    MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
+    "lea        " MEMLEA(0x2, [u_buf]) ",%[u_buf]               \n"            \
+    "punpcklbw  %%xmm1,%%xmm0                                   \n"            \
+    "punpcklwd  %%xmm0,%%xmm0                                   \n"            \
+    "punpckldq  %%xmm0,%%xmm0                                   \n"
+
+// Read 4 UV from NV12, upsample to 8 UV
+#define READNV12                                                               \
+    "movq       " MEMACCESS([uv_buf]) ",%%xmm0                  \n"            \
+    "lea        " MEMLEA(0x8, [uv_buf]) ",%[uv_buf]             \n"            \
+    "punpcklwd  %%xmm0,%%xmm0                                   \n"
+
+// Convert 8 pixels: 8 UV and 8 Y
+#define YUVTORGB                                                               \
+    "movdqa     %%xmm0,%%xmm1                                   \n"            \
+    "movdqa     %%xmm0,%%xmm2                                   \n"            \
+    "pmaddubsw  " MEMACCESS([kYuvConstants]) ",%%xmm0           \n"            \
+    "pmaddubsw  " MEMACCESS2(16, [kYuvConstants]) ",%%xmm1      \n"            \
+    "pmaddubsw  " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2      \n"            \
+    "psubw      " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0      \n"            \
+    "psubw      " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1      \n"            \
+    "psubw      " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2      \n"            \
+    "movq       " MEMACCESS([y_buf]) ",%%xmm3                   \n"            \
+    "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n"            \
+    "punpcklbw  %%xmm4,%%xmm3                                   \n"            \
+    "psubsw     " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3      \n"            \
+    "pmullw     " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3     \n"            \
+    "paddsw     %%xmm3,%%xmm0                                   \n"            \
+    "paddsw     %%xmm3,%%xmm1                                   \n"            \
+    "paddsw     %%xmm3,%%xmm2                                   \n"            \
+    "psraw      $0x6,%%xmm0                                     \n"            \
+    "psraw      $0x6,%%xmm1                                     \n"            \
+    "psraw      $0x6,%%xmm2                                     \n"            \
+    "packuswb   %%xmm0,%%xmm0                                   \n"            \
+    "packuswb   %%xmm1,%%xmm1                                   \n"            \
+    "packuswb   %%xmm2,%%xmm2                                   \n"
+
+// Convert 8 pixels: 8 VU and 8 Y
+#define YVUTORGB                                                               \
+    "movdqa     %%xmm0,%%xmm1                                   \n"            \
+    "movdqa     %%xmm0,%%xmm2                                   \n"            \
+    "pmaddubsw  " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0     \n"            \
+    "pmaddubsw  " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1     \n"            \
+    "pmaddubsw  " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2     \n"            \
+    "psubw      " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0      \n"            \
+    "psubw      " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1      \n"            \
+    "psubw      " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2      \n"            \
+    "movq       " MEMACCESS([y_buf]) ",%%xmm3                   \n"            \
+    "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n"            \
+    "punpcklbw  %%xmm4,%%xmm3                                   \n"            \
+    "psubsw     " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3      \n"            \
+    "pmullw     " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3     \n"            \
+    "paddsw     %%xmm3,%%xmm0                                   \n"            \
+    "paddsw     %%xmm3,%%xmm1                                   \n"            \
+    "paddsw     %%xmm3,%%xmm2                                   \n"            \
+    "psraw      $0x6,%%xmm0                                     \n"            \
+    "psraw      $0x6,%%xmm1                                     \n"            \
+    "psraw      $0x6,%%xmm2                                     \n"            \
+    "packuswb   %%xmm0,%%xmm0                                   \n"            \
+    "packuswb   %%xmm1,%%xmm1                                   \n"            \
+    "packuswb   %%xmm2,%%xmm2                                   \n"
+
+void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_argb,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV444
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "   \n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb]  \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
+                                 const uint8* u_buf,
+                                 const uint8* v_buf,
+                                 uint8* dst_rgb24,
+                                 int width) {
+// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
+#if defined(__i386__)
+  asm volatile (
+    "movdqa    %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
+    "movdqa    %[kShuffleMaskARGBToRGB24],%%xmm6   \n"
+  :: [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+    [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24));
+#endif
+
+  asm volatile (
+#if !defined(__i386__)
+    "movdqa    %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
+    "movdqa    %[kShuffleMaskARGBToRGB24],%%xmm6   \n"
+#endif
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm2,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm6,%%xmm1                   \n"
+    "palignr   $0xc,%%xmm0,%%xmm1              \n"
+    "movq      %%xmm0," MEMACCESS([dst_rgb24]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
+    "lea       " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_rgb24]"+r"(dst_rgb24),  // %[dst_rgb24]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
+#if !defined(__i386__)
+    , [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
+    [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
+#endif
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* dst_raw,
+                               int width) {
+// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
+#if defined(__i386__)
+  asm volatile (
+    "movdqa    %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
+    "movdqa    %[kShuffleMaskARGBToRAW],%%xmm6   \n"
+  :: [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
+    [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW));
+#endif
+
+  asm volatile (
+#if !defined(__i386__)
+    "movdqa    %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
+    "movdqa    %[kShuffleMaskARGBToRAW],%%xmm6   \n"
+#endif
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm2,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm6,%%xmm1                   \n"
+    "palignr   $0xc,%%xmm0,%%xmm1              \n"
+    "movq      %%xmm0," MEMACCESS([dst_raw]) " \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
+    "lea       " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_raw]"+r"(dst_raw),  // %[dst_raw]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
+#if !defined(__i386__)
+    , [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
+    [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
+#endif
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_argb,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_argb,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV411
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* uv_buf,
+                                uint8* dst_argb,
+                                int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READNV12
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+  // Does not use r14.
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
+                                const uint8* uv_buf,
+                                uint8* dst_argb,
+                                int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READNV12
+    YVUTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+  // Does not use r14.
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_argb,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV444
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_argb,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_argb,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV411
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* uv_buf,
+                                          uint8* dst_argb,
+                                          int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READNV12
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+  // Does not use r14.
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* uv_buf,
+                                          uint8* dst_argb,
+                                          int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READNV12
+    YVUTORGB
+    "punpcklbw %%xmm1,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm0                   \n"
+    "punpckhwd %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
+    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
+    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+  // Does not use r14.
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_bgra,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm2,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm5," MEMACCESS([dst_bgra]) "\n"
+    "movdqa    %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
+    "lea       " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_abgr,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm2                   \n"
+    "punpckhwd %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2," MEMACCESS([dst_abgr]) "\n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
+    "lea       " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* dst_rgba,
+                                int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm2,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm5," MEMACCESS([dst_rgba]) "\n"
+    "movdqa    %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
+    "lea       " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_rgba]"+r"(dst_rgba),  // %[dst_rgba]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_bgra,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm2,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm5," MEMACCESS([dst_bgra]) "\n"
+    "movdqu    %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
+    "lea       " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_abgr,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "punpcklbw %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm2                   \n"
+    "punpckhwd %%xmm0,%%xmm1                   \n"
+    "movdqu    %%xmm2," MEMACCESS([dst_abgr]) "\n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
+    "lea       " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* dst_rgba,
+                                          int width) {
+  asm volatile (
+    "sub       %[u_buf],%[v_buf]               \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+    LABELALIGN
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm2,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm5," MEMACCESS([dst_rgba]) "\n"
+    "movdqu    %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
+    "lea       " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),    // %[y_buf]
+    [u_buf]"+r"(u_buf),    // %[u_buf]
+    [v_buf]"+r"(v_buf),    // %[v_buf]
+    [dst_rgba]"+r"(dst_rgba),  // %[dst_rgba]
+    [width]"+rm"(width)    // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+#endif  // HAS_I422TOARGBROW_SSSE3
+
+#ifdef HAS_YTOARGBROW_SSE2
+void YToARGBRow_SSE2(const uint8* y_buf,
+                     uint8* dst_argb,
+                     int width) {
+  asm volatile (
+    "pxor      %%xmm5,%%xmm5                   \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "pslld     $0x18,%%xmm4                    \n"
+    "mov       $0x00100010,%%eax               \n"
+    "movd      %%eax,%%xmm3                    \n"
+    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
+    "mov       $0x004a004a,%%eax               \n"
+    "movd      %%eax,%%xmm2                    \n"
+    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
+    LABELALIGN
+  "1:                                          \n"
+    // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "psubusw   %%xmm3,%%xmm0                   \n"
+    "pmullw    %%xmm2,%%xmm0                   \n"
+    "psrlw     $6, %%xmm0                      \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+
+    // Step 2: Weave into ARGB
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm0                   \n"
+    "punpckhwd %%xmm1,%%xmm1                   \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "por       %%xmm4,%%xmm1                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(y_buf),     // %0
+    "+r"(dst_argb),  // %1
+    "+rm"(width)     // %2
+  :
+  : "memory", "cc", "eax"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+#endif
+  );
+}
+#endif  // HAS_YTOARGBROW_SSE2
+
+#ifdef HAS_MIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static uvec8 kShuffleMirror = {
+  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+  intptr_t temp_width = (intptr_t)(width);
+  asm volatile (
+    "movdqa    %3,%%xmm5                       \n"
+    "lea       " MEMLEA(-0x10,0) ",%0          \n"
+    LABELALIGN
+  "1:                                          \n"
+    MEMOPREG(movdqa,0x00,0,2,1,xmm0)           //  movdqa  (%0,%2),%%xmm0
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(temp_width)  // %2
+  : "m"(kShuffleMirror) // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_MIRRORROW_SSSE3
+
+#ifdef HAS_MIRRORROW_SSE2
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
+  intptr_t temp_width = (intptr_t)(width);
+  asm volatile (
+    "lea       " MEMLEA(-0x10,0) ",%0          \n"
+    LABELALIGN
+  "1:                                          \n"
+    MEMOPREG(movdqu,0x00,0,2,1,xmm0)           //  movdqu  (%0,%2),%%xmm0
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "psllw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm1,%%xmm0                   \n"
+    "pshuflw   $0x1b,%%xmm0,%%xmm0             \n"
+    "pshufhw   $0x1b,%%xmm0,%%xmm0             \n"
+    "pshufd    $0x4e,%%xmm0,%%xmm0             \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1)",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(temp_width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_MIRRORROW_SSE2
+
+#ifdef HAS_MIRRORROW_UV_SSSE3
+// Shuffle table for reversing the bytes of UV channels.
+static uvec8 kShuffleMirrorUV = {
+  14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
+};
+void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
+                       int width) {
+  intptr_t temp_width = (intptr_t)(width);
+  asm volatile (
+    "movdqa    %4,%%xmm1                       \n"
+    "lea       " MEMLEA4(-0x10,0,3,2) ",%0       \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(-0x10,0) ",%0            \n"
+    "pshufb    %%xmm1,%%xmm0                   \n"
+    "sub       $8,%3                           \n"
+    "movlpd    %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movhpd,xmm0,0x00,1,2,1)           //  movhpd    %%xmm0,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src),      // %0
+    "+r"(dst_u),    // %1
+    "+r"(dst_v),    // %2
+    "+r"(temp_width)  // %3
+  : "m"(kShuffleMirrorUV)  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_MIRRORROW_UV_SSSE3
+
+#ifdef HAS_ARGBMIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static uvec8 kARGBShuffleMirror = {
+  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
+};
+
+void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+  intptr_t temp_width = (intptr_t)(width);
+  asm volatile (
+    "lea       " MEMLEA4(-0x10,0,2,4) ",%0     \n"
+    "movdqa    %3,%%xmm5                       \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "lea       " MEMLEA(-0x10,0) ",%0          \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(temp_width)  // %2
+  : "m"(kARGBShuffleMirror)  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBMIRRORROW_SSSE3
+
+#ifdef HAS_SPLITUVROW_SSE2
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb    %%xmm5,%%xmm5                    \n"
+    "psrlw      $0x8,%%xmm5                      \n"
+    "sub        %1,%2                            \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movdqa     " MEMACCESS(0) ",%%xmm0          \n"
+    "movdqa     " MEMACCESS2(0x10,0) ",%%xmm1    \n"
+    "lea        " MEMLEA(0x20,0) ",%0            \n"
+    "movdqa     %%xmm0,%%xmm2                    \n"
+    "movdqa     %%xmm1,%%xmm3                    \n"
+    "pand       %%xmm5,%%xmm0                    \n"
+    "pand       %%xmm5,%%xmm1                    \n"
+    "packuswb   %%xmm1,%%xmm0                    \n"
+    "psrlw      $0x8,%%xmm2                      \n"
+    "psrlw      $0x8,%%xmm3                      \n"
+    "packuswb   %%xmm3,%%xmm2                    \n"
+    "movdqa     %%xmm0," MEMACCESS(1) "          \n"
+    MEMOPMEM(movdqa,xmm2,0x00,1,2,1)             // movdqa     %%xmm2,(%1,%2)
+    "lea        " MEMLEA(0x10,1) ",%1            \n"
+    "sub        $0x10,%3                         \n"
+    "jg         1b                               \n"
+  : "+r"(src_uv),     // %0
+    "+r"(dst_u),      // %1
+    "+r"(dst_v),      // %2
+    "+r"(pix)         // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix) {
+  asm volatile (
+    "pcmpeqb    %%xmm5,%%xmm5                    \n"
+    "psrlw      $0x8,%%xmm5                      \n"
+    "sub        %1,%2                            \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movdqu     " MEMACCESS(0) ",%%xmm0          \n"
+    "movdqu     " MEMACCESS2(0x10,0) ",%%xmm1    \n"
+    "lea        " MEMLEA(0x20,0) ",%0            \n"
+    "movdqa     %%xmm0,%%xmm2                    \n"
+    "movdqa     %%xmm1,%%xmm3                    \n"
+    "pand       %%xmm5,%%xmm0                    \n"
+    "pand       %%xmm5,%%xmm1                    \n"
+    "packuswb   %%xmm1,%%xmm0                    \n"
+    "psrlw      $0x8,%%xmm2                      \n"
+    "psrlw      $0x8,%%xmm3                      \n"
+    "packuswb   %%xmm3,%%xmm2                    \n"
+    "movdqu     %%xmm0," MEMACCESS(1) "          \n"
+    MEMOPMEM(movdqu,xmm2,0x00,1,2,1)             //  movdqu     %%xmm2,(%1,%2)
+    "lea        " MEMLEA(0x10,1) ",%1            \n"
+    "sub        $0x10,%3                         \n"
+    "jg         1b                               \n"
+  : "+r"(src_uv),     // %0
+    "+r"(dst_u),      // %1
+    "+r"(dst_v),      // %2
+    "+r"(pix)         // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_SPLITUVROW_SSE2
+
+#ifdef HAS_MERGEUVROW_SSE2
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
+  asm volatile (
+    "sub       %0,%1                             \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0           \n"
+    MEMOPREG(movdqa,0x00,0,1,1,xmm1)             //  movdqa    (%0,%1,1),%%xmm1
+    "lea       " MEMLEA(0x10,0) ",%0             \n"
+    "movdqa    %%xmm0,%%xmm2                     \n"
+    "punpcklbw %%xmm1,%%xmm0                     \n"
+    "punpckhbw %%xmm1,%%xmm2                     \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "           \n"
+    "movdqa    %%xmm2," MEMACCESS2(0x10,2) "     \n"
+    "lea       " MEMLEA(0x20,2) ",%2             \n"
+    "sub       $0x10,%3                          \n"
+    "jg        1b                                \n"
+  : "+r"(src_u),     // %0
+    "+r"(src_v),     // %1
+    "+r"(dst_uv),    // %2
+    "+r"(width)      // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width) {
+  asm volatile (
+    "sub       %0,%1                             \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
+    MEMOPREG(movdqu,0x00,0,1,1,xmm1)             //  movdqu    (%0,%1,1),%%xmm1
+    "lea       " MEMLEA(0x10,0) ",%0             \n"
+    "movdqa    %%xmm0,%%xmm2                     \n"
+    "punpcklbw %%xmm1,%%xmm0                     \n"
+    "punpckhbw %%xmm1,%%xmm2                     \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "           \n"
+    "movdqu    %%xmm2," MEMACCESS2(0x10,2) "     \n"
+    "lea       " MEMLEA(0x20,2) ",%2             \n"
+    "sub       $0x10,%3                          \n"
+    "jg        1b                                \n"
+  : "+r"(src_u),     // %0
+    "+r"(src_v),     // %1
+    "+r"(dst_uv),    // %2
+    "+r"(width)      // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_MERGEUVROW_SSE2
+
+#ifdef HAS_COPYROW_SSE2
+void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x20,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(count)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_COPYROW_SSE2
+
+#ifdef HAS_COPYROW_X86
+void CopyRow_X86(const uint8* src, uint8* dst, int width) {
+  size_t width_tmp = (size_t)(width);
+  asm volatile (
+    "shr       $0x2,%2                         \n"
+    "rep movsl " MEMMOVESTRING(0,1) "          \n"
+  : "+S"(src),  // %0
+    "+D"(dst),  // %1
+    "+c"(width_tmp) // %2
+  :
+  : "memory", "cc"
+  );
+}
+#endif  // HAS_COPYROW_X86
+
+#ifdef HAS_COPYROW_ERMS
+// Unaligned Multiple of 1.
+void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
+  size_t width_tmp = (size_t)(width);
+  asm volatile (
+    "rep movsb " MEMMOVESTRING(0,1) "          \n"
+  : "+S"(src),  // %0
+    "+D"(dst),  // %1
+    "+c"(width_tmp) // %2
+  :
+  : "memory", "cc"
+  );
+}
+#endif  // HAS_COPYROW_ERMS
+
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+// width in pixels
+void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm0,%%xmm0                   \n"
+    "pslld     $0x18,%%xmm0                    \n"
+    "pcmpeqb   %%xmm1,%%xmm1                   \n"
+    "psrld     $0x8,%%xmm1                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm4         \n"
+    "movdqa    " MEMACCESS2(0x10,1) ",%%xmm5   \n"
+    "pand      %%xmm0,%%xmm2                   \n"
+    "pand      %%xmm0,%%xmm3                   \n"
+    "pand      %%xmm1,%%xmm4                   \n"
+    "pand      %%xmm1,%%xmm5                   \n"
+    "por       %%xmm4,%%xmm2                   \n"
+    "por       %%xmm5,%%xmm3                   \n"
+    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBCOPYALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+// width in pixels
+void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    "vpcmpeqb  %%ymm0,%%ymm0,%%ymm0            \n"
+    "vpsrld    $0x8,%%ymm0,%%ymm0              \n"
+    LABELALIGN
+  "1:                                          \n"
+    "vmovdqu   " MEMACCESS(0) ",%%ymm1         \n"
+    "vmovdqu   " MEMACCESS2(0x20,0) ",%%ymm2   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1        \n"
+    "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2  \n"
+    "vmovdqu   %%ymm1," MEMACCESS(1) "         \n"
+    "vmovdqu   %%ymm2," MEMACCESS2(0x20,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+    "vzeroupper                                \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_ARGBCOPYALPHAROW_AVX2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+// width in pixels
+void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm0,%%xmm0                   \n"
+    "pslld     $0x18,%%xmm0                    \n"
+    "pcmpeqb   %%xmm1,%%xmm1                   \n"
+    "psrld     $0x8,%%xmm1                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "punpcklbw %%xmm2,%%xmm2                   \n"
+    "punpckhwd %%xmm2,%%xmm3                   \n"
+    "punpcklwd %%xmm2,%%xmm2                   \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm4         \n"
+    "movdqa    " MEMACCESS2(0x10,1) ",%%xmm5   \n"
+    "pand      %%xmm0,%%xmm2                   \n"
+    "pand      %%xmm0,%%xmm3                   \n"
+    "pand      %%xmm1,%%xmm4                   \n"
+    "pand      %%xmm1,%%xmm5                   \n"
+    "por       %%xmm4,%%xmm2                   \n"
+    "por       %%xmm5,%%xmm3                   \n"
+    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBCOPYYTOALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+// width in pixels
+void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
+  asm volatile (
+    "vpcmpeqb  %%ymm0,%%ymm0,%%ymm0            \n"
+    "vpsrld    $0x8,%%ymm0,%%ymm0              \n"
+    LABELALIGN
+  "1:                                          \n"
+    "vpmovzxbd " MEMACCESS(0) ",%%ymm1         \n"
+    "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2    \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "vpslld    $0x18,%%ymm1,%%ymm1             \n"
+    "vpslld    $0x18,%%ymm2,%%ymm2             \n"
+    "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1        \n"
+    "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2  \n"
+    "vmovdqu   %%ymm1," MEMACCESS(1) "         \n"
+    "vmovdqu   %%ymm2," MEMACCESS2(0x20,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+    "vzeroupper                                \n"
+  : "+r"(src),   // %0
+    "+r"(dst),   // %1
+    "+r"(width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_ARGBCOPYYTOALPHAROW_AVX2
+
+#ifdef HAS_SETROW_X86
+void SetRow_X86(uint8* dst, uint32 v32, int width) {
+  size_t width_tmp = (size_t)(width);
+  asm volatile (
+    "shr       $0x2,%1                         \n"
+    "rep stosl " MEMSTORESTRING(eax,0) "       \n"
+    : "+D"(dst),       // %0
+      "+c"(width_tmp)  // %1
+    : "a"(v32)         // %2
+    : "memory", "cc");
+}
+
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
+                   int dst_stride, int height) {
+  for (int y = 0; y < height; ++y) {
+    size_t width_tmp = (size_t)(width);
+    uint32* d = (uint32*)(dst);
+    asm volatile (
+      "rep stosl " MEMSTORESTRING(eax,0) "     \n"
+      : "+D"(d),         // %0
+        "+c"(width_tmp)  // %1
+      : "a"(v32)         // %2
+      : "memory", "cc");
+    dst += dst_stride;
+  }
+}
+#endif  // HAS_SETROW_X86
+
+#ifdef HAS_YUY2TOYROW_SSE2
+void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
+    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  : "r"((intptr_t)(stride_yuy2))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
+                               uint8* dst_y, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
+                                int stride_yuy2,
+                                uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2
+    MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  : "r"((intptr_t)(stride_yuy2))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
+                                   uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_yuy2),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
+    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  : "r"((intptr_t)(stride_uyvy))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
+                               uint8* dst_y, int pix) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),  // %0
+    "+r"(dst_y),     // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                                uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2
+    MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  : "r"((intptr_t)(stride_uyvy))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
+                                   uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+    "sub       %1,%2                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x10,%3                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_uyvy),    // %0
+    "+r"(dst_u),       // %1
+    "+r"(dst_v),       // %2
+    "+r"(pix)          // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_YUY2TOYROW_SSE2
+
+#ifdef HAS_ARGBBLENDROW_SSE2
+// Blend 8 pixels at a time.
+void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "psrlw     $0xf,%%xmm7                     \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "psrlw     $0x8,%%xmm6                     \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psllw     $0x8,%%xmm5                     \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "pslld     $0x18,%%xmm4                    \n"
+    "sub       $0x1,%3                         \n"
+    "je        91f                             \n"
+    "jl        99f                             \n"
+
+    // 1 pixel loop until destination pointer is aligned.
+  "10:                                         \n"
+    "test      $0xf,%2                         \n"
+    "je        19f                             \n"
+    "movd      " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm2         \n"
+    "psrlw     $0x8,%%xmm3                     \n"
+    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x4,1) ",%1            \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x1,%3                         \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x4,2) ",%2            \n"
+    "jge       10b                             \n"
+
+  "19:                                         \n"
+    "add       $1-4,%3                         \n"
+    "jl        49f                             \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "41:                                         \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
+    "psrlw     $0x8,%%xmm3                     \n"
+    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jge       41b                             \n"
+
+  "49:                                         \n"
+    "add       $0x3,%3                         \n"
+    "jl        99f                             \n"
+
+    // 1 pixel loop.
+  "91:                                         \n"
+    "movd      " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm2         \n"
+    "psrlw     $0x8,%%xmm3                     \n"
+    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x4,1) ",%1            \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x1,%3                         \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x4,2) ",%2            \n"
+    "jge       91b                             \n"
+  "99:                                         \n"
+  : "+r"(src_argb0),    // %0
+    "+r"(src_argb1),    // %1
+    "+r"(dst_argb),     // %2
+    "+r"(width)         // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBBLENDROW_SSE2
+
+#ifdef HAS_ARGBBLENDROW_SSSE3
+// Shuffle table for isolating alpha.
+static uvec8 kShuffleAlpha = {
+  3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
+  11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
+};
+
+// Blend 8 pixels at a time
+// Shuffle table for reversing the bytes.
+
+// Same as SSE2, but replaces
+//    psrlw      xmm3, 8          // alpha
+//    pshufhw    xmm3, xmm3,0F5h  // 8 alpha words
+//    pshuflw    xmm3, xmm3,0F5h
+// with..
+//    pshufb     xmm3, kShuffleAlpha // alpha
+
+void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
+                        uint8* dst_argb, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "psrlw     $0xf,%%xmm7                     \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "psrlw     $0x8,%%xmm6                     \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psllw     $0x8,%%xmm5                     \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "pslld     $0x18,%%xmm4                    \n"
+    "sub       $0x1,%3                         \n"
+    "je        91f                             \n"
+    "jl        99f                             \n"
+
+    // 1 pixel loop until destination pointer is aligned.
+  "10:                                         \n"
+    "test      $0xf,%2                         \n"
+    "je        19f                             \n"
+    "movd      " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm2         \n"
+    "pshufb    %4,%%xmm3                       \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x4,1) ",%1            \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x1,%3                         \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x4,2) ",%2            \n"
+    "jge       10b                             \n"
+
+  "19:                                         \n"
+    "add       $1-4,%3                         \n"
+    "jl        49f                             \n"
+    "test      $0xf,%0                         \n"
+    "jne       41f                             \n"
+    "test      $0xf,%1                         \n"
+    "jne       41f                             \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "40:                                         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
+    "pshufb    %4,%%xmm3                       \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jge       40b                             \n"
+    "jmp       49f                             \n"
+
+    // 4 pixel unaligned loop.
+    LABELALIGN
+  "41:                                         \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
+    "pshufb    %4,%%xmm3                       \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jge       41b                             \n"
+
+  "49:                                         \n"
+    "add       $0x3,%3                         \n"
+    "jl        99f                             \n"
+
+    // 1 pixel loop.
+  "91:                                         \n"
+    "movd      " MEMACCESS(0) ",%%xmm3         \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "movdqa    %%xmm3,%%xmm0                   \n"
+    "pxor      %%xmm4,%%xmm3                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm2         \n"
+    "pshufb    %4,%%xmm3                       \n"
+    "pand      %%xmm6,%%xmm2                   \n"
+    "paddw     %%xmm7,%%xmm3                   \n"
+    "pmullw    %%xmm3,%%xmm2                   \n"
+    "movd      " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x4,1) ",%1            \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "por       %%xmm4,%%xmm0                   \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm2                     \n"
+    "paddusb   %%xmm2,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x1,%3                         \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x4,2) ",%2            \n"
+    "jge       91b                             \n"
+  "99:                                         \n"
+  : "+r"(src_argb0),    // %0
+    "+r"(src_argb1),    // %1
+    "+r"(dst_argb),     // %2
+    "+r"(width)         // %3
+  : "m"(kShuffleAlpha)  // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBBLENDROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_SSE2
+// Attenuate 4 pixels at a time.
+// aligned to 16 bytes
+void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "pslld     $0x18,%%xmm4                    \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrld     $0x8,%%xmm5                     \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "pshufhw   $0xff,%%xmm0,%%xmm2             \n"
+    "pshuflw   $0xff,%%xmm2,%%xmm2             \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    "pshufhw   $0xff,%%xmm1,%%xmm2             \n"
+    "pshuflw   $0xff,%%xmm2,%%xmm2             \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "pand      %%xmm4,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "por       %%xmm2,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+r"(width)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+// Shuffle table duplicating alpha
+static uvec8 kShuffleAlpha0 = {
+  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
+};
+static uvec8 kShuffleAlpha1 = {
+  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
+  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
+};
+// Attenuate 4 pixels at a time.
+// aligned to 16 bytes
+void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "pcmpeqb   %%xmm3,%%xmm3                   \n"
+    "pslld     $0x18,%%xmm3                    \n"
+    "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
+    "punpcklbw %%xmm1,%%xmm1                   \n"
+    "pmulhuw   %%xmm1,%%xmm0                   \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
+    "punpckhbw %%xmm2,%%xmm2                   \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "pand      %%xmm3,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "por       %%xmm2,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+r"(width)        // %2
+  : "m"(kShuffleAlpha0),  // %3
+    "m"(kShuffleAlpha1)  // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBATTENUATEROW_SSSE3
+
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+// Unattenuate 4 pixels at a time.
+// aligned to 16 bytes
+void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
+                             int width) {
+  uintptr_t alpha = 0;
+  asm volatile (
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movzb     " MEMACCESS2(0x03,0) ",%3       \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    MEMOPREG(movd,0x00,4,3,4,xmm2)             //  movd      0x0(%4,%3,4),%%xmm2
+    "movzb     " MEMACCESS2(0x07,0) ",%3       \n"
+    MEMOPREG(movd,0x00,4,3,4,xmm3)             //  movd      0x0(%4,%3,4),%%xmm3
+    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
+    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
+    "movlhps   %%xmm3,%%xmm2                   \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
+    "movzb     " MEMACCESS2(0x0b,0) ",%3       \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,4,3,4,xmm2)             //  movd      0x0(%4,%3,4),%%xmm2
+    "movzb     " MEMACCESS2(0x0f,0) ",%3       \n"
+    MEMOPREG(movd,0x00,4,3,4,xmm3)             //  movd      0x0(%4,%3,4),%%xmm3
+    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
+    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
+    "movlhps   %%xmm3,%%xmm2                   \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+r"(width),       // %2
+    "+r"(alpha)        // %3
+  : "r"(fixed_invtbl8)  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBUNATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBGRAYROW_SSSE3
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
+  asm volatile (
+    "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm3   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrld     $0x18,%%xmm2                    \n"
+    "psrld     $0x18,%%xmm3                    \n"
+    "packuswb  %%xmm3,%%xmm2                   \n"
+    "packuswb  %%xmm2,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm3                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "punpcklbw %%xmm2,%%xmm3                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm3,%%xmm0                   \n"
+    "punpckhwd %%xmm3,%%xmm1                   \n"
+    "sub       $0x8,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(width)       // %2
+  : "m"(kARGBToYJ),   // %3
+    "m"(kAddYJ64)     // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBGRAYROW_SSSE3
+
+#ifdef HAS_ARGBSEPIAROW_SSSE3
+//    b = (r * 35 + g * 68 + b * 17) >> 7
+//    g = (r * 45 + g * 88 + b * 22) >> 7
+//    r = (r * 50 + g * 98 + b * 24) >> 7
+// Constant for ARGB color to sepia tone
+static vec8 kARGBToSepiaB = {
+  17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
+};
+
+static vec8 kARGBToSepiaG = {
+  22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
+};
+
+static vec8 kARGBToSepiaR = {
+  24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
+};
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
+  asm volatile (
+    "movdqa    %2,%%xmm2                       \n"
+    "movdqa    %3,%%xmm3                       \n"
+    "movdqa    %4,%%xmm4                       \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
+    "pmaddubsw %%xmm2,%%xmm0                   \n"
+    "pmaddubsw %%xmm2,%%xmm6                   \n"
+    "phaddw    %%xmm6,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm5         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "pmaddubsw %%xmm3,%%xmm5                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "phaddw    %%xmm1,%%xmm5                   \n"
+    "psrlw     $0x7,%%xmm5                     \n"
+    "packuswb  %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm5         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "pmaddubsw %%xmm4,%%xmm5                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "phaddw    %%xmm1,%%xmm5                   \n"
+    "psrlw     $0x7,%%xmm5                     \n"
+    "packuswb  %%xmm5,%%xmm5                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "psrld     $0x18,%%xmm6                    \n"
+    "psrld     $0x18,%%xmm1                    \n"
+    "packuswb  %%xmm1,%%xmm6                   \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "punpcklbw %%xmm6,%%xmm5                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm5,%%xmm0                   \n"
+    "punpckhwd %%xmm5,%%xmm1                   \n"
+    "sub       $0x8,%1                         \n"
+    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "jg        1b                              \n"
+  : "+r"(dst_argb),      // %0
+    "+r"(width)          // %1
+  : "m"(kARGBToSepiaB),  // %2
+    "m"(kARGBToSepiaG),  // %3
+    "m"(kARGBToSepiaR)   // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+#endif  // HAS_ARGBSEPIAROW_SSSE3
+
+#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// Same as Sepia except matrix is provided.
+void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                              const int8* matrix_argb, int width) {
+  asm volatile (
+    "movdqu    " MEMACCESS(3) ",%%xmm5         \n"
+    "pshufd    $0x00,%%xmm5,%%xmm2             \n"
+    "pshufd    $0x55,%%xmm5,%%xmm3             \n"
+    "pshufd    $0xaa,%%xmm5,%%xmm4             \n"
+    "pshufd    $0xff,%%xmm5,%%xmm5             \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
+    "pmaddubsw %%xmm2,%%xmm0                   \n"
+    "pmaddubsw %%xmm2,%%xmm7                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "pmaddubsw %%xmm3,%%xmm6                   \n"
+    "pmaddubsw %%xmm3,%%xmm1                   \n"
+    "phaddsw   %%xmm7,%%xmm0                   \n"
+    "phaddsw   %%xmm1,%%xmm6                   \n"
+    "psraw     $0x6,%%xmm0                     \n"
+    "psraw     $0x6,%%xmm6                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "punpcklbw %%xmm6,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "pmaddubsw %%xmm4,%%xmm7                   \n"
+    "phaddsw   %%xmm7,%%xmm1                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
+    "pmaddubsw %%xmm5,%%xmm6                   \n"
+    "pmaddubsw %%xmm5,%%xmm7                   \n"
+    "phaddsw   %%xmm7,%%xmm6                   \n"
+    "psraw     $0x6,%%xmm1                     \n"
+    "psraw     $0x6,%%xmm6                     \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "punpcklbw %%xmm6,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm6                   \n"
+    "punpcklwd %%xmm1,%%xmm0                   \n"
+    "punpckhwd %%xmm1,%%xmm6                   \n"
+    "sub       $0x8,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm6," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),      // %0
+    "+r"(dst_argb),      // %1
+    "+r"(width)          // %2
+  : "r"(matrix_argb)     // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3
+
+#ifdef HAS_ARGBQUANTIZEROW_SSE2
+// Quantize 4 ARGB pixels (16 bytes).
+// aligned to 16 bytes
+void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width) {
+  asm volatile (
+    "movd      %2,%%xmm2                       \n"
+    "movd      %3,%%xmm3                       \n"
+    "movd      %4,%%xmm4                       \n"
+    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
+    "pshufd    $0x44,%%xmm2,%%xmm2             \n"
+    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
+    "pshufd    $0x44,%%xmm3,%%xmm3             \n"
+    "pshuflw   $0x40,%%xmm4,%%xmm4             \n"
+    "pshufd    $0x44,%%xmm4,%%xmm4             \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "pslld     $0x18,%%xmm6                    \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
+    "punpckhbw %%xmm5,%%xmm1                   \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "pmullw    %%xmm3,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm7         \n"
+    "pmullw    %%xmm3,%%xmm1                   \n"
+    "pand      %%xmm6,%%xmm7                   \n"
+    "paddw     %%xmm4,%%xmm0                   \n"
+    "paddw     %%xmm4,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "por       %%xmm7,%%xmm0                   \n"
+    "sub       $0x4,%1                         \n"
+    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "jg        1b                              \n"
+  : "+r"(dst_argb),       // %0
+    "+r"(width)           // %1
+  : "r"(scale),           // %2
+    "r"(interval_size),   // %3
+    "r"(interval_offset)  // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBQUANTIZEROW_SSE2
+
+#ifdef HAS_ARGBSHADEROW_SSE2
+// Shade 4 pixels at a time by specified value.
+// Aligned to 16 bytes.
+void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  asm volatile (
+    "movd      %3,%%xmm2                       \n"
+    "punpcklbw %%xmm2,%%xmm2                   \n"
+    "punpcklqdq %%xmm2,%%xmm2                  \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  : "r"(value)       // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHADEROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
+void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    "pxor      %%xmm5,%%xmm5                   \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "movdqu    %%xmm0,%%xmm1                   \n"
+    "movdqu    %%xmm2,%%xmm3                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "punpckhbw %%xmm5,%%xmm3                   \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "pmulhuw   %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBMULTIPLYROW_SSE2
+
+#ifdef HAS_ARGBADDROW_SSE2
+// Add 2 rows of ARGB pixels together, 4 pixels at a time.
+void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_ARGBADDROW_SSE2
+
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
+void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  asm volatile (
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "psubusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb0),  // %0
+    "+r"(src_argb1),  // %1
+    "+r"(dst_argb),   // %2
+    "+r"(width)       // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_ARGBSUBTRACTROW_SSE2
+
+#ifdef HAS_SOBELXROW_SSE2
+// SobelX as a matrix is
+// -1  0  1
+// -2  0  2
+// -1  0  1
+void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    "sub       %0,%2                           \n"
+    "sub       %0,%3                           \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "movq      " MEMACCESS2(0x2,0) ",%%xmm1    \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "psubw     %%xmm1,%%xmm0                   \n"
+    BUNDLEALIGN
+    MEMOPREG(movq,0x00,0,1,1,xmm1)             //  movq      (%0,%1,1),%%xmm1
+    MEMOPREG(movq,0x02,0,1,1,xmm2)             //  movq      0x2(%0,%1,1),%%xmm2
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "psubw     %%xmm2,%%xmm1                   \n"
+    BUNDLEALIGN
+    MEMOPREG(movq,0x00,0,2,1,xmm2)             //  movq      (%0,%2,1),%%xmm2
+    MEMOPREG(movq,0x02,0,2,1,xmm3)             //  movq      0x2(%0,%2,1),%%xmm3
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm3                   \n"
+    "psubw     %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm2,%%xmm0                   \n"
+    "paddw     %%xmm1,%%xmm0                   \n"
+    "paddw     %%xmm1,%%xmm0                   \n"
+    "pxor      %%xmm1,%%xmm1                   \n"
+    "psubw     %%xmm0,%%xmm1                   \n"
+    "pmaxsw    %%xmm1,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "sub       $0x8,%4                         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm0,0x00,0,3,1)             //  movq      %%xmm0,(%0,%3,1)
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "jg        1b                              \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(src_y2),      // %2
+    "+r"(dst_sobelx),  // %3
+    "+r"(width)        // %4
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_SOBELXROW_SSE2
+
+#ifdef HAS_SOBELYROW_SSE2
+// SobelY as a matrix is
+// -1 -2 -1
+//  0  0  0
+//  1  2  1
+void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    "sub       %0,%2                           \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movq,0x00,0,1,1,xmm1)             //  movq      (%0,%1,1),%%xmm1
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "psubw     %%xmm1,%%xmm0                   \n"
+    BUNDLEALIGN
+    "movq      " MEMACCESS2(0x1,0) ",%%xmm1    \n"
+    MEMOPREG(movq,0x01,0,1,1,xmm2)             //  movq      0x1(%0,%1,1),%%xmm2
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "psubw     %%xmm2,%%xmm1                   \n"
+    BUNDLEALIGN
+    "movq      " MEMACCESS2(0x2,0) ",%%xmm2    \n"
+    MEMOPREG(movq,0x02,0,1,1,xmm3)             //  movq      0x2(%0,%1,1),%%xmm3
+    "punpcklbw %%xmm5,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm3                   \n"
+    "psubw     %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm2,%%xmm0                   \n"
+    "paddw     %%xmm1,%%xmm0                   \n"
+    "paddw     %%xmm1,%%xmm0                   \n"
+    "pxor      %%xmm1,%%xmm1                   \n"
+    "psubw     %%xmm0,%%xmm1                   \n"
+    "pmaxsw    %%xmm1,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "sub       $0x8,%3                         \n"
+    BUNDLEALIGN
+    MEMOPMEM(movq,xmm0,0x00,0,2,1)             //  movq      %%xmm0,(%0,%2,1)
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "jg        1b                              \n"
+  : "+r"(src_y0),      // %0
+    "+r"(src_y1),      // %1
+    "+r"(dst_sobely),  // %2
+    "+r"(width)        // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_SOBELYROW_SSE2
+
+#ifdef HAS_SOBELROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0x18,%%xmm5                    \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "punpcklbw %%xmm0,%%xmm2                   \n"
+    "punpckhbw %%xmm0,%%xmm0                   \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "punpcklwd %%xmm2,%%xmm1                   \n"
+    "punpckhwd %%xmm2,%%xmm2                   \n"
+    "por       %%xmm5,%%xmm1                   \n"
+    "por       %%xmm5,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm3                   \n"
+    "punpcklwd %%xmm0,%%xmm3                   \n"
+    "punpckhwd %%xmm0,%%xmm0                   \n"
+    "por       %%xmm5,%%xmm3                   \n"
+    "por       %%xmm5,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqa    %%xmm1," MEMACCESS(2) "         \n"
+    "movdqa    %%xmm2," MEMACCESS2(0x10,2) "   \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x20,2) "   \n"
+    "movdqa    %%xmm0," MEMACCESS2(0x30,2) "   \n"
+    "lea       " MEMLEA(0x40,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_SOBELROW_SSE2
+
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane.
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0x18,%%xmm5                    \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_y),       // %2
+    "+r"(width)        // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_SOBELTOPLANEROW_SSE2
+
+#ifdef HAS_SOBELXYROW_SSE2
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+
+    // 8 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "paddusb   %%xmm1,%%xmm2                   \n"
+    "movdqa    %%xmm0,%%xmm3                   \n"
+    "punpcklbw %%xmm5,%%xmm3                   \n"
+    "punpckhbw %%xmm5,%%xmm0                   \n"
+    "movdqa    %%xmm1,%%xmm4                   \n"
+    "punpcklbw %%xmm2,%%xmm4                   \n"
+    "punpckhbw %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm4,%%xmm6                   \n"
+    "punpcklwd %%xmm3,%%xmm6                   \n"
+    "punpckhwd %%xmm3,%%xmm4                   \n"
+    "movdqa    %%xmm1,%%xmm7                   \n"
+    "punpcklwd %%xmm0,%%xmm7                   \n"
+    "punpckhwd %%xmm0,%%xmm1                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqa    %%xmm6," MEMACCESS(2) "         \n"
+    "movdqa    %%xmm4," MEMACCESS2(0x10,2) "   \n"
+    "movdqa    %%xmm7," MEMACCESS2(0x20,2) "   \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x30,2) "   \n"
+    "lea       " MEMLEA(0x40,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(width)        // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_SOBELXYROW_SSE2
+
+#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
+// Creates a table of cumulative sums where each value is a sum of all values
+// above and to the left of the value, inclusive of the value.
+void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
+                                  const int32* previous_cumsum, int width) {
+  asm volatile (
+    "pxor      %%xmm0,%%xmm0                   \n"
+    "pxor      %%xmm1,%%xmm1                   \n"
+    "sub       $0x4,%3                         \n"
+    "jl        49f                             \n"
+    "test      $0xf,%1                         \n"
+    "jne       49f                             \n"
+
+  // 4 pixel loop                              \n"
+    LABELALIGN
+  "40:                                         \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm2,%%xmm4                   \n"
+    "punpcklbw %%xmm1,%%xmm2                   \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "punpcklwd %%xmm1,%%xmm2                   \n"
+    "punpckhwd %%xmm1,%%xmm3                   \n"
+    "punpckhbw %%xmm1,%%xmm4                   \n"
+    "movdqa    %%xmm4,%%xmm5                   \n"
+    "punpcklwd %%xmm1,%%xmm4                   \n"
+    "punpckhwd %%xmm1,%%xmm5                   \n"
+    "paddd     %%xmm2,%%xmm0                   \n"
+    "movdqa    " MEMACCESS(2) ",%%xmm2         \n"
+    "paddd     %%xmm0,%%xmm2                   \n"
+    "paddd     %%xmm3,%%xmm0                   \n"
+    "movdqa    " MEMACCESS2(0x10,2) ",%%xmm3   \n"
+    "paddd     %%xmm0,%%xmm3                   \n"
+    "paddd     %%xmm4,%%xmm0                   \n"
+    "movdqa    " MEMACCESS2(0x20,2) ",%%xmm4   \n"
+    "paddd     %%xmm0,%%xmm4                   \n"
+    "paddd     %%xmm5,%%xmm0                   \n"
+    "movdqa    " MEMACCESS2(0x30,2) ",%%xmm5   \n"
+    "lea       " MEMLEA(0x40,2) ",%2           \n"
+    "paddd     %%xmm0,%%xmm5                   \n"
+    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
+    "movdqa    %%xmm4," MEMACCESS2(0x20,1) "   \n"
+    "movdqa    %%xmm5," MEMACCESS2(0x30,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "sub       $0x4,%3                         \n"
+    "jge       40b                             \n"
+
+  "49:                                         \n"
+    "add       $0x3,%3                         \n"
+    "jl        19f                             \n"
+
+  // 1 pixel loop                              \n"
+    LABELALIGN
+  "10:                                         \n"
+    "movd      " MEMACCESS(0) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "punpcklbw %%xmm1,%%xmm2                   \n"
+    "punpcklwd %%xmm1,%%xmm2                   \n"
+    "paddd     %%xmm2,%%xmm0                   \n"
+    "movdqu    " MEMACCESS(2) ",%%xmm2         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "paddd     %%xmm0,%%xmm2                   \n"
+    "movdqu    %%xmm2," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x1,%3                         \n"
+    "jge       10b                             \n"
+
+  "19:                                         \n"
+  : "+r"(row),  // %0
+    "+r"(cumsum),  // %1
+    "+r"(previous_cumsum),  // %2
+    "+r"(width)  // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
+
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst,
+                                    int count) {
+  asm volatile (
+    "movd      %5,%%xmm5                       \n"
+    "cvtdq2ps  %%xmm5,%%xmm5                   \n"
+    "rcpss     %%xmm5,%%xmm4                   \n"
+    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
+    "sub       $0x4,%3                         \n"
+    "jl        49f                             \n"
+    "cmpl      $0x80,%5                        \n"
+    "ja        40f                             \n"
+
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "psrld     $0x10,%%xmm6                    \n"
+    "cvtdq2ps  %%xmm6,%%xmm6                   \n"
+    "addps     %%xmm6,%%xmm5                   \n"
+    "mulps     %%xmm4,%%xmm5                   \n"
+    "cvtps2dq  %%xmm5,%%xmm5                   \n"
+    "packssdw  %%xmm5,%%xmm5                   \n"
+
+  // 4 pixel small loop                        \n"
+    LABELALIGN
+  "4:                                         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    BUNDLEALIGN
+    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
+    MEMOPREG(psubd,0x10,0,4,4,xmm1)            // psubd    0x10(%0,%4,4),%%xmm1
+    MEMOPREG(psubd,0x20,0,4,4,xmm2)            // psubd    0x20(%0,%4,4),%%xmm2
+    MEMOPREG(psubd,0x30,0,4,4,xmm3)            // psubd    0x30(%0,%4,4),%%xmm3
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
+    "psubd     " MEMACCESS2(0x10,1) ",%%xmm1   \n"
+    "psubd     " MEMACCESS2(0x20,1) ",%%xmm2   \n"
+    "psubd     " MEMACCESS2(0x30,1) ",%%xmm3   \n"
+    BUNDLEALIGN
+    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
+    MEMOPREG(paddd,0x10,1,4,4,xmm1)            // paddd    0x10(%1,%4,4),%%xmm1
+    MEMOPREG(paddd,0x20,1,4,4,xmm2)            // paddd    0x20(%1,%4,4),%%xmm2
+    MEMOPREG(paddd,0x30,1,4,4,xmm3)            // paddd    0x30(%1,%4,4),%%xmm3
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "packssdw  %%xmm1,%%xmm0                   \n"
+    "packssdw  %%xmm3,%%xmm2                   \n"
+    "pmulhuw   %%xmm5,%%xmm0                   \n"
+    "pmulhuw   %%xmm5,%%xmm2                   \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "sub       $0x4,%3                         \n"
+    "jge       4b                              \n"
+    "jmp       49f                             \n"
+
+  // 4 pixel loop                              \n"
+    LABELALIGN
+  "40:                                         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
+    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
+    BUNDLEALIGN
+    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
+    MEMOPREG(psubd,0x10,0,4,4,xmm1)            // psubd    0x10(%0,%4,4),%%xmm1
+    MEMOPREG(psubd,0x20,0,4,4,xmm2)            // psubd    0x20(%0,%4,4),%%xmm2
+    MEMOPREG(psubd,0x30,0,4,4,xmm3)            // psubd    0x30(%0,%4,4),%%xmm3
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
+    "psubd     " MEMACCESS2(0x10,1) ",%%xmm1   \n"
+    "psubd     " MEMACCESS2(0x20,1) ",%%xmm2   \n"
+    "psubd     " MEMACCESS2(0x30,1) ",%%xmm3   \n"
+    BUNDLEALIGN
+    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
+    MEMOPREG(paddd,0x10,1,4,4,xmm1)            // paddd    0x10(%1,%4,4),%%xmm1
+    MEMOPREG(paddd,0x20,1,4,4,xmm2)            // paddd    0x20(%1,%4,4),%%xmm2
+    MEMOPREG(paddd,0x30,1,4,4,xmm3)            // paddd    0x30(%1,%4,4),%%xmm3
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
+    "cvtdq2ps  %%xmm1,%%xmm1                   \n"
+    "mulps     %%xmm4,%%xmm0                   \n"
+    "mulps     %%xmm4,%%xmm1                   \n"
+    "cvtdq2ps  %%xmm2,%%xmm2                   \n"
+    "cvtdq2ps  %%xmm3,%%xmm3                   \n"
+    "mulps     %%xmm4,%%xmm2                   \n"
+    "mulps     %%xmm4,%%xmm3                   \n"
+    "cvtps2dq  %%xmm0,%%xmm0                   \n"
+    "cvtps2dq  %%xmm1,%%xmm1                   \n"
+    "cvtps2dq  %%xmm2,%%xmm2                   \n"
+    "cvtps2dq  %%xmm3,%%xmm3                   \n"
+    "packssdw  %%xmm1,%%xmm0                   \n"
+    "packssdw  %%xmm3,%%xmm2                   \n"
+    "packuswb  %%xmm2,%%xmm0                   \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "sub       $0x4,%3                         \n"
+    "jge       40b                             \n"
+
+  "49:                                         \n"
+    "add       $0x3,%3                         \n"
+    "jl        19f                             \n"
+
+  // 1 pixel loop                              \n"
+    LABELALIGN
+  "10:                                         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
+    BUNDLEALIGN
+    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
+    "mulps     %%xmm4,%%xmm0                   \n"
+    "cvtps2dq  %%xmm0,%%xmm0                   \n"
+    "packssdw  %%xmm0,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x4,2) ",%2            \n"
+    "sub       $0x1,%3                         \n"
+    "jge       10b                             \n"
+  "19:                                         \n"
+  : "+r"(topleft),  // %0
+    "+r"(botleft),  // %1
+    "+r"(dst),      // %2
+    "+rm"(count)    // %3
+  : "r"((intptr_t)(width)),  // %4
+    "rm"(area)     // %5
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+
+#ifdef HAS_ARGBAFFINEROW_SSE2
+// Copy ARGB pixels from source image with slope to a row of destination.
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
+                        uint8* dst_argb, const float* src_dudv, int width) {
+  intptr_t src_argb_stride_temp = src_argb_stride;
+  intptr_t temp = 0;
+  asm volatile (
+    "movq      " MEMACCESS(3) ",%%xmm2         \n"
+    "movq      " MEMACCESS2(0x08,3) ",%%xmm7   \n"
+    "shl       $0x10,%1                        \n"
+    "add       $0x4,%1                         \n"
+    "movd      %1,%%xmm5                       \n"
+    "sub       $0x4,%4                         \n"
+    "jl        49f                             \n"
+
+    "pshufd    $0x44,%%xmm7,%%xmm7             \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "movdqa    %%xmm2,%%xmm0                   \n"
+    "addps     %%xmm7,%%xmm0                   \n"
+    "movlhps   %%xmm0,%%xmm2                   \n"
+    "movdqa    %%xmm7,%%xmm4                   \n"
+    "addps     %%xmm4,%%xmm4                   \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "addps     %%xmm4,%%xmm3                   \n"
+    "addps     %%xmm4,%%xmm4                   \n"
+
+  // 4 pixel loop                              \n"
+    LABELALIGN
+  "40:                                         \n"
+    "cvttps2dq %%xmm2,%%xmm0                   \n"  // x, y float to int first 2
+    "cvttps2dq %%xmm3,%%xmm1                   \n"  // x, y float to int next 2
+    "packssdw  %%xmm1,%%xmm0                   \n"  // x, y as 8 shorts
+    "pmaddwd   %%xmm5,%%xmm0                   \n"  // off = x * 4 + y * stride
+    "movd      %%xmm0,%k1                      \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+    "movd      %%xmm0,%k5                      \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,0,1,1,xmm1)             //  movd      (%0,%1,1),%%xmm1
+    MEMOPREG(movd,0x00,0,5,1,xmm6)             //  movd      (%0,%5,1),%%xmm6
+    "punpckldq %%xmm6,%%xmm1                   \n"
+    "addps     %%xmm4,%%xmm2                   \n"
+    "movq      %%xmm1," MEMACCESS(2) "         \n"
+    "movd      %%xmm0,%k1                      \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+    "movd      %%xmm0,%k5                      \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,0,1,1,xmm0)             //  movd      (%0,%1,1),%%xmm0
+    MEMOPREG(movd,0x00,0,5,1,xmm6)             //  movd      (%0,%5,1),%%xmm6
+    "punpckldq %%xmm6,%%xmm0                   \n"
+    "addps     %%xmm4,%%xmm3                   \n"
+    "sub       $0x4,%4                         \n"
+    "movq      %%xmm0," MEMACCESS2(0x08,2) "   \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jge       40b                             \n"
+
+  "49:                                         \n"
+    "add       $0x3,%4                         \n"
+    "jl        19f                             \n"
+
+  // 1 pixel loop                              \n"
+    LABELALIGN
+  "10:                                         \n"
+    "cvttps2dq %%xmm2,%%xmm0                   \n"
+    "packssdw  %%xmm0,%%xmm0                   \n"
+    "pmaddwd   %%xmm5,%%xmm0                   \n"
+    "addps     %%xmm7,%%xmm2                   \n"
+    "movd      %%xmm0,%k1                      \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,0,1,1,xmm0)             //  movd      (%0,%1,1),%%xmm0
+    "sub       $0x1,%4                         \n"
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x04,2) ",%2           \n"
+    "jge       10b                             \n"
+  "19:                                         \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_argb_stride_temp),  // %1
+    "+r"(dst_argb),  // %2
+    "+r"(src_dudv),  // %3
+    "+rm"(width),    // %4
+    "+r"(temp)   // %5
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBAFFINEROW_SSE2
+
+#ifdef HAS_INTERPOLATEROW_SSSE3
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                          ptrdiff_t src_stride, int dst_width,
+                          int source_y_fraction) {
+  asm volatile (
+    "sub       %1,%0                           \n"
+    "shr       %3                              \n"
+    "cmp       $0x0,%3                         \n"
+    "je        100f                            \n"
+    "cmp       $0x20,%3                        \n"
+    "je        75f                             \n"
+    "cmp       $0x40,%3                        \n"
+    "je        50f                             \n"
+    "cmp       $0x60,%3                        \n"
+    "je        25f                             \n"
+
+    "movd      %3,%%xmm0                       \n"
+    "neg       %3                              \n"
+    "add       $0x80,%3                        \n"
+    "movd      %3,%%xmm5                       \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "punpcklwd %%xmm5,%%xmm5                   \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+
+    // General purpose row blend.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm2)
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm2,%%xmm0                   \n"
+    "punpckhbw %%xmm2,%%xmm1                   \n"
+    "pmaddubsw %%xmm5,%%xmm0                   \n"
+    "pmaddubsw %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+    "jmp       99f                             \n"
+
+    // Blend 25 / 75.
+    LABELALIGN
+  "25:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm1)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        25b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 50 / 50.
+    LABELALIGN
+  "50:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm1)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        50b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 75 / 25.
+    LABELALIGN
+  "75:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm0)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        75b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+    LABELALIGN
+  "100:                                        \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    "sub       $0x10,%2                        \n"
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        100b                            \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),    // %0
+    "+r"(src_ptr),    // %1
+    "+r"(dst_width),  // %2
+    "+r"(source_y_fraction)  // %3
+  : "r"((intptr_t)(src_stride))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_INTERPOLATEROW_SSSE3
+
+#ifdef HAS_INTERPOLATEROW_SSE2
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride, int dst_width,
+                         int source_y_fraction) {
+  asm volatile (
+    "sub       %1,%0                           \n"
+    "shr       %3                              \n"
+    "cmp       $0x0,%3                         \n"
+    "je        100f                            \n"
+    "cmp       $0x20,%3                        \n"
+    "je        75f                             \n"
+    "cmp       $0x40,%3                        \n"
+    "je        50f                             \n"
+    "cmp       $0x60,%3                        \n"
+    "je        25f                             \n"
+
+    "movd      %3,%%xmm0                       \n"
+    "neg       %3                              \n"
+    "add       $0x80,%3                        \n"
+    "movd      %3,%%xmm5                       \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "punpcklwd %%xmm5,%%xmm5                   \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+
+    // General purpose row blend.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm2)           //  movdqa    (%1,%4,1),%%xmm2
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "punpcklbw %%xmm4,%%xmm2                   \n"
+    "punpckhbw %%xmm4,%%xmm3                   \n"
+    "punpcklbw %%xmm4,%%xmm0                   \n"
+    "punpckhbw %%xmm4,%%xmm1                   \n"
+    "psubw     %%xmm0,%%xmm2                   \n"
+    "psubw     %%xmm1,%%xmm3                   \n"
+    "paddw     %%xmm2,%%xmm2                   \n"
+    "paddw     %%xmm3,%%xmm3                   \n"
+    "pmulhw    %%xmm5,%%xmm2                   \n"
+    "pmulhw    %%xmm5,%%xmm3                   \n"
+    "paddw     %%xmm2,%%xmm0                   \n"
+    "paddw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+    "jmp       99f                             \n"
+
+    // Blend 25 / 75.
+    LABELALIGN
+  "25:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm1)           //  movdqa    (%1,%4,1),%%xmm1
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        25b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 50 / 50.
+    LABELALIGN
+  "50:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm1)           //  movdqa    (%1,%4,1),%%xmm1
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        50b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 75 / 25.
+    LABELALIGN
+  "75:                                         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
+    MEMOPREG(movdqa,0x00,1,4,1,xmm0)           //  movdqa    (%1,%4,1),%%xmm0
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        75b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+    LABELALIGN
+  "100:                                        \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    "sub       $0x10,%2                        \n"
+    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        100b                            \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),    // %0
+    "+r"(src_ptr),    // %1
+    "+r"(dst_width),  // %2
+    "+r"(source_y_fraction)  // %3
+  : "r"((intptr_t)(src_stride))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_INTERPOLATEROW_SSE2
+
+#ifdef HAS_INTERPOLATEROW_SSSE3
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                                    ptrdiff_t src_stride, int dst_width,
+                                    int source_y_fraction) {
+  asm volatile (
+    "sub       %1,%0                           \n"
+    "shr       %3                              \n"
+    "cmp       $0x0,%3                         \n"
+    "je        100f                            \n"
+    "cmp       $0x20,%3                        \n"
+    "je        75f                             \n"
+    "cmp       $0x40,%3                        \n"
+    "je        50f                             \n"
+    "cmp       $0x60,%3                        \n"
+    "je        25f                             \n"
+
+    "movd      %3,%%xmm0                       \n"
+    "neg       %3                              \n"
+    "add       $0x80,%3                        \n"
+    "movd      %3,%%xmm5                       \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "punpcklwd %%xmm5,%%xmm5                   \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+
+    // General purpose row blend.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm2)
+    "movdqu    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm2,%%xmm0                   \n"
+    "punpckhbw %%xmm2,%%xmm1                   \n"
+    "pmaddubsw %%xmm5,%%xmm0                   \n"
+    "pmaddubsw %%xmm5,%%xmm1                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "psrlw     $0x7,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+    "jmp       99f                             \n"
+
+    // Blend 25 / 75.
+    LABELALIGN
+  "25:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm1)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        25b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 50 / 50.
+    LABELALIGN
+  "50:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm1)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        50b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 75 / 25.
+    LABELALIGN
+  "75:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm0)
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        75b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+    LABELALIGN
+  "100:                                        \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    "sub       $0x10,%2                        \n"
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        100b                            \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),    // %0
+    "+r"(src_ptr),    // %1
+    "+r"(dst_width),  // %2
+    "+r"(source_y_fraction)  // %3
+  : "r"((intptr_t)(src_stride))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm5"
+#endif
+  );
+}
+#endif   // HAS_INTERPOLATEROW_SSSE3
+
+#ifdef HAS_INTERPOLATEROW_SSE2
+// Bilinear filter 16x2 -> 16x1
+void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                                   ptrdiff_t src_stride, int dst_width,
+                                   int source_y_fraction) {
+  asm volatile (
+    "sub       %1,%0                           \n"
+    "shr       %3                              \n"
+    "cmp       $0x0,%3                         \n"
+    "je        100f                            \n"
+    "cmp       $0x20,%3                        \n"
+    "je        75f                             \n"
+    "cmp       $0x40,%3                        \n"
+    "je        50f                             \n"
+    "cmp       $0x60,%3                        \n"
+    "je        25f                             \n"
+
+    "movd      %3,%%xmm0                       \n"
+    "neg       %3                              \n"
+    "add       $0x80,%3                        \n"
+    "movd      %3,%%xmm5                       \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "punpcklwd %%xmm5,%%xmm5                   \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    "pxor      %%xmm4,%%xmm4                   \n"
+
+    // General purpose row blend.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm2)           //  movdqu    (%1,%4,1),%%xmm2
+    "movdqu    %%xmm0,%%xmm1                   \n"
+    "movdqu    %%xmm2,%%xmm3                   \n"
+    "punpcklbw %%xmm4,%%xmm2                   \n"
+    "punpckhbw %%xmm4,%%xmm3                   \n"
+    "punpcklbw %%xmm4,%%xmm0                   \n"
+    "punpckhbw %%xmm4,%%xmm1                   \n"
+    "psubw     %%xmm0,%%xmm2                   \n"
+    "psubw     %%xmm1,%%xmm3                   \n"
+    "paddw     %%xmm2,%%xmm2                   \n"
+    "paddw     %%xmm3,%%xmm3                   \n"
+    "pmulhw    %%xmm5,%%xmm2                   \n"
+    "pmulhw    %%xmm5,%%xmm3                   \n"
+    "paddw     %%xmm2,%%xmm0                   \n"
+    "paddw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+    "jmp       99f                             \n"
+
+    // Blend 25 / 75.
+    LABELALIGN
+  "25:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm1)           //  movdqu    (%1,%4,1),%%xmm1
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        25b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 50 / 50.
+    LABELALIGN
+  "50:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm1)           //  movdqu    (%1,%4,1),%%xmm1
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        50b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 75 / 25.
+    LABELALIGN
+  "75:                                         \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
+    MEMOPREG(movdqu,0x00,1,4,1,xmm0)           //  movdqu    (%1,%4,1),%%xmm0
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "pavgb     %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    BUNDLEALIGN
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        75b                             \n"
+    "jmp       99f                             \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+    LABELALIGN
+  "100:                                        \n"
+    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
+    "sub       $0x10,%2                        \n"
+    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        100b                            \n"
+
+  "99:                                         \n"
+  : "+r"(dst_ptr),    // %0
+    "+r"(src_ptr),    // %1
+    "+r"(dst_width),  // %2
+    "+r"(source_y_fraction)  // %3
+  : "r"((intptr_t)(src_stride))  // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_INTERPOLATEROW_SSE2
+
+#ifdef HAS_HALFROW_SSE2
+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  asm volatile (
+    "sub       %0,%1                           \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(pavgb,0x00,0,3,1,xmm0)            //  pavgb     (%0,%3),%%xmm0
+    "sub       $0x10,%2                        \n"
+    MEMOPMEM(movdqa,xmm0,0x00,0,1,1)           //  movdqa    %%xmm0,(%0,%1)
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "jg        1b                              \n"
+  : "+r"(src_uv),  // %0
+    "+r"(dst_uv),  // %1
+    "+r"(pix)      // %2
+  : "r"((intptr_t)(src_uv_stride))  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+      , "xmm0"
+#endif
+  );
+}
+#endif  // HAS_HALFROW_SSE2
+
+#ifdef HAS_ARGBTOBAYERROW_SSSE3
+void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
+                          uint32 selector, int pix) {
+  asm volatile (
+    // NaCL caveat - assumes movd is from GPR
+    "movd      %3,%%xmm5                       \n"
+    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "punpckldq %%xmm1,%%xmm0                   \n"
+    "sub       $0x8,%2                         \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_bayer), // %1
+    "+r"(pix)        // %2
+  : "g"(selector)    // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOBAYERROW_SSSE3
+
+#ifdef HAS_ARGBTOBAYERGGROW_SSE2
+void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 selector, int pix) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrld     $0x18,%%xmm5                    \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrld     $0x8,%%xmm0                     \n"
+    "psrld     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packssdw  %%xmm1,%%xmm0                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x8,%2                         \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_bayer), // %1
+    "+r"(pix)        // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBTOBAYERGGROW_SSE2
+
+#ifdef HAS_ARGBSHUFFLEROW_SSSE3
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                          const uint8* shuffler, int pix) {
+  asm volatile (
+    "movdqa    " MEMACCESS(3) ",%%xmm5         \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "sub       $0x8,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "r"(shuffler)    // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                    const uint8* shuffler, int pix) {
+  asm volatile (
+    "movdqa    " MEMACCESS(3) ",%%xmm5         \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pshufb    %%xmm5,%%xmm0                   \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "sub       $0x8,%2                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "r"(shuffler)    // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHUFFLEROW_SSSE3
+
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  asm volatile (
+    "vbroadcastf128 " MEMACCESS(3) ",%%ymm5    \n"
+    LABELALIGN
+  "1:                                          \n"
+    "vmovdqu   " MEMACCESS(0) ",%%ymm0         \n"
+    "vmovdqu   " MEMACCESS2(0x20,0) ",%%ymm1   \n"
+    "lea       " MEMLEA(0x40,0) ",%0           \n"
+    "vpshufb   %%ymm5,%%ymm0,%%ymm0            \n"
+    "vpshufb   %%ymm5,%%ymm1,%%ymm1            \n"
+    "sub       $0x10,%2                        \n"
+    "vmovdqu   %%ymm0," MEMACCESS(1) "         \n"
+    "vmovdqu   %%ymm1," MEMACCESS2(0x20,1) "   \n"
+    "lea       " MEMLEA(0x40,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(pix)        // %2
+  : "r"(shuffler)    // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHUFFLEROW_AVX2
+
+#ifdef HAS_ARGBSHUFFLEROW_SSE2
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  uintptr_t pixel_temp = 0u;
+  asm volatile (
+    "pxor      %%xmm5,%%xmm5                   \n"
+    "mov       " MEMACCESS(4) ",%k2            \n"
+    "cmp       $0x3000102,%k2                  \n"
+    "je        3012f                           \n"
+    "cmp       $0x10203,%k2                    \n"
+    "je        123f                            \n"
+    "cmp       $0x30201,%k2                    \n"
+    "je        321f                            \n"
+    "cmp       $0x2010003,%k2                  \n"
+    "je        2103f                           \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movzb     " MEMACCESS(4) ",%2             \n"
+    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
+    "mov       %b2," MEMACCESS(1) "            \n"
+    "movzb     " MEMACCESS2(0x1,4) ",%2        \n"
+    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
+    "mov       %b2," MEMACCESS2(0x1,1) "       \n"
+    BUNDLEALIGN
+    "movzb     " MEMACCESS2(0x2,4) ",%2        \n"
+    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
+    "mov       %b2," MEMACCESS2(0x2,1) "       \n"
+    "movzb     " MEMACCESS2(0x3,4) ",%2        \n"
+    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
+    "mov       %b2," MEMACCESS2(0x3,1) "       \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    "lea       " MEMLEA(0x4,1) ",%1            \n"
+    "sub       $0x1,%3                         \n"
+    "jg        1b                              \n"
+    "jmp       99f                             \n"
+
+    LABELALIGN
+  "123:                                        \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpckhbw %%xmm5,%%xmm1                   \n"
+    "pshufhw   $0x1b,%%xmm0,%%xmm0             \n"
+    "pshuflw   $0x1b,%%xmm0,%%xmm0             \n"
+    "pshufhw   $0x1b,%%xmm1,%%xmm1             \n"
+    "pshuflw   $0x1b,%%xmm1,%%xmm1             \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        123b                            \n"
+    "jmp       99f                             \n"
+
+    LABELALIGN
+  "321:                                        \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpckhbw %%xmm5,%%xmm1                   \n"
+    "pshufhw   $0x39,%%xmm0,%%xmm0             \n"
+    "pshuflw   $0x39,%%xmm0,%%xmm0             \n"
+    "pshufhw   $0x39,%%xmm1,%%xmm1             \n"
+    "pshuflw   $0x39,%%xmm1,%%xmm1             \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        321b                            \n"
+    "jmp       99f                             \n"
+
+    LABELALIGN
+  "2103:                                       \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpckhbw %%xmm5,%%xmm1                   \n"
+    "pshufhw   $0x93,%%xmm0,%%xmm0             \n"
+    "pshuflw   $0x93,%%xmm0,%%xmm0             \n"
+    "pshufhw   $0x93,%%xmm1,%%xmm1             \n"
+    "pshuflw   $0x93,%%xmm1,%%xmm1             \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        2103b                           \n"
+    "jmp       99f                             \n"
+
+    LABELALIGN
+  "3012:                                       \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpckhbw %%xmm5,%%xmm1                   \n"
+    "pshufhw   $0xc6,%%xmm0,%%xmm0             \n"
+    "pshuflw   $0xc6,%%xmm0,%%xmm0             \n"
+    "pshufhw   $0xc6,%%xmm1,%%xmm1             \n"
+    "pshuflw   $0xc6,%%xmm1,%%xmm1             \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        3012b                           \n"
+
+  "99:                                         \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+d"(pixel_temp),  // %2
+    "+r"(pix)         // %3
+  : "r"(shuffler)      // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHUFFLEROW_SSE2
+
+#ifdef HAS_I422TOYUY2ROW_SSE2
+void I422ToYUY2Row_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_frame, int width) {
+ asm volatile (
+    "sub       %1,%2                             \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movq      " MEMACCESS(1) ",%%xmm2           \n"
+    MEMOPREG(movq,0x00,1,2,1,xmm3)               //  movq    (%1,%2,1),%%xmm3
+    "lea       " MEMLEA(0x8,1) ",%1              \n"
+    "punpcklbw %%xmm3,%%xmm2                     \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
+    "lea       " MEMLEA(0x10,0) ",%0             \n"
+    "movdqa    %%xmm0,%%xmm1                     \n"
+    "punpcklbw %%xmm2,%%xmm0                     \n"
+    "punpckhbw %%xmm2,%%xmm1                     \n"
+    "movdqu    %%xmm0," MEMACCESS(3) "           \n"
+    "movdqu    %%xmm1," MEMACCESS2(0x10,3) "     \n"
+    "lea       " MEMLEA(0x20,3) ",%3             \n"
+    "sub       $0x10,%4                          \n"
+    "jg         1b                               \n"
+    : "+r"(src_y),  // %0
+      "+r"(src_u),  // %1
+      "+r"(src_v),  // %2
+      "+r"(dst_frame),  // %3
+      "+rm"(width)  // %4
+    :
+    : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3"
+#endif
+  );
+}
+#endif  // HAS_I422TOYUY2ROW_SSE2
+
+#ifdef HAS_I422TOUYVYROW_SSE2
+void I422ToUYVYRow_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_frame, int width) {
+ asm volatile (
+    "sub        %1,%2                            \n"
+    LABELALIGN
+  "1:                                            \n"
+    "movq      " MEMACCESS(1) ",%%xmm2           \n"
+    MEMOPREG(movq,0x00,1,2,1,xmm3)               //  movq    (%1,%2,1),%%xmm3
+    "lea       " MEMLEA(0x8,1) ",%1              \n"
+    "punpcklbw %%xmm3,%%xmm2                     \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
+    "movdqa    %%xmm2,%%xmm1                     \n"
+    "lea       " MEMLEA(0x10,0) ",%0             \n"
+    "punpcklbw %%xmm0,%%xmm1                     \n"
+    "punpckhbw %%xmm0,%%xmm2                     \n"
+    "movdqu    %%xmm1," MEMACCESS(3) "           \n"
+    "movdqu    %%xmm2," MEMACCESS2(0x10,3) "     \n"
+    "lea       " MEMLEA(0x20,3) ",%3             \n"
+    "sub       $0x10,%4                          \n"
+    "jg         1b                               \n"
+    : "+r"(src_y),  // %0
+      "+r"(src_u),  // %1
+      "+r"(src_v),  // %2
+      "+r"(dst_frame),  // %3
+      "+rm"(width)  // %4
+    :
+    : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3"
+#endif
+  );
+}
+#endif  // HAS_I422TOUYVYROW_SSE2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
+void ARGBPolynomialRow_SSE2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width) {
+  asm volatile (
+    "pxor      %%xmm3,%%xmm3                   \n"
+
+    // 2 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "punpcklbw %%xmm3,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm4                   \n"
+    "punpcklwd %%xmm3,%%xmm0                   \n"
+    "punpckhwd %%xmm3,%%xmm4                   \n"
+    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
+    "cvtdq2ps  %%xmm4,%%xmm4                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm4,%%xmm5                   \n"
+    "mulps     " MEMACCESS2(0x10,3) ",%%xmm0   \n"
+    "mulps     " MEMACCESS2(0x10,3) ",%%xmm4   \n"
+    "addps     " MEMACCESS(3) ",%%xmm0         \n"
+    "addps     " MEMACCESS(3) ",%%xmm4         \n"
+    "movdqa    %%xmm1,%%xmm2                   \n"
+    "movdqa    %%xmm5,%%xmm6                   \n"
+    "mulps     %%xmm1,%%xmm2                   \n"
+    "mulps     %%xmm5,%%xmm6                   \n"
+    "mulps     %%xmm2,%%xmm1                   \n"
+    "mulps     %%xmm6,%%xmm5                   \n"
+    "mulps     " MEMACCESS2(0x20,3) ",%%xmm2   \n"
+    "mulps     " MEMACCESS2(0x20,3) ",%%xmm6   \n"
+    "mulps     " MEMACCESS2(0x30,3) ",%%xmm1   \n"
+    "mulps     " MEMACCESS2(0x30,3) ",%%xmm5   \n"
+    "addps     %%xmm2,%%xmm0                   \n"
+    "addps     %%xmm6,%%xmm4                   \n"
+    "addps     %%xmm1,%%xmm0                   \n"
+    "addps     %%xmm5,%%xmm4                   \n"
+    "cvttps2dq %%xmm0,%%xmm0                   \n"
+    "cvttps2dq %%xmm4,%%xmm4                   \n"
+    "packuswb  %%xmm4,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "sub       $0x2,%2                         \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  : "r"(poly)        // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
+void ARGBPolynomialRow_AVX2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width) {
+  asm volatile (
+    "vbroadcastf128 " MEMACCESS(3) ",%%ymm4     \n"
+    "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n"
+    "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n"
+    "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n"
+
+    // 2 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "vpmovzxbd   " MEMACCESS(0) ",%%ymm0       \n"  // 2 ARGB pixels
+    "lea         " MEMLEA(0x8,0) ",%0          \n"
+    "vcvtdq2ps   %%ymm0,%%ymm0                 \n"  // X 8 floats
+    "vmulps      %%ymm0,%%ymm0,%%ymm2          \n"  // X * X
+    "vmulps      %%ymm7,%%ymm0,%%ymm3          \n"  // C3 * X
+    "vfmadd132ps %%ymm5,%%ymm4,%%ymm0          \n"  // result = C0 + C1 * X
+    "vfmadd231ps %%ymm6,%%ymm2,%%ymm0          \n"  // result += C2 * X * X
+    "vfmadd231ps %%ymm3,%%ymm2,%%ymm0          \n"  // result += C3 * X * X * X
+    "vcvttps2dq  %%ymm0,%%ymm0                 \n"
+    "vpackusdw   %%ymm0,%%ymm0,%%ymm0          \n"
+    "vpermq      $0xd8,%%ymm0,%%ymm0           \n"
+    "vpackuswb   %%xmm0,%%xmm0,%%xmm0          \n"
+    "sub         $0x2,%2                       \n"
+    "vmovq       %%xmm0," MEMACCESS(1) "       \n"
+    "lea         " MEMLEA(0x8,1) ",%1          \n"
+    "jg          1b                            \n"
+    "vzeroupper                                \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(width)      // %2
+  : "r"(poly)        // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+// TODO(fbarchard): declare ymm usage when applicable.
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBPOLYNOMIALROW_AVX2
+
+#ifdef HAS_ARGBCOLORTABLEROW_X86
+// Tranform ARGB pixels with color table.
+void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
+                           int width) {
+  uintptr_t pixel_temp = 0u;
+  asm volatile (
+    // 1 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movzb     " MEMACCESS(0) ",%1             \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    MEMOPARG(movzb,0x00,3,1,4,1) "             \n"  // movzb (%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x4,0) "      \n"
+    "movzb     " MEMACCESS2(-0x3,0) ",%1       \n"
+    MEMOPARG(movzb,0x01,3,1,4,1) "             \n"  // movzb 0x1(%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x3,0) "      \n"
+    "movzb     " MEMACCESS2(-0x2,0) ",%1       \n"
+    MEMOPARG(movzb,0x02,3,1,4,1) "             \n"  // movzb 0x2(%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x2,0) "      \n"
+    "movzb     " MEMACCESS2(-0x1,0) ",%1       \n"
+    MEMOPARG(movzb,0x03,3,1,4,1) "             \n"  // movzb 0x3(%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x1,0) "      \n"
+    "dec       %2                              \n"
+    "jg        1b                              \n"
+  : "+r"(dst_argb),   // %0
+    "+d"(pixel_temp), // %1
+    "+r"(width)       // %2
+  : "r"(table_argb)   // %3
+  : "memory", "cc");
+}
+#endif  // HAS_ARGBCOLORTABLEROW_X86
+
+#ifdef HAS_RGBCOLORTABLEROW_X86
+// Tranform RGB pixels with color table.
+void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
+  uintptr_t pixel_temp = 0u;
+  asm volatile (
+    // 1 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movzb     " MEMACCESS(0) ",%1             \n"
+    "lea       " MEMLEA(0x4,0) ",%0            \n"
+    MEMOPARG(movzb,0x00,3,1,4,1) "             \n"  // movzb (%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x4,0) "      \n"
+    "movzb     " MEMACCESS2(-0x3,0) ",%1       \n"
+    MEMOPARG(movzb,0x01,3,1,4,1) "             \n"  // movzb 0x1(%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x3,0) "      \n"
+    "movzb     " MEMACCESS2(-0x2,0) ",%1       \n"
+    MEMOPARG(movzb,0x02,3,1,4,1) "             \n"  // movzb 0x2(%3,%1,4),%1
+    "mov       %b1," MEMACCESS2(-0x2,0) "      \n"
+    "dec       %2                              \n"
+    "jg        1b                              \n"
+  : "+r"(dst_argb),   // %0
+    "+d"(pixel_temp), // %1
+    "+r"(width)       // %2
+  : "r"(table_argb)   // %3
+  : "memory", "cc");
+}
+#endif  // HAS_RGBCOLORTABLEROW_X86
+
+#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
+// Tranform RGB pixels with luma table.
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                 int width,
+                                 const uint8* luma, uint32 lumacoeff) {
+  uintptr_t pixel_temp = 0u;
+  uintptr_t table_temp = 0u;
+  asm volatile (
+    "movd      %6,%%xmm3                       \n"
+    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
+    "pcmpeqb   %%xmm4,%%xmm4                   \n"
+    "psllw     $0x8,%%xmm4                     \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+
+    // 4 pixel loop.
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(2) ",%%xmm0         \n"
+    "pmaddubsw %%xmm3,%%xmm0                   \n"
+    "phaddw    %%xmm0,%%xmm0                   \n"
+    "pand      %%xmm4,%%xmm0                   \n"
+    "punpcklwd %%xmm5,%%xmm0                   \n"
+    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
+    "add       %5,%1                           \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+
+    "movzb     " MEMACCESS(2) ",%0             \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS(3) "            \n"
+    "movzb     " MEMACCESS2(0x1,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x1,3) "       \n"
+    "movzb     " MEMACCESS2(0x2,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x2,3) "       \n"
+    "movzb     " MEMACCESS2(0x3,2) ",%0        \n"
+    "mov       %b0," MEMACCESS2(0x3,3) "       \n"
+
+    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
+    "add       %5,%1                           \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+
+    "movzb     " MEMACCESS2(0x4,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x4,3) "       \n"
+    BUNDLEALIGN
+    "movzb     " MEMACCESS2(0x5,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x5,3) "       \n"
+    "movzb     " MEMACCESS2(0x6,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x6,3) "       \n"
+    "movzb     " MEMACCESS2(0x7,2) ",%0        \n"
+    "mov       %b0," MEMACCESS2(0x7,3) "       \n"
+
+    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
+    "add       %5,%1                           \n"
+    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
+
+    "movzb     " MEMACCESS2(0x8,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x8,3) "       \n"
+    "movzb     " MEMACCESS2(0x9,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0x9,3) "       \n"
+    "movzb     " MEMACCESS2(0xa,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0xa,3) "       \n"
+    "movzb     " MEMACCESS2(0xb,2) ",%0        \n"
+    "mov       %b0," MEMACCESS2(0xb,3) "       \n"
+
+    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
+    "add       %5,%1                           \n"
+
+    "movzb     " MEMACCESS2(0xc,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0xc,3) "       \n"
+    "movzb     " MEMACCESS2(0xd,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0xd,3) "       \n"
+    "movzb     " MEMACCESS2(0xe,2) ",%0        \n"
+    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
+    "mov       %b0," MEMACCESS2(0xe,3) "       \n"
+    "movzb     " MEMACCESS2(0xf,2) ",%0        \n"
+    "mov       %b0," MEMACCESS2(0xf,3) "       \n"
+    "sub       $0x4,%4                         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "lea       " MEMLEA(0x10,3) ",%3           \n"
+    "jg        1b                              \n"
+  : "+d"(pixel_temp),  // %0
+    "+a"(table_temp),  // %1
+    "+r"(src_argb),    // %2
+    "+r"(dst_argb),    // %3
+    "+rm"(width)       // %4
+  : "r"(luma),         // %5
+    "rm"(lumacoeff)    // %6
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3
+
+#endif  // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_win.cc b/src/main/jni/libyuv/source/row_win.cc
new file mode 100644
index 000000000..f58fc5138
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_win.cc
@@ -0,0 +1,7402 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+#include <emmintrin.h>
+#include <tmmintrin.h>  // For _mm_maddubs_epi16
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for Visual C.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+
+#define YG 74  /* (int8)(1.164 * 64 + 0.5) */
+
+#define UB 127  /* min(127,(int8)(2.018 * 64)) */
+#define UG -25  /* (int8)(-0.391 * 64 - 0.5) */
+#define UR 0
+
+#define VB 0
+#define VG -52  /* (int8)(-0.813 * 64 - 0.5) */
+#define VR 102  /* (int8)(1.596 * 64 + 0.5) */
+
+// Bias
+#define BB UB * 128 + VB * 128
+#define BG UG * 128 + VG * 128
+#define BR UR * 128 + VR * 128
+
+static const vec8 kUVToB = {
+  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
+};
+
+static const vec8 kUVToR = {
+  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
+};
+
+static const vec8 kUVToG = {
+  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
+};
+
+static const vec8 kVUToB = {
+  VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
+};
+
+static const vec8 kVUToR = {
+  VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
+};
+
+static const vec8 kVUToG = {
+  VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+};
+
+static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
+static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
+static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
+static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
+static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
+
+// 64 bit
+#if defined(_M_X64)
+
+// Aligned destination version.
+__declspec(align(16))
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __m128i xmm0, xmm1, xmm2, xmm3;
+  const __m128i xmm5 = _mm_set1_epi8(-1);
+  const __m128i xmm4 = _mm_setzero_si128();
+  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+
+  while (width > 0) {
+    xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
+    xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm2 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
+    xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
+    xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
+    xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
+    xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
+    xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
+    xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
+    xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
+    xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
+    xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
+    xmm0 = _mm_adds_epi16(xmm0, xmm3);
+    xmm1 = _mm_adds_epi16(xmm1, xmm3);
+    xmm2 = _mm_adds_epi16(xmm2, xmm3);
+    xmm0 = _mm_srai_epi16(xmm0, 6);
+    xmm1 = _mm_srai_epi16(xmm1, 6);
+    xmm2 = _mm_srai_epi16(xmm2, 6);
+    xmm0 = _mm_packus_epi16(xmm0, xmm0);
+    xmm1 = _mm_packus_epi16(xmm1, xmm1);
+    xmm2 = _mm_packus_epi16(xmm2, xmm2);
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
+    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
+
+    _mm_store_si128((__m128i *)dst_argb, xmm0);
+    _mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
+
+    y_buf += 8;
+    u_buf += 4;
+    dst_argb += 32;
+    width -= 8;
+  }
+}
+
+// Unaligned destination version.
+void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __m128i xmm0, xmm1, xmm2, xmm3;
+  const __m128i xmm5 = _mm_set1_epi8(-1);
+  const __m128i xmm4 = _mm_setzero_si128();
+  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+
+  while (width > 0) {
+    xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
+    xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm2 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
+    xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
+    xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
+    xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
+    xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
+    xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
+    xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
+    xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
+    xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
+    xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
+    xmm0 = _mm_adds_epi16(xmm0, xmm3);
+    xmm1 = _mm_adds_epi16(xmm1, xmm3);
+    xmm2 = _mm_adds_epi16(xmm2, xmm3);
+    xmm0 = _mm_srai_epi16(xmm0, 6);
+    xmm1 = _mm_srai_epi16(xmm1, 6);
+    xmm2 = _mm_srai_epi16(xmm2, 6);
+    xmm0 = _mm_packus_epi16(xmm0, xmm0);
+    xmm1 = _mm_packus_epi16(xmm1, xmm1);
+    xmm2 = _mm_packus_epi16(xmm2, xmm2);
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
+    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
+
+    _mm_storeu_si128((__m128i *)dst_argb, xmm0);
+    _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
+
+    y_buf += 8;
+    u_buf += 4;
+    dst_argb += 32;
+    width -= 8;
+  }
+}
+// 32 bit
+#else  // defined(_M_X64)
+
+#ifdef HAS_ARGBTOYROW_SSSE3
+
+// Constants for ARGB.
+static const vec8 kARGBToY = {
+  13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
+};
+
+// JPeg full range.
+static const vec8 kARGBToYJ = {
+  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
+};
+
+static const vec8 kARGBToU = {
+  112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
+};
+
+static const vec8 kARGBToUJ = {
+  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
+};
+
+static const vec8 kARGBToV = {
+  -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
+};
+
+static const vec8 kARGBToVJ = {
+  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
+};
+
+// vpermd for vphaddw + vpackuswb vpermd.
+static const lvec32 kPermdARGBToY_AVX = {
+  0, 4, 1, 5, 2, 6, 3, 7
+};
+
+// vpshufb for vphaddw + vpackuswb packed to shorts.
+static const lvec8 kShufARGBToUV_AVX = {
+  0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+  0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+};
+
+// Constants for BGRA.
+static const vec8 kBGRAToY = {
+  0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
+};
+
+static const vec8 kBGRAToU = {
+  0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
+};
+
+static const vec8 kBGRAToV = {
+  0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
+};
+
+// Constants for ABGR.
+static const vec8 kABGRToY = {
+  33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
+};
+
+static const vec8 kABGRToU = {
+  -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
+};
+
+static const vec8 kABGRToV = {
+  112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
+};
+
+// Constants for RGBA.
+static const vec8 kRGBAToY = {
+  0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
+};
+
+static const vec8 kRGBAToU = {
+  0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
+};
+
+static const vec8 kRGBAToV = {
+  0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
+};
+
+static const uvec8 kAddY16 = {
+  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
+};
+
+static const vec16 kAddYJ64 = {
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
+static const uvec8 kAddUV128 = {
+  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
+};
+
+static const uvec16 kAddUVJ128 = {
+  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
+};
+
+// Shuffle table for converting RGB24 to ARGB.
+static const uvec8 kShuffleMaskRGB24ToARGB = {
+  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
+};
+
+// Shuffle table for converting RAW to ARGB.
+static const uvec8 kShuffleMaskRAWToARGB = {
+  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
+};
+
+// Shuffle table for converting ARGB to RGB24.
+static const uvec8 kShuffleMaskARGBToRGB24 = {
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting ARGB to RAW.
+static const uvec8 kShuffleMaskARGBToRAW = {
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
+};
+
+// Shuffle table for converting ARGBToRGB24 for I422ToRGB24.  First 8 + next 4
+static const uvec8 kShuffleMaskARGBToRGB24_0 = {
+  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
+};
+
+// Shuffle table for converting ARGB to RAW.
+static const uvec8 kShuffleMaskARGBToRAW_0 = {
+  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
+};
+
+// Duplicates gray value 3 times and fills in alpha opaque.
+__declspec(naked) __declspec(align(16))
+void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
+  __asm {
+    mov        eax, [esp + 4]        // src_y
+    mov        edx, [esp + 8]        // dst_argb
+    mov        ecx, [esp + 12]       // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
+    pslld      xmm5, 24
+
+    align      4
+  convertloop:
+    movq       xmm0, qword ptr [eax]
+    lea        eax,  [eax + 8]
+    punpcklbw  xmm0, xmm0
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm0
+    punpckhwd  xmm1, xmm1
+    por        xmm0, xmm5
+    por        xmm1, xmm5
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
+                                  int pix) {
+  __asm {
+    mov        eax, [esp + 4]        // src_y
+    mov        edx, [esp + 8]        // dst_argb
+    mov        ecx, [esp + 12]       // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
+    pslld      xmm5, 24
+
+    align      4
+  convertloop:
+    movq       xmm0, qword ptr [eax]
+    lea        eax,  [eax + 8]
+    punpcklbw  xmm0, xmm0
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm0
+    punpckhwd  xmm1, xmm1
+    por        xmm0, xmm5
+    por        xmm1, xmm5
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_rgb24
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    pcmpeqb   xmm5, xmm5       // generate mask 0xff000000
+    pslld     xmm5, 24
+    movdqa    xmm4, kShuffleMaskRGB24ToARGB
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm3, [eax + 32]
+    lea       eax, [eax + 48]
+    movdqa    xmm2, xmm3
+    palignr   xmm2, xmm1, 8    // xmm2 = { xmm3[0:3] xmm1[8:15]}
+    pshufb    xmm2, xmm4
+    por       xmm2, xmm5
+    palignr   xmm1, xmm0, 12   // xmm1 = { xmm3[0:7] xmm0[12:15]}
+    pshufb    xmm0, xmm4
+    movdqa    [edx + 32], xmm2
+    por       xmm0, xmm5
+    pshufb    xmm1, xmm4
+    movdqa    [edx], xmm0
+    por       xmm1, xmm5
+    palignr   xmm3, xmm3, 4    // xmm3 = { xmm3[4:15]}
+    pshufb    xmm3, xmm4
+    movdqa    [edx + 16], xmm1
+    por       xmm3, xmm5
+    sub       ecx, 16
+    movdqa    [edx + 48], xmm3
+    lea       edx, [edx + 64]
+    jg        convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
+                        int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_raw
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    pcmpeqb   xmm5, xmm5       // generate mask 0xff000000
+    pslld     xmm5, 24
+    movdqa    xmm4, kShuffleMaskRAWToARGB
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm3, [eax + 32]
+    lea       eax, [eax + 48]
+    movdqa    xmm2, xmm3
+    palignr   xmm2, xmm1, 8    // xmm2 = { xmm3[0:3] xmm1[8:15]}
+    pshufb    xmm2, xmm4
+    por       xmm2, xmm5
+    palignr   xmm1, xmm0, 12   // xmm1 = { xmm3[0:7] xmm0[12:15]}
+    pshufb    xmm0, xmm4
+    movdqa    [edx + 32], xmm2
+    por       xmm0, xmm5
+    pshufb    xmm1, xmm4
+    movdqa    [edx], xmm0
+    por       xmm1, xmm5
+    palignr   xmm3, xmm3, 4    // xmm3 = { xmm3[4:15]}
+    pshufb    xmm3, xmm4
+    movdqa    [edx + 16], xmm1
+    por       xmm3, xmm5
+    sub       ecx, 16
+    movdqa    [edx + 48], xmm3
+    lea       edx, [edx + 64]
+    jg        convertloop
+    ret
+  }
+}
+
+// pmul method to replicate bits.
+// Math to replicate bits:
+// (v << 8) | (v << 3)
+// v * 256 + v * 8
+// v * (256 + 8)
+// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
+// 20 instructions.
+__declspec(naked) __declspec(align(16))
+void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
+                          int pix) {
+  __asm {
+    mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
+    movd      xmm5, eax
+    pshufd    xmm5, xmm5, 0
+    mov       eax, 0x20802080  // multiplier shift by 5 and then repeat 6 bits
+    movd      xmm6, eax
+    pshufd    xmm6, xmm6, 0
+    pcmpeqb   xmm3, xmm3       // generate mask 0xf800f800 for Red
+    psllw     xmm3, 11
+    pcmpeqb   xmm4, xmm4       // generate mask 0x07e007e0 for Green
+    psllw     xmm4, 10
+    psrlw     xmm4, 5
+    pcmpeqb   xmm7, xmm7       // generate mask 0xff00ff00 for Alpha
+    psllw     xmm7, 8
+
+    mov       eax, [esp + 4]   // src_rgb565
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    sub       edx, eax
+    sub       edx, eax
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]   // fetch 8 pixels of bgr565
+    movdqa    xmm1, xmm0
+    movdqa    xmm2, xmm0
+    pand      xmm1, xmm3    // R in upper 5 bits
+    psllw     xmm2, 11      // B in upper 5 bits
+    pmulhuw   xmm1, xmm5    // * (256 + 8)
+    pmulhuw   xmm2, xmm5    // * (256 + 8)
+    psllw     xmm1, 8
+    por       xmm1, xmm2    // RB
+    pand      xmm0, xmm4    // G in middle 6 bits
+    pmulhuw   xmm0, xmm6    // << 5 * (256 + 4)
+    por       xmm0, xmm7    // AG
+    movdqa    xmm2, xmm1
+    punpcklbw xmm1, xmm0
+    punpckhbw xmm2, xmm0
+    movdqa    [eax * 2 + edx], xmm1  // store 4 pixels of ARGB
+    movdqa    [eax * 2 + edx + 16], xmm2  // store next 4 pixels of ARGB
+    lea       eax, [eax + 16]
+    sub       ecx, 8
+    jg        convertloop
+    ret
+  }
+}
+
+// 24 instructions
+__declspec(naked) __declspec(align(16))
+void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
+                            int pix) {
+  __asm {
+    mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
+    movd      xmm5, eax
+    pshufd    xmm5, xmm5, 0
+    mov       eax, 0x42004200  // multiplier shift by 6 and then repeat 5 bits
+    movd      xmm6, eax
+    pshufd    xmm6, xmm6, 0
+    pcmpeqb   xmm3, xmm3       // generate mask 0xf800f800 for Red
+    psllw     xmm3, 11
+    movdqa    xmm4, xmm3       // generate mask 0x03e003e0 for Green
+    psrlw     xmm4, 6
+    pcmpeqb   xmm7, xmm7       // generate mask 0xff00ff00 for Alpha
+    psllw     xmm7, 8
+
+    mov       eax, [esp + 4]   // src_argb1555
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    sub       edx, eax
+    sub       edx, eax
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]   // fetch 8 pixels of 1555
+    movdqa    xmm1, xmm0
+    movdqa    xmm2, xmm0
+    psllw     xmm1, 1       // R in upper 5 bits
+    psllw     xmm2, 11      // B in upper 5 bits
+    pand      xmm1, xmm3
+    pmulhuw   xmm2, xmm5    // * (256 + 8)
+    pmulhuw   xmm1, xmm5    // * (256 + 8)
+    psllw     xmm1, 8
+    por       xmm1, xmm2    // RB
+    movdqa    xmm2, xmm0
+    pand      xmm0, xmm4    // G in middle 5 bits
+    psraw     xmm2, 8       // A
+    pmulhuw   xmm0, xmm6    // << 6 * (256 + 8)
+    pand      xmm2, xmm7
+    por       xmm0, xmm2    // AG
+    movdqa    xmm2, xmm1
+    punpcklbw xmm1, xmm0
+    punpckhbw xmm2, xmm0
+    movdqa    [eax * 2 + edx], xmm1  // store 4 pixels of ARGB
+    movdqa    [eax * 2 + edx + 16], xmm2  // store next 4 pixels of ARGB
+    lea       eax, [eax + 16]
+    sub       ecx, 8
+    jg        convertloop
+    ret
+  }
+}
+
+// 18 instructions.
+__declspec(naked) __declspec(align(16))
+void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
+                            int pix) {
+  __asm {
+    mov       eax, 0x0f0f0f0f  // generate mask 0x0f0f0f0f
+    movd      xmm4, eax
+    pshufd    xmm4, xmm4, 0
+    movdqa    xmm5, xmm4       // 0xf0f0f0f0 for high nibbles
+    pslld     xmm5, 4
+    mov       eax, [esp + 4]   // src_argb4444
+    mov       edx, [esp + 8]   // dst_argb
+    mov       ecx, [esp + 12]  // pix
+    sub       edx, eax
+    sub       edx, eax
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]   // fetch 8 pixels of bgra4444
+    movdqa    xmm2, xmm0
+    pand      xmm0, xmm4    // mask low nibbles
+    pand      xmm2, xmm5    // mask high nibbles
+    movdqa    xmm1, xmm0
+    movdqa    xmm3, xmm2
+    psllw     xmm1, 4
+    psrlw     xmm3, 4
+    por       xmm0, xmm1
+    por       xmm2, xmm3
+    movdqa    xmm1, xmm0
+    punpcklbw xmm0, xmm2
+    punpckhbw xmm1, xmm2
+    movdqa    [eax * 2 + edx], xmm0  // store 4 pixels of ARGB
+    movdqa    [eax * 2 + edx + 16], xmm1  // store next 4 pixels of ARGB
+    lea       eax, [eax + 16]
+    sub       ecx, 8
+    jg        convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_argb
+    mov       edx, [esp + 8]   // dst_rgb
+    mov       ecx, [esp + 12]  // pix
+    movdqa    xmm6, kShuffleMaskARGBToRGB24
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm2, [eax + 32]
+    movdqu    xmm3, [eax + 48]
+    lea       eax, [eax + 64]
+    pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
+    pshufb    xmm1, xmm6
+    pshufb    xmm2, xmm6
+    pshufb    xmm3, xmm6
+    movdqa    xmm4, xmm1   // 4 bytes from 1 for 0
+    psrldq    xmm1, 4      // 8 bytes from 1
+    pslldq    xmm4, 12     // 4 bytes from 1 for 0
+    movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
+    por       xmm0, xmm4   // 4 bytes from 1 for 0
+    pslldq    xmm5, 8      // 8 bytes from 2 for 1
+    movdqu    [edx], xmm0  // store 0
+    por       xmm1, xmm5   // 8 bytes from 2 for 1
+    psrldq    xmm2, 8      // 4 bytes from 2
+    pslldq    xmm3, 4      // 12 bytes from 3 for 2
+    por       xmm2, xmm3   // 12 bytes from 3 for 2
+    movdqu    [edx + 16], xmm1   // store 1
+    movdqu    [edx + 32], xmm2   // store 2
+    lea       edx, [edx + 48]
+    sub       ecx, 16
+    jg        convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_argb
+    mov       edx, [esp + 8]   // dst_rgb
+    mov       ecx, [esp + 12]  // pix
+    movdqa    xmm6, kShuffleMaskARGBToRAW
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
+    movdqu    xmm1, [eax + 16]
+    movdqu    xmm2, [eax + 32]
+    movdqu    xmm3, [eax + 48]
+    lea       eax, [eax + 64]
+    pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
+    pshufb    xmm1, xmm6
+    pshufb    xmm2, xmm6
+    pshufb    xmm3, xmm6
+    movdqa    xmm4, xmm1   // 4 bytes from 1 for 0
+    psrldq    xmm1, 4      // 8 bytes from 1
+    pslldq    xmm4, 12     // 4 bytes from 1 for 0
+    movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
+    por       xmm0, xmm4   // 4 bytes from 1 for 0
+    pslldq    xmm5, 8      // 8 bytes from 2 for 1
+    movdqu    [edx], xmm0  // store 0
+    por       xmm1, xmm5   // 8 bytes from 2 for 1
+    psrldq    xmm2, 8      // 4 bytes from 2
+    pslldq    xmm3, 4      // 12 bytes from 3 for 2
+    por       xmm2, xmm3   // 12 bytes from 3 for 2
+    movdqu    [edx + 16], xmm1   // store 1
+    movdqu    [edx + 32], xmm2   // store 2
+    lea       edx, [edx + 48]
+    sub       ecx, 16
+    jg        convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_argb
+    mov       edx, [esp + 8]   // dst_rgb
+    mov       ecx, [esp + 12]  // pix
+    pcmpeqb   xmm3, xmm3       // generate mask 0x0000001f
+    psrld     xmm3, 27
+    pcmpeqb   xmm4, xmm4       // generate mask 0x000007e0
+    psrld     xmm4, 26
+    pslld     xmm4, 5
+    pcmpeqb   xmm5, xmm5       // generate mask 0xfffff800
+    pslld     xmm5, 11
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
+    movdqa    xmm1, xmm0    // B
+    movdqa    xmm2, xmm0    // G
+    pslld     xmm0, 8       // R
+    psrld     xmm1, 3       // B
+    psrld     xmm2, 5       // G
+    psrad     xmm0, 16      // R
+    pand      xmm1, xmm3    // B
+    pand      xmm2, xmm4    // G
+    pand      xmm0, xmm5    // R
+    por       xmm1, xmm2    // BG
+    por       xmm0, xmm1    // BGR
+    packssdw  xmm0, xmm0
+    lea       eax, [eax + 16]
+    movq      qword ptr [edx], xmm0  // store 4 pixels of RGB565
+    lea       edx, [edx + 8]
+    sub       ecx, 4
+    jg        convertloop
+    ret
+  }
+}
+
+// TODO(fbarchard): Improve sign extension/packing.
+__declspec(naked) __declspec(align(16))
+void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_argb
+    mov       edx, [esp + 8]   // dst_rgb
+    mov       ecx, [esp + 12]  // pix
+    pcmpeqb   xmm4, xmm4       // generate mask 0x0000001f
+    psrld     xmm4, 27
+    movdqa    xmm5, xmm4       // generate mask 0x000003e0
+    pslld     xmm5, 5
+    movdqa    xmm6, xmm4       // generate mask 0x00007c00
+    pslld     xmm6, 10
+    pcmpeqb   xmm7, xmm7       // generate mask 0xffff8000
+    pslld     xmm7, 15
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
+    movdqa    xmm1, xmm0    // B
+    movdqa    xmm2, xmm0    // G
+    movdqa    xmm3, xmm0    // R
+    psrad     xmm0, 16      // A
+    psrld     xmm1, 3       // B
+    psrld     xmm2, 6       // G
+    psrld     xmm3, 9       // R
+    pand      xmm0, xmm7    // A
+    pand      xmm1, xmm4    // B
+    pand      xmm2, xmm5    // G
+    pand      xmm3, xmm6    // R
+    por       xmm0, xmm1    // BA
+    por       xmm2, xmm3    // GR
+    por       xmm0, xmm2    // BGRA
+    packssdw  xmm0, xmm0
+    lea       eax, [eax + 16]
+    movq      qword ptr [edx], xmm0  // store 4 pixels of ARGB1555
+    lea       edx, [edx + 8]
+    sub       ecx, 4
+    jg        convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
+  __asm {
+    mov       eax, [esp + 4]   // src_argb
+    mov       edx, [esp + 8]   // dst_rgb
+    mov       ecx, [esp + 12]  // pix
+    pcmpeqb   xmm4, xmm4       // generate mask 0xf000f000
+    psllw     xmm4, 12
+    movdqa    xmm3, xmm4       // generate mask 0x00f000f0
+    psrlw     xmm3, 8
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
+    movdqa    xmm1, xmm0
+    pand      xmm0, xmm3    // low nibble
+    pand      xmm1, xmm4    // high nibble
+    psrl      xmm0, 4
+    psrl      xmm1, 8
+    por       xmm0, xmm1
+    packuswb  xmm0, xmm0
+    lea       eax, [eax + 16]
+    movq      qword ptr [edx], xmm0  // store 4 pixels of ARGB4444
+    lea       edx, [edx + 8]
+    sub       ecx, 4
+    jg        convertloop
+    ret
+  }
+}
+
+// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
+__declspec(naked) __declspec(align(16))
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kARGBToY
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
+__declspec(naked) __declspec(align(16))
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5  // Add .5 for rounding.
+    paddw      xmm2, xmm5
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+#ifdef HAS_ARGBTOYROW_AVX2
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+__declspec(naked) __declspec(align(32))
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    vbroadcastf128 ymm4, kARGBToY
+    vbroadcastf128 ymm5, kAddY16
+    vmovdqa    ymm6, kPermdARGBToY_AVX
+
+    align      4
+ convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    vmovdqu    ymm2, [eax + 64]
+    vmovdqu    ymm3, [eax + 96]
+    vpmaddubsw ymm0, ymm0, ymm4
+    vpmaddubsw ymm1, ymm1, ymm4
+    vpmaddubsw ymm2, ymm2, ymm4
+    vpmaddubsw ymm3, ymm3, ymm4
+    lea        eax, [eax + 128]
+    vphaddw    ymm0, ymm0, ymm1  // mutates.
+    vphaddw    ymm2, ymm2, ymm3
+    vpsrlw     ymm0, ymm0, 7
+    vpsrlw     ymm2, ymm2, 7
+    vpackuswb  ymm0, ymm0, ymm2  // mutates.
+    vpermd     ymm0, ymm6, ymm0  // For vphaddw + vpackuswb mutation.
+    vpaddb     ymm0, ymm0, ymm5
+    sub        ecx, 32
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    jg         convertloop
+    vzeroupper
+    ret
+  }
+}
+#endif  //  HAS_ARGBTOYROW_AVX2
+
+#ifdef HAS_ARGBTOYROW_AVX2
+// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
+__declspec(naked) __declspec(align(32))
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    vbroadcastf128 ymm4, kARGBToYJ
+    vbroadcastf128 ymm5, kAddYJ64
+    vmovdqa    ymm6, kPermdARGBToY_AVX
+
+    align      4
+ convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    vmovdqu    ymm2, [eax + 64]
+    vmovdqu    ymm3, [eax + 96]
+    vpmaddubsw ymm0, ymm0, ymm4
+    vpmaddubsw ymm1, ymm1, ymm4
+    vpmaddubsw ymm2, ymm2, ymm4
+    vpmaddubsw ymm3, ymm3, ymm4
+    lea        eax, [eax + 128]
+    vphaddw    ymm0, ymm0, ymm1  // mutates.
+    vphaddw    ymm2, ymm2, ymm3
+    vpaddw     ymm0, ymm0, ymm5  // Add .5 for rounding.
+    vpaddw     ymm2, ymm2, ymm5
+    vpsrlw     ymm0, ymm0, 7
+    vpsrlw     ymm2, ymm2, 7
+    vpackuswb  ymm0, ymm0, ymm2  // mutates.
+    vpermd     ymm0, ymm6, ymm0  // For vphaddw + vpackuswb mutation.
+    sub        ecx, 32
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    jg         convertloop
+
+    vzeroupper
+    ret
+  }
+}
+#endif  //  HAS_ARGBTOYJROW_AVX2
+
+__declspec(naked) __declspec(align(16))
+void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kARGBToY
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5
+    paddw      xmm2, xmm5
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kBGRAToY
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kBGRAToY
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kABGRToY
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kABGRToY
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kRGBAToY
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_y */
+    mov        ecx, [esp + 12]  /* pix */
+    movdqa     xmm5, kAddY16
+    movdqa     xmm4, kRGBAToY
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm2, xmm4
+    pmaddubsw  xmm3, xmm4
+    lea        eax, [eax + 64]
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psrlw      xmm0, 7
+    psrlw      xmm2, 7
+    packuswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToUJ
+    movdqa     xmm6, kARGBToVJ
+    movdqa     xmm5, kAddUVJ128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    paddw      xmm0, xmm5            // +.5 rounding -> unsigned
+    paddw      xmm1, xmm5
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+#ifdef HAS_ARGBTOUVROW_AVX2
+__declspec(naked) __declspec(align(32))
+void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
+                      uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    vbroadcastf128 ymm5, kAddUV128
+    vbroadcastf128 ymm6, kARGBToV
+    vbroadcastf128 ymm7, kARGBToU
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 32x2 argb pixels to 16x1 */
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    vmovdqu    ymm2, [eax + 64]
+    vmovdqu    ymm3, [eax + 96]
+    vpavgb     ymm0, ymm0, [eax + esi]
+    vpavgb     ymm1, ymm1, [eax + esi + 32]
+    vpavgb     ymm2, ymm2, [eax + esi + 64]
+    vpavgb     ymm3, ymm3, [eax + esi + 96]
+    lea        eax,  [eax + 128]
+    vshufps    ymm4, ymm0, ymm1, 0x88
+    vshufps    ymm0, ymm0, ymm1, 0xdd
+    vpavgb     ymm0, ymm0, ymm4  // mutated by vshufps
+    vshufps    ymm4, ymm2, ymm3, 0x88
+    vshufps    ymm2, ymm2, ymm3, 0xdd
+    vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 32 different pixels, its 16 pixels of U and 16 of V
+    vpmaddubsw ymm1, ymm0, ymm7  // U
+    vpmaddubsw ymm3, ymm2, ymm7
+    vpmaddubsw ymm0, ymm0, ymm6  // V
+    vpmaddubsw ymm2, ymm2, ymm6
+    vphaddw    ymm1, ymm1, ymm3  // mutates
+    vphaddw    ymm0, ymm0, ymm2
+    vpsraw     ymm1, ymm1, 8
+    vpsraw     ymm0, ymm0, 8
+    vpacksswb  ymm0, ymm1, ymm0  // mutates
+    vpermq     ymm0, ymm0, 0xd8  // For vpacksswb
+    vpshufb    ymm0, ymm0, kShufARGBToUV_AVX  // For vshufps + vphaddw
+    vpaddb     ymm0, ymm0, ymm5  // -> unsigned
+
+    // step 3 - store 16 U and 16 V values
+    sub         ecx, 32
+    vextractf128 [edx], ymm0, 0 // U
+    vextractf128 [edx + edi], ymm0, 1 // V
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBTOUVROW_AVX2
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToUJ
+    movdqa     xmm6, kARGBToVJ
+    movdqa     xmm5, kAddUVJ128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    paddw      xmm0, xmm5            // +.5 rounding -> unsigned
+    paddw      xmm1, xmm5
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
+                          uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]   // src_argb
+    mov        edx, [esp + 4 + 8]   // dst_u
+    mov        edi, [esp + 4 + 12]  // dst_v
+    mov        ecx, [esp + 4 + 16]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* convert to U and V */
+    movdqa     xmm0, [eax]          // U
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm7
+    pmaddubsw  xmm1, xmm7
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm3, xmm7
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psraw      xmm0, 8
+    psraw      xmm2, 8
+    packsswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx,  16
+    movdqa     [edx], xmm0
+
+    movdqa     xmm0, [eax]          // V
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm6
+    pmaddubsw  xmm1, xmm6
+    pmaddubsw  xmm2, xmm6
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psraw      xmm0, 8
+    psraw      xmm2, 8
+    packsswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    lea        eax,  [eax + 64]
+    movdqa     [edx + edi], xmm0
+    lea        edx,  [edx + 16]
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
+                                    uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]   // src_argb
+    mov        edx, [esp + 4 + 8]   // dst_u
+    mov        edi, [esp + 4 + 12]  // dst_v
+    mov        ecx, [esp + 4 + 16]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* convert to U and V */
+    movdqu     xmm0, [eax]          // U
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm7
+    pmaddubsw  xmm1, xmm7
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm3, xmm7
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psraw      xmm0, 8
+    psraw      xmm2, 8
+    packsswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    sub        ecx,  16
+    movdqu     [edx], xmm0
+
+    movdqu     xmm0, [eax]          // V
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    pmaddubsw  xmm0, xmm6
+    pmaddubsw  xmm1, xmm6
+    pmaddubsw  xmm2, xmm6
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm1
+    phaddw     xmm2, xmm3
+    psraw      xmm0, 8
+    psraw      xmm2, 8
+    packsswb   xmm0, xmm2
+    paddb      xmm0, xmm5
+    lea        eax,  [eax + 64]
+    movdqu     [edx + edi], xmm0
+    lea        edx,  [edx + 16]
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
+                          uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]   // src_argb
+    mov        edx, [esp + 4 + 8]   // dst_u
+    mov        edi, [esp + 4 + 12]  // dst_v
+    mov        ecx, [esp + 4 + 16]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
+                                    uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]   // src_argb
+    mov        edx, [esp + 4 + 8]   // dst_u
+    mov        edi, [esp + 4 + 12]  // dst_v
+    mov        ecx, [esp + 4 + 16]  // pix
+    movdqa     xmm7, kARGBToU
+    movdqa     xmm6, kARGBToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kBGRAToU
+    movdqa     xmm6, kBGRAToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kBGRAToU
+    movdqa     xmm6, kBGRAToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kABGRToU
+    movdqa     xmm6, kABGRToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kABGRToU
+    movdqa     xmm6, kABGRToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kRGBAToU
+    movdqa     xmm6, kRGBAToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                 uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kRGBAToU
+    movdqa     xmm6, kRGBAToV
+    movdqa     xmm5, kAddUV128
+    sub        edi, edx             // stride from u to v
+
+    align      4
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+    paddb      xmm0, xmm5            // -> unsigned
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0 // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBTOYROW_SSSE3
+
+#ifdef HAS_I422TOARGBROW_AVX2
+
+static const lvec8 kUVToB_AVX = {
+  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
+  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
+};
+static const lvec8 kUVToR_AVX = {
+  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR,
+  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
+};
+static const lvec8 kUVToG_AVX = {
+  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
+  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
+};
+static const lvec16 kYToRgb_AVX = {
+  YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
+};
+static const lvec16 kYSub16_AVX = {
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+};
+static const lvec16 kUVBiasB_AVX = {
+  BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
+};
+static const lvec16 kUVBiasG_AVX = {
+  BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
+};
+static const lvec16 kUVBiasR_AVX = {
+  BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
+};
+
+// 16 pixels
+// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToARGBRow_AVX2(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_argb,
+                        int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // argb
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha
+    vpxor      ymm4, ymm4, ymm4
+
+    align      4
+ convertloop:
+    vmovq      xmm0, qword ptr [esi]          //  U
+    vmovq      xmm1, qword ptr [esi + edi]    //  V
+    lea        esi,  [esi + 8]
+    vpunpcklbw ymm0, ymm0, ymm1               // UV
+    vpermq     ymm0, ymm0, 0xd8
+    vpunpcklwd ymm0, ymm0, ymm0              // UVUV
+    vpmaddubsw ymm2, ymm0, kUVToB_AVX        // scale B UV
+    vpmaddubsw ymm1, ymm0, kUVToG_AVX        // scale G UV
+    vpmaddubsw ymm0, ymm0, kUVToR_AVX        // scale R UV
+    vpsubw     ymm2, ymm2, kUVBiasB_AVX      // unbias back to signed
+    vpsubw     ymm1, ymm1, kUVBiasG_AVX
+    vpsubw     ymm0, ymm0, kUVBiasR_AVX
+
+    // Step 2: Find Y contribution to 16 R,G,B values
+    vmovdqu    xmm3, [eax]                  // NOLINT
+    lea        eax, [eax + 16]
+    vpermq     ymm3, ymm3, 0xd8
+    vpunpcklbw ymm3, ymm3, ymm4
+    vpsubsw    ymm3, ymm3, kYSub16_AVX
+    vpmullw    ymm3, ymm3, kYToRgb_AVX
+    vpaddsw    ymm2, ymm2, ymm3           // B += Y
+    vpaddsw    ymm1, ymm1, ymm3           // G += Y
+    vpaddsw    ymm0, ymm0, ymm3           // R += Y
+    vpsraw     ymm2, ymm2, 6
+    vpsraw     ymm1, ymm1, 6
+    vpsraw     ymm0, ymm0, 6
+    vpackuswb  ymm2, ymm2, ymm2           // B
+    vpackuswb  ymm1, ymm1, ymm1           // G
+    vpackuswb  ymm0, ymm0, ymm0           // R
+
+    // Step 3: Weave into ARGB
+    vpunpcklbw ymm2, ymm2, ymm1           // BG
+    vpermq     ymm2, ymm2, 0xd8
+    vpunpcklbw ymm0, ymm0, ymm5           // RA
+    vpermq     ymm0, ymm0, 0xd8
+    vpunpcklwd ymm1, ymm2, ymm0           // BGRA first 8 pixels
+    vpunpckhwd ymm2, ymm2, ymm0           // BGRA next 8 pixels
+    vmovdqu    [edx], ymm1
+    vmovdqu    [edx + 32], ymm2
+    lea        edx,  [edx + 64]
+    sub        ecx, 16
+    jg         convertloop
+    vzeroupper
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_I422TOARGBROW_AVX2
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+
+// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
+
+// Read 8 UV from 444.
+#define READYUV444 __asm {                                                     \
+    __asm movq       xmm0, qword ptr [esi] /* U */                /* NOLINT */ \
+    __asm movq       xmm1, qword ptr [esi + edi] /* V */          /* NOLINT */ \
+    __asm lea        esi,  [esi + 8]                                           \
+    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
+  }
+
+// Read 4 UV from 422, upsample to 8 UV.
+#define READYUV422 __asm {                                                     \
+    __asm movd       xmm0, [esi]          /* U */                              \
+    __asm movd       xmm1, [esi + edi]    /* V */                              \
+    __asm lea        esi,  [esi + 4]                                           \
+    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
+    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
+  }
+
+// Read 2 UV from 411, upsample to 8 UV.
+#define READYUV411 __asm {                                                     \
+    __asm movzx      ebx, word ptr [esi]        /* U */           /* NOLINT */ \
+    __asm movd       xmm0, ebx                                                 \
+    __asm movzx      ebx, word ptr [esi + edi]  /* V */           /* NOLINT */ \
+    __asm movd       xmm1, ebx                                                 \
+    __asm lea        esi,  [esi + 2]                                           \
+    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
+    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
+    __asm punpckldq  xmm0, xmm0           /* UVUV (upsample) */                \
+  }
+
+// Read 4 UV from NV12, upsample to 8 UV.
+#define READNV12 __asm {                                                       \
+    __asm movq       xmm0, qword ptr [esi] /* UV */               /* NOLINT */ \
+    __asm lea        esi,  [esi + 8]                                           \
+    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
+  }
+
+// Convert 8 pixels: 8 UV and 8 Y.
+#define YUVTORGB __asm {                                                       \
+    /* Step 1: Find 4 UV contributions to 8 R,G,B values */                    \
+    __asm movdqa     xmm1, xmm0                                                \
+    __asm movdqa     xmm2, xmm0                                                \
+    __asm pmaddubsw  xmm0, kUVToB        /* scale B UV */                      \
+    __asm pmaddubsw  xmm1, kUVToG        /* scale G UV */                      \
+    __asm pmaddubsw  xmm2, kUVToR        /* scale R UV */                      \
+    __asm psubw      xmm0, kUVBiasB      /* unbias back to signed */           \
+    __asm psubw      xmm1, kUVBiasG                                            \
+    __asm psubw      xmm2, kUVBiasR                                            \
+    /* Step 2: Find Y contribution to 8 R,G,B values */                        \
+    __asm movq       xmm3, qword ptr [eax]                        /* NOLINT */ \
+    __asm lea        eax, [eax + 8]                                            \
+    __asm punpcklbw  xmm3, xmm4                                                \
+    __asm psubsw     xmm3, kYSub16                                             \
+    __asm pmullw     xmm3, kYToRgb                                             \
+    __asm paddsw     xmm0, xmm3           /* B += Y */                         \
+    __asm paddsw     xmm1, xmm3           /* G += Y */                         \
+    __asm paddsw     xmm2, xmm3           /* R += Y */                         \
+    __asm psraw      xmm0, 6                                                   \
+    __asm psraw      xmm1, 6                                                   \
+    __asm psraw      xmm2, 6                                                   \
+    __asm packuswb   xmm0, xmm0           /* B */                              \
+    __asm packuswb   xmm1, xmm1           /* G */                              \
+    __asm packuswb   xmm2, xmm2           /* R */                              \
+  }
+
+// Convert 8 pixels: 8 VU and 8 Y.
+#define YVUTORGB __asm {                                                       \
+    /* Step 1: Find 4 UV contributions to 8 R,G,B values */                    \
+    __asm movdqa     xmm1, xmm0                                                \
+    __asm movdqa     xmm2, xmm0                                                \
+    __asm pmaddubsw  xmm0, kVUToB        /* scale B UV */                      \
+    __asm pmaddubsw  xmm1, kVUToG        /* scale G UV */                      \
+    __asm pmaddubsw  xmm2, kVUToR        /* scale R UV */                      \
+    __asm psubw      xmm0, kUVBiasB      /* unbias back to signed */           \
+    __asm psubw      xmm1, kUVBiasG                                            \
+    __asm psubw      xmm2, kUVBiasR                                            \
+    /* Step 2: Find Y contribution to 8 R,G,B values */                        \
+    __asm movq       xmm3, qword ptr [eax]                        /* NOLINT */ \
+    __asm lea        eax, [eax + 8]                                            \
+    __asm punpcklbw  xmm3, xmm4                                                \
+    __asm psubsw     xmm3, kYSub16                                             \
+    __asm pmullw     xmm3, kYToRgb                                             \
+    __asm paddsw     xmm0, xmm3           /* B += Y */                         \
+    __asm paddsw     xmm1, xmm3           /* G += Y */                         \
+    __asm paddsw     xmm2, xmm3           /* R += Y */                         \
+    __asm psraw      xmm0, 6                                                   \
+    __asm psraw      xmm1, 6                                                   \
+    __asm psraw      xmm2, 6                                                   \
+    __asm packuswb   xmm0, xmm0           /* B */                              \
+    __asm packuswb   xmm1, xmm1           /* G */                              \
+    __asm packuswb   xmm2, xmm2           /* R */                              \
+  }
+
+// 8 pixels, dest aligned 16.
+// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I444ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // argb
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV444
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToRGB24Row_SSSE3(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* dst_rgb24,
+                          int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // rgb24
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+    movdqa     xmm5, kShuffleMaskARGBToRGB24_0
+    movdqa     xmm6, kShuffleMaskARGBToRGB24
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RRGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm2           // RR
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
+    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
+    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
+    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
+    movq       qword ptr [edx], xmm0  // First 8 bytes
+    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
+    lea        edx,  [edx + 24]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToRAWRow_SSSE3(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_raw,
+                        int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // raw
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+    movdqa     xmm5, kShuffleMaskARGBToRAW_0
+    movdqa     xmm6, kShuffleMaskARGBToRAW
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RRGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm2           // RR
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
+    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
+    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
+    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
+    movq       qword ptr [edx], xmm0  // First 8 bytes
+    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
+    lea        edx,  [edx + 24]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest unaligned.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToRGB565Row_SSSE3(const uint8* y_buf,
+                           const uint8* u_buf,
+                           const uint8* v_buf,
+                           uint8* rgb565_buf,
+                           int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // rgb565
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+    pcmpeqb    xmm5, xmm5       // generate mask 0x0000001f
+    psrld      xmm5, 27
+    pcmpeqb    xmm6, xmm6       // generate mask 0x000007e0
+    psrld      xmm6, 26
+    pslld      xmm6, 5
+    pcmpeqb    xmm7, xmm7       // generate mask 0xfffff800
+    pslld      xmm7, 11
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RRGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm2           // RR
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
+
+    // Step 3b: RRGB -> RGB565
+    movdqa     xmm3, xmm0    // B  first 4 pixels of argb
+    movdqa     xmm2, xmm0    // G
+    pslld      xmm0, 8       // R
+    psrld      xmm3, 3       // B
+    psrld      xmm2, 5       // G
+    psrad      xmm0, 16      // R
+    pand       xmm3, xmm5    // B
+    pand       xmm2, xmm6    // G
+    pand       xmm0, xmm7    // R
+    por        xmm3, xmm2    // BG
+    por        xmm0, xmm3    // BGR
+    movdqa     xmm3, xmm1    // B  next 4 pixels of argb
+    movdqa     xmm2, xmm1    // G
+    pslld      xmm1, 8       // R
+    psrld      xmm3, 3       // B
+    psrld      xmm2, 5       // G
+    psrad      xmm1, 16      // R
+    pand       xmm3, xmm5    // B
+    pand       xmm2, xmm6    // G
+    pand       xmm1, xmm7    // R
+    por        xmm3, xmm2    // BG
+    por        xmm1, xmm3    // BGR
+    packssdw   xmm0, xmm1
+    sub        ecx, 8
+    movdqu     [edx], xmm0   // store 8 pixels of RGB565
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // argb
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+// Similar to I420 but duplicate UV once more.
+__declspec(naked) __declspec(align(16))
+void I411ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __asm {
+    push       ebx
+    push       esi
+    push       edi
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ecx, [esp + 12 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV411  // modifies EBX
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    pop        ebx
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void NV12ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* uv_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // UV
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READNV12
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void NV21ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* uv_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // VU
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READNV12
+    YVUTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, unaligned.
+// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // argb
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV444
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, unaligned.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // argb
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, unaligned.
+// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+// Similar to I420 but duplicate UV once more.
+__declspec(naked) __declspec(align(16))
+void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __asm {
+    push       ebx
+    push       esi
+    push       edi
+    mov        eax, [esp + 12 + 4]   // Y
+    mov        esi, [esp + 12 + 8]   // U
+    mov        edi, [esp + 12 + 12]  // V
+    mov        edx, [esp + 12 + 16]  // argb
+    mov        ecx, [esp + 12 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV411  // modifies EBX
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    pop        ebx
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* uv_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // UV
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READNV12
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 8 pixels, dest aligned 16.
+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
+__declspec(naked) __declspec(align(16))
+void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* uv_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // Y
+    mov        esi, [esp + 4 + 8]   // VU
+    mov        edx, [esp + 4 + 12]  // argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READNV12
+    YVUTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm0, xmm1           // BG
+    punpcklbw  xmm2, xmm5           // RA
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToBGRARow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_bgra,
+                         int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // bgra
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into BGRA
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    punpcklbw  xmm1, xmm0           // GB
+    punpcklbw  xmm5, xmm2           // AR
+    movdqa     xmm0, xmm5
+    punpcklwd  xmm5, xmm1           // BGRA first 4 pixels
+    punpckhwd  xmm0, xmm1           // BGRA next 4 pixels
+    movdqa     [edx], xmm5
+    movdqa     [edx + 16], xmm0
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_bgra,
+                                   int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // bgra
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into BGRA
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    punpcklbw  xmm1, xmm0           // GB
+    punpcklbw  xmm5, xmm2           // AR
+    movdqa     xmm0, xmm5
+    punpcklwd  xmm5, xmm1           // BGRA first 4 pixels
+    punpckhwd  xmm0, xmm1           // BGRA next 4 pixels
+    movdqu     [edx], xmm5
+    movdqu     [edx + 16], xmm0
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_abgr,
+                         int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // abgr
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm2, xmm1           // RG
+    punpcklbw  xmm0, xmm5           // BA
+    movdqa     xmm1, xmm2
+    punpcklwd  xmm2, xmm0           // RGBA first 4 pixels
+    punpckhwd  xmm1, xmm0           // RGBA next 4 pixels
+    movdqa     [edx], xmm2
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_abgr,
+                                   int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // abgr
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into ARGB
+    punpcklbw  xmm2, xmm1           // RG
+    punpcklbw  xmm0, xmm5           // BA
+    movdqa     xmm1, xmm2
+    punpcklwd  xmm2, xmm0           // RGBA first 4 pixels
+    punpckhwd  xmm1, xmm0           // RGBA next 4 pixels
+    movdqu     [edx], xmm2
+    movdqu     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToRGBARow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_rgba,
+                         int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // rgba
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RGBA
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    punpcklbw  xmm1, xmm2           // GR
+    punpcklbw  xmm5, xmm0           // AB
+    movdqa     xmm0, xmm5
+    punpcklwd  xmm5, xmm1           // RGBA first 4 pixels
+    punpckhwd  xmm0, xmm1           // RGBA next 4 pixels
+    movdqa     [edx], xmm5
+    movdqa     [edx + 16], xmm0
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_rgba,
+                                   int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // Y
+    mov        esi, [esp + 8 + 8]   // U
+    mov        edi, [esp + 8 + 12]  // V
+    mov        edx, [esp + 8 + 16]  // rgba
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        edi, esi
+    pxor       xmm4, xmm4
+
+    align      4
+ convertloop:
+    READYUV422
+    YUVTORGB
+
+    // Step 3: Weave into RGBA
+    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
+    punpcklbw  xmm1, xmm2           // GR
+    punpcklbw  xmm5, xmm0           // AB
+    movdqa     xmm0, xmm5
+    punpcklwd  xmm5, xmm1           // RGBA first 4 pixels
+    punpckhwd  xmm0, xmm1           // RGBA next 4 pixels
+    movdqu     [edx], xmm5
+    movdqu     [edx + 16], xmm0
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+#endif  // HAS_I422TOARGBROW_SSSE3
+
+#ifdef HAS_YTOARGBROW_SSE2
+__declspec(naked) __declspec(align(16))
+void YToARGBRow_SSE2(const uint8* y_buf,
+                     uint8* rgb_buf,
+                     int width) {
+  __asm {
+    pxor       xmm5, xmm5
+    pcmpeqb    xmm4, xmm4           // generate mask 0xff000000
+    pslld      xmm4, 24
+    mov        eax, 0x00100010
+    movd       xmm3, eax
+    pshufd     xmm3, xmm3, 0
+    mov        eax, 0x004a004a       // 74
+    movd       xmm2, eax
+    pshufd     xmm2, xmm2,0
+    mov        eax, [esp + 4]       // Y
+    mov        edx, [esp + 8]       // rgb
+    mov        ecx, [esp + 12]      // width
+
+    align      4
+ convertloop:
+    // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
+    movq       xmm0, qword ptr [eax]
+    lea        eax, [eax + 8]
+    punpcklbw  xmm0, xmm5           // 0.Y
+    psubusw    xmm0, xmm3
+    pmullw     xmm0, xmm2
+    psrlw      xmm0, 6
+    packuswb   xmm0, xmm0           // G
+
+    // Step 2: Weave into ARGB
+    punpcklbw  xmm0, xmm0           // GG
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm0           // BGRA first 4 pixels
+    punpckhwd  xmm1, xmm1           // BGRA next 4 pixels
+    por        xmm0, xmm4
+    por        xmm1, xmm4
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx,  [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_YTOARGBROW_SSE2
+
+#ifdef HAS_MIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static const uvec8 kShuffleMirror = {
+  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+
+__declspec(naked) __declspec(align(16))
+void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov       eax, [esp + 4]   // src
+    mov       edx, [esp + 8]   // dst
+    mov       ecx, [esp + 12]  // width
+    movdqa    xmm5, kShuffleMirror
+    lea       eax, [eax - 16]
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax + ecx]
+    pshufb    xmm0, xmm5
+    sub       ecx, 16
+    movdqa    [edx], xmm0
+    lea       edx, [edx + 16]
+    jg        convertloop
+    ret
+  }
+}
+#endif  // HAS_MIRRORROW_SSSE3
+
+#ifdef HAS_MIRRORROW_AVX2
+// Shuffle table for reversing the bytes.
+static const ulvec8 kShuffleMirror_AVX2 = {
+  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u,
+  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+
+__declspec(naked) __declspec(align(16))
+void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov       eax, [esp + 4]   // src
+    mov       edx, [esp + 8]   // dst
+    mov       ecx, [esp + 12]  // width
+    vmovdqa   ymm5, kShuffleMirror_AVX2
+    lea       eax, [eax - 32]
+
+    align      4
+ convertloop:
+    vmovdqu   ymm0, [eax + ecx]
+    vpshufb   ymm0, ymm0, ymm5
+    vpermq    ymm0, ymm0, 0x4e  // swap high and low halfs
+    sub       ecx, 32
+    vmovdqu   [edx], ymm0
+    lea       edx, [edx + 32]
+    jg        convertloop
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_MIRRORROW_AVX2
+
+#ifdef HAS_MIRRORROW_SSE2
+// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
+// version can not.
+__declspec(naked) __declspec(align(16))
+void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov       eax, [esp + 4]   // src
+    mov       edx, [esp + 8]   // dst
+    mov       ecx, [esp + 12]  // width
+    lea       eax, [eax - 16]
+
+    align      4
+ convertloop:
+    movdqu    xmm0, [eax + ecx]
+    movdqa    xmm1, xmm0        // swap bytes
+    psllw     xmm0, 8
+    psrlw     xmm1, 8
+    por       xmm0, xmm1
+    pshuflw   xmm0, xmm0, 0x1b  // swap words
+    pshufhw   xmm0, xmm0, 0x1b
+    pshufd    xmm0, xmm0, 0x4e  // swap qwords
+    sub       ecx, 16
+    movdqu    [edx], xmm0
+    lea       edx, [edx + 16]
+    jg        convertloop
+    ret
+  }
+}
+#endif  // HAS_MIRRORROW_SSE2
+
+#ifdef HAS_MIRRORROW_UV_SSSE3
+// Shuffle table for reversing the bytes of UV channels.
+static const uvec8 kShuffleMirrorUV = {
+  14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
+};
+
+__declspec(naked) __declspec(align(16))
+void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
+                       int width) {
+  __asm {
+    push      edi
+    mov       eax, [esp + 4 + 4]   // src
+    mov       edx, [esp + 4 + 8]   // dst_u
+    mov       edi, [esp + 4 + 12]  // dst_v
+    mov       ecx, [esp + 4 + 16]  // width
+    movdqa    xmm1, kShuffleMirrorUV
+    lea       eax, [eax + ecx * 2 - 16]
+    sub       edi, edx
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax]
+    lea       eax, [eax - 16]
+    pshufb    xmm0, xmm1
+    sub       ecx, 8
+    movlpd    qword ptr [edx], xmm0
+    movhpd    qword ptr [edx + edi], xmm0
+    lea       edx, [edx + 8]
+    jg        convertloop
+
+    pop       edi
+    ret
+  }
+}
+#endif  // HAS_MIRRORROW_UV_SSSE3
+
+#ifdef HAS_ARGBMIRRORROW_SSSE3
+// Shuffle table for reversing the bytes.
+static const uvec8 kARGBShuffleMirror = {
+  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
+};
+
+__declspec(naked) __declspec(align(16))
+void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov       eax, [esp + 4]   // src
+    mov       edx, [esp + 8]   // dst
+    mov       ecx, [esp + 12]  // width
+    lea       eax, [eax - 16 + ecx * 4]  // last 4 pixels.
+    movdqa    xmm5, kARGBShuffleMirror
+
+    align      4
+ convertloop:
+    movdqa    xmm0, [eax]
+    lea       eax, [eax - 16]
+    pshufb    xmm0, xmm5
+    sub       ecx, 4
+    movdqa    [edx], xmm0
+    lea       edx, [edx + 16]
+    jg        convertloop
+    ret
+  }
+}
+#endif  // HAS_ARGBMIRRORROW_SSSE3
+
+#ifdef HAS_ARGBMIRRORROW_AVX2
+// Shuffle table for reversing the bytes.
+static const ulvec32 kARGBShuffleMirror_AVX2 = {
+  7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+
+__declspec(naked) __declspec(align(16))
+void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov       eax, [esp + 4]   // src
+    mov       edx, [esp + 8]   // dst
+    mov       ecx, [esp + 12]  // width
+    lea       eax, [eax - 32]
+    vmovdqa   ymm5, kARGBShuffleMirror_AVX2
+
+    align      4
+ convertloop:
+    vpermd    ymm0, ymm5, [eax + ecx * 4]  // permute dword order
+    sub       ecx, 8
+    vmovdqu   [edx], ymm0
+    lea       edx, [edx + 32]
+    jg        convertloop
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBMIRRORROW_AVX2
+
+#ifdef HAS_SPLITUVROW_SSE2
+__declspec(naked) __declspec(align(16))
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    movdqa     xmm2, xmm0
+    movdqa     xmm3, xmm1
+    pand       xmm0, xmm5   // even bytes
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    psrlw      xmm2, 8      // odd bytes
+    psrlw      xmm3, 8
+    packuswb   xmm2, xmm3
+    movdqa     [edx], xmm0
+    movdqa     [edx + edi], xmm2
+    lea        edx, [edx + 16]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    movdqa     xmm2, xmm0
+    movdqa     xmm3, xmm1
+    pand       xmm0, xmm5   // even bytes
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    psrlw      xmm2, 8      // odd bytes
+    psrlw      xmm3, 8
+    packuswb   xmm2, xmm3
+    movdqu     [edx], xmm0
+    movdqu     [edx + edi], xmm2
+    lea        edx, [edx + 16]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+#endif  // HAS_SPLITUVROW_SSE2
+
+#ifdef HAS_SPLITUVROW_AVX2
+__declspec(naked) __declspec(align(16))
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax,  [eax + 64]
+    vpsrlw     ymm2, ymm0, 8      // odd bytes
+    vpsrlw     ymm3, ymm1, 8
+    vpand      ymm0, ymm0, ymm5   // even bytes
+    vpand      ymm1, ymm1, ymm5
+    vpackuswb  ymm0, ymm0, ymm1
+    vpackuswb  ymm2, ymm2, ymm3
+    vpermq     ymm0, ymm0, 0xd8
+    vpermq     ymm2, ymm2, 0xd8
+    vmovdqu    [edx], ymm0
+    vmovdqu    [edx + edi], ymm2
+    lea        edx, [edx + 32]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_SPLITUVROW_AVX2
+
+#ifdef HAS_MERGEUVROW_SSE2
+__declspec(naked) __declspec(align(16))
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_u
+    mov        edx, [esp + 4 + 8]    // src_v
+    mov        edi, [esp + 4 + 12]   // dst_uv
+    mov        ecx, [esp + 4 + 16]   // width
+    sub        edx, eax
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]      // read 16 U's
+    movdqa     xmm1, [eax + edx]  // and 16 V's
+    lea        eax,  [eax + 16]
+    movdqa     xmm2, xmm0
+    punpcklbw  xmm0, xmm1       // first 8 UV pairs
+    punpckhbw  xmm2, xmm1       // next 8 UV pairs
+    movdqa     [edi], xmm0
+    movdqa     [edi + 16], xmm2
+    lea        edi, [edi + 32]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_u
+    mov        edx, [esp + 4 + 8]    // src_v
+    mov        edi, [esp + 4 + 12]   // dst_uv
+    mov        ecx, [esp + 4 + 16]   // width
+    sub        edx, eax
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]      // read 16 U's
+    movdqu     xmm1, [eax + edx]  // and 16 V's
+    lea        eax,  [eax + 16]
+    movdqa     xmm2, xmm0
+    punpcklbw  xmm0, xmm1       // first 8 UV pairs
+    punpckhbw  xmm2, xmm1       // next 8 UV pairs
+    movdqu     [edi], xmm0
+    movdqu     [edi + 16], xmm2
+    lea        edi, [edi + 32]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+#endif  //  HAS_MERGEUVROW_SSE2
+
+#ifdef HAS_MERGEUVROW_AVX2
+__declspec(naked) __declspec(align(16))
+void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_u
+    mov        edx, [esp + 4 + 8]    // src_v
+    mov        edi, [esp + 4 + 12]   // dst_uv
+    mov        ecx, [esp + 4 + 16]   // width
+    sub        edx, eax
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]           // read 32 U's
+    vmovdqu    ymm1, [eax + edx]     // and 32 V's
+    lea        eax,  [eax + 32]
+    vpunpcklbw ymm2, ymm0, ymm1      // low 16 UV pairs. mutated qqword 0,2
+    vpunpckhbw ymm0, ymm0, ymm1      // high 16 UV pairs. mutated qqword 1,3
+    vperm2i128 ymm1, ymm2, ymm0, 0x20  // low 128 of ymm2 and low 128 of ymm0
+    vperm2i128 ymm2, ymm2, ymm0, 0x31  // high 128 of ymm2 and high 128 of ymm0
+    vmovdqu    [edi], ymm1
+    vmovdqu    [edi + 32], ymm2
+    lea        edi, [edi + 64]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    vzeroupper
+    ret
+  }
+}
+#endif  //  HAS_MERGEUVROW_AVX2
+
+#ifdef HAS_COPYROW_SSE2
+// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
+__declspec(naked) __declspec(align(16))
+void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
+  __asm {
+    mov        eax, [esp + 4]   // src
+    mov        edx, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    sub        ecx, 32
+    jg         convertloop
+    ret
+  }
+}
+#endif  // HAS_COPYROW_SSE2
+
+// Unaligned Multiple of 1.
+__declspec(naked) __declspec(align(16))
+void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
+  __asm {
+    mov        eax, esi
+    mov        edx, edi
+    mov        esi, [esp + 4]   // src
+    mov        edi, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    rep movsb
+    mov        edi, edx
+    mov        esi, eax
+    ret
+  }
+}
+
+#ifdef HAS_COPYROW_X86
+__declspec(naked) __declspec(align(16))
+void CopyRow_X86(const uint8* src, uint8* dst, int count) {
+  __asm {
+    mov        eax, esi
+    mov        edx, edi
+    mov        esi, [esp + 4]   // src
+    mov        edi, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    shr        ecx, 2
+    rep movsd
+    mov        edi, edx
+    mov        esi, eax
+    ret
+  }
+}
+#endif  // HAS_COPYROW_X86
+
+#ifdef HAS_ARGBCOPYALPHAROW_SSE2
+// width in pixels
+__declspec(naked) __declspec(align(16))
+void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src
+    mov        edx, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    pcmpeqb    xmm0, xmm0       // generate mask 0xff000000
+    pslld      xmm0, 24
+    pcmpeqb    xmm1, xmm1       // generate mask 0x00ffffff
+    psrld      xmm1, 8
+
+    align      4
+  convertloop:
+    movdqa     xmm2, [eax]
+    movdqa     xmm3, [eax + 16]
+    lea        eax, [eax + 32]
+    movdqa     xmm4, [edx]
+    movdqa     xmm5, [edx + 16]
+    pand       xmm2, xmm0
+    pand       xmm3, xmm0
+    pand       xmm4, xmm1
+    pand       xmm5, xmm1
+    por        xmm2, xmm4
+    por        xmm3, xmm5
+    movdqa     [edx], xmm2
+    movdqa     [edx + 16], xmm3
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_ARGBCOPYALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+// width in pixels
+__declspec(naked) __declspec(align(16))
+void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src
+    mov        edx, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    vpcmpeqb   ymm0, ymm0, ymm0
+    vpsrld     ymm0, ymm0, 8    // generate mask 0x00ffffff
+
+    align      4
+  convertloop:
+    vmovdqu    ymm1, [eax]
+    vmovdqu    ymm2, [eax + 32]
+    lea        eax, [eax + 64]
+    vpblendvb  ymm1, ymm1, [edx], ymm0
+    vpblendvb  ymm2, ymm2, [edx + 32], ymm0
+    vmovdqu    [edx], ymm1
+    vmovdqu    [edx + 32], ymm2
+    lea        edx, [edx + 64]
+    sub        ecx, 16
+    jg         convertloop
+
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBCOPYALPHAROW_AVX2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+// width in pixels
+__declspec(naked) __declspec(align(16))
+void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src
+    mov        edx, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    pcmpeqb    xmm0, xmm0       // generate mask 0xff000000
+    pslld      xmm0, 24
+    pcmpeqb    xmm1, xmm1       // generate mask 0x00ffffff
+    psrld      xmm1, 8
+
+    align      4
+  convertloop:
+    movq       xmm2, qword ptr [eax]  // 8 Y's
+    lea        eax, [eax + 8]
+    punpcklbw  xmm2, xmm2
+    punpckhwd  xmm3, xmm2
+    punpcklwd  xmm2, xmm2
+    movdqa     xmm4, [edx]
+    movdqa     xmm5, [edx + 16]
+    pand       xmm2, xmm0
+    pand       xmm3, xmm0
+    pand       xmm4, xmm1
+    pand       xmm5, xmm1
+    por        xmm2, xmm4
+    por        xmm3, xmm5
+    movdqa     [edx], xmm2
+    movdqa     [edx + 16], xmm3
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_ARGBCOPYYTOALPHAROW_SSE2
+
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+// width in pixels
+__declspec(naked) __declspec(align(16))
+void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src
+    mov        edx, [esp + 8]   // dst
+    mov        ecx, [esp + 12]  // count
+    vpcmpeqb   ymm0, ymm0, ymm0
+    vpsrld     ymm0, ymm0, 8    // generate mask 0x00ffffff
+
+    align      4
+  convertloop:
+    vpmovzxbd  ymm1, qword ptr [eax]
+    vpmovzxbd  ymm2, qword ptr [eax + 8]
+    lea        eax, [eax + 16]
+    vpslld     ymm1, ymm1, 24
+    vpslld     ymm2, ymm2, 24
+    vpblendvb  ymm1, ymm1, [edx], ymm0
+    vpblendvb  ymm2, ymm2, [edx + 32], ymm0
+    vmovdqu    [edx], ymm1
+    vmovdqu    [edx + 32], ymm2
+    lea        edx, [edx + 64]
+    sub        ecx, 16
+    jg         convertloop
+
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBCOPYYTOALPHAROW_AVX2
+
+#ifdef HAS_SETROW_X86
+// SetRow8 writes 'count' bytes using a 32 bit value repeated.
+__declspec(naked) __declspec(align(16))
+void SetRow_X86(uint8* dst, uint32 v32, int count) {
+  __asm {
+    mov        edx, edi
+    mov        edi, [esp + 4]   // dst
+    mov        eax, [esp + 8]   // v32
+    mov        ecx, [esp + 12]  // count
+    shr        ecx, 2
+    rep stosd
+    mov        edi, edx
+    ret
+  }
+}
+
+// SetRow32 writes 'count' words using a 32 bit value repeated.
+__declspec(naked) __declspec(align(16))
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
+                   int dst_stride, int height) {
+  __asm {
+    push       esi
+    push       edi
+    push       ebp
+    mov        edi, [esp + 12 + 4]   // dst
+    mov        eax, [esp + 12 + 8]   // v32
+    mov        ebp, [esp + 12 + 12]  // width
+    mov        edx, [esp + 12 + 16]  // dst_stride
+    mov        esi, [esp + 12 + 20]  // height
+    lea        ecx, [ebp * 4]
+    sub        edx, ecx             // stride - width * 4
+
+    align      4
+  convertloop:
+    mov        ecx, ebp
+    rep stosd
+    add        edi, edx
+    sub        esi, 1
+    jg         convertloop
+
+    pop        ebp
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SETROW_X86
+
+#ifdef HAS_YUY2TOYROW_AVX2
+__declspec(naked) __declspec(align(16))
+void YUY2ToYRow_AVX2(const uint8* src_yuy2,
+                     uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_yuy2
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5  // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax,  [eax + 64]
+    vpand      ymm0, ymm0, ymm5   // even bytes are Y
+    vpand      ymm1, ymm1, ymm5
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    sub        ecx, 32
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    jg         convertloop
+    vzeroupper
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    vpavgb     ymm0, ymm0, [eax + esi]
+    vpavgb     ymm1, ymm1, [eax + esi + 32]
+    lea        eax,  [eax + 64]
+    vpsrlw     ymm0, ymm0, 8      // YUYV -> UVUV
+    vpsrlw     ymm1, ymm1, 8
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    vpand      ymm1, ymm0, ymm5  // U
+    vpsrlw     ymm0, ymm0, 8     // V
+    vpackuswb  ymm1, ymm1, ymm1  // mutates.
+    vpackuswb  ymm0, ymm0, ymm0  // mutates.
+    vpermq     ymm1, ymm1, 0xd8
+    vpermq     ymm0, ymm0, 0xd8
+    vextractf128 [edx], ymm1, 0  // U
+    vextractf128 [edx + edi], ymm0, 0 // V
+    lea        edx, [edx + 16]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax,  [eax + 64]
+    vpsrlw     ymm0, ymm0, 8      // YUYV -> UVUV
+    vpsrlw     ymm1, ymm1, 8
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    vpand      ymm1, ymm0, ymm5  // U
+    vpsrlw     ymm0, ymm0, 8     // V
+    vpackuswb  ymm1, ymm1, ymm1  // mutates.
+    vpackuswb  ymm0, ymm0, ymm0  // mutates.
+    vpermq     ymm1, ymm1, 0xd8
+    vpermq     ymm0, ymm0, 0xd8
+    vextractf128 [edx], ymm1, 0  // U
+    vextractf128 [edx + edi], ymm0, 0 // V
+    lea        edx, [edx + 16]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    vzeroupper
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToYRow_AVX2(const uint8* src_uyvy,
+                     uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_uyvy
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax,  [eax + 64]
+    vpsrlw     ymm0, ymm0, 8      // odd bytes are Y
+    vpsrlw     ymm1, ymm1, 8
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    sub        ecx, 32
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    jg         convertloop
+    ret
+    vzeroupper
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    vpavgb     ymm0, ymm0, [eax + esi]
+    vpavgb     ymm1, ymm1, [eax + esi + 32]
+    lea        eax,  [eax + 64]
+    vpand      ymm0, ymm0, ymm5   // UYVY -> UVUV
+    vpand      ymm1, ymm1, ymm5
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    vpand      ymm1, ymm0, ymm5  // U
+    vpsrlw     ymm0, ymm0, 8     // V
+    vpackuswb  ymm1, ymm1, ymm1  // mutates.
+    vpackuswb  ymm0, ymm0, ymm0  // mutates.
+    vpermq     ymm1, ymm1, 0xd8
+    vpermq     ymm0, ymm0, 0xd8
+    vextractf128 [edx], ymm1, 0  // U
+    vextractf128 [edx + edi], ymm0, 0 // V
+    lea        edx, [edx + 16]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
+    vpsrlw     ymm5, ymm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax,  [eax + 64]
+    vpand      ymm0, ymm0, ymm5   // UYVY -> UVUV
+    vpand      ymm1, ymm1, ymm5
+    vpackuswb  ymm0, ymm0, ymm1   // mutates.
+    vpermq     ymm0, ymm0, 0xd8
+    vpand      ymm1, ymm0, ymm5  // U
+    vpsrlw     ymm0, ymm0, 8     // V
+    vpackuswb  ymm1, ymm1, ymm1  // mutates.
+    vpackuswb  ymm0, ymm0, ymm0  // mutates.
+    vpermq     ymm1, ymm1, 0xd8
+    vpermq     ymm0, ymm0, 0xd8
+    vextractf128 [edx], ymm1, 0  // U
+    vextractf128 [edx + edi], ymm0, 0 // V
+    lea        edx, [edx + 16]
+    sub        ecx, 32
+    jg         convertloop
+
+    pop        edi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_YUY2TOYROW_AVX2
+
+#ifdef HAS_YUY2TOYROW_SSE2
+__declspec(naked) __declspec(align(16))
+void YUY2ToYRow_SSE2(const uint8* src_yuy2,
+                     uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_yuy2
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    pand       xmm0, xmm5   // even bytes are Y
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + esi]
+    movdqa     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2
+    pavgb      xmm1, xmm3
+    psrlw      xmm0, 8      // YUYV -> UVUV
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8      // YUYV -> UVUV
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
+                               uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_yuy2
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    pand       xmm0, xmm5   // even bytes are Y
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
+                                uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + esi]
+    movdqu     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2
+    pavgb      xmm1, xmm3
+    psrlw      xmm0, 8      // YUYV -> UVUV
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
+                                   uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8      // YUYV -> UVUV
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToYRow_SSE2(const uint8* src_uyvy,
+                     uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_uyvy
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8    // odd bytes are Y
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + esi]
+    movdqa     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2
+    pavgb      xmm1, xmm3
+    pand       xmm0, xmm5   // UYVY -> UVUV
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
+                         uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    pand       xmm0, xmm5   // UYVY -> UVUV
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
+                               uint8* dst_y, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_uyvy
+    mov        edx, [esp + 8]    // dst_y
+    mov        ecx, [esp + 12]   // pix
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8    // odd bytes are Y
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
+                                uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_yuy2
+    mov        esi, [esp + 8 + 8]    // stride_yuy2
+    mov        edx, [esp + 8 + 12]   // dst_u
+    mov        edi, [esp + 8 + 16]   // dst_v
+    mov        ecx, [esp + 8 + 20]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + esi]
+    movdqu     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2
+    pavgb      xmm1, xmm3
+    pand       xmm0, xmm5   // UYVY -> UVUV
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
+                                   uint8* dst_u, uint8* dst_v, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_yuy2
+    mov        edx, [esp + 4 + 8]    // dst_u
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+    sub        edi, edx
+
+    align      4
+  convertloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    pand       xmm0, xmm5   // UYVY -> UVUV
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    movdqa     xmm1, xmm0
+    pand       xmm0, xmm5  // U
+    packuswb   xmm0, xmm0
+    psrlw      xmm1, 8     // V
+    packuswb   xmm1, xmm1
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + edi], xmm1
+    lea        edx, [edx + 8]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+#endif  // HAS_YUY2TOYROW_SSE2
+
+#ifdef HAS_ARGBBLENDROW_SSE2
+// Blend 8 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                       uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm7, xmm7       // generate constant 1
+    psrlw      xmm7, 15
+    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
+    psrlw      xmm6, 8
+    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
+    psllw      xmm5, 8
+    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
+    pslld      xmm4, 24
+
+    sub        ecx, 1
+    je         convertloop1     // only 1 pixel?
+    jl         convertloop1b
+
+    // 1 pixel loop until destination pointer is aligned.
+  alignloop1:
+    test       edx, 15          // aligned?
+    je         alignloop1b
+    movd       xmm3, [eax]
+    lea        eax, [eax + 4]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movd       xmm2, [esi]      // _r_b
+    psrlw      xmm3, 8          // alpha
+    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
+    pshuflw    xmm3, xmm3, 0F5h
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movd       xmm1, [esi]      // _a_g
+    lea        esi, [esi + 4]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        alignloop1
+
+  alignloop1b:
+    add        ecx, 1 - 4
+    jl         convertloop4b
+
+    // 4 pixel loop.
+  convertloop4:
+    movdqu     xmm3, [eax]      // src argb
+    lea        eax, [eax + 16]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movdqu     xmm2, [esi]      // _r_b
+    psrlw      xmm3, 8          // alpha
+    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
+    pshuflw    xmm3, xmm3, 0F5h
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movdqu     xmm1, [esi]      // _a_g
+    lea        esi, [esi + 16]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jge        convertloop4
+
+  convertloop4b:
+    add        ecx, 4 - 1
+    jl         convertloop1b
+
+    // 1 pixel loop.
+  convertloop1:
+    movd       xmm3, [eax]      // src argb
+    lea        eax, [eax + 4]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movd       xmm2, [esi]      // _r_b
+    psrlw      xmm3, 8          // alpha
+    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
+    pshuflw    xmm3, xmm3, 0F5h
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movd       xmm1, [esi]      // _a_g
+    lea        esi, [esi + 4]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        convertloop1
+
+  convertloop1b:
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBBLENDROW_SSE2
+
+#ifdef HAS_ARGBBLENDROW_SSSE3
+// Shuffle table for isolating alpha.
+static const uvec8 kShuffleAlpha = {
+  3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
+  11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
+};
+// Same as SSE2, but replaces:
+//    psrlw      xmm3, 8          // alpha
+//    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
+//    pshuflw    xmm3, xmm3, 0F5h
+// with..
+//    pshufb     xmm3, kShuffleAlpha // alpha
+// Blend 8 pixels at a time.
+
+__declspec(naked) __declspec(align(16))
+void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
+                        uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pcmpeqb    xmm7, xmm7       // generate constant 0x0001
+    psrlw      xmm7, 15
+    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
+    psrlw      xmm6, 8
+    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
+    psllw      xmm5, 8
+    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
+    pslld      xmm4, 24
+
+    sub        ecx, 1
+    je         convertloop1     // only 1 pixel?
+    jl         convertloop1b
+
+    // 1 pixel loop until destination pointer is aligned.
+  alignloop1:
+    test       edx, 15          // aligned?
+    je         alignloop1b
+    movd       xmm3, [eax]
+    lea        eax, [eax + 4]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movd       xmm2, [esi]      // _r_b
+    pshufb     xmm3, kShuffleAlpha // alpha
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movd       xmm1, [esi]      // _a_g
+    lea        esi, [esi + 4]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        alignloop1
+
+  alignloop1b:
+    add        ecx, 1 - 4
+    jl         convertloop4b
+
+    test       eax, 15          // unaligned?
+    jne        convertuloop4
+    test       esi, 15          // unaligned?
+    jne        convertuloop4
+
+    // 4 pixel loop.
+  convertloop4:
+    movdqa     xmm3, [eax]      // src argb
+    lea        eax, [eax + 16]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movdqa     xmm2, [esi]      // _r_b
+    pshufb     xmm3, kShuffleAlpha // alpha
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movdqa     xmm1, [esi]      // _a_g
+    lea        esi, [esi + 16]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jge        convertloop4
+    jmp        convertloop4b
+
+    // 4 pixel unaligned loop.
+  convertuloop4:
+    movdqu     xmm3, [eax]      // src argb
+    lea        eax, [eax + 16]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movdqu     xmm2, [esi]      // _r_b
+    pshufb     xmm3, kShuffleAlpha // alpha
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movdqu     xmm1, [esi]      // _a_g
+    lea        esi, [esi + 16]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jge        convertuloop4
+
+  convertloop4b:
+    add        ecx, 4 - 1
+    jl         convertloop1b
+
+    // 1 pixel loop.
+  convertloop1:
+    movd       xmm3, [eax]      // src argb
+    lea        eax, [eax + 4]
+    movdqa     xmm0, xmm3       // src argb
+    pxor       xmm3, xmm4       // ~alpha
+    movd       xmm2, [esi]      // _r_b
+    pshufb     xmm3, kShuffleAlpha // alpha
+    pand       xmm2, xmm6       // _r_b
+    paddw      xmm3, xmm7       // 256 - alpha
+    pmullw     xmm2, xmm3       // _r_b * alpha
+    movd       xmm1, [esi]      // _a_g
+    lea        esi, [esi + 4]
+    psrlw      xmm1, 8          // _a_g
+    por        xmm0, xmm4       // set alpha to 255
+    pmullw     xmm1, xmm3       // _a_g * alpha
+    psrlw      xmm2, 8          // _r_b convert to 8 bits again
+    paddusb    xmm0, xmm2       // + src argb
+    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
+    paddusb    xmm0, xmm1       // + src argb
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        convertloop1
+
+  convertloop1b:
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBBLENDROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_SSE2
+// Attenuate 4 pixels at a time.
+// Aligned to 16 bytes.
+__declspec(naked) __declspec(align(16))
+void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src_argb0
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
+    pslld      xmm4, 24
+    pcmpeqb    xmm5, xmm5       // generate mask 0x00ffffff
+    psrld      xmm5, 8
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]      // read 4 pixels
+    punpcklbw  xmm0, xmm0       // first 2
+    pshufhw    xmm2, xmm0, 0FFh // 8 alpha words
+    pshuflw    xmm2, xmm2, 0FFh
+    pmulhuw    xmm0, xmm2       // rgb * a
+    movdqa     xmm1, [eax]      // read 4 pixels
+    punpckhbw  xmm1, xmm1       // next 2 pixels
+    pshufhw    xmm2, xmm1, 0FFh // 8 alpha words
+    pshuflw    xmm2, xmm2, 0FFh
+    pmulhuw    xmm1, xmm2       // rgb * a
+    movdqa     xmm2, [eax]      // alphas
+    lea        eax, [eax + 16]
+    psrlw      xmm0, 8
+    pand       xmm2, xmm4
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    pand       xmm0, xmm5       // keep original alphas
+    por        xmm0, xmm2
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_ARGBATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBATTENUATEROW_SSSE3
+// Shuffle table duplicating alpha.
+static const uvec8 kShuffleAlpha0 = {
+  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
+};
+static const uvec8 kShuffleAlpha1 = {
+  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
+  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
+};
+__declspec(naked) __declspec(align(16))
+void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src_argb0
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    pcmpeqb    xmm3, xmm3       // generate mask 0xff000000
+    pslld      xmm3, 24
+    movdqa     xmm4, kShuffleAlpha0
+    movdqa     xmm5, kShuffleAlpha1
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]      // read 4 pixels
+    pshufb     xmm0, xmm4       // isolate first 2 alphas
+    movdqu     xmm1, [eax]      // read 4 pixels
+    punpcklbw  xmm1, xmm1       // first 2 pixel rgbs
+    pmulhuw    xmm0, xmm1       // rgb * a
+    movdqu     xmm1, [eax]      // read 4 pixels
+    pshufb     xmm1, xmm5       // isolate next 2 alphas
+    movdqu     xmm2, [eax]      // read 4 pixels
+    punpckhbw  xmm2, xmm2       // next 2 pixel rgbs
+    pmulhuw    xmm1, xmm2       // rgb * a
+    movdqu     xmm2, [eax]      // mask original alpha
+    lea        eax, [eax + 16]
+    pand       xmm2, xmm3
+    psrlw      xmm0, 8
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    por        xmm0, xmm2       // copy original alpha
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_ARGBATTENUATEROW_SSSE3
+
+#ifdef HAS_ARGBATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const ulvec8 kShuffleAlpha_AVX2 = {
+  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
+  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
+  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
+  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
+};
+__declspec(naked) __declspec(align(16))
+void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src_argb0
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    sub        edx, eax
+    vmovdqa    ymm4, kShuffleAlpha_AVX2
+    vpcmpeqb   ymm5, ymm5, ymm5 // generate mask 0xff000000
+    vpslld     ymm5, ymm5, 24
+
+    align      4
+ convertloop:
+    vmovdqu    ymm6, [eax]       // read 8 pixels.
+    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
+    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
+    vpshufb    ymm2, ymm0, ymm4  // low 4 alphas
+    vpshufb    ymm3, ymm1, ymm4  // high 4 alphas
+    vpmulhuw   ymm0, ymm0, ymm2  // rgb * a
+    vpmulhuw   ymm1, ymm1, ymm3  // rgb * a
+    vpand      ymm6, ymm6, ymm5  // isolate alpha
+    vpsrlw     ymm0, ymm0, 8
+    vpsrlw     ymm1, ymm1, 8
+    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
+    vpor       ymm0, ymm0, ymm6  // copy original alpha
+    sub        ecx, 8
+    vmovdqu    [eax + edx], ymm0
+    lea        eax, [eax + 32]
+    jg         convertloop
+
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBUNATTENUATEROW_SSE2
+// Unattenuate 4 pixels at a time.
+// Aligned to 16 bytes.
+__declspec(naked) __declspec(align(16))
+void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
+                             int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb0
+    mov        edx, [esp + 8 + 8]   // dst_argb
+    mov        ecx, [esp + 8 + 12]  // width
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]      // read 4 pixels
+    movzx      esi, byte ptr [eax + 3]  // first alpha
+    movzx      edi, byte ptr [eax + 7]  // second alpha
+    punpcklbw  xmm0, xmm0       // first 2
+    movd       xmm2, dword ptr fixed_invtbl8[esi * 4]
+    movd       xmm3, dword ptr fixed_invtbl8[edi * 4]
+    pshuflw    xmm2, xmm2, 040h // first 4 inv_alpha words.  1, a, a, a
+    pshuflw    xmm3, xmm3, 040h // next 4 inv_alpha words
+    movlhps    xmm2, xmm3
+    pmulhuw    xmm0, xmm2       // rgb * a
+
+    movdqu     xmm1, [eax]      // read 4 pixels
+    movzx      esi, byte ptr [eax + 11]  // third alpha
+    movzx      edi, byte ptr [eax + 15]  // forth alpha
+    punpckhbw  xmm1, xmm1       // next 2
+    movd       xmm2, dword ptr fixed_invtbl8[esi * 4]
+    movd       xmm3, dword ptr fixed_invtbl8[edi * 4]
+    pshuflw    xmm2, xmm2, 040h // first 4 inv_alpha words
+    pshuflw    xmm3, xmm3, 040h // next 4 inv_alpha words
+    movlhps    xmm2, xmm3
+    pmulhuw    xmm1, xmm2       // rgb * a
+    lea        eax, [eax + 16]
+
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBUNATTENUATEROW_SSE2
+
+#ifdef HAS_ARGBUNATTENUATEROW_AVX2
+// Shuffle table duplicating alpha.
+static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
+  0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
+  0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
+};
+// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
+// USE_GATHER is not on by default, due to being a slow instruction.
+#ifdef USE_GATHER
+__declspec(naked) __declspec(align(16))
+void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+                             int width) {
+  __asm {
+    mov        eax, [esp + 4]   // src_argb0
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    sub        edx, eax
+    vmovdqa    ymm4, kUnattenShuffleAlpha_AVX2
+
+    align      4
+ convertloop:
+    vmovdqu    ymm6, [eax]       // read 8 pixels.
+    vpcmpeqb   ymm5, ymm5, ymm5  // generate mask 0xffffffff for gather.
+    vpsrld     ymm2, ymm6, 24    // alpha in low 8 bits.
+    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
+    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
+    vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5  // ymm5 cleared.  1, a
+    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
+    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
+    vpshufb    ymm2, ymm2, ymm4  // replicate low 4 alphas. 1, a, a, a
+    vpshufb    ymm3, ymm3, ymm4  // replicate high 4 alphas
+    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
+    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
+    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
+    sub        ecx, 8
+    vmovdqu    [eax + edx], ymm0
+    lea        eax, [eax + 32]
+    jg         convertloop
+
+    vzeroupper
+    ret
+  }
+}
+#else  // USE_GATHER
+__declspec(naked) __declspec(align(16))
+void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+                             int width) {
+  __asm {
+
+    mov        eax, [esp + 4]   // src_argb0
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    sub        edx, eax
+    vmovdqa    ymm5, kUnattenShuffleAlpha_AVX2
+
+    push       esi
+    push       edi
+
+    align      4
+ convertloop:
+    // replace VPGATHER
+    movzx      esi, byte ptr [eax + 3]                 // alpha0
+    movzx      edi, byte ptr [eax + 7]                 // alpha1
+    vmovd      xmm0, dword ptr fixed_invtbl8[esi * 4]  // [1,a0]
+    vmovd      xmm1, dword ptr fixed_invtbl8[edi * 4]  // [1,a1]
+    movzx      esi, byte ptr [eax + 11]                // alpha2
+    movzx      edi, byte ptr [eax + 15]                // alpha3
+    vpunpckldq xmm6, xmm0, xmm1                        // [1,a1,1,a0]
+    vmovd      xmm2, dword ptr fixed_invtbl8[esi * 4]  // [1,a2]
+    vmovd      xmm3, dword ptr fixed_invtbl8[edi * 4]  // [1,a3]
+    movzx      esi, byte ptr [eax + 19]                // alpha4
+    movzx      edi, byte ptr [eax + 23]                // alpha5
+    vpunpckldq xmm7, xmm2, xmm3                        // [1,a3,1,a2]
+    vmovd      xmm0, dword ptr fixed_invtbl8[esi * 4]  // [1,a4]
+    vmovd      xmm1, dword ptr fixed_invtbl8[edi * 4]  // [1,a5]
+    movzx      esi, byte ptr [eax + 27]                // alpha6
+    movzx      edi, byte ptr [eax + 31]                // alpha7
+    vpunpckldq xmm0, xmm0, xmm1                        // [1,a5,1,a4]
+    vmovd      xmm2, dword ptr fixed_invtbl8[esi * 4]  // [1,a6]
+    vmovd      xmm3, dword ptr fixed_invtbl8[edi * 4]  // [1,a7]
+    vpunpckldq xmm2, xmm2, xmm3                        // [1,a7,1,a6]
+    vpunpcklqdq xmm3, xmm6, xmm7                       // [1,a3,1,a2,1,a1,1,a0]
+    vpunpcklqdq xmm0, xmm0, xmm2                       // [1,a7,1,a6,1,a5,1,a4]
+    vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
+    // end of VPGATHER
+
+    vmovdqu    ymm6, [eax]       // read 8 pixels.
+    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
+    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
+    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
+    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
+    vpshufb    ymm2, ymm2, ymm5  // replicate low 4 alphas. 1, a, a, a
+    vpshufb    ymm3, ymm3, ymm5  // replicate high 4 alphas
+    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
+    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
+    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
+    sub        ecx, 8
+    vmovdqu    [eax + edx], ymm0
+    lea        eax, [eax + 32]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // USE_GATHER
+#endif  // HAS_ARGBATTENUATEROW_AVX2
+
+#ifdef HAS_ARGBGRAYROW_SSSE3
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
+__declspec(naked) __declspec(align(16))
+void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_argb */
+    mov        ecx, [esp + 12]  /* width */
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]  // G
+    movdqa     xmm1, [eax + 16]
+    pmaddubsw  xmm0, xmm4
+    pmaddubsw  xmm1, xmm4
+    phaddw     xmm0, xmm1
+    paddw      xmm0, xmm5  // Add .5 for rounding.
+    psrlw      xmm0, 7
+    packuswb   xmm0, xmm0   // 8 G bytes
+    movdqa     xmm2, [eax]  // A
+    movdqa     xmm3, [eax + 16]
+    lea        eax, [eax + 32]
+    psrld      xmm2, 24
+    psrld      xmm3, 24
+    packuswb   xmm2, xmm3
+    packuswb   xmm2, xmm2   // 8 A bytes
+    movdqa     xmm3, xmm0   // Weave into GG, GA, then GGGA
+    punpcklbw  xmm0, xmm0   // 8 GG words
+    punpcklbw  xmm3, xmm2   // 8 GA words
+    movdqa     xmm1, xmm0
+    punpcklwd  xmm0, xmm3   // GGGA first 4
+    punpckhwd  xmm1, xmm3   // GGGA next 4
+    sub        ecx, 8
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    jg         convertloop
+    ret
+  }
+}
+#endif  // HAS_ARGBGRAYROW_SSSE3
+
+#ifdef HAS_ARGBSEPIAROW_SSSE3
+//    b = (r * 35 + g * 68 + b * 17) >> 7
+//    g = (r * 45 + g * 88 + b * 22) >> 7
+//    r = (r * 50 + g * 98 + b * 24) >> 7
+// Constant for ARGB color to sepia tone.
+static const vec8 kARGBToSepiaB = {
+  17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
+};
+
+static const vec8 kARGBToSepiaG = {
+  22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
+};
+
+static const vec8 kARGBToSepiaR = {
+  24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
+};
+
+// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
+__declspec(naked) __declspec(align(16))
+void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   /* dst_argb */
+    mov        ecx, [esp + 8]   /* width */
+    movdqa     xmm2, kARGBToSepiaB
+    movdqa     xmm3, kARGBToSepiaG
+    movdqa     xmm4, kARGBToSepiaR
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]  // B
+    movdqa     xmm6, [eax + 16]
+    pmaddubsw  xmm0, xmm2
+    pmaddubsw  xmm6, xmm2
+    phaddw     xmm0, xmm6
+    psrlw      xmm0, 7
+    packuswb   xmm0, xmm0   // 8 B values
+    movdqa     xmm5, [eax]  // G
+    movdqa     xmm1, [eax + 16]
+    pmaddubsw  xmm5, xmm3
+    pmaddubsw  xmm1, xmm3
+    phaddw     xmm5, xmm1
+    psrlw      xmm5, 7
+    packuswb   xmm5, xmm5   // 8 G values
+    punpcklbw  xmm0, xmm5   // 8 BG values
+    movdqa     xmm5, [eax]  // R
+    movdqa     xmm1, [eax + 16]
+    pmaddubsw  xmm5, xmm4
+    pmaddubsw  xmm1, xmm4
+    phaddw     xmm5, xmm1
+    psrlw      xmm5, 7
+    packuswb   xmm5, xmm5   // 8 R values
+    movdqa     xmm6, [eax]  // A
+    movdqa     xmm1, [eax + 16]
+    psrld      xmm6, 24
+    psrld      xmm1, 24
+    packuswb   xmm6, xmm1
+    packuswb   xmm6, xmm6   // 8 A values
+    punpcklbw  xmm5, xmm6   // 8 RA values
+    movdqa     xmm1, xmm0   // Weave BG, RA together
+    punpcklwd  xmm0, xmm5   // BGRA first 4
+    punpckhwd  xmm1, xmm5   // BGRA next 4
+    sub        ecx, 8
+    movdqa     [eax], xmm0
+    movdqa     [eax + 16], xmm1
+    lea        eax, [eax + 32]
+    jg         convertloop
+    ret
+  }
+}
+#endif  // HAS_ARGBSEPIAROW_SSSE3
+
+#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
+// Tranform 8 ARGB pixels (32 bytes) with color matrix.
+// Same as Sepia except matrix is provided.
+// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
+// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
+__declspec(naked) __declspec(align(16))
+void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                              const int8* matrix_argb, int width) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_argb */
+    mov        ecx, [esp + 12]  /* matrix_argb */
+    movdqu     xmm5, [ecx]
+    pshufd     xmm2, xmm5, 0x00
+    pshufd     xmm3, xmm5, 0x55
+    pshufd     xmm4, xmm5, 0xaa
+    pshufd     xmm5, xmm5, 0xff
+    mov        ecx, [esp + 16]  /* width */
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]  // B
+    movdqa     xmm7, [eax + 16]
+    pmaddubsw  xmm0, xmm2
+    pmaddubsw  xmm7, xmm2
+    movdqa     xmm6, [eax]  // G
+    movdqa     xmm1, [eax + 16]
+    pmaddubsw  xmm6, xmm3
+    pmaddubsw  xmm1, xmm3
+    phaddsw    xmm0, xmm7   // B
+    phaddsw    xmm6, xmm1   // G
+    psraw      xmm0, 6      // B
+    psraw      xmm6, 6      // G
+    packuswb   xmm0, xmm0   // 8 B values
+    packuswb   xmm6, xmm6   // 8 G values
+    punpcklbw  xmm0, xmm6   // 8 BG values
+    movdqa     xmm1, [eax]  // R
+    movdqa     xmm7, [eax + 16]
+    pmaddubsw  xmm1, xmm4
+    pmaddubsw  xmm7, xmm4
+    phaddsw    xmm1, xmm7   // R
+    movdqa     xmm6, [eax]  // A
+    movdqa     xmm7, [eax + 16]
+    pmaddubsw  xmm6, xmm5
+    pmaddubsw  xmm7, xmm5
+    phaddsw    xmm6, xmm7   // A
+    psraw      xmm1, 6      // R
+    psraw      xmm6, 6      // A
+    packuswb   xmm1, xmm1   // 8 R values
+    packuswb   xmm6, xmm6   // 8 A values
+    punpcklbw  xmm1, xmm6   // 8 RA values
+    movdqa     xmm6, xmm0   // Weave BG, RA together
+    punpcklwd  xmm0, xmm1   // BGRA first 4
+    punpckhwd  xmm6, xmm1   // BGRA next 4
+    sub        ecx, 8
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm6
+    lea        eax, [eax + 32]
+    lea        edx, [edx + 32]
+    jg         convertloop
+    ret
+  }
+}
+#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3
+
+#ifdef HAS_ARGBQUANTIZEROW_SSE2
+// Quantize 4 ARGB pixels (16 bytes).
+// Aligned to 16 bytes.
+__declspec(naked) __declspec(align(16))
+void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
+                          int interval_offset, int width) {
+  __asm {
+    mov        eax, [esp + 4]    /* dst_argb */
+    movd       xmm2, [esp + 8]   /* scale */
+    movd       xmm3, [esp + 12]  /* interval_size */
+    movd       xmm4, [esp + 16]  /* interval_offset */
+    mov        ecx, [esp + 20]   /* width */
+    pshuflw    xmm2, xmm2, 040h
+    pshufd     xmm2, xmm2, 044h
+    pshuflw    xmm3, xmm3, 040h
+    pshufd     xmm3, xmm3, 044h
+    pshuflw    xmm4, xmm4, 040h
+    pshufd     xmm4, xmm4, 044h
+    pxor       xmm5, xmm5  // constant 0
+    pcmpeqb    xmm6, xmm6  // generate mask 0xff000000
+    pslld      xmm6, 24
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]  // read 4 pixels
+    punpcklbw  xmm0, xmm5   // first 2 pixels
+    pmulhuw    xmm0, xmm2   // pixel * scale >> 16
+    movdqa     xmm1, [eax]  // read 4 pixels
+    punpckhbw  xmm1, xmm5   // next 2 pixels
+    pmulhuw    xmm1, xmm2
+    pmullw     xmm0, xmm3   // * interval_size
+    movdqa     xmm7, [eax]  // read 4 pixels
+    pmullw     xmm1, xmm3
+    pand       xmm7, xmm6   // mask alpha
+    paddw      xmm0, xmm4   // + interval_size / 2
+    paddw      xmm1, xmm4
+    packuswb   xmm0, xmm1
+    por        xmm0, xmm7
+    sub        ecx, 4
+    movdqa     [eax], xmm0
+    lea        eax, [eax + 16]
+    jg         convertloop
+    ret
+  }
+}
+#endif  // HAS_ARGBQUANTIZEROW_SSE2
+
+#ifdef HAS_ARGBSHADEROW_SSE2
+// Shade 4 pixels at a time by specified value.
+// Aligned to 16 bytes.
+__declspec(naked) __declspec(align(16))
+void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  __asm {
+    mov        eax, [esp + 4]   // src_argb
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // width
+    movd       xmm2, [esp + 16]  // value
+    punpcklbw  xmm2, xmm2
+    punpcklqdq xmm2, xmm2
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]      // read 4 pixels
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm0       // first 2
+    punpckhbw  xmm1, xmm1       // next 2
+    pmulhuw    xmm0, xmm2       // argb * value
+    pmulhuw    xmm1, xmm2       // argb * value
+    psrlw      xmm0, 8
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    ret
+  }
+}
+#endif  // HAS_ARGBSHADEROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_SSE2
+// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    pxor       xmm5, xmm5  // constant 0
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
+    movdqu     xmm2, [esi]        // read 4 pixels from src_argb1
+    movdqu     xmm1, xmm0
+    movdqu     xmm3, xmm2
+    punpcklbw  xmm0, xmm0         // first 2
+    punpckhbw  xmm1, xmm1         // next 2
+    punpcklbw  xmm2, xmm5         // first 2
+    punpckhbw  xmm3, xmm5         // next 2
+    pmulhuw    xmm0, xmm2         // src_argb0 * src_argb1 first 2
+    pmulhuw    xmm1, xmm3         // src_argb0 * src_argb1 next 2
+    lea        eax, [eax + 16]
+    lea        esi, [esi + 16]
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBMULTIPLYROW_SSE2
+
+#ifdef HAS_ARGBADDROW_SSE2
+// Add 2 rows of ARGB pixels together, 4 pixels at a time.
+// TODO(fbarchard): Port this to posix, neon and other math functions.
+__declspec(naked) __declspec(align(16))
+void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                     uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+
+    sub        ecx, 4
+    jl         convertloop49
+
+    align      4
+ convertloop4:
+    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
+    lea        eax, [eax + 16]
+    movdqu     xmm1, [esi]        // read 4 pixels from src_argb1
+    lea        esi, [esi + 16]
+    paddusb    xmm0, xmm1         // src_argb0 + src_argb1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jge        convertloop4
+
+ convertloop49:
+    add        ecx, 4 - 1
+    jl         convertloop19
+
+ convertloop1:
+    movd       xmm0, [eax]        // read 1 pixels from src_argb0
+    lea        eax, [eax + 4]
+    movd       xmm1, [esi]        // read 1 pixels from src_argb1
+    lea        esi, [esi + 4]
+    paddusb    xmm0, xmm1         // src_argb0 + src_argb1
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        convertloop1
+
+ convertloop19:
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBADDROW_SSE2
+
+#ifdef HAS_ARGBSUBTRACTROW_SSE2
+// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+
+    align      4
+ convertloop:
+    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
+    lea        eax, [eax + 16]
+    movdqu     xmm1, [esi]        // read 4 pixels from src_argb1
+    lea        esi, [esi + 16]
+    psubusb    xmm0, xmm1         // src_argb0 - src_argb1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBSUBTRACTROW_SSE2
+
+#ifdef HAS_ARGBMULTIPLYROW_AVX2
+// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    vpxor      ymm5, ymm5, ymm5     // constant 0
+
+    align      4
+ convertloop:
+    vmovdqu    ymm1, [eax]        // read 8 pixels from src_argb0
+    lea        eax, [eax + 32]
+    vmovdqu    ymm3, [esi]        // read 8 pixels from src_argb1
+    lea        esi, [esi + 32]
+    vpunpcklbw ymm0, ymm1, ymm1   // low 4
+    vpunpckhbw ymm1, ymm1, ymm1   // high 4
+    vpunpcklbw ymm2, ymm3, ymm5   // low 4
+    vpunpckhbw ymm3, ymm3, ymm5   // high 4
+    vpmulhuw   ymm0, ymm0, ymm2   // src_argb0 * src_argb1 low 4
+    vpmulhuw   ymm1, ymm1, ymm3   // src_argb0 * src_argb1 high 4
+    vpackuswb  ymm0, ymm0, ymm1
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBMULTIPLYROW_AVX2
+
+#ifdef HAS_ARGBADDROW_AVX2
+// Add 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+                     uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+
+    align      4
+ convertloop:
+    vmovdqu    ymm0, [eax]              // read 8 pixels from src_argb0
+    lea        eax, [eax + 32]
+    vpaddusb   ymm0, ymm0, [esi]        // add 8 pixels from src_argb1
+    lea        esi, [esi + 32]
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBADDROW_AVX2
+
+#ifdef HAS_ARGBSUBTRACTROW_AVX2
+// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
+__declspec(naked) __declspec(align(16))
+void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
+                          uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_argb0
+    mov        esi, [esp + 4 + 8]   // src_argb1
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+
+    align      4
+ convertloop:
+    vmovdqu    ymm0, [eax]              // read 8 pixels from src_argb0
+    lea        eax, [eax + 32]
+    vpsubusb   ymm0, ymm0, [esi]        // src_argb0 - src_argb1
+    lea        esi, [esi + 32]
+    vmovdqu    [edx], ymm0
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    jg         convertloop
+
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBSUBTRACTROW_AVX2
+
+#ifdef HAS_SOBELXROW_SSE2
+// SobelX as a matrix is
+// -1  0  1
+// -2  0  2
+// -1  0  1
+__declspec(naked) __declspec(align(16))
+void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    const uint8* src_y2, uint8* dst_sobelx, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_y0
+    mov        esi, [esp + 8 + 8]   // src_y1
+    mov        edi, [esp + 8 + 12]  // src_y2
+    mov        edx, [esp + 8 + 16]  // dst_sobelx
+    mov        ecx, [esp + 8 + 20]  // width
+    sub        esi, eax
+    sub        edi, eax
+    sub        edx, eax
+    pxor       xmm5, xmm5  // constant 0
+
+    align      4
+ convertloop:
+    movq       xmm0, qword ptr [eax]            // read 8 pixels from src_y0[0]
+    movq       xmm1, qword ptr [eax + 2]        // read 8 pixels from src_y0[2]
+    punpcklbw  xmm0, xmm5
+    punpcklbw  xmm1, xmm5
+    psubw      xmm0, xmm1
+    movq       xmm1, qword ptr [eax + esi]      // read 8 pixels from src_y1[0]
+    movq       xmm2, qword ptr [eax + esi + 2]  // read 8 pixels from src_y1[2]
+    punpcklbw  xmm1, xmm5
+    punpcklbw  xmm2, xmm5
+    psubw      xmm1, xmm2
+    movq       xmm2, qword ptr [eax + edi]      // read 8 pixels from src_y2[0]
+    movq       xmm3, qword ptr [eax + edi + 2]  // read 8 pixels from src_y2[2]
+    punpcklbw  xmm2, xmm5
+    punpcklbw  xmm3, xmm5
+    psubw      xmm2, xmm3
+    paddw      xmm0, xmm2
+    paddw      xmm0, xmm1
+    paddw      xmm0, xmm1
+    pxor       xmm1, xmm1   // abs = max(xmm0, -xmm0).  SSSE3 could use pabsw
+    psubw      xmm1, xmm0
+    pmaxsw     xmm0, xmm1
+    packuswb   xmm0, xmm0
+    sub        ecx, 8
+    movq       qword ptr [eax + edx], xmm0
+    lea        eax, [eax + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELXROW_SSE2
+
+#ifdef HAS_SOBELYROW_SSE2
+// SobelY as a matrix is
+// -1 -2 -1
+//  0  0  0
+//  1  2  1
+__declspec(naked) __declspec(align(16))
+void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
+                    uint8* dst_sobely, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_y0
+    mov        esi, [esp + 4 + 8]   // src_y1
+    mov        edx, [esp + 4 + 12]  // dst_sobely
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax
+    sub        edx, eax
+    pxor       xmm5, xmm5  // constant 0
+
+    align      4
+ convertloop:
+    movq       xmm0, qword ptr [eax]            // read 8 pixels from src_y0[0]
+    movq       xmm1, qword ptr [eax + esi]      // read 8 pixels from src_y1[0]
+    punpcklbw  xmm0, xmm5
+    punpcklbw  xmm1, xmm5
+    psubw      xmm0, xmm1
+    movq       xmm1, qword ptr [eax + 1]        // read 8 pixels from src_y0[1]
+    movq       xmm2, qword ptr [eax + esi + 1]  // read 8 pixels from src_y1[1]
+    punpcklbw  xmm1, xmm5
+    punpcklbw  xmm2, xmm5
+    psubw      xmm1, xmm2
+    movq       xmm2, qword ptr [eax + 2]        // read 8 pixels from src_y0[2]
+    movq       xmm3, qword ptr [eax + esi + 2]  // read 8 pixels from src_y1[2]
+    punpcklbw  xmm2, xmm5
+    punpcklbw  xmm3, xmm5
+    psubw      xmm2, xmm3
+    paddw      xmm0, xmm2
+    paddw      xmm0, xmm1
+    paddw      xmm0, xmm1
+    pxor       xmm1, xmm1   // abs = max(xmm0, -xmm0).  SSSE3 could use pabsw
+    psubw      xmm1, xmm0
+    pmaxsw     xmm0, xmm1
+    packuswb   xmm0, xmm0
+    sub        ecx, 8
+    movq       qword ptr [eax + edx], xmm0
+    lea        eax, [eax + 8]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELYROW_SSE2
+
+#ifdef HAS_SOBELROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
+// A = 255
+// R = Sobel
+// G = Sobel
+// B = Sobel
+__declspec(naked) __declspec(align(16))
+void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_sobelx
+    mov        esi, [esp + 4 + 8]   // src_sobely
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax
+    pcmpeqb    xmm5, xmm5           // alpha 255
+    pslld      xmm5, 24             // 0xff000000
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
+    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
+    lea        eax, [eax + 16]
+    paddusb    xmm0, xmm1             // sobel = sobelx + sobely
+    movdqa     xmm2, xmm0             // GG
+    punpcklbw  xmm2, xmm0             // First 8
+    punpckhbw  xmm0, xmm0             // Next 8
+    movdqa     xmm1, xmm2             // GGGG
+    punpcklwd  xmm1, xmm2             // First 4
+    punpckhwd  xmm2, xmm2             // Next 4
+    por        xmm1, xmm5             // GGGA
+    por        xmm2, xmm5
+    movdqa     xmm3, xmm0             // GGGG
+    punpcklwd  xmm3, xmm0             // Next 4
+    punpckhwd  xmm0, xmm0             // Last 4
+    por        xmm3, xmm5             // GGGA
+    por        xmm0, xmm5
+    sub        ecx, 16
+    movdqa     [edx], xmm1
+    movdqa     [edx + 16], xmm2
+    movdqa     [edx + 32], xmm3
+    movdqa     [edx + 48], xmm0
+    lea        edx, [edx + 64]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELROW_SSE2
+
+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane.
+__declspec(naked) __declspec(align(16))
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_sobelx
+    mov        esi, [esp + 4 + 8]   // src_sobely
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
+    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
+    lea        eax, [eax + 16]
+    paddusb    xmm0, xmm1             // sobel = sobelx + sobely
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELTOPLANEROW_SSE2
+
+#ifdef HAS_SOBELXYROW_SSE2
+// Mixes Sobel X, Sobel Y and Sobel into ARGB.
+// A = 255
+// R = Sobel X
+// G = Sobel
+// B = Sobel Y
+__declspec(naked) __declspec(align(16))
+void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                     uint8* dst_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   // src_sobelx
+    mov        esi, [esp + 4 + 8]   // src_sobely
+    mov        edx, [esp + 4 + 12]  // dst_argb
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax
+    pcmpeqb    xmm5, xmm5           // alpha 255
+
+    align      4
+ convertloop:
+    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
+    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
+    lea        eax, [eax + 16]
+    movdqa     xmm2, xmm0
+    paddusb    xmm2, xmm1             // sobel = sobelx + sobely
+    movdqa     xmm3, xmm0             // XA
+    punpcklbw  xmm3, xmm5
+    punpckhbw  xmm0, xmm5
+    movdqa     xmm4, xmm1             // YS
+    punpcklbw  xmm4, xmm2
+    punpckhbw  xmm1, xmm2
+    movdqa     xmm6, xmm4             // YSXA
+    punpcklwd  xmm6, xmm3             // First 4
+    punpckhwd  xmm4, xmm3             // Next 4
+    movdqa     xmm7, xmm1             // YSXA
+    punpcklwd  xmm7, xmm0             // Next 4
+    punpckhwd  xmm1, xmm0             // Last 4
+    sub        ecx, 16
+    movdqa     [edx], xmm6
+    movdqa     [edx + 16], xmm4
+    movdqa     [edx + 32], xmm7
+    movdqa     [edx + 48], xmm1
+    lea        edx, [edx + 64]
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELXYROW_SSE2
+
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+// Consider float CumulativeSum.
+// Consider calling CumulativeSum one row at time as needed.
+// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
+// Convert cumulative sum for an area to an average for 1 pixel.
+// topleft is pointer to top left of CumulativeSum buffer for area.
+// botleft is pointer to bottom left of CumulativeSum buffer.
+// width is offset from left to right of area in CumulativeSum buffer measured
+//   in number of ints.
+// area is the number of pixels in the area being averaged.
+// dst points to pixel to store result to.
+// count is number of averaged pixels to produce.
+// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
+// aligned.
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst,
+                                    int count) {
+  __asm {
+    mov        eax, topleft  // eax topleft
+    mov        esi, botleft  // esi botleft
+    mov        edx, width
+    movd       xmm5, area
+    mov        edi, dst
+    mov        ecx, count
+    cvtdq2ps   xmm5, xmm5
+    rcpss      xmm4, xmm5  // 1.0f / area
+    pshufd     xmm4, xmm4, 0
+    sub        ecx, 4
+    jl         l4b
+
+    cmp        area, 128  // 128 pixels will not overflow 15 bits.
+    ja         l4
+
+    pshufd     xmm5, xmm5, 0        // area
+    pcmpeqb    xmm6, xmm6           // constant of 65536.0 - 1 = 65535.0
+    psrld      xmm6, 16
+    cvtdq2ps   xmm6, xmm6
+    addps      xmm5, xmm6           // (65536.0 + area - 1)
+    mulps      xmm5, xmm4           // (65536.0 + area - 1) * 1 / area
+    cvtps2dq   xmm5, xmm5           // 0.16 fixed point
+    packssdw   xmm5, xmm5           // 16 bit shorts
+
+    // 4 pixel loop small blocks.
+    align      4
+  s4:
+    // top left
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+
+    // - top right
+    psubd      xmm0, [eax + edx * 4]
+    psubd      xmm1, [eax + edx * 4 + 16]
+    psubd      xmm2, [eax + edx * 4 + 32]
+    psubd      xmm3, [eax + edx * 4 + 48]
+    lea        eax, [eax + 64]
+
+    // - bottom left
+    psubd      xmm0, [esi]
+    psubd      xmm1, [esi + 16]
+    psubd      xmm2, [esi + 32]
+    psubd      xmm3, [esi + 48]
+
+    // + bottom right
+    paddd      xmm0, [esi + edx * 4]
+    paddd      xmm1, [esi + edx * 4 + 16]
+    paddd      xmm2, [esi + edx * 4 + 32]
+    paddd      xmm3, [esi + edx * 4 + 48]
+    lea        esi, [esi + 64]
+
+    packssdw   xmm0, xmm1  // pack 4 pixels into 2 registers
+    packssdw   xmm2, xmm3
+
+    pmulhuw    xmm0, xmm5
+    pmulhuw    xmm2, xmm5
+
+    packuswb   xmm0, xmm2
+    movdqu     [edi], xmm0
+    lea        edi, [edi + 16]
+    sub        ecx, 4
+    jge        s4
+
+    jmp        l4b
+
+    // 4 pixel loop
+    align      4
+  l4:
+    // top left
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+
+    // - top right
+    psubd      xmm0, [eax + edx * 4]
+    psubd      xmm1, [eax + edx * 4 + 16]
+    psubd      xmm2, [eax + edx * 4 + 32]
+    psubd      xmm3, [eax + edx * 4 + 48]
+    lea        eax, [eax + 64]
+
+    // - bottom left
+    psubd      xmm0, [esi]
+    psubd      xmm1, [esi + 16]
+    psubd      xmm2, [esi + 32]
+    psubd      xmm3, [esi + 48]
+
+    // + bottom right
+    paddd      xmm0, [esi + edx * 4]
+    paddd      xmm1, [esi + edx * 4 + 16]
+    paddd      xmm2, [esi + edx * 4 + 32]
+    paddd      xmm3, [esi + edx * 4 + 48]
+    lea        esi, [esi + 64]
+
+    cvtdq2ps   xmm0, xmm0   // Average = Sum * 1 / Area
+    cvtdq2ps   xmm1, xmm1
+    mulps      xmm0, xmm4
+    mulps      xmm1, xmm4
+    cvtdq2ps   xmm2, xmm2
+    cvtdq2ps   xmm3, xmm3
+    mulps      xmm2, xmm4
+    mulps      xmm3, xmm4
+    cvtps2dq   xmm0, xmm0
+    cvtps2dq   xmm1, xmm1
+    cvtps2dq   xmm2, xmm2
+    cvtps2dq   xmm3, xmm3
+    packssdw   xmm0, xmm1
+    packssdw   xmm2, xmm3
+    packuswb   xmm0, xmm2
+    movdqu     [edi], xmm0
+    lea        edi, [edi + 16]
+    sub        ecx, 4
+    jge        l4
+
+  l4b:
+    add        ecx, 4 - 1
+    jl         l1b
+
+    // 1 pixel loop
+    align      4
+  l1:
+    movdqa     xmm0, [eax]
+    psubd      xmm0, [eax + edx * 4]
+    lea        eax, [eax + 16]
+    psubd      xmm0, [esi]
+    paddd      xmm0, [esi + edx * 4]
+    lea        esi, [esi + 16]
+    cvtdq2ps   xmm0, xmm0
+    mulps      xmm0, xmm4
+    cvtps2dq   xmm0, xmm0
+    packssdw   xmm0, xmm0
+    packuswb   xmm0, xmm0
+    movd       dword ptr [edi], xmm0
+    lea        edi, [edi + 4]
+    sub        ecx, 1
+    jge        l1
+  l1b:
+  }
+}
+#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+
+#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
+// Creates a table of cumulative sums where each value is a sum of all values
+// above and to the left of the value.
+void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
+                                  const int32* previous_cumsum, int width) {
+  __asm {
+    mov        eax, row
+    mov        edx, cumsum
+    mov        esi, previous_cumsum
+    mov        ecx, width
+    pxor       xmm0, xmm0
+    pxor       xmm1, xmm1
+
+    sub        ecx, 4
+    jl         l4b
+    test       edx, 15
+    jne        l4b
+
+    // 4 pixel loop
+    align      4
+  l4:
+    movdqu     xmm2, [eax]  // 4 argb pixels 16 bytes.
+    lea        eax, [eax + 16]
+    movdqa     xmm4, xmm2
+
+    punpcklbw  xmm2, xmm1
+    movdqa     xmm3, xmm2
+    punpcklwd  xmm2, xmm1
+    punpckhwd  xmm3, xmm1
+
+    punpckhbw  xmm4, xmm1
+    movdqa     xmm5, xmm4
+    punpcklwd  xmm4, xmm1
+    punpckhwd  xmm5, xmm1
+
+    paddd      xmm0, xmm2
+    movdqa     xmm2, [esi]  // previous row above.
+    paddd      xmm2, xmm0
+
+    paddd      xmm0, xmm3
+    movdqa     xmm3, [esi + 16]
+    paddd      xmm3, xmm0
+
+    paddd      xmm0, xmm4
+    movdqa     xmm4, [esi + 32]
+    paddd      xmm4, xmm0
+
+    paddd      xmm0, xmm5
+    movdqa     xmm5, [esi + 48]
+    lea        esi, [esi + 64]
+    paddd      xmm5, xmm0
+
+    movdqa     [edx], xmm2
+    movdqa     [edx + 16], xmm3
+    movdqa     [edx + 32], xmm4
+    movdqa     [edx + 48], xmm5
+
+    lea        edx, [edx + 64]
+    sub        ecx, 4
+    jge        l4
+
+  l4b:
+    add        ecx, 4 - 1
+    jl         l1b
+
+    // 1 pixel loop
+    align      4
+  l1:
+    movd       xmm2, dword ptr [eax]  // 1 argb pixel 4 bytes.
+    lea        eax, [eax + 4]
+    punpcklbw  xmm2, xmm1
+    punpcklwd  xmm2, xmm1
+    paddd      xmm0, xmm2
+    movdqu     xmm2, [esi]
+    lea        esi, [esi + 16]
+    paddd      xmm2, xmm0
+    movdqu     [edx], xmm2
+    lea        edx, [edx + 16]
+    sub        ecx, 1
+    jge        l1
+
+ l1b:
+  }
+}
+#endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
+
+#ifdef HAS_ARGBAFFINEROW_SSE2
+// Copy ARGB pixels from source image with slope to a row of destination.
+__declspec(naked) __declspec(align(16))
+LIBYUV_API
+void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
+                        uint8* dst_argb, const float* uv_dudv, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 12]  // src_argb
+    mov        esi, [esp + 16]  // stride
+    mov        edx, [esp + 20]  // dst_argb
+    mov        ecx, [esp + 24]  // pointer to uv_dudv
+    movq       xmm2, qword ptr [ecx]  // uv
+    movq       xmm7, qword ptr [ecx + 8]  // dudv
+    mov        ecx, [esp + 28]  // width
+    shl        esi, 16          // 4, stride
+    add        esi, 4
+    movd       xmm5, esi
+    sub        ecx, 4
+    jl         l4b
+
+    // setup for 4 pixel loop
+    pshufd     xmm7, xmm7, 0x44  // dup dudv
+    pshufd     xmm5, xmm5, 0  // dup 4, stride
+    movdqa     xmm0, xmm2    // x0, y0, x1, y1
+    addps      xmm0, xmm7
+    movlhps    xmm2, xmm0
+    movdqa     xmm4, xmm7
+    addps      xmm4, xmm4    // dudv *= 2
+    movdqa     xmm3, xmm2    // x2, y2, x3, y3
+    addps      xmm3, xmm4
+    addps      xmm4, xmm4    // dudv *= 4
+
+    // 4 pixel loop
+    align      4
+  l4:
+    cvttps2dq  xmm0, xmm2    // x, y float to int first 2
+    cvttps2dq  xmm1, xmm3    // x, y float to int next 2
+    packssdw   xmm0, xmm1    // x, y as 8 shorts
+    pmaddwd    xmm0, xmm5    // offsets = x * 4 + y * stride.
+    movd       esi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // shift right
+    movd       edi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // shift right
+    movd       xmm1, [eax + esi]  // read pixel 0
+    movd       xmm6, [eax + edi]  // read pixel 1
+    punpckldq  xmm1, xmm6     // combine pixel 0 and 1
+    addps      xmm2, xmm4    // x, y += dx, dy first 2
+    movq       qword ptr [edx], xmm1
+    movd       esi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // shift right
+    movd       edi, xmm0
+    movd       xmm6, [eax + esi]  // read pixel 2
+    movd       xmm0, [eax + edi]  // read pixel 3
+    punpckldq  xmm6, xmm0     // combine pixel 2 and 3
+    addps      xmm3, xmm4    // x, y += dx, dy next 2
+    sub        ecx, 4
+    movq       qword ptr 8[edx], xmm6
+    lea        edx, [edx + 16]
+    jge        l4
+
+  l4b:
+    add        ecx, 4 - 1
+    jl         l1b
+
+    // 1 pixel loop
+    align      4
+  l1:
+    cvttps2dq  xmm0, xmm2    // x, y float to int
+    packssdw   xmm0, xmm0    // x, y as shorts
+    pmaddwd    xmm0, xmm5    // offset = x * 4 + y * stride
+    addps      xmm2, xmm7    // x, y += dx, dy
+    movd       esi, xmm0
+    movd       xmm0, [eax + esi]  // copy a pixel
+    sub        ecx, 1
+    movd       [edx], xmm0
+    lea        edx, [edx + 4]
+    jge        l1
+  l1b:
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBAFFINEROW_SSE2
+
+#ifdef HAS_INTERPOLATEROW_AVX2
+// Bilinear filter 16x2 -> 16x1
+__declspec(naked) __declspec(align(16))
+void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
+                          ptrdiff_t src_stride, int dst_width,
+                          int source_y_fraction) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]   // dst_ptr
+    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edx, [esp + 8 + 12]  // src_stride
+    mov        ecx, [esp + 8 + 16]  // dst_width
+    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
+    shr        eax, 1
+    // Dispatch to specialized filters if applicable.
+    cmp        eax, 0
+    je         xloop100  // 0 / 128.  Blend 100 / 0.
+    sub        edi, esi
+    cmp        eax, 32
+    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
+    cmp        eax, 64
+    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
+    cmp        eax, 96
+    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
+
+    vmovd      xmm0, eax  // high fraction 0..127
+    neg        eax
+    add        eax, 128
+    vmovd      xmm5, eax  // low fraction 128..1
+    vpunpcklbw xmm5, xmm5, xmm0
+    vpunpcklwd xmm5, xmm5, xmm5
+    vpxor      ymm0, ymm0, ymm0
+    vpermd     ymm5, ymm0, ymm5
+
+    align      4
+  xloop:
+    vmovdqu    ymm0, [esi]
+    vmovdqu    ymm2, [esi + edx]
+    vpunpckhbw ymm1, ymm0, ymm2  // mutates
+    vpunpcklbw ymm0, ymm0, ymm2  // mutates
+    vpmaddubsw ymm0, ymm0, ymm5
+    vpmaddubsw ymm1, ymm1, ymm5
+    vpsrlw     ymm0, ymm0, 7
+    vpsrlw     ymm1, ymm1, 7
+    vpackuswb  ymm0, ymm0, ymm1  // unmutates
+    sub        ecx, 32
+    vmovdqu    [esi + edi], ymm0
+    lea        esi, [esi + 32]
+    jg         xloop
+    jmp        xloop99
+
+    // Blend 25 / 75.
+    align      4
+  xloop25:
+    vmovdqu    ymm0, [esi]
+    vpavgb     ymm0, ymm0, [esi + edx]
+    vpavgb     ymm0, ymm0, [esi + edx]
+    sub        ecx, 32
+    vmovdqu    [esi + edi], ymm0
+    lea        esi, [esi + 32]
+    jg         xloop25
+    jmp        xloop99
+
+    // Blend 50 / 50.
+    align      4
+  xloop50:
+    vmovdqu    ymm0, [esi]
+    vpavgb     ymm0, ymm0, [esi + edx]
+    sub        ecx, 32
+    vmovdqu    [esi + edi], ymm0
+    lea        esi, [esi + 32]
+    jg         xloop50
+    jmp        xloop99
+
+    // Blend 75 / 25.
+    align      4
+  xloop75:
+    vmovdqu    ymm0, [esi + edx]
+    vpavgb     ymm0, ymm0, [esi]
+    vpavgb     ymm0, ymm0, [esi]
+    sub        ecx, 32
+    vmovdqu     [esi + edi], ymm0
+    lea        esi, [esi + 32]
+    jg         xloop75
+    jmp        xloop99
+
+    // Blend 100 / 0 - Copy row unchanged.
+    align      4
+  xloop100:
+    rep movsb
+
+  xloop99:
+    pop        edi
+    pop        esi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_INTERPOLATEROW_AVX2
+
+#ifdef HAS_INTERPOLATEROW_SSSE3
+// Bilinear filter 16x2 -> 16x1
+__declspec(naked) __declspec(align(16))
+void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                          ptrdiff_t src_stride, int dst_width,
+                          int source_y_fraction) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]   // dst_ptr
+    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edx, [esp + 8 + 12]  // src_stride
+    mov        ecx, [esp + 8 + 16]  // dst_width
+    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
+    sub        edi, esi
+    shr        eax, 1
+    // Dispatch to specialized filters if applicable.
+    cmp        eax, 0
+    je         xloop100  // 0 / 128.  Blend 100 / 0.
+    cmp        eax, 32
+    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
+    cmp        eax, 64
+    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
+    cmp        eax, 96
+    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
+
+    movd       xmm0, eax  // high fraction 0..127
+    neg        eax
+    add        eax, 128
+    movd       xmm5, eax  // low fraction 128..1
+    punpcklbw  xmm5, xmm0
+    punpcklwd  xmm5, xmm5
+    pshufd     xmm5, xmm5, 0
+
+    align      4
+  xloop:
+    movdqa     xmm0, [esi]
+    movdqa     xmm2, [esi + edx]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm2
+    punpckhbw  xmm1, xmm2
+    pmaddubsw  xmm0, xmm5
+    pmaddubsw  xmm1, xmm5
+    psrlw      xmm0, 7
+    psrlw      xmm1, 7
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop
+    jmp        xloop99
+
+    // Blend 25 / 75.
+    align      4
+  xloop25:
+    movdqa     xmm0, [esi]
+    movdqa     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop25
+    jmp        xloop99
+
+    // Blend 50 / 50.
+    align      4
+  xloop50:
+    movdqa     xmm0, [esi]
+    movdqa     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop50
+    jmp        xloop99
+
+    // Blend 75 / 25.
+    align      4
+  xloop75:
+    movdqa     xmm1, [esi]
+    movdqa     xmm0, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop75
+    jmp        xloop99
+
+    // Blend 100 / 0 - Copy row unchanged.
+    align      4
+  xloop100:
+    movdqa     xmm0, [esi]
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop100
+
+  xloop99:
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_INTERPOLATEROW_SSSE3
+
+#ifdef HAS_INTERPOLATEROW_SSE2
+// Bilinear filter 16x2 -> 16x1
+__declspec(naked) __declspec(align(16))
+void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                         ptrdiff_t src_stride, int dst_width,
+                         int source_y_fraction) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]   // dst_ptr
+    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edx, [esp + 8 + 12]  // src_stride
+    mov        ecx, [esp + 8 + 16]  // dst_width
+    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
+    sub        edi, esi
+    // Dispatch to specialized filters if applicable.
+    cmp        eax, 0
+    je         xloop100  // 0 / 256.  Blend 100 / 0.
+    cmp        eax, 64
+    je         xloop75   // 64 / 256 is 0.25.  Blend 75 / 25.
+    cmp        eax, 128
+    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
+    cmp        eax, 192
+    je         xloop25   // 192 / 256 is 0.75.  Blend 25 / 75.
+
+    movd       xmm5, eax            // xmm5 = y fraction
+    punpcklbw  xmm5, xmm5
+    psrlw      xmm5, 1
+    punpcklwd  xmm5, xmm5
+    punpckldq  xmm5, xmm5
+    punpcklqdq xmm5, xmm5
+    pxor       xmm4, xmm4
+
+    align      4
+  xloop:
+    movdqa     xmm0, [esi]  // row0
+    movdqa     xmm2, [esi + edx]  // row1
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    punpcklbw  xmm2, xmm4
+    punpckhbw  xmm3, xmm4
+    punpcklbw  xmm0, xmm4
+    punpckhbw  xmm1, xmm4
+    psubw      xmm2, xmm0  // row1 - row0
+    psubw      xmm3, xmm1
+    paddw      xmm2, xmm2  // 9 bits * 15 bits = 8.16
+    paddw      xmm3, xmm3
+    pmulhw     xmm2, xmm5  // scale diff
+    pmulhw     xmm3, xmm5
+    paddw      xmm0, xmm2  // sum rows
+    paddw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop
+    jmp        xloop99
+
+    // Blend 25 / 75.
+    align      4
+  xloop25:
+    movdqa     xmm0, [esi]
+    movdqa     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop25
+    jmp        xloop99
+
+    // Blend 50 / 50.
+    align      4
+  xloop50:
+    movdqa     xmm0, [esi]
+    movdqa     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop50
+    jmp        xloop99
+
+    // Blend 75 / 25.
+    align      4
+  xloop75:
+    movdqa     xmm1, [esi]
+    movdqa     xmm0, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop75
+    jmp        xloop99
+
+    // Blend 100 / 0 - Copy row unchanged.
+    align      4
+  xloop100:
+    movdqa     xmm0, [esi]
+    sub        ecx, 16
+    movdqa     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop100
+
+  xloop99:
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_INTERPOLATEROW_SSE2
+
+// Bilinear filter 16x2 -> 16x1
+__declspec(naked) __declspec(align(16))
+void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                                    ptrdiff_t src_stride, int dst_width,
+                                    int source_y_fraction) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]   // dst_ptr
+    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edx, [esp + 8 + 12]  // src_stride
+    mov        ecx, [esp + 8 + 16]  // dst_width
+    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
+    sub        edi, esi
+    shr        eax, 1
+    // Dispatch to specialized filters if applicable.
+    cmp        eax, 0
+    je         xloop100  // 0 / 128.  Blend 100 / 0.
+    cmp        eax, 32
+    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
+    cmp        eax, 64
+    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
+    cmp        eax, 96
+    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
+
+    movd       xmm0, eax  // high fraction 0..127
+    neg        eax
+    add        eax, 128
+    movd       xmm5, eax  // low fraction 128..1
+    punpcklbw  xmm5, xmm0
+    punpcklwd  xmm5, xmm5
+    pshufd     xmm5, xmm5, 0
+
+    align      4
+  xloop:
+    movdqu     xmm0, [esi]
+    movdqu     xmm2, [esi + edx]
+    movdqu     xmm1, xmm0
+    punpcklbw  xmm0, xmm2
+    punpckhbw  xmm1, xmm2
+    pmaddubsw  xmm0, xmm5
+    pmaddubsw  xmm1, xmm5
+    psrlw      xmm0, 7
+    psrlw      xmm1, 7
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop
+    jmp        xloop99
+
+    // Blend 25 / 75.
+    align      4
+  xloop25:
+    movdqu     xmm0, [esi]
+    movdqu     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop25
+    jmp        xloop99
+
+    // Blend 50 / 50.
+    align      4
+  xloop50:
+    movdqu     xmm0, [esi]
+    movdqu     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop50
+    jmp        xloop99
+
+    // Blend 75 / 25.
+    align      4
+  xloop75:
+    movdqu     xmm1, [esi]
+    movdqu     xmm0, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop75
+    jmp        xloop99
+
+    // Blend 100 / 0 - Copy row unchanged.
+    align      4
+  xloop100:
+    movdqu     xmm0, [esi]
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop100
+
+  xloop99:
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+#ifdef HAS_INTERPOLATEROW_SSE2
+// Bilinear filter 16x2 -> 16x1
+__declspec(naked) __declspec(align(16))
+void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                                   ptrdiff_t src_stride, int dst_width,
+                                   int source_y_fraction) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]   // dst_ptr
+    mov        esi, [esp + 8 + 8]   // src_ptr
+    mov        edx, [esp + 8 + 12]  // src_stride
+    mov        ecx, [esp + 8 + 16]  // dst_width
+    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
+    sub        edi, esi
+    // Dispatch to specialized filters if applicable.
+    cmp        eax, 0
+    je         xloop100  // 0 / 256.  Blend 100 / 0.
+    cmp        eax, 64
+    je         xloop75   // 64 / 256 is 0.25.  Blend 75 / 25.
+    cmp        eax, 128
+    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
+    cmp        eax, 192
+    je         xloop25   // 192 / 256 is 0.75.  Blend 25 / 75.
+
+    movd       xmm5, eax            // xmm5 = y fraction
+    punpcklbw  xmm5, xmm5
+    psrlw      xmm5, 1
+    punpcklwd  xmm5, xmm5
+    punpckldq  xmm5, xmm5
+    punpcklqdq xmm5, xmm5
+    pxor       xmm4, xmm4
+
+    align      4
+  xloop:
+    movdqu     xmm0, [esi]  // row0
+    movdqu     xmm2, [esi + edx]  // row1
+    movdqu     xmm1, xmm0
+    movdqu     xmm3, xmm2
+    punpcklbw  xmm2, xmm4
+    punpckhbw  xmm3, xmm4
+    punpcklbw  xmm0, xmm4
+    punpckhbw  xmm1, xmm4
+    psubw      xmm2, xmm0  // row1 - row0
+    psubw      xmm3, xmm1
+    paddw      xmm2, xmm2  // 9 bits * 15 bits = 8.16
+    paddw      xmm3, xmm3
+    pmulhw     xmm2, xmm5  // scale diff
+    pmulhw     xmm3, xmm5
+    paddw      xmm0, xmm2  // sum rows
+    paddw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop
+    jmp        xloop99
+
+    // Blend 25 / 75.
+    align      4
+  xloop25:
+    movdqu     xmm0, [esi]
+    movdqu     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop25
+    jmp        xloop99
+
+    // Blend 50 / 50.
+    align      4
+  xloop50:
+    movdqu     xmm0, [esi]
+    movdqu     xmm1, [esi + edx]
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop50
+    jmp        xloop99
+
+    // Blend 75 / 25.
+    align      4
+  xloop75:
+    movdqu     xmm1, [esi]
+    movdqu     xmm0, [esi + edx]
+    pavgb      xmm0, xmm1
+    pavgb      xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop75
+    jmp        xloop99
+
+    // Blend 100 / 0 - Copy row unchanged.
+    align      4
+  xloop100:
+    movdqu     xmm0, [esi]
+    sub        ecx, 16
+    movdqu     [esi + edi], xmm0
+    lea        esi, [esi + 16]
+    jg         xloop100
+
+  xloop99:
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_INTERPOLATEROW_SSE2
+
+__declspec(naked) __declspec(align(16))
+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // src_uv_stride
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    sub        edi, eax
+
+    align      4
+  convertloop:
+    movdqa     xmm0, [eax]
+    pavgb      xmm0, [eax + edx]
+    sub        ecx, 16
+    movdqa     [eax + edi], xmm0
+    lea        eax,  [eax + 16]
+    jg         convertloop
+    pop        edi
+    ret
+  }
+}
+
+#ifdef HAS_HALFROW_AVX2
+__declspec(naked) __declspec(align(16))
+void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // src_uv_stride
+    mov        edi, [esp + 4 + 12]   // dst_v
+    mov        ecx, [esp + 4 + 16]   // pix
+    sub        edi, eax
+
+    align      4
+  convertloop:
+    vmovdqu    ymm0, [eax]
+    vpavgb     ymm0, ymm0, [eax + edx]
+    sub        ecx, 32
+    vmovdqu    [eax + edi], ymm0
+    lea        eax,  [eax + 32]
+    jg         convertloop
+
+    pop        edi
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_HALFROW_AVX2
+
+__declspec(naked) __declspec(align(16))
+void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
+                          uint32 selector, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_argb
+    mov        edx, [esp + 8]    // dst_bayer
+    movd       xmm5, [esp + 12]  // selector
+    mov        ecx, [esp + 16]   // pix
+    pshufd     xmm5, xmm5, 0
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    pshufb     xmm0, xmm5
+    pshufb     xmm1, xmm5
+    punpckldq  xmm0, xmm1
+    sub        ecx, 8
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         wloop
+    ret
+  }
+}
+
+// Specialized ARGB to Bayer that just isolates G channel.
+__declspec(naked) __declspec(align(16))
+void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
+                           uint32 selector, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_argb
+    mov        edx, [esp + 8]    // dst_bayer
+                                 // selector
+    mov        ecx, [esp + 16]   // pix
+    pcmpeqb    xmm5, xmm5        // generate mask 0x000000ff
+    psrld      xmm5, 24
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    psrld      xmm0, 8  // Move green to bottom.
+    psrld      xmm1, 8
+    pand       xmm0, xmm5
+    pand       xmm1, xmm5
+    packssdw   xmm0, xmm1
+    packuswb   xmm0, xmm1
+    sub        ecx, 8
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         wloop
+    ret
+  }
+}
+
+// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
+__declspec(naked) __declspec(align(16))
+void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                          const uint8* shuffler, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_argb
+    mov        edx, [esp + 8]    // dst_argb
+    mov        ecx, [esp + 12]   // shuffler
+    movdqa     xmm5, [ecx]
+    mov        ecx, [esp + 16]   // pix
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    pshufb     xmm0, xmm5
+    pshufb     xmm1, xmm5
+    sub        ecx, 8
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    jg         wloop
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                    const uint8* shuffler, int pix) {
+  __asm {
+    mov        eax, [esp + 4]    // src_argb
+    mov        edx, [esp + 8]    // dst_argb
+    mov        ecx, [esp + 12]   // shuffler
+    movdqa     xmm5, [ecx]
+    mov        ecx, [esp + 16]   // pix
+
+    align      4
+  wloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax, [eax + 32]
+    pshufb     xmm0, xmm5
+    pshufb     xmm1, xmm5
+    sub        ecx, 8
+    movdqu     [edx], xmm0
+    movdqu     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    jg         wloop
+    ret
+  }
+}
+
+#ifdef HAS_ARGBSHUFFLEROW_AVX2
+__declspec(naked) __declspec(align(16))
+void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  __asm {
+    mov        eax, [esp + 4]     // src_argb
+    mov        edx, [esp + 8]     // dst_argb
+    mov        ecx, [esp + 12]    // shuffler
+    vbroadcastf128 ymm5, [ecx]    // same shuffle in high as low.
+    mov        ecx, [esp + 16]    // pix
+
+    align      4
+  wloop:
+    vmovdqu    ymm0, [eax]
+    vmovdqu    ymm1, [eax + 32]
+    lea        eax, [eax + 64]
+    vpshufb    ymm0, ymm0, ymm5
+    vpshufb    ymm1, ymm1, ymm5
+    sub        ecx, 16
+    vmovdqu    [edx], ymm0
+    vmovdqu    [edx + 32], ymm1
+    lea        edx, [edx + 64]
+    jg         wloop
+
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBSHUFFLEROW_AVX2
+
+__declspec(naked) __declspec(align(16))
+void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
+                         const uint8* shuffler, int pix) {
+  __asm {
+    push       ebx
+    push       esi
+    mov        eax, [esp + 8 + 4]    // src_argb
+    mov        edx, [esp + 8 + 8]    // dst_argb
+    mov        esi, [esp + 8 + 12]   // shuffler
+    mov        ecx, [esp + 8 + 16]   // pix
+    pxor       xmm5, xmm5
+
+    mov        ebx, [esi]   // shuffler
+    cmp        ebx, 0x03000102
+    je         shuf_3012
+    cmp        ebx, 0x00010203
+    je         shuf_0123
+    cmp        ebx, 0x00030201
+    je         shuf_0321
+    cmp        ebx, 0x02010003
+    je         shuf_2103
+
+  // TODO(fbarchard): Use one source pointer and 3 offsets.
+  shuf_any1:
+    movzx      ebx, byte ptr [esi]
+    movzx      ebx, byte ptr [eax + ebx]
+    mov        [edx], bl
+    movzx      ebx, byte ptr [esi + 1]
+    movzx      ebx, byte ptr [eax + ebx]
+    mov        [edx + 1], bl
+    movzx      ebx, byte ptr [esi + 2]
+    movzx      ebx, byte ptr [eax + ebx]
+    mov        [edx + 2], bl
+    movzx      ebx, byte ptr [esi + 3]
+    movzx      ebx, byte ptr [eax + ebx]
+    mov        [edx + 3], bl
+    lea        eax, [eax + 4]
+    lea        edx, [edx + 4]
+    sub        ecx, 1
+    jg         shuf_any1
+    jmp        shuf99
+
+    align      4
+  shuf_0123:
+    movdqu     xmm0, [eax]
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm5
+    punpckhbw  xmm1, xmm5
+    pshufhw    xmm0, xmm0, 01Bh   // 1B = 00011011 = 0x0123 = BGRAToARGB
+    pshuflw    xmm0, xmm0, 01Bh
+    pshufhw    xmm1, xmm1, 01Bh
+    pshuflw    xmm1, xmm1, 01Bh
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         shuf_0123
+    jmp        shuf99
+
+    align      4
+  shuf_0321:
+    movdqu     xmm0, [eax]
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm5
+    punpckhbw  xmm1, xmm5
+    pshufhw    xmm0, xmm0, 039h   // 39 = 00111001 = 0x0321 = RGBAToARGB
+    pshuflw    xmm0, xmm0, 039h
+    pshufhw    xmm1, xmm1, 039h
+    pshuflw    xmm1, xmm1, 039h
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         shuf_0321
+    jmp        shuf99
+
+    align      4
+  shuf_2103:
+    movdqu     xmm0, [eax]
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm5
+    punpckhbw  xmm1, xmm5
+    pshufhw    xmm0, xmm0, 093h   // 93 = 10010011 = 0x2103 = ARGBToRGBA
+    pshuflw    xmm0, xmm0, 093h
+    pshufhw    xmm1, xmm1, 093h
+    pshuflw    xmm1, xmm1, 093h
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         shuf_2103
+    jmp        shuf99
+
+    align      4
+  shuf_3012:
+    movdqu     xmm0, [eax]
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm5
+    punpckhbw  xmm1, xmm5
+    pshufhw    xmm0, xmm0, 0C6h   // C6 = 11000110 = 0x3012 = ABGRToARGB
+    pshuflw    xmm0, xmm0, 0C6h
+    pshufhw    xmm1, xmm1, 0C6h
+    pshuflw    xmm1, xmm1, 0C6h
+    packuswb   xmm0, xmm1
+    sub        ecx, 4
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         shuf_3012
+
+  shuf99:
+    pop        esi
+    pop        ebx
+    ret
+  }
+}
+
+// YUY2 - Macro-pixel = 2 image pixels
+// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
+
+// UYVY - Macro-pixel = 2 image pixels
+// U0Y0V0Y1
+
+__declspec(naked) __declspec(align(16))
+void I422ToYUY2Row_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_frame, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_y
+    mov        esi, [esp + 8 + 8]    // src_u
+    mov        edx, [esp + 8 + 12]   // src_v
+    mov        edi, [esp + 8 + 16]   // dst_frame
+    mov        ecx, [esp + 8 + 20]   // width
+    sub        edx, esi
+
+    align      4
+  convertloop:
+    movq       xmm2, qword ptr [esi] // U
+    movq       xmm3, qword ptr [esi + edx] // V
+    lea        esi, [esi + 8]
+    punpcklbw  xmm2, xmm3 // UV
+    movdqu     xmm0, [eax] // Y
+    lea        eax, [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm2 // YUYV
+    punpckhbw  xmm1, xmm2
+    movdqu     [edi], xmm0
+    movdqu     [edi + 16], xmm1
+    lea        edi, [edi + 32]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+__declspec(naked) __declspec(align(16))
+void I422ToUYVYRow_SSE2(const uint8* src_y,
+                        const uint8* src_u,
+                        const uint8* src_v,
+                        uint8* dst_frame, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_y
+    mov        esi, [esp + 8 + 8]    // src_u
+    mov        edx, [esp + 8 + 12]   // src_v
+    mov        edi, [esp + 8 + 16]   // dst_frame
+    mov        ecx, [esp + 8 + 20]   // width
+    sub        edx, esi
+
+    align      4
+  convertloop:
+    movq       xmm2, qword ptr [esi] // U
+    movq       xmm3, qword ptr [esi + edx] // V
+    lea        esi, [esi + 8]
+    punpcklbw  xmm2, xmm3 // UV
+    movdqu     xmm0, [eax] // Y
+    movdqa     xmm1, xmm2
+    lea        eax, [eax + 16]
+    punpcklbw  xmm1, xmm0 // UYVY
+    punpckhbw  xmm2, xmm0
+    movdqu     [edi], xmm1
+    movdqu     [edi + 16], xmm2
+    lea        edi, [edi + 32]
+    sub        ecx, 16
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
+__declspec(naked) __declspec(align(16))
+void ARGBPolynomialRow_SSE2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   /* src_argb */
+    mov        edx, [esp + 4 + 8]   /* dst_argb */
+    mov        esi, [esp + 4 + 12]  /* poly */
+    mov        ecx, [esp + 4 + 16]  /* width */
+    pxor       xmm3, xmm3  // 0 constant for zero extending bytes to ints.
+
+    // 2 pixel loop.
+    align      4
+ convertloop:
+//    pmovzxbd  xmm0, dword ptr [eax]  // BGRA pixel
+//    pmovzxbd  xmm4, dword ptr [eax + 4]  // BGRA pixel
+    movq       xmm0, qword ptr [eax]  // BGRABGRA
+    lea        eax, [eax + 8]
+    punpcklbw  xmm0, xmm3
+    movdqa     xmm4, xmm0
+    punpcklwd  xmm0, xmm3  // pixel 0
+    punpckhwd  xmm4, xmm3  // pixel 1
+    cvtdq2ps   xmm0, xmm0  // 4 floats
+    cvtdq2ps   xmm4, xmm4
+    movdqa     xmm1, xmm0  // X
+    movdqa     xmm5, xmm4
+    mulps      xmm0, [esi + 16]  // C1 * X
+    mulps      xmm4, [esi + 16]
+    addps      xmm0, [esi]  // result = C0 + C1 * X
+    addps      xmm4, [esi]
+    movdqa     xmm2, xmm1
+    movdqa     xmm6, xmm5
+    mulps      xmm2, xmm1  // X * X
+    mulps      xmm6, xmm5
+    mulps      xmm1, xmm2  // X * X * X
+    mulps      xmm5, xmm6
+    mulps      xmm2, [esi + 32]  // C2 * X * X
+    mulps      xmm6, [esi + 32]
+    mulps      xmm1, [esi + 48]  // C3 * X * X * X
+    mulps      xmm5, [esi + 48]
+    addps      xmm0, xmm2  // result += C2 * X * X
+    addps      xmm4, xmm6
+    addps      xmm0, xmm1  // result += C3 * X * X * X
+    addps      xmm4, xmm5
+    cvttps2dq  xmm0, xmm0
+    cvttps2dq  xmm4, xmm4
+    packuswb   xmm0, xmm4
+    packuswb   xmm0, xmm0
+    sub        ecx, 2
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         convertloop
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
+
+#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
+__declspec(naked) __declspec(align(16))
+void ARGBPolynomialRow_AVX2(const uint8* src_argb,
+                            uint8* dst_argb, const float* poly,
+                            int width) {
+  __asm {
+    mov        eax, [esp + 4]   /* src_argb */
+    mov        edx, [esp + 8]   /* dst_argb */
+    mov        ecx, [esp + 12]   /* poly */
+    vbroadcastf128 ymm4, [ecx]       // C0
+    vbroadcastf128 ymm5, [ecx + 16]  // C1
+    vbroadcastf128 ymm6, [ecx + 32]  // C2
+    vbroadcastf128 ymm7, [ecx + 48]  // C3
+    mov        ecx, [esp + 16]  /* width */
+
+    // 2 pixel loop.
+    align      4
+ convertloop:
+    vpmovzxbd   ymm0, qword ptr [eax]  // 2 BGRA pixels
+    lea         eax, [eax + 8]
+    vcvtdq2ps   ymm0, ymm0        // X 8 floats
+    vmulps      ymm2, ymm0, ymm0  // X * X
+    vmulps      ymm3, ymm0, ymm7  // C3 * X
+    vfmadd132ps ymm0, ymm4, ymm5  // result = C0 + C1 * X
+    vfmadd231ps ymm0, ymm2, ymm6  // result += C2 * X * X
+    vfmadd231ps ymm0, ymm2, ymm3  // result += C3 * X * X * X
+    vcvttps2dq  ymm0, ymm0
+    vpackusdw   ymm0, ymm0, ymm0  // b0g0r0a0_00000000_b0g0r0a0_00000000
+    vpermq      ymm0, ymm0, 0xd8  // b0g0r0a0_b0g0r0a0_00000000_00000000
+    vpackuswb   xmm0, xmm0, xmm0  // bgrabgra_00000000_00000000_00000000
+    sub         ecx, 2
+    vmovq       qword ptr [edx], xmm0
+    lea         edx, [edx + 8]
+    jg          convertloop
+    vzeroupper
+    ret
+  }
+}
+#endif  // HAS_ARGBPOLYNOMIALROW_AVX2
+
+#ifdef HAS_ARGBCOLORTABLEROW_X86
+// Tranform ARGB pixels with color table.
+__declspec(naked) __declspec(align(16))
+void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
+                           int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   /* dst_argb */
+    mov        esi, [esp + 4 + 8]   /* table_argb */
+    mov        ecx, [esp + 4 + 12]  /* width */
+
+    // 1 pixel loop.
+    align      4
+  convertloop:
+    movzx      edx, byte ptr [eax]
+    lea        eax, [eax + 4]
+    movzx      edx, byte ptr [esi + edx * 4]
+    mov        byte ptr [eax - 4], dl
+    movzx      edx, byte ptr [eax - 4 + 1]
+    movzx      edx, byte ptr [esi + edx * 4 + 1]
+    mov        byte ptr [eax - 4 + 1], dl
+    movzx      edx, byte ptr [eax - 4 + 2]
+    movzx      edx, byte ptr [esi + edx * 4 + 2]
+    mov        byte ptr [eax - 4 + 2], dl
+    movzx      edx, byte ptr [eax - 4 + 3]
+    movzx      edx, byte ptr [esi + edx * 4 + 3]
+    mov        byte ptr [eax - 4 + 3], dl
+    dec        ecx
+    jg         convertloop
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBCOLORTABLEROW_X86
+
+#ifdef HAS_RGBCOLORTABLEROW_X86
+// Tranform RGB pixels with color table.
+__declspec(naked) __declspec(align(16))
+void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]   /* dst_argb */
+    mov        esi, [esp + 4 + 8]   /* table_argb */
+    mov        ecx, [esp + 4 + 12]  /* width */
+
+    // 1 pixel loop.
+    align      4
+  convertloop:
+    movzx      edx, byte ptr [eax]
+    lea        eax, [eax + 4]
+    movzx      edx, byte ptr [esi + edx * 4]
+    mov        byte ptr [eax - 4], dl
+    movzx      edx, byte ptr [eax - 4 + 1]
+    movzx      edx, byte ptr [esi + edx * 4 + 1]
+    mov        byte ptr [eax - 4 + 1], dl
+    movzx      edx, byte ptr [eax - 4 + 2]
+    movzx      edx, byte ptr [esi + edx * 4 + 2]
+    mov        byte ptr [eax - 4 + 2], dl
+    dec        ecx
+    jg         convertloop
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_RGBCOLORTABLEROW_X86
+
+#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
+// Tranform RGB pixels with luma table.
+__declspec(naked) __declspec(align(16))
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
+                                 int width,
+                                 const uint8* luma, uint32 lumacoeff) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   /* src_argb */
+    mov        edi, [esp + 8 + 8]   /* dst_argb */
+    mov        ecx, [esp + 8 + 12]  /* width */
+    movd       xmm2, dword ptr [esp + 8 + 16]  // luma table
+    movd       xmm3, dword ptr [esp + 8 + 20]  // lumacoeff
+    pshufd     xmm2, xmm2, 0
+    pshufd     xmm3, xmm3, 0
+    pcmpeqb    xmm4, xmm4        // generate mask 0xff00ff00
+    psllw      xmm4, 8
+    pxor       xmm5, xmm5
+
+    // 4 pixel loop.
+    align      4
+  convertloop:
+    movdqu     xmm0, qword ptr [eax]      // generate luma ptr
+    pmaddubsw  xmm0, xmm3
+    phaddw     xmm0, xmm0
+    pand       xmm0, xmm4  // mask out low bits
+    punpcklwd  xmm0, xmm5
+    paddd      xmm0, xmm2  // add table base
+    movd       esi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
+
+    movzx      edx, byte ptr [eax]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi], dl
+    movzx      edx, byte ptr [eax + 1]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 1], dl
+    movzx      edx, byte ptr [eax + 2]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 2], dl
+    movzx      edx, byte ptr [eax + 3]  // copy alpha.
+    mov        byte ptr [edi + 3], dl
+
+    movd       esi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
+
+    movzx      edx, byte ptr [eax + 4]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 4], dl
+    movzx      edx, byte ptr [eax + 5]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 5], dl
+    movzx      edx, byte ptr [eax + 6]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 6], dl
+    movzx      edx, byte ptr [eax + 7]  // copy alpha.
+    mov        byte ptr [edi + 7], dl
+
+    movd       esi, xmm0
+    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
+
+    movzx      edx, byte ptr [eax + 8]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 8], dl
+    movzx      edx, byte ptr [eax + 9]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 9], dl
+    movzx      edx, byte ptr [eax + 10]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 10], dl
+    movzx      edx, byte ptr [eax + 11]  // copy alpha.
+    mov        byte ptr [edi + 11], dl
+
+    movd       esi, xmm0
+
+    movzx      edx, byte ptr [eax + 12]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 12], dl
+    movzx      edx, byte ptr [eax + 13]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 13], dl
+    movzx      edx, byte ptr [eax + 14]
+    movzx      edx, byte ptr [esi + edx]
+    mov        byte ptr [edi + 14], dl
+    movzx      edx, byte ptr [eax + 15]  // copy alpha.
+    mov        byte ptr [edi + 15], dl
+
+    sub        ecx, 4
+    lea        eax, [eax + 16]
+    lea        edi, [edi + 16]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3
+
+#endif  // defined(_M_X64)
+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/row_x86.asm b/src/main/jni/libyuv/source/row_x86.asm
new file mode 100644
index 000000000..0cb326f8e
--- /dev/null
+++ b/src/main/jni/libyuv/source/row_x86.asm
@@ -0,0 +1,146 @@
+;
+; Copyright 2012 The LibYuv Project Authors. All rights reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%ifdef __YASM_VERSION_ID__
+%if __YASM_VERSION_ID__ < 01020000h
+%error AVX2 is supported only by yasm 1.2.0 or later.
+%endif
+%endif
+%include "x86inc.asm"
+
+SECTION .text
+
+; cglobal numeric constants are parameters, gpr regs, mm regs
+
+; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
+
+%macro YUY2TOYROW 2-3
+cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
+%ifidn %1,YUY2
+    pcmpeqb    m2, m2, m2        ; generate mask 0x00ff00ff
+    psrlw      m2, m2, 8
+%endif
+
+    ALIGN      4
+.convertloop:
+    mov%2      m0, [src_yuy2q]
+    mov%2      m1, [src_yuy2q + mmsize]
+    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
+%ifidn %1,YUY2
+    pand       m0, m0, m2   ; YUY2 even bytes are Y
+    pand       m1, m1, m2
+%else
+    psrlw      m0, m0, 8    ; UYVY odd bytes are Y
+    psrlw      m1, m1, 8
+%endif
+    packuswb   m0, m0, m1
+%if cpuflag(AVX2)
+    vpermq     m0, m0, 0xd8
+%endif
+    sub        pixd, mmsize
+    mov%2      [dst_yq], m0
+    lea        dst_yq, [dst_yq + mmsize]
+    jg         .convertloop
+    REP_RET
+%endmacro
+
+; TODO(fbarchard): Remove MMX.  Add SSSE3 pshufb version.
+INIT_MMX MMX
+YUY2TOYROW YUY2,a,
+YUY2TOYROW YUY2,u,_Unaligned
+YUY2TOYROW UYVY,a,
+YUY2TOYROW UYVY,u,_Unaligned
+INIT_XMM SSE2
+YUY2TOYROW YUY2,a,
+YUY2TOYROW YUY2,u,_Unaligned
+YUY2TOYROW UYVY,a,
+YUY2TOYROW UYVY,u,_Unaligned
+INIT_YMM AVX2
+YUY2TOYROW YUY2,a,
+YUY2TOYROW UYVY,a,
+
+; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
+
+%macro SplitUVRow 1-2
+cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
+    pcmpeqb    m4, m4, m4        ; generate mask 0x00ff00ff
+    psrlw      m4, m4, 8
+    sub        dst_vq, dst_uq
+
+    ALIGN      4
+.convertloop:
+    mov%1      m0, [src_uvq]
+    mov%1      m1, [src_uvq + mmsize]
+    lea        src_uvq, [src_uvq + mmsize * 2]
+    psrlw      m2, m0, 8         ; odd bytes
+    psrlw      m3, m1, 8
+    pand       m0, m0, m4        ; even bytes
+    pand       m1, m1, m4
+    packuswb   m0, m0, m1
+    packuswb   m2, m2, m3
+%if cpuflag(AVX2)
+    vpermq     m0, m0, 0xd8
+    vpermq     m2, m2, 0xd8
+%endif
+    mov%1      [dst_uq], m0
+    mov%1      [dst_uq + dst_vq], m2
+    lea        dst_uq, [dst_uq + mmsize]
+    sub        pixd, mmsize
+    jg         .convertloop
+    REP_RET
+%endmacro
+
+INIT_MMX MMX
+SplitUVRow a,
+SplitUVRow u,_Unaligned
+INIT_XMM SSE2
+SplitUVRow a,
+SplitUVRow u,_Unaligned
+INIT_YMM AVX2
+SplitUVRow a,
+
+; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+;                      int width);
+
+%macro MergeUVRow_ 1-2
+cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
+    sub        src_vq, src_uq
+
+    ALIGN      4
+.convertloop:
+    mov%1      m0, [src_uq]
+    mov%1      m1, [src_vq]
+    lea        src_uq, [src_uq + mmsize]
+    punpcklbw  m2, m0, m1       // first 8 UV pairs
+    punpckhbw  m0, m0, m1       // next 8 UV pairs
+%if cpuflag(AVX2)
+    vperm2i128 m1, m2, m0, 0x20  // low 128 of ymm2 and low 128 of ymm0
+    vperm2i128 m2, m2, m0, 0x31  // high 128 of ymm2 and high 128 of ymm0
+    mov%1      [dst_uvq], m1
+    mov%1      [dst_uvq + mmsize], m2
+%else
+    mov%1      [dst_uvq], m2
+    mov%1      [dst_uvq + mmsize], m0
+%endif
+    lea        dst_uvq, [dst_uvq + mmsize * 2]
+    sub        pixd, mmsize
+    jg         .convertloop
+    REP_RET
+%endmacro
+
+INIT_MMX MMX
+MergeUVRow_ a,
+MergeUVRow_ u,_Unaligned
+INIT_XMM SSE2
+MergeUVRow_ a,
+MergeUVRow_ u,_Unaligned
+INIT_YMM AVX2
+MergeUVRow_ a,
+
diff --git a/src/main/jni/libyuv/source/scale.cc b/src/main/jni/libyuv/source/scale.cc
new file mode 100644
index 000000000..5b33b5f04
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale.cc
@@ -0,0 +1,1716 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"  // For CopyPlane
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Remove this macro if OVERREAD is safe.
+#define AVOID_OVERREAD 1
+
+static __inline int Abs(int v) {
+  return v >= 0 ? v : -v;
+}
+
+#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
+
+// Scale plane, 1/2
+// This is an optimized version for scaling down a plane to 1/2 of
+// its original size.
+
+static void ScalePlaneDown2(int src_width, int src_height,
+                            int dst_width, int dst_height,
+                            int src_stride, int dst_stride,
+                            const uint8* src_ptr, uint8* dst_ptr,
+                            enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) =
+    filtering == kFilterNone ? ScaleRowDown2_C :
+        (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
+        ScaleRowDown2Box_C);
+  int row_stride = src_stride << 1;
+  if (!filtering) {
+    src_ptr += src_stride;  // Point to odd rows.
+    src_stride = 0;
+  }
+
+#if defined(HAS_SCALEROWDOWN2_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
+    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
+  }
+#elif defined(HAS_SCALEROWDOWN2_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
+    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
+        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
+        ScaleRowDown2Box_Unaligned_SSE2);
+    if (IS_ALIGNED(src_ptr, 16) &&
+        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
+          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
+          ScaleRowDown2Box_SSE2);
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
+      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    ScaleRowDown2 = filtering ?
+        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  // TODO(fbarchard): Loop through source height to allow odd height.
+  for (y = 0; y < dst_height; ++y) {
+    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
+    src_ptr += row_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+static void ScalePlaneDown2_16(int src_width, int src_height,
+                               int dst_width, int dst_height,
+                               int src_stride, int dst_stride,
+                               const uint16* src_ptr, uint16* dst_ptr,
+                               enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst_ptr, int dst_width) =
+    filtering == kFilterNone ? ScaleRowDown2_16_C :
+        (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
+        ScaleRowDown2Box_16_C);
+  int row_stride = src_stride << 1;
+  if (!filtering) {
+    src_ptr += src_stride;  // Point to odd rows.
+    src_stride = 0;
+  }
+
+#if defined(HAS_SCALEROWDOWN2_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
+    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
+        ScaleRowDown2_16_NEON;
+  }
+#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
+    ScaleRowDown2 = filtering == kFilterNone ?
+        ScaleRowDown2_Unaligned_16_SSE2 :
+        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
+        ScaleRowDown2Box_Unaligned_16_SSE2);
+    if (IS_ALIGNED(src_ptr, 16) &&
+        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
+          (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
+          ScaleRowDown2Box_16_SSE2);
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
+      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    ScaleRowDown2 = filtering ?
+        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  // TODO(fbarchard): Loop through source height to allow odd height.
+  for (y = 0; y < dst_height; ++y) {
+    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
+    src_ptr += row_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+// Scale plane, 1/4
+// This is an optimized version for scaling down a plane to 1/4 of
+// its original size.
+
+static void ScalePlaneDown4(int src_width, int src_height,
+                            int dst_width, int dst_height,
+                            int src_stride, int dst_stride,
+                            const uint8* src_ptr, uint8* dst_ptr,
+                            enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) =
+      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
+  int row_stride = src_stride << 2;
+  if (!filtering) {
+    src_ptr += src_stride * 2;  // Point to row 2.
+    src_stride = 0;
+  }
+#if defined(HAS_SCALEROWDOWN4_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+  }
+#elif defined(HAS_SCALEROWDOWN4_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
+  }
+#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    ScaleRowDown4 = filtering ?
+        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  for (y = 0; y < dst_height; ++y) {
+    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
+    src_ptr += row_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+static void ScalePlaneDown4_16(int src_width, int src_height,
+                               int dst_width, int dst_height,
+                               int src_stride, int dst_stride,
+                               const uint16* src_ptr, uint16* dst_ptr,
+                               enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst_ptr, int dst_width) =
+      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
+  int row_stride = src_stride << 2;
+  if (!filtering) {
+    src_ptr += src_stride * 2;  // Point to row 2.
+    src_stride = 0;
+  }
+#if defined(HAS_SCALEROWDOWN4_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
+        ScaleRowDown4_16_NEON;
+  }
+#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
+        ScaleRowDown4_16_SSE2;
+  }
+#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    ScaleRowDown4 = filtering ?
+        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  for (y = 0; y < dst_height; ++y) {
+    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
+    src_ptr += row_stride;
+    dst_ptr += dst_stride;
+  }
+}
+
+// Scale plane down, 3/4
+
+static void ScalePlaneDown34(int src_width, int src_height,
+                             int dst_width, int dst_height,
+                             int src_stride, int dst_stride,
+                             const uint8* src_ptr, uint8* dst_ptr,
+                             enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  assert(dst_width % 3 == 0);
+  if (!filtering) {
+    ScaleRowDown34_0 = ScaleRowDown34_C;
+    ScaleRowDown34_1 = ScaleRowDown34_C;
+  } else {
+    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
+    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
+  }
+#if defined(HAS_SCALEROWDOWN34_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_NEON;
+      ScaleRowDown34_1 = ScaleRowDown34_NEON;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+    }
+  }
+#endif
+#if defined(HAS_SCALEROWDOWN34_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
+      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
+      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  for (y = 0; y < dst_height - 2; y += 3) {
+    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
+                     dst_ptr, dst_width);
+    src_ptr += src_stride * 2;
+    dst_ptr += dst_stride;
+  }
+
+  // Remainder 1 or 2 rows with last row vertically unfiltered
+  if ((dst_height % 3) == 2) {
+    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
+  } else if ((dst_height % 3) == 1) {
+    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
+  }
+}
+
+static void ScalePlaneDown34_16(int src_width, int src_height,
+                                int dst_width, int dst_height,
+                                int src_stride, int dst_stride,
+                                const uint16* src_ptr, uint16* dst_ptr,
+                                enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst_ptr, int dst_width);
+  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst_ptr, int dst_width);
+  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  assert(dst_width % 3 == 0);
+  if (!filtering) {
+    ScaleRowDown34_0 = ScaleRowDown34_16_C;
+    ScaleRowDown34_1 = ScaleRowDown34_16_C;
+  } else {
+    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
+    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
+  }
+#if defined(HAS_SCALEROWDOWN34_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
+      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
+    }
+  }
+#endif
+#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
+      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    if (!filtering) {
+      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
+      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
+    } else {
+      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
+      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  for (y = 0; y < dst_height - 2; y += 3) {
+    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
+                     dst_ptr, dst_width);
+    src_ptr += src_stride * 2;
+    dst_ptr += dst_stride;
+  }
+
+  // Remainder 1 or 2 rows with last row vertically unfiltered
+  if ((dst_height % 3) == 2) {
+    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride;
+    dst_ptr += dst_stride;
+    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
+  } else if ((dst_height % 3) == 1) {
+    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
+  }
+}
+
+
+// Scale plane, 3/8
+// This is an optimized version for scaling down a plane to 3/8
+// of its original size.
+//
+// Uses box filter arranges like this
+// aaabbbcc -> abc
+// aaabbbcc    def
+// aaabbbcc    ghi
+// dddeeeff
+// dddeeeff
+// dddeeeff
+// ggghhhii
+// ggghhhii
+// Boxes are 3x3, 2x3, 3x2 and 2x2
+
+static void ScalePlaneDown38(int src_width, int src_height,
+                             int dst_width, int dst_height,
+                             int src_stride, int dst_stride,
+                             const uint8* src_ptr, uint8* dst_ptr,
+                             enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width);
+  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  assert(dst_width % 3 == 0);
+  if (!filtering) {
+    ScaleRowDown38_3 = ScaleRowDown38_C;
+    ScaleRowDown38_2 = ScaleRowDown38_C;
+  } else {
+    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
+    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
+  }
+#if defined(HAS_SCALEROWDOWN38_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_NEON;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN38_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
+      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
+      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  for (y = 0; y < dst_height - 2; y += 3) {
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 2;
+    dst_ptr += dst_stride;
+  }
+
+  // Remainder 1 or 2 rows with last row vertically unfiltered
+  if ((dst_height % 3) == 2) {
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+  } else if ((dst_height % 3) == 1) {
+    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+  }
+}
+
+static void ScalePlaneDown38_16(int src_width, int src_height,
+                                int dst_width, int dst_height,
+                                int src_stride, int dst_stride,
+                                const uint16* src_ptr, uint16* dst_ptr,
+                                enum FilterMode filtering) {
+  int y;
+  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst_ptr, int dst_width);
+  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst_ptr, int dst_width);
+  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
+  assert(dst_width % 3 == 0);
+  if (!filtering) {
+    ScaleRowDown38_3 = ScaleRowDown38_16_C;
+    ScaleRowDown38_2 = ScaleRowDown38_16_C;
+  } else {
+    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
+    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
+  }
+#if defined(HAS_SCALEROWDOWN38_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
+      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
+      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
+    }
+  }
+#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
+      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
+    if (!filtering) {
+      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
+      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
+    } else {
+      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
+      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  for (y = 0; y < dst_height - 2; y += 3) {
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 2;
+    dst_ptr += dst_stride;
+  }
+
+  // Remainder 1 or 2 rows with last row vertically unfiltered
+  if ((dst_height % 3) == 2) {
+    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
+    src_ptr += src_stride * 3;
+    dst_ptr += dst_stride;
+    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+  } else if ((dst_height % 3) == 1) {
+    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
+  }
+}
+
+static __inline uint32 SumBox(int iboxwidth, int iboxheight,
+                              ptrdiff_t src_stride, const uint8* src_ptr) {
+  uint32 sum = 0u;
+  int y;
+  assert(iboxwidth > 0);
+  assert(iboxheight > 0);
+  for (y = 0; y < iboxheight; ++y) {
+    int x;
+    for (x = 0; x < iboxwidth; ++x) {
+      sum += src_ptr[x];
+    }
+    src_ptr += src_stride;
+  }
+  return sum;
+}
+
+static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
+                                 ptrdiff_t src_stride, const uint16* src_ptr) {
+  uint32 sum = 0u;
+  int y;
+  assert(iboxwidth > 0);
+  assert(iboxheight > 0);
+  for (y = 0; y < iboxheight; ++y) {
+    int x;
+    for (x = 0; x < iboxwidth; ++x) {
+      sum += src_ptr[x];
+    }
+    src_ptr += src_stride;
+  }
+  return sum;
+}
+
+static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
+                               int x, int dx, ptrdiff_t src_stride,
+                               const uint8* src_ptr, uint8* dst_ptr) {
+  int i;
+  int boxwidth;
+  for (i = 0; i < dst_width; ++i) {
+    int ix = x >> 16;
+    x += dx;
+    boxwidth = (x >> 16) - ix;
+    *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
+        (boxwidth * boxheight);
+  }
+}
+
+static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
+                                  int x, int dx, ptrdiff_t src_stride,
+                                  const uint16* src_ptr, uint16* dst_ptr) {
+  int i;
+  int boxwidth;
+  for (i = 0; i < dst_width; ++i) {
+    int ix = x >> 16;
+    x += dx;
+    boxwidth = (x >> 16) - ix;
+    *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
+        (boxwidth * boxheight);
+  }
+}
+
+static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
+  uint32 sum = 0u;
+  int x;
+  assert(iboxwidth > 0);
+  for (x = 0; x < iboxwidth; ++x) {
+    sum += src_ptr[x];
+  }
+  return sum;
+}
+
+static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
+  uint32 sum = 0u;
+  int x;
+  assert(iboxwidth > 0);
+  for (x = 0; x < iboxwidth; ++x) {
+    sum += src_ptr[x];
+  }
+  return sum;
+}
+
+static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
+                            const uint16* src_ptr, uint8* dst_ptr) {
+  int i;
+  int scaletbl[2];
+  int minboxwidth = (dx >> 16);
+  int* scaleptr = scaletbl - minboxwidth;
+  int boxwidth;
+  scaletbl[0] = 65536 / (minboxwidth * boxheight);
+  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+  for (i = 0; i < dst_width; ++i) {
+    int ix = x >> 16;
+    x += dx;
+    boxwidth = (x >> 16) - ix;
+    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
+  }
+}
+
+static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
+                               const uint32* src_ptr, uint16* dst_ptr) {
+  int i;
+  int scaletbl[2];
+  int minboxwidth = (dx >> 16);
+  int* scaleptr = scaletbl - minboxwidth;
+  int boxwidth;
+  scaletbl[0] = 65536 / (minboxwidth * boxheight);
+  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+  for (i = 0; i < dst_width; ++i) {
+    int ix = x >> 16;
+    x += dx;
+    boxwidth = (x >> 16) - ix;
+    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
+        scaleptr[boxwidth] >> 16;
+  }
+}
+
+static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
+                            const uint16* src_ptr, uint8* dst_ptr) {
+  int boxwidth = (dx >> 16);
+  int scaleval = 65536 / (boxwidth * boxheight);
+  int i;
+  for (i = 0; i < dst_width; ++i) {
+    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
+    x += boxwidth;
+  }
+}
+
+static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
+                               const uint32* src_ptr, uint16* dst_ptr) {
+  int boxwidth = (dx >> 16);
+  int scaleval = 65536 / (boxwidth * boxheight);
+  int i;
+  for (i = 0; i < dst_width; ++i) {
+    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
+    x += boxwidth;
+  }
+}
+
+// Scale plane down to any dimensions, with interpolation.
+// (boxfilter).
+//
+// Same method as SimpleScale, which is fixed point, outputting
+// one pixel of destination using fixed point (16.16) to step
+// through source, sampling a box of pixel with simple
+// averaging.
+static void ScalePlaneBox(int src_width, int src_height,
+                          int dst_width, int dst_height,
+                          int src_stride, int dst_stride,
+                          const uint8* src_ptr, uint8* dst_ptr) {
+  int j;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  const int max_y = (src_height << 16);
+  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
+  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
+    uint8* dst = dst_ptr;
+    int j;
+    for (j = 0; j < dst_height; ++j) {
+      int boxheight;
+      int iy = y >> 16;
+      const uint8* src = src_ptr + iy * src_stride;
+      y += dy;
+      if (y > max_y) {
+        y = max_y;
+      }
+      boxheight = (y >> 16) - iy;
+      ScalePlaneBoxRow_C(dst_width, boxheight,
+                         x, dx, src_stride,
+                         src, dst);
+      dst += dst_stride;
+    }
+    return;
+  }
+  {
+    // Allocate a row buffer of uint16.
+    align_buffer_64(row16, src_width * 2);
+    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+        const uint16* src_ptr, uint8* dst_ptr) =
+        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
+    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
+        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
+
+#if defined(HAS_SCALEADDROWS_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) &&
+#ifdef AVOID_OVERREAD
+        IS_ALIGNED(src_width, 16) &&
+#endif
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+      ScaleAddRows = ScaleAddRows_SSE2;
+    }
+#endif
+
+    for (j = 0; j < dst_height; ++j) {
+      int boxheight;
+      int iy = y >> 16;
+      const uint8* src = src_ptr + iy * src_stride;
+      y += dy;
+      if (y > (src_height << 16)) {
+        y = (src_height << 16);
+      }
+      boxheight = (y >> 16) - iy;
+      ScaleAddRows(src, src_stride, (uint16*)(row16),
+                 src_width, boxheight);
+      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
+                 dst_ptr);
+      dst_ptr += dst_stride;
+    }
+    free_aligned_buffer_64(row16);
+  }
+}
+
+static void ScalePlaneBox_16(int src_width, int src_height,
+                             int dst_width, int dst_height,
+                             int src_stride, int dst_stride,
+                             const uint16* src_ptr, uint16* dst_ptr) {
+  int j;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  const int max_y = (src_height << 16);
+  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
+  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
+    uint16* dst = dst_ptr;
+    int j;
+    for (j = 0; j < dst_height; ++j) {
+      int boxheight;
+      int iy = y >> 16;
+      const uint16* src = src_ptr + iy * src_stride;
+      y += dy;
+      if (y > max_y) {
+        y = max_y;
+      }
+      boxheight = (y >> 16) - iy;
+      ScalePlaneBoxRow_16_C(dst_width, boxheight,
+                            x, dx, src_stride,
+                            src, dst);
+      dst += dst_stride;
+    }
+    return;
+  }
+  {
+    // Allocate a row buffer of uint32.
+    align_buffer_64(row32, src_width * 4);
+    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
+        const uint32* src_ptr, uint16* dst_ptr) =
+        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
+    void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
+        uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
+
+#if defined(HAS_SCALEADDROWS_16_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) &&
+#ifdef AVOID_OVERREAD
+        IS_ALIGNED(src_width, 16) &&
+#endif
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+      ScaleAddRows = ScaleAddRows_16_SSE2;
+    }
+#endif
+
+    for (j = 0; j < dst_height; ++j) {
+      int boxheight;
+      int iy = y >> 16;
+      const uint16* src = src_ptr + iy * src_stride;
+      y += dy;
+      if (y > (src_height << 16)) {
+        y = (src_height << 16);
+      }
+      boxheight = (y >> 16) - iy;
+      ScaleAddRows(src, src_stride, (uint32*)(row32),
+                 src_width, boxheight);
+      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
+                 dst_ptr);
+      dst_ptr += dst_stride;
+    }
+    free_aligned_buffer_64(row32);
+  }
+}
+
+// Scale plane down with bilinear interpolation.
+void ScalePlaneBilinearDown(int src_width, int src_height,
+                            int dst_width, int dst_height,
+                            int src_stride, int dst_stride,
+                            const uint8* src_ptr, uint8* dst_ptr,
+                            enum FilterMode filtering) {
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+  // Allocate a row buffer.
+  align_buffer_64(row, src_width);
+
+  const int max_y = (src_height - 1) << 16;
+  int j;
+  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
+      int dst_width, int x, int dx) =
+      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
+  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(src_width, 32)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
+    if (IS_ALIGNED(src_width, 4)) {
+      InterpolateRow = InterpolateRow_MIPS_DSPR2;
+    }
+  }
+#endif
+
+
+#if defined(HAS_SCALEFILTERCOLS_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleFilterCols = ScaleFilterCols_SSSE3;
+  }
+#endif
+  if (y > max_y) {
+    y = max_y;
+  }
+
+  for (j = 0; j < dst_height; ++j) {
+    int yi = y >> 16;
+    const uint8* src = src_ptr + yi * src_stride;
+    if (filtering == kFilterLinear) {
+      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
+    } else {
+      int yf = (y >> 8) & 255;
+      InterpolateRow(row, src, src_stride, src_width, yf);
+      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
+    }
+    dst_ptr += dst_stride;
+    y += dy;
+    if (y > max_y) {
+      y = max_y;
+    }
+  }
+  free_aligned_buffer_64(row);
+}
+
+void ScalePlaneBilinearDown_16(int src_width, int src_height,
+                               int dst_width, int dst_height,
+                               int src_stride, int dst_stride,
+                               const uint16* src_ptr, uint16* dst_ptr,
+                               enum FilterMode filtering) {
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+  // Allocate a row buffer.
+  align_buffer_64(row, src_width * 2);
+
+  const int max_y = (src_height - 1) << 16;
+  int j;
+  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
+      int dst_width, int x, int dx) =
+      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
+  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_16_C;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSE2;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
+      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSSE3;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
+      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
+    InterpolateRow = InterpolateRow_Any_16_AVX2;
+    if (IS_ALIGNED(src_width, 32)) {
+      InterpolateRow = InterpolateRow_16_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_NEON;
+    if (IS_ALIGNED(src_width, 16)) {
+      InterpolateRow = InterpolateRow_16_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
+    if (IS_ALIGNED(src_width, 4)) {
+      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
+    }
+  }
+#endif
+
+
+#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
+  }
+#endif
+  if (y > max_y) {
+    y = max_y;
+  }
+
+  for (j = 0; j < dst_height; ++j) {
+    int yi = y >> 16;
+    const uint16* src = src_ptr + yi * src_stride;
+    if (filtering == kFilterLinear) {
+      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
+    } else {
+      int yf = (y >> 8) & 255;
+      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
+      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
+    }
+    dst_ptr += dst_stride;
+    y += dy;
+    if (y > max_y) {
+      y = max_y;
+    }
+  }
+  free_aligned_buffer_64(row);
+}
+
+// Scale up down with bilinear interpolation.
+void ScalePlaneBilinearUp(int src_width, int src_height,
+                          int dst_width, int dst_height,
+                          int src_stride, int dst_stride,
+                          const uint8* src_ptr, uint8* dst_ptr,
+                          enum FilterMode filtering) {
+  int j;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  const int max_y = (src_height - 1) << 16;
+  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
+       int dst_width, int x, int dx) =
+       filtering ? ScaleFilterCols_C : ScaleCols_C;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(dst_width, 32)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  if (filtering && src_width >= 32768) {
+    ScaleFilterCols = ScaleFilterCols64_C;
+  }
+#if defined(HAS_SCALEFILTERCOLS_SSSE3)
+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleFilterCols = ScaleFilterCols_SSSE3;
+  }
+#endif
+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+    ScaleFilterCols = ScaleColsUp2_C;
+#if defined(HAS_SCALECOLS_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleFilterCols = ScaleColsUp2_SSE2;
+    }
+#endif
+  }
+
+  if (y > max_y) {
+    y = max_y;
+  }
+  {
+    int yi = y >> 16;
+    const uint8* src = src_ptr + yi * src_stride;
+
+    // Allocate 2 row buffers.
+    const int kRowSize = (dst_width + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+
+    uint8* rowptr = row;
+    int rowstride = kRowSize;
+    int lasty = yi;
+
+    ScaleFilterCols(rowptr, src, dst_width, x, dx);
+    if (src_height > 1) {
+      src += src_stride;
+    }
+    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+    src += src_stride;
+
+    for (j = 0; j < dst_height; ++j) {
+      yi = y >> 16;
+      if (yi != lasty) {
+        if (y > max_y) {
+          y = max_y;
+          yi = y >> 16;
+          src = src_ptr + yi * src_stride;
+        }
+        if (yi != lasty) {
+          ScaleFilterCols(rowptr, src, dst_width, x, dx);
+          rowptr += rowstride;
+          rowstride = -rowstride;
+          lasty = yi;
+          src += src_stride;
+        }
+      }
+      if (filtering == kFilterLinear) {
+        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
+      } else {
+        int yf = (y >> 8) & 255;
+        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
+      }
+      dst_ptr += dst_stride;
+      y += dy;
+    }
+    free_aligned_buffer_64(row);
+  }
+}
+
+void ScalePlaneBilinearUp_16(int src_width, int src_height,
+                             int dst_width, int dst_height,
+                             int src_stride, int dst_stride,
+                             const uint16* src_ptr, uint16* dst_ptr,
+                             enum FilterMode filtering) {
+  int j;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  const int max_y = (src_height - 1) << 16;
+  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_16_C;
+  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
+       int dst_width, int x, int dx) =
+       filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSE2;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
+      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSSE3;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
+      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
+    InterpolateRow = InterpolateRow_Any_16_AVX2;
+    if (IS_ALIGNED(dst_width, 32)) {
+      InterpolateRow = InterpolateRow_16_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_NEON;
+    if (IS_ALIGNED(dst_width, 16)) {
+      InterpolateRow = InterpolateRow_16_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
+    }
+  }
+#endif
+
+  if (filtering && src_width >= 32768) {
+    ScaleFilterCols = ScaleFilterCols64_16_C;
+  }
+#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
+  }
+#endif
+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+    ScaleFilterCols = ScaleColsUp2_16_C;
+#if defined(HAS_SCALECOLS_16_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleFilterCols = ScaleColsUp2_16_SSE2;
+    }
+#endif
+  }
+
+  if (y > max_y) {
+    y = max_y;
+  }
+  {
+    int yi = y >> 16;
+    const uint16* src = src_ptr + yi * src_stride;
+
+    // Allocate 2 row buffers.
+    const int kRowSize = (dst_width + 15) & ~15;
+    align_buffer_64(row, kRowSize * 4);
+
+    uint16* rowptr = (uint16*)row;
+    int rowstride = kRowSize;
+    int lasty = yi;
+
+    ScaleFilterCols(rowptr, src, dst_width, x, dx);
+    if (src_height > 1) {
+      src += src_stride;
+    }
+    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+    src += src_stride;
+
+    for (j = 0; j < dst_height; ++j) {
+      yi = y >> 16;
+      if (yi != lasty) {
+        if (y > max_y) {
+          y = max_y;
+          yi = y >> 16;
+          src = src_ptr + yi * src_stride;
+        }
+        if (yi != lasty) {
+          ScaleFilterCols(rowptr, src, dst_width, x, dx);
+          rowptr += rowstride;
+          rowstride = -rowstride;
+          lasty = yi;
+          src += src_stride;
+        }
+      }
+      if (filtering == kFilterLinear) {
+        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
+      } else {
+        int yf = (y >> 8) & 255;
+        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
+      }
+      dst_ptr += dst_stride;
+      y += dy;
+    }
+    free_aligned_buffer_64(row);
+  }
+}
+
+// Scale Plane to/from any dimensions, without interpolation.
+// Fixed point math is used for performance: The upper 16 bits
+// of x and dx is the integer part of the source position and
+// the lower 16 bits are the fixed decimal part.
+
+static void ScalePlaneSimple(int src_width, int src_height,
+                             int dst_width, int dst_height,
+                             int src_stride, int dst_stride,
+                             const uint8* src_ptr, uint8* dst_ptr) {
+  int i;
+  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
+      int dst_width, int x, int dx) = ScaleCols_C;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+  if (src_width * 2 == dst_width && x < 0x8000) {
+    ScaleCols = ScaleColsUp2_C;
+#if defined(HAS_SCALECOLS_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleCols = ScaleColsUp2_SSE2;
+    }
+#endif
+  }
+
+  for (i = 0; i < dst_height; ++i) {
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
+              dst_width, x, dx);
+    dst_ptr += dst_stride;
+    y += dy;
+  }
+}
+
+static void ScalePlaneSimple_16(int src_width, int src_height,
+                                int dst_width, int dst_height,
+                                int src_stride, int dst_stride,
+                                const uint16* src_ptr, uint16* dst_ptr) {
+  int i;
+  void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
+      int dst_width, int x, int dx) = ScaleCols_16_C;
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+
+  if (src_width * 2 == dst_width && x < 0x8000) {
+    ScaleCols = ScaleColsUp2_16_C;
+#if defined(HAS_SCALECOLS_16_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleCols = ScaleColsUp2_16_SSE2;
+    }
+#endif
+  }
+
+  for (i = 0; i < dst_height; ++i) {
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
+              dst_width, x, dx);
+    dst_ptr += dst_stride;
+    y += dy;
+  }
+}
+
+// Scale a plane.
+// This function dispatches to a specialized scaler based on scale factor.
+
+LIBYUV_API
+void ScalePlane(const uint8* src, int src_stride,
+                int src_width, int src_height,
+                uint8* dst, int dst_stride,
+                int dst_width, int dst_height,
+                enum FilterMode filtering) {
+  // Simplify filtering when possible.
+  filtering = ScaleFilterReduce(src_width, src_height,
+                                dst_width, dst_height,
+                                filtering);
+
+  // Negative height means invert the image.
+  if (src_height < 0) {
+    src_height = -src_height;
+    src = src + (src_height - 1) * src_stride;
+    src_stride = -src_stride;
+  }
+
+  // Use specialized scales to improve performance for common resolutions.
+  // For example, all the 1/2 scalings will use ScalePlaneDown2()
+  if (dst_width == src_width && dst_height == src_height) {
+    // Straight copy.
+    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
+    return;
+  }
+  if (dst_width == src_width) {
+    int dy = FixedDiv(src_height, dst_height);
+    // Arbitrary scale vertically, but unscaled vertically.
+    ScalePlaneVertical(src_height,
+                       dst_width, dst_height,
+                       src_stride, dst_stride, src, dst,
+                       0, 0, dy, 1, filtering);
+    return;
+  }
+  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
+    // Scale down.
+    if (4 * dst_width == 3 * src_width &&
+        4 * dst_height == 3 * src_height) {
+      // optimized, 3/4
+      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
+                       src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
+      // optimized, 1/2
+      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
+                      src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    // 3/8 rounded up for odd sized chroma height.
+    if (8 * dst_width == 3 * src_width &&
+        dst_height == ((src_height * 3 + 7) / 8)) {
+      // optimized, 3/8
+      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
+                       src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
+               filtering != kFilterBilinear) {
+      // optimized, 1/4
+      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
+                      src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+  }
+  if (filtering == kFilterBox && dst_height * 2 < src_height) {
+    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
+                  src_stride, dst_stride, src, dst);
+    return;
+  }
+  if (filtering && dst_height > src_height) {
+    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
+                         src_stride, dst_stride, src, dst, filtering);
+    return;
+  }
+  if (filtering) {
+    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
+                           src_stride, dst_stride, src, dst, filtering);
+    return;
+  }
+  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
+                   src_stride, dst_stride, src, dst);
+}
+
+LIBYUV_API
+void ScalePlane_16(const uint16* src, int src_stride,
+                  int src_width, int src_height,
+                  uint16* dst, int dst_stride,
+                  int dst_width, int dst_height,
+                  enum FilterMode filtering) {
+  // Simplify filtering when possible.
+  filtering = ScaleFilterReduce(src_width, src_height,
+                                dst_width, dst_height,
+                                filtering);
+
+  // Negative height means invert the image.
+  if (src_height < 0) {
+    src_height = -src_height;
+    src = src + (src_height - 1) * src_stride;
+    src_stride = -src_stride;
+  }
+
+  // Use specialized scales to improve performance for common resolutions.
+  // For example, all the 1/2 scalings will use ScalePlaneDown2()
+  if (dst_width == src_width && dst_height == src_height) {
+    // Straight copy.
+    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
+    return;
+  }
+  if (dst_width == src_width) {
+    int dy = FixedDiv(src_height, dst_height);
+    // Arbitrary scale vertically, but unscaled vertically.
+    ScalePlaneVertical_16(src_height,
+                          dst_width, dst_height,
+                          src_stride, dst_stride, src, dst,
+                          0, 0, dy, 1, filtering);
+    return;
+  }
+  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
+    // Scale down.
+    if (4 * dst_width == 3 * src_width &&
+        4 * dst_height == 3 * src_height) {
+      // optimized, 3/4
+      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
+                          src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
+      // optimized, 1/2
+      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
+                         src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    // 3/8 rounded up for odd sized chroma height.
+    if (8 * dst_width == 3 * src_width &&
+        dst_height == ((src_height * 3 + 7) / 8)) {
+      // optimized, 3/8
+      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
+                          src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
+               filtering != kFilterBilinear) {
+      // optimized, 1/4
+      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
+                         src_stride, dst_stride, src, dst, filtering);
+      return;
+    }
+  }
+  if (filtering == kFilterBox && dst_height * 2 < src_height) {
+    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
+                     src_stride, dst_stride, src, dst);
+    return;
+  }
+  if (filtering && dst_height > src_height) {
+    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
+                            src_stride, dst_stride, src, dst, filtering);
+    return;
+  }
+  if (filtering) {
+    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
+                              src_stride, dst_stride, src, dst, filtering);
+    return;
+  }
+  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
+                      src_stride, dst_stride, src, dst);
+}
+
+// Scale an I420 image.
+// This function in turn calls a scaling function for each plane.
+
+LIBYUV_API
+int I420Scale(const uint8* src_y, int src_stride_y,
+              const uint8* src_u, int src_stride_u,
+              const uint8* src_v, int src_stride_v,
+              int src_width, int src_height,
+              uint8* dst_y, int dst_stride_y,
+              uint8* dst_u, int dst_stride_u,
+              uint8* dst_v, int dst_stride_v,
+              int dst_width, int dst_height,
+              enum FilterMode filtering) {
+  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
+  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
+  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
+    return -1;
+  }
+
+  ScalePlane(src_y, src_stride_y, src_width, src_height,
+             dst_y, dst_stride_y, dst_width, dst_height,
+             filtering);
+  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
+             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
+             filtering);
+  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
+             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
+             filtering);
+  return 0;
+}
+
+LIBYUV_API
+int I420Scale_16(const uint16* src_y, int src_stride_y,
+                 const uint16* src_u, int src_stride_u,
+                 const uint16* src_v, int src_stride_v,
+                 int src_width, int src_height,
+                 uint16* dst_y, int dst_stride_y,
+                 uint16* dst_u, int dst_stride_u,
+                 uint16* dst_v, int dst_stride_v,
+                 int dst_width, int dst_height,
+                 enum FilterMode filtering) {
+  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
+  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
+  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
+    return -1;
+  }
+
+  ScalePlane_16(src_y, src_stride_y, src_width, src_height,
+                dst_y, dst_stride_y, dst_width, dst_height,
+                filtering);
+  ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
+                dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
+                filtering);
+  ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
+                dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
+                filtering);
+  return 0;
+}
+
+// Deprecated api
+LIBYUV_API
+int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
+          int src_stride_y, int src_stride_u, int src_stride_v,
+          int src_width, int src_height,
+          uint8* dst_y, uint8* dst_u, uint8* dst_v,
+          int dst_stride_y, int dst_stride_u, int dst_stride_v,
+          int dst_width, int dst_height,
+          LIBYUV_BOOL interpolate) {
+  return I420Scale(src_y, src_stride_y,
+                   src_u, src_stride_u,
+                   src_v, src_stride_v,
+                   src_width, src_height,
+                   dst_y, dst_stride_y,
+                   dst_u, dst_stride_u,
+                   dst_v, dst_stride_v,
+                   dst_width, dst_height,
+                   interpolate ? kFilterBox : kFilterNone);
+}
+
+// Deprecated api
+LIBYUV_API
+int ScaleOffset(const uint8* src, int src_width, int src_height,
+                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
+                LIBYUV_BOOL interpolate) {
+  // Chroma requires offset to multiple of 2.
+  int dst_yoffset_even = dst_yoffset & ~1;
+  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
+  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
+  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
+  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
+  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
+  const uint8* src_y = src;
+  const uint8* src_u = src + src_width * src_height;
+  const uint8* src_v = src + src_width * src_height +
+                             src_halfwidth * src_halfheight;
+  uint8* dst_y = dst + dst_yoffset_even * dst_width;
+  uint8* dst_u = dst + dst_width * dst_height +
+                 (dst_yoffset_even >> 1) * dst_halfwidth;
+  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
+                 (dst_yoffset_even >> 1) * dst_halfwidth;
+  if (!src || src_width <= 0 || src_height <= 0 ||
+      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
+      dst_yoffset_even >= dst_height) {
+    return -1;
+  }
+  return I420Scale(src_y, src_width,
+                   src_u, src_halfwidth,
+                   src_v, src_halfwidth,
+                   src_width, src_height,
+                   dst_y, dst_width,
+                   dst_u, dst_halfwidth,
+                   dst_v, dst_halfwidth,
+                   dst_width, aheight,
+                   interpolate ? kFilterBox : kFilterNone);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_argb.cc b/src/main/jni/libyuv/source/scale_argb.cc
new file mode 100644
index 000000000..e339cd7c7
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_argb.cc
@@ -0,0 +1,809 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"  // For CopyARGB
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static __inline int Abs(int v) {
+  return v >= 0 ? v : -v;
+}
+
+// ScaleARGB ARGB, 1/2
+// This is an optimized version for scaling down a ARGB to 1/2 of
+// its original size.
+static void ScaleARGBDown2(int src_width, int src_height,
+                           int dst_width, int dst_height,
+                           int src_stride, int dst_stride,
+                           const uint8* src_argb, uint8* dst_argb,
+                           int x, int dx, int y, int dy,
+                           enum FilterMode filtering) {
+  int j;
+  int row_stride = src_stride * (dy >> 16);
+  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width) =
+    filtering == kFilterNone ? ScaleARGBRowDown2_C :
+        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
+        ScaleARGBRowDown2Box_C);
+  assert(dx == 65536 * 2);  // Test scale factor of 2.
+  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
+  // Advance to odd row, even column.
+  if (filtering == kFilterBilinear) {
+    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+  } else {
+    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
+  }
+
+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
+        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
+        ScaleARGBRowDown2Box_SSE2);
+  }
+#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
+    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
+        ScaleARGBRowDown2_NEON;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  for (j = 0; j < dst_height; ++j) {
+    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
+    src_argb += row_stride;
+    dst_argb += dst_stride;
+  }
+}
+
+// ScaleARGB ARGB, 1/4
+// This is an optimized version for scaling down a ARGB to 1/4 of
+// its original size.
+static void ScaleARGBDown4Box(int src_width, int src_height,
+                              int dst_width, int dst_height,
+                              int src_stride, int dst_stride,
+                              const uint8* src_argb, uint8* dst_argb,
+                              int x, int dx, int y, int dy) {
+  int j;
+  // Allocate 2 rows of ARGB.
+  const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
+  align_buffer_64(row, kRowSize * 2);
+  int row_stride = src_stride * (dy >> 16);
+  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
+    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
+  // Advance to odd row, even column.
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+  assert(dx == 65536 * 4);  // Test scale factor of 4.
+  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
+#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
+      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
+  }
+#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
+    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
+  }
+#endif
+  for (j = 0; j < dst_height; ++j) {
+    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
+    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
+                      row + kRowSize, dst_width * 2);
+    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
+    src_argb += row_stride;
+    dst_argb += dst_stride;
+  }
+  free_aligned_buffer_64(row);
+}
+
+// ScaleARGB ARGB Even
+// This is an optimized version for scaling down a ARGB to even
+// multiple of its original size.
+static void ScaleARGBDownEven(int src_width, int src_height,
+                              int dst_width, int dst_height,
+                              int src_stride, int dst_stride,
+                              const uint8* src_argb, uint8* dst_argb,
+                              int x, int dx, int y, int dy,
+                              enum FilterMode filtering) {
+  int j;
+  int col_step = dx >> 16;
+  int row_stride = (dy >> 16) * src_stride;
+  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_step, uint8* dst_argb, int dst_width) =
+      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
+  assert(IS_ALIGNED(src_width, 2));
+  assert(IS_ALIGNED(src_height, 2));
+  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
+        ScaleARGBRowDownEven_SSE2;
+  }
+#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
+      IS_ALIGNED(src_argb, 4)) {
+    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
+        ScaleARGBRowDownEven_NEON;
+  }
+#endif
+
+  if (filtering == kFilterLinear) {
+    src_stride = 0;
+  }
+  for (j = 0; j < dst_height; ++j) {
+    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
+    src_argb += row_stride;
+    dst_argb += dst_stride;
+  }
+}
+
+// Scale ARGB down with bilinear interpolation.
+static void ScaleARGBBilinearDown(int src_width, int src_height,
+                                  int dst_width, int dst_height,
+                                  int src_stride, int dst_stride,
+                                  const uint8* src_argb, uint8* dst_argb,
+                                  int x, int dx, int y, int dy,
+                                  enum FilterMode filtering) {
+  int j;
+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
+      int dst_width, int x, int dx) =
+      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
+  int64 xlast = x + (int64)(dst_width - 1) * dx;
+  int64 xl = (dx >= 0) ? x : xlast;
+  int64 xr = (dx >= 0) ? xlast : x;
+  int clip_src_width;
+  xl = (xl >> 16) & ~3;  // Left edge aligned.
+  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
+  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
+  if (xr > src_width) {
+    xr = src_width;
+  }
+  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
+  src_argb += xl * 4;
+  x -= (int)(xl << 16);
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(clip_src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(clip_src_width, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(clip_src_width, 32)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(clip_src_width, 16)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
+    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
+    if (IS_ALIGNED(clip_src_width, 4)) {
+      InterpolateRow = InterpolateRow_MIPS_DSPR2;
+    }
+  }
+#endif
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+  }
+#endif
+  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
+  // Allocate a row of ARGB.
+  {
+    align_buffer_64(row, clip_src_width * 4);
+
+    const int max_y = (src_height - 1) << 16;
+    if (y > max_y) {
+      y = max_y;
+    }
+    for (j = 0; j < dst_height; ++j) {
+      int yi = y >> 16;
+      const uint8* src = src_argb + yi * src_stride;
+      if (filtering == kFilterLinear) {
+        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
+      } else {
+        int yf = (y >> 8) & 255;
+        InterpolateRow(row, src, src_stride, clip_src_width, yf);
+        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
+      }
+      dst_argb += dst_stride;
+      y += dy;
+      if (y > max_y) {
+        y = max_y;
+      }
+    }
+    free_aligned_buffer_64(row);
+  }
+}
+
+// Scale ARGB up with bilinear interpolation.
+static void ScaleARGBBilinearUp(int src_width, int src_height,
+                                int dst_width, int dst_height,
+                                int src_stride, int dst_stride,
+                                const uint8* src_argb, uint8* dst_argb,
+                                int x, int dx, int y, int dy,
+                                enum FilterMode filtering) {
+  int j;
+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
+      int dst_width, int x, int dx) =
+      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
+  const int max_y = (src_height - 1) << 16;
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(dst_width, 8)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
+    InterpolateRow = InterpolateRow_MIPS_DSPR2;
+  }
+#endif
+  if (src_width >= 32768) {
+    ScaleARGBFilterCols = filtering ?
+        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
+  }
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+  }
+#endif
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
+  }
+#endif
+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
+    }
+#endif
+  }
+
+  if (y > max_y) {
+    y = max_y;
+  }
+
+  {
+    int yi = y >> 16;
+    const uint8* src = src_argb + yi * src_stride;
+
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (dst_width * 4 + 15) & ~15;
+    align_buffer_64(row, kRowSize * 2);
+
+    uint8* rowptr = row;
+    int rowstride = kRowSize;
+    int lasty = yi;
+
+    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
+    if (src_height > 1) {
+      src += src_stride;
+    }
+    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
+    src += src_stride;
+
+    for (j = 0; j < dst_height; ++j) {
+      yi = y >> 16;
+      if (yi != lasty) {
+        if (y > max_y) {
+          y = max_y;
+          yi = y >> 16;
+          src = src_argb + yi * src_stride;
+        }
+        if (yi != lasty) {
+          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
+          rowptr += rowstride;
+          rowstride = -rowstride;
+          lasty = yi;
+          src += src_stride;
+        }
+      }
+      if (filtering == kFilterLinear) {
+        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
+      } else {
+        int yf = (y >> 8) & 255;
+        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
+      }
+      dst_argb += dst_stride;
+      y += dy;
+    }
+    free_aligned_buffer_64(row);
+  }
+}
+
+#ifdef YUVSCALEUP
+// Scale YUV to ARGB up with bilinear interpolation.
+static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
+                                     int dst_width, int dst_height,
+                                     int src_stride_y,
+                                     int src_stride_u,
+                                     int src_stride_v,
+                                     int dst_stride_argb,
+                                     const uint8* src_y,
+                                     const uint8* src_u,
+                                     const uint8* src_v,
+                                     uint8* dst_argb,
+                                     int x, int dx, int y, int dy,
+                                     enum FilterMode filtering) {
+  int j;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(src_width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I422ToARGBRow = I422ToARGBRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(src_width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(src_width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+
+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(dst_width, 8)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(dst_width, 4)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    InterpolateRow = InterpolateRow_MIPS_DSPR2;
+  }
+#endif
+
+  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
+      int dst_width, int x, int dx) =
+      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
+  if (src_width >= 32768) {
+    ScaleARGBFilterCols = filtering ?
+        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
+  }
+#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
+  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
+    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
+  }
+#endif
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
+  }
+#endif
+  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
+    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
+    }
+#endif
+  }
+
+  const int max_y = (src_height - 1) << 16;
+  if (y > max_y) {
+    y = max_y;
+  }
+  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
+  int yi = y >> 16;
+  int uv_yi = yi >> kYShift;
+  const uint8* src_row_y = src_y + yi * src_stride_y;
+  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
+  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
+
+  // Allocate 2 rows of ARGB.
+  const int kRowSize = (dst_width * 4 + 15) & ~15;
+  align_buffer_64(row, kRowSize * 2);
+
+  // Allocate 1 row of ARGB for source conversion.
+  align_buffer_64(argb_row, src_width * 4);
+
+  uint8* rowptr = row;
+  int rowstride = kRowSize;
+  int lasty = yi;
+
+  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
+  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
+  if (src_height > 1) {
+    src_row_y += src_stride_y;
+    if (yi & 1) {
+      src_row_u += src_stride_u;
+      src_row_v += src_stride_v;
+    }
+  }
+  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
+  if (src_height > 2) {
+    src_row_y += src_stride_y;
+    if (!(yi & 1)) {
+      src_row_u += src_stride_u;
+      src_row_v += src_stride_v;
+    }
+  }
+
+  for (j = 0; j < dst_height; ++j) {
+    yi = y >> 16;
+    if (yi != lasty) {
+      if (y > max_y) {
+        y = max_y;
+        yi = y >> 16;
+        uv_yi = yi >> kYShift;
+        src_row_y = src_y + yi * src_stride_y;
+        src_row_u = src_u + uv_yi * src_stride_u;
+        src_row_v = src_v + uv_yi * src_stride_v;
+      }
+      if (yi != lasty) {
+        // TODO(fbarchard): Convert the clipped region of row.
+        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
+        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
+        rowptr += rowstride;
+        rowstride = -rowstride;
+        lasty = yi;
+        src_row_y += src_stride_y;
+        if (yi & 1) {
+          src_row_u += src_stride_u;
+          src_row_v += src_stride_v;
+        }
+      }
+    }
+    if (filtering == kFilterLinear) {
+      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
+    } else {
+      int yf = (y >> 8) & 255;
+      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
+    }
+    dst_argb += dst_stride_argb;
+    y += dy;
+  }
+  free_aligned_buffer_64(row);
+  free_aligned_buffer_64(row_argb);
+}
+#endif
+
+// Scale ARGB to/from any dimensions, without interpolation.
+// Fixed point math is used for performance: The upper 16 bits
+// of x and dx is the integer part of the source position and
+// the lower 16 bits are the fixed decimal part.
+
+static void ScaleARGBSimple(int src_width, int src_height,
+                            int dst_width, int dst_height,
+                            int src_stride, int dst_stride,
+                            const uint8* src_argb, uint8* dst_argb,
+                            int x, int dx, int y, int dy) {
+  int j;
+  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
+      int dst_width, int x, int dx) =
+      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
+#if defined(HAS_SCALEARGBCOLS_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
+    ScaleARGBCols = ScaleARGBCols_SSE2;
+  }
+#endif
+  if (src_width * 2 == dst_width && x < 0x8000) {
+    ScaleARGBCols = ScaleARGBColsUp2_C;
+#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
+    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
+        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
+    }
+#endif
+  }
+
+  for (j = 0; j < dst_height; ++j) {
+    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
+                  dst_width, x, dx);
+    dst_argb += dst_stride;
+    y += dy;
+  }
+}
+
+// ScaleARGB a ARGB.
+// This function in turn calls a scaling function
+// suitable for handling the desired resolutions.
+static void ScaleARGB(const uint8* src, int src_stride,
+                      int src_width, int src_height,
+                      uint8* dst, int dst_stride,
+                      int dst_width, int dst_height,
+                      int clip_x, int clip_y, int clip_width, int clip_height,
+                      enum FilterMode filtering) {
+  // Initial source x/y coordinate and step values as 16.16 fixed point.
+  int x = 0;
+  int y = 0;
+  int dx = 0;
+  int dy = 0;
+  // ARGB does not support box filter yet, but allow the user to pass it.
+  // Simplify filtering when possible.
+  filtering = ScaleFilterReduce(src_width, src_height,
+                                dst_width, dst_height,
+                                filtering);
+
+  // Negative src_height means invert the image.
+  if (src_height < 0) {
+    src_height = -src_height;
+    src = src + (src_height - 1) * src_stride;
+    src_stride = -src_stride;
+  }
+  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
+             &x, &y, &dx, &dy);
+  src_width = Abs(src_width);
+  if (clip_x) {
+    int64 clipf = (int64)(clip_x) * dx;
+    x += (clipf & 0xffff);
+    src += (clipf >> 16) * 4;
+    dst += clip_x * 4;
+  }
+  if (clip_y) {
+    int64 clipf = (int64)(clip_y) * dy;
+    y += (clipf & 0xffff);
+    src += (clipf >> 16) * src_stride;
+    dst += clip_y * dst_stride;
+  }
+
+  // Special case for integer step values.
+  if (((dx | dy) & 0xffff) == 0) {
+    if (!dx || !dy) {  // 1 pixel wide and/or tall.
+      filtering = kFilterNone;
+    } else {
+      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
+      if (!(dx & 0x10000) && !(dy & 0x10000)) {
+        if (dx == 0x20000) {
+          // Optimized 1/2 downsample.
+          ScaleARGBDown2(src_width, src_height,
+                         clip_width, clip_height,
+                         src_stride, dst_stride, src, dst,
+                         x, dx, y, dy, filtering);
+          return;
+        }
+        if (dx == 0x40000 && filtering == kFilterBox) {
+          // Optimized 1/4 box downsample.
+          ScaleARGBDown4Box(src_width, src_height,
+                            clip_width, clip_height,
+                            src_stride, dst_stride, src, dst,
+                            x, dx, y, dy);
+          return;
+        }
+        ScaleARGBDownEven(src_width, src_height,
+                          clip_width, clip_height,
+                          src_stride, dst_stride, src, dst,
+                          x, dx, y, dy, filtering);
+        return;
+      }
+      // Optimized odd scale down. ie 3, 5, 7, 9x.
+      if ((dx & 0x10000) && (dy & 0x10000)) {
+        filtering = kFilterNone;
+        if (dx == 0x10000 && dy == 0x10000) {
+          // Straight copy.
+          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
+                   dst, dst_stride, clip_width, clip_height);
+          return;
+        }
+      }
+    }
+  }
+  if (dx == 0x10000 && (x & 0xffff) == 0) {
+    // Arbitrary scale vertically, but unscaled vertically.
+    ScalePlaneVertical(src_height,
+                       clip_width, clip_height,
+                       src_stride, dst_stride, src, dst,
+                       x, y, dy, 4, filtering);
+    return;
+  }
+  if (filtering && dy < 65536) {
+    ScaleARGBBilinearUp(src_width, src_height,
+                        clip_width, clip_height,
+                        src_stride, dst_stride, src, dst,
+                        x, dx, y, dy, filtering);
+    return;
+  }
+  if (filtering) {
+    ScaleARGBBilinearDown(src_width, src_height,
+                          clip_width, clip_height,
+                          src_stride, dst_stride, src, dst,
+                          x, dx, y, dy, filtering);
+    return;
+  }
+  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
+                  src_stride, dst_stride, src, dst,
+                  x, dx, y, dy);
+}
+
+LIBYUV_API
+int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
+                  int src_width, int src_height,
+                  uint8* dst_argb, int dst_stride_argb,
+                  int dst_width, int dst_height,
+                  int clip_x, int clip_y, int clip_width, int clip_height,
+                  enum FilterMode filtering) {
+  if (!src_argb || src_width == 0 || src_height == 0 ||
+      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
+      clip_x < 0 || clip_y < 0 ||
+      (clip_x + clip_width) > dst_width ||
+      (clip_y + clip_height) > dst_height) {
+    return -1;
+  }
+  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
+            dst_argb, dst_stride_argb, dst_width, dst_height,
+            clip_x, clip_y, clip_width, clip_height, filtering);
+  return 0;
+}
+
+// Scale an ARGB image.
+LIBYUV_API
+int ARGBScale(const uint8* src_argb, int src_stride_argb,
+              int src_width, int src_height,
+              uint8* dst_argb, int dst_stride_argb,
+              int dst_width, int dst_height,
+              enum FilterMode filtering) {
+  if (!src_argb || src_width == 0 || src_height == 0 ||
+      !dst_argb || dst_width <= 0 || dst_height <= 0) {
+    return -1;
+  }
+  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
+            dst_argb, dst_stride_argb, dst_width, dst_height,
+            0, 0, dst_width, dst_height, filtering);
+  return 0;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_common.cc b/src/main/jni/libyuv/source/scale_common.cc
new file mode 100644
index 000000000..e4b2acc41
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_common.cc
@@ -0,0 +1,1165 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/scale.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "libyuv/cpu_id.h"
+#include "libyuv/planar_functions.h"  // For CopyARGB
+#include "libyuv/row.h"
+#include "libyuv/scale_row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+static __inline int Abs(int v) {
+  return v >= 0 ? v : -v;
+}
+
+// CPU agnostic row functions
+void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                     uint8* dst, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src_ptr[1];
+    dst[1] = src_ptr[3];
+    dst += 2;
+    src_ptr += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = src_ptr[1];
+  }
+}
+
+void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src_ptr[1];
+    dst[1] = src_ptr[3];
+    dst += 2;
+    src_ptr += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = src_ptr[1];
+  }
+}
+
+void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst, int dst_width) {
+  const uint8* s = src_ptr;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (s[0] + s[1] + 1) >> 1;
+    dst[1] = (s[2] + s[3] + 1) >> 1;
+    dst += 2;
+    s += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = (s[0] + s[1] + 1) >> 1;
+  }
+}
+
+void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                              uint16* dst, int dst_width) {
+  const uint16* s = src_ptr;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (s[0] + s[1] + 1) >> 1;
+    dst[1] = (s[2] + s[3] + 1) >> 1;
+    dst += 2;
+    s += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = (s[0] + s[1] + 1) >> 1;
+  }
+}
+
+void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width) {
+  const uint8* s = src_ptr;
+  const uint8* t = src_ptr + src_stride;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
+    dst += 2;
+    s += 4;
+    t += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+  }
+}
+
+void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width) {
+  const uint16* s = src_ptr;
+  const uint16* t = src_ptr + src_stride;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
+    dst += 2;
+    s += 4;
+    t += 4;
+  }
+  if (dst_width & 1) {
+    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
+  }
+}
+
+void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                     uint8* dst, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src_ptr[2];
+    dst[1] = src_ptr[6];
+    dst += 2;
+    src_ptr += 8;
+  }
+  if (dst_width & 1) {
+    dst[0] = src_ptr[2];
+  }
+}
+
+void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src_ptr[2];
+    dst[1] = src_ptr[6];
+    dst += 2;
+    src_ptr += 8;
+  }
+  if (dst_width & 1) {
+    dst[0] = src_ptr[2];
+  }
+}
+
+void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width) {
+  intptr_t stride = src_stride;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+             src_ptr[stride + 0] + src_ptr[stride + 1] +
+             src_ptr[stride + 2] + src_ptr[stride + 3] +
+             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
+             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
+             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
+             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
+             8) >> 4;
+    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
+             src_ptr[stride + 4] + src_ptr[stride + 5] +
+             src_ptr[stride + 6] + src_ptr[stride + 7] +
+             src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
+             src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
+             src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
+             src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
+             8) >> 4;
+    dst += 2;
+    src_ptr += 8;
+  }
+  if (dst_width & 1) {
+    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+             src_ptr[stride + 0] + src_ptr[stride + 1] +
+             src_ptr[stride + 2] + src_ptr[stride + 3] +
+             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
+             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
+             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
+             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
+             8) >> 4;
+  }
+}
+
+void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width) {
+  intptr_t stride = src_stride;
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+             src_ptr[stride + 0] + src_ptr[stride + 1] +
+             src_ptr[stride + 2] + src_ptr[stride + 3] +
+             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
+             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
+             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
+             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
+             8) >> 4;
+    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
+             src_ptr[stride + 4] + src_ptr[stride + 5] +
+             src_ptr[stride + 6] + src_ptr[stride + 7] +
+             src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
+             src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
+             src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
+             src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
+             8) >> 4;
+    dst += 2;
+    src_ptr += 8;
+  }
+  if (dst_width & 1) {
+    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
+             src_ptr[stride + 0] + src_ptr[stride + 1] +
+             src_ptr[stride + 2] + src_ptr[stride + 3] +
+             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
+             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
+             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
+             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
+             8) >> 4;
+  }
+}
+
+void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                      uint8* dst, int dst_width) {
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    dst[0] = src_ptr[0];
+    dst[1] = src_ptr[1];
+    dst[2] = src_ptr[3];
+    dst += 3;
+    src_ptr += 4;
+  }
+}
+
+void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width) {
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    dst[0] = src_ptr[0];
+    dst[1] = src_ptr[1];
+    dst[2] = src_ptr[3];
+    dst += 3;
+    src_ptr += 4;
+  }
+}
+
+// Filter rows 0 and 1 together, 3 : 1
+void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* d, int dst_width) {
+  const uint8* s = src_ptr;
+  const uint8* t = src_ptr + src_stride;
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+    d[0] = (a0 * 3 + b0 + 2) >> 2;
+    d[1] = (a1 * 3 + b1 + 2) >> 2;
+    d[2] = (a2 * 3 + b2 + 2) >> 2;
+    d += 3;
+    s += 4;
+    t += 4;
+  }
+}
+
+void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width) {
+  const uint16* s = src_ptr;
+  const uint16* t = src_ptr + src_stride;
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+    uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+    uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+    uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+    uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+    uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+    d[0] = (a0 * 3 + b0 + 2) >> 2;
+    d[1] = (a1 * 3 + b1 + 2) >> 2;
+    d[2] = (a2 * 3 + b2 + 2) >> 2;
+    d += 3;
+    s += 4;
+    t += 4;
+  }
+}
+
+// Filter rows 1 and 2 together, 1 : 1
+void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* d, int dst_width) {
+  const uint8* s = src_ptr;
+  const uint8* t = src_ptr + src_stride;
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+    d[0] = (a0 + b0 + 1) >> 1;
+    d[1] = (a1 + b1 + 1) >> 1;
+    d[2] = (a2 + b2 + 1) >> 1;
+    d += 3;
+    s += 4;
+    t += 4;
+  }
+}
+
+void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width) {
+  const uint16* s = src_ptr;
+  const uint16* t = src_ptr + src_stride;
+  int x;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (x = 0; x < dst_width; x += 3) {
+    uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
+    uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
+    uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
+    uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
+    uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
+    uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
+    d[0] = (a0 + b0 + 1) >> 1;
+    d[1] = (a1 + b1 + 1) >> 1;
+    d[2] = (a2 + b2 + 1) >> 1;
+    d += 3;
+    s += 4;
+    t += 4;
+  }
+}
+
+// Scales a single row of pixels using point sampling.
+void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
+                 int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst_ptr[0] = src_ptr[x >> 16];
+    x += dx;
+    dst_ptr[1] = src_ptr[x >> 16];
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    dst_ptr[0] = src_ptr[x >> 16];
+  }
+}
+
+void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                    int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst_ptr[0] = src_ptr[x >> 16];
+    x += dx;
+    dst_ptr[1] = src_ptr[x >> 16];
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    dst_ptr[0] = src_ptr[x >> 16];
+  }
+}
+
+// Scales a single row of pixels up by 2x using point sampling.
+void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
+                    int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
+    src_ptr += 1;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    dst_ptr[0] = src_ptr[0];
+  }
+}
+
+void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                       int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
+    src_ptr += 1;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    dst_ptr[0] = src_ptr[0];
+  }
+}
+
+// (1-f)a + fb can be replaced with a + f(b-a)
+#define BLENDER(a, b, f) (uint8)((int)(a) + \
+    ((int)(f) * ((int)(b) - (int)(a)) >> 16))
+
+void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
+                       int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    xi = x >> 16;
+    a = src_ptr[xi];
+    b = src_ptr[xi + 1];
+    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    int xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+  }
+}
+
+void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
+                         int dst_width, int x32, int dx) {
+  int64 x = (int64)(x32);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int64 xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    xi = x >> 16;
+    a = src_ptr[xi];
+    b = src_ptr[xi + 1];
+    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    int64 xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+  }
+}
+#undef BLENDER
+
+#define BLENDER(a, b, f) (uint16)((int)(a) + \
+    ((int)(f) * ((int)(b) - (int)(a)) >> 16))
+
+void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                       int dst_width, int x, int dx) {
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    xi = x >> 16;
+    a = src_ptr[xi];
+    b = src_ptr[xi + 1];
+    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    int xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+  }
+}
+
+void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                         int dst_width, int x32, int dx) {
+  int64 x = (int64)(x32);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int64 xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    xi = x >> 16;
+    a = src_ptr[xi];
+    b = src_ptr[xi + 1];
+    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
+    x += dx;
+    dst_ptr += 2;
+  }
+  if (dst_width & 1) {
+    int64 xi = x >> 16;
+    int a = src_ptr[xi];
+    int b = src_ptr[xi + 1];
+    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
+  }
+}
+#undef BLENDER
+
+void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                      uint8* dst, int dst_width) {
+  int x;
+  assert(dst_width % 3 == 0);
+  for (x = 0; x < dst_width; x += 3) {
+    dst[0] = src_ptr[0];
+    dst[1] = src_ptr[3];
+    dst[2] = src_ptr[6];
+    dst += 3;
+    src_ptr += 8;
+  }
+}
+
+void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width) {
+  int x;
+  assert(dst_width % 3 == 0);
+  for (x = 0; x < dst_width; x += 3) {
+    dst[0] = src_ptr[0];
+    dst[1] = src_ptr[3];
+    dst[2] = src_ptr[6];
+    dst += 3;
+    src_ptr += 8;
+  }
+}
+
+// 8x3 -> 3x1
+void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
+                            ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  int i;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (i = 0; i < dst_width; i += 3) {
+    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
+        src_ptr[stride + 0] + src_ptr[stride + 1] +
+        src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
+        src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
+        (65536 / 9) >> 16;
+    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
+        src_ptr[stride + 3] + src_ptr[stride + 4] +
+        src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
+        src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
+        (65536 / 9) >> 16;
+    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
+        src_ptr[stride + 6] + src_ptr[stride + 7] +
+        src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
+        (65536 / 6) >> 16;
+    src_ptr += 8;
+    dst_ptr += 3;
+  }
+}
+
+void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  int i;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (i = 0; i < dst_width; i += 3) {
+    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
+        src_ptr[stride + 0] + src_ptr[stride + 1] +
+        src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
+        src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
+        (65536 / 9) >> 16;
+    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
+        src_ptr[stride + 3] + src_ptr[stride + 4] +
+        src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
+        src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
+        (65536 / 9) >> 16;
+    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
+        src_ptr[stride + 6] + src_ptr[stride + 7] +
+        src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
+        (65536 / 6) >> 16;
+    src_ptr += 8;
+    dst_ptr += 3;
+  }
+}
+
+// 8x2 -> 3x1
+void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  int i;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (i = 0; i < dst_width; i += 3) {
+    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
+        src_ptr[stride + 0] + src_ptr[stride + 1] +
+        src_ptr[stride + 2]) * (65536 / 6) >> 16;
+    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
+        src_ptr[stride + 3] + src_ptr[stride + 4] +
+        src_ptr[stride + 5]) * (65536 / 6) >> 16;
+    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
+        src_ptr[stride + 6] + src_ptr[stride + 7]) *
+        (65536 / 4) >> 16;
+    src_ptr += 8;
+    dst_ptr += 3;
+  }
+}
+
+void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  int i;
+  assert((dst_width % 3 == 0) && (dst_width > 0));
+  for (i = 0; i < dst_width; i += 3) {
+    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
+        src_ptr[stride + 0] + src_ptr[stride + 1] +
+        src_ptr[stride + 2]) * (65536 / 6) >> 16;
+    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
+        src_ptr[stride + 3] + src_ptr[stride + 4] +
+        src_ptr[stride + 5]) * (65536 / 6) >> 16;
+    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
+        src_ptr[stride + 6] + src_ptr[stride + 7]) *
+        (65536 / 4) >> 16;
+    src_ptr += 8;
+    dst_ptr += 3;
+  }
+}
+
+void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                    uint16* dst_ptr, int src_width, int src_height) {
+  int x;
+  assert(src_width > 0);
+  assert(src_height > 0);
+  for (x = 0; x < src_width; ++x) {
+    const uint8* s = src_ptr + x;
+    unsigned int sum = 0u;
+    int y;
+    for (y = 0; y < src_height; ++y) {
+      sum += s[0];
+      s += src_stride;
+    }
+    // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
+    dst_ptr[x] = sum < 65535u ? sum : 65535u;
+  }
+}
+
+void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                       uint32* dst_ptr, int src_width, int src_height) {
+  int x;
+  assert(src_width > 0);
+  assert(src_height > 0);
+  for (x = 0; x < src_width; ++x) {
+    const uint16* s = src_ptr + x;
+    unsigned int sum = 0u;
+    int y;
+    for (y = 0; y < src_height; ++y) {
+      sum += s[0];
+      s += src_stride;
+    }
+    // No risk of overflow here now
+    dst_ptr[x] = sum;
+  }
+}
+
+void ScaleARGBRowDown2_C(const uint8* src_argb,
+                         ptrdiff_t src_stride,
+                         uint8* dst_argb, int dst_width) {
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src[1];
+    dst[1] = src[3];
+    src += 4;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    dst[0] = src[1];
+  }
+}
+
+void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width; ++x) {
+    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
+    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
+    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
+    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
+    src_argb += 8;
+    dst_argb += 4;
+  }
+}
+
+void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width; ++x) {
+    dst_argb[0] = (src_argb[0] + src_argb[4] +
+                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
+    dst_argb[1] = (src_argb[1] + src_argb[5] +
+                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
+    dst_argb[2] = (src_argb[2] + src_argb[6] +
+                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
+    dst_argb[3] = (src_argb[3] + src_argb[7] +
+                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
+    src_argb += 8;
+    dst_argb += 4;
+  }
+}
+
+void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
+                            int src_stepx,
+                            uint8* dst_argb, int dst_width) {
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+
+  int x;
+  for (x = 0; x < dst_width - 1; x += 2) {
+    dst[0] = src[0];
+    dst[1] = src[src_stepx];
+    src += src_stepx * 2;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    dst[0] = src[0];
+  }
+}
+
+void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width) {
+  int x;
+  for (x = 0; x < dst_width; ++x) {
+    dst_argb[0] = (src_argb[0] + src_argb[4] +
+                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
+    dst_argb[1] = (src_argb[1] + src_argb[5] +
+                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
+    dst_argb[2] = (src_argb[2] + src_argb[6] +
+                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
+    dst_argb[3] = (src_argb[3] + src_argb[7] +
+                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
+    src_argb += src_stepx * 4;
+    dst_argb += 4;
+  }
+}
+
+// Scales a single row of pixels using point sampling.
+void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
+                     int dst_width, int x, int dx) {
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst[0] = src[x >> 16];
+    x += dx;
+    dst[1] = src[x >> 16];
+    x += dx;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    dst[0] = src[x >> 16];
+  }
+}
+
+void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
+                       int dst_width, int x32, int dx) {
+  int64 x = (int64)(x32);
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst[0] = src[x >> 16];
+    x += dx;
+    dst[1] = src[x >> 16];
+    x += dx;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    dst[0] = src[x >> 16];
+  }
+}
+
+// Scales a single row of pixels up by 2x using point sampling.
+void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int x, int dx) {
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    dst[1] = dst[0] = src[0];
+    src += 1;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    dst[0] = src[0];
+  }
+}
+
+// Mimics SSSE3 blender
+#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
+#define BLENDERC(a, b, f, s) (uint32)( \
+    BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
+#define BLENDER(a, b, f) \
+    BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
+    BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
+
+void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
+                           int dst_width, int x, int dx) {
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int xi = x >> 16;
+    int xf = (x >> 9) & 0x7f;
+    uint32 a = src[xi];
+    uint32 b = src[xi + 1];
+    dst[0] = BLENDER(a, b, xf);
+    x += dx;
+    xi = x >> 16;
+    xf = (x >> 9) & 0x7f;
+    a = src[xi];
+    b = src[xi + 1];
+    dst[1] = BLENDER(a, b, xf);
+    x += dx;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    int xi = x >> 16;
+    int xf = (x >> 9) & 0x7f;
+    uint32 a = src[xi];
+    uint32 b = src[xi + 1];
+    dst[0] = BLENDER(a, b, xf);
+  }
+}
+
+void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
+                             int dst_width, int x32, int dx) {
+  int64 x = (int64)(x32);
+  const uint32* src = (const uint32*)(src_argb);
+  uint32* dst = (uint32*)(dst_argb);
+  int j;
+  for (j = 0; j < dst_width - 1; j += 2) {
+    int64 xi = x >> 16;
+    int xf = (x >> 9) & 0x7f;
+    uint32 a = src[xi];
+    uint32 b = src[xi + 1];
+    dst[0] = BLENDER(a, b, xf);
+    x += dx;
+    xi = x >> 16;
+    xf = (x >> 9) & 0x7f;
+    a = src[xi];
+    b = src[xi + 1];
+    dst[1] = BLENDER(a, b, xf);
+    x += dx;
+    dst += 2;
+  }
+  if (dst_width & 1) {
+    int64 xi = x >> 16;
+    int xf = (x >> 9) & 0x7f;
+    uint32 a = src[xi];
+    uint32 b = src[xi + 1];
+    dst[0] = BLENDER(a, b, xf);
+  }
+}
+#undef BLENDER1
+#undef BLENDERC
+#undef BLENDER
+
+// Scale plane vertically with bilinear interpolation.
+void ScalePlaneVertical(int src_height,
+                        int dst_width, int dst_height,
+                        int src_stride, int dst_stride,
+                        const uint8* src_argb, uint8* dst_argb,
+                        int x, int y, int dy,
+                        int bpp, enum FilterMode filtering) {
+  // TODO(fbarchard): Allow higher bpp.
+  int dst_width_bytes = dst_width * bpp;
+  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_C;
+  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
+  int j;
+  assert(bpp >= 1 && bpp <= 4);
+  assert(src_height != 0);
+  assert(dst_width > 0);
+  assert(dst_height > 0);
+  src_argb += (x >> 16) * bpp;
+#if defined(HAS_INTERPOLATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSE2;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_SSSE3;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
+    InterpolateRow = InterpolateRow_Any_AVX2;
+    if (IS_ALIGNED(dst_width_bytes, 32)) {
+      InterpolateRow = InterpolateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_NEON;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
+    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
+    if (IS_ALIGNED(dst_width_bytes, 4)) {
+      InterpolateRow = InterpolateRow_MIPS_DSPR2;
+    }
+  }
+#endif
+  for (j = 0; j < dst_height; ++j) {
+    int yi;
+    int yf;
+    if (y > max_y) {
+      y = max_y;
+    }
+    yi = y >> 16;
+    yf = filtering ? ((y >> 8) & 255) : 0;
+    InterpolateRow(dst_argb, src_argb + yi * src_stride,
+                   src_stride, dst_width_bytes, yf);
+    dst_argb += dst_stride;
+    y += dy;
+  }
+}
+void ScalePlaneVertical_16(int src_height,
+                           int dst_width, int dst_height,
+                           int src_stride, int dst_stride,
+                           const uint16* src_argb, uint16* dst_argb,
+                           int x, int y, int dy,
+                           int wpp, enum FilterMode filtering) {
+  // TODO(fbarchard): Allow higher wpp.
+  int dst_width_words = dst_width * wpp;
+  void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
+      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
+      InterpolateRow_16_C;
+  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
+  int j;
+  assert(wpp >= 1 && wpp <= 2);
+  assert(src_height != 0);
+  assert(dst_width > 0);
+  assert(dst_height > 0);
+  src_argb += (x >> 16) * wpp;
+#if defined(HAS_INTERPOLATEROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSE2;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSE2;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_SSSE3;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
+        InterpolateRow = InterpolateRow_16_SSSE3;
+      }
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
+    InterpolateRow = InterpolateRow_Any_16_AVX2;
+    if (IS_ALIGNED(dst_width_bytes, 32)) {
+      InterpolateRow = InterpolateRow_16_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
+    InterpolateRow = InterpolateRow_Any_16_NEON;
+    if (IS_ALIGNED(dst_width_bytes, 16)) {
+      InterpolateRow = InterpolateRow_16_NEON;
+    }
+  }
+#endif
+#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
+      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
+    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
+    if (IS_ALIGNED(dst_width_bytes, 4)) {
+      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
+    }
+  }
+#endif
+  for (j = 0; j < dst_height; ++j) {
+    int yi;
+    int yf;
+    if (y > max_y) {
+      y = max_y;
+    }
+    yi = y >> 16;
+    yf = filtering ? ((y >> 8) & 255) : 0;
+    InterpolateRow(dst_argb, src_argb + yi * src_stride,
+                   src_stride, dst_width_words, yf);
+    dst_argb += dst_stride;
+    y += dy;
+  }
+}
+
+// Simplify the filtering based on scale factors.
+enum FilterMode ScaleFilterReduce(int src_width, int src_height,
+                                  int dst_width, int dst_height,
+                                  enum FilterMode filtering) {
+  if (src_width < 0) {
+    src_width = -src_width;
+  }
+  if (src_height < 0) {
+    src_height = -src_height;
+  }
+  if (filtering == kFilterBox) {
+    // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
+    if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
+      filtering = kFilterBilinear;
+    }
+    // If scaling to larger, switch from Box to Bilinear.
+    if (dst_width >= src_width || dst_height >= src_height) {
+      filtering = kFilterBilinear;
+    }
+  }
+  if (filtering == kFilterBilinear) {
+    if (src_height == 1) {
+      filtering = kFilterLinear;
+    }
+    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
+    if (dst_height == src_height || dst_height * 3 == src_height) {
+      filtering = kFilterLinear;
+    }
+    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
+    // avoid reading 2 pixels horizontally that causes memory exception.
+    if (src_width == 1) {
+      filtering = kFilterNone;
+    }
+  }
+  if (filtering == kFilterLinear) {
+    if (src_width == 1) {
+      filtering = kFilterNone;
+    }
+    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
+    if (dst_width == src_width || dst_width * 3 == src_width) {
+      filtering = kFilterNone;
+    }
+  }
+  return filtering;
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_C(int num, int div) {
+  return (int)(((int64)(num) << 16) / div);
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv1_C(int num, int div) {
+  return (int)((((int64)(num) << 16) - 0x00010001) /
+                          (div - 1));
+}
+
+#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
+
+// Compute slope values for stepping.
+void ScaleSlope(int src_width, int src_height,
+                int dst_width, int dst_height,
+                enum FilterMode filtering,
+                int* x, int* y, int* dx, int* dy) {
+  assert(x != NULL);
+  assert(y != NULL);
+  assert(dx != NULL);
+  assert(dy != NULL);
+  assert(src_width != 0);
+  assert(src_height != 0);
+  assert(dst_width > 0);
+  assert(dst_height > 0);
+  // Check for 1 pixel and avoid FixedDiv overflow.
+  if (dst_width == 1 && src_width >= 32768) {
+    dst_width = src_width;
+  }
+  if (dst_height == 1 && src_height >= 32768) {
+    dst_height = src_height;
+  }
+  if (filtering == kFilterBox) {
+    // Scale step for point sampling duplicates all pixels equally.
+    *dx = FixedDiv(Abs(src_width), dst_width);
+    *dy = FixedDiv(src_height, dst_height);
+    *x = 0;
+    *y = 0;
+  } else if (filtering == kFilterBilinear) {
+    // Scale step for bilinear sampling renders last pixel once for upsample.
+    if (dst_width <= Abs(src_width)) {
+      *dx = FixedDiv(Abs(src_width), dst_width);
+      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
+    } else if (dst_width > 1) {
+      *dx = FixedDiv1(Abs(src_width), dst_width);
+      *x = 0;
+    }
+    if (dst_height <= src_height) {
+      *dy = FixedDiv(src_height,  dst_height);
+      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
+    } else if (dst_height > 1) {
+      *dy = FixedDiv1(src_height, dst_height);
+      *y = 0;
+    }
+  } else if (filtering == kFilterLinear) {
+    // Scale step for bilinear sampling renders last pixel once for upsample.
+    if (dst_width <= Abs(src_width)) {
+      *dx = FixedDiv(Abs(src_width), dst_width);
+      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
+    } else if (dst_width > 1) {
+      *dx = FixedDiv1(Abs(src_width), dst_width);
+      *x = 0;
+    }
+    *dy = FixedDiv(src_height, dst_height);
+    *y = *dy >> 1;
+  } else {
+    // Scale step for point sampling duplicates all pixels equally.
+    *dx = FixedDiv(Abs(src_width), dst_width);
+    *dy = FixedDiv(src_height, dst_height);
+    *x = CENTERSTART(*dx, 0);
+    *y = CENTERSTART(*dy, 0);
+  }
+  // Negative src_width means horizontally mirror.
+  if (src_width < 0) {
+    *x += (dst_width - 1) * *dx;
+    *dx = -*dx;
+    // src_width = -src_width;   // Caller must do this.
+  }
+}
+#undef CENTERSTART
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_mips.cc b/src/main/jni/libyuv/source/scale_mips.cc
new file mode 100644
index 000000000..3eb4f27c4
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_mips.cc
@@ -0,0 +1,654 @@
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC MIPS DSPR2
+#if !defined(LIBYUV_DISABLE_MIPS) && \
+    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32)
+
+void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst, int dst_width) {
+  __asm__ __volatile__(
+    ".set push                                     \n"
+    ".set noreorder                                \n"
+
+    "srl            $t9, %[dst_width], 4           \n"  // iterations -> by 16
+    "beqz           $t9, 2f                        \n"
+    " nop                                          \n"
+
+    ".p2align       2                              \n"
+  "1:                                              \n"
+    "lw             $t0, 0(%[src_ptr])             \n"  // |3|2|1|0|
+    "lw             $t1, 4(%[src_ptr])             \n"  // |7|6|5|4|
+    "lw             $t2, 8(%[src_ptr])             \n"  // |11|10|9|8|
+    "lw             $t3, 12(%[src_ptr])            \n"  // |15|14|13|12|
+    "lw             $t4, 16(%[src_ptr])            \n"  // |19|18|17|16|
+    "lw             $t5, 20(%[src_ptr])            \n"  // |23|22|21|20|
+    "lw             $t6, 24(%[src_ptr])            \n"  // |27|26|25|24|
+    "lw             $t7, 28(%[src_ptr])            \n"  // |31|30|29|28|
+    // TODO(fbarchard): Use odd pixels instead of even.
+    "precr.qb.ph    $t8, $t1, $t0                  \n"  // |6|4|2|0|
+    "precr.qb.ph    $t0, $t3, $t2                  \n"  // |14|12|10|8|
+    "precr.qb.ph    $t1, $t5, $t4                  \n"  // |22|20|18|16|
+    "precr.qb.ph    $t2, $t7, $t6                  \n"  // |30|28|26|24|
+    "addiu          %[src_ptr], %[src_ptr], 32     \n"
+    "addiu          $t9, $t9, -1                   \n"
+    "sw             $t8, 0(%[dst])                 \n"
+    "sw             $t0, 4(%[dst])                 \n"
+    "sw             $t1, 8(%[dst])                 \n"
+    "sw             $t2, 12(%[dst])                \n"
+    "bgtz           $t9, 1b                        \n"
+    " addiu         %[dst], %[dst], 16             \n"
+
+  "2:                                              \n"
+    "andi           $t9, %[dst_width], 0xf         \n"  // residue
+    "beqz           $t9, 3f                        \n"
+    " nop                                          \n"
+
+  "21:                                             \n"
+    "lbu            $t0, 0(%[src_ptr])             \n"
+    "addiu          %[src_ptr], %[src_ptr], 2      \n"
+    "addiu          $t9, $t9, -1                   \n"
+    "sb             $t0, 0(%[dst])                 \n"
+    "bgtz           $t9, 21b                       \n"
+    " addiu         %[dst], %[dst], 1              \n"
+
+  "3:                                              \n"
+    ".set pop                                      \n"
+  : [src_ptr] "+r" (src_ptr),
+    [dst] "+r" (dst)
+  : [dst_width] "r" (dst_width)
+  : "t0", "t1", "t2", "t3", "t4", "t5",
+    "t6", "t7", "t8", "t9"
+  );
+}
+
+void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                 uint8* dst, int dst_width) {
+  const uint8* t = src_ptr + src_stride;
+
+  __asm__ __volatile__ (
+    ".set push                                    \n"
+    ".set noreorder                               \n"
+
+    "srl            $t9, %[dst_width], 3          \n"  // iterations -> step 8
+    "bltz           $t9, 2f                       \n"
+    " nop                                         \n"
+
+    ".p2align       2                             \n"
+  "1:                                             \n"
+    "lw             $t0, 0(%[src_ptr])            \n"  // |3|2|1|0|
+    "lw             $t1, 4(%[src_ptr])            \n"  // |7|6|5|4|
+    "lw             $t2, 8(%[src_ptr])            \n"  // |11|10|9|8|
+    "lw             $t3, 12(%[src_ptr])           \n"  // |15|14|13|12|
+    "lw             $t4, 0(%[t])                  \n"  // |19|18|17|16|
+    "lw             $t5, 4(%[t])                  \n"  // |23|22|21|20|
+    "lw             $t6, 8(%[t])                  \n"  // |27|26|25|24|
+    "lw             $t7, 12(%[t])                 \n"  // |31|30|29|28|
+    "addiu          $t9, $t9, -1                  \n"
+    "srl            $t8, $t0, 16                  \n"  // |X|X|3|2|
+    "ins            $t0, $t4, 16, 16              \n"  // |17|16|1|0|
+    "ins            $t4, $t8, 0, 16               \n"  // |19|18|3|2|
+    "raddu.w.qb     $t0, $t0                      \n"  // |17+16+1+0|
+    "raddu.w.qb     $t4, $t4                      \n"  // |19+18+3+2|
+    "shra_r.w       $t0, $t0, 2                   \n"  // |t0+2|>>2
+    "shra_r.w       $t4, $t4, 2                   \n"  // |t4+2|>>2
+    "srl            $t8, $t1, 16                  \n"  // |X|X|7|6|
+    "ins            $t1, $t5, 16, 16              \n"  // |21|20|5|4|
+    "ins            $t5, $t8, 0, 16               \n"  // |22|23|7|6|
+    "raddu.w.qb     $t1, $t1                      \n"  // |21+20+5+4|
+    "raddu.w.qb     $t5, $t5                      \n"  // |23+22+7+6|
+    "shra_r.w       $t1, $t1, 2                   \n"  // |t1+2|>>2
+    "shra_r.w       $t5, $t5, 2                   \n"  // |t5+2|>>2
+    "srl            $t8, $t2, 16                  \n"  // |X|X|11|10|
+    "ins            $t2, $t6, 16, 16              \n"  // |25|24|9|8|
+    "ins            $t6, $t8, 0, 16               \n"  // |27|26|11|10|
+    "raddu.w.qb     $t2, $t2                      \n"  // |25+24+9+8|
+    "raddu.w.qb     $t6, $t6                      \n"  // |27+26+11+10|
+    "shra_r.w       $t2, $t2, 2                   \n"  // |t2+2|>>2
+    "shra_r.w       $t6, $t6, 2                   \n"  // |t5+2|>>2
+    "srl            $t8, $t3, 16                  \n"  // |X|X|15|14|
+    "ins            $t3, $t7, 16, 16              \n"  // |29|28|13|12|
+    "ins            $t7, $t8, 0, 16               \n"  // |31|30|15|14|
+    "raddu.w.qb     $t3, $t3                      \n"  // |29+28+13+12|
+    "raddu.w.qb     $t7, $t7                      \n"  // |31+30+15+14|
+    "shra_r.w       $t3, $t3, 2                   \n"  // |t3+2|>>2
+    "shra_r.w       $t7, $t7, 2                   \n"  // |t7+2|>>2
+    "addiu          %[src_ptr], %[src_ptr], 16    \n"
+    "addiu          %[t], %[t], 16                \n"
+    "sb             $t0, 0(%[dst])                \n"
+    "sb             $t4, 1(%[dst])                \n"
+    "sb             $t1, 2(%[dst])                \n"
+    "sb             $t5, 3(%[dst])                \n"
+    "sb             $t2, 4(%[dst])                \n"
+    "sb             $t6, 5(%[dst])                \n"
+    "sb             $t3, 6(%[dst])                \n"
+    "sb             $t7, 7(%[dst])                \n"
+    "bgtz           $t9, 1b                       \n"
+    " addiu         %[dst], %[dst], 8             \n"
+
+  "2:                                             \n"
+    "andi           $t9, %[dst_width], 0x7        \n"  // x = residue
+    "beqz           $t9, 3f                       \n"
+    " nop                                         \n"
+
+    "21:                                          \n"
+    "lwr            $t1, 0(%[src_ptr])            \n"
+    "lwl            $t1, 3(%[src_ptr])            \n"
+    "lwr            $t2, 0(%[t])                  \n"
+    "lwl            $t2, 3(%[t])                  \n"
+    "srl            $t8, $t1, 16                  \n"
+    "ins            $t1, $t2, 16, 16              \n"
+    "ins            $t2, $t8, 0, 16               \n"
+    "raddu.w.qb     $t1, $t1                      \n"
+    "raddu.w.qb     $t2, $t2                      \n"
+    "shra_r.w       $t1, $t1, 2                   \n"
+    "shra_r.w       $t2, $t2, 2                   \n"
+    "sb             $t1, 0(%[dst])                \n"
+    "sb             $t2, 1(%[dst])                \n"
+    "addiu          %[src_ptr], %[src_ptr], 4     \n"
+    "addiu          $t9, $t9, -2                  \n"
+    "addiu          %[t], %[t], 4                 \n"
+    "bgtz           $t9, 21b                      \n"
+    " addiu         %[dst], %[dst], 2             \n"
+
+  "3:                                             \n"
+    ".set pop                                     \n"
+
+  : [src_ptr] "+r" (src_ptr),
+    [dst] "+r" (dst), [t] "+r" (t)
+  : [dst_width] "r" (dst_width)
+  : "t0", "t1", "t2", "t3", "t4", "t5",
+    "t6", "t7", "t8", "t9"
+  );
+}
+
+void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst, int dst_width) {
+  __asm__ __volatile__ (
+      ".set push                                    \n"
+      ".set noreorder                               \n"
+
+      "srl            $t9, %[dst_width], 3          \n"
+      "beqz           $t9, 2f                       \n"
+      " nop                                         \n"
+
+      ".p2align       2                             \n"
+     "1:                                            \n"
+      "lw             $t1, 0(%[src_ptr])            \n"  // |3|2|1|0|
+      "lw             $t2, 4(%[src_ptr])            \n"  // |7|6|5|4|
+      "lw             $t3, 8(%[src_ptr])            \n"  // |11|10|9|8|
+      "lw             $t4, 12(%[src_ptr])           \n"  // |15|14|13|12|
+      "lw             $t5, 16(%[src_ptr])           \n"  // |19|18|17|16|
+      "lw             $t6, 20(%[src_ptr])           \n"  // |23|22|21|20|
+      "lw             $t7, 24(%[src_ptr])           \n"  // |27|26|25|24|
+      "lw             $t8, 28(%[src_ptr])           \n"  // |31|30|29|28|
+      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |6|4|2|0|
+      "precr.qb.ph    $t2, $t4, $t3                 \n"  // |14|12|10|8|
+      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |22|20|18|16|
+      "precr.qb.ph    $t6, $t8, $t7                 \n"  // |30|28|26|24|
+      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |12|8|4|0|
+      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |28|24|20|16|
+      "addiu          %[src_ptr], %[src_ptr], 32    \n"
+      "addiu          $t9, $t9, -1                  \n"
+      "sw             $t1, 0(%[dst])                \n"
+      "sw             $t5, 4(%[dst])                \n"
+      "bgtz           $t9, 1b                       \n"
+      " addiu         %[dst], %[dst], 8             \n"
+
+    "2:                                             \n"
+      "andi           $t9, %[dst_width], 7          \n"  // residue
+      "beqz           $t9, 3f                       \n"
+      " nop                                         \n"
+
+    "21:                                            \n"
+      "lbu            $t1, 0(%[src_ptr])            \n"
+      "addiu          %[src_ptr], %[src_ptr], 4     \n"
+      "addiu          $t9, $t9, -1                  \n"
+      "sb             $t1, 0(%[dst])                \n"
+      "bgtz           $t9, 21b                      \n"
+      " addiu         %[dst], %[dst], 1             \n"
+
+    "3:                                             \n"
+      ".set pop                                     \n"
+      : [src_ptr] "+r" (src_ptr),
+        [dst] "+r" (dst)
+      : [dst_width] "r" (dst_width)
+      : "t1", "t2", "t3", "t4", "t5",
+        "t6", "t7", "t8", "t9"
+  );
+}
+
+void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                 uint8* dst, int dst_width) {
+  intptr_t stride = src_stride;
+  const uint8* s1 = src_ptr + stride;
+  const uint8* s2 = s1 + stride;
+  const uint8* s3 = s2 + stride;
+
+  __asm__ __volatile__ (
+      ".set push                                  \n"
+      ".set noreorder                             \n"
+
+      "srl           $t9, %[dst_width], 1         \n"
+      "andi          $t8, %[dst_width], 1         \n"
+
+      ".p2align      2                            \n"
+     "1:                                          \n"
+      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
+      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
+      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
+      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
+      "lw            $t4, 4(%[src_ptr])           \n"  // |19|18|17|16|
+      "lw            $t5, 4(%[s1])                \n"  // |23|22|21|20|
+      "lw            $t6, 4(%[s2])                \n"  // |27|26|25|24|
+      "lw            $t7, 4(%[s3])                \n"  // |31|30|29|28|
+      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
+      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
+      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
+      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
+      "raddu.w.qb    $t4, $t4                     \n"  // |19 + 18 + 17 + 16|
+      "raddu.w.qb    $t5, $t5                     \n"  // |23 + 22 + 21 + 20|
+      "raddu.w.qb    $t6, $t6                     \n"  // |27 + 26 + 25 + 24|
+      "raddu.w.qb    $t7, $t7                     \n"  // |31 + 30 + 29 + 28|
+      "add           $t0, $t0, $t1                \n"
+      "add           $t1, $t2, $t3                \n"
+      "add           $t0, $t0, $t1                \n"
+      "add           $t4, $t4, $t5                \n"
+      "add           $t6, $t6, $t7                \n"
+      "add           $t4, $t4, $t6                \n"
+      "shra_r.w      $t0, $t0, 4                  \n"
+      "shra_r.w      $t4, $t4, 4                  \n"
+      "sb            $t0, 0(%[dst])               \n"
+      "sb            $t4, 1(%[dst])               \n"
+      "addiu         %[src_ptr], %[src_ptr], 8    \n"
+      "addiu         %[s1], %[s1], 8              \n"
+      "addiu         %[s2], %[s2], 8              \n"
+      "addiu         %[s3], %[s3], 8              \n"
+      "addiu         $t9, $t9, -1                 \n"
+      "bgtz          $t9, 1b                      \n"
+      " addiu        %[dst], %[dst], 2            \n"
+      "beqz          $t8, 2f                      \n"
+      " nop                                       \n"
+
+      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
+      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
+      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
+      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
+      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
+      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
+      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
+      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
+      "add           $t0, $t0, $t1                \n"
+      "add           $t1, $t2, $t3                \n"
+      "add           $t0, $t0, $t1                \n"
+      "shra_r.w      $t0, $t0, 4                  \n"
+      "sb            $t0, 0(%[dst])               \n"
+
+      "2:                                         \n"
+      ".set pop                                   \n"
+
+      : [src_ptr] "+r" (src_ptr),
+        [dst] "+r" (dst),
+        [s1] "+r" (s1),
+        [s2] "+r" (s2),
+        [s3] "+r" (s3)
+      : [dst_width] "r" (dst_width)
+      : "t0", "t1", "t2", "t3", "t4", "t5",
+        "t6","t7", "t8", "t9"
+  );
+}
+
+void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width) {
+  __asm__ __volatile__ (
+      ".set push                                          \n"
+      ".set noreorder                                     \n"
+      ".p2align        2                                  \n"
+    "1:                                                   \n"
+      "lw              $t1, 0(%[src_ptr])                 \n"  // |3|2|1|0|
+      "lw              $t2, 4(%[src_ptr])                 \n"  // |7|6|5|4|
+      "lw              $t3, 8(%[src_ptr])                 \n"  // |11|10|9|8|
+      "lw              $t4, 12(%[src_ptr])                \n"  // |15|14|13|12|
+      "lw              $t5, 16(%[src_ptr])                \n"  // |19|18|17|16|
+      "lw              $t6, 20(%[src_ptr])                \n"  // |23|22|21|20|
+      "lw              $t7, 24(%[src_ptr])                \n"  // |27|26|25|24|
+      "lw              $t8, 28(%[src_ptr])                \n"  // |31|30|29|28|
+      "precrq.qb.ph    $t0, $t2, $t4                      \n"  // |7|5|15|13|
+      "precrq.qb.ph    $t9, $t6, $t8                      \n"  // |23|21|31|30|
+      "addiu           %[dst_width], %[dst_width], -24    \n"
+      "ins             $t1, $t1, 8, 16                    \n"  // |3|1|0|X|
+      "ins             $t4, $t0, 8, 16                    \n"  // |X|15|13|12|
+      "ins             $t5, $t5, 8, 16                    \n"  // |19|17|16|X|
+      "ins             $t8, $t9, 8, 16                    \n"  // |X|31|29|28|
+      "addiu           %[src_ptr], %[src_ptr], 32         \n"
+      "packrl.ph       $t0, $t3, $t0                      \n"  // |9|8|7|5|
+      "packrl.ph       $t9, $t7, $t9                      \n"  // |25|24|23|21|
+      "prepend         $t1, $t2, 8                        \n"  // |4|3|1|0|
+      "prepend         $t3, $t4, 24                       \n"  // |15|13|12|11|
+      "prepend         $t5, $t6, 8                        \n"  // |20|19|17|16|
+      "prepend         $t7, $t8, 24                       \n"  // |31|29|28|27|
+      "sw              $t1, 0(%[dst])                     \n"
+      "sw              $t0, 4(%[dst])                     \n"
+      "sw              $t3, 8(%[dst])                     \n"
+      "sw              $t5, 12(%[dst])                    \n"
+      "sw              $t9, 16(%[dst])                    \n"
+      "sw              $t7, 20(%[dst])                    \n"
+      "bnez            %[dst_width], 1b                   \n"
+      " addiu          %[dst], %[dst], 24                 \n"
+      ".set pop                                           \n"
+      : [src_ptr] "+r" (src_ptr),
+        [dst] "+r" (dst),
+        [dst_width] "+r" (dst_width)
+      :
+      : "t0", "t1", "t2", "t3", "t4", "t5",
+        "t6","t7", "t8", "t9"
+  );
+}
+
+void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* d, int dst_width) {
+  __asm__ __volatile__ (
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+      "repl.ph           $t3, 3                          \n"  // 0x00030003
+
+     ".p2align           2                               \n"
+    "1:                                                  \n"
+      "lw                $t0, 0(%[src_ptr])              \n"  // |S3|S2|S1|S0|
+      "lwx               $t1, %[src_stride](%[src_ptr])  \n"  // |T3|T2|T1|T0|
+      "rotr              $t2, $t0, 8                     \n"  // |S0|S3|S2|S1|
+      "rotr              $t6, $t1, 8                     \n"  // |T0|T3|T2|T1|
+      "muleu_s.ph.qbl    $t4, $t2, $t3                   \n"  // |S0*3|S3*3|
+      "muleu_s.ph.qbl    $t5, $t6, $t3                   \n"  // |T0*3|T3*3|
+      "andi              $t0, $t2, 0xFFFF                \n"  // |0|0|S2|S1|
+      "andi              $t1, $t6, 0xFFFF                \n"  // |0|0|T2|T1|
+      "raddu.w.qb        $t0, $t0                        \n"
+      "raddu.w.qb        $t1, $t1                        \n"
+      "shra_r.w          $t0, $t0, 1                     \n"
+      "shra_r.w          $t1, $t1, 1                     \n"
+      "preceu.ph.qbr     $t2, $t2                        \n"  // |0|S2|0|S1|
+      "preceu.ph.qbr     $t6, $t6                        \n"  // |0|T2|0|T1|
+      "rotr              $t2, $t2, 16                    \n"  // |0|S1|0|S2|
+      "rotr              $t6, $t6, 16                    \n"  // |0|T1|0|T2|
+      "addu.ph           $t2, $t2, $t4                   \n"
+      "addu.ph           $t6, $t6, $t5                   \n"
+      "sll               $t5, $t0, 1                     \n"
+      "add               $t0, $t5, $t0                   \n"
+      "shra_r.ph         $t2, $t2, 2                     \n"
+      "shra_r.ph         $t6, $t6, 2                     \n"
+      "shll.ph           $t4, $t2, 1                     \n"
+      "addq.ph           $t4, $t4, $t2                   \n"
+      "addu              $t0, $t0, $t1                   \n"
+      "addiu             %[src_ptr], %[src_ptr], 4       \n"
+      "shra_r.w          $t0, $t0, 2                     \n"
+      "addu.ph           $t6, $t6, $t4                   \n"
+      "shra_r.ph         $t6, $t6, 2                     \n"
+      "srl               $t1, $t6, 16                    \n"
+      "addiu             %[dst_width], %[dst_width], -3  \n"
+      "sb                $t1, 0(%[d])                    \n"
+      "sb                $t0, 1(%[d])                    \n"
+      "sb                $t6, 2(%[d])                    \n"
+      "bgtz              %[dst_width], 1b                \n"
+      " addiu            %[d], %[d], 3                   \n"
+    "3:                                                  \n"
+      ".set pop                                          \n"
+      : [src_ptr] "+r" (src_ptr),
+        [src_stride] "+r" (src_stride),
+        [d] "+r" (d),
+        [dst_width] "+r" (dst_width)
+      :
+      : "t0", "t1", "t2", "t3",
+        "t4", "t5", "t6"
+  );
+}
+
+void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* d, int dst_width) {
+  __asm__ __volatile__ (
+      ".set push                                           \n"
+      ".set noreorder                                      \n"
+      "repl.ph           $t2, 3                            \n"  // 0x00030003
+
+      ".p2align          2                                 \n"
+    "1:                                                    \n"
+      "lw                $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
+      "lwx               $t1, %[src_stride](%[src_ptr])    \n"  // |T3|T2|T1|T0|
+      "rotr              $t4, $t0, 8                       \n"  // |S0|S3|S2|S1|
+      "rotr              $t6, $t1, 8                       \n"  // |T0|T3|T2|T1|
+      "muleu_s.ph.qbl    $t3, $t4, $t2                     \n"  // |S0*3|S3*3|
+      "muleu_s.ph.qbl    $t5, $t6, $t2                     \n"  // |T0*3|T3*3|
+      "andi              $t0, $t4, 0xFFFF                  \n"  // |0|0|S2|S1|
+      "andi              $t1, $t6, 0xFFFF                  \n"  // |0|0|T2|T1|
+      "raddu.w.qb        $t0, $t0                          \n"
+      "raddu.w.qb        $t1, $t1                          \n"
+      "shra_r.w          $t0, $t0, 1                       \n"
+      "shra_r.w          $t1, $t1, 1                       \n"
+      "preceu.ph.qbr     $t4, $t4                          \n"  // |0|S2|0|S1|
+      "preceu.ph.qbr     $t6, $t6                          \n"  // |0|T2|0|T1|
+      "rotr              $t4, $t4, 16                      \n"  // |0|S1|0|S2|
+      "rotr              $t6, $t6, 16                      \n"  // |0|T1|0|T2|
+      "addu.ph           $t4, $t4, $t3                     \n"
+      "addu.ph           $t6, $t6, $t5                     \n"
+      "shra_r.ph         $t6, $t6, 2                       \n"
+      "shra_r.ph         $t4, $t4, 2                       \n"
+      "addu.ph           $t6, $t6, $t4                     \n"
+      "addiu             %[src_ptr], %[src_ptr], 4         \n"
+      "shra_r.ph         $t6, $t6, 1                       \n"
+      "addu              $t0, $t0, $t1                     \n"
+      "addiu             %[dst_width], %[dst_width], -3    \n"
+      "shra_r.w          $t0, $t0, 1                       \n"
+      "srl               $t1, $t6, 16                      \n"
+      "sb                $t1, 0(%[d])                      \n"
+      "sb                $t0, 1(%[d])                      \n"
+      "sb                $t6, 2(%[d])                      \n"
+      "bgtz              %[dst_width], 1b                  \n"
+      " addiu            %[d], %[d], 3                     \n"
+    "3:                                                    \n"
+      ".set pop                                            \n"
+      : [src_ptr] "+r" (src_ptr),
+        [src_stride] "+r" (src_stride),
+        [d] "+r" (d),
+        [dst_width] "+r" (dst_width)
+      :
+      : "t0", "t1", "t2", "t3",
+        "t4", "t5", "t6"
+  );
+}
+
+void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width) {
+  __asm__ __volatile__ (
+      ".set push                                     \n"
+      ".set noreorder                                \n"
+
+      ".p2align   2                                  \n"
+    "1:                                              \n"
+      "lw         $t0, 0(%[src_ptr])                 \n"  // |3|2|1|0|
+      "lw         $t1, 4(%[src_ptr])                 \n"  // |7|6|5|4|
+      "lw         $t2, 8(%[src_ptr])                 \n"  // |11|10|9|8|
+      "lw         $t3, 12(%[src_ptr])                \n"  // |15|14|13|12|
+      "lw         $t4, 16(%[src_ptr])                \n"  // |19|18|17|16|
+      "lw         $t5, 20(%[src_ptr])                \n"  // |23|22|21|20|
+      "lw         $t6, 24(%[src_ptr])                \n"  // |27|26|25|24|
+      "lw         $t7, 28(%[src_ptr])                \n"  // |31|30|29|28|
+      "wsbh       $t0, $t0                           \n"  // |2|3|0|1|
+      "wsbh       $t6, $t6                           \n"  // |26|27|24|25|
+      "srl        $t0, $t0, 8                        \n"  // |X|2|3|0|
+      "srl        $t3, $t3, 16                       \n"  // |X|X|15|14|
+      "srl        $t5, $t5, 16                       \n"  // |X|X|23|22|
+      "srl        $t7, $t7, 16                       \n"  // |X|X|31|30|
+      "ins        $t1, $t2, 24, 8                    \n"  // |8|6|5|4|
+      "ins        $t6, $t5, 0, 8                     \n"  // |26|27|24|22|
+      "ins        $t1, $t0, 0, 16                    \n"  // |8|6|3|0|
+      "ins        $t6, $t7, 24, 8                    \n"  // |30|27|24|22|
+      "prepend    $t2, $t3, 24                       \n"  // |X|15|14|11|
+      "ins        $t4, $t4, 16, 8                    \n"  // |19|16|17|X|
+      "ins        $t4, $t2, 0, 16                    \n"  // |19|16|14|11|
+      "addiu      %[src_ptr], %[src_ptr], 32         \n"
+      "addiu      %[dst_width], %[dst_width], -12    \n"
+      "addiu      $t8,%[dst_width], -12              \n"
+      "sw         $t1, 0(%[dst])                     \n"
+      "sw         $t4, 4(%[dst])                     \n"
+      "sw         $t6, 8(%[dst])                     \n"
+      "bgez       $t8, 1b                            \n"
+      " addiu     %[dst], %[dst], 12                 \n"
+      ".set pop                                      \n"
+      : [src_ptr] "+r" (src_ptr),
+        [dst] "+r" (dst),
+        [dst_width] "+r" (dst_width)
+      :
+      : "t0", "t1", "t2", "t3", "t4",
+        "t5", "t6", "t7", "t8"
+  );
+}
+
+void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  const uint8* t = src_ptr + stride;
+  const int c = 0x2AAA;
+
+  __asm__ __volatile__ (
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+
+      ".p2align        2                                 \n"
+    "1:                                                  \n"
+      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
+      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
+      "lw              $t2, 0(%[t])                      \n"  // |T3|T2|T1|T0|
+      "lw              $t3, 4(%[t])                      \n"  // |T7|T6|T5|T4|
+      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
+      "packrl.ph       $t4, $t1, $t3                     \n"  // |S7|S6|T7|T6|
+      "packrl.ph       $t5, $t3, $t1                     \n"  // |T5|T4|S5|S4|
+      "raddu.w.qb      $t4, $t4                          \n"  // S7+S6+T7+T6
+      "raddu.w.qb      $t5, $t5                          \n"  // T5+T4+S5+S4
+      "precrq.qb.ph    $t6, $t0, $t2                     \n"  // |S3|S1|T3|T1|
+      "precrq.qb.ph    $t6, $t6, $t6                     \n"  // |S3|T3|S3|T3|
+      "srl             $t4, $t4, 2                       \n"  // t4 / 4
+      "srl             $t6, $t6, 16                      \n"  // |0|0|S3|T3|
+      "raddu.w.qb      $t6, $t6                          \n"  // 0+0+S3+T3
+      "addu            $t6, $t5, $t6                     \n"
+      "mul             $t6, $t6, %[c]                    \n"  // t6 * 0x2AAA
+      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
+      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
+      "raddu.w.qb      $t0, $t0                          \n"  // S2+S1+S0+0
+      "raddu.w.qb      $t2, $t2                          \n"  // T2+T1+T0+0
+      "addu            $t0, $t0, $t2                     \n"
+      "mul             $t0, $t0, %[c]                    \n"  // t0 * 0x2AAA
+      "addiu           %[src_ptr], %[src_ptr], 8         \n"
+      "addiu           %[t], %[t], 8                     \n"
+      "addiu           %[dst_width], %[dst_width], -3    \n"
+      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
+      "srl             $t6, $t6, 16                      \n"
+      "srl             $t0, $t0, 16                      \n"
+      "sb              $t4, -1(%[dst_ptr])               \n"
+      "sb              $t6, -2(%[dst_ptr])               \n"
+      "bgtz            %[dst_width], 1b                  \n"
+      " sb             $t0, -3(%[dst_ptr])               \n"
+      ".set pop                                          \n"
+      : [src_ptr] "+r" (src_ptr),
+        [dst_ptr] "+r" (dst_ptr),
+        [t] "+r" (t),
+        [dst_width] "+r" (dst_width)
+      : [c] "r" (c)
+      : "t0", "t1", "t2", "t3", "t4", "t5", "t6"
+  );
+}
+
+void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width) {
+  intptr_t stride = src_stride;
+  const uint8* s1 = src_ptr + stride;
+  stride += stride;
+  const uint8* s2 = src_ptr + stride;
+  const int c1 = 0x1C71;
+  const int c2 = 0x2AAA;
+
+  __asm__ __volatile__ (
+      ".set push                                         \n"
+      ".set noreorder                                    \n"
+
+      ".p2align        2                                 \n"
+    "1:                                                  \n"
+      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
+      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
+      "lw              $t2, 0(%[s1])                     \n"  // |T3|T2|T1|T0|
+      "lw              $t3, 4(%[s1])                     \n"  // |T7|T6|T5|T4|
+      "lw              $t4, 0(%[s2])                     \n"  // |R3|R2|R1|R0|
+      "lw              $t5, 4(%[s2])                     \n"  // |R7|R6|R5|R4|
+      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
+      "packrl.ph       $t6, $t1, $t3                     \n"  // |S7|S6|T7|T6|
+      "raddu.w.qb      $t6, $t6                          \n"  // S7+S6+T7+T6
+      "packrl.ph       $t7, $t3, $t1                     \n"  // |T5|T4|S5|S4|
+      "raddu.w.qb      $t7, $t7                          \n"  // T5+T4+S5+S4
+      "sll             $t8, $t5, 16                      \n"  // |R5|R4|0|0|
+      "raddu.w.qb      $t8, $t8                          \n"  // R5+R4
+      "addu            $t7, $t7, $t8                     \n"
+      "srl             $t8, $t5, 16                      \n"  // |0|0|R7|R6|
+      "raddu.w.qb      $t8, $t8                          \n"  // R7 + R6
+      "addu            $t6, $t6, $t8                     \n"
+      "mul             $t6, $t6, %[c2]                   \n"  // t6 * 0x2AAA
+      "precrq.qb.ph    $t8, $t0, $t2                     \n"  // |S3|S1|T3|T1|
+      "precrq.qb.ph    $t8, $t8, $t4                     \n"  // |S3|T3|R3|R1|
+      "srl             $t8, $t8, 8                       \n"  // |0|S3|T3|R3|
+      "raddu.w.qb      $t8, $t8                          \n"  // S3 + T3 + R3
+      "addu            $t7, $t7, $t8                     \n"
+      "mul             $t7, $t7, %[c1]                   \n"  // t7 * 0x1C71
+      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
+      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
+      "sll             $t4, $t4, 8                       \n"  // |R2|R1|R0|0|
+      "raddu.w.qb      $t0, $t0                          \n"
+      "raddu.w.qb      $t2, $t2                          \n"
+      "raddu.w.qb      $t4, $t4                          \n"
+      "addu            $t0, $t0, $t2                     \n"
+      "addu            $t0, $t0, $t4                     \n"
+      "mul             $t0, $t0, %[c1]                   \n"  // t0 * 0x1C71
+      "addiu           %[src_ptr], %[src_ptr], 8         \n"
+      "addiu           %[s1], %[s1], 8                   \n"
+      "addiu           %[s2], %[s2], 8                   \n"
+      "addiu           %[dst_width], %[dst_width], -3    \n"
+      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
+      "srl             $t6, $t6, 16                      \n"
+      "srl             $t7, $t7, 16                      \n"
+      "srl             $t0, $t0, 16                      \n"
+      "sb              $t6, -1(%[dst_ptr])               \n"
+      "sb              $t7, -2(%[dst_ptr])               \n"
+      "bgtz            %[dst_width], 1b                  \n"
+      " sb             $t0, -3(%[dst_ptr])               \n"
+      ".set pop                                          \n"
+      : [src_ptr] "+r" (src_ptr),
+        [dst_ptr] "+r" (dst_ptr),
+        [s1] "+r" (s1),
+        [s2] "+r" (s2),
+        [dst_width] "+r" (dst_width)
+      : [c1] "r" (c1), [c2] "r" (c2)
+      : "t0", "t1", "t2", "t3", "t4",
+        "t5", "t6", "t7", "t8"
+  );
+}
+
+#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
diff --git a/src/main/jni/libyuv/source/scale_neon.cc b/src/main/jni/libyuv/source/scale_neon.cc
new file mode 100644
index 000000000..1b8a5ba58
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_neon.cc
@@ -0,0 +1,764 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+
+// NEON downscalers with interpolation.
+// Provided by Fritz Koenig
+
+// Read 32x1 throw away even pixels, and write 16x1.
+void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    // load even pixels into q0, odd into q1
+    MEMACCESS(0)
+    "vld2.8     {q0, q1}, [%0]!                \n"
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst),              // %1
+    "+r"(dst_width)         // %2
+  :
+  : "q0", "q1"              // Clobber List
+  );
+}
+
+// Read 32x2 average down and write 16x1.
+void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst, int dst_width) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %1, %0                         \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 1 and post inc
+    MEMACCESS(1)
+    "vld1.8     {q2, q3}, [%1]!                \n"  // load row 2 and post inc
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    "vpaddl.u8  q0, q0                         \n"  // row 1 add adjacent
+    "vpaddl.u8  q1, q1                         \n"
+    "vpadal.u8  q0, q2                         \n"  // row 2 add adjacent + row1
+    "vpadal.u8  q1, q3                         \n"
+    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
+    "vrshrn.u16 d1, q1, #2                     \n"
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(src_stride),       // %1
+    "+r"(dst),              // %2
+    "+r"(dst_width)         // %3
+  :
+  : "q0", "q1", "q2", "q3"     // Clobber List
+  );
+}
+
+void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n" // src line 0
+    "subs       %2, %2, #8                     \n" // 8 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {d2}, [%1]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  :
+  : "q0", "q1", "memory", "cc"
+  );
+}
+
+void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  const uint8* src_ptr1 = src_ptr + src_stride;
+  const uint8* src_ptr2 = src_ptr + src_stride * 2;
+  const uint8* src_ptr3 = src_ptr + src_stride * 3;
+asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {q0}, [%0]!                    \n"   // load up 16x4
+    MEMACCESS(3)
+    "vld1.8     {q1}, [%3]!                    \n"
+    MEMACCESS(4)
+    "vld1.8     {q2}, [%4]!                    \n"
+    MEMACCESS(5)
+    "vld1.8     {q3}, [%5]!                    \n"
+    "subs       %2, %2, #4                     \n"
+    "vpaddl.u8  q0, q0                         \n"
+    "vpadal.u8  q0, q1                         \n"
+    "vpadal.u8  q0, q2                         \n"
+    "vpadal.u8  q0, q3                         \n"
+    "vpaddl.u16 q0, q0                         \n"
+    "vrshrn.u32 d0, q0, #4                     \n"   // divide by 16 w/rounding
+    "vmovn.u16  d0, q0                         \n"
+    MEMACCESS(1)
+    "vst1.32    {d0[0]}, [%1]!                 \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),   // %0
+    "+r"(dst_ptr),   // %1
+    "+r"(dst_width), // %2
+    "+r"(src_ptr1),  // %3
+    "+r"(src_ptr2),  // %4
+    "+r"(src_ptr3)   // %5
+  :
+  : "q0", "q1", "q2", "q3", "memory", "cc"
+  );
+}
+
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d1, d2, d3}, [%0]!      \n" // src line 0
+    "subs       %2, %2, #24                  \n"
+    "vmov       d2, d3                       \n" // order d0, d1, d2
+    MEMACCESS(1)
+    "vst3.8     {d0, d1, d2}, [%1]!          \n"
+    "bgt        1b                           \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  :
+  : "d0", "d1", "d2", "d3", "memory", "cc"
+  );
+}
+
+void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "vmov.u8    d24, #3                        \n"
+    "add        %3, %0                         \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
+    MEMACCESS(3)
+    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
+    "subs         %2, %2, #24                  \n"
+
+    // filter src line 0 with src line 1
+    // expand chars to shorts to allow for room
+    // when adding lines together
+    "vmovl.u8     q8, d4                       \n"
+    "vmovl.u8     q9, d5                       \n"
+    "vmovl.u8     q10, d6                      \n"
+    "vmovl.u8     q11, d7                      \n"
+
+    // 3 * line_0 + line_1
+    "vmlal.u8     q8, d0, d24                  \n"
+    "vmlal.u8     q9, d1, d24                  \n"
+    "vmlal.u8     q10, d2, d24                 \n"
+    "vmlal.u8     q11, d3, d24                 \n"
+
+    // (3 * line_0 + line_1) >> 2
+    "vqrshrn.u16  d0, q8, #2                   \n"
+    "vqrshrn.u16  d1, q9, #2                   \n"
+    "vqrshrn.u16  d2, q10, #2                  \n"
+    "vqrshrn.u16  d3, q11, #2                  \n"
+
+    // a0 = (src[0] * 3 + s[1] * 1) >> 2
+    "vmovl.u8     q8, d1                       \n"
+    "vmlal.u8     q8, d0, d24                  \n"
+    "vqrshrn.u16  d0, q8, #2                   \n"
+
+    // a1 = (src[1] * 1 + s[2] * 1) >> 1
+    "vrhadd.u8    d1, d1, d2                   \n"
+
+    // a2 = (src[2] * 1 + s[3] * 3) >> 2
+    "vmovl.u8     q8, d2                       \n"
+    "vmlal.u8     q8, d3, d24                  \n"
+    "vqrshrn.u16  d2, q8, #2                   \n"
+
+    MEMACCESS(1)
+    "vst3.8       {d0, d1, d2}, [%1]!          \n"
+
+    "bgt          1b                           \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride)        // %3
+  :
+  : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
+  );
+}
+
+void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "vmov.u8    d24, #3                        \n"
+    "add        %3, %0                         \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
+    MEMACCESS(3)
+    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
+    "subs         %2, %2, #24                  \n"
+    // average src line 0 with src line 1
+    "vrhadd.u8    q0, q0, q2                   \n"
+    "vrhadd.u8    q1, q1, q3                   \n"
+
+    // a0 = (src[0] * 3 + s[1] * 1) >> 2
+    "vmovl.u8     q3, d1                       \n"
+    "vmlal.u8     q3, d0, d24                  \n"
+    "vqrshrn.u16  d0, q3, #2                   \n"
+
+    // a1 = (src[1] * 1 + s[2] * 1) >> 1
+    "vrhadd.u8    d1, d1, d2                   \n"
+
+    // a2 = (src[2] * 1 + s[3] * 3) >> 2
+    "vmovl.u8     q3, d2                       \n"
+    "vmlal.u8     q3, d3, d24                  \n"
+    "vqrshrn.u16  d2, q3, #2                   \n"
+
+    MEMACCESS(1)
+    "vst3.8       {d0, d1, d2}, [%1]!          \n"
+    "bgt          1b                           \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride)        // %3
+  :
+  : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
+  );
+}
+
+#define HAS_SCALEROWDOWN38_NEON
+static uvec8 kShuf38 =
+  { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
+static uvec8 kShuf38_2 =
+  { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
+static vec16 kMult38_Div6 =
+  { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
+    65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
+static vec16 kMult38_Div9 =
+  { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
+    65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    MEMACCESS(3)
+    "vld1.8     {q3}, [%3]                     \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"
+    "subs       %2, %2, #12                    \n"
+    "vtbl.u8    d4, {d0, d1, d2, d3}, d6       \n"
+    "vtbl.u8    d5, {d0, d1, d2, d3}, d7       \n"
+    MEMACCESS(1)
+    "vst1.8     {d4}, [%1]!                    \n"
+    MEMACCESS(1)
+    "vst1.32    {d5[0]}, [%1]!                 \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  : "r"(&kShuf38)           // %3
+  : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
+  );
+}
+
+// 32x3 -> 12x1
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+                                      ptrdiff_t src_stride,
+                                      uint8* dst_ptr, int dst_width) {
+  const uint8* src_ptr1 = src_ptr + src_stride * 2;
+
+  asm volatile (
+    MEMACCESS(5)
+    "vld1.16    {q13}, [%5]                    \n"
+    MEMACCESS(6)
+    "vld1.8     {q14}, [%6]                    \n"
+    MEMACCESS(7)
+    "vld1.8     {q15}, [%7]                    \n"
+    "add        %3, %0                         \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+
+    // d0 = 00 40 01 41 02 42 03 43
+    // d1 = 10 50 11 51 12 52 13 53
+    // d2 = 20 60 21 61 22 62 23 63
+    // d3 = 30 70 31 71 32 72 33 73
+    MEMACCESS(0)
+    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
+    MEMACCESS(3)
+    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
+    MEMACCESS(4)
+    "vld4.8       {d16, d17, d18, d19}, [%4]!  \n"
+    "subs         %2, %2, #12                  \n"
+
+    // Shuffle the input data around to get align the data
+    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+    // d0 = 00 10 01 11 02 12 03 13
+    // d1 = 40 50 41 51 42 52 43 53
+    "vtrn.u8      d0, d1                       \n"
+    "vtrn.u8      d4, d5                       \n"
+    "vtrn.u8      d16, d17                     \n"
+
+    // d2 = 20 30 21 31 22 32 23 33
+    // d3 = 60 70 61 71 62 72 63 73
+    "vtrn.u8      d2, d3                       \n"
+    "vtrn.u8      d6, d7                       \n"
+    "vtrn.u8      d18, d19                     \n"
+
+    // d0 = 00+10 01+11 02+12 03+13
+    // d2 = 40+50 41+51 42+52 43+53
+    "vpaddl.u8    q0, q0                       \n"
+    "vpaddl.u8    q2, q2                       \n"
+    "vpaddl.u8    q8, q8                       \n"
+
+    // d3 = 60+70 61+71 62+72 63+73
+    "vpaddl.u8    d3, d3                       \n"
+    "vpaddl.u8    d7, d7                       \n"
+    "vpaddl.u8    d19, d19                     \n"
+
+    // combine source lines
+    "vadd.u16     q0, q2                       \n"
+    "vadd.u16     q0, q8                       \n"
+    "vadd.u16     d4, d3, d7                   \n"
+    "vadd.u16     d4, d19                      \n"
+
+    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+    //             + s[6 + st * 1] + s[7 + st * 1]
+    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
+    "vqrdmulh.s16 q2, q2, q13                  \n"
+    "vmovn.u16    d4, q2                       \n"
+
+    // Shuffle 2,3 reg around so that 2 can be added to the
+    //  0,1 reg and 3 can be added to the 4,5 reg. This
+    //  requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded. Then do transposes
+    //  to get aligned.
+    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+    "vmovl.u8     q1, d2                       \n"
+    "vmovl.u8     q3, d6                       \n"
+    "vmovl.u8     q9, d18                      \n"
+
+    // combine source lines
+    "vadd.u16     q1, q3                       \n"
+    "vadd.u16     q1, q9                       \n"
+
+    // d4 = xx 20 xx 30 xx 22 xx 32
+    // d5 = xx 21 xx 31 xx 23 xx 33
+    "vtrn.u32     d2, d3                       \n"
+
+    // d4 = xx 20 xx 21 xx 22 xx 23
+    // d5 = xx 30 xx 31 xx 32 xx 33
+    "vtrn.u16     d2, d3                       \n"
+
+    // 0+1+2, 3+4+5
+    "vadd.u16     q0, q1                       \n"
+
+    // Need to divide, but can't downshift as the the value
+    //  isn't a power of 2. So multiply by 65536 / n
+    //  and take the upper 16 bits.
+    "vqrdmulh.s16 q0, q0, q15                  \n"
+
+    // Align for table lookup, vtbl requires registers to
+    //  be adjacent
+    "vmov.u8      d2, d4                       \n"
+
+    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
+    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
+
+    MEMACCESS(1)
+    "vst1.8       {d3}, [%1]!                  \n"
+    MEMACCESS(1)
+    "vst1.32      {d4[0]}, [%1]!               \n"
+    "bgt          1b                           \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride),       // %3
+    "+r"(src_ptr1)          // %4
+  : "r"(&kMult38_Div6),     // %5
+    "r"(&kShuf38_2),        // %6
+    "r"(&kMult38_Div9)      // %7
+  : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
+  );
+}
+
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    MEMACCESS(4)
+    "vld1.16    {q13}, [%4]                    \n"
+    MEMACCESS(5)
+    "vld1.8     {q14}, [%5]                    \n"
+    "add        %3, %0                         \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+
+    // d0 = 00 40 01 41 02 42 03 43
+    // d1 = 10 50 11 51 12 52 13 53
+    // d2 = 20 60 21 61 22 62 23 63
+    // d3 = 30 70 31 71 32 72 33 73
+    MEMACCESS(0)
+    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
+    MEMACCESS(3)
+    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
+    "subs         %2, %2, #12                  \n"
+
+    // Shuffle the input data around to get align the data
+    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+    // d0 = 00 10 01 11 02 12 03 13
+    // d1 = 40 50 41 51 42 52 43 53
+    "vtrn.u8      d0, d1                       \n"
+    "vtrn.u8      d4, d5                       \n"
+
+    // d2 = 20 30 21 31 22 32 23 33
+    // d3 = 60 70 61 71 62 72 63 73
+    "vtrn.u8      d2, d3                       \n"
+    "vtrn.u8      d6, d7                       \n"
+
+    // d0 = 00+10 01+11 02+12 03+13
+    // d2 = 40+50 41+51 42+52 43+53
+    "vpaddl.u8    q0, q0                       \n"
+    "vpaddl.u8    q2, q2                       \n"
+
+    // d3 = 60+70 61+71 62+72 63+73
+    "vpaddl.u8    d3, d3                       \n"
+    "vpaddl.u8    d7, d7                       \n"
+
+    // combine source lines
+    "vadd.u16     q0, q2                       \n"
+    "vadd.u16     d4, d3, d7                   \n"
+
+    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+    "vqrshrn.u16  d4, q2, #2                   \n"
+
+    // Shuffle 2,3 reg around so that 2 can be added to the
+    //  0,1 reg and 3 can be added to the 4,5 reg. This
+    //  requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded. Then do transposes
+    //  to get aligned.
+    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+    "vmovl.u8     q1, d2                       \n"
+    "vmovl.u8     q3, d6                       \n"
+
+    // combine source lines
+    "vadd.u16     q1, q3                       \n"
+
+    // d4 = xx 20 xx 30 xx 22 xx 32
+    // d5 = xx 21 xx 31 xx 23 xx 33
+    "vtrn.u32     d2, d3                       \n"
+
+    // d4 = xx 20 xx 21 xx 22 xx 23
+    // d5 = xx 30 xx 31 xx 32 xx 33
+    "vtrn.u16     d2, d3                       \n"
+
+    // 0+1+2, 3+4+5
+    "vadd.u16     q0, q1                       \n"
+
+    // Need to divide, but can't downshift as the the value
+    //  isn't a power of 2. So multiply by 65536 / n
+    //  and take the upper 16 bits.
+    "vqrdmulh.s16 q0, q0, q13                  \n"
+
+    // Align for table lookup, vtbl requires registers to
+    //  be adjacent
+    "vmov.u8      d2, d4                       \n"
+
+    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
+    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
+
+    MEMACCESS(1)
+    "vst1.8       {d3}, [%1]!                  \n"
+    MEMACCESS(1)
+    "vst1.32      {d4[0]}, [%1]!               \n"
+    "bgt          1b                           \n"
+  : "+r"(src_ptr),       // %0
+    "+r"(dst_ptr),       // %1
+    "+r"(dst_width),     // %2
+    "+r"(src_stride)     // %3
+  : "r"(&kMult38_Div6),  // %4
+    "r"(&kShuf38_2)      // %5
+  : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
+  );
+}
+
+// 16x2 -> 16x1
+void ScaleFilterRows_NEON(uint8* dst_ptr,
+                          const uint8* src_ptr, ptrdiff_t src_stride,
+                          int dst_width, int source_y_fraction) {
+  asm volatile (
+    "cmp          %4, #0                       \n"
+    "beq          100f                         \n"
+    "add          %2, %1                       \n"
+    "cmp          %4, #64                      \n"
+    "beq          75f                          \n"
+    "cmp          %4, #128                     \n"
+    "beq          50f                          \n"
+    "cmp          %4, #192                     \n"
+    "beq          25f                          \n"
+
+    "vdup.8       d5, %4                       \n"
+    "rsb          %4, #256                     \n"
+    "vdup.8       d4, %4                       \n"
+    // General purpose row blend.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "vld1.8       {q0}, [%1]!                  \n"
+    MEMACCESS(2)
+    "vld1.8       {q1}, [%2]!                  \n"
+    "subs         %3, %3, #16                  \n"
+    "vmull.u8     q13, d0, d4                  \n"
+    "vmull.u8     q14, d1, d4                  \n"
+    "vmlal.u8     q13, d2, d5                  \n"
+    "vmlal.u8     q14, d3, d5                  \n"
+    "vrshrn.u16   d0, q13, #8                  \n"
+    "vrshrn.u16   d1, q14, #8                  \n"
+    MEMACCESS(0)
+    "vst1.8       {q0}, [%0]!                  \n"
+    "bgt          1b                           \n"
+    "b            99f                          \n"
+
+    // Blend 25 / 75.
+  "25:                                         \n"
+    MEMACCESS(1)
+    "vld1.8       {q0}, [%1]!                  \n"
+    MEMACCESS(2)
+    "vld1.8       {q1}, [%2]!                  \n"
+    "subs         %3, %3, #16                  \n"
+    "vrhadd.u8    q0, q1                       \n"
+    "vrhadd.u8    q0, q1                       \n"
+    MEMACCESS(0)
+    "vst1.8       {q0}, [%0]!                  \n"
+    "bgt          25b                          \n"
+    "b            99f                          \n"
+
+    // Blend 50 / 50.
+  "50:                                         \n"
+    MEMACCESS(1)
+    "vld1.8       {q0}, [%1]!                  \n"
+    MEMACCESS(2)
+    "vld1.8       {q1}, [%2]!                  \n"
+    "subs         %3, %3, #16                  \n"
+    "vrhadd.u8    q0, q1                       \n"
+    MEMACCESS(0)
+    "vst1.8       {q0}, [%0]!                  \n"
+    "bgt          50b                          \n"
+    "b            99f                          \n"
+
+    // Blend 75 / 25.
+  "75:                                         \n"
+    MEMACCESS(1)
+    "vld1.8       {q1}, [%1]!                  \n"
+    MEMACCESS(2)
+    "vld1.8       {q0}, [%2]!                  \n"
+    "subs         %3, %3, #16                  \n"
+    "vrhadd.u8    q0, q1                       \n"
+    "vrhadd.u8    q0, q1                       \n"
+    MEMACCESS(0)
+    "vst1.8       {q0}, [%0]!                  \n"
+    "bgt          75b                          \n"
+    "b            99f                          \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+  "100:                                        \n"
+    MEMACCESS(1)
+    "vld1.8       {q0}, [%1]!                  \n"
+    "subs         %3, %3, #16                  \n"
+    MEMACCESS(0)
+    "vst1.8       {q0}, [%0]!                  \n"
+    "bgt          100b                         \n"
+
+  "99:                                         \n"
+    MEMACCESS(0)
+    "vst1.8       {d1[7]}, [%0]                \n"
+  : "+r"(dst_ptr),          // %0
+    "+r"(src_ptr),          // %1
+    "+r"(src_stride),       // %2
+    "+r"(dst_width),        // %3
+    "+r"(source_y_fraction) // %4
+  :
+  : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
+  );
+}
+
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width) {
+  asm volatile (
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    // load even pixels into q0, odd into q1
+    MEMACCESS(0)
+    "vld2.32    {q0, q1}, [%0]!                \n"
+    MEMACCESS(0)
+    "vld2.32    {q2, q3}, [%0]!                \n"
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    MEMACCESS(1)
+    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
+    MEMACCESS(1)
+    "vst1.8     {q3}, [%1]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst),              // %1
+    "+r"(dst_width)         // %2
+  :
+  : "memory", "cc", "q0", "q1", "q2", "q3"  // Clobber List
+  );
+}
+
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %1, %1, %0                     \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    MEMACCESS(0)
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    "vpaddl.u8  q3, q3                         \n"  // A 16 bytes -> 8 shorts.
+    MEMACCESS(1)
+    "vld4.8     {d16, d18, d20, d22}, [%1]!    \n"  // load 8 more ARGB pixels.
+    MEMACCESS(1)
+    "vld4.8     {d17, d19, d21, d23}, [%1]!    \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q8                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q9                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q10                        \n"  // R 16 bytes -> 8 shorts.
+    "vpadal.u8  q3, q11                        \n"  // A 16 bytes -> 8 shorts.
+    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
+    "vrshrn.u16 d1, q1, #2                     \n"
+    "vrshrn.u16 d2, q2, #2                     \n"
+    "vrshrn.u16 d3, q3, #2                     \n"
+    MEMACCESS(2)
+    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(src_stride),       // %1
+    "+r"(dst),              // %2
+    "+r"(dst_width)         // %3
+  :
+  : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
+  );
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride,
+                               int src_stepx, uint8* dst_argb, int dst_width) {
+  asm volatile (
+    "mov        r12, %3, lsl #2                \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.32    {d0[0]}, [%0], r12             \n"
+    MEMACCESS(0)
+    "vld1.32    {d0[1]}, [%0], r12             \n"
+    MEMACCESS(0)
+    "vld1.32    {d1[0]}, [%0], r12             \n"
+    MEMACCESS(0)
+    "vld1.32    {d1[1]}, [%0], r12             \n"
+    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
+    MEMACCESS(1)
+    "vst1.8     {q0}, [%1]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+r"(dst_width)    // %2
+  : "r"(src_stepx)     // %3
+  : "memory", "cc", "r12", "q0"
+  );
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width) {
+  asm volatile (
+    "mov        r12, %4, lsl #2                \n"
+    "add        %1, %1, %0                     \n"
+    ".p2align   2                              \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "vld1.8     {d0}, [%0], r12                \n"  // Read 4 2x2 blocks -> 2x1
+    MEMACCESS(1)
+    "vld1.8     {d1}, [%1], r12                \n"
+    MEMACCESS(0)
+    "vld1.8     {d2}, [%0], r12                \n"
+    MEMACCESS(1)
+    "vld1.8     {d3}, [%1], r12                \n"
+    MEMACCESS(0)
+    "vld1.8     {d4}, [%0], r12                \n"
+    MEMACCESS(1)
+    "vld1.8     {d5}, [%1], r12                \n"
+    MEMACCESS(0)
+    "vld1.8     {d6}, [%0], r12                \n"
+    MEMACCESS(1)
+    "vld1.8     {d7}, [%1], r12                \n"
+    "vaddl.u8   q0, d0, d1                     \n"
+    "vaddl.u8   q1, d2, d3                     \n"
+    "vaddl.u8   q2, d4, d5                     \n"
+    "vaddl.u8   q3, d6, d7                     \n"
+    "vswp.8     d1, d2                         \n"  // ab_cd -> ac_bd
+    "vswp.8     d5, d6                         \n"  // ef_gh -> eg_fh
+    "vadd.u16   q0, q0, q1                     \n"  // (a+b)_(c+d)
+    "vadd.u16   q2, q2, q3                     \n"  // (e+f)_(g+h)
+    "vrshrn.u16 d0, q0, #2                     \n"  // first 2 pixels.
+    "vrshrn.u16 d1, q2, #2                     \n"  // next 2 pixels.
+    "subs       %3, %3, #4                     \n"  // 4 pixels per loop.
+    MEMACCESS(2)
+    "vst1.8     {q0}, [%2]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_argb),    // %0
+    "+r"(src_stride),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(dst_width)    // %3
+  : "r"(src_stepx)     // %4
+  : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
+  );
+}
+
+#endif  // __ARM_NEON__
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_neon64.cc b/src/main/jni/libyuv/source/scale_neon64.cc
new file mode 100644
index 000000000..44df55c6c
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_neon64.cc
@@ -0,0 +1,789 @@
+/*
+ *  Copyright 2014 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC Neon.
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#ifdef HAS_SCALEROWDOWN2_NEON
+// Read 32x1 throw away even pixels, and write 16x1.
+void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst, int dst_width) {
+  asm volatile (
+  "1:                                          \n"
+    // load even pixels into v0, odd into v1
+    MEMACCESS(0)
+    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"
+    "subs       %2, %2, #16                    \n"  // 16 processed per loop
+    MEMACCESS(1)
+    "st1        {v1.16b}, [%1], #16            \n"  // store odd pixels
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst),              // %1
+    "+r"(dst_width)         // %2
+  :
+  : "v0", "v1"              // Clobber List
+  );
+}
+#endif //HAS_SCALEROWDOWN2_NEON
+
+#ifdef HAS_SCALEROWDOWN2_NEON
+// Read 32x2 average down and write 16x1.
+void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst, int dst_width) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %1, %1, %0                     \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b, v1.16b}, [%0], #32    \n"  // load row 1 and post inc
+    MEMACCESS(1)
+    "ld1        {v2.16b, v3.16b}, [%1], #32    \n"  // load row 2 and post inc
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    "uaddlp     v0.8h, v0.16b                  \n"  // row 1 add adjacent
+    "uaddlp     v1.8h, v1.16b                  \n"
+    "uadalp     v0.8h, v2.16b                  \n"  // row 2 add adjacent + row1
+    "uadalp     v1.8h, v3.16b                  \n"
+    "rshrn      v0.8b, v0.8h, #2               \n"  // downshift, round and pack
+    "rshrn2     v0.16b, v1.8h, #2              \n"
+    MEMACCESS(2)
+    "st1        {v0.16b}, [%2], #16            \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(src_stride),       // %1
+    "+r"(dst),              // %2
+    "+r"(dst_width)         // %3
+  :
+  : "v0", "v1", "v2", "v3"     // Clobber List
+  );
+}
+#endif //HAS_SCALEROWDOWN2_NEON
+
+#ifdef HAS_SCALEROWDOWN4_NEON
+void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  asm volatile (
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld4     {v0.8b-3.8b}, [%0], #32           \n"  // src line 0
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    MEMACCESS(1)
+    "st1     {v2.8b}, [%1], #8                 \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  :
+  : "v0", "v1", "v2", "v3", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN4_NEON
+
+#ifdef HAS_SCALEROWDOWN4_NEON
+void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  const uint8* src_ptr1 = src_ptr + src_stride;
+  const uint8* src_ptr2 = src_ptr + src_stride * 2;
+  const uint8* src_ptr3 = src_ptr + src_stride * 3;
+asm volatile (
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1     {v0.16b}, [%0], #16               \n"   // load up 16x4
+    MEMACCESS(3)
+    "ld1     {v1.16b}, [%3], #16               \n"
+    MEMACCESS(4)
+    "ld1     {v2.16b}, [%4], #16               \n"
+    MEMACCESS(5)
+    "ld1     {v3.16b}, [%5], #16               \n"
+    "subs       %2, %2, #4                     \n"
+    "uaddlp  v0.8h, v0.16b                     \n"
+    "uadalp  v0.8h, v1.16b                     \n"
+    "uadalp  v0.8h, v2.16b                     \n"
+    "uadalp  v0.8h, v3.16b                     \n"
+    "addp    v0.8h, v0.8h, v0.8h               \n"
+    "rshrn   v0.8b, v0.8h, #4                  \n"   // divide by 16 w/rounding
+    MEMACCESS(1)
+    "st1    {v0.s}[0], [%1], #4                \n"
+    "bgt        1b                             \n"
+  : "+r"(src_ptr),   // %0
+    "+r"(dst_ptr),   // %1
+    "+r"(dst_width), // %2
+    "+r"(src_ptr1),  // %3
+    "+r"(src_ptr2),  // %4
+    "+r"(src_ptr3)   // %5
+  :
+  : "v0", "v1", "v2", "v3", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN4_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+// Down scale from 4 to 3 pixels. Use the neon multilane read/write
+// to load up the every 4th pixel into a 4 different registers.
+// Point samples 32 pixels to 24 pixels.
+void ScaleRowDown34_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width) {
+  asm volatile (
+  "1:                                                  \n"
+    MEMACCESS(0)
+    "ld4       {v0.8b-v3.8b}, [%0], #32                \n"  // src line 0
+    "subs      %2, %2, #24                             \n"
+    "mov       v2.8b, v3.8b                            \n"  // order v0, v1, v2
+    MEMACCESS(1)
+    "st3       {v0.8b-v2.8b}, [%1], #24                \n"
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  :
+  : "v0", "v1", "v2", "v3", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN34_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movi      v20.8b, #3                              \n"
+    "add       %3, %3, %0                              \n"
+  "1:                                                  \n"
+    MEMACCESS(0)
+    "ld4       {v0.8b-v3.8b}, [%0], #32                \n"  // src line 0
+    MEMACCESS(3)
+    "ld4       {v4.8b-v7.8b}, [%3], #32                \n"  // src line 1
+    "subs         %2, %2, #24                          \n"
+
+    // filter src line 0 with src line 1
+    // expand chars to shorts to allow for room
+    // when adding lines together
+    "ushll     v16.8h, v4.8b, #0                       \n"
+    "ushll     v17.8h, v5.8b, #0                       \n"
+    "ushll     v18.8h, v6.8b, #0                       \n"
+    "ushll     v19.8h, v7.8b, #0                       \n"
+
+    // 3 * line_0 + line_1
+    "umlal     v16.8h, v0.8b, v20.8b                   \n"
+    "umlal     v17.8h, v1.8b, v20.8b                   \n"
+    "umlal     v18.8h, v2.8b, v20.8b                   \n"
+    "umlal     v19.8h, v3.8b, v20.8b                   \n"
+
+    // (3 * line_0 + line_1) >> 2
+    "uqrshrn   v0.8b, v16.8h, #2                       \n"
+    "uqrshrn   v1.8b, v17.8h, #2                       \n"
+    "uqrshrn   v2.8b, v18.8h, #2                       \n"
+    "uqrshrn   v3.8b, v19.8h, #2                       \n"
+
+    // a0 = (src[0] * 3 + s[1] * 1) >> 2
+    "ushll     v16.8h, v1.8b, #0                       \n"
+    "umlal     v16.8h, v0.8b, v20.8b                   \n"
+    "uqrshrn   v0.8b, v16.8h, #2                       \n"
+
+    // a1 = (src[1] * 1 + s[2] * 1) >> 1
+    "urhadd    v1.8b, v1.8b, v2.8b                     \n"
+
+    // a2 = (src[2] * 1 + s[3] * 3) >> 2
+    "ushll     v16.8h, v2.8b, #0                       \n"
+    "umlal     v16.8h, v3.8b, v20.8b                   \n"
+    "uqrshrn   v2.8b, v16.8h, #2                       \n"
+
+    MEMACCESS(1)
+    "st3       {v0.8b-v2.8b}, [%1], #24                \n"
+
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride)        // %3
+  :
+  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19",
+    "v20", "memory", "cc"
+  );
+}
+#endif //ScaleRowDown34_0_Box_NEON
+
+#ifdef HAS_SCALEROWDOWN34_NEON
+void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movi      v20.8b, #3                              \n"
+    "add       %3, %3, %0                              \n"
+  "1:                                                  \n"
+    MEMACCESS(0)
+    "ld4       {v0.8b-v3.8b}, [%0], #32                \n"  // src line 0
+    MEMACCESS(3)
+    "ld4       {v4.8b-v7.8b}, [%3], #32                \n"  // src line 1
+    "subs         %2, %2, #24                          \n"
+    // average src line 0 with src line 1
+    "urhadd    v0.8b, v0.8b, v4.8b                     \n"
+    "urhadd    v1.8b, v1.8b, v5.8b                     \n"
+    "urhadd    v2.8b, v2.8b, v6.8b                     \n"
+    "urhadd    v3.8b, v3.8b, v7.8b                     \n"
+
+    // a0 = (src[0] * 3 + s[1] * 1) >> 2
+    "ushll     v4.8h, v1.8b, #0                        \n"
+    "umlal     v4.8h, v0.8b, v20.8b                    \n"
+    "uqrshrn   v0.8b, v4.8h, #2                        \n"
+
+    // a1 = (src[1] * 1 + s[2] * 1) >> 1
+    "urhadd    v1.8b, v1.8b, v2.8b                     \n"
+
+    // a2 = (src[2] * 1 + s[3] * 3) >> 2
+    "ushll     v4.8h, v2.8b, #0                        \n"
+    "umlal     v4.8h, v3.8b, v20.8b                    \n"
+    "uqrshrn   v2.8b, v4.8h, #2                        \n"
+
+    MEMACCESS(1)
+    "st3       {v0.8b-v2.8b}, [%1], #24                \n"
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride)        // %3
+  :
+  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN34_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+static uvec8 kShuf38 =
+  { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
+static uvec8 kShuf38_2 =
+  { 0, 16, 32, 2, 18, 33, 4, 20, 34, 6, 22, 35, 0, 0, 0, 0 };
+static vec16 kMult38_Div6 =
+  { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
+    65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
+static vec16 kMult38_Div9 =
+  { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
+    65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
+
+// 32 -> 12
+void ScaleRowDown38_NEON(const uint8* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    MEMACCESS(3)
+    "ld1       {v3.16b}, [%3]                          \n"
+  "1:                                                  \n"
+    MEMACCESS(0)
+    "ld1       {v0.16b, v1.16b}, [%0], #32             \n"
+    "subs      %2, %2, #12                             \n"
+    "tbl       v2.16b, {v0.16b, v1.16b}, v3.16b        \n"
+    MEMACCESS(1)
+    "st1       {v2.8b}, [%1], #8                       \n"
+    MEMACCESS(1)
+    "st1       {v2.s}[2], [%1], #4                     \n"
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width)         // %2
+  : "r"(&kShuf38)           // %3
+  : "v0", "v1", "v2", "v3", "memory", "cc"
+  );
+}
+
+#endif //HAS_SCALEROWDOWN38_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+// 32x3 -> 12x1
+void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
+                                      ptrdiff_t src_stride,
+                                      uint8* dst_ptr, int dst_width) {
+  const uint8* src_ptr1 = src_ptr + src_stride * 2;
+
+  asm volatile (
+    MEMACCESS(5)
+    "ld1       {v29.8h}, [%5]                          \n"
+    MEMACCESS(6)
+    "ld1       {v30.16b}, [%6]                         \n"
+    MEMACCESS(7)
+    "ld1       {v31.8h}, [%7]                          \n"
+    "add       %3, %3, %0                              \n"
+  "1:                                                  \n"
+
+    // 00 40 01 41 02 42 03 43
+    // 10 50 11 51 12 52 13 53
+    // 20 60 21 61 22 62 23 63
+    // 30 70 31 71 32 72 33 73
+    MEMACCESS(0)
+    "ld4       {v0.8b-v3.8b}, [%0], #32                \n"
+    MEMACCESS(3)
+    "ld4       {v4.8b-v7.8b}, [%3], #32                \n"
+    MEMACCESS(4)
+    "ld4       {v16.8b-v19.8b}, [%4], #32              \n"
+    "subs      %2, %2, #12                             \n"
+
+    // Shuffle the input data around to get align the data
+    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+    // 00 10 01 11 02 12 03 13
+    // 40 50 41 51 42 52 43 53
+    "trn1      v20.8b, v0.8b, v1.8b                    \n"
+    "trn2      v21.8b, v0.8b, v1.8b                    \n"
+    "trn1      v22.8b, v4.8b, v5.8b                    \n"
+    "trn2      v23.8b, v4.8b, v5.8b                    \n"
+    "trn1      v24.8b, v16.8b, v17.8b                  \n"
+    "trn2      v25.8b, v16.8b, v17.8b                  \n"
+
+    // 20 30 21 31 22 32 23 33
+    // 60 70 61 71 62 72 63 73
+    "trn1      v0.8b, v2.8b, v3.8b                     \n"
+    "trn2      v1.8b, v2.8b, v3.8b                     \n"
+    "trn1      v4.8b, v6.8b, v7.8b                     \n"
+    "trn2      v5.8b, v6.8b, v7.8b                     \n"
+    "trn1      v16.8b, v18.8b, v19.8b                  \n"
+    "trn2      v17.8b, v18.8b, v19.8b                  \n"
+
+    // 00+10 01+11 02+12 03+13
+    // 40+50 41+51 42+52 43+53
+    "uaddlp    v20.4h, v20.8b                          \n"
+    "uaddlp    v21.4h, v21.8b                          \n"
+    "uaddlp    v22.4h, v22.8b                          \n"
+    "uaddlp    v23.4h, v23.8b                          \n"
+    "uaddlp    v24.4h, v24.8b                          \n"
+    "uaddlp    v25.4h, v25.8b                          \n"
+
+    // 60+70 61+71 62+72 63+73
+    "uaddlp    v1.4h, v1.8b                            \n"
+    "uaddlp    v5.4h, v5.8b                            \n"
+    "uaddlp    v17.4h, v17.8b                          \n"
+
+    // combine source lines
+    "add       v20.4h, v20.4h, v22.4h                  \n"
+    "add       v21.4h, v21.4h, v23.4h                  \n"
+    "add       v20.4h, v20.4h, v24.4h                  \n"
+    "add       v21.4h, v21.4h, v25.4h                  \n"
+    "add       v2.4h, v1.4h, v5.4h                     \n"
+    "add       v2.4h, v2.4h, v17.4h                    \n"
+
+    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+    //             + s[6 + st * 1] + s[7 + st * 1]
+    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
+    "sqrdmulh  v2.8h, v2.8h, v29.8h                    \n"
+    "xtn       v2.8b,  v2.8h                           \n"
+
+    // Shuffle 2,3 reg around so that 2 can be added to the
+    //  0,1 reg and 3 can be added to the 4,5 reg. This
+    //  requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded. Then do transposes
+    //  to get aligned.
+    // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+    "ushll     v16.8h, v16.8b, #0                      \n"
+    "uaddl     v0.8h, v0.8b, v4.8b                     \n"
+
+    // combine source lines
+    "add       v0.8h, v0.8h, v16.8h                    \n"
+
+    // xx 20 xx 21 xx 22 xx 23
+    // xx 30 xx 31 xx 32 xx 33
+    "trn1      v1.8h, v0.8h, v0.8h                     \n"
+    "trn2      v4.8h, v0.8h, v0.8h                     \n"
+    "xtn       v0.4h, v1.4s                            \n"
+    "xtn       v4.4h, v4.4s                            \n"
+
+    // 0+1+2, 3+4+5
+    "add       v20.8h, v20.8h, v0.8h                   \n"
+    "add       v21.8h, v21.8h, v4.8h                   \n"
+
+    // Need to divide, but can't downshift as the the value
+    //  isn't a power of 2. So multiply by 65536 / n
+    //  and take the upper 16 bits.
+    "sqrdmulh  v0.8h, v20.8h, v31.8h                   \n"
+    "sqrdmulh  v1.8h, v21.8h, v31.8h                   \n"
+
+    // Align for table lookup, vtbl requires registers to
+    //  be adjacent
+    "tbl       v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n"
+
+    MEMACCESS(1)
+    "st1       {v3.8b}, [%1], #8                       \n"
+    MEMACCESS(1)
+    "st1       {v3.s}[2], [%1], #4                     \n"
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),          // %0
+    "+r"(dst_ptr),          // %1
+    "+r"(dst_width),        // %2
+    "+r"(src_stride),       // %3
+    "+r"(src_ptr1)          // %4
+  : "r"(&kMult38_Div6),     // %5
+    "r"(&kShuf38_2),        // %6
+    "r"(&kMult38_Div9)      // %7
+  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29",
+    "v30", "v31", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN38_NEON
+
+#ifdef HAS_SCALEROWDOWN38_NEON
+// 32x2 -> 12x1
+void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    MEMACCESS(4)
+    "ld1       {v30.8h}, [%4]                          \n"
+    MEMACCESS(5)
+    "ld1       {v31.16b}, [%5]                         \n"
+    "add       %3, %3, %0                              \n"
+  "1:                                                  \n"
+
+    // 00 40 01 41 02 42 03 43
+    // 10 50 11 51 12 52 13 53
+    // 20 60 21 61 22 62 23 63
+    // 30 70 31 71 32 72 33 73
+    MEMACCESS(0)
+    "ld4       {v0.8b-v3.8b}, [%0], #32                \n"
+    MEMACCESS(3)
+    "ld4       {v4.8b-v7.8b}, [%3], #32                \n"
+    "subs      %2, %2, #12                             \n"
+
+    // Shuffle the input data around to get align the data
+    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+    // 00 10 01 11 02 12 03 13
+    // 40 50 41 51 42 52 43 53
+    "trn1      v16.8b, v0.8b, v1.8b                    \n"
+    "trn2      v17.8b, v0.8b, v1.8b                    \n"
+    "trn1      v18.8b, v4.8b, v5.8b                    \n"
+    "trn2      v19.8b, v4.8b, v5.8b                    \n"
+
+    // 20 30 21 31 22 32 23 33
+    // 60 70 61 71 62 72 63 73
+    "trn1      v0.8b, v2.8b, v3.8b                     \n"
+    "trn2      v1.8b, v2.8b, v3.8b                     \n"
+    "trn1      v4.8b, v6.8b, v7.8b                     \n"
+    "trn2      v5.8b, v6.8b, v7.8b                     \n"
+
+    // 00+10 01+11 02+12 03+13
+    // 40+50 41+51 42+52 43+53
+    "uaddlp    v16.4h, v16.8b                          \n"
+    "uaddlp    v17.4h, v17.8b                          \n"
+    "uaddlp    v18.4h, v18.8b                          \n"
+    "uaddlp    v19.4h, v19.8b                          \n"
+
+    // 60+70 61+71 62+72 63+73
+    "uaddlp    v1.4h, v1.8b                            \n"
+    "uaddlp    v5.4h, v5.8b                            \n"
+
+    // combine source lines
+    "add       v16.4h, v16.4h, v18.4h                  \n"
+    "add       v17.4h, v17.4h, v19.4h                  \n"
+    "add       v2.4h, v1.4h, v5.4h                     \n"
+
+    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+    "uqrshrn   v2.8b, v2.8h, #2                        \n"
+
+    // Shuffle 2,3 reg around so that 2 can be added to the
+    //  0,1 reg and 3 can be added to the 4,5 reg. This
+    //  requires expanding from u8 to u16 as the 0,1 and 4,5
+    //  registers are already expanded. Then do transposes
+    //  to get aligned.
+    // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+
+    // combine source lines
+    "uaddl     v0.8h, v0.8b, v4.8b                     \n"
+
+    // xx 20 xx 21 xx 22 xx 23
+    // xx 30 xx 31 xx 32 xx 33
+    "trn1      v1.8h, v0.8h, v0.8h                     \n"
+    "trn2      v4.8h, v0.8h, v0.8h                     \n"
+    "xtn       v0.4h, v1.4s                            \n"
+    "xtn       v4.4h, v4.4s                            \n"
+
+    // 0+1+2, 3+4+5
+    "add       v16.8h, v16.8h, v0.8h                   \n"
+    "add       v17.8h, v17.8h, v4.8h                   \n"
+
+    // Need to divide, but can't downshift as the the value
+    //  isn't a power of 2. So multiply by 65536 / n
+    //  and take the upper 16 bits.
+    "sqrdmulh  v0.8h, v16.8h, v30.8h                   \n"
+    "sqrdmulh  v1.8h, v17.8h, v30.8h                   \n"
+
+    // Align for table lookup, vtbl requires registers to
+    //  be adjacent
+
+    "tbl       v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n"
+
+    MEMACCESS(1)
+    "st1       {v3.8b}, [%1], #8                       \n"
+    MEMACCESS(1)
+    "st1       {v3.s}[2], [%1], #4                     \n"
+    "bgt       1b                                      \n"
+  : "+r"(src_ptr),       // %0
+    "+r"(dst_ptr),       // %1
+    "+r"(dst_width),     // %2
+    "+r"(src_stride)     // %3
+  : "r"(&kMult38_Div6),  // %4
+    "r"(&kShuf38_2)      // %5
+  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
+    "v18", "v19", "v30", "v31", "memory", "cc"
+  );
+}
+#endif //HAS_SCALEROWDOWN38_NEON
+
+// 16x2 -> 16x1
+void ScaleFilterRows_NEON(uint8* dst_ptr,
+                          const uint8* src_ptr, ptrdiff_t src_stride,
+                          int dst_width, int source_y_fraction) {
+    int y_fraction = 256 - source_y_fraction;
+  asm volatile (
+    "cmp          %4, #0                       \n"
+    "beq          100f                         \n"
+    "add          %2, %2, %1                   \n"
+    "cmp          %4, #64                      \n"
+    "beq          75f                          \n"
+    "cmp          %4, #128                     \n"
+    "beq          50f                          \n"
+    "cmp          %4, #192                     \n"
+    "beq          25f                          \n"
+
+    "dup          v5.8b, %w4                   \n"
+    "dup          v4.8b, %w5                   \n"
+    // General purpose row blend.
+  "1:                                          \n"
+    MEMACCESS(1)
+    "ld1          {v0.16b}, [%1], #16          \n"
+    MEMACCESS(2)
+    "ld1          {v1.16b}, [%2], #16          \n"
+    "subs         %3, %3, #16                  \n"
+    "umull        v6.8h, v0.8b, v4.8b          \n"
+    "umull2       v7.8h, v0.16b, v4.16b        \n"
+    "umlal        v6.8h, v1.8b, v5.8b          \n"
+    "umlal2       v7.8h, v1.16b, v5.16b        \n"
+    "rshrn        v0.8b, v6.8h, #8             \n"
+    "rshrn2       v0.16b, v7.8h, #8            \n"
+    MEMACCESS(0)
+    "st1          {v0.16b}, [%0], #16          \n"
+    "bgt          1b                           \n"
+    "b            99f                          \n"
+
+    // Blend 25 / 75.
+  "25:                                         \n"
+    MEMACCESS(1)
+    "ld1          {v0.16b}, [%1], #16          \n"
+    MEMACCESS(2)
+    "ld1          {v1.16b}, [%2], #16          \n"
+    "subs         %3, %3, #16                  \n"
+    "urhadd       v0.16b, v0.16b, v1.16b       \n"
+    "urhadd       v0.16b, v0.16b, v1.16b       \n"
+    MEMACCESS(0)
+    "st1          {v0.16b}, [%0], #16          \n"
+    "bgt          25b                          \n"
+    "b            99f                          \n"
+
+    // Blend 50 / 50.
+  "50:                                         \n"
+    MEMACCESS(1)
+    "ld1          {v0.16b}, [%1], #16          \n"
+    MEMACCESS(2)
+    "ld1          {v1.16b}, [%2], #16          \n"
+    "subs         %3, %3, #16                  \n"
+    "urhadd       v0.16b, v0.16b, v1.16b       \n"
+    MEMACCESS(0)
+    "st1          {v0.16b}, [%0], #16          \n"
+    "bgt          50b                          \n"
+    "b            99f                          \n"
+
+    // Blend 75 / 25.
+  "75:                                         \n"
+    MEMACCESS(1)
+    "ld1          {v1.16b}, [%1], #16          \n"
+    MEMACCESS(2)
+    "ld1          {v0.16b}, [%2], #16          \n"
+    "subs         %3, %3, #16                  \n"
+    "urhadd       v0.16b, v0.16b, v1.16b       \n"
+    "urhadd       v0.16b, v0.16b, v1.16b       \n"
+    MEMACCESS(0)
+    "st1          {v0.16b}, [%0], #16          \n"
+    "bgt          75b                          \n"
+    "b            99f                          \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+  "100:                                        \n"
+    MEMACCESS(1)
+    "ld1          {v0.16b}, [%1], #16          \n"
+    "subs         %3, %3, #16                  \n"
+    MEMACCESS(0)
+    "st1          {v0.16b}, [%0], #16          \n"
+    "bgt          100b                         \n"
+
+  "99:                                         \n"
+    MEMACCESS(0)
+    "st1          {v0.b}[15], [%0]             \n"
+  : "+r"(dst_ptr),          // %0
+    "+r"(src_ptr),          // %1
+    "+r"(src_stride),       // %2
+    "+r"(dst_width),        // %3
+    "+r"(source_y_fraction),// %4
+    "+r"(y_fraction)        // %5
+  :
+  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"
+  );
+}
+
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width) {
+  asm volatile (
+  "1:                                          \n"
+    // load even pixels into q0, odd into q1
+    MEMACCESS (0)
+    "ld2        {v0.4s, v1.4s}, [%0], #32      \n"
+    MEMACCESS (0)
+    "ld2        {v2.4s, v3.4s}, [%0], #32      \n"
+    "subs       %2, %2, #8                     \n"  // 8 processed per loop
+    MEMACCESS (1)
+    "st1        {v1.16b}, [%1], #16            \n"  // store odd pixels
+    MEMACCESS (1)
+    "st1        {v3.16b}, [%1], #16            \n"
+    "bgt        1b                             \n"
+  : "+r" (src_ptr),          // %0
+    "+r" (dst),              // %1
+    "+r" (dst_width)         // %2
+  :
+  : "memory", "cc", "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+#endif //HAS_SCALEARGBROWDOWN2_NEON
+
+#ifdef HAS_SCALEARGBROWDOWN2_NEON
+void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width) {
+  asm volatile (
+    // change the stride to row 2 pointer
+    "add        %1, %1, %0                     \n"
+  "1:                                          \n"
+    MEMACCESS (0)
+    "ld4        {v0.16b - v3.16b}, [%0], #64   \n"  // load 8 ARGB pixels.
+    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
+    "uaddlp     v0.8h, v0.16b                  \n"  // B 16 bytes -> 8 shorts.
+    "uaddlp     v1.8h, v1.16b                  \n"  // G 16 bytes -> 8 shorts.
+    "uaddlp     v2.8h, v2.16b                  \n"  // R 16 bytes -> 8 shorts.
+    "uaddlp     v3.8h, v3.16b                  \n"  // A 16 bytes -> 8 shorts.
+    MEMACCESS (1)
+    "ld4        {v16.16b - v19.16b}, [%1], #64 \n"  // load 8 more ARGB pixels.
+    "uadalp     v0.8h, v16.16b                 \n"  // B 16 bytes -> 8 shorts.
+    "uadalp     v1.8h, v17.16b                 \n"  // G 16 bytes -> 8 shorts.
+    "uadalp     v2.8h, v18.16b                 \n"  // R 16 bytes -> 8 shorts.
+    "uadalp     v3.8h, v19.16b                 \n"  // A 16 bytes -> 8 shorts.
+    "rshrn      v0.8b, v0.8h, #2               \n"  // downshift, round and pack
+    "rshrn      v1.8b, v1.8h, #2               \n"
+    "rshrn      v2.8b, v2.8h, #2               \n"
+    "rshrn      v3.8b, v3.8h, #2               \n"
+    MEMACCESS (2)
+    "st4        {v0.8b - v3.8b}, [%2], #32     \n"
+    "bgt        1b                             \n"
+  : "+r" (src_ptr),          // %0
+    "+r" (src_stride),       // %1
+    "+r" (dst),              // %2
+    "+r" (dst_width)         // %3
+  :
+  : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
+  );
+}
+#endif //HAS_SCALEARGBROWDOWN2_NEON
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+void ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride,
+                               int src_stepx, uint8* dst_argb, int dst_width) {
+  asm volatile (
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.s}[0], [%0], %3            \n"
+    MEMACCESS(0)
+    "ld1        {v0.s}[1], [%0], %3            \n"
+    MEMACCESS(0)
+    "ld1        {v0.s}[2], [%0], %3            \n"
+    MEMACCESS(0)
+    "ld1        {v0.s}[3], [%0], %3            \n"
+    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
+    MEMACCESS(1)
+    "st1        {v0.16b}, [%1], #16            \n"
+    "bgt        1b                             \n"
+  : "+r"(src_argb),    // %0
+    "+r"(dst_argb),    // %1
+    "+r"(dst_width)    // %2
+  : "r"(src_stepx * 4) // %3
+  : "memory", "cc", "v0"
+  );
+}
+#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
+
+#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
+// Reads 4 pixels at a time.
+// Alignment requirement: src_argb 4 byte aligned.
+// TODO, might be worth another optimization pass in future.
+// It could be upgraded to 8 pixels at a time to start with.
+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width) {
+  asm volatile (
+    "add        %1, %1, %0                     \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1     {v0.8b}, [%0], %4                 \n"  // Read 4 2x2 blocks -> 2x1
+    MEMACCESS(1)
+    "ld1     {v1.8b}, [%1], %4                 \n"
+    MEMACCESS(0)
+    "ld1     {v2.8b}, [%0], %4                 \n"
+    MEMACCESS(1)
+    "ld1     {v3.8b}, [%1], %4                 \n"
+    MEMACCESS(0)
+    "ld1     {v4.8b}, [%0], %4                 \n"
+    MEMACCESS(1)
+    "ld1     {v5.8b}, [%1], %4                 \n"
+    MEMACCESS(0)
+    "ld1     {v6.8b}, [%0], %4                 \n"
+    MEMACCESS(1)
+    "ld1     {v7.8b}, [%1], %4                 \n"
+    "uaddl   v0.8h, v0.8b, v1.8b               \n"
+    "uaddl   v2.8h, v2.8b, v3.8b               \n"
+    "uaddl   v4.8h, v4.8b, v5.8b               \n"
+    "uaddl   v6.8h, v6.8b, v7.8b               \n"
+    "mov     v16.d[1], v0.d[1]                 \n"  // ab_cd -> ac_bd
+    "mov     v0.d[1], v2.d[0]                  \n"
+    "mov     v2.d[0], v16.d[1]                 \n"
+    "mov     v16.d[1], v4.d[1]                 \n"  // ef_gh -> eg_fh
+    "mov     v4.d[1], v6.d[0]                  \n"
+    "mov     v6.d[0], v16.d[1]                 \n"
+    "add     v0.8h, v0.8h, v2.8h               \n"  // (a+b)_(c+d)
+    "add     v4.8h, v4.8h, v6.8h               \n"  // (e+f)_(g+h)
+    "rshrn   v0.8b, v0.8h, #2                  \n"  // first 2 pixels.
+    "rshrn2  v0.16b, v4.8h, #2                 \n"  // next 2 pixels.
+    "subs       %3, %3, #4                     \n"  // 4 pixels per loop.
+    MEMACCESS(2)
+    "st1     {v0.16b}, [%2], #16               \n"
+    "bgt        1b                             \n"
+  : "+r"(src_argb),    // %0
+    "+r"(src_stride),  // %1
+    "+r"(dst_argb),    // %2
+    "+r"(dst_width)    // %3
+  : "r"(src_stepx * 4) // %4
+  : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
+  );
+}
+#endif  // HAS_SCALEARGBROWDOWNEVEN_NEON
+#endif  // __aarch64__
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_posix.cc b/src/main/jni/libyuv/source/scale_posix.cc
new file mode 100644
index 000000000..352e66782
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_posix.cc
@@ -0,0 +1,1315 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+
+// Offsets for source bytes 0 to 9
+static uvec8 kShuf0 =
+  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
+static uvec8 kShuf1 =
+  { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static uvec8 kShuf2 =
+  { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 0 to 10
+static uvec8 kShuf01 =
+  { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
+
+// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
+static uvec8 kShuf11 =
+  { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static uvec8 kShuf21 =
+  { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
+
+// Coefficients for source bytes 0 to 10
+static uvec8 kMadd01 =
+  { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
+
+// Coefficients for source bytes 10 to 21
+static uvec8 kMadd11 =
+  { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
+
+// Coefficients for source bytes 21 to 31
+static uvec8 kMadd21 =
+  { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
+
+// Coefficients for source bytes 21 to 31
+static vec16 kRound34 =
+  { 2, 2, 2, 2, 2, 2, 2, 2 };
+
+static uvec8 kShuf38a =
+  { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+static uvec8 kShuf38b =
+  { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
+
+// Arrange words 0,3,6 into 0,1,2
+static uvec8 kShufAc =
+  { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Arrange words 0,3,6 into 3,4,5
+static uvec8 kShufAc3 =
+  { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
+
+// Scaling values for boxes of 3x3 and 2x3
+static uvec16 kScaleAc33 =
+  { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
+
+// Arrange first value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb0 =
+  { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
+
+// Arrange second value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb1 =
+  { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
+
+// Arrange third value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb2 =
+  { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
+
+// Scaling values for boxes of 3x2 and 2x2
+static uvec16 kScaleAb2 =
+  { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
+
+// GCC versions of row functions are verbatim conversions from Visual C.
+// Generated using gcc disassembly on Visual C object file:
+// objdump -D yuvscaler.obj >yuvscaler.txt
+
+void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10, 0) ",%%xmm1  \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "pand      %%xmm5,%%xmm3                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "pavgw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa  (%0,%3,1),%%xmm2
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa  0x10(%0,%3,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "pand      %%xmm5,%%xmm3                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "pavgw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  : "r"((intptr_t)(src_stride))   // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
+                                        ptrdiff_t src_stride,
+                                        uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "pand      %%xmm5,%%xmm3                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "pavgw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrlw     $0x8,%%xmm5                     \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu  (%0,%3,1),%%xmm2
+    BUNDLEALIGN
+    MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu  0x10(%0,%3,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "pand      %%xmm5,%%xmm2                   \n"
+    "pand      %%xmm5,%%xmm3                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "pavgw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "sub       $0x10,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  : "r"((intptr_t)(src_stride))   // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "psrld     $0x18,%%xmm5                    \n"
+    "pslld     $0x10,%%xmm5                    \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pand      %%xmm5,%%xmm0                   \n"
+    "pand      %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  intptr_t stridex3 = 0;
+  asm volatile (
+    "pcmpeqb   %%xmm7,%%xmm7                   \n"
+    "psrlw     $0x8,%%xmm7                     \n"
+    "lea       " MEMLEA4(0x00,4,4,2) ",%3      \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    MEMOPREG(movdqa,0x00,0,4,2,xmm2)           //  movdqa  (%0,%4,2),%%xmm2
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x10,0,4,2,xmm3)           //  movdqa  0x10(%0,%4,2),%%xmm3
+    MEMOPREG(movdqa,0x00,0,3,1,xmm4)           //  movdqa  (%0,%3,1),%%xmm4
+    MEMOPREG(movdqa,0x10,0,3,1,xmm5)           //  movdqa  0x10(%0,%3,1),%%xmm5
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm4,%%xmm2                   \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm5,%%xmm3                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "movdqa    %%xmm1,%%xmm3                   \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "pand      %%xmm7,%%xmm2                   \n"
+    "pand      %%xmm7,%%xmm3                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "pavgw     %%xmm3,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "pand      %%xmm7,%%xmm2                   \n"
+    "pavgw     %%xmm2,%%xmm0                   \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x8,1) ",%1            \n"
+    "sub       $0x8,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),     // %0
+    "+r"(dst_ptr),     // %1
+    "+r"(dst_width),   // %2
+    "+r"(stridex3)     // %3
+  : "r"((intptr_t)(src_stride))    // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
+#endif
+  );
+}
+
+void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %0,%%xmm3                       \n"
+    "movdqa    %1,%%xmm4                       \n"
+    "movdqa    %2,%%xmm5                       \n"
+  :
+  : "m"(kShuf0),  // %0
+    "m"(kShuf1),  // %1
+    "m"(kShuf2)   // %2
+  );
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm2   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "palignr   $0x8,%%xmm0,%%xmm1              \n"
+    "pshufb    %%xmm3,%%xmm0                   \n"
+    "pshufb    %%xmm4,%%xmm1                   \n"
+    "pshufb    %%xmm5,%%xmm2                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "movq      %%xmm1," MEMACCESS2(0x8,1) "    \n"
+    "movq      %%xmm2," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x18,1) ",%1           \n"
+    "sub       $0x18,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),   // %0
+    "+r"(dst_ptr),   // %1
+    "+r"(dst_width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %0,%%xmm2                       \n"  // kShuf01
+    "movdqa    %1,%%xmm3                       \n"  // kShuf11
+    "movdqa    %2,%%xmm4                       \n"  // kShuf21
+  :
+  : "m"(kShuf01),  // %0
+    "m"(kShuf11),  // %1
+    "m"(kShuf21)   // %2
+  );
+  asm volatile (
+    "movdqa    %0,%%xmm5                       \n"  // kMadd01
+    "movdqa    %1,%%xmm0                       \n"  // kMadd11
+    "movdqa    %2,%%xmm1                       \n"  // kRound34
+  :
+  : "m"(kMadd01),  // %0
+    "m"(kMadd11),  // %1
+    "m"(kRound34)  // %2
+  );
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
+    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm5,%%xmm6                   \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS(1) "         \n"
+    "movdqu    " MEMACCESS2(0x8,0) ",%%xmm6    \n"
+    MEMOPREG(movdqu,0x8,0,3,1,xmm7)            //  movdqu  0x8(%0,%3),%%xmm7
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm3,%%xmm6                   \n"
+    "pmaddubsw %%xmm0,%%xmm6                   \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3),%%xmm7
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm4,%%xmm6                   \n"
+    "pmaddubsw %4,%%xmm6                       \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x18,1) ",%1           \n"
+    "sub       $0x18,%2                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),   // %0
+    "+r"(dst_ptr),   // %1
+    "+r"(dst_width)  // %2
+  : "r"((intptr_t)(src_stride)),  // %3
+    "m"(kMadd21)     // %4
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %0,%%xmm2                       \n"  // kShuf01
+    "movdqa    %1,%%xmm3                       \n"  // kShuf11
+    "movdqa    %2,%%xmm4                       \n"  // kShuf21
+  :
+  : "m"(kShuf01),  // %0
+    "m"(kShuf11),  // %1
+    "m"(kShuf21)   // %2
+  );
+  asm volatile (
+    "movdqa    %0,%%xmm5                       \n"  // kMadd01
+    "movdqa    %1,%%xmm0                       \n"  // kMadd11
+    "movdqa    %2,%%xmm1                       \n"  // kRound34
+  :
+  : "m"(kMadd01),  // %0
+    "m"(kMadd11),  // %1
+    "m"(kRound34)  // %2
+  );
+
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
+    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3,1),%%xmm7
+    "pavgb     %%xmm6,%%xmm7                   \n"
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm2,%%xmm6                   \n"
+    "pmaddubsw %%xmm5,%%xmm6                   \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS(1) "         \n"
+    "movdqu    " MEMACCESS2(0x8,0) ",%%xmm6    \n"
+    MEMOPREG(movdqu,0x8,0,3,1,xmm7)            //  movdqu  0x8(%0,%3,1),%%xmm7
+    "pavgb     %%xmm6,%%xmm7                   \n"
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm3,%%xmm6                   \n"
+    "pmaddubsw %%xmm0,%%xmm6                   \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
+    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3,1),%%xmm7
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm6,%%xmm7                   \n"
+    "pavgb     %%xmm7,%%xmm6                   \n"
+    "pshufb    %%xmm4,%%xmm6                   \n"
+    "pmaddubsw %4,%%xmm6                       \n"
+    "paddsw    %%xmm1,%%xmm6                   \n"
+    "psrlw     $0x2,%%xmm6                     \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "movq      %%xmm6," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x18,1) ",%1           \n"
+    "sub       $0x18,%2                        \n"
+    "jg        1b                              \n"
+    : "+r"(src_ptr),   // %0
+      "+r"(dst_ptr),   // %1
+      "+r"(dst_width)  // %2
+    : "r"((intptr_t)(src_stride)),  // %3
+      "m"(kMadd21)     // %4
+    : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "movq      %%xmm0," MEMACCESS(1) "         \n"
+    "movhlps   %%xmm0,%%xmm1                   \n"
+    "movd      %%xmm1," MEMACCESS2(0x8,1) "    \n"
+    "lea       " MEMLEA(0xc,1) ",%1            \n"
+    "sub       $0xc,%2                         \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),   // %0
+    "+r"(dst_ptr),   // %1
+    "+r"(dst_width)  // %2
+  : "m"(kShuf38a),   // %3
+    "m"(kShuf38b)    // %4
+  : "memory", "cc"
+#if defined(__SSE2__)
+      , "xmm0", "xmm1", "xmm4", "xmm5"
+#endif
+  );
+}
+
+void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %0,%%xmm2                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm4                       \n"
+    "movdqa    %3,%%xmm5                       \n"
+  :
+  : "m"(kShufAb0),   // %0
+    "m"(kShufAb1),   // %1
+    "m"(kShufAb2),   // %2
+    "m"(kScaleAb2)   // %3
+  );
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(pavgb,0x00,0,3,1,xmm0)            //  pavgb   (%0,%3,1),%%xmm0
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "pshufb    %%xmm2,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm6                   \n"
+    "pshufb    %%xmm3,%%xmm6                   \n"
+    "paddusw   %%xmm6,%%xmm1                   \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "paddusw   %%xmm0,%%xmm1                   \n"
+    "pmulhuw   %%xmm5,%%xmm1                   \n"
+    "packuswb  %%xmm1,%%xmm1                   \n"
+    "sub       $0x6,%2                         \n"
+    "movd      %%xmm1," MEMACCESS(1) "         \n"
+    "psrlq     $0x10,%%xmm1                    \n"
+    "movd      %%xmm1," MEMACCESS2(0x2,1) "    \n"
+    "lea       " MEMLEA(0x6,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),     // %0
+    "+r"(dst_ptr),     // %1
+    "+r"(dst_width)    // %2
+  : "r"((intptr_t)(src_stride))  // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  asm volatile (
+    "movdqa    %0,%%xmm2                       \n"
+    "movdqa    %1,%%xmm3                       \n"
+    "movdqa    %2,%%xmm4                       \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+  :
+  : "m"(kShufAc),    // %0
+    "m"(kShufAc3),   // %1
+    "m"(kScaleAc33)  // %2
+  );
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movdqa,0x00,0,3,1,xmm6)           //  movdqa  (%0,%3,1),%%xmm6
+    "movhlps   %%xmm0,%%xmm1                   \n"
+    "movhlps   %%xmm6,%%xmm7                   \n"
+    "punpcklbw %%xmm5,%%xmm0                   \n"
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "punpcklbw %%xmm5,%%xmm6                   \n"
+    "punpcklbw %%xmm5,%%xmm7                   \n"
+    "paddusw   %%xmm6,%%xmm0                   \n"
+    "paddusw   %%xmm7,%%xmm1                   \n"
+    MEMOPREG(movdqa,0x00,0,3,2,xmm6)           //  movdqa  (%0,%3,2),%%xmm6
+    "lea       " MEMLEA(0x10,0) ",%0           \n"
+    "movhlps   %%xmm6,%%xmm7                   \n"
+    "punpcklbw %%xmm5,%%xmm6                   \n"
+    "punpcklbw %%xmm5,%%xmm7                   \n"
+    "paddusw   %%xmm6,%%xmm0                   \n"
+    "paddusw   %%xmm7,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm6                   \n"
+    "psrldq    $0x2,%%xmm0                     \n"
+    "paddusw   %%xmm0,%%xmm6                   \n"
+    "psrldq    $0x2,%%xmm0                     \n"
+    "paddusw   %%xmm0,%%xmm6                   \n"
+    "pshufb    %%xmm2,%%xmm6                   \n"
+    "movdqa    %%xmm1,%%xmm7                   \n"
+    "psrldq    $0x2,%%xmm1                     \n"
+    "paddusw   %%xmm1,%%xmm7                   \n"
+    "psrldq    $0x2,%%xmm1                     \n"
+    "paddusw   %%xmm1,%%xmm7                   \n"
+    "pshufb    %%xmm3,%%xmm7                   \n"
+    "paddusw   %%xmm7,%%xmm6                   \n"
+    "pmulhuw   %%xmm4,%%xmm6                   \n"
+    "packuswb  %%xmm6,%%xmm6                   \n"
+    "sub       $0x6,%2                         \n"
+    "movd      %%xmm6," MEMACCESS(1) "         \n"
+    "psrlq     $0x10,%%xmm6                    \n"
+    "movd      %%xmm6," MEMACCESS2(0x2,1) "    \n"
+    "lea       " MEMLEA(0x6,1) ",%1            \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),    // %0
+    "+r"(dst_ptr),    // %1
+    "+r"(dst_width)   // %2
+  : "r"((intptr_t)(src_stride))   // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+
+void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                       uint16* dst_ptr, int src_width, int src_height) {
+  int tmp_height = 0;
+  intptr_t tmp_src = 0;
+  asm volatile (
+    "pxor      %%xmm4,%%xmm4                   \n"
+    "sub       $0x1,%5                         \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "mov       %0,%3                           \n"
+    "add       %6,%0                           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm4,%%xmm0                   \n"
+    "punpckhbw %%xmm4,%%xmm1                   \n"
+    "mov       %5,%2                           \n"
+    "test      %2,%2                           \n"
+    "je        3f                              \n"
+
+    LABELALIGN
+  "2:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
+    "add       %6,%0                           \n"
+    "movdqa    %%xmm2,%%xmm3                   \n"
+    "punpcklbw %%xmm4,%%xmm2                   \n"
+    "punpckhbw %%xmm4,%%xmm3                   \n"
+    "paddusw   %%xmm2,%%xmm0                   \n"
+    "paddusw   %%xmm3,%%xmm1                   \n"
+    "sub       $0x1,%2                         \n"
+    "jg        2b                              \n"
+
+    LABELALIGN
+  "3:                                          \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
+    "lea       " MEMLEA(0x10,3) ",%0           \n"
+    "lea       " MEMLEA(0x20,1) ",%1           \n"
+    "sub       $0x10,%4                        \n"
+    "jg        1b                              \n"
+  : "+r"(src_ptr),     // %0
+    "+r"(dst_ptr),     // %1
+    "+r"(tmp_height),  // %2
+    "+r"(tmp_src),     // %3
+    "+r"(src_width),   // %4
+    "+rm"(src_height)  // %5
+  : "rm"((intptr_t)(src_stride))  // %6
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+#endif
+  );
+}
+
+// Bilinear column filtering. SSSE3 version.
+void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                           int dst_width, int x, int dx) {
+  intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
+  asm volatile (
+    "movd      %6,%%xmm2                       \n"
+    "movd      %7,%%xmm3                       \n"
+    "movl      $0x04040000,%k2                 \n"
+    "movd      %k2,%%xmm5                      \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "psrlw     $0x9,%%xmm6                     \n"
+    "pextrw    $0x1,%%xmm2,%k3                 \n"
+    "subl      $0x2,%5                         \n"
+    "jl        29f                             \n"
+    "movdqa    %%xmm2,%%xmm0                   \n"
+    "paddd     %%xmm3,%%xmm0                   \n"
+    "punpckldq %%xmm0,%%xmm2                   \n"
+    "punpckldq %%xmm3,%%xmm3                   \n"
+    "paddd     %%xmm3,%%xmm3                   \n"
+    "pextrw    $0x3,%%xmm2,%k4                 \n"
+
+    LABELALIGN
+  "2:                                          \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "paddd     %%xmm3,%%xmm2                   \n"
+    MEMOPARG(movzwl,0x00,1,3,1,k2)             //  movzwl  (%1,%3,1),%k2
+    "movd      %k2,%%xmm0                      \n"
+    "psrlw     $0x9,%%xmm1                     \n"
+    BUNDLEALIGN
+    MEMOPARG(movzwl,0x00,1,4,1,k2)             //  movzwl  (%1,%4,1),%k2
+    "movd      %k2,%%xmm4                      \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "punpcklwd %%xmm4,%%xmm0                   \n"
+    "pxor      %%xmm6,%%xmm1                   \n"
+    "pmaddubsw %%xmm1,%%xmm0                   \n"
+    "pextrw    $0x1,%%xmm2,%k3                 \n"
+    "pextrw    $0x3,%%xmm2,%k4                 \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movd      %%xmm0,%k2                      \n"
+    "mov       %w2," MEMACCESS(0) "            \n"
+    "lea       " MEMLEA(0x2,0) ",%0            \n"
+    "sub       $0x2,%5                         \n"
+    "jge       2b                              \n"
+
+    LABELALIGN
+  "29:                                         \n"
+    "addl      $0x1,%5                         \n"
+    "jl        99f                             \n"
+    MEMOPARG(movzwl,0x00,1,3,1,k2)             //  movzwl  (%1,%3,1),%k2
+    "movd      %k2,%%xmm0                      \n"
+    "psrlw     $0x9,%%xmm2                     \n"
+    "pshufb    %%xmm5,%%xmm2                   \n"
+    "pxor      %%xmm6,%%xmm2                   \n"
+    "pmaddubsw %%xmm2,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movd      %%xmm0,%k2                      \n"
+    "mov       %b2," MEMACCESS(0) "            \n"
+  "99:                                         \n"
+  : "+r"(dst_ptr),     // %0
+    "+r"(src_ptr),     // %1
+    "+a"(temp_pixel),  // %2
+    "+r"(x0),          // %3
+    "+r"(x1),          // %4
+    "+rm"(dst_width)   // %5
+  : "rm"(x),           // %6
+    "rm"(dx)           // %7
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                       int dst_width, int x, int dx) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    "sub       $0x20,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "jg        1b                              \n"
+
+  : "+r"(dst_ptr),     // %0
+    "+r"(src_ptr),     // %1
+    "+r"(dst_width)    // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+                            ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "shufps    $0xdd,%%xmm1,%%xmm0             \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(dst_width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_argb, int dst_width) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),  // %0
+    "+r"(dst_argb),  // %1
+    "+r"(dst_width)  // %2
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
+    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
+    BUNDLEALIGN
+    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa   (%0,%3,1),%%xmm2
+    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa   0x10(%0,%3,1),%%xmm3
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),   // %0
+    "+r"(dst_argb),   // %1
+    "+r"(dst_width)   // %2
+  : "r"((intptr_t)(src_stride))   // %3
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3"
+#endif
+  );
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: dst_argb 16 byte aligned.
+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width) {
+  intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
+  intptr_t src_stepx_x12 = 0;
+  asm volatile (
+    "lea       " MEMLEA3(0x00,1,4) ",%1        \n"
+    "lea       " MEMLEA4(0x00,1,1,2) ",%4      \n"
+    LABELALIGN
+  "1:                                          \n"
+    "movd      " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movd,0x00,0,1,1,xmm1)             //  movd      (%0,%1,1),%%xmm1
+    "punpckldq %%xmm1,%%xmm0                   \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,0,1,2,xmm2)             //  movd      (%0,%1,2),%%xmm2
+    MEMOPREG(movd,0x00,0,4,1,xmm3)             //  movd      (%0,%4,1),%%xmm3
+    "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
+    "punpckldq %%xmm3,%%xmm2                   \n"
+    "punpcklqdq %%xmm2,%%xmm0                  \n"
+    "sub       $0x4,%3                         \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),      // %0
+    "+r"(src_stepx_x4),  // %1
+    "+r"(dst_argb),      // %2
+    "+r"(dst_width),     // %3
+    "+r"(src_stepx_x12)  // %4
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3"
+#endif
+  );
+}
+
+// Blends four 2x2 to 4x1.
+// Alignment requirement: dst_argb 16 byte aligned.
+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride, int src_stepx,
+                                  uint8* dst_argb, int dst_width) {
+  intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
+  intptr_t src_stepx_x12 = 0;
+  intptr_t row1 = (intptr_t)(src_stride);
+  asm volatile (
+    "lea       " MEMLEA3(0x00,1,4) ",%1        \n"
+    "lea       " MEMLEA4(0x00,1,1,2) ",%4      \n"
+    "lea       " MEMLEA4(0x00,0,5,1) ",%5      \n"
+
+    LABELALIGN
+  "1:                                          \n"
+    "movq      " MEMACCESS(0) ",%%xmm0         \n"
+    MEMOPREG(movhps,0x00,0,1,1,xmm0)           //  movhps    (%0,%1,1),%%xmm0
+    MEMOPREG(movq,0x00,0,1,2,xmm1)             //  movq      (%0,%1,2),%%xmm1
+    BUNDLEALIGN
+    MEMOPREG(movhps,0x00,0,4,1,xmm1)           //  movhps    (%0,%4,1),%%xmm1
+    "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
+    "movq      " MEMACCESS(5) ",%%xmm2         \n"
+    BUNDLEALIGN
+    MEMOPREG(movhps,0x00,5,1,1,xmm2)           //  movhps    (%5,%1,1),%%xmm2
+    MEMOPREG(movq,0x00,5,1,2,xmm3)             //  movq      (%5,%1,2),%%xmm3
+    MEMOPREG(movhps,0x00,5,4,1,xmm3)           //  movhps    (%5,%4,1),%%xmm3
+    "lea       " MEMLEA4(0x00,5,1,4) ",%5      \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "pavgb     %%xmm3,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm2                   \n"
+    "shufps    $0x88,%%xmm1,%%xmm0             \n"
+    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
+    "pavgb     %%xmm2,%%xmm0                   \n"
+    "sub       $0x4,%3                         \n"
+    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),       // %0
+    "+r"(src_stepx_x4),   // %1
+    "+r"(dst_argb),       // %2
+    "+rm"(dst_width),     // %3
+    "+r"(src_stepx_x12),  // %4
+    "+r"(row1)            // %5
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3"
+#endif
+  );
+}
+
+void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int x, int dx) {
+  intptr_t x0 = 0, x1 = 0;
+  asm volatile (
+    "movd      %5,%%xmm2                       \n"
+    "movd      %6,%%xmm3                       \n"
+    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
+    "pshufd    $0x11,%%xmm3,%%xmm0             \n"
+    "paddd     %%xmm0,%%xmm2                   \n"
+    "paddd     %%xmm3,%%xmm3                   \n"
+    "pshufd    $0x5,%%xmm3,%%xmm0              \n"
+    "paddd     %%xmm0,%%xmm2                   \n"
+    "paddd     %%xmm3,%%xmm3                   \n"
+    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
+    "pextrw    $0x1,%%xmm2,%k0                 \n"
+    "pextrw    $0x3,%%xmm2,%k1                 \n"
+    "cmp       $0x0,%4                         \n"
+    "jl        99f                             \n"
+    "sub       $0x4,%4                         \n"
+    "jl        49f                             \n"
+
+    LABELALIGN
+  "40:                                         \n"
+    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
+    MEMOPREG(movd,0x00,3,1,4,xmm1)             //  movd      (%3,%1,4),%%xmm1
+    "pextrw    $0x5,%%xmm2,%k0                 \n"
+    "pextrw    $0x7,%%xmm2,%k1                 \n"
+    "paddd     %%xmm3,%%xmm2                   \n"
+    "punpckldq %%xmm1,%%xmm0                   \n"
+    MEMOPREG(movd,0x00,3,0,4,xmm1)             //  movd      (%3,%0,4),%%xmm1
+    MEMOPREG(movd,0x00,3,1,4,xmm4)             //  movd      (%3,%1,4),%%xmm4
+    "pextrw    $0x1,%%xmm2,%k0                 \n"
+    "pextrw    $0x3,%%xmm2,%k1                 \n"
+    "punpckldq %%xmm4,%%xmm1                   \n"
+    "punpcklqdq %%xmm1,%%xmm0                  \n"
+    "sub       $0x4,%4                         \n"
+    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x10,2) ",%2           \n"
+    "jge       40b                             \n"
+
+  "49:                                         \n"
+    "test      $0x2,%4                         \n"
+    "je        29f                             \n"
+    BUNDLEALIGN
+    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
+    MEMOPREG(movd,0x00,3,1,4,xmm1)             //  movd      (%3,%1,4),%%xmm1
+    "pextrw    $0x5,%%xmm2,%k0                 \n"
+    "punpckldq %%xmm1,%%xmm0                   \n"
+    "movq      %%xmm0," MEMACCESS(2) "         \n"
+    "lea       " MEMLEA(0x8,2) ",%2            \n"
+  "29:                                         \n"
+    "test      $0x1,%4                         \n"
+    "je        99f                             \n"
+    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
+    "movd      %%xmm0," MEMACCESS(2) "         \n"
+  "99:                                         \n"
+  : "+a"(x0),          // %0
+    "+d"(x1),          // %1
+    "+r"(dst_argb),    // %2
+    "+r"(src_argb),    // %3
+    "+r"(dst_width)    // %4
+  : "rm"(x),           // %5
+    "rm"(dx)           // %6
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+#endif
+  );
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
+                           int dst_width, int x, int dx) {
+  asm volatile (
+    LABELALIGN
+  "1:                                          \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
+    "lea       " MEMLEA(0x10,1) ",%1           \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpckldq %%xmm0,%%xmm0                   \n"
+    "punpckhdq %%xmm1,%%xmm1                   \n"
+    "sub       $0x8,%2                         \n"
+    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
+    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
+    "lea       " MEMLEA(0x20,0) ",%0           \n"
+    "jg        1b                              \n"
+
+  : "+r"(dst_argb),    // %0
+    "+r"(src_argb),    // %1
+    "+r"(dst_width)    // %2
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+
+// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
+static uvec8 kShuffleColARGB = {
+  0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u,  // bbggrraa 1st pixel
+  8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u  // bbggrraa 2nd pixel
+};
+
+// Shuffle table for duplicating 2 fractions into 8 bytes each
+static uvec8 kShuffleFractions = {
+  0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
+};
+
+// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
+void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
+                               int dst_width, int x, int dx) {
+  intptr_t x0 = 0, x1 = 0;
+  asm volatile (
+    "movdqa    %0,%%xmm4                       \n"
+    "movdqa    %1,%%xmm5                       \n"
+  :
+  : "m"(kShuffleColARGB),  // %0
+    "m"(kShuffleFractions)  // %1
+  );
+
+  asm volatile (
+    "movd      %5,%%xmm2                       \n"
+    "movd      %6,%%xmm3                       \n"
+    "pcmpeqb   %%xmm6,%%xmm6                   \n"
+    "psrlw     $0x9,%%xmm6                     \n"
+    "pextrw    $0x1,%%xmm2,%k3                 \n"
+    "sub       $0x2,%2                         \n"
+    "jl        29f                             \n"
+    "movdqa    %%xmm2,%%xmm0                   \n"
+    "paddd     %%xmm3,%%xmm0                   \n"
+    "punpckldq %%xmm0,%%xmm2                   \n"
+    "punpckldq %%xmm3,%%xmm3                   \n"
+    "paddd     %%xmm3,%%xmm3                   \n"
+    "pextrw    $0x3,%%xmm2,%k4                 \n"
+
+    LABELALIGN
+  "2:                                          \n"
+    "movdqa    %%xmm2,%%xmm1                   \n"
+    "paddd     %%xmm3,%%xmm2                   \n"
+    MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
+    "psrlw     $0x9,%%xmm1                     \n"
+    BUNDLEALIGN
+    MEMOPREG(movhps,0x00,1,4,4,xmm0)           //  movhps    (%1,%4,4),%%xmm0
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "pxor      %%xmm6,%%xmm1                   \n"
+    "pmaddubsw %%xmm1,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "pextrw    $0x1,%%xmm2,%k3                 \n"
+    "pextrw    $0x3,%%xmm2,%k4                 \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movq      %%xmm0," MEMACCESS(0) "         \n"
+    "lea       " MEMLEA(0x8,0) ",%0            \n"
+    "sub       $0x2,%2                         \n"
+    "jge       2b                              \n"
+
+    LABELALIGN
+  "29:                                         \n"
+    "add       $0x1,%2                         \n"
+    "jl        99f                             \n"
+    "psrlw     $0x9,%%xmm2                     \n"
+    BUNDLEALIGN
+    MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
+    "pshufb    %%xmm5,%%xmm2                   \n"
+    "pshufb    %%xmm4,%%xmm0                   \n"
+    "pxor      %%xmm6,%%xmm2                   \n"
+    "pmaddubsw %%xmm2,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "movd      %%xmm0," MEMACCESS(0) "         \n"
+
+    LABELALIGN
+  "99:                                         \n"
+  : "+r"(dst_argb),    // %0
+    "+r"(src_argb),    // %1
+    "+rm"(dst_width),  // %2
+    "+r"(x0),          // %3
+    "+r"(x1)           // %4
+  : "rm"(x),           // %5
+    "rm"(dx)           // %6
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+  );
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+int FixedDiv_X86(int num, int div) {
+  asm volatile (
+    "cdq                                       \n"
+    "shld      $0x10,%%eax,%%edx               \n"
+    "shl       $0x10,%%eax                     \n"
+    "idiv      %1                              \n"
+    "mov       %0, %%eax                       \n"
+    : "+a"(num)  // %0
+    : "c"(div)   // %1
+    : "memory", "cc", "edx"
+  );
+  return num;
+}
+
+// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
+int FixedDiv1_X86(int num, int div) {
+  asm volatile (
+    "cdq                                       \n"
+    "shld      $0x10,%%eax,%%edx               \n"
+    "shl       $0x10,%%eax                     \n"
+    "sub       $0x10001,%%eax                  \n"
+    "sbb       $0x0,%%edx                      \n"
+    "sub       $0x1,%1                         \n"
+    "idiv      %1                              \n"
+    "mov       %0, %%eax                       \n"
+    : "+a"(num)  // %0
+    : "c"(div)   // %1
+    : "memory", "cc", "edx"
+  );
+  return num;
+}
+
+#endif  // defined(__x86_64__) || defined(__i386__)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/scale_win.cc b/src/main/jni/libyuv/source/scale_win.cc
new file mode 100644
index 000000000..840b9738d
--- /dev/null
+++ b/src/main/jni/libyuv/source/scale_win.cc
@@ -0,0 +1,1320 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// This module is for Visual C x86.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+// Offsets for source bytes 0 to 9
+static uvec8 kShuf0 =
+  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
+static uvec8 kShuf1 =
+  { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static uvec8 kShuf2 =
+  { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Offsets for source bytes 0 to 10
+static uvec8 kShuf01 =
+  { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
+
+// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
+static uvec8 kShuf11 =
+  { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
+
+// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
+static uvec8 kShuf21 =
+  { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
+
+// Coefficients for source bytes 0 to 10
+static uvec8 kMadd01 =
+  { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
+
+// Coefficients for source bytes 10 to 21
+static uvec8 kMadd11 =
+  { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
+
+// Coefficients for source bytes 21 to 31
+static uvec8 kMadd21 =
+  { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
+
+// Coefficients for source bytes 21 to 31
+static vec16 kRound34 =
+  { 2, 2, 2, 2, 2, 2, 2, 2 };
+
+static uvec8 kShuf38a =
+  { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+static uvec8 kShuf38b =
+  { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
+
+// Arrange words 0,3,6 into 0,1,2
+static uvec8 kShufAc =
+  { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
+
+// Arrange words 0,3,6 into 3,4,5
+static uvec8 kShufAc3 =
+  { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
+
+// Scaling values for boxes of 3x3 and 2x3
+static uvec16 kScaleAc33 =
+  { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
+
+// Arrange first value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb0 =
+  { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
+
+// Arrange second value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb1 =
+  { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
+
+// Arrange third value for pixels 0,1,2,3,4,5
+static uvec8 kShufAb2 =
+  { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
+
+// Scaling values for boxes of 3x2 and 2x2
+static uvec16 kScaleAb2 =
+  { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
+
+// Reads 32 pixels, throws half away and writes 16 pixels.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8               // isolate odd pixels.
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x1 rectangle to 16x1.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                              uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+
+    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
+    psrlw      xmm0, 8
+    movdqa     xmm3, xmm1
+    psrlw      xmm1, 8
+    pand       xmm2, xmm5
+    pand       xmm3, xmm5
+    pavgw      xmm0, xmm2
+    pavgw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x2 rectangle to 16x1.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + esi]
+    movdqa     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2            // average rows
+    pavgb      xmm1, xmm3
+
+    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
+    psrlw      xmm0, 8
+    movdqa     xmm3, xmm1
+    psrlw      xmm1, 8
+    pand       xmm2, xmm5
+    pand       xmm3, xmm5
+    pavgw      xmm0, xmm2
+    pavgw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+
+    sub        ecx, 16
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Reads 32 pixels, throws half away and writes 16 pixels.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+
+    align      4
+  wloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    psrlw      xmm0, 8               // isolate odd pixels.
+    psrlw      xmm1, 8
+    packuswb   xmm0, xmm1
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x1 rectangle to 16x1.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
+                                        ptrdiff_t src_stride,
+                                        uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  wloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+
+    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
+    psrlw      xmm0, 8
+    movdqa     xmm3, xmm1
+    psrlw      xmm1, 8
+    pand       xmm2, xmm5
+    pand       xmm3, xmm5
+    pavgw      xmm0, xmm2
+    pavgw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x2 rectangle to 16x1.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
+    psrlw      xmm5, 8
+
+    align      4
+  wloop:
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + esi]
+    movdqu     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2            // average rows
+    pavgb      xmm1, xmm3
+
+    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
+    psrlw      xmm0, 8
+    movdqa     xmm3, xmm1
+    psrlw      xmm1, 8
+    pand       xmm2, xmm5
+    pand       xmm3, xmm5
+    pavgw      xmm0, xmm2
+    pavgw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+
+    sub        ecx, 16
+    movdqu     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Point samples 32 pixels to 8 pixels.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                        uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff0000
+    psrld      xmm5, 24
+    pslld      xmm5, 16
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    pand       xmm0, xmm5
+    pand       xmm1, xmm5
+    packuswb   xmm0, xmm1
+    psrlw      xmm0, 8
+    packuswb   xmm0, xmm0
+    sub        ecx, 8
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x4 rectangle to 8x1.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                           uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_ptr
+    mov        esi, [esp + 8 + 8]    // src_stride
+    mov        edx, [esp + 8 + 12]   // dst_ptr
+    mov        ecx, [esp + 8 + 16]   // dst_width
+    lea        edi, [esi + esi * 2]  // src_stride * 3
+    pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff
+    psrlw      xmm7, 8
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + esi]
+    movdqa     xmm3, [eax + esi + 16]
+    pavgb      xmm0, xmm2            // average rows
+    pavgb      xmm1, xmm3
+    movdqa     xmm2, [eax + esi * 2]
+    movdqa     xmm3, [eax + esi * 2 + 16]
+    movdqa     xmm4, [eax + edi]
+    movdqa     xmm5, [eax + edi + 16]
+    lea        eax, [eax + 32]
+    pavgb      xmm2, xmm4
+    pavgb      xmm3, xmm5
+    pavgb      xmm0, xmm2
+    pavgb      xmm1, xmm3
+
+    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
+    psrlw      xmm0, 8
+    movdqa     xmm3, xmm1
+    psrlw      xmm1, 8
+    pand       xmm2, xmm7
+    pand       xmm3, xmm7
+    pavgw      xmm0, xmm2
+    pavgw      xmm1, xmm3
+    packuswb   xmm0, xmm1
+
+    movdqa     xmm2, xmm0            // average columns (16 to 8 pixels)
+    psrlw      xmm0, 8
+    pand       xmm2, xmm7
+    pavgw      xmm0, xmm2
+    packuswb   xmm0, xmm0
+
+    sub        ecx, 8
+    movq       qword ptr [edx], xmm0
+    lea        edx, [edx + 8]
+    jg         wloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// Point samples 32 pixels to 24 pixels.
+// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
+// Then shuffled to do the scaling.
+
+// Note that movdqa+palign may be better than movdqu.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+    movdqa     xmm3, kShuf0
+    movdqa     xmm4, kShuf1
+    movdqa     xmm5, kShuf2
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    movdqa     xmm2, xmm1
+    palignr    xmm1, xmm0, 8
+    pshufb     xmm0, xmm3
+    pshufb     xmm1, xmm4
+    pshufb     xmm2, xmm5
+    movq       qword ptr [edx], xmm0
+    movq       qword ptr [edx + 8], xmm1
+    movq       qword ptr [edx + 16], xmm2
+    lea        edx, [edx + 24]
+    sub        ecx, 24
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 32x2 rectangle to 24x1
+// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
+// Then shuffled to do the scaling.
+
+// Register usage:
+// xmm0 src_row 0
+// xmm1 src_row 1
+// xmm2 shuf 0
+// xmm3 shuf 1
+// xmm4 shuf 2
+// xmm5 madd 0
+// xmm6 madd 1
+// xmm7 kRound34
+
+// Note that movdqa+palign may be better than movdqu.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    movdqa     xmm2, kShuf01
+    movdqa     xmm3, kShuf11
+    movdqa     xmm4, kShuf21
+    movdqa     xmm5, kMadd01
+    movdqa     xmm6, kMadd11
+    movdqa     xmm7, kRound34
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]           // pixels 0..7
+    movdqa     xmm1, [eax + esi]
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm2
+    pmaddubsw  xmm0, xmm5
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    movq       qword ptr [edx], xmm0
+    movdqu     xmm0, [eax + 8]       // pixels 8..15
+    movdqu     xmm1, [eax + esi + 8]
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm3
+    pmaddubsw  xmm0, xmm6
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    movq       qword ptr [edx + 8], xmm0
+    movdqa     xmm0, [eax + 16]      // pixels 16..23
+    movdqa     xmm1, [eax + esi + 16]
+    lea        eax, [eax + 32]
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm4
+    movdqa     xmm1, kMadd21
+    pmaddubsw  xmm0, xmm1
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    sub        ecx, 24
+    movq       qword ptr [edx + 16], xmm0
+    lea        edx, [edx + 24]
+    jg         wloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Note that movdqa+palign may be better than movdqu.
+// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    movdqa     xmm2, kShuf01
+    movdqa     xmm3, kShuf11
+    movdqa     xmm4, kShuf21
+    movdqa     xmm5, kMadd01
+    movdqa     xmm6, kMadd11
+    movdqa     xmm7, kRound34
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]           // pixels 0..7
+    movdqa     xmm1, [eax + esi]
+    pavgb      xmm1, xmm0
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm2
+    pmaddubsw  xmm0, xmm5
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    movq       qword ptr [edx], xmm0
+    movdqu     xmm0, [eax + 8]       // pixels 8..15
+    movdqu     xmm1, [eax + esi + 8]
+    pavgb      xmm1, xmm0
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm3
+    pmaddubsw  xmm0, xmm6
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    movq       qword ptr [edx + 8], xmm0
+    movdqa     xmm0, [eax + 16]      // pixels 16..23
+    movdqa     xmm1, [eax + esi + 16]
+    lea        eax, [eax + 32]
+    pavgb      xmm1, xmm0
+    pavgb      xmm0, xmm1
+    pshufb     xmm0, xmm4
+    movdqa     xmm1, kMadd21
+    pmaddubsw  xmm0, xmm1
+    paddsw     xmm0, xmm7
+    psrlw      xmm0, 2
+    packuswb   xmm0, xmm0
+    sub        ecx, 24
+    movq       qword ptr [edx + 16], xmm0
+    lea        edx, [edx+24]
+    jg         wloop
+
+    pop        esi
+    ret
+  }
+}
+
+// 3/8 point sampler
+
+// Scale 32 pixels to 12
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst_ptr, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_ptr
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_ptr
+    mov        ecx, [esp + 16]       // dst_width
+    movdqa     xmm4, kShuf38a
+    movdqa     xmm5, kShuf38b
+
+    align      4
+  xloop:
+    movdqa     xmm0, [eax]           // 16 pixels -> 0,1,2,3,4,5
+    movdqa     xmm1, [eax + 16]      // 16 pixels -> 6,7,8,9,10,11
+    lea        eax, [eax + 32]
+    pshufb     xmm0, xmm4
+    pshufb     xmm1, xmm5
+    paddusb    xmm0, xmm1
+
+    sub        ecx, 12
+    movq       qword ptr [edx], xmm0  // write 12 pixels
+    movhlps    xmm1, xmm0
+    movd       [edx + 8], xmm1
+    lea        edx, [edx + 12]
+    jg         xloop
+
+    ret
+  }
+}
+
+// Scale 16x3 pixels to 6x1 with interpolation
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    movdqa     xmm2, kShufAc
+    movdqa     xmm3, kShufAc3
+    movdqa     xmm4, kScaleAc33
+    pxor       xmm5, xmm5
+
+    align      4
+  xloop:
+    movdqa     xmm0, [eax]           // sum up 3 rows into xmm0/1
+    movdqa     xmm6, [eax + esi]
+    movhlps    xmm1, xmm0
+    movhlps    xmm7, xmm6
+    punpcklbw  xmm0, xmm5
+    punpcklbw  xmm1, xmm5
+    punpcklbw  xmm6, xmm5
+    punpcklbw  xmm7, xmm5
+    paddusw    xmm0, xmm6
+    paddusw    xmm1, xmm7
+    movdqa     xmm6, [eax + esi * 2]
+    lea        eax, [eax + 16]
+    movhlps    xmm7, xmm6
+    punpcklbw  xmm6, xmm5
+    punpcklbw  xmm7, xmm5
+    paddusw    xmm0, xmm6
+    paddusw    xmm1, xmm7
+
+    movdqa     xmm6, xmm0            // 8 pixels -> 0,1,2 of xmm6
+    psrldq     xmm0, 2
+    paddusw    xmm6, xmm0
+    psrldq     xmm0, 2
+    paddusw    xmm6, xmm0
+    pshufb     xmm6, xmm2
+
+    movdqa     xmm7, xmm1            // 8 pixels -> 3,4,5 of xmm6
+    psrldq     xmm1, 2
+    paddusw    xmm7, xmm1
+    psrldq     xmm1, 2
+    paddusw    xmm7, xmm1
+    pshufb     xmm7, xmm3
+    paddusw    xmm6, xmm7
+
+    pmulhuw    xmm6, xmm4            // divide by 9,9,6, 9,9,6
+    packuswb   xmm6, xmm6
+
+    sub        ecx, 6
+    movd       [edx], xmm6           // write 6 pixels
+    psrlq      xmm6, 16
+    movd       [edx + 2], xmm6
+    lea        edx, [edx + 6]
+    jg         xloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Scale 16x2 pixels to 6x1 with interpolation
+__declspec(naked) __declspec(align(16))
+void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
+                                ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_ptr
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_ptr
+    mov        ecx, [esp + 4 + 16]   // dst_width
+    movdqa     xmm2, kShufAb0
+    movdqa     xmm3, kShufAb1
+    movdqa     xmm4, kShufAb2
+    movdqa     xmm5, kScaleAb2
+
+    align      4
+  xloop:
+    movdqa     xmm0, [eax]           // average 2 rows into xmm0
+    pavgb      xmm0, [eax + esi]
+    lea        eax, [eax + 16]
+
+    movdqa     xmm1, xmm0            // 16 pixels -> 0,1,2,3,4,5 of xmm1
+    pshufb     xmm1, xmm2
+    movdqa     xmm6, xmm0
+    pshufb     xmm6, xmm3
+    paddusw    xmm1, xmm6
+    pshufb     xmm0, xmm4
+    paddusw    xmm1, xmm0
+
+    pmulhuw    xmm1, xmm5            // divide by 3,3,2, 3,3,2
+    packuswb   xmm1, xmm1
+
+    sub        ecx, 6
+    movd       [edx], xmm1           // write 6 pixels
+    psrlq      xmm1, 16
+    movd       [edx + 2], xmm1
+    lea        edx, [edx + 6]
+    jg         xloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Reads 16xN bytes and produces 16 shorts at a time.
+// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
+__declspec(naked) __declspec(align(16))
+void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
+                       uint16* dst_ptr, int src_width,
+                       int src_height) {
+  __asm {
+    push       esi
+    push       edi
+    push       ebx
+    push       ebp
+    mov        esi, [esp + 16 + 4]   // src_ptr
+    mov        edx, [esp + 16 + 8]   // src_stride
+    mov        edi, [esp + 16 + 12]  // dst_ptr
+    mov        ecx, [esp + 16 + 16]  // dst_width
+    mov        ebx, [esp + 16 + 20]  // height
+    pxor       xmm4, xmm4
+    dec        ebx
+
+    align      4
+  xloop:
+    // first row
+    movdqa     xmm0, [esi]
+    lea        eax, [esi + edx]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm4
+    punpckhbw  xmm1, xmm4
+    lea        esi, [esi + 16]
+    mov        ebp, ebx
+    test       ebp, ebp
+    je         ydone
+
+    // sum remaining rows
+    align      4
+  yloop:
+    movdqa     xmm2, [eax]       // read 16 pixels
+    lea        eax, [eax + edx]  // advance to next row
+    movdqa     xmm3, xmm2
+    punpcklbw  xmm2, xmm4
+    punpckhbw  xmm3, xmm4
+    paddusw    xmm0, xmm2        // sum 16 words
+    paddusw    xmm1, xmm3
+    sub        ebp, 1
+    jg         yloop
+
+    align      4
+  ydone:
+    movdqa     [edi], xmm0
+    movdqa     [edi + 16], xmm1
+    lea        edi, [edi + 32]
+
+    sub        ecx, 16
+    jg         xloop
+
+    pop        ebp
+    pop        ebx
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// Bilinear column filtering. SSSE3 version.
+// TODO(fbarchard): Port to Neon
+// TODO(fbarchard): Switch the following:
+//    xor        ebx, ebx
+//    mov        bx, word ptr [esi + eax]  // 2 source x0 pixels
+// To
+//    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
+// when drmemory bug fixed.
+// https://code.google.com/p/drmemory/issues/detail?id=1396
+
+__declspec(naked) __declspec(align(16))
+void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
+                           int dst_width, int x, int dx) {
+  __asm {
+    push       ebx
+    push       esi
+    push       edi
+    mov        edi, [esp + 12 + 4]    // dst_ptr
+    mov        esi, [esp + 12 + 8]    // src_ptr
+    mov        ecx, [esp + 12 + 12]   // dst_width
+    movd       xmm2, [esp + 12 + 16]  // x
+    movd       xmm3, [esp + 12 + 20]  // dx
+    mov        eax, 0x04040000      // shuffle to line up fractions with pixel.
+    movd       xmm5, eax
+    pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction.
+    psrlw      xmm6, 9
+    pextrw     eax, xmm2, 1         // get x0 integer. preroll
+    sub        ecx, 2
+    jl         xloop29
+
+    movdqa     xmm0, xmm2           // x1 = x0 + dx
+    paddd      xmm0, xmm3
+    punpckldq  xmm2, xmm0           // x0 x1
+    punpckldq  xmm3, xmm3           // dx dx
+    paddd      xmm3, xmm3           // dx * 2, dx * 2
+    pextrw     edx, xmm2, 3         // get x1 integer. preroll
+
+    // 2 Pixel loop.
+    align      4
+  xloop2:
+    movdqa     xmm1, xmm2           // x0, x1 fractions.
+    paddd      xmm2, xmm3           // x += dx
+    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
+    movd       xmm0, ebx
+    psrlw      xmm1, 9              // 7 bit fractions.
+    movzx      ebx, word ptr [esi + edx]  // 2 source x1 pixels
+    movd       xmm4, ebx
+    pshufb     xmm1, xmm5           // 0011
+    punpcklwd  xmm0, xmm4
+    pxor       xmm1, xmm6           // 0..7f and 7f..0
+    pmaddubsw  xmm0, xmm1           // 16 bit, 2 pixels.
+    pextrw     eax, xmm2, 1         // get x0 integer. next iteration.
+    pextrw     edx, xmm2, 3         // get x1 integer. next iteration.
+    psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits.
+    packuswb   xmm0, xmm0           // 8 bits, 2 pixels.
+    movd       ebx, xmm0
+    mov        [edi], bx
+    lea        edi, [edi + 2]
+    sub        ecx, 2               // 2 pixels
+    jge        xloop2
+
+    align      4
+ xloop29:
+
+    add        ecx, 2 - 1
+    jl         xloop99
+
+    // 1 pixel remainder
+    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
+    movd       xmm0, ebx
+    psrlw      xmm2, 9              // 7 bit fractions.
+    pshufb     xmm2, xmm5           // 0011
+    pxor       xmm2, xmm6           // 0..7f and 7f..0
+    pmaddubsw  xmm0, xmm2           // 16 bit
+    psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits.
+    packuswb   xmm0, xmm0           // 8 bits
+    movd       ebx, xmm0
+    mov        [edi], bl
+
+    align      4
+ xloop99:
+
+    pop        edi
+    pop        esi
+    pop        ebx
+    ret
+  }
+}
+
+// Reads 16 pixels, duplicates them and writes 32 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
+                       int dst_width, int x, int dx) {
+  __asm {
+    mov        edx, [esp + 4]    // dst_ptr
+    mov        eax, [esp + 8]    // src_ptr
+    mov        ecx, [esp + 12]   // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    lea        eax,  [eax + 16]
+    movdqa     xmm1, xmm0
+    punpcklbw  xmm0, xmm0
+    punpckhbw  xmm1, xmm1
+    sub        ecx, 32
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
+                            ptrdiff_t src_stride,
+                            uint8* dst_argb, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_argb
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_argb
+    mov        ecx, [esp + 16]       // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    shufps     xmm0, xmm1, 0xdd
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 8x1 rectangle to 4x1.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_argb, int dst_width) {
+  __asm {
+    mov        eax, [esp + 4]        // src_argb
+                                     // src_stride ignored
+    mov        edx, [esp + 12]       // dst_argb
+    mov        ecx, [esp + 16]       // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    lea        eax,  [eax + 32]
+    movdqa     xmm2, xmm0
+    shufps     xmm0, xmm1, 0x88      // even pixels
+    shufps     xmm2, xmm1, 0xdd      // odd pixels
+    pavgb      xmm0, xmm2
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Blends 8x2 rectangle to 4x1.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
+                               ptrdiff_t src_stride,
+                               uint8* dst_argb, int dst_width) {
+  __asm {
+    push       esi
+    mov        eax, [esp + 4 + 4]    // src_argb
+    mov        esi, [esp + 4 + 8]    // src_stride
+    mov        edx, [esp + 4 + 12]   // dst_argb
+    mov        ecx, [esp + 4 + 16]   // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + esi]
+    movdqa     xmm3, [eax + esi + 16]
+    lea        eax,  [eax + 32]
+    pavgb      xmm0, xmm2            // average rows
+    pavgb      xmm1, xmm3
+    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
+    shufps     xmm0, xmm1, 0x88      // even pixels
+    shufps     xmm2, xmm1, 0xdd      // odd pixels
+    pavgb      xmm0, xmm2
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    pop        esi
+    ret
+  }
+}
+
+// Reads 4 pixels at a time.
+// Alignment requirement: dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
+                               int src_stepx,
+                               uint8* dst_argb, int dst_width) {
+  __asm {
+    push       ebx
+    push       edi
+    mov        eax, [esp + 8 + 4]    // src_argb
+                                     // src_stride ignored
+    mov        ebx, [esp + 8 + 12]   // src_stepx
+    mov        edx, [esp + 8 + 16]   // dst_argb
+    mov        ecx, [esp + 8 + 20]   // dst_width
+    lea        ebx, [ebx * 4]
+    lea        edi, [ebx + ebx * 2]
+
+    align      4
+  wloop:
+    movd       xmm0, [eax]
+    movd       xmm1, [eax + ebx]
+    punpckldq  xmm0, xmm1
+    movd       xmm2, [eax + ebx * 2]
+    movd       xmm3, [eax + edi]
+    lea        eax,  [eax + ebx * 4]
+    punpckldq  xmm2, xmm3
+    punpcklqdq xmm0, xmm2
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    pop        edi
+    pop        ebx
+    ret
+  }
+}
+
+// Blends four 2x2 to 4x1.
+// Alignment requirement: dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
+                                  ptrdiff_t src_stride,
+                                  int src_stepx,
+                                  uint8* dst_argb, int dst_width) {
+  __asm {
+    push       ebx
+    push       esi
+    push       edi
+    mov        eax, [esp + 12 + 4]    // src_argb
+    mov        esi, [esp + 12 + 8]    // src_stride
+    mov        ebx, [esp + 12 + 12]   // src_stepx
+    mov        edx, [esp + 12 + 16]   // dst_argb
+    mov        ecx, [esp + 12 + 20]   // dst_width
+    lea        esi, [eax + esi]       // row1 pointer
+    lea        ebx, [ebx * 4]
+    lea        edi, [ebx + ebx * 2]
+
+    align      4
+  wloop:
+    movq       xmm0, qword ptr [eax]  // row0 4 pairs
+    movhps     xmm0, qword ptr [eax + ebx]
+    movq       xmm1, qword ptr [eax + ebx * 2]
+    movhps     xmm1, qword ptr [eax + edi]
+    lea        eax,  [eax + ebx * 4]
+    movq       xmm2, qword ptr [esi]  // row1 4 pairs
+    movhps     xmm2, qword ptr [esi + ebx]
+    movq       xmm3, qword ptr [esi + ebx * 2]
+    movhps     xmm3, qword ptr [esi + edi]
+    lea        esi,  [esi + ebx * 4]
+    pavgb      xmm0, xmm2            // average rows
+    pavgb      xmm1, xmm3
+    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
+    shufps     xmm0, xmm1, 0x88      // even pixels
+    shufps     xmm2, xmm1, 0xdd      // odd pixels
+    pavgb      xmm0, xmm2
+    sub        ecx, 4
+    movdqa     [edx], xmm0
+    lea        edx, [edx + 16]
+    jg         wloop
+
+    pop        edi
+    pop        esi
+    pop        ebx
+    ret
+  }
+}
+
+// Column scaling unfiltered. SSE2 version.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
+                        int dst_width, int x, int dx) {
+  __asm {
+    push       edi
+    push       esi
+    mov        edi, [esp + 8 + 4]    // dst_argb
+    mov        esi, [esp + 8 + 8]    // src_argb
+    mov        ecx, [esp + 8 + 12]   // dst_width
+    movd       xmm2, [esp + 8 + 16]  // x
+    movd       xmm3, [esp + 8 + 20]  // dx
+
+    pshufd     xmm2, xmm2, 0         // x0 x0 x0 x0
+    pshufd     xmm0, xmm3, 0x11      // dx  0 dx  0
+    paddd      xmm2, xmm0
+    paddd      xmm3, xmm3            // 0, 0, 0,  dx * 2
+    pshufd     xmm0, xmm3, 0x05      // dx * 2, dx * 2, 0, 0
+    paddd      xmm2, xmm0            // x3 x2 x1 x0
+    paddd      xmm3, xmm3            // 0, 0, 0,  dx * 4
+    pshufd     xmm3, xmm3, 0         // dx * 4, dx * 4, dx * 4, dx * 4
+
+    pextrw     eax, xmm2, 1          // get x0 integer.
+    pextrw     edx, xmm2, 3          // get x1 integer.
+
+    cmp        ecx, 0
+    jle        xloop99
+    sub        ecx, 4
+    jl         xloop49
+
+    // 4 Pixel loop.
+    align      4
+ xloop4:
+    movd       xmm0, [esi + eax * 4]  // 1 source x0 pixels
+    movd       xmm1, [esi + edx * 4]  // 1 source x1 pixels
+    pextrw     eax, xmm2, 5           // get x2 integer.
+    pextrw     edx, xmm2, 7           // get x3 integer.
+    paddd      xmm2, xmm3             // x += dx
+    punpckldq  xmm0, xmm1             // x0 x1
+
+    movd       xmm1, [esi + eax * 4]  // 1 source x2 pixels
+    movd       xmm4, [esi + edx * 4]  // 1 source x3 pixels
+    pextrw     eax, xmm2, 1           // get x0 integer. next iteration.
+    pextrw     edx, xmm2, 3           // get x1 integer. next iteration.
+    punpckldq  xmm1, xmm4             // x2 x3
+    punpcklqdq xmm0, xmm1             // x0 x1 x2 x3
+    sub        ecx, 4                 // 4 pixels
+    movdqu     [edi], xmm0
+    lea        edi, [edi + 16]
+    jge        xloop4
+
+    align      4
+ xloop49:
+    test       ecx, 2
+    je         xloop29
+
+    // 2 Pixels.
+    movd       xmm0, [esi + eax * 4]  // 1 source x0 pixels
+    movd       xmm1, [esi + edx * 4]  // 1 source x1 pixels
+    pextrw     eax, xmm2, 5           // get x2 integer.
+    punpckldq  xmm0, xmm1             // x0 x1
+
+    movq       qword ptr [edi], xmm0
+    lea        edi, [edi + 8]
+
+ xloop29:
+    test       ecx, 1
+    je         xloop99
+
+    // 1 Pixels.
+    movd       xmm0, [esi + eax * 4]  // 1 source x2 pixels
+    movd       dword ptr [edi], xmm0
+    align      4
+ xloop99:
+
+    pop        esi
+    pop        edi
+    ret
+  }
+}
+
+// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version.
+// TODO(fbarchard): Port to Neon
+
+// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
+static uvec8 kShuffleColARGB = {
+  0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u,  // bbggrraa 1st pixel
+  8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u  // bbggrraa 2nd pixel
+};
+
+// Shuffle table for duplicating 2 fractions into 8 bytes each
+static uvec8 kShuffleFractions = {
+  0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
+};
+
+__declspec(naked) __declspec(align(16))
+void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
+                               int dst_width, int x, int dx) {
+  __asm {
+    push       esi
+    push       edi
+    mov        edi, [esp + 8 + 4]    // dst_argb
+    mov        esi, [esp + 8 + 8]    // src_argb
+    mov        ecx, [esp + 8 + 12]   // dst_width
+    movd       xmm2, [esp + 8 + 16]  // x
+    movd       xmm3, [esp + 8 + 20]  // dx
+    movdqa     xmm4, kShuffleColARGB
+    movdqa     xmm5, kShuffleFractions
+    pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction.
+    psrlw      xmm6, 9
+    pextrw     eax, xmm2, 1         // get x0 integer. preroll
+    sub        ecx, 2
+    jl         xloop29
+
+    movdqa     xmm0, xmm2           // x1 = x0 + dx
+    paddd      xmm0, xmm3
+    punpckldq  xmm2, xmm0           // x0 x1
+    punpckldq  xmm3, xmm3           // dx dx
+    paddd      xmm3, xmm3           // dx * 2, dx * 2
+    pextrw     edx, xmm2, 3         // get x1 integer. preroll
+
+    // 2 Pixel loop.
+    align      4
+  xloop2:
+    movdqa     xmm1, xmm2           // x0, x1 fractions.
+    paddd      xmm2, xmm3           // x += dx
+    movq       xmm0, qword ptr [esi + eax * 4]  // 2 source x0 pixels
+    psrlw      xmm1, 9              // 7 bit fractions.
+    movhps     xmm0, qword ptr [esi + edx * 4]  // 2 source x1 pixels
+    pshufb     xmm1, xmm5           // 0000000011111111
+    pshufb     xmm0, xmm4           // arrange pixels into pairs
+    pxor       xmm1, xmm6           // 0..7f and 7f..0
+    pmaddubsw  xmm0, xmm1           // argb_argb 16 bit, 2 pixels.
+    pextrw     eax, xmm2, 1         // get x0 integer. next iteration.
+    pextrw     edx, xmm2, 3         // get x1 integer. next iteration.
+    psrlw      xmm0, 7              // argb 8.7 fixed point to low 8 bits.
+    packuswb   xmm0, xmm0           // argb_argb 8 bits, 2 pixels.
+    movq       qword ptr [edi], xmm0
+    lea        edi, [edi + 8]
+    sub        ecx, 2               // 2 pixels
+    jge        xloop2
+
+    align      4
+ xloop29:
+
+    add        ecx, 2 - 1
+    jl         xloop99
+
+    // 1 pixel remainder
+    psrlw      xmm2, 9              // 7 bit fractions.
+    movq       xmm0, qword ptr [esi + eax * 4]  // 2 source x0 pixels
+    pshufb     xmm2, xmm5           // 00000000
+    pshufb     xmm0, xmm4           // arrange pixels into pairs
+    pxor       xmm2, xmm6           // 0..7f and 7f..0
+    pmaddubsw  xmm0, xmm2           // argb 16 bit, 1 pixel.
+    psrlw      xmm0, 7
+    packuswb   xmm0, xmm0           // argb 8 bits, 1 pixel.
+    movd       [edi], xmm0
+
+    align      4
+ xloop99:
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
+// Reads 4 pixels, duplicates them and writes 8 pixels.
+// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
+__declspec(naked) __declspec(align(16))
+void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
+                           int dst_width, int x, int dx) {
+  __asm {
+    mov        edx, [esp + 4]    // dst_argb
+    mov        eax, [esp + 8]    // src_argb
+    mov        ecx, [esp + 12]   // dst_width
+
+    align      4
+  wloop:
+    movdqa     xmm0, [eax]
+    lea        eax,  [eax + 16]
+    movdqa     xmm1, xmm0
+    punpckldq  xmm0, xmm0
+    punpckhdq  xmm1, xmm1
+    sub        ecx, 8
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
+    jg         wloop
+
+    ret
+  }
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+__declspec(naked) __declspec(align(16))
+int FixedDiv_X86(int num, int div) {
+  __asm {
+    mov        eax, [esp + 4]    // num
+    cdq                          // extend num to 64 bits
+    shld       edx, eax, 16      // 32.16
+    shl        eax, 16
+    idiv       dword ptr [esp + 8]
+    ret
+  }
+}
+
+// Divide num by div and return as 16.16 fixed point result.
+__declspec(naked) __declspec(align(16))
+int FixedDiv1_X86(int num, int div) {
+  __asm {
+    mov        eax, [esp + 4]    // num
+    mov        ecx, [esp + 8]    // denom
+    cdq                          // extend num to 64 bits
+    shld       edx, eax, 16      // 32.16
+    shl        eax, 16
+    sub        eax, 0x00010001
+    sbb        edx, 0
+    sub        ecx, 1
+    idiv       ecx
+    ret
+  }
+}
+
+#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/src/main/jni/libyuv/source/video_common.cc b/src/main/jni/libyuv/source/video_common.cc
new file mode 100644
index 000000000..efbedf46e
--- /dev/null
+++ b/src/main/jni/libyuv/source/video_common.cc
@@ -0,0 +1,64 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "libyuv/video_common.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
+
+struct FourCCAliasEntry {
+  uint32 alias;
+  uint32 canonical;
+};
+
+static const struct FourCCAliasEntry kFourCCAliases[] = {
+  {FOURCC_IYUV, FOURCC_I420},
+  {FOURCC_YU16, FOURCC_I422},
+  {FOURCC_YU24, FOURCC_I444},
+  {FOURCC_YUYV, FOURCC_YUY2},
+  {FOURCC_YUVS, FOURCC_YUY2},  // kCMPixelFormat_422YpCbCr8_yuvs
+  {FOURCC_HDYC, FOURCC_UYVY},
+  {FOURCC_2VUY, FOURCC_UYVY},  // kCMPixelFormat_422YpCbCr8
+  {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
+  {FOURCC_DMB1, FOURCC_MJPG},
+  {FOURCC_BA81, FOURCC_BGGR},
+  {FOURCC_RGB3, FOURCC_RAW },
+  {FOURCC_BGR3, FOURCC_24BG},
+  {FOURCC_CM32, FOURCC_BGRA},  // kCMPixelFormat_32ARGB
+  {FOURCC_CM24, FOURCC_RAW },  // kCMPixelFormat_24RGB
+  {FOURCC_L555, FOURCC_RGBO},  // kCMPixelFormat_16LE555
+  {FOURCC_L565, FOURCC_RGBP},  // kCMPixelFormat_16LE565
+  {FOURCC_5551, FOURCC_RGBO},  // kCMPixelFormat_16LE5551
+};
+// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
+//  {FOURCC_BGRA, FOURCC_ARGB},  // kCMPixelFormat_32BGRA
+
+LIBYUV_API
+uint32 CanonicalFourCC(uint32 fourcc) {
+  int i;
+  for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
+    if (kFourCCAliases[i].alias == fourcc) {
+      return kFourCCAliases[i].canonical;
+    }
+  }
+  // Not an alias, so return it as-is.
+  return fourcc;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
diff --git a/src/main/jni/libyuv/source/x86inc.asm b/src/main/jni/libyuv/source/x86inc.asm
new file mode 100644
index 000000000..cb5c32df3
--- /dev/null
+++ b/src/main/jni/libyuv/source/x86inc.asm
@@ -0,0 +1,1136 @@
+;*****************************************************************************
+;* x86inc.asm: x264asm abstraction layer
+;*****************************************************************************
+;* Copyright (C) 2005-2012 x264 project
+;*
+;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;*          Anton Mitrofanov <BugMaster@narod.ru>
+;*          Jason Garrett-Glaser <darkshikari@gmail.com>
+;*          Henrik Gramner <hengar-6@student.ltu.se>
+;*
+;* Permission to use, copy, modify, and/or distribute this software for any
+;* purpose with or without fee is hereby granted, provided that the above
+;* copyright notice and this permission notice appear in all copies.
+;*
+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+;*****************************************************************************
+
+; This is a header file for the x264ASM assembly language, which uses
+; NASM/YASM syntax combined with a large number of macros to provide easy
+; abstraction between different calling conventions (x86_32, win64, linux64).
+; It also has various other useful features to simplify writing the kind of
+; DSP functions that are most often used in x264.
+
+; Unlike the rest of x264, this file is available under an ISC license, as it
+; has significant usefulness outside of x264 and we want it to be available
+; to the largest audience possible.  Of course, if you modify it for your own
+; purposes to add a new feature, we strongly encourage contributing a patch
+; as this feature might be useful for others as well.  Send patches or ideas
+; to x264-devel@videolan.org .
+
+; Local changes for libyuv:
+; remove %define program_name and references in labels
+; rename cpus to uppercase
+
+%define WIN64  0
+%define UNIX64 0
+%if ARCH_X86_64
+    %ifidn __OUTPUT_FORMAT__,win32
+        %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,win64
+        %define WIN64  1
+    %else
+        %define UNIX64 1
+    %endif
+%endif
+
+%ifdef PREFIX
+    %define mangle(x) _ %+ x
+%else
+    %define mangle(x) x
+%endif
+
+; Name of the .rodata section.
+; Kludge: Something on OS X fails to align .rodata even given an align attribute,
+; so use a different read-only section.
+%macro SECTION_RODATA 0-1 16
+    %ifidn __OUTPUT_FORMAT__,macho64
+        SECTION .text align=%1
+    %elifidn __OUTPUT_FORMAT__,macho
+        SECTION .text align=%1
+        fakegot:
+    %elifidn __OUTPUT_FORMAT__,aout
+        section .text
+    %else
+        SECTION .rodata align=%1
+    %endif
+%endmacro
+
+; aout does not support align=
+%macro SECTION_TEXT 0-1 16
+    %ifidn __OUTPUT_FORMAT__,aout
+        SECTION .text
+    %else
+        SECTION .text align=%1
+    %endif
+%endmacro
+
+%if WIN64
+    %define PIC
+%elif ARCH_X86_64 == 0
+; x86_32 doesn't require PIC.
+; Some distros prefer shared objects to be PIC, but nothing breaks if
+; the code contains a few textrels, so we'll skip that complexity.
+    %undef PIC
+%endif
+%ifdef PIC
+    default rel
+%endif
+
+; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
+CPU amdnop
+
+; Macros to eliminate most code duplication between x86_32 and x86_64:
+; Currently this works only for leaf functions which load all their arguments
+; into registers at the start, and make no other use of the stack. Luckily that
+; covers most of x264's asm.
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used. pushes callee-saved regs if needed.
+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
+; %4 = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,0, dst, src, tmp
+; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE, and returns.
+
+; REP_RET:
+; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
+; which are slow when a normal ret follows a branch.
+
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNh is the high 8 bits of the word size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+%macro DECLARE_REG 2-3
+    %define r%1q %2
+    %define r%1d %2d
+    %define r%1w %2w
+    %define r%1b %2b
+    %define r%1h %2h
+    %if %0 == 2
+        %define r%1m  %2d
+        %define r%1mp %2
+    %elif ARCH_X86_64 ; memory
+        %define r%1m [rsp + stack_offset + %3]
+        %define r%1mp qword r %+ %1m
+    %else
+        %define r%1m [esp + stack_offset + %3]
+        %define r%1mp dword r %+ %1m
+    %endif
+    %define r%1  %2
+%endmacro
+
+%macro DECLARE_REG_SIZE 3
+    %define r%1q r%1
+    %define e%1q r%1
+    %define r%1d e%1
+    %define e%1d e%1
+    %define r%1w %1
+    %define e%1w %1
+    %define r%1h %3
+    %define e%1h %3
+    %define r%1b %2
+    %define e%1b %2
+%if ARCH_X86_64 == 0
+    %define r%1  e%1
+%endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al, ah
+DECLARE_REG_SIZE bx, bl, bh
+DECLARE_REG_SIZE cx, cl, ch
+DECLARE_REG_SIZE dx, dl, dh
+DECLARE_REG_SIZE si, sil, null
+DECLARE_REG_SIZE di, dil, null
+DECLARE_REG_SIZE bp, bpl, null
+
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-*
+    %assign %%i 0
+    %rep %0
+        CAT_XDEFINE t, %%i, r%1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+    %rep %0
+        %define t%1q t%1 %+ q
+        %define t%1d t%1 %+ d
+        %define t%1w t%1 %+ w
+        %define t%1h t%1 %+ h
+        %define t%1b t%1 %+ b
+        %rotate 1
+    %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
+
+%if ARCH_X86_64
+    %define gprsize 8
+%else
+    %define gprsize 4
+%endif
+
+%macro PUSH 1
+    push %1
+    %assign stack_offset stack_offset+gprsize
+%endmacro
+
+%macro POP 1
+    pop %1
+    %assign stack_offset stack_offset-gprsize
+%endmacro
+
+%macro PUSH_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            PUSH r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro POP_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            pop r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro LOAD_IF_USED 1-*
+    %rep %0
+        %if %1 < num_args
+            mov r%1, r %+ %1 %+ mp
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro SUB 2
+    sub %1, %2
+    %ifidn %1, rsp
+        %assign stack_offset stack_offset+(%2)
+    %endif
+%endmacro
+
+%macro ADD 2
+    add %1, %2
+    %ifidn %1, rsp
+        %assign stack_offset stack_offset-(%2)
+    %endif
+%endmacro
+
+%macro movifnidn 2
+    %ifnidn %1, %2
+        mov %1, %2
+    %endif
+%endmacro
+
+%macro movsxdifnidn 2
+    %ifnidn %1, %2
+        movsxd %1, %2
+    %endif
+%endmacro
+
+%macro ASSERT 1
+    %if (%1) == 0
+        %error assert failed
+    %endif
+%endmacro
+
+%macro DEFINE_ARGS 0-*
+    %ifdef n_arg_names
+        %assign %%i 0
+        %rep n_arg_names
+            CAT_UNDEF arg_name %+ %%i, q
+            CAT_UNDEF arg_name %+ %%i, d
+            CAT_UNDEF arg_name %+ %%i, w
+            CAT_UNDEF arg_name %+ %%i, h
+            CAT_UNDEF arg_name %+ %%i, b
+            CAT_UNDEF arg_name %+ %%i, m
+            CAT_UNDEF arg_name %+ %%i, mp
+            CAT_UNDEF arg_name, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+
+    %xdefine %%stack_offset stack_offset
+    %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
+    %assign %%i 0
+    %rep %0
+        %xdefine %1q r %+ %%i %+ q
+        %xdefine %1d r %+ %%i %+ d
+        %xdefine %1w r %+ %%i %+ w
+        %xdefine %1h r %+ %%i %+ h
+        %xdefine %1b r %+ %%i %+ b
+        %xdefine %1m r %+ %%i %+ m
+        %xdefine %1mp r %+ %%i %+ mp
+        CAT_XDEFINE arg_name, %%i, %1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+    %xdefine stack_offset %%stack_offset
+    %assign n_arg_names %0
+%endmacro
+
+%if WIN64 ; Windows x64 ;=================================================
+
+DECLARE_REG 0,  rcx
+DECLARE_REG 1,  rdx
+DECLARE_REG 2,  R8
+DECLARE_REG 3,  R9
+DECLARE_REG 4,  R10, 40
+DECLARE_REG 5,  R11, 48
+DECLARE_REG 6,  rax, 56
+DECLARE_REG 7,  rdi, 64
+DECLARE_REG 8,  rsi, 72
+DECLARE_REG 9,  rbx, 80
+DECLARE_REG 10, rbp, 88
+DECLARE_REG 11, R12, 96
+DECLARE_REG 12, R13, 104
+DECLARE_REG 13, R14, 112
+DECLARE_REG 14, R15, 120
+
+%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
+    %if mmsize == 8
+        %assign xmm_regs_used 0
+    %else
+        WIN64_SPILL_XMM %3
+    %endif
+    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+    DEFINE_ARGS %4
+%endmacro
+
+%macro WIN64_SPILL_XMM 1
+    %assign xmm_regs_used %1
+    ASSERT xmm_regs_used <= 16
+    %if xmm_regs_used > 6
+        SUB rsp, (xmm_regs_used-6)*16+16
+        %assign %%i xmm_regs_used
+        %rep (xmm_regs_used-6)
+            %assign %%i %%i-1
+            movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
+        %endrep
+    %endif
+%endmacro
+
+%macro WIN64_RESTORE_XMM_INTERNAL 1
+    %if xmm_regs_used > 6
+        %assign %%i xmm_regs_used
+        %rep (xmm_regs_used-6)
+            %assign %%i %%i-1
+            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
+        %endrep
+        add %1, (xmm_regs_used-6)*16+16
+    %endif
+%endmacro
+
+%macro WIN64_RESTORE_XMM 1
+    WIN64_RESTORE_XMM_INTERNAL %1
+    %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
+    %assign xmm_regs_used 0
+%endmacro
+
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
+
+%macro RET 0
+    WIN64_RESTORE_XMM_INTERNAL rsp
+    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+%if mmsize == 32
+    vzeroupper
+%endif
+    ret
+%endmacro
+
+%elif ARCH_X86_64 ; *nix x64 ;=============================================
+
+DECLARE_REG 0,  rdi
+DECLARE_REG 1,  rsi
+DECLARE_REG 2,  rdx
+DECLARE_REG 3,  rcx
+DECLARE_REG 4,  R8
+DECLARE_REG 5,  R9
+DECLARE_REG 6,  rax, 8
+DECLARE_REG 7,  R10, 16
+DECLARE_REG 8,  R11, 24
+DECLARE_REG 9,  rbx, 32
+DECLARE_REG 10, rbp, 40
+DECLARE_REG 11, R12, 48
+DECLARE_REG 12, R13, 56
+DECLARE_REG 13, R14, 64
+DECLARE_REG 14, R15, 72
+
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 9, 10, 11, 12, 13, 14
+    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
+    DEFINE_ARGS %4
+%endmacro
+
+%define has_epilogue regs_used > 9 || mmsize == 32
+
+%macro RET 0
+    POP_IF_USED 14, 13, 12, 11, 10, 9
+%if mmsize == 32
+    vzeroupper
+%endif
+    ret
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, 4
+DECLARE_REG 1, ecx, 8
+DECLARE_REG 2, edx, 12
+DECLARE_REG 3, ebx, 16
+DECLARE_REG 4, esi, 20
+DECLARE_REG 5, edi, 24
+DECLARE_REG 6, ebp, 28
+%define rsp esp
+
+%macro DECLARE_ARG 1-*
+    %rep %0
+        %define r%1m [esp + stack_offset + 4*%1 + 4]
+        %define r%1mp dword r%1m
+        %rotate 1
+    %endrep
+%endmacro
+
+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
+
+%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    %if regs_used > 7
+        %assign regs_used 7
+    %endif
+    ASSERT regs_used >= num_args
+    PUSH_IF_USED 3, 4, 5, 6
+    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
+    DEFINE_ARGS %4
+%endmacro
+
+%define has_epilogue regs_used > 3 || mmsize == 32
+
+%macro RET 0
+    POP_IF_USED 6, 5, 4, 3
+%if mmsize == 32
+    vzeroupper
+%endif
+    ret
+%endmacro
+
+%endif ;======================================================================
+
+%if WIN64 == 0
+%macro WIN64_SPILL_XMM 1
+%endmacro
+%macro WIN64_RESTORE_XMM 1
+%endmacro
+%endif
+
+%macro REP_RET 0
+    %if has_epilogue
+        RET
+    %else
+        rep ret
+    %endif
+%endmacro
+
+%macro TAIL_CALL 2 ; callee, is_nonadjacent
+    %if has_epilogue
+        call %1
+        RET
+    %elif %2
+        jmp %1
+    %endif
+%endmacro
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Begin a function.
+; Applies any symbol mangling needed for C linkage, and sets up a define such that
+; subsequent uses of the function name automatically refer to the mangled version.
+; Appends cpuflags to the function name if cpuflags has been specified.
+%macro cglobal 1-2+ ; name, [PROLOGUE args]
+%if %0 == 1
+    cglobal_internal %1 %+ SUFFIX
+%else
+    cglobal_internal %1 %+ SUFFIX, %2
+%endif
+%endmacro
+%macro cglobal_internal 1-2+
+    %ifndef cglobaled_%1
+        %xdefine %1 mangle(%1)
+        %xdefine %1.skip_prologue %1 %+ .skip_prologue
+        CAT_XDEFINE cglobaled_, %1, 1
+    %endif
+    %xdefine current_function %1
+    %ifidn __OUTPUT_FORMAT__,elf
+        global %1:function hidden
+    %else
+        global %1
+    %endif
+    align function_align
+    %1:
+    RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
+    %assign stack_offset 0
+    %if %0 > 1
+        PROLOGUE %2
+    %endif
+%endmacro
+
+%macro cextern 1
+    %xdefine %1 mangle(%1)
+    CAT_XDEFINE cglobaled_, %1, 1
+    extern %1
+%endmacro
+
+; like cextern, but without the prefix
+%macro cextern_naked 1
+    %xdefine %1 mangle(%1)
+    CAT_XDEFINE cglobaled_, %1, 1
+    extern %1
+%endmacro
+
+%macro const 2+
+    %xdefine %1 mangle(%1)
+    global %1
+    %1: %2
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is
+; executable by default.
+%ifidn __OUTPUT_FORMAT__,elf
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+%ifidn __OUTPUT_FORMAT__,elf32
+section .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+%ifidn __OUTPUT_FORMAT__,elf64
+section .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+
+; cpuflags
+
+%assign cpuflags_MMX      (1<<0)
+%assign cpuflags_MMX2     (1<<1) | cpuflags_MMX
+%assign cpuflags_3dnow    (1<<2) | cpuflags_MMX
+%assign cpuflags_3dnow2   (1<<3) | cpuflags_3dnow
+%assign cpuflags_SSE      (1<<4) | cpuflags_MMX2
+%assign cpuflags_SSE2     (1<<5) | cpuflags_SSE
+%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
+%assign cpuflags_SSE3     (1<<7) | cpuflags_SSE2
+%assign cpuflags_SSSE3    (1<<8) | cpuflags_SSE3
+%assign cpuflags_SSE4     (1<<9) | cpuflags_SSSE3
+%assign cpuflags_SSE42    (1<<10)| cpuflags_SSE4
+%assign cpuflags_AVX      (1<<11)| cpuflags_SSE42
+%assign cpuflags_xop      (1<<12)| cpuflags_AVX
+%assign cpuflags_fma4     (1<<13)| cpuflags_AVX
+%assign cpuflags_AVX2     (1<<14)| cpuflags_AVX
+%assign cpuflags_fma3     (1<<15)| cpuflags_AVX
+
+%assign cpuflags_cache32  (1<<16)
+%assign cpuflags_cache64  (1<<17)
+%assign cpuflags_slowctz  (1<<18)
+%assign cpuflags_lzcnt    (1<<19)
+%assign cpuflags_misalign (1<<20)
+%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<22)
+%assign cpuflags_bmi1     (1<<23)
+%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_tbm      (1<<25)|cpuflags_bmi1
+
+%define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
+%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+
+; Takes up to 2 cpuflags from the above list.
+; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
+; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
+%macro INIT_CPUFLAGS 0-2
+    %if %0 >= 1
+        %xdefine cpuname %1
+        %assign cpuflags cpuflags_%1
+        %if %0 >= 2
+            %xdefine cpuname %1_%2
+            %assign cpuflags cpuflags | cpuflags_%2
+        %endif
+        %xdefine SUFFIX _ %+ cpuname
+        %if cpuflag(AVX)
+            %assign AVX_enabled 1
+        %endif
+        %if mmsize == 16 && notcpuflag(SSE2)
+            %define mova movaps
+            %define movu movups
+            %define movnta movntps
+        %endif
+        %if cpuflag(aligned)
+            %define movu mova
+        %elifidn %1, SSE3
+            %define movu lddqu
+        %endif
+    %else
+        %xdefine SUFFIX
+        %undef cpuname
+        %undef cpuflags
+    %endif
+%endmacro
+
+; merge MMX and SSE*
+
+%macro CAT_XDEFINE 3
+    %xdefine %1%2 %3
+%endmacro
+
+%macro CAT_UNDEF 2
+    %undef %1%2
+%endmacro
+
+%macro INIT_MMX 0-1+
+    %assign AVX_enabled 0
+    %define RESET_MM_PERMUTATION INIT_MMX %1
+    %define mmsize 8
+    %define num_mmregs 8
+    %define mova movq
+    %define movu movq
+    %define movh movd
+    %define movnta movntq
+    %assign %%i 0
+    %rep 8
+    CAT_XDEFINE m, %%i, mm %+ %%i
+    CAT_XDEFINE nmm, %%i, %%i
+    %assign %%i %%i+1
+    %endrep
+    %rep 8
+    CAT_UNDEF m, %%i
+    CAT_UNDEF nmm, %%i
+    %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+%macro INIT_XMM 0-1+
+    %assign AVX_enabled 0
+    %define RESET_MM_PERMUTATION INIT_XMM %1
+    %define mmsize 16
+    %define num_mmregs 8
+    %if ARCH_X86_64
+    %define num_mmregs 16
+    %endif
+    %define mova movdqa
+    %define movu movdqu
+    %define movh movq
+    %define movnta movntdq
+    %assign %%i 0
+    %rep num_mmregs
+    CAT_XDEFINE m, %%i, xmm %+ %%i
+    CAT_XDEFINE nxmm, %%i, %%i
+    %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+%macro INIT_YMM 0-1+
+    %assign AVX_enabled 1
+    %define RESET_MM_PERMUTATION INIT_YMM %1
+    %define mmsize 32
+    %define num_mmregs 8
+    %if ARCH_X86_64
+    %define num_mmregs 16
+    %endif
+    %define mova vmovaps
+    %define movu vmovups
+    %undef movh
+    %define movnta vmovntps
+    %assign %%i 0
+    %rep num_mmregs
+    CAT_XDEFINE m, %%i, ymm %+ %%i
+    CAT_XDEFINE nymm, %%i, %%i
+    %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+INIT_XMM
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles.
+
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+%rep %0/2
+    %xdefine tmp%2 m%2
+    %xdefine ntmp%2 nm%2
+    %rotate 2
+%endrep
+%rep %0/2
+    %xdefine m%1 tmp%2
+    %xdefine nm%1 ntmp%2
+    %undef tmp%2
+    %undef ntmp%2
+    %rotate 2
+%endrep
+%endmacro
+
+%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
+%rep %0-1
+%ifdef m%1
+    %xdefine tmp m%1
+    %xdefine m%1 m%2
+    %xdefine m%2 tmp
+    CAT_XDEFINE n, m%1, %1
+    CAT_XDEFINE n, m%2, %2
+%else
+    ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
+    ; Be careful using this mode in nested macros though, as in some cases there may be
+    ; other copies of m# that have already been dereferenced and don't get updated correctly.
+    %xdefine %%n1 n %+ %1
+    %xdefine %%n2 n %+ %2
+    %xdefine tmp m %+ %%n1
+    CAT_XDEFINE m, %%n1, m %+ %%n2
+    CAT_XDEFINE m, %%n2, tmp
+    CAT_XDEFINE n, m %+ %%n1, %%n1
+    CAT_XDEFINE n, m %+ %%n2, %%n2
+%endif
+    %undef tmp
+    %rotate 1
+%endrep
+%endmacro
+
+; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
+; calls to that function will automatically load the permutation, so values can
+; be returned in mmregs.
+%macro SAVE_MM_PERMUTATION 0-1
+    %if %0
+        %xdefine %%f %1_m
+    %else
+        %xdefine %%f current_function %+ _m
+    %endif
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE %%f, %%i, m %+ %%i
+    %assign %%i %%i+1
+    %endrep
+%endmacro
+
+%macro LOAD_MM_PERMUTATION 1 ; name to load from
+    %ifdef %1_m0
+        %assign %%i 0
+        %rep num_mmregs
+            CAT_XDEFINE m, %%i, %1_m %+ %%i
+            CAT_XDEFINE n, m %+ %%i, %%i
+        %assign %%i %%i+1
+        %endrep
+    %endif
+%endmacro
+
+; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
+%macro call 1
+    call_internal %1, %1 %+ SUFFIX
+%endmacro
+%macro call_internal 2
+    %xdefine %%i %1
+    %ifndef cglobaled_%1
+        %ifdef cglobaled_%2
+            %xdefine %%i %2
+        %endif
+    %endif
+    call %%i
+    LOAD_MM_PERMUTATION %%i
+%endmacro
+
+; Substitutions that reduce instruction size but are functionally equivalent
+%macro add 2
+    %ifnum %2
+        %if %2==128
+            sub %1, -128
+        %else
+            add %1, %2
+        %endif
+    %else
+        add %1, %2
+    %endif
+%endmacro
+
+%macro sub 2
+    %ifnum %2
+        %if %2==128
+            add %1, -128
+        %else
+            sub %1, %2
+        %endif
+    %else
+        sub %1, %2
+    %endif
+%endmacro
+
+;=============================================================================
+; AVX abstraction layer
+;=============================================================================
+
+%assign i 0
+%rep 16
+    %if i < 8
+        CAT_XDEFINE sizeofmm, i, 8
+    %endif
+    CAT_XDEFINE sizeofxmm, i, 16
+    CAT_XDEFINE sizeofymm, i, 32
+%assign i i+1
+%endrep
+%undef i
+
+%macro CHECK_AVX_INSTR_EMU 3-*
+    %xdefine %%opcode %1
+    %xdefine %%dst %2
+    %rep %0-2
+        %ifidn %%dst, %3
+            %error non-AVX emulation of ``%%opcode'' is not supported
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+;%1 == instruction
+;%2 == 1 if float, 0 if int
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
+;%4 == number of operands given
+;%5+: operands
+%macro RUN_AVX_INSTR 6-7+
+    %ifid %6
+        %define %%sizeofreg sizeof%6
+    %elifid %5
+        %define %%sizeofreg sizeof%5
+    %else
+        %define %%sizeofreg mmsize
+    %endif
+    %if %%sizeofreg==32
+        %if %4>=3
+            v%1 %5, %6, %7
+        %else
+            v%1 %5, %6
+        %endif
+    %else
+        %if %%sizeofreg==8
+            %define %%regmov movq
+        %elif %2
+            %define %%regmov movaps
+        %else
+            %define %%regmov movdqa
+        %endif
+
+        %if %4>=3+%3
+            %ifnidn %5, %6
+                %if AVX_enabled && %%sizeofreg==16
+                    v%1 %5, %6, %7
+                %else
+                    CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
+                    %%regmov %5, %6
+                    %1 %5, %7
+                %endif
+            %else
+                %1 %5, %7
+            %endif
+        %elif %4>=3
+            %1 %5, %6, %7
+        %else
+            %1 %5, %6
+        %endif
+    %endif
+%endmacro
+
+; 3arg AVX ops with a memory arg can only have it in src2,
+; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
+; So, if the op is symmetric and the wrong one is memory, swap them.
+%macro RUN_AVX_INSTR1 8
+    %assign %%swap 0
+    %if AVX_enabled
+        %ifnid %6
+            %assign %%swap 1
+        %endif
+    %elifnidn %5, %6
+        %ifnid %7
+            %assign %%swap 1
+        %endif
+    %endif
+    %if %%swap && %3 == 0 && %8 == 1
+        RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
+    %else
+        RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
+    %endif
+%endmacro
+
+;%1 == instruction
+;%2 == 1 if float, 0 if int
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
+;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
+%macro AVX_INSTR 4
+    %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
+        %ifidn %3, fnord
+            RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
+        %elifidn %4, fnord
+            RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
+        %elifidn %5, fnord
+            RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
+        %else
+            RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
+        %endif
+    %endmacro
+%endmacro
+
+AVX_INSTR addpd, 1, 0, 1
+AVX_INSTR addps, 1, 0, 1
+AVX_INSTR addsd, 1, 0, 1
+AVX_INSTR addss, 1, 0, 1
+AVX_INSTR addsubpd, 1, 0, 0
+AVX_INSTR addsubps, 1, 0, 0
+AVX_INSTR andpd, 1, 0, 1
+AVX_INSTR andps, 1, 0, 1
+AVX_INSTR andnpd, 1, 0, 0
+AVX_INSTR andnps, 1, 0, 0
+AVX_INSTR blendpd, 1, 0, 0
+AVX_INSTR blendps, 1, 0, 0
+AVX_INSTR blendvpd, 1, 0, 0
+AVX_INSTR blendvps, 1, 0, 0
+AVX_INSTR cmppd, 1, 0, 0
+AVX_INSTR cmpps, 1, 0, 0
+AVX_INSTR cmpsd, 1, 0, 0
+AVX_INSTR cmpss, 1, 0, 0
+AVX_INSTR cvtdq2ps, 1, 0, 0
+AVX_INSTR cvtps2dq, 1, 0, 0
+AVX_INSTR divpd, 1, 0, 0
+AVX_INSTR divps, 1, 0, 0
+AVX_INSTR divsd, 1, 0, 0
+AVX_INSTR divss, 1, 0, 0
+AVX_INSTR dppd, 1, 1, 0
+AVX_INSTR dpps, 1, 1, 0
+AVX_INSTR haddpd, 1, 0, 0
+AVX_INSTR haddps, 1, 0, 0
+AVX_INSTR hsubpd, 1, 0, 0
+AVX_INSTR hsubps, 1, 0, 0
+AVX_INSTR maxpd, 1, 0, 1
+AVX_INSTR maxps, 1, 0, 1
+AVX_INSTR maxsd, 1, 0, 1
+AVX_INSTR maxss, 1, 0, 1
+AVX_INSTR minpd, 1, 0, 1
+AVX_INSTR minps, 1, 0, 1
+AVX_INSTR minsd, 1, 0, 1
+AVX_INSTR minss, 1, 0, 1
+AVX_INSTR movhlps, 1, 0, 0
+AVX_INSTR movlhps, 1, 0, 0
+AVX_INSTR movsd, 1, 0, 0
+AVX_INSTR movss, 1, 0, 0
+AVX_INSTR mpsadbw, 0, 1, 0
+AVX_INSTR mulpd, 1, 0, 1
+AVX_INSTR mulps, 1, 0, 1
+AVX_INSTR mulsd, 1, 0, 1
+AVX_INSTR mulss, 1, 0, 1
+AVX_INSTR orpd, 1, 0, 1
+AVX_INSTR orps, 1, 0, 1
+AVX_INSTR pabsb, 0, 0, 0
+AVX_INSTR pabsw, 0, 0, 0
+AVX_INSTR pabsd, 0, 0, 0
+AVX_INSTR packsswb, 0, 0, 0
+AVX_INSTR packssdw, 0, 0, 0
+AVX_INSTR packuswb, 0, 0, 0
+AVX_INSTR packusdw, 0, 0, 0
+AVX_INSTR paddb, 0, 0, 1
+AVX_INSTR paddw, 0, 0, 1
+AVX_INSTR paddd, 0, 0, 1
+AVX_INSTR paddq, 0, 0, 1
+AVX_INSTR paddsb, 0, 0, 1
+AVX_INSTR paddsw, 0, 0, 1
+AVX_INSTR paddusb, 0, 0, 1
+AVX_INSTR paddusw, 0, 0, 1
+AVX_INSTR palignr, 0, 1, 0
+AVX_INSTR pand, 0, 0, 1
+AVX_INSTR pandn, 0, 0, 0
+AVX_INSTR pavgb, 0, 0, 1
+AVX_INSTR pavgw, 0, 0, 1
+AVX_INSTR pblendvb, 0, 0, 0
+AVX_INSTR pblendw, 0, 1, 0
+AVX_INSTR pcmpestri, 0, 0, 0
+AVX_INSTR pcmpestrm, 0, 0, 0
+AVX_INSTR pcmpistri, 0, 0, 0
+AVX_INSTR pcmpistrm, 0, 0, 0
+AVX_INSTR pcmpeqb, 0, 0, 1
+AVX_INSTR pcmpeqw, 0, 0, 1
+AVX_INSTR pcmpeqd, 0, 0, 1
+AVX_INSTR pcmpeqq, 0, 0, 1
+AVX_INSTR pcmpgtb, 0, 0, 0
+AVX_INSTR pcmpgtw, 0, 0, 0
+AVX_INSTR pcmpgtd, 0, 0, 0
+AVX_INSTR pcmpgtq, 0, 0, 0
+AVX_INSTR phaddw, 0, 0, 0
+AVX_INSTR phaddd, 0, 0, 0
+AVX_INSTR phaddsw, 0, 0, 0
+AVX_INSTR phsubw, 0, 0, 0
+AVX_INSTR phsubd, 0, 0, 0
+AVX_INSTR phsubsw, 0, 0, 0
+AVX_INSTR pmaddwd, 0, 0, 1
+AVX_INSTR pmaddubsw, 0, 0, 0
+AVX_INSTR pmaxsb, 0, 0, 1
+AVX_INSTR pmaxsw, 0, 0, 1
+AVX_INSTR pmaxsd, 0, 0, 1
+AVX_INSTR pmaxub, 0, 0, 1
+AVX_INSTR pmaxuw, 0, 0, 1
+AVX_INSTR pmaxud, 0, 0, 1
+AVX_INSTR pminsb, 0, 0, 1
+AVX_INSTR pminsw, 0, 0, 1
+AVX_INSTR pminsd, 0, 0, 1
+AVX_INSTR pminub, 0, 0, 1
+AVX_INSTR pminuw, 0, 0, 1
+AVX_INSTR pminud, 0, 0, 1
+AVX_INSTR pmovmskb, 0, 0, 0
+AVX_INSTR pmulhuw, 0, 0, 1
+AVX_INSTR pmulhrsw, 0, 0, 1
+AVX_INSTR pmulhw, 0, 0, 1
+AVX_INSTR pmullw, 0, 0, 1
+AVX_INSTR pmulld, 0, 0, 1
+AVX_INSTR pmuludq, 0, 0, 1
+AVX_INSTR pmuldq, 0, 0, 1
+AVX_INSTR por, 0, 0, 1
+AVX_INSTR psadbw, 0, 0, 1
+AVX_INSTR pshufb, 0, 0, 0
+AVX_INSTR pshufd, 0, 1, 0
+AVX_INSTR pshufhw, 0, 1, 0
+AVX_INSTR pshuflw, 0, 1, 0
+AVX_INSTR psignb, 0, 0, 0
+AVX_INSTR psignw, 0, 0, 0
+AVX_INSTR psignd, 0, 0, 0
+AVX_INSTR psllw, 0, 0, 0
+AVX_INSTR pslld, 0, 0, 0
+AVX_INSTR psllq, 0, 0, 0
+AVX_INSTR pslldq, 0, 0, 0
+AVX_INSTR psraw, 0, 0, 0
+AVX_INSTR psrad, 0, 0, 0
+AVX_INSTR psrlw, 0, 0, 0
+AVX_INSTR psrld, 0, 0, 0
+AVX_INSTR psrlq, 0, 0, 0
+AVX_INSTR psrldq, 0, 0, 0
+AVX_INSTR psubb, 0, 0, 0
+AVX_INSTR psubw, 0, 0, 0
+AVX_INSTR psubd, 0, 0, 0
+AVX_INSTR psubq, 0, 0, 0
+AVX_INSTR psubsb, 0, 0, 0
+AVX_INSTR psubsw, 0, 0, 0
+AVX_INSTR psubusb, 0, 0, 0
+AVX_INSTR psubusw, 0, 0, 0
+AVX_INSTR ptest, 0, 0, 0
+AVX_INSTR punpckhbw, 0, 0, 0
+AVX_INSTR punpckhwd, 0, 0, 0
+AVX_INSTR punpckhdq, 0, 0, 0
+AVX_INSTR punpckhqdq, 0, 0, 0
+AVX_INSTR punpcklbw, 0, 0, 0
+AVX_INSTR punpcklwd, 0, 0, 0
+AVX_INSTR punpckldq, 0, 0, 0
+AVX_INSTR punpcklqdq, 0, 0, 0
+AVX_INSTR pxor, 0, 0, 1
+AVX_INSTR shufps, 1, 1, 0
+AVX_INSTR subpd, 1, 0, 0
+AVX_INSTR subps, 1, 0, 0
+AVX_INSTR subsd, 1, 0, 0
+AVX_INSTR subss, 1, 0, 0
+AVX_INSTR unpckhpd, 1, 0, 0
+AVX_INSTR unpckhps, 1, 0, 0
+AVX_INSTR unpcklpd, 1, 0, 0
+AVX_INSTR unpcklps, 1, 0, 0
+AVX_INSTR xorpd, 1, 0, 1
+AVX_INSTR xorps, 1, 0, 1
+
+; 3DNow instructions, for sharing code between AVX, SSE and 3DN
+AVX_INSTR pfadd, 1, 0, 1
+AVX_INSTR pfsub, 1, 0, 0
+AVX_INSTR pfmul, 1, 0, 1
+
+; base-4 constants for shuffles
+%assign i 0
+%rep 256
+    %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
+    %if j < 10
+        CAT_XDEFINE q000, j, i
+    %elif j < 100
+        CAT_XDEFINE q00, j, i
+    %elif j < 1000
+        CAT_XDEFINE q0, j, i
+    %else
+        CAT_XDEFINE q, j, i
+    %endif
+%assign i i+1
+%endrep
+%undef i
+%undef j
+
+%macro FMA_INSTR 3
+    %macro %1 4-7 %1, %2, %3
+        %if cpuflag(xop)
+            v%5 %1, %2, %3, %4
+        %else
+            %6 %1, %2, %3
+            %7 %1, %4
+        %endif
+    %endmacro
+%endmacro
+
+FMA_INSTR  pmacsdd,  pmulld, paddd
+FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
+; This lets us use tzcnt without bumping the yasm version requirement yet.
+%define tzcnt rep bsf
diff --git a/src/main/jni/opus/celt/_kiss_fft_guts.h b/src/main/jni/opus/celt/_kiss_fft_guts.h
new file mode 100644
index 000000000..aefe490e1
--- /dev/null
+++ b/src/main/jni/opus/celt/_kiss_fft_guts.h
@@ -0,0 +1,183 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_GUTS_H
+#define KISS_FFT_GUTS_H
+
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+#define MAX(a,b) ((a)>(b) ? (a):(b))
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#include "arch.h"
+
+
+#define SAMP_MAX 2147483647
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+
+#define SAMP_MIN -SAMP_MAX
+
+
+#   define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+
+#   define C_MULC(m,a,b) \
+      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+
+#   define C_MUL4(m,a,b) \
+      do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
+          (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
+
+#   define C_MULBYSCALAR( c, s ) \
+      do{ (c).r =  S_MUL( (c).r , s ) ;\
+          (c).i =  S_MUL( (c).i , s ) ; }while(0)
+
+#   define DIVSCALAR(x,k) \
+        (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
+
+#   define C_FIXDIV(c,div) \
+        do {    DIVSCALAR( (c).r , div);  \
+                DIVSCALAR( (c).i  , div); }while (0)
+
+#define  C_ADD( res, a,b)\
+    do {(res).r=ADD32((a).r,(b).r);  (res).i=ADD32((a).i,(b).i); \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do {(res).r=SUB32((a).r,(b).r);  (res).i=SUB32((a).i,(b).i); \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do {(res).r = ADD32((res).r, (a).r);  (res).i = ADD32((res).i,(a).i);\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
+    }while(0)
+
+#if defined(OPUS_ARM_INLINE_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+
+#if defined(OPUS_ARM_INLINE_EDSP)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#define C_MULC(m,a,b) \
+    do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
+        (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
+
+#define C_MUL4(m,a,b) C_MUL(m,a,b)
+
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#ifndef C_ADD
+#define  C_ADD( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+            (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+            (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+            CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+            (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+            CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+            (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+#endif /* C_ADD defined */
+
+#ifdef FIXED_POINT
+/*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5f)
+#endif
+
+#define  kf_cexp(x,phase) \
+        do{ \
+                (x)->r = KISS_FFT_COS(phase);\
+                (x)->i = KISS_FFT_SIN(phase);\
+        }while(0)
+
+#define  kf_cexp2(x,phase) \
+   do{ \
+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
+}while(0)
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/src/main/jni/opus/celt/arch.h b/src/main/jni/opus/celt/arch.h
new file mode 100644
index 000000000..3bbcd3663
--- /dev/null
+++ b/src/main/jni/opus/celt/arch.h
@@ -0,0 +1,214 @@
+/* Copyright (c) 2003-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file arch.h
+   @brief Various architecture definitions for CELT
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ARCH_H
+#define ARCH_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+# if !defined(__GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define __GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#define CELT_SIG_SCALE 32768.f
+
+#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
+#ifdef ENABLE_ASSERTIONS
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
+#else
+#define celt_assert(cond)
+#define celt_assert2(cond, message)
+#endif
+
+#define IMUL32(a,b) ((a)*(b))
+
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 16-bit value.  */
+#define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
+#define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 32-bit value.  */
+#define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
+#define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+#define PRINT_MIPS(file)
+
+#ifdef FIXED_POINT
+
+typedef opus_int16 opus_val16;
+typedef opus_int32 opus_val32;
+
+typedef opus_val32 celt_sig;
+typedef opus_val16 celt_norm;
+typedef opus_val32 celt_ener;
+
+#define Q15ONE 32767
+
+#define SIG_SHIFT 12
+
+#define NORM_SCALING 16384
+
+#define DB_SHIFT 10
+
+#define EPSILON 1
+#define VERY_SMALL 0
+#define VERY_LARGE16 ((opus_val16)32767)
+#define Q15_ONE ((opus_val16)32767)
+
+#define SCALEIN(a)      (a)
+#define SCALEOUT(a)     (a)
+
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/fixed_armv5e.h"
+#elif defined (OPUS_ARM_INLINE_ASM)
+#include "arm/fixed_armv4.h"
+#elif defined (BFIN_ASM)
+#include "fixed_bfin.h"
+#elif defined (TI_C5X_ASM)
+#include "fixed_c5x.h"
+#elif defined (TI_C6X_ASM)
+#include "fixed_c6x.h"
+#endif
+
+#endif
+
+#else /* FIXED_POINT */
+
+typedef float opus_val16;
+typedef float opus_val32;
+
+typedef float celt_sig;
+typedef float celt_norm;
+typedef float celt_ener;
+
+#define Q15ONE 1.0f
+
+#define NORM_SCALING 1.f
+
+#define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
+#define VERY_LARGE16 1e15f
+#define Q15_ONE ((opus_val16)1.f)
+
+#define QCONST16(x,bits) (x)
+#define QCONST32(x,bits) (x)
+
+#define NEG16(x) (-(x))
+#define NEG32(x) (-(x))
+#define EXTRACT16(x) (x)
+#define EXTEND32(x) (x)
+#define SHR16(a,shift) (a)
+#define SHL16(a,shift) (a)
+#define SHR32(a,shift) (a)
+#define SHL32(a,shift) (a)
+#define PSHR32(a,shift) (a)
+#define VSHR32(a,shift) (a)
+
+#define PSHR(a,shift)   (a)
+#define SHR(a,shift)    (a)
+#define SHL(a,shift)    (a)
+#define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
+
+#define ROUND16(a,shift)  (a)
+#define HALF16(x)       (.5f*(x))
+#define HALF32(x)       (.5f*(x))
+
+#define ADD16(a,b) ((a)+(b))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define MULT16_16_16(a,b)     ((a)*(b))
+#define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
+#define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
+
+#define MULT16_32_Q15(a,b)     ((a)*(b))
+#define MULT16_32_Q16(a,b)     ((a)*(b))
+
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
+#define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+
+#define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
+#define MULT16_16_Q13(a,b)     ((a)*(b))
+#define MULT16_16_Q14(a,b)     ((a)*(b))
+#define MULT16_16_Q15(a,b)     ((a)*(b))
+#define MULT16_16_P15(a,b)     ((a)*(b))
+#define MULT16_16_P13(a,b)     ((a)*(b))
+#define MULT16_16_P14(a,b)     ((a)*(b))
+#define MULT16_32_P16(a,b)     ((a)*(b))
+
+#define DIV32_16(a,b)     (((opus_val32)(a))/(opus_val16)(b))
+#define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))
+
+#define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
+#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
+
+#endif /* !FIXED_POINT */
+
+#ifndef GLOBAL_STACK_SIZE
+#ifdef FIXED_POINT
+#define GLOBAL_STACK_SIZE 100000
+#else
+#define GLOBAL_STACK_SIZE 100000
+#endif
+#endif
+
+#endif /* ARCH_H */
diff --git a/src/main/jni/opus/celt/arm/arm2gnu.pl b/src/main/jni/opus/celt/arm/arm2gnu.pl
new file mode 100644
index 000000000..eab42efa2
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/arm2gnu.pl
@@ -0,0 +1,316 @@
+#!/usr/bin/perl
+
+my $bigend;  # little/big endian
+my $nxstack;
+
+$nxstack = 0;
+
+eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
+    if $running_under_some_shell;
+
+while ($ARGV[0] =~ /^-/) {
+    $_ = shift;
+  last if /^--/;
+    if (/^-n/) {
+    $nflag++;
+    next;
+    }
+    die "I don't recognize this switch: $_\\n";
+}
+$printit++ unless $nflag;
+
+$\ = "\n";      # automatically add newline on print
+$n=0;
+
+$thumb = 0;     # ARM mode by default, not Thumb.
+@proc_stack = ();
+
+LINE:
+while (<>) {
+
+    # For ADRLs we need to add a new line after the substituted one.
+    $addPadding = 0;
+
+    # First, we do not dare to touch *anything* inside double quotes, do we?
+    # Second, if you want a dollar character in the string,
+    # insert two of them -- that's how ARM C and assembler treat strings.
+    s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1:   .ascii \"/   && do { s/\$\$/\$/g; next };
+    s/\bDCB\b[ \t]*\"/.ascii \"/                          && do { s/\$\$/\$/g; next };
+    s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/                    && do { s/\$\$/\$/g; next };
+    # If there's nothing on a line but a comment, don't try to apply any further
+    #  substitutions (this is a cheap hack to avoid mucking up the license header)
+    s/^([ \t]*);/$1@/                                     && do { s/\$\$/\$/g; next };
+    # If substituted -- leave immediately !
+
+    s/@/,:/;
+    s/;/@/;
+    while ( /@.*'/ ) {
+      s/(@.*)'/$1/g;
+    }
+    s/\{FALSE\}/0/g;
+    s/\{TRUE\}/1/g;
+    s/\{(\w\w\w\w+)\}/$1/g;
+    s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
+    s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
+    s/\bIMPORT\b/.extern/;
+    s/\bEXPORT\b/.global/;
+    s/^(\s+)\[/$1IF/;
+    s/^(\s+)\|/$1ELSE/;
+    s/^(\s+)\]/$1ENDIF/;
+    s/IF *:DEF:/ .ifdef/;
+    s/IF *:LNOT: *:DEF:/ .ifndef/;
+    s/ELSE/ .else/;
+    s/ENDIF/ .endif/;
+
+    if( /\bIF\b/ ) {
+      s/\bIF\b/ .if/;
+      s/=/==/;
+    }
+    if ( $n == 2) {
+        s/\$/\\/g;
+    }
+    if ($n == 1) {
+        s/\$//g;
+        s/label//g;
+    $n = 2;
+      }
+    if ( /MACRO/ ) {
+      s/MACRO *\n/.macro/;
+      $n=1;
+    }
+    if ( /\bMEND\b/ ) {
+      s/\bMEND\b/.endm/;
+      $n=0;
+    }
+
+    # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
+    #
+    if ( /\bAREA\b/ ) {
+        my $align;
+        $align = "2";
+        if ( /ALIGN=(\d+)/ ) {
+            $align = $1;
+        }
+        if ( /CODE/ ) {
+            $nxstack = 1;
+        }
+        s/^(.+)CODE(.+)READONLY(.*)/    .text/;
+        s/^(.+)DATA(.+)READONLY(.*)/    .section .rdata/;
+        s/^(.+)\|\|\.data\|\|(.+)/    .data/;
+        s/^(.+)\|\|\.bss\|\|(.+)/    .bss/;
+        s/$/;   .p2align $align/;
+        # Enable NEON instructions but don't produce a binary that requires
+        # ARMv7. RVCT does not have equivalent directives, so we just do this
+        # for all CODE areas.
+        if ( /.text/ ) {
+            # Separating .arch, .fpu, etc., by semicolons does not work (gas
+            # thinks the semicolon is part of the arch name, even when there's
+            # whitespace separating them). Sadly this means our line numbers
+            # won't match the original source file (we could use the .line
+            # directive, which is documented to be obsolete, but then gdb will
+            # show the wrong line in the translated source file).
+            s/$/;   .arch armv7-a\n   .fpu neon\n   .object_arch armv4t/;
+        }
+    }
+
+    s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/;       # ||.constdata$3||
+    s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/;               # ||.bss$2||
+    s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/;             # ||.data$2||
+    s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
+    s/^(\s+)\%(\s)/    .space $1/;
+
+    s/\|(.+)\.(\d+)\|/\.$1_$2/;                     # |L80.123| -> .L80_123
+    s/\bCODE32\b/.code 32/ && do {$thumb = 0};
+    s/\bCODE16\b/.code 16/ && do {$thumb = 1};
+    if (/\bPROC\b/)
+    {
+        my $prefix;
+        my $proc;
+        /^([A-Za-z_\.]\w+)\b/;
+        $proc = $1;
+        $prefix = "";
+        if ($proc)
+        {
+            $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
+            push(@proc_stack, $proc);
+            s/^[A-Za-z_\.]\w+/$&:/;
+        }
+        $prefix = $prefix."\t.thumb_func; " if ($thumb);
+        s/\bPROC\b/@ $&/;
+        $_ = $prefix.$_;
+    }
+    s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
+    s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
+    if (/\bENDP\b/)
+    {
+        my $proc;
+        s/\bENDP\b/@ $&/;
+        $proc = pop(@proc_stack);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc);
+    }
+    s/\bSUBT\b/@ $&/;
+    s/\bDATA\b/@ $&/;   # DATA directive is deprecated -- Asm guide, p.7-25
+    s/\bKEEP\b/@ $&/;
+    s/\bEXPORTAS\b/@ $&/;
+    s/\|\|(.)+\bEQU\b/@ $&/;
+    s/\|\|([\w\$]+)\|\|/$1/;
+    s/\bENTRY\b/@ $&/;
+    s/\bASSERT\b/@ $&/;
+    s/\bGBLL\b/@ $&/;
+    s/\bGBLA\b/@ $&/;
+    s/^\W+OPT\b/@ $&/;
+    s/:OR:/|/g;
+    s/:SHL:/<</g;
+    s/:SHR:/>>/g;
+    s/:AND:/&/g;
+    s/:LAND:/&&/g;
+    s/CPSR/cpsr/;
+    s/SPSR/spsr/;
+    s/ALIGN$/.balign 4/;
+    s/ALIGN\s+([0-9x]+)$/.balign $1/;
+    s/psr_cxsf/psr_all/;
+    s/LTORG/.ltorg/;
+    s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
+
+    #  {PC} + 0xdeadfeed  -->  . + 0xdeadfeed
+    s/\{PC\} \+/ \. +/;
+
+    # Single hex constant on the line !
+    #
+    # >>> NOTE <<<
+    #   Double-precision floats in gcc are always mixed-endian, which means
+    #   bytes in two words are little-endian, but words are big-endian.
+    #   So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
+    #   and 0xfeed0000 at high address.
+    #
+    s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+    s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
+
+    # Single hex constant on the line !
+#    s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+#    s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
+    s/\bDCFS[ \t]+0x/.word 0x/;
+    s/\bDCFS\b/.float/;
+
+    s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
+    s/\bDCD\b/.word/;
+    s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
+    s/\bDCW\b/.short/;
+    s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
+    s/\bDCB\b/.byte/;
+    s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
+    s/^[A-Za-z_\.]\w+/$&:/;
+    s/^(\d+)/$1:/;
+    s/\%(\d+)/$1b_or_f/;
+    s/\%[Bb](\d+)/$1b/;
+    s/\%[Ff](\d+)/$1f/;
+    s/\%[Ff][Tt](\d+)/$1f/;
+    s/&([\dA-Fa-f]+)/0x$1/;
+    if ( /\b2_[01]+\b/ ) {
+      s/\b2_([01]+)\b/conv$1&&&&/g;
+      while ( /[01][01][01][01]&&&&/ ) {
+        s/0000&&&&/&&&&0/g;
+        s/0001&&&&/&&&&1/g;
+        s/0010&&&&/&&&&2/g;
+        s/0011&&&&/&&&&3/g;
+        s/0100&&&&/&&&&4/g;
+        s/0101&&&&/&&&&5/g;
+        s/0110&&&&/&&&&6/g;
+        s/0111&&&&/&&&&7/g;
+        s/1000&&&&/&&&&8/g;
+        s/1001&&&&/&&&&9/g;
+        s/1010&&&&/&&&&A/g;
+        s/1011&&&&/&&&&B/g;
+        s/1100&&&&/&&&&C/g;
+        s/1101&&&&/&&&&D/g;
+        s/1110&&&&/&&&&E/g;
+        s/1111&&&&/&&&&F/g;
+      }
+      s/000&&&&/&&&&0/g;
+      s/001&&&&/&&&&1/g;
+      s/010&&&&/&&&&2/g;
+      s/011&&&&/&&&&3/g;
+      s/100&&&&/&&&&4/g;
+      s/101&&&&/&&&&5/g;
+      s/110&&&&/&&&&6/g;
+      s/111&&&&/&&&&7/g;
+      s/00&&&&/&&&&0/g;
+      s/01&&&&/&&&&1/g;
+      s/10&&&&/&&&&2/g;
+      s/11&&&&/&&&&3/g;
+      s/0&&&&/&&&&0/g;
+      s/1&&&&/&&&&1/g;
+      s/conv&&&&/0x/g;
+    }
+
+    if ( /commandline/)
+    {
+        if( /-bigend/)
+        {
+            $bigend=1;
+        }
+    }
+
+    if ( /\bDCDU\b/ )
+    {
+        my $cmd=$_;
+        my $value;
+        my $prefix;
+        my $w1;
+        my $w2;
+        my $w3;
+        my $w4;
+
+        s/\s+DCDU\b/@ $&/;
+
+        $cmd =~ /\bDCDU\b\s+0x(\d+)/;
+        $value = $1;
+        $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
+        $w1 = $1;
+        $w2 = $2;
+        $w3 = $3;
+        $w4 = $4;
+
+        if( $bigend ne "")
+        {
+            # big endian
+            $prefix = "\t.byte\t0x".$w1.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w4."; ";
+        }
+        else
+        {
+            # little endian
+            $prefix = "\t.byte\t0x".$w4.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w1."; ";
+        }
+        $_=$prefix.$_;
+    }
+
+    if ( /\badrl\b/i )
+    {
+        s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
+        $addPadding = 1;
+    }
+    s/\bEND\b/@ END/;
+} continue {
+    printf ("%s", $_) if $printit;
+    if ($addPadding != 0)
+    {
+        printf ("   mov r0,r0\n");
+        $addPadding = 0;
+    }
+}
+#If we had a code section, mark that this object doesn't need an executable
+# stack.
+if ($nxstack) {
+    printf ("    .section\t.note.GNU-stack,\"\",\%\%progbits\n");
+}
diff --git a/src/main/jni/opus/celt/arm/arm_celt_map.c b/src/main/jni/opus/celt/arm/arm_celt_map.c
new file mode 100644
index 000000000..547a84d14
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/arm_celt_map.c
@@ -0,0 +1,49 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(FIXED_POINT)
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int , int) = {
+  celt_pitch_xcorr_c,               /* ARMv4 */
+  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
+  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+};
+# else
+#  error "Floating-point implementation is not supported by ARM asm yet." \
+ "Reconfigure with --disable-rtcd or send patches."
+# endif
+
+#endif
diff --git a/src/main/jni/opus/celt/arm/armcpu.c b/src/main/jni/opus/celt/arm/armcpu.c
new file mode 100644
index 000000000..17685258b
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/armcpu.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+
+#define OPUS_CPU_ARM_V4    (1)
+#define OPUS_CPU_ARM_EDSP  (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON  (1<<3)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
+  opus_uint32 flags;
+  flags=0;
+  /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
+   * instructions via their assembled hex code.
+   * All of these instructions should be essentially nops. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   if defined(OPUS_ARM_MAY_HAVE_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Linux based */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  opus_uint32 flags = 0;
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+
+  if(cpuinfo != NULL)
+  {
+    /* 512 should be enough for anybody (it's even enough for all the flags that
+     * x86 has accumulated... so far). */
+    char buf[512];
+
+    while(fgets(buf, 512, cpuinfo) != NULL)
+    {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
+      /* Search for edsp and neon flag */
+      if(memcmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+        p = strstr(buf, " edsp");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_EDSP;
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+        p = strstr(buf, " neon");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON;
+#  endif
+      }
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+      /* Search for media capabilities (>= ARMv6) */
+      if(memcmp(buf, "CPU architecture:", 17) == 0)
+      {
+        int version;
+        version = atoi(buf+17);
+
+        if(version >= 6)
+          flags |= OPUS_CPU_ARM_MEDIA;
+      }
+# endif
+    }
+
+    fclose(cpuinfo);
+  }
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in priveleged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+   "your platform.  Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+int opus_select_arch(void)
+{
+  opus_uint32 flags = opus_cpu_capabilities();
+  int arch = 0;
+
+  if(!(flags & OPUS_CPU_ARM_EDSP))
+    return arch;
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_MEDIA))
+    return arch;
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_NEON))
+    return arch;
+  arch++;
+
+  return arch;
+}
+
+#endif
diff --git a/src/main/jni/opus/celt/arm/armcpu.h b/src/main/jni/opus/celt/arm/armcpu.h
new file mode 100644
index 000000000..ac5744606
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/armcpu.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(ARMCPU_H)
+# define ARMCPU_H
+
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+#  define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+#  define MAY_HAVE_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  define MAY_HAVE_MEDIA(name) name ## _media
+# else
+#  define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  define MAY_HAVE_NEON(name) name ## _neon
+# else
+#  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_EDSP)
+#  define PRESUME_EDSP(name) name ## _edsp
+# else
+#  define PRESUME_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+#  define PRESUME_MEDIA(name) name ## _media
+# else
+#  define PRESUME_MEDIA(name) PRESUME_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+#  define PRESUME_NEON(name) name ## _neon
+# else
+#  define PRESUME_NEON(name) PRESUME_MEDIA(name)
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+# endif
+
+#endif
diff --git a/src/main/jni/opus/celt/arm/armopts.s.in b/src/main/jni/opus/celt/arm/armopts.s.in
new file mode 100644
index 000000000..3d8aaf275
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
diff --git a/src/main/jni/opus/celt/arm/celt_pitch_xcorr_arm.s b/src/main/jni/opus/celt/arm/celt_pitch_xcorr_arm.s
new file mode 100644
index 000000000..09917b16b
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,545 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013      Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  AREA  |.text|, CODE, READONLY
+
+  GET    celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+  EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+  EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+xcorr_kernel_neon PROC
+  ; input:
+  ;   r3     = int         len
+  ;   r4     = opus_val16 *x
+  ;   r5     = opus_val16 *y
+  ;   q0     = opus_val32  sum[4]
+  ; output:
+  ;   q0     = opus_val32  sum[4]
+  ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  ; internal usage:
+  ;   r12 = int j
+  ;   d3  = y_3|y_2|y_1|y_0
+  ;   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  ;   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  ;   q8  = scratch
+  ;
+  ; Load y[0...3]
+  ; This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+  ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+  ; - 2 cycles of ARM insrtuctions,
+  ; - 10 cycles of load/store/byte permute instructions, and
+  ; - 9 cycles of data processing instructions.
+  ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  ; latter two categories, meaning the whole loop should run in 10 cycles per
+  ; iteration, barring cache misses.
+  ;
+  ; Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get
+  ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  ; Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  ; Load x[0...3]
+  VLD1.16      d6, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  ; Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  ; Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  ; Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  ; instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+  ; Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  ; y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+; Now process 1 last sample, not reading ahead.
+  ; Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  ; Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+  ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+;  opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_neon PROC
+  ; input:
+  ;   r0  = opus_val16 *_x
+  ;   r1  = opus_val16 *_y
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = int         maxcorr
+  ; internal usage:
+  ;   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  ;   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  ;   r6  = int         max_pitch
+  ;   r12 = int         j
+  ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+  ; xcorr_kernel_neon parameters:
+  ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
+  ; So we don't save/restore any other registers.
+  BL xcorr_kernel_neon
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  ; _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+  ADDS         r6, r6, #4
+  ; Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+  ; Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  ; Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  ; Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  ; Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+  ; Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  ; _y++
+  ADD          r1, r1, #2
+  ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+  ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+xcorr_kernel_edsp PROC
+  ; input:
+  ;   r3      = int         len
+  ;   r4      = opus_val16 *_x (must be 32-bit aligned)
+  ;   r5      = opus_val16 *_y (must be 32-bit aligned)
+  ;   r6...r9 = opus_val32  sum[4]
+  ; output:
+  ;   r6...r9 = opus_val32  sum[4]
+  ; preserved: r0-r5
+  ; internal usage
+  ;   r2      = int         j
+  ;   r12,r14 = opus_val16  x[4]
+  ;   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr}
+  LDR          r10, [r5], #4      ; Load y[0...1]
+  SUBS         r2, r3, #4         ; j = len-4
+  LDR          r11, [r5], #4      ; Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      ; Load x[0...1]
+  ; Stall
+xcorr_kernel_edsp_process4
+  ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+  ; other. Every other instruction here dual-issues with a multiply, and is
+  ; thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      ; Load x[2...3]
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         ; j-=4
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      ; Load y[4...5]
+  SMLATB       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      ; Load x[0...1]
+  SMLABB       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      ; Load y[6...7]
+  SMLATB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      ; r12 = *x++
+  SUBS         r2, r2, #1         ; j--
+  ; Stall
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRGTH       r14, [r4], #2      ; r14 = *x++
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         ; j--
+  SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRGTH       r12, [r4], #2      ; r12 = *x++
+  SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             ; j--
+  SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRGTH       r14, [r4]          ; r14 = *x
+  SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          ; r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+  LDMFD        sp!, {r2,r4,r5,pc}
+  ENDP
+
+celt_pitch_xcorr_edsp PROC
+  ; input:
+  ;   r0  = opus_val16 *_x (must be 32-bit aligned)
+  ;   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = maxcorr
+  ; internal usage
+  ;   r4  = opus_val16 *x
+  ;   r5  = opus_val16 *y
+  ;   r6  = opus_val32  sum0
+  ;   r7  = opus_val32  sum1
+  ;   r8  = opus_val32  sum2
+  ;   r9  = opus_val32  sum3
+  ;   r1  = int         max_pitch
+  ;   r12 = int         j
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]
+  MOV          r4, r0
+  TST          r5, #3
+  ; maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+  LDRGEH       r6, [r4], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
+  SUBGES       r12, r12, #1
+  LDRGTH       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+  ; xcorr_kernel_edsp parameters:
+  ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  ; _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  ; {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     ; sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRGTH       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_1)
+  SMLABB       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  ; maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  ; xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRGEH       r6, [r4], #2
+  LDRGEH       r8, [r5], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done
+  LDMFD        sp!, {r4-r11, pc}
+  ENDP
+
+ENDIF
+
+END
diff --git a/src/main/jni/opus/celt/arm/fixed_armv4.h b/src/main/jni/opus/celt/arm/fixed_armv4.h
new file mode 100644
index 000000000..b690bc8ce
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/fixed_armv4.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a<<16)
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(a<<16)
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  return rd_hi<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+
+#endif
diff --git a/src/main/jni/opus/celt/arm/fixed_armv5e.h b/src/main/jni/opus/celt/arm/fixed_armv5e.h
new file mode 100644
index 000000000..1194a7d3e
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,116 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a)
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a)
+  );
+  return res<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b<<1), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#undef MAC16_16
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c)
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits */
+#undef MULT16_16
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#endif
diff --git a/src/main/jni/opus/celt/arm/kiss_fft_armv4.h b/src/main/jni/opus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 000000000..e4faad6f2
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov %[br], %[br], lsr #17\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#endif /* FIXED_POINT */
+
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/src/main/jni/opus/celt/arm/kiss_fft_armv5e.h b/src/main/jni/opus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 000000000..9eca183d7
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+
+#endif /* FIXED_POINT */
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/src/main/jni/opus/celt/arm/pitch_arm.h b/src/main/jni/opus/celt/arm/pitch_arm.h
new file mode 100644
index 000000000..a07f8ac2f
--- /dev/null
+++ b/src/main/jni/opus/celt/arm/pitch_arm.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+# if defined(FIXED_POINT)
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if !defined(OPUS_HAVE_RTCD)
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+#  endif
+
+# endif
+
+#endif
diff --git a/src/main/jni/opus/celt/bands.c b/src/main/jni/opus/celt/bands.c
new file mode 100644
index 000000000..cce56e2f6
--- /dev/null
+++ b/src/main/jni/opus/celt/bands.c
@@ -0,0 +1,1518 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "bands.h"
+#include "modes.h"
+#include "vq.h"
+#include "cwrs.h"
+#include "stack_alloc.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int i;
+   for (i=0;i<N;i++)
+   {
+      if (val < thresholds[i])
+         break;
+   }
+   if (i>prev && val < thresholds[prev]+hysteresis[prev])
+      i=prev;
+   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
+      i=prev;
+   return i;
+}
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed)
+{
+   return 1664525 * seed + 1013904223;
+}
+
+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
+   with this approximation is important because it has an impact on the bit allocation */
+static opus_int16 bitexact_cos(opus_int16 x)
+{
+   opus_int32 tmp;
+   opus_int16 x2;
+   tmp = (4096+((opus_int32)(x)*(x)))>>13;
+   celt_assert(tmp<=32767);
+   x2 = tmp;
+   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
+   celt_assert(x2<=32766);
+   return 1+x2;
+}
+
+static int bitexact_log2tan(int isin,int icos)
+{
+   int lc;
+   int ls;
+   lc=EC_ILOG(icos);
+   ls=EC_ILOG(isin);
+   icos<<=15-lc;
+   isin<<=15-ls;
+   return (ls-lc)*(1<<11)
+         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
+         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
+}
+
+#ifdef FIXED_POINT
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val32 maxval=0;
+         opus_val32 sum = 0;
+
+         j=M*eBands[i]; do {
+            maxval = MAX32(maxval, X[j+c*N]);
+            maxval = MAX32(maxval, -X[j+c*N]);
+         } while (++j<M*eBands[i+1]);
+
+         if (maxval > 0)
+         {
+            int shift = celt_ilog2(maxval)-10;
+            j=M*eBands[i]; do {
+               sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
+                                   EXTRACT16(VSHR32(X[j+c*N],shift)));
+            } while (++j<M*eBands[i+1]);
+            /* We're adding one here to ensure the normalized band isn't larger than unity norm */
+            bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
+         } else {
+            bandE[i+c*m->nbEBands] = EPSILON;
+         }
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      i=0; do {
+         opus_val16 g;
+         int j,shift;
+         opus_val16 E;
+         shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
+         E = VSHR32(bandE[i+c*m->nbEBands], shift);
+         g = EXTRACT16(celt_rcp(SHL32(E,3)));
+         j=M*eBands[i]; do {
+            X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
+         } while (++j<M*eBands[i+1]);
+      } while (++i<end);
+   } while (++c<C);
+}
+
+#else /* FIXED_POINT */
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val32 sum = 1e-27f;
+         for (j=M*eBands[i];j<M*eBands[i+1];j++)
+            sum += X[j+c*N]*X[j+c*N];
+         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
+         for (j=M*eBands[i];j<M*eBands[i+1];j++)
+            X[j+c*N] = freq[j+c*N]*g;
+      }
+   } while (++c<C);
+}
+
+#endif /* FIXED_POINT */
+
+/* De-normalise the energy to produce the synthesis from the unit-energy bands */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
+   c=0; do {
+      celt_sig * OPUS_RESTRICT f;
+      const celt_norm * OPUS_RESTRICT x;
+      f = freq+c*N;
+      x = X+c*N+M*eBands[start];
+      for (i=0;i<M*eBands[start];i++)
+         *f++ = 0;
+      for (i=start;i<end;i++)
+      {
+         int j, band_end;
+         opus_val16 g;
+         opus_val16 lg;
+#ifdef FIXED_POINT
+         int shift;
+#endif
+         j=M*eBands[i];
+         band_end = M*eBands[i+1];
+         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+#ifndef FIXED_POINT
+         g = celt_exp2(lg);
+#else
+         /* Handle the integer part of the log energy */
+         shift = 16-(lg>>DB_SHIFT);
+         if (shift>31)
+         {
+            shift=0;
+            g=0;
+         } else {
+            /* Handle the fractional part. */
+            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+         }
+         /* Handle extreme gains with negative shift. */
+         if (shift<0)
+         {
+            /* For shift < -2 we'd be likely to overflow, so we're capping
+               the gain here. This shouldn't happen unless the bitstream is
+               already corrupted. */
+            if (shift < -2)
+            {
+               g = 32767;
+               shift = -2;
+            }
+            do {
+               *f++ = SHL32(MULT16_16(*x++, g), -shift);
+            } while (++j<band_end);
+         } else
+#endif
+         /* Be careful of the fixed-point "else" just above when changing this code */
+         do {
+            *f++ = SHR32(MULT16_16(*x++, g), shift);
+         } while (++j<band_end);
+      }
+      celt_assert(start <= end);
+      for (i=M*eBands[end];i<N;i++)
+         *f++ = 0;
+   } while (++c<C);
+}
+
+/* This prevents energy collapse for transients with multiple short MDCTs */
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+      opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+{
+   int c, i, j, k;
+   for (i=start;i<end;i++)
+   {
+      int N0;
+      opus_val16 thresh, sqrt_1;
+      int depth;
+#ifdef FIXED_POINT
+      int shift;
+      opus_val32 thresh32;
+#endif
+
+      N0 = m->eBands[i+1]-m->eBands[i];
+      /* depth in 1/8 bits */
+      depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+
+#ifdef FIXED_POINT
+      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
+      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
+      {
+         opus_val32 t;
+         t = N0<<LM;
+         shift = celt_ilog2(t)>>1;
+         t = SHL32(t, (7-shift)<<1);
+         sqrt_1 = celt_rsqrt_norm(t);
+      }
+#else
+      thresh = .5f*celt_exp2(-.125f*depth);
+      sqrt_1 = celt_rsqrt(N0<<LM);
+#endif
+
+      c=0; do
+      {
+         celt_norm *X;
+         opus_val16 prev1;
+         opus_val16 prev2;
+         opus_val32 Ediff;
+         opus_val16 r;
+         int renormalize=0;
+         prev1 = prev1logE[c*m->nbEBands+i];
+         prev2 = prev2logE[c*m->nbEBands+i];
+         if (C==1)
+         {
+            prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
+            prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
+         }
+         Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
+         Ediff = MAX32(0, Ediff);
+
+#ifdef FIXED_POINT
+         if (Ediff < 16384)
+         {
+            opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
+            r = 2*MIN16(16383,r32);
+         } else {
+            r = 0;
+         }
+         if (LM==3)
+            r = MULT16_16_Q14(23170, MIN32(23169, r));
+         r = SHR16(MIN16(thresh, r),1);
+         r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
+#else
+         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
+            short blocks don't have the same energy as long */
+         r = 2.f*celt_exp2(-Ediff);
+         if (LM==3)
+            r *= 1.41421356f;
+         r = MIN16(thresh, r);
+         r = r*sqrt_1;
+#endif
+         X = X_+c*size+(m->eBands[i]<<LM);
+         for (k=0;k<1<<LM;k++)
+         {
+            /* Detect collapse */
+            if (!(collapse_masks[i*C+c]&1<<k))
+            {
+               /* Fill with noise */
+               for (j=0;j<N0;j++)
+               {
+                  seed = celt_lcg_rand(seed);
+                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
+               }
+               renormalize = 1;
+            }
+         }
+         /* We just added some energy, so we need to renormalise */
+         if (renormalize)
+            renormalise_vector(X, N0<<LM, Q15ONE);
+      } while (++c<C);
+   }
+}
+
+static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+{
+   int i = bandID;
+   int j;
+   opus_val16 a1, a2;
+   opus_val16 left, right;
+   opus_val16 norm;
+#ifdef FIXED_POINT
+   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
+#endif
+   left = VSHR32(bandE[i],shift);
+   right = VSHR32(bandE[i+m->nbEBands],shift);
+   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
+   a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
+   a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      l = X[j];
+      r = Y[j];
+      X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+      /* Side is not encoded, no need to calculate */
+   }
+}
+
+static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+{
+   int j;
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
+      r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
+      X[j] = l+r;
+      Y[j] = r-l;
+   }
+}
+
+static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+{
+   int j;
+   opus_val32 xp=0, side=0;
+   opus_val32 El, Er;
+   opus_val16 mid2;
+#ifdef FIXED_POINT
+   int kl, kr;
+#endif
+   opus_val32 t, lgain, rgain;
+
+   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+   dual_inner_prod(Y, X, Y, N, &xp, &side);
+   /* Compensating for the mid normalization */
+   xp = MULT16_32_Q15(mid, xp);
+   /* mid and side are in Q15, not Q14 like X and Y */
+   mid2 = SHR32(mid, 1);
+   El = MULT16_16(mid2, mid2) + side - 2*xp;
+   Er = MULT16_16(mid2, mid2) + side + 2*xp;
+   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
+   {
+      for (j=0;j<N;j++)
+         Y[j] = X[j];
+      return;
+   }
+
+#ifdef FIXED_POINT
+   kl = celt_ilog2(El)>>1;
+   kr = celt_ilog2(Er)>>1;
+#endif
+   t = VSHR32(El, (kl-7)<<1);
+   lgain = celt_rsqrt_norm(t);
+   t = VSHR32(Er, (kr-7)<<1);
+   rgain = celt_rsqrt_norm(t);
+
+#ifdef FIXED_POINT
+   if (kl < 7)
+      kl = 7;
+   if (kr < 7)
+      kr = 7;
+#endif
+
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      /* Apply mid scaling (side is already scaled) */
+      l = MULT16_16_Q15(mid, X[j]);
+      r = Y[j];
+      X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
+      Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
+   }
+}
+
+/* Decide whether we should spread the pulses in the current frame */
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M)
+{
+   int i, c, N0;
+   int sum = 0, nbBands=0;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   int decision;
+   int hf_sum=0;
+
+   celt_assert(end>0);
+
+   N0 = M*m->shortMdctSize;
+
+   if (M*(eBands[end]-eBands[end-1]) <= 8)
+      return SPREAD_NONE;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j, N, tmp=0;
+         int tcount[3] = {0,0,0};
+         celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+         N = M*(eBands[i+1]-eBands[i]);
+         if (N<=8)
+            continue;
+         /* Compute rough CDF of |x[j]| */
+         for (j=0;j<N;j++)
+         {
+            opus_val32 x2N; /* Q13 */
+
+            x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
+            if (x2N < QCONST16(0.25f,13))
+               tcount[0]++;
+            if (x2N < QCONST16(0.0625f,13))
+               tcount[1]++;
+            if (x2N < QCONST16(0.015625f,13))
+               tcount[2]++;
+         }
+
+         /* Only include four last bands (8 kHz and up) */
+         if (i>m->nbEBands-4)
+            hf_sum += 32*(tcount[1]+tcount[0])/N;
+         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
+         sum += tmp*256;
+         nbBands++;
+      }
+   } while (++c<C);
+
+   if (update_hf)
+   {
+      if (hf_sum)
+         hf_sum /= C*(4-m->nbEBands+end);
+      *hf_average = (*hf_average+hf_sum)>>1;
+      hf_sum = *hf_average;
+      if (*tapset_decision==2)
+         hf_sum += 4;
+      else if (*tapset_decision==0)
+         hf_sum -= 4;
+      if (hf_sum > 22)
+         *tapset_decision=2;
+      else if (hf_sum > 18)
+         *tapset_decision=1;
+      else
+         *tapset_decision=0;
+   }
+   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
+   celt_assert(nbBands>0); /* end has to be non-zero */
+   sum /= nbBands;
+   /* Recursive averaging */
+   sum = (sum+*average)>>1;
+   *average = sum;
+   /* Hysteresis */
+   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
+   if (sum < 80)
+   {
+      decision = SPREAD_AGGRESSIVE;
+   } else if (sum < 256)
+   {
+      decision = SPREAD_NORMAL;
+   } else if (sum < 384)
+   {
+      decision = SPREAD_LIGHT;
+   } else {
+      decision = SPREAD_NONE;
+   }
+#ifdef FUZZING
+   decision = rand()&0x3;
+   *tapset_decision=rand()%3;
+#endif
+   return decision;
+}
+
+/* Indexing table for converting from natural Hadamard to ordery Hadamard
+   This is essentially a bit-reversed Gray, on top of which we've added
+   an inversion of the order because we want the DC at the end rather than
+   the beginning. The lines are for N=2, 4, 8, 16 */
+static const int ordery_table[] = {
+       1,  0,
+       3,  0,  2,  1,
+       7,  0,  4,  3,  6,  1,  5,  2,
+      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
+};
+
+static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   celt_assert(stride>0);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+      {
+         for (j=0;j<N0;j++)
+            tmp[ordery[i]*N0+j] = X[j*stride+i];
+      }
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[i*N0+j] = X[j*stride+i];
+   }
+   for (j=0;j<N;j++)
+      X[j] = tmp[j];
+   RESTORE_STACK;
+}
+
+static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[ordery[i]*N0+j];
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[i*N0+j];
+   }
+   for (j=0;j<N;j++)
+      X[j] = tmp[j];
+   RESTORE_STACK;
+}
+
+void haar1(celt_norm *X, int N0, int stride)
+{
+   int i, j;
+   N0 >>= 1;
+   for (i=0;i<stride;i++)
+      for (j=0;j<N0;j++)
+      {
+         celt_norm tmp1, tmp2;
+         tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
+         tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+         X[stride*2*j+i] = tmp1 + tmp2;
+         X[stride*(2*j+1)+i] = tmp1 - tmp2;
+      }
+}
+
+static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
+{
+   static const opus_int16 exp2_table8[8] =
+      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
+   int qn, qb;
+   int N2 = 2*N-1;
+   if (stereo && N==2)
+      N2--;
+   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
+       always have enough bits left over to code at least one pulse in the
+       side; otherwise it would collapse, since it doesn't get folded. */
+   qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+
+   qb = IMIN(8<<BITRES, qb);
+
+   if (qb<(1<<BITRES>>1)) {
+      qn = 1;
+   } else {
+      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
+      qn = (qn+1)>>1<<1;
+   }
+   celt_assert(qn <= 256);
+   return qn;
+}
+
+struct band_ctx {
+   int encode;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   int spread;
+   int tf_change;
+   ec_ctx *ec;
+   opus_int32 remaining_bits;
+   const celt_ener *bandE;
+   opus_uint32 seed;
+};
+
+struct split_ctx {
+   int inv;
+   int imid;
+   int iside;
+   int delta;
+   int itheta;
+   int qalloc;
+};
+
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+      int LM,
+      int stereo, int *fill)
+{
+   int qn;
+   int itheta=0;
+   int delta;
+   int imid, iside;
+   int qalloc;
+   int pulse_cap;
+   int offset;
+   opus_int32 tell;
+   int inv=0;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   ec_ctx *ec;
+   const celt_ener *bandE;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   intensity = ctx->intensity;
+   ec = ctx->ec;
+   bandE = ctx->bandE;
+
+   /* Decide on the resolution to give to the split parameter theta */
+   pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+   if (stereo && i>=intensity)
+      qn = 1;
+   if (encode)
+   {
+      /* theta is the atan() of the ratio between the (normalized)
+         side and mid. With just that parameter, we can re-scale both
+         mid and side because we know that 1) they have unit norm and
+         2) they are orthogonal. */
+      itheta = stereo_itheta(X, Y, stereo, N);
+   }
+   tell = ec_tell_frac(ec);
+   if (qn!=1)
+   {
+      if (encode)
+         itheta = (itheta*qn+8192)>>14;
+
+      /* Entropy coding of the angle. We use a uniform pdf for the
+         time split, a step for stereo, and a triangular one for the rest. */
+      if (stereo && N>2)
+      {
+         int p0 = 3;
+         int x = itheta;
+         int x0 = qn/2;
+         int ft = p0*(x0+1) + x0;
+         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+         if (encode)
+         {
+            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+         } else {
+            int fs;
+            fs=ec_decode(ec,ft);
+            if (fs<(x0+1)*p0)
+               x=fs/p0;
+            else
+               x=x0+1+(fs-(x0+1)*p0);
+            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+            itheta = x;
+         }
+      } else if (B0>1 || stereo) {
+         /* Uniform pdf */
+         if (encode)
+            ec_enc_uint(ec, itheta, qn+1);
+         else
+            itheta = ec_dec_uint(ec, qn+1);
+      } else {
+         int fs=1, ft;
+         ft = ((qn>>1)+1)*((qn>>1)+1);
+         if (encode)
+         {
+            int fl;
+
+            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+
+            ec_encode(ec, fl, fl+fs, ft);
+         } else {
+            /* Triangular pdf */
+            int fl=0;
+            int fm;
+            fm = ec_decode(ec, ft);
+
+            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
+            {
+               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
+               fs = itheta + 1;
+               fl = itheta*(itheta + 1)>>1;
+            }
+            else
+            {
+               itheta = (2*(qn + 1)
+                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+               fs = qn + 1 - itheta;
+               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+            }
+
+            ec_dec_update(ec, fl, fl+fs, ft);
+         }
+      }
+      itheta = (opus_int32)itheta*16384/qn;
+      if (encode && stereo)
+      {
+         if (itheta==0)
+            intensity_stereo(m, X, Y, bandE, i, N);
+         else
+            stereo_split(X, Y, N);
+      }
+      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
+               Let's do that at higher complexity */
+   } else if (stereo) {
+      if (encode)
+      {
+         inv = itheta > 8192;
+         if (inv)
+         {
+            int j;
+            for (j=0;j<N;j++)
+               Y[j] = -Y[j];
+         }
+         intensity_stereo(m, X, Y, bandE, i, N);
+      }
+      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
+      {
+         if (encode)
+            ec_enc_bit_logp(ec, inv, 2);
+         else
+            inv = ec_dec_bit_logp(ec, 2);
+      } else
+         inv = 0;
+      itheta = 0;
+   }
+   qalloc = ec_tell_frac(ec) - tell;
+   *b -= qalloc;
+
+   if (itheta == 0)
+   {
+      imid = 32767;
+      iside = 0;
+      *fill &= (1<<B)-1;
+      delta = -16384;
+   } else if (itheta == 16384)
+   {
+      imid = 0;
+      iside = 32767;
+      *fill &= ((1<<B)-1)<<B;
+      delta = 16384;
+   } else {
+      imid = bitexact_cos((opus_int16)itheta);
+      iside = bitexact_cos((opus_int16)(16384-itheta));
+      /* This is the mid vs side allocation that minimizes squared error
+         in that band. */
+      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
+   }
+
+   sctx->inv = inv;
+   sctx->imid = imid;
+   sctx->iside = iside;
+   sctx->delta = delta;
+   sctx->itheta = itheta;
+   sctx->qalloc = qalloc;
+}
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
+      celt_norm *lowband_out)
+{
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int c;
+   int stereo;
+   celt_norm *x = X;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   stereo = Y != NULL;
+   c=0; do {
+      int sign=0;
+      if (ctx->remaining_bits>=1<<BITRES)
+      {
+         if (encode)
+         {
+            sign = x[0]<0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+         ctx->remaining_bits -= 1<<BITRES;
+         b-=1<<BITRES;
+      }
+      if (resynth)
+         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+      x = Y;
+   } while (++c<1+stereo);
+   if (lowband_out)
+      lowband_out[0] = SHR16(X[0],4);
+   return 1;
+}
+
+/* This function is responsible for encoding and decoding a mono partition.
+   It can split the band in two and transmit the energy difference with
+   the two half-bands. It can be called recursively so bands can end up being
+   split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM,
+      opus_val16 gain, int fill)
+{
+   const unsigned char *cache;
+   int q;
+   int curr_bits;
+   int imid=0, iside=0;
+   int B0=B;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   celt_norm *Y=NULL;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int spread;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   spread = ctx->spread;
+   ec = ctx->ec;
+
+   /* If we need 1.5 more bit than we can produce, split the band in two. */
+   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
+   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
+   {
+      int mbits, sbits, delta;
+      int itheta;
+      int qalloc;
+      struct split_ctx sctx;
+      celt_norm *next_lowband2=NULL;
+      opus_int32 rebalance;
+
+      N >>= 1;
+      Y = X+N;
+      LM -= 1;
+      if (B==1)
+         fill = (fill&1)|(fill<<1);
+      B = (B+1)>>1;
+
+      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
+            LM, 0, &fill);
+      imid = sctx.imid;
+      iside = sctx.iside;
+      delta = sctx.delta;
+      itheta = sctx.itheta;
+      qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+      mid = imid;
+      side = iside;
+#else
+      mid = (1.f/32768)*imid;
+      side = (1.f/32768)*iside;
+#endif
+
+      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
+      if (B0>1 && (itheta&0x3fff))
+      {
+         if (itheta > 8192)
+            /* Rough approximation for pre-echo masking */
+            delta -= delta>>(4-LM);
+         else
+            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
+      }
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      if (lowband)
+         next_lowband2 = lowband+N; /* >32-bit split case */
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         cm = quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+      } else {
+         cm = quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+      }
+   } else {
+      /* This is the basic no-split case */
+      q = bits2pulses(m, i, LM, b);
+      curr_bits = pulses2bits(m, i, LM, q);
+      ctx->remaining_bits -= curr_bits;
+
+      /* Ensures we can never bust the budget */
+      while (ctx->remaining_bits < 0 && q > 0)
+      {
+         ctx->remaining_bits += curr_bits;
+         q--;
+         curr_bits = pulses2bits(m, i, LM, q);
+         ctx->remaining_bits -= curr_bits;
+      }
+
+      if (q!=0)
+      {
+         int K = get_pulses(q);
+
+         /* Finally do the actual quantization */
+         if (encode)
+         {
+            cm = alg_quant(X, N, K, spread, B, ec
+#ifdef RESYNTH
+                 , gain
+#endif
+                 );
+         } else {
+            cm = alg_unquant(X, N, K, spread, B, ec, gain);
+         }
+      } else {
+         /* If there's no pulse, fill the band anyway */
+         int j;
+         if (resynth)
+         {
+            unsigned cm_mask;
+            /* B can be as large as 16, so this shift might overflow an int on a
+               16-bit platform; use a long to get defined behavior.*/
+            cm_mask = (unsigned)(1UL<<B)-1;
+            fill &= cm_mask;
+            if (!fill)
+            {
+               for (j=0;j<N;j++)
+                  X[j] = 0;
+            } else {
+               if (lowband == NULL)
+               {
+                  /* Noise */
+                  for (j=0;j<N;j++)
+                  {
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
+                  }
+                  cm = cm_mask;
+               } else {
+                  /* Folded spectrum */
+                  for (j=0;j<N;j++)
+                  {
+                     opus_val16 tmp;
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     /* About 48 dB below the "normal" folding level */
+                     tmp = QCONST16(1.0f/256, 10);
+                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
+                     X[j] = lowband[j]+tmp;
+                  }
+                  cm = fill;
+               }
+               renormalise_vector(X, N, gain);
+            }
+         }
+      }
+   }
+
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{
+   int N0=N;
+   int N_B=N;
+   int N_B0;
+   int B0=B;
+   int time_divide=0;
+   int recombine=0;
+   int longBlocks;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int k;
+   int encode;
+   int tf_change;
+
+   encode = ctx->encode;
+   tf_change = ctx->tf_change;
+
+   longBlocks = B0==1;
+
+   N_B /= B;
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, NULL, b, lowband_out);
+   }
+
+   if (tf_change>0)
+      recombine = tf_change;
+   /* Band recombining to increase frequency resolution */
+
+   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+   {
+      int j;
+      for (j=0;j<N;j++)
+         lowband_scratch[j] = lowband[j];
+      lowband = lowband_scratch;
+   }
+
+   for (k=0;k<recombine;k++)
+   {
+      static const unsigned char bit_interleave_table[16]={
+            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+      };
+      if (encode)
+         haar1(X, N>>k, 1<<k);
+      if (lowband)
+         haar1(lowband, N>>k, 1<<k);
+      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
+   }
+   B>>=recombine;
+   N_B<<=recombine;
+
+   /* Increasing the time resolution */
+   while ((N_B&1) == 0 && tf_change<0)
+   {
+      if (encode)
+         haar1(X, N_B, B);
+      if (lowband)
+         haar1(lowband, N_B, B);
+      fill |= fill<<B;
+      B <<= 1;
+      N_B >>= 1;
+      time_divide++;
+      tf_change++;
+   }
+   B0=B;
+   N_B0 = N_B;
+
+   /* Reorganize the samples in time order instead of frequency order */
+   if (B0>1)
+   {
+      if (encode)
+         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      if (lowband)
+         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+   }
+
+   cm = quant_partition(ctx, X, N, b, B, lowband,
+         LM, gain, fill);
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      /* Undo the sample reorganization going from time order to frequency order */
+      if (B0>1)
+         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+
+      /* Undo time-freq changes that we did earlier */
+      N_B = N_B0;
+      B = B0;
+      for (k=0;k<time_divide;k++)
+      {
+         B >>= 1;
+         N_B <<= 1;
+         cm |= cm>>B;
+         haar1(X, N_B, B);
+      }
+
+      for (k=0;k<recombine;k++)
+      {
+         static const unsigned char bit_deinterleave_table[16]={
+               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+         };
+         cm = bit_deinterleave_table[cm];
+         haar1(X, N0>>k, 1<<k);
+      }
+      B<<=recombine;
+
+      /* Scale output for later folding */
+      if (lowband_out)
+      {
+         int j;
+         opus_val16 n;
+         n = celt_sqrt(SHL32(EXTEND32(N0),22));
+         for (j=0;j<N0;j++)
+            lowband_out[j] = MULT16_16_Q15(n,X[j]);
+      }
+      cm &= (1<<B)-1;
+   }
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the stereo case. */
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      celt_norm *lowband_scratch, int fill)
+{
+   int imid=0, iside=0;
+   int inv = 0;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int mbits, sbits, delta;
+   int itheta;
+   int qalloc;
+   struct split_ctx sctx;
+   int orig_fill;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, Y, b, lowband_out);
+   }
+
+   orig_fill = fill;
+
+   compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
+         LM, 1, &fill);
+   inv = sctx.inv;
+   imid = sctx.imid;
+   iside = sctx.iside;
+   delta = sctx.delta;
+   itheta = sctx.itheta;
+   qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+   mid = imid;
+   side = iside;
+#else
+   mid = (1.f/32768)*imid;
+   side = (1.f/32768)*iside;
+#endif
+
+   /* This is a special case for N=2 that only works for stereo and takes
+      advantage of the fact that mid and side are orthogonal to encode
+      the side with just one bit. */
+   if (N==2)
+   {
+      int c;
+      int sign=0;
+      celt_norm *x2, *y2;
+      mbits = b;
+      sbits = 0;
+      /* Only need one bit for the side. */
+      if (itheta != 0 && itheta != 16384)
+         sbits = 1<<BITRES;
+      mbits -= sbits;
+      c = itheta > 8192;
+      ctx->remaining_bits -= qalloc+sbits;
+
+      x2 = c ? Y : X;
+      y2 = c ? X : Y;
+      if (sbits)
+      {
+         if (encode)
+         {
+            /* Here we only need to encode a sign for the side. */
+            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+      }
+      sign = 1-2*sign;
+      /* We use orig_fill here because we want to fold the side, but if
+         itheta==16384, we'll have cleared the low bits of fill. */
+      cm = quant_band(ctx, x2, N, mbits, B, lowband,
+            LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
+      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+         and there's no need to worry about mixing with the other channel. */
+      y2[0] = -sign*x2[1];
+      y2[1] = sign*x2[0];
+      if (resynth)
+      {
+         celt_norm tmp;
+         X[0] = MULT16_16_Q15(mid, X[0]);
+         X[1] = MULT16_16_Q15(mid, X[1]);
+         Y[0] = MULT16_16_Q15(side, Y[0]);
+         Y[1] = MULT16_16_Q15(side, Y[1]);
+         tmp = X[0];
+         X[0] = SUB16(tmp,Y[0]);
+         Y[0] = ADD16(tmp,Y[0]);
+         tmp = X[1];
+         X[1] = SUB16(tmp,Y[1]);
+         Y[1] = ADD16(tmp,Y[1]);
+      }
+   } else {
+      /* "Normal" split code */
+      opus_int32 rebalance;
+
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm = quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm |= quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+      } else {
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm = quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm |= quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+      }
+   }
+
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      if (N!=2)
+         stereo_merge(X, Y, mid, N);
+      if (inv)
+      {
+         int j;
+         for (j=0;j<N;j++)
+            Y[j] = -Y[j];
+      }
+   }
+   return cm;
+}
+
+
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
+{
+   int i;
+   opus_int32 remaining_bits;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
+   VARDECL(celt_norm, _norm);
+   celt_norm *lowband_scratch;
+   int B;
+   int M;
+   int lowband_offset;
+   int update_lowband = 1;
+   int C = Y_ != NULL ? 2 : 1;
+   int norm_offset;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !encode;
+#endif
+   struct band_ctx ctx;
+   SAVE_STACK;
+
+   M = 1<<LM;
+   B = shortBlocks ? M : 1;
+   norm_offset = M*eBands[start];
+   /* No need to allocate norm for the last band because we don't need an
+      output in that band. */
+   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
+   norm = _norm;
+   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
+   /* We can use the last band as scratch space because we don't need that
+      scratch space for the last band. */
+   lowband_scratch = X_+M*eBands[m->nbEBands-1];
+
+   lowband_offset = 0;
+   ctx.bandE = bandE;
+   ctx.ec = ec;
+   ctx.encode = encode;
+   ctx.intensity = intensity;
+   ctx.m = m;
+   ctx.seed = *seed;
+   ctx.spread = spread;
+   for (i=start;i<end;i++)
+   {
+      opus_int32 tell;
+      int b;
+      int N;
+      opus_int32 curr_balance;
+      int effective_lowband=-1;
+      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
+      int tf_change=0;
+      unsigned x_cm;
+      unsigned y_cm;
+      int last;
+
+      ctx.i = i;
+      last = (i==end-1);
+
+      X = X_+M*eBands[i];
+      if (Y_!=NULL)
+         Y = Y_+M*eBands[i];
+      else
+         Y = NULL;
+      N = M*eBands[i+1]-M*eBands[i];
+      tell = ec_tell_frac(ec);
+
+      /* Compute how many bits we want to allocate to this band */
+      if (i != start)
+         balance -= tell;
+      remaining_bits = total_bits-tell-1;
+      ctx.remaining_bits = remaining_bits;
+      if (i <= codedBands-1)
+      {
+         curr_balance = balance / IMIN(3, codedBands-i);
+         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
+      } else {
+         b = 0;
+      }
+
+      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
+            lowband_offset = i;
+
+      tf_change = tf_res[i];
+      ctx.tf_change = tf_change;
+      if (i>=m->effEBands)
+      {
+         X=norm;
+         if (Y_!=NULL)
+            Y = norm;
+         lowband_scratch = NULL;
+      }
+      if (i==end-1)
+         lowband_scratch = NULL;
+
+      /* Get a conservative estimate of the collapse_mask's for the bands we're
+         going to be folding from. */
+      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
+      {
+         int fold_start;
+         int fold_end;
+         int fold_i;
+         /* This ensures we never repeat spectral content within one band */
+         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
+         fold_start = lowband_offset;
+         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
+         fold_end = lowband_offset-1;
+         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
+         x_cm = y_cm = 0;
+         fold_i = fold_start; do {
+           x_cm |= collapse_masks[fold_i*C+0];
+           y_cm |= collapse_masks[fold_i*C+C-1];
+         } while (++fold_i<fold_end);
+      }
+      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
+         always) be non-zero. */
+      else
+         x_cm = y_cm = (1<<B)-1;
+
+      if (dual_stereo && i==intensity)
+      {
+         int j;
+
+         /* Switch off dual stereo to do intensity. */
+         dual_stereo = 0;
+         if (resynth)
+            for (j=0;j<M*eBands[i]-norm_offset;j++)
+               norm[j] = HALF32(norm[j]+norm2[j]);
+      }
+      if (dual_stereo)
+      {
+         x_cm = quant_band(&ctx, X, N, b/2, B,
+               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
+         y_cm = quant_band(&ctx, Y, N, b/2, B,
+               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
+               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
+      } else {
+         if (Y!=NULL)
+         {
+            x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+         } else {
+            x_cm = quant_band(&ctx, X, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+         }
+         y_cm = x_cm;
+      }
+      collapse_masks[i*C+0] = (unsigned char)x_cm;
+      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
+      balance += pulses[i] + tell;
+
+      /* Update the folding position only as long as we have 1 bit/sample depth. */
+      update_lowband = b>(N<<BITRES);
+   }
+   *seed = ctx.seed;
+
+   RESTORE_STACK;
+}
+
diff --git a/src/main/jni/opus/celt/bands.h b/src/main/jni/opus/celt/bands.h
new file mode 100644
index 000000000..96ba52a64
--- /dev/null
+++ b/src/main/jni/opus/celt/bands.h
@@ -0,0 +1,114 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef BANDS_H
+#define BANDS_H
+
+#include "arch.h"
+#include "modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "rate.h"
+
+/** Compute the amplitude (sqrt energy) in each of the bands
+ * @param m Mode data
+ * @param X Spectrum
+ * @param bandE Square root of the energy for each band (returned)
+ */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
+
+/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
+
+/** Normalise each band of X such that the energy in each band is
+    equal to 1
+ * @param m Mode data
+ * @param X Spectrum (returned normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
+
+/** Denormalise each band of X to restore full amplitude
+ * @param m Mode data
+ * @param X Spectrum (returned de-normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
+
+#define SPREAD_NONE       (0)
+#define SPREAD_LIGHT      (1)
+#define SPREAD_NORMAL     (2)
+#define SPREAD_AGGRESSIVE (3)
+
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M);
+
+#ifdef MEASURE_NORM_MSE
+void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
+#endif
+
+void haar1(celt_norm *X, int N0, int stride);
+
+/** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
+ * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
+ * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
+ * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
+ * @param balance Number of unallocated bits
+ * @param en Entropy coder state
+ * @param LM log2() of the number of 2.5 subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
+ */
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
+
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+      opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed);
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
+#endif /* BANDS_H */
diff --git a/src/main/jni/opus/celt/celt.c b/src/main/jni/opus/celt/celt.c
new file mode 100644
index 000000000..3e0ce6e6a
--- /dev/null
+++ b/src/main/jni/opus/celt/celt.c
@@ -0,0 +1,223 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#ifndef PACKAGE_VERSION
+#define PACKAGE_VERSION "unknown"
+#endif
+
+
+int resampling_factor(opus_int32 rate)
+{
+   int ret;
+   switch (rate)
+   {
+   case 48000:
+      ret = 1;
+      break;
+   case 24000:
+      ret = 2;
+      break;
+   case 16000:
+      ret = 3;
+      break;
+   case 12000:
+      ret = 4;
+      break;
+   case 8000:
+      ret = 6;
+      break;
+   default:
+#ifndef CUSTOM_MODES
+      celt_assert(0);
+#endif
+      ret = 0;
+      break;
+   }
+   return ret;
+}
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = x[-T-2];
+   x3 = x[-T-1];
+   x2 = x[-T];
+   x1 = x[-T+1];
+   for (i=0;i<N;i++)
+   {
+      x0=x[i-T+2];
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x2)
+               + MULT16_32_Q15(g11,ADD32(x1,x3))
+               + MULT16_32_Q15(g12,ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+
+}
+#endif
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap)
+{
+   int i;
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   opus_val16 g00, g01, g02, g10, g11, g12;
+   opus_val32 x0, x1, x2, x3, x4;
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      x0=x[i-T1+2];
+      f = MULT16_16_Q15(window[i],window[i]);
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+
+   }
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+
+   /* Compute the part with the constant filter. */
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
+}
+
+const signed char tf_select_table[4][8] = {
+      {0, -1, 0, -1,    0,-1, 0,-1},
+      {0, -1, 0, -2,    1, 0, 1,-1},
+      {0, -2, 0, -3,    2, 0, 1,-1},
+      {0, -2, 0, -3,    3, 0, 1,-1},
+};
+
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
+{
+   int i;
+   for (i=0;i<m->nbEBands;i++)
+   {
+      int N;
+      N=(m->eBands[i+1]-m->eBands[i])<<LM;
+      cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2;
+   }
+}
+
+
+
+const char *opus_strerror(int error)
+{
+   static const char * const error_strings[8] = {
+      "success",
+      "invalid argument",
+      "buffer too small",
+      "internal error",
+      "corrupted stream",
+      "request not implemented",
+      "invalid state",
+      "memory allocation failed"
+   };
+   if (error > 0 || error < -7)
+      return "unknown error";
+   else
+      return error_strings[-error];
+}
+
+const char *opus_get_version_string(void)
+{
+    return "libopus " PACKAGE_VERSION
+#ifdef FIXED_POINT
+          "-fixed"
+#endif
+#ifdef FUZZING
+          "-fuzzing"
+#endif
+          ;
+}
diff --git a/src/main/jni/opus/celt/celt.h b/src/main/jni/opus/celt/celt.h
new file mode 100644
index 000000000..5deea1f0a
--- /dev/null
+++ b/src/main/jni/opus/celt/celt.h
@@ -0,0 +1,218 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/**
+  @file celt.h
+  @brief Contains all the functions for encoding and decoding audio
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_H
+#define CELT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+#include "opus_custom.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CELTEncoder OpusCustomEncoder
+#define CELTDecoder OpusCustomDecoder
+#define CELTMode OpusCustomMode
+
+typedef struct {
+   int valid;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
+   int        bandwidth;
+}AnalysisInfo;
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
+
+/* Encoder/decoder Requests */
+
+/* Expose this option again when variable framesize actually works */
+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+
+
+#define CELT_SET_PREDICTION_REQUEST    10002
+/** Controls the use of interframe prediction.
+    0=Independent frames
+    1=Short term interframe prediction allowed
+    2=Long term prediction allowed
+ */
+#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_INPUT_CLIPPING_REQUEST    10004
+#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_AND_CLEAR_ERROR_REQUEST   10007
+#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x)
+
+#define CELT_SET_CHANNELS_REQUEST    10008
+#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x)
+
+
+/* Internal */
+#define CELT_SET_START_BAND_REQUEST    10010
+#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_END_BAND_REQUEST    10012
+#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_MODE_REQUEST    10015
+/** Get the CELTMode used by an encoder or decoder */
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
+
+#define CELT_SET_SIGNALLING_REQUEST    10016
+#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+
+#define OPUS_SET_LFE_REQUEST    10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
+
+/* Encoder stuff */
+
+int celt_encoder_get_size(int channels);
+
+int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch);
+
+
+
+/* Decoder stuff */
+
+int celt_decoder_get_size(int channels);
+
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
+
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
+
+#define celt_encoder_ctl opus_custom_encoder_ctl
+#define celt_decoder_ctl opus_custom_decoder_ctl
+
+
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
+#endif
+
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+
+static const unsigned char tapset_icdf[3]={2,1,0};
+
+#ifdef CUSTOM_MODES
+static const unsigned char toOpusTable[20] = {
+      0xE0, 0xE8, 0xF0, 0xF8,
+      0xC0, 0xC8, 0xD0, 0xD8,
+      0xA0, 0xA8, 0xB0, 0xB8,
+      0x00, 0x00, 0x00, 0x00,
+      0x80, 0x88, 0x90, 0x98,
+};
+
+static const unsigned char fromOpusTable[16] = {
+      0x80, 0x88, 0x90, 0x98,
+      0x40, 0x48, 0x50, 0x58,
+      0x20, 0x28, 0x30, 0x38,
+      0x00, 0x08, 0x10, 0x18
+};
+
+static OPUS_INLINE int toOpus(unsigned char c)
+{
+   int ret=0;
+   if (c<0xA0)
+      ret = toOpusTable[c>>3];
+   if (ret == 0)
+      return -1;
+   else
+      return ret|(c&0x7);
+}
+
+static OPUS_INLINE int fromOpus(unsigned char c)
+{
+   if (c<0x80)
+      return -1;
+   else
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+}
+#endif /* CUSTOM_MODES */
+
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+
+extern const signed char tf_select_table[4][8];
+
+int resampling_factor(opus_int32 rate);
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap);
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
+
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CELT_H */
diff --git a/src/main/jni/opus/celt/celt_decoder.c b/src/main/jni/opus/celt/celt_decoder.c
new file mode 100644
index 000000000..830398eed
--- /dev/null
+++ b/src/main/jni/opus/celt/celt_decoder.c
@@ -0,0 +1,1195 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_DECODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+/**********************************************************************/
+/*                                                                    */
+/*                             DECODER                                */
+/*                                                                    */
+/**********************************************************************/
+#define DECODE_BUFFER_SIZE 2048
+
+/** Decoder state
+ @brief Decoder state
+ */
+struct OpusCustomDecoder {
+   const OpusCustomMode *mode;
+   int overlap;
+   int channels;
+   int stream_channels;
+
+   int downsample;
+   int start, end;
+   int signalling;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+
+   opus_uint32 rng;
+   int error;
+   int last_pitch_index;
+   int loss_count;
+   int postfilter_period;
+   int postfilter_period_old;
+   opus_val16 postfilter_gain;
+   opus_val16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
+
+   celt_sig preemph_memD[2];
+
+   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+
+int celt_decoder_get_size(int channels)
+{
+   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_decoder_get_size(mode, channels);
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTDecoder)
+            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
+            + channels*LPC_ORDER*sizeof(opus_val16)
+            + 4*2*mode->nbEBands*sizeof(opus_val16);
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   ret = opus_custom_decoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_decoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   int ret;
+   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
+   if (ret != OPUS_OK)
+      return ret;
+   st->downsample = resampling_factor(sampling_rate);
+   if (st->downsample==0)
+      return OPUS_BAD_ARG;
+   else
+      return OPUS_OK;
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = opus_select_arch();
+
+   st->loss_count = 0;
+
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
+{
+#ifdef FIXED_POINT
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+#else
+   return (opus_val16)x;
+#endif
+}
+
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+{
+   int c;
+   int Nd;
+   int apply_downsampling=0;
+   opus_val16 coef0;
+
+   coef0 = coef[0];
+   Nd = N/downsample;
+   c=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT x;
+      opus_val16  * OPUS_RESTRICT y;
+      celt_sig m = mem[c];
+      x =in[c];
+      y = pcm+c;
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         opus_val16 coef1 = coef[1];
+         opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp)
+                          - MULT16_32_Q15(coef1, x[j]);
+            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+         }
+      }
+      mem[c] = m;
+
+      if (apply_downsampling)
+      {
+         /* Perform down-sampling */
+         for (j=0;j<Nd;j++)
+            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+      }
+   } while (++c<C);
+}
+
+/** Compute the IMDCT and apply window for all sub-frames and
+    all channels in a frame */
+#ifndef RESYNTH
+static
+#endif
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+{
+   int b, c;
+   int B;
+   int N;
+   int shift;
+   const int overlap = OVERLAP(mode);
+
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
+   } while (++c<C);
+}
+
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int i, curr, tf_select;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+
+   budget = dec->storage*8;
+   tell = ec_tell(dec);
+   logp = isTransient ? 2 : 4;
+   tf_select_rsv = LM>0 && tell+logp+1<=budget;
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   tf_select = 0;
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
+      tf_select = ec_dec_bit_logp(dec, 1);
+   }
+   for (i=start;i<end;i++)
+   {
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   }
+}
+
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+{
+   int c;
+   int i;
+   const int C = st->channels;
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   int start;
+   int downsample;
+   int loss_count;
+   int noise_based;
+   const opus_int16 *eBands;
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<C);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+   loss_count = st->loss_count;
+   start = st->start;
+   downsample = st->downsample;
+   noise_based = loss_count >= 5 || start != 0;
+   ALLOC(scratch, noise_based?N*C:N, celt_sig);
+   if (noise_based)
+   {
+      /* Noise-based PLC/CNG */
+      celt_sig *freq;
+      VARDECL(celt_norm, X);
+      opus_uint32 seed;
+      opus_val16 *plcLogE;
+      int end;
+      int effEnd;
+
+      end = st->end;
+      effEnd = IMAX(start, IMIN(end, mode->effEBands));
+
+      /* Share the interleaved signal MDCT coefficient buffer with the
+         deemphasis scratch buffer. */
+      freq = scratch;
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+
+      if (loss_count >= 5)
+         plcLogE = backgroundLogE;
+      else {
+         /* Energy decay */
+         opus_val16 decay = loss_count==0 ?
+               QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+         c=0; do
+         {
+            for (i=start;i<end;i++)
+               oldBandE[c*nbEBands+i] -= decay;
+         } while (++c<C);
+         plcLogE = oldBandE;
+      }
+      seed = st->rng;
+      for (c=0;c<C;c++)
+      {
+         for (i=start;i<effEnd;i++)
+         {
+            int j;
+            int boffs;
+            int blen;
+            boffs = N*c+(eBands[i]<<LM);
+            blen = (eBands[i+1]-eBands[i])<<LM;
+            for (j=0;j<blen;j++)
+            {
+               seed = celt_lcg_rand(seed);
+               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+            }
+            renormalise_vector(X+boffs, blen, Q15ONE);
+         }
+      }
+      st->rng = seed;
+
+      denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
+
+      c=0; do {
+         int bound = eBands[effEnd]<<LM;
+         if (downsample!=1)
+            bound = IMIN(bound, N/downsample);
+         for (i=bound;i<N;i++)
+            freq[c*N+i] = 0;
+      } while (++c<C);
+      c=0; do {
+         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+               DECODE_BUFFER_SIZE-N+(overlap>>1));
+      } while (++c<C);
+      compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+   } else {
+      /* Pitch-based PLC */
+      const opus_val16 *window;
+      opus_val16 fade = Q15ONE;
+      int pitch_index;
+      VARDECL(opus_val32, etmp);
+      VARDECL(opus_val16, exc);
+
+      if (loss_count == 0)
+      {
+         VARDECL( opus_val16, lp_pitch_buf );
+         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+         pitch_downsample(decode_mem, lp_pitch_buf,
+               DECODE_BUFFER_SIZE, C, st->arch);
+         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
+         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+         st->last_pitch_index = pitch_index;
+      } else {
+         pitch_index = st->last_pitch_index;
+         fade = QCONST16(.8f,15);
+      }
+
+      ALLOC(etmp, overlap, opus_val32);
+      ALLOC(exc, MAX_PERIOD, opus_val16);
+      window = mode->window;
+      c=0; do {
+         opus_val16 decay;
+         opus_val16 attenuation;
+         opus_val32 S1=0;
+         celt_sig *buf;
+         int extrapolation_offset;
+         int extrapolation_len;
+         int exc_length;
+         int j;
+
+         buf = decode_mem[c];
+         for (i=0;i<MAX_PERIOD;i++) {
+            exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+         }
+
+         if (loss_count == 0)
+         {
+            opus_val32 ac[LPC_ORDER+1];
+            /* Compute LPC coefficients for the last MAX_PERIOD samples before
+               the first loss so we can work in the excitation-filter domain. */
+            _celt_autocorr(exc, ac, window, overlap,
+                   LPC_ORDER, MAX_PERIOD, st->arch);
+            /* Add a noise floor of -40 dB. */
+#ifdef FIXED_POINT
+            ac[0] += SHR32(ac[0],13);
+#else
+            ac[0] *= 1.0001f;
+#endif
+            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+            for (i=1;i<=LPC_ORDER;i++)
+            {
+               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+            }
+            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+         }
+         /* We want the excitation for 2 pitch periods in order to look for a
+            decaying signal, but we can't get more than MAX_PERIOD. */
+         exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+         /* Initialize the LPC history with the samples just before the start
+            of the region for which we're computing the excitation. */
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               lpc_mem[i] =
+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+            }
+            /* Compute the excitation for exc_length samples before the loss. */
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+         }
+
+         /* Check if the waveform is decaying, and if so how fast.
+            We do this to avoid adding energy when concealing in a segment
+            with decaying energy. */
+         {
+            opus_val32 E1=1, E2=1;
+            int decay_length;
+#ifdef FIXED_POINT
+            int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+            decay_length = exc_length>>1;
+            for (i=0;i<decay_length;i++)
+            {
+               opus_val16 e;
+               e = exc[MAX_PERIOD-decay_length+i];
+               E1 += SHR32(MULT16_16(e, e), shift);
+               e = exc[MAX_PERIOD-2*decay_length+i];
+               E2 += SHR32(MULT16_16(e, e), shift);
+            }
+            E1 = MIN32(E1, E2);
+            decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+         }
+
+         /* Move the decoder memory one frame to the left to give us room to
+            add the data for the new frame. We ignore the overlap that extends
+            past the end of the buffer, because we aren't going to use it. */
+         OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+
+         /* Extrapolate from the end of the excitation with a period of
+            "pitch_index", scaling down each period by an additional factor of
+            "decay". */
+         extrapolation_offset = MAX_PERIOD-pitch_index;
+         /* We need to extrapolate enough samples to cover a complete MDCT
+            window (including overlap/2 samples on both sides). */
+         extrapolation_len = N+overlap;
+         /* We also apply fading if this is not the first loss. */
+         attenuation = MULT16_16_Q15(fade, decay);
+         for (i=j=0;i<extrapolation_len;i++,j++)
+         {
+            opus_val16 tmp;
+            if (j >= pitch_index) {
+               j -= pitch_index;
+               attenuation = MULT16_16_Q15(attenuation, decay);
+            }
+            buf[DECODE_BUFFER_SIZE-N+i] =
+                  SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+                        exc[extrapolation_offset+j])), SIG_SHIFT);
+            /* Compute the energy of the previously decoded signal whose
+               excitation we're copying. */
+            tmp = ROUND16(
+                  buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+                  SIG_SHIFT);
+            S1 += SHR32(MULT16_16(tmp, tmp), 8);
+         }
+
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            /* Copy the last decoded samples (prior to the overlap region) to
+               synthesis filter memory so we can have a continuous signal. */
+            for (i=0;i<LPC_ORDER;i++)
+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+            /* Apply the synthesis filter to convert the excitation back into
+               the signal domain. */
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+                  lpc_mem);
+         }
+
+         /* Check if the synthesis energy is higher than expected, which can
+            happen with the signal changes during our window. If so,
+            attenuate. */
+         {
+            opus_val32 S2=0;
+            for (i=0;i<extrapolation_len;i++)
+            {
+               opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+               S2 += SHR32(MULT16_16(tmp, tmp), 8);
+            }
+            /* This checks for an "explosion" in the synthesis. */
+#ifdef FIXED_POINT
+            if (!(S1 > SHR32(S2,2)))
+#else
+            /* The float test is written this way to catch NaNs in the output
+               of the IIR filter at the same time. */
+            if (!(S1 > 0.2f*S2))
+#endif
+            {
+               for (i=0;i<extrapolation_len;i++)
+                  buf[DECODE_BUFFER_SIZE-N+i] = 0;
+            } else if (S1 < S2)
+            {
+               opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+               for (i=0;i<overlap;i++)
+               {
+                  opus_val16 tmp_g = Q15ONE
+                        - MULT16_16_Q15(window[i], Q15ONE-ratio);
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+               for (i=overlap;i<extrapolation_len;i++)
+               {
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+            }
+         }
+
+         /* Apply the pre-filter to the MDCT overlap for the next frame because
+            the post-filter will be re-applied in the decoder after the MDCT
+            overlap. */
+         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+              st->postfilter_period, st->postfilter_period, overlap,
+              -st->postfilter_gain, -st->postfilter_gain,
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+
+         /* Simulate TDAC on the concealed audio so that it blends with the
+            MDCT of the next frame. */
+         for (i=0;i<overlap/2;i++)
+         {
+            buf[DECODE_BUFFER_SIZE+i] =
+               MULT16_32_Q15(window[i], etmp[overlap-1-i])
+               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+         }
+      } while (++c<C);
+   }
+
+   deemphasis(out_syn, pcm, N, C, downsample,
+         mode->preemph, st->preemph_memD, scratch);
+
+   st->loss_count = loss_count+1;
+
+   RESTORE_STACK;
+}
+
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+{
+   int c, i, N;
+   int spread_decision;
+   opus_int32 bits;
+   ec_dec _dec;
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(int, fine_quant);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+
+   int shortBlocks;
+   int isTransient;
+   int intra_ener;
+   const int CC = st->channels;
+   int LM, M;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int postfilter_pitch;
+   opus_val16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   opus_int32 total_bits;
+   opus_int32 balance;
+   opus_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   int C = st->stream_channels;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   ALLOC_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   frame_size *= st->downsample;
+
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+   } while (++c<CC);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && data!=NULL)
+   {
+      int data0=data[0];
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         data0 = fromOpus(data0);
+         if (data0<0)
+            return OPUS_INVALID_PACKET;
+      }
+      st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+      LM = (data0>>3)&0x3;
+      C = 1 + ((data0>>2)&0x1);
+      data++;
+      len--;
+      if (LM>mode->maxLM)
+         return OPUS_INVALID_PACKET;
+      if (frame_size < mode->shortMdctSize<<LM)
+         return OPUS_BUFFER_TOO_SMALL;
+      else
+         frame_size = mode->shortMdctSize<<LM;
+   } else {
+#else
+   {
+#endif
+      for (LM=0;LM<=mode->maxLM;LM++)
+         if (mode->shortMdctSize<<LM==frame_size)
+            break;
+      if (LM>mode->maxLM)
+         return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+
+   if (len<0 || len>1275 || pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   N = M*mode->shortMdctSize;
+
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   if (data == NULL || len<=1)
+   {
+      celt_decode_lost(st, pcm, N, LM);
+      RESTORE_STACK;
+      return frame_size/st->downsample;
+   }
+
+   if (dec == NULL)
+   {
+      ec_dec_init(&_dec,(unsigned char*)data,len);
+      dec = &_dec;
+   }
+
+   if (C==1)
+   {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+   }
+
+   total_bits = len*8;
+   tell = ec_tell(dec);
+
+   if (tell >= total_bits)
+      silence = 1;
+   else if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_tell(dec);
+   }
+
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (st->start==0 && tell+16 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 3);
+         if (ec_tell(dec)+2<=total_bits)
+            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+      }
+      tell = ec_tell(dec);
+   }
+
+   if (LM > 0 && tell+3 <= total_bits)
+   {
+      isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_tell(dec);
+   }
+   else
+      isTransient = 0;
+
+   if (isTransient)
+      shortBlocks = M;
+   else
+      shortBlocks = 0;
+
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+         intra_ener, dec, C, LM);
+
+   ALLOC(tf_res, nbEBands, int);
+   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+
+   tell = ec_tell(dec);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+
+   ALLOC(cap, nbEBands, int);
+
+   init_caps(mode,cap,LM,C);
+
+   ALLOC(offsets, nbEBands, int);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_tell_frac(dec);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+      {
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_tell_frac(dec);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
+      }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+   }
+
+   ALLOC(fine_quant, nbEBands, int);
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
+
+   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+
+   unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+
+   /* Decode fixed codebook */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+
+   quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+
+   unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+
+   if (anti_collapse_on)
+      anti_collapse(mode, X, collapse_masks, LM, C, N,
+            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+
+   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+      for (i=0;i<C*N;i++)
+         freq[i] = 0;
+   } else {
+      /* Synthesis */
+      denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+   }
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
+
+   c=0; do {
+      int bound = M*eBands[effEnd];
+      if (st->downsample!=1)
+         bound = IMIN(bound, N/st->downsample);
+      for (i=bound;i<N;i++)
+         freq[c*N+i] = 0;
+   } while (++c<C);
+
+   c=0; do {
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<CC);
+
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[N+i] = freq[i];
+   }
+   if (CC==1&&C==2)
+   {
+      for (i=0;i<N;i++)
+         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
+   }
+
+   /* Compute inverse MDCTs */
+   compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+            mode->window, overlap);
+      if (LM!=0)
+         comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               mode->window, overlap);
+
+   } while (++c<CC);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
+   if (LM!=0)
+   {
+      st->postfilter_period_old = st->postfilter_period;
+      st->postfilter_gain_old = st->postfilter_gain;
+      st->postfilter_tapset_old = st->postfilter_tapset;
+   }
+
+   if (C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+
+   /* In case start or end were to change */
+   if (!isTransient)
+   {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+      for (i=0;i<2*nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+   } else {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<2);
+   st->rng = dec->rng;
+
+   /* We reuse freq[] as scratch space for the de-emphasis */
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+   st->loss_count = 0;
+   RESTORE_STACK;
+   if (ec_tell(dec) > 8*len)
+      return OPUS_INTERNAL_ERROR;
+   if(ec_get_error(dec))
+      st->error = 1;
+   return frame_size/st->downsample;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef FIXED_POINT
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+
+   ALLOC(out, C*N, opus_int16);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j]=out[j]*(1.f/32768.f);
+
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+
+#else
+
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, celt_sig);
+
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j] = FLOAT2INT16 (out[j]);
+
+   RESTORE_STACK;
+   return ret;
+}
+
+#endif
+#endif /* CUSTOM_MODES */
+
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value=st->error;
+         st->error = 0;
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->overlap/st->downsample;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
+         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
+         oldBandE = lpc+st->channels*LPC_ORDER;
+         oldLogE = oldBandE + 2*st->mode->nbEBands;
+         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st));
+         for (i=0;i<2*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->postfilter_period;
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+      va_end(ap);
+  return OPUS_UNIMPLEMENTED;
+}
diff --git a/src/main/jni/opus/celt/celt_encoder.c b/src/main/jni/opus/celt/celt_encoder.c
new file mode 100644
index 000000000..ffff0775d
--- /dev/null
+++ b/src/main/jni/opus/celt/celt_encoder.c
@@ -0,0 +1,2353 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_ENCODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+
+/** Encoder state
+ @brief Encoder state
+ */
+struct OpusCustomEncoder {
+   const OpusCustomMode *mode;     /**< Mode used by the encoder */
+   int overlap;
+   int channels;
+   int stream_channels;
+
+   int force_intra;
+   int clip;
+   int disable_pf;
+   int complexity;
+   int upsample;
+   int start, end;
+
+   opus_int32 bitrate;
+   int vbr;
+   int signalling;
+   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
+   int loss_rate;
+   int lsb_depth;
+   int variable_duration;
+   int lfe;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define ENCODER_RESET_START rng
+
+   opus_uint32 rng;
+   int spread_decision;
+   opus_val32 delayedIntra;
+   int tonal_average;
+   int lastCodedBands;
+   int hf_average;
+   int tapset_decision;
+
+   int prefilter_period;
+   opus_val16 prefilter_gain;
+   int prefilter_tapset;
+#ifdef RESYNTH
+   int prefilter_period_old;
+   opus_val16 prefilter_gain_old;
+   int prefilter_tapset_old;
+#endif
+   int consec_transient;
+   AnalysisInfo analysis;
+
+   opus_val32 preemph_memE[2];
+   opus_val32 preemph_memD[2];
+
+   /* VBR-related parameters */
+   opus_int32 vbr_reservoir;
+   opus_int32 vbr_drift;
+   opus_int32 vbr_offset;
+   opus_int32 vbr_count;
+   opus_val32 overlap_max;
+   opus_val16 stereo_saving;
+   int intensity;
+   opus_val16 *energy_mask;
+   opus_val16 spec_avg;
+
+#ifdef RESYNTH
+   /* +MAX_PERIOD/2 to make space for overlap */
+   celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
+#endif
+
+   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
+   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_MAXPERIOD */
+   /* opus_val16 oldBandE[],     Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE[],      Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE2[],     Size = channels*mode->nbEBands */
+};
+
+int celt_encoder_get_size(int channels)
+{
+   CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_encoder_get_size(mode, channels);
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTEncoder)
+         + (channels*mode->overlap-1)*sizeof(celt_sig)    /* celt_sig in_mem[channels*mode->overlap]; */
+         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+         + 3*channels*mode->nbEBands*sizeof(opus_val16);  /* opus_val16 oldBandE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE2[channels*mode->nbEBands]; */
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
+   /* init will handle the NULL case */
+   ret = opus_custom_encoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_encoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+                                         int channels, int arch)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL || mode==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->upsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+
+   st->arch = arch;
+
+   st->constrained_vbr = 1;
+   st->clip = 1;
+
+   st->bitrate = OPUS_BITRATE_MAX;
+   st->vbr = 0;
+   st->force_intra  = 0;
+   st->complexity = 5;
+   st->lsb_depth=24;
+
+   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch());
+}
+#endif
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch)
+{
+   int ret;
+   ret = opus_custom_encoder_init_arch(st,
+           opus_custom_mode_create(48000, 960, NULL), channels, arch);
+   if (ret != OPUS_OK)
+      return ret;
+   st->upsample = resampling_factor(sampling_rate);
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_encoder_destroy(CELTEncoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+
+static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
+                              opus_val16 *tf_estimate, int *tf_chan)
+{
+   int i;
+   VARDECL(opus_val16, tmp);
+   opus_val32 mem0,mem1;
+   int is_transient = 0;
+   opus_int32 mask_metric = 0;
+   int c;
+   opus_val16 tf_max;
+   int len2;
+   /* Table of 6*64/x, trained on real data to minimize the average error */
+   static const unsigned char inv_table[128] = {
+         255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
+          23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
+          12, 12, 11, 11, 11, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
+           8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,
+           6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
+           5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,
+           3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,
+   };
+   SAVE_STACK;
+   ALLOC(tmp, len, opus_val16);
+
+   len2=len/2;
+   for (c=0;c<C;c++)
+   {
+      opus_val32 mean;
+      opus_int32 unmask=0;
+      opus_val32 norm;
+      opus_val16 maxE;
+      mem0=0;
+      mem1=0;
+      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x,y;
+         x = SHR32(in[i+c*len],SIG_SHIFT);
+         y = ADD32(mem0, x);
+#ifdef FIXED_POINT
+         mem0 = mem1 + y - SHL32(x,1);
+         mem1 = x - SHR32(y,1);
+#else
+         mem0 = mem1 + y - 2*x;
+         mem1 = x - .5f*y;
+#endif
+         tmp[i] = EXTRACT16(SHR32(y,2));
+         /*printf("%f ", tmp[i]);*/
+      }
+      /*printf("\n");*/
+      /* First few samples are bad because we don't propagate the memory */
+      for (i=0;i<12;i++)
+         tmp[i] = 0;
+
+#ifdef FIXED_POINT
+      /* Normalize tmp to max range */
+      {
+         int shift=0;
+         shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
+         if (shift!=0)
+         {
+            for (i=0;i<len;i++)
+               tmp[i] = SHL16(tmp[i], shift);
+         }
+      }
+#endif
+
+      mean=0;
+      mem0=0;
+      /* Grouping by two to reduce complexity */
+      /* Forward pass to compute the post-echo threshold*/
+      for (i=0;i<len2;i++)
+      {
+         opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
+         mean += x2;
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(x2-mem0,4);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
+#endif
+         mem0 = tmp[i];
+      }
+
+      mem0=0;
+      maxE=0;
+      /* Backward pass to compute the pre-echo threshold */
+      for (i=len2-1;i>=0;i--)
+      {
+#ifdef FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
+#endif
+         mem0 = tmp[i];
+         maxE = MAX16(maxE, mem0);
+      }
+      /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
+
+      /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
+         This essentially corresponds to a bitrate-normalized temporal noise-to-mask
+         ratio */
+
+      /* As a compromise with the old transient detector, frame energy is the
+         geometric mean of the energy and half the max */
+#ifdef FIXED_POINT
+      /* Costs two sqrt() to avoid overflows */
+      mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
+#else
+      mean = celt_sqrt(mean * maxE*.5*len2);
+#endif
+      /* Inverse of the mean energy in Q15+6 */
+      norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
+      /* Compute harmonic mean discarding the unreliable boundaries
+         The data is smooth, so we only take 1/4th of the samples */
+      unmask=0;
+      for (i=12;i<len2-5;i+=4)
+      {
+         int id;
+#ifdef FIXED_POINT
+         id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
+#else
+         id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */
+#endif
+         unmask += inv_table[id];
+      }
+      /*printf("%d\n", unmask);*/
+      /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
+      unmask = 64*unmask*4/(6*(len2-17));
+      if (unmask>mask_metric)
+      {
+         *tf_chan = c;
+         mask_metric = unmask;
+      }
+   }
+   is_transient = mask_metric>200;
+
+   /* Arbitrary metric for VBR boost */
+   tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
+   /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
+   *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
+   /*printf("%d %f\n", tf_max, mask_metric);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   is_transient = rand()&0x1;
+#endif
+   /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
+   return is_transient;
+}
+
+/* Looks for sudden increases of energy to decide whether we need to patch
+   the transient decision */
+int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
+      int end, int C)
+{
+   int i, c;
+   opus_val32 mean_diff=0;
+   opus_val16 spread_old[26];
+   /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
+      avoid false detection caused by irrelevant bands */
+   if (C==1)
+   {
+      spread_old[0] = oldE[0];
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
+   } else {
+      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
+                               MAX16(oldE[i],oldE[i+nbEBands]));
+   }
+   for (i=end-2;i>=0;i--)
+      spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
+   /* Compute mean increase */
+   c=0; do {
+      for (i=2;i<end-1;i++)
+      {
+         opus_val16 x1, x2;
+         x1 = MAX16(0, newE[i]);
+         x2 = MAX16(0, spread_old[i]);
+         mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
+      }
+   } while (++c<C);
+   mean_diff = DIV32(mean_diff, C*(end-3));
+   /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
+   return mean_diff > QCONST16(1.f, DB_SHIFT);
+}
+
+/** Apply window and compute the MDCT for all sub-frames and
+    all channels in a frame */
+static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
+                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
+{
+   const int overlap = OVERLAP(mode);
+   int N;
+   int B;
+   int shift;
+   int i, b, c;
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      for (b=0;b<B;b++)
+      {
+         /* Interleaving the sub-frames while doing the MDCTs */
+         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
+      }
+   } while (++c<CC);
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<B*N;i++)
+         out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i]));
+   }
+   if (upsample != 1)
+   {
+      c=0; do
+      {
+         int bound = B*N/upsample;
+         for (i=0;i<bound;i++)
+            out[c*B*N+i] *= upsample;
+         for (;i<B*N;i++)
+            out[c*B*N+i] = 0;
+      } while (++c<C);
+   }
+}
+
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
+{
+   int i;
+   opus_val16 coef0;
+   celt_sig m;
+   int Nu;
+
+   coef0 = coef[0];
+
+
+   Nu = N/upsample;
+   if (upsample!=1)
+   {
+      for (i=0;i<N;i++)
+         inp[i] = 0;
+   }
+   for (i=0;i<Nu;i++)
+   {
+      celt_sig x;
+
+      x = SCALEIN(pcmp[CC*i]);
+#ifndef FIXED_POINT
+      /* Replace NaNs with zeros */
+      if (!(x==x))
+         x = 0;
+#endif
+      inp[i*upsample] = x;
+   }
+
+#ifndef FIXED_POINT
+   if (clip)
+   {
+      /* Clip input to avoid encoding non-portable files */
+      for (i=0;i<Nu;i++)
+         inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
+   }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
+#endif
+   m = *mem;
+#ifdef CUSTOM_MODES
+   if (coef[1] != 0)
+   {
+      opus_val16 coef1 = coef[1];
+      opus_val16 coef2 = coef[2];
+      for (i=0;i<N;i++)
+      {
+         celt_sig x, tmp;
+         x = inp[i];
+         /* Apply pre-emphasis */
+         tmp = MULT16_16(coef2, x);
+         inp[i] = tmp + m;
+         m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp);
+      }
+   } else
+#endif
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_sig x;
+         x = SHL32(inp[i], SIG_SHIFT);
+         /* Apply pre-emphasis */
+         inp[i] = x + m;
+         m = - MULT16_32_Q15(coef0, x);
+      }
+   }
+   *mem = m;
+}
+
+
+
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
+{
+   int i;
+   opus_val32 L1;
+   L1 = 0;
+   for (i=0;i<N;i++)
+      L1 += EXTEND32(ABS16(tmp[i]));
+   /* When in doubt, prefer good freq resolution */
+   L1 = MAC16_32_Q15(L1, LM*bias, L1);
+   return L1;
+
+}
+
+static int tf_analysis(const CELTMode *m, int len, int isTransient,
+      int *tf_res, int lambda, celt_norm *X, int N0, int LM,
+      int *tf_sum, opus_val16 tf_estimate, int tf_chan)
+{
+   int i;
+   VARDECL(int, metric);
+   int cost0;
+   int cost1;
+   VARDECL(int, path0);
+   VARDECL(int, path1);
+   VARDECL(celt_norm, tmp);
+   VARDECL(celt_norm, tmp_1);
+   int sel;
+   int selcost[2];
+   int tf_select=0;
+   opus_val16 bias;
+
+   SAVE_STACK;
+   bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate));
+   /*printf("%f ", bias);*/
+
+   ALLOC(metric, len, int);
+   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(path0, len, int);
+   ALLOC(path1, len, int);
+
+   *tf_sum = 0;
+   for (i=0;i<len;i++)
+   {
+      int j, k, N;
+      int narrow;
+      opus_val32 L1, best_L1;
+      int best_level=0;
+      N = (m->eBands[i+1]-m->eBands[i])<<LM;
+      /* band is too narrow to be split down to LM=-1 */
+      narrow = (m->eBands[i+1]-m->eBands[i])==1;
+      for (j=0;j<N;j++)
+         tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+      /* Just add the right channel if we're in stereo */
+      /*if (C==2)
+         for (j=0;j<N;j++)
+            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+      L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
+      best_L1 = L1;
+      /* Check the -1 case for transients */
+      if (isTransient && !narrow)
+      {
+         for (j=0;j<N;j++)
+            tmp_1[j] = tmp[j];
+         haar1(tmp_1, N>>LM, 1<<LM);
+         L1 = l1_metric(tmp_1, N, LM+1, bias);
+         if (L1<best_L1)
+         {
+            best_L1 = L1;
+            best_level = -1;
+         }
+      }
+      /*printf ("%f ", L1);*/
+      for (k=0;k<LM+!(isTransient||narrow);k++)
+      {
+         int B;
+
+         if (isTransient)
+            B = (LM-k-1);
+         else
+            B = k+1;
+
+         haar1(tmp, N>>k, 1<<k);
+
+         L1 = l1_metric(tmp, N, B, bias);
+
+         if (L1 < best_L1)
+         {
+            best_L1 = L1;
+            best_level = k+1;
+         }
+      }
+      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+      /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
+      if (isTransient)
+         metric[i] = 2*best_level;
+      else
+         metric[i] = -2*best_level;
+      *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
+      /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+         biasing the decision */
+      if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+         metric[i]-=1;
+      /*printf("%d ", metric[i]);*/
+   }
+   /*printf("\n");*/
+   /* Search for the optimal tf resolution, including tf_select */
+   tf_select = 0;
+   for (sel=0;sel<2;sel++)
+   {
+      cost0 = 0;
+      cost1 = isTransient ? 0 : lambda;
+      for (i=1;i<len;i++)
+      {
+         int curr0, curr1;
+         curr0 = IMIN(cost0, cost1 + lambda);
+         curr1 = IMIN(cost0 + lambda, cost1);
+         cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+         cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+      }
+      cost0 = IMIN(cost0, cost1);
+      selcost[sel]=cost0;
+   }
+   /* For now, we're conservative and only allow tf_select=1 for transients.
+    * If tests confirm it's useful for non-transients, we could allow it. */
+   if (selcost[1]<selcost[0] && isTransient)
+      tf_select=1;
+   cost0 = 0;
+   cost1 = isTransient ? 0 : lambda;
+   /* Viterbi forward pass */
+   for (i=1;i<len;i++)
+   {
+      int curr0, curr1;
+      int from0, from1;
+
+      from0 = cost0;
+      from1 = cost1 + lambda;
+      if (from0 < from1)
+      {
+         curr0 = from0;
+         path0[i]= 0;
+      } else {
+         curr0 = from1;
+         path0[i]= 1;
+      }
+
+      from0 = cost0 + lambda;
+      from1 = cost1;
+      if (from0 < from1)
+      {
+         curr1 = from0;
+         path1[i]= 0;
+      } else {
+         curr1 = from1;
+         path1[i]= 1;
+      }
+      cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+      cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
+   }
+   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
+   /* Viterbi backward pass to check the decisions */
+   for (i=len-2;i>=0;i--)
+   {
+      if (tf_res[i+1] == 1)
+         tf_res[i] = path1[i+1];
+      else
+         tf_res[i] = path0[i+1];
+   }
+   /*printf("%d %f\n", *tf_sum, tf_estimate);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   tf_select = rand()&0x1;
+   tf_res[0] = rand()&0x1;
+   for (i=1;i<len;i++)
+      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
+#endif
+   return tf_select;
+}
+
+static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
+{
+   int curr, i;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+   budget = enc->storage*8;
+   tell = ec_tell(enc);
+   logp = isTransient ? 2 : 4;
+   /* Reserve space to code the tf_select decision. */
+   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
+   budget -= tf_select_rsv;
+   curr = tf_changed = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
+         tell = ec_tell(enc);
+         curr = tf_res[i];
+         tf_changed |= curr;
+      }
+      else
+         tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   /* Only code tf_select if it would actually make a difference. */
+   if (tf_select_rsv &&
+         tf_select_table[LM][4*isTransient+0+tf_changed]!=
+         tf_select_table[LM][4*isTransient+2+tf_changed])
+      ec_enc_bit_logp(enc, tf_select, 1);
+   else
+      tf_select = 0;
+   for (i=start;i<end;i++)
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
+}
+
+
+static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
+      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+      int intensity, opus_val16 surround_trim)
+{
+   int i;
+   opus_val32 diff=0;
+   int c;
+   int trim_index = 5;
+   opus_val16 trim = QCONST16(5.f, 8);
+   opus_val16 logXC, logXC2;
+   if (C==2)
+   {
+      opus_val16 sum = 0; /* Q10 */
+      opus_val16 minXC; /* Q10 */
+      /* Compute inter-channel correlation for low frequencies */
+      for (i=0;i<8;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
+      }
+      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+      sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
+      minXC = sum;
+      for (i=8;i<intensity;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
+      }
+      minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
+      /*printf ("%f\n", sum);*/
+      if (sum > QCONST16(.995f,10))
+         trim_index-=4;
+      else if (sum > QCONST16(.92f,10))
+         trim_index-=3;
+      else if (sum > QCONST16(.85f,10))
+         trim_index-=2;
+      else if (sum > QCONST16(.8f,10))
+         trim_index-=1;
+      /* mid-side savings estimations based on the LF average*/
+      logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+      /* mid-side savings estimations based on min correlation */
+      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef FIXED_POINT
+      /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+      logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+      trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+      *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
+   }
+
+   /* Estimate spectral tilt */
+   c=0; do {
+      for (i=0;i<end-1;i++)
+      {
+         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
+      }
+   } while (++c<C);
+   diff /= C*(end-1);
+   /*printf("%f\n", diff);*/
+   if (diff > QCONST16(2.f, DB_SHIFT))
+      trim_index--;
+   if (diff > QCONST16(8.f, DB_SHIFT))
+      trim_index--;
+   if (diff < -QCONST16(4.f, DB_SHIFT))
+      trim_index++;
+   if (diff < -QCONST16(10.f, DB_SHIFT))
+      trim_index++;
+   trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
+   trim -= 2*SHR16(tf_estimate, 14-8);
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+   {
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
+   }
+#endif
+
+#ifdef FIXED_POINT
+   trim_index = PSHR32(trim, 8);
+#else
+   trim_index = (int)floor(.5f+trim);
+#endif
+   if (trim_index<0)
+      trim_index = 0;
+   if (trim_index>10)
+      trim_index = 10;
+   /*printf("%d\n", trim_index);*/
+#ifdef FUZZING
+   trim_index = rand()%11;
+#endif
+   return trim_index;
+}
+
+static int stereo_analysis(const CELTMode *m, const celt_norm *X,
+      int LM, int N0)
+{
+   int i;
+   int thetas;
+   opus_val32 sumLR = EPSILON, sumMS = EPSILON;
+
+   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
+   for (i=0;i<13;i++)
+   {
+      int j;
+      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+      {
+         opus_val32 L, R, M, S;
+         /* We cast to 32-bit first because of the -32768 case */
+         L = EXTEND32(X[j]);
+         R = EXTEND32(X[N0+j]);
+         M = ADD32(L, R);
+         S = SUB32(L, R);
+         sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
+         sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
+      }
+   }
+   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
+   thetas = 13;
+   /* We don't need thetas for lower bands with LM<=1 */
+   if (LM<=1)
+      thetas -= 8;
+   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
+         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
+}
+
+static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
+      int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
+      int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
+{
+   int i, c;
+   opus_int32 tot_boost=0;
+   opus_val16 maxDepth;
+   VARDECL(opus_val16, follower);
+   VARDECL(opus_val16, noise_floor);
+   SAVE_STACK;
+   ALLOC(follower, C*nbEBands, opus_val16);
+   ALLOC(noise_floor, C*nbEBands, opus_val16);
+   for (i=0;i<nbEBands;i++)
+      offsets[i] = 0;
+   /* Dynamic allocation code */
+   maxDepth=-QCONST16(31.9f, DB_SHIFT);
+   for (i=0;i<end;i++)
+   {
+      /* Noise floor must take into account eMeans, the depth, the width of the bands
+         and the preemphasis filter (approx. square of bark band ID) */
+      noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
+            +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+            +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+   }
+   c=0;do
+   {
+      for (i=0;i<end;i++)
+         maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
+   } while (++c<C);
+   /* Make sure that dynamic allocation can't make us bust the budget */
+   if (effectiveBytes > 50 && LM>=1 && !lfe)
+   {
+      int last=0;
+      c=0;do
+      {
+         follower[c*nbEBands] = bandLogE2[c*nbEBands];
+         for (i=1;i<end;i++)
+         {
+            /* The last band to be at least 3 dB higher than the previous one
+               is the last we'll consider. Otherwise, we run into problems on
+               bandlimited signals. */
+            if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+               last=i;
+            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+         }
+         for (i=last-1;i>=0;i--)
+            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+         for (i=0;i<end;i++)
+            follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
+      } while (++c<C);
+      if (C==2)
+      {
+         for (i=start;i<end;i++)
+         {
+            /* Consider 24 dB "cross-talk" */
+            follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[         i]-QCONST16(4.f,DB_SHIFT));
+            follower[         i] = MAX16(follower[         i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+            follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
+         }
+      } else {
+         for (i=start;i<end;i++)
+         {
+            follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+         }
+      }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
+      /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+      if ((!vbr || constrained_vbr)&&!isTransient)
+      {
+         for (i=start;i<end;i++)
+            follower[i] = HALF16(follower[i]);
+      }
+      for (i=start;i<end;i++)
+      {
+         int width;
+         int boost;
+         int boost_bits;
+
+         if (i<8)
+            follower[i] *= 2;
+         if (i>=12)
+            follower[i] = HALF16(follower[i]);
+         follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+         width = C*(eBands[i+1]-eBands[i])<<LM;
+         if (width<6)
+         {
+            boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);
+            boost_bits = boost*width<<BITRES;
+         } else if (width > 48) {
+            boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+            boost_bits = (boost*width<<BITRES)/8;
+         } else {
+            boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+            boost_bits = boost*6<<BITRES;
+         }
+         /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
+         if ((!vbr || (constrained_vbr&&!isTransient))
+               && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
+         {
+            opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3);
+            offsets[i] = cap-tot_boost;
+            tot_boost = cap;
+            break;
+         } else {
+            offsets[i] = boost;
+            tot_boost += boost_bits;
+         }
+      }
+   }
+   *tot_boost_ = tot_boost;
+   RESTORE_STACK;
+   return maxDepth;
+}
+
+
+static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
+      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+{
+   int c;
+   VARDECL(celt_sig, _pre);
+   celt_sig *pre[2];
+   const CELTMode *mode;
+   int pitch_index;
+   opus_val16 gain1;
+   opus_val16 pf_threshold;
+   int pf_on;
+   int qg;
+   SAVE_STACK;
+
+   mode = st->mode;
+   ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
+
+   pre[0] = _pre;
+   pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
+
+
+   c=0; do {
+      OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
+      OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
+   } while (++c<CC);
+
+   if (enabled)
+   {
+      VARDECL(opus_val16, pitch_buf);
+      ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
+
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
+      /* Don't search for the fir last 1.5 octave of the range because
+         there's too many false-positives due to short-term correlation */
+      pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+            st->arch);
+      pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
+
+      gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
+            N, &pitch_index, st->prefilter_period, st->prefilter_gain);
+      if (pitch_index > COMBFILTER_MAXPERIOD-2)
+         pitch_index = COMBFILTER_MAXPERIOD-2;
+      gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+      /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
+      if (st->loss_rate>2)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>4)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>8)
+         gain1 = 0;
+   } else {
+      gain1 = 0;
+      pitch_index = COMBFILTER_MINPERIOD;
+   }
+
+   /* Gain threshold for enabling the prefilter/postfilter */
+   pf_threshold = QCONST16(.2f,15);
+
+   /* Adjusting the threshold based on rate and continuity */
+   if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
+      pf_threshold += QCONST16(.2f,15);
+   if (nbAvailableBytes<25)
+      pf_threshold += QCONST16(.1f,15);
+   if (nbAvailableBytes<35)
+      pf_threshold += QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.4f,15))
+      pf_threshold -= QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.55f,15))
+      pf_threshold -= QCONST16(.1f,15);
+
+   /* Hard threshold at 0.2 */
+   pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
+   if (gain1<pf_threshold)
+   {
+      gain1 = 0;
+      pf_on = 0;
+      qg = 0;
+   } else {
+      /*This block is not gated by a total bits check only because
+        of the nbAvailableBytes check above.*/
+      if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
+         gain1=st->prefilter_gain;
+
+#ifdef FIXED_POINT
+      qg = ((gain1+1536)>>10)/3-1;
+#else
+      qg = (int)floor(.5f+gain1*32/3)-1;
+#endif
+      qg = IMAX(0, IMIN(7, qg));
+      gain1 = QCONST16(0.09375f,15)*(qg+1);
+      pf_on = 1;
+   }
+   /*printf("%d %f\n", pitch_index, gain1);*/
+
+   c=0; do {
+      int offset = mode->shortMdctSize-st->overlap;
+      st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+      OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
+      if (offset)
+         comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
+               st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
+               st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
+
+      comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
+            st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
+            st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
+      OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
+
+      if (N>COMBFILTER_MAXPERIOD)
+      {
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+      } else {
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+      }
+   } while (++c<CC);
+
+   RESTORE_STACK;
+   *gain = gain1;
+   *pitch = pitch_index;
+   *qgain = qg;
+   return pf_on;
+}
+
+static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
+      int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
+      int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
+      opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
+      int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking,
+      opus_val16 temporal_vbr)
+{
+   /* The target rate in 8th bits per frame */
+   opus_int32 target;
+   int coded_bins;
+   int coded_bands;
+   opus_val16 tf_calibration;
+   int nbEBands;
+   const opus_int16 *eBands;
+
+   nbEBands = mode->nbEBands;
+   eBands = mode->eBands;
+
+   coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
+   coded_bins = eBands[coded_bands]<<LM;
+   if (C==2)
+      coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
+
+   target = base_target;
+
+   /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid && analysis->activity<.4)
+      target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
+#endif
+   /* Stereo savings */
+   if (C==2)
+   {
+      int coded_stereo_bands;
+      int coded_stereo_dof;
+      opus_val16 max_frac;
+      coded_stereo_bands = IMIN(intensity, coded_bands);
+      coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+      /* Maximum fraction of the bits we can save if the signal is mono. */
+      max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
+      /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+      target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
+                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
+   }
+   /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
+   target += tot_boost-(16<<LM);
+   /* Apply transient boost, compensating for average boost. */
+   tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
+                    QCONST16(0.02f,14) : QCONST16(0.04f,14);
+   target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
+
+#ifndef DISABLE_FLOAT_API
+   /* Apply tonality boost */
+   if (analysis->valid && !lfe)
+   {
+      opus_int32 tonal_target;
+      float tonal;
+
+      /* Tonality boost (compensating for the average). */
+      tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
+      tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
+      if (pitch_change)
+         tonal_target +=  (opus_int32)((coded_bins<<BITRES)*.8f);
+      /*printf("%f %f ", analysis->tonality, tonal);*/
+      target = tonal_target;
+   }
+#endif
+
+   if (has_surround_mask&&!lfe)
+   {
+      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
+      /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
+      target = IMAX(target/4, surround_target);
+   }
+
+   {
+      opus_int32 floor_depth;
+      int bins;
+      bins = eBands[nbEBands-2]<<LM;
+      /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+      floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+      floor_depth = IMAX(floor_depth, target>>2);
+      target = IMIN(target, floor_depth);
+      /*printf("%f %d\n", maxDepth, floor_depth);*/
+   }
+
+   if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
+   {
+      opus_val16 rate_factor;
+#ifdef FIXED_POINT
+      rate_factor = MAX16(0,(bitrate-32000));
+#else
+      rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+#endif
+      if (constrained_vbr)
+         rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
+      target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
+
+   }
+
+   if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
+   {
+      opus_val16 amount;
+      opus_val16 tvbr_factor;
+      amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
+      tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
+      target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
+   }
+
+   /* Don't allow more than doubling the rate */
+   target = IMIN(2*base_target, target);
+
+   return target;
+}
+
+int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+{
+   int i, c, N;
+   opus_int32 bits;
+   ec_enc _enc;
+   VARDECL(celt_sig, in);
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(celt_ener, bandE);
+   VARDECL(opus_val16, bandLogE);
+   VARDECL(opus_val16, bandLogE2);
+   VARDECL(int, fine_quant);
+   VARDECL(opus_val16, error);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *prefilter_mem;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+   int shortBlocks=0;
+   int isTransient=0;
+   const int CC = st->channels;
+   const int C = st->stream_channels;
+   int LM, M;
+   int tf_select;
+   int nbFilledBytes, nbAvailableBytes;
+   int effEnd;
+   int codedBands;
+   int tf_sum;
+   int alloc_trim;
+   int pitch_index=COMBFILTER_MINPERIOD;
+   opus_val16 gain1 = 0;
+   int dual_stereo=0;
+   int effectiveBytes;
+   int dynalloc_logp;
+   opus_int32 vbr_rate;
+   opus_int32 total_bits;
+   opus_int32 total_boost;
+   opus_int32 balance;
+   opus_int32 tell;
+   int prefilter_tapset=0;
+   int pf_on;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence=0;
+   int tf_chan = 0;
+   opus_val16 tf_estimate;
+   int pitch_change=0;
+   opus_int32 tot_boost;
+   opus_val32 sample_max;
+   opus_val16 maxDepth;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   int secondMdct;
+   int signalBandwidth;
+   int transient_got_disabled=0;
+   opus_val16 surround_masking=0;
+   opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   opus_int32 equiv_rate = 510000;
+   VARDECL(opus_val16, surround_dynalloc);
+   ALLOC_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   tf_estimate = 0;
+   if (nbCompressedBytes<2 || pcm==NULL)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   frame_size *= st->upsample;
+   for (LM=0;LM<=mode->maxLM;LM++)
+      if (mode->shortMdctSize<<LM==frame_size)
+         break;
+   if (LM>mode->maxLM)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+   N = M*mode->shortMdctSize;
+
+   prefilter_mem = st->in_mem+CC*(st->overlap);
+   oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
+   oldLogE = oldBandE + CC*nbEBands;
+   oldLogE2 = oldLogE + CC*nbEBands;
+
+   if (enc==NULL)
+   {
+      tell=1;
+      nbFilledBytes=0;
+   } else {
+      tell=ec_tell(enc);
+      nbFilledBytes=(tell+4)>>3;
+   }
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && enc==NULL)
+   {
+      int tmp = (mode->effEBands-st->end)>>1;
+      st->end = IMAX(1, mode->effEBands-tmp);
+      compressed[0] = tmp<<5;
+      compressed[0] |= LM<<3;
+      compressed[0] |= (C==2)<<2;
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         int c0 = toOpus(compressed[0]);
+         if (c0<0)
+         {
+            RESTORE_STACK;
+            return OPUS_BAD_ARG;
+         }
+         compressed[0] = c0;
+      }
+      compressed++;
+      nbCompressedBytes--;
+   }
+#else
+   celt_assert(st->signalling==0);
+#endif
+
+   /* Can't produce more than 1275 output bytes */
+   nbCompressedBytes = IMIN(nbCompressedBytes,1275);
+   nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
+
+   if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
+   {
+      opus_int32 den=mode->Fs>>BITRES;
+      vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
+#ifdef CUSTOM_MODES
+      if (st->signalling)
+         vbr_rate -= 8<<BITRES;
+#endif
+      effectiveBytes = vbr_rate>>(3+BITRES);
+   } else {
+      opus_int32 tmp;
+      vbr_rate = 0;
+      tmp = st->bitrate*frame_size;
+      if (tell>1)
+         tmp += tell;
+      if (st->bitrate!=OPUS_BITRATE_MAX)
+         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
+               (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+      effectiveBytes = nbCompressedBytes;
+   }
+   if (st->bitrate != OPUS_BITRATE_MAX)
+      equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
+
+   if (enc==NULL)
+   {
+      ec_enc_init(&_enc, compressed, nbCompressedBytes);
+      enc = &_enc;
+   }
+
+   if (vbr_rate>0)
+   {
+      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
+          target rate and buffering.
+         We must do this up front so that bust-prevention logic triggers
+          correctly if we don't have enough bits. */
+      if (st->constrained_vbr)
+      {
+         opus_int32 vbr_bound;
+         opus_int32 max_allowed;
+         /* We could use any multiple of vbr_rate as bound (depending on the
+             delay).
+            This is clamped to ensure we use at least two bytes if the encoder
+             was entirely empty, but to allow 0 in hybrid mode. */
+         vbr_bound = vbr_rate;
+         max_allowed = IMIN(IMAX(tell==1?2:0,
+               (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
+               nbAvailableBytes);
+         if(max_allowed < nbAvailableBytes)
+         {
+            nbCompressedBytes = nbFilledBytes+max_allowed;
+            nbAvailableBytes = max_allowed;
+            ec_enc_shrink(enc, nbCompressedBytes);
+         }
+      }
+   }
+   total_bits = nbCompressedBytes*8;
+
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   ALLOC(in, CC*(N+st->overlap), celt_sig);
+
+   sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
+   st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
+   sample_max=MAX32(sample_max, st->overlap_max);
+#ifdef FIXED_POINT
+   silence = (sample_max==0);
+#else
+   silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
+#ifdef FUZZING
+   if ((rand()&0x3F)==0)
+      silence = 1;
+#endif
+   if (tell==1)
+      ec_enc_bit_logp(enc, silence, 15);
+   else
+      silence=0;
+   if (silence)
+   {
+      /*In VBR mode there is no need to send more than the minimum. */
+      if (vbr_rate>0)
+      {
+         effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
+         total_bits=nbCompressedBytes*8;
+         nbAvailableBytes=2;
+         ec_enc_shrink(enc, nbCompressedBytes);
+      }
+      /* Pretend we've filled all the remaining bits with zeros
+            (that's what the initialiser did anyway) */
+      tell = nbCompressedBytes*8;
+      enc->nbits_total+=tell-ec_tell(enc);
+   }
+   c=0; do {
+      celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
+                  mode->preemph, st->preemph_memE+c, st->clip);
+   } while (++c<CC);
+
+
+
+   /* Find pitch period and gain */
+   {
+      int enabled;
+      int qg;
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+            && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
+
+      prefilter_tapset = st->tapset_decision;
+      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
+            && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+         pitch_change = 1;
+      if (pf_on==0)
+      {
+         if(st->start==0 && tell+16<=total_bits)
+            ec_enc_bit_logp(enc, 0, 1);
+      } else {
+         /*This block is not gated by a total bits check only because
+           of the nbAvailableBytes check above.*/
+         int octave;
+         ec_enc_bit_logp(enc, 1, 1);
+         pitch_index += 1;
+         octave = EC_ILOG(pitch_index)-5;
+         ec_enc_uint(enc, octave, 6);
+         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+         pitch_index -= 1;
+         ec_enc_bits(enc, qg, 3);
+         ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
+      }
+   }
+
+   isTransient = 0;
+   shortBlocks = 0;
+   if (st->complexity >= 1 && !st->lfe)
+   {
+      isTransient = transient_analysis(in, N+st->overlap, CC,
+            &tf_estimate, &tf_chan);
+   }
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+   {
+      if (isTransient)
+         shortBlocks = M;
+   } else {
+      isTransient = 0;
+      transient_got_disabled=1;
+   }
+
+   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(bandE,nbEBands*CC, celt_ener);
+   ALLOC(bandLogE,nbEBands*CC, opus_val16);
+
+   secondMdct = shortBlocks && st->complexity>=8;
+   ALLOC(bandLogE2, C*nbEBands, opus_val16);
+   if (secondMdct)
+   {
+      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
+      compute_band_energies(mode, freq, bandE, effEnd, C, M);
+      amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
+      for (i=0;i<C*nbEBands;i++)
+         bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+   }
+
+   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+   if (CC==2&&C==1)
+      tf_chan = 0;
+   compute_band_energies(mode, freq, bandE, effEnd, C, M);
+
+   if (st->lfe)
+   {
+      for (i=2;i<st->end;i++)
+      {
+         bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
+         bandE[i] = MAX32(bandE[i], EPSILON);
+      }
+   }
+   amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   for(i=0;i<st->end;i++)
+      surround_dynalloc[i] = 0;
+   /* This computes how much masking takes place between surround channels */
+   if (st->start==0&&st->energy_mask&&!st->lfe)
+   {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
+      opus_val32 mask_avg=0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = IMAX(2,st->lastCodedBands);
+      for (c=0;c<C;c++)
+      {
+         for(i=0;i<mask_end;i++)
+         {
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
+         }
+      }
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = 0;
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
+   }
+   /* Temporal VBR (but not for LFE) */
+   if (!st->lfe)
+   {
+      opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
+      opus_val32 frame_avg=0;
+      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      for(i=st->start;i<st->end;i++)
+      {
+         follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
+         if (C==2)
+            follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
+         frame_avg += follow;
+      }
+      frame_avg /= (st->end-st->start);
+      temporal_vbr = SUB16(frame_avg,st->spec_avg);
+      temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
+      st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
+   }
+   /*for (i=0;i<21;i++)
+      printf("%f ", bandLogE[i]);
+   printf("\n");*/
+
+   if (!secondMdct)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         bandLogE2[i] = bandLogE[i];
+   }
+
+   /* Last chance to catch any transient we might have missed in the
+      time-domain analysis */
+   if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
+   {
+      if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C))
+      {
+         isTransient = 1;
+         shortBlocks = M;
+         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+         compute_band_energies(mode, freq, bandE, effEnd, C, M);
+         amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+         /* Compensate for the scaling of short vs long mdcts */
+         for (i=0;i<C*nbEBands;i++)
+            bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+         tf_estimate = QCONST16(.2f,14);
+      }
+   }
+
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+      ec_enc_bit_logp(enc, isTransient, 3);
+
+   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
+
+   /* Band normalisation */
+   normalise_bands(mode, freq, X, bandE, effEnd, C, M);
+
+   ALLOC(tf_res, nbEBands, int);
+   /* Disable variable tf resolution for hybrid and at very low bitrate */
+   if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe)
+   {
+      int lambda;
+      if (effectiveBytes<40)
+         lambda = 12;
+      else if (effectiveBytes<60)
+         lambda = 6;
+      else if (effectiveBytes<100)
+         lambda = 4;
+      else
+         lambda = 3;
+      lambda*=2;
+      tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
+      for (i=effEnd;i<st->end;i++)
+         tf_res[i] = tf_res[effEnd-1];
+   } else {
+      tf_sum = 0;
+      for (i=0;i<st->end;i++)
+         tf_res[i] = isTransient;
+      tf_select=0;
+   }
+
+   ALLOC(error, C*nbEBands, opus_val16);
+   quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
+         oldBandE, total_bits, error, enc,
+         C, LM, nbAvailableBytes, st->force_intra,
+         &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
+
+   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
+
+   if (ec_tell(enc)+4<=total_bits)
+   {
+      if (st->lfe)
+      {
+         st->tapset_decision = 0;
+         st->spread_decision = SPREAD_NORMAL;
+      } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
+      {
+         if (st->complexity == 0)
+            st->spread_decision = SPREAD_NONE;
+         else
+            st->spread_decision = SPREAD_NORMAL;
+      } else {
+         /* Disable new spreading+tapset estimator until we can show it works
+            better than the old one. So far it seems like spreading_decision()
+            works best. */
+#if 0
+         if (st->analysis.valid)
+         {
+            static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+            static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+            static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+            static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+            st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+            st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+         } else
+#endif
+         {
+            st->spread_decision = spreading_decision(mode, X,
+                  &st->tonal_average, st->spread_decision, &st->hf_average,
+                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+         }
+         /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+         /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
+      }
+      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
+   }
+
+   ALLOC(offsets, nbEBands, int);
+
+   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
+         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
+   /* For LFE, everything interesting is in the first band */
+   if (st->lfe)
+      offsets[0] = IMIN(8, effectiveBytes/3);
+   ALLOC(cap, nbEBands, int);
+   init_caps(mode,cap,LM,C);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   total_boost = 0;
+   tell = ec_tell_frac(enc);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      int j;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
+            && boost < cap[i]; j++)
+      {
+         int flag;
+         flag = j<offsets[i];
+         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
+         tell = ec_tell_frac(enc);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_boost += quanta;
+         dynalloc_loop_logp = 1;
+      }
+      /* Making dynalloc more likely */
+      if (j)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+      offsets[i] = boost;
+   }
+
+   if (C==2)
+   {
+      static const opus_val16 intensity_thresholds[21]=
+      /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
+        {  1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
+      static const opus_val16 intensity_histeresis[21]=
+        {  1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6,  8, 8};
+
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM!=0)
+         dual_stereo = stereo_analysis(mode, X, LM, N);
+
+      st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
+            intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+   }
+
+   alloc_trim = 5;
+   if (tell+(6<<BITRES) <= total_bits - total_boost)
+   {
+      if (st->lfe)
+         alloc_trim = 5;
+      else
+         alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
+      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+      tell = ec_tell_frac(enc);
+   }
+
+   /* Variable bitrate */
+   if (vbr_rate>0)
+   {
+     opus_val16 alpha;
+     opus_int32 delta;
+     /* The target rate in 8th bits per frame */
+     opus_int32 target, base_target;
+     opus_int32 min_allowed;
+     int lm_diff = mode->maxLM - LM;
+
+     /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
+        The CELT allocator will just not be able to use more than that anyway. */
+     nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
+     base_target = vbr_rate - ((40*C+20)<<BITRES);
+
+     if (st->constrained_vbr)
+        base_target += (st->vbr_offset>>lm_diff);
+
+     target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
+           st->lastCodedBands, C, st->intensity, st->constrained_vbr,
+           st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
+           st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
+           temporal_vbr);
+
+     /* The current offset is removed from the target and the space used
+        so far is added*/
+     target=target+tell;
+     /* In VBR mode the frame size must not be reduced so much that it would
+         result in the encoder running out of bits.
+        The margin of 2 bytes ensures that none of the bust-prevention logic
+         in the decoder will have triggered so far. */
+     min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
+
+     nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
+     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
+     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
+
+     /* By how much did we "miss" the target on that frame */
+     delta = target - vbr_rate;
+
+     target=nbAvailableBytes<<(BITRES+3);
+
+     /*If the frame is silent we don't adjust our drift, otherwise
+       the encoder will shoot to very high rates after hitting a
+       span of silence, but we do allow the bitres to refill.
+       This means that we'll undershoot our target in CVBR/VBR modes
+       on files with lots of silence. */
+     if(silence)
+     {
+       nbAvailableBytes = 2;
+       target = 2*8<<BITRES;
+       delta = 0;
+     }
+
+     if (st->vbr_count < 970)
+     {
+        st->vbr_count++;
+        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
+     } else
+        alpha = QCONST16(.001f,15);
+     /* How many bits have we used in excess of what we're allowed */
+     if (st->constrained_vbr)
+        st->vbr_reservoir += target - vbr_rate;
+     /*printf ("%d\n", st->vbr_reservoir);*/
+
+     /* Compute the offset we need to apply in order to reach the target */
+     if (st->constrained_vbr)
+     {
+        st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+        st->vbr_offset = -st->vbr_drift;
+     }
+     /*printf ("%d\n", st->vbr_drift);*/
+
+     if (st->constrained_vbr && st->vbr_reservoir < 0)
+     {
+        /* We're under the min value -- increase rate */
+        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
+        /* Unless we're just coding silence */
+        nbAvailableBytes += silence?0:adjust;
+        st->vbr_reservoir = 0;
+        /*printf ("+%d\n", adjust);*/
+     }
+     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
+     /*printf("%d\n", nbCompressedBytes*50*8);*/
+     /* This moves the raw bits to take into account the new compressed size */
+     ec_enc_shrink(enc, nbCompressedBytes);
+   }
+
+   /* Bit allocation */
+   ALLOC(fine_quant, nbEBands, int);
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   /* bits =           packet size                    - where we are - safety*/
+   bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   signalBandwidth = st->end-1;
+#ifndef DISABLE_FLOAT_API
+   if (st->analysis.valid)
+   {
+      int min_bandwidth;
+      if (equiv_rate < (opus_int32)32000*C)
+         min_bandwidth = 13;
+      else if (equiv_rate < (opus_int32)48000*C)
+         min_bandwidth = 16;
+      else if (equiv_rate < (opus_int32)60000*C)
+         min_bandwidth = 18;
+      else  if (equiv_rate < (opus_int32)80000*C)
+         min_bandwidth = 19;
+      else
+         min_bandwidth = 20;
+      signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
+   }
+#endif
+   if (st->lfe)
+      signalBandwidth = 1;
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
+   if (st->lastCodedBands)
+      st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
+   else
+      st->lastCodedBands = codedBands;
+
+   quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
+
+   /* Residual quantisation */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
+         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = st->consec_transient<2;
+#ifdef FUZZING
+      anti_collapse_on = rand()&0x1;
+#endif
+      ec_enc_bits(enc, anti_collapse_on, 1);
+   }
+   quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+   }
+
+#ifdef RESYNTH
+   /* Re-synthesis of the coded audio if required */
+   {
+      celt_sig *out_mem[2];
+
+      if (anti_collapse_on)
+      {
+         anti_collapse(mode, X, collapse_masks, LM, C, N,
+               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+      }
+
+      if (silence)
+      {
+         for (i=0;i<C*N;i++)
+            freq[i] = 0;
+      } else {
+         /* Synthesis */
+         denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+      }
+
+      c=0; do {
+         OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
+      } while (++c<CC);
+
+      if (CC==2&&C==1)
+      {
+         for (i=0;i<N;i++)
+            freq[N+i] = freq[i];
+      }
+
+      c=0; do {
+         out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
+      } while (++c<CC);
+
+      compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
+
+      c=0; do {
+         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
+         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
+               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
+               mode->window, st->overlap);
+         if (LM!=0)
+            comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
+                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
+                  mode->window, overlap);
+      } while (++c<CC);
+
+      /* We reuse freq[] as scratch space for the de-emphasis */
+      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   st->prefilter_period = pitch_index;
+   st->prefilter_gain = gain1;
+   st->prefilter_tapset = prefilter_tapset;
+#ifdef RESYNTH
+   if (LM!=0)
+   {
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   if (CC==2&&C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+
+   if (!isTransient)
+   {
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+   } else {
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   /* In case start or end were to change */
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<CC);
+
+   if (isTransient || transient_got_disabled)
+      st->consec_transient++;
+   else
+      st->consec_transient=0;
+   st->rng = enc->rng;
+
+   /* If there's any room left (can only happen for very high rates),
+      it's already filled with zeros */
+   ec_enc_done(enc);
+
+#ifdef CUSTOM_MODES
+   if (st->signalling)
+      nbCompressedBytes++;
+#endif
+
+   RESTORE_STACK;
+   if (ec_get_error(enc))
+      return OPUS_INTERNAL_ERROR;
+   else
+      return nbCompressedBytes;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef FIXED_POINT
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(in, C*N, opus_int16);
+
+   for (j=0;j<C*N;j++)
+     in[j] = FLOAT2INT16(pcm[j]);
+
+   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C=st->channels;
+   N=frame_size;
+   ALLOC(in, C*N, celt_sig);
+   for (j=0;j<C*N;j++) {
+     in[j] = SCALEOUT(pcm[j]);
+   }
+
+   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#endif
+
+#endif /* CUSTOM_MODES */
+
+int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case OPUS_SET_COMPLEXITY_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>10)
+            goto bad_arg;
+         st->complexity = value;
+      }
+      break;
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_PREDICTION_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>2)
+            goto bad_arg;
+         st->disable_pf = value<=1;
+         st->force_intra = value==0;
+      }
+      break;
+      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>100)
+            goto bad_arg;
+         st->loss_rate = value;
+      }
+      break;
+      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->constrained_vbr = value;
+      }
+      break;
+      case OPUS_SET_VBR_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->vbr = value;
+      }
+      break;
+      case OPUS_SET_BITRATE_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<=500 && value!=OPUS_BITRATE_MAX)
+            goto bad_arg;
+         value = IMIN(value, 260000*st->channels);
+         st->bitrate = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case OPUS_SET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if (value<8 || value>24)
+             goto bad_arg;
+          st->lsb_depth=value;
+      }
+      break;
+      case OPUS_GET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          *value=st->lsb_depth;
+      }
+      break;
+      case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          st->variable_duration = value;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+         oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD));
+         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
+         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
+               opus_custom_encoder_get_size(st->mode, st->channels)-
+               ((char*)&st->ENCODER_RESET_START - (char*)st));
+         for (i=0;i<st->channels*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->vbr_offset = 0;
+         st->delayedIntra = 1;
+         st->spread_decision = SPREAD_NORMAL;
+         st->tonal_average = 256;
+         st->hf_average = 0;
+         st->tapset_decision = 0;
+      }
+      break;
+#ifdef CUSTOM_MODES
+      case CELT_SET_INPUT_CLIPPING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->clip = value;
+      }
+      break;
+#endif
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case CELT_SET_ANALYSIS_REQUEST:
+      {
+         AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+         if (info)
+            OPUS_COPY(&st->analysis, info, 1);
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      case OPUS_SET_LFE_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          st->lfe = value;
+      }
+      break;
+      case OPUS_SET_ENERGY_MASK_REQUEST:
+      {
+          opus_val16 *value = va_arg(ap, opus_val16*);
+          st->energy_mask = value;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+   va_end(ap);
+   return OPUS_UNIMPLEMENTED;
+}
diff --git a/src/main/jni/opus/celt/celt_lpc.c b/src/main/jni/opus/celt/celt_lpc.c
new file mode 100644
index 000000000..fa29d626e
--- /dev/null
+++ b/src/main/jni/opus/celt/celt_lpc.c
@@ -0,0 +1,309 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+void _celt_lpc(
+      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
+const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
+int          p
+)
+{
+   int i, j;
+   opus_val32 r;
+   opus_val32 error = ac[0];
+#ifdef FIXED_POINT
+   opus_val32 lpc[LPC_ORDER];
+#else
+   float *lpc = _lpc;
+#endif
+
+   for (i = 0; i < p; i++)
+      lpc[i] = 0;
+   if (ac[0] != 0)
+   {
+      for (i = 0; i < p; i++) {
+         /* Sum up this iteration's reflection coefficient */
+         opus_val32 rr = 0;
+         for (j = 0; j < i; j++)
+            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+         rr += SHR32(ac[i + 1],3);
+         r = -frac_div32(SHL32(rr,3), error);
+         /*  Update LPC coefficients and total error */
+         lpc[i] = SHR32(r,3);
+         for (j = 0; j < (i+1)>>1; j++)
+         {
+            opus_val32 tmp1, tmp2;
+            tmp1 = lpc[j];
+            tmp2 = lpc[i-1-j];
+            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
+            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+         }
+
+         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+         /* Bail out once we get 30 dB gain */
+#ifdef FIXED_POINT
+         if (error<SHR32(ac[0],10))
+            break;
+#else
+         if (error<.001f*ac[0])
+            break;
+#endif
+      }
+   }
+#ifdef FIXED_POINT
+   for (i=0;i<p;i++)
+      _lpc[i] = ROUND16(lpc[i],16);
+#endif
+}
+
+void celt_fir(const opus_val16 *_x,
+         const opus_val16 *num,
+         opus_val16 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem)
+{
+   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
+      for (j=0;j<ord;j++)
+      {
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      }
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
+   }
+#else
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord);
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
+}
+
+void celt_iir(const opus_val32 *_x,
+         const opus_val16 *den,
+         opus_val32 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem)
+{
+#ifdef SMALL_FOOTPRINT
+   int i,j;
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+      {
+         sum -= MULT16_16(den[j],mem[j]);
+      }
+      for (j=ord-1;j>=1;j--)
+      {
+         mem[j]=mem[j-1];
+      }
+      mem[0] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
+}
+
+int _celt_autocorr(
+                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
+                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
+                   const opus_val16       *window,
+                   int          overlap,
+                   int          lag,
+                   int          n,
+                   int          arch
+                  )
+{
+   opus_val32 d;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
+   VARDECL(opus_val16, xx);
+   SAVE_STACK;
+   ALLOC(xx, n, opus_val16);
+   celt_assert(n>0);
+   celt_assert(overlap>=0);
+   if (overlap == 0)
+   {
+      xptr = x;
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
+   }
+   shift=0;
+#ifdef FIXED_POINT
+   {
+      opus_val32 ac0;
+      ac0 = 1+(n<<7);
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
+      for(i=(n&1);i<n;i+=2)
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
+
+      shift = celt_ilog2(ac0)-30+10;
+      shift = (shift)/2;
+      if (shift>0)
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
+   }
+#endif
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
+   for (k=0;k<=lag;k++)
+   {
+      for (i = k+fastN, d = 0; i < n; i++)
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
+      ac[k] += d;
+   }
+#ifdef FIXED_POINT
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
+
+   RESTORE_STACK;
+   return shift;
+}
diff --git a/src/main/jni/opus/celt/celt_lpc.h b/src/main/jni/opus/celt/celt_lpc.h
new file mode 100644
index 000000000..dc2a0a3d2
--- /dev/null
+++ b/src/main/jni/opus/celt/celt_lpc.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PLC_H
+#define PLC_H
+
+#include "arch.h"
+
+#define LPC_ORDER 24
+
+void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
+
+void celt_fir(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         opus_val16 *mem);
+
+void celt_iir(const opus_val32 *x,
+         const opus_val16 *den,
+         opus_val32 *y,
+         int N,
+         int ord,
+         opus_val16 *mem);
+
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n, int arch);
+
+#endif /* PLC_H */
diff --git a/src/main/jni/opus/celt/cpu_support.h b/src/main/jni/opus/celt/cpu_support.h
new file mode 100644
index 000000000..d68dbe62c
--- /dev/null
+++ b/src/main/jni/opus/celt/cpu_support.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+
+#else
+#define OPUS_ARCHMASK 0
+
+static OPUS_INLINE int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/celt/cwrs.c b/src/main/jni/opus/celt/cwrs.c
new file mode 100644
index 000000000..ad980cc7d
--- /dev/null
+++ b/src/main/jni/opus/celt/cwrs.c
@@ -0,0 +1,697 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "os_support.h"
+#include "cwrs.h"
+#include "mathops.h"
+#include "arch.h"
+
+#ifdef CUSTOM_MODES
+
+/*Guaranteed to return a conservatively large estimate of the binary logarithm
+   with frac bits of fractional precision.
+  Tested for all possible 32-bit inputs with frac=4, where the maximum
+   overestimation is 0.06254243 bits.*/
+int log2_frac(opus_uint32 val, int frac)
+{
+  int l;
+  l=EC_ILOG(val);
+  if(val&(val-1)){
+    /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
+       before the shift would cause overflow (e.g., for 0xFFFFxxxx).
+       Doesn't work for val=0, but that case fails the test above.*/
+    if(l>16)val=((val-1)>>(l-16))+1;
+    else val<<=16-l;
+    l=(l-1)<<frac;
+    /*Note that we always need one iteration, since the rounding up above means
+       that we might need to adjust the integer part of the logarithm.*/
+    do{
+      int b;
+      b=(int)(val>>16);
+      l+=b<<frac;
+      val=(val+b)>>b;
+      val=(val*val+0x7FFF)>>15;
+    }
+    while(frac-->0);
+    /*If val is not exactly 0x8000, then we have to round up the remainder.*/
+    return l+(val>0x8000);
+  }
+  /*Exact powers of two require no rounding.*/
+  else return (l-1)<<frac;
+}
+#endif
+
+/*Although derived separately, the pulse vector coding scheme is equivalent to
+   a Pyramid Vector Quantizer \cite{Fis86}.
+  Some additional notes about an early version appear at
+   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   and the definitions of some terms have evolved since that was written.
+
+  The conversion from a pulse vector to an integer index (encoding) and back
+   (decoding) is governed by two related functions, V(N,K) and U(N,K).
+
+  V(N,K) = the number of combinations, with replacement, of N items, taken K
+   at a time, when a sign bit is added to each item taken at least once (i.e.,
+   the number of N-dimensional unit pulse vectors with K pulses).
+  One way to compute this is via
+    V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
+   where choose() is the binomial function.
+  A table of values for N<10 and K<10 looks like:
+  V[10][10] = {
+    {1,  0,   0,    0,    0,     0,     0,      0,      0,       0},
+    {1,  2,   2,    2,    2,     2,     2,      2,      2,       2},
+    {1,  4,   8,   12,   16,    20,    24,     28,     32,      36},
+    {1,  6,  18,   38,   66,   102,   146,    198,    258,     326},
+    {1,  8,  32,   88,  192,   360,   608,    952,   1408,    1992},
+    {1, 10,  50,  170,  450,  1002,  1970,   3530,   5890,    9290},
+    {1, 12,  72,  292,  912,  2364,  5336,  10836,  20256,   35436},
+    {1, 14,  98,  462, 1666,  4942, 12642,  28814,  59906,  115598},
+    {1, 16, 128,  688, 2816,  9424, 27008,  68464, 157184,  332688},
+    {1, 18, 162,  978, 4482, 16722, 53154, 148626, 374274,  864146}
+  };
+
+  U(N,K) = the number of such combinations wherein N-1 objects are taken at
+   most K-1 at a time.
+  This is given by
+    U(N,K) = sum(k=0...K-1,V(N-1,k))
+           = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
+  The latter expression also makes clear that U(N,K) is half the number of such
+   combinations wherein the first object is taken at least once.
+  Although it may not be clear from either of these definitions, U(N,K) is the
+   natural function to work with when enumerating the pulse vector codebooks,
+   not V(N,K).
+  U(N,K) is not well-defined for N=0, but with the extension
+    U(0,K) = K>0 ? 0 : 1,
+   the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
+  U[10][10] = {
+    {1, 0,  0,   0,    0,    0,     0,     0,      0,      0},
+    {0, 1,  1,   1,    1,    1,     1,     1,      1,      1},
+    {0, 1,  3,   5,    7,    9,    11,    13,     15,     17},
+    {0, 1,  5,  13,   25,   41,    61,    85,    113,    145},
+    {0, 1,  7,  25,   63,  129,   231,   377,    575,    833},
+    {0, 1,  9,  41,  129,  321,   681,  1289,   2241,   3649},
+    {0, 1, 11,  61,  231,  681,  1683,  3653,   7183,  13073},
+    {0, 1, 13,  85,  377, 1289,  3653,  8989,  19825,  40081},
+    {0, 1, 15, 113,  575, 2241,  7183, 19825,  48639, 108545},
+    {0, 1, 17, 145,  833, 3649, 13073, 40081, 108545, 265729}
+  };
+
+  With this extension, V(N,K) may be written in terms of U(N,K):
+    V(N,K) = U(N,K) + U(N,K+1)
+   for all N>=0, K>=0.
+  Thus U(N,K+1) represents the number of combinations where the first element
+   is positive or zero, and U(N,K) represents the number of combinations where
+   it is negative.
+  With a large enough table of U(N,K) values, we could write O(N) encoding
+   and O(min(N*log(K),N+K)) decoding routines, but such a table would be
+   prohibitively large for small embedded devices (K may be as large as 32767
+   for small N, and N may be as large as 200).
+
+  Both functions obey the same recurrence relation:
+    V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
+    U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
+   for all N>0, K>0, with different initial conditions at N=0 or K=0.
+  This allows us to construct a row of one of the tables above given the
+   previous row or the next row.
+  Thus we can derive O(NK) encoding and decoding routines with O(K) memory
+   using only addition and subtraction.
+
+  When encoding, we build up from the U(2,K) row and work our way forwards.
+  When decoding, we need to start at the U(N,K) row and work our way backwards,
+   which requires a means of computing U(N,K).
+  U(N,K) may be computed from two previous values with the same N:
+    U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
+   for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
+   previous values with the same K:
+    U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
+   for all K>1.
+  This allows us to construct an arbitrary row of the U(N,K) table by starting
+   with the first two values, which are constants.
+  This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
+   multiplications.
+  Similar relations can be derived for V(N,K), but are not used here.
+
+  For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
+   polynomial for fixed N.
+  The first few are
+    U(1,K) = 1,
+    U(2,K) = 2*K-1,
+    U(3,K) = (2*K-2)*K+1,
+    U(4,K) = (((4*K-6)*K+8)*K-3)/3,
+    U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
+   and
+    V(1,K) = 2,
+    V(2,K) = 4*K,
+    V(3,K) = 4*K*K+2,
+    V(4,K) = 8*(K*K+2)*K/3,
+    V(5,K) = ((4*K*K+20)*K*K+6)/3,
+   for all K>0.
+  This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
+   small N (and indeed decoding is also O(N) for N<3).
+
+  @ARTICLE{Fis86,
+    author="Thomas R. Fischer",
+    title="A Pyramid Vector Quantizer",
+    journal="IEEE Transactions on Information Theory",
+    volume="IT-32",
+    number=4,
+    pages="568--583",
+    month=Jul,
+    year=1986
+  }*/
+
+#if !defined(SMALL_FOOTPRINT)
+
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
+   with K pulses allocated to it.*/
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+  Thus, the number of entries in row I is the larger of the maximum number of
+   pulses we will ever allocate for a given N=I (K=128, or however many fit in
+   32 bits, whichever is smaller), plus one, and the maximum N for which
+   K=I-1 pulses fit in 32 bits.
+  The largest band size in an Opus Custom mode is 208.
+  Otherwise, we can limit things to the set of N which can be achieved by
+   splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+   44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272]={
+#endif
+  /*N=0, K=0...176:*/
+  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
+#endif
+  /*N=1, K=1...176:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1,
+#endif
+  /*N=2, K=2...176:*/
+  3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+  43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+  81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+  115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+  175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+  205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+  235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+  265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+  295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+  325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+  383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+  413, 415,
+#endif
+  /*N=3, K=3...176:*/
+  13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+  685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+  1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+  3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+  6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+  9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+  13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+  17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+  21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+  26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+  31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+  37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+  43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+  50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+  57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+  70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+  78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+  /*N=4, K=4...176:*/
+  63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+  7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+  30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+  82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+  161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+  267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+  410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+  597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+  833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+  1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+  1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+  1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+  2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+  2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+  3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+  3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+  4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+  5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+  6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+  7085049, 7207551,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+  8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+  9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+  10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+  11912575,
+#endif
+  /*N=5, K=5...176:*/
+  321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+  50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+  330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+  1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+  2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+  4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+  8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+  13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+  20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+  29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+  40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+  55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+  73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+  95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+  122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+  155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+  193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+  238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+  290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+  351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+  420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+  500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+  590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+  755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+  878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+  1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+  1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+  /*N=6, K=6...96:*/
+  1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+  335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+  2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+  11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+  29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+  64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+  128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+  235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+  402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+  655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+  1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+  1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+  2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+  /*...109:*/
+  2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+  3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+  4012305913U,
+#endif
+  /*N=7, K=7...54*/
+  8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+  1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+  19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+  88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+  292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+  793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+  1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+  /*...60:*/
+  2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+  /*N=8, K=8...37*/
+  48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+  9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+  104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+  638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+  2229491905U,
+#if defined(CUSTOM_MODES)
+  /*...40:*/
+  2691463695U, 3233240945U, 3866006015U,
+#endif
+  /*N=9, K=9...28:*/
+  265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+  39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+  628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+  /*...29:*/
+  2883810113U,
+#endif
+  /*N=10, K=10...24:*/
+  1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+  254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+  3375210671U,
+  /*N=11, K=11...19:*/
+  8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+  948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+  /*...20:*/
+  2684641785U,
+#endif
+  /*N=12, K=12...18:*/
+  45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+  3248227095U,
+  /*N=13, K=13...16:*/
+  251595969, 579168825, 1267854873, 2653649025U,
+  /*N=14, K=14:*/
+  1409933619
+};
+
+#if defined(CUSTOM_MODES)
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
+  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+  CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+  CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+  CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
+}
+#endif
+
+static opus_uint32 icwrs(int _n,const int *_y){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  celt_assert(_n>=2);
+  j=_n-1;
+  i=_y[j]<0;
+  k=abs(_y[j]);
+  do{
+    j--;
+    i+=CELT_PVQ_U(_n-j,k);
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+  }
+  while(j>0);
+  return i;
+}
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  celt_assert(_k>0);
+  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
+}
+
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+  opus_uint32 p;
+  int         s;
+  int         k0;
+  celt_assert(_k>0);
+  celt_assert(_n>1);
+  while(_n>2){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
+      const opus_uint32 *row;
+      row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
+      q=row[_n];
+      if(q>_i){
+        celt_assert(p>q);
+        _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      *_y++=(k0-_k+s)^s;
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        *_y++=(k0-_k+s)^s;
+      }
+    }
+    _n--;
+  }
+  /*_n==2*/
+  p=2*_k+1;
+  s=-(_i>=p);
+  _i-=p&s;
+  k0=_k;
+  _k=(_i+1)>>1;
+  if(_k)_i-=2*_k-1;
+  *_y++=(k0-_k+s)^s;
+  /*_n==1*/
+  s=-(int)_i;
+  *_y=(_k+s)^s;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+}
+
+#else /* SMALL_FOOTPRINT */
+
+/*Computes the next row/column of any recurrence that obeys the relation
+   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_len);
+  _ui[j-1]=_ui0;
+}
+
+/*Computes the previous row/column of any recurrence that obeys the relation
+   u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_n);
+  _ui[j-1]=_ui0;
+}
+
+/*Compute V(_n,_k), as well as U(_n,0..._k+1).
+  _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
+static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
+  opus_uint32 um2;
+  unsigned      len;
+  unsigned      k;
+  len=_k+2;
+  /*We require storage at least 3 values (e.g., _k>0).*/
+  celt_assert(len>=3);
+  _u[0]=0;
+  _u[1]=um2=1;
+  /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+  /*If _n==1, _u[i] should be 1 for i>1.*/
+  celt_assert(_n>=2);
+  /*If _k==0, the following do-while loop will overflow the buffer.*/
+  celt_assert(_k>0);
+  k=2;
+  do _u[k]=(k<<1)-1;
+  while(++k<len);
+  for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
+  return _u[_k]+_u[_k+1];
+}
+
+/*Returns the _i'th combination of _k elements chosen from a set of size _n
+   with associated sign bits.
+  _y: Returns the vector of pulses.
+  _u: Must contain entries [0..._k+1] of row _n of U() on input.
+      Its contents will be destructively modified.*/
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+  int j;
+  celt_assert(_n>0);
+  j=0;
+  do{
+    opus_uint32 p;
+    int           s;
+    int           yj;
+    p=_u[_k+1];
+    s=-(_i>=p);
+    _i-=p&s;
+    yj=_k;
+    p=_u[_k];
+    while(p>_i)p=_u[--_k];
+    _i-=p;
+    yj-=_k;
+    _y[j]=(yj+s)^s;
+    uprev(_u,_k+2,0);
+  }
+  while(++j<_n);
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size 1 with associated sign bits.
+  _y: The vector of pulses, whose sum of absolute values is K.
+  _k: Returns K.*/
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
+  *_k=abs(_y[0]);
+  return _y[0]<0;
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size _n with associated sign bits.
+  _y:  The vector of pulses, whose sum of absolute values must be _k.
+  _nc: Returns V(_n,_k).*/
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+ opus_uint32 *_u){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
+  celt_assert(_n>=2);
+  _u[0]=0;
+  for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
+  i=icwrs1(_y+_n-1,&k);
+  j=_n-2;
+  i+=_u[k];
+  k+=abs(_y[j]);
+  if(_y[j]<0)i+=_u[k+1];
+  while(j-->0){
+    unext(_u,_k+2,0);
+    i+=_u[k];
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=_u[k+1];
+  }
+  *_nc=_u[k]+_u[k+1];
+  return i;
+}
+
+#ifdef CUSTOM_MODES
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  if (_n==1)
+  {
+    for (k=1;k<=_maxk;k++)
+      _bits[k] = 1<<_frac;
+  }
+  else {
+    VARDECL(opus_uint32,u);
+    SAVE_STACK;
+    ALLOC(u,_maxk+2U,opus_uint32);
+    ncwrs_urow(_n,_maxk,u);
+    for(k=1;k<=_maxk;k++)
+      _bits[k]=log2_frac(u[k]+u[k+1],_frac);
+    RESTORE_STACK;
+  }
+}
+#endif /* CUSTOM_MODES */
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  opus_uint32 i;
+  VARDECL(opus_uint32,u);
+  opus_uint32 nc;
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  i=icwrs(_n,_k,&nc,_y,u);
+  ec_enc_uint(_enc,i,nc);
+  RESTORE_STACK;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  VARDECL(opus_uint32,u);
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+  RESTORE_STACK;
+}
+
+#endif /* SMALL_FOOTPRINT */
diff --git a/src/main/jni/opus/celt/cwrs.h b/src/main/jni/opus/celt/cwrs.h
new file mode 100644
index 000000000..7dfbd076d
--- /dev/null
+++ b/src/main/jni/opus/celt/cwrs.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CWRS_H
+#define CWRS_H
+
+#include "arch.h"
+#include "stack_alloc.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef CUSTOM_MODES
+int log2_frac(opus_uint32 val, int frac);
+#endif
+
+void get_required_bits(opus_int16 *bits, int N, int K, int frac);
+
+void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
+
+void decode_pulses(int *_y, int N, int K, ec_dec *dec);
+
+#endif /* CWRS_H */
diff --git a/src/main/jni/opus/celt/ecintrin.h b/src/main/jni/opus/celt/ecintrin.h
new file mode 100644
index 000000000..2263cff6b
--- /dev/null
+++ b/src/main/jni/opus/celt/ecintrin.h
@@ -0,0 +1,87 @@
+/* Copyright (c) 2003-2008 Timothy B. Terriberry
+   Copyright (c) 2008 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include "opus_types.h"
+#include <math.h>
+#include <limits.h>
+#include "arch.h"
+#if !defined(_ecintrin_H)
+# define _ecintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both code to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.*/
+# define EC_MINI(_a,_b)      ((_a)+(((_b)-(_a))&-((_b)<(_a))))
+
+/*Count leading zeros.
+  This macro should only be used for implementing ec_ilog(), if it is defined.
+  All other code should use EC_ILOG() instead.*/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+# include <intrin.h>
+/*In _DEBUG mode this is not an intrinsic by default.*/
+# pragma intrinsic(_BitScanReverse)
+
+static __inline int ec_bsr(unsigned long _x){
+  unsigned long ret;
+  _BitScanReverse(&ret,_x);
+  return (int)ret;
+}
+# define EC_CLZ0    (1)
+# define EC_CLZ(_x) (-ec_bsr(_x))
+#elif defined(ENABLE_TI_DSPLIB)
+# include "dsplib.h"
+# define EC_CLZ0    (31)
+# define EC_CLZ(_x) (_lnorm(_x))
+#elif __GNUC_PREREQ(3,4)
+# if INT_MAX>=2147483647
+#  define EC_CLZ0    ((int)sizeof(unsigned)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=2147483647L
+#  define EC_CLZ0    ((int)sizeof(unsigned long)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clzl(_x))
+# endif
+#endif
+
+#if defined(EC_CLZ)
+/*Note that __builtin_clz is not defined when _x==0, according to the gcc
+   documentation (and that of the BSR instruction that implements it on x86).
+  The majority of the time we can never pass it zero.
+  When we need to, it can be special cased.*/
+# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
+#else
+int ec_ilog(opus_uint32 _v);
+# define EC_ILOG(_x) (ec_ilog(_x))
+#endif
+#endif
diff --git a/src/main/jni/opus/celt/entcode.c b/src/main/jni/opus/celt/entcode.c
new file mode 100644
index 000000000..fa5d7c7c2
--- /dev/null
+++ b/src/main/jni/opus/celt/entcode.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "entcode.h"
+#include "arch.h"
+
+#if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+   a BSR or CLZ instruction (see ecintrin.h).
+  If you are optimizing Opus on a new platform and it has a native CLZ or
+   BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+   an easy performance win.*/
+int ec_ilog(opus_uint32 _v){
+  /*On a Pentium M, this branchless version tested as the fastest on
+     1,000,000,000 random 32-bit integers, edging out a similar version with
+     branches, and a 256-entry LUT version.*/
+  int ret;
+  int m;
+  ret=!!_v;
+  m=!!(_v&0xFFFF0000)<<4;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xFF00)<<3;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xF0)<<2;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xC)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=!!(_v&0x2);
+  return ret;
+}
+#endif
+
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+  opus_uint32 nbits;
+  opus_uint32 r;
+  int         l;
+  int         i;
+  /*To handle the non-integral number of bits still left in the encoder/decoder
+     state, we compute the worst-case number of bits of val that must be
+     encoded to ensure that the value is inside the range for any possible
+     subsequent bits.
+    The computation here is independent of val itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of val are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1<<n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder or decoder claims to have used 1 bit.*/
+  nbits=_this->nbits_total<<BITRES;
+  l=EC_ILOG(_this->rng);
+  r=_this->rng>>(l-16);
+  for(i=BITRES;i-->0;){
+    int b;
+    r=r*r>>15;
+    b=(int)(r>>16);
+    l=l<<1|b;
+    r>>=b;
+  }
+  return nbits-l;
+}
diff --git a/src/main/jni/opus/celt/entcode.h b/src/main/jni/opus/celt/entcode.h
new file mode 100644
index 000000000..dd13e49e5
--- /dev/null
+++ b/src/main/jni/opus/celt/entcode.h
@@ -0,0 +1,117 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if !defined(_entcode_H)
+# define _entcode_H (1)
+# include <limits.h>
+# include <stddef.h>
+# include "ecintrin.h"
+
+/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
+   larger type, you can speed up the decoder by using it here.*/
+typedef opus_uint32           ec_window;
+typedef struct ec_ctx         ec_ctx;
+typedef struct ec_ctx         ec_enc;
+typedef struct ec_ctx         ec_dec;
+
+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define EC_UINT_BITS   (8)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+   3 => 1/8th bits.*/
+# define BITRES 3
+
+/*The entropy encoder/decoder context.
+  We use the same structure for both, so that common functions like ec_tell()
+   can be used on either one.*/
+struct ec_ctx{
+   /*Buffered input/output.*/
+   unsigned char *buf;
+   /*The size of the buffer.*/
+   opus_uint32    storage;
+   /*The offset at which the last byte containing raw bits was read/written.*/
+   opus_uint32    end_offs;
+   /*Bits that will be read from/written at the end.*/
+   ec_window      end_window;
+   /*Number of valid bits in end_window.*/
+   int            nend_bits;
+   /*The total number of whole bits read/written.
+     This does not include partial bits currently in the range coder.*/
+   int            nbits_total;
+   /*The offset at which the next range coder byte will be read/written.*/
+   opus_uint32    offs;
+   /*The number of values in the current range.*/
+   opus_uint32    rng;
+   /*In the decoder: the difference between the top of the current range and
+      the input value, minus one.
+     In the encoder: the low end of the current range.*/
+   opus_uint32    val;
+   /*In the decoder: the saved normalization factor from ec_decode().
+     In the encoder: the number of oustanding carry propagating symbols.*/
+   opus_uint32    ext;
+   /*A buffered input/output symbol, awaiting carry propagation.*/
+   int            rem;
+   /*Nonzero if an error occurred.*/
+   int            error;
+};
+
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){
+  return _this->offs;
+}
+
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){
+  return _this->buf;
+}
+
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){
+  return _this->error;
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
+  return _this->nbits_total-EC_ILOG(_this->rng);
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits scaled by 2**BITRES.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+opus_uint32 ec_tell_frac(ec_ctx *_this);
+
+#endif
diff --git a/src/main/jni/opus/celt/entdec.c b/src/main/jni/opus/celt/entdec.c
new file mode 100644
index 000000000..3c264685c
--- /dev/null
+++ b/src/main/jni/opus/celt/entdec.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stddef.h>
+#include "os_support.h"
+#include "arch.h"
+#include "entdec.h"
+#include "mfrngcod.h"
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  ec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_read_byte(ec_dec *_this){
+  return _this->offs<_this->storage?_this->buf[_this->offs++]:0;
+}
+
+static int ec_read_byte_from_end(ec_dec *_this){
+  return _this->end_offs<_this->storage?
+   _this->buf[_this->storage-++(_this->end_offs)]:0;
+}
+
+/*Normalizes the contents of val and rng so that rng lies entirely in the
+   high-order symbol.*/
+static void ec_dec_normalize(ec_dec *_this){
+  /*If the range is too small, rescale it and input some bits.*/
+  while(_this->rng<=EC_CODE_BOT){
+    int sym;
+    _this->nbits_total+=EC_SYM_BITS;
+    _this->rng<<=EC_SYM_BITS;
+    /*Use up the remaining bits from our last symbol.*/
+    sym=_this->rem;
+    /*Read the next value from the input.*/
+    _this->rem=ec_read_byte(_this);
+    /*Take the rest of the bits we need from this new symbol.*/
+    sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA);
+    /*And subtract them from val, capped to be less than EC_CODE_TOP.*/
+    _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1);
+  }
+}
+
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
+  _this->buf=_buf;
+  _this->storage=_storage;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.
+    The final value after the ec_dec_normalize() call will be the same as in
+     the encoder, but we have to compensate for the bits that are added there.*/
+  _this->nbits_total=EC_CODE_BITS+1
+   -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS;
+  _this->offs=0;
+  _this->rng=1U<<EC_CODE_EXTRA;
+  _this->rem=ec_read_byte(_this);
+  _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA));
+  _this->error=0;
+  /*Normalize the interval.*/
+  ec_dec_normalize(_this);
+}
+
+unsigned ec_decode(ec_dec *_this,unsigned _ft){
+  unsigned s;
+  _this->ext=_this->rng/_ft;
+  s=(unsigned)(_this->val/_this->ext);
+  return _ft-EC_MINI(s+1,_ft);
+}
+
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){
+   unsigned s;
+   _this->ext=_this->rng>>_bits;
+   s=(unsigned)(_this->val/_this->ext);
+   return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits);
+}
+
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 s;
+  s=IMUL32(_this->ext,_ft-_fh);
+  _this->val-=s;
+  _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s;
+  ec_dec_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  int         ret;
+  r=_this->rng;
+  d=_this->val;
+  s=r>>_logp;
+  ret=d<s;
+  if(!ret)_this->val=d-s;
+  _this->rng=ret?s:r-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  opus_uint32 t;
+  int         ret;
+  s=_this->rng;
+  d=_this->val;
+  r=s>>_ftb;
+  ret=-1;
+  do{
+    t=s;
+    s=IMUL32(r,_icdf[++ret]);
+  }
+  while(d<s);
+  _this->val=d-s;
+  _this->rng=t-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
+  unsigned ft;
+  unsigned s;
+  int      ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    opus_uint32 t;
+    ftb-=EC_UINT_BITS;
+    ft=(unsigned)(_ft>>ftb)+1;
+    s=ec_decode(_this,ft);
+    ec_dec_update(_this,s,s+1,ft);
+    t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb);
+    if(t<=_ft)return t;
+    _this->error=1;
+    return _ft;
+  }
+  else{
+    _ft++;
+    s=ec_decode(_this,(unsigned)_ft);
+    ec_dec_update(_this,s,s+1,(unsigned)_ft);
+    return s;
+  }
+}
+
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){
+  ec_window   window;
+  int         available;
+  opus_uint32 ret;
+  window=_this->end_window;
+  available=_this->nend_bits;
+  if((unsigned)available<_bits){
+    do{
+      window|=(ec_window)ec_read_byte_from_end(_this)<<available;
+      available+=EC_SYM_BITS;
+    }
+    while(available<=EC_WINDOW_SIZE-EC_SYM_BITS);
+  }
+  ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U);
+  window>>=_bits;
+  available-=_bits;
+  _this->end_window=window;
+  _this->nend_bits=available;
+  _this->nbits_total+=_bits;
+  return ret;
+}
diff --git a/src/main/jni/opus/celt/entdec.h b/src/main/jni/opus/celt/entdec.h
new file mode 100644
index 000000000..d8ab31873
--- /dev/null
+++ b/src/main/jni/opus/celt/entdec.h
@@ -0,0 +1,100 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entdec_H)
+# define _entdec_H (1)
+# include <limits.h>
+# include "entcode.h"
+
+/*Initializes the decoder.
+  _buf: The input buffer to use.
+  Return: 0 on success, or a negative value on error.*/
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage);
+
+/*Calculates the cumulative frequency for the next symbol.
+  This can then be fed into the probability model to determine what that
+   symbol is, and the additional frequency information required to advance to
+   the next symbol.
+  This function cannot be called more than once without a corresponding call to
+   ec_dec_update(), or decoding will not proceed correctly.
+  _ft: The total frequency of the symbols in the alphabet the next symbol was
+        encoded with.
+  Return: A cumulative frequency representing the encoded symbol.
+          If the cumulative frequency of all the symbols before the one that
+           was encoded was fl, and the cumulative frequency of all the symbols
+           up to and including the one encoded is fh, then the returned value
+           will fall in the range [fl,fh).*/
+unsigned ec_decode(ec_dec *_this,unsigned _ft);
+
+/*Equivalent to ec_decode() with _ft==1<<_bits.*/
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits);
+
+/*Advance the decoder past the next symbol using the frequency information the
+   symbol was encoded with.
+  Exactly one call to ec_decode() must have been made so that all necessary
+   intermediate calculations are performed.
+  _fl:  The cumulative frequency of all symbols that come before the symbol
+         decoded.
+  _fh:  The cumulative frequency of all symbols up to and including the symbol
+         decoded.
+        Together with _fl, this defines the range [_fl,_fh) in which the value
+         returned above must fall.
+  _ft:  The total frequency of the symbols in the alphabet the symbol decoded
+         was encoded in.
+        This must be the same as passed to the preceding call to ec_decode().*/
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/* Decode a bit that has a 1/(1<<_logp) probability of being a one */
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
+
+/*Decodes a symbol given an "inverse" CDF table.
+  No call to ec_dec_update() is necessary after this call.
+  _icdf: The "inverse" CDF, such that symbol s falls in the range
+          [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.
+  Return: The decoded symbol s.*/
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The bits must have been encoded with ec_enc_uint().
+  No call to ec_dec_update() is necessary after this call.
+  _ft: The number of integers that can be decoded (one more than the max).
+       This must be at least one, and no more than 2**32-1.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
+
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with ec_enc_bits().
+  No call to ec_dec_update() is necessary after this call.
+  _ftb: The number of bits to extract.
+        This must be between 0 and 25, inclusive.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);
+
+#endif
diff --git a/src/main/jni/opus/celt/entenc.c b/src/main/jni/opus/celt/entenc.c
new file mode 100644
index 000000000..a7e34ecef
--- /dev/null
+++ b/src/main/jni/opus/celt/entenc.c
@@ -0,0 +1,294 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+#include "os_support.h"
+#include "arch.h"
+#include "entenc.h"
+#include "mfrngcod.h"
+
+/*A range encoder.
+  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video \& Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_write_byte(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->offs++]=(unsigned char)_value;
+  return 0;
+}
+
+static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value;
+  return 0;
+}
+
+/*Outputs a symbol, with a carry bit.
+  If there is a potential to propagate a carry over several symbols, they are
+   buffered until it can be determined whether or not an actual carry will
+   occur.
+  If the counter for the buffered symbols overflows, then the stream becomes
+   undecodable.
+  This gives a theoretical limit of a few billion symbols in a single packet on
+   32-bit systems.
+  The alternative is to truncate the range in order to force a carry, but
+   requires similar carry tracking in the decoder, needlessly slowing it down.*/
+static void ec_enc_carry_out(ec_enc *_this,int _c){
+  if(_c!=EC_SYM_MAX){
+    /*No further carry propagation possible, flush buffer.*/
+    int carry;
+    carry=_c>>EC_SYM_BITS;
+    /*Don't output a byte on the first write.
+      This compare should be taken care of by branch-prediction thereafter.*/
+    if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry);
+    if(_this->ext>0){
+      unsigned sym;
+      sym=(EC_SYM_MAX+carry)&EC_SYM_MAX;
+      do _this->error|=ec_write_byte(_this,sym);
+      while(--(_this->ext)>0);
+    }
+    _this->rem=_c&EC_SYM_MAX;
+  }
+  else _this->ext++;
+}
+
+static void ec_enc_normalize(ec_enc *_this){
+  /*If the range is too small, output some bits and rescale it.*/
+  while(_this->rng<=EC_CODE_BOT){
+    ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
+    /*Move the next-to-high-order symbol into the high-order position.*/
+    _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    _this->rng<<=EC_SYM_BITS;
+    _this->nbits_total+=EC_SYM_BITS;
+  }
+}
+
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
+  _this->buf=_buf;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.*/
+  _this->nbits_total=EC_CODE_BITS+1;
+  _this->offs=0;
+  _this->rng=EC_CODE_TOP;
+  _this->rem=-1;
+  _this->val=0;
+  _this->ext=0;
+  _this->storage=_size;
+  _this->error=0;
+}
+
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 r;
+  r=_this->rng/_ft;
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,(_ft-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,(_ft-_fh));
+  ec_enc_normalize(_this);
+}
+
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){
+  opus_uint32 r;
+  r=_this->rng>>_bits;
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,((1U<<_bits)-_fh));
+  ec_enc_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 s;
+  opus_uint32 l;
+  r=_this->rng;
+  l=_this->val;
+  s=r>>_logp;
+  r-=s;
+  if(_val)_this->val=l+r;
+  _this->rng=_val?s:r;
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  r=_this->rng>>_ftb;
+  if(_s>0){
+    _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]);
+    _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]);
+  }
+  else _this->rng-=IMUL32(r,_icdf[_s]);
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
+  unsigned  ft;
+  unsigned  fl;
+  int       ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    ftb-=EC_UINT_BITS;
+    ft=(_ft>>ftb)+1;
+    fl=(unsigned)(_fl>>ftb);
+    ec_encode(_this,fl,fl+1,ft);
+    ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb);
+  }
+  else ec_encode(_this,_fl,_fl+1,_ft+1);
+}
+
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){
+  ec_window window;
+  int       used;
+  window=_this->end_window;
+  used=_this->nend_bits;
+  celt_assert(_bits>0);
+  if(used+_bits>EC_WINDOW_SIZE){
+    do{
+      _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+      window>>=EC_SYM_BITS;
+      used-=EC_SYM_BITS;
+    }
+    while(used>=EC_SYM_BITS);
+  }
+  window|=(ec_window)_fl<<used;
+  used+=_bits;
+  _this->end_window=window;
+  _this->nend_bits=used;
+  _this->nbits_total+=_bits;
+}
+
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){
+  int      shift;
+  unsigned mask;
+  celt_assert(_nbits<=EC_SYM_BITS);
+  shift=EC_SYM_BITS-_nbits;
+  mask=((1<<_nbits)-1)<<shift;
+  if(_this->offs>0){
+    /*The first byte has been finalized.*/
+    _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift);
+  }
+  else if(_this->rem>=0){
+    /*The first byte is still awaiting carry propagation.*/
+    _this->rem=(_this->rem&~mask)|_val<<shift;
+  }
+  else if(_this->rng<=(EC_CODE_TOP>>_nbits)){
+    /*The renormalization loop has never been run.*/
+    _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))|
+     (opus_uint32)_val<<(EC_CODE_SHIFT+shift);
+  }
+  /*The encoder hasn't even encoded _nbits of data yet.*/
+  else _this->error=-1;
+}
+
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){
+  celt_assert(_this->offs+_this->end_offs<=_size);
+  OPUS_MOVE(_this->buf+_size-_this->end_offs,
+   _this->buf+_this->storage-_this->end_offs,_this->end_offs);
+  _this->storage=_size;
+}
+
+void ec_enc_done(ec_enc *_this){
+  ec_window   window;
+  int         used;
+  opus_uint32 msk;
+  opus_uint32 end;
+  int         l;
+  /*We output the minimum number of bits that ensures that the symbols encoded
+     thus far will be decoded correctly regardless of the bits that follow.*/
+  l=EC_CODE_BITS-EC_ILOG(_this->rng);
+  msk=(EC_CODE_TOP-1)>>l;
+  end=(_this->val+msk)&~msk;
+  if((end|msk)>=_this->val+_this->rng){
+    l++;
+    msk>>=1;
+    end=(_this->val+msk)&~msk;
+  }
+  while(l>0){
+    ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT));
+    end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    l-=EC_SYM_BITS;
+  }
+  /*If we have a buffered byte flush it into the output buffer.*/
+  if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);
+  /*If we have buffered extra bits, flush them as well.*/
+  window=_this->end_window;
+  used=_this->nend_bits;
+  while(used>=EC_SYM_BITS){
+    _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+    window>>=EC_SYM_BITS;
+    used-=EC_SYM_BITS;
+  }
+  /*Clear any excess space and add any remaining extra bits to the last byte.*/
+  if(!_this->error){
+    OPUS_CLEAR(_this->buf+_this->offs,
+     _this->storage-_this->offs-_this->end_offs);
+    if(used>0){
+      /*If there's no range coder data at all, give up.*/
+      if(_this->end_offs>=_this->storage)_this->error=-1;
+      else{
+        l=-l;
+        /*If we've busted, don't add too many extra bits to the last byte; it
+           would corrupt the range coder data, and that's more important.*/
+        if(_this->offs+_this->end_offs>=_this->storage&&l<used){
+          window&=(1<<l)-1;
+          _this->error=-1;
+        }
+        _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window;
+      }
+    }
+  }
+}
diff --git a/src/main/jni/opus/celt/entenc.h b/src/main/jni/opus/celt/entenc.h
new file mode 100644
index 000000000..796bc4d57
--- /dev/null
+++ b/src/main/jni/opus/celt/entenc.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entenc_H)
+# define _entenc_H (1)
+# include <stddef.h>
+# include "entcode.h"
+
+/*Initializes the encoder.
+  _buf:  The buffer to store output bytes in.
+  _size: The size of the buffer, in chars.*/
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size);
+/*Encodes a symbol given its frequency information.
+  The frequency information must be discernable by the decoder, assuming it
+   has read only the previous symbols from the stream.
+  It is allowable to change the frequency information, or even the entire
+   source alphabet, so long as the decoder can tell from the context of the
+   previously encoded information that it is supposed to do so as well.
+  _fl: The cumulative frequency of all symbols that come before the one to be
+        encoded.
+  _fh: The cumulative frequency of all symbols up to and including the one to
+        be encoded.
+       Together with _fl, this defines the range [_fl,_fh) in which the
+        decoded value will fall.
+  _ft: The sum of the frequencies of all the symbols*/
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/*Equivalent to ec_encode() with _ft==1<<_bits.*/
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits);
+
+/* Encode a bit that has a 1/(1<<_logp) probability of being a one */
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
+
+/*Encodes a symbol given an "inverse" CDF table.
+  _s:    The index of the symbol to encode.
+  _icdf: The "inverse" CDF, such that symbol _s falls in the range
+          [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
+
+/*Encodes a raw unsigned integer in the stream.
+  _fl: The integer to encode.
+  _ft: The number of integers that can be encoded (one more than the max).
+       This must be at least one, and no more than 2**32-1.*/
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
+
+/*Encodes a sequence of raw bits in the stream.
+  _fl:  The bits to encode.
+  _ftb: The number of bits to encode.
+        This must be between 1 and 25, inclusive.*/
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb);
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+   have already been encoded.
+  This makes it possible to have a few flags up front, where it is easy for
+   decoders to access them without parsing the whole stream, even if their
+   values are not determined until late in the encoding process, without having
+   to buffer all the intermediate symbols in the encoder.
+  In order for this to work, at least _nbits bits must have already been
+   encoded using probabilities that are an exact power of two.
+  The encoder can verify the number of encoded bits is sufficient, but cannot
+   check this latter condition.
+  _val:   The bits to encode (in the least _nbits significant bits).
+          They will be decoded in order from most-significant to least.
+  _nbits: The number of bits to overwrite.
+          This must be no more than 8.*/
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits);
+
+/*Compacts the data to fit in the target size.
+  This moves up the raw bits at the end of the current buffer so they are at
+   the end of the new buffer size.
+  The caller must ensure that the amount of data that's already been written
+   will fit in the new size.
+  _size: The number of bytes in the new buffer.
+         This must be large enough to contain the bits already written, and
+          must be no larger than the existing size.*/
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size);
+
+/*Indicates that there are no more symbols to encode.
+  All reamining output bytes are flushed to the output buffer.
+  ec_enc_init() must be called before the encoder can be used again.*/
+void ec_enc_done(ec_enc *_this);
+
+#endif
diff --git a/src/main/jni/opus/celt/fixed_debug.h b/src/main/jni/opus/celt/fixed_debug.h
new file mode 100644
index 000000000..80bc94910
--- /dev/null
+++ b/src/main/jni/opus/celt/fixed_debug.h
@@ -0,0 +1,773 @@
+/* Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2012 Xiph.Org Foundation */
+/**
+   @file fixed_debug.h
+   @brief Fixed-point operations with debugging
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_DEBUG_H
+#define FIXED_DEBUG_H
+
+#include <stdio.h>
+#include "opus_defines.h"
+
+#ifdef CELT_C
+OPUS_EXPORT opus_int64 celt_mips=0;
+#else
+extern opus_int64 celt_mips;
+#endif
+
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)
+
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
+#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
+#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1))
+
+#define SHR(a,b) SHR32(a,b)
+#define PSHR(a,b) PSHR32(a,b)
+
+static OPUS_INLINE short NEG16(int x)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "NEG16: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+static OPUS_INLINE int NEG32(opus_int64 x)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(x))
+   {
+      fprintf (stderr, "NEG16: input is not int: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "NEG16: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHR32: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift))
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+//#define SHR(a,shift) ((a) >> (shift))
+//#define SHL(a,shift) ((a) << (shift))
+
+#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef UADD32
+#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef USUB32
+#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (a<b)
+   {
+      fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+/* result fits in 16 bits */
+static OPUS_INLINE short MULT16_16_16(int a, int b)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a*b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
+
+#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_val32)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = (((opus_int64)a)*(opus_int64)b) >> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=3;
+   else
+      celt_mips+=4;
+   return res;
+}
+
+#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=4;
+   else
+      celt_mips+=5;
+   return res;
+}
+
+#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
+
+static OPUS_INLINE int SATURATE(int a, int b)
+{
+   if (a>b)
+      a=b;
+   if (a<-b)
+      a = -b;
+   celt_mips+=3;
+   return a;
+}
+
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
+{
+   celt_mips+=3;
+   if (a>32767)
+      return 32767;
+   else if (a<-32768)
+      return -32768;
+   else return a;
+}
+
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 11;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+
+#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=1;
+   return res;
+}
+
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 4096;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 8192;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 16384;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
+
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line);
+      if (res>32767)
+         res = 32767;
+      if (res<-32768)
+         res = -32768;
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=35;
+   return res;
+}
+
+#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=70;
+   return res;
+}
+
+#undef PRINT_MIPS
+#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);
+
+#endif
diff --git a/src/main/jni/opus/celt/fixed_generic.h b/src/main/jni/opus/celt/fixed_generic.h
new file mode 100644
index 000000000..ecf018a24
--- /dev/null
+++ b/src/main/jni/opus/celt/fixed_generic.h
@@ -0,0 +1,134 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO */
+/**
+   @file fixed_generic.h
+   @brief Generic fixed-point operations
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_GENERIC_H
+#define FIXED_GENERIC_H
+
+/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+/** Compile-time conversion of float constant to 16-bit value */
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Compile-time conversion of float constant to 32-bit value */
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Negate a 16-bit value */
+#define NEG16(x) (-(x))
+/** Negate a 32-bit value */
+#define NEG32(x) (-(x))
+
+/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */
+#define EXTRACT16(x) ((opus_val16)(x))
+/** Change a 16-bit value into a 32-bit value */
+#define EXTEND32(x) ((opus_val32)(x))
+
+/** Arithmetic shift-right of a 16-bit value */
+#define SHR16(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 16-bit value */
+#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift)))
+/** Arithmetic shift-right of a 32-bit value */
+#define SHR32(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 32-bit value */
+#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift)))
+
+/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */
+#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+/** 32-bit arithmetic shift right where the argument can be negative */
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+/** "RAW" macros, should not be used outside of this header file */
+#define SHR(a,shift) ((a) >> (shift))
+#define SHL(a,shift) SHL32(a,shift)
+#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
+
+/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
+#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+/** Divide by two */
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+/** Add two 16-bit values */
+#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b)))
+/** Subtract two 16-bit values */
+#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b))
+/** Add two 32-bit values */
+#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b))
+/** Subtract two 32-bit values */
+#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
+
+/** 16x16 multiplication where the result fits in 16 bits */
+#define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))
+
+/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+#define MULT16_16(a,b)     (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b)))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
+#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
+
+#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
+#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
+#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
+
+/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */
+#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b))))
+
+/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
+#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+
+#endif
diff --git a/src/main/jni/opus/celt/float_cast.h b/src/main/jni/opus/celt/float_cast.h
new file mode 100644
index 000000000..ede657486
--- /dev/null
+++ b/src/main/jni/opus/celt/float_cast.h
@@ -0,0 +1,140 @@
+/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Version 1.1 */
+
+#ifndef FLOAT_CAST_H
+#define FLOAT_CAST_H
+
+
+#include "arch.h"
+
+/*============================================================================
+**      On Intel Pentium processors (especially PIII and probably P4), converting
+**      from float to int is very slow. To meet the C specs, the code produced by
+**      most C compilers targeting Pentium needs to change the FPU rounding mode
+**      before the float to int conversion is performed.
+**
+**      Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+**      is this flushing of the pipeline which is so slow.
+**
+**      Fortunately the ISO C99 specifications define the functions lrint, lrintf,
+**      llrint and llrintf which fix this problem as a side effect.
+**
+**      On Unix-like systems, the configure process should have detected the
+**      presence of these functions. If they weren't found we have to replace them
+**      here with a standard C cast.
+*/
+
+/*
+**      The C99 prototypes for lrint and lrintf are as follows:
+**
+**              long int lrintf (float x) ;
+**              long int lrint  (double x) ;
+*/
+
+/*      The presence of the required functions are detected during the configure
+**      process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+**      the config.h file.
+*/
+
+#if (HAVE_LRINTF)
+
+/*      These defines enable functionality introduced with the 1999 ISO C
+**      standard. They must be defined before the inclusion of math.h to
+**      engage them. If optimisation is enabled, these functions will be
+**      inlined. With optimisation switched off, you have to link in the
+**      maths library using -lm.
+*/
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrintf(x)
+
+#elif (defined(HAVE_LRINT))
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrint(x)
+
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
+        #include <xmmintrin.h>
+
+        __inline long int float2int(float value)
+        {
+                return _mm_cvtss_si32(_mm_load_ss(&value));
+        }
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
+        #include <math.h>
+
+        /*      Win32 doesn't seem to have these functions.
+        **      Therefore implement OPUS_INLINE versions of these functions here.
+        */
+
+        __inline long int
+        float2int (float flt)
+        {       int intgr;
+
+                _asm
+                {       fld flt
+                        fistp intgr
+                } ;
+
+                return intgr ;
+        }
+
+#else
+
+#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L)
+        /* supported by gcc in C99 mode, but not by all other compilers */
+        #warning "Don't have the functions lrint() and lrintf ()."
+        #warning "Replacing these functions with a standard C cast."
+#endif /* __STDC_VERSION__ >= 199901L */
+        #include <math.h>
+        #define float2int(flt) ((int)(floor(.5+flt)))
+#endif
+
+#ifndef DISABLE_FLOAT_API
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
+{
+   x = x*CELT_SIG_SCALE;
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return (opus_int16)float2int(x);
+}
+#endif /* DISABLE_FLOAT_API */
+
+#endif /* FLOAT_CAST_H */
diff --git a/src/main/jni/opus/celt/kiss_fft.c b/src/main/jni/opus/celt/kiss_fft.c
new file mode 100644
index 000000000..ad706c739
--- /dev/null
+++ b/src/main/jni/opus/celt/kiss_fft.c
@@ -0,0 +1,719 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+/* This code is originally from Mark Borgerding's KISS-FFT but has been
+   heavily modified to better suit Opus */
+
+#ifndef SKIP_CONFIG_H
+#  ifdef HAVE_CONFIG_H
+#    include "config.h"
+#  endif
+#endif
+
+#include "_kiss_fft_guts.h"
+#include "arch.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* The guts header contains all the multiplication and addition macros that are defined for
+   complex numbers.  It also delares the kf_ internal functions.
+*/
+
+static void kf_bfly2(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   const kiss_twiddle_cpx * tw1;
+   int i,j;
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout2 = Fout + m;
+      tw1 = st->twiddles;
+      for(j=0;j<m;j++)
+      {
+         kiss_fft_cpx t;
+         Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
+         Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
+         C_MUL (t,  *Fout2 , *tw1);
+         tw1 += fstride;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         ++Fout2;
+         ++Fout;
+      }
+   }
+}
+
+static void ki_bfly2(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   const kiss_twiddle_cpx * tw1;
+   kiss_fft_cpx t;
+   int i,j;
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout2 = Fout + m;
+      tw1 = st->twiddles;
+      for(j=0;j<m;j++)
+      {
+         C_MULC (t,  *Fout2 , *tw1);
+         tw1 += fstride;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         ++Fout2;
+         ++Fout;
+      }
+   }
+}
+
+static void kf_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+   kiss_fft_cpx scratch[6];
+   const size_t m2=2*m;
+   const size_t m3=3*m;
+   int i, j;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw3 = tw2 = tw1 = st->twiddles;
+      for (j=0;j<m;j++)
+      {
+         C_MUL4(scratch[0],Fout[m] , *tw1 );
+         C_MUL4(scratch[1],Fout[m2] , *tw2 );
+         C_MUL4(scratch[2],Fout[m3] , *tw3 );
+
+         Fout->r = PSHR32(Fout->r, 2);
+         Fout->i = PSHR32(Fout->i, 2);
+         C_SUB( scratch[5] , *Fout, scratch[1] );
+         C_ADDTO(*Fout, scratch[1]);
+         C_ADD( scratch[3] , scratch[0] , scratch[2] );
+         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+         C_SUB( Fout[m2], *Fout, scratch[3] );
+         tw1 += fstride;
+         tw2 += fstride*2;
+         tw3 += fstride*3;
+         C_ADDTO( *Fout , scratch[3] );
+
+         Fout[m].r = scratch[5].r + scratch[4].i;
+         Fout[m].i = scratch[5].i - scratch[4].r;
+         Fout[m3].r = scratch[5].r - scratch[4].i;
+         Fout[m3].i = scratch[5].i + scratch[4].r;
+         ++Fout;
+      }
+   }
+}
+
+static void ki_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+   kiss_fft_cpx scratch[6];
+   const size_t m2=2*m;
+   const size_t m3=3*m;
+   int i, j;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw3 = tw2 = tw1 = st->twiddles;
+      for (j=0;j<m;j++)
+      {
+         C_MULC(scratch[0],Fout[m] , *tw1 );
+         C_MULC(scratch[1],Fout[m2] , *tw2 );
+         C_MULC(scratch[2],Fout[m3] , *tw3 );
+
+         C_SUB( scratch[5] , *Fout, scratch[1] );
+         C_ADDTO(*Fout, scratch[1]);
+         C_ADD( scratch[3] , scratch[0] , scratch[2] );
+         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+         C_SUB( Fout[m2], *Fout, scratch[3] );
+         tw1 += fstride;
+         tw2 += fstride*2;
+         tw3 += fstride*3;
+         C_ADDTO( *Fout , scratch[3] );
+
+         Fout[m].r = scratch[5].r - scratch[4].i;
+         Fout[m].i = scratch[5].i + scratch[4].r;
+         Fout[m3].r = scratch[5].r + scratch[4].i;
+         Fout[m3].i = scratch[5].i - scratch[4].r;
+         ++Fout;
+      }
+   }
+}
+
+#ifndef RADIX_TWO_ONLY
+
+static void kf_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+   size_t k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   epi3 = st->twiddles[fstride*m];
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      k=m;
+      do {
+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+      } while(--k);
+   }
+}
+
+static void ki_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i, k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   epi3 = st->twiddles[fstride*m];
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      k=m;
+      do{
+
+         C_MULC(scratch[1],Fout[m] , *tw1);
+         C_MULC(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , -epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+      }while(--k);
+   }
+}
+
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx * twiddles = st->twiddles;
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+   ya = twiddles[fstride*m];
+   yb = twiddles[fstride*2*m];
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      for ( u=0; u<m; ++u ) {
+         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+         scratch[0] = *Fout0;
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+
+         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+static void ki_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx * twiddles = st->twiddles;
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+   ya = twiddles[fstride*m];
+   yb = twiddles[fstride*2*m];
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+
+         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
+         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
+         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+static
+void compute_bitrev_table(
+         int Fout,
+         opus_int16 *f,
+         const size_t fstride,
+         int in_stride,
+         opus_int16 * factors,
+         const kiss_fft_state *st
+            )
+{
+   const int p=*factors++; /* the radix  */
+   const int m=*factors++; /* stage's fft length/p */
+
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+   if (m==1)
+   {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         *f = Fout+j;
+         f += fstride*in_stride;
+      }
+   } else {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
+         f += fstride*in_stride;
+         Fout += m;
+      }
+   }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static
+int kf_factor(int n,opus_int16 * facbuf)
+{
+    int p=4;
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+#ifdef RADIX_TWO_ONLY
+        if (p!=2 && p != 4)
+#else
+        if (p>5)
+#endif
+        {
+           return 0;
+        }
+        *facbuf++ = p;
+        *facbuf++ = n;
+    } while (n > 1);
+    return 1;
+}
+
+static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
+{
+   int i;
+#ifdef FIXED_POINT
+   for (i=0;i<nfft;++i) {
+      opus_val32 phase = -i;
+      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
+   }
+#else
+   for (i=0;i<nfft;++i) {
+      const double pi=3.14159265358979323846264338327;
+      double phase = ( -2*pi /nfft ) * i;
+      kf_cexp(twiddles+i, phase );
+   }
+#endif
+}
+
+/*
+ *
+ * Allocates all necessary storage space for the fft and ifft.
+ * The return value is a contiguous block of memory.  As such,
+ * It can be freed with free().
+ * */
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  const kiss_fft_state *base)
+{
+    kiss_fft_state *st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_state*)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        opus_int16 *bitrev;
+        kiss_twiddle_cpx *twiddles;
+
+        st->nfft=nfft;
+#ifndef FIXED_POINT
+        st->scale = 1.f/nfft;
+#endif
+        if (base != NULL)
+        {
+           st->twiddles = base->twiddles;
+           st->shift = 0;
+           while (nfft<<st->shift != base->nfft && st->shift < 32)
+              st->shift++;
+           if (st->shift>=32)
+              goto fail;
+        } else {
+           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
+           compute_twiddles(twiddles, nfft);
+           st->shift = -1;
+        }
+        if (!kf_factor(nfft,st->factors))
+        {
+           goto fail;
+        }
+
+        /* bitrev */
+        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
+        if (st->bitrev==NULL)
+            goto fail;
+        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+    }
+    return st;
+fail:
+    opus_fft_free(st);
+    return NULL;
+}
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
+{
+   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
+}
+
+void opus_fft_free(const kiss_fft_state *cfg)
+{
+   if (cfg)
+   {
+      opus_free((opus_int16*)cfg->bitrev);
+      if (cfg->shift < 0)
+         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
+      opus_free((kiss_fft_state*)cfg);
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS];
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+
+    celt_assert2 (fin != fout, "In-place FFT not supported");
+    /* Bit-reverse the input */
+    for (i=0;i<st->nfft;i++)
+    {
+       fout[st->bitrev[i]] = fin[i];
+#ifndef FIXED_POINT
+       fout[st->bitrev[i]].r *= st->scale;
+       fout[st->bitrev[i]].i *= st->scale;
+#endif
+    }
+
+    fstride[0] = 1;
+    L=0;
+    do {
+       p = st->factors[2*L];
+       m = st->factors[2*L+1];
+       fstride[L+1] = fstride[L]*p;
+       L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
+    for (i=L-1;i>=0;i--)
+    {
+       if (i!=0)
+          m2 = st->factors[2*i-1];
+       else
+          m2 = 1;
+       switch (st->factors[2*i])
+       {
+       case 2:
+          kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 4:
+          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #ifndef RADIX_TWO_ONLY
+       case 3:
+          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 5:
+          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #endif
+       }
+       m = m2;
+    }
+}
+
+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int m2, m;
+   int p;
+   int L;
+   int fstride[MAXFACTORS];
+   int i;
+   int shift;
+
+   /* st->shift can be -1 */
+   shift = st->shift>0 ? st->shift : 0;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+      fout[st->bitrev[i]] = fin[i];
+
+   fstride[0] = 1;
+   L=0;
+   do {
+      p = st->factors[2*L];
+      m = st->factors[2*L+1];
+      fstride[L+1] = fstride[L]*p;
+      L++;
+   } while(m!=1);
+   m = st->factors[2*L-1];
+   for (i=L-1;i>=0;i--)
+   {
+      if (i!=0)
+         m2 = st->factors[2*i-1];
+      else
+         m2 = 1;
+      switch (st->factors[2*i])
+      {
+      case 2:
+         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+      case 4:
+         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+#ifndef RADIX_TWO_ONLY
+      case 3:
+         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+      case 5:
+         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+#endif
+      }
+      m = m2;
+   }
+}
+
diff --git a/src/main/jni/opus/celt/kiss_fft.h b/src/main/jni/opus/celt/kiss_fft.h
new file mode 100644
index 000000000..66332e3bb
--- /dev/null
+++ b/src/main/jni/opus/celt/kiss_fft.h
@@ -0,0 +1,139 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <math.h>
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
+#else
+#define KISS_FFT_MALLOC opus_alloc
+#endif
+
+#ifdef FIXED_POINT
+#include "arch.h"
+
+#  define kiss_fft_scalar opus_int32
+#  define kiss_twiddle_scalar opus_int16
+
+
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+#   define kiss_twiddle_scalar float
+#   define KF_SUFFIX _celt_single
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct {
+   kiss_twiddle_scalar r;
+   kiss_twiddle_scalar i;
+}kiss_twiddle_cpx;
+
+#define MAXFACTORS 8
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+typedef struct kiss_fft_state{
+    int nfft;
+#ifndef FIXED_POINT
+    kiss_fft_scalar scale;
+#endif
+    int shift;
+    opus_int16 factors[2*MAXFACTORS];
+    const opus_int16 *bitrev;
+    const kiss_twiddle_cpx *twiddles;
+} kiss_fft_state;
+
+/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
+
+/**
+ *  opus_fft_alloc
+ *
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
+
+/**
+ * opus_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/celt/laplace.c b/src/main/jni/opus/celt/laplace.c
new file mode 100644
index 000000000..a7bca874b
--- /dev/null
+++ b/src/main/jni/opus/celt/laplace.c
@@ -0,0 +1,134 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "laplace.h"
+#include "mathops.h"
+
+/* The minimum probability of an energy delta (out of 32768). */
+#define LAPLACE_LOG_MINP (0)
+#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP)
+/* The minimum number of guaranteed representable energy deltas (in one
+    direction). */
+#define LAPLACE_NMIN (16)
+
+/* When called, decay is positive and at most 11456. */
+static unsigned ec_laplace_get_freq1(unsigned fs0, int decay)
+{
+   unsigned ft;
+   ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0;
+   return ft*(opus_int32)(16384-decay)>>15;
+}
+
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay)
+{
+   unsigned fl;
+   int val = *value;
+   fl = 0;
+   if (val)
+   {
+      int s;
+      int i;
+      s = -(val<0);
+      val = (val+s)^s;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay);
+      /* Search the decaying part of the PDF.*/
+      for (i=1; fs > 0 && i < val; i++)
+      {
+         fs *= 2;
+         fl += fs+2*LAPLACE_MINP;
+         fs = (fs*(opus_int32)decay)>>15;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (!fs)
+      {
+         int di;
+         int ndi_max;
+         ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP;
+         ndi_max = (ndi_max-s)>>1;
+         di = IMIN(val - i, ndi_max - 1);
+         fl += (2*di+1+s)*LAPLACE_MINP;
+         fs = IMIN(LAPLACE_MINP, 32768-fl);
+         *value = (i+di+s)^s;
+      }
+      else
+      {
+         fs += LAPLACE_MINP;
+         fl += fs&~s;
+      }
+      celt_assert(fl+fs<=32768);
+      celt_assert(fs>0);
+   }
+   ec_encode_bin(enc, fl, fl+fs, 15);
+}
+
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
+{
+   int val=0;
+   unsigned fl;
+   unsigned fm;
+   fm = ec_decode_bin(dec, 15);
+   fl = 0;
+   if (fm >= fs)
+   {
+      val++;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP;
+      /* Search the decaying part of the PDF.*/
+      while(fs > LAPLACE_MINP && fm >= fl+2*fs)
+      {
+         fs *= 2;
+         fl += fs;
+         fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15;
+         fs += LAPLACE_MINP;
+         val++;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (fs <= LAPLACE_MINP)
+      {
+         int di;
+         di = (fm-fl)>>(LAPLACE_LOG_MINP+1);
+         val += di;
+         fl += 2*di*LAPLACE_MINP;
+      }
+      if (fm < fl+fs)
+         val = -val;
+      else
+         fl += fs;
+   }
+   celt_assert(fl<32768);
+   celt_assert(fs>0);
+   celt_assert(fl<=fm);
+   celt_assert(fm<IMIN(fl+fs,32768));
+   ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
+   return val;
+}
diff --git a/src/main/jni/opus/celt/laplace.h b/src/main/jni/opus/celt/laplace.h
new file mode 100644
index 000000000..46c14b5da
--- /dev/null
+++ b/src/main/jni/opus/celt/laplace.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "entenc.h"
+#include "entdec.h"
+
+/** Encode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param enc Entropy encoder state
+ @param value Value to encode
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+*/
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
+
+/** Decode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param dec Entropy decoder state
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+ @return Value decoded
+ */
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
diff --git a/src/main/jni/opus/celt/mathops.c b/src/main/jni/opus/celt/mathops.c
new file mode 100644
index 000000000..3f8c5dcc0
--- /dev/null
+++ b/src/main/jni/opus/celt/mathops.c
@@ -0,0 +1,208 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+
+/*Compute floor(sqrt(_val)) with exact arithmetic.
+  This has been tested on all possible 32-bit inputs.*/
+unsigned isqrt32(opus_uint32 _val){
+  unsigned b;
+  unsigned g;
+  int      bshift;
+  /*Uses the second method from
+     http://www.azillionmonkeys.com/qed/sqroot.html
+    The main idea is to search for the largest binary digit b such that
+     (g+b)*(g+b) <= _val, and add it to the solution g.*/
+  g=0;
+  bshift=(EC_ILOG(_val)-1)>>1;
+  b=1U<<bshift;
+  do{
+    opus_uint32 t;
+    t=(((opus_uint32)g<<1)+b)<<bshift;
+    if(t<=_val){
+      g+=b;
+      _val-=t;
+    }
+    b>>=1;
+    bshift--;
+  }
+  while(bshift>=0);
+  return g;
+}
+
+#ifdef FIXED_POINT
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b)
+{
+   opus_val16 rcp;
+   opus_val32 result, rem;
+   int shift = celt_ilog2(b)-29;
+   a = VSHR32(a,shift);
+   b = VSHR32(b,shift);
+   /* 16-bit reciprocal */
+   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
+   result = MULT16_32_Q15(rcp, a);
+   rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
+   result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
+   if (result >= 536870912)       /*  2^29 */
+      return 2147483647;          /*  2^31 - 1 */
+   else if (result <= -536870912) /* -2^29 */
+      return -2147483647;         /* -2^31 */
+   else
+      return SHL32(result, 2);
+}
+
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+opus_val16 celt_rsqrt_norm(opus_val32 x)
+{
+   opus_val16 n;
+   opus_val16 r;
+   opus_val16 r2;
+   opus_val16 y;
+   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
+   n = x-32768;
+   /* Get a rough initial guess for the root.
+      The optimal minimax quadratic approximation (using relative error) is
+       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+      Coefficients here, and the final result r, are Q14.*/
+   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
+   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
+      We can compute the result from n and r using Q15 multiplies with some
+       adjustment, carefully done to avoid overflow.
+      Range of y is [-1564,1594]. */
+   r2 = MULT16_16_Q15(r, r);
+   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
+       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+       peak absolute error of 2.26591/16384. */
+   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+              SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Sqrt approximation (QX input, QX/2 output) */
+opus_val32 celt_sqrt(opus_val32 x)
+{
+   int k;
+   opus_val16 n;
+   opus_val32 rt;
+   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
+   if (x==0)
+      return 0;
+   else if (x>=1073741824)
+      return 32767;
+   k = (celt_ilog2(x)>>1)-7;
+   x = VSHR32(x, 2*k);
+   n = x-32768;
+   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
+              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
+   rt = VSHR32(rt,7-k);
+   return rt;
+}
+
+#define L1 32767
+#define L2 -7651
+#define L3 8277
+#define L4 -626
+
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
+{
+   opus_val16 x2;
+
+   x2 = MULT16_16_P15(x,x);
+   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
+                                                                                ))))))));
+}
+
+#undef L1
+#undef L2
+#undef L3
+#undef L4
+
+opus_val16 celt_cos_norm(opus_val32 x)
+{
+   x = x&0x0001ffff;
+   if (x>SHL32(EXTEND32(1), 16))
+      x = SUB32(SHL32(EXTEND32(1), 17),x);
+   if (x&0x00007fff)
+   {
+      if (x<SHL32(EXTEND32(1), 15))
+      {
+         return _celt_cos_pi_2(EXTRACT16(x));
+      } else {
+         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+      }
+   } else {
+      if (x&0x0000ffff)
+         return 0;
+      else if (x&0x0001ffff)
+         return -32767;
+      else
+         return 32767;
+   }
+}
+
+/** Reciprocal approximation (Q15 input, Q16 output) */
+opus_val32 celt_rcp(opus_val32 x)
+{
+   int i;
+   opus_val16 n;
+   opus_val16 r;
+   celt_assert2(x>0, "celt_rcp() only defined for positive values");
+   i = celt_ilog2(x);
+   /* n is Q15 with range [0,1). */
+   n = VSHR32(x,i-15)-32768;
+   /* Start with a linear approximation:
+      r = 1.8823529411764706-0.9411764705882353*n.
+      The coefficients and the result are Q14 in the range [15420,30840].*/
+   r = ADD16(30840, MULT16_16_Q15(-15420, n));
+   /* Perform two Newton iterations:
+      r -= r*((r*n)-1.Q15)
+         = r*((r*n)+(r-1.Q15)). */
+   r = SUB16(r, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+       neatly compensates for truncation error in the rest of the process. */
+   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+       error of 1.24665/32768. */
+   return VSHR32(EXTEND32(r),i-16);
+}
+
+#endif
diff --git a/src/main/jni/opus/celt/mathops.h b/src/main/jni/opus/celt/mathops.h
new file mode 100644
index 000000000..a0525a961
--- /dev/null
+++ b/src/main/jni/opus/celt/mathops.h
@@ -0,0 +1,258 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MATHOPS_H
+#define MATHOPS_H
+
+#include "arch.h"
+#include "entcode.h"
+#include "os_support.h"
+
+/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
+#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
+
+unsigned isqrt32(opus_uint32 _val);
+
+#ifndef OVERRIDE_CELT_MAXABS16
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   opus_val16 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX16(maxval, x[i]);
+      minval = MIN16(minval, x[i]);
+   }
+   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   opus_val32 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX32(maxval, x[i]);
+      minval = MIN32(minval, x[i]);
+   }
+   return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
+#ifndef FIXED_POINT
+
+#define PI 3.141592653f
+#define celt_sqrt(x) ((float)sqrt(x))
+#define celt_rsqrt(x) (1.f/celt_sqrt(x))
+#define celt_rsqrt_norm(x) (celt_rsqrt(x))
+#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
+#define celt_rcp(x) (1.f/(x))
+#define celt_div(a,b) ((a)/(b))
+#define frac_div32(a,b) ((float)(a)/(b))
+
+#ifdef FLOAT_APPROX
+
+/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
+         denorm, +/- inf and NaN are *not* handled */
+
+/** Base-2 log approximation (log2(x)). */
+static OPUS_INLINE float celt_log2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } in;
+   in.f = x;
+   integer = (in.i>>23)-127;
+   in.i -= integer<<23;
+   frac = in.f - 1.5f;
+   frac = -0.41445418f + frac*(0.95909232f
+          + frac*(-0.33951290f + frac*0.16541097f));
+   return 1+integer+frac;
+}
+
+/** Base-2 exponential approximation (2^x). */
+static OPUS_INLINE float celt_exp2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } res;
+   integer = floor(x);
+   if (integer < -50)
+      return 0;
+   frac = x-integer;
+   /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+   res.f = 0.99992522f + frac * (0.69583354f
+           + frac * (0.22606716f + 0.078024523f*frac));
+   res.i = (res.i + (integer<<23)) & 0x7fffffff;
+   return res.f;
+}
+
+#else
+#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
+#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
+#endif
+
+#endif
+
+#ifdef FIXED_POINT
+
+#include "os_support.h"
+
+#ifndef OVERRIDE_CELT_ILOG2
+/** Integer log in base2. Undefined for zero and negative numbers */
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
+{
+   celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
+   return EC_ILOG(x)-1;
+}
+#endif
+
+
+/** Integer log in base2. Defined for zero, but not for negative numbers */
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
+{
+   return x <= 0 ? 0 : celt_ilog2(x);
+}
+
+opus_val16 celt_rsqrt_norm(opus_val32 x);
+
+opus_val32 celt_sqrt(opus_val32 x);
+
+opus_val16 celt_cos_norm(opus_val32 x);
+
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
+{
+   int i;
+   opus_val16 n, frac;
+   /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
+       0.15530808010959576, -0.08556153059057618 */
+   static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
+   if (x==0)
+      return -32767;
+   i = celt_ilog2(x);
+   n = VSHR32(x,i-15)-32768-16384;
+   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
+   return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
+}
+
+/*
+ K0 = 1
+ K1 = log(2)
+ K2 = 3-4*log(2)
+ K3 = 3*log(2) - 2
+*/
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
+
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
+{
+   opus_val16 frac;
+   frac = SHL16(x, 4);
+   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
+/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
+{
+   int integer;
+   opus_val16 frac;
+   integer = SHR16(x,10);
+   if (integer>14)
+      return 0x7f000000;
+   else if (integer < -15)
+      return 0;
+   frac = celt_exp2_frac(x-SHL16(integer,10));
+   return VSHR32(EXTEND32(frac), -integer-2);
+}
+
+opus_val32 celt_rcp(opus_val32 x);
+
+#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b);
+
+#define M1 32767
+#define M2 -21
+#define M3 -11943
+#define M4 4936
+
+/* Atan approximation using a 4th order polynomial. Input is in Q15 format
+   and normalized by pi/4. Output is in Q15 format */
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
+{
+   return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
+}
+
+#undef M1
+#undef M2
+#undef M3
+#undef M4
+
+/* atan2() approximation valid for positive input values */
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+{
+   if (y < x)
+   {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(y),15),x);
+      if (arg >= 32767)
+         arg = 32767;
+      return SHR16(celt_atan01(EXTRACT16(arg)),1);
+   } else {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(x),15),y);
+      if (arg >= 32767)
+         arg = 32767;
+      return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
+   }
+}
+
+#endif /* FIXED_POINT */
+#endif /* MATHOPS_H */
diff --git a/src/main/jni/opus/celt/mdct.c b/src/main/jni/opus/celt/mdct.c
new file mode 100644
index 000000000..90a214ad0
--- /dev/null
+++ b/src/main/jni/opus/celt/mdct.c
@@ -0,0 +1,311 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+#ifdef CUSTOM_MODES
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
+{
+   int i;
+   int N4;
+   kiss_twiddle_scalar *trig;
+#if defined(FIXED_POINT)
+   int N2=N>>1;
+#endif
+   l->n = N;
+   N4 = N>>2;
+   l->maxshift = maxshift;
+   for (i=0;i<=maxshift;i++)
+   {
+      if (i==0)
+         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+      else
+         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+#ifndef ENABLE_TI_DSPLIB55
+      if (l->kfft[i]==NULL)
+         return 0;
+#endif
+   }
+   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+   if (l->trig==NULL)
+     return 0;
+   /* We have enough points that sine isn't necessary */
+#if defined(FIXED_POINT)
+   for (i=0;i<=N4;i++)
+      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#else
+   for (i=0;i<=N4;i++)
+      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+#endif
+   return 1;
+}
+
+void clt_mdct_clear(mdct_lookup *l)
+{
+   int i;
+   for (i=0;i<=l->maxshift;i++)
+      opus_fft_free(l->kfft[i]);
+   opus_free((kiss_twiddle_scalar*)l->trig);
+}
+
+#endif /* CUSTOM_MODES */
+
+/* Forward MDCT trashes the input array */
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         re = yp[0];
+         im = yp[1];
+         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
+         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr + S_MUL(yi,sine);
+         *yp++ = yi - S_MUL(yr,sine);
+      }
+   }
+
+   /* N/4 complex FFT, down-scales by 4/N */
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
+         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp1 = yr - S_MUL(yi,sine);
+         *yp2 = yi + S_MUL(yr,sine);;
+         fp += 2;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f2, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr - S_MUL(yi,sine);
+         *yp++ = yi + S_MUL(yr,sine);
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
+   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i<<shift];
+         t1 = t[(N4-i)<<shift];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         re = yp1[0];
+         im = yp1[1];
+         /* works because the cos is nearly one */
+         yp0[0] = -(yr - S_MUL(yi,sine));
+         yp1[1] = yi + S_MUL(yr,sine);
+
+         t0 = t[(N4-i-1)<<shift];
+         t1 = t[(i+1)<<shift];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         /* works because the cos is nearly one */
+         yp1[0] = -(yr - S_MUL(yi,sine));
+         yp0[1] = yi + S_MUL(yr,sine);
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/celt/mdct.h b/src/main/jni/opus/celt/mdct.h
new file mode 100644
index 000000000..d72182138
--- /dev/null
+++ b/src/main/jni/opus/celt/mdct.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "opus_defines.h"
+#include "kiss_fft.h"
+#include "arch.h"
+
+typedef struct {
+   int n;
+   int maxshift;
+   const kiss_fft_state *kfft[4];
+   const kiss_twiddle_scalar * OPUS_RESTRICT trig;
+} mdct_lookup;
+
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
+void clt_mdct_clear(mdct_lookup *l);
+
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride);
+
+/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
+    (scales implicitly by 1/2) */
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+
+#endif
diff --git a/src/main/jni/opus/celt/mfrngcod.h b/src/main/jni/opus/celt/mfrngcod.h
new file mode 100644
index 000000000..809152a59
--- /dev/null
+++ b/src/main/jni/opus/celt/mfrngcod.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001-2008 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_mfrngcode_H)
+# define _mfrngcode_H (1)
+# include "entcode.h"
+
+/*Constants used by the entropy encoder/decoder.*/
+
+/*The number of bits to output at a time.*/
+# define EC_SYM_BITS   (8)
+/*The total number of bits in each of the state registers.*/
+# define EC_CODE_BITS  (32)
+/*The maximum symbol value.*/
+# define EC_SYM_MAX    ((1U<<EC_SYM_BITS)-1)
+/*Bits to shift by to move a symbol into the high-order position.*/
+# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1)
+/*Carry bit of the high-order range symbol.*/
+# define EC_CODE_TOP   (((opus_uint32)1U)<<(EC_CODE_BITS-1))
+/*Low-order bit of the high-order range symbol.*/
+# define EC_CODE_BOT   (EC_CODE_TOP>>EC_SYM_BITS)
+/*The number of bits available for the last, partial symbol in the code field.*/
+# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1)
+#endif
diff --git a/src/main/jni/opus/celt/modes.c b/src/main/jni/opus/celt/modes.c
new file mode 100644
index 000000000..42e68e1cb
--- /dev/null
+++ b/src/main/jni/opus/celt/modes.c
@@ -0,0 +1,438 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt.h"
+#include "modes.h"
+#include "rate.h"
+#include "os_support.h"
+#include "stack_alloc.h"
+#include "quant_bands.h"
+
+static const opus_int16 eband5ms[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
+};
+
+/* Alternate tuning (partially derived from Vorbis) */
+#define BITALLOC_SIZE 11
+/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR) */
+static const unsigned char band_allocation[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10,  0,  0,  0,  0,  0,  0,  0,  0,
+110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12,  0,  0,  0,  0,  0,  0,
+118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15,  4,  0,  0,  0,  0,
+126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12,  1,  0,  0,
+134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10,  1,
+144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15,  1,
+152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20,  1,
+162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30,  1,
+172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20,
+200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104,
+};
+
+#ifndef CUSTOM_MODES_ONLY
+ #ifdef FIXED_POINT
+  #include "static_modes_fixed.h"
+ #else
+  #include "static_modes_float.h"
+ #endif
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+#ifdef CUSTOM_MODES
+
+/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth
+   Taken from http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */
+#define BARK_BANDS 25
+static const opus_int16 bark_freq[BARK_BANDS+1] = {
+      0,   100,   200,   300,   400,
+    510,   630,   770,   920,  1080,
+   1270,  1480,  1720,  2000,  2320,
+   2700,  3150,  3700,  4400,  5300,
+   6400,  7700,  9500, 12000, 15500,
+  20000};
+
+static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands)
+{
+   opus_int16 *eBands;
+   int i, j, lin, low, high, nBark, offset=0;
+
+   /* All modes that have 2.5 ms short blocks use the same definition */
+   if (Fs == 400*(opus_int32)frame_size)
+   {
+      *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+      eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1));
+      for (i=0;i<*nbEBands+1;i++)
+         eBands[i] = eband5ms[i];
+      return eBands;
+   }
+   /* Find the number of critical bands supported by our sampling rate */
+   for (nBark=1;nBark<BARK_BANDS;nBark++)
+    if (bark_freq[nBark+1]*2 >= Fs)
+       break;
+
+   /* Find where the linear part ends (i.e. where the spacing is more than min_width */
+   for (lin=0;lin<nBark;lin++)
+      if (bark_freq[lin+1]-bark_freq[lin] >= res)
+         break;
+
+   low = (bark_freq[lin]+res/2)/res;
+   high = nBark-lin;
+   *nbEBands = low+high;
+   eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2));
+
+   if (eBands==NULL)
+      return NULL;
+
+   /* Linear spacing (min_width) */
+   for (i=0;i<low;i++)
+      eBands[i] = i;
+   if (low>0)
+      offset = eBands[low-1]*res - bark_freq[lin-1];
+   /* Spacing follows critical bands */
+   for (i=0;i<high;i++)
+   {
+      int target = bark_freq[lin+i];
+      /* Round to an even value */
+      eBands[i+low] = (target+offset/2+res)/(2*res)*2;
+      offset = eBands[i+low]*res - target;
+   }
+   /* Enforce the minimum spacing at the boundary */
+   for (i=0;i<*nbEBands;i++)
+      if (eBands[i] < i)
+         eBands[i] = i;
+   /* Round to an even value */
+   eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2;
+   if (eBands[*nbEBands] > frame_size)
+      eBands[*nbEBands] = frame_size;
+   for (i=1;i<*nbEBands-1;i++)
+   {
+      if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1])
+      {
+         eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2;
+      }
+   }
+   /* Remove any empty bands. */
+   for (i=j=0;i<*nbEBands;i++)
+      if(eBands[i+1]>eBands[j])
+         eBands[++j]=eBands[i+1];
+   *nbEBands=j;
+
+   for (i=1;i<*nbEBands;i++)
+   {
+      /* Every band must be smaller than the last band. */
+      celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]);
+      /* Each band must be no larger than twice the size of the previous one. */
+      celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1]));
+   }
+
+   return eBands;
+}
+
+static void compute_allocation_table(CELTMode *mode)
+{
+   int i, j;
+   unsigned char *allocVectors;
+   int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+
+   mode->nbAllocVectors = BITALLOC_SIZE;
+   allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands));
+   if (allocVectors==NULL)
+      return;
+
+   /* Check for standard mode */
+   if (mode->Fs == 400*(opus_int32)mode->shortMdctSize)
+   {
+      for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++)
+         allocVectors[i] = band_allocation[i];
+      mode->allocVectors = allocVectors;
+      return;
+   }
+   /* If not the standard mode, interpolate */
+   /* Compute per-codec-band allocation from per-critical-band matrix */
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+      {
+         int k;
+         for (k=0;k<maxBands;k++)
+         {
+            if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize)
+               break;
+         }
+         if (k>maxBands-1)
+            allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1];
+         else {
+            opus_int32 a0, a1;
+            a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1];
+            a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize;
+            allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1]
+                                             + a1*band_allocation[i*maxBands+k])/(a0+a1);
+         }
+      }
+   }
+
+   /*printf ("\n");
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+         printf ("%d ", allocVectors[i*mode->nbEBands+j]);
+      printf ("\n");
+   }
+   exit(0);*/
+
+   mode->allocVectors = allocVectors;
+}
+
+#endif /* CUSTOM_MODES */
+
+CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
+{
+   int i;
+#ifdef CUSTOM_MODES
+   CELTMode *mode=NULL;
+   int res;
+   opus_val16 *window;
+   opus_int16 *logN;
+   int LM;
+   ALLOC_STACK;
+#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
+   if (global_stack==NULL)
+      goto failure;
+#endif
+#endif
+
+#ifndef CUSTOM_MODES_ONLY
+   for (i=0;i<TOTAL_MODES;i++)
+   {
+      int j;
+      for (j=0;j<4;j++)
+      {
+         if (Fs == static_mode_list[i]->Fs &&
+               (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts)
+         {
+            if (error)
+               *error = OPUS_OK;
+            return (CELTMode*)static_mode_list[i];
+         }
+      }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef CUSTOM_MODES
+   if (error)
+      *error = OPUS_BAD_ARG;
+   return NULL;
+#else
+
+   /* The good thing here is that permutation of the arguments will automatically be invalid */
+
+   if (Fs < 8000 || Fs > 96000)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   /* Frames of less than 1ms are not supported. */
+   if ((opus_int32)frame_size*1000 < Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0)
+   {
+     LM = 3;
+   } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0)
+   {
+     LM = 2;
+   } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0)
+   {
+     LM = 1;
+   } else
+   {
+     LM = 0;
+   }
+
+   /* Shorts longer than 3.3ms are not supported. */
+   if ((opus_int32)(frame_size>>LM)*300 > Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   mode = opus_alloc(sizeof(CELTMode));
+   if (mode==NULL)
+      goto failure;
+   mode->Fs = Fs;
+
+   /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis
+      is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should
+      approximate that. */
+   if(Fs < 12000) /* 8 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.3500061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(3.6765136719f, 13);
+   } else if(Fs < 24000) /* 16 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.6000061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(2.2598876953f, 13);
+   } else if(Fs < 40000) /* 32 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.7799987793f, 15);
+      mode->preemph[1] = -QCONST16(0.1000061035f, 15);
+      mode->preemph[2] =  QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(1.3333740234f, 13);
+   } else /* 48 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.8500061035f, 15);
+      mode->preemph[1] =  QCONST16(0.0f, 15);
+      mode->preemph[2] =  QCONST16(1.f, SIG_SHIFT);
+      mode->preemph[3] =  QCONST16(1.f, 13);
+   }
+
+   mode->maxLM = LM;
+   mode->nbShortMdcts = 1<<LM;
+   mode->shortMdctSize = frame_size/mode->nbShortMdcts;
+   res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
+
+   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
+   if (mode->eBands==NULL)
+      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
+
+   mode->effEBands = mode->nbEBands;
+   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
+      mode->effEBands--;
+
+   /* Overlap must be divisible by 4 */
+   mode->overlap = ((mode->shortMdctSize>>2)<<2);
+
+   compute_allocation_table(mode);
+   if (mode->allocVectors==NULL)
+      goto failure;
+
+   window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16));
+   if (window==NULL)
+      goto failure;
+
+#ifndef FIXED_POINT
+   for (i=0;i<mode->overlap;i++)
+      window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap));
+#else
+   for (i=0;i<mode->overlap;i++)
+      window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap))));
+#endif
+   mode->window = window;
+
+   logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16));
+   if (logN==NULL)
+      goto failure;
+
+   for (i=0;i<mode->nbEBands;i++)
+      logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES);
+   mode->logN = logN;
+
+   compute_pulse_cache(mode, mode->maxLM);
+
+   if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
+           mode->maxLM) == 0)
+      goto failure;
+
+   if (error)
+      *error = OPUS_OK;
+
+   return mode;
+failure:
+   if (error)
+      *error = OPUS_ALLOC_FAIL;
+   if (mode!=NULL)
+      opus_custom_mode_destroy(mode);
+   return NULL;
+#endif /* !CUSTOM_MODES */
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_mode_destroy(CELTMode *mode)
+{
+   if (mode == NULL)
+      return;
+#ifndef CUSTOM_MODES_ONLY
+   {
+     int i;
+     for (i=0;i<TOTAL_MODES;i++)
+     {
+        if (mode == static_mode_list[i])
+        {
+           return;
+        }
+     }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+   opus_free((opus_int16*)mode->eBands);
+   opus_free((opus_int16*)mode->allocVectors);
+
+   opus_free((opus_val16*)mode->window);
+   opus_free((opus_int16*)mode->logN);
+
+   opus_free((opus_int16*)mode->cache.index);
+   opus_free((unsigned char*)mode->cache.bits);
+   opus_free((unsigned char*)mode->cache.caps);
+   clt_mdct_clear(&mode->mdct);
+
+   opus_free((CELTMode *)mode);
+}
+#endif
diff --git a/src/main/jni/opus/celt/modes.h b/src/main/jni/opus/celt/modes.h
new file mode 100644
index 000000000..c8340f987
--- /dev/null
+++ b/src/main/jni/opus/celt/modes.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MODES_H
+#define MODES_H
+
+#include "opus_types.h"
+#include "celt.h"
+#include "arch.h"
+#include "mdct.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#define MAX_PERIOD 1024
+
+#ifndef OVERLAP
+#define OVERLAP(mode) ((mode)->overlap)
+#endif
+
+#ifndef FRAMESIZE
+#define FRAMESIZE(mode) ((mode)->mdctSize)
+#endif
+
+typedef struct {
+   int size;
+   const opus_int16 *index;
+   const unsigned char *bits;
+   const unsigned char *caps;
+} PulseCache;
+
+/** Mode definition (opaque)
+ @brief Mode definition
+ */
+struct OpusCustomMode {
+   opus_int32 Fs;
+   int          overlap;
+
+   int          nbEBands;
+   int          effEBands;
+   opus_val16    preemph[4];
+   const opus_int16   *eBands;   /**< Definition for each "pseudo-critical band" */
+
+   int         maxLM;
+   int         nbShortMdcts;
+   int         shortMdctSize;
+
+   int          nbAllocVectors; /**< Number of lines in the matrix below */
+   const unsigned char   *allocVectors;   /**< Number of bits in each band for several rates */
+   const opus_int16 *logN;
+
+   const opus_val16 *window;
+   mdct_lookup mdct;
+   PulseCache cache;
+};
+
+
+#endif
diff --git a/src/main/jni/opus/celt/opus_custom_demo.c b/src/main/jni/opus/celt/opus_custom_demo.c
new file mode 100644
index 000000000..ae41c0de5
--- /dev/null
+++ b/src/main/jni/opus/celt/opus_custom_demo.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_custom.h"
+#include "arch.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MAX_PACKET 1275
+
+int main(int argc, char *argv[])
+{
+   int err;
+   char *inFile, *outFile;
+   FILE *fin, *fout;
+   OpusCustomMode *mode=NULL;
+   OpusCustomEncoder *enc;
+   OpusCustomDecoder *dec;
+   int len;
+   opus_int32 frame_size, channels, rate;
+   int bytes_per_packet;
+   unsigned char data[MAX_PACKET];
+   int complexity;
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   int i;
+   double rmsd = 0;
+#endif
+   int count = 0;
+   opus_int32 skip;
+   opus_int16 *in, *out;
+   if (argc != 9 && argc != 8 && argc != 7)
+   {
+      fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
+               " <bytes per packet> [<complexity> [packet loss rate]] "
+               "<input> <output>\n");
+      return 1;
+   }
+
+   rate = (opus_int32)atol(argv[1]);
+   channels = atoi(argv[2]);
+   frame_size = atoi(argv[3]);
+   mode = opus_custom_mode_create(rate, frame_size, NULL);
+   if (mode == NULL)
+   {
+      fprintf(stderr, "failed to create a mode\n");
+      return 1;
+   }
+
+   bytes_per_packet = atoi(argv[4]);
+   if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
+   {
+      fprintf (stderr, "bytes per packet must be between 0 and %d\n",
+                        MAX_PACKET);
+      return 1;
+   }
+
+   inFile = argv[argc-2];
+   fin = fopen(inFile, "rb");
+   if (!fin)
+   {
+      fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+      return 1;
+   }
+   outFile = argv[argc-1];
+   fout = fopen(outFile, "wb+");
+   if (!fout)
+   {
+      fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+      fclose(fin);
+      return 1;
+   }
+
+   enc = opus_custom_encoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   dec = opus_custom_decoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
+
+   if (argc>7)
+   {
+      complexity=atoi(argv[5]);
+      opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
+   }
+
+   in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+   out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+
+   while (!feof(fin))
+   {
+      int ret;
+      err = fread(in, sizeof(short), frame_size*channels, fin);
+      if (feof(fin))
+         break;
+      len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
+      if (len <= 0)
+         fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
+
+      /* This is for simulating bit errors */
+#if 0
+      int errors = 0;
+      int eid = 0;
+      /* This simulates random bit error */
+      for (i=0;i<len*8;i++)
+      {
+         if (rand()%atoi(argv[8])==0)
+         {
+            if (i<64)
+            {
+               errors++;
+               eid = i;
+            }
+            data[i/8] ^= 1<<(7-(i%8));
+         }
+      }
+      if (errors == 1)
+         data[eid/8] ^= 1<<(7-(eid%8));
+      else if (errors%2 == 1)
+         data[rand()%8] ^= 1<<rand()%8;
+#endif
+
+#if 1 /* Set to zero to use the encoder's output instead */
+      /* This is to simulate packet loss */
+      if (argc==9 && rand()%1000<atoi(argv[argc-3]))
+      /*if (errors && (errors%2==0))*/
+         ret = opus_custom_decode(dec, NULL, len, out, frame_size);
+      else
+         ret = opus_custom_decode(dec, data, len, out, frame_size);
+      if (ret < 0)
+         fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
+#else
+      for (i=0;i<ret*channels;i++)
+         out[i] = in[i];
+#endif
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+      for (i=0;i<ret*channels;i++)
+      {
+         rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
+         /*out[i] -= in[i];*/
+      }
+#endif
+      count++;
+      fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
+      skip = 0;
+   }
+   PRINT_MIPS(stderr);
+
+   opus_custom_encoder_destroy(enc);
+   opus_custom_decoder_destroy(dec);
+   fclose(fin);
+   fclose(fout);
+   opus_custom_mode_destroy(mode);
+   free(in);
+   free(out);
+#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   if (rmsd > 0)
+   {
+      rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
+      fprintf (stderr, "Error: encoder doesn't match decoder\n");
+      fprintf (stderr, "RMS mismatch is %f\n", rmsd);
+      return 1;
+   } else {
+      fprintf (stderr, "Encoder matches decoder!!\n");
+   }
+#endif
+   return 0;
+}
+
diff --git a/src/main/jni/opus/celt/os_support.h b/src/main/jni/opus/celt/os_support.h
new file mode 100644
index 000000000..5e47e3cff
--- /dev/null
+++ b/src/main/jni/opus/celt/os_support.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+
+   File: os_support.h
+   This is the (tiny) OS abstraction layer. Aside from math.h, this is the
+   only place where system headers are allowed.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OS_SUPPORT_H
+#define OS_SUPPORT_H
+
+#ifdef CUSTOM_SUPPORT
+#  include "custom_support.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+#ifndef OVERRIDE_OPUS_ALLOC
+static OPUS_INLINE void *opus_alloc (size_t size)
+{
+   return malloc(size);
+}
+#endif
+
+/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
+{
+   /* Scratch space doesn't need to be cleared */
+   return opus_alloc(size);
+}
+#endif
+
+/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+#ifndef OVERRIDE_OPUS_FREE
+static OPUS_INLINE void opus_free (void *ptr)
+{
+   free(ptr);
+}
+#endif
+
+/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking  */
+#ifndef OVERRIDE_OPUS_COPY
+#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
+    provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_MOVE
+#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero, starting at address s */
+#ifndef OVERRIDE_OPUS_CLEAR
+#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+/*#ifdef __GNUC__
+#pragma GCC poison printf sprintf
+#pragma GCC poison malloc free realloc calloc
+#endif*/
+
+#endif /* OS_SUPPORT_H */
+
diff --git a/src/main/jni/opus/celt/pitch.c b/src/main/jni/opus/celt/pitch.c
new file mode 100644
index 000000000..d2b305441
--- /dev/null
+++ b/src/main/jni/opus/celt/pitch.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.c
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+#include "os_support.h"
+#include "modes.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "celt_lpc.h"
+
+static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
+                            int max_pitch, int *best_pitch
+#ifdef FIXED_POINT
+                            , int yshift, opus_val32 maxcorr
+#endif
+                            )
+{
+   int i, j;
+   opus_val32 Syy=1;
+   opus_val16 best_num[2];
+   opus_val32 best_den[2];
+#ifdef FIXED_POINT
+   int xshift;
+
+   xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+   best_num[0] = -1;
+   best_num[1] = -1;
+   best_den[0] = 0;
+   best_den[1] = 0;
+   best_pitch[0] = 0;
+   best_pitch[1] = 1;
+   for (j=0;j<len;j++)
+      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
+   for (i=0;i<max_pitch;i++)
+   {
+      if (xcorr[i]>0)
+      {
+         opus_val16 num;
+         opus_val32 xcorr16;
+         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+#ifndef FIXED_POINT
+         /* Considering the range of xcorr16, this should avoid both underflows
+            and overflows (inf) when squaring xcorr16 */
+         xcorr16 *= 1e-12f;
+#endif
+         num = MULT16_16_Q15(xcorr16,xcorr16);
+         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+         {
+            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+            {
+               best_num[1] = best_num[0];
+               best_den[1] = best_den[0];
+               best_pitch[1] = best_pitch[0];
+               best_num[0] = num;
+               best_den[0] = Syy;
+               best_pitch[0] = i;
+            } else {
+               best_num[1] = num;
+               best_den[1] = Syy;
+               best_pitch[1] = i;
+            }
+         }
+      }
+      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+      Syy = MAX32(1, Syy);
+   }
+}
+
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
+
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch)
+{
+   int i;
+   opus_val32 ac[5];
+   opus_val16 tmp=Q15ONE;
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
+#ifdef FIXED_POINT
+   int shift;
+   opus_val32 maxabs = celt_maxabs32(x[0], len);
+   if (C==2)
+   {
+      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
+      maxabs = MAX32(maxabs, maxabs_1);
+   }
+   if (maxabs<1)
+      maxabs=1;
+   shift = celt_ilog2(maxabs)-10;
+   if (shift<0)
+      shift=0;
+   if (C==2)
+      shift++;
+#endif
+   for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
+   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
+   if (C==2)
+   {
+      for (i=1;i<len>>1;i++)
+         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
+      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
+   }
+
+   _celt_autocorr(x_lp, ac, NULL, 0,
+                  4, len>>1, arch);
+
+   /* Noise floor -40 dB */
+#ifdef FIXED_POINT
+   ac[0] += SHR32(ac[0],13);
+#else
+   ac[0] *= 1.0001f;
+#endif
+   /* Lag windowing */
+   for (i=1;i<=4;i++)
+   {
+      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+#endif
+   }
+
+   _celt_lpc(lpc, ac, 4);
+   for (i=0;i<4;i++)
+   {
+      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
+      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
+   }
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, x[j],y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+}
+
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+}
+
+#endif
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch)
+{
+   int i, j;
+   int lag;
+   int best_pitch[2]={0,0};
+   VARDECL(opus_val16, x_lp4);
+   VARDECL(opus_val16, y_lp4);
+   VARDECL(opus_val32, xcorr);
+#ifdef FIXED_POINT
+   opus_val32 maxcorr;
+   opus_val32 xmax, ymax;
+   int shift=0;
+#endif
+   int offset;
+
+   SAVE_STACK;
+
+   celt_assert(len>0);
+   celt_assert(max_pitch>0);
+   lag = len+max_pitch;
+
+   ALLOC(x_lp4, len>>2, opus_val16);
+   ALLOC(y_lp4, lag>>2, opus_val16);
+   ALLOC(xcorr, max_pitch>>1, opus_val32);
+
+   /* Downsample by 2 again */
+   for (j=0;j<len>>2;j++)
+      x_lp4[j] = x_lp[2*j];
+   for (j=0;j<lag>>2;j++)
+      y_lp4[j] = y[2*j];
+
+#ifdef FIXED_POINT
+   xmax = celt_maxabs16(x_lp4, len>>2);
+   ymax = celt_maxabs16(y_lp4, lag>>2);
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
+   if (shift>0)
+   {
+      for (j=0;j<len>>2;j++)
+         x_lp4[j] = SHR16(x_lp4[j], shift);
+      for (j=0;j<lag>>2;j++)
+         y_lp4[j] = SHR16(y_lp4[j], shift);
+      /* Use double the shift for a MAC */
+      shift *= 2;
+   } else {
+      shift = 0;
+   }
+#endif
+
+   /* Coarse search with 4x decimation */
+
+#ifdef FIXED_POINT
+   maxcorr =
+#endif
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
+
+   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
+#ifdef FIXED_POINT
+                   , 0, maxcorr
+#endif
+                   );
+
+   /* Finer search with 2x decimation */
+#ifdef FIXED_POINT
+   maxcorr=1;
+#endif
+   for (i=0;i<max_pitch>>1;i++)
+   {
+      opus_val32 sum=0;
+      xcorr[i] = 0;
+      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
+         continue;
+      for (j=0;j<len>>1;j++)
+         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+      xcorr[i] = MAX32(-1, sum);
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
+#ifdef FIXED_POINT
+                   , shift+1, maxcorr
+#endif
+                   );
+
+   /* Refine by pseudo-interpolation */
+   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+   {
+      opus_val32 a, b, c;
+      a = xcorr[best_pitch[0]-1];
+      b = xcorr[best_pitch[0]];
+      c = xcorr[best_pitch[0]+1];
+      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+         offset = 1;
+      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+         offset = -1;
+      else
+         offset = 0;
+   } else {
+      offset = 0;
+   }
+   *pitch = 2*best_pitch[0]-offset;
+
+   RESTORE_STACK;
+}
+
+static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0_, int prev_period, opus_val16 prev_gain)
+{
+   int k, i, T, T0;
+   opus_val16 g, g0;
+   opus_val16 pg;
+   opus_val32 xy,xx,yy,xy2;
+   opus_val32 xcorr[3];
+   opus_val32 best_xy, best_yy;
+   int offset;
+   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
+
+   minperiod0 = minperiod;
+   maxperiod /= 2;
+   minperiod /= 2;
+   *T0_ /= 2;
+   prev_period /= 2;
+   N /= 2;
+   x += maxperiod;
+   if (*T0_>=maxperiod)
+      *T0_=maxperiod-1;
+
+   T = T0 = *T0_;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
+   {
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+      yy_lookup[i] = MAX32(0, yy);
+   }
+   yy = yy_lookup[T0];
+   best_xy = xy;
+   best_yy = yy;
+#ifdef FIXED_POINT
+      {
+         opus_val32 x2y2;
+         int sh, t;
+         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
+         sh = celt_ilog2(x2y2)>>1;
+         t = VSHR32(x2y2, 2*(sh-7));
+         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+      }
+#else
+      g = g0 = xy/celt_sqrt(1+xx*yy);
+#endif
+   /* Look for any pitch at T/k */
+   for (k=2;k<=15;k++)
+   {
+      int T1, T1b;
+      opus_val16 g1;
+      opus_val16 cont=0;
+      opus_val16 thresh;
+      T1 = (2*T0+k)/(2*k);
+      if (T1 < minperiod)
+         break;
+      /* Look for another strong correlation at T1b */
+      if (k==2)
+      {
+         if (T1+T0>maxperiod)
+            T1b = T0;
+         else
+            T1b = T0+T1;
+      } else
+      {
+         T1b = (2*second_check[k]*T0+k)/(2*k);
+      }
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+      xy += xy2;
+      yy = yy_lookup[T1] + yy_lookup[T1b];
+#ifdef FIXED_POINT
+      {
+         opus_val32 x2y2;
+         int sh, t;
+         x2y2 = 1+MULT32_32_Q31(xx,yy);
+         sh = celt_ilog2(x2y2)>>1;
+         t = VSHR32(x2y2, 2*(sh-7));
+         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+      }
+#else
+      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
+#endif
+      if (abs(T1-prev_period)<=1)
+         cont = prev_gain;
+      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
+         cont = HALF32(prev_gain);
+      else
+         cont = 0;
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
+      {
+         best_xy = xy;
+         best_yy = yy;
+         T = T1;
+         g = g1;
+      }
+   }
+   best_xy = MAX32(0, best_xy);
+   if (best_yy <= best_xy)
+      pg = Q15ONE;
+   else
+      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
+
+   for (k=0;k<3;k++)
+   {
+      int T1 = T+k-1;
+      xy = 0;
+      for (i=0;i<N;i++)
+         xy = MAC16_16(xy, x[i], x[i-T1]);
+      xcorr[k] = xy;
+   }
+   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
+      offset = 1;
+   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+      offset = -1;
+   else
+      offset = 0;
+   if (pg > g)
+      pg = g;
+   *T0_ = 2*T+offset;
+
+   if (*T0_<minperiod0)
+      *T0_=minperiod0;
+   RESTORE_STACK;
+   return pg;
+}
diff --git a/src/main/jni/opus/celt/pitch.h b/src/main/jni/opus/celt/pitch.h
new file mode 100644
index 000000000..df317ecc1
--- /dev/null
+++ b/src/main/jni/opus/celt/pitch.h
@@ -0,0 +1,173 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_H
+#define PITCH_H
+
+#include "modes.h"
+#include "cpu_support.h"
+
+#if defined(__SSE__) && !defined(FIXED_POINT)
+#include "x86/pitch_sse.h"
+#endif
+
+#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
+# include "arm/pitch_arm.h"
+#endif
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch);
+
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch);
+
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0, int prev_period, opus_val16 prev_gain);
+
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
+#ifndef OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+#endif /* OVERRIDE_XCORR_KERNEL */
+
+#ifndef OVERRIDE_DUAL_INNER_PROD
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+#endif
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch);
+
+#if !defined(OVERRIDE_PITCH_XCORR)
+/*Is run-time CPU detection enabled on this platform?*/
+# if defined(OPUS_HAVE_RTCD)
+extern
+#  if defined(FIXED_POINT)
+opus_val32
+#  else
+void
+#  endif
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+# else
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+# endif
+#endif
+
+#endif
diff --git a/src/main/jni/opus/celt/quant_bands.c b/src/main/jni/opus/celt/quant_bands.c
new file mode 100644
index 000000000..ac6952c26
--- /dev/null
+++ b/src/main/jni/opus/celt/quant_bands.c
@@ -0,0 +1,556 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "quant_bands.h"
+#include "laplace.h"
+#include <math.h>
+#include "os_support.h"
+#include "arch.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+#include "rate.h"
+
+#ifdef FIXED_POINT
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
+      103,100, 92, 85, 81,
+       77, 72, 70, 78, 75,
+       73, 71, 78, 74, 69,
+       72, 70, 74, 76, 71,
+       60, 60, 60, 60, 60
+};
+#else
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
+      6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
+      4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
+      4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
+      4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f,
+      3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f
+};
+#endif
+/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */
+#ifdef FIXED_POINT
+static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384};
+static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554};
+static const opus_val16 beta_intra = 4915;
+#else
+static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.};
+static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.};
+static const opus_val16 beta_intra = 4915/32768.;
+#endif
+
+/*Parameters of the Laplace-like probability models used for the coarse energy.
+  There is one pair of parameters for each frame size, prediction type
+   (inter/intra), and band number.
+  The first number of each pair is the probability of 0, and the second is the
+   decay rate, both in Q8 precision.*/
+static const unsigned char e_prob_model[4][2][42] = {
+   /*120 sample frames.*/
+   {
+      /*Inter*/
+      {
+          72, 127,  65, 129,  66, 128,  65, 128,  64, 128,  62, 128,  64, 128,
+          64, 128,  92,  78,  92,  79,  92,  78,  90,  79, 116,  41, 115,  40,
+         114,  40, 132,  26, 132,  26, 145,  17, 161,  12, 176,  10, 177,  11
+      },
+      /*Intra*/
+      {
+          24, 179,  48, 138,  54, 135,  54, 132,  53, 134,  56, 133,  55, 132,
+          55, 132,  61, 114,  70,  96,  74,  88,  75,  88,  87,  74,  89,  66,
+          91,  67, 100,  59, 108,  50, 120,  40, 122,  37,  97,  43,  78,  50
+      }
+   },
+   /*240 sample frames.*/
+   {
+      /*Inter*/
+      {
+          83,  78,  84,  81,  88,  75,  86,  74,  87,  71,  90,  73,  93,  74,
+          93,  74, 109,  40, 114,  36, 117,  34, 117,  34, 143,  17, 145,  18,
+         146,  19, 162,  12, 165,  10, 178,   7, 189,   6, 190,   8, 177,   9
+      },
+      /*Intra*/
+      {
+          23, 178,  54, 115,  63, 102,  66,  98,  69,  99,  74,  89,  71,  91,
+          73,  91,  78,  89,  86,  80,  92,  66,  93,  64, 102,  59, 103,  60,
+         104,  60, 117,  52, 123,  44, 138,  35, 133,  31,  97,  38,  77,  45
+      }
+   },
+   /*480 sample frames.*/
+   {
+      /*Inter*/
+      {
+          61,  90,  93,  60, 105,  42, 107,  41, 110,  45, 116,  38, 113,  38,
+         112,  38, 124,  26, 132,  27, 136,  19, 140,  20, 155,  14, 159,  16,
+         158,  18, 170,  13, 177,  10, 187,   8, 192,   6, 175,   9, 159,  10
+      },
+      /*Intra*/
+      {
+          21, 178,  59, 110,  71,  86,  75,  85,  84,  83,  91,  66,  88,  73,
+          87,  72,  92,  75,  98,  72, 105,  58, 107,  54, 115,  52, 114,  55,
+         112,  56, 129,  51, 132,  40, 150,  33, 140,  29,  98,  35,  77,  42
+      }
+   },
+   /*960 sample frames.*/
+   {
+      /*Inter*/
+      {
+          42, 121,  96,  66, 108,  43, 111,  40, 117,  44, 123,  32, 120,  36,
+         119,  33, 127,  33, 134,  34, 139,  21, 147,  23, 152,  20, 158,  25,
+         154,  26, 166,  21, 173,  16, 184,  13, 184,  10, 150,  13, 139,  15
+      },
+      /*Intra*/
+      {
+          22, 178,  63, 114,  74,  82,  84,  83,  92,  82, 103,  62,  96,  72,
+          96,  67, 101,  73, 107,  72, 113,  55, 118,  52, 125,  52, 118,  52,
+         117,  55, 135,  49, 137,  39, 157,  32, 145,  29,  97,  33,  77,  40
+      }
+   }
+};
+
+static const unsigned char small_energy_icdf[3]={2,1,0};
+
+static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
+{
+   int c, i;
+   opus_val32 dist = 0;
+   c=0; do {
+      for (i=start;i<end;i++)
+      {
+         opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3));
+         dist = MAC16_16(dist, d,d);
+      }
+   } while (++c<C);
+   return MIN32(200,SHR32(dist,2*DB_SHIFT-6));
+}
+
+static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
+      const opus_val16 *eBands, opus_val16 *oldEBands,
+      opus_int32 budget, opus_int32 tell,
+      const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
+      int C, int LM, int intra, opus_val16 max_decay, int lfe)
+{
+   int i, c;
+   int badness = 0;
+   opus_val32 prev[2] = {0,0};
+   opus_val16 coef;
+   opus_val16 beta;
+
+   if (tell+3 <= budget)
+      ec_enc_bit_logp(enc, intra, 3);
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   /* Encode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int bits_left;
+         int qi, qi0;
+         opus_val32 q;
+         opus_val16 x;
+         opus_val32 f, tmp;
+         opus_val16 oldE;
+         opus_val16 decay_bound;
+         x = eBands[i+c*m->nbEBands];
+         oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+#ifdef FIXED_POINT
+         f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);
+         decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT),
+               SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));
+#else
+         f = x-coef*oldE-prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (int)floor(.5f+f);
+         decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay;
+#endif
+         /* Prevent the energy from going down too quickly (e.g. for bands
+            that have just one bin) */
+         if (qi < 0 && x < decay_bound)
+         {
+            qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT);
+            if (qi > 0)
+               qi = 0;
+         }
+         qi0 = qi;
+         /* If we don't have enough bits to encode all the energy, just assume
+             something safe. */
+         tell = ec_tell(enc);
+         bits_left = budget-tell-3*C*(end-i);
+         if (i!=start && bits_left < 30)
+         {
+            if (bits_left < 24)
+               qi = IMIN(1, qi);
+            if (bits_left < 16)
+               qi = IMAX(-1, qi);
+         }
+         if (lfe && i>=2)
+            qi = IMIN(qi, 0);
+         if (budget-tell >= 15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            ec_laplace_encode(enc, &qi,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell >= 2)
+         {
+            qi = IMAX(-1, IMIN(qi, 1));
+            ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2);
+         }
+         else if(budget-tell >= 1)
+         {
+            qi = IMIN(0, qi);
+            ec_enc_bit_logp(enc, -qi, 1);
+         }
+         else
+            qi = -1;
+         error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT);
+         badness += abs(qi0-qi);
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);
+#ifdef FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+   return lfe ? 0 : badness;
+}
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
+      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
+{
+   int intra;
+   opus_val16 max_decay;
+   VARDECL(opus_val16, oldEBands_intra);
+   VARDECL(opus_val16, error_intra);
+   ec_enc enc_start_state;
+   opus_uint32 tell;
+   int badness1=0;
+   opus_int32 intra_bias;
+   opus_val32 new_distortion;
+   SAVE_STACK;
+
+   intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C);
+   intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512));
+   new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C);
+
+   tell = ec_tell(enc);
+   if (tell+3 > budget)
+      two_pass = intra = 0;
+
+   max_decay = QCONST16(16.f,DB_SHIFT);
+   if (end-start>10)
+   {
+#ifdef FIXED_POINT
+      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+#else
+      max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
+#endif
+   }
+   if (lfe)
+      max_decay=3;
+   enc_start_state = *enc;
+
+   ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
+   ALLOC(error_intra, C*m->nbEBands, opus_val16);
+   OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands);
+
+   if (two_pass || intra)
+   {
+      badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
+            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
+   }
+
+   if (!intra)
+   {
+      unsigned char *intra_buf;
+      ec_enc enc_intra_state;
+      opus_int32 tell_intra;
+      opus_uint32 nstart_bytes;
+      opus_uint32 nintra_bytes;
+      opus_uint32 save_bytes;
+      int badness2;
+      VARDECL(unsigned char, intra_bits);
+
+      tell_intra = ec_tell_frac(enc);
+
+      enc_intra_state = *enc;
+
+      nstart_bytes = ec_range_bytes(&enc_start_state);
+      nintra_bytes = ec_range_bytes(&enc_intra_state);
+      intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
+      save_bytes = nintra_bytes-nstart_bytes;
+      if (save_bytes == 0)
+         save_bytes = ALLOC_NONE;
+      ALLOC(intra_bits, save_bytes, unsigned char);
+      /* Copy bits from intra bit-stream */
+      OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);
+
+      *enc = enc_start_state;
+
+      badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
+            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
+
+      if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
+      {
+         *enc = enc_intra_state;
+         /* Copy intra bits to bit-stream */
+         OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes);
+         OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+         OPUS_COPY(error, error_intra, C*m->nbEBands);
+         intra = 1;
+      }
+   } else {
+      OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+      OPUS_COPY(error, error_intra, C*m->nbEBands);
+   }
+
+   if (intra)
+      *delayedIntra = new_distortion;
+   else
+      *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra),
+            new_distortion);
+
+   RESTORE_STACK;
+}
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C)
+{
+   int i, c;
+
+   /* Encode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      opus_int16 frac = 1<<fine_quant[i];
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+#ifdef FIXED_POINT
+         /* Has to be without rounding */
+         q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]);
+#else
+         q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac);
+#endif
+         if (q2 > frac-1)
+            q2 = frac-1;
+         if (q2<0)
+            q2 = 0;
+         ec_enc_bits(enc, q2, fine_quant[i]);
+#ifdef FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+         error[i+c*m->nbEBands] -= offset;
+         /*printf ("%f ", error[i] - offset);*/
+      } while (++c < C);
+   }
+}
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = error[i+c*m->nbEBands]<0 ? 0 : 1;
+            ec_enc_bits(enc, q2, 1);
+#ifdef FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM)
+{
+   const unsigned char *prob_model = e_prob_model[LM][intra];
+   int i, c;
+   opus_val32 prev[2] = {0, 0};
+   opus_val16 coef;
+   opus_val16 beta;
+   opus_int32 budget;
+   opus_int32 tell;
+
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   budget = dec->storage*8;
+
+   /* Decode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int qi;
+         opus_val32 q;
+         opus_val32 tmp;
+         /* It would be better to express this invariant as a
+            test on C at function entry, but that isn't enough
+            to make the static analyzer happy. */
+         celt_assert(c<2);
+         tell = ec_tell(dec);
+         if(budget-tell>=15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            qi = ec_laplace_decode(dec,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell>=2)
+         {
+            qi = ec_dec_icdf(dec, small_energy_icdf, 2);
+            qi = (qi>>1)^-(qi&1);
+         }
+         else if(budget-tell>=1)
+         {
+            qi = -ec_dec_bit_logp(dec, 1);
+         }
+         else
+            qi = -1;
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+         tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);
+#ifdef FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+}
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C)
+{
+   int i, c;
+   /* Decode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+         q2 = ec_dec_bits(dec, fine_quant[i]);
+#ifdef FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+      } while (++c < C);
+   }
+}
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant,  int *fine_priority, int bits_left, ec_dec *dec, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = ec_dec_bits(dec, 1);
+#ifdef FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C)
+{
+   int c, i;
+   c=0;
+   do {
+      for (i=0;i<effEnd;i++)
+         bandLogE[i+c*m->nbEBands] =
+               celt_log2(SHL32(bandE[i+c*m->nbEBands],2))
+               - SHL16((opus_val16)eMeans[i],6);
+      for (i=effEnd;i<end;i++)
+         bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
+   } while (++c < C);
+}
diff --git a/src/main/jni/opus/celt/quant_bands.h b/src/main/jni/opus/celt/quant_bands.h
new file mode 100644
index 000000000..0490bca4b
--- /dev/null
+++ b/src/main/jni/opus/celt/quant_bands.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef QUANT_BANDS
+#define QUANT_BANDS
+
+#include "arch.h"
+#include "modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "mathops.h"
+
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C);
+
+void log2Amp(const CELTMode *m, int start, int end,
+      celt_ener *eBands, const opus_val16 *oldEBands, int C);
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM,
+      int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
+      int two_pass, int loss_rate, int lfe);
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM);
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C);
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
+
+#endif /* QUANT_BANDS */
diff --git a/src/main/jni/opus/celt/rate.c b/src/main/jni/opus/celt/rate.c
new file mode 100644
index 000000000..e13d839d6
--- /dev/null
+++ b/src/main/jni/opus/celt/rate.c
@@ -0,0 +1,638 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "modes.h"
+#include "cwrs.h"
+#include "arch.h"
+#include "os_support.h"
+
+#include "entcode.h"
+#include "rate.h"
+
+static const unsigned char LOG2_FRAC_TABLE[24]={
+   0,
+   8,13,
+  16,19,21,23,
+  24,26,27,28,29,30,31,32,
+  32,33,34,34,35,36,36,37,37
+};
+
+#ifdef CUSTOM_MODES
+
+/*Determines if V(N,K) fits in a 32-bit unsigned integer.
+  N and K are themselves limited to 15 bits.*/
+static int fits_in32(int _n, int _k)
+{
+   static const opus_int16 maxN[15] = {
+      32767, 32767, 32767, 1476, 283, 109,  60,  40,
+       29,  24,  20,  18,  16,  14,  13};
+   static const opus_int16 maxK[15] = {
+      32767, 32767, 32767, 32767, 1172, 238,  95,  53,
+       36,  27,  22,  18,  16,  15,  13};
+   if (_n>=14)
+   {
+      if (_k>=14)
+         return 0;
+      else
+         return _n <= maxN[_k];
+   } else {
+      return _k <= maxK[_n];
+   }
+}
+
+void compute_pulse_cache(CELTMode *m, int LM)
+{
+   int C;
+   int i;
+   int j;
+   int curr=0;
+   int nbEntries=0;
+   int entryN[100], entryK[100], entryI[100];
+   const opus_int16 *eBands = m->eBands;
+   PulseCache *cache = &m->cache;
+   opus_int16 *cindex;
+   unsigned char *bits;
+   unsigned char *cap;
+
+   cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2));
+   cache->index = cindex;
+
+   /* Scan for all unique band sizes */
+   for (i=0;i<=LM+1;i++)
+   {
+      for (j=0;j<m->nbEBands;j++)
+      {
+         int k;
+         int N = (eBands[j+1]-eBands[j])<<i>>1;
+         cindex[i*m->nbEBands+j] = -1;
+         /* Find other bands that have the same size */
+         for (k=0;k<=i;k++)
+         {
+            int n;
+            for (n=0;n<m->nbEBands && (k!=i || n<j);n++)
+            {
+               if (N == (eBands[n+1]-eBands[n])<<k>>1)
+               {
+                  cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n];
+                  break;
+               }
+            }
+         }
+         if (cache->index[i*m->nbEBands+j] == -1 && N!=0)
+         {
+            int K;
+            entryN[nbEntries] = N;
+            K = 0;
+            while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO)
+               K++;
+            entryK[nbEntries] = K;
+            cindex[i*m->nbEBands+j] = curr;
+            entryI[nbEntries] = curr;
+
+            curr += K+1;
+            nbEntries++;
+         }
+      }
+   }
+   bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr);
+   cache->bits = bits;
+   cache->size = curr;
+   /* Compute the cache for all unique sizes */
+   for (i=0;i<nbEntries;i++)
+   {
+      unsigned char *ptr = bits+entryI[i];
+      opus_int16 tmp[MAX_PULSES+1];
+      get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
+      for (j=1;j<=entryK[i];j++)
+         ptr[j] = tmp[get_pulses(j)]-1;
+      ptr[0] = entryK[i];
+   }
+
+   /* Compute the maximum rate for each band at which we'll reliably use as
+       many bits as we ask for. */
+   cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands);
+   for (i=0;i<=LM;i++)
+   {
+      for (C=1;C<=2;C++)
+      {
+         for (j=0;j<m->nbEBands;j++)
+         {
+            int N0;
+            int max_bits;
+            N0 = m->eBands[j+1]-m->eBands[j];
+            /* N=1 bands only have a sign bit and fine bits. */
+            if (N0<<i == 1)
+               max_bits = C*(1+MAX_FINE_BITS)<<BITRES;
+            else
+            {
+               const unsigned char *pcache;
+               opus_int32           num;
+               opus_int32           den;
+               int                  LM0;
+               int                  N;
+               int                  offset;
+               int                  ndof;
+               int                  qb;
+               int                  k;
+               LM0 = 0;
+               /* Even-sized bands bigger than N=2 can be split one more time.
+                  As of commit 44203907 all bands >1 are even, including custom modes.*/
+               if (N0 > 2)
+               {
+                  N0>>=1;
+                  LM0--;
+               }
+               /* N0=1 bands can't be split down to N<2. */
+               else if (N0 <= 1)
+               {
+                  LM0=IMIN(i,1);
+                  N0<<=LM0;
+               }
+               /* Compute the cost for the lowest-level PVQ of a fully split
+                   band. */
+               pcache = bits + cindex[(LM0+1)*m->nbEBands+j];
+               max_bits = pcache[pcache[0]]+1;
+               /* Add in the cost of coding regular splits. */
+               N = N0;
+               for(k=0;k<i-LM0;k++){
+                  max_bits <<= 1;
+                  /* Offset the number of qtheta bits by log2(N)/2
+                      + QTHETA_OFFSET compared to their "fair share" of
+                      total/N */
+                  offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
+                  /* The number of qtheta bits we'll allocate if the remainder
+                      is to be max_bits.
+                     The average measured cost for theta is 0.89701 times qb,
+                      approximated here as 459/512. */
+                  num=459*(opus_int32)((2*N-1)*offset+max_bits);
+                  den=((opus_int32)(2*N-1)<<9)-459;
+                  qb = IMIN((num+(den>>1))/den, 57);
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+                  N <<= 1;
+               }
+               /* Add in the cost of a stereo split, if necessary. */
+               if (C==2)
+               {
+                  max_bits <<= 1;
+                  offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET);
+                  ndof = 2*N-1-(N==2);
+                  /* The average measured cost for theta with the step PDF is
+                      0.95164 times qb, approximated here as 487/512. */
+                  num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset);
+                  den = ((opus_int32)ndof<<9)-(N==2?512:487);
+                  qb = IMIN((num+(den>>1))/den, (N==2?64:61));
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+               }
+               /* Add the fine bits we'll use. */
+               /* Compensate for the extra DoF in stereo */
+               ndof = C*N + ((C==2 && N>2) ? 1 : 0);
+               /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET
+                   compared to their "fair share" of total/N */
+               offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET;
+               /* N=2 is the only point that doesn't match the curve */
+               if (N==2)
+                  offset += 1<<BITRES>>2;
+               /* The number of fine bits we'll allocate if the remainder is
+                   to be max_bits. */
+               num = max_bits+ndof*offset;
+               den = (ndof-1)<<BITRES;
+               qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS);
+               celt_assert(qb >= 0);
+               max_bits += C*qb<<BITRES;
+            }
+            max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64;
+            celt_assert(max_bits >= 0);
+            celt_assert(max_bits < 256);
+            *cap++ = (unsigned char)max_bits;
+         }
+      }
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+#define ALLOC_STEPS 6
+
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
+      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
+      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   opus_int32 psum;
+   int lo, hi;
+   int i, j;
+   int logM;
+   int stereo;
+   int codedBands=-1;
+   int alloc_floor;
+   opus_int32 left, percoeff;
+   int done;
+   opus_int32 balance;
+   SAVE_STACK;
+
+   alloc_floor = C<<BITRES;
+   stereo = C>1;
+
+   logM = LM<<BITRES;
+   lo = 0;
+   hi = 1<<ALLOC_STEPS;
+   for (i=0;i<ALLOC_STEPS;i++)
+   {
+      int mid = (lo+hi)>>1;
+      psum = 0;
+      done = 0;
+      for (j=end;j-->start;)
+      {
+         int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS);
+         if (tmp >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(tmp, cap[j]);
+         } else {
+            if (tmp >= alloc_floor)
+               psum += alloc_floor;
+         }
+      }
+      if (psum > total)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   psum = 0;
+   /*printf ("interp bisection gave %d\n", lo);*/
+   done = 0;
+   for (j=end;j-->start;)
+   {
+      int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
+      if (tmp < thresh[j] && !done)
+      {
+         if (tmp >= alloc_floor)
+            tmp = alloc_floor;
+         else
+            tmp = 0;
+      } else
+         done = 1;
+      /* Don't allocate more than we can actually use */
+      tmp = IMIN(tmp, cap[j]);
+      bits[j] = tmp;
+      psum += tmp;
+   }
+
+   /* Decide which bands to skip, working backwards from the end. */
+   for (codedBands=end;;codedBands--)
+   {
+      int band_width;
+      int band_bits;
+      int rem;
+      j = codedBands-1;
+      /* Never skip the first band, nor a band that has been boosted by
+          dynalloc.
+         In the first case, we'd be coding a bit to signal we're going to waste
+          all the other bits.
+         In the second case, we'd be coding a bit to redistribute all the bits
+          we just signaled should be cocentrated in this band. */
+      if (j<=skip_start)
+      {
+         /* Give the bit we reserved to end skipping back. */
+         total += skip_rsv;
+         break;
+      }
+      /*Figure out how many left-over bits we would be adding to this band.
+        This can include bits we've stolen back from higher, skipped bands.*/
+      left = total-psum;
+      percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+      left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+      rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
+      band_width = m->eBands[codedBands]-m->eBands[j];
+      band_bits = (int)(bits[j] + percoeff*band_width + rem);
+      /*Only code a skip decision if we're above the threshold for this band.
+        Otherwise it is force-skipped.
+        This ensures that we have enough bits to code the skip flag.*/
+      if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES)))
+      {
+         if (encode)
+         {
+            /*This if() block is the only part of the allocation function that
+               is not a mandatory part of the bitstream: any bands we choose to
+               skip here must be explicitly signaled.*/
+            /*Choose a threshold with some hysteresis to keep bands from
+               fluctuating in and out.*/
+#ifdef FUZZING
+            if ((rand()&0x1) == 0)
+#else
+            if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
+#endif
+            {
+               ec_enc_bit_logp(ec, 1, 1);
+               break;
+            }
+            ec_enc_bit_logp(ec, 0, 1);
+         } else if (ec_dec_bit_logp(ec, 1)) {
+            break;
+         }
+         /*We used a bit to skip this band.*/
+         psum += 1<<BITRES;
+         band_bits -= 1<<BITRES;
+      }
+      /*Reclaim the bits originally allocated to this band.*/
+      psum -= bits[j]+intensity_rsv;
+      if (intensity_rsv > 0)
+         intensity_rsv = LOG2_FRAC_TABLE[j-start];
+      psum += intensity_rsv;
+      if (band_bits >= alloc_floor)
+      {
+         /*If we have enough for a fine energy bit per channel, use it.*/
+         psum += alloc_floor;
+         bits[j] = alloc_floor;
+      } else {
+         /*Otherwise this band gets nothing at all.*/
+         bits[j] = 0;
+      }
+   }
+
+   celt_assert(codedBands > start);
+   /* Code the intensity and dual stereo parameters. */
+   if (intensity_rsv > 0)
+   {
+      if (encode)
+      {
+         *intensity = IMIN(*intensity, codedBands);
+         ec_enc_uint(ec, *intensity-start, codedBands+1-start);
+      }
+      else
+         *intensity = start+ec_dec_uint(ec, codedBands+1-start);
+   }
+   else
+      *intensity = 0;
+   if (*intensity <= start)
+   {
+      total += dual_stereo_rsv;
+      dual_stereo_rsv = 0;
+   }
+   if (dual_stereo_rsv > 0)
+   {
+      if (encode)
+         ec_enc_bit_logp(ec, *dual_stereo, 1);
+      else
+         *dual_stereo = ec_dec_bit_logp(ec, 1);
+   }
+   else
+      *dual_stereo = 0;
+
+   /* Allocate the remaining bits */
+   left = total-psum;
+   percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+   left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+   for (j=start;j<codedBands;j++)
+      bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
+   for (j=start;j<codedBands;j++)
+   {
+      int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]);
+      bits[j] += tmp;
+      left -= tmp;
+   }
+   /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/
+
+   balance = 0;
+   for (j=start;j<codedBands;j++)
+   {
+      int N0, N, den;
+      int offset;
+      int NClogN;
+      opus_int32 excess, bit;
+
+      celt_assert(bits[j] >= 0);
+      N0 = m->eBands[j+1]-m->eBands[j];
+      N=N0<<LM;
+      bit = (opus_int32)bits[j]+balance;
+
+      if (N>1)
+      {
+         excess = MAX32(bit-cap[j],0);
+         bits[j] = bit-excess;
+
+         /* Compensate for the extra DoF in stereo */
+         den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 1 : 0));
+
+         NClogN = den*(m->logN[j] + logM);
+
+         /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET
+            compared to their "fair share" of total/N */
+         offset = (NClogN>>1)-den*FINE_OFFSET;
+
+         /* N=2 is the only point that doesn't match the curve */
+         if (N==2)
+            offset += den<<BITRES>>2;
+
+         /* Changing the offset for allocating the second and third
+             fine energy bit */
+         if (bits[j] + offset < den*2<<BITRES)
+            offset += NClogN>>2;
+         else if (bits[j] + offset < den*3<<BITRES)
+            offset += NClogN>>3;
+
+         /* Divide with rounding */
+         ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
+
+         /* Make sure not to bust */
+         if (C*ebits[j] > (bits[j]>>BITRES))
+            ebits[j] = bits[j] >> stereo >> BITRES;
+
+         /* More than that is useless because that's about as far as PVQ can go */
+         ebits[j] = IMIN(ebits[j], MAX_FINE_BITS);
+
+         /* If we rounded down or capped this band, make it a candidate for the
+             final fine energy pass */
+         fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset;
+
+         /* Remove the allocated fine bits; the rest are assigned to PVQ */
+         bits[j] -= C*ebits[j]<<BITRES;
+
+      } else {
+         /* For N=1, all bits go to fine energy except for a single sign bit */
+         excess = MAX32(0,bit-(C<<BITRES));
+         bits[j] = bit-excess;
+         ebits[j] = 0;
+         fine_priority[j] = 1;
+      }
+
+      /* Fine energy can't take advantage of the re-balancing in
+          quant_all_bands().
+         Instead, do the re-balancing here.*/
+      if(excess > 0)
+      {
+         int extra_fine;
+         int extra_bits;
+         extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]);
+         ebits[j] += extra_fine;
+         extra_bits = extra_fine*C<<BITRES;
+         fine_priority[j] = extra_bits >= excess-balance;
+         excess -= extra_bits;
+      }
+      balance = excess;
+
+      celt_assert(bits[j] >= 0);
+      celt_assert(ebits[j] >= 0);
+   }
+   /* Save any remaining bits over the cap for the rebalancing in
+       quant_all_bands(). */
+   *_balance = balance;
+
+   /* The skipped bands use all their bits for fine energy. */
+   for (;j<end;j++)
+   {
+      ebits[j] = bits[j] >> stereo >> BITRES;
+      celt_assert(C*ebits[j]<<BITRES == bits[j]);
+      bits[j] = 0;
+      fine_priority[j] = ebits[j]<1;
+   }
+   RESTORE_STACK;
+   return codedBands;
+}
+
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   int lo, hi, len, j;
+   int codedBands;
+   int skip_start;
+   int skip_rsv;
+   int intensity_rsv;
+   int dual_stereo_rsv;
+   VARDECL(int, bits1);
+   VARDECL(int, bits2);
+   VARDECL(int, thresh);
+   VARDECL(int, trim_offset);
+   SAVE_STACK;
+
+   total = IMAX(total, 0);
+   len = m->nbEBands;
+   skip_start = start;
+   /* Reserve a bit to signal the end of manually skipped bands. */
+   skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0;
+   total -= skip_rsv;
+   /* Reserve bits for the intensity and dual stereo parameters. */
+   intensity_rsv = dual_stereo_rsv = 0;
+   if (C==2)
+   {
+      intensity_rsv = LOG2_FRAC_TABLE[end-start];
+      if (intensity_rsv>total)
+         intensity_rsv = 0;
+      else
+      {
+         total -= intensity_rsv;
+         dual_stereo_rsv = total>=1<<BITRES ? 1<<BITRES : 0;
+         total -= dual_stereo_rsv;
+      }
+   }
+   ALLOC(bits1, len, int);
+   ALLOC(bits2, len, int);
+   ALLOC(thresh, len, int);
+   ALLOC(trim_offset, len, int);
+
+   for (j=start;j<end;j++)
+   {
+      /* Below this threshold, we're sure not to allocate any PVQ bits */
+      thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4);
+      /* Tilt of the allocation curve */
+      trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1)
+            *(1<<(LM+BITRES))>>6;
+      /* Giving less resolution to single-coefficient bands because they get
+         more benefit from having one coarse value per coefficient*/
+      if ((m->eBands[j+1]-m->eBands[j])<<LM==1)
+         trim_offset[j] -= C<<BITRES;
+   }
+   lo = 1;
+   hi = m->nbAllocVectors - 1;
+   do
+   {
+      int done = 0;
+      int psum = 0;
+      int mid = (lo+hi) >> 1;
+      for (j=end;j-->start;)
+      {
+         int bitsj;
+         int N = m->eBands[j+1]-m->eBands[j];
+         bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2;
+         if (bitsj > 0)
+            bitsj = IMAX(0, bitsj + trim_offset[j]);
+         bitsj += offsets[j];
+         if (bitsj >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(bitsj, cap[j]);
+         } else {
+            if (bitsj >= C<<BITRES)
+               psum += C<<BITRES;
+         }
+      }
+      if (psum > total)
+         hi = mid - 1;
+      else
+         lo = mid + 1;
+      /*printf ("lo = %d, hi = %d\n", lo, hi);*/
+   }
+   while (lo <= hi);
+   hi = lo--;
+   /*printf ("interp between %d and %d\n", lo, hi);*/
+   for (j=start;j<end;j++)
+   {
+      int bits1j, bits2j;
+      int N = m->eBands[j+1]-m->eBands[j];
+      bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2;
+      bits2j = hi>=m->nbAllocVectors ?
+            cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2;
+      if (bits1j > 0)
+         bits1j = IMAX(0, bits1j + trim_offset[j]);
+      if (bits2j > 0)
+         bits2j = IMAX(0, bits2j + trim_offset[j]);
+      if (lo > 0)
+         bits1j += offsets[j];
+      bits2j += offsets[j];
+      if (offsets[j]>0)
+         skip_start = j;
+      bits2j = IMAX(0,bits2j-bits1j);
+      bits1[j] = bits1j;
+      bits2[j] = bits2j;
+   }
+   codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
+         total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
+         pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
+   RESTORE_STACK;
+   return codedBands;
+}
+
diff --git a/src/main/jni/opus/celt/rate.h b/src/main/jni/opus/celt/rate.h
new file mode 100644
index 000000000..f1e066112
--- /dev/null
+++ b/src/main/jni/opus/celt/rate.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RATE_H
+#define RATE_H
+
+#define MAX_PSEUDO 40
+#define LOG_MAX_PSEUDO 6
+
+#define MAX_PULSES 128
+
+#define MAX_FINE_BITS 8
+
+#define FINE_OFFSET 21
+#define QTHETA_OFFSET 4
+#define QTHETA_OFFSET_TWOPHASE 16
+
+#include "cwrs.h"
+#include "modes.h"
+
+void compute_pulse_cache(CELTMode *m, int LM);
+
+static OPUS_INLINE int get_pulses(int i)
+{
+   return i<8 ? i : (8 + (i&7)) << ((i>>3)-1);
+}
+
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+{
+   int i;
+   int lo, hi;
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+
+   lo = 0;
+   hi = cache[0];
+   bits--;
+   for (i=0;i<LOG_MAX_PSEUDO;i++)
+   {
+      int mid = (lo+hi+1)>>1;
+      /* OPT: Make sure this is implemented with a conditional move */
+      if ((int)cache[mid] >= bits)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits)
+      return lo;
+   else
+      return hi;
+}
+
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+{
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+   return pulses == 0 ? 0 : cache[pulses]+1;
+}
+
+/** Compute the pulse allocation, i.e. how many pulses will go in each
+  * band.
+ @param m mode
+ @param offsets Requested increase or decrease in the number of bits for
+                each band
+ @param total Number of bands
+ @param pulses Number of pulses per band (returned)
+ @return Total number of bits allocated
+*/
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
+
+#endif
diff --git a/src/main/jni/opus/celt/stack_alloc.h b/src/main/jni/opus/celt/stack_alloc.h
new file mode 100644
index 000000000..316a6ce12
--- /dev/null
+++ b/src/main/jni/opus/celt/stack_alloc.h
@@ -0,0 +1,178 @@
+/* Copyright (C) 2002-2003 Jean-Marc Valin
+   Copyright (C) 2007-2009 Xiph.Org Foundation */
+/**
+   @file stack_alloc.h
+   @brief Temporary memory allocation on stack
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
+#error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode."
+#endif
+
+#ifdef USE_ALLOCA
+# ifdef WIN32
+#  include <malloc.h>
+# else
+#  ifdef HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   include <stdlib.h>
+#  endif
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * @param stack Stack
+ * @param size  New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * @param stack Stack
+ * @param size  Number of elements
+ * @param type  Type of element
+ */
+
+/**
+ * @def VARDECL(var)
+ *
+ * Declare variable on stack
+ *
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * @param var  Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(type, var)
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1
+
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(type, var) type *var
+
+# ifdef WIN32
+#  define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
+# else
+#  define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+# endif
+
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+#define ALLOC_NONE 0
+
+#else
+
+#ifdef CELT_C
+char *global_stack=0;
+#else
+extern char *global_stack;
+#endif /* CELT_C */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#ifdef CELT_C
+char *global_stack_top=0;
+#else
+extern char *global_stack_top;
+#endif /* CELT_C */
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack))
+#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack;
+
+#else
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#define RESTORE_STACK (global_stack = _saved_stack)
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
+
+#endif /* ENABLE_VALGRIND */
+
+#include "os_support.h"
+#define VARDECL(type, var) type *var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0
+
+#endif /* VAR_ARRAYS */
+
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+
+#else
+
+static OPUS_INLINE int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+
+#endif
+
+
+#endif /* STACK_ALLOC_H */
diff --git a/src/main/jni/opus/celt/static_modes_fixed.h b/src/main/jni/opus/celt/static_modes_fixed.h
new file mode 100644
index 000000000..216df9e60
--- /dev/null
+++ b/src/main/jni/opus/celt/static_modes_fixed.h
@@ -0,0 +1,595 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+2, 20, 55, 108, 178,
+266, 372, 494, 635, 792,
+966, 1157, 1365, 1590, 1831,
+2089, 2362, 2651, 2956, 3276,
+3611, 3961, 4325, 4703, 5094,
+5499, 5916, 6346, 6788, 7241,
+7705, 8179, 8663, 9156, 9657,
+10167, 10684, 11207, 11736, 12271,
+12810, 13353, 13899, 14447, 14997,
+15547, 16098, 16648, 17197, 17744,
+18287, 18827, 19363, 19893, 20418,
+20936, 21447, 21950, 22445, 22931,
+23407, 23874, 24330, 24774, 25208,
+25629, 26039, 26435, 26819, 27190,
+27548, 27893, 28224, 28541, 28845,
+29135, 29411, 29674, 29924, 30160,
+30384, 30594, 30792, 30977, 31151,
+31313, 31463, 31602, 31731, 31849,
+31958, 32057, 32148, 32229, 32303,
+32370, 32429, 32481, 32528, 32568,
+32604, 32634, 32661, 32683, 32701,
+32717, 32729, 32740, 32748, 32754,
+32758, 32762, 32764, 32766, 32767,
+32767, 32767, 32767, 32767, 32767,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{32767, 0}, {32766, -429},
+{32757, -858}, {32743, -1287},
+{32724, -1715}, {32698, -2143},
+{32667, -2570}, {32631, -2998},
+{32588, -3425}, {32541, -3851},
+{32488, -4277}, {32429, -4701},
+{32364, -5125}, {32295, -5548},
+{32219, -5971}, {32138, -6393},
+{32051, -6813}, {31960, -7231},
+{31863, -7650}, {31760, -8067},
+{31652, -8481}, {31539, -8895},
+{31419, -9306}, {31294, -9716},
+{31165, -10126}, {31030, -10532},
+{30889, -10937}, {30743, -11340},
+{30592, -11741}, {30436, -12141},
+{30274, -12540}, {30107, -12935},
+{29936, -13328}, {29758, -13718},
+{29577, -14107}, {29390, -14493},
+{29197, -14875}, {29000, -15257},
+{28797, -15635}, {28590, -16010},
+{28379, -16384}, {28162, -16753},
+{27940, -17119}, {27714, -17484},
+{27482, -17845}, {27246, -18205},
+{27006, -18560}, {26760, -18911},
+{26510, -19260}, {26257, -19606},
+{25997, -19947}, {25734, -20286},
+{25466, -20621}, {25194, -20952},
+{24918, -21281}, {24637, -21605},
+{24353, -21926}, {24063, -22242},
+{23770, -22555}, {23473, -22865},
+{23171, -23171}, {22866, -23472},
+{22557, -23769}, {22244, -24063},
+{21927, -24352}, {21606, -24636},
+{21282, -24917}, {20954, -25194},
+{20622, -25465}, {20288, -25733},
+{19949, -25997}, {19607, -26255},
+{19261, -26509}, {18914, -26760},
+{18561, -27004}, {18205, -27246},
+{17846, -27481}, {17485, -27713},
+{17122, -27940}, {16755, -28162},
+{16385, -28378}, {16012, -28590},
+{15636, -28797}, {15258, -28999},
+{14878, -29197}, {14494, -29389},
+{14108, -29576}, {13720, -29757},
+{13329, -29934}, {12937, -30107},
+{12540, -30274}, {12142, -30435},
+{11744, -30592}, {11342, -30743},
+{10939, -30889}, {10534, -31030},
+{10127, -31164}, {9718, -31294},
+{9307, -31418}, {8895, -31537},
+{8482, -31652}, {8067, -31759},
+{7650, -31862}, {7233, -31960},
+{6815, -32051}, {6393, -32138},
+{5973, -32219}, {5549, -32294},
+{5127, -32364}, {4703, -32429},
+{4278, -32487}, {3852, -32541},
+{3426, -32588}, {2999, -32630},
+{2572, -32667}, {2144, -32698},
+{1716, -32724}, {1287, -32742},
+{860, -32757}, {430, -32766},
+{0, -32767}, {-429, -32766},
+{-858, -32757}, {-1287, -32743},
+{-1715, -32724}, {-2143, -32698},
+{-2570, -32667}, {-2998, -32631},
+{-3425, -32588}, {-3851, -32541},
+{-4277, -32488}, {-4701, -32429},
+{-5125, -32364}, {-5548, -32295},
+{-5971, -32219}, {-6393, -32138},
+{-6813, -32051}, {-7231, -31960},
+{-7650, -31863}, {-8067, -31760},
+{-8481, -31652}, {-8895, -31539},
+{-9306, -31419}, {-9716, -31294},
+{-10126, -31165}, {-10532, -31030},
+{-10937, -30889}, {-11340, -30743},
+{-11741, -30592}, {-12141, -30436},
+{-12540, -30274}, {-12935, -30107},
+{-13328, -29936}, {-13718, -29758},
+{-14107, -29577}, {-14493, -29390},
+{-14875, -29197}, {-15257, -29000},
+{-15635, -28797}, {-16010, -28590},
+{-16384, -28379}, {-16753, -28162},
+{-17119, -27940}, {-17484, -27714},
+{-17845, -27482}, {-18205, -27246},
+{-18560, -27006}, {-18911, -26760},
+{-19260, -26510}, {-19606, -26257},
+{-19947, -25997}, {-20286, -25734},
+{-20621, -25466}, {-20952, -25194},
+{-21281, -24918}, {-21605, -24637},
+{-21926, -24353}, {-22242, -24063},
+{-22555, -23770}, {-22865, -23473},
+{-23171, -23171}, {-23472, -22866},
+{-23769, -22557}, {-24063, -22244},
+{-24352, -21927}, {-24636, -21606},
+{-24917, -21282}, {-25194, -20954},
+{-25465, -20622}, {-25733, -20288},
+{-25997, -19949}, {-26255, -19607},
+{-26509, -19261}, {-26760, -18914},
+{-27004, -18561}, {-27246, -18205},
+{-27481, -17846}, {-27713, -17485},
+{-27940, -17122}, {-28162, -16755},
+{-28378, -16385}, {-28590, -16012},
+{-28797, -15636}, {-28999, -15258},
+{-29197, -14878}, {-29389, -14494},
+{-29576, -14108}, {-29757, -13720},
+{-29934, -13329}, {-30107, -12937},
+{-30274, -12540}, {-30435, -12142},
+{-30592, -11744}, {-30743, -11342},
+{-30889, -10939}, {-31030, -10534},
+{-31164, -10127}, {-31294, -9718},
+{-31418, -9307}, {-31537, -8895},
+{-31652, -8482}, {-31759, -8067},
+{-31862, -7650}, {-31960, -7233},
+{-32051, -6815}, {-32138, -6393},
+{-32219, -5973}, {-32294, -5549},
+{-32364, -5127}, {-32429, -4703},
+{-32487, -4278}, {-32541, -3852},
+{-32588, -3426}, {-32630, -2999},
+{-32667, -2572}, {-32698, -2144},
+{-32724, -1716}, {-32742, -1287},
+{-32757, -860}, {-32766, -430},
+{-32767, 0}, {-32766, 429},
+{-32757, 858}, {-32743, 1287},
+{-32724, 1715}, {-32698, 2143},
+{-32667, 2570}, {-32631, 2998},
+{-32588, 3425}, {-32541, 3851},
+{-32488, 4277}, {-32429, 4701},
+{-32364, 5125}, {-32295, 5548},
+{-32219, 5971}, {-32138, 6393},
+{-32051, 6813}, {-31960, 7231},
+{-31863, 7650}, {-31760, 8067},
+{-31652, 8481}, {-31539, 8895},
+{-31419, 9306}, {-31294, 9716},
+{-31165, 10126}, {-31030, 10532},
+{-30889, 10937}, {-30743, 11340},
+{-30592, 11741}, {-30436, 12141},
+{-30274, 12540}, {-30107, 12935},
+{-29936, 13328}, {-29758, 13718},
+{-29577, 14107}, {-29390, 14493},
+{-29197, 14875}, {-29000, 15257},
+{-28797, 15635}, {-28590, 16010},
+{-28379, 16384}, {-28162, 16753},
+{-27940, 17119}, {-27714, 17484},
+{-27482, 17845}, {-27246, 18205},
+{-27006, 18560}, {-26760, 18911},
+{-26510, 19260}, {-26257, 19606},
+{-25997, 19947}, {-25734, 20286},
+{-25466, 20621}, {-25194, 20952},
+{-24918, 21281}, {-24637, 21605},
+{-24353, 21926}, {-24063, 22242},
+{-23770, 22555}, {-23473, 22865},
+{-23171, 23171}, {-22866, 23472},
+{-22557, 23769}, {-22244, 24063},
+{-21927, 24352}, {-21606, 24636},
+{-21282, 24917}, {-20954, 25194},
+{-20622, 25465}, {-20288, 25733},
+{-19949, 25997}, {-19607, 26255},
+{-19261, 26509}, {-18914, 26760},
+{-18561, 27004}, {-18205, 27246},
+{-17846, 27481}, {-17485, 27713},
+{-17122, 27940}, {-16755, 28162},
+{-16385, 28378}, {-16012, 28590},
+{-15636, 28797}, {-15258, 28999},
+{-14878, 29197}, {-14494, 29389},
+{-14108, 29576}, {-13720, 29757},
+{-13329, 29934}, {-12937, 30107},
+{-12540, 30274}, {-12142, 30435},
+{-11744, 30592}, {-11342, 30743},
+{-10939, 30889}, {-10534, 31030},
+{-10127, 31164}, {-9718, 31294},
+{-9307, 31418}, {-8895, 31537},
+{-8482, 31652}, {-8067, 31759},
+{-7650, 31862}, {-7233, 31960},
+{-6815, 32051}, {-6393, 32138},
+{-5973, 32219}, {-5549, 32294},
+{-5127, 32364}, {-4703, 32429},
+{-4278, 32487}, {-3852, 32541},
+{-3426, 32588}, {-2999, 32630},
+{-2572, 32667}, {-2144, 32698},
+{-1716, 32724}, {-1287, 32742},
+{-860, 32757}, {-430, 32766},
+{0, 32767}, {429, 32766},
+{858, 32757}, {1287, 32743},
+{1715, 32724}, {2143, 32698},
+{2570, 32667}, {2998, 32631},
+{3425, 32588}, {3851, 32541},
+{4277, 32488}, {4701, 32429},
+{5125, 32364}, {5548, 32295},
+{5971, 32219}, {6393, 32138},
+{6813, 32051}, {7231, 31960},
+{7650, 31863}, {8067, 31760},
+{8481, 31652}, {8895, 31539},
+{9306, 31419}, {9716, 31294},
+{10126, 31165}, {10532, 31030},
+{10937, 30889}, {11340, 30743},
+{11741, 30592}, {12141, 30436},
+{12540, 30274}, {12935, 30107},
+{13328, 29936}, {13718, 29758},
+{14107, 29577}, {14493, 29390},
+{14875, 29197}, {15257, 29000},
+{15635, 28797}, {16010, 28590},
+{16384, 28379}, {16753, 28162},
+{17119, 27940}, {17484, 27714},
+{17845, 27482}, {18205, 27246},
+{18560, 27006}, {18911, 26760},
+{19260, 26510}, {19606, 26257},
+{19947, 25997}, {20286, 25734},
+{20621, 25466}, {20952, 25194},
+{21281, 24918}, {21605, 24637},
+{21926, 24353}, {22242, 24063},
+{22555, 23770}, {22865, 23473},
+{23171, 23171}, {23472, 22866},
+{23769, 22557}, {24063, 22244},
+{24352, 21927}, {24636, 21606},
+{24917, 21282}, {25194, 20954},
+{25465, 20622}, {25733, 20288},
+{25997, 19949}, {26255, 19607},
+{26509, 19261}, {26760, 18914},
+{27004, 18561}, {27246, 18205},
+{27481, 17846}, {27713, 17485},
+{27940, 17122}, {28162, 16755},
+{28378, 16385}, {28590, 16012},
+{28797, 15636}, {28999, 15258},
+{29197, 14878}, {29389, 14494},
+{29576, 14108}, {29757, 13720},
+{29934, 13329}, {30107, 12937},
+{30274, 12540}, {30435, 12142},
+{30592, 11744}, {30743, 11342},
+{30889, 10939}, {31030, 10534},
+{31164, 10127}, {31294, 9718},
+{31418, 9307}, {31537, 8895},
+{31652, 8482}, {31759, 8067},
+{31862, 7650}, {31960, 7233},
+{32051, 6815}, {32138, 6393},
+{32219, 5973}, {32294, 5549},
+{32364, 5127}, {32429, 4703},
+{32487, 4278}, {32541, 3852},
+{32588, 3426}, {32630, 2999},
+{32667, 2572}, {32698, 2144},
+{32724, 1716}, {32742, 1287},
+{32757, 860}, {32766, 430},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+-1,     /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+1,      /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+2,      /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+3,      /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+32767, 32767, 32767, 32767, 32766,
+32763, 32762, 32759, 32757, 32753,
+32751, 32747, 32743, 32738, 32733,
+32729, 32724, 32717, 32711, 32705,
+32698, 32690, 32683, 32676, 32667,
+32658, 32650, 32640, 32631, 32620,
+32610, 32599, 32588, 32577, 32566,
+32554, 32541, 32528, 32515, 32502,
+32487, 32474, 32459, 32444, 32429,
+32413, 32397, 32381, 32364, 32348,
+32331, 32313, 32294, 32277, 32257,
+32239, 32219, 32200, 32180, 32159,
+32138, 32118, 32096, 32074, 32051,
+32029, 32006, 31984, 31960, 31936,
+31912, 31888, 31863, 31837, 31812,
+31786, 31760, 31734, 31707, 31679,
+31652, 31624, 31596, 31567, 31539,
+31508, 31479, 31450, 31419, 31388,
+31357, 31326, 31294, 31262, 31230,
+31198, 31164, 31131, 31097, 31063,
+31030, 30994, 30959, 30924, 30889,
+30853, 30816, 30779, 30743, 30705,
+30668, 30629, 30592, 30553, 30515,
+30475, 30435, 30396, 30356, 30315,
+30274, 30233, 30191, 30149, 30107,
+30065, 30022, 29979, 29936, 29891,
+29847, 29803, 29758, 29713, 29668,
+29622, 29577, 29529, 29483, 29436,
+29390, 29341, 29293, 29246, 29197,
+29148, 29098, 29050, 29000, 28949,
+28899, 28848, 28797, 28746, 28694,
+28642, 28590, 28537, 28485, 28432,
+28378, 28324, 28271, 28217, 28162,
+28106, 28051, 27995, 27940, 27884,
+27827, 27770, 27713, 27657, 27598,
+27540, 27481, 27423, 27365, 27305,
+27246, 27187, 27126, 27066, 27006,
+26945, 26883, 26822, 26760, 26698,
+26636, 26574, 26510, 26448, 26383,
+26320, 26257, 26191, 26127, 26062,
+25997, 25931, 25866, 25800, 25734,
+25667, 25601, 25533, 25466, 25398,
+25330, 25262, 25194, 25125, 25056,
+24987, 24917, 24848, 24778, 24707,
+24636, 24566, 24495, 24424, 24352,
+24280, 24208, 24135, 24063, 23990,
+23917, 23842, 23769, 23695, 23622,
+23546, 23472, 23398, 23322, 23246,
+23171, 23095, 23018, 22942, 22866,
+22788, 22711, 22634, 22557, 22478,
+22400, 22322, 22244, 22165, 22085,
+22006, 21927, 21846, 21766, 21687,
+21606, 21524, 21443, 21363, 21282,
+21199, 21118, 21035, 20954, 20870,
+20788, 20705, 20621, 20538, 20455,
+20371, 20286, 20202, 20118, 20034,
+19947, 19863, 19777, 19692, 19606,
+19520, 19434, 19347, 19260, 19174,
+19088, 18999, 18911, 18825, 18737,
+18648, 18560, 18472, 18384, 18294,
+18205, 18116, 18025, 17936, 17846,
+17757, 17666, 17576, 17485, 17395,
+17303, 17212, 17122, 17030, 16937,
+16846, 16755, 16662, 16569, 16477,
+16385, 16291, 16198, 16105, 16012,
+15917, 15824, 15730, 15636, 15541,
+15447, 15352, 15257, 15162, 15067,
+14973, 14875, 14781, 14685, 14589,
+14493, 14396, 14300, 14204, 14107,
+14010, 13914, 13815, 13718, 13621,
+13524, 13425, 13328, 13230, 13133,
+13033, 12935, 12836, 12738, 12638,
+12540, 12441, 12341, 12241, 12142,
+12044, 11943, 11843, 11744, 11643,
+11542, 11442, 11342, 11241, 11139,
+11039, 10939, 10836, 10736, 10635,
+10534, 10431, 10330, 10228, 10127,
+10024, 9921, 9820, 9718, 9614,
+9512, 9410, 9306, 9204, 9101,
+8998, 8895, 8791, 8689, 8585,
+8481, 8377, 8274, 8171, 8067,
+7962, 7858, 7753, 7650, 7545,
+7441, 7336, 7231, 7129, 7023,
+6917, 6813, 6709, 6604, 6498,
+6393, 6288, 6182, 6077, 5973,
+5867, 5760, 5656, 5549, 5445,
+5339, 5232, 5127, 5022, 4914,
+4809, 4703, 4596, 4490, 4384,
+4278, 4171, 4065, 3958, 3852,
+3745, 3640, 3532, 3426, 3318,
+3212, 3106, 2998, 2891, 2786,
+2679, 2570, 2465, 2358, 2251,
+2143, 2037, 1929, 1823, 1715,
+1609, 1501, 1393, 1287, 1180,
+1073, 964, 858, 751, 644,
+535, 429, 322, 214, 107,
+0, };
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{27853, 0, 4096, 8192, },       /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/src/main/jni/opus/celt/static_modes_float.h b/src/main/jni/opus/celt/static_modes_float.h
new file mode 100644
index 000000000..5d7e7b8e6
--- /dev/null
+++ b/src/main/jni/opus/celt/static_modes_float.h
@@ -0,0 +1,599 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
+0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
+0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
+0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
+0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
+0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
+0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
+0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
+0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
+0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
+0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
+0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
+0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
+0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
+0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
+0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
+0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
+0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
+0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
+0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
+0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
+0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
+0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
+0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f},
+{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f},
+{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f},
+{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f},
+{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f},
+{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f},
+{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f},
+{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f},
+{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f},
+{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f},
+{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f},
+{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f},
+{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f},
+{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f},
+{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f},
+{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f},
+{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f},
+{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f},
+{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f},
+{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f},
+{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f},
+{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f},
+{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f},
+{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f},
+{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f},
+{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f},
+{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f},
+{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f},
+{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f},
+{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f},
+{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f},
+{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f},
+{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f},
+{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f},
+{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f},
+{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f},
+{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f},
+{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f},
+{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f},
+{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f},
+{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f},
+{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f},
+{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f},
+{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f},
+{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f},
+{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f},
+{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f},
+{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f},
+{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f},
+{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f},
+{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f},
+{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f},
+{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f},
+{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f},
+{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f},
+{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f},
+{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f},
+{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f},
+{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f},
+{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f},
+{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f},
+{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f},
+{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f},
+{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f},
+{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f},
+{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f},
+{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f},
+{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f},
+{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f},
+{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f},
+{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f},
+{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f},
+{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f},
+{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f},
+{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f},
+{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f},
+{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f},
+{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f},
+{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f},
+{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f},
+{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f},
+{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f},
+{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f},
+{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f},
+{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f},
+{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f},
+{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f},
+{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f},
+{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f},
+{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f},
+{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f},
+{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f},
+{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f},
+{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f},
+{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f},
+{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f},
+{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f},
+{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f},
+{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f},
+{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f},
+{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f},
+{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f},
+{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f},
+{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f},
+{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f},
+{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f},
+{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f},
+{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f},
+{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f},
+{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f},
+{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f},
+{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f},
+{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f},
+{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f},
+{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f},
+{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f},
+{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f},
+{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f},
+{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f},
+{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f},
+{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f},
+{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f},
+{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f},
+{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f},
+{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f},
+{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f},
+{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f},
+{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f},
+{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f},
+{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f},
+{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f},
+{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f},
+{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f},
+{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f},
+{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f},
+{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f},
+{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f},
+{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f},
+{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f},
+{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f},
+{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f},
+{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f},
+{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f},
+{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f},
+{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f},
+{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f},
+{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f},
+{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f},
+{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f},
+{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f},
+{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f},
+{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f},
+{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f},
+{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f},
+{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f},
+{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f},
+{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f},
+{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f},
+{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f},
+{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f},
+{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f},
+{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f},
+{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f},
+{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f},
+{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f},
+{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f},
+{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f},
+{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f},
+{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f},
+{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f},
+{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f},
+{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f},
+{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f},
+{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f},
+{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f},
+{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f},
+{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f},
+{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f},
+{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f},
+{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f},
+{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f},
+{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f},
+{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f},
+{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f},
+{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f},
+{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f},
+{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f},
+{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f},
+{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f},
+{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f},
+{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f},
+{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f},
+{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f},
+{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f},
+{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f},
+{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f},
+{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f},
+{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f},
+{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f},
+{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f},
+{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f},
+{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f},
+{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f},
+{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f},
+{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f},
+{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f},
+{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f},
+{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f},
+{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f},
+{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f},
+{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f},
+{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f},
+{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f},
+{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f},
+{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f},
+{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f},
+{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f},
+{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f},
+{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f},
+{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f},
+{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f},
+{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f},
+{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f},
+{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f},
+{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f},
+{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f},
+{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f},
+{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f},
+{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f},
+{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f},
+{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f},
+{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f},
+{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f},
+{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f},
+{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f},
+{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f},
+{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f},
+{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f},
+{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f},
+{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+0.002083333f,   /* scale */
+-1,     /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+0.004166667f,   /* scale */
+1,      /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+0.008333333f,   /* scale */
+2,      /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+0.016666667f,   /* scale */
+3,      /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
+0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
+0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
+0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
+0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
+0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
+0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
+0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
+0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
+0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
+0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
+0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
+0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
+0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
+0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
+0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
+0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
+0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
+0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
+0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
+0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
+0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
+0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
+0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
+0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
+0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
+0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
+0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
+0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
+0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
+0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
+0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
+0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
+0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
+0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
+0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
+0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
+0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
+0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
+0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
+0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
+0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
+0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
+0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
+0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
+0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
+0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
+0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
+0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
+0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
+0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
+0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
+0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
+0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
+0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
+0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
+0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
+0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
+0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
+0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
+0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
+0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
+0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
+0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
+0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
+0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
+0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
+0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
+0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
+0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
+0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
+0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
+0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
+0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
+0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
+0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
+0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
+0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
+0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
+0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
+0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
+0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
+0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
+0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
+0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
+0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
+0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
+0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
+0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
+0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
+0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
+0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
+0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
+0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
+0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
+0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
+-4.3711390e-08f, };
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, },    /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/src/main/jni/opus/celt/vq.c b/src/main/jni/opus/celt/vq.c
new file mode 100644
index 000000000..98a0f36c9
--- /dev/null
+++ b/src/main/jni/opus/celt/vq.c
@@ -0,0 +1,415 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   celt_norm *Xptr;
+   Xptr = X;
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+}
+
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
+{
+   static const int SPREAD_FACTOR[3]={15,10,5};
+   int i;
+   opus_val16 c, s;
+   opus_val16 gain, theta;
+   int stride2=0;
+   int factor;
+
+   if (2*K>=len || spread==SPREAD_NONE)
+      return;
+   factor = SPREAD_FACTOR[spread-1];
+
+   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
+   theta = HALF16(MULT16_16_Q15(gain,gain));
+
+   c = celt_cos_norm(EXTEND32(theta));
+   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
+
+   if (len>=8*stride)
+   {
+      stride2 = 1;
+      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
+         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
+      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
+         stride2++;
+   }
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      extract_collapse_mask().*/
+   len /= stride;
+   for (i=0;i<stride;i++)
+   {
+      if (dir < 0)
+      {
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, c);
+         exp_rotation1(X+i*len, len, 1, c, s);
+      } else {
+         exp_rotation1(X+i*len, len, 1, c, -s);
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, -c);
+      }
+   }
+}
+
+/** Takes the pitch vector and the decoded residual vector, computes the gain
+    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
+      int N, opus_val32 Ryy, opus_val16 gain)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 t;
+   opus_val16 g;
+
+#ifdef FIXED_POINT
+   k = celt_ilog2(Ryy)>>1;
+#endif
+   t = VSHR32(Ryy, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   i=0;
+   do
+      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
+   while (++i < N);
+}
+
+static unsigned extract_collapse_mask(int *iy, int N, int B)
+{
+   unsigned collapse_mask;
+   int N0;
+   int i;
+   if (B<=1)
+      return 1;
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      exp_rotation().*/
+   N0 = N/B;
+   collapse_mask = 0;
+   i=0; do {
+      int j;
+      j=0; do {
+         collapse_mask |= (iy[i*N0+j]!=0)<<i;
+      } while (++j<N0);
+   } while (++i<B);
+   return collapse_mask;
+}
+
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
+#ifdef RESYNTH
+   , opus_val16 gain
+#endif
+   )
+{
+   VARDECL(celt_norm, y);
+   VARDECL(int, iy);
+   VARDECL(opus_val16, signx);
+   int i, j;
+   opus_val16 s;
+   int pulsesLeft;
+   opus_val32 sum;
+   opus_val32 xy;
+   opus_val16 yy;
+   unsigned collapse_mask;
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_quant() needs at least one pulse");
+   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+   ALLOC(y, N, celt_norm);
+   ALLOC(iy, N, int);
+   ALLOC(signx, N, opus_val16);
+
+   exp_rotation(X, N, 1, B, K, spread);
+
+   /* Get rid of the sign */
+   sum = 0;
+   j=0; do {
+      if (X[j]>0)
+         signx[j]=1;
+      else {
+         signx[j]=-1;
+         X[j]=-X[j];
+      }
+      iy[j] = 0;
+      y[j] = 0;
+   } while (++j<N);
+
+   xy = yy = 0;
+
+   pulsesLeft = K;
+
+   /* Do a pre-search by projecting on the pyramid */
+   if (K > (N>>1))
+   {
+      opus_val16 rcp;
+      j=0; do {
+         sum += X[j];
+      }  while (++j<N);
+
+      /* If X is too small, just replace it with a pulse at 0 */
+#ifdef FIXED_POINT
+      if (sum <= K)
+#else
+      /* Prevents infinities and NaNs from causing too many pulses
+         to be allocated. 64 is an approximation of infinity here. */
+      if (!(sum > EPSILON && sum < 64))
+#endif
+      {
+         X[0] = QCONST16(1.f,14);
+         j=1; do
+            X[j]=0;
+         while (++j<N);
+         sum = QCONST16(1.f,14);
+      }
+      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
+      j=0; do {
+#ifdef FIXED_POINT
+         /* It's really important to round *towards zero* here */
+         iy[j] = MULT16_16_Q15(X[j],rcp);
+#else
+         iy[j] = (int)floor(rcp*X[j]);
+#endif
+         y[j] = (celt_norm)iy[j];
+         yy = MAC16_16(yy, y[j],y[j]);
+         xy = MAC16_16(xy, X[j],y[j]);
+         y[j] *= 2;
+         pulsesLeft -= iy[j];
+      }  while (++j<N);
+   }
+   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
+
+   /* This should never happen, but just in case it does (e.g. on silence)
+      we fill the first bin with pulses. */
+#ifdef FIXED_POINT_DEBUG
+   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
+#endif
+   if (pulsesLeft > N+3)
+   {
+      opus_val16 tmp = (opus_val16)pulsesLeft;
+      yy = MAC16_16(yy, tmp, tmp);
+      yy = MAC16_16(yy, tmp, y[0]);
+      iy[0] += pulsesLeft;
+      pulsesLeft=0;
+   }
+
+   s = 1;
+   for (i=0;i<pulsesLeft;i++)
+   {
+      int best_id;
+      opus_val32 best_num = -VERY_LARGE16;
+      opus_val16 best_den = 0;
+#ifdef FIXED_POINT
+      int rshift;
+#endif
+#ifdef FIXED_POINT
+      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
+#endif
+      best_id = 0;
+      /* The squared magnitude term gets added anyway, so we might as well
+         add it outside the loop */
+      yy = ADD32(yy, 1);
+      j=0;
+      do {
+         opus_val16 Rxy, Ryy;
+         /* Temporary sums of the new pulse(s) */
+         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
+         /* We're multiplying y[j] by two so we don't have to do it here */
+         Ryy = ADD16(yy, y[j]);
+
+         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+            Rxy is positive because the sign is pre-computed) */
+         Rxy = MULT16_16_Q15(Rxy,Rxy);
+         /* The idea is to check for num/den >= best_num/best_den, but that way
+            we can do it without any division */
+         /* OPT: Make sure to use conditional moves here */
+         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
+         {
+            best_den = Ryy;
+            best_num = Rxy;
+            best_id = j;
+         }
+      } while (++j<N);
+
+      /* Updating the sums of the new pulse(s) */
+      xy = ADD32(xy, EXTEND32(X[best_id]));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      yy = ADD16(yy, y[best_id]);
+
+      /* Only now that we've made the final choice, update y/iy */
+      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+      y[best_id] += 2*s;
+      iy[best_id]++;
+   }
+
+   /* Put the original sign back */
+   j=0;
+   do {
+      X[j] = MULT16_16(signx[j],X[j]);
+      if (signx[j] < 0)
+         iy[j] = -iy[j];
+   } while (++j<N);
+   encode_pulses(iy, N, K, enc);
+
+#ifdef RESYNTH
+   normalise_residual(iy, X, N, yy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+#endif
+
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+/** Decode pulse vector and combine the result with the pitch vector to produce
+    the final normalised signal in the current band. */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain)
+{
+   int i;
+   opus_val32 Ryy;
+   unsigned collapse_mask;
+   VARDECL(int, iy);
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
+   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
+   ALLOC(iy, N, int);
+   decode_pulses(iy, N, K, dec);
+   Ryy = 0;
+   i=0;
+   do {
+      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
+   } while (++i < N);
+   normalise_residual(iy, X, N, Ryy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 E = EPSILON;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   for (i=0;i<N;i++)
+   {
+      E = MAC16_16(E, *xptr, *xptr);
+      xptr++;
+   }
+#ifdef FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+{
+   int i;
+   int itheta;
+   opus_val16 mid, side;
+   opus_val32 Emid, Eside;
+
+   Emid = Eside = EPSILON;
+   if (stereo)
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
+         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   } else {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = X[i];
+         s = Y[i];
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   }
+   mid = celt_sqrt(Emid);
+   side = celt_sqrt(Eside);
+#ifdef FIXED_POINT
+   /* 0.63662 = 2/pi */
+   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
+#else
+   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
+#endif
+
+   return itheta;
+}
diff --git a/src/main/jni/opus/celt/vq.h b/src/main/jni/opus/celt/vq.h
new file mode 100644
index 000000000..ffdc69cdc
--- /dev/null
+++ b/src/main/jni/opus/celt/vq.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file vq.h
+   @brief Vector quantisation of the residual
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VQ_H
+#define VQ_H
+
+#include "entenc.h"
+#include "entdec.h"
+#include "modes.h"
+
+/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
+  * the pitch and a combination of pulses such that its norm is still equal
+  * to 1. This is the function that will typically require the most CPU.
+ * @param X Residual signal to quantise/encode (returns quantised version)
+ * @param N Number of samples to encode
+ * @param K Number of pulses to use
+ * @param enc Entropy encoder state
+ * @ret A mask indicating which blocks in the band received pulses
+*/
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
+      ec_enc *enc
+#ifdef RESYNTH
+      , opus_val16 gain
+#endif
+      );
+
+/** Algebraic pulse decoder
+ * @param X Decoded normalised spectrum (returned)
+ * @param N Number of samples to decode
+ * @param K Number of pulses to use
+ * @param dec Entropy decoder state
+ * @ret A mask indicating which blocks in the band received pulses
+ */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain);
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
+
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
+
+#endif /* VQ_H */
diff --git a/src/main/jni/opus/celt/x86/pitch_sse.h b/src/main/jni/opus/celt/x86/pitch_sse.h
new file mode 100644
index 000000000..695122a5a
--- /dev/null
+++ b/src/main/jni/opus/celt/x86/pitch_sse.h
@@ -0,0 +1,156 @@
+/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
+/**
+   @file pitch_sse.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_SSE_H
+#define PITCH_SSE_H
+
+#include <xmmintrin.h>
+#include "arch.h"
+
+#define OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+{
+   int j;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_loadu_ps(sum);
+   xsum2 = _mm_setzero_ps();
+
+   for (j = 0; j < len-3; j += 4)
+   {
+      __m128 x0 = _mm_loadu_ps(x+j);
+      __m128 yj = _mm_loadu_ps(y+j);
+      __m128 y3 = _mm_loadu_ps(y+j+3);
+
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
+                                          _mm_shuffle_ps(yj,y3,0x49)));
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
+                                          _mm_shuffle_ps(yj,y3,0x9e)));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
+   }
+   if (j < len)
+   {
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+      if (++j < len)
+      {
+         xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         if (++j < len)
+         {
+            xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         }
+      }
+   }
+   _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+}
+
+#define OVERRIDE_DUAL_INNER_PROD
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_setzero_ps();
+   xsum2 = _mm_setzero_ps();
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 y1i = _mm_loadu_ps(y01+i);
+      __m128 y2i = _mm_loadu_ps(y02+i);
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
+   }
+   /* Horizontal sum */
+   xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
+   xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
+   _mm_store_ss(xy1, xsum1);
+   xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
+   xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
+   _mm_store_ss(xy2, xsum2);
+   for (;i<N;i++)
+   {
+      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
+      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
+   }
+}
+
+#define OVERRIDE_COMB_FILTER_CONST
+static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   int i;
+   __m128 x0v;
+   __m128 g10v, g11v, g12v;
+   g10v = _mm_load1_ps(&g10);
+   g11v = _mm_load1_ps(&g11);
+   g12v = _mm_load1_ps(&g12);
+   x0v = _mm_loadu_ps(&x[-T-2]);
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 yi, yi2, x1v, x2v, x3v, x4v;
+      const opus_val32 *xp = &x[i-T-2];
+      yi = _mm_loadu_ps(x+i);
+      x4v = _mm_loadu_ps(xp+4);
+#if 0
+      /* Slower version with all loads */
+      x1v = _mm_loadu_ps(xp+1);
+      x2v = _mm_loadu_ps(xp+2);
+      x3v = _mm_loadu_ps(xp+3);
+#else
+      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
+      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
+      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
+#endif
+
+      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
+#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
+      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
+      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#else
+      /* Use partial sums */
+      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
+                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+      yi = _mm_add_ps(yi, yi2);
+#endif
+      x0v=x4v;
+      _mm_storeu_ps(y+i, yi);
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x[i-T])
+               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
+   }
+#endif
+}
+
+#endif
diff --git a/src/main/jni/opus/include/opus.h b/src/main/jni/opus/include/opus.h
new file mode 100644
index 000000000..93a53a2ff
--- /dev/null
+++ b/src/main/jni/opus/include/opus.h
@@ -0,0 +1,978 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus.h
+ * @brief Opus reference implementation API
+ */
+
+#ifndef OPUS_H
+#define OPUS_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @mainpage Opus
+ *
+ * The Opus codec is designed for interactive speech and audio transmission over the Internet.
+ * It is designed by the IETF Codec Working Group and incorporates technology from
+ * Skype's SILK codec and Xiph.Org's CELT codec.
+ *
+ * The Opus codec is designed to handle a wide range of interactive audio applications,
+ * including Voice over IP, videoconferencing, in-game chat, and even remote live music
+ * performances. It can scale from low bit-rate narrowband speech to very high quality
+ * stereo music. Its main features are:
+
+ * @li Sampling rates from 8 to 48 kHz
+ * @li Bit-rates from 6 kb/s to 510 kb/s
+ * @li Support for both constant bit-rate (CBR) and variable bit-rate (VBR)
+ * @li Audio bandwidth from narrowband to full-band
+ * @li Support for speech and music
+ * @li Support for mono and stereo
+ * @li Support for multichannel (up to 255 channels)
+ * @li Frame sizes from 2.5 ms to 60 ms
+ * @li Good loss robustness and packet loss concealment (PLC)
+ * @li Floating point and fixed-point implementation
+ *
+ * Documentation sections:
+ * @li @ref opus_encoder
+ * @li @ref opus_decoder
+ * @li @ref opus_repacketizer
+ * @li @ref opus_multistream
+ * @li @ref opus_libinfo
+ * @li @ref opus_custom
+ */
+
+/** @defgroup opus_encoder Opus Encoder
+  * @{
+  *
+  * @brief This page describes the process and functions used to encode Opus.
+  *
+  * Since Opus is a stateful codec, the encoding process starts with creating an encoder
+  * state. This can be done with:
+  *
+  * @code
+  * int          error;
+  * OpusEncoder *enc;
+  * enc = opus_encoder_create(Fs, channels, application, &error);
+  * @endcode
+  *
+  * From this point, @c enc can be used for encoding an audio stream. An encoder state
+  * @b must @b not be used for more than one stream at the same time. Similarly, the encoder
+  * state @b must @b not be re-initialized for each frame.
+  *
+  * While opus_encoder_create() allocates memory for the state, it's also possible
+  * to initialize pre-allocated memory:
+  *
+  * @code
+  * int          size;
+  * int          error;
+  * OpusEncoder *enc;
+  * size = opus_encoder_get_size(channels);
+  * enc = malloc(size);
+  * error = opus_encoder_init(enc, Fs, channels, application);
+  * @endcode
+  *
+  * where opus_encoder_get_size() returns the required size for the encoder state. Note that
+  * future versions of this code may change the size, so no assuptions should be made about it.
+  *
+  * The encoder state is always continuous in memory and only a shallow copy is sufficient
+  * to copy it (e.g. memcpy())
+  *
+  * It is possible to change some of the encoder's settings using the opus_encoder_ctl()
+  * interface. All these settings already default to the recommended value, so they should
+  * only be changed when necessary. The most common settings one may want to change are:
+  *
+  * @code
+  * opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate));
+  * opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+  * opus_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type));
+  * @endcode
+  *
+  * where
+  *
+  * @arg bitrate is in bits per second (b/s)
+  * @arg complexity is a value from 1 to 10, where 1 is the lowest complexity and 10 is the highest
+  * @arg signal_type is either OPUS_AUTO (default), OPUS_SIGNAL_VOICE, or OPUS_SIGNAL_MUSIC
+  *
+  * See @ref opus_encoderctls and @ref opus_genericctls for a complete list of parameters that can be set or queried. Most parameters can be set or changed at any time during a stream.
+  *
+  * To encode a frame, opus_encode() or opus_encode_float() must be called with exactly one frame (2.5, 5, 10, 20, 40 or 60 ms) of audio data:
+  * @code
+  * len = opus_encode(enc, audio_frame, frame_size, packet, max_packet);
+  * @endcode
+  *
+  * where
+  * <ul>
+  * <li>audio_frame is the audio data in opus_int16 (or float for opus_encode_float())</li>
+  * <li>frame_size is the duration of the frame in samples (per channel)</li>
+  * <li>packet is the byte array to which the compressed data is written</li>
+  * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended).
+  *     Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li>
+  * </ul>
+  *
+  * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
+  * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
+  * is 1 byte, then the packet does not need to be transmitted (DTX).
+  *
+  * Once the encoder state if no longer needed, it can be destroyed with
+  *
+  * @code
+  * opus_encoder_destroy(enc);
+  * @endcode
+  *
+  * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(),
+  * then no action is required aside from potentially freeing the memory that was manually
+  * allocated for it (calling free(enc) for the example above)
+  *
+  */
+
+/** Opus encoder state.
+  * This contains the complete state of an Opus encoder.
+  * It is position independent and can be freely copied.
+  * @see opus_encoder_create,opus_encoder_init
+  */
+typedef struct OpusEncoder OpusEncoder;
+
+/** Gets the size of an <code>OpusEncoder</code> structure.
+  * @param[in] channels <tt>int</tt>: Number of channels.
+  *                                   This must be 1 or 2.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels);
+
+/**
+ */
+
+/** Allocates and initializes an encoder state.
+ * There are three coding modes:
+ *
+ * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice
+ *    signals. It enhances the  input signal by high-pass filtering and
+ *    emphasizing formants and harmonics. Optionally  it includes in-band
+ *    forward error correction to protect against packet loss. Use this
+ *    mode for typical VoIP applications. Because of the enhancement,
+ *    even at high bitrates the output may sound different from the input.
+ *
+ * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most
+ *    non-voice signals like music. Use this mode for music and mixed
+ *    (music/voice) content, broadcast, and applications requiring less
+ *    than 15 ms of coding delay.
+ *
+ * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that
+ *    disables the speech-optimized mode in exchange for slightly reduced delay.
+ *    This mode can only be set on an newly initialized or freshly reset encoder
+ *    because it changes the codec delay.
+ *
+ * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution).
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ *                                     This must be one of 8000, 12000, 16000,
+ *                                     24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+ * @param [in] application <tt>int</tt>: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ * @param [out] error <tt>int*</tt>: @ref opus_errorcodes
+ * @note Regardless of the sampling rate and number channels selected, the Opus encoder
+ * can switch to a lower audio bandwidth or number of channels if the bitrate
+ * selected is too low. This also means that it is safe to always use 48 kHz stereo input
+ * and let the encoder optimize the encoding.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create(
+    opus_int32 Fs,
+    int channels,
+    int application,
+    int *error
+);
+
+/** Initializes a previously allocated encoder state
+  * The memory pointed to by st must be at least the size returned by opus_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_encoder_create(),opus_encoder_get_size()
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ *                                      This must be one of 8000, 12000, 16000,
+ *                                      24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+  * @param [in] application <tt>int</tt>: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+  * @retval #OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_EXPORT int opus_encoder_init(
+    OpusEncoder *st,
+    opus_int32 Fs,
+    int channels,
+    int application
+) OPUS_ARG_NONNULL(1);
+
+/** Encodes an Opus frame.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>opus_int16*</tt>: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+  *                                      input signal.
+  *                                      This must be an Opus frame size for
+  *                                      the encoder's sampling rate.
+  *                                      For example, at 48 kHz the permitted
+  *                                      values are 120, 240, 480, 960, 1920,
+  *                                      and 2880.
+  *                                      Passing in a duration of less than
+  *                                      10 ms (480 samples at 48 kHz) will
+  *                                      prevent the encoder from using the LPC
+  *                                      or hybrid modes.
+  * @param [out] data <tt>unsigned char*</tt>: Output payload.
+  *                                            This must contain storage for at
+  *                                            least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode(
+    OpusEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes an Opus frame from floating point input.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
+  *          Samples with a range beyond +/-1.0 are supported but will
+  *          be clipped by decoders using the integer API and should
+  *          only be used if it is known that the far end supports
+  *          extended dynamic range.
+  *          length is frame_size*channels*sizeof(float)
+  * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+  *                                      input signal.
+  *                                      This must be an Opus frame size for
+  *                                      the encoder's sampling rate.
+  *                                      For example, at 48 kHz the permitted
+  *                                      values are 120, 240, 480, 960, 1920,
+  *                                      and 2880.
+  *                                      Passing in a duration of less than
+  *                                      10 ms (480 samples at 48 kHz) will
+  *                                      prevent the encoder from using the LPC
+  *                                      or hybrid modes.
+  * @param [out] data <tt>unsigned char*</tt>: Output payload.
+  *                                            This must contain storage for at
+  *                                            least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float(
+    OpusEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusEncoder</code> allocated by opus_encoder_create().
+  * @param[in] st <tt>OpusEncoder*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st);
+
+/** Perform a CTL function on an Opus encoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @param st <tt>OpusEncoder*</tt>: Encoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls or
+  *                @ref opus_encoderctls.
+  * @see opus_genericctls
+  * @see opus_encoderctls
+  */
+OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+/**@}*/
+
+/** @defgroup opus_decoder Opus Decoder
+  * @{
+  *
+  * @brief This page describes the process and functions used to decode Opus.
+  *
+  * The decoding process also starts with creating a decoder
+  * state. This can be done with:
+  * @code
+  * int          error;
+  * OpusDecoder *dec;
+  * dec = opus_decoder_create(Fs, channels, &error);
+  * @endcode
+  * where
+  * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000
+  * @li channels is the number of channels (1 or 2)
+  * @li error will hold the error code in case of failure (or #OPUS_OK on success)
+  * @li the return value is a newly created decoder state to be used for decoding
+  *
+  * While opus_decoder_create() allocates memory for the state, it's also possible
+  * to initialize pre-allocated memory:
+  * @code
+  * int          size;
+  * int          error;
+  * OpusDecoder *dec;
+  * size = opus_decoder_get_size(channels);
+  * dec = malloc(size);
+  * error = opus_decoder_init(dec, Fs, channels);
+  * @endcode
+  * where opus_decoder_get_size() returns the required size for the decoder state. Note that
+  * future versions of this code may change the size, so no assuptions should be made about it.
+  *
+  * The decoder state is always continuous in memory and only a shallow copy is sufficient
+  * to copy it (e.g. memcpy())
+  *
+  * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data:
+  * @code
+  * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0);
+  * @endcode
+  * where
+  *
+  * @li packet is the byte array containing the compressed data
+  * @li len is the exact number of bytes contained in the packet
+  * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float())
+  * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded_frame array
+  *
+  * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet.
+  * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio
+  * buffer is too small to hold the decoded audio.
+  *
+  * Opus is a stateful codec with overlapping blocks and as a result Opus
+  * packets are not coded independently of each other. Packets must be
+  * passed into the decoder serially and in the correct order for a correct
+  * decode. Lost packets can be replaced with loss concealment by calling
+  * the decoder with a null pointer and zero length for the missing packet.
+  *
+  * A single codec state may only be accessed from a single thread at
+  * a time and any required locking must be performed by the caller. Separate
+  * streams must be decoded with separate decoder states and can be decoded
+  * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK
+  * defined.
+  *
+  */
+
+/** Opus decoder state.
+  * This contains the complete state of an Opus decoder.
+  * It is position independent and can be freely copied.
+  * @see opus_decoder_create,opus_decoder_init
+  */
+typedef struct OpusDecoder OpusDecoder;
+
+/** Gets the size of an <code>OpusDecoder</code> structure.
+  * @param [in] channels <tt>int</tt>: Number of channels.
+  *                                    This must be 1 or 2.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels);
+
+/** Allocates and initializes a decoder state.
+  * @param [in] Fs <tt>opus_int32</tt>: Sample rate to decode at (Hz).
+  *                                     This must be one of 8000, 12000, 16000,
+  *                                     24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+  * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes
+  *
+  * Internally Opus stores data at 48000 Hz, so that should be the default
+  * value for Fs. However, the decoder can efficiently decode to buffers
+  * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use
+  * data at the full sample rate, or knows the compressed data doesn't
+  * use the full frequency range, it can request decoding at a reduced
+  * rate. Likewise, the decoder is capable of filling in either mono or
+  * interleaved stereo pcm buffers, at the caller's request.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create(
+    opus_int32 Fs,
+    int channels,
+    int *error
+);
+
+/** Initializes a previously allocated decoder state.
+  * The state must be at least the size returned by opus_decoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state.
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate to decode to (Hz).
+  *                                     This must be one of 8000, 12000, 16000,
+  *                                     24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+  * @retval #OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_EXPORT int opus_decoder_init(
+    OpusDecoder *st,
+    opus_int32 Fs,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Decode an Opus packet.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload*
+  * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+  *  If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+  *  not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+  *  then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+  *  decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+  *  FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+  *  decoded. If no such data is available, the frame is decoded as if it were lost.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode(
+    OpusDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int16 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an Opus packet with floating point output.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(float)
+  * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+  *  If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+  *  not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+  *  then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+  *  decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+  *  FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+  *  decoded. If no such data is available the frame is decoded as if it were lost.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float(
+    OpusDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    float *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus decoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @param st <tt>OpusDecoder*</tt>: Decoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls or
+  *                @ref opus_decoderctls.
+  * @see opus_genericctls
+  * @see opus_decoderctls
+  */
+OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusDecoder</code> allocated by opus_decoder_create().
+  * @param[in] st <tt>OpusDecoder*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st);
+
+/** Parse an opus packet into one or more frames.
+  * Opus_decode will perform this operation internally so most applications do
+  * not need to use this function.
+  * This function does not copy the frames, the returned pointers are pointers into
+  * the input packet.
+  * @param [in] data <tt>char*</tt>: Opus packet to be parsed
+  * @param [in] len <tt>opus_int32</tt>: size of data
+  * @param [out] out_toc <tt>char*</tt>: TOC pointer
+  * @param [out] frames <tt>char*[48]</tt> encapsulated frames
+  * @param [out] size <tt>opus_int16[48]</tt> sizes of the encapsulated frames
+  * @param [out] payload_offset <tt>int*</tt>: returns the position of the payload within the packet (in bytes)
+  * @returns number of frames
+  */
+OPUS_EXPORT int opus_packet_parse(
+   const unsigned char *data,
+   opus_int32 len,
+   unsigned char *out_toc,
+   const unsigned char *frames[48],
+   opus_int16 size[48],
+   int *payload_offset
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Gets the bandwidth of an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet
+  * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass)
+  * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass)
+  * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass)
+  * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass)
+  * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass)
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples per frame from an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet.
+  *                                  This must contain at least one byte of
+  *                                  data.
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+  *                                     This must be a multiple of 400, or
+  *                                     inaccurate results will be returned.
+  * @returns Number of samples per frame.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of channels from an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet
+  * @returns Number of channels
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of frames in an Opus packet.
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @returns Number of frames
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+  *                                     This must be a multiple of 400, or
+  *                                     inaccurate results will be returned.
+  * @returns Number of samples
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+  * @param [in] dec <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @returns Number of samples
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+/** Applies soft-clipping to bring a float signal within the [-1,1] range. If
+  * the signal is already in that range, nothing is done. If there are values
+  * outside of [-1,1], then the signal is clipped as smoothly as possible to
+  * both fit in the range and avoid creating excessive distortion in the
+  * process.
+  * @param [in,out] pcm <tt>float*</tt>: Input PCM and modified PCM
+  * @param [in] frame_size <tt>int</tt> Number of samples per channel to process
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [in,out] softclip_mem <tt>float*</tt>: State memory for the soft clipping process (one float per channel, initialized to zero)
+  */
+OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem);
+
+
+/**@}*/
+
+/** @defgroup opus_repacketizer Repacketizer
+  * @{
+  *
+  * The repacketizer can be used to merge multiple Opus packets into a single
+  * packet or alternatively to split Opus packets that have previously been
+  * merged. Splitting valid Opus packets is always guaranteed to succeed,
+  * whereas merging valid packets only succeeds if all frames have the same
+  * mode, bandwidth, and frame size, and when the total duration of the merged
+  * packet is no more than 120 ms.
+  * The repacketizer currently only operates on elementary Opus
+  * streams. It will not manipualte multistream packets successfully, except in
+  * the degenerate case where they consist of data from a single stream.
+  *
+  * The repacketizing process starts with creating a repacketizer state, either
+  * by calling opus_repacketizer_create() or by allocating the memory yourself,
+  * e.g.,
+  * @code
+  * OpusRepacketizer *rp;
+  * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size());
+  * if (rp != NULL)
+  *     opus_repacketizer_init(rp);
+  * @endcode
+  *
+  * Then the application should submit packets with opus_repacketizer_cat(),
+  * extract new packets with opus_repacketizer_out() or
+  * opus_repacketizer_out_range(), and then reset the state for the next set of
+  * input packets via opus_repacketizer_init().
+  *
+  * For example, to split a sequence of packets into individual frames:
+  * @code
+  * unsigned char *data;
+  * int len;
+  * while (get_next_packet(&data, &len))
+  * {
+  *   unsigned char out[1276];
+  *   opus_int32 out_len;
+  *   int nb_frames;
+  *   int err;
+  *   int i;
+  *   err = opus_repacketizer_cat(rp, data, len);
+  *   if (err != OPUS_OK)
+  *   {
+  *     release_packet(data);
+  *     return err;
+  *   }
+  *   nb_frames = opus_repacketizer_get_nb_frames(rp);
+  *   for (i = 0; i < nb_frames; i++)
+  *   {
+  *     out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out));
+  *     if (out_len < 0)
+  *     {
+  *        release_packet(data);
+  *        return (int)out_len;
+  *     }
+  *     output_next_packet(out, out_len);
+  *   }
+  *   opus_repacketizer_init(rp);
+  *   release_packet(data);
+  * }
+  * @endcode
+  *
+  * Alternatively, to combine a sequence of frames into packets that each
+  * contain up to <code>TARGET_DURATION_MS</code> milliseconds of data:
+  * @code
+  * // The maximum number of packets with duration TARGET_DURATION_MS occurs
+  * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5)
+  * // packets.
+  * unsigned char *data[(TARGET_DURATION_MS*2/5)+1];
+  * opus_int32 len[(TARGET_DURATION_MS*2/5)+1];
+  * int nb_packets;
+  * unsigned char out[1277*(TARGET_DURATION_MS*2/2)];
+  * opus_int32 out_len;
+  * int prev_toc;
+  * nb_packets = 0;
+  * while (get_next_packet(data+nb_packets, len+nb_packets))
+  * {
+  *   int nb_frames;
+  *   int err;
+  *   nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]);
+  *   if (nb_frames < 1)
+  *   {
+  *     release_packets(data, nb_packets+1);
+  *     return nb_frames;
+  *   }
+  *   nb_frames += opus_repacketizer_get_nb_frames(rp);
+  *   // If adding the next packet would exceed our target, or it has an
+  *   // incompatible TOC sequence, output the packets we already have before
+  *   // submitting it.
+  *   // N.B., The nb_packets > 0 check ensures we've submitted at least one
+  *   // packet since the last call to opus_repacketizer_init(). Otherwise a
+  *   // single packet longer than TARGET_DURATION_MS would cause us to try to
+  *   // output an (invalid) empty packet. It also ensures that prev_toc has
+  *   // been set to a valid value. Additionally, len[nb_packets] > 0 is
+  *   // guaranteed by the call to opus_packet_get_nb_frames() above, so the
+  *   // reference to data[nb_packets][0] should be valid.
+  *   if (nb_packets > 0 && (
+  *       ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) ||
+  *       opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames >
+  *       TARGET_DURATION_MS*48))
+  *   {
+  *     out_len = opus_repacketizer_out(rp, out, sizeof(out));
+  *     if (out_len < 0)
+  *     {
+  *        release_packets(data, nb_packets+1);
+  *        return (int)out_len;
+  *     }
+  *     output_next_packet(out, out_len);
+  *     opus_repacketizer_init(rp);
+  *     release_packets(data, nb_packets);
+  *     data[0] = data[nb_packets];
+  *     len[0] = len[nb_packets];
+  *     nb_packets = 0;
+  *   }
+  *   err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]);
+  *   if (err != OPUS_OK)
+  *   {
+  *     release_packets(data, nb_packets+1);
+  *     return err;
+  *   }
+  *   prev_toc = data[nb_packets][0];
+  *   nb_packets++;
+  * }
+  * // Output the final, partial packet.
+  * if (nb_packets > 0)
+  * {
+  *   out_len = opus_repacketizer_out(rp, out, sizeof(out));
+  *   release_packets(data, nb_packets);
+  *   if (out_len < 0)
+  *     return (int)out_len;
+  *   output_next_packet(out, out_len);
+  * }
+  * @endcode
+  *
+  * An alternate way of merging packets is to simply call opus_repacketizer_cat()
+  * unconditionally until it fails. At that point, the merged packet can be
+  * obtained with opus_repacketizer_out() and the input packet for which
+  * opus_repacketizer_cat() needs to be re-added to a newly reinitialized
+  * repacketizer state.
+  */
+
+typedef struct OpusRepacketizer OpusRepacketizer;
+
+/** Gets the size of an <code>OpusRepacketizer</code> structure.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void);
+
+/** (Re)initializes a previously allocated repacketizer state.
+  * The state must be at least the size returned by opus_repacketizer_get_size().
+  * This can be used for applications which use their own allocator instead of
+  * malloc().
+  * It must also be called to reset the queue of packets waiting to be
+  * repacketized, which is necessary if the maximum packet duration of 120 ms
+  * is reached or if you wish to submit packets with a different Opus
+  * configuration (coding mode, audio bandwidth, frame size, or channel count).
+  * Failure to do so will prevent a new packet from being added with
+  * opus_repacketizer_cat().
+  * @see opus_repacketizer_create
+  * @see opus_repacketizer_get_size
+  * @see opus_repacketizer_cat
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to
+  *                                       (re)initialize.
+  * @returns A pointer to the same repacketizer state that was passed in.
+  */
+OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Allocates memory and initializes the new repacketizer with
+ * opus_repacketizer_init().
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void);
+
+/** Frees an <code>OpusRepacketizer</code> allocated by
+  * opus_repacketizer_create().
+  * @param[in] rp <tt>OpusRepacketizer*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp);
+
+/** Add a packet to the current repacketizer state.
+  * This packet must match the configuration of any packets already submitted
+  * for repacketization since the last call to opus_repacketizer_init().
+  * This means that it must have the same coding mode, audio bandwidth, frame
+  * size, and channel count.
+  * This can be checked in advance by examining the top 6 bits of the first
+  * byte of the packet, and ensuring they match the top 6 bits of the first
+  * byte of any previously submitted packet.
+  * The total duration of audio in the repacketizer state also must not exceed
+  * 120 ms, the maximum duration of a single packet, after adding this packet.
+  *
+  * The contents of the current repacketizer state can be extracted into new
+  * packets using opus_repacketizer_out() or opus_repacketizer_out_range().
+  *
+  * In order to add a packet with a different configuration or to add more
+  * audio beyond 120 ms, you must clear the repacketizer state by calling
+  * opus_repacketizer_init().
+  * If a packet is too large to add to the current repacketizer state, no part
+  * of it is added, even if it contains multiple frames, some of which might
+  * fit.
+  * If you wish to be able to add parts of such packets, you should first use
+  * another repacketizer to split the packet into pieces and add them
+  * individually.
+  * @see opus_repacketizer_out_range
+  * @see opus_repacketizer_out
+  * @see opus_repacketizer_init
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to which to
+  *                                       add the packet.
+  * @param[in] data <tt>const unsigned char*</tt>: The packet data.
+  *                                                The application must ensure
+  *                                                this pointer remains valid
+  *                                                until the next call to
+  *                                                opus_repacketizer_init() or
+  *                                                opus_repacketizer_destroy().
+  * @param len <tt>opus_int32</tt>: The number of bytes in the packet data.
+  * @returns An error code indicating whether or not the operation succeeded.
+  * @retval #OPUS_OK The packet's contents have been added to the repacketizer
+  *                  state.
+  * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence,
+  *                              the packet's TOC sequence was not compatible
+  *                              with previously submitted packets (because
+  *                              the coding mode, audio bandwidth, frame size,
+  *                              or channel count did not match), or adding
+  *                              this packet would increase the total amount of
+  *                              audio stored in the repacketizer state to more
+  *                              than 120 ms.
+  */
+OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+
+/** Construct a new packet from data previously submitted to the repacketizer
+  * state via opus_repacketizer_cat().
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+  *                                       construct the new packet.
+  * @param begin <tt>int</tt>: The index of the first frame in the current
+  *                            repacketizer state to include in the output.
+  * @param end <tt>int</tt>: One past the index of the last frame in the
+  *                          current repacketizer state to include in the
+  *                          output.
+  * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to
+  *                                                 store the output packet.
+  * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+  *                                    the output buffer. In order to guarantee
+  *                                    success, this should be at least
+  *                                    <code>1276</code> for a single frame,
+  *                                    or for multiple frames,
+  *                                    <code>1277*(end-begin)</code>.
+  *                                    However, <code>1*(end-begin)</code> plus
+  *                                    the size of all packet data submitted to
+  *                                    the repacketizer since the last call to
+  *                                    opus_repacketizer_init() or
+  *                                    opus_repacketizer_create() is also
+  *                                    sufficient, and possibly much smaller.
+  * @returns The total size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG <code>[begin,end)</code> was an invalid range of
+  *                       frames (begin < 0, begin >= end, or end >
+  *                       opus_repacketizer_get_nb_frames()).
+  * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+  *                                complete output packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Return the total number of frames contained in packet data submitted to
+  * the repacketizer state so far via opus_repacketizer_cat() since the last
+  * call to opus_repacketizer_init() or opus_repacketizer_create().
+  * This defines the valid range of packets that can be extracted with
+  * opus_repacketizer_out_range() or opus_repacketizer_out().
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state containing the
+  *                                       frames.
+  * @returns The total number of frames contained in the packet data submitted
+  *          to the repacketizer state.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Construct a new packet from data previously submitted to the repacketizer
+  * state via opus_repacketizer_cat().
+  * This is a convenience routine that returns all the data submitted so far
+  * in a single packet.
+  * It is equivalent to calling
+  * @code
+  * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp),
+  *                             data, maxlen)
+  * @endcode
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+  *                                       construct the new packet.
+  * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to
+  *                                                 store the output packet.
+  * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+  *                                    the output buffer. In order to guarantee
+  *                                    success, this should be at least
+  *                                    <code>1277*opus_repacketizer_get_nb_frames(rp)</code>.
+  *                                    However,
+  *                                    <code>1*opus_repacketizer_get_nb_frames(rp)</code>
+  *                                    plus the size of all packet data
+  *                                    submitted to the repacketizer since the
+  *                                    last call to opus_repacketizer_init() or
+  *                                    opus_repacketizer_create() is also
+  *                                    sufficient, and possibly much smaller.
+  * @returns The total size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+  *                                complete output packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);
+
+/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+/** Remove all padding from a given Opus packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len);
+
+/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams);
+
+/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_H */
diff --git a/src/main/jni/opus/include/opus_custom.h b/src/main/jni/opus/include/opus_custom.h
new file mode 100644
index 000000000..41f36bf2f
--- /dev/null
+++ b/src/main/jni/opus/include/opus_custom.h
@@ -0,0 +1,342 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2012 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+  @file opus_custom.h
+  @brief Opus-Custom reference implementation API
+ */
+
+#ifndef OPUS_CUSTOM_H
+#define OPUS_CUSTOM_H
+
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CUSTOM_MODES
+# define OPUS_CUSTOM_EXPORT OPUS_EXPORT
+# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT
+#else
+# define OPUS_CUSTOM_EXPORT
+# ifdef OPUS_BUILD
+#  define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE
+# else
+#  define OPUS_CUSTOM_EXPORT_STATIC
+# endif
+#endif
+
+/** @defgroup opus_custom Opus Custom
+  * @{
+  *  Opus Custom is an optional part of the Opus specification and
+  * reference implementation which uses a distinct API from the regular
+  * API and supports frame sizes that are not normally supported.\ Use
+  * of Opus Custom is discouraged for all but very special applications
+  * for which a frame size different from 2.5, 5, 10, or 20 ms is needed
+  * (for either complexity or latency reasons) and where interoperability
+  * is less important.
+  *
+  * In addition to the interoperability limitations the use of Opus custom
+  * disables a substantial chunk of the codec and generally lowers the
+  * quality available at a given bitrate. Normally when an application needs
+  * a different frame size from the codec it should buffer to match the
+  * sizes but this adds a small amount of delay which may be important
+  * in some very low latency applications. Some transports (especially
+  * constant rate RF transports) may also work best with frames of
+  * particular durations.
+  *
+  * Libopus only supports custom modes if they are enabled at compile time.
+  *
+  * The Opus Custom API is similar to the regular API but the
+  * @ref opus_encoder_create and @ref opus_decoder_create calls take
+  * an additional mode parameter which is a structure produced by
+  * a call to @ref opus_custom_mode_create. Both the encoder and decoder
+  * must create a mode using the same sample rate (fs) and frame size
+  * (frame size) so these parameters must either be signaled out of band
+  * or fixed in a particular implementation.
+  *
+  * Similar to regular Opus the custom modes support on the fly frame size
+  * switching, but the sizes available depend on the particular frame size in
+  * use. For some initial frame sizes on a single on the fly size is available.
+  */
+
+/** Contains the state of an encoder. One encoder state is needed
+    for each stream. It is initialized once at the beginning of the
+    stream. Do *not* re-initialize the state for every frame.
+   @brief Encoder state
+ */
+typedef struct OpusCustomEncoder OpusCustomEncoder;
+
+/** State of the decoder. One decoder state is needed for each stream.
+    It is initialized once at the beginning of the stream. Do *not*
+    re-initialize the state for every frame.
+   @brief Decoder state
+ */
+typedef struct OpusCustomDecoder OpusCustomDecoder;
+
+/** The mode contains all the information necessary to create an
+    encoder. Both the encoder and decoder need to be initialized
+    with exactly the same mode, otherwise the output will be
+    corrupted.
+   @brief Mode configuration
+ */
+typedef struct OpusCustomMode OpusCustomMode;
+
+/** Creates a new mode struct. This will be passed to an encoder or
+  * decoder. The mode MUST NOT BE DESTROYED until the encoders and
+  * decoders that use it are destroyed as well.
+  * @param [in] Fs <tt>int</tt>: Sampling rate (8000 to 96000 Hz)
+  * @param [in] frame_size <tt>int</tt>: Number of samples (per channel) to encode in each
+  *        packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes)
+  * @param [out] error <tt>int*</tt>: Returned error code (if NULL, no error will be returned)
+  * @return A newly created mode
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error);
+
+/** Destroys a mode struct. Only call this after all encoders and
+  * decoders using this mode are destroyed as well.
+  * @param [in] mode <tt>OpusCustomMode*</tt>: Mode to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C)
+
+/* Encoder */
+/** Gets the size of an OpusCustomEncoder structure.
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @returns size
+  */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size(
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+# ifdef CUSTOM_MODES
+/** Initializes a previously allocated encoder state
+  * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size.
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_custom_encoder_create(),opus_custom_encoder_get_size()
+  * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  decoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @return OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_init(
+    OpusCustomEncoder *st,
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+# endif
+#endif
+
+
+/** Creates a new encoder state. Each stream needs its own encoder
+  * state (can't be shared across simultaneous streams).
+  * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  decoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [out] error <tt>int*</tt>: Returns an error code
+  * @return Newly created encoder state.
+*/
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create(
+    const OpusCustomMode *mode,
+    int channels,
+    int *error
+) OPUS_ARG_NONNULL(1);
+
+
+/** Destroys a an encoder state.
+  * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st);
+
+/** Encodes a frame of audio.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>float*</tt>: PCM audio in float format, with a normal range of +/-1.0.
+  *          Samples with a range beyond +/-1.0 are supported but will
+  *          be clipped by decoders using the integer API and should
+  *          only be used if it is known that the far end supports
+  *          extended dynamic range. There must be exactly
+  *          frame_size samples per channel.
+  * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+  * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+  * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+  *          (can change from one frame to another)
+  * @return Number of bytes written to "compressed".
+  *       If negative, an error has occurred (see error codes). It is IMPORTANT that
+  *       the length returned be somehow transmitted to the decoder. Otherwise, no
+  *       decoding is possible.
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float(
+    OpusCustomEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *compressed,
+    int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a frame of audio.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>opus_int16*</tt>: PCM audio in signed 16-bit format (native endian).
+  *          There must be exactly frame_size samples per channel.
+  * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+  * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+  * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+  *          (can change from one frame to another)
+  * @return Number of bytes written to "compressed".
+  *       If negative, an error has occurred (see error codes). It is IMPORTANT that
+  *       the length returned be somehow transmitted to the decoder. Otherwise, no
+  *       decoding is possible.
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode(
+    OpusCustomEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *compressed,
+    int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom encoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @see opus_encoderctls
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C)
+/* Decoder */
+
+/** Gets the size of an OpusCustomDecoder structure.
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @returns size
+  */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size(
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Initializes a previously allocated decoder state
+  * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size.
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_custom_decoder_create(),opus_custom_decoder_get_size()
+  * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  encoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @return OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init(
+    OpusCustomDecoder *st,
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+#endif
+
+
+/** Creates a new decoder state. Each stream needs its own decoder state (can't
+  * be shared across simultaneous streams).
+  * @param [in] mode <tt>OpusCustomMode</tt>: Contains all the information about the characteristics of the
+  *          stream (must be the same characteristics as used for the encoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [out] error <tt>int*</tt>: Returns an error code
+  * @return Newly created decoder state.
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create(
+    const OpusCustomMode *mode,
+    int channels,
+    int *error
+) OPUS_ARG_NONNULL(1);
+
+/** Destroys a an decoder state.
+  * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st);
+
+/** Decode an opus custom frame with floating point output
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>int</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(float)
+  * @param [in] frame_size Number of samples per channel of available space in *pcm.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float(
+    OpusCustomDecoder *st,
+    const unsigned char *data,
+    int len,
+    float *pcm,
+    int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an opus custom frame
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>int</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size Number of samples per channel of available space in *pcm.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode(
+    OpusCustomDecoder *st,
+    const unsigned char *data,
+    int len,
+    opus_int16 *pcm,
+    int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom decoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @see opus_genericctls
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_CUSTOM_H */
diff --git a/src/main/jni/opus/include/opus_defines.h b/src/main/jni/opus/include/opus_defines.h
new file mode 100644
index 000000000..265089f65
--- /dev/null
+++ b/src/main/jni/opus/include/opus_defines.h
@@ -0,0 +1,726 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_defines.h
+ * @brief Opus reference implementation constants
+ */
+
+#ifndef OPUS_DEFINES_H
+#define OPUS_DEFINES_H
+
+#include "opus_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @defgroup opus_errorcodes Error codes
+ * @{
+ */
+/** No error @hideinitializer*/
+#define OPUS_OK                0
+/** One or more invalid/out of range arguments @hideinitializer*/
+#define OPUS_BAD_ARG          -1
+/** The mode struct passed is invalid @hideinitializer*/
+#define OPUS_BUFFER_TOO_SMALL -2
+/** An internal error was detected @hideinitializer*/
+#define OPUS_INTERNAL_ERROR   -3
+/** The compressed data passed is corrupted @hideinitializer*/
+#define OPUS_INVALID_PACKET   -4
+/** Invalid/unsupported request number @hideinitializer*/
+#define OPUS_UNIMPLEMENTED    -5
+/** An encoder or decoder structure is invalid or already freed @hideinitializer*/
+#define OPUS_INVALID_STATE    -6
+/** Memory allocation has failed @hideinitializer*/
+#define OPUS_ALLOC_FAIL       -7
+/**@}*/
+
+/** @cond OPUS_INTERNAL_DOC */
+/**Export control for opus functions */
+
+#ifndef OPUS_EXPORT
+# if defined(WIN32)
+#  ifdef OPUS_BUILD
+#   define OPUS_EXPORT __declspec(dllexport)
+#  else
+#   define OPUS_EXPORT
+#  endif
+# elif defined(__GNUC__) && defined(OPUS_BUILD)
+#  define OPUS_EXPORT __attribute__ ((visibility ("default")))
+# else
+#  define OPUS_EXPORT
+# endif
+#endif
+
+# if !defined(OPUS_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define OPUS_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define OPUS_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(3,0)
+#  define OPUS_RESTRICT __restrict__
+# elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#  define OPUS_RESTRICT __restrict
+# else
+#  define OPUS_RESTRICT
+# endif
+#else
+# define OPUS_RESTRICT restrict
+#endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(2,7)
+#  define OPUS_INLINE __inline__
+# elif (defined(_MSC_VER))
+#  define OPUS_INLINE __inline
+# else
+#  define OPUS_INLINE
+# endif
+#else
+# define OPUS_INLINE inline
+#endif
+
+/**Warning attributes for opus functions
+  * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out
+  * some paranoid null checks. */
+#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+#else
+# define OPUS_WARN_UNUSED_RESULT
+#endif
+#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_ARG_NONNULL(_x)  __attribute__ ((__nonnull__(_x)))
+#else
+# define OPUS_ARG_NONNULL(_x)
+#endif
+
+/** These are the actual Encoder CTL ID numbers.
+  * They should not be used directly by applications.
+  * In general, SETs should be even and GETs should be odd.*/
+#define OPUS_SET_APPLICATION_REQUEST         4000
+#define OPUS_GET_APPLICATION_REQUEST         4001
+#define OPUS_SET_BITRATE_REQUEST             4002
+#define OPUS_GET_BITRATE_REQUEST             4003
+#define OPUS_SET_MAX_BANDWIDTH_REQUEST       4004
+#define OPUS_GET_MAX_BANDWIDTH_REQUEST       4005
+#define OPUS_SET_VBR_REQUEST                 4006
+#define OPUS_GET_VBR_REQUEST                 4007
+#define OPUS_SET_BANDWIDTH_REQUEST           4008
+#define OPUS_GET_BANDWIDTH_REQUEST           4009
+#define OPUS_SET_COMPLEXITY_REQUEST          4010
+#define OPUS_GET_COMPLEXITY_REQUEST          4011
+#define OPUS_SET_INBAND_FEC_REQUEST          4012
+#define OPUS_GET_INBAND_FEC_REQUEST          4013
+#define OPUS_SET_PACKET_LOSS_PERC_REQUEST    4014
+#define OPUS_GET_PACKET_LOSS_PERC_REQUEST    4015
+#define OPUS_SET_DTX_REQUEST                 4016
+#define OPUS_GET_DTX_REQUEST                 4017
+#define OPUS_SET_VBR_CONSTRAINT_REQUEST      4020
+#define OPUS_GET_VBR_CONSTRAINT_REQUEST      4021
+#define OPUS_SET_FORCE_CHANNELS_REQUEST      4022
+#define OPUS_GET_FORCE_CHANNELS_REQUEST      4023
+#define OPUS_SET_SIGNAL_REQUEST              4024
+#define OPUS_GET_SIGNAL_REQUEST              4025
+#define OPUS_GET_LOOKAHEAD_REQUEST           4027
+/* #define OPUS_RESET_STATE 4028 */
+#define OPUS_GET_SAMPLE_RATE_REQUEST         4029
+#define OPUS_GET_FINAL_RANGE_REQUEST         4031
+#define OPUS_GET_PITCH_REQUEST               4033
+#define OPUS_SET_GAIN_REQUEST                4034
+#define OPUS_GET_GAIN_REQUEST                4045 /* Should have been 4035 */
+#define OPUS_SET_LSB_DEPTH_REQUEST           4036
+#define OPUS_GET_LSB_DEPTH_REQUEST           4037
+#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
+#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
+#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
+
+/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
+
+/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
+#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
+#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
+#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
+#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
+/** @endcond */
+
+/** @defgroup opus_ctlvalues Pre-defined values for CTL interface
+  * @see opus_genericctls, opus_encoderctls
+  * @{
+  */
+/* Values for the various encoder CTLs */
+#define OPUS_AUTO                           -1000 /**<Auto/default setting @hideinitializer*/
+#define OPUS_BITRATE_MAX                       -1 /**<Maximum bitrate @hideinitializer*/
+
+/** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most
+ * @hideinitializer */
+#define OPUS_APPLICATION_VOIP                2048
+/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input
+ * @hideinitializer */
+#define OPUS_APPLICATION_AUDIO               2049
+/** Only use when lowest-achievable latency is what matters most. Voice-optimized modes cannot be used.
+ * @hideinitializer */
+#define OPUS_APPLICATION_RESTRICTED_LOWDELAY 2051
+
+#define OPUS_SIGNAL_VOICE                    3001 /**< Signal being encoded is voice */
+#define OPUS_SIGNAL_MUSIC                    3002 /**< Signal being encoded is music */
+#define OPUS_BANDWIDTH_NARROWBAND            1101 /**< 4 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_MEDIUMBAND            1102 /**< 6 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_WIDEBAND              1103 /**< 8 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_SUPERWIDEBAND         1104 /**<12 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_FULLBAND              1105 /**<20 kHz bandpass @hideinitializer*/
+
+#define OPUS_FRAMESIZE_ARG                   5000 /**< Select frame size from the argument (default) */
+#define OPUS_FRAMESIZE_2_5_MS                5001 /**< Use 2.5 ms frames */
+#define OPUS_FRAMESIZE_5_MS                  5002 /**< Use 5 ms frames */
+#define OPUS_FRAMESIZE_10_MS                 5003 /**< Use 10 ms frames */
+#define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */
+#define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */
+#define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */
+
+/**@}*/
+
+
+/** @defgroup opus_encoderctls Encoder related CTLs
+  *
+  * These are convenience macros for use with the \c opus_encode_ctl
+  * interface. They are used to generate the appropriate series of
+  * arguments for that call, passing the correct type, size and so
+  * on as expected for each particular request.
+  *
+  * Some usage examples:
+  *
+  * @code
+  * int ret;
+  * ret = opus_encoder_ctl(enc_ctx, OPUS_SET_BANDWIDTH(OPUS_AUTO));
+  * if (ret != OPUS_OK) return ret;
+  *
+  * opus_int32 rate;
+  * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&rate));
+  *
+  * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+  * @endcode
+  *
+  * @see opus_genericctls, opus_encoder
+  * @{
+  */
+
+/** Configures the encoder's computational complexity.
+  * The supported range is 0-10 inclusive with 10 representing the highest complexity.
+  * @see OPUS_GET_COMPLEXITY
+  * @param[in] x <tt>opus_int32</tt>: Allowed values: 0-10, inclusive.
+  *
+  * @hideinitializer */
+#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x)
+/** Gets the encoder's complexity configuration.
+  * @see OPUS_SET_COMPLEXITY
+  * @param[out] x <tt>opus_int32 *</tt>: Returns a value in the range 0-10,
+  *                                      inclusive.
+  * @hideinitializer */
+#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the bitrate in the encoder.
+  * Rates from 500 to 512000 bits per second are meaningful, as well as the
+  * special values #OPUS_AUTO and #OPUS_BITRATE_MAX.
+  * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much
+  * rate as it can, which is useful for controlling the rate by adjusting the
+  * output buffer size.
+  * @see OPUS_GET_BITRATE
+  * @param[in] x <tt>opus_int32</tt>: Bitrate in bits per second. The default
+  *                                   is determined based on the number of
+  *                                   channels and the input sampling rate.
+  * @hideinitializer */
+#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x)
+/** Gets the encoder's bitrate configuration.
+  * @see OPUS_SET_BITRATE
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the bitrate in bits per second.
+  *                                      The default is determined based on the
+  *                                      number of channels and the input
+  *                                      sampling rate.
+  * @hideinitializer */
+#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables variable bitrate (VBR) in the encoder.
+  * The configured bitrate may not be met exactly because frames must
+  * be an integer number of bytes in length.
+  * @warning Only the MDCT mode of Opus can provide hard CBR behavior.
+  * @see OPUS_GET_VBR
+  * @see OPUS_SET_VBR_CONSTRAINT
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Hard CBR. For LPC/hybrid modes at very low bit-rate, this can
+  *               cause noticeable quality degradation.</dd>
+  * <dt>1</dt><dd>VBR (default). The exact type of VBR is controlled by
+  *               #OPUS_SET_VBR_CONSTRAINT.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x)
+/** Determine if variable bitrate (VBR) is enabled in the encoder.
+  * @see OPUS_SET_VBR
+  * @see OPUS_GET_VBR_CONSTRAINT
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Hard CBR.</dd>
+  * <dt>1</dt><dd>VBR (default). The exact type of VBR may be retrieved via
+  *               #OPUS_GET_VBR_CONSTRAINT.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables constrained VBR in the encoder.
+  * This setting is ignored when the encoder is in CBR mode.
+  * @warning Only the MDCT mode of Opus currently heeds the constraint.
+  *  Speech mode ignores it completely, hybrid mode may fail to obey it
+  *  if the LPC layer uses more bitrate than the constraint would have
+  *  permitted.
+  * @see OPUS_GET_VBR_CONSTRAINT
+  * @see OPUS_SET_VBR
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Unconstrained VBR.</dd>
+  * <dt>1</dt><dd>Constrained VBR (default). This creates a maximum of one
+  *               frame of buffering delay assuming a transport with a
+  *               serialization speed of the nominal bitrate.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x)
+/** Determine if constrained VBR is enabled in the encoder.
+  * @see OPUS_SET_VBR_CONSTRAINT
+  * @see OPUS_GET_VBR
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Unconstrained VBR.</dd>
+  * <dt>1</dt><dd>Constrained VBR (default).</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures mono/stereo forcing in the encoder.
+  * This can force the encoder to produce packets encoded as either mono or
+  * stereo, regardless of the format of the input audio. This is useful when
+  * the caller knows that the input signal is currently a mono source embedded
+  * in a stereo stream.
+  * @see OPUS_GET_FORCE_CHANNELS
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+  * <dt>1</dt>         <dd>Forced mono</dd>
+  * <dt>2</dt>         <dd>Forced stereo</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x)
+/** Gets the encoder's forced channel configuration.
+  * @see OPUS_SET_FORCE_CHANNELS
+  * @param[out] x <tt>opus_int32 *</tt>:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+  * <dt>1</dt>         <dd>Forced mono</dd>
+  * <dt>2</dt>         <dd>Forced stereo</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the maximum bandpass that the encoder will select automatically.
+  * Applications should normally use this instead of #OPUS_SET_BANDWIDTH
+  * (leaving that set to the default, #OPUS_AUTO). This allows the
+  * application to set an upper bound based on the type of input it is
+  * providing, but still gives the encoder the freedom to reduce the bandpass
+  * when the bitrate becomes too low, for better overall quality.
+  * @see OPUS_GET_MAX_BANDWIDTH
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband (default)</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Gets the encoder's configured maximum allowed bandpass.
+  * @see OPUS_SET_MAX_BANDWIDTH
+  * @param[out] x <tt>opus_int32 *</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband (default)</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Sets the encoder's bandpass to a specific value.
+  * This prevents the encoder from automatically selecting the bandpass based
+  * on the available bitrate. If an application knows the bandpass of the input
+  * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH
+  * instead, which still gives the encoder the freedom to reduce the bandpass
+  * when the bitrate becomes too low, for better overall quality.
+  * @see OPUS_GET_BANDWIDTH
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>                    <dd>(default)</dd>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Configures the type of signal being encoded.
+  * This is a hint which helps the encoder's mode selection.
+  * @see OPUS_GET_SIGNAL
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>        <dd>(default)</dd>
+  * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+  * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal type.
+  * @see OPUS_SET_SIGNAL
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>        <dd>(default)</dd>
+  * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+  * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x)
+
+
+/** Configures the encoder's intended application.
+  * The initial value is a mandatory argument to the encoder_create function.
+  * @see OPUS_GET_APPLICATION
+  * @param[in] x <tt>opus_int32</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured application.
+  * @see OPUS_SET_APPLICATION
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the sampling rate the encoder or decoder was initialized with.
+  * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
+  * or opus_decoder_init().
+  * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
+  * @hideinitializer
+  */
+#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the total samples of delay added by the entire codec.
+  * This can be queried by the encoder and then the provided number of samples can be
+  * skipped on from the start of the decoder's output to provide time aligned input
+  * and output. From the perspective of a decoding application the real data begins this many
+  * samples late.
+  *
+  * The decoder contribution to this delay is identical for all decoders, but the
+  * encoder portion of the delay may vary from implementation to implementation,
+  * version to version, or even depend on the encoder's initial configuration.
+  * Applications needing delay compensation should call this CTL rather than
+  * hard-coding a value.
+  * @param[out] x <tt>opus_int32 *</tt>:   Number of lookahead samples
+  * @hideinitializer */
+#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of inband forward error correction (FEC).
+  * @note This is only applicable to the LPC layer
+  * @see OPUS_GET_INBAND_FEC
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Disable inband FEC (default).</dd>
+  * <dt>1</dt><dd>Enable inband FEC.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of inband forward error correction.
+  * @see OPUS_SET_INBAND_FEC
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Inband FEC disabled (default).</dd>
+  * <dt>1</dt><dd>Inband FEC enabled.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's expected packet loss percentage.
+  * Higher values with trigger progressively more loss resistant behavior in the encoder
+  * at the expense of quality at a given bitrate in the lossless case, but greater quality
+  * under loss.
+  * @see OPUS_GET_PACKET_LOSS_PERC
+  * @param[in] x <tt>opus_int32</tt>:   Loss percentage in the range 0-100, inclusive (default: 0).
+  * @hideinitializer */
+#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured packet loss percentage.
+  * @see OPUS_SET_PACKET_LOSS_PERC
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the configured loss percentage
+  *                                      in the range 0-100, inclusive (default: 0).
+  * @hideinitializer */
+#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of discontinuous transmission (DTX).
+  * @note This is only applicable to the LPC layer
+  * @see OPUS_GET_DTX
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Disable DTX (default).</dd>
+  * <dt>1</dt><dd>Enabled DTX.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of discontinuous transmission.
+  * @see OPUS_SET_DTX
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>DTX disabled (default).</dd>
+  * <dt>1</dt><dd>DTX enabled.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
+/** Configures the depth of signal being encoded.
+  * This is a hint which helps the encoder identify silence and near-silence.
+  * @see OPUS_GET_LSB_DEPTH
+  * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
+  *                                   (default: 24).
+  * @hideinitializer */
+#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal depth.
+  * @see OPUS_SET_LSB_DEPTH
+  * @param[out] x <tt>opus_int32 *</tt>: Input precision in bits, between 8 and
+  *                                      24 (default: 24).
+  * @hideinitializer */
+#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
+  * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
+  * @hideinitializer */
+#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of variable duration frames.
+  * When variable duration is enabled, the encoder is free to use a shorter frame
+  * size than the one requested in the opus_encode*() call.
+  * It is then the user's responsibility
+  * to verify how much audio was encoded by checking the ToC byte of the encoded
+  * packet. The part of the audio that was not encoded needs to be resent to the
+  * encoder for the next call. Do not use this option unless you <b>really</b>
+  * know what you are doing.
+  * @see OPUS_GET_EXPERT_VARIABLE_DURATION
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured use of variable duration frames.
+  * @see OPUS_SET_EXPERT_VARIABLE_DURATION
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** If set to 1, disables almost all use of prediction, making frames almost
+    completely independent. This reduces quality. (default : 0)
+  * @hideinitializer */
+#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured prediction status.
+  * @hideinitializer */
+#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_genericctls Generic CTLs
+  *
+  * These macros are used with the \c opus_decoder_ctl and
+  * \c opus_encoder_ctl calls to generate a particular
+  * request.
+  *
+  * When called on an \c OpusDecoder they apply to that
+  * particular decoder instance. When called on an
+  * \c OpusEncoder they apply to the corresponding setting
+  * on that encoder instance, if present.
+  *
+  * Some usage examples:
+  *
+  * @code
+  * int ret;
+  * opus_int32 pitch;
+  * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch));
+  * if (ret == OPUS_OK) return ret;
+  *
+  * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+  * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE);
+  *
+  * opus_int32 enc_bw, dec_bw;
+  * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw));
+  * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw));
+  * if (enc_bw != dec_bw) {
+  *   printf("packet bandwidth mismatch!\n");
+  * }
+  * @endcode
+  *
+  * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls
+  * @{
+  */
+
+/** Resets the codec state to be equivalent to a freshly initialized state.
+  * This should be called when switching streams in order to prevent
+  * the back to back decoding from giving different results from
+  * one at a time decoding.
+  * @hideinitializer */
+#define OPUS_RESET_STATE 4028
+
+/** Gets the final state of the codec's entropy coder.
+  * This is used for testing purposes,
+  * The encoder and decoder state should be identical after coding a payload
+  * (assuming no data corruption or software bugs)
+  *
+  * @param[out] x <tt>opus_uint32 *</tt>: Entropy coder state
+  *
+  * @hideinitializer */
+#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
+
+/** Gets the pitch of the last decoded frame, if available.
+  * This can be used for any post-processing algorithm requiring the use of pitch,
+  * e.g. time stretching/shortening. If the last frame was not voiced, or if the
+  * pitch was not coded in the frame, then zero is returned.
+  *
+  * This CTL is only implemented for decoder instances.
+  *
+  * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
+  *
+  * @hideinitializer */
+#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the encoder's configured bandpass or the decoder's last bandpass.
+  * @see OPUS_SET_BANDWIDTH
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>                    <dd>(default)</dd>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_decoderctls Decoder related CTLs
+  * @see opus_genericctls, opus_encoderctls, opus_decoder
+  * @{
+  */
+
+/** Configures decoder gain adjustment.
+  * Scales the decoded output by a factor specified in Q8 dB units.
+  * This has a maximum range of -32768 to 32767 inclusive, and returns
+  * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment.
+  * This setting survives decoder reset.
+  *
+  * gain = pow(10, x/(20.0*256))
+  *
+  * @param[in] x <tt>opus_int32</tt>:   Amount to scale PCM signal by in Q8 dB units.
+  * @hideinitializer */
+#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x)
+/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN
+  *
+  * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units.
+  * @hideinitializer */
+#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_libinfo Opus library information functions
+  * @{
+  */
+
+/** Converts an opus error code into a human readable string.
+  *
+  * @param[in] error <tt>int</tt>: Error number
+  * @returns Error string
+  */
+OPUS_EXPORT const char *opus_strerror(int error);
+
+/** Gets the libopus version string.
+  *
+  * @returns Version string
+  */
+OPUS_EXPORT const char *opus_get_version_string(void);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_DEFINES_H */
diff --git a/src/main/jni/opus/include/opus_multistream.h b/src/main/jni/opus/include/opus_multistream.h
new file mode 100644
index 000000000..ae5997934
--- /dev/null
+++ b/src/main/jni/opus/include/opus_multistream.h
@@ -0,0 +1,660 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_multistream.h
+ * @brief Opus reference implementation multistream API
+ */
+
+#ifndef OPUS_MULTISTREAM_H
+#define OPUS_MULTISTREAM_H
+
+#include "opus.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond OPUS_INTERNAL_DOC */
+
+/** Macros to trigger compilation errors when the wrong types are provided to a
+  * CTL. */
+/**@{*/
+#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr)))
+#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr)))
+/**@}*/
+
+/** These are the actual encoder and decoder CTL ID numbers.
+  * They should not be used directly by applications.
+  * In general, SETs should be even and GETs should be odd.*/
+/**@{*/
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120
+#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122
+/**@}*/
+
+/** @endcond */
+
+/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs
+  *
+  * These are convenience macros that are specific to the
+  * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl()
+  * interface.
+  * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and
+  * @ref opus_decoderctls may be applied to a multistream encoder or decoder as
+  * well.
+  * In addition, you may retrieve the encoder or decoder state for an specific
+  * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or
+  * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually.
+  */
+/**@{*/
+
+/** Gets the encoder state for an individual stream of a multistream encoder.
+  * @param[in] x <tt>opus_int32</tt>: The index of the stream whose encoder you
+  *                                   wish to retrieve.
+  *                                   This must be non-negative and less than
+  *                                   the <code>streams</code> parameter used
+  *                                   to initialize the encoder.
+  * @param[out] y <tt>OpusEncoder**</tt>: Returns a pointer to the given
+  *                                       encoder state.
+  * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+  * @hideinitializer
+  */
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y)
+
+/** Gets the decoder state for an individual stream of a multistream decoder.
+  * @param[in] x <tt>opus_int32</tt>: The index of the stream whose decoder you
+  *                                   wish to retrieve.
+  *                                   This must be non-negative and less than
+  *                                   the <code>streams</code> parameter used
+  *                                   to initialize the decoder.
+  * @param[out] y <tt>OpusDecoder**</tt>: Returns a pointer to the given
+  *                                       decoder state.
+  * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+  * @hideinitializer
+  */
+#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y)
+
+/**@}*/
+
+/** @defgroup opus_multistream Opus Multistream API
+  * @{
+  *
+  * The multistream API allows individual Opus streams to be combined into a
+  * single packet, enabling support for up to 255 channels. Unlike an
+  * elementary Opus stream, the encoder and decoder must negotiate the channel
+  * configuration before the decoder can successfully interpret the data in the
+  * packets produced by the encoder. Some basic information, such as packet
+  * duration, can be computed without any special negotiation.
+  *
+  * The format for multistream Opus packets is defined in the
+  * <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
+  * encapsulation specification</a> and is based on the self-delimited Opus
+  * framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+  * Normal Opus packets are just a degenerate case of multistream Opus packets,
+  * and can be encoded or decoded with the multistream API by setting
+  * <code>streams</code> to <code>1</code> when initializing the encoder or
+  * decoder.
+  *
+  * Multistream Opus streams can contain up to 255 elementary Opus streams.
+  * These may be either "uncoupled" or "coupled", indicating that the decoder
+  * is configured to decode them to either 1 or 2 channels, respectively.
+  * The streams are ordered so that all coupled streams appear at the
+  * beginning.
+  *
+  * A <code>mapping</code> table defines which decoded channel <code>i</code>
+  * should be used for each input/output (I/O) channel <code>j</code>. This table is
+  * typically provided as an unsigned char array.
+  * Let <code>i = mapping[j]</code> be the index for I/O channel <code>j</code>.
+  * If <code>i < 2*coupled_streams</code>, then I/O channel <code>j</code> is
+  * encoded as the left channel of stream <code>(i/2)</code> if <code>i</code>
+  * is even, or  as the right channel of stream <code>(i/2)</code> if
+  * <code>i</code> is odd. Otherwise, I/O channel <code>j</code> is encoded as
+  * mono in stream <code>(i - coupled_streams)</code>, unless it has the special
+  * value 255, in which case it is omitted from the encoding entirely (the
+  * decoder will reproduce it as silence). Each value <code>i</code> must either
+  * be the special value 255 or be less than <code>streams + coupled_streams</code>.
+  *
+  * The output channels specified by the encoder
+  * should use the
+  * <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+  * channel ordering</a>. A decoder may wish to apply an additional permutation
+  * to the mapping the encoder used to achieve a different output channel
+  * order (e.g. for outputing in WAV order).
+  *
+  * Each multistream packet contains an Opus packet for each stream, and all of
+  * the Opus packets in a single multistream packet must have the same
+  * duration. Therefore the duration of a multistream packet can be extracted
+  * from the TOC sequence of the first stream, which is located at the
+  * beginning of the packet, just like an elementary Opus stream:
+  *
+  * @code
+  * int nb_samples;
+  * int nb_frames;
+  * nb_frames = opus_packet_get_nb_frames(data, len);
+  * if (nb_frames < 1)
+  *   return nb_frames;
+  * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames;
+  * @endcode
+  *
+  * The general encoding and decoding process proceeds exactly the same as in
+  * the normal @ref opus_encoder and @ref opus_decoder APIs.
+  * See their documentation for an overview of how to use the corresponding
+  * multistream functions.
+  */
+
+/** Opus multistream encoder state.
+  * This contains the complete state of a multistream Opus encoder.
+  * It is position independent and can be freely copied.
+  * @see opus_multistream_encoder_create
+  * @see opus_multistream_encoder_init
+  */
+typedef struct OpusMSEncoder OpusMSEncoder;
+
+/** Opus multistream decoder state.
+  * This contains the complete state of a multistream Opus decoder.
+  * It is position independent and can be freely copied.
+  * @see opus_multistream_decoder_create
+  * @see opus_multistream_decoder_init
+  */
+typedef struct OpusMSDecoder OpusMSDecoder;
+
+/**\name Multistream encoder functions */
+/**@{*/
+
+/** Gets the size of an OpusMSEncoder structure.
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size(
+      int streams,
+      int coupled_streams
+);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size(
+      int channels,
+      int mapping_family
+);
+
+
+/** Allocates and initializes a multistream encoder state.
+  * Call opus_multistream_encoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than the number of channels.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than the number of input channels.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    encoded channels to input channels, as described in
+  *                    @ref opus_multistream. As an extra constraint, the
+  *                    multistream encoder does not allow encoding coupled
+  *                    streams for which one channel is unused since this
+  *                    is never a good idea.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+/** Initialize a previously allocated multistream encoder state.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_multistream_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_multistream_encoder_create
+  * @see opus_multistream_encoder_get_size
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than the number of channels.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than the number of input channels.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    encoded channels to input channels, as described in
+  *                    @ref opus_multistream. As an extra constraint, the
+  *                    multistream encoder does not allow encoding coupled
+  *                    streams for which one channel is unused since this
+  *                    is never a good idea.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_multistream_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+OPUS_EXPORT int opus_multistream_surround_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+/** Encodes a multistream Opus frame.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved
+  *                                            samples.
+  *                                            This must contain
+  *                                            <code>frame_size*channels</code>
+  *                                            samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a multistream Opus frame from floating point input.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved
+  *                                       samples with a normal range of
+  *                                       +/-1.0.
+  *                                       Samples with a range beyond +/-1.0
+  *                                       are supported but will be clipped by
+  *                                       decoders using the integer API and
+  *                                       should only be used if it is known
+  *                                       that the far end supports extended
+  *                                       dynamic range.
+  *                                       This must contain
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float(
+      OpusMSEncoder *st,
+      const float *pcm,
+      int frame_size,
+      unsigned char *data,
+      opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusMSEncoder</code> allocated by
+  * opus_multistream_encoder_create().
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to be freed.
+  */
+OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st);
+
+/** Perform a CTL function on a multistream Opus encoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_encoderctls, or @ref opus_multistream_ctls.
+  * @see opus_genericctls
+  * @see opus_encoderctls
+  * @see opus_multistream_ctls
+  */
+OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+/**\name Multistream decoder functions */
+/**@{*/
+
+/** Gets the size of an <code>OpusMSDecoder</code> structure.
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size(
+      int streams,
+      int coupled_streams
+);
+
+/** Allocates and initializes a multistream decoder state.
+  * Call opus_multistream_decoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    coded channels to output channels, as described in
+  *                    @ref opus_multistream.
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+/** Intialize a previously allocated decoder state object.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_multistream_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_multistream_decoder_create
+  * @see opus_multistream_deocder_get_size
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    coded channels to output channels, as described in
+  *                    @ref opus_multistream.
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_multistream_decoder_init(
+      OpusMSDecoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+/** Decode a multistream Opus packet.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode(
+    OpusMSDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int16 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode a multistream Opus packet with floating point output.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float(
+    OpusMSDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    float *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on a multistream Opus decoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_decoderctls, or @ref opus_multistream_ctls.
+  * @see opus_genericctls
+  * @see opus_decoderctls
+  * @see opus_multistream_ctls
+  */
+OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusMSDecoder</code> allocated by
+  * opus_multistream_decoder_create().
+  * @param st <tt>OpusMSDecoder</tt>: Multistream decoder state to be freed.
+  */
+OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st);
+
+/**@}*/
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_MULTISTREAM_H */
diff --git a/src/main/jni/opus/include/opus_types.h b/src/main/jni/opus/include/opus_types.h
new file mode 100644
index 000000000..b28e03aea
--- /dev/null
+++ b/src/main/jni/opus/include/opus_types.h
@@ -0,0 +1,159 @@
+/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
+/* Modified by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* opus_types.h based on ogg_types.h from libogg */
+
+/**
+   @file opus_types.h
+   @brief Opus reference implementation types
+*/
+#ifndef OPUS_TYPES_H
+#define OPUS_TYPES_H
+
+/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
+#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
+#include <stdint.h>
+
+   typedef int16_t opus_int16;
+   typedef uint16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef uint32_t opus_uint32;
+#elif defined(_WIN32)
+
+#  if defined(__CYGWIN__)
+#    include <_G_config.h>
+     typedef _G_int32_t opus_int32;
+     typedef _G_uint32_t opus_uint32;
+     typedef _G_int16 opus_int16;
+     typedef _G_uint16 opus_uint16;
+#  elif defined(__MINGW32__)
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+#  elif defined(__MWERKS__)
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+#  else
+     /* MSVC/Borland */
+     typedef __int32 opus_int32;
+     typedef unsigned __int32 opus_uint32;
+     typedef __int16 opus_int16;
+     typedef unsigned __int16 opus_uint16;
+#  endif
+
+#elif defined(__MACOS__)
+
+#  include <sys/types.h>
+   typedef SInt16 opus_int16;
+   typedef UInt16 opus_uint16;
+   typedef SInt32 opus_int32;
+   typedef UInt32 opus_uint32;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+#  include <sys/types.h>
+   typedef int16_t opus_int16;
+   typedef u_int16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined(__BEOS__)
+
+   /* Be */
+#  include <inttypes.h>
+   typedef int16 opus_int16;
+   typedef u_int16 opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined (__EMX__)
+
+   /* OS/2 GCC */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined (DJGPP)
+
+   /* DJGPP */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(R5900)
+
+   /* PS2 EE */
+   typedef int opus_int32;
+   typedef unsigned opus_uint32;
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+
+#elif defined(__SYMBIAN32__)
+
+   /* Symbian GCC */
+   typedef signed short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef signed int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef long opus_int32;
+   typedef unsigned long opus_uint32;
+
+#elif defined(CONFIG_TI_C6X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#else
+
+   /* Give up, take a reasonable guess */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#endif
+
+#define opus_int         int                     /* used for counters etc; at least 16 bits */
+#define opus_int64       long long
+#define opus_int8        signed char
+
+#define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
+#define opus_uint64      unsigned long long
+#define opus_uint8       unsigned char
+
+#endif  /* OPUS_TYPES_H */
diff --git a/src/main/jni/opus/ogg/bitwise.c b/src/main/jni/opus/ogg/bitwise.c
new file mode 100644
index 000000000..68aca6754
--- /dev/null
+++ b/src/main/jni/opus/ogg/bitwise.c
@@ -0,0 +1,857 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg CONTAINER SOURCE CODE.              *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 18051 2011-08-04 17:56:39Z giles $
+
+ ********************************************************************/
+
+/* We're 'LSb' endian; if we write a word but read individual bits,
+   then we'll read the lsb first */
+
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <ogg/ogg.h>
+
+#define BUFFER_INCREMENT 256
+
+static const unsigned long mask[]=
+{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
+ 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
+ 0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,
+ 0x00007fff,0x0000ffff,0x0001ffff,0x0003ffff,0x0007ffff,
+ 0x000fffff,0x001fffff,0x003fffff,0x007fffff,0x00ffffff,
+ 0x01ffffff,0x03ffffff,0x07ffffff,0x0fffffff,0x1fffffff,
+ 0x3fffffff,0x7fffffff,0xffffffff };
+
+static const unsigned int mask8B[]=
+{0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff};
+
+void oggpack_writeinit(oggpack_buffer *b){
+  memset(b,0,sizeof(*b));
+  b->ptr=b->buffer=_ogg_malloc(BUFFER_INCREMENT);
+  b->buffer[0]='\0';
+  b->storage=BUFFER_INCREMENT;
+}
+
+void oggpackB_writeinit(oggpack_buffer *b){
+  oggpack_writeinit(b);
+}
+
+int oggpack_writecheck(oggpack_buffer *b){
+  if(!b->ptr || !b->storage)return -1;
+  return 0;
+}
+
+int oggpackB_writecheck(oggpack_buffer *b){
+  return oggpack_writecheck(b);
+}
+
+void oggpack_writetrunc(oggpack_buffer *b,long bits){
+  long bytes=bits>>3;
+  if(b->ptr){
+    bits-=bytes*8;
+    b->ptr=b->buffer+bytes;
+    b->endbit=bits;
+    b->endbyte=bytes;
+    *b->ptr&=mask[bits];
+  }
+}
+
+void oggpackB_writetrunc(oggpack_buffer *b,long bits){
+  long bytes=bits>>3;
+  if(b->ptr){
+    bits-=bytes*8;
+    b->ptr=b->buffer+bytes;
+    b->endbit=bits;
+    b->endbyte=bytes;
+    *b->ptr&=mask8B[bits];
+  }
+}
+
+/* Takes only up to 32 bits. */
+void oggpack_write(oggpack_buffer *b,unsigned long value,int bits){
+  if(bits<0 || bits>32) goto err;
+  if(b->endbyte>=b->storage-4){
+    void *ret;
+    if(!b->ptr)return;
+    if(b->storage>LONG_MAX-BUFFER_INCREMENT) goto err;
+    ret=_ogg_realloc(b->buffer,b->storage+BUFFER_INCREMENT);
+    if(!ret) goto err;
+    b->buffer=ret;
+    b->storage+=BUFFER_INCREMENT;
+    b->ptr=b->buffer+b->endbyte;
+  }
+
+  value&=mask[bits];
+  bits+=b->endbit;
+
+  b->ptr[0]|=value<<b->endbit;
+
+  if(bits>=8){
+    b->ptr[1]=(unsigned char)(value>>(8-b->endbit));
+    if(bits>=16){
+      b->ptr[2]=(unsigned char)(value>>(16-b->endbit));
+      if(bits>=24){
+        b->ptr[3]=(unsigned char)(value>>(24-b->endbit));
+        if(bits>=32){
+          if(b->endbit)
+            b->ptr[4]=(unsigned char)(value>>(32-b->endbit));
+          else
+            b->ptr[4]=0;
+        }
+      }
+    }
+  }
+
+  b->endbyte+=bits/8;
+  b->ptr+=bits/8;
+  b->endbit=bits&7;
+  return;
+ err:
+  oggpack_writeclear(b);
+}
+
+/* Takes only up to 32 bits. */
+void oggpackB_write(oggpack_buffer *b,unsigned long value,int bits){
+  if(bits<0 || bits>32) goto err;
+  if(b->endbyte>=b->storage-4){
+    void *ret;
+    if(!b->ptr)return;
+    if(b->storage>LONG_MAX-BUFFER_INCREMENT) goto err;
+    ret=_ogg_realloc(b->buffer,b->storage+BUFFER_INCREMENT);
+    if(!ret) goto err;
+    b->buffer=ret;
+    b->storage+=BUFFER_INCREMENT;
+    b->ptr=b->buffer+b->endbyte;
+  }
+
+  value=(value&mask[bits])<<(32-bits);
+  bits+=b->endbit;
+
+  b->ptr[0]|=value>>(24+b->endbit);
+
+  if(bits>=8){
+    b->ptr[1]=(unsigned char)(value>>(16+b->endbit));
+    if(bits>=16){
+      b->ptr[2]=(unsigned char)(value>>(8+b->endbit));
+      if(bits>=24){
+        b->ptr[3]=(unsigned char)(value>>(b->endbit));
+        if(bits>=32){
+          if(b->endbit)
+            b->ptr[4]=(unsigned char)(value<<(8-b->endbit));
+          else
+            b->ptr[4]=0;
+        }
+      }
+    }
+  }
+
+  b->endbyte+=bits/8;
+  b->ptr+=bits/8;
+  b->endbit=bits&7;
+  return;
+ err:
+  oggpack_writeclear(b);
+}
+
+void oggpack_writealign(oggpack_buffer *b){
+  int bits=8-b->endbit;
+  if(bits<8)
+    oggpack_write(b,0,bits);
+}
+
+void oggpackB_writealign(oggpack_buffer *b){
+  int bits=8-b->endbit;
+  if(bits<8)
+    oggpackB_write(b,0,bits);
+}
+
+static void oggpack_writecopy_helper(oggpack_buffer *b,
+                                     void *source,
+                                     long bits,
+                                     void (*w)(oggpack_buffer *,
+                                               unsigned long,
+                                               int),
+                                     int msb){
+  unsigned char *ptr=(unsigned char *)source;
+
+  long bytes=bits/8;
+  bits-=bytes*8;
+
+  if(b->endbit){
+    int i;
+    /* unaligned copy.  Do it the hard way. */
+    for(i=0;i<bytes;i++)
+      w(b,(unsigned long)(ptr[i]),8);
+  }else{
+    /* aligned block copy */
+    if(b->endbyte+bytes+1>=b->storage){
+      void *ret;
+      if(!b->ptr) goto err;
+      if(b->endbyte+bytes+BUFFER_INCREMENT>b->storage) goto err;
+      b->storage=b->endbyte+bytes+BUFFER_INCREMENT;
+      ret=_ogg_realloc(b->buffer,b->storage);
+      if(!ret) goto err;
+      b->buffer=ret;
+      b->ptr=b->buffer+b->endbyte;
+    }
+
+    memmove(b->ptr,source,bytes);
+    b->ptr+=bytes;
+    b->endbyte+=bytes;
+    *b->ptr=0;
+
+  }
+  if(bits){
+    if(msb)
+      w(b,(unsigned long)(ptr[bytes]>>(8-bits)),bits);
+    else
+      w(b,(unsigned long)(ptr[bytes]),bits);
+  }
+  return;
+ err:
+  oggpack_writeclear(b);
+}
+
+void oggpack_writecopy(oggpack_buffer *b,void *source,long bits){
+  oggpack_writecopy_helper(b,source,bits,oggpack_write,0);
+}
+
+void oggpackB_writecopy(oggpack_buffer *b,void *source,long bits){
+  oggpack_writecopy_helper(b,source,bits,oggpackB_write,1);
+}
+
+void oggpack_reset(oggpack_buffer *b){
+  if(!b->ptr)return;
+  b->ptr=b->buffer;
+  b->buffer[0]=0;
+  b->endbit=b->endbyte=0;
+}
+
+void oggpackB_reset(oggpack_buffer *b){
+  oggpack_reset(b);
+}
+
+void oggpack_writeclear(oggpack_buffer *b){
+  if(b->buffer)_ogg_free(b->buffer);
+  memset(b,0,sizeof(*b));
+}
+
+void oggpackB_writeclear(oggpack_buffer *b){
+  oggpack_writeclear(b);
+}
+
+void oggpack_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){
+  memset(b,0,sizeof(*b));
+  b->buffer=b->ptr=buf;
+  b->storage=bytes;
+}
+
+void oggpackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){
+  oggpack_readinit(b,buf,bytes);
+}
+
+/* Read in bits without advancing the bitptr; bits <= 32 */
+long oggpack_look(oggpack_buffer *b,int bits){
+  unsigned long ret;
+  unsigned long m;
+
+  if(bits<0 || bits>32) return -1;
+  m=mask[bits];
+  bits+=b->endbit;
+
+  if(b->endbyte >= b->storage-4){
+    /* not the main path */
+    if(b->endbyte > b->storage-((bits+7)>>3)) return -1;
+    /* special case to avoid reading b->ptr[0], which might be past the end of
+        the buffer; also skips some useless accounting */
+    else if(!bits)return(0L);
+  }
+
+  ret=b->ptr[0]>>b->endbit;
+  if(bits>8){
+    ret|=b->ptr[1]<<(8-b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(16-b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(24-b->endbit);
+        if(bits>32 && b->endbit)
+          ret|=b->ptr[4]<<(32-b->endbit);
+      }
+    }
+  }
+  return(m&ret);
+}
+
+/* Read in bits without advancing the bitptr; bits <= 32 */
+long oggpackB_look(oggpack_buffer *b,int bits){
+  unsigned long ret;
+  int m=32-bits;
+
+  if(m<0 || m>32) return -1;
+  bits+=b->endbit;
+
+  if(b->endbyte >= b->storage-4){
+    /* not the main path */
+    if(b->endbyte > b->storage-((bits+7)>>3)) return -1;
+    /* special case to avoid reading b->ptr[0], which might be past the end of
+        the buffer; also skips some useless accounting */
+    else if(!bits)return(0L);
+  }
+
+  ret=b->ptr[0]<<(24+b->endbit);
+  if(bits>8){
+    ret|=b->ptr[1]<<(16+b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(8+b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(b->endbit);
+        if(bits>32 && b->endbit)
+          ret|=b->ptr[4]>>(8-b->endbit);
+      }
+    }
+  }
+  return ((ret&0xffffffff)>>(m>>1))>>((m+1)>>1);
+}
+
+long oggpack_look1(oggpack_buffer *b){
+  if(b->endbyte>=b->storage)return(-1);
+  return((b->ptr[0]>>b->endbit)&1);
+}
+
+long oggpackB_look1(oggpack_buffer *b){
+  if(b->endbyte>=b->storage)return(-1);
+  return((b->ptr[0]>>(7-b->endbit))&1);
+}
+
+void oggpack_adv(oggpack_buffer *b,int bits){
+  bits+=b->endbit;
+
+  if(b->endbyte > b->storage-((bits+7)>>3)) goto overflow;
+
+  b->ptr+=bits/8;
+  b->endbyte+=bits/8;
+  b->endbit=bits&7;
+  return;
+
+ overflow:
+  b->ptr=NULL;
+  b->endbyte=b->storage;
+  b->endbit=1;
+}
+
+void oggpackB_adv(oggpack_buffer *b,int bits){
+  oggpack_adv(b,bits);
+}
+
+void oggpack_adv1(oggpack_buffer *b){
+  if(++(b->endbit)>7){
+    b->endbit=0;
+    b->ptr++;
+    b->endbyte++;
+  }
+}
+
+void oggpackB_adv1(oggpack_buffer *b){
+  oggpack_adv1(b);
+}
+
+/* bits <= 32 */
+long oggpack_read(oggpack_buffer *b,int bits){
+  long ret;
+  unsigned long m;
+
+  if(bits<0 || bits>32) goto err;
+  m=mask[bits];
+  bits+=b->endbit;
+
+  if(b->endbyte >= b->storage-4){
+    /* not the main path */
+    if(b->endbyte > b->storage-((bits+7)>>3)) goto overflow;
+    /* special case to avoid reading b->ptr[0], which might be past the end of
+        the buffer; also skips some useless accounting */
+    else if(!bits)return(0L);
+  }
+
+  ret=b->ptr[0]>>b->endbit;
+  if(bits>8){
+    ret|=b->ptr[1]<<(8-b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(16-b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(24-b->endbit);
+        if(bits>32 && b->endbit){
+          ret|=b->ptr[4]<<(32-b->endbit);
+        }
+      }
+    }
+  }
+  ret&=m;
+  b->ptr+=bits/8;
+  b->endbyte+=bits/8;
+  b->endbit=bits&7;
+  return ret;
+
+ overflow:
+ err:
+  b->ptr=NULL;
+  b->endbyte=b->storage;
+  b->endbit=1;
+  return -1L;
+}
+
+/* bits <= 32 */
+long oggpackB_read(oggpack_buffer *b,int bits){
+  long ret;
+  long m=32-bits;
+
+  if(m<0 || m>32) goto err;
+  bits+=b->endbit;
+
+  if(b->endbyte+4>=b->storage){
+    /* not the main path */
+    if(b->endbyte > b->storage-((bits+7)>>3)) goto overflow;
+    /* special case to avoid reading b->ptr[0], which might be past the end of
+        the buffer; also skips some useless accounting */
+    else if(!bits)return(0L);
+  }
+
+  ret=b->ptr[0]<<(24+b->endbit);
+  if(bits>8){
+    ret|=b->ptr[1]<<(16+b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(8+b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(b->endbit);
+        if(bits>32 && b->endbit)
+          ret|=b->ptr[4]>>(8-b->endbit);
+      }
+    }
+  }
+  ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1);
+
+  b->ptr+=bits/8;
+  b->endbyte+=bits/8;
+  b->endbit=bits&7;
+  return ret;
+
+ overflow:
+ err:
+  b->ptr=NULL;
+  b->endbyte=b->storage;
+  b->endbit=1;
+  return -1L;
+}
+
+long oggpack_read1(oggpack_buffer *b){
+  long ret;
+
+  if(b->endbyte >= b->storage) goto overflow;
+  ret=(b->ptr[0]>>b->endbit)&1;
+
+  b->endbit++;
+  if(b->endbit>7){
+    b->endbit=0;
+    b->ptr++;
+    b->endbyte++;
+  }
+  return ret;
+
+ overflow:
+  b->ptr=NULL;
+  b->endbyte=b->storage;
+  b->endbit=1;
+  return -1L;
+}
+
+long oggpackB_read1(oggpack_buffer *b){
+  long ret;
+
+  if(b->endbyte >= b->storage) goto overflow;
+  ret=(b->ptr[0]>>(7-b->endbit))&1;
+
+  b->endbit++;
+  if(b->endbit>7){
+    b->endbit=0;
+    b->ptr++;
+    b->endbyte++;
+  }
+  return ret;
+
+ overflow:
+  b->ptr=NULL;
+  b->endbyte=b->storage;
+  b->endbit=1;
+  return -1L;
+}
+
+long oggpack_bytes(oggpack_buffer *b){
+  return(b->endbyte+(b->endbit+7)/8);
+}
+
+long oggpack_bits(oggpack_buffer *b){
+  return(b->endbyte*8+b->endbit);
+}
+
+long oggpackB_bytes(oggpack_buffer *b){
+  return oggpack_bytes(b);
+}
+
+long oggpackB_bits(oggpack_buffer *b){
+  return oggpack_bits(b);
+}
+
+unsigned char *oggpack_get_buffer(oggpack_buffer *b){
+  return(b->buffer);
+}
+
+unsigned char *oggpackB_get_buffer(oggpack_buffer *b){
+  return oggpack_get_buffer(b);
+}
+
+/* Self test of the bitwise routines; everything else is based on
+   them, so they damned well better be solid. */
+
+#ifdef _V_SELFTEST
+#include <stdio.h>
+
+static int ilog(unsigned int v){
+  int ret=0;
+  while(v){
+    ret++;
+    v>>=1;
+  }
+  return(ret);
+}
+
+oggpack_buffer o;
+oggpack_buffer r;
+
+void report(char *in){
+  fprintf(stderr,"%s",in);
+  exit(1);
+}
+
+void cliptest(unsigned long *b,int vals,int bits,int *comp,int compsize){
+  long bytes,i;
+  unsigned char *buffer;
+
+  oggpack_reset(&o);
+  for(i=0;i<vals;i++)
+    oggpack_write(&o,b[i],bits?bits:ilog(b[i]));
+  buffer=oggpack_get_buffer(&o);
+  bytes=oggpack_bytes(&o);
+  if(bytes!=compsize)report("wrong number of bytes!\n");
+  for(i=0;i<bytes;i++)if(buffer[i]!=comp[i]){
+    for(i=0;i<bytes;i++)fprintf(stderr,"%x %x\n",(int)buffer[i],(int)comp[i]);
+    report("wrote incorrect value!\n");
+  }
+  oggpack_readinit(&r,buffer,bytes);
+  for(i=0;i<vals;i++){
+    int tbit=bits?bits:ilog(b[i]);
+    if(oggpack_look(&r,tbit)==-1)
+      report("out of data!\n");
+    if(oggpack_look(&r,tbit)!=(b[i]&mask[tbit]))
+      report("looked at incorrect value!\n");
+    if(tbit==1)
+      if(oggpack_look1(&r)!=(b[i]&mask[tbit]))
+        report("looked at single bit incorrect value!\n");
+    if(tbit==1){
+      if(oggpack_read1(&r)!=(b[i]&mask[tbit]))
+        report("read incorrect single bit value!\n");
+    }else{
+    if(oggpack_read(&r,tbit)!=(b[i]&mask[tbit]))
+      report("read incorrect value!\n");
+    }
+  }
+  if(oggpack_bytes(&r)!=bytes)report("leftover bytes after read!\n");
+}
+
+void cliptestB(unsigned long *b,int vals,int bits,int *comp,int compsize){
+  long bytes,i;
+  unsigned char *buffer;
+
+  oggpackB_reset(&o);
+  for(i=0;i<vals;i++)
+    oggpackB_write(&o,b[i],bits?bits:ilog(b[i]));
+  buffer=oggpackB_get_buffer(&o);
+  bytes=oggpackB_bytes(&o);
+  if(bytes!=compsize)report("wrong number of bytes!\n");
+  for(i=0;i<bytes;i++)if(buffer[i]!=comp[i]){
+    for(i=0;i<bytes;i++)fprintf(stderr,"%x %x\n",(int)buffer[i],(int)comp[i]);
+    report("wrote incorrect value!\n");
+  }
+  oggpackB_readinit(&r,buffer,bytes);
+  for(i=0;i<vals;i++){
+    int tbit=bits?bits:ilog(b[i]);
+    if(oggpackB_look(&r,tbit)==-1)
+      report("out of data!\n");
+    if(oggpackB_look(&r,tbit)!=(b[i]&mask[tbit]))
+      report("looked at incorrect value!\n");
+    if(tbit==1)
+      if(oggpackB_look1(&r)!=(b[i]&mask[tbit]))
+        report("looked at single bit incorrect value!\n");
+    if(tbit==1){
+      if(oggpackB_read1(&r)!=(b[i]&mask[tbit]))
+        report("read incorrect single bit value!\n");
+    }else{
+    if(oggpackB_read(&r,tbit)!=(b[i]&mask[tbit]))
+      report("read incorrect value!\n");
+    }
+  }
+  if(oggpackB_bytes(&r)!=bytes)report("leftover bytes after read!\n");
+}
+
+int main(void){
+  unsigned char *buffer;
+  long bytes,i;
+  static unsigned long testbuffer1[]=
+    {18,12,103948,4325,543,76,432,52,3,65,4,56,32,42,34,21,1,23,32,546,456,7,
+       567,56,8,8,55,3,52,342,341,4,265,7,67,86,2199,21,7,1,5,1,4};
+  int test1size=43;
+
+  static unsigned long testbuffer2[]=
+    {216531625L,1237861823,56732452,131,3212421,12325343,34547562,12313212,
+       1233432,534,5,346435231,14436467,7869299,76326614,167548585,
+       85525151,0,12321,1,349528352};
+  int test2size=21;
+
+  static unsigned long testbuffer3[]=
+    {1,0,14,0,1,0,12,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,1,1,1,0,0,1,
+       0,1,30,1,1,1,0,0,1,0,0,0,12,0,11,0,1,0,0,1};
+  int test3size=56;
+
+  static unsigned long large[]=
+    {2136531625L,2137861823,56732452,131,3212421,12325343,34547562,12313212,
+       1233432,534,5,2146435231,14436467,7869299,76326614,167548585,
+       85525151,0,12321,1,2146528352};
+
+  int onesize=33;
+  static int one[33]={146,25,44,151,195,15,153,176,233,131,196,65,85,172,47,40,
+                    34,242,223,136,35,222,211,86,171,50,225,135,214,75,172,
+                    223,4};
+  static int oneB[33]={150,101,131,33,203,15,204,216,105,193,156,65,84,85,222,
+                       8,139,145,227,126,34,55,244,171,85,100,39,195,173,18,
+                       245,251,128};
+
+  int twosize=6;
+  static int two[6]={61,255,255,251,231,29};
+  static int twoB[6]={247,63,255,253,249,120};
+
+  int threesize=54;
+  static int three[54]={169,2,232,252,91,132,156,36,89,13,123,176,144,32,254,
+                      142,224,85,59,121,144,79,124,23,67,90,90,216,79,23,83,
+                      58,135,196,61,55,129,183,54,101,100,170,37,127,126,10,
+                      100,52,4,14,18,86,77,1};
+  static int threeB[54]={206,128,42,153,57,8,183,251,13,89,36,30,32,144,183,
+                         130,59,240,121,59,85,223,19,228,180,134,33,107,74,98,
+                         233,253,196,135,63,2,110,114,50,155,90,127,37,170,104,
+                         200,20,254,4,58,106,176,144,0};
+
+  int foursize=38;
+  static int four[38]={18,6,163,252,97,194,104,131,32,1,7,82,137,42,129,11,72,
+                     132,60,220,112,8,196,109,64,179,86,9,137,195,208,122,169,
+                     28,2,133,0,1};
+  static int fourB[38]={36,48,102,83,243,24,52,7,4,35,132,10,145,21,2,93,2,41,
+                        1,219,184,16,33,184,54,149,170,132,18,30,29,98,229,67,
+                        129,10,4,32};
+
+  int fivesize=45;
+  static int five[45]={169,2,126,139,144,172,30,4,80,72,240,59,130,218,73,62,
+                     241,24,210,44,4,20,0,248,116,49,135,100,110,130,181,169,
+                     84,75,159,2,1,0,132,192,8,0,0,18,22};
+  static int fiveB[45]={1,84,145,111,245,100,128,8,56,36,40,71,126,78,213,226,
+                        124,105,12,0,133,128,0,162,233,242,67,152,77,205,77,
+                        172,150,169,129,79,128,0,6,4,32,0,27,9,0};
+
+  int sixsize=7;
+  static int six[7]={17,177,170,242,169,19,148};
+  static int sixB[7]={136,141,85,79,149,200,41};
+
+  /* Test read/write together */
+  /* Later we test against pregenerated bitstreams */
+  oggpack_writeinit(&o);
+
+  fprintf(stderr,"\nSmall preclipped packing (LSb): ");
+  cliptest(testbuffer1,test1size,0,one,onesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nNull bit call (LSb): ");
+  cliptest(testbuffer3,test3size,0,two,twosize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nLarge preclipped packing (LSb): ");
+  cliptest(testbuffer2,test2size,0,three,threesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\n32 bit preclipped packing (LSb): ");
+  oggpack_reset(&o);
+  for(i=0;i<test2size;i++)
+    oggpack_write(&o,large[i],32);
+  buffer=oggpack_get_buffer(&o);
+  bytes=oggpack_bytes(&o);
+  oggpack_readinit(&r,buffer,bytes);
+  for(i=0;i<test2size;i++){
+    if(oggpack_look(&r,32)==-1)report("out of data. failed!");
+    if(oggpack_look(&r,32)!=large[i]){
+      fprintf(stderr,"%ld != %ld (%lx!=%lx):",oggpack_look(&r,32),large[i],
+              oggpack_look(&r,32),large[i]);
+      report("read incorrect value!\n");
+    }
+    oggpack_adv(&r,32);
+  }
+  if(oggpack_bytes(&r)!=bytes)report("leftover bytes after read!\n");
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nSmall unclipped packing (LSb): ");
+  cliptest(testbuffer1,test1size,7,four,foursize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nLarge unclipped packing (LSb): ");
+  cliptest(testbuffer2,test2size,17,five,fivesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nSingle bit unclipped packing (LSb): ");
+  cliptest(testbuffer3,test3size,1,six,sixsize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nTesting read past end (LSb): ");
+  oggpack_readinit(&r,(unsigned char *)"\0\0\0\0\0\0\0\0",8);
+  for(i=0;i<64;i++){
+    if(oggpack_read(&r,1)!=0){
+      fprintf(stderr,"failed; got -1 prematurely.\n");
+      exit(1);
+    }
+  }
+  if(oggpack_look(&r,1)!=-1 ||
+     oggpack_read(&r,1)!=-1){
+      fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  oggpack_readinit(&r,(unsigned char *)"\0\0\0\0\0\0\0\0",8);
+  if(oggpack_read(&r,30)!=0 || oggpack_read(&r,16)!=0){
+      fprintf(stderr,"failed 2; got -1 prematurely.\n");
+      exit(1);
+  }
+
+  if(oggpack_look(&r,18)!=0 ||
+     oggpack_look(&r,18)!=0){
+    fprintf(stderr,"failed 3; got -1 prematurely.\n");
+      exit(1);
+  }
+  if(oggpack_look(&r,19)!=-1 ||
+     oggpack_look(&r,19)!=-1){
+    fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  if(oggpack_look(&r,32)!=-1 ||
+     oggpack_look(&r,32)!=-1){
+    fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  oggpack_writeclear(&o);
+  fprintf(stderr,"ok.\n");
+
+  /********** lazy, cut-n-paste retest with MSb packing ***********/
+
+  /* Test read/write together */
+  /* Later we test against pregenerated bitstreams */
+  oggpackB_writeinit(&o);
+
+  fprintf(stderr,"\nSmall preclipped packing (MSb): ");
+  cliptestB(testbuffer1,test1size,0,oneB,onesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nNull bit call (MSb): ");
+  cliptestB(testbuffer3,test3size,0,twoB,twosize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nLarge preclipped packing (MSb): ");
+  cliptestB(testbuffer2,test2size,0,threeB,threesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\n32 bit preclipped packing (MSb): ");
+  oggpackB_reset(&o);
+  for(i=0;i<test2size;i++)
+    oggpackB_write(&o,large[i],32);
+  buffer=oggpackB_get_buffer(&o);
+  bytes=oggpackB_bytes(&o);
+  oggpackB_readinit(&r,buffer,bytes);
+  for(i=0;i<test2size;i++){
+    if(oggpackB_look(&r,32)==-1)report("out of data. failed!");
+    if(oggpackB_look(&r,32)!=large[i]){
+      fprintf(stderr,"%ld != %ld (%lx!=%lx):",oggpackB_look(&r,32),large[i],
+              oggpackB_look(&r,32),large[i]);
+      report("read incorrect value!\n");
+    }
+    oggpackB_adv(&r,32);
+  }
+  if(oggpackB_bytes(&r)!=bytes)report("leftover bytes after read!\n");
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nSmall unclipped packing (MSb): ");
+  cliptestB(testbuffer1,test1size,7,fourB,foursize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nLarge unclipped packing (MSb): ");
+  cliptestB(testbuffer2,test2size,17,fiveB,fivesize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nSingle bit unclipped packing (MSb): ");
+  cliptestB(testbuffer3,test3size,1,sixB,sixsize);
+  fprintf(stderr,"ok.");
+
+  fprintf(stderr,"\nTesting read past end (MSb): ");
+  oggpackB_readinit(&r,(unsigned char *)"\0\0\0\0\0\0\0\0",8);
+  for(i=0;i<64;i++){
+    if(oggpackB_read(&r,1)!=0){
+      fprintf(stderr,"failed; got -1 prematurely.\n");
+      exit(1);
+    }
+  }
+  if(oggpackB_look(&r,1)!=-1 ||
+     oggpackB_read(&r,1)!=-1){
+      fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  oggpackB_readinit(&r,(unsigned char *)"\0\0\0\0\0\0\0\0",8);
+  if(oggpackB_read(&r,30)!=0 || oggpackB_read(&r,16)!=0){
+      fprintf(stderr,"failed 2; got -1 prematurely.\n");
+      exit(1);
+  }
+
+  if(oggpackB_look(&r,18)!=0 ||
+     oggpackB_look(&r,18)!=0){
+    fprintf(stderr,"failed 3; got -1 prematurely.\n");
+      exit(1);
+  }
+  if(oggpackB_look(&r,19)!=-1 ||
+     oggpackB_look(&r,19)!=-1){
+    fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  if(oggpackB_look(&r,32)!=-1 ||
+     oggpackB_look(&r,32)!=-1){
+    fprintf(stderr,"failed; read past end without -1.\n");
+      exit(1);
+  }
+  oggpackB_writeclear(&o);
+  fprintf(stderr,"ok.\n\n");
+
+
+  return(0);
+}
+#endif  /* _V_SELFTEST */
+
+#undef BUFFER_INCREMENT
diff --git a/src/main/jni/opus/ogg/framing.c b/src/main/jni/opus/ogg/framing.c
new file mode 100644
index 000000000..3a2f0a605
--- /dev/null
+++ b/src/main/jni/opus/ogg/framing.c
@@ -0,0 +1,2111 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg CONTAINER SOURCE CODE.              *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2010             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: code raw packets into framed OggSquish stream and
+           decode Ogg streams back into raw packets
+ last mod: $Id: framing.c 18758 2013-01-08 16:29:56Z tterribe $
+
+ note: The CRC code is directly derived from public domain code by
+ Ross Williams (ross@guest.adelaide.edu.au).  See docs/framing.html
+ for details.
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <ogg/ogg.h>
+
+/* A complete description of Ogg framing exists in docs/framing.html */
+
+int ogg_page_version(const ogg_page *og){
+  return((int)(og->header[4]));
+}
+
+int ogg_page_continued(const ogg_page *og){
+  return((int)(og->header[5]&0x01));
+}
+
+int ogg_page_bos(const ogg_page *og){
+  return((int)(og->header[5]&0x02));
+}
+
+int ogg_page_eos(const ogg_page *og){
+  return((int)(og->header[5]&0x04));
+}
+
+ogg_int64_t ogg_page_granulepos(const ogg_page *og){
+  unsigned char *page=og->header;
+  ogg_int64_t granulepos=page[13]&(0xff);
+  granulepos= (granulepos<<8)|(page[12]&0xff);
+  granulepos= (granulepos<<8)|(page[11]&0xff);
+  granulepos= (granulepos<<8)|(page[10]&0xff);
+  granulepos= (granulepos<<8)|(page[9]&0xff);
+  granulepos= (granulepos<<8)|(page[8]&0xff);
+  granulepos= (granulepos<<8)|(page[7]&0xff);
+  granulepos= (granulepos<<8)|(page[6]&0xff);
+  return(granulepos);
+}
+
+int ogg_page_serialno(const ogg_page *og){
+  return(og->header[14] |
+         (og->header[15]<<8) |
+         (og->header[16]<<16) |
+         (og->header[17]<<24));
+}
+
+long ogg_page_pageno(const ogg_page *og){
+  return(og->header[18] |
+         (og->header[19]<<8) |
+         (og->header[20]<<16) |
+         (og->header[21]<<24));
+}
+
+
+
+/* returns the number of packets that are completed on this page (if
+   the leading packet is begun on a previous page, but ends on this
+   page, it's counted */
+
+/* NOTE:
+   If a page consists of a packet begun on a previous page, and a new
+   packet begun (but not completed) on this page, the return will be:
+     ogg_page_packets(page)   ==1,
+     ogg_page_continued(page) !=0
+
+   If a page happens to be a single packet that was begun on a
+   previous page, and spans to the next page (in the case of a three or
+   more page packet), the return will be:
+     ogg_page_packets(page)   ==0,
+     ogg_page_continued(page) !=0
+*/
+
+int ogg_page_packets(const ogg_page *og){
+  int i,n=og->header[26],count=0;
+  for(i=0;i<n;i++)
+    if(og->header[27+i]<255)count++;
+  return(count);
+}
+
+
+#if 0
+/* helper to initialize lookup for direct-table CRC (illustrative; we
+   use the static init below) */
+
+static ogg_uint32_t _ogg_crc_entry(unsigned long index){
+  int           i;
+  unsigned long r;
+
+  r = index << 24;
+  for (i=0; i<8; i++)
+    if (r & 0x80000000UL)
+      r = (r << 1) ^ 0x04c11db7; /* The same as the ethernet generator
+                                    polynomial, although we use an
+                                    unreflected alg and an init/final
+                                    of 0, not 0xffffffff */
+    else
+       r<<=1;
+ return (r & 0xffffffffUL);
+}
+#endif
+
+static const ogg_uint32_t crc_lookup[256]={
+  0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9,
+  0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005,
+  0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61,
+  0x350c9b64,0x31cd86d3,0x3c8ea00a,0x384fbdbd,
+  0x4c11db70,0x48d0c6c7,0x4593e01e,0x4152fda9,
+  0x5f15adac,0x5bd4b01b,0x569796c2,0x52568b75,
+  0x6a1936c8,0x6ed82b7f,0x639b0da6,0x675a1011,
+  0x791d4014,0x7ddc5da3,0x709f7b7a,0x745e66cd,
+  0x9823b6e0,0x9ce2ab57,0x91a18d8e,0x95609039,
+  0x8b27c03c,0x8fe6dd8b,0x82a5fb52,0x8664e6e5,
+  0xbe2b5b58,0xbaea46ef,0xb7a96036,0xb3687d81,
+  0xad2f2d84,0xa9ee3033,0xa4ad16ea,0xa06c0b5d,
+  0xd4326d90,0xd0f37027,0xddb056fe,0xd9714b49,
+  0xc7361b4c,0xc3f706fb,0xceb42022,0xca753d95,
+  0xf23a8028,0xf6fb9d9f,0xfbb8bb46,0xff79a6f1,
+  0xe13ef6f4,0xe5ffeb43,0xe8bccd9a,0xec7dd02d,
+  0x34867077,0x30476dc0,0x3d044b19,0x39c556ae,
+  0x278206ab,0x23431b1c,0x2e003dc5,0x2ac12072,
+  0x128e9dcf,0x164f8078,0x1b0ca6a1,0x1fcdbb16,
+  0x018aeb13,0x054bf6a4,0x0808d07d,0x0cc9cdca,
+  0x7897ab07,0x7c56b6b0,0x71159069,0x75d48dde,
+  0x6b93dddb,0x6f52c06c,0x6211e6b5,0x66d0fb02,
+  0x5e9f46bf,0x5a5e5b08,0x571d7dd1,0x53dc6066,
+  0x4d9b3063,0x495a2dd4,0x44190b0d,0x40d816ba,
+  0xaca5c697,0xa864db20,0xa527fdf9,0xa1e6e04e,
+  0xbfa1b04b,0xbb60adfc,0xb6238b25,0xb2e29692,
+  0x8aad2b2f,0x8e6c3698,0x832f1041,0x87ee0df6,
+  0x99a95df3,0x9d684044,0x902b669d,0x94ea7b2a,
+  0xe0b41de7,0xe4750050,0xe9362689,0xedf73b3e,
+  0xf3b06b3b,0xf771768c,0xfa325055,0xfef34de2,
+  0xc6bcf05f,0xc27dede8,0xcf3ecb31,0xcbffd686,
+  0xd5b88683,0xd1799b34,0xdc3abded,0xd8fba05a,
+  0x690ce0ee,0x6dcdfd59,0x608edb80,0x644fc637,
+  0x7a089632,0x7ec98b85,0x738aad5c,0x774bb0eb,
+  0x4f040d56,0x4bc510e1,0x46863638,0x42472b8f,
+  0x5c007b8a,0x58c1663d,0x558240e4,0x51435d53,
+  0x251d3b9e,0x21dc2629,0x2c9f00f0,0x285e1d47,
+  0x36194d42,0x32d850f5,0x3f9b762c,0x3b5a6b9b,
+  0x0315d626,0x07d4cb91,0x0a97ed48,0x0e56f0ff,
+  0x1011a0fa,0x14d0bd4d,0x19939b94,0x1d528623,
+  0xf12f560e,0xf5ee4bb9,0xf8ad6d60,0xfc6c70d7,
+  0xe22b20d2,0xe6ea3d65,0xeba91bbc,0xef68060b,
+  0xd727bbb6,0xd3e6a601,0xdea580d8,0xda649d6f,
+  0xc423cd6a,0xc0e2d0dd,0xcda1f604,0xc960ebb3,
+  0xbd3e8d7e,0xb9ff90c9,0xb4bcb610,0xb07daba7,
+  0xae3afba2,0xaafbe615,0xa7b8c0cc,0xa379dd7b,
+  0x9b3660c6,0x9ff77d71,0x92b45ba8,0x9675461f,
+  0x8832161a,0x8cf30bad,0x81b02d74,0x857130c3,
+  0x5d8a9099,0x594b8d2e,0x5408abf7,0x50c9b640,
+  0x4e8ee645,0x4a4ffbf2,0x470cdd2b,0x43cdc09c,
+  0x7b827d21,0x7f436096,0x7200464f,0x76c15bf8,
+  0x68860bfd,0x6c47164a,0x61043093,0x65c52d24,
+  0x119b4be9,0x155a565e,0x18197087,0x1cd86d30,
+  0x029f3d35,0x065e2082,0x0b1d065b,0x0fdc1bec,
+  0x3793a651,0x3352bbe6,0x3e119d3f,0x3ad08088,
+  0x2497d08d,0x2056cd3a,0x2d15ebe3,0x29d4f654,
+  0xc5a92679,0xc1683bce,0xcc2b1d17,0xc8ea00a0,
+  0xd6ad50a5,0xd26c4d12,0xdf2f6bcb,0xdbee767c,
+  0xe3a1cbc1,0xe760d676,0xea23f0af,0xeee2ed18,
+  0xf0a5bd1d,0xf464a0aa,0xf9278673,0xfde69bc4,
+  0x89b8fd09,0x8d79e0be,0x803ac667,0x84fbdbd0,
+  0x9abc8bd5,0x9e7d9662,0x933eb0bb,0x97ffad0c,
+  0xafb010b1,0xab710d06,0xa6322bdf,0xa2f33668,
+  0xbcb4666d,0xb8757bda,0xb5365d03,0xb1f740b4};
+
+/* init the encode/decode logical stream state */
+
+int ogg_stream_init(ogg_stream_state *os,int serialno){
+  if(os){
+    memset(os,0,sizeof(*os));
+    os->body_storage=16*1024;
+    os->lacing_storage=1024;
+
+    os->body_data=_ogg_malloc(os->body_storage*sizeof(*os->body_data));
+    os->lacing_vals=_ogg_malloc(os->lacing_storage*sizeof(*os->lacing_vals));
+    os->granule_vals=_ogg_malloc(os->lacing_storage*sizeof(*os->granule_vals));
+
+    if(!os->body_data || !os->lacing_vals || !os->granule_vals){
+      ogg_stream_clear(os);
+      return -1;
+    }
+
+    os->serialno=serialno;
+
+    return(0);
+  }
+  return(-1);
+}
+
+/* async/delayed error detection for the ogg_stream_state */
+int ogg_stream_check(ogg_stream_state *os){
+  if(!os || !os->body_data) return -1;
+  return 0;
+}
+
+/* _clear does not free os, only the non-flat storage within */
+int ogg_stream_clear(ogg_stream_state *os){
+  if(os){
+    if(os->body_data)_ogg_free(os->body_data);
+    if(os->lacing_vals)_ogg_free(os->lacing_vals);
+    if(os->granule_vals)_ogg_free(os->granule_vals);
+
+    memset(os,0,sizeof(*os));
+  }
+  return(0);
+}
+
+int ogg_stream_destroy(ogg_stream_state *os){
+  if(os){
+    ogg_stream_clear(os);
+    _ogg_free(os);
+  }
+  return(0);
+}
+
+/* Helpers for ogg_stream_encode; this keeps the structure and
+   what's happening fairly clear */
+
+static int _os_body_expand(ogg_stream_state *os,long needed){
+  if(os->body_storage-needed<=os->body_fill){
+    long body_storage;
+    void *ret;
+    if(os->body_storage>LONG_MAX-needed){
+      ogg_stream_clear(os);
+      return -1;
+    }
+    body_storage=os->body_storage+needed;
+    if(body_storage<LONG_MAX-1024)body_storage+=1024;
+    ret=_ogg_realloc(os->body_data,body_storage*sizeof(*os->body_data));
+    if(!ret){
+      ogg_stream_clear(os);
+      return -1;
+    }
+    os->body_storage=body_storage;
+    os->body_data=ret;
+  }
+  return 0;
+}
+
+static int _os_lacing_expand(ogg_stream_state *os,long needed){
+  if(os->lacing_storage-needed<=os->lacing_fill){
+    long lacing_storage;
+    void *ret;
+    if(os->lacing_storage>LONG_MAX-needed){
+      ogg_stream_clear(os);
+      return -1;
+    }
+    lacing_storage=os->lacing_storage+needed;
+    if(lacing_storage<LONG_MAX-32)lacing_storage+=32;
+    ret=_ogg_realloc(os->lacing_vals,lacing_storage*sizeof(*os->lacing_vals));
+    if(!ret){
+      ogg_stream_clear(os);
+      return -1;
+    }
+    os->lacing_vals=ret;
+    ret=_ogg_realloc(os->granule_vals,lacing_storage*
+                     sizeof(*os->granule_vals));
+    if(!ret){
+      ogg_stream_clear(os);
+      return -1;
+    }
+    os->granule_vals=ret;
+    os->lacing_storage=lacing_storage;
+  }
+  return 0;
+}
+
+/* checksum the page */
+/* Direct table CRC; note that this will be faster in the future if we
+   perform the checksum simultaneously with other copies */
+
+void ogg_page_checksum_set(ogg_page *og){
+  if(og){
+    ogg_uint32_t crc_reg=0;
+    int i;
+
+    /* safety; needed for API behavior, but not framing code */
+    og->header[22]=0;
+    og->header[23]=0;
+    og->header[24]=0;
+    og->header[25]=0;
+
+    for(i=0;i<og->header_len;i++)
+      crc_reg=(crc_reg<<8)^crc_lookup[((crc_reg >> 24)&0xff)^og->header[i]];
+    for(i=0;i<og->body_len;i++)
+      crc_reg=(crc_reg<<8)^crc_lookup[((crc_reg >> 24)&0xff)^og->body[i]];
+
+    og->header[22]=(unsigned char)(crc_reg&0xff);
+    og->header[23]=(unsigned char)((crc_reg>>8)&0xff);
+    og->header[24]=(unsigned char)((crc_reg>>16)&0xff);
+    og->header[25]=(unsigned char)((crc_reg>>24)&0xff);
+  }
+}
+
+/* submit data to the internal buffer of the framing engine */
+int ogg_stream_iovecin(ogg_stream_state *os, ogg_iovec_t *iov, int count,
+                       long e_o_s, ogg_int64_t granulepos){
+
+  long bytes = 0, lacing_vals;
+  int i;
+
+  if(ogg_stream_check(os)) return -1;
+  if(!iov) return 0;
+
+  for (i = 0; i < count; ++i){
+    if(iov[i].iov_len>LONG_MAX) return -1;
+    if(bytes>LONG_MAX-(long)iov[i].iov_len) return -1;
+    bytes += (long)iov[i].iov_len;
+  }
+  lacing_vals=bytes/255+1;
+
+  if(os->body_returned){
+    /* advance packet data according to the body_returned pointer. We
+       had to keep it around to return a pointer into the buffer last
+       call */
+
+    os->body_fill-=os->body_returned;
+    if(os->body_fill)
+      memmove(os->body_data,os->body_data+os->body_returned,
+              os->body_fill);
+    os->body_returned=0;
+  }
+
+  /* make sure we have the buffer storage */
+  if(_os_body_expand(os,bytes) || _os_lacing_expand(os,lacing_vals))
+    return -1;
+
+  /* Copy in the submitted packet.  Yes, the copy is a waste; this is
+     the liability of overly clean abstraction for the time being.  It
+     will actually be fairly easy to eliminate the extra copy in the
+     future */
+
+  for (i = 0; i < count; ++i) {
+    memcpy(os->body_data+os->body_fill, iov[i].iov_base, iov[i].iov_len);
+    os->body_fill += (int)iov[i].iov_len;
+  }
+
+  /* Store lacing vals for this packet */
+  for(i=0;i<lacing_vals-1;i++){
+    os->lacing_vals[os->lacing_fill+i]=255;
+    os->granule_vals[os->lacing_fill+i]=os->granulepos;
+  }
+  os->lacing_vals[os->lacing_fill+i]=bytes%255;
+  os->granulepos=os->granule_vals[os->lacing_fill+i]=granulepos;
+
+  /* flag the first segment as the beginning of the packet */
+  os->lacing_vals[os->lacing_fill]|= 0x100;
+
+  os->lacing_fill+=lacing_vals;
+
+  /* for the sake of completeness */
+  os->packetno++;
+
+  if(e_o_s)os->e_o_s=1;
+
+  return(0);
+}
+
+int ogg_stream_packetin(ogg_stream_state *os,ogg_packet *op){
+  ogg_iovec_t iov;
+  iov.iov_base = op->packet;
+  iov.iov_len = op->bytes;
+  return ogg_stream_iovecin(os, &iov, 1, op->e_o_s, op->granulepos);
+}
+
+/* Conditionally flush a page; force==0 will only flush nominal-size
+   pages, force==1 forces us to flush a page regardless of page size
+   so long as there's any data available at all. */
+static int ogg_stream_flush_i(ogg_stream_state *os,ogg_page *og, int force, int nfill){
+  int i;
+  int vals=0;
+  int maxvals=(os->lacing_fill>255?255:os->lacing_fill);
+  int bytes=0;
+  long acc=0;
+  ogg_int64_t granule_pos=-1;
+
+  if(ogg_stream_check(os)) return(0);
+  if(maxvals==0) return(0);
+
+  /* construct a page */
+  /* decide how many segments to include */
+
+  /* If this is the initial header case, the first page must only include
+     the initial header packet */
+  if(os->b_o_s==0){  /* 'initial header page' case */
+    granule_pos=0;
+    for(vals=0;vals<maxvals;vals++){
+      if((os->lacing_vals[vals]&0x0ff)<255){
+        vals++;
+        break;
+      }
+    }
+  }else{
+
+    /* The extra packets_done, packet_just_done logic here attempts to do two things:
+       1) Don't unneccessarily span pages.
+       2) Unless necessary, don't flush pages if there are less than four packets on
+          them; this expands page size to reduce unneccessary overhead if incoming packets
+          are large.
+       These are not necessary behaviors, just 'always better than naive flushing'
+       without requiring an application to explicitly request a specific optimized
+       behavior. We'll want an explicit behavior setup pathway eventually as well. */
+
+    int packets_done=0;
+    int packet_just_done=0;
+    for(vals=0;vals<maxvals;vals++){
+      if(acc>nfill && packet_just_done>=4){
+        force=1;
+        break;
+      }
+      acc+=os->lacing_vals[vals]&0x0ff;
+      if((os->lacing_vals[vals]&0xff)<255){
+        granule_pos=os->granule_vals[vals];
+        packet_just_done=++packets_done;
+      }else
+        packet_just_done=0;
+    }
+    if(vals==255)force=1;
+  }
+
+  if(!force) return(0);
+
+  /* construct the header in temp storage */
+  memcpy(os->header,"OggS",4);
+
+  /* stream structure version */
+  os->header[4]=0x00;
+
+  /* continued packet flag? */
+  os->header[5]=0x00;
+  if((os->lacing_vals[0]&0x100)==0)os->header[5]|=0x01;
+  /* first page flag? */
+  if(os->b_o_s==0)os->header[5]|=0x02;
+  /* last page flag? */
+  if(os->e_o_s && os->lacing_fill==vals)os->header[5]|=0x04;
+  os->b_o_s=1;
+
+  /* 64 bits of PCM position */
+  for(i=6;i<14;i++){
+    os->header[i]=(unsigned char)(granule_pos&0xff);
+    granule_pos>>=8;
+  }
+
+  /* 32 bits of stream serial number */
+  {
+    long serialno=os->serialno;
+    for(i=14;i<18;i++){
+      os->header[i]=(unsigned char)(serialno&0xff);
+      serialno>>=8;
+    }
+  }
+
+  /* 32 bits of page counter (we have both counter and page header
+     because this val can roll over) */
+  if(os->pageno==-1)os->pageno=0; /* because someone called
+                                     stream_reset; this would be a
+                                     strange thing to do in an
+                                     encode stream, but it has
+                                     plausible uses */
+  {
+    long pageno=os->pageno++;
+    for(i=18;i<22;i++){
+      os->header[i]=(unsigned char)(pageno&0xff);
+      pageno>>=8;
+    }
+  }
+
+  /* zero for computation; filled in later */
+  os->header[22]=0;
+  os->header[23]=0;
+  os->header[24]=0;
+  os->header[25]=0;
+
+  /* segment table */
+  os->header[26]=(unsigned char)(vals&0xff);
+  for(i=0;i<vals;i++)
+    bytes+=os->header[i+27]=(unsigned char)(os->lacing_vals[i]&0xff);
+
+  /* set pointers in the ogg_page struct */
+  og->header=os->header;
+  og->header_len=os->header_fill=vals+27;
+  og->body=os->body_data+os->body_returned;
+  og->body_len=bytes;
+
+  /* advance the lacing data and set the body_returned pointer */
+
+  os->lacing_fill-=vals;
+  memmove(os->lacing_vals,os->lacing_vals+vals,os->lacing_fill*sizeof(*os->lacing_vals));
+  memmove(os->granule_vals,os->granule_vals+vals,os->lacing_fill*sizeof(*os->granule_vals));
+  os->body_returned+=bytes;
+
+  /* calculate the checksum */
+
+  ogg_page_checksum_set(og);
+
+  /* done */
+  return(1);
+}
+
+/* This will flush remaining packets into a page (returning nonzero),
+   even if there is not enough data to trigger a flush normally
+   (undersized page). If there are no packets or partial packets to
+   flush, ogg_stream_flush returns 0.  Note that ogg_stream_flush will
+   try to flush a normal sized page like ogg_stream_pageout; a call to
+   ogg_stream_flush does not guarantee that all packets have flushed.
+   Only a return value of 0 from ogg_stream_flush indicates all packet
+   data is flushed into pages.
+
+   since ogg_stream_flush will flush the last page in a stream even if
+   it's undersized, you almost certainly want to use ogg_stream_pageout
+   (and *not* ogg_stream_flush) unless you specifically need to flush
+   a page regardless of size in the middle of a stream. */
+
+int ogg_stream_flush(ogg_stream_state *os,ogg_page *og){
+  return ogg_stream_flush_i(os,og,1,4096);
+}
+
+/* Like the above, but an argument is provided to adjust the nominal
+   page size for applications which are smart enough to provide their
+   own delay based flushing */
+
+int ogg_stream_flush_fill(ogg_stream_state *os,ogg_page *og, int nfill){
+  return ogg_stream_flush_i(os,og,1,nfill);
+}
+
+/* This constructs pages from buffered packet segments.  The pointers
+returned are to static buffers; do not free. The returned buffers are
+good only until the next call (using the same ogg_stream_state) */
+
+int ogg_stream_pageout(ogg_stream_state *os, ogg_page *og){
+  int force=0;
+  if(ogg_stream_check(os)) return 0;
+
+  if((os->e_o_s&&os->lacing_fill) ||          /* 'were done, now flush' case */
+     (os->lacing_fill&&!os->b_o_s))           /* 'initial header page' case */
+    force=1;
+
+  return(ogg_stream_flush_i(os,og,force,4096));
+}
+
+/* Like the above, but an argument is provided to adjust the nominal
+page size for applications which are smart enough to provide their
+own delay based flushing */
+
+int ogg_stream_pageout_fill(ogg_stream_state *os, ogg_page *og, int nfill){
+  int force=0;
+  if(ogg_stream_check(os)) return 0;
+
+  if((os->e_o_s&&os->lacing_fill) ||          /* 'were done, now flush' case */
+     (os->lacing_fill&&!os->b_o_s))           /* 'initial header page' case */
+    force=1;
+
+  return(ogg_stream_flush_i(os,og,force,nfill));
+}
+
+int ogg_stream_eos(ogg_stream_state *os){
+  if(ogg_stream_check(os)) return 1;
+  return os->e_o_s;
+}
+
+/* DECODING PRIMITIVES: packet streaming layer **********************/
+
+/* This has two layers to place more of the multi-serialno and paging
+   control in the application's hands.  First, we expose a data buffer
+   using ogg_sync_buffer().  The app either copies into the
+   buffer, or passes it directly to read(), etc.  We then call
+   ogg_sync_wrote() to tell how many bytes we just added.
+
+   Pages are returned (pointers into the buffer in ogg_sync_state)
+   by ogg_sync_pageout().  The page is then submitted to
+   ogg_stream_pagein() along with the appropriate
+   ogg_stream_state* (ie, matching serialno).  We then get raw
+   packets out calling ogg_stream_packetout() with a
+   ogg_stream_state. */
+
+/* initialize the struct to a known state */
+int ogg_sync_init(ogg_sync_state *oy){
+  if(oy){
+    oy->storage = -1; /* used as a readiness flag */
+    memset(oy,0,sizeof(*oy));
+  }
+  return(0);
+}
+
+/* clear non-flat storage within */
+int ogg_sync_clear(ogg_sync_state *oy){
+  if(oy){
+    if(oy->data)_ogg_free(oy->data);
+    memset(oy,0,sizeof(*oy));
+  }
+  return(0);
+}
+
+int ogg_sync_destroy(ogg_sync_state *oy){
+  if(oy){
+    ogg_sync_clear(oy);
+    _ogg_free(oy);
+  }
+  return(0);
+}
+
+int ogg_sync_check(ogg_sync_state *oy){
+  if(oy->storage<0) return -1;
+  return 0;
+}
+
+char *ogg_sync_buffer(ogg_sync_state *oy, long size){
+  if(ogg_sync_check(oy)) return NULL;
+
+  /* first, clear out any space that has been previously returned */
+  if(oy->returned){
+    oy->fill-=oy->returned;
+    if(oy->fill>0)
+      memmove(oy->data,oy->data+oy->returned,oy->fill);
+    oy->returned=0;
+  }
+
+  if(size>oy->storage-oy->fill){
+    /* We need to extend the internal buffer */
+    long newsize=size+oy->fill+4096; /* an extra page to be nice */
+    void *ret;
+
+    if(oy->data)
+      ret=_ogg_realloc(oy->data,newsize);
+    else
+      ret=_ogg_malloc(newsize);
+    if(!ret){
+      ogg_sync_clear(oy);
+      return NULL;
+    }
+    oy->data=ret;
+    oy->storage=newsize;
+  }
+
+  /* expose a segment at least as large as requested at the fill mark */
+  return((char *)oy->data+oy->fill);
+}
+
+int ogg_sync_wrote(ogg_sync_state *oy, long bytes){
+  if(ogg_sync_check(oy))return -1;
+  if(oy->fill+bytes>oy->storage)return -1;
+  oy->fill+=bytes;
+  return(0);
+}
+
+/* sync the stream.  This is meant to be useful for finding page
+   boundaries.
+
+   return values for this:
+  -n) skipped n bytes
+   0) page not ready; more data (no bytes skipped)
+   n) page synced at current location; page length n bytes
+
+*/
+
+long ogg_sync_pageseek(ogg_sync_state *oy,ogg_page *og){
+  unsigned char *page=oy->data+oy->returned;
+  unsigned char *next;
+  long bytes=oy->fill-oy->returned;
+
+  if(ogg_sync_check(oy))return 0;
+
+  if(oy->headerbytes==0){
+    int headerbytes,i;
+    if(bytes<27)return(0); /* not enough for a header */
+
+    /* verify capture pattern */
+    if(memcmp(page,"OggS",4))goto sync_fail;
+
+    headerbytes=page[26]+27;
+    if(bytes<headerbytes)return(0); /* not enough for header + seg table */
+
+    /* count up body length in the segment table */
+
+    for(i=0;i<page[26];i++)
+      oy->bodybytes+=page[27+i];
+    oy->headerbytes=headerbytes;
+  }
+
+  if(oy->bodybytes+oy->headerbytes>bytes)return(0);
+
+  /* The whole test page is buffered.  Verify the checksum */
+  {
+    /* Grab the checksum bytes, set the header field to zero */
+    char chksum[4];
+    ogg_page log;
+
+    memcpy(chksum,page+22,4);
+    memset(page+22,0,4);
+
+    /* set up a temp page struct and recompute the checksum */
+    log.header=page;
+    log.header_len=oy->headerbytes;
+    log.body=page+oy->headerbytes;
+    log.body_len=oy->bodybytes;
+    ogg_page_checksum_set(&log);
+
+    /* Compare */
+    if(memcmp(chksum,page+22,4)){
+      /* D'oh.  Mismatch! Corrupt page (or miscapture and not a page
+         at all) */
+      /* replace the computed checksum with the one actually read in */
+      memcpy(page+22,chksum,4);
+
+      /* Bad checksum. Lose sync */
+      goto sync_fail;
+    }
+  }
+
+  /* yes, have a whole page all ready to go */
+  {
+    unsigned char *page=oy->data+oy->returned;
+    long bytes;
+
+    if(og){
+      og->header=page;
+      og->header_len=oy->headerbytes;
+      og->body=page+oy->headerbytes;
+      og->body_len=oy->bodybytes;
+    }
+
+    oy->unsynced=0;
+    oy->returned+=(bytes=oy->headerbytes+oy->bodybytes);
+    oy->headerbytes=0;
+    oy->bodybytes=0;
+    return(bytes);
+  }
+
+ sync_fail:
+
+  oy->headerbytes=0;
+  oy->bodybytes=0;
+
+  /* search for possible capture */
+  next=memchr(page+1,'O',bytes-1);
+  if(!next)
+    next=oy->data+oy->fill;
+
+  oy->returned=(int)(next-oy->data);
+  return((long)-(next-page));
+}
+
+/* sync the stream and get a page.  Keep trying until we find a page.
+   Suppress 'sync errors' after reporting the first.
+
+   return values:
+   -1) recapture (hole in data)
+    0) need more data
+    1) page returned
+
+   Returns pointers into buffered data; invalidated by next call to
+   _stream, _clear, _init, or _buffer */
+
+int ogg_sync_pageout(ogg_sync_state *oy, ogg_page *og){
+
+  if(ogg_sync_check(oy))return 0;
+
+  /* all we need to do is verify a page at the head of the stream
+     buffer.  If it doesn't verify, we look for the next potential
+     frame */
+
+  for(;;){
+    long ret=ogg_sync_pageseek(oy,og);
+    if(ret>0){
+      /* have a page */
+      return(1);
+    }
+    if(ret==0){
+      /* need more data */
+      return(0);
+    }
+
+    /* head did not start a synced page... skipped some bytes */
+    if(!oy->unsynced){
+      oy->unsynced=1;
+      return(-1);
+    }
+
+    /* loop. keep looking */
+
+  }
+}
+
+/* add the incoming page to the stream state; we decompose the page
+   into packet segments here as well. */
+
+int ogg_stream_pagein(ogg_stream_state *os, ogg_page *og){
+  unsigned char *header=og->header;
+  unsigned char *body=og->body;
+  long           bodysize=og->body_len;
+  int            segptr=0;
+
+  int version=ogg_page_version(og);
+  int continued=ogg_page_continued(og);
+  int bos=ogg_page_bos(og);
+  int eos=ogg_page_eos(og);
+  ogg_int64_t granulepos=ogg_page_granulepos(og);
+  int serialno=ogg_page_serialno(og);
+  long pageno=ogg_page_pageno(og);
+  int segments=header[26];
+
+  if(ogg_stream_check(os)) return -1;
+
+  /* clean up 'returned data' */
+  {
+    long lr=os->lacing_returned;
+    long br=os->body_returned;
+
+    /* body data */
+    if(br){
+      os->body_fill-=br;
+      if(os->body_fill)
+        memmove(os->body_data,os->body_data+br,os->body_fill);
+      os->body_returned=0;
+    }
+
+    if(lr){
+      /* segment table */
+      if(os->lacing_fill-lr){
+        memmove(os->lacing_vals,os->lacing_vals+lr,
+                (os->lacing_fill-lr)*sizeof(*os->lacing_vals));
+        memmove(os->granule_vals,os->granule_vals+lr,
+                (os->lacing_fill-lr)*sizeof(*os->granule_vals));
+      }
+      os->lacing_fill-=lr;
+      os->lacing_packet-=lr;
+      os->lacing_returned=0;
+    }
+  }
+
+  /* check the serial number */
+  if(serialno!=os->serialno)return(-1);
+  if(version>0)return(-1);
+
+  if(_os_lacing_expand(os,segments+1)) return -1;
+
+  /* are we in sequence? */
+  if(pageno!=os->pageno){
+    int i;
+
+    /* unroll previous partial packet (if any) */
+    for(i=os->lacing_packet;i<os->lacing_fill;i++)
+      os->body_fill-=os->lacing_vals[i]&0xff;
+    os->lacing_fill=os->lacing_packet;
+
+    /* make a note of dropped data in segment table */
+    if(os->pageno!=-1){
+      os->lacing_vals[os->lacing_fill++]=0x400;
+      os->lacing_packet++;
+    }
+  }
+
+  /* are we a 'continued packet' page?  If so, we may need to skip
+     some segments */
+  if(continued){
+    if(os->lacing_fill<1 ||
+       os->lacing_vals[os->lacing_fill-1]==0x400){
+      bos=0;
+      for(;segptr<segments;segptr++){
+        int val=header[27+segptr];
+        body+=val;
+        bodysize-=val;
+        if(val<255){
+          segptr++;
+          break;
+        }
+      }
+    }
+  }
+
+  if(bodysize){
+    if(_os_body_expand(os,bodysize)) return -1;
+    memcpy(os->body_data+os->body_fill,body,bodysize);
+    os->body_fill+=bodysize;
+  }
+
+  {
+    int saved=-1;
+    while(segptr<segments){
+      int val=header[27+segptr];
+      os->lacing_vals[os->lacing_fill]=val;
+      os->granule_vals[os->lacing_fill]=-1;
+
+      if(bos){
+        os->lacing_vals[os->lacing_fill]|=0x100;
+        bos=0;
+      }
+
+      if(val<255)saved=os->lacing_fill;
+
+      os->lacing_fill++;
+      segptr++;
+
+      if(val<255)os->lacing_packet=os->lacing_fill;
+    }
+
+    /* set the granulepos on the last granuleval of the last full packet */
+    if(saved!=-1){
+      os->granule_vals[saved]=granulepos;
+    }
+
+  }
+
+  if(eos){
+    os->e_o_s=1;
+    if(os->lacing_fill>0)
+      os->lacing_vals[os->lacing_fill-1]|=0x200;
+  }
+
+  os->pageno=pageno+1;
+
+  return(0);
+}
+
+/* clear things to an initial state.  Good to call, eg, before seeking */
+int ogg_sync_reset(ogg_sync_state *oy){
+  if(ogg_sync_check(oy))return -1;
+
+  oy->fill=0;
+  oy->returned=0;
+  oy->unsynced=0;
+  oy->headerbytes=0;
+  oy->bodybytes=0;
+  return(0);
+}
+
+int ogg_stream_reset(ogg_stream_state *os){
+  if(ogg_stream_check(os)) return -1;
+
+  os->body_fill=0;
+  os->body_returned=0;
+
+  os->lacing_fill=0;
+  os->lacing_packet=0;
+  os->lacing_returned=0;
+
+  os->header_fill=0;
+
+  os->e_o_s=0;
+  os->b_o_s=0;
+  os->pageno=-1;
+  os->packetno=0;
+  os->granulepos=0;
+
+  return(0);
+}
+
+int ogg_stream_reset_serialno(ogg_stream_state *os,int serialno){
+  if(ogg_stream_check(os)) return -1;
+  ogg_stream_reset(os);
+  os->serialno=serialno;
+  return(0);
+}
+
+static int _packetout(ogg_stream_state *os,ogg_packet *op,int adv){
+
+  /* The last part of decode. We have the stream broken into packet
+     segments.  Now we need to group them into packets (or return the
+     out of sync markers) */
+
+  int ptr=os->lacing_returned;
+
+  if(os->lacing_packet<=ptr)return(0);
+
+  if(os->lacing_vals[ptr]&0x400){
+    /* we need to tell the codec there's a gap; it might need to
+       handle previous packet dependencies. */
+    os->lacing_returned++;
+    os->packetno++;
+    return(-1);
+  }
+
+  if(!op && !adv)return(1); /* just using peek as an inexpensive way
+                               to ask if there's a whole packet
+                               waiting */
+
+  /* Gather the whole packet. We'll have no holes or a partial packet */
+  {
+    int size=os->lacing_vals[ptr]&0xff;
+    long bytes=size;
+    int eos=os->lacing_vals[ptr]&0x200; /* last packet of the stream? */
+    int bos=os->lacing_vals[ptr]&0x100; /* first packet of the stream? */
+
+    while(size==255){
+      int val=os->lacing_vals[++ptr];
+      size=val&0xff;
+      if(val&0x200)eos=0x200;
+      bytes+=size;
+    }
+
+    if(op){
+      op->e_o_s=eos;
+      op->b_o_s=bos;
+      op->packet=os->body_data+os->body_returned;
+      op->packetno=os->packetno;
+      op->granulepos=os->granule_vals[ptr];
+      op->bytes=bytes;
+    }
+
+    if(adv){
+      os->body_returned+=bytes;
+      os->lacing_returned=ptr+1;
+      os->packetno++;
+    }
+  }
+  return(1);
+}
+
+int ogg_stream_packetout(ogg_stream_state *os,ogg_packet *op){
+  if(ogg_stream_check(os)) return 0;
+  return _packetout(os,op,1);
+}
+
+int ogg_stream_packetpeek(ogg_stream_state *os,ogg_packet *op){
+  if(ogg_stream_check(os)) return 0;
+  return _packetout(os,op,0);
+}
+
+void ogg_packet_clear(ogg_packet *op) {
+  _ogg_free(op->packet);
+  memset(op, 0, sizeof(*op));
+}
+
+#ifdef _V_SELFTEST
+#include <stdio.h>
+
+ogg_stream_state os_en, os_de;
+ogg_sync_state oy;
+
+void checkpacket(ogg_packet *op,long len, int no, long pos){
+  long j;
+  static int sequence=0;
+  static int lastno=0;
+
+  if(op->bytes!=len){
+    fprintf(stderr,"incorrect packet length (%ld != %ld)!\n",op->bytes,len);
+    exit(1);
+  }
+  if(op->granulepos!=pos){
+    fprintf(stderr,"incorrect packet granpos (%ld != %ld)!\n",(long)op->granulepos,pos);
+    exit(1);
+  }
+
+  /* packet number just follows sequence/gap; adjust the input number
+     for that */
+  if(no==0){
+    sequence=0;
+  }else{
+    sequence++;
+    if(no>lastno+1)
+      sequence++;
+  }
+  lastno=no;
+  if(op->packetno!=sequence){
+    fprintf(stderr,"incorrect packet sequence %ld != %d\n",
+            (long)(op->packetno),sequence);
+    exit(1);
+  }
+
+  /* Test data */
+  for(j=0;j<op->bytes;j++)
+    if(op->packet[j]!=((j+no)&0xff)){
+      fprintf(stderr,"body data mismatch (1) at pos %ld: %x!=%lx!\n\n",
+              j,op->packet[j],(j+no)&0xff);
+      exit(1);
+    }
+}
+
+void check_page(unsigned char *data,const int *header,ogg_page *og){
+  long j;
+  /* Test data */
+  for(j=0;j<og->body_len;j++)
+    if(og->body[j]!=data[j]){
+      fprintf(stderr,"body data mismatch (2) at pos %ld: %x!=%x!\n\n",
+              j,data[j],og->body[j]);
+      exit(1);
+    }
+
+  /* Test header */
+  for(j=0;j<og->header_len;j++){
+    if(og->header[j]!=header[j]){
+      fprintf(stderr,"header content mismatch at pos %ld:\n",j);
+      for(j=0;j<header[26]+27;j++)
+        fprintf(stderr," (%ld)%02x:%02x",j,header[j],og->header[j]);
+      fprintf(stderr,"\n");
+      exit(1);
+    }
+  }
+  if(og->header_len!=header[26]+27){
+    fprintf(stderr,"header length incorrect! (%ld!=%d)\n",
+            og->header_len,header[26]+27);
+    exit(1);
+  }
+}
+
+void print_header(ogg_page *og){
+  int j;
+  fprintf(stderr,"\nHEADER:\n");
+  fprintf(stderr,"  capture: %c %c %c %c  version: %d  flags: %x\n",
+          og->header[0],og->header[1],og->header[2],og->header[3],
+          (int)og->header[4],(int)og->header[5]);
+
+  fprintf(stderr,"  granulepos: %d  serialno: %d  pageno: %ld\n",
+          (og->header[9]<<24)|(og->header[8]<<16)|
+          (og->header[7]<<8)|og->header[6],
+          (og->header[17]<<24)|(og->header[16]<<16)|
+          (og->header[15]<<8)|og->header[14],
+          ((long)(og->header[21])<<24)|(og->header[20]<<16)|
+          (og->header[19]<<8)|og->header[18]);
+
+  fprintf(stderr,"  checksum: %02x:%02x:%02x:%02x\n  segments: %d (",
+          (int)og->header[22],(int)og->header[23],
+          (int)og->header[24],(int)og->header[25],
+          (int)og->header[26]);
+
+  for(j=27;j<og->header_len;j++)
+    fprintf(stderr,"%d ",(int)og->header[j]);
+  fprintf(stderr,")\n\n");
+}
+
+void copy_page(ogg_page *og){
+  unsigned char *temp=_ogg_malloc(og->header_len);
+  memcpy(temp,og->header,og->header_len);
+  og->header=temp;
+
+  temp=_ogg_malloc(og->body_len);
+  memcpy(temp,og->body,og->body_len);
+  og->body=temp;
+}
+
+void free_page(ogg_page *og){
+  _ogg_free (og->header);
+  _ogg_free (og->body);
+}
+
+void error(void){
+  fprintf(stderr,"error!\n");
+  exit(1);
+}
+
+/* 17 only */
+const int head1_0[] = {0x4f,0x67,0x67,0x53,0,0x06,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0x15,0xed,0xec,0x91,
+                       1,
+                       17};
+
+/* 17, 254, 255, 256, 500, 510, 600 byte, pad */
+const int head1_1[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0x59,0x10,0x6c,0x2c,
+                       1,
+                       17};
+const int head2_1[] = {0x4f,0x67,0x67,0x53,0,0x04,
+                       0x07,0x18,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0x89,0x33,0x85,0xce,
+                       13,
+                       254,255,0,255,1,255,245,255,255,0,
+                       255,255,90};
+
+/* nil packets; beginning,middle,end */
+const int head1_2[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0xff,0x7b,0x23,0x17,
+                       1,
+                       0};
+const int head2_2[] = {0x4f,0x67,0x67,0x53,0,0x04,
+                       0x07,0x28,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0x5c,0x3f,0x66,0xcb,
+                       17,
+                       17,254,255,0,0,255,1,0,255,245,255,255,0,
+                       255,255,90,0};
+
+/* large initial packet */
+const int head1_3[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0x01,0x27,0x31,0xaa,
+                       18,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,255,10};
+
+const int head2_3[] = {0x4f,0x67,0x67,0x53,0,0x04,
+                       0x07,0x08,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0x7f,0x4e,0x8a,0xd2,
+                       4,
+                       255,4,255,0};
+
+
+/* continuing packet test */
+const int head1_4[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0xff,0x7b,0x23,0x17,
+                       1,
+                       0};
+
+const int head2_4[] = {0x4f,0x67,0x67,0x53,0,0x00,
+                       0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0xf8,0x3c,0x19,0x79,
+                       255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255};
+
+const int head3_4[] = {0x4f,0x67,0x67,0x53,0,0x05,
+                       0x07,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,2,0,0,0,
+                       0x38,0xe6,0xb6,0x28,
+                       6,
+                       255,220,255,4,255,0};
+
+
+/* spill expansion test */
+const int head1_4b[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                        0x01,0x02,0x03,0x04,0,0,0,0,
+                        0xff,0x7b,0x23,0x17,
+                        1,
+                        0};
+
+const int head2_4b[] = {0x4f,0x67,0x67,0x53,0,0x00,
+                        0x07,0x10,0x00,0x00,0x00,0x00,0x00,0x00,
+                        0x01,0x02,0x03,0x04,1,0,0,0,
+                        0xce,0x8f,0x17,0x1a,
+                        23,
+                        255,255,255,255,255,255,255,255,
+                        255,255,255,255,255,255,255,255,255,10,255,4,255,0,0};
+
+
+const int head3_4b[] = {0x4f,0x67,0x67,0x53,0,0x04,
+                        0x07,0x14,0x00,0x00,0x00,0x00,0x00,0x00,
+                        0x01,0x02,0x03,0x04,2,0,0,0,
+                        0x9b,0xb2,0x50,0xa1,
+                        1,
+                        0};
+
+/* page with the 255 segment limit */
+const int head1_5[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0xff,0x7b,0x23,0x17,
+                       1,
+                       0};
+
+const int head2_5[] = {0x4f,0x67,0x67,0x53,0,0x00,
+                       0x07,0xfc,0x03,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0xed,0x2a,0x2e,0xa7,
+                       255,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10,10,
+                       10,10,10,10,10,10,10};
+
+const int head3_5[] = {0x4f,0x67,0x67,0x53,0,0x04,
+                       0x07,0x00,0x04,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,2,0,0,0,
+                       0x6c,0x3b,0x82,0x3d,
+                       1,
+                       50};
+
+
+/* packet that overspans over an entire page */
+const int head1_6[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0xff,0x7b,0x23,0x17,
+                       1,
+                       0};
+
+const int head2_6[] = {0x4f,0x67,0x67,0x53,0,0x00,
+                       0x07,0x04,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0x68,0x22,0x7c,0x3d,
+                       255,
+                       100,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255};
+
+const int head3_6[] = {0x4f,0x67,0x67,0x53,0,0x01,
+                       0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+                       0x01,0x02,0x03,0x04,2,0,0,0,
+                       0xf4,0x87,0xba,0xf3,
+                       255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255};
+
+const int head4_6[] = {0x4f,0x67,0x67,0x53,0,0x05,
+                       0x07,0x10,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,3,0,0,0,
+                       0xf7,0x2f,0x6c,0x60,
+                       5,
+                       254,255,4,255,0};
+
+/* packet that overspans over an entire page */
+const int head1_7[] = {0x4f,0x67,0x67,0x53,0,0x02,
+                       0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,0,0,0,0,
+                       0xff,0x7b,0x23,0x17,
+                       1,
+                       0};
+
+const int head2_7[] = {0x4f,0x67,0x67,0x53,0,0x00,
+                       0x07,0x04,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,1,0,0,0,
+                       0x68,0x22,0x7c,0x3d,
+                       255,
+                       100,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255,255,255,
+                       255,255,255,255,255,255};
+
+const int head3_7[] = {0x4f,0x67,0x67,0x53,0,0x05,
+                       0x07,0x08,0x00,0x00,0x00,0x00,0x00,0x00,
+                       0x01,0x02,0x03,0x04,2,0,0,0,
+                       0xd4,0xe0,0x60,0xe5,
+                       1,
+                       0};
+
+void test_pack(const int *pl, const int **headers, int byteskip,
+               int pageskip, int packetskip){
+  unsigned char *data=_ogg_malloc(1024*1024); /* for scripted test cases only */
+  long inptr=0;
+  long outptr=0;
+  long deptr=0;
+  long depacket=0;
+  long granule_pos=7,pageno=0;
+  int i,j,packets,pageout=pageskip;
+  int eosflag=0;
+  int bosflag=0;
+
+  int byteskipcount=0;
+
+  ogg_stream_reset(&os_en);
+  ogg_stream_reset(&os_de);
+  ogg_sync_reset(&oy);
+
+  for(packets=0;packets<packetskip;packets++)
+    depacket+=pl[packets];
+
+  for(packets=0;;packets++)if(pl[packets]==-1)break;
+
+  for(i=0;i<packets;i++){
+    /* construct a test packet */
+    ogg_packet op;
+    int len=pl[i];
+
+    op.packet=data+inptr;
+    op.bytes=len;
+    op.e_o_s=(pl[i+1]<0?1:0);
+    op.granulepos=granule_pos;
+
+    granule_pos+=1024;
+
+    for(j=0;j<len;j++)data[inptr++]=i+j;
+
+    /* submit the test packet */
+    ogg_stream_packetin(&os_en,&op);
+
+    /* retrieve any finished pages */
+    {
+      ogg_page og;
+
+      while(ogg_stream_pageout(&os_en,&og)){
+        /* We have a page.  Check it carefully */
+
+        fprintf(stderr,"%ld, ",pageno);
+
+        if(headers[pageno]==NULL){
+          fprintf(stderr,"coded too many pages!\n");
+          exit(1);
+        }
+
+        check_page(data+outptr,headers[pageno],&og);
+
+        outptr+=og.body_len;
+        pageno++;
+        if(pageskip){
+          bosflag=1;
+          pageskip--;
+          deptr+=og.body_len;
+        }
+
+        /* have a complete page; submit it to sync/decode */
+
+        {
+          ogg_page og_de;
+          ogg_packet op_de,op_de2;
+          char *buf=ogg_sync_buffer(&oy,og.header_len+og.body_len);
+          char *next=buf;
+          byteskipcount+=og.header_len;
+          if(byteskipcount>byteskip){
+            memcpy(next,og.header,byteskipcount-byteskip);
+            next+=byteskipcount-byteskip;
+            byteskipcount=byteskip;
+          }
+
+          byteskipcount+=og.body_len;
+          if(byteskipcount>byteskip){
+            memcpy(next,og.body,byteskipcount-byteskip);
+            next+=byteskipcount-byteskip;
+            byteskipcount=byteskip;
+          }
+
+          ogg_sync_wrote(&oy,next-buf);
+
+          while(1){
+            int ret=ogg_sync_pageout(&oy,&og_de);
+            if(ret==0)break;
+            if(ret<0)continue;
+            /* got a page.  Happy happy.  Verify that it's good. */
+
+            fprintf(stderr,"(%d), ",pageout);
+
+            check_page(data+deptr,headers[pageout],&og_de);
+            deptr+=og_de.body_len;
+            pageout++;
+
+            /* submit it to deconstitution */
+            ogg_stream_pagein(&os_de,&og_de);
+
+            /* packets out? */
+            while(ogg_stream_packetpeek(&os_de,&op_de2)>0){
+              ogg_stream_packetpeek(&os_de,NULL);
+              ogg_stream_packetout(&os_de,&op_de); /* just catching them all */
+
+              /* verify peek and out match */
+              if(memcmp(&op_de,&op_de2,sizeof(op_de))){
+                fprintf(stderr,"packetout != packetpeek! pos=%ld\n",
+                        depacket);
+                exit(1);
+              }
+
+              /* verify the packet! */
+              /* check data */
+              if(memcmp(data+depacket,op_de.packet,op_de.bytes)){
+                fprintf(stderr,"packet data mismatch in decode! pos=%ld\n",
+                        depacket);
+                exit(1);
+              }
+              /* check bos flag */
+              if(bosflag==0 && op_de.b_o_s==0){
+                fprintf(stderr,"b_o_s flag not set on packet!\n");
+                exit(1);
+              }
+              if(bosflag && op_de.b_o_s){
+                fprintf(stderr,"b_o_s flag incorrectly set on packet!\n");
+                exit(1);
+              }
+              bosflag=1;
+              depacket+=op_de.bytes;
+
+              /* check eos flag */
+              if(eosflag){
+                fprintf(stderr,"Multiple decoded packets with eos flag!\n");
+                exit(1);
+              }
+
+              if(op_de.e_o_s)eosflag=1;
+
+              /* check granulepos flag */
+              if(op_de.granulepos!=-1){
+                fprintf(stderr," granule:%ld ",(long)op_de.granulepos);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  _ogg_free(data);
+  if(headers[pageno]!=NULL){
+    fprintf(stderr,"did not write last page!\n");
+    exit(1);
+  }
+  if(headers[pageout]!=NULL){
+    fprintf(stderr,"did not decode last page!\n");
+    exit(1);
+  }
+  if(inptr!=outptr){
+    fprintf(stderr,"encoded page data incomplete!\n");
+    exit(1);
+  }
+  if(inptr!=deptr){
+    fprintf(stderr,"decoded page data incomplete!\n");
+    exit(1);
+  }
+  if(inptr!=depacket){
+    fprintf(stderr,"decoded packet data incomplete!\n");
+    exit(1);
+  }
+  if(!eosflag){
+    fprintf(stderr,"Never got a packet with EOS set!\n");
+    exit(1);
+  }
+  fprintf(stderr,"ok.\n");
+}
+
+int main(void){
+
+  ogg_stream_init(&os_en,0x04030201);
+  ogg_stream_init(&os_de,0x04030201);
+  ogg_sync_init(&oy);
+
+  /* Exercise each code path in the framing code.  Also verify that
+     the checksums are working.  */
+
+  {
+    /* 17 only */
+    const int packets[]={17, -1};
+    const int *headret[]={head1_0,NULL};
+
+    fprintf(stderr,"testing single page encoding... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* 17, 254, 255, 256, 500, 510, 600 byte, pad */
+    const int packets[]={17, 254, 255, 256, 500, 510, 600, -1};
+    const int *headret[]={head1_1,head2_1,NULL};
+
+    fprintf(stderr,"testing basic page encoding... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* nil packets; beginning,middle,end */
+    const int packets[]={0,17, 254, 255, 0, 256, 0, 500, 510, 600, 0, -1};
+    const int *headret[]={head1_2,head2_2,NULL};
+
+    fprintf(stderr,"testing basic nil packets... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* large initial packet */
+    const int packets[]={4345,259,255,-1};
+    const int *headret[]={head1_3,head2_3,NULL};
+
+    fprintf(stderr,"testing initial-packet lacing > 4k... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* continuing packet test; with page spill expansion, we have to
+       overflow the lacing table. */
+    const int packets[]={0,65500,259,255,-1};
+    const int *headret[]={head1_4,head2_4,head3_4,NULL};
+
+    fprintf(stderr,"testing single packet page span... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* spill expand packet test */
+    const int packets[]={0,4345,259,255,0,0,-1};
+    const int *headret[]={head1_4b,head2_4b,head3_4b,NULL};
+
+    fprintf(stderr,"testing page spill expansion... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  /* page with the 255 segment limit */
+  {
+
+    const int packets[]={0,10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,10,
+                   10,10,10,10,10,10,10,50,-1};
+    const int *headret[]={head1_5,head2_5,head3_5,NULL};
+
+    fprintf(stderr,"testing max packet segments... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* packet that overspans over an entire page */
+    const int packets[]={0,100,130049,259,255,-1};
+    const int *headret[]={head1_6,head2_6,head3_6,head4_6,NULL};
+
+    fprintf(stderr,"testing very large packets... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+  {
+    /* test for the libogg 1.1.1 resync in large continuation bug
+       found by Josh Coalson)  */
+    const int packets[]={0,100,130049,259,255,-1};
+    const int *headret[]={head1_6,head2_6,head3_6,head4_6,NULL};
+
+    fprintf(stderr,"testing continuation resync in very large packets... ");
+    test_pack(packets,headret,100,2,3);
+  }
+
+  {
+    /* term only page.  why not? */
+    const int packets[]={0,100,64770,-1};
+    const int *headret[]={head1_7,head2_7,head3_7,NULL};
+
+    fprintf(stderr,"testing zero data page (1 nil packet)... ");
+    test_pack(packets,headret,0,0,0);
+  }
+
+
+
+  {
+    /* build a bunch of pages for testing */
+    unsigned char *data=_ogg_malloc(1024*1024);
+    int pl[]={0, 1,1,98,4079, 1,1,2954,2057, 76,34,912,0,234,1000,1000, 1000,300,-1};
+    int inptr=0,i,j;
+    ogg_page og[5];
+
+    ogg_stream_reset(&os_en);
+
+    for(i=0;pl[i]!=-1;i++){
+      ogg_packet op;
+      int len=pl[i];
+
+      op.packet=data+inptr;
+      op.bytes=len;
+      op.e_o_s=(pl[i+1]<0?1:0);
+      op.granulepos=(i+1)*1000;
+
+      for(j=0;j<len;j++)data[inptr++]=i+j;
+      ogg_stream_packetin(&os_en,&op);
+    }
+
+    _ogg_free(data);
+
+    /* retrieve finished pages */
+    for(i=0;i<5;i++){
+      if(ogg_stream_pageout(&os_en,&og[i])==0){
+        fprintf(stderr,"Too few pages output building sync tests!\n");
+        exit(1);
+      }
+      copy_page(&og[i]);
+    }
+
+    /* Test lost pages on pagein/packetout: no rollback */
+    {
+      ogg_page temp;
+      ogg_packet test;
+
+      fprintf(stderr,"Testing loss of pages... ");
+
+      ogg_sync_reset(&oy);
+      ogg_stream_reset(&os_de);
+      for(i=0;i<5;i++){
+        memcpy(ogg_sync_buffer(&oy,og[i].header_len),og[i].header,
+               og[i].header_len);
+        ogg_sync_wrote(&oy,og[i].header_len);
+        memcpy(ogg_sync_buffer(&oy,og[i].body_len),og[i].body,og[i].body_len);
+        ogg_sync_wrote(&oy,og[i].body_len);
+      }
+
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+      ogg_sync_pageout(&oy,&temp);
+      /* skip */
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+
+      /* do we get the expected results/packets? */
+
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,0,0,0);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,1,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,2,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,98,3,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,4079,4,5000);
+      if(ogg_stream_packetout(&os_de,&test)!=-1){
+        fprintf(stderr,"Error: loss of page did not return error\n");
+        exit(1);
+      }
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,76,9,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,34,10,-1);
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* Test lost pages on pagein/packetout: rollback with continuation */
+    {
+      ogg_page temp;
+      ogg_packet test;
+
+      fprintf(stderr,"Testing loss of pages (rollback required)... ");
+
+      ogg_sync_reset(&oy);
+      ogg_stream_reset(&os_de);
+      for(i=0;i<5;i++){
+        memcpy(ogg_sync_buffer(&oy,og[i].header_len),og[i].header,
+               og[i].header_len);
+        ogg_sync_wrote(&oy,og[i].header_len);
+        memcpy(ogg_sync_buffer(&oy,og[i].body_len),og[i].body,og[i].body_len);
+        ogg_sync_wrote(&oy,og[i].body_len);
+      }
+
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+      ogg_sync_pageout(&oy,&temp);
+      /* skip */
+      ogg_sync_pageout(&oy,&temp);
+      ogg_stream_pagein(&os_de,&temp);
+
+      /* do we get the expected results/packets? */
+
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,0,0,0);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,1,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,2,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,98,3,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,4079,4,5000);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,5,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,1,6,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,2954,7,-1);
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,2057,8,9000);
+      if(ogg_stream_packetout(&os_de,&test)!=-1){
+        fprintf(stderr,"Error: loss of page did not return error\n");
+        exit(1);
+      }
+      if(ogg_stream_packetout(&os_de,&test)!=1)error();
+      checkpacket(&test,300,17,18000);
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* the rest only test sync */
+    {
+      ogg_page og_de;
+      /* Test fractional page inputs: incomplete capture */
+      fprintf(stderr,"Testing sync on partial inputs... ");
+      ogg_sync_reset(&oy);
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header,
+             3);
+      ogg_sync_wrote(&oy,3);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      /* Test fractional page inputs: incomplete fixed header */
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header+3,
+             20);
+      ogg_sync_wrote(&oy,20);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      /* Test fractional page inputs: incomplete header */
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header+23,
+             5);
+      ogg_sync_wrote(&oy,5);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      /* Test fractional page inputs: incomplete body */
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header+28,
+             og[1].header_len-28);
+      ogg_sync_wrote(&oy,og[1].header_len-28);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,1000);
+      ogg_sync_wrote(&oy,1000);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body+1000,
+             og[1].body_len-1000);
+      ogg_sync_wrote(&oy,og[1].body_len-1000);
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* Test fractional page inputs: page + incomplete capture */
+    {
+      ogg_page og_de;
+      fprintf(stderr,"Testing sync on 1+partial inputs... ");
+      ogg_sync_reset(&oy);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header,
+             og[1].header_len);
+      ogg_sync_wrote(&oy,og[1].header_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,
+             og[1].body_len);
+      ogg_sync_wrote(&oy,og[1].body_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header,
+             20);
+      ogg_sync_wrote(&oy,20);
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header+20,
+             og[1].header_len-20);
+      ogg_sync_wrote(&oy,og[1].header_len-20);
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,
+             og[1].body_len);
+      ogg_sync_wrote(&oy,og[1].body_len);
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* Test recapture: garbage + page */
+    {
+      ogg_page og_de;
+      fprintf(stderr,"Testing search for capture... ");
+      ogg_sync_reset(&oy);
+
+      /* 'garbage' */
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,
+             og[1].body_len);
+      ogg_sync_wrote(&oy,og[1].body_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header,
+             og[1].header_len);
+      ogg_sync_wrote(&oy,og[1].header_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,
+             og[1].body_len);
+      ogg_sync_wrote(&oy,og[1].body_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[2].header_len),og[2].header,
+             20);
+      ogg_sync_wrote(&oy,20);
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+
+      memcpy(ogg_sync_buffer(&oy,og[2].header_len),og[2].header+20,
+             og[2].header_len-20);
+      ogg_sync_wrote(&oy,og[2].header_len-20);
+      memcpy(ogg_sync_buffer(&oy,og[2].body_len),og[2].body,
+             og[2].body_len);
+      ogg_sync_wrote(&oy,og[2].body_len);
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* Test recapture: page + garbage + page */
+    {
+      ogg_page og_de;
+      fprintf(stderr,"Testing recapture... ");
+      ogg_sync_reset(&oy);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].header_len),og[1].header,
+             og[1].header_len);
+      ogg_sync_wrote(&oy,og[1].header_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[1].body_len),og[1].body,
+             og[1].body_len);
+      ogg_sync_wrote(&oy,og[1].body_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[2].header_len),og[2].header,
+             og[2].header_len);
+      ogg_sync_wrote(&oy,og[2].header_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[2].header_len),og[2].header,
+             og[2].header_len);
+      ogg_sync_wrote(&oy,og[2].header_len);
+
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+
+      memcpy(ogg_sync_buffer(&oy,og[2].body_len),og[2].body,
+             og[2].body_len-5);
+      ogg_sync_wrote(&oy,og[2].body_len-5);
+
+      memcpy(ogg_sync_buffer(&oy,og[3].header_len),og[3].header,
+             og[3].header_len);
+      ogg_sync_wrote(&oy,og[3].header_len);
+
+      memcpy(ogg_sync_buffer(&oy,og[3].body_len),og[3].body,
+             og[3].body_len);
+      ogg_sync_wrote(&oy,og[3].body_len);
+
+      if(ogg_sync_pageout(&oy,&og_de)>0)error();
+      if(ogg_sync_pageout(&oy,&og_de)<=0)error();
+
+      fprintf(stderr,"ok.\n");
+    }
+
+    /* Free page data that was previously copied */
+    {
+      for(i=0;i<5;i++){
+        free_page(&og[i]);
+      }
+    }
+  }
+
+  return(0);
+}
+
+#endif
diff --git a/src/main/jni/opus/ogg/ogg.h b/src/main/jni/opus/ogg/ogg.h
new file mode 100644
index 000000000..cea4ebed7
--- /dev/null
+++ b/src/main/jni/opus/ogg/ogg.h
@@ -0,0 +1,210 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2007             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: toplevel libogg include
+ last mod: $Id: ogg.h 18044 2011-08-01 17:55:20Z gmaxwell $
+
+ ********************************************************************/
+#ifndef _OGG_H
+#define _OGG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <ogg/os_types.h>
+
+typedef struct {
+  void *iov_base;
+  size_t iov_len;
+} ogg_iovec_t;
+
+typedef struct {
+  long endbyte;
+  int  endbit;
+
+  unsigned char *buffer;
+  unsigned char *ptr;
+  long storage;
+} oggpack_buffer;
+
+/* ogg_page is used to encapsulate the data in one Ogg bitstream page *****/
+
+typedef struct {
+  unsigned char *header;
+  long header_len;
+  unsigned char *body;
+  long body_len;
+} ogg_page;
+
+/* ogg_stream_state contains the current encode/decode state of a logical
+   Ogg bitstream **********************************************************/
+
+typedef struct {
+  unsigned char   *body_data;    /* bytes from packet bodies */
+  long    body_storage;          /* storage elements allocated */
+  long    body_fill;             /* elements stored; fill mark */
+  long    body_returned;         /* elements of fill returned */
+
+
+  int     *lacing_vals;      /* The values that will go to the segment table */
+  ogg_int64_t *granule_vals; /* granulepos values for headers. Not compact
+                                this way, but it is simple coupled to the
+                                lacing fifo */
+  long    lacing_storage;
+  long    lacing_fill;
+  long    lacing_packet;
+  long    lacing_returned;
+
+  unsigned char    header[282];      /* working space for header encode */
+  int              header_fill;
+
+  int     e_o_s;          /* set when we have buffered the last packet in the
+                             logical bitstream */
+  int     b_o_s;          /* set after we've written the initial page
+                             of a logical bitstream */
+  long    serialno;
+  long    pageno;
+  ogg_int64_t  packetno;  /* sequence number for decode; the framing
+                             knows where there's a hole in the data,
+                             but we need coupling so that the codec
+                             (which is in a separate abstraction
+                             layer) also knows about the gap */
+  ogg_int64_t   granulepos;
+
+} ogg_stream_state;
+
+/* ogg_packet is used to encapsulate the data and metadata belonging
+   to a single raw Ogg/Vorbis packet *************************************/
+
+typedef struct {
+  unsigned char *packet;
+  long  bytes;
+  long  b_o_s;
+  long  e_o_s;
+
+  ogg_int64_t  granulepos;
+
+  ogg_int64_t  packetno;     /* sequence number for decode; the framing
+                                knows where there's a hole in the data,
+                                but we need coupling so that the codec
+                                (which is in a separate abstraction
+                                layer) also knows about the gap */
+} ogg_packet;
+
+typedef struct {
+  unsigned char *data;
+  int storage;
+  int fill;
+  int returned;
+
+  int unsynced;
+  int headerbytes;
+  int bodybytes;
+} ogg_sync_state;
+
+/* Ogg BITSTREAM PRIMITIVES: bitstream ************************/
+
+extern void  oggpack_writeinit(oggpack_buffer *b);
+extern int   oggpack_writecheck(oggpack_buffer *b);
+extern void  oggpack_writetrunc(oggpack_buffer *b,long bits);
+extern void  oggpack_writealign(oggpack_buffer *b);
+extern void  oggpack_writecopy(oggpack_buffer *b,void *source,long bits);
+extern void  oggpack_reset(oggpack_buffer *b);
+extern void  oggpack_writeclear(oggpack_buffer *b);
+extern void  oggpack_readinit(oggpack_buffer *b,unsigned char *buf,int bytes);
+extern void  oggpack_write(oggpack_buffer *b,unsigned long value,int bits);
+extern long  oggpack_look(oggpack_buffer *b,int bits);
+extern long  oggpack_look1(oggpack_buffer *b);
+extern void  oggpack_adv(oggpack_buffer *b,int bits);
+extern void  oggpack_adv1(oggpack_buffer *b);
+extern long  oggpack_read(oggpack_buffer *b,int bits);
+extern long  oggpack_read1(oggpack_buffer *b);
+extern long  oggpack_bytes(oggpack_buffer *b);
+extern long  oggpack_bits(oggpack_buffer *b);
+extern unsigned char *oggpack_get_buffer(oggpack_buffer *b);
+
+extern void  oggpackB_writeinit(oggpack_buffer *b);
+extern int   oggpackB_writecheck(oggpack_buffer *b);
+extern void  oggpackB_writetrunc(oggpack_buffer *b,long bits);
+extern void  oggpackB_writealign(oggpack_buffer *b);
+extern void  oggpackB_writecopy(oggpack_buffer *b,void *source,long bits);
+extern void  oggpackB_reset(oggpack_buffer *b);
+extern void  oggpackB_writeclear(oggpack_buffer *b);
+extern void  oggpackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes);
+extern void  oggpackB_write(oggpack_buffer *b,unsigned long value,int bits);
+extern long  oggpackB_look(oggpack_buffer *b,int bits);
+extern long  oggpackB_look1(oggpack_buffer *b);
+extern void  oggpackB_adv(oggpack_buffer *b,int bits);
+extern void  oggpackB_adv1(oggpack_buffer *b);
+extern long  oggpackB_read(oggpack_buffer *b,int bits);
+extern long  oggpackB_read1(oggpack_buffer *b);
+extern long  oggpackB_bytes(oggpack_buffer *b);
+extern long  oggpackB_bits(oggpack_buffer *b);
+extern unsigned char *oggpackB_get_buffer(oggpack_buffer *b);
+
+/* Ogg BITSTREAM PRIMITIVES: encoding **************************/
+
+extern int      ogg_stream_packetin(ogg_stream_state *os, ogg_packet *op);
+extern int      ogg_stream_iovecin(ogg_stream_state *os, ogg_iovec_t *iov,
+                                   int count, long e_o_s, ogg_int64_t granulepos);
+extern int      ogg_stream_pageout(ogg_stream_state *os, ogg_page *og);
+extern int      ogg_stream_pageout_fill(ogg_stream_state *os, ogg_page *og, int nfill);
+extern int      ogg_stream_flush(ogg_stream_state *os, ogg_page *og);
+extern int      ogg_stream_flush_fill(ogg_stream_state *os, ogg_page *og, int nfill);
+
+/* Ogg BITSTREAM PRIMITIVES: decoding **************************/
+
+extern int      ogg_sync_init(ogg_sync_state *oy);
+extern int      ogg_sync_clear(ogg_sync_state *oy);
+extern int      ogg_sync_reset(ogg_sync_state *oy);
+extern int      ogg_sync_destroy(ogg_sync_state *oy);
+extern int      ogg_sync_check(ogg_sync_state *oy);
+
+extern char    *ogg_sync_buffer(ogg_sync_state *oy, long size);
+extern int      ogg_sync_wrote(ogg_sync_state *oy, long bytes);
+extern long     ogg_sync_pageseek(ogg_sync_state *oy,ogg_page *og);
+extern int      ogg_sync_pageout(ogg_sync_state *oy, ogg_page *og);
+extern int      ogg_stream_pagein(ogg_stream_state *os, ogg_page *og);
+extern int      ogg_stream_packetout(ogg_stream_state *os,ogg_packet *op);
+extern int      ogg_stream_packetpeek(ogg_stream_state *os,ogg_packet *op);
+
+/* Ogg BITSTREAM PRIMITIVES: general ***************************/
+
+extern int      ogg_stream_init(ogg_stream_state *os,int serialno);
+extern int      ogg_stream_clear(ogg_stream_state *os);
+extern int      ogg_stream_reset(ogg_stream_state *os);
+extern int      ogg_stream_reset_serialno(ogg_stream_state *os,int serialno);
+extern int      ogg_stream_destroy(ogg_stream_state *os);
+extern int      ogg_stream_check(ogg_stream_state *os);
+extern int      ogg_stream_eos(ogg_stream_state *os);
+
+extern void     ogg_page_checksum_set(ogg_page *og);
+
+extern int      ogg_page_version(const ogg_page *og);
+extern int      ogg_page_continued(const ogg_page *og);
+extern int      ogg_page_bos(const ogg_page *og);
+extern int      ogg_page_eos(const ogg_page *og);
+extern ogg_int64_t  ogg_page_granulepos(const ogg_page *og);
+extern int      ogg_page_serialno(const ogg_page *og);
+extern long     ogg_page_pageno(const ogg_page *og);
+extern int      ogg_page_packets(const ogg_page *og);
+
+extern void     ogg_packet_clear(ogg_packet *op);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _OGG_H */
diff --git a/src/main/jni/opus/ogg/os_types.h b/src/main/jni/opus/ogg/os_types.h
new file mode 100644
index 000000000..d6691b703
--- /dev/null
+++ b/src/main/jni/opus/ogg/os_types.h
@@ -0,0 +1,147 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: #ifdef jail to whip a few platforms into the UNIX ideal.
+ last mod: $Id: os_types.h 17712 2010-12-03 17:10:02Z xiphmont $
+
+ ********************************************************************/
+#ifndef _OS_TYPES_H
+#define _OS_TYPES_H
+
+/* make it easy on the folks that want to compile the libs with a
+   different malloc than stdlib */
+#define _ogg_malloc  malloc
+#define _ogg_calloc  calloc
+#define _ogg_realloc realloc
+#define _ogg_free    free
+
+#if defined(_WIN32) 
+
+#  if defined(__CYGWIN__)
+#    include <stdint.h>
+     typedef int16_t ogg_int16_t;
+     typedef uint16_t ogg_uint16_t;
+     typedef int32_t ogg_int32_t;
+     typedef uint32_t ogg_uint32_t;
+     typedef int64_t ogg_int64_t;
+     typedef uint64_t ogg_uint64_t;
+#  elif defined(__MINGW32__)
+#    include <sys/types.h>
+     typedef short ogg_int16_t;
+     typedef unsigned short ogg_uint16_t;
+     typedef int ogg_int32_t;
+     typedef unsigned int ogg_uint32_t;
+     typedef long long ogg_int64_t;
+     typedef unsigned long long ogg_uint64_t;
+#  elif defined(__MWERKS__)
+     typedef long long ogg_int64_t;
+     typedef int ogg_int32_t;
+     typedef unsigned int ogg_uint32_t;
+     typedef short ogg_int16_t;
+     typedef unsigned short ogg_uint16_t;
+#  else
+     /* MSVC/Borland */
+     typedef __int64 ogg_int64_t;
+     typedef __int32 ogg_int32_t;
+     typedef unsigned __int32 ogg_uint32_t;
+     typedef __int16 ogg_int16_t;
+     typedef unsigned __int16 ogg_uint16_t;
+#  endif
+
+#elif defined(__MACOS__)
+
+#  include <sys/types.h>
+   typedef SInt16 ogg_int16_t;
+   typedef UInt16 ogg_uint16_t;
+   typedef SInt32 ogg_int32_t;
+   typedef UInt32 ogg_uint32_t;
+   typedef SInt64 ogg_int64_t;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+#  include <inttypes.h>
+   typedef int16_t ogg_int16_t;
+   typedef uint16_t ogg_uint16_t;
+   typedef int32_t ogg_int32_t;
+   typedef uint32_t ogg_uint32_t;
+   typedef int64_t ogg_int64_t;
+
+#elif defined(__HAIKU__)
+
+  /* Haiku */
+#  include <sys/types.h>
+   typedef short ogg_int16_t;
+   typedef unsigned short ogg_uint16_t;
+   typedef int ogg_int32_t;
+   typedef unsigned int ogg_uint32_t;
+   typedef long long ogg_int64_t;
+
+#elif defined(__BEOS__)
+
+   /* Be */
+#  include <inttypes.h>
+   typedef int16_t ogg_int16_t;
+   typedef uint16_t ogg_uint16_t;
+   typedef int32_t ogg_int32_t;
+   typedef uint32_t ogg_uint32_t;
+   typedef int64_t ogg_int64_t;
+
+#elif defined (__EMX__)
+
+   /* OS/2 GCC */
+   typedef short ogg_int16_t;
+   typedef unsigned short ogg_uint16_t;
+   typedef int ogg_int32_t;
+   typedef unsigned int ogg_uint32_t;
+   typedef long long ogg_int64_t;
+
+#elif defined (DJGPP)
+
+   /* DJGPP */
+   typedef short ogg_int16_t;
+   typedef int ogg_int32_t;
+   typedef unsigned int ogg_uint32_t;
+   typedef long long ogg_int64_t;
+
+#elif defined(R5900)
+
+   /* PS2 EE */
+   typedef long ogg_int64_t;
+   typedef int ogg_int32_t;
+   typedef unsigned ogg_uint32_t;
+   typedef short ogg_int16_t;
+
+#elif defined(__SYMBIAN32__)
+
+   /* Symbian GCC */
+   typedef signed short ogg_int16_t;
+   typedef unsigned short ogg_uint16_t;
+   typedef signed int ogg_int32_t;
+   typedef unsigned int ogg_uint32_t;
+   typedef long long int ogg_int64_t;
+
+#elif defined(__TMS320C6X__)
+
+   /* TI C64x compiler */
+   typedef signed short ogg_int16_t;
+   typedef unsigned short ogg_uint16_t;
+   typedef signed int ogg_int32_t;
+   typedef unsigned int ogg_uint32_t;
+   typedef long long int ogg_int64_t;
+
+#else
+
+#  include <ogg/config_types.h>
+
+#endif
+
+#endif  /* _OS_TYPES_H */
diff --git a/src/main/jni/opus/opusfile/info.c b/src/main/jni/opus/opusfile/info.c
new file mode 100644
index 000000000..6cf98516a
--- /dev/null
+++ b/src/main/jni/opus/opusfile/info.c
@@ -0,0 +1,683 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "internal.h"
+#include <limits.h>
+#include <string.h>
+
+static unsigned op_parse_uint16le(const unsigned char *_data){
+  return _data[0]|_data[1]<<8;
+}
+
+static int op_parse_int16le(const unsigned char *_data){
+  int ret;
+  ret=_data[0]|_data[1]<<8;
+  return (ret^0x8000)-0x8000;
+}
+
+static opus_uint32 op_parse_uint32le(const unsigned char *_data){
+  return _data[0]|_data[1]<<8|_data[2]<<16|_data[3]<<24;
+}
+
+static opus_uint32 op_parse_uint32be(const unsigned char *_data){
+  return _data[3]|_data[2]<<8|_data[1]<<16|_data[0]<<24;
+}
+
+int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){
+  OpusHead head;
+  if(_len<8)return OP_ENOTFORMAT;
+  if(memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT;
+  if(_len<9)return OP_EBADHEADER;
+  head.version=_data[8];
+  if(head.version>15)return OP_EVERSION;
+  if(_len<19)return OP_EBADHEADER;
+  head.channel_count=_data[9];
+  head.pre_skip=op_parse_uint16le(_data+10);
+  head.input_sample_rate=op_parse_uint32le(_data+12);
+  head.output_gain=op_parse_int16le(_data+16);
+  head.mapping_family=_data[18];
+  if(head.mapping_family==0){
+    if(head.channel_count<1||head.channel_count>2)return OP_EBADHEADER;
+    if(head.version<=1&&_len>19)return OP_EBADHEADER;
+    head.stream_count=1;
+    head.coupled_count=head.channel_count-1;
+    if(_head!=NULL){
+      _head->mapping[0]=0;
+      _head->mapping[1]=1;
+    }
+  }
+  else if(head.mapping_family==1){
+    size_t size;
+    int    ci;
+    if(head.channel_count<1||head.channel_count>8)return OP_EBADHEADER;
+    size=21+head.channel_count;
+    if(_len<size||head.version<=1&&_len>size)return OP_EBADHEADER;
+    head.stream_count=_data[19];
+    if(head.stream_count<1)return OP_EBADHEADER;
+    head.coupled_count=_data[20];
+    if(head.coupled_count>head.stream_count)return OP_EBADHEADER;
+    for(ci=0;ci<head.channel_count;ci++){
+      if(_data[21+ci]>=head.stream_count+head.coupled_count
+       &&_data[21+ci]!=255){
+        return OP_EBADHEADER;
+      }
+    }
+    if(_head!=NULL)memcpy(_head->mapping,_data+21,head.channel_count);
+  }
+  /*General purpose players should not attempt to play back content with
+     channel mapping family 255.*/
+  else if(head.mapping_family==255)return OP_EIMPL;
+  /*No other channel mapping families are currently defined.*/
+  else return OP_EBADHEADER;
+  if(_head!=NULL)memcpy(_head,&head,head.mapping-(unsigned char *)&head);
+  return 0;
+}
+
+void opus_tags_init(OpusTags *_tags){
+  memset(_tags,0,sizeof(*_tags));
+}
+
+void opus_tags_clear(OpusTags *_tags){
+  int ci;
+  for(ci=_tags->comments;ci-->0;)_ogg_free(_tags->user_comments[ci]);
+  _ogg_free(_tags->user_comments);
+  _ogg_free(_tags->comment_lengths);
+  _ogg_free(_tags->vendor);
+}
+
+/*Ensure there's room for up to _ncomments comments.*/
+static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){
+  char   **user_comments;
+  int     *comment_lengths;
+  size_t   size;
+  if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT;
+  size=sizeof(*_tags->comment_lengths)*(_ncomments+1);
+  if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT;
+  comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size);
+  if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT;
+  comment_lengths[_ncomments]=0;
+  _tags->comment_lengths=comment_lengths;
+  size=sizeof(*_tags->user_comments)*(_ncomments+1);
+  if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT;
+  user_comments=(char **)_ogg_realloc(_tags->user_comments,size);
+  if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT;
+  user_comments[_ncomments]=NULL;
+  _tags->user_comments=user_comments;
+  return 0;
+}
+
+/*Duplicate a (possibly non-NUL terminated) string with a known length.*/
+static char *op_strdup_with_len(const char *_s,size_t _len){
+  size_t  size;
+  char   *ret;
+  size=sizeof(*ret)*(_len+1);
+  if(OP_UNLIKELY(size<_len))return NULL;
+  ret=(char *)_ogg_malloc(size);
+  if(OP_LIKELY(ret!=NULL)){
+    ret=(char *)memcpy(ret,_s,sizeof(*ret)*_len);
+    ret[_len]='\0';
+  }
+  return ret;
+}
+
+/*The actual implementation of opus_tags_parse().
+  Unlike the public API, this function requires _tags to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_tags_parse_impl(OpusTags *_tags,
+ const unsigned char *_data,size_t _len){
+  opus_uint32 count;
+  size_t      len;
+  int         ncomments;
+  int         ci;
+  len=_len;
+  if(len<8)return OP_ENOTFORMAT;
+  if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT;
+  if(len<16)return OP_EBADHEADER;
+  _data+=8;
+  len-=8;
+  count=op_parse_uint32le(_data);
+  _data+=4;
+  len-=4;
+  if(count>len)return OP_EBADHEADER;
+  if(_tags!=NULL){
+    _tags->vendor=op_strdup_with_len((char *)_data,count);
+    if(_tags->vendor==NULL)return OP_EFAULT;
+  }
+  _data+=count;
+  len-=count;
+  if(len<4)return OP_EBADHEADER;
+  count=op_parse_uint32le(_data);
+  _data+=4;
+  len-=4;
+  /*Check to make sure there's minimally sufficient data left in the packet.*/
+  if(count>len>>2)return OP_EBADHEADER;
+  /*Check for overflow (the API limits this to an int).*/
+  if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT;
+  if(_tags!=NULL){
+    int ret;
+    ret=op_tags_ensure_capacity(_tags,count);
+    if(ret<0)return ret;
+  }
+  ncomments=(int)count;
+  for(ci=0;ci<ncomments;ci++){
+    /*Check to make sure there's minimally sufficient data left in the packet.*/
+    if((size_t)(ncomments-ci)>len>>2)return OP_EBADHEADER;
+    count=op_parse_uint32le(_data);
+    _data+=4;
+    len-=4;
+    if(count>len)return OP_EBADHEADER;
+    /*Check for overflow (the API limits this to an int).*/
+    if(count>(opus_uint32)INT_MAX)return OP_EFAULT;
+    if(_tags!=NULL){
+      _tags->user_comments[ci]=op_strdup_with_len((char *)_data,count);
+      if(_tags->user_comments[ci]==NULL)return OP_EFAULT;
+      _tags->comment_lengths[ci]=(int)count;
+      _tags->comments=ci+1;
+    }
+    _data+=count;
+    len-=count;
+  }
+  return 0;
+}
+
+int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){
+  if(_tags!=NULL){
+    OpusTags tags;
+    int      ret;
+    opus_tags_init(&tags);
+    ret=opus_tags_parse_impl(&tags,_data,_len);
+    if(ret<0)opus_tags_clear(&tags);
+    else *_tags=*&tags;
+    return ret;
+  }
+  else return opus_tags_parse_impl(NULL,_data,_len);
+}
+
+/*The actual implementation of opus_tags_copy().
+  Unlike the public API, this function requires _dst to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){
+  char *vendor;
+  int   ncomments;
+  int   ret;
+  int   ci;
+  vendor=_src->vendor;
+  _dst->vendor=op_strdup_with_len(vendor,strlen(vendor));
+  if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT;
+  ncomments=_src->comments;
+  ret=op_tags_ensure_capacity(_dst,ncomments);
+  if(OP_UNLIKELY(ret<0))return ret;
+  for(ci=0;ci<ncomments;ci++){
+    int len;
+    len=_src->comment_lengths[ci];
+    OP_ASSERT(len>=0);
+    _dst->user_comments[ci]=op_strdup_with_len(_src->user_comments[ci],len);
+    if(OP_UNLIKELY(_dst->user_comments[ci]==NULL))return OP_EFAULT;
+    _dst->comment_lengths[ci]=len;
+    _dst->comments=ci+1;
+  }
+  return 0;
+}
+
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){
+  OpusTags dst;
+  int      ret;
+  opus_tags_init(&dst);
+  ret=opus_tags_copy_impl(&dst,_src);
+  if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst);
+  else *_dst=*&dst;
+  return 0;
+}
+
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){
+  char *comment;
+  int   tag_len;
+  int   value_len;
+  int   ncomments;
+  int   ret;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  tag_len=strlen(_tag);
+  value_len=strlen(_value);
+  /*+2 for '=' and '\0'.*/
+  _tags->comment_lengths[ncomments]=0;
+  _tags->user_comments[ncomments]=comment=
+   (char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2));
+  if(OP_UNLIKELY(comment==NULL))return OP_EFAULT;
+  _tags->comment_lengths[ncomments]=tag_len+value_len+1;
+  memcpy(comment,_tag,sizeof(*comment)*tag_len);
+  comment[tag_len]='=';
+  memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1));
+  return 0;
+}
+
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment){
+  int comment_len;
+  int ncomments;
+  int ret;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  comment_len=(int)strlen(_comment);
+  _tags->comment_lengths[ncomments]=0;
+  _tags->user_comments[ncomments]=op_strdup_with_len(_comment,comment_len);
+  if(OP_UNLIKELY(_tags->user_comments[ncomments]==NULL))return OP_EFAULT;
+  _tags->comment_lengths[ncomments]=comment_len;
+  return 0;
+}
+
+int opus_tagcompare(const char *_tag_name,const char *_comment){
+  return opus_tagncompare(_tag_name,strlen(_tag_name),_comment);
+}
+
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){
+  int ret;
+  OP_ASSERT(_tag_len>=0);
+  ret=op_strncasecmp(_tag_name,_comment,_tag_len);
+  return ret?ret:'='-_comment[_tag_len];
+}
+
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){
+  char **user_comments;
+  int    tag_len;
+  int    found;
+  int    ncomments;
+  int    ci;
+  tag_len=strlen(_tag);
+  ncomments=_tags->comments;
+  user_comments=_tags->user_comments;
+  found=0;
+  for(ci=0;ci<ncomments;ci++){
+    if(!opus_tagncompare(_tag,tag_len,user_comments[ci])){
+      /*We return a pointer to the data, not a copy.*/
+      if(_count==found++)return user_comments[ci]+tag_len+1;
+    }
+  }
+  /*Didn't find anything.*/
+  return NULL;
+}
+
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag){
+  char **user_comments;
+  int    tag_len;
+  int    found;
+  int    ncomments;
+  int    ci;
+  tag_len=strlen(_tag);
+  ncomments=_tags->comments;
+  user_comments=_tags->user_comments;
+  found=0;
+  for(ci=0;ci<ncomments;ci++){
+    if(!opus_tagncompare(_tag,tag_len,user_comments[ci]))found++;
+  }
+  return found;
+}
+
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){
+  char **comments;
+  int    ncomments;
+  int    ci;
+  comments=_tags->user_comments;
+  ncomments=_tags->comments;
+  /*Look for the first valid R128_TRACK_GAIN tag and use that.*/
+  for(ci=0;ci<ncomments;ci++){
+    if(opus_tagncompare("R128_TRACK_GAIN",15,comments[ci])==0){
+      char       *p;
+      opus_int32  gain_q8;
+      int         negative;
+      p=comments[ci]+16;
+      negative=0;
+      if(*p=='-'){
+        negative=-1;
+        p++;
+      }
+      else if(*p=='+')p++;
+      gain_q8=0;
+      while(*p>='0'&&*p<='9'){
+        gain_q8=10*gain_q8+*p-'0';
+        if(gain_q8>32767-negative)break;
+        p++;
+      }
+      /*This didn't look like a signed 16-bit decimal integer.
+        Not a valid R128_TRACK_GAIN tag.*/
+      if(*p!='\0')continue;
+      *_gain_q8=(int)(gain_q8+negative^negative);
+      return 0;
+    }
+  }
+  return OP_FALSE;
+}
+
+static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0
+   &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   JPEG.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_jpeg(_buf,_buf_sz)){
+    size_t offs;
+    offs=2;
+    for(;;){
+      size_t segment_len;
+      int    marker;
+      while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++;
+      while(offs<_buf_sz&&_buf[offs]==0xFF)offs++;
+      marker=_buf[offs];
+      offs++;
+      /*If we hit EOI* (end of image), or another SOI* (start of image),
+         or SOS (start of scan), then stop now.*/
+      if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break;
+      /*RST* (restart markers): skip (no segment length).*/
+      else if(marker>=0xD0&&marker<=0xD7)continue;
+      /*Read the length of the marker segment.*/
+      if(_buf_sz-offs<2)break;
+      segment_len=_buf[offs]<<8|_buf[offs+1];
+      if(segment_len<2||_buf_sz-offs<segment_len)break;
+      if(marker==0xC0||(marker>0xC0&&marker<0xD0&&(marker&3)!=0)){
+        /*Found a SOFn (start of frame) marker segment:*/
+        if(segment_len>=8){
+          *_height=_buf[offs+3]<<8|_buf[offs+4];
+          *_width=_buf[offs+5]<<8|_buf[offs+6];
+          *_depth=_buf[offs+2]*_buf[offs+7];
+          *_colors=0;
+          *_has_palette=0;
+        }
+        break;
+      }
+      /*Other markers: skip the whole marker segment.*/
+      offs+=segment_len;
+    }
+  }
+}
+
+static int op_is_png(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=8&&memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   PNG.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_png(_buf,_buf_sz)){
+    size_t offs;
+    offs=8;
+    while(_buf_sz-offs>=12){
+      ogg_uint32_t chunk_len;
+      chunk_len=op_parse_uint32be(_buf+offs);
+      if(chunk_len>_buf_sz-(offs+12))break;
+      else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){
+        int color_type;
+        *_width=op_parse_uint32be(_buf+offs+8);
+        *_height=op_parse_uint32be(_buf+offs+12);
+        color_type=_buf[offs+17];
+        if(color_type==3){
+          *_depth=24;
+          *_has_palette=1;
+        }
+        else{
+          int sample_depth;
+          sample_depth=_buf[offs+16];
+          if(color_type==0)*_depth=sample_depth;
+          else if(color_type==2)*_depth=sample_depth*3;
+          else if(color_type==4)*_depth=sample_depth*2;
+          else if(color_type==6)*_depth=sample_depth*4;
+          *_colors=0;
+          *_has_palette=0;
+          break;
+        }
+      }
+      else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){
+        *_colors=chunk_len/3;
+        break;
+      }
+      offs+=12+chunk_len;
+    }
+  }
+}
+
+static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=6&&(memcmp(_buf,"GIF87a",6)==0||memcmp(_buf,"GIF89a",6)==0);
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   GIF.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_gif(_buf,_buf_sz)&&_buf_sz>=14){
+    *_width=_buf[6]|_buf[7]<<8;
+    *_height=_buf[8]|_buf[9]<<8;
+    /*libFLAC hard-codes the depth to 24.*/
+    *_depth=24;
+    *_colors=1<<((_buf[10]&7)+1);
+    *_has_palette=1;
+  }
+}
+
+/*The actual implementation of opus_picture_tag_parse().
+  Unlike the public API, this function requires _pic to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag,
+ unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){
+  opus_int32   picture_type;
+  opus_uint32  mime_type_length;
+  char        *mime_type;
+  opus_uint32  description_length;
+  char        *description;
+  opus_uint32  width;
+  opus_uint32  height;
+  opus_uint32  depth;
+  opus_uint32  colors;
+  opus_uint32  data_length;
+  opus_uint32  file_width;
+  opus_uint32  file_height;
+  opus_uint32  file_depth;
+  opus_uint32  file_colors;
+  int          format;
+  int          has_palette;
+  int          colors_set;
+  size_t       i;
+  /*Decode the BASE64 data.*/
+  for(i=0;i<_base64_sz;i++){
+    opus_uint32 value;
+    int         j;
+    value=0;
+    for(j=0;j<4;j++){
+      unsigned c;
+      unsigned d;
+      c=(unsigned char)_tag[4*i+j];
+      if(c=='+')d=62;
+      else if(c=='/')d=63;
+      else if(c>='0'&&c<='9')d=52+c-'0';
+      else if(c>='a'&&c<='z')d=26+c-'a';
+      else if(c>='A'&&c<='Z')d=c-'A';
+      else if(c=='='&&3*i+j>_buf_sz)d=0;
+      else return OP_ENOTFORMAT;
+      value=value<<6|d;
+    }
+    _buf[3*i]=(unsigned char)(value>>16);
+    if(3*i+1<_buf_sz){
+      _buf[3*i+1]=(unsigned char)(value>>8);
+      if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value;
+    }
+  }
+  i=0;
+  picture_type=op_parse_uint32be(_buf+i);
+  i+=4;
+  /*Extract the MIME type.*/
+  mime_type_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT;
+  mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1));
+  if(mime_type==NULL)return OP_EFAULT;
+  memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length);
+  mime_type[mime_type_length]='\0';
+  _pic->mime_type=mime_type;
+  i+=mime_type_length;
+  /*Extract the description string.*/
+  description_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT;
+  description=
+   (char *)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1));
+  if(description==NULL)return OP_EFAULT;
+  memcpy(description,_buf+i,sizeof(*description)*description_length);
+  description[description_length]='\0';
+  _pic->description=description;
+  i+=description_length;
+  /*Extract the remaining fields.*/
+  width=op_parse_uint32be(_buf+i);
+  i+=4;
+  height=op_parse_uint32be(_buf+i);
+  i+=4;
+  depth=op_parse_uint32be(_buf+i);
+  i+=4;
+  colors=op_parse_uint32be(_buf+i);
+  i+=4;
+  /*If one of these is set, they all must be, but colors==0 is a valid value.*/
+  colors_set=width!=0||height!=0||depth!=0||colors!=0;
+  if(width==0||height==0||depth==0&&colors_set)return OP_ENOTFORMAT;
+  data_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(data_length>_buf_sz-i)return OP_ENOTFORMAT;
+  /*Trim extraneous data so we don't copy it below.*/
+  _buf_sz=i+data_length;
+  /*Attempt to determine the image format.*/
+  format=OP_PIC_FORMAT_UNKNOWN;
+  if(mime_type_length==3&&strcmp(mime_type,"-->")==0){
+    format=OP_PIC_FORMAT_URL;
+    /*Picture type 1 must be a 32x32 PNG.*/
+    if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){
+      return OP_ENOTFORMAT;
+    }
+    /*Append a terminating NUL for the convenience of our callers.*/
+    _buf[_buf_sz++]='\0';
+  }
+  else{
+    if(mime_type_length==10
+     &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){
+      if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+    }
+    else if(mime_type_length==9
+     &&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){
+      if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+    }
+    else if(mime_type_length==9
+     &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){
+      if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+    }
+    else if(mime_type_length==0||(mime_type_length==6
+     &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){
+      if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+      else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+      else if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+    }
+    file_width=file_height=file_depth=file_colors=0;
+    has_palette=-1;
+    switch(format){
+      case OP_PIC_FORMAT_JPEG:{
+        op_extract_jpeg_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+      case OP_PIC_FORMAT_PNG:{
+        op_extract_png_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+      case OP_PIC_FORMAT_GIF:{
+        op_extract_gif_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+    }
+    if(has_palette>=0){
+      /*If we successfully extracted these parameters from the image, override
+         any declared values.*/
+      width=file_width;
+      height=file_height;
+      depth=file_depth;
+      colors=file_colors;
+    }
+    /*Picture type 1 must be a 32x32 PNG.*/
+    if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){
+      return OP_ENOTFORMAT;
+    }
+  }
+  /*Adjust _buf_sz instead of using data_length to capture the terminating NUL
+     for URLs.*/
+  _buf_sz-=i;
+  memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz);
+  _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz);
+  if(_buf_sz>0&&_buf==NULL)return OP_EFAULT;
+  _pic->type=picture_type;
+  _pic->width=width;
+  _pic->height=height;
+  _pic->depth=depth;
+  _pic->colors=colors;
+  _pic->data_length=data_length;
+  _pic->data=_buf;
+  _pic->format=format;
+  return 0;
+}
+
+int opus_picture_tag_parse(OpusPictureTag *_pic,const char *_tag){
+  OpusPictureTag  pic;
+  unsigned char  *buf;
+  size_t          base64_sz;
+  size_t          buf_sz;
+  size_t          tag_length;
+  int             ret;
+  if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23;
+  /*Figure out how much BASE64-encoded data we have.*/
+  tag_length=strlen(_tag);
+  if(tag_length&3)return OP_ENOTFORMAT;
+  base64_sz=tag_length>>2;
+  buf_sz=3*base64_sz;
+  if(buf_sz<32)return OP_ENOTFORMAT;
+  if(_tag[tag_length-1]=='=')buf_sz--;
+  if(_tag[tag_length-2]=='=')buf_sz--;
+  if(buf_sz<32)return OP_ENOTFORMAT;
+  /*Allocate an extra byte to allow appending a terminating NUL to URL data.*/
+  buf=(unsigned char *)_ogg_malloc(sizeof(*buf)*(buf_sz+1));
+  if(buf==NULL)return OP_EFAULT;
+  opus_picture_tag_init(&pic);
+  ret=opus_picture_tag_parse_impl(&pic,_tag,buf,buf_sz,base64_sz);
+  if(ret<0){
+    opus_picture_tag_clear(&pic);
+    _ogg_free(buf);
+  }
+  else *_pic=*&pic;
+  return ret;
+}
+
+void opus_picture_tag_init(OpusPictureTag *_pic){
+  memset(_pic,0,sizeof(*_pic));
+}
+
+void opus_picture_tag_clear(OpusPictureTag *_pic){
+  _ogg_free(_pic->description);
+  _ogg_free(_pic->mime_type);
+  _ogg_free(_pic->data);
+}
diff --git a/src/main/jni/opus/opusfile/internal.c b/src/main/jni/opus/opusfile/internal.c
new file mode 100644
index 000000000..96c80def8
--- /dev/null
+++ b/src/main/jni/opus/opusfile/internal.c
@@ -0,0 +1,42 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "internal.h"
+
+#if defined(OP_ENABLE_ASSERTIONS)
+void op_fatal_impl(const char *_str,const char *_file,int _line){
+  fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n",
+   _file,_line,_str);
+  abort();
+}
+#endif
+
+/*A version of strncasecmp() that is guaranteed to only ignore the case of
+   ASCII characters.*/
+int op_strncasecmp(const char *_a,const char *_b,int _n){
+  int i;
+  for(i=0;i<_n;i++){
+    int a;
+    int b;
+    int d;
+    a=_a[i];
+    b=_b[i];
+    if(a>='a'&&a<='z')a-='a'-'A';
+    if(b>='a'&&b<='z')b-='a'-'A';
+    d=a-b;
+    if(d)return d;
+  }
+  return 0;
+}
diff --git a/src/main/jni/opus/opusfile/internal.h b/src/main/jni/opus/opusfile/internal.h
new file mode 100644
index 000000000..b1109deb9
--- /dev/null
+++ b/src/main/jni/opus/opusfile/internal.h
@@ -0,0 +1,249 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#if !defined(_opusfile_internal_h)
+# define _opusfile_internal_h (1)
+
+# if !defined(_REENTRANT)
+#  define _REENTRANT
+# endif
+# if !defined(_GNU_SOURCE)
+#  define _GNU_SOURCE
+# endif
+# if !defined(_LARGEFILE_SOURCE)
+#  define _LARGEFILE_SOURCE
+# endif
+# if !defined(_LARGEFILE64_SOURCE)
+#  define _LARGEFILE64_SOURCE
+# endif
+# if !defined(_FILE_OFFSET_BITS)
+#  define _FILE_OFFSET_BITS 64
+# endif
+
+# include <stdlib.h>
+# include "opusfile.h"
+
+typedef struct OggOpusLink OggOpusLink;
+
+# if defined(OP_FIXED_POINT)
+
+typedef opus_int16 op_sample;
+
+# else
+
+typedef float      op_sample;
+
+/*We're using this define to test for libopus 1.1 or later until libopus
+   provides a better mechanism.*/
+#  if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST)
+/*Enable soft clipping prevention in 16-bit decodes.*/
+#   define OP_SOFT_CLIP (1)
+#  endif
+
+# endif
+
+# if OP_GNUC_PREREQ(4,2)
+/*Disable excessive warnings about the order of operations.*/
+#  pragma GCC diagnostic ignored "-Wparentheses"
+# elif defined(_MSC_VER)
+/*Disable excessive warnings about the order of operations.*/
+#  pragma warning(disable:4554)
+/*Disable warnings about "deprecated" POSIX functions.*/
+#  pragma warning(disable:4996)
+# endif
+
+# if OP_GNUC_PREREQ(3,0)
+/*Another alternative is
+    (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1))
+   but that evaluates _x multiple times, which may be bad.*/
+#  define OP_LIKELY(_x) (__builtin_expect(!!(_x),1))
+#  define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0))
+# else
+#  define OP_LIKELY(_x)   (!!(_x))
+#  define OP_UNLIKELY(_x) (!!(_x))
+# endif
+
+# if defined(OP_ENABLE_ASSERTIONS)
+#  if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590
+__attribute__((noreturn))
+#  endif
+void op_fatal_impl(const char *_str,const char *_file,int _line);
+
+#  define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__))
+
+#  define OP_ASSERT(_cond) \
+  do{ \
+    if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \
+  } \
+  while(0)
+#  define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond)
+
+# else
+#  define OP_FATAL(_str) abort()
+#  define OP_ASSERT(_cond)
+#  define OP_ALWAYS_TRUE(_cond) ((void)(_cond))
+# endif
+
+# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1)
+# define OP_INT64_MIN (-OP_INT64_MAX-1)
+# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1)
+# define OP_INT32_MIN (-OP_INT32_MAX-1)
+
+# define OP_MIN(_a,_b)        ((_a)<(_b)?(_a):(_b))
+# define OP_MAX(_a,_b)        ((_a)>(_b)?(_a):(_b))
+# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi)))
+
+/*Advance a file offset by the given amount, clamping against OP_INT64_MAX.
+  This is used to advance a known offset by things like OP_CHUNK_SIZE or
+   OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow.
+  It assumes that both _offset and _amount are non-negative.*/
+#define OP_ADV_OFFSET(_offset,_amount) \
+ (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount))
+
+/*The maximum channel count for any mapping we'll actually decode.*/
+# define OP_NCHANNELS_MAX (8)
+
+/*Initial state.*/
+# define  OP_NOTOPEN   (0)
+/*We've found the first Opus stream in the first link.*/
+# define  OP_PARTOPEN  (1)
+# define  OP_OPENED    (2)
+/*We've found the first Opus stream in the current link.*/
+# define  OP_STREAMSET (3)
+/*We've initialized the decoder for the chosen Opus stream in the current
+   link.*/
+# define  OP_INITSET   (4)
+
+/*Information cached for a single link in a chained Ogg Opus file.
+  We choose the first Opus stream encountered in each link to play back (and
+   require at least one).*/
+struct OggOpusLink{
+  /*The byte offset of the first header page in this link.*/
+  opus_int64   offset;
+  /*The byte offset of the first data page from the chosen Opus stream in this
+     link (after the headers).*/
+  opus_int64   data_offset;
+  /*The byte offset of the last page from the chosen Opus stream in this link.
+    This is used when seeking to ensure we find a page before the last one, so
+     that end-trimming calculations work properly.
+    This is only valid for seekable sources.*/
+  opus_int64   end_offset;
+  /*The granule position of the last sample.
+    This is only valid for seekable sources.*/
+  ogg_int64_t  pcm_end;
+  /*The granule position before the first sample.*/
+  ogg_int64_t  pcm_start;
+  /*The serial number.*/
+  ogg_uint32_t serialno;
+  /*The contents of the info header.*/
+  OpusHead     head;
+  /*The contents of the comment header.*/
+  OpusTags     tags;
+};
+
+struct OggOpusFile{
+  /*The callbacks used to access the data source.*/
+  OpusFileCallbacks  callbacks;
+  /*A FILE *, memory bufer, etc.*/
+  void              *source;
+  /*Whether or not we can seek with this data source.*/
+  int                seekable;
+  /*The number of links in this chained Ogg Opus file.*/
+  int                nlinks;
+  /*The cached information from each link in a chained Ogg Opus file.
+    If source isn't seekable (e.g., it's a pipe), only the current link
+     appears.*/
+  OggOpusLink       *links;
+  /*The number of serial numbers from a single link.*/
+  int                nserialnos;
+  /*The capacity of the list of serial numbers from a single link.*/
+  int                cserialnos;
+  /*Storage for the list of serial numbers from a single link.*/
+  ogg_uint32_t      *serialnos;
+  /*This is the current offset of the data processed by the ogg_sync_state.
+    After a seek, this should be set to the target offset so that we can track
+     the byte offsets of subsequent pages.
+    After a call to op_get_next_page(), this will point to the first byte after
+     that page.*/
+  opus_int64         offset;
+  /*The total size of this data source, or -1 if it's unseekable.*/
+  opus_int64         end;
+  /*Used to locate pages in the data source.*/
+  ogg_sync_state     oy;
+  /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/
+  int                ready_state;
+  /*The current link being played back.*/
+  int                cur_link;
+  /*The number of decoded samples to discard from the start of decoding.*/
+  opus_int32         cur_discard_count;
+  /*The granule position of the previous packet (current packet start time).*/
+  ogg_int64_t        prev_packet_gp;
+  /*The number of bytes read since the last bitrate query, including framing.*/
+  opus_int64         bytes_tracked;
+  /*The number of samples decoded since the last bitrate query.*/
+  ogg_int64_t        samples_tracked;
+  /*Takes physical pages and welds them into a logical stream of packets.*/
+  ogg_stream_state   os;
+  /*Re-timestamped packets from a single page.
+    Buffering these relies on the undocumented libogg behavior that ogg_packet
+     pointers remain valid until the next page is submitted to the
+     ogg_stream_state they came from.*/
+  ogg_packet         op[255];
+  /*The index of the next packet to return.*/
+  int                op_pos;
+  /*The total number of packets available.*/
+  int                op_count;
+  /*Central working state for the packet-to-PCM decoder.*/
+  OpusMSDecoder     *od;
+  /*The application-provided packet decode callback.*/
+  op_decode_cb_func  decode_cb;
+  /*The application-provided packet decode callback context.*/
+  void              *decode_cb_ctx;
+  /*The stream count used to initialize the decoder.*/
+  int                od_stream_count;
+  /*The coupled stream count used to initialize the decoder.*/
+  int                od_coupled_count;
+  /*The channel count used to initialize the decoder.*/
+  int                od_channel_count;
+  /*The channel mapping used to initialize the decoder.*/
+  unsigned char      od_mapping[OP_NCHANNELS_MAX];
+  /*The buffered data for one decoded packet.*/
+  op_sample         *od_buffer;
+  /*The current position in the decoded buffer.*/
+  int                od_buffer_pos;
+  /*The number of valid samples in the decoded buffer.*/
+  int                od_buffer_size;
+  /*The type of gain offset to apply.
+    One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/
+  int                gain_type;
+  /*The offset to apply to the gain.*/
+  opus_int32         gain_offset_q8;
+  /*Internal state for soft clipping and dithering float->short output.*/
+#if !defined(OP_FIXED_POINT)
+# if defined(OP_SOFT_CLIP)
+  float              clip_state[OP_NCHANNELS_MAX];
+# endif
+  float              dither_a[OP_NCHANNELS_MAX*4];
+  float              dither_b[OP_NCHANNELS_MAX*4];
+  opus_uint32        dither_seed;
+  int                dither_mute;
+  int                dither_disabled;
+  /*The number of channels represented by the internal state.
+    This gets set to 0 whenever anything that would prevent state propagation
+     occurs (switching between the float/short APIs, or between the
+     stereo/multistream APIs).*/
+  int                state_channel_count;
+#endif
+};
+
+int op_strncasecmp(const char *_a,const char *_b,int _n);
+
+#endif
diff --git a/src/main/jni/opus/opusfile/opusfile.c b/src/main/jni/opus/opusfile/opusfile.c
new file mode 100644
index 000000000..392ddb29e
--- /dev/null
+++ b/src/main/jni/opus/opusfile/opusfile.c
@@ -0,0 +1,3163 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "internal.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <math.h>
+
+#include "opusfile.h"
+
+/*This implementation is largely based off of libvorbisfile.
+  All of the Ogg bits work roughly the same, though I have made some
+   "improvements" that have not been folded back there, yet.*/
+
+/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one
+   logical bitstream arranged end to end (the only form of Ogg multiplexing
+   supported by this library.
+  Grouping (parallel multiplexing) is not supported, except to the extent that
+   if there are multiple logical Ogg streams in a single link of the chain, we
+   will ignore all but the first Opus stream we find.*/
+
+/*An Ogg Opus file can be played beginning to end (streamed) without worrying
+   ahead of time about chaining (see opusdec from the opus-tools package).
+  If we have the whole file, however, and want random access
+   (seeking/scrubbing) or desire to know the total length/time of a file, we
+   need to account for the possibility of chaining.*/
+
+/*We can handle things a number of ways.
+  We can determine the entire bitstream structure right off the bat, or find
+   pieces on demand.
+  This library determines and caches structure for the entire bitstream, but
+   builds a virtual decoder on the fly when moving between links in the chain.*/
+
+/*There are also different ways to implement seeking.
+  Enough information exists in an Ogg bitstream to seek to sample-granularity
+   positions in the output.
+  Or, one can seek by picking some portion of the stream roughly in the desired
+   area if we only want coarse navigation through the stream.
+  We implement and expose both strategies.*/
+
+/*The maximum number of bytes in a page (including the page headers).*/
+#define OP_PAGE_SIZE_MAX  (65307)
+/*The default amount to seek backwards per step when trying to find the
+   previous page.
+  This must be at least as large as the maximum size of a page.*/
+#define OP_CHUNK_SIZE     (65536)
+/*The maximum amount to seek backwards per step when trying to find the
+   previous page.*/
+#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024)
+/*A smaller read size is needed for low-rate streaming.*/
+#define OP_READ_SIZE      (2048)
+
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes){
+  ogg_sync_state  oy;
+  char           *data;
+  int             err;
+  /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg
+     page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel
+     mapping bytes).
+    It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value +
+     19 Opus header bytes using channel mapping family 0).
+    If we don't have at least that much data, give up now.*/
+  if(_initial_bytes<47)return OP_FALSE;
+  /*Only proceed if we start with the magic OggS string.
+    This is to prevent us spending a lot of time allocating memory and looking
+     for Ogg pages in non-Ogg files.*/
+  if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT;
+  ogg_sync_init(&oy);
+  data=ogg_sync_buffer(&oy,_initial_bytes);
+  if(data!=NULL){
+    ogg_stream_state os;
+    ogg_page         og;
+    int              ret;
+    memcpy(data,_initial_data,_initial_bytes);
+    ogg_sync_wrote(&oy,_initial_bytes);
+    ogg_stream_init(&os,-1);
+    err=OP_FALSE;
+    do{
+      ogg_packet op;
+      ret=ogg_sync_pageout(&oy,&og);
+      /*Ignore holes.*/
+      if(ret<0)continue;
+      /*Stop if we run out of data.*/
+      if(!ret)break;
+      ogg_stream_reset_serialno(&os,ogg_page_serialno(&og));
+      ogg_stream_pagein(&os,&og);
+      /*Only process the first packet on this page (if it's a BOS packet,
+         it's required to be the only one).*/
+      if(ogg_stream_packetout(&os,&op)==1){
+        if(op.b_o_s){
+          ret=opus_head_parse(_head,op.packet,op.bytes);
+          /*If this didn't look like Opus, keep going.*/
+          if(ret==OP_ENOTFORMAT)continue;
+          /*Otherwise we're done, one way or another.*/
+          err=ret;
+        }
+        /*We finished parsing the headers.
+          There is no Opus to be found.*/
+        else err=OP_ENOTFORMAT;
+      }
+    }
+    while(err==OP_FALSE);
+    ogg_stream_clear(&os);
+  }
+  else err=OP_EFAULT;
+  ogg_sync_clear(&oy);
+  return err;
+}
+
+/*Many, many internal helpers.
+  The intention is not to be confusing.
+  Rampant duplication and monolithic function implementation (though we do have
+   some large, omnibus functions still) would be harder to understand anyway.
+  The high level functions are last.
+  Begin grokking near the end of the file if you prefer to read things
+   top-down.*/
+
+/*The read/seek functions track absolute position within the stream.*/
+
+/*Read a little more data from the file/pipe into the ogg_sync framer.
+  _nbytes: The maximum number of bytes to read.
+  Return: A positive number of bytes read on success, 0 on end-of-file, or a
+           negative value on failure.*/
+static int op_get_data(OggOpusFile *_of,int _nbytes){
+  unsigned char *buffer;
+  int            nbytes;
+  OP_ASSERT(_nbytes>0);
+  buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes);
+  nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes);
+  OP_ASSERT(nbytes<=_nbytes);
+  if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes);
+  return nbytes;
+}
+
+/*Save a tiny smidge of verbosity to make the code more readable.*/
+static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){
+  if(_offset==_of->offset)return 0;
+  if(_of->callbacks.seek==NULL||
+   (*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){
+    return OP_EREAD;
+  }
+  _of->offset=_offset;
+  ogg_sync_reset(&_of->oy);
+  return 0;
+}
+
+/*Get the current position indicator of the underlying source.
+  This should be the same as the value reported by tell().*/
+static opus_int64 op_position(const OggOpusFile *_of){
+  /*The current position indicator is _not_ simply offset.
+    We may also have unprocessed, buffered data in the sync state.*/
+  return _of->offset+_of->oy.fill-_of->oy.returned;
+}
+
+/*From the head of the stream, get the next page.
+  _boundary specifies if the function is allowed to fetch more data from the
+   stream (and how much) or only use internally buffered data.
+  _boundary: -1: Unbounded search.
+              0: Read no additional data.
+                 Use only cached data.
+              n: Search for the start of a new page up to file position n.
+  Return: n>=0:       Found a page at absolute offset n.
+          OP_FALSE:   Hit the _boundary limit.
+          OP_EREAD:   An underlying read operation failed.
+          OP_BADLINK: We hit end-of-file before reaching _boundary.*/
+static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og,
+ opus_int64 _boundary){
+  while(_boundary<=0||_of->offset<_boundary){
+    int more;
+    more=ogg_sync_pageseek(&_of->oy,_og);
+    /*Skipped (-more) bytes.*/
+    if(OP_UNLIKELY(more<0))_of->offset-=more;
+    else if(more==0){
+      int read_nbytes;
+      int ret;
+      /*Send more paramedics.*/
+      if(!_boundary)return OP_FALSE;
+      if(_boundary<0)read_nbytes=OP_READ_SIZE;
+      else{
+        opus_int64 position;
+        position=op_position(_of);
+        if(position>=_boundary)return OP_FALSE;
+        read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE);
+      }
+      ret=op_get_data(_of,read_nbytes);
+      if(OP_UNLIKELY(ret<0))return OP_EREAD;
+      if(OP_UNLIKELY(ret==0)){
+        /*Only fail cleanly on EOF if we didn't have a known boundary.
+          Otherwise, we should have been able to reach that boundary, and this
+           is a fatal error.*/
+        return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK;
+      }
+    }
+    else{
+      /*Got a page.
+        Return the page start offset and advance the internal offset past the
+         page end.*/
+      opus_int64 page_offset;
+      page_offset=_of->offset;
+      _of->offset+=more;
+      OP_ASSERT(page_offset>=0);
+      return page_offset;
+    }
+  }
+  return OP_FALSE;
+}
+
+static int op_add_serialno(const ogg_page *_og,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_uint32_t *serialnos;
+  int           nserialnos;
+  int           cserialnos;
+  ogg_uint32_t s;
+  s=ogg_page_serialno(_og);
+  serialnos=*_serialnos;
+  nserialnos=*_nserialnos;
+  cserialnos=*_cserialnos;
+  if(OP_UNLIKELY(nserialnos>=cserialnos)){
+    if(OP_UNLIKELY(cserialnos>INT_MAX-1>>1))return OP_EFAULT;
+    cserialnos=2*cserialnos+1;
+    OP_ASSERT(nserialnos<cserialnos);
+    serialnos=(ogg_uint32_t *)_ogg_realloc(serialnos,
+     sizeof(*serialnos)*cserialnos);
+    if(OP_UNLIKELY(serialnos==NULL))return OP_EFAULT;
+  }
+  serialnos[nserialnos++]=s;
+  *_serialnos=serialnos;
+  *_nserialnos=nserialnos;
+  *_cserialnos=cserialnos;
+  return 0;
+}
+
+/*Returns nonzero if found.*/
+static int op_lookup_serialno(ogg_uint32_t _s,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  int i;
+  for(i=0;i<_nserialnos&&_serialnos[i]!=_s;i++);
+  return i<_nserialnos;
+}
+
+static int op_lookup_page_serialno(const ogg_page *_og,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  return op_lookup_serialno(ogg_page_serialno(_og),_serialnos,_nserialnos);
+}
+
+typedef struct OpusSeekRecord OpusSeekRecord;
+
+/*We use this to remember the pages we found while enumerating the links of a
+   chained stream.
+  We keep track of the starting and ending offsets, as well as the point we
+   started searching from, so we know where to bisect.
+  We also keep the serial number, so we can tell if the page belonged to the
+   current link or not, as well as the granule position, to aid in estimating
+   the start of the link.*/
+struct OpusSeekRecord{
+  /*The earliest byte we know of such that reading forward from it causes
+     capture to be regained at this page.*/
+  opus_int64   search_start;
+  /*The offset of this page.*/
+  opus_int64   offset;
+  /*The size of this page.*/
+  opus_int32   size;
+  /*The serial number of this page.*/
+  ogg_uint32_t serialno;
+  /*The granule position of this page.*/
+  ogg_int64_t  gp;
+};
+
+/*Find the last page beginning before _offset with a valid granule position.
+  There is no '_boundary' parameter as it will always have to read more data.
+  This is much dirtier than the above, as Ogg doesn't have any backward search
+   linkage.
+  This search prefers pages of the specified serial number.
+  If a page of the specified serial number is spotted during the
+   seek-back-and-read-forward, it will return the info of last page of the
+   matching serial number, instead of the very last page, unless the very last
+   page belongs to a different link than preferred serial number.
+  If no page of the specified serial number is seen, it will return the info of
+   the last page.
+  [out] _sr:   Returns information about the page that was found on success.
+  _offset:     The _offset before which to find a page.
+               Any page returned will consist of data entirely before _offset.
+  _serialno:   The preferred serial number.
+               If a page with this serial number is found, it will be returned
+                even if another page in the same link is found closer to
+                _offset.
+               This is purely opportunistic: there is no guarantee such a page
+                will be found if it exists.
+  _serialnos:  The list of serial numbers in the link that contains the
+                preferred serial number.
+  _nserialnos: The number of serial numbers in the current link.
+  Return: 0 on success, or a negative value on failure.
+          OP_EREAD:    Failed to read more data (error or EOF).
+          OP_EBADLINK: We couldn't find a page even after seeking back to the
+                        start of the stream.*/
+static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  OpusSeekRecord preferred_sr;
+  ogg_page       og;
+  opus_int64     begin;
+  opus_int64     end;
+  opus_int64     original_end;
+  opus_int32     chunk_size;
+  int            preferred_found;
+  original_end=end=begin=_offset;
+  preferred_found=0;
+  _offset=-1;
+  chunk_size=OP_CHUNK_SIZE;
+  do{
+    opus_int64 search_start;
+    int        ret;
+    OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+    begin=OP_MAX(begin-chunk_size,0);
+    ret=op_seek_helper(_of,begin);
+    if(OP_UNLIKELY(ret<0))return ret;
+    search_start=begin;
+    while(_of->offset<end){
+      opus_int64   llret;
+      ogg_uint32_t serialno;
+      llret=op_get_next_page(_of,&og,end);
+      if(OP_UNLIKELY(llret<OP_FALSE))return (int)llret;
+      else if(llret==OP_FALSE)break;
+      serialno=ogg_page_serialno(&og);
+      /*Save the information for this page.
+        We're not interested in the page itself... just the serial number, byte
+         offset, page size, and granule position.*/
+      _sr->search_start=search_start;
+      _sr->offset=_offset=llret;
+      _sr->serialno=serialno;
+      OP_ASSERT(_of->offset-_offset>=0);
+      OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX);
+      _sr->size=(opus_int32)(_of->offset-_offset);
+      _sr->gp=ogg_page_granulepos(&og);
+      /*If this page is from the stream we're looking for, remember it.*/
+      if(serialno==_serialno){
+        preferred_found=1;
+        *&preferred_sr=*_sr;
+      }
+      if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){
+        /*We fell off the end of the link, which means we seeked back too far
+           and shouldn't have been looking in that link to begin with.
+          If we found the preferred serial number, forget that we saw it.*/
+        preferred_found=0;
+      }
+      search_start=llret+1;
+    }
+    /*We started from the beginning of the stream and found nothing.
+      This should be impossible unless the contents of the source changed out
+       from under us after we read from it.*/
+    if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK;
+    /*Bump up the chunk size.
+      This is mildly helpful when seeks are very expensive (http).*/
+    chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+    /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+    end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+  }
+  while(_offset<0);
+  if(preferred_found)*_sr=*&preferred_sr;
+  return 0;
+}
+
+/*Find the last page beginning before _offset with the given serial number and
+   a valid granule position.
+  Unlike the above search, this continues until it finds such a page, but does
+   not stray outside the current link.
+  We could implement it (inefficiently) by calling op_get_prev_page_serial()
+   repeatedly until it returned a page that had both our preferred serial
+   number and a valid granule position, but doing it with a separate function
+   allows us to avoid repeatedly re-scanning valid pages from other streams as
+   we seek-back-and-read-forward.
+  [out] _gp:   Returns the granule position of the page that was found on
+                success.
+  _offset:     The _offset before which to find a page.
+               Any page returned will consist of data entirely before _offset.
+  _serialno:   The target serial number.
+  _serialnos:  The list of serial numbers in the link that contains the
+                preferred serial number.
+  _nserialnos: The number of serial numbers in the current link.
+  Return: The offset of the page on success, or a negative value on failure.
+          OP_EREAD:    Failed to read more data (error or EOF).
+          OP_EBADLINK: We couldn't find a page even after seeking back past the
+                        beginning of the link.*/
+static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  ogg_page    og;
+  ogg_int64_t gp;
+  opus_int64  begin;
+  opus_int64  end;
+  opus_int64  original_end;
+  opus_int32  chunk_size;
+  /*The target serial number must belong to the current link.*/
+  OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos));
+  original_end=end=begin=_offset;
+  _offset=-1;
+  /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that
+     ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/
+  gp=-1;
+  chunk_size=OP_CHUNK_SIZE;
+  do{
+    int left_link;
+    int ret;
+    OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+    begin=OP_MAX(begin-chunk_size,0);
+    ret=op_seek_helper(_of,begin);
+    if(OP_UNLIKELY(ret<0))return ret;
+    left_link=0;
+    while(_of->offset<end){
+      opus_int64   llret;
+      ogg_uint32_t serialno;
+      llret=op_get_next_page(_of,&og,end);
+      if(OP_UNLIKELY(llret<OP_FALSE))return llret;
+      else if(llret==OP_FALSE)break;
+      serialno=ogg_page_serialno(&og);
+      if(serialno==_serialno){
+        ogg_int64_t page_gp;
+        /*The page is from the right stream...*/
+        page_gp=ogg_page_granulepos(&og);
+        if(page_gp!=-1){
+          /*And has a valid granule position.
+            Let's remember it.*/
+          _offset=llret;
+          gp=page_gp;
+        }
+      }
+      else if(OP_UNLIKELY(!op_lookup_serialno(serialno,
+       _serialnos,_nserialnos))){
+        /*We fell off the start of the link, which means we don't need to keep
+           seeking any farther back.*/
+        left_link=1;
+      }
+    }
+    /*We started from at or before the beginning of the link and found nothing.
+      This should be impossible unless the contents of the source changed out
+       from under us after we read from it.*/
+    if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){
+      return OP_EBADLINK;
+    }
+    /*Bump up the chunk size.
+      This is mildly helpful when seeks are very expensive (http).*/
+    chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+    /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+    end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+  }
+  while(_offset<0);
+  *_gp=gp;
+  return _offset;
+}
+
+/*Uses the local ogg_stream storage in _of.
+  This is important for non-streaming input sources.*/
+static int op_fetch_headers_impl(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_packet op;
+  int        ret;
+  if(_serialnos!=NULL)*_nserialnos=0;
+  /*Extract the serialnos of all BOS pages plus the first set of Opus headers
+     we see in the link.*/
+  while(ogg_page_bos(_og)){
+    if(_serialnos!=NULL){
+      if(OP_UNLIKELY(op_lookup_page_serialno(_og,*_serialnos,*_nserialnos))){
+        /*A dupe serialnumber in an initial header packet set==invalid stream.*/
+        return OP_EBADHEADER;
+      }
+      ret=op_add_serialno(_og,_serialnos,_nserialnos,_cserialnos);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(_of->ready_state<OP_STREAMSET){
+      /*We don't have an Opus stream in this link yet, so begin prospective
+         stream setup.
+        We need a stream to get packets.*/
+      ogg_stream_reset_serialno(&_of->os,ogg_page_serialno(_og));
+      ogg_stream_pagein(&_of->os,_og);
+      if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){
+        ret=opus_head_parse(_head,op.packet,op.bytes);
+        /*If it's just a stream type we don't recognize, ignore it.*/
+        if(ret==OP_ENOTFORMAT)continue;
+        /*Everything else is fatal.*/
+        if(OP_UNLIKELY(ret<0))return ret;
+        /*Found a valid Opus header.
+          Continue setup.*/
+        _of->ready_state=OP_STREAMSET;
+      }
+    }
+    /*Get the next page.
+      No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,_og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return OP_ENOTFORMAT;
+    }
+    /*If this page also belongs to our Opus stream, submit it and break.*/
+    if(_of->ready_state==OP_STREAMSET
+     &&_of->os.serialno==ogg_page_serialno(_og)){
+      ogg_stream_pagein(&_of->os,_og);
+      break;
+    }
+  }
+  if(OP_UNLIKELY(_of->ready_state!=OP_STREAMSET))return OP_ENOTFORMAT;
+  /*Loop getting packets.*/
+  for(;;){
+    switch(ogg_stream_packetout(&_of->os,&op)){
+      case 0:{
+        /*Loop getting pages.*/
+        for(;;){
+          /*No need to clamp the boundary offset against _of->end, as all
+             errors become OP_EBADHEADER.*/
+          if(OP_UNLIKELY(op_get_next_page(_of,_og,
+           OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+            return OP_EBADHEADER;
+          }
+          /*If this page belongs to the correct stream, go parse it.*/
+          if(_of->os.serialno==ogg_page_serialno(_og)){
+            ogg_stream_pagein(&_of->os,_og);
+            break;
+          }
+          /*If the link ends before we see the Opus comment header, abort.*/
+          if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER;
+          /*Otherwise, keep looking.*/
+        }
+      }break;
+      /*We shouldn't get a hole in the headers!*/
+      case -1:return OP_EBADHEADER;
+      default:{
+        /*Got a packet.
+          It should be the comment header.*/
+        ret=opus_tags_parse(_tags,op.packet,op.bytes);
+        if(OP_UNLIKELY(ret<0))return ret;
+        /*Make sure the page terminated at the end of the comment header.
+          If there is another packet on the page, or part of a packet, then
+           reject the stream.
+          Otherwise seekable sources won't be able to seek back to the start
+           properly.*/
+        ret=ogg_stream_packetout(&_of->os,&op);
+        if(OP_UNLIKELY(ret!=0)
+         ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){
+          /*If we fail, the caller assumes our tags are uninitialized.*/
+          opus_tags_clear(_tags);
+          return OP_EBADHEADER;
+        }
+        return 0;
+      }
+    }
+  }
+}
+
+static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_page og;
+  int      ret;
+  if(!_og){
+    /*No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,&og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return OP_ENOTFORMAT;
+    }
+    _og=&og;
+  }
+  _of->ready_state=OP_OPENED;
+  ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos,
+   _cserialnos,_og);
+  /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent
+     double-free of the tags in an unseekable stream.*/
+  if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED;
+  return ret;
+}
+
+/*Granule position manipulation routines.
+  A granule position is defined to be an unsigned 64-bit integer, with the
+   special value -1 in two's complement indicating an unset or invalid granule
+   position.
+  We are not guaranteed to have an unsigned 64-bit type, so we construct the
+   following routines that
+   a) Properly order negative numbers as larger than positive numbers, and
+   b) Check for underflow or overflow past the special -1 value.
+  This lets us operate on the full, valid range of granule positions in a
+   consistent and safe manner.
+  This full range is organized into distinct regions:
+   [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ... -2 ][-1 (invalid) ]
+
+  No one should actually use granule positions so large that they're negative,
+   even if they are technically valid, as very little software handles them
+   correctly (including most of Xiph.Org's).
+  This library also refuses to support durations so large they won't fit in a
+   signed 64-bit integer (to avoid exposing this mess to the application, and
+   to simplify a good deal of internal arithmetic), so the only way to use them
+   successfully is if pcm_start is very large.
+  This means there isn't anything you can do with negative granule positions
+   that you couldn't have done with purely non-negative ones.
+  The main purpose of these routines is to allow us to think very explicitly
+   about the possible failure cases of all granule position manipulations.*/
+
+/*Safely adds a small signed integer to a valid (not -1) granule position.
+  The result can use the full 64-bit range of values (both positive and
+   negative), but will fail on overflow (wrapping past -1; wrapping past
+   OP_INT64_MAX is explicitly okay).
+  [out] _dst_gp: The resulting granule position.
+                 Only modified on success.
+  _src_gp:       The granule position to add to.
+                 This must not be -1.
+  _delta:        The amount to add.
+                 This is allowed to be up to 32 bits to support the maximum
+                  duration of a single Ogg page (255 packets * 120 ms per
+                  packet == 1,468,800 samples at 48 kHz).
+  Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/
+static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp,
+ opus_int32 _delta){
+  /*The code below handles this case correctly, but there's no reason we
+     should ever be called with these values, so make sure we aren't.*/
+  OP_ASSERT(_src_gp!=-1);
+  if(_delta>0){
+    /*Adding this amount to the granule position would overflow its 64-bit
+       range.*/
+    if(OP_UNLIKELY(_src_gp<0)&&OP_UNLIKELY(_src_gp>=-1-_delta))return OP_EINVAL;
+    if(OP_UNLIKELY(_src_gp>OP_INT64_MAX-_delta)){
+      /*Adding this amount to the granule position would overflow the positive
+         half of its 64-bit range.
+        Since signed overflow is undefined in C, do it in a way the compiler
+         isn't allowed to screw up.*/
+      _delta-=(opus_int32)(OP_INT64_MAX-_src_gp)+1;
+      _src_gp=OP_INT64_MIN;
+    }
+  }
+  else if(_delta<0){
+    /*Subtracting this amount from the granule position would underflow its
+       64-bit range.*/
+    if(_src_gp>=0&&OP_UNLIKELY(_src_gp<-_delta))return OP_EINVAL;
+    if(OP_UNLIKELY(_src_gp<OP_INT64_MIN-_delta)){
+      /*Subtracting this amount from the granule position would underflow the
+         negative half of its 64-bit range.
+        Since signed underflow is undefined in C, do it in a way the compiler
+         isn't allowed to screw up.*/
+      _delta+=(opus_int32)(_src_gp-OP_INT64_MIN)+1;
+      _src_gp=OP_INT64_MAX;
+    }
+  }
+  *_dst_gp=_src_gp+_delta;
+  return 0;
+}
+
+/*Safely computes the difference between two granule positions.
+  The difference must fit in a signed 64-bit integer, or the function fails.
+  It correctly handles the case where the granule position has wrapped around
+   from positive values to negative ones.
+  [out] _delta: The difference between the granule positions.
+                Only modified on success.
+  _gp_a:        The granule position to subtract from.
+                This must not be -1.
+  _gp_b:        The granule position to subtract.
+                This must not be -1.
+  Return: 0 on success, or OP_EINVAL if the result would not fit in a signed
+           64-bit integer.*/
+static int op_granpos_diff(ogg_int64_t *_delta,
+ ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+  int gp_a_negative;
+  int gp_b_negative;
+  /*The code below handles these cases correctly, but there's no reason we
+     should ever be called with these values, so make sure we aren't.*/
+  OP_ASSERT(_gp_a!=-1);
+  OP_ASSERT(_gp_b!=-1);
+  gp_a_negative=OP_UNLIKELY(_gp_a<0);
+  gp_b_negative=OP_UNLIKELY(_gp_b<0);
+  if(OP_UNLIKELY(gp_a_negative^gp_b_negative)){
+    ogg_int64_t da;
+    ogg_int64_t db;
+    if(gp_a_negative){
+      /*_gp_a has wrapped to a negative value but _gp_b hasn't: the difference
+         should be positive.*/
+      /*Step 1: Handle wrapping.*/
+      /*_gp_a < 0 => da < 0.*/
+      da=(OP_INT64_MIN-_gp_a)-1;
+      /*_gp_b >= 0  => db >= 0.*/
+      db=OP_INT64_MAX-_gp_b;
+      /*Step 2: Check for overflow.*/
+      if(OP_UNLIKELY(OP_INT64_MAX+da<db))return OP_EINVAL;
+      *_delta=db-da;
+    }
+    else{
+      /*_gp_b has wrapped to a negative value but _gp_a hasn't: the difference
+         should be negative.*/
+      /*Step 1: Handle wrapping.*/
+      /*_gp_a >= 0 => da <= 0*/
+      da=_gp_a+OP_INT64_MIN;
+      /*_gp_b < 0 => db <= 0*/
+      db=OP_INT64_MIN-_gp_b;
+      /*Step 2: Check for overflow.*/
+      if(OP_UNLIKELY(da<OP_INT64_MIN-db))return OP_EINVAL;
+      *_delta=da+db;
+    }
+  }
+  else *_delta=_gp_a-_gp_b;
+  return 0;
+}
+
+static int op_granpos_cmp(ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+  /*The invalid granule position -1 should behave like NaN: neither greater
+     than nor less than any other granule position, nor equal to any other
+     granule position, including itself.
+    However, that means there isn't anything we could sensibly return from this
+     function for it.*/
+  OP_ASSERT(_gp_a!=-1);
+  OP_ASSERT(_gp_b!=-1);
+  /*Handle the wrapping cases.*/
+  if(OP_UNLIKELY(_gp_a<0)){
+    if(_gp_b>=0)return 1;
+    /*Else fall through.*/
+  }
+  else if(OP_UNLIKELY(_gp_b<0))return -1;
+  /*No wrapping case.*/
+  return (_gp_a>_gp_b)-(_gp_b>_gp_a);
+}
+
+/*Returns the duration of the packet (in samples at 48 kHz), or a negative
+   value on error.*/
+static int op_get_packet_duration(const unsigned char *_data,int _len){
+  int nframes;
+  int frame_size;
+  int nsamples;
+  nframes=opus_packet_get_nb_frames(_data,_len);
+  if(OP_UNLIKELY(nframes<0))return OP_EBADPACKET;
+  frame_size=opus_packet_get_samples_per_frame(_data,48000);
+  nsamples=nframes*frame_size;
+  if(OP_UNLIKELY(nsamples>120*48))return OP_EBADPACKET;
+  return nsamples;
+}
+
+/*This function more properly belongs in info.c, but we define it here to allow
+   the static granule position manipulation functions to remain static.*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){
+  opus_int32 pre_skip;
+  pre_skip=_head->pre_skip;
+  if(_gp!=-1&&op_granpos_add(&_gp,_gp,-pre_skip))_gp=-1;
+  return _gp;
+}
+
+/*Grab all the packets currently in the stream state, and compute their
+   durations.
+  _of->op_count is set to the number of packets collected.
+  [out] _durations: Returns the durations of the individual packets.
+  Return: The total duration of all packets, or OP_HOLE if there was a hole.*/
+static opus_int32 op_collect_audio_packets(OggOpusFile *_of,
+ int _durations[255]){
+  opus_int32 total_duration;
+  int        op_count;
+  /*Count the durations of all packets in the page.*/
+  op_count=0;
+  total_duration=0;
+  for(;;){
+    int ret;
+    /*This takes advantage of undocumented libogg behavior that returned
+       ogg_packet buffers are valid at least until the next page is
+       submitted.
+      Relying on this is not too terrible, as _none_ of the Ogg memory
+       ownership/lifetime rules are well-documented.
+      But I can read its code and know this will work.*/
+    ret=ogg_stream_packetout(&_of->os,_of->op+op_count);
+    if(!ret)break;
+    if(OP_UNLIKELY(ret<0)){
+      /*We shouldn't get holes in the middle of pages.*/
+      OP_ASSERT(op_count==0);
+      /*Set the return value and break out of the loop.
+        We want to make sure op_count gets set to 0, because we've ingested a
+         page, so any previously loaded packets are now invalid.*/
+      total_duration=OP_HOLE;
+      break;
+    }
+    /*Unless libogg is broken, we can't get more than 255 packets from a
+       single page.*/
+    OP_ASSERT(op_count<255);
+    _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet,
+     _of->op[op_count].bytes);
+    if(OP_LIKELY(_durations[op_count]>0)){
+      /*With at most 255 packets on a page, this can't overflow.*/
+      total_duration+=_durations[op_count++];
+    }
+    /*Ignore packets with an invalid TOC sequence.*/
+    else if(op_count>0){
+      /*But save the granule position, if there was one.*/
+      _of->op[op_count-1].granulepos=_of->op[op_count].granulepos;
+    }
+  }
+  _of->op_pos=0;
+  _of->op_count=op_count;
+  return total_duration;
+}
+
+/*Starting from current cursor position, get the initial PCM offset of the next
+   page.
+  This also validates the granule position on the first page with a completed
+   audio data packet, as required by the spec.
+  If this link is completely empty (no pages with completed packets), then this
+   function sets pcm_start=pcm_end=0 and returns the BOS page of the next link
+   (if any).
+  In the seekable case, we initialize pcm_end=-1 before calling this function,
+   so that later we can detect that the link was empty before calling
+   op_find_final_pcm_offset().
+  [inout] _link: The link for which to find pcm_start.
+  [out] _og:     Returns the BOS page of the next link if this link was empty.
+                 In the unseekable case, we can then feed this to
+                  op_fetch_headers() to start the next link.
+                 The caller may pass NULL (e.g., for seekable streams), in
+                  which case this page will be discarded.
+  Return: 0 on success, 1 if there is a buffered BOS page available, or a
+           negative value on unrecoverable error.*/
+static int op_find_initial_pcm_offset(OggOpusFile *_of,
+ OggOpusLink *_link,ogg_page *_og){
+  ogg_page     og;
+  ogg_int64_t  pcm_start;
+  ogg_int64_t  prev_packet_gp;
+  ogg_int64_t  cur_page_gp;
+  ogg_uint32_t serialno;
+  opus_int32   total_duration;
+  int          durations[255];
+  int          cur_page_eos;
+  int          op_count;
+  int          pi;
+  if(_og==NULL)_og=&og;
+  serialno=_of->os.serialno;
+  op_count=0;
+  /*We shouldn't have to initialize total_duration, but gcc is too dumb to
+     figure out that op_count>0 implies we've been through the whole loop at
+     least once.*/
+  total_duration=0;
+  do{
+    opus_int64 llret;
+    llret=op_get_next_page(_of,_og,_of->end);
+    /*We should get a page unless the file is truncated or mangled.
+      Otherwise there are no audio data packets in the whole logical stream.*/
+    if(OP_UNLIKELY(llret<0)){
+      /*Fail if there was a read error.*/
+      if(llret<OP_FALSE)return (int)llret;
+      /*Fail if the pre-skip is non-zero, since it's asking us to skip more
+         samples than exist.*/
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_start=_link->pcm_end=0;
+      _link->end_offset=_link->data_offset;
+      return 0;
+    }
+    /*Similarly, if we hit the next link in the chain, we've gone too far.*/
+    if(OP_UNLIKELY(ogg_page_bos(_og))){
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_end=_link->pcm_start=0;
+      _link->end_offset=_link->data_offset;
+      /*Tell the caller we've got a buffered page for them.*/
+      return 1;
+    }
+    /*Ignore pages from other streams (not strictly necessary, because of the
+       checks in ogg_stream_pagein(), but saves some work).*/
+    if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue;
+    ogg_stream_pagein(&_of->os,_og);
+    /*Bitrate tracking: add the header's bytes here.
+      The body bytes are counted when we consume the packets.*/
+    _of->bytes_tracked+=_og->header_len;
+    /*Count the durations of all packets in the page.*/
+    do total_duration=op_collect_audio_packets(_of,durations);
+    /*Ignore holes.*/
+    while(OP_UNLIKELY(total_duration<0));
+    op_count=_of->op_count;
+  }
+  while(op_count<=0);
+  /*We found the first page with a completed audio data packet: actually look
+     at the granule position.
+    RFC 3533 says, "A special value of -1 (in two's complement) indicates that
+     no packets finish on this page," which does not say that a granule
+     position that is NOT -1 indicates that some packets DO finish on that page
+     (even though this was the intention, libogg itself violated this intention
+     for years before we fixed it).
+    The Ogg Opus specification only imposes its start-time requirements
+     on the granule position of the first page with completed packets,
+     so we ignore any set granule positions until then.*/
+  cur_page_gp=_of->op[op_count-1].granulepos;
+  /*But getting a packet without a valid granule position on the page is not
+     okay.*/
+  if(cur_page_gp==-1)return OP_EBADTIMESTAMP;
+  cur_page_eos=_of->op[op_count-1].e_o_s;
+  if(OP_LIKELY(!cur_page_eos)){
+    /*The EOS flag wasn't set.
+      Work backwards from the provided granule position to get the starting PCM
+       offset.*/
+    if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*The starting granule position MUST not be smaller than the amount of
+         audio on the first page with completed packets.*/
+      return OP_EBADTIMESTAMP;
+    }
+  }
+  else{
+    /*The first page with completed packets was also the last.*/
+    if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*If there's less audio on the page than indicated by the granule
+         position, then we're doing end-trimming, and the starting PCM offset
+         is zero by spec mandate.*/
+      pcm_start=0;
+      /*However, the end-trimming MUST not ask us to trim more samples than
+         exist after applying the pre-skip.*/
+      if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){
+        return OP_EBADTIMESTAMP;
+      }
+    }
+  }
+  /*Timestamp the individual packets.*/
+  prev_packet_gp=pcm_start;
+  for(pi=0;pi<op_count;pi++){
+    if(cur_page_eos){
+      ogg_int64_t diff;
+      OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp));
+      diff=durations[pi]-diff;
+      /*If we have samples to trim...*/
+      if(diff>0){
+        /*If we trimmed the entire packet, stop (the spec says encoders
+           shouldn't do this, but we support it anyway).*/
+        if(OP_UNLIKELY(diff>durations[pi]))break;
+        _of->op[pi].granulepos=prev_packet_gp=cur_page_gp;
+        /*Move the EOS flag to this packet, if necessary, so we'll trim the
+           samples.*/
+        _of->op[pi].e_o_s=1;
+        continue;
+      }
+    }
+    /*Update the granule position as normal.*/
+    OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos,
+     prev_packet_gp,durations[pi]));
+    prev_packet_gp=_of->op[pi].granulepos;
+  }
+  /*Update the packet count after end-trimming.*/
+  _of->op_count=pi;
+  _of->cur_discard_count=_link->head.pre_skip;
+  _of->prev_packet_gp=_link->pcm_start=pcm_start;
+  return 0;
+}
+
+/*Starting from current cursor position, get the final PCM offset of the
+   previous page.
+  This also validates the duration of the link, which, while not strictly
+   required by the spec, we need to ensure duration calculations don't
+   overflow.
+  This is only done for seekable sources.
+  We must validate that op_find_initial_pcm_offset() succeeded for this link
+   before calling this function, otherwise it will scan the entire stream
+   backwards until it reaches the start, and then fail.*/
+static int op_find_final_pcm_offset(OggOpusFile *_of,
+ const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link,
+ opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp,
+ ogg_int64_t *_total_duration){
+  ogg_int64_t  total_duration;
+  ogg_int64_t  duration;
+  ogg_uint32_t cur_serialno;
+  /*For the time being, fetch end PCM offset the simple way.*/
+  cur_serialno=_link->serialno;
+  if(_end_serialno!=cur_serialno||_end_gp==-1){
+    _offset=op_get_last_page(_of,&_end_gp,_offset,
+     cur_serialno,_serialnos,_nserialnos);
+    if(OP_UNLIKELY(_offset<0))return (int)_offset;
+  }
+  /*At worst we should have found the first page with completed packets.*/
+  if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK;
+  /*This implementation requires that the difference between the first and last
+     granule positions in each link be representable in a signed, 64-bit
+     number, and that each link also have at least as many samples as the
+     pre-skip requires.*/
+  if(OP_UNLIKELY(op_granpos_diff(&duration,_end_gp,_link->pcm_start)<0)
+   ||OP_UNLIKELY(duration<_link->head.pre_skip)){
+    return OP_EBADTIMESTAMP;
+  }
+  /*We also require that the total duration be representable in a signed,
+     64-bit number.*/
+  duration-=_link->head.pre_skip;
+  total_duration=*_total_duration;
+  if(OP_UNLIKELY(OP_INT64_MAX-duration<total_duration))return OP_EBADTIMESTAMP;
+  *_total_duration=total_duration+duration;
+  _link->pcm_end=_end_gp;
+  _link->end_offset=_offset;
+  return 0;
+}
+
+/*Rescale the number _x from the range [0,_from] to [0,_to].
+  _from and _to must be positive.*/
+static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){
+  opus_int64 frac;
+  opus_int64 ret;
+  int        i;
+  if(_x>=_from)return _to;
+  if(_x<=0)return 0;
+  frac=0;
+  for(i=0;i<63;i++){
+    frac<<=1;
+    OP_ASSERT(_x<=_from);
+    if(_x>=_from>>1){
+      _x-=_from-_x;
+      frac|=1;
+    }
+    else _x<<=1;
+  }
+  ret=0;
+  for(i=0;i<63;i++){
+    if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1);
+    else ret>>=1;
+    frac>>=1;
+  }
+  return ret;
+}
+
+/*The minimum granule position spacing allowed for making predictions.
+  This corresponds to about 1 second of audio at 48 kHz for both Opus and
+   Vorbis, or one keyframe interval in Theora with the default keyframe spacing
+   of 256.*/
+#define OP_GP_SPACING_MIN (48000)
+
+/*Try to estimate the location of the next link using the current seek
+   records, assuming the initial granule position of any streams we've found is
+   0.*/
+static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr,
+ opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){
+  opus_int64 bisect;
+  int        sri;
+  int        srj;
+  /*Require that we be at least OP_CHUNK_SIZE from the end.
+    We don't require that we be at least OP_CHUNK_SIZE from the beginning,
+     because if we are we'll just scan forward without seeking.*/
+  _end_searched-=OP_CHUNK_SIZE;
+  if(_searched>=_end_searched)return -1;
+  bisect=_end_searched;
+  for(sri=0;sri<_nsr;sri++){
+    ogg_int64_t  gp1;
+    ogg_int64_t  gp2_min;
+    ogg_uint32_t serialno1;
+    opus_int64   offset1;
+    /*If the granule position is negative, either it's invalid or we'd cause
+       overflow.*/
+    gp1=_sr[sri].gp;
+    if(gp1<0)continue;
+    /*We require some minimum distance between granule positions to make an
+       estimate.
+      We don't actually know what granule position scheme is being used,
+       because we have no idea what kind of stream these came from.
+      Therefore we require a minimum spacing between them, with the
+       expectation that while bitrates and granule position increments might
+       vary locally in quite complex ways, they are globally smooth.*/
+    if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){
+      /*No granule position would satisfy us.*/
+      continue;
+    }
+    offset1=_sr[sri].offset;
+    serialno1=_sr[sri].serialno;
+    for(srj=sri;srj-->0;){
+      ogg_int64_t gp2;
+      opus_int64  offset2;
+      opus_int64  num;
+      ogg_int64_t den;
+      ogg_int64_t ipart;
+      gp2=_sr[srj].gp;
+      if(gp2<gp2_min)continue;
+      /*Oh, and also make sure these came from the same stream.*/
+      if(_sr[srj].serialno!=serialno1)continue;
+      offset2=_sr[srj].offset;
+      /*For once, we can subtract with impunity.*/
+      den=gp2-gp1;
+      ipart=gp2/den;
+      num=offset2-offset1;
+      OP_ASSERT(num>0);
+      if(ipart>0&&(offset2-_searched)/ipart<num)continue;
+      offset2-=ipart*num;
+      gp2-=ipart*den;
+      offset2-=op_rescale64(gp2,den,num)-_bias;
+      if(offset2<_searched)continue;
+      bisect=OP_MIN(bisect,offset2);
+      break;
+    }
+  }
+  return bisect>=_end_searched?-1:bisect;
+}
+
+/*Finds each bitstream link, one at a time, using a bisection search.
+  This has to begin by knowing the offset of the first link's initial page.*/
+static int op_bisect_forward_serialno(OggOpusFile *_of,
+ opus_int64 _searched,OpusSeekRecord *_sr,int _csr,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_page      og;
+  OggOpusLink  *links;
+  int           nlinks;
+  int           clinks;
+  ogg_uint32_t *serialnos;
+  int           nserialnos;
+  ogg_int64_t   total_duration;
+  int           nsr;
+  int           ret;
+  links=_of->links;
+  nlinks=clinks=_of->nlinks;
+  total_duration=0;
+  /*We start with one seek record, for the last page in the file.
+    We build up a list of records for places we seek to during link
+     enumeration.
+    This list is kept sorted in reverse order.
+    We only care about seek locations that were _not_ in the current link,
+     therefore we can add them one at a time to the end of the list as we
+     improve the lower bound on the location where the next link starts.*/
+  nsr=1;
+  for(;;){
+    opus_int64  end_searched;
+    opus_int64  bisect;
+    opus_int64  next;
+    opus_int64  last;
+    ogg_int64_t end_offset;
+    ogg_int64_t end_gp;
+    int         sri;
+    serialnos=*_serialnos;
+    nserialnos=*_nserialnos;
+    if(OP_UNLIKELY(nlinks>=clinks)){
+      if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT;
+      clinks=2*clinks+1;
+      OP_ASSERT(nlinks<clinks);
+      links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*clinks);
+      if(OP_UNLIKELY(links==NULL))return OP_EFAULT;
+      _of->links=links;
+    }
+    /*Invariants:
+      We have the headers and serial numbers for the link beginning at 'begin'.
+      We have the offset and granule position of the last page in the file
+       (potentially not a page we care about).*/
+    /*Scan the seek records we already have to save us some bisection.*/
+    for(sri=0;sri<nsr;sri++){
+      if(op_lookup_serialno(_sr[sri].serialno,serialnos,nserialnos))break;
+    }
+    /*Is the last page in our current list of serial numbers?*/
+    if(sri<=0)break;
+    /*Last page wasn't found.
+      We have at least one more link.*/
+    last=-1;
+    end_searched=_sr[sri-1].search_start;
+    next=_sr[sri-1].offset;
+    end_gp=-1;
+    if(sri<nsr){
+      _searched=_sr[sri].offset+_sr[sri].size;
+      if(_sr[sri].serialno==links[nlinks-1].serialno){
+        end_gp=_sr[sri].gp;
+        end_offset=_sr[sri].offset;
+      }
+    }
+    nsr=sri;
+    bisect=-1;
+    /*If we've already found the end of at least one link, try to pick the
+       first bisection point at twice the average link size.
+      This is a good choice for files with lots of links that are all about the
+       same size.*/
+    if(nlinks>1){
+      opus_int64 last_offset;
+      opus_int64 avg_link_size;
+      opus_int64 upper_limit;
+      last_offset=links[nlinks-1].offset;
+      avg_link_size=last_offset/(nlinks-1);
+      upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size;
+      if(OP_LIKELY(last_offset>_searched-avg_link_size)
+       &&OP_LIKELY(last_offset<upper_limit)){
+        bisect=last_offset+avg_link_size;
+        if(OP_LIKELY(bisect<upper_limit))bisect+=avg_link_size;
+      }
+    }
+    /*We guard against garbage separating the last and first pages of two
+       links below.*/
+    while(_searched<end_searched){
+      opus_int32 next_bias;
+      /*If we don't have a better estimate, use simple bisection.*/
+      if(bisect==-1)bisect=_searched+(end_searched-_searched>>1);
+      /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/
+      if(bisect-_searched<OP_CHUNK_SIZE)bisect=_searched;
+      /*Otherwise we're skipping data.
+        Forget the end page, if we saw one, as we might miss a later one.*/
+      else end_gp=-1;
+      ret=op_seek_helper(_of,bisect);
+      if(OP_UNLIKELY(ret<0))return ret;
+      last=op_get_next_page(_of,&og,_sr[nsr-1].offset);
+      if(OP_UNLIKELY(last<OP_FALSE))return (int)last;
+      next_bias=0;
+      if(last==OP_FALSE)end_searched=bisect;
+      else{
+        ogg_uint32_t serialno;
+        ogg_int64_t  gp;
+        serialno=ogg_page_serialno(&og);
+        gp=ogg_page_granulepos(&og);
+        if(!op_lookup_serialno(serialno,serialnos,nserialnos)){
+          end_searched=bisect;
+          next=last;
+          /*In reality we should always have enough room, but be paranoid.*/
+          if(OP_LIKELY(nsr<_csr)){
+            _sr[nsr].search_start=bisect;
+            _sr[nsr].offset=last;
+            OP_ASSERT(_of->offset-last>=0);
+            OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX);
+            _sr[nsr].size=(opus_int32)(_of->offset-last);
+            _sr[nsr].serialno=serialno;
+            _sr[nsr].gp=gp;
+            nsr++;
+          }
+        }
+        else{
+          _searched=_of->offset;
+          next_bias=OP_CHUNK_SIZE;
+          if(serialno==links[nlinks-1].serialno){
+            /*This page was from the stream we want, remember it.
+              If it's the last such page in the link, we won't have to go back
+               looking for it later.*/
+            end_gp=gp;
+            end_offset=last;
+          }
+        }
+      }
+      bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias);
+    }
+    /*Bisection point found.
+      Get the final granule position of the previous link, assuming
+       op_find_initial_pcm_offset() didn't already determine the link was
+       empty.*/
+    if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+      if(end_gp==-1){
+        /*If we don't know where the end page is, we'll have to seek back and
+           look for it, starting from the end of the link.*/
+        end_offset=next;
+        /*Also forget the last page we read.
+          It won't be available after the seek.*/
+        last=-1;
+      }
+      ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+       links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp,
+       &total_duration);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(last!=next){
+      /*The last page we read was not the first page the next link.
+        Move the cursor position to the offset of that first page.
+        This only performs an actual seek if the first page of the next link
+         does not start at the end of the last page from the current Opus
+         stream with a valid granule position.*/
+      ret=op_seek_helper(_of,next);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags,
+     _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og);
+    if(OP_UNLIKELY(ret<0))return ret;
+    links[nlinks].offset=next;
+    links[nlinks].data_offset=_of->offset;
+    links[nlinks].serialno=_of->os.serialno;
+    links[nlinks].pcm_end=-1;
+    /*This might consume a page from the next link, however the next bisection
+       always starts with a seek.*/
+    ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL);
+    if(OP_UNLIKELY(ret<0))return ret;
+    _searched=_of->offset;
+    /*Mark the current link count so it can be cleaned up on error.*/
+    _of->nlinks=++nlinks;
+  }
+  /*Last page is in the starting serialno list, so we've reached the last link.
+    Now find the last granule position for it (if we didn't the first time we
+     looked at the end of the stream, and if op_find_initial_pcm_offset()
+     didn't already determine the link was empty).*/
+  if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+    ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+     links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  /*Trim back the links array if necessary.*/
+  links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks);
+  if(OP_LIKELY(links!=NULL))_of->links=links;
+  /*We also don't need these anymore.*/
+  _ogg_free(*_serialnos);
+  *_serialnos=NULL;
+  *_cserialnos=*_nserialnos=0;
+  return 0;
+}
+
+static void op_update_gain(OggOpusFile *_of){
+  OpusHead   *head;
+  opus_int32  gain_q8;
+  int         li;
+  /*If decode isn't ready, then we'll apply the gain when we initialize the
+     decoder.*/
+  if(_of->ready_state<OP_INITSET)return;
+  gain_q8=_of->gain_offset_q8;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  /*We don't have to worry about overflow here because the header gain and
+     track gain must lie in the range [-32768,32767], and the user-supplied
+     offset has been pre-clamped to [-98302,98303].*/
+  switch(_of->gain_type){
+    case OP_TRACK_GAIN:{
+      int track_gain_q8;
+      track_gain_q8=0;
+      opus_tags_get_track_gain(&_of->links[li].tags,&track_gain_q8);
+      gain_q8+=track_gain_q8;
+    }
+    /*Fall through.*/
+    case OP_HEADER_GAIN:gain_q8+=head->output_gain;break;
+    case OP_ABSOLUTE_GAIN:break;
+    default:OP_ASSERT(0);
+  }
+  gain_q8=OP_CLAMP(-32768,gain_q8,32767);
+  OP_ASSERT(_of->od!=NULL);
+#if defined(OPUS_SET_GAIN)
+  opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(gain_q8));
+#else
+/*A fallback that works with both float and fixed-point is a bunch of work,
+   so just force people to use a sufficiently new version.
+  This is deployed well enough at this point that this shouldn't be a burden.*/
+# error "libopus 1.0.1 or later required"
+#endif
+}
+
+static int op_make_decode_ready(OggOpusFile *_of){
+  const OpusHead *head;
+  int             li;
+  int             stream_count;
+  int             coupled_count;
+  int             channel_count;
+  if(_of->ready_state>OP_STREAMSET)return 0;
+  if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET))return OP_EFAULT;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  stream_count=head->stream_count;
+  coupled_count=head->coupled_count;
+  channel_count=head->channel_count;
+  /*Check to see if the current decoder is compatible with the current link.*/
+  if(_of->od!=NULL&&_of->od_stream_count==stream_count
+   &&_of->od_coupled_count==coupled_count&&_of->od_channel_count==channel_count
+   &&memcmp(_of->od_mapping,head->mapping,
+   sizeof(*head->mapping)*channel_count)==0){
+    opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE);
+  }
+  else{
+    int err;
+    opus_multistream_decoder_destroy(_of->od);
+    _of->od=opus_multistream_decoder_create(48000,channel_count,
+     stream_count,coupled_count,head->mapping,&err);
+    if(_of->od==NULL)return OP_EFAULT;
+    _of->od_stream_count=stream_count;
+    _of->od_coupled_count=coupled_count;
+    _of->od_channel_count=channel_count;
+    memcpy(_of->od_mapping,head->mapping,sizeof(*head->mapping)*channel_count);
+  }
+  _of->ready_state=OP_INITSET;
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+#if !defined(OP_FIXED_POINT)
+  _of->state_channel_count=0;
+  /*Use the serial number for the PRNG seed to get repeatable output for
+     straight play-throughs.*/
+  _of->dither_seed=_of->links[li].serialno;
+#endif
+  op_update_gain(_of);
+  return 0;
+}
+
+static int op_open_seekable2_impl(OggOpusFile *_of){
+  /*64 seek records should be enough for anybody.
+    Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE
+     granularity, much more than enough.*/
+  OpusSeekRecord sr[64];
+  opus_int64     data_offset;
+  int            ret;
+  /*We can seek, so set out learning all about this file.*/
+  (*_of->callbacks.seek)(_of->source,0,SEEK_END);
+  _of->offset=_of->end=(*_of->callbacks.tell)(_of->source);
+  if(OP_UNLIKELY(_of->end<0))return OP_EREAD;
+  data_offset=_of->links[0].data_offset;
+  if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+  /*Get the offset of the last page of the physical bitstream, or, if we're
+     lucky, the last Opus page of the first link, as most Ogg Opus files will
+     contain a single logical bitstream.*/
+  ret=op_get_prev_page_serial(_of,sr,_of->end,
+   _of->links[0].serialno,_of->serialnos,_of->nserialnos);
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*If there's any trailing junk, forget about it.*/
+  _of->end=sr[0].offset+sr[0].size;
+  if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+  /*Now enumerate the bitstream structure.*/
+  return op_bisect_forward_serialno(_of,data_offset,sr,sizeof(sr)/sizeof(*sr),
+   &_of->serialnos,&_of->nserialnos,&_of->cserialnos);
+}
+
+static int op_open_seekable2(OggOpusFile *_of){
+  ogg_sync_state    oy_start;
+  ogg_stream_state  os_start;
+  ogg_packet       *op_start;
+  opus_int64        start_offset;
+  int               start_op_count;
+  int               ret;
+  /*We're partially open and have a first link header state in storage in _of.
+    Save off that stream state so we can come back to it.
+    It would be simpler to just dump all this state and seek back to
+     links[0].data_offset when we're done.
+    But we do the extra work to allow us to seek back to _exactly_ the same
+     stream position we're at now.
+    This allows, e.g., the HTTP backend to continue reading from the original
+     connection (if it's still available), instead of opening a new one.
+    This means we can open and start playing a normal Opus file with a single
+     link and reasonable packet sizes using only two HTTP requests.*/
+  start_op_count=_of->op_count;
+  /*This is a bit too large to put on the stack unconditionally.*/
+  op_start=(ogg_packet *)_ogg_malloc(sizeof(*op_start)*start_op_count);
+  if(op_start==NULL)return OP_EFAULT;
+  *&oy_start=_of->oy;
+  *&os_start=_of->os;
+  start_offset=_of->offset;
+  memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count);
+  OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of));
+  ogg_sync_init(&_of->oy);
+  ogg_stream_init(&_of->os,-1);
+  ret=op_open_seekable2_impl(_of);
+  /*Restore the old stream state.*/
+  ogg_stream_clear(&_of->os);
+  ogg_sync_clear(&_of->oy);
+  *&_of->oy=*&oy_start;
+  *&_of->os=*&os_start;
+  _of->offset=start_offset;
+  _of->op_count=start_op_count;
+  memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count);
+  _ogg_free(op_start);
+  _of->prev_packet_gp=_of->links[0].pcm_start;
+  _of->cur_discard_count=_of->links[0].head.pre_skip;
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*And restore the position indicator.*/
+  ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET);
+  return OP_UNLIKELY(ret<0)?OP_EREAD:0;
+}
+
+/*Clear out the current logical bitstream decoder.*/
+static void op_decode_clear(OggOpusFile *_of){
+  /*We don't actually free the decoder.
+    We might be able to re-use it for the next link.*/
+  _of->op_count=0;
+  _of->od_buffer_size=0;
+  _of->prev_packet_gp=-1;
+  if(!_of->seekable){
+    OP_ASSERT(_of->ready_state>=OP_INITSET);
+    opus_tags_clear(&_of->links[0].tags);
+  }
+  _of->ready_state=OP_OPENED;
+}
+
+static void op_clear(OggOpusFile *_of){
+  OggOpusLink *links;
+  _ogg_free(_of->od_buffer);
+  if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od);
+  links=_of->links;
+  if(!_of->seekable){
+    if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){
+      opus_tags_clear(&links[0].tags);
+    }
+  }
+  else if(OP_LIKELY(links!=NULL)){
+    int nlinks;
+    int link;
+    nlinks=_of->nlinks;
+    for(link=0;link<nlinks;link++)opus_tags_clear(&links[link].tags);
+  }
+  _ogg_free(links);
+  _ogg_free(_of->serialnos);
+  ogg_stream_clear(&_of->os);
+  ogg_sync_clear(&_of->oy);
+  if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source);
+}
+
+static int op_open1(OggOpusFile *_of,
+ void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes){
+  ogg_page  og;
+  ogg_page *pog;
+  int       seekable;
+  int       ret;
+  memset(_of,0,sizeof(*_of));
+  _of->end=-1;
+  _of->source=_source;
+  *&_of->callbacks=*_cb;
+  /*At a minimum, we need to be able to read data.*/
+  if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD;
+  /*Initialize the framing state.*/
+  ogg_sync_init(&_of->oy);
+  /*Perhaps some data was previously read into a buffer for testing against
+     other stream types.
+    Allow initialization from this previously read data (especially as we may
+     be reading from a non-seekable stream).
+    This requires copying it into a buffer allocated by ogg_sync_buffer() and
+     doesn't support seeking, so this is not a good mechanism to use for
+     decoding entire files from RAM.*/
+  if(_initial_bytes>0){
+    char *buffer;
+    buffer=ogg_sync_buffer(&_of->oy,_initial_bytes);
+    memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer));
+    ogg_sync_wrote(&_of->oy,_initial_bytes);
+  }
+  /*Can we seek?
+    Stevens suggests the seek test is portable.*/
+  seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1;
+  /*If seek is implemented, tell must also be implemented.*/
+  if(seekable){
+    opus_int64 pos;
+    if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL;
+    pos=(*_of->callbacks.tell)(_of->source);
+    /*If the current position is not equal to the initial bytes consumed,
+       absolute seeking will not work.*/
+    if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL;
+  }
+  _of->seekable=seekable;
+  /*Don't seek yet.
+    Set up a 'single' (current) logical bitstream entry for partial open.*/
+  _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links));
+  /*The serialno gets filled in later by op_fetch_headers().*/
+  ogg_stream_init(&_of->os,-1);
+  pog=NULL;
+  for(;;){
+    /*Fetch all BOS pages, store the Opus header and all seen serial numbers,
+      and load subsequent Opus setup headers.*/
+    ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags,
+     &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog);
+    if(OP_UNLIKELY(ret<0))break;
+    _of->nlinks=1;
+    _of->links[0].offset=0;
+    _of->links[0].data_offset=_of->offset;
+    _of->links[0].pcm_end=-1;
+    _of->links[0].serialno=_of->os.serialno;
+    /*Fetch the initial PCM offset.*/
+    ret=op_find_initial_pcm_offset(_of,_of->links,&og);
+    if(seekable||OP_LIKELY(ret<=0))break;
+    /*This link was empty, but we already have the BOS page for the next one in
+       og.
+      We can't seek, so start processing the next link right now.*/
+    opus_tags_clear(&_of->links[0].tags);
+    _of->nlinks=0;
+    if(!seekable)_of->cur_link++;
+    pog=&og;
+  }
+  if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN;
+  return ret;
+}
+
+static int op_open2(OggOpusFile *_of){
+  int ret;
+  OP_ASSERT(_of->ready_state==OP_PARTOPEN);
+  if(_of->seekable){
+    _of->ready_state=OP_OPENED;
+    ret=op_open_seekable2(_of);
+  }
+  else ret=0;
+  if(OP_LIKELY(ret>=0)){
+    /*We have buffered packets from op_find_initial_pcm_offset().
+      Move to OP_INITSET so we can use them.*/
+    _of->ready_state=OP_STREAMSET;
+    ret=op_make_decode_ready(_of);
+    if(OP_LIKELY(ret>=0))return 0;
+  }
+  /*Don't auto-close the stream on failure.*/
+  _of->callbacks.close=NULL;
+  op_clear(_of);
+  return ret;
+}
+
+OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+  OggOpusFile *of;
+  int          ret;
+  of=(OggOpusFile *)_ogg_malloc(sizeof(*of));
+  ret=OP_EFAULT;
+  if(OP_LIKELY(of!=NULL)){
+    ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes);
+    if(OP_LIKELY(ret>=0)){
+      if(_error!=NULL)*_error=0;
+      return of;
+    }
+    /*Don't auto-close the stream on failure.*/
+    of->callbacks.close=NULL;
+    op_clear(of);
+    _ogg_free(of);
+  }
+  if(_error!=NULL)*_error=ret;
+  return NULL;
+}
+
+OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+  OggOpusFile *of;
+  of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error);
+  if(OP_LIKELY(of!=NULL)){
+    int ret;
+    ret=op_open2(of);
+    if(OP_LIKELY(ret>=0))return of;
+    if(_error!=NULL)*_error=ret;
+    _ogg_free(of);
+  }
+  return NULL;
+}
+
+/*Convenience routine to clean up from failure for the open functions that
+   create their own streams.*/
+static OggOpusFile *op_open_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+  OggOpusFile *of;
+  if(OP_UNLIKELY(_source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_open_callbacks(_source,_cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+  return of;
+}
+
+OggOpusFile *op_open_file(const char *_path,int *_error){
+  OpusFileCallbacks cb;
+  return op_open_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error);
+}
+
+OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+  OpusFileCallbacks cb;
+  return op_open_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb,
+   _error);
+}
+
+/*Convenience routine to clean up from failure for the open functions that
+   create their own streams.*/
+static OggOpusFile *op_test_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+  OggOpusFile *of;
+  if(OP_UNLIKELY(_source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_test_callbacks(_source,_cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+  return of;
+}
+
+OggOpusFile *op_test_file(const char *_path,int *_error){
+  OpusFileCallbacks cb;
+  return op_test_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error);
+}
+
+OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+  OpusFileCallbacks cb;
+  return op_test_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb,
+   _error);
+}
+
+int op_test_open(OggOpusFile *_of){
+  int ret;
+  if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL;
+  ret=op_open2(_of);
+  /*op_open2() will clear this structure on failure.
+    Reset its contents to prevent double-frees in op_free().*/
+  if(OP_UNLIKELY(ret<0))memset(_of,0,sizeof(*_of));
+  return ret;
+}
+
+void op_free(OggOpusFile *_of){
+  if(OP_LIKELY(_of!=NULL)){
+    op_clear(_of);
+    _ogg_free(_of);
+  }
+}
+
+int op_seekable(const OggOpusFile *_of){
+  return _of->seekable;
+}
+
+int op_link_count(const OggOpusFile *_of){
+  return _of->nlinks;
+}
+
+ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable)_li=0;
+  return _of->links[_li<0?_of->cur_link:_li].serialno;
+}
+
+int op_channel_count(const OggOpusFile *_of,int _li){
+  return op_head(_of,_li)->channel_count;
+}
+
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)
+   ||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=_of->nlinks)){
+    return OP_EINVAL;
+  }
+  if(_li<0)return _of->end-_of->links[0].offset;
+  return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset)
+   -_of->links[_li].offset;
+}
+
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){
+  OggOpusLink *links;
+  ogg_int64_t  diff;
+  int          nlinks;
+  nlinks=_of->nlinks;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)
+   ||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=nlinks)){
+    return OP_EINVAL;
+  }
+  links=_of->links;
+  /*We verify that the granule position differences are larger than the
+     pre-skip and that the total duration does not overflow during link
+     enumeration, so we don't have to check here.*/
+  if(_li<0){
+    ogg_int64_t pcm_total;
+    int         li;
+    pcm_total=0;
+    for(li=0;li<nlinks;li++){
+      OP_ALWAYS_TRUE(!op_granpos_diff(&diff,
+       links[li].pcm_end,links[li].pcm_start));
+      pcm_total+=diff-links[li].head.pre_skip;
+    }
+    return pcm_total;
+  }
+  OP_ALWAYS_TRUE(!op_granpos_diff(&diff,
+   links[_li].pcm_end,links[_li].pcm_start));
+  return diff-links[_li].head.pre_skip;
+}
+
+const OpusHead *op_head(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable)_li=0;
+  return &_of->links[_li<0?_of->cur_link:_li].head;
+}
+
+const OpusTags *op_tags(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable){
+    if(_of->ready_state<OP_STREAMSET&&_of->ready_state!=OP_PARTOPEN){
+      return NULL;
+    }
+    _li=0;
+  }
+  else if(_li<0)_li=_of->ready_state>=OP_STREAMSET?_of->cur_link:0;
+  return &_of->links[_li].tags;
+}
+
+int op_current_link(const OggOpusFile *_of){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  return _of->cur_link;
+}
+
+/*Compute an average bitrate given a byte and sample count.
+  Return: The bitrate in bits per second.*/
+static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){
+  /*These rates are absurd, but let's handle them anyway.*/
+  if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){
+    ogg_int64_t den;
+    if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){
+      return OP_INT32_MAX;
+    }
+    den=_samples/(48000*8);
+    return (opus_int32)((_bytes+(den>>1))/den);
+  }
+  if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX;
+  /*This can't actually overflow in normal operation: even with a pre-skip of
+     545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet
+     (1275 byte frames + Opus framing overhead + Ogg lacing values), that all
+     produce a single sample of decoded output, we still don't top 45 Mbps.
+    The only way to get bitrates larger than that is with excessive Opus
+     padding, more encoded streams than output channels, or lots and lots of
+     Ogg pages with no packets on them.*/
+  return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples,
+   OP_INT32_MAX);
+}
+
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=_of->nlinks)){
+    return OP_EINVAL;
+  }
+  return op_calc_bitrate(op_raw_total(_of,_li),op_pcm_total(_of,_li));
+}
+
+opus_int32 op_bitrate_instant(OggOpusFile *_of){
+  ogg_int64_t samples_tracked;
+  opus_int32  ret;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  samples_tracked=_of->samples_tracked;
+  if(OP_UNLIKELY(samples_tracked==0))return OP_FALSE;
+  ret=op_calc_bitrate(_of->bytes_tracked,samples_tracked);
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  return ret;
+}
+
+/*Fetch and process a page.
+  This handles the case where we're at a bitstream boundary and dumps the
+   decoding machine.
+  If the decoding machine is unloaded, it loads it.
+  It also keeps prev_packet_gp up to date (seek and read both use this; seek
+   uses a special hack with _readp).
+  Return: <0) Error, OP_HOLE (lost packet), or OP_EOF.
+           0) Need more data (only if _readp==0).
+           1) Got at least one audio data packet.*/
+static int op_fetch_and_process_page(OggOpusFile *_of,
+ ogg_page *_og,opus_int64 _page_pos,int _readp,int _spanp,int _ignore_holes){
+  OggOpusLink  *links;
+  ogg_uint32_t  cur_serialno;
+  int           seekable;
+  int           cur_link;
+  int           ret;
+  /*We shouldn't get here if we have unprocessed packets.*/
+  OP_ASSERT(_of->ready_state<OP_INITSET||_of->op_pos>=_of->op_count);
+  if(!_readp)return 0;
+  seekable=_of->seekable;
+  links=_of->links;
+  cur_link=seekable?_of->cur_link:0;
+  cur_serialno=links[cur_link].serialno;
+  /*Handle one page.*/
+  for(;;){
+    ogg_page og;
+    OP_ASSERT(_of->ready_state>=OP_OPENED);
+    /*This loop is not strictly necessary, but there's no sense in doing the
+       extra checks of the larger loop for the common case in a multiplexed
+       bistream where the page is simply part of a different logical
+       bitstream.*/
+    do{
+      /*If we were given a page to use, use it.*/
+      if(_og!=NULL){
+        *&og=*_og;
+        _og=NULL;
+      }
+      /*Keep reading until we get a page with the correct serialno.*/
+      else _page_pos=op_get_next_page(_of,&og,_of->end);
+      /*EOF: Leave uninitialized.*/
+      if(_page_pos<0)return _page_pos<OP_FALSE?(int)_page_pos:OP_EOF;
+      if(OP_LIKELY(_of->ready_state>=OP_STREAMSET)){
+        if(cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){
+          /*Two possibilities:
+             1) Another stream is multiplexed into this logical section, or*/
+          if(OP_LIKELY(!ogg_page_bos(&og)))continue;
+          /* 2) Our decoding just traversed a bitstream boundary.*/
+          if(!_spanp)return OP_EOF;
+          if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of);
+          break;
+        }
+      }
+      /*Bitrate tracking: add the header's bytes here.
+        The body bytes are counted when we consume the packets.*/
+      _of->bytes_tracked+=og.header_len;
+    }
+    while(0);
+    /*Do we need to load a new machine before submitting the page?
+      This is different in the seekable and non-seekable cases.
+      In the seekable case, we already have all the header information loaded
+       and cached.
+      We just initialize the machine with it and continue on our merry way.
+      In the non-seekable (streaming) case, we'll only be at a boundary if we
+       just left the previous logical bitstream, and we're now nominally at the
+       header of the next bitstream.*/
+    if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){
+      if(seekable){
+        ogg_uint32_t serialno;
+        int          nlinks;
+        int          li;
+        serialno=ogg_page_serialno(&og);
+        /*Match the serialno to bitstream section.
+          We use this rather than offset positions to avoid problems near
+           logical bitstream boundaries.*/
+        nlinks=_of->nlinks;
+        for(li=0;li<nlinks&&links[li].serialno!=serialno;li++);
+        /*Not a desired Opus bitstream section.
+          Keep trying.*/
+        if(li>=nlinks)continue;
+        cur_serialno=serialno;
+        _of->cur_link=cur_link=li;
+        ogg_stream_reset_serialno(&_of->os,serialno);
+        _of->ready_state=OP_STREAMSET;
+        /*If we're at the start of this link, initialize the granule position
+           and pre-skip tracking.*/
+        if(_page_pos<=links[cur_link].data_offset){
+          _of->prev_packet_gp=links[cur_link].pcm_start;
+          _of->cur_discard_count=links[cur_link].head.pre_skip;
+          /*Ignore a hole at the start of a new link (this is common for
+             streams joined in the middle) or after seeking.*/
+          _ignore_holes=1;
+        }
+      }
+      else{
+        do{
+          /*We're streaming.
+            Fetch the two header packets, build the info struct.*/
+          ret=op_fetch_headers(_of,&links[0].head,&links[0].tags,
+           NULL,NULL,NULL,&og);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*op_find_initial_pcm_offset() will suppress any initial hole for us,
+             so no need to set _ignore_holes.*/
+          ret=op_find_initial_pcm_offset(_of,links,&og);
+          if(OP_UNLIKELY(ret<0))return ret;
+          _of->links[0].serialno=cur_serialno=_of->os.serialno;
+          _of->cur_link++;
+        }
+        /*If the link was empty, keep going, because we already have the
+           BOS page of the next one in og.*/
+        while(OP_UNLIKELY(ret>0));
+        /*If we didn't get any packets out of op_find_initial_pcm_offset(),
+           keep going (this is possible if end-trimming trimmed them all).*/
+        if(_of->op_count<=0)continue;
+        /*Otherwise, we're done.*/
+        ret=op_make_decode_ready(_of);
+        if(OP_UNLIKELY(ret<0))return ret;
+        return 1;
+      }
+    }
+    /*The buffered page is the data we want, and we're ready for it.
+      Add it to the stream state.*/
+    if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){
+      ret=op_make_decode_ready(_of);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    /*Extract all the packets from the current page.*/
+    ogg_stream_pagein(&_of->os,&og);
+    if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+      opus_int32 total_duration;
+      int        durations[255];
+      int        op_count;
+      total_duration=op_collect_audio_packets(_of,durations);
+      if(OP_UNLIKELY(total_duration<0)){
+        /*Drain the packets from the page anyway.*/
+        total_duration=op_collect_audio_packets(_of,durations);
+        OP_ASSERT(total_duration>=0);
+        /*Report holes to the caller.*/
+        if(!_ignore_holes)return OP_HOLE;
+      }
+      op_count=_of->op_count;
+      /*If we found at least one audio data packet, compute per-packet granule
+         positions for them.*/
+      if(op_count>0){
+        ogg_int64_t diff;
+        ogg_int64_t prev_packet_gp;
+        ogg_int64_t cur_packet_gp;
+        ogg_int64_t cur_page_gp;
+        int         cur_page_eos;
+        int         pi;
+        cur_page_gp=_of->op[op_count-1].granulepos;
+        cur_page_eos=_of->op[op_count-1].e_o_s;
+        prev_packet_gp=_of->prev_packet_gp;
+        if(OP_UNLIKELY(prev_packet_gp==-1)){
+          opus_int32 cur_discard_count;
+          /*This is the first call after a raw seek.
+            Try to reconstruct prev_packet_gp from scratch.*/
+          OP_ASSERT(seekable);
+          if(OP_UNLIKELY(cur_page_eos)){
+            /*If the first page we hit after our seek was the EOS page, and
+               we didn't start from data_offset or before, we don't have
+               enough information to do end-trimming.
+              Proceed to the next link, rather than risk playing back some
+               samples that shouldn't have been played.*/
+            _of->op_count=0;
+            continue;
+          }
+          /*By default discard 80 ms of data after a seek, unless we seek
+             into the pre-skip region.*/
+          cur_discard_count=80*48;
+          cur_page_gp=_of->op[op_count-1].granulepos;
+          /*Try to initialize prev_packet_gp.
+            If the current page had packets but didn't have a granule
+             position, or the granule position it had was too small (both
+             illegal), just use the starting granule position for the link.*/
+          prev_packet_gp=links[cur_link].pcm_start;
+          if(OP_LIKELY(cur_page_gp!=-1)){
+            op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration);
+          }
+          if(OP_LIKELY(!op_granpos_diff(&diff,
+           prev_packet_gp,links[cur_link].pcm_start))){
+            opus_int32 pre_skip;
+            /*If we start at the beginning of the pre-skip region, or we're
+               at least 80 ms from the end of the pre-skip region, we discard
+               to the end of the pre-skip region.
+              Otherwise, we still use the 80 ms default, which will discard
+               past the end of the pre-skip region.*/
+            pre_skip=links[cur_link].head.pre_skip;
+            if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){
+              cur_discard_count=pre_skip-(int)diff;
+            }
+          }
+          _of->cur_discard_count=cur_discard_count;
+        }
+        if(OP_UNLIKELY(cur_page_gp==-1)){
+          /*This page had completed packets but didn't have a valid granule
+             position.
+            This is illegal, but we'll try to handle it by continuing to count
+             forwards from the previous page.*/
+          if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){
+            /*The timestamp for this page overflowed.*/
+            cur_page_gp=links[cur_link].pcm_end;
+          }
+        }
+        /*If we hit the last page, handle end-trimming.*/
+        if(OP_UNLIKELY(cur_page_eos)
+         &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp))
+         &&OP_LIKELY(diff<total_duration)){
+          cur_packet_gp=prev_packet_gp;
+          for(pi=0;pi<op_count;pi++){
+            diff=durations[pi]-diff;
+            /*If we have samples to trim...*/
+            if(diff>0){
+              /*If we trimmed the entire packet, stop (the spec says encoders
+                 shouldn't do this, but we support it anyway).*/
+              if(OP_UNLIKELY(diff>durations[pi]))break;
+              cur_packet_gp=cur_page_gp;
+              /*Move the EOS flag to this packet, if necessary, so we'll trim
+                 the samples during decode.*/
+              _of->op[pi].e_o_s=1;
+            }
+            else{
+              /*Update the granule position as normal.*/
+              OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+               cur_packet_gp,durations[pi]));
+            }
+            _of->op[pi].granulepos=cur_packet_gp;
+            OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp));
+          }
+        }
+        else{
+          /*Propagate timestamps to earlier packets.
+            op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration)
+             should succeed and give prev_packet_gp==cur_page_gp.
+            But we don't bother to check that, as there isn't much we can do
+             if it's not true.
+            The only thing we guarantee is that the start and end granule
+             positions of the packets are valid, and that they are monotonic
+             within a page.
+            They might be completely out of range for this link (we'll check
+             that elsewhere), or non-monotonic between pages.*/
+          if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp,
+           cur_page_gp,-total_duration)<0)){
+            /*The starting timestamp for the first packet on this page
+               underflowed.
+              This is illegal, but we ignore it.*/
+            prev_packet_gp=0;
+          }
+          for(pi=0;pi<op_count;pi++){
+            if(OP_UNLIKELY(op_granpos_add(&cur_packet_gp,
+             cur_page_gp,-total_duration)<0)){
+              /*The start timestamp for this packet underflowed.
+                This is illegal, but we ignore it.*/
+              cur_packet_gp=0;
+            }
+            total_duration-=durations[pi];
+            OP_ASSERT(total_duration>=0);
+            OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+             cur_packet_gp,durations[pi]));
+            _of->op[pi].granulepos=cur_packet_gp;
+          }
+          OP_ASSERT(total_duration==0);
+        }
+        _of->prev_packet_gp=prev_packet_gp;
+        _of->op_count=pi;
+        /*If end-trimming didn't trim all the packets, we're done.*/
+        if(OP_LIKELY(pi>0))return 1;
+      }
+    }
+  }
+}
+
+int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){
+  int ret;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  /*Don't dump the decoder state if we can't seek.*/
+  if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+  if(OP_UNLIKELY(_pos<0)||OP_UNLIKELY(_pos>_of->end))return OP_EINVAL;
+  /*Clear out any buffered, decoded data.*/
+  op_decode_clear(_of);
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  ret=op_seek_helper(_of,_pos);
+  if(OP_UNLIKELY(ret<0))return OP_EREAD;
+  ret=op_fetch_and_process_page(_of,NULL,-1,1,1,1);
+  /*If we hit EOF, op_fetch_and_process_page() leaves us uninitialized.
+    Instead, jump to the end.*/
+  if(ret==OP_EOF){
+    int cur_link;
+    op_decode_clear(_of);
+    cur_link=_of->nlinks-1;
+    _of->cur_link=cur_link;
+    _of->prev_packet_gp=_of->links[cur_link].pcm_end;
+    _of->cur_discard_count=0;
+    ret=0;
+  }
+  else if(ret>0)ret=0;
+  return ret;
+}
+
+/*Convert a PCM offset relative to the start of the whole stream to a granule
+   position in an individual link.*/
+static ogg_int64_t op_get_granulepos(const OggOpusFile *_of,
+ ogg_int64_t _pcm_offset,int *_li){
+  const OggOpusLink *links;
+  ogg_int64_t        duration;
+  int                nlinks;
+  int                li;
+  OP_ASSERT(_pcm_offset>=0);
+  nlinks=_of->nlinks;
+  links=_of->links;
+  for(li=0;OP_LIKELY(li<nlinks);li++){
+    ogg_int64_t pcm_start;
+    opus_int32  pre_skip;
+    pcm_start=links[li].pcm_start;
+    pre_skip=links[li].head.pre_skip;
+    OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li].pcm_end,pcm_start));
+    duration-=pre_skip;
+    if(_pcm_offset<duration){
+      _pcm_offset+=pre_skip;
+      if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){
+        /*Adding this amount to the granule position would overflow the positive
+           half of its 64-bit range.
+          Since signed overflow is undefined in C, do it in a way the compiler
+           isn't allowed to screw up.*/
+        _pcm_offset-=OP_INT64_MAX-pcm_start+1;
+        pcm_start=OP_INT64_MIN;
+      }
+      pcm_start+=_pcm_offset;
+      *_li=li;
+      return pcm_start;
+    }
+    _pcm_offset-=duration;
+  }
+  return -1;
+}
+
+/*This controls how close the target has to be to use the current stream
+   position to subdivide the initial range.
+  Two minutes seems to be a good default.*/
+#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000)
+
+/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size,
+   but it's meant to serve as documentation for portions of the seeking
+   algorithm that are purely optional, to aid others learning from/porting this
+   code to other contexts.*/
+/*#define OP_SMALL_FOOTPRINT (1)*/
+
+/*Search within link _li for the page with the highest granule position
+   preceding (or equal to) _target_gp.
+  There is a danger here: missing pages or incorrect frame number information
+   in the bitstream could make our task impossible.
+  Account for that (and report it as an error condition).*/
+static int op_pcm_seek_page(OggOpusFile *_of,
+ ogg_int64_t _target_gp,int _li){
+  const OggOpusLink *link;
+  ogg_page           og;
+  ogg_int64_t        pcm_pre_skip;
+  ogg_int64_t        pcm_start;
+  ogg_int64_t        pcm_end;
+  ogg_int64_t        best_gp;
+  ogg_int64_t        diff;
+  ogg_uint32_t       serialno;
+  opus_int32         pre_skip;
+  opus_int64         begin;
+  opus_int64         end;
+  opus_int64         boundary;
+  opus_int64         best;
+  opus_int64         page_offset;
+  opus_int64         d0;
+  opus_int64         d1;
+  opus_int64         d2;
+  int                force_bisect;
+  int                ret;
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  link=_of->links+_li;
+  best_gp=pcm_start=link->pcm_start;
+  pcm_end=link->pcm_end;
+  serialno=link->serialno;
+  best=begin=link->data_offset;
+  page_offset=-1;
+  /*We discard the first 80 ms of data after a seek, so seek back that much
+     farther.
+    If we can't, simply seek to the beginning of the link.*/
+  if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0)
+   ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){
+    _target_gp=pcm_start;
+  }
+  /*Special case seeking to the start of the link.*/
+  pre_skip=link->head.pre_skip;
+  OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip));
+  if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin;
+  else{
+    end=boundary=link->end_offset;
+#if !defined(OP_SMALL_FOOTPRINT)
+    /*If we were decoding from this link, we can narrow the range a bit.*/
+    if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+      opus_int64 offset;
+      int        op_count;
+      op_count=_of->op_count;
+      /*The only way the offset can be invalid _and_ we can fail the granule
+         position checks below is if someone changed the contents of the last
+         page since we read it.
+        We'd be within our rights to just return OP_EBADLINK in that case, but
+         we'll simply ignore the current position instead.*/
+      offset=_of->offset;
+      if(op_count>0&&OP_LIKELY(offset<=end)){
+        ogg_int64_t gp;
+        /*Make sure the timestamp is valid.
+          The granule position might be -1 if we collected the packets from a
+           page without a granule position after reporting a hole.*/
+        gp=_of->op[op_count-1].granulepos;
+        if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0)
+         &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){
+          OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp));
+          /*We only actually use the current time if either
+            a) We can cut off at least half the range, or
+            b) We're seeking sufficiently close to the current position that
+                it's likely to be informative.
+            Otherwise it appears using the whole link range to estimate the
+             first seek location gives better results, on average.*/
+          if(diff<0){
+            OP_ASSERT(offset>=begin);
+            if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){
+              best=begin=offset;
+              best_gp=pcm_start=gp;
+            }
+          }
+          else{
+            ogg_int64_t prev_page_gp;
+            /*We might get lucky and already have the packet with the target
+               buffered.
+              Worth checking.
+              For very small files (with all of the data in a single page,
+               generally 1 second or less), we can loop them continuously
+               without seeking at all.*/
+            OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos,
+             op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes)));
+            if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){
+              /*Don't call op_decode_clear(), because it will dump our
+                 packets.*/
+              _of->op_pos=0;
+              _of->od_buffer_size=0;
+              _of->prev_packet_gp=prev_page_gp;
+              _of->ready_state=OP_STREAMSET;
+              return op_make_decode_ready(_of);
+            }
+            /*No such luck.
+              Check if we can cut off at least half the range, though.*/
+            if(offset-begin<=end-begin>>1||diff<OP_CUR_TIME_THRESH){
+              /*We really want the page start here, but this will do.*/
+              end=boundary=offset;
+              pcm_end=gp;
+            }
+          }
+        }
+      }
+    }
+#endif
+  }
+  /*This code was originally based on the "new search algorithm by HB (Nicholas
+     Vinen)" from libvorbisfile.
+    It has been modified substantially since.*/
+  op_decode_clear(_of);
+  /*Initialize the interval size history.*/
+  d2=d1=d0=end-begin;
+  force_bisect=0;
+  while(begin<end){
+    opus_int64 bisect;
+    opus_int64 next_boundary;
+    opus_int32 chunk_size;
+    if(end-begin<OP_CHUNK_SIZE)bisect=begin;
+    else{
+      /*Update the interval size history.*/
+      d0=d1>>1;
+      d1=d2>>1;
+      d2=end-begin>>1;
+      if(force_bisect)bisect=begin+(end-begin>>1);
+      else{
+        ogg_int64_t diff2;
+        OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+        OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start));
+        /*Take a (pretty decent) guess.*/
+        bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE;
+      }
+      if(bisect-OP_CHUNK_SIZE<begin)bisect=begin;
+      force_bisect=0;
+    }
+    if(bisect!=_of->offset){
+      page_offset=-1;
+      ret=op_seek_helper(_of,bisect);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    chunk_size=OP_CHUNK_SIZE;
+    next_boundary=boundary;
+    while(begin<end){
+      page_offset=op_get_next_page(_of,&og,boundary);
+      if(page_offset<0){
+        if(page_offset<OP_FALSE)return (int)page_offset;
+        /*There are no more pages in our interval from our stream with a valid
+           timestamp that start at position bisect or later.*/
+        /*If we scanned the whole interval, we're done.*/
+        if(bisect<=begin+1)end=begin;
+        else{
+          /*Otherwise, back up one chunk.*/
+          bisect=OP_MAX(bisect-chunk_size,begin);
+          ret=op_seek_helper(_of,bisect);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*Bump up the chunk size.*/
+          chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+          /*If we did find a page from another stream or without a timestamp,
+             don't read past it.*/
+          boundary=next_boundary;
+        }
+      }
+      else{
+        ogg_int64_t gp;
+        /*Save the offset of the first page we found after the seek, regardless
+           of the stream it came from or whether or not it has a timestamp.*/
+        next_boundary=OP_MIN(page_offset,next_boundary);
+        if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue;
+        gp=ogg_page_granulepos(&og);
+        if(gp==-1)continue;
+        if(op_granpos_cmp(gp,_target_gp)<0){
+          /*We found a page that ends before our target.
+            Advance to the raw offset of the next page.*/
+          begin=_of->offset;
+          if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0)
+           ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){
+            /*Don't let pcm_start get out of range!
+              That could happen with an invalid timestamp.*/
+            break;
+          }
+          /*Save the byte offset of the end of the page with this granule
+             position.*/
+          best=begin;
+          best_gp=pcm_start=gp;
+          OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+          /*If we're more than a second away from our target, break out and
+             do another bisection.*/
+          if(diff>48000)break;
+          /*Otherwise, keep scanning forward (do NOT use begin+1).*/
+          bisect=begin;
+        }
+        else{
+          /*We found a page that ends after our target.*/
+          /*If we scanned the whole interval before we found it, we're done.*/
+          if(bisect<=begin+1)end=begin;
+          else{
+            end=bisect;
+            /*In later iterations, don't read past the first page we found.*/
+            boundary=next_boundary;
+            /*If we're not making much progress shrinking the interval size,
+               start forcing straight bisection to limit the worst case.*/
+            force_bisect=end-begin>d0*2;
+            /*Don't let pcm_end get out of range!
+              That could happen with an invalid timestamp.*/
+            if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)
+             &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){
+              pcm_end=gp;
+            }
+            break;
+          }
+        }
+      }
+    }
+  }
+  /*Found our page.
+    Seek to the end of it and update prev_packet_gp.
+    Our caller will set cur_discard_count.
+    This is an easier case than op_raw_seek(), as we don't need to keep any
+     packets from the page we found.*/
+  /*Seek, if necessary.*/
+  if(best!=page_offset){
+    page_offset=-1;
+    ret=op_seek_helper(_of,best);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0);
+  _of->cur_link=_li;
+  _of->ready_state=OP_STREAMSET;
+  _of->prev_packet_gp=best_gp;
+  ogg_stream_reset_serialno(&_of->os,serialno);
+  ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,1,0,1);
+  if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+  /*Verify result.*/
+  if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){
+    return OP_EBADLINK;
+  }
+  return 0;
+}
+
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){
+  const OggOpusLink *link;
+  ogg_int64_t        pcm_start;
+  ogg_int64_t        target_gp;
+  ogg_int64_t        prev_packet_gp;
+  ogg_int64_t        skip;
+  ogg_int64_t        diff;
+  int                op_count;
+  int                op_pos;
+  int                ret;
+  int                li;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+  if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL;
+  target_gp=op_get_granulepos(_of,_pcm_offset,&li);
+  if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL;
+  link=_of->links+li;
+  pcm_start=link->pcm_start;
+  OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start));
+#if !defined(OP_SMALL_FOOTPRINT)
+  /*For small (90 ms or less) forward seeks within the same link, just decode
+     forward.
+    This also optimizes the case of seeking to the current position.*/
+  if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+    ogg_int64_t gp;
+    gp=_of->prev_packet_gp;
+    if(OP_LIKELY(gp!=-1)){
+      int nbuffered;
+      nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0);
+      OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+      /*We do _not_ add cur_discard_count to gp.
+        Otherwise the total amount to discard could grow without bound, and it
+         would be better just to do a full seek.*/
+      if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){
+        ogg_int64_t discard_count;
+        discard_count=_pcm_offset-diff;
+        /*We use a threshold of 90 ms instead of 80, since 80 ms is the
+           _minimum_ we would have discarded after a full seek.
+          Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/
+        if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){
+          _of->cur_discard_count=(opus_int32)discard_count;
+          return 0;
+        }
+      }
+    }
+  }
+#endif
+  ret=op_pcm_seek_page(_of,target_gp,li);
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*Now skip samples until we actually get to our target.*/
+  /*Figure out where we should skip to.*/
+  if(_pcm_offset<=link->head.pre_skip)skip=0;
+  else skip=OP_MAX(_pcm_offset-80*48,0);
+  OP_ASSERT(_pcm_offset-skip>=0);
+  OP_ASSERT(_pcm_offset-skip<OP_INT32_MAX-120*48);
+  /*Skip packets until we find one with samples past our skip target.*/
+  for(;;){
+    op_count=_of->op_count;
+    prev_packet_gp=_of->prev_packet_gp;
+    for(op_pos=_of->op_pos;op_pos<op_count;op_pos++){
+      ogg_int64_t cur_packet_gp;
+      cur_packet_gp=_of->op[op_pos].granulepos;
+      if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start))
+       &&diff>skip){
+        break;
+      }
+      prev_packet_gp=cur_packet_gp;
+    }
+    _of->prev_packet_gp=prev_packet_gp;
+    _of->op_pos=op_pos;
+    if(op_pos<op_count)break;
+    /*We skipped all the packets on this page.
+      Fetch another.*/
+    ret=op_fetch_and_process_page(_of,NULL,-1,1,0,1);
+    if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+  }
+  OP_ALWAYS_TRUE(!op_granpos_diff(&diff,prev_packet_gp,pcm_start));
+  /*We skipped too far.
+    Either the timestamps were illegal or there was a hole in the data.*/
+  if(diff>skip)return OP_EBADLINK;
+  OP_ASSERT(_pcm_offset-diff<OP_INT32_MAX);
+  /*TODO: If there are further holes/illegal timestamps, we still won't decode
+     to the correct sample.
+    However, at least op_pcm_tell() will report the correct value immediately
+     after returning.*/
+  _of->cur_discard_count=(opus_int32)(_pcm_offset-diff);
+  return 0;
+}
+
+opus_int64 op_raw_tell(const OggOpusFile *_of){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  return _of->offset;
+}
+
+/*Convert a granule position from a given link to a PCM offset relative to the
+   start of the whole stream.
+  For unseekable sources, this gets reset to 0 at the beginning of each link.*/
+static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of,
+ ogg_int64_t _gp,int _li){
+  const OggOpusLink *links;
+  ogg_int64_t        pcm_offset;
+  ogg_int64_t        delta;
+  int                li;
+  links=_of->links;
+  pcm_offset=0;
+  OP_ASSERT(_li<_of->nlinks);
+  for(li=0;li<_li;li++){
+    OP_ALWAYS_TRUE(!op_granpos_diff(&delta,
+     links[li].pcm_end,links[li].pcm_start));
+    delta-=links[li].head.pre_skip;
+    pcm_offset+=delta;
+  }
+  OP_ASSERT(_li>=0);
+  if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){
+    _gp=links[_li].pcm_end;
+  }
+  if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){
+    if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){
+      /*This means an unseekable stream claimed to have a page from more than
+         2 billion days after we joined.*/
+      OP_ASSERT(!_of->seekable);
+      return OP_INT64_MAX;
+    }
+    if(delta<links[_li].head.pre_skip)delta=0;
+    else delta-=links[_li].head.pre_skip;
+    /*In the seekable case, _gp was limited by pcm_end.
+      In the unseekable case, pcm_offset should be 0.*/
+    OP_ASSERT(pcm_offset<=OP_INT64_MAX-delta);
+    pcm_offset+=delta;
+  }
+  return pcm_offset;
+}
+
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of){
+  ogg_int64_t gp;
+  int         nbuffered;
+  int         li;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  gp=_of->prev_packet_gp;
+  if(gp==-1)return 0;
+  nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0);
+  OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+  li=_of->seekable?_of->cur_link:0;
+  if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){
+    gp=_of->links[li].pcm_end;
+  }
+  return op_get_pcm_offset(_of,gp,li);
+}
+
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx){
+  _of->decode_cb=_decode_cb;
+  _of->decode_cb_ctx=_ctx;
+}
+
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8){
+  if(_gain_type!=OP_HEADER_GAIN&&_gain_type!=OP_TRACK_GAIN
+   &&_gain_type!=OP_ABSOLUTE_GAIN){
+    return OP_EINVAL;
+  }
+  _of->gain_type=_gain_type;
+  /*The sum of header gain and track gain lies in the range [-65536,65534].
+    These bounds allow the offset to set the final value to anywhere in the
+     range [-32768,32767], which is what we'll clamp it to before applying.*/
+  _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303);
+  op_update_gain(_of);
+  return 0;
+}
+
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled){
+#if !defined(OP_FIXED_POINT)
+  _of->dither_disabled=!_enabled;
+  if(!_enabled)_of->dither_mute=65;
+#endif
+}
+
+/*Allocate the decoder scratch buffer.
+  This is done lazily, since if the user provides large enough buffers, we'll
+   never need it.*/
+static int op_init_buffer(OggOpusFile *_of){
+  int nchannels_max;
+  if(_of->seekable){
+    const OggOpusLink *links;
+    int                nlinks;
+    int                li;
+    links=_of->links;
+    nlinks=_of->nlinks;
+    nchannels_max=1;
+    for(li=0;li<nlinks;li++){
+      nchannels_max=OP_MAX(nchannels_max,links[li].head.channel_count);
+    }
+  }
+  else nchannels_max=OP_NCHANNELS_MAX;
+  _of->od_buffer=(op_sample *)_ogg_malloc(
+   sizeof(*_of->od_buffer)*nchannels_max*120*48);
+  if(_of->od_buffer==NULL)return OP_EFAULT;
+  return 0;
+}
+
+/*Decode a single packet into the target buffer.*/
+static int op_decode(OggOpusFile *_of,op_sample *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels){
+  int ret;
+  /*First we try using the application-provided decode callback.*/
+  if(_of->decode_cb!=NULL){
+#if defined(OP_FIXED_POINT)
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link);
+#else
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link);
+#endif
+  }
+  else ret=OP_DEC_USE_DEFAULT;
+  /*If the application didn't want to handle decoding, do it ourselves.*/
+  if(ret==OP_DEC_USE_DEFAULT){
+#if defined(OP_FIXED_POINT)
+    ret=opus_multistream_decode(_of->od,
+     _op->packet,_op->bytes,_pcm,_nsamples,0);
+#else
+    ret=opus_multistream_decode_float(_of->od,
+     _op->packet,_op->bytes,_pcm,_nsamples,0);
+#endif
+    OP_ASSERT(ret<0||ret==_nsamples);
+  }
+  /*If the application returned a positive value other than 0 or
+     OP_DEC_USE_DEFAULT, fail.*/
+  else if(OP_UNLIKELY(ret>0))return OP_EBADPACKET;
+  if(OP_UNLIKELY(ret<0))return OP_EBADPACKET;
+  return ret;
+}
+
+/*Read more samples from the stream, using the same API as op_read() or
+   op_read_float().*/
+static int op_read_native(OggOpusFile *_of,
+ op_sample *_pcm,int _buf_size,int *_li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  for(;;){
+    int ret;
+    if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+      int nchannels;
+      int od_buffer_pos;
+      int nsamples;
+      int op_pos;
+      nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+      od_buffer_pos=_of->od_buffer_pos;
+      nsamples=_of->od_buffer_size-od_buffer_pos;
+      /*If we have buffered samples, return them.*/
+      if(nsamples>0){
+        if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels;
+        memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos,
+         sizeof(*_pcm)*nchannels*nsamples);
+        od_buffer_pos+=nsamples;
+        _of->od_buffer_pos=od_buffer_pos;
+        if(_li!=NULL)*_li=_of->cur_link;
+        return nsamples;
+      }
+      /*If we have buffered packets, decode one.*/
+      op_pos=_of->op_pos;
+      if(OP_LIKELY(op_pos<_of->op_count)){
+        const ogg_packet *pop;
+        ogg_int64_t       diff;
+        opus_int32        cur_discard_count;
+        int               duration;
+        int               trimmed_duration;
+        pop=_of->op+op_pos++;
+        _of->op_pos=op_pos;
+        cur_discard_count=_of->cur_discard_count;
+        duration=op_get_packet_duration(pop->packet,pop->bytes);
+        /*We don't buffer packets with an invalid TOC sequence.*/
+        OP_ASSERT(duration>0);
+        trimmed_duration=duration;
+        /*Perform end-trimming.*/
+        if(OP_UNLIKELY(pop->e_o_s)){
+          if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos,
+           _of->prev_packet_gp)<=0)){
+            trimmed_duration=0;
+          }
+          else if(OP_LIKELY(!op_granpos_diff(&diff,
+           pop->granulepos,_of->prev_packet_gp))){
+            trimmed_duration=(int)OP_MIN(diff,trimmed_duration);
+          }
+        }
+        _of->prev_packet_gp=pop->granulepos;
+        if(OP_UNLIKELY(duration*nchannels>_buf_size)){
+          op_sample *buf;
+          /*If the user's buffer is too small, decode into a scratch buffer.*/
+          buf=_of->od_buffer;
+          if(OP_UNLIKELY(buf==NULL)){
+            ret=op_init_buffer(_of);
+            if(OP_UNLIKELY(ret<0))return ret;
+            buf=_of->od_buffer;
+          }
+          ret=op_decode(_of,buf,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*Perform pre-skip/pre-roll.*/
+          od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+          cur_discard_count-=od_buffer_pos;
+          _of->cur_discard_count=cur_discard_count;
+          _of->od_buffer_pos=od_buffer_pos;
+          _of->od_buffer_size=trimmed_duration;
+          /*Update bitrate tracking based on the actual samples we used from
+             what was decoded.*/
+          _of->bytes_tracked+=pop->bytes;
+          _of->samples_tracked+=trimmed_duration-od_buffer_pos;
+        }
+        else{
+          /*Otherwise decode directly into the user's buffer.*/
+          ret=op_decode(_of,_pcm,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          if(OP_LIKELY(trimmed_duration>0)){
+            /*Perform pre-skip/pre-roll.*/
+            od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+            cur_discard_count-=od_buffer_pos;
+            _of->cur_discard_count=cur_discard_count;
+            trimmed_duration-=od_buffer_pos;
+            if(OP_LIKELY(trimmed_duration>0)
+             &&OP_UNLIKELY(od_buffer_pos>0)){
+              memmove(_pcm,_pcm+od_buffer_pos*nchannels,
+               sizeof(*_pcm)*trimmed_duration*nchannels);
+            }
+            /*Update bitrate tracking based on the actual samples we used from
+               what was decoded.*/
+            _of->bytes_tracked+=pop->bytes;
+            _of->samples_tracked+=trimmed_duration;
+            if(OP_LIKELY(trimmed_duration>0)){
+              if(_li!=NULL)*_li=_of->cur_link;
+              return trimmed_duration;
+            }
+          }
+        }
+        /*Don't grab another page yet.
+          This one might have more packets, or might have buffered data now.*/
+        continue;
+      }
+    }
+    /*Suck in another page.*/
+    ret=op_fetch_and_process_page(_of,NULL,-1,1,1,0);
+    if(OP_UNLIKELY(ret==OP_EOF)){
+      if(_li!=NULL)*_li=_of->cur_link;
+      return 0;
+    }
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+}
+
+/*A generic filter to apply to the decoded audio data.
+  _src is non-const because we will destructively modify the contents of the
+   source buffer that we consume in some cases.*/
+typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels);
+
+/*Decode some samples and then apply a custom filter to them.
+  This is used to convert to different output formats.*/
+static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_read_filter_func _filter,int *_li){
+  int ret;
+  /*Ensure we have some decoded samples in our buffer.*/
+  ret=op_read_native(_of,NULL,0,_li);
+  /*Now apply the filter to them.*/
+  if(OP_LIKELY(ret>=0)&&OP_LIKELY(_of->ready_state>=OP_INITSET)){
+    int od_buffer_pos;
+    od_buffer_pos=_of->od_buffer_pos;
+    ret=_of->od_buffer_size-od_buffer_pos;
+    if(OP_LIKELY(ret>0)){
+      int nchannels;
+      nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+      ret=(*_filter)(_of,_dst,_dst_sz,
+       _of->od_buffer+nchannels*od_buffer_pos,ret,nchannels);
+      OP_ASSERT(ret>=0);
+      OP_ASSERT(ret<=_of->od_buffer_size-od_buffer_pos);
+      od_buffer_pos+=ret;
+      _of->od_buffer_pos=od_buffer_pos;
+    }
+  }
+  return ret;
+}
+
+#if !defined(OP_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+  The matrices with 5 or more channels are normalized to a total volume of 2.0,
+   since most mixes sound too quiet if normalized to 1.0 (as there is generally
+   little volume in the side/rear channels).*/
+static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+  /*3.0*/
+  {
+    {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F}
+  },
+  /*quadrophonic*/
+  {
+    {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.336F}
+  },
+  /*5.0*/
+  {
+    {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F},
+    {0.3254F,0.5636F}
+  },
+  /*5.1*/
+  {
+    {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F},
+    {0.2645F,0.4582F},{0.3741F,0.3741F}
+  },
+  /*6.1*/
+  {
+    {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F},
+    {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F}
+  },
+  /*7.1*/
+  {
+    {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F},
+    {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F}
+  }
+};
+
+#endif
+
+#if defined(OP_FIXED_POINT)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+  The matrices with 5 or more channels are normalized to a total volume of 2.0,
+   since most mixes sound too quiet if normalized to 1.0 (as there is generally
+   little volume in the side/rear channels).
+  Hence we keep the coefficients in Q14, so the downmix values won't overflow a
+   32-bit number.*/
+static const opus_int16 OP_STEREO_DOWNMIX_Q14
+ [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+  /*3.0*/
+  {
+    {9598,0},{6786,6786},{0,9598}
+  },
+  /*quadrophonic*/
+  {
+    {6924,0},{0,6924},{5996,3464},{3464,5996}
+  },
+  /*5.0*/
+  {
+    {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234}
+  },
+  /*5.1*/
+  {
+    {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129}
+  },
+  /*6.1*/
+  {
+    {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568},
+    {5275,5275}
+  },
+  /*7.1*/
+  {
+    {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183},
+    {3183,5515},{4502,4502}
+  }
+};
+
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+  return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  (void)_of;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src));
+  else{
+    opus_int16 *dst;
+    int         i;
+    dst=(opus_int16 *)_dst;
+    if(_nchannels==1){
+      for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i];
+    }
+    else{
+      for(i=0;i<_nsamples;i++){
+        opus_int32 l;
+        opus_int32 r;
+        int        ci;
+        l=r=0;
+        for(ci=0;ci<_nchannels;ci++){
+          opus_int32 s;
+          s=_src[_nchannels*i+ci];
+          l+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s;
+          r+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s;
+        }
+        /*TODO: For 5 or more channels, we should do soft clipping here.*/
+        dst[2*i+0]=(opus_int16)OP_CLAMP(-32768,l+8192>>14,32767);
+        dst[2*i+1]=(opus_int16)OP_CLAMP(-32768,r+8192>>14,32767);
+      }
+    }
+  }
+  return _nsamples;
+}
+
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+# if !defined(OP_DISABLE_FLOAT_API)
+
+static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  float *dst;
+  int    i;
+  (void)_of;
+  dst=(float *)_dst;
+  if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+  _dst_sz=_nsamples*_nchannels;
+  for(i=0;i<_dst_sz;i++)dst[i]=(1.0F/32768)*_src[i];
+  return _nsamples;
+}
+
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li);
+}
+
+static int op_short2float_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+  float *dst;
+  int    i;
+  dst=(float *)_dst;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==1){
+    _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1);
+    for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i];
+  }
+  else if(_nchannels<5){
+    /*For 3 or 4 channels, we can downmix in fixed point without risk of
+       clipping.*/
+    if(_nchannels>2){
+      _nsamples=op_stereo_filter(_of,_src,_nsamples*2,
+       _src,_nsamples,_nchannels);
+    }
+    return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2);
+  }
+  else{
+    /*For 5 or more channels, we convert to floats and then downmix (so that we
+       don't risk clipping).*/
+    for(i=0;i<_nsamples;i++){
+      float l;
+      float r;
+      int   ci;
+      l=r=0;
+      for(ci=0;ci<_nchannels;ci++){
+        float s;
+        s=(1.0F/32768)*_src[_nchannels*i+ci];
+        l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s;
+        r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s;
+      }
+      dst[2*i+0]=l;
+      dst[2*i+1]=r;
+    }
+  }
+  return _nsamples;
+}
+
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,
+   op_short2float_stereo_filter,NULL);
+}
+
+# endif
+
+#else
+
+# if defined(OP_HAVE_LRINTF)
+#  include <math.h>
+#  define op_float2int(_x) (lrintf(_x))
+# else
+#  define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F)))
+# endif
+
+/*The dithering code here is adapted from opusdec, part of opus-tools.
+  It was originally written by Greg Maxwell.*/
+
+static opus_uint32 op_rand(opus_uint32 _seed){
+  return _seed*96314165+907633515&0xFFFFFFFFU;
+}
+
+/*This implements 16-bit quantization with full triangular dither and IIR noise
+   shaping.
+  The noise shaping filters were designed by Sebastian Gesemann, and are based
+   on the LAME ATH curves with flattening to limit their peak gain to 20 dB.
+  Everyone else's noise shaping filters are mildly crazy.
+  The 48 kHz version of this filter is just a warped version of the 44.1 kHz
+   filter and probably could be improved by shifting the HF shelf up in
+   frequency a little bit, since 48 kHz has a bit more room and being more
+   conservative against bat-ears is probably more important than more noise
+   suppression.
+  This process can increase the peak level of the signal (in theory by the peak
+   error of 1.5 +20 dB, though that is unobservably rare).
+  To avoid clipping, the signal is attenuated by a couple thousandths of a dB.
+  Initially, the approach taken here was to only attenuate by the 99.9th
+   percentile, making clipping rare but not impossible (like SoX), but the
+   limited gain of the filter means that the worst case was only two
+   thousandths of a dB more, so this just uses the worst case.
+  The attenuation is probably also helpful to prevent clipping in the DAC
+   reconstruction filters or downstream resampling, in any case.*/
+
+# define OP_GAIN (32753.0F)
+
+# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF)
+
+/*48 kHz noise shaping filter, sd=2.34.*/
+
+static const float OP_FCOEF_B[4]={
+  2.2374F,-0.7339F,-0.1251F,-0.6033F
+};
+
+static const float OP_FCOEF_A[4]={
+  0.9030F,0.0116F,-0.5853F,-0.2571F
+};
+
+static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ float *_src,int _nsamples,int _nchannels){
+  opus_int16 *dst;
+  int         ci;
+  int         i;
+  dst=(opus_int16 *)_dst;
+  if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+# if defined(OP_SOFT_CLIP)
+  if(_of->state_channel_count!=_nchannels){
+    for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0;
+  }
+  opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state);
+# endif
+  if(_of->dither_disabled){
+    for(i=0;i<_nchannels*_nsamples;i++){
+      dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767));
+    }
+  }
+  else{
+    opus_uint32 seed;
+    int         mute;
+    seed=_of->dither_seed;
+    mute=_of->dither_mute;
+    if(_of->state_channel_count!=_nchannels)mute=65;
+    /*In order to avoid replacing digital silence with quiet dither noise, we
+       mute if the output has been silent for a while.*/
+    if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels);
+    for(i=0;i<_nsamples;i++){
+      int silent;
+      silent=1;
+      for(ci=0;ci<_nchannels;ci++){
+        float r;
+        float s;
+        float err;
+        int   si;
+        int   j;
+        s=_src[_nchannels*i+ci];
+        silent&=s==0;
+        s*=OP_GAIN;
+        err=0;
+        for(j=0;j<4;j++){
+          err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j]
+           -OP_FCOEF_A[j]*_of->dither_a[ci*4+j];
+        }
+        for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j];
+        for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j];
+        _of->dither_a[ci*4]=err;
+        s-=err;
+        if(mute>16)r=0;
+        else{
+          seed=op_rand(seed);
+          r=seed*OP_PRNG_GAIN;
+          seed=op_rand(seed);
+          r-=seed*OP_PRNG_GAIN;
+        }
+        /*Clamp in float out of paranoia that the input will be > 96 dBFS and
+           wrap if the integer is clamped.*/
+        si=op_float2int(OP_CLAMP(-32768,s+r,32767));
+        dst[_nchannels*i+ci]=(opus_int16)si;
+        /*Including clipping in the noise shaping is generally disastrous: the
+           futile effort to restore the clipped energy results in more clipping.
+          However, small amounts---at the level which could normally be created
+           by dither and rounding---are harmless and can even reduce clipping
+           somewhat due to the clipping sometimes reducing the dither + rounding
+           error.*/
+        _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F);
+      }
+      mute++;
+      if(!silent)mute=0;
+    }
+    _of->dither_mute=OP_MIN(mute,65);
+    _of->dither_seed=seed;
+  }
+  _of->state_channel_count=_nchannels;
+  return _nsamples;
+}
+
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li);
+}
+
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+  _of->state_channel_count=0;
+  return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  (void)_of;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src));
+  else{
+    float *dst;
+    int    i;
+    dst=(float *)_dst;
+    if(_nchannels==1){
+      for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i];
+    }
+    else{
+      for(i=0;i<_nsamples;i++){
+        float l;
+        float r;
+        int   ci;
+        l=r=0;
+        for(ci=0;ci<_nchannels;ci++){
+          l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*_src[_nchannels*i+ci];
+          r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*_src[_nchannels*i+ci];
+        }
+        dst[2*i+0]=l;
+        dst[2*i+1]=r;
+      }
+    }
+  }
+  return _nsamples;
+}
+
+static int op_float2short_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+  opus_int16 *dst;
+  dst=(opus_int16 *)_dst;
+  if(_nchannels==1){
+    int i;
+    _nsamples=op_float2short_filter(_of,dst,_dst_sz>>1,_src,_nsamples,1);
+    for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i];
+  }
+  else{
+    if(_nchannels>2){
+      _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+      _nsamples=op_stereo_filter(_of,_src,_nsamples*2,
+       _src,_nsamples,_nchannels);
+    }
+    _nsamples=op_float2short_filter(_of,dst,_dst_sz,_src,_nsamples,2);
+  }
+  return _nsamples;
+}
+
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,
+   op_float2short_stereo_filter,NULL);
+}
+
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+  _of->state_channel_count=0;
+  return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+#endif
diff --git a/src/main/jni/opus/opusfile/opusfile.h b/src/main/jni/opus/opusfile/opusfile.h
new file mode 100644
index 000000000..ae58da989
--- /dev/null
+++ b/src/main/jni/opus/opusfile/opusfile.h
@@ -0,0 +1,2089 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_opusfile_h)
+# define _opusfile_h (1)
+
+/**\mainpage
+   \section Introduction
+
+   This is the documentation for the <tt>libopusfile</tt> C API.
+
+   The <tt>libopusfile</tt> package provides a convenient high-level API for
+    decoding and basic manipulation of all Ogg Opus audio streams.
+   <tt>libopusfile</tt> is implemented as a layer on top of Xiph.Org's
+    reference
+    <tt><a href="https://www.xiph.org/ogg/doc/libogg/reference.html">libogg</a></tt>
+    and
+    <tt><a href="https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/index.html">libopus</a></tt>
+    libraries.
+
+   <tt>libopusfile</tt> provides several sets of built-in routines for
+    file/stream access, and may also use custom stream I/O routines provided by
+    the embedded environment.
+   There are built-in I/O routines provided for ANSI-compliant
+    <code>stdio</code> (<code>FILE *</code>), memory buffers, and URLs
+    (including <file:> URLs, plus optionally <http:> and <https:> URLs).
+
+   \section Organization
+
+   The main API is divided into several sections:
+   - \ref stream_open_close
+   - \ref stream_info
+   - \ref stream_decoding
+   - \ref stream_seeking
+
+   Several additional sections are not tied to the main API.
+   - \ref stream_callbacks
+   - \ref header_info
+   - \ref error_codes
+
+   \section Overview
+
+   The <tt>libopusfile</tt> API always decodes files to 48&nbsp;kHz.
+   The original sample rate is not preserved by the lossy compression, though
+    it is stored in the header to allow you to resample to it after decoding
+    (the <tt>libopusfile</tt> API does not currently provide a resampler,
+    but the
+    <a href="http://www.speex.org/docs/manual/speex-manual/node7.html#SECTION00760000000000000000">the
+    Speex resampler</a> is a good choice if you need one).
+   In general, if you are playing back the audio, you should leave it at
+    48&nbsp;kHz, provided your audio hardware supports it.
+   When decoding to a file, it may be worth resampling back to the original
+    sample rate, so as not to surprise users who might not expect the sample
+    rate to change after encoding to Opus and decoding.
+
+   Opus files can contain anywhere from 1 to 255 channels of audio.
+   The channel mappings for up to 8 channels are the same as the
+    <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+    mappings</a>.
+   A special stereo API can convert everything to 2 channels, making it simple
+    to support multichannel files in an application which only has stereo
+    output.
+   Although the <tt>libopusfile</tt> ABI provides support for the theoretical
+    maximum number of channels, the current implementation does not support
+    files with more than 8 channels, as they do not have well-defined channel
+    mappings.
+
+   Like all Ogg files, Opus files may be "chained".
+   That is, multiple Opus files may be combined into a single, longer file just
+    by concatenating the original files.
+   This is commonly done in internet radio streaming, as it allows the title
+    and artist to be updated each time the song changes, since each link in the
+    chain includes its own set of metadata.
+
+   <tt>libopusfile</tt> fully supports chained files.
+   It will decode the first Opus stream found in each link of a chained file
+    (ignoring any other streams that might be concurrently multiplexed with it,
+    such as a video stream).
+
+   The channel count can also change between links.
+   If your application is not prepared to deal with this, it can use the stereo
+    API to ensure the audio from all links will always get decoded into a
+    common format.
+   Since <tt>libopusfile</tt> always decodes to 48&nbsp;kHz, you do not have to
+    worry about the sample rate changing between links (as was possible with
+    Vorbis).
+   This makes application support for chained files with <tt>libopusfile</tt>
+    very easy.*/
+
+# if defined(__cplusplus)
+extern "C" {
+# endif
+
+# include <stdarg.h>
+# include <stdio.h>
+# include <ogg/ogg.h>
+# include "opus_multistream.h"
+
+/**@cond PRIVATE*/
+
+/*Enable special features for gcc and gcc-compatible compilers.*/
+# if !defined(OP_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define OP_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define OP_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+# if OP_GNUC_PREREQ(4,0)
+#  pragma GCC visibility push(default)
+# endif
+
+typedef struct OpusHead          OpusHead;
+typedef struct OpusTags          OpusTags;
+typedef struct OpusPictureTag    OpusPictureTag;
+typedef struct OpusServerInfo    OpusServerInfo;
+typedef struct OpusFileCallbacks OpusFileCallbacks;
+typedef struct OggOpusFile       OggOpusFile;
+
+/*Warning attributes for libopusfile functions.*/
+# if OP_GNUC_PREREQ(3,4)
+#  define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+# else
+#  define OP_WARN_UNUSED_RESULT
+# endif
+# if OP_GNUC_PREREQ(3,4)
+#  define OP_ARG_NONNULL(_x) __attribute__((__nonnull__(_x)))
+# else
+#  define OP_ARG_NONNULL(_x)
+# endif
+
+/**@endcond*/
+
+/**\defgroup error_codes Error Codes*/
+/*@{*/
+/**\name List of possible error codes
+   Many of the functions in this library return a negative error code when a
+    function fails.
+   This list provides a brief explanation of the common errors.
+   See each individual function for more details on what a specific error code
+    means in that context.*/
+/*@{*/
+
+/**A request did not succeed.*/
+#define OP_FALSE         (-1)
+/*Currently not used externally.*/
+#define OP_EOF           (-2)
+/**There was a hole in the page sequence numbers (e.g., a page was corrupt or
+    missing).*/
+#define OP_HOLE          (-3)
+/**An underlying read, seek, or tell operation failed when it should have
+    succeeded.*/
+#define OP_EREAD         (-128)
+/**A <code>NULL</code> pointer was passed where one was unexpected, or an
+    internal memory allocation failed, or an internal library error was
+    encountered.*/
+#define OP_EFAULT        (-129)
+/**The stream used a feature that is not implemented, such as an unsupported
+    channel family.*/
+#define OP_EIMPL         (-130)
+/**One or more parameters to a function were invalid.*/
+#define OP_EINVAL        (-131)
+/**A purported Ogg Opus stream did not begin with an Ogg page, a purported
+    header packet did not start with one of the required strings, "OpusHead" or
+    "OpusTags", or a link in a chained file was encountered that did not
+    contain any logical Opus streams.*/
+#define OP_ENOTFORMAT    (-132)
+/**A required header packet was not properly formatted, contained illegal
+    values, or was missing altogether.*/
+#define OP_EBADHEADER    (-133)
+/**The ID header contained an unrecognized version number.*/
+#define OP_EVERSION      (-134)
+/*Currently not used at all.*/
+#define OP_ENOTAUDIO     (-135)
+/**An audio packet failed to decode properly.
+   This is usually caused by a multistream Ogg packet where the durations of
+    the individual Opus packets contained in it are not all the same.*/
+#define OP_EBADPACKET    (-136)
+/**We failed to find data we had seen before, or the bitstream structure was
+    sufficiently malformed that seeking to the target destination was
+    impossible.*/
+#define OP_EBADLINK      (-137)
+/**An operation that requires seeking was requested on an unseekable stream.*/
+#define OP_ENOSEEK       (-138)
+/**The first or last granule position of a link failed basic validity checks.*/
+#define OP_EBADTIMESTAMP (-139)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup header_info Header Information*/
+/*@{*/
+
+/**The maximum number of channels in an Ogg Opus stream.*/
+#define OPUS_CHANNEL_COUNT_MAX (255)
+
+/**Ogg Opus bitstream information.
+   This contains the basic playback parameters for a stream, and corresponds to
+    the initial ID header packet of an Ogg Opus stream.*/
+struct OpusHead{
+  /**The Ogg Opus format version, in the range 0...255.
+     The top 4 bits represent a "major" version, and the bottom four bits
+      represent backwards-compatible "minor" revisions.
+     The current specification describes version 1.
+     This library will recognize versions up through 15 as backwards compatible
+      with the current specification.
+     An earlier draft of the specification described a version 0, but the only
+      difference between version 1 and version 0 is that version 0 did
+      not specify the semantics for handling the version field.*/
+  int           version;
+  /**The number of channels, in the range 1...255.*/
+  int           channel_count;
+  /**The number of samples that should be discarded from the beginning of the
+      stream.*/
+  unsigned      pre_skip;
+  /**The sampling rate of the original input.
+     All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz
+      for playback (unless the target hardware does not support this sampling
+      rate).
+     However, this field may be used to resample the audio back to the original
+      sampling rate, for example, when saving the output to a file.*/
+  opus_uint32   input_sample_rate;
+  /**The gain to apply to the decoded output, in dB, as a Q8 value in the range
+      -32768...32767.
+     The <tt>libopusfile</tt> API will automatically apply this gain to the
+      decoded output before returning it, scaling it by
+      <code>pow(10,output_gain/(20.0*256))</code>.*/
+  int           output_gain;
+  /**The channel mapping family, in the range 0...255.
+     Channel mapping family 0 covers mono or stereo in a single stream.
+     Channel mapping family 1 covers 1 to 8 channels in one or more streams,
+      using the Vorbis speaker assignments.
+     Channel mapping family 255 covers 1 to 255 channels in one or more
+      streams, but without any defined speaker assignment.*/
+  int           mapping_family;
+  /**The number of Opus streams in each Ogg packet, in the range 1...255.*/
+  int           stream_count;
+  /**The number of coupled Opus streams in each Ogg packet, in the range
+      0...127.
+     This must satisfy <code>0 <= coupled_count <= stream_count</code> and
+      <code>coupled_count + stream_count <= 255</code>.
+     The coupled streams appear first, before all uncoupled streams, in an Ogg
+      Opus packet.*/
+  int           coupled_count;
+  /**The mapping from coded stream channels to output channels.
+     Let <code>index=mapping[k]</code> be the value for channel <code>k</code>.
+     If <code>index<2*coupled_count</code>, then it refers to the left channel
+      from stream <code>(index/2)</code> if even, and the right channel from
+      stream <code>(index/2)</code> if odd.
+     Otherwise, it refers to the output of the uncoupled stream
+      <code>(index-coupled_count)</code>.*/
+  unsigned char mapping[OPUS_CHANNEL_COUNT_MAX];
+};
+
+/**The metadata from an Ogg Opus stream.
+
+   This structure holds the in-stream metadata corresponding to the 'comment'
+    header packet of an Ogg Opus stream.
+   The comment header is meant to be used much like someone jotting a quick
+    note on the label of a CD.
+   It should be a short, to the point text note that can be more than a couple
+    words, but not more than a short paragraph.
+
+   The metadata is stored as a series of (tag, value) pairs, in length-encoded
+    string vectors, using the same format as Vorbis (without the final "framing
+    bit"), Theora, and Speex, except for the packet header.
+   The first occurrence of the '=' character delimits the tag and value.
+   A particular tag may occur more than once, and order is significant.
+   The character set encoding for the strings is always UTF-8, but the tag
+    names are limited to ASCII, and treated as case-insensitive.
+   See <a href="http://www.xiph.org/vorbis/doc/v-comment.html">the Vorbis
+    comment header specification</a> for details.
+
+   In filling in this structure, <tt>libopusfile</tt> will null-terminate the
+    #user_comments strings for safety.
+   However, the bitstream format itself treats them as 8-bit clean vectors,
+    possibly containing NUL characters, so the #comment_lengths array should be
+    treated as their authoritative length.
+
+   This structure is binary and source-compatible with a
+    <code>vorbis_comment</code>, and pointers to it may be freely cast to
+    <code>vorbis_comment</code> pointers, and vice versa.
+   It is provided as a separate type to avoid introducing a compile-time
+    dependency on the libvorbis headers.*/
+struct OpusTags{
+  /**The array of comment string vectors.*/
+  char **user_comments;
+  /**An array of the corresponding length of each vector, in bytes.*/
+  int   *comment_lengths;
+  /**The total number of comment streams.*/
+  int    comments;
+  /**The null-terminated vendor string.
+     This identifies the software used to encode the stream.*/
+  char  *vendor;
+};
+
+/**\name Picture tag image formats*/
+/*@{*/
+
+/**The MIME type was not recognized, or the image data did not match the
+    declared MIME type.*/
+#define OP_PIC_FORMAT_UNKNOWN (-1)
+/**The MIME type indicates the image data is really a URL.*/
+#define OP_PIC_FORMAT_URL     (0)
+/**The image is a JPEG.*/
+#define OP_PIC_FORMAT_JPEG    (1)
+/**The image is a PNG.*/
+#define OP_PIC_FORMAT_PNG     (2)
+/**The image is a GIF.*/
+#define OP_PIC_FORMAT_GIF     (3)
+
+/*@}*/
+
+/**The contents of a METADATA_BLOCK_PICTURE tag.*/
+struct OpusPictureTag{
+  /**The picture type according to the ID3v2 APIC frame:
+     <ol start="0">
+     <li>Other</li>
+     <li>32x32 pixels 'file icon' (PNG only)</li>
+     <li>Other file icon</li>
+     <li>Cover (front)</li>
+     <li>Cover (back)</li>
+     <li>Leaflet page</li>
+     <li>Media (e.g. label side of CD)</li>
+     <li>Lead artist/lead performer/soloist</li>
+     <li>Artist/performer</li>
+     <li>Conductor</li>
+     <li>Band/Orchestra</li>
+     <li>Composer</li>
+     <li>Lyricist/text writer</li>
+     <li>Recording Location</li>
+     <li>During recording</li>
+     <li>During performance</li>
+     <li>Movie/video screen capture</li>
+     <li>A bright colored fish</li>
+     <li>Illustration</li>
+     <li>Band/artist logotype</li>
+     <li>Publisher/Studio logotype</li>
+     </ol>
+     Others are reserved and should not be used.
+     There may only be one each of picture type 1 and 2 in a file.*/
+  opus_int32     type;
+  /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E.
+     The MIME type may also be <code>"-->"</code> to signify that the data part
+      is a URL pointing to the picture instead of the picture data itself.
+     In this case, a terminating NUL is appended to the URL string in #data,
+      but #data_length is set to the length of the string excluding that
+      terminating NUL.*/
+  char          *mime_type;
+  /**The description of the picture, in UTF-8.*/
+  char          *description;
+  /**The width of the picture in pixels.*/
+  opus_uint32    width;
+  /**The height of the picture in pixels.*/
+  opus_uint32    height;
+  /**The color depth of the picture in bits-per-pixel (<em>not</em>
+      bits-per-channel).*/
+  opus_uint32    depth;
+  /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0
+      for non-indexed pictures.*/
+  opus_uint32    colors;
+  /**The length of the picture data in bytes.*/
+  opus_uint32    data_length;
+  /**The binary picture data.*/
+  unsigned char *data;
+  /**The format of the picture data, if known.
+     One of
+     <ul>
+     <li>#OP_PIC_FORMAT_UNKNOWN,</li>
+     <li>#OP_PIC_FORMAT_URL,</li>
+     <li>#OP_PIC_FORMAT_JPEG,</li>
+     <li>#OP_PIC_FORMAT_PNG, or</li>
+     <li>#OP_PIC_FORMAT_GIF.</li>
+     </ul>*/
+  int            format;
+};
+
+/**\name Functions for manipulating header data
+
+   These functions manipulate the #OpusHead and #OpusTags structures,
+    which describe the audio parameters and tag-value metadata, respectively.
+   These can be used to query the headers returned by <tt>libopusfile</tt>, or
+    to parse Opus headers from sources other than an Ogg Opus stream, provided
+    they use the same format.*/
+/*@{*/
+
+/**Parses the contents of the ID header packet of an Ogg Opus stream.
+   \param[out] _head Returns the contents of the parsed packet.
+                     The contents of this structure are untouched on error.
+                     This may be <code>NULL</code> to merely test the header
+                      for validity.
+   \param[in]  _data The contents of the ID header packet.
+   \param      _len  The number of bytes of data in the ID header packet.
+   \return 0 on success or a negative value on error.
+   \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead"
+                           string.
+   \retval #OP_EVERSION   If the version field signaled a version this library
+                           does not know how to parse.
+   \retval #OP_EIMPL      If the channel mapping family was 255, which general
+                           purpose players should not attempt to play.
+   \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+                           Ogg Opus specification:
+                          <ul>
+                           <li>Insufficient data,</li>
+                           <li>Too much data for the known minor versions,</li>
+                           <li>An unrecognized channel mapping family,</li>
+                           <li>Zero channels or too many channels,</li>
+                           <li>Zero coded streams,</li>
+                           <li>Too many coupled streams, or</li>
+                           <li>An invalid channel mapping index.</li>
+                          </ul>*/
+OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Converts a granule position to a sample offset for a given Ogg Opus stream.
+   The sample offset is simply <code>_gp-_head->pre_skip</code>.
+   Granule position values smaller than OpusHead#pre_skip correspond to audio
+    that should never be played, and thus have no associated sample offset.
+   This function returns -1 for such values.
+   This function also correctly handles extremely large granule positions,
+    which may have wrapped around to a negative number when stored in a signed
+    ogg_int64_t value.
+   \param _head The #OpusHead information from the ID header of the stream.
+   \param _gp   The granule position to convert.
+   \return The sample offset associated with the given granule position
+            (counting at a 48 kHz sampling rate), or the special value -1 on
+            error (i.e., the granule position was smaller than the pre-skip
+            amount).*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp)
+ OP_ARG_NONNULL(1);
+
+/**Parses the contents of the 'comment' header packet of an Ogg Opus stream.
+   \param[out] _tags An uninitialized #OpusTags structure.
+                     This returns the contents of the parsed packet.
+                     The contents of this structure are untouched on error.
+                     This may be <code>NULL</code> to merely test the header
+                      for validity.
+   \param[in]  _data The contents of the 'comment' header packet.
+   \param      _len  The number of bytes of data in the 'info' header packet.
+   \retval 0              Success.
+   \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags"
+                           string.
+   \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+                           Ogg Opus specification.
+   \retval #OP_EFAULT     If there wasn't enough memory to store the tags.*/
+OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Performs a deep copy of an #OpusTags structure.
+   \param _dst The #OpusTags structure to copy into.
+               If this function fails, the contents of this structure remain
+                untouched.
+   \param _src The #OpusTags structure to copy from.
+   \retval 0          Success.
+   \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1);
+
+/**Initializes an #OpusTags structure.
+   This should be called on a freshly allocated #OpusTags structure before
+    attempting to use it.
+   \param _tags The #OpusTags structure to initialize.*/
+void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Add a (tag, value) pair to an initialized #OpusTags structure.
+   \note Neither opus_tags_add() nor opus_tags_add_comment() support values
+    containing embedded NULs, although the bitstream format does support them.
+   To add such tags, you will need to manipulate the #OpusTags structure
+    directly.
+   \param _tags  The #OpusTags structure to add the (tag, value) pair to.
+   \param _tag   A NUL-terminated, case-insensitive, ASCII string containing
+                  the tag to add (without an '=' character).
+   \param _value A NUL-terminated UTF-8 containing the corresponding value.
+   \return 0 on success, or a negative value on failure.
+   \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3);
+
+/**Add a comment to an initialized #OpusTags structure.
+   \note Neither opus_tags_add_comment() nor opus_tags_add() support comments
+    containing embedded NULs, although the bitstream format does support them.
+   To add such tags, you will need to manipulate the #OpusTags structure
+    directly.
+   \param _tags    The #OpusTags structure to add the comment to.
+   \param _comment A NUL-terminated UTF-8 string containing the comment in
+                    "TAG=value" form.
+   \return 0 on success, or a negative value on failure.
+   \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up a comment value by its tag.
+   \param _tags  An initialized #OpusTags structure.
+   \param _tag   The tag to look up.
+   \param _count The instance of the tag.
+                 The same tag can appear multiple times, each with a distinct
+                  value, so an index is required to retrieve them all.
+                 The order in which these values appear is significant and
+                  should be preserved.
+                 Use opus_tags_query_count() to get the legal range for the
+                  \a _count parameter.
+   \return A pointer to the queried tag's value.
+           This points directly to data in the #OpusTags structure.
+           It should not be modified or freed by the application, and
+            modifications to the structure may invalidate the pointer.
+   \retval NULL If no matching tag is found.*/
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up the number of instances of a tag.
+   Call this first when querying for a specific tag and then iterate over the
+    number of instances with separate calls to opus_tags_query() to retrieve
+    all the values for that tag in order.
+   \param _tags An initialized #OpusTags structure.
+   \param _tag  The tag to look up.
+   \return The number of instances of this particular tag.*/
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified.
+   This searches for the first R128_TRACK_GAIN tag with a valid signed,
+    16-bit decimal integer value and returns the value.
+   This routine is exposed merely for convenience for applications which wish
+    to do something special with the track gain (i.e., display it).
+   If you simply wish to apply the track gain instead of the header gain, you
+    can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset.
+   \param      _tags    An initialized #OpusTags structure.
+   \param[out] _gain_q8 The track gain, in 1/256ths of a dB.
+                        This will lie in the range [-32768,32767], and should
+                         be applied in <em>addition</em> to the header gain.
+                        On error, no value is returned, and the previous
+                         contents remain unchanged.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_FALSE There was no track gain available in the given tags.*/
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Clears the #OpusTags structure.
+   This should be called on an #OpusTags structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _tags The #OpusTags structure to clear.*/
+void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+   \see opus_tagncompare
+   \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing
+                     the name of the tag to check for (without the terminating
+                     '=' character).
+   \param _comment  The comment string to check.
+   \return An integer less than, equal to, or greater than zero if \a _comment
+            is found respectively, to be less than, to match, or be greater
+            than a "tag=value" string whose tag matches \a _tag_name.*/
+int opus_tagcompare(const char *_tag_name,const char *_comment);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+   This version is slightly more efficient than opus_tagcompare() if the length
+    of the tag name is already known (e.g., because it is a constant).
+   \see opus_tagcompare
+   \param _tag_name A case-insensitive ASCII string containing the name of the
+                     tag to check for (without the terminating '=' character).
+   \param _tag_len  The number of characters in the tag name.
+                    This must be non-negative.
+   \param _comment  The comment string to check.
+   \return An integer less than, equal to, or greater than zero if \a _comment
+            is found respectively, to be less than, to match, or be greater
+            than a "tag=value" string whose tag matches the first \a _tag_len
+            characters of \a _tag_name.*/
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment);
+
+/**Parse a single METADATA_BLOCK_PICTURE tag.
+   This decodes the BASE64-encoded content of the tag and returns a structure
+    with the MIME type, description, image parameters (if known), and the
+    compressed image data.
+   If the MIME type indicates the presence of an image format we recognize
+    (JPEG, PNG, or GIF) and the actual image data contains the magic signature
+    associated with that format, then the OpusPictureTag::format field will be
+    set to the corresponding format.
+   This is provided as a convenience to avoid requiring applications to parse
+    the MIME type and/or do their own format detection for the commonly used
+    formats.
+   In this case, we also attempt to extract the image parameters directly from
+    the image data (overriding any that were present in the tag, which the
+    specification says applications are not meant to rely on).
+   The application must still provide its own support for actually decoding the
+    image data and, if applicable, retrieving that data from URLs.
+   \param[out] _pic Returns the parsed picture data.
+                    No sanitation is done on the type, MIME type, or
+                     description fields, so these might return invalid values.
+                    The contents of this structure are left unmodified on
+                     failure.
+   \param      _tag The METADATA_BLOCK_PICTURE tag contents.
+                    The leading "METADATA_BLOCK_PICTURE=" portion is optional,
+                     to allow the function to be used on either directly on the
+                     values in OpusTags::user_comments or on the return value
+                     of opus_tags_query().
+   \return 0 on success or a negative value on error.
+   \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid.
+   \retval #OP_EFAULT     There was not enough memory to store the picture tag
+                           contents.*/
+OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic,
+ const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Initializes an #OpusPictureTag structure.
+   This should be called on a freshly allocated #OpusPictureTag structure
+    before attempting to use it.
+   \param _pic The #OpusPictureTag structure to initialize.*/
+void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusPictureTag structure.
+   This should be called on an #OpusPictureTag structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _pic The #OpusPictureTag structure to clear.*/
+void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/*@}*/
+
+/*@}*/
+
+/**\defgroup url_options URL Reading Options*/
+/*@{*/
+/**\name URL reading options
+   Options for op_url_stream_create() and associated functions.
+   These allow you to provide proxy configuration parameters, skip SSL
+    certificate checks, etc.
+   Options are processed in order, and if the same option is passed multiple
+    times, only the value specified by the last occurrence has an effect
+    (unless otherwise specified).
+   They may be expanded in the future.*/
+/*@{*/
+
+/**@cond PRIVATE*/
+
+/*These are the raw numbers used to define the request codes.
+  They should not be used directly.*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464)
+#define OP_HTTP_PROXY_HOST_REQUEST            (6528)
+#define OP_HTTP_PROXY_PORT_REQUEST            (6592)
+#define OP_HTTP_PROXY_USER_REQUEST            (6656)
+#define OP_HTTP_PROXY_PASS_REQUEST            (6720)
+#define OP_GET_SERVER_INFO_REQUEST            (6784)
+
+#define OP_URL_OPT(_request) ((_request)+(char *)0)
+
+/*These macros trigger compilation errors or warnings if the wrong types are
+   provided to one of the URL options.*/
+#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x))
+#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x)))
+#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x)))
+
+/**@endcond*/
+
+/**HTTP/Shoutcast/Icecast server information associated with a URL.*/
+struct OpusServerInfo{
+  /**The name of the server (icy-name/ice-name).
+     This is <code>NULL</code> if there was no <code>icy-name</code> or
+      <code>ice-name</code> header.*/
+  char        *name;
+  /**A short description of the server (icy-description/ice-description).
+     This is <code>NULL</code> if there was no <code>icy-description</code> or
+      <code>ice-description</code> header.*/
+  char        *description;
+  /**The genre the server falls under (icy-genre/ice-genre).
+     This is <code>NULL</code> if there was no <code>icy-genre</code> or
+      <code>ice-genre</code> header.*/
+  char        *genre;
+  /**The homepage for the server (icy-url/ice-url).
+     This is <code>NULL</code> if there was no <code>icy-url</code> or
+      <code>ice-url</code> header.*/
+  char        *url;
+  /**The software used by the origin server (Server).
+     This is <code>NULL</code> if there was no <code>Server</code> header.*/
+  char        *server;
+  /**The media type of the entity sent to the recepient (Content-Type).
+     This is <code>NULL</code> if there was no <code>Content-Type</code>
+      header.*/
+  char        *content_type;
+  /**The nominal stream bitrate in kbps (icy-br/ice-bitrate).
+     This is <code>-1</code> if there was no <code>icy-br</code> or
+      <code>ice-bitrate</code> header.*/
+  opus_int32   bitrate_kbps;
+  /**Flag indicating whether the server is public (<code>1</code>) or not
+      (<code>0</code>) (icy-pub/ice-public).
+     This is <code>-1</code> if there was no <code>icy-pub</code> or
+      <code>ice-public</code> header.*/
+  int          is_public;
+  /**Flag indicating whether the server is using HTTPS instead of HTTP.
+     This is <code>0</code> unless HTTPS is being used.
+     This may not match the protocol used in the original URL if there were
+      redirections.*/
+  int          is_ssl;
+};
+
+/**Initializes an #OpusServerInfo structure.
+   All fields are set as if the corresponding header was not available.
+   \param _info The #OpusServerInfo structure to initialize.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusServerInfo structure.
+   This should be called on an #OpusServerInfo structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _info The #OpusServerInfo structure to clear.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Skip the certificate check when connecting via TLS/SSL (https).
+   \param _b <code>opus_int32</code>: Whether or not to skip the certificate
+              check.
+             The check will be skipped if \a _b is non-zero, and will not be
+              skipped if \a _b is zero.
+   \hideinitializer*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \
+ OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b)
+
+/**Proxy connections through the given host.
+   If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults
+    to 8080 (http-alt).
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _host <code>const char *</code>: The proxy server hostname.
+                This may be <code>NULL</code> to disable the use of a proxy
+                 server.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_HOST(_host) \
+ OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host)
+
+/**Use the given port when proxying connections.
+   This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a
+    non-<code>NULL</code> \a _host.
+   If this option is not provided, the proxy port number defaults to 8080
+    (http-alt).
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _port <code>opus_int32</code>: The proxy server port.
+                This must be in the range 0...65535 (inclusive), or the
+                 URL function this is passed to will fail.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_PORT(_port) \
+ OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port)
+
+/**Use the given user name for authentication when proxying connections.
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _user const char *: The proxy server user name.
+                              This may be <code>NULL</code> to disable proxy
+                               authentication.
+                              A non-<code>NULL</code> value only has an effect
+                               if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS
+                               are also specified with non-<code>NULL</code>
+                               arguments.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_USER(_user) \
+ OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user)
+
+/**Use the given password for authentication when proxying connections.
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _pass const char *: The proxy server password.
+                              This may be <code>NULL</code> to disable proxy
+                               authentication.
+                              A non-<code>NULL</code> value only has an effect
+                               if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER
+                               are also specified with non-<code>NULL</code>
+                               arguments.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_PASS(_pass) \
+ OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass)
+
+/**Parse information about the streaming server (if any) and return it.
+   Very little validation is done.
+   In particular, OpusServerInfo::url may not be a valid URL,
+    OpusServerInfo::bitrate_kbps may not really be in kbps, and
+    OpusServerInfo::content_type may not be a valid MIME type.
+   The character set of the string fields is not specified anywhere, and should
+    not be assumed to be valid UTF-8.
+   \param _info OpusServerInfo *: Returns information about the server.
+                                  If there is any error opening the stream, the
+                                   contents of this structure remain
+                                   unmodified.
+                                  On success, fills in the structure with the
+                                   server information that was available, if
+                                   any.
+                                  After a successful return, the contents of
+                                   this structure should be freed by calling
+                                   opus_server_info_clear().
+   \hideinitializer*/
+#define OP_GET_SERVER_INFO(_info) \
+ OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_callbacks Abstract Stream Reading Interface*/
+/*@{*/
+/**\name Functions for reading from streams
+   These functions define the interface used to read from and seek in a stream
+    of data.
+   A stream does not need to implement seeking, but the decoder will not be
+    able to seek if it does not do so.
+   These functions also include some convenience routines for working with
+    standard <code>FILE</code> pointers, complete streams stored in a single
+    block of memory, or URLs.*/
+/*@{*/
+
+/**Reads up to \a _nbytes bytes of data from \a _stream.
+   \param      _stream The stream to read from.
+   \param[out] _ptr    The buffer to store the data in.
+   \param      _nbytes The maximum number of bytes to read.
+                       This function may return fewer, though it will not
+                        return zero unless it reaches end-of-file.
+   \return The number of bytes successfully read, or a negative value on
+            error.*/
+typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes);
+
+/**Sets the position indicator for \a _stream.
+   The new position, measured in bytes, is obtained by adding \a _offset
+    bytes to the position specified by \a _whence.
+   If \a _whence is set to <code>SEEK_SET</code>, <code>SEEK_CUR</code>, or
+    <code>SEEK_END</code>, the offset is relative to the start of the stream,
+    the current position indicator, or end-of-file, respectively.
+   \retval 0  Success.
+   \retval -1 Seeking is not supported or an error occurred.
+              <code>errno</code> need not be set.*/
+typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence);
+
+/**Obtains the current value of the position indicator for \a _stream.
+   \return The current position indicator.*/
+typedef opus_int64 (*op_tell_func)(void *_stream);
+
+/**Closes the underlying stream.
+   \retval 0   Success.
+   \retval EOF An error occurred.
+               <code>errno</code> need not be set.*/
+typedef int (*op_close_func)(void *_stream);
+
+/**The callbacks used to access non-<code>FILE</code> stream resources.
+   The function prototypes are basically the same as for the stdio functions
+    <code>fread()</code>, <code>fseek()</code>, <code>ftell()</code>, and
+    <code>fclose()</code>.
+   The differences are that the <code>FILE *</code> arguments have been
+    replaced with a <code>void *</code>, which is to be used as a pointer to
+    whatever internal data these functions might need, that #seek and #tell
+    take and return 64-bit offsets, and that #seek <em>must</em> return -1 if
+    the stream is unseekable.*/
+struct OpusFileCallbacks{
+  /**Used to read data from the stream.
+     This must not be <code>NULL</code>.*/
+  op_read_func  read;
+  /**Used to seek in the stream.
+     This may be <code>NULL</code> if seeking is not implemented.*/
+  op_seek_func  seek;
+  /**Used to return the current read position in the stream.
+     This may be <code>NULL</code> if seeking is not implemented.*/
+  op_tell_func  tell;
+  /**Used to close the stream when the decoder is freed.
+     This may be <code>NULL</code> to leave the stream open.*/
+  op_close_func close;
+};
+
+/**Opens a stream with <code>fopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb   The callbacks to use for this file.
+                     If there is an error opening the file, nothing will be
+                      filled in here.
+   \param      _path The path to the file to open.
+                     On Windows, this string must be UTF-8 (to allow access to
+                      files whose names cannot be represented in the current
+                      MBCS code page).
+                     All other systems use the native character encoding.
+   \param      _mode The mode to open the file in.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2)
+ OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>fdopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb   The callbacks to use for this file.
+                     If there is an error opening the file, nothing will be
+                      filled in here.
+   \param      _fd   The file descriptor to open.
+   \param      _mode The mode to open the file in.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb,
+ int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>freopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb     The callbacks to use for this file.
+                       If there is an error opening the file, nothing will be
+                        filled in here.
+   \param      _path   The path to the file to open.
+                       On Windows, this string must be UTF-8 (to allow access
+                        to files whose names cannot be represented in the
+                        current MBCS code page).
+                       All other systems use the native character encoding.
+   \param      _mode   The mode to open the file in.
+   \param      _stream A stream previously returned by op_fopen(), op_fdopen(),
+                        or op_freopen().
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1)
+ OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4);
+
+/**Creates a stream that reads from the given block of memory.
+   This block of memory must contain the complete stream to decode.
+   This is useful for caching small streams (e.g., sound effects) in RAM.
+   \param[out] _cb   The callbacks to use for this stream.
+                     If there is an error creating the stream, nothing will be
+                      filled in here.
+   \param      _data The block of memory to read from.
+   \param      _size The size of the block of memory.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1);
+
+/**Creates a stream that reads from the given URL.
+   This function behaves identically to op_url_stream_create(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param[out]    _cb  The callbacks to use for this stream.
+                       If there is an error creating the stream, nothing will
+                        be filled in here.
+   \param         _url The URL to read from.
+                       Currently only the <file:>, <http:>, and <https:>
+                        schemes are supported.
+                       Both <http:> and <https:> may be disabled at compile
+                        time, in which case opening such URLs will always fail.
+                       Currently this only supports URIs.
+                       IRIs should be converted to UTF-8 and URL-escaped, with
+                        internationalized domain names encoded in punycode,
+                        before passing them to this function.
+   \param[in,out] _ap  A list of the \ref url_options "optional flags" to use.
+                       This is a variable-length list of options terminated
+                        with <code>NULL</code>.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb,
+ const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Creates a stream that reads from the given URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param[out] _cb  The callbacks to use for this stream.
+                    If there is an error creating the stream, nothing will be
+                     filled in here.
+   \param      _url The URL to read from.
+                    Currently only the <file:>, <http:>, and <https:> schemes
+                     are supported.
+                    Both <http:> and <https:> may be disabled at compile time,
+                     in which case opening such URLs will always fail.
+                    Currently this only supports URIs.
+                    IRIs should be converted to UTF-8 and URL-escaped, with
+                     internationalized domain names encoded in punycode, before
+                     passing them to this function.
+   \param      ...  The \ref url_options "optional flags" to use.
+                    This is a variable-length list of options terminated with
+                     <code>NULL</code>.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb,
+ const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_open_close Opening and Closing*/
+/*@{*/
+/**\name Functions for opening and closing streams
+
+   These functions allow you to test a stream to see if it is Opus, open it,
+    and close it.
+   Several flavors are provided for each of the built-in stream types, plus a
+    more general version which takes a set of application-provided callbacks.*/
+/*@{*/
+
+/**Test to see if this is an Opus stream.
+   For good results, you will need at least 57 bytes (for a pure Opus-only
+    stream).
+   Something like 512 bytes will give more reliable results for multiplexed
+    streams.
+   This function is meant to be a quick-rejection filter.
+   Its purpose is not to guarantee that a stream is a valid Opus stream, but to
+    ensure that it looks enough like Opus that it isn't going to be recognized
+    as some other format (except possibly an Opus stream that is also
+    multiplexed with other codecs, such as video).
+   \param[out] _head     The parsed ID header contents.
+                         You may pass <code>NULL</code> if you do not need
+                          this information.
+                         If the function fails, the contents of this structure
+                          remain untouched.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+   \return 0 if the data appears to be Opus, or a negative value on error.
+   \retval #OP_FALSE      There was not enough data to tell if this was an Opus
+                           stream or not.
+   \retval #OP_EFAULT     An internal memory allocation failed.
+   \retval #OP_EIMPL      The stream used a feature that is not implemented,
+                           such as an unsupported channel family.
+   \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID
+                           header for an Opus stream.
+   \retval #OP_EVERSION   If the version field signaled a version this library
+                           does not know how to parse.
+   \retval #OP_EBADHEADER The ID header was not properly formatted or contained
+                           illegal values.*/
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes);
+
+/**Open a stream from the given file path.
+   \param      _path  The path to the file to open.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      The failure code will be #OP_EFAULT if the file could not
+                       be opened, or one of the other failure codes from
+                       op_open_callbacks() otherwise.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Open a stream from a memory buffer.
+   \param      _data  The memory buffer to open.
+   \param      _size  The number of bytes in the buffer.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Open a stream from a URL.
+   This function behaves identically to op_open_url(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param         _url   The URL to open.
+                         Currently only the <file:>, <http:>, and <https:>
+                          schemes are supported.
+                         Both <http:> and <https:> may be disabled at compile
+                          time, in which case opening such URLs will always
+                          fail.
+                         Currently this only supports URIs.
+                         IRIs should be converted to UTF-8 and URL-escaped,
+                          with internationalized domain names encoded in
+                          punycode, before passing them to this function.
+   \param[out]    _error Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         See op_open_callbacks() for a full list of failure
+                          codes.
+   \param[in,out] _ap    A list of the \ref url_options "optional flags" to
+                          use.
+                         This is a variable-length list of options terminated
+                          with <code>NULL</code>.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Open a stream from a URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param      _url   The URL to open.
+                      Currently only the <file:>, <http:>, and <https:> schemes
+                       are supported.
+                      Both <http:> and <https:> may be disabled at compile
+                       time, in which case opening such URLs will always fail.
+                      Currently this only supports URIs.
+                      IRIs should be converted to UTF-8 and URL-escaped, with
+                       internationalized domain names encoded in punycode,
+                       before passing them to this function.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \param      ...    The \ref url_options "optional flags" to use.
+                      This is a variable-length list of options terminated with
+                       <code>NULL</code>.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Open a stream using the given set of callbacks to access it.
+   \param _source        The stream to read from (e.g., a <code>FILE *</code>).
+   \param _cb            The callbacks with which to access the stream.
+                         <code><a href="#op_read_func">read()</a></code> must
+                          be implemented.
+                         <code><a href="#op_seek_func">seek()</a></code> and
+                          <code><a href="#op_tell_func">tell()</a></code> may
+                          be <code>NULL</code>, or may always return -1 to
+                          indicate a source is unseekable, but if
+                          <code><a href="#op_seek_func">seek()</a></code> is
+                          implemented and succeeds on a particular source, then
+                          <code><a href="#op_tell_func">tell()</a></code> must
+                          also.
+                         <code><a href="#op_close_func">close()</a></code> may
+                          be <code>NULL</code>, but if it is not, it will be
+                          called when the \c OggOpusFile is destroyed by
+                          op_free().
+                         It will not be called if op_open_callbacks() fails
+                          with an error.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+                         Applications can read some number of bytes from the
+                          start of the stream to help identify this as an Opus
+                          stream, and then provide them here to allow the
+                          stream to be opened, even if it is unseekable.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+                         If the stream is seekable, its current position (as
+                          reported by
+                          <code><a href="#opus_tell_func">tell()</a></code>
+                          at the start of this function) must be equal to
+                          \a _initial_bytes.
+                         Otherwise, seeking to absolute positions will
+                          generate inconsistent results.
+   \param[out] _error    Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         The failure code will be one of
+                         <dl>
+                           <dt>#OP_EREAD</dt>
+                           <dd>An underlying read, seek, or tell operation
+                            failed when it should have succeeded, or we failed
+                            to find data in the stream we had seen before.</dd>
+                           <dt>#OP_EFAULT</dt>
+                           <dd>There was a memory allocation failure, or an
+                            internal library error.</dd>
+                           <dt>#OP_EIMPL</dt>
+                           <dd>The stream used a feature that is not
+                            implemented, such as an unsupported channel
+                            family.</dd>
+                           <dt>#OP_EINVAL</dt>
+                           <dd><code><a href="#op_seek_func">seek()</a></code>
+                            was implemented and succeeded on this source, but
+                            <code><a href="#op_tell_func">tell()</a></code>
+                            did not, or the starting position indicator was
+                            not equal to \a _initial_bytes.</dd>
+                           <dt>#OP_ENOTFORMAT</dt>
+                           <dd>The stream contained a link that did not have
+                            any logical Opus streams in it.</dd>
+                           <dt>#OP_EBADHEADER</dt>
+                           <dd>A required header packet was not properly
+                            formatted, contained illegal values, or was missing
+                            altogether.</dd>
+                           <dt>#OP_EVERSION</dt>
+                           <dd>An ID header contained an unrecognized version
+                            number.</dd>
+                           <dt>#OP_EBADLINK</dt>
+                           <dd>We failed to find data we had seen before after
+                            seeking.</dd>
+                           <dt>#OP_EBADTIMESTAMP</dt>
+                           <dd>The first or last timestamp in a link failed
+                            basic validity checks.</dd>
+                         </dl>
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.
+           <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+            if the call fails.
+           The calling application is responsible for closing the source if
+            this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Partially open a stream from the given file path.
+   \see op_test_callbacks
+   \param      _path  The path to the file to open.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      The failure code will be #OP_EFAULT if the file could not
+                       be opened, or one of the other failure codes from
+                       op_open_callbacks() otherwise.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a memory buffer.
+   \see op_test_callbacks
+   \param      _data  The memory buffer to open.
+   \param      _size  The number of bytes in the buffer.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Partially open a stream from a URL.
+   This function behaves identically to op_test_url(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \see op_test_url
+   \see op_test_callbacks
+   \param         _url    The URL to open.
+                          Currently only the <file:>, <http:>, and <https:>
+                           schemes are supported.
+                          Both <http:> and <https:> may be disabled at compile
+                           time, in which case opening such URLs will always
+                           fail.
+                          Currently this only supports URIs.
+                          IRIs should be converted to UTF-8 and URL-escaped,
+                           with internationalized domain names encoded in
+                           punycode, before passing them to this function.
+   \param[out]    _error  Returns 0 on success, or a failure code on error.
+                          You may pass in <code>NULL</code> if you don't want
+                           the failure code.
+                          See op_open_callbacks() for a full list of failure
+                           codes.
+   \param[in,out] _ap     A list of the \ref url_options "optional flags" to
+                           use.
+                          This is a variable-length list of options terminated
+                           with <code>NULL</code>.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \see op_test_callbacks
+   \param      _url    The URL to open.
+                       Currently only the <file:>, <http:>, and <https:>
+                        schemes are supported.
+                       Both <http:> and <https:> may be disabled at compile
+                        time, in which case opening such URLs will always fail.
+                       Currently this only supports URIs.
+                       IRIs should be converted to UTF-8 and URL-escaped, with
+                        internationalized domain names encoded in punycode,
+                        before passing them to this function.
+   \param[out] _error  Returns 0 on success, or a failure code on error.
+                       You may pass in <code>NULL</code> if you don't want the
+                        failure code.
+                       See op_open_callbacks() for a full list of failure
+                        codes.
+   \param      ...     The \ref url_options "optional flags" to use.
+                       This is a variable-length list of options terminated
+                        with <code>NULL</code>.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Partially open a stream using the given set of callbacks to access it.
+   This tests for Opusness and loads the headers for the first link.
+   It does not seek (although it tests for seekability).
+   You can query a partially open stream for the few pieces of basic
+    information returned by op_serialno(), op_channel_count(), op_head(), and
+    op_tags() (but only for the first link).
+   You may also determine if it is seekable via a call to op_seekable().
+   You cannot read audio from the stream, seek, get the size or duration,
+    get information from links other than the first one, or even get the total
+    number of links until you finish opening the stream with op_test_open().
+   If you do not need to do any of these things, you can dispose of it with
+    op_free() instead.
+
+   This function is provided mostly to simplify porting existing code that used
+    <tt>libvorbisfile</tt>.
+   For new code, you are likely better off using op_test() instead, which
+    is less resource-intensive, requires less data to succeed, and imposes a
+    hard limit on the amount of data it examines (important for unseekable
+    sources, where all such data must be buffered until you are sure of the
+    stream type).
+   \param _source        The stream to read from (e.g., a <code>FILE *</code>).
+   \param _cb            The callbacks with which to access the stream.
+                         <code><a href="#op_read_func">read()</a></code> must
+                          be implemented.
+                         <code><a href="#op_seek_func">seek()</a></code> and
+                          <code><a href="#op_tell_func">tell()</a></code> may
+                          be <code>NULL</code>, or may always return -1 to
+                          indicate a source is unseekable, but if
+                          <code><a href="#op_seek_func">seek()</a></code> is
+                          implemented and succeeds on a particular source, then
+                          <code><a href="#op_tell_func">tell()</a></code> must
+                          also.
+                         <code><a href="#op_close_func">close()</a></code> may
+                          be <code>NULL</code>, but if it is not, it will be
+                          called when the \c OggOpusFile is destroyed by
+                          op_free().
+                         It will not be called if op_open_callbacks() fails
+                          with an error.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+                         Applications can read some number of bytes from the
+                          start of the stream to help identify this as an Opus
+                          stream, and then provide them here to allow the
+                          stream to be tested more thoroughly, even if it is
+                          unseekable.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+                         If the stream is seekable, its current position (as
+                          reported by
+                          <code><a href="#opus_tell_func">tell()</a></code>
+                          at the start of this function) must be equal to
+                          \a _initial_bytes.
+                         Otherwise, seeking to absolute positions will
+                          generate inconsistent results.
+   \param[out] _error    Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         See op_open_callbacks() for a full list of failure
+                          codes.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.
+           <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+            if the call fails.
+           The calling application is responsible for closing the source if
+            this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Finish opening a stream partially opened with op_test_callbacks() or one of
+    the associated convenience functions.
+   If this function fails, you are still responsible for freeing the
+    \c OggOpusFile with op_free().
+   \param _of The \c OggOpusFile to finish opening.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_EREAD         An underlying read, seek, or tell operation failed
+                              when it should have succeeded.
+   \retval #OP_EFAULT        There was a memory allocation failure, or an
+                              internal library error.
+   \retval #OP_EIMPL         The stream used a feature that is not implemented,
+                              such as an unsupported channel family.
+   \retval #OP_EINVAL        The stream was not partially opened with
+                              op_test_callbacks() or one of the associated
+                              convenience functions.
+   \retval #OP_ENOTFORMAT    The stream contained a link that did not have any
+                              logical Opus streams in it.
+   \retval #OP_EBADHEADER    A required header packet was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An ID header contained an unrecognized version
+                              number.
+   \retval #OP_EBADLINK      We failed to find data we had seen before after
+                              seeking.
+   \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic
+                              validity checks.*/
+int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Release all memory used by an \c OggOpusFile.
+   \param _of The \c OggOpusFile to free.*/
+void op_free(OggOpusFile *_of);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_info Stream Information*/
+/*@{*/
+/**\name Functions for obtaining information about streams
+
+   These functions allow you to get basic information about a stream, including
+    seekability, the number of links (for chained streams), plus the size,
+    duration, bitrate, header parameters, and meta information for each link
+    (or, where available, the stream as a whole).
+   Some of these (size, duration) are only available for seekable streams.
+   You can also query the current stream position, link, and playback time,
+    and instantaneous bitrate during playback.
+
+   Some of these functions may be used successfully on the partially open
+    streams returned by op_test_callbacks() or one of the associated
+    convenience functions.
+   Their documention will indicate so explicitly.*/
+/*@{*/
+
+/**Returns whether or not the data source being read is seekable.
+   This is true if
+   <ol>
+   <li>The <code><a href="#op_seek_func">seek()</a></code> and
+    <code><a href="#op_tell_func">tell()</a></code> callbacks are both
+    non-<code>NULL</code>,</li>
+   <li>The <code><a href="#op_seek_func">seek()</a></code> callback was
+    successfully executed at least once, and</li>
+   <li>The <code><a href="#op_tell_func">tell()</a></code> callback was
+    successfully able to report the position indicator afterwards.</li>
+   </ol>
+   This function may be called on partially-opened streams.
+   \param _of The \c OggOpusFile whose seekable status is to be returned.
+   \return A non-zero value if seekable, and 0 if unseekable.*/
+int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Returns the number of links in this chained stream.
+   This function may be called on partially-opened streams, but it will always
+    return 1.
+   The actual number of links is not known until the stream is fully opened.
+   \param _of The \c OggOpusFile from which to retrieve the link count.
+   \return For fully-open seekable sources, this returns the total number of
+            links in the whole stream, which will be at least 1.
+           For partially-open or unseekable sources, this always returns 1.*/
+int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Get the serial number of the given link in a (possibly-chained) Ogg Opus
+    stream.
+   This function may be called on partially-opened streams, but it will always
+    return the serial number of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the serial number.
+   \param _li The index of the link whose serial number should be retrieved.
+              Use a negative number to get the serial number of the current
+               link.
+   \return The serial number of the given link.
+           If \a _li is greater than the total number of links, this returns
+            the serial number of the last link.
+           If the source is not seekable, this always returns the serial number
+            of the current link.*/
+opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the channel count of the given link in a (possibly-chained) Ogg Opus
+    stream.
+   This is equivalent to <code>op_head(_of,_li)->channel_count</code>, but
+    is provided for convenience.
+   This function may be called on partially-opened streams, but it will always
+    return the channel count of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the channel count.
+   \param _li The index of the link whose channel count should be retrieved.
+              Use a negative number to get the channel count of the current
+               link.
+   \return The channel count of the given link.
+           If \a _li is greater than the total number of links, this returns
+            the channel count of the last link.
+           If the source is not seekable, this always returns the channel count
+            of the current link.*/
+int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total (compressed) size of the stream, or of an individual link in
+    a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing
+    overhead.
+   \param _of The \c OggOpusFile from which to retrieve the compressed size.
+   \param _li The index of the link whose compressed size should be computed.
+              Use a negative number to get the compressed size of the entire
+               stream.
+   \return The compressed size of the entire stream if \a _li is negative, the
+            compressed size of link \a _li if it is non-negative, or a negative
+            value on error.
+           The compressed size of the entire stream may be smaller than that
+            of the underlying source if trailing garbage was detected in the
+            file.
+   \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+                       \a _li wasn't less than the total number of links in
+                       the stream, or the stream was only partially open.*/
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of
+    an individual link in a (possibly-chained) Ogg Opus stream.
+   Users looking for <code>op_time_total()</code> should use op_pcm_total()
+    instead.
+   Because timestamps in Opus are fixed at 48 kHz, there is no need for a
+    separate function to convert this to seconds (and leaving it out avoids
+    introducing floating point to the API, for those that wish to avoid it).
+   \param _of The \c OggOpusFile from which to retrieve the PCM offset.
+   \param _li The index of the link whose PCM length should be computed.
+              Use a negative number to get the PCM length of the entire stream.
+   \return The PCM length of the entire stream if \a _li is negative, the PCM
+            length of link \a _li if it is non-negative, or a negative value on
+            error.
+   \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+                       \a _li wasn't less than the total number of links in
+                       the stream, or the stream was only partially open.*/
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the ID header information for the given link in a (possibly chained) Ogg
+    Opus stream.
+   This function may be called on partially-opened streams, but it will always
+    return the ID header information of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the ID header
+               information.
+   \param _li The index of the link whose ID header information should be
+               retrieved.
+              Use a negative number to get the ID header information of the
+               current link.
+              For an unseekable stream, \a _li is ignored, and the ID header
+               information for the current link is always returned, if
+               available.
+   \return The contents of the ID header for the given link.*/
+const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the comment header information for the given link in a (possibly
+    chained) Ogg Opus stream.
+   This function may be called on partially-opened streams, but it will always
+    return the tags from the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the comment header
+               information.
+   \param _li The index of the link whose comment header information should be
+               retrieved.
+              Use a negative number to get the comment header information of
+               the current link.
+              For an unseekable stream, \a _li is ignored, and the comment
+               header information for the current link is always returned, if
+               available.
+   \return The contents of the comment header for the given link, or
+            <code>NULL</code> if this is an unseekable stream that encountered
+            an invalid link.*/
+const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Retrieve the index of the current link.
+   This is the link that produced the data most recently read by
+    op_read_float() or its associated functions, or, after a seek, the link
+    that the seek target landed in.
+   Reading more data may advance the link index (even on the first read after a
+    seek).
+   \param _of The \c OggOpusFile from which to retrieve the current link index.
+   \return The index of the current link on success, or a negative value on
+            failure.
+           For seekable streams, this is a number between 0 and the value
+            returned by op_link_count().
+           For unseekable streams, this value starts at 0 and increments by one
+            each time a new link is encountered (even though op_link_count()
+            always returns 1).
+   \retval #OP_EINVAL The stream was only partially open.*/
+int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Computes the bitrate for a given link in a (possibly chained) Ogg Opus
+    stream.
+   The stream must be seekable to compute the bitrate.
+   For unseekable streams, use op_bitrate_instant() to get periodic estimates.
+   \param _of The \c OggOpusFile from which to retrieve the bitrate.
+   \param _li The index of the link whose bitrate should be computed.
+              USe a negative number to get the bitrate of the whole stream.
+   \return The bitrate on success, or a negative value on error.
+   \retval #OP_EINVAL The stream was only partially open, the stream was not
+                       seekable, or \a _li was larger than the number of
+                       links.*/
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Compute the instantaneous bitrate, measured as the ratio of bits to playable
+    samples decoded since a) the last call to op_bitrate_instant(), b) the last
+    seek, or c) the start of playback, whichever was most recent.
+   This will spike somewhat after a seek or at the start/end of a chain
+    boundary, as pre-skip, pre-roll, and end-trimming causes samples to be
+    decoded but not played.
+   \param _of The \c OggOpusFile from which to retrieve the bitrate.
+   \return The bitrate, in bits per second, or a negative value on error.
+   \retval #OP_FALSE  No data has been decoded since any of the events
+                       described above.
+   \retval #OP_EINVAL The stream was only partially open.*/
+opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the current value of the position indicator for \a _of.
+   \param _of The \c OggOpusFile from which to retrieve the position indicator.
+   \return The byte position that is currently being read from.
+   \retval #OP_EINVAL The stream was only partially open.*/
+opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the PCM offset of the next sample to be read.
+   If the stream is not properly timestamped, this might not increment by the
+    proper amount between reads, or even return monotonically increasing
+    values.
+   \param _of The \c OggOpusFile from which to retrieve the PCM offset.
+   \return The PCM offset of the next sample to be read.
+   \retval #OP_EINVAL The stream was only partially open.*/
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_seeking Seeking*/
+/*@{*/
+/**\name Functions for seeking in Opus streams
+
+   These functions let you seek in Opus streams, if the underlying source
+    support it.
+   Seeking is implemented for all built-in stream I/O routines, though some
+    individual sources may not be seekable (pipes, live HTTP streams, or HTTP
+    streams from a server that does not support <code>Range</code> requests).
+
+   op_raw_seek() is the fastest: it is guaranteed to perform at most one
+    physical seek, but, since the target is a byte position, makes no guarantee
+    how close to a given time it will come.
+   op_pcm_seek() provides sample-accurate seeking.
+   The number of physical seeks it requires is still quite small (often 1 or
+    2, even in highly variable bitrate streams).
+
+   Seeking in Opus requires decoding some pre-roll amount before playback to
+    allow the internal state to converge (as if recovering from packet loss).
+   This is handled internally by <tt>libopusfile</tt>, but means there is
+    little extra overhead for decoding up to the exact position requested
+    (since it must decode some amount of audio anyway).
+   It also means that decoding after seeking may not return exactly the same
+    values as would be obtained by decoding the stream straight through.
+   However, such differences are expected to be smaller than the loss
+    introduced by Opus's lossy compression.*/
+/*@{*/
+
+/**Seek to a byte offset relative to the <b>compressed</b> data.
+   This also scans packets to update the PCM cursor.
+   It will cross a logical bitstream boundary, but only if it can't get any
+    packets out of the tail of the link to which it seeks.
+   \param _of          The \c OggOpusFile in which to seek.
+   \param _byte_offset The byte position to seek to.
+   \return 0 on success, or a negative error code on failure.
+   \retval #OP_EREAD    The underlying seek operation failed.
+   \retval #OP_EINVAL   The stream was only partially open, or the target was
+                         outside the valid range for the stream.
+   \retval #OP_ENOSEEK  This stream is not seekable.
+   \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an
+                         unknown reason.*/
+int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1);
+
+/**Seek to the specified PCM offset, such that decoding will begin at exactly
+    the requested position.
+   \param _of         The \c OggOpusFile in which to seek.
+   \param _pcm_offset The PCM offset to seek to.
+                      This is in samples at 48 kHz relative to the start of the
+                       stream.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_EREAD    An underlying read or seek operation failed.
+   \retval #OP_EINVAL   The stream was only partially open, or the target was
+                         outside the valid range for the stream.
+   \retval #OP_ENOSEEK  This stream is not seekable.
+   \retval #OP_EBADLINK We failed to find data we had seen before, or the
+                         bitstream structure was sufficiently malformed that
+                         seeking to the target destination was impossible.*/
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_decoding Decoding*/
+/*@{*/
+/**\name Functions for decoding audio data
+
+   These functions retrieve actual decoded audio data from the stream.
+   The general functions, op_read() and op_read_float() return 16-bit or
+    floating-point output, both using native endian ordering.
+   The number of channels returned can change from link to link in a chained
+    stream.
+   There are special functions, op_read_stereo() and op_read_float_stereo(),
+    which always output two channels, to simplify applications which do not
+    wish to handle multichannel audio.
+   These downmix multichannel files to two channels, so they can always return
+    samples in the same format for every link in a chained file.
+
+   If the rest of your audio processing chain can handle floating point, those
+    routines should be preferred, as floating point output avoids introducing
+    clipping and other issues which might be avoided entirely if, e.g., you
+    scale down the volume at some other stage.
+   However, if you intend to direct consume 16-bit samples, the conversion in
+    <tt>libopusfile</tt> provides noise-shaping dithering and, if compiled
+    against <tt>libopus</tt>&nbsp;1.1 or later, soft-clipping prevention.
+
+   <tt>libopusfile</tt> can also be configured at compile time to use the
+    fixed-point <tt>libopus</tt> API.
+   If so, <tt>libopusfile</tt>'s floating-point API may also be disabled.
+   In that configuration, nothing in <tt>libopusfile</tt> will use any
+    floating-point operations, to simplify support on devices without an
+    adequate FPU.
+
+   \warning HTTPS streams may be be vulnerable to truncation attacks if you do
+    not check the error return code from op_read_float() or its associated
+    functions.
+   If the remote peer does not close the connection gracefully (with a TLS
+    "close notify" message), these functions will return #OP_EREAD instead of 0
+    when they reach the end of the file.
+   If you are reading from an <https:> URL (particularly if seeking is not
+    supported), you should make sure to check for this error and warn the user
+    appropriately.*/
+/*@{*/
+
+/**Indicates that the decoding callback should produce signed 16-bit
+    native-endian output samples.*/
+#define OP_DEC_FORMAT_SHORT (7008)
+/**Indicates that the decoding callback should produce 32-bit native-endian
+    float samples.*/
+#define OP_DEC_FORMAT_FLOAT (7040)
+
+/**Indicates that the decoding callback did not decode anything, and that
+    <tt>libopusfile</tt> should decode normally instead.*/
+#define OP_DEC_USE_DEFAULT  (6720)
+
+/**Called to decode an Opus packet.
+   This should invoke the functional equivalent of opus_multistream_decode() or
+    opus_multistream_decode_float(), except that it returns 0 on success
+    instead of the number of decoded samples (which is known a priori).
+   \param _ctx       The application-provided callback context.
+   \param _decoder   The decoder to use to decode the packet.
+   \param[out] _pcm  The buffer to decode into.
+                     This will always have enough room for \a _nchannels of
+                      \a _nsamples samples, which should be placed into this
+                      buffer interleaved.
+   \param _op        The packet to decode.
+                     This will always have its granule position set to a valid
+                      value.
+   \param _nsamples  The number of samples expected from the packet.
+   \param _nchannels The number of channels expected from the packet.
+   \param _format    The desired sample output format.
+                     This is either #OP_DEC_FORMAT_SHORT or
+                      #OP_DEC_FORMAT_FLOAT.
+   \param _li        The index of the link from which this packet was decoded.
+   \return A non-negative value on success, or a negative value on error.
+           The error codes should be the same as those returned by
+            opus_multistream_decode() or opus_multistream_decode_float().
+   \retval 0                   Decoding was successful.
+                               The application has filled the buffer with
+                                exactly <code>\a _nsamples*\a
+                                _nchannels</code> samples in the requested
+                                format.
+   \retval #OP_DEC_USE_DEFAULT No decoding was done.
+                               <tt>libopusfile</tt> should decode normally
+                                instead.*/
+typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li);
+
+/**Sets the packet decode callback function.
+   This is called once for each packet that needs to be decoded.
+   A call to this function is no guarantee that the audio will eventually be
+    delivered to the application.
+   Some or all of the data from the packet may be discarded (i.e., at the
+    beginning or end of a link, or after a seek), however the callback is
+    required to provide all of it.
+   \param _of        The \c OggOpusFile on which to set the decode callback.
+   \param _decode_cb The callback function to call.
+                     This may be <code>NULL</code> to disable calling the
+                      callback.
+   \param _ctx       The application-provided context pointer to pass to the
+                      callback on each call.*/
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1);
+
+/**Gain offset type that indicates that the provided offset is relative to the
+    header gain.
+   This is the default.*/
+#define OP_HEADER_GAIN   (0)
+
+/**Gain offset type that indicates that the provided offset is relative to the
+    R128_TRACK_GAIN value (if any), in addition to the header gain.*/
+#define OP_TRACK_GAIN    (3008)
+
+/**Gain offset type that indicates that the provided offset should be used as
+    the gain directly, without applying any the header or track gains.*/
+#define OP_ABSOLUTE_GAIN (3009)
+
+/**Sets the gain to be used for decoded output.
+   By default, the gain in the header is applied with no additional offset.
+   The total gain (including header gain and/or track gain, if applicable, and
+    this offset), will be clamped to [-32768,32767]/256 dB.
+   This is more than enough to saturate or underflow 16-bit PCM.
+   \note The new gain will not be applied to any already buffered, decoded
+    output.
+   This means you cannot change it sample-by-sample, as at best it will be
+    updated packet-by-packet.
+   It is meant for setting a target volume level, rather than applying smooth
+    fades, etc.
+   \param _of             The \c OggOpusFile on which to set the gain offset.
+   \param _gain_type      One of #OP_HEADER_GAIN, #OP_TRACK_GAIN, or
+                           #OP_ABSOLUTE_GAIN.
+   \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB.
+   \return 0 on success or a negative value on error.
+   \retval #OP_EINVAL The \a _gain_type was unrecognized.*/
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1);
+
+/**Sets whether or not dithering is enabled for 16-bit decoding.
+   By default, when <tt>libopusfile</tt> is compiled to use floating-point
+    internally, calling op_read() or op_read_stereo() will first decode to
+    float, and then convert to fixed-point using noise-shaping dithering.
+   This flag can be used to disable that dithering.
+   When the application uses op_read_float() or op_read_float_stereo(), or when
+    the library has been compiled to decode directly to fixed point, this flag
+    has no effect.
+   \param _of      The \c OggOpusFile on which to enable or disable dithering.
+   \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+   \note Although \a _buf_size must indicate the total number of values that
+    can be stored in \a _pcm, the return value is the number of samples
+    <em>per channel</em>.
+   This is done because
+   <ol>
+   <li>The channel count cannot be known a priori (reading more samples might
+        advance us into the next link, with a different channel count), so
+        \a _buf_size cannot also be in units of samples per channel,</li>
+   <li>Returning the samples per channel matches the <code>libopus</code> API
+        as closely as we're able,</li>
+   <li>Returning the total number of values instead of samples per channel
+        would mean the caller would need a division to compute the samples per
+        channel, and might worry about the possibility of getting back samples
+        for some channels and not others, and</li>
+   <li>This approach is relatively fool-proof: if an application passes too
+        small a value to \a _buf_size, they will simply get fewer samples back,
+        and if they assume the return value is the total number of values, then
+        they will simply read too few (rather than reading too many and going
+        off the end of the buffer).</li>
+   </ol>
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed native-endian 16-bit values at 48&nbsp;kHz
+                          with a nominal range of <code>[-32768,32767)</code>.
+                         Multiple channels are interleaved using the
+                          <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+                          channel ordering</a>.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (5760
+                          values per channel).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         <tt>libopusfile</tt> may return less data than
+                          requested.
+                         If so, there is no guarantee that the remaining data
+                          in \a _pcm will be unmodified.
+   \param[out] _li       The index of the link this data was decoded from.
+                         You may pass <code>NULL</code> if you do not need this
+                          information.
+                         If this function fails (returning a negative value),
+                          this parameter is left unset.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The channel count can be retrieved on success by calling
+            <code>op_head(_of,*_li)</code>.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for all channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+   \note Although \a _buf_size must indicate the total number of values that
+    can be stored in \a _pcm, the return value is the number of samples
+    <em>per channel</em>.
+   <ol>
+   <li>The channel count cannot be known a priori (reading more samples might
+        advance us into the next link, with a different channel count), so
+        \a _buf_size cannot also be in units of samples per channel,</li>
+   <li>Returning the samples per channel matches the <code>libopus</code> API
+        as closely as we're able,</li>
+   <li>Returning the total number of values instead of samples per channel
+        would mean the caller would need a division to compute the samples per
+        channel, and might worry about the possibility of getting back samples
+        for some channels and not others, and</li>
+   <li>This approach is relatively fool-proof: if an application passes too
+        small a value to \a _buf_size, they will simply get fewer samples back,
+        and if they assume the return value is the total number of values, then
+        they will simply read too few (rather than reading too many and going
+        off the end of the buffer).</li>
+   </ol>
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples as
+                          signed floats at 48&nbsp;kHz with a nominal range of
+                          <code>[-1.0,1.0]</code>.
+                         Multiple channels are interleaved using the
+                          <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+                          channel ordering</a>.
+                         This must have room for at least \a _buf_size floats.
+   \param      _buf_size The number of floats that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (5760
+                          samples per channel).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \param[out] _li       The index of the link this data was decoded from.
+                         You may pass <code>NULL</code> if you do not need this
+                          information.
+                         If this function fails (returning a negative value),
+                          this parameter is left unset.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The channel count can be retrieved on success by calling
+            <code>op_head(_of,*_li)</code>.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for all channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of,
+ float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+   This function is intended for simple players that want a uniform output
+    format, even if the channel count changes between links in a chained
+    stream.
+   \note \a _buf_size indicates the total number of values that can be stored
+    in \a _pcm, while the return value is the number of samples <em>per
+    channel</em>, even though the channel count is known, for consistency with
+    op_read().
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed native-endian 16-bit values at 48&nbsp;kHz
+                          with a nominal range of <code>[-32768,32767)</code>.
+                         The left and right channels are interleaved in the
+                          buffer.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (11520
+                          values total).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for both channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+   This function is intended for simple players that want a uniform output
+    format, even if the channel count changes between links in a chained
+    stream.
+   \note \a _buf_size indicates the total number of values that can be stored
+    in \a _pcm, while the return value is the number of samples <em>per
+    channel</em>, even though the channel count is known, for consistency with
+    op_read_float().
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed floats at 48&nbsp;kHz with a nominal range of
+                          <code>[-1.0,1.0]</code>.
+                         The left and right channels are interleaved in the
+                          buffer.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (11520
+                          values total).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for both channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              that did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of,
+ float *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+# if OP_GNUC_PREREQ(4,0)
+#  pragma GCC visibility pop
+# endif
+
+# if defined(__cplusplus)
+}
+# endif
+
+#endif
diff --git a/src/main/jni/opus/opusfile/stream.c b/src/main/jni/opus/opusfile/stream.c
new file mode 100644
index 000000000..0238a6b31
--- /dev/null
+++ b/src/main/jni/opus/opusfile/stream.c
@@ -0,0 +1,366 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "internal.h"
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#if defined(_WIN32)
+# include <io.h>
+#endif
+
+typedef struct OpusMemStream OpusMemStream;
+
+#define OP_MEM_SIZE_MAX (~(size_t)0>>1)
+#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX)
+
+/*The context information needed to read from a block of memory as if it were a
+   file.*/
+struct OpusMemStream{
+  /*The block of memory to read from.*/
+  const unsigned char *data;
+  /*The total size of the block.
+    This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while
+     seeking.*/
+  ptrdiff_t            size;
+  /*The current file position.
+    This is allowed to be set arbitrarily greater than size (i.e., past the end
+     of the block, though we will not read data past the end of the block), but
+     is not allowed to be negative (i.e., before the beginning of the block).*/
+  ptrdiff_t            pos;
+};
+
+static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){
+  FILE   *stream;
+  size_t  ret;
+  /*Check for empty read.*/
+  if(_buf_size<=0)return 0;
+  stream=(FILE *)_stream;
+  ret=fread(_ptr,1,_buf_size,stream);
+  OP_ASSERT(ret<=(size_t)_buf_size);
+  /*If ret==0 and !feof(stream), there was a read error.*/
+  return ret>0||feof(stream)?(int)ret:OP_EREAD;
+}
+
+static int op_fseek(void *_stream,opus_int64 _offset,int _whence){
+#if defined(_WIN32)
+  /*_fseeki64() is not exposed until MSCVCRT80.
+    This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want
+     to allow linking against older MSVCRT versions for compatibility back to
+     XP without installing extra runtime libraries.
+    i686-pc-mingw32 does not have fseeko() and requires
+     __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with
+     other libraries (that don't use MSVCRT80 from MSVC 2005 by default).
+    i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I
+     don't know how to detect that at compile time.
+    We could just use fseeko64() (which is available in both), but its
+     implemented using fgetpos()/fsetpos() just like this code, except without
+     the overflow checking, so we prefer our version.*/
+  opus_int64 pos;
+  /*We don't use fpos_t directly because it might be a struct if __STDC__ is
+     non-zero or _INTEGRAL_MAX_BITS < 64.
+    I'm not certain when the latter is true, but someone could in theory set
+     the former.
+    Either way, it should be binary compatible with a normal 64-bit int (this
+     assumption is not portable, but I believe it is true for MSVCRT).*/
+  OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+  /*Translate the seek to an absolute one.*/
+  if(_whence==SEEK_CUR){
+    int ret;
+    ret=fgetpos((FILE *)_stream,(fpos_t *)&pos);
+    if(ret)return ret;
+  }
+  else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream));
+  else if(_whence==SEEK_SET)pos=0;
+  else return -1;
+  /*Check for errors or overflow.*/
+  if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1;
+  pos+=_offset;
+  return fsetpos((FILE *)_stream,(fpos_t *)&pos);
+#else
+  /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+     it except on Windows.*/
+  return fseeko((FILE *)_stream,(off_t)_offset,_whence);
+#endif
+}
+
+static opus_int64 op_ftell(void *_stream){
+#if defined(_WIN32)
+  /*_ftelli64() is not exposed until MSCVCRT80, and ftello()/ftello64() have
+     the same problems as fseeko()/fseeko64() in MingW.
+    See above for a more detailed explanation.*/
+  opus_int64 pos;
+  OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+  return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos;
+#else
+  /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+     it except on Windows.*/
+  return ftello((FILE *)_stream);
+#endif
+}
+
+static const OpusFileCallbacks OP_FILE_CALLBACKS={
+  op_fread,
+  op_fseek,
+  op_ftell,
+  (op_close_func)fclose
+};
+
+#if defined(_WIN32)
+# include <stddef.h>
+# include <errno.h>
+
+/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API,
+   so if we just pass the path to fopen(), then there'd be no way for a user
+   of our API to open a Unicode filename.
+  Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API.
+  This makes this API more consistent with platforms where the character set
+   used by fopen is the same as used on disk, which is generally UTF-8, and
+   with our metadata API, which always uses UTF-8.*/
+static wchar_t *op_utf8_to_utf16(const char *_src){
+  wchar_t *dst;
+  size_t   len;
+  len=strlen(_src);
+  /*Worst-case output is 1 wide character per 1 input character.*/
+  dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1));
+  if(dst!=NULL){
+    size_t si;
+    size_t di;
+    for(di=si=0;si<len;si++){
+      int c0;
+      c0=(unsigned char)_src[si];
+      if(!(c0&0x80)){
+        /*Start byte says this is a 1-byte sequence.*/
+        dst[di++]=(wchar_t)c0;
+        continue;
+      }
+      else{
+        int c1;
+        /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
+        c1=(unsigned char)_src[si+1];
+        if((c1&0xC0)==0x80){
+          /*Found at least one continuation byte.*/
+          if((c0&0xE0)==0xC0){
+            wchar_t w;
+            /*Start byte says this is a 2-byte sequence.*/
+            w=(c0&0x1F)<<6|c1&0x3F;
+            if(w>=0x80U){
+              /*This is a 2-byte sequence that is not overlong.*/
+              dst[di++]=w;
+              si++;
+              continue;
+            }
+          }
+          else{
+            int c2;
+            /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/
+            c2=(unsigned char)_src[si+2];
+            if((c2&0xC0)==0x80){
+              /*Found at least two continuation bytes.*/
+              if((c0&0xF0)==0xE0){
+                wchar_t w;
+                /*Start byte says this is a 3-byte sequence.*/
+                w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F;
+                if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){
+                  /*This is a 3-byte sequence that is not overlong, not a
+                     UTF-16 surrogate pair value, and not a 'not a character'
+                     value.*/
+                  dst[di++]=w;
+                  si+=2;
+                  continue;
+                }
+              }
+              else{
+                int c3;
+                /*This is safe, because c2 was not 0 and _src is
+                   NUL-terminated.*/
+                c3=(unsigned char)_src[si+3];
+                if((c3&0xC0)==0x80){
+                  /*Found at least three continuation bytes.*/
+                  if((c0&0xF8)==0xF0){
+                    opus_uint32 w;
+                    /*Start byte says this is a 4-byte sequence.*/
+                    w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6&(c3&0x3F);
+                    if(w>=0x10000U&&w<0x110000U){
+                      /*This is a 4-byte sequence that is not overlong and not
+                         greater than the largest valid Unicode code point.
+                        Convert it to a surrogate pair.*/
+                      w-=0x10000;
+                      dst[di++]=(wchar_t)(0xD800+(w>>10));
+                      dst[di++]=(wchar_t)(0xDC00+(w&0x3FF));
+                      si+=3;
+                      continue;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      /*If we got here, we encountered an illegal UTF-8 sequence.*/
+      _ogg_free(dst);
+      return NULL;
+    }
+    OP_ASSERT(di<=len);
+    dst[di]='\0';
+  }
+  return dst;
+}
+
+#endif
+
+void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){
+  FILE *fp;
+#if !defined(_WIN32)
+  fp=fopen(_path,_mode);
+#else
+  fp=NULL;
+  if(_path==NULL||_mode==NULL)errno=EINVAL;
+  else{
+    wchar_t *wpath;
+    wchar_t *wmode;
+    wpath=op_utf8_to_utf16(_path);
+    wmode=op_utf8_to_utf16(_mode);
+    if(wmode==NULL)errno=EINVAL;
+    else if(wpath==NULL)errno=ENOENT;
+    else fp=_wfopen(wpath,wmode);
+    _ogg_free(wmode);
+    _ogg_free(wpath);
+  }
+#endif
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){
+  FILE *fp;
+  fp=fdopen(_fd,_mode);
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode,
+ void *_stream){
+  FILE *fp;
+#if !defined(_WIN32)
+  fp=freopen(_path,_mode,(FILE *)_stream);
+#else
+  fp=NULL;
+  if(_path==NULL||_mode==NULL)errno=EINVAL;
+  else{
+    wchar_t *wpath;
+    wchar_t *wmode;
+    wpath=op_utf8_to_utf16(_path);
+    wmode=op_utf8_to_utf16(_mode);
+    if(wmode==NULL)errno=EINVAL;
+    else if(wpath==NULL)errno=ENOENT;
+    else fp=_wfreopen(wpath,wmode,(FILE *)_stream);
+    _ogg_free(wmode);
+    _ogg_free(wpath);
+  }
+#endif
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){
+  OpusMemStream *stream;
+  ptrdiff_t      size;
+  ptrdiff_t      pos;
+  stream=(OpusMemStream *)_stream;
+  /*Check for empty read.*/
+  if(_buf_size<=0)return 0;
+  size=stream->size;
+  pos=stream->pos;
+  /*Check for EOF.*/
+  if(pos>=size)return 0;
+  /*Check for a short read.*/
+  _buf_size=(int)OP_MIN(size-pos,_buf_size);
+  memcpy(_ptr,stream->data+pos,_buf_size);
+  pos+=_buf_size;
+  stream->pos=pos;
+  return _buf_size;
+}
+
+static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){
+  OpusMemStream *stream;
+  ptrdiff_t      pos;
+  stream=(OpusMemStream *)_stream;
+  pos=stream->pos;
+  OP_ASSERT(pos>=0);
+  switch(_whence){
+    case SEEK_SET:{
+      /*Check for overflow:*/
+      if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1;
+      pos=(ptrdiff_t)_offset;
+    }break;
+    case SEEK_CUR:{
+      /*Check for overflow:*/
+      if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1;
+      pos=(ptrdiff_t)(pos+_offset);
+    }break;
+    case SEEK_END:{
+      ptrdiff_t size;
+      size=stream->size;
+      OP_ASSERT(size>=0);
+      /*Check for overflow:*/
+      if(_offset>size||_offset<size-OP_MEM_DIFF_MAX)return -1;
+      pos=(ptrdiff_t)(size-_offset);
+    }break;
+    default:return -1;
+  }
+  stream->pos=pos;
+  return 0;
+}
+
+static opus_int64 op_mem_tell(void *_stream){
+  OpusMemStream *stream;
+  stream=(OpusMemStream *)_stream;
+  return (ogg_int64_t)stream->pos;
+}
+
+static int op_mem_close(void *_stream){
+  _ogg_free(_stream);
+  return 0;
+}
+
+static const OpusFileCallbacks OP_MEM_CALLBACKS={
+  op_mem_read,
+  op_mem_seek,
+  op_mem_tell,
+  op_mem_close
+};
+
+void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size){
+  OpusMemStream *stream;
+  if(_size>OP_MEM_SIZE_MAX)return NULL;
+  stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream));
+  if(stream!=NULL){
+    *_cb=*&OP_MEM_CALLBACKS;
+    stream->data=_data;
+    stream->size=_size;
+    stream->pos=0;
+  }
+  return stream;
+}
diff --git a/src/main/jni/opus/silk/A2NLSF.c b/src/main/jni/opus/silk/A2NLSF.c
new file mode 100644
index 000000000..74b1b95d6
--- /dev/null
+++ b/src/main/jni/opus/silk/A2NLSF.c
@@ -0,0 +1,252 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/* Conversion between prediction filter coefficients and NLSFs  */
+/* Requires the order to be an even number                      */
+/* A piecewise linear approximation maps LSF <-> cos(LSF)       */
+/* Therefore the result is not accurate NLSFs, but the two      */
+/* functions are accurate inverses of each other                */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+/* Number of binary divisions, when not in low complexity mode */
+#define BIN_DIV_STEPS_A2NLSF_FIX      3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
+#define MAX_ITERATIONS_A2NLSF_FIX    30
+
+/* Helper function for A2NLSF(..)                    */
+/* Transforms polynomials from cos(n*f) to cos(f)^n  */
+static OPUS_INLINE void silk_A2NLSF_trans_poly(
+    opus_int32          *p,                     /* I/O    Polynomial                                */
+    const opus_int      dd                      /* I      Polynomial order (= filter order / 2 )    */
+)
+{
+    opus_int k, n;
+
+    for( k = 2; k <= dd; k++ ) {
+        for( n = dd; n > k; n-- ) {
+            p[ n - 2 ] -= p[ n ];
+        }
+        p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 );
+    }
+}
+/* Helper function for A2NLSF(..) */
+/* Polynomial evaluation          */
+static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
+    opus_int32          *p,                     /* I    Polynomial, Q16                         */
+    const opus_int32    x,                      /* I    Evaluation point, Q12                   */
+    const opus_int      dd                      /* I    Order                                   */
+)
+{
+    opus_int   n;
+    opus_int32 x_Q16, y32;
+
+    y32 = p[ dd ];                                  /* Q16 */
+    x_Q16 = silk_LSHIFT( x, 4 );
+    for( n = dd - 1; n >= 0; n-- ) {
+        y32 = silk_SMLAWW( p[ n ], y32, x_Q16 );    /* Q16 */
+    }
+    return y32;
+}
+
+static OPUS_INLINE void silk_A2NLSF_init(
+     const opus_int32    *a_Q16,
+     opus_int32          *P,
+     opus_int32          *Q,
+     const opus_int      dd
+)
+{
+    opus_int k;
+
+    /* Convert filter coefs to even and odd polynomials */
+    P[dd] = silk_LSHIFT( 1, 16 );
+    Q[dd] = silk_LSHIFT( 1, 16 );
+    for( k = 0; k < dd; k++ ) {
+        P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ];    /* Q16 */
+        Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ];    /* Q16 */
+    }
+
+    /* Divide out zeros as we have that for even filter orders, */
+    /* z =  1 is always a root in Q, and                        */
+    /* z = -1 is always a root in P                             */
+    for( k = dd; k > 0; k-- ) {
+        P[ k - 1 ] -= P[ k ];
+        Q[ k - 1 ] += Q[ k ];
+    }
+
+    /* Transform polynomials from cos(n*f) to cos(f)^n */
+    silk_A2NLSF_trans_poly( P, dd );
+    silk_A2NLSF_trans_poly( Q, dd );
+}
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
+void silk_A2NLSF(
+    opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+    opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
+    const opus_int              d                   /* I    Filter order (must be even)                                 */
+)
+{
+    opus_int      i, k, m, dd, root_ix, ffrac;
+    opus_int32 xlo, xhi, xmid;
+    opus_int32 ylo, yhi, ymid, thr;
+    opus_int32 nom, den;
+    opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 *PQ[ 2 ];
+    opus_int32 *p;
+
+    /* Store pointers to array */
+    PQ[ 0 ] = P;
+    PQ[ 1 ] = Q;
+
+    dd = silk_RSHIFT( d, 1 );
+
+    silk_A2NLSF_init( a_Q16, P, Q, dd );
+
+    /* Find roots, alternating between P and Q */
+    p = P;                          /* Pointer to polynomial */
+
+    xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+
+    if( ylo < 0 ) {
+        /* Set the first NLSF to zero and move on to the next */
+        NLSF[ 0 ] = 0;
+        p = Q;                      /* Pointer to polynomial */
+        ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+        root_ix = 1;                /* Index of current root */
+    } else {
+        root_ix = 0;                /* Index of current root */
+    }
+    k = 1;                          /* Loop counter */
+    i = 0;                          /* Counter for bandwidth expansions applied */
+    thr = 0;
+    while( 1 ) {
+        /* Evaluate polynomial */
+        xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */
+        yhi = silk_A2NLSF_eval_poly( p, xhi, dd );
+
+        /* Detect zero crossing */
+        if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) {
+            if( yhi == 0 ) {
+                /* If the root lies exactly at the end of the current       */
+                /* interval, look for the next root in the next interval    */
+                thr = 1;
+            } else {
+                thr = 0;
+            }
+            /* Binary division */
+            ffrac = -256;
+            for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) {
+                /* Evaluate polynomial */
+                xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 );
+                ymid = silk_A2NLSF_eval_poly( p, xmid, dd );
+
+                /* Detect zero crossing */
+                if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) {
+                    /* Reduce frequency */
+                    xhi = xmid;
+                    yhi = ymid;
+                } else {
+                    /* Increase frequency */
+                    xlo = xmid;
+                    ylo = ymid;
+                    ffrac = silk_ADD_RSHIFT( ffrac, 128, m );
+                }
+            }
+
+            /* Interpolate */
+            if( silk_abs( ylo ) < 65536 ) {
+                /* Avoid dividing by zero */
+                den = ylo - yhi;
+                nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 );
+                if( den != 0 ) {
+                    ffrac += silk_DIV32( nom, den );
+                }
+            } else {
+                /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */
+                ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) );
+            }
+            NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX );
+
+            silk_assert( NLSF[ root_ix ] >= 0 );
+
+            root_ix++;        /* Next root */
+            if( root_ix >= d ) {
+                /* Found all roots */
+                break;
+            }
+            /* Alternate pointer to polynomial */
+            p = PQ[ root_ix & 1 ];
+
+            /* Evaluate polynomial */
+            xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/
+            ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 );
+        } else {
+            /* Increment loop counter */
+            k++;
+            xlo = xhi;
+            ylo = yhi;
+            thr = 0;
+
+            if( k > LSF_COS_TAB_SZ_FIX ) {
+                i++;
+                if( i > MAX_ITERATIONS_A2NLSF_FIX ) {
+                    /* Set NLSFs to white spectrum and exit */
+                    NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
+                    for( k = 1; k < d; k++ ) {
+                        NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
+                    }
+                    return;
+                }
+
+                /* Error: Apply progressively more bandwidth expansion and run again */
+                silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
+
+                silk_A2NLSF_init( a_Q16, P, Q, dd );
+                p = P;                            /* Pointer to polynomial */
+                xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+                ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+                if( ylo < 0 ) {
+                    /* Set the first NLSF to zero and move on to the next */
+                    NLSF[ 0 ] = 0;
+                    p = Q;                        /* Pointer to polynomial */
+                    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+                    root_ix = 1;                  /* Index of current root */
+                } else {
+                    root_ix = 0;                  /* Index of current root */
+                }
+                k = 1;                            /* Reset loop counter */
+            }
+        }
+    }
+}
diff --git a/src/main/jni/opus/silk/API.h b/src/main/jni/opus/silk/API.h
new file mode 100644
index 000000000..f0601bcf6
--- /dev/null
+++ b/src/main/jni/opus/silk/API.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_API_H
+#define SILK_API_H
+
+#include "control.h"
+#include "typedef.h"
+#include "errors.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define SILK_MAX_FRAMES_PER_PACKET  3
+
+/* Struct for TOC (Table of Contents) */
+typedef struct {
+    opus_int    VADFlag;                                /* Voice activity for packet                            */
+    opus_int    VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet              */
+    opus_int    inbandFECFlag;                          /* Flag indicating if packet contains in-band FEC       */
+} silk_TOC_struct;
+
+/****************************************/
+/* Encoder functions                    */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk encoder state */
+/***********************************************/
+opus_int silk_Get_Encoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *encSizeBytes       /* O    Number of bytes in SILK encoder state           */
+);
+
+/*************************/
+/* Init or reset encoder */
+/*************************/
+opus_int silk_InitEncoder(                              /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+);
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
+/* encControl->payloadSize_ms is set to                                                                         */
+opus_int silk_Encode(                                   /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    silk_EncControlStruct           *encControl,        /* I    Control status                                  */
+    const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
+    opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
+    ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
+    opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
+    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+);
+
+/****************************************/
+/* Decoder functions                    */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk decoder state */
+/***********************************************/
+opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
+);
+
+/*************************/
+/* Init or Reset decoder */
+/*************************/
+opus_int silk_InitDecoder(                              /* O    Returns error code                              */
+    void                            *decState           /* I/O  State                                           */
+);
+
+/******************/
+/* Decode a frame */
+/******************/
+opus_int silk_Decode(                                   /* O    Returns error code                              */
+    void*                           decState,           /* I/O  State                                           */
+    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
+    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
+    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
+    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
+    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+);
+
+#if 0
+/**************************************/
+/* Get table of contents for a packet */
+/**************************************/
+opus_int silk_get_TOC(
+    const opus_uint8                *payload,           /* I    Payload data                                */
+    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
+    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
+    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
+);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/CNG.c b/src/main/jni/opus/silk/CNG.c
new file mode 100644
index 000000000..8481d95db
--- /dev/null
+++ b/src/main/jni/opus/silk/CNG.c
@@ -0,0 +1,172 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/* Generates excitation for CNG LPC synthesis */
+static OPUS_INLINE void silk_CNG_exc(
+    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
+    opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
+    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
+    opus_int                         length,             /* I    Length                                      */
+    opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
+)
+{
+    opus_int32 seed;
+    opus_int   i, idx, exc_mask;
+
+    exc_mask = CNG_BUF_MASK_MAX;
+    while( exc_mask > length ) {
+        exc_mask = silk_RSHIFT( exc_mask, 1 );
+    }
+
+    seed = *rand_seed;
+    for( i = 0; i < length; i++ ) {
+        seed = silk_RAND( seed );
+        idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
+        silk_assert( idx >= 0 );
+        silk_assert( idx <= CNG_BUF_MASK_MAX );
+        residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+    }
+    *rand_seed = seed;
+}
+
+void silk_CNG_Reset(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
+)
+{
+    opus_int i, NLSF_step_Q15, NLSF_acc_Q15;
+
+    NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 );
+    NLSF_acc_Q15 = 0;
+    for( i = 0; i < psDec->LPC_order; i++ ) {
+        NLSF_acc_Q15 += NLSF_step_Q15;
+        psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15;
+    }
+    psDec->sCNG.CNG_smth_Gain_Q16 = 0;
+    psDec->sCNG.rand_seed = 3176576;
+}
+
+/* Updates CNG estimate, and applies the CNG when packet was lost   */
+void silk_CNG(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int16                  frame[],                        /* I/O  Signal                                      */
+    opus_int                    length                          /* I    Length of residual                          */
+)
+{
+    opus_int   i, subfr;
+    opus_int32 sum_Q6, max_Gain_Q16;
+    opus_int16 A_Q12[ MAX_LPC_ORDER ];
+    silk_CNG_struct *psCNG = &psDec->sCNG;
+    SAVE_STACK;
+
+    if( psDec->fs_kHz != psCNG->fs_kHz ) {
+        /* Reset state */
+        silk_CNG_Reset( psDec );
+
+        psCNG->fs_kHz = psDec->fs_kHz;
+    }
+    if( psDec->lossCnt == 0 && psDec->prevSignalType == TYPE_NO_VOICE_ACTIVITY ) {
+        /* Update CNG parameters */
+
+        /* Smoothing of LSF's  */
+        for( i = 0; i < psDec->LPC_order; i++ ) {
+            psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 );
+        }
+        /* Find the subframe with the highest gain */
+        max_Gain_Q16 = 0;
+        subfr        = 0;
+        for( i = 0; i < psDec->nb_subfr; i++ ) {
+            if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) {
+                max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ];
+                subfr        = i;
+            }
+        }
+        /* Update CNG excitation buffer with excitation from this subframe */
+        silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) );
+        silk_memcpy(   psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) );
+
+        /* Smooth gains */
+        for( i = 0; i < psDec->nb_subfr; i++ ) {
+            psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
+        }
+    }
+
+    /* Add CNG when packet is lost or during DTX */
+    if( psDec->lossCnt ) {
+        VARDECL( opus_int32, CNG_sig_Q10 );
+
+        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+
+        /* Generate CNG excitation */
+        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+
+        /* Convert CNG NLSF to filter representation */
+        silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
+
+        /* Generate CNG signal, by synthesis filtering */
+        silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        for( i = 0; i < length; i++ ) {
+            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+            if( psDec->LPC_order == 16 ) {
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+            }
+
+            /* Update states */
+            CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
+
+            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+        }
+        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+    } else {
+        silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/HP_variable_cutoff.c b/src/main/jni/opus/silk/HP_variable_cutoff.c
new file mode 100644
index 000000000..bbe10f04c
--- /dev/null
+++ b/src/main/jni/opus/silk/HP_variable_cutoff.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
+)
+{
+   opus_int   quality_Q15;
+   opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7;
+   silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn;
+
+   /* Adaptive cutoff frequency: estimate low end of pitch frequency range */
+   if( psEncC1->prevSignalType == TYPE_VOICED ) {
+      /* difference, in log domain */
+      pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag );
+      pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 );
+
+      /* adjustment based on quality */
+      quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ];
+      pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ),
+            pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) );
+
+      /* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */
+      delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 );
+      if( delta_freq_Q7 < 0 ) {
+         /* less smoothing for decreasing pitch frequency, to track something close to the minimum */
+         delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 );
+      }
+
+      /* limit delta, to reduce impact of outliers in pitch estimation */
+      delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) );
+
+      /* update smoother */
+      psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15,
+            silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) );
+
+      /* limit frequency range */
+      psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15,
+            silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ),
+            silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) );
+   }
+}
diff --git a/src/main/jni/opus/silk/Inlines.h b/src/main/jni/opus/silk/Inlines.h
new file mode 100644
index 000000000..ec986cdfd
--- /dev/null
+++ b/src/main/jni/opus/silk/Inlines.h
@@ -0,0 +1,188 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*! \file silk_Inlines.h
+ *  \brief silk_Inlines.h defines OPUS_INLINE signal processing functions.
+ */
+
+#ifndef SILK_FIX_INLINES_H
+#define SILK_FIX_INLINES_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/* count leading zeros of opus_int64 */
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
+{
+    opus_int32 in_upper;
+
+    in_upper = (opus_int32)silk_RSHIFT64(in, 32);
+    if (in_upper == 0) {
+        /* Search in the lower 32 bits */
+        return 32 + silk_CLZ32( (opus_int32) in );
+    } else {
+        /* Search in the upper 32 bits */
+        return silk_CLZ32( in_upper );
+    }
+}
+
+/* get number of leading zeros and fractional part (the bits right after the leading one */
+static OPUS_INLINE void silk_CLZ_FRAC(
+    opus_int32 in,            /* I  input                               */
+    opus_int32 *lz,           /* O  number of leading zeros             */
+    opus_int32 *frac_Q7       /* O  the 7 bits right after the leading one */
+)
+{
+    opus_int32 lzeros = silk_CLZ32(in);
+
+    * lz = lzeros;
+    * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f;
+}
+
+/* Approximation of square root                                          */
+/* Accuracy: < +/- 10%  for output values > 15                           */
+/*           < +/- 2.5% for output values > 120                          */
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
+{
+    opus_int32 y, lz, frac_Q7;
+
+    if( x <= 0 ) {
+        return 0;
+    }
+
+    silk_CLZ_FRAC(x, &lz, &frac_Q7);
+
+    if( lz & 1 ) {
+        y = 32768;
+    } else {
+        y = 46214;        /* 46214 = sqrt(2) * 32768 */
+    }
+
+    /* get scaling right */
+    y >>= silk_RSHIFT(lz, 1);
+
+    /* increment using fractional part of input */
+    y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
+
+    return y;
+}
+
+/* Divide two int32 values and return result as int32 in a given Q-domain */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
+    const opus_int32     a32,               /* I    numerator (Q0)                  */
+    const opus_int32     b32,               /* I    denominator (Q0)                */
+    const opus_int       Qres               /* I    Q-domain of result (>= 0)       */
+)
+{
+    opus_int   a_headrm, b_headrm, lshift;
+    opus_int32 b32_inv, a32_nrm, b32_nrm, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres >= 0 );
+
+    /* Compute number of bits head room and normalize inputs */
+    a_headrm = silk_CLZ32( silk_abs(a32) ) - 1;
+    a32_nrm = silk_LSHIFT(a32, a_headrm);                                       /* Q: a_headrm                  */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                  */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm        */
+
+    /* First approximation */
+    result = silk_SMULWB(a32_nrm, b32_inv);                                     /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Compute residual by subtracting product of denominator and first approximation */
+    /* It's OK to overflow because the final value of a32_nrm should always be small */
+    a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 ));  /* Q: a_headrm   */
+
+    /* Refinement */
+    result = silk_SMLAWB(result, a32_nrm, b32_inv);                             /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Convert to Qres domain */
+    lshift = 29 + a_headrm - b_headrm - Qres;
+    if( lshift < 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        } else {
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+/* Invert int32 value and return result as int32 in a given Q-domain */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
+    const opus_int32     b32,                   /* I    denominator (Q0)                */
+    const opus_int       Qres                   /* I    Q-domain of result (> 0)        */
+)
+{
+    opus_int   b_headrm, lshift;
+    opus_int32 b32_inv, b32_nrm, err_Q32, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres > 0 );
+
+    /* Compute number of bits head room and normalize input */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm    */
+
+    /* First approximation */
+    result = silk_LSHIFT(b32_inv, 16);                                          /* Q: 61 - b_headrm            */
+
+    /* Compute residual by subtracting product of denominator and first approximation from one */
+    err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 );        /* Q32                        */
+
+    /* Refinement */
+    result = silk_SMLAWW(result, err_Q32, b32_inv);                             /* Q: 61 - b_headrm            */
+
+    /* Convert to Qres domain */
+    lshift = 61 - b_headrm - Qres;
+    if( lshift <= 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        }else{
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_INLINES_H */
diff --git a/src/main/jni/opus/silk/LPC_analysis_filter.c b/src/main/jni/opus/silk/LPC_analysis_filter.c
new file mode 100644
index 000000000..9d1f16cb7
--- /dev/null
+++ b/src/main/jni/opus/silk/LPC_analysis_filter.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/*******************************************/
+/* LPC analysis filter                     */
+/* NB! State is kept internally and the    */
+/* filter always starts with zero state    */
+/* first d output samples are set to zero  */
+/*******************************************/
+
+void silk_LPC_analysis_filter(
+    opus_int16                  *out,               /* O    Output signal                                               */
+    const opus_int16            *in,                /* I    Input signal                                                */
+    const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
+    const opus_int32            len,                /* I    Signal length                                               */
+    const opus_int32            d                   /* I    Filter order                                                */
+)
+{
+    opus_int   j;
+#ifdef FIXED_POINT
+    opus_int16 mem[SILK_MAX_ORDER_LPC];
+    opus_int16 num[SILK_MAX_ORDER_LPC];
+#else
+    int ix;
+    opus_int32       out32_Q12, out32;
+    const opus_int16 *in_ptr;
+#endif
+
+    silk_assert( d >= 6 );
+    silk_assert( (d & 1) == 0 );
+    silk_assert( d <= len );
+
+#ifdef FIXED_POINT
+    silk_assert( d <= SILK_MAX_ORDER_LPC );
+    for ( j = 0; j < d; j++ ) {
+        num[ j ] = -B[ j ];
+    }
+    for (j=0;j<d;j++) {
+        mem[ j ] = in[ d - j - 1 ];
+    }
+    celt_fir( in + d, num, out + d, len - d, d, mem );
+    for ( j = 0; j < d; j++ ) {
+        out[ j ] = 0;
+    }
+#else
+    for( ix = d; ix < len; ix++ ) {
+        in_ptr = &in[ ix - 1 ];
+
+        out32_Q12 = silk_SMULBB( in_ptr[  0 ], B[ 0 ] );
+        /* Allowing wrap around so that two wraps can cancel each other. The rare
+           cases where the result wraps around can only be triggered by invalid streams*/
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -1 ], B[ 1 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -2 ], B[ 2 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -3 ], B[ 3 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -4 ], B[ 4 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -5 ], B[ 5 ] );
+        for( j = 6; j < d; j += 2 ) {
+            out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j     ], B[ j     ] );
+            out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j - 1 ], B[ j + 1 ] );
+        }
+
+        /* Subtract prediction */
+        out32_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)in_ptr[ 1 ], 12 ), out32_Q12 );
+
+        /* Scale to Q0 */
+        out32 = silk_RSHIFT_ROUND( out32_Q12, 12 );
+
+        /* Saturate output */
+        out[ ix ] = (opus_int16)silk_SAT16( out32 );
+    }
+
+    /* Set first d output samples to zero */
+    silk_memset( out, 0, d * sizeof( opus_int16 ) );
+#endif
+}
diff --git a/src/main/jni/opus/silk/LPC_inv_pred_gain.c b/src/main/jni/opus/silk/LPC_inv_pred_gain.c
new file mode 100644
index 000000000..4af89aa5f
--- /dev/null
+++ b/src/main/jni/opus/silk/LPC_inv_pred_gain.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+#define QA                          24
+#define A_LIMIT                     SILK_FIX_CONST( 0.99975, QA )
+
+#define MUL32_FRAC_Q(a32, b32, Q)   ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
+
+/* Compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+static opus_int32 LPC_inverse_pred_gain_QA(                 /* O   Returns inverse prediction gain in energy domain, Q30    */
+    opus_int32           A_QA[ 2 ][ SILK_MAX_ORDER_LPC ],   /* I   Prediction coefficients                                  */
+    const opus_int       order                              /* I   Prediction order                                         */
+)
+{
+    opus_int   k, n, mult2Q;
+    opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
+    opus_int32 *Aold_QA, *Anew_QA;
+
+    Anew_QA = A_QA[ order & 1 ];
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    for( k = order - 1; k > 0; k-- ) {
+        /* Check for stability */
+        if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
+            return 0;
+        }
+
+        /* Set RC equal to negated AR coef */
+        rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
+
+        /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
+        rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        silk_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
+        silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
+
+        /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
+        mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
+        rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
+
+        /* Update inverse gain */
+        /* invGain_Q30 range: [ 0 : 2^30 ] */
+        invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+        silk_assert( invGain_Q30 >= 0           );
+        silk_assert( invGain_Q30 <= ( 1 << 30 ) );
+
+        /* Swap pointers */
+        Aold_QA = Anew_QA;
+        Anew_QA = A_QA[ k & 1 ];
+
+        /* Update AR coefficient */
+        for( n = 0; n < k; n++ ) {
+            tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
+            Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
+        }
+    }
+
+    /* Check for stability */
+    if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
+        return 0;
+    }
+
+    /* Set RC equal to negated AR coef */
+    rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
+
+    /* Range: [ 1 : 2^30 ] */
+    rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+
+    /* Update inverse gain */
+    /* Range: [ 0 : 2^30 ] */
+    invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+    silk_assert( invGain_Q30 >= 0     );
+    silk_assert( invGain_Q30 <= 1<<30 );
+
+    return invGain_Q30;
+}
+
+/* For input in Q12 domain */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+    opus_int32 DC_resp = 0;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Increase Q domain of the AR coefficients */
+    for( k = 0; k < order; k++ ) {
+        DC_resp += (opus_int32)A_Q12[ k ];
+        Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
+    }
+    /* If the DC is unstable, we don't even need to do the full calculations */
+    if( DC_resp >= 4096 ) {
+        return 0;
+    }
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+
+#ifdef FIXED_POINT
+
+/* For input in Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
+    const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
+    const opus_int              order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Increase Q domain of the AR coefficients */
+    for( k = 0; k < order; k++ ) {
+        Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
+    }
+
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+#endif
diff --git a/src/main/jni/opus/silk/LP_variable_cutoff.c b/src/main/jni/opus/silk/LP_variable_cutoff.c
new file mode 100644
index 000000000..f639e1f89
--- /dev/null
+++ b/src/main/jni/opus/silk/LP_variable_cutoff.c
@@ -0,0 +1,135 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/*
+    Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+    80 dB minimum stopband attenuation, and
+    [0.95 : 0.15 : 0.35] normalized cut off frequencies.
+*/
+
+#include "main.h"
+
+/* Helper function, interpolates the filter taps */
+static OPUS_INLINE void silk_LP_interpolate_filter_taps(
+    opus_int32           B_Q28[ TRANSITION_NB ],
+    opus_int32           A_Q28[ TRANSITION_NA ],
+    const opus_int       ind,
+    const opus_int32     fac_Q16
+)
+{
+    opus_int nb, na;
+
+    if( ind < TRANSITION_INT_NUM - 1 ) {
+        if( fac_Q16 > 0 ) {
+            if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */
+                /* Piece-wise linear interpolation of B and A */
+                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
+                    B_Q28[ nb ] = silk_SMLAWB(
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        fac_Q16 );
+                }
+                for( na = 0; na < TRANSITION_NA; na++ ) {
+                    A_Q28[ na ] = silk_SMLAWB(
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        fac_Q16 );
+                }
+            } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */
+                silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) );
+                /* Piece-wise linear interpolation of B and A */
+                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
+                    B_Q28[ nb ] = silk_SMLAWB(
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ],
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        fac_Q16 - ( (opus_int32)1 << 16 ) );
+                }
+                for( na = 0; na < TRANSITION_NA; na++ ) {
+                    A_Q28[ na ] = silk_SMLAWB(
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ],
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        fac_Q16 - ( (opus_int32)1 << 16 ) );
+                }
+            }
+        } else {
+            silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) );
+            silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) );
+        }
+    } else {
+        silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) );
+        silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) );
+    }
+}
+
+/* Low-pass filter with variable cutoff frequency based on  */
+/* piece-wise linear interpolation between elliptic filters */
+/* Start by setting psEncC->mode <> 0;                      */
+/* Deactivate by setting psEncC->mode = 0;                  */
+void silk_LP_variable_cutoff(
+    silk_LP_state               *psLP,                          /* I/O  LP filter state                             */
+    opus_int16                  *frame,                         /* I/O  Low-pass filtered output signal             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int32   B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ], fac_Q16 = 0;
+    opus_int     ind = 0;
+
+    silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES );
+
+    /* Run filter if needed */
+    if( psLP->mode != 0 ) {
+        /* Calculate index and interpolation factor for interpolation */
+#if( TRANSITION_INT_STEPS == 64 )
+        fac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 );
+#else
+        fac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES );
+#endif
+        ind      = silk_RSHIFT( fac_Q16, 16 );
+        fac_Q16 -= silk_LSHIFT( ind, 16 );
+
+        silk_assert( ind >= 0 );
+        silk_assert( ind < TRANSITION_INT_NUM );
+
+        /* Interpolate filter coefficients */
+        silk_LP_interpolate_filter_taps( B_Q28, A_Q28, ind, fac_Q16 );
+
+        /* Update transition frame number for next frame */
+        psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES );
+
+        /* ARMA low-pass filtering */
+        silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
+        silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
+    }
+}
diff --git a/src/main/jni/opus/silk/MacroCount.h b/src/main/jni/opus/silk/MacroCount.h
new file mode 100644
index 000000000..834817d05
--- /dev/null
+++ b/src/main/jni/opus/silk/MacroCount.h
@@ -0,0 +1,718 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SIGPROCFIX_API_MACROCOUNT_H
+#define SIGPROCFIX_API_MACROCOUNT_H
+#include <stdio.h>
+
+#ifdef    silk_MACRO_COUNT
+#define varDefine opus_int64 ops_count = 0;
+
+extern opus_int64 ops_count;
+
+static OPUS_INLINE opus_int64 silk_SaveCount(){
+    return(ops_count);
+}
+
+static OPUS_INLINE opus_int64 silk_SaveResetCount(){
+    opus_int64 ret;
+
+    ret = ops_count;
+    ops_count = 0;
+    return(ret);
+}
+
+static OPUS_INLINE silk_PrintCount(){
+    printf("ops_count = %d \n ", (opus_int32)ops_count);
+}
+
+#undef silk_MUL
+static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 * b32;
+    return ret;
+}
+
+#undef silk_MUL_uint
+static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
+    opus_uint32 ret;
+    ops_count += 4;
+    ret = a32 * b32;
+    return ret;
+}
+#undef silk_MLA
+static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 + b32 * c32;
+    return ret;
+}
+
+#undef silk_MLA_uint
+static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
+    opus_uint32 ret;
+    ops_count += 4;
+    ret = a32 + b32 * c32;
+    return ret;
+}
+
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 5;
+    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
+    return ret;
+}
+#undef    silk_SMLAWB
+static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 5;
+    ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)));
+    return ret;
+}
+
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
+    return ret;
+}
+#undef silk_SMLAWT
+static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
+    return ret;
+}
+
+#undef silk_SMULBB
+static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32);
+    return ret;
+}
+#undef silk_SMLABB
+static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
+    return ret;
+}
+
+#undef silk_SMULBT
+static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16);
+    return ret;
+}
+
+#undef silk_SMLABT
+static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
+    return ret;
+}
+
+#undef silk_SMULTT
+static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = (a32 >> 16) * (b32 >> 16);
+    return ret;
+}
+
+#undef    silk_SMLATT
+static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + (b32 >> 16) * (c32 >> 16);
+    return ret;
+}
+
+
+/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/
+#undef    silk_MLA_ovflw
+#define silk_MLA_ovflw silk_MLA
+
+#undef silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw silk_SMLABB
+
+#undef silk_SMLABT_ovflw
+#define silk_SMLABT_ovflw silk_SMLABT
+
+#undef silk_SMLATT_ovflw
+#define silk_SMLATT_ovflw silk_SMLATT
+
+#undef silk_SMLAWB_ovflw
+#define silk_SMLAWB_ovflw silk_SMLAWB
+
+#undef silk_SMLAWT_ovflw
+#define silk_SMLAWT_ovflw silk_SMLAWT
+
+#undef silk_SMULL
+static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
+    opus_int64 ret;
+    ops_count += 8;
+    ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32));
+    return ret;
+}
+
+#undef    silk_SMLAL
+static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
+    opus_int64 ret;
+    ops_count += 8;
+    ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32));
+    return ret;
+}
+#undef    silk_SMLALBB
+static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
+    opus_int64 ret;
+    ops_count += 4;
+    ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16));
+    return ret;
+}
+
+#undef    SigProcFIX_CLZ16
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
+{
+    opus_int32 out32 = 0;
+    ops_count += 10;
+    if( in16 == 0 ) {
+        return 16;
+    }
+    /* test nibbles */
+    if( in16 & 0xFF00 ) {
+        if( in16 & 0xF000 ) {
+            in16 >>= 12;
+        } else {
+            out32 += 4;
+            in16 >>= 8;
+        }
+    } else {
+        if( in16 & 0xFFF0 ) {
+            out32 += 8;
+            in16 >>= 4;
+        } else {
+            out32 += 12;
+        }
+    }
+    /* test bits and return */
+    if( in16 & 0xC ) {
+        if( in16 & 0x8 )
+            return out32 + 0;
+        else
+            return out32 + 1;
+    } else {
+        if( in16 & 0xE )
+            return out32 + 2;
+        else
+            return out32 + 3;
+    }
+}
+
+#undef SigProcFIX_CLZ32
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
+{
+    /* test highest 16 bits and convert to opus_int16 */
+    ops_count += 2;
+    if( in32 & 0xFFFF0000 ) {
+        return SigProcFIX_CLZ16((opus_int16)(in32 >> 16));
+    } else {
+        return SigProcFIX_CLZ16((opus_int16)in32) + 16;
+    }
+}
+
+#undef silk_DIV32
+static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
+    ops_count += 64;
+    return a32 / b32;
+}
+
+#undef silk_DIV32_16
+static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
+    ops_count += 32;
+    return a32 / b32;
+}
+
+#undef silk_SAT8
+static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){
+    opus_int8 tmp;
+    ops_count += 1;
+    tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX  : \
+                    ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)));
+    return(tmp);
+}
+
+#undef silk_SAT16
+static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){
+    opus_int16 tmp;
+    ops_count += 1;
+    tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX  : \
+                     ((a) < silk_int16_MIN ? silk_int16_MIN  : (a)));
+    return(tmp);
+}
+#undef silk_SAT32
+static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX  : \
+                     ((a) < silk_int32_MIN ? silk_int32_MIN  : (a)));
+    return(tmp);
+}
+#undef silk_POS_SAT32
+static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT8
+static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
+    opus_int8 tmp;
+    ops_count += 1;
+    tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX  : ((a)+(b)));
+    return(tmp);
+}
+#undef silk_ADD_POS_SAT16
+static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
+    opus_int16 tmp;
+    ops_count += 1;
+    tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT64
+static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
+    opus_int64 tmp;
+    ops_count += 1;
+    tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef    silk_LSHIFT8
+static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
+    opus_int8 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT16
+static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT32
+static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT64
+static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
+    ops_count += 1;
+    return a << shift;
+}
+
+#undef    silk_LSHIFT_ovflw
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
+    ops_count += 1;
+    return a << shift;
+}
+
+#undef    silk_LSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+
+#undef    silk_RSHIFT8
+static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT16
+static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT32
+static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT64
+static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+
+#undef    silk_RSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+
+#undef    silk_ADD_LSHIFT
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_LSHIFT32
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_LSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_RSHIFT
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_ADD_RSHIFT32
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_ADD_RSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_SUB_LSHIFT32
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_SUB_RSHIFT32
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+
+#undef    silk_RSHIFT_ROUND
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 3;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+#undef    silk_RSHIFT_ROUND64
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
+    opus_int64 ret;
+    ops_count += 6;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+#undef    silk_abs_int64
+static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){
+    ops_count += 1;
+    return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/
+}
+
+#undef    silk_abs_int32
+static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){
+    ops_count += 1;
+    return silk_abs(a);
+}
+
+
+#undef silk_min
+static silk_min(a, b){
+    ops_count += 1;
+    return (((a) < (b)) ? (a) :  (b));
+}
+#undef silk_max
+static silk_max(a, b){
+    ops_count += 1;
+    return (((a) > (b)) ? (a) :  (b));
+}
+#undef silk_sign
+static silk_sign(a){
+    ops_count += 1;
+    return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ));
+}
+
+#undef    silk_ADD16
+static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_ADD32
+static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_ADD64
+static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
+    opus_int64 ret;
+    ops_count += 2;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_SUB16
+static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a - b;
+    return ret;
+}
+
+#undef    silk_SUB32
+static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - b;
+    return ret;
+}
+
+#undef    silk_SUB64
+static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
+    opus_int64 ret;
+    ops_count += 2;
+    ret = a - b;
+    return ret;
+}
+
+#undef silk_ADD_SAT16
+static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int16 res;
+    /* Nb will be counted in AKP_add32 and silk_SAT16*/
+    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
+    return res;
+}
+
+#undef silk_ADD_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
+    opus_int32 res;
+    ops_count += 1;
+    res =    ((((a32) + (b32)) & 0x80000000) == 0 ?                                    \
+            ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) :    \
+            ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
+    return res;
+}
+
+#undef silk_ADD_SAT64
+static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    opus_int64 res;
+    ops_count += 1;
+    res =    ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                \
+            ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) :    \
+            ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
+    return res;
+}
+
+#undef silk_SUB_SAT16
+static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int16 res;
+    silk_assert(0);
+    /* Nb will be counted in sub-macros*/
+    res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
+    return res;
+}
+
+#undef silk_SUB_SAT32
+static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
+    opus_int32 res;
+    ops_count += 1;
+    res =     ((((a32)-(b32)) & 0x80000000) == 0 ?                                            \
+            (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) :    \
+            ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
+    return res;
+}
+
+#undef silk_SUB_SAT64
+static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    opus_int64 res;
+    ops_count += 1;
+    res =    ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                        \
+            (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) :    \
+            ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
+
+    return res;
+}
+
+#undef    silk_SMULWW
+static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    /* Nb will be counted in sub-macros*/
+    ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16));
+    return ret;
+}
+
+#undef    silk_SMLAWW
+static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    /* Nb will be counted in sub-macros*/
+    ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16));
+    return ret;
+}
+
+#undef    silk_min_int
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+
+#undef    silk_min_16
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+#undef    silk_min_32
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+#undef    silk_min_64
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+
+/* silk_min() versions with typecast in the function call */
+#undef    silk_max_int
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+#undef    silk_max_16
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+#undef    silk_max_32
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+
+#undef    silk_max_64
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+
+
+#undef silk_LIMIT_int
+static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
+{
+    opus_int ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+
+    return(ret);
+}
+
+#undef silk_LIMIT_16
+static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
+{
+    opus_int16 ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+
+return(ret);
+}
+
+
+#undef silk_LIMIT_32
+static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
+{
+    opus_int32 ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+    return(ret);
+}
+
+#else
+#define varDefine
+#define silk_SaveCount()
+
+#endif
+#endif
+
diff --git a/src/main/jni/opus/silk/MacroDebug.h b/src/main/jni/opus/silk/MacroDebug.h
new file mode 100644
index 000000000..35aedc5c5
--- /dev/null
+++ b/src/main/jni/opus/silk/MacroDebug.h
@@ -0,0 +1,952 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (C) 2012 Xiph.Org Foundation
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef MACRO_DEBUG_H
+#define MACRO_DEBUG_H
+
+/* Redefine macro functions with extensive assertion in DEBUG mode.
+   As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */
+
+#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT)
+
+#undef silk_ADD16
+#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
+    opus_int16 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT16( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD32
+#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
+    opus_int32 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT32( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD64
+#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
+    opus_int64 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT64( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB16
+#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
+    opus_int16 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT16( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB32
+#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
+    opus_int32 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT32( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB64
+#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
+    opus_int64 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT64( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD_SAT16
+#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
+    opus_int16 res;
+    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
+    if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) )
+    {
+        fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_ADD_SAT32
+#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 res;
+    res =   ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ?       \
+            ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \
+            ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
+    if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) )
+    {
+        fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_ADD_SAT64
+#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
+    opus_int64 res;
+    int        fail = 0;
+    res =   ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                 \
+            ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \
+            ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
+    if( res != a64 + b64 ) {
+        /* Check that we saturated to the correct extreme value */
+        if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+               ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) )
+        {
+            fail = 1;
+        }
+    } else {
+        /* Saturation not necessary */
+        fail = res != a64 + b64;
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT16
+#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
+    opus_int16 res;
+    res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
+    if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) )
+    {
+        fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT32
+#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
+    opus_int32 res;
+    res =   ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ?                \
+            (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \
+            ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
+    if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) )
+    {
+        fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT64
+#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
+    opus_int64 res;
+    int        fail = 0;
+    res =   ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                    \
+            (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \
+            ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
+    if( res != a64 - b64 ) {
+        /* Check that we saturated to the correct extreme value */
+        if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+              ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ))
+        {
+            fail = 1;
+        }
+    } else {
+        /* Saturation not necessary */
+        fail = res != a64 - b64;
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_MUL
+#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    opus_int64 ret64;
+    ret = a32 * b32;
+    ret64 = (opus_int64)a32 * (opus_int64)b32;
+    if ( (opus_int64)ret != ret64 )
+    {
+        fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MUL_uint
+#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
+    opus_uint32 ret;
+    ret = a32 * b32;
+    if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 )
+    {
+        fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MLA
+#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + b32 * c32;
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
+    {
+        fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MLA_uint
+#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
+    opus_uint32 ret;
+    ret = a32 + b32 * c32;
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
+    {
+        fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWB
+#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
+    if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 )
+    {
+        fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMLAWB
+#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) );
+    if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) )
+    {
+        fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWT
+#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
+    if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 )
+    {
+        fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMLAWT
+#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
+    if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) )
+    {
+        fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULL
+#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
+    opus_int64 ret64;
+    int        fail = 0;
+    ret64 = a64 * b64;
+    if( b64 != 0 ) {
+        fail = a64 != (ret64 / b64);
+    } else if( a64 != 0 ) {
+        fail = b64 != (ret64 / a64);
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret64;
+}
+
+/* no checking needed for silk_SMULBB */
+#undef silk_SMLABB
+#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 )
+    {
+        fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* no checking needed for silk_SMULBT */
+#undef silk_SMLABT
+#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) )
+    {
+        fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* no checking needed for silk_SMULTT */
+#undef silk_SMLATT
+#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + (b32 >> 16) * (c32 >> 16);
+    if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) )
+    {
+        fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWW
+#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret, tmp1, tmp2;
+    opus_int64 ret64;
+    int        fail = 0;
+
+    ret  = silk_SMULWB( a32, b32 );
+    tmp1 = silk_RSHIFT_ROUND( b32, 16 );
+    tmp2 = silk_MUL( a32, tmp1 );
+
+    fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1;
+
+    tmp1 = ret;
+    ret  = silk_ADD32( tmp1, tmp2 );
+    fail |= silk_ADD32( tmp1, tmp2 ) != silk_ADD_SAT32( tmp1, tmp2 );
+
+    ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 );
+    fail |= (opus_int64)ret != ret64;
+
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+
+    return ret;
+}
+
+#undef silk_SMLAWW
+#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret, tmp;
+
+    tmp = silk_SMULWW( b32, c32 );
+    ret = silk_ADD32( a32, tmp );
+    if ( ret != silk_ADD_SAT32( a32, tmp ) )
+    {
+        fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+#undef  silk_MLA_ovflw
+#define silk_MLA_ovflw(a32, b32, c32)    ((a32) + ((b32) * (c32)))
+#undef  silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw(a32, b32, c32)    ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* no checking needed for silk_SMULL
+   no checking needed for silk_SMLAL
+   no checking needed for silk_SMLALBB
+   no checking needed for SigProcFIX_CLZ16
+   no checking needed for SigProcFIX_CLZ32*/
+
+#undef silk_DIV32
+#define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    if ( b32 == 0 )
+    {
+        fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a32 / b32;
+}
+
+#undef silk_DIV32_16
+#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    int fail = 0;
+    fail |= b32 == 0;
+    fail |= b32 > silk_int16_MAX;
+    fail |= b32 < silk_int16_MIN;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a32 / b32;
+}
+
+/* no checking needed for silk_SAT8
+   no checking needed for silk_SAT16
+   no checking needed for silk_SAT32
+   no checking needed for silk_POS_SAT32
+   no checking needed for silk_ADD_POS_SAT8
+   no checking needed for silk_ADD_POS_SAT16
+   no checking needed for silk_ADD_POS_SAT32
+   no checking needed for silk_ADD_POS_SAT64 */
+
+#undef silk_LSHIFT8
+#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+    opus_int8 ret;
+    int       fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 8;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT16
+#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+    opus_int16 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 16;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT32
+#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 32;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT64
+#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
+    opus_int64 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 64;
+    fail |= (ret>>shift) != ((opus_int64)a);
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT_ovflw
+#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */
+    {
+        fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a << shift;
+}
+
+#undef silk_LSHIFT_uint
+#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a << shift;
+    if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift))
+    {
+        fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_RSHIFT8
+#define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=8) )
+    {
+        fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT16
+#define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=16) )
+    {
+        fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT32
+#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=32) )
+    {
+        fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT64
+#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=64) )
+    {
+        fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT_uint
+#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>32) )
+    {
+        fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_ADD_LSHIFT
+#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
+    opus_int16 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_LSHIFT32
+#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_LSHIFT_uint
+#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_RSHIFT
+#define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
+    opus_int16 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_ADD_RSHIFT32
+#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_ADD_RSHIFT_uint
+#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_SUB_LSHIFT32
+#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a - (b << shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_SUB_RSHIFT32
+#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a - (b >> shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_RSHIFT_ROUND
+#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    /* the marco definition can't handle a shift of zero */
+    if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) )
+    {
+        fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_RSHIFT_ROUND64
+#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
+    opus_int64 ret;
+    /* the marco definition can't handle a shift of zero */
+    if ( (shift <= 0) || (shift>=64) )
+    {
+        fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+/* silk_abs is used on floats also, so doesn't work... */
+/*#undef silk_abs
+static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){
+    silk_assert(a != 0x80000000);
+    return (((a) >  0)  ? (a) : -(a));            // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN
+}*/
+
+#undef silk_abs_int64
+#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
+    if ( a == silk_int64_MIN )
+    {
+        fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
+}
+
+#undef silk_abs_int32
+#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
+    if ( a == silk_int32_MIN )
+    {
+        fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return silk_abs(a);
+}
+
+#undef silk_CHECK_FIT8
+#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
+    opus_int8 ret;
+    ret = (opus_int8)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+#undef silk_CHECK_FIT16
+#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
+    opus_int16 ret;
+    ret = (opus_int16)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+#undef silk_CHECK_FIT32
+#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
+    opus_int32 ret;
+    ret = (opus_int32)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+/* no checking for silk_NSHIFT_MUL_32_32
+   no checking for silk_NSHIFT_MUL_16_16
+   no checking needed for silk_min
+   no checking needed for silk_max
+   no checking needed for silk_sign
+*/
+
+#endif
+#endif /* MACRO_DEBUG_H */
diff --git a/src/main/jni/opus/silk/NLSF2A.c b/src/main/jni/opus/silk/NLSF2A.c
new file mode 100644
index 000000000..b1c559ea6
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF2A.c
@@ -0,0 +1,178 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* conversion between prediction filter coefficients and LSFs   */
+/* order should be even                                         */
+/* a piecewise linear approximation maps LSF <-> cos(LSF)       */
+/* therefore the result is not accurate LSFs, but the two       */
+/* functions are accurate inverses of each other                */
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+#define QA      16
+
+/* helper function for NLSF2A(..) */
+static OPUS_INLINE void silk_NLSF2A_find_poly(
+    opus_int32          *out,      /* O    intermediate polynomial, QA [dd+1]        */
+    const opus_int32    *cLSF,     /* I    vector of interleaved 2*cos(LSFs), QA [d] */
+    opus_int            dd         /* I    polynomial order (= 1/2 * filter order)   */
+)
+{
+    opus_int   k, n;
+    opus_int32 ftmp;
+
+    out[0] = silk_LSHIFT( 1, QA );
+    out[1] = -cLSF[0];
+    for( k = 1; k < dd; k++ ) {
+        ftmp = cLSF[2*k];            /* QA*/
+        out[k+1] = silk_LSHIFT( out[k-1], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[k] ), QA );
+        for( n = k; n > 1; n-- ) {
+            out[n] += out[n-2] - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[n-1] ), QA );
+        }
+        out[1] -= ftmp;
+    }
+}
+
+/* compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+    opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
+    const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
+    const opus_int              d                   /* I    filter order (should be even)                               */
+)
+{
+    /* This ordering was found to maximize quality. It improves numerical accuracy of
+       silk_NLSF2A_find_poly() compared to "standard" ordering. */
+    static const unsigned char ordering16[16] = {
+      0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1
+    };
+    static const unsigned char ordering10[10] = {
+      0, 9, 6, 3, 4, 5, 8, 1, 2, 7
+    };
+    const unsigned char *ordering;
+    opus_int   k, i, dd;
+    opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ];
+    opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
+    opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
+    opus_int32 maxabs, absval, idx=0, sc_Q16;
+
+    silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
+    silk_assert( d==10||d==16 );
+
+    /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
+    ordering = d == 16 ? ordering16 : ordering10;
+    for( k = 0; k < d; k++ ) {
+        silk_assert(NLSF[k] >= 0 );
+
+        /* f_int on a scale 0-127 (rounded down) */
+        f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
+
+        /* f_frac, range: 0..255 */
+        f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 );
+
+        silk_assert(f_int >= 0);
+        silk_assert(f_int < LSF_COS_TAB_SZ_FIX );
+
+        /* Read start and end value from table */
+        cos_val = silk_LSFCosTab_FIX_Q12[ f_int ];                /* Q12 */
+        delta   = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val;  /* Q12, with a range of 0..200 */
+
+        /* Linear interpolation */
+        cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */
+    }
+
+    dd = silk_RSHIFT( d, 1 );
+
+    /* generate even and odd polynomials using convolution */
+    silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd );
+    silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd );
+
+    /* convert even and odd polynomials to opus_int32 Q12 filter coefs */
+    for( k = 0; k < dd; k++ ) {
+        Ptmp = P[ k+1 ] + P[ k ];
+        Qtmp = Q[ k+1 ] - Q[ k ];
+
+        /* the Ptmp and Qtmp values at this stage need to fit in int32 */
+        a32_QA1[ k ]     = -Qtmp - Ptmp;        /* QA+1 */
+        a32_QA1[ d-k-1 ] =  Qtmp - Ptmp;        /* QA+1 */
+    }
+
+    /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
+    for( i = 0; i < 10; i++ ) {
+        /* Find maximum absolute value and its index */
+        maxabs = 0;
+        for( k = 0; k < d; k++ ) {
+            absval = silk_abs( a32_QA1[k] );
+            if( absval > maxabs ) {
+                maxabs = absval;
+                idx    = k;
+            }
+        }
+        maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 );                                          /* QA+1 -> Q12 */
+
+        if( maxabs > silk_int16_MAX ) {
+            /* Reduce magnitude of prediction coefficients */
+            maxabs = silk_min( maxabs, 163838 );  /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
+            sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
+                                        silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
+            silk_bwexpander_32( a32_QA1, d, sc_Q16 );
+        } else {
+            break;
+        }
+    }
+
+    if( i == 10 ) {
+        /* Reached the last iteration, clip the coefficients */
+        for( k = 0; k < d; k++ ) {
+            a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) );  /* QA+1 -> Q12 */
+            a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 );
+        }
+    } else {
+        for( k = 0; k < d; k++ ) {
+            a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 );                /* QA+1 -> Q12 */
+        }
+    }
+
+    for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
+        if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
+            /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion   */
+            /* on the unscaled coefficients, convert to Q12 and measure again                   */
+            silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
+            for( k = 0; k < d; k++ ) {
+                a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 );            /* QA+1 -> Q12 */
+            }
+        } else {
+            break;
+        }
+    }
+}
+
diff --git a/src/main/jni/opus/silk/NLSF_VQ.c b/src/main/jni/opus/silk/NLSF_VQ.c
new file mode 100644
index 000000000..69b6e22e1
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_VQ.c
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
+void silk_NLSF_VQ(
+    opus_int32                  err_Q26[],                      /* O    Quantization errors [K]                     */
+    const opus_int16            in_Q15[],                       /* I    Input vectors to be quantized [LPC_order]   */
+    const opus_uint8            pCB_Q8[],                       /* I    Codebook vectors [K*LPC_order]              */
+    const opus_int              K,                              /* I    Number of codebook vectors                  */
+    const opus_int              LPC_order                       /* I    Number of LPCs                              */
+)
+{
+    opus_int        i, m;
+    opus_int32      diff_Q15, sum_error_Q30, sum_error_Q26;
+
+    silk_assert( LPC_order <= 16 );
+    silk_assert( ( LPC_order & 1 ) == 0 );
+
+    /* Loop over codebook */
+    for( i = 0; i < K; i++ ) {
+        sum_error_Q26 = 0;
+        for( m = 0; m < LPC_order; m += 2 ) {
+            /* Compute weighted squared quantization error for index m */
+            diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
+            sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 );
+
+            /* Compute weighted squared quantization error for index m + 1 */
+            diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
+            sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 );
+
+            sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 );
+
+            silk_assert( sum_error_Q26 >= 0 );
+            silk_assert( sum_error_Q30 >= 0 );
+        }
+        err_Q26[ i ] = sum_error_Q26;
+    }
+}
diff --git a/src/main/jni/opus/silk/NLSF_VQ_weights_laroia.c b/src/main/jni/opus/silk/NLSF_VQ_weights_laroia.c
new file mode 100644
index 000000000..04894c59a
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_VQ_weights_laroia.c
@@ -0,0 +1,80 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "define.h"
+#include "SigProc_FIX.h"
+
+/*
+R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP
+Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech,
+Signal Processing, pp. 641-644, 1991.
+*/
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
+    const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
+    const opus_int              D                   /* I     Input vector dimension (even)                              */
+)
+{
+    opus_int   k;
+    opus_int32 tmp1_int, tmp2_int;
+
+    silk_assert( D > 0 );
+    silk_assert( ( D & 1 ) == 0 );
+
+    /* First value */
+    tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 );
+    tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+    tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 );
+    tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int );
+    pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 );
+
+    /* Main loop */
+    for( k = 1; k < D - 1; k += 2 ) {
+        tmp1_int = silk_max_int( pNLSF_Q15[ k + 1 ] - pNLSF_Q15[ k ], 1 );
+        tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+        pNLSFW_Q_OUT[ k ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ k ] > 0 );
+
+        tmp2_int = silk_max_int( pNLSF_Q15[ k + 2 ] - pNLSF_Q15[ k + 1 ], 1 );
+        tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int );
+        pNLSFW_Q_OUT[ k + 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ k + 1 ] > 0 );
+    }
+
+    /* Last value */
+    tmp1_int = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 );
+    tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+    pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 );
+}
diff --git a/src/main/jni/opus/silk/NLSF_decode.c b/src/main/jni/opus/silk/NLSF_decode.c
new file mode 100644
index 000000000..9f715060b
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_decode.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Predictive dequantizer for NLSF residuals */
+static OPUS_INLINE void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
+          opus_int16         x_Q10[],                        /* O    Output [ order ]                            */
+    const opus_int8          indices[],                      /* I    Quantization indices [ order ]              */
+    const opus_uint8         pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int           quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16         order                           /* I    Number of input values                      */
+)
+{
+    opus_int     i, out_Q10, pred_Q10;
+
+    out_Q10 = 0;
+    for( i = order-1; i >= 0; i-- ) {
+        pred_Q10 = silk_RSHIFT( silk_SMULBB( out_Q10, (opus_int16)pred_coef_Q8[ i ] ), 8 );
+        out_Q10  = silk_LSHIFT( indices[ i ], 10 );
+        if( out_Q10 > 0 ) {
+            out_Q10 = silk_SUB16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else if( out_Q10 < 0 ) {
+            out_Q10 = silk_ADD16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        }
+        out_Q10  = silk_SMLAWB( pred_Q10, (opus_int32)out_Q10, quant_step_size_Q16 );
+        x_Q10[ i ] = out_Q10;
+    }
+}
+
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+void silk_NLSF_decode(
+          opus_int16            *pNLSF_Q15,                     /* O    Quantized NLSF vector [ LPC_ORDER ]         */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+    const silk_NLSF_CB_struct   *psNLSF_CB                      /* I    Codebook object                             */
+)
+{
+    opus_int         i;
+    opus_uint8       pred_Q8[  MAX_LPC_ORDER ];
+    opus_int16       ec_ix[    MAX_LPC_ORDER ];
+    opus_int16       res_Q10[  MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[ MAX_LPC_ORDER ];
+    opus_int32       W_tmp_Q9, NLSF_Q15_tmp;
+    const opus_uint8 *pCB_element;
+
+    /* Decode first stage */
+    pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
+    for( i = 0; i < psNLSF_CB->order; i++ ) {
+        pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 );
+    }
+
+    /* Unpack entropy table indices and predictor for current CB1 index */
+    silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
+
+    /* Predictive residual dequantizer */
+    silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order );
+
+    /* Weights from codebook vector */
+    silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order );
+
+    /* Apply inverse square-rooted weights and add to output */
+    for( i = 0; i < psNLSF_CB->order; i++ ) {
+        W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+        NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) );
+        pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 );
+    }
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+}
diff --git a/src/main/jni/opus/silk/NLSF_del_dec_quant.c b/src/main/jni/opus/silk/NLSF_del_dec_quant.c
new file mode 100644
index 000000000..504dbbd04
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_del_dec_quant.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Delayed-decision quantizer for NLSF residuals */
+opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
+    opus_int8                   indices[],                      /* O    Quantization indices [ order ]              */
+    const opus_int16            x_Q10[],                        /* I    Input [ order ]                             */
+    const opus_int16            w_Q5[],                         /* I    Weights [ order ]                           */
+    const opus_uint8            pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int16            ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
+    const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
+    const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
+    const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
+    const opus_int16            order                           /* I    Number of input values                      */
+)
+{
+    opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
+    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
+    opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_min_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
+    const opus_uint8 *rates_Q5;
+
+    silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
+
+    nStates = 1;
+    RD_Q25[ 0 ] = 0;
+    prev_out_Q10[ 0 ] = 0;
+    for( i = order - 1; ; i-- ) {
+        rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
+        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
+        in_Q10 = x_Q10[ i ];
+        for( j = 0; j < nStates; j++ ) {
+            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
+            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
+            ind[ j ][ i ] = (opus_int8)ind_tmp;
+
+            /* compute outputs for ind_tmp and ind_tmp + 1 */
+            out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
+            out1_Q10 = silk_ADD16( out0_Q10, 1024 );
+            if( ind_tmp > 0 ) {
+                out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == 0 ) {
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == -1 ) {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            }
+            out0_Q10  = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
+            out1_Q10  = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+            out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
+            out1_Q10  = silk_ADD16( out1_Q10, pred_Q10 );
+            prev_out_Q10[ j           ] = out0_Q10;
+            prev_out_Q10[ j + nStates ] = out1_Q10;
+
+            /* compute RD for ind_tmp and ind_tmp + 1 */
+            if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
+                    rate1_Q5 = 280;
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp );
+                    rate1_Q5 = silk_ADD16( rate0_Q5, 43 );
+                }
+            } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = 280;
+                    rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp );
+                    rate1_Q5 = silk_SUB16( rate0_Q5, 43 );
+                }
+            } else {
+                rate0_Q5 = rates_Q5[ ind_tmp +     NLSF_QUANT_MAX_AMPLITUDE ];
+                rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+            }
+            RD_tmp_Q25            = RD_Q25[ j ];
+            diff_Q10              = silk_SUB16( in_Q10, out0_Q10 );
+            RD_Q25[ j ]           = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
+            diff_Q10              = silk_SUB16( in_Q10, out1_Q10 );
+            RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
+        }
+
+        if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
+            /* double number of states and copy */
+            for( j = 0; j < nStates; j++ ) {
+                ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
+            }
+            nStates = silk_LSHIFT( nStates, 1 );
+            for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] = ind[ j - nStates ][ i ];
+            }
+        } else if( i > 0 ) {
+            /* sort lower and upper half of RD_Q25, pairwise */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
+                    RD_max_Q25[ j ]                         = RD_Q25[ j ];
+                    RD_min_Q25[ j ]                         = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    RD_Q25[ j ]                             = RD_min_Q25[ j ];
+                    RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ];
+                    /* swap prev_out values */
+                    out0_Q10 = prev_out_Q10[ j ];
+                    prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10;
+                    ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES;
+                } else {
+                    RD_min_Q25[ j ] = RD_Q25[ j ];
+                    RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    ind_sort[ j ] = j;
+                }
+            }
+            /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */
+            /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */
+            while( 1 ) {
+                min_max_Q25 = silk_int32_MAX;
+                max_min_Q25 = 0;
+                ind_min_max = 0;
+                ind_max_min = 0;
+                for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                    if( min_max_Q25 > RD_max_Q25[ j ] ) {
+                        min_max_Q25 = RD_max_Q25[ j ];
+                        ind_min_max = j;
+                    }
+                    if( max_min_Q25 < RD_min_Q25[ j ] ) {
+                        max_min_Q25 = RD_min_Q25[ j ];
+                        ind_max_min = j;
+                    }
+                }
+                if( min_max_Q25 >= max_min_Q25 ) {
+                    break;
+                }
+                /* copy ind_min_max to ind_max_min */
+                ind_sort[     ind_max_min ] = ind_sort[     ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES;
+                RD_Q25[       ind_max_min ] = RD_Q25[       ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                RD_min_Q25[   ind_max_min ] = 0;
+                RD_max_Q25[   ind_min_max ] = silk_int32_MAX;
+                silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) );
+            }
+            /* increment index if it comes from the upper half */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+            }
+        } else {  /* i == 0 */
+            break;
+        }
+    }
+
+    /* last sample: find winner, copy indices and return RD value */
+    ind_tmp = 0;
+    min_Q25 = silk_int32_MAX;
+    for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+        if( min_Q25 > RD_Q25[ j ] ) {
+            min_Q25 = RD_Q25[ j ];
+            ind_tmp = j;
+        }
+    }
+    for( j = 0; j < order; j++ ) {
+        indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ];
+        silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT );
+        silk_assert( indices[ j ] <=  NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    }
+    indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+    silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    silk_assert( min_Q25 >= 0 );
+    return min_Q25;
+}
diff --git a/src/main/jni/opus/silk/NLSF_encode.c b/src/main/jni/opus/silk/NLSF_encode.c
new file mode 100644
index 000000000..03a036fda
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_encode.c
@@ -0,0 +1,136 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/***********************/
+/* NLSF vector encoder */
+/***********************/
+opus_int32 silk_NLSF_encode(                                    /* O    Returns RD value in Q25                     */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+          opus_int16            *pNLSF_Q15,                     /* I/O  Quantized NLSF vector [ LPC_ORDER ]         */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
+    const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
+    const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
+    const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
+)
+{
+    opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
+    opus_int32       W_tmp_Q9;
+    VARDECL( opus_int32, err_Q26 );
+    VARDECL( opus_int32, RD_Q25 );
+    VARDECL( opus_int, tempIndices1 );
+    VARDECL( opus_int8, tempIndices2 );
+    opus_int16       res_Q15[      MAX_LPC_ORDER ];
+    opus_int16       res_Q10[      MAX_LPC_ORDER ];
+    opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[     MAX_LPC_ORDER ];
+    opus_int16       W_adj_Q5[     MAX_LPC_ORDER ];
+    opus_uint8       pred_Q8[      MAX_LPC_ORDER ];
+    opus_int16       ec_ix[        MAX_LPC_ORDER ];
+    const opus_uint8 *pCB_element, *iCDF_ptr;
+    SAVE_STACK;
+
+    silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
+    silk_assert( signalType >= 0 && signalType <= 2 );
+    silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+
+    /* First stage: VQ */
+    ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
+    silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
+
+    /* Sort the quantization errors */
+    ALLOC( tempIndices1, nSurvivors, opus_int );
+    silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
+
+    ALLOC( RD_Q25, nSurvivors, opus_int32 );
+    ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
+
+    /* Loop over survivors */
+    for( s = 0; s < nSurvivors; s++ ) {
+        ind1 = tempIndices1[ s ];
+
+        /* Residual after first stage */
+        pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
+            res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
+        }
+
+        /* Weights from codebook vector */
+        silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
+
+        /* Apply square-rooted weights */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+            res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
+        }
+
+        /* Modify input weights accordingly */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
+        }
+
+        /* Unpack entropy table indices and predictor for current CB1 index */
+        silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 );
+
+        /* Trellis quantizer */
+        RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix,
+            psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order );
+
+        /* Add rate for first stage */
+        iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ];
+        if( ind1 == 0 ) {
+            prob_Q8 = 256 - iCDF_ptr[ ind1 ];
+        } else {
+            prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ];
+        }
+        bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 );
+        RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) );
+    }
+
+    /* Find the lowest rate-distortion error */
+    silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
+
+    NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
+    silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
+
+    /* Decode */
+    silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
+
+    RESTORE_STACK;
+    return RD_Q25[ 0 ];
+}
diff --git a/src/main/jni/opus/silk/NLSF_stabilize.c b/src/main/jni/opus/silk/NLSF_stabilize.c
new file mode 100644
index 000000000..1fa1ea379
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_stabilize.c
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* NLSF stabilizer:                                         */
+/*                                                          */
+/* - Moves NLSFs further apart if they are too close        */
+/* - Moves NLSFs away from borders if they are too close    */
+/* - High effort to achieve a modification with minimum     */
+/*     Euclidean distance to input vector                   */
+/* - Output are sorted NLSF coefficients                    */
+/*                                                          */
+
+#include "SigProc_FIX.h"
+
+/* Constant Definitions */
+#define MAX_LOOPS        20
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+          opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
+    const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
+    const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
+)
+{
+    opus_int   i, I=0, k, loops;
+    opus_int16 center_freq_Q15;
+    opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15;
+
+    /* This is necessary to ensure an output within range of a opus_int16 */
+    silk_assert( NDeltaMin_Q15[L] >= 1 );
+
+    for( loops = 0; loops < MAX_LOOPS; loops++ ) {
+        /**************************/
+        /* Find smallest distance */
+        /**************************/
+        /* First element */
+        min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0];
+        I = 0;
+        /* Middle elements */
+        for( i = 1; i <= L-1; i++ ) {
+            diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+            if( diff_Q15 < min_diff_Q15 ) {
+                min_diff_Q15 = diff_Q15;
+                I = i;
+            }
+        }
+        /* Last element */
+        diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] );
+        if( diff_Q15 < min_diff_Q15 ) {
+            min_diff_Q15 = diff_Q15;
+            I = L;
+        }
+
+        /***************************************************/
+        /* Now check if the smallest distance non-negative */
+        /***************************************************/
+        if( min_diff_Q15 >= 0 ) {
+            return;
+        }
+
+        if( I == 0 ) {
+            /* Move away from lower limit */
+            NLSF_Q15[0] = NDeltaMin_Q15[0];
+
+        } else if( I == L) {
+            /* Move away from higher limit */
+            NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L];
+
+        } else {
+            /* Find the lower extreme for the location of the current center frequency */
+            min_center_Q15 = 0;
+            for( k = 0; k < I; k++ ) {
+                min_center_Q15 += NDeltaMin_Q15[k];
+            }
+            min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Find the upper extreme for the location of the current center frequency */
+            max_center_Q15 = 1 << 15;
+            for( k = L; k > I; k-- ) {
+                max_center_Q15 -= NDeltaMin_Q15[k];
+            }
+            max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Move apart, sorted by value, keeping the same center frequency */
+            center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ),
+                min_center_Q15, max_center_Q15 );
+            NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+            NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I];
+        }
+    }
+
+    /* Safe and simple fall back method, which is less ideal than the above */
+    if( loops == MAX_LOOPS )
+    {
+        /* Insertion sort (fast for already almost sorted arrays):   */
+        /* Best case:  O(n)   for an already sorted array            */
+        /* Worst case: O(n^2) for an inversely sorted array          */
+        silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L );
+
+        /* First NLSF should be no less than NDeltaMin[0] */
+        NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] );
+
+        /* Keep delta_min distance between the NLSFs */
+        for( i = 1; i < L; i++ )
+            NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+
+        /* Last NLSF should be no higher than 1 - NDeltaMin[L] */
+        NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );
+
+        /* Keep NDeltaMin distance between the NLSFs */
+        for( i = L-2; i >= 0; i-- )
+            NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] );
+    }
+}
diff --git a/src/main/jni/opus/silk/NLSF_unpack.c b/src/main/jni/opus/silk/NLSF_unpack.c
new file mode 100644
index 000000000..17bd23f75
--- /dev/null
+++ b/src/main/jni/opus/silk/NLSF_unpack.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Unpack predictor values and indices for entropy coding tables */
+void silk_NLSF_unpack(
+          opus_int16            ec_ix[],                        /* O    Indices to entropy tables [ LPC_ORDER ]     */
+          opus_uint8            pred_Q8[],                      /* O    LSF predictor [ LPC_ORDER ]                 */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int              CB1_index                       /* I    Index of vector in first LSF codebook       */
+)
+{
+    opus_int   i;
+    opus_uint8 entry;
+    const opus_uint8 *ec_sel_ptr;
+
+    ec_sel_ptr = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ];
+    for( i = 0; i < psNLSF_CB->order; i += 2 ) {
+        entry = *ec_sel_ptr++;
+        ec_ix  [ i     ] = silk_SMULBB( silk_RSHIFT( entry, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ i     ] = psNLSF_CB->pred_Q8[ i + ( entry & 1 ) * ( psNLSF_CB->order - 1 ) ];
+        ec_ix  [ i + 1 ] = silk_SMULBB( silk_RSHIFT( entry, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ i + 1 ] = psNLSF_CB->pred_Q8[ i + ( silk_RSHIFT( entry, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ];
+    }
+}
+
diff --git a/src/main/jni/opus/silk/NSQ.c b/src/main/jni/opus/silk/NSQ.c
new file mode 100644
index 000000000..cf5b3fd54
--- /dev/null
+++ b/src/main/jni/opus/silk/NSQ.c
@@ -0,0 +1,446 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+);
+
+void silk_NSQ(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            k, lag, start_idx, LSF_interpolation_flag;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    SAVE_STACK;
+
+    NSQ->rand_seed = psIndices->Seed;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+                NSQ->rewhite_flag = 1;
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+            }
+        }
+
+        silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+
+        silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+            AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Update lagPrev for next frame */
+    NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech and noise shaping signals */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/***********************************/
+/* silk_noise_shape_quantizer  */
+/***********************************/
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+)
+{
+    opus_int     i, j;
+    opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+    opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+    opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    /* Set up short term AR state */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+    for( i = 0; i < length; i++ ) {
+        /* Generate dither */
+        NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+        /* Short-term prediction */
+        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[  0 ], a_Q12[ 0 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+        if( predictLPCOrder == 16 ) {
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+        }
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q13 = 2;
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+            pred_lag_ptr++;
+        } else {
+            LTP_pred_Q13 = 0;
+        }
+
+        /* Noise shape feedback */
+        silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+        tmp2 = psLPC_Q14[ 0 ];
+        tmp1 = NSQ->sAR2_Q14[ 0 ];
+        NSQ->sAR2_Q14[ 0 ] = tmp2;
+        n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
+        for( j = 2; j < shapingLPCOrder; j += 2 ) {
+            tmp2 = NSQ->sAR2_Q14[ j - 1 ];
+            NSQ->sAR2_Q14[ j - 1 ] = tmp1;
+            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
+            tmp1 = NSQ->sAR2_Q14[ j + 0 ];
+            NSQ->sAR2_Q14[ j + 0 ] = tmp2;
+            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
+        }
+        NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
+
+        n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
+        n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
+
+        silk_assert( lag > 0 || signalType != TYPE_VOICED );
+
+        /* Combine prediction and noise shaping signals */
+        tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 );        /* Q12 */
+        tmp1 = silk_SUB32( tmp1, n_LF_Q12 );                                    /* Q12 */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
+            shp_lag_ptr++;
+
+            tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 );                       /* Q13 */
+            tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 );                          /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 3 );                                /* Q10 */
+        } else {
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 2 );                                /* Q10 */
+        }
+
+        r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 );                              /* residual error Q10 */
+
+        /* Flip sign depending on dither */
+        if ( NSQ->rand_seed < 0 ) {
+           r_Q10 = -r_Q10;
+        }
+        r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+        /* Find two quantization level candidates and measure their rate-distortion */
+        q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+        q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+        if( q1_Q0 > 0 ) {
+            q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+            q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+            rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+        } else if( q1_Q0 == 0 ) {
+            q1_Q10  = offset_Q10;
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+            rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+        } else if( q1_Q0 == -1 ) {
+            q2_Q10  = offset_Q10;
+            q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+            rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+        } else {            /* Q1_Q0 < -1 */
+            q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+            q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+            rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+        }
+        rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+        rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 );
+        rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+        rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 );
+
+        if( rd2_Q20 < rd1_Q20 ) {
+            q1_Q10 = q2_Q10;
+        }
+
+        pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 );
+
+        /* Excitation */
+        exc_Q14 = silk_LSHIFT( q1_Q10, 4 );
+        if ( NSQ->rand_seed < 0 ) {
+           exc_Q14 = -exc_Q14;
+        }
+
+        /* Add predictions */
+        LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 );
+        xq_Q14      = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 );
+
+        /* Scale XQ back to normal level before saving */
+        xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) );
+
+        /* Update states */
+        psLPC_Q14++;
+        *psLPC_Q14 = xq_Q14;
+        sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
+        NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
+
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
+        sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 );
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Make dither dependent on quantized signal */
+        NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
+    }
+
+    /* Update LPC synth buffer */
+    silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+}
+
+static OPUS_INLINE void silk_nsq_scale_states(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+)
+{
+    opus_int   i, lag;
+    opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+    for( i = 0; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
+
+        /* Scale short-term prediction and shaping states */
+        for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+            NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] );
+        }
+        for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+            NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
+        }
+    }
+}
diff --git a/src/main/jni/opus/silk/NSQ_del_dec.c b/src/main/jni/opus/silk/NSQ_del_dec.c
new file mode 100644
index 000000000..522be4066
--- /dev/null
+++ b/src/main/jni/opus/silk/NSQ_del_dec.c
@@ -0,0 +1,719 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+typedef struct {
+    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+    opus_int32 RandState[ DECISION_DELAY ];
+    opus_int32 Q_Q10[     DECISION_DELAY ];
+    opus_int32 Xq_Q14[    DECISION_DELAY ];
+    opus_int32 Pred_Q15[  DECISION_DELAY ];
+    opus_int32 Shape_Q14[ DECISION_DELAY ];
+    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32 LF_AR_Q14;
+    opus_int32 Seed;
+    opus_int32 SeedInit;
+    opus_int32 RD_Q10;
+} NSQ_del_dec_struct;
+
+typedef struct {
+    opus_int32 Q_Q10;
+    opus_int32 RD_Q10;
+    opus_int32 xq_Q14;
+    opus_int32 LF_AR_Q14;
+    opus_int32 sLTP_shp_Q14;
+    opus_int32 LPC_exc_Q14;
+} NSQ_sample_struct;
+
+typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+);
+
+void silk_NSQ_del_dec(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    opus_int32          RDmin_Q10, Gain_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    VARDECL( opus_int32, delayedGain_Q10 );
+    VARDECL( NSQ_del_dec_struct, psDelDec );
+    NSQ_del_dec_struct  *psDD;
+    SAVE_STACK;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    /* Initialize delayed decision states */
+    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
+    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
+    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
+        psDD                 = &psDelDec[ k ];
+        psDD->Seed           = ( k + psIndices->Seed ) & 3;
+        psDD->SeedInit       = psDD->Seed;
+        psDD->RD_Q10         = 0;
+        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
+        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
+        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
+    }
+
+    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+    smpl_buf_idx = 0; /* index of oldest samples */
+
+    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );
+
+    /* For voiced frames limit the decision delay to lower than the pitch lag */
+    if( psIndices->signalType == TYPE_VOICED ) {
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
+        }
+    } else {
+        if( lag > 0 ) {
+            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
+        }
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    subfr = 0;
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                if( k == 2 ) {
+                    /* RESET DELAYED DECISIONS */
+                    /* Find winner */
+                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+                    Winner_ind = 0;
+                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
+                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
+                            Winner_ind = i;
+                        }
+                    }
+                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( i != Winner_ind ) {
+                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
+                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
+                        }
+                    }
+
+                    /* Copy final part of signals from winner state to output and long-term filter states */
+                    psDD = &psDelDec[ Winner_ind ];
+                    last_smple_idx = smpl_buf_idx + decisionDelay;
+                    for( i = 0; i < decisionDelay; i++ ) {
+                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
+                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+                    }
+
+                    subfr = 0;
+                }
+
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+                NSQ->rewhite_flag = 1;
+            }
+        }
+
+        silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
+            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
+
+        silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
+            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Find winner */
+    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+    Winner_ind = 0;
+    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
+        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
+            RDmin_Q10 = psDelDec[ k ].RD_Q10;
+            Winner_ind = k;
+        }
+    }
+
+    /* Copy final part of signals from winner state to output and long-term filter states */
+    psDD = &psDelDec[ Winner_ind ];
+    psIndices->Seed = psDD->SeedInit;
+    last_smple_idx = smpl_buf_idx + decisionDelay;
+    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
+    for( i = 0; i < decisionDelay; i++ ) {
+        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+    }
+    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
+
+    /* Update states */
+    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
+    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech signal */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+)
+{
+    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+    opus_int32   Winner_rand_state;
+    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+    VARDECL( NSQ_sample_pair, psSampleState );
+    NSQ_del_dec_struct *psDD;
+    NSQ_sample_struct  *psSS;
+    SAVE_STACK;
+
+    silk_assert( nStatesDelayedDecision > 0 );
+    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    for( i = 0; i < length; i++ ) {
+        /* Perform common calculations used in all states */
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q14 = 2;
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+            LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
+            pred_lag_ptr++;
+        } else {
+            LTP_pred_Q14 = 0;
+        }
+
+        /* Long-term shaping */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
+            shp_lag_ptr++;
+        } else {
+            n_LTP_Q14 = 0;
+        }
+
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            /* Delayed decision state */
+            psDD = &psDelDec[ k ];
+
+            /* Sample state */
+            psSS = psSampleState[ k ];
+
+            /* Generate dither */
+            psDD->Seed = silk_RAND( psDD->Seed );
+
+            /* Pointer used in short term prediction and shaping */
+            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+            /* Short-term prediction */
+            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+            if( predictLPCOrder == 16 ) {
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+            }
+            LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
+
+            /* Noise shape feedback */
+            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+            /* Output of lowpass section */
+            tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+            psDD->sAR2_Q14[ 0 ] = tmp2;
+            n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
+            /* Loop over allpass sections */
+            for( j = 2; j < shapingLPCOrder; j += 2 ) {
+                /* Output of allpass section */
+                tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
+                psDD->sAR2_Q14[ j - 1 ] = tmp1;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
+                /* Output of allpass section */
+                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
+                psDD->sAR2_Q14[ j + 0 ] = tmp2;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
+            }
+            psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
+            n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
+            n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            /* Input minus prediction plus noise feedback                       */
+            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
+            tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
+            tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
+            tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
+
+            r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
+
+            /* Flip sign depending on dither */
+            if ( psDD->Seed < 0 ) {
+                r_Q10 = -r_Q10;
+            }
+            r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+            /* Find two quantization level candidates and measure their rate-distortion */
+            q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+            q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+            if( q1_Q0 > 0 ) {
+                q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == 0 ) {
+                q1_Q10  = offset_Q10;
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == -1 ) {
+                q2_Q10  = offset_Q10;
+                q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+            } else {            /* q1_Q0 < -1 */
+                q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+            }
+            rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+            rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+            rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+            rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+            if( rd1_Q10 < rd2_Q10 ) {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 0 ].Q_Q10  = q1_Q10;
+                psSS[ 1 ].Q_Q10  = q2_Q10;
+            } else {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 0 ].Q_Q10  = q2_Q10;
+                psSS[ 1 ].Q_Q10  = q1_Q10;
+            }
+
+            /* Update states for best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 0 ].xq_Q14       = xq_Q14;
+
+            /* Update states for second best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 1 ].xq_Q14       = xq_Q14;
+        }
+
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
+
+        /* Find winner */
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+        Winner_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                Winner_ind = k;
+            }
+        }
+
+        /* Increase RD values of expired states */
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+            }
+        }
+
+        /* Find worst in first set and best in second set */
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
+        RDmax_ind = 0;
+        RDmin_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            /* find worst in first set */
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                RDmax_ind = k;
+            }
+            /* find best in second set */
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
+                RDmin_ind = k;
+            }
+        }
+
+        /* Replace a state if best from second set outperforms worst in first set */
+        if( RDmin_Q10 < RDmax_Q10 ) {
+            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+        }
+
+        /* Write samples from winner to output and long-term filter states */
+        psDD = &psDelDec[ Winner_ind ];
+        if( subfr > 0 || i >= decisionDelay ) {
+            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
+        }
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Update states */
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD                                     = &psDelDec[ k ];
+            psSS                                     = &psSampleState[ k ][ 0 ];
+            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
+            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
+            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
+            psDD->RD_Q10                             = psSS->RD_Q10;
+        }
+        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
+    }
+    /* Update LPC states */
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
+        psDD = &psDelDec[ k ];
+        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+)
+{
+    opus_int            i, k, lag;
+    opus_int32          gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+    NSQ_del_dec_struct  *psDD;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+    for( i = 0; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD = &psDelDec[ k ];
+
+            /* Scale scalar states */
+            psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
+
+            /* Scale short-term prediction and shaping states */
+            for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+                psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
+            }
+            for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+                psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
+            }
+            for( i = 0; i < DECISION_DELAY; i++ ) {
+                psDD->Pred_Q15[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[  i ] );
+                psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
+            }
+        }
+    }
+}
diff --git a/src/main/jni/opus/silk/PLC.c b/src/main/jni/opus/silk/PLC.c
new file mode 100644
index 000000000..01f40014c
--- /dev/null
+++ b/src/main/jni/opus/silk/PLC.c
@@ -0,0 +1,423 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+#define NB_ATT 2
+static const opus_int16 HARM_ATT_Q15[NB_ATT]              = { 32440, 31130 }; /* 0.99, 0.95 */
+static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT]  = { 31130, 26214 }; /* 0.95, 0.8 */
+static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */
+
+static OPUS_INLINE void silk_PLC_update(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
+);
+
+static OPUS_INLINE void silk_PLC_conceal(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[]             /* O LPC residual signal    */
+);
+
+
+void silk_PLC_Reset(
+    silk_decoder_state                  *psDec              /* I/O Decoder state        */
+)
+{
+    psDec->sPLC.pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 );
+    psDec->sPLC.prevGain_Q16[ 0 ] = SILK_FIX_CONST( 1, 16 );
+    psDec->sPLC.prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 );
+    psDec->sPLC.subfr_length = 20;
+    psDec->sPLC.nb_subfr = 2;
+}
+
+void silk_PLC(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[],            /* I/O  signal              */
+    opus_int                            lost                /* I Loss flag              */
+)
+{
+    /* PLC control function */
+    if( psDec->fs_kHz != psDec->sPLC.fs_kHz ) {
+        silk_PLC_Reset( psDec );
+        psDec->sPLC.fs_kHz = psDec->fs_kHz;
+    }
+
+    if( lost ) {
+        /****************************/
+        /* Generate Signal          */
+        /****************************/
+        silk_PLC_conceal( psDec, psDecCtrl, frame );
+
+        psDec->lossCnt++;
+    } else {
+        /****************************/
+        /* Update state             */
+        /****************************/
+        silk_PLC_update( psDec, psDecCtrl );
+    }
+}
+
+/**************************************************/
+/* Update state of PLC                            */
+/**************************************************/
+static OPUS_INLINE void silk_PLC_update(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
+)
+{
+    opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14;
+    opus_int   i, j;
+    silk_PLC_struct *psPLC;
+
+    psPLC = &psDec->sPLC;
+
+    /* Update parameters used in case of packet loss */
+    psDec->prevSignalType = psDec->indices.signalType;
+    LTP_Gain_Q14 = 0;
+    if( psDec->indices.signalType == TYPE_VOICED ) {
+        /* Find the parameters for the last subframe which contains a pitch pulse */
+        for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) {
+            if( j == psDec->nb_subfr ) {
+                break;
+            }
+            temp_LTP_Gain_Q14 = 0;
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER  + i ];
+            }
+            if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) {
+                LTP_Gain_Q14 = temp_LTP_Gain_Q14;
+                silk_memcpy( psPLC->LTPCoef_Q14,
+                    &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ],
+                    LTP_ORDER * sizeof( opus_int16 ) );
+
+                psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 );
+            }
+        }
+
+        silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) );
+        psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14;
+
+        /* Limit LT coefs */
+        if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) {
+            opus_int   scale_Q10;
+            opus_int32 tmp;
+
+            tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 );
+            scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) );
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 );
+            }
+        } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) {
+            opus_int   scale_Q14;
+            opus_int32 tmp;
+
+            tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 );
+            scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) );
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 );
+            }
+        }
+    } else {
+        psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 );
+        silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ));
+    }
+
+    /* Save LPC coeficients */
+    silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+    psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14;
+
+    /* Save last two gains */
+    silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) );
+
+    psPLC->subfr_length = psDec->subfr_length;
+    psPLC->nb_subfr = psDec->nb_subfr;
+}
+
+static OPUS_INLINE void silk_PLC_conceal(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[]             /* O LPC residual signal    */
+)
+{
+    opus_int   i, j, k;
+    opus_int   lag, idx, sLTP_buf_idx, shift1, shift2;
+    opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30;
+    opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
+    opus_int32 LPC_pred_Q10, LTP_pred_Q12;
+    opus_int16 rand_scale_Q14;
+    opus_int16 *B_Q14, *exc_buf_ptr;
+    opus_int32 *sLPC_Q14_ptr;
+    VARDECL( opus_int16, exc_buf );
+    opus_int16 A_Q12[ MAX_LPC_ORDER ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q14 );
+    silk_PLC_struct *psPLC = &psDec->sPLC;
+    opus_int32 prevGain_Q10[2];
+    SAVE_STACK;
+
+    ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+
+    prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
+    prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
+
+    if( psDec->first_frame_after_reset ) {
+       silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
+    }
+
+    /* Find random noise component */
+    /* Scale previous excitation signal */
+    exc_buf_ptr = exc_buf;
+    for( k = 0; k < 2; k++ ) {
+        for( i = 0; i < psPLC->subfr_length; i++ ) {
+            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+                silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+        }
+        exc_buf_ptr += psPLC->subfr_length;
+    }
+    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
+    silk_sum_sqr_shift( &energy1, &shift1, exc_buf,                         psPLC->subfr_length );
+    silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+
+    if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
+        /* First sub-frame has lowest energy */
+        rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+    } else {
+        /* Second sub-frame has lowest energy */
+        rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+    }
+
+    /* Set up Gain to random noise component */
+    B_Q14          = psPLC->LTPCoef_Q14;
+    rand_scale_Q14 = psPLC->randScale_Q14;
+
+    /* Set up attenuation gains */
+    harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    if( psDec->prevSignalType == TYPE_VOICED ) {
+        rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[  silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    } else {
+        rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    }
+
+    /* LPC concealment. Apply BWE to previous LPC */
+    silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) );
+
+    /* Preload LPC coeficients to array on stack. Gives small performance gain */
+    silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+
+    /* First Lost frame */
+    if( psDec->lossCnt == 0 ) {
+        rand_scale_Q14 = 1 << 14;
+
+        /* Reduce random noise Gain for voiced frames */
+        if( psDec->prevSignalType == TYPE_VOICED ) {
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                rand_scale_Q14 -= B_Q14[ i ];
+            }
+            rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */
+            rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 );
+        } else {
+            /* Reduce random noise for unvoiced frames with high LPC gain */
+            opus_int32 invGain_Q30, down_scale_Q30;
+
+            invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order );
+
+            down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 );
+            down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 );
+            down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES );
+
+            rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 );
+        }
+    }
+
+    rand_seed    = psPLC->rand_seed;
+    lag          = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
+    sLTP_buf_idx = psDec->ltp_mem_length;
+
+    /* Rewhiten LTP state */
+    idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+    silk_assert( idx > 0 );
+    silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
+    /* Scale LTP state */
+    inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
+    inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
+    for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) {
+        sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] );
+    }
+
+    /***************************/
+    /* LTP synthesis filtering */
+    /***************************/
+    for( k = 0; k < psDec->nb_subfr; k++ ) {
+        /* Set up pointer */
+        pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+        for( i = 0; i < psDec->subfr_length; i++ ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q12 = 2;
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[  0 ], B_Q14[ 0 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+            pred_lag_ptr++;
+
+            /* Generate LPC excitation */
+            rand_seed = silk_RAND( rand_seed );
+            idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK;
+            sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 );
+            sLTP_buf_idx++;
+        }
+
+        /* Gradually reduce LTP gain */
+        for( j = 0; j < LTP_ORDER; j++ ) {
+            B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
+        }
+        /* Gradually reduce excitation gain */
+        rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
+
+        /* Slowly increase pitch lag */
+        psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
+        psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) );
+        lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
+    }
+
+    /***************************/
+    /* LPC synthesis filtering */
+    /***************************/
+    sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ];
+
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
+    for( i = 0; i < psDec->frame_length; i++ ) {
+        /* partly unrolled */
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+        for( j = 10; j < psDec->LPC_order; j++ ) {
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
+        }
+
+        /* Add prediction to LPC excitation */
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+
+        /* Scale with Gain */
+        frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
+    }
+
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    /**************************************/
+    /* Update states                      */
+    /**************************************/
+    psPLC->rand_seed     = rand_seed;
+    psPLC->randScale_Q14 = rand_scale_Q14;
+    for( i = 0; i < MAX_NB_SUBFR; i++ ) {
+        psDecCtrl->pitchL[ i ] = lag;
+    }
+    RESTORE_STACK;
+}
+
+/* Glues concealed frames with new good received frames */
+void silk_PLC_glue_frames(
+    silk_decoder_state                  *psDec,             /* I/O decoder state        */
+    opus_int16                          frame[],            /* I/O signal               */
+    opus_int                            length              /* I length of signal       */
+)
+{
+    opus_int   i, energy_shift;
+    opus_int32 energy;
+    silk_PLC_struct *psPLC;
+    psPLC = &psDec->sPLC;
+
+    if( psDec->lossCnt ) {
+        /* Calculate energy in concealed residual */
+        silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length );
+
+        psPLC->last_frame_lost = 1;
+    } else {
+        if( psDec->sPLC.last_frame_lost ) {
+            /* Calculate residual in decoded signal if last frame was lost */
+            silk_sum_sqr_shift( &energy, &energy_shift, frame, length );
+
+            /* Normalize energies */
+            if( energy_shift > psPLC->conc_energy_shift ) {
+                psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift );
+            } else if( energy_shift < psPLC->conc_energy_shift ) {
+                energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift );
+            }
+
+            /* Fade in the energy difference */
+            if( energy > psPLC->conc_energy ) {
+                opus_int32 frac_Q24, LZ;
+                opus_int32 gain_Q16, slope_Q16;
+
+                LZ = silk_CLZ32( psPLC->conc_energy );
+                LZ = LZ - 1;
+                psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ );
+                energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) );
+
+                frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) );
+
+                gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 );
+                slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length );
+                /* Make slope 4x steeper to avoid missing onsets after DTX */
+                slope_Q16 = silk_LSHIFT( slope_Q16, 2 );
+
+                for( i = 0; i < length; i++ ) {
+                    frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] );
+                    gain_Q16 += slope_Q16;
+                    if( gain_Q16 > (opus_int32)1 << 16 ) {
+                        break;
+                    }
+                }
+            }
+        }
+        psPLC->last_frame_lost = 0;
+    }
+}
diff --git a/src/main/jni/opus/silk/PLC.h b/src/main/jni/opus/silk/PLC.h
new file mode 100644
index 000000000..f1e2eccc6
--- /dev/null
+++ b/src/main/jni/opus/silk/PLC.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PLC_H
+#define SILK_PLC_H
+
+#include "main.h"
+
+#define BWE_COEF                        0.99
+#define V_PITCH_GAIN_START_MIN_Q14      11469               /* 0.7 in Q14               */
+#define V_PITCH_GAIN_START_MAX_Q14      15565               /* 0.95 in Q14              */
+#define MAX_PITCH_LAG_MS                18
+#define RAND_BUF_SIZE                   128
+#define RAND_BUF_MASK                   ( RAND_BUF_SIZE - 1 )
+#define LOG2_INV_LPC_GAIN_HIGH_THRES    3                   /* 2^3 = 8 dB LPC gain      */
+#define LOG2_INV_LPC_GAIN_LOW_THRES     8                   /* 2^8 = 24 dB LPC gain     */
+#define PITCH_DRIFT_FAC_Q16             655                 /* 0.01 in Q16              */
+
+void silk_PLC_Reset(
+    silk_decoder_state                  *psDec              /* I/O Decoder state        */
+);
+
+void silk_PLC(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[],            /* I/O  signal              */
+    opus_int                            lost                /* I Loss flag              */
+);
+
+void silk_PLC_glue_frames(
+    silk_decoder_state                  *psDec,             /* I/O decoder state        */
+    opus_int16                          frame[],            /* I/O signal               */
+    opus_int                            length              /* I length of signal       */
+);
+
+#endif
+
diff --git a/src/main/jni/opus/silk/SigProc_FIX.h b/src/main/jni/opus/silk/SigProc_FIX.h
new file mode 100644
index 000000000..1b5805791
--- /dev/null
+++ b/src/main/jni/opus/silk/SigProc_FIX.h
@@ -0,0 +1,594 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_H
+#define SILK_SIGPROC_FIX_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/*#define silk_MACRO_COUNT */          /* Used to enable WMOPS counting */
+
+#define SILK_MAX_ORDER_LPC            16            /* max order of the LPC analysis in schur() and k2a() */
+
+#include <string.h>                                 /* for memset(), memcpy(), memmove() */
+#include "typedef.h"
+#include "resampler_structs.h"
+#include "macros.h"
+
+
+/********************************************************************/
+/*                    SIGNAL PROCESSING FUNCTIONS                   */
+/********************************************************************/
+
+/*!
+ * Initialize/reset the resampler state for a given pair of input/output sampling rates
+*/
+opus_int silk_resampler_init(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
+    opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
+    opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
+);
+
+/*!
+ * Resampler: convert from one sampling rate to another
+ */
+opus_int silk_resampler(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int16                  out[],              /* O    Output signal                                               */
+    const opus_int16            in[],               /* I    Input signal                                                */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+* Downsample 2x, mediocre quality
+*/
+void silk_resampler_down2(
+    opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ len ]                                       */
+    const opus_int16            *in,                /* I    Input signal [ floor(len/2) ]                               */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+ * Downsample by a factor 2/3, low quality
+*/
+void silk_resampler_down2_3(
+    opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
+    const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+ * second order ARMA filter;
+ * slower than biquad() but uses more precise coefficients
+ * can handle (slowly) varying coefficients
+ */
+void silk_biquad_alt(
+    const opus_int16            *in,                /* I     input signal                                               */
+    const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
+    const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
+    opus_int32                  *S,                 /* I/O   State vector [2]                                           */
+    opus_int16                  *out,               /* O     output signal                                              */
+    const opus_int32            len,                /* I     signal length (must be even)                               */
+    opus_int                    stride              /* I     Operate on interleaved signal if > 1                       */
+);
+
+/* Variable order MA prediction error filter. */
+void silk_LPC_analysis_filter(
+    opus_int16                  *out,               /* O    Output signal                                               */
+    const opus_int16            *in,                /* I    Input signal                                                */
+    const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
+    const opus_int32            len,                /* I    Signal length                                               */
+    const opus_int32            d                   /* I    Filter order                                                */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander(
+    opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor (typically in the range 0 to 1)                */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander_32(
+    opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
+);
+
+/* Compute inverse of LPC prediction gain, and                           */
+/* test if LPC coefficients are stable (all poles within unit circle)    */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+);
+
+/* For input in Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
+    const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
+    const opus_int              order               /* I    Prediction order                                            */
+);
+
+/* Split signal in two decimated bands using first-order allpass filters */
+void silk_ana_filt_bank_1(
+    const opus_int16            *in,                /* I    Input signal [N]                                            */
+    opus_int32                  *S,                 /* I/O  State vector [2]                                            */
+    opus_int16                  *outL,              /* O    Low band [N/2]                                              */
+    opus_int16                  *outH,              /* O    High band [N/2]                                             */
+    const opus_int32            N                   /* I    Number of input samples                                     */
+);
+
+/********************************************************************/
+/*                        SCALAR FUNCTIONS                          */
+/********************************************************************/
+
+/* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
+/* Convert input to a log scale    */
+opus_int32 silk_lin2log(
+    const opus_int32            inLin               /* I  input in linear scale                                         */
+);
+
+/* Approximation of a sigmoid function */
+opus_int silk_sigm_Q15(
+    opus_int                    in_Q5               /* I                                                                */
+);
+
+/* Approximation of 2^() (exact inverse of approx log2() above) */
+/* Convert input to a linear scale */
+opus_int32 silk_log2lin(
+    const opus_int32            inLog_Q7            /* I  input on log scale                                            */
+);
+
+/* Compute number of bits to right shift the sum of squares of a vector    */
+/* of int16s to make it fit in an int32                                    */
+void silk_sum_sqr_shift(
+    opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
+    opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
+    const opus_int16            *x,                 /* I   Input vector                                                 */
+    opus_int                    len                 /* I   Length of input vector                                       */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence    */
+/* Faster than schur64(), but much less accurate.                          */
+/* uses SMLAWB(), requiring armv5E and higher.                             */
+opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
+    opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
+    const opus_int32            *c,                 /* I    correlations [order+1]                                      */
+    const opus_int32            order               /* I    prediction order                                            */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence    */
+/* Slower than schur(), but more accurate.                                 */
+/* Uses SMULL(), available on armv4                                        */
+opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
+    opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
+    const opus_int32            c[],                /* I    Correlations [order+1]                                      */
+    opus_int32                  order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_Q16(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Apply sine window to signal vector.                              */
+/* Window types:                                                    */
+/*    1 -> sine window from 0 to pi/2                               */
+/*    2 -> sine window from pi/2 to pi                              */
+/* every other sample of window is linearly interpolated, for speed */
+void silk_apply_sine_window(
+    opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
+    const opus_int16            px[],               /* I    Pointer to input signal                                     */
+    const opus_int              win_type,           /* I    Selects a window type                                       */
+    const opus_int              length              /* I    Window length, multiple of 4                                */
+);
+
+/* Compute autocorrelation */
+void silk_autocorr(
+    opus_int32                  *results,           /* O    Result (length correlationCount)                            */
+    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
+    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
+    const opus_int              inputDataSize,      /* I    Length of input                                             */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I                                                                */
+    opus_int8                   contourIndex,       /* O                                                                */
+    opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
+    const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
+    const opus_int              nb_subfr            /* I    number of sub frames                                        */
+);
+
+opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
+    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
+    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
+    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
+void silk_A2NLSF(
+    opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+    opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
+    const opus_int              d                   /* I    Filter order (must be even)                                 */
+);
+
+/* compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+    opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
+    const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
+    const opus_int              d                   /* I    filter order (should be even)                               */
+);
+
+void silk_insertion_sort_increasing(
+    opus_int32                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+);
+
+void silk_insertion_sort_decreasing_int16(
+    opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+);
+
+void silk_insertion_sort_increasing_all_values_int16(
+     opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
+     const opus_int             L                   /* I     Vector length                                              */
+);
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+          opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
+    const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
+    const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
+);
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
+    const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
+    const opus_int              D                   /* I     Input vector dimension (even)                              */
+);
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+    opus_int16                  *data_out,
+    const opus_int16            *data_in,
+    opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
+    const opus_int              dataSize            /* I    Length                                                      */
+);
+
+/* Some for the LTP related function requires Q26 to work.*/
+void silk_scale_vector32_Q26_lshift_18(
+    opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
+    opus_int32                  gain_Q26,           /* I    Q26                                                         */
+    opus_int                    dataSize            /* I    length                                                      */
+);
+
+/********************************************************************/
+/*                        INLINE ARM MATH                           */
+/********************************************************************/
+
+/*    return sum( inVec1[i] * inVec2[i] ) */
+opus_int32 silk_inner_prod_aligned(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int32 silk_inner_prod_aligned_scale(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int64 silk_inner_prod16_aligned_64(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+/********************************************************************/
+/*                                MACROS                            */
+/********************************************************************/
+
+/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
+   left. Output is 32bit int.
+   Note: contemporary compilers recognize the C expression below and
+   compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+{
+    opus_uint32 x = (opus_uint32) a32;
+    opus_uint32 r = (opus_uint32) rot;
+    opus_uint32 m = (opus_uint32) -rot;
+    if( rot == 0 ) {
+        return a32;
+    } else if( rot < 0 ) {
+        return (opus_int32) ((x << m) | (x >> (32 - m)));
+    } else {
+        return (opus_int32) ((x << (32 - r)) | (x >> r));
+    }
+}
+
+/* Allocate opus_int16 aligned to 4-byte memory address */
+#if EMBEDDED_ARM
+#define silk_DWORD_ALIGN __attribute__((aligned(4)))
+#else
+#define silk_DWORD_ALIGN
+#endif
+
+/* Useful Macros that can be adjusted to other platforms */
+#define silk_memcpy(dest, src, size)        memcpy((dest), (src), (size))
+#define silk_memset(dest, src, size)        memset((dest), (src), (size))
+#define silk_memmove(dest, src, size)       memmove((dest), (src), (size))
+
+/* Fixed point macros */
+
+/* (a32 * b32) output have to be 32bit int */
+#define silk_MUL(a32, b32)                  ((a32) * (b32))
+
+/* (a32 * b32) output have to be 32bit uint */
+#define silk_MUL_uint(a32, b32)             silk_MUL(a32, b32)
+
+/* a32 + (b32 * c32) output have to be 32bit int */
+#define silk_MLA(a32, b32, c32)             silk_ADD32((a32),((b32) * (c32)))
+
+/* a32 + (b32 * c32) output have to be 32bit uint */
+#define silk_MLA_uint(a32, b32, c32)        silk_MLA(a32, b32, c32)
+
+/* ((a32 >> 16)  * (b32 >> 16)) output have to be 32bit int */
+#define silk_SMULTT(a32, b32)               (((a32) >> 16) * ((b32) >> 16))
+
+/* a32 + ((a32 >> 16)  * (b32 >> 16)) output have to be 32bit int */
+#define silk_SMLATT(a32, b32, c32)          silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
+
+#define silk_SMLALBB(a64, b16, c16)         silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
+
+/* (a32 * b32) */
+#define silk_SMULL(a32, b32)                ((opus_int64)(a32) * /*(opus_int64)*/(b32))
+
+/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_ADD32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+/* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_SUB32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+#define silk_MLA_ovflw(a32, b32, c32)       silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+#define silk_SMLABB_ovflw(a32, b32, c32)    (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
+
+#define silk_DIV32_16(a32, b16)             ((opus_int32)((a32) / (b16)))
+#define silk_DIV32(a32, b32)                ((opus_int32)((a32) / (b32)))
+
+/* These macros enables checking for overflow in silk_API_Debug.h*/
+#define silk_ADD16(a, b)                    ((a) + (b))
+#define silk_ADD32(a, b)                    ((a) + (b))
+#define silk_ADD64(a, b)                    ((a) + (b))
+
+#define silk_SUB16(a, b)                    ((a) - (b))
+#define silk_SUB32(a, b)                    ((a) - (b))
+#define silk_SUB64(a, b)                    ((a) - (b))
+
+#define silk_SAT8(a)                        ((a) > silk_int8_MAX ? silk_int8_MAX  :       \
+                                            ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)))
+#define silk_SAT16(a)                       ((a) > silk_int16_MAX ? silk_int16_MAX :      \
+                                            ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
+#define silk_SAT32(a)                       ((a) > silk_int32_MAX ? silk_int32_MAX :      \
+                                            ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
+
+#define silk_CHECK_FIT8(a)                  (a)
+#define silk_CHECK_FIT16(a)                 (a)
+#define silk_CHECK_FIT32(a)                 (a)
+
+#define silk_ADD_SAT16(a, b)                (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
+#define silk_ADD_SAT64(a, b)                ((((a) + (b)) & 0x8000000000000000LL) == 0 ?                            \
+                                            ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
+                                            ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT16(a, b)                (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
+#define silk_SUB_SAT64(a, b)                ((((a)-(b)) & 0x8000000000000000LL) == 0 ?                                               \
+                                            (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
+                                            ((((a)^0x8000000000000000LL) & (b)  & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
+
+/* Saturation for positive input values */
+#define silk_POS_SAT32(a)                   ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
+
+/* Add with saturation for positive input values */
+#define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
+#define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT32(a, b)            ((((a)+(b)) & 0x80000000)           ? silk_int32_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT64(a, b)            ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
+
+#define silk_LSHIFT8(a, shift)              ((opus_int8)((opus_uint8)(a)<<(shift)))         /* shift >= 0, shift < 8  */
+#define silk_LSHIFT16(a, shift)             ((opus_int16)((opus_uint16)(a)<<(shift)))       /* shift >= 0, shift < 16 */
+#define silk_LSHIFT32(a, shift)             ((opus_int32)((opus_uint32)(a)<<(shift)))       /* shift >= 0, shift < 32 */
+#define silk_LSHIFT64(a, shift)             ((opus_int64)((opus_uint64)(a)<<(shift)))       /* shift >= 0, shift < 64 */
+#define silk_LSHIFT(a, shift)               silk_LSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+#define silk_RSHIFT8(a, shift)              ((a)>>(shift))                                  /* shift >= 0, shift < 8  */
+#define silk_RSHIFT16(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 16 */
+#define silk_RSHIFT32(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 32 */
+#define silk_RSHIFT64(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 64 */
+#define silk_RSHIFT(a, shift)               silk_RSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+/* saturates before shifting */
+#define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
+                                                    silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
+
+#define silk_LSHIFT_ovflw(a, shift)         ((opus_int32)((opus_uint32)(a) << (shift)))     /* shift >= 0, allowed to overflow */
+#define silk_LSHIFT_uint(a, shift)          ((a) << (shift))                                /* shift >= 0 */
+#define silk_RSHIFT_uint(a, shift)          ((a) >> (shift))                                /* shift >= 0 */
+
+#define silk_ADD_LSHIFT(a, b, shift)        ((a) + silk_LSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_LSHIFT32(a, b, shift)      silk_ADD32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_LSHIFT_uint(a, b, shift)   ((a) + silk_LSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_ADD_RSHIFT(a, b, shift)        ((a) + silk_RSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_RSHIFT32(a, b, shift)      silk_ADD32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_RSHIFT_uint(a, b, shift)   ((a) + silk_RSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_SUB_LSHIFT32(a, b, shift)      silk_SUB32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_SUB_RSHIFT32(a, b, shift)      silk_SUB32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+
+/* Requires that shift > 0 */
+#define silk_RSHIFT_ROUND(a, shift)         ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+#define silk_RSHIFT_ROUND64(a, shift)       ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+
+/* Number of rightshift required to fit the multiplication */
+#define silk_NSHIFT_MUL_32_32(a, b)         ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
+#define silk_NSHIFT_MUL_16_16(a, b)         ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
+
+
+#define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
+#define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
+
+/* Macro to convert floating-point constants to fixed-point */
+#define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
+
+/* silk_min() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+
+/* silk_min() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+
+#define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+                                                                 : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
+
+#define silk_LIMIT_int                      silk_LIMIT
+#define silk_LIMIT_16                       silk_LIMIT
+#define silk_LIMIT_32                       silk_LIMIT
+
+#define silk_abs(a)                         (((a) >  0)  ? (a) : -(a))            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
+#define silk_abs_int(a)                     (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
+#define silk_abs_int32(a)                   (((a) ^ ((a) >> 31)) - ((a) >> 31))
+#define silk_abs_int64(a)                   (((a) >  0)  ? (a) : -(a))
+
+#define silk_sign(a)                        ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
+
+/* PSEUDO-RANDOM GENERATOR                                                          */
+/* Make sure to store the result as the seed for the next call (also in between     */
+/* frames), otherwise result won't be random at all. When only using some of the    */
+/* bits, take the most significant bits by right-shifting.                          */
+#define silk_RAND(seed)                     (silk_MLA_ovflw(907633515, (seed), 196314165))
+
+/*  Add some multiplication functions that can be easily mapped to ARM. */
+
+/*    silk_SMMUL: Signed top word multiply.
+          ARMv6        2 instruction cycles.
+          ARMv3M+      3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
+/*#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
+/* the following seems faster on x86 */
+#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
+
+#include "Inlines.h"
+#include "MacroCount.h"
+#include "MacroDebug.h"
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/SigProc_FIX_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/SigProc_FIX_armv5e.h"
+#endif
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FIX_H */
diff --git a/src/main/jni/opus/silk/VAD.c b/src/main/jni/opus/silk/VAD.c
new file mode 100644
index 000000000..a80909814
--- /dev/null
+++ b/src/main/jni/opus/silk/VAD.c
@@ -0,0 +1,357 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/* Silk VAD noise level estimation */
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+    const opus_int32             pX[ VAD_N_BANDS ], /* I    subband energies                            */
+    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
+);
+
+/**********************************/
+/* Initialization of the Silk VAD */
+/**********************************/
+opus_int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
+    silk_VAD_state              *psSilk_VAD                     /* I/O  Pointer to Silk VAD state                   */
+)
+{
+    opus_int b, ret = 0;
+
+    /* reset state memory */
+    silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) );
+
+    /* init noise levels */
+    /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 );
+    }
+
+    /* Initialize state */
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NL[ b ]     = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] );
+        psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] );
+    }
+    psSilk_VAD->counter = 15;
+
+    /* init smoothed energy-to-noise ratio*/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256;       /* 100 * 256 --> 20 dB SNR */
+    }
+
+    return( ret );
+}
+
+/* Weighting factors for tilt measure */
+static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
+
+/***************************************/
+/* Get the speech activity level in Q8 */
+/***************************************/
+opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    const opus_int16            pIn[]                           /* I    PCM input                                   */
+)
+{
+    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
+    opus_int   decimated_framelength1, decimated_framelength2;
+    opus_int   decimated_framelength;
+    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+    opus_int32 sumSquared, smooth_coef_Q16;
+    opus_int16 HPstateTmp;
+    VARDECL( opus_int16, X );
+    opus_int32 Xnrg[ VAD_N_BANDS ];
+    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
+    opus_int32 speech_nrg, x_tmp;
+    opus_int   X_offset[ VAD_N_BANDS ];
+    opus_int   ret = 0;
+    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+    SAVE_STACK;
+
+    /* Safety checks */
+    silk_assert( VAD_N_BANDS == 4 );
+    silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+    silk_assert( psEncC->frame_length <= 512 );
+    silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+
+    /***********************/
+    /* Filter and Decimate */
+    /***********************/
+    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+    /* Decimate into 4 bands:
+       0       L      3L       L              3L                             5L
+               -      --       -              --                             --
+               8       8       2               4                              4
+
+       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
+
+       They're arranged to allow the minimal ( frame_length / 4 ) extra
+       scratch space during the downsampling process */
+    X_offset[ 0 ] = 0;
+    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
+    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
+    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
+        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
+
+    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
+
+    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
+
+    /*********************************************/
+    /* HP filter on lowest band (differentiator) */
+    /*********************************************/
+    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+    HPstateTmp = X[ decimated_framelength - 1 ];
+    for( i = decimated_framelength - 1; i > 0; i-- ) {
+        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
+        X[ i ]     -= X[ i - 1 ];
+    }
+    X[ 0 ] -= psSilk_VAD->HPstate;
+    psSilk_VAD->HPstate = HPstateTmp;
+
+    /*************************************/
+    /* Calculate the energy in each band */
+    /*************************************/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Find the decimated framelength in the non-uniformly divided bands */
+        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
+
+        /* Split length into subframe lengths */
+        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
+        dec_subframe_offset = 0;
+
+        /* Compute energy per sub-frame */
+        /* initialize with summed energy of last subframe */
+        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
+        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
+            sumSquared = 0;
+            for( i = 0; i < dec_subframe_length; i++ ) {
+                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
+                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
+                x_tmp = silk_RSHIFT(
+                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
+                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
+
+                /* Safety check */
+                silk_assert( sumSquared >= 0 );
+            }
+
+            /* Add/saturate summed energy of current subframe */
+            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
+            } else {
+                /* Look-ahead subframe */
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
+            }
+
+            dec_subframe_offset += dec_subframe_length;
+        }
+        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
+    }
+
+    /********************/
+    /* Noise estimation */
+    /********************/
+    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
+
+    /***********************************************/
+    /* Signal-plus-noise to noise ratio estimation */
+    /***********************************************/
+    sumSquared = 0;
+    input_tilt = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
+        if( speech_nrg > 0 ) {
+            /* Divide, with sufficient resolution */
+            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
+            } else {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
+            }
+
+            /* Convert to log domain */
+            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
+
+            /* Sum-of-squares */
+            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
+
+            /* Tilt measure */
+            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
+                /* Scale down SNR value for small subband speech energies */
+                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
+            }
+            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
+        } else {
+            NrgToNoiseRatio_Q8[ b ] = 256;
+        }
+    }
+
+    /* Mean-of-squares */
+    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
+
+    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
+    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
+
+    /*********************************/
+    /* Speech Probability Estimation */
+    /*********************************/
+    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
+
+    /**************************/
+    /* Frequency Tilt Measure */
+    /**************************/
+    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
+
+    /**************************************************/
+    /* Scale the sigmoid output based on power levels */
+    /**************************************************/
+    speech_nrg = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
+        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
+    }
+
+    /* Power scaling */
+    if( speech_nrg <= 0 ) {
+        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
+    } else if( speech_nrg < 32768 ) {
+        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
+        } else {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
+        }
+
+        /* square-root */
+        speech_nrg = silk_SQRT_APPROX( speech_nrg );
+        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
+    }
+
+    /* Copy the resulting speech activity in Q8 */
+    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
+
+    /***********************************/
+    /* Energy Level and SNR estimation */
+    /***********************************/
+    /* Smoothing coefficient */
+    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
+
+    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+        smooth_coef_Q16 >>= 1;
+    }
+
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* compute smoothed energy-to-noise ratio per band */
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
+            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
+
+        /* signal to noise ratio in dB per band */
+        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
+        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
+        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
+    }
+
+    RESTORE_STACK;
+    return( ret );
+}
+
+/**************************/
+/* Noise level estimation */
+/**************************/
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
+    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
+)
+{
+    opus_int   k;
+    opus_int32 nl, nrg, inv_nrg;
+    opus_int   coef, min_coef;
+
+    /* Initially faster smoothing */
+    if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */
+        min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 );
+    } else {
+        min_coef = 0;
+    }
+
+    for( k = 0; k < VAD_N_BANDS; k++ ) {
+        /* Get old noise level estimate for current band */
+        nl = psSilk_VAD->NL[ k ];
+        silk_assert( nl >= 0 );
+
+        /* Add bias */
+        nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] );
+        silk_assert( nrg > 0 );
+
+        /* Invert energies */
+        inv_nrg = silk_DIV32( silk_int32_MAX, nrg );
+        silk_assert( inv_nrg >= 0 );
+
+        /* Less update when subband energy is high */
+        if( nrg > silk_LSHIFT( nl, 3 ) ) {
+            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;
+        } else if( nrg < nl ) {
+            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;
+        } else {
+            coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 );
+        }
+
+        /* Initially faster smoothing */
+        coef = silk_max_int( coef, min_coef );
+
+        /* Smooth inverse energies */
+        psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef );
+        silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 );
+
+        /* Compute noise level by inverting again */
+        nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] );
+        silk_assert( nl >= 0 );
+
+        /* Limit noise levels (guarantee 7 bits of head room) */
+        nl = silk_min( nl, 0x00FFFFFF );
+
+        /* Store as part of state */
+        psSilk_VAD->NL[ k ] = nl;
+    }
+
+    /* Increment frame counter */
+    psSilk_VAD->counter++;
+}
diff --git a/src/main/jni/opus/silk/VQ_WMat_EC.c b/src/main/jni/opus/silk/VQ_WMat_EC.c
new file mode 100644
index 000000000..13d5d34ed
--- /dev/null
+++ b/src/main/jni/opus/silk/VQ_WMat_EC.c
@@ -0,0 +1,120 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
+void silk_VQ_WMat_EC(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+)
+{
+    opus_int   k, gain_tmp_Q7;
+    const opus_int8 *cb_row_Q7;
+    opus_int16 diff_Q14[ 5 ];
+    opus_int32 sum1_Q14, sum2_Q16;
+
+    /* Loop over codebook */
+    *rate_dist_Q14 = silk_int32_MAX;
+    cb_row_Q7 = cb_Q7;
+    for( k = 0; k < L; k++ ) {
+	    gain_tmp_Q7 = cb_gain_Q7[k];
+
+        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
+        diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
+        diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
+        diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
+        diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
+
+        /* Weighted rate */
+        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
+
+		/* Penalty for too large gain */
+		sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* first row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  1 ], diff_Q14[ 1 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  2 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  3 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  4 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  0 ], diff_Q14[ 0 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 0 ] );
+
+        /* second row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  7 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  8 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  9 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  6 ], diff_Q14[ 1 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 1 ] );
+
+        /* third row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 13 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );
+
+        /* fourth row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 19 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );
+
+        /* last row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 24 ], diff_Q14[ 4 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* find best */
+        if( sum1_Q14 < *rate_dist_Q14 ) {
+            *rate_dist_Q14 = sum1_Q14;
+            *ind = (opus_int8)k;
+			*gain_Q7 = gain_tmp_Q7;
+        }
+
+        /* Go to next cbk vector */
+        cb_row_Q7 += LTP_ORDER;
+    }
+}
diff --git a/src/main/jni/opus/silk/ana_filt_bank_1.c b/src/main/jni/opus/silk/ana_filt_bank_1.c
new file mode 100644
index 000000000..24cfb03fd
--- /dev/null
+++ b/src/main/jni/opus/silk/ana_filt_bank_1.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Coefficients for 2-band filter bank based on first-order allpass filters */
+static opus_int16 A_fb1_20 = 5394 << 1;
+static opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */
+
+/* Split signal into two decimated bands using first-order allpass filters */
+void silk_ana_filt_bank_1(
+    const opus_int16            *in,                /* I    Input signal [N]                                            */
+    opus_int32                  *S,                 /* I/O  State vector [2]                                            */
+    opus_int16                  *outL,              /* O    Low band [N/2]                                              */
+    opus_int16                  *outH,              /* O    High band [N/2]                                             */
+    const opus_int32            N                   /* I    Number of input samples                                     */
+)
+{
+    opus_int      k, N2 = silk_RSHIFT( N, 1 );
+    opus_int32    in32, X, Y, out_1, out_2;
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < N2; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
+
+        /* All-pass section for even input sample */
+        Y      = silk_SUB32( in32, S[ 0 ] );
+        X      = silk_SMLAWB( Y, Y, A_fb1_21 );
+        out_1  = silk_ADD32( S[ 0 ], X );
+        S[ 0 ] = silk_ADD32( in32, X );
+
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );
+
+        /* All-pass section for odd input sample, and add to output of previous section */
+        Y      = silk_SUB32( in32, S[ 1 ] );
+        X      = silk_SMULWB( Y, A_fb1_20 );
+        out_2  = silk_ADD32( S[ 1 ], X );
+        S[ 1 ] = silk_ADD32( in32, X );
+
+        /* Add/subtract, convert back to int16 and store to output */
+        outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) );
+        outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/arm/SigProc_FIX_armv4.h b/src/main/jni/opus/silk/arm/SigProc_FIX_armv4.h
new file mode 100644
index 000000000..ff62b1e5d
--- /dev/null
+++ b/src/main/jni/opus/silk/arm/SigProc_FIX_armv4.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv4_H
+#define SILK_SIGPROC_FIX_ARMv4_H
+
+#undef silk_MLA
+static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_MLA\n\t"
+      "mla %0, %1, %2, %3\n\t"
+      : "=&r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
+
+#endif
diff --git a/src/main/jni/opus/silk/arm/SigProc_FIX_armv5e.h b/src/main/jni/opus/silk/arm/SigProc_FIX_armv5e.h
new file mode 100644
index 000000000..617a09cab
--- /dev/null
+++ b/src/main/jni/opus/silk/arm/SigProc_FIX_armv5e.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv5E_H
+#define SILK_SIGPROC_FIX_ARMv5E_H
+
+#undef silk_SMULTT
+static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMULTT\n\t"
+      "smultt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
+
+#undef silk_SMLATT
+static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMLATT\n\t"
+      "smlatt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
+
+#endif
diff --git a/src/main/jni/opus/silk/arm/macros_armv4.h b/src/main/jni/opus/silk/arm/macros_armv4.h
new file mode 100644
index 000000000..3f30e9728
--- /dev/null
+++ b/src/main/jni/opus/silk/arm/macros_armv4.h
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv4_H
+#define SILK_MACROS_ARMv4_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b<<16)
+  );
+  return rd_hi;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#undef silk_SMLAWB
+#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b&~0xFFFF)
+  );
+  return rd_hi;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
+
+/* (a32 * b32) >> 16 */
+#undef silk_SMULWW
+static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMULWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=&r"(rd_lo), "=&r"(rd_hi)
+    : "%r"(a), "r"(b)
+  );
+  return (rd_hi<<16)+(rd_lo>>16);
+}
+#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
+
+#undef silk_SMLAWW
+static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMLAWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=&r"(rd_lo), "=&r"(rd_hi)
+    : "%r"(b), "r"(c)
+  );
+  return a+(rd_hi<<16)+(rd_lo>>16);
+}
+#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
+
+#endif /* SILK_MACROS_ARMv4_H */
diff --git a/src/main/jni/opus/silk/arm/macros_armv5e.h b/src/main/jni/opus/silk/arm/macros_armv5e.h
new file mode 100644
index 000000000..aad4117e4
--- /dev/null
+++ b/src/main/jni/opus/silk/arm/macros_armv5e.h
@@ -0,0 +1,213 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv5E_H
+#define SILK_MACROS_ARMv5E_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#undef silk_SMLAWB
+static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int16 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWB\n\t"
+      "smlawb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smulwt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWT\n\t"
+      "smlawt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
+#undef silk_SMULBB
+static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBB\n\t"
+      "smulbb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
+#undef silk_SMLABB
+static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABB\n\t"
+      "smlabb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#undef silk_SMULBT
+static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBT\n\t"
+      "smulbt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#undef silk_SMLABT
+static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABT\n\t"
+      "smlabt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
+
+/* add/subtract with output saturated */
+#undef silk_ADD_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_ADD_SAT32\n\t"
+      "qadd %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
+
+#undef silk_SUB_SAT32
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SUB_SAT32\n\t"
+      "qsub %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
+
+#undef silk_CLZ16
+static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ16\n\t"
+      "clz %0, %1;\n"
+      : "=r"(res)
+      : "r"(in16<<16|0x8000)
+  );
+  return res;
+}
+#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
+
+#undef silk_CLZ32
+static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ32\n\t"
+      "clz %0, %1\n\t"
+      : "=r"(res)
+      : "r"(in32)
+  );
+  return res;
+}
+#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
+
+#endif /* SILK_MACROS_ARMv5E_H */
diff --git a/src/main/jni/opus/silk/biquad_alt.c b/src/main/jni/opus/silk/biquad_alt.c
new file mode 100644
index 000000000..d55f5ee92
--- /dev/null
+++ b/src/main/jni/opus/silk/biquad_alt.c
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*                                                                      *
+ * silk_biquad_alt.c                                              *
+ *                                                                      *
+ * Second order ARMA filter                                             *
+ * Can handle slowly varying filter coefficients                        *
+ *                                                                      */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Second order ARMA filter, alternative implementation */
+void silk_biquad_alt(
+    const opus_int16            *in,                /* I     input signal                                               */
+    const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
+    const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
+    opus_int32                  *S,                 /* I/O   State vector [2]                                           */
+    opus_int16                  *out,               /* O     output signal                                              */
+    const opus_int32            len,                /* I     signal length (must be even)                               */
+    opus_int                    stride              /* I     Operate on interleaved signal if > 1                       */
+)
+{
+    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
+    opus_int   k;
+    opus_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14;
+
+    /* Negate A_Q28 values and split in two parts */
+    A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF;        /* lower part */
+    A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 );      /* upper part */
+    A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF;        /* lower part */
+    A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 );      /* upper part */
+
+    for( k = 0; k < len; k++ ) {
+        /* S[ 0 ], S[ 1 ]: Q12 */
+        inval = in[ k * stride ];
+        out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 );
+
+        S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 );
+        S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14, A0_U_Q28 );
+        S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval);
+
+        S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A1_L_Q28 ), 14 );
+        S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 );
+        S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval );
+
+        /* Scale back to Q0 and saturate */
+        out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/bwexpander.c b/src/main/jni/opus/silk/bwexpander.c
new file mode 100644
index 000000000..2eb445669
--- /dev/null
+++ b/src/main/jni/opus/silk/bwexpander.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander(
+    opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor (typically in the range 0 to 1)                */
+)
+{
+    opus_int   i;
+    opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+    /* NB: Dont use silk_SMULWB, instead of silk_RSHIFT_ROUND( silk_MUL(), 16 ), below.  */
+    /* Bias in silk_SMULWB can lead to unstable filters                                */
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ]    = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ]             ), 16 );
+        chirp_Q16 +=            silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+    }
+    ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 );
+}
diff --git a/src/main/jni/opus/silk/bwexpander_32.c b/src/main/jni/opus/silk/bwexpander_32.c
new file mode 100644
index 000000000..d0010f73d
--- /dev/null
+++ b/src/main/jni/opus/silk/bwexpander_32.c
@@ -0,0 +1,50 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander_32(
+    opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
+)
+{
+    opus_int   i;
+    opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ]    = silk_SMULWW( chirp_Q16, ar[ i ] );
+        chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+    }
+    ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] );
+}
+
diff --git a/src/main/jni/opus/silk/check_control_input.c b/src/main/jni/opus/silk/check_control_input.c
new file mode 100644
index 000000000..b5de9ce48
--- /dev/null
+++ b/src/main/jni/opus/silk/check_control_input.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "control.h"
+#include "errors.h"
+
+/* Check encoder control struct */
+opus_int check_control_input(
+    silk_EncControlStruct        *encControl                    /* I    Control structure                           */
+)
+{
+    silk_assert( encControl != NULL );
+
+    if( ( ( encControl->API_sampleRate            !=  8000 ) &&
+          ( encControl->API_sampleRate            != 12000 ) &&
+          ( encControl->API_sampleRate            != 16000 ) &&
+          ( encControl->API_sampleRate            != 24000 ) &&
+          ( encControl->API_sampleRate            != 32000 ) &&
+          ( encControl->API_sampleRate            != 44100 ) &&
+          ( encControl->API_sampleRate            != 48000 ) ) ||
+        ( ( encControl->desiredInternalSampleRate !=  8000 ) &&
+          ( encControl->desiredInternalSampleRate != 12000 ) &&
+          ( encControl->desiredInternalSampleRate != 16000 ) ) ||
+        ( ( encControl->maxInternalSampleRate     !=  8000 ) &&
+          ( encControl->maxInternalSampleRate     != 12000 ) &&
+          ( encControl->maxInternalSampleRate     != 16000 ) ) ||
+        ( ( encControl->minInternalSampleRate     !=  8000 ) &&
+          ( encControl->minInternalSampleRate     != 12000 ) &&
+          ( encControl->minInternalSampleRate     != 16000 ) ) ||
+          ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) ||
+          ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) ||
+          ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) {
+        silk_assert( 0 );
+        return SILK_ENC_FS_NOT_SUPPORTED;
+    }
+    if( encControl->payloadSize_ms != 10 &&
+        encControl->payloadSize_ms != 20 &&
+        encControl->payloadSize_ms != 40 &&
+        encControl->payloadSize_ms != 60 ) {
+        silk_assert( 0 );
+        return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+    }
+    if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_LOSS_RATE;
+    }
+    if( encControl->useDTX < 0 || encControl->useDTX > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_DTX_SETTING;
+    }
+    if( encControl->useCBR < 0 || encControl->useCBR > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_CBR_SETTING;
+    }
+    if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_INBAND_FEC_SETTING;
+    }
+    if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->nChannelsInternal > encControl->nChannelsAPI ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->complexity < 0 || encControl->complexity > 10 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_COMPLEXITY_SETTING;
+    }
+
+    return SILK_NO_ERROR;
+}
diff --git a/src/main/jni/opus/silk/code_signs.c b/src/main/jni/opus/silk/code_signs.c
new file mode 100644
index 000000000..0419ea262
--- /dev/null
+++ b/src/main/jni/opus/silk/code_signs.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/*#define silk_enc_map(a)                ((a) > 0 ? 1 : 0)*/
+/*#define silk_dec_map(a)                ((a) > 0 ? 1 : -1)*/
+/* shifting avoids if-statement */
+#define silk_enc_map(a)                  ( silk_RSHIFT( (a), 15 ) + 1 )
+#define silk_dec_map(a)                  ( silk_LSHIFT( (a),  1 ) - 1 )
+
+/* Encodes signs of excitation */
+void silk_encode_signs(
+    ec_enc                      *psRangeEnc,                        /* I/O  Compressor data structure                   */
+    const opus_int8             pulses[],                           /* I    pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+)
+{
+    opus_int         i, j, p;
+    opus_uint8       icdf[ 2 ];
+    const opus_int8  *q_ptr;
+    const opus_uint8 *icdf_ptr;
+
+    icdf[ 1 ] = 0;
+    q_ptr = pulses;
+    i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) );
+    icdf_ptr = &silk_sign_iCDF[ i ];
+    length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    for( i = 0; i < length; i++ ) {
+        p = sum_pulses[ i ];
+        if( p > 0 ) {
+            icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ];
+            for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+                if( q_ptr[ j ] != 0 ) {
+                    ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 );
+                }
+            }
+        }
+        q_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+}
+
+/* Decodes signs of excitation */
+void silk_decode_signs(
+    ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+)
+{
+    opus_int         i, j, p;
+    opus_uint8       icdf[ 2 ];
+    opus_int         *q_ptr;
+    const opus_uint8 *icdf_ptr;
+
+    icdf[ 1 ] = 0;
+    q_ptr = pulses;
+    i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) );
+    icdf_ptr = &silk_sign_iCDF[ i ];
+    length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    for( i = 0; i < length; i++ ) {
+        p = sum_pulses[ i ];
+        if( p > 0 ) {
+            icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ];
+            for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+                if( q_ptr[ j ] > 0 ) {
+                    /* attach sign */
+#if 0
+                    /* conditional implementation */
+                    if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) {
+                        q_ptr[ j ] = -q_ptr[ j ];
+                    }
+#else
+                    /* implementation with shift, subtraction, multiplication */
+                    q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) );
+#endif
+                }
+            }
+        }
+        q_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+}
diff --git a/src/main/jni/opus/silk/control.h b/src/main/jni/opus/silk/control.h
new file mode 100644
index 000000000..747e5426a
--- /dev/null
+++ b/src/main/jni/opus/silk/control.h
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_CONTROL_H
+#define SILK_CONTROL_H
+
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decoder API flags */
+#define FLAG_DECODE_NORMAL                      0
+#define FLAG_PACKET_LOST                        1
+#define FLAG_DECODE_LBRR                        2
+
+/***********************************************/
+/* Structure for controlling encoder operation */
+/***********************************************/
+typedef struct {
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsAPI;
+
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsInternal;
+
+    /* I:   Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000   */
+    opus_int32 API_sampleRate;
+
+    /* I:   Maximum internal sampling rate in Hertz; 8000/12000/16000                       */
+    opus_int32 maxInternalSampleRate;
+
+    /* I:   Minimum internal sampling rate in Hertz; 8000/12000/16000                       */
+    opus_int32 minInternalSampleRate;
+
+    /* I:   Soft request for internal sampling rate in Hertz; 8000/12000/16000              */
+    opus_int32 desiredInternalSampleRate;
+
+    /* I:   Number of samples per packet in milliseconds; 10/20/40/60                       */
+    opus_int payloadSize_ms;
+
+    /* I:   Bitrate during active speech in bits/second; internally limited                 */
+    opus_int32 bitRate;
+
+    /* I:   Uplink packet loss in percent (0-100)                                           */
+    opus_int packetLossPercentage;
+
+    /* I:   Complexity mode; 0 is lowest, 10 is highest complexity                          */
+    opus_int complexity;
+
+    /* I:   Flag to enable in-band Forward Error Correction (FEC); 0/1                      */
+    opus_int useInBandFEC;
+
+    /* I:   Flag to enable discontinuous transmission (DTX); 0/1                            */
+    opus_int useDTX;
+
+    /* I:   Flag to use constant bitrate                                                    */
+    opus_int useCBR;
+
+    /* I:   Maximum number of bits allowed for the frame                                    */
+    opus_int maxBits;
+
+    /* I:   Causes a smooth downmix to mono                                                 */
+    opus_int toMono;
+
+    /* I:   Opus encoder is allowing us to switch bandwidth                                 */
+    opus_int opusCanSwitch;
+
+    /* I: Make frames as independent as possible (but still use LPC)                        */
+    opus_int reducedDependency;
+
+    /* O:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
+    opus_int32 internalSampleRate;
+
+    /* O: Flag that bandwidth switching is allowed (because low voice activity)             */
+    opus_int allowBandwidthSwitch;
+
+    /* O:   Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */
+    opus_int inWBmodeWithoutVariableLP;
+
+    /* O:   Stereo width */
+    opus_int stereoWidth_Q14;
+
+    /* O:   Tells the Opus encoder we're ready to switch                                    */
+    opus_int switchReady;
+
+} silk_EncControlStruct;
+
+/**************************************************************************/
+/* Structure for controlling decoder operation and reading decoder status */
+/**************************************************************************/
+typedef struct {
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsAPI;
+
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsInternal;
+
+    /* I:   Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000  */
+    opus_int32 API_sampleRate;
+
+    /* I:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
+    opus_int32 internalSampleRate;
+
+    /* I:   Number of samples per packet in milliseconds; 10/20/40/60                       */
+    opus_int payloadSize_ms;
+
+    /* O:   Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz      */
+    opus_int prevPitchLag;
+} silk_DecControlStruct;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/control_SNR.c b/src/main/jni/opus/silk/control_SNR.c
new file mode 100644
index 000000000..f04e69fce
--- /dev/null
+++ b/src/main/jni/opus/silk/control_SNR.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "tuning_parameters.h"
+
+/* Control SNR of redidual quantizer */
+opus_int silk_control_SNR(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    opus_int32                  TargetRate_bps                  /* I    Target max bitrate (bps)                    */
+)
+{
+    opus_int k, ret = SILK_NO_ERROR;
+    opus_int32 frac_Q6;
+    const opus_int32 *rateTable;
+
+    /* Set bitrate/coding quality */
+    TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS );
+    if( TargetRate_bps != psEncC->TargetRate_bps ) {
+        psEncC->TargetRate_bps = TargetRate_bps;
+
+        /* If new TargetRate_bps, translate to SNR_dB value */
+        if( psEncC->fs_kHz == 8 ) {
+            rateTable = silk_TargetRate_table_NB;
+        } else if( psEncC->fs_kHz == 12 ) {
+            rateTable = silk_TargetRate_table_MB;
+        } else {
+            rateTable = silk_TargetRate_table_WB;
+        }
+
+        /* Reduce bitrate for 10 ms modes in these calculations */
+        if( psEncC->nb_subfr == 2 ) {
+            TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS;
+        }
+
+        /* Find bitrate interval in table and interpolate */
+        for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
+            if( TargetRate_bps <= rateTable[ k ] ) {
+                frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ),
+                                                 rateTable[ k ] - rateTable[ k - 1 ] );
+                psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] );
+                break;
+            }
+        }
+
+        /* Reduce coding quality whenever LBRR is enabled, to free up some bits */
+        if( psEncC->LBRR_enabled ) {
+            psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) );
+        }
+    }
+
+    return ret;
+}
diff --git a/src/main/jni/opus/silk/control_audio_bandwidth.c b/src/main/jni/opus/silk/control_audio_bandwidth.c
new file mode 100644
index 000000000..4f9bc5cbd
--- /dev/null
+++ b/src/main/jni/opus/silk/control_audio_bandwidth.c
@@ -0,0 +1,126 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "tuning_parameters.h"
+
+/* Control internal sampling rate */
+opus_int silk_control_audio_bandwidth(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    silk_EncControlStruct       *encControl                     /* I    Control structure                           */
+)
+{
+    opus_int   fs_kHz;
+    opus_int32 fs_Hz;
+
+    fs_kHz = psEncC->fs_kHz;
+    fs_Hz = silk_SMULBB( fs_kHz, 1000 );
+    if( fs_Hz == 0 ) {
+        /* Encoder has just been initialized */
+        fs_Hz  = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) {
+        /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */
+        fs_Hz  = psEncC->API_fs_Hz;
+        fs_Hz  = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz );
+        fs_Hz  = silk_max( fs_Hz, psEncC->minInternal_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else {
+        /* State machine for the internal sampling rate switching */
+        if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) {
+            /* Stop transition phase */
+            psEncC->sLP.mode = 0;
+        }
+        if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
+            /* Check if we should switch down */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch down */
+                if( psEncC->sLP.mode == 0 ) {
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+                }
+                if( encControl->opusCanSwitch ) {
+                    /* Stop transition phase */
+                    psEncC->sLP.mode = 0;
+
+                    /* Switch to a lower sample frequency */
+                    fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
+                } else {
+                   if( psEncC->sLP.transition_frame_no <= 0 ) {
+                       encControl->switchReady = 1;
+                       /* Make room for redundancy */
+                       encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                   } else {
+                       /* Direction: down (at double speed) */
+                       psEncC->sLP.mode = -2;
+                   }
+                }
+            }
+            else
+            /* Check if we should switch up */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch up */
+                if( encControl->opusCanSwitch ) {
+                    /* Switch to a higher sample frequency */
+                    fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
+
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = 0;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+
+                    /* Direction: up */
+                    psEncC->sLP.mode = 1;
+                } else {
+                   if( psEncC->sLP.mode == 0 ) {
+                       encControl->switchReady = 1;
+                       /* Make room for redundancy */
+                       encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                   } else {
+                       /* Direction: up */
+                       psEncC->sLP.mode = 1;
+                   }
+                }
+            } else {
+               if (psEncC->sLP.mode<0)
+                  psEncC->sLP.mode = 1;
+            }
+        }
+    }
+
+    return fs_kHz;
+}
diff --git a/src/main/jni/opus/silk/control_codec.c b/src/main/jni/opus/silk/control_codec.c
new file mode 100644
index 000000000..1f674bddb
--- /dev/null
+++ b/src/main/jni/opus/silk/control_codec.c
@@ -0,0 +1,422 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef FIXED_POINT
+#include "main_FIX.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#else
+#include "main_FLP.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#endif
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+#include "pitch_est_defines.h"
+
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz              /* I                        */
+);
+
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz,             /* I                        */
+    opus_int                        PacketSize_ms       /* I                        */
+);
+
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    opus_int                        Complexity          /* I                        */
+);
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    const opus_int32                TargetRate_bps      /* I                        */
+);
+
+
+/* Control encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
+    silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
+    const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
+    const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
+    const opus_int                  channelNb,                              /* I    Channel number                                                              */
+    const opus_int                  force_fs_kHz
+)
+{
+    opus_int   fs_kHz, ret = 0;
+
+    psEnc->sCmn.useDTX                 = encControl->useDTX;
+    psEnc->sCmn.useCBR                 = encControl->useCBR;
+    psEnc->sCmn.API_fs_Hz              = encControl->API_sampleRate;
+    psEnc->sCmn.maxInternal_fs_Hz      = encControl->maxInternalSampleRate;
+    psEnc->sCmn.minInternal_fs_Hz      = encControl->minInternalSampleRate;
+    psEnc->sCmn.desiredInternal_fs_Hz  = encControl->desiredInternalSampleRate;
+    psEnc->sCmn.useInBandFEC           = encControl->useInBandFEC;
+    psEnc->sCmn.nChannelsAPI           = encControl->nChannelsAPI;
+    psEnc->sCmn.nChannelsInternal      = encControl->nChannelsInternal;
+    psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
+    psEnc->sCmn.channelNb              = channelNb;
+
+    if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
+        if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
+            /* Change in API sampling rate in the middle of encoding a packet */
+            ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
+        }
+        return ret;
+    }
+
+    /* Beyond this point we know that there are no previously coded frames in the payload buffer */
+
+    /********************************************/
+    /* Determine internal sampling rate         */
+    /********************************************/
+    fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
+    if( force_fs_kHz ) {
+       fs_kHz = force_fs_kHz;
+    }
+    /********************************************/
+    /* Prepare resampler and buffered data      */
+    /********************************************/
+    ret += silk_setup_resamplers( psEnc, fs_kHz );
+
+    /********************************************/
+    /* Set internal sampling frequency          */
+    /********************************************/
+    ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
+
+    /********************************************/
+    /* Set encoding complexity                  */
+    /********************************************/
+    ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity  );
+
+    /********************************************/
+    /* Set packet loss rate measured by farend  */
+    /********************************************/
+    psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
+
+    /********************************************/
+    /* Set LBRR usage                           */
+    /********************************************/
+    ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
+
+    psEnc->sCmn.controlled_since_last_payload = 1;
+
+    return ret;
+}
+
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                         fs_kHz              /* I                        */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    SAVE_STACK;
+
+    if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
+    {
+        if( psEnc->sCmn.fs_kHz == 0 ) {
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
+        } else {
+            VARDECL( opus_int16, x_buf_API_fs_Hz );
+            VARDECL( silk_resampler_state_struct, temp_resampler_state );
+#ifdef FIXED_POINT
+            opus_int16 *x_bufFIX = psEnc->x_buf;
+#else
+            VARDECL( opus_int16, x_bufFIX );
+            opus_int32 new_buf_samples;
+#endif
+            opus_int32 api_buf_samples;
+            opus_int32 old_buf_samples;
+            opus_int32 buf_length_ms;
+
+            buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
+            old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz;
+
+#ifndef FIXED_POINT
+            new_buf_samples = buf_length_ms * fs_kHz;
+            ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
+                   opus_int16 );
+            silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
+#endif
+
+            /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
+            ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+
+            /* Calculate number of samples to temporarily upsample */
+            api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );
+
+            /* Temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
+            ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );
+
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
+
+            /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
+            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );
+
+#ifndef FIXED_POINT
+            silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
+#endif
+        }
+    }
+
+    psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
+
+    RESTORE_STACK;
+    return ret;
+}
+
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz,             /* I                        */
+    opus_int                        PacketSize_ms       /* I                        */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    /* Set packet size */
+    if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
+        if( ( PacketSize_ms !=  10 ) &&
+            ( PacketSize_ms !=  20 ) &&
+            ( PacketSize_ms !=  40 ) &&
+            ( PacketSize_ms !=  60 ) ) {
+            ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+        }
+        if( PacketSize_ms <= 10 ) {
+            psEnc->sCmn.nFramesPerPacket = 1;
+            psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
+            psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        } else {
+            psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
+            psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
+            psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            }
+        }
+        psEnc->sCmn.PacketSize_ms  = PacketSize_ms;
+        psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
+    }
+
+    /* Set internal sampling frequency */
+    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
+    if( psEnc->sCmn.fs_kHz != fs_kHz ) {
+        /* reset part of the state */
+        silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );
+        silk_memset( &psEnc->sPrefilt,             0, sizeof( psEnc->sPrefilt ) );
+        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );
+        silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+        silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
+        psEnc->sCmn.inputBufIx                  = 0;
+        psEnc->sCmn.nFramesEncoded              = 0;
+        psEnc->sCmn.TargetRate_bps              = 0;     /* trigger new SNR computation */
+
+        /* Initialize non-zero parameters */
+        psEnc->sCmn.prevLag                     = 100;
+        psEnc->sCmn.first_frame_after_reset     = 1;
+        psEnc->sPrefilt.lagPrev                 = 100;
+        psEnc->sShape.LastGainIndex             = 10;
+        psEnc->sCmn.sNSQ.lagPrev                = 100;
+        psEnc->sCmn.sNSQ.prev_gain_Q16          = 65536;
+        psEnc->sCmn.prevSignalType              = TYPE_NO_VOICE_ACTIVITY;
+
+        psEnc->sCmn.fs_kHz = fs_kHz;
+        if( psEnc->sCmn.fs_kHz == 8 ) {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            }
+        } else {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        }
+        if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_NB_MB;
+        } else {
+            psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_WB;
+        }
+        psEnc->sCmn.subfr_length   = SUB_FRAME_LENGTH_MS * fs_kHz;
+        psEnc->sCmn.frame_length   = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
+        psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+        psEnc->sCmn.la_pitch       = silk_SMULBB( LA_PITCH_MS, fs_kHz );
+        psEnc->sCmn.max_pitch_lag  = silk_SMULBB( 18, fs_kHz );
+        if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+        } else {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+        }
+        if( psEnc->sCmn.fs_kHz == 16 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+        } else if( psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+        } else {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+        }
+    }
+
+    /* Check that settings are valid */
+    silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
+
+    return ret;
+}
+
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    opus_int                        Complexity          /* I                        */
+)
+{
+    opus_int ret = 0;
+
+    /* Set encoding complexity */
+    silk_assert( Complexity >= 0 && Complexity <= 10 );
+    if( Complexity < 2 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MIN_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.8, 16 );
+        psEncC->pitchEstimationLPCOrder         = 6;
+        psEncC->shapingLPCOrder                 = 8;
+        psEncC->la_shape                        = 3 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 1;
+        psEncC->NLSF_MSVQ_Survivors             = 2;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 4 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.76, 16 );
+        psEncC->pitchEstimationLPCOrder         = 8;
+        psEncC->shapingLPCOrder                 = 10;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 4;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 6 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.74, 16 );
+        psEncC->pitchEstimationLPCOrder         = 10;
+        psEncC->shapingLPCOrder                 = 12;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 2;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 8;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else if( Complexity < 8 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.72, 16 );
+        psEncC->pitchEstimationLPCOrder         = 12;
+        psEncC->shapingLPCOrder                 = 14;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 3;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 16;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MAX_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.7, 16 );
+        psEncC->pitchEstimationLPCOrder         = 16;
+        psEncC->shapingLPCOrder                 = 16;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = MAX_DEL_DEC_STATES;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 32;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    }
+
+    /* Do not allow higher pitch estimation LPC order than predict LPC order */
+    psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
+    psEncC->shapeWinLength          = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
+    psEncC->Complexity              = Complexity;
+
+    silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
+    silk_assert( psEncC->shapingLPCOrder         <= MAX_SHAPE_LPC_ORDER      );
+    silk_assert( psEncC->nStatesDelayedDecision  <= MAX_DEL_DEC_STATES       );
+    silk_assert( psEncC->warping_Q16             <= 32767                    );
+    silk_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
+    silk_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
+    silk_assert( psEncC->NLSF_MSVQ_Survivors     <= NLSF_VQ_MAX_SURVIVORS    );
+
+    return ret;
+}
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state          *psEncC,            /* I/O                      */
+    const opus_int32            TargetRate_bps      /* I                        */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    opus_int32 LBRR_rate_thres_bps;
+
+    psEncC->LBRR_enabled = 0;
+    if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
+        if( psEncC->fs_kHz == 8 ) {
+            LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
+        } else if( psEncC->fs_kHz == 12 ) {
+            LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
+        } else {
+            LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
+        }
+        LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );
+
+        if( TargetRate_bps > LBRR_rate_thres_bps ) {
+            /* Set gain increase for coding LBRR excitation */
+            psEncC->LBRR_enabled = 1;
+            psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
+        }
+    }
+
+    return ret;
+}
diff --git a/src/main/jni/opus/silk/debug.c b/src/main/jni/opus/silk/debug.c
new file mode 100644
index 000000000..9253faf71
--- /dev/null
+++ b/src/main/jni/opus/silk/debug.c
@@ -0,0 +1,170 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "debug.h"
+#include "SigProc_FIX.h"
+
+#if SILK_TIC_TOC
+
+#ifdef _WIN32
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h>    /* timer */
+#else   /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+unsigned long silk_GetHighResolutionTime(void) /* O  time in usec*/
+{
+    /* Returns a time counter in microsec   */
+    /* the resolution is platform dependent */
+    /* but is typically 1.62 us resolution  */
+    LARGE_INTEGER lpPerformanceCount;
+    LARGE_INTEGER lpFrequency;
+    QueryPerformanceCounter(&lpPerformanceCount);
+    QueryPerformanceFrequency(&lpFrequency);
+    return (unsigned long)((1000000*(lpPerformanceCount.QuadPart)) / lpFrequency.QuadPart);
+}
+#else   /* Linux or Mac*/
+unsigned long GetHighResolutionTime(void) /* O  time in usec*/
+{
+    struct timeval tv;
+    gettimeofday(&tv, 0);
+    return((tv.tv_sec*1000000)+(tv.tv_usec));
+}
+#endif
+
+int           silk_Timer_nTimers = 0;
+int           silk_Timer_depth_ctr = 0;
+char          silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN];
+#ifdef WIN32
+LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+unsigned int  silk_Timer_cnt[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_min[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_sum[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_max[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_depth[silk_NUM_TIMERS_MAX];
+
+#ifdef WIN32
+void silk_TimerSave(char *file_name)
+{
+    if( silk_Timer_nTimers > 0 )
+    {
+        int k;
+        FILE *fp;
+        LARGE_INTEGER lpFrequency;
+        LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2;
+        int del = 0x7FFFFFFF;
+        double avg, sum_avg;
+        /* estimate overhead of calling performance counters */
+        for( k = 0; k < 1000; k++ ) {
+            QueryPerformanceCounter(&lpPerformanceCount1);
+            QueryPerformanceCounter(&lpPerformanceCount2);
+            lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart;
+            if( (int)lpPerformanceCount2.LowPart < del )
+                del = lpPerformanceCount2.LowPart;
+        }
+        QueryPerformanceFrequency(&lpFrequency);
+        /* print results to file */
+        sum_avg = 0.0f;
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {
+            if (silk_Timer_depth[k] == 0) {
+                sum_avg += (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k];
+            }
+        }
+        fp = fopen(file_name, "w");
+        fprintf(fp, "                                min         avg     %%         max      count\n");
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {
+            if (silk_Timer_depth[k] == 0) {
+                fprintf(fp, "%-28s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 1) {
+                fprintf(fp, " %-27s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 2) {
+                fprintf(fp, "  %-26s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 3) {
+                fprintf(fp, "   %-25s", silk_Timer_tags[k]);
+            } else {
+                fprintf(fp, "    %-24s", silk_Timer_tags[k]);
+            }
+            avg = (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart;
+            fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart);
+            fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]);
+            fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart);
+            fprintf(fp, "%10d\n", silk_Timer_cnt[k]);
+        }
+        fprintf(fp, "                                microseconds\n");
+        fclose(fp);
+    }
+}
+#else
+void silk_TimerSave(char *file_name)
+{
+    if( silk_Timer_nTimers > 0 )
+    {
+        int k;
+        FILE *fp;
+        /* print results to file */
+        fp = fopen(file_name, "w");
+        fprintf(fp, "                                min         avg         max      count\n");
+        for( k = 0; k < silk_Timer_nTimers; k++ )
+        {
+            if (silk_Timer_depth[k] == 0) {
+                fprintf(fp, "%-28s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 1) {
+                fprintf(fp, " %-27s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 2) {
+                fprintf(fp, "  %-26s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 3) {
+                fprintf(fp, "   %-25s", silk_Timer_tags[k]);
+            } else {
+                fprintf(fp, "    %-24s", silk_Timer_tags[k]);
+            }
+            fprintf(fp, "%d ", silk_Timer_min[k]);
+            fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]);
+            fprintf(fp, "%d ", silk_Timer_max[k]);
+            fprintf(fp, "%10d\n", silk_Timer_cnt[k]);
+        }
+        fprintf(fp, "                                microseconds\n");
+        fclose(fp);
+    }
+}
+#endif
+
+#endif /* SILK_TIC_TOC */
+
+#if SILK_DEBUG
+FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ];
+int silk_debug_store_count = 0;
+#endif /* SILK_DEBUG */
+
diff --git a/src/main/jni/opus/silk/debug.h b/src/main/jni/opus/silk/debug.h
new file mode 100644
index 000000000..efb6d3e99
--- /dev/null
+++ b/src/main/jni/opus/silk/debug.h
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEBUG_H
+#define SILK_DEBUG_H
+
+#include "typedef.h"
+#include <stdio.h>      /* file writing */
+#include <string.h>     /* strcpy, strcmp */
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+unsigned long GetHighResolutionTime(void); /* O  time in usec*/
+
+/* make SILK_DEBUG dependent on compiler's _DEBUG */
+#if defined _WIN32
+    #ifdef _DEBUG
+        #define SILK_DEBUG  1
+    #else
+        #define SILK_DEBUG  0
+    #endif
+
+    /* overrule the above */
+    #if 0
+    /*  #define NO_ASSERTS*/
+    #undef  SILK_DEBUG
+    #define SILK_DEBUG  1
+    #endif
+#else
+    #define SILK_DEBUG  0
+#endif
+
+/* Flag for using timers */
+#define SILK_TIC_TOC    0
+
+
+#if SILK_TIC_TOC
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h>    /* timer */
+#else   /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+/*********************************/
+/* timer functions for profiling */
+/*********************************/
+/* example:                                                         */
+/*                                                                  */
+/* TIC(LPC)                                                         */
+/* do_LPC(in_vec, order, acoef);    // do LPC analysis              */
+/* TOC(LPC)                                                         */
+/*                                                                  */
+/* and call the following just before exiting (from main)           */
+/*                                                                  */
+/* silk_TimerSave("silk_TimingData.txt");                           */
+/*                                                                  */
+/* results are now in silk_TimingData.txt                           */
+
+void silk_TimerSave(char *file_name);
+
+/* max number of timers (in different locations) */
+#define silk_NUM_TIMERS_MAX                  50
+/* max length of name tags in TIC(..), TOC(..) */
+#define silk_NUM_TIMERS_MAX_TAG_LEN          30
+
+extern int           silk_Timer_nTimers;
+extern int           silk_Timer_depth_ctr;
+extern char          silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN];
+#ifdef _WIN32
+extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+extern unsigned int  silk_Timer_cnt[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_sum[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_max[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_min[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_depth[silk_NUM_TIMERS_MAX];
+
+/* WARNING: TIC()/TOC can measure only up to 0.1 seconds at a time */
+#ifdef _WIN32
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    QueryPerformanceCounter(&silk_Timer_start[ID]);         \
+}
+#else
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+        if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {  \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    silk_Timer_start[ID] = GetHighResolutionTime();         \
+}
+#endif
+
+#ifdef _WIN32
+#define TOC(TAG_NAME) {                                             \
+    LARGE_INTEGER lpPerformanceCount;                               \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    QueryPerformanceCounter(&lpPerformanceCount);                   \
+    lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart;   \
+    if((lpPerformanceCount.QuadPart < 100000000) &&                 \
+        (lpPerformanceCount.QuadPart >= 0)) {                       \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += lpPerformanceCount.QuadPart;          \
+        if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] )      \
+            silk_Timer_max[ID] = lpPerformanceCount.QuadPart;       \
+        if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] )      \
+            silk_Timer_min[ID] = lpPerformanceCount.QuadPart;       \
+    }                                                               \
+    silk_Timer_depth_ctr--;                                         \
+}
+#else
+#define TOC(TAG_NAME) {                                             \
+    unsigned long endTime;                                          \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    endTime = GetHighResolutionTime();                              \
+    endTime -= silk_Timer_start[ID];                                \
+    if((endTime < 100000000) &&                                     \
+        (endTime >= 0)) {                                           \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += endTime;                              \
+        if( endTime > silk_Timer_max[ID] )                          \
+            silk_Timer_max[ID] = endTime;                           \
+        if( endTime < silk_Timer_min[ID] )                          \
+            silk_Timer_min[ID] = endTime;                           \
+    }                                                               \
+        silk_Timer_depth_ctr--;                                     \
+}
+#endif
+
+#else /* SILK_TIC_TOC */
+
+/* define macros as empty strings */
+#define TIC(TAG_NAME)
+#define TOC(TAG_NAME)
+#define silk_TimerSave(FILE_NAME)
+
+#endif /* SILK_TIC_TOC */
+
+
+#if SILK_DEBUG
+/************************************/
+/* write data to file for debugging */
+/************************************/
+/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */
+
+#define silk_NUM_STORES_MAX                                  100
+extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ];
+extern int silk_debug_store_count;
+
+/* Faster way of storing the data */
+#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) {          \
+    static opus_int init = 0, cnt = 0;                              \
+    static FILE **fp;                                               \
+    if (init == 0) {                                                \
+        init = 1;                                                   \
+        cnt = silk_debug_store_count++;                             \
+        silk_debug_store_fp[ cnt ] = fopen(#FILE_NAME, "wb");       \
+    }                                                               \
+    fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]);   \
+}
+
+/* Call this at the end of main() */
+#define SILK_DEBUG_STORE_CLOSE_FILES {                              \
+    opus_int i;                                                     \
+    for( i = 0; i < silk_debug_store_count; i++ ) {                 \
+        fclose( silk_debug_store_fp[ i ] );                         \
+    }                                                               \
+}
+
+#else /* SILK_DEBUG */
+
+/* define macros as empty strings */
+#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES)
+#define SILK_DEBUG_STORE_CLOSE_FILES
+
+#endif /* SILK_DEBUG */
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_DEBUG_H */
diff --git a/src/main/jni/opus/silk/dec_API.c b/src/main/jni/opus/silk/dec_API.c
new file mode 100644
index 000000000..4cbcf7151
--- /dev/null
+++ b/src/main/jni/opus/silk/dec_API.c
@@ -0,0 +1,397 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "API.h"
+#include "main.h"
+#include "stack_alloc.h"
+
+/************************/
+/* Decoder Super Struct */
+/************************/
+typedef struct {
+    silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
+    stereo_dec_state                sStereo;
+    opus_int                         nChannelsAPI;
+    opus_int                         nChannelsInternal;
+    opus_int                         prev_decode_only_middle;
+} silk_decoder;
+
+/*********************/
+/* Decoder functions */
+/*********************/
+
+opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    *decSizeBytes = sizeof( silk_decoder );
+
+    return ret;
+}
+
+/* Reset decoder state */
+opus_int silk_InitDecoder(                              /* O    Returns error code                              */
+    void                            *decState           /* I/O  State                                           */
+)
+{
+    opus_int n, ret = SILK_NO_ERROR;
+    silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
+
+    for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
+        ret  = silk_init_decoder( &channel_state[ n ] );
+    }
+    silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
+    /* Not strictly needed, but it's cleaner that way */
+    ((silk_decoder *)decState)->prev_decode_only_middle = 0;
+
+    return ret;
+}
+
+/* Decode a frame */
+opus_int silk_Decode(                                   /* O    Returns error code                              */
+    void*                           decState,           /* I/O  State                                           */
+    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
+    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
+    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
+    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
+    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+)
+{
+    opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
+    opus_int32 nSamplesOutDec, LBRR_symbol;
+    opus_int16 *samplesOut1_tmp[ 2 ];
+    VARDECL( opus_int16, samplesOut1_tmp_storage );
+    VARDECL( opus_int16, samplesOut2_tmp );
+    opus_int32 MS_pred_Q13[ 2 ] = { 0 };
+    opus_int16 *resample_out_ptr;
+    silk_decoder *psDec = ( silk_decoder * )decState;
+    silk_decoder_state *channel_state = psDec->channel_state;
+    opus_int has_side;
+    opus_int stereo_to_mono;
+    SAVE_STACK;
+
+    silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
+
+    /**********************************/
+    /* Test if first frame in payload */
+    /**********************************/
+    if( newPacketFlag ) {
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
+        }
+    }
+
+    /* If Mono -> Stereo transition in bitstream: init state of second channel */
+    if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
+        ret += silk_init_decoder( &channel_state[ 1 ] );
+    }
+
+    stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
+                     ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
+
+    if( channel_state[ 0 ].nFramesDecoded == 0 ) {
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            opus_int fs_kHz_dec;
+            if( decControl->payloadSize_ms == 0 ) {
+                /* Assuming packet loss, use 10 ms */
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 2;
+            } else if( decControl->payloadSize_ms == 10 ) {
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 2;
+            } else if( decControl->payloadSize_ms == 20 ) {
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 4;
+            } else if( decControl->payloadSize_ms == 40 ) {
+                channel_state[ n ].nFramesPerPacket = 2;
+                channel_state[ n ].nb_subfr = 4;
+            } else if( decControl->payloadSize_ms == 60 ) {
+                channel_state[ n ].nFramesPerPacket = 3;
+                channel_state[ n ].nb_subfr = 4;
+            } else {
+                silk_assert( 0 );
+                RESTORE_STACK;
+                return SILK_DEC_INVALID_FRAME_SIZE;
+            }
+            fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
+            if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
+                silk_assert( 0 );
+                RESTORE_STACK;
+                return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+            }
+            ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
+        }
+    }
+
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
+        silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
+        silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
+        silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+    }
+    psDec->nChannelsAPI      = decControl->nChannelsAPI;
+    psDec->nChannelsInternal = decControl->nChannelsInternal;
+
+    if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
+        ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+        RESTORE_STACK;
+        return( ret );
+    }
+
+    if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
+        /* First decoder call for this payload */
+        /* Decode VAD flags and LBRR flag */
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+                channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
+            }
+            channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
+        }
+        /* Decode LBRR flags */
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
+            if( channel_state[ n ].LBRR_flag ) {
+                if( channel_state[ n ].nFramesPerPacket == 1 ) {
+                    channel_state[ n ].LBRR_flags[ 0 ] = 1;
+                } else {
+                    LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
+                    for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+                        channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
+                    }
+                }
+            }
+        }
+
+        if( lostFlag == FLAG_DECODE_NORMAL ) {
+            /* Regular decoding: skip all LBRR data */
+            for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
+                for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+                    if( channel_state[ n ].LBRR_flags[ i ] ) {
+                        opus_int pulses[ MAX_FRAME_LENGTH ];
+                        opus_int condCoding;
+
+                        if( decControl->nChannelsInternal == 2 && n == 0 ) {
+                            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+                            if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
+                                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+                            }
+                        }
+                        /* Use conditional coding if previous frame available */
+                        if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
+                            condCoding = CODE_CONDITIONALLY;
+                        } else {
+                            condCoding = CODE_INDEPENDENTLY;
+                        }
+                        silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
+                        silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
+                            channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
+                    }
+                }
+            }
+        }
+    }
+
+    /* Get MS predictor index */
+    if( decControl->nChannelsInternal == 2 ) {
+        if(   lostFlag == FLAG_DECODE_NORMAL ||
+            ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
+        {
+            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
+            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
+                ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
+            {
+                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+            } else {
+                decode_only_middle = 0;
+            }
+        } else {
+            for( n = 0; n < 2; n++ ) {
+                MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
+            }
+        }
+    }
+
+    /* Reset side channel decoder prediction memory for first frame with side coding */
+    if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
+        silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
+        silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
+        psDec->channel_state[ 1 ].lagPrev        = 100;
+        psDec->channel_state[ 1 ].LastGainIndex  = 10;
+        psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+        psDec->channel_state[ 1 ].first_frame_after_reset = 1;
+    }
+
+    ALLOC( samplesOut1_tmp_storage,
+           decControl->nChannelsInternal*(
+               channel_state[ 0 ].frame_length + 2 ),
+           opus_int16 );
+    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
+    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+                           + channel_state[ 0 ].frame_length + 2;
+
+    if( lostFlag == FLAG_DECODE_NORMAL ) {
+        has_side = !decode_only_middle;
+    } else {
+        has_side = !psDec->prev_decode_only_middle
+              || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
+    }
+    /* Call decoder for one frame */
+    for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+        if( n == 0 || has_side ) {
+            opus_int FrameIndex;
+            opus_int condCoding;
+
+            FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
+            /* Use independent coding if no previous frame available */
+            if( FrameIndex <= 0 ) {
+                condCoding = CODE_INDEPENDENTLY;
+            } else if( lostFlag == FLAG_DECODE_LBRR ) {
+                condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
+            } else if( n > 0 && psDec->prev_decode_only_middle ) {
+                /* If we skipped a side frame in this packet, we don't
+                   need LTP scaling; the LTP state is well-defined. */
+                condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+            } else {
+                condCoding = CODE_CONDITIONALLY;
+            }
+            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
+        } else {
+            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
+        }
+        channel_state[ n ].nFramesDecoded++;
+    }
+
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
+        /* Convert Mid/Side to Left/Right */
+        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
+    } else {
+        /* Buffering */
+        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
+    }
+
+    /* Number of output samples */
+    *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
+
+    /* Set up pointers to temp buffers */
+    ALLOC( samplesOut2_tmp,
+           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
+    if( decControl->nChannelsAPI == 2 ) {
+        resample_out_ptr = samplesOut2_tmp;
+    } else {
+        resample_out_ptr = samplesOut;
+    }
+
+    for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
+
+        /* Resample decoded signal to API_sampleRate */
+        ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
+
+        /* Interleave if stereo output and stereo stream */
+        if( decControl->nChannelsAPI == 2 ) {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
+            }
+        }
+    }
+
+    /* Create two channel output from mono stream */
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
+        if ( stereo_to_mono ){
+            /* Resample right channel for newly collapsed stereo just in case
+               we weren't doing collapsing when switching to mono */
+            ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
+
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
+            }
+        } else {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
+            }
+        }
+    }
+
+    /* Export pitch lag, measured at 48 kHz sampling rate */
+    if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
+        int mult_tab[ 3 ] = { 6, 4, 3 };
+        decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
+    } else {
+        decControl->prevPitchLag = 0;
+    }
+
+    if( lostFlag == FLAG_PACKET_LOST ) {
+       /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
+          if we lose packets when the energy is going down */
+       for ( i = 0; i < psDec->nChannelsInternal; i++ )
+          psDec->channel_state[ i ].LastGainIndex = 10;
+    } else {
+       psDec->prev_decode_only_middle = decode_only_middle;
+    }
+    RESTORE_STACK;
+    return ret;
+}
+
+#if 0
+/* Getting table of contents for a packet */
+opus_int silk_get_TOC(
+    const opus_uint8                *payload,           /* I    Payload data                                */
+    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
+    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
+    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
+)
+{
+    opus_int i, flags, ret = SILK_NO_ERROR;
+
+    if( nBytesIn < 1 ) {
+        return -1;
+    }
+    if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
+        return -1;
+    }
+
+    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
+
+    /* For stereo, extract the flags for the mid channel */
+    flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
+
+    Silk_TOC->inbandFECFlag = flags & 1;
+    for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
+        flags = silk_RSHIFT( flags, 1 );
+        Silk_TOC->VADFlags[ i ] = flags & 1;
+        Silk_TOC->VADFlag |= flags & 1;
+    }
+
+    return ret;
+}
+#endif
diff --git a/src/main/jni/opus/silk/decode_core.c b/src/main/jni/opus/silk/decode_core.c
new file mode 100644
index 000000000..a820bf11d
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_core.c
@@ -0,0 +1,238 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/**********************************************************/
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+/**********************************************************/
+void silk_decode_core(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* O    Decoded speech                              */
+    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+)
+{
+    opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
+    opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q15 );
+    opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
+    opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
+    VARDECL( opus_int32, res_Q14 );
+    VARDECL( opus_int32, sLPC_Q14 );
+    SAVE_STACK;
+
+    silk_assert( psDec->prev_gain_Q16 != 0 );
+
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+    ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
+    ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
+
+    if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
+        NLSF_interpolation_flag = 1;
+    } else {
+        NLSF_interpolation_flag = 0;
+    }
+
+    /* Decode excitation */
+    rand_seed = psDec->indices.Seed;
+    for( i = 0; i < psDec->frame_length; i++ ) {
+        rand_seed = silk_RAND( rand_seed );
+        psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 );
+        if( psDec->exc_Q14[ i ] > 0 ) {
+            psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4;
+        } else
+        if( psDec->exc_Q14[ i ] < 0 ) {
+            psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4;
+        }
+        psDec->exc_Q14[ i ] += offset_Q10 << 4;
+        if( rand_seed < 0 ) {
+           psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ];
+        }
+
+        rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] );
+    }
+
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    pexc_Q14 = psDec->exc_Q14;
+    pxq      = xq;
+    sLTP_buf_idx = psDec->ltp_mem_length;
+    /* Loop over subframes */
+    for( k = 0; k < psDec->nb_subfr; k++ ) {
+        pres_Q14 = res_Q14;
+        A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+
+        /* Preload LPC coeficients to array on stack. Gives small performance gain */
+        silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+        B_Q14        = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+        signalType   = psDec->indices.signalType;
+
+        Gain_Q10     = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
+        inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 );
+
+        /* Calculate gain adjustment factor */
+        if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) {
+            gain_adj_Q16 =  silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 );
+
+            /* Scale short term state */
+            for( i = 0; i < MAX_LPC_ORDER; i++ ) {
+                sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] );
+            }
+        } else {
+            gain_adj_Q16 = (opus_int32)1 << 16;
+        }
+
+        /* Save inv_gain */
+        silk_assert( inv_gain_Q31 != 0 );
+        psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ];
+
+        /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */
+        if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED &&
+            psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) {
+
+            silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) );
+            B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 );
+
+            signalType = TYPE_VOICED;
+            psDecCtrl->pitchL[ k ] = psDec->lagPrev;
+        }
+
+        if( signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = psDecCtrl->pitchL[ k ];
+
+            /* Re-whitening */
+            if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                if( k == 2 ) {
+                    silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) );
+                }
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
+                    A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
+
+                /* After rewhitening the LTP state is unscaled */
+                if( k == 0 ) {
+                    /* Do LTP downscaling to reduce inter-packet dependency */
+                    inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 );
+                }
+                for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+                    sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] );
+                }
+            } else {
+                /* Update LTP state when Gain changes */
+                if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+                    for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+                        sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] );
+                    }
+                }
+            }
+        }
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Set up pointer */
+            pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+            for( i = 0; i < psDec->subfr_length; i++ ) {
+                /* Unrolled loop */
+                /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+                LTP_pred_Q13 = 2;
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], B_Q14[ 0 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+                pred_lag_ptr++;
+
+                /* Generate LPC excitation */
+                pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 );
+
+                /* Update states */
+                sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 );
+                sLTP_buf_idx++;
+            }
+        } else {
+            pres_Q14 = pexc_Q14;
+        }
+
+        for( i = 0; i < psDec->subfr_length; i++ ) {
+            /* Short-term prediction */
+            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12_tmp[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12_tmp[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12_tmp[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
+            if( psDec->LPC_order == 16 ) {
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] );
+            }
+
+            /* Add prediction to LPC excitation */
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+
+            /* Scale with gain */
+            pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
+        }
+
+        /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
+
+        /* Update LPC filter state */
+        silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        pexc_Q14 += psDec->subfr_length;
+        pxq      += psDec->subfr_length;
+    }
+
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/decode_frame.c b/src/main/jni/opus/silk/decode_frame.c
new file mode 100644
index 000000000..abc00a3d5
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_frame.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+/****************/
+/* Decode frame */
+/****************/
+opus_int silk_decode_frame(
+    silk_decoder_state          *psDec,                         /* I/O  Pointer to Silk decoder state               */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
+    opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
+    opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    VARDECL( silk_decoder_control, psDecCtrl );
+    opus_int         L, mv_len, ret = 0;
+    VARDECL( opus_int, pulses );
+    SAVE_STACK;
+
+    L = psDec->frame_length;
+    ALLOC( psDecCtrl, 1, silk_decoder_control );
+    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+    psDecCtrl->LTP_scale_Q14 = 0;
+
+    /* Safety checks */
+    silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
+
+    if(   lostFlag == FLAG_DECODE_NORMAL ||
+        ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
+    {
+        /*********************************************/
+        /* Decode quantization indices of side info  */
+        /*********************************************/
+        silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, condCoding );
+
+        /*********************************************/
+        /* Decode quantization indices of excitation */
+        /*********************************************/
+        silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType,
+                psDec->indices.quantOffsetType, psDec->frame_length );
+
+        /********************************************/
+        /* Decode parameters and pulse signal       */
+        /********************************************/
+        silk_decode_parameters( psDec, psDecCtrl, condCoding );
+
+        /********************************************************/
+        /* Run inverse NSQ                                      */
+        /********************************************************/
+        silk_decode_core( psDec, psDecCtrl, pOut, pulses );
+
+        /********************************************************/
+        /* Update PLC state                                     */
+        /********************************************************/
+        silk_PLC( psDec, psDecCtrl, pOut, 0 );
+
+        psDec->lossCnt = 0;
+        psDec->prevSignalType = psDec->indices.signalType;
+        silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 );
+
+        /* A frame has been decoded without errors */
+        psDec->first_frame_after_reset = 0;
+    } else {
+        /* Handle packet loss by extrapolation */
+        silk_PLC( psDec, psDecCtrl, pOut, 1 );
+    }
+
+    /*************************/
+    /* Update output buffer. */
+    /*************************/
+    silk_assert( psDec->ltp_mem_length >= psDec->frame_length );
+    mv_len = psDec->ltp_mem_length - psDec->frame_length;
+    silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+    silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+    /****************************************************************/
+    /* Ensure smooth connection of extrapolated and good frames     */
+    /****************************************************************/
+    silk_PLC_glue_frames( psDec, pOut, L );
+
+    /************************************************/
+    /* Comfort noise generation / estimation        */
+    /************************************************/
+    silk_CNG( psDec, psDecCtrl, pOut, L );
+
+    /* Update some decoder state variables */
+    psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
+
+    /* Set output frame length */
+    *pN = L;
+
+    RESTORE_STACK;
+    return ret;
+}
diff --git a/src/main/jni/opus/silk/decode_indices.c b/src/main/jni/opus/silk/decode_indices.c
new file mode 100644
index 000000000..7afe5c26c
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_indices.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Decode side-information parameters from payload */
+void silk_decode_indices(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    decode_LBRR,                    /* I    Flag indicating LBRR data is being decoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, Ix;
+    opus_int   decode_absolute_lagIndex, delta_lagIndex;
+    opus_int16 ec_ix[ MAX_LPC_ORDER ];
+    opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+
+    /*******************************************/
+    /* Decode signal type and quantizer offset */
+    /*******************************************/
+    if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) {
+        Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2;
+    } else {
+        Ix = ec_dec_icdf( psRangeDec, silk_type_offset_no_VAD_iCDF, 8 );
+    }
+    psDec->indices.signalType      = (opus_int8)silk_RSHIFT( Ix, 1 );
+    psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 );
+
+    /****************/
+    /* Decode gains */
+    /****************/
+    /* First subframe */
+    if( condCoding == CODE_CONDITIONALLY ) {
+        /* Conditional coding */
+        psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+    } else {
+        /* Independent coding, in two stages: MSB bits followed by 3 LSBs */
+        psDec->indices.GainsIndices[ 0 ]  = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 );
+        psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 );
+    }
+
+    /* Remaining subframes */
+    for( i = 1; i < psDec->nb_subfr; i++ ) {
+        psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+    }
+
+    /**********************/
+    /* Decode LSF Indices */
+    /**********************/
+    psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 );
+    silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] );
+    silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order );
+    for( i = 0; i < psDec->psNLSF_CB->order; i++ ) {
+        Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+        if( Ix == 0 ) {
+            Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+        } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) {
+            Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+        }
+        psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE );
+    }
+
+    /* Decode LSF interpolation factor */
+    if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+        psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 );
+    } else {
+        psDec->indices.NLSFInterpCoef_Q2 = 4;
+    }
+
+    if( psDec->indices.signalType == TYPE_VOICED )
+    {
+        /*********************/
+        /* Decode pitch lags */
+        /*********************/
+        /* Get lag index */
+        decode_absolute_lagIndex = 1;
+        if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) {
+            /* Decode Delta index */
+            delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 );
+            if( delta_lagIndex > 0 ) {
+                delta_lagIndex = delta_lagIndex - 9;
+                psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex );
+                decode_absolute_lagIndex = 0;
+            }
+        }
+        if( decode_absolute_lagIndex ) {
+            /* Absolute decoding */
+            psDec->indices.lagIndex  = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 );
+            psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 );
+        }
+        psDec->ec_prevLagIndex = psDec->indices.lagIndex;
+
+        /* Get countour index */
+        psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 );
+
+        /********************/
+        /* Decode LTP gains */
+        /********************/
+        /* Decode PERIndex value */
+        psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 );
+
+        for( k = 0; k < psDec->nb_subfr; k++ ) {
+            psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 );
+        }
+
+        /**********************/
+        /* Decode LTP scaling */
+        /**********************/
+        if( condCoding == CODE_INDEPENDENTLY ) {
+            psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 );
+        } else {
+            psDec->indices.LTP_scaleIndex = 0;
+        }
+    }
+    psDec->ec_prevSignalType = psDec->indices.signalType;
+
+    /***************/
+    /* Decode seed */
+    /***************/
+    psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 );
+}
diff --git a/src/main/jni/opus/silk/decode_parameters.c b/src/main/jni/opus/silk/decode_parameters.c
new file mode 100644
index 000000000..e345b1dce
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_parameters.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Decode parameters from payload */
+void silk_decode_parameters(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, Ix;
+    opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], pNLSF0_Q15[ MAX_LPC_ORDER ];
+    const opus_int8 *cbk_ptr_Q7;
+
+    /* Dequant Gains */
+    silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices,
+        &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr );
+
+    /****************/
+    /* Decode NLSFs */
+    /****************/
+    silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB );
+
+    /* Convert NLSF parameters to AR prediction filter coefficients */
+    silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order );
+
+    /* If just reset, e.g., because internal Fs changed, do not allow interpolation */
+    /* improves the case of packet loss in the first frame after a switch           */
+    if( psDec->first_frame_after_reset == 1 ) {
+        psDec->indices.NLSFInterpCoef_Q2 = 4;
+    }
+
+    if( psDec->indices.NLSFInterpCoef_Q2 < 4 ) {
+        /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */
+        /* the previous NLSF1, and the current NLSF1                                   */
+        for( i = 0; i < psDec->LPC_order; i++ ) {
+            pNLSF0_Q15[ i ] = psDec->prevNLSF_Q15[ i ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2,
+                pNLSF_Q15[ i ] - psDec->prevNLSF_Q15[ i ] ), 2 );
+        }
+
+        /* Convert NLSF parameters to AR prediction filter coefficients */
+        silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order );
+    } else {
+        /* Copy LPC coefficients for first half from second half */
+        silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+    }
+
+    silk_memcpy( psDec->prevNLSF_Q15, pNLSF_Q15, psDec->LPC_order * sizeof( opus_int16 ) );
+
+    /* After a packet loss do BWE of LPC coefs */
+    if( psDec->lossCnt ) {
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+    }
+
+    if( psDec->indices.signalType == TYPE_VOICED ) {
+        /*********************/
+        /* Decode pitch lags */
+        /*********************/
+
+        /* Decode pitch values */
+        silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr );
+
+        /* Decode Codebook Index */
+        cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ]; /* set pointer to start of codebook */
+
+        for( k = 0; k < psDec->nb_subfr; k++ ) {
+            Ix = psDec->indices.LTPIndex[ k ];
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER + i ] = silk_LSHIFT( cbk_ptr_Q7[ Ix * LTP_ORDER + i ], 7 );
+            }
+        }
+
+        /**********************/
+        /* Decode LTP scaling */
+        /**********************/
+        Ix = psDec->indices.LTP_scaleIndex;
+        psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ Ix ];
+    } else {
+        silk_memset( psDecCtrl->pitchL,      0,             psDec->nb_subfr * sizeof( opus_int   ) );
+        silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) );
+        psDec->indices.PERIndex  = 0;
+        psDecCtrl->LTP_scale_Q14 = 0;
+    }
+}
diff --git a/src/main/jni/opus/silk/decode_pitch.c b/src/main/jni/opus/silk/decode_pitch.c
new file mode 100644
index 000000000..fedbc6a52
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_pitch.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I                                                                */
+    opus_int8                   contourIndex,       /* O                                                                */
+    opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
+    const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
+    const opus_int              nb_subfr            /* I    number of sub frames                                        */
+)
+{
+    opus_int   lag, k, min_lag, max_lag, cbk_size;
+    const opus_int8 *Lag_CB_ptr;
+
+    if( Fs_kHz == 8 ) {
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE2_10MS;
+        }
+    } else {
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE3_MAX;
+        } else {
+            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE3_10MS;
+        }
+    }
+
+    min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
+    max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
+    lag = min_lag + lagIndex;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        pitch_lags[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, contourIndex, cbk_size );
+        pitch_lags[ k ] = silk_LIMIT( pitch_lags[ k ], min_lag, max_lag );
+    }
+}
diff --git a/src/main/jni/opus/silk/decode_pulses.c b/src/main/jni/opus/silk/decode_pulses.c
new file mode 100644
index 000000000..e8a87c2ab
--- /dev/null
+++ b/src/main/jni/opus/silk/decode_pulses.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/*********************************************/
+/* Decode quantization indices of excitation */
+/*********************************************/
+void silk_decode_pulses(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    const opus_int              signalType,                     /* I    Sigtype                                     */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int   i, j, k, iter, abs_q, nLS, RateLevelIndex;
+    opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
+    opus_int   *pulses_ptr;
+    const opus_uint8 *cdf_ptr;
+
+    /*********************/
+    /* Decode rate level */
+    /*********************/
+    RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
+
+    /* Calculate number of shell blocks */
+    silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+    iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
+        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        iter++;
+    }
+
+    /***************************************************/
+    /* Sum-Weighted-Pulses Decoding                    */
+    /***************************************************/
+    cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+    for( i = 0; i < iter; i++ ) {
+        nLshifts[ i ] = 0;
+        sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 );
+
+        /* LSB indication */
+        while( sum_pulses[ i ] == MAX_PULSES + 1 ) {
+            nLshifts[ i ]++;
+            /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */
+            sum_pulses[ i ] = ec_dec_icdf( psRangeDec,
+                    silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 );
+        }
+    }
+
+    /***************************************************/
+    /* Shell decoding                                  */
+    /***************************************************/
+    for( i = 0; i < iter; i++ ) {
+        if( sum_pulses[ i ] > 0 ) {
+            silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
+        } else {
+            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+        }
+    }
+
+    /***************************************************/
+    /* LSB Decoding                                    */
+    /***************************************************/
+    for( i = 0; i < iter; i++ ) {
+        if( nLshifts[ i ] > 0 ) {
+            nLS = nLshifts[ i ];
+            pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ];
+            for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                abs_q = pulses_ptr[ k ];
+                for( j = 0; j < nLS; j++ ) {
+                    abs_q = silk_LSHIFT( abs_q, 1 );
+                    abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 );
+                }
+                pulses_ptr[ k ] = abs_q;
+            }
+            /* Mark the number of pulses non-zero for sign decoding. */
+            sum_pulses[ i ] |= nLS << 5;
+        }
+    }
+
+    /****************************************/
+    /* Decode and add signs to pulse signal */
+    /****************************************/
+    silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+}
diff --git a/src/main/jni/opus/silk/decoder_set_fs.c b/src/main/jni/opus/silk/decoder_set_fs.c
new file mode 100644
index 000000000..eef0fd25e
--- /dev/null
+++ b/src/main/jni/opus/silk/decoder_set_fs.c
@@ -0,0 +1,108 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Set decoder sampling rate */
+opus_int silk_decoder_set_fs(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state pointer                       */
+    opus_int                    fs_kHz,                         /* I    Sampling frequency (kHz)                    */
+    opus_int32                  fs_API_Hz                       /* I    API Sampling frequency (Hz)                 */
+)
+{
+    opus_int frame_length, ret = 0;
+
+    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
+
+    /* New (sub)frame length */
+    psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );
+    frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length );
+
+    /* Initialize resampler when switching internal or external sampling frequency */
+    if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {
+        /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */
+        ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 );
+
+        psDec->fs_API_hz = fs_API_Hz;
+    }
+
+    if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) {
+        if( fs_kHz == 8 ) {
+            if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            }
+        } else {
+            if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            } else {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        }
+        if( psDec->fs_kHz != fs_kHz ) {
+            psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+            if( fs_kHz == 8 || fs_kHz == 12 ) {
+                psDec->LPC_order = MIN_LPC_ORDER;
+                psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;
+            } else {
+                psDec->LPC_order = MAX_LPC_ORDER;
+                psDec->psNLSF_CB = &silk_NLSF_CB_WB;
+            }
+            if( fs_kHz == 16 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+            } else if( fs_kHz == 12 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+            } else if( fs_kHz == 8 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+            } else {
+                /* unsupported sampling rate */
+                silk_assert( 0 );
+            }
+            psDec->first_frame_after_reset = 1;
+            psDec->lagPrev                 = 100;
+            psDec->LastGainIndex           = 10;
+            psDec->prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
+            silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf));
+            silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) );
+        }
+
+        psDec->fs_kHz       = fs_kHz;
+        psDec->frame_length = frame_length;
+    }
+
+    /* Check that settings are valid */
+    silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
+
+    return ret;
+}
+
diff --git a/src/main/jni/opus/silk/define.h b/src/main/jni/opus/silk/define.h
new file mode 100644
index 000000000..c47aca9f5
--- /dev/null
+++ b/src/main/jni/opus/silk/define.h
@@ -0,0 +1,235 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEFINE_H
+#define SILK_DEFINE_H
+
+#include "errors.h"
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Max number of encoder channels (1/2) */
+#define ENCODER_NUM_CHANNELS                    2
+/* Number of decoder channels (1/2) */
+#define DECODER_NUM_CHANNELS                    2
+
+#define MAX_FRAMES_PER_PACKET                   3
+
+/* Limits on bitrate */
+#define MIN_TARGET_RATE_BPS                     5000
+#define MAX_TARGET_RATE_BPS                     80000
+#define TARGET_RATE_TAB_SZ                      8
+
+/* LBRR thresholds */
+#define LBRR_NB_MIN_RATE_BPS                    12000
+#define LBRR_MB_MIN_RATE_BPS                    14000
+#define LBRR_WB_MIN_RATE_BPS                    16000
+
+/* DTX settings */
+#define NB_SPEECH_FRAMES_BEFORE_DTX             10      /* eq 200 ms */
+#define MAX_CONSECUTIVE_DTX                     20      /* eq 400 ms */
+
+/* Maximum sampling frequency */
+#define MAX_FS_KHZ                              16
+#define MAX_API_FS_KHZ                          48
+
+/* Signal types */
+#define TYPE_NO_VOICE_ACTIVITY                  0
+#define TYPE_UNVOICED                           1
+#define TYPE_VOICED                             2
+
+/* Conditional coding types */
+#define CODE_INDEPENDENTLY                      0
+#define CODE_INDEPENDENTLY_NO_LTP_SCALING       1
+#define CODE_CONDITIONALLY                      2
+
+/* Settings for stereo processing */
+#define STEREO_QUANT_TAB_SIZE                   16
+#define STEREO_QUANT_SUB_STEPS                  5
+#define STEREO_INTERP_LEN_MS                    8       /* must be even */
+#define STEREO_RATIO_SMOOTH_COEF                0.01    /* smoothing coef for signal norms and stereo width */
+
+/* Range of pitch lag estimates */
+#define PITCH_EST_MIN_LAG_MS                    2       /* 2 ms -> 500 Hz */
+#define PITCH_EST_MAX_LAG_MS                    18      /* 18 ms -> 56 Hz */
+
+/* Maximum number of subframes */
+#define MAX_NB_SUBFR                            4
+
+/* Number of samples per frame */
+#define LTP_MEM_LENGTH_MS                       20
+#define SUB_FRAME_LENGTH_MS                     5
+#define MAX_SUB_FRAME_LENGTH                    ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ )
+#define MAX_FRAME_LENGTH_MS                     ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR )
+#define MAX_FRAME_LENGTH                        ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ )
+
+/* Milliseconds of lookahead for pitch analysis */
+#define LA_PITCH_MS                             2
+#define LA_PITCH_MAX                            ( LA_PITCH_MS * MAX_FS_KHZ )
+
+/* Order of LPC used in find pitch */
+#define MAX_FIND_PITCH_LPC_ORDER                16
+
+/* Length of LPC window used in find pitch */
+#define FIND_PITCH_LPC_WIN_MS                   ( 20 + (LA_PITCH_MS << 1) )
+#define FIND_PITCH_LPC_WIN_MS_2_SF              ( 10 + (LA_PITCH_MS << 1) )
+#define FIND_PITCH_LPC_WIN_MAX                  ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ )
+
+/* Milliseconds of lookahead for noise shape analysis */
+#define LA_SHAPE_MS                             5
+#define LA_SHAPE_MAX                            ( LA_SHAPE_MS * MAX_FS_KHZ )
+
+/* Maximum length of LPC window used in noise shape analysis */
+#define SHAPE_LPC_WIN_MAX                       ( 15 * MAX_FS_KHZ )
+
+/* dB level of lowest gain quantization level */
+#define MIN_QGAIN_DB                            2
+/* dB level of highest gain quantization level */
+#define MAX_QGAIN_DB                            88
+/* Number of gain quantization levels */
+#define N_LEVELS_QGAIN                          64
+/* Max increase in gain quantization index */
+#define MAX_DELTA_GAIN_QUANT                    36
+/* Max decrease in gain quantization index */
+#define MIN_DELTA_GAIN_QUANT                    -4
+
+/* Quantization offsets (multiples of 4) */
+#define OFFSET_VL_Q10                           32
+#define OFFSET_VH_Q10                           100
+#define OFFSET_UVL_Q10                          100
+#define OFFSET_UVH_Q10                          240
+
+#define QUANT_LEVEL_ADJUST_Q10                  80
+
+/* Maximum numbers of iterations used to stabilize an LPC vector */
+#define MAX_LPC_STABILIZE_ITERATIONS            16
+#define MAX_PREDICTION_POWER_GAIN               1e4f
+#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET   1e2f
+
+#define MAX_LPC_ORDER                           16
+#define MIN_LPC_ORDER                           10
+
+/* Find Pred Coef defines */
+#define LTP_ORDER                               5
+
+/* LTP quantization settings */
+#define NB_LTP_CBKS                             3
+
+/* Flag to use harmonic noise shaping */
+#define USE_HARM_SHAPING                        1
+
+/* Max LPC order of noise shaping filters */
+#define MAX_SHAPE_LPC_ORDER                     16
+
+#define HARM_SHAPE_FIR_TAPS                     3
+
+/* Maximum number of delayed decision states */
+#define MAX_DEL_DEC_STATES                      4
+
+#define LTP_BUF_LENGTH                          512
+#define LTP_MASK                                ( LTP_BUF_LENGTH - 1 )
+
+#define DECISION_DELAY                          32
+#define DECISION_DELAY_MASK                     ( DECISION_DELAY - 1 )
+
+/* Number of subframes for excitation entropy coding */
+#define SHELL_CODEC_FRAME_LENGTH                16
+#define LOG2_SHELL_CODEC_FRAME_LENGTH           4
+#define MAX_NB_SHELL_BLOCKS                     ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH )
+
+/* Number of rate levels, for entropy coding of excitation */
+#define N_RATE_LEVELS                           10
+
+/* Maximum sum of pulses per shell coding frame */
+#define MAX_PULSES                              16
+
+#define MAX_MATRIX_SIZE                         MAX_LPC_ORDER /* Max of LPC Order and LTP order */
+
+#if( MAX_LPC_ORDER > DECISION_DELAY )
+# define NSQ_LPC_BUF_LENGTH                     MAX_LPC_ORDER
+#else
+# define NSQ_LPC_BUF_LENGTH                     DECISION_DELAY
+#endif
+
+/***************************/
+/* Voice activity detector */
+/***************************/
+#define VAD_N_BANDS                             4
+
+#define VAD_INTERNAL_SUBFRAMES_LOG2             2
+#define VAD_INTERNAL_SUBFRAMES                  ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 )
+
+#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16         1024    /* Must be <  4096 */
+#define VAD_NOISE_LEVELS_BIAS                   50
+
+/* Sigmoid settings */
+#define VAD_NEGATIVE_OFFSET_Q5                  128     /* sigmoid is 0 at -128 */
+#define VAD_SNR_FACTOR_Q16                      45000
+
+/* smoothing for SNR measurement */
+#define VAD_SNR_SMOOTH_COEF_Q18                 4096
+
+/* Size of the piecewise linear cosine approximation table for the LSFs */
+#define LSF_COS_TAB_SZ_FIX                      128
+
+/******************/
+/* NLSF quantizer */
+/******************/
+#define NLSF_W_Q                                2
+#define NLSF_VQ_MAX_VECTORS                     32
+#define NLSF_VQ_MAX_SURVIVORS                   32
+#define NLSF_QUANT_MAX_AMPLITUDE                4
+#define NLSF_QUANT_MAX_AMPLITUDE_EXT            10
+#define NLSF_QUANT_LEVEL_ADJ                    0.1
+#define NLSF_QUANT_DEL_DEC_STATES_LOG2          2
+#define NLSF_QUANT_DEL_DEC_STATES               ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 )
+
+/* Transition filtering for mode switching */
+#define TRANSITION_TIME_MS                      5120    /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/
+#define TRANSITION_NB                           3       /* Hardcoded in tables */
+#define TRANSITION_NA                           2       /* Hardcoded in tables */
+#define TRANSITION_INT_NUM                      5       /* Hardcoded in tables */
+#define TRANSITION_FRAMES                       ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS )
+#define TRANSITION_INT_STEPS                    ( TRANSITION_FRAMES  / ( TRANSITION_INT_NUM - 1 ) )
+
+/* BWE factors to apply after packet loss */
+#define BWE_AFTER_LOSS_Q16                      63570
+
+/* Defines for CN generation */
+#define CNG_BUF_MASK_MAX                        255     /* 2^floor(log2(MAX_FRAME_LENGTH))-1    */
+#define CNG_GAIN_SMTH_Q16                       4634    /* 0.25^(1/4)                           */
+#define CNG_NLSF_SMTH_Q16                       16348   /* 0.25                                 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/enc_API.c b/src/main/jni/opus/silk/enc_API.c
new file mode 100644
index 000000000..43739efc2
--- /dev/null
+++ b/src/main/jni/opus/silk/enc_API.c
@@ -0,0 +1,556 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "define.h"
+#include "API.h"
+#include "control.h"
+#include "typedef.h"
+#include "stack_alloc.h"
+#include "structs.h"
+#include "tuning_parameters.h"
+#ifdef FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder(                      /* O    Returns error code                              */
+    const void                      *encState,          /* I    State                                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+);
+
+/****************************************/
+/* Encoder functions                    */
+/****************************************/
+
+opus_int silk_Get_Encoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *encSizeBytes       /* O    Number of bytes in SILK encoder state           */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    *encSizeBytes = sizeof( silk_encoder );
+
+    return ret;
+}
+
+/*************************/
+/* Init or Reset encoder */
+/*************************/
+opus_int silk_InitEncoder(                              /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+)
+{
+    silk_encoder *psEnc;
+    opus_int n, ret = SILK_NO_ERROR;
+
+    psEnc = (silk_encoder *)encState;
+
+    /* Reset encoder */
+    silk_memset( psEnc, 0, sizeof( silk_encoder ) );
+    for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
+        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
+            silk_assert( 0 );
+        }
+    }
+
+    psEnc->nChannelsAPI = 1;
+    psEnc->nChannelsInternal = 1;
+
+    /* Read control structure */
+    if( ret += silk_QueryEncoder( encState, encStatus ) ) {
+        silk_assert( 0 );
+    }
+
+    return ret;
+}
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder(                      /* O    Returns error code                              */
+    const void                      *encState,          /* I    State                                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+    silk_encoder_state_Fxx *state_Fxx;
+    silk_encoder *psEnc = (silk_encoder *)encState;
+
+    state_Fxx = psEnc->state_Fxx;
+
+    encStatus->nChannelsAPI              = psEnc->nChannelsAPI;
+    encStatus->nChannelsInternal         = psEnc->nChannelsInternal;
+    encStatus->API_sampleRate            = state_Fxx[ 0 ].sCmn.API_fs_Hz;
+    encStatus->maxInternalSampleRate     = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
+    encStatus->minInternalSampleRate     = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
+    encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
+    encStatus->payloadSize_ms            = state_Fxx[ 0 ].sCmn.PacketSize_ms;
+    encStatus->bitRate                   = state_Fxx[ 0 ].sCmn.TargetRate_bps;
+    encStatus->packetLossPercentage      = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
+    encStatus->complexity                = state_Fxx[ 0 ].sCmn.Complexity;
+    encStatus->useInBandFEC              = state_Fxx[ 0 ].sCmn.useInBandFEC;
+    encStatus->useDTX                    = state_Fxx[ 0 ].sCmn.useDTX;
+    encStatus->useCBR                    = state_Fxx[ 0 ].sCmn.useCBR;
+    encStatus->internalSampleRate        = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+    encStatus->allowBandwidthSwitch      = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
+    encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+
+    return ret;
+}
+
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
+/* encControl->payloadSize_ms is set to                                                                         */
+opus_int silk_Encode(                                   /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    silk_EncControlStruct           *encControl,        /* I    Control status                                  */
+    const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
+    opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
+    ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
+    opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
+    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+)
+{
+    opus_int   n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
+    opus_int   nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
+    opus_int   nSamplesFromInput = 0, nSamplesFromInputMax;
+    opus_int   speech_act_thr_for_switch_Q8;
+    opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
+    silk_encoder *psEnc = ( silk_encoder * )encState;
+    VARDECL( opus_int16, buf );
+    opus_int transition, curr_block, tot_blocks;
+    SAVE_STACK;
+
+    if (encControl->reducedDependency)
+    {
+       psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
+       psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
+    }
+    psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
+
+    /* Check values in encoder control structure */
+    if( ( ret = check_control_input( encControl ) != 0 ) ) {
+        silk_assert( 0 );
+        RESTORE_STACK;
+        return ret;
+    }
+
+    encControl->switchReady = 0;
+
+    if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
+        /* Mono -> Stereo transition: init state of second channel and stereo state */
+        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
+        silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
+        silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
+        psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
+        psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
+        psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
+        psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
+        psEnc->sStereo.width_prev_Q14 = 0;
+        psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
+        if( psEnc->nChannelsAPI == 2 ) {
+            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
+            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State,     &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State,     sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
+        }
+    }
+
+    transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
+
+    psEnc->nChannelsAPI = encControl->nChannelsAPI;
+    psEnc->nChannelsInternal = encControl->nChannelsInternal;
+
+    nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
+    tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
+    curr_block = 0;
+    if( prefillFlag ) {
+        /* Only accept input length of 10 ms */
+        if( nBlocksOf10ms != 1 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+        /* Reset Encoder */
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
+            silk_assert( !ret );
+        }
+        tmp_payloadSize_ms = encControl->payloadSize_ms;
+        encControl->payloadSize_ms = 10;
+        tmp_complexity = encControl->complexity;
+        encControl->complexity = 0;
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
+        }
+    } else {
+        /* Only accept input lengths that are a multiple of 10 ms */
+        if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+        /* Make sure no more than one packet can be produced */
+        if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+    }
+
+    TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
+    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+        /* Force the side channel to the same rate as the mid */
+        opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
+        if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return ret;
+        }
+        if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
+            for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+                psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
+            }
+        }
+        psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
+    }
+    silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+
+    /* Input buffering/resampling and encoding */
+    nSamplesToBufferMax =
+        10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
+    nSamplesFromInputMax =
+        silk_DIV32_16( nSamplesToBufferMax *
+                           psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
+                       psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+    ALLOC( buf, nSamplesFromInputMax, opus_int16 );
+    while( 1 ) {
+        nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
+        nSamplesToBuffer  = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
+        nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+        /* Resample and write to buffer */
+        if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
+            opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n ];
+            }
+            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
+            if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+            }
+
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+
+            nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
+            nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n + 1 ];
+            }
+            ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+
+            psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
+        } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
+            /* Combine left and right channels before resampling */
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
+                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum,  1 );
+            }
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            /* On the first mono frame, average the results for the two resampler states  */
+            if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
+               ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+                   &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+               for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
+                  psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
+                        silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
+                                  + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
+               }
+            }
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+        } else {
+            silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
+            silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+        }
+
+        samplesIn  += nSamplesFromInput * encControl->nChannelsAPI;
+        nSamplesIn -= nSamplesFromInput;
+
+        /* Default */
+        psEnc->allowBandwidthSwitch = 0;
+
+        /* Silk encoder */
+        if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
+            /* Enough data in input buffer, so encode */
+            silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+            silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
+
+            /* Deal with LBRR data */
+            if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
+                /* Create space at start of payload for VAD and FEC flags */
+                opus_uint8 iCDF[ 2 ] = { 0, 0 };
+                iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+                ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
+
+                /* Encode any LBRR data from previous packet */
+                /* Encode LBRR flags */
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    LBRR_symbol = 0;
+                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+                        LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
+                    }
+                    psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
+                    if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
+                        ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
+                    }
+                }
+
+                /* Code LBRR indices and excitation signals */
+                for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+                    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                        if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
+                            opus_int condCoding;
+
+                            if( encControl->nChannelsInternal == 2 && n == 0 ) {
+                                silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
+                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
+                                if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
+                                    silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
+                                }
+                            }
+                            /* Use conditional coding if previous frame available */
+                            if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
+                                condCoding = CODE_CONDITIONALLY;
+                            } else {
+                                condCoding = CODE_INDEPENDENTLY;
+                            }
+                            silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
+                            silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
+                                psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
+                        }
+                    }
+                }
+
+                /* Reset LBRR flags */
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
+                }
+            }
+
+            silk_HP_variable_cutoff( psEnc->state_Fxx );
+
+            /* Total target bits for packet */
+            nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+            /* Subtract half of the bits already used */
+            if( !prefillFlag ) {
+                nBits -= ec_tell( psRangeEnc ) >> 1;
+            }
+            /* Divide by number of uncoded frames left in packet */
+            nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded );
+            /* Convert to bits/second */
+            if( encControl->payloadSize_ms == 10 ) {
+                TargetRate_bps = silk_SMULBB( nBits, 100 );
+            } else {
+                TargetRate_bps = silk_SMULBB( nBits, 50 );
+            }
+            /* Subtract fraction of bits in excess of target in previous packets */
+            TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
+            /* Never exceed input bitrate */
+            TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
+
+            /* Convert Left/Right to Mid/Side */
+            if( encControl->nChannelsInternal == 2 ) {
+                silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
+                    psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
+                    MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
+                    psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+                if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                    /* Reset side channel encoder memory for first frame with side coding */
+                    if( psEnc->prev_decode_only_middle == 1 ) {
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+                        silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+                        psEnc->state_Fxx[ 1 ].sCmn.prevLag                 = 100;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev            = 100;
+                        psEnc->state_Fxx[ 1 ].sShape.LastGainIndex         = 10;
+                        psEnc->state_Fxx[ 1 ].sCmn.prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16      = 65536;
+                        psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
+                    }
+                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+                } else {
+                    psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+                }
+                if( !prefillFlag ) {
+                    silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                        silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    }
+                }
+            } else {
+                /* Buffering */
+                silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+                silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
+            }
+            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
+
+            /* Encode */
+            for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                opus_int maxBits, useCBR;
+
+                /* Handling rate constraints */
+                maxBits = encControl->maxBits;
+                if( tot_blocks == 2 && curr_block == 0 ) {
+                    maxBits = maxBits * 3 / 5;
+                } else if( tot_blocks == 3 ) {
+                    if( curr_block == 0 ) {
+                        maxBits = maxBits * 2 / 5;
+                    } else if( curr_block == 1 ) {
+                        maxBits = maxBits * 3 / 4;
+                    }
+                }
+                useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
+
+                if( encControl->nChannelsInternal == 1 ) {
+                    channelRate_bps = TargetRate_bps;
+                } else {
+                    channelRate_bps = MStargetRates_bps[ n ];
+                    if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
+                        useCBR = 0;
+                        /* Give mid up to 1/2 of the max bits for that frame */
+                        maxBits -= encControl->maxBits / ( tot_blocks * 2 );
+                    }
+                }
+
+                if( channelRate_bps > 0 ) {
+                    opus_int condCoding;
+
+                    silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
+
+                    /* Use independent coding if no previous frame available */
+                    if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
+                        condCoding = CODE_INDEPENDENTLY;
+                    } else if( n > 0 && psEnc->prev_decode_only_middle ) {
+                        /* If we skipped a side frame in this packet, we don't
+                           need LTP scaling; the LTP state is well-defined. */
+                        condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+                    } else {
+                        condCoding = CODE_CONDITIONALLY;
+                    }
+                    if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
+                        silk_assert( 0 );
+                    }
+                }
+                psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+                psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
+                psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
+            }
+            psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
+
+            /* Insert VAD and FEC flags at beginning of bitstream */
+            if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
+                flags = 0;
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+                        flags  = silk_LSHIFT( flags, 1 );
+                        flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
+                    }
+                    flags  = silk_LSHIFT( flags, 1 );
+                    flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
+                }
+                if( !prefillFlag ) {
+                    ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+                }
+
+                /* Return zero bytes if all channels DTXed */
+                if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
+                    *nBytesOut = 0;
+                }
+
+                psEnc->nBitsExceeded += *nBytesOut * 8;
+                psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+                psEnc->nBitsExceeded  = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
+
+                /* Update flag indicating if bandwidth switching is allowed */
+                speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
+                    SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
+                if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
+                    psEnc->allowBandwidthSwitch = 1;
+                    psEnc->timeSinceSwitchAllowed_ms = 0;
+                } else {
+                    psEnc->allowBandwidthSwitch = 0;
+                    psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
+                }
+            }
+
+            if( nSamplesIn == 0 ) {
+                break;
+            }
+        } else {
+            break;
+        }
+        curr_block++;
+    }
+
+    psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
+
+    encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
+    encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+    encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+    encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
+    if( prefillFlag ) {
+        encControl->payloadSize_ms = tmp_payloadSize_ms;
+        encControl->complexity = tmp_complexity;
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
+        }
+    }
+
+    RESTORE_STACK;
+    return ret;
+}
+
diff --git a/src/main/jni/opus/silk/encode_indices.c b/src/main/jni/opus/silk/encode_indices.c
new file mode 100644
index 000000000..666c8c0b1
--- /dev/null
+++ b/src/main/jni/opus/silk/encode_indices.c
@@ -0,0 +1,181 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Encode side-information parameters to payload */
+void silk_encode_indices(
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    encode_LBRR,                    /* I    Flag indicating LBRR data is being encoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, typeOffset;
+    opus_int   encode_absolute_lagIndex, delta_lagIndex;
+    opus_int16 ec_ix[ MAX_LPC_ORDER ];
+    opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+    const SideInfoIndices *psIndices;
+
+    if( encode_LBRR ) {
+         psIndices = &psEncC->indices_LBRR[ FrameIndex ];
+    } else {
+         psIndices = &psEncC->indices;
+    }
+
+    /*******************************************/
+    /* Encode signal type and quantizer offset */
+    /*******************************************/
+    typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType;
+    silk_assert( typeOffset >= 0 && typeOffset < 6 );
+    silk_assert( encode_LBRR == 0 || typeOffset >= 2 );
+    if( encode_LBRR || typeOffset >= 2 ) {
+        ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 );
+    } else {
+        ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 );
+    }
+
+    /****************/
+    /* Encode gains */
+    /****************/
+    /* first subframe */
+    if( condCoding == CODE_CONDITIONALLY ) {
+        /* conditional coding */
+        silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 );
+    } else {
+        /* independent coding, in two stages: MSB bits followed by 3 LSBs */
+        silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN );
+        ec_enc_icdf( psRangeEnc, silk_RSHIFT( psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 );
+    }
+
+    /* remaining subframes */
+    for( i = 1; i < psEncC->nb_subfr; i++ ) {
+        silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 );
+    }
+
+    /****************/
+    /* Encode NLSFs */
+    /****************/
+    ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 );
+    silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] );
+    silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder );
+    for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) {
+        if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) {
+            ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+            ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+        } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+            ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+            ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+        } else {
+            ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+        }
+    }
+
+    /* Encode NLSF interpolation factor */
+    if( psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 );
+        ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 );
+    }
+
+    if( psIndices->signalType == TYPE_VOICED )
+    {
+        /*********************/
+        /* Encode pitch lags */
+        /*********************/
+        /* lag index */
+        encode_absolute_lagIndex = 1;
+        if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) {
+            /* Delta Encoding */
+            delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex;
+            if( delta_lagIndex < -8 || delta_lagIndex > 11 ) {
+                delta_lagIndex = 0;
+            } else {
+                delta_lagIndex = delta_lagIndex + 9;
+                encode_absolute_lagIndex = 0; /* Only use delta */
+            }
+            silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 );
+            ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 );
+        }
+        if( encode_absolute_lagIndex ) {
+            /* Absolute encoding */
+            opus_int32 pitch_high_bits, pitch_low_bits;
+            pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+            pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+            silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 );
+            silk_assert( pitch_high_bits < 32 );
+            ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 );
+            ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 );
+        }
+        psEncC->ec_prevLagIndex = psIndices->lagIndex;
+
+        /* Countour index */
+        silk_assert(   psIndices->contourIndex  >= 0 );
+        silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 4 ) ||
+                    ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) ||
+                    ( psIndices->contourIndex < 12 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 2 ) ||
+                    ( psIndices->contourIndex <  3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) );
+        ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 );
+
+        /********************/
+        /* Encode LTP gains */
+        /********************/
+        /* PERIndex value */
+        silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 );
+        ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 );
+
+        /* Codebook Indices */
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) );
+            ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 );
+        }
+
+        /**********************/
+        /* Encode LTP scaling */
+        /**********************/
+        if( condCoding == CODE_INDEPENDENTLY ) {
+            silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 );
+            ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 );
+        }
+        silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 );
+    }
+
+    psEncC->ec_prevSignalType = psIndices->signalType;
+
+    /***************/
+    /* Encode seed */
+    /***************/
+    silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 );
+    ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 );
+}
diff --git a/src/main/jni/opus/silk/encode_pulses.c b/src/main/jni/opus/silk/encode_pulses.c
new file mode 100644
index 000000000..a4501438d
--- /dev/null
+++ b/src/main/jni/opus/silk/encode_pulses.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/*********************************************/
+/* Encode quantization indices of excitation */
+/*********************************************/
+
+static OPUS_INLINE opus_int combine_and_check(    /* return ok                           */
+    opus_int         *pulses_comb,           /* O                                   */
+    const opus_int   *pulses_in,             /* I                                   */
+    opus_int         max_pulses,             /* I    max value for sum of pulses    */
+    opus_int         len                     /* I    number of output values        */
+)
+{
+    opus_int k, sum;
+
+    for( k = 0; k < len; k++ ) {
+        sum = pulses_in[ 2 * k ] + pulses_in[ 2 * k + 1 ];
+        if( sum > max_pulses ) {
+            return 1;
+        }
+        pulses_comb[ k ] = sum;
+    }
+
+    return 0;
+}
+
+/* Encode quantization indices of excitation */
+void silk_encode_pulses(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              signalType,                     /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    opus_int8                   pulses[],                       /* I    quantization indices                        */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int   i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0;
+    opus_int32 abs_q, minSumBits_Q5, sumBits_Q5;
+    VARDECL( opus_int, abs_pulses );
+    VARDECL( opus_int, sum_pulses );
+    VARDECL( opus_int, nRshifts );
+    opus_int   pulses_comb[ 8 ];
+    opus_int   *abs_pulses_ptr;
+    const opus_int8 *pulses_ptr;
+    const opus_uint8 *cdf_ptr;
+    const opus_uint8 *nBits_ptr;
+    SAVE_STACK;
+
+    silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/
+
+    /****************************/
+    /* Prepare for shell coding */
+    /****************************/
+    /* Calculate number of shell blocks */
+    silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+    iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
+        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        iter++;
+        silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8));
+    }
+
+    /* Take the absolute value of the pulses */
+    ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );
+    silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) );
+    for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {
+        abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] );
+        abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] );
+        abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] );
+        abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] );
+    }
+
+    /* Calc sum pulses per shell code frame */
+    ALLOC( sum_pulses, iter, opus_int );
+    ALLOC( nRshifts, iter, opus_int );
+    abs_pulses_ptr = abs_pulses;
+    for( i = 0; i < iter; i++ ) {
+        nRshifts[ i ] = 0;
+
+        while( 1 ) {
+            /* 1+1 -> 2 */
+            scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 );
+            /* 2+2 -> 4 */
+            scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 );
+            /* 4+4 -> 8 */
+            scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 );
+            /* 8+8 -> 16 */
+            scale_down += combine_and_check( &sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 );
+
+            if( scale_down ) {
+                /* We need to downscale the quantization signal */
+                nRshifts[ i ]++;
+                for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                    abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 );
+                }
+            } else {
+                /* Jump out of while(1) loop and go to next shell coding frame */
+                break;
+            }
+        }
+        abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+
+    /**************/
+    /* Rate level */
+    /**************/
+    /* find rate level that leads to fewest bits for coding of pulses per block info */
+    minSumBits_Q5 = silk_int32_MAX;
+    for( k = 0; k < N_RATE_LEVELS - 1; k++ ) {
+        nBits_ptr  = silk_pulses_per_block_BITS_Q5[ k ];
+        sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ];
+        for( i = 0; i < iter; i++ ) {
+            if( nRshifts[ i ] > 0 ) {
+                sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ];
+            } else {
+                sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ];
+            }
+        }
+        if( sumBits_Q5 < minSumBits_Q5 ) {
+            minSumBits_Q5 = sumBits_Q5;
+            RateLevelIndex = k;
+        }
+    }
+    ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
+
+    /***************************************************/
+    /* Sum-Weighted-Pulses Encoding                    */
+    /***************************************************/
+    cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+    for( i = 0; i < iter; i++ ) {
+        if( nRshifts[ i ] == 0 ) {
+            ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 );
+        } else {
+            ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 );
+            for( k = 0; k < nRshifts[ i ] - 1; k++ ) {
+                ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+            }
+            ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+        }
+    }
+
+    /******************/
+    /* Shell Encoding */
+    /******************/
+    for( i = 0; i < iter; i++ ) {
+        if( sum_pulses[ i ] > 0 ) {
+            silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] );
+        }
+    }
+
+    /****************/
+    /* LSB Encoding */
+    /****************/
+    for( i = 0; i < iter; i++ ) {
+        if( nRshifts[ i ] > 0 ) {
+            pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ];
+            nLS = nRshifts[ i ] - 1;
+            for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] );
+                for( j = nLS; j > 0; j-- ) {
+                    bit = silk_RSHIFT( abs_q, j ) & 1;
+                    ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+                }
+                bit = abs_q & 1;
+                ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+            }
+        }
+    }
+
+    /****************/
+    /* Encode signs */
+    /****************/
+    silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/errors.h b/src/main/jni/opus/silk/errors.h
new file mode 100644
index 000000000..45070800f
--- /dev/null
+++ b/src/main/jni/opus/silk/errors.h
@@ -0,0 +1,98 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_ERRORS_H
+#define SILK_ERRORS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/******************/
+/* Error messages */
+/******************/
+#define SILK_NO_ERROR                               0
+
+/**************************/
+/* Encoder error messages */
+/**************************/
+
+/* Input length is not a multiple of 10 ms, or length is longer than the packet length */
+#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES        -101
+
+/* Sampling frequency not 8000, 12000 or 16000 Hertz */
+#define SILK_ENC_FS_NOT_SUPPORTED                   -102
+
+/* Packet size not 10, 20, 40, or 60 ms */
+#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED          -103
+
+/* Allocated payload buffer too short */
+#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT              -104
+
+/* Loss rate not between 0 and 100 percent */
+#define SILK_ENC_INVALID_LOSS_RATE                  -105
+
+/* Complexity setting not valid, use 0...10 */
+#define SILK_ENC_INVALID_COMPLEXITY_SETTING         -106
+
+/* Inband FEC setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_INBAND_FEC_SETTING         -107
+
+/* DTX setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_DTX_SETTING                -108
+
+/* CBR setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_CBR_SETTING                -109
+
+/* Internal encoder error */
+#define SILK_ENC_INTERNAL_ERROR                     -110
+
+/* Internal encoder error */
+#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR   -111
+
+/**************************/
+/* Decoder error messages */
+/**************************/
+
+/* Output sampling frequency lower than internal decoded sampling frequency */
+#define SILK_DEC_INVALID_SAMPLING_FREQUENCY         -200
+
+/* Payload size exceeded the maximum allowed 1024 bytes */
+#define SILK_DEC_PAYLOAD_TOO_LARGE                  -201
+
+/* Payload has bit errors */
+#define SILK_DEC_PAYLOAD_ERROR                      -202
+
+/* Payload has bit errors */
+#define SILK_DEC_INVALID_FRAME_SIZE                 -203
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/fixed/LTP_analysis_filter_FIX.c b/src/main/jni/opus/silk/fixed/LTP_analysis_filter_FIX.c
new file mode 100644
index 000000000..a94190808
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+void silk_LTP_analysis_filter_FIX(
+    opus_int16                      *LTP_res,                               /* O    LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length )  */
+    const opus_int16                *x,                                     /* I    Pointer to input signal with at least max( pitchL ) preceding samples       */
+    const opus_int16                LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I    LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe                   */
+    const opus_int                  pitchL[ MAX_NB_SUBFR ],                 /* I    Pitch lag, one for each subframe                                            */
+    const opus_int32                invGains_Q16[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains, one for each subframe                           */
+    const opus_int                  subfr_length,                           /* I    Length of each subframe                                                     */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  pre_length                              /* I    Length of the preceding samples starting at &x[0] for each subframe         */
+)
+{
+    const opus_int16 *x_ptr, *x_lag_ptr;
+    opus_int16   Btmp_Q14[ LTP_ORDER ];
+    opus_int16   *LTP_res_ptr;
+    opus_int     k, i, j;
+    opus_int32   LTP_est;
+
+    x_ptr = x;
+    LTP_res_ptr = LTP_res;
+    for( k = 0; k < nb_subfr; k++ ) {
+
+        x_lag_ptr = x_ptr - pitchL[ k ];
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
+        }
+
+        /* LTP analysis FIR filter */
+        for( i = 0; i < subfr_length + pre_length; i++ ) {
+            LTP_res_ptr[ i ] = x_ptr[ i ];
+
+            /* Long-term prediction */
+            LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
+            for( j = 1; j < LTP_ORDER; j++ ) {
+                LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
+            }
+            LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
+
+            /* Subtract long-term prediction */
+            LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est );
+
+            /* Scale residual */
+            LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] );
+
+            x_lag_ptr++;
+        }
+
+        /* Update pointers */
+        LTP_res_ptr += subfr_length + pre_length;
+        x_ptr       += subfr_length;
+    }
+}
+
diff --git a/src/main/jni/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/src/main/jni/opus/silk/fixed/LTP_scale_ctrl_FIX.c
new file mode 100644
index 000000000..3dcedef89
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/LTP_scale_ctrl_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    opus_int round_loss;
+
+    if( condCoding == CODE_INDEPENDENTLY ) {
+        /* Only scale if first frame in packet */
+        round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket;
+        psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT(
+            silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 );
+    } else {
+        /* Default is minimum scaling */
+        psEnc->sCmn.indices.LTP_scaleIndex = 0;
+    }
+    psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ];
+}
diff --git a/src/main/jni/opus/silk/fixed/apply_sine_window_FIX.c b/src/main/jni/opus/silk/fixed/apply_sine_window_FIX.c
new file mode 100644
index 000000000..4502b7130
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/apply_sine_window_FIX.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Apply sine window to signal vector.                                      */
+/* Window types:                                                            */
+/*    1 -> sine window from 0 to pi/2                                       */
+/*    2 -> sine window from pi/2 to pi                                      */
+/* Every other sample is linearly interpolated, for speed.                  */
+/* Window length must be between 16 and 120 (incl) and a multiple of 4.     */
+
+/* Matlab code for table:
+   for k=16:9*4:16+2*9*4, fprintf(' %7.d,', -round(65536*pi ./ (k:4:k+8*4))); fprintf('\n'); end
+*/
+static const opus_int16 freq_table_Q16[ 27 ] = {
+   12111,    9804,    8235,    7100,    6239,    5565,    5022,    4575,    4202,
+    3885,    3612,    3375,    3167,    2984,    2820,    2674,    2542,    2422,
+    2313,    2214,    2123,    2038,    1961,    1889,    1822,    1760,    1702,
+};
+
+void silk_apply_sine_window(
+    opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
+    const opus_int16            px[],               /* I    Pointer to input signal                                     */
+    const opus_int              win_type,           /* I    Selects a window type                                       */
+    const opus_int              length              /* I    Window length, multiple of 4                                */
+)
+{
+    opus_int   k, f_Q16, c_Q16;
+    opus_int32 S0_Q16, S1_Q16;
+
+    silk_assert( win_type == 1 || win_type == 2 );
+
+    /* Length must be in a range from 16 to 120 and a multiple of 4 */
+    silk_assert( length >= 16 && length <= 120 );
+    silk_assert( ( length & 3 ) == 0 );
+
+    /* Frequency */
+    k = ( length >> 2 ) - 4;
+    silk_assert( k >= 0 && k <= 26 );
+    f_Q16 = (opus_int)freq_table_Q16[ k ];
+
+    /* Factor used for cosine approximation */
+    c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 );
+    silk_assert( c_Q16 >= -32768 );
+
+    /* initialize state */
+    if( win_type == 1 ) {
+        /* start from 0 */
+        S0_Q16 = 0;
+        /* approximation of sin(f) */
+        S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 );
+    } else {
+        /* start from 1 */
+        S0_Q16 = ( (opus_int32)1 << 16 );
+        /* approximation of cos(f) */
+        S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 );
+    }
+
+    /* Uses the recursive equation:   sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f)    */
+    /* 4 samples at a time */
+    for( k = 0; k < length; k += 4 ) {
+        px_win[ k ]     = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] );
+        px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] );
+        S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1;
+        S0_Q16 = silk_min( S0_Q16, ( (opus_int32)1 << 16 ) );
+
+        px_win[ k + 2 ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] );
+        px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] );
+        S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16;
+        S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/autocorr_FIX.c b/src/main/jni/opus/silk/fixed/autocorr_FIX.c
new file mode 100644
index 000000000..de95c9869
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/autocorr_FIX.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/* Compute autocorrelation */
+void silk_autocorr(
+    opus_int32                  *results,           /* O    Result (length correlationCount)                            */
+    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
+    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
+    const opus_int              inputDataSize,      /* I    Length of input                                             */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int   corrCount;
+    corrCount = silk_min_int( inputDataSize, correlationCount );
+    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch);
+}
diff --git a/src/main/jni/opus/silk/fixed/burg_modified_FIX.c b/src/main/jni/opus/silk/fixed/burg_modified_FIX.c
new file mode 100644
index 000000000..db348295b
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/burg_modified_FIX.c
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "tuning_parameters.h"
+#include "pitch.h"
+
+#define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA                          25
+#define N_BITS_HEAD_ROOM            2
+#define MIN_RSHIFTS                 -16
+#define MAX_RSHIFTS                 (32 - QA)
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+    const opus_int16 *x_ptr;
+    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
+    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
+    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
+    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+
+    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+    if( rshifts > MAX_RSHIFTS ) {
+        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+        silk_assert( C0 > 0 );
+        rshifts = MAX_RSHIFTS;
+    } else {
+        lz = silk_CLZ32( C0 ) - 1;
+        rshifts_extra = N_BITS_HEAD_ROOM - lz;
+        if( rshifts_extra > 0 ) {
+            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+            C0 = silk_RSHIFT32( C0, rshifts_extra );
+        } else {
+            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+            C0 = silk_LSHIFT32( C0, -rshifts_extra );
+        }
+        rshifts += rshifts_extra;
+    }
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+    if( rshifts > 0 ) {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+            }
+        }
+    } else {
+        for( s = 0; s < nb_subfr; s++ ) {
+            int i;
+            opus_int32 d;
+            x_ptr = x + s * subfr_length;
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+            for( n = 1; n < D + 1; n++ ) {
+               for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+                  d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+               xcorr[ n - 1 ] += d;
+            }
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+            }
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        if( rshifts > -2 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp_QA = Af_QA[ k ];
+                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
+                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
+                }
+                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
+                }
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
+                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                }
+                tmp1 = -tmp1;                                                                           /* Q17 */
+                tmp2 = -tmp2;                                                                           /* Q17 */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                }
+            }
+        }
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
+        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
+        num  = 0;                                                                                       /* Q( -rshifts ) */
+        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
+        for( k = 0; k < n; k++ ) {
+            Atmp_QA = Af_QA[ k ];
+            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+            lz = silk_min( 32 - QA, lz );
+            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
+
+            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
+        }
+        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
+        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
+        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
+        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        if( silk_abs( num ) < nrg ) {
+            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+        } else {
+            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+        }
+
+        /* Update inverse prediction gain */
+        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+        if( tmp1 <= minInvGain_Q30 ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
+            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
+            /* Newton-Raphson iteration */
+            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+            if( num < 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc_Q31 = -rc_Q31;
+            }
+            invGain_Q30 = minInvGain_Q30;
+            reached_max_gain = 1;
+        } else {
+            invGain_Q30 = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af_QA[ k ];                                                                  /* QA */
+            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
+            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
+            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
+        }
+        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af_QA[ k ] = 0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
+            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
+            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+        }
+    }
+
+    if( reached_max_gain ) {
+        for( k = 0; k < D; k++ ) {
+            /* Scale coefficients */
+            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        if( rshifts > 0 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+            }
+        }
+        /* Approximate residual energy */
+        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+        *res_nrg_Q = -rshifts;
+    } else {
+        /* Return residual energy */
+        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
+        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
+        for( k = 0; k < D; k++ ) {
+            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
+            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
+            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
+            A_Q16[ k ] = -Atmp1;
+        }
+        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+        *res_nrg_Q = -rshifts;
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/corrMatrix_FIX.c b/src/main/jni/opus/silk/fixed/corrMatrix_FIX.c
new file mode 100644
index 000000000..c61727053
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/corrMatrix_FIX.c
@@ -0,0 +1,156 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/**********************************************************************
+ * Correlation Matrix Computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FIX.h"
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int16                *t,                                     /* I    Target vector [L]                                                           */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
+    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+)
+{
+    opus_int         lag, i;
+    const opus_int16 *ptr1, *ptr2;
+    opus_int32       inner_prod;
+
+    ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */
+    ptr2 = t;
+    /* Calculate X'*t */
+    if( rshifts > 0 ) {
+        /* Right shifting used */
+        for( lag = 0; lag < order; lag++ ) {
+            inner_prod = 0;
+            for( i = 0; i < L; i++ ) {
+                inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );
+            }
+            Xt[ lag ] = inner_prod; /* X[:,lag]'*t */
+            ptr1--; /* Go to next column of X */
+        }
+    } else {
+        silk_assert( rshifts == 0 );
+        for( lag = 0; lag < order; lag++ ) {
+            Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
+            ptr1--; /* Go to next column of X */
+        }
+    }
+}
+
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    const opus_int                  head_room,                              /* I    Desired headroom                                                            */
+    opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
+    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+)
+{
+    opus_int         i, j, lag, rshifts_local, head_room_rshifts;
+    opus_int32       energy;
+    const opus_int16 *ptr1, *ptr2;
+
+    /* Calculate energy to find shift used to fit in 32 bits */
+    silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
+    /* Add shifts to get the desired head room */
+    head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );
+
+    energy = silk_RSHIFT32( energy, head_room_rshifts );
+    rshifts_local += head_room_rshifts;
+
+    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
+    /* Remove contribution of first order - 1 samples */
+    for( i = 0; i < order - 1; i++ ) {
+        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
+    }
+    if( rshifts_local < *rshifts ) {
+        /* Adjust energy */
+        energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
+        rshifts_local = *rshifts;
+    }
+
+    /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
+    /* Fill out the diagonal of the correlation matrix */
+    matrix_ptr( XX, 0, 0, order ) = energy;
+    ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */
+    for( j = 1; j < order; j++ ) {
+        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
+        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
+        matrix_ptr( XX, j, j, order ) = energy;
+    }
+
+    ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */
+    /* Calculate the remaining elements of the correlation matrix */
+    if( rshifts_local > 0 ) {
+        /* Right shifting used */
+        for( lag = 1; lag < order; lag++ ) {
+            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+            energy = 0;
+            for( i = 0; i < L; i++ ) {
+                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local );
+            }
+            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+            matrix_ptr( XX, lag, 0, order ) = energy;
+            matrix_ptr( XX, 0, lag, order ) = energy;
+            for( j = 1; j < ( order - lag ); j++ ) {
+                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
+                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
+                matrix_ptr( XX, lag + j, j, order ) = energy;
+                matrix_ptr( XX, j, lag + j, order ) = energy;
+            }
+            ptr2--; /* Update pointer to first sample of next column (lag) in X */
+        }
+    } else {
+        for( lag = 1; lag < order; lag++ ) {
+            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+            energy = silk_inner_prod_aligned( ptr1, ptr2, L );
+            matrix_ptr( XX, lag, 0, order ) = energy;
+            matrix_ptr( XX, 0, lag, order ) = energy;
+            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+            for( j = 1; j < ( order - lag ); j++ ) {
+                energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) );
+                energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] );
+                matrix_ptr( XX, lag + j, j, order ) = energy;
+                matrix_ptr( XX, j, lag + j, order ) = energy;
+            }
+            ptr2--;/* Update pointer to first sample of next column (lag) in X */
+        }
+    }
+    *rshifts = rshifts_local;
+}
+
diff --git a/src/main/jni/opus/silk/fixed/encode_frame_FIX.c b/src/main/jni/opus/silk/fixed/encode_frame_FIX.c
new file mode 100644
index 000000000..b490986b9
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/encode_frame_FIX.c
@@ -0,0 +1,385 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
+    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
+    opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
+);
+
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+)
+{
+    /****************************/
+    /* Voice Activity Detection */
+    /****************************/
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+
+    /**************************************************/
+    /* Convert speech activity into VAD and DTX flags */
+    /**************************************************/
+    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
+        psEnc->sCmn.noSpeechCounter++;
+        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.inDTX = 0;
+        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
+            psEnc->sCmn.inDTX           = 0;
+        }
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+    } else {
+        psEnc->sCmn.noSpeechCounter    = 0;
+        psEnc->sCmn.inDTX              = 0;
+        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+    }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
+    ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
+    opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
+    opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
+    opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
+)
+{
+    silk_encoder_control_FIX sEncCtrl;
+    opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
+    opus_int16   *x_frame;
+    ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
+    silk_nsq_state sNSQ_copy, sNSQ_copy2;
+    opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+    opus_int32   gainsID, gainsID_lower, gainsID_upper;
+    opus_int16   gainMult_Q8;
+    opus_int16   ec_prevLagIndex_copy;
+    opus_int     ec_prevSignalType_copy;
+    opus_int8    LastGainIndex_copy2;
+    SAVE_STACK;
+
+    /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
+    LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Set up Input Pointers, and insert frame in input buffer   */
+    /*************************************************************/
+    /* start of frame to encode */
+    x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;
+
+    /***************************************/
+    /* Ensure smooth bandwidth transitions */
+    /***************************************/
+    silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /*******************************************/
+    /* Copy new frame to front of input buffer */
+    /*******************************************/
+    silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
+
+    if( !psEnc->sCmn.prefillFlag ) {
+        VARDECL( opus_int32, xfw_Q3 );
+        VARDECL( opus_int16, res_pitch );
+        VARDECL( opus_uint8, ec_buf_copy );
+        opus_int16 *res_pitch_frame;
+
+        ALLOC( res_pitch,
+               psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length
+                   + psEnc->sCmn.ltp_mem_length, opus_int16 );
+        /* start of pitch LPC residual frame */
+        res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;
+
+        /*****************************************/
+        /* Find pitch lags, initial LPC analysis */
+        /*****************************************/
+        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+        /************************/
+        /* Noise shape analysis */
+        /************************/
+        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );
+
+        /***************************************************/
+        /* Find linear prediction coefficients (LPC + LTP) */
+        /***************************************************/
+        silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+        /****************************************/
+        /* Process gains                        */
+        /****************************************/
+        silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
+
+        /*****************************************/
+        /* Prefiltering for noise shaper         */
+        /*****************************************/
+        ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );
+        silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
+
+        /****************************************/
+        /* Low Bitrate Redundant Encoding       */
+        /****************************************/
+        silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
+
+        /* Loop over quantizer and entropy coding to control bitrate */
+        maxIter = 6;
+        gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
+        found_lower = 0;
+        found_upper = 0;
+        gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        gainsID_lower = -1;
+        gainsID_upper = -1;
+        /* Copy part of the input state */
+        silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+        silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        seed_copy = psEnc->sCmn.indices.Seed;
+        ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+        ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+        ALLOC( ec_buf_copy, 1275, opus_uint8 );
+        for( iter = 0; ; iter++ ) {
+            if( gainsID == gainsID_lower ) {
+                nBits = nBits_lower;
+            } else if( gainsID == gainsID_upper ) {
+                nBits = nBits_upper;
+            } else {
+                /* Restore part of the input state */
+                if( iter > 0 ) {
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+                    psEnc->sCmn.indices.Seed = seed_copy;
+                    psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+                    psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+                }
+
+                /*****************************************/
+                /* Noise shaping quantization            */
+                /*****************************************/
+                if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+                    silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+                           sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+                           sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                } else {
+                    silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+                            sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                }
+
+                /****************************************/
+                /* Encode Parameters                    */
+                /****************************************/
+                silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+                /****************************************/
+                /* Encode Excitation Signal             */
+                /****************************************/
+                silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+                    psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+                nBits = ec_tell( psRangeEnc );
+
+                if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
+                    break;
+                }
+            }
+
+            if( iter == maxIter ) {
+                if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+                    /* Restore output state from earlier iteration that did meet the bitrate budget */
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+                    psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+                }
+                break;
+            }
+
+            if( nBits > maxBits ) {
+                if( found_lower == 0 && iter >= 2 ) {
+                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+                    sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 );
+                    found_upper = 0;
+                    gainsID_upper = -1;
+                } else {
+                    found_upper = 1;
+                    nBits_upper = nBits;
+                    gainMult_upper = gainMult_Q8;
+                    gainsID_upper = gainsID;
+                }
+            } else if( nBits < maxBits - 5 ) {
+                found_lower = 1;
+                nBits_lower = nBits;
+                gainMult_lower = gainMult_Q8;
+                if( gainsID != gainsID_lower ) {
+                    gainsID_lower = gainsID;
+                    /* Copy part of the output state */
+                    silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+                    silk_assert( psRangeEnc->offs <= 1275 );
+                    silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+                    silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+                    LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+                }
+            } else {
+                /* Within 5 bits of budget: close enough */
+                break;
+            }
+
+            if( ( found_lower & found_upper ) == 0 ) {
+                /* Adjust gain according to high-rate rate/distortion curve */
+                opus_int32 gain_factor_Q16;
+                gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+                gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
+                if( nBits > maxBits ) {
+                    gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
+                }
+                gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+            } else {
+                /* Adjust gain by interpolating */
+                gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
+                /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+                if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+                } else
+                if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+                }
+            }
+
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+            }
+ 
+            /* Quantize gains */
+            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
+            silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
+                  &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+            /* Unique identifier of gains vector */
+            gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        }
+    }
+
+    /* Update input buffer */
+    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
+
+    /* Exit without entropy coding */
+    if( psEnc->sCmn.prefillFlag ) {
+        /* No payload */
+        *pnBytesOut = 0;
+        RESTORE_STACK;
+        return ret;
+    }
+
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+    /****************************************/
+    /* Finalize payload                     */
+    /****************************************/
+    psEnc->sCmn.first_frame_after_reset = 0;
+    /* Payload size */
+    *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+    RESTORE_STACK;
+    return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
+    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
+    opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
+)
+{
+    opus_int32   TempGains_Q16[ MAX_NB_SUBFR ];
+    SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
+    silk_nsq_state sNSQ_LBRR;
+
+    /*******************************************/
+    /* Control use of inband LBRR              */
+    /*******************************************/
+    if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
+        psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+
+        /* Copy noise shaping quantizer state and quantization indices from regular encoding */
+        silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+        /* Save original gains */
+        silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+
+        if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+            /* First frame in packet or previous frame not LBRR coded */
+            psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+            /* Increase Gains to get target LBRR rate */
+            psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases;
+            psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+        }
+
+        /* Decode to get gains in sync with decoder         */
+        /* Overwrite unquantized gains with quantized gains */
+        silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices,
+            &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+        /*****************************************/
+        /* Noise shaping quantization            */
+        /*****************************************/
+        if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+            silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+                psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+                psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+        } else {
+            silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+                psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+                psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+        }
+
+        /* Restore original gains */
+        silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/find_LPC_FIX.c b/src/main/jni/opus/silk/fixed/find_LPC_FIX.c
new file mode 100644
index 000000000..783d32e20
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/find_LPC_FIX.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Finds LPC vector from correlations, and converts to NLSF */
+void silk_find_LPC_FIX(
+    silk_encoder_state              *psEncC,                                /* I/O  Encoder state                                                               */
+    opus_int16                      NLSF_Q15[],                             /* O    NLSFs                                                                       */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+    const opus_int32                minInvGain_Q30                          /* I    Inverse of max prediction gain                                              */
+)
+{
+    opus_int     k, subfr_length;
+    opus_int32   a_Q16[ MAX_LPC_ORDER ];
+    opus_int     isInterpLower, shift;
+    opus_int32   res_nrg0, res_nrg1;
+    opus_int     rshift0, rshift1;
+
+    /* Used only for LSF interpolation */
+    opus_int32   a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg;
+    opus_int     res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
+    opus_int16   a_tmp_Q12[ MAX_LPC_ORDER ];
+    opus_int16   NLSF0_Q15[ MAX_LPC_ORDER ];
+    SAVE_STACK;
+
+    subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
+
+    /* Default: no interpolation */
+    psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+    /* Burg AR analysis for the full frame */
+    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );
+
+    if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        VARDECL( opus_int16, LPC_res );
+
+        /* Optimal solution for last 10 ms */
+        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );
+
+        /* subtract residual energy here, as that's easier than adding it to the    */
+        /* residual energy of the first 10 ms in each iteration of the search below */
+        shift = res_tmp_nrg_Q - res_nrg_Q;
+        if( shift >= 0 ) {
+            if( shift < 32 ) {
+                res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift );
+            }
+        } else {
+            silk_assert( shift > -32 );
+            res_nrg   = silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg;
+            res_nrg_Q = res_tmp_nrg_Q;
+        }
+
+        /* Convert to NLSFs */
+        silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder );
+
+        ALLOC( LPC_res, 2 * subfr_length, opus_int16 );
+
+        /* Search over interpolation indices to find the one with lowest residual energy */
+        for( k = 3; k >= 0; k-- ) {
+            /* Interpolate NLSFs for first half */
+            silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
+
+            /* Convert to LPC for residual energy evaluation */
+            silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
+
+            /* Calculate residual energy with NLSF interpolation */
+            silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
+
+            silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder,                subfr_length - psEncC->predictLPCOrder );
+            silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );
+
+            /* Add subframe energies from first half frame */
+            shift = rshift0 - rshift1;
+            if( shift >= 0 ) {
+                res_nrg1         = silk_RSHIFT( res_nrg1, shift );
+                res_nrg_interp_Q = -rshift0;
+            } else {
+                res_nrg0         = silk_RSHIFT( res_nrg0, -shift );
+                res_nrg_interp_Q = -rshift1;
+            }
+            res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 );
+
+            /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */
+            shift = res_nrg_interp_Q - res_nrg_Q;
+            if( shift >= 0 ) {
+                if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) {
+                    isInterpLower = silk_TRUE;
+                } else {
+                    isInterpLower = silk_FALSE;
+                }
+            } else {
+                if( -shift < 32 ) {
+                    if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) {
+                        isInterpLower = silk_TRUE;
+                    } else {
+                        isInterpLower = silk_FALSE;
+                    }
+                } else {
+                    isInterpLower = silk_FALSE;
+                }
+            }
+
+            /* Determine whether current interpolated NLSFs are best so far */
+            if( isInterpLower == silk_TRUE ) {
+                /* Interpolation has lower residual energy */
+                res_nrg   = res_nrg_interp;
+                res_nrg_Q = res_nrg_interp_Q;
+                psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k;
+            }
+        }
+    }
+
+    if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
+        silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder );
+    }
+
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/find_LTP_FIX.c b/src/main/jni/opus/silk/fixed/find_LTP_FIX.c
new file mode 100644
index 000000000..8c4d70376
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/find_LTP_FIX.c
@@ -0,0 +1,244 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Head room for correlations */
+#define LTP_CORRS_HEAD_ROOM                             2
+
+void silk_fit_LTP(
+    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
+    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
+);
+
+void silk_find_LTP_FIX(
+    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */
+    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */
+    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */
+    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */
+    const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */
+    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */
+    const opus_int                  subfr_length,                           /* I    subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
+    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+)
+{
+    opus_int   i, k, lshift;
+    const opus_int16 *r_ptr, *lag_ptr;
+    opus_int16 *b_Q14_ptr;
+
+    opus_int32 regu;
+    opus_int32 *WLTP_ptr;
+    opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;
+    opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;
+
+    opus_int32 temp32, denom32;
+    opus_int   extra_shifts;
+    opus_int   rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;
+    opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;
+    opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
+    opus_int32 wd, m_Q12;
+
+    b_Q14_ptr = b_Q14;
+    WLTP_ptr  = WLTP;
+    r_ptr     = &r_lpc[ mem_offset ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
+
+        silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */
+
+        /* Assure headroom */
+        LZs = silk_CLZ32( rr[k] );
+        if( LZs < LTP_CORRS_HEAD_ROOM ) {
+            rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );
+            rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
+        }
+        corr_rshifts[ k ] = rr_shifts;
+        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] );  /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
+
+        /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
+        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] );  /* Rr_fix_ptr   in Q( -corr_rshifts[ k ] ) */
+        if( corr_rshifts[ k ] > rr_shifts ) {
+            rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
+        }
+        silk_assert( rr[ k ] >= 0 );
+
+        regu = 1;
+        regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER );
+
+        silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */
+
+        /* Limit and store in Q14 */
+        silk_fit_LTP( b_Q16, b_Q14_ptr );
+
+        /* Calculate residual energy */
+        nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */
+
+        /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */
+        extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );
+        denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+            silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts );    /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+        denom32 = silk_max( denom32, 1 );
+        silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX );                       /* Wght always < 0.5 in Q0 */
+        temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 );             /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */
+        temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 );               /* Q26 */
+
+        /* Limit temp such that the below scaling never wraps around */
+        WLTP_max = 0;
+        for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
+            WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );
+        }
+        lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */
+        silk_assert( 26 - 18 + lshift >= 0 );
+        if( 26 - 18 + lshift < 31 ) {
+            temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );
+        }
+
+        silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */
+
+        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */
+        silk_assert( w[k] >= 0 );
+
+        r_ptr     += subfr_length;
+        b_Q14_ptr += LTP_ORDER;
+        WLTP_ptr  += LTP_ORDER * LTP_ORDER;
+    }
+
+    maxRshifts = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );
+    }
+
+    /* Compute LTP coding gain */
+    if( LTPredCodGain_Q7 != NULL ) {
+        LPC_LTP_res_nrg = 0;
+        LPC_res_nrg     = 0;
+        silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */
+        for( k = 0; k < nb_subfr; k++ ) {
+            LPC_res_nrg     = silk_ADD32( LPC_res_nrg,     silk_RSHIFT( silk_ADD32( silk_SMULWB(  rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+            LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+        }
+        LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */
+
+        div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );
+        *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );
+
+        silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );
+    }
+
+    /* smoothing */
+    /* d = sum( B, 1 ); */
+    b_Q14_ptr = b_Q14;
+    for( k = 0; k < nb_subfr; k++ ) {
+        d_Q14[ k ] = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            d_Q14[ k ] += b_Q14_ptr[ i ];
+        }
+        b_Q14_ptr += LTP_ORDER;
+    }
+
+    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
+
+    /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */
+    max_abs_d_Q14 = 0;
+    max_w_bits    = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );
+        /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */
+        /* Find bits needed in Q( 18 - maxRshifts ) */
+        max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );
+    }
+
+    /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */
+    silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );
+
+    /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */
+    extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;
+
+    /* Subtract what we got available; bits in output var plus maxRshifts */
+    extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */
+    extra_shifts = silk_max_int( extra_shifts, 0 );
+
+    maxRshifts_wxtra = maxRshifts + extra_shifts;
+
+    temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */
+    wd = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* w has at least 2 bits of headroom so no overflow should happen */
+        temp32 = silk_ADD32( temp32,                     silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) );                      /* Q( 18 - maxRshifts_wxtra ) */
+        wd     = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */
+    }
+    m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );
+
+    b_Q14_ptr = b_Q14;
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */
+        if( 2 - corr_rshifts[k] > 0 ) {
+            temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );
+        } else {
+            temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );
+        }
+
+        g_Q26 = silk_MUL(
+            silk_DIV32(
+                SILK_FIX_CONST( LTP_SMOOTHING, 26 ),
+                silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ),                          /* Q10 */
+            silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) );    /* Q16 */
+
+        temp32 = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 );     /* 1638_Q14 = 0.1_Q0 */
+            temp32 += delta_b_Q14[ i ];                                 /* Q14 */
+        }
+        temp32 = silk_DIV32( g_Q26, temp32 );                           /* Q14 -> Q12 */
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );
+        }
+        b_Q14_ptr += LTP_ORDER;
+    }
+}
+
+void silk_fit_LTP(
+    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
+    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
+)
+{
+    opus_int i;
+
+    for( i = 0; i < LTP_ORDER; i++ ) {
+        LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/find_pitch_lags_FIX.c b/src/main/jni/opus/silk/fixed/find_pitch_lags_FIX.c
new file mode 100644
index 000000000..620f8dcd2
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/find_pitch_lags_FIX.c
@@ -0,0 +1,145 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Find pitch lags */
+void silk_find_pitch_lags_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int16                      res[],                                  /* O    residual                                                                    */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
+)
+{
+    opus_int   buf_len, i, scale;
+    opus_int32 thrhld_Q13, res_nrg;
+    const opus_int16 *x_buf, *x_buf_ptr;
+    VARDECL( opus_int16, Wsig );
+    opus_int16 *Wsig_ptr;
+    opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+    opus_int16 rc_Q15[    MAX_FIND_PITCH_LPC_ORDER ];
+    opus_int32 A_Q24[     MAX_FIND_PITCH_LPC_ORDER ];
+    opus_int16 A_Q12[     MAX_FIND_PITCH_LPC_ORDER ];
+    SAVE_STACK;
+
+    /******************************************/
+    /* Set up buffer lengths etc based on Fs  */
+    /******************************************/
+    buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
+
+    /* Safety check */
+    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+    x_buf = x - psEnc->sCmn.ltp_mem_length;
+
+    /*************************************/
+    /* Estimate LPC AR coefficients      */
+    /*************************************/
+
+    /* Calculate windowed signal */
+
+    ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
+
+    /* First LA_LTP samples */
+    x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+    Wsig_ptr  = Wsig;
+    silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
+
+    /* Middle un - windowed samples */
+    Wsig_ptr  += psEnc->sCmn.la_pitch;
+    x_buf_ptr += psEnc->sCmn.la_pitch;
+    silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
+
+    /* Last LA_LTP samples */
+    Wsig_ptr  += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+    x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+    silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
+
+    /* Calculate autocorrelation sequence */
+    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
+
+    /* Add white noise, as fraction of energy */
+    auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
+
+    /* Calculate the reflection coefficients using schur */
+    res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Prediction gain */
+    psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 );
+
+    /* Convert reflection coefficients to prediction coefficients */
+    silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Convert From 32 bit Q24 to 16 bit Q12 coefs */
+    for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) {
+        A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) );
+    }
+
+    /* Do BWE */
+    silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) );
+
+    /*****************************************/
+    /* LPC analysis filtering                */
+    /*****************************************/
+    silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
+        /* Threshold for pitch estimator */
+        thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 );
+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder );
+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   21  ), psEnc->sCmn.speech_activity_Q8 );
+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15,  13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );
+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   14 ), psEnc->sCmn.input_tilt_Q15 );
+        thrhld_Q13 = silk_SAT16(  thrhld_Q13 );
+
+        /*****************************************/
+        /* Call pitch estimator                  */
+        /*****************************************/
+        if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
+                &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
+                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+                psEnc->sCmn.arch) == 0 )
+        {
+            psEnc->sCmn.indices.signalType = TYPE_VOICED;
+        } else {
+            psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        }
+    } else {
+        silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+        psEnc->sCmn.indices.lagIndex = 0;
+        psEnc->sCmn.indices.contourIndex = 0;
+        psEnc->LTPCorr_Q15 = 0;
+    }
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/find_pred_coefs_FIX.c b/src/main/jni/opus/silk/fixed/find_pred_coefs_FIX.c
new file mode 100644
index 000000000..5c22f8288
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/find_pred_coefs_FIX.c
@@ -0,0 +1,147 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+void silk_find_pred_coefs_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    const opus_int16                res_pitch[],                            /* I    Residual from pitch analysis                                                */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    opus_int         i;
+    opus_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
+    opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];
+    const opus_int16 *x_ptr;
+    opus_int16       *x_pre_ptr;
+    VARDECL( opus_int16, LPC_in_pre );
+    opus_int32       tmp, min_gain_Q16, minInvGain_Q30;
+    opus_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];
+    SAVE_STACK;
+
+    /* weighting for weighted least squares */
+    min_gain_Q16 = silk_int32_MAX >> 6;
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] );
+    }
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        /* Divide to Q16 */
+        silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 );
+        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
+        invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );
+
+        /* Ensure Wght_Q15 a minimum value 1 */
+        invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 );
+
+        /* Square the inverted gains */
+        silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) );
+        tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );
+        Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 );
+
+        /* Invert the inverted and normalized gains */
+        local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );
+    }
+
+    ALLOC( LPC_in_pre,
+           psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder
+               + psEnc->sCmn.frame_length, opus_int16 );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        VARDECL( opus_int32, WLTP );
+
+        /**********/
+        /* VOICED */
+        /**********/
+        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+        ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
+
+        /* LTP analysis */
+        silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
+            res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
+            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
+
+        /* Quantize LTP gain parameters */
+        silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+
+        /* Control LTP scaling */
+        silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
+
+        /* Create LTP residual */
+        silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14,
+            psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    } else {
+        /************/
+        /* UNVOICED */
+        /************/
+        /* Create signal with prepended subframes, scaled by inverse gains */
+        x_ptr     = x - psEnc->sCmn.predictLPCOrder;
+        x_pre_ptr = LPC_in_pre;
+        for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+            silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ],
+                psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
+            x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
+            x_ptr     += psEnc->sCmn.subfr_length;
+        }
+
+        silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
+        psEncCtrl->LTPredCodGain_Q7 = 0;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
+    }
+
+    /* Limit on total predictive coding gain */
+    if( psEnc->sCmn.first_frame_after_reset ) {
+        minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
+    } else {        
+        minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) );      /* Q16 */
+        minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, 
+            silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), 
+                silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
+    }
+
+    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+    silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 );
+
+    /* Quantize LSFs */
+    silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
+
+    /* Calculate residual energy using quantized LPC coefficients */
+    silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
+        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    /* Copy to prediction struct for use in next frame for interpolation */
+    silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/k2a_FIX.c b/src/main/jni/opus/silk/fixed/k2a_FIX.c
new file mode 100644
index 000000000..5fee599bc
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/k2a_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A_Q24[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] );
+        }
+        A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/k2a_Q16_FIX.c b/src/main/jni/opus/silk/fixed/k2a_Q16_FIX.c
new file mode 100644
index 000000000..3b0398754
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/k2a_Q16_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_Q16(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A_Q24[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] );
+        }
+        A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/main_FIX.h b/src/main/jni/opus/silk/fixed/main_FIX.h
new file mode 100644
index 000000000..a56ca07a2
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/main_FIX.h
@@ -0,0 +1,257 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FIX_H
+#define SILK_MAIN_FIX_H
+
+#include "SigProc_FIX.h"
+#include "structs_FIX.h"
+#include "control.h"
+#include "main.h"
+#include "PLC.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#endif
+
+#define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FIX
+#define silk_encode_frame_Fxx       silk_encode_frame_FIX
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                             /* I/O  Encoder states                                                              */
+);
+
+/* Encoder main function */
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+);
+
+/* Encoder main function */
+opus_int silk_encode_frame_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
+    ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
+    opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
+    opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
+    opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
+);
+
+/* Initializes the Silk encoder state */
+opus_int silk_init_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+);
+
+/* Control the Silk encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
+    silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
+    const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
+    const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
+    const opus_int                  channelNb,                              /* I    Channel number                                                              */
+    const opus_int                  force_fs_kHz
+);
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    const silk_encoder_control_FIX  *psEncCtrl,                             /* I    Encoder control                                                             */
+    opus_int32                      xw_Q10[],                               /* O    Weighted signal                                                             */
+    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
+    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+          opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
+          opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
+    const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
+    const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
+    const opus_int                  length,                                 /* I    Length of input                                                             */
+    const opus_int                  order                                   /* I    Correlation order (even)                                                    */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/**********************************************/
+/* Prediction Analysis                        */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int16                      res[],                                  /* O    residual                                                                    */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    const opus_int16                res_pitch[],                            /* I    Residual from pitch analysis                                                */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/* LPC analysis */
+void silk_find_LPC_FIX(
+    silk_encoder_state              *psEncC,                                /* I/O  Encoder state                                                               */
+    opus_int16                      NLSF_Q15[],                             /* O    NLSFs                                                                       */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+    const opus_int32                minInvGain_Q30                          /* I    Inverse of max prediction gain                                              */
+);
+
+/* LTP analysis */
+void silk_find_LTP_FIX(
+    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */
+    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */
+    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */
+    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */
+    const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */
+    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */
+    const opus_int                  subfr_length,                           /* I    subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
+    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+);
+
+void silk_LTP_analysis_filter_FIX(
+    opus_int16                      *LTP_res,                               /* O    LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length )  */
+    const opus_int16                *x,                                     /* I    Pointer to input signal with at least max( pitchL ) preceding samples       */
+    const opus_int16                LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I    LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe                   */
+    const opus_int                  pitchL[ MAX_NB_SUBFR ],                 /* I    Pitch lag, one for each subframe                                            */
+    const opus_int32                invGains_Q16[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains, one for each subframe                           */
+    const opus_int                  subfr_length,                           /* I    Length of each subframe                                                     */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  pre_length                              /* I    Length of the preceding samples starting at &x[0] for each subframe         */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FIX(
+          opus_int32                nrgs[ MAX_NB_SUBFR ],                   /* O    Residual energy per subframe                                                */
+          opus_int                  nrgsQ[ MAX_NB_SUBFR ],                  /* O    Q value per subframe                                                        */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+          opus_int16                a_Q12[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                                                */
+    const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+);
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+opus_int32 silk_residual_energy16_covar_FIX(
+    const opus_int16                *c,                                     /* I    Prediction vector                                                           */
+    const opus_int32                *wXX,                                   /* I    Correlation matrix                                                          */
+    const opus_int32                *wXx,                                   /* I    Correlation vector                                                          */
+    opus_int32                      wxx,                                    /* I    Signal energy                                                               */
+    opus_int                        D,                                      /* I    Dimension                                                                   */
+    opus_int                        cQ                                      /* I    Q value for c vector 0 - 15                                                 */
+);
+
+/* Processing of gains */
+void silk_process_gains_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    const opus_int                  head_room,                              /* I    Desired headroom                                                            */
+    opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
+    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+);
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int16                *t,                                     /* I    Target vector [L]                                                           */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
+    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+);
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FIX(
+    opus_int32                      *XX,                                    /* I/O  Correlation matrices                                                        */
+    opus_int32                      *xx,                                    /* I/O  Correlation values                                                          */
+    opus_int32                      noise,                                  /* I    Noise to add                                                                */
+    opus_int                        D                                       /* I    Dimension of XX                                                             */
+);
+
+/* Solves Ax = b, assuming A is symmetric */
+void silk_solve_LDL_FIX(
+    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
+    opus_int                        M,                                      /* I    Size of matrix                                                              */
+    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
+    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
+);
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* FORCE_CPP_BUILD */
+#endif /* SILK_MAIN_FIX_H */
diff --git a/src/main/jni/opus/silk/fixed/noise_shape_analysis_FIX.c b/src/main/jni/opus/silk/fixed/noise_shape_analysis_FIX.c
new file mode 100644
index 000000000..e24d2e9d3
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/noise_shape_analysis_FIX.c
@@ -0,0 +1,445 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
+/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
+/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
+/* coefficient in an array of coefficients, for monic filters.                                    */
+static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
+    const opus_int32     *coefs_Q24,
+    opus_int             lambda_Q16,
+    opus_int             order
+) {
+    opus_int   i;
+    opus_int32 gain_Q24;
+
+    lambda_Q16 = -lambda_Q16;
+    gain_Q24 = coefs_Q24[ order - 1 ];
+    for( i = order - 2; i >= 0; i-- ) {
+        gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 );
+    }
+    gain_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 );
+    return silk_INVERSE32_varQ( gain_Q24, 40 );
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
+static OPUS_INLINE void limit_warped_coefs(
+    opus_int32           *coefs_syn_Q24,
+    opus_int32           *coefs_ana_Q24,
+    opus_int             lambda_Q16,
+    opus_int32           limit_Q24,
+    opus_int             order
+) {
+    opus_int   i, iter, ind = 0;
+    opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16;
+    opus_int32 nom_Q16, den_Q24;
+
+    /* Convert to monic coefficients */
+    lambda_Q16 = -lambda_Q16;
+    for( i = order - 1; i > 0; i-- ) {
+        coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+        coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+    }
+    lambda_Q16 = -lambda_Q16;
+    nom_Q16  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16,        lambda_Q16 );
+    den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+    gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+    den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+    gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+    for( i = 0; i < order; i++ ) {
+        coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+        coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+    }
+
+    for( iter = 0; iter < 10; iter++ ) {
+        /* Find maximum absolute value */
+        maxabs_Q24 = -1;
+        for( i = 0; i < order; i++ ) {
+            tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) );
+            if( tmp > maxabs_Q24 ) {
+                maxabs_Q24 = tmp;
+                ind = i;
+            }
+        }
+        if( maxabs_Q24 <= limit_Q24 ) {
+            /* Coefficients are within range - done */
+            return;
+        }
+
+        /* Convert back to true warped coefficients */
+        for( i = 1; i < order; i++ ) {
+            coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+            coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+        }
+        gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 );
+        gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+            coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+        }
+
+        /* Apply bandwidth expansion */
+        chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ(
+            silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ),
+            silk_MUL( maxabs_Q24, ind + 1 ), 22 );
+        silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 );
+        silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 );
+
+        /* Convert to monic warped coefficients */
+        lambda_Q16 = -lambda_Q16;
+        for( i = order - 1; i > 0; i-- ) {
+            coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+            coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+        }
+        lambda_Q16 = -lambda_Q16;
+        nom_Q16  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16,        lambda_Q16 );
+        den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+        gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+        den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+        gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+            coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+        }
+    }
+    silk_assert( 0 );
+}
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************/
+void silk_noise_shape_analysis_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
+    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int     k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+    opus_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+    opus_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+    opus_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+    opus_int32   auto_corr[     MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int32   refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR1_Q24[       MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR2_Q24[       MAX_SHAPE_LPC_ORDER ];
+    VARDECL( opus_int16, x_windowed );
+    const opus_int16 *x_ptr, *pitch_res_ptr;
+    SAVE_STACK;
+
+    /* Point to start of first LPC analysis block */
+    x_ptr = x - psEnc->sCmn.la_shape;
+
+    /****************/
+    /* GAIN CONTROL */
+    /****************/
+    SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+    /* Input quality is the average of the quality in the lowest two VAD bands */
+    psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+        + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+    psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+        SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+    /* Reduce coding SNR during low speech activity */
+    if( psEnc->sCmn.useCBR == 0 ) {
+        b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+        b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ),                                       /* Q11*/
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) );     /* Q12*/
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce gains for periodic signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+    } else {
+        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+            SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+    }
+
+    /*************************/
+    /* SPARSENESS PROCESSING */
+    /*************************/
+    /* Set quantizer offset */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Initially set to 0; may be overruled in process_gains(..) */
+        psEnc->sCmn.indices.quantOffsetType = 0;
+        psEncCtrl->sparseness_Q8 = 0;
+    } else {
+        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+        nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
+        energy_variation_Q7 = 0;
+        log_energy_prev_Q7  = 0;
+        pitch_res_ptr = pitch_res;
+        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+            silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+            nrg += silk_RSHIFT( nSamples, scale );           /* Q(-scale)*/
+
+            log_energy_Q7 = silk_lin2log( nrg );
+            if( k > 0 ) {
+                energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+            }
+            log_energy_prev_Q7 = log_energy_Q7;
+            pitch_res_ptr += nSamples;
+        }
+
+        psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+            SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+        /* Set quantization offset depending on sparseness measure */
+        if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+
+        /* Increase coding SNR for sparse signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+    }
+
+    /*******************************/
+    /* Control bandwidth expansion */
+    /*******************************/
+    /* More BWE for signals with high prediction gain */
+    strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+    BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+        silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+    delta_Q16  = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+        SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+    BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+    BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+    /* BWExp1 will be applied after BWExp2, so make it relative */
+    BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+    if( psEnc->sCmn.warping_Q16 > 0 ) {
+        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+        warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+    } else {
+        warping_Q16 = 0;
+    }
+
+    /********************************************/
+    /* Compute noise shaping AR coefs and gains */
+    /********************************************/
+    ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Apply window: sine slope followed by flat part followed by cosine slope */
+        opus_int shift, slope_part, flat_part;
+        flat_part = psEnc->sCmn.fs_kHz * 3;
+        slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
+
+        silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+        shift = slope_part;
+        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
+        shift += flat_part;
+        silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+        /* Update pointer: next LPC analysis block */
+        x_ptr += psEnc->sCmn.subfr_length;
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Calculate warped auto correlation */
+            silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+        } else {
+            /* Calculate regular auto correlation */
+            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+        }
+
+        /* Add white noise, as a fraction of energy */
+        auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+            SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+        /* Calculate the reflection coefficients using schur */
+        nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+        silk_assert( nrg >= 0 );
+
+        /* Convert reflection coefficients to prediction coefficients */
+        silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+        Qnrg = -scale;          /* range: -12...30*/
+        silk_assert( Qnrg >= -12 );
+        silk_assert( Qnrg <=  30 );
+
+        /* Make sure that Qnrg is an even number */
+        if( Qnrg & 1 ) {
+            Qnrg -= 1;
+            nrg >>= 1;
+        }
+
+        tmp32 = silk_SQRT_APPROX( nrg );
+        Qnrg >>= 1;             /* range: -6...15*/
+
+        psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg );
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Adjust gain for warping */
+            gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+            silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+            if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+               psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+            } else {
+               psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+            }
+        }
+
+        /* Bandwidth expansion for synthesis filter shaping */
+        silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+        /* Compute noise shaping filter coefficients */
+        silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
+
+        /* Bandwidth expansion for analysis filter shaping */
+        silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+        silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+        /* Ratio of prediction gains, in energy domain */
+        pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+        nrg         = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+        /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+        pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+        psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+        /* Convert to monic warped prediction coefficients and limit absolute values */
+        limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+        /* Convert from Q24 to Q13 and store in int16 */
+        for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
+            psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+            psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+        }
+    }
+
+    /*****************/
+    /* Gain tweaking */
+    /*****************/
+    /* Increase gains during low speech activity and put lower limit on gains */
+    gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+    gain_add_Q16  = silk_log2lin(  silk_SMLAWB(  SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+    silk_assert( gain_mult_Q16 > 0 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+        silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+        psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+    }
+
+    gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+        psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+    }
+
+    /************************************************/
+    /* Control low-frequency shaping and noise tilt */
+    /************************************************/
+    /* Less low frequency shaping for noisy inputs */
+    strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+        SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+    strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+        opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+            /* Pack two coefficients in one int32 */
+            psEncCtrl->LF_shp_Q14[ k ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
+            psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        }
+        silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+        Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+                silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+    } else {
+        b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+        /* Pack two coefficients in one int32 */
+        psEncCtrl->LF_shp_Q14[ 0 ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+            silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+        psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+        }
+        Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+    }
+
+    /****************************/
+    /* HARMONIC SHAPING CONTROL */
+    /****************************/
+    /* Control boosting of harmonic frequencies */
+    HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+        psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+    /* More harmonic boost for noisy input signals */
+    HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+        SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* More harmonic noise shaping for high bitrates or noisy input */
+        HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+                SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+                psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+        /* Less harmonic noise shaping for less periodic signals */
+        HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+            silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+    } else {
+        HarmShapeGain_Q16 = 0;
+    }
+
+    /*************************/
+    /* Smooth over subframes */
+    /*************************/
+    for( k = 0; k < MAX_NB_SUBFR; k++ ) {
+        psShapeSt->HarmBoost_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16,     HarmBoost_Q16     - psShapeSt->HarmBoost_smth_Q16,     SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->HarmShapeGain_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->Tilt_smth_Q16 =
+            silk_SMLAWB( psShapeSt->Tilt_smth_Q16,          Tilt_Q16          - psShapeSt->Tilt_smth_Q16,          SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+        psEncCtrl->HarmBoost_Q14[ k ]     = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16,     2 );
+        psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+        psEncCtrl->Tilt_Q14[ k ]          = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16,          2 );
+    }
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/pitch_analysis_core_FIX.c b/src/main/jni/opus/silk/fixed/pitch_analysis_core_FIX.c
new file mode 100644
index 000000000..1641a0fbc
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/pitch_analysis_core_FIX.c
@@ -0,0 +1,744 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "stack_alloc.h"
+#include "debug.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE    22
+#define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
+#define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
+#define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
+#define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
+#define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
+#define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
+#define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
+#define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
+#define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
+#define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
+#define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
+
+typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
+
+/************************************************************/
+/* Internally used functions                                */
+/************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
+    const opus_int16  frame[],                         /* I vector to correlate         */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
+);
+
+static void silk_P_Ana_calc_energy_st3(
+    silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
+    const opus_int16  frame[],                         /* I vector to calc energy in    */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of one 5 ms subframe */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity                       /* I Complexity setting          */
+);
+
+/*************************************************************/
+/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
+/*************************************************************/
+opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
+    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
+    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
+    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    VARDECL( opus_int16, frame_8kHz );
+    VARDECL( opus_int16, frame_4kHz );
+    opus_int32 filt_state[ 6 ];
+    const opus_int16 *input_frame_ptr;
+    opus_int   i, k, d, j;
+    VARDECL( opus_int16, C );
+    VARDECL( opus_int32, xcorr32 );
+    const opus_int16 *target_ptr, *basis_ptr;
+    opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
+    opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
+    VARDECL( opus_int16, d_comp );
+    opus_int32 sum, threshold, lag_counter;
+    opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
+    opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+    VARDECL( silk_pe_stage3_vals, energies_st3 );
+    VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
+    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
+    opus_int   sf_length;
+    opus_int   min_lag;
+    opus_int   max_lag;
+    opus_int32 contour_bias_Q15, diff;
+    opus_int   nb_cbk_search, cbk_size;
+    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
+    const opus_int8 *Lag_CB_ptr;
+    SAVE_STACK;
+    /* Check for valid sampling frequency */
+    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+    /* Check for valid complexity setting */
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
+    silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
+
+    /* Set up frame lengths max / min lag for the sampling frequency */
+    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
+    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
+    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
+
+    /* Resample from input sampled at Fs_kHz to 8 kHz */
+    ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
+    if( Fs_kHz == 16 ) {
+        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+        silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
+    } else if( Fs_kHz == 12 ) {
+        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+        silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
+    } else {
+        silk_assert( Fs_kHz == 8 );
+        silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
+    }
+
+    /* Decimate again to 4 kHz */
+    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
+    ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
+    silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
+
+    /* Low-pass filter */
+    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+        frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
+    }
+
+    /*******************************************************************************
+    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
+    ** find scaling as max scaling for each 8kHz(?) subframe
+    *******************************************************************************/
+
+    /* Inner product is calculated with different lengths, so scale for the worst case */
+    silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
+    if( shift > 0 ) {
+        shift = silk_RSHIFT( shift, 1 );
+        for( i = 0; i < frame_length_4kHz; i++ ) {
+            frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
+        }
+    }
+
+    /******************************************************************************
+    * FIRST STAGE, operating in 4 khz
+    ******************************************************************************/
+    ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
+    ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
+    silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
+    target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
+    for( k = 0; k < nb_subfr >> 1; k++ ) {
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_4kHz );
+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+        basis_ptr = target_ptr - MIN_LAG_4KHZ;
+
+        /* Check that we are within range of the array */
+        silk_assert( basis_ptr >= frame_4kHz );
+        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
+
+        /* Calculate first vector products before loop */
+        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
+        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
+        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
+        normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
+
+        matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
+            (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
+
+        /* From now on normalizer is computed recursively */
+        for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
+            basis_ptr--;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_4kHz );
+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+            cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
+
+            /* Add contribution of new sample and remove contribution from oldest sample */
+            normalizer = silk_ADD32( normalizer,
+                silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
+                silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
+
+            matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
+                (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
+        }
+        /* Update target pointer */
+        target_ptr += SF_LENGTH_8KHZ;
+    }
+
+    /* Combine two subframes into single correlation measure and apply short-lag bias */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+            sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
+                + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
+        }
+    } else {
+        /* Only short-lag bias */
+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+            sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
+        }
+    }
+
+    /* Sort */
+    length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
+    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
+                                          length_d_srch );
+
+    /* Escape if correlation is very low already here */
+    Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
+    if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr_Q15  = 0;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        RESTORE_STACK;
+        return 1;
+    }
+
+    threshold = silk_SMULWB( search_thres1_Q16, Cmax );
+    for( i = 0; i < length_d_srch; i++ ) {
+        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+        if( C[ i ] > threshold ) {
+            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
+        } else {
+            length_d_srch = i;
+            break;
+        }
+    }
+    silk_assert( length_d_srch > 0 );
+
+    ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
+    for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
+        d_comp[ i - D_COMP_MIN ] = 0;
+    }
+    for( i = 0; i < length_d_srch; i++ ) {
+        d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
+    }
+
+    /* Convolution */
+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+        d_comp[ i - D_COMP_MIN ] +=
+            d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
+    }
+
+    length_d_srch = 0;
+    for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
+        if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
+            d_srch[ length_d_srch ] = i;
+            length_d_srch++;
+        }
+    }
+
+    /* Convolution */
+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+        d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
+            + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
+    }
+
+    length_d_comp = 0;
+    for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
+        if( d_comp[ i - D_COMP_MIN ] > 0 ) {
+            d_comp[ length_d_comp ] = i - 2;
+            length_d_comp++;
+        }
+    }
+
+    /**********************************************************************************
+    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+    *************************************************************************************/
+
+    /******************************************************************************
+    ** Scale signal down to avoid correlations measures from overflowing
+    *******************************************************************************/
+    /* find scaling as max scaling for each subframe */
+    silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
+    if( shift > 0 ) {
+        shift = silk_RSHIFT( shift, 1 );
+        for( i = 0; i < frame_length_8kHz; i++ ) {
+            frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
+        }
+    }
+
+    /*********************************************************************************
+    * Find energy of each subframe projected onto its history, for a range of delays
+    *********************************************************************************/
+    silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
+
+    target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    for( k = 0; k < nb_subfr; k++ ) {
+
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_8kHz );
+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
+        for( j = 0; j < length_d_comp; j++ ) {
+            d = d_comp[ j ];
+            basis_ptr = target_ptr - d;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_8kHz );
+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+            if( cross_corr > 0 ) {
+                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
+                    (opus_int16)silk_DIV32_varQ( cross_corr,
+                                                 silk_ADD32( energy_target,
+                                                             energy_basis ),
+                                                 13 + 1 );                                      /* Q13 */
+            } else {
+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
+            }
+        }
+        target_ptr += SF_LENGTH_8KHZ;
+    }
+
+    /* search over lag range and lags codebook */
+    /* scale factor for lag codebook, as a function of center lag */
+
+    CCmax   = silk_int32_MIN;
+    CCmax_b = silk_int32_MIN;
+
+    CBimax = 0; /* To avoid returning undefined lag values */
+    lag = -1;   /* To check if lag with strong enough correlation has been found */
+
+    if( prevLag > 0 ) {
+        if( Fs_kHz == 12 ) {
+            prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
+        } else if( Fs_kHz == 16 ) {
+            prevLag = silk_RSHIFT( prevLag, 1 );
+        }
+        prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
+    } else {
+        prevLag_log2_Q7 = 0;
+    }
+    silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
+    /* Set up stage 2 codebook based on number of subframes */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+            /* If input is 8 khz use a larger codebook here because it is last stage */
+            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE2;
+        }
+    } else {
+        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
+        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
+    }
+
+    for( k = 0; k < length_d_srch; k++ ) {
+        d = d_srch[ k ];
+        for( j = 0; j < nb_cbk_search; j++ ) {
+            CC[ j ] = 0;
+            for( i = 0; i < nb_subfr; i++ ) {
+                opus_int d_subfr;
+                /* Try all codebooks */
+                d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
+                CC[ j ] = CC[ j ]
+                    + (opus_int32)matrix_ptr( C, i,
+                                              d_subfr - ( MIN_LAG_8KHZ - 2 ),
+                                              CSTRIDE_8KHZ );
+            }
+        }
+        /* Find best codebook */
+        CCmax_new = silk_int32_MIN;
+        CBimax_new = 0;
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            if( CC[ i ] > CCmax_new ) {
+                CCmax_new = CC[ i ];
+                CBimax_new = i;
+            }
+        }
+
+        /* Bias towards shorter lags */
+        lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
+        silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
+        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
+
+        /* Bias towards previous lag */
+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
+        if( prevLag > 0 ) {
+            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
+            silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
+            delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
+            prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
+            prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
+            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
+        }
+
+        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
+            CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
+            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
+         ) {
+            CCmax_b = CCmax_new_b;
+            CCmax   = CCmax_new;
+            lag     = d;
+            CBimax  = CBimax_new;
+        }
+    }
+
+    if( lag == -1 ) {
+        /* No suitable candidate found */
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr_Q15  = 0;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        RESTORE_STACK;
+        return 1;
+    }
+
+    /* Output normalized correlation */
+    *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
+    silk_assert( *LTPCorr_Q15 >= 0 );
+
+    if( Fs_kHz > 8 ) {
+        VARDECL( opus_int16, scratch_mem );
+        /***************************************************************************/
+        /* Scale input signal down to avoid correlations measures from overflowing */
+        /***************************************************************************/
+        /* find scaling as max scaling for each subframe */
+        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
+        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
+        if( shift > 0 ) {
+            /* Move signal to scratch mem because the input signal should be unchanged */
+            shift = silk_RSHIFT( shift, 1 );
+            for( i = 0; i < frame_length; i++ ) {
+                scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
+            }
+            input_frame_ptr = scratch_mem;
+        } else {
+            input_frame_ptr = frame;
+        }
+
+        /* Search in original signal */
+
+        CBimax_old = CBimax;
+        /* Compensate for decimation */
+        silk_assert( lag == silk_SAT16( lag ) );
+        if( Fs_kHz == 12 ) {
+            lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
+        } else if( Fs_kHz == 16 ) {
+            lag = silk_LSHIFT( lag, 1 );
+        } else {
+            lag = silk_SMULBB( lag, 3 );
+        }
+
+        lag = silk_LIMIT_int( lag, min_lag, max_lag );
+        start_lag = silk_max_int( lag - 2, min_lag );
+        end_lag   = silk_min_int( lag + 2, max_lag );
+        lag_new   = lag;                                    /* to avoid undefined lag */
+        CBimax    = 0;                                      /* to avoid undefined lag */
+
+        CCmax = silk_int32_MIN;
+        /* pitch lags according to second stage */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
+        }
+
+        /* Set up codebook parameters according to complexity setting and frame length */
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+            cbk_size        = PE_NB_CBKS_STAGE3_MAX;
+            Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        } else {
+            nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
+            cbk_size        = PE_NB_CBKS_STAGE3_10MS;
+            Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        }
+
+        /* Calculate the correlations and energies needed in stage 3 */
+        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+
+        lag_counter = 0;
+        silk_assert( lag == silk_SAT16( lag ) );
+        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
+
+        target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
+        for( d = start_lag; d <= end_lag; d++ ) {
+            for( j = 0; j < nb_cbk_search; j++ ) {
+                cross_corr = 0;
+                energy     = energy_target;
+                for( k = 0; k < nb_subfr; k++ ) {
+                    cross_corr = silk_ADD32( cross_corr,
+                        matrix_ptr( cross_corr_st3, k, j,
+                                    nb_cbk_search )[ lag_counter ] );
+                    energy     = silk_ADD32( energy,
+                        matrix_ptr( energies_st3, k, j,
+                                    nb_cbk_search )[ lag_counter ] );
+                    silk_assert( energy >= 0 );
+                }
+                if( cross_corr > 0 ) {
+                    CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
+                    /* Reduce depending on flatness of contour */
+                    diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
+                    silk_assert( diff == silk_SAT16( diff ) );
+                    CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
+                } else {
+                    CCmax_new = 0;
+                }
+
+                if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
+                    CCmax   = CCmax_new;
+                    lag_new = d;
+                    CBimax  = j;
+                }
+            }
+            lag_counter++;
+        }
+
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+        }
+        *lagIndex = (opus_int16)( lag_new - min_lag);
+        *contourIndex = (opus_int8)CBimax;
+    } else {        /* Fs_kHz == 8 */
+        /* Save Lags */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
+        }
+        *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
+        *contourIndex = (opus_int8)CBimax;
+    }
+    silk_assert( *lagIndex >= 0 );
+    /* return as voiced */
+    RESTORE_STACK;
+    return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
+    const opus_int16  frame[],                         /* I vector to correlate         */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
+)
+{
+    const opus_int16 *target_ptr;
+    opus_int   i, j, k, lag_counter, lag_low, lag_high;
+    opus_int   nb_cbk_search, delta, idx, cbk_size;
+    VARDECL( opus_int32, scratch_mem );
+    VARDECL( opus_int32, xcorr32 );
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+    SAVE_STACK;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+    ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the correlations for each subframe */
+        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
+        for( j = lag_low; j <= lag_high; j++ ) {
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for */
+            /* each code_book vector for each start lag */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
+                    scratch_mem[ idx + j ];
+            }
+        }
+        target_ptr += sf_length;
+    }
+    RESTORE_STACK;
+}
+
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+    silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
+    const opus_int16  frame[],                          /* I vector to calc energy in    */
+    opus_int          start_lag,                        /* I lag offset to search around */
+    opus_int          sf_length,                        /* I length of one 5 ms subframe */
+    opus_int          nb_subfr,                         /* I number of subframes         */
+    opus_int          complexity                        /* I Complexity setting          */
+)
+{
+    const opus_int16 *target_ptr, *basis_ptr;
+    opus_int32 energy;
+    opus_int   k, i, j, lag_counter;
+    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
+    VARDECL( opus_int32, scratch_mem );
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+    SAVE_STACK;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the energy for first lag */
+        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
+        silk_assert( energy >= 0 );
+        scratch_mem[ lag_counter ] = energy;
+        lag_counter++;
+
+        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
+        for( i = 1; i < lag_diff; i++ ) {
+            /* remove part outside new window */
+            energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
+            silk_assert( energy >= 0 );
+
+            /* add part that comes into window */
+            energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
+            silk_assert( energy >= 0 );
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = energy;
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for    */
+            /* each code_book vector for each start lag                     */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
+                    scratch_mem[ idx + j ];
+                silk_assert(
+                    matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
+            }
+        }
+        target_ptr += sf_length;
+    }
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/prefilter_FIX.c b/src/main/jni/opus/silk/fixed/prefilter_FIX.c
new file mode 100644
index 000000000..d381730c2
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/prefilter_FIX.c
@@ -0,0 +1,209 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Prefilter for finding Quantizer input signal */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
+    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
+    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
+    opus_int                    lag,                        /* I    Lag for harmonic shaping            */
+    opus_int                    length                      /* I    Length of signals                   */
+);
+
+void silk_warped_LPC_analysis_filter_FIX(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+)
+{
+    opus_int     n, i;
+    opus_int32   acc_Q11, tmp1, tmp2;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+        state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
+        /* Output of allpass section */
+        tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+        state[ 1 ] = tmp2;
+        acc_Q11 = silk_RSHIFT( order, 1 );
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+            state[ i ] = tmp1;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+            state[ i + 1 ] = tmp2;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+        }
+        state[ order ] = tmp1;
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+        res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+    }
+}
+
+void silk_prefilter_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    const silk_encoder_control_FIX  *psEncCtrl,                             /* I    Encoder control                                                             */
+    opus_int32                      xw_Q3[],                                /* O    Weighted signal                                                             */
+    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+)
+{
+    silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
+    opus_int   j, k, lag;
+    opus_int32 tmp_32;
+    const opus_int16 *AR1_shp_Q13;
+    const opus_int16 *px;
+    opus_int32 *pxw_Q3;
+    opus_int   HarmShapeGain_Q12, Tilt_Q14;
+    opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
+    VARDECL( opus_int32, x_filt_Q12 );
+    VARDECL( opus_int32, st_res_Q2 );
+    opus_int16 B_Q10[ 2 ];
+    SAVE_STACK;
+
+    /* Set up pointers */
+    px  = x;
+    pxw_Q3 = xw_Q3;
+    lag = P->lagPrev;
+    ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
+    ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Update Variables that change per sub frame */
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            lag = psEncCtrl->pitchL[ k ];
+        }
+
+        /* Noise shape parameters */
+        HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
+        silk_assert( HarmShapeGain_Q12 >= 0 );
+        HarmShapeFIRPacked_Q12  =                          silk_RSHIFT( HarmShapeGain_Q12, 2 );
+        HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 );
+        Tilt_Q14    = psEncCtrl->Tilt_Q14[   k ];
+        LF_shp_Q14  = psEncCtrl->LF_shp_Q14[ k ];
+        AR1_shp_Q13 = &psEncCtrl->AR1_Q13[   k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Short term FIR filtering*/
+        silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
+            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+        /* Reduce (mainly) low frequencies during harmonic emphasis */
+        B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
+        tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 );   /* Q26 */
+        tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) );    /* Q26 */
+        tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] );                                                /* Q24 */
+        tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 );                                                                     /* Q10 */
+        B_Q10[ 1 ]= silk_SAT16( tmp_32 );
+        x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] );
+        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+            x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
+        }
+        P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ];
+
+        silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );
+
+        px  += psEnc->sCmn.subfr_length;
+        pxw_Q3 += psEnc->sCmn.subfr_length;
+    }
+
+    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    RESTORE_STACK;
+}
+
+/* Prefilter for finding Quantizer input signal */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
+    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
+    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
+    opus_int                    lag,                        /* I    Lag for harmonic shaping            */
+    opus_int                    length                      /* I    Length of signals                   */
+)
+{
+    opus_int   i, idx, LTP_shp_buf_idx;
+    opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+    opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+    opus_int16 *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf     = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp_Q12  = P->sLF_AR_shp_Q12;
+    sLF_MA_shp_Q12  = P->sLF_MA_shp_Q12;
+
+    for( i = 0; i < length; i++ ) {
+        if( lag > 0 ) {
+            /* unrolled loop */
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP_Q12 = silk_SMULBB(            LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+        } else {
+            n_LTP_Q12 = 0;
+        }
+
+        n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+        n_LF_Q10   = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+        sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+        sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12,  silk_LSHIFT( n_LF_Q10,   2 ) );
+
+        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+        LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+        xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+    }
+
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp_Q12   = sLF_AR_shp_Q12;
+    P->sLF_MA_shp_Q12   = sLF_MA_shp_Q12;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/src/main/jni/opus/silk/fixed/process_gains_FIX.c b/src/main/jni/opus/silk/fixed/process_gains_FIX.c
new file mode 100644
index 000000000..05aba3178
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/process_gains_FIX.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains */
+void silk_process_gains_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int     k;
+    opus_int32   s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10;
+
+    /* Gain reduction when LTP coding gain is high */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */
+        s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 );
+        }
+    }
+
+    /* Limit the quantized signal */
+    /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
+    InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin(
+        silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length );
+
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Soft limit on ratio residual energy and squared gains */
+        ResNrg     = psEncCtrl->ResNrg[ k ];
+        ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 );
+        if( psEncCtrl->ResNrgQ[ k ] > 0 ) {
+            ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] );
+        } else {
+            if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) {
+                ResNrgPart = silk_int32_MAX;
+            } else {
+                ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] );
+            }
+        }
+        gain = psEncCtrl->Gains_Q16[ k ];
+        gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) );
+        if( gain_squared < silk_int16_MAX ) {
+            /* recalculate with higher precision */
+            gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain );
+            silk_assert( gain_squared > 0 );
+            gain = silk_SQRT_APPROX( gain_squared );                    /* Q8   */
+            gain = silk_min( gain, silk_int32_MAX >> 8 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 );   /* Q16  */
+        } else {
+            gain = silk_SQRT_APPROX( gain_squared );                    /* Q0   */
+            gain = silk_min( gain, silk_int32_MAX >> 16 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 );  /* Q16  */
+        }
+    }
+
+    /* Save unquantized gains and gain Index */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+    /* Quantize gains */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16,
+        &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+    }
+
+    /* Quantizer boundary adjustment */
+    quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ];
+    psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 )
+                          + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT,        18 ), psEnc->sCmn.speech_activity_Q8     )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY,     12 ), psEncCtrl->input_quality_Q14       )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY,    12 ), psEncCtrl->coding_quality_Q14      )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET,      16 ), quant_offset_Q10                   );
+
+    silk_assert( psEncCtrl->Lambda_Q10 > 0 );
+    silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) );
+}
diff --git a/src/main/jni/opus/silk/fixed/regularize_correlations_FIX.c b/src/main/jni/opus/silk/fixed/regularize_correlations_FIX.c
new file mode 100644
index 000000000..a2836b05f
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/regularize_correlations_FIX.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FIX(
+    opus_int32                      *XX,                                    /* I/O  Correlation matrices                                                        */
+    opus_int32                      *xx,                                    /* I/O  Correlation values                                                          */
+    opus_int32                      noise,                                  /* I    Noise to add                                                                */
+    opus_int                        D                                       /* I    Dimension of XX                                                             */
+)
+{
+    opus_int i;
+    for( i = 0; i < D; i++ ) {
+        matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise );
+    }
+    xx[ 0 ] += noise;
+}
diff --git a/src/main/jni/opus/silk/fixed/residual_energy16_FIX.c b/src/main/jni/opus/silk/fixed/residual_energy16_FIX.c
new file mode 100644
index 000000000..ebffb2a66
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/residual_energy16_FIX.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+opus_int32 silk_residual_energy16_covar_FIX(
+    const opus_int16                *c,                                     /* I    Prediction vector                                                           */
+    const opus_int32                *wXX,                                   /* I    Correlation matrix                                                          */
+    const opus_int32                *wXx,                                   /* I    Correlation vector                                                          */
+    opus_int32                      wxx,                                    /* I    Signal energy                                                               */
+    opus_int                        D,                                      /* I    Dimension                                                                   */
+    opus_int                        cQ                                      /* I    Q value for c vector 0 - 15                                                 */
+)
+{
+    opus_int   i, j, lshifts, Qxtra;
+    opus_int32 c_max, w_max, tmp, tmp2, nrg;
+    opus_int   cn[ MAX_MATRIX_SIZE ];
+    const opus_int32 *pRow;
+
+    /* Safety checks */
+    silk_assert( D >=  0 );
+    silk_assert( D <= 16 );
+    silk_assert( cQ >  0 );
+    silk_assert( cQ < 16 );
+
+    lshifts = 16 - cQ;
+    Qxtra = lshifts;
+
+    c_max = 0;
+    for( i = 0; i < D; i++ ) {
+        c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) );
+    }
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 );
+
+    w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] );
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 );
+    Qxtra = silk_max_int( Qxtra, 0 );
+    for( i = 0; i < D; i++ ) {
+        cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra );
+        silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */
+    }
+    lshifts -= Qxtra;
+
+    /* Compute wxx - 2 * wXx * c */
+    tmp = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] );
+    }
+    nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp;                         /* Q: -lshifts - 1 */
+
+    /* Add c' * wXX * c, assuming wXX is symmetric */
+    tmp2 = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = 0;
+        pRow = &wXX[ i * D ];
+        for( j = i + 1; j < D; j++ ) {
+            tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] );
+        }
+        tmp  = silk_SMLAWB( tmp,  silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] );
+        tmp2 = silk_SMLAWB( tmp2, tmp,                        cn[ i ] );
+    }
+    nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts );                       /* Q: -lshifts - 1 */
+
+    /* Keep one bit free always, because we add them for LSF interpolation */
+    if( nrg < 1 ) {
+        nrg = 1;
+    } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) {
+        nrg = silk_int32_MAX >> 1;
+    } else {
+        nrg = silk_LSHIFT( nrg, lshifts + 1 );                           /* Q0 */
+    }
+    return nrg;
+
+}
diff --git a/src/main/jni/opus/silk/fixed/residual_energy_FIX.c b/src/main/jni/opus/silk/fixed/residual_energy_FIX.c
new file mode 100644
index 000000000..105ae3180
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/residual_energy_FIX.c
@@ -0,0 +1,97 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FIX(
+          opus_int32                nrgs[ MAX_NB_SUBFR ],                   /* O    Residual energy per subframe                                                */
+          opus_int                  nrgsQ[ MAX_NB_SUBFR ],                  /* O    Q value per subframe                                                        */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+          opus_int16                a_Q12[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                                                */
+    const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+)
+{
+    opus_int         offset, i, j, rshift, lz1, lz2;
+    opus_int16       *LPC_res_ptr;
+    VARDECL( opus_int16, LPC_res );
+    const opus_int16 *x_ptr;
+    opus_int32       tmp32;
+    SAVE_STACK;
+
+    x_ptr  = x;
+    offset = LPC_order + subfr_length;
+
+    /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+    ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
+    silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
+    for( i = 0; i < nb_subfr >> 1; i++ ) {
+        /* Calculate half frame LPC residual signal including preceding samples */
+        silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+
+        /* Point to first subframe of the just calculated LPC residual signal */
+        LPC_res_ptr = LPC_res + LPC_order;
+        for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) {
+            /* Measure subframe energy */
+            silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length );
+
+            /* Set Q values for the measured energy */
+            nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift;
+
+            /* Move to next subframe */
+            LPC_res_ptr += offset;
+        }
+        /* Move to next frame half */
+        x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset;
+    }
+
+    /* Apply the squared subframe gains */
+    for( i = 0; i < nb_subfr; i++ ) {
+        /* Fully upscale gains and energies */
+        lz1 = silk_CLZ32( nrgs[  i ] ) - 1;
+        lz2 = silk_CLZ32( gains[ i ] ) - 1;
+
+        tmp32 = silk_LSHIFT32( gains[ i ], lz2 );
+
+        /* Find squared gains */
+        tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 )*/
+
+        /* Scale energies */
+        nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/
+        nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32;
+    }
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/fixed/schur64_FIX.c b/src/main/jni/opus/silk/fixed/schur64_FIX.c
new file mode 100644
index 000000000..764a10ef3
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/schur64_FIX.c
@@ -0,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Slower than schur(), but more accurate.                              */
+/* Uses SMULL(), available on armv4                                     */
+opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
+    opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
+    const opus_int32            c[],                /* I    Correlations [order+1]                                      */
+    opus_int32                  order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Check for invalid input */
+    if( c[ 0 ] <= 0 ) {
+        silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) );
+        return 0;
+    }
+
+    for( k = 0; k < order + 1; k++ ) {
+        C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+           if ( C[ k + 1 ][ 0 ] > 0 ) {
+              rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 );
+           } else {
+              rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 );
+           }
+           k++;
+           break;
+        }
+
+        /* Get reflection coefficient: divide two Q30 values and get result in Q31 */
+        rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 );
+
+        /* Save the output */
+        rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 );
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1_Q30 = C[ n + k + 1 ][ 0 ];
+            Ctmp2_Q30 = C[ n ][ 1 ];
+
+            /* Multiply and add the highest int32 */
+            C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 );
+            C[ n ][ 1 ]         = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+       rc_Q16[ k ] = 0;
+    }
+
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/src/main/jni/opus/silk/fixed/schur_FIX.c b/src/main/jni/opus/silk/fixed/schur_FIX.c
new file mode 100644
index 000000000..c4c0ef23b
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/schur_FIX.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Faster than schur64(), but much less accurate.                       */
+/* uses SMLAWB(), requiring armv5E and higher.                          */
+opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
+    opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
+    const opus_int32            *c,                 /* I    correlations [order+1]                                      */
+    const opus_int32            order               /* I    prediction order                                            */
+)
+{
+    opus_int        k, n, lz;
+    opus_int32    C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32    Ctmp1, Ctmp2, rc_tmp_Q15;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Get number of leading zeros */
+    lz = silk_CLZ32( c[ 0 ] );
+
+    /* Copy correlations and adjust level to Q30 */
+    if( lz < 2 ) {
+        /* lz must be 1, so shift one to the right */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 );
+        }
+    } else if( lz > 2 ) {
+        /* Shift to the left */
+        lz -= 2;
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz );
+        }
+    } else {
+        /* No need to shift */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+        }
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+           if ( C[ k + 1 ][ 0 ] > 0 ) {
+              rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 );
+           } else {
+              rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 );
+           }
+           k++;
+           break;
+        }
+
+        /* Get reflection coefficient */
+        rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) );
+
+        /* Clip (shouldn't happen for properly conditioned inputs) */
+        rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 );
+
+        /* Store */
+        rc_Q15[ k ] = (opus_int16)rc_tmp_Q15;
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1 = C[ n + k + 1 ][ 0 ];
+            Ctmp2 = C[ n ][ 1 ];
+            C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
+            C[ n ][ 1 ]         = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+       rc_Q15[ k ] = 0;
+    }
+
+    /* return residual energy */
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/src/main/jni/opus/silk/fixed/solve_LS_FIX.c b/src/main/jni/opus/silk/fixed/solve_LS_FIX.c
new file mode 100644
index 000000000..51d7d49d0
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/solve_LS_FIX.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/*****************************/
+/* Internal function headers */
+/*****************************/
+
+typedef struct {
+    opus_int32 Q36_part;
+    opus_int32 Q48_part;
+} inv_D_t;
+
+/* Factorize square matrix A into LDL form */
+static OPUS_INLINE void silk_LDL_factorize_FIX(
+    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
+    opus_int            M,          /* I   Size of Matrix                                               */
+    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
+    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
+);
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    opus_int            M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+);
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    const opus_int      M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+);
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
+    opus_int32          T[],        /* I/O  Numenator vector                                            */
+    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
+    opus_int            M           /* I    dimension                                                   */
+);
+
+/* Solves Ax = b, assuming A is symmetric */
+void silk_solve_LDL_FIX(
+    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
+    opus_int                        M,                                      /* I    Size of matrix                                                              */
+    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
+    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
+)
+{
+    VARDECL( opus_int32, L_Q16 );
+    opus_int32 Y[      MAX_MATRIX_SIZE ];
+    inv_D_t   inv_D[  MAX_MATRIX_SIZE ];
+    SAVE_STACK;
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+    ALLOC( L_Q16, M * M, opus_int32 );
+
+    /***************************************************
+    Factorize A by LDL such that A = L*D*L',
+    where L is lower triangular with ones on diagonal
+    ****************************************************/
+    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );
+
+    /****************************************************
+    * substitute D*L'*x = Y. ie:
+    L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b
+    ******************************************************/
+    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );
+
+    /****************************************************
+    D*L'*x = Y <=> L'*x = inv(D)*Y, because D is
+    diagonal just multiply with 1/d_i
+    ****************************************************/
+    silk_LS_divide_Q16_FIX( Y, inv_D, M );
+
+    /****************************************************
+    x = inv(L') * inv(D) * Y
+    *****************************************************/
+    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
+    RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_LDL_factorize_FIX(
+    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
+    opus_int            M,          /* I   Size of Matrix                                               */
+    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
+    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
+)
+{
+    opus_int   i, j, k, status, loop_count;
+    const opus_int32 *ptr1, *ptr2;
+    opus_int32 diag_min_value, tmp_32, err;
+    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
+    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    status = 1;
+    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
+    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
+        status = 0;
+        for( j = 0; j < M; j++ ) {
+            ptr1 = matrix_adr( L_Q16, j, 0, M );
+            tmp_32 = 0;
+            for( i = 0; i < j; i++ ) {
+                v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */
+                tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
+            }
+            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );
+
+            if( tmp_32 < diag_min_value ) {
+                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
+                /* Matrix not positive semi-definite, or ill conditioned */
+                for( i = 0; i < M; i++ ) {
+                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
+                }
+                status = 1;
+                break;
+            }
+            D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */
+
+            /* two-step division */
+            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
+            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
+            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
+            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */
+
+            /* Save 1/Ds */
+            inv_D[ j ].Q36_part = one_div_diag_Q36;
+            inv_D[ j ].Q48_part = one_div_diag_Q48;
+
+            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
+            ptr1 = matrix_adr( A, j, 0, M );
+            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
+            for( i = j + 1; i < M; i++ ) {
+                tmp_32 = 0;
+                for( k = 0; k < j; k++ ) {
+                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
+                }
+                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
+
+                /* tmp_32 / D_Q0[j] : Divide to Q16 */
+                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
+                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
+
+                /* go to next column */
+                ptr2 += M;
+            }
+        }
+    }
+
+    silk_assert( status == 0 );
+}
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
+    opus_int32          T[],        /* I/O  Numenator vector                                            */
+    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
+    opus_int            M           /* I    dimension                                                   */
+)
+{
+    opus_int   i;
+    opus_int32 tmp_32;
+    opus_int32 one_div_diag_Q36, one_div_diag_Q48;
+
+    for( i = 0; i < M; i++ ) {
+        one_div_diag_Q36 = inv_D[ i ].Q36_part;
+        one_div_diag_Q48 = inv_D[ i ].Q48_part;
+
+        tmp_32 = T[ i ];
+        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
+    }
+}
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    opus_int            M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+)
+{
+    opus_int i, j;
+    const opus_int32 *ptr32;
+    opus_int32 tmp_32;
+
+    for( i = 0; i < M; i++ ) {
+        ptr32 = matrix_adr( L_Q16, i, 0, M );
+        tmp_32 = 0;
+        for( j = 0; j < i; j++ ) {
+            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
+        }
+        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
+    }
+}
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    const opus_int      M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+)
+{
+    opus_int i, j;
+    const opus_int32 *ptr32;
+    opus_int32 tmp_32;
+
+    for( i = M - 1; i >= 0; i-- ) {
+        ptr32 = matrix_adr( L_Q16, 0, i, M );
+        tmp_32 = 0;
+        for( j = M - 1; j > i; j-- ) {
+            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
+        }
+        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
+    }
+}
diff --git a/src/main/jni/opus/silk/fixed/structs_FIX.h b/src/main/jni/opus/silk/fixed/structs_FIX.h
new file mode 100644
index 000000000..244b47934
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/structs_FIX.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FIX_H
+#define SILK_STRUCTS_FIX_H
+
+#include "typedef.h"
+#include "main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct {
+    opus_int8                   LastGainIndex;
+    opus_int32                  HarmBoost_smth_Q16;
+    opus_int32                  HarmShapeGain_smth_Q16;
+    opus_int32                  Tilt_smth_Q16;
+} silk_shape_state_FIX;
+
+/********************************/
+/* Prefilter state              */
+/********************************/
+typedef struct {
+    opus_int16                  sLTP_shp[ LTP_BUF_LENGTH ];
+    opus_int32                  sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int                    sLTP_shp_buf_idx;
+    opus_int32                  sLF_AR_shp_Q12;
+    opus_int32                  sLF_MA_shp_Q12;
+    opus_int32                  sHarmHP_Q2;
+    opus_int32                  rand_seed;
+    opus_int                    lagPrev;
+} silk_prefilter_state_FIX;
+
+/********************************/
+/* Encoder state FIX            */
+/********************************/
+typedef struct {
+    silk_encoder_state          sCmn;                                   /* Common struct, shared with floating-point code       */
+    silk_shape_state_FIX        sShape;                                 /* Shape state                                          */
+    silk_prefilter_state_FIX    sPrefilt;                               /* Prefilter State                                      */
+
+    /* Buffer for find pitch and noise shape analysis */
+    silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis  */
+    opus_int                    LTPCorr_Q15;                            /* Normalized correlation from pitch lag estimator      */
+} silk_encoder_state_FIX;
+
+/************************/
+/* Encoder control FIX  */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    opus_int32                  Gains_Q16[ MAX_NB_SUBFR ];
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int                    LTP_scale_Q14;
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+
+    /* Noise shaping parameters */
+    /* Testing */
+    silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    opus_int32                  LF_shp_Q14[        MAX_NB_SUBFR ];      /* Packs two int16 coefficients per int32 value         */
+    opus_int                    GainsPre_Q14[      MAX_NB_SUBFR ];
+    opus_int                    HarmBoost_Q14[     MAX_NB_SUBFR ];
+    opus_int                    Tilt_Q14[          MAX_NB_SUBFR ];
+    opus_int                    HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+    opus_int                    Lambda_Q10;
+    opus_int                    input_quality_Q14;
+    opus_int                    coding_quality_Q14;
+
+    /* measures */
+    opus_int                    sparseness_Q8;
+    opus_int32                  predGain_Q16;
+    opus_int                    LTPredCodGain_Q7;
+    opus_int32                  ResNrg[ MAX_NB_SUBFR ];                 /* Residual energy per subframe                         */
+    opus_int                    ResNrgQ[ MAX_NB_SUBFR ];                /* Q domain for the residual energy > 0                 */
+
+    /* Parameters for CBR mode */
+    opus_int32                  GainsUnq_Q16[ MAX_NB_SUBFR ];
+    opus_int8                   lastGainIndexPrev;
+} silk_encoder_control_FIX;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+    silk_encoder_state_FIX      state_Fxx[ ENCODER_NUM_CHANNELS ];
+    stereo_enc_state            sStereo;
+    opus_int32                  nBitsExceeded;
+    opus_int                    nChannelsAPI;
+    opus_int                    nChannelsInternal;
+    opus_int                    nPrevChannelsInternal;
+    opus_int                    timeSinceSwitchAllowed_ms;
+    opus_int                    allowBandwidthSwitch;
+    opus_int                    prev_decode_only_middle;
+} silk_encoder;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/fixed/vector_ops_FIX.c b/src/main/jni/opus/silk/fixed/vector_ops_FIX.c
new file mode 100644
index 000000000..509c8b35a
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/vector_ops_FIX.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+    opus_int16                  *data_out,
+    const opus_int16            *data_in,
+    opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
+    const opus_int              dataSize            /* I    Length                                                      */
+)
+{
+    opus_int  i;
+    opus_int32 tmp32;
+
+    for( i = 0; i < dataSize; i++ ) {
+        tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] );
+        data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 );
+    }
+}
+
+/* Multiply a vector by a constant */
+void silk_scale_vector32_Q26_lshift_18(
+    opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
+    opus_int32                  gain_Q26,           /* I    Q26                                                         */
+    opus_int                    dataSize            /* I    length                                                      */
+)
+{
+    opus_int  i;
+
+    for( i = 0; i < dataSize; i++ ) {
+        data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) );    /* OUTPUT: Q18 */
+    }
+}
+
+/* sum = for(i=0;i<len;i++)inVec1[i]*inVec2[i];      ---        inner product   */
+/* Note for ARM asm:                                                            */
+/*        * inVec1 and inVec2 should be at least 2 byte aligned.                */
+/*        * len should be positive 16bit integer.                               */
+/*        * only when len>6, memory access can be reduced by half.              */
+opus_int32 silk_inner_prod_aligned(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int32 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+    return sum;
+}
+
+opus_int64 silk_inner_prod16_aligned_64(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int64 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+    return sum;
+}
diff --git a/src/main/jni/opus/silk/fixed/warped_autocorrelation_FIX.c b/src/main/jni/opus/silk/fixed/warped_autocorrelation_FIX.c
new file mode 100644
index 000000000..a4a579b10
--- /dev/null
+++ b/src/main/jni/opus/silk/fixed/warped_autocorrelation_FIX.c
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+#define QC  10
+#define QS  14
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+          opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
+          opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
+    const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
+    const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
+    const opus_int                  length,                                 /* I    Length of input                                                             */
+    const opus_int                  order                                   /* I    Correlation order (even)                                                    */
+)
+{
+    opus_int   n, i, lsh;
+    opus_int32 tmp1_QS, tmp2_QS;
+    opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+    opus_int64 corr_QC[  MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+    silk_assert( 2 * QS - QC >= 0 );
+
+    /* Loop over samples */
+    for( n = 0; n < length; n++ ) {
+        tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+            state_QS[ i ]  = tmp1_QS;
+            corr_QC[  i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
+            /* Output of allpass section */
+            tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+            state_QS[ i + 1 ]  = tmp2_QS;
+            corr_QC[  i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC );
+        }
+        state_QS[ order ] = tmp1_QS;
+        corr_QC[  order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
+    }
+
+    lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35;
+    lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+    *scale = -( QC + lsh );
+    silk_assert( *scale >= -30 && *scale <= 12 );
+    if( lsh >= 0 ) {
+        for( i = 0; i < order + 1; i++ ) {
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) );
+        }
+    } else {
+        for( i = 0; i < order + 1; i++ ) {
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) );
+        }
+    }
+    silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
diff --git a/src/main/jni/opus/silk/float/LPC_analysis_filter_FLP.c b/src/main/jni/opus/silk/float/LPC_analysis_filter_FLP.c
new file mode 100644
index 000000000..cae89a0a1
--- /dev/null
+++ b/src/main/jni/opus/silk/float/LPC_analysis_filter_FLP.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+
+/************************************************/
+/* LPC analysis filter                          */
+/* NB! State is kept internally and the         */
+/* filter always starts with zero state         */
+/* first Order output samples are set to zero   */
+/************************************************/
+
+/* 16th order LPC analysis filter, does not write first 16 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter16_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 16; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ]  * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ]  * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ]  * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ]  * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ]  * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ]  * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ]  * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ]  * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ]  * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ]  * PredCoef[ 9 ]  +
+                   s_ptr[ -10 ] * PredCoef[ 10 ] +
+                   s_ptr[ -11 ] * PredCoef[ 11 ] +
+                   s_ptr[ -12 ] * PredCoef[ 12 ] +
+                   s_ptr[ -13 ] * PredCoef[ 13 ] +
+                   s_ptr[ -14 ] * PredCoef[ 14 ] +
+                   s_ptr[ -15 ] * PredCoef[ 15 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 12th order LPC analysis filter, does not write first 12 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter12_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 12; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ]  * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ]  * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ]  * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ]  * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ]  * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ]  * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ]  * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ]  * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ]  * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ]  * PredCoef[ 9 ]  +
+                   s_ptr[ -10 ] * PredCoef[ 10 ] +
+                   s_ptr[ -11 ] * PredCoef[ 11 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 10th order LPC analysis filter, does not write first 10 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter10_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 10; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ] * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ] * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ] * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ] * PredCoef[ 9 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 8th order LPC analysis filter, does not write first 8 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter8_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 8; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ] * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ] * PredCoef[ 7 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 6th order LPC analysis filter, does not write first 6 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter6_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 6; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/************************************************/
+/* LPC analysis filter                          */
+/* NB! State is kept internally and the         */
+/* filter always starts with zero state         */
+/* first Order output samples are set to zero   */
+/************************************************/
+void silk_LPC_analysis_filter_FLP(
+    silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
+    const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
+    const silk_float                s[],                                /* I    Input signal                                */
+    const opus_int                  length,                             /* I    Length of input signal                      */
+    const opus_int                  Order                               /* I    LPC order                                   */
+)
+{
+    silk_assert( Order <= length );
+
+    switch( Order ) {
+        case 6:
+            silk_LPC_analysis_filter6_FLP(  r_LPC, PredCoef, s, length );
+        break;
+
+        case 8:
+            silk_LPC_analysis_filter8_FLP(  r_LPC, PredCoef, s, length );
+        break;
+
+        case 10:
+            silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        case 12:
+            silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        case 16:
+            silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        default:
+            silk_assert( 0 );
+        break;
+    }
+
+    /* Set first Order output samples to zero */
+    silk_memset( r_LPC, 0, Order * sizeof( silk_float ) );
+}
+
diff --git a/src/main/jni/opus/silk/float/LPC_inv_pred_gain_FLP.c b/src/main/jni/opus/silk/float/LPC_inv_pred_gain_FLP.c
new file mode 100644
index 000000000..25178bacd
--- /dev/null
+++ b/src/main/jni/opus/silk/float/LPC_inv_pred_gain_FLP.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "SigProc_FLP.h"
+
+#define RC_THRESHOLD        0.9999f
+
+/* compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+/* this code is based on silk_a2k_FLP()                                 */
+silk_float silk_LPC_inverse_pred_gain_FLP(  /* O    return inverse prediction gain, energy domain               */
+    const silk_float    *A,                 /* I    prediction coefficients [order]                             */
+    opus_int32          order               /* I    prediction order                                            */
+)
+{
+    opus_int   k, n;
+    double     invGain, rc, rc_mult1, rc_mult2;
+    silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ];
+    silk_float *Aold, *Anew;
+
+    Anew = Atmp[ order & 1 ];
+    silk_memcpy( Anew, A, order * sizeof(silk_float) );
+
+    invGain = 1.0;
+    for( k = order - 1; k > 0; k-- ) {
+        rc = -Anew[ k ];
+        if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+            return 0.0f;
+        }
+        rc_mult1 = 1.0f - rc * rc;
+        rc_mult2 = 1.0f / rc_mult1;
+        invGain *= rc_mult1;
+        /* swap pointers */
+        Aold = Anew;
+        Anew = Atmp[ k & 1 ];
+        for( n = 0; n < k; n++ ) {
+            Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 );
+        }
+    }
+    rc = -Anew[ 0 ];
+    if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+        return 0.0f;
+    }
+    rc_mult1 = 1.0f - rc * rc;
+    invGain *= rc_mult1;
+    return (silk_float)invGain;
+}
diff --git a/src/main/jni/opus/silk/float/LTP_analysis_filter_FLP.c b/src/main/jni/opus/silk/float/LTP_analysis_filter_FLP.c
new file mode 100644
index 000000000..849b7c1c5
--- /dev/null
+++ b/src/main/jni/opus/silk/float/LTP_analysis_filter_FLP.c
@@ -0,0 +1,75 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_analysis_filter_FLP(
+    silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+    const silk_float                *x,                                 /* I    Input signal, with preceding samples        */
+    const silk_float                B[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    LTP coefficients for each subframe          */
+    const opus_int                  pitchL[   MAX_NB_SUBFR ],           /* I    Pitch lags                                  */
+    const silk_float                invGains[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains                  */
+    const opus_int                  subfr_length,                       /* I    Length of each subframe                     */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  pre_length                          /* I    Preceding samples for each subframe         */
+)
+{
+    const silk_float *x_ptr, *x_lag_ptr;
+    silk_float   Btmp[ LTP_ORDER ];
+    silk_float   *LTP_res_ptr;
+    silk_float   inv_gain;
+    opus_int     k, i, j;
+
+    x_ptr = x;
+    LTP_res_ptr = LTP_res;
+    for( k = 0; k < nb_subfr; k++ ) {
+        x_lag_ptr = x_ptr - pitchL[ k ];
+        inv_gain = invGains[ k ];
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            Btmp[ i ] = B[ k * LTP_ORDER + i ];
+        }
+
+        /* LTP analysis FIR filter */
+        for( i = 0; i < subfr_length + pre_length; i++ ) {
+            LTP_res_ptr[ i ] = x_ptr[ i ];
+            /* Subtract long-term prediction */
+            for( j = 0; j < LTP_ORDER; j++ ) {
+                LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ];
+            }
+            LTP_res_ptr[ i ] *= inv_gain;
+            x_lag_ptr++;
+        }
+
+        /* Update pointers */
+        LTP_res_ptr += subfr_length + pre_length;
+        x_ptr       += subfr_length;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/LTP_scale_ctrl_FLP.c b/src/main/jni/opus/silk/float/LTP_scale_ctrl_FLP.c
new file mode 100644
index 000000000..8dbe29d0f
--- /dev/null
+++ b/src/main/jni/opus/silk/float/LTP_scale_ctrl_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_scale_ctrl_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   round_loss;
+
+    if( condCoding == CODE_INDEPENDENTLY ) {
+        /* Only scale if first frame in packet */
+        round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket;
+        psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f );
+    } else {
+        /* Default is minimum scaling */
+        psEnc->sCmn.indices.LTP_scaleIndex = 0;
+    }
+
+    psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f;
+}
diff --git a/src/main/jni/opus/silk/float/SigProc_FLP.h b/src/main/jni/opus/silk/float/SigProc_FLP.h
new file mode 100644
index 000000000..f0cb3733b
--- /dev/null
+++ b/src/main/jni/opus/silk/float/SigProc_FLP.h
@@ -0,0 +1,204 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FLP_H
+#define SILK_SIGPROC_FLP_H
+
+#include "SigProc_FIX.h"
+#include "float_cast.h"
+#include <math.h>
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/********************************************************************/
+/*                    SIGNAL PROCESSING FUNCTIONS                   */
+/********************************************************************/
+
+/* Chirp (bw expand) LP AR filter */
+void silk_bwexpander_FLP(
+    silk_float          *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int      d,                  /* I    length of ar                                                */
+    const silk_float    chirp               /* I    chirp factor (typically in range (0..1) )                   */
+);
+
+/* compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+/* this code is based on silk_FLP_a2k()                                 */
+silk_float silk_LPC_inverse_pred_gain_FLP(  /* O    return inverse prediction gain, energy domain               */
+    const silk_float    *A,                 /* I    prediction coefficients [order]                             */
+    opus_int32          order               /* I    prediction order                                            */
+);
+
+silk_float silk_schur_FLP(                  /* O    returns residual energy                                     */
+    silk_float          refl_coef[],        /* O    reflection coefficients (length order)                      */
+    const silk_float    auto_corr[],        /* I    autocorrelation sequence (length order+1)                   */
+    opus_int            order               /* I    order                                                       */
+);
+
+void silk_k2a_FLP(
+    silk_float          *A,                 /* O     prediction coefficients [order]                            */
+    const silk_float    *rc,                /* I     reflection coefficients [order]                            */
+    opus_int32          order               /* I     prediction order                                           */
+);
+
+/* Solve the normal equations using the Levinson-Durbin recursion */
+silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                                     */
+    silk_float          A[],                /* O    prediction coefficients [order]                             */
+    const silk_float    corr[],             /* I    input auto-correlations [order + 1]                         */
+    const opus_int      order               /* I    prediction order                                            */
+);
+
+/* compute autocorrelation */
+void silk_autocorrelation_FLP(
+    silk_float          *results,           /* O    result (length correlationCount)                            */
+    const silk_float    *inputData,         /* I    input data to correlate                                     */
+    opus_int            inputDataSize,      /* I    length of input                                             */
+    opus_int            correlationCount    /* I    number of correlation taps to compute                       */
+);
+
+opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const silk_float    *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int            *pitch_out,         /* O    Pitch lag values [nb_subfr]                                 */
+    opus_int16          *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
+    silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
+    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
+    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
+);
+
+void silk_insertion_sort_decreasing_FLP(
+    silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int            *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int      L,                  /* I    Vector length                                               */
+    const opus_int      K                   /* I    Number of correctly sorted positions                        */
+);
+
+/* Compute reflection coefficients from input signal */
+silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
+    silk_float          A[],                /* O    prediction coefficients (length order)                      */
+    const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const opus_int      D                   /* I    order                                                       */
+);
+
+/* multiply a vector by a constant */
+void silk_scale_vector_FLP(
+    silk_float          *data1,
+    silk_float          gain,
+    opus_int            dataSize
+);
+
+/* copy and multiply a vector by a constant */
+void silk_scale_copy_vector_FLP(
+    silk_float          *data_out,
+    const silk_float    *data_in,
+    silk_float          gain,
+    opus_int            dataSize
+);
+
+/* inner product of two silk_float arrays, with result as double */
+double silk_inner_product_FLP(
+    const silk_float    *data1,
+    const silk_float    *data2,
+    opus_int            dataSize
+);
+
+/* sum of squares of a silk_float array, with result as double */
+double silk_energy_FLP(
+    const silk_float    *data,
+    opus_int            dataSize
+);
+
+/********************************************************************/
+/*                                MACROS                            */
+/********************************************************************/
+
+#define PI              (3.1415926536f)
+
+#define silk_min_float( a, b )                  (((a) < (b)) ? (a) :  (b))
+#define silk_max_float( a, b )                  (((a) > (b)) ? (a) :  (b))
+#define silk_abs_float( a )                     ((silk_float)fabs(a))
+
+/* sigmoid function */
+static OPUS_INLINE silk_float silk_sigmoid( silk_float x )
+{
+    return (silk_float)(1.0 / (1.0 + exp(-x)));
+}
+
+/* floating-point to integer conversion (rounding) */
+static OPUS_INLINE opus_int32 silk_float2int( silk_float x )
+{
+    return (opus_int32)float2int( x );
+}
+
+/* floating-point to integer conversion (rounding) */
+static OPUS_INLINE void silk_float2short_array(
+    opus_int16       *out,
+    const silk_float *in,
+    opus_int32       length
+)
+{
+    opus_int32 k;
+    for( k = length - 1; k >= 0; k-- ) {
+        out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) );
+    }
+}
+
+/* integer to floating-point conversion */
+static OPUS_INLINE void silk_short2float_array(
+    silk_float       *out,
+    const opus_int16 *in,
+    opus_int32       length
+)
+{
+    opus_int32 k;
+    for( k = length - 1; k >= 0; k-- ) {
+        out[k] = (silk_float)in[k];
+    }
+}
+
+/* using log2() helps the fixed-point conversion */
+static OPUS_INLINE silk_float silk_log2( double x )
+{
+    return ( silk_float )( 3.32192809488736 * log10( x ) );
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FLP_H */
diff --git a/src/main/jni/opus/silk/float/apply_sine_window_FLP.c b/src/main/jni/opus/silk/float/apply_sine_window_FLP.c
new file mode 100644
index 000000000..6aae57c0a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/apply_sine_window_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Apply sine window to signal vector   */
+/* Window types:                        */
+/*  1 -> sine window from 0 to pi/2     */
+/*  2 -> sine window from pi/2 to pi    */
+void silk_apply_sine_window_FLP(
+    silk_float                      px_win[],                           /* O    Pointer to windowed signal                  */
+    const silk_float                px[],                               /* I    Pointer to input signal                     */
+    const opus_int                  win_type,                           /* I    Selects a window type                       */
+    const opus_int                  length                              /* I    Window length, multiple of 4                */
+)
+{
+    opus_int   k;
+    silk_float freq, c, S0, S1;
+
+    silk_assert( win_type == 1 || win_type == 2 );
+
+    /* Length must be multiple of 4 */
+    silk_assert( ( length & 3 ) == 0 );
+
+    freq = PI / ( length + 1 );
+
+    /* Approximation of 2 * cos(f) */
+    c = 2.0f - freq * freq;
+
+    /* Initialize state */
+    if( win_type < 2 ) {
+        /* Start from 0 */
+        S0 = 0.0f;
+        /* Approximation of sin(f) */
+        S1 = freq;
+    } else {
+        /* Start from 1 */
+        S0 = 1.0f;
+        /* Approximation of cos(f) */
+        S1 = 0.5f * c;
+    }
+
+    /* Uses the recursive equation:   sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f)   */
+    /* 4 samples at a time */
+    for( k = 0; k < length; k += 4 ) {
+        px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 );
+        px_win[ k + 1 ] = px[ k + 1 ] * S1;
+        S0 = c * S1 - S0;
+        px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 );
+        px_win[ k + 3 ] = px[ k + 3 ] * S0;
+        S1 = c * S0 - S1;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/autocorrelation_FLP.c b/src/main/jni/opus/silk/float/autocorrelation_FLP.c
new file mode 100644
index 000000000..8b8a9e659
--- /dev/null
+++ b/src/main/jni/opus/silk/float/autocorrelation_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+/* compute autocorrelation */
+void silk_autocorrelation_FLP(
+    silk_float          *results,           /* O    result (length correlationCount)                            */
+    const silk_float    *inputData,         /* I    input data to correlate                                     */
+    opus_int            inputDataSize,      /* I    length of input                                             */
+    opus_int            correlationCount    /* I    number of correlation taps to compute                       */
+)
+{
+    opus_int i;
+
+    if( correlationCount > inputDataSize ) {
+        correlationCount = inputDataSize;
+    }
+
+    for( i = 0; i < correlationCount; i++ ) {
+        results[ i ] =  (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i );
+    }
+}
diff --git a/src/main/jni/opus/silk/float/burg_modified_FLP.c b/src/main/jni/opus/silk/float/burg_modified_FLP.c
new file mode 100644
index 000000000..ea5dc25a9
--- /dev/null
+++ b/src/main/jni/opus/silk/float/burg_modified_FLP.c
@@ -0,0 +1,186 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+#include "tuning_parameters.h"
+#include "define.h"
+
+#define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+
+/* Compute reflection coefficients from input signal */
+silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
+    silk_float          A[],                /* O    prediction coefficients (length order)                      */
+    const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const opus_int      D                   /* I    order                                                       */
+)
+{
+    opus_int         k, n, s, reached_max_gain;
+    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+    const silk_float *x_ptr;
+    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    double           Af[ SILK_MAX_ORDER_LPC ];
+
+    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    for( s = 0; s < nb_subfr; s++ ) {
+        x_ptr = x + s * subfr_length;
+        for( n = 1; n < D + 1; n++ ) {
+            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
+    invGain = 1.0f;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            tmp1 = x_ptr[ n ];
+            tmp2 = x_ptr[ subfr_length - n - 1 ];
+            for( k = 0; k < n; k++ ) {
+                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+                Atmp = Af[ k ];
+                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+            }
+            for( k = 0; k <= n; k++ ) {
+                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+            }
+        }
+        tmp1 = C_first_row[ n ];
+        tmp2 = C_last_row[ n ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
+            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+        }
+        CAf[ n + 1 ] = tmp1;
+        CAb[ n + 1 ] = tmp2;
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        num = CAb[ n + 1 ];
+        nrg_b = CAb[ 0 ];
+        nrg_f = CAf[ 0 ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            num   += CAb[ n - k ] * Atmp;
+            nrg_b += CAb[ k + 1 ] * Atmp;
+            nrg_f += CAf[ k + 1 ] * Atmp;
+        }
+        silk_assert( nrg_f > 0.0 );
+        silk_assert( nrg_b > 0.0 );
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        rc = -2.0 * num / ( nrg_f + nrg_b );
+        silk_assert( rc > -1.0 && rc < 1.0 );
+
+        /* Update inverse prediction gain */
+        tmp1 = invGain * ( 1.0 - rc * rc );
+        if( tmp1 <= minInvGain ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            rc = sqrt( 1.0 - minInvGain / invGain );
+            if( num > 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc = -rc;
+            }
+            invGain = minInvGain;
+            reached_max_gain = 1;
+        } else {
+            invGain = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af[ k ];
+            tmp2 = Af[ n - k - 1 ];
+            Af[ k ]         = tmp1 + rc * tmp2;
+            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+        }
+        Af[ n ] = rc;
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af[ k ] = 0.0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];
+            CAf[ k ]          += rc * CAb[ n - k + 1 ];
+            CAb[ n - k + 1  ] += rc * tmp1;
+        }
+    }
+
+    if( reached_max_gain ) {
+        /* Convert to silk_float */
+        for( k = 0; k < D; k++ ) {
+            A[ k ] = (silk_float)( -Af[ k ] );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        for( s = 0; s < nb_subfr; s++ ) {
+            C0 -= silk_energy_FLP( x + s * subfr_length, D );
+        }
+        /* Approximate residual energy */
+        nrg_f = C0 * invGain;
+    } else {
+        /* Compute residual energy and store coefficients as silk_float */
+        nrg_f = CAf[ 0 ];
+        tmp1 = 1.0;
+        for( k = 0; k < D; k++ ) {
+            Atmp = Af[ k ];
+            nrg_f += CAf[ k + 1 ] * Atmp;
+            tmp1  += Atmp * Atmp;
+            A[ k ] = (silk_float)(-Atmp);
+        }
+        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+    }
+
+    /* Return residual energy */
+    return (silk_float)nrg_f;
+}
diff --git a/src/main/jni/opus/silk/float/bwexpander_FLP.c b/src/main/jni/opus/silk/float/bwexpander_FLP.c
new file mode 100644
index 000000000..d55a4d79a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/bwexpander_FLP.c
@@ -0,0 +1,49 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Chirp (bw expand) LP AR filter */
+void silk_bwexpander_FLP(
+    silk_float          *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int      d,                  /* I    length of ar                                                */
+    const silk_float    chirp               /* I    chirp factor (typically in range (0..1) )                   */
+)
+{
+    opus_int   i;
+    silk_float cfac = chirp;
+
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ] *=  cfac;
+        cfac    *=  chirp;
+    }
+    ar[ d - 1 ] *=  cfac;
+}
diff --git a/src/main/jni/opus/silk/float/corrMatrix_FLP.c b/src/main/jni/opus/silk/float/corrMatrix_FLP.c
new file mode 100644
index 000000000..eae6a1cfc
--- /dev/null
+++ b/src/main/jni/opus/silk/float/corrMatrix_FLP.c
@@ -0,0 +1,93 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/**********************************************************************
+ * Correlation matrix computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FLP.h"
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FLP(
+    const silk_float                *x,                                 /* I    x vector [L+order-1] used to create X       */
+    const silk_float                *t,                                 /* I    Target vector [L]                           */
+    const opus_int                  L,                                  /* I    Length of vecors                            */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */
+)
+{
+    opus_int lag;
+    const silk_float *ptr1;
+
+    ptr1 = &x[ Order - 1 ];                     /* Points to first sample of column 0 of X: X[:,0] */
+    for( lag = 0; lag < Order; lag++ ) {
+        /* Calculate X[:,lag]'*t */
+        Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L );
+        ptr1--;                                 /* Next column of X */
+    }
+}
+
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FLP(
+    const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */
+    const opus_int                  L,                                  /* I    Length of vectors                           */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */
+)
+{
+    opus_int j, lag;
+    double  energy;
+    const silk_float *ptr1, *ptr2;
+
+    ptr1 = &x[ Order - 1 ];                     /* First sample of column 0 of X */
+    energy = silk_energy_FLP( ptr1, L );  /* X[:,0]'*X[:,0] */
+    matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy;
+    for( j = 1; j < Order; j++ ) {
+        /* Calculate X[:,j]'*X[:,j] */
+        energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ];
+        matrix_ptr( XX, j, j, Order ) = ( silk_float )energy;
+    }
+
+    ptr2 = &x[ Order - 2 ];                     /* First sample of column 1 of X */
+    for( lag = 1; lag < Order; lag++ ) {
+        /* Calculate X[:,0]'*X[:,lag] */
+        energy = silk_inner_product_FLP( ptr1, ptr2, L );
+        matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy;
+        matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy;
+        /* Calculate X[:,j]'*X[:,j + lag] */
+        for( j = 1; j < ( Order - lag ); j++ ) {
+            energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ];
+            matrix_ptr( XX, lag + j, j, Order ) = ( silk_float )energy;
+            matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy;
+        }
+        ptr2--;                                 /* Next column of X */
+    }
+}
diff --git a/src/main/jni/opus/silk/float/encode_frame_FLP.c b/src/main/jni/opus/silk/float/encode_frame_FLP.c
new file mode 100644
index 000000000..d54e2686e
--- /dev/null
+++ b/src/main/jni/opus/silk/float/encode_frame_FLP.c
@@ -0,0 +1,372 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                xfw[],                              /* I    Input signal                                */
+    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
+);
+
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+)
+{
+    /****************************/
+    /* Voice Activity Detection */
+    /****************************/
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+
+    /**************************************************/
+    /* Convert speech activity into VAD and DTX flags */
+    /**************************************************/
+    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
+        psEnc->sCmn.noSpeechCounter++;
+        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.inDTX = 0;
+        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
+            psEnc->sCmn.inDTX           = 0;
+        }
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+    } else {
+        psEnc->sCmn.noSpeechCounter    = 0;
+        psEnc->sCmn.inDTX              = 0;
+        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+    }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
+    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
+    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
+    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
+    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
+)
+{
+    silk_encoder_control_FLP sEncCtrl;
+    opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
+    silk_float   *x_frame, *res_pitch_frame;
+    silk_float   xfw[ MAX_FRAME_LENGTH ];
+    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+    ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
+    silk_nsq_state sNSQ_copy, sNSQ_copy2;
+    opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+    opus_int32   gainsID, gainsID_lower, gainsID_upper;
+    opus_int16   gainMult_Q8;
+    opus_int16   ec_prevLagIndex_copy;
+    opus_int     ec_prevSignalType_copy;
+    opus_int8    LastGainIndex_copy2;
+    opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
+    opus_uint8   ec_buf_copy[ 1275 ];
+
+    /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
+    LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Set up Input Pointers, and insert frame in input buffer    */
+    /**************************************************************/
+    /* pointers aligned with start of frame to encode */
+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
+
+    /***************************************/
+    /* Ensure smooth bandwidth transitions */
+    /***************************************/
+    silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /*******************************************/
+    /* Copy new frame to front of input buffer */
+    /*******************************************/
+    silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
+    for( i = 0; i < 8; i++ ) {
+        x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
+    }
+
+    if( !psEnc->sCmn.prefillFlag ) {
+        /*****************************************/
+        /* Find pitch lags, initial LPC analysis */
+        /*****************************************/
+        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+        /************************/
+        /* Noise shape analysis */
+        /************************/
+        silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+
+        /***************************************************/
+        /* Find linear prediction coefficients (LPC + LTP) */
+        /***************************************************/
+        silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+        /****************************************/
+        /* Process gains                        */
+        /****************************************/
+        silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
+
+        /*****************************************/
+        /* Prefiltering for noise shaper         */
+        /*****************************************/
+        silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
+
+        /****************************************/
+        /* Low Bitrate Redundant Encoding       */
+        /****************************************/
+        silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );
+
+        /* Loop over quantizer and entroy coding to control bitrate */
+        maxIter = 6;
+        gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
+        found_lower = 0;
+        found_upper = 0;
+        gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        gainsID_lower = -1;
+        gainsID_upper = -1;
+        /* Copy part of the input state */
+        silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+        silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        seed_copy = psEnc->sCmn.indices.Seed;
+        ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+        ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+        for( iter = 0; ; iter++ ) {
+            if( gainsID == gainsID_lower ) {
+                nBits = nBits_lower;
+            } else if( gainsID == gainsID_upper ) {
+                nBits = nBits_upper;
+            } else {
+                /* Restore part of the input state */
+                if( iter > 0 ) {
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+                    psEnc->sCmn.indices.Seed = seed_copy;
+                    psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+                    psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+                }
+
+                /*****************************************/
+                /* Noise shaping quantization            */
+                /*****************************************/
+                silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
+
+                /****************************************/
+                /* Encode Parameters                    */
+                /****************************************/
+                silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+                /****************************************/
+                /* Encode Excitation Signal             */
+                /****************************************/
+                silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+                      psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+                nBits = ec_tell( psRangeEnc );
+
+                if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
+                    break;
+                }
+            }
+
+            if( iter == maxIter ) {
+                if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+                    /* Restore output state from earlier iteration that did meet the bitrate budget */
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+                    psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+                }
+                break;
+            }
+
+            if( nBits > maxBits ) {
+                if( found_lower == 0 && iter >= 2 ) {
+                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+                    sEncCtrl.Lambda *= 1.5f;
+                    found_upper = 0;
+                    gainsID_upper = -1;
+                } else {
+                    found_upper = 1;
+                    nBits_upper = nBits;
+                    gainMult_upper = gainMult_Q8;
+                    gainsID_upper = gainsID;
+                }
+            } else if( nBits < maxBits - 5 ) {
+                found_lower = 1;
+                nBits_lower = nBits;
+                gainMult_lower = gainMult_Q8;
+                if( gainsID != gainsID_lower ) {
+                    gainsID_lower = gainsID;
+                    /* Copy part of the output state */
+                    silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+                    silk_assert( psRangeEnc->offs <= 1275 );
+                    silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+                    silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+                    LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+                }
+            } else {
+                /* Within 5 bits of budget: close enough */
+                break;
+            }
+
+            if( ( found_lower & found_upper ) == 0 ) {
+                /* Adjust gain according to high-rate rate/distortion curve */
+                opus_int32 gain_factor_Q16;
+                gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+                gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
+                if( nBits > maxBits ) {
+                    gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
+                }
+                gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+            } else {
+                /* Adjust gain by interpolating */
+                gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
+                /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+                if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+                } else
+                if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+                }
+            }
+
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+            }
+
+            /* Quantize gains */
+            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
+            silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
+                  &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+            /* Unique identifier of gains vector */
+            gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+
+            /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
+            }
+        }
+    }
+
+    /* Update input buffer */
+    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
+
+    /* Exit without entropy coding */
+    if( psEnc->sCmn.prefillFlag ) {
+        /* No payload */
+        *pnBytesOut = 0;
+        return ret;
+    }
+
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+    /****************************************/
+    /* Finalize payload                     */
+    /****************************************/
+    psEnc->sCmn.first_frame_after_reset = 0;
+    /* Payload size */
+    *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+    return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                xfw[],                              /* I    Input signal                                */
+    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
+)
+{
+    opus_int     k;
+    opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
+    silk_float   TempGains[ MAX_NB_SUBFR ];
+    SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
+    silk_nsq_state sNSQ_LBRR;
+
+    /*******************************************/
+    /* Control use of inband LBRR              */
+    /*******************************************/
+    if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
+        psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+
+        /* Copy noise shaping quantizer state and quantization indices from regular encoding */
+        silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+        /* Save original gains */
+        silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+
+        if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+            /* First frame in packet or previous frame not LBRR coded */
+            psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+            /* Increase Gains to get target LBRR rate */
+            psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
+            psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+        }
+
+        /* Decode to get gains in sync with decoder */
+        silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
+            &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+        /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+        for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
+        }
+
+        /*****************************************/
+        /* Noise shaping quantization            */
+        /*****************************************/
+        silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
+            psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
+
+        /* Restore original gains */
+        silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/float/energy_FLP.c b/src/main/jni/opus/silk/float/energy_FLP.c
new file mode 100644
index 000000000..24b8179f9
--- /dev/null
+++ b/src/main/jni/opus/silk/float/energy_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* sum of squares of a silk_float array, with result as double */
+double silk_energy_FLP(
+    const silk_float    *data,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        result += data[ i + 0 ] * (double)data[ i + 0 ] +
+                  data[ i + 1 ] * (double)data[ i + 1 ] +
+                  data[ i + 2 ] * (double)data[ i + 2 ] +
+                  data[ i + 3 ] * (double)data[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data[ i ] * (double)data[ i ];
+    }
+
+    silk_assert( result >= 0.0 );
+    return result;
+}
diff --git a/src/main/jni/opus/silk/float/find_LPC_FLP.c b/src/main/jni/opus/silk/float/find_LPC_FLP.c
new file mode 100644
index 000000000..61c1ad955
--- /dev/null
+++ b/src/main/jni/opus/silk/float/find_LPC_FLP.c
@@ -0,0 +1,104 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "define.h"
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* LPC analysis */
+void silk_find_LPC_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
+    const silk_float                x[],                                /* I    Input signal                                */
+    const silk_float                minInvGain                          /* I    Inverse of max prediction gain              */
+)
+{
+    opus_int    k, subfr_length;
+    silk_float  a[ MAX_LPC_ORDER ];
+
+    /* Used only for NLSF interpolation */
+    silk_float  res_nrg, res_nrg_2nd, res_nrg_interp;
+    opus_int16  NLSF0_Q15[ MAX_LPC_ORDER ];
+    silk_float  a_tmp[ MAX_LPC_ORDER ];
+    silk_float  LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ];
+
+    subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
+
+    /* Default: No interpolation */
+    psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+    /* Burg AR analysis for the full frame */
+    res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+
+    if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than        */
+        /* adding it to the residual energy of the first 10 ms in each iteration of the search below    */
+        res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder );
+
+        /* Convert to NLSFs */
+        silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder );
+
+        /* Search over interpolation indices to find the one with lowest residual energy */
+        res_nrg_2nd = silk_float_MAX;
+        for( k = 3; k >= 0; k-- ) {
+            /* Interpolate NLSFs for first half */
+            silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
+
+            /* Convert to LPC for residual energy evaluation */
+            silk_NLSF2A_FLP( a_tmp, NLSF0_Q15, psEncC->predictLPCOrder );
+
+            /* Calculate residual energy with LSF interpolation */
+            silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder );
+            res_nrg_interp = (silk_float)(
+                silk_energy_FLP( LPC_res + psEncC->predictLPCOrder,                subfr_length - psEncC->predictLPCOrder ) +
+                silk_energy_FLP( LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ) );
+
+            /* Determine whether current interpolated NLSFs are best so far */
+            if( res_nrg_interp < res_nrg ) {
+                /* Interpolation has lower residual energy */
+                res_nrg = res_nrg_interp;
+                psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k;
+            } else if( res_nrg_interp > res_nrg_2nd ) {
+                /* No reason to continue iterating - residual energies will continue to climb */
+                break;
+            }
+            res_nrg_2nd = res_nrg_interp;
+        }
+    }
+
+    if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
+        silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder );
+    }
+
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || 
+        ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+}
diff --git a/src/main/jni/opus/silk/float/find_LTP_FLP.c b/src/main/jni/opus/silk/float/find_LTP_FLP.c
new file mode 100644
index 000000000..722999601
--- /dev/null
+++ b/src/main/jni/opus/silk/float/find_LTP_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+void silk_find_LTP_FLP(
+    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
+    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
+    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
+    const silk_float                r_lpc[],                            /* I    LPC residual                                */
+    const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */
+    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
+)
+{
+    opus_int   i, k;
+    silk_float *b_ptr, temp, *WLTP_ptr;
+    silk_float LPC_res_nrg, LPC_LTP_res_nrg;
+    silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ];
+    silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu;
+    silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
+    const silk_float *r_ptr, *lag_ptr;
+
+    b_ptr    = b;
+    WLTP_ptr = WLTP;
+    r_ptr    = &r_lpc[ mem_offset ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
+
+        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr );
+        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr );
+
+        rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length );
+        regu = 1.0f + rr[ k ] +
+            matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) +
+            matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER );
+        regu *= LTP_DAMPING / 3;
+        silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER );
+        silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr );
+
+        /* Calculate residual energy */
+        nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER );
+
+        temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length );
+        silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER );
+        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER );
+
+        r_ptr    += subfr_length;
+        b_ptr    += LTP_ORDER;
+        WLTP_ptr += LTP_ORDER * LTP_ORDER;
+    }
+
+    /* Compute LTP coding gain */
+    if( LTPredCodGain != NULL ) {
+        LPC_LTP_res_nrg = 1e-6f;
+        LPC_res_nrg     = 0.0f;
+        for( k = 0; k < nb_subfr; k++ ) {
+            LPC_res_nrg     += rr[  k ] * Wght[ k ];
+            LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ];
+        }
+
+        silk_assert( LPC_LTP_res_nrg > 0 );
+        *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg );
+    }
+
+    /* Smoothing */
+    /* d = sum( B, 1 ); */
+    b_ptr = b;
+    for( k = 0; k < nb_subfr; k++ ) {
+        d[ k ] = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            d[ k ] += b_ptr[ i ];
+        }
+        b_ptr += LTP_ORDER;
+    }
+    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
+    temp = 1e-3f;
+    for( k = 0; k < nb_subfr; k++ ) {
+        temp += w[ k ];
+    }
+    m = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        m += d[ k ] * w[ k ];
+    }
+    m = m / temp;
+
+    b_ptr = b;
+    for( k = 0; k < nb_subfr; k++ ) {
+        g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] );
+        temp = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f );
+            temp += delta_b[ i ];
+        }
+        temp = g / temp;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp;
+        }
+        b_ptr += LTP_ORDER;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/find_pitch_lags_FLP.c b/src/main/jni/opus/silk/float/find_pitch_lags_FLP.c
new file mode 100644
index 000000000..f3b22d25c
--- /dev/null
+++ b/src/main/jni/opus/silk/float/find_pitch_lags_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+void silk_find_pitch_lags_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    silk_float                      res[],                              /* O    Residual                                    */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
+)
+{
+    opus_int   buf_len;
+    silk_float thrhld, res_nrg;
+    const silk_float *x_buf_ptr, *x_buf;
+    silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+    silk_float A[         MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float Wsig[      FIND_PITCH_LPC_WIN_MAX ];
+    silk_float *Wsig_ptr;
+
+    /******************************************/
+    /* Set up buffer lengths etc based on Fs  */
+    /******************************************/
+    buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
+
+    /* Safety check */
+    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+    x_buf = x - psEnc->sCmn.ltp_mem_length;
+
+    /******************************************/
+    /* Estimate LPC AR coeficients            */
+    /******************************************/
+
+    /* Calculate windowed signal */
+
+    /* First LA_LTP samples */
+    x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+    Wsig_ptr  = Wsig;
+    silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
+
+    /* Middle non-windowed samples */
+    Wsig_ptr  += psEnc->sCmn.la_pitch;
+    x_buf_ptr += psEnc->sCmn.la_pitch;
+    silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) );
+
+    /* Last LA_LTP samples */
+    Wsig_ptr  += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 );
+    x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 );
+    silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
+
+    /* Calculate autocorrelation sequence */
+    silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+
+    /* Add white noise, as a fraction of the energy */
+    auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1;
+
+    /* Calculate the reflection coefficients using Schur */
+    res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Prediction gain */
+    psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f );
+
+    /* Convert reflection coefficients to prediction coefficients */
+    silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Bandwidth expansion */
+    silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION );
+
+    /*****************************************/
+    /* LPC analysis filtering                */
+    /*****************************************/
+    silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
+        /* Threshold for pitch estimator */
+        thrhld  = 0.6f;
+        thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder;
+        thrhld -= 0.1f   * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+        thrhld -= 0.15f  * (psEnc->sCmn.prevSignalType >> 1);
+        thrhld -= 0.1f   * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f );
+
+        /*****************************************/
+        /* Call Pitch estimator                  */
+        /*****************************************/
+        if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex,
+            &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f,
+            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 )
+        {
+            psEnc->sCmn.indices.signalType = TYPE_VOICED;
+        } else {
+            psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        }
+    } else {
+        silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+        psEnc->sCmn.indices.lagIndex = 0;
+        psEnc->sCmn.indices.contourIndex = 0;
+        psEnc->LTPCorr = 0;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/find_pred_coefs_FLP.c b/src/main/jni/opus/silk/float/find_pred_coefs_FLP.c
new file mode 100644
index 000000000..ea2c6c432
--- /dev/null
+++ b/src/main/jni/opus/silk/float/find_pred_coefs_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    opus_int         i;
+    silk_float       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
+    silk_float       invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ];
+    opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];
+    const silk_float *x_ptr;
+    silk_float       *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
+    silk_float       minInvGain;
+
+    /* Weighting for weighted least squares */
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        silk_assert( psEncCtrl->Gains[ i ] > 0.0f );
+        invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ];
+        Wght[ i ]     = invGains[ i ] * invGains[ i ];
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /**********/
+        /* VOICED */
+        /**********/
+        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+        /* LTP analysis */
+        silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch,
+            psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length );
+
+        /* Quantize LTP gain parameters */
+        silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
+
+        /* Control LTP scaling */
+        silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
+
+        /* Create LTP residual */
+        silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef,
+            psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+    } else {
+        /************/
+        /* UNVOICED */
+        /************/
+        /* Create signal with prepended subframes, scaled by inverse gains */
+        x_ptr     = x - psEnc->sCmn.predictLPCOrder;
+        x_pre_ptr = LPC_in_pre;
+        for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+            silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ],
+                psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
+            x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
+            x_ptr     += psEnc->sCmn.subfr_length;
+        }
+        silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
+        psEncCtrl->LTPredCodGain = 0.0f;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
+    }
+
+    /* Limit on total predictive coding gain */
+    if( psEnc->sCmn.first_frame_after_reset ) {
+        minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET;
+    } else {        
+        minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) /  MAX_PREDICTION_POWER_GAIN;
+        minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
+    }
+
+    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+    silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain );
+
+    /* Quantize LSFs */
+    silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
+
+    /* Calculate residual energy using quantized LPC coefficients */
+    silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains,
+        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    /* Copy to prediction struct for use in next frame for interpolation */
+    silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+}
+
diff --git a/src/main/jni/opus/silk/float/inner_product_FLP.c b/src/main/jni/opus/silk/float/inner_product_FLP.c
new file mode 100644
index 000000000..029c01291
--- /dev/null
+++ b/src/main/jni/opus/silk/float/inner_product_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* inner product of two silk_float arrays, with result as double */
+double silk_inner_product_FLP(
+    const silk_float    *data1,
+    const silk_float    *data2,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
+                  data1[ i + 1 ] * (double)data2[ i + 1 ] +
+                  data1[ i + 2 ] * (double)data2[ i + 2 ] +
+                  data1[ i + 3 ] * (double)data2[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data1[ i ] * (double)data2[ i ];
+    }
+
+    return result;
+}
diff --git a/src/main/jni/opus/silk/float/k2a_FLP.c b/src/main/jni/opus/silk/float/k2a_FLP.c
new file mode 100644
index 000000000..12af4e766
--- /dev/null
+++ b/src/main/jni/opus/silk/float/k2a_FLP.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_FLP(
+    silk_float          *A,                 /* O     prediction coefficients [order]                            */
+    const silk_float    *rc,                /* I     reflection coefficients [order]                            */
+    opus_int32          order               /* I     prediction order                                           */
+)
+{
+    opus_int   k, n;
+    silk_float Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A[ n ] += Atmp[ k - n - 1 ] * rc[ k ];
+        }
+        A[ k ] = -rc[ k ];
+    }
+}
diff --git a/src/main/jni/opus/silk/float/levinsondurbin_FLP.c b/src/main/jni/opus/silk/float/levinsondurbin_FLP.c
new file mode 100644
index 000000000..f0ba60698
--- /dev/null
+++ b/src/main/jni/opus/silk/float/levinsondurbin_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Solve the normal equations using the Levinson-Durbin recursion */
+silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                                     */
+    silk_float          A[],                /* O    prediction coefficients    [order]                          */
+    const silk_float    corr[],             /* I    input auto-correlations [order + 1]                         */
+    const opus_int      order               /* I    prediction order                                            */
+)
+{
+    opus_int   i, mHalf, m;
+    silk_float min_nrg, nrg, t, km, Atmp1, Atmp2;
+
+    min_nrg = 1e-12f * corr[ 0 ] + 1e-9f;
+    nrg = corr[ 0 ];
+    nrg = silk_max_float(min_nrg, nrg);
+    A[ 0 ] = corr[ 1 ] / nrg;
+    nrg -= A[ 0 ] * corr[ 1 ];
+    nrg = silk_max_float(min_nrg, nrg);
+
+    for( m = 1; m < order; m++ )
+    {
+        t = corr[ m + 1 ];
+        for( i = 0; i < m; i++ ) {
+            t -= A[ i ] * corr[ m - i ];
+        }
+
+        /* reflection coefficient */
+        km = t / nrg;
+
+        /* residual energy */
+        nrg -= km * t;
+        nrg = silk_max_float(min_nrg, nrg);
+
+        mHalf = m >> 1;
+        for( i = 0; i < mHalf; i++ ) {
+            Atmp1 = A[ i ];
+            Atmp2 = A[ m - i - 1 ];
+            A[ m - i - 1 ] -= km * Atmp1;
+            A[ i ]         -= km * Atmp2;
+        }
+        if( m & 1 ) {
+            A[ mHalf ]     -= km * A[ mHalf ];
+        }
+        A[ m ] = km;
+    }
+
+    /* return the residual energy */
+    return nrg;
+}
+
diff --git a/src/main/jni/opus/silk/float/main_FLP.h b/src/main/jni/opus/silk/float/main_FLP.h
new file mode 100644
index 000000000..fb553b61a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/main_FLP.h
@@ -0,0 +1,312 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FLP_H
+#define SILK_MAIN_FLP_H
+
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "structs_FLP.h"
+#include "main.h"
+#include "define.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FLP
+#define silk_encode_frame_Fxx       silk_encode_frame_FLP
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
+);
+
+/* Encoder main function */
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+);
+
+/* Encoder main function */
+opus_int silk_encode_frame_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
+    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
+    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
+    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
+    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
+);
+
+/* Initializes the Silk encoder state */
+opus_int silk_init_encoder(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    int                              arch                               /* I    Run-tim architecture                        */
+);
+
+/* Control the Silk encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Pointer to Silk encoder state FLP           */
+    silk_EncControlStruct           *encControl,                        /* I    Control structure                           */
+    const opus_int32                TargetRate_bps,                     /* I    Target max bitrate (bps)                    */
+    const opus_int                  allow_bw_switch,                    /* I    Flag to allow switching audio bandwidth     */
+    const opus_int                  channelNb,                          /* I    Channel number                              */
+    const opus_int                  force_fs_kHz
+);
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    const silk_encoder_control_FLP  *psEncCtrl,                         /* I    Encoder control FLP                         */
+    silk_float                      xw[],                               /* O    Weighted signal                             */
+    const silk_float                x[]                                 /* I    Speech signal                               */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
+    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FLP(
+    silk_float                      *corr,                              /* O    Result [order + 1]                          */
+    const silk_float                *input,                             /* I    Input data to correlate                     */
+    const silk_float                warping,                            /* I    Warping coefficient                         */
+    const opus_int                  length,                             /* I    Length of input                             */
+    const opus_int                  order                               /* I    Correlation order (even)                    */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/**********************************************/
+/* Prediction Analysis                        */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    silk_float                      res[],                              /* O    Residual                                    */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/* LPC analysis */
+void silk_find_LPC_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
+    const silk_float                x[],                                /* I    Input signal                                */
+    const silk_float                minInvGain                          /* I    Prediction gain from LTP (dB)               */
+);
+
+/* LTP analysis */
+void silk_find_LTP_FLP(
+    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
+    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
+    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
+    const silk_float                r_lpc[],                            /* I    LPC residual                                */
+    const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */
+    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
+);
+
+void silk_LTP_analysis_filter_FLP(
+    silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+    const silk_float                *x,                                 /* I    Input signal, with preceding samples        */
+    const silk_float                B[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    LTP coefficients for each subframe          */
+    const opus_int                  pitchL[   MAX_NB_SUBFR ],           /* I    Pitch lags                                  */
+    const silk_float                invGains[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains                  */
+    const opus_int                  subfr_length,                       /* I    Length of each subframe                     */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  pre_length                          /* I    Preceding samples for each subframe         */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FLP(
+    silk_float                      nrgs[ MAX_NB_SUBFR ],               /* O    Residual energy per subframe                */
+    const silk_float                x[],                                /* I    Input signal                                */
+    silk_float                      a[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                */
+    const silk_float                gains[],                            /* I    Quantization gains                          */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* 16th order LPC analysis filter */
+void silk_LPC_analysis_filter_FLP(
+    silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
+    const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
+    const silk_float                s[],                                /* I    Input signal                                */
+    const opus_int                  length,                             /* I    Length of input signal                      */
+    const opus_int                  Order                               /* I    LPC order                                   */
+);
+
+/* LTP tap quantizer */
+void silk_quant_LTP_gains_FLP(
+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
+    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
+    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
+    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
+    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
+    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
+    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+);
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
+    const silk_float                *c,                                 /* I    Filter coefficients                         */
+    silk_float                      *wXX,                               /* I/O  Weighted correlation matrix, reg. out       */
+    const silk_float                *wXx,                               /* I    Weighted correlation vector                 */
+    const silk_float                wxx,                                /* I    Weighted correlation value                  */
+    const opus_int                  D                                   /* I    Dimension                                   */
+);
+
+/* Processing of gains */
+void silk_process_gains_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FLP(
+    const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */
+    const opus_int                  L,                                  /* I    Length of vectors                           */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */
+);
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FLP(
+    const silk_float                *x,                                 /* I    x vector [L+order-1] used to create X       */
+    const silk_float                *t,                                 /* I    Target vector [L]                           */
+    const opus_int                  L,                                  /* I    Length of vecors                            */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */
+);
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FLP(
+    silk_float                      *XX,                                /* I/O  Correlation matrices                        */
+    silk_float                      *xx,                                /* I/O  Correlation values                          */
+    const silk_float                noise,                              /* I    Noise energy to add                         */
+    const opus_int                  D                                   /* I    Dimension of XX                             */
+);
+
+/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */
+void silk_solve_LDL_FLP(
+    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */
+    const opus_int                  M,                                  /* I    Size of matrix                              */
+    const silk_float                *b,                                 /* I    Pointer to b vector                         */
+    silk_float                      *x                                  /* O    Pointer to x solution vector                */
+);
+
+/* Apply sine window to signal vector.  */
+/* Window types:                        */
+/*  1 -> sine window from 0 to pi/2     */
+/*  2 -> sine window from pi/2 to pi    */
+void silk_apply_sine_window_FLP(
+    silk_float                      px_win[],                           /* O    Pointer to windowed signal                  */
+    const silk_float                px[],                               /* I    Pointer to input signal                     */
+    const opus_int                  win_type,                           /* I    Selects a window type                       */
+    const opus_int                  length                              /* I    Window length, multiple of 4                */
+);
+
+/* Wrapper functions. Call flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters */
+void silk_A2NLSF_FLP(
+    opus_int16                      *NLSF_Q15,                          /* O    NLSF vector      [ LPC_order ]              */
+    const silk_float                *pAR,                               /* I    LPC coefficients [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* Convert NLSF parameters to AR prediction filter coefficients */
+void silk_NLSF2A_FLP(
+    silk_float                      *pAR,                               /* O    LPC coefficients [ LPC_order ]              */
+    const opus_int16                *NLSF_Q15,                          /* I    NLSF vector      [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* Limit, stabilize, and quantize NLSFs */
+void silk_process_NLSFs_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    silk_float                      PredCoef[ 2 ][ MAX_LPC_ORDER ],     /* O    Prediction coefficients                     */
+    opus_int16                      NLSF_Q15[      MAX_LPC_ORDER ],     /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16                prev_NLSF_Q15[ MAX_LPC_ORDER ]      /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+);
+
+/* Floating-point Silk NSQ wrapper      */
+void silk_NSQ_wrapper_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    SideInfoIndices                 *psIndices,                         /* I/O  Quantization indices                        */
+    silk_nsq_state                  *psNSQ,                             /* I/O  Noise Shaping Quantzation state             */
+    opus_int8                       pulses[],                           /* O    Quantized pulse signal                      */
+    const silk_float                x[]                                 /* I    Prefiltered input signal                    */
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/float/noise_shape_analysis_FLP.c b/src/main/jni/opus/silk/float/noise_shape_analysis_FLP.c
new file mode 100644
index 000000000..65f6ea587
--- /dev/null
+++ b/src/main/jni/opus/silk/float/noise_shape_analysis_FLP.c
@@ -0,0 +1,365 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
+/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
+/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
+/* coefficient in an array of coefficients, for monic filters.                                    */
+static OPUS_INLINE silk_float warped_gain(
+    const silk_float     *coefs,
+    silk_float           lambda,
+    opus_int             order
+) {
+    opus_int   i;
+    silk_float gain;
+
+    lambda = -lambda;
+    gain = coefs[ order - 1 ];
+    for( i = order - 2; i >= 0; i-- ) {
+        gain = lambda * gain + coefs[ i ];
+    }
+    return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) );
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
+static OPUS_INLINE void warped_true2monic_coefs(
+    silk_float           *coefs_syn,
+    silk_float           *coefs_ana,
+    silk_float           lambda,
+    silk_float           limit,
+    opus_int             order
+) {
+    opus_int   i, iter, ind = 0;
+    silk_float tmp, maxabs, chirp, gain_syn, gain_ana;
+
+    /* Convert to monic coefficients */
+    for( i = order - 1; i > 0; i-- ) {
+        coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+        coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+    }
+    gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+    gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+    for( i = 0; i < order; i++ ) {
+        coefs_syn[ i ] *= gain_syn;
+        coefs_ana[ i ] *= gain_ana;
+    }
+
+    /* Limit */
+    for( iter = 0; iter < 10; iter++ ) {
+        /* Find maximum absolute value */
+        maxabs = -1.0f;
+        for( i = 0; i < order; i++ ) {
+            tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) );
+            if( tmp > maxabs ) {
+                maxabs = tmp;
+                ind = i;
+            }
+        }
+        if( maxabs <= limit ) {
+            /* Coefficients are within range - done */
+            return;
+        }
+
+        /* Convert back to true warped coefficients */
+        for( i = 1; i < order; i++ ) {
+            coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ];
+            coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ];
+        }
+        gain_syn = 1.0f / gain_syn;
+        gain_ana = 1.0f / gain_ana;
+        for( i = 0; i < order; i++ ) {
+            coefs_syn[ i ] *= gain_syn;
+            coefs_ana[ i ] *= gain_ana;
+        }
+
+        /* Apply bandwidth expansion */
+        chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) );
+        silk_bwexpander_FLP( coefs_syn, order, chirp );
+        silk_bwexpander_FLP( coefs_ana, order, chirp );
+
+        /* Convert to monic warped coefficients */
+        for( i = order - 1; i > 0; i-- ) {
+            coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+            coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+        }
+        gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+        gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn[ i ] *= gain_syn;
+            coefs_ana[ i ] *= gain_ana;
+        }
+    }
+    silk_assert( 0 );
+}
+
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
+    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
+)
+{
+    silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
+    opus_int     k, nSamples;
+    silk_float   SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt;
+    silk_float   nrg, pre_nrg, log_energy, log_energy_prev, energy_variation;
+    silk_float   delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping;
+    silk_float   x_windowed[ SHAPE_LPC_WIN_MAX ];
+    silk_float   auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+    const silk_float *x_ptr, *pitch_res_ptr;
+
+    /* Point to start of first LPC analysis block */
+    x_ptr = x - psEnc->sCmn.la_shape;
+
+    /****************/
+    /* GAIN CONTROL */
+    /****************/
+    SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f );
+
+    /* Input quality is the average of the quality in the lowest two VAD bands */
+    psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f );
+
+    /* Coding quality level, between 0.0 and 1.0 */
+    psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) );
+
+    if( psEnc->sCmn.useCBR == 0 ) {
+        /* Reduce coding SNR during low speech activity */
+        b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+        SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b;
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce gains for periodic signals */
+        SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr;
+    } else {
+        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+        SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality );
+    }
+
+    /*************************/
+    /* SPARSENESS PROCESSING */
+    /*************************/
+    /* Set quantizer offset */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Initially set to 0; may be overruled in process_gains(..) */
+        psEnc->sCmn.indices.quantOffsetType = 0;
+        psEncCtrl->sparseness = 0.0f;
+    } else {
+        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+        nSamples = 2 * psEnc->sCmn.fs_kHz;
+        energy_variation = 0.0f;
+        log_energy_prev  = 0.0f;
+        pitch_res_ptr = pitch_res;
+        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+            nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples );
+            log_energy = silk_log2( nrg );
+            if( k > 0 ) {
+                energy_variation += silk_abs_float( log_energy - log_energy_prev );
+            }
+            log_energy_prev = log_energy;
+            pitch_res_ptr += nSamples;
+        }
+        psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) );
+
+        /* Set quantization offset depending on sparseness measure */
+        if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+
+        /* Increase coding SNR for sparse signals */
+        SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f );
+    }
+
+    /*******************************/
+    /* Control bandwidth expansion */
+    /*******************************/
+    /* More BWE for signals with high prediction gain */
+    strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain;           /* between 0.0 and 1.0 */
+    BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength );
+    delta  = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality );
+    BWExp1 -= delta;
+    BWExp2 += delta;
+    /* BWExp1 will be applied after BWExp2, so make it relative */
+    BWExp1 /= BWExp2;
+
+    if( psEnc->sCmn.warping_Q16 > 0 ) {
+        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+        warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality;
+    } else {
+        warping = 0.0f;
+    }
+
+    /********************************************/
+    /* Compute noise shaping AR coefs and gains */
+    /********************************************/
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Apply window: sine slope followed by flat part followed by cosine slope */
+        opus_int shift, slope_part, flat_part;
+        flat_part = psEnc->sCmn.fs_kHz * 3;
+        slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2;
+
+        silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part );
+        shift = slope_part;
+        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) );
+        shift += flat_part;
+        silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+        /* Update pointer: next LPC analysis block */
+        x_ptr += psEnc->sCmn.subfr_length;
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Calculate warped auto correlation */
+            silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping,
+                psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+        } else {
+            /* Calculate regular auto correlation */
+            silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+        }
+
+        /* Add white noise, as a fraction of energy */
+        auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION;
+
+        /* Convert correlations to prediction coefficients, and compute residual energy */
+        nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder );
+        psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg );
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Adjust gain for warping */
+            psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder );
+        }
+
+        /* Bandwidth expansion for synthesis filter shaping */
+        silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 );
+
+        /* Compute noise shaping filter coefficients */
+        silk_memcpy(
+            &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+            &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ],
+            psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) );
+
+        /* Bandwidth expansion for analysis filter shaping */
+        silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 );
+
+        /* Ratio of prediction gains, in energy domain */
+        pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+        nrg     = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+        psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg );
+
+        /* Convert to monic warped prediction coefficients and limit absolute values */
+        warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+            warping, 3.999f, psEnc->sCmn.shapingLPCOrder );
+    }
+
+    /*****************/
+    /* Gain tweaking */
+    /*****************/
+    /* Increase gains during low speech activity */
+    gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB );
+    gain_add  = (silk_float)pow( 2.0f,  0.16f * MIN_QGAIN_DB );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains[ k ] *= gain_mult;
+        psEncCtrl->Gains[ k ] += gain_add;
+    }
+
+    gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT;
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->GainsPre[ k ] *= gain_mult;
+    }
+
+    /************************************************/
+    /* Control low-frequency shaping and noise tilt */
+    /************************************************/
+    /* Less low frequency shaping for noisy inputs */
+    strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) );
+    strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ];
+            psEncCtrl->LF_MA_shp[ k ] = -1.0f + b;
+            psEncCtrl->LF_AR_shp[ k ] =  1.0f - b - b * strength;
+        }
+        Tilt = - HP_NOISE_COEF -
+            (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+    } else {
+        b = 1.3f / psEnc->sCmn.fs_kHz;
+        psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b;
+        psEncCtrl->LF_AR_shp[ 0 ] =  1.0f - b - b * strength * 0.6f;
+        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ];
+            psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ];
+        }
+        Tilt = -HP_NOISE_COEF;
+    }
+
+    /****************************/
+    /* HARMONIC SHAPING CONTROL */
+    /****************************/
+    /* Control boosting of harmonic frequencies */
+    HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr;
+
+    /* More harmonic boost for noisy input signals */
+    HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality );
+
+    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Harmonic noise shaping */
+        HarmShapeGain = HARMONIC_SHAPING;
+
+        /* More harmonic noise shaping for high bitrates or noisy input */
+        HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING *
+            ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality );
+
+        /* Less harmonic noise shaping for less periodic signals */
+        HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr );
+    } else {
+        HarmShapeGain = 0.0f;
+    }
+
+    /*************************/
+    /* Smooth over subframes */
+    /*************************/
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psShapeSt->HarmBoost_smth     += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth );
+        psEncCtrl->HarmBoost[ k ]      = psShapeSt->HarmBoost_smth;
+        psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth );
+        psEncCtrl->HarmShapeGain[ k ]  = psShapeSt->HarmShapeGain_smth;
+        psShapeSt->Tilt_smth          += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth );
+        psEncCtrl->Tilt[ k ]           = psShapeSt->Tilt_smth;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/pitch_analysis_core_FLP.c b/src/main/jni/opus/silk/float/pitch_analysis_core_FLP.c
new file mode 100644
index 000000000..e58f041bd
--- /dev/null
+++ b/src/main/jni/opus/silk/float/pitch_analysis_core_FLP.c
@@ -0,0 +1,630 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/*****************************************************************************
+* Pitch analyser function
+******************************************************************************/
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE        22
+
+/************************************************************/
+/* Internally used functions                                */
+/************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
+);
+
+static void silk_P_Ana_calc_energy_st3(
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
+);
+
+/************************************************************/
+/* CORE PITCH ANALYSIS FUNCTION                             */
+/************************************************************/
+opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const silk_float    *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int            *pitch_out,         /* O    Pitch lag values [nb_subfr]                                 */
+    opus_int16          *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
+    silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
+    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
+    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int   i, k, d, j;
+    silk_float frame_8kHz[  PE_MAX_FRAME_LENGTH_MS * 8 ];
+    silk_float frame_4kHz[  PE_MAX_FRAME_LENGTH_MS * 4 ];
+    opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ];
+    opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ];
+    opus_int32 filt_state[ 6 ];
+    silk_float threshold, contour_bias;
+    silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
+    silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
+    const silk_float *target_ptr, *basis_ptr;
+    double    cross_corr, normalizer, energy, energy_tmp;
+    opus_int   d_srch[ PE_D_SRCH_LENGTH ];
+    opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ];
+    opus_int   length_d_srch, length_d_comp;
+    silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+    opus_int   CBimax, CBimax_new, lag, start_lag, end_lag, lag_new;
+    opus_int   cbk_size;
+    silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr;
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+    opus_int   lag_counter;
+    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
+    opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;
+    opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;
+    opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;
+    opus_int   nb_cbk_search;
+    const opus_int8 *Lag_CB_ptr;
+
+    /* Check for valid sampling frequency */
+    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+    /* Check for valid complexity setting */
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f );
+    silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f );
+
+    /* Set up frame lengths max / min lag for the sampling frequency */
+    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
+    sf_length_4kHz    = PE_SUBFR_LENGTH_MS * 4;
+    sf_length_8kHz    = PE_SUBFR_LENGTH_MS * 8;
+    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
+    min_lag_4kHz      = PE_MIN_LAG_MS * 4;
+    min_lag_8kHz      = PE_MIN_LAG_MS * 8;
+    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
+    max_lag_4kHz      = PE_MAX_LAG_MS * 4;
+    max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;
+
+    /* Resample from input sampled at Fs_kHz to 8 kHz */
+    if( Fs_kHz == 16 ) {
+        /* Resample to 16 -> 8 khz */
+        opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ];
+        silk_float2short_array( frame_16_FIX, frame, frame_length );
+        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+        silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length );
+        silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+    } else if( Fs_kHz == 12 ) {
+        /* Resample to 12 -> 8 khz */
+        opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ];
+        silk_float2short_array( frame_12_FIX, frame, frame_length );
+        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+        silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length );
+        silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+    } else {
+        silk_assert( Fs_kHz == 8 );
+        silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz );
+    }
+
+    /* Decimate again to 4 kHz */
+    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+    silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz );
+    silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz );
+
+    /* Low-pass filter */
+    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+        frame_4kHz[ i ] += frame_4kHz[ i - 1 ];
+    }
+
+    /******************************************************************************
+    * FIRST STAGE, operating in 4 khz
+    ******************************************************************************/
+    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
+    target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
+    for( k = 0; k < nb_subfr >> 1; k++ ) {
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_4kHz );
+        silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+        basis_ptr = target_ptr - min_lag_4kHz;
+
+        /* Check that we are within range of the array */
+        silk_assert( basis_ptr >= frame_4kHz );
+        silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
+        /* Calculate first vector products before loop */
+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
+                     sf_length_8kHz * 4000.0f;
+
+        C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
+
+        /* From now on normalizer is computed recursively */
+        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
+            basis_ptr--;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_4kHz );
+            silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+            cross_corr = xcorr[ max_lag_4kHz - d ];
+
+            /* Add contribution of new sample and remove contribution from oldest sample */
+            normalizer +=
+                basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
+                basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
+            C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
+        }
+        /* Update target pointer */
+        target_ptr += sf_length_8kHz;
+    }
+
+    /* Apply short-lag bias */
+    for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
+        C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f;
+    }
+
+    /* Sort */
+    length_d_srch = 4 + 2 * complexity;
+    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
+
+    /* Escape if correlation is very low already here */
+    Cmax = C[ 0 ][ min_lag_4kHz ];
+    if( Cmax < 0.2f ) {
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr      = 0.0f;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        return 1;
+    }
+
+    threshold = search_thres1 * Cmax;
+    for( i = 0; i < length_d_srch; i++ ) {
+        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+        if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
+            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
+        } else {
+            length_d_srch = i;
+            break;
+        }
+    }
+    silk_assert( length_d_srch > 0 );
+
+    for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
+        d_comp[ i ] = 0;
+    }
+    for( i = 0; i < length_d_srch; i++ ) {
+        d_comp[ d_srch[ i ] ] = 1;
+    }
+
+    /* Convolution */
+    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];
+    }
+
+    length_d_srch = 0;
+    for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {
+        if( d_comp[ i + 1 ] > 0 ) {
+            d_srch[ length_d_srch ] = i;
+            length_d_srch++;
+        }
+    }
+
+    /* Convolution */
+    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];
+    }
+
+    length_d_comp = 0;
+    for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {
+        if( d_comp[ i ] > 0 ) {
+            d_comp[ length_d_comp ] = (opus_int16)( i - 2 );
+            length_d_comp++;
+        }
+    }
+
+    /**********************************************************************************
+    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+    *************************************************************************************/
+    /*********************************************************************************
+    * Find energy of each subframe projected onto its history, for a range of delays
+    *********************************************************************************/
+    silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float));
+
+    if( Fs_kHz == 8 ) {
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    } else {
+        target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    }
+    for( k = 0; k < nb_subfr; k++ ) {
+        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
+        for( j = 0; j < length_d_comp; j++ ) {
+            d = d_comp[ j ];
+            basis_ptr = target_ptr - d;
+            cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
+            if( cross_corr > 0.0f ) {
+                energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
+                C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
+            } else {
+                C[ k ][ d ] = 0.0f;
+            }
+        }
+        target_ptr += sf_length_8kHz;
+    }
+
+    /* search over lag range and lags codebook */
+    /* scale factor for lag codebook, as a function of center lag */
+
+    CCmax   = 0.0f; /* This value doesn't matter */
+    CCmax_b = -1000.0f;
+
+    CBimax = 0; /* To avoid returning undefined lag values */
+    lag = -1;   /* To check if lag with strong enough correlation has been found */
+
+    if( prevLag > 0 ) {
+        if( Fs_kHz == 12 ) {
+            prevLag = silk_LSHIFT( prevLag, 1 ) / 3;
+        } else if( Fs_kHz == 16 ) {
+            prevLag = silk_RSHIFT( prevLag, 1 );
+        }
+        prevLag_log2 = silk_log2( (silk_float)prevLag );
+    } else {
+        prevLag_log2 = 0;
+    }
+
+    /* Set up stage 2 codebook based on number of subframes */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+            /* If input is 8 khz use a larger codebook here because it is last stage */
+            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE2;
+        }
+    } else {
+        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
+        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
+    }
+
+    for( k = 0; k < length_d_srch; k++ ) {
+        d = d_srch[ k ];
+        for( j = 0; j < nb_cbk_search; j++ ) {
+            CC[j] = 0.0f;
+            for( i = 0; i < nb_subfr; i++ ) {
+                /* Try all codebooks */
+                CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];
+            }
+        }
+        /* Find best codebook */
+        CCmax_new  = -1000.0f;
+        CBimax_new = 0;
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            if( CC[ i ] > CCmax_new ) {
+                CCmax_new = CC[ i ];
+                CBimax_new = i;
+            }
+        }
+
+        /* Bias towards shorter lags */
+        lag_log2 = silk_log2( (silk_float)d );
+        CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
+
+        /* Bias towards previous lag */
+        if( prevLag > 0 ) {
+            delta_lag_log2_sqr = lag_log2 - prevLag_log2;
+            delta_lag_log2_sqr *= delta_lag_log2_sqr;
+            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
+        }
+
+        if( CCmax_new_b > CCmax_b &&                /* Find maximum biased correlation                  */
+            CCmax_new > nb_subfr * search_thres2    /* Correlation needs to be high enough to be voiced */
+        ) {
+            CCmax_b = CCmax_new_b;
+            CCmax   = CCmax_new;
+            lag     = d;
+            CBimax  = CBimax_new;
+        }
+    }
+
+    if( lag == -1 ) {
+        /* No suitable candidate found */
+        silk_memset( pitch_out, 0, PE_MAX_NB_SUBFR * sizeof(opus_int) );
+        *LTPCorr      = 0.0f;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        return 1;
+    }
+
+    /* Output normalized correlation */
+    *LTPCorr = (silk_float)( CCmax / nb_subfr );
+    silk_assert( *LTPCorr >= 0.0f );
+
+    if( Fs_kHz > 8 ) {
+        /* Search in original signal */
+
+        /* Compensate for decimation */
+        silk_assert( lag == silk_SAT16( lag ) );
+        if( Fs_kHz == 12 ) {
+            lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 );
+        } else { /* Fs_kHz == 16 */
+            lag = silk_LSHIFT( lag, 1 );
+        }
+
+        lag = silk_LIMIT_int( lag, min_lag, max_lag );
+        start_lag = silk_max_int( lag - 2, min_lag );
+        end_lag   = silk_min_int( lag + 2, max_lag );
+        lag_new   = lag;                                    /* to avoid undefined lag */
+        CBimax    = 0;                                      /* to avoid undefined lag */
+
+        CCmax = -1000.0f;
+
+        /* Calculate the correlations and energies needed in stage 3 */
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+
+        lag_counter = 0;
+        silk_assert( lag == silk_SAT16( lag ) );
+        contour_bias = PE_FLATCONTOUR_BIAS / lag;
+
+        /* Set up cbk parameters according to complexity setting and frame length */
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+            cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+            Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+            cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+            Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        }
+
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
+        for( d = start_lag; d <= end_lag; d++ ) {
+            for( j = 0; j < nb_cbk_search; j++ ) {
+                cross_corr = 0.0;
+                energy = energy_tmp;
+                for( k = 0; k < nb_subfr; k++ ) {
+                    cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
+                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
+                }
+                if( cross_corr > 0.0 ) {
+                    CCmax_new = (silk_float)( 2 * cross_corr / energy );
+                    /* Reduce depending on flatness of contour */
+                    CCmax_new *= 1.0f - contour_bias * j;
+                } else {
+                    CCmax_new = 0.0f;
+                }
+
+                if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
+                    CCmax   = CCmax_new;
+                    lag_new = d;
+                    CBimax  = j;
+                }
+            }
+            lag_counter++;
+        }
+
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+        }
+        *lagIndex = (opus_int16)( lag_new - min_lag );
+        *contourIndex = (opus_int8)CBimax;
+    } else {        /* Fs_kHz == 8 */
+        /* Save Lags */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
+        }
+        *lagIndex = (opus_int16)( lag - min_lag_8kHz );
+        *contourIndex = (opus_int8)CBimax;
+    }
+    silk_assert( *lagIndex >= 0 );
+    /* return as voiced */
+    return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
+)
+{
+    const silk_float *target_ptr;
+    opus_int   i, j, k, lag_counter, lag_low, lag_high;
+    opus_int   nb_cbk_search, delta, idx, cbk_size;
+    silk_float scratch_mem[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the correlations for each subframe */
+        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
+        for( j = lag_low; j <= lag_high; j++ ) {
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for */
+            /* each code_book vector for each start lag */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+            }
+        }
+        target_ptr += sf_length;
+    }
+}
+
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
+)
+{
+    const silk_float *target_ptr, *basis_ptr;
+    double    energy;
+    opus_int   k, i, j, lag_counter;
+    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
+    silk_float scratch_mem[ SCRATCH_SIZE ];
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the energy for first lag */
+        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+        energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3;
+        silk_assert( energy >= 0.0 );
+        scratch_mem[lag_counter] = (silk_float)energy;
+        lag_counter++;
+
+        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
+        for( i = 1; i < lag_diff; i++ ) {
+            /* remove part outside new window */
+            energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i];
+            silk_assert( energy >= 0.0 );
+
+            /* add part that comes into window */
+            energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ];
+            silk_assert( energy >= 0.0 );
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[lag_counter] = (silk_float)energy;
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for    */
+            /* each code_book vector for each start lag                     */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+                silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f );
+            }
+        }
+        target_ptr += sf_length;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/prefilter_FLP.c b/src/main/jni/opus/silk/float/prefilter_FLP.c
new file mode 100644
index 000000000..8bc32fb41
--- /dev/null
+++ b/src/main/jni/opus/silk/float/prefilter_FLP.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/*
+* Prefilter for finding Quantizer input signal
+*/
+static OPUS_INLINE void silk_prefilt_FLP(
+    silk_prefilter_state_FLP    *P,                 /* I/O state */
+    silk_float                  st_res[],           /* I */
+    silk_float                  xw[],               /* O */
+    silk_float                  *HarmShapeFIR,      /* I */
+    silk_float                  Tilt,               /* I */
+    silk_float                  LF_MA_shp,          /* I */
+    silk_float                  LF_AR_shp,          /* I */
+    opus_int                    lag,                /* I */
+    opus_int                    length              /* I */
+);
+
+static void silk_warped_LPC_analysis_filter_FLP(
+          silk_float                 state[],            /* I/O  State [order + 1]                       */
+          silk_float                 res[],              /* O    Residual signal [length]                */
+    const silk_float                 coef[],             /* I    Coefficients [order]                    */
+    const silk_float                 input[],            /* I    Input signal [length]                   */
+    const silk_float                 lambda,             /* I    Warping factor                          */
+    const opus_int                   length,             /* I    Length of input signal                  */
+    const opus_int                   order               /* I    Filter order (even)                     */
+)
+{
+    opus_int     n, i;
+    silk_float   acc, tmp1, tmp2;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = state[ 0 ] + lambda * state[ 1 ];
+        state[ 0 ] = input[ n ];
+        /* Output of allpass section */
+        tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 );
+        state[ 1 ] = tmp2;
+        acc = coef[ 0 ] * tmp2;
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 );
+            state[ i ] = tmp1;
+            acc += coef[ i - 1 ] * tmp1;
+            /* Output of allpass section */
+            tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 );
+            state[ i + 1 ] = tmp2;
+            acc += coef[ i ] * tmp2;
+        }
+        state[ order ] = tmp1;
+        acc += coef[ order - 1 ] * tmp1;
+        res[ n ] = input[ n ] - acc;
+    }
+}
+
+/*
+* silk_prefilter. Main prefilter function
+*/
+void silk_prefilter_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    const silk_encoder_control_FLP  *psEncCtrl,                         /* I    Encoder control FLP                         */
+    silk_float                      xw[],                               /* O    Weighted signal                             */
+    const silk_float                x[]                                 /* I    Speech signal                               */
+)
+{
+    silk_prefilter_state_FLP *P = &psEnc->sPrefilt;
+    opus_int   j, k, lag;
+    silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp;
+    silk_float B[ 2 ];
+    const silk_float *AR1_shp;
+    const silk_float *px;
+    silk_float *pxw;
+    silk_float HarmShapeFIR[ 3 ];
+    silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+
+    /* Set up pointers */
+    px  = x;
+    pxw = xw;
+    lag = P->lagPrev;
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Update Variables that change per sub frame */
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            lag = psEncCtrl->pitchL[ k ];
+        }
+
+        /* Noise shape parameters */
+        HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] );
+        HarmShapeFIR[ 0 ] = 0.25f               * HarmShapeGain;
+        HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain;
+        HarmShapeFIR[ 2 ] = 0.25f               * HarmShapeGain;
+        Tilt      =  psEncCtrl->Tilt[ k ];
+        LF_MA_shp =  psEncCtrl->LF_MA_shp[ k ];
+        LF_AR_shp =  psEncCtrl->LF_AR_shp[ k ];
+        AR1_shp   = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Short term FIR filtering */
+        silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px,
+            (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+        /* Reduce (mainly) low frequencies during harmonic emphasis */
+        B[ 0 ] =  psEncCtrl->GainsPre[ k ];
+        B[ 1 ] = -psEncCtrl->GainsPre[ k ] *
+            ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT );
+        pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP;
+        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+            pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ];
+        }
+        P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ];
+
+        silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length );
+
+        px  += psEnc->sCmn.subfr_length;
+        pxw += psEnc->sCmn.subfr_length;
+    }
+    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+}
+
+/*
+* Prefilter for finding Quantizer input signal
+*/
+static OPUS_INLINE void silk_prefilt_FLP(
+    silk_prefilter_state_FLP    *P,                 /* I/O state */
+    silk_float                  st_res[],           /* I */
+    silk_float                  xw[],               /* O */
+    silk_float                  *HarmShapeFIR,      /* I */
+    silk_float                  Tilt,               /* I */
+    silk_float                  LF_MA_shp,          /* I */
+    silk_float                  LF_AR_shp,          /* I */
+    opus_int                    lag,                /* I */
+    opus_int                    length              /* I */
+)
+{
+    opus_int   i;
+    opus_int   idx, LTP_shp_buf_idx;
+    silk_float n_Tilt, n_LF, n_LTP;
+    silk_float sLF_AR_shp, sLF_MA_shp;
+    silk_float *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf     = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp      = P->sLF_AR_shp;
+    sLF_MA_shp      = P->sLF_MA_shp;
+
+    for( i = 0; i < length; i++ ) {
+        if( lag > 0 ) {
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP  = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ];
+            n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ] * HarmShapeFIR[ 1 ];
+            n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ];
+        } else {
+            n_LTP = 0;
+        }
+
+        n_Tilt = sLF_AR_shp * Tilt;
+        n_LF   = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp;
+
+        sLF_AR_shp = st_res[ i ] - n_Tilt;
+        sLF_MA_shp = sLF_AR_shp - n_LF;
+
+        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+        LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp;
+
+        xw[ i ] = sLF_MA_shp - n_LTP;
+    }
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp       = sLF_AR_shp;
+    P->sLF_MA_shp       = sLF_MA_shp;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/src/main/jni/opus/silk/float/process_gains_FLP.c b/src/main/jni/opus/silk/float/process_gains_FLP.c
new file mode 100644
index 000000000..c0da0dae4
--- /dev/null
+++ b/src/main/jni/opus/silk/float/process_gains_FLP.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains */
+void silk_process_gains_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
+    opus_int     k;
+    opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
+    silk_float   s, InvMaxSqrVal, gain, quant_offset;
+
+    /* Gain reduction when LTP coding gain is high */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        s = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains[ k ] *= s;
+        }
+    }
+
+    /* Limit the quantized signal */
+    InvMaxSqrVal = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length );
+
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Soft limit on ratio residual energy and squared gains */
+        gain = psEncCtrl->Gains[ k ];
+        gain = ( silk_float )sqrt( gain * gain + psEncCtrl->ResNrg[ k ] * InvMaxSqrVal );
+        psEncCtrl->Gains[ k ] = silk_min_float( gain, 32767.0f );
+    }
+
+    /* Prepare gains for noise shaping quantization */
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        pGains_Q16[ k ] = (opus_int32)( psEncCtrl->Gains[ k ] * 65536.0f );
+    }
+
+    /* Save unquantized gains and gain Index */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, pGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+    /* Quantize gains */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
+            &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains[ k ] = pGains_Q16[ k ] / 65536.0f;
+    }
+
+    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        if( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+    }
+
+    /* Quantizer boundary adjustment */
+    quant_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f;
+    psEncCtrl->Lambda = LAMBDA_OFFSET
+                      + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision
+                      + LAMBDA_SPEECH_ACT        * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f )
+                      + LAMBDA_INPUT_QUALITY     * psEncCtrl->input_quality
+                      + LAMBDA_CODING_QUALITY    * psEncCtrl->coding_quality
+                      + LAMBDA_QUANT_OFFSET      * quant_offset;
+
+    silk_assert( psEncCtrl->Lambda > 0.0f );
+    silk_assert( psEncCtrl->Lambda < 2.0f );
+}
diff --git a/src/main/jni/opus/silk/float/regularize_correlations_FLP.c b/src/main/jni/opus/silk/float/regularize_correlations_FLP.c
new file mode 100644
index 000000000..df4612604
--- /dev/null
+++ b/src/main/jni/opus/silk/float/regularize_correlations_FLP.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FLP(
+    silk_float                      *XX,                                /* I/O  Correlation matrices                        */
+    silk_float                      *xx,                                /* I/O  Correlation values                          */
+    const silk_float                noise,                              /* I    Noise energy to add                         */
+    const opus_int                  D                                   /* I    Dimension of XX                             */
+)
+{
+    opus_int i;
+
+    for( i = 0; i < D; i++ ) {
+        matrix_ptr( &XX[ 0 ], i, i, D ) += noise;
+    }
+    xx[ 0 ] += noise;
+}
diff --git a/src/main/jni/opus/silk/float/residual_energy_FLP.c b/src/main/jni/opus/silk/float/residual_energy_FLP.c
new file mode 100644
index 000000000..b2e03a86a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/residual_energy_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+#define MAX_ITERATIONS_RESIDUAL_NRG         10
+#define REGULARIZATION_FACTOR               1e-8f
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
+    const silk_float                *c,                                 /* I    Filter coefficients                         */
+    silk_float                      *wXX,                               /* I/O  Weighted correlation matrix, reg. out       */
+    const silk_float                *wXx,                               /* I    Weighted correlation vector                 */
+    const silk_float                wxx,                                /* I    Weighted correlation value                  */
+    const opus_int                  D                                   /* I    Dimension                                   */
+)
+{
+    opus_int   i, j, k;
+    silk_float tmp, nrg = 0.0f, regularization;
+
+    /* Safety checks */
+    silk_assert( D >= 0 );
+
+    regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] );
+    for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) {
+        nrg = wxx;
+
+        tmp = 0.0f;
+        for( i = 0; i < D; i++ ) {
+            tmp += wXx[ i ] * c[ i ];
+        }
+        nrg -= 2.0f * tmp;
+
+        /* compute c' * wXX * c, assuming wXX is symmetric */
+        for( i = 0; i < D; i++ ) {
+            tmp = 0.0f;
+            for( j = i + 1; j < D; j++ ) {
+                tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ];
+            }
+            nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] );
+        }
+        if( nrg > 0 ) {
+            break;
+        } else {
+            /* Add white noise */
+            for( i = 0; i < D; i++ ) {
+                matrix_c_ptr( wXX, i, i, D ) +=  regularization;
+            }
+            /* Increase noise for next run */
+            regularization *= 2.0f;
+        }
+    }
+    if( k == MAX_ITERATIONS_RESIDUAL_NRG ) {
+        silk_assert( nrg == 0 );
+        nrg = 1.0f;
+    }
+
+    return nrg;
+}
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FLP(
+    silk_float                      nrgs[ MAX_NB_SUBFR ],               /* O    Residual energy per subframe                */
+    const silk_float                x[],                                /* I    Input signal                                */
+    silk_float                      a[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                */
+    const silk_float                gains[],                            /* I    Quantization gains                          */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int     shift;
+    silk_float   *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ];
+
+    LPC_res_ptr = LPC_res + LPC_order;
+    shift = LPC_order + subfr_length;
+
+    /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+    silk_LPC_analysis_filter_FLP( LPC_res, a[ 0 ], x + 0 * shift, 2 * shift, LPC_order );
+    nrgs[ 0 ] = ( silk_float )( gains[ 0 ] * gains[ 0 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) );
+    nrgs[ 1 ] = ( silk_float )( gains[ 1 ] * gains[ 1 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) );
+
+    if( nb_subfr == MAX_NB_SUBFR ) {
+        silk_LPC_analysis_filter_FLP( LPC_res, a[ 1 ], x + 2 * shift, 2 * shift, LPC_order );
+        nrgs[ 2 ] = ( silk_float )( gains[ 2 ] * gains[ 2 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) );
+        nrgs[ 3 ] = ( silk_float )( gains[ 3 ] * gains[ 3 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/float/scale_copy_vector_FLP.c b/src/main/jni/opus/silk/float/scale_copy_vector_FLP.c
new file mode 100644
index 000000000..20db32b3b
--- /dev/null
+++ b/src/main/jni/opus/silk/float/scale_copy_vector_FLP.c
@@ -0,0 +1,57 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* copy and multiply a vector by a constant */
+void silk_scale_copy_vector_FLP(
+    silk_float          *data_out,
+    const silk_float    *data_in,
+    silk_float          gain,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+
+    /* 4x unrolled loop */
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        data_out[ i + 0 ] = gain * data_in[ i + 0 ];
+        data_out[ i + 1 ] = gain * data_in[ i + 1 ];
+        data_out[ i + 2 ] = gain * data_in[ i + 2 ];
+        data_out[ i + 3 ] = gain * data_in[ i + 3 ];
+    }
+
+    /* any remaining elements */
+    for( ; i < dataSize; i++ ) {
+        data_out[ i ] = gain * data_in[ i ];
+    }
+}
diff --git a/src/main/jni/opus/silk/float/scale_vector_FLP.c b/src/main/jni/opus/silk/float/scale_vector_FLP.c
new file mode 100644
index 000000000..108fdcbed
--- /dev/null
+++ b/src/main/jni/opus/silk/float/scale_vector_FLP.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* multiply a vector by a constant */
+void silk_scale_vector_FLP(
+    silk_float          *data1,
+    silk_float          gain,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+
+    /* 4x unrolled loop */
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        data1[ i + 0 ] *= gain;
+        data1[ i + 1 ] *= gain;
+        data1[ i + 2 ] *= gain;
+        data1[ i + 3 ] *= gain;
+    }
+
+    /* any remaining elements */
+    for( ; i < dataSize; i++ ) {
+        data1[ i ] *= gain;
+    }
+}
diff --git a/src/main/jni/opus/silk/float/schur_FLP.c b/src/main/jni/opus/silk/float/schur_FLP.c
new file mode 100644
index 000000000..ee436f835
--- /dev/null
+++ b/src/main/jni/opus/silk/float/schur_FLP.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+silk_float silk_schur_FLP(                  /* O    returns residual energy                                     */
+    silk_float          refl_coef[],        /* O    reflection coefficients (length order)                      */
+    const silk_float    auto_corr[],        /* I    autocorrelation sequence (length order+1)                   */
+    opus_int            order               /* I    order                                                       */
+)
+{
+    opus_int   k, n;
+    silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    silk_float Ctmp1, Ctmp2, rc_tmp;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Copy correlations */
+    for( k = 0; k < order+1; k++ ) {
+        C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ];
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Get reflection coefficient */
+        rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f );
+
+        /* Save the output */
+        refl_coef[ k ] = rc_tmp;
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1 = C[ n + k + 1 ][ 0 ];
+            Ctmp2 = C[ n ][ 1 ];
+            C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp;
+            C[ n ][ 1 ]         = Ctmp2 + Ctmp1 * rc_tmp;
+        }
+    }
+
+    /* Return residual energy */
+    return C[ 0 ][ 1 ];
+}
+
diff --git a/src/main/jni/opus/silk/float/solve_LS_FLP.c b/src/main/jni/opus/silk/float/solve_LS_FLP.c
new file mode 100644
index 000000000..7c90d665a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/solve_LS_FLP.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/**********************************************************************
+ * LDL Factorisation. Finds the upper triangular matrix L and the diagonal
+ * Matrix D (only the diagonal elements returned in a vector)such that
+ * the symmetric matric A is given by A = L*D*L'.
+ **********************************************************************/
+static OPUS_INLINE void silk_LDL_FLP(
+    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
+    opus_int            M,          /* I    Size of Matrix                                                  */
+    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
+    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM lower
+ * triangular matrix, with ones on the diagonal.
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+);
+
+/**********************************************************************
+ * Function to solve linear equation (A^T)x = b, when A is a MxM lower
+ * triangular, with ones on the diagonal. (ie then A^T is upper triangular)
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM
+ * symmetric square matrix - using LDL factorisation
+ **********************************************************************/
+void silk_solve_LDL_FLP(
+    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */
+    const opus_int                  M,                                  /* I    Size of matrix                              */
+    const silk_float                *b,                                 /* I    Pointer to b vector                         */
+    silk_float                      *x                                  /* O    Pointer to x solution vector                */
+)
+{
+    opus_int   i;
+    silk_float L[    MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ];
+    silk_float T[    MAX_MATRIX_SIZE ];
+    silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    /***************************************************
+    Factorize A by LDL such that A = L*D*(L^T),
+    where L is lower triangular with ones on diagonal
+    ****************************************************/
+    silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv );
+
+    /****************************************************
+    * substitute D*(L^T) = T. ie:
+    L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b
+    ******************************************************/
+    silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T );
+
+    /****************************************************
+    D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is
+    diagonal just multiply with 1/d_i
+    ****************************************************/
+    for( i = 0; i < M; i++ ) {
+        T[ i ] = T[ i ] * Dinv[ i ];
+    }
+    /****************************************************
+    x = inv(L') * inv(D) * T
+    *****************************************************/
+    silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );
+}
+
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+)
+{
+    opus_int   i, j;
+    silk_float temp;
+    const silk_float *ptr1;
+
+    for( i = M - 1; i >= 0; i-- ) {
+        ptr1 =  matrix_adr( L, 0, i, M );
+        temp = 0;
+        for( j = M - 1; j > i ; j-- ) {
+            temp += ptr1[ j * M ] * x[ j ];
+        }
+        temp = b[ i ] - temp;
+        x[ i ] = temp;
+    }
+}
+
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+)
+{
+    opus_int   i, j;
+    silk_float temp;
+    const silk_float *ptr1;
+
+    for( i = 0; i < M; i++ ) {
+        ptr1 =  matrix_adr( L, i, 0, M );
+        temp = 0;
+        for( j = 0; j < i; j++ ) {
+            temp += ptr1[ j ] * x[ j ];
+        }
+        temp = b[ i ] - temp;
+        x[ i ] = temp;
+    }
+}
+
+static OPUS_INLINE void silk_LDL_FLP(
+    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
+    opus_int            M,          /* I    Size of Matrix                                                  */
+    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
+    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
+)
+{
+    opus_int i, j, k, loop_count, err = 1;
+    silk_float *ptr1, *ptr2;
+    double temp, diag_min_value;
+    silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] );
+    for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) {
+        err = 0;
+        for( j = 0; j < M; j++ ) {
+            ptr1 = matrix_adr( L, j, 0, M );
+            temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/
+            for( i = 0; i < j; i++ ) {
+                v[ i ] = ptr1[ i ] * D[ i ];
+                temp  -= ptr1[ i ] * v[ i ];
+            }
+            if( temp < diag_min_value ) {
+                /* Badly conditioned matrix: add white noise and run again */
+                temp = ( loop_count + 1 ) * diag_min_value - temp;
+                for( i = 0; i < M; i++ ) {
+                    matrix_ptr( A, i, i, M ) += ( silk_float )temp;
+                }
+                err = 1;
+                break;
+            }
+            D[ j ]    = ( silk_float )temp;
+            Dinv[ j ] = ( silk_float )( 1.0f / temp );
+            matrix_ptr( L, j, j, M ) = 1.0f;
+
+            ptr1 = matrix_adr( A, j, 0, M );
+            ptr2 = matrix_adr( L, j + 1, 0, M);
+            for( i = j + 1; i < M; i++ ) {
+                temp = 0.0;
+                for( k = 0; k < j; k++ ) {
+                    temp += ptr2[ k ] * v[ k ];
+                }
+                matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] );
+                ptr2 += M; /* go to next column*/
+            }
+        }
+    }
+    silk_assert( err == 0 );
+}
+
diff --git a/src/main/jni/opus/silk/float/sort_FLP.c b/src/main/jni/opus/silk/float/sort_FLP.c
new file mode 100644
index 000000000..f08d7592c
--- /dev/null
+++ b/src/main/jni/opus/silk/float/sort_FLP.c
@@ -0,0 +1,83 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays):  */
+/* Best case:  O(n)   for an already sorted array           */
+/* Worst case: O(n^2) for an inversely sorted array         */
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+void silk_insertion_sort_decreasing_FLP(
+    silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int            *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int      L,                  /* I    Vector length                                               */
+    const opus_int      K                   /* I    Number of correctly sorted positions                        */
+)
+{
+    silk_float value;
+    opus_int   i, j;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, decreasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];      /* Shift value */
+            idx[ j + 1 ] = idx[ j ];    /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked check the remaining values,      */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value > a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];      /* Shift value */
+                idx[ j + 1 ] = idx[ j ];    /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
diff --git a/src/main/jni/opus/silk/float/structs_FLP.h b/src/main/jni/opus/silk/float/structs_FLP.h
new file mode 100644
index 000000000..bb529e71a
--- /dev/null
+++ b/src/main/jni/opus/silk/float/structs_FLP.h
@@ -0,0 +1,131 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FLP_H
+#define SILK_STRUCTS_FLP_H
+
+#include "typedef.h"
+#include "main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct {
+    opus_int8                   LastGainIndex;
+    silk_float                  HarmBoost_smth;
+    silk_float                  HarmShapeGain_smth;
+    silk_float                  Tilt_smth;
+} silk_shape_state_FLP;
+
+/********************************/
+/* Prefilter state              */
+/********************************/
+typedef struct {
+    silk_float                  sLTP_shp[ LTP_BUF_LENGTH ];
+    silk_float                  sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int                    sLTP_shp_buf_idx;
+    silk_float                  sLF_AR_shp;
+    silk_float                  sLF_MA_shp;
+    silk_float                  sHarmHP;
+    opus_int32                  rand_seed;
+    opus_int                    lagPrev;
+} silk_prefilter_state_FLP;
+
+/********************************/
+/* Encoder state FLP            */
+/********************************/
+typedef struct {
+    silk_encoder_state          sCmn;                               /* Common struct, shared with fixed-point code */
+    silk_shape_state_FLP        sShape;                             /* Noise shaping state */
+    silk_prefilter_state_FLP    sPrefilt;                           /* Prefilter State */
+
+    /* Buffer for find pitch and noise shape analysis */
+    silk_float                  x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */
+    silk_float                  LTPCorr;                            /* Normalized correlation from pitch lag estimator */
+} silk_encoder_state_FLP;
+
+/************************/
+/* Encoder control FLP  */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    silk_float                  Gains[ MAX_NB_SUBFR ];
+    silk_float                  PredCoef[ 2 ][ MAX_LPC_ORDER ];     /* holds interpolated and final coefficients */
+    silk_float                  LTPCoef[LTP_ORDER * MAX_NB_SUBFR];
+    silk_float                  LTP_scale;
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+
+    /* Noise shaping parameters */
+    silk_float                  AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_float                  AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_float                  LF_MA_shp[     MAX_NB_SUBFR ];
+    silk_float                  LF_AR_shp[     MAX_NB_SUBFR ];
+    silk_float                  GainsPre[      MAX_NB_SUBFR ];
+    silk_float                  HarmBoost[     MAX_NB_SUBFR ];
+    silk_float                  Tilt[          MAX_NB_SUBFR ];
+    silk_float                  HarmShapeGain[ MAX_NB_SUBFR ];
+    silk_float                  Lambda;
+    silk_float                  input_quality;
+    silk_float                  coding_quality;
+
+    /* Measures */
+    silk_float                  sparseness;
+    silk_float                  predGain;
+    silk_float                  LTPredCodGain;
+    silk_float                  ResNrg[ MAX_NB_SUBFR ];             /* Residual energy per subframe */
+
+    /* Parameters for CBR mode */
+    opus_int32                  GainsUnq_Q16[ MAX_NB_SUBFR ];
+    opus_int8                   lastGainIndexPrev;
+} silk_encoder_control_FLP;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+    silk_encoder_state_FLP      state_Fxx[ ENCODER_NUM_CHANNELS ];
+    stereo_enc_state            sStereo;
+    opus_int32                  nBitsExceeded;
+    opus_int                    nChannelsAPI;
+    opus_int                    nChannelsInternal;
+    opus_int                    nPrevChannelsInternal;
+    opus_int                    timeSinceSwitchAllowed_ms;
+    opus_int                    allowBandwidthSwitch;
+    opus_int                    prev_decode_only_middle;
+} silk_encoder;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/float/warped_autocorrelation_FLP.c b/src/main/jni/opus/silk/float/warped_autocorrelation_FLP.c
new file mode 100644
index 000000000..542414f48
--- /dev/null
+++ b/src/main/jni/opus/silk/float/warped_autocorrelation_FLP.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FLP(
+    silk_float                      *corr,                              /* O    Result [order + 1]                          */
+    const silk_float                *input,                             /* I    Input data to correlate                     */
+    const silk_float                warping,                            /* I    Warping coefficient                         */
+    const opus_int                  length,                             /* I    Length of input                             */
+    const opus_int                  order                               /* I    Correlation order (even)                    */
+)
+{
+    opus_int    n, i;
+    double      tmp1, tmp2;
+    double      state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+    double      C[     MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    /* Loop over samples */
+    for( n = 0; n < length; n++ ) {
+        tmp1 = input[ n ];
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 );
+            state[ i ] = tmp1;
+            C[ i ] += state[ 0 ] * tmp1;
+            /* Output of allpass section */
+            tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 );
+            state[ i + 1 ] = tmp2;
+            C[ i + 1 ] += state[ 0 ] * tmp2;
+        }
+        state[ order ] = tmp1;
+        C[ order ] += state[ 0 ] * tmp1;
+    }
+
+    /* Copy correlations in silk_float output format */
+    for( i = 0; i < order + 1; i++ ) {
+        corr[ i ] = ( silk_float )C[ i ];
+    }
+}
diff --git a/src/main/jni/opus/silk/float/wrappers_FLP.c b/src/main/jni/opus/silk/float/wrappers_FLP.c
new file mode 100644
index 000000000..350599b20
--- /dev/null
+++ b/src/main/jni/opus/silk/float/wrappers_FLP.c
@@ -0,0 +1,201 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Wrappers. Calls flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters */
+void silk_A2NLSF_FLP(
+    opus_int16                      *NLSF_Q15,                          /* O    NLSF vector      [ LPC_order ]              */
+    const silk_float                *pAR,                               /* I    LPC coefficients [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int   i;
+    opus_int32 a_fix_Q16[ MAX_LPC_ORDER ];
+
+    for( i = 0; i < LPC_order; i++ ) {
+        a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f );
+    }
+
+    silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order );
+}
+
+/* Convert LSF parameters to AR prediction filter coefficients */
+void silk_NLSF2A_FLP(
+    silk_float                      *pAR,                               /* O    LPC coefficients [ LPC_order ]              */
+    const opus_int16                *NLSF_Q15,                          /* I    NLSF vector      [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int   i;
+    opus_int16 a_fix_Q12[ MAX_LPC_ORDER ];
+
+    silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order );
+
+    for( i = 0; i < LPC_order; i++ ) {
+        pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f );
+    }
+}
+
+/******************************************/
+/* Floating-point NLSF processing wrapper */
+/******************************************/
+void silk_process_NLSFs_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    silk_float                      PredCoef[ 2 ][ MAX_LPC_ORDER ],     /* O    Prediction coefficients                     */
+    opus_int16                      NLSF_Q15[      MAX_LPC_ORDER ],     /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16                prev_NLSF_Q15[ MAX_LPC_ORDER ]      /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+)
+{
+    opus_int     i, j;
+    opus_int16   PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+
+    silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15);
+
+    for( j = 0; j < 2; j++ ) {
+        for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
+            PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f );
+        }
+    }
+}
+
+/****************************************/
+/* Floating-point Silk NSQ wrapper      */
+/****************************************/
+void silk_NSQ_wrapper_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    SideInfoIndices                 *psIndices,                         /* I/O  Quantization indices                        */
+    silk_nsq_state                  *psNSQ,                             /* I/O  Noise Shaping Quantzation state             */
+    opus_int8                       pulses[],                           /* O    Quantized pulse signal                      */
+    const silk_float                x[]                                 /* I    Prefiltered input signal                    */
+)
+{
+    opus_int     i, j;
+    opus_int32   x_Q3[ MAX_FRAME_LENGTH ];
+    opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16   LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int     LTP_scale_Q14;
+
+    /* Noise shaping parameters */
+    opus_int16   AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    opus_int32   LF_shp_Q14[ MAX_NB_SUBFR ];         /* Packs two int16 coefficients per int32 value             */
+    opus_int     Lambda_Q10;
+    opus_int     Tilt_Q14[ MAX_NB_SUBFR ];
+    opus_int     HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+
+    /* Convert control struct to fix control struct */
+    /* Noise shape parameters */
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) {
+            AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f );
+        }
+    }
+
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        LF_shp_Q14[ i ] =   silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ]     * 16384.0f ), 16 ) |
+                              (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ]     * 16384.0f );
+        Tilt_Q14[ i ]   =        (opus_int)silk_float2int( psEncCtrl->Tilt[ i ]          * 16384.0f );
+        HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f );
+    }
+    Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f );
+
+    /* prediction and coding parameters */
+    for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) {
+        LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f );
+    }
+
+    for( j = 0; j < 2; j++ ) {
+        for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) {
+            PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] * 4096.0f );
+        }
+    }
+
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f );
+        silk_assert( Gains_Q16[ i ] > 0 );
+    }
+
+    if( psIndices->signalType == TYPE_VOICED ) {
+        LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ];
+    } else {
+        LTP_scale_Q14 = 0;
+    }
+
+    /* Convert input to fix */
+    for( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
+        x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] );
+    }
+
+    /* Call NSQ */
+    if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+        silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+    } else {
+        silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+    }
+}
+
+/***********************************************/
+/* Floating-point Silk LTP quantiation wrapper */
+/***********************************************/
+void silk_quant_LTP_gains_FLP(
+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
+    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
+    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
+    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
+    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
+    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
+    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+)
+{
+    opus_int   i;
+    opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ];
+    opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ];
+
+    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+        B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f );
+    }
+    for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) {
+        W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );
+    }
+
+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+
+    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+        B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );
+    }
+}
diff --git a/src/main/jni/opus/silk/gain_quant.c b/src/main/jni/opus/silk/gain_quant.c
new file mode 100644
index 000000000..64ccd0611
--- /dev/null
+++ b/src/main/jni/opus/silk/gain_quant.c
@@ -0,0 +1,141 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+#define OFFSET                  ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 )
+#define SCALE_Q16               ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) )
+#define INV_SCALE_Q16           ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) )
+
+/* Gain scalar quantization with hysteresis, uniform on log scale */
+void silk_gains_quant(
+    opus_int8                   ind[ MAX_NB_SUBFR ],            /* O    gain indices                                */
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* I/O  gains (quantized out)                       */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+)
+{
+    opus_int k, double_step_size_threshold;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* Convert to log scale, scale, floor() */
+        ind[ k ] = silk_SMULWB( SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET );
+
+        /* Round towards previous quantized gain (hysteresis) */
+        if( ind[ k ] < *prev_ind ) {
+            ind[ k ]++;
+        }
+        ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 );
+
+        /* Compute delta indices and limit */
+        if( k == 0 && conditional == 0 ) {
+            /* Full index */
+            ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 );
+            *prev_ind = ind[ k ];
+        } else {
+            /* Delta index */
+            ind[ k ] = ind[ k ] - *prev_ind;
+
+            /* Double the quantization step size for large gain increases, so that the max gain level can be reached */
+            double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+            if( ind[ k ] > double_step_size_threshold ) {
+                ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 );
+            }
+
+            ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT );
+
+            /* Accumulate deltas */
+            if( ind[ k ] > double_step_size_threshold ) {
+                *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold;
+            } else {
+                *prev_ind += ind[ k ];
+            }
+
+            /* Shift to make non-negative */
+            ind[ k ] -= MIN_DELTA_GAIN_QUANT;
+        }
+
+        /* Scale and convert to linear scale */
+        gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+    }
+}
+
+/* Gains scalar dequantization, uniform on log scale */
+void silk_gains_dequant(
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* O    quantized gains                             */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                          */
+)
+{
+    opus_int   k, ind_tmp, double_step_size_threshold;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        if( k == 0 && conditional == 0 ) {
+            /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */
+            *prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 );
+        } else {
+            /* Delta index */
+            ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT;
+
+            /* Accumulate deltas */
+            double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+            if( ind_tmp > double_step_size_threshold ) {
+                *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold;
+            } else {
+                *prev_ind += ind_tmp;
+            }
+        }
+        *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 );
+
+        /* Scale and convert to linear scale */
+        gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+    }
+}
+
+/* Compute unique identifier of gain indices vector */
+opus_int32 silk_gains_ID(                                       /* O    returns unique identifier of gains          */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+)
+{
+    opus_int   k;
+    opus_int32 gainsID;
+
+    gainsID = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        gainsID = silk_ADD_LSHIFT32( ind[ k ], gainsID, 8 );
+    }
+
+    return gainsID;
+}
diff --git a/src/main/jni/opus/silk/init_decoder.c b/src/main/jni/opus/silk/init_decoder.c
new file mode 100644
index 000000000..f887c6788
--- /dev/null
+++ b/src/main/jni/opus/silk/init_decoder.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/************************/
+/* Init Decoder State   */
+/************************/
+opus_int silk_init_decoder(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */
+)
+{
+    /* Clear the entire encoder state, except anything copied */
+    silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+
+    /* Used to deactivate LSF interpolation */
+    psDec->first_frame_after_reset = 1;
+    psDec->prev_gain_Q16 = 65536;
+
+    /* Reset CNG state */
+    silk_CNG_Reset( psDec );
+
+    /* Reset PLC state */
+    silk_PLC_Reset( psDec );
+
+    return(0);
+}
+
diff --git a/src/main/jni/opus/silk/init_encoder.c b/src/main/jni/opus/silk/init_encoder.c
new file mode 100644
index 000000000..65995c33f
--- /dev/null
+++ b/src/main/jni/opus/silk/init_encoder.c
@@ -0,0 +1,64 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+#include "cpu_support.h"
+
+/*********************************/
+/* Initialize Silk Encoder state */
+/*********************************/
+opus_int silk_init_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+)
+{
+    opus_int ret = 0;
+
+    /* Clear the entire encoder state */
+    silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) );
+
+    psEnc->sCmn.arch = arch;
+
+    psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );
+    psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;
+
+    /* Used to deactivate LSF interpolation, pitch prediction */
+    psEnc->sCmn.first_frame_after_reset = 1;
+
+    /* Initialize Silk VAD */
+    ret += silk_VAD_Init( &psEnc->sCmn.sVAD );
+
+    return  ret;
+}
diff --git a/src/main/jni/opus/silk/inner_prod_aligned.c b/src/main/jni/opus/silk/inner_prod_aligned.c
new file mode 100644
index 000000000..257ae9e04
--- /dev/null
+++ b/src/main/jni/opus/silk/inner_prod_aligned.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+opus_int32 silk_inner_prod_aligned_scale(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int32 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale );
+    }
+    return sum;
+}
diff --git a/src/main/jni/opus/silk/interpolate.c b/src/main/jni/opus/silk/interpolate.c
new file mode 100644
index 000000000..1bd8ca4d5
--- /dev/null
+++ b/src/main/jni/opus/silk/interpolate.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Interpolate two vectors */
+void silk_interpolate(
+    opus_int16                  xi[ MAX_LPC_ORDER ],            /* O    interpolated vector                         */
+    const opus_int16            x0[ MAX_LPC_ORDER ],            /* I    first vector                                */
+    const opus_int16            x1[ MAX_LPC_ORDER ],            /* I    second vector                               */
+    const opus_int              ifact_Q2,                       /* I    interp. factor, weight on 2nd vector        */
+    const opus_int              d                               /* I    number of parameters                        */
+)
+{
+    opus_int i;
+
+    silk_assert( ifact_Q2 >= 0 );
+    silk_assert( ifact_Q2 <= 4 );
+
+    for( i = 0; i < d; i++ ) {
+        xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 );
+    }
+}
diff --git a/src/main/jni/opus/silk/lin2log.c b/src/main/jni/opus/silk/lin2log.c
new file mode 100644
index 000000000..d4fe51532
--- /dev/null
+++ b/src/main/jni/opus/silk/lin2log.c
@@ -0,0 +1,46 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */
+/* Convert input to a log scale    */
+opus_int32 silk_lin2log(
+    const opus_int32            inLin               /* I  input in linear scale                                         */
+)
+{
+    opus_int32 lz, frac_Q7;
+
+    silk_CLZ_FRAC( inLin, &lz, &frac_Q7 );
+
+    /* Piece-wise parabolic approximation */
+    return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 );
+}
+
diff --git a/src/main/jni/opus/silk/log2lin.c b/src/main/jni/opus/silk/log2lin.c
new file mode 100644
index 000000000..a692e009d
--- /dev/null
+++ b/src/main/jni/opus/silk/log2lin.c
@@ -0,0 +1,58 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Approximation of 2^() (very close inverse of silk_lin2log()) */
+/* Convert input to a linear scale    */
+opus_int32 silk_log2lin( 
+    const opus_int32            inLog_Q7            /* I  input on log scale                                            */
+)
+{
+    opus_int32 out, frac_Q7;
+
+    if( inLog_Q7 < 0 ) {
+        return 0;
+    } else if ( inLog_Q7 >= 3967 ) {
+		return silk_int32_MAX;
+	}
+
+    out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
+    frac_Q7 = inLog_Q7 & 0x7F;
+    if( inLog_Q7 < 2048 ) {
+        /* Piece-wise parabolic approximation */
+        out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 );
+    } else {
+        /* Piece-wise parabolic approximation */
+        out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) );
+    }
+    return out;
+}
diff --git a/src/main/jni/opus/silk/macros.h b/src/main/jni/opus/silk/macros.h
new file mode 100644
index 000000000..a84e5a5d3
--- /dev/null
+++ b/src/main/jni/opus/silk/macros.h
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_H
+#define SILK_MACROS_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* This is an OPUS_INLINE header file for general platform. */
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
+
+/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
+#define silk_SMULBB(a32, b32)            ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
+#define silk_SMLABB(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#define silk_SMULBT(a32, b32)            ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#define silk_SMLABT(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16))
+
+/* a64 + (b32 * c32) */
+#define silk_SMLAL(a64, b32, c32)        (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
+
+/* (a32 * b32) >> 16 */
+#define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
+
+/* a32 + ((b32 * c32) >> 16) */
+#define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
+
+/* add/subtract with output saturated */
+#define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
+                                        ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) :   \
+                                        ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT32(a, b)             ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ?                                        \
+                                        (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
+                                        ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
+
+#include "ecintrin.h"
+
+static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
+{
+    return 32 - EC_ILOG(in16<<16|0x8000);
+}
+
+static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
+{
+    return in32 ? 32 - EC_ILOG(in32) : 32;
+}
+
+/* Row based */
+#define matrix_ptr(Matrix_base_adr, row, column, N) \
+    (*((Matrix_base_adr) + ((row)*(N)+(column))))
+#define matrix_adr(Matrix_base_adr, row, column, N) \
+      ((Matrix_base_adr) + ((row)*(N)+(column)))
+
+/* Column based */
+#ifndef matrix_c_ptr
+#   define matrix_c_ptr(Matrix_base_adr, row, column, M) \
+    (*((Matrix_base_adr) + ((row)+(M)*(column))))
+#endif
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/macros_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/macros_armv5e.h"
+#endif
+
+#endif /* SILK_MACROS_H */
+
diff --git a/src/main/jni/opus/silk/main.h b/src/main/jni/opus/silk/main.h
new file mode 100644
index 000000000..2bdf89784
--- /dev/null
+++ b/src/main/jni/opus/silk/main.h
@@ -0,0 +1,438 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_H
+#define SILK_MAIN_H
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "structs.h"
+#include "tables.h"
+#include "PLC.h"
+#include "control.h"
+#include "debug.h"
+#include "entenc.h"
+#include "entdec.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+    stereo_enc_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    opus_int8                   ix[ 2 ][ 3 ],                   /* O    Quantization indices                        */
+    opus_int8                   *mid_only_flag,                 /* O    Flag: only mid signal coded                 */
+    opus_int32                  mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
+    opus_int32                  total_rate_bps,                 /* I    Total bitrate                               */
+    opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
+    opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+);
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+    stereo_dec_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    const opus_int32            pred_Q13[],                     /* I    Predictors                                  */
+    opus_int                    fs_kHz,                         /* I    Samples rate (kHz)                          */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+);
+
+/* Find least-squares prediction gain for one signal based on another and quantize it */
+opus_int32 silk_stereo_find_predictor(                          /* O    Returns predictor in Q13                    */
+    opus_int32                  *ratio_Q14,                     /* O    Ratio of residual and mid energies          */
+    const opus_int16            x[],                            /* I    Basis signal                                */
+    const opus_int16            y[],                            /* I    Target signal                               */
+    opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
+    opus_int                    length,                         /* I    Number of samples                           */
+    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+);
+
+/* Quantize mid/side predictors */
+void silk_stereo_quant_pred(
+    opus_int32                  pred_Q13[],                     /* I/O  Predictors (out: quantized)                 */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* O    Quantization indices                        */
+);
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* I    Quantization indices                        */
+);
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   mid_only_flag
+);
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int32                  pred_Q13[]                      /* O    Predictors                                  */
+);
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    *decode_only_mid                /* O    Flag that only mid channel has been coded   */
+);
+
+/* Encodes signs of excitation */
+void silk_encode_signs(
+    ec_enc                      *psRangeEnc,                        /* I/O  Compressor data structure                   */
+    const opus_int8             pulses[],                           /* I    pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+);
+
+/* Decodes signs of excitation */
+void silk_decode_signs(
+    ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+);
+
+/* Check encoder control struct */
+opus_int check_control_input(
+    silk_EncControlStruct        *encControl                    /* I    Control structure                           */
+);
+
+/* Control internal sampling rate */
+opus_int silk_control_audio_bandwidth(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    silk_EncControlStruct       *encControl                     /* I    Control structure                           */
+);
+
+/* Control SNR of redidual quantizer */
+opus_int silk_control_SNR(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    opus_int32                  TargetRate_bps                  /* I    Target max bitrate (bps)                    */
+);
+
+/***************/
+/* Shell coder */
+/***************/
+
+/* Encode quantization indices of excitation */
+void silk_encode_pulses(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              signalType,                     /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    opus_int8                   pulses[],                       /* I    quantization indices                        */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              *pulses0                        /* I    data: nonnegative pulse amplitudes          */
+);
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
+);
+
+/* Gain scalar quantization with hysteresis, uniform on log scale */
+void silk_gains_quant(
+    opus_int8                   ind[ MAX_NB_SUBFR ],            /* O    gain indices                                */
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* I/O  gains (quantized out)                       */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+);
+
+/* Gains scalar dequantization, uniform on log scale */
+void silk_gains_dequant(
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* O    quantized gains                             */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                          */
+);
+
+/* Compute unique identifier of gain indices vector */
+opus_int32 silk_gains_ID(                                       /* O    returns unique identifier of gains          */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+);
+
+/* Interpolate two vectors */
+void silk_interpolate(
+    opus_int16                  xi[ MAX_LPC_ORDER ],            /* O    interpolated vector                         */
+    const opus_int16            x0[ MAX_LPC_ORDER ],            /* I    first vector                                */
+    const opus_int16            x1[ MAX_LPC_ORDER ],            /* I    second vector                               */
+    const opus_int              ifact_Q2,                       /* I    interp. factor, weight on 2nd vector        */
+    const opus_int              d                               /* I    number of parameters                        */
+);
+
+/* LTP tap quantizer */
+void silk_quant_LTP_gains(
+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
+    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
+    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_gain_dB_Q7,							/* I/O  Cumulative max prediction gain  */
+    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
+    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
+    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
+    const opus_int              nb_subfr                                    /* I    number of subframes             */
+);
+
+/* Entropy constrained matrix-weighted VQ, for a single input data vector */
+void silk_VQ_WMat_EC(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+);
+
+/************************************/
+/* Noise shaping quantization (NSQ) */
+/************************************/
+void silk_NSQ(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+/* Noise shaping using delayed decision */
+void silk_NSQ_del_dec(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+/************/
+/* Silk VAD */
+/************/
+/* Initialize the Silk VAD */
+opus_int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
+    silk_VAD_state              *psSilk_VAD                     /* I/O  Pointer to Silk VAD state                   */
+);
+
+/* Get speech activity level in Q8 */
+opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    const opus_int16            pIn[]                           /* I    PCM input                                   */
+);
+
+/* Low-pass filter with variable cutoff frequency based on  */
+/* piece-wise linear interpolation between elliptic filters */
+/* Start by setting transition_frame_no = 1;                */
+void silk_LP_variable_cutoff(
+    silk_LP_state               *psLP,                          /* I/O  LP filter state                             */
+    opus_int16                  *frame,                         /* I/O  Low-pass filtered output signal             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/******************/
+/* NLSF Quantizer */
+/******************/
+/* Limit, stabilize, convert and quantize NLSFs */
+void silk_process_NLSFs(
+    silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
+    opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+);
+
+opus_int32 silk_NLSF_encode(                                    /* O    Returns RD value in Q25                     */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+          opus_int16            *pNLSF_Q15,                     /* I/O  Quantized NLSF vector [ LPC_ORDER ]         */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
+    const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
+    const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
+    const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
+);
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
+void silk_NLSF_VQ(
+    opus_int32                  err_Q26[],                      /* O    Quantization errors [K]                     */
+    const opus_int16            in_Q15[],                       /* I    Input vectors to be quantized [LPC_order]   */
+    const opus_uint8            pCB_Q8[],                       /* I    Codebook vectors [K*LPC_order]              */
+    const opus_int              K,                              /* I    Number of codebook vectors                  */
+    const opus_int              LPC_order                       /* I    Number of LPCs                              */
+);
+
+/* Delayed-decision quantizer for NLSF residuals */
+opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
+    opus_int8                   indices[],                      /* O    Quantization indices [ order ]              */
+    const opus_int16            x_Q10[],                        /* I    Input [ order ]                             */
+    const opus_int16            w_Q5[],                         /* I    Weights [ order ]                           */
+    const opus_uint8            pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int16            ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
+    const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
+    const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
+    const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
+    const opus_int16            order                           /* I    Number of input values                      */
+);
+
+/* Unpack predictor values and indices for entropy coding tables */
+void silk_NLSF_unpack(
+          opus_int16            ec_ix[],                        /* O    Indices to entropy tables [ LPC_ORDER ]     */
+          opus_uint8            pred_Q8[],                      /* O    LSF predictor [ LPC_ORDER ]                 */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int              CB1_index                       /* I    Index of vector in first LSF codebook       */
+);
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+void silk_NLSF_decode(
+          opus_int16            *pNLSF_Q15,                     /* O    Quantized NLSF vector [ LPC_ORDER ]         */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+    const silk_NLSF_CB_struct   *psNLSF_CB                      /* I    Codebook object                             */
+);
+
+/****************************************************/
+/* Decoder Functions                                */
+/****************************************************/
+opus_int silk_init_decoder(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */
+);
+
+/* Set decoder sampling rate */
+opus_int silk_decoder_set_fs(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state pointer                       */
+    opus_int                    fs_kHz,                         /* I    Sampling frequency (kHz)                    */
+    opus_int32                  fs_API_Hz                       /* I    API Sampling frequency (Hz)                 */
+);
+
+/****************/
+/* Decode frame */
+/****************/
+opus_int silk_decode_frame(
+    silk_decoder_state          *psDec,                         /* I/O  Pointer to Silk decoder state               */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
+    opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
+    opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Decode indices from bitstream */
+void silk_decode_indices(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    decode_LBRR,                    /* I    Flag indicating LBRR data is being decoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Decode parameters from payload */
+void silk_decode_parameters(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+void silk_decode_core(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* O    Decoded speech                              */
+    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+);
+
+/* Decode quantization indices of excitation (Shell coding) */
+void silk_decode_pulses(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    const opus_int              signalType,                     /* I    Sigtype                                     */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/******************/
+/* CNG */
+/******************/
+
+/* Reset CNG */
+void silk_CNG_Reset(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
+);
+
+/* Updates CNG estimate, and applies the CNG when packet was lost */
+void silk_CNG(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int16                  frame[],                        /* I/O  Signal                                      */
+    opus_int                    length                          /* I    Length of residual                          */
+);
+
+/* Encoding of various parameters */
+void silk_encode_indices(
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    encode_LBRR,                    /* I    Flag indicating LBRR data is being encoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+#endif
diff --git a/src/main/jni/opus/silk/pitch_est_defines.h b/src/main/jni/opus/silk/pitch_est_defines.h
new file mode 100644
index 000000000..e1e4b5d76
--- /dev/null
+++ b/src/main/jni/opus/silk/pitch_est_defines.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PE_DEFINES_H
+#define SILK_PE_DEFINES_H
+
+#include "SigProc_FIX.h"
+
+/********************************************************/
+/* Definitions for pitch estimator                      */
+/********************************************************/
+
+#define PE_MAX_FS_KHZ               16 /* Maximum sampling frequency used */
+
+#define PE_MAX_NB_SUBFR             4
+#define PE_SUBFR_LENGTH_MS          5   /* 5 ms */
+
+#define PE_LTP_MEM_LENGTH_MS        ( 4 * PE_SUBFR_LENGTH_MS )
+
+#define PE_MAX_FRAME_LENGTH_MS      ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS )
+#define PE_MAX_FRAME_LENGTH         ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ )
+#define PE_MAX_FRAME_LENGTH_ST_1    ( PE_MAX_FRAME_LENGTH >> 2 )
+#define PE_MAX_FRAME_LENGTH_ST_2    ( PE_MAX_FRAME_LENGTH >> 1 )
+
+#define PE_MAX_LAG_MS               18           /* 18 ms -> 56 Hz */
+#define PE_MIN_LAG_MS               2            /* 2 ms -> 500 Hz */
+#define PE_MAX_LAG                  ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ )
+#define PE_MIN_LAG                  ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ )
+
+#define PE_D_SRCH_LENGTH            24
+
+#define PE_NB_STAGE3_LAGS           5
+
+#define PE_NB_CBKS_STAGE2           3
+#define PE_NB_CBKS_STAGE2_EXT       11
+
+#define PE_NB_CBKS_STAGE3_MAX       34
+#define PE_NB_CBKS_STAGE3_MID       24
+#define PE_NB_CBKS_STAGE3_MIN       16
+
+#define PE_NB_CBKS_STAGE3_10MS      12
+#define PE_NB_CBKS_STAGE2_10MS      3
+
+#define PE_SHORTLAG_BIAS            0.2f    /* for logarithmic weighting    */
+#define PE_PREVLAG_BIAS             0.2f    /* for logarithmic weighting    */
+#define PE_FLATCONTOUR_BIAS         0.05f
+
+#define SILK_PE_MIN_COMPLEX         0
+#define SILK_PE_MID_COMPLEX         1
+#define SILK_PE_MAX_COMPLEX         2
+
+/* Tables for 20 ms frames */
+extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ];
+extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ];
+extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ];
+extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ];
+
+/* Tables for 10 ms frames */
+extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ 3 ];
+extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 12 ];
+extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ];
+
+#endif
+
diff --git a/src/main/jni/opus/silk/pitch_est_tables.c b/src/main/jni/opus/silk/pitch_est_tables.c
new file mode 100644
index 000000000..81a8bacac
--- /dev/null
+++ b/src/main/jni/opus/silk/pitch_est_tables.c
@@ -0,0 +1,99 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "typedef.h"
+#include "pitch_est_defines.h"
+
+const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] =
+{
+    {0, 1, 0},
+    {0, 0, 1}
+};
+
+const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] =
+{
+    { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3},
+    { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3}
+};
+
+const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] =
+{
+    {-3, 7},
+    {-2, 7}
+};
+
+const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] =
+{
+    {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1},
+    {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0},
+    {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
+    {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1}
+};
+
+const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] =
+{
+    {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9},
+    {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3},
+    {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 3,-2,-2,-2, 3},
+    {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9}
+};
+
+const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] =
+{
+    /* Lags to search for low number of stage3 cbks */
+    {
+        {-5,8},
+        {-1,6},
+        {-1,6},
+        {-4,10}
+    },
+    /* Lags to search for middle number of stage3 cbks */
+    {
+        {-6,10},
+        {-2,6},
+        {-1,6},
+        {-5,10}
+    },
+    /* Lags to search for max number of stage3 cbks */
+    {
+        {-9,12},
+        {-3,7},
+        {-2,7},
+        {-7,13}
+    }
+};
+
+const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] =
+{
+    PE_NB_CBKS_STAGE3_MIN,
+    PE_NB_CBKS_STAGE3_MID,
+    PE_NB_CBKS_STAGE3_MAX
+};
diff --git a/src/main/jni/opus/silk/process_NLSFs.c b/src/main/jni/opus/silk/process_NLSFs.c
new file mode 100644
index 000000000..c27cf0304
--- /dev/null
+++ b/src/main/jni/opus/silk/process_NLSFs.c
@@ -0,0 +1,105 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Limit, stabilize, convert and quantize NLSFs */
+void silk_process_NLSFs(
+    silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
+    opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+)
+{
+    opus_int     i, doInterpolate;
+    opus_int     NLSF_mu_Q20;
+    opus_int32   i_sqr_Q15;
+    opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
+    opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
+    opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
+
+    silk_assert( psEncC->speech_activity_Q8 >=   0 );
+    silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
+    silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
+
+    /***********************/
+    /* Calculate mu values */
+    /***********************/
+    /* NLSF_mu  = 0.003 - 0.0015 * psEnc->speech_activity; */
+    NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 );
+    if( psEncC->nb_subfr == 2 ) {
+        /* Multiply by 1.5 for 10 ms packets */
+        NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 );
+    }
+
+    silk_assert( NLSF_mu_Q20 >  0 );
+    silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) );
+
+    /* Calculate NLSF weights */
+    silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder );
+
+    /* Update NLSF weights for interpolated NLSFs */
+    doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 );
+    if( doInterpolate ) {
+        /* Calculate the interpolated NLSF vector for the first half */
+        silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+            psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+        /* Calculate first half NLSF weights for the interpolated NLSFs */
+        silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+        /* Update NLSF weights with contribution from first half */
+        i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
+        for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
+            pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+            silk_assert( pNLSFW_QW[ i ] >= 1 );
+        }
+    }
+
+    silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW,
+        NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType );
+
+    /* Convert quantized NLSFs back to LPC coefficients */
+    silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder );
+
+    if( doInterpolate ) {
+        /* Calculate the interpolated, quantized LSF vector for the first half */
+        silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+            psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+        /* Convert back to LPC coefficients */
+        silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+    } else {
+        /* Copy LPC coefficients for first half from second half */
+        silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
+    }
+}
diff --git a/src/main/jni/opus/silk/quant_LTP_gains.c b/src/main/jni/opus/silk/quant_LTP_gains.c
new file mode 100644
index 000000000..fd0870da1
--- /dev/null
+++ b/src/main/jni/opus/silk/quant_LTP_gains.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "tuning_parameters.h"
+
+void silk_quant_LTP_gains(
+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
+    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
+    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_log_gain_Q7,							/* I/O  Cumulative max prediction gain  */
+    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
+    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
+    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
+    const opus_int              nb_subfr                                    /* I    number of subframes             */
+)
+{
+    opus_int             j, k, cbk_size;
+    opus_int8            temp_idx[ MAX_NB_SUBFR ];
+    const opus_uint8     *cl_ptr_Q5;
+    const opus_int8      *cbk_ptr_Q7;
+    const opus_uint8     *cbk_gain_ptr_Q7;
+    const opus_int16     *b_Q14_ptr;
+    const opus_int32     *W_Q18_ptr;
+    opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+	opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
+
+    /***************************************************/
+    /* iterate over different codebooks with different */
+    /* rates/distortions, and choose best */
+    /***************************************************/
+    min_rate_dist_Q14 = silk_int32_MAX;
+    best_sum_log_gain_Q7 = 0;
+    for( k = 0; k < 3; k++ ) {
+        /* Safety margin for pitch gain control, to take into account factors
+           such as state rescaling/rewhitening. */
+        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
+        cl_ptr_Q5  = silk_LTP_gain_BITS_Q5_ptrs[ k ];
+        cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[        k ];
+        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
+        cbk_size   = silk_LTP_vq_sizes[          k ];
+
+        /* Set up pointer to first subframe */
+        W_Q18_ptr = W_Q18;
+        b_Q14_ptr = B_Q14;
+
+        rate_dist_Q14 = 0;
+		sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
+        for( j = 0; j < nb_subfr; j++ ) {
+			max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) 
+										+ SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
+            silk_VQ_WMat_EC(
+                &temp_idx[ j ],         /* O    index of best codebook vector                           */
+                &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */
+				&gain_Q7,               /* O    sum of absolute LTP coefficients                        */
+                b_Q14_ptr,              /* I    input vector to be quantized                            */
+                W_Q18_ptr,              /* I    weighting matrix                                        */
+                cbk_ptr_Q7,             /* I    codebook                                                */
+                cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */
+                cl_ptr_Q5,              /* I    code length for each codebook vector                    */
+                mu_Q9,                  /* I    tradeoff between weighted error and rate                */
+				max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
+                cbk_size                /* I    number of vectors in codebook                           */
+            );
+
+            rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));
+
+            b_Q14_ptr += LTP_ORDER;
+            W_Q18_ptr += LTP_ORDER * LTP_ORDER;
+        }
+
+        /* Avoid never finding a codebook */
+        rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );
+
+        if( rate_dist_Q14 < min_rate_dist_Q14 ) {
+            min_rate_dist_Q14 = rate_dist_Q14;
+            *periodicity_index = (opus_int8)k;
+            silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+			best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
+        }
+
+        /* Break early in low-complexity mode if rate distortion is below threshold */
+        if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {
+            break;
+        }
+    }
+
+    cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ];
+    for( j = 0; j < nb_subfr; j++ ) {
+        for( k = 0; k < LTP_ORDER; k++ ) {
+            B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
+        }
+    }
+	*sum_log_gain_Q7 = best_sum_log_gain_Q7;
+}
+
diff --git a/src/main/jni/opus/silk/resampler.c b/src/main/jni/opus/silk/resampler.c
new file mode 100644
index 000000000..374fbb372
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler.c
@@ -0,0 +1,215 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/*
+ * Matrix of resampling methods used:
+ *                                 Fs_out (kHz)
+ *                        8      12     16     24     48
+ *
+ *               8        C      UF     U      UF     UF
+ *              12        AF     C      UF     U      UF
+ * Fs_in (kHz)  16        D      AF     C      UF     UF
+ *              24        AF     D      AF     C      U
+ *              48        AF     AF     AF     D      C
+ *
+ * C   -> Copy (no resampling)
+ * D   -> Allpass-based 2x downsampling
+ * U   -> Allpass-based 2x upsampling
+ * UF  -> Allpass-based 2x upsampling followed by FIR interpolation
+ * AF  -> AR2 filter followed by FIR interpolation
+ */
+
+#include "resampler_private.h"
+
+/* Tables with delay compensation values to equalize total delay for different modes */
+static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = {
+/* in  \ out  8  12  16 */
+/*  8 */   {  6,  0,  3 },
+/* 12 */   {  0,  7,  3 },
+/* 16 */   {  0,  1, 10 },
+/* 24 */   {  0,  2,  6 },
+/* 48 */   { 18, 10, 12 }
+};
+
+static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = {
+/* in  \ out  8  12  16  24  48 */
+/*  8 */   {  4,  0,  2,  0,  0 },
+/* 12 */   {  0,  9,  4,  7,  4 },
+/* 16 */   {  0,  3, 12,  7,  7 }
+};
+
+/* Simple way to make [8000, 12000, 16000, 24000, 48000] to [0, 1, 2, 3, 4] */
+#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 )
+
+#define USE_silk_resampler_copy                     (0)
+#define USE_silk_resampler_private_up2_HQ_wrapper   (1)
+#define USE_silk_resampler_private_IIR_FIR          (2)
+#define USE_silk_resampler_private_down_FIR         (3)
+
+/* Initialize/reset the resampler state for a given pair of input/output sampling rates */
+opus_int silk_resampler_init(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
+    opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
+    opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
+)
+{
+    opus_int up2x;
+
+    /* Clear state */
+    silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );
+
+    /* Input checking */
+    if( forEnc ) {
+        if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 && Fs_Hz_in  != 24000 && Fs_Hz_in  != 48000 ) ||
+            ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) {
+            silk_assert( 0 );
+            return -1;
+        }
+        S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+    } else {
+        if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 ) ||
+            ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) {
+            silk_assert( 0 );
+            return -1;
+        }
+        S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+    }
+
+    S->Fs_in_kHz  = silk_DIV32_16( Fs_Hz_in,  1000 );
+    S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 );
+
+    /* Number of samples processed per batch */
+    S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS;
+
+    /* Find resampler with the right sampling ratio */
+    up2x = 0;
+    if( Fs_Hz_out > Fs_Hz_in ) {
+        /* Upsample */
+        if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) {                            /* Fs_out : Fs_in = 2 : 1 */
+            /* Special case: directly use 2x upsampler */
+            S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper;
+        } else {
+            /* Default resampler */
+            S->resampler_function = USE_silk_resampler_private_IIR_FIR;
+            up2x = 1;
+        }
+    } else if ( Fs_Hz_out < Fs_Hz_in ) {
+        /* Downsample */
+         S->resampler_function = USE_silk_resampler_private_down_FIR;
+        if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) {             /* Fs_out : Fs_in = 3 : 4 */
+            S->FIR_Fracs = 3;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+            S->Coefs = silk_Resampler_3_4_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) {      /* Fs_out : Fs_in = 2 : 3 */
+            S->FIR_Fracs = 2;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+            S->Coefs = silk_Resampler_2_3_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 2 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1;
+            S->Coefs = silk_Resampler_1_2_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 3 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_3_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 4 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_4_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 6 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_6_COEFS;
+        } else {
+            /* None available */
+            silk_assert( 0 );
+            return -1;
+        }
+    } else {
+        /* Input and output sampling rates are equal: copy */
+        S->resampler_function = USE_silk_resampler_copy;
+    }
+
+    /* Ratio of input/output samples */
+    S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 );
+    /* Make sure the ratio is rounded up */
+    while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) {
+        S->invRatio_Q16++;
+    }
+
+    return 0;
+}
+
+/* Resampler: convert from one sampling rate to another */
+/* Input and output sampling rate are at most 48000 Hz  */
+opus_int silk_resampler(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int16                  out[],              /* O    Output signal                                               */
+    const opus_int16            in[],               /* I    Input signal                                                */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int nSamples;
+
+    /* Need at least 1 ms of input data */
+    silk_assert( inLen >= S->Fs_in_kHz );
+    /* Delay can't exceed the 1 ms of buffering */
+    silk_assert( S->inputDelay <= S->Fs_in_kHz );
+
+    nSamples = S->Fs_in_kHz - S->inputDelay;
+
+    /* Copy to delay buffer */
+    silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) );
+
+    switch( S->resampler_function ) {
+        case USE_silk_resampler_private_up2_HQ_wrapper:
+            silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        case USE_silk_resampler_private_IIR_FIR:
+            silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        case USE_silk_resampler_private_down_FIR:
+            silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        default:
+            silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) );
+            silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) );
+    }
+
+    /* Copy to delay buffer */
+    silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) );
+
+    return 0;
+}
diff --git a/src/main/jni/opus/silk/resampler_down2.c b/src/main/jni/opus/silk/resampler_down2.c
new file mode 100644
index 000000000..cec363464
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_down2.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_rom.h"
+
+/* Downsample by a factor 2 */
+void silk_resampler_down2(
+    opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(len/2) ]                              */
+    const opus_int16            *in,                /* I    Input signal [ len ]                                        */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 );
+    opus_int32 in32, out32, Y, X;
+
+    silk_assert( silk_resampler_down2_0 > 0 );
+    silk_assert( silk_resampler_down2_1 < 0 );
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < len2; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
+
+        /* All-pass section for even input sample */
+        Y      = silk_SUB32( in32, S[ 0 ] );
+        X      = silk_SMLAWB( Y, Y, silk_resampler_down2_1 );
+        out32  = silk_ADD32( S[ 0 ], X );
+        S[ 0 ] = silk_ADD32( in32, X );
+
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );
+
+        /* All-pass section for odd input sample, and add to output of previous section */
+        Y      = silk_SUB32( in32, S[ 1 ] );
+        X      = silk_SMULWB( Y, silk_resampler_down2_0 );
+        out32  = silk_ADD32( out32, S[ 1 ] );
+        out32  = silk_ADD32( out32, X );
+        S[ 1 ] = silk_ADD32( in32, X );
+
+        /* Add, convert back to int16 and store to output */
+        out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) );
+    }
+}
+
diff --git a/src/main/jni/opus/silk/resampler_down2_3.c b/src/main/jni/opus/silk/resampler_down2_3.c
new file mode 100644
index 000000000..4342614dc
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_down2_3.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+#define ORDER_FIR                   4
+
+/* Downsample by a factor 2/3, low quality */
+void silk_resampler_down2_3(
+    opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
+    const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int32 nSamplesIn, counter, res_Q6;
+    VARDECL( opus_int32, buf );
+    opus_int32 *buf_ptr;
+    SAVE_STACK;
+
+    ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) );
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN );
+
+        /* Second-order AR filter (output in Q8) */
+        silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in,
+            silk_Resampler_2_3_COEFS_LQ, nSamplesIn );
+
+        /* Interpolate filtered signal */
+        buf_ptr = buf;
+        counter = nSamplesIn;
+        while( counter > 2 ) {
+            /* Inner product */
+            res_Q6 = silk_SMULWB(         buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] );
+
+            /* Scale down, saturate and store in output array */
+            *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+            res_Q6 = silk_SMULWB(         buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+
+            /* Scale down, saturate and store in output array */
+            *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+            buf_ptr += 3;
+            counter -= 3;
+        }
+
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 0 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/resampler_private.h b/src/main/jni/opus/silk/resampler_private.h
new file mode 100644
index 000000000..422a7d9d9
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_private.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_PRIVATE_H
+#define SILK_RESAMPLER_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_structs.h"
+#include "resampler_rom.h"
+
+/* Number of input samples to process in the inner loop */
+#define RESAMPLER_MAX_BATCH_SIZE_MS             10
+#define RESAMPLER_MAX_FS_KHZ                    48
+#define RESAMPLER_MAX_BATCH_SIZE_IN             ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ )
+
+/* Description: Hybrid IIR/FIR polyphase implementation of resampling */
+void silk_resampler_private_IIR_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+);
+
+/* Description: Hybrid IIR/FIR polyphase implementation of resampling */
+void silk_resampler_private_down_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+);
+
+/* Upsample by a factor 2, high quality */
+void silk_resampler_private_up2_HQ_wrapper(
+    void                            *SS,            /* I/O  Resampler state (unused)    */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+);
+
+/* Upsample by a factor 2, high quality */
+void silk_resampler_private_up2_HQ(
+    opus_int32                      *S,             /* I/O  Resampler state [ 6 ]       */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+);
+
+/* Second order AR filter */
+void silk_resampler_private_AR2(
+    opus_int32                      S[],            /* I/O  State vector [ 2 ]          */
+    opus_int32                      out_Q8[],       /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    const opus_int16                A_Q14[],        /* I    AR coefficients, Q14        */
+    opus_int32                      len             /* I    Signal length               */
+);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_PRIVATE_H */
diff --git a/src/main/jni/opus/silk/resampler_private_AR2.c b/src/main/jni/opus/silk/resampler_private_AR2.c
new file mode 100644
index 000000000..5fff23714
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_private_AR2.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Second order AR filter with single delay elements */
+void silk_resampler_private_AR2(
+    opus_int32                      S[],            /* I/O  State vector [ 2 ]          */
+    opus_int32                      out_Q8[],       /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    const opus_int16                A_Q14[],        /* I    AR coefficients, Q14        */
+    opus_int32                      len             /* I    Signal length               */
+)
+{
+    opus_int32    k;
+    opus_int32    out32;
+
+    for( k = 0; k < len; k++ ) {
+        out32       = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ k ], 8 );
+        out_Q8[ k ] = out32;
+        out32       = silk_LSHIFT( out32, 2 );
+        S[ 0 ]      = silk_SMLAWB( S[ 1 ], out32, A_Q14[ 0 ] );
+        S[ 1 ]      = silk_SMULWB( out32, A_Q14[ 1 ] );
+    }
+}
+
diff --git a/src/main/jni/opus/silk/resampler_private_IIR_FIR.c b/src/main/jni/opus/silk/resampler_private_IIR_FIR.c
new file mode 100644
index 000000000..6b2b3a2e1
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_private_IIR_FIR.c
@@ -0,0 +1,107 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+    opus_int16  *out,
+    opus_int16  *buf,
+    opus_int32  max_index_Q16,
+    opus_int32  index_increment_Q16
+)
+{
+    opus_int32 index_Q16, res_Q15;
+    opus_int16 *buf_ptr;
+    opus_int32 table_index;
+
+    /* Interpolate upsampled signal and store in output array */
+    for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+        table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 );
+        buf_ptr = &buf[ index_Q16 >> 16 ];
+
+        res_Q15 = silk_SMULBB(          buf_ptr[ 0 ], silk_resampler_frac_FIR_12[      table_index ][ 0 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[      table_index ][ 1 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[      table_index ][ 2 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[      table_index ][ 3 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] );
+        *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) );
+    }
+    return out;
+}
+/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */
+void silk_resampler_private_IIR_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    opus_int32 nSamplesIn;
+    opus_int32 max_index_Q16, index_increment_Q16;
+    VARDECL( opus_int16, buf );
+    SAVE_STACK;
+
+    ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    index_increment_Q16 = S->invRatio_Q16;
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, S->batchSize );
+
+        /* Upsample 2x */
+        silk_resampler_private_up2_HQ( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_12 ], in, nSamplesIn );
+
+        max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 + 1 );         /* + 1 because 2x upsampling */
+        out = silk_resampler_private_IIR_FIR_INTERPOL( out, buf, max_index_Q16, index_increment_Q16 );
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 0 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/resampler_private_down_FIR.c b/src/main/jni/opus/silk/resampler_private_down_FIR.c
new file mode 100644
index 000000000..783e42b35
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_private_down_FIR.c
@@ -0,0 +1,194 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+    opus_int16          *out,
+    opus_int32          *buf,
+    const opus_int16    *FIR_Coefs,
+    opus_int            FIR_Order,
+    opus_int            FIR_Fracs,
+    opus_int32          max_index_Q16,
+    opus_int32          index_increment_Q16
+)
+{
+    opus_int32 index_Q16, res_Q6;
+    opus_int32 *buf_ptr;
+    opus_int32 interpol_ind;
+    const opus_int16 *interpol_ptr;
+
+    switch( FIR_Order ) {
+        case RESAMPLER_DOWN_ORDER_FIR0:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Fractional part gives interpolation coefficients */
+                interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs );
+
+                /* Inner product */
+                interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ];
+                res_Q6 = silk_SMULWB(         buf_ptr[ 0 ], interpol_ptr[ 0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] );
+                interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ];
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], interpol_ptr[ 3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[  9 ], interpol_ptr[ 8 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        case RESAMPLER_DOWN_ORDER_FIR1:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Inner product */
+                res_Q6 = silk_SMULWB(         silk_ADD32( buf_ptr[  0 ], buf_ptr[ 23 ] ), FIR_Coefs[  0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  1 ], buf_ptr[ 22 ] ), FIR_Coefs[  1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  2 ], buf_ptr[ 21 ] ), FIR_Coefs[  2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  3 ], buf_ptr[ 20 ] ), FIR_Coefs[  3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  4 ], buf_ptr[ 19 ] ), FIR_Coefs[  4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  5 ], buf_ptr[ 18 ] ), FIR_Coefs[  5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  6 ], buf_ptr[ 17 ] ), FIR_Coefs[  6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  7 ], buf_ptr[ 16 ] ), FIR_Coefs[  7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  8 ], buf_ptr[ 15 ] ), FIR_Coefs[  8 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  9 ], buf_ptr[ 14 ] ), FIR_Coefs[  9 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        case RESAMPLER_DOWN_ORDER_FIR2:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Inner product */
+                res_Q6 = silk_SMULWB(         silk_ADD32( buf_ptr[  0 ], buf_ptr[ 35 ] ), FIR_Coefs[  0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  1 ], buf_ptr[ 34 ] ), FIR_Coefs[  1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  2 ], buf_ptr[ 33 ] ), FIR_Coefs[  2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  3 ], buf_ptr[ 32 ] ), FIR_Coefs[  3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  4 ], buf_ptr[ 31 ] ), FIR_Coefs[  4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  5 ], buf_ptr[ 30 ] ), FIR_Coefs[  5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  6 ], buf_ptr[ 29 ] ), FIR_Coefs[  6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  7 ], buf_ptr[ 28 ] ), FIR_Coefs[  7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  8 ], buf_ptr[ 27 ] ), FIR_Coefs[  8 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  9 ], buf_ptr[ 26 ] ), FIR_Coefs[  9 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        default:
+            silk_assert( 0 );
+    }
+    return out;
+}
+
+/* Resample with a 2nd order AR filter followed by FIR interpolation */
+void silk_resampler_private_down_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    opus_int32 nSamplesIn;
+    opus_int32 max_index_Q16, index_increment_Q16;
+    VARDECL( opus_int32, buf );
+    const opus_int16 *FIR_Coefs;
+    SAVE_STACK;
+
+    ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) );
+
+    FIR_Coefs = &S->Coefs[ 2 ];
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    index_increment_Q16 = S->invRatio_Q16;
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, S->batchSize );
+
+        /* Second-order AR filter (output in Q8) */
+        silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn );
+
+        max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 );
+
+        /* Interpolate filtered signal */
+        out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order,
+            S->FIR_Fracs, max_index_Q16, index_increment_Q16 );
+
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 1 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/resampler_private_up2_HQ.c b/src/main/jni/opus/silk/resampler_private_up2_HQ.c
new file mode 100644
index 000000000..c7ec8de36
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_private_up2_HQ.c
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Upsample by a factor 2, high quality */
+/* Uses 2nd order allpass filters for the 2x upsampling, followed by a      */
+/* notch filter just above Nyquist.                                         */
+void silk_resampler_private_up2_HQ(
+    opus_int32                      *S,             /* I/O  Resampler state [ 6 ]       */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+)
+{
+    opus_int32 k;
+    opus_int32 in32, out32_1, out32_2, Y, X;
+
+    silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 );
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < len; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 );
+
+        /* First all-pass section for even output sample */
+        Y       = silk_SUB32( in32, S[ 0 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] );
+        out32_1 = silk_ADD32( S[ 0 ], X );
+        S[ 0 ]  = silk_ADD32( in32, X );
+
+        /* Second all-pass section for even output sample */
+        Y       = silk_SUB32( out32_1, S[ 1 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] );
+        out32_2 = silk_ADD32( S[ 1 ], X );
+        S[ 1 ]  = silk_ADD32( out32_1, X );
+
+        /* Third all-pass section for even output sample */
+        Y       = silk_SUB32( out32_2, S[ 2 ] );
+        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] );
+        out32_1 = silk_ADD32( S[ 2 ], X );
+        S[ 2 ]  = silk_ADD32( out32_2, X );
+
+        /* Apply gain in Q15, convert back to int16 and store to output */
+        out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+
+        /* First all-pass section for odd output sample */
+        Y       = silk_SUB32( in32, S[ 3 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] );
+        out32_1 = silk_ADD32( S[ 3 ], X );
+        S[ 3 ]  = silk_ADD32( in32, X );
+
+        /* Second all-pass section for odd output sample */
+        Y       = silk_SUB32( out32_1, S[ 4 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] );
+        out32_2 = silk_ADD32( S[ 4 ], X );
+        S[ 4 ]  = silk_ADD32( out32_1, X );
+
+        /* Third all-pass section for odd output sample */
+        Y       = silk_SUB32( out32_2, S[ 5 ] );
+        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] );
+        out32_1 = silk_ADD32( S[ 5 ], X );
+        S[ 5 ]  = silk_ADD32( out32_2, X );
+
+        /* Apply gain in Q15, convert back to int16 and store to output */
+        out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+    }
+}
+
+void silk_resampler_private_up2_HQ_wrapper(
+    void                            *SS,            /* I/O  Resampler state (unused)    */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    silk_resampler_private_up2_HQ( S->sIIR, out, in, len );
+}
diff --git a/src/main/jni/opus/silk/resampler_rom.c b/src/main/jni/opus/silk/resampler_rom.c
new file mode 100644
index 000000000..2d502706f
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_rom.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Filter coefficients for IIR/FIR polyphase resampling     *
+ * Total size: 179 Words (358 Bytes)                        */
+
+#include "resampler_private.h"
+
+/* Matlab code for the notch filter coefficients: */
+/* B = [1, 0.147, 1];  A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */
+/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */
+/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634,  -7012,   7209,  30474 }; */
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+	-20694, -13867,
+	   -49,     64,     17,   -157,    353,   -496,    163,  11047,  22205,
+	   -39,      6,     91,   -170,    186,     23,   -896,   6336,  19928,
+	   -19,    -36,    102,    -89,    -24,    328,   -951,   2568,  15909,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+	-14457, -14019,
+	    64,    128,   -122,     36,    310,   -768,    584,   9267,  17733,
+	    12,    128,     18,   -142,    288,   -117,   -865,   4123,  14459,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = {
+	   616, -14323,
+	   -10,     39,     58,    -46,    -84,    120,    184,   -315,   -541,   1284,   5380,   9024,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 16102, -15162,
+	   -13,      0,     20,     26,      5,    -31,    -43,     -4,     65,     90,      7,   -157,   -248,    -44,    593,   1583,   2612,   3271,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 22500, -15099,
+	     3,    -14,    -20,    -15,      2,     25,     37,     25,    -16,    -71,   -107,    -79,     50,    292,    623,    982,   1288,   1464,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 27540, -15257,
+	    17,     12,      8,      1,    -10,    -22,    -30,    -32,    -22,      3,     44,    100,    168,    243,    317,    381,    429,    455,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = {
+     -2797,  -6507,
+      4697,  10739,
+      1567,   8276,
+};
+
+/* Table with interplation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (46 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = {
+	{  189,  -600,   617, 30567 },
+	{  117,  -159, -1070, 29704 },
+	{   52,   221, -2392, 28276 },
+	{   -4,   529, -3350, 26341 },
+	{  -48,   758, -3956, 23973 },
+	{  -80,   905, -4235, 21254 },
+	{  -99,   972, -4222, 18278 },
+	{ -107,   967, -3957, 15143 },
+	{ -103,   896, -3487, 11950 },
+	{  -91,   773, -2865,  8798 },
+	{  -71,   611, -2143,  5784 },
+	{  -46,   425, -1375,  2996 },
+};
diff --git a/src/main/jni/opus/silk/resampler_rom.h b/src/main/jni/opus/silk/resampler_rom.h
new file mode 100644
index 000000000..490b3388d
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_rom.h
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_FIX_RESAMPLER_ROM_H
+#define SILK_FIX_RESAMPLER_ROM_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+#include "typedef.h"
+#include "resampler_structs.h"
+
+#define RESAMPLER_DOWN_ORDER_FIR0               18
+#define RESAMPLER_DOWN_ORDER_FIR1               24
+#define RESAMPLER_DOWN_ORDER_FIR2               36
+#define RESAMPLER_ORDER_FIR_12                  8
+
+/* Tables for 2x downsampler */
+static const opus_int16 silk_resampler_down2_0 = 9872;
+static const opus_int16 silk_resampler_down2_1 = 39809 - 65536;
+
+/* Tables for 2x upsampler, high quality */
+static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 };
+static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 };
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers */
+extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR1 / 2 ];
+extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ];
+
+/* Table with interplation fractions of 1/24, 3/24, ..., 23/24 */
+extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ];
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_RESAMPLER_ROM_H */
diff --git a/src/main/jni/opus/silk/resampler_structs.h b/src/main/jni/opus/silk/resampler_structs.h
new file mode 100644
index 000000000..9e9457d11
--- /dev/null
+++ b/src/main/jni/opus/silk/resampler_structs.h
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_STRUCTS_H
+#define SILK_RESAMPLER_STRUCTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SILK_RESAMPLER_MAX_FIR_ORDER                 36
+#define SILK_RESAMPLER_MAX_IIR_ORDER                 6
+
+typedef struct _silk_resampler_state_struct{
+    opus_int32       sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */
+    union{
+        opus_int32   i32[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+        opus_int16   i16[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+    }                sFIR;
+    opus_int16       delayBuf[ 48 ];
+    opus_int         resampler_function;
+    opus_int         batchSize;
+    opus_int32       invRatio_Q16;
+    opus_int         FIR_Order;
+    opus_int         FIR_Fracs;
+    opus_int         Fs_in_kHz;
+    opus_int         Fs_out_kHz;
+    opus_int         inputDelay;
+    const opus_int16 *Coefs;
+} silk_resampler_state_struct;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_STRUCTS_H */
+
diff --git a/src/main/jni/opus/silk/shell_coder.c b/src/main/jni/opus/silk/shell_coder.c
new file mode 100644
index 000000000..796f57d6c
--- /dev/null
+++ b/src/main/jni/opus/silk/shell_coder.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* shell coder; pulse-subframe length is hardcoded */
+
+static OPUS_INLINE void combine_pulses(
+    opus_int         *out,   /* O    combined pulses vector [len] */
+    const opus_int   *in,    /* I    input vector       [2 * len] */
+    const opus_int   len     /* I    number of OUTPUT samples     */
+)
+{
+    opus_int k;
+    for( k = 0; k < len; k++ ) {
+        out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ];
+    }
+}
+
+static OPUS_INLINE void encode_split(
+    ec_enc                      *psRangeEnc,    /* I/O  compressor data structure                   */
+    const opus_int              p_child1,       /* I    pulse amplitude of first child subframe     */
+    const opus_int              p,              /* I    pulse amplitude of current subframe         */
+    const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
+)
+{
+    if( p > 0 ) {
+        ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+    }
+}
+
+static OPUS_INLINE void decode_split(
+    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
+    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
+    ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
+    const opus_int              p,              /* I    pulse amplitude of current subframe         */
+    const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
+)
+{
+    if( p > 0 ) {
+        p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+        p_child2[ 0 ] = p - p_child1[ 0 ];
+    } else {
+        p_child1[ 0 ] = 0;
+        p_child2[ 0 ] = 0;
+    }
+}
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              *pulses0                        /* I    data: nonnegative pulse amplitudes          */
+)
+{
+    opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ];
+
+    /* this function operates on one shell code frame of 16 pulses */
+    silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+    /* tree representation per pulse-subframe */
+    combine_pulses( pulses1, pulses0, 8 );
+    combine_pulses( pulses2, pulses1, 4 );
+    combine_pulses( pulses3, pulses2, 2 );
+    combine_pulses( pulses4, pulses3, 1 );
+
+    encode_split( psRangeEnc, pulses3[  0 ], pulses4[ 0 ], silk_shell_code_table3 );
+
+    encode_split( psRangeEnc, pulses2[  0 ], pulses3[ 0 ], silk_shell_code_table2 );
+
+    encode_split( psRangeEnc, pulses1[  0 ], pulses2[ 0 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  0 ], pulses1[ 0 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[  2 ], pulses1[ 1 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses1[  2 ], pulses2[ 1 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  4 ], pulses1[ 2 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[  6 ], pulses1[ 3 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses2[  2 ], pulses3[ 1 ], silk_shell_code_table2 );
+
+    encode_split( psRangeEnc, pulses1[  4 ], pulses2[ 2 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  8 ], pulses1[ 4 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses1[  6 ], pulses2[ 3 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 );
+}
+
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
+)
+{
+    opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+
+    /* this function operates on one shell code frame of 16 pulses */
+    silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+    decode_split( &pulses3[  0 ], &pulses3[  1 ], psRangeDec, pulses4,      silk_shell_code_table3 );
+
+    decode_split( &pulses2[  0 ], &pulses2[  1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 );
+
+    decode_split( &pulses1[  0 ], &pulses1[  1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  0 ], &pulses0[  1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 );
+    decode_split( &pulses0[  2 ], &pulses0[  3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 );
+
+    decode_split( &pulses1[  2 ], &pulses1[  3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  4 ], &pulses0[  5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 );
+    decode_split( &pulses0[  6 ], &pulses0[  7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 );
+
+    decode_split( &pulses2[  2 ], &pulses2[  3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 );
+
+    decode_split( &pulses1[  4 ], &pulses1[  5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  8 ], &pulses0[  9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 );
+    decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 );
+
+    decode_split( &pulses1[  6 ], &pulses1[  7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 );
+    decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 );
+    decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 );
+}
diff --git a/src/main/jni/opus/silk/sigm_Q15.c b/src/main/jni/opus/silk/sigm_Q15.c
new file mode 100644
index 000000000..3c507d255
--- /dev/null
+++ b/src/main/jni/opus/silk/sigm_Q15.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Approximate sigmoid function */
+
+#include "SigProc_FIX.h"
+
+/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */
+static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = {
+    237, 153, 73, 30, 12, 7
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */
+static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = {
+    16384, 23955, 28861, 31213, 32178, 32548
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */
+static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = {
+    16384, 8812, 3906, 1554, 589, 219
+};
+
+opus_int silk_sigm_Q15(
+    opus_int                    in_Q5               /* I                                                                */
+)
+{
+    opus_int ind;
+
+    if( in_Q5 < 0 ) {
+        /* Negative input */
+        in_Q5 = -in_Q5;
+        if( in_Q5 >= 6 * 32 ) {
+            return 0;        /* Clip */
+        } else {
+            /* Linear interpolation of look up table */
+            ind = silk_RSHIFT( in_Q5, 5 );
+            return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+        }
+    } else {
+        /* Positive input */
+        if( in_Q5 >= 6 * 32 ) {
+            return 32767;        /* clip */
+        } else {
+            /* Linear interpolation of look up table */
+            ind = silk_RSHIFT( in_Q5, 5 );
+            return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+        }
+    }
+}
+
diff --git a/src/main/jni/opus/silk/sort.c b/src/main/jni/opus/silk/sort.c
new file mode 100644
index 000000000..8670dbdd0
--- /dev/null
+++ b/src/main/jni/opus/silk/sort.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays):   */
+/* Best case:  O(n)   for an already sorted array            */
+/* Worst case: O(n^2) for an inversely sorted array          */
+/*                                                           */
+/* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
+
+#include "SigProc_FIX.h"
+
+void silk_insertion_sort_increasing(
+    opus_int32           *a,             /* I/O   Unsorted / Sorted vector               */
+    opus_int             *idx,           /* O     Index vector for the sorted elements   */
+    const opus_int       L,              /* I     Vector length                          */
+    const opus_int       K               /* I     Number of correctly sorted positions   */
+)
+{
+    opus_int32    value;
+    opus_int        i, j;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, increasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];       /* Shift value */
+            idx[ j + 1 ] = idx[ j ];     /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked for, check the remaining values, */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value < a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];       /* Shift value */
+                idx[ j + 1 ] = idx[ j ];     /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
+
+#ifdef FIXED_POINT
+/* This function is only used by the fixed-point build */
+void silk_insertion_sort_decreasing_int16(
+    opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+)
+{
+    opus_int i, j;
+    opus_int value;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, decreasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];     /* Shift value */
+            idx[ j + 1 ] = idx[ j ];   /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked for, check the remaining values, */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value > a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];     /* Shift value */
+                idx[ j + 1 ] = idx[ j ];   /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
+#endif
+
+void silk_insertion_sort_increasing_all_values_int16(
+     opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
+     const opus_int             L                   /* I     Vector length                                              */
+)
+{
+    opus_int    value;
+    opus_int    i, j;
+
+    /* Safety checks */
+    silk_assert( L >  0 );
+
+    /* Sort vector elements by value, increasing order */
+    for( i = 1; i < L; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+            a[ j + 1 ] = a[ j ]; /* Shift value */
+        }
+        a[ j + 1 ] = value; /* Write value */
+    }
+}
diff --git a/src/main/jni/opus/silk/stereo_LR_to_MS.c b/src/main/jni/opus/silk/stereo_LR_to_MS.c
new file mode 100644
index 000000000..42906e6f6
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_LR_to_MS.c
@@ -0,0 +1,229 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+    stereo_enc_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    opus_int8                   ix[ 2 ][ 3 ],                   /* O    Quantization indices                        */
+    opus_int8                   *mid_only_flag,                 /* O    Flag: only mid signal coded                 */
+    opus_int32                  mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
+    opus_int32                  total_rate_bps,                 /* I    Total bitrate                               */
+    opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
+    opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+)
+{
+    opus_int   n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
+    opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
+    opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
+    VARDECL( opus_int16, side );
+    VARDECL( opus_int16, LP_mid );
+    VARDECL( opus_int16, HP_mid );
+    VARDECL( opus_int16, LP_side );
+    VARDECL( opus_int16, HP_side );
+    opus_int16 *mid = &x1[ -2 ];
+    SAVE_STACK;
+
+    ALLOC( side, frame_length + 2, opus_int16 );
+    /* Convert to basic mid/side signals */
+    for( n = 0; n < frame_length + 2; n++ ) {
+        sum  = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
+        diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
+        mid[  n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
+        side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
+    }
+
+    /* Buffering */
+    silk_memcpy( mid,  state->sMid,  2 * sizeof( opus_int16 ) );
+    silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sMid,  &mid[  frame_length ], 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+    /* LP and HP filter mid signal */
+    ALLOC( LP_mid, frame_length, opus_int16 );
+    ALLOC( HP_mid, frame_length, opus_int16 );
+    for( n = 0; n < frame_length; n++ ) {
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+        LP_mid[ n ] = sum;
+        HP_mid[ n ] = mid[ n + 1 ] - sum;
+    }
+
+    /* LP and HP filter side signal */
+    ALLOC( LP_side, frame_length, opus_int16 );
+    ALLOC( HP_side, frame_length, opus_int16 );
+    for( n = 0; n < frame_length; n++ ) {
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+        LP_side[ n ] = sum;
+        HP_side[ n ] = side[ n + 1 ] - sum;
+    }
+
+    /* Find energies and predictors */
+    is10msFrame = frame_length == 10 * fs_kHz;
+    smooth_coef_Q16 = is10msFrame ?
+        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
+        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF,     16 );
+    smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
+
+    pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
+    pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
+    /* Ratio of the norms of residual and mid signals */
+    frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
+    frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
+
+    /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
+    total_rate_bps -= is10msFrame ? 1200 : 600;      /* Subtract approximate bitrate for coding stereo parameters */
+    if( total_rate_bps < 1 ) {
+        total_rate_bps = 1;
+    }
+    min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
+    silk_assert( min_mid_rate_bps < 32767 );
+    /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
+    frac_3_Q16 = silk_MUL( 3, frac_Q16 );
+    mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
+    /* If Mid bitrate below minimum, reduce stereo width */
+    if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
+        mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
+        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+        /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
+        width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
+            silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
+        width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
+    } else {
+        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+        width_Q14 = SILK_FIX_CONST( 1, 14 );
+    }
+
+    /* Smoother */
+    state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );
+
+    /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
+    *mid_only_flag = 0;
+    if( toMono ) {
+        /* Last frame before stereo->mono transition; collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+        silk_stereo_quant_pred( pred_Q13, ix );
+    } else if( state->width_prev_Q14 == 0 &&
+        ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
+    {
+        /* Code as panned-mono; previous frame already had zero width */
+        /* Scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        /* Collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+        mid_side_rates_bps[ 0 ] = total_rate_bps;
+        mid_side_rates_bps[ 1 ] = 0;
+        *mid_only_flag = 1;
+    } else if( state->width_prev_Q14 != 0 &&
+        ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
+    {
+        /* Transition to zero-width stereo */
+        /* Scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        /* Collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+    } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
+        /* Full-width stereo coding */
+        silk_stereo_quant_pred( pred_Q13, ix );
+        width_Q14 = SILK_FIX_CONST( 1, 14 );
+    } else {
+        /* Reduced-width stereo coding; scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        width_Q14 = state->smth_width_Q14;
+    }
+
+    /* Make sure to keep on encoding until the tapered output has been transmitted */
+    if( *mid_only_flag == 1 ) {
+        state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
+        if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
+            *mid_only_flag = 0;
+        } else {
+            /* Limit to avoid wrapping around */
+            state->silent_side_len = 10000;
+        }
+    } else {
+        state->silent_side_len = 0;
+    }
+
+    if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
+        mid_side_rates_bps[ 1 ] = 1;
+        mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
+    }
+
+    /* Interpolate predictors and subtract prediction from side channel */
+    pred0_Q13  = -state->pred_prev_Q13[ 0 ];
+    pred1_Q13  = -state->pred_prev_Q13[ 1 ];
+    w_Q24      =  silk_LSHIFT( state->width_prev_Q14, 10 );
+    denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+    delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+    delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+    deltaw_Q24 =  silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
+    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+        pred0_Q13 += delta0_Q13;
+        pred1_Q13 += delta1_Q13;
+        w_Q24   += deltaw_Q24;
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
+        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+
+    pred0_Q13 = -pred_Q13[ 0 ];
+    pred1_Q13 = -pred_Q13[ 1 ];
+    w_Q24     =  silk_LSHIFT( width_Q14, 10 );
+    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
+        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
+    state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
+    state->width_prev_Q14     = (opus_int16)width_Q14;
+    RESTORE_STACK;
+}
diff --git a/src/main/jni/opus/silk/stereo_MS_to_LR.c b/src/main/jni/opus/silk/stereo_MS_to_LR.c
new file mode 100644
index 000000000..62521a4f3
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_MS_to_LR.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+    stereo_dec_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    const opus_int32            pred_Q13[],                     /* I    Predictors                                  */
+    opus_int                    fs_kHz,                         /* I    Samples rate (kHz)                          */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+)
+{
+    opus_int   n, denom_Q16, delta0_Q13, delta1_Q13;
+    opus_int32 sum, diff, pred0_Q13, pred1_Q13;
+
+    /* Buffering */
+    silk_memcpy( x1, state->sMid,  2 * sizeof( opus_int16 ) );
+    silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sMid,  &x1[ frame_length ], 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+    /* Interpolate predictors and add prediction to side channel */
+    pred0_Q13  = state->pred_prev_Q13[ 0 ];
+    pred1_Q13  = state->pred_prev_Q13[ 1 ];
+    denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+    delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+    delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+        pred0_Q13 += delta0_Q13;
+        pred1_Q13 += delta1_Q13;
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );       /* Q11 */
+        sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 );         /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 );        /* Q8  */
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    pred0_Q13 = pred_Q13[ 0 ];
+    pred1_Q13 = pred_Q13[ 1 ];
+    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );       /* Q11 */
+        sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 );         /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 );        /* Q8  */
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ];
+    state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ];
+
+    /* Convert to left/right signals */
+    for( n = 0; n < frame_length; n++ ) {
+        sum  = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ];
+        diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ];
+        x1[ n + 1 ] = (opus_int16)silk_SAT16( sum );
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( diff );
+    }
+}
diff --git a/src/main/jni/opus/silk/stereo_decode_pred.c b/src/main/jni/opus/silk/stereo_decode_pred.c
new file mode 100644
index 000000000..56ba3925e
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_decode_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int32                  pred_Q13[]                      /* O    Predictors                                  */
+)
+{
+    opus_int   n, ix[ 2 ][ 3 ];
+    opus_int32 low_Q13, step_Q13;
+
+    /* Entropy decoding */
+    n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 );
+    ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 );
+    ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ];
+    for( n = 0; n < 2; n++ ) {
+        ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 );
+        ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 );
+    }
+
+    /* Dequantize */
+    for( n = 0; n < 2; n++ ) {
+        ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ];
+        low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ];
+        step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13,
+            SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );
+        pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 );
+    }
+
+    /* Subtract second from first predictor (helps when actually applying these) */
+    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    *decode_only_mid                /* O    Flag that only mid channel has been coded   */
+)
+{
+    /* Decode flag that only mid channel is coded */
+    *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/src/main/jni/opus/silk/stereo_encode_pred.c b/src/main/jni/opus/silk/stereo_encode_pred.c
new file mode 100644
index 000000000..e6dd19506
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_encode_pred.c
@@ -0,0 +1,62 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* I    Quantization indices                        */
+)
+{
+    opus_int   n;
+
+    /* Entropy coding */
+    n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ];
+    silk_assert( n < 25 );
+    ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 );
+    for( n = 0; n < 2; n++ ) {
+        silk_assert( ix[ n ][ 0 ] < 3 );
+        silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS );
+        ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 );
+        ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 );
+    }
+}
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   mid_only_flag
+)
+{
+    /* Encode flag that only mid channel is coded */
+    ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/src/main/jni/opus/silk/stereo_find_predictor.c b/src/main/jni/opus/silk/stereo_find_predictor.c
new file mode 100644
index 000000000..e30e90bdd
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_find_predictor.c
@@ -0,0 +1,79 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Find least-squares prediction gain for one signal based on another and quantize it */
+opus_int32 silk_stereo_find_predictor(                          /* O    Returns predictor in Q13                    */
+    opus_int32                  *ratio_Q14,                     /* O    Ratio of residual and mid energies          */
+    const opus_int16            x[],                            /* I    Basis signal                                */
+    const opus_int16            y[],                            /* I    Target signal                               */
+    opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
+    opus_int                    length,                         /* I    Number of samples                           */
+    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+)
+{
+    opus_int   scale, scale1, scale2;
+    opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10;
+
+    /* Find  predictor */
+    silk_sum_sqr_shift( &nrgx, &scale1, x, length );
+    silk_sum_sqr_shift( &nrgy, &scale2, y, length );
+    scale = silk_max_int( scale1, scale2 );
+    scale = scale + ( scale & 1 );          /* make even */
+    nrgy = silk_RSHIFT32( nrgy, scale - scale2 );
+    nrgx = silk_RSHIFT32( nrgx, scale - scale1 );
+    nrgx = silk_max_int( nrgx, 1 );
+    corr = silk_inner_prod_aligned_scale( x, y, scale, length );
+    pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );
+    pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 );
+    pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );
+
+    /* Faster update for signals with large prediction parameters */
+    smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) );
+
+    /* Smoothed mid and residual norms */
+    silk_assert( smooth_coef_Q16 < 32768 );
+    scale = silk_RSHIFT( scale, 1 );
+    mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ],
+        smooth_coef_Q16 );
+    /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */
+    nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 );
+    nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 );
+    mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ],
+        smooth_coef_Q16 );
+
+    /* Ratio of smoothed residual and mid norms */
+    *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 );
+    *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 );
+
+    return pred_Q13;
+}
diff --git a/src/main/jni/opus/silk/stereo_quant_pred.c b/src/main/jni/opus/silk/stereo_quant_pred.c
new file mode 100644
index 000000000..d4ced6c3e
--- /dev/null
+++ b/src/main/jni/opus/silk/stereo_quant_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+
+/* Quantize mid/side predictors */
+void silk_stereo_quant_pred(
+    opus_int32                  pred_Q13[],                     /* I/O  Predictors (out: quantized)                 */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* O    Quantization indices                        */
+)
+{
+    opus_int   i, j, n;
+    opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0;
+
+    /* Quantize */
+    for( n = 0; n < 2; n++ ) {
+        /* Brute-force search over quantization levels */
+        err_min_Q13 = silk_int32_MAX;
+        for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) {
+            low_Q13 = silk_stereo_pred_quant_Q13[ i ];
+            step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13,
+                SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );
+            for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) {
+                lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 );
+                err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 );
+                if( err_Q13 < err_min_Q13 ) {
+                    err_min_Q13 = err_Q13;
+                    quant_pred_Q13 = lvl_Q13;
+                    ix[ n ][ 0 ] = i;
+                    ix[ n ][ 1 ] = j;
+                } else {
+                    /* Error increasing, so we're past the optimum */
+                    goto done;
+                }
+            }
+        }
+        done:
+        ix[ n ][ 2 ]  = silk_DIV32_16( ix[ n ][ 0 ], 3 );
+        ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3;
+        pred_Q13[ n ] = quant_pred_Q13;
+    }
+
+    /* Subtract second from first predictor (helps when actually applying these) */
+    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
diff --git a/src/main/jni/opus/silk/structs.h b/src/main/jni/opus/silk/structs.h
new file mode 100644
index 000000000..1826b36a8
--- /dev/null
+++ b/src/main/jni/opus/silk/structs.h
@@ -0,0 +1,327 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_H
+#define SILK_STRUCTS_H
+
+#include "typedef.h"
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/************************************/
+/* Noise shaping quantization state */
+/************************************/
+typedef struct {
+    opus_int16                  xq[           2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal                             */
+    opus_int32                  sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ];
+    opus_int32                  sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+    opus_int32                  sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32                  sLF_AR_shp_Q14;
+    opus_int                    lagPrev;
+    opus_int                    sLTP_buf_idx;
+    opus_int                    sLTP_shp_buf_idx;
+    opus_int32                  rand_seed;
+    opus_int32                  prev_gain_Q16;
+    opus_int                    rewhite_flag;
+} silk_nsq_state;
+
+/********************************/
+/* VAD state                    */
+/********************************/
+typedef struct {
+    opus_int32                  AnaState[ 2 ];                  /* Analysis filterbank state: 0-8 kHz                                   */
+    opus_int32                  AnaState1[ 2 ];                 /* Analysis filterbank state: 0-4 kHz                                   */
+    opus_int32                  AnaState2[ 2 ];                 /* Analysis filterbank state: 0-2 kHz                                   */
+    opus_int32                  XnrgSubfr[ VAD_N_BANDS ];       /* Subframe energies                                                    */
+    opus_int32                  NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band                                   */
+    opus_int16                  HPstate;                        /* State of differentiator in the lowest band                           */
+    opus_int32                  NL[ VAD_N_BANDS ];              /* Noise energy level in each band                                      */
+    opus_int32                  inv_NL[ VAD_N_BANDS ];          /* Inverse noise energy level in each band                              */
+    opus_int32                  NoiseLevelBias[ VAD_N_BANDS ];  /* Noise level estimator bias/offset                                    */
+    opus_int32                  counter;                        /* Frame counter used in the initial phase                              */
+} silk_VAD_state;
+
+/* Variable cut-off low-pass filter state */
+typedef struct {
+    opus_int32                   In_LP_State[ 2 ];           /* Low pass filter state */
+    opus_int32                   transition_frame_no;        /* Counter which is mapped to a cut-off frequency */
+    opus_int                     mode;                       /* Operating mode, <0: switch down, >0: switch up; 0: do nothing           */
+} silk_LP_state;
+
+/* Structure containing NLSF codebook */
+typedef struct {
+    const opus_int16             nVectors;
+    const opus_int16             order;
+    const opus_int16             quantStepSize_Q16;
+    const opus_int16             invQuantStepSize_Q6;
+    const opus_uint8             *CB1_NLSF_Q8;
+    const opus_uint8             *CB1_iCDF;
+    const opus_uint8             *pred_Q8;
+    const opus_uint8             *ec_sel;
+    const opus_uint8             *ec_iCDF;
+    const opus_uint8             *ec_Rates_Q5;
+    const opus_int16             *deltaMin_Q15;
+} silk_NLSF_CB_struct;
+
+typedef struct {
+    opus_int16                   pred_prev_Q13[ 2 ];
+    opus_int16                   sMid[ 2 ];
+    opus_int16                   sSide[ 2 ];
+    opus_int32                   mid_side_amp_Q0[ 4 ];
+    opus_int16                   smth_width_Q14;
+    opus_int16                   width_prev_Q14;
+    opus_int16                   silent_side_len;
+    opus_int8                    predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ];
+    opus_int8                    mid_only_flags[ MAX_FRAMES_PER_PACKET ];
+} stereo_enc_state;
+
+typedef struct {
+    opus_int16                   pred_prev_Q13[ 2 ];
+    opus_int16                   sMid[ 2 ];
+    opus_int16                   sSide[ 2 ];
+} stereo_dec_state;
+
+typedef struct {
+    opus_int8                    GainsIndices[ MAX_NB_SUBFR ];
+    opus_int8                    LTPIndex[ MAX_NB_SUBFR ];
+    opus_int8                    NLSFIndices[ MAX_LPC_ORDER + 1 ];
+    opus_int16                   lagIndex;
+    opus_int8                    contourIndex;
+    opus_int8                    signalType;
+    opus_int8                    quantOffsetType;
+    opus_int8                    NLSFInterpCoef_Q2;
+    opus_int8                    PERIndex;
+    opus_int8                    LTP_scaleIndex;
+    opus_int8                    Seed;
+} SideInfoIndices;
+
+/********************************/
+/* Encoder state                */
+/********************************/
+typedef struct {
+    opus_int32                   In_HP_State[ 2 ];                  /* High pass filter state                                           */
+    opus_int32                   variable_HP_smth1_Q15;             /* State of first smoother                                          */
+    opus_int32                   variable_HP_smth2_Q15;             /* State of second smoother                                         */
+    silk_LP_state                sLP;                               /* Low pass filter state                                            */
+    silk_VAD_state               sVAD;                              /* Voice activity detector state                                    */
+    silk_nsq_state               sNSQ;                              /* Noise Shape Quantizer State                                      */
+    opus_int16                   prev_NLSFq_Q15[ MAX_LPC_ORDER ];   /* Previously quantized NLSF vector                                 */
+    opus_int                     speech_activity_Q8;                /* Speech activity                                                  */
+    opus_int                     allow_bandwidth_switch;            /* Flag indicating that switching of internal bandwidth is allowed  */
+    opus_int8                    LBRRprevLastGainIndex;
+    opus_int8                    prevSignalType;
+    opus_int                     prevLag;
+    opus_int                     pitch_LPC_win_length;
+    opus_int                     max_pitch_lag;                     /* Highest possible pitch lag (samples)                             */
+    opus_int32                   API_fs_Hz;                         /* API sampling frequency (Hz)                                      */
+    opus_int32                   prev_API_fs_Hz;                    /* Previous API sampling frequency (Hz)                             */
+    opus_int                     maxInternal_fs_Hz;                 /* Maximum internal sampling frequency (Hz)                         */
+    opus_int                     minInternal_fs_Hz;                 /* Minimum internal sampling frequency (Hz)                         */
+    opus_int                     desiredInternal_fs_Hz;             /* Soft request for internal sampling frequency (Hz)                */
+    opus_int                     fs_kHz;                            /* Internal sampling frequency (kHz)                                */
+    opus_int                     nb_subfr;                          /* Number of 5 ms subframes in a frame                              */
+    opus_int                     frame_length;                      /* Frame length (samples)                                           */
+    opus_int                     subfr_length;                      /* Subframe length (samples)                                        */
+    opus_int                     ltp_mem_length;                    /* Length of LTP memory                                             */
+    opus_int                     la_pitch;                          /* Look-ahead for pitch analysis (samples)                          */
+    opus_int                     la_shape;                          /* Look-ahead for noise shape analysis (samples)                    */
+    opus_int                     shapeWinLength;                    /* Window length for noise shape analysis (samples)                 */
+    opus_int32                   TargetRate_bps;                    /* Target bitrate (bps)                                             */
+    opus_int                     PacketSize_ms;                     /* Number of milliseconds to put in each packet                     */
+    opus_int                     PacketLoss_perc;                   /* Packet loss rate measured by farend                              */
+    opus_int32                   frameCounter;
+    opus_int                     Complexity;                        /* Complexity setting                                               */
+    opus_int                     nStatesDelayedDecision;            /* Number of states in delayed decision quantization                */
+    opus_int                     useInterpolatedNLSFs;              /* Flag for using NLSF interpolation                                */
+    opus_int                     shapingLPCOrder;                   /* Filter order for noise shaping filters                           */
+    opus_int                     predictLPCOrder;                   /* Filter order for prediction filters                              */
+    opus_int                     pitchEstimationComplexity;         /* Complexity level for pitch estimator                             */
+    opus_int                     pitchEstimationLPCOrder;           /* Whitening filter order for pitch estimator                       */
+    opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */
+    opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */
+    opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */
+    opus_int32                   sum_log_gain_Q7;					/* Cumulative max prediction gain									*/
+    opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */
+    opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */
+    opus_int                     controlled_since_last_payload;     /* Flag for ensuring codec_control only runs once per packet        */
+    opus_int                     warping_Q16;                       /* Warping parameter for warped noise shaping                       */
+    opus_int                     useCBR;                            /* Flag to enable constant bitrate                                  */
+    opus_int                     prefillFlag;                       /* Flag to indicate that only buffers are prefilled, no coding      */
+    const opus_uint8             *pitch_lag_low_bits_iCDF;          /* Pointer to iCDF table for low bits of pitch lag index            */
+    const opus_uint8             *pitch_contour_iCDF;               /* Pointer to iCDF table for pitch contour index                    */
+    const silk_NLSF_CB_struct    *psNLSF_CB;                        /* Pointer to NLSF codebook                                         */
+    opus_int                     input_quality_bands_Q15[ VAD_N_BANDS ];
+    opus_int                     input_tilt_Q15;
+    opus_int                     SNR_dB_Q7;                         /* Quality setting                                                  */
+
+    opus_int8                    VAD_flags[ MAX_FRAMES_PER_PACKET ];
+    opus_int8                    LBRR_flag;
+    opus_int                     LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+    SideInfoIndices              indices;
+    opus_int8                    pulses[ MAX_FRAME_LENGTH ];
+
+    int                          arch;
+
+    /* Input/output buffering */
+    opus_int16                   inputBuf[ MAX_FRAME_LENGTH + 2 ];  /* Buffer containing input signal                                   */
+    opus_int                     inputBufIx;
+    opus_int                     nFramesPerPacket;
+    opus_int                     nFramesEncoded;                    /* Number of frames analyzed in current packet                      */
+
+    opus_int                     nChannelsAPI;
+    opus_int                     nChannelsInternal;
+    opus_int                     channelNb;
+
+    /* Parameters For LTP scaling Control */
+    opus_int                     frames_since_onset;
+
+    /* Specifically for entropy coding */
+    opus_int                     ec_prevSignalType;
+    opus_int16                   ec_prevLagIndex;
+
+    silk_resampler_state_struct resampler_state;
+
+    /* DTX */
+    opus_int                     useDTX;                            /* Flag to enable DTX                                               */
+    opus_int                     inDTX;                             /* Flag to signal DTX period                                        */
+    opus_int                     noSpeechCounter;                   /* Counts concecutive nonactive frames, used by DTX                 */
+
+    /* Inband Low Bitrate Redundancy (LBRR) data */
+    opus_int                     useInBandFEC;                      /* Saves the API setting for query                                  */
+    opus_int                     LBRR_enabled;                      /* Depends on useInBandFRC, bitrate and packet loss rate            */
+    opus_int                     LBRR_GainIncreases;                /* Gains increment for coding LBRR frames                           */
+    SideInfoIndices              indices_LBRR[ MAX_FRAMES_PER_PACKET ];
+    opus_int8                    pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ];
+} silk_encoder_state;
+
+
+/* Struct for Packet Loss Concealment */
+typedef struct {
+    opus_int32                  pitchL_Q8;                          /* Pitch lag to use for voiced concealment                          */
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER ];           /* LTP coeficients to use for voiced concealment                    */
+    opus_int16                  prevLPC_Q12[ MAX_LPC_ORDER ];
+    opus_int                    last_frame_lost;                    /* Was previous frame lost                                          */
+    opus_int32                  rand_seed;                          /* Seed for unvoiced signal generation                              */
+    opus_int16                  randScale_Q14;                      /* Scaling of unvoiced random signal                                */
+    opus_int32                  conc_energy;
+    opus_int                    conc_energy_shift;
+    opus_int16                  prevLTP_scale_Q14;
+    opus_int32                  prevGain_Q16[ 2 ];
+    opus_int                    fs_kHz;
+    opus_int                    nb_subfr;
+    opus_int                    subfr_length;
+} silk_PLC_struct;
+
+/* Struct for CNG */
+typedef struct {
+    opus_int32                  CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ];
+    opus_int16                  CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ];
+    opus_int32                  CNG_synth_state[ MAX_LPC_ORDER ];
+    opus_int32                  CNG_smth_Gain_Q16;
+    opus_int32                  rand_seed;
+    opus_int                    fs_kHz;
+} silk_CNG_struct;
+
+/********************************/
+/* Decoder state                */
+/********************************/
+typedef struct {
+    opus_int32                  prev_gain_Q16;
+    opus_int32                  exc_Q14[ MAX_FRAME_LENGTH ];
+    opus_int32                  sLPC_Q14_buf[ MAX_LPC_ORDER ];
+    opus_int16                  outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ];  /* Buffer for output signal                     */
+    opus_int                    lagPrev;                            /* Previous Lag                                                     */
+    opus_int8                   LastGainIndex;                      /* Previous gain index                                              */
+    opus_int                    fs_kHz;                             /* Sampling frequency in kHz                                        */
+    opus_int32                  fs_API_hz;                          /* API sample frequency (Hz)                                        */
+    opus_int                    nb_subfr;                           /* Number of 5 ms subframes in a frame                              */
+    opus_int                    frame_length;                       /* Frame length (samples)                                           */
+    opus_int                    subfr_length;                       /* Subframe length (samples)                                        */
+    opus_int                    ltp_mem_length;                     /* Length of LTP memory                                             */
+    opus_int                    LPC_order;                          /* LPC order                                                        */
+    opus_int16                  prevNLSF_Q15[ MAX_LPC_ORDER ];      /* Used to interpolate LSFs                                         */
+    opus_int                    first_frame_after_reset;            /* Flag for deactivating NLSF interpolation                         */
+    const opus_uint8            *pitch_lag_low_bits_iCDF;           /* Pointer to iCDF table for low bits of pitch lag index            */
+    const opus_uint8            *pitch_contour_iCDF;                /* Pointer to iCDF table for pitch contour index                    */
+
+    /* For buffering payload in case of more frames per packet */
+    opus_int                    nFramesDecoded;
+    opus_int                    nFramesPerPacket;
+
+    /* Specifically for entropy coding */
+    opus_int                    ec_prevSignalType;
+    opus_int16                  ec_prevLagIndex;
+
+    opus_int                    VAD_flags[ MAX_FRAMES_PER_PACKET ];
+    opus_int                    LBRR_flag;
+    opus_int                    LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+    silk_resampler_state_struct resampler_state;
+
+    const silk_NLSF_CB_struct   *psNLSF_CB;                         /* Pointer to NLSF codebook                                         */
+
+    /* Quantization indices */
+    SideInfoIndices             indices;
+
+    /* CNG state */
+    silk_CNG_struct             sCNG;
+
+    /* Stuff used for PLC */
+    opus_int                    lossCnt;
+    opus_int                    prevSignalType;
+
+    silk_PLC_struct sPLC;
+
+} silk_decoder_state;
+
+/************************/
+/* Decoder control      */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+    opus_int32                  Gains_Q16[ MAX_NB_SUBFR ];
+    /* Holds interpolated and final coefficients, 4-byte aligned */
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int                    LTP_scale_Q14;
+} silk_decoder_control;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/sum_sqr_shift.c b/src/main/jni/opus/silk/sum_sqr_shift.c
new file mode 100644
index 000000000..12514c991
--- /dev/null
+++ b/src/main/jni/opus/silk/sum_sqr_shift.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Compute number of bits to right shift the sum of squares of a vector */
+/* of int16s to make it fit in an int32                                 */
+void silk_sum_sqr_shift(
+    opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
+    opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
+    const opus_int16            *x,                 /* I   Input vector                                                 */
+    opus_int                    len                 /* I   Length of input vector                                       */
+)
+{
+    opus_int   i, shft;
+    opus_int32 nrg_tmp, nrg;
+
+    nrg  = 0;
+    shft = 0;
+    len--;
+    for( i = 0; i < len; i += 2 ) {
+        nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] );
+        nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] );
+        if( nrg < 0 ) {
+            /* Scale down */
+            nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+            shft = 2;
+            break;
+        }
+    }
+    for( ; i < len; i += 2 ) {
+        nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+        nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
+        nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft );
+        if( nrg < 0 ) {
+            /* Scale down */
+            nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+            shft += 2;
+        }
+    }
+    if( i == len ) {
+        /* One sample left to process */
+        nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+        nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
+    }
+
+    /* Make sure to have at least one extra leading zero (two leading zeros in total) */
+    if( nrg & 0xC0000000 ) {
+        nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+        shft += 2;
+    }
+
+    /* Output arguments */
+    *shift  = shft;
+    *energy = nrg;
+}
+
diff --git a/src/main/jni/opus/silk/table_LSF_cos.c b/src/main/jni/opus/silk/table_LSF_cos.c
new file mode 100644
index 000000000..ec9dc6392
--- /dev/null
+++ b/src/main/jni/opus/silk/table_LSF_cos.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+/* Cosine approximation table for LSF conversion */
+/* Q12 values (even) */
+const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = {
+            8192,             8190,             8182,             8170,
+            8152,             8130,             8104,             8072,
+            8034,             7994,             7946,             7896,
+            7840,             7778,             7714,             7644,
+            7568,             7490,             7406,             7318,
+            7226,             7128,             7026,             6922,
+            6812,             6698,             6580,             6458,
+            6332,             6204,             6070,             5934,
+            5792,             5648,             5502,             5352,
+            5198,             5040,             4880,             4718,
+            4552,             4382,             4212,             4038,
+            3862,             3684,             3502,             3320,
+            3136,             2948,             2760,             2570,
+            2378,             2186,             1990,             1794,
+            1598,             1400,             1202,             1002,
+             802,              602,              402,              202,
+               0,             -202,             -402,             -602,
+            -802,            -1002,            -1202,            -1400,
+           -1598,            -1794,            -1990,            -2186,
+           -2378,            -2570,            -2760,            -2948,
+           -3136,            -3320,            -3502,            -3684,
+           -3862,            -4038,            -4212,            -4382,
+           -4552,            -4718,            -4880,            -5040,
+           -5198,            -5352,            -5502,            -5648,
+           -5792,            -5934,            -6070,            -6204,
+           -6332,            -6458,            -6580,            -6698,
+           -6812,            -6922,            -7026,            -7128,
+           -7226,            -7318,            -7406,            -7490,
+           -7568,            -7644,            -7714,            -7778,
+           -7840,            -7896,            -7946,            -7994,
+           -8034,            -8072,            -8104,            -8130,
+           -8152,            -8170,            -8182,            -8190,
+           -8192
+};
diff --git a/src/main/jni/opus/silk/tables.h b/src/main/jni/opus/silk/tables.h
new file mode 100644
index 000000000..a91431e85
--- /dev/null
+++ b/src/main/jni/opus/silk/tables.h
@@ -0,0 +1,122 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TABLES_H
+#define SILK_TABLES_H
+
+#include "define.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Entropy coding tables (with size in bytes indicated) */
+extern const opus_uint8  silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ];                                 /* 24 */
+extern const opus_uint8  silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ];   /* 41 */
+
+extern const opus_uint8  silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */
+extern const opus_uint8  silk_pitch_delta_iCDF[ 21 ];                                               /*  21 */
+extern const opus_uint8  silk_pitch_contour_iCDF[ 34 ];                                             /*  34 */
+extern const opus_uint8  silk_pitch_contour_NB_iCDF[ 11 ];                                          /*  11 */
+extern const opus_uint8  silk_pitch_contour_10_ms_iCDF[ 12 ];                                       /*  12 */
+extern const opus_uint8  silk_pitch_contour_10_ms_NB_iCDF[ 3 ];                                     /*   3 */
+
+extern const opus_uint8  silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ];             /* 180 */
+extern const opus_uint8  silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ];      /* 162 */
+
+extern const opus_uint8  silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ];                           /*  18 */
+extern const opus_uint8  silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ];                        /*  18 */
+
+extern const opus_uint8  silk_max_pulses_table[ 4 ];                                                /*   4 */
+
+extern const opus_uint8  silk_shell_code_table0[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table1[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table2[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table3[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table_offsets[ MAX_PULSES + 1 ];                           /*  17 */
+
+extern const opus_uint8  silk_lsb_iCDF[ 2 ];                                                        /*   2 */
+
+extern const opus_uint8  silk_sign_iCDF[ 42 ];                                                      /*  42 */
+
+extern const opus_uint8  silk_uniform3_iCDF[ 3 ];                                                   /*   3 */
+extern const opus_uint8  silk_uniform4_iCDF[ 4 ];                                                   /*   4 */
+extern const opus_uint8  silk_uniform5_iCDF[ 5 ];                                                   /*   5 */
+extern const opus_uint8  silk_uniform6_iCDF[ 6 ];                                                   /*   6 */
+extern const opus_uint8  silk_uniform8_iCDF[ 8 ];                                                   /*   8 */
+
+extern const opus_uint8  silk_NLSF_EXT_iCDF[ 7 ];                                                   /*   7 */
+
+extern const opus_uint8  silk_LTP_per_index_iCDF[ 3 ];                                              /*   3 */
+extern const opus_uint8  * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ];                            /*   3 */
+extern const opus_uint8  * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ];                         /*   3 */
+extern const opus_int16  silk_LTP_gain_middle_avg_RD_Q14;
+extern const opus_int8   * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ];                                /* 168 */
+extern const opus_uint8  * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
+extern const opus_int8   silk_LTP_vq_sizes[ NB_LTP_CBKS ];                                          /*   3 */
+
+extern const opus_uint8  silk_LTPscale_iCDF[ 3 ];                                                   /*   4 */
+extern const opus_int16  silk_LTPScales_table_Q14[ 3 ];                                             /*   6 */
+
+extern const opus_uint8  silk_type_offset_VAD_iCDF[ 4 ];                                            /*   4 */
+extern const opus_uint8  silk_type_offset_no_VAD_iCDF[ 2 ];                                         /*   2 */
+
+extern const opus_int16  silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ];                       /*  32 */
+extern const opus_uint8  silk_stereo_pred_joint_iCDF[ 25 ];                                         /*  25 */
+extern const opus_uint8  silk_stereo_only_code_mid_iCDF[ 2 ];                                       /*   2 */
+
+extern const opus_uint8  * const silk_LBRR_flags_iCDF_ptr[ 2 ];                                     /*  10 */
+
+extern const opus_uint8  silk_NLSF_interpolation_factor_iCDF[ 5 ];                                  /*   5 */
+
+extern const silk_NLSF_CB_struct silk_NLSF_CB_WB;                                                   /* 1040 */
+extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB;                                                /* 728 */
+
+/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
+extern const opus_int32  silk_TargetRate_table_NB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int32  silk_TargetRate_table_MB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int32  silk_TargetRate_table_WB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int16  silk_SNR_table_Q1[         TARGET_RATE_TAB_SZ ];                           /*  32 */
+
+/* Quantization offsets */
+extern const opus_int16  silk_Quantization_Offsets_Q10[ 2 ][ 2 ];                                   /*   8 */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+extern const opus_int32  silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ];           /*  60 */
+extern const opus_int32  silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ];           /*  60 */
+
+/* Rom table with cosine values */
+extern const opus_int16  silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ];                          /* 258 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main/jni/opus/silk/tables_LTP.c b/src/main/jni/opus/silk/tables_LTP.c
new file mode 100644
index 000000000..0e6a0254d
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_LTP.c
@@ -0,0 +1,296 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_LTP_per_index_iCDF[3] = {
+       179,     99,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_0[8] = {
+        71,     56,     43,     30,     21,     12,      6,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_1[16] = {
+       199,    165,    144,    124,    109,     96,     84,     71,
+        61,     51,     42,     32,     23,     15,      8,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_2[32] = {
+       241,    225,    211,    199,    187,    175,    164,    153,
+       142,    132,    123,    114,    105,     96,     88,     80,
+        72,     64,     57,     50,     44,     38,     33,     29,
+        24,     20,     16,     12,      9,      5,      2,      0
+};
+
+const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304;
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = {
+        15,    131,    138,    138,    155,    155,    173,    173
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = {
+        69,     93,    115,    118,    131,    138,    141,    138,
+       150,    150,    155,    150,    155,    160,    166,    160
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = {
+       131,    128,    134,    141,    141,    141,    145,    145,
+       145,    150,    155,    155,    155,    155,    160,    160,
+       160,    160,    166,    166,    173,    173,    182,    192,
+       182,    192,    192,    192,    205,    192,    205,    224
+};
+
+const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = {
+    silk_LTP_gain_iCDF_0,
+    silk_LTP_gain_iCDF_1,
+    silk_LTP_gain_iCDF_2
+};
+
+const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = {
+    silk_LTP_gain_BITS_Q5_0,
+    silk_LTP_gain_BITS_Q5_1,
+    silk_LTP_gain_BITS_Q5_2
+};
+
+static const opus_int8 silk_LTP_gain_vq_0[8][5] =
+{
+{
+         4,      6,     24,      7,      5
+},
+{
+         0,      0,      2,      0,      0
+},
+{
+        12,     28,     41,     13,     -4
+},
+{
+        -9,     15,     42,     25,     14
+},
+{
+         1,     -2,     62,     41,     -9
+},
+{
+       -10,     37,     65,     -4,      3
+},
+{
+        -6,      4,     66,      7,     -8
+},
+{
+        16,     14,     38,     -3,     33
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_1[16][5] =
+{
+{
+        13,     22,     39,     23,     12
+},
+{
+        -1,     36,     64,     27,     -6
+},
+{
+        -7,     10,     55,     43,     17
+},
+{
+         1,      1,      8,      1,      1
+},
+{
+         6,    -11,     74,     53,     -9
+},
+{
+       -12,     55,     76,    -12,      8
+},
+{
+        -3,      3,     93,     27,     -4
+},
+{
+        26,     39,     59,      3,     -8
+},
+{
+         2,      0,     77,     11,      9
+},
+{
+        -8,     22,     44,     -6,      7
+},
+{
+        40,      9,     26,      3,      9
+},
+{
+        -7,     20,    101,     -7,      4
+},
+{
+         3,     -8,     42,     26,      0
+},
+{
+       -15,     33,     68,      2,     23
+},
+{
+        -2,     55,     46,     -2,     15
+},
+{
+         3,     -1,     21,     16,     41
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_2[32][5] =
+{
+{
+        -6,     27,     61,     39,      5
+},
+{
+       -11,     42,     88,      4,      1
+},
+{
+        -2,     60,     65,      6,     -4
+},
+{
+        -1,     -5,     73,     56,      1
+},
+{
+        -9,     19,     94,     29,     -9
+},
+{
+         0,     12,     99,      6,      4
+},
+{
+         8,    -19,    102,     46,    -13
+},
+{
+         3,      2,     13,      3,      2
+},
+{
+         9,    -21,     84,     72,    -18
+},
+{
+       -11,     46,    104,    -22,      8
+},
+{
+        18,     38,     48,     23,      0
+},
+{
+       -16,     70,     83,    -21,     11
+},
+{
+         5,    -11,    117,     22,     -8
+},
+{
+        -6,     23,    117,    -12,      3
+},
+{
+         3,     -8,     95,     28,      4
+},
+{
+       -10,     15,     77,     60,    -15
+},
+{
+        -1,      4,    124,      2,     -4
+},
+{
+         3,     38,     84,     24,    -25
+},
+{
+         2,     13,     42,     13,     31
+},
+{
+        21,     -4,     56,     46,     -1
+},
+{
+        -1,     35,     79,    -13,     19
+},
+{
+        -7,     65,     88,     -9,    -14
+},
+{
+        20,      4,     81,     49,    -29
+},
+{
+        20,      0,     75,      3,    -17
+},
+{
+         5,     -9,     44,     92,     -8
+},
+{
+         1,     -3,     22,     69,     31
+},
+{
+        -6,     95,     41,    -12,      5
+},
+{
+        39,     67,     16,     -4,      1
+},
+{
+         0,     -6,    120,     55,    -36
+},
+{
+       -13,     44,    122,      4,    -24
+},
+{
+        81,      5,     11,      3,      7
+},
+{
+         2,      0,      9,     10,     88
+}
+};
+
+const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = {
+    (opus_int8 *)&silk_LTP_gain_vq_0[0][0],
+    (opus_int8 *)&silk_LTP_gain_vq_1[0][0],
+    (opus_int8 *)&silk_LTP_gain_vq_2[0][0]
+};
+
+/* Maximum frequency-dependent response of the pitch taps above,
+   computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+      46,      2,     90,     87,     93,     91,     82,     98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+     109,    120,    118,     12,    113,    115,    117,    119,
+      99,     59,     87,    111,     63,    111,    112,     80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+     126,    124,    125,    124,    129,    121,    126,     23,
+     132,    127,    127,    127,    126,    127,    122,    133,
+     130,    134,    101,    118,    119,    145,    126,     86,
+     124,    120,    123,    119,    170,    173,    107,    109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+    &silk_LTP_gain_vq_0_gain[0],
+    &silk_LTP_gain_vq_1_gain[0],
+    &silk_LTP_gain_vq_2_gain[0]
+};
+
+const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {
+    8, 16, 32
+};
diff --git a/src/main/jni/opus/silk/tables_NLSF_CB_NB_MB.c b/src/main/jni/opus/silk/tables_NLSF_CB_NB_MB.c
new file mode 100644
index 000000000..8c59d207a
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_NLSF_CB_NB_MB.c
@@ -0,0 +1,159 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = {
+        12,     35,     60,     83,    108,    132,    157,    180,
+       206,    228,     15,     32,     55,     77,    101,    125,
+       151,    175,    201,    225,     19,     42,     66,     89,
+       114,    137,    162,    184,    209,    230,     12,     25,
+        50,     72,     97,    120,    147,    172,    200,    223,
+        26,     44,     69,     90,    114,    135,    159,    180,
+       205,    225,     13,     22,     53,     80,    106,    130,
+       156,    180,    205,    228,     15,     25,     44,     64,
+        90,    115,    142,    168,    196,    222,     19,     24,
+        62,     82,    100,    120,    145,    168,    190,    214,
+        22,     31,     50,     79,    103,    120,    151,    170,
+       203,    227,     21,     29,     45,     65,    106,    124,
+       150,    171,    196,    224,     30,     49,     75,     97,
+       121,    142,    165,    186,    209,    229,     19,     25,
+        52,     70,     93,    116,    143,    166,    192,    219,
+        26,     34,     62,     75,     97,    118,    145,    167,
+       194,    217,     25,     33,     56,     70,     91,    113,
+       143,    165,    196,    223,     21,     34,     51,     72,
+        97,    117,    145,    171,    196,    222,     20,     29,
+        50,     67,     90,    117,    144,    168,    197,    221,
+        22,     31,     48,     66,     95,    117,    146,    168,
+       196,    222,     24,     33,     51,     77,    116,    134,
+       158,    180,    200,    224,     21,     28,     70,     87,
+       106,    124,    149,    170,    194,    217,     26,     33,
+        53,     64,     83,    117,    152,    173,    204,    225,
+        27,     34,     65,     95,    108,    129,    155,    174,
+       210,    225,     20,     26,     72,     99,    113,    131,
+       154,    176,    200,    219,     34,     43,     61,     78,
+        93,    114,    155,    177,    205,    229,     23,     29,
+        54,     97,    124,    138,    163,    179,    209,    229,
+        30,     38,     56,     89,    118,    129,    158,    178,
+       200,    231,     21,     29,     49,     63,     85,    111,
+       142,    163,    193,    222,     27,     48,     77,    103,
+       133,    158,    179,    196,    215,    232,     29,     47,
+        74,     99,    124,    151,    176,    198,    220,    237,
+        33,     42,     61,     76,     93,    121,    155,    174,
+       207,    225,     29,     53,     87,    112,    136,    154,
+       170,    188,    208,    227,     24,     30,     52,     84,
+       131,    150,    166,    186,    203,    229,     37,     48,
+        64,     84,    104,    118,    156,    177,    201,    230
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = {
+       212,    178,    148,    129,    108,     96,     85,     82,
+        79,     77,     61,     59,     57,     56,     51,     49,
+        48,     45,     42,     41,     40,     38,     36,     34,
+        31,     30,     21,     12,     10,      3,      1,      0,
+       255,    245,    244,    236,    233,    225,    217,    203,
+       190,    176,    175,    161,    149,    136,    125,    114,
+       102,     91,     81,     71,     60,     52,     43,     35,
+        28,     20,     19,     18,     12,     11,      5,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = {
+        16,      0,      0,      0,      0,     99,     66,     36,
+        36,     34,     36,     34,     34,     34,     34,     83,
+        69,     36,     52,     34,    116,    102,     70,     68,
+        68,    176,    102,     68,     68,     34,     65,     85,
+        68,     84,     36,    116,    141,    152,    139,    170,
+       132,    187,    184,    216,    137,    132,    249,    168,
+       185,    139,    104,    102,    100,     68,     68,    178,
+       218,    185,    185,    170,    244,    216,    187,    187,
+       170,    244,    187,    187,    219,    138,    103,    155,
+       184,    185,    137,    116,    183,    155,    152,    136,
+       132,    217,    184,    184,    170,    164,    217,    171,
+       155,    139,    244,    169,    184,    185,    170,    164,
+       216,    223,    218,    138,    214,    143,    188,    218,
+       168,    244,    141,    136,    155,    170,    168,    138,
+       220,    219,    139,    164,    219,    202,    216,    137,
+       168,    186,    246,    185,    139,    116,    185,    219,
+       185,    138,    100,    100,    134,    100,    102,     34,
+        68,     68,    100,     68,    168,    203,    221,    218,
+       168,    167,    154,    136,    104,     70,    164,    246,
+       171,    137,    139,    137,    155,    218,    219,    139
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = {
+       255,    254,    253,    238,     14,      3,      2,      1,
+         0,    255,    254,    252,    218,     35,      3,      2,
+         1,      0,    255,    254,    250,    208,     59,      4,
+         2,      1,      0,    255,    254,    246,    194,     71,
+        10,      2,      1,      0,    255,    252,    236,    183,
+        82,      8,      2,      1,      0,    255,    252,    235,
+       180,     90,     17,      2,      1,      0,    255,    248,
+       224,    171,     97,     30,      4,      1,      0,    255,
+       254,    236,    173,     95,     37,      7,      1,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = {
+       255,    255,    255,    131,      6,    145,    255,    255,
+       255,    255,    255,    236,     93,     15,     96,    255,
+       255,    255,    255,    255,    194,     83,     25,     71,
+       221,    255,    255,    255,    255,    162,     73,     34,
+        66,    162,    255,    255,    255,    210,    126,     73,
+        43,     57,    173,    255,    255,    255,    201,    125,
+        71,     48,     58,    130,    255,    255,    255,    166,
+       110,     73,     57,     62,    104,    210,    255,    255,
+       251,    123,     65,     55,     68,    100,    171,    255
+};
+
+static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = {
+       179,    138,    140,    148,    151,    149,    153,    151,
+       163,    116,     67,     82,     59,     92,     72,    100,
+        89,     92
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = {
+       250,      3,      6,      3,      3,      3,      4,      3,
+         3,      3,    461
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB =
+{
+    32,
+    10,
+    SILK_FIX_CONST( 0.18, 16 ),
+    SILK_FIX_CONST( 1.0 / 0.18, 6 ),
+    silk_NLSF_CB1_NB_MB_Q8,
+    silk_NLSF_CB1_iCDF_NB_MB,
+    silk_NLSF_PRED_NB_MB_Q8,
+    silk_NLSF_CB2_SELECT_NB_MB,
+    silk_NLSF_CB2_iCDF_NB_MB,
+    silk_NLSF_CB2_BITS_NB_MB_Q5,
+    silk_NLSF_DELTA_MIN_NB_MB_Q15,
+};
diff --git a/src/main/jni/opus/silk/tables_NLSF_CB_WB.c b/src/main/jni/opus/silk/tables_NLSF_CB_WB.c
new file mode 100644
index 000000000..50af87eb2
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_NLSF_CB_WB.c
@@ -0,0 +1,198 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = {
+         7,     23,     38,     54,     69,     85,    100,    116,
+       131,    147,    162,    178,    193,    208,    223,    239,
+        13,     25,     41,     55,     69,     83,     98,    112,
+       127,    142,    157,    171,    187,    203,    220,    236,
+        15,     21,     34,     51,     61,     78,     92,    106,
+       126,    136,    152,    167,    185,    205,    225,    240,
+        10,     21,     36,     50,     63,     79,     95,    110,
+       126,    141,    157,    173,    189,    205,    221,    237,
+        17,     20,     37,     51,     59,     78,     89,    107,
+       123,    134,    150,    164,    184,    205,    224,    240,
+        10,     15,     32,     51,     67,     81,     96,    112,
+       129,    142,    158,    173,    189,    204,    220,    236,
+         8,     21,     37,     51,     65,     79,     98,    113,
+       126,    138,    155,    168,    179,    192,    209,    218,
+        12,     15,     34,     55,     63,     78,     87,    108,
+       118,    131,    148,    167,    185,    203,    219,    236,
+        16,     19,     32,     36,     56,     79,     91,    108,
+       118,    136,    154,    171,    186,    204,    220,    237,
+        11,     28,     43,     58,     74,     89,    105,    120,
+       135,    150,    165,    180,    196,    211,    226,    241,
+         6,     16,     33,     46,     60,     75,     92,    107,
+       123,    137,    156,    169,    185,    199,    214,    225,
+        11,     19,     30,     44,     57,     74,     89,    105,
+       121,    135,    152,    169,    186,    202,    218,    234,
+        12,     19,     29,     46,     57,     71,     88,    100,
+       120,    132,    148,    165,    182,    199,    216,    233,
+        17,     23,     35,     46,     56,     77,     92,    106,
+       123,    134,    152,    167,    185,    204,    222,    237,
+        14,     17,     45,     53,     63,     75,     89,    107,
+       115,    132,    151,    171,    188,    206,    221,    240,
+         9,     16,     29,     40,     56,     71,     88,    103,
+       119,    137,    154,    171,    189,    205,    222,    237,
+        16,     19,     36,     48,     57,     76,     87,    105,
+       118,    132,    150,    167,    185,    202,    218,    236,
+        12,     17,     29,     54,     71,     81,     94,    104,
+       126,    136,    149,    164,    182,    201,    221,    237,
+        15,     28,     47,     62,     79,     97,    115,    129,
+       142,    155,    168,    180,    194,    208,    223,    238,
+         8,     14,     30,     45,     62,     78,     94,    111,
+       127,    143,    159,    175,    192,    207,    223,    239,
+        17,     30,     49,     62,     79,     92,    107,    119,
+       132,    145,    160,    174,    190,    204,    220,    235,
+        14,     19,     36,     45,     61,     76,     91,    108,
+       121,    138,    154,    172,    189,    205,    222,    238,
+        12,     18,     31,     45,     60,     76,     91,    107,
+       123,    138,    154,    171,    187,    204,    221,    236,
+        13,     17,     31,     43,     53,     70,     83,    103,
+       114,    131,    149,    167,    185,    203,    220,    237,
+        17,     22,     35,     42,     58,     78,     93,    110,
+       125,    139,    155,    170,    188,    206,    224,    240,
+         8,     15,     34,     50,     67,     83,     99,    115,
+       131,    146,    162,    178,    193,    209,    224,    239,
+        13,     16,     41,     66,     73,     86,     95,    111,
+       128,    137,    150,    163,    183,    206,    225,    241,
+        17,     25,     37,     52,     63,     75,     92,    102,
+       119,    132,    144,    160,    175,    191,    212,    231,
+        19,     31,     49,     65,     83,    100,    117,    133,
+       147,    161,    174,    187,    200,    213,    227,    242,
+        18,     31,     52,     68,     88,    103,    117,    126,
+       138,    149,    163,    177,    192,    207,    223,    239,
+        16,     29,     47,     61,     76,     90,    106,    119,
+       133,    147,    161,    176,    193,    209,    224,    240,
+        15,     21,     35,     50,     61,     73,     86,     97,
+       110,    119,    129,    141,    175,    198,    218,    237
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = {
+       225,    204,    201,    184,    183,    175,    158,    154,
+       153,    135,    119,    115,    113,    110,    109,     99,
+        98,     95,     79,     68,     52,     50,     48,     45,
+        43,     32,     31,     27,     18,     10,      3,      0,
+       255,    251,    235,    230,    212,    201,    196,    182,
+       167,    166,    163,    151,    138,    124,    110,    104,
+        90,     78,     76,     70,     69,     57,     45,     34,
+        24,     21,     11,      6,      5,      4,      3,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = {
+         0,      0,      0,      0,      0,      0,      0,      1,
+       100,    102,    102,     68,     68,     36,     34,     96,
+       164,    107,    158,    185,    180,    185,    139,    102,
+        64,     66,     36,     34,     34,      0,      1,     32,
+       208,    139,    141,    191,    152,    185,    155,    104,
+        96,    171,    104,    166,    102,    102,    102,    132,
+         1,      0,      0,      0,      0,     16,     16,      0,
+        80,    109,     78,    107,    185,    139,    103,    101,
+       208,    212,    141,    139,    173,    153,    123,    103,
+        36,      0,      0,      0,      0,      0,      0,      1,
+        48,      0,      0,      0,      0,      0,      0,     32,
+        68,    135,    123,    119,    119,    103,     69,     98,
+        68,    103,    120,    118,    118,    102,     71,     98,
+       134,    136,    157,    184,    182,    153,    139,    134,
+       208,    168,    248,     75,    189,    143,    121,    107,
+        32,     49,     34,     34,     34,      0,     17,      2,
+       210,    235,    139,    123,    185,    137,    105,    134,
+        98,    135,    104,    182,    100,    183,    171,    134,
+       100,     70,     68,     70,     66,     66,     34,    131,
+        64,    166,    102,     68,     36,      2,      1,      0,
+       134,    166,    102,     68,     34,     34,     66,    132,
+       212,    246,    158,    139,    107,    107,     87,    102,
+       100,    219,    125,    122,    137,    118,    103,    132,
+       114,    135,    137,    105,    171,    106,     50,     34,
+       164,    214,    141,    143,    185,    151,    121,    103,
+       192,     34,      0,      0,      0,      0,      0,      1,
+       208,    109,     74,    187,    134,    249,    159,    137,
+       102,    110,    154,    118,     87,    101,    119,    101,
+         0,      2,      0,     36,     36,     66,     68,     35,
+        96,    164,    102,    100,     36,      0,      2,     33,
+       167,    138,    174,    102,    100,     84,      2,      2,
+       100,    107,    120,    119,     36,    197,     24,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = {
+       255,    254,    253,    244,     12,      3,      2,      1,
+         0,    255,    254,    252,    224,     38,      3,      2,
+         1,      0,    255,    254,    251,    209,     57,      4,
+         2,      1,      0,    255,    254,    244,    195,     69,
+         4,      2,      1,      0,    255,    251,    232,    184,
+        84,      7,      2,      1,      0,    255,    254,    240,
+       186,     86,     14,      2,      1,      0,    255,    254,
+       239,    178,     91,     30,      5,      1,      0,    255,
+       248,    227,    177,    100,     19,      2,      1,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = {
+       255,    255,    255,    156,      4,    154,    255,    255,
+       255,    255,    255,    227,    102,     15,     92,    255,
+       255,    255,    255,    255,    213,     83,     24,     72,
+       236,    255,    255,    255,    255,    150,     76,     33,
+        63,    214,    255,    255,    255,    190,    121,     77,
+        43,     55,    185,    255,    255,    255,    245,    137,
+        71,     43,     59,    139,    255,    255,    255,    255,
+       131,     66,     50,     66,    107,    194,    255,    255,
+       166,    116,     76,     55,     53,    125,    255,    255
+};
+
+static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = {
+       175,    148,    160,    176,    178,    173,    174,    164,
+       177,    174,    196,    182,    198,    192,    182,     68,
+        62,     66,     60,     72,    117,     85,     90,    118,
+       136,    151,    142,    160,    142,    155
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = {
+       100,      3,     40,      3,      3,      3,      5,     14,
+        14,     10,     11,      3,      8,      9,      7,      3,
+       347
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_WB =
+{
+    32,
+    16,
+    SILK_FIX_CONST( 0.15, 16 ),
+    SILK_FIX_CONST( 1.0 / 0.15, 6 ),
+    silk_NLSF_CB1_WB_Q8,
+    silk_NLSF_CB1_iCDF_WB,
+    silk_NLSF_PRED_WB_Q8,
+    silk_NLSF_CB2_SELECT_WB,
+    silk_NLSF_CB2_iCDF_WB,
+    silk_NLSF_CB2_BITS_WB_Q5,
+    silk_NLSF_DELTA_MIN_WB_Q15,
+};
+
diff --git a/src/main/jni/opus/silk/tables_gain.c b/src/main/jni/opus/silk/tables_gain.c
new file mode 100644
index 000000000..37e41d890
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_gain.c
@@ -0,0 +1,63 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] =
+{
+{
+       224,    112,     44,     15,      3,      2,      1,      0
+},
+{
+       254,    237,    192,    132,     70,     23,      4,      0
+},
+{
+       255,    252,    226,    155,     61,     11,      2,      0
+}
+};
+
+const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = {
+       250,    245,    234,    203,     71,     50,     42,     38,
+        35,     33,     31,     29,     28,     27,     26,     25,
+        24,     23,     22,     21,     20,     19,     18,     17,
+        16,     15,     14,     13,     12,     11,     10,      9,
+         8,      7,      6,      5,      4,      3,      2,      1,
+         0
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/main/jni/opus/silk/tables_other.c b/src/main/jni/opus/silk/tables_other.c
new file mode 100644
index 000000000..398686bf2
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_other.c
@@ -0,0 +1,138 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "structs.h"
+#include "define.h"
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
+const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = {
+    0,      8000,   9400,   11500,  13500,  17500,  25000,  MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = {
+    0,      9000,   12000,  14500,  18500,  24500,  35500,  MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = {
+    0,      10500,  14000,  17000,  21500,  28500,  42000,  MAX_TARGET_RATE_BPS
+};
+const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = {
+    18,     29,     38,     40,     46,     52,     62,     84
+};
+
+/* Tables for stereo predictor coding */
+const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = {
+    -13732, -10050, -8266, -7526, -6500, -5000, -2950,  -820,
+       820,   2950,  5000,  6500,  7526,  8266, 10050, 13732
+};
+const opus_uint8  silk_stereo_pred_joint_iCDF[ 25 ] = {
+    249, 247, 246, 245, 244,
+    234, 210, 202, 201, 200,
+    197, 174,  82,  59,  56,
+     55,  54,  46,  22,  12,
+     11,  10,   9,   7,   0
+};
+const opus_uint8  silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 };
+
+/* Tables for LBRR flags */
+static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 };
+static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 };
+const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = {
+    silk_LBRR_flags_2_iCDF,
+    silk_LBRR_flags_3_iCDF
+};
+
+/* Table for LSB coding */
+const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 };
+
+/* Tables for LTPScale */
+const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 };
+
+/* Tables for signal type and offset coding */
+const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = {
+       232,    158,    10,      0
+};
+const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = {
+       230,      0
+};
+
+/* Tables for NLSF interpolation factor */
+const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 };
+
+/* Quantization offsets */
+const opus_int16  silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = {
+    { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 }
+};
+
+/* Table for LTPScale */
+const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 };
+
+/* Uniform entropy tables */
+const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 };
+const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 };
+const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 };
+const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 };
+const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 };
+
+const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 };
+
+/*  Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+        80 dB minimum stopband attenuation, and
+        [0.95 : 0.15 : 0.35] normalized cut off frequencies. */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] =
+{
+{    250767114,  501534038,  250767114  },
+{    209867381,  419732057,  209867381  },
+{    170987846,  341967853,  170987846  },
+{    131531482,  263046905,  131531482  },
+{     89306658,  178584282,   89306658  }
+};
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] =
+{
+{    506393414,  239854379  },
+{    411067935,  169683996  },
+{    306733530,  116694253  },
+{    185807084,   77959395  },
+{     35497197,   57401098  }
+};
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/src/main/jni/opus/silk/tables_pitch_lag.c b/src/main/jni/opus/silk/tables_pitch_lag.c
new file mode 100644
index 000000000..e80cc59a2
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_pitch_lag.c
@@ -0,0 +1,69 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = {
+       253,    250,    244,    233,    212,    182,    150,    131,
+       120,    110,     98,     85,     72,     60,     49,     40,
+        32,     25,     19,     15,     13,     11,      9,      8,
+         7,      6,      5,      4,      3,      2,      1,      0
+};
+
+const opus_uint8 silk_pitch_delta_iCDF[21] = {
+       210,    208,    206,    203,    199,    193,    183,    168,
+       142,    104,     74,     52,     37,     27,     20,     14,
+        10,      6,      4,      2,      0
+};
+
+const opus_uint8 silk_pitch_contour_iCDF[34] = {
+       223,    201,    183,    167,    152,    138,    124,    111,
+        98,     88,     79,     70,     62,     56,     50,     44,
+        39,     35,     31,     27,     24,     21,     18,     16,
+        14,     12,     10,      8,      6,      4,      3,      2,
+         1,      0
+};
+
+const opus_uint8 silk_pitch_contour_NB_iCDF[11] = {
+       188,    176,    155,    138,    119,     97,     67,     43,
+        26,     10,      0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = {
+       165,    119,     80,     61,     47,     35,     27,     20,
+        14,      9,      4,      0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = {
+       113,     63,      0
+};
+
+
diff --git a/src/main/jni/opus/silk/tables_pulses_per_block.c b/src/main/jni/opus/silk/tables_pulses_per_block.c
new file mode 100644
index 000000000..c7c01c889
--- /dev/null
+++ b/src/main/jni/opus/silk/tables_pulses_per_block.c
@@ -0,0 +1,264 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_max_pulses_table[ 4 ] = {
+         8,     10,     12,     16
+};
+
+const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = {
+{
+       125,     51,     26,     18,     15,     12,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       198,    105,     45,     22,     15,     12,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       213,    162,    116,     83,     59,     43,     32,     24,
+        18,     15,     12,      9,      7,      6,      5,      3,
+         2,      0
+},
+{
+       239,    187,    116,     59,     28,     16,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       250,    229,    188,    135,     86,     51,     30,     19,
+        13,     10,      8,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       249,    235,    213,    185,    156,    128,    103,     83,
+        66,     53,     42,     33,     26,     21,     17,     13,
+        10,      0
+},
+{
+       254,    249,    235,    206,    164,    118,     77,     46,
+        27,     16,     10,      7,      5,      4,      3,      2,
+         1,      0
+},
+{
+       255,    253,    249,    239,    220,    191,    156,    119,
+        85,     57,     37,     23,     15,     10,      6,      4,
+         2,      0
+},
+{
+       255,    253,    251,    246,    237,    223,    203,    179,
+       152,    124,     98,     75,     55,     40,     29,     21,
+        15,      0
+},
+{
+       255,    254,    253,    247,    220,    162,    106,     67,
+        42,     28,     18,     12,      9,      6,      4,      3,
+         2,      0
+}
+};
+
+const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = {
+{
+        31,     57,    107,    160,    205,    205,    255,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+        69,     47,     67,    111,    166,    205,    255,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+        82,     74,     79,     95,    109,    128,    145,    160,
+       173,    205,    205,    205,    224,    255,    255,    224,
+       255,    224
+},
+{
+       125,     74,     59,     69,     97,    141,    182,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+       173,    115,     85,     73,     76,     92,    115,    145,
+       173,    205,    224,    224,    255,    255,    255,    255,
+       255,    255
+},
+{
+       166,    134,    113,    102,    101,    102,    107,    118,
+       125,    138,    145,    155,    166,    182,    192,    192,
+       205,    150
+},
+{
+       224,    182,    134,    101,     83,     79,     85,     97,
+       120,    145,    173,    205,    224,    255,    255,    255,
+       255,    255
+},
+{
+       255,    224,    192,    150,    120,    101,     92,     89,
+        93,    102,    118,    134,    160,    182,    192,    224,
+       224,    224
+},
+{
+       255,    224,    224,    182,    155,    134,    118,    109,
+       104,    102,    106,    111,    118,    131,    145,    160,
+       173,    131
+}
+};
+
+const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] =
+{
+{
+       241,    190,    178,    132,     87,     74,     41,     14,
+         0
+},
+{
+       223,    193,    157,    140,    106,     57,     39,     18,
+         0
+}
+};
+
+const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] =
+{
+{
+       131,     74,    141,     79,     80,    138,     95,    104,
+       134
+},
+{
+        95,     99,     91,    125,     93,     76,    123,    115,
+       123
+}
+};
+
+const opus_uint8 silk_shell_code_table0[ 152 ] = {
+       128,      0,    214,     42,      0,    235,    128,     21,
+         0,    244,    184,     72,     11,      0,    248,    214,
+       128,     42,      7,      0,    248,    225,    170,     80,
+        25,      5,      0,    251,    236,    198,    126,     54,
+        18,      3,      0,    250,    238,    211,    159,     82,
+        35,     15,      5,      0,    250,    231,    203,    168,
+       128,     88,     53,     25,      6,      0,    252,    238,
+       216,    185,    148,    108,     71,     40,     18,      4,
+         0,    253,    243,    225,    199,    166,    128,     90,
+        57,     31,     13,      3,      0,    254,    246,    233,
+       212,    183,    147,    109,     73,     44,     23,     10,
+         2,      0,    255,    250,    240,    223,    198,    166,
+       128,     90,     58,     33,     16,      6,      1,      0,
+       255,    251,    244,    231,    210,    181,    146,    110,
+        75,     46,     25,     12,      5,      1,      0,    255,
+       253,    248,    238,    221,    196,    164,    128,     92,
+        60,     35,     18,      8,      3,      1,      0,    255,
+       253,    249,    242,    229,    208,    180,    146,    110,
+        76,     48,     27,     14,      7,      3,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table1[ 152 ] = {
+       129,      0,    207,     50,      0,    236,    129,     20,
+         0,    245,    185,     72,     10,      0,    249,    213,
+       129,     42,      6,      0,    250,    226,    169,     87,
+        27,      4,      0,    251,    233,    194,    130,     62,
+        20,      4,      0,    250,    236,    207,    160,     99,
+        47,     17,      3,      0,    255,    240,    217,    182,
+       131,     81,     41,     11,      1,      0,    255,    254,
+       233,    201,    159,    107,     61,     20,      2,      1,
+         0,    255,    249,    233,    206,    170,    128,     86,
+        50,     23,      7,      1,      0,    255,    250,    238,
+       217,    186,    148,    108,     70,     39,     18,      6,
+         1,      0,    255,    252,    243,    226,    200,    166,
+       128,     90,     56,     30,     13,      4,      1,      0,
+       255,    252,    245,    231,    209,    180,    146,    110,
+        76,     47,     25,     11,      4,      1,      0,    255,
+       253,    248,    237,    219,    194,    163,    128,     93,
+        62,     37,     19,      8,      3,      1,      0,    255,
+       254,    250,    241,    226,    205,    177,    145,    111,
+        79,     51,     30,     15,      6,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table2[ 152 ] = {
+       129,      0,    203,     54,      0,    234,    129,     23,
+         0,    245,    184,     73,     10,      0,    250,    215,
+       129,     41,      5,      0,    252,    232,    173,     86,
+        24,      3,      0,    253,    240,    200,    129,     56,
+        15,      2,      0,    253,    244,    217,    164,     94,
+        38,     10,      1,      0,    253,    245,    226,    189,
+       132,     71,     27,      7,      1,      0,    253,    246,
+       231,    203,    159,    105,     56,     23,      6,      1,
+         0,    255,    248,    235,    213,    179,    133,     85,
+        47,     19,      5,      1,      0,    255,    254,    243,
+       221,    194,    159,    117,     70,     37,     12,      2,
+         1,      0,    255,    254,    248,    234,    208,    171,
+       128,     85,     48,     22,      8,      2,      1,      0,
+       255,    254,    250,    240,    220,    189,    149,    107,
+        67,     36,     16,      6,      2,      1,      0,    255,
+       254,    251,    243,    227,    201,    166,    128,     90,
+        55,     29,     13,      5,      2,      1,      0,    255,
+       254,    252,    246,    234,    213,    183,    147,    109,
+        73,     43,     22,     10,      4,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table3[ 152 ] = {
+       130,      0,    200,     58,      0,    231,    130,     26,
+         0,    244,    184,     76,     12,      0,    249,    214,
+       130,     43,      6,      0,    252,    232,    173,     87,
+        24,      3,      0,    253,    241,    203,    131,     56,
+        14,      2,      0,    254,    246,    221,    167,     94,
+        35,      8,      1,      0,    254,    249,    232,    193,
+       130,     65,     23,      5,      1,      0,    255,    251,
+       239,    211,    162,     99,     45,     15,      4,      1,
+         0,    255,    251,    243,    223,    186,    131,     74,
+        33,     11,      3,      1,      0,    255,    252,    245,
+       230,    202,    158,    105,     57,     24,      8,      2,
+         1,      0,    255,    253,    247,    235,    214,    179,
+       132,     84,     44,     19,      7,      2,      1,      0,
+       255,    254,    250,    240,    223,    196,    159,    112,
+        69,     36,     15,      6,      2,      1,      0,    255,
+       254,    253,    245,    231,    209,    176,    136,     93,
+        55,     27,     11,      3,      2,      1,      0,    255,
+       254,    253,    252,    239,    221,    194,    158,    117,
+        76,     42,     18,      4,      3,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table_offsets[ 17 ] = {
+         0,      0,      2,      5,      9,     14,     20,     27,
+        35,     44,     54,     65,     77,     90,    104,    119,
+       135
+};
+
+const opus_uint8 silk_sign_iCDF[ 42 ] = {
+       254,     49,     67,     77,     82,     93,     99,
+       198,     11,     18,     24,     31,     36,     45,
+       255,     46,     66,     78,     87,     94,    104,
+       208,     14,     21,     32,     42,     51,     66,
+       255,     94,    104,    109,    112,    115,    118,
+       248,     53,     69,     80,     88,     95,    102
+};
diff --git a/src/main/jni/opus/silk/tuning_parameters.h b/src/main/jni/opus/silk/tuning_parameters.h
new file mode 100644
index 000000000..e1057bbaa
--- /dev/null
+++ b/src/main/jni/opus/silk/tuning_parameters.h
@@ -0,0 +1,171 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TUNING_PARAMETERS_H
+#define SILK_TUNING_PARAMETERS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decay time for bitreservoir */
+#define BITRESERVOIR_DECAY_TIME_MS                      500
+
+/*******************/
+/* Pitch estimator */
+/*******************/
+
+/* Level of noise floor for whitening filter LPC analysis in pitch analysis */
+#define FIND_PITCH_WHITE_NOISE_FRACTION                 1e-3f
+
+/* Bandwidth expansion for whitening filter in pitch analysis */
+#define FIND_PITCH_BANDWIDTH_EXPANSION                  0.99f
+
+/*********************/
+/* Linear prediction */
+/*********************/
+
+/* LPC analysis regularization */
+#define FIND_LPC_COND_FAC                               1e-5f
+
+/* LTP analysis defines */
+#define FIND_LTP_COND_FAC                               1e-5f
+#define LTP_DAMPING                                     0.05f
+#define LTP_SMOOTHING                                   0.1f
+
+/* LTP quantization settings */
+#define MU_LTP_QUANT_NB                                 0.03f
+#define MU_LTP_QUANT_MB                                 0.025f
+#define MU_LTP_QUANT_WB                                 0.02f
+
+/* Max cumulative LTP gain */
+#define MAX_SUM_LOG_GAIN_DB								250.0f
+
+/***********************/
+/* High pass filtering */
+/***********************/
+
+/* Smoothing parameters for low end of pitch frequency range estimation */
+#define VARIABLE_HP_SMTH_COEF1                          0.1f
+#define VARIABLE_HP_SMTH_COEF2                          0.015f
+#define VARIABLE_HP_MAX_DELTA_FREQ                      0.4f
+
+/* Min and max cut-off frequency values (-3 dB points) */
+#define VARIABLE_HP_MIN_CUTOFF_HZ                       60
+#define VARIABLE_HP_MAX_CUTOFF_HZ                       100
+
+/***********/
+/* Various */
+/***********/
+
+/* VAD threshold */
+#define SPEECH_ACTIVITY_DTX_THRES                       0.05f
+
+/* Speech Activity LBRR enable threshold */
+#define LBRR_SPEECH_ACTIVITY_THRES                      0.3f
+
+/*************************/
+/* Perceptual parameters */
+/*************************/
+
+/* reduction in coding SNR during low speech activity */
+#define BG_SNR_DECR_dB                                  2.0f
+
+/* factor for reducing quantization noise during voiced speech */
+#define HARM_SNR_INCR_dB                                2.0f
+
+/* factor for reducing quantization noise for unvoiced sparse signals */
+#define SPARSE_SNR_INCR_dB                              2.0f
+
+/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */
+#define SPARSENESS_THRESHOLD_QNT_OFFSET                 0.75f
+
+/* warping control */
+#define WARPING_MULTIPLIER                              0.015f
+
+/* fraction added to first autocorrelation value */
+#define SHAPE_WHITE_NOISE_FRACTION                      5e-5f
+
+/* noise shaping filter chirp factor */
+#define BANDWIDTH_EXPANSION                             0.95f
+
+/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */
+#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA              0.01f
+
+/* extra harmonic boosting (signal shaping) at low bitrates */
+#define LOW_RATE_HARMONIC_BOOST                         0.1f
+
+/* extra harmonic boosting (signal shaping) for noisy input signals */
+#define LOW_INPUT_QUALITY_HARMONIC_BOOST                0.1f
+
+/* harmonic noise shaping */
+#define HARMONIC_SHAPING                                0.3f
+
+/* extra harmonic noise shaping for high bitrates or noisy input */
+#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING       0.2f
+
+/* parameter for shaping noise towards higher frequencies */
+#define HP_NOISE_COEF                                   0.25f
+
+/* parameter for shaping noise even more towards higher frequencies during voiced speech */
+#define HARM_HP_NOISE_COEF                              0.35f
+
+/* parameter for applying a high-pass tilt to the input signal */
+#define INPUT_TILT                                      0.05f
+
+/* parameter for extra high-pass tilt to the input signal at high rates */
+#define HIGH_RATE_INPUT_TILT                            0.1f
+
+/* parameter for reducing noise at the very low frequencies */
+#define LOW_FREQ_SHAPING                                4.0f
+
+/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */
+#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR               0.5f
+
+/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */
+#define SUBFR_SMTH_COEF                                 0.4f
+
+/* parameters defining the R/D tradeoff in the residual quantizer */
+#define LAMBDA_OFFSET                                   1.2f
+#define LAMBDA_SPEECH_ACT                               -0.2f
+#define LAMBDA_DELAYED_DECISIONS                        -0.05f
+#define LAMBDA_INPUT_QUALITY                            -0.1f
+#define LAMBDA_CODING_QUALITY                           -0.2f
+#define LAMBDA_QUANT_OFFSET                             0.8f
+
+/* Compensation in bitrate calculations for 10 ms modes */
+#define REDUCE_BITRATE_10_MS_BPS                        2200
+
+/* Maximum time before allowing a bandwidth transition */
+#define MAX_BANDWIDTH_SWITCH_DELAY_MS                   5000
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_TUNING_PARAMETERS_H */
diff --git a/src/main/jni/opus/silk/typedef.h b/src/main/jni/opus/silk/typedef.h
new file mode 100644
index 000000000..97b7e709b
--- /dev/null
+++ b/src/main/jni/opus/silk/typedef.h
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TYPEDEF_H
+#define SILK_TYPEDEF_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifndef FIXED_POINT
+# include <float.h>
+# define silk_float      float
+# define silk_float_MAX  FLT_MAX
+#endif
+
+#define silk_int64_MAX   ((opus_int64)0x7FFFFFFFFFFFFFFFLL)   /*  2^63 - 1 */
+#define silk_int64_MIN   ((opus_int64)0x8000000000000000LL)   /* -2^63 */
+#define silk_int32_MAX   0x7FFFFFFF                           /*  2^31 - 1 =  2147483647 */
+#define silk_int32_MIN   ((opus_int32)0x80000000)             /* -2^31     = -2147483648 */
+#define silk_int16_MAX   0x7FFF                               /*  2^15 - 1 =  32767 */
+#define silk_int16_MIN   ((opus_int16)0x8000)                 /* -2^15     = -32768 */
+#define silk_int8_MAX    0x7F                                 /*  2^7 - 1  =  127 */
+#define silk_int8_MIN    ((opus_int8)0x80)                    /* -2^7      = -128 */
+#define silk_uint8_MAX   0xFF                                 /*  2^8 - 1 = 255 */
+
+#define silk_TRUE        1
+#define silk_FALSE       0
+
+/* assertions */
+#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS))
+# ifndef silk_assert
+#  include <crtdbg.h>      /* ASSERTE() */
+#  define silk_assert(COND)   _ASSERTE(COND)
+# endif
+#else
+# ifdef ENABLE_ASSERTIONS
+#  include <stdio.h>
+#  include <stdlib.h>
+#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__);
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#  define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}}
+# else
+#  define silk_assert(COND)
+# endif
+#endif
+
+#endif /* SILK_TYPEDEF_H */
diff --git a/src/main/jni/opus/src/analysis.c b/src/main/jni/opus/src/analysis.c
new file mode 100644
index 000000000..778a62aab
--- /dev/null
+++ b/src/main/jni/opus/src/analysis.c
@@ -0,0 +1,645 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+#include "analysis.h"
+#include "mlp.h"
+#include "stack_alloc.h"
+
+extern const MLP net;
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+static const float dct_table[128] = {
+        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+        0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
+       -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
+        0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
+       -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
+        0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
+        0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
+        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+        0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
+       -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
+        0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
+       -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
+        0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
+        0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
+};
+
+static const float analysis_window[240] = {
+      0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
+      0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
+      0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
+      0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
+      0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
+      0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
+      0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
+      0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
+      0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
+      0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
+      0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
+      0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
+      0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
+      0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
+      0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
+      0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
+      0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
+      0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
+      0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
+      0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
+      0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
+      0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
+      0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
+      0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
+      0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
+      0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
+      0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
+      0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
+      0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
+      0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
+};
+
+static const int tbands[NB_TBANDS+1] = {
+       2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+};
+
+static const int extra_bands[NB_TOT_BANDS+1] = {
+      1, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+};
+
+/*static const float tweight[NB_TBANDS+1] = {
+      .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
+};*/
+
+#define NB_TONAL_SKIP_BANDS 9
+
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE ((float)M_PI/2)
+static OPUS_INLINE float fast_atan2f(float y, float x) {
+   float x2, y2;
+   /* Should avoid underflow on the values we'll get */
+   if (ABS16(x)+ABS16(y)<1e-9f)
+   {
+      x*=1e12f;
+      y*=1e12f;
+   }
+   x2 = x*x;
+   y2 = y*y;
+   if(x2<y2){
+      float den = (y2 + cB*x2) * (y2 + cC*x2);
+      if (den!=0)
+         return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
+      else
+         return (y<0 ? -cE : cE);
+   }else{
+      float den = (x2 + cB*y2) * (x2 + cC*y2);
+      if (den!=0)
+         return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
+      else
+         return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
+   }
+}
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
+{
+   int pos;
+   int curr_lookahead;
+   float psum;
+   int i;
+
+   pos = tonal->read_pos;
+   curr_lookahead = tonal->write_pos-tonal->read_pos;
+   if (curr_lookahead<0)
+      curr_lookahead += DETECT_SIZE;
+
+   if (len > 480 && pos != tonal->write_pos)
+   {
+      pos++;
+      if (pos==DETECT_SIZE)
+         pos=0;
+   }
+   if (pos == tonal->write_pos)
+      pos--;
+   if (pos<0)
+      pos = DETECT_SIZE-1;
+   OPUS_COPY(info_out, &tonal->info[pos], 1);
+   tonal->read_subframe += len/120;
+   while (tonal->read_subframe>=4)
+   {
+      tonal->read_subframe -= 4;
+      tonal->read_pos++;
+   }
+   if (tonal->read_pos>=DETECT_SIZE)
+      tonal->read_pos-=DETECT_SIZE;
+
+   /* Compensate for the delay in the features themselves.
+      FIXME: Need a better estimate the 10 I just made up */
+   curr_lookahead = IMAX(curr_lookahead-10, 0);
+
+   psum=0;
+   /* Summing the probability of transition patterns that involve music at
+      time (DETECT_SIZE-curr_lookahead-1) */
+   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
+      psum += tonal->pmusic[i];
+   for (;i<DETECT_SIZE;i++)
+      psum += tonal->pspeech[i];
+   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
+
+   info_out->music_prob = psum;
+}
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+{
+    int i, b;
+    const kiss_fft_state *kfft;
+    VARDECL(kiss_fft_cpx, in);
+    VARDECL(kiss_fft_cpx, out);
+    int N = 480, N2=240;
+    float * OPUS_RESTRICT A = tonal->angle;
+    float * OPUS_RESTRICT dA = tonal->d_angle;
+    float * OPUS_RESTRICT d2A = tonal->d2_angle;
+    VARDECL(float, tonality);
+    VARDECL(float, noisiness);
+    float band_tonality[NB_TBANDS];
+    float logE[NB_TBANDS];
+    float BFCC[8];
+    float features[25];
+    float frame_tonality;
+    float max_frame_tonality;
+    /*float tw_sum=0;*/
+    float frame_noisiness;
+    const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
+    float slope=0;
+    float frame_stationarity;
+    float relativeE;
+    float frame_probs[2];
+    float alpha, alphaE, alphaE2;
+    float frame_loudness;
+    float bandwidth_mask;
+    int bandwidth=0;
+    float maxE = 0;
+    float noise_floor;
+    int remaining;
+    AnalysisInfo *info;
+    SAVE_STACK;
+
+    tonal->last_transition++;
+    alpha = 1.f/IMIN(20, 1+tonal->count);
+    alphaE = 1.f/IMIN(50, 1+tonal->count);
+    alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
+
+    if (tonal->count<4)
+       tonal->music_prob = .5;
+    kfft = celt_mode->mdct.kfft[0];
+    if (tonal->count==0)
+       tonal->mem_fill = 240;
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
+    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
+    {
+       tonal->mem_fill += len;
+       /* Don't have enough to update the analysis */
+       RESTORE_STACK;
+       return;
+    }
+    info = &tonal->info[tonal->write_pos++];
+    if (tonal->write_pos>=DETECT_SIZE)
+       tonal->write_pos-=DETECT_SIZE;
+
+    ALLOC(in, 480, kiss_fft_cpx);
+    ALLOC(out, 480, kiss_fft_cpx);
+    ALLOC(tonality, 240, float);
+    ALLOC(noisiness, 240, float);
+    for (i=0;i<N2;i++)
+    {
+       float w = analysis_window[i];
+       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
+    }
+    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
+    tonal->mem_fill = 240 + remaining;
+    opus_fft(kfft, in, out);
+
+    for (i=1;i<N2;i++)
+    {
+       float X1r, X2r, X1i, X2i;
+       float angle, d_angle, d2_angle;
+       float angle2, d_angle2, d2_angle2;
+       float mod1, mod2, avg_mod;
+       X1r = (float)out[i].r+out[N-i].r;
+       X1i = (float)out[i].i-out[N-i].i;
+       X2r = (float)out[i].i+out[N-i].i;
+       X2i = (float)out[N-i].r-out[i].r;
+
+       angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
+       d_angle = angle - A[i];
+       d2_angle = d_angle - dA[i];
+
+       angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
+       d_angle2 = angle2 - angle;
+       d2_angle2 = d_angle2 - d_angle;
+
+       mod1 = d2_angle - (float)floor(.5+d2_angle);
+       noisiness[i] = ABS16(mod1);
+       mod1 *= mod1;
+       mod1 *= mod1;
+
+       mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
+       noisiness[i] += ABS16(mod2);
+       mod2 *= mod2;
+       mod2 *= mod2;
+
+       avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
+       tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
+
+       A[i] = angle2;
+       dA[i] = d_angle2;
+       d2A[i] = mod2;
+    }
+
+    frame_tonality = 0;
+    max_frame_tonality = 0;
+    /*tw_sum = 0;*/
+    info->activity = 0;
+    frame_noisiness = 0;
+    frame_stationarity = 0;
+    if (!tonal->count)
+    {
+       for (b=0;b<NB_TBANDS;b++)
+       {
+          tonal->lowE[b] = 1e10;
+          tonal->highE[b] = -1e10;
+       }
+    }
+    relativeE = 0;
+    frame_loudness = 0;
+    for (b=0;b<NB_TBANDS;b++)
+    {
+       float E=0, tE=0, nE=0;
+       float L1, L2;
+       float stationarity;
+       for (i=tbands[b];i<tbands[b+1];i++)
+       {
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
+          E += binE;
+          tE += binE*tonality[i];
+          nE += binE*2.f*(.5f-noisiness[i]);
+       }
+       tonal->E[tonal->E_count][b] = E;
+       frame_noisiness += nE/(1e-15f+E);
+
+       frame_loudness += (float)sqrt(E+1e-10f);
+       logE[b] = (float)log(E+1e-10f);
+       tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
+       tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
+       if (tonal->highE[b] < tonal->lowE[b]+1.f)
+       {
+          tonal->highE[b]+=.5f;
+          tonal->lowE[b]-=.5f;
+       }
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
+
+       L1=L2=0;
+       for (i=0;i<NB_FRAMES;i++)
+       {
+          L1 += (float)sqrt(tonal->E[i][b]);
+          L2 += tonal->E[i][b];
+       }
+
+       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
+       stationarity *= stationarity;
+       stationarity *= stationarity;
+       frame_stationarity += stationarity;
+       /*band_tonality[b] = tE/(1e-15+E)*/;
+       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
+#if 0
+       if (b>=NB_TONAL_SKIP_BANDS)
+       {
+          frame_tonality += tweight[b]*band_tonality[b];
+          tw_sum += tweight[b];
+       }
+#else
+       frame_tonality += band_tonality[b];
+       if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
+          frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
+#endif
+       max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
+       slope += band_tonality[b]*(b-8);
+       /*printf("%f %f ", band_tonality[b], stationarity);*/
+       tonal->prev_band_tonality[b] = band_tonality[b];
+    }
+
+    bandwidth_mask = 0;
+    bandwidth = 0;
+    maxE = 0;
+    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
+    noise_floor *= noise_floor;
+    for (b=0;b<NB_TOT_BANDS;b++)
+    {
+       float E=0;
+       int band_start, band_end;
+       /* Keep a margin of 300 Hz for aliasing */
+       band_start = extra_bands[b];
+       band_end = extra_bands[b+1];
+       for (i=band_start;i<band_end;i++)
+       {
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+          E += binE;
+       }
+       maxE = MAX32(maxE, E);
+       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
+       E = MAX32(E, tonal->meanE[b]);
+       /* Use a simple follower with 13 dB/Bark slope for spreading function */
+       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+       /* Consider the band "active" only if all these conditions are met:
+          1) less than 10 dB below the simple follower
+          2) less than 90 dB below the peak band (maximal masking possible considering
+             both the ATH and the loudness-dependent slope of the spreading function)
+          3) above the PCM quantization noise floor
+       */
+       if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
+          bandwidth = b;
+    }
+    if (tonal->count<=2)
+       bandwidth = 20;
+    frame_loudness = 20*(float)log10(frame_loudness);
+    tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
+    tonal->lowECount *= (1-alphaE);
+    if (frame_loudness < tonal->Etracker-30)
+       tonal->lowECount += alphaE;
+
+    for (i=0;i<8;i++)
+    {
+       float sum=0;
+       for (b=0;b<16;b++)
+          sum += dct_table[i*16+b]*logE[b];
+       BFCC[i] = sum;
+    }
+
+    frame_stationarity /= NB_TBANDS;
+    relativeE /= NB_TBANDS;
+    if (tonal->count<10)
+       relativeE = .5;
+    frame_noisiness /= NB_TBANDS;
+#if 1
+    info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
+#else
+    info->activity = .5*(1+frame_noisiness-frame_stationarity);
+#endif
+    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
+    frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
+    tonal->prev_tonality = frame_tonality;
+
+    slope /= 8*8;
+    info->tonality_slope = slope;
+
+    tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+    tonal->count++;
+    info->tonality = frame_tonality;
+
+    for (i=0;i<4;i++)
+       features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
+
+    for (i=0;i<4;i++)
+       tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
+
+    for (i=0;i<4;i++)
+        features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
+    for (i=0;i<3;i++)
+        features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
+
+    if (tonal->count > 5)
+    {
+       for (i=0;i<9;i++)
+          tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
+    }
+
+    for (i=0;i<8;i++)
+    {
+       tonal->mem[i+24] = tonal->mem[i+16];
+       tonal->mem[i+16] = tonal->mem[i+8];
+       tonal->mem[i+8] = tonal->mem[i];
+       tonal->mem[i] = BFCC[i];
+    }
+    for (i=0;i<9;i++)
+       features[11+i] = (float)sqrt(tonal->std[i]);
+    features[20] = info->tonality;
+    features[21] = info->activity;
+    features[22] = frame_stationarity;
+    features[23] = info->tonality_slope;
+    features[24] = tonal->lowECount;
+
+#ifndef DISABLE_FLOAT_API
+    mlp_process(&net, features, frame_probs);
+    frame_probs[0] = .5f*(frame_probs[0]+1);
+    /* Curve fitting between the MLP probability and the actual probability */
+    frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+    /* Probability of active audio (as opposed to silence) */
+    frame_probs[1] = .5f*frame_probs[1]+.5f;
+    /* Consider that silence has a 50-50 probability. */
+    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
+
+    /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
+    {
+       /* Probability of state transition */
+       float tau;
+       /* Represents independence of the MLP probabilities, where
+          beta=1 means fully independent. */
+       float beta;
+       /* Denormalized probability of speech (p0) and music (p1) after update */
+       float p0, p1;
+       /* Probabilities for "all speech" and "all music" */
+       float s0, m0;
+       /* Probability sum for renormalisation */
+       float psum;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       float speech0;
+       float music0;
+
+       /* One transition every 3 minutes of active audio */
+       tau = .00005f*frame_probs[1];
+       beta = .05f;
+       if (1) {
+          /* Adapt beta based on how "unexpected" the new prob is */
+          float p, q;
+          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+          beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+       }
+       /* p0 and p1 are the probabilities of speech and music at this frame
+          using only information from previous frame and applying the
+          state transition model */
+       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
+       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+       /* We apply the current probability with exponent beta to work around
+          the fact that the probability estimates aren't independent. */
+       p0 *= (float)pow(1-frame_probs[0], beta);
+       p1 *= (float)pow(frame_probs[0], beta);
+       /* Normalise the probabilities to get the Marokv probability of music. */
+       tonal->music_prob = p1/(p0+p1);
+       info->music_prob = tonal->music_prob;
+
+       /* This chunk of code deals with delayed decision. */
+       psum=1e-20f;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       speech0 = (float)pow(1-frame_probs[0], beta);
+       music0  = (float)pow(frame_probs[0], beta);
+       if (tonal->count==1)
+       {
+          tonal->pspeech[0]=.5;
+          tonal->pmusic [0]=.5;
+       }
+       /* Updated probability of having only speech (s0) or only music (m0),
+          before considering the new observation. */
+       s0 = tonal->pspeech[0] + tonal->pspeech[1];
+       m0 = tonal->pmusic [0] + tonal->pmusic [1];
+       /* Updates s0 and m0 with instantaneous probability. */
+       tonal->pspeech[0] = s0*(1-tau)*speech0;
+       tonal->pmusic [0] = m0*(1-tau)*music0;
+       /* Propagate the transition probabilities */
+       for (i=1;i<DETECT_SIZE-1;i++)
+       {
+          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
+          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
+       }
+       /* Probability that the latest frame is speech, when all the previous ones were music. */
+       tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+       /* Probability that the latest frame is music, when all the previous ones were speech. */
+       tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
+
+       /* Renormalise probabilities to 1 */
+       for (i=0;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i] + tonal->pmusic[i];
+       psum = 1.f/psum;
+       for (i=0;i<DETECT_SIZE;i++)
+       {
+          tonal->pspeech[i] *= psum;
+          tonal->pmusic [i] *= psum;
+       }
+       psum = tonal->pmusic[0];
+       for (i=1;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i];
+
+       /* Estimate our confidence in the speech/music decisions */
+       if (frame_probs[1]>.75)
+       {
+          if (tonal->music_prob>.9)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->music_confidence_count);
+             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+          }
+          if (tonal->music_prob<.1)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->speech_confidence_count);
+             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+          }
+       } else {
+          if (tonal->music_confidence_count==0)
+             tonal->music_confidence = .9f;
+          if (tonal->speech_confidence_count==0)
+             tonal->speech_confidence = .1f;
+       }
+    }
+    if (tonal->last_music != (tonal->music_prob>.5f))
+       tonal->last_transition=0;
+    tonal->last_music = tonal->music_prob>.5f;
+#else
+    info->music_prob = 0;
+#endif
+    /*for (i=0;i<25;i++)
+       printf("%f ", features[i]);
+    printf("\n");*/
+
+    info->bandwidth = bandwidth;
+    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
+    info->noisiness = frame_noisiness;
+    info->valid = 1;
+    if (info_out!=NULL)
+       OPUS_COPY(info_out, info, 1);
+    RESTORE_STACK;
+}
+
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+{
+   int offset;
+   int pcm_len;
+
+   if (analysis_pcm != NULL)
+   {
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
+   }
+
+   analysis_info->valid = 0;
+   tonality_get_info(analysis, analysis_info, frame_size);
+}
diff --git a/src/main/jni/opus/src/analysis.h b/src/main/jni/opus/src/analysis.h
new file mode 100644
index 000000000..be0388faa
--- /dev/null
+++ b/src/main/jni/opus/src/analysis.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#include "celt.h"
+#include "opus_private.h"
+
+#define NB_FRAMES 8
+#define NB_TBANDS 18
+#define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+
+#define DETECT_SIZE 200
+
+typedef struct {
+   float angle[240];
+   float d_angle[240];
+   float d2_angle[240];
+   opus_val32 inmem[ANALYSIS_BUF_SIZE];
+   int   mem_fill;                      /* number of usable samples in the buffer */
+   float prev_band_tonality[NB_TBANDS];
+   float prev_tonality;
+   float E[NB_FRAMES][NB_TBANDS];
+   float lowE[NB_TBANDS];
+   float highE[NB_TBANDS];
+   float meanE[NB_TOT_BANDS];
+   float mem[32];
+   float cmean[8];
+   float std[9];
+   float music_prob;
+   float Etracker;
+   float lowECount;
+   int E_count;
+   int last_music;
+   int last_transition;
+   int count;
+   float subframe_mem[3];
+   int analysis_offset;
+   /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
+       pspeech[0] is the probability that all frames in the window are speech. */
+   float pspeech[DETECT_SIZE];
+   /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
+       pmusic[0] is the probability that all frames in the window are music. */
+   float pmusic[DETECT_SIZE];
+   float speech_confidence;
+   float music_confidence;
+   int speech_confidence_count;
+   int music_confidence_count;
+   int write_pos;
+   int read_pos;
+   int read_subframe;
+   AnalysisInfo info[DETECT_SIZE];
+} TonalityAnalysisState;
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
+
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+
+#endif
diff --git a/src/main/jni/opus/src/mlp.c b/src/main/jni/opus/src/mlp.c
new file mode 100644
index 000000000..463860266
--- /dev/null
+++ b/src/main/jni/opus/src/mlp.c
@@ -0,0 +1,140 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <math.h>
+#include "mlp.h"
+#include "arch.h"
+#include "tansig_table.h"
+#define MAX_NEURONS 100
+
+#if 0
+static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+{
+	int i;
+	opus_val16 xx; /* Q11 */
+	/*double x, y;*/
+	opus_val16 dy, yy; /* Q14 */
+	/*x = 1.9073e-06*_x;*/
+	if (_x>=QCONST32(8,19))
+		return QCONST32(1.,14);
+	if (_x<=-QCONST32(8,19))
+		return -QCONST32(1.,14);
+	xx = EXTRACT16(SHR32(_x, 8));
+	/*i = lrint(25*x);*/
+	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+	/*x -= .04*i;*/
+	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+	/*x = xx*(1./2048);*/
+	/*y = tansig_table[250+i];*/
+	yy = tansig_table[250+i];
+	/*y = yy*(1./16384);*/
+	dy = 16384-MULT16_16_Q14(yy,yy);
+	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+	return yy;
+}
+#else
+/*extern const float tansig_table[501];*/
+static OPUS_INLINE float tansig_approx(float x)
+{
+	int i;
+	float y, dy;
+	float sign=1;
+	/* Tests are reversed to catch NaNs */
+    if (!(x<8))
+        return 1;
+    if (!(x>-8))
+        return -1;
+	if (x<0)
+	{
+	   x=-x;
+	   sign=-1;
+	}
+	i = (int)floor(.5f+25*x);
+	x -= .04f*i;
+	y = tansig_table[i];
+	dy = 1-y*y;
+	y = y + x*dy*(1 - y*x);
+	return sign*y;
+}
+#endif
+
+#if 0
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+{
+	int j;
+	opus_val16 hidden[MAX_NEURONS];
+	const opus_val16 *W = m->weights;
+	/* Copy to tmp_in */
+	for (j=0;j<m->topo[1];j++)
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),8);
+		for (k=0;k<m->topo[0];k++)
+			sum = MAC16_16(sum, in[k],*W++);
+		hidden[j] = tansig_approx(sum);
+	}
+	for (j=0;j<m->topo[2];j++)
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),14);
+		for (k=0;k<m->topo[1];k++)
+			sum = MAC16_16(sum, hidden[k], *W++);
+		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+	}
+}
+#else
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    int j;
+    float hidden[MAX_NEURONS];
+    const float *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[0];k++)
+            sum = sum + in[k]**W++;
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[1];k++)
+            sum = sum + hidden[k]**W++;
+        out[j] = tansig_approx(sum);
+    }
+}
+#endif
diff --git a/src/main/jni/opus/src/mlp.h b/src/main/jni/opus/src/mlp.h
new file mode 100644
index 000000000..86c8e0617
--- /dev/null
+++ b/src/main/jni/opus/src/mlp.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_H_
+#define _MLP_H_
+
+#include "arch.h"
+
+typedef struct {
+	int layers;
+	const int *topo;
+	const float *weights;
+} MLP;
+
+void mlp_process(const MLP *m, const float *in, float *out);
+
+#endif /* _MLP_H_ */
diff --git a/src/main/jni/opus/src/mlp_data.c b/src/main/jni/opus/src/mlp_data.c
new file mode 100644
index 000000000..401c4c025
--- /dev/null
+++ b/src/main/jni/opus/src/mlp_data.c
@@ -0,0 +1,105 @@
+/* The contents of this file was automatically generated by mlp_train.c
+   It contains multi-layer perceptron (MLP) weights. */
+
+#include "mlp.h"
+
+/* RMS error was 0.138320, seed was 1361535663 */
+
+static const float weights[422] = {
+
+/* hidden layer */
+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
+
+/* output layer */
+-0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f,
+0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f,
+0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f,
+0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f,
+4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f,
+-1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f,
+3.87308f, 3.52558f};
+
+static const int topo[3] = {25, 15, 2};
+
+const MLP net = {
+    3,
+    topo,
+    weights
+};
diff --git a/src/main/jni/opus/src/opus.c b/src/main/jni/opus/src/opus.c
new file mode 100644
index 000000000..30890b9cb
--- /dev/null
+++ b/src/main/jni/opus/src/opus.c
@@ -0,0 +1,329 @@
+/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+
+#ifndef DISABLE_FLOAT_API
+OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem)
+{
+   int c;
+   int i;
+   float *x;
+
+   if (C<1 || N<1 || !_x || !declip_mem) return;
+
+   /* First thing: saturate everything to +/- 2 which is the highest level our
+      non-linearity can handle. At the point where the signal reaches +/-2,
+      the derivative will be zero anyway, so this doesn't introduce any
+      discontinuity in the derivative. */
+   for (i=0;i<N*C;i++)
+      _x[i] = MAX16(-2.f, MIN16(2.f, _x[i]));
+   for (c=0;c<C;c++)
+   {
+      float a;
+      float x0;
+      int curr;
+
+      x = _x+c;
+      a = declip_mem[c];
+      /* Continue applying the non-linearity from the previous frame to avoid
+         any discontinuity. */
+      for (i=0;i<N;i++)
+      {
+         if (x[i*C]*a>=0)
+            break;
+         x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+      }
+
+      curr=0;
+      x0 = x[0];
+      while(1)
+      {
+         int start, end;
+         float maxval;
+         int special=0;
+         int peak_pos;
+         for (i=curr;i<N;i++)
+         {
+            if (x[i*C]>1 || x[i*C]<-1)
+               break;
+         }
+         if (i==N)
+         {
+            a=0;
+            break;
+         }
+         peak_pos = i;
+         start=end=i;
+         maxval=ABS16(x[i*C]);
+         /* Look for first zero crossing before clipping */
+         while (start>0 && x[i*C]*x[(start-1)*C]>=0)
+            start--;
+         /* Look for first zero crossing after clipping */
+         while (end<N && x[i*C]*x[end*C]>=0)
+         {
+            /* Look for other peaks until the next zero-crossing. */
+            if (ABS16(x[end*C])>maxval)
+            {
+               maxval = ABS16(x[end*C]);
+               peak_pos = end;
+            }
+            end++;
+         }
+         /* Detect the special case where we clip before the first zero crossing */
+         special = (start==0 && x[i*C]*x[0]>=0);
+
+         /* Compute a such that maxval + a*maxval^2 = 1 */
+         a=(maxval-1)/(maxval*maxval);
+         if (x[i*C]>0)
+            a = -a;
+         /* Apply soft clipping */
+         for (i=start;i<end;i++)
+            x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+
+         if (special && peak_pos>=2)
+         {
+            /* Add a linear ramp from the first sample to the signal peak.
+               This avoids a discontinuity at the beginning of the frame. */
+            float delta;
+            float offset = x0-x[0];
+            delta = offset / peak_pos;
+            for (i=curr;i<peak_pos;i++)
+            {
+               offset -= delta;
+               x[i*C] += offset;
+               x[i*C] = MAX16(-1.f, MIN16(1.f, x[i*C]));
+            }
+         }
+         curr = end;
+         if (curr==N)
+            break;
+      }
+      declip_mem[c] = a;
+   }
+}
+#endif
+
+int encode_size(int size, unsigned char *data)
+{
+   if (size < 252)
+   {
+      data[0] = size;
+      return 1;
+   } else {
+      data[0] = 252+(size&0x3);
+      data[1] = (size-(int)data[0])>>2;
+      return 2;
+   }
+}
+
+static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size)
+{
+   if (len<1)
+   {
+      *size = -1;
+      return -1;
+   } else if (data[0]<252)
+   {
+      *size = data[0];
+      return 1;
+   } else if (len<2)
+   {
+      *size = -1;
+      return -1;
+   } else {
+      *size = 4*data[1] + data[0];
+      return 2;
+   }
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset)
+{
+   int i, bytes;
+   int count;
+   int cbr;
+   unsigned char ch, toc;
+   int framesize;
+   opus_int32 last_size;
+   opus_int32 pad = 0;
+   const unsigned char *data0 = data;
+
+   if (size==NULL)
+      return OPUS_BAD_ARG;
+
+   framesize = opus_packet_get_samples_per_frame(data, 48000);
+
+   cbr = 0;
+   toc = *data++;
+   len--;
+   last_size = len;
+   switch (toc&0x3)
+   {
+   /* One frame */
+   case 0:
+      count=1;
+      break;
+   /* Two CBR frames */
+   case 1:
+      count=2;
+      cbr = 1;
+      if (!self_delimited)
+      {
+         if (len&0x1)
+            return OPUS_INVALID_PACKET;
+         last_size = len/2;
+         /* If last_size doesn't fit in size[0], we'll catch it later */
+         size[0] = (opus_int16)last_size;
+      }
+      break;
+   /* Two VBR frames */
+   case 2:
+      count = 2;
+      bytes = parse_size(data, len, size);
+      len -= bytes;
+      if (size[0]<0 || size[0] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      last_size = len-size[0];
+      break;
+   /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+   default: /*case 3:*/
+      if (len<1)
+         return OPUS_INVALID_PACKET;
+      /* Number of frames encoded in bits 0 to 5 */
+      ch = *data++;
+      count = ch&0x3F;
+      if (count <= 0 || framesize*count > 5760)
+         return OPUS_INVALID_PACKET;
+      len--;
+      /* Padding flag is bit 6 */
+      if (ch&0x40)
+      {
+         int p;
+         do {
+            int tmp;
+            if (len<=0)
+               return OPUS_INVALID_PACKET;
+            p = *data++;
+            len--;
+            tmp = p==255 ? 254: p;
+            len -= tmp;
+            pad += tmp;
+         } while (p==255);
+      }
+      if (len<0)
+         return OPUS_INVALID_PACKET;
+      /* VBR flag is bit 7 */
+      cbr = !(ch&0x80);
+      if (!cbr)
+      {
+         /* VBR case */
+         last_size = len;
+         for (i=0;i<count-1;i++)
+         {
+            bytes = parse_size(data, len, size+i);
+            len -= bytes;
+            if (size[i]<0 || size[i] > len)
+               return OPUS_INVALID_PACKET;
+            data += bytes;
+            last_size -= bytes+size[i];
+         }
+         if (last_size<0)
+            return OPUS_INVALID_PACKET;
+      } else if (!self_delimited)
+      {
+         /* CBR case */
+         last_size = len/count;
+         if (last_size*count!=len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = (opus_int16)last_size;
+      }
+      break;
+   }
+   /* Self-delimited framing has an extra size for the last frame. */
+   if (self_delimited)
+   {
+      bytes = parse_size(data, len, size+count-1);
+      len -= bytes;
+      if (size[count-1]<0 || size[count-1] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      /* For CBR packets, apply the size to all the frames. */
+      if (cbr)
+      {
+         if (size[count-1]*count > len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = size[count-1];
+      } else if (bytes+size[count-1] > last_size)
+         return OPUS_INVALID_PACKET;
+   } else
+   {
+      /* Because it's not encoded explicitly, it's possible the size of the
+         last packet (or all the packets, for the CBR case) is larger than
+         1275. Reject them here.*/
+      if (last_size > 1275)
+         return OPUS_INVALID_PACKET;
+      size[count-1] = (opus_int16)last_size;
+   }
+
+   if (payload_offset)
+      *payload_offset = (int)(data-data0);
+
+   for (i=0;i<count;i++)
+   {
+      if (frames)
+         frames[i] = data;
+      data += size[i];
+   }
+
+   if (packet_offset)
+      *packet_offset = pad+(opus_int32)(data-data0);
+
+   if (out_toc)
+      *out_toc = toc;
+
+   return count;
+}
+
+int opus_packet_parse(const unsigned char *data, opus_int32 len,
+      unsigned char *out_toc, const unsigned char *frames[48],
+      opus_int16 size[48], int *payload_offset)
+{
+   return opus_packet_parse_impl(data, len, 0, out_toc,
+                                 frames, size, payload_offset, NULL);
+}
+
diff --git a/src/main/jni/opus/src/opus_compare.c b/src/main/jni/opus/src/opus_compare.c
new file mode 100644
index 000000000..06c67d752
--- /dev/null
+++ b/src/main/jni/opus/src/opus_compare.c
@@ -0,0 +1,379 @@
+/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation
+   Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define OPUS_PI (3.14159265F)
+
+#define OPUS_COSF(_x)        ((float)cos(_x))
+#define OPUS_SINF(_x)        ((float)sin(_x))
+
+static void *check_alloc(void *_ptr){
+  if(_ptr==NULL){
+    fprintf(stderr,"Out of memory.\n");
+    exit(EXIT_FAILURE);
+  }
+  return _ptr;
+}
+
+static void *opus_malloc(size_t _size){
+  return check_alloc(malloc(_size));
+}
+
+static void *opus_realloc(void *_ptr,size_t _size){
+  return check_alloc(realloc(_ptr,_size));
+}
+
+static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){
+  unsigned char  buf[1024];
+  float         *samples;
+  size_t         nsamples;
+  size_t         csamples;
+  size_t         xi;
+  size_t         nread;
+  samples=NULL;
+  nsamples=csamples=0;
+  for(;;){
+    nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin);
+    if(nread<=0)break;
+    if(nsamples+nread>csamples){
+      do csamples=csamples<<1|1;
+      while(nsamples+nread>csamples);
+      samples=(float *)opus_realloc(samples,
+       _nchannels*csamples*sizeof(*samples));
+    }
+    for(xi=0;xi<nread;xi++){
+      int ci;
+      for(ci=0;ci<_nchannels;ci++){
+        int s;
+        s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)];
+        s=((s&0xFFFF)^0x8000)-0x8000;
+        samples[(nsamples+xi)*_nchannels+ci]=s;
+      }
+    }
+    nsamples+=nread;
+  }
+  *_samples=(float *)opus_realloc(samples,
+   _nchannels*nsamples*sizeof(*samples));
+  return nsamples;
+}
+
+static void band_energy(float *_out,float *_ps,const int *_bands,int _nbands,
+ const float *_in,int _nchannels,size_t _nframes,int _window_sz,
+ int _step,int _downsample){
+  float *window;
+  float *x;
+  float *c;
+  float *s;
+  size_t xi;
+  int    xj;
+  int    ps_sz;
+  window=(float *)opus_malloc((3+_nchannels)*_window_sz*sizeof(*window));
+  c=window+_window_sz;
+  s=c+_window_sz;
+  x=s+_window_sz;
+  ps_sz=_window_sz/2;
+  for(xj=0;xj<_window_sz;xj++){
+    window[xj]=0.5F-0.5F*OPUS_COSF((2*OPUS_PI/(_window_sz-1))*xj);
+  }
+  for(xj=0;xj<_window_sz;xj++){
+    c[xj]=OPUS_COSF((2*OPUS_PI/_window_sz)*xj);
+  }
+  for(xj=0;xj<_window_sz;xj++){
+    s[xj]=OPUS_SINF((2*OPUS_PI/_window_sz)*xj);
+  }
+  for(xi=0;xi<_nframes;xi++){
+    int ci;
+    int xk;
+    int bi;
+    for(ci=0;ci<_nchannels;ci++){
+      for(xk=0;xk<_window_sz;xk++){
+        x[ci*_window_sz+xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci];
+      }
+    }
+    for(bi=xj=0;bi<_nbands;bi++){
+      float p[2]={0};
+      for(;xj<_bands[bi+1];xj++){
+        for(ci=0;ci<_nchannels;ci++){
+          float re;
+          float im;
+          int   ti;
+          ti=0;
+          re=im=0;
+          for(xk=0;xk<_window_sz;xk++){
+            re+=c[ti]*x[ci*_window_sz+xk];
+            im-=s[ti]*x[ci*_window_sz+xk];
+            ti+=xj;
+            if(ti>=_window_sz)ti-=_window_sz;
+          }
+          re*=_downsample;
+          im*=_downsample;
+          _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000;
+          p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci];
+        }
+      }
+      if(_out){
+        _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]);
+        if(_nchannels==2){
+          _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]);
+        }
+      }
+    }
+  }
+  free(window);
+}
+
+#define NBANDS (21)
+#define NFREQS (240)
+
+/*Bands on which we compute the pseudo-NMR (Bark-derived
+  CELT bands).*/
+static const int BANDS[NBANDS+1]={
+  0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200
+};
+
+#define TEST_WIN_SIZE (480)
+#define TEST_WIN_STEP (120)
+
+int main(int _argc,const char **_argv){
+  FILE    *fin1;
+  FILE    *fin2;
+  float   *x;
+  float   *y;
+  float   *xb;
+  float   *X;
+  float   *Y;
+  double    err;
+  float    Q;
+  size_t   xlength;
+  size_t   ylength;
+  size_t   nframes;
+  size_t   xi;
+  int      ci;
+  int      xj;
+  int      bi;
+  int      nchannels;
+  unsigned rate;
+  int      downsample;
+  int      ybands;
+  int      yfreqs;
+  int      max_compare;
+  if(_argc<3||_argc>6){
+    fprintf(stderr,"Usage: %s [-s] [-r rate2] <file1.sw> <file2.sw>\n",
+     _argv[0]);
+    return EXIT_FAILURE;
+  }
+  nchannels=1;
+  if(strcmp(_argv[1],"-s")==0){
+    nchannels=2;
+    _argv++;
+  }
+  rate=48000;
+  ybands=NBANDS;
+  yfreqs=NFREQS;
+  downsample=1;
+  if(strcmp(_argv[1],"-r")==0){
+    rate=atoi(_argv[2]);
+    if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){
+      fprintf(stderr,
+       "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n");
+      return EXIT_FAILURE;
+    }
+    downsample=48000/rate;
+    switch(rate){
+      case  8000:ybands=13;break;
+      case 12000:ybands=15;break;
+      case 16000:ybands=17;break;
+      case 24000:ybands=19;break;
+    }
+    yfreqs=NFREQS/downsample;
+    _argv+=2;
+  }
+  fin1=fopen(_argv[1],"rb");
+  if(fin1==NULL){
+    fprintf(stderr,"Error opening '%s'.\n",_argv[1]);
+    return EXIT_FAILURE;
+  }
+  fin2=fopen(_argv[2],"rb");
+  if(fin2==NULL){
+    fprintf(stderr,"Error opening '%s'.\n",_argv[2]);
+    fclose(fin1);
+    return EXIT_FAILURE;
+  }
+  /*Read in the data and allocate scratch space.*/
+  xlength=read_pcm16(&x,fin1,2);
+  if(nchannels==1){
+    for(xi=0;xi<xlength;xi++)x[xi]=.5*(x[2*xi]+x[2*xi+1]);
+  }
+  fclose(fin1);
+  ylength=read_pcm16(&y,fin2,nchannels);
+  fclose(fin2);
+  if(xlength!=ylength*downsample){
+    fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n",
+     (unsigned long)xlength,(unsigned long)ylength*downsample);
+    return EXIT_FAILURE;
+  }
+  if(xlength<TEST_WIN_SIZE){
+    fprintf(stderr,"Insufficient sample data (%lu<%i).\n",
+     (unsigned long)xlength,TEST_WIN_SIZE);
+    return EXIT_FAILURE;
+  }
+  nframes=(xlength-TEST_WIN_SIZE+TEST_WIN_STEP)/TEST_WIN_STEP;
+  xb=(float *)opus_malloc(nframes*NBANDS*nchannels*sizeof(*xb));
+  X=(float *)opus_malloc(nframes*NFREQS*nchannels*sizeof(*X));
+  Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y));
+  /*Compute the per-band spectral energy of the original signal
+     and the error.*/
+  band_energy(xb,X,BANDS,NBANDS,x,nchannels,nframes,
+   TEST_WIN_SIZE,TEST_WIN_STEP,1);
+  free(x);
+  band_energy(NULL,Y,BANDS,ybands,y,nchannels,nframes,
+   TEST_WIN_SIZE/downsample,TEST_WIN_STEP/downsample,downsample);
+  free(y);
+  for(xi=0;xi<nframes;xi++){
+    /*Frequency masking (low to high): 10 dB/Bark slope.*/
+    for(bi=1;bi<NBANDS;bi++){
+      for(ci=0;ci<nchannels;ci++){
+        xb[(xi*NBANDS+bi)*nchannels+ci]+=
+         0.1F*xb[(xi*NBANDS+bi-1)*nchannels+ci];
+      }
+    }
+    /*Frequency masking (high to low): 15 dB/Bark slope.*/
+    for(bi=NBANDS-1;bi-->0;){
+      for(ci=0;ci<nchannels;ci++){
+        xb[(xi*NBANDS+bi)*nchannels+ci]+=
+         0.03F*xb[(xi*NBANDS+bi+1)*nchannels+ci];
+      }
+    }
+    if(xi>0){
+      /*Temporal masking: -3 dB/2.5ms slope.*/
+      for(bi=0;bi<NBANDS;bi++){
+        for(ci=0;ci<nchannels;ci++){
+          xb[(xi*NBANDS+bi)*nchannels+ci]+=
+           0.5F*xb[((xi-1)*NBANDS+bi)*nchannels+ci];
+        }
+      }
+    }
+    /* Allowing some cross-talk */
+    if(nchannels==2){
+      for(bi=0;bi<NBANDS;bi++){
+        float l,r;
+        l=xb[(xi*NBANDS+bi)*nchannels+0];
+        r=xb[(xi*NBANDS+bi)*nchannels+1];
+        xb[(xi*NBANDS+bi)*nchannels+0]+=0.01F*r;
+        xb[(xi*NBANDS+bi)*nchannels+1]+=0.01F*l;
+      }
+    }
+
+    /* Apply masking */
+    for(bi=0;bi<ybands;bi++){
+      for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+        for(ci=0;ci<nchannels;ci++){
+          X[(xi*NFREQS+xj)*nchannels+ci]+=
+           0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+          Y[(xi*yfreqs+xj)*nchannels+ci]+=
+           0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+        }
+      }
+    }
+  }
+
+  /* Average of consecutive frames to make comparison slightly less sensitive */
+  for(bi=0;bi<ybands;bi++){
+    for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+      for(ci=0;ci<nchannels;ci++){
+         float xtmp;
+         float ytmp;
+         xtmp = X[xj*nchannels+ci];
+         ytmp = Y[xj*nchannels+ci];
+         for(xi=1;xi<nframes;xi++){
+           float xtmp2;
+           float ytmp2;
+           xtmp2 = X[(xi*NFREQS+xj)*nchannels+ci];
+           ytmp2 = Y[(xi*yfreqs+xj)*nchannels+ci];
+           X[(xi*NFREQS+xj)*nchannels+ci] += xtmp;
+           Y[(xi*yfreqs+xj)*nchannels+ci] += ytmp;
+           xtmp = xtmp2;
+           ytmp = ytmp2;
+         }
+      }
+    }
+  }
+
+  /*If working at a lower sampling rate, don't take into account the last
+     300 Hz to allow for different transition bands.
+    For 12 kHz, we don't skip anything, because the last band already skips
+     400 Hz.*/
+  if(rate==48000)max_compare=BANDS[NBANDS];
+  else if(rate==12000)max_compare=BANDS[ybands];
+  else max_compare=BANDS[ybands]-3;
+  err=0;
+  for(xi=0;xi<nframes;xi++){
+    double Ef;
+    Ef=0;
+    for(bi=0;bi<ybands;bi++){
+      double Eb;
+      Eb=0;
+      for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){
+        for(ci=0;ci<nchannels;ci++){
+          float re;
+          float im;
+          re=Y[(xi*yfreqs+xj)*nchannels+ci]/X[(xi*NFREQS+xj)*nchannels+ci];
+          im=re-log(re)-1;
+          /*Make comparison less sensitive around the SILK/CELT cross-over to
+            allow for mode freedom in the filters.*/
+          if(xj>=79&&xj<=81)im*=0.1F;
+          if(xj==80)im*=0.1F;
+          Eb+=im;
+        }
+      }
+      Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+      Ef += Eb*Eb;
+    }
+    /*Using a fixed normalization value means we're willing to accept slightly
+       lower quality for lower sampling rates.*/
+    Ef/=NBANDS;
+    Ef*=Ef;
+    err+=Ef*Ef;
+  }
+  err=pow(err/nframes,1.0/16);
+  Q=100*(1-0.5*log(1+err)/log(1.13));
+  if(Q<0){
+    fprintf(stderr,"Test vector FAILS\n");
+    fprintf(stderr,"Internal weighted error is %f\n",err);
+    return EXIT_FAILURE;
+  }
+  else{
+    fprintf(stderr,"Test vector PASSES\n");
+    fprintf(stderr,
+     "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err);
+    return EXIT_SUCCESS;
+  }
+}
diff --git a/src/main/jni/opus/src/opus_decoder.c b/src/main/jni/opus/src/opus_decoder.c
new file mode 100644
index 000000000..919ba521b
--- /dev/null
+++ b/src/main/jni/opus/src/opus_decoder.c
@@ -0,0 +1,970 @@
+/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifndef OPUS_BUILD
+# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details."
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+# pragma message "You appear to be compiling without optimization, if so opus will be very slow."
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "opus.h"
+#include "entdec.h"
+#include "modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "structs.h"
+#include "define.h"
+#include "mathops.h"
+#include "cpu_support.h"
+
+struct OpusDecoder {
+   int          celt_dec_offset;
+   int          silk_dec_offset;
+   int          channels;
+   opus_int32   Fs;          /** Sampling rate (at the API level) */
+   silk_DecControlStruct DecControl;
+   int          decode_gain;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define OPUS_DECODER_RESET_START stream_channels
+   int          stream_channels;
+
+   int          bandwidth;
+   int          mode;
+   int          prev_mode;
+   int          frame_size;
+   int          prev_redundancy;
+   int          last_packet_duration;
+#ifndef FIXED_POINT
+   opus_val16   softclip_mem[2];
+#endif
+
+   opus_uint32  rangeFinal;
+};
+
+#ifdef FIXED_POINT
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+#endif
+
+
+int opus_decoder_get_size(int channels)
+{
+   int silkDecSizeBytes, celtDecSizeBytes;
+   int ret;
+   if (channels<1 || channels > 2)
+      return 0;
+   ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
+   if(ret)
+      return 0;
+   silkDecSizeBytes = align(silkDecSizeBytes);
+   celtDecSizeBytes = celt_decoder_get_size(channels);
+   return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes;
+}
+
+int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
+{
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+   int ret, silkDecSizeBytes;
+
+   if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)
+    || (channels!=1&&channels!=2))
+      return OPUS_BAD_ARG;
+
+   OPUS_CLEAR((char*)st, opus_decoder_get_size(channels));
+   /* Initialize SILK encoder */
+   ret = silk_Get_Decoder_Size(&silkDecSizeBytes);
+   if (ret)
+      return OPUS_INTERNAL_ERROR;
+
+   silkDecSizeBytes = align(silkDecSizeBytes);
+   st->silk_dec_offset = align(sizeof(OpusDecoder));
+   st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes;
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+   st->stream_channels = st->channels = channels;
+
+   st->Fs = Fs;
+   st->DecControl.API_sampleRate = st->Fs;
+   st->DecControl.nChannelsAPI      = st->channels;
+
+   /* Reset decoder */
+   ret = silk_InitDecoder( silk_dec );
+   if(ret)return OPUS_INTERNAL_ERROR;
+
+   /* Initialize CELT decoder */
+   ret = celt_decoder_init(celt_dec, Fs, channels);
+   if(ret!=OPUS_OK)return OPUS_INTERNAL_ERROR;
+
+   celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0));
+
+   st->prev_mode = 0;
+   st->frame_size = Fs/400;
+   return OPUS_OK;
+}
+
+OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
+{
+   int ret;
+   OpusDecoder *st;
+   if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)
+    || (channels!=1&&channels!=2))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels));
+   if (st == NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_decoder_init(st, Fs, channels);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
+      opus_val16 *out, int overlap, int channels,
+      const opus_val16 *window, opus_int32 Fs)
+{
+   int i, c;
+   int inc = 48000/Fs;
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<overlap;i++)
+      {
+         opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+         out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]),
+                                   Q15ONE-w, in1[i*channels+c]), 15);
+      }
+   }
+}
+
+static int opus_packet_get_mode(const unsigned char *data)
+{
+   int mode;
+   if (data[0]&0x80)
+   {
+      mode = MODE_CELT_ONLY;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      mode = MODE_HYBRID;
+   } else {
+      mode = MODE_SILK_ONLY;
+   }
+   return mode;
+}
+
+static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+   int i, silk_ret=0, celt_ret=0;
+   ec_dec dec;
+   opus_int32 silk_frame_size;
+   int pcm_silk_size;
+   VARDECL(opus_int16, pcm_silk);
+   int pcm_transition_silk_size;
+   VARDECL(opus_val16, pcm_transition_silk);
+   int pcm_transition_celt_size;
+   VARDECL(opus_val16, pcm_transition_celt);
+   opus_val16 *pcm_transition;
+   int redundant_audio_size;
+   VARDECL(opus_val16, redundant_audio);
+
+   int audiosize;
+   int mode;
+   int transition=0;
+   int start_band;
+   int redundancy=0;
+   int redundancy_bytes = 0;
+   int celt_to_silk=0;
+   int c;
+   int F2_5, F5, F10, F20;
+   const opus_val16 *window;
+   opus_uint32 redundant_rng = 0;
+   ALLOC_STACK;
+
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+   F20 = st->Fs/50;
+   F10 = F20>>1;
+   F5 = F10>>1;
+   F2_5 = F5>>1;
+   if (frame_size < F2_5)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
+   /* Limit frame_size to avoid excessive stack allocations. */
+   frame_size = IMIN(frame_size, st->Fs/25*3);
+   /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
+   if (len<=1)
+   {
+      data = NULL;
+      /* In that case, don't conceal more than what the ToC says */
+      frame_size = IMIN(frame_size, st->frame_size);
+   }
+   if (data != NULL)
+   {
+      audiosize = st->frame_size;
+      mode = st->mode;
+      ec_dec_init(&dec,(unsigned char*)data,len);
+   } else {
+      audiosize = frame_size;
+      mode = st->prev_mode;
+
+      if (mode == 0)
+      {
+         /* If we haven't got any packet yet, all we can do is return zeros */
+         for (i=0;i<audiosize*st->channels;i++)
+            pcm[i] = 0;
+         RESTORE_STACK;
+         return audiosize;
+      }
+
+      /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT),
+         10, or 20 (e.g. 12.5 or 30 ms). */
+      if (audiosize > F20)
+      {
+         do {
+            int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0);
+            if (ret<0)
+            {
+               RESTORE_STACK;
+               return ret;
+            }
+            pcm += ret*st->channels;
+            audiosize -= ret;
+         } while (audiosize > 0);
+         RESTORE_STACK;
+         return frame_size;
+      } else if (audiosize < F20)
+      {
+         if (audiosize > F10)
+            audiosize = F10;
+         else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10)
+            audiosize = F5;
+      }
+   }
+
+   pcm_transition_silk_size = ALLOC_NONE;
+   pcm_transition_celt_size = ALLOC_NONE;
+   if (data!=NULL && st->prev_mode > 0 && (
+       (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy)
+    || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) )
+      )
+   {
+      transition = 1;
+      /* Decide where to allocate the stack memory for pcm_transition */
+      if (mode == MODE_CELT_ONLY)
+         pcm_transition_celt_size = F5*st->channels;
+      else
+         pcm_transition_silk_size = F5*st->channels;
+   }
+   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+   if (transition && mode == MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_celt;
+      opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+   }
+   if (audiosize > frame_size)
+   {
+      /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   } else {
+      frame_size = audiosize;
+   }
+
+   /* Don't allocate any memory when in CELT-only mode */
+   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+   ALLOC(pcm_silk, pcm_silk_size, opus_int16);
+
+   /* SILK processing */
+   if (mode != MODE_CELT_ONLY)
+   {
+      int lost_flag, decoded_samples;
+      opus_int16 *pcm_ptr = pcm_silk;
+
+      if (st->prev_mode==MODE_CELT_ONLY)
+         silk_InitDecoder( silk_dec );
+
+      /* The SILK PLC cannot produce frames of less than 10 ms */
+      st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
+
+      if (data != NULL)
+      {
+        st->DecControl.nChannelsInternal = st->stream_channels;
+        if( mode == MODE_SILK_ONLY ) {
+           if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) {
+              st->DecControl.internalSampleRate = 8000;
+           } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) {
+              st->DecControl.internalSampleRate = 12000;
+           } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) {
+              st->DecControl.internalSampleRate = 16000;
+           } else {
+              st->DecControl.internalSampleRate = 16000;
+              silk_assert( 0 );
+           }
+        } else {
+           /* Hybrid mode */
+           st->DecControl.internalSampleRate = 16000;
+        }
+     }
+
+     lost_flag = data == NULL ? 1 : 2 * decode_fec;
+     decoded_samples = 0;
+     do {
+        /* Call SILK decoder */
+        int first_frame = decoded_samples == 0;
+        silk_ret = silk_Decode( silk_dec, &st->DecControl,
+                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
+        if( silk_ret ) {
+           if (lost_flag) {
+              /* PLC failure should not be fatal */
+              silk_frame_size = frame_size;
+              for (i=0;i<frame_size*st->channels;i++)
+                 pcm_ptr[i] = 0;
+           } else {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+           }
+        }
+        pcm_ptr += silk_frame_size * st->channels;
+        decoded_samples += silk_frame_size;
+      } while( decoded_samples < frame_size );
+   }
+
+   start_band = 0;
+   if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL
+    && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len)
+   {
+      /* Check if we have a redundant 0-8 kHz band */
+      if (mode == MODE_HYBRID)
+         redundancy = ec_dec_bit_logp(&dec, 12);
+      else
+         redundancy = 1;
+      if (redundancy)
+      {
+         celt_to_silk = ec_dec_bit_logp(&dec, 1);
+         /* redundancy_bytes will be at least two, in the non-hybrid
+            case due to the ec_tell() check above */
+         redundancy_bytes = mode==MODE_HYBRID ?
+               (opus_int32)ec_dec_uint(&dec, 256)+2 :
+               len-((ec_tell(&dec)+7)>>3);
+         len -= redundancy_bytes;
+         /* This is a sanity check. It should never happen for a valid
+            packet, so the exact behaviour is not normative. */
+         if (len*8 < ec_tell(&dec))
+         {
+            len = 0;
+            redundancy_bytes = 0;
+            redundancy = 0;
+         }
+         /* Shrink decoder because of raw bits */
+         dec.storage -= redundancy_bytes;
+      }
+   }
+   if (mode != MODE_CELT_ONLY)
+      start_band = 17;
+
+   {
+      int endband=21;
+
+      switch(st->bandwidth)
+      {
+      case OPUS_BANDWIDTH_NARROWBAND:
+         endband = 13;
+         break;
+      case OPUS_BANDWIDTH_MEDIUMBAND:
+      case OPUS_BANDWIDTH_WIDEBAND:
+         endband = 17;
+         break;
+      case OPUS_BANDWIDTH_SUPERWIDEBAND:
+         endband = 19;
+         break;
+      case OPUS_BANDWIDTH_FULLBAND:
+         endband = 21;
+         break;
+      }
+      celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband));
+      celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels));
+   }
+
+   if (redundancy)
+   {
+      transition = 0;
+      pcm_transition_silk_size=ALLOC_NONE;
+   }
+
+   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
+
+   if (transition && mode != MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_silk;
+      opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+   }
+
+   /* Only allocation memory for redundancy if/when needed */
+   redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE;
+   ALLOC(redundant_audio, redundant_audio_size, opus_val16);
+
+   /* 5 ms redundant frame for CELT->SILK*/
+   if (redundancy && celt_to_silk)
+   {
+      celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
+                          redundant_audio, F5, NULL);
+      celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+   }
+
+   /* MUST be after PLC */
+   celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band));
+
+   if (mode != MODE_SILK_ONLY)
+   {
+      int celt_frame_size = IMIN(F20, frame_size);
+      /* Make sure to discard any previous CELT state */
+      if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy)
+         celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      /* Decode CELT */
+      celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
+                                     len, pcm, celt_frame_size, &dec);
+   } else {
+      unsigned char silence[2] = {0xFF, 0xFF};
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = 0;
+      /* For hybrid -> SILK transitions, we let the CELT MDCT
+         do a fade-out by decoding a silence frame */
+      if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
+      {
+         celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+      }
+   }
+
+   if (mode != MODE_CELT_ONLY)
+   {
+#ifdef FIXED_POINT
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+#else
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
+#endif
+   }
+
+   {
+      const CELTMode *celt_mode;
+      celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode));
+      window = celt_mode->window;
+   }
+
+   /* 5 ms redundant frame for SILK->CELT */
+   if (redundancy && !celt_to_silk)
+   {
+      celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+      celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+      smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
+                  pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
+   }
+   if (redundancy && celt_to_silk)
+   {
+      for (c=0;c<st->channels;c++)
+      {
+         for (i=0;i<F2_5;i++)
+            pcm[st->channels*i+c] = redundant_audio[st->channels*i+c];
+      }
+      smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
+                  pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
+   }
+   if (transition)
+   {
+      if (audiosize >= F5)
+      {
+         for (i=0;i<st->channels*F2_5;i++)
+            pcm[i] = pcm_transition[i];
+         smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5,
+                     pcm+st->channels*F2_5, F2_5,
+                     st->channels, window, st->Fs);
+      } else {
+         /* Not enough time to do a clean transition, but we do it anyway
+            This will not preserve amplitude perfectly and may introduce
+            a bit of temporal aliasing, but it shouldn't be too bad and
+            that's pretty much the best we can do. In any case, generating this
+            transition it pretty silly in the first place */
+         smooth_fade(pcm_transition, pcm,
+                     pcm, F2_5,
+                     st->channels, window, st->Fs);
+      }
+   }
+
+   if(st->decode_gain)
+   {
+      opus_val32 gain;
+      gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain));
+      for (i=0;i<frame_size*st->channels;i++)
+      {
+         opus_val32 x;
+         x = MULT16_32_P16(pcm[i],gain);
+         pcm[i] = SATURATE(x, 32767);
+      }
+   }
+
+   if (len <= 1)
+      st->rangeFinal = 0;
+   else
+      st->rangeFinal = dec.rng ^ redundant_rng;
+
+   st->prev_mode = mode;
+   st->prev_redundancy = redundancy && !celt_to_silk;
+
+   if (celt_ret>=0)
+   {
+      if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels))
+         OPUS_PRINT_INT(audiosize);
+   }
+
+   RESTORE_STACK;
+   return celt_ret < 0 ? celt_ret : audiosize;
+
+}
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
+      int self_delimited, opus_int32 *packet_offset, int soft_clip)
+{
+   int i, nb_samples;
+   int count, offset;
+   unsigned char toc;
+   int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels;
+   /* 48 x 2.5 ms = 120 ms */
+   opus_int16 size[48];
+   if (decode_fec<0 || decode_fec>1)
+      return OPUS_BAD_ARG;
+   /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */
+   if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0)
+      return OPUS_BAD_ARG;
+   if (len==0 || data==NULL)
+   {
+      int pcm_count=0;
+      do {
+         int ret;
+         ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0);
+         if (ret<0)
+            return ret;
+         pcm_count += ret;
+      } while (pcm_count < frame_size);
+      celt_assert(pcm_count == frame_size);
+      if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels))
+         OPUS_PRINT_INT(pcm_count);
+      st->last_packet_duration = pcm_count;
+      return pcm_count;
+   } else if (len<0)
+      return OPUS_BAD_ARG;
+
+   packet_mode = opus_packet_get_mode(data);
+   packet_bandwidth = opus_packet_get_bandwidth(data);
+   packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
+   packet_stream_channels = opus_packet_get_nb_channels(data);
+
+   count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
+                                  size, &offset, packet_offset);
+   if (count<0)
+      return count;
+
+   data += offset;
+
+   if (decode_fec)
+   {
+      int duration_copy;
+      int ret;
+      /* If no FEC can be present, run the PLC (recursive call) */
+      if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY)
+         return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip);
+      /* Otherwise, run the PLC on everything except the size for which we might have FEC */
+      duration_copy = st->last_packet_duration;
+      if (frame_size-packet_frame_size!=0)
+      {
+         ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip);
+         if (ret<0)
+         {
+            st->last_packet_duration = duration_copy;
+            return ret;
+         }
+         celt_assert(ret==frame_size-packet_frame_size);
+      }
+      /* Complete with FEC */
+      st->mode = packet_mode;
+      st->bandwidth = packet_bandwidth;
+      st->frame_size = packet_frame_size;
+      st->stream_channels = packet_stream_channels;
+      ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size),
+            packet_frame_size, 1);
+      if (ret<0)
+         return ret;
+      else {
+         if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels))
+            OPUS_PRINT_INT(frame_size);
+         st->last_packet_duration = frame_size;
+         return frame_size;
+      }
+   }
+
+   if (count*packet_frame_size > frame_size)
+      return OPUS_BUFFER_TOO_SMALL;
+
+   /* Update the state as the last step to avoid updating it on an invalid packet */
+   st->mode = packet_mode;
+   st->bandwidth = packet_bandwidth;
+   st->frame_size = packet_frame_size;
+   st->stream_channels = packet_stream_channels;
+
+   nb_samples=0;
+   for (i=0;i<count;i++)
+   {
+      int ret;
+      ret = opus_decode_frame(st, data, size[i], pcm+nb_samples*st->channels, frame_size-nb_samples, 0);
+      if (ret<0)
+         return ret;
+      celt_assert(ret==packet_frame_size);
+      data += size[i];
+      nb_samples += ret;
+   }
+   st->last_packet_duration = nb_samples;
+   if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels))
+      OPUS_PRINT_INT(nb_samples);
+#ifndef FIXED_POINT
+   if (soft_clip)
+      opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem);
+   else
+      st->softclip_mem[0]=st->softclip_mem[1]=0;
+#endif
+   return nb_samples;
+}
+
+#ifdef FIXED_POINT
+
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   if(frame_size<=0)
+      return OPUS_BAD_ARG;
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(opus_int16, out);
+   int ret, i;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(out, frame_size*st->channels, opus_int16);
+
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = (1.f/32768.f)*(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+
+
+#else
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(float, out);
+   int ret, i;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   ALLOC(out, frame_size*st->channels, float);
+
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = FLOAT2INT16(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+}
+
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   if(frame_size<=0)
+      return OPUS_BAD_ARG;
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+}
+
+#endif
+
+int opus_decoder_ctl(OpusDecoder *st, int request, ...)
+{
+   int ret = OPUS_OK;
+   va_list ap;
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+
+
+   va_start(ap, request);
+
+   switch (request)
+   {
+   case OPUS_GET_BANDWIDTH_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->bandwidth;
+   }
+   break;
+   case OPUS_GET_FINAL_RANGE_REQUEST:
+   {
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->rangeFinal;
+   }
+   break;
+   case OPUS_RESET_STATE:
+   {
+      OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START,
+            sizeof(OpusDecoder)-
+            ((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
+
+      celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      silk_InitDecoder( silk_dec );
+      st->stream_channels = st->channels;
+      st->frame_size = st->Fs/400;
+   }
+   break;
+   case OPUS_GET_SAMPLE_RATE_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->Fs;
+   }
+   break;
+   case OPUS_GET_PITCH_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      if (st->prev_mode == MODE_CELT_ONLY)
+         celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value));
+      else
+         *value = st->DecControl.prevPitchLag;
+   }
+   break;
+   case OPUS_GET_GAIN_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->decode_gain;
+   }
+   break;
+   case OPUS_SET_GAIN_REQUEST:
+   {
+       opus_int32 value = va_arg(ap, opus_int32);
+       if (value<-32768 || value>32767)
+       {
+          goto bad_arg;
+       }
+       st->decode_gain = value;
+   }
+   break;
+   case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+   {
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->last_packet_duration;
+   }
+   break;
+   default:
+      /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/
+      ret = OPUS_UNIMPLEMENTED;
+      break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+void opus_decoder_destroy(OpusDecoder *st)
+{
+   opus_free(st);
+}
+
+
+int opus_packet_get_bandwidth(const unsigned char *data)
+{
+   int bandwidth;
+   if (data[0]&0x80)
+   {
+      bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
+      if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+         bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND :
+                                   OPUS_BANDWIDTH_SUPERWIDEBAND;
+   } else {
+      bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
+   }
+   return bandwidth;
+}
+
+int opus_packet_get_samples_per_frame(const unsigned char *data,
+      opus_int32 Fs)
+{
+   int audiosize;
+   if (data[0]&0x80)
+   {
+      audiosize = ((data[0]>>3)&0x3);
+      audiosize = (Fs<<audiosize)/400;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
+   } else {
+      audiosize = ((data[0]>>3)&0x3);
+      if (audiosize == 3)
+         audiosize = Fs*60/1000;
+      else
+         audiosize = (Fs<<audiosize)/100;
+   }
+   return audiosize;
+}
+
+int opus_packet_get_nb_channels(const unsigned char *data)
+{
+   return (data[0]&0x4) ? 2 : 1;
+}
+
+int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len)
+{
+   int count;
+   if (len<1)
+      return OPUS_BAD_ARG;
+   count = packet[0]&0x3;
+   if (count==0)
+      return 1;
+   else if (count!=3)
+      return 2;
+   else if (len<2)
+      return OPUS_INVALID_PACKET;
+   else
+      return packet[1]&0x3F;
+}
+
+int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len,
+      opus_int32 Fs)
+{
+   int samples;
+   int count = opus_packet_get_nb_frames(packet, len);
+
+   if (count<0)
+      return count;
+
+   samples = count*opus_packet_get_samples_per_frame(packet, Fs);
+   /* Can't have more than 120 ms */
+   if (samples*25 > Fs*3)
+      return OPUS_INVALID_PACKET;
+   else
+      return samples;
+}
+
+int opus_decoder_get_nb_samples(const OpusDecoder *dec,
+      const unsigned char packet[], opus_int32 len)
+{
+   return opus_packet_get_nb_samples(packet, len, dec->Fs);
+}
diff --git a/src/main/jni/opus/src/opus_encoder.c b/src/main/jni/opus/src/opus_encoder.c
new file mode 100644
index 000000000..fbd3de639
--- /dev/null
+++ b/src/main/jni/opus/src/opus_encoder.c
@@ -0,0 +1,2488 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "entenc.h"
+#include "modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus.h"
+#include "arch.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "cpu_support.h"
+#include "analysis.h"
+#include "mathops.h"
+#include "tuning_parameters.h"
+#ifdef FIXED_POINT
+#include "fixed/structs_FIX.h"
+#else
+#include "float/structs_FLP.h"
+#endif
+
+#define MAX_ENCODER_BUFFER 480
+
+typedef struct {
+   opus_val32 XX, XY, YY;
+   opus_val16 smoothed_width;
+   opus_val16 max_follower;
+} StereoWidthState;
+
+struct OpusEncoder {
+    int          celt_enc_offset;
+    int          silk_enc_offset;
+    silk_EncControlStruct silk_mode;
+    int          application;
+    int          channels;
+    int          delay_compensation;
+    int          force_channels;
+    int          signal_type;
+    int          user_bandwidth;
+    int          max_bandwidth;
+    int          user_forced_mode;
+    int          voice_ratio;
+    opus_int32   Fs;
+    int          use_vbr;
+    int          vbr_constraint;
+    int          variable_duration;
+    opus_int32   bitrate_bps;
+    opus_int32   user_bitrate_bps;
+    int          lsb_depth;
+    int          encoder_buffer;
+    int          lfe;
+
+#define OPUS_ENCODER_RESET_START stream_channels
+    int          stream_channels;
+    opus_int16   hybrid_stereo_width_Q14;
+    opus_int32   variable_HP_smth2_Q15;
+    opus_val16   prev_HB_gain;
+    opus_val32   hp_mem[4];
+    int          mode;
+    int          prev_mode;
+    int          prev_channels;
+    int          prev_framesize;
+    int          bandwidth;
+    int          silk_bw_switch;
+    /* Sampling rate (at the API level) */
+    int          first;
+    opus_val16 * energy_masking;
+    StereoWidthState width_mem;
+    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
+#ifndef DISABLE_FLOAT_API
+    TonalityAnalysisState analysis;
+    int          detected_bandwidth;
+    int          analysis_offset;
+#endif
+    opus_uint32  rangeFinal;
+    int          arch;
+};
+
+/* Transition tables for the voice and music. First column is the
+   middle (memoriless) threshold. The second column is the hysteresis
+   (difference with the middle) */
+static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
+        11000, 1000, /* NB<->MB */
+        14000, 1000, /* MB<->WB */
+        17000, 1000, /* WB<->SWB */
+        21000, 2000, /* SWB<->FB */
+};
+static const opus_int32 mono_music_bandwidth_thresholds[8] = {
+        12000, 1000, /* NB<->MB */
+        15000, 1000, /* MB<->WB */
+        18000, 2000, /* WB<->SWB */
+        22000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
+        11000, 1000, /* NB<->MB */
+        14000, 1000, /* MB<->WB */
+        21000, 2000, /* WB<->SWB */
+        28000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
+        12000, 1000, /* NB<->MB */
+        18000, 2000, /* MB<->WB */
+        21000, 2000, /* WB<->SWB */
+        30000, 2000, /* SWB<->FB */
+};
+/* Threshold bit-rates for switching between mono and stereo */
+static const opus_int32 stereo_voice_threshold = 30000;
+static const opus_int32 stereo_music_threshold = 30000;
+
+/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */
+static const opus_int32 mode_thresholds[2][2] = {
+      /* voice */ /* music */
+      {  64000,      16000}, /* mono */
+      {  36000,      16000}, /* stereo */
+};
+
+int opus_encoder_get_size(int channels)
+{
+    int silkEncSizeBytes, celtEncSizeBytes;
+    int ret;
+    if (channels<1 || channels > 2)
+        return 0;
+    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
+    if (ret)
+        return 0;
+    silkEncSizeBytes = align(silkEncSizeBytes);
+    celtEncSizeBytes = celt_encoder_get_size(channels);
+    return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes;
+}
+
+int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
+{
+    void *silk_enc;
+    CELTEncoder *celt_enc;
+    int err;
+    int ret, silkEncSizeBytes;
+
+   if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
+        (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
+        && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
+        return OPUS_BAD_ARG;
+
+    OPUS_CLEAR((char*)st, opus_encoder_get_size(channels));
+    /* Create SILK encoder */
+    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
+    if (ret)
+        return OPUS_BAD_ARG;
+    silkEncSizeBytes = align(silkEncSizeBytes);
+    st->silk_enc_offset = align(sizeof(OpusEncoder));
+    st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes;
+    silk_enc = (char*)st+st->silk_enc_offset;
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+    st->stream_channels = st->channels = channels;
+
+    st->Fs = Fs;
+
+    st->arch = opus_select_arch();
+
+    ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode );
+    if(ret)return OPUS_INTERNAL_ERROR;
+
+    /* default SILK parameters */
+    st->silk_mode.nChannelsAPI              = channels;
+    st->silk_mode.nChannelsInternal         = channels;
+    st->silk_mode.API_sampleRate            = st->Fs;
+    st->silk_mode.maxInternalSampleRate     = 16000;
+    st->silk_mode.minInternalSampleRate     = 8000;
+    st->silk_mode.desiredInternalSampleRate = 16000;
+    st->silk_mode.payloadSize_ms            = 20;
+    st->silk_mode.bitRate                   = 25000;
+    st->silk_mode.packetLossPercentage      = 0;
+    st->silk_mode.complexity                = 9;
+    st->silk_mode.useInBandFEC              = 0;
+    st->silk_mode.useDTX                    = 0;
+    st->silk_mode.useCBR                    = 0;
+    st->silk_mode.reducedDependency         = 0;
+
+    /* Create CELT encoder */
+    /* Initialize CELT encoder */
+    err = celt_encoder_init(celt_enc, Fs, channels, st->arch);
+    if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;
+
+    celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
+    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
+
+    st->use_vbr = 1;
+    /* Makes constrained VBR the default (safer for real-time use) */
+    st->vbr_constraint = 1;
+    st->user_bitrate_bps = OPUS_AUTO;
+    st->bitrate_bps = 3000+Fs*channels;
+    st->application = application;
+    st->signal_type = OPUS_AUTO;
+    st->user_bandwidth = OPUS_AUTO;
+    st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+    st->force_channels = OPUS_AUTO;
+    st->user_forced_mode = OPUS_AUTO;
+    st->voice_ratio = -1;
+    st->encoder_buffer = st->Fs/100;
+    st->lsb_depth = 24;
+    st->variable_duration = OPUS_FRAMESIZE_ARG;
+
+    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 
+       + 1.5 ms for SILK resamplers and stereo prediction) */
+    st->delay_compensation = st->Fs/250;
+
+    st->hybrid_stereo_width_Q14 = 1 << 14;
+    st->prev_HB_gain = Q15ONE;
+    st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+    st->first = 1;
+    st->mode = MODE_HYBRID;
+    st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+    return OPUS_OK;
+}
+
+static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
+{
+   int period;
+   unsigned char toc;
+   period = 0;
+   while (framerate < 400)
+   {
+       framerate <<= 1;
+       period++;
+   }
+   if (mode == MODE_SILK_ONLY)
+   {
+       toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
+       toc |= (period-2)<<3;
+   } else if (mode == MODE_CELT_ONLY)
+   {
+       int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
+       if (tmp < 0)
+           tmp = 0;
+       toc = 0x80;
+       toc |= tmp << 5;
+       toc |= period<<3;
+   } else /* Hybrid */
+   {
+       toc = 0x60;
+       toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
+       toc |= (period-2)<<3;
+   }
+   toc |= (channels==2)<<2;
+   return toc;
+}
+
+#ifndef FIXED_POINT
+static void silk_biquad_float(
+    const opus_val16      *in,            /* I:    Input signal                   */
+    const opus_int32      *B_Q28,         /* I:    MA coefficients [3]            */
+    const opus_int32      *A_Q28,         /* I:    AR coefficients [2]            */
+    opus_val32            *S,             /* I/O:  State vector [2]               */
+    opus_val16            *out,           /* O:    Output signal                  */
+    const opus_int32      len,            /* I:    Signal length (must be even)   */
+    int stride
+)
+{
+    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
+    opus_int   k;
+    opus_val32 vout;
+    opus_val32 inval;
+    opus_val32 A[2], B[3];
+
+    A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
+    A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));
+    B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
+    B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
+    B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
+
+    /* Negate A_Q28 values and split in two parts */
+
+    for( k = 0; k < len; k++ ) {
+        /* S[ 0 ], S[ 1 ]: Q12 */
+        inval = in[ k*stride ];
+        vout = S[ 0 ] + B[0]*inval;
+
+        S[ 0 ] = S[1] - vout*A[0] + B[1]*inval;
+
+        S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL;
+
+        /* Scale back to Q0 and saturate */
+        out[ k*stride ] = vout;
+    }
+}
+#endif
+
+static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   opus_int32 B_Q28[ 3 ], A_Q28[ 2 ];
+   opus_int32 Fc_Q19, r_Q28, r_Q22;
+
+   silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
+   Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
+   silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );
+
+   r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 );
+
+   /* b = r * [ 1; -2; 1 ]; */
+   /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */
+   B_Q28[ 0 ] = r_Q28;
+   B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
+   B_Q28[ 2 ] = r_Q28;
+
+   /* -r * ( 2 - Fc * Fc ); */
+   r_Q22  = silk_RSHIFT( r_Q28, 6 );
+   A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0,  22 ) );
+   A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );
+
+#ifdef FIXED_POINT
+   silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+   if( channels == 2 ) {
+       silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+   }
+#else
+   silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+   if( channels == 2 ) {
+       silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+   }
+#endif
+}
+
+#ifdef FIXED_POINT
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i;
+   int shift;
+
+   /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
+   shift=celt_ilog2(Fs/(cutoff_Hz*3));
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = SHL32(EXTEND32(in[channels*i+c]), 15);
+         /* First stage */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
+         /* Second stage */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
+         out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767));
+      }
+   }
+}
+
+#else
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i;
+   float coef;
+
+   coef = 4.0f*cutoff_Hz/Fs;
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = in[channels*i+c];
+         /* First stage */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL;
+         /* Second stage */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL;
+         out[channels*i+c] = y;
+      }
+   }
+}
+#endif
+
+static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+    int i;
+    int overlap;
+    int inc;
+    inc = 48000/Fs;
+    overlap=overlap48/inc;
+    g1 = Q15ONE-g1;
+    g2 = Q15ONE-g2;
+    for (i=0;i<overlap;i++)
+    {
+       opus_val32 diff;
+       opus_val16 g, w;
+       w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+       g = SHR32(MAC16_16(MULT16_16(w,g2),
+             Q15ONE-w, g1), 15);
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+    for (;i<frame_size;i++)
+    {
+       opus_val32 diff;
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g2, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+}
+
+static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+    int i;
+    int inc;
+    int overlap;
+    int c;
+    inc = 48000/Fs;
+    overlap=overlap48/inc;
+    if (channels==1)
+    {
+       for (i=0;i<overlap;i++)
+       {
+          opus_val16 g, w;
+          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+          g = SHR32(MAC16_16(MULT16_16(w,g2),
+                Q15ONE-w, g1), 15);
+          out[i] = MULT16_16_Q15(g, in[i]);
+       }
+    } else {
+       for (i=0;i<overlap;i++)
+       {
+          opus_val16 g, w;
+          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+          g = SHR32(MAC16_16(MULT16_16(w,g2),
+                Q15ONE-w, g1), 15);
+          out[i*2] = MULT16_16_Q15(g, in[i*2]);
+          out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]);
+       }
+    }
+    c=0;do {
+       for (i=overlap;i<frame_size;i++)
+       {
+          out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]);
+       }
+    }
+    while (++c<channels);
+}
+
+OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
+{
+   int ret;
+   OpusEncoder *st;
+   if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
+       (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
+       && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
+   if (st == NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_encoder_init(st, Fs, channels, application);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
+{
+  if(!frame_size)frame_size=st->Fs/400;
+  if (st->user_bitrate_bps==OPUS_AUTO)
+    return 60*st->Fs/frame_size + st->Fs*st->channels;
+  else if (st->user_bitrate_bps==OPUS_BITRATE_MAX)
+    return max_data_bytes*8*st->Fs/frame_size;
+  else
+    return st->user_bitrate_bps;
+}
+
+#ifndef DISABLE_FLOAT_API
+/* Don't use more than 60 ms for the frame size analysis */
+#define MAX_DYNAMIC_FRAMESIZE 24
+/* Estimates how much the bitrate will be boosted based on the sub-frame energy */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   int i;
+   int M;
+   float sumE=0, sumE_1=0;
+   float metric;
+
+   M = IMIN(maxM, (1<<LM)+1);
+   for (i=0;i<M;i++)
+   {
+      sumE += E[i];
+      sumE_1 += E_1[i];
+   }
+   metric = sumE*sumE_1/(M*M);
+   /*if (LM==3)
+      printf("%f\n", metric);*/
+   /*return metric>10 ? 1 : 0;*/
+   /*return MAX16(0,1-exp(-.25*(metric-2.)));*/
+   return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2))));
+}
+
+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+
+   State numbering:
+    0: unused
+    1:  2.5 ms
+    2:  5 ms (#1)
+    3:  5 ms (#2)
+    4: 10 ms (#1)
+    5: 10 ms (#2)
+    6: 10 ms (#3)
+    7: 10 ms (#4)
+    8: 20 ms (#1)
+    9: 20 ms (#2)
+   10: 20 ms (#3)
+   11: 20 ms (#4)
+   12: 20 ms (#5)
+   13: 20 ms (#6)
+   14: 20 ms (#7)
+   15: 20 ms (#8)
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+   int i;
+   float cost[MAX_DYNAMIC_FRAMESIZE][16];
+   int states[MAX_DYNAMIC_FRAMESIZE][16];
+   float best_cost;
+   int best_state;
+   float factor;
+   /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */
+   if (rate<80)
+      factor=0;
+   else if (rate>160)
+      factor=1;
+   else
+      factor = (rate-80.f)/80.f;
+   /* Makes variable framesize less aggressive at lower bitrates, but I can't
+      find any valid theoretical justification for this (other than it seems
+      to help) */
+   for (i=0;i<16;i++)
+   {
+      /* Impossible state */
+      states[0][i] = -1;
+      cost[0][i] = 1e10;
+   }
+   for (i=0;i<4;i++)
+   {
+      cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1));
+      states[0][1<<i] = i;
+   }
+   for (i=1;i<N;i++)
+   {
+      int j;
+
+      /* Follow continuations */
+      for (j=2;j<16;j++)
+      {
+         cost[i][j] = cost[i-1][j-1];
+         states[i][j] = j-1;
+      }
+
+      /* New frames */
+      for(j=0;j<4;j++)
+      {
+         int k;
+         float min_cost;
+         float curr_cost;
+         states[i][1<<j] = 1;
+         min_cost = cost[i-1][1];
+         for(k=1;k<4;k++)
+         {
+            float tmp = cost[i-1][(1<<(k+1))-1];
+            if (tmp < min_cost)
+            {
+               states[i][1<<j] = (1<<(k+1))-1;
+               min_cost = tmp;
+            }
+         }
+         curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1));
+         cost[i][1<<j] = min_cost;
+         /* If part of the frame is outside the analysis window, only count part of the cost */
+         if (N-i < (1<<j))
+            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+         else
+            cost[i][1<<j] += curr_cost;
+      }
+   }
+
+   best_state=1;
+   best_cost = cost[N-1][1];
+   /* Find best end state (doesn't force a frame to end at N-1) */
+   for (i=2;i<16;i++)
+   {
+      if (cost[N-1][i]<best_cost)
+      {
+         best_cost = cost[N-1][i];
+         best_state = i;
+      }
+   }
+
+   /* Follow transitions back */
+   for (i=N-1;i>=0;i--)
+   {
+      /*printf("%d ", best_state);*/
+      best_state = states[i][best_state];
+   }
+   /*printf("%d\n", best_state);*/
+   return best_state;
+}
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, float *mem, int buffering,
+                downmix_func downmix)
+{
+   int N;
+   int i;
+   float e[MAX_DYNAMIC_FRAMESIZE+4];
+   float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+   opus_val32 memx;
+   int bestLM=0;
+   int subframe;
+   int pos;
+   VARDECL(opus_val32, sub);
+
+   subframe = Fs/400;
+   ALLOC(sub, subframe, opus_val32);
+   e[0]=mem[0];
+   e_1[0]=1.f/(EPSILON+mem[0]);
+   if (buffering)
+   {
+      /* Consider the CELT delay when not in restricted-lowdelay */
+      /* We assume the buffering is between 2.5 and 5 ms */
+      int offset = 2*subframe - buffering;
+      celt_assert(offset>=0 && offset <= subframe);
+      x += C*offset;
+      len -= offset;
+      e[1]=mem[1];
+      e_1[1]=1.f/(EPSILON+mem[1]);
+      e[2]=mem[2];
+      e_1[2]=1.f/(EPSILON+mem[2]);
+      pos = 3;
+   } else {
+      pos=1;
+   }
+   N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+   /* Just silencing a warning, it's really initialized later */
+   memx = 0;
+   for (i=0;i<N;i++)
+   {
+      float tmp;
+      opus_val32 tmpx;
+      int j;
+      tmp=EPSILON;
+
+      downmix(x, sub, subframe, i*subframe, 0, -2, C);
+      if (i==0)
+         memx = sub[0];
+      for (j=0;j<subframe;j++)
+      {
+         tmpx = sub[j];
+         tmp += (tmpx-memx)*(float)(tmpx-memx);
+         memx = tmpx;
+      }
+      e[i+pos] = tmp;
+      e_1[i+pos] = 1.f/tmp;
+   }
+   /* Hack to get 20 ms working with APPLICATION_AUDIO
+      The real problem is that the corresponding memory needs to use 1.5 ms
+      from this frame and 1 ms from the next frame */
+   e[i+pos] = e[i+pos-1];
+   if (buffering)
+      N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+   bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400);
+   mem[0] = e[1<<bestLM];
+   if (buffering)
+   {
+      mem[1] = e[(1<<bestLM)+1];
+      mem[2] = e[(1<<bestLM)+2];
+   }
+   return bestLM;
+}
+
+#endif
+
+#ifndef DISABLE_FLOAT_API
+#ifdef FIXED_POINT
+#define PCM2VAL(x) FLOAT2INT16(x)
+#else
+#define PCM2VAL(x) SCALEIN(x)
+#endif
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const float *x;
+   opus_val32 scale;
+   int j;
+   x = (const float *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = PCM2VAL(x[(j+offset)*C+c1]);
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += PCM2VAL(x[(j+offset)*C+c2]);
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += PCM2VAL(x[(j+offset)*C+c]);
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+#endif
+
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const opus_int16 *x;
+   opus_val32 scale;
+   int j;
+   x = (const opus_int16 *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = x[(j+offset)*C+c1];
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += x[(j+offset)*C+c2];
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += x[(j+offset)*C+c];
+      }
+   }
+#ifdef FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f/32768;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int new_size;
+   if (frame_size<Fs/400)
+      return -1;
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+      new_size = frame_size;
+   else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+      new_size = Fs/50;
+   else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS)
+      new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
+   else
+      return -1;
+   if (new_size>frame_size)
+      return -1;
+   if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs &&
+            50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs)
+      return -1;
+   return new_size;
+}
+
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+      , float *subframe_mem
+#endif
+      )
+{
+#ifndef DISABLE_FLOAT_API
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int LM = 3;
+      LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+            0, subframe_mem, delay_compensation, downmix);
+      while ((Fs/400<<LM)>frame_size)
+         LM--;
+      frame_size = (Fs/400<<LM);
+   } else
+#endif
+   {
+      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   }
+   if (frame_size<0)
+      return -1;
+   return frame_size;
+}
+
+opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
+{
+   opus_val16 corr;
+   opus_val16 ldiff;
+   opus_val16 width;
+   opus_val32 xx, xy, yy;
+   opus_val16 sqrt_xx, sqrt_yy;
+   opus_val16 qrrt_xx, qrrt_yy;
+   int frame_rate;
+   int i;
+   opus_val16 short_alpha;
+
+   frame_rate = Fs/frame_size;
+   short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate);
+   xx=xy=yy=0;
+   for (i=0;i<frame_size;i+=4)
+   {
+      opus_val32 pxx=0;
+      opus_val32 pxy=0;
+      opus_val32 pyy=0;
+      opus_val16 x, y;
+      x = pcm[2*i];
+      y = pcm[2*i+1];
+      pxx = SHR32(MULT16_16(x,x),2);
+      pxy = SHR32(MULT16_16(x,y),2);
+      pyy = SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+2];
+      y = pcm[2*i+3];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+4];
+      y = pcm[2*i+5];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+6];
+      y = pcm[2*i+7];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+
+      xx += SHR32(pxx, 10);
+      xy += SHR32(pxy, 10);
+      yy += SHR32(pyy, 10);
+   }
+   mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
+   mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
+   mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
+   mem->XX = MAX32(0, mem->XX);
+   mem->XY = MAX32(0, mem->XY);
+   mem->YY = MAX32(0, mem->YY);
+   if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
+   {
+      sqrt_xx = celt_sqrt(mem->XX);
+      sqrt_yy = celt_sqrt(mem->YY);
+      qrrt_xx = celt_sqrt(sqrt_xx);
+      qrrt_yy = celt_sqrt(sqrt_yy);
+      /* Inter-channel correlation */
+      mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
+      corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
+      /* Approximate loudness difference */
+      ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy);
+      width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
+      /* Smoothing over one second */
+      mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
+      /* Peak follower */
+      mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
+   } else {
+      width = 0;
+      corr=Q15ONE;
+      ldiff=0;
+   }
+   /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
+   return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower));
+}
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
+{
+    void *silk_enc;
+    CELTEncoder *celt_enc;
+    int i;
+    int ret=0;
+    opus_int32 nBytes;
+    ec_enc enc;
+    int bytes_target;
+    int prefill=0;
+    int start_band = 0;
+    int redundancy = 0;
+    int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
+    int celt_to_silk = 0;
+    VARDECL(opus_val16, pcm_buf);
+    int nb_compr_bytes;
+    int to_celt = 0;
+    opus_uint32 redundant_rng = 0;
+    int cutoff_Hz, hp_freq_smth1;
+    int voice_est; /* Probability of voice in Q7 */
+    opus_int32 equiv_rate;
+    int delay_compensation;
+    int frame_rate;
+    opus_int32 max_rate; /* Max bitrate we're allowed to use */
+    int curr_bandwidth;
+    opus_val16 HB_gain;
+    opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
+    int total_buffer;
+    opus_val16 stereo_width;
+    const CELTMode *celt_mode;
+    AnalysisInfo analysis_info;
+    int analysis_read_pos_bak=-1;
+    int analysis_read_subframe_bak=-1;
+    VARDECL(opus_val16, tmp_prefill);
+
+    ALLOC_STACK;
+
+    max_data_bytes = IMIN(1276, out_data_bytes);
+
+    st->rangeFinal = 0;
+    if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
+         50*frame_size != st->Fs &&  25*frame_size != st->Fs &&  50*frame_size != 3*st->Fs)
+         || (400*frame_size < st->Fs)
+         || max_data_bytes<=0
+         )
+    {
+       RESTORE_STACK;
+       return OPUS_BAD_ARG;
+    }
+    silk_enc = (char*)st+st->silk_enc_offset;
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+       delay_compensation = 0;
+    else
+       delay_compensation = st->delay_compensation;
+
+    lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+
+    analysis_info.valid = 0;
+    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+#ifndef DISABLE_FLOAT_API
+#ifdef FIXED_POINT
+    if (st->silk_mode.complexity >= 10 && st->Fs==48000)
+#else
+    if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+#endif
+    {
+       analysis_read_pos_bak = st->analysis.read_pos;
+       analysis_read_subframe_bak = st->analysis.read_subframe;
+       run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
+             c1, c2, analysis_channels, st->Fs,
+             lsb_depth, downmix, &analysis_info);
+    }
+#endif
+
+    st->voice_ratio = -1;
+
+#ifndef DISABLE_FLOAT_API
+    st->detected_bandwidth = 0;
+    if (analysis_info.valid)
+    {
+       int analysis_bandwidth;
+       if (st->signal_type == OPUS_AUTO)
+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
+
+       analysis_bandwidth = analysis_info.bandwidth;
+       if (analysis_bandwidth<=12)
+          st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+       else if (analysis_bandwidth<=14)
+          st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+       else if (analysis_bandwidth<=16)
+          st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+       else if (analysis_bandwidth<=18)
+          st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+       else
+          st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+    }
+#endif
+
+    if (st->channels==2 && st->force_channels!=1)
+       stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
+    else
+       stereo_width = 0;
+    total_buffer = delay_compensation;
+    st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
+
+    frame_rate = st->Fs/frame_size;
+    if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
+       || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
+    {
+       /*If the space is too low to do something useful, emit 'PLC' frames.*/
+       int tocmode = st->mode;
+       int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
+       if (tocmode==0)
+          tocmode = MODE_SILK_ONLY;
+       if (frame_rate>100)
+          tocmode = MODE_CELT_ONLY;
+       if (frame_rate < 50)
+          tocmode = MODE_SILK_ONLY;
+       if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
+          bw=OPUS_BANDWIDTH_WIDEBAND;
+       else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
+          bw=OPUS_BANDWIDTH_NARROWBAND;
+       else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
+          bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
+       data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
+       RESTORE_STACK;
+       return 1;
+    }
+    if (!st->use_vbr)
+    {
+       int cbrBytes;
+       cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes);
+       st->bitrate_bps = cbrBytes * (8*frame_rate);
+       max_data_bytes = cbrBytes;
+    }
+    max_rate = frame_rate*max_data_bytes*8;
+
+    /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
+    equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
+
+    if (st->signal_type == OPUS_SIGNAL_VOICE)
+       voice_est = 127;
+    else if (st->signal_type == OPUS_SIGNAL_MUSIC)
+       voice_est = 0;
+    else if (st->voice_ratio >= 0)
+    {
+       voice_est = st->voice_ratio*327>>8;
+       /* For AUDIO, never be more than 90% confident of having speech */
+       if (st->application == OPUS_APPLICATION_AUDIO)
+          voice_est = IMIN(voice_est, 115);
+    } else if (st->application == OPUS_APPLICATION_VOIP)
+       voice_est = 115;
+    else
+       voice_est = 48;
+
+    if (st->force_channels!=OPUS_AUTO && st->channels == 2)
+    {
+        st->stream_channels = st->force_channels;
+    } else {
+#ifdef FUZZING
+       /* Random mono/stereo decision */
+       if (st->channels == 2 && (rand()&0x1F)==0)
+          st->stream_channels = 3-st->stream_channels;
+#else
+       /* Rate-dependent mono-stereo decision */
+       if (st->channels == 2)
+       {
+          opus_int32 stereo_threshold;
+          stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
+          if (st->stream_channels == 2)
+             stereo_threshold -= 1000;
+          else
+             stereo_threshold += 1000;
+          st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
+       } else {
+          st->stream_channels = st->channels;
+       }
+#endif
+    }
+    equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50);
+
+    /* Mode selection depending on application and signal type */
+    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+    {
+       st->mode = MODE_CELT_ONLY;
+    } else if (st->user_forced_mode == OPUS_AUTO)
+    {
+#ifdef FUZZING
+       /* Random mode switching */
+       if ((rand()&0xF)==0)
+       {
+          if ((rand()&0x1)==0)
+             st->mode = MODE_CELT_ONLY;
+          else
+             st->mode = MODE_SILK_ONLY;
+       } else {
+          if (st->prev_mode==MODE_CELT_ONLY)
+             st->mode = MODE_CELT_ONLY;
+          else
+             st->mode = MODE_SILK_ONLY;
+       }
+#else
+       opus_int32 mode_voice, mode_music;
+       opus_int32 threshold;
+
+       /* Interpolate based on stereo width */
+       mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0])
+             + MULT16_32_Q15(stereo_width,mode_thresholds[1][0]));
+       mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1])
+             + MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
+       /* Interpolate based on speech/music probability */
+       threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
+       /* Bias towards SILK for VoIP because of some useful features */
+       if (st->application == OPUS_APPLICATION_VOIP)
+          threshold += 8000;
+
+       /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/
+       /* Hysteresis */
+       if (st->prev_mode == MODE_CELT_ONLY)
+           threshold -= 4000;
+       else if (st->prev_mode>0)
+           threshold += 4000;
+
+       st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
+
+       /* When FEC is enabled and there's enough packet loss, use SILK */
+       if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
+          st->mode = MODE_SILK_ONLY;
+       /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
+       if (st->silk_mode.useDTX && voice_est > 100)
+          st->mode = MODE_SILK_ONLY;
+#endif
+    } else {
+       st->mode = st->user_forced_mode;
+    }
+
+    /* Override the chosen mode to make sure we meet the requested frame size */
+    if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100)
+       st->mode = MODE_CELT_ONLY;
+    if (st->lfe)
+       st->mode = MODE_CELT_ONLY;
+    /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */
+    if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))
+       st->mode = MODE_CELT_ONLY;
+
+    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0
+          && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)
+    {
+       /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
+       st->silk_mode.toMono = 1;
+       st->stream_channels = 2;
+    } else {
+       st->silk_mode.toMono = 0;
+    }
+
+    if (st->prev_mode > 0 &&
+        ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
+    (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
+    {
+        redundancy = 1;
+        celt_to_silk = (st->mode != MODE_CELT_ONLY);
+        if (!celt_to_silk)
+        {
+            /* Switch to SILK/hybrid if frame size is 10 ms or more*/
+            if (frame_size >= st->Fs/100)
+            {
+                st->mode = st->prev_mode;
+                to_celt = 1;
+            } else {
+                redundancy=0;
+            }
+        }
+    }
+    /* For the first frame at a new SILK bandwidth */
+    if (st->silk_bw_switch)
+    {
+       redundancy = 1;
+       celt_to_silk = 1;
+       st->silk_bw_switch = 0;
+       prefill=1;
+    }
+
+    if (redundancy)
+    {
+       /* Fair share of the max size allowed */
+       redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200));
+       /* For VBR, target the actual bitrate (subject to the limit above) */
+       if (st->use_vbr)
+          redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600);
+    }
+
+    if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
+    {
+        silk_EncControlStruct dummy;
+        silk_InitEncoder( silk_enc, st->arch, &dummy);
+        prefill=1;
+    }
+
+    /* Automatic (rate-dependent) bandwidth selection */
+    if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
+    {
+        const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
+        opus_int32 bandwidth_thresholds[8];
+        int bandwidth = OPUS_BANDWIDTH_FULLBAND;
+        opus_int32 equiv_rate2;
+
+        equiv_rate2 = equiv_rate;
+        if (st->mode != MODE_CELT_ONLY)
+        {
+           /* Adjust the threshold +/- 10% depending on complexity */
+           equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50;
+           /* CBR is less efficient by ~1 kb/s */
+           if (!st->use_vbr)
+              equiv_rate2 -= 1000;
+        }
+        if (st->channels==2 && st->force_channels!=1)
+        {
+           voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
+        } else {
+           voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
+        }
+        /* Interpolate bandwidth thresholds depending on voice estimation */
+        for (i=0;i<8;i++)
+        {
+           bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
+                    + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
+        }
+        do {
+            int threshold, hysteresis;
+            threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
+            hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1];
+            if (!st->first)
+            {
+                if (st->bandwidth >= bandwidth)
+                    threshold -= hysteresis;
+                else
+                    threshold += hysteresis;
+            }
+            if (equiv_rate2 >= threshold)
+                break;
+        } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
+        st->bandwidth = bandwidth;
+        /* Prevents any transition to SWB/FB until the SILK layer has fully
+           switched to WB mode and turned the variable LP filter off */
+        if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+            st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    }
+
+    if (st->bandwidth>st->max_bandwidth)
+       st->bandwidth = st->max_bandwidth;
+
+    if (st->user_bandwidth != OPUS_AUTO)
+        st->bandwidth = st->user_bandwidth;
+
+    /* This prevents us from using hybrid at unsafe CBR/max rates */
+    if (st->mode != MODE_CELT_ONLY && max_rate < 15000)
+    {
+       st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND);
+    }
+
+    /* Prevents Opus from wasting bits on frequencies that are above
+       the Nyquist rate of the input signal */
+    if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND)
+        st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+    if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+        st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND)
+        st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+    if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
+        st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+#ifndef DISABLE_FLOAT_API
+    /* Use detected bandwidth to reduce the encoded bandwidth. */
+    if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO)
+    {
+       int min_detected_bandwidth;
+       /* Makes bandwidth detection more conservative just in case the detector
+          gets it wrong when we could have coded a high bandwidth transparently.
+          When operating in SILK/hybrid mode, we don't go below wideband to avoid
+          more complicated switches that require redundancy. */
+       if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+          min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+       else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+          min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+       else if (equiv_rate <= 30000*st->stream_channels)
+          min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+       else if (equiv_rate <= 44000*st->stream_channels)
+          min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+       else
+          min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+       st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth);
+       st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth);
+    }
+#endif
+    celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth));
+
+    /* CELT mode doesn't support mediumband, use wideband instead */
+    if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+        st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    if (st->lfe)
+       st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+
+    /* Can't support higher than wideband for >20 ms frames */
+    if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND))
+    {
+       VARDECL(unsigned char, tmp_data);
+       int nb_frames;
+       int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
+       VARDECL(OpusRepacketizer, rp);
+       opus_int32 bytes_per_frame;
+       opus_int32 repacketize_len;
+
+#ifndef DISABLE_FLOAT_API
+       if (analysis_read_pos_bak!= -1)
+       {
+          st->analysis.read_pos = analysis_read_pos_bak;
+          st->analysis.read_subframe = analysis_read_subframe_bak;
+       }
+#endif
+
+       nb_frames = frame_size > st->Fs/25 ? 3 : 2;
+       bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
+
+       ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
+
+       ALLOC(rp, 1, OpusRepacketizer);
+       opus_repacketizer_init(rp);
+
+       bak_mode = st->user_forced_mode;
+       bak_bandwidth = st->user_bandwidth;
+       bak_channels = st->force_channels;
+
+       st->user_forced_mode = st->mode;
+       st->user_bandwidth = st->bandwidth;
+       st->force_channels = st->stream_channels;
+       bak_to_mono = st->silk_mode.toMono;
+
+       if (bak_to_mono)
+          st->force_channels = 1;
+       else
+          st->prev_channels = st->stream_channels;
+       for (i=0;i<nb_frames;i++)
+       {
+          int tmp_len;
+          st->silk_mode.toMono = 0;
+          /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
+          if (to_celt && i==nb_frames-1)
+             st->user_forced_mode = MODE_CELT_ONLY;
+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
+                tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
+                NULL, 0, c1, c2, analysis_channels, downmix);
+          if (tmp_len<0)
+          {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+          }
+          ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
+          if (ret<0)
+          {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+          }
+       }
+       if (st->use_vbr)
+          repacketize_len = out_data_bytes;
+       else
+          repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes);
+       ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
+       if (ret<0)
+       {
+          RESTORE_STACK;
+          return OPUS_INTERNAL_ERROR;
+       }
+       st->user_forced_mode = bak_mode;
+       st->user_bandwidth = bak_bandwidth;
+       st->force_channels = bak_channels;
+       st->silk_mode.toMono = bak_to_mono;
+       RESTORE_STACK;
+       return ret;
+    }
+    curr_bandwidth = st->bandwidth;
+
+    /* Chooses the appropriate mode for speech
+       *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
+    if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+        st->mode = MODE_HYBRID;
+    if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
+        st->mode = MODE_SILK_ONLY;
+
+    /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
+    bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
+
+    data += 1;
+
+    ec_enc_init(&enc, data, max_data_bytes-1);
+
+    ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
+    for (i=0;i<total_buffer*st->channels;i++)
+       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+
+    if (st->mode == MODE_CELT_ONLY)
+       hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+    else
+       hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15;
+
+    st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15,
+          hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) );
+
+    /* convert from log scale to Hertz */
+    cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) );
+
+    if (st->application == OPUS_APPLICATION_VOIP)
+    {
+       hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+    } else {
+       dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+    }
+
+
+
+    /* SILK processing */
+    HB_gain = Q15ONE;
+    if (st->mode != MODE_CELT_ONLY)
+    {
+        opus_int32 total_bitRate, celt_rate;
+#ifdef FIXED_POINT
+       const opus_int16 *pcm_silk;
+#else
+       VARDECL(opus_int16, pcm_silk);
+       ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
+#endif
+
+        /* Distribute bits between SILK and CELT */
+        total_bitRate = 8 * bytes_target * frame_rate;
+        if( st->mode == MODE_HYBRID ) {
+            int HB_gain_ref;
+            /* Base rate for SILK */
+            st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) );
+            if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
+                /* SILK gets 2/3 of the remaining bits */
+                st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3;
+            } else { /* FULLBAND */
+                /* SILK gets 3/5 of the remaining bits */
+                st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5;
+            }
+            /* Don't let SILK use more than 80% */
+            if( st->silk_mode.bitRate > total_bitRate * 4/5 ) {
+                st->silk_mode.bitRate = total_bitRate * 4/5;
+            }
+            if (!st->energy_masking)
+            {
+               /* Increasingly attenuate high band when it gets allocated fewer bits */
+               celt_rate = total_bitRate - st->silk_mode.bitRate;
+               HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
+               HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
+               HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
+            }
+        } else {
+            /* SILK gets all bits */
+            st->silk_mode.bitRate = total_bitRate;
+        }
+
+        /* Surround masking for SILK */
+        if (st->energy_masking && st->use_vbr && !st->lfe)
+        {
+           opus_val32 mask_sum=0;
+           opus_val16 masking_depth;
+           opus_int32 rate_offset;
+           int c;
+           int end = 17;
+           opus_int16 srate = 16000;
+           if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
+           {
+              end = 13;
+              srate = 8000;
+           } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+           {
+              end = 15;
+              srate = 12000;
+           }
+           for (c=0;c<st->channels;c++)
+           {
+              for(i=0;i<end;i++)
+              {
+                 opus_val16 mask;
+                 mask = MAX16(MIN16(st->energy_masking[21*c+i],
+                        QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+                 if (mask > 0)
+                    mask = HALF16(mask);
+                 mask_sum += mask;
+              }
+           }
+           /* Conservative rate reduction, we cut the masking in half */
+           masking_depth = mask_sum / end*st->channels;
+           masking_depth += QCONST16(.2f, DB_SHIFT);
+           rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
+           rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
+           /* Split the rate change between the SILK and CELT part for hybrid. */
+           if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)
+              st->silk_mode.bitRate += 3*rate_offset/5;
+           else
+              st->silk_mode.bitRate += rate_offset;
+           bytes_target += rate_offset * frame_size / (8 * st->Fs);
+        }
+
+        st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
+        st->silk_mode.nChannelsAPI = st->channels;
+        st->silk_mode.nChannelsInternal = st->stream_channels;
+        if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+            st->silk_mode.desiredInternalSampleRate = 8000;
+        } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+            st->silk_mode.desiredInternalSampleRate = 12000;
+        } else {
+            silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
+            st->silk_mode.desiredInternalSampleRate = 16000;
+        }
+        if( st->mode == MODE_HYBRID ) {
+            /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
+            st->silk_mode.minInternalSampleRate = 16000;
+        } else {
+            st->silk_mode.minInternalSampleRate = 8000;
+        }
+
+        if (st->mode == MODE_SILK_ONLY)
+        {
+           opus_int32 effective_max_rate = max_rate;
+           st->silk_mode.maxInternalSampleRate = 16000;
+           if (frame_rate > 50)
+              effective_max_rate = effective_max_rate*2/3;
+           if (effective_max_rate < 13000)
+           {
+              st->silk_mode.maxInternalSampleRate = 12000;
+              st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate);
+           }
+           if (effective_max_rate < 9600)
+           {
+              st->silk_mode.maxInternalSampleRate = 8000;
+              st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate);
+           }
+        } else {
+           st->silk_mode.maxInternalSampleRate = 16000;
+        }
+
+        st->silk_mode.useCBR = !st->use_vbr;
+
+        /* Call SILK encoder for the low band */
+        nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes);
+
+        st->silk_mode.maxBits = nBytes*8;
+        /* Only allow up to 90% of the bits for hybrid mode*/
+        if (st->mode == MODE_HYBRID)
+           st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10;
+        if (st->silk_mode.useCBR)
+        {
+           st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8;
+           /* Reduce the initial target to make it easier to reach the CBR rate */
+           st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000);
+        }
+
+        if (prefill)
+        {
+            opus_int32 zero=0;
+            int prefill_offset;
+            /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
+               a discontinuity. The exact location is what we need to avoid leaving any "gap"
+               in the audio when mixing with the redundant CELT frame. Here we can afford to
+               overwrite st->delay_buffer because the only thing that uses it before it gets
+               rewritten is tmp_prefill[] and even then only the part after the ramp really
+               gets used (rather than sent to the encoder and discarded) */
+            prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
+            gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
+                  0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
+            for(i=0;i<prefill_offset;i++)
+               st->delay_buffer[i]=0;
+#ifdef FIXED_POINT
+            pcm_silk = st->delay_buffer;
+#else
+            for (i=0;i<st->encoder_buffer*st->channels;i++)
+                pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
+#endif
+            silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 );
+        }
+
+#ifdef FIXED_POINT
+        pcm_silk = pcm_buf+total_buffer*st->channels;
+#else
+        for (i=0;i<frame_size*st->channels;i++)
+            pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
+#endif
+        ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
+        if( ret ) {
+            /*fprintf (stderr, "SILK encode error: %d\n", ret);*/
+            /* Handle error */
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        if (nBytes==0)
+        {
+           st->rangeFinal = 0;
+           data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+           RESTORE_STACK;
+           return 1;
+        }
+        /* Extract SILK internal bandwidth for signaling in first byte */
+        if( st->mode == MODE_SILK_ONLY ) {
+            if( st->silk_mode.internalSampleRate == 8000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+            } else if( st->silk_mode.internalSampleRate == 12000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+            } else if( st->silk_mode.internalSampleRate == 16000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+            }
+        } else {
+            silk_assert( st->silk_mode.internalSampleRate == 16000 );
+        }
+
+        st->silk_mode.opusCanSwitch = st->silk_mode.switchReady;
+        /* FIXME: How do we allocate the redundancy for CBR? */
+        if (st->silk_mode.opusCanSwitch)
+        {
+           redundancy = 1;
+           celt_to_silk = 0;
+           st->silk_bw_switch = 1;
+        }
+    }
+
+    /* CELT processing */
+    {
+        int endband=21;
+
+        switch(curr_bandwidth)
+        {
+            case OPUS_BANDWIDTH_NARROWBAND:
+                endband = 13;
+                break;
+            case OPUS_BANDWIDTH_MEDIUMBAND:
+            case OPUS_BANDWIDTH_WIDEBAND:
+                endband = 17;
+                break;
+            case OPUS_BANDWIDTH_SUPERWIDEBAND:
+                endband = 19;
+                break;
+            case OPUS_BANDWIDTH_FULLBAND:
+                endband = 21;
+                break;
+        }
+        celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
+        celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
+    }
+    celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
+    if (st->mode != MODE_SILK_ONLY)
+    {
+        opus_val32 celt_pred=2;
+        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+        /* We may still decide to disable prediction later */
+        if (st->silk_mode.reducedDependency)
+           celt_pred = 0;
+        celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
+
+        if (st->mode == MODE_HYBRID)
+        {
+            int len;
+
+            len = (ec_tell(&enc)+7)>>3;
+            if (redundancy)
+               len += st->mode == MODE_HYBRID ? 3 : 1;
+            if( st->use_vbr ) {
+                nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs);
+            } else {
+                /* check if SILK used up too much */
+                nb_compr_bytes = len > bytes_target ? len : bytes_target;
+            }
+        } else {
+            if (st->use_vbr)
+            {
+                opus_int32 bonus=0;
+#ifndef DISABLE_FLOAT_API
+                if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
+                {
+                   bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
+                   if (analysis_info.valid)
+                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
+                }
+#endif
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
+                nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
+            } else {
+                nb_compr_bytes = bytes_target;
+            }
+        }
+
+    } else {
+        nb_compr_bytes = 0;
+    }
+
+    ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
+    if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
+    {
+       for (i=0;i<st->channels*st->Fs/400;i++)
+          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+    }
+
+    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
+        st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
+    for (;i<st->encoder_buffer*st->channels;i++)
+        st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
+
+    /* gain_fade() and stereo_fade() need to be after the buffer copying
+       because we don't want any of this to affect the SILK part */
+    if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
+       gain_fade(pcm_buf, pcm_buf,
+             st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
+    }
+    st->prev_HB_gain = HB_gain;
+    if (st->mode != MODE_HYBRID || st->stream_channels==1)
+       st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000));
+    if( !st->energy_masking && st->channels == 2 ) {
+        /* Apply stereo width reduction (at low bitrates) */
+        if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
+            opus_val16 g1, g2;
+            g1 = st->hybrid_stereo_width_Q14;
+            g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
+#ifdef FIXED_POINT
+            g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
+            g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
+#else
+            g1 *= (1.f/16384);
+            g2 *= (1.f/16384);
+#endif
+            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+                  frame_size, st->channels, celt_mode->window, st->Fs);
+            st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
+        }
+    }
+
+    if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1))
+    {
+        /* For SILK mode, the redundancy is inferred from the length */
+        if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes))
+           ec_enc_bit_logp(&enc, redundancy, 12);
+        if (redundancy)
+        {
+            int max_redundancy;
+            ec_enc_bit_logp(&enc, celt_to_silk, 1);
+            if (st->mode == MODE_HYBRID)
+               max_redundancy = (max_data_bytes-1)-nb_compr_bytes;
+            else
+               max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3);
+            /* Target the same bit-rate for redundancy as for the rest,
+               up to a max of 257 bytes */
+            redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600);
+            redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes));
+            if (st->mode == MODE_HYBRID)
+                ec_enc_uint(&enc, redundancy_bytes-2, 256);
+        }
+    } else {
+        redundancy = 0;
+    }
+
+    if (!redundancy)
+    {
+       st->silk_bw_switch = 0;
+       redundancy_bytes = 0;
+    }
+    if (st->mode != MODE_CELT_ONLY)start_band=17;
+
+    if (st->mode == MODE_SILK_ONLY)
+    {
+        ret = (ec_tell(&enc)+7)>>3;
+        ec_enc_done(&enc);
+        nb_compr_bytes = ret;
+    } else {
+       nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes);
+       ec_enc_shrink(&enc, nb_compr_bytes);
+    }
+
+#ifndef DISABLE_FLOAT_API
+    if (redundancy || st->mode != MODE_SILK_ONLY)
+       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+#endif
+
+    /* 5 ms redundant frame for CELT->SILK */
+    if (redundancy && celt_to_silk)
+    {
+        int err;
+        celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+        err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+        if (err < 0)
+        {
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+        celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+    }
+
+    celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band));
+
+    if (st->mode != MODE_SILK_ONLY)
+    {
+        if (st->mode != st->prev_mode && st->prev_mode > 0)
+        {
+           unsigned char dummy[2];
+           celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+
+           /* Prefilling */
+           celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL);
+           celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+        }
+        /* If false, we already busted the budget and we'll end up with a "PLC packet" */
+        if (ec_tell(&enc) <= 8*nb_compr_bytes)
+        {
+           ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
+           if (ret < 0)
+           {
+              RESTORE_STACK;
+              return OPUS_INTERNAL_ERROR;
+           }
+        }
+    }
+
+    /* 5 ms redundant frame for SILK->CELT */
+    if (redundancy && !celt_to_silk)
+    {
+        int err;
+        unsigned char dummy[2];
+        int N2, N4;
+        N2 = st->Fs/200;
+        N4 = st->Fs/400;
+
+        celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+        celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+        celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+
+        /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
+        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
+
+        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+        if (err < 0)
+        {
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+    }
+
+
+
+    /* Signalling the mode in the first byte */
+    data--;
+    data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+
+    st->rangeFinal = enc.rng ^ redundant_rng;
+
+    if (to_celt)
+        st->prev_mode = MODE_CELT_ONLY;
+    else
+        st->prev_mode = st->mode;
+    st->prev_channels = st->stream_channels;
+    st->prev_framesize = frame_size;
+
+    st->first = 0;
+
+    /* In the unlikely case that the SILK encoder busted its target, tell
+       the decoder to call the PLC */
+    if (ec_tell(&enc) > (max_data_bytes-1)*8)
+    {
+       if (max_data_bytes < 2)
+       {
+          RESTORE_STACK;
+          return OPUS_BUFFER_TOO_SMALL;
+       }
+       data[1] = 0;
+       ret = 1;
+       st->rangeFinal = 0;
+    } else if (st->mode==MODE_SILK_ONLY&&!redundancy)
+    {
+       /*When in LPC only mode it's perfectly
+         reasonable to strip off trailing zero bytes as
+         the required range decoder behavior is to
+         fill these in. This can't be done when the MDCT
+         modes are used because the decoder needs to know
+         the actual length for allocation purposes.*/
+       while(ret>2&&data[ret]==0)ret--;
+    }
+    /* Count ToC and redundancy */
+    ret += 1+redundancy_bytes;
+    if (!st->use_vbr)
+    {
+       if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
+
+       {
+          RESTORE_STACK;
+          return OPUS_INTERNAL_ERROR;
+       }
+       ret = max_data_bytes;
+    }
+    RESTORE_STACK;
+    return ret;
+}
+
+#ifdef FIXED_POINT
+
+#ifndef DISABLE_FLOAT_API
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{
+   int i, ret;
+   int frame_size;
+   int delay_compensation;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+
+   ALLOC(in, frame_size*st->channels, opus_int16);
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = FLOAT2INT16(pcm[i]);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+                unsigned char *data, opus_int32 out_data_bytes)
+{
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int
+#ifndef DISABLE_FLOAT_API
+         , st->analysis.subframe_mem
+#endif
+         );
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+}
+
+#else
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{
+   int i, ret;
+   int frame_size;
+   int delay_compensation;
+   VARDECL(float, in);
+   ALLOC_STACK;
+
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int, st->analysis.subframe_mem);
+
+   ALLOC(in, frame_size*st->channels, float);
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = (1.0f/32768)*pcm[i];
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   RESTORE_STACK;
+   return ret;
+}
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+                      unsigned char *data, opus_int32 out_data_bytes)
+{
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+}
+#endif
+
+
+int opus_encoder_ctl(OpusEncoder *st, int request, ...)
+{
+    int ret;
+    CELTEncoder *celt_enc;
+    va_list ap;
+
+    ret = OPUS_OK;
+    va_start(ap, request);
+
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+    switch (request)
+    {
+        case OPUS_SET_APPLICATION_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (   (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO
+                 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+               || (!st->first && st->application != value))
+            {
+               ret = OPUS_BAD_ARG;
+               break;
+            }
+            st->application = value;
+        }
+        break;
+        case OPUS_GET_APPLICATION_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->application;
+        }
+        break;
+        case OPUS_SET_BITRATE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX)
+            {
+                if (value <= 0)
+                    goto bad_arg;
+                else if (value <= 500)
+                    value = 500;
+                else if (value > (opus_int32)300000*st->channels)
+                    value = (opus_int32)300000*st->channels;
+            }
+            st->user_bitrate_bps = value;
+        }
+        break;
+        case OPUS_GET_BITRATE_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
+        }
+        break;
+        case OPUS_SET_FORCE_CHANNELS_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if((value<1 || value>st->channels) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->force_channels = value;
+        }
+        break;
+        case OPUS_GET_FORCE_CHANNELS_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->force_channels;
+        }
+        break;
+        case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 
+            {
+               goto bad_arg;
+            }
+            st->max_bandwidth = value;
+            if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+                st->silk_mode.maxInternalSampleRate = 8000;
+            } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+                st->silk_mode.maxInternalSampleRate = 12000;
+            } else {
+                st->silk_mode.maxInternalSampleRate = 16000;
+            }
+        }
+        break;
+        case OPUS_GET_MAX_BANDWIDTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->max_bandwidth;
+        }
+        break;
+        case OPUS_SET_BANDWIDTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->user_bandwidth = value;
+            if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+                st->silk_mode.maxInternalSampleRate = 8000;
+            } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+                st->silk_mode.maxInternalSampleRate = 12000;
+            } else {
+                st->silk_mode.maxInternalSampleRate = 16000;
+            }
+        }
+        break;
+        case OPUS_GET_BANDWIDTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->bandwidth;
+        }
+        break;
+        case OPUS_SET_DTX_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.useDTX = value;
+        }
+        break;
+        case OPUS_GET_DTX_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.useDTX;
+        }
+        break;
+        case OPUS_SET_COMPLEXITY_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>10)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.complexity = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value));
+        }
+        break;
+        case OPUS_GET_COMPLEXITY_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.complexity;
+        }
+        break;
+        case OPUS_SET_INBAND_FEC_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.useInBandFEC = value;
+        }
+        break;
+        case OPUS_GET_INBAND_FEC_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.useInBandFEC;
+        }
+        break;
+        case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value < 0 || value > 100)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.packetLossPercentage = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value));
+        }
+        break;
+        case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.packetLossPercentage;
+        }
+        break;
+        case OPUS_SET_VBR_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->use_vbr = value;
+            st->silk_mode.useCBR = 1-value;
+        }
+        break;
+        case OPUS_GET_VBR_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->use_vbr;
+        }
+        break;
+        case OPUS_SET_VOICE_RATIO_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value<-1 || value>100)
+            {
+               goto bad_arg;
+            }
+            st->voice_ratio = value;
+        }
+        break;
+        case OPUS_GET_VOICE_RATIO_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->voice_ratio;
+        }
+        break;
+        case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->vbr_constraint = value;
+        }
+        break;
+        case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->vbr_constraint;
+        }
+        break;
+        case OPUS_SET_SIGNAL_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC)
+            {
+               goto bad_arg;
+            }
+            st->signal_type = value;
+        }
+        break;
+        case OPUS_GET_SIGNAL_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->signal_type;
+        }
+        break;
+        case OPUS_GET_LOOKAHEAD_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->Fs/400;
+            if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+                *value += st->delay_compensation;
+        }
+        break;
+        case OPUS_GET_SAMPLE_RATE_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->Fs;
+        }
+        break;
+        case OPUS_GET_FINAL_RANGE_REQUEST:
+        {
+            opus_uint32 *value = va_arg(ap, opus_uint32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->rangeFinal;
+        }
+        break;
+        case OPUS_SET_LSB_DEPTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value<8 || value>24)
+            {
+               goto bad_arg;
+            }
+            st->lsb_depth=value;
+        }
+        break;
+        case OPUS_GET_LSB_DEPTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->lsb_depth;
+        }
+        break;
+        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value != OPUS_FRAMESIZE_ARG   && value != OPUS_FRAMESIZE_2_5_MS &&
+                value != OPUS_FRAMESIZE_5_MS  && value != OPUS_FRAMESIZE_10_MS  &&
+                value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS  &&
+                value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE)
+            {
+               goto bad_arg;
+            }
+            st->variable_duration = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value));
+        }
+        break;
+        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->variable_duration;
+        }
+        break;
+        case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+        {
+           opus_int32 value = va_arg(ap, opus_int32);
+           if (value > 1 || value < 0)
+              goto bad_arg;
+           st->silk_mode.reducedDependency = value;
+        }
+        break;
+        case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+        {
+           opus_int32 *value = va_arg(ap, opus_int32*);
+           if (!value)
+              goto bad_arg;
+           *value = st->silk_mode.reducedDependency;
+        }
+        break;
+        case OPUS_RESET_STATE:
+        {
+           void *silk_enc;
+           silk_EncControlStruct dummy;
+           silk_enc = (char*)st+st->silk_enc_offset;
+
+           OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START,
+                 sizeof(OpusEncoder)-
+                 ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st));
+
+           celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+           silk_InitEncoder( silk_enc, st->arch, &dummy );
+           st->stream_channels = st->channels;
+           st->hybrid_stereo_width_Q14 = 1 << 14;
+           st->prev_HB_gain = Q15ONE;
+           st->first = 1;
+           st->mode = MODE_HYBRID;
+           st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+           st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+        }
+        break;
+        case OPUS_SET_FORCE_MODE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->user_forced_mode = value;
+        }
+        break;
+        case OPUS_SET_LFE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            st->lfe = value;
+            ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
+        }
+        break;
+        case OPUS_SET_ENERGY_MASK_REQUEST:
+        {
+            opus_val16 *value = va_arg(ap, opus_val16*);
+            st->energy_masking = value;
+            ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
+        }
+        break;
+
+        case CELT_GET_MODE_REQUEST:
+        {
+           const CELTMode ** value = va_arg(ap, const CELTMode**);
+           if (!value)
+           {
+              goto bad_arg;
+           }
+           ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
+        }
+        break;
+        default:
+            /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
+            ret = OPUS_UNIMPLEMENTED;
+            break;
+    }
+    va_end(ap);
+    return ret;
+bad_arg:
+    va_end(ap);
+    return OPUS_BAD_ARG;
+}
+
+void opus_encoder_destroy(OpusEncoder *st)
+{
+    opus_free(st);
+}
diff --git a/src/main/jni/opus/src/opus_multistream.c b/src/main/jni/opus/src/opus_multistream.c
new file mode 100644
index 000000000..09c3639b7
--- /dev/null
+++ b/src/main/jni/opus/src/opus_multistream.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+
+int validate_layout(const ChannelLayout *layout)
+{
+   int i, max_channel;
+
+   max_channel = layout->nb_streams+layout->nb_coupled_streams;
+   if (max_channel>255)
+      return 0;
+   for (i=0;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255)
+         return 0;
+   }
+   return 1;
+}
+
+
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id*2)
+         return i;
+   }
+   return -1;
+}
+
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id*2+1)
+         return i;
+   }
+   return -1;
+}
+
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id+layout->nb_coupled_streams)
+         return i;
+   }
+   return -1;
+}
+
diff --git a/src/main/jni/opus/src/opus_multistream_decoder.c b/src/main/jni/opus/src/opus_multistream_decoder.c
new file mode 100644
index 000000000..a05fa1e76
--- /dev/null
+++ b/src/main/jni/opus/src/opus_multistream_decoder.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+struct OpusMSDecoder {
+   ChannelLayout layout;
+   /* Decoder states go here */
+};
+
+
+
+
+/* DECODER */
+
+opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+   int coupled_size;
+   int mono_size;
+
+   if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+   return align(sizeof(OpusMSDecoder))
+         + nb_coupled_streams * align(coupled_size)
+         + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+int opus_multistream_decoder_init(
+      OpusMSDecoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping
+)
+{
+   int coupled_size;
+   int mono_size;
+   int i, ret;
+   char *ptr;
+
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+      return OPUS_BAD_ARG;
+
+   st->layout.nb_channels = channels;
+   st->layout.nb_streams = streams;
+   st->layout.nb_coupled_streams = coupled_streams;
+
+   for (i=0;i<st->layout.nb_channels;i++)
+      st->layout.mapping[i] = mapping[i];
+   if (!validate_layout(&st->layout))
+      return OPUS_BAD_ARG;
+
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+
+   for (i=0;i<st->layout.nb_coupled_streams;i++)
+   {
+      ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2);
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(coupled_size);
+   }
+   for (;i<st->layout.nb_streams;i++)
+   {
+      ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1);
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(mono_size);
+   }
+   return OPUS_OK;
+}
+
+
+OpusMSDecoder *opus_multistream_decoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int *error
+)
+{
+   int ret;
+   OpusMSDecoder *st;
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+typedef void (*opus_copy_channel_out_func)(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+);
+
+static int opus_multistream_packet_validate(const unsigned char *data,
+      opus_int32 len, int nb_streams, opus_int32 Fs)
+{
+   int s;
+   int count;
+   unsigned char toc;
+   opus_int16 size[48];
+   int samples=0;
+   opus_int32 packet_offset;
+
+   for (s=0;s<nb_streams;s++)
+   {
+      int tmp_samples;
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (count<0)
+         return count;
+      tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs);
+      if (s!=0 && samples != tmp_samples)
+         return OPUS_INVALID_PACKET;
+      samples = tmp_samples;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return samples;
+}
+
+static int opus_multistream_decode_native(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      void *pcm,
+      opus_copy_channel_out_func copy_channel_out,
+      int frame_size,
+      int decode_fec,
+      int soft_clip
+)
+{
+   opus_int32 Fs;
+   int coupled_size;
+   int mono_size;
+   int s, c;
+   char *ptr;
+   int do_plc=0;
+   VARDECL(opus_val16, buf);
+   ALLOC_STACK;
+
+   /* Limit frame_size to avoid excessive stack allocations. */
+   opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs));
+   frame_size = IMIN(frame_size, Fs/25*3);
+   ALLOC(buf, 2*frame_size, opus_val16);
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+
+   if (len==0)
+      do_plc = 1;
+   if (len < 0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (!do_plc && len < 2*st->layout.nb_streams-1)
+   {
+      RESTORE_STACK;
+      return OPUS_INVALID_PACKET;
+   }
+   if (!do_plc)
+   {
+      int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs);
+      if (ret < 0)
+      {
+         RESTORE_STACK;
+         return ret;
+      } else if (ret > frame_size)
+      {
+         RESTORE_STACK;
+         return OPUS_BUFFER_TOO_SMALL;
+      }
+   }
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusDecoder *dec;
+      int packet_offset, ret;
+
+      dec = (OpusDecoder*)ptr;
+      ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size);
+
+      if (!do_plc && len<=0)
+      {
+         RESTORE_STACK;
+         return OPUS_INTERNAL_ERROR;
+      }
+      packet_offset = 0;
+      ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip);
+      data += packet_offset;
+      len -= packet_offset;
+      if (ret <= 0)
+      {
+         RESTORE_STACK;
+         return ret;
+      }
+      frame_size = ret;
+      if (s < st->layout.nb_coupled_streams)
+      {
+         int chan, prev;
+         prev = -1;
+         /* Copy "left" audio to the channel(s) where it belongs */
+         while ( (chan = get_left_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf, 2, frame_size);
+            prev = chan;
+         }
+         prev = -1;
+         /* Copy "right" audio to the channel(s) where it belongs */
+         while ( (chan = get_right_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf+1, 2, frame_size);
+            prev = chan;
+         }
+      } else {
+         int chan, prev;
+         prev = -1;
+         /* Copy audio to the channel(s) where it belongs */
+         while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf, 1, frame_size);
+            prev = chan;
+         }
+      }
+   }
+   /* Handle muted channels */
+   for (c=0;c<st->layout.nb_channels;c++)
+   {
+      if (st->layout.mapping[c] == 255)
+      {
+         (*copy_channel_out)(pcm, st->layout.nb_channels, c,
+            NULL, 0, frame_size);
+      }
+   }
+   RESTORE_STACK;
+   return frame_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+static void opus_copy_channel_out_float(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+)
+{
+   float *float_dst;
+   opus_int32 i;
+   float_dst = (float*)dst;
+   if (src != NULL)
+   {
+      for (i=0;i<frame_size;i++)
+#if defined(FIXED_POINT)
+         float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride];
+#else
+         float_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#endif
+   }
+   else
+   {
+      for (i=0;i<frame_size;i++)
+         float_dst[i*dst_stride+dst_channel] = 0;
+   }
+}
+#endif
+
+static void opus_copy_channel_out_short(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+)
+{
+   opus_int16 *short_dst;
+   opus_int32 i;
+   short_dst = (opus_int16*)dst;
+   if (src != NULL)
+   {
+      for (i=0;i<frame_size;i++)
+#if defined(FIXED_POINT)
+         short_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#else
+         short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]);
+#endif
+   }
+   else
+   {
+      for (i=0;i<frame_size;i++)
+         short_dst[i*dst_stride+dst_channel] = 0;
+   }
+}
+
+
+
+#ifdef FIXED_POINT
+int opus_multistream_decode(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      opus_int16 *pcm,
+      int frame_size,
+      int decode_fec
+)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data,
+      opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+#else
+
+int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1);
+}
+
+int opus_multistream_decode_float(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      float *pcm,
+      int frame_size,
+      int decode_fec
+)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...)
+{
+   va_list ap;
+   int coupled_size, mono_size;
+   char *ptr;
+   int ret = OPUS_OK;
+
+   va_start(ap, request);
+
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   switch (request)
+   {
+       case OPUS_GET_BANDWIDTH_REQUEST:
+       case OPUS_GET_SAMPLE_RATE_REQUEST:
+       case OPUS_GET_GAIN_REQUEST:
+       case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+       {
+          OpusDecoder *dec;
+          /* For int32* GET params, just query the first stream */
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          dec = (OpusDecoder*)ptr;
+          ret = opus_decoder_ctl(dec, request, value);
+       }
+       break;
+       case OPUS_GET_FINAL_RANGE_REQUEST:
+       {
+          int s;
+          opus_uint32 *value = va_arg(ap, opus_uint32*);
+          opus_uint32 tmp;
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = 0;
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, request, &tmp);
+             if (ret != OPUS_OK) break;
+             *value ^= tmp;
+          }
+       }
+       break;
+       case OPUS_RESET_STATE:
+       {
+          int s;
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, OPUS_RESET_STATE);
+             if (ret != OPUS_OK)
+                break;
+          }
+       }
+       break;
+       case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST:
+       {
+          int s;
+          opus_int32 stream_id;
+          OpusDecoder **value;
+          stream_id = va_arg(ap, opus_int32);
+          if (stream_id<0 || stream_id >= st->layout.nb_streams)
+             ret = OPUS_BAD_ARG;
+          value = va_arg(ap, OpusDecoder**);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          for (s=0;s<stream_id;s++)
+          {
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+          }
+          *value = (OpusDecoder*)ptr;
+       }
+       break;
+       case OPUS_SET_GAIN_REQUEST:
+       {
+          int s;
+          /* This works for int32 params */
+          opus_int32 value = va_arg(ap, opus_int32);
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, request, value);
+             if (ret != OPUS_OK)
+                break;
+          }
+       }
+       break;
+       default:
+          ret = OPUS_UNIMPLEMENTED;
+       break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+
+void opus_multistream_decoder_destroy(OpusMSDecoder *st)
+{
+    opus_free(st);
+}
diff --git a/src/main/jni/opus/src/opus_multistream_encoder.c b/src/main/jni/opus/src/opus_multistream_encoder.c
new file mode 100644
index 000000000..49e27913e
--- /dev/null
+++ b/src/main/jni/opus/src/opus_multistream_encoder.c
@@ -0,0 +1,1174 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "mdct.h"
+#include "modes.h"
+#include "bands.h"
+#include "quant_bands.h"
+
+typedef struct {
+   int nb_streams;
+   int nb_coupled_streams;
+   unsigned char mapping[8];
+} VorbisLayout;
+
+/* Index is nb_channel-1*/
+static const VorbisLayout vorbis_mappings[8] = {
+      {1, 0, {0}},                      /* 1: mono */
+      {1, 1, {0, 1}},                   /* 2: stereo */
+      {2, 1, {0, 2, 1}},                /* 3: 1-d surround */
+      {2, 2, {0, 1, 2, 3}},             /* 4: quadraphonic surround */
+      {3, 2, {0, 4, 1, 2, 3}},          /* 5: 5-channel surround */
+      {4, 2, {0, 4, 1, 2, 3, 5}},       /* 6: 5.1 surround */
+      {4, 3, {0, 4, 1, 2, 3, 5, 6}},    /* 7: 6.1 surround */
+      {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
+};
+
+typedef void (*opus_copy_channel_in_func)(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+);
+
+struct OpusMSEncoder {
+   ChannelLayout layout;
+   int lfe_stream;
+   int application;
+   int variable_duration;
+   int surround;
+   opus_int32 bitrate_bps;
+   float subframe_mem[3];
+   /* Encoder states go here */
+   /* then opus_val32 window_mem[channels*120]; */
+   /* then opus_val32 preemph_mem[channels]; */
+};
+
+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+}
+
+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)ptr;
+}
+
+static int validate_encoder_layout(const ChannelLayout *layout)
+{
+   int s;
+   for (s=0;s<layout->nb_streams;s++)
+   {
+      if (s < layout->nb_coupled_streams)
+      {
+         if (get_left_channel(layout, s, -1)==-1)
+            return 0;
+         if (get_right_channel(layout, s, -1)==-1)
+            return 0;
+      } else {
+         if (get_mono_channel(layout, s, -1)==-1)
+            return 0;
+      }
+   }
+   return 1;
+}
+
+static void channel_pos(int channels, int pos[8])
+{
+   /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
+   if (channels==4)
+   {
+      pos[0]=1;
+      pos[1]=3;
+      pos[2]=1;
+      pos[3]=3;
+   } else if (channels==3||channels==5||channels==6)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=0;
+   } else if (channels==7)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=2;
+      pos[6]=0;
+   } else if (channels==8)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=1;
+      pos[6]=3;
+      pos[7]=0;
+   }
+}
+
+#if 1
+/* Computes a rough approximation of log2(2^a + 2^b) */
+static opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   opus_val16 max;
+   opus_val32 diff;
+   opus_val16 frac;
+   static const opus_val16 diff_table[17] = {
+         QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),
+         QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),
+         QCONST16(0.0028123f, DB_SHIFT)
+   };
+   int low;
+   if (a>b)
+   {
+      max = a;
+      diff = SUB32(EXTEND32(a),EXTEND32(b));
+   } else {
+      max = b;
+      diff = SUB32(EXTEND32(b),EXTEND32(a));
+   }
+   if (diff >= QCONST16(8.f, DB_SHIFT))
+      return max;
+#ifdef FIXED_POINT
+   low = SHR32(diff, DB_SHIFT-1);
+   frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT);
+#else
+   low = (int)floor(2*diff);
+   frac = 2*diff - low;
+#endif
+   return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low]));
+}
+#else
+opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   return log2(pow(4, a)+ pow(4, b))/2;
+}
+#endif
+
+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+)
+{
+   int c;
+   int i;
+   int LM;
+   int pos[8] = {0};
+   int upsample;
+   int frame_size;
+   opus_val16 channel_offset;
+   opus_val32 bandE[21];
+   opus_val16 maskLogE[3][21];
+   VARDECL(opus_val32, in);
+   VARDECL(opus_val16, x);
+   VARDECL(opus_val32, freq);
+   SAVE_STACK;
+
+   upsample = resampling_factor(rate);
+   frame_size = len*upsample;
+
+   for (LM=0;LM<celt_mode->maxLM;LM++)
+      if (celt_mode->shortMdctSize<<LM==frame_size)
+         break;
+
+   ALLOC(in, frame_size+overlap, opus_val32);
+   ALLOC(x, len, opus_val16);
+   ALLOC(freq, frame_size, opus_val32);
+
+   channel_pos(channels, pos);
+
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);
+
+   for (c=0;c<channels;c++)
+   {
+      OPUS_COPY(in, mem+c*overlap, overlap);
+      (*copy_channel_in)(x, 1, pcm, channels, c, len);
+      celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+      if (upsample != 1)
+      {
+         int bound = len;
+         for (i=0;i<bound;i++)
+            freq[i] *= upsample;
+         for (;i<frame_size;i++)
+            freq[i] = 0;
+      }
+
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
+      /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
+      for (i=1;i<21;i++)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));
+      for (i=19;i>=0;i--)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));
+      if (pos[c]==1)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);
+      } else if (pos[c]==3)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);
+      } else if (pos[c]==2)
+      {
+         for (i=0;i<21;i++)
+         {
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+         }
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+#endif
+      OPUS_COPY(mem+c*overlap, in+frame_size, overlap);
+   }
+   for (i=0;i<21;i++)
+      maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]);
+   channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1)));
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] += channel_offset;
+#if 0
+   for (c=0;c<3;c++)
+   {
+      for (i=0;i<21;i++)
+         printf("%f ", maskLogE[c][i]);
+   }
+#endif
+   for (c=0;c<channels;c++)
+   {
+      opus_val16 *mask;
+      if (pos[c]!=0)
+      {
+         mask = &maskLogE[pos[c]-1][0];
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i];
+      } else {
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = 0;
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      printf("\n");
+#endif
+#if 0
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT));
+      printf("\n");
+#endif
+   }
+   RESTORE_STACK;
+}
+
+opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+   int coupled_size;
+   int mono_size;
+
+   if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   return align(sizeof(OpusMSEncoder))
+        + nb_coupled_streams * align(coupled_size)
+        + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family)
+{
+   int nb_streams;
+   int nb_coupled_streams;
+   opus_int32 size;
+
+   if (mapping_family==0)
+   {
+      if (channels==1)
+      {
+         nb_streams=1;
+         nb_coupled_streams=0;
+      } else if (channels==2)
+      {
+         nb_streams=1;
+         nb_coupled_streams=1;
+      } else
+         return 0;
+   } else if (mapping_family==1 && channels<=8 && channels>=1)
+   {
+      nb_streams=vorbis_mappings[channels-1].nb_streams;
+      nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+   } else if (mapping_family==255)
+   {
+      nb_streams=channels;
+      nb_coupled_streams=0;
+   } else
+      return 0;
+   size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
+   if (channels>2)
+   {
+      size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
+   }
+   return size;
+}
+
+
+static int opus_multistream_encoder_init_impl(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int surround
+)
+{
+   int coupled_size;
+   int mono_size;
+   int i, ret;
+   char *ptr;
+
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+      return OPUS_BAD_ARG;
+
+   st->layout.nb_channels = channels;
+   st->layout.nb_streams = streams;
+   st->layout.nb_coupled_streams = coupled_streams;
+   st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
+   if (!surround)
+      st->lfe_stream = -1;
+   st->bitrate_bps = OPUS_AUTO;
+   st->application = application;
+   st->variable_duration = OPUS_FRAMESIZE_ARG;
+   for (i=0;i<st->layout.nb_channels;i++)
+      st->layout.mapping[i] = mapping[i];
+   if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
+      return OPUS_BAD_ARG;
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   for (i=0;i<st->layout.nb_coupled_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application);
+      if(ret!=OPUS_OK)return ret;
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      ptr += align(coupled_size);
+   }
+   for (;i<st->layout.nb_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application);
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(mono_size);
+   }
+   if (surround)
+   {
+      OPUS_CLEAR(ms_get_preemph_mem(st), channels);
+      OPUS_CLEAR(ms_get_window_mem(st), channels*120);
+   }
+   st->surround = surround;
+   return OPUS_OK;
+}
+
+int opus_multistream_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application
+)
+{
+   return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, application, 0);
+}
+
+int opus_multistream_surround_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application
+)
+{
+   if ((channels>255) || (channels<1))
+      return OPUS_BAD_ARG;
+   st->lfe_stream = -1;
+   if (mapping_family==0)
+   {
+      if (channels==1)
+      {
+         *streams=1;
+         *coupled_streams=0;
+         mapping[0]=0;
+      } else if (channels==2)
+      {
+         *streams=1;
+         *coupled_streams=1;
+         mapping[0]=0;
+         mapping[1]=1;
+      } else
+         return OPUS_UNIMPLEMENTED;
+   } else if (mapping_family==1 && channels<=8 && channels>=1)
+   {
+      int i;
+      *streams=vorbis_mappings[channels-1].nb_streams;
+      *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+      for (i=0;i<channels;i++)
+         mapping[i] = vorbis_mappings[channels-1].mapping[i];
+      if (channels>=6)
+         st->lfe_stream = *streams-1;
+   } else if (mapping_family==255)
+   {
+      int i;
+      *streams=channels;
+      *coupled_streams=0;
+      for(i=0;i<channels;i++)
+         mapping[i] = i;
+   } else
+      return OPUS_UNIMPLEMENTED;
+   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams,
+         mapping, application, channels>2&&mapping_family==1);
+}
+
+OpusMSEncoder *opus_multistream_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   int ret;
+   OpusMSEncoder *st;
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_encoder_init(st, Fs, channels, streams, coupled_streams, mapping, application);
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+
+OpusMSEncoder *opus_multistream_surround_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   int ret;
+   OpusMSEncoder *st;
+   if ((channels>255) || (channels<1))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application);
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+
+static void surround_rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size
+      )
+{
+   int i;
+   opus_int32 channel_rate;
+   opus_int32 Fs;
+   char *ptr;
+   int stream_offset;
+   int lfe_offset;
+   int coupled_ratio; /* Q8 */
+   int lfe_ratio;     /* Q8 */
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+
+   if (st->bitrate_bps > st->layout.nb_channels*40000)
+      stream_offset = 20000;
+   else
+      stream_offset = st->bitrate_bps/st->layout.nb_channels/2;
+   stream_offset += 60*(Fs/frame_size-50);
+   /* We start by giving each stream (coupled or uncoupled) the same bitrate.
+      This models the main saving of coupled channels over uncoupled. */
+   /* The LFE stream is an exception to the above and gets fewer bits. */
+   lfe_offset = 3500 + 60*(Fs/frame_size-50);
+   /* Coupled streams get twice the mono rate after the first 20 kb/s. */
+   coupled_ratio = 512;
+   /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */
+   lfe_ratio = 32;
+
+   /* Compute bitrate allocation between streams */
+   if (st->bitrate_bps==OPUS_AUTO)
+   {
+      channel_rate = Fs+60*Fs/frame_size;
+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      channel_rate = 300000;
+   } else {
+      int nb_lfe;
+      int nb_uncoupled;
+      int nb_coupled;
+      int total;
+      nb_lfe = (st->lfe_stream!=-1);
+      nb_coupled = st->layout.nb_coupled_streams;
+      nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe;
+      total = (nb_uncoupled<<8)         /* mono */
+            + coupled_ratio*nb_coupled /* stereo */
+            + nb_lfe*lfe_ratio;
+      channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total;
+   }
+#ifndef FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+   {
+      opus_int32 bonus;
+      bonus = 60*(Fs/frame_size-50);
+      channel_rate += bonus;
+   }
+#endif
+
+   for (i=0;i<st->layout.nb_streams;i++)
+   {
+      if (i<st->layout.nb_coupled_streams)
+         rate[i] = stream_offset+(channel_rate*coupled_ratio>>8);
+      else if (i!=st->lfe_stream)
+         rate[i] = stream_offset+channel_rate;
+      else
+         rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+   }
+}
+
+/* Max size in case the encoder decides to return three frames */
+#define MS_FRAME_TMP (3*1275+7)
+static int opus_multistream_encode_native
+(
+    OpusMSEncoder *st,
+    opus_copy_channel_in_func copy_channel_in,
+    const void *pcm,
+    int analysis_frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes,
+    int lsb_depth,
+    downmix_func downmix
+)
+{
+   opus_int32 Fs;
+   int coupled_size;
+   int mono_size;
+   int s;
+   char *ptr;
+   int tot_size;
+   VARDECL(opus_val16, buf);
+   VARDECL(opus_val16, bandSMR);
+   unsigned char tmp_data[MS_FRAME_TMP];
+   OpusRepacketizer rp;
+   opus_int32 vbr;
+   const CELTMode *celt_mode;
+   opus_int32 bitrates[256];
+   opus_val16 bandLogE[42];
+   opus_val32 *mem = NULL;
+   opus_val32 *preemph_mem=NULL;
+   int frame_size;
+   ALLOC_STACK;
+
+   if (st->surround)
+   {
+      preemph_mem = ms_get_preemph_mem(st);
+      mem = ms_get_window_mem(st);
+   }
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr));
+   opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
+
+   {
+      opus_int32 delay_compensation;
+      int channels;
+
+      channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+      opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
+      delay_compensation -= Fs/400;
+      frame_size = compute_frame_size(pcm, analysis_frame_size,
+            st->variable_duration, channels, Fs, st->bitrate_bps,
+            delay_compensation, downmix
+#ifndef DISABLE_FLOAT_API
+            , st->subframe_mem
+#endif
+            );
+   }
+
+   if (400*frame_size < Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   /* Validate frame_size before using it to allocate stack space.
+      This mirrors the checks in opus_encode[_float](). */
+   if (400*frame_size != Fs && 200*frame_size != Fs &&
+       100*frame_size != Fs &&  50*frame_size != Fs &&
+        25*frame_size != Fs &&  50*frame_size != 3*Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(buf, 2*frame_size, opus_val16);
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
+   if (st->surround)
+   {
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
+   }
+
+   if (max_data_bytes < 4*st->layout.nb_streams-1)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
+
+   /* Compute bitrate allocation between streams (this could be a lot better) */
+   surround_rate_allocation(st, bitrates, frame_size);
+
+   if (!vbr)
+      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+      opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
+      if (st->surround)
+      {
+         opus_int32 equiv_rate;
+         equiv_rate = st->bitrate_bps;
+         if (frame_size*50 < Fs)
+            equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
+         if (equiv_rate > 10000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+         else if (equiv_rate > 7000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));
+         else if (equiv_rate > 5000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));
+         else
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
+         if (s < st->layout.nb_coupled_streams)
+         {
+            /* To preserve the spatial image, force stereo CELT on coupled streams */
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+         }
+      }
+   }
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   /* Counting ToC */
+   tot_size = 0;
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      int len;
+      int curr_max;
+      int c1, c2;
+
+      opus_repacketizer_init(&rp);
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+      {
+         int i;
+         int left, right;
+         left = get_left_channel(&st->layout, s, -1);
+         right = get_right_channel(&st->layout, s, -1);
+         (*copy_channel_in)(buf, 2,
+            pcm, st->layout.nb_channels, left, frame_size);
+         (*copy_channel_in)(buf+1, 2,
+            pcm, st->layout.nb_channels, right, frame_size);
+         ptr += align(coupled_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+            {
+               bandLogE[i] = bandSMR[21*left+i];
+               bandLogE[21+i] = bandSMR[21*right+i];
+            }
+         }
+         c1 = left;
+         c2 = right;
+      } else {
+         int i;
+         int chan = get_mono_channel(&st->layout, s, -1);
+         (*copy_channel_in)(buf, 1,
+            pcm, st->layout.nb_channels, chan, frame_size);
+         ptr += align(mono_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+               bandLogE[i] = bandSMR[21*chan+i];
+         }
+         c1 = chan;
+         c2 = -1;
+      }
+      if (st->surround)
+         opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
+      /* number of bytes left (+Toc) */
+      curr_max = max_data_bytes - tot_size;
+      /* Reserve three bytes for the last stream and four for the others */
+      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
+      curr_max = IMIN(curr_max,MS_FRAME_TMP);
+      if (!vbr && s == st->layout.nb_streams-1)
+         opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size)));
+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
+      if (len<0)
+      {
+         RESTORE_STACK;
+         return len;
+      }
+      /* We need to use the repacketizer to add the self-delimiting lengths
+         while taking into account the fact that the encoder can now return
+         more than one frame at a time (e.g. 60 ms CELT-only) */
+      opus_repacketizer_cat(&rp, tmp_data, len);
+      len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
+            data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
+      data += len;
+      tot_size += len;
+   }
+   /*printf("\n");*/
+   RESTORE_STACK;
+   return tot_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+static void opus_copy_channel_in_float(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const float *float_src;
+   opus_int32 i;
+   float_src = (const float *)src;
+   for (i=0;i<frame_size;i++)
+#if defined(FIXED_POINT)
+      dst[i*dst_stride] = FLOAT2INT16(float_src[i*src_stride+src_channel]);
+#else
+      dst[i*dst_stride] = float_src[i*src_stride+src_channel];
+#endif
+}
+#endif
+
+static void opus_copy_channel_in_short(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const opus_int16 *short_src;
+   opus_int32 i;
+   short_src = (const opus_int16 *)src;
+   for (i=0;i<frame_size;i++)
+#if defined(FIXED_POINT)
+      dst[i*dst_stride] = short_src[i*src_stride+src_channel];
+#else
+      dst[i*dst_stride] = (1/32768.f)*short_src[i*src_stride+src_channel];
+#endif
+}
+
+
+#ifdef FIXED_POINT
+int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_val16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_multistream_encode_float(
+    OpusMSEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float);
+}
+#endif
+
+#else
+
+int opus_multistream_encode_float
+(
+    OpusMSEncoder *st,
+    const opus_val16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float);
+}
+
+int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+#endif
+
+int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
+{
+   va_list ap;
+   int coupled_size, mono_size;
+   char *ptr;
+   int ret = OPUS_OK;
+
+   va_start(ap, request);
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   switch (request)
+   {
+   case OPUS_SET_BITRATE_REQUEST:
+   {
+      opus_int32 value = va_arg(ap, opus_int32);
+      if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
+      {
+         goto bad_arg;
+      }
+      st->bitrate_bps = value;
+   }
+   break;
+   case OPUS_GET_BITRATE_REQUEST:
+   {
+      int s;
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = 0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         opus_int32 rate;
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         opus_encoder_ctl(enc, request, &rate);
+         *value += rate;
+      }
+   }
+   break;
+   case OPUS_GET_LSB_DEPTH_REQUEST:
+   case OPUS_GET_VBR_REQUEST:
+   case OPUS_GET_APPLICATION_REQUEST:
+   case OPUS_GET_BANDWIDTH_REQUEST:
+   case OPUS_GET_COMPLEXITY_REQUEST:
+   case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_GET_DTX_REQUEST:
+   case OPUS_GET_VOICE_RATIO_REQUEST:
+   case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_GET_SIGNAL_REQUEST:
+   case OPUS_GET_LOOKAHEAD_REQUEST:
+   case OPUS_GET_SAMPLE_RATE_REQUEST:
+   case OPUS_GET_INBAND_FEC_REQUEST:
+   case OPUS_GET_FORCE_CHANNELS_REQUEST:
+   case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+   {
+      OpusEncoder *enc;
+      /* For int32* GET params, just query the first stream */
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      enc = (OpusEncoder*)ptr;
+      ret = opus_encoder_ctl(enc, request, value);
+   }
+   break;
+   case OPUS_GET_FINAL_RANGE_REQUEST:
+   {
+      int s;
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      opus_uint32 tmp;
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value=0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, &tmp);
+         if (ret != OPUS_OK) break;
+         *value ^= tmp;
+      }
+   }
+   break;
+   case OPUS_SET_LSB_DEPTH_REQUEST:
+   case OPUS_SET_COMPLEXITY_REQUEST:
+   case OPUS_SET_VBR_REQUEST:
+   case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+   case OPUS_SET_BANDWIDTH_REQUEST:
+   case OPUS_SET_SIGNAL_REQUEST:
+   case OPUS_SET_APPLICATION_REQUEST:
+   case OPUS_SET_INBAND_FEC_REQUEST:
+   case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_SET_DTX_REQUEST:
+   case OPUS_SET_FORCE_MODE_REQUEST:
+   case OPUS_SET_FORCE_CHANNELS_REQUEST:
+   case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+   {
+      int s;
+      /* This works for int32 params */
+      opus_int32 value = va_arg(ap, opus_int32);
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, value);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST:
+   {
+      int s;
+      opus_int32 stream_id;
+      OpusEncoder **value;
+      stream_id = va_arg(ap, opus_int32);
+      if (stream_id<0 || stream_id >= st->layout.nb_streams)
+         ret = OPUS_BAD_ARG;
+      value = va_arg(ap, OpusEncoder**);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      for (s=0;s<stream_id;s++)
+      {
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+      }
+      *value = (OpusEncoder*)ptr;
+   }
+   break;
+   case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 value = va_arg(ap, opus_int32);
+       st->variable_duration = value;
+   }
+   break;
+   case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 *value = va_arg(ap, opus_int32*);
+       if (!value)
+       {
+          goto bad_arg;
+       }
+       *value = st->variable_duration;
+   }
+   break;
+   case OPUS_RESET_STATE:
+   {
+      int s;
+      st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0;
+      if (st->surround)
+      {
+         OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels);
+         OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120);
+      }
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, OPUS_RESET_STATE);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   default:
+      ret = OPUS_UNIMPLEMENTED;
+      break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+void opus_multistream_encoder_destroy(OpusMSEncoder *st)
+{
+    opus_free(st);
+}
diff --git a/src/main/jni/opus/src/opus_private.h b/src/main/jni/opus/src/opus_private.h
new file mode 100644
index 000000000..83225f2b6
--- /dev/null
+++ b/src/main/jni/opus/src/opus_private.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2012 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OPUS_PRIVATE_H
+#define OPUS_PRIVATE_H
+
+#include "arch.h"
+#include "opus.h"
+#include "celt.h"
+
+struct OpusRepacketizer {
+   unsigned char toc;
+   int nb_frames;
+   const unsigned char *frames[48];
+   opus_int16 len[48];
+   int framesize;
+};
+
+typedef struct ChannelLayout {
+   int nb_channels;
+   int nb_streams;
+   int nb_coupled_streams;
+   unsigned char mapping[256];
+} ChannelLayout;
+
+int validate_layout(const ChannelLayout *layout);
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
+
+
+
+#define MODE_SILK_ONLY          1000
+#define MODE_HYBRID             1001
+#define MODE_CELT_ONLY          1002
+
+#define OPUS_SET_VOICE_RATIO_REQUEST         11018
+#define OPUS_GET_VOICE_RATIO_REQUEST         11019
+
+/** Configures the encoder's expected percentage of voice
+  * opposed to music or other signals.
+  *
+  * @note This interface is currently more aspiration than actuality. It's
+  * ultimately expected to bias an automatic signal classifier, but it currently
+  * just shifts the static bitrate to mode mapping around a little bit.
+  *
+  * @param[in] x <tt>int</tt>:   Voice percentage in the range 0-100, inclusive.
+  * @hideinitializer */
+#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO
+  *
+  * @param[out] x <tt>int*</tt>:  Voice percentage in the range 0-100, inclusive.
+  * @hideinitializer */
+#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x)
+
+
+#define OPUS_SET_FORCE_MODE_REQUEST    11002
+#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
+
+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, float *mem, int buffering,
+                downmix_func downmix);
+
+int encode_size(int size, unsigned char *data);
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
+
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+      , float *subframe_mem
+#endif
+      );
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
+      opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+      opus_int32 *packet_offset, int soft_clip);
+
+/* Make sure everything's aligned to sizeof(void *) bytes */
+static OPUS_INLINE int align(int i)
+{
+    return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *);
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset);
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad);
+
+int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+#endif /* OPUS_PRIVATE_H */
diff --git a/src/main/jni/opus/src/repacketizer.c b/src/main/jni/opus/src/repacketizer.c
new file mode 100644
index 000000000..a62675ce9
--- /dev/null
+++ b/src/main/jni/opus/src/repacketizer.c
@@ -0,0 +1,345 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+#include "os_support.h"
+
+
+int opus_repacketizer_get_size(void)
+{
+   return sizeof(OpusRepacketizer);
+}
+
+OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp)
+{
+   rp->nb_frames = 0;
+   return rp;
+}
+
+OpusRepacketizer *opus_repacketizer_create(void)
+{
+   OpusRepacketizer *rp;
+   rp=(OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size());
+   if(rp==NULL)return NULL;
+   return opus_repacketizer_init(rp);
+}
+
+void opus_repacketizer_destroy(OpusRepacketizer *rp)
+{
+   opus_free(rp);
+}
+
+static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited)
+{
+   unsigned char tmp_toc;
+   int curr_nb_frames,ret;
+   /* Set of check ToC */
+   if (len<1) return OPUS_INVALID_PACKET;
+   if (rp->nb_frames == 0)
+   {
+      rp->toc = data[0];
+      rp->framesize = opus_packet_get_samples_per_frame(data, 8000);
+   } else if ((rp->toc&0xFC) != (data[0]&0xFC))
+   {
+      /*fprintf(stderr, "toc mismatch: 0x%x vs 0x%x\n", rp->toc, data[0]);*/
+      return OPUS_INVALID_PACKET;
+   }
+   curr_nb_frames = opus_packet_get_nb_frames(data, len);
+   if(curr_nb_frames<1) return OPUS_INVALID_PACKET;
+
+   /* Check the 120 ms maximum packet size */
+   if ((curr_nb_frames+rp->nb_frames)*rp->framesize > 960)
+   {
+      return OPUS_INVALID_PACKET;
+   }
+
+   ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL);
+   if(ret<1)return ret;
+
+   rp->nb_frames += curr_nb_frames;
+   return OPUS_OK;
+}
+
+int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len)
+{
+   return opus_repacketizer_cat_impl(rp, data, len, 0);
+}
+
+int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp)
+{
+   return rp->nb_frames;
+}
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad)
+{
+   int i, count;
+   opus_int32 tot_size;
+   opus_int16 *len;
+   const unsigned char **frames;
+   unsigned char * ptr;
+
+   if (begin<0 || begin>=end || end>rp->nb_frames)
+   {
+      /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/
+      return OPUS_BAD_ARG;
+   }
+   count = end-begin;
+
+   len = rp->len+begin;
+   frames = rp->frames+begin;
+   if (self_delimited)
+      tot_size = 1 + (len[count-1]>=252);
+   else
+      tot_size = 0;
+
+   ptr = data;
+   if (count==1)
+   {
+      /* Code 0 */
+      tot_size += len[0]+1;
+      if (tot_size > maxlen)
+         return OPUS_BUFFER_TOO_SMALL;
+      *ptr++ = rp->toc&0xFC;
+   } else if (count==2)
+   {
+      if (len[1] == len[0])
+      {
+         /* Code 1 */
+         tot_size += 2*len[0]+1;
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x1;
+      } else {
+         /* Code 2 */
+         tot_size += len[0]+len[1]+2+(len[0]>=252);
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x2;
+         ptr += encode_size(len[0], ptr);
+      }
+   }
+   if (count > 2 || (pad && tot_size < maxlen))
+   {
+      /* Code 3 */
+      int vbr;
+      int pad_amount=0;
+
+      /* Restart the process for the padding case */
+      ptr = data;
+      if (self_delimited)
+         tot_size = 1 + (len[count-1]>=252);
+      else
+         tot_size = 0;
+      vbr = 0;
+      for (i=1;i<count;i++)
+      {
+         if (len[i] != len[0])
+         {
+            vbr=1;
+            break;
+         }
+      }
+      if (vbr)
+      {
+         tot_size += 2;
+         for (i=0;i<count-1;i++)
+            tot_size += 1 + (len[i]>=252) + len[i];
+         tot_size += len[count-1];
+
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count | 0x80;
+      } else {
+         tot_size += count*len[0]+2;
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count;
+      }
+      pad_amount = pad ? (maxlen-tot_size) : 0;
+      if (pad_amount != 0)
+      {
+         int nb_255s;
+         data[1] |= 0x40;
+         nb_255s = (pad_amount-1)/255;
+         for (i=0;i<nb_255s;i++)
+            *ptr++ = 255;
+         *ptr++ = pad_amount-255*nb_255s-1;
+         tot_size += pad_amount;
+      }
+      if (vbr)
+      {
+         for (i=0;i<count-1;i++)
+            ptr += encode_size(len[i], ptr);
+      }
+   }
+   if (self_delimited) {
+      int sdlen = encode_size(len[count-1], ptr);
+      ptr += sdlen;
+   }
+   /* Copy the actual data */
+   for (i=0;i<count;i++)
+   {
+      /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place
+         padding from opus_packet_pad or opus_packet_unpad(). */
+      celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]);
+      OPUS_MOVE(ptr, frames[i], len[i]);
+      ptr += len[i];
+   }
+   if (pad)
+   {
+      for (i=ptr-data;i<maxlen;i++)
+         data[i] = 0;
+   }
+   return tot_size;
+}
+
+opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen)
+{
+   return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0);
+}
+
+opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen)
+{
+   return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0);
+}
+
+int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   if (len==new_len)
+      return OPUS_OK;
+   else if (len > new_len)
+      return OPUS_BAD_ARG;
+   opus_repacketizer_init(&rp);
+   /* Moving payload to the end of the packet so we can do in-place padding */
+   OPUS_MOVE(data+new_len-len, data, len);
+   opus_repacketizer_cat(&rp, data+new_len-len, len);
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
+   if (ret > 0)
+      return OPUS_OK;
+   else
+      return ret;
+}
+
+opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   opus_repacketizer_init(&rp);
+   ret = opus_repacketizer_cat(&rp, data, len);
+   if (ret < 0)
+      return ret;
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0);
+   celt_assert(ret > 0 && ret <= len);
+   return ret;
+}
+
+int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams)
+{
+   int s;
+   int count;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   opus_int32 amount;
+
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   if (len==new_len)
+      return OPUS_OK;
+   else if (len > new_len)
+      return OPUS_BAD_ARG;
+   amount = new_len - len;
+   /* Seek to last stream */
+   for (s=0;s<nb_streams-1;s++)
+   {
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      count = opus_packet_parse_impl(data, len, 1, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (count<0)
+         return count;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return opus_packet_pad(data, len, len+amount);
+}
+
+opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams)
+{
+   int s;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   OpusRepacketizer rp;
+   unsigned char *dst;
+   opus_int32 dst_len;
+
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   dst = data;
+   dst_len = 0;
+   /* Unpad all frames */
+   for (s=0;s<nb_streams;s++)
+   {
+      opus_int32 ret;
+      int self_delimited = s!=nb_streams-1;
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      opus_repacketizer_init(&rp);
+      ret = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (ret<0)
+         return ret;
+      ret = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited);
+      if (ret < 0)
+         return ret;
+      ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, len, self_delimited, 0);
+      if (ret < 0)
+         return ret;
+      else
+         dst_len += ret;
+      dst += ret;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return dst_len;
+}
+
diff --git a/src/main/jni/opus/src/repacketizer_demo.c b/src/main/jni/opus/src/repacketizer_demo.c
new file mode 100644
index 000000000..dc05c1b35
--- /dev/null
+++ b/src/main/jni/opus/src/repacketizer_demo.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "opus.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_PACKETOUT 32000
+
+void usage(char *argv0)
+{
+   fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0);
+}
+
+static void int_to_char(opus_uint32 i, unsigned char ch[4])
+{
+    ch[0] = i>>24;
+    ch[1] = (i>>16)&0xFF;
+    ch[2] = (i>>8)&0xFF;
+    ch[3] = i&0xFF;
+}
+
+static opus_uint32 char_to_int(unsigned char ch[4])
+{
+    return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16)
+         | ((opus_uint32)ch[2]<< 8) |  (opus_uint32)ch[3];
+}
+
+int main(int argc, char *argv[])
+{
+   int i, eof=0;
+   FILE *fin, *fout;
+   unsigned char packets[48][1500];
+   int len[48];
+   int rng[48];
+   OpusRepacketizer *rp;
+   unsigned char output_packet[MAX_PACKETOUT];
+   int merge = 1, split=0;
+
+   if (argc < 3)
+   {
+      usage(argv[0]);
+      return EXIT_FAILURE;
+   }
+   for (i=1;i<argc-2;i++)
+   {
+      if (strcmp(argv[i], "-merge")==0)
+      {
+         merge = atoi(argv[i+1]);
+         if(merge<1)
+         {
+            fprintf(stderr, "-merge parameter must be at least 1.\n");
+            return EXIT_FAILURE;
+         }
+         if(merge>48)
+         {
+            fprintf(stderr, "-merge parameter must be less than 48.\n");
+            return EXIT_FAILURE;
+         }
+         i++;
+      } else if (strcmp(argv[i], "-split")==0)
+         split = 1;
+      else
+      {
+         fprintf(stderr, "Unknown option: %s\n", argv[i]);
+         usage(argv[0]);
+         return EXIT_FAILURE;
+      }
+   }
+   fin = fopen(argv[argc-2], "r");
+   if(fin==NULL)
+   {
+     fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]);
+     return EXIT_FAILURE;
+   }
+   fout = fopen(argv[argc-1], "w");
+   if(fout==NULL)
+   {
+     fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]);
+     fclose(fin);
+     return EXIT_FAILURE;
+   }
+
+   rp = opus_repacketizer_create();
+   while (!eof)
+   {
+      int err;
+      int nb_packets=merge;
+      opus_repacketizer_init(rp);
+      for (i=0;i<nb_packets;i++)
+      {
+         unsigned char ch[4];
+         err = fread(ch, 1, 4, fin);
+         len[i] = char_to_int(ch);
+         /*fprintf(stderr, "in len = %d\n", len[i]);*/
+         if (len[i]>1500 || len[i]<0)
+         {
+             if (feof(fin))
+             {
+                eof = 1;
+             } else {
+                fprintf(stderr, "Invalid payload length\n");
+                fclose(fin);
+                fclose(fout);
+                return EXIT_FAILURE;
+             }
+             break;
+         }
+         err = fread(ch, 1, 4, fin);
+         rng[i] = char_to_int(ch);
+         err = fread(packets[i], 1, len[i], fin);
+         if (feof(fin))
+         {
+            eof = 1;
+            break;
+         }
+         err = opus_repacketizer_cat(rp, packets[i], len[i]);
+         if (err!=OPUS_OK)
+         {
+            fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err));
+            break;
+         }
+      }
+      nb_packets = i;
+
+      if (eof)
+         break;
+
+      if (!split)
+      {
+         err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT);
+         if (err>0) {
+            unsigned char int_field[4];
+            int_to_char(err, int_field);
+            if(fwrite(int_field, 1, 4, fout)!=4){
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            int_to_char(rng[nb_packets-1], int_field);
+            if (fwrite(int_field, 1, 4, fout)!=4) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            /*fprintf(stderr, "out len = %d\n", err);*/
+         } else {
+            fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err));
+         }
+      } else {
+         int nb_frames = opus_repacketizer_get_nb_frames(rp);
+         for (i=0;i<nb_frames;i++)
+         {
+            err = opus_repacketizer_out_range(rp, i, i+1, output_packet, MAX_PACKETOUT);
+            if (err>0) {
+               unsigned char int_field[4];
+               int_to_char(err, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (i==nb_frames-1)
+                  int_to_char(rng[nb_packets-1], int_field);
+               else
+                  int_to_char(0, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               /*fprintf(stderr, "out len = %d\n", err);*/
+            } else {
+               fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err));
+            }
+
+         }
+      }
+   }
+
+   fclose(fin);
+   fclose(fout);
+   return EXIT_SUCCESS;
+}
diff --git a/src/main/jni/opus/src/tansig_table.h b/src/main/jni/opus/src/tansig_table.h
new file mode 100644
index 000000000..c76f844a7
--- /dev/null
+++ b/src/main/jni/opus/src/tansig_table.h
@@ -0,0 +1,45 @@
+/* This file is auto-generated by gen_tables */
+
+static const float tansig_table[201] = {
+0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
+0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
+0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
+0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
+0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
+0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
+0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
+0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
+0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
+0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
+0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
+0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
+0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
+0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
+0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
+0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
+0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
+0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
+0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
+0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
+0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
+0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
+0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
+0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
+0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
+0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
+0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
+0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
+0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
+0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
+0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
+0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
+0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
+0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
+0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
+0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f,
+};
diff --git a/src/main/jni/sqlite.c b/src/main/jni/sqlite.c
new file mode 100644
index 000000000..124b3df2f
--- /dev/null
+++ b/src/main/jni/sqlite.c
@@ -0,0 +1,11 @@
+#include "sqlite/sqlite3.h"
+#include "sqlite.h"
+
+void throw_sqlite3_exception(JNIEnv *env, sqlite3 *handle, int errcode) {
+	if (SQLITE_OK == errcode) {
+		errcode = sqlite3_errcode(handle);
+	}
+	const char *errmsg = sqlite3_errmsg(handle);
+	jclass exClass = (*env)->FindClass(env, "org/telegram/SQLite/SQLiteException");
+	(*env)->ThrowNew(env, exClass, errmsg);
+}
diff --git a/src/main/jni/sqlite.h b/src/main/jni/sqlite.h
new file mode 100644
index 000000000..af19fcb85
--- /dev/null
+++ b/src/main/jni/sqlite.h
@@ -0,0 +1,10 @@
+#ifndef sqlite_h
+#define sqlite_h
+
+#include <jni.h>
+#include "sqlite/sqlite3.h"
+
+void throw_sqlite3_exception(JNIEnv* env, sqlite3 *handle, int errcode);
+jint sqliteOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env);
+
+#endif
diff --git a/src/main/jni/sqlite/sqlite3.c b/src/main/jni/sqlite/sqlite3.c
new file mode 100644
index 000000000..cae0c4ad2
--- /dev/null
+++ b/src/main/jni/sqlite/sqlite3.c
@@ -0,0 +1,153363 @@
+/******************************************************************************
+** This file is an amalgamation of many separate C source files from SQLite
+** version 3.8.8.1.  By combining all the individual C code files into this 
+** single large file, the entire code can be compiled as a single translation
+** unit.  This allows many compilers to do optimizations that would not be
+** possible if the files were compiled separately.  Performance improvements
+** of 5% or more are commonly seen when SQLite is compiled as a single
+** translation unit.
+**
+** This file is all you need to compile SQLite.  To use SQLite in other
+** programs, you need this file and the "sqlite3.h" header file that defines
+** the programming interface to the SQLite library.  (If you do not have 
+** the "sqlite3.h" header file at hand, you will find a copy embedded within
+** the text of this file.  Search for "Begin file sqlite3.h" to find the start
+** of the embedded sqlite3.h header file.) Additional code files may be needed
+** if you want a wrapper to interface SQLite with your choice of programming
+** language. The code for the "sqlite3" command-line shell is also in a
+** separate file. This file contains only code for the core SQLite library.
+*/
+#define SQLITE_CORE 1
+#define SQLITE_AMALGAMATION 1
+#ifndef SQLITE_PRIVATE
+# define SQLITE_PRIVATE static
+#endif
+#ifndef SQLITE_API
+# define SQLITE_API
+#endif
+/************** Begin file sqliteInt.h ***************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Internal interface definitions for SQLite.
+**
+*/
+#ifndef _SQLITEINT_H_
+#define _SQLITEINT_H_
+
+/*
+** Include the header file used to customize the compiler options for MSVC.
+** This should be done first so that it can successfully prevent spurious
+** compiler warnings due to subsequent content in this file and other files
+** that are included by this file.
+*/
+/************** Include msvc.h in the middle of sqliteInt.h ******************/
+/************** Begin file msvc.h ********************************************/
+/*
+** 2015 January 12
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code that is specific to MSVC.
+*/
+#ifndef _MSVC_H_
+#define _MSVC_H_
+
+#if defined(_MSC_VER)
+#pragma warning(disable : 4054)
+#pragma warning(disable : 4055)
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4127)
+#pragma warning(disable : 4152)
+#pragma warning(disable : 4189)
+#pragma warning(disable : 4206)
+#pragma warning(disable : 4210)
+#pragma warning(disable : 4232)
+#pragma warning(disable : 4244)
+#pragma warning(disable : 4305)
+#pragma warning(disable : 4306)
+#pragma warning(disable : 4702)
+#pragma warning(disable : 4706)
+#endif /* defined(_MSC_VER) */
+
+#endif /* _MSVC_H_ */
+
+/************** End of msvc.h ************************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+
+/*
+** These #defines should enable >2GB file support on POSIX if the
+** underlying operating system supports it.  If the OS lacks
+** large file support, or if the OS is windows, these should be no-ops.
+**
+** Ticket #2739:  The _LARGEFILE_SOURCE macro must appear before any
+** system #includes.  Hence, this block of code must be the very first
+** code in all source files.
+**
+** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
+** on the compiler command line.  This is necessary if you are compiling
+** on a recent machine (ex: Red Hat 7.2) but you want your code to work
+** on an older machine (ex: Red Hat 6.0).  If you compile on Red Hat 7.2
+** without this option, LFS is enable.  But LFS does not exist in the kernel
+** in Red Hat 6.0, so the code won't work.  Hence, for maximum binary
+** portability you should omit LFS.
+**
+** The previous paragraph was written in 2005.  (This paragraph is written
+** on 2008-11-28.) These days, all Linux kernels support large files, so
+** you should probably leave LFS enabled.  But some embedded platforms might
+** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful.
+**
+** Similar is true for Mac OS X.  LFS is only supported on Mac OS X 9 and later.
+*/
+#ifndef SQLITE_DISABLE_LFS
+# define _LARGE_FILE       1
+# ifndef _FILE_OFFSET_BITS
+#   define _FILE_OFFSET_BITS 64
+# endif
+# define _LARGEFILE_SOURCE 1
+#endif
+
+/* Needed for various definitions... */
+#if defined(__GNUC__) && !defined(_GNU_SOURCE)
+# define _GNU_SOURCE
+#endif
+
+#if defined(__OpenBSD__) && !defined(_BSD_SOURCE)
+# define _BSD_SOURCE
+#endif
+
+/*
+** For MinGW, check to see if we can include the header file containing its
+** version information, among other things.  Normally, this internal MinGW
+** header file would [only] be included automatically by other MinGW header
+** files; however, the contained version information is now required by this
+** header file to work around binary compatibility issues (see below) and
+** this is the only known way to reliably obtain it.  This entire #if block
+** would be completely unnecessary if there was any other way of detecting
+** MinGW via their preprocessor (e.g. if they customized their GCC to define
+** some MinGW-specific macros).  When compiling for MinGW, either the
+** _HAVE_MINGW_H or _HAVE__MINGW_H (note the extra underscore) macro must be
+** defined; otherwise, detection of conditions specific to MinGW will be
+** disabled.
+*/
+#if defined(_HAVE_MINGW_H)
+# include "mingw.h"
+#elif defined(_HAVE__MINGW_H)
+# include "_mingw.h"
+#endif
+
+/*
+** For MinGW version 4.x (and higher), check to see if the _USE_32BIT_TIME_T
+** define is required to maintain binary compatibility with the MSVC runtime
+** library in use (e.g. for Windows XP).
+*/
+#if !defined(_USE_32BIT_TIME_T) && !defined(_USE_64BIT_TIME_T) && \
+    defined(_WIN32) && !defined(_WIN64) && \
+    defined(__MINGW_MAJOR_VERSION) && __MINGW_MAJOR_VERSION >= 4 && \
+    defined(__MSVCRT__)
+# define _USE_32BIT_TIME_T
+#endif
+
+/* The public SQLite interface.  The _FILE_OFFSET_BITS macro must appear
+** first in QNX.  Also, the _USE_32BIT_TIME_T macro must appear first for
+** MinGW.
+*/
+/************** Include sqlite3.h in the middle of sqliteInt.h ***************/
+/************** Begin file sqlite3.h *****************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface that the SQLite library
+** presents to client programs.  If a C-function, structure, datatype,
+** or constant definition does not appear in this file, then it is
+** not a published API of SQLite, is subject to change without
+** notice, and should not be referenced by programs that use SQLite.
+**
+** Some of the definitions that are in this file are marked as
+** "experimental".  Experimental interfaces are normally new
+** features recently added to SQLite.  We do not anticipate changes
+** to experimental interfaces but reserve the right to make minor changes
+** if experience from use "in the wild" suggest such changes are prudent.
+**
+** The official C-language API documentation for SQLite is derived
+** from comments in this file.  This file is the authoritative source
+** on how SQLite interfaces are suppose to operate.
+**
+** The name of this file under configuration management is "sqlite.h.in".
+** The makefile makes some minor changes to this file (such as inserting
+** the version number) and changes its name to "sqlite3.h" as
+** part of the build process.
+*/
+#ifndef _SQLITE3_H_
+#define _SQLITE3_H_
+#include <stdarg.h>     /* Needed for the definition of va_list */
+
+/*
+** Make sure we can call this stuff from C++.
+*/
+#if 0
+extern "C" {
+#endif
+
+
+/*
+** Add the ability to override 'extern'
+*/
+#ifndef SQLITE_EXTERN
+# define SQLITE_EXTERN extern
+#endif
+
+#ifndef SQLITE_API
+# define SQLITE_API
+#endif
+
+
+/*
+** These no-op macros are used in front of interfaces to mark those
+** interfaces as either deprecated or experimental.  New applications
+** should not use deprecated interfaces - they are supported for backwards
+** compatibility only.  Application writers should be aware that
+** experimental interfaces are subject to change in point releases.
+**
+** These macros used to resolve to various kinds of compiler magic that
+** would generate warning messages when they were used.  But that
+** compiler magic ended up generating such a flurry of bug reports
+** that we have taken it all out and gone back to using simple
+** noop macros.
+*/
+#define SQLITE_DEPRECATED
+#define SQLITE_EXPERIMENTAL
+
+/*
+** Ensure these symbols were not defined by some previous header file.
+*/
+#ifdef SQLITE_VERSION
+# undef SQLITE_VERSION
+#endif
+#ifdef SQLITE_VERSION_NUMBER
+# undef SQLITE_VERSION_NUMBER
+#endif
+
+/*
+** CAPI3REF: Compile-Time Library Version Numbers
+**
+** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header
+** evaluates to a string literal that is the SQLite version in the
+** format "X.Y.Z" where X is the major version number (always 3 for
+** SQLite3) and Y is the minor version number and Z is the release number.)^
+** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer
+** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same
+** numbers used in [SQLITE_VERSION].)^
+** The SQLITE_VERSION_NUMBER for any given release of SQLite will also
+** be larger than the release from which it is derived.  Either Y will
+** be held constant and Z will be incremented or else Y will be incremented
+** and Z will be reset to zero.
+**
+** Since version 3.6.18, SQLite source code has been stored in the
+** <a href="http://www.fossil-scm.org/">Fossil configuration management
+** system</a>.  ^The SQLITE_SOURCE_ID macro evaluates to
+** a string which identifies a particular check-in of SQLite
+** within its configuration management system.  ^The SQLITE_SOURCE_ID
+** string contains the date and time of the check-in (UTC) and an SHA1
+** hash of the entire source tree.
+**
+** See also: [sqlite3_libversion()],
+** [sqlite3_libversion_number()], [sqlite3_sourceid()],
+** [sqlite_version()] and [sqlite_source_id()].
+*/
+#define SQLITE_VERSION        "3.8.8.1"
+#define SQLITE_VERSION_NUMBER 3008008
+#define SQLITE_SOURCE_ID      "2015-01-20 16:51:25 f73337e3e289915a76ca96e7a05a1a8d4e890d55"
+
+/*
+** CAPI3REF: Run-Time Library Version Numbers
+** KEYWORDS: sqlite3_version, sqlite3_sourceid
+**
+** These interfaces provide the same information as the [SQLITE_VERSION],
+** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros
+** but are associated with the library instead of the header file.  ^(Cautious
+** programmers might include assert() statements in their application to
+** verify that values returned by these interfaces match the macros in
+** the header, and thus insure that the application is
+** compiled with matching library and header files.
+**
+** <blockquote><pre>
+** assert( sqlite3_libversion_number()==SQLITE_VERSION_NUMBER );
+** assert( strcmp(sqlite3_sourceid(),SQLITE_SOURCE_ID)==0 );
+** assert( strcmp(sqlite3_libversion(),SQLITE_VERSION)==0 );
+** </pre></blockquote>)^
+**
+** ^The sqlite3_version[] string constant contains the text of [SQLITE_VERSION]
+** macro.  ^The sqlite3_libversion() function returns a pointer to the
+** to the sqlite3_version[] string constant.  The sqlite3_libversion()
+** function is provided for use in DLLs since DLL users usually do not have
+** direct access to string constants within the DLL.  ^The
+** sqlite3_libversion_number() function returns an integer equal to
+** [SQLITE_VERSION_NUMBER].  ^The sqlite3_sourceid() function returns 
+** a pointer to a string constant whose value is the same as the 
+** [SQLITE_SOURCE_ID] C preprocessor macro.
+**
+** See also: [sqlite_version()] and [sqlite_source_id()].
+*/
+SQLITE_API const char sqlite3_version[] = SQLITE_VERSION;
+SQLITE_API const char *sqlite3_libversion(void);
+SQLITE_API const char *sqlite3_sourceid(void);
+SQLITE_API int sqlite3_libversion_number(void);
+
+/*
+** CAPI3REF: Run-Time Library Compilation Options Diagnostics
+**
+** ^The sqlite3_compileoption_used() function returns 0 or 1 
+** indicating whether the specified option was defined at 
+** compile time.  ^The SQLITE_ prefix may be omitted from the 
+** option name passed to sqlite3_compileoption_used().  
+**
+** ^The sqlite3_compileoption_get() function allows iterating
+** over the list of options that were defined at compile time by
+** returning the N-th compile time option string.  ^If N is out of range,
+** sqlite3_compileoption_get() returns a NULL pointer.  ^The SQLITE_ 
+** prefix is omitted from any strings returned by 
+** sqlite3_compileoption_get().
+**
+** ^Support for the diagnostic functions sqlite3_compileoption_used()
+** and sqlite3_compileoption_get() may be omitted by specifying the 
+** [SQLITE_OMIT_COMPILEOPTION_DIAGS] option at compile time.
+**
+** See also: SQL functions [sqlite_compileoption_used()] and
+** [sqlite_compileoption_get()] and the [compile_options pragma].
+*/
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+SQLITE_API int sqlite3_compileoption_used(const char *zOptName);
+SQLITE_API const char *sqlite3_compileoption_get(int N);
+#endif
+
+/*
+** CAPI3REF: Test To See If The Library Is Threadsafe
+**
+** ^The sqlite3_threadsafe() function returns zero if and only if
+** SQLite was compiled with mutexing code omitted due to the
+** [SQLITE_THREADSAFE] compile-time option being set to 0.
+**
+** SQLite can be compiled with or without mutexes.  When
+** the [SQLITE_THREADSAFE] C preprocessor macro is 1 or 2, mutexes
+** are enabled and SQLite is threadsafe.  When the
+** [SQLITE_THREADSAFE] macro is 0, 
+** the mutexes are omitted.  Without the mutexes, it is not safe
+** to use SQLite concurrently from more than one thread.
+**
+** Enabling mutexes incurs a measurable performance penalty.
+** So if speed is of utmost importance, it makes sense to disable
+** the mutexes.  But for maximum safety, mutexes should be enabled.
+** ^The default behavior is for mutexes to be enabled.
+**
+** This interface can be used by an application to make sure that the
+** version of SQLite that it is linking against was compiled with
+** the desired setting of the [SQLITE_THREADSAFE] macro.
+**
+** This interface only reports on the compile-time mutex setting
+** of the [SQLITE_THREADSAFE] flag.  If SQLite is compiled with
+** SQLITE_THREADSAFE=1 or =2 then mutexes are enabled by default but
+** can be fully or partially disabled using a call to [sqlite3_config()]
+** with the verbs [SQLITE_CONFIG_SINGLETHREAD], [SQLITE_CONFIG_MULTITHREAD],
+** or [SQLITE_CONFIG_SERIALIZED].  ^(The return value of the
+** sqlite3_threadsafe() function shows only the compile-time setting of
+** thread safety, not any run-time changes to that setting made by
+** sqlite3_config(). In other words, the return value from sqlite3_threadsafe()
+** is unchanged by calls to sqlite3_config().)^
+**
+** See the [threading mode] documentation for additional information.
+*/
+SQLITE_API int sqlite3_threadsafe(void);
+
+/*
+** CAPI3REF: Database Connection Handle
+** KEYWORDS: {database connection} {database connections}
+**
+** Each open SQLite database is represented by a pointer to an instance of
+** the opaque structure named "sqlite3".  It is useful to think of an sqlite3
+** pointer as an object.  The [sqlite3_open()], [sqlite3_open16()], and
+** [sqlite3_open_v2()] interfaces are its constructors, and [sqlite3_close()]
+** and [sqlite3_close_v2()] are its destructors.  There are many other
+** interfaces (such as
+** [sqlite3_prepare_v2()], [sqlite3_create_function()], and
+** [sqlite3_busy_timeout()] to name but three) that are methods on an
+** sqlite3 object.
+*/
+typedef struct sqlite3 sqlite3;
+
+/*
+** CAPI3REF: 64-Bit Integer Types
+** KEYWORDS: sqlite_int64 sqlite_uint64
+**
+** Because there is no cross-platform way to specify 64-bit integer types
+** SQLite includes typedefs for 64-bit signed and unsigned integers.
+**
+** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions.
+** The sqlite_int64 and sqlite_uint64 types are supported for backwards
+** compatibility only.
+**
+** ^The sqlite3_int64 and sqlite_int64 types can store integer values
+** between -9223372036854775808 and +9223372036854775807 inclusive.  ^The
+** sqlite3_uint64 and sqlite_uint64 types can store integer values 
+** between 0 and +18446744073709551615 inclusive.
+*/
+#ifdef SQLITE_INT64_TYPE
+  typedef SQLITE_INT64_TYPE sqlite_int64;
+  typedef unsigned SQLITE_INT64_TYPE sqlite_uint64;
+#elif defined(_MSC_VER) || defined(__BORLANDC__)
+  typedef __int64 sqlite_int64;
+  typedef unsigned __int64 sqlite_uint64;
+#else
+  typedef long long int sqlite_int64;
+  typedef unsigned long long int sqlite_uint64;
+#endif
+typedef sqlite_int64 sqlite3_int64;
+typedef sqlite_uint64 sqlite3_uint64;
+
+/*
+** If compiling for a processor that lacks floating point support,
+** substitute integer for floating-point.
+*/
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# define double sqlite3_int64
+#endif
+
+/*
+** CAPI3REF: Closing A Database Connection
+**
+** ^The sqlite3_close() and sqlite3_close_v2() routines are destructors
+** for the [sqlite3] object.
+** ^Calls to sqlite3_close() and sqlite3_close_v2() return [SQLITE_OK] if
+** the [sqlite3] object is successfully destroyed and all associated
+** resources are deallocated.
+**
+** ^If the database connection is associated with unfinalized prepared
+** statements or unfinished sqlite3_backup objects then sqlite3_close()
+** will leave the database connection open and return [SQLITE_BUSY].
+** ^If sqlite3_close_v2() is called with unfinalized prepared statements
+** and/or unfinished sqlite3_backups, then the database connection becomes
+** an unusable "zombie" which will automatically be deallocated when the
+** last prepared statement is finalized or the last sqlite3_backup is
+** finished.  The sqlite3_close_v2() interface is intended for use with
+** host languages that are garbage collected, and where the order in which
+** destructors are called is arbitrary.
+**
+** Applications should [sqlite3_finalize | finalize] all [prepared statements],
+** [sqlite3_blob_close | close] all [BLOB handles], and 
+** [sqlite3_backup_finish | finish] all [sqlite3_backup] objects associated
+** with the [sqlite3] object prior to attempting to close the object.  ^If
+** sqlite3_close_v2() is called on a [database connection] that still has
+** outstanding [prepared statements], [BLOB handles], and/or
+** [sqlite3_backup] objects then it returns [SQLITE_OK] and the deallocation
+** of resources is deferred until all [prepared statements], [BLOB handles],
+** and [sqlite3_backup] objects are also destroyed.
+**
+** ^If an [sqlite3] object is destroyed while a transaction is open,
+** the transaction is automatically rolled back.
+**
+** The C parameter to [sqlite3_close(C)] and [sqlite3_close_v2(C)]
+** must be either a NULL
+** pointer or an [sqlite3] object pointer obtained
+** from [sqlite3_open()], [sqlite3_open16()], or
+** [sqlite3_open_v2()], and not previously closed.
+** ^Calling sqlite3_close() or sqlite3_close_v2() with a NULL pointer
+** argument is a harmless no-op.
+*/
+SQLITE_API int sqlite3_close(sqlite3*);
+SQLITE_API int sqlite3_close_v2(sqlite3*);
+
+/*
+** The type for a callback function.
+** This is legacy and deprecated.  It is included for historical
+** compatibility and is not documented.
+*/
+typedef int (*sqlite3_callback)(void*,int,char**, char**);
+
+/*
+** CAPI3REF: One-Step Query Execution Interface
+**
+** The sqlite3_exec() interface is a convenience wrapper around
+** [sqlite3_prepare_v2()], [sqlite3_step()], and [sqlite3_finalize()],
+** that allows an application to run multiple statements of SQL
+** without having to use a lot of C code. 
+**
+** ^The sqlite3_exec() interface runs zero or more UTF-8 encoded,
+** semicolon-separate SQL statements passed into its 2nd argument,
+** in the context of the [database connection] passed in as its 1st
+** argument.  ^If the callback function of the 3rd argument to
+** sqlite3_exec() is not NULL, then it is invoked for each result row
+** coming out of the evaluated SQL statements.  ^The 4th argument to
+** sqlite3_exec() is relayed through to the 1st argument of each
+** callback invocation.  ^If the callback pointer to sqlite3_exec()
+** is NULL, then no callback is ever invoked and result rows are
+** ignored.
+**
+** ^If an error occurs while evaluating the SQL statements passed into
+** sqlite3_exec(), then execution of the current statement stops and
+** subsequent statements are skipped.  ^If the 5th parameter to sqlite3_exec()
+** is not NULL then any error message is written into memory obtained
+** from [sqlite3_malloc()] and passed back through the 5th parameter.
+** To avoid memory leaks, the application should invoke [sqlite3_free()]
+** on error message strings returned through the 5th parameter of
+** of sqlite3_exec() after the error message string is no longer needed.
+** ^If the 5th parameter to sqlite3_exec() is not NULL and no errors
+** occur, then sqlite3_exec() sets the pointer in its 5th parameter to
+** NULL before returning.
+**
+** ^If an sqlite3_exec() callback returns non-zero, the sqlite3_exec()
+** routine returns SQLITE_ABORT without invoking the callback again and
+** without running any subsequent SQL statements.
+**
+** ^The 2nd argument to the sqlite3_exec() callback function is the
+** number of columns in the result.  ^The 3rd argument to the sqlite3_exec()
+** callback is an array of pointers to strings obtained as if from
+** [sqlite3_column_text()], one for each column.  ^If an element of a
+** result row is NULL then the corresponding string pointer for the
+** sqlite3_exec() callback is a NULL pointer.  ^The 4th argument to the
+** sqlite3_exec() callback is an array of pointers to strings where each
+** entry represents the name of corresponding result column as obtained
+** from [sqlite3_column_name()].
+**
+** ^If the 2nd parameter to sqlite3_exec() is a NULL pointer, a pointer
+** to an empty string, or a pointer that contains only whitespace and/or 
+** SQL comments, then no SQL statements are evaluated and the database
+** is not changed.
+**
+** Restrictions:
+**
+** <ul>
+** <li> The application must insure that the 1st parameter to sqlite3_exec()
+**      is a valid and open [database connection].
+** <li> The application must not close the [database connection] specified by
+**      the 1st parameter to sqlite3_exec() while sqlite3_exec() is running.
+** <li> The application must not modify the SQL statement text passed into
+**      the 2nd parameter of sqlite3_exec() while sqlite3_exec() is running.
+** </ul>
+*/
+SQLITE_API int sqlite3_exec(
+  sqlite3*,                                  /* An open database */
+  const char *sql,                           /* SQL to be evaluated */
+  int (*callback)(void*,int,char**,char**),  /* Callback function */
+  void *,                                    /* 1st argument to callback */
+  char **errmsg                              /* Error msg written here */
+);
+
+/*
+** CAPI3REF: Result Codes
+** KEYWORDS: {result code definitions}
+**
+** Many SQLite functions return an integer result code from the set shown
+** here in order to indicate success or failure.
+**
+** New error codes may be added in future versions of SQLite.
+**
+** See also: [extended result code definitions]
+*/
+#define SQLITE_OK           0   /* Successful result */
+/* beginning-of-error-codes */
+#define SQLITE_ERROR        1   /* SQL error or missing database */
+#define SQLITE_INTERNAL     2   /* Internal logic error in SQLite */
+#define SQLITE_PERM         3   /* Access permission denied */
+#define SQLITE_ABORT        4   /* Callback routine requested an abort */
+#define SQLITE_BUSY         5   /* The database file is locked */
+#define SQLITE_LOCKED       6   /* A table in the database is locked */
+#define SQLITE_NOMEM        7   /* A malloc() failed */
+#define SQLITE_READONLY     8   /* Attempt to write a readonly database */
+#define SQLITE_INTERRUPT    9   /* Operation terminated by sqlite3_interrupt()*/
+#define SQLITE_IOERR       10   /* Some kind of disk I/O error occurred */
+#define SQLITE_CORRUPT     11   /* The database disk image is malformed */
+#define SQLITE_NOTFOUND    12   /* Unknown opcode in sqlite3_file_control() */
+#define SQLITE_FULL        13   /* Insertion failed because database is full */
+#define SQLITE_CANTOPEN    14   /* Unable to open the database file */
+#define SQLITE_PROTOCOL    15   /* Database lock protocol error */
+#define SQLITE_EMPTY       16   /* Database is empty */
+#define SQLITE_SCHEMA      17   /* The database schema changed */
+#define SQLITE_TOOBIG      18   /* String or BLOB exceeds size limit */
+#define SQLITE_CONSTRAINT  19   /* Abort due to constraint violation */
+#define SQLITE_MISMATCH    20   /* Data type mismatch */
+#define SQLITE_MISUSE      21   /* Library used incorrectly */
+#define SQLITE_NOLFS       22   /* Uses OS features not supported on host */
+#define SQLITE_AUTH        23   /* Authorization denied */
+#define SQLITE_FORMAT      24   /* Auxiliary database format error */
+#define SQLITE_RANGE       25   /* 2nd parameter to sqlite3_bind out of range */
+#define SQLITE_NOTADB      26   /* File opened that is not a database file */
+#define SQLITE_NOTICE      27   /* Notifications from sqlite3_log() */
+#define SQLITE_WARNING     28   /* Warnings from sqlite3_log() */
+#define SQLITE_ROW         100  /* sqlite3_step() has another row ready */
+#define SQLITE_DONE        101  /* sqlite3_step() has finished executing */
+/* end-of-error-codes */
+
+/*
+** CAPI3REF: Extended Result Codes
+** KEYWORDS: {extended result code definitions}
+**
+** In its default configuration, SQLite API routines return one of 30 integer
+** [result codes].  However, experience has shown that many of
+** these result codes are too coarse-grained.  They do not provide as
+** much information about problems as programmers might like.  In an effort to
+** address this, newer versions of SQLite (version 3.3.8 and later) include
+** support for additional result codes that provide more detailed information
+** about errors. These [extended result codes] are enabled or disabled
+** on a per database connection basis using the
+** [sqlite3_extended_result_codes()] API.  Or, the extended code for
+** the most recent error can be obtained using
+** [sqlite3_extended_errcode()].
+*/
+#define SQLITE_IOERR_READ              (SQLITE_IOERR | (1<<8))
+#define SQLITE_IOERR_SHORT_READ        (SQLITE_IOERR | (2<<8))
+#define SQLITE_IOERR_WRITE             (SQLITE_IOERR | (3<<8))
+#define SQLITE_IOERR_FSYNC             (SQLITE_IOERR | (4<<8))
+#define SQLITE_IOERR_DIR_FSYNC         (SQLITE_IOERR | (5<<8))
+#define SQLITE_IOERR_TRUNCATE          (SQLITE_IOERR | (6<<8))
+#define SQLITE_IOERR_FSTAT             (SQLITE_IOERR | (7<<8))
+#define SQLITE_IOERR_UNLOCK            (SQLITE_IOERR | (8<<8))
+#define SQLITE_IOERR_RDLOCK            (SQLITE_IOERR | (9<<8))
+#define SQLITE_IOERR_DELETE            (SQLITE_IOERR | (10<<8))
+#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
+#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
+#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
+#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
+#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
+#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
+#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
+#define SQLITE_IOERR_SHMOPEN           (SQLITE_IOERR | (18<<8))
+#define SQLITE_IOERR_SHMSIZE           (SQLITE_IOERR | (19<<8))
+#define SQLITE_IOERR_SHMLOCK           (SQLITE_IOERR | (20<<8))
+#define SQLITE_IOERR_SHMMAP            (SQLITE_IOERR | (21<<8))
+#define SQLITE_IOERR_SEEK              (SQLITE_IOERR | (22<<8))
+#define SQLITE_IOERR_DELETE_NOENT      (SQLITE_IOERR | (23<<8))
+#define SQLITE_IOERR_MMAP              (SQLITE_IOERR | (24<<8))
+#define SQLITE_IOERR_GETTEMPPATH       (SQLITE_IOERR | (25<<8))
+#define SQLITE_IOERR_CONVPATH          (SQLITE_IOERR | (26<<8))
+#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
+#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))
+#define SQLITE_BUSY_SNAPSHOT           (SQLITE_BUSY   |  (2<<8))
+#define SQLITE_CANTOPEN_NOTEMPDIR      (SQLITE_CANTOPEN | (1<<8))
+#define SQLITE_CANTOPEN_ISDIR          (SQLITE_CANTOPEN | (2<<8))
+#define SQLITE_CANTOPEN_FULLPATH       (SQLITE_CANTOPEN | (3<<8))
+#define SQLITE_CANTOPEN_CONVPATH       (SQLITE_CANTOPEN | (4<<8))
+#define SQLITE_CORRUPT_VTAB            (SQLITE_CORRUPT | (1<<8))
+#define SQLITE_READONLY_RECOVERY       (SQLITE_READONLY | (1<<8))
+#define SQLITE_READONLY_CANTLOCK       (SQLITE_READONLY | (2<<8))
+#define SQLITE_READONLY_ROLLBACK       (SQLITE_READONLY | (3<<8))
+#define SQLITE_READONLY_DBMOVED        (SQLITE_READONLY | (4<<8))
+#define SQLITE_ABORT_ROLLBACK          (SQLITE_ABORT | (2<<8))
+#define SQLITE_CONSTRAINT_CHECK        (SQLITE_CONSTRAINT | (1<<8))
+#define SQLITE_CONSTRAINT_COMMITHOOK   (SQLITE_CONSTRAINT | (2<<8))
+#define SQLITE_CONSTRAINT_FOREIGNKEY   (SQLITE_CONSTRAINT | (3<<8))
+#define SQLITE_CONSTRAINT_FUNCTION     (SQLITE_CONSTRAINT | (4<<8))
+#define SQLITE_CONSTRAINT_NOTNULL      (SQLITE_CONSTRAINT | (5<<8))
+#define SQLITE_CONSTRAINT_PRIMARYKEY   (SQLITE_CONSTRAINT | (6<<8))
+#define SQLITE_CONSTRAINT_TRIGGER      (SQLITE_CONSTRAINT | (7<<8))
+#define SQLITE_CONSTRAINT_UNIQUE       (SQLITE_CONSTRAINT | (8<<8))
+#define SQLITE_CONSTRAINT_VTAB         (SQLITE_CONSTRAINT | (9<<8))
+#define SQLITE_CONSTRAINT_ROWID        (SQLITE_CONSTRAINT |(10<<8))
+#define SQLITE_NOTICE_RECOVER_WAL      (SQLITE_NOTICE | (1<<8))
+#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8))
+#define SQLITE_WARNING_AUTOINDEX       (SQLITE_WARNING | (1<<8))
+#define SQLITE_AUTH_USER               (SQLITE_AUTH | (1<<8))
+
+/*
+** CAPI3REF: Flags For File Open Operations
+**
+** These bit values are intended for use in the
+** 3rd parameter to the [sqlite3_open_v2()] interface and
+** in the 4th parameter to the [sqlite3_vfs.xOpen] method.
+*/
+#define SQLITE_OPEN_READONLY         0x00000001  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_READWRITE        0x00000002  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_CREATE           0x00000004  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_DELETEONCLOSE    0x00000008  /* VFS only */
+#define SQLITE_OPEN_EXCLUSIVE        0x00000010  /* VFS only */
+#define SQLITE_OPEN_AUTOPROXY        0x00000020  /* VFS only */
+#define SQLITE_OPEN_URI              0x00000040  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_MEMORY           0x00000080  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_MAIN_DB          0x00000100  /* VFS only */
+#define SQLITE_OPEN_TEMP_DB          0x00000200  /* VFS only */
+#define SQLITE_OPEN_TRANSIENT_DB     0x00000400  /* VFS only */
+#define SQLITE_OPEN_MAIN_JOURNAL     0x00000800  /* VFS only */
+#define SQLITE_OPEN_TEMP_JOURNAL     0x00001000  /* VFS only */
+#define SQLITE_OPEN_SUBJOURNAL       0x00002000  /* VFS only */
+#define SQLITE_OPEN_MASTER_JOURNAL   0x00004000  /* VFS only */
+#define SQLITE_OPEN_NOMUTEX          0x00008000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_FULLMUTEX        0x00010000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_SHAREDCACHE      0x00020000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_PRIVATECACHE     0x00040000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_WAL              0x00080000  /* VFS only */
+
+/* Reserved:                         0x00F00000 */
+
+/*
+** CAPI3REF: Device Characteristics
+**
+** The xDeviceCharacteristics method of the [sqlite3_io_methods]
+** object returns an integer which is a vector of these
+** bit values expressing I/O characteristics of the mass storage
+** device that holds the file that the [sqlite3_io_methods]
+** refers to.
+**
+** The SQLITE_IOCAP_ATOMIC property means that all writes of
+** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
+** mean that writes of blocks that are nnn bytes in size and
+** are aligned to an address which is an integer multiple of
+** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
+** that when data is appended to a file, the data is appended
+** first then the size of the file is extended, never the other
+** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
+** information is written to disk in the same order as calls
+** to xWrite().  The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that
+** after reboot following a crash or power loss, the only bytes in a
+** file that were written at the application level might have changed
+** and that adjacent bytes, even bytes within the same sector are
+** guaranteed to be unchanged.  The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
+** flag indicate that a file cannot be deleted when open.  The
+** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on
+** read-only media and cannot be changed even by processes with
+** elevated privileges.
+*/
+#define SQLITE_IOCAP_ATOMIC                 0x00000001
+#define SQLITE_IOCAP_ATOMIC512              0x00000002
+#define SQLITE_IOCAP_ATOMIC1K               0x00000004
+#define SQLITE_IOCAP_ATOMIC2K               0x00000008
+#define SQLITE_IOCAP_ATOMIC4K               0x00000010
+#define SQLITE_IOCAP_ATOMIC8K               0x00000020
+#define SQLITE_IOCAP_ATOMIC16K              0x00000040
+#define SQLITE_IOCAP_ATOMIC32K              0x00000080
+#define SQLITE_IOCAP_ATOMIC64K              0x00000100
+#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
+#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
+#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
+#define SQLITE_IOCAP_POWERSAFE_OVERWRITE    0x00001000
+#define SQLITE_IOCAP_IMMUTABLE              0x00002000
+
+/*
+** CAPI3REF: File Locking Levels
+**
+** SQLite uses one of these integer values as the second
+** argument to calls it makes to the xLock() and xUnlock() methods
+** of an [sqlite3_io_methods] object.
+*/
+#define SQLITE_LOCK_NONE          0
+#define SQLITE_LOCK_SHARED        1
+#define SQLITE_LOCK_RESERVED      2
+#define SQLITE_LOCK_PENDING       3
+#define SQLITE_LOCK_EXCLUSIVE     4
+
+/*
+** CAPI3REF: Synchronization Type Flags
+**
+** When SQLite invokes the xSync() method of an
+** [sqlite3_io_methods] object it uses a combination of
+** these integer values as the second argument.
+**
+** When the SQLITE_SYNC_DATAONLY flag is used, it means that the
+** sync operation only needs to flush data to mass storage.  Inode
+** information need not be flushed. If the lower four bits of the flag
+** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics.
+** If the lower four bits equal SQLITE_SYNC_FULL, that means
+** to use Mac OS X style fullsync instead of fsync().
+**
+** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags
+** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL
+** settings.  The [synchronous pragma] determines when calls to the
+** xSync VFS method occur and applies uniformly across all platforms.
+** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how
+** energetic or rigorous or forceful the sync operations are and
+** only make a difference on Mac OSX for the default SQLite code.
+** (Third-party VFS implementations might also make the distinction
+** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the
+** operating systems natively supported by SQLite, only Mac OSX
+** cares about the difference.)
+*/
+#define SQLITE_SYNC_NORMAL        0x00002
+#define SQLITE_SYNC_FULL          0x00003
+#define SQLITE_SYNC_DATAONLY      0x00010
+
+/*
+** CAPI3REF: OS Interface Open File Handle
+**
+** An [sqlite3_file] object represents an open file in the 
+** [sqlite3_vfs | OS interface layer].  Individual OS interface
+** implementations will
+** want to subclass this object by appending additional fields
+** for their own use.  The pMethods entry is a pointer to an
+** [sqlite3_io_methods] object that defines methods for performing
+** I/O operations on the open file.
+*/
+typedef struct sqlite3_file sqlite3_file;
+struct sqlite3_file {
+  const struct sqlite3_io_methods *pMethods;  /* Methods for an open file */
+};
+
+/*
+** CAPI3REF: OS Interface File Virtual Methods Object
+**
+** Every file opened by the [sqlite3_vfs.xOpen] method populates an
+** [sqlite3_file] object (or, more commonly, a subclass of the
+** [sqlite3_file] object) with a pointer to an instance of this object.
+** This object defines the methods used to perform various operations
+** against the open file represented by the [sqlite3_file] object.
+**
+** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element 
+** to a non-NULL pointer, then the sqlite3_io_methods.xClose method
+** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed.  The
+** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen]
+** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element
+** to NULL.
+**
+** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or
+** [SQLITE_SYNC_FULL].  The first choice is the normal fsync().
+** The second choice is a Mac OS X style fullsync.  The [SQLITE_SYNC_DATAONLY]
+** flag may be ORed in to indicate that only the data of the file
+** and not its inode needs to be synced.
+**
+** The integer values to xLock() and xUnlock() are one of
+** <ul>
+** <li> [SQLITE_LOCK_NONE],
+** <li> [SQLITE_LOCK_SHARED],
+** <li> [SQLITE_LOCK_RESERVED],
+** <li> [SQLITE_LOCK_PENDING], or
+** <li> [SQLITE_LOCK_EXCLUSIVE].
+** </ul>
+** xLock() increases the lock. xUnlock() decreases the lock.
+** The xCheckReservedLock() method checks whether any database connection,
+** either in this process or in some other process, is holding a RESERVED,
+** PENDING, or EXCLUSIVE lock on the file.  It returns true
+** if such a lock exists and false otherwise.
+**
+** The xFileControl() method is a generic interface that allows custom
+** VFS implementations to directly control an open file using the
+** [sqlite3_file_control()] interface.  The second "op" argument is an
+** integer opcode.  The third argument is a generic pointer intended to
+** point to a structure that may contain arguments or space in which to
+** write return values.  Potential uses for xFileControl() might be
+** functions to enable blocking locks with timeouts, to change the
+** locking strategy (for example to use dot-file locks), to inquire
+** about the status of a lock, or to break stale locks.  The SQLite
+** core reserves all opcodes less than 100 for its own use.
+** A [file control opcodes | list of opcodes] less than 100 is available.
+** Applications that define a custom xFileControl method should use opcodes
+** greater than 100 to avoid conflicts.  VFS implementations should
+** return [SQLITE_NOTFOUND] for file control opcodes that they do not
+** recognize.
+**
+** The xSectorSize() method returns the sector size of the
+** device that underlies the file.  The sector size is the
+** minimum write that can be performed without disturbing
+** other bytes in the file.  The xDeviceCharacteristics()
+** method returns a bit vector describing behaviors of the
+** underlying device:
+**
+** <ul>
+** <li> [SQLITE_IOCAP_ATOMIC]
+** <li> [SQLITE_IOCAP_ATOMIC512]
+** <li> [SQLITE_IOCAP_ATOMIC1K]
+** <li> [SQLITE_IOCAP_ATOMIC2K]
+** <li> [SQLITE_IOCAP_ATOMIC4K]
+** <li> [SQLITE_IOCAP_ATOMIC8K]
+** <li> [SQLITE_IOCAP_ATOMIC16K]
+** <li> [SQLITE_IOCAP_ATOMIC32K]
+** <li> [SQLITE_IOCAP_ATOMIC64K]
+** <li> [SQLITE_IOCAP_SAFE_APPEND]
+** <li> [SQLITE_IOCAP_SEQUENTIAL]
+** </ul>
+**
+** The SQLITE_IOCAP_ATOMIC property means that all writes of
+** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
+** mean that writes of blocks that are nnn bytes in size and
+** are aligned to an address which is an integer multiple of
+** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
+** that when data is appended to a file, the data is appended
+** first then the size of the file is extended, never the other
+** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
+** information is written to disk in the same order as calls
+** to xWrite().
+**
+** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill
+** in the unread portions of the buffer with zeros.  A VFS that
+** fails to zero-fill short reads might seem to work.  However,
+** failure to zero-fill short reads will eventually lead to
+** database corruption.
+*/
+typedef struct sqlite3_io_methods sqlite3_io_methods;
+struct sqlite3_io_methods {
+  int iVersion;
+  int (*xClose)(sqlite3_file*);
+  int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst);
+  int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst);
+  int (*xTruncate)(sqlite3_file*, sqlite3_int64 size);
+  int (*xSync)(sqlite3_file*, int flags);
+  int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize);
+  int (*xLock)(sqlite3_file*, int);
+  int (*xUnlock)(sqlite3_file*, int);
+  int (*xCheckReservedLock)(sqlite3_file*, int *pResOut);
+  int (*xFileControl)(sqlite3_file*, int op, void *pArg);
+  int (*xSectorSize)(sqlite3_file*);
+  int (*xDeviceCharacteristics)(sqlite3_file*);
+  /* Methods above are valid for version 1 */
+  int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**);
+  int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
+  void (*xShmBarrier)(sqlite3_file*);
+  int (*xShmUnmap)(sqlite3_file*, int deleteFlag);
+  /* Methods above are valid for version 2 */
+  int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp);
+  int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p);
+  /* Methods above are valid for version 3 */
+  /* Additional methods may be added in future releases */
+};
+
+/*
+** CAPI3REF: Standard File Control Opcodes
+** KEYWORDS: {file control opcodes} {file control opcode}
+**
+** These integer constants are opcodes for the xFileControl method
+** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()]
+** interface.
+**
+** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging.  This
+** opcode causes the xFileControl method to write the current state of
+** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED],
+** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE])
+** into an integer that the pArg argument points to. This capability
+** is used during testing and only needs to be supported when SQLITE_TEST
+** is defined.
+** <ul>
+** <li>[[SQLITE_FCNTL_SIZE_HINT]]
+** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS
+** layer a hint of how large the database file will grow to be during the
+** current transaction.  This hint is not guaranteed to be accurate but it
+** is often close.  The underlying VFS might choose to preallocate database
+** file space based on this hint in order to help writes to the database
+** file run faster.
+**
+** <li>[[SQLITE_FCNTL_CHUNK_SIZE]]
+** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS
+** extends and truncates the database file in chunks of a size specified
+** by the user. The fourth argument to [sqlite3_file_control()] should 
+** point to an integer (type int) containing the new chunk-size to use
+** for the nominated database. Allocating database file space in large
+** chunks (say 1MB at a time), may reduce file-system fragmentation and
+** improve performance on some systems.
+**
+** <li>[[SQLITE_FCNTL_FILE_POINTER]]
+** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer
+** to the [sqlite3_file] object associated with a particular database
+** connection.  See the [sqlite3_file_control()] documentation for
+** additional information.
+**
+** <li>[[SQLITE_FCNTL_SYNC_OMITTED]]
+** No longer in use.
+**
+** <li>[[SQLITE_FCNTL_SYNC]]
+** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and
+** sent to the VFS immediately before the xSync method is invoked on a
+** database file descriptor. Or, if the xSync method is not invoked 
+** because the user has configured SQLite with 
+** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place 
+** of the xSync method. In most cases, the pointer argument passed with
+** this file-control is NULL. However, if the database file is being synced
+** as part of a multi-database commit, the argument points to a nul-terminated
+** string containing the transactions master-journal file name. VFSes that 
+** do not need this signal should silently ignore this opcode. Applications 
+** should not call [sqlite3_file_control()] with this opcode as doing so may 
+** disrupt the operation of the specialized VFSes that do require it.  
+**
+** <li>[[SQLITE_FCNTL_COMMIT_PHASETWO]]
+** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite
+** and sent to the VFS after a transaction has been committed immediately
+** but before the database is unlocked. VFSes that do not need this signal
+** should silently ignore this opcode. Applications should not call
+** [sqlite3_file_control()] with this opcode as doing so may disrupt the 
+** operation of the specialized VFSes that do require it.  
+**
+** <li>[[SQLITE_FCNTL_WIN32_AV_RETRY]]
+** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic
+** retry counts and intervals for certain disk I/O operations for the
+** windows [VFS] in order to provide robustness in the presence of
+** anti-virus programs.  By default, the windows VFS will retry file read,
+** file write, and file delete operations up to 10 times, with a delay
+** of 25 milliseconds before the first retry and with the delay increasing
+** by an additional 25 milliseconds with each subsequent retry.  This
+** opcode allows these two values (10 retries and 25 milliseconds of delay)
+** to be adjusted.  The values are changed for all database connections
+** within the same process.  The argument is a pointer to an array of two
+** integers where the first integer i the new retry count and the second
+** integer is the delay.  If either integer is negative, then the setting
+** is not changed but instead the prior value of that setting is written
+** into the array entry, allowing the current retry settings to be
+** interrogated.  The zDbName parameter is ignored.
+**
+** <li>[[SQLITE_FCNTL_PERSIST_WAL]]
+** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the
+** persistent [WAL | Write Ahead Log] setting.  By default, the auxiliary
+** write ahead log and shared memory files used for transaction control
+** are automatically deleted when the latest connection to the database
+** closes.  Setting persistent WAL mode causes those files to persist after
+** close.  Persisting the files is useful when other processes that do not
+** have write permission on the directory containing the database file want
+** to read the database file, as the WAL and shared memory files must exist
+** in order for the database to be readable.  The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable persistent WAL mode or 1 to enable persistent
+** WAL mode.  If the integer is -1, then it is overwritten with the current
+** WAL persistence setting.
+**
+** <li>[[SQLITE_FCNTL_POWERSAFE_OVERWRITE]]
+** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the
+** persistent "powersafe-overwrite" or "PSOW" setting.  The PSOW setting
+** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the
+** xDeviceCharacteristics methods. The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage
+** mode.  If the integer is -1, then it is overwritten with the current
+** zero-damage mode setting.
+**
+** <li>[[SQLITE_FCNTL_OVERWRITE]]
+** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening
+** a write transaction to indicate that, unless it is rolled back for some
+** reason, the entire database file will be overwritten by the current 
+** transaction. This is used by VACUUM operations.
+**
+** <li>[[SQLITE_FCNTL_VFSNAME]]
+** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of
+** all [VFSes] in the VFS stack.  The names are of all VFS shims and the
+** final bottom-level VFS are written into memory obtained from 
+** [sqlite3_malloc()] and the result is stored in the char* variable
+** that the fourth parameter of [sqlite3_file_control()] points to.
+** The caller is responsible for freeing the memory when done.  As with
+** all file-control actions, there is no guarantee that this will actually
+** do anything.  Callers should initialize the char* variable to a NULL
+** pointer in case this file-control is not implemented.  This file-control
+** is intended for diagnostic use only.
+**
+** <li>[[SQLITE_FCNTL_PRAGMA]]
+** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] 
+** file control is sent to the open [sqlite3_file] object corresponding
+** to the database file to which the pragma statement refers. ^The argument
+** to the [SQLITE_FCNTL_PRAGMA] file control is an array of
+** pointers to strings (char**) in which the second element of the array
+** is the name of the pragma and the third element is the argument to the
+** pragma or NULL if the pragma has no argument.  ^The handler for an
+** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element
+** of the char** argument point to a string obtained from [sqlite3_mprintf()]
+** or the equivalent and that string will become the result of the pragma or
+** the error message if the pragma fails. ^If the
+** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal 
+** [PRAGMA] processing continues.  ^If the [SQLITE_FCNTL_PRAGMA]
+** file control returns [SQLITE_OK], then the parser assumes that the
+** VFS has handled the PRAGMA itself and the parser generates a no-op
+** prepared statement.  ^If the [SQLITE_FCNTL_PRAGMA] file control returns
+** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means
+** that the VFS encountered an error while handling the [PRAGMA] and the
+** compilation of the PRAGMA fails with an error.  ^The [SQLITE_FCNTL_PRAGMA]
+** file control occurs at the beginning of pragma statement analysis and so
+** it is able to override built-in [PRAGMA] statements.
+**
+** <li>[[SQLITE_FCNTL_BUSYHANDLER]]
+** ^The [SQLITE_FCNTL_BUSYHANDLER]
+** file-control may be invoked by SQLite on the database file handle
+** shortly after it is opened in order to provide a custom VFS with access
+** to the connections busy-handler callback. The argument is of type (void **)
+** - an array of two (void *) values. The first (void *) actually points
+** to a function of type (int (*)(void *)). In order to invoke the connections
+** busy-handler, this function should be invoked with the second (void *) in
+** the array as the only argument. If it returns non-zero, then the operation
+** should be retried. If it returns zero, the custom VFS should abandon the
+** current operation.
+**
+** <li>[[SQLITE_FCNTL_TEMPFILENAME]]
+** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control
+** to have SQLite generate a
+** temporary filename using the same algorithm that is followed to generate
+** temporary filenames for TEMP tables and other internal uses.  The
+** argument should be a char** which will be filled with the filename
+** written into memory obtained from [sqlite3_malloc()].  The caller should
+** invoke [sqlite3_free()] on the result to avoid a memory leak.
+**
+** <li>[[SQLITE_FCNTL_MMAP_SIZE]]
+** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the
+** maximum number of bytes that will be used for memory-mapped I/O.
+** The argument is a pointer to a value of type sqlite3_int64 that
+** is an advisory maximum number of bytes in the file to memory map.  The
+** pointer is overwritten with the old value.  The limit is not changed if
+** the value originally pointed to is negative, and so the current limit 
+** can be queried by passing in a pointer to a negative number.  This
+** file-control is used internally to implement [PRAGMA mmap_size].
+**
+** <li>[[SQLITE_FCNTL_TRACE]]
+** The [SQLITE_FCNTL_TRACE] file control provides advisory information
+** to the VFS about what the higher layers of the SQLite stack are doing.
+** This file control is used by some VFS activity tracing [shims].
+** The argument is a zero-terminated string.  Higher layers in the
+** SQLite stack may generate instances of this file control if
+** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled.
+**
+** <li>[[SQLITE_FCNTL_HAS_MOVED]]
+** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a
+** pointer to an integer and it writes a boolean into that integer depending
+** on whether or not the file has been renamed, moved, or deleted since it
+** was first opened.
+**
+** <li>[[SQLITE_FCNTL_WIN32_SET_HANDLE]]
+** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging.  This
+** opcode causes the xFileControl method to swap the file handle with the one
+** pointed to by the pArg argument.  This capability is used during testing
+** and only needs to be supported when SQLITE_TEST is defined.
+**
+** </ul>
+*/
+#define SQLITE_FCNTL_LOCKSTATE               1
+#define SQLITE_GET_LOCKPROXYFILE             2
+#define SQLITE_SET_LOCKPROXYFILE             3
+#define SQLITE_LAST_ERRNO                    4
+#define SQLITE_FCNTL_SIZE_HINT               5
+#define SQLITE_FCNTL_CHUNK_SIZE              6
+#define SQLITE_FCNTL_FILE_POINTER            7
+#define SQLITE_FCNTL_SYNC_OMITTED            8
+#define SQLITE_FCNTL_WIN32_AV_RETRY          9
+#define SQLITE_FCNTL_PERSIST_WAL            10
+#define SQLITE_FCNTL_OVERWRITE              11
+#define SQLITE_FCNTL_VFSNAME                12
+#define SQLITE_FCNTL_POWERSAFE_OVERWRITE    13
+#define SQLITE_FCNTL_PRAGMA                 14
+#define SQLITE_FCNTL_BUSYHANDLER            15
+#define SQLITE_FCNTL_TEMPFILENAME           16
+#define SQLITE_FCNTL_MMAP_SIZE              18
+#define SQLITE_FCNTL_TRACE                  19
+#define SQLITE_FCNTL_HAS_MOVED              20
+#define SQLITE_FCNTL_SYNC                   21
+#define SQLITE_FCNTL_COMMIT_PHASETWO        22
+#define SQLITE_FCNTL_WIN32_SET_HANDLE       23
+
+/*
+** CAPI3REF: Mutex Handle
+**
+** The mutex module within SQLite defines [sqlite3_mutex] to be an
+** abstract type for a mutex object.  The SQLite core never looks
+** at the internal representation of an [sqlite3_mutex].  It only
+** deals with pointers to the [sqlite3_mutex] object.
+**
+** Mutexes are created using [sqlite3_mutex_alloc()].
+*/
+typedef struct sqlite3_mutex sqlite3_mutex;
+
+/*
+** CAPI3REF: OS Interface Object
+**
+** An instance of the sqlite3_vfs object defines the interface between
+** the SQLite core and the underlying operating system.  The "vfs"
+** in the name of the object stands for "virtual file system".  See
+** the [VFS | VFS documentation] for further information.
+**
+** The value of the iVersion field is initially 1 but may be larger in
+** future versions of SQLite.  Additional fields may be appended to this
+** object when the iVersion value is increased.  Note that the structure
+** of the sqlite3_vfs object changes in the transaction between
+** SQLite version 3.5.9 and 3.6.0 and yet the iVersion field was not
+** modified.
+**
+** The szOsFile field is the size of the subclassed [sqlite3_file]
+** structure used by this VFS.  mxPathname is the maximum length of
+** a pathname in this VFS.
+**
+** Registered sqlite3_vfs objects are kept on a linked list formed by
+** the pNext pointer.  The [sqlite3_vfs_register()]
+** and [sqlite3_vfs_unregister()] interfaces manage this list
+** in a thread-safe way.  The [sqlite3_vfs_find()] interface
+** searches the list.  Neither the application code nor the VFS
+** implementation should use the pNext pointer.
+**
+** The pNext field is the only field in the sqlite3_vfs
+** structure that SQLite will ever modify.  SQLite will only access
+** or modify this field while holding a particular static mutex.
+** The application should never modify anything within the sqlite3_vfs
+** object once the object has been registered.
+**
+** The zName field holds the name of the VFS module.  The name must
+** be unique across all VFS modules.
+**
+** [[sqlite3_vfs.xOpen]]
+** ^SQLite guarantees that the zFilename parameter to xOpen
+** is either a NULL pointer or string obtained
+** from xFullPathname() with an optional suffix added.
+** ^If a suffix is added to the zFilename parameter, it will
+** consist of a single "-" character followed by no more than
+** 11 alphanumeric and/or "-" characters.
+** ^SQLite further guarantees that
+** the string will be valid and unchanged until xClose() is
+** called. Because of the previous sentence,
+** the [sqlite3_file] can safely store a pointer to the
+** filename if it needs to remember the filename for some reason.
+** If the zFilename parameter to xOpen is a NULL pointer then xOpen
+** must invent its own temporary name for the file.  ^Whenever the 
+** xFilename parameter is NULL it will also be the case that the
+** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE].
+**
+** The flags argument to xOpen() includes all bits set in
+** the flags argument to [sqlite3_open_v2()].  Or if [sqlite3_open()]
+** or [sqlite3_open16()] is used, then flags includes at least
+** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]. 
+** If xOpen() opens a file read-only then it sets *pOutFlags to
+** include [SQLITE_OPEN_READONLY].  Other bits in *pOutFlags may be set.
+**
+** ^(SQLite will also add one of the following flags to the xOpen()
+** call, depending on the object being opened:
+**
+** <ul>
+** <li>  [SQLITE_OPEN_MAIN_DB]
+** <li>  [SQLITE_OPEN_MAIN_JOURNAL]
+** <li>  [SQLITE_OPEN_TEMP_DB]
+** <li>  [SQLITE_OPEN_TEMP_JOURNAL]
+** <li>  [SQLITE_OPEN_TRANSIENT_DB]
+** <li>  [SQLITE_OPEN_SUBJOURNAL]
+** <li>  [SQLITE_OPEN_MASTER_JOURNAL]
+** <li>  [SQLITE_OPEN_WAL]
+** </ul>)^
+**
+** The file I/O implementation can use the object type flags to
+** change the way it deals with files.  For example, an application
+** that does not care about crash recovery or rollback might make
+** the open of a journal file a no-op.  Writes to this journal would
+** also be no-ops, and any attempt to read the journal would return
+** SQLITE_IOERR.  Or the implementation might recognize that a database
+** file will be doing page-aligned sector reads and writes in a random
+** order and set up its I/O subsystem accordingly.
+**
+** SQLite might also add one of the following flags to the xOpen method:
+**
+** <ul>
+** <li> [SQLITE_OPEN_DELETEONCLOSE]
+** <li> [SQLITE_OPEN_EXCLUSIVE]
+** </ul>
+**
+** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be
+** deleted when it is closed.  ^The [SQLITE_OPEN_DELETEONCLOSE]
+** will be set for TEMP databases and their journals, transient
+** databases, and subjournals.
+**
+** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction
+** with the [SQLITE_OPEN_CREATE] flag, which are both directly
+** analogous to the O_EXCL and O_CREAT flags of the POSIX open()
+** API.  The SQLITE_OPEN_EXCLUSIVE flag, when paired with the 
+** SQLITE_OPEN_CREATE, is used to indicate that file should always
+** be created, and that it is an error if it already exists.
+** It is <i>not</i> used to indicate the file should be opened 
+** for exclusive access.
+**
+** ^At least szOsFile bytes of memory are allocated by SQLite
+** to hold the  [sqlite3_file] structure passed as the third
+** argument to xOpen.  The xOpen method does not have to
+** allocate the structure; it should just fill it in.  Note that
+** the xOpen method must set the sqlite3_file.pMethods to either
+** a valid [sqlite3_io_methods] object or to NULL.  xOpen must do
+** this even if the open fails.  SQLite expects that the sqlite3_file.pMethods
+** element will be valid after xOpen returns regardless of the success
+** or failure of the xOpen call.
+**
+** [[sqlite3_vfs.xAccess]]
+** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS]
+** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to
+** test whether a file is readable and writable, or [SQLITE_ACCESS_READ]
+** to test whether a file is at least readable.   The file can be a
+** directory.
+**
+** ^SQLite will always allocate at least mxPathname+1 bytes for the
+** output buffer xFullPathname.  The exact size of the output buffer
+** is also passed as a parameter to both  methods. If the output buffer
+** is not large enough, [SQLITE_CANTOPEN] should be returned. Since this is
+** handled as a fatal error by SQLite, vfs implementations should endeavor
+** to prevent this by setting mxPathname to a sufficiently large value.
+**
+** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64()
+** interfaces are not strictly a part of the filesystem, but they are
+** included in the VFS structure for completeness.
+** The xRandomness() function attempts to return nBytes bytes
+** of good-quality randomness into zOut.  The return value is
+** the actual number of bytes of randomness obtained.
+** The xSleep() method causes the calling thread to sleep for at
+** least the number of microseconds given.  ^The xCurrentTime()
+** method returns a Julian Day Number for the current date and time as
+** a floating point value.
+** ^The xCurrentTimeInt64() method returns, as an integer, the Julian
+** Day Number multiplied by 86400000 (the number of milliseconds in 
+** a 24-hour day).  
+** ^SQLite will use the xCurrentTimeInt64() method to get the current
+** date and time if that method is available (if iVersion is 2 or 
+** greater and the function pointer is not NULL) and will fall back
+** to xCurrentTime() if xCurrentTimeInt64() is unavailable.
+**
+** ^The xSetSystemCall(), xGetSystemCall(), and xNestSystemCall() interfaces
+** are not used by the SQLite core.  These optional interfaces are provided
+** by some VFSes to facilitate testing of the VFS code. By overriding 
+** system calls with functions under its control, a test program can
+** simulate faults and error conditions that would otherwise be difficult
+** or impossible to induce.  The set of system calls that can be overridden
+** varies from one VFS to another, and from one version of the same VFS to the
+** next.  Applications that use these interfaces must be prepared for any
+** or all of these interfaces to be NULL or for their behavior to change
+** from one release to the next.  Applications must not attempt to access
+** any of these methods if the iVersion of the VFS is less than 3.
+*/
+typedef struct sqlite3_vfs sqlite3_vfs;
+typedef void (*sqlite3_syscall_ptr)(void);
+struct sqlite3_vfs {
+  int iVersion;            /* Structure version number (currently 3) */
+  int szOsFile;            /* Size of subclassed sqlite3_file */
+  int mxPathname;          /* Maximum file pathname length */
+  sqlite3_vfs *pNext;      /* Next registered VFS */
+  const char *zName;       /* Name of this virtual file system */
+  void *pAppData;          /* Pointer to application-specific data */
+  int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*,
+               int flags, int *pOutFlags);
+  int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir);
+  int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut);
+  int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut);
+  void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename);
+  void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg);
+  void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void);
+  void (*xDlClose)(sqlite3_vfs*, void*);
+  int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut);
+  int (*xSleep)(sqlite3_vfs*, int microseconds);
+  int (*xCurrentTime)(sqlite3_vfs*, double*);
+  int (*xGetLastError)(sqlite3_vfs*, int, char *);
+  /*
+  ** The methods above are in version 1 of the sqlite_vfs object
+  ** definition.  Those that follow are added in version 2 or later
+  */
+  int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*);
+  /*
+  ** The methods above are in versions 1 and 2 of the sqlite_vfs object.
+  ** Those below are for version 3 and greater.
+  */
+  int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr);
+  sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName);
+  const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName);
+  /*
+  ** The methods above are in versions 1 through 3 of the sqlite_vfs object.
+  ** New fields may be appended in figure versions.  The iVersion
+  ** value will increment whenever this happens. 
+  */
+};
+
+/*
+** CAPI3REF: Flags for the xAccess VFS method
+**
+** These integer constants can be used as the third parameter to
+** the xAccess method of an [sqlite3_vfs] object.  They determine
+** what kind of permissions the xAccess method is looking for.
+** With SQLITE_ACCESS_EXISTS, the xAccess method
+** simply checks whether the file exists.
+** With SQLITE_ACCESS_READWRITE, the xAccess method
+** checks whether the named directory is both readable and writable
+** (in other words, if files can be added, removed, and renamed within
+** the directory).
+** The SQLITE_ACCESS_READWRITE constant is currently used only by the
+** [temp_store_directory pragma], though this could change in a future
+** release of SQLite.
+** With SQLITE_ACCESS_READ, the xAccess method
+** checks whether the file is readable.  The SQLITE_ACCESS_READ constant is
+** currently unused, though it might be used in a future release of
+** SQLite.
+*/
+#define SQLITE_ACCESS_EXISTS    0
+#define SQLITE_ACCESS_READWRITE 1   /* Used by PRAGMA temp_store_directory */
+#define SQLITE_ACCESS_READ      2   /* Unused */
+
+/*
+** CAPI3REF: Flags for the xShmLock VFS method
+**
+** These integer constants define the various locking operations
+** allowed by the xShmLock method of [sqlite3_io_methods].  The
+** following are the only legal combinations of flags to the
+** xShmLock method:
+**
+** <ul>
+** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_SHARED
+** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE
+** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED
+** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE
+** </ul>
+**
+** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as
+** was given on the corresponding lock.  
+**
+** The xShmLock method can transition between unlocked and SHARED or
+** between unlocked and EXCLUSIVE.  It cannot transition between SHARED
+** and EXCLUSIVE.
+*/
+#define SQLITE_SHM_UNLOCK       1
+#define SQLITE_SHM_LOCK         2
+#define SQLITE_SHM_SHARED       4
+#define SQLITE_SHM_EXCLUSIVE    8
+
+/*
+** CAPI3REF: Maximum xShmLock index
+**
+** The xShmLock method on [sqlite3_io_methods] may use values
+** between 0 and this upper bound as its "offset" argument.
+** The SQLite core will never attempt to acquire or release a
+** lock outside of this range
+*/
+#define SQLITE_SHM_NLOCK        8
+
+
+/*
+** CAPI3REF: Initialize The SQLite Library
+**
+** ^The sqlite3_initialize() routine initializes the
+** SQLite library.  ^The sqlite3_shutdown() routine
+** deallocates any resources that were allocated by sqlite3_initialize().
+** These routines are designed to aid in process initialization and
+** shutdown on embedded systems.  Workstation applications using
+** SQLite normally do not need to invoke either of these routines.
+**
+** A call to sqlite3_initialize() is an "effective" call if it is
+** the first time sqlite3_initialize() is invoked during the lifetime of
+** the process, or if it is the first time sqlite3_initialize() is invoked
+** following a call to sqlite3_shutdown().  ^(Only an effective call
+** of sqlite3_initialize() does any initialization.  All other calls
+** are harmless no-ops.)^
+**
+** A call to sqlite3_shutdown() is an "effective" call if it is the first
+** call to sqlite3_shutdown() since the last sqlite3_initialize().  ^(Only
+** an effective call to sqlite3_shutdown() does any deinitialization.
+** All other valid calls to sqlite3_shutdown() are harmless no-ops.)^
+**
+** The sqlite3_initialize() interface is threadsafe, but sqlite3_shutdown()
+** is not.  The sqlite3_shutdown() interface must only be called from a
+** single thread.  All open [database connections] must be closed and all
+** other SQLite resources must be deallocated prior to invoking
+** sqlite3_shutdown().
+**
+** Among other things, ^sqlite3_initialize() will invoke
+** sqlite3_os_init().  Similarly, ^sqlite3_shutdown()
+** will invoke sqlite3_os_end().
+**
+** ^The sqlite3_initialize() routine returns [SQLITE_OK] on success.
+** ^If for some reason, sqlite3_initialize() is unable to initialize
+** the library (perhaps it is unable to allocate a needed resource such
+** as a mutex) it returns an [error code] other than [SQLITE_OK].
+**
+** ^The sqlite3_initialize() routine is called internally by many other
+** SQLite interfaces so that an application usually does not need to
+** invoke sqlite3_initialize() directly.  For example, [sqlite3_open()]
+** calls sqlite3_initialize() so the SQLite library will be automatically
+** initialized when [sqlite3_open()] is called if it has not be initialized
+** already.  ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT]
+** compile-time option, then the automatic calls to sqlite3_initialize()
+** are omitted and the application must call sqlite3_initialize() directly
+** prior to using any other SQLite interface.  For maximum portability,
+** it is recommended that applications always invoke sqlite3_initialize()
+** directly prior to using any other SQLite interface.  Future releases
+** of SQLite may require this.  In other words, the behavior exhibited
+** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the
+** default behavior in some future release of SQLite.
+**
+** The sqlite3_os_init() routine does operating-system specific
+** initialization of the SQLite library.  The sqlite3_os_end()
+** routine undoes the effect of sqlite3_os_init().  Typical tasks
+** performed by these routines include allocation or deallocation
+** of static resources, initialization of global variables,
+** setting up a default [sqlite3_vfs] module, or setting up
+** a default configuration using [sqlite3_config()].
+**
+** The application should never invoke either sqlite3_os_init()
+** or sqlite3_os_end() directly.  The application should only invoke
+** sqlite3_initialize() and sqlite3_shutdown().  The sqlite3_os_init()
+** interface is called automatically by sqlite3_initialize() and
+** sqlite3_os_end() is called by sqlite3_shutdown().  Appropriate
+** implementations for sqlite3_os_init() and sqlite3_os_end()
+** are built into SQLite when it is compiled for Unix, Windows, or OS/2.
+** When [custom builds | built for other platforms]
+** (using the [SQLITE_OS_OTHER=1] compile-time
+** option) the application must supply a suitable implementation for
+** sqlite3_os_init() and sqlite3_os_end().  An application-supplied
+** implementation of sqlite3_os_init() or sqlite3_os_end()
+** must return [SQLITE_OK] on success and some other [error code] upon
+** failure.
+*/
+SQLITE_API int sqlite3_initialize(void);
+SQLITE_API int sqlite3_shutdown(void);
+SQLITE_API int sqlite3_os_init(void);
+SQLITE_API int sqlite3_os_end(void);
+
+/*
+** CAPI3REF: Configuring The SQLite Library
+**
+** The sqlite3_config() interface is used to make global configuration
+** changes to SQLite in order to tune SQLite to the specific needs of
+** the application.  The default configuration is recommended for most
+** applications and so this routine is usually not necessary.  It is
+** provided to support rare applications with unusual needs.
+**
+** The sqlite3_config() interface is not threadsafe.  The application
+** must insure that no other SQLite interfaces are invoked by other
+** threads while sqlite3_config() is running.  Furthermore, sqlite3_config()
+** may only be invoked prior to library initialization using
+** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()].
+** ^If sqlite3_config() is called after [sqlite3_initialize()] and before
+** [sqlite3_shutdown()] then it will return SQLITE_MISUSE.
+** Note, however, that ^sqlite3_config() can be called as part of the
+** implementation of an application-defined [sqlite3_os_init()].
+**
+** The first argument to sqlite3_config() is an integer
+** [configuration option] that determines
+** what property of SQLite is to be configured.  Subsequent arguments
+** vary depending on the [configuration option]
+** in the first argument.
+**
+** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK].
+** ^If the option is unknown or SQLite is unable to set the option
+** then this routine returns a non-zero [error code].
+*/
+SQLITE_API int sqlite3_config(int, ...);
+
+/*
+** CAPI3REF: Configure database connections
+**
+** The sqlite3_db_config() interface is used to make configuration
+** changes to a [database connection].  The interface is similar to
+** [sqlite3_config()] except that the changes apply to a single
+** [database connection] (specified in the first argument).
+**
+** The second argument to sqlite3_db_config(D,V,...)  is the
+** [SQLITE_DBCONFIG_LOOKASIDE | configuration verb] - an integer code 
+** that indicates what aspect of the [database connection] is being configured.
+** Subsequent arguments vary depending on the configuration verb.
+**
+** ^Calls to sqlite3_db_config() return SQLITE_OK if and only if
+** the call is considered successful.
+*/
+SQLITE_API int sqlite3_db_config(sqlite3*, int op, ...);
+
+/*
+** CAPI3REF: Memory Allocation Routines
+**
+** An instance of this object defines the interface between SQLite
+** and low-level memory allocation routines.
+**
+** This object is used in only one place in the SQLite interface.
+** A pointer to an instance of this object is the argument to
+** [sqlite3_config()] when the configuration option is
+** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC].  
+** By creating an instance of this object
+** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC])
+** during configuration, an application can specify an alternative
+** memory allocation subsystem for SQLite to use for all of its
+** dynamic memory needs.
+**
+** Note that SQLite comes with several [built-in memory allocators]
+** that are perfectly adequate for the overwhelming majority of applications
+** and that this object is only useful to a tiny minority of applications
+** with specialized memory allocation requirements.  This object is
+** also used during testing of SQLite in order to specify an alternative
+** memory allocator that simulates memory out-of-memory conditions in
+** order to verify that SQLite recovers gracefully from such
+** conditions.
+**
+** The xMalloc, xRealloc, and xFree methods must work like the
+** malloc(), realloc() and free() functions from the standard C library.
+** ^SQLite guarantees that the second argument to
+** xRealloc is always a value returned by a prior call to xRoundup.
+**
+** xSize should return the allocated size of a memory allocation
+** previously obtained from xMalloc or xRealloc.  The allocated size
+** is always at least as big as the requested size but may be larger.
+**
+** The xRoundup method returns what would be the allocated size of
+** a memory allocation given a particular requested size.  Most memory
+** allocators round up memory allocations at least to the next multiple
+** of 8.  Some allocators round up to a larger multiple or to a power of 2.
+** Every memory allocation request coming in through [sqlite3_malloc()]
+** or [sqlite3_realloc()] first calls xRoundup.  If xRoundup returns 0, 
+** that causes the corresponding memory allocation to fail.
+**
+** The xInit method initializes the memory allocator.  For example,
+** it might allocate any require mutexes or initialize internal data
+** structures.  The xShutdown method is invoked (indirectly) by
+** [sqlite3_shutdown()] and should deallocate any resources acquired
+** by xInit.  The pAppData pointer is used as the only parameter to
+** xInit and xShutdown.
+**
+** SQLite holds the [SQLITE_MUTEX_STATIC_MASTER] mutex when it invokes
+** the xInit method, so the xInit method need not be threadsafe.  The
+** xShutdown method is only called from [sqlite3_shutdown()] so it does
+** not need to be threadsafe either.  For all other methods, SQLite
+** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the
+** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which
+** it is by default) and so the methods are automatically serialized.
+** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other
+** methods must be threadsafe or else make their own arrangements for
+** serialization.
+**
+** SQLite will never invoke xInit() more than once without an intervening
+** call to xShutdown().
+*/
+typedef struct sqlite3_mem_methods sqlite3_mem_methods;
+struct sqlite3_mem_methods {
+  void *(*xMalloc)(int);         /* Memory allocation function */
+  void (*xFree)(void*);          /* Free a prior allocation */
+  void *(*xRealloc)(void*,int);  /* Resize an allocation */
+  int (*xSize)(void*);           /* Return the size of an allocation */
+  int (*xRoundup)(int);          /* Round up request size to allocation size */
+  int (*xInit)(void*);           /* Initialize the memory allocator */
+  void (*xShutdown)(void*);      /* Deinitialize the memory allocator */
+  void *pAppData;                /* Argument to xInit() and xShutdown() */
+};
+
+/*
+** CAPI3REF: Configuration Options
+** KEYWORDS: {configuration option}
+**
+** These constants are the available integer configuration options that
+** can be passed as the first argument to the [sqlite3_config()] interface.
+**
+** New configuration options may be added in future releases of SQLite.
+** Existing configuration options might be discontinued.  Applications
+** should check the return code from [sqlite3_config()] to make sure that
+** the call worked.  The [sqlite3_config()] interface will return a
+** non-zero [error code] if a discontinued or unsupported configuration option
+** is invoked.
+**
+** <dl>
+** [[SQLITE_CONFIG_SINGLETHREAD]] <dt>SQLITE_CONFIG_SINGLETHREAD</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Single-thread.  In other words, it disables
+** all mutexing and puts SQLite into a mode where it can only be used
+** by a single thread.   ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to change the [threading mode] from its default
+** value of Single-thread and so [sqlite3_config()] will return 
+** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD
+** configuration option.</dd>
+**
+** [[SQLITE_CONFIG_MULTITHREAD]] <dt>SQLITE_CONFIG_MULTITHREAD</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Multi-thread.  In other words, it disables
+** mutexing on [database connection] and [prepared statement] objects.
+** The application is responsible for serializing access to
+** [database connections] and [prepared statements].  But other mutexes
+** are enabled so that SQLite will be safe to use in a multi-threaded
+** environment as long as no two threads attempt to use the same
+** [database connection] at the same time.  ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to set the Multi-thread [threading mode] and
+** [sqlite3_config()] will return [SQLITE_ERROR] if called with the
+** SQLITE_CONFIG_MULTITHREAD configuration option.</dd>
+**
+** [[SQLITE_CONFIG_SERIALIZED]] <dt>SQLITE_CONFIG_SERIALIZED</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Serialized. In other words, this option enables
+** all mutexes including the recursive
+** mutexes on [database connection] and [prepared statement] objects.
+** In this mode (which is the default when SQLite is compiled with
+** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access
+** to [database connections] and [prepared statements] so that the
+** application is free to use the same [database connection] or the
+** same [prepared statement] in different threads at the same time.
+** ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to set the Serialized [threading mode] and
+** [sqlite3_config()] will return [SQLITE_ERROR] if called with the
+** SQLITE_CONFIG_SERIALIZED configuration option.</dd>
+**
+** [[SQLITE_CONFIG_MALLOC]] <dt>SQLITE_CONFIG_MALLOC</dt>
+** <dd> ^(The SQLITE_CONFIG_MALLOC option takes a single argument which is 
+** a pointer to an instance of the [sqlite3_mem_methods] structure.
+** The argument specifies
+** alternative low-level memory allocation routines to be used in place of
+** the memory allocation routines built into SQLite.)^ ^SQLite makes
+** its own private copy of the content of the [sqlite3_mem_methods] structure
+** before the [sqlite3_config()] call returns.</dd>
+**
+** [[SQLITE_CONFIG_GETMALLOC]] <dt>SQLITE_CONFIG_GETMALLOC</dt>
+** <dd> ^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which
+** is a pointer to an instance of the [sqlite3_mem_methods] structure.
+** The [sqlite3_mem_methods]
+** structure is filled with the currently defined memory allocation routines.)^
+** This option can be used to overload the default memory allocation
+** routines with a wrapper that simulations memory allocation failure or
+** tracks memory usage, for example. </dd>
+**
+** [[SQLITE_CONFIG_MEMSTATUS]] <dt>SQLITE_CONFIG_MEMSTATUS</dt>
+** <dd> ^The SQLITE_CONFIG_MEMSTATUS option takes single argument of type int,
+** interpreted as a boolean, which enables or disables the collection of
+** memory allocation statistics. ^(When memory allocation statistics are
+** disabled, the following SQLite interfaces become non-operational:
+**   <ul>
+**   <li> [sqlite3_memory_used()]
+**   <li> [sqlite3_memory_highwater()]
+**   <li> [sqlite3_soft_heap_limit64()]
+**   <li> [sqlite3_status()]
+**   </ul>)^
+** ^Memory allocation statistics are enabled by default unless SQLite is
+** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory
+** allocation statistics are disabled by default.
+** </dd>
+**
+** [[SQLITE_CONFIG_SCRATCH]] <dt>SQLITE_CONFIG_SCRATCH</dt>
+** <dd> ^The SQLITE_CONFIG_SCRATCH option specifies a static memory buffer
+** that SQLite can use for scratch memory.  ^(There are three arguments
+** to SQLITE_CONFIG_SCRATCH:  A pointer an 8-byte
+** aligned memory buffer from which the scratch allocations will be
+** drawn, the size of each scratch allocation (sz),
+** and the maximum number of scratch allocations (N).)^
+** The first argument must be a pointer to an 8-byte aligned buffer
+** of at least sz*N bytes of memory.
+** ^SQLite will not use more than one scratch buffers per thread.
+** ^SQLite will never request a scratch buffer that is more than 6
+** times the database page size.
+** ^If SQLite needs needs additional
+** scratch memory beyond what is provided by this configuration option, then 
+** [sqlite3_malloc()] will be used to obtain the memory needed.<p>
+** ^When the application provides any amount of scratch memory using
+** SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary large
+** [sqlite3_malloc|heap allocations].
+** This can help [Robson proof|prevent memory allocation failures] due to heap
+** fragmentation in low-memory embedded systems.
+** </dd>
+**
+** [[SQLITE_CONFIG_PAGECACHE]] <dt>SQLITE_CONFIG_PAGECACHE</dt>
+** <dd> ^The SQLITE_CONFIG_PAGECACHE option specifies a static memory buffer
+** that SQLite can use for the database page cache with the default page
+** cache implementation.  
+** This configuration should not be used if an application-define page
+** cache implementation is loaded using the [SQLITE_CONFIG_PCACHE2]
+** configuration option.
+** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to
+** 8-byte aligned
+** memory, the size of each page buffer (sz), and the number of pages (N).
+** The sz argument should be the size of the largest database page
+** (a power of two between 512 and 65536) plus some extra bytes for each
+** page header.  ^The number of extra bytes needed by the page header
+** can be determined using the [SQLITE_CONFIG_PCACHE_HDRSZ] option 
+** to [sqlite3_config()].
+** ^It is harmless, apart from the wasted memory,
+** for the sz parameter to be larger than necessary.  The first
+** argument should pointer to an 8-byte aligned block of memory that
+** is at least sz*N bytes of memory, otherwise subsequent behavior is
+** undefined.
+** ^SQLite will use the memory provided by the first argument to satisfy its
+** memory needs for the first N pages that it adds to cache.  ^If additional
+** page cache memory is needed beyond what is provided by this option, then
+** SQLite goes to [sqlite3_malloc()] for the additional storage space.</dd>
+**
+** [[SQLITE_CONFIG_HEAP]] <dt>SQLITE_CONFIG_HEAP</dt>
+** <dd> ^The SQLITE_CONFIG_HEAP option specifies a static memory buffer 
+** that SQLite will use for all of its dynamic memory allocation needs
+** beyond those provided for by [SQLITE_CONFIG_SCRATCH] and
+** [SQLITE_CONFIG_PAGECACHE].
+** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled
+** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns
+** [SQLITE_ERROR] if invoked otherwise.
+** ^There are three arguments to SQLITE_CONFIG_HEAP:
+** An 8-byte aligned pointer to the memory,
+** the number of bytes in the memory buffer, and the minimum allocation size.
+** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts
+** to using its default memory allocator (the system malloc() implementation),
+** undoing any prior invocation of [SQLITE_CONFIG_MALLOC].  ^If the
+** memory pointer is not NULL then the alternative memory
+** allocator is engaged to handle all of SQLites memory allocation needs.
+** The first pointer (the memory pointer) must be aligned to an 8-byte
+** boundary or subsequent behavior of SQLite will be undefined.
+** The minimum allocation size is capped at 2**12. Reasonable values
+** for the minimum allocation size are 2**5 through 2**8.</dd>
+**
+** [[SQLITE_CONFIG_MUTEX]] <dt>SQLITE_CONFIG_MUTEX</dt>
+** <dd> ^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a
+** pointer to an instance of the [sqlite3_mutex_methods] structure.
+** The argument specifies alternative low-level mutex routines to be used
+** in place the mutex routines built into SQLite.)^  ^SQLite makes a copy of
+** the content of the [sqlite3_mutex_methods] structure before the call to
+** [sqlite3_config()] returns. ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** the entire mutexing subsystem is omitted from the build and hence calls to
+** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will
+** return [SQLITE_ERROR].</dd>
+**
+** [[SQLITE_CONFIG_GETMUTEX]] <dt>SQLITE_CONFIG_GETMUTEX</dt>
+** <dd> ^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which
+** is a pointer to an instance of the [sqlite3_mutex_methods] structure.  The
+** [sqlite3_mutex_methods]
+** structure is filled with the currently defined mutex routines.)^
+** This option can be used to overload the default mutex allocation
+** routines with a wrapper used to track mutex usage for performance
+** profiling or testing, for example.   ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** the entire mutexing subsystem is omitted from the build and hence calls to
+** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will
+** return [SQLITE_ERROR].</dd>
+**
+** [[SQLITE_CONFIG_LOOKASIDE]] <dt>SQLITE_CONFIG_LOOKASIDE</dt>
+** <dd> ^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine
+** the default size of lookaside memory on each [database connection].
+** The first argument is the
+** size of each lookaside buffer slot and the second is the number of
+** slots allocated to each database connection.)^  ^(SQLITE_CONFIG_LOOKASIDE
+** sets the <i>default</i> lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE]
+** option to [sqlite3_db_config()] can be used to change the lookaside
+** configuration on individual connections.)^ </dd>
+**
+** [[SQLITE_CONFIG_PCACHE2]] <dt>SQLITE_CONFIG_PCACHE2</dt>
+** <dd> ^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is 
+** a pointer to an [sqlite3_pcache_methods2] object.  This object specifies
+** the interface to a custom page cache implementation.)^
+** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.</dd>
+**
+** [[SQLITE_CONFIG_GETPCACHE2]] <dt>SQLITE_CONFIG_GETPCACHE2</dt>
+** <dd> ^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which
+** is a pointer to an [sqlite3_pcache_methods2] object.  SQLite copies of
+** the current page cache implementation into that object.)^ </dd>
+**
+** [[SQLITE_CONFIG_LOG]] <dt>SQLITE_CONFIG_LOG</dt>
+** <dd> The SQLITE_CONFIG_LOG option is used to configure the SQLite
+** global [error log].
+** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a
+** function with a call signature of void(*)(void*,int,const char*), 
+** and a pointer to void. ^If the function pointer is not NULL, it is
+** invoked by [sqlite3_log()] to process each logging event.  ^If the
+** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op.
+** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is
+** passed through as the first parameter to the application-defined logger
+** function whenever that function is invoked.  ^The second parameter to
+** the logger function is a copy of the first parameter to the corresponding
+** [sqlite3_log()] call and is intended to be a [result code] or an
+** [extended result code].  ^The third parameter passed to the logger is
+** log message after formatting via [sqlite3_snprintf()].
+** The SQLite logging interface is not reentrant; the logger function
+** supplied by the application must not invoke any SQLite interface.
+** In a multi-threaded application, the application-defined logger
+** function must be threadsafe. </dd>
+**
+** [[SQLITE_CONFIG_URI]] <dt>SQLITE_CONFIG_URI
+** <dd>^(The SQLITE_CONFIG_URI option takes a single argument of type int.
+** If non-zero, then URI handling is globally enabled. If the parameter is zero,
+** then URI handling is globally disabled.)^ ^If URI handling is globally
+** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()],
+** [sqlite3_open16()] or
+** specified as part of [ATTACH] commands are interpreted as URIs, regardless
+** of whether or not the [SQLITE_OPEN_URI] flag is set when the database
+** connection is opened. ^If it is globally disabled, filenames are
+** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the
+** database connection is opened. ^(By default, URI handling is globally
+** disabled. The default value may be changed by compiling with the
+** [SQLITE_USE_URI] symbol defined.)^
+**
+** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]] <dt>SQLITE_CONFIG_COVERING_INDEX_SCAN
+** <dd>^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer
+** argument which is interpreted as a boolean in order to enable or disable
+** the use of covering indices for full table scans in the query optimizer.
+** ^The default setting is determined
+** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on"
+** if that compile-time option is omitted.
+** The ability to disable the use of covering indices for full table scans
+** is because some incorrectly coded legacy applications might malfunction
+** when the optimization is enabled.  Providing the ability to
+** disable the optimization allows the older, buggy application code to work
+** without change even with newer versions of SQLite.
+**
+** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]]
+** <dt>SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE
+** <dd> These options are obsolete and should not be used by new code.
+** They are retained for backwards compatibility but are now no-ops.
+** </dd>
+**
+** [[SQLITE_CONFIG_SQLLOG]]
+** <dt>SQLITE_CONFIG_SQLLOG
+** <dd>This option is only available if sqlite is compiled with the
+** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should
+** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int).
+** The second should be of type (void*). The callback is invoked by the library
+** in three separate circumstances, identified by the value passed as the
+** fourth parameter. If the fourth parameter is 0, then the database connection
+** passed as the second argument has just been opened. The third argument
+** points to a buffer containing the name of the main database file. If the
+** fourth parameter is 1, then the SQL statement that the third parameter
+** points to has just been executed. Or, if the fourth parameter is 2, then
+** the connection being passed as the second parameter is being closed. The
+** third parameter is passed NULL In this case.  An example of using this
+** configuration option can be seen in the "test_sqllog.c" source file in
+** the canonical SQLite source tree.</dd>
+**
+** [[SQLITE_CONFIG_MMAP_SIZE]]
+** <dt>SQLITE_CONFIG_MMAP_SIZE
+** <dd>^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values
+** that are the default mmap size limit (the default setting for
+** [PRAGMA mmap_size]) and the maximum allowed mmap size limit.
+** ^The default setting can be overridden by each database connection using
+** either the [PRAGMA mmap_size] command, or by using the
+** [SQLITE_FCNTL_MMAP_SIZE] file control.  ^(The maximum allowed mmap size
+** will be silently truncated if necessary so that it does not exceed the
+** compile-time maximum mmap size set by the
+** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^
+** ^If either argument to this option is negative, then that argument is
+** changed to its compile-time default.
+**
+** [[SQLITE_CONFIG_WIN32_HEAPSIZE]]
+** <dt>SQLITE_CONFIG_WIN32_HEAPSIZE
+** <dd>^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is
+** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro
+** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value
+** that specifies the maximum size of the created heap.
+** </dl>
+**
+** [[SQLITE_CONFIG_PCACHE_HDRSZ]]
+** <dt>SQLITE_CONFIG_PCACHE_HDRSZ
+** <dd>^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which
+** is a pointer to an integer and writes into that integer the number of extra
+** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE].
+** The amount of extra space required can change depending on the compiler,
+** target platform, and SQLite version.
+**
+** [[SQLITE_CONFIG_PMASZ]]
+** <dt>SQLITE_CONFIG_PMASZ
+** <dd>^The SQLITE_CONFIG_PMASZ option takes a single parameter which
+** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded
+** sorter to that integer.  The default minimum PMA Size is set by the
+** [SQLITE_SORTER_PMASZ] compile-time option.  New threads are launched
+** to help with sort operations when multithreaded sorting
+** is enabled (using the [PRAGMA threads] command) and the amount of content
+** to be sorted exceeds the page size times the minimum of the
+** [PRAGMA cache_size] setting and this value.
+** </dl>
+*/
+#define SQLITE_CONFIG_SINGLETHREAD  1  /* nil */
+#define SQLITE_CONFIG_MULTITHREAD   2  /* nil */
+#define SQLITE_CONFIG_SERIALIZED    3  /* nil */
+#define SQLITE_CONFIG_MALLOC        4  /* sqlite3_mem_methods* */
+#define SQLITE_CONFIG_GETMALLOC     5  /* sqlite3_mem_methods* */
+#define SQLITE_CONFIG_SCRATCH       6  /* void*, int sz, int N */
+#define SQLITE_CONFIG_PAGECACHE     7  /* void*, int sz, int N */
+#define SQLITE_CONFIG_HEAP          8  /* void*, int nByte, int min */
+#define SQLITE_CONFIG_MEMSTATUS     9  /* boolean */
+#define SQLITE_CONFIG_MUTEX        10  /* sqlite3_mutex_methods* */
+#define SQLITE_CONFIG_GETMUTEX     11  /* sqlite3_mutex_methods* */
+/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ 
+#define SQLITE_CONFIG_LOOKASIDE    13  /* int int */
+#define SQLITE_CONFIG_PCACHE       14  /* no-op */
+#define SQLITE_CONFIG_GETPCACHE    15  /* no-op */
+#define SQLITE_CONFIG_LOG          16  /* xFunc, void* */
+#define SQLITE_CONFIG_URI          17  /* int */
+#define SQLITE_CONFIG_PCACHE2      18  /* sqlite3_pcache_methods2* */
+#define SQLITE_CONFIG_GETPCACHE2   19  /* sqlite3_pcache_methods2* */
+#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20  /* int */
+#define SQLITE_CONFIG_SQLLOG       21  /* xSqllog, void* */
+#define SQLITE_CONFIG_MMAP_SIZE    22  /* sqlite3_int64, sqlite3_int64 */
+#define SQLITE_CONFIG_WIN32_HEAPSIZE      23  /* int nByte */
+#define SQLITE_CONFIG_PCACHE_HDRSZ        24  /* int *psz */
+#define SQLITE_CONFIG_PMASZ               25  /* unsigned int szPma */
+
+/*
+** CAPI3REF: Database Connection Configuration Options
+**
+** These constants are the available integer configuration options that
+** can be passed as the second argument to the [sqlite3_db_config()] interface.
+**
+** New configuration options may be added in future releases of SQLite.
+** Existing configuration options might be discontinued.  Applications
+** should check the return code from [sqlite3_db_config()] to make sure that
+** the call worked.  ^The [sqlite3_db_config()] interface will return a
+** non-zero [error code] if a discontinued or unsupported configuration option
+** is invoked.
+**
+** <dl>
+** <dt>SQLITE_DBCONFIG_LOOKASIDE</dt>
+** <dd> ^This option takes three additional arguments that determine the 
+** [lookaside memory allocator] configuration for the [database connection].
+** ^The first argument (the third parameter to [sqlite3_db_config()] is a
+** pointer to a memory buffer to use for lookaside memory.
+** ^The first argument after the SQLITE_DBCONFIG_LOOKASIDE verb
+** may be NULL in which case SQLite will allocate the
+** lookaside buffer itself using [sqlite3_malloc()]. ^The second argument is the
+** size of each lookaside buffer slot.  ^The third argument is the number of
+** slots.  The size of the buffer in the first argument must be greater than
+** or equal to the product of the second and third arguments.  The buffer
+** must be aligned to an 8-byte boundary.  ^If the second argument to
+** SQLITE_DBCONFIG_LOOKASIDE is not a multiple of 8, it is internally
+** rounded down to the next smaller multiple of 8.  ^(The lookaside memory
+** configuration for a database connection can only be changed when that
+** connection is not currently using lookaside memory, or in other words
+** when the "current value" returned by
+** [sqlite3_db_status](D,[SQLITE_CONFIG_LOOKASIDE],...) is zero.
+** Any attempt to change the lookaside memory configuration when lookaside
+** memory is in use leaves the configuration unchanged and returns 
+** [SQLITE_BUSY].)^</dd>
+**
+** <dt>SQLITE_DBCONFIG_ENABLE_FKEY</dt>
+** <dd> ^This option is used to enable or disable the enforcement of
+** [foreign key constraints].  There should be two additional arguments.
+** The first argument is an integer which is 0 to disable FK enforcement,
+** positive to enable FK enforcement or negative to leave FK enforcement
+** unchanged.  The second parameter is a pointer to an integer into which
+** is written 0 or 1 to indicate whether FK enforcement is off or on
+** following this call.  The second parameter may be a NULL pointer, in
+** which case the FK enforcement setting is not reported back. </dd>
+**
+** <dt>SQLITE_DBCONFIG_ENABLE_TRIGGER</dt>
+** <dd> ^This option is used to enable or disable [CREATE TRIGGER | triggers].
+** There should be two additional arguments.
+** The first argument is an integer which is 0 to disable triggers,
+** positive to enable triggers or negative to leave the setting unchanged.
+** The second parameter is a pointer to an integer into which
+** is written 0 or 1 to indicate whether triggers are disabled or enabled
+** following this call.  The second parameter may be a NULL pointer, in
+** which case the trigger setting is not reported back. </dd>
+**
+** </dl>
+*/
+#define SQLITE_DBCONFIG_LOOKASIDE       1001  /* void* int int */
+#define SQLITE_DBCONFIG_ENABLE_FKEY     1002  /* int int* */
+#define SQLITE_DBCONFIG_ENABLE_TRIGGER  1003  /* int int* */
+
+
+/*
+** CAPI3REF: Enable Or Disable Extended Result Codes
+**
+** ^The sqlite3_extended_result_codes() routine enables or disables the
+** [extended result codes] feature of SQLite. ^The extended result
+** codes are disabled by default for historical compatibility.
+*/
+SQLITE_API int sqlite3_extended_result_codes(sqlite3*, int onoff);
+
+/*
+** CAPI3REF: Last Insert Rowid
+**
+** ^Each entry in most SQLite tables (except for [WITHOUT ROWID] tables)
+** has a unique 64-bit signed
+** integer key called the [ROWID | "rowid"]. ^The rowid is always available
+** as an undeclared column named ROWID, OID, or _ROWID_ as long as those
+** names are not also used by explicitly declared columns. ^If
+** the table has a column of type [INTEGER PRIMARY KEY] then that column
+** is another alias for the rowid.
+**
+** ^The sqlite3_last_insert_rowid(D) interface returns the [rowid] of the 
+** most recent successful [INSERT] into a rowid table or [virtual table]
+** on database connection D.
+** ^Inserts into [WITHOUT ROWID] tables are not recorded.
+** ^If no successful [INSERT]s into rowid tables
+** have ever occurred on the database connection D, 
+** then sqlite3_last_insert_rowid(D) returns zero.
+**
+** ^(If an [INSERT] occurs within a trigger or within a [virtual table]
+** method, then this routine will return the [rowid] of the inserted
+** row as long as the trigger or virtual table method is running.
+** But once the trigger or virtual table method ends, the value returned 
+** by this routine reverts to what it was before the trigger or virtual
+** table method began.)^
+**
+** ^An [INSERT] that fails due to a constraint violation is not a
+** successful [INSERT] and does not change the value returned by this
+** routine.  ^Thus INSERT OR FAIL, INSERT OR IGNORE, INSERT OR ROLLBACK,
+** and INSERT OR ABORT make no changes to the return value of this
+** routine when their insertion fails.  ^(When INSERT OR REPLACE
+** encounters a constraint violation, it does not fail.  The
+** INSERT continues to completion after deleting rows that caused
+** the constraint problem so INSERT OR REPLACE will always change
+** the return value of this interface.)^
+**
+** ^For the purposes of this routine, an [INSERT] is considered to
+** be successful even if it is subsequently rolled back.
+**
+** This function is accessible to SQL statements via the
+** [last_insert_rowid() SQL function].
+**
+** If a separate thread performs a new [INSERT] on the same
+** database connection while the [sqlite3_last_insert_rowid()]
+** function is running and thus changes the last insert [rowid],
+** then the value returned by [sqlite3_last_insert_rowid()] is
+** unpredictable and might not equal either the old or the new
+** last insert [rowid].
+*/
+SQLITE_API sqlite3_int64 sqlite3_last_insert_rowid(sqlite3*);
+
+/*
+** CAPI3REF: Count The Number Of Rows Modified
+**
+** ^This function returns the number of rows modified, inserted or
+** deleted by the most recently completed INSERT, UPDATE or DELETE
+** statement on the database connection specified by the only parameter.
+** ^Executing any other type of SQL statement does not modify the value
+** returned by this function.
+**
+** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are
+** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], 
+** [foreign key actions] or [REPLACE] constraint resolution are not counted.
+** 
+** Changes to a view that are intercepted by 
+** [INSTEAD OF trigger | INSTEAD OF triggers] are not counted. ^The value 
+** returned by sqlite3_changes() immediately after an INSERT, UPDATE or 
+** DELETE statement run on a view is always zero. Only changes made to real 
+** tables are counted.
+**
+** Things are more complicated if the sqlite3_changes() function is
+** executed while a trigger program is running. This may happen if the
+** program uses the [changes() SQL function], or if some other callback
+** function invokes sqlite3_changes() directly. Essentially:
+** 
+** <ul>
+**   <li> ^(Before entering a trigger program the value returned by
+**        sqlite3_changes() function is saved. After the trigger program 
+**        has finished, the original value is restored.)^
+** 
+**   <li> ^(Within a trigger program each INSERT, UPDATE and DELETE 
+**        statement sets the value returned by sqlite3_changes() 
+**        upon completion as normal. Of course, this value will not include 
+**        any changes performed by sub-triggers, as the sqlite3_changes() 
+**        value will be saved and restored after each sub-trigger has run.)^
+** </ul>
+** 
+** ^This means that if the changes() SQL function (or similar) is used
+** by the first INSERT, UPDATE or DELETE statement within a trigger, it 
+** returns the value as set when the calling statement began executing.
+** ^If it is used by the second or subsequent such statement within a trigger 
+** program, the value returned reflects the number of rows modified by the 
+** previous INSERT, UPDATE or DELETE statement within the same trigger.
+**
+** See also the [sqlite3_total_changes()] interface, the
+** [count_changes pragma], and the [changes() SQL function].
+**
+** If a separate thread makes changes on the same database connection
+** while [sqlite3_changes()] is running then the value returned
+** is unpredictable and not meaningful.
+*/
+SQLITE_API int sqlite3_changes(sqlite3*);
+
+/*
+** CAPI3REF: Total Number Of Rows Modified
+**
+** ^This function returns the total number of rows inserted, modified or
+** deleted by all [INSERT], [UPDATE] or [DELETE] statements completed
+** since the database connection was opened, including those executed as
+** part of trigger programs. ^Executing any other type of SQL statement
+** does not affect the value returned by sqlite3_total_changes().
+** 
+** ^Changes made as part of [foreign key actions] are included in the
+** count, but those made as part of REPLACE constraint resolution are
+** not. ^Changes to a view that are intercepted by INSTEAD OF triggers 
+** are not counted.
+** 
+** See also the [sqlite3_changes()] interface, the
+** [count_changes pragma], and the [total_changes() SQL function].
+**
+** If a separate thread makes changes on the same database connection
+** while [sqlite3_total_changes()] is running then the value
+** returned is unpredictable and not meaningful.
+*/
+SQLITE_API int sqlite3_total_changes(sqlite3*);
+
+/*
+** CAPI3REF: Interrupt A Long-Running Query
+**
+** ^This function causes any pending database operation to abort and
+** return at its earliest opportunity. This routine is typically
+** called in response to a user action such as pressing "Cancel"
+** or Ctrl-C where the user wants a long query operation to halt
+** immediately.
+**
+** ^It is safe to call this routine from a thread different from the
+** thread that is currently running the database operation.  But it
+** is not safe to call this routine with a [database connection] that
+** is closed or might close before sqlite3_interrupt() returns.
+**
+** ^If an SQL operation is very nearly finished at the time when
+** sqlite3_interrupt() is called, then it might not have an opportunity
+** to be interrupted and might continue to completion.
+**
+** ^An SQL operation that is interrupted will return [SQLITE_INTERRUPT].
+** ^If the interrupted SQL operation is an INSERT, UPDATE, or DELETE
+** that is inside an explicit transaction, then the entire transaction
+** will be rolled back automatically.
+**
+** ^The sqlite3_interrupt(D) call is in effect until all currently running
+** SQL statements on [database connection] D complete.  ^Any new SQL statements
+** that are started after the sqlite3_interrupt() call and before the 
+** running statements reaches zero are interrupted as if they had been
+** running prior to the sqlite3_interrupt() call.  ^New SQL statements
+** that are started after the running statement count reaches zero are
+** not effected by the sqlite3_interrupt().
+** ^A call to sqlite3_interrupt(D) that occurs when there are no running
+** SQL statements is a no-op and has no effect on SQL statements
+** that are started after the sqlite3_interrupt() call returns.
+**
+** If the database connection closes while [sqlite3_interrupt()]
+** is running then bad things will likely happen.
+*/
+SQLITE_API void sqlite3_interrupt(sqlite3*);
+
+/*
+** CAPI3REF: Determine If An SQL Statement Is Complete
+**
+** These routines are useful during command-line input to determine if the
+** currently entered text seems to form a complete SQL statement or
+** if additional input is needed before sending the text into
+** SQLite for parsing.  ^These routines return 1 if the input string
+** appears to be a complete SQL statement.  ^A statement is judged to be
+** complete if it ends with a semicolon token and is not a prefix of a
+** well-formed CREATE TRIGGER statement.  ^Semicolons that are embedded within
+** string literals or quoted identifier names or comments are not
+** independent tokens (they are part of the token in which they are
+** embedded) and thus do not count as a statement terminator.  ^Whitespace
+** and comments that follow the final semicolon are ignored.
+**
+** ^These routines return 0 if the statement is incomplete.  ^If a
+** memory allocation fails, then SQLITE_NOMEM is returned.
+**
+** ^These routines do not parse the SQL statements thus
+** will not detect syntactically incorrect SQL.
+**
+** ^(If SQLite has not been initialized using [sqlite3_initialize()] prior 
+** to invoking sqlite3_complete16() then sqlite3_initialize() is invoked
+** automatically by sqlite3_complete16().  If that initialization fails,
+** then the return value from sqlite3_complete16() will be non-zero
+** regardless of whether or not the input SQL is complete.)^
+**
+** The input to [sqlite3_complete()] must be a zero-terminated
+** UTF-8 string.
+**
+** The input to [sqlite3_complete16()] must be a zero-terminated
+** UTF-16 string in native byte order.
+*/
+SQLITE_API int sqlite3_complete(const char *sql);
+SQLITE_API int sqlite3_complete16(const void *sql);
+
+/*
+** CAPI3REF: Register A Callback To Handle SQLITE_BUSY Errors
+** KEYWORDS: {busy-handler callback} {busy handler}
+**
+** ^The sqlite3_busy_handler(D,X,P) routine sets a callback function X
+** that might be invoked with argument P whenever
+** an attempt is made to access a database table associated with
+** [database connection] D when another thread
+** or process has the table locked.
+** The sqlite3_busy_handler() interface is used to implement
+** [sqlite3_busy_timeout()] and [PRAGMA busy_timeout].
+**
+** ^If the busy callback is NULL, then [SQLITE_BUSY]
+** is returned immediately upon encountering the lock.  ^If the busy callback
+** is not NULL, then the callback might be invoked with two arguments.
+**
+** ^The first argument to the busy handler is a copy of the void* pointer which
+** is the third argument to sqlite3_busy_handler().  ^The second argument to
+** the busy handler callback is the number of times that the busy handler has
+** been invoked previously for the same locking event.  ^If the
+** busy callback returns 0, then no additional attempts are made to
+** access the database and [SQLITE_BUSY] is returned
+** to the application.
+** ^If the callback returns non-zero, then another attempt
+** is made to access the database and the cycle repeats.
+**
+** The presence of a busy handler does not guarantee that it will be invoked
+** when there is lock contention. ^If SQLite determines that invoking the busy
+** handler could result in a deadlock, it will go ahead and return [SQLITE_BUSY]
+** to the application instead of invoking the 
+** busy handler.
+** Consider a scenario where one process is holding a read lock that
+** it is trying to promote to a reserved lock and
+** a second process is holding a reserved lock that it is trying
+** to promote to an exclusive lock.  The first process cannot proceed
+** because it is blocked by the second and the second process cannot
+** proceed because it is blocked by the first.  If both processes
+** invoke the busy handlers, neither will make any progress.  Therefore,
+** SQLite returns [SQLITE_BUSY] for the first process, hoping that this
+** will induce the first process to release its read lock and allow
+** the second process to proceed.
+**
+** ^The default busy callback is NULL.
+**
+** ^(There can only be a single busy handler defined for each
+** [database connection].  Setting a new busy handler clears any
+** previously set handler.)^  ^Note that calling [sqlite3_busy_timeout()]
+** or evaluating [PRAGMA busy_timeout=N] will change the
+** busy handler and thus clear any previously set busy handler.
+**
+** The busy callback should not take any actions which modify the
+** database connection that invoked the busy handler.  In other words,
+** the busy handler is not reentrant.  Any such actions
+** result in undefined behavior.
+** 
+** A busy handler must not close the database connection
+** or [prepared statement] that invoked the busy handler.
+*/
+SQLITE_API int sqlite3_busy_handler(sqlite3*, int(*)(void*,int), void*);
+
+/*
+** CAPI3REF: Set A Busy Timeout
+**
+** ^This routine sets a [sqlite3_busy_handler | busy handler] that sleeps
+** for a specified amount of time when a table is locked.  ^The handler
+** will sleep multiple times until at least "ms" milliseconds of sleeping
+** have accumulated.  ^After at least "ms" milliseconds of sleeping,
+** the handler returns 0 which causes [sqlite3_step()] to return
+** [SQLITE_BUSY].
+**
+** ^Calling this routine with an argument less than or equal to zero
+** turns off all busy handlers.
+**
+** ^(There can only be a single busy handler for a particular
+** [database connection] at any given moment.  If another busy handler
+** was defined  (using [sqlite3_busy_handler()]) prior to calling
+** this routine, that other busy handler is cleared.)^
+**
+** See also:  [PRAGMA busy_timeout]
+*/
+SQLITE_API int sqlite3_busy_timeout(sqlite3*, int ms);
+
+/*
+** CAPI3REF: Convenience Routines For Running Queries
+**
+** This is a legacy interface that is preserved for backwards compatibility.
+** Use of this interface is not recommended.
+**
+** Definition: A <b>result table</b> is memory data structure created by the
+** [sqlite3_get_table()] interface.  A result table records the
+** complete query results from one or more queries.
+**
+** The table conceptually has a number of rows and columns.  But
+** these numbers are not part of the result table itself.  These
+** numbers are obtained separately.  Let N be the number of rows
+** and M be the number of columns.
+**
+** A result table is an array of pointers to zero-terminated UTF-8 strings.
+** There are (N+1)*M elements in the array.  The first M pointers point
+** to zero-terminated strings that  contain the names of the columns.
+** The remaining entries all point to query results.  NULL values result
+** in NULL pointers.  All other values are in their UTF-8 zero-terminated
+** string representation as returned by [sqlite3_column_text()].
+**
+** A result table might consist of one or more memory allocations.
+** It is not safe to pass a result table directly to [sqlite3_free()].
+** A result table should be deallocated using [sqlite3_free_table()].
+**
+** ^(As an example of the result table format, suppose a query result
+** is as follows:
+**
+** <blockquote><pre>
+**        Name        | Age
+**        -----------------------
+**        Alice       | 43
+**        Bob         | 28
+**        Cindy       | 21
+** </pre></blockquote>
+**
+** There are two column (M==2) and three rows (N==3).  Thus the
+** result table has 8 entries.  Suppose the result table is stored
+** in an array names azResult.  Then azResult holds this content:
+**
+** <blockquote><pre>
+**        azResult&#91;0] = "Name";
+**        azResult&#91;1] = "Age";
+**        azResult&#91;2] = "Alice";
+**        azResult&#91;3] = "43";
+**        azResult&#91;4] = "Bob";
+**        azResult&#91;5] = "28";
+**        azResult&#91;6] = "Cindy";
+**        azResult&#91;7] = "21";
+** </pre></blockquote>)^
+**
+** ^The sqlite3_get_table() function evaluates one or more
+** semicolon-separated SQL statements in the zero-terminated UTF-8
+** string of its 2nd parameter and returns a result table to the
+** pointer given in its 3rd parameter.
+**
+** After the application has finished with the result from sqlite3_get_table(),
+** it must pass the result table pointer to sqlite3_free_table() in order to
+** release the memory that was malloced.  Because of the way the
+** [sqlite3_malloc()] happens within sqlite3_get_table(), the calling
+** function must not try to call [sqlite3_free()] directly.  Only
+** [sqlite3_free_table()] is able to release the memory properly and safely.
+**
+** The sqlite3_get_table() interface is implemented as a wrapper around
+** [sqlite3_exec()].  The sqlite3_get_table() routine does not have access
+** to any internal data structures of SQLite.  It uses only the public
+** interface defined here.  As a consequence, errors that occur in the
+** wrapper layer outside of the internal [sqlite3_exec()] call are not
+** reflected in subsequent calls to [sqlite3_errcode()] or
+** [sqlite3_errmsg()].
+*/
+SQLITE_API int sqlite3_get_table(
+  sqlite3 *db,          /* An open database */
+  const char *zSql,     /* SQL to be evaluated */
+  char ***pazResult,    /* Results of the query */
+  int *pnRow,           /* Number of result rows written here */
+  int *pnColumn,        /* Number of result columns written here */
+  char **pzErrmsg       /* Error msg written here */
+);
+SQLITE_API void sqlite3_free_table(char **result);
+
+/*
+** CAPI3REF: Formatted String Printing Functions
+**
+** These routines are work-alikes of the "printf()" family of functions
+** from the standard C library.
+**
+** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their
+** results into memory obtained from [sqlite3_malloc()].
+** The strings returned by these two routines should be
+** released by [sqlite3_free()].  ^Both routines return a
+** NULL pointer if [sqlite3_malloc()] is unable to allocate enough
+** memory to hold the resulting string.
+**
+** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from
+** the standard C library.  The result is written into the
+** buffer supplied as the second parameter whose size is given by
+** the first parameter. Note that the order of the
+** first two parameters is reversed from snprintf().)^  This is an
+** historical accident that cannot be fixed without breaking
+** backwards compatibility.  ^(Note also that sqlite3_snprintf()
+** returns a pointer to its buffer instead of the number of
+** characters actually written into the buffer.)^  We admit that
+** the number of characters written would be a more useful return
+** value but we cannot change the implementation of sqlite3_snprintf()
+** now without breaking compatibility.
+**
+** ^As long as the buffer size is greater than zero, sqlite3_snprintf()
+** guarantees that the buffer is always zero-terminated.  ^The first
+** parameter "n" is the total size of the buffer, including space for
+** the zero terminator.  So the longest string that can be completely
+** written will be n-1 characters.
+**
+** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf().
+**
+** These routines all implement some additional formatting
+** options that are useful for constructing SQL statements.
+** All of the usual printf() formatting options apply.  In addition, there
+** is are "%q", "%Q", and "%z" options.
+**
+** ^(The %q option works like %s in that it substitutes a nul-terminated
+** string from the argument list.  But %q also doubles every '\'' character.
+** %q is designed for use inside a string literal.)^  By doubling each '\''
+** character it escapes that character and allows it to be inserted into
+** the string.
+**
+** For example, assume the string variable zText contains text as follows:
+**
+** <blockquote><pre>
+**  char *zText = "It's a happy day!";
+** </pre></blockquote>
+**
+** One can use this text in an SQL statement as follows:
+**
+** <blockquote><pre>
+**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES('%q')", zText);
+**  sqlite3_exec(db, zSQL, 0, 0, 0);
+**  sqlite3_free(zSQL);
+** </pre></blockquote>
+**
+** Because the %q format string is used, the '\'' character in zText
+** is escaped and the SQL generated is as follows:
+**
+** <blockquote><pre>
+**  INSERT INTO table1 VALUES('It''s a happy day!')
+** </pre></blockquote>
+**
+** This is correct.  Had we used %s instead of %q, the generated SQL
+** would have looked like this:
+**
+** <blockquote><pre>
+**  INSERT INTO table1 VALUES('It's a happy day!');
+** </pre></blockquote>
+**
+** This second example is an SQL syntax error.  As a general rule you should
+** always use %q instead of %s when inserting text into a string literal.
+**
+** ^(The %Q option works like %q except it also adds single quotes around
+** the outside of the total string.  Additionally, if the parameter in the
+** argument list is a NULL pointer, %Q substitutes the text "NULL" (without
+** single quotes).)^  So, for example, one could say:
+**
+** <blockquote><pre>
+**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES(%Q)", zText);
+**  sqlite3_exec(db, zSQL, 0, 0, 0);
+**  sqlite3_free(zSQL);
+** </pre></blockquote>
+**
+** The code above will render a correct SQL statement in the zSQL
+** variable even if the zText variable is a NULL pointer.
+**
+** ^(The "%z" formatting option works like "%s" but with the
+** addition that after the string has been read and copied into
+** the result, [sqlite3_free()] is called on the input string.)^
+*/
+SQLITE_API char *sqlite3_mprintf(const char*,...);
+SQLITE_API char *sqlite3_vmprintf(const char*, va_list);
+SQLITE_API char *sqlite3_snprintf(int,char*,const char*, ...);
+SQLITE_API char *sqlite3_vsnprintf(int,char*,const char*, va_list);
+
+/*
+** CAPI3REF: Memory Allocation Subsystem
+**
+** The SQLite core uses these three routines for all of its own
+** internal memory allocation needs. "Core" in the previous sentence
+** does not include operating-system specific VFS implementation.  The
+** Windows VFS uses native malloc() and free() for some operations.
+**
+** ^The sqlite3_malloc() routine returns a pointer to a block
+** of memory at least N bytes in length, where N is the parameter.
+** ^If sqlite3_malloc() is unable to obtain sufficient free
+** memory, it returns a NULL pointer.  ^If the parameter N to
+** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns
+** a NULL pointer.
+**
+** ^The sqlite3_malloc64(N) routine works just like
+** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead
+** of a signed 32-bit integer.
+**
+** ^Calling sqlite3_free() with a pointer previously returned
+** by sqlite3_malloc() or sqlite3_realloc() releases that memory so
+** that it might be reused.  ^The sqlite3_free() routine is
+** a no-op if is called with a NULL pointer.  Passing a NULL pointer
+** to sqlite3_free() is harmless.  After being freed, memory
+** should neither be read nor written.  Even reading previously freed
+** memory might result in a segmentation fault or other severe error.
+** Memory corruption, a segmentation fault, or other severe error
+** might result if sqlite3_free() is called with a non-NULL pointer that
+** was not obtained from sqlite3_malloc() or sqlite3_realloc().
+**
+** ^The sqlite3_realloc(X,N) interface attempts to resize a
+** prior memory allocation X to be at least N bytes.
+** ^If the X parameter to sqlite3_realloc(X,N)
+** is a NULL pointer then its behavior is identical to calling
+** sqlite3_malloc(N).
+** ^If the N parameter to sqlite3_realloc(X,N) is zero or
+** negative then the behavior is exactly the same as calling
+** sqlite3_free(X).
+** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation
+** of at least N bytes in size or NULL if insufficient memory is available.
+** ^If M is the size of the prior allocation, then min(N,M) bytes
+** of the prior allocation are copied into the beginning of buffer returned
+** by sqlite3_realloc(X,N) and the prior allocation is freed.
+** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the
+** prior allocation is not freed.
+**
+** ^The sqlite3_realloc64(X,N) interfaces works the same as
+** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead
+** of a 32-bit signed integer.
+**
+** ^If X is a memory allocation previously obtained from sqlite3_malloc(),
+** sqlite3_malloc64(), sqlite3_realloc(), or sqlite3_realloc64(), then
+** sqlite3_msize(X) returns the size of that memory allocation in bytes.
+** ^The value returned by sqlite3_msize(X) might be larger than the number
+** of bytes requested when X was allocated.  ^If X is a NULL pointer then
+** sqlite3_msize(X) returns zero.  If X points to something that is not
+** the beginning of memory allocation, or if it points to a formerly
+** valid memory allocation that has now been freed, then the behavior
+** of sqlite3_msize(X) is undefined and possibly harmful.
+**
+** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(),
+** sqlite3_malloc64(), and sqlite3_realloc64()
+** is always aligned to at least an 8 byte boundary, or to a
+** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time
+** option is used.
+**
+** In SQLite version 3.5.0 and 3.5.1, it was possible to define
+** the SQLITE_OMIT_MEMORY_ALLOCATION which would cause the built-in
+** implementation of these routines to be omitted.  That capability
+** is no longer provided.  Only built-in memory allocators can be used.
+**
+** Prior to SQLite version 3.7.10, the Windows OS interface layer called
+** the system malloc() and free() directly when converting
+** filenames between the UTF-8 encoding used by SQLite
+** and whatever filename encoding is used by the particular Windows
+** installation.  Memory allocation errors were detected, but
+** they were reported back as [SQLITE_CANTOPEN] or
+** [SQLITE_IOERR] rather than [SQLITE_NOMEM].
+**
+** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()]
+** must be either NULL or else pointers obtained from a prior
+** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have
+** not yet been released.
+**
+** The application must not read or write any part of
+** a block of memory after it has been released using
+** [sqlite3_free()] or [sqlite3_realloc()].
+*/
+SQLITE_API void *sqlite3_malloc(int);
+SQLITE_API void *sqlite3_malloc64(sqlite3_uint64);
+SQLITE_API void *sqlite3_realloc(void*, int);
+SQLITE_API void *sqlite3_realloc64(void*, sqlite3_uint64);
+SQLITE_API void sqlite3_free(void*);
+SQLITE_API sqlite3_uint64 sqlite3_msize(void*);
+
+/*
+** CAPI3REF: Memory Allocator Statistics
+**
+** SQLite provides these two interfaces for reporting on the status
+** of the [sqlite3_malloc()], [sqlite3_free()], and [sqlite3_realloc()]
+** routines, which form the built-in memory allocation subsystem.
+**
+** ^The [sqlite3_memory_used()] routine returns the number of bytes
+** of memory currently outstanding (malloced but not freed).
+** ^The [sqlite3_memory_highwater()] routine returns the maximum
+** value of [sqlite3_memory_used()] since the high-water mark
+** was last reset.  ^The values returned by [sqlite3_memory_used()] and
+** [sqlite3_memory_highwater()] include any overhead
+** added by SQLite in its implementation of [sqlite3_malloc()],
+** but not overhead added by the any underlying system library
+** routines that [sqlite3_malloc()] may call.
+**
+** ^The memory high-water mark is reset to the current value of
+** [sqlite3_memory_used()] if and only if the parameter to
+** [sqlite3_memory_highwater()] is true.  ^The value returned
+** by [sqlite3_memory_highwater(1)] is the high-water mark
+** prior to the reset.
+*/
+SQLITE_API sqlite3_int64 sqlite3_memory_used(void);
+SQLITE_API sqlite3_int64 sqlite3_memory_highwater(int resetFlag);
+
+/*
+** CAPI3REF: Pseudo-Random Number Generator
+**
+** SQLite contains a high-quality pseudo-random number generator (PRNG) used to
+** select random [ROWID | ROWIDs] when inserting new records into a table that
+** already uses the largest possible [ROWID].  The PRNG is also used for
+** the build-in random() and randomblob() SQL functions.  This interface allows
+** applications to access the same PRNG for other purposes.
+**
+** ^A call to this routine stores N bytes of randomness into buffer P.
+** ^The P parameter can be a NULL pointer.
+**
+** ^If this routine has not been previously called or if the previous
+** call had N less than one or a NULL pointer for P, then the PRNG is
+** seeded using randomness obtained from the xRandomness method of
+** the default [sqlite3_vfs] object.
+** ^If the previous call to this routine had an N of 1 or more and a
+** non-NULL P then the pseudo-randomness is generated
+** internally and without recourse to the [sqlite3_vfs] xRandomness
+** method.
+*/
+SQLITE_API void sqlite3_randomness(int N, void *P);
+
+/*
+** CAPI3REF: Compile-Time Authorization Callbacks
+**
+** ^This routine registers an authorizer callback with a particular
+** [database connection], supplied in the first argument.
+** ^The authorizer callback is invoked as SQL statements are being compiled
+** by [sqlite3_prepare()] or its variants [sqlite3_prepare_v2()],
+** [sqlite3_prepare16()] and [sqlite3_prepare16_v2()].  ^At various
+** points during the compilation process, as logic is being created
+** to perform various actions, the authorizer callback is invoked to
+** see if those actions are allowed.  ^The authorizer callback should
+** return [SQLITE_OK] to allow the action, [SQLITE_IGNORE] to disallow the
+** specific action but allow the SQL statement to continue to be
+** compiled, or [SQLITE_DENY] to cause the entire SQL statement to be
+** rejected with an error.  ^If the authorizer callback returns
+** any value other than [SQLITE_IGNORE], [SQLITE_OK], or [SQLITE_DENY]
+** then the [sqlite3_prepare_v2()] or equivalent call that triggered
+** the authorizer will fail with an error message.
+**
+** When the callback returns [SQLITE_OK], that means the operation
+** requested is ok.  ^When the callback returns [SQLITE_DENY], the
+** [sqlite3_prepare_v2()] or equivalent call that triggered the
+** authorizer will fail with an error message explaining that
+** access is denied. 
+**
+** ^The first parameter to the authorizer callback is a copy of the third
+** parameter to the sqlite3_set_authorizer() interface. ^The second parameter
+** to the callback is an integer [SQLITE_COPY | action code] that specifies
+** the particular action to be authorized. ^The third through sixth parameters
+** to the callback are zero-terminated strings that contain additional
+** details about the action to be authorized.
+**
+** ^If the action code is [SQLITE_READ]
+** and the callback returns [SQLITE_IGNORE] then the
+** [prepared statement] statement is constructed to substitute
+** a NULL value in place of the table column that would have
+** been read if [SQLITE_OK] had been returned.  The [SQLITE_IGNORE]
+** return can be used to deny an untrusted user access to individual
+** columns of a table.
+** ^If the action code is [SQLITE_DELETE] and the callback returns
+** [SQLITE_IGNORE] then the [DELETE] operation proceeds but the
+** [truncate optimization] is disabled and all rows are deleted individually.
+**
+** An authorizer is used when [sqlite3_prepare | preparing]
+** SQL statements from an untrusted source, to ensure that the SQL statements
+** do not try to access data they are not allowed to see, or that they do not
+** try to execute malicious statements that damage the database.  For
+** example, an application may allow a user to enter arbitrary
+** SQL queries for evaluation by a database.  But the application does
+** not want the user to be able to make arbitrary changes to the
+** database.  An authorizer could then be put in place while the
+** user-entered SQL is being [sqlite3_prepare | prepared] that
+** disallows everything except [SELECT] statements.
+**
+** Applications that need to process SQL from untrusted sources
+** might also consider lowering resource limits using [sqlite3_limit()]
+** and limiting database size using the [max_page_count] [PRAGMA]
+** in addition to using an authorizer.
+**
+** ^(Only a single authorizer can be in place on a database connection
+** at a time.  Each call to sqlite3_set_authorizer overrides the
+** previous call.)^  ^Disable the authorizer by installing a NULL callback.
+** The authorizer is disabled by default.
+**
+** The authorizer callback must not do anything that will modify
+** the database connection that invoked the authorizer callback.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+** ^When [sqlite3_prepare_v2()] is used to prepare a statement, the
+** statement might be re-prepared during [sqlite3_step()] due to a 
+** schema change.  Hence, the application should ensure that the
+** correct authorizer callback remains in place during the [sqlite3_step()].
+**
+** ^Note that the authorizer callback is invoked only during
+** [sqlite3_prepare()] or its variants.  Authorization is not
+** performed during statement evaluation in [sqlite3_step()], unless
+** as stated in the previous paragraph, sqlite3_step() invokes
+** sqlite3_prepare_v2() to reprepare a statement after a schema change.
+*/
+SQLITE_API int sqlite3_set_authorizer(
+  sqlite3*,
+  int (*xAuth)(void*,int,const char*,const char*,const char*,const char*),
+  void *pUserData
+);
+
+/*
+** CAPI3REF: Authorizer Return Codes
+**
+** The [sqlite3_set_authorizer | authorizer callback function] must
+** return either [SQLITE_OK] or one of these two constants in order
+** to signal SQLite whether or not the action is permitted.  See the
+** [sqlite3_set_authorizer | authorizer documentation] for additional
+** information.
+**
+** Note that SQLITE_IGNORE is also used as a [conflict resolution mode]
+** returned from the [sqlite3_vtab_on_conflict()] interface.
+*/
+#define SQLITE_DENY   1   /* Abort the SQL statement with an error */
+#define SQLITE_IGNORE 2   /* Don't allow access, but don't generate an error */
+
+/*
+** CAPI3REF: Authorizer Action Codes
+**
+** The [sqlite3_set_authorizer()] interface registers a callback function
+** that is invoked to authorize certain SQL statement actions.  The
+** second parameter to the callback is an integer code that specifies
+** what action is being authorized.  These are the integer action codes that
+** the authorizer callback may be passed.
+**
+** These action code values signify what kind of operation is to be
+** authorized.  The 3rd and 4th parameters to the authorization
+** callback function will be parameters or NULL depending on which of these
+** codes is used as the second parameter.  ^(The 5th parameter to the
+** authorizer callback is the name of the database ("main", "temp",
+** etc.) if applicable.)^  ^The 6th parameter to the authorizer callback
+** is the name of the inner-most trigger or view that is responsible for
+** the access attempt or NULL if this access attempt is directly from
+** top-level SQL code.
+*/
+/******************************************* 3rd ************ 4th ***********/
+#define SQLITE_CREATE_INDEX          1   /* Index Name      Table Name      */
+#define SQLITE_CREATE_TABLE          2   /* Table Name      NULL            */
+#define SQLITE_CREATE_TEMP_INDEX     3   /* Index Name      Table Name      */
+#define SQLITE_CREATE_TEMP_TABLE     4   /* Table Name      NULL            */
+#define SQLITE_CREATE_TEMP_TRIGGER   5   /* Trigger Name    Table Name      */
+#define SQLITE_CREATE_TEMP_VIEW      6   /* View Name       NULL            */
+#define SQLITE_CREATE_TRIGGER        7   /* Trigger Name    Table Name      */
+#define SQLITE_CREATE_VIEW           8   /* View Name       NULL            */
+#define SQLITE_DELETE                9   /* Table Name      NULL            */
+#define SQLITE_DROP_INDEX           10   /* Index Name      Table Name      */
+#define SQLITE_DROP_TABLE           11   /* Table Name      NULL            */
+#define SQLITE_DROP_TEMP_INDEX      12   /* Index Name      Table Name      */
+#define SQLITE_DROP_TEMP_TABLE      13   /* Table Name      NULL            */
+#define SQLITE_DROP_TEMP_TRIGGER    14   /* Trigger Name    Table Name      */
+#define SQLITE_DROP_TEMP_VIEW       15   /* View Name       NULL            */
+#define SQLITE_DROP_TRIGGER         16   /* Trigger Name    Table Name      */
+#define SQLITE_DROP_VIEW            17   /* View Name       NULL            */
+#define SQLITE_INSERT               18   /* Table Name      NULL            */
+#define SQLITE_PRAGMA               19   /* Pragma Name     1st arg or NULL */
+#define SQLITE_READ                 20   /* Table Name      Column Name     */
+#define SQLITE_SELECT               21   /* NULL            NULL            */
+#define SQLITE_TRANSACTION          22   /* Operation       NULL            */
+#define SQLITE_UPDATE               23   /* Table Name      Column Name     */
+#define SQLITE_ATTACH               24   /* Filename        NULL            */
+#define SQLITE_DETACH               25   /* Database Name   NULL            */
+#define SQLITE_ALTER_TABLE          26   /* Database Name   Table Name      */
+#define SQLITE_REINDEX              27   /* Index Name      NULL            */
+#define SQLITE_ANALYZE              28   /* Table Name      NULL            */
+#define SQLITE_CREATE_VTABLE        29   /* Table Name      Module Name     */
+#define SQLITE_DROP_VTABLE          30   /* Table Name      Module Name     */
+#define SQLITE_FUNCTION             31   /* NULL            Function Name   */
+#define SQLITE_SAVEPOINT            32   /* Operation       Savepoint Name  */
+#define SQLITE_COPY                  0   /* No longer used */
+#define SQLITE_RECURSIVE            33   /* NULL            NULL            */
+
+/*
+** CAPI3REF: Tracing And Profiling Functions
+**
+** These routines register callback functions that can be used for
+** tracing and profiling the execution of SQL statements.
+**
+** ^The callback function registered by sqlite3_trace() is invoked at
+** various times when an SQL statement is being run by [sqlite3_step()].
+** ^The sqlite3_trace() callback is invoked with a UTF-8 rendering of the
+** SQL statement text as the statement first begins executing.
+** ^(Additional sqlite3_trace() callbacks might occur
+** as each triggered subprogram is entered.  The callbacks for triggers
+** contain a UTF-8 SQL comment that identifies the trigger.)^
+**
+** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit
+** the length of [bound parameter] expansion in the output of sqlite3_trace().
+**
+** ^The callback function registered by sqlite3_profile() is invoked
+** as each SQL statement finishes.  ^The profile callback contains
+** the original statement text and an estimate of wall-clock time
+** of how long that statement took to run.  ^The profile callback
+** time is in units of nanoseconds, however the current implementation
+** is only capable of millisecond resolution so the six least significant
+** digits in the time are meaningless.  Future versions of SQLite
+** might provide greater resolution on the profiler callback.  The
+** sqlite3_profile() function is considered experimental and is
+** subject to change in future versions of SQLite.
+*/
+SQLITE_API void *sqlite3_trace(sqlite3*, void(*xTrace)(void*,const char*), void*);
+SQLITE_API SQLITE_EXPERIMENTAL void *sqlite3_profile(sqlite3*,
+   void(*xProfile)(void*,const char*,sqlite3_uint64), void*);
+
+/*
+** CAPI3REF: Query Progress Callbacks
+**
+** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback
+** function X to be invoked periodically during long running calls to
+** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for
+** database connection D.  An example use for this
+** interface is to keep a GUI updated during a large query.
+**
+** ^The parameter P is passed through as the only parameter to the 
+** callback function X.  ^The parameter N is the approximate number of 
+** [virtual machine instructions] that are evaluated between successive
+** invocations of the callback X.  ^If N is less than one then the progress
+** handler is disabled.
+**
+** ^Only a single progress handler may be defined at one time per
+** [database connection]; setting a new progress handler cancels the
+** old one.  ^Setting parameter X to NULL disables the progress handler.
+** ^The progress handler is also disabled by setting N to a value less
+** than 1.
+**
+** ^If the progress callback returns non-zero, the operation is
+** interrupted.  This feature can be used to implement a
+** "Cancel" button on a GUI progress dialog box.
+**
+** The progress handler callback must not do anything that will modify
+** the database connection that invoked the progress handler.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+*/
+SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*);
+
+/*
+** CAPI3REF: Opening A New Database Connection
+**
+** ^These routines open an SQLite database file as specified by the 
+** filename argument. ^The filename argument is interpreted as UTF-8 for
+** sqlite3_open() and sqlite3_open_v2() and as UTF-16 in the native byte
+** order for sqlite3_open16(). ^(A [database connection] handle is usually
+** returned in *ppDb, even if an error occurs.  The only exception is that
+** if SQLite is unable to allocate memory to hold the [sqlite3] object,
+** a NULL will be written into *ppDb instead of a pointer to the [sqlite3]
+** object.)^ ^(If the database is opened (and/or created) successfully, then
+** [SQLITE_OK] is returned.  Otherwise an [error code] is returned.)^ ^The
+** [sqlite3_errmsg()] or [sqlite3_errmsg16()] routines can be used to obtain
+** an English language description of the error following a failure of any
+** of the sqlite3_open() routines.
+**
+** ^The default encoding will be UTF-8 for databases created using
+** sqlite3_open() or sqlite3_open_v2().  ^The default encoding for databases
+** created using sqlite3_open16() will be UTF-16 in the native byte order.
+**
+** Whether or not an error occurs when it is opened, resources
+** associated with the [database connection] handle should be released by
+** passing it to [sqlite3_close()] when it is no longer required.
+**
+** The sqlite3_open_v2() interface works like sqlite3_open()
+** except that it accepts two additional parameters for additional control
+** over the new database connection.  ^(The flags parameter to
+** sqlite3_open_v2() can take one of
+** the following three values, optionally combined with the 
+** [SQLITE_OPEN_NOMUTEX], [SQLITE_OPEN_FULLMUTEX], [SQLITE_OPEN_SHAREDCACHE],
+** [SQLITE_OPEN_PRIVATECACHE], and/or [SQLITE_OPEN_URI] flags:)^
+**
+** <dl>
+** ^(<dt>[SQLITE_OPEN_READONLY]</dt>
+** <dd>The database is opened in read-only mode.  If the database does not
+** already exist, an error is returned.</dd>)^
+**
+** ^(<dt>[SQLITE_OPEN_READWRITE]</dt>
+** <dd>The database is opened for reading and writing if possible, or reading
+** only if the file is write protected by the operating system.  In either
+** case the database must already exist, otherwise an error is returned.</dd>)^
+**
+** ^(<dt>[SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]</dt>
+** <dd>The database is opened for reading and writing, and is created if
+** it does not already exist. This is the behavior that is always used for
+** sqlite3_open() and sqlite3_open16().</dd>)^
+** </dl>
+**
+** If the 3rd parameter to sqlite3_open_v2() is not one of the
+** combinations shown above optionally combined with other
+** [SQLITE_OPEN_READONLY | SQLITE_OPEN_* bits]
+** then the behavior is undefined.
+**
+** ^If the [SQLITE_OPEN_NOMUTEX] flag is set, then the database connection
+** opens in the multi-thread [threading mode] as long as the single-thread
+** mode has not been set at compile-time or start-time.  ^If the
+** [SQLITE_OPEN_FULLMUTEX] flag is set then the database connection opens
+** in the serialized [threading mode] unless single-thread was
+** previously selected at compile-time or start-time.
+** ^The [SQLITE_OPEN_SHAREDCACHE] flag causes the database connection to be
+** eligible to use [shared cache mode], regardless of whether or not shared
+** cache is enabled using [sqlite3_enable_shared_cache()].  ^The
+** [SQLITE_OPEN_PRIVATECACHE] flag causes the database connection to not
+** participate in [shared cache mode] even if it is enabled.
+**
+** ^The fourth parameter to sqlite3_open_v2() is the name of the
+** [sqlite3_vfs] object that defines the operating system interface that
+** the new database connection should use.  ^If the fourth parameter is
+** a NULL pointer then the default [sqlite3_vfs] object is used.
+**
+** ^If the filename is ":memory:", then a private, temporary in-memory database
+** is created for the connection.  ^This in-memory database will vanish when
+** the database connection is closed.  Future versions of SQLite might
+** make use of additional special filenames that begin with the ":" character.
+** It is recommended that when a database filename actually does begin with
+** a ":" character you should prefix the filename with a pathname such as
+** "./" to avoid ambiguity.
+**
+** ^If the filename is an empty string, then a private, temporary
+** on-disk database will be created.  ^This private database will be
+** automatically deleted as soon as the database connection is closed.
+**
+** [[URI filenames in sqlite3_open()]] <h3>URI Filenames</h3>
+**
+** ^If [URI filename] interpretation is enabled, and the filename argument
+** begins with "file:", then the filename is interpreted as a URI. ^URI
+** filename interpretation is enabled if the [SQLITE_OPEN_URI] flag is
+** set in the fourth argument to sqlite3_open_v2(), or if it has
+** been enabled globally using the [SQLITE_CONFIG_URI] option with the
+** [sqlite3_config()] method or by the [SQLITE_USE_URI] compile-time option.
+** As of SQLite version 3.7.7, URI filename interpretation is turned off
+** by default, but future releases of SQLite might enable URI filename
+** interpretation by default.  See "[URI filenames]" for additional
+** information.
+**
+** URI filenames are parsed according to RFC 3986. ^If the URI contains an
+** authority, then it must be either an empty string or the string 
+** "localhost". ^If the authority is not an empty string or "localhost", an 
+** error is returned to the caller. ^The fragment component of a URI, if 
+** present, is ignored.
+**
+** ^SQLite uses the path component of the URI as the name of the disk file
+** which contains the database. ^If the path begins with a '/' character, 
+** then it is interpreted as an absolute path. ^If the path does not begin 
+** with a '/' (meaning that the authority section is omitted from the URI)
+** then the path is interpreted as a relative path. 
+** ^(On windows, the first component of an absolute path 
+** is a drive specification (e.g. "C:").)^
+**
+** [[core URI query parameters]]
+** The query component of a URI may contain parameters that are interpreted
+** either by SQLite itself, or by a [VFS | custom VFS implementation].
+** SQLite and its built-in [VFSes] interpret the
+** following query parameters:
+**
+** <ul>
+**   <li> <b>vfs</b>: ^The "vfs" parameter may be used to specify the name of
+**     a VFS object that provides the operating system interface that should
+**     be used to access the database file on disk. ^If this option is set to
+**     an empty string the default VFS object is used. ^Specifying an unknown
+**     VFS is an error. ^If sqlite3_open_v2() is used and the vfs option is
+**     present, then the VFS specified by the option takes precedence over
+**     the value passed as the fourth parameter to sqlite3_open_v2().
+**
+**   <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw",
+**     "rwc", or "memory". Attempting to set it to any other value is
+**     an error)^. 
+**     ^If "ro" is specified, then the database is opened for read-only 
+**     access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the 
+**     third argument to sqlite3_open_v2(). ^If the mode option is set to 
+**     "rw", then the database is opened for read-write (but not create) 
+**     access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had 
+**     been set. ^Value "rwc" is equivalent to setting both 
+**     SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE.  ^If the mode option is
+**     set to "memory" then a pure [in-memory database] that never reads
+**     or writes from disk is used. ^It is an error to specify a value for
+**     the mode parameter that is less restrictive than that specified by
+**     the flags passed in the third parameter to sqlite3_open_v2().
+**
+**   <li> <b>cache</b>: ^The cache parameter may be set to either "shared" or
+**     "private". ^Setting it to "shared" is equivalent to setting the
+**     SQLITE_OPEN_SHAREDCACHE bit in the flags argument passed to
+**     sqlite3_open_v2(). ^Setting the cache parameter to "private" is 
+**     equivalent to setting the SQLITE_OPEN_PRIVATECACHE bit.
+**     ^If sqlite3_open_v2() is used and the "cache" parameter is present in
+**     a URI filename, its value overrides any behavior requested by setting
+**     SQLITE_OPEN_PRIVATECACHE or SQLITE_OPEN_SHAREDCACHE flag.
+**
+**  <li> <b>psow</b>: ^The psow parameter indicates whether or not the
+**     [powersafe overwrite] property does or does not apply to the
+**     storage media on which the database file resides.
+**
+**  <li> <b>nolock</b>: ^The nolock parameter is a boolean query parameter
+**     which if set disables file locking in rollback journal modes.  This
+**     is useful for accessing a database on a filesystem that does not
+**     support locking.  Caution:  Database corruption might result if two
+**     or more processes write to the same database and any one of those
+**     processes uses nolock=1.
+**
+**  <li> <b>immutable</b>: ^The immutable parameter is a boolean query
+**     parameter that indicates that the database file is stored on
+**     read-only media.  ^When immutable is set, SQLite assumes that the
+**     database file cannot be changed, even by a process with higher
+**     privilege, and so the database is opened read-only and all locking
+**     and change detection is disabled.  Caution: Setting the immutable
+**     property on a database file that does in fact change can result
+**     in incorrect query results and/or [SQLITE_CORRUPT] errors.
+**     See also: [SQLITE_IOCAP_IMMUTABLE].
+**       
+** </ul>
+**
+** ^Specifying an unknown parameter in the query component of a URI is not an
+** error.  Future versions of SQLite might understand additional query
+** parameters.  See "[query parameters with special meaning to SQLite]" for
+** additional information.
+**
+** [[URI filename examples]] <h3>URI filename examples</h3>
+**
+** <table border="1" align=center cellpadding=5>
+** <tr><th> URI filenames <th> Results
+** <tr><td> file:data.db <td> 
+**          Open the file "data.db" in the current directory.
+** <tr><td> file:/home/fred/data.db<br>
+**          file:///home/fred/data.db <br> 
+**          file://localhost/home/fred/data.db <br> <td> 
+**          Open the database file "/home/fred/data.db".
+** <tr><td> file://darkstar/home/fred/data.db <td> 
+**          An error. "darkstar" is not a recognized authority.
+** <tr><td style="white-space:nowrap"> 
+**          file:///C:/Documents%20and%20Settings/fred/Desktop/data.db
+**     <td> Windows only: Open the file "data.db" on fred's desktop on drive
+**          C:. Note that the %20 escaping in this example is not strictly 
+**          necessary - space characters can be used literally
+**          in URI filenames.
+** <tr><td> file:data.db?mode=ro&cache=private <td> 
+**          Open file "data.db" in the current directory for read-only access.
+**          Regardless of whether or not shared-cache mode is enabled by
+**          default, use a private cache.
+** <tr><td> file:/home/fred/data.db?vfs=unix-dotfile <td>
+**          Open file "/home/fred/data.db". Use the special VFS "unix-dotfile"
+**          that uses dot-files in place of posix advisory locking.
+** <tr><td> file:data.db?mode=readonly <td> 
+**          An error. "readonly" is not a valid option for the "mode" parameter.
+** </table>
+**
+** ^URI hexadecimal escape sequences (%HH) are supported within the path and
+** query components of a URI. A hexadecimal escape sequence consists of a
+** percent sign - "%" - followed by exactly two hexadecimal digits 
+** specifying an octet value. ^Before the path or query components of a
+** URI filename are interpreted, they are encoded using UTF-8 and all 
+** hexadecimal escape sequences replaced by a single byte containing the
+** corresponding octet. If this process generates an invalid UTF-8 encoding,
+** the results are undefined.
+**
+** <b>Note to Windows users:</b>  The encoding used for the filename argument
+** of sqlite3_open() and sqlite3_open_v2() must be UTF-8, not whatever
+** codepage is currently defined.  Filenames containing international
+** characters must be converted to UTF-8 prior to passing them into
+** sqlite3_open() or sqlite3_open_v2().
+**
+** <b>Note to Windows Runtime users:</b>  The temporary directory must be set
+** prior to calling sqlite3_open() or sqlite3_open_v2().  Otherwise, various
+** features that require the use of temporary files may fail.
+**
+** See also: [sqlite3_temp_directory]
+*/
+SQLITE_API int sqlite3_open(
+  const char *filename,   /* Database filename (UTF-8) */
+  sqlite3 **ppDb          /* OUT: SQLite db handle */
+);
+SQLITE_API int sqlite3_open16(
+  const void *filename,   /* Database filename (UTF-16) */
+  sqlite3 **ppDb          /* OUT: SQLite db handle */
+);
+SQLITE_API int sqlite3_open_v2(
+  const char *filename,   /* Database filename (UTF-8) */
+  sqlite3 **ppDb,         /* OUT: SQLite db handle */
+  int flags,              /* Flags */
+  const char *zVfs        /* Name of VFS module to use */
+);
+
+/*
+** CAPI3REF: Obtain Values For URI Parameters
+**
+** These are utility routines, useful to VFS implementations, that check
+** to see if a database file was a URI that contained a specific query 
+** parameter, and if so obtains the value of that query parameter.
+**
+** If F is the database filename pointer passed into the xOpen() method of 
+** a VFS implementation when the flags parameter to xOpen() has one or 
+** more of the [SQLITE_OPEN_URI] or [SQLITE_OPEN_MAIN_DB] bits set and
+** P is the name of the query parameter, then
+** sqlite3_uri_parameter(F,P) returns the value of the P
+** parameter if it exists or a NULL pointer if P does not appear as a 
+** query parameter on F.  If P is a query parameter of F
+** has no explicit value, then sqlite3_uri_parameter(F,P) returns
+** a pointer to an empty string.
+**
+** The sqlite3_uri_boolean(F,P,B) routine assumes that P is a boolean
+** parameter and returns true (1) or false (0) according to the value
+** of P.  The sqlite3_uri_boolean(F,P,B) routine returns true (1) if the
+** value of query parameter P is one of "yes", "true", or "on" in any
+** case or if the value begins with a non-zero number.  The 
+** sqlite3_uri_boolean(F,P,B) routines returns false (0) if the value of
+** query parameter P is one of "no", "false", or "off" in any case or
+** if the value begins with a numeric zero.  If P is not a query
+** parameter on F or if the value of P is does not match any of the
+** above, then sqlite3_uri_boolean(F,P,B) returns (B!=0).
+**
+** The sqlite3_uri_int64(F,P,D) routine converts the value of P into a
+** 64-bit signed integer and returns that integer, or D if P does not
+** exist.  If the value of P is something other than an integer, then
+** zero is returned.
+** 
+** If F is a NULL pointer, then sqlite3_uri_parameter(F,P) returns NULL and
+** sqlite3_uri_boolean(F,P,B) returns B.  If F is not a NULL pointer and
+** is not a database file pathname pointer that SQLite passed into the xOpen
+** VFS method, then the behavior of this routine is undefined and probably
+** undesirable.
+*/
+SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam);
+SQLITE_API int sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault);
+SQLITE_API sqlite3_int64 sqlite3_uri_int64(const char*, const char*, sqlite3_int64);
+
+
+/*
+** CAPI3REF: Error Codes And Messages
+**
+** ^The sqlite3_errcode() interface returns the numeric [result code] or
+** [extended result code] for the most recent failed sqlite3_* API call
+** associated with a [database connection]. If a prior API call failed
+** but the most recent API call succeeded, the return value from
+** sqlite3_errcode() is undefined.  ^The sqlite3_extended_errcode()
+** interface is the same except that it always returns the 
+** [extended result code] even when extended result codes are
+** disabled.
+**
+** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language
+** text that describes the error, as either UTF-8 or UTF-16 respectively.
+** ^(Memory to hold the error message string is managed internally.
+** The application does not need to worry about freeing the result.
+** However, the error string might be overwritten or deallocated by
+** subsequent calls to other SQLite interface functions.)^
+**
+** ^The sqlite3_errstr() interface returns the English-language text
+** that describes the [result code], as UTF-8.
+** ^(Memory to hold the error message string is managed internally
+** and must not be freed by the application)^.
+**
+** When the serialized [threading mode] is in use, it might be the
+** case that a second error occurs on a separate thread in between
+** the time of the first error and the call to these interfaces.
+** When that happens, the second error will be reported since these
+** interfaces always report the most recent result.  To avoid
+** this, each thread can obtain exclusive use of the [database connection] D
+** by invoking [sqlite3_mutex_enter]([sqlite3_db_mutex](D)) before beginning
+** to use D and invoking [sqlite3_mutex_leave]([sqlite3_db_mutex](D)) after
+** all calls to the interfaces listed here are completed.
+**
+** If an interface fails with SQLITE_MISUSE, that means the interface
+** was invoked incorrectly by the application.  In that case, the
+** error code and message may or may not be set.
+*/
+SQLITE_API int sqlite3_errcode(sqlite3 *db);
+SQLITE_API int sqlite3_extended_errcode(sqlite3 *db);
+SQLITE_API const char *sqlite3_errmsg(sqlite3*);
+SQLITE_API const void *sqlite3_errmsg16(sqlite3*);
+SQLITE_API const char *sqlite3_errstr(int);
+
+/*
+** CAPI3REF: SQL Statement Object
+** KEYWORDS: {prepared statement} {prepared statements}
+**
+** An instance of this object represents a single SQL statement.
+** This object is variously known as a "prepared statement" or a
+** "compiled SQL statement" or simply as a "statement".
+**
+** The life of a statement object goes something like this:
+**
+** <ol>
+** <li> Create the object using [sqlite3_prepare_v2()] or a related
+**      function.
+** <li> Bind values to [host parameters] using the sqlite3_bind_*()
+**      interfaces.
+** <li> Run the SQL by calling [sqlite3_step()] one or more times.
+** <li> Reset the statement using [sqlite3_reset()] then go back
+**      to step 2.  Do this zero or more times.
+** <li> Destroy the object using [sqlite3_finalize()].
+** </ol>
+**
+** Refer to documentation on individual methods above for additional
+** information.
+*/
+typedef struct sqlite3_stmt sqlite3_stmt;
+
+/*
+** CAPI3REF: Run-time Limits
+**
+** ^(This interface allows the size of various constructs to be limited
+** on a connection by connection basis.  The first parameter is the
+** [database connection] whose limit is to be set or queried.  The
+** second parameter is one of the [limit categories] that define a
+** class of constructs to be size limited.  The third parameter is the
+** new limit for that construct.)^
+**
+** ^If the new limit is a negative number, the limit is unchanged.
+** ^(For each limit category SQLITE_LIMIT_<i>NAME</i> there is a 
+** [limits | hard upper bound]
+** set at compile-time by a C preprocessor macro called
+** [limits | SQLITE_MAX_<i>NAME</i>].
+** (The "_LIMIT_" in the name is changed to "_MAX_".))^
+** ^Attempts to increase a limit above its hard upper bound are
+** silently truncated to the hard upper bound.
+**
+** ^Regardless of whether or not the limit was changed, the 
+** [sqlite3_limit()] interface returns the prior value of the limit.
+** ^Hence, to find the current value of a limit without changing it,
+** simply invoke this interface with the third parameter set to -1.
+**
+** Run-time limits are intended for use in applications that manage
+** both their own internal database and also databases that are controlled
+** by untrusted external sources.  An example application might be a
+** web browser that has its own databases for storing history and
+** separate databases controlled by JavaScript applications downloaded
+** off the Internet.  The internal databases can be given the
+** large, default limits.  Databases managed by external sources can
+** be given much smaller limits designed to prevent a denial of service
+** attack.  Developers might also want to use the [sqlite3_set_authorizer()]
+** interface to further control untrusted SQL.  The size of the database
+** created by an untrusted script can be contained using the
+** [max_page_count] [PRAGMA].
+**
+** New run-time limit categories may be added in future releases.
+*/
+SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
+
+/*
+** CAPI3REF: Run-Time Limit Categories
+** KEYWORDS: {limit category} {*limit categories}
+**
+** These constants define various performance limits
+** that can be lowered at run-time using [sqlite3_limit()].
+** The synopsis of the meanings of the various limits is shown below.
+** Additional information is available at [limits | Limits in SQLite].
+**
+** <dl>
+** [[SQLITE_LIMIT_LENGTH]] ^(<dt>SQLITE_LIMIT_LENGTH</dt>
+** <dd>The maximum size of any string or BLOB or table row, in bytes.<dd>)^
+**
+** [[SQLITE_LIMIT_SQL_LENGTH]] ^(<dt>SQLITE_LIMIT_SQL_LENGTH</dt>
+** <dd>The maximum length of an SQL statement, in bytes.</dd>)^
+**
+** [[SQLITE_LIMIT_COLUMN]] ^(<dt>SQLITE_LIMIT_COLUMN</dt>
+** <dd>The maximum number of columns in a table definition or in the
+** result set of a [SELECT] or the maximum number of columns in an index
+** or in an ORDER BY or GROUP BY clause.</dd>)^
+**
+** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(<dt>SQLITE_LIMIT_EXPR_DEPTH</dt>
+** <dd>The maximum depth of the parse tree on any expression.</dd>)^
+**
+** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(<dt>SQLITE_LIMIT_COMPOUND_SELECT</dt>
+** <dd>The maximum number of terms in a compound SELECT statement.</dd>)^
+**
+** [[SQLITE_LIMIT_VDBE_OP]] ^(<dt>SQLITE_LIMIT_VDBE_OP</dt>
+** <dd>The maximum number of instructions in a virtual machine program
+** used to implement an SQL statement.  This limit is not currently
+** enforced, though that might be added in some future release of
+** SQLite.</dd>)^
+**
+** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(<dt>SQLITE_LIMIT_FUNCTION_ARG</dt>
+** <dd>The maximum number of arguments on a function.</dd>)^
+**
+** [[SQLITE_LIMIT_ATTACHED]] ^(<dt>SQLITE_LIMIT_ATTACHED</dt>
+** <dd>The maximum number of [ATTACH | attached databases].)^</dd>
+**
+** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]]
+** ^(<dt>SQLITE_LIMIT_LIKE_PATTERN_LENGTH</dt>
+** <dd>The maximum length of the pattern argument to the [LIKE] or
+** [GLOB] operators.</dd>)^
+**
+** [[SQLITE_LIMIT_VARIABLE_NUMBER]]
+** ^(<dt>SQLITE_LIMIT_VARIABLE_NUMBER</dt>
+** <dd>The maximum index number of any [parameter] in an SQL statement.)^
+**
+** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(<dt>SQLITE_LIMIT_TRIGGER_DEPTH</dt>
+** <dd>The maximum depth of recursion for triggers.</dd>)^
+**
+** [[SQLITE_LIMIT_WORKER_THREADS]] ^(<dt>SQLITE_LIMIT_WORKER_THREADS</dt>
+** <dd>The maximum number of auxiliary worker threads that a single
+** [prepared statement] may start.</dd>)^
+** </dl>
+*/
+#define SQLITE_LIMIT_LENGTH                    0
+#define SQLITE_LIMIT_SQL_LENGTH                1
+#define SQLITE_LIMIT_COLUMN                    2
+#define SQLITE_LIMIT_EXPR_DEPTH                3
+#define SQLITE_LIMIT_COMPOUND_SELECT           4
+#define SQLITE_LIMIT_VDBE_OP                   5
+#define SQLITE_LIMIT_FUNCTION_ARG              6
+#define SQLITE_LIMIT_ATTACHED                  7
+#define SQLITE_LIMIT_LIKE_PATTERN_LENGTH       8
+#define SQLITE_LIMIT_VARIABLE_NUMBER           9
+#define SQLITE_LIMIT_TRIGGER_DEPTH            10
+#define SQLITE_LIMIT_WORKER_THREADS           11
+
+/*
+** CAPI3REF: Compiling An SQL Statement
+** KEYWORDS: {SQL statement compiler}
+**
+** To execute an SQL query, it must first be compiled into a byte-code
+** program using one of these routines.
+**
+** The first argument, "db", is a [database connection] obtained from a
+** prior successful call to [sqlite3_open()], [sqlite3_open_v2()] or
+** [sqlite3_open16()].  The database connection must not have been closed.
+**
+** The second argument, "zSql", is the statement to be compiled, encoded
+** as either UTF-8 or UTF-16.  The sqlite3_prepare() and sqlite3_prepare_v2()
+** interfaces use UTF-8, and sqlite3_prepare16() and sqlite3_prepare16_v2()
+** use UTF-16.
+**
+** ^If the nByte argument is less than zero, then zSql is read up to the
+** first zero terminator. ^If nByte is non-negative, then it is the maximum
+** number of  bytes read from zSql.  ^When nByte is non-negative, the
+** zSql string ends at either the first '\000' or '\u0000' character or
+** the nByte-th byte, whichever comes first. If the caller knows
+** that the supplied string is nul-terminated, then there is a small
+** performance advantage to be gained by passing an nByte parameter that
+** is equal to the number of bytes in the input string <i>including</i>
+** the nul-terminator bytes as this saves SQLite from having to
+** make a copy of the input string.
+**
+** ^If pzTail is not NULL then *pzTail is made to point to the first byte
+** past the end of the first SQL statement in zSql.  These routines only
+** compile the first statement in zSql, so *pzTail is left pointing to
+** what remains uncompiled.
+**
+** ^*ppStmt is left pointing to a compiled [prepared statement] that can be
+** executed using [sqlite3_step()].  ^If there is an error, *ppStmt is set
+** to NULL.  ^If the input text contains no SQL (if the input is an empty
+** string or a comment) then *ppStmt is set to NULL.
+** The calling procedure is responsible for deleting the compiled
+** SQL statement using [sqlite3_finalize()] after it has finished with it.
+** ppStmt may not be NULL.
+**
+** ^On success, the sqlite3_prepare() family of routines return [SQLITE_OK];
+** otherwise an [error code] is returned.
+**
+** The sqlite3_prepare_v2() and sqlite3_prepare16_v2() interfaces are
+** recommended for all new programs. The two older interfaces are retained
+** for backwards compatibility, but their use is discouraged.
+** ^In the "v2" interfaces, the prepared statement
+** that is returned (the [sqlite3_stmt] object) contains a copy of the
+** original SQL text. This causes the [sqlite3_step()] interface to
+** behave differently in three ways:
+**
+** <ol>
+** <li>
+** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it
+** always used to do, [sqlite3_step()] will automatically recompile the SQL
+** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY]
+** retries will occur before sqlite3_step() gives up and returns an error.
+** </li>
+**
+** <li>
+** ^When an error occurs, [sqlite3_step()] will return one of the detailed
+** [error codes] or [extended error codes].  ^The legacy behavior was that
+** [sqlite3_step()] would only return a generic [SQLITE_ERROR] result code
+** and the application would have to make a second call to [sqlite3_reset()]
+** in order to find the underlying cause of the problem. With the "v2" prepare
+** interfaces, the underlying reason for the error is returned immediately.
+** </li>
+**
+** <li>
+** ^If the specific value bound to [parameter | host parameter] in the 
+** WHERE clause might influence the choice of query plan for a statement,
+** then the statement will be automatically recompiled, as if there had been 
+** a schema change, on the first  [sqlite3_step()] call following any change
+** to the [sqlite3_bind_text | bindings] of that [parameter]. 
+** ^The specific value of WHERE-clause [parameter] might influence the 
+** choice of query plan if the parameter is the left-hand side of a [LIKE]
+** or [GLOB] operator or if the parameter is compared to an indexed column
+** and the [SQLITE_ENABLE_STAT3] compile-time option is enabled.
+** </li>
+** </ol>
+*/
+SQLITE_API int sqlite3_prepare(
+  sqlite3 *db,            /* Database handle */
+  const char *zSql,       /* SQL statement, UTF-8 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const char **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare_v2(
+  sqlite3 *db,            /* Database handle */
+  const char *zSql,       /* SQL statement, UTF-8 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const char **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare16(
+  sqlite3 *db,            /* Database handle */
+  const void *zSql,       /* SQL statement, UTF-16 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const void **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare16_v2(
+  sqlite3 *db,            /* Database handle */
+  const void *zSql,       /* SQL statement, UTF-16 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const void **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+
+/*
+** CAPI3REF: Retrieving Statement SQL
+**
+** ^This interface can be used to retrieve a saved copy of the original
+** SQL text used to create a [prepared statement] if that statement was
+** compiled using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()].
+*/
+SQLITE_API const char *sqlite3_sql(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Determine If An SQL Statement Writes The Database
+**
+** ^The sqlite3_stmt_readonly(X) interface returns true (non-zero) if
+** and only if the [prepared statement] X makes no direct changes to
+** the content of the database file.
+**
+** Note that [application-defined SQL functions] or
+** [virtual tables] might change the database indirectly as a side effect.  
+** ^(For example, if an application defines a function "eval()" that 
+** calls [sqlite3_exec()], then the following SQL statement would
+** change the database file through side-effects:
+**
+** <blockquote><pre>
+**    SELECT eval('DELETE FROM t1') FROM t2;
+** </pre></blockquote>
+**
+** But because the [SELECT] statement does not change the database file
+** directly, sqlite3_stmt_readonly() would still return true.)^
+**
+** ^Transaction control statements such as [BEGIN], [COMMIT], [ROLLBACK],
+** [SAVEPOINT], and [RELEASE] cause sqlite3_stmt_readonly() to return true,
+** since the statements themselves do not actually modify the database but
+** rather they control the timing of when other statements modify the 
+** database.  ^The [ATTACH] and [DETACH] statements also cause
+** sqlite3_stmt_readonly() to return true since, while those statements
+** change the configuration of a database connection, they do not make 
+** changes to the content of the database files on disk.
+*/
+SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Determine If A Prepared Statement Has Been Reset
+**
+** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the
+** [prepared statement] S has been stepped at least once using 
+** [sqlite3_step(S)] but has not run to completion and/or has not 
+** been reset using [sqlite3_reset(S)].  ^The sqlite3_stmt_busy(S)
+** interface returns false if S is a NULL pointer.  If S is not a 
+** NULL pointer and is not a pointer to a valid [prepared statement]
+** object, then the behavior is undefined and probably undesirable.
+**
+** This interface can be used in combination [sqlite3_next_stmt()]
+** to locate all prepared statements associated with a database 
+** connection that are in need of being reset.  This can be used,
+** for example, in diagnostic routines to search for prepared 
+** statements that are holding a transaction open.
+*/
+SQLITE_API int sqlite3_stmt_busy(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Dynamically Typed Value Object
+** KEYWORDS: {protected sqlite3_value} {unprotected sqlite3_value}
+**
+** SQLite uses the sqlite3_value object to represent all values
+** that can be stored in a database table. SQLite uses dynamic typing
+** for the values it stores.  ^Values stored in sqlite3_value objects
+** can be integers, floating point values, strings, BLOBs, or NULL.
+**
+** An sqlite3_value object may be either "protected" or "unprotected".
+** Some interfaces require a protected sqlite3_value.  Other interfaces
+** will accept either a protected or an unprotected sqlite3_value.
+** Every interface that accepts sqlite3_value arguments specifies
+** whether or not it requires a protected sqlite3_value.
+**
+** The terms "protected" and "unprotected" refer to whether or not
+** a mutex is held.  An internal mutex is held for a protected
+** sqlite3_value object but no mutex is held for an unprotected
+** sqlite3_value object.  If SQLite is compiled to be single-threaded
+** (with [SQLITE_THREADSAFE=0] and with [sqlite3_threadsafe()] returning 0)
+** or if SQLite is run in one of reduced mutex modes 
+** [SQLITE_CONFIG_SINGLETHREAD] or [SQLITE_CONFIG_MULTITHREAD]
+** then there is no distinction between protected and unprotected
+** sqlite3_value objects and they can be used interchangeably.  However,
+** for maximum code portability it is recommended that applications
+** still make the distinction between protected and unprotected
+** sqlite3_value objects even when not strictly required.
+**
+** ^The sqlite3_value objects that are passed as parameters into the
+** implementation of [application-defined SQL functions] are protected.
+** ^The sqlite3_value object returned by
+** [sqlite3_column_value()] is unprotected.
+** Unprotected sqlite3_value objects may only be used with
+** [sqlite3_result_value()] and [sqlite3_bind_value()].
+** The [sqlite3_value_blob | sqlite3_value_type()] family of
+** interfaces require protected sqlite3_value objects.
+*/
+typedef struct Mem sqlite3_value;
+
+/*
+** CAPI3REF: SQL Function Context Object
+**
+** The context in which an SQL function executes is stored in an
+** sqlite3_context object.  ^A pointer to an sqlite3_context object
+** is always first parameter to [application-defined SQL functions].
+** The application-defined SQL function implementation will pass this
+** pointer through into calls to [sqlite3_result_int | sqlite3_result()],
+** [sqlite3_aggregate_context()], [sqlite3_user_data()],
+** [sqlite3_context_db_handle()], [sqlite3_get_auxdata()],
+** and/or [sqlite3_set_auxdata()].
+*/
+typedef struct sqlite3_context sqlite3_context;
+
+/*
+** CAPI3REF: Binding Values To Prepared Statements
+** KEYWORDS: {host parameter} {host parameters} {host parameter name}
+** KEYWORDS: {SQL parameter} {SQL parameters} {parameter binding}
+**
+** ^(In the SQL statement text input to [sqlite3_prepare_v2()] and its variants,
+** literals may be replaced by a [parameter] that matches one of following
+** templates:
+**
+** <ul>
+** <li>  ?
+** <li>  ?NNN
+** <li>  :VVV
+** <li>  @VVV
+** <li>  $VVV
+** </ul>
+**
+** In the templates above, NNN represents an integer literal,
+** and VVV represents an alphanumeric identifier.)^  ^The values of these
+** parameters (also called "host parameter names" or "SQL parameters")
+** can be set using the sqlite3_bind_*() routines defined here.
+**
+** ^The first argument to the sqlite3_bind_*() routines is always
+** a pointer to the [sqlite3_stmt] object returned from
+** [sqlite3_prepare_v2()] or its variants.
+**
+** ^The second argument is the index of the SQL parameter to be set.
+** ^The leftmost SQL parameter has an index of 1.  ^When the same named
+** SQL parameter is used more than once, second and subsequent
+** occurrences have the same index as the first occurrence.
+** ^The index for named parameters can be looked up using the
+** [sqlite3_bind_parameter_index()] API if desired.  ^The index
+** for "?NNN" parameters is the value of NNN.
+** ^The NNN value must be between 1 and the [sqlite3_limit()]
+** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999).
+**
+** ^The third argument is the value to bind to the parameter.
+** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16()
+** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter
+** is ignored and the end result is the same as sqlite3_bind_null().
+**
+** ^(In those routines that have a fourth argument, its value is the
+** number of bytes in the parameter.  To be clear: the value is the
+** number of <u>bytes</u> in the value, not the number of characters.)^
+** ^If the fourth parameter to sqlite3_bind_text() or sqlite3_bind_text16()
+** is negative, then the length of the string is
+** the number of bytes up to the first zero terminator.
+** If the fourth parameter to sqlite3_bind_blob() is negative, then
+** the behavior is undefined.
+** If a non-negative fourth parameter is provided to sqlite3_bind_text()
+** or sqlite3_bind_text16() or sqlite3_bind_text64() then
+** that parameter must be the byte offset
+** where the NUL terminator would occur assuming the string were NUL
+** terminated.  If any NUL characters occur at byte offsets less than 
+** the value of the fourth parameter then the resulting string value will
+** contain embedded NULs.  The result of expressions involving strings
+** with embedded NULs is undefined.
+**
+** ^The fifth argument to the BLOB and string binding interfaces
+** is a destructor used to dispose of the BLOB or
+** string after SQLite has finished with it.  ^The destructor is called
+** to dispose of the BLOB or string even if the call to bind API fails.
+** ^If the fifth argument is
+** the special value [SQLITE_STATIC], then SQLite assumes that the
+** information is in static, unmanaged space and does not need to be freed.
+** ^If the fifth argument has the value [SQLITE_TRANSIENT], then
+** SQLite makes its own private copy of the data immediately, before
+** the sqlite3_bind_*() routine returns.
+**
+** ^The sixth argument to sqlite3_bind_text64() must be one of
+** [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE]
+** to specify the encoding of the text in the third parameter.  If
+** the sixth argument to sqlite3_bind_text64() is not one of the
+** allowed values shown above, or if the text encoding is different
+** from the encoding specified by the sixth parameter, then the behavior
+** is undefined.
+**
+** ^The sqlite3_bind_zeroblob() routine binds a BLOB of length N that
+** is filled with zeroes.  ^A zeroblob uses a fixed amount of memory
+** (just an integer to hold its size) while it is being processed.
+** Zeroblobs are intended to serve as placeholders for BLOBs whose
+** content is later written using
+** [sqlite3_blob_open | incremental BLOB I/O] routines.
+** ^A negative value for the zeroblob results in a zero-length BLOB.
+**
+** ^If any of the sqlite3_bind_*() routines are called with a NULL pointer
+** for the [prepared statement] or with a prepared statement for which
+** [sqlite3_step()] has been called more recently than [sqlite3_reset()],
+** then the call will return [SQLITE_MISUSE].  If any sqlite3_bind_()
+** routine is passed a [prepared statement] that has been finalized, the
+** result is undefined and probably harmful.
+**
+** ^Bindings are not cleared by the [sqlite3_reset()] routine.
+** ^Unbound parameters are interpreted as NULL.
+**
+** ^The sqlite3_bind_* routines return [SQLITE_OK] on success or an
+** [error code] if anything goes wrong.
+** ^[SQLITE_TOOBIG] might be returned if the size of a string or BLOB
+** exceeds limits imposed by [sqlite3_limit]([SQLITE_LIMIT_LENGTH]) or
+** [SQLITE_MAX_LENGTH].
+** ^[SQLITE_RANGE] is returned if the parameter
+** index is out of range.  ^[SQLITE_NOMEM] is returned if malloc() fails.
+**
+** See also: [sqlite3_bind_parameter_count()],
+** [sqlite3_bind_parameter_name()], and [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, void(*)(void*));
+SQLITE_API int sqlite3_bind_blob64(sqlite3_stmt*, int, const void*, sqlite3_uint64,
+                        void(*)(void*));
+SQLITE_API int sqlite3_bind_double(sqlite3_stmt*, int, double);
+SQLITE_API int sqlite3_bind_int(sqlite3_stmt*, int, int);
+SQLITE_API int sqlite3_bind_int64(sqlite3_stmt*, int, sqlite3_int64);
+SQLITE_API int sqlite3_bind_null(sqlite3_stmt*, int);
+SQLITE_API int sqlite3_bind_text(sqlite3_stmt*,int,const char*,int,void(*)(void*));
+SQLITE_API int sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int, void(*)(void*));
+SQLITE_API int sqlite3_bind_text64(sqlite3_stmt*, int, const char*, sqlite3_uint64,
+                         void(*)(void*), unsigned char encoding);
+SQLITE_API int sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*);
+SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n);
+
+/*
+** CAPI3REF: Number Of SQL Parameters
+**
+** ^This routine can be used to find the number of [SQL parameters]
+** in a [prepared statement].  SQL parameters are tokens of the
+** form "?", "?NNN", ":AAA", "$AAA", or "@AAA" that serve as
+** placeholders for values that are [sqlite3_bind_blob | bound]
+** to the parameters at a later time.
+**
+** ^(This routine actually returns the index of the largest (rightmost)
+** parameter. For all forms except ?NNN, this will correspond to the
+** number of unique parameters.  If parameters of the ?NNN form are used,
+** there may be gaps in the list.)^
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_name()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_parameter_count(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Name Of A Host Parameter
+**
+** ^The sqlite3_bind_parameter_name(P,N) interface returns
+** the name of the N-th [SQL parameter] in the [prepared statement] P.
+** ^(SQL parameters of the form "?NNN" or ":AAA" or "@AAA" or "$AAA"
+** have a name which is the string "?NNN" or ":AAA" or "@AAA" or "$AAA"
+** respectively.
+** In other words, the initial ":" or "$" or "@" or "?"
+** is included as part of the name.)^
+** ^Parameters of the form "?" without a following integer have no name
+** and are referred to as "nameless" or "anonymous parameters".
+**
+** ^The first host parameter has an index of 1, not 0.
+**
+** ^If the value N is out of range or if the N-th parameter is
+** nameless, then NULL is returned.  ^The returned string is
+** always in UTF-8 encoding even if the named parameter was
+** originally specified as UTF-16 in [sqlite3_prepare16()] or
+** [sqlite3_prepare16_v2()].
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_count()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API const char *sqlite3_bind_parameter_name(sqlite3_stmt*, int);
+
+/*
+** CAPI3REF: Index Of A Parameter With A Given Name
+**
+** ^Return the index of an SQL parameter given its name.  ^The
+** index value returned is suitable for use as the second
+** parameter to [sqlite3_bind_blob|sqlite3_bind()].  ^A zero
+** is returned if no matching parameter is found.  ^The parameter
+** name must be given in UTF-8 even if the original statement
+** was prepared from UTF-16 text using [sqlite3_prepare16_v2()].
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_count()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName);
+
+/*
+** CAPI3REF: Reset All Bindings On A Prepared Statement
+**
+** ^Contrary to the intuition of many, [sqlite3_reset()] does not reset
+** the [sqlite3_bind_blob | bindings] on a [prepared statement].
+** ^Use this routine to reset all host parameters to NULL.
+*/
+SQLITE_API int sqlite3_clear_bindings(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Number Of Columns In A Result Set
+**
+** ^Return the number of columns in the result set returned by the
+** [prepared statement]. ^This routine returns 0 if pStmt is an SQL
+** statement that does not return data (for example an [UPDATE]).
+**
+** See also: [sqlite3_data_count()]
+*/
+SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Column Names In A Result Set
+**
+** ^These routines return the name assigned to a particular column
+** in the result set of a [SELECT] statement.  ^The sqlite3_column_name()
+** interface returns a pointer to a zero-terminated UTF-8 string
+** and sqlite3_column_name16() returns a pointer to a zero-terminated
+** UTF-16 string.  ^The first parameter is the [prepared statement]
+** that implements the [SELECT] statement. ^The second parameter is the
+** column number.  ^The leftmost column is number 0.
+**
+** ^The returned string pointer is valid until either the [prepared statement]
+** is destroyed by [sqlite3_finalize()] or until the statement is automatically
+** reprepared by the first call to [sqlite3_step()] for a particular run
+** or until the next call to
+** sqlite3_column_name() or sqlite3_column_name16() on the same column.
+**
+** ^If sqlite3_malloc() fails during the processing of either routine
+** (for example during a conversion from UTF-8 to UTF-16) then a
+** NULL pointer is returned.
+**
+** ^The name of a result column is the value of the "AS" clause for
+** that column, if there is an AS clause.  If there is no AS clause
+** then the name of the column is unspecified and may change from
+** one release of SQLite to the next.
+*/
+SQLITE_API const char *sqlite3_column_name(sqlite3_stmt*, int N);
+SQLITE_API const void *sqlite3_column_name16(sqlite3_stmt*, int N);
+
+/*
+** CAPI3REF: Source Of Data In A Query Result
+**
+** ^These routines provide a means to determine the database, table, and
+** table column that is the origin of a particular result column in
+** [SELECT] statement.
+** ^The name of the database or table or column can be returned as
+** either a UTF-8 or UTF-16 string.  ^The _database_ routines return
+** the database name, the _table_ routines return the table name, and
+** the origin_ routines return the column name.
+** ^The returned string is valid until the [prepared statement] is destroyed
+** using [sqlite3_finalize()] or until the statement is automatically
+** reprepared by the first call to [sqlite3_step()] for a particular run
+** or until the same information is requested
+** again in a different encoding.
+**
+** ^The names returned are the original un-aliased names of the
+** database, table, and column.
+**
+** ^The first argument to these interfaces is a [prepared statement].
+** ^These functions return information about the Nth result column returned by
+** the statement, where N is the second function argument.
+** ^The left-most column is column 0 for these routines.
+**
+** ^If the Nth column returned by the statement is an expression or
+** subquery and is not a column value, then all of these functions return
+** NULL.  ^These routine might also return NULL if a memory allocation error
+** occurs.  ^Otherwise, they return the name of the attached database, table,
+** or column that query result column was extracted from.
+**
+** ^As with all other SQLite APIs, those whose names end with "16" return
+** UTF-16 encoded strings and the other functions return UTF-8.
+**
+** ^These APIs are only available if the library was compiled with the
+** [SQLITE_ENABLE_COLUMN_METADATA] C-preprocessor symbol.
+**
+** If two or more threads call one or more of these routines against the same
+** prepared statement and column at the same time then the results are
+** undefined.
+**
+** If two or more threads call one or more
+** [sqlite3_column_database_name | column metadata interfaces]
+** for the same [prepared statement] and result column
+** at the same time then the results are undefined.
+*/
+SQLITE_API const char *sqlite3_column_database_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_database_name16(sqlite3_stmt*,int);
+SQLITE_API const char *sqlite3_column_table_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_table_name16(sqlite3_stmt*,int);
+SQLITE_API const char *sqlite3_column_origin_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt*,int);
+
+/*
+** CAPI3REF: Declared Datatype Of A Query Result
+**
+** ^(The first parameter is a [prepared statement].
+** If this statement is a [SELECT] statement and the Nth column of the
+** returned result set of that [SELECT] is a table column (not an
+** expression or subquery) then the declared type of the table
+** column is returned.)^  ^If the Nth column of the result set is an
+** expression or subquery, then a NULL pointer is returned.
+** ^The returned string is always UTF-8 encoded.
+**
+** ^(For example, given the database schema:
+**
+** CREATE TABLE t1(c1 VARIANT);
+**
+** and the following statement to be compiled:
+**
+** SELECT c1 + 1, c1 FROM t1;
+**
+** this routine would return the string "VARIANT" for the second result
+** column (i==1), and a NULL pointer for the first result column (i==0).)^
+**
+** ^SQLite uses dynamic run-time typing.  ^So just because a column
+** is declared to contain a particular type does not mean that the
+** data stored in that column is of the declared type.  SQLite is
+** strongly typed, but the typing is dynamic not static.  ^Type
+** is associated with individual values, not with the containers
+** used to hold those values.
+*/
+SQLITE_API const char *sqlite3_column_decltype(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_decltype16(sqlite3_stmt*,int);
+
+/*
+** CAPI3REF: Evaluate An SQL Statement
+**
+** After a [prepared statement] has been prepared using either
+** [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] or one of the legacy
+** interfaces [sqlite3_prepare()] or [sqlite3_prepare16()], this function
+** must be called one or more times to evaluate the statement.
+**
+** The details of the behavior of the sqlite3_step() interface depend
+** on whether the statement was prepared using the newer "v2" interface
+** [sqlite3_prepare_v2()] and [sqlite3_prepare16_v2()] or the older legacy
+** interface [sqlite3_prepare()] and [sqlite3_prepare16()].  The use of the
+** new "v2" interface is recommended for new applications but the legacy
+** interface will continue to be supported.
+**
+** ^In the legacy interface, the return value will be either [SQLITE_BUSY],
+** [SQLITE_DONE], [SQLITE_ROW], [SQLITE_ERROR], or [SQLITE_MISUSE].
+** ^With the "v2" interface, any of the other [result codes] or
+** [extended result codes] might be returned as well.
+**
+** ^[SQLITE_BUSY] means that the database engine was unable to acquire the
+** database locks it needs to do its job.  ^If the statement is a [COMMIT]
+** or occurs outside of an explicit transaction, then you can retry the
+** statement.  If the statement is not a [COMMIT] and occurs within an
+** explicit transaction then you should rollback the transaction before
+** continuing.
+**
+** ^[SQLITE_DONE] means that the statement has finished executing
+** successfully.  sqlite3_step() should not be called again on this virtual
+** machine without first calling [sqlite3_reset()] to reset the virtual
+** machine back to its initial state.
+**
+** ^If the SQL statement being executed returns any data, then [SQLITE_ROW]
+** is returned each time a new row of data is ready for processing by the
+** caller. The values may be accessed using the [column access functions].
+** sqlite3_step() is called again to retrieve the next row of data.
+**
+** ^[SQLITE_ERROR] means that a run-time error (such as a constraint
+** violation) has occurred.  sqlite3_step() should not be called again on
+** the VM. More information may be found by calling [sqlite3_errmsg()].
+** ^With the legacy interface, a more specific error code (for example,
+** [SQLITE_INTERRUPT], [SQLITE_SCHEMA], [SQLITE_CORRUPT], and so forth)
+** can be obtained by calling [sqlite3_reset()] on the
+** [prepared statement].  ^In the "v2" interface,
+** the more specific error code is returned directly by sqlite3_step().
+**
+** [SQLITE_MISUSE] means that the this routine was called inappropriately.
+** Perhaps it was called on a [prepared statement] that has
+** already been [sqlite3_finalize | finalized] or on one that had
+** previously returned [SQLITE_ERROR] or [SQLITE_DONE].  Or it could
+** be the case that the same database connection is being used by two or
+** more threads at the same moment in time.
+**
+** For all versions of SQLite up to and including 3.6.23.1, a call to
+** [sqlite3_reset()] was required after sqlite3_step() returned anything
+** other than [SQLITE_ROW] before any subsequent invocation of
+** sqlite3_step().  Failure to reset the prepared statement using 
+** [sqlite3_reset()] would result in an [SQLITE_MISUSE] return from
+** sqlite3_step().  But after version 3.6.23.1, sqlite3_step() began
+** calling [sqlite3_reset()] automatically in this circumstance rather
+** than returning [SQLITE_MISUSE].  This is not considered a compatibility
+** break because any application that ever receives an SQLITE_MISUSE error
+** is broken by definition.  The [SQLITE_OMIT_AUTORESET] compile-time option
+** can be used to restore the legacy behavior.
+**
+** <b>Goofy Interface Alert:</b> In the legacy interface, the sqlite3_step()
+** API always returns a generic error code, [SQLITE_ERROR], following any
+** error other than [SQLITE_BUSY] and [SQLITE_MISUSE].  You must call
+** [sqlite3_reset()] or [sqlite3_finalize()] in order to find one of the
+** specific [error codes] that better describes the error.
+** We admit that this is a goofy design.  The problem has been fixed
+** with the "v2" interface.  If you prepare all of your SQL statements
+** using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] instead
+** of the legacy [sqlite3_prepare()] and [sqlite3_prepare16()] interfaces,
+** then the more specific [error codes] are returned directly
+** by sqlite3_step().  The use of the "v2" interface is recommended.
+*/
+SQLITE_API int sqlite3_step(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Number of columns in a result set
+**
+** ^The sqlite3_data_count(P) interface returns the number of columns in the
+** current row of the result set of [prepared statement] P.
+** ^If prepared statement P does not have results ready to return
+** (via calls to the [sqlite3_column_int | sqlite3_column_*()] of
+** interfaces) then sqlite3_data_count(P) returns 0.
+** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer.
+** ^The sqlite3_data_count(P) routine returns 0 if the previous call to
+** [sqlite3_step](P) returned [SQLITE_DONE].  ^The sqlite3_data_count(P)
+** will return non-zero if previous call to [sqlite3_step](P) returned
+** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum]
+** where it always returns zero since each step of that multi-step
+** pragma returns 0 columns of data.
+**
+** See also: [sqlite3_column_count()]
+*/
+SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Fundamental Datatypes
+** KEYWORDS: SQLITE_TEXT
+**
+** ^(Every value in SQLite has one of five fundamental datatypes:
+**
+** <ul>
+** <li> 64-bit signed integer
+** <li> 64-bit IEEE floating point number
+** <li> string
+** <li> BLOB
+** <li> NULL
+** </ul>)^
+**
+** These constants are codes for each of those types.
+**
+** Note that the SQLITE_TEXT constant was also used in SQLite version 2
+** for a completely different meaning.  Software that links against both
+** SQLite version 2 and SQLite version 3 should use SQLITE3_TEXT, not
+** SQLITE_TEXT.
+*/
+#define SQLITE_INTEGER  1
+#define SQLITE_FLOAT    2
+#define SQLITE_BLOB     4
+#define SQLITE_NULL     5
+#ifdef SQLITE_TEXT
+# undef SQLITE_TEXT
+#else
+# define SQLITE_TEXT     3
+#endif
+#define SQLITE3_TEXT     3
+
+/*
+** CAPI3REF: Result Values From A Query
+** KEYWORDS: {column access functions}
+**
+** These routines form the "result set" interface.
+**
+** ^These routines return information about a single column of the current
+** result row of a query.  ^In every case the first argument is a pointer
+** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*]
+** that was returned from [sqlite3_prepare_v2()] or one of its variants)
+** and the second argument is the index of the column for which information
+** should be returned. ^The leftmost column of the result set has the index 0.
+** ^The number of columns in the result can be determined using
+** [sqlite3_column_count()].
+**
+** If the SQL statement does not currently point to a valid row, or if the
+** column index is out of range, the result is undefined.
+** These routines may only be called when the most recent call to
+** [sqlite3_step()] has returned [SQLITE_ROW] and neither
+** [sqlite3_reset()] nor [sqlite3_finalize()] have been called subsequently.
+** If any of these routines are called after [sqlite3_reset()] or
+** [sqlite3_finalize()] or after [sqlite3_step()] has returned
+** something other than [SQLITE_ROW], the results are undefined.
+** If [sqlite3_step()] or [sqlite3_reset()] or [sqlite3_finalize()]
+** are called from a different thread while any of these routines
+** are pending, then the results are undefined.
+**
+** ^The sqlite3_column_type() routine returns the
+** [SQLITE_INTEGER | datatype code] for the initial data type
+** of the result column.  ^The returned value is one of [SQLITE_INTEGER],
+** [SQLITE_FLOAT], [SQLITE_TEXT], [SQLITE_BLOB], or [SQLITE_NULL].  The value
+** returned by sqlite3_column_type() is only meaningful if no type
+** conversions have occurred as described below.  After a type conversion,
+** the value returned by sqlite3_column_type() is undefined.  Future
+** versions of SQLite may change the behavior of sqlite3_column_type()
+** following a type conversion.
+**
+** ^If the result is a BLOB or UTF-8 string then the sqlite3_column_bytes()
+** routine returns the number of bytes in that BLOB or string.
+** ^If the result is a UTF-16 string, then sqlite3_column_bytes() converts
+** the string to UTF-8 and then returns the number of bytes.
+** ^If the result is a numeric value then sqlite3_column_bytes() uses
+** [sqlite3_snprintf()] to convert that value to a UTF-8 string and returns
+** the number of bytes in that string.
+** ^If the result is NULL, then sqlite3_column_bytes() returns zero.
+**
+** ^If the result is a BLOB or UTF-16 string then the sqlite3_column_bytes16()
+** routine returns the number of bytes in that BLOB or string.
+** ^If the result is a UTF-8 string, then sqlite3_column_bytes16() converts
+** the string to UTF-16 and then returns the number of bytes.
+** ^If the result is a numeric value then sqlite3_column_bytes16() uses
+** [sqlite3_snprintf()] to convert that value to a UTF-16 string and returns
+** the number of bytes in that string.
+** ^If the result is NULL, then sqlite3_column_bytes16() returns zero.
+**
+** ^The values returned by [sqlite3_column_bytes()] and 
+** [sqlite3_column_bytes16()] do not include the zero terminators at the end
+** of the string.  ^For clarity: the values returned by
+** [sqlite3_column_bytes()] and [sqlite3_column_bytes16()] are the number of
+** bytes in the string, not the number of characters.
+**
+** ^Strings returned by sqlite3_column_text() and sqlite3_column_text16(),
+** even empty strings, are always zero-terminated.  ^The return
+** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer.
+**
+** ^The object returned by [sqlite3_column_value()] is an
+** [unprotected sqlite3_value] object.  An unprotected sqlite3_value object
+** may only be used with [sqlite3_bind_value()] and [sqlite3_result_value()].
+** If the [unprotected sqlite3_value] object returned by
+** [sqlite3_column_value()] is used in any other way, including calls
+** to routines like [sqlite3_value_int()], [sqlite3_value_text()],
+** or [sqlite3_value_bytes()], then the behavior is undefined.
+**
+** These routines attempt to convert the value where appropriate.  ^For
+** example, if the internal representation is FLOAT and a text result
+** is requested, [sqlite3_snprintf()] is used internally to perform the
+** conversion automatically.  ^(The following table details the conversions
+** that are applied:
+**
+** <blockquote>
+** <table border="1">
+** <tr><th> Internal<br>Type <th> Requested<br>Type <th>  Conversion
+**
+** <tr><td>  NULL    <td> INTEGER   <td> Result is 0
+** <tr><td>  NULL    <td>  FLOAT    <td> Result is 0.0
+** <tr><td>  NULL    <td>   TEXT    <td> Result is a NULL pointer
+** <tr><td>  NULL    <td>   BLOB    <td> Result is a NULL pointer
+** <tr><td> INTEGER  <td>  FLOAT    <td> Convert from integer to float
+** <tr><td> INTEGER  <td>   TEXT    <td> ASCII rendering of the integer
+** <tr><td> INTEGER  <td>   BLOB    <td> Same as INTEGER->TEXT
+** <tr><td>  FLOAT   <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  FLOAT   <td>   TEXT    <td> ASCII rendering of the float
+** <tr><td>  FLOAT   <td>   BLOB    <td> [CAST] to BLOB
+** <tr><td>  TEXT    <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  TEXT    <td>  FLOAT    <td> [CAST] to REAL
+** <tr><td>  TEXT    <td>   BLOB    <td> No change
+** <tr><td>  BLOB    <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  BLOB    <td>  FLOAT    <td> [CAST] to REAL
+** <tr><td>  BLOB    <td>   TEXT    <td> Add a zero terminator if needed
+** </table>
+** </blockquote>)^
+**
+** The table above makes reference to standard C library functions atoi()
+** and atof().  SQLite does not really use these functions.  It has its
+** own equivalent internal routines.  The atoi() and atof() names are
+** used in the table for brevity and because they are familiar to most
+** C programmers.
+**
+** Note that when type conversions occur, pointers returned by prior
+** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or
+** sqlite3_column_text16() may be invalidated.
+** Type conversions and pointer invalidations might occur
+** in the following cases:
+**
+** <ul>
+** <li> The initial content is a BLOB and sqlite3_column_text() or
+**      sqlite3_column_text16() is called.  A zero-terminator might
+**      need to be added to the string.</li>
+** <li> The initial content is UTF-8 text and sqlite3_column_bytes16() or
+**      sqlite3_column_text16() is called.  The content must be converted
+**      to UTF-16.</li>
+** <li> The initial content is UTF-16 text and sqlite3_column_bytes() or
+**      sqlite3_column_text() is called.  The content must be converted
+**      to UTF-8.</li>
+** </ul>
+**
+** ^Conversions between UTF-16be and UTF-16le are always done in place and do
+** not invalidate a prior pointer, though of course the content of the buffer
+** that the prior pointer references will have been modified.  Other kinds
+** of conversion are done in place when it is possible, but sometimes they
+** are not possible and in those cases prior pointers are invalidated.
+**
+** The safest and easiest to remember policy is to invoke these routines
+** in one of the following ways:
+**
+** <ul>
+**  <li>sqlite3_column_text() followed by sqlite3_column_bytes()</li>
+**  <li>sqlite3_column_blob() followed by sqlite3_column_bytes()</li>
+**  <li>sqlite3_column_text16() followed by sqlite3_column_bytes16()</li>
+** </ul>
+**
+** In other words, you should call sqlite3_column_text(),
+** sqlite3_column_blob(), or sqlite3_column_text16() first to force the result
+** into the desired format, then invoke sqlite3_column_bytes() or
+** sqlite3_column_bytes16() to find the size of the result.  Do not mix calls
+** to sqlite3_column_text() or sqlite3_column_blob() with calls to
+** sqlite3_column_bytes16(), and do not mix calls to sqlite3_column_text16()
+** with calls to sqlite3_column_bytes().
+**
+** ^The pointers returned are valid until a type conversion occurs as
+** described above, or until [sqlite3_step()] or [sqlite3_reset()] or
+** [sqlite3_finalize()] is called.  ^The memory space used to hold strings
+** and BLOBs is freed automatically.  Do <b>not</b> pass the pointers returned
+** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into
+** [sqlite3_free()].
+**
+** ^(If a memory allocation error occurs during the evaluation of any
+** of these routines, a default value is returned.  The default value
+** is either the integer 0, the floating point number 0.0, or a NULL
+** pointer.  Subsequent calls to [sqlite3_errcode()] will return
+** [SQLITE_NOMEM].)^
+*/
+SQLITE_API const void *sqlite3_column_blob(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_bytes(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_bytes16(sqlite3_stmt*, int iCol);
+SQLITE_API double sqlite3_column_double(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_int(sqlite3_stmt*, int iCol);
+SQLITE_API sqlite3_int64 sqlite3_column_int64(sqlite3_stmt*, int iCol);
+SQLITE_API const unsigned char *sqlite3_column_text(sqlite3_stmt*, int iCol);
+SQLITE_API const void *sqlite3_column_text16(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_type(sqlite3_stmt*, int iCol);
+SQLITE_API sqlite3_value *sqlite3_column_value(sqlite3_stmt*, int iCol);
+
+/*
+** CAPI3REF: Destroy A Prepared Statement Object
+**
+** ^The sqlite3_finalize() function is called to delete a [prepared statement].
+** ^If the most recent evaluation of the statement encountered no errors
+** or if the statement is never been evaluated, then sqlite3_finalize() returns
+** SQLITE_OK.  ^If the most recent evaluation of statement S failed, then
+** sqlite3_finalize(S) returns the appropriate [error code] or
+** [extended error code].
+**
+** ^The sqlite3_finalize(S) routine can be called at any point during
+** the life cycle of [prepared statement] S:
+** before statement S is ever evaluated, after
+** one or more calls to [sqlite3_reset()], or after any call
+** to [sqlite3_step()] regardless of whether or not the statement has
+** completed execution.
+**
+** ^Invoking sqlite3_finalize() on a NULL pointer is a harmless no-op.
+**
+** The application must finalize every [prepared statement] in order to avoid
+** resource leaks.  It is a grievous error for the application to try to use
+** a prepared statement after it has been finalized.  Any use of a prepared
+** statement after it has been finalized can result in undefined and
+** undesirable behavior such as segfaults and heap corruption.
+*/
+SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Reset A Prepared Statement Object
+**
+** The sqlite3_reset() function is called to reset a [prepared statement]
+** object back to its initial state, ready to be re-executed.
+** ^Any SQL statement variables that had values bound to them using
+** the [sqlite3_bind_blob | sqlite3_bind_*() API] retain their values.
+** Use [sqlite3_clear_bindings()] to reset the bindings.
+**
+** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S
+** back to the beginning of its program.
+**
+** ^If the most recent call to [sqlite3_step(S)] for the
+** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE],
+** or if [sqlite3_step(S)] has never before been called on S,
+** then [sqlite3_reset(S)] returns [SQLITE_OK].
+**
+** ^If the most recent call to [sqlite3_step(S)] for the
+** [prepared statement] S indicated an error, then
+** [sqlite3_reset(S)] returns an appropriate [error code].
+**
+** ^The [sqlite3_reset(S)] interface does not change the values
+** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S.
+*/
+SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Create Or Redefine SQL Functions
+** KEYWORDS: {function creation routines}
+** KEYWORDS: {application-defined SQL function}
+** KEYWORDS: {application-defined SQL functions}
+**
+** ^These functions (collectively known as "function creation routines")
+** are used to add SQL functions or aggregates or to redefine the behavior
+** of existing SQL functions or aggregates.  The only differences between
+** these routines are the text encoding expected for
+** the second parameter (the name of the function being created)
+** and the presence or absence of a destructor callback for
+** the application data pointer.
+**
+** ^The first parameter is the [database connection] to which the SQL
+** function is to be added.  ^If an application uses more than one database
+** connection then application-defined SQL functions must be added
+** to each database connection separately.
+**
+** ^The second parameter is the name of the SQL function to be created or
+** redefined.  ^The length of the name is limited to 255 bytes in a UTF-8
+** representation, exclusive of the zero-terminator.  ^Note that the name
+** length limit is in UTF-8 bytes, not characters nor UTF-16 bytes.  
+** ^Any attempt to create a function with a longer name
+** will result in [SQLITE_MISUSE] being returned.
+**
+** ^The third parameter (nArg)
+** is the number of arguments that the SQL function or
+** aggregate takes. ^If this parameter is -1, then the SQL function or
+** aggregate may take any number of arguments between 0 and the limit
+** set by [sqlite3_limit]([SQLITE_LIMIT_FUNCTION_ARG]).  If the third
+** parameter is less than -1 or greater than 127 then the behavior is
+** undefined.
+**
+** ^The fourth parameter, eTextRep, specifies what
+** [SQLITE_UTF8 | text encoding] this SQL function prefers for
+** its parameters.  The application should set this parameter to
+** [SQLITE_UTF16LE] if the function implementation invokes 
+** [sqlite3_value_text16le()] on an input, or [SQLITE_UTF16BE] if the
+** implementation invokes [sqlite3_value_text16be()] on an input, or
+** [SQLITE_UTF16] if [sqlite3_value_text16()] is used, or [SQLITE_UTF8]
+** otherwise.  ^The same SQL function may be registered multiple times using
+** different preferred text encodings, with different implementations for
+** each encoding.
+** ^When multiple implementations of the same function are available, SQLite
+** will pick the one that involves the least amount of data conversion.
+**
+** ^The fourth parameter may optionally be ORed with [SQLITE_DETERMINISTIC]
+** to signal that the function will always return the same result given
+** the same inputs within a single SQL statement.  Most SQL functions are
+** deterministic.  The built-in [random()] SQL function is an example of a
+** function that is not deterministic.  The SQLite query planner is able to
+** perform additional optimizations on deterministic functions, so use
+** of the [SQLITE_DETERMINISTIC] flag is recommended where possible.
+**
+** ^(The fifth parameter is an arbitrary pointer.  The implementation of the
+** function can gain access to this pointer using [sqlite3_user_data()].)^
+**
+** ^The sixth, seventh and eighth parameters, xFunc, xStep and xFinal, are
+** pointers to C-language functions that implement the SQL function or
+** aggregate. ^A scalar SQL function requires an implementation of the xFunc
+** callback only; NULL pointers must be passed as the xStep and xFinal
+** parameters. ^An aggregate SQL function requires an implementation of xStep
+** and xFinal and NULL pointer must be passed for xFunc. ^To delete an existing
+** SQL function or aggregate, pass NULL pointers for all three function
+** callbacks.
+**
+** ^(If the ninth parameter to sqlite3_create_function_v2() is not NULL,
+** then it is destructor for the application data pointer. 
+** The destructor is invoked when the function is deleted, either by being
+** overloaded or when the database connection closes.)^
+** ^The destructor is also invoked if the call to
+** sqlite3_create_function_v2() fails.
+** ^When the destructor callback of the tenth parameter is invoked, it
+** is passed a single argument which is a copy of the application data 
+** pointer which was the fifth parameter to sqlite3_create_function_v2().
+**
+** ^It is permitted to register multiple implementations of the same
+** functions with the same name but with either differing numbers of
+** arguments or differing preferred text encodings.  ^SQLite will use
+** the implementation that most closely matches the way in which the
+** SQL function is used.  ^A function implementation with a non-negative
+** nArg parameter is a better match than a function implementation with
+** a negative nArg.  ^A function where the preferred text encoding
+** matches the database encoding is a better
+** match than a function where the encoding is different.  
+** ^A function where the encoding difference is between UTF16le and UTF16be
+** is a closer match than a function where the encoding difference is
+** between UTF8 and UTF16.
+**
+** ^Built-in functions may be overloaded by new application-defined functions.
+**
+** ^An application-defined function is permitted to call other
+** SQLite interfaces.  However, such calls must not
+** close the database connection nor finalize or reset the prepared
+** statement in which the function is running.
+*/
+SQLITE_API int sqlite3_create_function(
+  sqlite3 *db,
+  const char *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*)
+);
+SQLITE_API int sqlite3_create_function16(
+  sqlite3 *db,
+  const void *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*)
+);
+SQLITE_API int sqlite3_create_function_v2(
+  sqlite3 *db,
+  const char *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*),
+  void(*xDestroy)(void*)
+);
+
+/*
+** CAPI3REF: Text Encodings
+**
+** These constant define integer codes that represent the various
+** text encodings supported by SQLite.
+*/
+#define SQLITE_UTF8           1    /* IMP: R-37514-35566 */
+#define SQLITE_UTF16LE        2    /* IMP: R-03371-37637 */
+#define SQLITE_UTF16BE        3    /* IMP: R-51971-34154 */
+#define SQLITE_UTF16          4    /* Use native byte order */
+#define SQLITE_ANY            5    /* Deprecated */
+#define SQLITE_UTF16_ALIGNED  8    /* sqlite3_create_collation only */
+
+/*
+** CAPI3REF: Function Flags
+**
+** These constants may be ORed together with the 
+** [SQLITE_UTF8 | preferred text encoding] as the fourth argument
+** to [sqlite3_create_function()], [sqlite3_create_function16()], or
+** [sqlite3_create_function_v2()].
+*/
+#define SQLITE_DETERMINISTIC    0x800
+
+/*
+** CAPI3REF: Deprecated Functions
+** DEPRECATED
+**
+** These functions are [deprecated].  In order to maintain
+** backwards compatibility with older code, these functions continue 
+** to be supported.  However, new applications should avoid
+** the use of these functions.  To help encourage people to avoid
+** using these functions, we are not going to tell you what they do.
+*/
+#ifndef SQLITE_OMIT_DEPRECATED
+SQLITE_API SQLITE_DEPRECATED int sqlite3_aggregate_count(sqlite3_context*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_expired(sqlite3_stmt*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_transfer_bindings(sqlite3_stmt*, sqlite3_stmt*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_global_recover(void);
+SQLITE_API SQLITE_DEPRECATED void sqlite3_thread_cleanup(void);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_memory_alarm(void(*)(void*,sqlite3_int64,int),
+                      void*,sqlite3_int64);
+#endif
+
+/*
+** CAPI3REF: Obtaining SQL Function Parameter Values
+**
+** The C-language implementation of SQL functions and aggregates uses
+** this set of interface routines to access the parameter values on
+** the function or aggregate.
+**
+** The xFunc (for scalar functions) or xStep (for aggregates) parameters
+** to [sqlite3_create_function()] and [sqlite3_create_function16()]
+** define callbacks that implement the SQL functions and aggregates.
+** The 3rd parameter to these callbacks is an array of pointers to
+** [protected sqlite3_value] objects.  There is one [sqlite3_value] object for
+** each parameter to the SQL function.  These routines are used to
+** extract values from the [sqlite3_value] objects.
+**
+** These routines work only with [protected sqlite3_value] objects.
+** Any attempt to use these routines on an [unprotected sqlite3_value]
+** object results in undefined behavior.
+**
+** ^These routines work just like the corresponding [column access functions]
+** except that these routines take a single [protected sqlite3_value] object
+** pointer instead of a [sqlite3_stmt*] pointer and an integer column number.
+**
+** ^The sqlite3_value_text16() interface extracts a UTF-16 string
+** in the native byte-order of the host machine.  ^The
+** sqlite3_value_text16be() and sqlite3_value_text16le() interfaces
+** extract UTF-16 strings as big-endian and little-endian respectively.
+**
+** ^(The sqlite3_value_numeric_type() interface attempts to apply
+** numeric affinity to the value.  This means that an attempt is
+** made to convert the value to an integer or floating point.  If
+** such a conversion is possible without loss of information (in other
+** words, if the value is a string that looks like a number)
+** then the conversion is performed.  Otherwise no conversion occurs.
+** The [SQLITE_INTEGER | datatype] after conversion is returned.)^
+**
+** Please pay particular attention to the fact that the pointer returned
+** from [sqlite3_value_blob()], [sqlite3_value_text()], or
+** [sqlite3_value_text16()] can be invalidated by a subsequent call to
+** [sqlite3_value_bytes()], [sqlite3_value_bytes16()], [sqlite3_value_text()],
+** or [sqlite3_value_text16()].
+**
+** These routines must be called from the same thread as
+** the SQL function that supplied the [sqlite3_value*] parameters.
+*/
+SQLITE_API const void *sqlite3_value_blob(sqlite3_value*);
+SQLITE_API int sqlite3_value_bytes(sqlite3_value*);
+SQLITE_API int sqlite3_value_bytes16(sqlite3_value*);
+SQLITE_API double sqlite3_value_double(sqlite3_value*);
+SQLITE_API int sqlite3_value_int(sqlite3_value*);
+SQLITE_API sqlite3_int64 sqlite3_value_int64(sqlite3_value*);
+SQLITE_API const unsigned char *sqlite3_value_text(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16le(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16be(sqlite3_value*);
+SQLITE_API int sqlite3_value_type(sqlite3_value*);
+SQLITE_API int sqlite3_value_numeric_type(sqlite3_value*);
+
+/*
+** CAPI3REF: Obtain Aggregate Function Context
+**
+** Implementations of aggregate SQL functions use this
+** routine to allocate memory for storing their state.
+**
+** ^The first time the sqlite3_aggregate_context(C,N) routine is called 
+** for a particular aggregate function, SQLite
+** allocates N of memory, zeroes out that memory, and returns a pointer
+** to the new memory. ^On second and subsequent calls to
+** sqlite3_aggregate_context() for the same aggregate function instance,
+** the same buffer is returned.  Sqlite3_aggregate_context() is normally
+** called once for each invocation of the xStep callback and then one
+** last time when the xFinal callback is invoked.  ^(When no rows match
+** an aggregate query, the xStep() callback of the aggregate function
+** implementation is never called and xFinal() is called exactly once.
+** In those cases, sqlite3_aggregate_context() might be called for the
+** first time from within xFinal().)^
+**
+** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer 
+** when first called if N is less than or equal to zero or if a memory
+** allocate error occurs.
+**
+** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is
+** determined by the N parameter on first successful call.  Changing the
+** value of N in subsequent call to sqlite3_aggregate_context() within
+** the same aggregate function instance will not resize the memory
+** allocation.)^  Within the xFinal callback, it is customary to set
+** N=0 in calls to sqlite3_aggregate_context(C,N) so that no 
+** pointless memory allocations occur.
+**
+** ^SQLite automatically frees the memory allocated by 
+** sqlite3_aggregate_context() when the aggregate query concludes.
+**
+** The first parameter must be a copy of the
+** [sqlite3_context | SQL function context] that is the first parameter
+** to the xStep or xFinal callback routine that implements the aggregate
+** function.
+**
+** This routine must be called from the same thread in which
+** the aggregate SQL function is running.
+*/
+SQLITE_API void *sqlite3_aggregate_context(sqlite3_context*, int nBytes);
+
+/*
+** CAPI3REF: User Data For Functions
+**
+** ^The sqlite3_user_data() interface returns a copy of
+** the pointer that was the pUserData parameter (the 5th parameter)
+** of the [sqlite3_create_function()]
+** and [sqlite3_create_function16()] routines that originally
+** registered the application defined function.
+**
+** This routine must be called from the same thread in which
+** the application-defined function is running.
+*/
+SQLITE_API void *sqlite3_user_data(sqlite3_context*);
+
+/*
+** CAPI3REF: Database Connection For Functions
+**
+** ^The sqlite3_context_db_handle() interface returns a copy of
+** the pointer to the [database connection] (the 1st parameter)
+** of the [sqlite3_create_function()]
+** and [sqlite3_create_function16()] routines that originally
+** registered the application defined function.
+*/
+SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*);
+
+/*
+** CAPI3REF: Function Auxiliary Data
+**
+** These functions may be used by (non-aggregate) SQL functions to
+** associate metadata with argument values. If the same value is passed to
+** multiple invocations of the same SQL function during query execution, under
+** some circumstances the associated metadata may be preserved.  An example
+** of where this might be useful is in a regular-expression matching
+** function. The compiled version of the regular expression can be stored as
+** metadata associated with the pattern string.  
+** Then as long as the pattern string remains the same,
+** the compiled regular expression can be reused on multiple
+** invocations of the same function.
+**
+** ^The sqlite3_get_auxdata() interface returns a pointer to the metadata
+** associated by the sqlite3_set_auxdata() function with the Nth argument
+** value to the application-defined function. ^If there is no metadata
+** associated with the function argument, this sqlite3_get_auxdata() interface
+** returns a NULL pointer.
+**
+** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th
+** argument of the application-defined function.  ^Subsequent
+** calls to sqlite3_get_auxdata(C,N) return P from the most recent
+** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or
+** NULL if the metadata has been discarded.
+** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL,
+** SQLite will invoke the destructor function X with parameter P exactly
+** once, when the metadata is discarded.
+** SQLite is free to discard the metadata at any time, including: <ul>
+** <li> when the corresponding function parameter changes, or
+** <li> when [sqlite3_reset()] or [sqlite3_finalize()] is called for the
+**      SQL statement, or
+** <li> when sqlite3_set_auxdata() is invoked again on the same parameter, or
+** <li> during the original sqlite3_set_auxdata() call when a memory 
+**      allocation error occurs. </ul>)^
+**
+** Note the last bullet in particular.  The destructor X in 
+** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the
+** sqlite3_set_auxdata() interface even returns.  Hence sqlite3_set_auxdata()
+** should be called near the end of the function implementation and the
+** function implementation should not make any use of P after
+** sqlite3_set_auxdata() has been called.
+**
+** ^(In practice, metadata is preserved between function calls for
+** function parameters that are compile-time constants, including literal
+** values and [parameters] and expressions composed from the same.)^
+**
+** These routines must be called from the same thread in which
+** the SQL function is running.
+*/
+SQLITE_API void *sqlite3_get_auxdata(sqlite3_context*, int N);
+SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*));
+
+
+/*
+** CAPI3REF: Constants Defining Special Destructor Behavior
+**
+** These are special values for the destructor that is passed in as the
+** final argument to routines like [sqlite3_result_blob()].  ^If the destructor
+** argument is SQLITE_STATIC, it means that the content pointer is constant
+** and will never change.  It does not need to be destroyed.  ^The
+** SQLITE_TRANSIENT value means that the content will likely change in
+** the near future and that SQLite should make its own private copy of
+** the content before returning.
+**
+** The typedef is necessary to work around problems in certain
+** C++ compilers.
+*/
+typedef void (*sqlite3_destructor_type)(void*);
+#define SQLITE_STATIC      ((sqlite3_destructor_type)0)
+#define SQLITE_TRANSIENT   ((sqlite3_destructor_type)-1)
+
+/*
+** CAPI3REF: Setting The Result Of An SQL Function
+**
+** These routines are used by the xFunc or xFinal callbacks that
+** implement SQL functions and aggregates.  See
+** [sqlite3_create_function()] and [sqlite3_create_function16()]
+** for additional information.
+**
+** These functions work very much like the [parameter binding] family of
+** functions used to bind values to host parameters in prepared statements.
+** Refer to the [SQL parameter] documentation for additional information.
+**
+** ^The sqlite3_result_blob() interface sets the result from
+** an application-defined function to be the BLOB whose content is pointed
+** to by the second parameter and which is N bytes long where N is the
+** third parameter.
+**
+** ^The sqlite3_result_zeroblob() interfaces set the result of
+** the application-defined function to be a BLOB containing all zero
+** bytes and N bytes in size, where N is the value of the 2nd parameter.
+**
+** ^The sqlite3_result_double() interface sets the result from
+** an application-defined function to be a floating point value specified
+** by its 2nd argument.
+**
+** ^The sqlite3_result_error() and sqlite3_result_error16() functions
+** cause the implemented SQL function to throw an exception.
+** ^SQLite uses the string pointed to by the
+** 2nd parameter of sqlite3_result_error() or sqlite3_result_error16()
+** as the text of an error message.  ^SQLite interprets the error
+** message string from sqlite3_result_error() as UTF-8. ^SQLite
+** interprets the string from sqlite3_result_error16() as UTF-16 in native
+** byte order.  ^If the third parameter to sqlite3_result_error()
+** or sqlite3_result_error16() is negative then SQLite takes as the error
+** message all text up through the first zero character.
+** ^If the third parameter to sqlite3_result_error() or
+** sqlite3_result_error16() is non-negative then SQLite takes that many
+** bytes (not characters) from the 2nd parameter as the error message.
+** ^The sqlite3_result_error() and sqlite3_result_error16()
+** routines make a private copy of the error message text before
+** they return.  Hence, the calling function can deallocate or
+** modify the text after they return without harm.
+** ^The sqlite3_result_error_code() function changes the error code
+** returned by SQLite as a result of an error in a function.  ^By default,
+** the error code is SQLITE_ERROR.  ^A subsequent call to sqlite3_result_error()
+** or sqlite3_result_error16() resets the error code to SQLITE_ERROR.
+**
+** ^The sqlite3_result_error_toobig() interface causes SQLite to throw an
+** error indicating that a string or BLOB is too long to represent.
+**
+** ^The sqlite3_result_error_nomem() interface causes SQLite to throw an
+** error indicating that a memory allocation failed.
+**
+** ^The sqlite3_result_int() interface sets the return value
+** of the application-defined function to be the 32-bit signed integer
+** value given in the 2nd argument.
+** ^The sqlite3_result_int64() interface sets the return value
+** of the application-defined function to be the 64-bit signed integer
+** value given in the 2nd argument.
+**
+** ^The sqlite3_result_null() interface sets the return value
+** of the application-defined function to be NULL.
+**
+** ^The sqlite3_result_text(), sqlite3_result_text16(),
+** sqlite3_result_text16le(), and sqlite3_result_text16be() interfaces
+** set the return value of the application-defined function to be
+** a text string which is represented as UTF-8, UTF-16 native byte order,
+** UTF-16 little endian, or UTF-16 big endian, respectively.
+** ^The sqlite3_result_text64() interface sets the return value of an
+** application-defined function to be a text string in an encoding
+** specified by the fifth (and last) parameter, which must be one
+** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE].
+** ^SQLite takes the text result from the application from
+** the 2nd parameter of the sqlite3_result_text* interfaces.
+** ^If the 3rd parameter to the sqlite3_result_text* interfaces
+** is negative, then SQLite takes result text from the 2nd parameter
+** through the first zero character.
+** ^If the 3rd parameter to the sqlite3_result_text* interfaces
+** is non-negative, then as many bytes (not characters) of the text
+** pointed to by the 2nd parameter are taken as the application-defined
+** function result.  If the 3rd parameter is non-negative, then it
+** must be the byte offset into the string where the NUL terminator would
+** appear if the string where NUL terminated.  If any NUL characters occur
+** in the string at a byte offset that is less than the value of the 3rd
+** parameter, then the resulting string will contain embedded NULs and the
+** result of expressions operating on strings with embedded NULs is undefined.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces
+** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that
+** function as the destructor on the text or BLOB result when it has
+** finished using that result.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces or to
+** sqlite3_result_blob is the special constant SQLITE_STATIC, then SQLite
+** assumes that the text or BLOB result is in constant space and does not
+** copy the content of the parameter nor call a destructor on the content
+** when it has finished using that result.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces
+** or sqlite3_result_blob is the special constant SQLITE_TRANSIENT
+** then SQLite makes a copy of the result into space obtained from
+** from [sqlite3_malloc()] before it returns.
+**
+** ^The sqlite3_result_value() interface sets the result of
+** the application-defined function to be a copy the
+** [unprotected sqlite3_value] object specified by the 2nd parameter.  ^The
+** sqlite3_result_value() interface makes a copy of the [sqlite3_value]
+** so that the [sqlite3_value] specified in the parameter may change or
+** be deallocated after sqlite3_result_value() returns without harm.
+** ^A [protected sqlite3_value] object may always be used where an
+** [unprotected sqlite3_value] object is required, so either
+** kind of [sqlite3_value] object can be used with this interface.
+**
+** If these routines are called from within the different thread
+** than the one containing the application-defined function that received
+** the [sqlite3_context] pointer, the results are undefined.
+*/
+SQLITE_API void sqlite3_result_blob(sqlite3_context*, const void*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_blob64(sqlite3_context*,const void*,
+                           sqlite3_uint64,void(*)(void*));
+SQLITE_API void sqlite3_result_double(sqlite3_context*, double);
+SQLITE_API void sqlite3_result_error(sqlite3_context*, const char*, int);
+SQLITE_API void sqlite3_result_error16(sqlite3_context*, const void*, int);
+SQLITE_API void sqlite3_result_error_toobig(sqlite3_context*);
+SQLITE_API void sqlite3_result_error_nomem(sqlite3_context*);
+SQLITE_API void sqlite3_result_error_code(sqlite3_context*, int);
+SQLITE_API void sqlite3_result_int(sqlite3_context*, int);
+SQLITE_API void sqlite3_result_int64(sqlite3_context*, sqlite3_int64);
+SQLITE_API void sqlite3_result_null(sqlite3_context*);
+SQLITE_API void sqlite3_result_text(sqlite3_context*, const char*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_text64(sqlite3_context*, const char*,sqlite3_uint64,
+                           void(*)(void*), unsigned char encoding);
+SQLITE_API void sqlite3_result_text16(sqlite3_context*, const void*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_text16le(sqlite3_context*, const void*, int,void(*)(void*));
+SQLITE_API void sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*));
+SQLITE_API void sqlite3_result_value(sqlite3_context*, sqlite3_value*);
+SQLITE_API void sqlite3_result_zeroblob(sqlite3_context*, int n);
+
+/*
+** CAPI3REF: Define New Collating Sequences
+**
+** ^These functions add, remove, or modify a [collation] associated
+** with the [database connection] specified as the first argument.
+**
+** ^The name of the collation is a UTF-8 string
+** for sqlite3_create_collation() and sqlite3_create_collation_v2()
+** and a UTF-16 string in native byte order for sqlite3_create_collation16().
+** ^Collation names that compare equal according to [sqlite3_strnicmp()] are
+** considered to be the same name.
+**
+** ^(The third argument (eTextRep) must be one of the constants:
+** <ul>
+** <li> [SQLITE_UTF8],
+** <li> [SQLITE_UTF16LE],
+** <li> [SQLITE_UTF16BE],
+** <li> [SQLITE_UTF16], or
+** <li> [SQLITE_UTF16_ALIGNED].
+** </ul>)^
+** ^The eTextRep argument determines the encoding of strings passed
+** to the collating function callback, xCallback.
+** ^The [SQLITE_UTF16] and [SQLITE_UTF16_ALIGNED] values for eTextRep
+** force strings to be UTF16 with native byte order.
+** ^The [SQLITE_UTF16_ALIGNED] value for eTextRep forces strings to begin
+** on an even byte address.
+**
+** ^The fourth argument, pArg, is an application data pointer that is passed
+** through as the first argument to the collating function callback.
+**
+** ^The fifth argument, xCallback, is a pointer to the collating function.
+** ^Multiple collating functions can be registered using the same name but
+** with different eTextRep parameters and SQLite will use whichever
+** function requires the least amount of data transformation.
+** ^If the xCallback argument is NULL then the collating function is
+** deleted.  ^When all collating functions having the same name are deleted,
+** that collation is no longer usable.
+**
+** ^The collating function callback is invoked with a copy of the pArg 
+** application data pointer and with two strings in the encoding specified
+** by the eTextRep argument.  The collating function must return an
+** integer that is negative, zero, or positive
+** if the first string is less than, equal to, or greater than the second,
+** respectively.  A collating function must always return the same answer
+** given the same inputs.  If two or more collating functions are registered
+** to the same collation name (using different eTextRep values) then all
+** must give an equivalent answer when invoked with equivalent strings.
+** The collating function must obey the following properties for all
+** strings A, B, and C:
+**
+** <ol>
+** <li> If A==B then B==A.
+** <li> If A==B and B==C then A==C.
+** <li> If A&lt;B THEN B&gt;A.
+** <li> If A&lt;B and B&lt;C then A&lt;C.
+** </ol>
+**
+** If a collating function fails any of the above constraints and that
+** collating function is  registered and used, then the behavior of SQLite
+** is undefined.
+**
+** ^The sqlite3_create_collation_v2() works like sqlite3_create_collation()
+** with the addition that the xDestroy callback is invoked on pArg when
+** the collating function is deleted.
+** ^Collating functions are deleted when they are overridden by later
+** calls to the collation creation functions or when the
+** [database connection] is closed using [sqlite3_close()].
+**
+** ^The xDestroy callback is <u>not</u> called if the 
+** sqlite3_create_collation_v2() function fails.  Applications that invoke
+** sqlite3_create_collation_v2() with a non-NULL xDestroy argument should 
+** check the return code and dispose of the application data pointer
+** themselves rather than expecting SQLite to deal with it for them.
+** This is different from every other SQLite interface.  The inconsistency 
+** is unfortunate but cannot be changed without breaking backwards 
+** compatibility.
+**
+** See also:  [sqlite3_collation_needed()] and [sqlite3_collation_needed16()].
+*/
+SQLITE_API int sqlite3_create_collation(
+  sqlite3*, 
+  const char *zName, 
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+);
+SQLITE_API int sqlite3_create_collation_v2(
+  sqlite3*, 
+  const char *zName, 
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*),
+  void(*xDestroy)(void*)
+);
+SQLITE_API int sqlite3_create_collation16(
+  sqlite3*, 
+  const void *zName,
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+);
+
+/*
+** CAPI3REF: Collation Needed Callbacks
+**
+** ^To avoid having to register all collation sequences before a database
+** can be used, a single callback function may be registered with the
+** [database connection] to be invoked whenever an undefined collation
+** sequence is required.
+**
+** ^If the function is registered using the sqlite3_collation_needed() API,
+** then it is passed the names of undefined collation sequences as strings
+** encoded in UTF-8. ^If sqlite3_collation_needed16() is used,
+** the names are passed as UTF-16 in machine native byte order.
+** ^A call to either function replaces the existing collation-needed callback.
+**
+** ^(When the callback is invoked, the first argument passed is a copy
+** of the second argument to sqlite3_collation_needed() or
+** sqlite3_collation_needed16().  The second argument is the database
+** connection.  The third argument is one of [SQLITE_UTF8], [SQLITE_UTF16BE],
+** or [SQLITE_UTF16LE], indicating the most desirable form of the collation
+** sequence function required.  The fourth parameter is the name of the
+** required collation sequence.)^
+**
+** The callback function should register the desired collation using
+** [sqlite3_create_collation()], [sqlite3_create_collation16()], or
+** [sqlite3_create_collation_v2()].
+*/
+SQLITE_API int sqlite3_collation_needed(
+  sqlite3*, 
+  void*, 
+  void(*)(void*,sqlite3*,int eTextRep,const char*)
+);
+SQLITE_API int sqlite3_collation_needed16(
+  sqlite3*, 
+  void*,
+  void(*)(void*,sqlite3*,int eTextRep,const void*)
+);
+
+#ifdef SQLITE_HAS_CODEC
+/*
+** Specify the key for an encrypted database.  This routine should be
+** called right after sqlite3_open().
+**
+** The code to implement this API is not available in the public release
+** of SQLite.
+*/
+SQLITE_API int sqlite3_key(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const void *pKey, int nKey     /* The key */
+);
+SQLITE_API int sqlite3_key_v2(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const char *zDbName,           /* Name of the database */
+  const void *pKey, int nKey     /* The key */
+);
+
+/*
+** Change the key on an open database.  If the current database is not
+** encrypted, this routine will encrypt it.  If pNew==0 or nNew==0, the
+** database is decrypted.
+**
+** The code to implement this API is not available in the public release
+** of SQLite.
+*/
+SQLITE_API int sqlite3_rekey(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const void *pKey, int nKey     /* The new key */
+);
+SQLITE_API int sqlite3_rekey_v2(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const char *zDbName,           /* Name of the database */
+  const void *pKey, int nKey     /* The new key */
+);
+
+/*
+** Specify the activation key for a SEE database.  Unless 
+** activated, none of the SEE routines will work.
+*/
+SQLITE_API void sqlite3_activate_see(
+  const char *zPassPhrase        /* Activation phrase */
+);
+#endif
+
+#ifdef SQLITE_ENABLE_CEROD
+/*
+** Specify the activation key for a CEROD database.  Unless 
+** activated, none of the CEROD routines will work.
+*/
+SQLITE_API void sqlite3_activate_cerod(
+  const char *zPassPhrase        /* Activation phrase */
+);
+#endif
+
+/*
+** CAPI3REF: Suspend Execution For A Short Time
+**
+** The sqlite3_sleep() function causes the current thread to suspend execution
+** for at least a number of milliseconds specified in its parameter.
+**
+** If the operating system does not support sleep requests with
+** millisecond time resolution, then the time will be rounded up to
+** the nearest second. The number of milliseconds of sleep actually
+** requested from the operating system is returned.
+**
+** ^SQLite implements this interface by calling the xSleep()
+** method of the default [sqlite3_vfs] object.  If the xSleep() method
+** of the default VFS is not implemented correctly, or not implemented at
+** all, then the behavior of sqlite3_sleep() may deviate from the description
+** in the previous paragraphs.
+*/
+SQLITE_API int sqlite3_sleep(int);
+
+/*
+** CAPI3REF: Name Of The Folder Holding Temporary Files
+**
+** ^(If this global variable is made to point to a string which is
+** the name of a folder (a.k.a. directory), then all temporary files
+** created by SQLite when using a built-in [sqlite3_vfs | VFS]
+** will be placed in that directory.)^  ^If this variable
+** is a NULL pointer, then SQLite performs a search for an appropriate
+** temporary file directory.
+**
+** Applications are strongly discouraged from using this global variable.
+** It is required to set a temporary folder on Windows Runtime (WinRT).
+** But for all other platforms, it is highly recommended that applications
+** neither read nor write this variable.  This global variable is a relic
+** that exists for backwards compatibility of legacy applications and should
+** be avoided in new projects.
+**
+** It is not safe to read or modify this variable in more than one
+** thread at a time.  It is not safe to read or modify this variable
+** if a [database connection] is being used at the same time in a separate
+** thread.
+** It is intended that this variable be set once
+** as part of process initialization and before any SQLite interface
+** routines have been called and that this variable remain unchanged
+** thereafter.
+**
+** ^The [temp_store_directory pragma] may modify this variable and cause
+** it to point to memory obtained from [sqlite3_malloc].  ^Furthermore,
+** the [temp_store_directory pragma] always assumes that any string
+** that this variable points to is held in memory obtained from 
+** [sqlite3_malloc] and the pragma may attempt to free that memory
+** using [sqlite3_free].
+** Hence, if this variable is modified directly, either it should be
+** made NULL or made to point to memory obtained from [sqlite3_malloc]
+** or else the use of the [temp_store_directory pragma] should be avoided.
+** Except when requested by the [temp_store_directory pragma], SQLite
+** does not free the memory that sqlite3_temp_directory points to.  If
+** the application wants that memory to be freed, it must do
+** so itself, taking care to only do so after all [database connection]
+** objects have been destroyed.
+**
+** <b>Note to Windows Runtime users:</b>  The temporary directory must be set
+** prior to calling [sqlite3_open] or [sqlite3_open_v2].  Otherwise, various
+** features that require the use of temporary files may fail.  Here is an
+** example of how to do this using C++ with the Windows Runtime:
+**
+** <blockquote><pre>
+** LPCWSTR zPath = Windows::Storage::ApplicationData::Current->
+** &nbsp;     TemporaryFolder->Path->Data();
+** char zPathBuf&#91;MAX_PATH + 1&#93;;
+** memset(zPathBuf, 0, sizeof(zPathBuf));
+** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf),
+** &nbsp;     NULL, NULL);
+** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf);
+** </pre></blockquote>
+*/
+SQLITE_API char *sqlite3_temp_directory;
+
+/*
+** CAPI3REF: Name Of The Folder Holding Database Files
+**
+** ^(If this global variable is made to point to a string which is
+** the name of a folder (a.k.a. directory), then all database files
+** specified with a relative pathname and created or accessed by
+** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed
+** to be relative to that directory.)^ ^If this variable is a NULL
+** pointer, then SQLite assumes that all database files specified
+** with a relative pathname are relative to the current directory
+** for the process.  Only the windows VFS makes use of this global
+** variable; it is ignored by the unix VFS.
+**
+** Changing the value of this variable while a database connection is
+** open can result in a corrupt database.
+**
+** It is not safe to read or modify this variable in more than one
+** thread at a time.  It is not safe to read or modify this variable
+** if a [database connection] is being used at the same time in a separate
+** thread.
+** It is intended that this variable be set once
+** as part of process initialization and before any SQLite interface
+** routines have been called and that this variable remain unchanged
+** thereafter.
+**
+** ^The [data_store_directory pragma] may modify this variable and cause
+** it to point to memory obtained from [sqlite3_malloc].  ^Furthermore,
+** the [data_store_directory pragma] always assumes that any string
+** that this variable points to is held in memory obtained from 
+** [sqlite3_malloc] and the pragma may attempt to free that memory
+** using [sqlite3_free].
+** Hence, if this variable is modified directly, either it should be
+** made NULL or made to point to memory obtained from [sqlite3_malloc]
+** or else the use of the [data_store_directory pragma] should be avoided.
+*/
+SQLITE_API char *sqlite3_data_directory;
+
+/*
+** CAPI3REF: Test For Auto-Commit Mode
+** KEYWORDS: {autocommit mode}
+**
+** ^The sqlite3_get_autocommit() interface returns non-zero or
+** zero if the given database connection is or is not in autocommit mode,
+** respectively.  ^Autocommit mode is on by default.
+** ^Autocommit mode is disabled by a [BEGIN] statement.
+** ^Autocommit mode is re-enabled by a [COMMIT] or [ROLLBACK].
+**
+** If certain kinds of errors occur on a statement within a multi-statement
+** transaction (errors including [SQLITE_FULL], [SQLITE_IOERR],
+** [SQLITE_NOMEM], [SQLITE_BUSY], and [SQLITE_INTERRUPT]) then the
+** transaction might be rolled back automatically.  The only way to
+** find out whether SQLite automatically rolled back the transaction after
+** an error is to use this function.
+**
+** If another thread changes the autocommit status of the database
+** connection while this routine is running, then the return value
+** is undefined.
+*/
+SQLITE_API int sqlite3_get_autocommit(sqlite3*);
+
+/*
+** CAPI3REF: Find The Database Handle Of A Prepared Statement
+**
+** ^The sqlite3_db_handle interface returns the [database connection] handle
+** to which a [prepared statement] belongs.  ^The [database connection]
+** returned by sqlite3_db_handle is the same [database connection]
+** that was the first argument
+** to the [sqlite3_prepare_v2()] call (or its variants) that was used to
+** create the statement in the first place.
+*/
+SQLITE_API sqlite3 *sqlite3_db_handle(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Return The Filename For A Database Connection
+**
+** ^The sqlite3_db_filename(D,N) interface returns a pointer to a filename
+** associated with database N of connection D.  ^The main database file
+** has the name "main".  If there is no attached database N on the database
+** connection D, or if database N is a temporary or in-memory database, then
+** a NULL pointer is returned.
+**
+** ^The filename returned by this function is the output of the
+** xFullPathname method of the [VFS].  ^In other words, the filename
+** will be an absolute pathname, even if the filename used
+** to open the database originally was a URI or relative pathname.
+*/
+SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName);
+
+/*
+** CAPI3REF: Determine if a database is read-only
+**
+** ^The sqlite3_db_readonly(D,N) interface returns 1 if the database N
+** of connection D is read-only, 0 if it is read/write, or -1 if N is not
+** the name of a database on connection D.
+*/
+SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName);
+
+/*
+** CAPI3REF: Find the next prepared statement
+**
+** ^This interface returns a pointer to the next [prepared statement] after
+** pStmt associated with the [database connection] pDb.  ^If pStmt is NULL
+** then this interface returns a pointer to the first prepared statement
+** associated with the database connection pDb.  ^If no prepared statement
+** satisfies the conditions of this routine, it returns NULL.
+**
+** The [database connection] pointer D in a call to
+** [sqlite3_next_stmt(D,S)] must refer to an open database
+** connection and in particular must not be a NULL pointer.
+*/
+SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Commit And Rollback Notification Callbacks
+**
+** ^The sqlite3_commit_hook() interface registers a callback
+** function to be invoked whenever a transaction is [COMMIT | committed].
+** ^Any callback set by a previous call to sqlite3_commit_hook()
+** for the same database connection is overridden.
+** ^The sqlite3_rollback_hook() interface registers a callback
+** function to be invoked whenever a transaction is [ROLLBACK | rolled back].
+** ^Any callback set by a previous call to sqlite3_rollback_hook()
+** for the same database connection is overridden.
+** ^The pArg argument is passed through to the callback.
+** ^If the callback on a commit hook function returns non-zero,
+** then the commit is converted into a rollback.
+**
+** ^The sqlite3_commit_hook(D,C,P) and sqlite3_rollback_hook(D,C,P) functions
+** return the P argument from the previous call of the same function
+** on the same [database connection] D, or NULL for
+** the first call for each function on D.
+**
+** The commit and rollback hook callbacks are not reentrant.
+** The callback implementation must not do anything that will modify
+** the database connection that invoked the callback.  Any actions
+** to modify the database connection must be deferred until after the
+** completion of the [sqlite3_step()] call that triggered the commit
+** or rollback hook in the first place.
+** Note that running any other SQL statements, including SELECT statements,
+** or merely calling [sqlite3_prepare_v2()] and [sqlite3_step()] will modify
+** the database connections for the meaning of "modify" in this paragraph.
+**
+** ^Registering a NULL function disables the callback.
+**
+** ^When the commit hook callback routine returns zero, the [COMMIT]
+** operation is allowed to continue normally.  ^If the commit hook
+** returns non-zero, then the [COMMIT] is converted into a [ROLLBACK].
+** ^The rollback hook is invoked on a rollback that results from a commit
+** hook returning non-zero, just as it would be with any other rollback.
+**
+** ^For the purposes of this API, a transaction is said to have been
+** rolled back if an explicit "ROLLBACK" statement is executed, or
+** an error or constraint causes an implicit rollback to occur.
+** ^The rollback callback is not invoked if a transaction is
+** automatically rolled back because the database connection is closed.
+**
+** See also the [sqlite3_update_hook()] interface.
+*/
+SQLITE_API void *sqlite3_commit_hook(sqlite3*, int(*)(void*), void*);
+SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*);
+
+/*
+** CAPI3REF: Data Change Notification Callbacks
+**
+** ^The sqlite3_update_hook() interface registers a callback function
+** with the [database connection] identified by the first argument
+** to be invoked whenever a row is updated, inserted or deleted in
+** a rowid table.
+** ^Any callback set by a previous call to this function
+** for the same database connection is overridden.
+**
+** ^The second argument is a pointer to the function to invoke when a
+** row is updated, inserted or deleted in a rowid table.
+** ^The first argument to the callback is a copy of the third argument
+** to sqlite3_update_hook().
+** ^The second callback argument is one of [SQLITE_INSERT], [SQLITE_DELETE],
+** or [SQLITE_UPDATE], depending on the operation that caused the callback
+** to be invoked.
+** ^The third and fourth arguments to the callback contain pointers to the
+** database and table name containing the affected row.
+** ^The final callback parameter is the [rowid] of the row.
+** ^In the case of an update, this is the [rowid] after the update takes place.
+**
+** ^(The update hook is not invoked when internal system tables are
+** modified (i.e. sqlite_master and sqlite_sequence).)^
+** ^The update hook is not invoked when [WITHOUT ROWID] tables are modified.
+**
+** ^In the current implementation, the update hook
+** is not invoked when duplication rows are deleted because of an
+** [ON CONFLICT | ON CONFLICT REPLACE] clause.  ^Nor is the update hook
+** invoked when rows are deleted using the [truncate optimization].
+** The exceptions defined in this paragraph might change in a future
+** release of SQLite.
+**
+** The update hook implementation must not do anything that will modify
+** the database connection that invoked the update hook.  Any actions
+** to modify the database connection must be deferred until after the
+** completion of the [sqlite3_step()] call that triggered the update hook.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+** ^The sqlite3_update_hook(D,C,P) function
+** returns the P argument from the previous call
+** on the same [database connection] D, or NULL for
+** the first call on D.
+**
+** See also the [sqlite3_commit_hook()] and [sqlite3_rollback_hook()]
+** interfaces.
+*/
+SQLITE_API void *sqlite3_update_hook(
+  sqlite3*, 
+  void(*)(void *,int ,char const *,char const *,sqlite3_int64),
+  void*
+);
+
+/*
+** CAPI3REF: Enable Or Disable Shared Pager Cache
+**
+** ^(This routine enables or disables the sharing of the database cache
+** and schema data structures between [database connection | connections]
+** to the same database. Sharing is enabled if the argument is true
+** and disabled if the argument is false.)^
+**
+** ^Cache sharing is enabled and disabled for an entire process.
+** This is a change as of SQLite version 3.5.0. In prior versions of SQLite,
+** sharing was enabled or disabled for each thread separately.
+**
+** ^(The cache sharing mode set by this interface effects all subsequent
+** calls to [sqlite3_open()], [sqlite3_open_v2()], and [sqlite3_open16()].
+** Existing database connections continue use the sharing mode
+** that was in effect at the time they were opened.)^
+**
+** ^(This routine returns [SQLITE_OK] if shared cache was enabled or disabled
+** successfully.  An [error code] is returned otherwise.)^
+**
+** ^Shared cache is disabled by default. But this might change in
+** future releases of SQLite.  Applications that care about shared
+** cache setting should set it explicitly.
+**
+** This interface is threadsafe on processors where writing a
+** 32-bit integer is atomic.
+**
+** See Also:  [SQLite Shared-Cache Mode]
+*/
+SQLITE_API int sqlite3_enable_shared_cache(int);
+
+/*
+** CAPI3REF: Attempt To Free Heap Memory
+**
+** ^The sqlite3_release_memory() interface attempts to free N bytes
+** of heap memory by deallocating non-essential memory allocations
+** held by the database library.   Memory used to cache database
+** pages to improve performance is an example of non-essential memory.
+** ^sqlite3_release_memory() returns the number of bytes actually freed,
+** which might be more or less than the amount requested.
+** ^The sqlite3_release_memory() routine is a no-op returning zero
+** if SQLite is not compiled with [SQLITE_ENABLE_MEMORY_MANAGEMENT].
+**
+** See also: [sqlite3_db_release_memory()]
+*/
+SQLITE_API int sqlite3_release_memory(int);
+
+/*
+** CAPI3REF: Free Memory Used By A Database Connection
+**
+** ^The sqlite3_db_release_memory(D) interface attempts to free as much heap
+** memory as possible from database connection D. Unlike the
+** [sqlite3_release_memory()] interface, this interface is in effect even
+** when the [SQLITE_ENABLE_MEMORY_MANAGEMENT] compile-time option is
+** omitted.
+**
+** See also: [sqlite3_release_memory()]
+*/
+SQLITE_API int sqlite3_db_release_memory(sqlite3*);
+
+/*
+** CAPI3REF: Impose A Limit On Heap Size
+**
+** ^The sqlite3_soft_heap_limit64() interface sets and/or queries the
+** soft limit on the amount of heap memory that may be allocated by SQLite.
+** ^SQLite strives to keep heap memory utilization below the soft heap
+** limit by reducing the number of pages held in the page cache
+** as heap memory usages approaches the limit.
+** ^The soft heap limit is "soft" because even though SQLite strives to stay
+** below the limit, it will exceed the limit rather than generate
+** an [SQLITE_NOMEM] error.  In other words, the soft heap limit 
+** is advisory only.
+**
+** ^The return value from sqlite3_soft_heap_limit64() is the size of
+** the soft heap limit prior to the call, or negative in the case of an
+** error.  ^If the argument N is negative
+** then no change is made to the soft heap limit.  Hence, the current
+** size of the soft heap limit can be determined by invoking
+** sqlite3_soft_heap_limit64() with a negative argument.
+**
+** ^If the argument N is zero then the soft heap limit is disabled.
+**
+** ^(The soft heap limit is not enforced in the current implementation
+** if one or more of following conditions are true:
+**
+** <ul>
+** <li> The soft heap limit is set to zero.
+** <li> Memory accounting is disabled using a combination of the
+**      [sqlite3_config]([SQLITE_CONFIG_MEMSTATUS],...) start-time option and
+**      the [SQLITE_DEFAULT_MEMSTATUS] compile-time option.
+** <li> An alternative page cache implementation is specified using
+**      [sqlite3_config]([SQLITE_CONFIG_PCACHE2],...).
+** <li> The page cache allocates from its own memory pool supplied
+**      by [sqlite3_config]([SQLITE_CONFIG_PAGECACHE],...) rather than
+**      from the heap.
+** </ul>)^
+**
+** Beginning with SQLite version 3.7.3, the soft heap limit is enforced
+** regardless of whether or not the [SQLITE_ENABLE_MEMORY_MANAGEMENT]
+** compile-time option is invoked.  With [SQLITE_ENABLE_MEMORY_MANAGEMENT],
+** the soft heap limit is enforced on every memory allocation.  Without
+** [SQLITE_ENABLE_MEMORY_MANAGEMENT], the soft heap limit is only enforced
+** when memory is allocated by the page cache.  Testing suggests that because
+** the page cache is the predominate memory user in SQLite, most
+** applications will achieve adequate soft heap limit enforcement without
+** the use of [SQLITE_ENABLE_MEMORY_MANAGEMENT].
+**
+** The circumstances under which SQLite will enforce the soft heap limit may
+** changes in future releases of SQLite.
+*/
+SQLITE_API sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 N);
+
+/*
+** CAPI3REF: Deprecated Soft Heap Limit Interface
+** DEPRECATED
+**
+** This is a deprecated version of the [sqlite3_soft_heap_limit64()]
+** interface.  This routine is provided for historical compatibility
+** only.  All new applications should use the
+** [sqlite3_soft_heap_limit64()] interface rather than this one.
+*/
+SQLITE_API SQLITE_DEPRECATED void sqlite3_soft_heap_limit(int N);
+
+
+/*
+** CAPI3REF: Extract Metadata About A Column Of A Table
+**
+** ^(The sqlite3_table_column_metadata(X,D,T,C,....) routine returns
+** information about column C of table T in database D
+** on [database connection] X.)^  ^The sqlite3_table_column_metadata()
+** interface returns SQLITE_OK and fills in the non-NULL pointers in
+** the final five arguments with appropriate values if the specified
+** column exists.  ^The sqlite3_table_column_metadata() interface returns
+** SQLITE_ERROR and if the specified column does not exist.
+** ^If the column-name parameter to sqlite3_table_column_metadata() is a
+** NULL pointer, then this routine simply checks for the existance of the
+** table and returns SQLITE_OK if the table exists and SQLITE_ERROR if it
+** does not.
+**
+** ^The column is identified by the second, third and fourth parameters to
+** this function. ^(The second parameter is either the name of the database
+** (i.e. "main", "temp", or an attached database) containing the specified
+** table or NULL.)^ ^If it is NULL, then all attached databases are searched
+** for the table using the same algorithm used by the database engine to
+** resolve unqualified table references.
+**
+** ^The third and fourth parameters to this function are the table and column
+** name of the desired column, respectively.
+**
+** ^Metadata is returned by writing to the memory locations passed as the 5th
+** and subsequent parameters to this function. ^Any of these arguments may be
+** NULL, in which case the corresponding element of metadata is omitted.
+**
+** ^(<blockquote>
+** <table border="1">
+** <tr><th> Parameter <th> Output<br>Type <th>  Description
+**
+** <tr><td> 5th <td> const char* <td> Data type
+** <tr><td> 6th <td> const char* <td> Name of default collation sequence
+** <tr><td> 7th <td> int         <td> True if column has a NOT NULL constraint
+** <tr><td> 8th <td> int         <td> True if column is part of the PRIMARY KEY
+** <tr><td> 9th <td> int         <td> True if column is [AUTOINCREMENT]
+** </table>
+** </blockquote>)^
+**
+** ^The memory pointed to by the character pointers returned for the
+** declaration type and collation sequence is valid until the next
+** call to any SQLite API function.
+**
+** ^If the specified table is actually a view, an [error code] is returned.
+**
+** ^If the specified column is "rowid", "oid" or "_rowid_" and the table 
+** is not a [WITHOUT ROWID] table and an
+** [INTEGER PRIMARY KEY] column has been explicitly declared, then the output
+** parameters are set for the explicitly declared column. ^(If there is no
+** [INTEGER PRIMARY KEY] column, then the outputs
+** for the [rowid] are set as follows:
+**
+** <pre>
+**     data type: "INTEGER"
+**     collation sequence: "BINARY"
+**     not null: 0
+**     primary key: 1
+**     auto increment: 0
+** </pre>)^
+**
+** ^This function causes all database schemas to be read from disk and
+** parsed, if that has not already been done, and returns an error if
+** any errors are encountered while loading the schema.
+*/
+SQLITE_API int sqlite3_table_column_metadata(
+  sqlite3 *db,                /* Connection handle */
+  const char *zDbName,        /* Database name or NULL */
+  const char *zTableName,     /* Table name */
+  const char *zColumnName,    /* Column name */
+  char const **pzDataType,    /* OUTPUT: Declared data type */
+  char const **pzCollSeq,     /* OUTPUT: Collation sequence name */
+  int *pNotNull,              /* OUTPUT: True if NOT NULL constraint exists */
+  int *pPrimaryKey,           /* OUTPUT: True if column part of PK */
+  int *pAutoinc               /* OUTPUT: True if column is auto-increment */
+);
+
+/*
+** CAPI3REF: Load An Extension
+**
+** ^This interface loads an SQLite extension library from the named file.
+**
+** ^The sqlite3_load_extension() interface attempts to load an
+** [SQLite extension] library contained in the file zFile.  If
+** the file cannot be loaded directly, attempts are made to load
+** with various operating-system specific extensions added.
+** So for example, if "samplelib" cannot be loaded, then names like
+** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might
+** be tried also.
+**
+** ^The entry point is zProc.
+** ^(zProc may be 0, in which case SQLite will try to come up with an
+** entry point name on its own.  It first tries "sqlite3_extension_init".
+** If that does not work, it constructs a name "sqlite3_X_init" where the
+** X is consists of the lower-case equivalent of all ASCII alphabetic
+** characters in the filename from the last "/" to the first following
+** "." and omitting any initial "lib".)^
+** ^The sqlite3_load_extension() interface returns
+** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong.
+** ^If an error occurs and pzErrMsg is not 0, then the
+** [sqlite3_load_extension()] interface shall attempt to
+** fill *pzErrMsg with error message text stored in memory
+** obtained from [sqlite3_malloc()]. The calling function
+** should free this memory by calling [sqlite3_free()].
+**
+** ^Extension loading must be enabled using
+** [sqlite3_enable_load_extension()] prior to calling this API,
+** otherwise an error will be returned.
+**
+** See also the [load_extension() SQL function].
+*/
+SQLITE_API int sqlite3_load_extension(
+  sqlite3 *db,          /* Load the extension into this database connection */
+  const char *zFile,    /* Name of the shared library containing extension */
+  const char *zProc,    /* Entry point.  Derived from zFile if 0 */
+  char **pzErrMsg       /* Put error message here if not 0 */
+);
+
+/*
+** CAPI3REF: Enable Or Disable Extension Loading
+**
+** ^So as not to open security holes in older applications that are
+** unprepared to deal with [extension loading], and as a means of disabling
+** [extension loading] while evaluating user-entered SQL, the following API
+** is provided to turn the [sqlite3_load_extension()] mechanism on and off.
+**
+** ^Extension loading is off by default.
+** ^Call the sqlite3_enable_load_extension() routine with onoff==1
+** to turn extension loading on and call it with onoff==0 to turn
+** it back off again.
+*/
+SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff);
+
+/*
+** CAPI3REF: Automatically Load Statically Linked Extensions
+**
+** ^This interface causes the xEntryPoint() function to be invoked for
+** each new [database connection] that is created.  The idea here is that
+** xEntryPoint() is the entry point for a statically linked [SQLite extension]
+** that is to be automatically loaded into all new database connections.
+**
+** ^(Even though the function prototype shows that xEntryPoint() takes
+** no arguments and returns void, SQLite invokes xEntryPoint() with three
+** arguments and expects and integer result as if the signature of the
+** entry point where as follows:
+**
+** <blockquote><pre>
+** &nbsp;  int xEntryPoint(
+** &nbsp;    sqlite3 *db,
+** &nbsp;    const char **pzErrMsg,
+** &nbsp;    const struct sqlite3_api_routines *pThunk
+** &nbsp;  );
+** </pre></blockquote>)^
+**
+** If the xEntryPoint routine encounters an error, it should make *pzErrMsg
+** point to an appropriate error message (obtained from [sqlite3_mprintf()])
+** and return an appropriate [error code].  ^SQLite ensures that *pzErrMsg
+** is NULL before calling the xEntryPoint().  ^SQLite will invoke
+** [sqlite3_free()] on *pzErrMsg after xEntryPoint() returns.  ^If any
+** xEntryPoint() returns an error, the [sqlite3_open()], [sqlite3_open16()],
+** or [sqlite3_open_v2()] call that provoked the xEntryPoint() will fail.
+**
+** ^Calling sqlite3_auto_extension(X) with an entry point X that is already
+** on the list of automatic extensions is a harmless no-op. ^No entry point
+** will be called more than once for each database connection that is opened.
+**
+** See also: [sqlite3_reset_auto_extension()]
+** and [sqlite3_cancel_auto_extension()]
+*/
+SQLITE_API int sqlite3_auto_extension(void (*xEntryPoint)(void));
+
+/*
+** CAPI3REF: Cancel Automatic Extension Loading
+**
+** ^The [sqlite3_cancel_auto_extension(X)] interface unregisters the
+** initialization routine X that was registered using a prior call to
+** [sqlite3_auto_extension(X)].  ^The [sqlite3_cancel_auto_extension(X)]
+** routine returns 1 if initialization routine X was successfully 
+** unregistered and it returns 0 if X was not on the list of initialization
+** routines.
+*/
+SQLITE_API int sqlite3_cancel_auto_extension(void (*xEntryPoint)(void));
+
+/*
+** CAPI3REF: Reset Automatic Extension Loading
+**
+** ^This interface disables all automatic extensions previously
+** registered using [sqlite3_auto_extension()].
+*/
+SQLITE_API void sqlite3_reset_auto_extension(void);
+
+/*
+** The interface to the virtual-table mechanism is currently considered
+** to be experimental.  The interface might change in incompatible ways.
+** If this is a problem for you, do not use the interface at this time.
+**
+** When the virtual-table mechanism stabilizes, we will declare the
+** interface fixed, support it indefinitely, and remove this comment.
+*/
+
+/*
+** Structures used by the virtual table interface
+*/
+typedef struct sqlite3_vtab sqlite3_vtab;
+typedef struct sqlite3_index_info sqlite3_index_info;
+typedef struct sqlite3_vtab_cursor sqlite3_vtab_cursor;
+typedef struct sqlite3_module sqlite3_module;
+
+/*
+** CAPI3REF: Virtual Table Object
+** KEYWORDS: sqlite3_module {virtual table module}
+**
+** This structure, sometimes called a "virtual table module", 
+** defines the implementation of a [virtual tables].  
+** This structure consists mostly of methods for the module.
+**
+** ^A virtual table module is created by filling in a persistent
+** instance of this structure and passing a pointer to that instance
+** to [sqlite3_create_module()] or [sqlite3_create_module_v2()].
+** ^The registration remains valid until it is replaced by a different
+** module or until the [database connection] closes.  The content
+** of this structure must not change while it is registered with
+** any database connection.
+*/
+struct sqlite3_module {
+  int iVersion;
+  int (*xCreate)(sqlite3*, void *pAux,
+               int argc, const char *const*argv,
+               sqlite3_vtab **ppVTab, char**);
+  int (*xConnect)(sqlite3*, void *pAux,
+               int argc, const char *const*argv,
+               sqlite3_vtab **ppVTab, char**);
+  int (*xBestIndex)(sqlite3_vtab *pVTab, sqlite3_index_info*);
+  int (*xDisconnect)(sqlite3_vtab *pVTab);
+  int (*xDestroy)(sqlite3_vtab *pVTab);
+  int (*xOpen)(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor);
+  int (*xClose)(sqlite3_vtab_cursor*);
+  int (*xFilter)(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
+                int argc, sqlite3_value **argv);
+  int (*xNext)(sqlite3_vtab_cursor*);
+  int (*xEof)(sqlite3_vtab_cursor*);
+  int (*xColumn)(sqlite3_vtab_cursor*, sqlite3_context*, int);
+  int (*xRowid)(sqlite3_vtab_cursor*, sqlite3_int64 *pRowid);
+  int (*xUpdate)(sqlite3_vtab *, int, sqlite3_value **, sqlite3_int64 *);
+  int (*xBegin)(sqlite3_vtab *pVTab);
+  int (*xSync)(sqlite3_vtab *pVTab);
+  int (*xCommit)(sqlite3_vtab *pVTab);
+  int (*xRollback)(sqlite3_vtab *pVTab);
+  int (*xFindFunction)(sqlite3_vtab *pVtab, int nArg, const char *zName,
+                       void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
+                       void **ppArg);
+  int (*xRename)(sqlite3_vtab *pVtab, const char *zNew);
+  /* The methods above are in version 1 of the sqlite_module object. Those 
+  ** below are for version 2 and greater. */
+  int (*xSavepoint)(sqlite3_vtab *pVTab, int);
+  int (*xRelease)(sqlite3_vtab *pVTab, int);
+  int (*xRollbackTo)(sqlite3_vtab *pVTab, int);
+};
+
+/*
+** CAPI3REF: Virtual Table Indexing Information
+** KEYWORDS: sqlite3_index_info
+**
+** The sqlite3_index_info structure and its substructures is used as part
+** of the [virtual table] interface to
+** pass information into and receive the reply from the [xBestIndex]
+** method of a [virtual table module].  The fields under **Inputs** are the
+** inputs to xBestIndex and are read-only.  xBestIndex inserts its
+** results into the **Outputs** fields.
+**
+** ^(The aConstraint[] array records WHERE clause constraints of the form:
+**
+** <blockquote>column OP expr</blockquote>
+**
+** where OP is =, &lt;, &lt;=, &gt;, or &gt;=.)^  ^(The particular operator is
+** stored in aConstraint[].op using one of the
+** [SQLITE_INDEX_CONSTRAINT_EQ | SQLITE_INDEX_CONSTRAINT_ values].)^
+** ^(The index of the column is stored in
+** aConstraint[].iColumn.)^  ^(aConstraint[].usable is TRUE if the
+** expr on the right-hand side can be evaluated (and thus the constraint
+** is usable) and false if it cannot.)^
+**
+** ^The optimizer automatically inverts terms of the form "expr OP column"
+** and makes other simplifications to the WHERE clause in an attempt to
+** get as many WHERE clause terms into the form shown above as possible.
+** ^The aConstraint[] array only reports WHERE clause terms that are
+** relevant to the particular virtual table being queried.
+**
+** ^Information about the ORDER BY clause is stored in aOrderBy[].
+** ^Each term of aOrderBy records a column of the ORDER BY clause.
+**
+** The [xBestIndex] method must fill aConstraintUsage[] with information
+** about what parameters to pass to xFilter.  ^If argvIndex>0 then
+** the right-hand side of the corresponding aConstraint[] is evaluated
+** and becomes the argvIndex-th entry in argv.  ^(If aConstraintUsage[].omit
+** is true, then the constraint is assumed to be fully handled by the
+** virtual table and is not checked again by SQLite.)^
+**
+** ^The idxNum and idxPtr values are recorded and passed into the
+** [xFilter] method.
+** ^[sqlite3_free()] is used to free idxPtr if and only if
+** needToFreeIdxPtr is true.
+**
+** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in
+** the correct order to satisfy the ORDER BY clause so that no separate
+** sorting step is required.
+**
+** ^The estimatedCost value is an estimate of the cost of a particular
+** strategy. A cost of N indicates that the cost of the strategy is similar
+** to a linear scan of an SQLite table with N rows. A cost of log(N) 
+** indicates that the expense of the operation is similar to that of a
+** binary search on a unique indexed field of an SQLite table with N rows.
+**
+** ^The estimatedRows value is an estimate of the number of rows that
+** will be returned by the strategy.
+**
+** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
+** structure for SQLite version 3.8.2. If a virtual table extension is
+** used with an SQLite version earlier than 3.8.2, the results of attempting 
+** to read or write the estimatedRows field are undefined (but are likely 
+** to included crashing the application). The estimatedRows field should
+** therefore only be used if [sqlite3_libversion_number()] returns a
+** value greater than or equal to 3008002.
+*/
+struct sqlite3_index_info {
+  /* Inputs */
+  int nConstraint;           /* Number of entries in aConstraint */
+  struct sqlite3_index_constraint {
+     int iColumn;              /* Column on left-hand side of constraint */
+     unsigned char op;         /* Constraint operator */
+     unsigned char usable;     /* True if this constraint is usable */
+     int iTermOffset;          /* Used internally - xBestIndex should ignore */
+  } *aConstraint;            /* Table of WHERE clause constraints */
+  int nOrderBy;              /* Number of terms in the ORDER BY clause */
+  struct sqlite3_index_orderby {
+     int iColumn;              /* Column number */
+     unsigned char desc;       /* True for DESC.  False for ASC. */
+  } *aOrderBy;               /* The ORDER BY clause */
+  /* Outputs */
+  struct sqlite3_index_constraint_usage {
+    int argvIndex;           /* if >0, constraint is part of argv to xFilter */
+    unsigned char omit;      /* Do not code a test for this constraint */
+  } *aConstraintUsage;
+  int idxNum;                /* Number used to identify the index */
+  char *idxStr;              /* String, possibly obtained from sqlite3_malloc */
+  int needToFreeIdxStr;      /* Free idxStr using sqlite3_free() if true */
+  int orderByConsumed;       /* True if output is already ordered */
+  double estimatedCost;           /* Estimated cost of using this index */
+  /* Fields below are only available in SQLite 3.8.2 and later */
+  sqlite3_int64 estimatedRows;    /* Estimated number of rows returned */
+};
+
+/*
+** CAPI3REF: Virtual Table Constraint Operator Codes
+**
+** These macros defined the allowed values for the
+** [sqlite3_index_info].aConstraint[].op field.  Each value represents
+** an operator that is part of a constraint term in the wHERE clause of
+** a query that uses a [virtual table].
+*/
+#define SQLITE_INDEX_CONSTRAINT_EQ    2
+#define SQLITE_INDEX_CONSTRAINT_GT    4
+#define SQLITE_INDEX_CONSTRAINT_LE    8
+#define SQLITE_INDEX_CONSTRAINT_LT    16
+#define SQLITE_INDEX_CONSTRAINT_GE    32
+#define SQLITE_INDEX_CONSTRAINT_MATCH 64
+
+/*
+** CAPI3REF: Register A Virtual Table Implementation
+**
+** ^These routines are used to register a new [virtual table module] name.
+** ^Module names must be registered before
+** creating a new [virtual table] using the module and before using a
+** preexisting [virtual table] for the module.
+**
+** ^The module name is registered on the [database connection] specified
+** by the first parameter.  ^The name of the module is given by the 
+** second parameter.  ^The third parameter is a pointer to
+** the implementation of the [virtual table module].   ^The fourth
+** parameter is an arbitrary client data pointer that is passed through
+** into the [xCreate] and [xConnect] methods of the virtual table module
+** when a new virtual table is be being created or reinitialized.
+**
+** ^The sqlite3_create_module_v2() interface has a fifth parameter which
+** is a pointer to a destructor for the pClientData.  ^SQLite will
+** invoke the destructor function (if it is not NULL) when SQLite
+** no longer needs the pClientData pointer.  ^The destructor will also
+** be invoked if the call to sqlite3_create_module_v2() fails.
+** ^The sqlite3_create_module()
+** interface is equivalent to sqlite3_create_module_v2() with a NULL
+** destructor.
+*/
+SQLITE_API int sqlite3_create_module(
+  sqlite3 *db,               /* SQLite connection to register module with */
+  const char *zName,         /* Name of the module */
+  const sqlite3_module *p,   /* Methods for the module */
+  void *pClientData          /* Client data for xCreate/xConnect */
+);
+SQLITE_API int sqlite3_create_module_v2(
+  sqlite3 *db,               /* SQLite connection to register module with */
+  const char *zName,         /* Name of the module */
+  const sqlite3_module *p,   /* Methods for the module */
+  void *pClientData,         /* Client data for xCreate/xConnect */
+  void(*xDestroy)(void*)     /* Module destructor function */
+);
+
+/*
+** CAPI3REF: Virtual Table Instance Object
+** KEYWORDS: sqlite3_vtab
+**
+** Every [virtual table module] implementation uses a subclass
+** of this object to describe a particular instance
+** of the [virtual table].  Each subclass will
+** be tailored to the specific needs of the module implementation.
+** The purpose of this superclass is to define certain fields that are
+** common to all module implementations.
+**
+** ^Virtual tables methods can set an error message by assigning a
+** string obtained from [sqlite3_mprintf()] to zErrMsg.  The method should
+** take care that any prior string is freed by a call to [sqlite3_free()]
+** prior to assigning a new string to zErrMsg.  ^After the error message
+** is delivered up to the client application, the string will be automatically
+** freed by sqlite3_free() and the zErrMsg field will be zeroed.
+*/
+struct sqlite3_vtab {
+  const sqlite3_module *pModule;  /* The module for this virtual table */
+  int nRef;                       /* NO LONGER USED */
+  char *zErrMsg;                  /* Error message from sqlite3_mprintf() */
+  /* Virtual table implementations will typically add additional fields */
+};
+
+/*
+** CAPI3REF: Virtual Table Cursor Object
+** KEYWORDS: sqlite3_vtab_cursor {virtual table cursor}
+**
+** Every [virtual table module] implementation uses a subclass of the
+** following structure to describe cursors that point into the
+** [virtual table] and are used
+** to loop through the virtual table.  Cursors are created using the
+** [sqlite3_module.xOpen | xOpen] method of the module and are destroyed
+** by the [sqlite3_module.xClose | xClose] method.  Cursors are used
+** by the [xFilter], [xNext], [xEof], [xColumn], and [xRowid] methods
+** of the module.  Each module implementation will define
+** the content of a cursor structure to suit its own needs.
+**
+** This superclass exists in order to define fields of the cursor that
+** are common to all implementations.
+*/
+struct sqlite3_vtab_cursor {
+  sqlite3_vtab *pVtab;      /* Virtual table of this cursor */
+  /* Virtual table implementations will typically add additional fields */
+};
+
+/*
+** CAPI3REF: Declare The Schema Of A Virtual Table
+**
+** ^The [xCreate] and [xConnect] methods of a
+** [virtual table module] call this interface
+** to declare the format (the names and datatypes of the columns) of
+** the virtual tables they implement.
+*/
+SQLITE_API int sqlite3_declare_vtab(sqlite3*, const char *zSQL);
+
+/*
+** CAPI3REF: Overload A Function For A Virtual Table
+**
+** ^(Virtual tables can provide alternative implementations of functions
+** using the [xFindFunction] method of the [virtual table module].  
+** But global versions of those functions
+** must exist in order to be overloaded.)^
+**
+** ^(This API makes sure a global version of a function with a particular
+** name and number of parameters exists.  If no such function exists
+** before this API is called, a new function is created.)^  ^The implementation
+** of the new function always causes an exception to be thrown.  So
+** the new function is not good for anything by itself.  Its only
+** purpose is to be a placeholder function that can be overloaded
+** by a [virtual table].
+*/
+SQLITE_API int sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg);
+
+/*
+** The interface to the virtual-table mechanism defined above (back up
+** to a comment remarkably similar to this one) is currently considered
+** to be experimental.  The interface might change in incompatible ways.
+** If this is a problem for you, do not use the interface at this time.
+**
+** When the virtual-table mechanism stabilizes, we will declare the
+** interface fixed, support it indefinitely, and remove this comment.
+*/
+
+/*
+** CAPI3REF: A Handle To An Open BLOB
+** KEYWORDS: {BLOB handle} {BLOB handles}
+**
+** An instance of this object represents an open BLOB on which
+** [sqlite3_blob_open | incremental BLOB I/O] can be performed.
+** ^Objects of this type are created by [sqlite3_blob_open()]
+** and destroyed by [sqlite3_blob_close()].
+** ^The [sqlite3_blob_read()] and [sqlite3_blob_write()] interfaces
+** can be used to read or write small subsections of the BLOB.
+** ^The [sqlite3_blob_bytes()] interface returns the size of the BLOB in bytes.
+*/
+typedef struct sqlite3_blob sqlite3_blob;
+
+/*
+** CAPI3REF: Open A BLOB For Incremental I/O
+**
+** ^(This interfaces opens a [BLOB handle | handle] to the BLOB located
+** in row iRow, column zColumn, table zTable in database zDb;
+** in other words, the same BLOB that would be selected by:
+**
+** <pre>
+**     SELECT zColumn FROM zDb.zTable WHERE [rowid] = iRow;
+** </pre>)^
+**
+** ^(Parameter zDb is not the filename that contains the database, but 
+** rather the symbolic name of the database. For attached databases, this is
+** the name that appears after the AS keyword in the [ATTACH] statement.
+** For the main database file, the database name is "main". For TEMP
+** tables, the database name is "temp".)^
+**
+** ^If the flags parameter is non-zero, then the BLOB is opened for read
+** and write access. ^If the flags parameter is zero, the BLOB is opened for
+** read-only access.
+**
+** ^(On success, [SQLITE_OK] is returned and the new [BLOB handle] is stored
+** in *ppBlob. Otherwise an [error code] is returned and, unless the error
+** code is SQLITE_MISUSE, *ppBlob is set to NULL.)^ ^This means that, provided
+** the API is not misused, it is always safe to call [sqlite3_blob_close()] 
+** on *ppBlob after this function it returns.
+**
+** This function fails with SQLITE_ERROR if any of the following are true:
+** <ul>
+**   <li> ^(Database zDb does not exist)^, 
+**   <li> ^(Table zTable does not exist within database zDb)^, 
+**   <li> ^(Table zTable is a WITHOUT ROWID table)^, 
+**   <li> ^(Column zColumn does not exist)^,
+**   <li> ^(Row iRow is not present in the table)^,
+**   <li> ^(The specified column of row iRow contains a value that is not
+**         a TEXT or BLOB value)^,
+**   <li> ^(Column zColumn is part of an index, PRIMARY KEY or UNIQUE 
+**         constraint and the blob is being opened for read/write access)^,
+**   <li> ^([foreign key constraints | Foreign key constraints] are enabled, 
+**         column zColumn is part of a [child key] definition and the blob is
+**         being opened for read/write access)^.
+** </ul>
+**
+** ^Unless it returns SQLITE_MISUSE, this function sets the 
+** [database connection] error code and message accessible via 
+** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. 
+**
+**
+** ^(If the row that a BLOB handle points to is modified by an
+** [UPDATE], [DELETE], or by [ON CONFLICT] side-effects
+** then the BLOB handle is marked as "expired".
+** This is true if any column of the row is changed, even a column
+** other than the one the BLOB handle is open on.)^
+** ^Calls to [sqlite3_blob_read()] and [sqlite3_blob_write()] for
+** an expired BLOB handle fail with a return code of [SQLITE_ABORT].
+** ^(Changes written into a BLOB prior to the BLOB expiring are not
+** rolled back by the expiration of the BLOB.  Such changes will eventually
+** commit if the transaction continues to completion.)^
+**
+** ^Use the [sqlite3_blob_bytes()] interface to determine the size of
+** the opened blob.  ^The size of a blob may not be changed by this
+** interface.  Use the [UPDATE] SQL command to change the size of a
+** blob.
+**
+** ^The [sqlite3_bind_zeroblob()] and [sqlite3_result_zeroblob()] interfaces
+** and the built-in [zeroblob] SQL function may be used to create a 
+** zero-filled blob to read or write using the incremental-blob interface.
+**
+** To avoid a resource leak, every open [BLOB handle] should eventually
+** be released by a call to [sqlite3_blob_close()].
+*/
+SQLITE_API int sqlite3_blob_open(
+  sqlite3*,
+  const char *zDb,
+  const char *zTable,
+  const char *zColumn,
+  sqlite3_int64 iRow,
+  int flags,
+  sqlite3_blob **ppBlob
+);
+
+/*
+** CAPI3REF: Move a BLOB Handle to a New Row
+**
+** ^This function is used to move an existing blob handle so that it points
+** to a different row of the same database table. ^The new row is identified
+** by the rowid value passed as the second argument. Only the row can be
+** changed. ^The database, table and column on which the blob handle is open
+** remain the same. Moving an existing blob handle to a new row can be
+** faster than closing the existing handle and opening a new one.
+**
+** ^(The new row must meet the same criteria as for [sqlite3_blob_open()] -
+** it must exist and there must be either a blob or text value stored in
+** the nominated column.)^ ^If the new row is not present in the table, or if
+** it does not contain a blob or text value, or if another error occurs, an
+** SQLite error code is returned and the blob handle is considered aborted.
+** ^All subsequent calls to [sqlite3_blob_read()], [sqlite3_blob_write()] or
+** [sqlite3_blob_reopen()] on an aborted blob handle immediately return
+** SQLITE_ABORT. ^Calling [sqlite3_blob_bytes()] on an aborted blob handle
+** always returns zero.
+**
+** ^This function sets the database handle error code and message.
+*/
+SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64);
+
+/*
+** CAPI3REF: Close A BLOB Handle
+**
+** ^This function closes an open [BLOB handle]. ^(The BLOB handle is closed
+** unconditionally.  Even if this routine returns an error code, the 
+** handle is still closed.)^
+**
+** ^If the blob handle being closed was opened for read-write access, and if
+** the database is in auto-commit mode and there are no other open read-write
+** blob handles or active write statements, the current transaction is
+** committed. ^If an error occurs while committing the transaction, an error
+** code is returned and the transaction rolled back.
+**
+** Calling this function with an argument that is not a NULL pointer or an
+** open blob handle results in undefined behaviour. ^Calling this routine 
+** with a null pointer (such as would be returned by a failed call to 
+** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function
+** is passed a valid open blob handle, the values returned by the 
+** sqlite3_errcode() and sqlite3_errmsg() functions are set before returning.
+*/
+SQLITE_API int sqlite3_blob_close(sqlite3_blob *);
+
+/*
+** CAPI3REF: Return The Size Of An Open BLOB
+**
+** ^Returns the size in bytes of the BLOB accessible via the 
+** successfully opened [BLOB handle] in its only argument.  ^The
+** incremental blob I/O routines can only read or overwriting existing
+** blob content; they cannot change the size of a blob.
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+*/
+SQLITE_API int sqlite3_blob_bytes(sqlite3_blob *);
+
+/*
+** CAPI3REF: Read Data From A BLOB Incrementally
+**
+** ^(This function is used to read data from an open [BLOB handle] into a
+** caller-supplied buffer. N bytes of data are copied into buffer Z
+** from the open BLOB, starting at offset iOffset.)^
+**
+** ^If offset iOffset is less than N bytes from the end of the BLOB,
+** [SQLITE_ERROR] is returned and no data is read.  ^If N or iOffset is
+** less than zero, [SQLITE_ERROR] is returned and no data is read.
+** ^The size of the blob (and hence the maximum value of N+iOffset)
+** can be determined using the [sqlite3_blob_bytes()] interface.
+**
+** ^An attempt to read from an expired [BLOB handle] fails with an
+** error code of [SQLITE_ABORT].
+**
+** ^(On success, sqlite3_blob_read() returns SQLITE_OK.
+** Otherwise, an [error code] or an [extended error code] is returned.)^
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+**
+** See also: [sqlite3_blob_write()].
+*/
+SQLITE_API int sqlite3_blob_read(sqlite3_blob *, void *Z, int N, int iOffset);
+
+/*
+** CAPI3REF: Write Data Into A BLOB Incrementally
+**
+** ^(This function is used to write data into an open [BLOB handle] from a
+** caller-supplied buffer. N bytes of data are copied from the buffer Z
+** into the open BLOB, starting at offset iOffset.)^
+**
+** ^(On success, sqlite3_blob_write() returns SQLITE_OK.
+** Otherwise, an  [error code] or an [extended error code] is returned.)^
+** ^Unless SQLITE_MISUSE is returned, this function sets the 
+** [database connection] error code and message accessible via 
+** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. 
+**
+** ^If the [BLOB handle] passed as the first argument was not opened for
+** writing (the flags parameter to [sqlite3_blob_open()] was zero),
+** this function returns [SQLITE_READONLY].
+**
+** This function may only modify the contents of the BLOB; it is
+** not possible to increase the size of a BLOB using this API.
+** ^If offset iOffset is less than N bytes from the end of the BLOB,
+** [SQLITE_ERROR] is returned and no data is written. The size of the 
+** BLOB (and hence the maximum value of N+iOffset) can be determined 
+** using the [sqlite3_blob_bytes()] interface. ^If N or iOffset are less 
+** than zero [SQLITE_ERROR] is returned and no data is written.
+**
+** ^An attempt to write to an expired [BLOB handle] fails with an
+** error code of [SQLITE_ABORT].  ^Writes to the BLOB that occurred
+** before the [BLOB handle] expired are not rolled back by the
+** expiration of the handle, though of course those changes might
+** have been overwritten by the statement that expired the BLOB handle
+** or by other independent statements.
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+**
+** See also: [sqlite3_blob_read()].
+*/
+SQLITE_API int sqlite3_blob_write(sqlite3_blob *, const void *z, int n, int iOffset);
+
+/*
+** CAPI3REF: Virtual File System Objects
+**
+** A virtual filesystem (VFS) is an [sqlite3_vfs] object
+** that SQLite uses to interact
+** with the underlying operating system.  Most SQLite builds come with a
+** single default VFS that is appropriate for the host computer.
+** New VFSes can be registered and existing VFSes can be unregistered.
+** The following interfaces are provided.
+**
+** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name.
+** ^Names are case sensitive.
+** ^Names are zero-terminated UTF-8 strings.
+** ^If there is no match, a NULL pointer is returned.
+** ^If zVfsName is NULL then the default VFS is returned.
+**
+** ^New VFSes are registered with sqlite3_vfs_register().
+** ^Each new VFS becomes the default VFS if the makeDflt flag is set.
+** ^The same VFS can be registered multiple times without injury.
+** ^To make an existing VFS into the default VFS, register it again
+** with the makeDflt flag set.  If two different VFSes with the
+** same name are registered, the behavior is undefined.  If a
+** VFS is registered with a name that is NULL or an empty string,
+** then the behavior is undefined.
+**
+** ^Unregister a VFS with the sqlite3_vfs_unregister() interface.
+** ^(If the default VFS is unregistered, another VFS is chosen as
+** the default.  The choice for the new VFS is arbitrary.)^
+*/
+SQLITE_API sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName);
+SQLITE_API int sqlite3_vfs_register(sqlite3_vfs*, int makeDflt);
+SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs*);
+
+/*
+** CAPI3REF: Mutexes
+**
+** The SQLite core uses these routines for thread
+** synchronization. Though they are intended for internal
+** use by SQLite, code that links against SQLite is
+** permitted to use any of these routines.
+**
+** The SQLite source code contains multiple implementations
+** of these mutex routines.  An appropriate implementation
+** is selected automatically at compile-time.  The following
+** implementations are available in the SQLite core:
+**
+** <ul>
+** <li>   SQLITE_MUTEX_PTHREADS
+** <li>   SQLITE_MUTEX_W32
+** <li>   SQLITE_MUTEX_NOOP
+** </ul>
+**
+** The SQLITE_MUTEX_NOOP implementation is a set of routines
+** that does no real locking and is appropriate for use in
+** a single-threaded application.  The SQLITE_MUTEX_PTHREADS and
+** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix
+** and Windows.
+**
+** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor
+** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex
+** implementation is included with the library. In this case the
+** application must supply a custom mutex implementation using the
+** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function
+** before calling sqlite3_initialize() or any other public sqlite3_
+** function that calls sqlite3_initialize().
+**
+** ^The sqlite3_mutex_alloc() routine allocates a new
+** mutex and returns a pointer to it. ^The sqlite3_mutex_alloc()
+** routine returns NULL if it is unable to allocate the requested
+** mutex.  The argument to sqlite3_mutex_alloc() must one of these
+** integer constants:
+**
+** <ul>
+** <li>  SQLITE_MUTEX_FAST
+** <li>  SQLITE_MUTEX_RECURSIVE
+** <li>  SQLITE_MUTEX_STATIC_MASTER
+** <li>  SQLITE_MUTEX_STATIC_MEM
+** <li>  SQLITE_MUTEX_STATIC_OPEN
+** <li>  SQLITE_MUTEX_STATIC_PRNG
+** <li>  SQLITE_MUTEX_STATIC_LRU
+** <li>  SQLITE_MUTEX_STATIC_PMEM
+** <li>  SQLITE_MUTEX_STATIC_APP1
+** <li>  SQLITE_MUTEX_STATIC_APP2
+** <li>  SQLITE_MUTEX_STATIC_APP3
+** </ul>
+**
+** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE)
+** cause sqlite3_mutex_alloc() to create
+** a new mutex.  ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE
+** is used but not necessarily so when SQLITE_MUTEX_FAST is used.
+** The mutex implementation does not need to make a distinction
+** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does
+** not want to.  SQLite will only request a recursive mutex in
+** cases where it really needs one.  If a faster non-recursive mutex
+** implementation is available on the host platform, the mutex subsystem
+** might return such a mutex in response to SQLITE_MUTEX_FAST.
+**
+** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other
+** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return
+** a pointer to a static preexisting mutex.  ^Nine static mutexes are
+** used by the current version of SQLite.  Future versions of SQLite
+** may add additional static mutexes.  Static mutexes are for internal
+** use by SQLite only.  Applications that use SQLite mutexes should
+** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or
+** SQLITE_MUTEX_RECURSIVE.
+**
+** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST
+** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc()
+** returns a different mutex on every call.  ^For the static
+** mutex types, the same mutex is returned on every call that has
+** the same type number.
+**
+** ^The sqlite3_mutex_free() routine deallocates a previously
+** allocated dynamic mutex.  Attempting to deallocate a static
+** mutex results in undefined behavior.
+**
+** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt
+** to enter a mutex.  ^If another thread is already within the mutex,
+** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return
+** SQLITE_BUSY.  ^The sqlite3_mutex_try() interface returns [SQLITE_OK]
+** upon successful entry.  ^(Mutexes created using
+** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread.
+** In such cases, the
+** mutex must be exited an equal number of times before another thread
+** can enter.)^  If the same thread tries to enter any mutex other
+** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined.
+**
+** ^(Some systems (for example, Windows 95) do not support the operation
+** implemented by sqlite3_mutex_try().  On those systems, sqlite3_mutex_try()
+** will always return SQLITE_BUSY. The SQLite core only ever uses
+** sqlite3_mutex_try() as an optimization so this is acceptable 
+** behavior.)^
+**
+** ^The sqlite3_mutex_leave() routine exits a mutex that was
+** previously entered by the same thread.   The behavior
+** is undefined if the mutex is not currently entered by the
+** calling thread or is not currently allocated.
+**
+** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or
+** sqlite3_mutex_leave() is a NULL pointer, then all three routines
+** behave as no-ops.
+**
+** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()].
+*/
+SQLITE_API sqlite3_mutex *sqlite3_mutex_alloc(int);
+SQLITE_API void sqlite3_mutex_free(sqlite3_mutex*);
+SQLITE_API void sqlite3_mutex_enter(sqlite3_mutex*);
+SQLITE_API int sqlite3_mutex_try(sqlite3_mutex*);
+SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex*);
+
+/*
+** CAPI3REF: Mutex Methods Object
+**
+** An instance of this structure defines the low-level routines
+** used to allocate and use mutexes.
+**
+** Usually, the default mutex implementations provided by SQLite are
+** sufficient, however the application has the option of substituting a custom
+** implementation for specialized deployments or systems for which SQLite
+** does not provide a suitable implementation. In this case, the application
+** creates and populates an instance of this structure to pass
+** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option.
+** Additionally, an instance of this structure can be used as an
+** output variable when querying the system for the current mutex
+** implementation, using the [SQLITE_CONFIG_GETMUTEX] option.
+**
+** ^The xMutexInit method defined by this structure is invoked as
+** part of system initialization by the sqlite3_initialize() function.
+** ^The xMutexInit routine is called by SQLite exactly once for each
+** effective call to [sqlite3_initialize()].
+**
+** ^The xMutexEnd method defined by this structure is invoked as
+** part of system shutdown by the sqlite3_shutdown() function. The
+** implementation of this method is expected to release all outstanding
+** resources obtained by the mutex methods implementation, especially
+** those obtained by the xMutexInit method.  ^The xMutexEnd()
+** interface is invoked exactly once for each call to [sqlite3_shutdown()].
+**
+** ^(The remaining seven methods defined by this structure (xMutexAlloc,
+** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and
+** xMutexNotheld) implement the following interfaces (respectively):
+**
+** <ul>
+**   <li>  [sqlite3_mutex_alloc()] </li>
+**   <li>  [sqlite3_mutex_free()] </li>
+**   <li>  [sqlite3_mutex_enter()] </li>
+**   <li>  [sqlite3_mutex_try()] </li>
+**   <li>  [sqlite3_mutex_leave()] </li>
+**   <li>  [sqlite3_mutex_held()] </li>
+**   <li>  [sqlite3_mutex_notheld()] </li>
+** </ul>)^
+**
+** The only difference is that the public sqlite3_XXX functions enumerated
+** above silently ignore any invocations that pass a NULL pointer instead
+** of a valid mutex handle. The implementations of the methods defined
+** by this structure are not required to handle this case, the results
+** of passing a NULL pointer instead of a valid mutex handle are undefined
+** (i.e. it is acceptable to provide an implementation that segfaults if
+** it is passed a NULL pointer).
+**
+** The xMutexInit() method must be threadsafe.  It must be harmless to
+** invoke xMutexInit() multiple times within the same process and without
+** intervening calls to xMutexEnd().  Second and subsequent calls to
+** xMutexInit() must be no-ops.
+**
+** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()]
+** and its associates).  Similarly, xMutexAlloc() must not use SQLite memory
+** allocation for a static mutex.  ^However xMutexAlloc() may use SQLite
+** memory allocation for a fast or recursive mutex.
+**
+** ^SQLite will invoke the xMutexEnd() method when [sqlite3_shutdown()] is
+** called, but only if the prior call to xMutexInit returned SQLITE_OK.
+** If xMutexInit fails in any way, it is expected to clean up after itself
+** prior to returning.
+*/
+typedef struct sqlite3_mutex_methods sqlite3_mutex_methods;
+struct sqlite3_mutex_methods {
+  int (*xMutexInit)(void);
+  int (*xMutexEnd)(void);
+  sqlite3_mutex *(*xMutexAlloc)(int);
+  void (*xMutexFree)(sqlite3_mutex *);
+  void (*xMutexEnter)(sqlite3_mutex *);
+  int (*xMutexTry)(sqlite3_mutex *);
+  void (*xMutexLeave)(sqlite3_mutex *);
+  int (*xMutexHeld)(sqlite3_mutex *);
+  int (*xMutexNotheld)(sqlite3_mutex *);
+};
+
+/*
+** CAPI3REF: Mutex Verification Routines
+**
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routines
+** are intended for use inside assert() statements.  The SQLite core
+** never uses these routines except inside an assert() and applications
+** are advised to follow the lead of the core.  The SQLite core only
+** provides implementations for these routines when it is compiled
+** with the SQLITE_DEBUG flag.  External mutex implementations
+** are only required to provide these routines if SQLITE_DEBUG is
+** defined and if NDEBUG is not defined.
+**
+** These routines should return true if the mutex in their argument
+** is held or not held, respectively, by the calling thread.
+**
+** The implementation is not required to provide versions of these
+** routines that actually work. If the implementation does not provide working
+** versions of these routines, it should at least provide stubs that always
+** return true so that one does not get spurious assertion failures.
+**
+** If the argument to sqlite3_mutex_held() is a NULL pointer then
+** the routine should return 1.   This seems counter-intuitive since
+** clearly the mutex cannot be held if it does not exist.  But
+** the reason the mutex does not exist is because the build is not
+** using mutexes.  And we do not want the assert() containing the
+** call to sqlite3_mutex_held() to fail, so a non-zero return is
+** the appropriate thing to do.  The sqlite3_mutex_notheld()
+** interface should also return 1 when given a NULL pointer.
+*/
+#ifndef NDEBUG
+SQLITE_API int sqlite3_mutex_held(sqlite3_mutex*);
+SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex*);
+#endif
+
+/*
+** CAPI3REF: Mutex Types
+**
+** The [sqlite3_mutex_alloc()] interface takes a single argument
+** which is one of these integer constants.
+**
+** The set of static mutexes may change from one SQLite release to the
+** next.  Applications that override the built-in mutex logic must be
+** prepared to accommodate additional static mutexes.
+*/
+#define SQLITE_MUTEX_FAST             0
+#define SQLITE_MUTEX_RECURSIVE        1
+#define SQLITE_MUTEX_STATIC_MASTER    2
+#define SQLITE_MUTEX_STATIC_MEM       3  /* sqlite3_malloc() */
+#define SQLITE_MUTEX_STATIC_MEM2      4  /* NOT USED */
+#define SQLITE_MUTEX_STATIC_OPEN      4  /* sqlite3BtreeOpen() */
+#define SQLITE_MUTEX_STATIC_PRNG      5  /* sqlite3_random() */
+#define SQLITE_MUTEX_STATIC_LRU       6  /* lru page list */
+#define SQLITE_MUTEX_STATIC_LRU2      7  /* NOT USED */
+#define SQLITE_MUTEX_STATIC_PMEM      7  /* sqlite3PageMalloc() */
+#define SQLITE_MUTEX_STATIC_APP1      8  /* For use by application */
+#define SQLITE_MUTEX_STATIC_APP2      9  /* For use by application */
+#define SQLITE_MUTEX_STATIC_APP3     10  /* For use by application */
+
+/*
+** CAPI3REF: Retrieve the mutex for a database connection
+**
+** ^This interface returns a pointer the [sqlite3_mutex] object that 
+** serializes access to the [database connection] given in the argument
+** when the [threading mode] is Serialized.
+** ^If the [threading mode] is Single-thread or Multi-thread then this
+** routine returns a NULL pointer.
+*/
+SQLITE_API sqlite3_mutex *sqlite3_db_mutex(sqlite3*);
+
+/*
+** CAPI3REF: Low-Level Control Of Database Files
+**
+** ^The [sqlite3_file_control()] interface makes a direct call to the
+** xFileControl method for the [sqlite3_io_methods] object associated
+** with a particular database identified by the second argument. ^The
+** name of the database is "main" for the main database or "temp" for the
+** TEMP database, or the name that appears after the AS keyword for
+** databases that are added using the [ATTACH] SQL command.
+** ^A NULL pointer can be used in place of "main" to refer to the
+** main database file.
+** ^The third and fourth parameters to this routine
+** are passed directly through to the second and third parameters of
+** the xFileControl method.  ^The return value of the xFileControl
+** method becomes the return value of this routine.
+**
+** ^The SQLITE_FCNTL_FILE_POINTER value for the op parameter causes
+** a pointer to the underlying [sqlite3_file] object to be written into
+** the space pointed to by the 4th parameter.  ^The SQLITE_FCNTL_FILE_POINTER
+** case is a short-circuit path which does not actually invoke the
+** underlying sqlite3_io_methods.xFileControl method.
+**
+** ^If the second parameter (zDbName) does not match the name of any
+** open database file, then SQLITE_ERROR is returned.  ^This error
+** code is not remembered and will not be recalled by [sqlite3_errcode()]
+** or [sqlite3_errmsg()].  The underlying xFileControl method might
+** also return SQLITE_ERROR.  There is no way to distinguish between
+** an incorrect zDbName and an SQLITE_ERROR return from the underlying
+** xFileControl method.
+**
+** See also: [SQLITE_FCNTL_LOCKSTATE]
+*/
+SQLITE_API int sqlite3_file_control(sqlite3*, const char *zDbName, int op, void*);
+
+/*
+** CAPI3REF: Testing Interface
+**
+** ^The sqlite3_test_control() interface is used to read out internal
+** state of SQLite and to inject faults into SQLite for testing
+** purposes.  ^The first parameter is an operation code that determines
+** the number, meaning, and operation of all subsequent parameters.
+**
+** This interface is not for use by applications.  It exists solely
+** for verifying the correct operation of the SQLite library.  Depending
+** on how the SQLite library is compiled, this interface might not exist.
+**
+** The details of the operation codes, their meanings, the parameters
+** they take, and what they do are all subject to change without notice.
+** Unlike most of the SQLite API, this function is not guaranteed to
+** operate consistently from one release to the next.
+*/
+SQLITE_API int sqlite3_test_control(int op, ...);
+
+/*
+** CAPI3REF: Testing Interface Operation Codes
+**
+** These constants are the valid operation code parameters used
+** as the first argument to [sqlite3_test_control()].
+**
+** These parameters and their meanings are subject to change
+** without notice.  These values are for testing purposes only.
+** Applications should not use any of these parameters or the
+** [sqlite3_test_control()] interface.
+*/
+#define SQLITE_TESTCTRL_FIRST                    5
+#define SQLITE_TESTCTRL_PRNG_SAVE                5
+#define SQLITE_TESTCTRL_PRNG_RESTORE             6
+#define SQLITE_TESTCTRL_PRNG_RESET               7
+#define SQLITE_TESTCTRL_BITVEC_TEST              8
+#define SQLITE_TESTCTRL_FAULT_INSTALL            9
+#define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS     10
+#define SQLITE_TESTCTRL_PENDING_BYTE            11
+#define SQLITE_TESTCTRL_ASSERT                  12
+#define SQLITE_TESTCTRL_ALWAYS                  13
+#define SQLITE_TESTCTRL_RESERVE                 14
+#define SQLITE_TESTCTRL_OPTIMIZATIONS           15
+#define SQLITE_TESTCTRL_ISKEYWORD               16
+#define SQLITE_TESTCTRL_SCRATCHMALLOC           17
+#define SQLITE_TESTCTRL_LOCALTIME_FAULT         18
+#define SQLITE_TESTCTRL_EXPLAIN_STMT            19  /* NOT USED */
+#define SQLITE_TESTCTRL_NEVER_CORRUPT           20
+#define SQLITE_TESTCTRL_VDBE_COVERAGE           21
+#define SQLITE_TESTCTRL_BYTEORDER               22
+#define SQLITE_TESTCTRL_ISINIT                  23
+#define SQLITE_TESTCTRL_SORTER_MMAP             24
+#define SQLITE_TESTCTRL_LAST                    24
+
+/*
+** CAPI3REF: SQLite Runtime Status
+**
+** ^This interface is used to retrieve runtime status information
+** about the performance of SQLite, and optionally to reset various
+** highwater marks.  ^The first argument is an integer code for
+** the specific parameter to measure.  ^(Recognized integer codes
+** are of the form [status parameters | SQLITE_STATUS_...].)^
+** ^The current value of the parameter is returned into *pCurrent.
+** ^The highest recorded value is returned in *pHighwater.  ^If the
+** resetFlag is true, then the highest record value is reset after
+** *pHighwater is written.  ^(Some parameters do not record the highest
+** value.  For those parameters
+** nothing is written into *pHighwater and the resetFlag is ignored.)^
+** ^(Other parameters record only the highwater mark and not the current
+** value.  For these latter parameters nothing is written into *pCurrent.)^
+**
+** ^The sqlite3_status() routine returns SQLITE_OK on success and a
+** non-zero [error code] on failure.
+**
+** This routine is threadsafe but is not atomic.  This routine can be
+** called while other threads are running the same or different SQLite
+** interfaces.  However the values returned in *pCurrent and
+** *pHighwater reflect the status of SQLite at different points in time
+** and it is possible that another thread might change the parameter
+** in between the times when *pCurrent and *pHighwater are written.
+**
+** See also: [sqlite3_db_status()]
+*/
+SQLITE_API int sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag);
+
+
+/*
+** CAPI3REF: Status Parameters
+** KEYWORDS: {status parameters}
+**
+** These integer constants designate various run-time status parameters
+** that can be returned by [sqlite3_status()].
+**
+** <dl>
+** [[SQLITE_STATUS_MEMORY_USED]] ^(<dt>SQLITE_STATUS_MEMORY_USED</dt>
+** <dd>This parameter is the current amount of memory checked out
+** using [sqlite3_malloc()], either directly or indirectly.  The
+** figure includes calls made to [sqlite3_malloc()] by the application
+** and internal memory usage by the SQLite library.  Scratch memory
+** controlled by [SQLITE_CONFIG_SCRATCH] and auxiliary page-cache
+** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in
+** this parameter.  The amount returned is the sum of the allocation
+** sizes as reported by the xSize method in [sqlite3_mem_methods].</dd>)^
+**
+** [[SQLITE_STATUS_MALLOC_SIZE]] ^(<dt>SQLITE_STATUS_MALLOC_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their
+** internal equivalents).  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_MALLOC_COUNT]] ^(<dt>SQLITE_STATUS_MALLOC_COUNT</dt>
+** <dd>This parameter records the number of separate memory allocations
+** currently checked out.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_USED]] ^(<dt>SQLITE_STATUS_PAGECACHE_USED</dt>
+** <dd>This parameter returns the number of pages used out of the
+** [pagecache memory allocator] that was configured using 
+** [SQLITE_CONFIG_PAGECACHE].  The
+** value returned is in pages, not in bytes.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]] 
+** ^(<dt>SQLITE_STATUS_PAGECACHE_OVERFLOW</dt>
+** <dd>This parameter returns the number of bytes of page cache
+** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE]
+** buffer and where forced to overflow to [sqlite3_malloc()].  The
+** returned value includes allocations that overflowed because they
+** where too large (they were larger than the "sz" parameter to
+** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because
+** no space was left in the page cache.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(<dt>SQLITE_STATUS_PAGECACHE_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [pagecache memory allocator].  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_USED]] ^(<dt>SQLITE_STATUS_SCRATCH_USED</dt>
+** <dd>This parameter returns the number of allocations used out of the
+** [scratch memory allocator] configured using
+** [SQLITE_CONFIG_SCRATCH].  The value returned is in allocations, not
+** in bytes.  Since a single thread may only have one scratch allocation
+** outstanding at time, this parameter also reports the number of threads
+** using scratch memory at the same time.</dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(<dt>SQLITE_STATUS_SCRATCH_OVERFLOW</dt>
+** <dd>This parameter returns the number of bytes of scratch memory
+** allocation which could not be satisfied by the [SQLITE_CONFIG_SCRATCH]
+** buffer and where forced to overflow to [sqlite3_malloc()].  The values
+** returned include overflows because the requested allocation was too
+** larger (that is, because the requested allocation was larger than the
+** "sz" parameter to [SQLITE_CONFIG_SCRATCH]) and because no scratch buffer
+** slots were available.
+** </dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_SIZE]] ^(<dt>SQLITE_STATUS_SCRATCH_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [scratch memory allocator].  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_PARSER_STACK]] ^(<dt>SQLITE_STATUS_PARSER_STACK</dt>
+** <dd>This parameter records the deepest parser stack.  It is only
+** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].</dd>)^
+** </dl>
+**
+** New status parameters may be added from time to time.
+*/
+#define SQLITE_STATUS_MEMORY_USED          0
+#define SQLITE_STATUS_PAGECACHE_USED       1
+#define SQLITE_STATUS_PAGECACHE_OVERFLOW   2
+#define SQLITE_STATUS_SCRATCH_USED         3
+#define SQLITE_STATUS_SCRATCH_OVERFLOW     4
+#define SQLITE_STATUS_MALLOC_SIZE          5
+#define SQLITE_STATUS_PARSER_STACK         6
+#define SQLITE_STATUS_PAGECACHE_SIZE       7
+#define SQLITE_STATUS_SCRATCH_SIZE         8
+#define SQLITE_STATUS_MALLOC_COUNT         9
+
+/*
+** CAPI3REF: Database Connection Status
+**
+** ^This interface is used to retrieve runtime status information 
+** about a single [database connection].  ^The first argument is the
+** database connection object to be interrogated.  ^The second argument
+** is an integer constant, taken from the set of
+** [SQLITE_DBSTATUS options], that
+** determines the parameter to interrogate.  The set of 
+** [SQLITE_DBSTATUS options] is likely
+** to grow in future releases of SQLite.
+**
+** ^The current value of the requested parameter is written into *pCur
+** and the highest instantaneous value is written into *pHiwtr.  ^If
+** the resetFlg is true, then the highest instantaneous value is
+** reset back down to the current value.
+**
+** ^The sqlite3_db_status() routine returns SQLITE_OK on success and a
+** non-zero [error code] on failure.
+**
+** See also: [sqlite3_status()] and [sqlite3_stmt_status()].
+*/
+SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int resetFlg);
+
+/*
+** CAPI3REF: Status Parameters for database connections
+** KEYWORDS: {SQLITE_DBSTATUS options}
+**
+** These constants are the available integer "verbs" that can be passed as
+** the second argument to the [sqlite3_db_status()] interface.
+**
+** New verbs may be added in future releases of SQLite. Existing verbs
+** might be discontinued. Applications should check the return code from
+** [sqlite3_db_status()] to make sure that the call worked.
+** The [sqlite3_db_status()] interface will return a non-zero error code
+** if a discontinued or unsupported verb is invoked.
+**
+** <dl>
+** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_USED</dt>
+** <dd>This parameter returns the number of lookaside memory slots currently
+** checked out.</dd>)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_HIT</dt>
+** <dd>This parameter returns the number malloc attempts that were 
+** satisfied using lookaside memory. Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]]
+** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE</dt>
+** <dd>This parameter returns the number malloc attempts that might have
+** been satisfied using lookaside memory but failed due to the amount of
+** memory requested being larger than the lookaside slot size.
+** Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]]
+** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL</dt>
+** <dd>This parameter returns the number malloc attempts that might have
+** been satisfied using lookaside memory but failed due to all lookaside
+** memory already being in use.
+** Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_CACHE_USED]] ^(<dt>SQLITE_DBSTATUS_CACHE_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** memory used by all pager caches associated with the database connection.)^
+** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0.
+**
+** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(<dt>SQLITE_DBSTATUS_SCHEMA_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** memory used to store the schema for all databases associated
+** with the connection - main, temp, and any [ATTACH]-ed databases.)^ 
+** ^The full amount of memory used by the schemas is reported, even if the
+** schema memory is shared with other database connections due to
+** [shared cache mode] being enabled.
+** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0.
+**
+** [[SQLITE_DBSTATUS_STMT_USED]] ^(<dt>SQLITE_DBSTATUS_STMT_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** and lookaside memory used by all prepared statements associated with
+** the database connection.)^
+** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt>
+** <dd>This parameter returns the number of pager cache hits that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt>
+** <dd>This parameter returns the number of pager cache misses that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(<dt>SQLITE_DBSTATUS_CACHE_WRITE</dt>
+** <dd>This parameter returns the number of dirty cache entries that have
+** been written to disk. Specifically, the number of pages written to the
+** wal file in wal mode databases, or the number of pages written to the
+** database file in rollback mode databases. Any pages written as part of
+** transaction rollback or database recovery operations are not included.
+** If an IO or other error occurs while writing a page to disk, the effect
+** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The
+** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(<dt>SQLITE_DBSTATUS_DEFERRED_FKS</dt>
+** <dd>This parameter returns zero for the current value if and only if
+** all foreign key constraints (deferred or immediate) have been
+** resolved.)^  ^The highwater mark is always 0.
+** </dd>
+** </dl>
+*/
+#define SQLITE_DBSTATUS_LOOKASIDE_USED       0
+#define SQLITE_DBSTATUS_CACHE_USED           1
+#define SQLITE_DBSTATUS_SCHEMA_USED          2
+#define SQLITE_DBSTATUS_STMT_USED            3
+#define SQLITE_DBSTATUS_LOOKASIDE_HIT        4
+#define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE  5
+#define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL  6
+#define SQLITE_DBSTATUS_CACHE_HIT            7
+#define SQLITE_DBSTATUS_CACHE_MISS           8
+#define SQLITE_DBSTATUS_CACHE_WRITE          9
+#define SQLITE_DBSTATUS_DEFERRED_FKS        10
+#define SQLITE_DBSTATUS_MAX                 10   /* Largest defined DBSTATUS */
+
+
+/*
+** CAPI3REF: Prepared Statement Status
+**
+** ^(Each prepared statement maintains various
+** [SQLITE_STMTSTATUS counters] that measure the number
+** of times it has performed specific operations.)^  These counters can
+** be used to monitor the performance characteristics of the prepared
+** statements.  For example, if the number of table steps greatly exceeds
+** the number of table searches or result rows, that would tend to indicate
+** that the prepared statement is using a full table scan rather than
+** an index.  
+**
+** ^(This interface is used to retrieve and reset counter values from
+** a [prepared statement].  The first argument is the prepared statement
+** object to be interrogated.  The second argument
+** is an integer code for a specific [SQLITE_STMTSTATUS counter]
+** to be interrogated.)^
+** ^The current value of the requested counter is returned.
+** ^If the resetFlg is true, then the counter is reset to zero after this
+** interface call returns.
+**
+** See also: [sqlite3_status()] and [sqlite3_db_status()].
+*/
+SQLITE_API int sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg);
+
+/*
+** CAPI3REF: Status Parameters for prepared statements
+** KEYWORDS: {SQLITE_STMTSTATUS counter} {SQLITE_STMTSTATUS counters}
+**
+** These preprocessor macros define integer codes that name counter
+** values associated with the [sqlite3_stmt_status()] interface.
+** The meanings of the various counters are as follows:
+**
+** <dl>
+** [[SQLITE_STMTSTATUS_FULLSCAN_STEP]] <dt>SQLITE_STMTSTATUS_FULLSCAN_STEP</dt>
+** <dd>^This is the number of times that SQLite has stepped forward in
+** a table as part of a full table scan.  Large numbers for this counter
+** may indicate opportunities for performance improvement through 
+** careful use of indices.</dd>
+**
+** [[SQLITE_STMTSTATUS_SORT]] <dt>SQLITE_STMTSTATUS_SORT</dt>
+** <dd>^This is the number of sort operations that have occurred.
+** A non-zero value in this counter may indicate an opportunity to
+** improvement performance through careful use of indices.</dd>
+**
+** [[SQLITE_STMTSTATUS_AUTOINDEX]] <dt>SQLITE_STMTSTATUS_AUTOINDEX</dt>
+** <dd>^This is the number of rows inserted into transient indices that
+** were created automatically in order to help joins run faster.
+** A non-zero value in this counter may indicate an opportunity to
+** improvement performance by adding permanent indices that do not
+** need to be reinitialized each time the statement is run.</dd>
+**
+** [[SQLITE_STMTSTATUS_VM_STEP]] <dt>SQLITE_STMTSTATUS_VM_STEP</dt>
+** <dd>^This is the number of virtual machine operations executed
+** by the prepared statement if that number is less than or equal
+** to 2147483647.  The number of virtual machine operations can be 
+** used as a proxy for the total work done by the prepared statement.
+** If the number of virtual machine operations exceeds 2147483647
+** then the value returned by this statement status code is undefined.
+** </dd>
+** </dl>
+*/
+#define SQLITE_STMTSTATUS_FULLSCAN_STEP     1
+#define SQLITE_STMTSTATUS_SORT              2
+#define SQLITE_STMTSTATUS_AUTOINDEX         3
+#define SQLITE_STMTSTATUS_VM_STEP           4
+
+/*
+** CAPI3REF: Custom Page Cache Object
+**
+** The sqlite3_pcache type is opaque.  It is implemented by
+** the pluggable module.  The SQLite core has no knowledge of
+** its size or internal structure and never deals with the
+** sqlite3_pcache object except by holding and passing pointers
+** to the object.
+**
+** See [sqlite3_pcache_methods2] for additional information.
+*/
+typedef struct sqlite3_pcache sqlite3_pcache;
+
+/*
+** CAPI3REF: Custom Page Cache Object
+**
+** The sqlite3_pcache_page object represents a single page in the
+** page cache.  The page cache will allocate instances of this
+** object.  Various methods of the page cache use pointers to instances
+** of this object as parameters or as their return value.
+**
+** See [sqlite3_pcache_methods2] for additional information.
+*/
+typedef struct sqlite3_pcache_page sqlite3_pcache_page;
+struct sqlite3_pcache_page {
+  void *pBuf;        /* The content of the page */
+  void *pExtra;      /* Extra information associated with the page */
+};
+
+/*
+** CAPI3REF: Application Defined Page Cache.
+** KEYWORDS: {page cache}
+**
+** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can
+** register an alternative page cache implementation by passing in an 
+** instance of the sqlite3_pcache_methods2 structure.)^
+** In many applications, most of the heap memory allocated by 
+** SQLite is used for the page cache.
+** By implementing a 
+** custom page cache using this API, an application can better control
+** the amount of memory consumed by SQLite, the way in which 
+** that memory is allocated and released, and the policies used to 
+** determine exactly which parts of a database file are cached and for 
+** how long.
+**
+** The alternative page cache mechanism is an
+** extreme measure that is only needed by the most demanding applications.
+** The built-in page cache is recommended for most uses.
+**
+** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an
+** internal buffer by SQLite within the call to [sqlite3_config].  Hence
+** the application may discard the parameter after the call to
+** [sqlite3_config()] returns.)^
+**
+** [[the xInit() page cache method]]
+** ^(The xInit() method is called once for each effective 
+** call to [sqlite3_initialize()])^
+** (usually only once during the lifetime of the process). ^(The xInit()
+** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^
+** The intent of the xInit() method is to set up global data structures 
+** required by the custom page cache implementation. 
+** ^(If the xInit() method is NULL, then the 
+** built-in default page cache is used instead of the application defined
+** page cache.)^
+**
+** [[the xShutdown() page cache method]]
+** ^The xShutdown() method is called by [sqlite3_shutdown()].
+** It can be used to clean up 
+** any outstanding resources before process shutdown, if required.
+** ^The xShutdown() method may be NULL.
+**
+** ^SQLite automatically serializes calls to the xInit method,
+** so the xInit method need not be threadsafe.  ^The
+** xShutdown method is only called from [sqlite3_shutdown()] so it does
+** not need to be threadsafe either.  All other methods must be threadsafe
+** in multithreaded applications.
+**
+** ^SQLite will never invoke xInit() more than once without an intervening
+** call to xShutdown().
+**
+** [[the xCreate() page cache methods]]
+** ^SQLite invokes the xCreate() method to construct a new cache instance.
+** SQLite will typically create one cache instance for each open database file,
+** though this is not guaranteed. ^The
+** first parameter, szPage, is the size in bytes of the pages that must
+** be allocated by the cache.  ^szPage will always a power of two.  ^The
+** second parameter szExtra is a number of bytes of extra storage 
+** associated with each page cache entry.  ^The szExtra parameter will
+** a number less than 250.  SQLite will use the
+** extra szExtra bytes on each page to store metadata about the underlying
+** database page on disk.  The value passed into szExtra depends
+** on the SQLite version, the target platform, and how SQLite was compiled.
+** ^The third argument to xCreate(), bPurgeable, is true if the cache being
+** created will be used to cache database pages of a file stored on disk, or
+** false if it is used for an in-memory database. The cache implementation
+** does not have to do anything special based with the value of bPurgeable;
+** it is purely advisory.  ^On a cache where bPurgeable is false, SQLite will
+** never invoke xUnpin() except to deliberately delete a page.
+** ^In other words, calls to xUnpin() on a cache with bPurgeable set to
+** false will always have the "discard" flag set to true.  
+** ^Hence, a cache created with bPurgeable false will
+** never contain any unpinned pages.
+**
+** [[the xCachesize() page cache method]]
+** ^(The xCachesize() method may be called at any time by SQLite to set the
+** suggested maximum cache-size (number of pages stored by) the cache
+** instance passed as the first argument. This is the value configured using
+** the SQLite "[PRAGMA cache_size]" command.)^  As with the bPurgeable
+** parameter, the implementation is not required to do anything with this
+** value; it is advisory only.
+**
+** [[the xPagecount() page cache methods]]
+** The xPagecount() method must return the number of pages currently
+** stored in the cache, both pinned and unpinned.
+** 
+** [[the xFetch() page cache methods]]
+** The xFetch() method locates a page in the cache and returns a pointer to 
+** an sqlite3_pcache_page object associated with that page, or a NULL pointer.
+** The pBuf element of the returned sqlite3_pcache_page object will be a
+** pointer to a buffer of szPage bytes used to store the content of a 
+** single database page.  The pExtra element of sqlite3_pcache_page will be
+** a pointer to the szExtra bytes of extra storage that SQLite has requested
+** for each entry in the page cache.
+**
+** The page to be fetched is determined by the key. ^The minimum key value
+** is 1.  After it has been retrieved using xFetch, the page is considered
+** to be "pinned".
+**
+** If the requested page is already in the page cache, then the page cache
+** implementation must return a pointer to the page buffer with its content
+** intact.  If the requested page is not already in the cache, then the
+** cache implementation should use the value of the createFlag
+** parameter to help it determined what action to take:
+**
+** <table border=1 width=85% align=center>
+** <tr><th> createFlag <th> Behavior when page is not already in cache
+** <tr><td> 0 <td> Do not allocate a new page.  Return NULL.
+** <tr><td> 1 <td> Allocate a new page if it easy and convenient to do so.
+**                 Otherwise return NULL.
+** <tr><td> 2 <td> Make every effort to allocate a new page.  Only return
+**                 NULL if allocating a new page is effectively impossible.
+** </table>
+**
+** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1.  SQLite
+** will only use a createFlag of 2 after a prior call with a createFlag of 1
+** failed.)^  In between the to xFetch() calls, SQLite may
+** attempt to unpin one or more cache pages by spilling the content of
+** pinned pages to disk and synching the operating system disk cache.
+**
+** [[the xUnpin() page cache method]]
+** ^xUnpin() is called by SQLite with a pointer to a currently pinned page
+** as its second argument.  If the third parameter, discard, is non-zero,
+** then the page must be evicted from the cache.
+** ^If the discard parameter is
+** zero, then the page may be discarded or retained at the discretion of
+** page cache implementation. ^The page cache implementation
+** may choose to evict unpinned pages at any time.
+**
+** The cache must not perform any reference counting. A single 
+** call to xUnpin() unpins the page regardless of the number of prior calls 
+** to xFetch().
+**
+** [[the xRekey() page cache methods]]
+** The xRekey() method is used to change the key value associated with the
+** page passed as the second argument. If the cache
+** previously contains an entry associated with newKey, it must be
+** discarded. ^Any prior cache entry associated with newKey is guaranteed not
+** to be pinned.
+**
+** When SQLite calls the xTruncate() method, the cache must discard all
+** existing cache entries with page numbers (keys) greater than or equal
+** to the value of the iLimit parameter passed to xTruncate(). If any
+** of these pages are pinned, they are implicitly unpinned, meaning that
+** they can be safely discarded.
+**
+** [[the xDestroy() page cache method]]
+** ^The xDestroy() method is used to delete a cache allocated by xCreate().
+** All resources associated with the specified cache should be freed. ^After
+** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*]
+** handle invalid, and will not use it with any other sqlite3_pcache_methods2
+** functions.
+**
+** [[the xShrink() page cache method]]
+** ^SQLite invokes the xShrink() method when it wants the page cache to
+** free up as much of heap memory as possible.  The page cache implementation
+** is not obligated to free any memory, but well-behaved implementations should
+** do their best.
+*/
+typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2;
+struct sqlite3_pcache_methods2 {
+  int iVersion;
+  void *pArg;
+  int (*xInit)(void*);
+  void (*xShutdown)(void*);
+  sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable);
+  void (*xCachesize)(sqlite3_pcache*, int nCachesize);
+  int (*xPagecount)(sqlite3_pcache*);
+  sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
+  void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard);
+  void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, 
+      unsigned oldKey, unsigned newKey);
+  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
+  void (*xDestroy)(sqlite3_pcache*);
+  void (*xShrink)(sqlite3_pcache*);
+};
+
+/*
+** This is the obsolete pcache_methods object that has now been replaced
+** by sqlite3_pcache_methods2.  This object is not used by SQLite.  It is
+** retained in the header file for backwards compatibility only.
+*/
+typedef struct sqlite3_pcache_methods sqlite3_pcache_methods;
+struct sqlite3_pcache_methods {
+  void *pArg;
+  int (*xInit)(void*);
+  void (*xShutdown)(void*);
+  sqlite3_pcache *(*xCreate)(int szPage, int bPurgeable);
+  void (*xCachesize)(sqlite3_pcache*, int nCachesize);
+  int (*xPagecount)(sqlite3_pcache*);
+  void *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
+  void (*xUnpin)(sqlite3_pcache*, void*, int discard);
+  void (*xRekey)(sqlite3_pcache*, void*, unsigned oldKey, unsigned newKey);
+  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
+  void (*xDestroy)(sqlite3_pcache*);
+};
+
+
+/*
+** CAPI3REF: Online Backup Object
+**
+** The sqlite3_backup object records state information about an ongoing
+** online backup operation.  ^The sqlite3_backup object is created by
+** a call to [sqlite3_backup_init()] and is destroyed by a call to
+** [sqlite3_backup_finish()].
+**
+** See Also: [Using the SQLite Online Backup API]
+*/
+typedef struct sqlite3_backup sqlite3_backup;
+
+/*
+** CAPI3REF: Online Backup API.
+**
+** The backup API copies the content of one database into another.
+** It is useful either for creating backups of databases or
+** for copying in-memory databases to or from persistent files. 
+**
+** See Also: [Using the SQLite Online Backup API]
+**
+** ^SQLite holds a write transaction open on the destination database file
+** for the duration of the backup operation.
+** ^The source database is read-locked only while it is being read;
+** it is not locked continuously for the entire backup operation.
+** ^Thus, the backup may be performed on a live source database without
+** preventing other database connections from
+** reading or writing to the source database while the backup is underway.
+** 
+** ^(To perform a backup operation: 
+**   <ol>
+**     <li><b>sqlite3_backup_init()</b> is called once to initialize the
+**         backup, 
+**     <li><b>sqlite3_backup_step()</b> is called one or more times to transfer 
+**         the data between the two databases, and finally
+**     <li><b>sqlite3_backup_finish()</b> is called to release all resources 
+**         associated with the backup operation. 
+**   </ol>)^
+** There should be exactly one call to sqlite3_backup_finish() for each
+** successful call to sqlite3_backup_init().
+**
+** [[sqlite3_backup_init()]] <b>sqlite3_backup_init()</b>
+**
+** ^The D and N arguments to sqlite3_backup_init(D,N,S,M) are the 
+** [database connection] associated with the destination database 
+** and the database name, respectively.
+** ^The database name is "main" for the main database, "temp" for the
+** temporary database, or the name specified after the AS keyword in
+** an [ATTACH] statement for an attached database.
+** ^The S and M arguments passed to 
+** sqlite3_backup_init(D,N,S,M) identify the [database connection]
+** and database name of the source database, respectively.
+** ^The source and destination [database connections] (parameters S and D)
+** must be different or else sqlite3_backup_init(D,N,S,M) will fail with
+** an error.
+**
+** ^A call to sqlite3_backup_init() will fail, returning SQLITE_ERROR, if 
+** there is already a read or read-write transaction open on the 
+** destination database.
+**
+** ^If an error occurs within sqlite3_backup_init(D,N,S,M), then NULL is
+** returned and an error code and error message are stored in the
+** destination [database connection] D.
+** ^The error code and message for the failed call to sqlite3_backup_init()
+** can be retrieved using the [sqlite3_errcode()], [sqlite3_errmsg()], and/or
+** [sqlite3_errmsg16()] functions.
+** ^A successful call to sqlite3_backup_init() returns a pointer to an
+** [sqlite3_backup] object.
+** ^The [sqlite3_backup] object may be used with the sqlite3_backup_step() and
+** sqlite3_backup_finish() functions to perform the specified backup 
+** operation.
+**
+** [[sqlite3_backup_step()]] <b>sqlite3_backup_step()</b>
+**
+** ^Function sqlite3_backup_step(B,N) will copy up to N pages between 
+** the source and destination databases specified by [sqlite3_backup] object B.
+** ^If N is negative, all remaining source pages are copied. 
+** ^If sqlite3_backup_step(B,N) successfully copies N pages and there
+** are still more pages to be copied, then the function returns [SQLITE_OK].
+** ^If sqlite3_backup_step(B,N) successfully finishes copying all pages
+** from source to destination, then it returns [SQLITE_DONE].
+** ^If an error occurs while running sqlite3_backup_step(B,N),
+** then an [error code] is returned. ^As well as [SQLITE_OK] and
+** [SQLITE_DONE], a call to sqlite3_backup_step() may return [SQLITE_READONLY],
+** [SQLITE_NOMEM], [SQLITE_BUSY], [SQLITE_LOCKED], or an
+** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX] extended error code.
+**
+** ^(The sqlite3_backup_step() might return [SQLITE_READONLY] if
+** <ol>
+** <li> the destination database was opened read-only, or
+** <li> the destination database is using write-ahead-log journaling
+** and the destination and source page sizes differ, or
+** <li> the destination database is an in-memory database and the
+** destination and source page sizes differ.
+** </ol>)^
+**
+** ^If sqlite3_backup_step() cannot obtain a required file-system lock, then
+** the [sqlite3_busy_handler | busy-handler function]
+** is invoked (if one is specified). ^If the 
+** busy-handler returns non-zero before the lock is available, then 
+** [SQLITE_BUSY] is returned to the caller. ^In this case the call to
+** sqlite3_backup_step() can be retried later. ^If the source
+** [database connection]
+** is being used to write to the source database when sqlite3_backup_step()
+** is called, then [SQLITE_LOCKED] is returned immediately. ^Again, in this
+** case the call to sqlite3_backup_step() can be retried later on. ^(If
+** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX], [SQLITE_NOMEM], or
+** [SQLITE_READONLY] is returned, then 
+** there is no point in retrying the call to sqlite3_backup_step(). These 
+** errors are considered fatal.)^  The application must accept 
+** that the backup operation has failed and pass the backup operation handle 
+** to the sqlite3_backup_finish() to release associated resources.
+**
+** ^The first call to sqlite3_backup_step() obtains an exclusive lock
+** on the destination file. ^The exclusive lock is not released until either 
+** sqlite3_backup_finish() is called or the backup operation is complete 
+** and sqlite3_backup_step() returns [SQLITE_DONE].  ^Every call to
+** sqlite3_backup_step() obtains a [shared lock] on the source database that
+** lasts for the duration of the sqlite3_backup_step() call.
+** ^Because the source database is not locked between calls to
+** sqlite3_backup_step(), the source database may be modified mid-way
+** through the backup process.  ^If the source database is modified by an
+** external process or via a database connection other than the one being
+** used by the backup operation, then the backup will be automatically
+** restarted by the next call to sqlite3_backup_step(). ^If the source 
+** database is modified by the using the same database connection as is used
+** by the backup operation, then the backup database is automatically
+** updated at the same time.
+**
+** [[sqlite3_backup_finish()]] <b>sqlite3_backup_finish()</b>
+**
+** When sqlite3_backup_step() has returned [SQLITE_DONE], or when the 
+** application wishes to abandon the backup operation, the application
+** should destroy the [sqlite3_backup] by passing it to sqlite3_backup_finish().
+** ^The sqlite3_backup_finish() interfaces releases all
+** resources associated with the [sqlite3_backup] object. 
+** ^If sqlite3_backup_step() has not yet returned [SQLITE_DONE], then any
+** active write-transaction on the destination database is rolled back.
+** The [sqlite3_backup] object is invalid
+** and may not be used following a call to sqlite3_backup_finish().
+**
+** ^The value returned by sqlite3_backup_finish is [SQLITE_OK] if no
+** sqlite3_backup_step() errors occurred, regardless or whether or not
+** sqlite3_backup_step() completed.
+** ^If an out-of-memory condition or IO error occurred during any prior
+** sqlite3_backup_step() call on the same [sqlite3_backup] object, then
+** sqlite3_backup_finish() returns the corresponding [error code].
+**
+** ^A return of [SQLITE_BUSY] or [SQLITE_LOCKED] from sqlite3_backup_step()
+** is not a permanent error and does not affect the return value of
+** sqlite3_backup_finish().
+**
+** [[sqlite3_backup__remaining()]] [[sqlite3_backup_pagecount()]]
+** <b>sqlite3_backup_remaining() and sqlite3_backup_pagecount()</b>
+**
+** ^Each call to sqlite3_backup_step() sets two values inside
+** the [sqlite3_backup] object: the number of pages still to be backed
+** up and the total number of pages in the source database file.
+** The sqlite3_backup_remaining() and sqlite3_backup_pagecount() interfaces
+** retrieve these two values, respectively.
+**
+** ^The values returned by these functions are only updated by
+** sqlite3_backup_step(). ^If the source database is modified during a backup
+** operation, then the values are not updated to account for any extra
+** pages that need to be updated or the size of the source database file
+** changing.
+**
+** <b>Concurrent Usage of Database Handles</b>
+**
+** ^The source [database connection] may be used by the application for other
+** purposes while a backup operation is underway or being initialized.
+** ^If SQLite is compiled and configured to support threadsafe database
+** connections, then the source database connection may be used concurrently
+** from within other threads.
+**
+** However, the application must guarantee that the destination 
+** [database connection] is not passed to any other API (by any thread) after 
+** sqlite3_backup_init() is called and before the corresponding call to
+** sqlite3_backup_finish().  SQLite does not currently check to see
+** if the application incorrectly accesses the destination [database connection]
+** and so no error code is reported, but the operations may malfunction
+** nevertheless.  Use of the destination database connection while a
+** backup is in progress might also also cause a mutex deadlock.
+**
+** If running in [shared cache mode], the application must
+** guarantee that the shared cache used by the destination database
+** is not accessed while the backup is running. In practice this means
+** that the application must guarantee that the disk file being 
+** backed up to is not accessed by any connection within the process,
+** not just the specific connection that was passed to sqlite3_backup_init().
+**
+** The [sqlite3_backup] object itself is partially threadsafe. Multiple 
+** threads may safely make multiple concurrent calls to sqlite3_backup_step().
+** However, the sqlite3_backup_remaining() and sqlite3_backup_pagecount()
+** APIs are not strictly speaking threadsafe. If they are invoked at the
+** same time as another thread is invoking sqlite3_backup_step() it is
+** possible that they return invalid values.
+*/
+SQLITE_API sqlite3_backup *sqlite3_backup_init(
+  sqlite3 *pDest,                        /* Destination database handle */
+  const char *zDestName,                 /* Destination database name */
+  sqlite3 *pSource,                      /* Source database handle */
+  const char *zSourceName                /* Source database name */
+);
+SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage);
+SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p);
+SQLITE_API int sqlite3_backup_remaining(sqlite3_backup *p);
+SQLITE_API int sqlite3_backup_pagecount(sqlite3_backup *p);
+
+/*
+** CAPI3REF: Unlock Notification
+**
+** ^When running in shared-cache mode, a database operation may fail with
+** an [SQLITE_LOCKED] error if the required locks on the shared-cache or
+** individual tables within the shared-cache cannot be obtained. See
+** [SQLite Shared-Cache Mode] for a description of shared-cache locking. 
+** ^This API may be used to register a callback that SQLite will invoke 
+** when the connection currently holding the required lock relinquishes it.
+** ^This API is only available if the library was compiled with the
+** [SQLITE_ENABLE_UNLOCK_NOTIFY] C-preprocessor symbol defined.
+**
+** See Also: [Using the SQLite Unlock Notification Feature].
+**
+** ^Shared-cache locks are released when a database connection concludes
+** its current transaction, either by committing it or rolling it back. 
+**
+** ^When a connection (known as the blocked connection) fails to obtain a
+** shared-cache lock and SQLITE_LOCKED is returned to the caller, the
+** identity of the database connection (the blocking connection) that
+** has locked the required resource is stored internally. ^After an 
+** application receives an SQLITE_LOCKED error, it may call the
+** sqlite3_unlock_notify() method with the blocked connection handle as 
+** the first argument to register for a callback that will be invoked
+** when the blocking connections current transaction is concluded. ^The
+** callback is invoked from within the [sqlite3_step] or [sqlite3_close]
+** call that concludes the blocking connections transaction.
+**
+** ^(If sqlite3_unlock_notify() is called in a multi-threaded application,
+** there is a chance that the blocking connection will have already
+** concluded its transaction by the time sqlite3_unlock_notify() is invoked.
+** If this happens, then the specified callback is invoked immediately,
+** from within the call to sqlite3_unlock_notify().)^
+**
+** ^If the blocked connection is attempting to obtain a write-lock on a
+** shared-cache table, and more than one other connection currently holds
+** a read-lock on the same table, then SQLite arbitrarily selects one of 
+** the other connections to use as the blocking connection.
+**
+** ^(There may be at most one unlock-notify callback registered by a 
+** blocked connection. If sqlite3_unlock_notify() is called when the
+** blocked connection already has a registered unlock-notify callback,
+** then the new callback replaces the old.)^ ^If sqlite3_unlock_notify() is
+** called with a NULL pointer as its second argument, then any existing
+** unlock-notify callback is canceled. ^The blocked connections 
+** unlock-notify callback may also be canceled by closing the blocked
+** connection using [sqlite3_close()].
+**
+** The unlock-notify callback is not reentrant. If an application invokes
+** any sqlite3_xxx API functions from within an unlock-notify callback, a
+** crash or deadlock may be the result.
+**
+** ^Unless deadlock is detected (see below), sqlite3_unlock_notify() always
+** returns SQLITE_OK.
+**
+** <b>Callback Invocation Details</b>
+**
+** When an unlock-notify callback is registered, the application provides a 
+** single void* pointer that is passed to the callback when it is invoked.
+** However, the signature of the callback function allows SQLite to pass
+** it an array of void* context pointers. The first argument passed to
+** an unlock-notify callback is a pointer to an array of void* pointers,
+** and the second is the number of entries in the array.
+**
+** When a blocking connections transaction is concluded, there may be
+** more than one blocked connection that has registered for an unlock-notify
+** callback. ^If two or more such blocked connections have specified the
+** same callback function, then instead of invoking the callback function
+** multiple times, it is invoked once with the set of void* context pointers
+** specified by the blocked connections bundled together into an array.
+** This gives the application an opportunity to prioritize any actions 
+** related to the set of unblocked database connections.
+**
+** <b>Deadlock Detection</b>
+**
+** Assuming that after registering for an unlock-notify callback a 
+** database waits for the callback to be issued before taking any further
+** action (a reasonable assumption), then using this API may cause the
+** application to deadlock. For example, if connection X is waiting for
+** connection Y's transaction to be concluded, and similarly connection
+** Y is waiting on connection X's transaction, then neither connection
+** will proceed and the system may remain deadlocked indefinitely.
+**
+** To avoid this scenario, the sqlite3_unlock_notify() performs deadlock
+** detection. ^If a given call to sqlite3_unlock_notify() would put the
+** system in a deadlocked state, then SQLITE_LOCKED is returned and no
+** unlock-notify callback is registered. The system is said to be in
+** a deadlocked state if connection A has registered for an unlock-notify
+** callback on the conclusion of connection B's transaction, and connection
+** B has itself registered for an unlock-notify callback when connection
+** A's transaction is concluded. ^Indirect deadlock is also detected, so
+** the system is also considered to be deadlocked if connection B has
+** registered for an unlock-notify callback on the conclusion of connection
+** C's transaction, where connection C is waiting on connection A. ^Any
+** number of levels of indirection are allowed.
+**
+** <b>The "DROP TABLE" Exception</b>
+**
+** When a call to [sqlite3_step()] returns SQLITE_LOCKED, it is almost 
+** always appropriate to call sqlite3_unlock_notify(). There is however,
+** one exception. When executing a "DROP TABLE" or "DROP INDEX" statement,
+** SQLite checks if there are any currently executing SELECT statements
+** that belong to the same connection. If there are, SQLITE_LOCKED is
+** returned. In this case there is no "blocking connection", so invoking
+** sqlite3_unlock_notify() results in the unlock-notify callback being
+** invoked immediately. If the application then re-attempts the "DROP TABLE"
+** or "DROP INDEX" query, an infinite loop might be the result.
+**
+** One way around this problem is to check the extended error code returned
+** by an sqlite3_step() call. ^(If there is a blocking connection, then the
+** extended error code is set to SQLITE_LOCKED_SHAREDCACHE. Otherwise, in
+** the special "DROP TABLE/INDEX" case, the extended error code is just 
+** SQLITE_LOCKED.)^
+*/
+SQLITE_API int sqlite3_unlock_notify(
+  sqlite3 *pBlocked,                          /* Waiting connection */
+  void (*xNotify)(void **apArg, int nArg),    /* Callback function to invoke */
+  void *pNotifyArg                            /* Argument to pass to xNotify */
+);
+
+
+/*
+** CAPI3REF: String Comparison
+**
+** ^The [sqlite3_stricmp()] and [sqlite3_strnicmp()] APIs allow applications
+** and extensions to compare the contents of two buffers containing UTF-8
+** strings in a case-independent fashion, using the same definition of "case
+** independence" that SQLite uses internally when comparing identifiers.
+*/
+SQLITE_API int sqlite3_stricmp(const char *, const char *);
+SQLITE_API int sqlite3_strnicmp(const char *, const char *, int);
+
+/*
+** CAPI3REF: String Globbing
+*
+** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches
+** the glob pattern P, and it returns non-zero if string X does not match
+** the glob pattern P.  ^The definition of glob pattern matching used in
+** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the
+** SQL dialect used by SQLite.  ^The sqlite3_strglob(P,X) function is case
+** sensitive.
+**
+** Note that this routine returns zero on a match and non-zero if the strings
+** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()].
+*/
+SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr);
+
+/*
+** CAPI3REF: Error Logging Interface
+**
+** ^The [sqlite3_log()] interface writes a message into the [error log]
+** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()].
+** ^If logging is enabled, the zFormat string and subsequent arguments are
+** used with [sqlite3_snprintf()] to generate the final output string.
+**
+** The sqlite3_log() interface is intended for use by extensions such as
+** virtual tables, collating functions, and SQL functions.  While there is
+** nothing to prevent an application from calling sqlite3_log(), doing so
+** is considered bad form.
+**
+** The zFormat string must not be NULL.
+**
+** To avoid deadlocks and other threading problems, the sqlite3_log() routine
+** will not use dynamically allocated memory.  The log message is stored in
+** a fixed-length buffer on the stack.  If the log message is longer than
+** a few hundred characters, it will be truncated to the length of the
+** buffer.
+*/
+SQLITE_API void sqlite3_log(int iErrCode, const char *zFormat, ...);
+
+/*
+** CAPI3REF: Write-Ahead Log Commit Hook
+**
+** ^The [sqlite3_wal_hook()] function is used to register a callback that
+** is invoked each time data is committed to a database in wal mode.
+**
+** ^(The callback is invoked by SQLite after the commit has taken place and 
+** the associated write-lock on the database released)^, so the implementation 
+** may read, write or [checkpoint] the database as required.
+**
+** ^The first parameter passed to the callback function when it is invoked
+** is a copy of the third parameter passed to sqlite3_wal_hook() when
+** registering the callback. ^The second is a copy of the database handle.
+** ^The third parameter is the name of the database that was written to -
+** either "main" or the name of an [ATTACH]-ed database. ^The fourth parameter
+** is the number of pages currently in the write-ahead log file,
+** including those that were just committed.
+**
+** The callback function should normally return [SQLITE_OK].  ^If an error
+** code is returned, that error will propagate back up through the
+** SQLite code base to cause the statement that provoked the callback
+** to report an error, though the commit will have still occurred. If the
+** callback returns [SQLITE_ROW] or [SQLITE_DONE], or if it returns a value
+** that does not correspond to any valid SQLite error code, the results
+** are undefined.
+**
+** A single database handle may have at most a single write-ahead log callback 
+** registered at one time. ^Calling [sqlite3_wal_hook()] replaces any
+** previously registered write-ahead log callback. ^Note that the
+** [sqlite3_wal_autocheckpoint()] interface and the
+** [wal_autocheckpoint pragma] both invoke [sqlite3_wal_hook()] and will
+** those overwrite any prior [sqlite3_wal_hook()] settings.
+*/
+SQLITE_API void *sqlite3_wal_hook(
+  sqlite3*, 
+  int(*)(void *,sqlite3*,const char*,int),
+  void*
+);
+
+/*
+** CAPI3REF: Configure an auto-checkpoint
+**
+** ^The [sqlite3_wal_autocheckpoint(D,N)] is a wrapper around
+** [sqlite3_wal_hook()] that causes any database on [database connection] D
+** to automatically [checkpoint]
+** after committing a transaction if there are N or
+** more frames in the [write-ahead log] file.  ^Passing zero or 
+** a negative value as the nFrame parameter disables automatic
+** checkpoints entirely.
+**
+** ^The callback registered by this function replaces any existing callback
+** registered using [sqlite3_wal_hook()].  ^Likewise, registering a callback
+** using [sqlite3_wal_hook()] disables the automatic checkpoint mechanism
+** configured by this function.
+**
+** ^The [wal_autocheckpoint pragma] can be used to invoke this interface
+** from SQL.
+**
+** ^Checkpoints initiated by this mechanism are
+** [sqlite3_wal_checkpoint_v2|PASSIVE].
+**
+** ^Every new [database connection] defaults to having the auto-checkpoint
+** enabled with a threshold of 1000 or [SQLITE_DEFAULT_WAL_AUTOCHECKPOINT]
+** pages.  The use of this interface
+** is only necessary if the default setting is found to be suboptimal
+** for a particular application.
+*/
+SQLITE_API int sqlite3_wal_autocheckpoint(sqlite3 *db, int N);
+
+/*
+** CAPI3REF: Checkpoint a database
+**
+** ^(The sqlite3_wal_checkpoint(D,X) is equivalent to
+** [sqlite3_wal_checkpoint_v2](D,X,[SQLITE_CHECKPOINT_PASSIVE],0,0).)^
+**
+** In brief, sqlite3_wal_checkpoint(D,X) causes the content in the 
+** [write-ahead log] for database X on [database connection] D to be
+** transferred into the database file and for the write-ahead log to
+** be reset.  See the [checkpointing] documentation for addition
+** information.
+**
+** This interface used to be the only way to cause a checkpoint to
+** occur.  But then the newer and more powerful [sqlite3_wal_checkpoint_v2()]
+** interface was added.  This interface is retained for backwards
+** compatibility and as a convenience for applications that need to manually
+** start a callback but which do not need the full power (and corresponding
+** complication) of [sqlite3_wal_checkpoint_v2()].
+*/
+SQLITE_API int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb);
+
+/*
+** CAPI3REF: Checkpoint a database
+**
+** ^(The sqlite3_wal_checkpoint_v2(D,X,M,L,C) interface runs a checkpoint
+** operation on database X of [database connection] D in mode M.  Status
+** information is written back into integers pointed to by L and C.)^
+** ^(The M parameter must be a valid [checkpoint mode]:)^
+**
+** <dl>
+** <dt>SQLITE_CHECKPOINT_PASSIVE<dd>
+**   ^Checkpoint as many frames as possible without waiting for any database 
+**   readers or writers to finish, then sync the database file if all frames 
+**   in the log were checkpointed. ^The [busy-handler callback]
+**   is never invoked in the SQLITE_CHECKPOINT_PASSIVE mode.  
+**   ^On the other hand, passive mode might leave the checkpoint unfinished
+**   if there are concurrent readers or writers.
+**
+** <dt>SQLITE_CHECKPOINT_FULL<dd>
+**   ^This mode blocks (it invokes the
+**   [sqlite3_busy_handler|busy-handler callback]) until there is no
+**   database writer and all readers are reading from the most recent database
+**   snapshot. ^It then checkpoints all frames in the log file and syncs the
+**   database file. ^This mode blocks new database writers while it is pending,
+**   but new database readers are allowed to continue unimpeded.
+**
+** <dt>SQLITE_CHECKPOINT_RESTART<dd>
+**   ^This mode works the same way as SQLITE_CHECKPOINT_FULL with the addition
+**   that after checkpointing the log file it blocks (calls the 
+**   [busy-handler callback])
+**   until all readers are reading from the database file only. ^This ensures 
+**   that the next writer will restart the log file from the beginning.
+**   ^Like SQLITE_CHECKPOINT_FULL, this mode blocks new
+**   database writer attempts while it is pending, but does not impede readers.
+**
+** <dt>SQLITE_CHECKPOINT_TRUNCATE<dd>
+**   ^This mode works the same way as SQLITE_CHECKPOINT_RESTART with the
+**   addition that it also truncates the log file to zero bytes just prior
+**   to a successful return.
+** </dl>
+**
+** ^If pnLog is not NULL, then *pnLog is set to the total number of frames in
+** the log file or to -1 if the checkpoint could not run because
+** of an error or because the database is not in [WAL mode]. ^If pnCkpt is not
+** NULL,then *pnCkpt is set to the total number of checkpointed frames in the
+** log file (including any that were already checkpointed before the function
+** was called) or to -1 if the checkpoint could not run due to an error or
+** because the database is not in WAL mode. ^Note that upon successful
+** completion of an SQLITE_CHECKPOINT_TRUNCATE, the log file will have been
+** truncated to zero bytes and so both *pnLog and *pnCkpt will be set to zero.
+**
+** ^All calls obtain an exclusive "checkpoint" lock on the database file. ^If
+** any other process is running a checkpoint operation at the same time, the 
+** lock cannot be obtained and SQLITE_BUSY is returned. ^Even if there is a 
+** busy-handler configured, it will not be invoked in this case.
+**
+** ^The SQLITE_CHECKPOINT_FULL, RESTART and TRUNCATE modes also obtain the 
+** exclusive "writer" lock on the database file. ^If the writer lock cannot be
+** obtained immediately, and a busy-handler is configured, it is invoked and
+** the writer lock retried until either the busy-handler returns 0 or the lock
+** is successfully obtained. ^The busy-handler is also invoked while waiting for
+** database readers as described above. ^If the busy-handler returns 0 before
+** the writer lock is obtained or while waiting for database readers, the
+** checkpoint operation proceeds from that point in the same way as 
+** SQLITE_CHECKPOINT_PASSIVE - checkpointing as many frames as possible 
+** without blocking any further. ^SQLITE_BUSY is returned in this case.
+**
+** ^If parameter zDb is NULL or points to a zero length string, then the
+** specified operation is attempted on all WAL databases [attached] to 
+** [database connection] db.  In this case the
+** values written to output parameters *pnLog and *pnCkpt are undefined. ^If 
+** an SQLITE_BUSY error is encountered when processing one or more of the 
+** attached WAL databases, the operation is still attempted on any remaining 
+** attached databases and SQLITE_BUSY is returned at the end. ^If any other 
+** error occurs while processing an attached database, processing is abandoned 
+** and the error code is returned to the caller immediately. ^If no error 
+** (SQLITE_BUSY or otherwise) is encountered while processing the attached 
+** databases, SQLITE_OK is returned.
+**
+** ^If database zDb is the name of an attached database that is not in WAL
+** mode, SQLITE_OK is returned and both *pnLog and *pnCkpt set to -1. ^If
+** zDb is not NULL (or a zero length string) and is not the name of any
+** attached database, SQLITE_ERROR is returned to the caller.
+**
+** ^Unless it returns SQLITE_MISUSE,
+** the sqlite3_wal_checkpoint_v2() interface
+** sets the error information that is queried by
+** [sqlite3_errcode()] and [sqlite3_errmsg()].
+**
+** ^The [PRAGMA wal_checkpoint] command can be used to invoke this interface
+** from SQL.
+*/
+SQLITE_API int sqlite3_wal_checkpoint_v2(
+  sqlite3 *db,                    /* Database handle */
+  const char *zDb,                /* Name of attached database (or NULL) */
+  int eMode,                      /* SQLITE_CHECKPOINT_* value */
+  int *pnLog,                     /* OUT: Size of WAL log in frames */
+  int *pnCkpt                     /* OUT: Total number of frames checkpointed */
+);
+
+/*
+** CAPI3REF: Checkpoint Mode Values
+** KEYWORDS: {checkpoint mode}
+**
+** These constants define all valid values for the "checkpoint mode" passed
+** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface.
+** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the
+** meaning of each of these checkpoint modes.
+*/
+#define SQLITE_CHECKPOINT_PASSIVE  0  /* Do as much as possible w/o blocking */
+#define SQLITE_CHECKPOINT_FULL     1  /* Wait for writers, then checkpoint */
+#define SQLITE_CHECKPOINT_RESTART  2  /* Like FULL but wait for for readers */
+#define SQLITE_CHECKPOINT_TRUNCATE 3  /* Like RESTART but also truncate WAL */
+
+/*
+** CAPI3REF: Virtual Table Interface Configuration
+**
+** This function may be called by either the [xConnect] or [xCreate] method
+** of a [virtual table] implementation to configure
+** various facets of the virtual table interface.
+**
+** If this interface is invoked outside the context of an xConnect or
+** xCreate virtual table method then the behavior is undefined.
+**
+** At present, there is only one option that may be configured using
+** this function. (See [SQLITE_VTAB_CONSTRAINT_SUPPORT].)  Further options
+** may be added in the future.
+*/
+SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...);
+
+/*
+** CAPI3REF: Virtual Table Configuration Options
+**
+** These macros define the various options to the
+** [sqlite3_vtab_config()] interface that [virtual table] implementations
+** can use to customize and optimize their behavior.
+**
+** <dl>
+** <dt>SQLITE_VTAB_CONSTRAINT_SUPPORT
+** <dd>Calls of the form
+** [sqlite3_vtab_config](db,SQLITE_VTAB_CONSTRAINT_SUPPORT,X) are supported,
+** where X is an integer.  If X is zero, then the [virtual table] whose
+** [xCreate] or [xConnect] method invoked [sqlite3_vtab_config()] does not
+** support constraints.  In this configuration (which is the default) if
+** a call to the [xUpdate] method returns [SQLITE_CONSTRAINT], then the entire
+** statement is rolled back as if [ON CONFLICT | OR ABORT] had been
+** specified as part of the users SQL statement, regardless of the actual
+** ON CONFLICT mode specified.
+**
+** If X is non-zero, then the virtual table implementation guarantees
+** that if [xUpdate] returns [SQLITE_CONSTRAINT], it will do so before
+** any modifications to internal or persistent data structures have been made.
+** If the [ON CONFLICT] mode is ABORT, FAIL, IGNORE or ROLLBACK, SQLite 
+** is able to roll back a statement or database transaction, and abandon
+** or continue processing the current SQL statement as appropriate. 
+** If the ON CONFLICT mode is REPLACE and the [xUpdate] method returns
+** [SQLITE_CONSTRAINT], SQLite handles this as if the ON CONFLICT mode
+** had been ABORT.
+**
+** Virtual table implementations that are required to handle OR REPLACE
+** must do so within the [xUpdate] method. If a call to the 
+** [sqlite3_vtab_on_conflict()] function indicates that the current ON 
+** CONFLICT policy is REPLACE, the virtual table implementation should 
+** silently replace the appropriate rows within the xUpdate callback and
+** return SQLITE_OK. Or, if this is not possible, it may return
+** SQLITE_CONSTRAINT, in which case SQLite falls back to OR ABORT 
+** constraint handling.
+** </dl>
+*/
+#define SQLITE_VTAB_CONSTRAINT_SUPPORT 1
+
+/*
+** CAPI3REF: Determine The Virtual Table Conflict Policy
+**
+** This function may only be called from within a call to the [xUpdate] method
+** of a [virtual table] implementation for an INSERT or UPDATE operation. ^The
+** value returned is one of [SQLITE_ROLLBACK], [SQLITE_IGNORE], [SQLITE_FAIL],
+** [SQLITE_ABORT], or [SQLITE_REPLACE], according to the [ON CONFLICT] mode
+** of the SQL statement that triggered the call to the [xUpdate] method of the
+** [virtual table].
+*/
+SQLITE_API int sqlite3_vtab_on_conflict(sqlite3 *);
+
+/*
+** CAPI3REF: Conflict resolution modes
+** KEYWORDS: {conflict resolution mode}
+**
+** These constants are returned by [sqlite3_vtab_on_conflict()] to
+** inform a [virtual table] implementation what the [ON CONFLICT] mode
+** is for the SQL statement being evaluated.
+**
+** Note that the [SQLITE_IGNORE] constant is also used as a potential
+** return value from the [sqlite3_set_authorizer()] callback and that
+** [SQLITE_ABORT] is also a [result code].
+*/
+#define SQLITE_ROLLBACK 1
+/* #define SQLITE_IGNORE 2 // Also used by sqlite3_authorizer() callback */
+#define SQLITE_FAIL     3
+/* #define SQLITE_ABORT 4  // Also an error code */
+#define SQLITE_REPLACE  5
+
+/*
+** CAPI3REF: Prepared Statement Scan Status Opcodes
+** KEYWORDS: {scanstatus options}
+**
+** The following constants can be used for the T parameter to the
+** [sqlite3_stmt_scanstatus(S,X,T,V)] interface.  Each constant designates a
+** different metric for sqlite3_stmt_scanstatus() to return.
+**
+** When the value returned to V is a string, space to hold that string is
+** managed by the prepared statement S and will be automatically freed when
+** S is finalized.
+**
+** <dl>
+** [[SQLITE_SCANSTAT_NLOOP]] <dt>SQLITE_SCANSTAT_NLOOP</dt>
+** <dd>^The [sqlite3_int64] variable pointed to by the T parameter will be
+** set to the total number of times that the X-th loop has run.</dd>
+**
+** [[SQLITE_SCANSTAT_NVISIT]] <dt>SQLITE_SCANSTAT_NVISIT</dt>
+** <dd>^The [sqlite3_int64] variable pointed to by the T parameter will be set
+** to the total number of rows examined by all iterations of the X-th loop.</dd>
+**
+** [[SQLITE_SCANSTAT_EST]] <dt>SQLITE_SCANSTAT_EST</dt>
+** <dd>^The "double" variable pointed to by the T parameter will be set to the
+** query planner's estimate for the average number of rows output from each
+** iteration of the X-th loop.  If the query planner's estimates was accurate,
+** then this value will approximate the quotient NVISIT/NLOOP and the
+** product of this value for all prior loops with the same SELECTID will
+** be the NLOOP value for the current loop.
+**
+** [[SQLITE_SCANSTAT_NAME]] <dt>SQLITE_SCANSTAT_NAME</dt>
+** <dd>^The "const char *" variable pointed to by the T parameter will be set
+** to a zero-terminated UTF-8 string containing the name of the index or table
+** used for the X-th loop.
+**
+** [[SQLITE_SCANSTAT_EXPLAIN]] <dt>SQLITE_SCANSTAT_EXPLAIN</dt>
+** <dd>^The "const char *" variable pointed to by the T parameter will be set
+** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN]
+** description for the X-th loop.
+**
+** [[SQLITE_SCANSTAT_SELECTID]] <dt>SQLITE_SCANSTAT_SELECT</dt>
+** <dd>^The "int" variable pointed to by the T parameter will be set to the
+** "select-id" for the X-th loop.  The select-id identifies which query or
+** subquery the loop is part of.  The main query has a select-id of zero.
+** The select-id is the same value as is output in the first column
+** of an [EXPLAIN QUERY PLAN] query.
+** </dl>
+*/
+#define SQLITE_SCANSTAT_NLOOP    0
+#define SQLITE_SCANSTAT_NVISIT   1
+#define SQLITE_SCANSTAT_EST      2
+#define SQLITE_SCANSTAT_NAME     3
+#define SQLITE_SCANSTAT_EXPLAIN  4
+#define SQLITE_SCANSTAT_SELECTID 5
+
+/*
+** CAPI3REF: Prepared Statement Scan Status
+**
+** This interface returns information about the predicted and measured
+** performance for pStmt.  Advanced applications can use this
+** interface to compare the predicted and the measured performance and
+** issue warnings and/or rerun [ANALYZE] if discrepancies are found.
+**
+** Since this interface is expected to be rarely used, it is only
+** available if SQLite is compiled using the [SQLITE_ENABLE_STMT_SCANSTATUS]
+** compile-time option.
+**
+** The "iScanStatusOp" parameter determines which status information to return.
+** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior
+** of this interface is undefined.
+** ^The requested measurement is written into a variable pointed to by
+** the "pOut" parameter.
+** Parameter "idx" identifies the specific loop to retrieve statistics for.
+** Loops are numbered starting from zero. ^If idx is out of range - less than
+** zero or greater than or equal to the total number of loops used to implement
+** the statement - a non-zero value is returned and the variable that pOut
+** points to is unchanged.
+**
+** ^Statistics might not be available for all loops in all statements. ^In cases
+** where there exist loops with no available statistics, this function behaves
+** as if the loop did not exist - it returns non-zero and leave the variable
+** that pOut points to unchanged.
+**
+** See also: [sqlite3_stmt_scanstatus_reset()]
+*/
+SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_stmt_scanstatus(
+  sqlite3_stmt *pStmt,      /* Prepared statement for which info desired */
+  int idx,                  /* Index of loop to report on */
+  int iScanStatusOp,        /* Information desired.  SQLITE_SCANSTAT_* */
+  void *pOut                /* Result written here */
+);     
+
+/*
+** CAPI3REF: Zero Scan-Status Counters
+**
+** ^Zero all [sqlite3_stmt_scanstatus()] related event counters.
+**
+** This API is only available if the library is built with pre-processor
+** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined.
+*/
+SQLITE_API SQLITE_EXPERIMENTAL void sqlite3_stmt_scanstatus_reset(sqlite3_stmt*);
+
+
+/*
+** Undo the hack that converts floating point types to integer for
+** builds on processors without floating point support.
+*/
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# undef double
+#endif
+
+#if 0
+}  /* End of the 'extern "C"' block */
+#endif
+#endif /* _SQLITE3_H_ */
+
+/*
+** 2010 August 30
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+*/
+
+#ifndef _SQLITE3RTREE_H_
+#define _SQLITE3RTREE_H_
+
+
+#if 0
+extern "C" {
+#endif
+
+typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry;
+typedef struct sqlite3_rtree_query_info sqlite3_rtree_query_info;
+
+/* The double-precision datatype used by RTree depends on the
+** SQLITE_RTREE_INT_ONLY compile-time option.
+*/
+#ifdef SQLITE_RTREE_INT_ONLY
+  typedef sqlite3_int64 sqlite3_rtree_dbl;
+#else
+  typedef double sqlite3_rtree_dbl;
+#endif
+
+/*
+** Register a geometry callback named zGeom that can be used as part of an
+** R-Tree geometry query as follows:
+**
+**   SELECT ... FROM <rtree> WHERE <rtree col> MATCH $zGeom(... params ...)
+*/
+SQLITE_API int sqlite3_rtree_geometry_callback(
+  sqlite3 *db,
+  const char *zGeom,
+  int (*xGeom)(sqlite3_rtree_geometry*, int, sqlite3_rtree_dbl*,int*),
+  void *pContext
+);
+
+
+/*
+** A pointer to a structure of the following type is passed as the first
+** argument to callbacks registered using rtree_geometry_callback().
+*/
+struct sqlite3_rtree_geometry {
+  void *pContext;                 /* Copy of pContext passed to s_r_g_c() */
+  int nParam;                     /* Size of array aParam[] */
+  sqlite3_rtree_dbl *aParam;      /* Parameters passed to SQL geom function */
+  void *pUser;                    /* Callback implementation user data */
+  void (*xDelUser)(void *);       /* Called by SQLite to clean up pUser */
+};
+
+/*
+** Register a 2nd-generation geometry callback named zScore that can be 
+** used as part of an R-Tree geometry query as follows:
+**
+**   SELECT ... FROM <rtree> WHERE <rtree col> MATCH $zQueryFunc(... params ...)
+*/
+SQLITE_API int sqlite3_rtree_query_callback(
+  sqlite3 *db,
+  const char *zQueryFunc,
+  int (*xQueryFunc)(sqlite3_rtree_query_info*),
+  void *pContext,
+  void (*xDestructor)(void*)
+);
+
+
+/*
+** A pointer to a structure of the following type is passed as the 
+** argument to scored geometry callback registered using
+** sqlite3_rtree_query_callback().
+**
+** Note that the first 5 fields of this structure are identical to
+** sqlite3_rtree_geometry.  This structure is a subclass of
+** sqlite3_rtree_geometry.
+*/
+struct sqlite3_rtree_query_info {
+  void *pContext;                   /* pContext from when function registered */
+  int nParam;                       /* Number of function parameters */
+  sqlite3_rtree_dbl *aParam;        /* value of function parameters */
+  void *pUser;                      /* callback can use this, if desired */
+  void (*xDelUser)(void*);          /* function to free pUser */
+  sqlite3_rtree_dbl *aCoord;        /* Coordinates of node or entry to check */
+  unsigned int *anQueue;            /* Number of pending entries in the queue */
+  int nCoord;                       /* Number of coordinates */
+  int iLevel;                       /* Level of current node or entry */
+  int mxLevel;                      /* The largest iLevel value in the tree */
+  sqlite3_int64 iRowid;             /* Rowid for current entry */
+  sqlite3_rtree_dbl rParentScore;   /* Score of parent node */
+  int eParentWithin;                /* Visibility of parent node */
+  int eWithin;                      /* OUT: Visiblity */
+  sqlite3_rtree_dbl rScore;         /* OUT: Write the score here */
+};
+
+/*
+** Allowed values for sqlite3_rtree_query.eWithin and .eParentWithin.
+*/
+#define NOT_WITHIN       0   /* Object completely outside of query region */
+#define PARTLY_WITHIN    1   /* Object partially overlaps query region */
+#define FULLY_WITHIN     2   /* Object fully contained within query region */
+
+
+#if 0
+}  /* end of the 'extern "C"' block */
+#endif
+
+#endif  /* ifndef _SQLITE3RTREE_H_ */
+
+
+/************** End of sqlite3.h *********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+
+/*
+** Include the configuration header output by 'configure' if we're using the
+** autoconf-based build
+*/
+#ifdef _HAVE_SQLITE_CONFIG_H
+#include "config.h"
+#endif
+
+/************** Include sqliteLimit.h in the middle of sqliteInt.h ***********/
+/************** Begin file sqliteLimit.h *************************************/
+/*
+** 2007 May 7
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** 
+** This file defines various limits of what SQLite can process.
+*/
+
+/*
+** The maximum length of a TEXT or BLOB in bytes.   This also
+** limits the size of a row in a table or index.
+**
+** The hard limit is the ability of a 32-bit signed integer
+** to count the size: 2^31-1 or 2147483647.
+*/
+#ifndef SQLITE_MAX_LENGTH
+# define SQLITE_MAX_LENGTH 1000000000
+#endif
+
+/*
+** This is the maximum number of
+**
+**    * Columns in a table
+**    * Columns in an index
+**    * Columns in a view
+**    * Terms in the SET clause of an UPDATE statement
+**    * Terms in the result set of a SELECT statement
+**    * Terms in the GROUP BY or ORDER BY clauses of a SELECT statement.
+**    * Terms in the VALUES clause of an INSERT statement
+**
+** The hard upper limit here is 32676.  Most database people will
+** tell you that in a well-normalized database, you usually should
+** not have more than a dozen or so columns in any table.  And if
+** that is the case, there is no point in having more than a few
+** dozen values in any of the other situations described above.
+*/
+#ifndef SQLITE_MAX_COLUMN
+# define SQLITE_MAX_COLUMN 2000
+#endif
+
+/*
+** The maximum length of a single SQL statement in bytes.
+**
+** It used to be the case that setting this value to zero would
+** turn the limit off.  That is no longer true.  It is not possible
+** to turn this limit off.
+*/
+#ifndef SQLITE_MAX_SQL_LENGTH
+# define SQLITE_MAX_SQL_LENGTH 1000000000
+#endif
+
+/*
+** The maximum depth of an expression tree. This is limited to 
+** some extent by SQLITE_MAX_SQL_LENGTH. But sometime you might 
+** want to place more severe limits on the complexity of an 
+** expression.
+**
+** A value of 0 used to mean that the limit was not enforced.
+** But that is no longer true.  The limit is now strictly enforced
+** at all times.
+*/
+#ifndef SQLITE_MAX_EXPR_DEPTH
+# define SQLITE_MAX_EXPR_DEPTH 1000
+#endif
+
+/*
+** The maximum number of terms in a compound SELECT statement.
+** The code generator for compound SELECT statements does one
+** level of recursion for each term.  A stack overflow can result
+** if the number of terms is too large.  In practice, most SQL
+** never has more than 3 or 4 terms.  Use a value of 0 to disable
+** any limit on the number of terms in a compount SELECT.
+*/
+#ifndef SQLITE_MAX_COMPOUND_SELECT
+# define SQLITE_MAX_COMPOUND_SELECT 500
+#endif
+
+/*
+** The maximum number of opcodes in a VDBE program.
+** Not currently enforced.
+*/
+#ifndef SQLITE_MAX_VDBE_OP
+# define SQLITE_MAX_VDBE_OP 25000
+#endif
+
+/*
+** The maximum number of arguments to an SQL function.
+*/
+#ifndef SQLITE_MAX_FUNCTION_ARG
+# define SQLITE_MAX_FUNCTION_ARG 127
+#endif
+
+/*
+** The maximum number of in-memory pages to use for the main database
+** table and for temporary tables.  The SQLITE_DEFAULT_CACHE_SIZE
+*/
+#ifndef SQLITE_DEFAULT_CACHE_SIZE
+# define SQLITE_DEFAULT_CACHE_SIZE  2000
+#endif
+#ifndef SQLITE_DEFAULT_TEMP_CACHE_SIZE
+# define SQLITE_DEFAULT_TEMP_CACHE_SIZE  500
+#endif
+
+/*
+** The default number of frames to accumulate in the log file before
+** checkpointing the database in WAL mode.
+*/
+#ifndef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT
+# define SQLITE_DEFAULT_WAL_AUTOCHECKPOINT  1000
+#endif
+
+/*
+** The maximum number of attached databases.  This must be between 0
+** and 62.  The upper bound on 62 is because a 64-bit integer bitmap
+** is used internally to track attached databases.
+*/
+#ifndef SQLITE_MAX_ATTACHED
+# define SQLITE_MAX_ATTACHED 10
+#endif
+
+
+/*
+** The maximum value of a ?nnn wildcard that the parser will accept.
+*/
+#ifndef SQLITE_MAX_VARIABLE_NUMBER
+# define SQLITE_MAX_VARIABLE_NUMBER 999
+#endif
+
+/* Maximum page size.  The upper bound on this value is 65536.  This a limit
+** imposed by the use of 16-bit offsets within each page.
+**
+** Earlier versions of SQLite allowed the user to change this value at
+** compile time. This is no longer permitted, on the grounds that it creates
+** a library that is technically incompatible with an SQLite library 
+** compiled with a different limit. If a process operating on a database 
+** with a page-size of 65536 bytes crashes, then an instance of SQLite 
+** compiled with the default page-size limit will not be able to rollback 
+** the aborted transaction. This could lead to database corruption.
+*/
+#ifdef SQLITE_MAX_PAGE_SIZE
+# undef SQLITE_MAX_PAGE_SIZE
+#endif
+#define SQLITE_MAX_PAGE_SIZE 65536
+
+
+/*
+** The default size of a database page.
+*/
+#ifndef SQLITE_DEFAULT_PAGE_SIZE
+# define SQLITE_DEFAULT_PAGE_SIZE 1024
+#endif
+#if SQLITE_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE
+# undef SQLITE_DEFAULT_PAGE_SIZE
+# define SQLITE_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE
+#endif
+
+/*
+** Ordinarily, if no value is explicitly provided, SQLite creates databases
+** with page size SQLITE_DEFAULT_PAGE_SIZE. However, based on certain
+** device characteristics (sector-size and atomic write() support),
+** SQLite may choose a larger value. This constant is the maximum value
+** SQLite will choose on its own.
+*/
+#ifndef SQLITE_MAX_DEFAULT_PAGE_SIZE
+# define SQLITE_MAX_DEFAULT_PAGE_SIZE 8192
+#endif
+#if SQLITE_MAX_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE
+# undef SQLITE_MAX_DEFAULT_PAGE_SIZE
+# define SQLITE_MAX_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE
+#endif
+
+
+/*
+** Maximum number of pages in one database file.
+**
+** This is really just the default value for the max_page_count pragma.
+** This value can be lowered (or raised) at run-time using that the
+** max_page_count macro.
+*/
+#ifndef SQLITE_MAX_PAGE_COUNT
+# define SQLITE_MAX_PAGE_COUNT 1073741823
+#endif
+
+/*
+** Maximum length (in bytes) of the pattern in a LIKE or GLOB
+** operator.
+*/
+#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
+# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
+#endif
+
+/*
+** Maximum depth of recursion for triggers.
+**
+** A value of 1 means that a trigger program will not be able to itself
+** fire any triggers. A value of 0 means that no trigger programs at all 
+** may be executed.
+*/
+#ifndef SQLITE_MAX_TRIGGER_DEPTH
+# define SQLITE_MAX_TRIGGER_DEPTH 1000
+#endif
+
+/************** End of sqliteLimit.h *****************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+
+/* Disable nuisance warnings on Borland compilers */
+#if defined(__BORLANDC__)
+#pragma warn -rch /* unreachable code */
+#pragma warn -ccc /* Condition is always true or false */
+#pragma warn -aus /* Assigned value is never used */
+#pragma warn -csu /* Comparing signed and unsigned */
+#pragma warn -spa /* Suspicious pointer arithmetic */
+#endif
+
+/*
+** Include standard header files as necessary
+*/
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+/*
+** The following macros are used to cast pointers to integers and
+** integers to pointers.  The way you do this varies from one compiler
+** to the next, so we have developed the following set of #if statements
+** to generate appropriate macros for a wide range of compilers.
+**
+** The correct "ANSI" way to do this is to use the intptr_t type. 
+** Unfortunately, that typedef is not available on all compilers, or
+** if it is available, it requires an #include of specific headers
+** that vary from one machine to the next.
+**
+** Ticket #3860:  The llvm-gcc-4.2 compiler from Apple chokes on
+** the ((void*)&((char*)0)[X]) construct.  But MSVC chokes on ((void*)(X)).
+** So we have to define the macros in different ways depending on the
+** compiler.
+*/
+#if defined(__PTRDIFF_TYPE__)  /* This case should work for GCC */
+# define SQLITE_INT_TO_PTR(X)  ((void*)(__PTRDIFF_TYPE__)(X))
+# define SQLITE_PTR_TO_INT(X)  ((int)(__PTRDIFF_TYPE__)(X))
+#elif !defined(__GNUC__)       /* Works for compilers other than LLVM */
+# define SQLITE_INT_TO_PTR(X)  ((void*)&((char*)0)[X])
+# define SQLITE_PTR_TO_INT(X)  ((int)(((char*)X)-(char*)0))
+#elif defined(HAVE_STDINT_H)   /* Use this case if we have ANSI headers */
+# define SQLITE_INT_TO_PTR(X)  ((void*)(intptr_t)(X))
+# define SQLITE_PTR_TO_INT(X)  ((int)(intptr_t)(X))
+#else                          /* Generates a warning - but it always works */
+# define SQLITE_INT_TO_PTR(X)  ((void*)(X))
+# define SQLITE_PTR_TO_INT(X)  ((int)(X))
+#endif
+
+/*
+** A macro to hint to the compiler that a function should not be
+** inlined.
+*/
+#if defined(__GNUC__)
+#  define SQLITE_NOINLINE  __attribute__((noinline))
+#elif defined(_MSC_VER) && _MSC_VER>=1310
+#  define SQLITE_NOINLINE  __declspec(noinline)
+#else
+#  define SQLITE_NOINLINE
+#endif
+
+/*
+** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2.
+** 0 means mutexes are permanently disable and the library is never
+** threadsafe.  1 means the library is serialized which is the highest
+** level of threadsafety.  2 means the library is multithreaded - multiple
+** threads can use SQLite as long as no two threads try to use the same
+** database connection at the same time.
+**
+** Older versions of SQLite used an optional THREADSAFE macro.
+** We support that for legacy.
+*/
+#if !defined(SQLITE_THREADSAFE)
+# if defined(THREADSAFE)
+#   define SQLITE_THREADSAFE THREADSAFE
+# else
+#   define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */
+# endif
+#endif
+
+/*
+** Powersafe overwrite is on by default.  But can be turned off using
+** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option.
+*/
+#ifndef SQLITE_POWERSAFE_OVERWRITE
+# define SQLITE_POWERSAFE_OVERWRITE 1
+#endif
+
+/*
+** EVIDENCE-OF: R-25715-37072 Memory allocation statistics are enabled by
+** default unless SQLite is compiled with SQLITE_DEFAULT_MEMSTATUS=0 in
+** which case memory allocation statistics are disabled by default.
+*/
+#if !defined(SQLITE_DEFAULT_MEMSTATUS)
+# define SQLITE_DEFAULT_MEMSTATUS 1
+#endif
+
+/*
+** Exactly one of the following macros must be defined in order to
+** specify which memory allocation subsystem to use.
+**
+**     SQLITE_SYSTEM_MALLOC          // Use normal system malloc()
+**     SQLITE_WIN32_MALLOC           // Use Win32 native heap API
+**     SQLITE_ZERO_MALLOC            // Use a stub allocator that always fails
+**     SQLITE_MEMDEBUG               // Debugging version of system malloc()
+**
+** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the
+** assert() macro is enabled, each call into the Win32 native heap subsystem
+** will cause HeapValidate to be called.  If heap validation should fail, an
+** assertion will be triggered.
+**
+** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as
+** the default.
+*/
+#if defined(SQLITE_SYSTEM_MALLOC) \
+  + defined(SQLITE_WIN32_MALLOC) \
+  + defined(SQLITE_ZERO_MALLOC) \
+  + defined(SQLITE_MEMDEBUG)>1
+# error "Two or more of the following compile-time configuration options\
+ are defined but at most one is allowed:\
+ SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG,\
+ SQLITE_ZERO_MALLOC"
+#endif
+#if defined(SQLITE_SYSTEM_MALLOC) \
+  + defined(SQLITE_WIN32_MALLOC) \
+  + defined(SQLITE_ZERO_MALLOC) \
+  + defined(SQLITE_MEMDEBUG)==0
+# define SQLITE_SYSTEM_MALLOC 1
+#endif
+
+/*
+** If SQLITE_MALLOC_SOFT_LIMIT is not zero, then try to keep the
+** sizes of memory allocations below this value where possible.
+*/
+#if !defined(SQLITE_MALLOC_SOFT_LIMIT)
+# define SQLITE_MALLOC_SOFT_LIMIT 1024
+#endif
+
+/*
+** We need to define _XOPEN_SOURCE as follows in order to enable
+** recursive mutexes on most Unix systems and fchmod() on OpenBSD.
+** But _XOPEN_SOURCE define causes problems for Mac OS X, so omit
+** it.
+*/
+#if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__)
+#  define _XOPEN_SOURCE 600
+#endif
+
+/*
+** NDEBUG and SQLITE_DEBUG are opposites.  It should always be true that
+** defined(NDEBUG)==!defined(SQLITE_DEBUG).  If this is not currently true,
+** make it true by defining or undefining NDEBUG.
+**
+** Setting NDEBUG makes the code smaller and faster by disabling the
+** assert() statements in the code.  So we want the default action
+** to be for NDEBUG to be set and NDEBUG to be undefined only if SQLITE_DEBUG
+** is set.  Thus NDEBUG becomes an opt-in rather than an opt-out
+** feature.
+*/
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
+# define NDEBUG 1
+#endif
+#if defined(NDEBUG) && defined(SQLITE_DEBUG)
+# undef NDEBUG
+#endif
+
+/*
+** Enable SQLITE_ENABLE_EXPLAIN_COMMENTS if SQLITE_DEBUG is turned on.
+*/
+#if !defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) && defined(SQLITE_DEBUG)
+# define SQLITE_ENABLE_EXPLAIN_COMMENTS 1
+#endif
+
+/*
+** The testcase() macro is used to aid in coverage testing.  When 
+** doing coverage testing, the condition inside the argument to
+** testcase() must be evaluated both true and false in order to
+** get full branch coverage.  The testcase() macro is inserted
+** to help ensure adequate test coverage in places where simple
+** condition/decision coverage is inadequate.  For example, testcase()
+** can be used to make sure boundary values are tested.  For
+** bitmask tests, testcase() can be used to make sure each bit
+** is significant and used at least once.  On switch statements
+** where multiple cases go to the same block of code, testcase()
+** can insure that all cases are evaluated.
+**
+*/
+#ifdef SQLITE_COVERAGE_TEST
+SQLITE_PRIVATE   void sqlite3Coverage(int);
+# define testcase(X)  if( X ){ sqlite3Coverage(__LINE__); }
+#else
+# define testcase(X)
+#endif
+
+/*
+** The TESTONLY macro is used to enclose variable declarations or
+** other bits of code that are needed to support the arguments
+** within testcase() and assert() macros.
+*/
+#if !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST)
+# define TESTONLY(X)  X
+#else
+# define TESTONLY(X)
+#endif
+
+/*
+** Sometimes we need a small amount of code such as a variable initialization
+** to setup for a later assert() statement.  We do not want this code to
+** appear when assert() is disabled.  The following macro is therefore
+** used to contain that setup code.  The "VVA" acronym stands for
+** "Verification, Validation, and Accreditation".  In other words, the
+** code within VVA_ONLY() will only run during verification processes.
+*/
+#ifndef NDEBUG
+# define VVA_ONLY(X)  X
+#else
+# define VVA_ONLY(X)
+#endif
+
+/*
+** The ALWAYS and NEVER macros surround boolean expressions which 
+** are intended to always be true or false, respectively.  Such
+** expressions could be omitted from the code completely.  But they
+** are included in a few cases in order to enhance the resilience
+** of SQLite to unexpected behavior - to make the code "self-healing"
+** or "ductile" rather than being "brittle" and crashing at the first
+** hint of unplanned behavior.
+**
+** In other words, ALWAYS and NEVER are added for defensive code.
+**
+** When doing coverage testing ALWAYS and NEVER are hard-coded to
+** be true and false so that the unreachable code they specify will
+** not be counted as untested code.
+*/
+#if defined(SQLITE_COVERAGE_TEST)
+# define ALWAYS(X)      (1)
+# define NEVER(X)       (0)
+#elif !defined(NDEBUG)
+# define ALWAYS(X)      ((X)?1:(assert(0),0))
+# define NEVER(X)       ((X)?(assert(0),1):0)
+#else
+# define ALWAYS(X)      (X)
+# define NEVER(X)       (X)
+#endif
+
+/*
+** Return true (non-zero) if the input is an integer that is too large
+** to fit in 32-bits.  This macro is used inside of various testcase()
+** macros to verify that we have tested SQLite for large-file support.
+*/
+#define IS_BIG_INT(X)  (((X)&~(i64)0xffffffff)!=0)
+
+/*
+** The macro unlikely() is a hint that surrounds a boolean
+** expression that is usually false.  Macro likely() surrounds
+** a boolean expression that is usually true.  These hints could,
+** in theory, be used by the compiler to generate better code, but
+** currently they are just comments for human readers.
+*/
+#define likely(X)    (X)
+#define unlikely(X)  (X)
+
+/************** Include hash.h in the middle of sqliteInt.h ******************/
+/************** Begin file hash.h ********************************************/
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the header file for the generic hash-table implementation
+** used in SQLite.
+*/
+#ifndef _SQLITE_HASH_H_
+#define _SQLITE_HASH_H_
+
+/* Forward declarations of structures. */
+typedef struct Hash Hash;
+typedef struct HashElem HashElem;
+
+/* A complete hash table is an instance of the following structure.
+** The internals of this structure are intended to be opaque -- client
+** code should not attempt to access or modify the fields of this structure
+** directly.  Change this structure only by using the routines below.
+** However, some of the "procedures" and "functions" for modifying and
+** accessing this structure are really macros, so we can't really make
+** this structure opaque.
+**
+** All elements of the hash table are on a single doubly-linked list.
+** Hash.first points to the head of this list.
+**
+** There are Hash.htsize buckets.  Each bucket points to a spot in
+** the global doubly-linked list.  The contents of the bucket are the
+** element pointed to plus the next _ht.count-1 elements in the list.
+**
+** Hash.htsize and Hash.ht may be zero.  In that case lookup is done
+** by a linear search of the global list.  For small tables, the 
+** Hash.ht table is never allocated because if there are few elements
+** in the table, it is faster to do a linear search than to manage
+** the hash table.
+*/
+struct Hash {
+  unsigned int htsize;      /* Number of buckets in the hash table */
+  unsigned int count;       /* Number of entries in this table */
+  HashElem *first;          /* The first element of the array */
+  struct _ht {              /* the hash table */
+    int count;                 /* Number of entries with this hash */
+    HashElem *chain;           /* Pointer to first entry with this hash */
+  } *ht;
+};
+
+/* Each element in the hash table is an instance of the following 
+** structure.  All elements are stored on a single doubly-linked list.
+**
+** Again, this structure is intended to be opaque, but it can't really
+** be opaque because it is used by macros.
+*/
+struct HashElem {
+  HashElem *next, *prev;       /* Next and previous elements in the table */
+  void *data;                  /* Data associated with this element */
+  const char *pKey;            /* Key associated with this element */
+};
+
+/*
+** Access routines.  To delete, insert a NULL pointer.
+*/
+SQLITE_PRIVATE void sqlite3HashInit(Hash*);
+SQLITE_PRIVATE void *sqlite3HashInsert(Hash*, const char *pKey, void *pData);
+SQLITE_PRIVATE void *sqlite3HashFind(const Hash*, const char *pKey);
+SQLITE_PRIVATE void sqlite3HashClear(Hash*);
+
+/*
+** Macros for looping over all elements of a hash table.  The idiom is
+** like this:
+**
+**   Hash h;
+**   HashElem *p;
+**   ...
+**   for(p=sqliteHashFirst(&h); p; p=sqliteHashNext(p)){
+**     SomeStructure *pData = sqliteHashData(p);
+**     // do something with pData
+**   }
+*/
+#define sqliteHashFirst(H)  ((H)->first)
+#define sqliteHashNext(E)   ((E)->next)
+#define sqliteHashData(E)   ((E)->data)
+/* #define sqliteHashKey(E)    ((E)->pKey) // NOT USED */
+/* #define sqliteHashKeysize(E) ((E)->nKey)  // NOT USED */
+
+/*
+** Number of entries in a hash table
+*/
+/* #define sqliteHashCount(H)  ((H)->count) // NOT USED */
+
+#endif /* _SQLITE_HASH_H_ */
+
+/************** End of hash.h ************************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+/************** Include parse.h in the middle of sqliteInt.h *****************/
+/************** Begin file parse.h *******************************************/
+#define TK_SEMI                             1
+#define TK_EXPLAIN                          2
+#define TK_QUERY                            3
+#define TK_PLAN                             4
+#define TK_BEGIN                            5
+#define TK_TRANSACTION                      6
+#define TK_DEFERRED                         7
+#define TK_IMMEDIATE                        8
+#define TK_EXCLUSIVE                        9
+#define TK_COMMIT                          10
+#define TK_END                             11
+#define TK_ROLLBACK                        12
+#define TK_SAVEPOINT                       13
+#define TK_RELEASE                         14
+#define TK_TO                              15
+#define TK_TABLE                           16
+#define TK_CREATE                          17
+#define TK_IF                              18
+#define TK_NOT                             19
+#define TK_EXISTS                          20
+#define TK_TEMP                            21
+#define TK_LP                              22
+#define TK_RP                              23
+#define TK_AS                              24
+#define TK_WITHOUT                         25
+#define TK_COMMA                           26
+#define TK_ID                              27
+#define TK_INDEXED                         28
+#define TK_ABORT                           29
+#define TK_ACTION                          30
+#define TK_AFTER                           31
+#define TK_ANALYZE                         32
+#define TK_ASC                             33
+#define TK_ATTACH                          34
+#define TK_BEFORE                          35
+#define TK_BY                              36
+#define TK_CASCADE                         37
+#define TK_CAST                            38
+#define TK_COLUMNKW                        39
+#define TK_CONFLICT                        40
+#define TK_DATABASE                        41
+#define TK_DESC                            42
+#define TK_DETACH                          43
+#define TK_EACH                            44
+#define TK_FAIL                            45
+#define TK_FOR                             46
+#define TK_IGNORE                          47
+#define TK_INITIALLY                       48
+#define TK_INSTEAD                         49
+#define TK_LIKE_KW                         50
+#define TK_MATCH                           51
+#define TK_NO                              52
+#define TK_KEY                             53
+#define TK_OF                              54
+#define TK_OFFSET                          55
+#define TK_PRAGMA                          56
+#define TK_RAISE                           57
+#define TK_RECURSIVE                       58
+#define TK_REPLACE                         59
+#define TK_RESTRICT                        60
+#define TK_ROW                             61
+#define TK_TRIGGER                         62
+#define TK_VACUUM                          63
+#define TK_VIEW                            64
+#define TK_VIRTUAL                         65
+#define TK_WITH                            66
+#define TK_REINDEX                         67
+#define TK_RENAME                          68
+#define TK_CTIME_KW                        69
+#define TK_ANY                             70
+#define TK_OR                              71
+#define TK_AND                             72
+#define TK_IS                              73
+#define TK_BETWEEN                         74
+#define TK_IN                              75
+#define TK_ISNULL                          76
+#define TK_NOTNULL                         77
+#define TK_NE                              78
+#define TK_EQ                              79
+#define TK_GT                              80
+#define TK_LE                              81
+#define TK_LT                              82
+#define TK_GE                              83
+#define TK_ESCAPE                          84
+#define TK_BITAND                          85
+#define TK_BITOR                           86
+#define TK_LSHIFT                          87
+#define TK_RSHIFT                          88
+#define TK_PLUS                            89
+#define TK_MINUS                           90
+#define TK_STAR                            91
+#define TK_SLASH                           92
+#define TK_REM                             93
+#define TK_CONCAT                          94
+#define TK_COLLATE                         95
+#define TK_BITNOT                          96
+#define TK_STRING                          97
+#define TK_JOIN_KW                         98
+#define TK_CONSTRAINT                      99
+#define TK_DEFAULT                        100
+#define TK_NULL                           101
+#define TK_PRIMARY                        102
+#define TK_UNIQUE                         103
+#define TK_CHECK                          104
+#define TK_REFERENCES                     105
+#define TK_AUTOINCR                       106
+#define TK_ON                             107
+#define TK_INSERT                         108
+#define TK_DELETE                         109
+#define TK_UPDATE                         110
+#define TK_SET                            111
+#define TK_DEFERRABLE                     112
+#define TK_FOREIGN                        113
+#define TK_DROP                           114
+#define TK_UNION                          115
+#define TK_ALL                            116
+#define TK_EXCEPT                         117
+#define TK_INTERSECT                      118
+#define TK_SELECT                         119
+#define TK_VALUES                         120
+#define TK_DISTINCT                       121
+#define TK_DOT                            122
+#define TK_FROM                           123
+#define TK_JOIN                           124
+#define TK_USING                          125
+#define TK_ORDER                          126
+#define TK_GROUP                          127
+#define TK_HAVING                         128
+#define TK_LIMIT                          129
+#define TK_WHERE                          130
+#define TK_INTO                           131
+#define TK_INTEGER                        132
+#define TK_FLOAT                          133
+#define TK_BLOB                           134
+#define TK_VARIABLE                       135
+#define TK_CASE                           136
+#define TK_WHEN                           137
+#define TK_THEN                           138
+#define TK_ELSE                           139
+#define TK_INDEX                          140
+#define TK_ALTER                          141
+#define TK_ADD                            142
+#define TK_TO_TEXT                        143
+#define TK_TO_BLOB                        144
+#define TK_TO_NUMERIC                     145
+#define TK_TO_INT                         146
+#define TK_TO_REAL                        147
+#define TK_ISNOT                          148
+#define TK_END_OF_FILE                    149
+#define TK_ILLEGAL                        150
+#define TK_SPACE                          151
+#define TK_UNCLOSED_STRING                152
+#define TK_FUNCTION                       153
+#define TK_COLUMN                         154
+#define TK_AGG_FUNCTION                   155
+#define TK_AGG_COLUMN                     156
+#define TK_UMINUS                         157
+#define TK_UPLUS                          158
+#define TK_REGISTER                       159
+
+/************** End of parse.h ***********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stddef.h>
+
+/*
+** If compiling for a processor that lacks floating point support,
+** substitute integer for floating-point
+*/
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# define double sqlite_int64
+# define float sqlite_int64
+# define LONGDOUBLE_TYPE sqlite_int64
+# ifndef SQLITE_BIG_DBL
+#   define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50)
+# endif
+# define SQLITE_OMIT_DATETIME_FUNCS 1
+# define SQLITE_OMIT_TRACE 1
+# undef SQLITE_MIXED_ENDIAN_64BIT_FLOAT
+# undef SQLITE_HAVE_ISNAN
+#endif
+#ifndef SQLITE_BIG_DBL
+# define SQLITE_BIG_DBL (1e99)
+#endif
+
+/*
+** OMIT_TEMPDB is set to 1 if SQLITE_OMIT_TEMPDB is defined, or 0
+** afterward. Having this macro allows us to cause the C compiler 
+** to omit code used by TEMP tables without messy #ifndef statements.
+*/
+#ifdef SQLITE_OMIT_TEMPDB
+#define OMIT_TEMPDB 1
+#else
+#define OMIT_TEMPDB 0
+#endif
+
+/*
+** The "file format" number is an integer that is incremented whenever
+** the VDBE-level file format changes.  The following macros define the
+** the default file format for new databases and the maximum file format
+** that the library can read.
+*/
+#define SQLITE_MAX_FILE_FORMAT 4
+#ifndef SQLITE_DEFAULT_FILE_FORMAT
+# define SQLITE_DEFAULT_FILE_FORMAT 4
+#endif
+
+/*
+** Determine whether triggers are recursive by default.  This can be
+** changed at run-time using a pragma.
+*/
+#ifndef SQLITE_DEFAULT_RECURSIVE_TRIGGERS
+# define SQLITE_DEFAULT_RECURSIVE_TRIGGERS 0
+#endif
+
+/*
+** Provide a default value for SQLITE_TEMP_STORE in case it is not specified
+** on the command-line
+*/
+#ifndef SQLITE_TEMP_STORE
+# define SQLITE_TEMP_STORE 1
+# define SQLITE_TEMP_STORE_xc 1  /* Exclude from ctime.c */
+#endif
+
+/*
+** If no value has been provided for SQLITE_MAX_WORKER_THREADS, or if
+** SQLITE_TEMP_STORE is set to 3 (never use temporary files), set it 
+** to zero.
+*/
+#if SQLITE_TEMP_STORE==3 || SQLITE_THREADSAFE==0
+# undef SQLITE_MAX_WORKER_THREADS
+# define SQLITE_MAX_WORKER_THREADS 0
+#endif
+#ifndef SQLITE_MAX_WORKER_THREADS
+# define SQLITE_MAX_WORKER_THREADS 8
+#endif
+#ifndef SQLITE_DEFAULT_WORKER_THREADS
+# define SQLITE_DEFAULT_WORKER_THREADS 0
+#endif
+#if SQLITE_DEFAULT_WORKER_THREADS>SQLITE_MAX_WORKER_THREADS
+# undef SQLITE_MAX_WORKER_THREADS
+# define SQLITE_MAX_WORKER_THREADS SQLITE_DEFAULT_WORKER_THREADS
+#endif
+
+
+/*
+** GCC does not define the offsetof() macro so we'll have to do it
+** ourselves.
+*/
+#ifndef offsetof
+#define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD))
+#endif
+
+/*
+** Macros to compute minimum and maximum of two numbers.
+*/
+#define MIN(A,B) ((A)<(B)?(A):(B))
+#define MAX(A,B) ((A)>(B)?(A):(B))
+
+/*
+** Swap two objects of type TYPE.
+*/
+#define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;}
+
+/*
+** Check to see if this machine uses EBCDIC.  (Yes, believe it or
+** not, there are still machines out there that use EBCDIC.)
+*/
+#if 'A' == '\301'
+# define SQLITE_EBCDIC 1
+#else
+# define SQLITE_ASCII 1
+#endif
+
+/*
+** Integers of known sizes.  These typedefs might change for architectures
+** where the sizes very.  Preprocessor macros are available so that the
+** types can be conveniently redefined at compile-type.  Like this:
+**
+**         cc '-DUINTPTR_TYPE=long long int' ...
+*/
+#ifndef UINT32_TYPE
+# ifdef HAVE_UINT32_T
+#  define UINT32_TYPE uint32_t
+# else
+#  define UINT32_TYPE unsigned int
+# endif
+#endif
+#ifndef UINT16_TYPE
+# ifdef HAVE_UINT16_T
+#  define UINT16_TYPE uint16_t
+# else
+#  define UINT16_TYPE unsigned short int
+# endif
+#endif
+#ifndef INT16_TYPE
+# ifdef HAVE_INT16_T
+#  define INT16_TYPE int16_t
+# else
+#  define INT16_TYPE short int
+# endif
+#endif
+#ifndef UINT8_TYPE
+# ifdef HAVE_UINT8_T
+#  define UINT8_TYPE uint8_t
+# else
+#  define UINT8_TYPE unsigned char
+# endif
+#endif
+#ifndef INT8_TYPE
+# ifdef HAVE_INT8_T
+#  define INT8_TYPE int8_t
+# else
+#  define INT8_TYPE signed char
+# endif
+#endif
+#ifndef LONGDOUBLE_TYPE
+# define LONGDOUBLE_TYPE long double
+#endif
+typedef sqlite_int64 i64;          /* 8-byte signed integer */
+typedef sqlite_uint64 u64;         /* 8-byte unsigned integer */
+typedef UINT32_TYPE u32;           /* 4-byte unsigned integer */
+typedef UINT16_TYPE u16;           /* 2-byte unsigned integer */
+typedef INT16_TYPE i16;            /* 2-byte signed integer */
+typedef UINT8_TYPE u8;             /* 1-byte unsigned integer */
+typedef INT8_TYPE i8;              /* 1-byte signed integer */
+
+/*
+** SQLITE_MAX_U32 is a u64 constant that is the maximum u64 value
+** that can be stored in a u32 without loss of data.  The value
+** is 0x00000000ffffffff.  But because of quirks of some compilers, we
+** have to specify the value in the less intuitive manner shown:
+*/
+#define SQLITE_MAX_U32  ((((u64)1)<<32)-1)
+
+/*
+** The datatype used to store estimates of the number of rows in a
+** table or index.  This is an unsigned integer type.  For 99.9% of
+** the world, a 32-bit integer is sufficient.  But a 64-bit integer
+** can be used at compile-time if desired.
+*/
+#ifdef SQLITE_64BIT_STATS
+ typedef u64 tRowcnt;    /* 64-bit only if requested at compile-time */
+#else
+ typedef u32 tRowcnt;    /* 32-bit is the default */
+#endif
+
+/*
+** Estimated quantities used for query planning are stored as 16-bit
+** logarithms.  For quantity X, the value stored is 10*log2(X).  This
+** gives a possible range of values of approximately 1.0e986 to 1e-986.
+** But the allowed values are "grainy".  Not every value is representable.
+** For example, quantities 16 and 17 are both represented by a LogEst
+** of 40.  However, since LogEst quantities are suppose to be estimates,
+** not exact values, this imprecision is not a problem.
+**
+** "LogEst" is short for "Logarithmic Estimate".
+**
+** Examples:
+**      1 -> 0              20 -> 43          10000 -> 132
+**      2 -> 10             25 -> 46          25000 -> 146
+**      3 -> 16            100 -> 66        1000000 -> 199
+**      4 -> 20           1000 -> 99        1048576 -> 200
+**     10 -> 33           1024 -> 100    4294967296 -> 320
+**
+** The LogEst can be negative to indicate fractional values. 
+** Examples:
+**
+**    0.5 -> -10           0.1 -> -33        0.0625 -> -40
+*/
+typedef INT16_TYPE LogEst;
+
+/*
+** Macros to determine whether the machine is big or little endian,
+** and whether or not that determination is run-time or compile-time.
+**
+** For best performance, an attempt is made to guess at the byte-order
+** using C-preprocessor macros.  If that is unsuccessful, or if
+** -DSQLITE_RUNTIME_BYTEORDER=1 is set, then byte-order is determined
+** at run-time.
+*/
+#ifdef SQLITE_AMALGAMATION
+SQLITE_PRIVATE const int sqlite3one = 1;
+#else
+SQLITE_PRIVATE const int sqlite3one;
+#endif
+#if (defined(i386)     || defined(__i386__)   || defined(_M_IX86) ||    \
+     defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)  ||    \
+     defined(_M_AMD64) || defined(_M_ARM)     || defined(__x86)   ||    \
+     defined(__arm__)) && !defined(SQLITE_RUNTIME_BYTEORDER)
+# define SQLITE_BYTEORDER    1234
+# define SQLITE_BIGENDIAN    0
+# define SQLITE_LITTLEENDIAN 1
+# define SQLITE_UTF16NATIVE  SQLITE_UTF16LE
+#endif
+#if (defined(sparc)    || defined(__ppc__))  \
+    && !defined(SQLITE_RUNTIME_BYTEORDER)
+# define SQLITE_BYTEORDER    4321
+# define SQLITE_BIGENDIAN    1
+# define SQLITE_LITTLEENDIAN 0
+# define SQLITE_UTF16NATIVE  SQLITE_UTF16BE
+#endif
+#if !defined(SQLITE_BYTEORDER)
+# define SQLITE_BYTEORDER    0     /* 0 means "unknown at compile-time" */
+# define SQLITE_BIGENDIAN    (*(char *)(&sqlite3one)==0)
+# define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1)
+# define SQLITE_UTF16NATIVE  (SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE)
+#endif
+
+/*
+** Constants for the largest and smallest possible 64-bit signed integers.
+** These macros are designed to work correctly on both 32-bit and 64-bit
+** compilers.
+*/
+#define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
+#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
+
+/* 
+** Round up a number to the next larger multiple of 8.  This is used
+** to force 8-byte alignment on 64-bit architectures.
+*/
+#define ROUND8(x)     (((x)+7)&~7)
+
+/*
+** Round down to the nearest multiple of 8
+*/
+#define ROUNDDOWN8(x) ((x)&~7)
+
+/*
+** Assert that the pointer X is aligned to an 8-byte boundary.  This
+** macro is used only within assert() to verify that the code gets
+** all alignment restrictions correct.
+**
+** Except, if SQLITE_4_BYTE_ALIGNED_MALLOC is defined, then the
+** underlying malloc() implementation might return us 4-byte aligned
+** pointers.  In that case, only verify 4-byte alignment.
+*/
+#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC
+# define EIGHT_BYTE_ALIGNMENT(X)   ((((char*)(X) - (char*)0)&3)==0)
+#else
+# define EIGHT_BYTE_ALIGNMENT(X)   ((((char*)(X) - (char*)0)&7)==0)
+#endif
+
+/*
+** Disable MMAP on platforms where it is known to not work
+*/
+#if defined(__OpenBSD__) || defined(__QNXNTO__)
+# undef SQLITE_MAX_MMAP_SIZE
+# define SQLITE_MAX_MMAP_SIZE 0
+#endif
+
+/*
+** Default maximum size of memory used by memory-mapped I/O in the VFS
+*/
+#ifdef __APPLE__
+# include <TargetConditionals.h>
+# if TARGET_OS_IPHONE
+#   undef SQLITE_MAX_MMAP_SIZE
+#   define SQLITE_MAX_MMAP_SIZE 0
+# endif
+#endif
+#ifndef SQLITE_MAX_MMAP_SIZE
+# if defined(__linux__) \
+  || defined(_WIN32) \
+  || (defined(__APPLE__) && defined(__MACH__)) \
+  || defined(__sun)
+#   define SQLITE_MAX_MMAP_SIZE 0x7fff0000  /* 2147418112 */
+# else
+#   define SQLITE_MAX_MMAP_SIZE 0
+# endif
+# define SQLITE_MAX_MMAP_SIZE_xc 1 /* exclude from ctime.c */
+#endif
+
+/*
+** The default MMAP_SIZE is zero on all platforms.  Or, even if a larger
+** default MMAP_SIZE is specified at compile-time, make sure that it does
+** not exceed the maximum mmap size.
+*/
+#ifndef SQLITE_DEFAULT_MMAP_SIZE
+# define SQLITE_DEFAULT_MMAP_SIZE 0
+# define SQLITE_DEFAULT_MMAP_SIZE_xc 1  /* Exclude from ctime.c */
+#endif
+#if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE
+# undef SQLITE_DEFAULT_MMAP_SIZE
+# define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE
+#endif
+
+/*
+** Only one of SQLITE_ENABLE_STAT3 or SQLITE_ENABLE_STAT4 can be defined.
+** Priority is given to SQLITE_ENABLE_STAT4.  If either are defined, also
+** define SQLITE_ENABLE_STAT3_OR_STAT4
+*/
+#ifdef SQLITE_ENABLE_STAT4
+# undef SQLITE_ENABLE_STAT3
+# define SQLITE_ENABLE_STAT3_OR_STAT4 1
+#elif SQLITE_ENABLE_STAT3
+# define SQLITE_ENABLE_STAT3_OR_STAT4 1
+#elif SQLITE_ENABLE_STAT3_OR_STAT4
+# undef SQLITE_ENABLE_STAT3_OR_STAT4
+#endif
+
+/*
+** SELECTTRACE_ENABLED will be either 1 or 0 depending on whether or not
+** the Select query generator tracing logic is turned on.
+*/
+#if defined(SQLITE_DEBUG) || defined(SQLITE_ENABLE_SELECTTRACE)
+# define SELECTTRACE_ENABLED 1
+#else
+# define SELECTTRACE_ENABLED 0
+#endif
+
+/*
+** An instance of the following structure is used to store the busy-handler
+** callback for a given sqlite handle. 
+**
+** The sqlite.busyHandler member of the sqlite struct contains the busy
+** callback for the database handle. Each pager opened via the sqlite
+** handle is passed a pointer to sqlite.busyHandler. The busy-handler
+** callback is currently invoked only from within pager.c.
+*/
+typedef struct BusyHandler BusyHandler;
+struct BusyHandler {
+  int (*xFunc)(void *,int);  /* The busy callback */
+  void *pArg;                /* First arg to busy callback */
+  int nBusy;                 /* Incremented with each busy call */
+};
+
+/*
+** Name of the master database table.  The master database table
+** is a special table that holds the names and attributes of all
+** user tables and indices.
+*/
+#define MASTER_NAME       "sqlite_master"
+#define TEMP_MASTER_NAME  "sqlite_temp_master"
+
+/*
+** The root-page of the master database table.
+*/
+#define MASTER_ROOT       1
+
+/*
+** The name of the schema table.
+*/
+#define SCHEMA_TABLE(x)  ((!OMIT_TEMPDB)&&(x==1)?TEMP_MASTER_NAME:MASTER_NAME)
+
+/*
+** A convenience macro that returns the number of elements in
+** an array.
+*/
+#define ArraySize(X)    ((int)(sizeof(X)/sizeof(X[0])))
+
+/*
+** Determine if the argument is a power of two
+*/
+#define IsPowerOfTwo(X) (((X)&((X)-1))==0)
+
+/*
+** The following value as a destructor means to use sqlite3DbFree().
+** The sqlite3DbFree() routine requires two parameters instead of the 
+** one parameter that destructors normally want.  So we have to introduce 
+** this magic value that the code knows to handle differently.  Any 
+** pointer will work here as long as it is distinct from SQLITE_STATIC
+** and SQLITE_TRANSIENT.
+*/
+#define SQLITE_DYNAMIC   ((sqlite3_destructor_type)sqlite3MallocSize)
+
+/*
+** When SQLITE_OMIT_WSD is defined, it means that the target platform does
+** not support Writable Static Data (WSD) such as global and static variables.
+** All variables must either be on the stack or dynamically allocated from
+** the heap.  When WSD is unsupported, the variable declarations scattered
+** throughout the SQLite code must become constants instead.  The SQLITE_WSD
+** macro is used for this purpose.  And instead of referencing the variable
+** directly, we use its constant as a key to lookup the run-time allocated
+** buffer that holds real variable.  The constant is also the initializer
+** for the run-time allocated buffer.
+**
+** In the usual case where WSD is supported, the SQLITE_WSD and GLOBAL
+** macros become no-ops and have zero performance impact.
+*/
+#ifdef SQLITE_OMIT_WSD
+  #define SQLITE_WSD const
+  #define GLOBAL(t,v) (*(t*)sqlite3_wsd_find((void*)&(v), sizeof(v)))
+  #define sqlite3GlobalConfig GLOBAL(struct Sqlite3Config, sqlite3Config)
+SQLITE_API   int sqlite3_wsd_init(int N, int J);
+SQLITE_API   void *sqlite3_wsd_find(void *K, int L);
+#else
+  #define SQLITE_WSD 
+  #define GLOBAL(t,v) v
+  #define sqlite3GlobalConfig sqlite3Config
+#endif
+
+/*
+** The following macros are used to suppress compiler warnings and to
+** make it clear to human readers when a function parameter is deliberately 
+** left unused within the body of a function. This usually happens when
+** a function is called via a function pointer. For example the 
+** implementation of an SQL aggregate step callback may not use the
+** parameter indicating the number of arguments passed to the aggregate,
+** if it knows that this is enforced elsewhere.
+**
+** When a function parameter is not used at all within the body of a function,
+** it is generally named "NotUsed" or "NotUsed2" to make things even clearer.
+** However, these macros may also be used to suppress warnings related to
+** parameters that may or may not be used depending on compilation options.
+** For example those parameters only used in assert() statements. In these
+** cases the parameters are named as per the usual conventions.
+*/
+#define UNUSED_PARAMETER(x) (void)(x)
+#define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y)
+
+/*
+** Forward references to structures
+*/
+typedef struct AggInfo AggInfo;
+typedef struct AuthContext AuthContext;
+typedef struct AutoincInfo AutoincInfo;
+typedef struct Bitvec Bitvec;
+typedef struct CollSeq CollSeq;
+typedef struct Column Column;
+typedef struct Db Db;
+typedef struct Schema Schema;
+typedef struct Expr Expr;
+typedef struct ExprList ExprList;
+typedef struct ExprSpan ExprSpan;
+typedef struct FKey FKey;
+typedef struct FuncDestructor FuncDestructor;
+typedef struct FuncDef FuncDef;
+typedef struct FuncDefHash FuncDefHash;
+typedef struct IdList IdList;
+typedef struct Index Index;
+typedef struct IndexSample IndexSample;
+typedef struct KeyClass KeyClass;
+typedef struct KeyInfo KeyInfo;
+typedef struct Lookaside Lookaside;
+typedef struct LookasideSlot LookasideSlot;
+typedef struct Module Module;
+typedef struct NameContext NameContext;
+typedef struct Parse Parse;
+typedef struct PrintfArguments PrintfArguments;
+typedef struct RowSet RowSet;
+typedef struct Savepoint Savepoint;
+typedef struct Select Select;
+typedef struct SQLiteThread SQLiteThread;
+typedef struct SelectDest SelectDest;
+typedef struct SrcList SrcList;
+typedef struct StrAccum StrAccum;
+typedef struct Table Table;
+typedef struct TableLock TableLock;
+typedef struct Token Token;
+typedef struct TreeView TreeView;
+typedef struct Trigger Trigger;
+typedef struct TriggerPrg TriggerPrg;
+typedef struct TriggerStep TriggerStep;
+typedef struct UnpackedRecord UnpackedRecord;
+typedef struct VTable VTable;
+typedef struct VtabCtx VtabCtx;
+typedef struct Walker Walker;
+typedef struct WhereInfo WhereInfo;
+typedef struct With With;
+
+/*
+** Defer sourcing vdbe.h and btree.h until after the "u8" and 
+** "BusyHandler" typedefs. vdbe.h also requires a few of the opaque
+** pointer types (i.e. FuncDef) defined above.
+*/
+/************** Include btree.h in the middle of sqliteInt.h *****************/
+/************** Begin file btree.h *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface that the sqlite B-Tree file
+** subsystem.  See comments in the source code for a detailed description
+** of what each interface routine does.
+*/
+#ifndef _BTREE_H_
+#define _BTREE_H_
+
+/* TODO: This definition is just included so other modules compile. It
+** needs to be revisited.
+*/
+#define SQLITE_N_BTREE_META 16
+
+/*
+** If defined as non-zero, auto-vacuum is enabled by default. Otherwise
+** it must be turned on for each database using "PRAGMA auto_vacuum = 1".
+*/
+#ifndef SQLITE_DEFAULT_AUTOVACUUM
+  #define SQLITE_DEFAULT_AUTOVACUUM 0
+#endif
+
+#define BTREE_AUTOVACUUM_NONE 0        /* Do not do auto-vacuum */
+#define BTREE_AUTOVACUUM_FULL 1        /* Do full auto-vacuum */
+#define BTREE_AUTOVACUUM_INCR 2        /* Incremental vacuum */
+
+/*
+** Forward declarations of structure
+*/
+typedef struct Btree Btree;
+typedef struct BtCursor BtCursor;
+typedef struct BtShared BtShared;
+
+
+SQLITE_PRIVATE int sqlite3BtreeOpen(
+  sqlite3_vfs *pVfs,       /* VFS to use with this b-tree */
+  const char *zFilename,   /* Name of database file to open */
+  sqlite3 *db,             /* Associated database connection */
+  Btree **ppBtree,         /* Return open Btree* here */
+  int flags,               /* Flags */
+  int vfsFlags             /* Flags passed through to VFS open */
+);
+
+/* The flags parameter to sqlite3BtreeOpen can be the bitwise or of the
+** following values.
+**
+** NOTE:  These values must match the corresponding PAGER_ values in
+** pager.h.
+*/
+#define BTREE_OMIT_JOURNAL  1  /* Do not create or use a rollback journal */
+#define BTREE_MEMORY        2  /* This is an in-memory DB */
+#define BTREE_SINGLE        4  /* The file contains at most 1 b-tree */
+#define BTREE_UNORDERED     8  /* Use of a hash implementation is OK */
+
+SQLITE_PRIVATE int sqlite3BtreeClose(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree*,int);
+#if SQLITE_MAX_MMAP_SIZE>0
+SQLITE_PRIVATE   int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64);
+#endif
+SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags(Btree*,unsigned);
+SQLITE_PRIVATE int sqlite3BtreeSyncDisabled(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix);
+SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeMaxPageCount(Btree*,int);
+SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree*,int);
+SQLITE_PRIVATE int sqlite3BtreeGetReserve(Btree*);
+#if defined(SQLITE_HAS_CODEC) || defined(SQLITE_DEBUG)
+SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p);
+#endif
+SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *, int);
+SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *);
+SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree*,int);
+SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree*, const char *zMaster);
+SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree*, int);
+SQLITE_PRIVATE int sqlite3BtreeCommit(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeRollback(Btree*,int,int);
+SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree*,int);
+SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree*, int*, int flags);
+SQLITE_PRIVATE int sqlite3BtreeIsInTrans(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeIsInReadTrans(Btree*);
+SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree*);
+SQLITE_PRIVATE void *sqlite3BtreeSchema(Btree *, int, void(*)(void *));
+SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *pBtree);
+SQLITE_PRIVATE int sqlite3BtreeLockTable(Btree *pBtree, int iTab, u8 isWriteLock);
+SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *, int, int);
+
+SQLITE_PRIVATE const char *sqlite3BtreeGetFilename(Btree *);
+SQLITE_PRIVATE const char *sqlite3BtreeGetJournalname(Btree *);
+SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *, Btree *);
+
+SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *);
+
+/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR
+** of the flags shown below.
+**
+** Every SQLite table must have either BTREE_INTKEY or BTREE_BLOBKEY set.
+** With BTREE_INTKEY, the table key is a 64-bit integer and arbitrary data
+** is stored in the leaves.  (BTREE_INTKEY is used for SQL tables.)  With
+** BTREE_BLOBKEY, the key is an arbitrary BLOB and no content is stored
+** anywhere - the key is the content.  (BTREE_BLOBKEY is used for SQL
+** indices.)
+*/
+#define BTREE_INTKEY     1    /* Table has only 64-bit signed integer keys */
+#define BTREE_BLOBKEY    2    /* Table has keys only - no data */
+
+SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree*, int, int*);
+SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree*, int, int*);
+SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor*);
+SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree*, int, int);
+
+SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *pBtree, int idx, u32 *pValue);
+SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value);
+
+SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p);
+
+/*
+** The second parameter to sqlite3BtreeGetMeta or sqlite3BtreeUpdateMeta
+** should be one of the following values. The integer values are assigned 
+** to constants so that the offset of the corresponding field in an
+** SQLite database header may be found using the following formula:
+**
+**   offset = 36 + (idx * 4)
+**
+** For example, the free-page-count field is located at byte offset 36 of
+** the database file header. The incr-vacuum-flag field is located at
+** byte offset 64 (== 36+4*7).
+**
+** The BTREE_DATA_VERSION value is not really a value stored in the header.
+** It is a read-only number computed by the pager.  But we merge it with
+** the header value access routines since its access pattern is the same.
+** Call it a "virtual meta value".
+*/
+#define BTREE_FREE_PAGE_COUNT     0
+#define BTREE_SCHEMA_VERSION      1
+#define BTREE_FILE_FORMAT         2
+#define BTREE_DEFAULT_CACHE_SIZE  3
+#define BTREE_LARGEST_ROOT_PAGE   4
+#define BTREE_TEXT_ENCODING       5
+#define BTREE_USER_VERSION        6
+#define BTREE_INCR_VACUUM         7
+#define BTREE_APPLICATION_ID      8
+#define BTREE_DATA_VERSION        15  /* A virtual meta-value */
+
+/*
+** Values that may be OR'd together to form the second argument of an
+** sqlite3BtreeCursorHints() call.
+*/
+#define BTREE_BULKLOAD 0x00000001
+
+SQLITE_PRIVATE int sqlite3BtreeCursor(
+  Btree*,                              /* BTree containing table to open */
+  int iTable,                          /* Index of root page */
+  int wrFlag,                          /* 1 for writing.  0 for read-only */
+  struct KeyInfo*,                     /* First argument to compare function */
+  BtCursor *pCursor                    /* Space to write cursor structure */
+);
+SQLITE_PRIVATE int sqlite3BtreeCursorSize(void);
+SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor*);
+
+SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor*);
+SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked(
+  BtCursor*,
+  UnpackedRecord *pUnKey,
+  i64 intKey,
+  int bias,
+  int *pRes
+);
+SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor*);
+SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor*, int*);
+SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor*);
+SQLITE_PRIVATE int sqlite3BtreeInsert(BtCursor*, const void *pKey, i64 nKey,
+                                  const void *pData, int nData,
+                                  int nZero, int bias, int seekResult);
+SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor*, int *pRes);
+SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor*, int *pRes);
+SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor*, int *pRes);
+SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor*);
+SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor*, int *pRes);
+SQLITE_PRIVATE int sqlite3BtreeKeySize(BtCursor*, i64 *pSize);
+SQLITE_PRIVATE int sqlite3BtreeKey(BtCursor*, u32 offset, u32 amt, void*);
+SQLITE_PRIVATE const void *sqlite3BtreeKeyFetch(BtCursor*, u32 *pAmt);
+SQLITE_PRIVATE const void *sqlite3BtreeDataFetch(BtCursor*, u32 *pAmt);
+SQLITE_PRIVATE int sqlite3BtreeDataSize(BtCursor*, u32 *pSize);
+SQLITE_PRIVATE int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*);
+
+SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*);
+SQLITE_PRIVATE struct Pager *sqlite3BtreePager(Btree*);
+
+SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*);
+SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *);
+SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *);
+SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *pBt, int iVersion);
+SQLITE_PRIVATE void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask);
+SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *pBt);
+SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void);
+
+#ifndef NDEBUG
+SQLITE_PRIVATE int sqlite3BtreeCursorIsValid(BtCursor*);
+#endif
+
+#ifndef SQLITE_OMIT_BTREECOUNT
+SQLITE_PRIVATE int sqlite3BtreeCount(BtCursor *, i64 *);
+#endif
+
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE int sqlite3BtreeCursorInfo(BtCursor*, int*, int);
+SQLITE_PRIVATE void sqlite3BtreeCursorList(Btree*);
+#endif
+
+#ifndef SQLITE_OMIT_WAL
+SQLITE_PRIVATE   int sqlite3BtreeCheckpoint(Btree*, int, int *, int *);
+#endif
+
+/*
+** If we are not using shared cache, then there is no need to
+** use mutexes to access the BtShared structures.  So make the
+** Enter and Leave procedures no-ops.
+*/
+#ifndef SQLITE_OMIT_SHARED_CACHE
+SQLITE_PRIVATE   void sqlite3BtreeEnter(Btree*);
+SQLITE_PRIVATE   void sqlite3BtreeEnterAll(sqlite3*);
+#else
+# define sqlite3BtreeEnter(X) 
+# define sqlite3BtreeEnterAll(X)
+#endif
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE
+SQLITE_PRIVATE   int sqlite3BtreeSharable(Btree*);
+SQLITE_PRIVATE   void sqlite3BtreeLeave(Btree*);
+SQLITE_PRIVATE   void sqlite3BtreeEnterCursor(BtCursor*);
+SQLITE_PRIVATE   void sqlite3BtreeLeaveCursor(BtCursor*);
+SQLITE_PRIVATE   void sqlite3BtreeLeaveAll(sqlite3*);
+#ifndef NDEBUG
+  /* These routines are used inside assert() statements only. */
+SQLITE_PRIVATE   int sqlite3BtreeHoldsMutex(Btree*);
+SQLITE_PRIVATE   int sqlite3BtreeHoldsAllMutexes(sqlite3*);
+SQLITE_PRIVATE   int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*);
+#endif
+#else
+
+# define sqlite3BtreeSharable(X) 0
+# define sqlite3BtreeLeave(X)
+# define sqlite3BtreeEnterCursor(X)
+# define sqlite3BtreeLeaveCursor(X)
+# define sqlite3BtreeLeaveAll(X)
+
+# define sqlite3BtreeHoldsMutex(X) 1
+# define sqlite3BtreeHoldsAllMutexes(X) 1
+# define sqlite3SchemaMutexHeld(X,Y,Z) 1
+#endif
+
+
+#endif /* _BTREE_H_ */
+
+/************** End of btree.h ***********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+/************** Include vdbe.h in the middle of sqliteInt.h ******************/
+/************** Begin file vdbe.h ********************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Header file for the Virtual DataBase Engine (VDBE)
+**
+** This header defines the interface to the virtual database engine
+** or VDBE.  The VDBE implements an abstract machine that runs a
+** simple program to access and modify the underlying database.
+*/
+#ifndef _SQLITE_VDBE_H_
+#define _SQLITE_VDBE_H_
+/* #include <stdio.h> */
+
+/*
+** A single VDBE is an opaque structure named "Vdbe".  Only routines
+** in the source file sqliteVdbe.c are allowed to see the insides
+** of this structure.
+*/
+typedef struct Vdbe Vdbe;
+
+/*
+** The names of the following types declared in vdbeInt.h are required
+** for the VdbeOp definition.
+*/
+typedef struct Mem Mem;
+typedef struct SubProgram SubProgram;
+
+/*
+** A single instruction of the virtual machine has an opcode
+** and as many as three operands.  The instruction is recorded
+** as an instance of the following structure:
+*/
+struct VdbeOp {
+  u8 opcode;          /* What operation to perform */
+  signed char p4type; /* One of the P4_xxx constants for p4 */
+  u8 opflags;         /* Mask of the OPFLG_* flags in opcodes.h */
+  u8 p5;              /* Fifth parameter is an unsigned character */
+  int p1;             /* First operand */
+  int p2;             /* Second parameter (often the jump destination) */
+  int p3;             /* The third parameter */
+  union {             /* fourth parameter */
+    int i;                 /* Integer value if p4type==P4_INT32 */
+    void *p;               /* Generic pointer */
+    char *z;               /* Pointer to data for string (char array) types */
+    i64 *pI64;             /* Used when p4type is P4_INT64 */
+    double *pReal;         /* Used when p4type is P4_REAL */
+    FuncDef *pFunc;        /* Used when p4type is P4_FUNCDEF */
+    CollSeq *pColl;        /* Used when p4type is P4_COLLSEQ */
+    Mem *pMem;             /* Used when p4type is P4_MEM */
+    VTable *pVtab;         /* Used when p4type is P4_VTAB */
+    KeyInfo *pKeyInfo;     /* Used when p4type is P4_KEYINFO */
+    int *ai;               /* Used when p4type is P4_INTARRAY */
+    SubProgram *pProgram;  /* Used when p4type is P4_SUBPROGRAM */
+    int (*xAdvance)(BtCursor *, int *);
+  } p4;
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+  char *zComment;          /* Comment to improve readability */
+#endif
+#ifdef VDBE_PROFILE
+  u32 cnt;                 /* Number of times this instruction was executed */
+  u64 cycles;              /* Total time spent executing this instruction */
+#endif
+#ifdef SQLITE_VDBE_COVERAGE
+  int iSrcLine;            /* Source-code line that generated this opcode */
+#endif
+};
+typedef struct VdbeOp VdbeOp;
+
+
+/*
+** A sub-routine used to implement a trigger program.
+*/
+struct SubProgram {
+  VdbeOp *aOp;                  /* Array of opcodes for sub-program */
+  int nOp;                      /* Elements in aOp[] */
+  int nMem;                     /* Number of memory cells required */
+  int nCsr;                     /* Number of cursors required */
+  int nOnce;                    /* Number of OP_Once instructions */
+  void *token;                  /* id that may be used to recursive triggers */
+  SubProgram *pNext;            /* Next sub-program already visited */
+};
+
+/*
+** A smaller version of VdbeOp used for the VdbeAddOpList() function because
+** it takes up less space.
+*/
+struct VdbeOpList {
+  u8 opcode;          /* What operation to perform */
+  signed char p1;     /* First operand */
+  signed char p2;     /* Second parameter (often the jump destination) */
+  signed char p3;     /* Third parameter */
+};
+typedef struct VdbeOpList VdbeOpList;
+
+/*
+** Allowed values of VdbeOp.p4type
+*/
+#define P4_NOTUSED    0   /* The P4 parameter is not used */
+#define P4_DYNAMIC  (-1)  /* Pointer to a string obtained from sqliteMalloc() */
+#define P4_STATIC   (-2)  /* Pointer to a static string */
+#define P4_COLLSEQ  (-4)  /* P4 is a pointer to a CollSeq structure */
+#define P4_FUNCDEF  (-5)  /* P4 is a pointer to a FuncDef structure */
+#define P4_KEYINFO  (-6)  /* P4 is a pointer to a KeyInfo structure */
+#define P4_MEM      (-8)  /* P4 is a pointer to a Mem*    structure */
+#define P4_TRANSIENT  0   /* P4 is a pointer to a transient string */
+#define P4_VTAB     (-10) /* P4 is a pointer to an sqlite3_vtab structure */
+#define P4_MPRINTF  (-11) /* P4 is a string obtained from sqlite3_mprintf() */
+#define P4_REAL     (-12) /* P4 is a 64-bit floating point value */
+#define P4_INT64    (-13) /* P4 is a 64-bit signed integer */
+#define P4_INT32    (-14) /* P4 is a 32-bit signed integer */
+#define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */
+#define P4_SUBPROGRAM  (-18) /* P4 is a pointer to a SubProgram structure */
+#define P4_ADVANCE  (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */
+
+/* Error message codes for OP_Halt */
+#define P5_ConstraintNotNull 1
+#define P5_ConstraintUnique  2
+#define P5_ConstraintCheck   3
+#define P5_ConstraintFK      4
+
+/*
+** The Vdbe.aColName array contains 5n Mem structures, where n is the 
+** number of columns of data returned by the statement.
+*/
+#define COLNAME_NAME     0
+#define COLNAME_DECLTYPE 1
+#define COLNAME_DATABASE 2
+#define COLNAME_TABLE    3
+#define COLNAME_COLUMN   4
+#ifdef SQLITE_ENABLE_COLUMN_METADATA
+# define COLNAME_N        5      /* Number of COLNAME_xxx symbols */
+#else
+# ifdef SQLITE_OMIT_DECLTYPE
+#   define COLNAME_N      1      /* Store only the name */
+# else
+#   define COLNAME_N      2      /* Store the name and decltype */
+# endif
+#endif
+
+/*
+** The following macro converts a relative address in the p2 field
+** of a VdbeOp structure into a negative number so that 
+** sqlite3VdbeAddOpList() knows that the address is relative.  Calling
+** the macro again restores the address.
+*/
+#define ADDR(X)  (-1-(X))
+
+/*
+** The makefile scans the vdbe.c source file and creates the "opcodes.h"
+** header file that defines a number for each opcode used by the VDBE.
+*/
+/************** Include opcodes.h in the middle of vdbe.h ********************/
+/************** Begin file opcodes.h *****************************************/
+/* Automatically generated.  Do not edit */
+/* See the mkopcodeh.awk script for details */
+#define OP_Function        1 /* synopsis: r[P3]=func(r[P2@P5])             */
+#define OP_Savepoint       2
+#define OP_AutoCommit      3
+#define OP_Transaction     4
+#define OP_SorterNext      5
+#define OP_PrevIfOpen      6
+#define OP_NextIfOpen      7
+#define OP_Prev            8
+#define OP_Next            9
+#define OP_AggStep        10 /* synopsis: accum=r[P3] step(r[P2@P5])       */
+#define OP_Checkpoint     11
+#define OP_JournalMode    12
+#define OP_Vacuum         13
+#define OP_VFilter        14 /* synopsis: iplan=r[P3] zplan='P4'           */
+#define OP_VUpdate        15 /* synopsis: data=r[P3@P2]                    */
+#define OP_Goto           16
+#define OP_Gosub          17
+#define OP_Return         18
+#define OP_Not            19 /* same as TK_NOT, synopsis: r[P2]= !r[P1]    */
+#define OP_InitCoroutine  20
+#define OP_EndCoroutine   21
+#define OP_Yield          22
+#define OP_HaltIfNull     23 /* synopsis: if r[P3]=null halt               */
+#define OP_Halt           24
+#define OP_Integer        25 /* synopsis: r[P2]=P1                         */
+#define OP_Int64          26 /* synopsis: r[P2]=P4                         */
+#define OP_String         27 /* synopsis: r[P2]='P4' (len=P1)              */
+#define OP_Null           28 /* synopsis: r[P2..P3]=NULL                   */
+#define OP_SoftNull       29 /* synopsis: r[P1]=NULL                       */
+#define OP_Blob           30 /* synopsis: r[P2]=P4 (len=P1)                */
+#define OP_Variable       31 /* synopsis: r[P2]=parameter(P1,P4)           */
+#define OP_Move           32 /* synopsis: r[P2@P3]=r[P1@P3]                */
+#define OP_Copy           33 /* synopsis: r[P2@P3+1]=r[P1@P3+1]            */
+#define OP_SCopy          34 /* synopsis: r[P2]=r[P1]                      */
+#define OP_ResultRow      35 /* synopsis: output=r[P1@P2]                  */
+#define OP_CollSeq        36
+#define OP_AddImm         37 /* synopsis: r[P1]=r[P1]+P2                   */
+#define OP_MustBeInt      38
+#define OP_RealAffinity   39
+#define OP_Cast           40 /* synopsis: affinity(r[P1])                  */
+#define OP_Permutation    41
+#define OP_Compare        42 /* synopsis: r[P1@P3] <-> r[P2@P3]            */
+#define OP_Jump           43
+#define OP_Once           44
+#define OP_If             45
+#define OP_IfNot          46
+#define OP_Column         47 /* synopsis: r[P3]=PX                         */
+#define OP_Affinity       48 /* synopsis: affinity(r[P1@P2])               */
+#define OP_MakeRecord     49 /* synopsis: r[P3]=mkrec(r[P1@P2])            */
+#define OP_Count          50 /* synopsis: r[P2]=count()                    */
+#define OP_ReadCookie     51
+#define OP_SetCookie      52
+#define OP_ReopenIdx      53 /* synopsis: root=P2 iDb=P3                   */
+#define OP_OpenRead       54 /* synopsis: root=P2 iDb=P3                   */
+#define OP_OpenWrite      55 /* synopsis: root=P2 iDb=P3                   */
+#define OP_OpenAutoindex  56 /* synopsis: nColumn=P2                       */
+#define OP_OpenEphemeral  57 /* synopsis: nColumn=P2                       */
+#define OP_SorterOpen     58
+#define OP_SequenceTest   59 /* synopsis: if( cursor[P1].ctr++ ) pc = P2   */
+#define OP_OpenPseudo     60 /* synopsis: P3 columns in r[P2]              */
+#define OP_Close          61
+#define OP_SeekLT         62 /* synopsis: key=r[P3@P4]                     */
+#define OP_SeekLE         63 /* synopsis: key=r[P3@P4]                     */
+#define OP_SeekGE         64 /* synopsis: key=r[P3@P4]                     */
+#define OP_SeekGT         65 /* synopsis: key=r[P3@P4]                     */
+#define OP_Seek           66 /* synopsis: intkey=r[P2]                     */
+#define OP_NoConflict     67 /* synopsis: key=r[P3@P4]                     */
+#define OP_NotFound       68 /* synopsis: key=r[P3@P4]                     */
+#define OP_Found          69 /* synopsis: key=r[P3@P4]                     */
+#define OP_NotExists      70 /* synopsis: intkey=r[P3]                     */
+#define OP_Or             71 /* same as TK_OR, synopsis: r[P3]=(r[P1] || r[P2]) */
+#define OP_And            72 /* same as TK_AND, synopsis: r[P3]=(r[P1] && r[P2]) */
+#define OP_Sequence       73 /* synopsis: r[P2]=cursor[P1].ctr++           */
+#define OP_NewRowid       74 /* synopsis: r[P2]=rowid                      */
+#define OP_Insert         75 /* synopsis: intkey=r[P3] data=r[P2]          */
+#define OP_IsNull         76 /* same as TK_ISNULL, synopsis: if r[P1]==NULL goto P2 */
+#define OP_NotNull        77 /* same as TK_NOTNULL, synopsis: if r[P1]!=NULL goto P2 */
+#define OP_Ne             78 /* same as TK_NE, synopsis: if r[P1]!=r[P3] goto P2 */
+#define OP_Eq             79 /* same as TK_EQ, synopsis: if r[P1]==r[P3] goto P2 */
+#define OP_Gt             80 /* same as TK_GT, synopsis: if r[P1]>r[P3] goto P2 */
+#define OP_Le             81 /* same as TK_LE, synopsis: if r[P1]<=r[P3] goto P2 */
+#define OP_Lt             82 /* same as TK_LT, synopsis: if r[P1]<r[P3] goto P2 */
+#define OP_Ge             83 /* same as TK_GE, synopsis: if r[P1]>=r[P3] goto P2 */
+#define OP_InsertInt      84 /* synopsis: intkey=P3 data=r[P2]             */
+#define OP_BitAnd         85 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */
+#define OP_BitOr          86 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */
+#define OP_ShiftLeft      87 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<<r[P1] */
+#define OP_ShiftRight     88 /* same as TK_RSHIFT, synopsis: r[P3]=r[P2]>>r[P1] */
+#define OP_Add            89 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */
+#define OP_Subtract       90 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */
+#define OP_Multiply       91 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */
+#define OP_Divide         92 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */
+#define OP_Remainder      93 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */
+#define OP_Concat         94 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */
+#define OP_Delete         95
+#define OP_BitNot         96 /* same as TK_BITNOT, synopsis: r[P1]= ~r[P1] */
+#define OP_String8        97 /* same as TK_STRING, synopsis: r[P2]='P4'    */
+#define OP_ResetCount     98
+#define OP_SorterCompare  99 /* synopsis: if key(P1)!=trim(r[P3],P4) goto P2 */
+#define OP_SorterData    100 /* synopsis: r[P2]=data                       */
+#define OP_RowKey        101 /* synopsis: r[P2]=key                        */
+#define OP_RowData       102 /* synopsis: r[P2]=data                       */
+#define OP_Rowid         103 /* synopsis: r[P2]=rowid                      */
+#define OP_NullRow       104
+#define OP_Last          105
+#define OP_SorterSort    106
+#define OP_Sort          107
+#define OP_Rewind        108
+#define OP_SorterInsert  109
+#define OP_IdxInsert     110 /* synopsis: key=r[P2]                        */
+#define OP_IdxDelete     111 /* synopsis: key=r[P2@P3]                     */
+#define OP_IdxRowid      112 /* synopsis: r[P2]=rowid                      */
+#define OP_IdxLE         113 /* synopsis: key=r[P3@P4]                     */
+#define OP_IdxGT         114 /* synopsis: key=r[P3@P4]                     */
+#define OP_IdxLT         115 /* synopsis: key=r[P3@P4]                     */
+#define OP_IdxGE         116 /* synopsis: key=r[P3@P4]                     */
+#define OP_Destroy       117
+#define OP_Clear         118
+#define OP_ResetSorter   119
+#define OP_CreateIndex   120 /* synopsis: r[P2]=root iDb=P1                */
+#define OP_CreateTable   121 /* synopsis: r[P2]=root iDb=P1                */
+#define OP_ParseSchema   122
+#define OP_LoadAnalysis  123
+#define OP_DropTable     124
+#define OP_DropIndex     125
+#define OP_DropTrigger   126
+#define OP_IntegrityCk   127
+#define OP_RowSetAdd     128 /* synopsis: rowset(P1)=r[P2]                 */
+#define OP_RowSetRead    129 /* synopsis: r[P3]=rowset(P1)                 */
+#define OP_RowSetTest    130 /* synopsis: if r[P3] in rowset(P1) goto P2   */
+#define OP_Program       131
+#define OP_Param         132
+#define OP_Real          133 /* same as TK_FLOAT, synopsis: r[P2]=P4       */
+#define OP_FkCounter     134 /* synopsis: fkctr[P1]+=P2                    */
+#define OP_FkIfZero      135 /* synopsis: if fkctr[P1]==0 goto P2          */
+#define OP_MemMax        136 /* synopsis: r[P1]=max(r[P1],r[P2])           */
+#define OP_IfPos         137 /* synopsis: if r[P1]>0 goto P2               */
+#define OP_IfNeg         138 /* synopsis: r[P1]+=P3, if r[P1]<0 goto P2    */
+#define OP_IfZero        139 /* synopsis: r[P1]+=P3, if r[P1]==0 goto P2   */
+#define OP_AggFinal      140 /* synopsis: accum=r[P1] N=P2                 */
+#define OP_IncrVacuum    141
+#define OP_Expire        142
+#define OP_TableLock     143 /* synopsis: iDb=P1 root=P2 write=P3          */
+#define OP_VBegin        144
+#define OP_VCreate       145
+#define OP_VDestroy      146
+#define OP_VOpen         147
+#define OP_VColumn       148 /* synopsis: r[P3]=vcolumn(P2)                */
+#define OP_VNext         149
+#define OP_VRename       150
+#define OP_Pagecount     151
+#define OP_MaxPgcnt      152
+#define OP_Init          153 /* synopsis: Start at P2                      */
+#define OP_Noop          154
+#define OP_Explain       155
+
+
+/* Properties such as "out2" or "jump" that are specified in
+** comments following the "case" for each opcode in the vdbe.c
+** are encoded into bitvectors as follows:
+*/
+#define OPFLG_JUMP            0x0001  /* jump:  P2 holds jmp target */
+#define OPFLG_OUT2_PRERELEASE 0x0002  /* out2-prerelease: */
+#define OPFLG_IN1             0x0004  /* in1:   P1 is an input */
+#define OPFLG_IN2             0x0008  /* in2:   P2 is an input */
+#define OPFLG_IN3             0x0010  /* in3:   P3 is an input */
+#define OPFLG_OUT2            0x0020  /* out2:  P2 is an output */
+#define OPFLG_OUT3            0x0040  /* out3:  P3 is an output */
+#define OPFLG_INITIALIZER {\
+/*   0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,\
+/*   8 */ 0x01, 0x01, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00,\
+/*  16 */ 0x01, 0x01, 0x04, 0x24, 0x01, 0x04, 0x05, 0x10,\
+/*  24 */ 0x00, 0x02, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02,\
+/*  32 */ 0x00, 0x00, 0x20, 0x00, 0x00, 0x04, 0x05, 0x04,\
+/*  40 */ 0x04, 0x00, 0x00, 0x01, 0x01, 0x05, 0x05, 0x00,\
+/*  48 */ 0x00, 0x00, 0x02, 0x02, 0x10, 0x00, 0x00, 0x00,\
+/*  56 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x11,\
+/*  64 */ 0x11, 0x11, 0x08, 0x11, 0x11, 0x11, 0x11, 0x4c,\
+/*  72 */ 0x4c, 0x02, 0x02, 0x00, 0x05, 0x05, 0x15, 0x15,\
+/*  80 */ 0x15, 0x15, 0x15, 0x15, 0x00, 0x4c, 0x4c, 0x4c,\
+/*  88 */ 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x00,\
+/*  96 */ 0x24, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,\
+/* 104 */ 0x00, 0x01, 0x01, 0x01, 0x01, 0x08, 0x08, 0x00,\
+/* 112 */ 0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x00, 0x00,\
+/* 120 */ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\
+/* 128 */ 0x0c, 0x45, 0x15, 0x01, 0x02, 0x02, 0x00, 0x01,\
+/* 136 */ 0x08, 0x05, 0x05, 0x05, 0x00, 0x01, 0x00, 0x00,\
+/* 144 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02,\
+/* 152 */ 0x02, 0x01, 0x00, 0x00,}
+
+/************** End of opcodes.h *********************************************/
+/************** Continuing where we left off in vdbe.h ***********************/
+
+/*
+** Prototypes for the VDBE interface.  See comments on the implementation
+** for a description of what each of these routines does.
+*/
+SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse*);
+SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe*,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe*,int,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe*,int,int,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe*,int,int,int,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int);
+SQLITE_PRIVATE int sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp, int iLineno);
+SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe*,int,char*);
+SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe*, u32 addr, int P1);
+SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe*, u32 addr, int P2);
+SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe*, u32 addr, int P3);
+SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe*, u8 P5);
+SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe*, int addr);
+SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe*, int addr);
+SQLITE_PRIVATE int sqlite3VdbeDeletePriorOpcode(Vdbe*, u8 op);
+SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe*, int addr, const char *zP4, int N);
+SQLITE_PRIVATE void sqlite3VdbeSetP4KeyInfo(Parse*, Index*);
+SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe*, int);
+SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe*, int);
+SQLITE_PRIVATE int sqlite3VdbeMakeLabel(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeRunOnlyOnce(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeDelete(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeClearObject(sqlite3*,Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeMakeReady(Vdbe*,Parse*);
+SQLITE_PRIVATE int sqlite3VdbeFinalize(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeResolveLabel(Vdbe*, int);
+SQLITE_PRIVATE int sqlite3VdbeCurrentAddr(Vdbe*);
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE   int sqlite3VdbeAssertMayAbort(Vdbe *, int);
+#endif
+SQLITE_PRIVATE void sqlite3VdbeResetStepResult(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe*);
+SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeSetNumCols(Vdbe*,int);
+SQLITE_PRIVATE int sqlite3VdbeSetColName(Vdbe*, int, int, const char *, void(*)(void*));
+SQLITE_PRIVATE void sqlite3VdbeCountChanges(Vdbe*);
+SQLITE_PRIVATE sqlite3 *sqlite3VdbeDb(Vdbe*);
+SQLITE_PRIVATE void sqlite3VdbeSetSql(Vdbe*, const char *z, int n, int);
+SQLITE_PRIVATE void sqlite3VdbeSwap(Vdbe*,Vdbe*);
+SQLITE_PRIVATE VdbeOp *sqlite3VdbeTakeOpArray(Vdbe*, int*, int*);
+SQLITE_PRIVATE sqlite3_value *sqlite3VdbeGetBoundValue(Vdbe*, int, u8);
+SQLITE_PRIVATE void sqlite3VdbeSetVarmask(Vdbe*, int);
+#ifndef SQLITE_OMIT_TRACE
+SQLITE_PRIVATE   char *sqlite3VdbeExpandSql(Vdbe*, const char*);
+#endif
+SQLITE_PRIVATE int sqlite3MemCompare(const Mem*, const Mem*, const CollSeq*);
+
+SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
+SQLITE_PRIVATE int sqlite3VdbeRecordCompare(int,const void*,UnpackedRecord*);
+SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
+
+typedef int (*RecordCompare)(int,const void*,UnpackedRecord*);
+SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*);
+
+#ifndef SQLITE_OMIT_TRIGGER
+SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *, SubProgram *);
+#endif
+
+/* Use SQLITE_ENABLE_COMMENTS to enable generation of extra comments on
+** each VDBE opcode.
+**
+** Use the SQLITE_ENABLE_MODULE_COMMENTS macro to see some extra no-op
+** comments in VDBE programs that show key decision points in the code
+** generator.
+*/
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+SQLITE_PRIVATE   void sqlite3VdbeComment(Vdbe*, const char*, ...);
+# define VdbeComment(X)  sqlite3VdbeComment X
+SQLITE_PRIVATE   void sqlite3VdbeNoopComment(Vdbe*, const char*, ...);
+# define VdbeNoopComment(X)  sqlite3VdbeNoopComment X
+# ifdef SQLITE_ENABLE_MODULE_COMMENTS
+#   define VdbeModuleComment(X)  sqlite3VdbeNoopComment X
+# else
+#   define VdbeModuleComment(X)
+# endif
+#else
+# define VdbeComment(X)
+# define VdbeNoopComment(X)
+# define VdbeModuleComment(X)
+#endif
+
+/*
+** The VdbeCoverage macros are used to set a coverage testing point
+** for VDBE branch instructions.  The coverage testing points are line
+** numbers in the sqlite3.c source file.  VDBE branch coverage testing
+** only works with an amalagmation build.  That's ok since a VDBE branch
+** coverage build designed for testing the test suite only.  No application
+** should ever ship with VDBE branch coverage measuring turned on.
+**
+**    VdbeCoverage(v)                  // Mark the previously coded instruction
+**                                     // as a branch
+**
+**    VdbeCoverageIf(v, conditional)   // Mark previous if conditional true
+**
+**    VdbeCoverageAlwaysTaken(v)       // Previous branch is always taken
+**
+**    VdbeCoverageNeverTaken(v)        // Previous branch is never taken
+**
+** Every VDBE branch operation must be tagged with one of the macros above.
+** If not, then when "make test" is run with -DSQLITE_VDBE_COVERAGE and
+** -DSQLITE_DEBUG then an ALWAYS() will fail in the vdbeTakeBranch()
+** routine in vdbe.c, alerting the developer to the missed tag.
+*/
+#ifdef SQLITE_VDBE_COVERAGE
+SQLITE_PRIVATE   void sqlite3VdbeSetLineNumber(Vdbe*,int);
+# define VdbeCoverage(v) sqlite3VdbeSetLineNumber(v,__LINE__)
+# define VdbeCoverageIf(v,x) if(x)sqlite3VdbeSetLineNumber(v,__LINE__)
+# define VdbeCoverageAlwaysTaken(v) sqlite3VdbeSetLineNumber(v,2);
+# define VdbeCoverageNeverTaken(v) sqlite3VdbeSetLineNumber(v,1);
+# define VDBE_OFFSET_LINENO(x) (__LINE__+x)
+#else
+# define VdbeCoverage(v)
+# define VdbeCoverageIf(v,x)
+# define VdbeCoverageAlwaysTaken(v)
+# define VdbeCoverageNeverTaken(v)
+# define VDBE_OFFSET_LINENO(x) 0
+#endif
+
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+SQLITE_PRIVATE void sqlite3VdbeScanStatus(Vdbe*, int, int, int, LogEst, const char*);
+#else
+# define sqlite3VdbeScanStatus(a,b,c,d,e)
+#endif
+
+#endif
+
+/************** End of vdbe.h ************************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+/************** Include pager.h in the middle of sqliteInt.h *****************/
+/************** Begin file pager.h *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface that the sqlite page cache
+** subsystem.  The page cache subsystem reads and writes a file a page
+** at a time and provides a journal for rollback.
+*/
+
+#ifndef _PAGER_H_
+#define _PAGER_H_
+
+/*
+** Default maximum size for persistent journal files. A negative 
+** value means no limit. This value may be overridden using the 
+** sqlite3PagerJournalSizeLimit() API. See also "PRAGMA journal_size_limit".
+*/
+#ifndef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT
+  #define SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT -1
+#endif
+
+/*
+** The type used to represent a page number.  The first page in a file
+** is called page 1.  0 is used to represent "not a page".
+*/
+typedef u32 Pgno;
+
+/*
+** Each open file is managed by a separate instance of the "Pager" structure.
+*/
+typedef struct Pager Pager;
+
+/*
+** Handle type for pages.
+*/
+typedef struct PgHdr DbPage;
+
+/*
+** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
+** reserved for working around a windows/posix incompatibility). It is
+** used in the journal to signify that the remainder of the journal file 
+** is devoted to storing a master journal name - there are no more pages to
+** roll back. See comments for function writeMasterJournal() in pager.c 
+** for details.
+*/
+#define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1))
+
+/*
+** Allowed values for the flags parameter to sqlite3PagerOpen().
+**
+** NOTE: These values must match the corresponding BTREE_ values in btree.h.
+*/
+#define PAGER_OMIT_JOURNAL  0x0001    /* Do not use a rollback journal */
+#define PAGER_MEMORY        0x0002    /* In-memory database */
+
+/*
+** Valid values for the second argument to sqlite3PagerLockingMode().
+*/
+#define PAGER_LOCKINGMODE_QUERY      -1
+#define PAGER_LOCKINGMODE_NORMAL      0
+#define PAGER_LOCKINGMODE_EXCLUSIVE   1
+
+/*
+** Numeric constants that encode the journalmode.  
+*/
+#define PAGER_JOURNALMODE_QUERY     (-1)  /* Query the value of journalmode */
+#define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
+#define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
+#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
+#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
+#define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */
+#define PAGER_JOURNALMODE_WAL         5   /* Use write-ahead logging */
+
+/*
+** Flags that make up the mask passed to sqlite3PagerAcquire().
+*/
+#define PAGER_GET_NOCONTENT     0x01  /* Do not load data from disk */
+#define PAGER_GET_READONLY      0x02  /* Read-only page is acceptable */
+
+/*
+** Flags for sqlite3PagerSetFlags()
+*/
+#define PAGER_SYNCHRONOUS_OFF       0x01  /* PRAGMA synchronous=OFF */
+#define PAGER_SYNCHRONOUS_NORMAL    0x02  /* PRAGMA synchronous=NORMAL */
+#define PAGER_SYNCHRONOUS_FULL      0x03  /* PRAGMA synchronous=FULL */
+#define PAGER_SYNCHRONOUS_MASK      0x03  /* Mask for three values above */
+#define PAGER_FULLFSYNC             0x04  /* PRAGMA fullfsync=ON */
+#define PAGER_CKPT_FULLFSYNC        0x08  /* PRAGMA checkpoint_fullfsync=ON */
+#define PAGER_CACHESPILL            0x10  /* PRAGMA cache_spill=ON */
+#define PAGER_FLAGS_MASK            0x1c  /* All above except SYNCHRONOUS */
+
+/*
+** The remainder of this file contains the declarations of the functions
+** that make up the Pager sub-system API. See source code comments for 
+** a detailed description of each routine.
+*/
+
+/* Open and close a Pager connection. */ 
+SQLITE_PRIVATE int sqlite3PagerOpen(
+  sqlite3_vfs*,
+  Pager **ppPager,
+  const char*,
+  int,
+  int,
+  int,
+  void(*)(DbPage*)
+);
+SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager);
+SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager*, int, unsigned char*);
+
+/* Functions used to configure a Pager object. */
+SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *);
+SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int);
+SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager*, int);
+SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int);
+SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64);
+SQLITE_PRIVATE void sqlite3PagerShrink(Pager*);
+SQLITE_PRIVATE void sqlite3PagerSetFlags(Pager*,unsigned);
+SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int);
+SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *, int);
+SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager*);
+SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager*);
+SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *, i64);
+SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager*);
+
+/* Functions used to obtain and release page references. */ 
+SQLITE_PRIVATE int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag);
+#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0)
+SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno);
+SQLITE_PRIVATE void sqlite3PagerRef(DbPage*);
+SQLITE_PRIVATE void sqlite3PagerUnref(DbPage*);
+SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage*);
+
+/* Operations on page references. */
+SQLITE_PRIVATE int sqlite3PagerWrite(DbPage*);
+SQLITE_PRIVATE void sqlite3PagerDontWrite(DbPage*);
+SQLITE_PRIVATE int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int);
+SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage*);
+SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *); 
+SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *); 
+
+/* Functions used to manage pager transactions and savepoints. */
+SQLITE_PRIVATE void sqlite3PagerPagecount(Pager*, int*);
+SQLITE_PRIVATE int sqlite3PagerBegin(Pager*, int exFlag, int);
+SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, int);
+SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager*);
+SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster);
+SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager*);
+SQLITE_PRIVATE int sqlite3PagerRollback(Pager*);
+SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int n);
+SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint);
+SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager);
+
+#ifndef SQLITE_OMIT_WAL
+SQLITE_PRIVATE   int sqlite3PagerCheckpoint(Pager *pPager, int, int*, int*);
+SQLITE_PRIVATE   int sqlite3PagerWalSupported(Pager *pPager);
+SQLITE_PRIVATE   int sqlite3PagerWalCallback(Pager *pPager);
+SQLITE_PRIVATE   int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen);
+SQLITE_PRIVATE   int sqlite3PagerCloseWal(Pager *pPager);
+#endif
+
+#ifdef SQLITE_ENABLE_ZIPVFS
+SQLITE_PRIVATE   int sqlite3PagerWalFramesize(Pager *pPager);
+#endif
+
+/* Functions used to query pager state and configuration. */
+SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager*);
+SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager*);
+SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*);
+SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager*);
+SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager*, int);
+SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager*);
+SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager*);
+SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager*);
+SQLITE_PRIVATE int sqlite3PagerNosync(Pager*);
+SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager*);
+SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager*);
+SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *, int, int, int *);
+SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *);
+SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *);
+
+/* Functions used to truncate the database file. */
+SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager*,Pgno);
+
+SQLITE_PRIVATE void sqlite3PagerRekey(DbPage*, Pgno, u16);
+
+#if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL)
+SQLITE_PRIVATE void *sqlite3PagerCodec(DbPage *);
+#endif
+
+/* Functions to support testing and debugging. */
+#if !defined(NDEBUG) || defined(SQLITE_TEST)
+SQLITE_PRIVATE   Pgno sqlite3PagerPagenumber(DbPage*);
+SQLITE_PRIVATE   int sqlite3PagerIswriteable(DbPage*);
+#endif
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE   int *sqlite3PagerStats(Pager*);
+SQLITE_PRIVATE   void sqlite3PagerRefdump(Pager*);
+  void disable_simulated_io_errors(void);
+  void enable_simulated_io_errors(void);
+#else
+# define disable_simulated_io_errors()
+# define enable_simulated_io_errors()
+#endif
+
+#endif /* _PAGER_H_ */
+
+/************** End of pager.h ***********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+/************** Include pcache.h in the middle of sqliteInt.h ****************/
+/************** Begin file pcache.h ******************************************/
+/*
+** 2008 August 05
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface that the sqlite page cache
+** subsystem. 
+*/
+
+#ifndef _PCACHE_H_
+
+typedef struct PgHdr PgHdr;
+typedef struct PCache PCache;
+
+/*
+** Every page in the cache is controlled by an instance of the following
+** structure.
+*/
+struct PgHdr {
+  sqlite3_pcache_page *pPage;    /* Pcache object page handle */
+  void *pData;                   /* Page data */
+  void *pExtra;                  /* Extra content */
+  PgHdr *pDirty;                 /* Transient list of dirty pages */
+  Pager *pPager;                 /* The pager this page is part of */
+  Pgno pgno;                     /* Page number for this page */
+#ifdef SQLITE_CHECK_PAGES
+  u32 pageHash;                  /* Hash of page content */
+#endif
+  u16 flags;                     /* PGHDR flags defined below */
+
+  /**********************************************************************
+  ** Elements above are public.  All that follows is private to pcache.c
+  ** and should not be accessed by other modules.
+  */
+  i16 nRef;                      /* Number of users of this page */
+  PCache *pCache;                /* Cache that owns this page */
+
+  PgHdr *pDirtyNext;             /* Next element in list of dirty pages */
+  PgHdr *pDirtyPrev;             /* Previous element in list of dirty pages */
+};
+
+/* Bit values for PgHdr.flags */
+#define PGHDR_DIRTY             0x002  /* Page has changed */
+#define PGHDR_NEED_SYNC         0x004  /* Fsync the rollback journal before
+                                       ** writing this page to the database */
+#define PGHDR_NEED_READ         0x008  /* Content is unread */
+#define PGHDR_REUSE_UNLIKELY    0x010  /* A hint that reuse is unlikely */
+#define PGHDR_DONT_WRITE        0x020  /* Do not write content to disk */
+
+#define PGHDR_MMAP              0x040  /* This is an mmap page object */
+
+/* Initialize and shutdown the page cache subsystem */
+SQLITE_PRIVATE int sqlite3PcacheInitialize(void);
+SQLITE_PRIVATE void sqlite3PcacheShutdown(void);
+
+/* Page cache buffer management:
+** These routines implement SQLITE_CONFIG_PAGECACHE.
+*/
+SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *, int sz, int n);
+
+/* Create a new pager cache.
+** Under memory stress, invoke xStress to try to make pages clean.
+** Only clean and unpinned pages can be reclaimed.
+*/
+SQLITE_PRIVATE int sqlite3PcacheOpen(
+  int szPage,                    /* Size of every page */
+  int szExtra,                   /* Extra space associated with each page */
+  int bPurgeable,                /* True if pages are on backing store */
+  int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */
+  void *pStress,                 /* Argument to xStress */
+  PCache *pToInit                /* Preallocated space for the PCache */
+);
+
+/* Modify the page-size after the cache has been created. */
+SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *, int);
+
+/* Return the size in bytes of a PCache object.  Used to preallocate
+** storage space.
+*/
+SQLITE_PRIVATE int sqlite3PcacheSize(void);
+
+/* One release per successful fetch.  Page is pinned until released.
+** Reference counted. 
+*/
+SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag);
+SQLITE_PRIVATE int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**);
+SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage);
+SQLITE_PRIVATE void sqlite3PcacheRelease(PgHdr*);
+
+SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr*);         /* Remove page from cache */
+SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr*);    /* Make sure page is marked dirty */
+SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr*);    /* Mark a single page as clean */
+SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache*);    /* Mark all dirty list pages as clean */
+
+/* Change a page number.  Used by incr-vacuum. */
+SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr*, Pgno);
+
+/* Remove all pages with pgno>x.  Reset the cache if x==0 */
+SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache*, Pgno x);
+
+/* Get a list of all dirty pages in the cache, sorted by page number */
+SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache*);
+
+/* Reset and close the cache object */
+SQLITE_PRIVATE void sqlite3PcacheClose(PCache*);
+
+/* Clear flags from pages of the page cache */
+SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *);
+
+/* Discard the contents of the cache */
+SQLITE_PRIVATE void sqlite3PcacheClear(PCache*);
+
+/* Return the total number of outstanding page references */
+SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache*);
+
+/* Increment the reference count of an existing page */
+SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr*);
+
+SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr*);
+
+/* Return the total number of pages stored in the cache */
+SQLITE_PRIVATE int sqlite3PcachePagecount(PCache*);
+
+#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG)
+/* Iterate through all dirty pages currently stored in the cache. This
+** interface is only available if SQLITE_CHECK_PAGES is defined when the 
+** library is built.
+*/
+SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *));
+#endif
+
+/* Set and get the suggested cache-size for the specified pager-cache.
+**
+** If no global maximum is configured, then the system attempts to limit
+** the total number of pages cached by purgeable pager-caches to the sum
+** of the suggested cache-sizes.
+*/
+SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *, int);
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *);
+#endif
+
+/* Free up as much memory as possible from the page cache */
+SQLITE_PRIVATE void sqlite3PcacheShrink(PCache*);
+
+#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
+/* Try to return memory used by the pcache module to the main memory heap */
+SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int);
+#endif
+
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE void sqlite3PcacheStats(int*,int*,int*,int*);
+#endif
+
+SQLITE_PRIVATE void sqlite3PCacheSetDefault(void);
+
+/* Return the header size */
+SQLITE_PRIVATE int sqlite3HeaderSizePcache(void);
+SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void);
+
+#endif /* _PCACHE_H_ */
+
+/************** End of pcache.h **********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+
+/************** Include os.h in the middle of sqliteInt.h ********************/
+/************** Begin file os.h **********************************************/
+/*
+** 2001 September 16
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This header file (together with is companion C source-code file
+** "os.c") attempt to abstract the underlying operating system so that
+** the SQLite library will work on both POSIX and windows systems.
+**
+** This header file is #include-ed by sqliteInt.h and thus ends up
+** being included by every source file.
+*/
+#ifndef _SQLITE_OS_H_
+#define _SQLITE_OS_H_
+
+/*
+** Attempt to automatically detect the operating system and setup the
+** necessary pre-processor macros for it.
+*/
+/************** Include os_setup.h in the middle of os.h *********************/
+/************** Begin file os_setup.h ****************************************/
+/*
+** 2013 November 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains pre-processor directives related to operating system
+** detection and/or setup.
+*/
+#ifndef _OS_SETUP_H_
+#define _OS_SETUP_H_
+
+/*
+** Figure out if we are dealing with Unix, Windows, or some other operating
+** system.
+**
+** After the following block of preprocess macros, all of SQLITE_OS_UNIX,
+** SQLITE_OS_WIN, and SQLITE_OS_OTHER will defined to either 1 or 0.  One of
+** the three will be 1.  The other two will be 0.
+*/
+#if defined(SQLITE_OS_OTHER)
+#  if SQLITE_OS_OTHER==1
+#    undef SQLITE_OS_UNIX
+#    define SQLITE_OS_UNIX 0
+#    undef SQLITE_OS_WIN
+#    define SQLITE_OS_WIN 0
+#  else
+#    undef SQLITE_OS_OTHER
+#  endif
+#endif
+#if !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_OTHER)
+#  define SQLITE_OS_OTHER 0
+#  ifndef SQLITE_OS_WIN
+#    if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \
+        defined(__MINGW32__) || defined(__BORLANDC__)
+#      define SQLITE_OS_WIN 1
+#      define SQLITE_OS_UNIX 0
+#    else
+#      define SQLITE_OS_WIN 0
+#      define SQLITE_OS_UNIX 1
+#    endif
+#  else
+#    define SQLITE_OS_UNIX 0
+#  endif
+#else
+#  ifndef SQLITE_OS_WIN
+#    define SQLITE_OS_WIN 0
+#  endif
+#endif
+
+#endif /* _OS_SETUP_H_ */
+
+/************** End of os_setup.h ********************************************/
+/************** Continuing where we left off in os.h *************************/
+
+/* If the SET_FULLSYNC macro is not defined above, then make it
+** a no-op
+*/
+#ifndef SET_FULLSYNC
+# define SET_FULLSYNC(x,y)
+#endif
+
+/*
+** The default size of a disk sector
+*/
+#ifndef SQLITE_DEFAULT_SECTOR_SIZE
+# define SQLITE_DEFAULT_SECTOR_SIZE 4096
+#endif
+
+/*
+** Temporary files are named starting with this prefix followed by 16 random
+** alphanumeric characters, and no file extension. They are stored in the
+** OS's standard temporary file directory, and are deleted prior to exit.
+** If sqlite is being embedded in another program, you may wish to change the
+** prefix to reflect your program's name, so that if your program exits
+** prematurely, old temporary files can be easily identified. This can be done
+** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line.
+**
+** 2006-10-31:  The default prefix used to be "sqlite_".  But then
+** Mcafee started using SQLite in their anti-virus product and it
+** started putting files with the "sqlite" name in the c:/temp folder.
+** This annoyed many windows users.  Those users would then do a 
+** Google search for "sqlite", find the telephone numbers of the
+** developers and call to wake them up at night and complain.
+** For this reason, the default name prefix is changed to be "sqlite" 
+** spelled backwards.  So the temp files are still identified, but
+** anybody smart enough to figure out the code is also likely smart
+** enough to know that calling the developer will not help get rid
+** of the file.
+*/
+#ifndef SQLITE_TEMP_FILE_PREFIX
+# define SQLITE_TEMP_FILE_PREFIX "etilqs_"
+#endif
+
+/*
+** The following values may be passed as the second argument to
+** sqlite3OsLock(). The various locks exhibit the following semantics:
+**
+** SHARED:    Any number of processes may hold a SHARED lock simultaneously.
+** RESERVED:  A single process may hold a RESERVED lock on a file at
+**            any time. Other processes may hold and obtain new SHARED locks.
+** PENDING:   A single process may hold a PENDING lock on a file at
+**            any one time. Existing SHARED locks may persist, but no new
+**            SHARED locks may be obtained by other processes.
+** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks.
+**
+** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a
+** process that requests an EXCLUSIVE lock may actually obtain a PENDING
+** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to
+** sqlite3OsLock().
+*/
+#define NO_LOCK         0
+#define SHARED_LOCK     1
+#define RESERVED_LOCK   2
+#define PENDING_LOCK    3
+#define EXCLUSIVE_LOCK  4
+
+/*
+** File Locking Notes:  (Mostly about windows but also some info for Unix)
+**
+** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because
+** those functions are not available.  So we use only LockFile() and
+** UnlockFile().
+**
+** LockFile() prevents not just writing but also reading by other processes.
+** A SHARED_LOCK is obtained by locking a single randomly-chosen 
+** byte out of a specific range of bytes. The lock byte is obtained at 
+** random so two separate readers can probably access the file at the 
+** same time, unless they are unlucky and choose the same lock byte.
+** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range.
+** There can only be one writer.  A RESERVED_LOCK is obtained by locking
+** a single byte of the file that is designated as the reserved lock byte.
+** A PENDING_LOCK is obtained by locking a designated byte different from
+** the RESERVED_LOCK byte.
+**
+** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available,
+** which means we can use reader/writer locks.  When reader/writer locks
+** are used, the lock is placed on the same range of bytes that is used
+** for probabilistic locking in Win95/98/ME.  Hence, the locking scheme
+** will support two or more Win95 readers or two or more WinNT readers.
+** But a single Win95 reader will lock out all WinNT readers and a single
+** WinNT reader will lock out all other Win95 readers.
+**
+** The following #defines specify the range of bytes used for locking.
+** SHARED_SIZE is the number of bytes available in the pool from which
+** a random byte is selected for a shared lock.  The pool of bytes for
+** shared locks begins at SHARED_FIRST. 
+**
+** The same locking strategy and
+** byte ranges are used for Unix.  This leaves open the possibility of having
+** clients on win95, winNT, and unix all talking to the same shared file
+** and all locking correctly.  To do so would require that samba (or whatever
+** tool is being used for file sharing) implements locks correctly between
+** windows and unix.  I'm guessing that isn't likely to happen, but by
+** using the same locking range we are at least open to the possibility.
+**
+** Locking in windows is manditory.  For this reason, we cannot store
+** actual data in the bytes used for locking.  The pager never allocates
+** the pages involved in locking therefore.  SHARED_SIZE is selected so
+** that all locks will fit on a single page even at the minimum page size.
+** PENDING_BYTE defines the beginning of the locks.  By default PENDING_BYTE
+** is set high so that we don't have to allocate an unused page except
+** for very large databases.  But one should test the page skipping logic 
+** by setting PENDING_BYTE low and running the entire regression suite.
+**
+** Changing the value of PENDING_BYTE results in a subtly incompatible
+** file format.  Depending on how it is changed, you might not notice
+** the incompatibility right away, even running a full regression test.
+** The default location of PENDING_BYTE is the first byte past the
+** 1GB boundary.
+**
+*/
+#ifdef SQLITE_OMIT_WSD
+# define PENDING_BYTE     (0x40000000)
+#else
+# define PENDING_BYTE      sqlite3PendingByte
+#endif
+#define RESERVED_BYTE     (PENDING_BYTE+1)
+#define SHARED_FIRST      (PENDING_BYTE+2)
+#define SHARED_SIZE       510
+
+/*
+** Wrapper around OS specific sqlite3_os_init() function.
+*/
+SQLITE_PRIVATE int sqlite3OsInit(void);
+
+/* 
+** Functions for accessing sqlite3_file methods 
+*/
+SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file*);
+SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset);
+SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset);
+SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size);
+SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int);
+SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize);
+SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int);
+SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int);
+SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut);
+SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*);
+SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*);
+#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
+SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id);
+SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
+SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
+SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
+SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id);
+SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int);
+SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **);
+SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *);
+
+
+/* 
+** Functions for accessing sqlite3_vfs methods 
+*/
+SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);
+SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int);
+SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut);
+SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *);
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *, const char *);
+SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *, int, char *);
+SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *, void *, const char *))(void);
+SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *, void *);
+#endif /* SQLITE_OMIT_LOAD_EXTENSION */
+SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *);
+SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int);
+SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *, sqlite3_int64*);
+
+/*
+** Convenience functions for opening and closing files using 
+** sqlite3_malloc() to obtain space for the file-handle structure.
+*/
+SQLITE_PRIVATE int sqlite3OsOpenMalloc(sqlite3_vfs *, const char *, sqlite3_file **, int,int*);
+SQLITE_PRIVATE int sqlite3OsCloseFree(sqlite3_file *);
+
+#endif /* _SQLITE_OS_H_ */
+
+/************** End of os.h **************************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+/************** Include mutex.h in the middle of sqliteInt.h *****************/
+/************** Begin file mutex.h *******************************************/
+/*
+** 2007 August 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains the common header for all mutex implementations.
+** The sqliteInt.h header #includes this file so that it is available
+** to all source files.  We break it out in an effort to keep the code
+** better organized.
+**
+** NOTE:  source files should *not* #include this header file directly.
+** Source files should #include the sqliteInt.h file and let that file
+** include this one indirectly.
+*/
+
+
+/*
+** Figure out what version of the code to use.  The choices are
+**
+**   SQLITE_MUTEX_OMIT         No mutex logic.  Not even stubs.  The
+**                             mutexes implementation cannot be overridden
+**                             at start-time.
+**
+**   SQLITE_MUTEX_NOOP         For single-threaded applications.  No
+**                             mutual exclusion is provided.  But this
+**                             implementation can be overridden at
+**                             start-time.
+**
+**   SQLITE_MUTEX_PTHREADS     For multi-threaded applications on Unix.
+**
+**   SQLITE_MUTEX_W32          For multi-threaded applications on Win32.
+*/
+#if !SQLITE_THREADSAFE
+# define SQLITE_MUTEX_OMIT
+#endif
+#if SQLITE_THREADSAFE && !defined(SQLITE_MUTEX_NOOP)
+#  if SQLITE_OS_UNIX
+#    define SQLITE_MUTEX_PTHREADS
+#  elif SQLITE_OS_WIN
+#    define SQLITE_MUTEX_W32
+#  else
+#    define SQLITE_MUTEX_NOOP
+#  endif
+#endif
+
+#ifdef SQLITE_MUTEX_OMIT
+/*
+** If this is a no-op implementation, implement everything as macros.
+*/
+#define sqlite3_mutex_alloc(X)    ((sqlite3_mutex*)8)
+#define sqlite3_mutex_free(X)
+#define sqlite3_mutex_enter(X)    
+#define sqlite3_mutex_try(X)      SQLITE_OK
+#define sqlite3_mutex_leave(X)    
+#define sqlite3_mutex_held(X)     ((void)(X),1)
+#define sqlite3_mutex_notheld(X)  ((void)(X),1)
+#define sqlite3MutexAlloc(X)      ((sqlite3_mutex*)8)
+#define sqlite3MutexInit()        SQLITE_OK
+#define sqlite3MutexEnd()
+#define MUTEX_LOGIC(X)
+#else
+#define MUTEX_LOGIC(X)            X
+#endif /* defined(SQLITE_MUTEX_OMIT) */
+
+/************** End of mutex.h ***********************************************/
+/************** Continuing where we left off in sqliteInt.h ******************/
+
+
+/*
+** Each database file to be accessed by the system is an instance
+** of the following structure.  There are normally two of these structures
+** in the sqlite.aDb[] array.  aDb[0] is the main database file and
+** aDb[1] is the database file used to hold temporary tables.  Additional
+** databases may be attached.
+*/
+struct Db {
+  char *zName;         /* Name of this database */
+  Btree *pBt;          /* The B*Tree structure for this database file */
+  u8 safety_level;     /* How aggressive at syncing data to disk */
+  Schema *pSchema;     /* Pointer to database schema (possibly shared) */
+};
+
+/*
+** An instance of the following structure stores a database schema.
+**
+** Most Schema objects are associated with a Btree.  The exception is
+** the Schema for the TEMP databaes (sqlite3.aDb[1]) which is free-standing.
+** In shared cache mode, a single Schema object can be shared by multiple
+** Btrees that refer to the same underlying BtShared object.
+** 
+** Schema objects are automatically deallocated when the last Btree that
+** references them is destroyed.   The TEMP Schema is manually freed by
+** sqlite3_close().
+*
+** A thread must be holding a mutex on the corresponding Btree in order
+** to access Schema content.  This implies that the thread must also be
+** holding a mutex on the sqlite3 connection pointer that owns the Btree.
+** For a TEMP Schema, only the connection mutex is required.
+*/
+struct Schema {
+  int schema_cookie;   /* Database schema version number for this file */
+  int iGeneration;     /* Generation counter.  Incremented with each change */
+  Hash tblHash;        /* All tables indexed by name */
+  Hash idxHash;        /* All (named) indices indexed by name */
+  Hash trigHash;       /* All triggers indexed by name */
+  Hash fkeyHash;       /* All foreign keys by referenced table name */
+  Table *pSeqTab;      /* The sqlite_sequence table used by AUTOINCREMENT */
+  u8 file_format;      /* Schema format version for this file */
+  u8 enc;              /* Text encoding used by this database */
+  u16 schemaFlags;     /* Flags associated with this schema */
+  int cache_size;      /* Number of pages to use in the cache */
+};
+
+/*
+** These macros can be used to test, set, or clear bits in the 
+** Db.pSchema->flags field.
+*/
+#define DbHasProperty(D,I,P)     (((D)->aDb[I].pSchema->schemaFlags&(P))==(P))
+#define DbHasAnyProperty(D,I,P)  (((D)->aDb[I].pSchema->schemaFlags&(P))!=0)
+#define DbSetProperty(D,I,P)     (D)->aDb[I].pSchema->schemaFlags|=(P)
+#define DbClearProperty(D,I,P)   (D)->aDb[I].pSchema->schemaFlags&=~(P)
+
+/*
+** Allowed values for the DB.pSchema->flags field.
+**
+** The DB_SchemaLoaded flag is set after the database schema has been
+** read into internal hash tables.
+**
+** DB_UnresetViews means that one or more views have column names that
+** have been filled out.  If the schema changes, these column names might
+** changes and so the view will need to be reset.
+*/
+#define DB_SchemaLoaded    0x0001  /* The schema has been loaded */
+#define DB_UnresetViews    0x0002  /* Some views have defined column names */
+#define DB_Empty           0x0004  /* The file is empty (length 0 bytes) */
+
+/*
+** The number of different kinds of things that can be limited
+** using the sqlite3_limit() interface.
+*/
+#define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1)
+
+/*
+** Lookaside malloc is a set of fixed-size buffers that can be used
+** to satisfy small transient memory allocation requests for objects
+** associated with a particular database connection.  The use of
+** lookaside malloc provides a significant performance enhancement
+** (approx 10%) by avoiding numerous malloc/free requests while parsing
+** SQL statements.
+**
+** The Lookaside structure holds configuration information about the
+** lookaside malloc subsystem.  Each available memory allocation in
+** the lookaside subsystem is stored on a linked list of LookasideSlot
+** objects.
+**
+** Lookaside allocations are only allowed for objects that are associated
+** with a particular database connection.  Hence, schema information cannot
+** be stored in lookaside because in shared cache mode the schema information
+** is shared by multiple database connections.  Therefore, while parsing
+** schema information, the Lookaside.bEnabled flag is cleared so that
+** lookaside allocations are not used to construct the schema objects.
+*/
+struct Lookaside {
+  u16 sz;                 /* Size of each buffer in bytes */
+  u8 bEnabled;            /* False to disable new lookaside allocations */
+  u8 bMalloced;           /* True if pStart obtained from sqlite3_malloc() */
+  int nOut;               /* Number of buffers currently checked out */
+  int mxOut;              /* Highwater mark for nOut */
+  int anStat[3];          /* 0: hits.  1: size misses.  2: full misses */
+  LookasideSlot *pFree;   /* List of available buffers */
+  void *pStart;           /* First byte of available memory space */
+  void *pEnd;             /* First byte past end of available space */
+};
+struct LookasideSlot {
+  LookasideSlot *pNext;    /* Next buffer in the list of free buffers */
+};
+
+/*
+** A hash table for function definitions.
+**
+** Hash each FuncDef structure into one of the FuncDefHash.a[] slots.
+** Collisions are on the FuncDef.pHash chain.
+*/
+struct FuncDefHash {
+  FuncDef *a[23];       /* Hash table for functions */
+};
+
+#ifdef SQLITE_USER_AUTHENTICATION
+/*
+** Information held in the "sqlite3" database connection object and used
+** to manage user authentication.
+*/
+typedef struct sqlite3_userauth sqlite3_userauth;
+struct sqlite3_userauth {
+  u8 authLevel;                 /* Current authentication level */
+  int nAuthPW;                  /* Size of the zAuthPW in bytes */
+  char *zAuthPW;                /* Password used to authenticate */
+  char *zAuthUser;              /* User name used to authenticate */
+};
+
+/* Allowed values for sqlite3_userauth.authLevel */
+#define UAUTH_Unknown     0     /* Authentication not yet checked */
+#define UAUTH_Fail        1     /* User authentication failed */
+#define UAUTH_User        2     /* Authenticated as a normal user */
+#define UAUTH_Admin       3     /* Authenticated as an administrator */
+
+/* Functions used only by user authorization logic */
+SQLITE_PRIVATE int sqlite3UserAuthTable(const char*);
+SQLITE_PRIVATE int sqlite3UserAuthCheckLogin(sqlite3*,const char*,u8*);
+SQLITE_PRIVATE void sqlite3UserAuthInit(sqlite3*);
+SQLITE_PRIVATE void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**);
+
+#endif /* SQLITE_USER_AUTHENTICATION */
+
+/*
+** typedef for the authorization callback function.
+*/
+#ifdef SQLITE_USER_AUTHENTICATION
+  typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*,
+                               const char*, const char*);
+#else
+  typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*,
+                               const char*);
+#endif
+
+
+/*
+** Each database connection is an instance of the following structure.
+*/
+struct sqlite3 {
+  sqlite3_vfs *pVfs;            /* OS Interface */
+  struct Vdbe *pVdbe;           /* List of active virtual machines */
+  CollSeq *pDfltColl;           /* The default collating sequence (BINARY) */
+  sqlite3_mutex *mutex;         /* Connection mutex */
+  Db *aDb;                      /* All backends */
+  int nDb;                      /* Number of backends currently in use */
+  int flags;                    /* Miscellaneous flags. See below */
+  i64 lastRowid;                /* ROWID of most recent insert (see above) */
+  i64 szMmap;                   /* Default mmap_size setting */
+  unsigned int openFlags;       /* Flags passed to sqlite3_vfs.xOpen() */
+  int errCode;                  /* Most recent error code (SQLITE_*) */
+  int errMask;                  /* & result codes with this before returning */
+  u16 dbOptFlags;               /* Flags to enable/disable optimizations */
+  u8 enc;                       /* Text encoding */
+  u8 autoCommit;                /* The auto-commit flag. */
+  u8 temp_store;                /* 1: file 2: memory 0: default */
+  u8 mallocFailed;              /* True if we have seen a malloc failure */
+  u8 dfltLockMode;              /* Default locking-mode for attached dbs */
+  signed char nextAutovac;      /* Autovac setting after VACUUM if >=0 */
+  u8 suppressErr;               /* Do not issue error messages if true */
+  u8 vtabOnConflict;            /* Value to return for s3_vtab_on_conflict() */
+  u8 isTransactionSavepoint;    /* True if the outermost savepoint is a TS */
+  int nextPagesize;             /* Pagesize after VACUUM if >0 */
+  u32 magic;                    /* Magic number for detect library misuse */
+  int nChange;                  /* Value returned by sqlite3_changes() */
+  int nTotalChange;             /* Value returned by sqlite3_total_changes() */
+  int aLimit[SQLITE_N_LIMIT];   /* Limits */
+  int nMaxSorterMmap;           /* Maximum size of regions mapped by sorter */
+  struct sqlite3InitInfo {      /* Information used during initialization */
+    int newTnum;                /* Rootpage of table being initialized */
+    u8 iDb;                     /* Which db file is being initialized */
+    u8 busy;                    /* TRUE if currently initializing */
+    u8 orphanTrigger;           /* Last statement is orphaned TEMP trigger */
+  } init;
+  int nVdbeActive;              /* Number of VDBEs currently running */
+  int nVdbeRead;                /* Number of active VDBEs that read or write */
+  int nVdbeWrite;               /* Number of active VDBEs that read and write */
+  int nVdbeExec;                /* Number of nested calls to VdbeExec() */
+  int nExtension;               /* Number of loaded extensions */
+  void **aExtension;            /* Array of shared library handles */
+  void (*xTrace)(void*,const char*);        /* Trace function */
+  void *pTraceArg;                          /* Argument to the trace function */
+  void (*xProfile)(void*,const char*,u64);  /* Profiling function */
+  void *pProfileArg;                        /* Argument to profile function */
+  void *pCommitArg;                 /* Argument to xCommitCallback() */   
+  int (*xCommitCallback)(void*);    /* Invoked at every commit. */
+  void *pRollbackArg;               /* Argument to xRollbackCallback() */   
+  void (*xRollbackCallback)(void*); /* Invoked at every commit. */
+  void *pUpdateArg;
+  void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64);
+#ifndef SQLITE_OMIT_WAL
+  int (*xWalCallback)(void *, sqlite3 *, const char *, int);
+  void *pWalArg;
+#endif
+  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*);
+  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*);
+  void *pCollNeededArg;
+  sqlite3_value *pErr;          /* Most recent error message */
+  union {
+    volatile int isInterrupted; /* True if sqlite3_interrupt has been called */
+    double notUsed1;            /* Spacer */
+  } u1;
+  Lookaside lookaside;          /* Lookaside malloc configuration */
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  sqlite3_xauth xAuth;          /* Access authorization function */
+  void *pAuthArg;               /* 1st argument to the access auth function */
+#endif
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+  int (*xProgress)(void *);     /* The progress callback */
+  void *pProgressArg;           /* Argument to the progress callback */
+  unsigned nProgressOps;        /* Number of opcodes for progress callback */
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  int nVTrans;                  /* Allocated size of aVTrans */
+  Hash aModule;                 /* populated by sqlite3_create_module() */
+  VtabCtx *pVtabCtx;            /* Context for active vtab connect/create */
+  VTable **aVTrans;             /* Virtual tables with open transactions */
+  VTable *pDisconnect;    /* Disconnect these in next sqlite3_prepare() */
+#endif
+  FuncDefHash aFunc;            /* Hash table of connection functions */
+  Hash aCollSeq;                /* All collating sequences */
+  BusyHandler busyHandler;      /* Busy callback */
+  Db aDbStatic[2];              /* Static space for the 2 default backends */
+  Savepoint *pSavepoint;        /* List of active savepoints */
+  int busyTimeout;              /* Busy handler timeout, in msec */
+  int nSavepoint;               /* Number of non-transaction savepoints */
+  int nStatement;               /* Number of nested statement-transactions  */
+  i64 nDeferredCons;            /* Net deferred constraints this transaction. */
+  i64 nDeferredImmCons;         /* Net deferred immediate constraints */
+  int *pnBytesFreed;            /* If not NULL, increment this in DbFree() */
+#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY
+  /* The following variables are all protected by the STATIC_MASTER 
+  ** mutex, not by sqlite3.mutex. They are used by code in notify.c. 
+  **
+  ** When X.pUnlockConnection==Y, that means that X is waiting for Y to
+  ** unlock so that it can proceed.
+  **
+  ** When X.pBlockingConnection==Y, that means that something that X tried
+  ** tried to do recently failed with an SQLITE_LOCKED error due to locks
+  ** held by Y.
+  */
+  sqlite3 *pBlockingConnection; /* Connection that caused SQLITE_LOCKED */
+  sqlite3 *pUnlockConnection;           /* Connection to watch for unlock */
+  void *pUnlockArg;                     /* Argument to xUnlockNotify */
+  void (*xUnlockNotify)(void **, int);  /* Unlock notify callback */
+  sqlite3 *pNextBlocked;        /* Next in list of all blocked connections */
+#endif
+#ifdef SQLITE_USER_AUTHENTICATION
+  sqlite3_userauth auth;        /* User authentication information */
+#endif
+};
+
+/*
+** A macro to discover the encoding of a database.
+*/
+#define SCHEMA_ENC(db) ((db)->aDb[0].pSchema->enc)
+#define ENC(db)        ((db)->enc)
+
+/*
+** Possible values for the sqlite3.flags.
+*/
+#define SQLITE_VdbeTrace      0x00000001  /* True to trace VDBE execution */
+#define SQLITE_InternChanges  0x00000002  /* Uncommitted Hash table changes */
+#define SQLITE_FullFSync      0x00000004  /* Use full fsync on the backend */
+#define SQLITE_CkptFullFSync  0x00000008  /* Use full fsync for checkpoint */
+#define SQLITE_CacheSpill     0x00000010  /* OK to spill pager cache */
+#define SQLITE_FullColNames   0x00000020  /* Show full column names on SELECT */
+#define SQLITE_ShortColNames  0x00000040  /* Show short columns names */
+#define SQLITE_CountRows      0x00000080  /* Count rows changed by INSERT, */
+                                          /*   DELETE, or UPDATE and return */
+                                          /*   the count using a callback. */
+#define SQLITE_NullCallback   0x00000100  /* Invoke the callback once if the */
+                                          /*   result set is empty */
+#define SQLITE_SqlTrace       0x00000200  /* Debug print SQL as it executes */
+#define SQLITE_VdbeListing    0x00000400  /* Debug listings of VDBE programs */
+#define SQLITE_WriteSchema    0x00000800  /* OK to update SQLITE_MASTER */
+#define SQLITE_VdbeAddopTrace 0x00001000  /* Trace sqlite3VdbeAddOp() calls */
+#define SQLITE_IgnoreChecks   0x00002000  /* Do not enforce check constraints */
+#define SQLITE_ReadUncommitted 0x0004000  /* For shared-cache mode */
+#define SQLITE_LegacyFileFmt  0x00008000  /* Create new databases in format 1 */
+#define SQLITE_RecoveryMode   0x00010000  /* Ignore schema errors */
+#define SQLITE_ReverseOrder   0x00020000  /* Reverse unordered SELECTs */
+#define SQLITE_RecTriggers    0x00040000  /* Enable recursive triggers */
+#define SQLITE_ForeignKeys    0x00080000  /* Enforce foreign key constraints  */
+#define SQLITE_AutoIndex      0x00100000  /* Enable automatic indexes */
+#define SQLITE_PreferBuiltin  0x00200000  /* Preference to built-in funcs */
+#define SQLITE_LoadExtension  0x00400000  /* Enable load_extension */
+#define SQLITE_EnableTrigger  0x00800000  /* True to enable triggers */
+#define SQLITE_DeferFKs       0x01000000  /* Defer all FK constraints */
+#define SQLITE_QueryOnly      0x02000000  /* Disable database changes */
+#define SQLITE_VdbeEQP        0x04000000  /* Debug EXPLAIN QUERY PLAN */
+
+
+/*
+** Bits of the sqlite3.dbOptFlags field that are used by the
+** sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS,...) interface to
+** selectively disable various optimizations.
+*/
+#define SQLITE_QueryFlattener 0x0001   /* Query flattening */
+#define SQLITE_ColumnCache    0x0002   /* Column cache */
+#define SQLITE_GroupByOrder   0x0004   /* GROUPBY cover of ORDERBY */
+#define SQLITE_FactorOutConst 0x0008   /* Constant factoring */
+/*                not used    0x0010   // Was: SQLITE_IdxRealAsInt */
+#define SQLITE_DistinctOpt    0x0020   /* DISTINCT using indexes */
+#define SQLITE_CoverIdxScan   0x0040   /* Covering index scans */
+#define SQLITE_OrderByIdxJoin 0x0080   /* ORDER BY of joins via index */
+#define SQLITE_SubqCoroutine  0x0100   /* Evaluate subqueries as coroutines */
+#define SQLITE_Transitive     0x0200   /* Transitive constraints */
+#define SQLITE_OmitNoopJoin   0x0400   /* Omit unused tables in joins */
+#define SQLITE_Stat34         0x0800   /* Use STAT3 or STAT4 data */
+#define SQLITE_AllOpts        0xffff   /* All optimizations */
+
+/*
+** Macros for testing whether or not optimizations are enabled or disabled.
+*/
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+#define OptimizationDisabled(db, mask)  (((db)->dbOptFlags&(mask))!=0)
+#define OptimizationEnabled(db, mask)   (((db)->dbOptFlags&(mask))==0)
+#else
+#define OptimizationDisabled(db, mask)  0
+#define OptimizationEnabled(db, mask)   1
+#endif
+
+/*
+** Return true if it OK to factor constant expressions into the initialization
+** code. The argument is a Parse object for the code generator.
+*/
+#define ConstFactorOk(P) ((P)->okConstFactor)
+
+/*
+** Possible values for the sqlite.magic field.
+** The numbers are obtained at random and have no special meaning, other
+** than being distinct from one another.
+*/
+#define SQLITE_MAGIC_OPEN     0xa029a697  /* Database is open */
+#define SQLITE_MAGIC_CLOSED   0x9f3c2d33  /* Database is closed */
+#define SQLITE_MAGIC_SICK     0x4b771290  /* Error and awaiting close */
+#define SQLITE_MAGIC_BUSY     0xf03b7906  /* Database currently in use */
+#define SQLITE_MAGIC_ERROR    0xb5357930  /* An SQLITE_MISUSE error occurred */
+#define SQLITE_MAGIC_ZOMBIE   0x64cffc7f  /* Close with last statement close */
+
+/*
+** Each SQL function is defined by an instance of the following
+** structure.  A pointer to this structure is stored in the sqlite.aFunc
+** hash table.  When multiple functions have the same name, the hash table
+** points to a linked list of these structures.
+*/
+struct FuncDef {
+  i16 nArg;            /* Number of arguments.  -1 means unlimited */
+  u16 funcFlags;       /* Some combination of SQLITE_FUNC_* */
+  void *pUserData;     /* User data parameter */
+  FuncDef *pNext;      /* Next function with same name */
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**); /* Regular function */
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**); /* Aggregate step */
+  void (*xFinalize)(sqlite3_context*);                /* Aggregate finalizer */
+  char *zName;         /* SQL name of the function. */
+  FuncDef *pHash;      /* Next with a different name but the same hash */
+  FuncDestructor *pDestructor;   /* Reference counted destructor function */
+};
+
+/*
+** This structure encapsulates a user-function destructor callback (as
+** configured using create_function_v2()) and a reference counter. When
+** create_function_v2() is called to create a function with a destructor,
+** a single object of this type is allocated. FuncDestructor.nRef is set to 
+** the number of FuncDef objects created (either 1 or 3, depending on whether
+** or not the specified encoding is SQLITE_ANY). The FuncDef.pDestructor
+** member of each of the new FuncDef objects is set to point to the allocated
+** FuncDestructor.
+**
+** Thereafter, when one of the FuncDef objects is deleted, the reference
+** count on this object is decremented. When it reaches 0, the destructor
+** is invoked and the FuncDestructor structure freed.
+*/
+struct FuncDestructor {
+  int nRef;
+  void (*xDestroy)(void *);
+  void *pUserData;
+};
+
+/*
+** Possible values for FuncDef.flags.  Note that the _LENGTH and _TYPEOF
+** values must correspond to OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG.  There
+** are assert() statements in the code to verify this.
+*/
+#define SQLITE_FUNC_ENCMASK  0x003 /* SQLITE_UTF8, SQLITE_UTF16BE or UTF16LE */
+#define SQLITE_FUNC_LIKE     0x004 /* Candidate for the LIKE optimization */
+#define SQLITE_FUNC_CASE     0x008 /* Case-sensitive LIKE-type function */
+#define SQLITE_FUNC_EPHEM    0x010 /* Ephemeral.  Delete with VDBE */
+#define SQLITE_FUNC_NEEDCOLL 0x020 /* sqlite3GetFuncCollSeq() might be called */
+#define SQLITE_FUNC_LENGTH   0x040 /* Built-in length() function */
+#define SQLITE_FUNC_TYPEOF   0x080 /* Built-in typeof() function */
+#define SQLITE_FUNC_COUNT    0x100 /* Built-in count(*) aggregate */
+#define SQLITE_FUNC_COALESCE 0x200 /* Built-in coalesce() or ifnull() */
+#define SQLITE_FUNC_UNLIKELY 0x400 /* Built-in unlikely() function */
+#define SQLITE_FUNC_CONSTANT 0x800 /* Constant inputs give a constant output */
+#define SQLITE_FUNC_MINMAX  0x1000 /* True for min() and max() aggregates */
+
+/*
+** The following three macros, FUNCTION(), LIKEFUNC() and AGGREGATE() are
+** used to create the initializers for the FuncDef structures.
+**
+**   FUNCTION(zName, nArg, iArg, bNC, xFunc)
+**     Used to create a scalar function definition of a function zName 
+**     implemented by C function xFunc that accepts nArg arguments. The
+**     value passed as iArg is cast to a (void*) and made available
+**     as the user-data (sqlite3_user_data()) for the function. If 
+**     argument bNC is true, then the SQLITE_FUNC_NEEDCOLL flag is set.
+**
+**   VFUNCTION(zName, nArg, iArg, bNC, xFunc)
+**     Like FUNCTION except it omits the SQLITE_FUNC_CONSTANT flag.
+**
+**   AGGREGATE(zName, nArg, iArg, bNC, xStep, xFinal)
+**     Used to create an aggregate function definition implemented by
+**     the C functions xStep and xFinal. The first four parameters
+**     are interpreted in the same way as the first 4 parameters to
+**     FUNCTION().
+**
+**   LIKEFUNC(zName, nArg, pArg, flags)
+**     Used to create a scalar function definition of a function zName 
+**     that accepts nArg arguments and is implemented by a call to C 
+**     function likeFunc. Argument pArg is cast to a (void *) and made
+**     available as the function user-data (sqlite3_user_data()). The
+**     FuncDef.flags variable is set to the value passed as the flags
+**     parameter.
+*/
+#define FUNCTION(zName, nArg, iArg, bNC, xFunc) \
+  {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \
+   SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0}
+#define VFUNCTION(zName, nArg, iArg, bNC, xFunc) \
+  {nArg, SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \
+   SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0}
+#define FUNCTION2(zName, nArg, iArg, bNC, xFunc, extraFlags) \
+  {nArg,SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL)|extraFlags,\
+   SQLITE_INT_TO_PTR(iArg), 0, xFunc, 0, 0, #zName, 0, 0}
+#define STR_FUNCTION(zName, nArg, pArg, bNC, xFunc) \
+  {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|(bNC*SQLITE_FUNC_NEEDCOLL), \
+   pArg, 0, xFunc, 0, 0, #zName, 0, 0}
+#define LIKEFUNC(zName, nArg, arg, flags) \
+  {nArg, SQLITE_FUNC_CONSTANT|SQLITE_UTF8|flags, \
+   (void *)arg, 0, likeFunc, 0, 0, #zName, 0, 0}
+#define AGGREGATE(zName, nArg, arg, nc, xStep, xFinal) \
+  {nArg, SQLITE_UTF8|(nc*SQLITE_FUNC_NEEDCOLL), \
+   SQLITE_INT_TO_PTR(arg), 0, 0, xStep,xFinal,#zName,0,0}
+#define AGGREGATE2(zName, nArg, arg, nc, xStep, xFinal, extraFlags) \
+  {nArg, SQLITE_UTF8|(nc*SQLITE_FUNC_NEEDCOLL)|extraFlags, \
+   SQLITE_INT_TO_PTR(arg), 0, 0, xStep,xFinal,#zName,0,0}
+
+/*
+** All current savepoints are stored in a linked list starting at
+** sqlite3.pSavepoint. The first element in the list is the most recently
+** opened savepoint. Savepoints are added to the list by the vdbe
+** OP_Savepoint instruction.
+*/
+struct Savepoint {
+  char *zName;                        /* Savepoint name (nul-terminated) */
+  i64 nDeferredCons;                  /* Number of deferred fk violations */
+  i64 nDeferredImmCons;               /* Number of deferred imm fk. */
+  Savepoint *pNext;                   /* Parent savepoint (if any) */
+};
+
+/*
+** The following are used as the second parameter to sqlite3Savepoint(),
+** and as the P1 argument to the OP_Savepoint instruction.
+*/
+#define SAVEPOINT_BEGIN      0
+#define SAVEPOINT_RELEASE    1
+#define SAVEPOINT_ROLLBACK   2
+
+
+/*
+** Each SQLite module (virtual table definition) is defined by an
+** instance of the following structure, stored in the sqlite3.aModule
+** hash table.
+*/
+struct Module {
+  const sqlite3_module *pModule;       /* Callback pointers */
+  const char *zName;                   /* Name passed to create_module() */
+  void *pAux;                          /* pAux passed to create_module() */
+  void (*xDestroy)(void *);            /* Module destructor function */
+};
+
+/*
+** information about each column of an SQL table is held in an instance
+** of this structure.
+*/
+struct Column {
+  char *zName;     /* Name of this column */
+  Expr *pDflt;     /* Default value of this column */
+  char *zDflt;     /* Original text of the default value */
+  char *zType;     /* Data type for this column */
+  char *zColl;     /* Collating sequence.  If NULL, use the default */
+  u8 notNull;      /* An OE_ code for handling a NOT NULL constraint */
+  char affinity;   /* One of the SQLITE_AFF_... values */
+  u8 szEst;        /* Estimated size of this column.  INT==1 */
+  u8 colFlags;     /* Boolean properties.  See COLFLAG_ defines below */
+};
+
+/* Allowed values for Column.colFlags:
+*/
+#define COLFLAG_PRIMKEY  0x0001    /* Column is part of the primary key */
+#define COLFLAG_HIDDEN   0x0002    /* A hidden column in a virtual table */
+
+/*
+** A "Collating Sequence" is defined by an instance of the following
+** structure. Conceptually, a collating sequence consists of a name and
+** a comparison routine that defines the order of that sequence.
+**
+** If CollSeq.xCmp is NULL, it means that the
+** collating sequence is undefined.  Indices built on an undefined
+** collating sequence may not be read or written.
+*/
+struct CollSeq {
+  char *zName;          /* Name of the collating sequence, UTF-8 encoded */
+  u8 enc;               /* Text encoding handled by xCmp() */
+  void *pUser;          /* First argument to xCmp() */
+  int (*xCmp)(void*,int, const void*, int, const void*);
+  void (*xDel)(void*);  /* Destructor for pUser */
+};
+
+/*
+** A sort order can be either ASC or DESC.
+*/
+#define SQLITE_SO_ASC       0  /* Sort in ascending order */
+#define SQLITE_SO_DESC      1  /* Sort in ascending order */
+
+/*
+** Column affinity types.
+**
+** These used to have mnemonic name like 'i' for SQLITE_AFF_INTEGER and
+** 't' for SQLITE_AFF_TEXT.  But we can save a little space and improve
+** the speed a little by numbering the values consecutively.  
+**
+** But rather than start with 0 or 1, we begin with 'A'.  That way,
+** when multiple affinity types are concatenated into a string and
+** used as the P4 operand, they will be more readable.
+**
+** Note also that the numeric types are grouped together so that testing
+** for a numeric type is a single comparison.  And the NONE type is first.
+*/
+#define SQLITE_AFF_NONE     'A'
+#define SQLITE_AFF_TEXT     'B'
+#define SQLITE_AFF_NUMERIC  'C'
+#define SQLITE_AFF_INTEGER  'D'
+#define SQLITE_AFF_REAL     'E'
+
+#define sqlite3IsNumericAffinity(X)  ((X)>=SQLITE_AFF_NUMERIC)
+
+/*
+** The SQLITE_AFF_MASK values masks off the significant bits of an
+** affinity value. 
+*/
+#define SQLITE_AFF_MASK     0x47
+
+/*
+** Additional bit values that can be ORed with an affinity without
+** changing the affinity.
+**
+** The SQLITE_NOTNULL flag is a combination of NULLEQ and JUMPIFNULL.
+** It causes an assert() to fire if either operand to a comparison
+** operator is NULL.  It is added to certain comparison operators to
+** prove that the operands are always NOT NULL.
+*/
+#define SQLITE_JUMPIFNULL   0x10  /* jumps if either operand is NULL */
+#define SQLITE_STOREP2      0x20  /* Store result in reg[P2] rather than jump */
+#define SQLITE_NULLEQ       0x80  /* NULL=NULL */
+#define SQLITE_NOTNULL      0x90  /* Assert that operands are never NULL */
+
+/*
+** An object of this type is created for each virtual table present in
+** the database schema. 
+**
+** If the database schema is shared, then there is one instance of this
+** structure for each database connection (sqlite3*) that uses the shared
+** schema. This is because each database connection requires its own unique
+** instance of the sqlite3_vtab* handle used to access the virtual table 
+** implementation. sqlite3_vtab* handles can not be shared between 
+** database connections, even when the rest of the in-memory database 
+** schema is shared, as the implementation often stores the database
+** connection handle passed to it via the xConnect() or xCreate() method
+** during initialization internally. This database connection handle may
+** then be used by the virtual table implementation to access real tables 
+** within the database. So that they appear as part of the callers 
+** transaction, these accesses need to be made via the same database 
+** connection as that used to execute SQL operations on the virtual table.
+**
+** All VTable objects that correspond to a single table in a shared
+** database schema are initially stored in a linked-list pointed to by
+** the Table.pVTable member variable of the corresponding Table object.
+** When an sqlite3_prepare() operation is required to access the virtual
+** table, it searches the list for the VTable that corresponds to the
+** database connection doing the preparing so as to use the correct
+** sqlite3_vtab* handle in the compiled query.
+**
+** When an in-memory Table object is deleted (for example when the
+** schema is being reloaded for some reason), the VTable objects are not 
+** deleted and the sqlite3_vtab* handles are not xDisconnect()ed 
+** immediately. Instead, they are moved from the Table.pVTable list to
+** another linked list headed by the sqlite3.pDisconnect member of the
+** corresponding sqlite3 structure. They are then deleted/xDisconnected 
+** next time a statement is prepared using said sqlite3*. This is done
+** to avoid deadlock issues involving multiple sqlite3.mutex mutexes.
+** Refer to comments above function sqlite3VtabUnlockList() for an
+** explanation as to why it is safe to add an entry to an sqlite3.pDisconnect
+** list without holding the corresponding sqlite3.mutex mutex.
+**
+** The memory for objects of this type is always allocated by 
+** sqlite3DbMalloc(), using the connection handle stored in VTable.db as 
+** the first argument.
+*/
+struct VTable {
+  sqlite3 *db;              /* Database connection associated with this table */
+  Module *pMod;             /* Pointer to module implementation */
+  sqlite3_vtab *pVtab;      /* Pointer to vtab instance */
+  int nRef;                 /* Number of pointers to this structure */
+  u8 bConstraint;           /* True if constraints are supported */
+  int iSavepoint;           /* Depth of the SAVEPOINT stack */
+  VTable *pNext;            /* Next in linked list (see above) */
+};
+
+/*
+** Each SQL table is represented in memory by an instance of the
+** following structure.
+**
+** Table.zName is the name of the table.  The case of the original
+** CREATE TABLE statement is stored, but case is not significant for
+** comparisons.
+**
+** Table.nCol is the number of columns in this table.  Table.aCol is a
+** pointer to an array of Column structures, one for each column.
+**
+** If the table has an INTEGER PRIMARY KEY, then Table.iPKey is the index of
+** the column that is that key.   Otherwise Table.iPKey is negative.  Note
+** that the datatype of the PRIMARY KEY must be INTEGER for this field to
+** be set.  An INTEGER PRIMARY KEY is used as the rowid for each row of
+** the table.  If a table has no INTEGER PRIMARY KEY, then a random rowid
+** is generated for each row of the table.  TF_HasPrimaryKey is set if
+** the table has any PRIMARY KEY, INTEGER or otherwise.
+**
+** Table.tnum is the page number for the root BTree page of the table in the
+** database file.  If Table.iDb is the index of the database table backend
+** in sqlite.aDb[].  0 is for the main database and 1 is for the file that
+** holds temporary tables and indices.  If TF_Ephemeral is set
+** then the table is stored in a file that is automatically deleted
+** when the VDBE cursor to the table is closed.  In this case Table.tnum 
+** refers VDBE cursor number that holds the table open, not to the root
+** page number.  Transient tables are used to hold the results of a
+** sub-query that appears instead of a real table name in the FROM clause 
+** of a SELECT statement.
+*/
+struct Table {
+  char *zName;         /* Name of the table or view */
+  Column *aCol;        /* Information about each column */
+  Index *pIndex;       /* List of SQL indexes on this table. */
+  Select *pSelect;     /* NULL for tables.  Points to definition if a view. */
+  FKey *pFKey;         /* Linked list of all foreign keys in this table */
+  char *zColAff;       /* String defining the affinity of each column */
+#ifndef SQLITE_OMIT_CHECK
+  ExprList *pCheck;    /* All CHECK constraints */
+#endif
+  LogEst nRowLogEst;   /* Estimated rows in table - from sqlite_stat1 table */
+  int tnum;            /* Root BTree node for this table (see note above) */
+  i16 iPKey;           /* If not negative, use aCol[iPKey] as the primary key */
+  i16 nCol;            /* Number of columns in this table */
+  u16 nRef;            /* Number of pointers to this Table */
+  LogEst szTabRow;     /* Estimated size of each table row in bytes */
+#ifdef SQLITE_ENABLE_COSTMULT
+  LogEst costMult;     /* Cost multiplier for using this table */
+#endif
+  u8 tabFlags;         /* Mask of TF_* values */
+  u8 keyConf;          /* What to do in case of uniqueness conflict on iPKey */
+#ifndef SQLITE_OMIT_ALTERTABLE
+  int addColOffset;    /* Offset in CREATE TABLE stmt to add a new column */
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  int nModuleArg;      /* Number of arguments to the module */
+  char **azModuleArg;  /* Text of all module args. [0] is module name */
+  VTable *pVTable;     /* List of VTable objects. */
+#endif
+  Trigger *pTrigger;   /* List of triggers stored in pSchema */
+  Schema *pSchema;     /* Schema that contains this table */
+  Table *pNextZombie;  /* Next on the Parse.pZombieTab list */
+};
+
+/*
+** Allowed values for Table.tabFlags.
+*/
+#define TF_Readonly        0x01    /* Read-only system table */
+#define TF_Ephemeral       0x02    /* An ephemeral table */
+#define TF_HasPrimaryKey   0x04    /* Table has a primary key */
+#define TF_Autoincrement   0x08    /* Integer primary key is autoincrement */
+#define TF_Virtual         0x10    /* Is a virtual table */
+#define TF_WithoutRowid    0x20    /* No rowid used. PRIMARY KEY is the key */
+
+
+/*
+** Test to see whether or not a table is a virtual table.  This is
+** done as a macro so that it will be optimized out when virtual
+** table support is omitted from the build.
+*/
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+#  define IsVirtual(X)      (((X)->tabFlags & TF_Virtual)!=0)
+#  define IsHiddenColumn(X) (((X)->colFlags & COLFLAG_HIDDEN)!=0)
+#else
+#  define IsVirtual(X)      0
+#  define IsHiddenColumn(X) 0
+#endif
+
+/* Does the table have a rowid */
+#define HasRowid(X)     (((X)->tabFlags & TF_WithoutRowid)==0)
+
+/*
+** Each foreign key constraint is an instance of the following structure.
+**
+** A foreign key is associated with two tables.  The "from" table is
+** the table that contains the REFERENCES clause that creates the foreign
+** key.  The "to" table is the table that is named in the REFERENCES clause.
+** Consider this example:
+**
+**     CREATE TABLE ex1(
+**       a INTEGER PRIMARY KEY,
+**       b INTEGER CONSTRAINT fk1 REFERENCES ex2(x)
+**     );
+**
+** For foreign key "fk1", the from-table is "ex1" and the to-table is "ex2".
+** Equivalent names:
+**
+**     from-table == child-table
+**       to-table == parent-table
+**
+** Each REFERENCES clause generates an instance of the following structure
+** which is attached to the from-table.  The to-table need not exist when
+** the from-table is created.  The existence of the to-table is not checked.
+**
+** The list of all parents for child Table X is held at X.pFKey.
+**
+** A list of all children for a table named Z (which might not even exist)
+** is held in Schema.fkeyHash with a hash key of Z.
+*/
+struct FKey {
+  Table *pFrom;     /* Table containing the REFERENCES clause (aka: Child) */
+  FKey *pNextFrom;  /* Next FKey with the same in pFrom. Next parent of pFrom */
+  char *zTo;        /* Name of table that the key points to (aka: Parent) */
+  FKey *pNextTo;    /* Next with the same zTo. Next child of zTo. */
+  FKey *pPrevTo;    /* Previous with the same zTo */
+  int nCol;         /* Number of columns in this key */
+  /* EV: R-30323-21917 */
+  u8 isDeferred;       /* True if constraint checking is deferred till COMMIT */
+  u8 aAction[2];        /* ON DELETE and ON UPDATE actions, respectively */
+  Trigger *apTrigger[2];/* Triggers for aAction[] actions */
+  struct sColMap {      /* Mapping of columns in pFrom to columns in zTo */
+    int iFrom;            /* Index of column in pFrom */
+    char *zCol;           /* Name of column in zTo.  If NULL use PRIMARY KEY */
+  } aCol[1];            /* One entry for each of nCol columns */
+};
+
+/*
+** SQLite supports many different ways to resolve a constraint
+** error.  ROLLBACK processing means that a constraint violation
+** causes the operation in process to fail and for the current transaction
+** to be rolled back.  ABORT processing means the operation in process
+** fails and any prior changes from that one operation are backed out,
+** but the transaction is not rolled back.  FAIL processing means that
+** the operation in progress stops and returns an error code.  But prior
+** changes due to the same operation are not backed out and no rollback
+** occurs.  IGNORE means that the particular row that caused the constraint
+** error is not inserted or updated.  Processing continues and no error
+** is returned.  REPLACE means that preexisting database rows that caused
+** a UNIQUE constraint violation are removed so that the new insert or
+** update can proceed.  Processing continues and no error is reported.
+**
+** RESTRICT, SETNULL, and CASCADE actions apply only to foreign keys.
+** RESTRICT is the same as ABORT for IMMEDIATE foreign keys and the
+** same as ROLLBACK for DEFERRED keys.  SETNULL means that the foreign
+** key is set to NULL.  CASCADE means that a DELETE or UPDATE of the
+** referenced table row is propagated into the row that holds the
+** foreign key.
+** 
+** The following symbolic values are used to record which type
+** of action to take.
+*/
+#define OE_None     0   /* There is no constraint to check */
+#define OE_Rollback 1   /* Fail the operation and rollback the transaction */
+#define OE_Abort    2   /* Back out changes but do no rollback transaction */
+#define OE_Fail     3   /* Stop the operation but leave all prior changes */
+#define OE_Ignore   4   /* Ignore the error. Do not do the INSERT or UPDATE */
+#define OE_Replace  5   /* Delete existing record, then do INSERT or UPDATE */
+
+#define OE_Restrict 6   /* OE_Abort for IMMEDIATE, OE_Rollback for DEFERRED */
+#define OE_SetNull  7   /* Set the foreign key value to NULL */
+#define OE_SetDflt  8   /* Set the foreign key value to its default */
+#define OE_Cascade  9   /* Cascade the changes */
+
+#define OE_Default  10  /* Do whatever the default action is */
+
+
+/*
+** An instance of the following structure is passed as the first
+** argument to sqlite3VdbeKeyCompare and is used to control the 
+** comparison of the two index keys.
+**
+** Note that aSortOrder[] and aColl[] have nField+1 slots.  There
+** are nField slots for the columns of an index then one extra slot
+** for the rowid at the end.
+*/
+struct KeyInfo {
+  u32 nRef;           /* Number of references to this KeyInfo object */
+  u8 enc;             /* Text encoding - one of the SQLITE_UTF* values */
+  u16 nField;         /* Number of key columns in the index */
+  u16 nXField;        /* Number of columns beyond the key columns */
+  sqlite3 *db;        /* The database connection */
+  u8 *aSortOrder;     /* Sort order for each column. */
+  CollSeq *aColl[1];  /* Collating sequence for each term of the key */
+};
+
+/*
+** An instance of the following structure holds information about a
+** single index record that has already been parsed out into individual
+** values.
+**
+** A record is an object that contains one or more fields of data.
+** Records are used to store the content of a table row and to store
+** the key of an index.  A blob encoding of a record is created by
+** the OP_MakeRecord opcode of the VDBE and is disassembled by the
+** OP_Column opcode.
+**
+** This structure holds a record that has already been disassembled
+** into its constituent fields.
+**
+** The r1 and r2 member variables are only used by the optimized comparison
+** functions vdbeRecordCompareInt() and vdbeRecordCompareString().
+*/
+struct UnpackedRecord {
+  KeyInfo *pKeyInfo;  /* Collation and sort-order information */
+  u16 nField;         /* Number of entries in apMem[] */
+  i8 default_rc;      /* Comparison result if keys are equal */
+  u8 errCode;         /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
+  Mem *aMem;          /* Values */
+  int r1;             /* Value to return if (lhs > rhs) */
+  int r2;             /* Value to return if (rhs < lhs) */
+};
+
+
+/*
+** Each SQL index is represented in memory by an
+** instance of the following structure.
+**
+** The columns of the table that are to be indexed are described
+** by the aiColumn[] field of this structure.  For example, suppose
+** we have the following table and index:
+**
+**     CREATE TABLE Ex1(c1 int, c2 int, c3 text);
+**     CREATE INDEX Ex2 ON Ex1(c3,c1);
+**
+** In the Table structure describing Ex1, nCol==3 because there are
+** three columns in the table.  In the Index structure describing
+** Ex2, nColumn==2 since 2 of the 3 columns of Ex1 are indexed.
+** The value of aiColumn is {2, 0}.  aiColumn[0]==2 because the 
+** first column to be indexed (c3) has an index of 2 in Ex1.aCol[].
+** The second column to be indexed (c1) has an index of 0 in
+** Ex1.aCol[], hence Ex2.aiColumn[1]==0.
+**
+** The Index.onError field determines whether or not the indexed columns
+** must be unique and what to do if they are not.  When Index.onError=OE_None,
+** it means this is not a unique index.  Otherwise it is a unique index
+** and the value of Index.onError indicate the which conflict resolution 
+** algorithm to employ whenever an attempt is made to insert a non-unique
+** element.
+*/
+struct Index {
+  char *zName;             /* Name of this index */
+  i16 *aiColumn;           /* Which columns are used by this index.  1st is 0 */
+  LogEst *aiRowLogEst;     /* From ANALYZE: Est. rows selected by each column */
+  Table *pTable;           /* The SQL table being indexed */
+  char *zColAff;           /* String defining the affinity of each column */
+  Index *pNext;            /* The next index associated with the same table */
+  Schema *pSchema;         /* Schema containing this index */
+  u8 *aSortOrder;          /* for each column: True==DESC, False==ASC */
+  char **azColl;           /* Array of collation sequence names for index */
+  Expr *pPartIdxWhere;     /* WHERE clause for partial indices */
+  int tnum;                /* DB Page containing root of this index */
+  LogEst szIdxRow;         /* Estimated average row size in bytes */
+  u16 nKeyCol;             /* Number of columns forming the key */
+  u16 nColumn;             /* Number of columns stored in the index */
+  u8 onError;              /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
+  unsigned idxType:2;      /* 1==UNIQUE, 2==PRIMARY KEY, 0==CREATE INDEX */
+  unsigned bUnordered:1;   /* Use this index for == or IN queries only */
+  unsigned uniqNotNull:1;  /* True if UNIQUE and NOT NULL for all columns */
+  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
+  unsigned isCovering:1;   /* True if this is a covering index */
+  unsigned noSkipScan:1;   /* Do not try to use skip-scan if true */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  int nSample;             /* Number of elements in aSample[] */
+  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
+  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
+  IndexSample *aSample;    /* Samples of the left-most key */
+  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this index */
+  tRowcnt nRowEst0;        /* Non-logarithmic number of rows in the index */
+#endif
+};
+
+/*
+** Allowed values for Index.idxType
+*/
+#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */
+#define SQLITE_IDXTYPE_UNIQUE      1   /* Implements a UNIQUE constraint */
+#define SQLITE_IDXTYPE_PRIMARYKEY  2   /* Is the PRIMARY KEY for the table */
+
+/* Return true if index X is a PRIMARY KEY index */
+#define IsPrimaryKeyIndex(X)  ((X)->idxType==SQLITE_IDXTYPE_PRIMARYKEY)
+
+/* Return true if index X is a UNIQUE index */
+#define IsUniqueIndex(X)      ((X)->onError!=OE_None)
+
+/*
+** Each sample stored in the sqlite_stat3 table is represented in memory 
+** using a structure of this type.  See documentation at the top of the
+** analyze.c source file for additional information.
+*/
+struct IndexSample {
+  void *p;          /* Pointer to sampled record */
+  int n;            /* Size of record in bytes */
+  tRowcnt *anEq;    /* Est. number of rows where the key equals this sample */
+  tRowcnt *anLt;    /* Est. number of rows where key is less than this sample */
+  tRowcnt *anDLt;   /* Est. number of distinct keys less than this sample */
+};
+
+/*
+** Each token coming out of the lexer is an instance of
+** this structure.  Tokens are also used as part of an expression.
+**
+** Note if Token.z==0 then Token.dyn and Token.n are undefined and
+** may contain random values.  Do not make any assumptions about Token.dyn
+** and Token.n when Token.z==0.
+*/
+struct Token {
+  const char *z;     /* Text of the token.  Not NULL-terminated! */
+  unsigned int n;    /* Number of characters in this token */
+};
+
+/*
+** An instance of this structure contains information needed to generate
+** code for a SELECT that contains aggregate functions.
+**
+** If Expr.op==TK_AGG_COLUMN or TK_AGG_FUNCTION then Expr.pAggInfo is a
+** pointer to this structure.  The Expr.iColumn field is the index in
+** AggInfo.aCol[] or AggInfo.aFunc[] of information needed to generate
+** code for that node.
+**
+** AggInfo.pGroupBy and AggInfo.aFunc.pExpr point to fields within the
+** original Select structure that describes the SELECT statement.  These
+** fields do not need to be freed when deallocating the AggInfo structure.
+*/
+struct AggInfo {
+  u8 directMode;          /* Direct rendering mode means take data directly
+                          ** from source tables rather than from accumulators */
+  u8 useSortingIdx;       /* In direct mode, reference the sorting index rather
+                          ** than the source table */
+  int sortingIdx;         /* Cursor number of the sorting index */
+  int sortingIdxPTab;     /* Cursor number of pseudo-table */
+  int nSortingColumn;     /* Number of columns in the sorting index */
+  int mnReg, mxReg;       /* Range of registers allocated for aCol and aFunc */
+  ExprList *pGroupBy;     /* The group by clause */
+  struct AggInfo_col {    /* For each column used in source tables */
+    Table *pTab;             /* Source table */
+    int iTable;              /* Cursor number of the source table */
+    int iColumn;             /* Column number within the source table */
+    int iSorterColumn;       /* Column number in the sorting index */
+    int iMem;                /* Memory location that acts as accumulator */
+    Expr *pExpr;             /* The original expression */
+  } *aCol;
+  int nColumn;            /* Number of used entries in aCol[] */
+  int nAccumulator;       /* Number of columns that show through to the output.
+                          ** Additional columns are used only as parameters to
+                          ** aggregate functions */
+  struct AggInfo_func {   /* For each aggregate function */
+    Expr *pExpr;             /* Expression encoding the function */
+    FuncDef *pFunc;          /* The aggregate function implementation */
+    int iMem;                /* Memory location that acts as accumulator */
+    int iDistinct;           /* Ephemeral table used to enforce DISTINCT */
+  } *aFunc;
+  int nFunc;              /* Number of entries in aFunc[] */
+};
+
+/*
+** The datatype ynVar is a signed integer, either 16-bit or 32-bit.
+** Usually it is 16-bits.  But if SQLITE_MAX_VARIABLE_NUMBER is greater
+** than 32767 we have to make it 32-bit.  16-bit is preferred because
+** it uses less memory in the Expr object, which is a big memory user
+** in systems with lots of prepared statements.  And few applications
+** need more than about 10 or 20 variables.  But some extreme users want
+** to have prepared statements with over 32767 variables, and for them
+** the option is available (at compile-time).
+*/
+#if SQLITE_MAX_VARIABLE_NUMBER<=32767
+typedef i16 ynVar;
+#else
+typedef int ynVar;
+#endif
+
+/*
+** Each node of an expression in the parse tree is an instance
+** of this structure.
+**
+** Expr.op is the opcode. The integer parser token codes are reused
+** as opcodes here. For example, the parser defines TK_GE to be an integer
+** code representing the ">=" operator. This same integer code is reused
+** to represent the greater-than-or-equal-to operator in the expression
+** tree.
+**
+** If the expression is an SQL literal (TK_INTEGER, TK_FLOAT, TK_BLOB, 
+** or TK_STRING), then Expr.token contains the text of the SQL literal. If
+** the expression is a variable (TK_VARIABLE), then Expr.token contains the 
+** variable name. Finally, if the expression is an SQL function (TK_FUNCTION),
+** then Expr.token contains the name of the function.
+**
+** Expr.pRight and Expr.pLeft are the left and right subexpressions of a
+** binary operator. Either or both may be NULL.
+**
+** Expr.x.pList is a list of arguments if the expression is an SQL function,
+** a CASE expression or an IN expression of the form "<lhs> IN (<y>, <z>...)".
+** Expr.x.pSelect is used if the expression is a sub-select or an expression of
+** the form "<lhs> IN (SELECT ...)". If the EP_xIsSelect bit is set in the
+** Expr.flags mask, then Expr.x.pSelect is valid. Otherwise, Expr.x.pList is 
+** valid.
+**
+** An expression of the form ID or ID.ID refers to a column in a table.
+** For such expressions, Expr.op is set to TK_COLUMN and Expr.iTable is
+** the integer cursor number of a VDBE cursor pointing to that table and
+** Expr.iColumn is the column number for the specific column.  If the
+** expression is used as a result in an aggregate SELECT, then the
+** value is also stored in the Expr.iAgg column in the aggregate so that
+** it can be accessed after all aggregates are computed.
+**
+** If the expression is an unbound variable marker (a question mark 
+** character '?' in the original SQL) then the Expr.iTable holds the index 
+** number for that variable.
+**
+** If the expression is a subquery then Expr.iColumn holds an integer
+** register number containing the result of the subquery.  If the
+** subquery gives a constant result, then iTable is -1.  If the subquery
+** gives a different answer at different times during statement processing
+** then iTable is the address of a subroutine that computes the subquery.
+**
+** If the Expr is of type OP_Column, and the table it is selecting from
+** is a disk table or the "old.*" pseudo-table, then pTab points to the
+** corresponding table definition.
+**
+** ALLOCATION NOTES:
+**
+** Expr objects can use a lot of memory space in database schema.  To
+** help reduce memory requirements, sometimes an Expr object will be
+** truncated.  And to reduce the number of memory allocations, sometimes
+** two or more Expr objects will be stored in a single memory allocation,
+** together with Expr.zToken strings.
+**
+** If the EP_Reduced and EP_TokenOnly flags are set when
+** an Expr object is truncated.  When EP_Reduced is set, then all
+** the child Expr objects in the Expr.pLeft and Expr.pRight subtrees
+** are contained within the same memory allocation.  Note, however, that
+** the subtrees in Expr.x.pList or Expr.x.pSelect are always separately
+** allocated, regardless of whether or not EP_Reduced is set.
+*/
+struct Expr {
+  u8 op;                 /* Operation performed by this node */
+  char affinity;         /* The affinity of the column or 0 if not a column */
+  u32 flags;             /* Various flags.  EP_* See below */
+  union {
+    char *zToken;          /* Token value. Zero terminated and dequoted */
+    int iValue;            /* Non-negative integer value if EP_IntValue */
+  } u;
+
+  /* If the EP_TokenOnly flag is set in the Expr.flags mask, then no
+  ** space is allocated for the fields below this point. An attempt to
+  ** access them will result in a segfault or malfunction. 
+  *********************************************************************/
+
+  Expr *pLeft;           /* Left subnode */
+  Expr *pRight;          /* Right subnode */
+  union {
+    ExprList *pList;     /* op = IN, EXISTS, SELECT, CASE, FUNCTION, BETWEEN */
+    Select *pSelect;     /* EP_xIsSelect and op = IN, EXISTS, SELECT */
+  } x;
+
+  /* If the EP_Reduced flag is set in the Expr.flags mask, then no
+  ** space is allocated for the fields below this point. An attempt to
+  ** access them will result in a segfault or malfunction.
+  *********************************************************************/
+
+#if SQLITE_MAX_EXPR_DEPTH>0
+  int nHeight;           /* Height of the tree headed by this node */
+#endif
+  int iTable;            /* TK_COLUMN: cursor number of table holding column
+                         ** TK_REGISTER: register number
+                         ** TK_TRIGGER: 1 -> new, 0 -> old
+                         ** EP_Unlikely:  134217728 times likelihood */
+  ynVar iColumn;         /* TK_COLUMN: column index.  -1 for rowid.
+                         ** TK_VARIABLE: variable number (always >= 1). */
+  i16 iAgg;              /* Which entry in pAggInfo->aCol[] or ->aFunc[] */
+  i16 iRightJoinTable;   /* If EP_FromJoin, the right table of the join */
+  u8 op2;                /* TK_REGISTER: original value of Expr.op
+                         ** TK_COLUMN: the value of p5 for OP_Column
+                         ** TK_AGG_FUNCTION: nesting depth */
+  AggInfo *pAggInfo;     /* Used by TK_AGG_COLUMN and TK_AGG_FUNCTION */
+  Table *pTab;           /* Table for TK_COLUMN expressions. */
+};
+
+/*
+** The following are the meanings of bits in the Expr.flags field.
+*/
+#define EP_FromJoin  0x000001 /* Originates in ON/USING clause of outer join */
+#define EP_Agg       0x000002 /* Contains one or more aggregate functions */
+#define EP_Resolved  0x000004 /* IDs have been resolved to COLUMNs */
+#define EP_Error     0x000008 /* Expression contains one or more errors */
+#define EP_Distinct  0x000010 /* Aggregate function with DISTINCT keyword */
+#define EP_VarSelect 0x000020 /* pSelect is correlated, not constant */
+#define EP_DblQuoted 0x000040 /* token.z was originally in "..." */
+#define EP_InfixFunc 0x000080 /* True for an infix function: LIKE, GLOB, etc */
+#define EP_Collate   0x000100 /* Tree contains a TK_COLLATE operator */
+#define EP_Generic   0x000200 /* Ignore COLLATE or affinity on this tree */
+#define EP_IntValue  0x000400 /* Integer value contained in u.iValue */
+#define EP_xIsSelect 0x000800 /* x.pSelect is valid (otherwise x.pList is) */
+#define EP_Skip      0x001000 /* COLLATE, AS, or UNLIKELY */
+#define EP_Reduced   0x002000 /* Expr struct EXPR_REDUCEDSIZE bytes only */
+#define EP_TokenOnly 0x004000 /* Expr struct EXPR_TOKENONLYSIZE bytes only */
+#define EP_Static    0x008000 /* Held in memory not obtained from malloc() */
+#define EP_MemToken  0x010000 /* Need to sqlite3DbFree() Expr.zToken */
+#define EP_NoReduce  0x020000 /* Cannot EXPRDUP_REDUCE this Expr */
+#define EP_Unlikely  0x040000 /* unlikely() or likelihood() function */
+#define EP_Constant  0x080000 /* Node is a constant */
+#define EP_CanBeNull 0x100000 /* Can be null despite NOT NULL constraint */
+
+/*
+** These macros can be used to test, set, or clear bits in the 
+** Expr.flags field.
+*/
+#define ExprHasProperty(E,P)     (((E)->flags&(P))!=0)
+#define ExprHasAllProperty(E,P)  (((E)->flags&(P))==(P))
+#define ExprSetProperty(E,P)     (E)->flags|=(P)
+#define ExprClearProperty(E,P)   (E)->flags&=~(P)
+
+/* The ExprSetVVAProperty() macro is used for Verification, Validation,
+** and Accreditation only.  It works like ExprSetProperty() during VVA
+** processes but is a no-op for delivery.
+*/
+#ifdef SQLITE_DEBUG
+# define ExprSetVVAProperty(E,P)  (E)->flags|=(P)
+#else
+# define ExprSetVVAProperty(E,P)
+#endif
+
+/*
+** Macros to determine the number of bytes required by a normal Expr 
+** struct, an Expr struct with the EP_Reduced flag set in Expr.flags 
+** and an Expr struct with the EP_TokenOnly flag set.
+*/
+#define EXPR_FULLSIZE           sizeof(Expr)           /* Full size */
+#define EXPR_REDUCEDSIZE        offsetof(Expr,iTable)  /* Common features */
+#define EXPR_TOKENONLYSIZE      offsetof(Expr,pLeft)   /* Fewer features */
+
+/*
+** Flags passed to the sqlite3ExprDup() function. See the header comment 
+** above sqlite3ExprDup() for details.
+*/
+#define EXPRDUP_REDUCE         0x0001  /* Used reduced-size Expr nodes */
+
+/*
+** A list of expressions.  Each expression may optionally have a
+** name.  An expr/name combination can be used in several ways, such
+** as the list of "expr AS ID" fields following a "SELECT" or in the
+** list of "ID = expr" items in an UPDATE.  A list of expressions can
+** also be used as the argument to a function, in which case the a.zName
+** field is not used.
+**
+** By default the Expr.zSpan field holds a human-readable description of
+** the expression that is used in the generation of error messages and
+** column labels.  In this case, Expr.zSpan is typically the text of a
+** column expression as it exists in a SELECT statement.  However, if
+** the bSpanIsTab flag is set, then zSpan is overloaded to mean the name
+** of the result column in the form: DATABASE.TABLE.COLUMN.  This later
+** form is used for name resolution with nested FROM clauses.
+*/
+struct ExprList {
+  int nExpr;             /* Number of expressions on the list */
+  struct ExprList_item { /* For each expression in the list */
+    Expr *pExpr;            /* The list of expressions */
+    char *zName;            /* Token associated with this expression */
+    char *zSpan;            /* Original text of the expression */
+    u8 sortOrder;           /* 1 for DESC or 0 for ASC */
+    unsigned done :1;       /* A flag to indicate when processing is finished */
+    unsigned bSpanIsTab :1; /* zSpan holds DB.TABLE.COLUMN */
+    unsigned reusable :1;   /* Constant expression is reusable */
+    union {
+      struct {
+        u16 iOrderByCol;      /* For ORDER BY, column number in result set */
+        u16 iAlias;           /* Index into Parse.aAlias[] for zName */
+      } x;
+      int iConstExprReg;      /* Register in which Expr value is cached */
+    } u;
+  } *a;                  /* Alloc a power of two greater or equal to nExpr */
+};
+
+/*
+** An instance of this structure is used by the parser to record both
+** the parse tree for an expression and the span of input text for an
+** expression.
+*/
+struct ExprSpan {
+  Expr *pExpr;          /* The expression parse tree */
+  const char *zStart;   /* First character of input text */
+  const char *zEnd;     /* One character past the end of input text */
+};
+
+/*
+** An instance of this structure can hold a simple list of identifiers,
+** such as the list "a,b,c" in the following statements:
+**
+**      INSERT INTO t(a,b,c) VALUES ...;
+**      CREATE INDEX idx ON t(a,b,c);
+**      CREATE TRIGGER trig BEFORE UPDATE ON t(a,b,c) ...;
+**
+** The IdList.a.idx field is used when the IdList represents the list of
+** column names after a table name in an INSERT statement.  In the statement
+**
+**     INSERT INTO t(a,b,c) ...
+**
+** If "a" is the k-th column of table "t", then IdList.a[0].idx==k.
+*/
+struct IdList {
+  struct IdList_item {
+    char *zName;      /* Name of the identifier */
+    int idx;          /* Index in some Table.aCol[] of a column named zName */
+  } *a;
+  int nId;         /* Number of identifiers on the list */
+};
+
+/*
+** The bitmask datatype defined below is used for various optimizations.
+**
+** Changing this from a 64-bit to a 32-bit type limits the number of
+** tables in a join to 32 instead of 64.  But it also reduces the size
+** of the library by 738 bytes on ix86.
+*/
+typedef u64 Bitmask;
+
+/*
+** The number of bits in a Bitmask.  "BMS" means "BitMask Size".
+*/
+#define BMS  ((int)(sizeof(Bitmask)*8))
+
+/*
+** A bit in a Bitmask
+*/
+#define MASKBIT(n)   (((Bitmask)1)<<(n))
+#define MASKBIT32(n) (((unsigned int)1)<<(n))
+
+/*
+** The following structure describes the FROM clause of a SELECT statement.
+** Each table or subquery in the FROM clause is a separate element of
+** the SrcList.a[] array.
+**
+** With the addition of multiple database support, the following structure
+** can also be used to describe a particular table such as the table that
+** is modified by an INSERT, DELETE, or UPDATE statement.  In standard SQL,
+** such a table must be a simple name: ID.  But in SQLite, the table can
+** now be identified by a database name, a dot, then the table name: ID.ID.
+**
+** The jointype starts out showing the join type between the current table
+** and the next table on the list.  The parser builds the list this way.
+** But sqlite3SrcListShiftJoinType() later shifts the jointypes so that each
+** jointype expresses the join between the table and the previous table.
+**
+** In the colUsed field, the high-order bit (bit 63) is set if the table
+** contains more than 63 columns and the 64-th or later column is used.
+*/
+struct SrcList {
+  int nSrc;        /* Number of tables or subqueries in the FROM clause */
+  u32 nAlloc;      /* Number of entries allocated in a[] below */
+  struct SrcList_item {
+    Schema *pSchema;  /* Schema to which this item is fixed */
+    char *zDatabase;  /* Name of database holding this table */
+    char *zName;      /* Name of the table */
+    char *zAlias;     /* The "B" part of a "A AS B" phrase.  zName is the "A" */
+    Table *pTab;      /* An SQL table corresponding to zName */
+    Select *pSelect;  /* A SELECT statement used in place of a table name */
+    int addrFillSub;  /* Address of subroutine to manifest a subquery */
+    int regReturn;    /* Register holding return address of addrFillSub */
+    int regResult;    /* Registers holding results of a co-routine */
+    u8 jointype;      /* Type of join between this able and the previous */
+    unsigned notIndexed :1;    /* True if there is a NOT INDEXED clause */
+    unsigned isCorrelated :1;  /* True if sub-query is correlated */
+    unsigned viaCoroutine :1;  /* Implemented as a co-routine */
+    unsigned isRecursive :1;   /* True for recursive reference in WITH */
+#ifndef SQLITE_OMIT_EXPLAIN
+    u8 iSelectId;     /* If pSelect!=0, the id of the sub-select in EQP */
+#endif
+    int iCursor;      /* The VDBE cursor number used to access this table */
+    Expr *pOn;        /* The ON clause of a join */
+    IdList *pUsing;   /* The USING clause of a join */
+    Bitmask colUsed;  /* Bit N (1<<N) set if column N of pTab is used */
+    char *zIndex;     /* Identifier from "INDEXED BY <zIndex>" clause */
+    Index *pIndex;    /* Index structure corresponding to zIndex, if any */
+  } a[1];             /* One entry for each identifier on the list */
+};
+
+/*
+** Permitted values of the SrcList.a.jointype field
+*/
+#define JT_INNER     0x0001    /* Any kind of inner or cross join */
+#define JT_CROSS     0x0002    /* Explicit use of the CROSS keyword */
+#define JT_NATURAL   0x0004    /* True for a "natural" join */
+#define JT_LEFT      0x0008    /* Left outer join */
+#define JT_RIGHT     0x0010    /* Right outer join */
+#define JT_OUTER     0x0020    /* The "OUTER" keyword is present */
+#define JT_ERROR     0x0040    /* unknown or unsupported join type */
+
+
+/*
+** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin()
+** and the WhereInfo.wctrlFlags member.
+*/
+#define WHERE_ORDERBY_NORMAL   0x0000 /* No-op */
+#define WHERE_ORDERBY_MIN      0x0001 /* ORDER BY processing for min() func */
+#define WHERE_ORDERBY_MAX      0x0002 /* ORDER BY processing for max() func */
+#define WHERE_ONEPASS_DESIRED  0x0004 /* Want to do one-pass UPDATE/DELETE */
+#define WHERE_DUPLICATES_OK    0x0008 /* Ok to return a row more than once */
+#define WHERE_OMIT_OPEN_CLOSE  0x0010 /* Table cursors are already open */
+#define WHERE_FORCE_TABLE      0x0020 /* Do not use an index-only search */
+#define WHERE_ONETABLE_ONLY    0x0040 /* Only code the 1st table in pTabList */
+                          /*   0x0080 // not currently used */
+#define WHERE_GROUPBY          0x0100 /* pOrderBy is really a GROUP BY */
+#define WHERE_DISTINCTBY       0x0200 /* pOrderby is really a DISTINCT clause */
+#define WHERE_WANT_DISTINCT    0x0400 /* All output needs to be distinct */
+#define WHERE_SORTBYGROUP      0x0800 /* Support sqlite3WhereIsSorted() */
+#define WHERE_REOPEN_IDX       0x1000 /* Try to use OP_ReopenIdx */
+
+/* Allowed return values from sqlite3WhereIsDistinct()
+*/
+#define WHERE_DISTINCT_NOOP      0  /* DISTINCT keyword not used */
+#define WHERE_DISTINCT_UNIQUE    1  /* No duplicates */
+#define WHERE_DISTINCT_ORDERED   2  /* All duplicates are adjacent */
+#define WHERE_DISTINCT_UNORDERED 3  /* Duplicates are scattered */
+
+/*
+** A NameContext defines a context in which to resolve table and column
+** names.  The context consists of a list of tables (the pSrcList) field and
+** a list of named expression (pEList).  The named expression list may
+** be NULL.  The pSrc corresponds to the FROM clause of a SELECT or
+** to the table being operated on by INSERT, UPDATE, or DELETE.  The
+** pEList corresponds to the result set of a SELECT and is NULL for
+** other statements.
+**
+** NameContexts can be nested.  When resolving names, the inner-most 
+** context is searched first.  If no match is found, the next outer
+** context is checked.  If there is still no match, the next context
+** is checked.  This process continues until either a match is found
+** or all contexts are check.  When a match is found, the nRef member of
+** the context containing the match is incremented. 
+**
+** Each subquery gets a new NameContext.  The pNext field points to the
+** NameContext in the parent query.  Thus the process of scanning the
+** NameContext list corresponds to searching through successively outer
+** subqueries looking for a match.
+*/
+struct NameContext {
+  Parse *pParse;       /* The parser */
+  SrcList *pSrcList;   /* One or more tables used to resolve names */
+  ExprList *pEList;    /* Optional list of result-set columns */
+  AggInfo *pAggInfo;   /* Information about aggregates at this level */
+  NameContext *pNext;  /* Next outer name context.  NULL for outermost */
+  int nRef;            /* Number of names resolved by this context */
+  int nErr;            /* Number of errors encountered while resolving names */
+  u16 ncFlags;         /* Zero or more NC_* flags defined below */
+};
+
+/*
+** Allowed values for the NameContext, ncFlags field.
+**
+** Note:  NC_MinMaxAgg must have the same value as SF_MinMaxAgg and
+** SQLITE_FUNC_MINMAX.
+** 
+*/
+#define NC_AllowAgg  0x0001  /* Aggregate functions are allowed here */
+#define NC_HasAgg    0x0002  /* One or more aggregate functions seen */
+#define NC_IsCheck   0x0004  /* True if resolving names in a CHECK constraint */
+#define NC_InAggFunc 0x0008  /* True if analyzing arguments to an agg func */
+#define NC_PartIdx   0x0010  /* True if resolving a partial index WHERE */
+#define NC_MinMaxAgg 0x1000  /* min/max aggregates seen.  See note above */
+
+/*
+** An instance of the following structure contains all information
+** needed to generate code for a single SELECT statement.
+**
+** nLimit is set to -1 if there is no LIMIT clause.  nOffset is set to 0.
+** If there is a LIMIT clause, the parser sets nLimit to the value of the
+** limit and nOffset to the value of the offset (or 0 if there is not
+** offset).  But later on, nLimit and nOffset become the memory locations
+** in the VDBE that record the limit and offset counters.
+**
+** addrOpenEphm[] entries contain the address of OP_OpenEphemeral opcodes.
+** These addresses must be stored so that we can go back and fill in
+** the P4_KEYINFO and P2 parameters later.  Neither the KeyInfo nor
+** the number of columns in P2 can be computed at the same time
+** as the OP_OpenEphm instruction is coded because not
+** enough information about the compound query is known at that point.
+** The KeyInfo for addrOpenTran[0] and [1] contains collating sequences
+** for the result set.  The KeyInfo for addrOpenEphm[2] contains collating
+** sequences for the ORDER BY clause.
+*/
+struct Select {
+  ExprList *pEList;      /* The fields of the result */
+  u8 op;                 /* One of: TK_UNION TK_ALL TK_INTERSECT TK_EXCEPT */
+  u16 selFlags;          /* Various SF_* values */
+  int iLimit, iOffset;   /* Memory registers holding LIMIT & OFFSET counters */
+#if SELECTTRACE_ENABLED
+  char zSelName[12];     /* Symbolic name of this SELECT use for debugging */
+#endif
+  int addrOpenEphm[2];   /* OP_OpenEphem opcodes related to this select */
+  u64 nSelectRow;        /* Estimated number of result rows */
+  SrcList *pSrc;         /* The FROM clause */
+  Expr *pWhere;          /* The WHERE clause */
+  ExprList *pGroupBy;    /* The GROUP BY clause */
+  Expr *pHaving;         /* The HAVING clause */
+  ExprList *pOrderBy;    /* The ORDER BY clause */
+  Select *pPrior;        /* Prior select in a compound select statement */
+  Select *pNext;         /* Next select to the left in a compound */
+  Expr *pLimit;          /* LIMIT expression. NULL means not used. */
+  Expr *pOffset;         /* OFFSET expression. NULL means not used. */
+  With *pWith;           /* WITH clause attached to this select. Or NULL. */
+};
+
+/*
+** Allowed values for Select.selFlags.  The "SF" prefix stands for
+** "Select Flag".
+*/
+#define SF_Distinct        0x0001  /* Output should be DISTINCT */
+#define SF_Resolved        0x0002  /* Identifiers have been resolved */
+#define SF_Aggregate       0x0004  /* Contains aggregate functions */
+#define SF_UsesEphemeral   0x0008  /* Uses the OpenEphemeral opcode */
+#define SF_Expanded        0x0010  /* sqlite3SelectExpand() called on this */
+#define SF_HasTypeInfo     0x0020  /* FROM subqueries have Table metadata */
+#define SF_Compound        0x0040  /* Part of a compound query */
+#define SF_Values          0x0080  /* Synthesized from VALUES clause */
+#define SF_AllValues       0x0100  /* All terms of compound are VALUES */
+#define SF_NestedFrom      0x0200  /* Part of a parenthesized FROM clause */
+#define SF_MaybeConvert    0x0400  /* Need convertCompoundSelectToSubquery() */
+#define SF_Recursive       0x0800  /* The recursive part of a recursive CTE */
+#define SF_MinMaxAgg       0x1000  /* Aggregate containing min() or max() */
+
+
+/*
+** The results of a SELECT can be distributed in several ways, as defined
+** by one of the following macros.  The "SRT" prefix means "SELECT Result
+** Type".
+**
+**     SRT_Union       Store results as a key in a temporary index 
+**                     identified by pDest->iSDParm.
+**
+**     SRT_Except      Remove results from the temporary index pDest->iSDParm.
+**
+**     SRT_Exists      Store a 1 in memory cell pDest->iSDParm if the result
+**                     set is not empty.
+**
+**     SRT_Discard     Throw the results away.  This is used by SELECT
+**                     statements within triggers whose only purpose is
+**                     the side-effects of functions.
+**
+** All of the above are free to ignore their ORDER BY clause. Those that
+** follow must honor the ORDER BY clause.
+**
+**     SRT_Output      Generate a row of output (using the OP_ResultRow
+**                     opcode) for each row in the result set.
+**
+**     SRT_Mem         Only valid if the result is a single column.
+**                     Store the first column of the first result row
+**                     in register pDest->iSDParm then abandon the rest
+**                     of the query.  This destination implies "LIMIT 1".
+**
+**     SRT_Set         The result must be a single column.  Store each
+**                     row of result as the key in table pDest->iSDParm. 
+**                     Apply the affinity pDest->affSdst before storing
+**                     results.  Used to implement "IN (SELECT ...)".
+**
+**     SRT_EphemTab    Create an temporary table pDest->iSDParm and store
+**                     the result there. The cursor is left open after
+**                     returning.  This is like SRT_Table except that
+**                     this destination uses OP_OpenEphemeral to create
+**                     the table first.
+**
+**     SRT_Coroutine   Generate a co-routine that returns a new row of
+**                     results each time it is invoked.  The entry point
+**                     of the co-routine is stored in register pDest->iSDParm
+**                     and the result row is stored in pDest->nDest registers
+**                     starting with pDest->iSdst.
+**
+**     SRT_Table       Store results in temporary table pDest->iSDParm.
+**     SRT_Fifo        This is like SRT_EphemTab except that the table
+**                     is assumed to already be open.  SRT_Fifo has
+**                     the additional property of being able to ignore
+**                     the ORDER BY clause.
+**
+**     SRT_DistFifo    Store results in a temporary table pDest->iSDParm.
+**                     But also use temporary table pDest->iSDParm+1 as
+**                     a record of all prior results and ignore any duplicate
+**                     rows.  Name means:  "Distinct Fifo".
+**
+**     SRT_Queue       Store results in priority queue pDest->iSDParm (really
+**                     an index).  Append a sequence number so that all entries
+**                     are distinct.
+**
+**     SRT_DistQueue   Store results in priority queue pDest->iSDParm only if
+**                     the same record has never been stored before.  The
+**                     index at pDest->iSDParm+1 hold all prior stores.
+*/
+#define SRT_Union        1  /* Store result as keys in an index */
+#define SRT_Except       2  /* Remove result from a UNION index */
+#define SRT_Exists       3  /* Store 1 if the result is not empty */
+#define SRT_Discard      4  /* Do not save the results anywhere */
+#define SRT_Fifo         5  /* Store result as data with an automatic rowid */
+#define SRT_DistFifo     6  /* Like SRT_Fifo, but unique results only */
+#define SRT_Queue        7  /* Store result in an queue */
+#define SRT_DistQueue    8  /* Like SRT_Queue, but unique results only */
+
+/* The ORDER BY clause is ignored for all of the above */
+#define IgnorableOrderby(X) ((X->eDest)<=SRT_DistQueue)
+
+#define SRT_Output       9  /* Output each row of result */
+#define SRT_Mem         10  /* Store result in a memory cell */
+#define SRT_Set         11  /* Store results as keys in an index */
+#define SRT_EphemTab    12  /* Create transient tab and store like SRT_Table */
+#define SRT_Coroutine   13  /* Generate a single row of result */
+#define SRT_Table       14  /* Store result as data with an automatic rowid */
+
+/*
+** An instance of this object describes where to put of the results of
+** a SELECT statement.
+*/
+struct SelectDest {
+  u8 eDest;            /* How to dispose of the results.  On of SRT_* above. */
+  char affSdst;        /* Affinity used when eDest==SRT_Set */
+  int iSDParm;         /* A parameter used by the eDest disposal method */
+  int iSdst;           /* Base register where results are written */
+  int nSdst;           /* Number of registers allocated */
+  ExprList *pOrderBy;  /* Key columns for SRT_Queue and SRT_DistQueue */
+};
+
+/*
+** During code generation of statements that do inserts into AUTOINCREMENT 
+** tables, the following information is attached to the Table.u.autoInc.p
+** pointer of each autoincrement table to record some side information that
+** the code generator needs.  We have to keep per-table autoincrement
+** information in case inserts are down within triggers.  Triggers do not
+** normally coordinate their activities, but we do need to coordinate the
+** loading and saving of autoincrement information.
+*/
+struct AutoincInfo {
+  AutoincInfo *pNext;   /* Next info block in a list of them all */
+  Table *pTab;          /* Table this info block refers to */
+  int iDb;              /* Index in sqlite3.aDb[] of database holding pTab */
+  int regCtr;           /* Memory register holding the rowid counter */
+};
+
+/*
+** Size of the column cache
+*/
+#ifndef SQLITE_N_COLCACHE
+# define SQLITE_N_COLCACHE 10
+#endif
+
+/*
+** At least one instance of the following structure is created for each 
+** trigger that may be fired while parsing an INSERT, UPDATE or DELETE
+** statement. All such objects are stored in the linked list headed at
+** Parse.pTriggerPrg and deleted once statement compilation has been
+** completed.
+**
+** A Vdbe sub-program that implements the body and WHEN clause of trigger
+** TriggerPrg.pTrigger, assuming a default ON CONFLICT clause of
+** TriggerPrg.orconf, is stored in the TriggerPrg.pProgram variable.
+** The Parse.pTriggerPrg list never contains two entries with the same
+** values for both pTrigger and orconf.
+**
+** The TriggerPrg.aColmask[0] variable is set to a mask of old.* columns
+** accessed (or set to 0 for triggers fired as a result of INSERT 
+** statements). Similarly, the TriggerPrg.aColmask[1] variable is set to
+** a mask of new.* columns used by the program.
+*/
+struct TriggerPrg {
+  Trigger *pTrigger;      /* Trigger this program was coded from */
+  TriggerPrg *pNext;      /* Next entry in Parse.pTriggerPrg list */
+  SubProgram *pProgram;   /* Program implementing pTrigger/orconf */
+  int orconf;             /* Default ON CONFLICT policy */
+  u32 aColmask[2];        /* Masks of old.*, new.* columns accessed */
+};
+
+/*
+** The yDbMask datatype for the bitmask of all attached databases.
+*/
+#if SQLITE_MAX_ATTACHED>30
+  typedef unsigned char yDbMask[(SQLITE_MAX_ATTACHED+9)/8];
+# define DbMaskTest(M,I)    (((M)[(I)/8]&(1<<((I)&7)))!=0)
+# define DbMaskZero(M)      memset((M),0,sizeof(M))
+# define DbMaskSet(M,I)     (M)[(I)/8]|=(1<<((I)&7))
+# define DbMaskAllZero(M)   sqlite3DbMaskAllZero(M)
+# define DbMaskNonZero(M)   (sqlite3DbMaskAllZero(M)==0)
+#else
+  typedef unsigned int yDbMask;
+# define DbMaskTest(M,I)    (((M)&(((yDbMask)1)<<(I)))!=0)
+# define DbMaskZero(M)      (M)=0
+# define DbMaskSet(M,I)     (M)|=(((yDbMask)1)<<(I))
+# define DbMaskAllZero(M)   (M)==0
+# define DbMaskNonZero(M)   (M)!=0
+#endif
+
+/*
+** An SQL parser context.  A copy of this structure is passed through
+** the parser and down into all the parser action routine in order to
+** carry around information that is global to the entire parse.
+**
+** The structure is divided into two parts.  When the parser and code
+** generate call themselves recursively, the first part of the structure
+** is constant but the second part is reset at the beginning and end of
+** each recursion.
+**
+** The nTableLock and aTableLock variables are only used if the shared-cache 
+** feature is enabled (if sqlite3Tsd()->useSharedData is true). They are
+** used to store the set of table-locks required by the statement being
+** compiled. Function sqlite3TableLock() is used to add entries to the
+** list.
+*/
+struct Parse {
+  sqlite3 *db;         /* The main database structure */
+  char *zErrMsg;       /* An error message */
+  Vdbe *pVdbe;         /* An engine for executing database bytecode */
+  int rc;              /* Return code from execution */
+  u8 colNamesSet;      /* TRUE after OP_ColumnName has been issued to pVdbe */
+  u8 checkSchema;      /* Causes schema cookie check after an error */
+  u8 nested;           /* Number of nested calls to the parser/code generator */
+  u8 nTempReg;         /* Number of temporary registers in aTempReg[] */
+  u8 isMultiWrite;     /* True if statement may modify/insert multiple rows */
+  u8 mayAbort;         /* True if statement may throw an ABORT exception */
+  u8 hasCompound;      /* Need to invoke convertCompoundSelectToSubquery() */
+  u8 okConstFactor;    /* OK to factor out constants */
+  int aTempReg[8];     /* Holding area for temporary registers */
+  int nRangeReg;       /* Size of the temporary register block */
+  int iRangeReg;       /* First register in temporary register block */
+  int nErr;            /* Number of errors seen */
+  int nTab;            /* Number of previously allocated VDBE cursors */
+  int nMem;            /* Number of memory cells used so far */
+  int nSet;            /* Number of sets used so far */
+  int nOnce;           /* Number of OP_Once instructions so far */
+  int nOpAlloc;        /* Number of slots allocated for Vdbe.aOp[] */
+  int iFixedOp;        /* Never back out opcodes iFixedOp-1 or earlier */
+  int ckBase;          /* Base register of data during check constraints */
+  int iPartIdxTab;     /* Table corresponding to a partial index */
+  int iCacheLevel;     /* ColCache valid when aColCache[].iLevel<=iCacheLevel */
+  int iCacheCnt;       /* Counter used to generate aColCache[].lru values */
+  int nLabel;          /* Number of labels used */
+  int *aLabel;         /* Space to hold the labels */
+  struct yColCache {
+    int iTable;           /* Table cursor number */
+    i16 iColumn;          /* Table column number */
+    u8 tempReg;           /* iReg is a temp register that needs to be freed */
+    int iLevel;           /* Nesting level */
+    int iReg;             /* Reg with value of this column. 0 means none. */
+    int lru;              /* Least recently used entry has the smallest value */
+  } aColCache[SQLITE_N_COLCACHE];  /* One for each column cache entry */
+  ExprList *pConstExpr;/* Constant expressions */
+  Token constraintName;/* Name of the constraint currently being parsed */
+  yDbMask writeMask;   /* Start a write transaction on these databases */
+  yDbMask cookieMask;  /* Bitmask of schema verified databases */
+  int cookieValue[SQLITE_MAX_ATTACHED+2];  /* Values of cookies to verify */
+  int regRowid;        /* Register holding rowid of CREATE TABLE entry */
+  int regRoot;         /* Register holding root page number for new objects */
+  int nMaxArg;         /* Max args passed to user function by sub-program */
+#if SELECTTRACE_ENABLED
+  int nSelect;         /* Number of SELECT statements seen */
+  int nSelectIndent;   /* How far to indent SELECTTRACE() output */
+#endif
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  int nTableLock;        /* Number of locks in aTableLock */
+  TableLock *aTableLock; /* Required table locks for shared-cache mode */
+#endif
+  AutoincInfo *pAinc;  /* Information about AUTOINCREMENT counters */
+
+  /* Information used while coding trigger programs. */
+  Parse *pToplevel;    /* Parse structure for main program (or NULL) */
+  Table *pTriggerTab;  /* Table triggers are being coded for */
+  int addrCrTab;       /* Address of OP_CreateTable opcode on CREATE TABLE */
+  int addrSkipPK;      /* Address of instruction to skip PRIMARY KEY index */
+  u32 nQueryLoop;      /* Est number of iterations of a query (10*log2(N)) */
+  u32 oldmask;         /* Mask of old.* columns referenced */
+  u32 newmask;         /* Mask of new.* columns referenced */
+  u8 eTriggerOp;       /* TK_UPDATE, TK_INSERT or TK_DELETE */
+  u8 eOrconf;          /* Default ON CONFLICT policy for trigger steps */
+  u8 disableTriggers;  /* True to disable triggers */
+
+  /************************************************************************
+  ** Above is constant between recursions.  Below is reset before and after
+  ** each recursion.  The boundary between these two regions is determined
+  ** using offsetof(Parse,nVar) so the nVar field must be the first field
+  ** in the recursive region.
+  ************************************************************************/
+
+  int nVar;                 /* Number of '?' variables seen in the SQL so far */
+  int nzVar;                /* Number of available slots in azVar[] */
+  u8 iPkSortOrder;          /* ASC or DESC for INTEGER PRIMARY KEY */
+  u8 bFreeWith;             /* True if pWith should be freed with parser */
+  u8 explain;               /* True if the EXPLAIN flag is found on the query */
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  u8 declareVtab;           /* True if inside sqlite3_declare_vtab() */
+  int nVtabLock;            /* Number of virtual tables to lock */
+#endif
+  int nAlias;               /* Number of aliased result set columns */
+  int nHeight;              /* Expression tree height of current sub-select */
+#ifndef SQLITE_OMIT_EXPLAIN
+  int iSelectId;            /* ID of current select for EXPLAIN output */
+  int iNextSelectId;        /* Next available select ID for EXPLAIN output */
+#endif
+  char **azVar;             /* Pointers to names of parameters */
+  Vdbe *pReprepare;         /* VM being reprepared (sqlite3Reprepare()) */
+  const char *zTail;        /* All SQL text past the last semicolon parsed */
+  Table *pNewTable;         /* A table being constructed by CREATE TABLE */
+  Trigger *pNewTrigger;     /* Trigger under construct by a CREATE TRIGGER */
+  const char *zAuthContext; /* The 6th parameter to db->xAuth callbacks */
+  Token sNameToken;         /* Token with unqualified schema object name */
+  Token sLastToken;         /* The last token parsed */
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  Token sArg;               /* Complete text of a module argument */
+  Table **apVtabLock;       /* Pointer to virtual tables needing locking */
+#endif
+  Table *pZombieTab;        /* List of Table objects to delete after code gen */
+  TriggerPrg *pTriggerPrg;  /* Linked list of coded triggers */
+  With *pWith;              /* Current WITH clause, or NULL */
+};
+
+/*
+** Return true if currently inside an sqlite3_declare_vtab() call.
+*/
+#ifdef SQLITE_OMIT_VIRTUALTABLE
+  #define IN_DECLARE_VTAB 0
+#else
+  #define IN_DECLARE_VTAB (pParse->declareVtab)
+#endif
+
+/*
+** An instance of the following structure can be declared on a stack and used
+** to save the Parse.zAuthContext value so that it can be restored later.
+*/
+struct AuthContext {
+  const char *zAuthContext;   /* Put saved Parse.zAuthContext here */
+  Parse *pParse;              /* The Parse structure */
+};
+
+/*
+** Bitfield flags for P5 value in various opcodes.
+*/
+#define OPFLAG_NCHANGE       0x01    /* Set to update db->nChange */
+#define OPFLAG_EPHEM         0x01    /* OP_Column: Ephemeral output is ok */
+#define OPFLAG_LASTROWID     0x02    /* Set to update db->lastRowid */
+#define OPFLAG_ISUPDATE      0x04    /* This OP_Insert is an sql UPDATE */
+#define OPFLAG_APPEND        0x08    /* This is likely to be an append */
+#define OPFLAG_USESEEKRESULT 0x10    /* Try to avoid a seek in BtreeInsert() */
+#define OPFLAG_LENGTHARG     0x40    /* OP_Column only used for length() */
+#define OPFLAG_TYPEOFARG     0x80    /* OP_Column only used for typeof() */
+#define OPFLAG_BULKCSR       0x01    /* OP_Open** used to open bulk cursor */
+#define OPFLAG_P2ISREG       0x02    /* P2 to OP_Open** is a register number */
+#define OPFLAG_PERMUTE       0x01    /* OP_Compare: use the permutation */
+
+/*
+ * Each trigger present in the database schema is stored as an instance of
+ * struct Trigger. 
+ *
+ * Pointers to instances of struct Trigger are stored in two ways.
+ * 1. In the "trigHash" hash table (part of the sqlite3* that represents the 
+ *    database). This allows Trigger structures to be retrieved by name.
+ * 2. All triggers associated with a single table form a linked list, using the
+ *    pNext member of struct Trigger. A pointer to the first element of the
+ *    linked list is stored as the "pTrigger" member of the associated
+ *    struct Table.
+ *
+ * The "step_list" member points to the first element of a linked list
+ * containing the SQL statements specified as the trigger program.
+ */
+struct Trigger {
+  char *zName;            /* The name of the trigger                        */
+  char *table;            /* The table or view to which the trigger applies */
+  u8 op;                  /* One of TK_DELETE, TK_UPDATE, TK_INSERT         */
+  u8 tr_tm;               /* One of TRIGGER_BEFORE, TRIGGER_AFTER */
+  Expr *pWhen;            /* The WHEN clause of the expression (may be NULL) */
+  IdList *pColumns;       /* If this is an UPDATE OF <column-list> trigger,
+                             the <column-list> is stored here */
+  Schema *pSchema;        /* Schema containing the trigger */
+  Schema *pTabSchema;     /* Schema containing the table */
+  TriggerStep *step_list; /* Link list of trigger program steps             */
+  Trigger *pNext;         /* Next trigger associated with the table */
+};
+
+/*
+** A trigger is either a BEFORE or an AFTER trigger.  The following constants
+** determine which. 
+**
+** If there are multiple triggers, you might of some BEFORE and some AFTER.
+** In that cases, the constants below can be ORed together.
+*/
+#define TRIGGER_BEFORE  1
+#define TRIGGER_AFTER   2
+
+/*
+ * An instance of struct TriggerStep is used to store a single SQL statement
+ * that is a part of a trigger-program. 
+ *
+ * Instances of struct TriggerStep are stored in a singly linked list (linked
+ * using the "pNext" member) referenced by the "step_list" member of the 
+ * associated struct Trigger instance. The first element of the linked list is
+ * the first step of the trigger-program.
+ * 
+ * The "op" member indicates whether this is a "DELETE", "INSERT", "UPDATE" or
+ * "SELECT" statement. The meanings of the other members is determined by the 
+ * value of "op" as follows:
+ *
+ * (op == TK_INSERT)
+ * orconf    -> stores the ON CONFLICT algorithm
+ * pSelect   -> If this is an INSERT INTO ... SELECT ... statement, then
+ *              this stores a pointer to the SELECT statement. Otherwise NULL.
+ * target    -> A token holding the quoted name of the table to insert into.
+ * pExprList -> If this is an INSERT INTO ... VALUES ... statement, then
+ *              this stores values to be inserted. Otherwise NULL.
+ * pIdList   -> If this is an INSERT INTO ... (<column-names>) VALUES ... 
+ *              statement, then this stores the column-names to be
+ *              inserted into.
+ *
+ * (op == TK_DELETE)
+ * target    -> A token holding the quoted name of the table to delete from.
+ * pWhere    -> The WHERE clause of the DELETE statement if one is specified.
+ *              Otherwise NULL.
+ * 
+ * (op == TK_UPDATE)
+ * target    -> A token holding the quoted name of the table to update rows of.
+ * pWhere    -> The WHERE clause of the UPDATE statement if one is specified.
+ *              Otherwise NULL.
+ * pExprList -> A list of the columns to update and the expressions to update
+ *              them to. See sqlite3Update() documentation of "pChanges"
+ *              argument.
+ * 
+ */
+struct TriggerStep {
+  u8 op;               /* One of TK_DELETE, TK_UPDATE, TK_INSERT, TK_SELECT */
+  u8 orconf;           /* OE_Rollback etc. */
+  Trigger *pTrig;      /* The trigger that this step is a part of */
+  Select *pSelect;     /* SELECT statment or RHS of INSERT INTO .. SELECT ... */
+  Token target;        /* Target table for DELETE, UPDATE, INSERT */
+  Expr *pWhere;        /* The WHERE clause for DELETE or UPDATE steps */
+  ExprList *pExprList; /* SET clause for UPDATE. */
+  IdList *pIdList;     /* Column names for INSERT */
+  TriggerStep *pNext;  /* Next in the link-list */
+  TriggerStep *pLast;  /* Last element in link-list. Valid for 1st elem only */
+};
+
+/*
+** The following structure contains information used by the sqliteFix...
+** routines as they walk the parse tree to make database references
+** explicit.  
+*/
+typedef struct DbFixer DbFixer;
+struct DbFixer {
+  Parse *pParse;      /* The parsing context.  Error messages written here */
+  Schema *pSchema;    /* Fix items to this schema */
+  int bVarOnly;       /* Check for variable references only */
+  const char *zDb;    /* Make sure all objects are contained in this database */
+  const char *zType;  /* Type of the container - used for error messages */
+  const Token *pName; /* Name of the container - used for error messages */
+};
+
+/*
+** An objected used to accumulate the text of a string where we
+** do not necessarily know how big the string will be in the end.
+*/
+struct StrAccum {
+  sqlite3 *db;         /* Optional database for lookaside.  Can be NULL */
+  char *zBase;         /* A base allocation.  Not from malloc. */
+  char *zText;         /* The string collected so far */
+  int  nChar;          /* Length of the string so far */
+  int  nAlloc;         /* Amount of space allocated in zText */
+  int  mxAlloc;        /* Maximum allowed string length */
+  u8   useMalloc;      /* 0: none,  1: sqlite3DbMalloc,  2: sqlite3_malloc */
+  u8   accError;       /* STRACCUM_NOMEM or STRACCUM_TOOBIG */
+};
+#define STRACCUM_NOMEM   1
+#define STRACCUM_TOOBIG  2
+
+/*
+** A pointer to this structure is used to communicate information
+** from sqlite3Init and OP_ParseSchema into the sqlite3InitCallback.
+*/
+typedef struct {
+  sqlite3 *db;        /* The database being initialized */
+  char **pzErrMsg;    /* Error message stored here */
+  int iDb;            /* 0 for main database.  1 for TEMP, 2.. for ATTACHed */
+  int rc;             /* Result code stored here */
+} InitData;
+
+/*
+** Structure containing global configuration data for the SQLite library.
+**
+** This structure also contains some state information.
+*/
+struct Sqlite3Config {
+  int bMemstat;                     /* True to enable memory status */
+  int bCoreMutex;                   /* True to enable core mutexing */
+  int bFullMutex;                   /* True to enable full mutexing */
+  int bOpenUri;                     /* True to interpret filenames as URIs */
+  int bUseCis;                      /* Use covering indices for full-scans */
+  int mxStrlen;                     /* Maximum string length */
+  int neverCorrupt;                 /* Database is always well-formed */
+  int szLookaside;                  /* Default lookaside buffer size */
+  int nLookaside;                   /* Default lookaside buffer count */
+  sqlite3_mem_methods m;            /* Low-level memory allocation interface */
+  sqlite3_mutex_methods mutex;      /* Low-level mutex interface */
+  sqlite3_pcache_methods2 pcache2;  /* Low-level page-cache interface */
+  void *pHeap;                      /* Heap storage space */
+  int nHeap;                        /* Size of pHeap[] */
+  int mnReq, mxReq;                 /* Min and max heap requests sizes */
+  sqlite3_int64 szMmap;             /* mmap() space per open file */
+  sqlite3_int64 mxMmap;             /* Maximum value for szMmap */
+  void *pScratch;                   /* Scratch memory */
+  int szScratch;                    /* Size of each scratch buffer */
+  int nScratch;                     /* Number of scratch buffers */
+  void *pPage;                      /* Page cache memory */
+  int szPage;                       /* Size of each page in pPage[] */
+  int nPage;                        /* Number of pages in pPage[] */
+  int mxParserStack;                /* maximum depth of the parser stack */
+  int sharedCacheEnabled;           /* true if shared-cache mode enabled */
+  u32 szPma;                        /* Maximum Sorter PMA size */
+  /* The above might be initialized to non-zero.  The following need to always
+  ** initially be zero, however. */
+  int isInit;                       /* True after initialization has finished */
+  int inProgress;                   /* True while initialization in progress */
+  int isMutexInit;                  /* True after mutexes are initialized */
+  int isMallocInit;                 /* True after malloc is initialized */
+  int isPCacheInit;                 /* True after malloc is initialized */
+  int nRefInitMutex;                /* Number of users of pInitMutex */
+  sqlite3_mutex *pInitMutex;        /* Mutex used by sqlite3_initialize() */
+  void (*xLog)(void*,int,const char*); /* Function for logging */
+  void *pLogArg;                       /* First argument to xLog() */
+#ifdef SQLITE_ENABLE_SQLLOG
+  void(*xSqllog)(void*,sqlite3*,const char*, int);
+  void *pSqllogArg;
+#endif
+#ifdef SQLITE_VDBE_COVERAGE
+  /* The following callback (if not NULL) is invoked on every VDBE branch
+  ** operation.  Set the callback using SQLITE_TESTCTRL_VDBE_COVERAGE.
+  */
+  void (*xVdbeBranch)(void*,int iSrcLine,u8 eThis,u8 eMx);  /* Callback */
+  void *pVdbeBranchArg;                                     /* 1st argument */
+#endif
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+  int (*xTestCallback)(int);        /* Invoked by sqlite3FaultSim() */
+#endif
+  int bLocaltimeFault;              /* True to fail localtime() calls */
+};
+
+/*
+** This macro is used inside of assert() statements to indicate that
+** the assert is only valid on a well-formed database.  Instead of:
+**
+**     assert( X );
+**
+** One writes:
+**
+**     assert( X || CORRUPT_DB );
+**
+** CORRUPT_DB is true during normal operation.  CORRUPT_DB does not indicate
+** that the database is definitely corrupt, only that it might be corrupt.
+** For most test cases, CORRUPT_DB is set to false using a special
+** sqlite3_test_control().  This enables assert() statements to prove
+** things that are always true for well-formed databases.
+*/
+#define CORRUPT_DB  (sqlite3Config.neverCorrupt==0)
+
+/*
+** Context pointer passed down through the tree-walk.
+*/
+struct Walker {
+  int (*xExprCallback)(Walker*, Expr*);     /* Callback for expressions */
+  int (*xSelectCallback)(Walker*,Select*);  /* Callback for SELECTs */
+  void (*xSelectCallback2)(Walker*,Select*);/* Second callback for SELECTs */
+  Parse *pParse;                            /* Parser context.  */
+  int walkerDepth;                          /* Number of subqueries */
+  u8 eCode;                                 /* A small processing code */
+  union {                                   /* Extra data for callback */
+    NameContext *pNC;                          /* Naming context */
+    int n;                                     /* A counter */
+    int iCur;                                  /* A cursor number */
+    SrcList *pSrcList;                         /* FROM clause */
+    struct SrcCount *pSrcCount;                /* Counting column references */
+  } u;
+};
+
+/* Forward declarations */
+SQLITE_PRIVATE int sqlite3WalkExpr(Walker*, Expr*);
+SQLITE_PRIVATE int sqlite3WalkExprList(Walker*, ExprList*);
+SQLITE_PRIVATE int sqlite3WalkSelect(Walker*, Select*);
+SQLITE_PRIVATE int sqlite3WalkSelectExpr(Walker*, Select*);
+SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker*, Select*);
+
+/*
+** Return code from the parse-tree walking primitives and their
+** callbacks.
+*/
+#define WRC_Continue    0   /* Continue down into children */
+#define WRC_Prune       1   /* Omit children but continue walking siblings */
+#define WRC_Abort       2   /* Abandon the tree walk */
+
+/*
+** An instance of this structure represents a set of one or more CTEs
+** (common table expressions) created by a single WITH clause.
+*/
+struct With {
+  int nCte;                       /* Number of CTEs in the WITH clause */
+  With *pOuter;                   /* Containing WITH clause, or NULL */
+  struct Cte {                    /* For each CTE in the WITH clause.... */
+    char *zName;                    /* Name of this CTE */
+    ExprList *pCols;                /* List of explicit column names, or NULL */
+    Select *pSelect;                /* The definition of this CTE */
+    const char *zErr;               /* Error message for circular references */
+  } a[1];
+};
+
+#ifdef SQLITE_DEBUG
+/*
+** An instance of the TreeView object is used for printing the content of
+** data structures on sqlite3DebugPrintf() using a tree-like view.
+*/
+struct TreeView {
+  int iLevel;             /* Which level of the tree we are on */
+  u8  bLine[100];         /* Draw vertical in column i if bLine[i] is true */
+};
+#endif /* SQLITE_DEBUG */
+
+/*
+** Assuming zIn points to the first byte of a UTF-8 character,
+** advance zIn to point to the first byte of the next UTF-8 character.
+*/
+#define SQLITE_SKIP_UTF8(zIn) {                        \
+  if( (*(zIn++))>=0xc0 ){                              \
+    while( (*zIn & 0xc0)==0x80 ){ zIn++; }             \
+  }                                                    \
+}
+
+/*
+** The SQLITE_*_BKPT macros are substitutes for the error codes with
+** the same name but without the _BKPT suffix.  These macros invoke
+** routines that report the line-number on which the error originated
+** using sqlite3_log().  The routines also provide a convenient place
+** to set a debugger breakpoint.
+*/
+SQLITE_PRIVATE int sqlite3CorruptError(int);
+SQLITE_PRIVATE int sqlite3MisuseError(int);
+SQLITE_PRIVATE int sqlite3CantopenError(int);
+#define SQLITE_CORRUPT_BKPT sqlite3CorruptError(__LINE__)
+#define SQLITE_MISUSE_BKPT sqlite3MisuseError(__LINE__)
+#define SQLITE_CANTOPEN_BKPT sqlite3CantopenError(__LINE__)
+
+
+/*
+** FTS4 is really an extension for FTS3.  It is enabled using the
+** SQLITE_ENABLE_FTS3 macro.  But to avoid confusion we also call
+** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
+*/
+#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
+# define SQLITE_ENABLE_FTS3 1
+#endif
+
+/*
+** The ctype.h header is needed for non-ASCII systems.  It is also
+** needed by FTS3 when FTS3 is included in the amalgamation.
+*/
+#if !defined(SQLITE_ASCII) || \
+    (defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_AMALGAMATION))
+# include <ctype.h>
+#endif
+
+/*
+** The following macros mimic the standard library functions toupper(),
+** isspace(), isalnum(), isdigit() and isxdigit(), respectively. The
+** sqlite versions only work for ASCII characters, regardless of locale.
+*/
+#ifdef SQLITE_ASCII
+# define sqlite3Toupper(x)  ((x)&~(sqlite3CtypeMap[(unsigned char)(x)]&0x20))
+# define sqlite3Isspace(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x01)
+# define sqlite3Isalnum(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x06)
+# define sqlite3Isalpha(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x02)
+# define sqlite3Isdigit(x)   (sqlite3CtypeMap[(unsigned char)(x)]&0x04)
+# define sqlite3Isxdigit(x)  (sqlite3CtypeMap[(unsigned char)(x)]&0x08)
+# define sqlite3Tolower(x)   (sqlite3UpperToLower[(unsigned char)(x)])
+#else
+# define sqlite3Toupper(x)   toupper((unsigned char)(x))
+# define sqlite3Isspace(x)   isspace((unsigned char)(x))
+# define sqlite3Isalnum(x)   isalnum((unsigned char)(x))
+# define sqlite3Isalpha(x)   isalpha((unsigned char)(x))
+# define sqlite3Isdigit(x)   isdigit((unsigned char)(x))
+# define sqlite3Isxdigit(x)  isxdigit((unsigned char)(x))
+# define sqlite3Tolower(x)   tolower((unsigned char)(x))
+#endif
+SQLITE_PRIVATE int sqlite3IsIdChar(u8);
+
+/*
+** Internal function prototypes
+*/
+#define sqlite3StrICmp sqlite3_stricmp
+SQLITE_PRIVATE int sqlite3Strlen30(const char*);
+#define sqlite3StrNICmp sqlite3_strnicmp
+
+SQLITE_PRIVATE int sqlite3MallocInit(void);
+SQLITE_PRIVATE void sqlite3MallocEnd(void);
+SQLITE_PRIVATE void *sqlite3Malloc(u64);
+SQLITE_PRIVATE void *sqlite3MallocZero(u64);
+SQLITE_PRIVATE void *sqlite3DbMallocZero(sqlite3*, u64);
+SQLITE_PRIVATE void *sqlite3DbMallocRaw(sqlite3*, u64);
+SQLITE_PRIVATE char *sqlite3DbStrDup(sqlite3*,const char*);
+SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3*,const char*, u64);
+SQLITE_PRIVATE void *sqlite3Realloc(void*, u64);
+SQLITE_PRIVATE void *sqlite3DbReallocOrFree(sqlite3 *, void *, u64);
+SQLITE_PRIVATE void *sqlite3DbRealloc(sqlite3 *, void *, u64);
+SQLITE_PRIVATE void sqlite3DbFree(sqlite3*, void*);
+SQLITE_PRIVATE int sqlite3MallocSize(void*);
+SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3*, void*);
+SQLITE_PRIVATE void *sqlite3ScratchMalloc(int);
+SQLITE_PRIVATE void sqlite3ScratchFree(void*);
+SQLITE_PRIVATE void *sqlite3PageMalloc(int);
+SQLITE_PRIVATE void sqlite3PageFree(void*);
+SQLITE_PRIVATE void sqlite3MemSetDefault(void);
+SQLITE_PRIVATE void sqlite3BenignMallocHooks(void (*)(void), void (*)(void));
+SQLITE_PRIVATE int sqlite3HeapNearlyFull(void);
+
+/*
+** On systems with ample stack space and that support alloca(), make
+** use of alloca() to obtain space for large automatic objects.  By default,
+** obtain space from malloc().
+**
+** The alloca() routine never returns NULL.  This will cause code paths
+** that deal with sqlite3StackAlloc() failures to be unreachable.
+*/
+#ifdef SQLITE_USE_ALLOCA
+# define sqlite3StackAllocRaw(D,N)   alloca(N)
+# define sqlite3StackAllocZero(D,N)  memset(alloca(N), 0, N)
+# define sqlite3StackFree(D,P)       
+#else
+# define sqlite3StackAllocRaw(D,N)   sqlite3DbMallocRaw(D,N)
+# define sqlite3StackAllocZero(D,N)  sqlite3DbMallocZero(D,N)
+# define sqlite3StackFree(D,P)       sqlite3DbFree(D,P)
+#endif
+
+#ifdef SQLITE_ENABLE_MEMSYS3
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys3(void);
+#endif
+#ifdef SQLITE_ENABLE_MEMSYS5
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys5(void);
+#endif
+
+
+#ifndef SQLITE_MUTEX_OMIT
+SQLITE_PRIVATE   sqlite3_mutex_methods const *sqlite3DefaultMutex(void);
+SQLITE_PRIVATE   sqlite3_mutex_methods const *sqlite3NoopMutex(void);
+SQLITE_PRIVATE   sqlite3_mutex *sqlite3MutexAlloc(int);
+SQLITE_PRIVATE   int sqlite3MutexInit(void);
+SQLITE_PRIVATE   int sqlite3MutexEnd(void);
+#endif
+
+SQLITE_PRIVATE int sqlite3StatusValue(int);
+SQLITE_PRIVATE void sqlite3StatusAdd(int, int);
+SQLITE_PRIVATE void sqlite3StatusSet(int, int);
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+SQLITE_PRIVATE   int sqlite3IsNaN(double);
+#else
+# define sqlite3IsNaN(X)  0
+#endif
+
+/*
+** An instance of the following structure holds information about SQL
+** functions arguments that are the parameters to the printf() function.
+*/
+struct PrintfArguments {
+  int nArg;                /* Total number of arguments */
+  int nUsed;               /* Number of arguments used so far */
+  sqlite3_value **apArg;   /* The argument values */
+};
+
+#define SQLITE_PRINTF_INTERNAL 0x01
+#define SQLITE_PRINTF_SQLFUNC  0x02
+SQLITE_PRIVATE void sqlite3VXPrintf(StrAccum*, u32, const char*, va_list);
+SQLITE_PRIVATE void sqlite3XPrintf(StrAccum*, u32, const char*, ...);
+SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3*,const char*, ...);
+SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3*,const char*, va_list);
+SQLITE_PRIVATE char *sqlite3MAppendf(sqlite3*,char*,const char*,...);
+#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
+SQLITE_PRIVATE   void sqlite3DebugPrintf(const char*, ...);
+#endif
+#if defined(SQLITE_TEST)
+SQLITE_PRIVATE   void *sqlite3TestTextToPtr(const char*);
+#endif
+
+#if defined(SQLITE_DEBUG)
+SQLITE_PRIVATE   TreeView *sqlite3TreeViewPush(TreeView*,u8);
+SQLITE_PRIVATE   void sqlite3TreeViewPop(TreeView*);
+SQLITE_PRIVATE   void sqlite3TreeViewLine(TreeView*, const char*, ...);
+SQLITE_PRIVATE   void sqlite3TreeViewItem(TreeView*, const char*, u8);
+SQLITE_PRIVATE   void sqlite3TreeViewExpr(TreeView*, const Expr*, u8);
+SQLITE_PRIVATE   void sqlite3TreeViewExprList(TreeView*, const ExprList*, u8, const char*);
+SQLITE_PRIVATE   void sqlite3TreeViewSelect(TreeView*, const Select*, u8);
+#endif
+
+
+SQLITE_PRIVATE void sqlite3SetString(char **, sqlite3*, const char*, ...);
+SQLITE_PRIVATE void sqlite3ErrorMsg(Parse*, const char*, ...);
+SQLITE_PRIVATE int sqlite3Dequote(char*);
+SQLITE_PRIVATE int sqlite3KeywordCode(const unsigned char*, int);
+SQLITE_PRIVATE int sqlite3RunParser(Parse*, const char*, char **);
+SQLITE_PRIVATE void sqlite3FinishCoding(Parse*);
+SQLITE_PRIVATE int sqlite3GetTempReg(Parse*);
+SQLITE_PRIVATE void sqlite3ReleaseTempReg(Parse*,int);
+SQLITE_PRIVATE int sqlite3GetTempRange(Parse*,int);
+SQLITE_PRIVATE void sqlite3ReleaseTempRange(Parse*,int,int);
+SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse*);
+SQLITE_PRIVATE Expr *sqlite3ExprAlloc(sqlite3*,int,const Token*,int);
+SQLITE_PRIVATE Expr *sqlite3Expr(sqlite3*,int,const char*);
+SQLITE_PRIVATE void sqlite3ExprAttachSubtrees(sqlite3*,Expr*,Expr*,Expr*);
+SQLITE_PRIVATE Expr *sqlite3PExpr(Parse*, int, Expr*, Expr*, const Token*);
+SQLITE_PRIVATE Expr *sqlite3ExprAnd(sqlite3*,Expr*, Expr*);
+SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse*,ExprList*, Token*);
+SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse*, Expr*);
+SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3*, Expr*);
+SQLITE_PRIVATE ExprList *sqlite3ExprListAppend(Parse*,ExprList*,Expr*);
+SQLITE_PRIVATE void sqlite3ExprListSetName(Parse*,ExprList*,Token*,int);
+SQLITE_PRIVATE void sqlite3ExprListSetSpan(Parse*,ExprList*,ExprSpan*);
+SQLITE_PRIVATE void sqlite3ExprListDelete(sqlite3*, ExprList*);
+SQLITE_PRIVATE int sqlite3Init(sqlite3*, char**);
+SQLITE_PRIVATE int sqlite3InitCallback(void*, int, char**, char**);
+SQLITE_PRIVATE void sqlite3Pragma(Parse*,Token*,Token*,Token*,int);
+SQLITE_PRIVATE void sqlite3ResetAllSchemasOfConnection(sqlite3*);
+SQLITE_PRIVATE void sqlite3ResetOneSchema(sqlite3*,int);
+SQLITE_PRIVATE void sqlite3CollapseDatabaseArray(sqlite3*);
+SQLITE_PRIVATE void sqlite3BeginParse(Parse*,int);
+SQLITE_PRIVATE void sqlite3CommitInternalChanges(sqlite3*);
+SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse*,Select*);
+SQLITE_PRIVATE void sqlite3OpenMasterTable(Parse *, int);
+SQLITE_PRIVATE Index *sqlite3PrimaryKeyIndex(Table*);
+SQLITE_PRIVATE i16 sqlite3ColumnOfIndex(Index*, i16);
+SQLITE_PRIVATE void sqlite3StartTable(Parse*,Token*,Token*,int,int,int,int);
+SQLITE_PRIVATE void sqlite3AddColumn(Parse*,Token*);
+SQLITE_PRIVATE void sqlite3AddNotNull(Parse*, int);
+SQLITE_PRIVATE void sqlite3AddPrimaryKey(Parse*, ExprList*, int, int, int);
+SQLITE_PRIVATE void sqlite3AddCheckConstraint(Parse*, Expr*);
+SQLITE_PRIVATE void sqlite3AddColumnType(Parse*,Token*);
+SQLITE_PRIVATE void sqlite3AddDefaultValue(Parse*,ExprSpan*);
+SQLITE_PRIVATE void sqlite3AddCollateType(Parse*, Token*);
+SQLITE_PRIVATE void sqlite3EndTable(Parse*,Token*,Token*,u8,Select*);
+SQLITE_PRIVATE int sqlite3ParseUri(const char*,const char*,unsigned int*,
+                    sqlite3_vfs**,char**,char **);
+SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3*,const char*);
+SQLITE_PRIVATE int sqlite3CodeOnce(Parse *);
+
+#ifdef SQLITE_OMIT_BUILTIN_TEST
+# define sqlite3FaultSim(X) SQLITE_OK
+#else
+SQLITE_PRIVATE   int sqlite3FaultSim(int);
+#endif
+
+SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32);
+SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec*, u32);
+SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec*, u32);
+SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec*, u32, void*);
+SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec*);
+SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec*);
+SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int,int*);
+
+SQLITE_PRIVATE RowSet *sqlite3RowSetInit(sqlite3*, void*, unsigned int);
+SQLITE_PRIVATE void sqlite3RowSetClear(RowSet*);
+SQLITE_PRIVATE void sqlite3RowSetInsert(RowSet*, i64);
+SQLITE_PRIVATE int sqlite3RowSetTest(RowSet*, int iBatch, i64);
+SQLITE_PRIVATE int sqlite3RowSetNext(RowSet*, i64*);
+
+SQLITE_PRIVATE void sqlite3CreateView(Parse*,Token*,Token*,Token*,Select*,int,int);
+
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE)
+SQLITE_PRIVATE   int sqlite3ViewGetColumnNames(Parse*,Table*);
+#else
+# define sqlite3ViewGetColumnNames(A,B) 0
+#endif
+
+#if SQLITE_MAX_ATTACHED>30
+SQLITE_PRIVATE   int sqlite3DbMaskAllZero(yDbMask);
+#endif
+SQLITE_PRIVATE void sqlite3DropTable(Parse*, SrcList*, int, int);
+SQLITE_PRIVATE void sqlite3CodeDropTable(Parse*, Table*, int, int);
+SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3*, Table*);
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+SQLITE_PRIVATE   void sqlite3AutoincrementBegin(Parse *pParse);
+SQLITE_PRIVATE   void sqlite3AutoincrementEnd(Parse *pParse);
+#else
+# define sqlite3AutoincrementBegin(X)
+# define sqlite3AutoincrementEnd(X)
+#endif
+SQLITE_PRIVATE void sqlite3Insert(Parse*, SrcList*, Select*, IdList*, int);
+SQLITE_PRIVATE void *sqlite3ArrayAllocate(sqlite3*,void*,int,int*,int*);
+SQLITE_PRIVATE IdList *sqlite3IdListAppend(sqlite3*, IdList*, Token*);
+SQLITE_PRIVATE int sqlite3IdListIndex(IdList*,const char*);
+SQLITE_PRIVATE SrcList *sqlite3SrcListEnlarge(sqlite3*, SrcList*, int, int);
+SQLITE_PRIVATE SrcList *sqlite3SrcListAppend(sqlite3*, SrcList*, Token*, Token*);
+SQLITE_PRIVATE SrcList *sqlite3SrcListAppendFromTerm(Parse*, SrcList*, Token*, Token*,
+                                      Token*, Select*, Expr*, IdList*);
+SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *, SrcList *, Token *);
+SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *, struct SrcList_item *);
+SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList*);
+SQLITE_PRIVATE void sqlite3SrcListAssignCursors(Parse*, SrcList*);
+SQLITE_PRIVATE void sqlite3IdListDelete(sqlite3*, IdList*);
+SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3*, SrcList*);
+SQLITE_PRIVATE Index *sqlite3AllocateIndexObject(sqlite3*,i16,int,char**);
+SQLITE_PRIVATE Index *sqlite3CreateIndex(Parse*,Token*,Token*,SrcList*,ExprList*,int,Token*,
+                          Expr*, int, int);
+SQLITE_PRIVATE void sqlite3DropIndex(Parse*, SrcList*, int);
+SQLITE_PRIVATE int sqlite3Select(Parse*, Select*, SelectDest*);
+SQLITE_PRIVATE Select *sqlite3SelectNew(Parse*,ExprList*,SrcList*,Expr*,ExprList*,
+                         Expr*,ExprList*,u16,Expr*,Expr*);
+SQLITE_PRIVATE void sqlite3SelectDelete(sqlite3*, Select*);
+SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse*, SrcList*);
+SQLITE_PRIVATE int sqlite3IsReadOnly(Parse*, Table*, int);
+SQLITE_PRIVATE void sqlite3OpenTable(Parse*, int iCur, int iDb, Table*, int);
+#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY)
+SQLITE_PRIVATE Expr *sqlite3LimitWhere(Parse*,SrcList*,Expr*,ExprList*,Expr*,Expr*,char*);
+#endif
+SQLITE_PRIVATE void sqlite3DeleteFrom(Parse*, SrcList*, Expr*);
+SQLITE_PRIVATE void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int);
+SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(Parse*,SrcList*,Expr*,ExprList*,ExprList*,u16,int);
+SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo*);
+SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereIsSorted(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo*);
+SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo*, int*);
+SQLITE_PRIVATE int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int, u8);
+SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int);
+SQLITE_PRIVATE void sqlite3ExprCodeMove(Parse*, int, int, int);
+SQLITE_PRIVATE void sqlite3ExprCacheStore(Parse*, int, int, int);
+SQLITE_PRIVATE void sqlite3ExprCachePush(Parse*);
+SQLITE_PRIVATE void sqlite3ExprCachePop(Parse*);
+SQLITE_PRIVATE void sqlite3ExprCacheRemove(Parse*, int, int);
+SQLITE_PRIVATE void sqlite3ExprCacheClear(Parse*);
+SQLITE_PRIVATE void sqlite3ExprCacheAffinityChange(Parse*, int, int);
+SQLITE_PRIVATE void sqlite3ExprCode(Parse*, Expr*, int);
+SQLITE_PRIVATE void sqlite3ExprCodeFactorable(Parse*, Expr*, int);
+SQLITE_PRIVATE void sqlite3ExprCodeAtInit(Parse*, Expr*, int, u8);
+SQLITE_PRIVATE int sqlite3ExprCodeTemp(Parse*, Expr*, int*);
+SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse*, Expr*, int);
+SQLITE_PRIVATE void sqlite3ExprCodeAndCache(Parse*, Expr*, int);
+SQLITE_PRIVATE int sqlite3ExprCodeExprList(Parse*, ExprList*, int, u8);
+#define SQLITE_ECEL_DUP      0x01  /* Deep, not shallow copies */
+#define SQLITE_ECEL_FACTOR   0x02  /* Factor out constant terms */
+SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse*, Expr*, int, int);
+SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse*, Expr*, int, int);
+SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3*,const char*, const char*);
+SQLITE_PRIVATE Table *sqlite3LocateTable(Parse*,int isView,const char*, const char*);
+SQLITE_PRIVATE Table *sqlite3LocateTableItem(Parse*,int isView,struct SrcList_item *);
+SQLITE_PRIVATE Index *sqlite3FindIndex(sqlite3*,const char*, const char*);
+SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTable(sqlite3*,int,const char*);
+SQLITE_PRIVATE void sqlite3UnlinkAndDeleteIndex(sqlite3*,int,const char*);
+SQLITE_PRIVATE void sqlite3Vacuum(Parse*);
+SQLITE_PRIVATE int sqlite3RunVacuum(char**, sqlite3*);
+SQLITE_PRIVATE char *sqlite3NameFromToken(sqlite3*, Token*);
+SQLITE_PRIVATE int sqlite3ExprCompare(Expr*, Expr*, int);
+SQLITE_PRIVATE int sqlite3ExprListCompare(ExprList*, ExprList*, int);
+SQLITE_PRIVATE int sqlite3ExprImpliesExpr(Expr*, Expr*, int);
+SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*);
+SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*);
+SQLITE_PRIVATE int sqlite3FunctionUsesThisSrc(Expr*, SrcList*);
+SQLITE_PRIVATE Vdbe *sqlite3GetVdbe(Parse*);
+SQLITE_PRIVATE void sqlite3PrngSaveState(void);
+SQLITE_PRIVATE void sqlite3PrngRestoreState(void);
+SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3*,int);
+SQLITE_PRIVATE void sqlite3CodeVerifySchema(Parse*, int);
+SQLITE_PRIVATE void sqlite3CodeVerifyNamedSchema(Parse*, const char *zDb);
+SQLITE_PRIVATE void sqlite3BeginTransaction(Parse*, int);
+SQLITE_PRIVATE void sqlite3CommitTransaction(Parse*);
+SQLITE_PRIVATE void sqlite3RollbackTransaction(Parse*);
+SQLITE_PRIVATE void sqlite3Savepoint(Parse*, int, Token*);
+SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *);
+SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3*);
+SQLITE_PRIVATE int sqlite3ExprIsConstant(Expr*);
+SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr*);
+SQLITE_PRIVATE int sqlite3ExprIsConstantOrFunction(Expr*, u8);
+SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr*,int);
+SQLITE_PRIVATE int sqlite3ExprIsInteger(Expr*, int*);
+SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr*);
+SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr*, char);
+SQLITE_PRIVATE int sqlite3IsRowid(const char*);
+SQLITE_PRIVATE void sqlite3GenerateRowDelete(Parse*,Table*,Trigger*,int,int,int,i16,u8,u8,u8);
+SQLITE_PRIVATE void sqlite3GenerateRowIndexDelete(Parse*, Table*, int, int, int*);
+SQLITE_PRIVATE int sqlite3GenerateIndexKey(Parse*, Index*, int, int, int, int*,Index*,int);
+SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse*,int);
+SQLITE_PRIVATE void sqlite3GenerateConstraintChecks(Parse*,Table*,int*,int,int,int,int,
+                                     u8,u8,int,int*);
+SQLITE_PRIVATE void sqlite3CompleteInsertion(Parse*,Table*,int,int,int,int*,int,int,int);
+SQLITE_PRIVATE int sqlite3OpenTableAndIndices(Parse*, Table*, int, int, u8*, int*, int*);
+SQLITE_PRIVATE void sqlite3BeginWriteOperation(Parse*, int, int);
+SQLITE_PRIVATE void sqlite3MultiWrite(Parse*);
+SQLITE_PRIVATE void sqlite3MayAbort(Parse*);
+SQLITE_PRIVATE void sqlite3HaltConstraint(Parse*, int, int, char*, i8, u8);
+SQLITE_PRIVATE void sqlite3UniqueConstraint(Parse*, int, Index*);
+SQLITE_PRIVATE void sqlite3RowidConstraint(Parse*, int, Table*);
+SQLITE_PRIVATE Expr *sqlite3ExprDup(sqlite3*,Expr*,int);
+SQLITE_PRIVATE ExprList *sqlite3ExprListDup(sqlite3*,ExprList*,int);
+SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3*,SrcList*,int);
+SQLITE_PRIVATE IdList *sqlite3IdListDup(sqlite3*,IdList*);
+SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3*,Select*,int);
+#if SELECTTRACE_ENABLED
+SQLITE_PRIVATE void sqlite3SelectSetName(Select*,const char*);
+#else
+# define sqlite3SelectSetName(A,B)
+#endif
+SQLITE_PRIVATE void sqlite3FuncDefInsert(FuncDefHash*, FuncDef*);
+SQLITE_PRIVATE FuncDef *sqlite3FindFunction(sqlite3*,const char*,int,int,u8,u8);
+SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(sqlite3*);
+SQLITE_PRIVATE void sqlite3RegisterDateTimeFunctions(void);
+SQLITE_PRIVATE void sqlite3RegisterGlobalFunctions(void);
+SQLITE_PRIVATE int sqlite3SafetyCheckOk(sqlite3*);
+SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(sqlite3*);
+SQLITE_PRIVATE void sqlite3ChangeCookie(Parse*, int);
+
+#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER)
+SQLITE_PRIVATE void sqlite3MaterializeView(Parse*, Table*, Expr*, int);
+#endif
+
+#ifndef SQLITE_OMIT_TRIGGER
+SQLITE_PRIVATE   void sqlite3BeginTrigger(Parse*, Token*,Token*,int,int,IdList*,SrcList*,
+                           Expr*,int, int);
+SQLITE_PRIVATE   void sqlite3FinishTrigger(Parse*, TriggerStep*, Token*);
+SQLITE_PRIVATE   void sqlite3DropTrigger(Parse*, SrcList*, int);
+SQLITE_PRIVATE   void sqlite3DropTriggerPtr(Parse*, Trigger*);
+SQLITE_PRIVATE   Trigger *sqlite3TriggersExist(Parse *, Table*, int, ExprList*, int *pMask);
+SQLITE_PRIVATE   Trigger *sqlite3TriggerList(Parse *, Table *);
+SQLITE_PRIVATE   void sqlite3CodeRowTrigger(Parse*, Trigger *, int, ExprList*, int, Table *,
+                            int, int, int);
+SQLITE_PRIVATE   void sqlite3CodeRowTriggerDirect(Parse *, Trigger *, Table *, int, int, int);
+  void sqliteViewTriggers(Parse*, Table*, Expr*, int, ExprList*);
+SQLITE_PRIVATE   void sqlite3DeleteTriggerStep(sqlite3*, TriggerStep*);
+SQLITE_PRIVATE   TriggerStep *sqlite3TriggerSelectStep(sqlite3*,Select*);
+SQLITE_PRIVATE   TriggerStep *sqlite3TriggerInsertStep(sqlite3*,Token*, IdList*,
+                                        Select*,u8);
+SQLITE_PRIVATE   TriggerStep *sqlite3TriggerUpdateStep(sqlite3*,Token*,ExprList*, Expr*, u8);
+SQLITE_PRIVATE   TriggerStep *sqlite3TriggerDeleteStep(sqlite3*,Token*, Expr*);
+SQLITE_PRIVATE   void sqlite3DeleteTrigger(sqlite3*, Trigger*);
+SQLITE_PRIVATE   void sqlite3UnlinkAndDeleteTrigger(sqlite3*,int,const char*);
+SQLITE_PRIVATE   u32 sqlite3TriggerColmask(Parse*,Trigger*,ExprList*,int,int,Table*,int);
+# define sqlite3ParseToplevel(p) ((p)->pToplevel ? (p)->pToplevel : (p))
+#else
+# define sqlite3TriggersExist(B,C,D,E,F) 0
+# define sqlite3DeleteTrigger(A,B)
+# define sqlite3DropTriggerPtr(A,B)
+# define sqlite3UnlinkAndDeleteTrigger(A,B,C)
+# define sqlite3CodeRowTrigger(A,B,C,D,E,F,G,H,I)
+# define sqlite3CodeRowTriggerDirect(A,B,C,D,E,F)
+# define sqlite3TriggerList(X, Y) 0
+# define sqlite3ParseToplevel(p) p
+# define sqlite3TriggerColmask(A,B,C,D,E,F,G) 0
+#endif
+
+SQLITE_PRIVATE int sqlite3JoinType(Parse*, Token*, Token*, Token*);
+SQLITE_PRIVATE void sqlite3CreateForeignKey(Parse*, ExprList*, Token*, ExprList*, int);
+SQLITE_PRIVATE void sqlite3DeferForeignKey(Parse*, int);
+#ifndef SQLITE_OMIT_AUTHORIZATION
+SQLITE_PRIVATE   void sqlite3AuthRead(Parse*,Expr*,Schema*,SrcList*);
+SQLITE_PRIVATE   int sqlite3AuthCheck(Parse*,int, const char*, const char*, const char*);
+SQLITE_PRIVATE   void sqlite3AuthContextPush(Parse*, AuthContext*, const char*);
+SQLITE_PRIVATE   void sqlite3AuthContextPop(AuthContext*);
+SQLITE_PRIVATE   int sqlite3AuthReadCol(Parse*, const char *, const char *, int);
+#else
+# define sqlite3AuthRead(a,b,c,d)
+# define sqlite3AuthCheck(a,b,c,d,e)    SQLITE_OK
+# define sqlite3AuthContextPush(a,b,c)
+# define sqlite3AuthContextPop(a)  ((void)(a))
+#endif
+SQLITE_PRIVATE void sqlite3Attach(Parse*, Expr*, Expr*, Expr*);
+SQLITE_PRIVATE void sqlite3Detach(Parse*, Expr*);
+SQLITE_PRIVATE void sqlite3FixInit(DbFixer*, Parse*, int, const char*, const Token*);
+SQLITE_PRIVATE int sqlite3FixSrcList(DbFixer*, SrcList*);
+SQLITE_PRIVATE int sqlite3FixSelect(DbFixer*, Select*);
+SQLITE_PRIVATE int sqlite3FixExpr(DbFixer*, Expr*);
+SQLITE_PRIVATE int sqlite3FixExprList(DbFixer*, ExprList*);
+SQLITE_PRIVATE int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
+SQLITE_PRIVATE int sqlite3AtoF(const char *z, double*, int, u8);
+SQLITE_PRIVATE int sqlite3GetInt32(const char *, int*);
+SQLITE_PRIVATE int sqlite3Atoi(const char*);
+SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nChar);
+SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *pData, int nByte);
+SQLITE_PRIVATE u32 sqlite3Utf8Read(const u8**);
+SQLITE_PRIVATE LogEst sqlite3LogEst(u64);
+SQLITE_PRIVATE LogEst sqlite3LogEstAdd(LogEst,LogEst);
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+SQLITE_PRIVATE LogEst sqlite3LogEstFromDouble(double);
+#endif
+SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst);
+
+/*
+** Routines to read and write variable-length integers.  These used to
+** be defined locally, but now we use the varint routines in the util.c
+** file.
+*/
+SQLITE_PRIVATE int sqlite3PutVarint(unsigned char*, u64);
+SQLITE_PRIVATE u8 sqlite3GetVarint(const unsigned char *, u64 *);
+SQLITE_PRIVATE u8 sqlite3GetVarint32(const unsigned char *, u32 *);
+SQLITE_PRIVATE int sqlite3VarintLen(u64 v);
+
+/*
+** The common case is for a varint to be a single byte.  They following
+** macros handle the common case without a procedure call, but then call
+** the procedure for larger varints.
+*/
+#define getVarint32(A,B)  \
+  (u8)((*(A)<(u8)0x80)?((B)=(u32)*(A)),1:sqlite3GetVarint32((A),(u32 *)&(B)))
+#define putVarint32(A,B)  \
+  (u8)(((u32)(B)<(u32)0x80)?(*(A)=(unsigned char)(B)),1:\
+  sqlite3PutVarint((A),(B)))
+#define getVarint    sqlite3GetVarint
+#define putVarint    sqlite3PutVarint
+
+
+SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *, Index *);
+SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe*, Table*, int);
+SQLITE_PRIVATE char sqlite3CompareAffinity(Expr *pExpr, char aff2);
+SQLITE_PRIVATE int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity);
+SQLITE_PRIVATE char sqlite3ExprAffinity(Expr *pExpr);
+SQLITE_PRIVATE int sqlite3Atoi64(const char*, i64*, int, u8);
+SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char*, i64*);
+SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3*, int, const char*,...);
+SQLITE_PRIVATE void sqlite3Error(sqlite3*,int);
+SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3*, const char *z, int n);
+SQLITE_PRIVATE u8 sqlite3HexToInt(int h);
+SQLITE_PRIVATE int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
+
+#if defined(SQLITE_TEST) 
+SQLITE_PRIVATE const char *sqlite3ErrName(int);
+#endif
+
+SQLITE_PRIVATE const char *sqlite3ErrStr(int);
+SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse);
+SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq(sqlite3*,u8 enc, const char*,int);
+SQLITE_PRIVATE CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char*zName);
+SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
+SQLITE_PRIVATE Expr *sqlite3ExprAddCollateToken(Parse *pParse, Expr*, const Token*);
+SQLITE_PRIVATE Expr *sqlite3ExprAddCollateString(Parse*,Expr*,const char*);
+SQLITE_PRIVATE Expr *sqlite3ExprSkipCollate(Expr*);
+SQLITE_PRIVATE int sqlite3CheckCollSeq(Parse *, CollSeq *);
+SQLITE_PRIVATE int sqlite3CheckObjectName(Parse *, const char *);
+SQLITE_PRIVATE void sqlite3VdbeSetChanges(sqlite3 *, int);
+SQLITE_PRIVATE int sqlite3AddInt64(i64*,i64);
+SQLITE_PRIVATE int sqlite3SubInt64(i64*,i64);
+SQLITE_PRIVATE int sqlite3MulInt64(i64*,i64);
+SQLITE_PRIVATE int sqlite3AbsInt32(int);
+#ifdef SQLITE_ENABLE_8_3_NAMES
+SQLITE_PRIVATE void sqlite3FileSuffix3(const char*, char*);
+#else
+# define sqlite3FileSuffix3(X,Y)
+#endif
+SQLITE_PRIVATE u8 sqlite3GetBoolean(const char *z,u8);
+
+SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value*, u8);
+SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value*, u8);
+SQLITE_PRIVATE void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, 
+                        void(*)(void*));
+SQLITE_PRIVATE void sqlite3ValueSetNull(sqlite3_value*);
+SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value*);
+SQLITE_PRIVATE sqlite3_value *sqlite3ValueNew(sqlite3 *);
+SQLITE_PRIVATE char *sqlite3Utf16to8(sqlite3 *, const void*, int, u8);
+SQLITE_PRIVATE int sqlite3ValueFromExpr(sqlite3 *, Expr *, u8, u8, sqlite3_value **);
+SQLITE_PRIVATE void sqlite3ValueApplyAffinity(sqlite3_value *, u8, u8);
+#ifndef SQLITE_AMALGAMATION
+SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[];
+SQLITE_PRIVATE const unsigned char sqlite3UpperToLower[];
+SQLITE_PRIVATE const unsigned char sqlite3CtypeMap[];
+SQLITE_PRIVATE const Token sqlite3IntTokens[];
+SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config;
+SQLITE_PRIVATE SQLITE_WSD FuncDefHash sqlite3GlobalFunctions;
+#ifndef SQLITE_OMIT_WSD
+SQLITE_PRIVATE int sqlite3PendingByte;
+#endif
+#endif
+SQLITE_PRIVATE void sqlite3RootPageMoved(sqlite3*, int, int, int);
+SQLITE_PRIVATE void sqlite3Reindex(Parse*, Token*, Token*);
+SQLITE_PRIVATE void sqlite3AlterFunctions(void);
+SQLITE_PRIVATE void sqlite3AlterRenameTable(Parse*, SrcList*, Token*);
+SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *, int *);
+SQLITE_PRIVATE void sqlite3NestedParse(Parse*, const char*, ...);
+SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3*);
+SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *, Expr *, int, int);
+SQLITE_PRIVATE void sqlite3SelectPrep(Parse*, Select*, NameContext*);
+SQLITE_PRIVATE int sqlite3MatchSpanName(const char*, const char*, const char*, const char*);
+SQLITE_PRIVATE int sqlite3ResolveExprNames(NameContext*, Expr*);
+SQLITE_PRIVATE void sqlite3ResolveSelectNames(Parse*, Select*, NameContext*);
+SQLITE_PRIVATE void sqlite3ResolveSelfReference(Parse*,Table*,int,Expr*,ExprList*);
+SQLITE_PRIVATE int sqlite3ResolveOrderGroupBy(Parse*, Select*, ExprList*, const char*);
+SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *, Table *, int, int);
+SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *, Token *);
+SQLITE_PRIVATE void sqlite3AlterBeginAddColumn(Parse *, SrcList *);
+SQLITE_PRIVATE CollSeq *sqlite3GetCollSeq(Parse*, u8, CollSeq *, const char*);
+SQLITE_PRIVATE char sqlite3AffinityType(const char*, u8*);
+SQLITE_PRIVATE void sqlite3Analyze(Parse*, Token*, Token*);
+SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler*);
+SQLITE_PRIVATE int sqlite3FindDb(sqlite3*, Token*);
+SQLITE_PRIVATE int sqlite3FindDbName(sqlite3 *, const char *);
+SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3*,int iDB);
+SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3*,Index*);
+SQLITE_PRIVATE void sqlite3DefaultRowEst(Index*);
+SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3*, int);
+SQLITE_PRIVATE int sqlite3IsLikeFunction(sqlite3*,Expr*,int*,char*);
+SQLITE_PRIVATE void sqlite3MinimumFileFormat(Parse*, int, int);
+SQLITE_PRIVATE void sqlite3SchemaClear(void *);
+SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *, Btree *);
+SQLITE_PRIVATE int sqlite3SchemaToIndex(sqlite3 *db, Schema *);
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3*,int,int);
+SQLITE_PRIVATE void sqlite3KeyInfoUnref(KeyInfo*);
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoRef(KeyInfo*);
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse*, Index*);
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE int sqlite3KeyInfoIsWriteable(KeyInfo*);
+#endif
+SQLITE_PRIVATE int sqlite3CreateFunc(sqlite3 *, const char *, int, int, void *, 
+  void (*)(sqlite3_context*,int,sqlite3_value **),
+  void (*)(sqlite3_context*,int,sqlite3_value **), void (*)(sqlite3_context*),
+  FuncDestructor *pDestructor
+);
+SQLITE_PRIVATE int sqlite3ApiExit(sqlite3 *db, int);
+SQLITE_PRIVATE int sqlite3OpenTempDatabase(Parse *);
+
+SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum*, char*, int, int);
+SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum*,const char*,int);
+SQLITE_PRIVATE void sqlite3StrAccumAppendAll(StrAccum*,const char*);
+SQLITE_PRIVATE void sqlite3AppendChar(StrAccum*,int,char);
+SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum*);
+SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum*);
+SQLITE_PRIVATE void sqlite3SelectDestInit(SelectDest*,int,int);
+SQLITE_PRIVATE Expr *sqlite3CreateColumnExpr(sqlite3 *, SrcList *, int, int);
+
+SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *);
+SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *);
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+SQLITE_PRIVATE void sqlite3AnalyzeFunctions(void);
+SQLITE_PRIVATE int sqlite3Stat4ProbeSetValue(Parse*,Index*,UnpackedRecord**,Expr*,u8,int,int*);
+SQLITE_PRIVATE int sqlite3Stat4ValueFromExpr(Parse*, Expr*, u8, sqlite3_value**);
+SQLITE_PRIVATE void sqlite3Stat4ProbeFree(UnpackedRecord*);
+SQLITE_PRIVATE int sqlite3Stat4Column(sqlite3*, const void*, int, int, sqlite3_value**);
+#endif
+
+/*
+** The interface to the LEMON-generated parser
+*/
+SQLITE_PRIVATE void *sqlite3ParserAlloc(void*(*)(u64));
+SQLITE_PRIVATE void sqlite3ParserFree(void*, void(*)(void*));
+SQLITE_PRIVATE void sqlite3Parser(void*, int, Token, Parse*);
+#ifdef YYTRACKMAXSTACKDEPTH
+SQLITE_PRIVATE   int sqlite3ParserStackPeak(void*);
+#endif
+
+SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3*);
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+SQLITE_PRIVATE   void sqlite3CloseExtensions(sqlite3*);
+#else
+# define sqlite3CloseExtensions(X)
+#endif
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+SQLITE_PRIVATE   void sqlite3TableLock(Parse *, int, int, u8, const char *);
+#else
+  #define sqlite3TableLock(v,w,x,y,z)
+#endif
+
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE   int sqlite3Utf8To8(unsigned char*);
+#endif
+
+#ifdef SQLITE_OMIT_VIRTUALTABLE
+#  define sqlite3VtabClear(Y)
+#  define sqlite3VtabSync(X,Y) SQLITE_OK
+#  define sqlite3VtabRollback(X)
+#  define sqlite3VtabCommit(X)
+#  define sqlite3VtabInSync(db) 0
+#  define sqlite3VtabLock(X) 
+#  define sqlite3VtabUnlock(X)
+#  define sqlite3VtabUnlockList(X)
+#  define sqlite3VtabSavepoint(X, Y, Z) SQLITE_OK
+#  define sqlite3GetVTable(X,Y)  ((VTable*)0)
+#else
+SQLITE_PRIVATE    void sqlite3VtabClear(sqlite3 *db, Table*);
+SQLITE_PRIVATE    void sqlite3VtabDisconnect(sqlite3 *db, Table *p);
+SQLITE_PRIVATE    int sqlite3VtabSync(sqlite3 *db, Vdbe*);
+SQLITE_PRIVATE    int sqlite3VtabRollback(sqlite3 *db);
+SQLITE_PRIVATE    int sqlite3VtabCommit(sqlite3 *db);
+SQLITE_PRIVATE    void sqlite3VtabLock(VTable *);
+SQLITE_PRIVATE    void sqlite3VtabUnlock(VTable *);
+SQLITE_PRIVATE    void sqlite3VtabUnlockList(sqlite3*);
+SQLITE_PRIVATE    int sqlite3VtabSavepoint(sqlite3 *, int, int);
+SQLITE_PRIVATE    void sqlite3VtabImportErrmsg(Vdbe*, sqlite3_vtab*);
+SQLITE_PRIVATE    VTable *sqlite3GetVTable(sqlite3*, Table*);
+#  define sqlite3VtabInSync(db) ((db)->nVTrans>0 && (db)->aVTrans==0)
+#endif
+SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse*,Table*);
+SQLITE_PRIVATE void sqlite3VtabBeginParse(Parse*, Token*, Token*, Token*, int);
+SQLITE_PRIVATE void sqlite3VtabFinishParse(Parse*, Token*);
+SQLITE_PRIVATE void sqlite3VtabArgInit(Parse*);
+SQLITE_PRIVATE void sqlite3VtabArgExtend(Parse*, Token*);
+SQLITE_PRIVATE int sqlite3VtabCallCreate(sqlite3*, int, const char *, char **);
+SQLITE_PRIVATE int sqlite3VtabCallConnect(Parse*, Table*);
+SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3*, int, const char *);
+SQLITE_PRIVATE int sqlite3VtabBegin(sqlite3 *, VTable *);
+SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction(sqlite3 *,FuncDef*, int nArg, Expr*);
+SQLITE_PRIVATE void sqlite3InvalidFunction(sqlite3_context*,int,sqlite3_value**);
+SQLITE_PRIVATE sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context*);
+SQLITE_PRIVATE int sqlite3VdbeParameterIndex(Vdbe*, const char*, int);
+SQLITE_PRIVATE int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *);
+SQLITE_PRIVATE void sqlite3ParserReset(Parse*);
+SQLITE_PRIVATE int sqlite3Reprepare(Vdbe*);
+SQLITE_PRIVATE void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
+SQLITE_PRIVATE CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
+SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3*);
+SQLITE_PRIVATE const char *sqlite3JournalModename(int);
+#ifndef SQLITE_OMIT_WAL
+SQLITE_PRIVATE   int sqlite3Checkpoint(sqlite3*, int, int, int*, int*);
+SQLITE_PRIVATE   int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int);
+#endif
+#ifndef SQLITE_OMIT_CTE
+SQLITE_PRIVATE   With *sqlite3WithAdd(Parse*,With*,Token*,ExprList*,Select*);
+SQLITE_PRIVATE   void sqlite3WithDelete(sqlite3*,With*);
+SQLITE_PRIVATE   void sqlite3WithPush(Parse*, With*, u8);
+#else
+#define sqlite3WithPush(x,y,z)
+#define sqlite3WithDelete(x,y)
+#endif
+
+/* Declarations for functions in fkey.c. All of these are replaced by
+** no-op macros if OMIT_FOREIGN_KEY is defined. In this case no foreign
+** key functionality is available. If OMIT_TRIGGER is defined but
+** OMIT_FOREIGN_KEY is not, only some of the functions are no-oped. In
+** this case foreign keys are parsed, but no other functionality is 
+** provided (enforcement of FK constraints requires the triggers sub-system).
+*/
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+SQLITE_PRIVATE   void sqlite3FkCheck(Parse*, Table*, int, int, int*, int);
+SQLITE_PRIVATE   void sqlite3FkDropTable(Parse*, SrcList *, Table*);
+SQLITE_PRIVATE   void sqlite3FkActions(Parse*, Table*, ExprList*, int, int*, int);
+SQLITE_PRIVATE   int sqlite3FkRequired(Parse*, Table*, int*, int);
+SQLITE_PRIVATE   u32 sqlite3FkOldmask(Parse*, Table*);
+SQLITE_PRIVATE   FKey *sqlite3FkReferences(Table *);
+#else
+  #define sqlite3FkActions(a,b,c,d,e,f)
+  #define sqlite3FkCheck(a,b,c,d,e,f)
+  #define sqlite3FkDropTable(a,b,c)
+  #define sqlite3FkOldmask(a,b)         0
+  #define sqlite3FkRequired(a,b,c,d)    0
+#endif
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+SQLITE_PRIVATE   void sqlite3FkDelete(sqlite3 *, Table*);
+SQLITE_PRIVATE   int sqlite3FkLocateIndex(Parse*,Table*,FKey*,Index**,int**);
+#else
+  #define sqlite3FkDelete(a,b)
+  #define sqlite3FkLocateIndex(a,b,c,d,e)
+#endif
+
+
+/*
+** Available fault injectors.  Should be numbered beginning with 0.
+*/
+#define SQLITE_FAULTINJECTOR_MALLOC     0
+#define SQLITE_FAULTINJECTOR_COUNT      1
+
+/*
+** The interface to the code in fault.c used for identifying "benign"
+** malloc failures. This is only present if SQLITE_OMIT_BUILTIN_TEST
+** is not defined.
+*/
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+SQLITE_PRIVATE   void sqlite3BeginBenignMalloc(void);
+SQLITE_PRIVATE   void sqlite3EndBenignMalloc(void);
+#else
+  #define sqlite3BeginBenignMalloc()
+  #define sqlite3EndBenignMalloc()
+#endif
+
+/*
+** Allowed return values from sqlite3FindInIndex()
+*/
+#define IN_INDEX_ROWID        1   /* Search the rowid of the table */
+#define IN_INDEX_EPH          2   /* Search an ephemeral b-tree */
+#define IN_INDEX_INDEX_ASC    3   /* Existing index ASCENDING */
+#define IN_INDEX_INDEX_DESC   4   /* Existing index DESCENDING */
+#define IN_INDEX_NOOP         5   /* No table available. Use comparisons */
+/*
+** Allowed flags for the 3rd parameter to sqlite3FindInIndex().
+*/
+#define IN_INDEX_NOOP_OK     0x0001  /* OK to return IN_INDEX_NOOP */
+#define IN_INDEX_MEMBERSHIP  0x0002  /* IN operator used for membership test */
+#define IN_INDEX_LOOP        0x0004  /* IN operator used as a loop */
+SQLITE_PRIVATE int sqlite3FindInIndex(Parse *, Expr *, u32, int*);
+
+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+SQLITE_PRIVATE   int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int);
+SQLITE_PRIVATE   int sqlite3JournalSize(sqlite3_vfs *);
+SQLITE_PRIVATE   int sqlite3JournalCreate(sqlite3_file *);
+SQLITE_PRIVATE   int sqlite3JournalExists(sqlite3_file *p);
+#else
+  #define sqlite3JournalSize(pVfs) ((pVfs)->szOsFile)
+  #define sqlite3JournalExists(p) 1
+#endif
+
+SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *);
+SQLITE_PRIVATE int sqlite3MemJournalSize(void);
+SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *);
+
+#if SQLITE_MAX_EXPR_DEPTH>0
+SQLITE_PRIVATE   void sqlite3ExprSetHeight(Parse *pParse, Expr *p);
+SQLITE_PRIVATE   int sqlite3SelectExprHeight(Select *);
+SQLITE_PRIVATE   int sqlite3ExprCheckHeight(Parse*, int);
+#else
+  #define sqlite3ExprSetHeight(x,y)
+  #define sqlite3SelectExprHeight(x) 0
+  #define sqlite3ExprCheckHeight(x,y)
+#endif
+
+SQLITE_PRIVATE u32 sqlite3Get4byte(const u8*);
+SQLITE_PRIVATE void sqlite3Put4byte(u8*, u32);
+
+#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY
+SQLITE_PRIVATE   void sqlite3ConnectionBlocked(sqlite3 *, sqlite3 *);
+SQLITE_PRIVATE   void sqlite3ConnectionUnlocked(sqlite3 *db);
+SQLITE_PRIVATE   void sqlite3ConnectionClosed(sqlite3 *db);
+#else
+  #define sqlite3ConnectionBlocked(x,y)
+  #define sqlite3ConnectionUnlocked(x)
+  #define sqlite3ConnectionClosed(x)
+#endif
+
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE   void sqlite3ParserTrace(FILE*, char *);
+#endif
+
+/*
+** If the SQLITE_ENABLE IOTRACE exists then the global variable
+** sqlite3IoTrace is a pointer to a printf-like routine used to
+** print I/O tracing messages. 
+*/
+#ifdef SQLITE_ENABLE_IOTRACE
+# define IOTRACE(A)  if( sqlite3IoTrace ){ sqlite3IoTrace A; }
+SQLITE_PRIVATE   void sqlite3VdbeIOTraceSql(Vdbe*);
+void (*sqlite3IoTrace)(const char*,...);
+#else
+# define IOTRACE(A)
+# define sqlite3VdbeIOTraceSql(X)
+#endif
+
+/*
+** These routines are available for the mem2.c debugging memory allocator
+** only.  They are used to verify that different "types" of memory
+** allocations are properly tracked by the system.
+**
+** sqlite3MemdebugSetType() sets the "type" of an allocation to one of
+** the MEMTYPE_* macros defined below.  The type must be a bitmask with
+** a single bit set.
+**
+** sqlite3MemdebugHasType() returns true if any of the bits in its second
+** argument match the type set by the previous sqlite3MemdebugSetType().
+** sqlite3MemdebugHasType() is intended for use inside assert() statements.
+**
+** sqlite3MemdebugNoType() returns true if none of the bits in its second
+** argument match the type set by the previous sqlite3MemdebugSetType().
+**
+** Perhaps the most important point is the difference between MEMTYPE_HEAP
+** and MEMTYPE_LOOKASIDE.  If an allocation is MEMTYPE_LOOKASIDE, that means
+** it might have been allocated by lookaside, except the allocation was
+** too large or lookaside was already full.  It is important to verify
+** that allocations that might have been satisfied by lookaside are not
+** passed back to non-lookaside free() routines.  Asserts such as the
+** example above are placed on the non-lookaside free() routines to verify
+** this constraint. 
+**
+** All of this is no-op for a production build.  It only comes into
+** play when the SQLITE_MEMDEBUG compile-time option is used.
+*/
+#ifdef SQLITE_MEMDEBUG
+SQLITE_PRIVATE   void sqlite3MemdebugSetType(void*,u8);
+SQLITE_PRIVATE   int sqlite3MemdebugHasType(void*,u8);
+SQLITE_PRIVATE   int sqlite3MemdebugNoType(void*,u8);
+#else
+# define sqlite3MemdebugSetType(X,Y)  /* no-op */
+# define sqlite3MemdebugHasType(X,Y)  1
+# define sqlite3MemdebugNoType(X,Y)   1
+#endif
+#define MEMTYPE_HEAP       0x01  /* General heap allocations */
+#define MEMTYPE_LOOKASIDE  0x02  /* Heap that might have been lookaside */
+#define MEMTYPE_SCRATCH    0x04  /* Scratch allocations */
+#define MEMTYPE_PCACHE     0x08  /* Page cache allocations */
+
+/*
+** Threading interface
+*/
+#if SQLITE_MAX_WORKER_THREADS>0
+SQLITE_PRIVATE int sqlite3ThreadCreate(SQLiteThread**,void*(*)(void*),void*);
+SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread*, void**);
+#endif
+
+#endif /* _SQLITEINT_H_ */
+
+/************** End of sqliteInt.h *******************************************/
+/************** Begin file global.c ******************************************/
+/*
+** 2008 June 13
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains definitions of global variables and constants.
+*/
+
+/* An array to map all upper-case characters into their corresponding
+** lower-case character. 
+**
+** SQLite only considers US-ASCII (or EBCDIC) characters.  We do not
+** handle case conversions for the UTF character set since the tables
+** involved are nearly as big or bigger than SQLite itself.
+*/
+SQLITE_PRIVATE const unsigned char sqlite3UpperToLower[] = {
+#ifdef SQLITE_ASCII
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
+     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
+    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
+    122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
+    108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
+    126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
+    162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
+    180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
+    198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
+    216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
+    234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
+    252,253,254,255
+#endif
+#ifdef SQLITE_EBCDIC
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 0x */
+     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 1x */
+     32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 2x */
+     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 3x */
+     64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 4x */
+     80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 5x */
+     96, 97, 66, 67, 68, 69, 70, 71, 72, 73,106,107,108,109,110,111, /* 6x */
+    112, 81, 82, 83, 84, 85, 86, 87, 88, 89,122,123,124,125,126,127, /* 7x */
+    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, /* 8x */
+    144,145,146,147,148,149,150,151,152,153,154,155,156,157,156,159, /* 9x */
+    160,161,162,163,164,165,166,167,168,169,170,171,140,141,142,175, /* Ax */
+    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, /* Bx */
+    192,129,130,131,132,133,134,135,136,137,202,203,204,205,206,207, /* Cx */
+    208,145,146,147,148,149,150,151,152,153,218,219,220,221,222,223, /* Dx */
+    224,225,162,163,164,165,166,167,168,169,232,203,204,205,206,207, /* Ex */
+    239,240,241,242,243,244,245,246,247,248,249,219,220,221,222,255, /* Fx */
+#endif
+};
+
+/*
+** The following 256 byte lookup table is used to support SQLites built-in
+** equivalents to the following standard library functions:
+**
+**   isspace()                        0x01
+**   isalpha()                        0x02
+**   isdigit()                        0x04
+**   isalnum()                        0x06
+**   isxdigit()                       0x08
+**   toupper()                        0x20
+**   SQLite identifier character      0x40
+**
+** Bit 0x20 is set if the mapped character requires translation to upper
+** case. i.e. if the character is a lower-case ASCII character.
+** If x is a lower-case ASCII character, then its upper-case equivalent
+** is (x - 0x20). Therefore toupper() can be implemented as:
+**
+**   (x & ~(map[x]&0x20))
+**
+** Standard function tolower() is implemented using the sqlite3UpperToLower[]
+** array. tolower() is used more often than toupper() by SQLite.
+**
+** Bit 0x40 is set if the character non-alphanumeric and can be used in an 
+** SQLite identifier.  Identifiers are alphanumerics, "_", "$", and any
+** non-ASCII UTF character. Hence the test for whether or not a character is
+** part of an identifier is 0x46.
+**
+** SQLite's versions are identical to the standard versions assuming a
+** locale of "C". They are implemented as macros in sqliteInt.h.
+*/
+#ifdef SQLITE_ASCII
+SQLITE_PRIVATE const unsigned char sqlite3CtypeMap[256] = {
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 00..07    ........ */
+  0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,  /* 08..0f    ........ */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 10..17    ........ */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 18..1f    ........ */
+  0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,  /* 20..27     !"#$%&' */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 28..2f    ()*+,-./ */
+  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,  /* 30..37    01234567 */
+  0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 38..3f    89:;<=>? */
+
+  0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02,  /* 40..47    @ABCDEFG */
+  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  /* 48..4f    HIJKLMNO */
+  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  /* 50..57    PQRSTUVW */
+  0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x40,  /* 58..5f    XYZ[\]^_ */
+  0x00, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22,  /* 60..67    `abcdefg */
+  0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,  /* 68..6f    hijklmno */
+  0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,  /* 70..77    pqrstuvw */
+  0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 78..7f    xyz{|}~. */
+
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 80..87    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 88..8f    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 90..97    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* 98..9f    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* a0..a7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* a8..af    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* b0..b7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* b8..bf    ........ */
+
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* c0..c7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* c8..cf    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* d0..d7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* d8..df    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* e0..e7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* e8..ef    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  /* f0..f7    ........ */
+  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40   /* f8..ff    ........ */
+};
+#endif
+
+/* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards
+** compatibility for legacy applications, the URI filename capability is
+** disabled by default.
+**
+** EVIDENCE-OF: R-38799-08373 URI filenames can be enabled or disabled
+** using the SQLITE_USE_URI=1 or SQLITE_USE_URI=0 compile-time options.
+**
+** EVIDENCE-OF: R-43642-56306 By default, URI handling is globally
+** disabled. The default value may be changed by compiling with the
+** SQLITE_USE_URI symbol defined.
+*/
+#ifndef SQLITE_USE_URI
+# define  SQLITE_USE_URI 0
+#endif
+
+/* EVIDENCE-OF: R-38720-18127 The default setting is determined by the
+** SQLITE_ALLOW_COVERING_INDEX_SCAN compile-time option, or is "on" if
+** that compile-time option is omitted.
+*/
+#ifndef SQLITE_ALLOW_COVERING_INDEX_SCAN
+# define SQLITE_ALLOW_COVERING_INDEX_SCAN 1
+#endif
+
+/* The minimum PMA size is set to this value multiplied by the database
+** page size in bytes.
+*/
+#ifndef SQLITE_SORTER_PMASZ
+# define SQLITE_SORTER_PMASZ 250
+#endif
+
+/*
+** The following singleton contains the global configuration for
+** the SQLite library.
+*/
+SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = {
+   SQLITE_DEFAULT_MEMSTATUS,  /* bMemstat */
+   1,                         /* bCoreMutex */
+   SQLITE_THREADSAFE==1,      /* bFullMutex */
+   SQLITE_USE_URI,            /* bOpenUri */
+   SQLITE_ALLOW_COVERING_INDEX_SCAN,   /* bUseCis */
+   0x7ffffffe,                /* mxStrlen */
+   0,                         /* neverCorrupt */
+   128,                       /* szLookaside */
+   500,                       /* nLookaside */
+   {0,0,0,0,0,0,0,0},         /* m */
+   {0,0,0,0,0,0,0,0,0},       /* mutex */
+   {0,0,0,0,0,0,0,0,0,0,0,0,0},/* pcache2 */
+   (void*)0,                  /* pHeap */
+   0,                         /* nHeap */
+   0, 0,                      /* mnHeap, mxHeap */
+   SQLITE_DEFAULT_MMAP_SIZE,  /* szMmap */
+   SQLITE_MAX_MMAP_SIZE,      /* mxMmap */
+   (void*)0,                  /* pScratch */
+   0,                         /* szScratch */
+   0,                         /* nScratch */
+   (void*)0,                  /* pPage */
+   0,                         /* szPage */
+   0,                         /* nPage */
+   0,                         /* mxParserStack */
+   0,                         /* sharedCacheEnabled */
+   SQLITE_SORTER_PMASZ,       /* szPma */
+   /* All the rest should always be initialized to zero */
+   0,                         /* isInit */
+   0,                         /* inProgress */
+   0,                         /* isMutexInit */
+   0,                         /* isMallocInit */
+   0,                         /* isPCacheInit */
+   0,                         /* nRefInitMutex */
+   0,                         /* pInitMutex */
+   0,                         /* xLog */
+   0,                         /* pLogArg */
+#ifdef SQLITE_ENABLE_SQLLOG
+   0,                         /* xSqllog */
+   0,                         /* pSqllogArg */
+#endif
+#ifdef SQLITE_VDBE_COVERAGE
+   0,                         /* xVdbeBranch */
+   0,                         /* pVbeBranchArg */
+#endif
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+   0,                         /* xTestCallback */
+#endif
+   0                          /* bLocaltimeFault */
+};
+
+/*
+** Hash table for global functions - functions common to all
+** database connections.  After initialization, this table is
+** read-only.
+*/
+SQLITE_PRIVATE SQLITE_WSD FuncDefHash sqlite3GlobalFunctions;
+
+/*
+** Constant tokens for values 0 and 1.
+*/
+SQLITE_PRIVATE const Token sqlite3IntTokens[] = {
+   { "0", 1 },
+   { "1", 1 }
+};
+
+
+/*
+** The value of the "pending" byte must be 0x40000000 (1 byte past the
+** 1-gibabyte boundary) in a compatible database.  SQLite never uses
+** the database page that contains the pending byte.  It never attempts
+** to read or write that page.  The pending byte page is set assign
+** for use by the VFS layers as space for managing file locks.
+**
+** During testing, it is often desirable to move the pending byte to
+** a different position in the file.  This allows code that has to
+** deal with the pending byte to run on files that are much smaller
+** than 1 GiB.  The sqlite3_test_control() interface can be used to
+** move the pending byte.
+**
+** IMPORTANT:  Changing the pending byte to any value other than
+** 0x40000000 results in an incompatible database file format!
+** Changing the pending byte during operation will result in undefined
+** and incorrect behavior.
+*/
+#ifndef SQLITE_OMIT_WSD
+SQLITE_PRIVATE int sqlite3PendingByte = 0x40000000;
+#endif
+
+/*
+** Properties of opcodes.  The OPFLG_INITIALIZER macro is
+** created by mkopcodeh.awk during compilation.  Data is obtained
+** from the comments following the "case OP_xxxx:" statements in
+** the vdbe.c file.  
+*/
+SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[] = OPFLG_INITIALIZER;
+
+/************** End of global.c **********************************************/
+/************** Begin file ctime.c *******************************************/
+/*
+** 2010 February 23
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file implements routines used to report what compile-time options
+** SQLite was built with.
+*/
+
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+
+
+/*
+** An array of names of all compile-time options.  This array should 
+** be sorted A-Z.
+**
+** This array looks large, but in a typical installation actually uses
+** only a handful of compile-time options, so most times this array is usually
+** rather short and uses little memory space.
+*/
+static const char * const azCompileOpt[] = {
+
+/* These macros are provided to "stringify" the value of the define
+** for those options in which the value is meaningful. */
+#define CTIMEOPT_VAL_(opt) #opt
+#define CTIMEOPT_VAL(opt) CTIMEOPT_VAL_(opt)
+
+#if SQLITE_32BIT_ROWID
+  "32BIT_ROWID",
+#endif
+#if SQLITE_4_BYTE_ALIGNED_MALLOC
+  "4_BYTE_ALIGNED_MALLOC",
+#endif
+#if SQLITE_CASE_SENSITIVE_LIKE
+  "CASE_SENSITIVE_LIKE",
+#endif
+#if SQLITE_CHECK_PAGES
+  "CHECK_PAGES",
+#endif
+#if SQLITE_COVERAGE_TEST
+  "COVERAGE_TEST",
+#endif
+#if SQLITE_DEBUG
+  "DEBUG",
+#endif
+#if SQLITE_DEFAULT_LOCKING_MODE
+  "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE),
+#endif
+#if defined(SQLITE_DEFAULT_MMAP_SIZE) && !defined(SQLITE_DEFAULT_MMAP_SIZE_xc)
+  "DEFAULT_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_SIZE),
+#endif
+#if SQLITE_DISABLE_DIRSYNC
+  "DISABLE_DIRSYNC",
+#endif
+#if SQLITE_DISABLE_LFS
+  "DISABLE_LFS",
+#endif
+#if SQLITE_ENABLE_API_ARMOR
+  "ENABLE_API_ARMOR",
+#endif
+#if SQLITE_ENABLE_ATOMIC_WRITE
+  "ENABLE_ATOMIC_WRITE",
+#endif
+#if SQLITE_ENABLE_CEROD
+  "ENABLE_CEROD",
+#endif
+#if SQLITE_ENABLE_COLUMN_METADATA
+  "ENABLE_COLUMN_METADATA",
+#endif
+#if SQLITE_ENABLE_EXPENSIVE_ASSERT
+  "ENABLE_EXPENSIVE_ASSERT",
+#endif
+#if SQLITE_ENABLE_FTS1
+  "ENABLE_FTS1",
+#endif
+#if SQLITE_ENABLE_FTS2
+  "ENABLE_FTS2",
+#endif
+#if SQLITE_ENABLE_FTS3
+  "ENABLE_FTS3",
+#endif
+#if SQLITE_ENABLE_FTS3_PARENTHESIS
+  "ENABLE_FTS3_PARENTHESIS",
+#endif
+#if SQLITE_ENABLE_FTS4
+  "ENABLE_FTS4",
+#endif
+#if SQLITE_ENABLE_ICU
+  "ENABLE_ICU",
+#endif
+#if SQLITE_ENABLE_IOTRACE
+  "ENABLE_IOTRACE",
+#endif
+#if SQLITE_ENABLE_LOAD_EXTENSION
+  "ENABLE_LOAD_EXTENSION",
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE
+  "ENABLE_LOCKING_STYLE=" CTIMEOPT_VAL(SQLITE_ENABLE_LOCKING_STYLE),
+#endif
+#if SQLITE_ENABLE_MEMORY_MANAGEMENT
+  "ENABLE_MEMORY_MANAGEMENT",
+#endif
+#if SQLITE_ENABLE_MEMSYS3
+  "ENABLE_MEMSYS3",
+#endif
+#if SQLITE_ENABLE_MEMSYS5
+  "ENABLE_MEMSYS5",
+#endif
+#if SQLITE_ENABLE_OVERSIZE_CELL_CHECK
+  "ENABLE_OVERSIZE_CELL_CHECK",
+#endif
+#if SQLITE_ENABLE_RTREE
+  "ENABLE_RTREE",
+#endif
+#if defined(SQLITE_ENABLE_STAT4)
+  "ENABLE_STAT4",
+#elif defined(SQLITE_ENABLE_STAT3)
+  "ENABLE_STAT3",
+#endif
+#if SQLITE_ENABLE_UNLOCK_NOTIFY
+  "ENABLE_UNLOCK_NOTIFY",
+#endif
+#if SQLITE_ENABLE_UPDATE_DELETE_LIMIT
+  "ENABLE_UPDATE_DELETE_LIMIT",
+#endif
+#if SQLITE_HAS_CODEC
+  "HAS_CODEC",
+#endif
+#if HAVE_ISNAN || SQLITE_HAVE_ISNAN
+  "HAVE_ISNAN",
+#endif
+#if SQLITE_HOMEGROWN_RECURSIVE_MUTEX
+  "HOMEGROWN_RECURSIVE_MUTEX",
+#endif
+#if SQLITE_IGNORE_AFP_LOCK_ERRORS
+  "IGNORE_AFP_LOCK_ERRORS",
+#endif
+#if SQLITE_IGNORE_FLOCK_LOCK_ERRORS
+  "IGNORE_FLOCK_LOCK_ERRORS",
+#endif
+#ifdef SQLITE_INT64_TYPE
+  "INT64_TYPE",
+#endif
+#if SQLITE_LOCK_TRACE
+  "LOCK_TRACE",
+#endif
+#if defined(SQLITE_MAX_MMAP_SIZE) && !defined(SQLITE_MAX_MMAP_SIZE_xc)
+  "MAX_MMAP_SIZE=" CTIMEOPT_VAL(SQLITE_MAX_MMAP_SIZE),
+#endif
+#ifdef SQLITE_MAX_SCHEMA_RETRY
+  "MAX_SCHEMA_RETRY=" CTIMEOPT_VAL(SQLITE_MAX_SCHEMA_RETRY),
+#endif
+#if SQLITE_MEMDEBUG
+  "MEMDEBUG",
+#endif
+#if SQLITE_MIXED_ENDIAN_64BIT_FLOAT
+  "MIXED_ENDIAN_64BIT_FLOAT",
+#endif
+#if SQLITE_NO_SYNC
+  "NO_SYNC",
+#endif
+#if SQLITE_OMIT_ALTERTABLE
+  "OMIT_ALTERTABLE",
+#endif
+#if SQLITE_OMIT_ANALYZE
+  "OMIT_ANALYZE",
+#endif
+#if SQLITE_OMIT_ATTACH
+  "OMIT_ATTACH",
+#endif
+#if SQLITE_OMIT_AUTHORIZATION
+  "OMIT_AUTHORIZATION",
+#endif
+#if SQLITE_OMIT_AUTOINCREMENT
+  "OMIT_AUTOINCREMENT",
+#endif
+#if SQLITE_OMIT_AUTOINIT
+  "OMIT_AUTOINIT",
+#endif
+#if SQLITE_OMIT_AUTOMATIC_INDEX
+  "OMIT_AUTOMATIC_INDEX",
+#endif
+#if SQLITE_OMIT_AUTORESET
+  "OMIT_AUTORESET",
+#endif
+#if SQLITE_OMIT_AUTOVACUUM
+  "OMIT_AUTOVACUUM",
+#endif
+#if SQLITE_OMIT_BETWEEN_OPTIMIZATION
+  "OMIT_BETWEEN_OPTIMIZATION",
+#endif
+#if SQLITE_OMIT_BLOB_LITERAL
+  "OMIT_BLOB_LITERAL",
+#endif
+#if SQLITE_OMIT_BTREECOUNT
+  "OMIT_BTREECOUNT",
+#endif
+#if SQLITE_OMIT_BUILTIN_TEST
+  "OMIT_BUILTIN_TEST",
+#endif
+#if SQLITE_OMIT_CAST
+  "OMIT_CAST",
+#endif
+#if SQLITE_OMIT_CHECK
+  "OMIT_CHECK",
+#endif
+#if SQLITE_OMIT_COMPLETE
+  "OMIT_COMPLETE",
+#endif
+#if SQLITE_OMIT_COMPOUND_SELECT
+  "OMIT_COMPOUND_SELECT",
+#endif
+#if SQLITE_OMIT_CTE
+  "OMIT_CTE",
+#endif
+#if SQLITE_OMIT_DATETIME_FUNCS
+  "OMIT_DATETIME_FUNCS",
+#endif
+#if SQLITE_OMIT_DECLTYPE
+  "OMIT_DECLTYPE",
+#endif
+#if SQLITE_OMIT_DEPRECATED
+  "OMIT_DEPRECATED",
+#endif
+#if SQLITE_OMIT_DISKIO
+  "OMIT_DISKIO",
+#endif
+#if SQLITE_OMIT_EXPLAIN
+  "OMIT_EXPLAIN",
+#endif
+#if SQLITE_OMIT_FLAG_PRAGMAS
+  "OMIT_FLAG_PRAGMAS",
+#endif
+#if SQLITE_OMIT_FLOATING_POINT
+  "OMIT_FLOATING_POINT",
+#endif
+#if SQLITE_OMIT_FOREIGN_KEY
+  "OMIT_FOREIGN_KEY",
+#endif
+#if SQLITE_OMIT_GET_TABLE
+  "OMIT_GET_TABLE",
+#endif
+#if SQLITE_OMIT_INCRBLOB
+  "OMIT_INCRBLOB",
+#endif
+#if SQLITE_OMIT_INTEGRITY_CHECK
+  "OMIT_INTEGRITY_CHECK",
+#endif
+#if SQLITE_OMIT_LIKE_OPTIMIZATION
+  "OMIT_LIKE_OPTIMIZATION",
+#endif
+#if SQLITE_OMIT_LOAD_EXTENSION
+  "OMIT_LOAD_EXTENSION",
+#endif
+#if SQLITE_OMIT_LOCALTIME
+  "OMIT_LOCALTIME",
+#endif
+#if SQLITE_OMIT_LOOKASIDE
+  "OMIT_LOOKASIDE",
+#endif
+#if SQLITE_OMIT_MEMORYDB
+  "OMIT_MEMORYDB",
+#endif
+#if SQLITE_OMIT_OR_OPTIMIZATION
+  "OMIT_OR_OPTIMIZATION",
+#endif
+#if SQLITE_OMIT_PAGER_PRAGMAS
+  "OMIT_PAGER_PRAGMAS",
+#endif
+#if SQLITE_OMIT_PRAGMA
+  "OMIT_PRAGMA",
+#endif
+#if SQLITE_OMIT_PROGRESS_CALLBACK
+  "OMIT_PROGRESS_CALLBACK",
+#endif
+#if SQLITE_OMIT_QUICKBALANCE
+  "OMIT_QUICKBALANCE",
+#endif
+#if SQLITE_OMIT_REINDEX
+  "OMIT_REINDEX",
+#endif
+#if SQLITE_OMIT_SCHEMA_PRAGMAS
+  "OMIT_SCHEMA_PRAGMAS",
+#endif
+#if SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS
+  "OMIT_SCHEMA_VERSION_PRAGMAS",
+#endif
+#if SQLITE_OMIT_SHARED_CACHE
+  "OMIT_SHARED_CACHE",
+#endif
+#if SQLITE_OMIT_SUBQUERY
+  "OMIT_SUBQUERY",
+#endif
+#if SQLITE_OMIT_TCL_VARIABLE
+  "OMIT_TCL_VARIABLE",
+#endif
+#if SQLITE_OMIT_TEMPDB
+  "OMIT_TEMPDB",
+#endif
+#if SQLITE_OMIT_TRACE
+  "OMIT_TRACE",
+#endif
+#if SQLITE_OMIT_TRIGGER
+  "OMIT_TRIGGER",
+#endif
+#if SQLITE_OMIT_TRUNCATE_OPTIMIZATION
+  "OMIT_TRUNCATE_OPTIMIZATION",
+#endif
+#if SQLITE_OMIT_UTF16
+  "OMIT_UTF16",
+#endif
+#if SQLITE_OMIT_VACUUM
+  "OMIT_VACUUM",
+#endif
+#if SQLITE_OMIT_VIEW
+  "OMIT_VIEW",
+#endif
+#if SQLITE_OMIT_VIRTUALTABLE
+  "OMIT_VIRTUALTABLE",
+#endif
+#if SQLITE_OMIT_WAL
+  "OMIT_WAL",
+#endif
+#if SQLITE_OMIT_WSD
+  "OMIT_WSD",
+#endif
+#if SQLITE_OMIT_XFER_OPT
+  "OMIT_XFER_OPT",
+#endif
+#if SQLITE_PERFORMANCE_TRACE
+  "PERFORMANCE_TRACE",
+#endif
+#if SQLITE_PROXY_DEBUG
+  "PROXY_DEBUG",
+#endif
+#if SQLITE_RTREE_INT_ONLY
+  "RTREE_INT_ONLY",
+#endif
+#if SQLITE_SECURE_DELETE
+  "SECURE_DELETE",
+#endif
+#if SQLITE_SMALL_STACK
+  "SMALL_STACK",
+#endif
+#if SQLITE_SOUNDEX
+  "SOUNDEX",
+#endif
+#if SQLITE_SYSTEM_MALLOC
+  "SYSTEM_MALLOC",
+#endif
+#if SQLITE_TCL
+  "TCL",
+#endif
+#if defined(SQLITE_TEMP_STORE) && !defined(SQLITE_TEMP_STORE_xc)
+  "TEMP_STORE=" CTIMEOPT_VAL(SQLITE_TEMP_STORE),
+#endif
+#if SQLITE_TEST
+  "TEST",
+#endif
+#if defined(SQLITE_THREADSAFE)
+  "THREADSAFE=" CTIMEOPT_VAL(SQLITE_THREADSAFE),
+#endif
+#if SQLITE_USE_ALLOCA
+  "USE_ALLOCA",
+#endif
+#if SQLITE_USER_AUTHENTICATION
+  "USER_AUTHENTICATION",
+#endif
+#if SQLITE_WIN32_MALLOC
+  "WIN32_MALLOC",
+#endif
+#if SQLITE_ZERO_MALLOC
+  "ZERO_MALLOC"
+#endif
+};
+
+/*
+** Given the name of a compile-time option, return true if that option
+** was used and false if not.
+**
+** The name can optionally begin with "SQLITE_" but the "SQLITE_" prefix
+** is not required for a match.
+*/
+SQLITE_API int sqlite3_compileoption_used(const char *zOptName){
+  int i, n;
+
+#if SQLITE_ENABLE_API_ARMOR
+  if( zOptName==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  if( sqlite3StrNICmp(zOptName, "SQLITE_", 7)==0 ) zOptName += 7;
+  n = sqlite3Strlen30(zOptName);
+
+  /* Since ArraySize(azCompileOpt) is normally in single digits, a
+  ** linear search is adequate.  No need for a binary search. */
+  for(i=0; i<ArraySize(azCompileOpt); i++){
+    if( sqlite3StrNICmp(zOptName, azCompileOpt[i], n)==0
+     && sqlite3IsIdChar((unsigned char)azCompileOpt[i][n])==0
+    ){
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Return the N-th compile-time option string.  If N is out of range,
+** return a NULL pointer.
+*/
+SQLITE_API const char *sqlite3_compileoption_get(int N){
+  if( N>=0 && N<ArraySize(azCompileOpt) ){
+    return azCompileOpt[N];
+  }
+  return 0;
+}
+
+#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */
+
+/************** End of ctime.c ***********************************************/
+/************** Begin file status.c ******************************************/
+/*
+** 2008 June 18
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This module implements the sqlite3_status() interface and related
+** functionality.
+*/
+/************** Include vdbeInt.h in the middle of status.c ******************/
+/************** Begin file vdbeInt.h *****************************************/
+/*
+** 2003 September 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the header file for information that is private to the
+** VDBE.  This information used to all be at the top of the single
+** source code file "vdbe.c".  When that file became too big (over
+** 6000 lines long) it was split up into several smaller files and
+** this header information was factored out.
+*/
+#ifndef _VDBEINT_H_
+#define _VDBEINT_H_
+
+/*
+** The maximum number of times that a statement will try to reparse
+** itself before giving up and returning SQLITE_SCHEMA.
+*/
+#ifndef SQLITE_MAX_SCHEMA_RETRY
+# define SQLITE_MAX_SCHEMA_RETRY 50
+#endif
+
+/*
+** SQL is translated into a sequence of instructions to be
+** executed by a virtual machine.  Each instruction is an instance
+** of the following structure.
+*/
+typedef struct VdbeOp Op;
+
+/*
+** Boolean values
+*/
+typedef unsigned Bool;
+
+/* Opaque type used by code in vdbesort.c */
+typedef struct VdbeSorter VdbeSorter;
+
+/* Opaque type used by the explainer */
+typedef struct Explain Explain;
+
+/* Elements of the linked list at Vdbe.pAuxData */
+typedef struct AuxData AuxData;
+
+/*
+** A cursor is a pointer into a single BTree within a database file.
+** The cursor can seek to a BTree entry with a particular key, or
+** loop over all entries of the Btree.  You can also insert new BTree
+** entries or retrieve the key or data from the entry that the cursor
+** is currently pointing to.
+**
+** Cursors can also point to virtual tables, sorters, or "pseudo-tables".
+** A pseudo-table is a single-row table implemented by registers.
+** 
+** Every cursor that the virtual machine has open is represented by an
+** instance of the following structure.
+*/
+struct VdbeCursor {
+  BtCursor *pCursor;    /* The cursor structure of the backend */
+  Btree *pBt;           /* Separate file holding temporary table */
+  KeyInfo *pKeyInfo;    /* Info about index keys needed by index cursors */
+  int seekResult;       /* Result of previous sqlite3BtreeMoveto() */
+  int pseudoTableReg;   /* Register holding pseudotable content. */
+  i16 nField;           /* Number of fields in the header */
+  u16 nHdrParsed;       /* Number of header fields parsed so far */
+#ifdef SQLITE_DEBUG
+  u8 seekOp;            /* Most recent seek operation on this cursor */
+#endif
+  i8 iDb;               /* Index of cursor database in db->aDb[] (or -1) */
+  u8 nullRow;           /* True if pointing to a row with no data */
+  u8 deferredMoveto;    /* A call to sqlite3BtreeMoveto() is needed */
+  Bool isEphemeral:1;   /* True for an ephemeral table */
+  Bool useRandomRowid:1;/* Generate new record numbers semi-randomly */
+  Bool isTable:1;       /* True if a table requiring integer keys */
+  Bool isOrdered:1;     /* True if the underlying table is BTREE_UNORDERED */
+  Pgno pgnoRoot;        /* Root page of the open btree cursor */
+  sqlite3_vtab_cursor *pVtabCursor;  /* The cursor for a virtual table */
+  i64 seqCount;         /* Sequence counter */
+  i64 movetoTarget;     /* Argument to the deferred sqlite3BtreeMoveto() */
+  VdbeSorter *pSorter;  /* Sorter object for OP_SorterOpen cursors */
+
+  /* Cached information about the header for the data record that the
+  ** cursor is currently pointing to.  Only valid if cacheStatus matches
+  ** Vdbe.cacheCtr.  Vdbe.cacheCtr will never take on the value of
+  ** CACHE_STALE and so setting cacheStatus=CACHE_STALE guarantees that
+  ** the cache is out of date.
+  **
+  ** aRow might point to (ephemeral) data for the current row, or it might
+  ** be NULL.
+  */
+  u32 cacheStatus;      /* Cache is valid if this matches Vdbe.cacheCtr */
+  u32 payloadSize;      /* Total number of bytes in the record */
+  u32 szRow;            /* Byte available in aRow */
+  u32 iHdrOffset;       /* Offset to next unparsed byte of the header */
+  const u8 *aRow;       /* Data for the current row, if all on one page */
+  u32 *aOffset;         /* Pointer to aType[nField] */
+  u32 aType[1];         /* Type values for all entries in the record */
+  /* 2*nField extra array elements allocated for aType[], beyond the one
+  ** static element declared in the structure.  nField total array slots for
+  ** aType[] and nField+1 array slots for aOffset[] */
+};
+typedef struct VdbeCursor VdbeCursor;
+
+/*
+** When a sub-program is executed (OP_Program), a structure of this type
+** is allocated to store the current value of the program counter, as
+** well as the current memory cell array and various other frame specific
+** values stored in the Vdbe struct. When the sub-program is finished, 
+** these values are copied back to the Vdbe from the VdbeFrame structure,
+** restoring the state of the VM to as it was before the sub-program
+** began executing.
+**
+** The memory for a VdbeFrame object is allocated and managed by a memory
+** cell in the parent (calling) frame. When the memory cell is deleted or
+** overwritten, the VdbeFrame object is not freed immediately. Instead, it
+** is linked into the Vdbe.pDelFrame list. The contents of the Vdbe.pDelFrame
+** list is deleted when the VM is reset in VdbeHalt(). The reason for doing
+** this instead of deleting the VdbeFrame immediately is to avoid recursive
+** calls to sqlite3VdbeMemRelease() when the memory cells belonging to the
+** child frame are released.
+**
+** The currently executing frame is stored in Vdbe.pFrame. Vdbe.pFrame is
+** set to NULL if the currently executing frame is the main program.
+*/
+typedef struct VdbeFrame VdbeFrame;
+struct VdbeFrame {
+  Vdbe *v;                /* VM this frame belongs to */
+  VdbeFrame *pParent;     /* Parent of this frame, or NULL if parent is main */
+  Op *aOp;                /* Program instructions for parent frame */
+  i64 *anExec;            /* Event counters from parent frame */
+  Mem *aMem;              /* Array of memory cells for parent frame */
+  u8 *aOnceFlag;          /* Array of OP_Once flags for parent frame */
+  VdbeCursor **apCsr;     /* Array of Vdbe cursors for parent frame */
+  void *token;            /* Copy of SubProgram.token */
+  i64 lastRowid;          /* Last insert rowid (sqlite3.lastRowid) */
+  int nCursor;            /* Number of entries in apCsr */
+  int pc;                 /* Program Counter in parent (calling) frame */
+  int nOp;                /* Size of aOp array */
+  int nMem;               /* Number of entries in aMem */
+  int nOnceFlag;          /* Number of entries in aOnceFlag */
+  int nChildMem;          /* Number of memory cells for child frame */
+  int nChildCsr;          /* Number of cursors for child frame */
+  int nChange;            /* Statement changes (Vdbe.nChange)     */
+  int nDbChange;          /* Value of db->nChange */
+};
+
+#define VdbeFrameMem(p) ((Mem *)&((u8 *)p)[ROUND8(sizeof(VdbeFrame))])
+
+/*
+** A value for VdbeCursor.cacheValid that means the cache is always invalid.
+*/
+#define CACHE_STALE 0
+
+/*
+** Internally, the vdbe manipulates nearly all SQL values as Mem
+** structures. Each Mem struct may cache multiple representations (string,
+** integer etc.) of the same value.
+*/
+struct Mem {
+  union MemValue {
+    double r;           /* Real value used when MEM_Real is set in flags */
+    i64 i;              /* Integer value used when MEM_Int is set in flags */
+    int nZero;          /* Used when bit MEM_Zero is set in flags */
+    FuncDef *pDef;      /* Used only when flags==MEM_Agg */
+    RowSet *pRowSet;    /* Used only when flags==MEM_RowSet */
+    VdbeFrame *pFrame;  /* Used when flags==MEM_Frame */
+  } u;
+  u16 flags;          /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
+  u8  enc;            /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */
+  int n;              /* Number of characters in string value, excluding '\0' */
+  char *z;            /* String or BLOB value */
+  /* ShallowCopy only needs to copy the information above */
+  char *zMalloc;      /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
+  int szMalloc;       /* Size of the zMalloc allocation */
+  u32 uTemp;          /* Transient storage for serial_type in OP_MakeRecord */
+  sqlite3 *db;        /* The associated database connection */
+  void (*xDel)(void*);/* Destructor for Mem.z - only valid if MEM_Dyn */
+#ifdef SQLITE_DEBUG
+  Mem *pScopyFrom;    /* This Mem is a shallow copy of pScopyFrom */
+  void *pFiller;      /* So that sizeof(Mem) is a multiple of 8 */
+#endif
+};
+
+/* One or more of the following flags are set to indicate the validOK
+** representations of the value stored in the Mem struct.
+**
+** If the MEM_Null flag is set, then the value is an SQL NULL value.
+** No other flags may be set in this case.
+**
+** If the MEM_Str flag is set then Mem.z points at a string representation.
+** Usually this is encoded in the same unicode encoding as the main
+** database (see below for exceptions). If the MEM_Term flag is also
+** set, then the string is nul terminated. The MEM_Int and MEM_Real 
+** flags may coexist with the MEM_Str flag.
+*/
+#define MEM_Null      0x0001   /* Value is NULL */
+#define MEM_Str       0x0002   /* Value is a string */
+#define MEM_Int       0x0004   /* Value is an integer */
+#define MEM_Real      0x0008   /* Value is a real number */
+#define MEM_Blob      0x0010   /* Value is a BLOB */
+#define MEM_AffMask   0x001f   /* Mask of affinity bits */
+#define MEM_RowSet    0x0020   /* Value is a RowSet object */
+#define MEM_Frame     0x0040   /* Value is a VdbeFrame object */
+#define MEM_Undefined 0x0080   /* Value is undefined */
+#define MEM_Cleared   0x0100   /* NULL set by OP_Null, not from data */
+#define MEM_TypeMask  0x01ff   /* Mask of type bits */
+
+
+/* Whenever Mem contains a valid string or blob representation, one of
+** the following flags must be set to determine the memory management
+** policy for Mem.z.  The MEM_Term flag tells us whether or not the
+** string is \000 or \u0000 terminated
+*/
+#define MEM_Term      0x0200   /* String rep is nul terminated */
+#define MEM_Dyn       0x0400   /* Need to call Mem.xDel() on Mem.z */
+#define MEM_Static    0x0800   /* Mem.z points to a static string */
+#define MEM_Ephem     0x1000   /* Mem.z points to an ephemeral string */
+#define MEM_Agg       0x2000   /* Mem.z points to an agg function context */
+#define MEM_Zero      0x4000   /* Mem.i contains count of 0s appended to blob */
+#ifdef SQLITE_OMIT_INCRBLOB
+  #undef MEM_Zero
+  #define MEM_Zero 0x0000
+#endif
+
+/*
+** Clear any existing type flags from a Mem and replace them with f
+*/
+#define MemSetTypeFlag(p, f) \
+   ((p)->flags = ((p)->flags&~(MEM_TypeMask|MEM_Zero))|f)
+
+/*
+** Return true if a memory cell is not marked as invalid.  This macro
+** is for use inside assert() statements only.
+*/
+#ifdef SQLITE_DEBUG
+#define memIsValid(M)  ((M)->flags & MEM_Undefined)==0
+#endif
+
+/*
+** Each auxiliary data pointer stored by a user defined function 
+** implementation calling sqlite3_set_auxdata() is stored in an instance
+** of this structure. All such structures associated with a single VM
+** are stored in a linked list headed at Vdbe.pAuxData. All are destroyed
+** when the VM is halted (if not before).
+*/
+struct AuxData {
+  int iOp;                        /* Instruction number of OP_Function opcode */
+  int iArg;                       /* Index of function argument. */
+  void *pAux;                     /* Aux data pointer */
+  void (*xDelete)(void *);        /* Destructor for the aux data */
+  AuxData *pNext;                 /* Next element in list */
+};
+
+/*
+** The "context" argument for an installable function.  A pointer to an
+** instance of this structure is the first argument to the routines used
+** implement the SQL functions.
+**
+** There is a typedef for this structure in sqlite.h.  So all routines,
+** even the public interface to SQLite, can use a pointer to this structure.
+** But this file is the only place where the internal details of this
+** structure are known.
+**
+** This structure is defined inside of vdbeInt.h because it uses substructures
+** (Mem) which are only defined there.
+*/
+struct sqlite3_context {
+  Mem *pOut;            /* The return value is stored here */
+  FuncDef *pFunc;       /* Pointer to function information */
+  Mem *pMem;            /* Memory cell used to store aggregate context */
+  Vdbe *pVdbe;          /* The VM that owns this context */
+  int iOp;              /* Instruction number of OP_Function */
+  int isError;          /* Error code returned by the function. */
+  u8 skipFlag;          /* Skip accumulator loading if true */
+  u8 fErrorOrAux;       /* isError!=0 or pVdbe->pAuxData modified */
+};
+
+/*
+** An Explain object accumulates indented output which is helpful
+** in describing recursive data structures.
+*/
+struct Explain {
+  Vdbe *pVdbe;       /* Attach the explanation to this Vdbe */
+  StrAccum str;      /* The string being accumulated */
+  int nIndent;       /* Number of elements in aIndent */
+  u16 aIndent[100];  /* Levels of indentation */
+  char zBase[100];   /* Initial space */
+};
+
+/* A bitfield type for use inside of structures.  Always follow with :N where
+** N is the number of bits.
+*/
+typedef unsigned bft;  /* Bit Field Type */
+
+typedef struct ScanStatus ScanStatus;
+struct ScanStatus {
+  int addrExplain;                /* OP_Explain for loop */
+  int addrLoop;                   /* Address of "loops" counter */
+  int addrVisit;                  /* Address of "rows visited" counter */
+  int iSelectID;                  /* The "Select-ID" for this loop */
+  LogEst nEst;                    /* Estimated output rows per loop */
+  char *zName;                    /* Name of table or index */
+};
+
+/*
+** An instance of the virtual machine.  This structure contains the complete
+** state of the virtual machine.
+**
+** The "sqlite3_stmt" structure pointer that is returned by sqlite3_prepare()
+** is really a pointer to an instance of this structure.
+**
+** The Vdbe.inVtabMethod variable is set to non-zero for the duration of
+** any virtual table method invocations made by the vdbe program. It is
+** set to 2 for xDestroy method calls and 1 for all other methods. This
+** variable is used for two purposes: to allow xDestroy methods to execute
+** "DROP TABLE" statements and to prevent some nasty side effects of
+** malloc failure when SQLite is invoked recursively by a virtual table 
+** method function.
+*/
+struct Vdbe {
+  sqlite3 *db;            /* The database connection that owns this statement */
+  Op *aOp;                /* Space to hold the virtual machine's program */
+  Mem *aMem;              /* The memory locations */
+  Mem **apArg;            /* Arguments to currently executing user function */
+  Mem *aColName;          /* Column names to return */
+  Mem *pResultSet;        /* Pointer to an array of results */
+  Parse *pParse;          /* Parsing context used to create this Vdbe */
+  int nMem;               /* Number of memory locations currently allocated */
+  int nOp;                /* Number of instructions in the program */
+  int nCursor;            /* Number of slots in apCsr[] */
+  u32 magic;              /* Magic number for sanity checking */
+  char *zErrMsg;          /* Error message written here */
+  Vdbe *pPrev,*pNext;     /* Linked list of VDBEs with the same Vdbe.db */
+  VdbeCursor **apCsr;     /* One element of this array for each open cursor */
+  Mem *aVar;              /* Values for the OP_Variable opcode. */
+  char **azVar;           /* Name of variables */
+  ynVar nVar;             /* Number of entries in aVar[] */
+  ynVar nzVar;            /* Number of entries in azVar[] */
+  u32 cacheCtr;           /* VdbeCursor row cache generation counter */
+  int pc;                 /* The program counter */
+  int rc;                 /* Value to return */
+  u16 nResColumn;         /* Number of columns in one row of the result set */
+  u8 errorAction;         /* Recovery action to do in case of an error */
+  u8 minWriteFileFormat;  /* Minimum file format for writable database files */
+  bft explain:2;          /* True if EXPLAIN present on SQL command */
+  bft inVtabMethod:2;     /* See comments above */
+  bft changeCntOn:1;      /* True to update the change-counter */
+  bft expired:1;          /* True if the VM needs to be recompiled */
+  bft runOnlyOnce:1;      /* Automatically expire on reset */
+  bft usesStmtJournal:1;  /* True if uses a statement journal */
+  bft readOnly:1;         /* True for statements that do not write */
+  bft bIsReader:1;        /* True for statements that read */
+  bft isPrepareV2:1;      /* True if prepared with prepare_v2() */
+  bft doingRerun:1;       /* True if rerunning after an auto-reprepare */
+  int nChange;            /* Number of db changes made since last reset */
+  yDbMask btreeMask;      /* Bitmask of db->aDb[] entries referenced */
+  yDbMask lockMask;       /* Subset of btreeMask that requires a lock */
+  int iStatement;         /* Statement number (or 0 if has not opened stmt) */
+  u32 aCounter[5];        /* Counters used by sqlite3_stmt_status() */
+#ifndef SQLITE_OMIT_TRACE
+  i64 startTime;          /* Time when query started - used for profiling */
+#endif
+  i64 iCurrentTime;       /* Value of julianday('now') for this statement */
+  i64 nFkConstraint;      /* Number of imm. FK constraints this VM */
+  i64 nStmtDefCons;       /* Number of def. constraints when stmt started */
+  i64 nStmtDefImmCons;    /* Number of def. imm constraints when stmt started */
+  char *zSql;             /* Text of the SQL statement that generated this */
+  void *pFree;            /* Free this when deleting the vdbe */
+  VdbeFrame *pFrame;      /* Parent frame */
+  VdbeFrame *pDelFrame;   /* List of frame objects to free on VM reset */
+  int nFrame;             /* Number of frames in pFrame list */
+  u32 expmask;            /* Binding to these vars invalidates VM */
+  SubProgram *pProgram;   /* Linked list of all sub-programs used by VM */
+  int nOnceFlag;          /* Size of array aOnceFlag[] */
+  u8 *aOnceFlag;          /* Flags for OP_Once */
+  AuxData *pAuxData;      /* Linked list of auxdata allocations */
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  i64 *anExec;            /* Number of times each op has been executed */
+  int nScan;              /* Entries in aScan[] */
+  ScanStatus *aScan;      /* Scan definitions for sqlite3_stmt_scanstatus() */
+#endif
+};
+
+/*
+** The following are allowed values for Vdbe.magic
+*/
+#define VDBE_MAGIC_INIT     0x26bceaa5    /* Building a VDBE program */
+#define VDBE_MAGIC_RUN      0xbdf20da3    /* VDBE is ready to execute */
+#define VDBE_MAGIC_HALT     0x519c2973    /* VDBE has completed execution */
+#define VDBE_MAGIC_DEAD     0xb606c3c8    /* The VDBE has been deallocated */
+
+/*
+** Function prototypes
+*/
+SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *, VdbeCursor*);
+void sqliteVdbePopStack(Vdbe*,int);
+SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor*);
+SQLITE_PRIVATE int sqlite3VdbeCursorRestore(VdbeCursor*);
+#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE)
+SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE*, int, Op*);
+#endif
+SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32);
+SQLITE_PRIVATE u32 sqlite3VdbeSerialType(Mem*, int);
+SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(unsigned char*, Mem*, u32);
+SQLITE_PRIVATE u32 sqlite3VdbeSerialGet(const unsigned char*, u32, Mem*);
+SQLITE_PRIVATE void sqlite3VdbeDeleteAuxData(Vdbe*, int, int);
+
+int sqlite2BtreeKeyCompare(BtCursor *, const void *, int, int, int *);
+SQLITE_PRIVATE int sqlite3VdbeIdxKeyCompare(sqlite3*,VdbeCursor*,UnpackedRecord*,int*);
+SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3*, BtCursor*, i64*);
+SQLITE_PRIVATE int sqlite3VdbeExec(Vdbe*);
+SQLITE_PRIVATE int sqlite3VdbeList(Vdbe*);
+SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe*);
+SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *, int);
+SQLITE_PRIVATE int sqlite3VdbeMemTooBig(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem*, const Mem*);
+SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem*, const Mem*, int);
+SQLITE_PRIVATE void sqlite3VdbeMemMove(Mem*, Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemSetStr(Mem*, const char*, int, u8, void(*)(void*));
+SQLITE_PRIVATE void sqlite3VdbeMemSetInt64(Mem*, i64);
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# define sqlite3VdbeMemSetDouble sqlite3VdbeMemSetInt64
+#else
+SQLITE_PRIVATE   void sqlite3VdbeMemSetDouble(Mem*, double);
+#endif
+SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem*,sqlite3*,u16);
+SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem*);
+SQLITE_PRIVATE void sqlite3VdbeMemSetZeroBlob(Mem*,int);
+SQLITE_PRIVATE void sqlite3VdbeMemSetRowSet(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem*, u8, u8);
+SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemIntegerify(Mem*);
+SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem*);
+SQLITE_PRIVATE void sqlite3VdbeIntegerAffinity(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemRealify(Mem*);
+SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem*);
+SQLITE_PRIVATE void sqlite3VdbeMemCast(Mem*,u8,u8);
+SQLITE_PRIVATE int sqlite3VdbeMemFromBtree(BtCursor*,u32,u32,int,Mem*);
+SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p);
+#define VdbeMemDynamic(X)  \
+  (((X)->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame))!=0)
+SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem*, FuncDef*);
+SQLITE_PRIVATE const char *sqlite3OpcodeName(int);
+SQLITE_PRIVATE int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
+SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int n);
+SQLITE_PRIVATE int sqlite3VdbeCloseStatement(Vdbe *, int);
+SQLITE_PRIVATE void sqlite3VdbeFrameDelete(VdbeFrame*);
+SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *);
+SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p);
+
+SQLITE_PRIVATE int sqlite3VdbeSorterInit(sqlite3 *, int, VdbeCursor *);
+SQLITE_PRIVATE void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *);
+SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
+SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
+SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
+SQLITE_PRIVATE int sqlite3VdbeSorterRewind(const VdbeCursor *, int *);
+SQLITE_PRIVATE int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *);
+SQLITE_PRIVATE int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *);
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
+SQLITE_PRIVATE   void sqlite3VdbeEnter(Vdbe*);
+SQLITE_PRIVATE   void sqlite3VdbeLeave(Vdbe*);
+#else
+# define sqlite3VdbeEnter(X)
+# define sqlite3VdbeLeave(X)
+#endif
+
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe*,Mem*);
+SQLITE_PRIVATE int sqlite3VdbeCheckMemInvariants(Mem*);
+#endif
+
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *, int);
+#else
+# define sqlite3VdbeCheckFk(p,i) 0
+#endif
+
+SQLITE_PRIVATE int sqlite3VdbeMemTranslate(Mem*, u8);
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE   void sqlite3VdbePrintSql(Vdbe*);
+SQLITE_PRIVATE   void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf);
+#endif
+SQLITE_PRIVATE int sqlite3VdbeMemHandleBom(Mem *pMem);
+
+#ifndef SQLITE_OMIT_INCRBLOB
+SQLITE_PRIVATE   int sqlite3VdbeMemExpandBlob(Mem *);
+  #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0)
+#else
+  #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK
+  #define ExpandBlob(P) SQLITE_OK
+#endif
+
+#endif /* !defined(_VDBEINT_H_) */
+
+/************** End of vdbeInt.h *********************************************/
+/************** Continuing where we left off in status.c *********************/
+
+/*
+** Variables in which to record status information.
+*/
+typedef struct sqlite3StatType sqlite3StatType;
+static SQLITE_WSD struct sqlite3StatType {
+  int nowValue[10];         /* Current value */
+  int mxValue[10];          /* Maximum value */
+} sqlite3Stat = { {0,}, {0,} };
+
+
+/* The "wsdStat" macro will resolve to the status information
+** state vector.  If writable static data is unsupported on the target,
+** we have to locate the state vector at run-time.  In the more common
+** case where writable static data is supported, wsdStat can refer directly
+** to the "sqlite3Stat" state vector declared above.
+*/
+#ifdef SQLITE_OMIT_WSD
+# define wsdStatInit  sqlite3StatType *x = &GLOBAL(sqlite3StatType,sqlite3Stat)
+# define wsdStat x[0]
+#else
+# define wsdStatInit
+# define wsdStat sqlite3Stat
+#endif
+
+/*
+** Return the current value of a status parameter.
+*/
+SQLITE_PRIVATE int sqlite3StatusValue(int op){
+  wsdStatInit;
+  assert( op>=0 && op<ArraySize(wsdStat.nowValue) );
+  return wsdStat.nowValue[op];
+}
+
+/*
+** Add N to the value of a status record.  It is assumed that the
+** caller holds appropriate locks.
+*/
+SQLITE_PRIVATE void sqlite3StatusAdd(int op, int N){
+  wsdStatInit;
+  assert( op>=0 && op<ArraySize(wsdStat.nowValue) );
+  wsdStat.nowValue[op] += N;
+  if( wsdStat.nowValue[op]>wsdStat.mxValue[op] ){
+    wsdStat.mxValue[op] = wsdStat.nowValue[op];
+  }
+}
+
+/*
+** Set the value of a status to X.
+*/
+SQLITE_PRIVATE void sqlite3StatusSet(int op, int X){
+  wsdStatInit;
+  assert( op>=0 && op<ArraySize(wsdStat.nowValue) );
+  wsdStat.nowValue[op] = X;
+  if( wsdStat.nowValue[op]>wsdStat.mxValue[op] ){
+    wsdStat.mxValue[op] = wsdStat.nowValue[op];
+  }
+}
+
+/*
+** Query status information.
+**
+** This implementation assumes that reading or writing an aligned
+** 32-bit integer is an atomic operation.  If that assumption is not true,
+** then this routine is not threadsafe.
+*/
+SQLITE_API int sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag){
+  wsdStatInit;
+  if( op<0 || op>=ArraySize(wsdStat.nowValue) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( pCurrent==0 || pHighwater==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *pCurrent = wsdStat.nowValue[op];
+  *pHighwater = wsdStat.mxValue[op];
+  if( resetFlag ){
+    wsdStat.mxValue[op] = wsdStat.nowValue[op];
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Query status information for a single database connection
+*/
+SQLITE_API int sqlite3_db_status(
+  sqlite3 *db,          /* The database connection whose status is desired */
+  int op,               /* Status verb */
+  int *pCurrent,        /* Write current value here */
+  int *pHighwater,      /* Write high-water mark here */
+  int resetFlag         /* Reset high-water mark if true */
+){
+  int rc = SQLITE_OK;   /* Return code */
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || pCurrent==0|| pHighwater==0 ){
+    return SQLITE_MISUSE_BKPT;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  switch( op ){
+    case SQLITE_DBSTATUS_LOOKASIDE_USED: {
+      *pCurrent = db->lookaside.nOut;
+      *pHighwater = db->lookaside.mxOut;
+      if( resetFlag ){
+        db->lookaside.mxOut = db->lookaside.nOut;
+      }
+      break;
+    }
+
+    case SQLITE_DBSTATUS_LOOKASIDE_HIT:
+    case SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE:
+    case SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL: {
+      testcase( op==SQLITE_DBSTATUS_LOOKASIDE_HIT );
+      testcase( op==SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE );
+      testcase( op==SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL );
+      assert( (op-SQLITE_DBSTATUS_LOOKASIDE_HIT)>=0 );
+      assert( (op-SQLITE_DBSTATUS_LOOKASIDE_HIT)<3 );
+      *pCurrent = 0;
+      *pHighwater = db->lookaside.anStat[op - SQLITE_DBSTATUS_LOOKASIDE_HIT];
+      if( resetFlag ){
+        db->lookaside.anStat[op - SQLITE_DBSTATUS_LOOKASIDE_HIT] = 0;
+      }
+      break;
+    }
+
+    /* 
+    ** Return an approximation for the amount of memory currently used
+    ** by all pagers associated with the given database connection.  The
+    ** highwater mark is meaningless and is returned as zero.
+    */
+    case SQLITE_DBSTATUS_CACHE_USED: {
+      int totalUsed = 0;
+      int i;
+      sqlite3BtreeEnterAll(db);
+      for(i=0; i<db->nDb; i++){
+        Btree *pBt = db->aDb[i].pBt;
+        if( pBt ){
+          Pager *pPager = sqlite3BtreePager(pBt);
+          totalUsed += sqlite3PagerMemUsed(pPager);
+        }
+      }
+      sqlite3BtreeLeaveAll(db);
+      *pCurrent = totalUsed;
+      *pHighwater = 0;
+      break;
+    }
+
+    /*
+    ** *pCurrent gets an accurate estimate of the amount of memory used
+    ** to store the schema for all databases (main, temp, and any ATTACHed
+    ** databases.  *pHighwater is set to zero.
+    */
+    case SQLITE_DBSTATUS_SCHEMA_USED: {
+      int i;                      /* Used to iterate through schemas */
+      int nByte = 0;              /* Used to accumulate return value */
+
+      sqlite3BtreeEnterAll(db);
+      db->pnBytesFreed = &nByte;
+      for(i=0; i<db->nDb; i++){
+        Schema *pSchema = db->aDb[i].pSchema;
+        if( ALWAYS(pSchema!=0) ){
+          HashElem *p;
+
+          nByte += sqlite3GlobalConfig.m.xRoundup(sizeof(HashElem)) * (
+              pSchema->tblHash.count 
+            + pSchema->trigHash.count
+            + pSchema->idxHash.count
+            + pSchema->fkeyHash.count
+          );
+          nByte += sqlite3MallocSize(pSchema->tblHash.ht);
+          nByte += sqlite3MallocSize(pSchema->trigHash.ht);
+          nByte += sqlite3MallocSize(pSchema->idxHash.ht);
+          nByte += sqlite3MallocSize(pSchema->fkeyHash.ht);
+
+          for(p=sqliteHashFirst(&pSchema->trigHash); p; p=sqliteHashNext(p)){
+            sqlite3DeleteTrigger(db, (Trigger*)sqliteHashData(p));
+          }
+          for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){
+            sqlite3DeleteTable(db, (Table *)sqliteHashData(p));
+          }
+        }
+      }
+      db->pnBytesFreed = 0;
+      sqlite3BtreeLeaveAll(db);
+
+      *pHighwater = 0;
+      *pCurrent = nByte;
+      break;
+    }
+
+    /*
+    ** *pCurrent gets an accurate estimate of the amount of memory used
+    ** to store all prepared statements.
+    ** *pHighwater is set to zero.
+    */
+    case SQLITE_DBSTATUS_STMT_USED: {
+      struct Vdbe *pVdbe;         /* Used to iterate through VMs */
+      int nByte = 0;              /* Used to accumulate return value */
+
+      db->pnBytesFreed = &nByte;
+      for(pVdbe=db->pVdbe; pVdbe; pVdbe=pVdbe->pNext){
+        sqlite3VdbeClearObject(db, pVdbe);
+        sqlite3DbFree(db, pVdbe);
+      }
+      db->pnBytesFreed = 0;
+
+      *pHighwater = 0;  /* IMP: R-64479-57858 */
+      *pCurrent = nByte;
+
+      break;
+    }
+
+    /*
+    ** Set *pCurrent to the total cache hits or misses encountered by all
+    ** pagers the database handle is connected to. *pHighwater is always set 
+    ** to zero.
+    */
+    case SQLITE_DBSTATUS_CACHE_HIT:
+    case SQLITE_DBSTATUS_CACHE_MISS:
+    case SQLITE_DBSTATUS_CACHE_WRITE:{
+      int i;
+      int nRet = 0;
+      assert( SQLITE_DBSTATUS_CACHE_MISS==SQLITE_DBSTATUS_CACHE_HIT+1 );
+      assert( SQLITE_DBSTATUS_CACHE_WRITE==SQLITE_DBSTATUS_CACHE_HIT+2 );
+
+      for(i=0; i<db->nDb; i++){
+        if( db->aDb[i].pBt ){
+          Pager *pPager = sqlite3BtreePager(db->aDb[i].pBt);
+          sqlite3PagerCacheStat(pPager, op, resetFlag, &nRet);
+        }
+      }
+      *pHighwater = 0; /* IMP: R-42420-56072 */
+                       /* IMP: R-54100-20147 */
+                       /* IMP: R-29431-39229 */
+      *pCurrent = nRet;
+      break;
+    }
+
+    /* Set *pCurrent to non-zero if there are unresolved deferred foreign
+    ** key constraints.  Set *pCurrent to zero if all foreign key constraints
+    ** have been satisfied.  The *pHighwater is always set to zero.
+    */
+    case SQLITE_DBSTATUS_DEFERRED_FKS: {
+      *pHighwater = 0;  /* IMP: R-11967-56545 */
+      *pCurrent = db->nDeferredImmCons>0 || db->nDeferredCons>0;
+      break;
+    }
+
+    default: {
+      rc = SQLITE_ERROR;
+    }
+  }
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/************** End of status.c **********************************************/
+/************** Begin file date.c ********************************************/
+/*
+** 2003 October 31
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement date and time
+** functions for SQLite.  
+**
+** There is only one exported symbol in this file - the function
+** sqlite3RegisterDateTimeFunctions() found at the bottom of the file.
+** All other code has file scope.
+**
+** SQLite processes all times and dates as julian day numbers.  The
+** dates and times are stored as the number of days since noon
+** in Greenwich on November 24, 4714 B.C. according to the Gregorian
+** calendar system. 
+**
+** 1970-01-01 00:00:00 is JD 2440587.5
+** 2000-01-01 00:00:00 is JD 2451544.5
+**
+** This implementation requires years to be expressed as a 4-digit number
+** which means that only dates between 0000-01-01 and 9999-12-31 can
+** be represented, even though julian day numbers allow a much wider
+** range of dates.
+**
+** The Gregorian calendar system is used for all dates and times,
+** even those that predate the Gregorian calendar.  Historians usually
+** use the julian calendar for dates prior to 1582-10-15 and for some
+** dates afterwards, depending on locale.  Beware of this difference.
+**
+** The conversion algorithms are implemented based on descriptions
+** in the following text:
+**
+**      Jean Meeus
+**      Astronomical Algorithms, 2nd Edition, 1998
+**      ISBM 0-943396-61-1
+**      Willmann-Bell, Inc
+**      Richmond, Virginia (USA)
+*/
+/* #include <stdlib.h> */
+/* #include <assert.h> */
+#include <time.h>
+
+#ifndef SQLITE_OMIT_DATETIME_FUNCS
+
+
+/*
+** A structure for holding a single date and time.
+*/
+typedef struct DateTime DateTime;
+struct DateTime {
+  sqlite3_int64 iJD; /* The julian day number times 86400000 */
+  int Y, M, D;       /* Year, month, and day */
+  int h, m;          /* Hour and minutes */
+  int tz;            /* Timezone offset in minutes */
+  double s;          /* Seconds */
+  char validYMD;     /* True (1) if Y,M,D are valid */
+  char validHMS;     /* True (1) if h,m,s are valid */
+  char validJD;      /* True (1) if iJD is valid */
+  char validTZ;      /* True (1) if tz is valid */
+};
+
+
+/*
+** Convert zDate into one or more integers.  Additional arguments
+** come in groups of 5 as follows:
+**
+**       N       number of digits in the integer
+**       min     minimum allowed value of the integer
+**       max     maximum allowed value of the integer
+**       nextC   first character after the integer
+**       pVal    where to write the integers value.
+**
+** Conversions continue until one with nextC==0 is encountered.
+** The function returns the number of successful conversions.
+*/
+static int getDigits(const char *zDate, ...){
+  va_list ap;
+  int val;
+  int N;
+  int min;
+  int max;
+  int nextC;
+  int *pVal;
+  int cnt = 0;
+  va_start(ap, zDate);
+  do{
+    N = va_arg(ap, int);
+    min = va_arg(ap, int);
+    max = va_arg(ap, int);
+    nextC = va_arg(ap, int);
+    pVal = va_arg(ap, int*);
+    val = 0;
+    while( N-- ){
+      if( !sqlite3Isdigit(*zDate) ){
+        goto end_getDigits;
+      }
+      val = val*10 + *zDate - '0';
+      zDate++;
+    }
+    if( val<min || val>max || (nextC!=0 && nextC!=*zDate) ){
+      goto end_getDigits;
+    }
+    *pVal = val;
+    zDate++;
+    cnt++;
+  }while( nextC );
+end_getDigits:
+  va_end(ap);
+  return cnt;
+}
+
+/*
+** Parse a timezone extension on the end of a date-time.
+** The extension is of the form:
+**
+**        (+/-)HH:MM
+**
+** Or the "zulu" notation:
+**
+**        Z
+**
+** If the parse is successful, write the number of minutes
+** of change in p->tz and return 0.  If a parser error occurs,
+** return non-zero.
+**
+** A missing specifier is not considered an error.
+*/
+static int parseTimezone(const char *zDate, DateTime *p){
+  int sgn = 0;
+  int nHr, nMn;
+  int c;
+  while( sqlite3Isspace(*zDate) ){ zDate++; }
+  p->tz = 0;
+  c = *zDate;
+  if( c=='-' ){
+    sgn = -1;
+  }else if( c=='+' ){
+    sgn = +1;
+  }else if( c=='Z' || c=='z' ){
+    zDate++;
+    goto zulu_time;
+  }else{
+    return c!=0;
+  }
+  zDate++;
+  if( getDigits(zDate, 2, 0, 14, ':', &nHr, 2, 0, 59, 0, &nMn)!=2 ){
+    return 1;
+  }
+  zDate += 5;
+  p->tz = sgn*(nMn + nHr*60);
+zulu_time:
+  while( sqlite3Isspace(*zDate) ){ zDate++; }
+  return *zDate!=0;
+}
+
+/*
+** Parse times of the form HH:MM or HH:MM:SS or HH:MM:SS.FFFF.
+** The HH, MM, and SS must each be exactly 2 digits.  The
+** fractional seconds FFFF can be one or more digits.
+**
+** Return 1 if there is a parsing error and 0 on success.
+*/
+static int parseHhMmSs(const char *zDate, DateTime *p){
+  int h, m, s;
+  double ms = 0.0;
+  if( getDigits(zDate, 2, 0, 24, ':', &h, 2, 0, 59, 0, &m)!=2 ){
+    return 1;
+  }
+  zDate += 5;
+  if( *zDate==':' ){
+    zDate++;
+    if( getDigits(zDate, 2, 0, 59, 0, &s)!=1 ){
+      return 1;
+    }
+    zDate += 2;
+    if( *zDate=='.' && sqlite3Isdigit(zDate[1]) ){
+      double rScale = 1.0;
+      zDate++;
+      while( sqlite3Isdigit(*zDate) ){
+        ms = ms*10.0 + *zDate - '0';
+        rScale *= 10.0;
+        zDate++;
+      }
+      ms /= rScale;
+    }
+  }else{
+    s = 0;
+  }
+  p->validJD = 0;
+  p->validHMS = 1;
+  p->h = h;
+  p->m = m;
+  p->s = s + ms;
+  if( parseTimezone(zDate, p) ) return 1;
+  p->validTZ = (p->tz!=0)?1:0;
+  return 0;
+}
+
+/*
+** Convert from YYYY-MM-DD HH:MM:SS to julian day.  We always assume
+** that the YYYY-MM-DD is according to the Gregorian calendar.
+**
+** Reference:  Meeus page 61
+*/
+static void computeJD(DateTime *p){
+  int Y, M, D, A, B, X1, X2;
+
+  if( p->validJD ) return;
+  if( p->validYMD ){
+    Y = p->Y;
+    M = p->M;
+    D = p->D;
+  }else{
+    Y = 2000;  /* If no YMD specified, assume 2000-Jan-01 */
+    M = 1;
+    D = 1;
+  }
+  if( M<=2 ){
+    Y--;
+    M += 12;
+  }
+  A = Y/100;
+  B = 2 - A + (A/4);
+  X1 = 36525*(Y+4716)/100;
+  X2 = 306001*(M+1)/10000;
+  p->iJD = (sqlite3_int64)((X1 + X2 + D + B - 1524.5 ) * 86400000);
+  p->validJD = 1;
+  if( p->validHMS ){
+    p->iJD += p->h*3600000 + p->m*60000 + (sqlite3_int64)(p->s*1000);
+    if( p->validTZ ){
+      p->iJD -= p->tz*60000;
+      p->validYMD = 0;
+      p->validHMS = 0;
+      p->validTZ = 0;
+    }
+  }
+}
+
+/*
+** Parse dates of the form
+**
+**     YYYY-MM-DD HH:MM:SS.FFF
+**     YYYY-MM-DD HH:MM:SS
+**     YYYY-MM-DD HH:MM
+**     YYYY-MM-DD
+**
+** Write the result into the DateTime structure and return 0
+** on success and 1 if the input string is not a well-formed
+** date.
+*/
+static int parseYyyyMmDd(const char *zDate, DateTime *p){
+  int Y, M, D, neg;
+
+  if( zDate[0]=='-' ){
+    zDate++;
+    neg = 1;
+  }else{
+    neg = 0;
+  }
+  if( getDigits(zDate,4,0,9999,'-',&Y,2,1,12,'-',&M,2,1,31,0,&D)!=3 ){
+    return 1;
+  }
+  zDate += 10;
+  while( sqlite3Isspace(*zDate) || 'T'==*(u8*)zDate ){ zDate++; }
+  if( parseHhMmSs(zDate, p)==0 ){
+    /* We got the time */
+  }else if( *zDate==0 ){
+    p->validHMS = 0;
+  }else{
+    return 1;
+  }
+  p->validJD = 0;
+  p->validYMD = 1;
+  p->Y = neg ? -Y : Y;
+  p->M = M;
+  p->D = D;
+  if( p->validTZ ){
+    computeJD(p);
+  }
+  return 0;
+}
+
+/*
+** Set the time to the current time reported by the VFS.
+**
+** Return the number of errors.
+*/
+static int setDateTimeToCurrent(sqlite3_context *context, DateTime *p){
+  p->iJD = sqlite3StmtCurrentTime(context);
+  if( p->iJD>0 ){
+    p->validJD = 1;
+    return 0;
+  }else{
+    return 1;
+  }
+}
+
+/*
+** Attempt to parse the given string into a julian day number.  Return
+** the number of errors.
+**
+** The following are acceptable forms for the input string:
+**
+**      YYYY-MM-DD HH:MM:SS.FFF  +/-HH:MM
+**      DDDD.DD 
+**      now
+**
+** In the first form, the +/-HH:MM is always optional.  The fractional
+** seconds extension (the ".FFF") is optional.  The seconds portion
+** (":SS.FFF") is option.  The year and date can be omitted as long
+** as there is a time string.  The time string can be omitted as long
+** as there is a year and date.
+*/
+static int parseDateOrTime(
+  sqlite3_context *context, 
+  const char *zDate, 
+  DateTime *p
+){
+  double r;
+  if( parseYyyyMmDd(zDate,p)==0 ){
+    return 0;
+  }else if( parseHhMmSs(zDate, p)==0 ){
+    return 0;
+  }else if( sqlite3StrICmp(zDate,"now")==0){
+    return setDateTimeToCurrent(context, p);
+  }else if( sqlite3AtoF(zDate, &r, sqlite3Strlen30(zDate), SQLITE_UTF8) ){
+    p->iJD = (sqlite3_int64)(r*86400000.0 + 0.5);
+    p->validJD = 1;
+    return 0;
+  }
+  return 1;
+}
+
+/*
+** Compute the Year, Month, and Day from the julian day number.
+*/
+static void computeYMD(DateTime *p){
+  int Z, A, B, C, D, E, X1;
+  if( p->validYMD ) return;
+  if( !p->validJD ){
+    p->Y = 2000;
+    p->M = 1;
+    p->D = 1;
+  }else{
+    Z = (int)((p->iJD + 43200000)/86400000);
+    A = (int)((Z - 1867216.25)/36524.25);
+    A = Z + 1 + A - (A/4);
+    B = A + 1524;
+    C = (int)((B - 122.1)/365.25);
+    D = (36525*C)/100;
+    E = (int)((B-D)/30.6001);
+    X1 = (int)(30.6001*E);
+    p->D = B - D - X1;
+    p->M = E<14 ? E-1 : E-13;
+    p->Y = p->M>2 ? C - 4716 : C - 4715;
+  }
+  p->validYMD = 1;
+}
+
+/*
+** Compute the Hour, Minute, and Seconds from the julian day number.
+*/
+static void computeHMS(DateTime *p){
+  int s;
+  if( p->validHMS ) return;
+  computeJD(p);
+  s = (int)((p->iJD + 43200000) % 86400000);
+  p->s = s/1000.0;
+  s = (int)p->s;
+  p->s -= s;
+  p->h = s/3600;
+  s -= p->h*3600;
+  p->m = s/60;
+  p->s += s - p->m*60;
+  p->validHMS = 1;
+}
+
+/*
+** Compute both YMD and HMS
+*/
+static void computeYMD_HMS(DateTime *p){
+  computeYMD(p);
+  computeHMS(p);
+}
+
+/*
+** Clear the YMD and HMS and the TZ
+*/
+static void clearYMD_HMS_TZ(DateTime *p){
+  p->validYMD = 0;
+  p->validHMS = 0;
+  p->validTZ = 0;
+}
+
+/*
+** On recent Windows platforms, the localtime_s() function is available
+** as part of the "Secure CRT". It is essentially equivalent to 
+** localtime_r() available under most POSIX platforms, except that the 
+** order of the parameters is reversed.
+**
+** See http://msdn.microsoft.com/en-us/library/a442x3ye(VS.80).aspx.
+**
+** If the user has not indicated to use localtime_r() or localtime_s()
+** already, check for an MSVC build environment that provides 
+** localtime_s().
+*/
+#if !HAVE_LOCALTIME_R && !HAVE_LOCALTIME_S \
+    && defined(_MSC_VER) && defined(_CRT_INSECURE_DEPRECATE)
+#undef  HAVE_LOCALTIME_S
+#define HAVE_LOCALTIME_S 1
+#endif
+
+#ifndef SQLITE_OMIT_LOCALTIME
+/*
+** The following routine implements the rough equivalent of localtime_r()
+** using whatever operating-system specific localtime facility that
+** is available.  This routine returns 0 on success and
+** non-zero on any kind of error.
+**
+** If the sqlite3GlobalConfig.bLocaltimeFault variable is true then this
+** routine will always fail.
+**
+** EVIDENCE-OF: R-62172-00036 In this implementation, the standard C
+** library function localtime_r() is used to assist in the calculation of
+** local time.
+*/
+static int osLocaltime(time_t *t, struct tm *pTm){
+  int rc;
+#if !HAVE_LOCALTIME_R && !HAVE_LOCALTIME_S
+  struct tm *pX;
+#if SQLITE_THREADSAFE>0
+  sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+  sqlite3_mutex_enter(mutex);
+  pX = localtime(t);
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+  if( sqlite3GlobalConfig.bLocaltimeFault ) pX = 0;
+#endif
+  if( pX ) *pTm = *pX;
+  sqlite3_mutex_leave(mutex);
+  rc = pX==0;
+#else
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+  if( sqlite3GlobalConfig.bLocaltimeFault ) return 1;
+#endif
+#if HAVE_LOCALTIME_R
+  rc = localtime_r(t, pTm)==0;
+#else
+  rc = localtime_s(pTm, t);
+#endif /* HAVE_LOCALTIME_R */
+#endif /* HAVE_LOCALTIME_R || HAVE_LOCALTIME_S */
+  return rc;
+}
+#endif /* SQLITE_OMIT_LOCALTIME */
+
+
+#ifndef SQLITE_OMIT_LOCALTIME
+/*
+** Compute the difference (in milliseconds) between localtime and UTC
+** (a.k.a. GMT) for the time value p where p is in UTC. If no error occurs,
+** return this value and set *pRc to SQLITE_OK. 
+**
+** Or, if an error does occur, set *pRc to SQLITE_ERROR. The returned value
+** is undefined in this case.
+*/
+static sqlite3_int64 localtimeOffset(
+  DateTime *p,                    /* Date at which to calculate offset */
+  sqlite3_context *pCtx,          /* Write error here if one occurs */
+  int *pRc                        /* OUT: Error code. SQLITE_OK or ERROR */
+){
+  DateTime x, y;
+  time_t t;
+  struct tm sLocal;
+
+  /* Initialize the contents of sLocal to avoid a compiler warning. */
+  memset(&sLocal, 0, sizeof(sLocal));
+
+  x = *p;
+  computeYMD_HMS(&x);
+  if( x.Y<1971 || x.Y>=2038 ){
+    /* EVIDENCE-OF: R-55269-29598 The localtime_r() C function normally only
+    ** works for years between 1970 and 2037. For dates outside this range,
+    ** SQLite attempts to map the year into an equivalent year within this
+    ** range, do the calculation, then map the year back.
+    */
+    x.Y = 2000;
+    x.M = 1;
+    x.D = 1;
+    x.h = 0;
+    x.m = 0;
+    x.s = 0.0;
+  } else {
+    int s = (int)(x.s + 0.5);
+    x.s = s;
+  }
+  x.tz = 0;
+  x.validJD = 0;
+  computeJD(&x);
+  t = (time_t)(x.iJD/1000 - 21086676*(i64)10000);
+  if( osLocaltime(&t, &sLocal) ){
+    sqlite3_result_error(pCtx, "local time unavailable", -1);
+    *pRc = SQLITE_ERROR;
+    return 0;
+  }
+  y.Y = sLocal.tm_year + 1900;
+  y.M = sLocal.tm_mon + 1;
+  y.D = sLocal.tm_mday;
+  y.h = sLocal.tm_hour;
+  y.m = sLocal.tm_min;
+  y.s = sLocal.tm_sec;
+  y.validYMD = 1;
+  y.validHMS = 1;
+  y.validJD = 0;
+  y.validTZ = 0;
+  computeJD(&y);
+  *pRc = SQLITE_OK;
+  return y.iJD - x.iJD;
+}
+#endif /* SQLITE_OMIT_LOCALTIME */
+
+/*
+** Process a modifier to a date-time stamp.  The modifiers are
+** as follows:
+**
+**     NNN days
+**     NNN hours
+**     NNN minutes
+**     NNN.NNNN seconds
+**     NNN months
+**     NNN years
+**     start of month
+**     start of year
+**     start of week
+**     start of day
+**     weekday N
+**     unixepoch
+**     localtime
+**     utc
+**
+** Return 0 on success and 1 if there is any kind of error. If the error
+** is in a system call (i.e. localtime()), then an error message is written
+** to context pCtx. If the error is an unrecognized modifier, no error is
+** written to pCtx.
+*/
+static int parseModifier(sqlite3_context *pCtx, const char *zMod, DateTime *p){
+  int rc = 1;
+  int n;
+  double r;
+  char *z, zBuf[30];
+  z = zBuf;
+  for(n=0; n<ArraySize(zBuf)-1 && zMod[n]; n++){
+    z[n] = (char)sqlite3UpperToLower[(u8)zMod[n]];
+  }
+  z[n] = 0;
+  switch( z[0] ){
+#ifndef SQLITE_OMIT_LOCALTIME
+    case 'l': {
+      /*    localtime
+      **
+      ** Assuming the current time value is UTC (a.k.a. GMT), shift it to
+      ** show local time.
+      */
+      if( strcmp(z, "localtime")==0 ){
+        computeJD(p);
+        p->iJD += localtimeOffset(p, pCtx, &rc);
+        clearYMD_HMS_TZ(p);
+      }
+      break;
+    }
+#endif
+    case 'u': {
+      /*
+      **    unixepoch
+      **
+      ** Treat the current value of p->iJD as the number of
+      ** seconds since 1970.  Convert to a real julian day number.
+      */
+      if( strcmp(z, "unixepoch")==0 && p->validJD ){
+        p->iJD = (p->iJD + 43200)/86400 + 21086676*(i64)10000000;
+        clearYMD_HMS_TZ(p);
+        rc = 0;
+      }
+#ifndef SQLITE_OMIT_LOCALTIME
+      else if( strcmp(z, "utc")==0 ){
+        sqlite3_int64 c1;
+        computeJD(p);
+        c1 = localtimeOffset(p, pCtx, &rc);
+        if( rc==SQLITE_OK ){
+          p->iJD -= c1;
+          clearYMD_HMS_TZ(p);
+          p->iJD += c1 - localtimeOffset(p, pCtx, &rc);
+        }
+      }
+#endif
+      break;
+    }
+    case 'w': {
+      /*
+      **    weekday N
+      **
+      ** Move the date to the same time on the next occurrence of
+      ** weekday N where 0==Sunday, 1==Monday, and so forth.  If the
+      ** date is already on the appropriate weekday, this is a no-op.
+      */
+      if( strncmp(z, "weekday ", 8)==0
+               && sqlite3AtoF(&z[8], &r, sqlite3Strlen30(&z[8]), SQLITE_UTF8)
+               && (n=(int)r)==r && n>=0 && r<7 ){
+        sqlite3_int64 Z;
+        computeYMD_HMS(p);
+        p->validTZ = 0;
+        p->validJD = 0;
+        computeJD(p);
+        Z = ((p->iJD + 129600000)/86400000) % 7;
+        if( Z>n ) Z -= 7;
+        p->iJD += (n - Z)*86400000;
+        clearYMD_HMS_TZ(p);
+        rc = 0;
+      }
+      break;
+    }
+    case 's': {
+      /*
+      **    start of TTTTT
+      **
+      ** Move the date backwards to the beginning of the current day,
+      ** or month or year.
+      */
+      if( strncmp(z, "start of ", 9)!=0 ) break;
+      z += 9;
+      computeYMD(p);
+      p->validHMS = 1;
+      p->h = p->m = 0;
+      p->s = 0.0;
+      p->validTZ = 0;
+      p->validJD = 0;
+      if( strcmp(z,"month")==0 ){
+        p->D = 1;
+        rc = 0;
+      }else if( strcmp(z,"year")==0 ){
+        computeYMD(p);
+        p->M = 1;
+        p->D = 1;
+        rc = 0;
+      }else if( strcmp(z,"day")==0 ){
+        rc = 0;
+      }
+      break;
+    }
+    case '+':
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9': {
+      double rRounder;
+      for(n=1; z[n] && z[n]!=':' && !sqlite3Isspace(z[n]); n++){}
+      if( !sqlite3AtoF(z, &r, n, SQLITE_UTF8) ){
+        rc = 1;
+        break;
+      }
+      if( z[n]==':' ){
+        /* A modifier of the form (+|-)HH:MM:SS.FFF adds (or subtracts) the
+        ** specified number of hours, minutes, seconds, and fractional seconds
+        ** to the time.  The ".FFF" may be omitted.  The ":SS.FFF" may be
+        ** omitted.
+        */
+        const char *z2 = z;
+        DateTime tx;
+        sqlite3_int64 day;
+        if( !sqlite3Isdigit(*z2) ) z2++;
+        memset(&tx, 0, sizeof(tx));
+        if( parseHhMmSs(z2, &tx) ) break;
+        computeJD(&tx);
+        tx.iJD -= 43200000;
+        day = tx.iJD/86400000;
+        tx.iJD -= day*86400000;
+        if( z[0]=='-' ) tx.iJD = -tx.iJD;
+        computeJD(p);
+        clearYMD_HMS_TZ(p);
+        p->iJD += tx.iJD;
+        rc = 0;
+        break;
+      }
+      z += n;
+      while( sqlite3Isspace(*z) ) z++;
+      n = sqlite3Strlen30(z);
+      if( n>10 || n<3 ) break;
+      if( z[n-1]=='s' ){ z[n-1] = 0; n--; }
+      computeJD(p);
+      rc = 0;
+      rRounder = r<0 ? -0.5 : +0.5;
+      if( n==3 && strcmp(z,"day")==0 ){
+        p->iJD += (sqlite3_int64)(r*86400000.0 + rRounder);
+      }else if( n==4 && strcmp(z,"hour")==0 ){
+        p->iJD += (sqlite3_int64)(r*(86400000.0/24.0) + rRounder);
+      }else if( n==6 && strcmp(z,"minute")==0 ){
+        p->iJD += (sqlite3_int64)(r*(86400000.0/(24.0*60.0)) + rRounder);
+      }else if( n==6 && strcmp(z,"second")==0 ){
+        p->iJD += (sqlite3_int64)(r*(86400000.0/(24.0*60.0*60.0)) + rRounder);
+      }else if( n==5 && strcmp(z,"month")==0 ){
+        int x, y;
+        computeYMD_HMS(p);
+        p->M += (int)r;
+        x = p->M>0 ? (p->M-1)/12 : (p->M-12)/12;
+        p->Y += x;
+        p->M -= x*12;
+        p->validJD = 0;
+        computeJD(p);
+        y = (int)r;
+        if( y!=r ){
+          p->iJD += (sqlite3_int64)((r - y)*30.0*86400000.0 + rRounder);
+        }
+      }else if( n==4 && strcmp(z,"year")==0 ){
+        int y = (int)r;
+        computeYMD_HMS(p);
+        p->Y += y;
+        p->validJD = 0;
+        computeJD(p);
+        if( y!=r ){
+          p->iJD += (sqlite3_int64)((r - y)*365.0*86400000.0 + rRounder);
+        }
+      }else{
+        rc = 1;
+      }
+      clearYMD_HMS_TZ(p);
+      break;
+    }
+    default: {
+      break;
+    }
+  }
+  return rc;
+}
+
+/*
+** Process time function arguments.  argv[0] is a date-time stamp.
+** argv[1] and following are modifiers.  Parse them all and write
+** the resulting time into the DateTime structure p.  Return 0
+** on success and 1 if there are any errors.
+**
+** If there are zero parameters (if even argv[0] is undefined)
+** then assume a default value of "now" for argv[0].
+*/
+static int isDate(
+  sqlite3_context *context, 
+  int argc, 
+  sqlite3_value **argv, 
+  DateTime *p
+){
+  int i;
+  const unsigned char *z;
+  int eType;
+  memset(p, 0, sizeof(*p));
+  if( argc==0 ){
+    return setDateTimeToCurrent(context, p);
+  }
+  if( (eType = sqlite3_value_type(argv[0]))==SQLITE_FLOAT
+                   || eType==SQLITE_INTEGER ){
+    p->iJD = (sqlite3_int64)(sqlite3_value_double(argv[0])*86400000.0 + 0.5);
+    p->validJD = 1;
+  }else{
+    z = sqlite3_value_text(argv[0]);
+    if( !z || parseDateOrTime(context, (char*)z, p) ){
+      return 1;
+    }
+  }
+  for(i=1; i<argc; i++){
+    z = sqlite3_value_text(argv[i]);
+    if( z==0 || parseModifier(context, (char*)z, p) ) return 1;
+  }
+  return 0;
+}
+
+
+/*
+** The following routines implement the various date and time functions
+** of SQLite.
+*/
+
+/*
+**    julianday( TIMESTRING, MOD, MOD, ...)
+**
+** Return the julian day number of the date specified in the arguments
+*/
+static void juliandayFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  DateTime x;
+  if( isDate(context, argc, argv, &x)==0 ){
+    computeJD(&x);
+    sqlite3_result_double(context, x.iJD/86400000.0);
+  }
+}
+
+/*
+**    datetime( TIMESTRING, MOD, MOD, ...)
+**
+** Return YYYY-MM-DD HH:MM:SS
+*/
+static void datetimeFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  DateTime x;
+  if( isDate(context, argc, argv, &x)==0 ){
+    char zBuf[100];
+    computeYMD_HMS(&x);
+    sqlite3_snprintf(sizeof(zBuf), zBuf, "%04d-%02d-%02d %02d:%02d:%02d",
+                     x.Y, x.M, x.D, x.h, x.m, (int)(x.s));
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  }
+}
+
+/*
+**    time( TIMESTRING, MOD, MOD, ...)
+**
+** Return HH:MM:SS
+*/
+static void timeFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  DateTime x;
+  if( isDate(context, argc, argv, &x)==0 ){
+    char zBuf[100];
+    computeHMS(&x);
+    sqlite3_snprintf(sizeof(zBuf), zBuf, "%02d:%02d:%02d", x.h, x.m, (int)x.s);
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  }
+}
+
+/*
+**    date( TIMESTRING, MOD, MOD, ...)
+**
+** Return YYYY-MM-DD
+*/
+static void dateFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  DateTime x;
+  if( isDate(context, argc, argv, &x)==0 ){
+    char zBuf[100];
+    computeYMD(&x);
+    sqlite3_snprintf(sizeof(zBuf), zBuf, "%04d-%02d-%02d", x.Y, x.M, x.D);
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  }
+}
+
+/*
+**    strftime( FORMAT, TIMESTRING, MOD, MOD, ...)
+**
+** Return a string described by FORMAT.  Conversions as follows:
+**
+**   %d  day of month
+**   %f  ** fractional seconds  SS.SSS
+**   %H  hour 00-24
+**   %j  day of year 000-366
+**   %J  ** julian day number
+**   %m  month 01-12
+**   %M  minute 00-59
+**   %s  seconds since 1970-01-01
+**   %S  seconds 00-59
+**   %w  day of week 0-6  sunday==0
+**   %W  week of year 00-53
+**   %Y  year 0000-9999
+**   %%  %
+*/
+static void strftimeFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  DateTime x;
+  u64 n;
+  size_t i,j;
+  char *z;
+  sqlite3 *db;
+  const char *zFmt;
+  char zBuf[100];
+  if( argc==0 ) return;
+  zFmt = (const char*)sqlite3_value_text(argv[0]);
+  if( zFmt==0 || isDate(context, argc-1, argv+1, &x) ) return;
+  db = sqlite3_context_db_handle(context);
+  for(i=0, n=1; zFmt[i]; i++, n++){
+    if( zFmt[i]=='%' ){
+      switch( zFmt[i+1] ){
+        case 'd':
+        case 'H':
+        case 'm':
+        case 'M':
+        case 'S':
+        case 'W':
+          n++;
+          /* fall thru */
+        case 'w':
+        case '%':
+          break;
+        case 'f':
+          n += 8;
+          break;
+        case 'j':
+          n += 3;
+          break;
+        case 'Y':
+          n += 8;
+          break;
+        case 's':
+        case 'J':
+          n += 50;
+          break;
+        default:
+          return;  /* ERROR.  return a NULL */
+      }
+      i++;
+    }
+  }
+  testcase( n==sizeof(zBuf)-1 );
+  testcase( n==sizeof(zBuf) );
+  testcase( n==(u64)db->aLimit[SQLITE_LIMIT_LENGTH]+1 );
+  testcase( n==(u64)db->aLimit[SQLITE_LIMIT_LENGTH] );
+  if( n<sizeof(zBuf) ){
+    z = zBuf;
+  }else if( n>(u64)db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    sqlite3_result_error_toobig(context);
+    return;
+  }else{
+    z = sqlite3DbMallocRaw(db, (int)n);
+    if( z==0 ){
+      sqlite3_result_error_nomem(context);
+      return;
+    }
+  }
+  computeJD(&x);
+  computeYMD_HMS(&x);
+  for(i=j=0; zFmt[i]; i++){
+    if( zFmt[i]!='%' ){
+      z[j++] = zFmt[i];
+    }else{
+      i++;
+      switch( zFmt[i] ){
+        case 'd':  sqlite3_snprintf(3, &z[j],"%02d",x.D); j+=2; break;
+        case 'f': {
+          double s = x.s;
+          if( s>59.999 ) s = 59.999;
+          sqlite3_snprintf(7, &z[j],"%06.3f", s);
+          j += sqlite3Strlen30(&z[j]);
+          break;
+        }
+        case 'H':  sqlite3_snprintf(3, &z[j],"%02d",x.h); j+=2; break;
+        case 'W': /* Fall thru */
+        case 'j': {
+          int nDay;             /* Number of days since 1st day of year */
+          DateTime y = x;
+          y.validJD = 0;
+          y.M = 1;
+          y.D = 1;
+          computeJD(&y);
+          nDay = (int)((x.iJD-y.iJD+43200000)/86400000);
+          if( zFmt[i]=='W' ){
+            int wd;   /* 0=Monday, 1=Tuesday, ... 6=Sunday */
+            wd = (int)(((x.iJD+43200000)/86400000)%7);
+            sqlite3_snprintf(3, &z[j],"%02d",(nDay+7-wd)/7);
+            j += 2;
+          }else{
+            sqlite3_snprintf(4, &z[j],"%03d",nDay+1);
+            j += 3;
+          }
+          break;
+        }
+        case 'J': {
+          sqlite3_snprintf(20, &z[j],"%.16g",x.iJD/86400000.0);
+          j+=sqlite3Strlen30(&z[j]);
+          break;
+        }
+        case 'm':  sqlite3_snprintf(3, &z[j],"%02d",x.M); j+=2; break;
+        case 'M':  sqlite3_snprintf(3, &z[j],"%02d",x.m); j+=2; break;
+        case 's': {
+          sqlite3_snprintf(30,&z[j],"%lld",
+                           (i64)(x.iJD/1000 - 21086676*(i64)10000));
+          j += sqlite3Strlen30(&z[j]);
+          break;
+        }
+        case 'S':  sqlite3_snprintf(3,&z[j],"%02d",(int)x.s); j+=2; break;
+        case 'w': {
+          z[j++] = (char)(((x.iJD+129600000)/86400000) % 7) + '0';
+          break;
+        }
+        case 'Y': {
+          sqlite3_snprintf(5,&z[j],"%04d",x.Y); j+=sqlite3Strlen30(&z[j]);
+          break;
+        }
+        default:   z[j++] = '%'; break;
+      }
+    }
+  }
+  z[j] = 0;
+  sqlite3_result_text(context, z, -1,
+                      z==zBuf ? SQLITE_TRANSIENT : SQLITE_DYNAMIC);
+}
+
+/*
+** current_time()
+**
+** This function returns the same value as time('now').
+*/
+static void ctimeFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  timeFunc(context, 0, 0);
+}
+
+/*
+** current_date()
+**
+** This function returns the same value as date('now').
+*/
+static void cdateFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  dateFunc(context, 0, 0);
+}
+
+/*
+** current_timestamp()
+**
+** This function returns the same value as datetime('now').
+*/
+static void ctimestampFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  datetimeFunc(context, 0, 0);
+}
+#endif /* !defined(SQLITE_OMIT_DATETIME_FUNCS) */
+
+#ifdef SQLITE_OMIT_DATETIME_FUNCS
+/*
+** If the library is compiled to omit the full-scale date and time
+** handling (to get a smaller binary), the following minimal version
+** of the functions current_time(), current_date() and current_timestamp()
+** are included instead. This is to support column declarations that
+** include "DEFAULT CURRENT_TIME" etc.
+**
+** This function uses the C-library functions time(), gmtime()
+** and strftime(). The format string to pass to strftime() is supplied
+** as the user-data for the function.
+*/
+static void currentTimeFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  time_t t;
+  char *zFormat = (char *)sqlite3_user_data(context);
+  sqlite3 *db;
+  sqlite3_int64 iT;
+  struct tm *pTm;
+  struct tm sNow;
+  char zBuf[20];
+
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  iT = sqlite3StmtCurrentTime(context);
+  if( iT<=0 ) return;
+  t = iT/1000 - 10000*(sqlite3_int64)21086676;
+#if HAVE_GMTIME_R
+  pTm = gmtime_r(&t, &sNow);
+#else
+  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+  pTm = gmtime(&t);
+  if( pTm ) memcpy(&sNow, pTm, sizeof(sNow));
+  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+#endif
+  if( pTm ){
+    strftime(zBuf, 20, zFormat, &sNow);
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  }
+}
+#endif
+
+/*
+** This function registered all of the above C functions as SQL
+** functions.  This should be the only routine in this file with
+** external linkage.
+*/
+SQLITE_PRIVATE void sqlite3RegisterDateTimeFunctions(void){
+  static SQLITE_WSD FuncDef aDateTimeFuncs[] = {
+#ifndef SQLITE_OMIT_DATETIME_FUNCS
+    FUNCTION(julianday,        -1, 0, 0, juliandayFunc ),
+    FUNCTION(date,             -1, 0, 0, dateFunc      ),
+    FUNCTION(time,             -1, 0, 0, timeFunc      ),
+    FUNCTION(datetime,         -1, 0, 0, datetimeFunc  ),
+    FUNCTION(strftime,         -1, 0, 0, strftimeFunc  ),
+    FUNCTION(current_time,      0, 0, 0, ctimeFunc     ),
+    FUNCTION(current_timestamp, 0, 0, 0, ctimestampFunc),
+    FUNCTION(current_date,      0, 0, 0, cdateFunc     ),
+#else
+    STR_FUNCTION(current_time,      0, "%H:%M:%S",          0, currentTimeFunc),
+    STR_FUNCTION(current_date,      0, "%Y-%m-%d",          0, currentTimeFunc),
+    STR_FUNCTION(current_timestamp, 0, "%Y-%m-%d %H:%M:%S", 0, currentTimeFunc),
+#endif
+  };
+  int i;
+  FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+  FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aDateTimeFuncs);
+
+  for(i=0; i<ArraySize(aDateTimeFuncs); i++){
+    sqlite3FuncDefInsert(pHash, &aFunc[i]);
+  }
+}
+
+/************** End of date.c ************************************************/
+/************** Begin file os.c **********************************************/
+/*
+** 2005 November 29
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains OS interface code that is common to all
+** architectures.
+*/
+#define _SQLITE_OS_C_ 1
+#undef _SQLITE_OS_C_
+
+/*
+** The default SQLite sqlite3_vfs implementations do not allocate
+** memory (actually, os_unix.c allocates a small amount of memory
+** from within OsOpen()), but some third-party implementations may.
+** So we test the effects of a malloc() failing and the sqlite3OsXXX()
+** function returning SQLITE_IOERR_NOMEM using the DO_OS_MALLOC_TEST macro.
+**
+** The following functions are instrumented for malloc() failure 
+** testing:
+**
+**     sqlite3OsRead()
+**     sqlite3OsWrite()
+**     sqlite3OsSync()
+**     sqlite3OsFileSize()
+**     sqlite3OsLock()
+**     sqlite3OsCheckReservedLock()
+**     sqlite3OsFileControl()
+**     sqlite3OsShmMap()
+**     sqlite3OsOpen()
+**     sqlite3OsDelete()
+**     sqlite3OsAccess()
+**     sqlite3OsFullPathname()
+**
+*/
+#if defined(SQLITE_TEST)
+SQLITE_API int sqlite3_memdebug_vfs_oom_test = 1;
+  #define DO_OS_MALLOC_TEST(x)                                       \
+  if (sqlite3_memdebug_vfs_oom_test && (!x || !sqlite3IsMemJournal(x))) {  \
+    void *pTstAlloc = sqlite3Malloc(10);                             \
+    if (!pTstAlloc) return SQLITE_IOERR_NOMEM;                       \
+    sqlite3_free(pTstAlloc);                                         \
+  }
+#else
+  #define DO_OS_MALLOC_TEST(x)
+#endif
+
+/*
+** The following routines are convenience wrappers around methods
+** of the sqlite3_file object.  This is mostly just syntactic sugar. All
+** of this would be completely automatic if SQLite were coded using
+** C++ instead of plain old C.
+*/
+SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file *pId){
+  int rc = SQLITE_OK;
+  if( pId->pMethods ){
+    rc = pId->pMethods->xClose(pId);
+    pId->pMethods = 0;
+  }
+  return rc;
+}
+SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file *id, void *pBuf, int amt, i64 offset){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xRead(id, pBuf, amt, offset);
+}
+SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file *id, const void *pBuf, int amt, i64 offset){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xWrite(id, pBuf, amt, offset);
+}
+SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file *id, i64 size){
+  return id->pMethods->xTruncate(id, size);
+}
+SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file *id, int flags){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xSync(id, flags);
+}
+SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file *id, i64 *pSize){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xFileSize(id, pSize);
+}
+SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file *id, int lockType){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xLock(id, lockType);
+}
+SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file *id, int lockType){
+  return id->pMethods->xUnlock(id, lockType);
+}
+SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xCheckReservedLock(id, pResOut);
+}
+
+/*
+** Use sqlite3OsFileControl() when we are doing something that might fail
+** and we need to know about the failures.  Use sqlite3OsFileControlHint()
+** when simply tossing information over the wall to the VFS and we do not
+** really care if the VFS receives and understands the information since it
+** is only a hint and can be safely ignored.  The sqlite3OsFileControlHint()
+** routine has no return value since the return value would be meaningless.
+*/
+SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file *id, int op, void *pArg){
+#ifdef SQLITE_TEST
+  if( op!=SQLITE_FCNTL_COMMIT_PHASETWO ){
+    /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite
+    ** is using a regular VFS, it is called after the corresponding 
+    ** transaction has been committed. Injecting a fault at this point 
+    ** confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM
+    ** but the transaction is committed anyway.
+    **
+    ** The core must call OsFileControl() though, not OsFileControlHint(),
+    ** as if a custom VFS (e.g. zipvfs) returns an error here, it probably
+    ** means the commit really has failed and an error should be returned
+    ** to the user.  */
+    DO_OS_MALLOC_TEST(id);
+  }
+#endif
+  return id->pMethods->xFileControl(id, op, pArg);
+}
+SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file *id, int op, void *pArg){
+  (void)id->pMethods->xFileControl(id, op, pArg);
+}
+
+SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id){
+  int (*xSectorSize)(sqlite3_file*) = id->pMethods->xSectorSize;
+  return (xSectorSize ? xSectorSize(id) : SQLITE_DEFAULT_SECTOR_SIZE);
+}
+SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id){
+  return id->pMethods->xDeviceCharacteristics(id);
+}
+SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){
+  return id->pMethods->xShmLock(id, offset, n, flags);
+}
+SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id){
+  id->pMethods->xShmBarrier(id);
+}
+SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int deleteFlag){
+  return id->pMethods->xShmUnmap(id, deleteFlag);
+}
+SQLITE_PRIVATE int sqlite3OsShmMap(
+  sqlite3_file *id,               /* Database file handle */
+  int iPage,
+  int pgsz,
+  int bExtend,                    /* True to extend file if necessary */
+  void volatile **pp              /* OUT: Pointer to mapping */
+){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp);
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/* The real implementation of xFetch and xUnfetch */
+SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){
+  DO_OS_MALLOC_TEST(id);
+  return id->pMethods->xFetch(id, iOff, iAmt, pp);
+}
+SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){
+  return id->pMethods->xUnfetch(id, iOff, p);
+}
+#else
+/* No-op stubs to use when memory-mapped I/O is disabled */
+SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){
+  *pp = 0;
+  return SQLITE_OK;
+}
+SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** The next group of routines are convenience wrappers around the
+** VFS methods.
+*/
+SQLITE_PRIVATE int sqlite3OsOpen(
+  sqlite3_vfs *pVfs, 
+  const char *zPath, 
+  sqlite3_file *pFile, 
+  int flags, 
+  int *pFlagsOut
+){
+  int rc;
+  DO_OS_MALLOC_TEST(0);
+  /* 0x87f7f is a mask of SQLITE_OPEN_ flags that are valid to be passed
+  ** down into the VFS layer.  Some SQLITE_OPEN_ flags (for example,
+  ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before
+  ** reaching the VFS. */
+  rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x87f7f, pFlagsOut);
+  assert( rc==SQLITE_OK || pFile->pMethods==0 );
+  return rc;
+}
+SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
+  DO_OS_MALLOC_TEST(0);
+  assert( dirSync==0 || dirSync==1 );
+  return pVfs->xDelete(pVfs, zPath, dirSync);
+}
+SQLITE_PRIVATE int sqlite3OsAccess(
+  sqlite3_vfs *pVfs, 
+  const char *zPath, 
+  int flags, 
+  int *pResOut
+){
+  DO_OS_MALLOC_TEST(0);
+  return pVfs->xAccess(pVfs, zPath, flags, pResOut);
+}
+SQLITE_PRIVATE int sqlite3OsFullPathname(
+  sqlite3_vfs *pVfs, 
+  const char *zPath, 
+  int nPathOut, 
+  char *zPathOut
+){
+  DO_OS_MALLOC_TEST(0);
+  zPathOut[0] = 0;
+  return pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
+}
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+SQLITE_PRIVATE void *sqlite3OsDlOpen(sqlite3_vfs *pVfs, const char *zPath){
+  return pVfs->xDlOpen(pVfs, zPath);
+}
+SQLITE_PRIVATE void sqlite3OsDlError(sqlite3_vfs *pVfs, int nByte, char *zBufOut){
+  pVfs->xDlError(pVfs, nByte, zBufOut);
+}
+SQLITE_PRIVATE void (*sqlite3OsDlSym(sqlite3_vfs *pVfs, void *pHdle, const char *zSym))(void){
+  return pVfs->xDlSym(pVfs, pHdle, zSym);
+}
+SQLITE_PRIVATE void sqlite3OsDlClose(sqlite3_vfs *pVfs, void *pHandle){
+  pVfs->xDlClose(pVfs, pHandle);
+}
+#endif /* SQLITE_OMIT_LOAD_EXTENSION */
+SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){
+  return pVfs->xRandomness(pVfs, nByte, zBufOut);
+}
+SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *pVfs, int nMicro){
+  return pVfs->xSleep(pVfs, nMicro);
+}
+SQLITE_PRIVATE int sqlite3OsCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *pTimeOut){
+  int rc;
+  /* IMPLEMENTATION-OF: R-49045-42493 SQLite will use the xCurrentTimeInt64()
+  ** method to get the current date and time if that method is available
+  ** (if iVersion is 2 or greater and the function pointer is not NULL) and
+  ** will fall back to xCurrentTime() if xCurrentTimeInt64() is
+  ** unavailable.
+  */
+  if( pVfs->iVersion>=2 && pVfs->xCurrentTimeInt64 ){
+    rc = pVfs->xCurrentTimeInt64(pVfs, pTimeOut);
+  }else{
+    double r;
+    rc = pVfs->xCurrentTime(pVfs, &r);
+    *pTimeOut = (sqlite3_int64)(r*86400000.0);
+  }
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3OsOpenMalloc(
+  sqlite3_vfs *pVfs, 
+  const char *zFile, 
+  sqlite3_file **ppFile, 
+  int flags,
+  int *pOutFlags
+){
+  int rc = SQLITE_NOMEM;
+  sqlite3_file *pFile;
+  pFile = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile);
+  if( pFile ){
+    rc = sqlite3OsOpen(pVfs, zFile, pFile, flags, pOutFlags);
+    if( rc!=SQLITE_OK ){
+      sqlite3_free(pFile);
+    }else{
+      *ppFile = pFile;
+    }
+  }
+  return rc;
+}
+SQLITE_PRIVATE int sqlite3OsCloseFree(sqlite3_file *pFile){
+  int rc = SQLITE_OK;
+  assert( pFile );
+  rc = sqlite3OsClose(pFile);
+  sqlite3_free(pFile);
+  return rc;
+}
+
+/*
+** This function is a wrapper around the OS specific implementation of
+** sqlite3_os_init(). The purpose of the wrapper is to provide the
+** ability to simulate a malloc failure, so that the handling of an
+** error in sqlite3_os_init() by the upper layers can be tested.
+*/
+SQLITE_PRIVATE int sqlite3OsInit(void){
+  void *p = sqlite3_malloc(10);
+  if( p==0 ) return SQLITE_NOMEM;
+  sqlite3_free(p);
+  return sqlite3_os_init();
+}
+
+/*
+** The list of all registered VFS implementations.
+*/
+static sqlite3_vfs * SQLITE_WSD vfsList = 0;
+#define vfsList GLOBAL(sqlite3_vfs *, vfsList)
+
+/*
+** Locate a VFS by name.  If no name is given, simply return the
+** first VFS on the list.
+*/
+SQLITE_API sqlite3_vfs *sqlite3_vfs_find(const char *zVfs){
+  sqlite3_vfs *pVfs = 0;
+#if SQLITE_THREADSAFE
+  sqlite3_mutex *mutex;
+#endif
+#ifndef SQLITE_OMIT_AUTOINIT
+  int rc = sqlite3_initialize();
+  if( rc ) return 0;
+#endif
+#if SQLITE_THREADSAFE
+  mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+  sqlite3_mutex_enter(mutex);
+  for(pVfs = vfsList; pVfs; pVfs=pVfs->pNext){
+    if( zVfs==0 ) break;
+    if( strcmp(zVfs, pVfs->zName)==0 ) break;
+  }
+  sqlite3_mutex_leave(mutex);
+  return pVfs;
+}
+
+/*
+** Unlink a VFS from the linked list
+*/
+static void vfsUnlink(sqlite3_vfs *pVfs){
+  assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) );
+  if( pVfs==0 ){
+    /* No-op */
+  }else if( vfsList==pVfs ){
+    vfsList = pVfs->pNext;
+  }else if( vfsList ){
+    sqlite3_vfs *p = vfsList;
+    while( p->pNext && p->pNext!=pVfs ){
+      p = p->pNext;
+    }
+    if( p->pNext==pVfs ){
+      p->pNext = pVfs->pNext;
+    }
+  }
+}
+
+/*
+** Register a VFS with the system.  It is harmless to register the same
+** VFS multiple times.  The new VFS becomes the default if makeDflt is
+** true.
+*/
+SQLITE_API int sqlite3_vfs_register(sqlite3_vfs *pVfs, int makeDflt){
+  MUTEX_LOGIC(sqlite3_mutex *mutex;)
+#ifndef SQLITE_OMIT_AUTOINIT
+  int rc = sqlite3_initialize();
+  if( rc ) return rc;
+#endif
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( pVfs==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+
+  MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
+  sqlite3_mutex_enter(mutex);
+  vfsUnlink(pVfs);
+  if( makeDflt || vfsList==0 ){
+    pVfs->pNext = vfsList;
+    vfsList = pVfs;
+  }else{
+    pVfs->pNext = vfsList->pNext;
+    vfsList->pNext = pVfs;
+  }
+  assert(vfsList);
+  sqlite3_mutex_leave(mutex);
+  return SQLITE_OK;
+}
+
+/*
+** Unregister a VFS so that it is no longer accessible.
+*/
+SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs *pVfs){
+#if SQLITE_THREADSAFE
+  sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+  sqlite3_mutex_enter(mutex);
+  vfsUnlink(pVfs);
+  sqlite3_mutex_leave(mutex);
+  return SQLITE_OK;
+}
+
+/************** End of os.c **************************************************/
+/************** Begin file fault.c *******************************************/
+/*
+** 2008 Jan 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code to support the concept of "benign" 
+** malloc failures (when the xMalloc() or xRealloc() method of the
+** sqlite3_mem_methods structure fails to allocate a block of memory
+** and returns 0). 
+**
+** Most malloc failures are non-benign. After they occur, SQLite
+** abandons the current operation and returns an error code (usually
+** SQLITE_NOMEM) to the user. However, sometimes a fault is not necessarily
+** fatal. For example, if a malloc fails while resizing a hash table, this 
+** is completely recoverable simply by not carrying out the resize. The 
+** hash table will continue to function normally.  So a malloc failure 
+** during a hash table resize is a benign fault.
+*/
+
+
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+
+/*
+** Global variables.
+*/
+typedef struct BenignMallocHooks BenignMallocHooks;
+static SQLITE_WSD struct BenignMallocHooks {
+  void (*xBenignBegin)(void);
+  void (*xBenignEnd)(void);
+} sqlite3Hooks = { 0, 0 };
+
+/* The "wsdHooks" macro will resolve to the appropriate BenignMallocHooks
+** structure.  If writable static data is unsupported on the target,
+** we have to locate the state vector at run-time.  In the more common
+** case where writable static data is supported, wsdHooks can refer directly
+** to the "sqlite3Hooks" state vector declared above.
+*/
+#ifdef SQLITE_OMIT_WSD
+# define wsdHooksInit \
+  BenignMallocHooks *x = &GLOBAL(BenignMallocHooks,sqlite3Hooks)
+# define wsdHooks x[0]
+#else
+# define wsdHooksInit
+# define wsdHooks sqlite3Hooks
+#endif
+
+
+/*
+** Register hooks to call when sqlite3BeginBenignMalloc() and
+** sqlite3EndBenignMalloc() are called, respectively.
+*/
+SQLITE_PRIVATE void sqlite3BenignMallocHooks(
+  void (*xBenignBegin)(void),
+  void (*xBenignEnd)(void)
+){
+  wsdHooksInit;
+  wsdHooks.xBenignBegin = xBenignBegin;
+  wsdHooks.xBenignEnd = xBenignEnd;
+}
+
+/*
+** This (sqlite3EndBenignMalloc()) is called by SQLite code to indicate that
+** subsequent malloc failures are benign. A call to sqlite3EndBenignMalloc()
+** indicates that subsequent malloc failures are non-benign.
+*/
+SQLITE_PRIVATE void sqlite3BeginBenignMalloc(void){
+  wsdHooksInit;
+  if( wsdHooks.xBenignBegin ){
+    wsdHooks.xBenignBegin();
+  }
+}
+SQLITE_PRIVATE void sqlite3EndBenignMalloc(void){
+  wsdHooksInit;
+  if( wsdHooks.xBenignEnd ){
+    wsdHooks.xBenignEnd();
+  }
+}
+
+#endif   /* #ifndef SQLITE_OMIT_BUILTIN_TEST */
+
+/************** End of fault.c ***********************************************/
+/************** Begin file mem0.c ********************************************/
+/*
+** 2008 October 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains a no-op memory allocation drivers for use when
+** SQLITE_ZERO_MALLOC is defined.  The allocation drivers implemented
+** here always fail.  SQLite will not operate with these drivers.  These
+** are merely placeholders.  Real drivers must be substituted using
+** sqlite3_config() before SQLite will operate.
+*/
+
+/*
+** This version of the memory allocator is the default.  It is
+** used when no other memory allocator is specified using compile-time
+** macros.
+*/
+#ifdef SQLITE_ZERO_MALLOC
+
+/*
+** No-op versions of all memory allocation routines
+*/
+static void *sqlite3MemMalloc(int nByte){ return 0; }
+static void sqlite3MemFree(void *pPrior){ return; }
+static void *sqlite3MemRealloc(void *pPrior, int nByte){ return 0; }
+static int sqlite3MemSize(void *pPrior){ return 0; }
+static int sqlite3MemRoundup(int n){ return n; }
+static int sqlite3MemInit(void *NotUsed){ return SQLITE_OK; }
+static void sqlite3MemShutdown(void *NotUsed){ return; }
+
+/*
+** This routine is the only routine in this file with external linkage.
+**
+** Populate the low-level memory allocation function pointers in
+** sqlite3GlobalConfig.m with pointers to the routines in this file.
+*/
+SQLITE_PRIVATE void sqlite3MemSetDefault(void){
+  static const sqlite3_mem_methods defaultMethods = {
+     sqlite3MemMalloc,
+     sqlite3MemFree,
+     sqlite3MemRealloc,
+     sqlite3MemSize,
+     sqlite3MemRoundup,
+     sqlite3MemInit,
+     sqlite3MemShutdown,
+     0
+  };
+  sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods);
+}
+
+#endif /* SQLITE_ZERO_MALLOC */
+
+/************** End of mem0.c ************************************************/
+/************** Begin file mem1.c ********************************************/
+/*
+** 2007 August 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains low-level memory allocation drivers for when
+** SQLite will use the standard C-library malloc/realloc/free interface
+** to obtain the memory it needs.
+**
+** This file contains implementations of the low-level memory allocation
+** routines specified in the sqlite3_mem_methods object.  The content of
+** this file is only used if SQLITE_SYSTEM_MALLOC is defined.  The
+** SQLITE_SYSTEM_MALLOC macro is defined automatically if neither the
+** SQLITE_MEMDEBUG nor the SQLITE_WIN32_MALLOC macros are defined.  The
+** default configuration is to use memory allocation routines in this
+** file.
+**
+** C-preprocessor macro summary:
+**
+**    HAVE_MALLOC_USABLE_SIZE     The configure script sets this symbol if
+**                                the malloc_usable_size() interface exists
+**                                on the target platform.  Or, this symbol
+**                                can be set manually, if desired.
+**                                If an equivalent interface exists by
+**                                a different name, using a separate -D
+**                                option to rename it.
+**
+**    SQLITE_WITHOUT_ZONEMALLOC   Some older macs lack support for the zone
+**                                memory allocator.  Set this symbol to enable
+**                                building on older macs.
+**
+**    SQLITE_WITHOUT_MSIZE        Set this symbol to disable the use of
+**                                _msize() on windows systems.  This might
+**                                be necessary when compiling for Delphi,
+**                                for example.
+*/
+
+/*
+** This version of the memory allocator is the default.  It is
+** used when no other memory allocator is specified using compile-time
+** macros.
+*/
+#ifdef SQLITE_SYSTEM_MALLOC
+#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC)
+
+/*
+** Use the zone allocator available on apple products unless the
+** SQLITE_WITHOUT_ZONEMALLOC symbol is defined.
+*/
+#include <sys/sysctl.h>
+#include <malloc/malloc.h>
+#include <libkern/OSAtomic.h>
+static malloc_zone_t* _sqliteZone_;
+#define SQLITE_MALLOC(x) malloc_zone_malloc(_sqliteZone_, (x))
+#define SQLITE_FREE(x) malloc_zone_free(_sqliteZone_, (x));
+#define SQLITE_REALLOC(x,y) malloc_zone_realloc(_sqliteZone_, (x), (y))
+#define SQLITE_MALLOCSIZE(x) \
+        (_sqliteZone_ ? _sqliteZone_->size(_sqliteZone_,x) : malloc_size(x))
+
+#else /* if not __APPLE__ */
+
+/*
+** Use standard C library malloc and free on non-Apple systems.  
+** Also used by Apple systems if SQLITE_WITHOUT_ZONEMALLOC is defined.
+*/
+#define SQLITE_MALLOC(x)             malloc(x)
+#define SQLITE_FREE(x)               free(x)
+#define SQLITE_REALLOC(x,y)          realloc((x),(y))
+
+/*
+** The malloc.h header file is needed for malloc_usable_size() function
+** on some systems (e.g. Linux).
+*/
+#if HAVE_MALLOC_H && HAVE_MALLOC_USABLE_SIZE
+#  define SQLITE_USE_MALLOC_H 1
+#  define SQLITE_USE_MALLOC_USABLE_SIZE 1
+/*
+** The MSVCRT has malloc_usable_size(), but it is called _msize().  The
+** use of _msize() is automatic, but can be disabled by compiling with
+** -DSQLITE_WITHOUT_MSIZE.  Using the _msize() function also requires
+** the malloc.h header file.
+*/
+#elif defined(_MSC_VER) && !defined(SQLITE_WITHOUT_MSIZE)
+#  define SQLITE_USE_MALLOC_H
+#  define SQLITE_USE_MSIZE
+#endif
+
+/*
+** Include the malloc.h header file, if necessary.  Also set define macro
+** SQLITE_MALLOCSIZE to the appropriate function name, which is _msize()
+** for MSVC and malloc_usable_size() for most other systems (e.g. Linux).
+** The memory size function can always be overridden manually by defining
+** the macro SQLITE_MALLOCSIZE to the desired function name.
+*/
+#if defined(SQLITE_USE_MALLOC_H)
+#  include <malloc.h>
+#  if defined(SQLITE_USE_MALLOC_USABLE_SIZE)
+#    if !defined(SQLITE_MALLOCSIZE)
+#      define SQLITE_MALLOCSIZE(x)   malloc_usable_size(x)
+#    endif
+#  elif defined(SQLITE_USE_MSIZE)
+#    if !defined(SQLITE_MALLOCSIZE)
+#      define SQLITE_MALLOCSIZE      _msize
+#    endif
+#  endif
+#endif /* defined(SQLITE_USE_MALLOC_H) */
+
+#endif /* __APPLE__ or not __APPLE__ */
+
+/*
+** Like malloc(), but remember the size of the allocation
+** so that we can find it later using sqlite3MemSize().
+**
+** For this low-level routine, we are guaranteed that nByte>0 because
+** cases of nByte<=0 will be intercepted and dealt with by higher level
+** routines.
+*/
+static void *sqlite3MemMalloc(int nByte){
+#ifdef SQLITE_MALLOCSIZE
+  void *p = SQLITE_MALLOC( nByte );
+  if( p==0 ){
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte);
+  }
+  return p;
+#else
+  sqlite3_int64 *p;
+  assert( nByte>0 );
+  nByte = ROUND8(nByte);
+  p = SQLITE_MALLOC( nByte+8 );
+  if( p ){
+    p[0] = nByte;
+    p++;
+  }else{
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte);
+  }
+  return (void *)p;
+#endif
+}
+
+/*
+** Like free() but works for allocations obtained from sqlite3MemMalloc()
+** or sqlite3MemRealloc().
+**
+** For this low-level routine, we already know that pPrior!=0 since
+** cases where pPrior==0 will have been intecepted and dealt with
+** by higher-level routines.
+*/
+static void sqlite3MemFree(void *pPrior){
+#ifdef SQLITE_MALLOCSIZE
+  SQLITE_FREE(pPrior);
+#else
+  sqlite3_int64 *p = (sqlite3_int64*)pPrior;
+  assert( pPrior!=0 );
+  p--;
+  SQLITE_FREE(p);
+#endif
+}
+
+/*
+** Report the allocated size of a prior return from xMalloc()
+** or xRealloc().
+*/
+static int sqlite3MemSize(void *pPrior){
+#ifdef SQLITE_MALLOCSIZE
+  return pPrior ? (int)SQLITE_MALLOCSIZE(pPrior) : 0;
+#else
+  sqlite3_int64 *p;
+  if( pPrior==0 ) return 0;
+  p = (sqlite3_int64*)pPrior;
+  p--;
+  return (int)p[0];
+#endif
+}
+
+/*
+** Like realloc().  Resize an allocation previously obtained from
+** sqlite3MemMalloc().
+**
+** For this low-level interface, we know that pPrior!=0.  Cases where
+** pPrior==0 while have been intercepted by higher-level routine and
+** redirected to xMalloc.  Similarly, we know that nByte>0 because
+** cases where nByte<=0 will have been intercepted by higher-level
+** routines and redirected to xFree.
+*/
+static void *sqlite3MemRealloc(void *pPrior, int nByte){
+#ifdef SQLITE_MALLOCSIZE
+  void *p = SQLITE_REALLOC(pPrior, nByte);
+  if( p==0 ){
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    sqlite3_log(SQLITE_NOMEM,
+      "failed memory resize %u to %u bytes",
+      SQLITE_MALLOCSIZE(pPrior), nByte);
+  }
+  return p;
+#else
+  sqlite3_int64 *p = (sqlite3_int64*)pPrior;
+  assert( pPrior!=0 && nByte>0 );
+  assert( nByte==ROUND8(nByte) ); /* EV: R-46199-30249 */
+  p--;
+  p = SQLITE_REALLOC(p, nByte+8 );
+  if( p ){
+    p[0] = nByte;
+    p++;
+  }else{
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    sqlite3_log(SQLITE_NOMEM,
+      "failed memory resize %u to %u bytes",
+      sqlite3MemSize(pPrior), nByte);
+  }
+  return (void*)p;
+#endif
+}
+
+/*
+** Round up a request size to the next valid allocation size.
+*/
+static int sqlite3MemRoundup(int n){
+  return ROUND8(n);
+}
+
+/*
+** Initialize this module.
+*/
+static int sqlite3MemInit(void *NotUsed){
+#if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC)
+  int cpuCount;
+  size_t len;
+  if( _sqliteZone_ ){
+    return SQLITE_OK;
+  }
+  len = sizeof(cpuCount);
+  /* One usually wants to use hw.acctivecpu for MT decisions, but not here */
+  sysctlbyname("hw.ncpu", &cpuCount, &len, NULL, 0);
+  if( cpuCount>1 ){
+    /* defer MT decisions to system malloc */
+    _sqliteZone_ = malloc_default_zone();
+  }else{
+    /* only 1 core, use our own zone to contention over global locks, 
+    ** e.g. we have our own dedicated locks */
+    bool success;
+    malloc_zone_t* newzone = malloc_create_zone(4096, 0);
+    malloc_set_zone_name(newzone, "Sqlite_Heap");
+    do{
+      success = OSAtomicCompareAndSwapPtrBarrier(NULL, newzone, 
+                                 (void * volatile *)&_sqliteZone_);
+    }while(!_sqliteZone_);
+    if( !success ){
+      /* somebody registered a zone first */
+      malloc_destroy_zone(newzone);
+    }
+  }
+#endif
+  UNUSED_PARAMETER(NotUsed);
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module.
+*/
+static void sqlite3MemShutdown(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  return;
+}
+
+/*
+** This routine is the only routine in this file with external linkage.
+**
+** Populate the low-level memory allocation function pointers in
+** sqlite3GlobalConfig.m with pointers to the routines in this file.
+*/
+SQLITE_PRIVATE void sqlite3MemSetDefault(void){
+  static const sqlite3_mem_methods defaultMethods = {
+     sqlite3MemMalloc,
+     sqlite3MemFree,
+     sqlite3MemRealloc,
+     sqlite3MemSize,
+     sqlite3MemRoundup,
+     sqlite3MemInit,
+     sqlite3MemShutdown,
+     0
+  };
+  sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods);
+}
+
+#endif /* SQLITE_SYSTEM_MALLOC */
+
+/************** End of mem1.c ************************************************/
+/************** Begin file mem2.c ********************************************/
+/*
+** 2007 August 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains low-level memory allocation drivers for when
+** SQLite will use the standard C-library malloc/realloc/free interface
+** to obtain the memory it needs while adding lots of additional debugging
+** information to each allocation in order to help detect and fix memory
+** leaks and memory usage errors.
+**
+** This file contains implementations of the low-level memory allocation
+** routines specified in the sqlite3_mem_methods object.
+*/
+
+/*
+** This version of the memory allocator is used only if the
+** SQLITE_MEMDEBUG macro is defined
+*/
+#ifdef SQLITE_MEMDEBUG
+
+/*
+** The backtrace functionality is only available with GLIBC
+*/
+#ifdef __GLIBC__
+  extern int backtrace(void**,int);
+  extern void backtrace_symbols_fd(void*const*,int,int);
+#else
+# define backtrace(A,B) 1
+# define backtrace_symbols_fd(A,B,C)
+#endif
+/* #include <stdio.h> */
+
+/*
+** Each memory allocation looks like this:
+**
+**  ------------------------------------------------------------------------
+**  | Title |  backtrace pointers |  MemBlockHdr |  allocation |  EndGuard |
+**  ------------------------------------------------------------------------
+**
+** The application code sees only a pointer to the allocation.  We have
+** to back up from the allocation pointer to find the MemBlockHdr.  The
+** MemBlockHdr tells us the size of the allocation and the number of
+** backtrace pointers.  There is also a guard word at the end of the
+** MemBlockHdr.
+*/
+struct MemBlockHdr {
+  i64 iSize;                          /* Size of this allocation */
+  struct MemBlockHdr *pNext, *pPrev;  /* Linked list of all unfreed memory */
+  char nBacktrace;                    /* Number of backtraces on this alloc */
+  char nBacktraceSlots;               /* Available backtrace slots */
+  u8 nTitle;                          /* Bytes of title; includes '\0' */
+  u8 eType;                           /* Allocation type code */
+  int iForeGuard;                     /* Guard word for sanity */
+};
+
+/*
+** Guard words
+*/
+#define FOREGUARD 0x80F5E153
+#define REARGUARD 0xE4676B53
+
+/*
+** Number of malloc size increments to track.
+*/
+#define NCSIZE  1000
+
+/*
+** All of the static variables used by this module are collected
+** into a single structure named "mem".  This is to keep the
+** static variables organized and to reduce namespace pollution
+** when this module is combined with other in the amalgamation.
+*/
+static struct {
+  
+  /*
+  ** Mutex to control access to the memory allocation subsystem.
+  */
+  sqlite3_mutex *mutex;
+
+  /*
+  ** Head and tail of a linked list of all outstanding allocations
+  */
+  struct MemBlockHdr *pFirst;
+  struct MemBlockHdr *pLast;
+  
+  /*
+  ** The number of levels of backtrace to save in new allocations.
+  */
+  int nBacktrace;
+  void (*xBacktrace)(int, int, void **);
+
+  /*
+  ** Title text to insert in front of each block
+  */
+  int nTitle;        /* Bytes of zTitle to save.  Includes '\0' and padding */
+  char zTitle[100];  /* The title text */
+
+  /* 
+  ** sqlite3MallocDisallow() increments the following counter.
+  ** sqlite3MallocAllow() decrements it.
+  */
+  int disallow; /* Do not allow memory allocation */
+
+  /*
+  ** Gather statistics on the sizes of memory allocations.
+  ** nAlloc[i] is the number of allocation attempts of i*8
+  ** bytes.  i==NCSIZE is the number of allocation attempts for
+  ** sizes more than NCSIZE*8 bytes.
+  */
+  int nAlloc[NCSIZE];      /* Total number of allocations */
+  int nCurrent[NCSIZE];    /* Current number of allocations */
+  int mxCurrent[NCSIZE];   /* Highwater mark for nCurrent */
+
+} mem;
+
+
+/*
+** Adjust memory usage statistics
+*/
+static void adjustStats(int iSize, int increment){
+  int i = ROUND8(iSize)/8;
+  if( i>NCSIZE-1 ){
+    i = NCSIZE - 1;
+  }
+  if( increment>0 ){
+    mem.nAlloc[i]++;
+    mem.nCurrent[i]++;
+    if( mem.nCurrent[i]>mem.mxCurrent[i] ){
+      mem.mxCurrent[i] = mem.nCurrent[i];
+    }
+  }else{
+    mem.nCurrent[i]--;
+    assert( mem.nCurrent[i]>=0 );
+  }
+}
+
+/*
+** Given an allocation, find the MemBlockHdr for that allocation.
+**
+** This routine checks the guards at either end of the allocation and
+** if they are incorrect it asserts.
+*/
+static struct MemBlockHdr *sqlite3MemsysGetHeader(void *pAllocation){
+  struct MemBlockHdr *p;
+  int *pInt;
+  u8 *pU8;
+  int nReserve;
+
+  p = (struct MemBlockHdr*)pAllocation;
+  p--;
+  assert( p->iForeGuard==(int)FOREGUARD );
+  nReserve = ROUND8(p->iSize);
+  pInt = (int*)pAllocation;
+  pU8 = (u8*)pAllocation;
+  assert( pInt[nReserve/sizeof(int)]==(int)REARGUARD );
+  /* This checks any of the "extra" bytes allocated due
+  ** to rounding up to an 8 byte boundary to ensure 
+  ** they haven't been overwritten.
+  */
+  while( nReserve-- > p->iSize ) assert( pU8[nReserve]==0x65 );
+  return p;
+}
+
+/*
+** Return the number of bytes currently allocated at address p.
+*/
+static int sqlite3MemSize(void *p){
+  struct MemBlockHdr *pHdr;
+  if( !p ){
+    return 0;
+  }
+  pHdr = sqlite3MemsysGetHeader(p);
+  return (int)pHdr->iSize;
+}
+
+/*
+** Initialize the memory allocation subsystem.
+*/
+static int sqlite3MemInit(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  assert( (sizeof(struct MemBlockHdr)&7) == 0 );
+  if( !sqlite3GlobalConfig.bMemstat ){
+    /* If memory status is enabled, then the malloc.c wrapper will already
+    ** hold the STATIC_MEM mutex when the routines here are invoked. */
+    mem.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize the memory allocation subsystem.
+*/
+static void sqlite3MemShutdown(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  mem.mutex = 0;
+}
+
+/*
+** Round up a request size to the next valid allocation size.
+*/
+static int sqlite3MemRoundup(int n){
+  return ROUND8(n);
+}
+
+/*
+** Fill a buffer with pseudo-random bytes.  This is used to preset
+** the content of a new memory allocation to unpredictable values and
+** to clear the content of a freed allocation to unpredictable values.
+*/
+static void randomFill(char *pBuf, int nByte){
+  unsigned int x, y, r;
+  x = SQLITE_PTR_TO_INT(pBuf);
+  y = nByte | 1;
+  while( nByte >= 4 ){
+    x = (x>>1) ^ (-(int)(x&1) & 0xd0000001);
+    y = y*1103515245 + 12345;
+    r = x ^ y;
+    *(int*)pBuf = r;
+    pBuf += 4;
+    nByte -= 4;
+  }
+  while( nByte-- > 0 ){
+    x = (x>>1) ^ (-(int)(x&1) & 0xd0000001);
+    y = y*1103515245 + 12345;
+    r = x ^ y;
+    *(pBuf++) = r & 0xff;
+  }
+}
+
+/*
+** Allocate nByte bytes of memory.
+*/
+static void *sqlite3MemMalloc(int nByte){
+  struct MemBlockHdr *pHdr;
+  void **pBt;
+  char *z;
+  int *pInt;
+  void *p = 0;
+  int totalSize;
+  int nReserve;
+  sqlite3_mutex_enter(mem.mutex);
+  assert( mem.disallow==0 );
+  nReserve = ROUND8(nByte);
+  totalSize = nReserve + sizeof(*pHdr) + sizeof(int) +
+               mem.nBacktrace*sizeof(void*) + mem.nTitle;
+  p = malloc(totalSize);
+  if( p ){
+    z = p;
+    pBt = (void**)&z[mem.nTitle];
+    pHdr = (struct MemBlockHdr*)&pBt[mem.nBacktrace];
+    pHdr->pNext = 0;
+    pHdr->pPrev = mem.pLast;
+    if( mem.pLast ){
+      mem.pLast->pNext = pHdr;
+    }else{
+      mem.pFirst = pHdr;
+    }
+    mem.pLast = pHdr;
+    pHdr->iForeGuard = FOREGUARD;
+    pHdr->eType = MEMTYPE_HEAP;
+    pHdr->nBacktraceSlots = mem.nBacktrace;
+    pHdr->nTitle = mem.nTitle;
+    if( mem.nBacktrace ){
+      void *aAddr[40];
+      pHdr->nBacktrace = backtrace(aAddr, mem.nBacktrace+1)-1;
+      memcpy(pBt, &aAddr[1], pHdr->nBacktrace*sizeof(void*));
+      assert(pBt[0]);
+      if( mem.xBacktrace ){
+        mem.xBacktrace(nByte, pHdr->nBacktrace-1, &aAddr[1]);
+      }
+    }else{
+      pHdr->nBacktrace = 0;
+    }
+    if( mem.nTitle ){
+      memcpy(z, mem.zTitle, mem.nTitle);
+    }
+    pHdr->iSize = nByte;
+    adjustStats(nByte, +1);
+    pInt = (int*)&pHdr[1];
+    pInt[nReserve/sizeof(int)] = REARGUARD;
+    randomFill((char*)pInt, nByte);
+    memset(((char*)pInt)+nByte, 0x65, nReserve-nByte);
+    p = (void*)pInt;
+  }
+  sqlite3_mutex_leave(mem.mutex);
+  return p; 
+}
+
+/*
+** Free memory.
+*/
+static void sqlite3MemFree(void *pPrior){
+  struct MemBlockHdr *pHdr;
+  void **pBt;
+  char *z;
+  assert( sqlite3GlobalConfig.bMemstat || sqlite3GlobalConfig.bCoreMutex==0 
+       || mem.mutex!=0 );
+  pHdr = sqlite3MemsysGetHeader(pPrior);
+  pBt = (void**)pHdr;
+  pBt -= pHdr->nBacktraceSlots;
+  sqlite3_mutex_enter(mem.mutex);
+  if( pHdr->pPrev ){
+    assert( pHdr->pPrev->pNext==pHdr );
+    pHdr->pPrev->pNext = pHdr->pNext;
+  }else{
+    assert( mem.pFirst==pHdr );
+    mem.pFirst = pHdr->pNext;
+  }
+  if( pHdr->pNext ){
+    assert( pHdr->pNext->pPrev==pHdr );
+    pHdr->pNext->pPrev = pHdr->pPrev;
+  }else{
+    assert( mem.pLast==pHdr );
+    mem.pLast = pHdr->pPrev;
+  }
+  z = (char*)pBt;
+  z -= pHdr->nTitle;
+  adjustStats((int)pHdr->iSize, -1);
+  randomFill(z, sizeof(void*)*pHdr->nBacktraceSlots + sizeof(*pHdr) +
+                (int)pHdr->iSize + sizeof(int) + pHdr->nTitle);
+  free(z);
+  sqlite3_mutex_leave(mem.mutex);  
+}
+
+/*
+** Change the size of an existing memory allocation.
+**
+** For this debugging implementation, we *always* make a copy of the
+** allocation into a new place in memory.  In this way, if the 
+** higher level code is using pointer to the old allocation, it is 
+** much more likely to break and we are much more liking to find
+** the error.
+*/
+static void *sqlite3MemRealloc(void *pPrior, int nByte){
+  struct MemBlockHdr *pOldHdr;
+  void *pNew;
+  assert( mem.disallow==0 );
+  assert( (nByte & 7)==0 );     /* EV: R-46199-30249 */
+  pOldHdr = sqlite3MemsysGetHeader(pPrior);
+  pNew = sqlite3MemMalloc(nByte);
+  if( pNew ){
+    memcpy(pNew, pPrior, (int)(nByte<pOldHdr->iSize ? nByte : pOldHdr->iSize));
+    if( nByte>pOldHdr->iSize ){
+      randomFill(&((char*)pNew)[pOldHdr->iSize], nByte - (int)pOldHdr->iSize);
+    }
+    sqlite3MemFree(pPrior);
+  }
+  return pNew;
+}
+
+/*
+** Populate the low-level memory allocation function pointers in
+** sqlite3GlobalConfig.m with pointers to the routines in this file.
+*/
+SQLITE_PRIVATE void sqlite3MemSetDefault(void){
+  static const sqlite3_mem_methods defaultMethods = {
+     sqlite3MemMalloc,
+     sqlite3MemFree,
+     sqlite3MemRealloc,
+     sqlite3MemSize,
+     sqlite3MemRoundup,
+     sqlite3MemInit,
+     sqlite3MemShutdown,
+     0
+  };
+  sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods);
+}
+
+/*
+** Set the "type" of an allocation.
+*/
+SQLITE_PRIVATE void sqlite3MemdebugSetType(void *p, u8 eType){
+  if( p && sqlite3GlobalConfig.m.xMalloc==sqlite3MemMalloc ){
+    struct MemBlockHdr *pHdr;
+    pHdr = sqlite3MemsysGetHeader(p);
+    assert( pHdr->iForeGuard==FOREGUARD );
+    pHdr->eType = eType;
+  }
+}
+
+/*
+** Return TRUE if the mask of type in eType matches the type of the
+** allocation p.  Also return true if p==NULL.
+**
+** This routine is designed for use within an assert() statement, to
+** verify the type of an allocation.  For example:
+**
+**     assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) );
+*/
+SQLITE_PRIVATE int sqlite3MemdebugHasType(void *p, u8 eType){
+  int rc = 1;
+  if( p && sqlite3GlobalConfig.m.xMalloc==sqlite3MemMalloc ){
+    struct MemBlockHdr *pHdr;
+    pHdr = sqlite3MemsysGetHeader(p);
+    assert( pHdr->iForeGuard==FOREGUARD );         /* Allocation is valid */
+    if( (pHdr->eType&eType)==0 ){
+      rc = 0;
+    }
+  }
+  return rc;
+}
+
+/*
+** Return TRUE if the mask of type in eType matches no bits of the type of the
+** allocation p.  Also return true if p==NULL.
+**
+** This routine is designed for use within an assert() statement, to
+** verify the type of an allocation.  For example:
+**
+**     assert( sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) );
+*/
+SQLITE_PRIVATE int sqlite3MemdebugNoType(void *p, u8 eType){
+  int rc = 1;
+  if( p && sqlite3GlobalConfig.m.xMalloc==sqlite3MemMalloc ){
+    struct MemBlockHdr *pHdr;
+    pHdr = sqlite3MemsysGetHeader(p);
+    assert( pHdr->iForeGuard==FOREGUARD );         /* Allocation is valid */
+    if( (pHdr->eType&eType)!=0 ){
+      rc = 0;
+    }
+  }
+  return rc;
+}
+
+/*
+** Set the number of backtrace levels kept for each allocation.
+** A value of zero turns off backtracing.  The number is always rounded
+** up to a multiple of 2.
+*/
+SQLITE_PRIVATE void sqlite3MemdebugBacktrace(int depth){
+  if( depth<0 ){ depth = 0; }
+  if( depth>20 ){ depth = 20; }
+  depth = (depth+1)&0xfe;
+  mem.nBacktrace = depth;
+}
+
+SQLITE_PRIVATE void sqlite3MemdebugBacktraceCallback(void (*xBacktrace)(int, int, void **)){
+  mem.xBacktrace = xBacktrace;
+}
+
+/*
+** Set the title string for subsequent allocations.
+*/
+SQLITE_PRIVATE void sqlite3MemdebugSettitle(const char *zTitle){
+  unsigned int n = sqlite3Strlen30(zTitle) + 1;
+  sqlite3_mutex_enter(mem.mutex);
+  if( n>=sizeof(mem.zTitle) ) n = sizeof(mem.zTitle)-1;
+  memcpy(mem.zTitle, zTitle, n);
+  mem.zTitle[n] = 0;
+  mem.nTitle = ROUND8(n);
+  sqlite3_mutex_leave(mem.mutex);
+}
+
+SQLITE_PRIVATE void sqlite3MemdebugSync(){
+  struct MemBlockHdr *pHdr;
+  for(pHdr=mem.pFirst; pHdr; pHdr=pHdr->pNext){
+    void **pBt = (void**)pHdr;
+    pBt -= pHdr->nBacktraceSlots;
+    mem.xBacktrace((int)pHdr->iSize, pHdr->nBacktrace-1, &pBt[1]);
+  }
+}
+
+/*
+** Open the file indicated and write a log of all unfreed memory 
+** allocations into that log.
+*/
+SQLITE_PRIVATE void sqlite3MemdebugDump(const char *zFilename){
+  FILE *out;
+  struct MemBlockHdr *pHdr;
+  void **pBt;
+  int i;
+  out = fopen(zFilename, "w");
+  if( out==0 ){
+    fprintf(stderr, "** Unable to output memory debug output log: %s **\n",
+                    zFilename);
+    return;
+  }
+  for(pHdr=mem.pFirst; pHdr; pHdr=pHdr->pNext){
+    char *z = (char*)pHdr;
+    z -= pHdr->nBacktraceSlots*sizeof(void*) + pHdr->nTitle;
+    fprintf(out, "**** %lld bytes at %p from %s ****\n", 
+            pHdr->iSize, &pHdr[1], pHdr->nTitle ? z : "???");
+    if( pHdr->nBacktrace ){
+      fflush(out);
+      pBt = (void**)pHdr;
+      pBt -= pHdr->nBacktraceSlots;
+      backtrace_symbols_fd(pBt, pHdr->nBacktrace, fileno(out));
+      fprintf(out, "\n");
+    }
+  }
+  fprintf(out, "COUNTS:\n");
+  for(i=0; i<NCSIZE-1; i++){
+    if( mem.nAlloc[i] ){
+      fprintf(out, "   %5d: %10d %10d %10d\n", 
+            i*8, mem.nAlloc[i], mem.nCurrent[i], mem.mxCurrent[i]);
+    }
+  }
+  if( mem.nAlloc[NCSIZE-1] ){
+    fprintf(out, "   %5d: %10d %10d %10d\n",
+             NCSIZE*8-8, mem.nAlloc[NCSIZE-1],
+             mem.nCurrent[NCSIZE-1], mem.mxCurrent[NCSIZE-1]);
+  }
+  fclose(out);
+}
+
+/*
+** Return the number of times sqlite3MemMalloc() has been called.
+*/
+SQLITE_PRIVATE int sqlite3MemdebugMallocCount(){
+  int i;
+  int nTotal = 0;
+  for(i=0; i<NCSIZE; i++){
+    nTotal += mem.nAlloc[i];
+  }
+  return nTotal;
+}
+
+
+#endif /* SQLITE_MEMDEBUG */
+
+/************** End of mem2.c ************************************************/
+/************** Begin file mem3.c ********************************************/
+/*
+** 2007 October 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement a memory
+** allocation subsystem for use by SQLite. 
+**
+** This version of the memory allocation subsystem omits all
+** use of malloc(). The SQLite user supplies a block of memory
+** before calling sqlite3_initialize() from which allocations
+** are made and returned by the xMalloc() and xRealloc() 
+** implementations. Once sqlite3_initialize() has been called,
+** the amount of memory available to SQLite is fixed and cannot
+** be changed.
+**
+** This version of the memory allocation subsystem is included
+** in the build only if SQLITE_ENABLE_MEMSYS3 is defined.
+*/
+
+/*
+** This version of the memory allocator is only built into the library
+** SQLITE_ENABLE_MEMSYS3 is defined. Defining this symbol does not
+** mean that the library will use a memory-pool by default, just that
+** it is available. The mempool allocator is activated by calling
+** sqlite3_config().
+*/
+#ifdef SQLITE_ENABLE_MEMSYS3
+
+/*
+** Maximum size (in Mem3Blocks) of a "small" chunk.
+*/
+#define MX_SMALL 10
+
+
+/*
+** Number of freelist hash slots
+*/
+#define N_HASH  61
+
+/*
+** A memory allocation (also called a "chunk") consists of two or 
+** more blocks where each block is 8 bytes.  The first 8 bytes are 
+** a header that is not returned to the user.
+**
+** A chunk is two or more blocks that is either checked out or
+** free.  The first block has format u.hdr.  u.hdr.size4x is 4 times the
+** size of the allocation in blocks if the allocation is free.
+** The u.hdr.size4x&1 bit is true if the chunk is checked out and
+** false if the chunk is on the freelist.  The u.hdr.size4x&2 bit
+** is true if the previous chunk is checked out and false if the
+** previous chunk is free.  The u.hdr.prevSize field is the size of
+** the previous chunk in blocks if the previous chunk is on the
+** freelist. If the previous chunk is checked out, then
+** u.hdr.prevSize can be part of the data for that chunk and should
+** not be read or written.
+**
+** We often identify a chunk by its index in mem3.aPool[].  When
+** this is done, the chunk index refers to the second block of
+** the chunk.  In this way, the first chunk has an index of 1.
+** A chunk index of 0 means "no such chunk" and is the equivalent
+** of a NULL pointer.
+**
+** The second block of free chunks is of the form u.list.  The
+** two fields form a double-linked list of chunks of related sizes.
+** Pointers to the head of the list are stored in mem3.aiSmall[] 
+** for smaller chunks and mem3.aiHash[] for larger chunks.
+**
+** The second block of a chunk is user data if the chunk is checked 
+** out.  If a chunk is checked out, the user data may extend into
+** the u.hdr.prevSize value of the following chunk.
+*/
+typedef struct Mem3Block Mem3Block;
+struct Mem3Block {
+  union {
+    struct {
+      u32 prevSize;   /* Size of previous chunk in Mem3Block elements */
+      u32 size4x;     /* 4x the size of current chunk in Mem3Block elements */
+    } hdr;
+    struct {
+      u32 next;       /* Index in mem3.aPool[] of next free chunk */
+      u32 prev;       /* Index in mem3.aPool[] of previous free chunk */
+    } list;
+  } u;
+};
+
+/*
+** All of the static variables used by this module are collected
+** into a single structure named "mem3".  This is to keep the
+** static variables organized and to reduce namespace pollution
+** when this module is combined with other in the amalgamation.
+*/
+static SQLITE_WSD struct Mem3Global {
+  /*
+  ** Memory available for allocation. nPool is the size of the array
+  ** (in Mem3Blocks) pointed to by aPool less 2.
+  */
+  u32 nPool;
+  Mem3Block *aPool;
+
+  /*
+  ** True if we are evaluating an out-of-memory callback.
+  */
+  int alarmBusy;
+  
+  /*
+  ** Mutex to control access to the memory allocation subsystem.
+  */
+  sqlite3_mutex *mutex;
+  
+  /*
+  ** The minimum amount of free space that we have seen.
+  */
+  u32 mnMaster;
+
+  /*
+  ** iMaster is the index of the master chunk.  Most new allocations
+  ** occur off of this chunk.  szMaster is the size (in Mem3Blocks)
+  ** of the current master.  iMaster is 0 if there is not master chunk.
+  ** The master chunk is not in either the aiHash[] or aiSmall[].
+  */
+  u32 iMaster;
+  u32 szMaster;
+
+  /*
+  ** Array of lists of free blocks according to the block size 
+  ** for smaller chunks, or a hash on the block size for larger
+  ** chunks.
+  */
+  u32 aiSmall[MX_SMALL-1];   /* For sizes 2 through MX_SMALL, inclusive */
+  u32 aiHash[N_HASH];        /* For sizes MX_SMALL+1 and larger */
+} mem3 = { 97535575 };
+
+#define mem3 GLOBAL(struct Mem3Global, mem3)
+
+/*
+** Unlink the chunk at mem3.aPool[i] from list it is currently
+** on.  *pRoot is the list that i is a member of.
+*/
+static void memsys3UnlinkFromList(u32 i, u32 *pRoot){
+  u32 next = mem3.aPool[i].u.list.next;
+  u32 prev = mem3.aPool[i].u.list.prev;
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  if( prev==0 ){
+    *pRoot = next;
+  }else{
+    mem3.aPool[prev].u.list.next = next;
+  }
+  if( next ){
+    mem3.aPool[next].u.list.prev = prev;
+  }
+  mem3.aPool[i].u.list.next = 0;
+  mem3.aPool[i].u.list.prev = 0;
+}
+
+/*
+** Unlink the chunk at index i from 
+** whatever list is currently a member of.
+*/
+static void memsys3Unlink(u32 i){
+  u32 size, hash;
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( (mem3.aPool[i-1].u.hdr.size4x & 1)==0 );
+  assert( i>=1 );
+  size = mem3.aPool[i-1].u.hdr.size4x/4;
+  assert( size==mem3.aPool[i+size-1].u.hdr.prevSize );
+  assert( size>=2 );
+  if( size <= MX_SMALL ){
+    memsys3UnlinkFromList(i, &mem3.aiSmall[size-2]);
+  }else{
+    hash = size % N_HASH;
+    memsys3UnlinkFromList(i, &mem3.aiHash[hash]);
+  }
+}
+
+/*
+** Link the chunk at mem3.aPool[i] so that is on the list rooted
+** at *pRoot.
+*/
+static void memsys3LinkIntoList(u32 i, u32 *pRoot){
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  mem3.aPool[i].u.list.next = *pRoot;
+  mem3.aPool[i].u.list.prev = 0;
+  if( *pRoot ){
+    mem3.aPool[*pRoot].u.list.prev = i;
+  }
+  *pRoot = i;
+}
+
+/*
+** Link the chunk at index i into either the appropriate
+** small chunk list, or into the large chunk hash table.
+*/
+static void memsys3Link(u32 i){
+  u32 size, hash;
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( i>=1 );
+  assert( (mem3.aPool[i-1].u.hdr.size4x & 1)==0 );
+  size = mem3.aPool[i-1].u.hdr.size4x/4;
+  assert( size==mem3.aPool[i+size-1].u.hdr.prevSize );
+  assert( size>=2 );
+  if( size <= MX_SMALL ){
+    memsys3LinkIntoList(i, &mem3.aiSmall[size-2]);
+  }else{
+    hash = size % N_HASH;
+    memsys3LinkIntoList(i, &mem3.aiHash[hash]);
+  }
+}
+
+/*
+** If the STATIC_MEM mutex is not already held, obtain it now. The mutex
+** will already be held (obtained by code in malloc.c) if
+** sqlite3GlobalConfig.bMemStat is true.
+*/
+static void memsys3Enter(void){
+  if( sqlite3GlobalConfig.bMemstat==0 && mem3.mutex==0 ){
+    mem3.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM);
+  }
+  sqlite3_mutex_enter(mem3.mutex);
+}
+static void memsys3Leave(void){
+  sqlite3_mutex_leave(mem3.mutex);
+}
+
+/*
+** Called when we are unable to satisfy an allocation of nBytes.
+*/
+static void memsys3OutOfMemory(int nByte){
+  if( !mem3.alarmBusy ){
+    mem3.alarmBusy = 1;
+    assert( sqlite3_mutex_held(mem3.mutex) );
+    sqlite3_mutex_leave(mem3.mutex);
+    sqlite3_release_memory(nByte);
+    sqlite3_mutex_enter(mem3.mutex);
+    mem3.alarmBusy = 0;
+  }
+}
+
+
+/*
+** Chunk i is a free chunk that has been unlinked.  Adjust its 
+** size parameters for check-out and return a pointer to the 
+** user portion of the chunk.
+*/
+static void *memsys3Checkout(u32 i, u32 nBlock){
+  u32 x;
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( i>=1 );
+  assert( mem3.aPool[i-1].u.hdr.size4x/4==nBlock );
+  assert( mem3.aPool[i+nBlock-1].u.hdr.prevSize==nBlock );
+  x = mem3.aPool[i-1].u.hdr.size4x;
+  mem3.aPool[i-1].u.hdr.size4x = nBlock*4 | 1 | (x&2);
+  mem3.aPool[i+nBlock-1].u.hdr.prevSize = nBlock;
+  mem3.aPool[i+nBlock-1].u.hdr.size4x |= 2;
+  return &mem3.aPool[i];
+}
+
+/*
+** Carve a piece off of the end of the mem3.iMaster free chunk.
+** Return a pointer to the new allocation.  Or, if the master chunk
+** is not large enough, return 0.
+*/
+static void *memsys3FromMaster(u32 nBlock){
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( mem3.szMaster>=nBlock );
+  if( nBlock>=mem3.szMaster-1 ){
+    /* Use the entire master */
+    void *p = memsys3Checkout(mem3.iMaster, mem3.szMaster);
+    mem3.iMaster = 0;
+    mem3.szMaster = 0;
+    mem3.mnMaster = 0;
+    return p;
+  }else{
+    /* Split the master block.  Return the tail. */
+    u32 newi, x;
+    newi = mem3.iMaster + mem3.szMaster - nBlock;
+    assert( newi > mem3.iMaster+1 );
+    mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.prevSize = nBlock;
+    mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.size4x |= 2;
+    mem3.aPool[newi-1].u.hdr.size4x = nBlock*4 + 1;
+    mem3.szMaster -= nBlock;
+    mem3.aPool[newi-1].u.hdr.prevSize = mem3.szMaster;
+    x = mem3.aPool[mem3.iMaster-1].u.hdr.size4x & 2;
+    mem3.aPool[mem3.iMaster-1].u.hdr.size4x = mem3.szMaster*4 | x;
+    if( mem3.szMaster < mem3.mnMaster ){
+      mem3.mnMaster = mem3.szMaster;
+    }
+    return (void*)&mem3.aPool[newi];
+  }
+}
+
+/*
+** *pRoot is the head of a list of free chunks of the same size
+** or same size hash.  In other words, *pRoot is an entry in either
+** mem3.aiSmall[] or mem3.aiHash[].  
+**
+** This routine examines all entries on the given list and tries
+** to coalesce each entries with adjacent free chunks.  
+**
+** If it sees a chunk that is larger than mem3.iMaster, it replaces 
+** the current mem3.iMaster with the new larger chunk.  In order for
+** this mem3.iMaster replacement to work, the master chunk must be
+** linked into the hash tables.  That is not the normal state of
+** affairs, of course.  The calling routine must link the master
+** chunk before invoking this routine, then must unlink the (possibly
+** changed) master chunk once this routine has finished.
+*/
+static void memsys3Merge(u32 *pRoot){
+  u32 iNext, prev, size, i, x;
+
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  for(i=*pRoot; i>0; i=iNext){
+    iNext = mem3.aPool[i].u.list.next;
+    size = mem3.aPool[i-1].u.hdr.size4x;
+    assert( (size&1)==0 );
+    if( (size&2)==0 ){
+      memsys3UnlinkFromList(i, pRoot);
+      assert( i > mem3.aPool[i-1].u.hdr.prevSize );
+      prev = i - mem3.aPool[i-1].u.hdr.prevSize;
+      if( prev==iNext ){
+        iNext = mem3.aPool[prev].u.list.next;
+      }
+      memsys3Unlink(prev);
+      size = i + size/4 - prev;
+      x = mem3.aPool[prev-1].u.hdr.size4x & 2;
+      mem3.aPool[prev-1].u.hdr.size4x = size*4 | x;
+      mem3.aPool[prev+size-1].u.hdr.prevSize = size;
+      memsys3Link(prev);
+      i = prev;
+    }else{
+      size /= 4;
+    }
+    if( size>mem3.szMaster ){
+      mem3.iMaster = i;
+      mem3.szMaster = size;
+    }
+  }
+}
+
+/*
+** Return a block of memory of at least nBytes in size.
+** Return NULL if unable.
+**
+** This function assumes that the necessary mutexes, if any, are
+** already held by the caller. Hence "Unsafe".
+*/
+static void *memsys3MallocUnsafe(int nByte){
+  u32 i;
+  u32 nBlock;
+  u32 toFree;
+
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( sizeof(Mem3Block)==8 );
+  if( nByte<=12 ){
+    nBlock = 2;
+  }else{
+    nBlock = (nByte + 11)/8;
+  }
+  assert( nBlock>=2 );
+
+  /* STEP 1:
+  ** Look for an entry of the correct size in either the small
+  ** chunk table or in the large chunk hash table.  This is
+  ** successful most of the time (about 9 times out of 10).
+  */
+  if( nBlock <= MX_SMALL ){
+    i = mem3.aiSmall[nBlock-2];
+    if( i>0 ){
+      memsys3UnlinkFromList(i, &mem3.aiSmall[nBlock-2]);
+      return memsys3Checkout(i, nBlock);
+    }
+  }else{
+    int hash = nBlock % N_HASH;
+    for(i=mem3.aiHash[hash]; i>0; i=mem3.aPool[i].u.list.next){
+      if( mem3.aPool[i-1].u.hdr.size4x/4==nBlock ){
+        memsys3UnlinkFromList(i, &mem3.aiHash[hash]);
+        return memsys3Checkout(i, nBlock);
+      }
+    }
+  }
+
+  /* STEP 2:
+  ** Try to satisfy the allocation by carving a piece off of the end
+  ** of the master chunk.  This step usually works if step 1 fails.
+  */
+  if( mem3.szMaster>=nBlock ){
+    return memsys3FromMaster(nBlock);
+  }
+
+
+  /* STEP 3:  
+  ** Loop through the entire memory pool.  Coalesce adjacent free
+  ** chunks.  Recompute the master chunk as the largest free chunk.
+  ** Then try again to satisfy the allocation by carving a piece off
+  ** of the end of the master chunk.  This step happens very
+  ** rarely (we hope!)
+  */
+  for(toFree=nBlock*16; toFree<(mem3.nPool*16); toFree *= 2){
+    memsys3OutOfMemory(toFree);
+    if( mem3.iMaster ){
+      memsys3Link(mem3.iMaster);
+      mem3.iMaster = 0;
+      mem3.szMaster = 0;
+    }
+    for(i=0; i<N_HASH; i++){
+      memsys3Merge(&mem3.aiHash[i]);
+    }
+    for(i=0; i<MX_SMALL-1; i++){
+      memsys3Merge(&mem3.aiSmall[i]);
+    }
+    if( mem3.szMaster ){
+      memsys3Unlink(mem3.iMaster);
+      if( mem3.szMaster>=nBlock ){
+        return memsys3FromMaster(nBlock);
+      }
+    }
+  }
+
+  /* If none of the above worked, then we fail. */
+  return 0;
+}
+
+/*
+** Free an outstanding memory allocation.
+**
+** This function assumes that the necessary mutexes, if any, are
+** already held by the caller. Hence "Unsafe".
+*/
+static void memsys3FreeUnsafe(void *pOld){
+  Mem3Block *p = (Mem3Block*)pOld;
+  int i;
+  u32 size, x;
+  assert( sqlite3_mutex_held(mem3.mutex) );
+  assert( p>mem3.aPool && p<&mem3.aPool[mem3.nPool] );
+  i = p - mem3.aPool;
+  assert( (mem3.aPool[i-1].u.hdr.size4x&1)==1 );
+  size = mem3.aPool[i-1].u.hdr.size4x/4;
+  assert( i+size<=mem3.nPool+1 );
+  mem3.aPool[i-1].u.hdr.size4x &= ~1;
+  mem3.aPool[i+size-1].u.hdr.prevSize = size;
+  mem3.aPool[i+size-1].u.hdr.size4x &= ~2;
+  memsys3Link(i);
+
+  /* Try to expand the master using the newly freed chunk */
+  if( mem3.iMaster ){
+    while( (mem3.aPool[mem3.iMaster-1].u.hdr.size4x&2)==0 ){
+      size = mem3.aPool[mem3.iMaster-1].u.hdr.prevSize;
+      mem3.iMaster -= size;
+      mem3.szMaster += size;
+      memsys3Unlink(mem3.iMaster);
+      x = mem3.aPool[mem3.iMaster-1].u.hdr.size4x & 2;
+      mem3.aPool[mem3.iMaster-1].u.hdr.size4x = mem3.szMaster*4 | x;
+      mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.prevSize = mem3.szMaster;
+    }
+    x = mem3.aPool[mem3.iMaster-1].u.hdr.size4x & 2;
+    while( (mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.size4x&1)==0 ){
+      memsys3Unlink(mem3.iMaster+mem3.szMaster);
+      mem3.szMaster += mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.size4x/4;
+      mem3.aPool[mem3.iMaster-1].u.hdr.size4x = mem3.szMaster*4 | x;
+      mem3.aPool[mem3.iMaster+mem3.szMaster-1].u.hdr.prevSize = mem3.szMaster;
+    }
+  }
+}
+
+/*
+** Return the size of an outstanding allocation, in bytes.  The
+** size returned omits the 8-byte header overhead.  This only
+** works for chunks that are currently checked out.
+*/
+static int memsys3Size(void *p){
+  Mem3Block *pBlock;
+  if( p==0 ) return 0;
+  pBlock = (Mem3Block*)p;
+  assert( (pBlock[-1].u.hdr.size4x&1)!=0 );
+  return (pBlock[-1].u.hdr.size4x&~3)*2 - 4;
+}
+
+/*
+** Round up a request size to the next valid allocation size.
+*/
+static int memsys3Roundup(int n){
+  if( n<=12 ){
+    return 12;
+  }else{
+    return ((n+11)&~7) - 4;
+  }
+}
+
+/*
+** Allocate nBytes of memory.
+*/
+static void *memsys3Malloc(int nBytes){
+  sqlite3_int64 *p;
+  assert( nBytes>0 );          /* malloc.c filters out 0 byte requests */
+  memsys3Enter();
+  p = memsys3MallocUnsafe(nBytes);
+  memsys3Leave();
+  return (void*)p; 
+}
+
+/*
+** Free memory.
+*/
+static void memsys3Free(void *pPrior){
+  assert( pPrior );
+  memsys3Enter();
+  memsys3FreeUnsafe(pPrior);
+  memsys3Leave();
+}
+
+/*
+** Change the size of an existing memory allocation
+*/
+static void *memsys3Realloc(void *pPrior, int nBytes){
+  int nOld;
+  void *p;
+  if( pPrior==0 ){
+    return sqlite3_malloc(nBytes);
+  }
+  if( nBytes<=0 ){
+    sqlite3_free(pPrior);
+    return 0;
+  }
+  nOld = memsys3Size(pPrior);
+  if( nBytes<=nOld && nBytes>=nOld-128 ){
+    return pPrior;
+  }
+  memsys3Enter();
+  p = memsys3MallocUnsafe(nBytes);
+  if( p ){
+    if( nOld<nBytes ){
+      memcpy(p, pPrior, nOld);
+    }else{
+      memcpy(p, pPrior, nBytes);
+    }
+    memsys3FreeUnsafe(pPrior);
+  }
+  memsys3Leave();
+  return p;
+}
+
+/*
+** Initialize this module.
+*/
+static int memsys3Init(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  if( !sqlite3GlobalConfig.pHeap ){
+    return SQLITE_ERROR;
+  }
+
+  /* Store a pointer to the memory block in global structure mem3. */
+  assert( sizeof(Mem3Block)==8 );
+  mem3.aPool = (Mem3Block *)sqlite3GlobalConfig.pHeap;
+  mem3.nPool = (sqlite3GlobalConfig.nHeap / sizeof(Mem3Block)) - 2;
+
+  /* Initialize the master block. */
+  mem3.szMaster = mem3.nPool;
+  mem3.mnMaster = mem3.szMaster;
+  mem3.iMaster = 1;
+  mem3.aPool[0].u.hdr.size4x = (mem3.szMaster<<2) + 2;
+  mem3.aPool[mem3.nPool].u.hdr.prevSize = mem3.nPool;
+  mem3.aPool[mem3.nPool].u.hdr.size4x = 1;
+
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module.
+*/
+static void memsys3Shutdown(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  mem3.mutex = 0;
+  return;
+}
+
+
+
+/*
+** Open the file indicated and write a log of all unfreed memory 
+** allocations into that log.
+*/
+SQLITE_PRIVATE void sqlite3Memsys3Dump(const char *zFilename){
+#ifdef SQLITE_DEBUG
+  FILE *out;
+  u32 i, j;
+  u32 size;
+  if( zFilename==0 || zFilename[0]==0 ){
+    out = stdout;
+  }else{
+    out = fopen(zFilename, "w");
+    if( out==0 ){
+      fprintf(stderr, "** Unable to output memory debug output log: %s **\n",
+                      zFilename);
+      return;
+    }
+  }
+  memsys3Enter();
+  fprintf(out, "CHUNKS:\n");
+  for(i=1; i<=mem3.nPool; i+=size/4){
+    size = mem3.aPool[i-1].u.hdr.size4x;
+    if( size/4<=1 ){
+      fprintf(out, "%p size error\n", &mem3.aPool[i]);
+      assert( 0 );
+      break;
+    }
+    if( (size&1)==0 && mem3.aPool[i+size/4-1].u.hdr.prevSize!=size/4 ){
+      fprintf(out, "%p tail size does not match\n", &mem3.aPool[i]);
+      assert( 0 );
+      break;
+    }
+    if( ((mem3.aPool[i+size/4-1].u.hdr.size4x&2)>>1)!=(size&1) ){
+      fprintf(out, "%p tail checkout bit is incorrect\n", &mem3.aPool[i]);
+      assert( 0 );
+      break;
+    }
+    if( size&1 ){
+      fprintf(out, "%p %6d bytes checked out\n", &mem3.aPool[i], (size/4)*8-8);
+    }else{
+      fprintf(out, "%p %6d bytes free%s\n", &mem3.aPool[i], (size/4)*8-8,
+                  i==mem3.iMaster ? " **master**" : "");
+    }
+  }
+  for(i=0; i<MX_SMALL-1; i++){
+    if( mem3.aiSmall[i]==0 ) continue;
+    fprintf(out, "small(%2d):", i);
+    for(j = mem3.aiSmall[i]; j>0; j=mem3.aPool[j].u.list.next){
+      fprintf(out, " %p(%d)", &mem3.aPool[j],
+              (mem3.aPool[j-1].u.hdr.size4x/4)*8-8);
+    }
+    fprintf(out, "\n"); 
+  }
+  for(i=0; i<N_HASH; i++){
+    if( mem3.aiHash[i]==0 ) continue;
+    fprintf(out, "hash(%2d):", i);
+    for(j = mem3.aiHash[i]; j>0; j=mem3.aPool[j].u.list.next){
+      fprintf(out, " %p(%d)", &mem3.aPool[j],
+              (mem3.aPool[j-1].u.hdr.size4x/4)*8-8);
+    }
+    fprintf(out, "\n"); 
+  }
+  fprintf(out, "master=%d\n", mem3.iMaster);
+  fprintf(out, "nowUsed=%d\n", mem3.nPool*8 - mem3.szMaster*8);
+  fprintf(out, "mxUsed=%d\n", mem3.nPool*8 - mem3.mnMaster*8);
+  sqlite3_mutex_leave(mem3.mutex);
+  if( out==stdout ){
+    fflush(stdout);
+  }else{
+    fclose(out);
+  }
+#else
+  UNUSED_PARAMETER(zFilename);
+#endif
+}
+
+/*
+** This routine is the only routine in this file with external 
+** linkage.
+**
+** Populate the low-level memory allocation function pointers in
+** sqlite3GlobalConfig.m with pointers to the routines in this file. The
+** arguments specify the block of memory to manage.
+**
+** This routine is only called by sqlite3_config(), and therefore
+** is not required to be threadsafe (it is not).
+*/
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys3(void){
+  static const sqlite3_mem_methods mempoolMethods = {
+     memsys3Malloc,
+     memsys3Free,
+     memsys3Realloc,
+     memsys3Size,
+     memsys3Roundup,
+     memsys3Init,
+     memsys3Shutdown,
+     0
+  };
+  return &mempoolMethods;
+}
+
+#endif /* SQLITE_ENABLE_MEMSYS3 */
+
+/************** End of mem3.c ************************************************/
+/************** Begin file mem5.c ********************************************/
+/*
+** 2007 October 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement a memory
+** allocation subsystem for use by SQLite. 
+**
+** This version of the memory allocation subsystem omits all
+** use of malloc(). The application gives SQLite a block of memory
+** before calling sqlite3_initialize() from which allocations
+** are made and returned by the xMalloc() and xRealloc() 
+** implementations. Once sqlite3_initialize() has been called,
+** the amount of memory available to SQLite is fixed and cannot
+** be changed.
+**
+** This version of the memory allocation subsystem is included
+** in the build only if SQLITE_ENABLE_MEMSYS5 is defined.
+**
+** This memory allocator uses the following algorithm:
+**
+**   1.  All memory allocations sizes are rounded up to a power of 2.
+**
+**   2.  If two adjacent free blocks are the halves of a larger block,
+**       then the two blocks are coalesced into the single larger block.
+**
+**   3.  New memory is allocated from the first available free block.
+**
+** This algorithm is described in: J. M. Robson. "Bounds for Some Functions
+** Concerning Dynamic Storage Allocation". Journal of the Association for
+** Computing Machinery, Volume 21, Number 8, July 1974, pages 491-499.
+** 
+** Let n be the size of the largest allocation divided by the minimum
+** allocation size (after rounding all sizes up to a power of 2.)  Let M
+** be the maximum amount of memory ever outstanding at one time.  Let
+** N be the total amount of memory available for allocation.  Robson
+** proved that this memory allocator will never breakdown due to 
+** fragmentation as long as the following constraint holds:
+**
+**      N >=  M*(1 + log2(n)/2) - n + 1
+**
+** The sqlite3_status() logic tracks the maximum values of n and M so
+** that an application can, at any time, verify this constraint.
+*/
+
+/*
+** This version of the memory allocator is used only when 
+** SQLITE_ENABLE_MEMSYS5 is defined.
+*/
+#ifdef SQLITE_ENABLE_MEMSYS5
+
+/*
+** A minimum allocation is an instance of the following structure.
+** Larger allocations are an array of these structures where the
+** size of the array is a power of 2.
+**
+** The size of this object must be a power of two.  That fact is
+** verified in memsys5Init().
+*/
+typedef struct Mem5Link Mem5Link;
+struct Mem5Link {
+  int next;       /* Index of next free chunk */
+  int prev;       /* Index of previous free chunk */
+};
+
+/*
+** Maximum size of any allocation is ((1<<LOGMAX)*mem5.szAtom). Since
+** mem5.szAtom is always at least 8 and 32-bit integers are used,
+** it is not actually possible to reach this limit.
+*/
+#define LOGMAX 30
+
+/*
+** Masks used for mem5.aCtrl[] elements.
+*/
+#define CTRL_LOGSIZE  0x1f    /* Log2 Size of this block */
+#define CTRL_FREE     0x20    /* True if not checked out */
+
+/*
+** All of the static variables used by this module are collected
+** into a single structure named "mem5".  This is to keep the
+** static variables organized and to reduce namespace pollution
+** when this module is combined with other in the amalgamation.
+*/
+static SQLITE_WSD struct Mem5Global {
+  /*
+  ** Memory available for allocation
+  */
+  int szAtom;      /* Smallest possible allocation in bytes */
+  int nBlock;      /* Number of szAtom sized blocks in zPool */
+  u8 *zPool;       /* Memory available to be allocated */
+  
+  /*
+  ** Mutex to control access to the memory allocation subsystem.
+  */
+  sqlite3_mutex *mutex;
+
+  /*
+  ** Performance statistics
+  */
+  u64 nAlloc;         /* Total number of calls to malloc */
+  u64 totalAlloc;     /* Total of all malloc calls - includes internal frag */
+  u64 totalExcess;    /* Total internal fragmentation */
+  u32 currentOut;     /* Current checkout, including internal fragmentation */
+  u32 currentCount;   /* Current number of distinct checkouts */
+  u32 maxOut;         /* Maximum instantaneous currentOut */
+  u32 maxCount;       /* Maximum instantaneous currentCount */
+  u32 maxRequest;     /* Largest allocation (exclusive of internal frag) */
+  
+  /*
+  ** Lists of free blocks.  aiFreelist[0] is a list of free blocks of
+  ** size mem5.szAtom.  aiFreelist[1] holds blocks of size szAtom*2.
+  ** and so forth.
+  */
+  int aiFreelist[LOGMAX+1];
+
+  /*
+  ** Space for tracking which blocks are checked out and the size
+  ** of each block.  One byte per block.
+  */
+  u8 *aCtrl;
+
+} mem5;
+
+/*
+** Access the static variable through a macro for SQLITE_OMIT_WSD.
+*/
+#define mem5 GLOBAL(struct Mem5Global, mem5)
+
+/*
+** Assuming mem5.zPool is divided up into an array of Mem5Link
+** structures, return a pointer to the idx-th such link.
+*/
+#define MEM5LINK(idx) ((Mem5Link *)(&mem5.zPool[(idx)*mem5.szAtom]))
+
+/*
+** Unlink the chunk at mem5.aPool[i] from list it is currently
+** on.  It should be found on mem5.aiFreelist[iLogsize].
+*/
+static void memsys5Unlink(int i, int iLogsize){
+  int next, prev;
+  assert( i>=0 && i<mem5.nBlock );
+  assert( iLogsize>=0 && iLogsize<=LOGMAX );
+  assert( (mem5.aCtrl[i] & CTRL_LOGSIZE)==iLogsize );
+
+  next = MEM5LINK(i)->next;
+  prev = MEM5LINK(i)->prev;
+  if( prev<0 ){
+    mem5.aiFreelist[iLogsize] = next;
+  }else{
+    MEM5LINK(prev)->next = next;
+  }
+  if( next>=0 ){
+    MEM5LINK(next)->prev = prev;
+  }
+}
+
+/*
+** Link the chunk at mem5.aPool[i] so that is on the iLogsize
+** free list.
+*/
+static void memsys5Link(int i, int iLogsize){
+  int x;
+  assert( sqlite3_mutex_held(mem5.mutex) );
+  assert( i>=0 && i<mem5.nBlock );
+  assert( iLogsize>=0 && iLogsize<=LOGMAX );
+  assert( (mem5.aCtrl[i] & CTRL_LOGSIZE)==iLogsize );
+
+  x = MEM5LINK(i)->next = mem5.aiFreelist[iLogsize];
+  MEM5LINK(i)->prev = -1;
+  if( x>=0 ){
+    assert( x<mem5.nBlock );
+    MEM5LINK(x)->prev = i;
+  }
+  mem5.aiFreelist[iLogsize] = i;
+}
+
+/*
+** If the STATIC_MEM mutex is not already held, obtain it now. The mutex
+** will already be held (obtained by code in malloc.c) if
+** sqlite3GlobalConfig.bMemStat is true.
+*/
+static void memsys5Enter(void){
+  sqlite3_mutex_enter(mem5.mutex);
+}
+static void memsys5Leave(void){
+  sqlite3_mutex_leave(mem5.mutex);
+}
+
+/*
+** Return the size of an outstanding allocation, in bytes.  The
+** size returned omits the 8-byte header overhead.  This only
+** works for chunks that are currently checked out.
+*/
+static int memsys5Size(void *p){
+  int iSize = 0;
+  if( p ){
+    int i = (int)(((u8 *)p-mem5.zPool)/mem5.szAtom);
+    assert( i>=0 && i<mem5.nBlock );
+    iSize = mem5.szAtom * (1 << (mem5.aCtrl[i]&CTRL_LOGSIZE));
+  }
+  return iSize;
+}
+
+/*
+** Return a block of memory of at least nBytes in size.
+** Return NULL if unable.  Return NULL if nBytes==0.
+**
+** The caller guarantees that nByte is positive.
+**
+** The caller has obtained a mutex prior to invoking this
+** routine so there is never any chance that two or more
+** threads can be in this routine at the same time.
+*/
+static void *memsys5MallocUnsafe(int nByte){
+  int i;           /* Index of a mem5.aPool[] slot */
+  int iBin;        /* Index into mem5.aiFreelist[] */
+  int iFullSz;     /* Size of allocation rounded up to power of 2 */
+  int iLogsize;    /* Log2 of iFullSz/POW2_MIN */
+
+  /* nByte must be a positive */
+  assert( nByte>0 );
+
+  /* Keep track of the maximum allocation request.  Even unfulfilled
+  ** requests are counted */
+  if( (u32)nByte>mem5.maxRequest ){
+    mem5.maxRequest = nByte;
+  }
+
+  /* Abort if the requested allocation size is larger than the largest
+  ** power of two that we can represent using 32-bit signed integers.
+  */
+  if( nByte > 0x40000000 ){
+    return 0;
+  }
+
+  /* Round nByte up to the next valid power of two */
+  for(iFullSz=mem5.szAtom, iLogsize=0; iFullSz<nByte; iFullSz *= 2, iLogsize++){}
+
+  /* Make sure mem5.aiFreelist[iLogsize] contains at least one free
+  ** block.  If not, then split a block of the next larger power of
+  ** two in order to create a new free block of size iLogsize.
+  */
+  for(iBin=iLogsize; iBin<=LOGMAX && mem5.aiFreelist[iBin]<0; iBin++){}
+  if( iBin>LOGMAX ){
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes", nByte);
+    return 0;
+  }
+  i = mem5.aiFreelist[iBin];
+  memsys5Unlink(i, iBin);
+  while( iBin>iLogsize ){
+    int newSize;
+
+    iBin--;
+    newSize = 1 << iBin;
+    mem5.aCtrl[i+newSize] = CTRL_FREE | iBin;
+    memsys5Link(i+newSize, iBin);
+  }
+  mem5.aCtrl[i] = iLogsize;
+
+  /* Update allocator performance statistics. */
+  mem5.nAlloc++;
+  mem5.totalAlloc += iFullSz;
+  mem5.totalExcess += iFullSz - nByte;
+  mem5.currentCount++;
+  mem5.currentOut += iFullSz;
+  if( mem5.maxCount<mem5.currentCount ) mem5.maxCount = mem5.currentCount;
+  if( mem5.maxOut<mem5.currentOut ) mem5.maxOut = mem5.currentOut;
+
+#ifdef SQLITE_DEBUG
+  /* Make sure the allocated memory does not assume that it is set to zero
+  ** or retains a value from a previous allocation */
+  memset(&mem5.zPool[i*mem5.szAtom], 0xAA, iFullSz);
+#endif
+
+  /* Return a pointer to the allocated memory. */
+  return (void*)&mem5.zPool[i*mem5.szAtom];
+}
+
+/*
+** Free an outstanding memory allocation.
+*/
+static void memsys5FreeUnsafe(void *pOld){
+  u32 size, iLogsize;
+  int iBlock;
+
+  /* Set iBlock to the index of the block pointed to by pOld in 
+  ** the array of mem5.szAtom byte blocks pointed to by mem5.zPool.
+  */
+  iBlock = (int)(((u8 *)pOld-mem5.zPool)/mem5.szAtom);
+
+  /* Check that the pointer pOld points to a valid, non-free block. */
+  assert( iBlock>=0 && iBlock<mem5.nBlock );
+  assert( ((u8 *)pOld-mem5.zPool)%mem5.szAtom==0 );
+  assert( (mem5.aCtrl[iBlock] & CTRL_FREE)==0 );
+
+  iLogsize = mem5.aCtrl[iBlock] & CTRL_LOGSIZE;
+  size = 1<<iLogsize;
+  assert( iBlock+size-1<(u32)mem5.nBlock );
+
+  mem5.aCtrl[iBlock] |= CTRL_FREE;
+  mem5.aCtrl[iBlock+size-1] |= CTRL_FREE;
+  assert( mem5.currentCount>0 );
+  assert( mem5.currentOut>=(size*mem5.szAtom) );
+  mem5.currentCount--;
+  mem5.currentOut -= size*mem5.szAtom;
+  assert( mem5.currentOut>0 || mem5.currentCount==0 );
+  assert( mem5.currentCount>0 || mem5.currentOut==0 );
+
+  mem5.aCtrl[iBlock] = CTRL_FREE | iLogsize;
+  while( ALWAYS(iLogsize<LOGMAX) ){
+    int iBuddy;
+    if( (iBlock>>iLogsize) & 1 ){
+      iBuddy = iBlock - size;
+    }else{
+      iBuddy = iBlock + size;
+    }
+    assert( iBuddy>=0 );
+    if( (iBuddy+(1<<iLogsize))>mem5.nBlock ) break;
+    if( mem5.aCtrl[iBuddy]!=(CTRL_FREE | iLogsize) ) break;
+    memsys5Unlink(iBuddy, iLogsize);
+    iLogsize++;
+    if( iBuddy<iBlock ){
+      mem5.aCtrl[iBuddy] = CTRL_FREE | iLogsize;
+      mem5.aCtrl[iBlock] = 0;
+      iBlock = iBuddy;
+    }else{
+      mem5.aCtrl[iBlock] = CTRL_FREE | iLogsize;
+      mem5.aCtrl[iBuddy] = 0;
+    }
+    size *= 2;
+  }
+
+#ifdef SQLITE_DEBUG
+  /* Overwrite freed memory with the 0x55 bit pattern to verify that it is
+  ** not used after being freed */
+  memset(&mem5.zPool[iBlock*mem5.szAtom], 0x55, size);
+#endif
+
+  memsys5Link(iBlock, iLogsize);
+}
+
+/*
+** Allocate nBytes of memory.
+*/
+static void *memsys5Malloc(int nBytes){
+  sqlite3_int64 *p = 0;
+  if( nBytes>0 ){
+    memsys5Enter();
+    p = memsys5MallocUnsafe(nBytes);
+    memsys5Leave();
+  }
+  return (void*)p; 
+}
+
+/*
+** Free memory.
+**
+** The outer layer memory allocator prevents this routine from
+** being called with pPrior==0.
+*/
+static void memsys5Free(void *pPrior){
+  assert( pPrior!=0 );
+  memsys5Enter();
+  memsys5FreeUnsafe(pPrior);
+  memsys5Leave();  
+}
+
+/*
+** Change the size of an existing memory allocation.
+**
+** The outer layer memory allocator prevents this routine from
+** being called with pPrior==0.  
+**
+** nBytes is always a value obtained from a prior call to
+** memsys5Round().  Hence nBytes is always a non-negative power
+** of two.  If nBytes==0 that means that an oversize allocation
+** (an allocation larger than 0x40000000) was requested and this
+** routine should return 0 without freeing pPrior.
+*/
+static void *memsys5Realloc(void *pPrior, int nBytes){
+  int nOld;
+  void *p;
+  assert( pPrior!=0 );
+  assert( (nBytes&(nBytes-1))==0 );  /* EV: R-46199-30249 */
+  assert( nBytes>=0 );
+  if( nBytes==0 ){
+    return 0;
+  }
+  nOld = memsys5Size(pPrior);
+  if( nBytes<=nOld ){
+    return pPrior;
+  }
+  memsys5Enter();
+  p = memsys5MallocUnsafe(nBytes);
+  if( p ){
+    memcpy(p, pPrior, nOld);
+    memsys5FreeUnsafe(pPrior);
+  }
+  memsys5Leave();
+  return p;
+}
+
+/*
+** Round up a request size to the next valid allocation size.  If
+** the allocation is too large to be handled by this allocation system,
+** return 0.
+**
+** All allocations must be a power of two and must be expressed by a
+** 32-bit signed integer.  Hence the largest allocation is 0x40000000
+** or 1073741824 bytes.
+*/
+static int memsys5Roundup(int n){
+  int iFullSz;
+  if( n > 0x40000000 ) return 0;
+  for(iFullSz=mem5.szAtom; iFullSz<n; iFullSz *= 2);
+  return iFullSz;
+}
+
+/*
+** Return the ceiling of the logarithm base 2 of iValue.
+**
+** Examples:   memsys5Log(1) -> 0
+**             memsys5Log(2) -> 1
+**             memsys5Log(4) -> 2
+**             memsys5Log(5) -> 3
+**             memsys5Log(8) -> 3
+**             memsys5Log(9) -> 4
+*/
+static int memsys5Log(int iValue){
+  int iLog;
+  for(iLog=0; (iLog<(int)((sizeof(int)*8)-1)) && (1<<iLog)<iValue; iLog++);
+  return iLog;
+}
+
+/*
+** Initialize the memory allocator.
+**
+** This routine is not threadsafe.  The caller must be holding a mutex
+** to prevent multiple threads from entering at the same time.
+*/
+static int memsys5Init(void *NotUsed){
+  int ii;            /* Loop counter */
+  int nByte;         /* Number of bytes of memory available to this allocator */
+  u8 *zByte;         /* Memory usable by this allocator */
+  int nMinLog;       /* Log base 2 of minimum allocation size in bytes */
+  int iOffset;       /* An offset into mem5.aCtrl[] */
+
+  UNUSED_PARAMETER(NotUsed);
+
+  /* For the purposes of this routine, disable the mutex */
+  mem5.mutex = 0;
+
+  /* The size of a Mem5Link object must be a power of two.  Verify that
+  ** this is case.
+  */
+  assert( (sizeof(Mem5Link)&(sizeof(Mem5Link)-1))==0 );
+
+  nByte = sqlite3GlobalConfig.nHeap;
+  zByte = (u8*)sqlite3GlobalConfig.pHeap;
+  assert( zByte!=0 );  /* sqlite3_config() does not allow otherwise */
+
+  /* boundaries on sqlite3GlobalConfig.mnReq are enforced in sqlite3_config() */
+  nMinLog = memsys5Log(sqlite3GlobalConfig.mnReq);
+  mem5.szAtom = (1<<nMinLog);
+  while( (int)sizeof(Mem5Link)>mem5.szAtom ){
+    mem5.szAtom = mem5.szAtom << 1;
+  }
+
+  mem5.nBlock = (nByte / (mem5.szAtom+sizeof(u8)));
+  mem5.zPool = zByte;
+  mem5.aCtrl = (u8 *)&mem5.zPool[mem5.nBlock*mem5.szAtom];
+
+  for(ii=0; ii<=LOGMAX; ii++){
+    mem5.aiFreelist[ii] = -1;
+  }
+
+  iOffset = 0;
+  for(ii=LOGMAX; ii>=0; ii--){
+    int nAlloc = (1<<ii);
+    if( (iOffset+nAlloc)<=mem5.nBlock ){
+      mem5.aCtrl[iOffset] = ii | CTRL_FREE;
+      memsys5Link(iOffset, ii);
+      iOffset += nAlloc;
+    }
+    assert((iOffset+nAlloc)>mem5.nBlock);
+  }
+
+  /* If a mutex is required for normal operation, allocate one */
+  if( sqlite3GlobalConfig.bMemstat==0 ){
+    mem5.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM);
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module.
+*/
+static void memsys5Shutdown(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  mem5.mutex = 0;
+  return;
+}
+
+#ifdef SQLITE_TEST
+/*
+** Open the file indicated and write a log of all unfreed memory 
+** allocations into that log.
+*/
+SQLITE_PRIVATE void sqlite3Memsys5Dump(const char *zFilename){
+  FILE *out;
+  int i, j, n;
+  int nMinLog;
+
+  if( zFilename==0 || zFilename[0]==0 ){
+    out = stdout;
+  }else{
+    out = fopen(zFilename, "w");
+    if( out==0 ){
+      fprintf(stderr, "** Unable to output memory debug output log: %s **\n",
+                      zFilename);
+      return;
+    }
+  }
+  memsys5Enter();
+  nMinLog = memsys5Log(mem5.szAtom);
+  for(i=0; i<=LOGMAX && i+nMinLog<32; i++){
+    for(n=0, j=mem5.aiFreelist[i]; j>=0; j = MEM5LINK(j)->next, n++){}
+    fprintf(out, "freelist items of size %d: %d\n", mem5.szAtom << i, n);
+  }
+  fprintf(out, "mem5.nAlloc       = %llu\n", mem5.nAlloc);
+  fprintf(out, "mem5.totalAlloc   = %llu\n", mem5.totalAlloc);
+  fprintf(out, "mem5.totalExcess  = %llu\n", mem5.totalExcess);
+  fprintf(out, "mem5.currentOut   = %u\n", mem5.currentOut);
+  fprintf(out, "mem5.currentCount = %u\n", mem5.currentCount);
+  fprintf(out, "mem5.maxOut       = %u\n", mem5.maxOut);
+  fprintf(out, "mem5.maxCount     = %u\n", mem5.maxCount);
+  fprintf(out, "mem5.maxRequest   = %u\n", mem5.maxRequest);
+  memsys5Leave();
+  if( out==stdout ){
+    fflush(stdout);
+  }else{
+    fclose(out);
+  }
+}
+#endif
+
+/*
+** This routine is the only routine in this file with external 
+** linkage. It returns a pointer to a static sqlite3_mem_methods
+** struct populated with the memsys5 methods.
+*/
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys5(void){
+  static const sqlite3_mem_methods memsys5Methods = {
+     memsys5Malloc,
+     memsys5Free,
+     memsys5Realloc,
+     memsys5Size,
+     memsys5Roundup,
+     memsys5Init,
+     memsys5Shutdown,
+     0
+  };
+  return &memsys5Methods;
+}
+
+#endif /* SQLITE_ENABLE_MEMSYS5 */
+
+/************** End of mem5.c ************************************************/
+/************** Begin file mutex.c *******************************************/
+/*
+** 2007 August 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement mutexes.
+**
+** This file contains code that is common across all mutex implementations.
+*/
+
+#if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT)
+/*
+** For debugging purposes, record when the mutex subsystem is initialized
+** and uninitialized so that we can assert() if there is an attempt to
+** allocate a mutex while the system is uninitialized.
+*/
+static SQLITE_WSD int mutexIsInit = 0;
+#endif /* SQLITE_DEBUG */
+
+
+#ifndef SQLITE_MUTEX_OMIT
+/*
+** Initialize the mutex system.
+*/
+SQLITE_PRIVATE int sqlite3MutexInit(void){ 
+  int rc = SQLITE_OK;
+  if( !sqlite3GlobalConfig.mutex.xMutexAlloc ){
+    /* If the xMutexAlloc method has not been set, then the user did not
+    ** install a mutex implementation via sqlite3_config() prior to 
+    ** sqlite3_initialize() being called. This block copies pointers to
+    ** the default implementation into the sqlite3GlobalConfig structure.
+    */
+    sqlite3_mutex_methods const *pFrom;
+    sqlite3_mutex_methods *pTo = &sqlite3GlobalConfig.mutex;
+
+    if( sqlite3GlobalConfig.bCoreMutex ){
+      pFrom = sqlite3DefaultMutex();
+    }else{
+      pFrom = sqlite3NoopMutex();
+    }
+    memcpy(pTo, pFrom, offsetof(sqlite3_mutex_methods, xMutexAlloc));
+    memcpy(&pTo->xMutexFree, &pFrom->xMutexFree,
+           sizeof(*pTo) - offsetof(sqlite3_mutex_methods, xMutexFree));
+    pTo->xMutexAlloc = pFrom->xMutexAlloc;
+  }
+  rc = sqlite3GlobalConfig.mutex.xMutexInit();
+
+#ifdef SQLITE_DEBUG
+  GLOBAL(int, mutexIsInit) = 1;
+#endif
+
+  return rc;
+}
+
+/*
+** Shutdown the mutex system. This call frees resources allocated by
+** sqlite3MutexInit().
+*/
+SQLITE_PRIVATE int sqlite3MutexEnd(void){
+  int rc = SQLITE_OK;
+  if( sqlite3GlobalConfig.mutex.xMutexEnd ){
+    rc = sqlite3GlobalConfig.mutex.xMutexEnd();
+  }
+
+#ifdef SQLITE_DEBUG
+  GLOBAL(int, mutexIsInit) = 0;
+#endif
+
+  return rc;
+}
+
+/*
+** Retrieve a pointer to a static mutex or allocate a new dynamic one.
+*/
+SQLITE_API sqlite3_mutex *sqlite3_mutex_alloc(int id){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( id<=SQLITE_MUTEX_RECURSIVE && sqlite3_initialize() ) return 0;
+  if( id>SQLITE_MUTEX_RECURSIVE && sqlite3MutexInit() ) return 0;
+#endif
+  return sqlite3GlobalConfig.mutex.xMutexAlloc(id);
+}
+
+SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int id){
+  if( !sqlite3GlobalConfig.bCoreMutex ){
+    return 0;
+  }
+  assert( GLOBAL(int, mutexIsInit) );
+  return sqlite3GlobalConfig.mutex.xMutexAlloc(id);
+}
+
+/*
+** Free a dynamic mutex.
+*/
+SQLITE_API void sqlite3_mutex_free(sqlite3_mutex *p){
+  if( p ){
+    sqlite3GlobalConfig.mutex.xMutexFree(p);
+  }
+}
+
+/*
+** Obtain the mutex p. If some other thread already has the mutex, block
+** until it can be obtained.
+*/
+SQLITE_API void sqlite3_mutex_enter(sqlite3_mutex *p){
+  if( p ){
+    sqlite3GlobalConfig.mutex.xMutexEnter(p);
+  }
+}
+
+/*
+** Obtain the mutex p. If successful, return SQLITE_OK. Otherwise, if another
+** thread holds the mutex and it cannot be obtained, return SQLITE_BUSY.
+*/
+SQLITE_API int sqlite3_mutex_try(sqlite3_mutex *p){
+  int rc = SQLITE_OK;
+  if( p ){
+    return sqlite3GlobalConfig.mutex.xMutexTry(p);
+  }
+  return rc;
+}
+
+/*
+** The sqlite3_mutex_leave() routine exits a mutex that was previously
+** entered by the same thread.  The behavior is undefined if the mutex 
+** is not currently entered. If a NULL pointer is passed as an argument
+** this function is a no-op.
+*/
+SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex *p){
+  if( p ){
+    sqlite3GlobalConfig.mutex.xMutexLeave(p);
+  }
+}
+
+#ifndef NDEBUG
+/*
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are
+** intended for use inside assert() statements.
+*/
+SQLITE_API int sqlite3_mutex_held(sqlite3_mutex *p){
+  return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p);
+}
+SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex *p){
+  return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p);
+}
+#endif
+
+#endif /* !defined(SQLITE_MUTEX_OMIT) */
+
+/************** End of mutex.c ***********************************************/
+/************** Begin file mutex_noop.c **************************************/
+/*
+** 2008 October 07
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement mutexes.
+**
+** This implementation in this file does not provide any mutual
+** exclusion and is thus suitable for use only in applications
+** that use SQLite in a single thread.  The routines defined
+** here are place-holders.  Applications can substitute working
+** mutex routines at start-time using the
+**
+**     sqlite3_config(SQLITE_CONFIG_MUTEX,...)
+**
+** interface.
+**
+** If compiled with SQLITE_DEBUG, then additional logic is inserted
+** that does error checking on mutexes to make sure they are being
+** called correctly.
+*/
+
+#ifndef SQLITE_MUTEX_OMIT
+
+#ifndef SQLITE_DEBUG
+/*
+** Stub routines for all mutex methods.
+**
+** This routines provide no mutual exclusion or error checking.
+*/
+static int noopMutexInit(void){ return SQLITE_OK; }
+static int noopMutexEnd(void){ return SQLITE_OK; }
+static sqlite3_mutex *noopMutexAlloc(int id){ 
+  UNUSED_PARAMETER(id);
+  return (sqlite3_mutex*)8; 
+}
+static void noopMutexFree(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; }
+static void noopMutexEnter(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; }
+static int noopMutexTry(sqlite3_mutex *p){
+  UNUSED_PARAMETER(p);
+  return SQLITE_OK;
+}
+static void noopMutexLeave(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; }
+
+SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){
+  static const sqlite3_mutex_methods sMutex = {
+    noopMutexInit,
+    noopMutexEnd,
+    noopMutexAlloc,
+    noopMutexFree,
+    noopMutexEnter,
+    noopMutexTry,
+    noopMutexLeave,
+
+    0,
+    0,
+  };
+
+  return &sMutex;
+}
+#endif /* !SQLITE_DEBUG */
+
+#ifdef SQLITE_DEBUG
+/*
+** In this implementation, error checking is provided for testing
+** and debugging purposes.  The mutexes still do not provide any
+** mutual exclusion.
+*/
+
+/*
+** The mutex object
+*/
+typedef struct sqlite3_debug_mutex {
+  int id;     /* The mutex type */
+  int cnt;    /* Number of entries without a matching leave */
+} sqlite3_debug_mutex;
+
+/*
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are
+** intended for use inside assert() statements.
+*/
+static int debugMutexHeld(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  return p==0 || p->cnt>0;
+}
+static int debugMutexNotheld(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  return p==0 || p->cnt==0;
+}
+
+/*
+** Initialize and deinitialize the mutex subsystem.
+*/
+static int debugMutexInit(void){ return SQLITE_OK; }
+static int debugMutexEnd(void){ return SQLITE_OK; }
+
+/*
+** The sqlite3_mutex_alloc() routine allocates a new
+** mutex and returns a pointer to it.  If it returns NULL
+** that means that a mutex could not be allocated. 
+*/
+static sqlite3_mutex *debugMutexAlloc(int id){
+  static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_APP3 - 1];
+  sqlite3_debug_mutex *pNew = 0;
+  switch( id ){
+    case SQLITE_MUTEX_FAST:
+    case SQLITE_MUTEX_RECURSIVE: {
+      pNew = sqlite3Malloc(sizeof(*pNew));
+      if( pNew ){
+        pNew->id = id;
+        pNew->cnt = 0;
+      }
+      break;
+    }
+    default: {
+      assert( id-2 >= 0 );
+      assert( id-2 < (int)(sizeof(aStatic)/sizeof(aStatic[0])) );
+      pNew = &aStatic[id-2];
+      pNew->id = id;
+      break;
+    }
+  }
+  return (sqlite3_mutex*)pNew;
+}
+
+/*
+** This routine deallocates a previously allocated mutex.
+*/
+static void debugMutexFree(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  assert( p->cnt==0 );
+  assert( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE );
+  sqlite3_free(p);
+}
+
+/*
+** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt
+** to enter a mutex.  If another thread is already within the mutex,
+** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return
+** SQLITE_BUSY.  The sqlite3_mutex_try() interface returns SQLITE_OK
+** upon successful entry.  Mutexes created using SQLITE_MUTEX_RECURSIVE can
+** be entered multiple times by the same thread.  In such cases the,
+** mutex must be exited an equal number of times before another thread
+** can enter.  If the same thread tries to enter any other kind of mutex
+** more than once, the behavior is undefined.
+*/
+static void debugMutexEnter(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) );
+  p->cnt++;
+}
+static int debugMutexTry(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) );
+  p->cnt++;
+  return SQLITE_OK;
+}
+
+/*
+** The sqlite3_mutex_leave() routine exits a mutex that was
+** previously entered by the same thread.  The behavior
+** is undefined if the mutex is not currently entered or
+** is not currently allocated.  SQLite will never do either.
+*/
+static void debugMutexLeave(sqlite3_mutex *pX){
+  sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX;
+  assert( debugMutexHeld(pX) );
+  p->cnt--;
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) );
+}
+
+SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){
+  static const sqlite3_mutex_methods sMutex = {
+    debugMutexInit,
+    debugMutexEnd,
+    debugMutexAlloc,
+    debugMutexFree,
+    debugMutexEnter,
+    debugMutexTry,
+    debugMutexLeave,
+
+    debugMutexHeld,
+    debugMutexNotheld
+  };
+
+  return &sMutex;
+}
+#endif /* SQLITE_DEBUG */
+
+/*
+** If compiled with SQLITE_MUTEX_NOOP, then the no-op mutex implementation
+** is used regardless of the run-time threadsafety setting.
+*/
+#ifdef SQLITE_MUTEX_NOOP
+SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){
+  return sqlite3NoopMutex();
+}
+#endif /* defined(SQLITE_MUTEX_NOOP) */
+#endif /* !defined(SQLITE_MUTEX_OMIT) */
+
+/************** End of mutex_noop.c ******************************************/
+/************** Begin file mutex_unix.c **************************************/
+/*
+** 2007 August 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement mutexes for pthreads
+*/
+
+/*
+** The code in this file is only used if we are compiling threadsafe
+** under unix with pthreads.
+**
+** Note that this implementation requires a version of pthreads that
+** supports recursive mutexes.
+*/
+#ifdef SQLITE_MUTEX_PTHREADS
+
+#include <pthread.h>
+
+/*
+** The sqlite3_mutex.id, sqlite3_mutex.nRef, and sqlite3_mutex.owner fields
+** are necessary under two condidtions:  (1) Debug builds and (2) using
+** home-grown mutexes.  Encapsulate these conditions into a single #define.
+*/
+#if defined(SQLITE_DEBUG) || defined(SQLITE_HOMEGROWN_RECURSIVE_MUTEX)
+# define SQLITE_MUTEX_NREF 1
+#else
+# define SQLITE_MUTEX_NREF 0
+#endif
+
+/*
+** Each recursive mutex is an instance of the following structure.
+*/
+struct sqlite3_mutex {
+  pthread_mutex_t mutex;     /* Mutex controlling the lock */
+#if SQLITE_MUTEX_NREF
+  int id;                    /* Mutex type */
+  volatile int nRef;         /* Number of entrances */
+  volatile pthread_t owner;  /* Thread that is within this mutex */
+  int trace;                 /* True to trace changes */
+#endif
+};
+#if SQLITE_MUTEX_NREF
+#define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, 0, 0, (pthread_t)0, 0 }
+#else
+#define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER }
+#endif
+
+/*
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are
+** intended for use only inside assert() statements.  On some platforms,
+** there might be race conditions that can cause these routines to
+** deliver incorrect results.  In particular, if pthread_equal() is
+** not an atomic operation, then these routines might delivery
+** incorrect results.  On most platforms, pthread_equal() is a 
+** comparison of two integers and is therefore atomic.  But we are
+** told that HPUX is not such a platform.  If so, then these routines
+** will not always work correctly on HPUX.
+**
+** On those platforms where pthread_equal() is not atomic, SQLite
+** should be compiled without -DSQLITE_DEBUG and with -DNDEBUG to
+** make sure no assert() statements are evaluated and hence these
+** routines are never called.
+*/
+#if !defined(NDEBUG) || defined(SQLITE_DEBUG)
+static int pthreadMutexHeld(sqlite3_mutex *p){
+  return (p->nRef!=0 && pthread_equal(p->owner, pthread_self()));
+}
+static int pthreadMutexNotheld(sqlite3_mutex *p){
+  return p->nRef==0 || pthread_equal(p->owner, pthread_self())==0;
+}
+#endif
+
+/*
+** Initialize and deinitialize the mutex subsystem.
+*/
+static int pthreadMutexInit(void){ return SQLITE_OK; }
+static int pthreadMutexEnd(void){ return SQLITE_OK; }
+
+/*
+** The sqlite3_mutex_alloc() routine allocates a new
+** mutex and returns a pointer to it.  If it returns NULL
+** that means that a mutex could not be allocated.  SQLite
+** will unwind its stack and return an error.  The argument
+** to sqlite3_mutex_alloc() is one of these integer constants:
+**
+** <ul>
+** <li>  SQLITE_MUTEX_FAST
+** <li>  SQLITE_MUTEX_RECURSIVE
+** <li>  SQLITE_MUTEX_STATIC_MASTER
+** <li>  SQLITE_MUTEX_STATIC_MEM
+** <li>  SQLITE_MUTEX_STATIC_OPEN
+** <li>  SQLITE_MUTEX_STATIC_PRNG
+** <li>  SQLITE_MUTEX_STATIC_LRU
+** <li>  SQLITE_MUTEX_STATIC_PMEM
+** <li>  SQLITE_MUTEX_STATIC_APP1
+** <li>  SQLITE_MUTEX_STATIC_APP2
+** <li>  SQLITE_MUTEX_STATIC_APP3
+** </ul>
+**
+** The first two constants cause sqlite3_mutex_alloc() to create
+** a new mutex.  The new mutex is recursive when SQLITE_MUTEX_RECURSIVE
+** is used but not necessarily so when SQLITE_MUTEX_FAST is used.
+** The mutex implementation does not need to make a distinction
+** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does
+** not want to.  But SQLite will only request a recursive mutex in
+** cases where it really needs one.  If a faster non-recursive mutex
+** implementation is available on the host platform, the mutex subsystem
+** might return such a mutex in response to SQLITE_MUTEX_FAST.
+**
+** The other allowed parameters to sqlite3_mutex_alloc() each return
+** a pointer to a static preexisting mutex.  Six static mutexes are
+** used by the current version of SQLite.  Future versions of SQLite
+** may add additional static mutexes.  Static mutexes are for internal
+** use by SQLite only.  Applications that use SQLite mutexes should
+** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or
+** SQLITE_MUTEX_RECURSIVE.
+**
+** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST
+** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc()
+** returns a different mutex on every call.  But for the static 
+** mutex types, the same mutex is returned on every call that has
+** the same type number.
+*/
+static sqlite3_mutex *pthreadMutexAlloc(int iType){
+  static sqlite3_mutex staticMutexes[] = {
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER,
+    SQLITE3_MUTEX_INITIALIZER
+  };
+  sqlite3_mutex *p;
+  switch( iType ){
+    case SQLITE_MUTEX_RECURSIVE: {
+      p = sqlite3MallocZero( sizeof(*p) );
+      if( p ){
+#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX
+        /* If recursive mutexes are not available, we will have to
+        ** build our own.  See below. */
+        pthread_mutex_init(&p->mutex, 0);
+#else
+        /* Use a recursive mutex if it is available */
+        pthread_mutexattr_t recursiveAttr;
+        pthread_mutexattr_init(&recursiveAttr);
+        pthread_mutexattr_settype(&recursiveAttr, PTHREAD_MUTEX_RECURSIVE);
+        pthread_mutex_init(&p->mutex, &recursiveAttr);
+        pthread_mutexattr_destroy(&recursiveAttr);
+#endif
+#if SQLITE_MUTEX_NREF
+        p->id = iType;
+#endif
+      }
+      break;
+    }
+    case SQLITE_MUTEX_FAST: {
+      p = sqlite3MallocZero( sizeof(*p) );
+      if( p ){
+#if SQLITE_MUTEX_NREF
+        p->id = iType;
+#endif
+        pthread_mutex_init(&p->mutex, 0);
+      }
+      break;
+    }
+    default: {
+#ifdef SQLITE_ENABLE_API_ARMOR
+      if( iType-2<0 || iType-2>=ArraySize(staticMutexes) ){
+        (void)SQLITE_MISUSE_BKPT;
+        return 0;
+      }
+#endif
+      p = &staticMutexes[iType-2];
+#if SQLITE_MUTEX_NREF
+      p->id = iType;
+#endif
+      break;
+    }
+  }
+  return p;
+}
+
+
+/*
+** This routine deallocates a previously
+** allocated mutex.  SQLite is careful to deallocate every
+** mutex that it allocates.
+*/
+static void pthreadMutexFree(sqlite3_mutex *p){
+  assert( p->nRef==0 );
+  assert( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE );
+  pthread_mutex_destroy(&p->mutex);
+  sqlite3_free(p);
+}
+
+/*
+** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt
+** to enter a mutex.  If another thread is already within the mutex,
+** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return
+** SQLITE_BUSY.  The sqlite3_mutex_try() interface returns SQLITE_OK
+** upon successful entry.  Mutexes created using SQLITE_MUTEX_RECURSIVE can
+** be entered multiple times by the same thread.  In such cases the,
+** mutex must be exited an equal number of times before another thread
+** can enter.  If the same thread tries to enter any other kind of mutex
+** more than once, the behavior is undefined.
+*/
+static void pthreadMutexEnter(sqlite3_mutex *p){
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) );
+
+#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX
+  /* If recursive mutexes are not available, then we have to grow
+  ** our own.  This implementation assumes that pthread_equal()
+  ** is atomic - that it cannot be deceived into thinking self
+  ** and p->owner are equal if p->owner changes between two values
+  ** that are not equal to self while the comparison is taking place.
+  ** This implementation also assumes a coherent cache - that 
+  ** separate processes cannot read different values from the same
+  ** address at the same time.  If either of these two conditions
+  ** are not met, then the mutexes will fail and problems will result.
+  */
+  {
+    pthread_t self = pthread_self();
+    if( p->nRef>0 && pthread_equal(p->owner, self) ){
+      p->nRef++;
+    }else{
+      pthread_mutex_lock(&p->mutex);
+      assert( p->nRef==0 );
+      p->owner = self;
+      p->nRef = 1;
+    }
+  }
+#else
+  /* Use the built-in recursive mutexes if they are available.
+  */
+  pthread_mutex_lock(&p->mutex);
+#if SQLITE_MUTEX_NREF
+  assert( p->nRef>0 || p->owner==0 );
+  p->owner = pthread_self();
+  p->nRef++;
+#endif
+#endif
+
+#ifdef SQLITE_DEBUG
+  if( p->trace ){
+    printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef);
+  }
+#endif
+}
+static int pthreadMutexTry(sqlite3_mutex *p){
+  int rc;
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) );
+
+#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX
+  /* If recursive mutexes are not available, then we have to grow
+  ** our own.  This implementation assumes that pthread_equal()
+  ** is atomic - that it cannot be deceived into thinking self
+  ** and p->owner are equal if p->owner changes between two values
+  ** that are not equal to self while the comparison is taking place.
+  ** This implementation also assumes a coherent cache - that 
+  ** separate processes cannot read different values from the same
+  ** address at the same time.  If either of these two conditions
+  ** are not met, then the mutexes will fail and problems will result.
+  */
+  {
+    pthread_t self = pthread_self();
+    if( p->nRef>0 && pthread_equal(p->owner, self) ){
+      p->nRef++;
+      rc = SQLITE_OK;
+    }else if( pthread_mutex_trylock(&p->mutex)==0 ){
+      assert( p->nRef==0 );
+      p->owner = self;
+      p->nRef = 1;
+      rc = SQLITE_OK;
+    }else{
+      rc = SQLITE_BUSY;
+    }
+  }
+#else
+  /* Use the built-in recursive mutexes if they are available.
+  */
+  if( pthread_mutex_trylock(&p->mutex)==0 ){
+#if SQLITE_MUTEX_NREF
+    p->owner = pthread_self();
+    p->nRef++;
+#endif
+    rc = SQLITE_OK;
+  }else{
+    rc = SQLITE_BUSY;
+  }
+#endif
+
+#ifdef SQLITE_DEBUG
+  if( rc==SQLITE_OK && p->trace ){
+    printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef);
+  }
+#endif
+  return rc;
+}
+
+/*
+** The sqlite3_mutex_leave() routine exits a mutex that was
+** previously entered by the same thread.  The behavior
+** is undefined if the mutex is not currently entered or
+** is not currently allocated.  SQLite will never do either.
+*/
+static void pthreadMutexLeave(sqlite3_mutex *p){
+  assert( pthreadMutexHeld(p) );
+#if SQLITE_MUTEX_NREF
+  p->nRef--;
+  if( p->nRef==0 ) p->owner = 0;
+#endif
+  assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE );
+
+#ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX
+  if( p->nRef==0 ){
+    pthread_mutex_unlock(&p->mutex);
+  }
+#else
+  pthread_mutex_unlock(&p->mutex);
+#endif
+
+#ifdef SQLITE_DEBUG
+  if( p->trace ){
+    printf("leave mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef);
+  }
+#endif
+}
+
+SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){
+  static const sqlite3_mutex_methods sMutex = {
+    pthreadMutexInit,
+    pthreadMutexEnd,
+    pthreadMutexAlloc,
+    pthreadMutexFree,
+    pthreadMutexEnter,
+    pthreadMutexTry,
+    pthreadMutexLeave,
+#ifdef SQLITE_DEBUG
+    pthreadMutexHeld,
+    pthreadMutexNotheld
+#else
+    0,
+    0
+#endif
+  };
+
+  return &sMutex;
+}
+
+#endif /* SQLITE_MUTEX_PTHREADS */
+
+/************** End of mutex_unix.c ******************************************/
+/************** Begin file mutex_w32.c ***************************************/
+/*
+** 2007 August 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C functions that implement mutexes for Win32.
+*/
+
+#if SQLITE_OS_WIN
+/*
+** Include code that is common to all os_*.c files
+*/
+/************** Include os_common.h in the middle of mutex_w32.c *************/
+/************** Begin file os_common.h ***************************************/
+/*
+** 2004 May 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains macros and a little bit of code that is common to
+** all of the platform-specific files (os_*.c) and is #included into those
+** files.
+**
+** This file should be #included by the os_*.c files only.  It is not a
+** general purpose header file.
+*/
+#ifndef _OS_COMMON_H_
+#define _OS_COMMON_H_
+
+/*
+** At least two bugs have slipped in because we changed the MEMORY_DEBUG
+** macro to SQLITE_DEBUG and some older makefiles have not yet made the
+** switch.  The following code should catch this problem at compile-time.
+*/
+#ifdef MEMORY_DEBUG
+# error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
+#endif
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#else
+# define OSTRACE(X)
+#endif
+
+/*
+** Macros for performance tracing.  Normally turned off.  Only works
+** on i486 hardware.
+*/
+#ifdef SQLITE_PERFORMANCE_TRACE
+
+/* 
+** hwtime.h contains inline assembler code for implementing 
+** high-performance timing routines.
+*/
+/************** Include hwtime.h in the middle of os_common.h ****************/
+/************** Begin file hwtime.h ******************************************/
+/*
+** 2008 May 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains inline asm code for retrieving "high-performance"
+** counters for x86 class CPUs.
+*/
+#ifndef _HWTIME_H_
+#define _HWTIME_H_
+
+/*
+** The following routine only works on pentium-class (or newer) processors.
+** It uses the RDTSC opcode to read the cycle count value out of the
+** processor and returns that value.  This can be used for high-res
+** profiling.
+*/
+#if (defined(__GNUC__) || defined(_MSC_VER)) && \
+      (defined(i386) || defined(__i386__) || defined(_M_IX86))
+
+  #if defined(__GNUC__)
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+     unsigned int lo, hi;
+     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+     return (sqlite_uint64)hi << 32 | lo;
+  }
+
+  #elif defined(_MSC_VER)
+
+  __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){
+     __asm {
+        rdtsc
+        ret       ; return value at EDX:EAX
+     }
+  }
+
+  #endif
+
+#elif (defined(__GNUC__) && defined(__x86_64__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long val;
+      __asm__ __volatile__ ("rdtsc" : "=A" (val));
+      return val;
+  }
+ 
+#elif (defined(__GNUC__) && defined(__ppc__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long long retval;
+      unsigned long junk;
+      __asm__ __volatile__ ("\n\
+          1:      mftbu   %1\n\
+                  mftb    %L0\n\
+                  mftbu   %0\n\
+                  cmpw    %0,%1\n\
+                  bne     1b"
+                  : "=r" (retval), "=r" (junk));
+      return retval;
+  }
+
+#else
+
+  #error Need implementation of sqlite3Hwtime() for your platform.
+
+  /*
+  ** To compile without implementing sqlite3Hwtime() for your platform,
+  ** you can remove the above #error and use the following
+  ** stub function.  You will lose timing support for many
+  ** of the debugging and testing utilities, but it should at
+  ** least compile and run.
+  */
+SQLITE_PRIVATE   sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); }
+
+#endif
+
+#endif /* !defined(_HWTIME_H_) */
+
+/************** End of hwtime.h **********************************************/
+/************** Continuing where we left off in os_common.h ******************/
+
+static sqlite_uint64 g_start;
+static sqlite_uint64 g_elapsed;
+#define TIMER_START       g_start=sqlite3Hwtime()
+#define TIMER_END         g_elapsed=sqlite3Hwtime()-g_start
+#define TIMER_ELAPSED     g_elapsed
+#else
+#define TIMER_START
+#define TIMER_END
+#define TIMER_ELAPSED     ((sqlite_uint64)0)
+#endif
+
+/*
+** If we compile with the SQLITE_TEST macro set, then the following block
+** of code will give us the ability to simulate a disk I/O error.  This
+** is used for testing the I/O recovery logic.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_io_error_hit = 0;            /* Total number of I/O Errors */
+SQLITE_API int sqlite3_io_error_hardhit = 0;        /* Number of non-benign errors */
+SQLITE_API int sqlite3_io_error_pending = 0;        /* Count down to first I/O error */
+SQLITE_API int sqlite3_io_error_persist = 0;        /* True if I/O errors persist */
+SQLITE_API int sqlite3_io_error_benign = 0;         /* True if errors are benign */
+SQLITE_API int sqlite3_diskfull_pending = 0;
+SQLITE_API int sqlite3_diskfull = 0;
+#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X)
+#define SimulateIOError(CODE)  \
+  if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \
+       || sqlite3_io_error_pending-- == 1 )  \
+              { local_ioerr(); CODE; }
+static void local_ioerr(){
+  IOTRACE(("IOERR\n"));
+  sqlite3_io_error_hit++;
+  if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++;
+}
+#define SimulateDiskfullError(CODE) \
+   if( sqlite3_diskfull_pending ){ \
+     if( sqlite3_diskfull_pending == 1 ){ \
+       local_ioerr(); \
+       sqlite3_diskfull = 1; \
+       sqlite3_io_error_hit = 1; \
+       CODE; \
+     }else{ \
+       sqlite3_diskfull_pending--; \
+     } \
+   }
+#else
+#define SimulateIOErrorBenign(X)
+#define SimulateIOError(A)
+#define SimulateDiskfullError(A)
+#endif
+
+/*
+** When testing, keep a count of the number of open files.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_open_file_count = 0;
+#define OpenCounter(X)  sqlite3_open_file_count+=(X)
+#else
+#define OpenCounter(X)
+#endif
+
+#endif /* !defined(_OS_COMMON_H_) */
+
+/************** End of os_common.h *******************************************/
+/************** Continuing where we left off in mutex_w32.c ******************/
+
+/*
+** Include the header file for the Windows VFS.
+*/
+/************** Include os_win.h in the middle of mutex_w32.c ****************/
+/************** Begin file os_win.h ******************************************/
+/*
+** 2013 November 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code that is specific to Windows.
+*/
+#ifndef _OS_WIN_H_
+#define _OS_WIN_H_
+
+/*
+** Include the primary Windows SDK header file.
+*/
+#include "windows.h"
+
+#ifdef __CYGWIN__
+# include <sys/cygwin.h>
+# include <errno.h> /* amalgamator: dontcache */
+#endif
+
+/*
+** Determine if we are dealing with Windows NT.
+**
+** We ought to be able to determine if we are compiling for Windows 9x or
+** Windows NT using the _WIN32_WINNT macro as follows:
+**
+** #if defined(_WIN32_WINNT)
+** # define SQLITE_OS_WINNT 1
+** #else
+** # define SQLITE_OS_WINNT 0
+** #endif
+**
+** However, Visual Studio 2005 does not set _WIN32_WINNT by default, as
+** it ought to, so the above test does not work.  We'll just assume that
+** everything is Windows NT unless the programmer explicitly says otherwise
+** by setting SQLITE_OS_WINNT to 0.
+*/
+#if SQLITE_OS_WIN && !defined(SQLITE_OS_WINNT)
+# define SQLITE_OS_WINNT 1
+#endif
+
+/*
+** Determine if we are dealing with Windows CE - which has a much reduced
+** API.
+*/
+#if defined(_WIN32_WCE)
+# define SQLITE_OS_WINCE 1
+#else
+# define SQLITE_OS_WINCE 0
+#endif
+
+/*
+** Determine if we are dealing with WinRT, which provides only a subset of
+** the full Win32 API.
+*/
+#if !defined(SQLITE_OS_WINRT)
+# define SQLITE_OS_WINRT 0
+#endif
+
+/*
+** For WinCE, some API function parameters do not appear to be declared as
+** volatile.
+*/
+#if SQLITE_OS_WINCE
+# define SQLITE_WIN32_VOLATILE
+#else
+# define SQLITE_WIN32_VOLATILE volatile
+#endif
+
+#endif /* _OS_WIN_H_ */
+
+/************** End of os_win.h **********************************************/
+/************** Continuing where we left off in mutex_w32.c ******************/
+#endif
+
+/*
+** The code in this file is only used if we are compiling multithreaded
+** on a Win32 system.
+*/
+#ifdef SQLITE_MUTEX_W32
+
+/*
+** Each recursive mutex is an instance of the following structure.
+*/
+struct sqlite3_mutex {
+  CRITICAL_SECTION mutex;    /* Mutex controlling the lock */
+  int id;                    /* Mutex type */
+#ifdef SQLITE_DEBUG
+  volatile int nRef;         /* Number of enterances */
+  volatile DWORD owner;      /* Thread holding this mutex */
+  volatile int trace;        /* True to trace changes */
+#endif
+};
+
+/*
+** These are the initializer values used when declaring a "static" mutex
+** on Win32.  It should be noted that all mutexes require initialization
+** on the Win32 platform.
+*/
+#define SQLITE_W32_MUTEX_INITIALIZER { 0 }
+
+#ifdef SQLITE_DEBUG
+#define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0, \
+                                    0L, (DWORD)0, 0 }
+#else
+#define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0 }
+#endif
+
+#ifdef SQLITE_DEBUG
+/*
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are
+** intended for use only inside assert() statements.
+*/
+static int winMutexHeld(sqlite3_mutex *p){
+  return p->nRef!=0 && p->owner==GetCurrentThreadId();
+}
+
+static int winMutexNotheld2(sqlite3_mutex *p, DWORD tid){
+  return p->nRef==0 || p->owner!=tid;
+}
+
+static int winMutexNotheld(sqlite3_mutex *p){
+  DWORD tid = GetCurrentThreadId();
+  return winMutexNotheld2(p, tid);
+}
+#endif
+
+/*
+** Initialize and deinitialize the mutex subsystem.
+*/
+static sqlite3_mutex winMutex_staticMutexes[] = {
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER,
+  SQLITE3_MUTEX_INITIALIZER
+};
+
+static int winMutex_isInit = 0;
+static int winMutex_isNt = -1; /* <0 means "need to query" */
+
+/* As the winMutexInit() and winMutexEnd() functions are called as part
+** of the sqlite3_initialize() and sqlite3_shutdown() processing, the
+** "interlocked" magic used here is probably not strictly necessary.
+*/
+static LONG SQLITE_WIN32_VOLATILE winMutex_lock = 0;
+
+SQLITE_API int sqlite3_win32_is_nt(void); /* os_win.c */
+SQLITE_API void sqlite3_win32_sleep(DWORD milliseconds); /* os_win.c */
+
+static int winMutexInit(void){
+  /* The first to increment to 1 does actual initialization */
+  if( InterlockedCompareExchange(&winMutex_lock, 1, 0)==0 ){
+    int i;
+    for(i=0; i<ArraySize(winMutex_staticMutexes); i++){
+#if SQLITE_OS_WINRT
+      InitializeCriticalSectionEx(&winMutex_staticMutexes[i].mutex, 0, 0);
+#else
+      InitializeCriticalSection(&winMutex_staticMutexes[i].mutex);
+#endif
+    }
+    winMutex_isInit = 1;
+  }else{
+    /* Another thread is (in the process of) initializing the static
+    ** mutexes */
+    while( !winMutex_isInit ){
+      sqlite3_win32_sleep(1);
+    }
+  }
+  return SQLITE_OK;
+}
+
+static int winMutexEnd(void){
+  /* The first to decrement to 0 does actual shutdown
+  ** (which should be the last to shutdown.) */
+  if( InterlockedCompareExchange(&winMutex_lock, 0, 1)==1 ){
+    if( winMutex_isInit==1 ){
+      int i;
+      for(i=0; i<ArraySize(winMutex_staticMutexes); i++){
+        DeleteCriticalSection(&winMutex_staticMutexes[i].mutex);
+      }
+      winMutex_isInit = 0;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** The sqlite3_mutex_alloc() routine allocates a new
+** mutex and returns a pointer to it.  If it returns NULL
+** that means that a mutex could not be allocated.  SQLite
+** will unwind its stack and return an error.  The argument
+** to sqlite3_mutex_alloc() is one of these integer constants:
+**
+** <ul>
+** <li>  SQLITE_MUTEX_FAST
+** <li>  SQLITE_MUTEX_RECURSIVE
+** <li>  SQLITE_MUTEX_STATIC_MASTER
+** <li>  SQLITE_MUTEX_STATIC_MEM
+** <li>  SQLITE_MUTEX_STATIC_OPEN
+** <li>  SQLITE_MUTEX_STATIC_PRNG
+** <li>  SQLITE_MUTEX_STATIC_LRU
+** <li>  SQLITE_MUTEX_STATIC_PMEM
+** <li>  SQLITE_MUTEX_STATIC_APP1
+** <li>  SQLITE_MUTEX_STATIC_APP2
+** <li>  SQLITE_MUTEX_STATIC_APP3
+** </ul>
+**
+** The first two constants cause sqlite3_mutex_alloc() to create
+** a new mutex.  The new mutex is recursive when SQLITE_MUTEX_RECURSIVE
+** is used but not necessarily so when SQLITE_MUTEX_FAST is used.
+** The mutex implementation does not need to make a distinction
+** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does
+** not want to.  But SQLite will only request a recursive mutex in
+** cases where it really needs one.  If a faster non-recursive mutex
+** implementation is available on the host platform, the mutex subsystem
+** might return such a mutex in response to SQLITE_MUTEX_FAST.
+**
+** The other allowed parameters to sqlite3_mutex_alloc() each return
+** a pointer to a static preexisting mutex.  Six static mutexes are
+** used by the current version of SQLite.  Future versions of SQLite
+** may add additional static mutexes.  Static mutexes are for internal
+** use by SQLite only.  Applications that use SQLite mutexes should
+** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or
+** SQLITE_MUTEX_RECURSIVE.
+**
+** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST
+** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc()
+** returns a different mutex on every call.  But for the static
+** mutex types, the same mutex is returned on every call that has
+** the same type number.
+*/
+static sqlite3_mutex *winMutexAlloc(int iType){
+  sqlite3_mutex *p;
+
+  switch( iType ){
+    case SQLITE_MUTEX_FAST:
+    case SQLITE_MUTEX_RECURSIVE: {
+      p = sqlite3MallocZero( sizeof(*p) );
+      if( p ){
+#ifdef SQLITE_DEBUG
+        p->id = iType;
+#ifdef SQLITE_WIN32_MUTEX_TRACE_DYNAMIC
+        p->trace = 1;
+#endif
+#endif
+#if SQLITE_OS_WINRT
+        InitializeCriticalSectionEx(&p->mutex, 0, 0);
+#else
+        InitializeCriticalSection(&p->mutex);
+#endif
+      }
+      break;
+    }
+    default: {
+#ifdef SQLITE_ENABLE_API_ARMOR
+      if( iType-2<0 || iType-2>=ArraySize(winMutex_staticMutexes) ){
+        (void)SQLITE_MISUSE_BKPT;
+        return 0;
+      }
+#endif
+      assert( iType-2 >= 0 );
+      assert( iType-2 < ArraySize(winMutex_staticMutexes) );
+      assert( winMutex_isInit==1 );
+      p = &winMutex_staticMutexes[iType-2];
+#ifdef SQLITE_DEBUG
+      p->id = iType;
+#ifdef SQLITE_WIN32_MUTEX_TRACE_STATIC
+      p->trace = 1;
+#endif
+#endif
+      break;
+    }
+  }
+  return p;
+}
+
+
+/*
+** This routine deallocates a previously
+** allocated mutex.  SQLite is careful to deallocate every
+** mutex that it allocates.
+*/
+static void winMutexFree(sqlite3_mutex *p){
+  assert( p );
+#ifdef SQLITE_DEBUG
+  assert( p->nRef==0 && p->owner==0 );
+  assert( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE );
+#endif
+  assert( winMutex_isInit==1 );
+  DeleteCriticalSection(&p->mutex);
+  sqlite3_free(p);
+}
+
+/*
+** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt
+** to enter a mutex.  If another thread is already within the mutex,
+** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return
+** SQLITE_BUSY.  The sqlite3_mutex_try() interface returns SQLITE_OK
+** upon successful entry.  Mutexes created using SQLITE_MUTEX_RECURSIVE can
+** be entered multiple times by the same thread.  In such cases the,
+** mutex must be exited an equal number of times before another thread
+** can enter.  If the same thread tries to enter any other kind of mutex
+** more than once, the behavior is undefined.
+*/
+static void winMutexEnter(sqlite3_mutex *p){
+#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
+  DWORD tid = GetCurrentThreadId();
+#endif
+#ifdef SQLITE_DEBUG
+  assert( p );
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) );
+#else
+  assert( p );
+#endif
+  assert( winMutex_isInit==1 );
+  EnterCriticalSection(&p->mutex);
+#ifdef SQLITE_DEBUG
+  assert( p->nRef>0 || p->owner==0 );
+  p->owner = tid;
+  p->nRef++;
+  if( p->trace ){
+    OSTRACE(("ENTER-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n",
+             tid, p, p->trace, p->nRef));
+  }
+#endif
+}
+
+static int winMutexTry(sqlite3_mutex *p){
+#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
+  DWORD tid = GetCurrentThreadId();
+#endif
+  int rc = SQLITE_BUSY;
+  assert( p );
+  assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) );
+  /*
+  ** The sqlite3_mutex_try() routine is very rarely used, and when it
+  ** is used it is merely an optimization.  So it is OK for it to always
+  ** fail.
+  **
+  ** The TryEnterCriticalSection() interface is only available on WinNT.
+  ** And some windows compilers complain if you try to use it without
+  ** first doing some #defines that prevent SQLite from building on Win98.
+  ** For that reason, we will omit this optimization for now.  See
+  ** ticket #2685.
+  */
+#if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0400
+  assert( winMutex_isInit==1 );
+  assert( winMutex_isNt>=-1 && winMutex_isNt<=1 );
+  if( winMutex_isNt<0 ){
+    winMutex_isNt = sqlite3_win32_is_nt();
+  }
+  assert( winMutex_isNt==0 || winMutex_isNt==1 );
+  if( winMutex_isNt && TryEnterCriticalSection(&p->mutex) ){
+#ifdef SQLITE_DEBUG
+    p->owner = tid;
+    p->nRef++;
+#endif
+    rc = SQLITE_OK;
+  }
+#else
+  UNUSED_PARAMETER(p);
+#endif
+#ifdef SQLITE_DEBUG
+  if( p->trace ){
+    OSTRACE(("TRY-MUTEX tid=%lu, mutex=%p (%d), owner=%lu, nRef=%d, rc=%s\n",
+             tid, p, p->trace, p->owner, p->nRef, sqlite3ErrName(rc)));
+  }
+#endif
+  return rc;
+}
+
+/*
+** The sqlite3_mutex_leave() routine exits a mutex that was
+** previously entered by the same thread.  The behavior
+** is undefined if the mutex is not currently entered or
+** is not currently allocated.  SQLite will never do either.
+*/
+static void winMutexLeave(sqlite3_mutex *p){
+#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
+  DWORD tid = GetCurrentThreadId();
+#endif
+  assert( p );
+#ifdef SQLITE_DEBUG
+  assert( p->nRef>0 );
+  assert( p->owner==tid );
+  p->nRef--;
+  if( p->nRef==0 ) p->owner = 0;
+  assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE );
+#endif
+  assert( winMutex_isInit==1 );
+  LeaveCriticalSection(&p->mutex);
+#ifdef SQLITE_DEBUG
+  if( p->trace ){
+    OSTRACE(("LEAVE-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n",
+             tid, p, p->trace, p->nRef));
+  }
+#endif
+}
+
+SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){
+  static const sqlite3_mutex_methods sMutex = {
+    winMutexInit,
+    winMutexEnd,
+    winMutexAlloc,
+    winMutexFree,
+    winMutexEnter,
+    winMutexTry,
+    winMutexLeave,
+#ifdef SQLITE_DEBUG
+    winMutexHeld,
+    winMutexNotheld
+#else
+    0,
+    0
+#endif
+  };
+  return &sMutex;
+}
+
+#endif /* SQLITE_MUTEX_W32 */
+
+/************** End of mutex_w32.c *******************************************/
+/************** Begin file malloc.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** Memory allocation functions used throughout sqlite.
+*/
+/* #include <stdarg.h> */
+
+/*
+** Attempt to release up to n bytes of non-essential memory currently
+** held by SQLite. An example of non-essential memory is memory used to
+** cache database pages that are not currently in use.
+*/
+SQLITE_API int sqlite3_release_memory(int n){
+#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
+  return sqlite3PcacheReleaseMemory(n);
+#else
+  /* IMPLEMENTATION-OF: R-34391-24921 The sqlite3_release_memory() routine
+  ** is a no-op returning zero if SQLite is not compiled with
+  ** SQLITE_ENABLE_MEMORY_MANAGEMENT. */
+  UNUSED_PARAMETER(n);
+  return 0;
+#endif
+}
+
+/*
+** An instance of the following object records the location of
+** each unused scratch buffer.
+*/
+typedef struct ScratchFreeslot {
+  struct ScratchFreeslot *pNext;   /* Next unused scratch buffer */
+} ScratchFreeslot;
+
+/*
+** State information local to the memory allocation subsystem.
+*/
+static SQLITE_WSD struct Mem0Global {
+  sqlite3_mutex *mutex;         /* Mutex to serialize access */
+
+  /*
+  ** The alarm callback and its arguments.  The mem0.mutex lock will
+  ** be held while the callback is running.  Recursive calls into
+  ** the memory subsystem are allowed, but no new callbacks will be
+  ** issued.
+  */
+  sqlite3_int64 alarmThreshold;
+  void (*alarmCallback)(void*, sqlite3_int64,int);
+  void *alarmArg;
+
+  /*
+  ** Pointers to the end of sqlite3GlobalConfig.pScratch memory
+  ** (so that a range test can be used to determine if an allocation
+  ** being freed came from pScratch) and a pointer to the list of
+  ** unused scratch allocations.
+  */
+  void *pScratchEnd;
+  ScratchFreeslot *pScratchFree;
+  u32 nScratchFree;
+
+  /*
+  ** True if heap is nearly "full" where "full" is defined by the
+  ** sqlite3_soft_heap_limit() setting.
+  */
+  int nearlyFull;
+} mem0 = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+#define mem0 GLOBAL(struct Mem0Global, mem0)
+
+/*
+** This routine runs when the memory allocator sees that the
+** total memory allocation is about to exceed the soft heap
+** limit.
+*/
+static void softHeapLimitEnforcer(
+  void *NotUsed, 
+  sqlite3_int64 NotUsed2,
+  int allocSize
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  sqlite3_release_memory(allocSize);
+}
+
+/*
+** Change the alarm callback
+*/
+static int sqlite3MemoryAlarm(
+  void(*xCallback)(void *pArg, sqlite3_int64 used,int N),
+  void *pArg,
+  sqlite3_int64 iThreshold
+){
+  int nUsed;
+  sqlite3_mutex_enter(mem0.mutex);
+  mem0.alarmCallback = xCallback;
+  mem0.alarmArg = pArg;
+  mem0.alarmThreshold = iThreshold;
+  nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED);
+  mem0.nearlyFull = (iThreshold>0 && iThreshold<=nUsed);
+  sqlite3_mutex_leave(mem0.mutex);
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** Deprecated external interface.  Internal/core SQLite code
+** should call sqlite3MemoryAlarm.
+*/
+SQLITE_API int sqlite3_memory_alarm(
+  void(*xCallback)(void *pArg, sqlite3_int64 used,int N),
+  void *pArg,
+  sqlite3_int64 iThreshold
+){
+  return sqlite3MemoryAlarm(xCallback, pArg, iThreshold);
+}
+#endif
+
+/*
+** Set the soft heap-size limit for the library. Passing a zero or 
+** negative value indicates no limit.
+*/
+SQLITE_API sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 n){
+  sqlite3_int64 priorLimit;
+  sqlite3_int64 excess;
+#ifndef SQLITE_OMIT_AUTOINIT
+  int rc = sqlite3_initialize();
+  if( rc ) return -1;
+#endif
+  sqlite3_mutex_enter(mem0.mutex);
+  priorLimit = mem0.alarmThreshold;
+  sqlite3_mutex_leave(mem0.mutex);
+  if( n<0 ) return priorLimit;
+  if( n>0 ){
+    sqlite3MemoryAlarm(softHeapLimitEnforcer, 0, n);
+  }else{
+    sqlite3MemoryAlarm(0, 0, 0);
+  }
+  excess = sqlite3_memory_used() - n;
+  if( excess>0 ) sqlite3_release_memory((int)(excess & 0x7fffffff));
+  return priorLimit;
+}
+SQLITE_API void sqlite3_soft_heap_limit(int n){
+  if( n<0 ) n = 0;
+  sqlite3_soft_heap_limit64(n);
+}
+
+/*
+** Initialize the memory allocation subsystem.
+*/
+SQLITE_PRIVATE int sqlite3MallocInit(void){
+  if( sqlite3GlobalConfig.m.xMalloc==0 ){
+    sqlite3MemSetDefault();
+  }
+  memset(&mem0, 0, sizeof(mem0));
+  if( sqlite3GlobalConfig.bCoreMutex ){
+    mem0.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM);
+  }
+  if( sqlite3GlobalConfig.pScratch && sqlite3GlobalConfig.szScratch>=100
+      && sqlite3GlobalConfig.nScratch>0 ){
+    int i, n, sz;
+    ScratchFreeslot *pSlot;
+    sz = ROUNDDOWN8(sqlite3GlobalConfig.szScratch);
+    sqlite3GlobalConfig.szScratch = sz;
+    pSlot = (ScratchFreeslot*)sqlite3GlobalConfig.pScratch;
+    n = sqlite3GlobalConfig.nScratch;
+    mem0.pScratchFree = pSlot;
+    mem0.nScratchFree = n;
+    for(i=0; i<n-1; i++){
+      pSlot->pNext = (ScratchFreeslot*)(sz+(char*)pSlot);
+      pSlot = pSlot->pNext;
+    }
+    pSlot->pNext = 0;
+    mem0.pScratchEnd = (void*)&pSlot[1];
+  }else{
+    mem0.pScratchEnd = 0;
+    sqlite3GlobalConfig.pScratch = 0;
+    sqlite3GlobalConfig.szScratch = 0;
+    sqlite3GlobalConfig.nScratch = 0;
+  }
+  if( sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.szPage<512
+      || sqlite3GlobalConfig.nPage<1 ){
+    sqlite3GlobalConfig.pPage = 0;
+    sqlite3GlobalConfig.szPage = 0;
+    sqlite3GlobalConfig.nPage = 0;
+  }
+  return sqlite3GlobalConfig.m.xInit(sqlite3GlobalConfig.m.pAppData);
+}
+
+/*
+** Return true if the heap is currently under memory pressure - in other
+** words if the amount of heap used is close to the limit set by
+** sqlite3_soft_heap_limit().
+*/
+SQLITE_PRIVATE int sqlite3HeapNearlyFull(void){
+  return mem0.nearlyFull;
+}
+
+/*
+** Deinitialize the memory allocation subsystem.
+*/
+SQLITE_PRIVATE void sqlite3MallocEnd(void){
+  if( sqlite3GlobalConfig.m.xShutdown ){
+    sqlite3GlobalConfig.m.xShutdown(sqlite3GlobalConfig.m.pAppData);
+  }
+  memset(&mem0, 0, sizeof(mem0));
+}
+
+/*
+** Return the amount of memory currently checked out.
+*/
+SQLITE_API sqlite3_int64 sqlite3_memory_used(void){
+  int n, mx;
+  sqlite3_int64 res;
+  sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, 0);
+  res = (sqlite3_int64)n;  /* Work around bug in Borland C. Ticket #3216 */
+  return res;
+}
+
+/*
+** Return the maximum amount of memory that has ever been
+** checked out since either the beginning of this process
+** or since the most recent reset.
+*/
+SQLITE_API sqlite3_int64 sqlite3_memory_highwater(int resetFlag){
+  int n, mx;
+  sqlite3_int64 res;
+  sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, resetFlag);
+  res = (sqlite3_int64)mx;  /* Work around bug in Borland C. Ticket #3216 */
+  return res;
+}
+
+/*
+** Trigger the alarm 
+*/
+static void sqlite3MallocAlarm(int nByte){
+  void (*xCallback)(void*,sqlite3_int64,int);
+  sqlite3_int64 nowUsed;
+  void *pArg;
+  if( mem0.alarmCallback==0 ) return;
+  xCallback = mem0.alarmCallback;
+  nowUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED);
+  pArg = mem0.alarmArg;
+  mem0.alarmCallback = 0;
+  sqlite3_mutex_leave(mem0.mutex);
+  xCallback(pArg, nowUsed, nByte);
+  sqlite3_mutex_enter(mem0.mutex);
+  mem0.alarmCallback = xCallback;
+  mem0.alarmArg = pArg;
+}
+
+/*
+** Do a memory allocation with statistics and alarms.  Assume the
+** lock is already held.
+*/
+static int mallocWithAlarm(int n, void **pp){
+  int nFull;
+  void *p;
+  assert( sqlite3_mutex_held(mem0.mutex) );
+  nFull = sqlite3GlobalConfig.m.xRoundup(n);
+  sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, n);
+  if( mem0.alarmCallback!=0 ){
+    int nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED);
+    if( nUsed >= mem0.alarmThreshold - nFull ){
+      mem0.nearlyFull = 1;
+      sqlite3MallocAlarm(nFull);
+    }else{
+      mem0.nearlyFull = 0;
+    }
+  }
+  p = sqlite3GlobalConfig.m.xMalloc(nFull);
+#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
+  if( p==0 && mem0.alarmCallback ){
+    sqlite3MallocAlarm(nFull);
+    p = sqlite3GlobalConfig.m.xMalloc(nFull);
+  }
+#endif
+  if( p ){
+    nFull = sqlite3MallocSize(p);
+    sqlite3StatusAdd(SQLITE_STATUS_MEMORY_USED, nFull);
+    sqlite3StatusAdd(SQLITE_STATUS_MALLOC_COUNT, 1);
+  }
+  *pp = p;
+  return nFull;
+}
+
+/*
+** Allocate memory.  This routine is like sqlite3_malloc() except that it
+** assumes the memory subsystem has already been initialized.
+*/
+SQLITE_PRIVATE void *sqlite3Malloc(u64 n){
+  void *p;
+  if( n==0 || n>=0x7fffff00 ){
+    /* A memory allocation of a number of bytes which is near the maximum
+    ** signed integer value might cause an integer overflow inside of the
+    ** xMalloc().  Hence we limit the maximum size to 0x7fffff00, giving
+    ** 255 bytes of overhead.  SQLite itself will never use anything near
+    ** this amount.  The only way to reach the limit is with sqlite3_malloc() */
+    p = 0;
+  }else if( sqlite3GlobalConfig.bMemstat ){
+    sqlite3_mutex_enter(mem0.mutex);
+    mallocWithAlarm((int)n, &p);
+    sqlite3_mutex_leave(mem0.mutex);
+  }else{
+    p = sqlite3GlobalConfig.m.xMalloc((int)n);
+  }
+  assert( EIGHT_BYTE_ALIGNMENT(p) );  /* IMP: R-11148-40995 */
+  return p;
+}
+
+/*
+** This version of the memory allocation is for use by the application.
+** First make sure the memory subsystem is initialized, then do the
+** allocation.
+*/
+SQLITE_API void *sqlite3_malloc(int n){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  return n<=0 ? 0 : sqlite3Malloc(n);
+}
+SQLITE_API void *sqlite3_malloc64(sqlite3_uint64 n){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  return sqlite3Malloc(n);
+}
+
+/*
+** Each thread may only have a single outstanding allocation from
+** xScratchMalloc().  We verify this constraint in the single-threaded
+** case by setting scratchAllocOut to 1 when an allocation
+** is outstanding clearing it when the allocation is freed.
+*/
+#if SQLITE_THREADSAFE==0 && !defined(NDEBUG)
+static int scratchAllocOut = 0;
+#endif
+
+
+/*
+** Allocate memory that is to be used and released right away.
+** This routine is similar to alloca() in that it is not intended
+** for situations where the memory might be held long-term.  This
+** routine is intended to get memory to old large transient data
+** structures that would not normally fit on the stack of an
+** embedded processor.
+*/
+SQLITE_PRIVATE void *sqlite3ScratchMalloc(int n){
+  void *p;
+  assert( n>0 );
+
+  sqlite3_mutex_enter(mem0.mutex);
+  sqlite3StatusSet(SQLITE_STATUS_SCRATCH_SIZE, n);
+  if( mem0.nScratchFree && sqlite3GlobalConfig.szScratch>=n ){
+    p = mem0.pScratchFree;
+    mem0.pScratchFree = mem0.pScratchFree->pNext;
+    mem0.nScratchFree--;
+    sqlite3StatusAdd(SQLITE_STATUS_SCRATCH_USED, 1);
+    sqlite3_mutex_leave(mem0.mutex);
+  }else{
+    sqlite3_mutex_leave(mem0.mutex);
+    p = sqlite3Malloc(n);
+    if( sqlite3GlobalConfig.bMemstat && p ){
+      sqlite3_mutex_enter(mem0.mutex);
+      sqlite3StatusAdd(SQLITE_STATUS_SCRATCH_OVERFLOW, sqlite3MallocSize(p));
+      sqlite3_mutex_leave(mem0.mutex);
+    }
+    sqlite3MemdebugSetType(p, MEMTYPE_SCRATCH);
+  }
+  assert( sqlite3_mutex_notheld(mem0.mutex) );
+
+
+#if SQLITE_THREADSAFE==0 && !defined(NDEBUG)
+  /* EVIDENCE-OF: R-12970-05880 SQLite will not use more than one scratch
+  ** buffers per thread.
+  **
+  ** This can only be checked in single-threaded mode.
+  */
+  assert( scratchAllocOut==0 );
+  if( p ) scratchAllocOut++;
+#endif
+
+  return p;
+}
+SQLITE_PRIVATE void sqlite3ScratchFree(void *p){
+  if( p ){
+
+#if SQLITE_THREADSAFE==0 && !defined(NDEBUG)
+    /* Verify that no more than two scratch allocation per thread
+    ** is outstanding at one time.  (This is only checked in the
+    ** single-threaded case since checking in the multi-threaded case
+    ** would be much more complicated.) */
+    assert( scratchAllocOut>=1 && scratchAllocOut<=2 );
+    scratchAllocOut--;
+#endif
+
+    if( p>=sqlite3GlobalConfig.pScratch && p<mem0.pScratchEnd ){
+      /* Release memory from the SQLITE_CONFIG_SCRATCH allocation */
+      ScratchFreeslot *pSlot;
+      pSlot = (ScratchFreeslot*)p;
+      sqlite3_mutex_enter(mem0.mutex);
+      pSlot->pNext = mem0.pScratchFree;
+      mem0.pScratchFree = pSlot;
+      mem0.nScratchFree++;
+      assert( mem0.nScratchFree <= (u32)sqlite3GlobalConfig.nScratch );
+      sqlite3StatusAdd(SQLITE_STATUS_SCRATCH_USED, -1);
+      sqlite3_mutex_leave(mem0.mutex);
+    }else{
+      /* Release memory back to the heap */
+      assert( sqlite3MemdebugHasType(p, MEMTYPE_SCRATCH) );
+      assert( sqlite3MemdebugNoType(p, ~MEMTYPE_SCRATCH) );
+      sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
+      if( sqlite3GlobalConfig.bMemstat ){
+        int iSize = sqlite3MallocSize(p);
+        sqlite3_mutex_enter(mem0.mutex);
+        sqlite3StatusAdd(SQLITE_STATUS_SCRATCH_OVERFLOW, -iSize);
+        sqlite3StatusAdd(SQLITE_STATUS_MEMORY_USED, -iSize);
+        sqlite3StatusAdd(SQLITE_STATUS_MALLOC_COUNT, -1);
+        sqlite3GlobalConfig.m.xFree(p);
+        sqlite3_mutex_leave(mem0.mutex);
+      }else{
+        sqlite3GlobalConfig.m.xFree(p);
+      }
+    }
+  }
+}
+
+/*
+** TRUE if p is a lookaside memory allocation from db
+*/
+#ifndef SQLITE_OMIT_LOOKASIDE
+static int isLookaside(sqlite3 *db, void *p){
+  return p>=db->lookaside.pStart && p<db->lookaside.pEnd;
+}
+#else
+#define isLookaside(A,B) 0
+#endif
+
+/*
+** Return the size of a memory allocation previously obtained from
+** sqlite3Malloc() or sqlite3_malloc().
+*/
+SQLITE_PRIVATE int sqlite3MallocSize(void *p){
+  assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) );
+  return sqlite3GlobalConfig.m.xSize(p);
+}
+SQLITE_PRIVATE int sqlite3DbMallocSize(sqlite3 *db, void *p){
+  if( db==0 ){
+    assert( sqlite3MemdebugNoType(p, ~MEMTYPE_HEAP) );
+    assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) );
+    return sqlite3MallocSize(p);
+  }else{
+    assert( sqlite3_mutex_held(db->mutex) );
+    if( isLookaside(db, p) ){
+      return db->lookaside.sz;
+    }else{
+      assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+      assert( sqlite3MemdebugNoType(p, ~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+      return sqlite3GlobalConfig.m.xSize(p);
+    }
+  }
+}
+SQLITE_API sqlite3_uint64 sqlite3_msize(void *p){
+  assert( sqlite3MemdebugNoType(p, ~MEMTYPE_HEAP) );
+  assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) );
+  return (sqlite3_uint64)sqlite3GlobalConfig.m.xSize(p);
+}
+
+/*
+** Free memory previously obtained from sqlite3Malloc().
+*/
+SQLITE_API void sqlite3_free(void *p){
+  if( p==0 ) return;  /* IMP: R-49053-54554 */
+  assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) );
+  assert( sqlite3MemdebugNoType(p, ~MEMTYPE_HEAP) );
+  if( sqlite3GlobalConfig.bMemstat ){
+    sqlite3_mutex_enter(mem0.mutex);
+    sqlite3StatusAdd(SQLITE_STATUS_MEMORY_USED, -sqlite3MallocSize(p));
+    sqlite3StatusAdd(SQLITE_STATUS_MALLOC_COUNT, -1);
+    sqlite3GlobalConfig.m.xFree(p);
+    sqlite3_mutex_leave(mem0.mutex);
+  }else{
+    sqlite3GlobalConfig.m.xFree(p);
+  }
+}
+
+/*
+** Add the size of memory allocation "p" to the count in
+** *db->pnBytesFreed.
+*/
+static SQLITE_NOINLINE void measureAllocationSize(sqlite3 *db, void *p){
+  *db->pnBytesFreed += sqlite3DbMallocSize(db,p);
+}
+
+/*
+** Free memory that might be associated with a particular database
+** connection.
+*/
+SQLITE_PRIVATE void sqlite3DbFree(sqlite3 *db, void *p){
+  assert( db==0 || sqlite3_mutex_held(db->mutex) );
+  if( p==0 ) return;
+  if( db ){
+    if( db->pnBytesFreed ){
+      measureAllocationSize(db, p);
+      return;
+    }
+    if( isLookaside(db, p) ){
+      LookasideSlot *pBuf = (LookasideSlot*)p;
+#if SQLITE_DEBUG
+      /* Trash all content in the buffer being freed */
+      memset(p, 0xaa, db->lookaside.sz);
+#endif
+      pBuf->pNext = db->lookaside.pFree;
+      db->lookaside.pFree = pBuf;
+      db->lookaside.nOut--;
+      return;
+    }
+  }
+  assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+  assert( sqlite3MemdebugNoType(p, ~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+  assert( db!=0 || sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) );
+  sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
+  sqlite3_free(p);
+}
+
+/*
+** Change the size of an existing memory allocation
+*/
+SQLITE_PRIVATE void *sqlite3Realloc(void *pOld, u64 nBytes){
+  int nOld, nNew, nDiff;
+  void *pNew;
+  assert( sqlite3MemdebugHasType(pOld, MEMTYPE_HEAP) );
+  assert( sqlite3MemdebugNoType(pOld, ~MEMTYPE_HEAP) );
+  if( pOld==0 ){
+    return sqlite3Malloc(nBytes); /* IMP: R-04300-56712 */
+  }
+  if( nBytes==0 ){
+    sqlite3_free(pOld); /* IMP: R-26507-47431 */
+    return 0;
+  }
+  if( nBytes>=0x7fffff00 ){
+    /* The 0x7ffff00 limit term is explained in comments on sqlite3Malloc() */
+    return 0;
+  }
+  nOld = sqlite3MallocSize(pOld);
+  /* IMPLEMENTATION-OF: R-46199-30249 SQLite guarantees that the second
+  ** argument to xRealloc is always a value returned by a prior call to
+  ** xRoundup. */
+  nNew = sqlite3GlobalConfig.m.xRoundup((int)nBytes);
+  if( nOld==nNew ){
+    pNew = pOld;
+  }else if( sqlite3GlobalConfig.bMemstat ){
+    sqlite3_mutex_enter(mem0.mutex);
+    sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, (int)nBytes);
+    nDiff = nNew - nOld;
+    if( sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED) >= 
+          mem0.alarmThreshold-nDiff ){
+      sqlite3MallocAlarm(nDiff);
+    }
+    pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew);
+    if( pNew==0 && mem0.alarmCallback ){
+      sqlite3MallocAlarm((int)nBytes);
+      pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew);
+    }
+    if( pNew ){
+      nNew = sqlite3MallocSize(pNew);
+      sqlite3StatusAdd(SQLITE_STATUS_MEMORY_USED, nNew-nOld);
+    }
+    sqlite3_mutex_leave(mem0.mutex);
+  }else{
+    pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew);
+  }
+  assert( EIGHT_BYTE_ALIGNMENT(pNew) ); /* IMP: R-11148-40995 */
+  return pNew;
+}
+
+/*
+** The public interface to sqlite3Realloc.  Make sure that the memory
+** subsystem is initialized prior to invoking sqliteRealloc.
+*/
+SQLITE_API void *sqlite3_realloc(void *pOld, int n){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  if( n<0 ) n = 0;  /* IMP: R-26507-47431 */
+  return sqlite3Realloc(pOld, n);
+}
+SQLITE_API void *sqlite3_realloc64(void *pOld, sqlite3_uint64 n){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  return sqlite3Realloc(pOld, n);
+}
+
+
+/*
+** Allocate and zero memory.
+*/ 
+SQLITE_PRIVATE void *sqlite3MallocZero(u64 n){
+  void *p = sqlite3Malloc(n);
+  if( p ){
+    memset(p, 0, (size_t)n);
+  }
+  return p;
+}
+
+/*
+** Allocate and zero memory.  If the allocation fails, make
+** the mallocFailed flag in the connection pointer.
+*/
+SQLITE_PRIVATE void *sqlite3DbMallocZero(sqlite3 *db, u64 n){
+  void *p = sqlite3DbMallocRaw(db, n);
+  if( p ){
+    memset(p, 0, (size_t)n);
+  }
+  return p;
+}
+
+/*
+** Allocate and zero memory.  If the allocation fails, make
+** the mallocFailed flag in the connection pointer.
+**
+** If db!=0 and db->mallocFailed is true (indicating a prior malloc
+** failure on the same database connection) then always return 0.
+** Hence for a particular database connection, once malloc starts
+** failing, it fails consistently until mallocFailed is reset.
+** This is an important assumption.  There are many places in the
+** code that do things like this:
+**
+**         int *a = (int*)sqlite3DbMallocRaw(db, 100);
+**         int *b = (int*)sqlite3DbMallocRaw(db, 200);
+**         if( b ) a[10] = 9;
+**
+** In other words, if a subsequent malloc (ex: "b") worked, it is assumed
+** that all prior mallocs (ex: "a") worked too.
+*/
+SQLITE_PRIVATE void *sqlite3DbMallocRaw(sqlite3 *db, u64 n){
+  void *p;
+  assert( db==0 || sqlite3_mutex_held(db->mutex) );
+  assert( db==0 || db->pnBytesFreed==0 );
+#ifndef SQLITE_OMIT_LOOKASIDE
+  if( db ){
+    LookasideSlot *pBuf;
+    if( db->mallocFailed ){
+      return 0;
+    }
+    if( db->lookaside.bEnabled ){
+      if( n>db->lookaside.sz ){
+        db->lookaside.anStat[1]++;
+      }else if( (pBuf = db->lookaside.pFree)==0 ){
+        db->lookaside.anStat[2]++;
+      }else{
+        db->lookaside.pFree = pBuf->pNext;
+        db->lookaside.nOut++;
+        db->lookaside.anStat[0]++;
+        if( db->lookaside.nOut>db->lookaside.mxOut ){
+          db->lookaside.mxOut = db->lookaside.nOut;
+        }
+        return (void*)pBuf;
+      }
+    }
+  }
+#else
+  if( db && db->mallocFailed ){
+    return 0;
+  }
+#endif
+  p = sqlite3Malloc(n);
+  if( !p && db ){
+    db->mallocFailed = 1;
+  }
+  sqlite3MemdebugSetType(p, 
+         (db && db->lookaside.bEnabled) ? MEMTYPE_LOOKASIDE : MEMTYPE_HEAP);
+  return p;
+}
+
+/*
+** Resize the block of memory pointed to by p to n bytes. If the
+** resize fails, set the mallocFailed flag in the connection object.
+*/
+SQLITE_PRIVATE void *sqlite3DbRealloc(sqlite3 *db, void *p, u64 n){
+  void *pNew = 0;
+  assert( db!=0 );
+  assert( sqlite3_mutex_held(db->mutex) );
+  if( db->mallocFailed==0 ){
+    if( p==0 ){
+      return sqlite3DbMallocRaw(db, n);
+    }
+    if( isLookaside(db, p) ){
+      if( n<=db->lookaside.sz ){
+        return p;
+      }
+      pNew = sqlite3DbMallocRaw(db, n);
+      if( pNew ){
+        memcpy(pNew, p, db->lookaside.sz);
+        sqlite3DbFree(db, p);
+      }
+    }else{
+      assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+      assert( sqlite3MemdebugNoType(p, ~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) );
+      sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
+      pNew = sqlite3_realloc64(p, n);
+      if( !pNew ){
+        db->mallocFailed = 1;
+      }
+      sqlite3MemdebugSetType(pNew,
+            (db->lookaside.bEnabled ? MEMTYPE_LOOKASIDE : MEMTYPE_HEAP));
+    }
+  }
+  return pNew;
+}
+
+/*
+** Attempt to reallocate p.  If the reallocation fails, then free p
+** and set the mallocFailed flag in the database connection.
+*/
+SQLITE_PRIVATE void *sqlite3DbReallocOrFree(sqlite3 *db, void *p, u64 n){
+  void *pNew;
+  pNew = sqlite3DbRealloc(db, p, n);
+  if( !pNew ){
+    sqlite3DbFree(db, p);
+  }
+  return pNew;
+}
+
+/*
+** Make a copy of a string in memory obtained from sqliteMalloc(). These 
+** functions call sqlite3MallocRaw() directly instead of sqliteMalloc(). This
+** is because when memory debugging is turned on, these two functions are 
+** called via macros that record the current file and line number in the
+** ThreadData structure.
+*/
+SQLITE_PRIVATE char *sqlite3DbStrDup(sqlite3 *db, const char *z){
+  char *zNew;
+  size_t n;
+  if( z==0 ){
+    return 0;
+  }
+  n = sqlite3Strlen30(z) + 1;
+  assert( (n&0x7fffffff)==n );
+  zNew = sqlite3DbMallocRaw(db, (int)n);
+  if( zNew ){
+    memcpy(zNew, z, n);
+  }
+  return zNew;
+}
+SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3 *db, const char *z, u64 n){
+  char *zNew;
+  if( z==0 ){
+    return 0;
+  }
+  assert( (n&0x7fffffff)==n );
+  zNew = sqlite3DbMallocRaw(db, n+1);
+  if( zNew ){
+    memcpy(zNew, z, (size_t)n);
+    zNew[n] = 0;
+  }
+  return zNew;
+}
+
+/*
+** Create a string from the zFromat argument and the va_list that follows.
+** Store the string in memory obtained from sqliteMalloc() and make *pz
+** point to that string.
+*/
+SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zFormat, ...){
+  va_list ap;
+  char *z;
+
+  va_start(ap, zFormat);
+  z = sqlite3VMPrintf(db, zFormat, ap);
+  va_end(ap);
+  sqlite3DbFree(db, *pz);
+  *pz = z;
+}
+
+/*
+** Take actions at the end of an API call to indicate an OOM error
+*/
+static SQLITE_NOINLINE int apiOomError(sqlite3 *db){
+  db->mallocFailed = 0;
+  sqlite3Error(db, SQLITE_NOMEM);
+  return SQLITE_NOMEM;
+}
+
+/*
+** This function must be called before exiting any API function (i.e. 
+** returning control to the user) that has called sqlite3_malloc or
+** sqlite3_realloc.
+**
+** The returned value is normally a copy of the second argument to this
+** function. However, if a malloc() failure has occurred since the previous
+** invocation SQLITE_NOMEM is returned instead. 
+**
+** If the first argument, db, is not NULL and a malloc() error has occurred,
+** then the connection error-code (the value returned by sqlite3_errcode())
+** is set to SQLITE_NOMEM.
+*/
+SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){
+  /* If the db handle is not NULL, then we must hold the connection handle
+  ** mutex here. Otherwise the read (and possible write) of db->mallocFailed 
+  ** is unsafe, as is the call to sqlite3Error().
+  */
+  assert( !db || sqlite3_mutex_held(db->mutex) );
+  if( db==0 ) return rc & 0xff;
+  if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){
+    return apiOomError(db);
+  }
+  return rc & db->errMask;
+}
+
+/************** End of malloc.c **********************************************/
+/************** Begin file printf.c ******************************************/
+/*
+** The "printf" code that follows dates from the 1980's.  It is in
+** the public domain.  The original comments are included here for
+** completeness.  They are very out-of-date but might be useful as
+** an historical reference.  Most of the "enhancements" have been backed
+** out so that the functionality is now the same as standard printf().
+**
+**************************************************************************
+**
+** This file contains code for a set of "printf"-like routines.  These
+** routines format strings much like the printf() from the standard C
+** library, though the implementation here has enhancements to support
+** SQLlite.
+*/
+
+/*
+** Conversion types fall into various categories as defined by the
+** following enumeration.
+*/
+#define etRADIX       1 /* Integer types.  %d, %x, %o, and so forth */
+#define etFLOAT       2 /* Floating point.  %f */
+#define etEXP         3 /* Exponentional notation. %e and %E */
+#define etGENERIC     4 /* Floating or exponential, depending on exponent. %g */
+#define etSIZE        5 /* Return number of characters processed so far. %n */
+#define etSTRING      6 /* Strings. %s */
+#define etDYNSTRING   7 /* Dynamically allocated strings. %z */
+#define etPERCENT     8 /* Percent symbol. %% */
+#define etCHARX       9 /* Characters. %c */
+/* The rest are extensions, not normally found in printf() */
+#define etSQLESCAPE  10 /* Strings with '\'' doubled.  %q */
+#define etSQLESCAPE2 11 /* Strings with '\'' doubled and enclosed in '',
+                          NULL pointers replaced by SQL NULL.  %Q */
+#define etTOKEN      12 /* a pointer to a Token structure */
+#define etSRCLIST    13 /* a pointer to a SrcList */
+#define etPOINTER    14 /* The %p conversion */
+#define etSQLESCAPE3 15 /* %w -> Strings with '\"' doubled */
+#define etORDINAL    16 /* %r -> 1st, 2nd, 3rd, 4th, etc.  English only */
+
+#define etINVALID     0 /* Any unrecognized conversion type */
+
+
+/*
+** An "etByte" is an 8-bit unsigned value.
+*/
+typedef unsigned char etByte;
+
+/*
+** Each builtin conversion character (ex: the 'd' in "%d") is described
+** by an instance of the following structure
+*/
+typedef struct et_info {   /* Information about each format field */
+  char fmttype;            /* The format field code letter */
+  etByte base;             /* The base for radix conversion */
+  etByte flags;            /* One or more of FLAG_ constants below */
+  etByte type;             /* Conversion paradigm */
+  etByte charset;          /* Offset into aDigits[] of the digits string */
+  etByte prefix;           /* Offset into aPrefix[] of the prefix string */
+} et_info;
+
+/*
+** Allowed values for et_info.flags
+*/
+#define FLAG_SIGNED  1     /* True if the value to convert is signed */
+#define FLAG_INTERN  2     /* True if for internal use only */
+#define FLAG_STRING  4     /* Allow infinity precision */
+
+
+/*
+** The following table is searched linearly, so it is good to put the
+** most frequently used conversion types first.
+*/
+static const char aDigits[] = "0123456789ABCDEF0123456789abcdef";
+static const char aPrefix[] = "-x0\000X0";
+static const et_info fmtinfo[] = {
+  {  'd', 10, 1, etRADIX,      0,  0 },
+  {  's',  0, 4, etSTRING,     0,  0 },
+  {  'g',  0, 1, etGENERIC,    30, 0 },
+  {  'z',  0, 4, etDYNSTRING,  0,  0 },
+  {  'q',  0, 4, etSQLESCAPE,  0,  0 },
+  {  'Q',  0, 4, etSQLESCAPE2, 0,  0 },
+  {  'w',  0, 4, etSQLESCAPE3, 0,  0 },
+  {  'c',  0, 0, etCHARX,      0,  0 },
+  {  'o',  8, 0, etRADIX,      0,  2 },
+  {  'u', 10, 0, etRADIX,      0,  0 },
+  {  'x', 16, 0, etRADIX,      16, 1 },
+  {  'X', 16, 0, etRADIX,      0,  4 },
+#ifndef SQLITE_OMIT_FLOATING_POINT
+  {  'f',  0, 1, etFLOAT,      0,  0 },
+  {  'e',  0, 1, etEXP,        30, 0 },
+  {  'E',  0, 1, etEXP,        14, 0 },
+  {  'G',  0, 1, etGENERIC,    14, 0 },
+#endif
+  {  'i', 10, 1, etRADIX,      0,  0 },
+  {  'n',  0, 0, etSIZE,       0,  0 },
+  {  '%',  0, 0, etPERCENT,    0,  0 },
+  {  'p', 16, 0, etPOINTER,    0,  1 },
+
+/* All the rest have the FLAG_INTERN bit set and are thus for internal
+** use only */
+  {  'T',  0, 2, etTOKEN,      0,  0 },
+  {  'S',  0, 2, etSRCLIST,    0,  0 },
+  {  'r', 10, 3, etORDINAL,    0,  0 },
+};
+
+/*
+** If SQLITE_OMIT_FLOATING_POINT is defined, then none of the floating point
+** conversions will work.
+*/
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/*
+** "*val" is a double such that 0.1 <= *val < 10.0
+** Return the ascii code for the leading digit of *val, then
+** multiply "*val" by 10.0 to renormalize.
+**
+** Example:
+**     input:     *val = 3.14159
+**     output:    *val = 1.4159    function return = '3'
+**
+** The counter *cnt is incremented each time.  After counter exceeds
+** 16 (the number of significant digits in a 64-bit float) '0' is
+** always returned.
+*/
+static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){
+  int digit;
+  LONGDOUBLE_TYPE d;
+  if( (*cnt)<=0 ) return '0';
+  (*cnt)--;
+  digit = (int)*val;
+  d = digit;
+  digit += '0';
+  *val = (*val - d)*10.0;
+  return (char)digit;
+}
+#endif /* SQLITE_OMIT_FLOATING_POINT */
+
+/*
+** Set the StrAccum object to an error mode.
+*/
+static void setStrAccumError(StrAccum *p, u8 eError){
+  p->accError = eError;
+  p->nAlloc = 0;
+}
+
+/*
+** Extra argument values from a PrintfArguments object
+*/
+static sqlite3_int64 getIntArg(PrintfArguments *p){
+  if( p->nArg<=p->nUsed ) return 0;
+  return sqlite3_value_int64(p->apArg[p->nUsed++]);
+}
+static double getDoubleArg(PrintfArguments *p){
+  if( p->nArg<=p->nUsed ) return 0.0;
+  return sqlite3_value_double(p->apArg[p->nUsed++]);
+}
+static char *getTextArg(PrintfArguments *p){
+  if( p->nArg<=p->nUsed ) return 0;
+  return (char*)sqlite3_value_text(p->apArg[p->nUsed++]);
+}
+
+
+/*
+** On machines with a small stack size, you can redefine the
+** SQLITE_PRINT_BUF_SIZE to be something smaller, if desired.
+*/
+#ifndef SQLITE_PRINT_BUF_SIZE
+# define SQLITE_PRINT_BUF_SIZE 70
+#endif
+#define etBUFSIZE SQLITE_PRINT_BUF_SIZE  /* Size of the output buffer */
+
+/*
+** Render a string given by "fmt" into the StrAccum object.
+*/
+SQLITE_PRIVATE void sqlite3VXPrintf(
+  StrAccum *pAccum,          /* Accumulate results here */
+  u32 bFlags,                /* SQLITE_PRINTF_* flags */
+  const char *fmt,           /* Format string */
+  va_list ap                 /* arguments */
+){
+  int c;                     /* Next character in the format string */
+  char *bufpt;               /* Pointer to the conversion buffer */
+  int precision;             /* Precision of the current field */
+  int length;                /* Length of the field */
+  int idx;                   /* A general purpose loop counter */
+  int width;                 /* Width of the current field */
+  etByte flag_leftjustify;   /* True if "-" flag is present */
+  etByte flag_plussign;      /* True if "+" flag is present */
+  etByte flag_blanksign;     /* True if " " flag is present */
+  etByte flag_alternateform; /* True if "#" flag is present */
+  etByte flag_altform2;      /* True if "!" flag is present */
+  etByte flag_zeropad;       /* True if field width constant starts with zero */
+  etByte flag_long;          /* True if "l" flag is present */
+  etByte flag_longlong;      /* True if the "ll" flag is present */
+  etByte done;               /* Loop termination flag */
+  etByte xtype = 0;          /* Conversion paradigm */
+  u8 bArgList;               /* True for SQLITE_PRINTF_SQLFUNC */
+  u8 useIntern;              /* Ok to use internal conversions (ex: %T) */
+  char prefix;               /* Prefix character.  "+" or "-" or " " or '\0'. */
+  sqlite_uint64 longvalue;   /* Value for integer types */
+  LONGDOUBLE_TYPE realvalue; /* Value for real types */
+  const et_info *infop;      /* Pointer to the appropriate info structure */
+  char *zOut;                /* Rendering buffer */
+  int nOut;                  /* Size of the rendering buffer */
+  char *zExtra = 0;          /* Malloced memory used by some conversion */
+#ifndef SQLITE_OMIT_FLOATING_POINT
+  int  exp, e2;              /* exponent of real numbers */
+  int nsd;                   /* Number of significant digits returned */
+  double rounder;            /* Used for rounding floating point values */
+  etByte flag_dp;            /* True if decimal point should be shown */
+  etByte flag_rtz;           /* True if trailing zeros should be removed */
+#endif
+  PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */
+  char buf[etBUFSIZE];       /* Conversion buffer */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( ap==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    sqlite3StrAccumReset(pAccum);
+    return;
+  }
+#endif
+  bufpt = 0;
+  if( bFlags ){
+    if( (bArgList = (bFlags & SQLITE_PRINTF_SQLFUNC))!=0 ){
+      pArgList = va_arg(ap, PrintfArguments*);
+    }
+    useIntern = bFlags & SQLITE_PRINTF_INTERNAL;
+  }else{
+    bArgList = useIntern = 0;
+  }
+  for(; (c=(*fmt))!=0; ++fmt){
+    if( c!='%' ){
+      bufpt = (char *)fmt;
+#if HAVE_STRCHRNUL
+      fmt = strchrnul(fmt, '%');
+#else
+      do{ fmt++; }while( *fmt && *fmt != '%' );
+#endif
+      sqlite3StrAccumAppend(pAccum, bufpt, (int)(fmt - bufpt));
+      if( *fmt==0 ) break;
+    }
+    if( (c=(*++fmt))==0 ){
+      sqlite3StrAccumAppend(pAccum, "%", 1);
+      break;
+    }
+    /* Find out what flags are present */
+    flag_leftjustify = flag_plussign = flag_blanksign = 
+     flag_alternateform = flag_altform2 = flag_zeropad = 0;
+    done = 0;
+    do{
+      switch( c ){
+        case '-':   flag_leftjustify = 1;     break;
+        case '+':   flag_plussign = 1;        break;
+        case ' ':   flag_blanksign = 1;       break;
+        case '#':   flag_alternateform = 1;   break;
+        case '!':   flag_altform2 = 1;        break;
+        case '0':   flag_zeropad = 1;         break;
+        default:    done = 1;                 break;
+      }
+    }while( !done && (c=(*++fmt))!=0 );
+    /* Get the field width */
+    width = 0;
+    if( c=='*' ){
+      if( bArgList ){
+        width = (int)getIntArg(pArgList);
+      }else{
+        width = va_arg(ap,int);
+      }
+      if( width<0 ){
+        flag_leftjustify = 1;
+        width = -width;
+      }
+      c = *++fmt;
+    }else{
+      while( c>='0' && c<='9' ){
+        width = width*10 + c - '0';
+        c = *++fmt;
+      }
+    }
+    /* Get the precision */
+    if( c=='.' ){
+      precision = 0;
+      c = *++fmt;
+      if( c=='*' ){
+        if( bArgList ){
+          precision = (int)getIntArg(pArgList);
+        }else{
+          precision = va_arg(ap,int);
+        }
+        if( precision<0 ) precision = -precision;
+        c = *++fmt;
+      }else{
+        while( c>='0' && c<='9' ){
+          precision = precision*10 + c - '0';
+          c = *++fmt;
+        }
+      }
+    }else{
+      precision = -1;
+    }
+    /* Get the conversion type modifier */
+    if( c=='l' ){
+      flag_long = 1;
+      c = *++fmt;
+      if( c=='l' ){
+        flag_longlong = 1;
+        c = *++fmt;
+      }else{
+        flag_longlong = 0;
+      }
+    }else{
+      flag_long = flag_longlong = 0;
+    }
+    /* Fetch the info entry for the field */
+    infop = &fmtinfo[0];
+    xtype = etINVALID;
+    for(idx=0; idx<ArraySize(fmtinfo); idx++){
+      if( c==fmtinfo[idx].fmttype ){
+        infop = &fmtinfo[idx];
+        if( useIntern || (infop->flags & FLAG_INTERN)==0 ){
+          xtype = infop->type;
+        }else{
+          return;
+        }
+        break;
+      }
+    }
+
+    /*
+    ** At this point, variables are initialized as follows:
+    **
+    **   flag_alternateform          TRUE if a '#' is present.
+    **   flag_altform2               TRUE if a '!' is present.
+    **   flag_plussign               TRUE if a '+' is present.
+    **   flag_leftjustify            TRUE if a '-' is present or if the
+    **                               field width was negative.
+    **   flag_zeropad                TRUE if the width began with 0.
+    **   flag_long                   TRUE if the letter 'l' (ell) prefixed
+    **                               the conversion character.
+    **   flag_longlong               TRUE if the letter 'll' (ell ell) prefixed
+    **                               the conversion character.
+    **   flag_blanksign              TRUE if a ' ' is present.
+    **   width                       The specified field width.  This is
+    **                               always non-negative.  Zero is the default.
+    **   precision                   The specified precision.  The default
+    **                               is -1.
+    **   xtype                       The class of the conversion.
+    **   infop                       Pointer to the appropriate info struct.
+    */
+    switch( xtype ){
+      case etPOINTER:
+        flag_longlong = sizeof(char*)==sizeof(i64);
+        flag_long = sizeof(char*)==sizeof(long int);
+        /* Fall through into the next case */
+      case etORDINAL:
+      case etRADIX:
+        if( infop->flags & FLAG_SIGNED ){
+          i64 v;
+          if( bArgList ){
+            v = getIntArg(pArgList);
+          }else if( flag_longlong ){
+            v = va_arg(ap,i64);
+          }else if( flag_long ){
+            v = va_arg(ap,long int);
+          }else{
+            v = va_arg(ap,int);
+          }
+          if( v<0 ){
+            if( v==SMALLEST_INT64 ){
+              longvalue = ((u64)1)<<63;
+            }else{
+              longvalue = -v;
+            }
+            prefix = '-';
+          }else{
+            longvalue = v;
+            if( flag_plussign )        prefix = '+';
+            else if( flag_blanksign )  prefix = ' ';
+            else                       prefix = 0;
+          }
+        }else{
+          if( bArgList ){
+            longvalue = (u64)getIntArg(pArgList);
+          }else if( flag_longlong ){
+            longvalue = va_arg(ap,u64);
+          }else if( flag_long ){
+            longvalue = va_arg(ap,unsigned long int);
+          }else{
+            longvalue = va_arg(ap,unsigned int);
+          }
+          prefix = 0;
+        }
+        if( longvalue==0 ) flag_alternateform = 0;
+        if( flag_zeropad && precision<width-(prefix!=0) ){
+          precision = width-(prefix!=0);
+        }
+        if( precision<etBUFSIZE-10 ){
+          nOut = etBUFSIZE;
+          zOut = buf;
+        }else{
+          nOut = precision + 10;
+          zOut = zExtra = sqlite3Malloc( nOut );
+          if( zOut==0 ){
+            setStrAccumError(pAccum, STRACCUM_NOMEM);
+            return;
+          }
+        }
+        bufpt = &zOut[nOut-1];
+        if( xtype==etORDINAL ){
+          static const char zOrd[] = "thstndrd";
+          int x = (int)(longvalue % 10);
+          if( x>=4 || (longvalue/10)%10==1 ){
+            x = 0;
+          }
+          *(--bufpt) = zOrd[x*2+1];
+          *(--bufpt) = zOrd[x*2];
+        }
+        {
+          const char *cset = &aDigits[infop->charset];
+          u8 base = infop->base;
+          do{                                           /* Convert to ascii */
+            *(--bufpt) = cset[longvalue%base];
+            longvalue = longvalue/base;
+          }while( longvalue>0 );
+        }
+        length = (int)(&zOut[nOut-1]-bufpt);
+        for(idx=precision-length; idx>0; idx--){
+          *(--bufpt) = '0';                             /* Zero pad */
+        }
+        if( prefix ) *(--bufpt) = prefix;               /* Add sign */
+        if( flag_alternateform && infop->prefix ){      /* Add "0" or "0x" */
+          const char *pre;
+          char x;
+          pre = &aPrefix[infop->prefix];
+          for(; (x=(*pre))!=0; pre++) *(--bufpt) = x;
+        }
+        length = (int)(&zOut[nOut-1]-bufpt);
+        break;
+      case etFLOAT:
+      case etEXP:
+      case etGENERIC:
+        if( bArgList ){
+          realvalue = getDoubleArg(pArgList);
+        }else{
+          realvalue = va_arg(ap,double);
+        }
+#ifdef SQLITE_OMIT_FLOATING_POINT
+        length = 0;
+#else
+        if( precision<0 ) precision = 6;         /* Set default precision */
+        if( realvalue<0.0 ){
+          realvalue = -realvalue;
+          prefix = '-';
+        }else{
+          if( flag_plussign )          prefix = '+';
+          else if( flag_blanksign )    prefix = ' ';
+          else                         prefix = 0;
+        }
+        if( xtype==etGENERIC && precision>0 ) precision--;
+        for(idx=precision, rounder=0.5; idx>0; idx--, rounder*=0.1){}
+        if( xtype==etFLOAT ) realvalue += rounder;
+        /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */
+        exp = 0;
+        if( sqlite3IsNaN((double)realvalue) ){
+          bufpt = "NaN";
+          length = 3;
+          break;
+        }
+        if( realvalue>0.0 ){
+          LONGDOUBLE_TYPE scale = 1.0;
+          while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;}
+          while( realvalue>=1e64*scale && exp<=350 ){ scale *= 1e64; exp+=64; }
+          while( realvalue>=1e8*scale && exp<=350 ){ scale *= 1e8; exp+=8; }
+          while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; }
+          realvalue /= scale;
+          while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; }
+          while( realvalue<1.0 ){ realvalue *= 10.0; exp--; }
+          if( exp>350 ){
+            if( prefix=='-' ){
+              bufpt = "-Inf";
+            }else if( prefix=='+' ){
+              bufpt = "+Inf";
+            }else{
+              bufpt = "Inf";
+            }
+            length = sqlite3Strlen30(bufpt);
+            break;
+          }
+        }
+        bufpt = buf;
+        /*
+        ** If the field type is etGENERIC, then convert to either etEXP
+        ** or etFLOAT, as appropriate.
+        */
+        if( xtype!=etFLOAT ){
+          realvalue += rounder;
+          if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; }
+        }
+        if( xtype==etGENERIC ){
+          flag_rtz = !flag_alternateform;
+          if( exp<-4 || exp>precision ){
+            xtype = etEXP;
+          }else{
+            precision = precision - exp;
+            xtype = etFLOAT;
+          }
+        }else{
+          flag_rtz = flag_altform2;
+        }
+        if( xtype==etEXP ){
+          e2 = 0;
+        }else{
+          e2 = exp;
+        }
+        if( MAX(e2,0)+precision+width > etBUFSIZE - 15 ){
+          bufpt = zExtra = sqlite3Malloc( MAX(e2,0)+precision+width+15 );
+          if( bufpt==0 ){
+            setStrAccumError(pAccum, STRACCUM_NOMEM);
+            return;
+          }
+        }
+        zOut = bufpt;
+        nsd = 16 + flag_altform2*10;
+        flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2;
+        /* The sign in front of the number */
+        if( prefix ){
+          *(bufpt++) = prefix;
+        }
+        /* Digits prior to the decimal point */
+        if( e2<0 ){
+          *(bufpt++) = '0';
+        }else{
+          for(; e2>=0; e2--){
+            *(bufpt++) = et_getdigit(&realvalue,&nsd);
+          }
+        }
+        /* The decimal point */
+        if( flag_dp ){
+          *(bufpt++) = '.';
+        }
+        /* "0" digits after the decimal point but before the first
+        ** significant digit of the number */
+        for(e2++; e2<0; precision--, e2++){
+          assert( precision>0 );
+          *(bufpt++) = '0';
+        }
+        /* Significant digits after the decimal point */
+        while( (precision--)>0 ){
+          *(bufpt++) = et_getdigit(&realvalue,&nsd);
+        }
+        /* Remove trailing zeros and the "." if no digits follow the "." */
+        if( flag_rtz && flag_dp ){
+          while( bufpt[-1]=='0' ) *(--bufpt) = 0;
+          assert( bufpt>zOut );
+          if( bufpt[-1]=='.' ){
+            if( flag_altform2 ){
+              *(bufpt++) = '0';
+            }else{
+              *(--bufpt) = 0;
+            }
+          }
+        }
+        /* Add the "eNNN" suffix */
+        if( xtype==etEXP ){
+          *(bufpt++) = aDigits[infop->charset];
+          if( exp<0 ){
+            *(bufpt++) = '-'; exp = -exp;
+          }else{
+            *(bufpt++) = '+';
+          }
+          if( exp>=100 ){
+            *(bufpt++) = (char)((exp/100)+'0');        /* 100's digit */
+            exp %= 100;
+          }
+          *(bufpt++) = (char)(exp/10+'0');             /* 10's digit */
+          *(bufpt++) = (char)(exp%10+'0');             /* 1's digit */
+        }
+        *bufpt = 0;
+
+        /* The converted number is in buf[] and zero terminated. Output it.
+        ** Note that the number is in the usual order, not reversed as with
+        ** integer conversions. */
+        length = (int)(bufpt-zOut);
+        bufpt = zOut;
+
+        /* Special case:  Add leading zeros if the flag_zeropad flag is
+        ** set and we are not left justified */
+        if( flag_zeropad && !flag_leftjustify && length < width){
+          int i;
+          int nPad = width - length;
+          for(i=width; i>=nPad; i--){
+            bufpt[i] = bufpt[i-nPad];
+          }
+          i = prefix!=0;
+          while( nPad-- ) bufpt[i++] = '0';
+          length = width;
+        }
+#endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */
+        break;
+      case etSIZE:
+        if( !bArgList ){
+          *(va_arg(ap,int*)) = pAccum->nChar;
+        }
+        length = width = 0;
+        break;
+      case etPERCENT:
+        buf[0] = '%';
+        bufpt = buf;
+        length = 1;
+        break;
+      case etCHARX:
+        if( bArgList ){
+          bufpt = getTextArg(pArgList);
+          c = bufpt ? bufpt[0] : 0;
+        }else{
+          c = va_arg(ap,int);
+        }
+        if( precision>1 ){
+          width -= precision-1;
+          if( width>1 && !flag_leftjustify ){
+            sqlite3AppendChar(pAccum, width-1, ' ');
+            width = 0;
+          }
+          sqlite3AppendChar(pAccum, precision-1, c);
+        }
+        length = 1;
+        buf[0] = c;
+        bufpt = buf;
+        break;
+      case etSTRING:
+      case etDYNSTRING:
+        if( bArgList ){
+          bufpt = getTextArg(pArgList);
+        }else{
+          bufpt = va_arg(ap,char*);
+        }
+        if( bufpt==0 ){
+          bufpt = "";
+        }else if( xtype==etDYNSTRING && !bArgList ){
+          zExtra = bufpt;
+        }
+        if( precision>=0 ){
+          for(length=0; length<precision && bufpt[length]; length++){}
+        }else{
+          length = sqlite3Strlen30(bufpt);
+        }
+        break;
+      case etSQLESCAPE:
+      case etSQLESCAPE2:
+      case etSQLESCAPE3: {
+        int i, j, k, n, isnull;
+        int needQuote;
+        char ch;
+        char q = ((xtype==etSQLESCAPE3)?'"':'\'');   /* Quote character */
+        char *escarg;
+
+        if( bArgList ){
+          escarg = getTextArg(pArgList);
+        }else{
+          escarg = va_arg(ap,char*);
+        }
+        isnull = escarg==0;
+        if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)");
+        k = precision;
+        for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){
+          if( ch==q )  n++;
+        }
+        needQuote = !isnull && xtype==etSQLESCAPE2;
+        n += i + 1 + needQuote*2;
+        if( n>etBUFSIZE ){
+          bufpt = zExtra = sqlite3Malloc( n );
+          if( bufpt==0 ){
+            setStrAccumError(pAccum, STRACCUM_NOMEM);
+            return;
+          }
+        }else{
+          bufpt = buf;
+        }
+        j = 0;
+        if( needQuote ) bufpt[j++] = q;
+        k = i;
+        for(i=0; i<k; i++){
+          bufpt[j++] = ch = escarg[i];
+          if( ch==q ) bufpt[j++] = ch;
+        }
+        if( needQuote ) bufpt[j++] = q;
+        bufpt[j] = 0;
+        length = j;
+        /* The precision in %q and %Q means how many input characters to
+        ** consume, not the length of the output...
+        ** if( precision>=0 && precision<length ) length = precision; */
+        break;
+      }
+      case etTOKEN: {
+        Token *pToken = va_arg(ap, Token*);
+        assert( bArgList==0 );
+        if( pToken && pToken->n ){
+          sqlite3StrAccumAppend(pAccum, (const char*)pToken->z, pToken->n);
+        }
+        length = width = 0;
+        break;
+      }
+      case etSRCLIST: {
+        SrcList *pSrc = va_arg(ap, SrcList*);
+        int k = va_arg(ap, int);
+        struct SrcList_item *pItem = &pSrc->a[k];
+        assert( bArgList==0 );
+        assert( k>=0 && k<pSrc->nSrc );
+        if( pItem->zDatabase ){
+          sqlite3StrAccumAppendAll(pAccum, pItem->zDatabase);
+          sqlite3StrAccumAppend(pAccum, ".", 1);
+        }
+        sqlite3StrAccumAppendAll(pAccum, pItem->zName);
+        length = width = 0;
+        break;
+      }
+      default: {
+        assert( xtype==etINVALID );
+        return;
+      }
+    }/* End switch over the format type */
+    /*
+    ** The text of the conversion is pointed to by "bufpt" and is
+    ** "length" characters long.  The field width is "width".  Do
+    ** the output.
+    */
+    width -= length;
+    if( width>0 && !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
+    sqlite3StrAccumAppend(pAccum, bufpt, length);
+    if( width>0 && flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
+
+    if( zExtra ){
+      sqlite3_free(zExtra);
+      zExtra = 0;
+    }
+  }/* End for loop over the format string */
+} /* End of function */
+
+/*
+** Enlarge the memory allocation on a StrAccum object so that it is
+** able to accept at least N more bytes of text.
+**
+** Return the number of bytes of text that StrAccum is able to accept
+** after the attempted enlargement.  The value returned might be zero.
+*/
+static int sqlite3StrAccumEnlarge(StrAccum *p, int N){
+  char *zNew;
+  assert( p->nChar+N >= p->nAlloc ); /* Only called if really needed */
+  if( p->accError ){
+    testcase(p->accError==STRACCUM_TOOBIG);
+    testcase(p->accError==STRACCUM_NOMEM);
+    return 0;
+  }
+  if( !p->useMalloc ){
+    N = p->nAlloc - p->nChar - 1;
+    setStrAccumError(p, STRACCUM_TOOBIG);
+    return N;
+  }else{
+    char *zOld = (p->zText==p->zBase ? 0 : p->zText);
+    i64 szNew = p->nChar;
+    szNew += N + 1;
+    if( szNew+p->nChar<=p->mxAlloc ){
+      /* Force exponential buffer size growth as long as it does not overflow,
+      ** to avoid having to call this routine too often */
+      szNew += p->nChar;
+    }
+    if( szNew > p->mxAlloc ){
+      sqlite3StrAccumReset(p);
+      setStrAccumError(p, STRACCUM_TOOBIG);
+      return 0;
+    }else{
+      p->nAlloc = (int)szNew;
+    }
+    if( p->useMalloc==1 ){
+      zNew = sqlite3DbRealloc(p->db, zOld, p->nAlloc);
+    }else{
+      zNew = sqlite3_realloc(zOld, p->nAlloc);
+    }
+    if( zNew ){
+      assert( p->zText!=0 || p->nChar==0 );
+      if( zOld==0 && p->nChar>0 ) memcpy(zNew, p->zText, p->nChar);
+      p->zText = zNew;
+      p->nAlloc = sqlite3DbMallocSize(p->db, zNew);
+    }else{
+      sqlite3StrAccumReset(p);
+      setStrAccumError(p, STRACCUM_NOMEM);
+      return 0;
+    }
+  }
+  return N;
+}
+
+/*
+** Append N copies of character c to the given string buffer.
+*/
+SQLITE_PRIVATE void sqlite3AppendChar(StrAccum *p, int N, char c){
+  if( p->nChar+N >= p->nAlloc && (N = sqlite3StrAccumEnlarge(p, N))<=0 ) return;
+  while( (N--)>0 ) p->zText[p->nChar++] = c;
+}
+
+/*
+** The StrAccum "p" is not large enough to accept N new bytes of z[].
+** So enlarge if first, then do the append.
+**
+** This is a helper routine to sqlite3StrAccumAppend() that does special-case
+** work (enlarging the buffer) using tail recursion, so that the
+** sqlite3StrAccumAppend() routine can use fast calling semantics.
+*/
+static void SQLITE_NOINLINE enlargeAndAppend(StrAccum *p, const char *z, int N){
+  N = sqlite3StrAccumEnlarge(p, N);
+  if( N>0 ){
+    memcpy(&p->zText[p->nChar], z, N);
+    p->nChar += N;
+  }
+}
+
+/*
+** Append N bytes of text from z to the StrAccum object.  Increase the
+** size of the memory allocation for StrAccum if necessary.
+*/
+SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){
+  assert( z!=0 );
+  assert( p->zText!=0 || p->nChar==0 || p->accError );
+  assert( N>=0 );
+  assert( p->accError==0 || p->nAlloc==0 );
+  if( p->nChar+N >= p->nAlloc ){
+    enlargeAndAppend(p,z,N);
+  }else{
+    assert( p->zText );
+    p->nChar += N;
+    memcpy(&p->zText[p->nChar-N], z, N);
+  }
+}
+
+/*
+** Append the complete text of zero-terminated string z[] to the p string.
+*/
+SQLITE_PRIVATE void sqlite3StrAccumAppendAll(StrAccum *p, const char *z){
+  sqlite3StrAccumAppend(p, z, sqlite3Strlen30(z));
+}
+
+
+/*
+** Finish off a string by making sure it is zero-terminated.
+** Return a pointer to the resulting string.  Return a NULL
+** pointer if any kind of error was encountered.
+*/
+SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){
+  if( p->zText ){
+    p->zText[p->nChar] = 0;
+    if( p->useMalloc && p->zText==p->zBase ){
+      if( p->useMalloc==1 ){
+        p->zText = sqlite3DbMallocRaw(p->db, p->nChar+1 );
+      }else{
+        p->zText = sqlite3_malloc(p->nChar+1);
+      }
+      if( p->zText ){
+        memcpy(p->zText, p->zBase, p->nChar+1);
+      }else{
+        setStrAccumError(p, STRACCUM_NOMEM);
+      }
+    }
+  }
+  return p->zText;
+}
+
+/*
+** Reset an StrAccum string.  Reclaim all malloced memory.
+*/
+SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum *p){
+  if( p->zText!=p->zBase ){
+    if( p->useMalloc==1 ){
+      sqlite3DbFree(p->db, p->zText);
+    }else{
+      sqlite3_free(p->zText);
+    }
+  }
+  p->zText = 0;
+}
+
+/*
+** Initialize a string accumulator
+*/
+SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, char *zBase, int n, int mx){
+  p->zText = p->zBase = zBase;
+  p->db = 0;
+  p->nChar = 0;
+  p->nAlloc = n;
+  p->mxAlloc = mx;
+  p->useMalloc = 1;
+  p->accError = 0;
+}
+
+/*
+** Print into memory obtained from sqliteMalloc().  Use the internal
+** %-conversion extensions.
+*/
+SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3 *db, const char *zFormat, va_list ap){
+  char *z;
+  char zBase[SQLITE_PRINT_BUF_SIZE];
+  StrAccum acc;
+  assert( db!=0 );
+  sqlite3StrAccumInit(&acc, zBase, sizeof(zBase),
+                      db->aLimit[SQLITE_LIMIT_LENGTH]);
+  acc.db = db;
+  sqlite3VXPrintf(&acc, SQLITE_PRINTF_INTERNAL, zFormat, ap);
+  z = sqlite3StrAccumFinish(&acc);
+  if( acc.accError==STRACCUM_NOMEM ){
+    db->mallocFailed = 1;
+  }
+  return z;
+}
+
+/*
+** Print into memory obtained from sqliteMalloc().  Use the internal
+** %-conversion extensions.
+*/
+SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3 *db, const char *zFormat, ...){
+  va_list ap;
+  char *z;
+  va_start(ap, zFormat);
+  z = sqlite3VMPrintf(db, zFormat, ap);
+  va_end(ap);
+  return z;
+}
+
+/*
+** Like sqlite3MPrintf(), but call sqlite3DbFree() on zStr after formatting
+** the string and before returning.  This routine is intended to be used
+** to modify an existing string.  For example:
+**
+**       x = sqlite3MPrintf(db, x, "prefix %s suffix", x);
+**
+*/
+SQLITE_PRIVATE char *sqlite3MAppendf(sqlite3 *db, char *zStr, const char *zFormat, ...){
+  va_list ap;
+  char *z;
+  va_start(ap, zFormat);
+  z = sqlite3VMPrintf(db, zFormat, ap);
+  va_end(ap);
+  sqlite3DbFree(db, zStr);
+  return z;
+}
+
+/*
+** Print into memory obtained from sqlite3_malloc().  Omit the internal
+** %-conversion extensions.
+*/
+SQLITE_API char *sqlite3_vmprintf(const char *zFormat, va_list ap){
+  char *z;
+  char zBase[SQLITE_PRINT_BUF_SIZE];
+  StrAccum acc;
+
+#ifdef SQLITE_ENABLE_API_ARMOR  
+  if( zFormat==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  sqlite3StrAccumInit(&acc, zBase, sizeof(zBase), SQLITE_MAX_LENGTH);
+  acc.useMalloc = 2;
+  sqlite3VXPrintf(&acc, 0, zFormat, ap);
+  z = sqlite3StrAccumFinish(&acc);
+  return z;
+}
+
+/*
+** Print into memory obtained from sqlite3_malloc()().  Omit the internal
+** %-conversion extensions.
+*/
+SQLITE_API char *sqlite3_mprintf(const char *zFormat, ...){
+  va_list ap;
+  char *z;
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return 0;
+#endif
+  va_start(ap, zFormat);
+  z = sqlite3_vmprintf(zFormat, ap);
+  va_end(ap);
+  return z;
+}
+
+/*
+** sqlite3_snprintf() works like snprintf() except that it ignores the
+** current locale settings.  This is important for SQLite because we
+** are not able to use a "," as the decimal point in place of "." as
+** specified by some locales.
+**
+** Oops:  The first two arguments of sqlite3_snprintf() are backwards
+** from the snprintf() standard.  Unfortunately, it is too late to change
+** this without breaking compatibility, so we just have to live with the
+** mistake.
+**
+** sqlite3_vsnprintf() is the varargs version.
+*/
+SQLITE_API char *sqlite3_vsnprintf(int n, char *zBuf, const char *zFormat, va_list ap){
+  StrAccum acc;
+  if( n<=0 ) return zBuf;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( zBuf==0 || zFormat==0 ) {
+    (void)SQLITE_MISUSE_BKPT;
+    if( zBuf && n>0 ) zBuf[0] = 0;
+    return zBuf;
+  }
+#endif
+  sqlite3StrAccumInit(&acc, zBuf, n, 0);
+  acc.useMalloc = 0;
+  sqlite3VXPrintf(&acc, 0, zFormat, ap);
+  return sqlite3StrAccumFinish(&acc);
+}
+SQLITE_API char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){
+  char *z;
+  va_list ap;
+  va_start(ap,zFormat);
+  z = sqlite3_vsnprintf(n, zBuf, zFormat, ap);
+  va_end(ap);
+  return z;
+}
+
+/*
+** This is the routine that actually formats the sqlite3_log() message.
+** We house it in a separate routine from sqlite3_log() to avoid using
+** stack space on small-stack systems when logging is disabled.
+**
+** sqlite3_log() must render into a static buffer.  It cannot dynamically
+** allocate memory because it might be called while the memory allocator
+** mutex is held.
+*/
+static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){
+  StrAccum acc;                          /* String accumulator */
+  char zMsg[SQLITE_PRINT_BUF_SIZE*3];    /* Complete log message */
+
+  sqlite3StrAccumInit(&acc, zMsg, sizeof(zMsg), 0);
+  acc.useMalloc = 0;
+  sqlite3VXPrintf(&acc, 0, zFormat, ap);
+  sqlite3GlobalConfig.xLog(sqlite3GlobalConfig.pLogArg, iErrCode,
+                           sqlite3StrAccumFinish(&acc));
+}
+
+/*
+** Format and write a message to the log if logging is enabled.
+*/
+SQLITE_API void sqlite3_log(int iErrCode, const char *zFormat, ...){
+  va_list ap;                             /* Vararg list */
+  if( sqlite3GlobalConfig.xLog ){
+    va_start(ap, zFormat);
+    renderLogMsg(iErrCode, zFormat, ap);
+    va_end(ap);
+  }
+}
+
+#if defined(SQLITE_DEBUG)
+/*
+** A version of printf() that understands %lld.  Used for debugging.
+** The printf() built into some versions of windows does not understand %lld
+** and segfaults if you give it a long long int.
+*/
+SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){
+  va_list ap;
+  StrAccum acc;
+  char zBuf[500];
+  sqlite3StrAccumInit(&acc, zBuf, sizeof(zBuf), 0);
+  acc.useMalloc = 0;
+  va_start(ap,zFormat);
+  sqlite3VXPrintf(&acc, 0, zFormat, ap);
+  va_end(ap);
+  sqlite3StrAccumFinish(&acc);
+  fprintf(stdout,"%s", zBuf);
+  fflush(stdout);
+}
+#endif
+
+#ifdef SQLITE_DEBUG
+/*************************************************************************
+** Routines for implementing the "TreeView" display of hierarchical
+** data structures for debugging.
+**
+** The main entry points (coded elsewhere) are:
+**     sqlite3TreeViewExpr(0, pExpr, 0);
+**     sqlite3TreeViewExprList(0, pList, 0, 0);
+**     sqlite3TreeViewSelect(0, pSelect, 0);
+** Insert calls to those routines while debugging in order to display
+** a diagram of Expr, ExprList, and Select objects.
+**
+*/
+/* Add a new subitem to the tree.  The moreToFollow flag indicates that this
+** is not the last item in the tree. */
+SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){
+  if( p==0 ){
+    p = sqlite3_malloc( sizeof(*p) );
+    if( p==0 ) return 0;
+    memset(p, 0, sizeof(*p));
+  }else{
+    p->iLevel++;
+  }
+  assert( moreToFollow==0 || moreToFollow==1 );
+  if( p->iLevel<sizeof(p->bLine) ) p->bLine[p->iLevel] = moreToFollow;
+  return p;
+}
+/* Finished with one layer of the tree */
+SQLITE_PRIVATE void sqlite3TreeViewPop(TreeView *p){
+  if( p==0 ) return;
+  p->iLevel--;
+  if( p->iLevel<0 ) sqlite3_free(p);
+}
+/* Generate a single line of output for the tree, with a prefix that contains
+** all the appropriate tree lines */
+SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){
+  va_list ap;
+  int i;
+  StrAccum acc;
+  char zBuf[500];
+  sqlite3StrAccumInit(&acc, zBuf, sizeof(zBuf), 0);
+  acc.useMalloc = 0;
+  if( p ){
+    for(i=0; i<p->iLevel && i<sizeof(p->bLine)-1; i++){
+      sqlite3StrAccumAppend(&acc, p->bLine[i] ? "|   " : "    ", 4);
+    }
+    sqlite3StrAccumAppend(&acc, p->bLine[i] ? "|-- " : "'-- ", 4);
+  }
+  va_start(ap, zFormat);
+  sqlite3VXPrintf(&acc, 0, zFormat, ap);
+  va_end(ap);
+  if( zBuf[acc.nChar-1]!='\n' ) sqlite3StrAccumAppend(&acc, "\n", 1);
+  sqlite3StrAccumFinish(&acc);
+  fprintf(stdout,"%s", zBuf);
+  fflush(stdout);
+}
+/* Shorthand for starting a new tree item that consists of a single label */
+SQLITE_PRIVATE void sqlite3TreeViewItem(TreeView *p, const char *zLabel, u8 moreToFollow){
+  p = sqlite3TreeViewPush(p, moreToFollow);
+  sqlite3TreeViewLine(p, "%s", zLabel);
+}
+#endif /* SQLITE_DEBUG */
+
+/*
+** variable-argument wrapper around sqlite3VXPrintf().
+*/
+SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){
+  va_list ap;
+  va_start(ap,zFormat);
+  sqlite3VXPrintf(p, bFlags, zFormat, ap);
+  va_end(ap);
+}
+
+/************** End of printf.c **********************************************/
+/************** Begin file random.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code to implement a pseudo-random number
+** generator (PRNG) for SQLite.
+**
+** Random numbers are used by some of the database backends in order
+** to generate random integer keys for tables or random filenames.
+*/
+
+
+/* All threads share a single random number generator.
+** This structure is the current state of the generator.
+*/
+static SQLITE_WSD struct sqlite3PrngType {
+  unsigned char isInit;          /* True if initialized */
+  unsigned char i, j;            /* State variables */
+  unsigned char s[256];          /* State variables */
+} sqlite3Prng;
+
+/*
+** Return N random bytes.
+*/
+SQLITE_API void sqlite3_randomness(int N, void *pBuf){
+  unsigned char t;
+  unsigned char *zBuf = pBuf;
+
+  /* The "wsdPrng" macro will resolve to the pseudo-random number generator
+  ** state vector.  If writable static data is unsupported on the target,
+  ** we have to locate the state vector at run-time.  In the more common
+  ** case where writable static data is supported, wsdPrng can refer directly
+  ** to the "sqlite3Prng" state vector declared above.
+  */
+#ifdef SQLITE_OMIT_WSD
+  struct sqlite3PrngType *p = &GLOBAL(struct sqlite3PrngType, sqlite3Prng);
+# define wsdPrng p[0]
+#else
+# define wsdPrng sqlite3Prng
+#endif
+
+#if SQLITE_THREADSAFE
+  sqlite3_mutex *mutex;
+#endif
+
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize() ) return;
+#endif
+
+#if SQLITE_THREADSAFE
+  mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PRNG);
+#endif
+
+  sqlite3_mutex_enter(mutex);
+  if( N<=0 || pBuf==0 ){
+    wsdPrng.isInit = 0;
+    sqlite3_mutex_leave(mutex);
+    return;
+  }
+
+  /* Initialize the state of the random number generator once,
+  ** the first time this routine is called.  The seed value does
+  ** not need to contain a lot of randomness since we are not
+  ** trying to do secure encryption or anything like that...
+  **
+  ** Nothing in this file or anywhere else in SQLite does any kind of
+  ** encryption.  The RC4 algorithm is being used as a PRNG (pseudo-random
+  ** number generator) not as an encryption device.
+  */
+  if( !wsdPrng.isInit ){
+    int i;
+    char k[256];
+    wsdPrng.j = 0;
+    wsdPrng.i = 0;
+    sqlite3OsRandomness(sqlite3_vfs_find(0), 256, k);
+    for(i=0; i<256; i++){
+      wsdPrng.s[i] = (u8)i;
+    }
+    for(i=0; i<256; i++){
+      wsdPrng.j += wsdPrng.s[i] + k[i];
+      t = wsdPrng.s[wsdPrng.j];
+      wsdPrng.s[wsdPrng.j] = wsdPrng.s[i];
+      wsdPrng.s[i] = t;
+    }
+    wsdPrng.isInit = 1;
+  }
+
+  assert( N>0 );
+  do{
+    wsdPrng.i++;
+    t = wsdPrng.s[wsdPrng.i];
+    wsdPrng.j += t;
+    wsdPrng.s[wsdPrng.i] = wsdPrng.s[wsdPrng.j];
+    wsdPrng.s[wsdPrng.j] = t;
+    t += wsdPrng.s[wsdPrng.i];
+    *(zBuf++) = wsdPrng.s[t];
+  }while( --N );
+  sqlite3_mutex_leave(mutex);
+}
+
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+/*
+** For testing purposes, we sometimes want to preserve the state of
+** PRNG and restore the PRNG to its saved state at a later time, or
+** to reset the PRNG to its initial state.  These routines accomplish
+** those tasks.
+**
+** The sqlite3_test_control() interface calls these routines to
+** control the PRNG.
+*/
+static SQLITE_WSD struct sqlite3PrngType sqlite3SavedPrng;
+SQLITE_PRIVATE void sqlite3PrngSaveState(void){
+  memcpy(
+    &GLOBAL(struct sqlite3PrngType, sqlite3SavedPrng),
+    &GLOBAL(struct sqlite3PrngType, sqlite3Prng),
+    sizeof(sqlite3Prng)
+  );
+}
+SQLITE_PRIVATE void sqlite3PrngRestoreState(void){
+  memcpy(
+    &GLOBAL(struct sqlite3PrngType, sqlite3Prng),
+    &GLOBAL(struct sqlite3PrngType, sqlite3SavedPrng),
+    sizeof(sqlite3Prng)
+  );
+}
+#endif /* SQLITE_OMIT_BUILTIN_TEST */
+
+/************** End of random.c **********************************************/
+/************** Begin file threads.c *****************************************/
+/*
+** 2012 July 21
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file presents a simple cross-platform threading interface for
+** use internally by SQLite.
+**
+** A "thread" can be created using sqlite3ThreadCreate().  This thread
+** runs independently of its creator until it is joined using
+** sqlite3ThreadJoin(), at which point it terminates.
+**
+** Threads do not have to be real.  It could be that the work of the
+** "thread" is done by the main thread at either the sqlite3ThreadCreate()
+** or sqlite3ThreadJoin() call.  This is, in fact, what happens in
+** single threaded systems.  Nothing in SQLite requires multiple threads.
+** This interface exists so that applications that want to take advantage
+** of multiple cores can do so, while also allowing applications to stay
+** single-threaded if desired.
+*/
+#if SQLITE_OS_WIN
+#endif
+
+#if SQLITE_MAX_WORKER_THREADS>0
+
+/********************************* Unix Pthreads ****************************/
+#if SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) && SQLITE_THREADSAFE>0
+
+#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
+/* #include <pthread.h> */
+
+/* A running thread */
+struct SQLiteThread {
+  pthread_t tid;                 /* Thread ID */
+  int done;                      /* Set to true when thread finishes */
+  void *pOut;                    /* Result returned by the thread */
+  void *(*xTask)(void*);         /* The thread routine */
+  void *pIn;                     /* Argument to the thread */
+};
+
+/* Create a new thread */
+SQLITE_PRIVATE int sqlite3ThreadCreate(
+  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
+  void *(*xTask)(void*),    /* Routine to run in a separate thread */
+  void *pIn                 /* Argument passed into xTask() */
+){
+  SQLiteThread *p;
+  int rc;
+
+  assert( ppThread!=0 );
+  assert( xTask!=0 );
+  /* This routine is never used in single-threaded mode */
+  assert( sqlite3GlobalConfig.bCoreMutex!=0 );
+
+  *ppThread = 0;
+  p = sqlite3Malloc(sizeof(*p));
+  if( p==0 ) return SQLITE_NOMEM;
+  memset(p, 0, sizeof(*p));
+  p->xTask = xTask;
+  p->pIn = pIn;
+  if( sqlite3FaultSim(200) ){
+    rc = 1;
+  }else{    
+    rc = pthread_create(&p->tid, 0, xTask, pIn);
+  }
+  if( rc ){
+    p->done = 1;
+    p->pOut = xTask(pIn);
+  }
+  *ppThread = p;
+  return SQLITE_OK;
+}
+
+/* Get the results of the thread */
+SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
+  int rc;
+
+  assert( ppOut!=0 );
+  if( NEVER(p==0) ) return SQLITE_NOMEM;
+  if( p->done ){
+    *ppOut = p->pOut;
+    rc = SQLITE_OK;
+  }else{
+    rc = pthread_join(p->tid, ppOut) ? SQLITE_ERROR : SQLITE_OK;
+  }
+  sqlite3_free(p);
+  return rc;
+}
+
+#endif /* SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) */
+/******************************** End Unix Pthreads *************************/
+
+
+/********************************* Win32 Threads ****************************/
+#if SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_THREADSAFE>0
+
+#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
+#include <process.h>
+
+/* A running thread */
+struct SQLiteThread {
+  void *tid;               /* The thread handle */
+  unsigned id;             /* The thread identifier */
+  void *(*xTask)(void*);   /* The routine to run as a thread */
+  void *pIn;               /* Argument to xTask */
+  void *pResult;           /* Result of xTask */
+};
+
+/* Thread procedure Win32 compatibility shim */
+static unsigned __stdcall sqlite3ThreadProc(
+  void *pArg  /* IN: Pointer to the SQLiteThread structure */
+){
+  SQLiteThread *p = (SQLiteThread *)pArg;
+
+  assert( p!=0 );
+#if 0
+  /*
+  ** This assert appears to trigger spuriously on certain
+  ** versions of Windows, possibly due to _beginthreadex()
+  ** and/or CreateThread() not fully setting their thread
+  ** ID parameter before starting the thread.
+  */
+  assert( p->id==GetCurrentThreadId() );
+#endif
+  assert( p->xTask!=0 );
+  p->pResult = p->xTask(p->pIn);
+
+  _endthreadex(0);
+  return 0; /* NOT REACHED */
+}
+
+/* Create a new thread */
+SQLITE_PRIVATE int sqlite3ThreadCreate(
+  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
+  void *(*xTask)(void*),    /* Routine to run in a separate thread */
+  void *pIn                 /* Argument passed into xTask() */
+){
+  SQLiteThread *p;
+
+  assert( ppThread!=0 );
+  assert( xTask!=0 );
+  *ppThread = 0;
+  p = sqlite3Malloc(sizeof(*p));
+  if( p==0 ) return SQLITE_NOMEM;
+  if( sqlite3GlobalConfig.bCoreMutex==0 ){
+    memset(p, 0, sizeof(*p));
+  }else{
+    p->xTask = xTask;
+    p->pIn = pIn;
+    p->tid = (void*)_beginthreadex(0, 0, sqlite3ThreadProc, p, 0, &p->id);
+    if( p->tid==0 ){
+      memset(p, 0, sizeof(*p));
+    }
+  }
+  if( p->xTask==0 ){
+    p->id = GetCurrentThreadId();
+    p->pResult = xTask(pIn);
+  }
+  *ppThread = p;
+  return SQLITE_OK;
+}
+
+SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject); /* os_win.c */
+
+/* Get the results of the thread */
+SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
+  DWORD rc;
+  BOOL bRc;
+
+  assert( ppOut!=0 );
+  if( NEVER(p==0) ) return SQLITE_NOMEM;
+  if( p->xTask==0 ){
+    assert( p->id==GetCurrentThreadId() );
+    rc = WAIT_OBJECT_0;
+    assert( p->tid==0 );
+  }else{
+    assert( p->id!=0 && p->id!=GetCurrentThreadId() );
+    rc = sqlite3Win32Wait((HANDLE)p->tid);
+    assert( rc!=WAIT_IO_COMPLETION );
+    bRc = CloseHandle((HANDLE)p->tid);
+    assert( bRc );
+  }
+  if( rc==WAIT_OBJECT_0 ) *ppOut = p->pResult;
+  sqlite3_free(p);
+  return (rc==WAIT_OBJECT_0) ? SQLITE_OK : SQLITE_ERROR;
+}
+
+#endif /* SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT */
+/******************************** End Win32 Threads *************************/
+
+
+/********************************* Single-Threaded **************************/
+#ifndef SQLITE_THREADS_IMPLEMENTED
+/*
+** This implementation does not actually create a new thread.  It does the
+** work of the thread in the main thread, when either the thread is created
+** or when it is joined
+*/
+
+/* A running thread */
+struct SQLiteThread {
+  void *(*xTask)(void*);   /* The routine to run as a thread */
+  void *pIn;               /* Argument to xTask */
+  void *pResult;           /* Result of xTask */
+};
+
+/* Create a new thread */
+SQLITE_PRIVATE int sqlite3ThreadCreate(
+  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
+  void *(*xTask)(void*),    /* Routine to run in a separate thread */
+  void *pIn                 /* Argument passed into xTask() */
+){
+  SQLiteThread *p;
+
+  assert( ppThread!=0 );
+  assert( xTask!=0 );
+  *ppThread = 0;
+  p = sqlite3Malloc(sizeof(*p));
+  if( p==0 ) return SQLITE_NOMEM;
+  if( (SQLITE_PTR_TO_INT(p)/17)&1 ){
+    p->xTask = xTask;
+    p->pIn = pIn;
+  }else{
+    p->xTask = 0;
+    p->pResult = xTask(pIn);
+  }
+  *ppThread = p;
+  return SQLITE_OK;
+}
+
+/* Get the results of the thread */
+SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
+
+  assert( ppOut!=0 );
+  if( NEVER(p==0) ) return SQLITE_NOMEM;
+  if( p->xTask ){
+    *ppOut = p->xTask(p->pIn);
+  }else{
+    *ppOut = p->pResult;
+  }
+  sqlite3_free(p);
+
+#if defined(SQLITE_TEST)
+  {
+    void *pTstAlloc = sqlite3Malloc(10);
+    if (!pTstAlloc) return SQLITE_NOMEM;
+    sqlite3_free(pTstAlloc);
+  }
+#endif
+
+  return SQLITE_OK;
+}
+
+#endif /* !defined(SQLITE_THREADS_IMPLEMENTED) */
+/****************************** End Single-Threaded *************************/
+#endif /* SQLITE_MAX_WORKER_THREADS>0 */
+
+/************** End of threads.c *********************************************/
+/************** Begin file utf.c *********************************************/
+/*
+** 2004 April 13
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains routines used to translate between UTF-8, 
+** UTF-16, UTF-16BE, and UTF-16LE.
+**
+** Notes on UTF-8:
+**
+**   Byte-0    Byte-1    Byte-2    Byte-3    Value
+**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
+**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
+**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
+**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
+**
+**
+** Notes on UTF-16:  (with wwww+1==uuuuu)
+**
+**      Word-0               Word-1          Value
+**  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
+**  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
+**
+**
+** BOM or Byte Order Mark:
+**     0xff 0xfe   little-endian utf-16 follows
+**     0xfe 0xff   big-endian utf-16 follows
+**
+*/
+/* #include <assert.h> */
+
+#ifndef SQLITE_AMALGAMATION
+/*
+** The following constant value is used by the SQLITE_BIGENDIAN and
+** SQLITE_LITTLEENDIAN macros.
+*/
+SQLITE_PRIVATE const int sqlite3one = 1;
+#endif /* SQLITE_AMALGAMATION */
+
+/*
+** This lookup table is used to help decode the first byte of
+** a multi-byte UTF8 character.
+*/
+static const unsigned char sqlite3Utf8Trans1[] = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+
+#define WRITE_UTF8(zOut, c) {                          \
+  if( c<0x00080 ){                                     \
+    *zOut++ = (u8)(c&0xFF);                            \
+  }                                                    \
+  else if( c<0x00800 ){                                \
+    *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);                \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+  else if( c<0x10000 ){                                \
+    *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);               \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }else{                                               \
+    *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);             \
+    *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);             \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+}
+
+#define WRITE_UTF16LE(zOut, c) {                                    \
+  if( c<=0xFFFF ){                                                  \
+    *zOut++ = (u8)(c&0x00FF);                                       \
+    *zOut++ = (u8)((c>>8)&0x00FF);                                  \
+  }else{                                                            \
+    *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
+    *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03));              \
+    *zOut++ = (u8)(c&0x00FF);                                       \
+    *zOut++ = (u8)(0x00DC + ((c>>8)&0x03));                         \
+  }                                                                 \
+}
+
+#define WRITE_UTF16BE(zOut, c) {                                    \
+  if( c<=0xFFFF ){                                                  \
+    *zOut++ = (u8)((c>>8)&0x00FF);                                  \
+    *zOut++ = (u8)(c&0x00FF);                                       \
+  }else{                                                            \
+    *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03));              \
+    *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0));  \
+    *zOut++ = (u8)(0x00DC + ((c>>8)&0x03));                         \
+    *zOut++ = (u8)(c&0x00FF);                                       \
+  }                                                                 \
+}
+
+#define READ_UTF16LE(zIn, TERM, c){                                   \
+  c = (*zIn++);                                                       \
+  c += ((*zIn++)<<8);                                                 \
+  if( c>=0xD800 && c<0xE000 && TERM ){                                \
+    int c2 = (*zIn++);                                                \
+    c2 += ((*zIn++)<<8);                                              \
+    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
+  }                                                                   \
+}
+
+#define READ_UTF16BE(zIn, TERM, c){                                   \
+  c = ((*zIn++)<<8);                                                  \
+  c += (*zIn++);                                                      \
+  if( c>=0xD800 && c<0xE000 && TERM ){                                \
+    int c2 = ((*zIn++)<<8);                                           \
+    c2 += (*zIn++);                                                   \
+    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
+  }                                                                   \
+}
+
+/*
+** Translate a single UTF-8 character.  Return the unicode value.
+**
+** During translation, assume that the byte that zTerm points
+** is a 0x00.
+**
+** Write a pointer to the next unread byte back into *pzNext.
+**
+** Notes On Invalid UTF-8:
+**
+**  *  This routine never allows a 7-bit character (0x00 through 0x7f) to
+**     be encoded as a multi-byte character.  Any multi-byte character that
+**     attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
+**
+**  *  This routine never allows a UTF16 surrogate value to be encoded.
+**     If a multi-byte character attempts to encode a value between
+**     0xd800 and 0xe000 then it is rendered as 0xfffd.
+**
+**  *  Bytes in the range of 0x80 through 0xbf which occur as the first
+**     byte of a character are interpreted as single-byte characters
+**     and rendered as themselves even though they are technically
+**     invalid characters.
+**
+**  *  This routine accepts over-length UTF8 encodings
+**     for unicode values 0x80 and greater.  It does not change over-length
+**     encodings to 0xfffd as some systems recommend.
+*/
+#define READ_UTF8(zIn, zTerm, c)                           \
+  c = *(zIn++);                                            \
+  if( c>=0xc0 ){                                           \
+    c = sqlite3Utf8Trans1[c-0xc0];                         \
+    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
+      c = (c<<6) + (0x3f & *(zIn++));                      \
+    }                                                      \
+    if( c<0x80                                             \
+        || (c&0xFFFFF800)==0xD800                          \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
+  }
+SQLITE_PRIVATE u32 sqlite3Utf8Read(
+  const unsigned char **pz    /* Pointer to string from which to read char */
+){
+  unsigned int c;
+
+  /* Same as READ_UTF8() above but without the zTerm parameter.
+  ** For this routine, we assume the UTF8 string is always zero-terminated.
+  */
+  c = *((*pz)++);
+  if( c>=0xc0 ){
+    c = sqlite3Utf8Trans1[c-0xc0];
+    while( (*(*pz) & 0xc0)==0x80 ){
+      c = (c<<6) + (0x3f & *((*pz)++));
+    }
+    if( c<0x80
+        || (c&0xFFFFF800)==0xD800
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }
+  }
+  return c;
+}
+
+
+
+
+/*
+** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
+** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
+*/ 
+/* #define TRANSLATE_TRACE 1 */
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** This routine transforms the internal text encoding used by pMem to
+** desiredEnc. It is an error if the string is already of the desired
+** encoding, or if *pMem does not contain a string value.
+*/
+SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
+  int len;                    /* Maximum length of output string in bytes */
+  unsigned char *zOut;                  /* Output buffer */
+  unsigned char *zIn;                   /* Input iterator */
+  unsigned char *zTerm;                 /* End of input */
+  unsigned char *z;                     /* Output iterator */
+  unsigned int c;
+
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( pMem->flags&MEM_Str );
+  assert( pMem->enc!=desiredEnc );
+  assert( pMem->enc!=0 );
+  assert( pMem->n>=0 );
+
+#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
+  {
+    char zBuf[100];
+    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
+    fprintf(stderr, "INPUT:  %s\n", zBuf);
+  }
+#endif
+
+  /* If the translation is between UTF-16 little and big endian, then 
+  ** all that is required is to swap the byte order. This case is handled
+  ** differently from the others.
+  */
+  if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
+    u8 temp;
+    int rc;
+    rc = sqlite3VdbeMemMakeWriteable(pMem);
+    if( rc!=SQLITE_OK ){
+      assert( rc==SQLITE_NOMEM );
+      return SQLITE_NOMEM;
+    }
+    zIn = (u8*)pMem->z;
+    zTerm = &zIn[pMem->n&~1];
+    while( zIn<zTerm ){
+      temp = *zIn;
+      *zIn = *(zIn+1);
+      zIn++;
+      *zIn++ = temp;
+    }
+    pMem->enc = desiredEnc;
+    goto translate_out;
+  }
+
+  /* Set len to the maximum number of bytes required in the output buffer. */
+  if( desiredEnc==SQLITE_UTF8 ){
+    /* When converting from UTF-16, the maximum growth results from
+    ** translating a 2-byte character to a 4-byte UTF-8 character.
+    ** A single byte is required for the output string
+    ** nul-terminator.
+    */
+    pMem->n &= ~1;
+    len = pMem->n * 2 + 1;
+  }else{
+    /* When converting from UTF-8 to UTF-16 the maximum growth is caused
+    ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
+    ** character. Two bytes are required in the output buffer for the
+    ** nul-terminator.
+    */
+    len = pMem->n * 2 + 2;
+  }
+
+  /* Set zIn to point at the start of the input buffer and zTerm to point 1
+  ** byte past the end.
+  **
+  ** Variable zOut is set to point at the output buffer, space obtained
+  ** from sqlite3_malloc().
+  */
+  zIn = (u8*)pMem->z;
+  zTerm = &zIn[pMem->n];
+  zOut = sqlite3DbMallocRaw(pMem->db, len);
+  if( !zOut ){
+    return SQLITE_NOMEM;
+  }
+  z = zOut;
+
+  if( pMem->enc==SQLITE_UTF8 ){
+    if( desiredEnc==SQLITE_UTF16LE ){
+      /* UTF-8 -> UTF-16 Little-endian */
+      while( zIn<zTerm ){
+        READ_UTF8(zIn, zTerm, c);
+        WRITE_UTF16LE(z, c);
+      }
+    }else{
+      assert( desiredEnc==SQLITE_UTF16BE );
+      /* UTF-8 -> UTF-16 Big-endian */
+      while( zIn<zTerm ){
+        READ_UTF8(zIn, zTerm, c);
+        WRITE_UTF16BE(z, c);
+      }
+    }
+    pMem->n = (int)(z - zOut);
+    *z++ = 0;
+  }else{
+    assert( desiredEnc==SQLITE_UTF8 );
+    if( pMem->enc==SQLITE_UTF16LE ){
+      /* UTF-16 Little-endian -> UTF-8 */
+      while( zIn<zTerm ){
+        READ_UTF16LE(zIn, zIn<zTerm, c); 
+        WRITE_UTF8(z, c);
+      }
+    }else{
+      /* UTF-16 Big-endian -> UTF-8 */
+      while( zIn<zTerm ){
+        READ_UTF16BE(zIn, zIn<zTerm, c); 
+        WRITE_UTF8(z, c);
+      }
+    }
+    pMem->n = (int)(z - zOut);
+  }
+  *z = 0;
+  assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
+
+  c = pMem->flags;
+  sqlite3VdbeMemRelease(pMem);
+  pMem->flags = MEM_Str|MEM_Term|(c&MEM_AffMask);
+  pMem->enc = desiredEnc;
+  pMem->z = (char*)zOut;
+  pMem->zMalloc = pMem->z;
+  pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->z);
+
+translate_out:
+#if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
+  {
+    char zBuf[100];
+    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
+    fprintf(stderr, "OUTPUT: %s\n", zBuf);
+  }
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** This routine checks for a byte-order mark at the beginning of the 
+** UTF-16 string stored in *pMem. If one is present, it is removed and
+** the encoding of the Mem adjusted. This routine does not do any
+** byte-swapping, it just sets Mem.enc appropriately.
+**
+** The allocation (static, dynamic etc.) and encoding of the Mem may be
+** changed by this function.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemHandleBom(Mem *pMem){
+  int rc = SQLITE_OK;
+  u8 bom = 0;
+
+  assert( pMem->n>=0 );
+  if( pMem->n>1 ){
+    u8 b1 = *(u8 *)pMem->z;
+    u8 b2 = *(((u8 *)pMem->z) + 1);
+    if( b1==0xFE && b2==0xFF ){
+      bom = SQLITE_UTF16BE;
+    }
+    if( b1==0xFF && b2==0xFE ){
+      bom = SQLITE_UTF16LE;
+    }
+  }
+  
+  if( bom ){
+    rc = sqlite3VdbeMemMakeWriteable(pMem);
+    if( rc==SQLITE_OK ){
+      pMem->n -= 2;
+      memmove(pMem->z, &pMem->z[2], pMem->n);
+      pMem->z[pMem->n] = '\0';
+      pMem->z[pMem->n+1] = '\0';
+      pMem->flags |= MEM_Term;
+      pMem->enc = bom;
+    }
+  }
+  return rc;
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
+** return the number of unicode characters in pZ up to (but not including)
+** the first 0x00 byte. If nByte is not less than zero, return the
+** number of unicode characters in the first nByte of pZ (or up to 
+** the first 0x00, whichever comes first).
+*/
+SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *zIn, int nByte){
+  int r = 0;
+  const u8 *z = (const u8*)zIn;
+  const u8 *zTerm;
+  if( nByte>=0 ){
+    zTerm = &z[nByte];
+  }else{
+    zTerm = (const u8*)(-1);
+  }
+  assert( z<=zTerm );
+  while( *z!=0 && z<zTerm ){
+    SQLITE_SKIP_UTF8(z);
+    r++;
+  }
+  return r;
+}
+
+/* This test function is not currently used by the automated test-suite. 
+** Hence it is only available in debug builds.
+*/
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+/*
+** Translate UTF-8 to UTF-8.
+**
+** This has the effect of making sure that the string is well-formed
+** UTF-8.  Miscoded characters are removed.
+**
+** The translation is done in-place and aborted if the output
+** overruns the input.
+*/
+SQLITE_PRIVATE int sqlite3Utf8To8(unsigned char *zIn){
+  unsigned char *zOut = zIn;
+  unsigned char *zStart = zIn;
+  u32 c;
+
+  while( zIn[0] && zOut<=zIn ){
+    c = sqlite3Utf8Read((const u8**)&zIn);
+    if( c!=0xfffd ){
+      WRITE_UTF8(zOut, c);
+    }
+  }
+  *zOut = 0;
+  return (int)(zOut - zStart);
+}
+#endif
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Convert a UTF-16 string in the native encoding into a UTF-8 string.
+** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must
+** be freed by the calling function.
+**
+** NULL is returned if there is an allocation error.
+*/
+SQLITE_PRIVATE char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){
+  Mem m;
+  memset(&m, 0, sizeof(m));
+  m.db = db;
+  sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);
+  sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
+  if( db->mallocFailed ){
+    sqlite3VdbeMemRelease(&m);
+    m.z = 0;
+  }
+  assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
+  assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
+  assert( m.z || db->mallocFailed );
+  return m.z;
+}
+
+/*
+** zIn is a UTF-16 encoded unicode string at least nChar characters long.
+** Return the number of bytes in the first nChar unicode characters
+** in pZ.  nChar must be non-negative.
+*/
+SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *zIn, int nChar){
+  int c;
+  unsigned char const *z = zIn;
+  int n = 0;
+  
+  if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
+    while( n<nChar ){
+      READ_UTF16BE(z, 1, c);
+      n++;
+    }
+  }else{
+    while( n<nChar ){
+      READ_UTF16LE(z, 1, c);
+      n++;
+    }
+  }
+  return (int)(z-(unsigned char const *)zIn);
+}
+
+#if defined(SQLITE_TEST)
+/*
+** This routine is called from the TCL test function "translate_selftest".
+** It checks that the primitives for serializing and deserializing
+** characters in each encoding are inverses of each other.
+*/
+SQLITE_PRIVATE void sqlite3UtfSelfTest(void){
+  unsigned int i, t;
+  unsigned char zBuf[20];
+  unsigned char *z;
+  int n;
+  unsigned int c;
+
+  for(i=0; i<0x00110000; i++){
+    z = zBuf;
+    WRITE_UTF8(z, i);
+    n = (int)(z-zBuf);
+    assert( n>0 && n<=4 );
+    z[0] = 0;
+    z = zBuf;
+    c = sqlite3Utf8Read((const u8**)&z);
+    t = i;
+    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
+    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
+    assert( c==t );
+    assert( (z-zBuf)==n );
+  }
+  for(i=0; i<0x00110000; i++){
+    if( i>=0xD800 && i<0xE000 ) continue;
+    z = zBuf;
+    WRITE_UTF16LE(z, i);
+    n = (int)(z-zBuf);
+    assert( n>0 && n<=4 );
+    z[0] = 0;
+    z = zBuf;
+    READ_UTF16LE(z, 1, c);
+    assert( c==i );
+    assert( (z-zBuf)==n );
+  }
+  for(i=0; i<0x00110000; i++){
+    if( i>=0xD800 && i<0xE000 ) continue;
+    z = zBuf;
+    WRITE_UTF16BE(z, i);
+    n = (int)(z-zBuf);
+    assert( n>0 && n<=4 );
+    z[0] = 0;
+    z = zBuf;
+    READ_UTF16BE(z, 1, c);
+    assert( c==i );
+    assert( (z-zBuf)==n );
+  }
+}
+#endif /* SQLITE_TEST */
+#endif /* SQLITE_OMIT_UTF16 */
+
+/************** End of utf.c *************************************************/
+/************** Begin file util.c ********************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Utility functions used throughout sqlite.
+**
+** This file contains functions for allocating memory, comparing
+** strings, and stuff like that.
+**
+*/
+/* #include <stdarg.h> */
+#if HAVE_ISNAN || SQLITE_HAVE_ISNAN
+# include <math.h>
+#endif
+
+/*
+** Routine needed to support the testcase() macro.
+*/
+#ifdef SQLITE_COVERAGE_TEST
+SQLITE_PRIVATE void sqlite3Coverage(int x){
+  static unsigned dummy = 0;
+  dummy += (unsigned)x;
+}
+#endif
+
+/*
+** Give a callback to the test harness that can be used to simulate faults
+** in places where it is difficult or expensive to do so purely by means
+** of inputs.
+**
+** The intent of the integer argument is to let the fault simulator know
+** which of multiple sqlite3FaultSim() calls has been hit.
+**
+** Return whatever integer value the test callback returns, or return
+** SQLITE_OK if no test callback is installed.
+*/
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+SQLITE_PRIVATE int sqlite3FaultSim(int iTest){
+  int (*xCallback)(int) = sqlite3GlobalConfig.xTestCallback;
+  return xCallback ? xCallback(iTest) : SQLITE_OK;
+}
+#endif
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/*
+** Return true if the floating point value is Not a Number (NaN).
+**
+** Use the math library isnan() function if compiled with SQLITE_HAVE_ISNAN.
+** Otherwise, we have our own implementation that works on most systems.
+*/
+SQLITE_PRIVATE int sqlite3IsNaN(double x){
+  int rc;   /* The value return */
+#if !SQLITE_HAVE_ISNAN && !HAVE_ISNAN
+  /*
+  ** Systems that support the isnan() library function should probably
+  ** make use of it by compiling with -DSQLITE_HAVE_ISNAN.  But we have
+  ** found that many systems do not have a working isnan() function so
+  ** this implementation is provided as an alternative.
+  **
+  ** This NaN test sometimes fails if compiled on GCC with -ffast-math.
+  ** On the other hand, the use of -ffast-math comes with the following
+  ** warning:
+  **
+  **      This option [-ffast-math] should never be turned on by any
+  **      -O option since it can result in incorrect output for programs
+  **      which depend on an exact implementation of IEEE or ISO 
+  **      rules/specifications for math functions.
+  **
+  ** Under MSVC, this NaN test may fail if compiled with a floating-
+  ** point precision mode other than /fp:precise.  From the MSDN 
+  ** documentation:
+  **
+  **      The compiler [with /fp:precise] will properly handle comparisons 
+  **      involving NaN. For example, x != x evaluates to true if x is NaN 
+  **      ...
+  */
+#ifdef __FAST_MATH__
+# error SQLite will not work correctly with the -ffast-math option of GCC.
+#endif
+  volatile double y = x;
+  volatile double z = y;
+  rc = (y!=z);
+#else  /* if HAVE_ISNAN */
+  rc = isnan(x);
+#endif /* HAVE_ISNAN */
+  testcase( rc );
+  return rc;
+}
+#endif /* SQLITE_OMIT_FLOATING_POINT */
+
+/*
+** Compute a string length that is limited to what can be stored in
+** lower 30 bits of a 32-bit signed integer.
+**
+** The value returned will never be negative.  Nor will it ever be greater
+** than the actual length of the string.  For very long strings (greater
+** than 1GiB) the value returned might be less than the true string length.
+*/
+SQLITE_PRIVATE int sqlite3Strlen30(const char *z){
+  const char *z2 = z;
+  if( z==0 ) return 0;
+  while( *z2 ){ z2++; }
+  return 0x3fffffff & (int)(z2 - z);
+}
+
+/*
+** Set the current error code to err_code and clear any prior error message.
+*/
+SQLITE_PRIVATE void sqlite3Error(sqlite3 *db, int err_code){
+  assert( db!=0 );
+  db->errCode = err_code;
+  if( db->pErr ) sqlite3ValueSetNull(db->pErr);
+}
+
+/*
+** Set the most recent error code and error string for the sqlite
+** handle "db". The error code is set to "err_code".
+**
+** If it is not NULL, string zFormat specifies the format of the
+** error string in the style of the printf functions: The following
+** format characters are allowed:
+**
+**      %s      Insert a string
+**      %z      A string that should be freed after use
+**      %d      Insert an integer
+**      %T      Insert a token
+**      %S      Insert the first element of a SrcList
+**
+** zFormat and any string tokens that follow it are assumed to be
+** encoded in UTF-8.
+**
+** To clear the most recent error for sqlite handle "db", sqlite3Error
+** should be called with err_code set to SQLITE_OK and zFormat set
+** to NULL.
+*/
+SQLITE_PRIVATE void sqlite3ErrorWithMsg(sqlite3 *db, int err_code, const char *zFormat, ...){
+  assert( db!=0 );
+  db->errCode = err_code;
+  if( zFormat==0 ){
+    sqlite3Error(db, err_code);
+  }else if( db->pErr || (db->pErr = sqlite3ValueNew(db))!=0 ){
+    char *z;
+    va_list ap;
+    va_start(ap, zFormat);
+    z = sqlite3VMPrintf(db, zFormat, ap);
+    va_end(ap);
+    sqlite3ValueSetStr(db->pErr, -1, z, SQLITE_UTF8, SQLITE_DYNAMIC);
+  }
+}
+
+/*
+** Add an error message to pParse->zErrMsg and increment pParse->nErr.
+** The following formatting characters are allowed:
+**
+**      %s      Insert a string
+**      %z      A string that should be freed after use
+**      %d      Insert an integer
+**      %T      Insert a token
+**      %S      Insert the first element of a SrcList
+**
+** This function should be used to report any error that occurs while
+** compiling an SQL statement (i.e. within sqlite3_prepare()). The
+** last thing the sqlite3_prepare() function does is copy the error
+** stored by this function into the database handle using sqlite3Error().
+** Functions sqlite3Error() or sqlite3ErrorWithMsg() should be used
+** during statement execution (sqlite3_step() etc.).
+*/
+SQLITE_PRIVATE void sqlite3ErrorMsg(Parse *pParse, const char *zFormat, ...){
+  char *zMsg;
+  va_list ap;
+  sqlite3 *db = pParse->db;
+  va_start(ap, zFormat);
+  zMsg = sqlite3VMPrintf(db, zFormat, ap);
+  va_end(ap);
+  if( db->suppressErr ){
+    sqlite3DbFree(db, zMsg);
+  }else{
+    pParse->nErr++;
+    sqlite3DbFree(db, pParse->zErrMsg);
+    pParse->zErrMsg = zMsg;
+    pParse->rc = SQLITE_ERROR;
+  }
+}
+
+/*
+** Convert an SQL-style quoted string into a normal string by removing
+** the quote characters.  The conversion is done in-place.  If the
+** input does not begin with a quote character, then this routine
+** is a no-op.
+**
+** The input string must be zero-terminated.  A new zero-terminator
+** is added to the dequoted string.
+**
+** The return value is -1 if no dequoting occurs or the length of the
+** dequoted string, exclusive of the zero terminator, if dequoting does
+** occur.
+**
+** 2002-Feb-14: This routine is extended to remove MS-Access style
+** brackets from around identifiers.  For example:  "[a-b-c]" becomes
+** "a-b-c".
+*/
+SQLITE_PRIVATE int sqlite3Dequote(char *z){
+  char quote;
+  int i, j;
+  if( z==0 ) return -1;
+  quote = z[0];
+  switch( quote ){
+    case '\'':  break;
+    case '"':   break;
+    case '`':   break;                /* For MySQL compatibility */
+    case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
+    default:    return -1;
+  }
+  for(i=1, j=0;; i++){
+    assert( z[i] );
+    if( z[i]==quote ){
+      if( z[i+1]==quote ){
+        z[j++] = quote;
+        i++;
+      }else{
+        break;
+      }
+    }else{
+      z[j++] = z[i];
+    }
+  }
+  z[j] = 0;
+  return j;
+}
+
+/* Convenient short-hand */
+#define UpperToLower sqlite3UpperToLower
+
+/*
+** Some systems have stricmp().  Others have strcasecmp().  Because
+** there is no consistency, we will define our own.
+**
+** IMPLEMENTATION-OF: R-30243-02494 The sqlite3_stricmp() and
+** sqlite3_strnicmp() APIs allow applications and extensions to compare
+** the contents of two buffers containing UTF-8 strings in a
+** case-independent fashion, using the same definition of "case
+** independence" that SQLite uses internally when comparing identifiers.
+*/
+SQLITE_API int sqlite3_stricmp(const char *zLeft, const char *zRight){
+  register unsigned char *a, *b;
+  if( zLeft==0 ){
+    return zRight ? -1 : 0;
+  }else if( zRight==0 ){
+    return 1;
+  }
+  a = (unsigned char *)zLeft;
+  b = (unsigned char *)zRight;
+  while( *a!=0 && UpperToLower[*a]==UpperToLower[*b]){ a++; b++; }
+  return UpperToLower[*a] - UpperToLower[*b];
+}
+SQLITE_API int sqlite3_strnicmp(const char *zLeft, const char *zRight, int N){
+  register unsigned char *a, *b;
+  if( zLeft==0 ){
+    return zRight ? -1 : 0;
+  }else if( zRight==0 ){
+    return 1;
+  }
+  a = (unsigned char *)zLeft;
+  b = (unsigned char *)zRight;
+  while( N-- > 0 && *a!=0 && UpperToLower[*a]==UpperToLower[*b]){ a++; b++; }
+  return N<0 ? 0 : UpperToLower[*a] - UpperToLower[*b];
+}
+
+/*
+** The string z[] is an text representation of a real number.
+** Convert this string to a double and write it into *pResult.
+**
+** The string z[] is length bytes in length (bytes, not characters) and
+** uses the encoding enc.  The string is not necessarily zero-terminated.
+**
+** Return TRUE if the result is a valid real number (or integer) and FALSE
+** if the string is empty or contains extraneous text.  Valid numbers
+** are in one of these formats:
+**
+**    [+-]digits[E[+-]digits]
+**    [+-]digits.[digits][E[+-]digits]
+**    [+-].digits[E[+-]digits]
+**
+** Leading and trailing whitespace is ignored for the purpose of determining
+** validity.
+**
+** If some prefix of the input string is a valid number, this routine
+** returns FALSE but it still converts the prefix and writes the result
+** into *pResult.
+*/
+SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){
+#ifndef SQLITE_OMIT_FLOATING_POINT
+  int incr;
+  const char *zEnd = z + length;
+  /* sign * significand * (10 ^ (esign * exponent)) */
+  int sign = 1;    /* sign of significand */
+  i64 s = 0;       /* significand */
+  int d = 0;       /* adjust exponent for shifting decimal point */
+  int esign = 1;   /* sign of exponent */
+  int e = 0;       /* exponent */
+  int eValid = 1;  /* True exponent is either not used or is well-formed */
+  double result;
+  int nDigits = 0;
+  int nonNum = 0;
+
+  assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE );
+  *pResult = 0.0;   /* Default return value, in case of an error */
+
+  if( enc==SQLITE_UTF8 ){
+    incr = 1;
+  }else{
+    int i;
+    incr = 2;
+    assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
+    for(i=3-enc; i<length && z[i]==0; i+=2){}
+    nonNum = i<length;
+    zEnd = z+i+enc-3;
+    z += (enc&1);
+  }
+
+  /* skip leading spaces */
+  while( z<zEnd && sqlite3Isspace(*z) ) z+=incr;
+  if( z>=zEnd ) return 0;
+
+  /* get sign of significand */
+  if( *z=='-' ){
+    sign = -1;
+    z+=incr;
+  }else if( *z=='+' ){
+    z+=incr;
+  }
+
+  /* skip leading zeroes */
+  while( z<zEnd && z[0]=='0' ) z+=incr, nDigits++;
+
+  /* copy max significant digits to significand */
+  while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
+    s = s*10 + (*z - '0');
+    z+=incr, nDigits++;
+  }
+
+  /* skip non-significant significand digits
+  ** (increase exponent by d to shift decimal left) */
+  while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++, d++;
+  if( z>=zEnd ) goto do_atof_calc;
+
+  /* if decimal point is present */
+  if( *z=='.' ){
+    z+=incr;
+    /* copy digits from after decimal to significand
+    ** (decrease exponent by d to shift decimal right) */
+    while( z<zEnd && sqlite3Isdigit(*z) && s<((LARGEST_INT64-9)/10) ){
+      s = s*10 + (*z - '0');
+      z+=incr, nDigits++, d--;
+    }
+    /* skip non-significant digits */
+    while( z<zEnd && sqlite3Isdigit(*z) ) z+=incr, nDigits++;
+  }
+  if( z>=zEnd ) goto do_atof_calc;
+
+  /* if exponent is present */
+  if( *z=='e' || *z=='E' ){
+    z+=incr;
+    eValid = 0;
+    if( z>=zEnd ) goto do_atof_calc;
+    /* get sign of exponent */
+    if( *z=='-' ){
+      esign = -1;
+      z+=incr;
+    }else if( *z=='+' ){
+      z+=incr;
+    }
+    /* copy digits to exponent */
+    while( z<zEnd && sqlite3Isdigit(*z) ){
+      e = e<10000 ? (e*10 + (*z - '0')) : 10000;
+      z+=incr;
+      eValid = 1;
+    }
+  }
+
+  /* skip trailing spaces */
+  if( nDigits && eValid ){
+    while( z<zEnd && sqlite3Isspace(*z) ) z+=incr;
+  }
+
+do_atof_calc:
+  /* adjust exponent by d, and update sign */
+  e = (e*esign) + d;
+  if( e<0 ) {
+    esign = -1;
+    e *= -1;
+  } else {
+    esign = 1;
+  }
+
+  /* if 0 significand */
+  if( !s ) {
+    /* In the IEEE 754 standard, zero is signed.
+    ** Add the sign if we've seen at least one digit */
+    result = (sign<0 && nDigits) ? -(double)0 : (double)0;
+  } else {
+    /* attempt to reduce exponent */
+    if( esign>0 ){
+      while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10;
+    }else{
+      while( !(s%10) && e>0 ) e--,s/=10;
+    }
+
+    /* adjust the sign of significand */
+    s = sign<0 ? -s : s;
+
+    /* if exponent, scale significand as appropriate
+    ** and store in result. */
+    if( e ){
+      LONGDOUBLE_TYPE scale = 1.0;
+      /* attempt to handle extremely small/large numbers better */
+      if( e>307 && e<342 ){
+        while( e%308 ) { scale *= 1.0e+1; e -= 1; }
+        if( esign<0 ){
+          result = s / scale;
+          result /= 1.0e+308;
+        }else{
+          result = s * scale;
+          result *= 1.0e+308;
+        }
+      }else if( e>=342 ){
+        if( esign<0 ){
+          result = 0.0*s;
+        }else{
+          result = 1e308*1e308*s;  /* Infinity */
+        }
+      }else{
+        /* 1.0e+22 is the largest power of 10 than can be 
+        ** represented exactly. */
+        while( e%22 ) { scale *= 1.0e+1; e -= 1; }
+        while( e>0 ) { scale *= 1.0e+22; e -= 22; }
+        if( esign<0 ){
+          result = s / scale;
+        }else{
+          result = s * scale;
+        }
+      }
+    } else {
+      result = (double)s;
+    }
+  }
+
+  /* store the result */
+  *pResult = result;
+
+  /* return true if number and no extra non-whitespace chracters after */
+  return z>=zEnd && nDigits>0 && eValid && nonNum==0;
+#else
+  return !sqlite3Atoi64(z, pResult, length, enc);
+#endif /* SQLITE_OMIT_FLOATING_POINT */
+}
+
+/*
+** Compare the 19-character string zNum against the text representation
+** value 2^63:  9223372036854775808.  Return negative, zero, or positive
+** if zNum is less than, equal to, or greater than the string.
+** Note that zNum must contain exactly 19 characters.
+**
+** Unlike memcmp() this routine is guaranteed to return the difference
+** in the values of the last digit if the only difference is in the
+** last digit.  So, for example,
+**
+**      compare2pow63("9223372036854775800", 1)
+**
+** will return -8.
+*/
+static int compare2pow63(const char *zNum, int incr){
+  int c = 0;
+  int i;
+                    /* 012345678901234567 */
+  const char *pow63 = "922337203685477580";
+  for(i=0; c==0 && i<18; i++){
+    c = (zNum[i*incr]-pow63[i])*10;
+  }
+  if( c==0 ){
+    c = zNum[18*incr] - '8';
+    testcase( c==(-1) );
+    testcase( c==0 );
+    testcase( c==(+1) );
+  }
+  return c;
+}
+
+/*
+** Convert zNum to a 64-bit signed integer.  zNum must be decimal. This
+** routine does *not* accept hexadecimal notation.
+**
+** If the zNum value is representable as a 64-bit twos-complement 
+** integer, then write that value into *pNum and return 0.
+**
+** If zNum is exactly 9223372036854775808, return 2.  This special
+** case is broken out because while 9223372036854775808 cannot be a 
+** signed 64-bit integer, its negative -9223372036854775808 can be.
+**
+** If zNum is too big for a 64-bit integer and is not
+** 9223372036854775808  or if zNum contains any non-numeric text,
+** then return 1.
+**
+** length is the number of bytes in the string (bytes, not characters).
+** The string is not necessarily zero-terminated.  The encoding is
+** given by enc.
+*/
+SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){
+  int incr;
+  u64 u = 0;
+  int neg = 0; /* assume positive */
+  int i;
+  int c = 0;
+  int nonNum = 0;
+  const char *zStart;
+  const char *zEnd = zNum + length;
+  assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE );
+  if( enc==SQLITE_UTF8 ){
+    incr = 1;
+  }else{
+    incr = 2;
+    assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
+    for(i=3-enc; i<length && zNum[i]==0; i+=2){}
+    nonNum = i<length;
+    zEnd = zNum+i+enc-3;
+    zNum += (enc&1);
+  }
+  while( zNum<zEnd && sqlite3Isspace(*zNum) ) zNum+=incr;
+  if( zNum<zEnd ){
+    if( *zNum=='-' ){
+      neg = 1;
+      zNum+=incr;
+    }else if( *zNum=='+' ){
+      zNum+=incr;
+    }
+  }
+  zStart = zNum;
+  while( zNum<zEnd && zNum[0]=='0' ){ zNum+=incr; } /* Skip leading zeros. */
+  for(i=0; &zNum[i]<zEnd && (c=zNum[i])>='0' && c<='9'; i+=incr){
+    u = u*10 + c - '0';
+  }
+  if( u>LARGEST_INT64 ){
+    *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64;
+  }else if( neg ){
+    *pNum = -(i64)u;
+  }else{
+    *pNum = (i64)u;
+  }
+  testcase( i==18 );
+  testcase( i==19 );
+  testcase( i==20 );
+  if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19*incr || nonNum ){
+    /* zNum is empty or contains non-numeric text or is longer
+    ** than 19 digits (thus guaranteeing that it is too large) */
+    return 1;
+  }else if( i<19*incr ){
+    /* Less than 19 digits, so we know that it fits in 64 bits */
+    assert( u<=LARGEST_INT64 );
+    return 0;
+  }else{
+    /* zNum is a 19-digit numbers.  Compare it against 9223372036854775808. */
+    c = compare2pow63(zNum, incr);
+    if( c<0 ){
+      /* zNum is less than 9223372036854775808 so it fits */
+      assert( u<=LARGEST_INT64 );
+      return 0;
+    }else if( c>0 ){
+      /* zNum is greater than 9223372036854775808 so it overflows */
+      return 1;
+    }else{
+      /* zNum is exactly 9223372036854775808.  Fits if negative.  The
+      ** special case 2 overflow if positive */
+      assert( u-1==LARGEST_INT64 );
+      return neg ? 0 : 2;
+    }
+  }
+}
+
+/*
+** Transform a UTF-8 integer literal, in either decimal or hexadecimal,
+** into a 64-bit signed integer.  This routine accepts hexadecimal literals,
+** whereas sqlite3Atoi64() does not.
+**
+** Returns:
+**
+**     0    Successful transformation.  Fits in a 64-bit signed integer.
+**     1    Integer too large for a 64-bit signed integer or is malformed
+**     2    Special case of 9223372036854775808
+*/
+SQLITE_PRIVATE int sqlite3DecOrHexToI64(const char *z, i64 *pOut){
+#ifndef SQLITE_OMIT_HEX_INTEGER
+  if( z[0]=='0'
+   && (z[1]=='x' || z[1]=='X')
+   && sqlite3Isxdigit(z[2])
+  ){
+    u64 u = 0;
+    int i, k;
+    for(i=2; z[i]=='0'; i++){}
+    for(k=i; sqlite3Isxdigit(z[k]); k++){
+      u = u*16 + sqlite3HexToInt(z[k]);
+    }
+    memcpy(pOut, &u, 8);
+    return (z[k]==0 && k-i<=16) ? 0 : 1;
+  }else
+#endif /* SQLITE_OMIT_HEX_INTEGER */
+  {
+    return sqlite3Atoi64(z, pOut, sqlite3Strlen30(z), SQLITE_UTF8);
+  }
+}
+
+/*
+** If zNum represents an integer that will fit in 32-bits, then set
+** *pValue to that integer and return true.  Otherwise return false.
+**
+** This routine accepts both decimal and hexadecimal notation for integers.
+**
+** Any non-numeric characters that following zNum are ignored.
+** This is different from sqlite3Atoi64() which requires the
+** input number to be zero-terminated.
+*/
+SQLITE_PRIVATE int sqlite3GetInt32(const char *zNum, int *pValue){
+  sqlite_int64 v = 0;
+  int i, c;
+  int neg = 0;
+  if( zNum[0]=='-' ){
+    neg = 1;
+    zNum++;
+  }else if( zNum[0]=='+' ){
+    zNum++;
+  }
+#ifndef SQLITE_OMIT_HEX_INTEGER
+  else if( zNum[0]=='0'
+        && (zNum[1]=='x' || zNum[1]=='X')
+        && sqlite3Isxdigit(zNum[2])
+  ){
+    u32 u = 0;
+    zNum += 2;
+    while( zNum[0]=='0' ) zNum++;
+    for(i=0; sqlite3Isxdigit(zNum[i]) && i<8; i++){
+      u = u*16 + sqlite3HexToInt(zNum[i]);
+    }
+    if( (u&0x80000000)==0 && sqlite3Isxdigit(zNum[i])==0 ){
+      memcpy(pValue, &u, 4);
+      return 1;
+    }else{
+      return 0;
+    }
+  }
+#endif
+  for(i=0; i<11 && (c = zNum[i] - '0')>=0 && c<=9; i++){
+    v = v*10 + c;
+  }
+
+  /* The longest decimal representation of a 32 bit integer is 10 digits:
+  **
+  **             1234567890
+  **     2^31 -> 2147483648
+  */
+  testcase( i==10 );
+  if( i>10 ){
+    return 0;
+  }
+  testcase( v-neg==2147483647 );
+  if( v-neg>2147483647 ){
+    return 0;
+  }
+  if( neg ){
+    v = -v;
+  }
+  *pValue = (int)v;
+  return 1;
+}
+
+/*
+** Return a 32-bit integer value extracted from a string.  If the
+** string is not an integer, just return 0.
+*/
+SQLITE_PRIVATE int sqlite3Atoi(const char *z){
+  int x = 0;
+  if( z ) sqlite3GetInt32(z, &x);
+  return x;
+}
+
+/*
+** The variable-length integer encoding is as follows:
+**
+** KEY:
+**         A = 0xxxxxxx    7 bits of data and one flag bit
+**         B = 1xxxxxxx    7 bits of data and one flag bit
+**         C = xxxxxxxx    8 bits of data
+**
+**  7 bits - A
+** 14 bits - BA
+** 21 bits - BBA
+** 28 bits - BBBA
+** 35 bits - BBBBA
+** 42 bits - BBBBBA
+** 49 bits - BBBBBBA
+** 56 bits - BBBBBBBA
+** 64 bits - BBBBBBBBC
+*/
+
+/*
+** Write a 64-bit variable-length integer to memory starting at p[0].
+** The length of data write will be between 1 and 9 bytes.  The number
+** of bytes written is returned.
+**
+** A variable-length integer consists of the lower 7 bits of each byte
+** for all bytes that have the 8th bit set and one byte with the 8th
+** bit clear.  Except, if we get to the 9th byte, it stores the full
+** 8 bits and is the last byte.
+*/
+static int SQLITE_NOINLINE putVarint64(unsigned char *p, u64 v){
+  int i, j, n;
+  u8 buf[10];
+  if( v & (((u64)0xff000000)<<32) ){
+    p[8] = (u8)v;
+    v >>= 8;
+    for(i=7; i>=0; i--){
+      p[i] = (u8)((v & 0x7f) | 0x80);
+      v >>= 7;
+    }
+    return 9;
+  }    
+  n = 0;
+  do{
+    buf[n++] = (u8)((v & 0x7f) | 0x80);
+    v >>= 7;
+  }while( v!=0 );
+  buf[0] &= 0x7f;
+  assert( n<=9 );
+  for(i=0, j=n-1; j>=0; j--, i++){
+    p[i] = buf[j];
+  }
+  return n;
+}
+SQLITE_PRIVATE int sqlite3PutVarint(unsigned char *p, u64 v){
+  if( v<=0x7f ){
+    p[0] = v&0x7f;
+    return 1;
+  }
+  if( v<=0x3fff ){
+    p[0] = ((v>>7)&0x7f)|0x80;
+    p[1] = v&0x7f;
+    return 2;
+  }
+  return putVarint64(p,v);
+}
+
+/*
+** Bitmasks used by sqlite3GetVarint().  These precomputed constants
+** are defined here rather than simply putting the constant expressions
+** inline in order to work around bugs in the RVT compiler.
+**
+** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
+**
+** SLOT_4_2_0   A mask for  (0x7f<<28) | SLOT_2_0
+*/
+#define SLOT_2_0     0x001fc07f
+#define SLOT_4_2_0   0xf01fc07f
+
+
+/*
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read.  The value is stored in *v.
+*/
+SQLITE_PRIVATE u8 sqlite3GetVarint(const unsigned char *p, u64 *v){
+  u32 a,b,s;
+
+  a = *p;
+  /* a: p0 (unmasked) */
+  if (!(a&0x80))
+  {
+    *v = a;
+    return 1;
+  }
+
+  p++;
+  b = *p;
+  /* b: p1 (unmasked) */
+  if (!(b&0x80))
+  {
+    a &= 0x7f;
+    a = a<<7;
+    a |= b;
+    *v = a;
+    return 2;
+  }
+
+  /* Verify that constants are precomputed correctly */
+  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
+  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
+
+  p++;
+  a = a<<14;
+  a |= *p;
+  /* a: p0<<14 | p2 (unmasked) */
+  if (!(a&0x80))
+  {
+    a &= SLOT_2_0;
+    b &= 0x7f;
+    b = b<<7;
+    a |= b;
+    *v = a;
+    return 3;
+  }
+
+  /* CSE1 from below */
+  a &= SLOT_2_0;
+  p++;
+  b = b<<14;
+  b |= *p;
+  /* b: p1<<14 | p3 (unmasked) */
+  if (!(b&0x80))
+  {
+    b &= SLOT_2_0;
+    /* moved CSE1 up */
+    /* a &= (0x7f<<14)|(0x7f); */
+    a = a<<7;
+    a |= b;
+    *v = a;
+    return 4;
+  }
+
+  /* a: p0<<14 | p2 (masked) */
+  /* b: p1<<14 | p3 (unmasked) */
+  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+  /* moved CSE1 up */
+  /* a &= (0x7f<<14)|(0x7f); */
+  b &= SLOT_2_0;
+  s = a;
+  /* s: p0<<14 | p2 (masked) */
+
+  p++;
+  a = a<<14;
+  a |= *p;
+  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
+  if (!(a&0x80))
+  {
+    /* we can skip these cause they were (effectively) done above in calc'ing s */
+    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+    /* b &= (0x7f<<14)|(0x7f); */
+    b = b<<7;
+    a |= b;
+    s = s>>18;
+    *v = ((u64)s)<<32 | a;
+    return 5;
+  }
+
+  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+  s = s<<7;
+  s |= b;
+  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+
+  p++;
+  b = b<<14;
+  b |= *p;
+  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
+  if (!(b&0x80))
+  {
+    /* we can skip this cause it was (effectively) done above in calc'ing s */
+    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+    a &= SLOT_2_0;
+    a = a<<7;
+    a |= b;
+    s = s>>18;
+    *v = ((u64)s)<<32 | a;
+    return 6;
+  }
+
+  p++;
+  a = a<<14;
+  a |= *p;
+  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
+  if (!(a&0x80))
+  {
+    a &= SLOT_4_2_0;
+    b &= SLOT_2_0;
+    b = b<<7;
+    a |= b;
+    s = s>>11;
+    *v = ((u64)s)<<32 | a;
+    return 7;
+  }
+
+  /* CSE2 from below */
+  a &= SLOT_2_0;
+  p++;
+  b = b<<14;
+  b |= *p;
+  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
+  if (!(b&0x80))
+  {
+    b &= SLOT_4_2_0;
+    /* moved CSE2 up */
+    /* a &= (0x7f<<14)|(0x7f); */
+    a = a<<7;
+    a |= b;
+    s = s>>4;
+    *v = ((u64)s)<<32 | a;
+    return 8;
+  }
+
+  p++;
+  a = a<<15;
+  a |= *p;
+  /* a: p4<<29 | p6<<15 | p8 (unmasked) */
+
+  /* moved CSE2 up */
+  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
+  b &= SLOT_2_0;
+  b = b<<8;
+  a |= b;
+
+  s = s<<4;
+  b = p[-4];
+  b &= 0x7f;
+  b = b>>3;
+  s |= b;
+
+  *v = ((u64)s)<<32 | a;
+
+  return 9;
+}
+
+/*
+** Read a 32-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read.  The value is stored in *v.
+**
+** If the varint stored in p[0] is larger than can fit in a 32-bit unsigned
+** integer, then set *v to 0xffffffff.
+**
+** A MACRO version, getVarint32, is provided which inlines the 
+** single-byte case.  All code should use the MACRO version as 
+** this function assumes the single-byte case has already been handled.
+*/
+SQLITE_PRIVATE u8 sqlite3GetVarint32(const unsigned char *p, u32 *v){
+  u32 a,b;
+
+  /* The 1-byte case.  Overwhelmingly the most common.  Handled inline
+  ** by the getVarin32() macro */
+  a = *p;
+  /* a: p0 (unmasked) */
+#ifndef getVarint32
+  if (!(a&0x80))
+  {
+    /* Values between 0 and 127 */
+    *v = a;
+    return 1;
+  }
+#endif
+
+  /* The 2-byte case */
+  p++;
+  b = *p;
+  /* b: p1 (unmasked) */
+  if (!(b&0x80))
+  {
+    /* Values between 128 and 16383 */
+    a &= 0x7f;
+    a = a<<7;
+    *v = a | b;
+    return 2;
+  }
+
+  /* The 3-byte case */
+  p++;
+  a = a<<14;
+  a |= *p;
+  /* a: p0<<14 | p2 (unmasked) */
+  if (!(a&0x80))
+  {
+    /* Values between 16384 and 2097151 */
+    a &= (0x7f<<14)|(0x7f);
+    b &= 0x7f;
+    b = b<<7;
+    *v = a | b;
+    return 3;
+  }
+
+  /* A 32-bit varint is used to store size information in btrees.
+  ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
+  ** A 3-byte varint is sufficient, for example, to record the size
+  ** of a 1048569-byte BLOB or string.
+  **
+  ** We only unroll the first 1-, 2-, and 3- byte cases.  The very
+  ** rare larger cases can be handled by the slower 64-bit varint
+  ** routine.
+  */
+#if 1
+  {
+    u64 v64;
+    u8 n;
+
+    p -= 2;
+    n = sqlite3GetVarint(p, &v64);
+    assert( n>3 && n<=9 );
+    if( (v64 & SQLITE_MAX_U32)!=v64 ){
+      *v = 0xffffffff;
+    }else{
+      *v = (u32)v64;
+    }
+    return n;
+  }
+
+#else
+  /* For following code (kept for historical record only) shows an
+  ** unrolling for the 3- and 4-byte varint cases.  This code is
+  ** slightly faster, but it is also larger and much harder to test.
+  */
+  p++;
+  b = b<<14;
+  b |= *p;
+  /* b: p1<<14 | p3 (unmasked) */
+  if (!(b&0x80))
+  {
+    /* Values between 2097152 and 268435455 */
+    b &= (0x7f<<14)|(0x7f);
+    a &= (0x7f<<14)|(0x7f);
+    a = a<<7;
+    *v = a | b;
+    return 4;
+  }
+
+  p++;
+  a = a<<14;
+  a |= *p;
+  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
+  if (!(a&0x80))
+  {
+    /* Values  between 268435456 and 34359738367 */
+    a &= SLOT_4_2_0;
+    b &= SLOT_4_2_0;
+    b = b<<7;
+    *v = a | b;
+    return 5;
+  }
+
+  /* We can only reach this point when reading a corrupt database
+  ** file.  In that case we are not in any hurry.  Use the (relatively
+  ** slow) general-purpose sqlite3GetVarint() routine to extract the
+  ** value. */
+  {
+    u64 v64;
+    u8 n;
+
+    p -= 4;
+    n = sqlite3GetVarint(p, &v64);
+    assert( n>5 && n<=9 );
+    *v = (u32)v64;
+    return n;
+  }
+#endif
+}
+
+/*
+** Return the number of bytes that will be needed to store the given
+** 64-bit integer.
+*/
+SQLITE_PRIVATE int sqlite3VarintLen(u64 v){
+  int i = 0;
+  do{
+    i++;
+    v >>= 7;
+  }while( v!=0 && ALWAYS(i<9) );
+  return i;
+}
+
+
+/*
+** Read or write a four-byte big-endian integer value.
+*/
+SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){
+  testcase( p[0]&0x80 );
+  return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
+}
+SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){
+  p[0] = (u8)(v>>24);
+  p[1] = (u8)(v>>16);
+  p[2] = (u8)(v>>8);
+  p[3] = (u8)v;
+}
+
+
+
+/*
+** Translate a single byte of Hex into an integer.
+** This routine only works if h really is a valid hexadecimal
+** character:  0..9a..fA..F
+*/
+SQLITE_PRIVATE u8 sqlite3HexToInt(int h){
+  assert( (h>='0' && h<='9') ||  (h>='a' && h<='f') ||  (h>='A' && h<='F') );
+#ifdef SQLITE_ASCII
+  h += 9*(1&(h>>6));
+#endif
+#ifdef SQLITE_EBCDIC
+  h += 9*(1&~(h>>4));
+#endif
+  return (u8)(h & 0xf);
+}
+
+#if !defined(SQLITE_OMIT_BLOB_LITERAL) || defined(SQLITE_HAS_CODEC)
+/*
+** Convert a BLOB literal of the form "x'hhhhhh'" into its binary
+** value.  Return a pointer to its binary value.  Space to hold the
+** binary value has been obtained from malloc and must be freed by
+** the calling routine.
+*/
+SQLITE_PRIVATE void *sqlite3HexToBlob(sqlite3 *db, const char *z, int n){
+  char *zBlob;
+  int i;
+
+  zBlob = (char *)sqlite3DbMallocRaw(db, n/2 + 1);
+  n--;
+  if( zBlob ){
+    for(i=0; i<n; i+=2){
+      zBlob[i/2] = (sqlite3HexToInt(z[i])<<4) | sqlite3HexToInt(z[i+1]);
+    }
+    zBlob[i/2] = 0;
+  }
+  return zBlob;
+}
+#endif /* !SQLITE_OMIT_BLOB_LITERAL || SQLITE_HAS_CODEC */
+
+/*
+** Log an error that is an API call on a connection pointer that should
+** not have been used.  The "type" of connection pointer is given as the
+** argument.  The zType is a word like "NULL" or "closed" or "invalid".
+*/
+static void logBadConnection(const char *zType){
+  sqlite3_log(SQLITE_MISUSE, 
+     "API call with %s database connection pointer",
+     zType
+  );
+}
+
+/*
+** Check to make sure we have a valid db pointer.  This test is not
+** foolproof but it does provide some measure of protection against
+** misuse of the interface such as passing in db pointers that are
+** NULL or which have been previously closed.  If this routine returns
+** 1 it means that the db pointer is valid and 0 if it should not be
+** dereferenced for any reason.  The calling function should invoke
+** SQLITE_MISUSE immediately.
+**
+** sqlite3SafetyCheckOk() requires that the db pointer be valid for
+** use.  sqlite3SafetyCheckSickOrOk() allows a db pointer that failed to
+** open properly and is not fit for general use but which can be
+** used as an argument to sqlite3_errmsg() or sqlite3_close().
+*/
+SQLITE_PRIVATE int sqlite3SafetyCheckOk(sqlite3 *db){
+  u32 magic;
+  if( db==0 ){
+    logBadConnection("NULL");
+    return 0;
+  }
+  magic = db->magic;
+  if( magic!=SQLITE_MAGIC_OPEN ){
+    if( sqlite3SafetyCheckSickOrOk(db) ){
+      testcase( sqlite3GlobalConfig.xLog!=0 );
+      logBadConnection("unopened");
+    }
+    return 0;
+  }else{
+    return 1;
+  }
+}
+SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(sqlite3 *db){
+  u32 magic;
+  magic = db->magic;
+  if( magic!=SQLITE_MAGIC_SICK &&
+      magic!=SQLITE_MAGIC_OPEN &&
+      magic!=SQLITE_MAGIC_BUSY ){
+    testcase( sqlite3GlobalConfig.xLog!=0 );
+    logBadConnection("invalid");
+    return 0;
+  }else{
+    return 1;
+  }
+}
+
+/*
+** Attempt to add, substract, or multiply the 64-bit signed value iB against
+** the other 64-bit signed integer at *pA and store the result in *pA.
+** Return 0 on success.  Or if the operation would have resulted in an
+** overflow, leave *pA unchanged and return 1.
+*/
+SQLITE_PRIVATE int sqlite3AddInt64(i64 *pA, i64 iB){
+  i64 iA = *pA;
+  testcase( iA==0 ); testcase( iA==1 );
+  testcase( iB==-1 ); testcase( iB==0 );
+  if( iB>=0 ){
+    testcase( iA>0 && LARGEST_INT64 - iA == iB );
+    testcase( iA>0 && LARGEST_INT64 - iA == iB - 1 );
+    if( iA>0 && LARGEST_INT64 - iA < iB ) return 1;
+  }else{
+    testcase( iA<0 && -(iA + LARGEST_INT64) == iB + 1 );
+    testcase( iA<0 && -(iA + LARGEST_INT64) == iB + 2 );
+    if( iA<0 && -(iA + LARGEST_INT64) > iB + 1 ) return 1;
+  }
+  *pA += iB;
+  return 0; 
+}
+SQLITE_PRIVATE int sqlite3SubInt64(i64 *pA, i64 iB){
+  testcase( iB==SMALLEST_INT64+1 );
+  if( iB==SMALLEST_INT64 ){
+    testcase( (*pA)==(-1) ); testcase( (*pA)==0 );
+    if( (*pA)>=0 ) return 1;
+    *pA -= iB;
+    return 0;
+  }else{
+    return sqlite3AddInt64(pA, -iB);
+  }
+}
+#define TWOPOWER32 (((i64)1)<<32)
+#define TWOPOWER31 (((i64)1)<<31)
+SQLITE_PRIVATE int sqlite3MulInt64(i64 *pA, i64 iB){
+  i64 iA = *pA;
+  i64 iA1, iA0, iB1, iB0, r;
+
+  iA1 = iA/TWOPOWER32;
+  iA0 = iA % TWOPOWER32;
+  iB1 = iB/TWOPOWER32;
+  iB0 = iB % TWOPOWER32;
+  if( iA1==0 ){
+    if( iB1==0 ){
+      *pA *= iB;
+      return 0;
+    }
+    r = iA0*iB1;
+  }else if( iB1==0 ){
+    r = iA1*iB0;
+  }else{
+    /* If both iA1 and iB1 are non-zero, overflow will result */
+    return 1;
+  }
+  testcase( r==(-TWOPOWER31)-1 );
+  testcase( r==(-TWOPOWER31) );
+  testcase( r==TWOPOWER31 );
+  testcase( r==TWOPOWER31-1 );
+  if( r<(-TWOPOWER31) || r>=TWOPOWER31 ) return 1;
+  r *= TWOPOWER32;
+  if( sqlite3AddInt64(&r, iA0*iB0) ) return 1;
+  *pA = r;
+  return 0;
+}
+
+/*
+** Compute the absolute value of a 32-bit signed integer, of possible.  Or 
+** if the integer has a value of -2147483648, return +2147483647
+*/
+SQLITE_PRIVATE int sqlite3AbsInt32(int x){
+  if( x>=0 ) return x;
+  if( x==(int)0x80000000 ) return 0x7fffffff;
+  return -x;
+}
+
+#ifdef SQLITE_ENABLE_8_3_NAMES
+/*
+** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database
+** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and
+** if filename in z[] has a suffix (a.k.a. "extension") that is longer than
+** three characters, then shorten the suffix on z[] to be the last three
+** characters of the original suffix.
+**
+** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always
+** do the suffix shortening regardless of URI parameter.
+**
+** Examples:
+**
+**     test.db-journal    =>   test.nal
+**     test.db-wal        =>   test.wal
+**     test.db-shm        =>   test.shm
+**     test.db-mj7f3319fa =>   test.9fa
+*/
+SQLITE_PRIVATE void sqlite3FileSuffix3(const char *zBaseFilename, char *z){
+#if SQLITE_ENABLE_8_3_NAMES<2
+  if( sqlite3_uri_boolean(zBaseFilename, "8_3_names", 0) )
+#endif
+  {
+    int i, sz;
+    sz = sqlite3Strlen30(z);
+    for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){}
+    if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4);
+  }
+}
+#endif
+
+/* 
+** Find (an approximate) sum of two LogEst values.  This computation is
+** not a simple "+" operator because LogEst is stored as a logarithmic
+** value.
+** 
+*/
+SQLITE_PRIVATE LogEst sqlite3LogEstAdd(LogEst a, LogEst b){
+  static const unsigned char x[] = {
+     10, 10,                         /* 0,1 */
+      9, 9,                          /* 2,3 */
+      8, 8,                          /* 4,5 */
+      7, 7, 7,                       /* 6,7,8 */
+      6, 6, 6,                       /* 9,10,11 */
+      5, 5, 5,                       /* 12-14 */
+      4, 4, 4, 4,                    /* 15-18 */
+      3, 3, 3, 3, 3, 3,              /* 19-24 */
+      2, 2, 2, 2, 2, 2, 2,           /* 25-31 */
+  };
+  if( a>=b ){
+    if( a>b+49 ) return a;
+    if( a>b+31 ) return a+1;
+    return a+x[a-b];
+  }else{
+    if( b>a+49 ) return b;
+    if( b>a+31 ) return b+1;
+    return b+x[b-a];
+  }
+}
+
+/*
+** Convert an integer into a LogEst.  In other words, compute an
+** approximation for 10*log2(x).
+*/
+SQLITE_PRIVATE LogEst sqlite3LogEst(u64 x){
+  static LogEst a[] = { 0, 2, 3, 5, 6, 7, 8, 9 };
+  LogEst y = 40;
+  if( x<8 ){
+    if( x<2 ) return 0;
+    while( x<8 ){  y -= 10; x <<= 1; }
+  }else{
+    while( x>255 ){ y += 40; x >>= 4; }
+    while( x>15 ){  y += 10; x >>= 1; }
+  }
+  return a[x&7] + y - 10;
+}
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Convert a double into a LogEst
+** In other words, compute an approximation for 10*log2(x).
+*/
+SQLITE_PRIVATE LogEst sqlite3LogEstFromDouble(double x){
+  u64 a;
+  LogEst e;
+  assert( sizeof(x)==8 && sizeof(a)==8 );
+  if( x<=1 ) return 0;
+  if( x<=2000000000 ) return sqlite3LogEst((u64)x);
+  memcpy(&a, &x, 8);
+  e = (a>>52) - 1022;
+  return e*10;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/*
+** Convert a LogEst into an integer.
+*/
+SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst x){
+  u64 n;
+  if( x<10 ) return 1;
+  n = x%10;
+  x /= 10;
+  if( n>=5 ) n -= 2;
+  else if( n>=1 ) n -= 1;
+  if( x>=3 ){
+    return x>60 ? (u64)LARGEST_INT64 : (n+8)<<(x-3);
+  }
+  return (n+8)>>(3-x);
+}
+
+/************** End of util.c ************************************************/
+/************** Begin file hash.c ********************************************/
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the implementation of generic hash-tables
+** used in SQLite.
+*/
+/* #include <assert.h> */
+
+/* Turn bulk memory into a hash table object by initializing the
+** fields of the Hash structure.
+**
+** "pNew" is a pointer to the hash table that is to be initialized.
+*/
+SQLITE_PRIVATE void sqlite3HashInit(Hash *pNew){
+  assert( pNew!=0 );
+  pNew->first = 0;
+  pNew->count = 0;
+  pNew->htsize = 0;
+  pNew->ht = 0;
+}
+
+/* Remove all entries from a hash table.  Reclaim all memory.
+** Call this routine to delete a hash table or to reset a hash table
+** to the empty state.
+*/
+SQLITE_PRIVATE void sqlite3HashClear(Hash *pH){
+  HashElem *elem;         /* For looping over all elements of the table */
+
+  assert( pH!=0 );
+  elem = pH->first;
+  pH->first = 0;
+  sqlite3_free(pH->ht);
+  pH->ht = 0;
+  pH->htsize = 0;
+  while( elem ){
+    HashElem *next_elem = elem->next;
+    sqlite3_free(elem);
+    elem = next_elem;
+  }
+  pH->count = 0;
+}
+
+/*
+** The hashing function.
+*/
+static unsigned int strHash(const char *z){
+  unsigned int h = 0;
+  unsigned char c;
+  while( (c = (unsigned char)*z++)!=0 ){
+    h = (h<<3) ^ h ^ sqlite3UpperToLower[c];
+  }
+  return h;
+}
+
+
+/* Link pNew element into the hash table pH.  If pEntry!=0 then also
+** insert pNew into the pEntry hash bucket.
+*/
+static void insertElement(
+  Hash *pH,              /* The complete hash table */
+  struct _ht *pEntry,    /* The entry into which pNew is inserted */
+  HashElem *pNew         /* The element to be inserted */
+){
+  HashElem *pHead;       /* First element already in pEntry */
+  if( pEntry ){
+    pHead = pEntry->count ? pEntry->chain : 0;
+    pEntry->count++;
+    pEntry->chain = pNew;
+  }else{
+    pHead = 0;
+  }
+  if( pHead ){
+    pNew->next = pHead;
+    pNew->prev = pHead->prev;
+    if( pHead->prev ){ pHead->prev->next = pNew; }
+    else             { pH->first = pNew; }
+    pHead->prev = pNew;
+  }else{
+    pNew->next = pH->first;
+    if( pH->first ){ pH->first->prev = pNew; }
+    pNew->prev = 0;
+    pH->first = pNew;
+  }
+}
+
+
+/* Resize the hash table so that it cantains "new_size" buckets.
+**
+** The hash table might fail to resize if sqlite3_malloc() fails or
+** if the new size is the same as the prior size.
+** Return TRUE if the resize occurs and false if not.
+*/
+static int rehash(Hash *pH, unsigned int new_size){
+  struct _ht *new_ht;            /* The new hash table */
+  HashElem *elem, *next_elem;    /* For looping over existing elements */
+
+#if SQLITE_MALLOC_SOFT_LIMIT>0
+  if( new_size*sizeof(struct _ht)>SQLITE_MALLOC_SOFT_LIMIT ){
+    new_size = SQLITE_MALLOC_SOFT_LIMIT/sizeof(struct _ht);
+  }
+  if( new_size==pH->htsize ) return 0;
+#endif
+
+  /* The inability to allocates space for a larger hash table is
+  ** a performance hit but it is not a fatal error.  So mark the
+  ** allocation as a benign. Use sqlite3Malloc()/memset(0) instead of 
+  ** sqlite3MallocZero() to make the allocation, as sqlite3MallocZero()
+  ** only zeroes the requested number of bytes whereas this module will
+  ** use the actual amount of space allocated for the hash table (which
+  ** may be larger than the requested amount).
+  */
+  sqlite3BeginBenignMalloc();
+  new_ht = (struct _ht *)sqlite3Malloc( new_size*sizeof(struct _ht) );
+  sqlite3EndBenignMalloc();
+
+  if( new_ht==0 ) return 0;
+  sqlite3_free(pH->ht);
+  pH->ht = new_ht;
+  pH->htsize = new_size = sqlite3MallocSize(new_ht)/sizeof(struct _ht);
+  memset(new_ht, 0, new_size*sizeof(struct _ht));
+  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
+    unsigned int h = strHash(elem->pKey) % new_size;
+    next_elem = elem->next;
+    insertElement(pH, &new_ht[h], elem);
+  }
+  return 1;
+}
+
+/* This function (for internal use only) locates an element in an
+** hash table that matches the given key.  The hash for this key is
+** also computed and returned in the *pH parameter.
+*/
+static HashElem *findElementWithHash(
+  const Hash *pH,     /* The pH to be searched */
+  const char *pKey,   /* The key we are searching for */
+  unsigned int *pHash /* Write the hash value here */
+){
+  HashElem *elem;                /* Used to loop thru the element list */
+  int count;                     /* Number of elements left to test */
+  unsigned int h;                /* The computed hash */
+
+  if( pH->ht ){
+    struct _ht *pEntry;
+    h = strHash(pKey) % pH->htsize;
+    pEntry = &pH->ht[h];
+    elem = pEntry->chain;
+    count = pEntry->count;
+  }else{
+    h = 0;
+    elem = pH->first;
+    count = pH->count;
+  }
+  *pHash = h;
+  while( count-- ){
+    assert( elem!=0 );
+    if( sqlite3StrICmp(elem->pKey,pKey)==0 ){ 
+      return elem;
+    }
+    elem = elem->next;
+  }
+  return 0;
+}
+
+/* Remove a single entry from the hash table given a pointer to that
+** element and a hash on the element's key.
+*/
+static void removeElementGivenHash(
+  Hash *pH,         /* The pH containing "elem" */
+  HashElem* elem,   /* The element to be removed from the pH */
+  unsigned int h    /* Hash value for the element */
+){
+  struct _ht *pEntry;
+  if( elem->prev ){
+    elem->prev->next = elem->next; 
+  }else{
+    pH->first = elem->next;
+  }
+  if( elem->next ){
+    elem->next->prev = elem->prev;
+  }
+  if( pH->ht ){
+    pEntry = &pH->ht[h];
+    if( pEntry->chain==elem ){
+      pEntry->chain = elem->next;
+    }
+    pEntry->count--;
+    assert( pEntry->count>=0 );
+  }
+  sqlite3_free( elem );
+  pH->count--;
+  if( pH->count==0 ){
+    assert( pH->first==0 );
+    assert( pH->count==0 );
+    sqlite3HashClear(pH);
+  }
+}
+
+/* Attempt to locate an element of the hash table pH with a key
+** that matches pKey.  Return the data for this element if it is
+** found, or NULL if there is no match.
+*/
+SQLITE_PRIVATE void *sqlite3HashFind(const Hash *pH, const char *pKey){
+  HashElem *elem;    /* The element that matches key */
+  unsigned int h;    /* A hash on key */
+
+  assert( pH!=0 );
+  assert( pKey!=0 );
+  elem = findElementWithHash(pH, pKey, &h);
+  return elem ? elem->data : 0;
+}
+
+/* Insert an element into the hash table pH.  The key is pKey
+** and the data is "data".
+**
+** If no element exists with a matching key, then a new
+** element is created and NULL is returned.
+**
+** If another element already exists with the same key, then the
+** new data replaces the old data and the old data is returned.
+** The key is not copied in this instance.  If a malloc fails, then
+** the new data is returned and the hash table is unchanged.
+**
+** If the "data" parameter to this function is NULL, then the
+** element corresponding to "key" is removed from the hash table.
+*/
+SQLITE_PRIVATE void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){
+  unsigned int h;       /* the hash of the key modulo hash table size */
+  HashElem *elem;       /* Used to loop thru the element list */
+  HashElem *new_elem;   /* New element added to the pH */
+
+  assert( pH!=0 );
+  assert( pKey!=0 );
+  elem = findElementWithHash(pH,pKey,&h);
+  if( elem ){
+    void *old_data = elem->data;
+    if( data==0 ){
+      removeElementGivenHash(pH,elem,h);
+    }else{
+      elem->data = data;
+      elem->pKey = pKey;
+    }
+    return old_data;
+  }
+  if( data==0 ) return 0;
+  new_elem = (HashElem*)sqlite3Malloc( sizeof(HashElem) );
+  if( new_elem==0 ) return data;
+  new_elem->pKey = pKey;
+  new_elem->data = data;
+  pH->count++;
+  if( pH->count>=10 && pH->count > 2*pH->htsize ){
+    if( rehash(pH, pH->count*2) ){
+      assert( pH->htsize>0 );
+      h = strHash(pKey) % pH->htsize;
+    }
+  }
+  insertElement(pH, pH->ht ? &pH->ht[h] : 0, new_elem);
+  return 0;
+}
+
+/************** End of hash.c ************************************************/
+/************** Begin file opcodes.c *****************************************/
+/* Automatically generated.  Do not edit */
+/* See the mkopcodec.awk script for details. */
+#if !defined(SQLITE_OMIT_EXPLAIN) || defined(VDBE_PROFILE) || defined(SQLITE_DEBUG)
+#if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS) || defined(SQLITE_DEBUG)
+# define OpHelp(X) "\0" X
+#else
+# define OpHelp(X)
+#endif
+SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){
+ static const char *const azName[] = { "?",
+     /*   1 */ "Function"         OpHelp("r[P3]=func(r[P2@P5])"),
+     /*   2 */ "Savepoint"        OpHelp(""),
+     /*   3 */ "AutoCommit"       OpHelp(""),
+     /*   4 */ "Transaction"      OpHelp(""),
+     /*   5 */ "SorterNext"       OpHelp(""),
+     /*   6 */ "PrevIfOpen"       OpHelp(""),
+     /*   7 */ "NextIfOpen"       OpHelp(""),
+     /*   8 */ "Prev"             OpHelp(""),
+     /*   9 */ "Next"             OpHelp(""),
+     /*  10 */ "AggStep"          OpHelp("accum=r[P3] step(r[P2@P5])"),
+     /*  11 */ "Checkpoint"       OpHelp(""),
+     /*  12 */ "JournalMode"      OpHelp(""),
+     /*  13 */ "Vacuum"           OpHelp(""),
+     /*  14 */ "VFilter"          OpHelp("iplan=r[P3] zplan='P4'"),
+     /*  15 */ "VUpdate"          OpHelp("data=r[P3@P2]"),
+     /*  16 */ "Goto"             OpHelp(""),
+     /*  17 */ "Gosub"            OpHelp(""),
+     /*  18 */ "Return"           OpHelp(""),
+     /*  19 */ "Not"              OpHelp("r[P2]= !r[P1]"),
+     /*  20 */ "InitCoroutine"    OpHelp(""),
+     /*  21 */ "EndCoroutine"     OpHelp(""),
+     /*  22 */ "Yield"            OpHelp(""),
+     /*  23 */ "HaltIfNull"       OpHelp("if r[P3]=null halt"),
+     /*  24 */ "Halt"             OpHelp(""),
+     /*  25 */ "Integer"          OpHelp("r[P2]=P1"),
+     /*  26 */ "Int64"            OpHelp("r[P2]=P4"),
+     /*  27 */ "String"           OpHelp("r[P2]='P4' (len=P1)"),
+     /*  28 */ "Null"             OpHelp("r[P2..P3]=NULL"),
+     /*  29 */ "SoftNull"         OpHelp("r[P1]=NULL"),
+     /*  30 */ "Blob"             OpHelp("r[P2]=P4 (len=P1)"),
+     /*  31 */ "Variable"         OpHelp("r[P2]=parameter(P1,P4)"),
+     /*  32 */ "Move"             OpHelp("r[P2@P3]=r[P1@P3]"),
+     /*  33 */ "Copy"             OpHelp("r[P2@P3+1]=r[P1@P3+1]"),
+     /*  34 */ "SCopy"            OpHelp("r[P2]=r[P1]"),
+     /*  35 */ "ResultRow"        OpHelp("output=r[P1@P2]"),
+     /*  36 */ "CollSeq"          OpHelp(""),
+     /*  37 */ "AddImm"           OpHelp("r[P1]=r[P1]+P2"),
+     /*  38 */ "MustBeInt"        OpHelp(""),
+     /*  39 */ "RealAffinity"     OpHelp(""),
+     /*  40 */ "Cast"             OpHelp("affinity(r[P1])"),
+     /*  41 */ "Permutation"      OpHelp(""),
+     /*  42 */ "Compare"          OpHelp("r[P1@P3] <-> r[P2@P3]"),
+     /*  43 */ "Jump"             OpHelp(""),
+     /*  44 */ "Once"             OpHelp(""),
+     /*  45 */ "If"               OpHelp(""),
+     /*  46 */ "IfNot"            OpHelp(""),
+     /*  47 */ "Column"           OpHelp("r[P3]=PX"),
+     /*  48 */ "Affinity"         OpHelp("affinity(r[P1@P2])"),
+     /*  49 */ "MakeRecord"       OpHelp("r[P3]=mkrec(r[P1@P2])"),
+     /*  50 */ "Count"            OpHelp("r[P2]=count()"),
+     /*  51 */ "ReadCookie"       OpHelp(""),
+     /*  52 */ "SetCookie"        OpHelp(""),
+     /*  53 */ "ReopenIdx"        OpHelp("root=P2 iDb=P3"),
+     /*  54 */ "OpenRead"         OpHelp("root=P2 iDb=P3"),
+     /*  55 */ "OpenWrite"        OpHelp("root=P2 iDb=P3"),
+     /*  56 */ "OpenAutoindex"    OpHelp("nColumn=P2"),
+     /*  57 */ "OpenEphemeral"    OpHelp("nColumn=P2"),
+     /*  58 */ "SorterOpen"       OpHelp(""),
+     /*  59 */ "SequenceTest"     OpHelp("if( cursor[P1].ctr++ ) pc = P2"),
+     /*  60 */ "OpenPseudo"       OpHelp("P3 columns in r[P2]"),
+     /*  61 */ "Close"            OpHelp(""),
+     /*  62 */ "SeekLT"           OpHelp("key=r[P3@P4]"),
+     /*  63 */ "SeekLE"           OpHelp("key=r[P3@P4]"),
+     /*  64 */ "SeekGE"           OpHelp("key=r[P3@P4]"),
+     /*  65 */ "SeekGT"           OpHelp("key=r[P3@P4]"),
+     /*  66 */ "Seek"             OpHelp("intkey=r[P2]"),
+     /*  67 */ "NoConflict"       OpHelp("key=r[P3@P4]"),
+     /*  68 */ "NotFound"         OpHelp("key=r[P3@P4]"),
+     /*  69 */ "Found"            OpHelp("key=r[P3@P4]"),
+     /*  70 */ "NotExists"        OpHelp("intkey=r[P3]"),
+     /*  71 */ "Or"               OpHelp("r[P3]=(r[P1] || r[P2])"),
+     /*  72 */ "And"              OpHelp("r[P3]=(r[P1] && r[P2])"),
+     /*  73 */ "Sequence"         OpHelp("r[P2]=cursor[P1].ctr++"),
+     /*  74 */ "NewRowid"         OpHelp("r[P2]=rowid"),
+     /*  75 */ "Insert"           OpHelp("intkey=r[P3] data=r[P2]"),
+     /*  76 */ "IsNull"           OpHelp("if r[P1]==NULL goto P2"),
+     /*  77 */ "NotNull"          OpHelp("if r[P1]!=NULL goto P2"),
+     /*  78 */ "Ne"               OpHelp("if r[P1]!=r[P3] goto P2"),
+     /*  79 */ "Eq"               OpHelp("if r[P1]==r[P3] goto P2"),
+     /*  80 */ "Gt"               OpHelp("if r[P1]>r[P3] goto P2"),
+     /*  81 */ "Le"               OpHelp("if r[P1]<=r[P3] goto P2"),
+     /*  82 */ "Lt"               OpHelp("if r[P1]<r[P3] goto P2"),
+     /*  83 */ "Ge"               OpHelp("if r[P1]>=r[P3] goto P2"),
+     /*  84 */ "InsertInt"        OpHelp("intkey=P3 data=r[P2]"),
+     /*  85 */ "BitAnd"           OpHelp("r[P3]=r[P1]&r[P2]"),
+     /*  86 */ "BitOr"            OpHelp("r[P3]=r[P1]|r[P2]"),
+     /*  87 */ "ShiftLeft"        OpHelp("r[P3]=r[P2]<<r[P1]"),
+     /*  88 */ "ShiftRight"       OpHelp("r[P3]=r[P2]>>r[P1]"),
+     /*  89 */ "Add"              OpHelp("r[P3]=r[P1]+r[P2]"),
+     /*  90 */ "Subtract"         OpHelp("r[P3]=r[P2]-r[P1]"),
+     /*  91 */ "Multiply"         OpHelp("r[P3]=r[P1]*r[P2]"),
+     /*  92 */ "Divide"           OpHelp("r[P3]=r[P2]/r[P1]"),
+     /*  93 */ "Remainder"        OpHelp("r[P3]=r[P2]%r[P1]"),
+     /*  94 */ "Concat"           OpHelp("r[P3]=r[P2]+r[P1]"),
+     /*  95 */ "Delete"           OpHelp(""),
+     /*  96 */ "BitNot"           OpHelp("r[P1]= ~r[P1]"),
+     /*  97 */ "String8"          OpHelp("r[P2]='P4'"),
+     /*  98 */ "ResetCount"       OpHelp(""),
+     /*  99 */ "SorterCompare"    OpHelp("if key(P1)!=trim(r[P3],P4) goto P2"),
+     /* 100 */ "SorterData"       OpHelp("r[P2]=data"),
+     /* 101 */ "RowKey"           OpHelp("r[P2]=key"),
+     /* 102 */ "RowData"          OpHelp("r[P2]=data"),
+     /* 103 */ "Rowid"            OpHelp("r[P2]=rowid"),
+     /* 104 */ "NullRow"          OpHelp(""),
+     /* 105 */ "Last"             OpHelp(""),
+     /* 106 */ "SorterSort"       OpHelp(""),
+     /* 107 */ "Sort"             OpHelp(""),
+     /* 108 */ "Rewind"           OpHelp(""),
+     /* 109 */ "SorterInsert"     OpHelp(""),
+     /* 110 */ "IdxInsert"        OpHelp("key=r[P2]"),
+     /* 111 */ "IdxDelete"        OpHelp("key=r[P2@P3]"),
+     /* 112 */ "IdxRowid"         OpHelp("r[P2]=rowid"),
+     /* 113 */ "IdxLE"            OpHelp("key=r[P3@P4]"),
+     /* 114 */ "IdxGT"            OpHelp("key=r[P3@P4]"),
+     /* 115 */ "IdxLT"            OpHelp("key=r[P3@P4]"),
+     /* 116 */ "IdxGE"            OpHelp("key=r[P3@P4]"),
+     /* 117 */ "Destroy"          OpHelp(""),
+     /* 118 */ "Clear"            OpHelp(""),
+     /* 119 */ "ResetSorter"      OpHelp(""),
+     /* 120 */ "CreateIndex"      OpHelp("r[P2]=root iDb=P1"),
+     /* 121 */ "CreateTable"      OpHelp("r[P2]=root iDb=P1"),
+     /* 122 */ "ParseSchema"      OpHelp(""),
+     /* 123 */ "LoadAnalysis"     OpHelp(""),
+     /* 124 */ "DropTable"        OpHelp(""),
+     /* 125 */ "DropIndex"        OpHelp(""),
+     /* 126 */ "DropTrigger"      OpHelp(""),
+     /* 127 */ "IntegrityCk"      OpHelp(""),
+     /* 128 */ "RowSetAdd"        OpHelp("rowset(P1)=r[P2]"),
+     /* 129 */ "RowSetRead"       OpHelp("r[P3]=rowset(P1)"),
+     /* 130 */ "RowSetTest"       OpHelp("if r[P3] in rowset(P1) goto P2"),
+     /* 131 */ "Program"          OpHelp(""),
+     /* 132 */ "Param"            OpHelp(""),
+     /* 133 */ "Real"             OpHelp("r[P2]=P4"),
+     /* 134 */ "FkCounter"        OpHelp("fkctr[P1]+=P2"),
+     /* 135 */ "FkIfZero"         OpHelp("if fkctr[P1]==0 goto P2"),
+     /* 136 */ "MemMax"           OpHelp("r[P1]=max(r[P1],r[P2])"),
+     /* 137 */ "IfPos"            OpHelp("if r[P1]>0 goto P2"),
+     /* 138 */ "IfNeg"            OpHelp("r[P1]+=P3, if r[P1]<0 goto P2"),
+     /* 139 */ "IfZero"           OpHelp("r[P1]+=P3, if r[P1]==0 goto P2"),
+     /* 140 */ "AggFinal"         OpHelp("accum=r[P1] N=P2"),
+     /* 141 */ "IncrVacuum"       OpHelp(""),
+     /* 142 */ "Expire"           OpHelp(""),
+     /* 143 */ "TableLock"        OpHelp("iDb=P1 root=P2 write=P3"),
+     /* 144 */ "VBegin"           OpHelp(""),
+     /* 145 */ "VCreate"          OpHelp(""),
+     /* 146 */ "VDestroy"         OpHelp(""),
+     /* 147 */ "VOpen"            OpHelp(""),
+     /* 148 */ "VColumn"          OpHelp("r[P3]=vcolumn(P2)"),
+     /* 149 */ "VNext"            OpHelp(""),
+     /* 150 */ "VRename"          OpHelp(""),
+     /* 151 */ "Pagecount"        OpHelp(""),
+     /* 152 */ "MaxPgcnt"         OpHelp(""),
+     /* 153 */ "Init"             OpHelp("Start at P2"),
+     /* 154 */ "Noop"             OpHelp(""),
+     /* 155 */ "Explain"          OpHelp(""),
+  };
+  return azName[i];
+}
+#endif
+
+/************** End of opcodes.c *********************************************/
+/************** Begin file os_unix.c *****************************************/
+/*
+** 2004 May 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains the VFS implementation for unix-like operating systems
+** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
+**
+** There are actually several different VFS implementations in this file.
+** The differences are in the way that file locking is done.  The default
+** implementation uses Posix Advisory Locks.  Alternative implementations
+** use flock(), dot-files, various proprietary locking schemas, or simply
+** skip locking all together.
+**
+** This source file is organized into divisions where the logic for various
+** subfunctions is contained within the appropriate division.  PLEASE
+** KEEP THE STRUCTURE OF THIS FILE INTACT.  New code should be placed
+** in the correct division and should be clearly labeled.
+**
+** The layout of divisions is as follows:
+**
+**   *  General-purpose declarations and utility functions.
+**   *  Unique file ID logic used by VxWorks.
+**   *  Various locking primitive implementations (all except proxy locking):
+**      + for Posix Advisory Locks
+**      + for no-op locks
+**      + for dot-file locks
+**      + for flock() locking
+**      + for named semaphore locks (VxWorks only)
+**      + for AFP filesystem locks (MacOSX only)
+**   *  sqlite3_file methods not associated with locking.
+**   *  Definitions of sqlite3_io_methods objects for all locking
+**      methods plus "finder" functions for each locking method.
+**   *  sqlite3_vfs method implementations.
+**   *  Locking primitives for the proxy uber-locking-method. (MacOSX only)
+**   *  Definitions of sqlite3_vfs objects for all locking methods
+**      plus implementations of sqlite3_os_init() and sqlite3_os_end().
+*/
+#if SQLITE_OS_UNIX              /* This file is used on unix only */
+
+/*
+** There are various methods for file locking used for concurrency
+** control:
+**
+**   1. POSIX locking (the default),
+**   2. No locking,
+**   3. Dot-file locking,
+**   4. flock() locking,
+**   5. AFP locking (OSX only),
+**   6. Named POSIX semaphores (VXWorks only),
+**   7. proxy locking. (OSX only)
+**
+** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE
+** is defined to 1.  The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
+** selection of the appropriate locking style based on the filesystem
+** where the database is located.  
+*/
+#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
+#  if defined(__APPLE__)
+#    define SQLITE_ENABLE_LOCKING_STYLE 1
+#  else
+#    define SQLITE_ENABLE_LOCKING_STYLE 0
+#  endif
+#endif
+
+/*
+** Define the OS_VXWORKS pre-processor macro to 1 if building on 
+** vxworks, or 0 otherwise.
+*/
+#ifndef OS_VXWORKS
+#  if defined(__RTP__) || defined(_WRS_KERNEL)
+#    define OS_VXWORKS 1
+#  else
+#    define OS_VXWORKS 0
+#  endif
+#endif
+
+/*
+** standard include files.
+*/
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+/* #include <time.h> */
+#include <sys/time.h>
+#include <errno.h>
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+# include <sys/mman.h>
+#endif
+
+#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
+# include <sys/ioctl.h>
+# if OS_VXWORKS
+#  include <semaphore.h>
+#  include <limits.h>
+# else
+#  include <sys/file.h>
+#  include <sys/param.h>
+# endif
+#endif /* SQLITE_ENABLE_LOCKING_STYLE */
+
+#if defined(__APPLE__) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
+# include <sys/mount.h>
+#endif
+
+#ifdef HAVE_UTIME
+# include <utime.h>
+#endif
+
+/*
+** Allowed values of unixFile.fsFlags
+*/
+#define SQLITE_FSFLAGS_IS_MSDOS     0x1
+
+/*
+** If we are to be thread-safe, include the pthreads header and define
+** the SQLITE_UNIX_THREADS macro.
+*/
+#if SQLITE_THREADSAFE
+/* # include <pthread.h> */
+# define SQLITE_UNIX_THREADS 1
+#endif
+
+/*
+** Default permissions when creating a new file
+*/
+#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
+# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
+#endif
+
+/*
+** Default permissions when creating auto proxy dir
+*/
+#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
+# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
+#endif
+
+/*
+** Maximum supported path-length.
+*/
+#define MAX_PATHNAME 512
+
+/*
+** Only set the lastErrno if the error code is a real error and not 
+** a normal expected return code of SQLITE_BUSY or SQLITE_OK
+*/
+#define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))
+
+/* Forward references */
+typedef struct unixShm unixShm;               /* Connection shared memory */
+typedef struct unixShmNode unixShmNode;       /* Shared memory instance */
+typedef struct unixInodeInfo unixInodeInfo;   /* An i-node */
+typedef struct UnixUnusedFd UnixUnusedFd;     /* An unused file descriptor */
+
+/*
+** Sometimes, after a file handle is closed by SQLite, the file descriptor
+** cannot be closed immediately. In these cases, instances of the following
+** structure are used to store the file descriptor while waiting for an
+** opportunity to either close or reuse it.
+*/
+struct UnixUnusedFd {
+  int fd;                   /* File descriptor to close */
+  int flags;                /* Flags this file descriptor was opened with */
+  UnixUnusedFd *pNext;      /* Next unused file descriptor on same file */
+};
+
+/*
+** The unixFile structure is subclass of sqlite3_file specific to the unix
+** VFS implementations.
+*/
+typedef struct unixFile unixFile;
+struct unixFile {
+  sqlite3_io_methods const *pMethod;  /* Always the first entry */
+  sqlite3_vfs *pVfs;                  /* The VFS that created this unixFile */
+  unixInodeInfo *pInode;              /* Info about locks on this inode */
+  int h;                              /* The file descriptor */
+  unsigned char eFileLock;            /* The type of lock held on this fd */
+  unsigned short int ctrlFlags;       /* Behavioral bits.  UNIXFILE_* flags */
+  int lastErrno;                      /* The unix errno from last I/O error */
+  void *lockingContext;               /* Locking style specific state */
+  UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
+  const char *zPath;                  /* Name of the file */
+  unixShm *pShm;                      /* Shared memory segment information */
+  int szChunk;                        /* Configured by FCNTL_CHUNK_SIZE */
+#if SQLITE_MAX_MMAP_SIZE>0
+  int nFetchOut;                      /* Number of outstanding xFetch refs */
+  sqlite3_int64 mmapSize;             /* Usable size of mapping at pMapRegion */
+  sqlite3_int64 mmapSizeActual;       /* Actual size of mapping at pMapRegion */
+  sqlite3_int64 mmapSizeMax;          /* Configured FCNTL_MMAP_SIZE value */
+  void *pMapRegion;                   /* Memory mapped region */
+#endif
+#ifdef __QNXNTO__
+  int sectorSize;                     /* Device sector size */
+  int deviceCharacteristics;          /* Precomputed device characteristics */
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE
+  int openFlags;                      /* The flags specified at open() */
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
+  unsigned fsFlags;                   /* cached details from statfs() */
+#endif
+#if OS_VXWORKS
+  struct vxworksFileId *pId;          /* Unique file ID */
+#endif
+#ifdef SQLITE_DEBUG
+  /* The next group of variables are used to track whether or not the
+  ** transaction counter in bytes 24-27 of database files are updated
+  ** whenever any part of the database changes.  An assertion fault will
+  ** occur if a file is updated without also updating the transaction
+  ** counter.  This test is made to avoid new problems similar to the
+  ** one described by ticket #3584. 
+  */
+  unsigned char transCntrChng;   /* True if the transaction counter changed */
+  unsigned char dbUpdate;        /* True if any part of database file changed */
+  unsigned char inNormalWrite;   /* True if in a normal write operation */
+
+#endif
+
+#ifdef SQLITE_TEST
+  /* In test mode, increase the size of this structure a bit so that 
+  ** it is larger than the struct CrashFile defined in test6.c.
+  */
+  char aPadding[32];
+#endif
+};
+
+/* This variable holds the process id (pid) from when the xRandomness()
+** method was called.  If xOpen() is called from a different process id,
+** indicating that a fork() has occurred, the PRNG will be reset.
+*/
+static int randomnessPid = 0;
+
+/*
+** Allowed values for the unixFile.ctrlFlags bitmask:
+*/
+#define UNIXFILE_EXCL        0x01     /* Connections from one process only */
+#define UNIXFILE_RDONLY      0x02     /* Connection is read only */
+#define UNIXFILE_PERSIST_WAL 0x04     /* Persistent WAL mode */
+#ifndef SQLITE_DISABLE_DIRSYNC
+# define UNIXFILE_DIRSYNC    0x08     /* Directory sync needed */
+#else
+# define UNIXFILE_DIRSYNC    0x00
+#endif
+#define UNIXFILE_PSOW        0x10     /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
+#define UNIXFILE_DELETE      0x20     /* Delete on close */
+#define UNIXFILE_URI         0x40     /* Filename might have query parameters */
+#define UNIXFILE_NOLOCK      0x80     /* Do no file locking */
+#define UNIXFILE_WARNED    0x0100     /* verifyDbFile() warnings have been issued */
+
+/*
+** Include code that is common to all os_*.c files
+*/
+/************** Include os_common.h in the middle of os_unix.c ***************/
+/************** Begin file os_common.h ***************************************/
+/*
+** 2004 May 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains macros and a little bit of code that is common to
+** all of the platform-specific files (os_*.c) and is #included into those
+** files.
+**
+** This file should be #included by the os_*.c files only.  It is not a
+** general purpose header file.
+*/
+#ifndef _OS_COMMON_H_
+#define _OS_COMMON_H_
+
+/*
+** At least two bugs have slipped in because we changed the MEMORY_DEBUG
+** macro to SQLITE_DEBUG and some older makefiles have not yet made the
+** switch.  The following code should catch this problem at compile-time.
+*/
+#ifdef MEMORY_DEBUG
+# error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
+#endif
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#else
+# define OSTRACE(X)
+#endif
+
+/*
+** Macros for performance tracing.  Normally turned off.  Only works
+** on i486 hardware.
+*/
+#ifdef SQLITE_PERFORMANCE_TRACE
+
+/* 
+** hwtime.h contains inline assembler code for implementing 
+** high-performance timing routines.
+*/
+/************** Include hwtime.h in the middle of os_common.h ****************/
+/************** Begin file hwtime.h ******************************************/
+/*
+** 2008 May 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains inline asm code for retrieving "high-performance"
+** counters for x86 class CPUs.
+*/
+#ifndef _HWTIME_H_
+#define _HWTIME_H_
+
+/*
+** The following routine only works on pentium-class (or newer) processors.
+** It uses the RDTSC opcode to read the cycle count value out of the
+** processor and returns that value.  This can be used for high-res
+** profiling.
+*/
+#if (defined(__GNUC__) || defined(_MSC_VER)) && \
+      (defined(i386) || defined(__i386__) || defined(_M_IX86))
+
+  #if defined(__GNUC__)
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+     unsigned int lo, hi;
+     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+     return (sqlite_uint64)hi << 32 | lo;
+  }
+
+  #elif defined(_MSC_VER)
+
+  __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){
+     __asm {
+        rdtsc
+        ret       ; return value at EDX:EAX
+     }
+  }
+
+  #endif
+
+#elif (defined(__GNUC__) && defined(__x86_64__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long val;
+      __asm__ __volatile__ ("rdtsc" : "=A" (val));
+      return val;
+  }
+ 
+#elif (defined(__GNUC__) && defined(__ppc__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long long retval;
+      unsigned long junk;
+      __asm__ __volatile__ ("\n\
+          1:      mftbu   %1\n\
+                  mftb    %L0\n\
+                  mftbu   %0\n\
+                  cmpw    %0,%1\n\
+                  bne     1b"
+                  : "=r" (retval), "=r" (junk));
+      return retval;
+  }
+
+#else
+
+  #error Need implementation of sqlite3Hwtime() for your platform.
+
+  /*
+  ** To compile without implementing sqlite3Hwtime() for your platform,
+  ** you can remove the above #error and use the following
+  ** stub function.  You will lose timing support for many
+  ** of the debugging and testing utilities, but it should at
+  ** least compile and run.
+  */
+SQLITE_PRIVATE   sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); }
+
+#endif
+
+#endif /* !defined(_HWTIME_H_) */
+
+/************** End of hwtime.h **********************************************/
+/************** Continuing where we left off in os_common.h ******************/
+
+static sqlite_uint64 g_start;
+static sqlite_uint64 g_elapsed;
+#define TIMER_START       g_start=sqlite3Hwtime()
+#define TIMER_END         g_elapsed=sqlite3Hwtime()-g_start
+#define TIMER_ELAPSED     g_elapsed
+#else
+#define TIMER_START
+#define TIMER_END
+#define TIMER_ELAPSED     ((sqlite_uint64)0)
+#endif
+
+/*
+** If we compile with the SQLITE_TEST macro set, then the following block
+** of code will give us the ability to simulate a disk I/O error.  This
+** is used for testing the I/O recovery logic.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_io_error_hit = 0;            /* Total number of I/O Errors */
+SQLITE_API int sqlite3_io_error_hardhit = 0;        /* Number of non-benign errors */
+SQLITE_API int sqlite3_io_error_pending = 0;        /* Count down to first I/O error */
+SQLITE_API int sqlite3_io_error_persist = 0;        /* True if I/O errors persist */
+SQLITE_API int sqlite3_io_error_benign = 0;         /* True if errors are benign */
+SQLITE_API int sqlite3_diskfull_pending = 0;
+SQLITE_API int sqlite3_diskfull = 0;
+#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X)
+#define SimulateIOError(CODE)  \
+  if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \
+       || sqlite3_io_error_pending-- == 1 )  \
+              { local_ioerr(); CODE; }
+static void local_ioerr(){
+  IOTRACE(("IOERR\n"));
+  sqlite3_io_error_hit++;
+  if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++;
+}
+#define SimulateDiskfullError(CODE) \
+   if( sqlite3_diskfull_pending ){ \
+     if( sqlite3_diskfull_pending == 1 ){ \
+       local_ioerr(); \
+       sqlite3_diskfull = 1; \
+       sqlite3_io_error_hit = 1; \
+       CODE; \
+     }else{ \
+       sqlite3_diskfull_pending--; \
+     } \
+   }
+#else
+#define SimulateIOErrorBenign(X)
+#define SimulateIOError(A)
+#define SimulateDiskfullError(A)
+#endif
+
+/*
+** When testing, keep a count of the number of open files.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_open_file_count = 0;
+#define OpenCounter(X)  sqlite3_open_file_count+=(X)
+#else
+#define OpenCounter(X)
+#endif
+
+#endif /* !defined(_OS_COMMON_H_) */
+
+/************** End of os_common.h *******************************************/
+/************** Continuing where we left off in os_unix.c ********************/
+
+/*
+** Define various macros that are missing from some systems.
+*/
+#ifndef O_LARGEFILE
+# define O_LARGEFILE 0
+#endif
+#ifdef SQLITE_DISABLE_LFS
+# undef O_LARGEFILE
+# define O_LARGEFILE 0
+#endif
+#ifndef O_NOFOLLOW
+# define O_NOFOLLOW 0
+#endif
+#ifndef O_BINARY
+# define O_BINARY 0
+#endif
+
+/*
+** The threadid macro resolves to the thread-id or to 0.  Used for
+** testing and debugging only.
+*/
+#if SQLITE_THREADSAFE
+#define threadid pthread_self()
+#else
+#define threadid 0
+#endif
+
+/*
+** HAVE_MREMAP defaults to true on Linux and false everywhere else.
+*/
+#if !defined(HAVE_MREMAP)
+# if defined(__linux__) && defined(_GNU_SOURCE)
+#  define HAVE_MREMAP 1
+# else
+#  define HAVE_MREMAP 0
+# endif
+#endif
+
+/*
+** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
+** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
+*/
+#ifdef __ANDROID__
+# define lseek lseek64
+#endif
+
+/*
+** Different Unix systems declare open() in different ways.  Same use
+** open(const char*,int,mode_t).  Others use open(const char*,int,...).
+** The difference is important when using a pointer to the function.
+**
+** The safest way to deal with the problem is to always use this wrapper
+** which always has the same well-defined interface.
+*/
+static int posixOpen(const char *zFile, int flags, int mode){
+  return open(zFile, flags, mode);
+}
+
+/*
+** On some systems, calls to fchown() will trigger a message in a security
+** log if they come from non-root processes.  So avoid calling fchown() if
+** we are not running as root.
+*/
+static int posixFchown(int fd, uid_t uid, gid_t gid){
+#if OS_VXWORKS
+  return 0;
+#else
+  return geteuid() ? 0 : fchown(fd,uid,gid);
+#endif
+}
+
+/* Forward reference */
+static int openDirectory(const char*, int*);
+static int unixGetpagesize(void);
+
+/*
+** Many system calls are accessed through pointer-to-functions so that
+** they may be overridden at runtime to facilitate fault injection during
+** testing and sandboxing.  The following array holds the names and pointers
+** to all overrideable system calls.
+*/
+static struct unix_syscall {
+  const char *zName;            /* Name of the system call */
+  sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
+  sqlite3_syscall_ptr pDefault; /* Default value */
+} aSyscall[] = {
+  { "open",         (sqlite3_syscall_ptr)posixOpen,  0  },
+#define osOpen      ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
+
+  { "close",        (sqlite3_syscall_ptr)close,      0  },
+#define osClose     ((int(*)(int))aSyscall[1].pCurrent)
+
+  { "access",       (sqlite3_syscall_ptr)access,     0  },
+#define osAccess    ((int(*)(const char*,int))aSyscall[2].pCurrent)
+
+  { "getcwd",       (sqlite3_syscall_ptr)getcwd,     0  },
+#define osGetcwd    ((char*(*)(char*,size_t))aSyscall[3].pCurrent)
+
+  { "stat",         (sqlite3_syscall_ptr)stat,       0  },
+#define osStat      ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)
+
+/*
+** The DJGPP compiler environment looks mostly like Unix, but it
+** lacks the fcntl() system call.  So redefine fcntl() to be something
+** that always succeeds.  This means that locking does not occur under
+** DJGPP.  But it is DOS - what did you expect?
+*/
+#ifdef __DJGPP__
+  { "fstat",        0,                 0  },
+#define osFstat(a,b,c)    0
+#else     
+  { "fstat",        (sqlite3_syscall_ptr)fstat,      0  },
+#define osFstat     ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
+#endif
+
+  { "ftruncate",    (sqlite3_syscall_ptr)ftruncate,  0  },
+#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)
+
+  { "fcntl",        (sqlite3_syscall_ptr)fcntl,      0  },
+#define osFcntl     ((int(*)(int,int,...))aSyscall[7].pCurrent)
+
+  { "read",         (sqlite3_syscall_ptr)read,       0  },
+#define osRead      ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
+
+#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
+  { "pread",        (sqlite3_syscall_ptr)pread,      0  },
+#else
+  { "pread",        (sqlite3_syscall_ptr)0,          0  },
+#endif
+#define osPread     ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
+
+#if defined(USE_PREAD64)
+  { "pread64",      (sqlite3_syscall_ptr)pread64,    0  },
+#else
+  { "pread64",      (sqlite3_syscall_ptr)0,          0  },
+#endif
+#define osPread64   ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent)
+
+  { "write",        (sqlite3_syscall_ptr)write,      0  },
+#define osWrite     ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
+
+#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
+  { "pwrite",       (sqlite3_syscall_ptr)pwrite,     0  },
+#else
+  { "pwrite",       (sqlite3_syscall_ptr)0,          0  },
+#endif
+#define osPwrite    ((ssize_t(*)(int,const void*,size_t,off_t))\
+                    aSyscall[12].pCurrent)
+
+#if defined(USE_PREAD64)
+  { "pwrite64",     (sqlite3_syscall_ptr)pwrite64,   0  },
+#else
+  { "pwrite64",     (sqlite3_syscall_ptr)0,          0  },
+#endif
+#define osPwrite64  ((ssize_t(*)(int,const void*,size_t,off_t))\
+                    aSyscall[13].pCurrent)
+
+  { "fchmod",       (sqlite3_syscall_ptr)fchmod,     0  },
+#define osFchmod    ((int(*)(int,mode_t))aSyscall[14].pCurrent)
+
+#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
+  { "fallocate",    (sqlite3_syscall_ptr)posix_fallocate,  0 },
+#else
+  { "fallocate",    (sqlite3_syscall_ptr)0,                0 },
+#endif
+#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
+
+  { "unlink",       (sqlite3_syscall_ptr)unlink,           0 },
+#define osUnlink    ((int(*)(const char*))aSyscall[16].pCurrent)
+
+  { "openDirectory",    (sqlite3_syscall_ptr)openDirectory,      0 },
+#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
+
+  { "mkdir",        (sqlite3_syscall_ptr)mkdir,           0 },
+#define osMkdir     ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
+
+  { "rmdir",        (sqlite3_syscall_ptr)rmdir,           0 },
+#define osRmdir     ((int(*)(const char*))aSyscall[19].pCurrent)
+
+  { "fchown",       (sqlite3_syscall_ptr)posixFchown,     0 },
+#define osFchown    ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
+
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+  { "mmap",       (sqlite3_syscall_ptr)mmap,     0 },
+#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)
+
+  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
+#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)
+
+#if HAVE_MREMAP
+  { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
+#else
+  { "mremap",       (sqlite3_syscall_ptr)0,               0 },
+#endif
+#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
+  { "getpagesize",  (sqlite3_syscall_ptr)unixGetpagesize, 0 },
+#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent)
+
+#endif
+
+}; /* End of the overrideable system calls */
+
+/*
+** This is the xSetSystemCall() method of sqlite3_vfs for all of the
+** "unix" VFSes.  Return SQLITE_OK opon successfully updating the
+** system call pointer, or SQLITE_NOTFOUND if there is no configurable
+** system call named zName.
+*/
+static int unixSetSystemCall(
+  sqlite3_vfs *pNotUsed,        /* The VFS pointer.  Not used */
+  const char *zName,            /* Name of system call to override */
+  sqlite3_syscall_ptr pNewFunc  /* Pointer to new system call value */
+){
+  unsigned int i;
+  int rc = SQLITE_NOTFOUND;
+
+  UNUSED_PARAMETER(pNotUsed);
+  if( zName==0 ){
+    /* If no zName is given, restore all system calls to their default
+    ** settings and return NULL
+    */
+    rc = SQLITE_OK;
+    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+      if( aSyscall[i].pDefault ){
+        aSyscall[i].pCurrent = aSyscall[i].pDefault;
+      }
+    }
+  }else{
+    /* If zName is specified, operate on only the one system call
+    ** specified.
+    */
+    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+      if( strcmp(zName, aSyscall[i].zName)==0 ){
+        if( aSyscall[i].pDefault==0 ){
+          aSyscall[i].pDefault = aSyscall[i].pCurrent;
+        }
+        rc = SQLITE_OK;
+        if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
+        aSyscall[i].pCurrent = pNewFunc;
+        break;
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Return the value of a system call.  Return NULL if zName is not a
+** recognized system call name.  NULL is also returned if the system call
+** is currently undefined.
+*/
+static sqlite3_syscall_ptr unixGetSystemCall(
+  sqlite3_vfs *pNotUsed,
+  const char *zName
+){
+  unsigned int i;
+
+  UNUSED_PARAMETER(pNotUsed);
+  for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+    if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
+  }
+  return 0;
+}
+
+/*
+** Return the name of the first system call after zName.  If zName==NULL
+** then return the name of the first system call.  Return NULL if zName
+** is the last system call or if zName is not the name of a valid
+** system call.
+*/
+static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
+  int i = -1;
+
+  UNUSED_PARAMETER(p);
+  if( zName ){
+    for(i=0; i<ArraySize(aSyscall)-1; i++){
+      if( strcmp(zName, aSyscall[i].zName)==0 ) break;
+    }
+  }
+  for(i++; i<ArraySize(aSyscall); i++){
+    if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
+  }
+  return 0;
+}
+
+/*
+** Do not accept any file descriptor less than this value, in order to avoid
+** opening database file using file descriptors that are commonly used for 
+** standard input, output, and error.
+*/
+#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
+# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
+#endif
+
+/*
+** Invoke open().  Do so multiple times, until it either succeeds or
+** fails for some reason other than EINTR.
+**
+** If the file creation mode "m" is 0 then set it to the default for
+** SQLite.  The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
+** 0644) as modified by the system umask.  If m is not 0, then
+** make the file creation mode be exactly m ignoring the umask.
+**
+** The m parameter will be non-zero only when creating -wal, -journal,
+** and -shm files.  We want those files to have *exactly* the same
+** permissions as their original database, unadulterated by the umask.
+** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
+** transaction crashes and leaves behind hot journals, then any
+** process that is able to write to the database will also be able to
+** recover the hot journals.
+*/
+static int robust_open(const char *z, int f, mode_t m){
+  int fd;
+  mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
+  while(1){
+#if defined(O_CLOEXEC)
+    fd = osOpen(z,f|O_CLOEXEC,m2);
+#else
+    fd = osOpen(z,f,m2);
+#endif
+    if( fd<0 ){
+      if( errno==EINTR ) continue;
+      break;
+    }
+    if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
+    osClose(fd);
+    sqlite3_log(SQLITE_WARNING, 
+                "attempt to open \"%s\" as file descriptor %d", z, fd);
+    fd = -1;
+    if( osOpen("/dev/null", f, m)<0 ) break;
+  }
+  if( fd>=0 ){
+    if( m!=0 ){
+      struct stat statbuf;
+      if( osFstat(fd, &statbuf)==0 
+       && statbuf.st_size==0
+       && (statbuf.st_mode&0777)!=m 
+      ){
+        osFchmod(fd, m);
+      }
+    }
+#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
+    osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
+#endif
+  }
+  return fd;
+}
+
+/*
+** Helper functions to obtain and relinquish the global mutex. The
+** global mutex is used to protect the unixInodeInfo and
+** vxworksFileId objects used by this file, all of which may be 
+** shared by multiple threads.
+**
+** Function unixMutexHeld() is used to assert() that the global mutex 
+** is held when required. This function is only used as part of assert() 
+** statements. e.g.
+**
+**   unixEnterMutex()
+**     assert( unixMutexHeld() );
+**   unixEnterLeave()
+*/
+static void unixEnterMutex(void){
+  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+static void unixLeaveMutex(void){
+  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+#ifdef SQLITE_DEBUG
+static int unixMutexHeld(void) {
+  return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+#endif
+
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+/*
+** Helper function for printing out trace information from debugging
+** binaries. This returns the string representation of the supplied
+** integer lock-type.
+*/
+static const char *azFileLock(int eFileLock){
+  switch( eFileLock ){
+    case NO_LOCK: return "NONE";
+    case SHARED_LOCK: return "SHARED";
+    case RESERVED_LOCK: return "RESERVED";
+    case PENDING_LOCK: return "PENDING";
+    case EXCLUSIVE_LOCK: return "EXCLUSIVE";
+  }
+  return "ERROR";
+}
+#endif
+
+#ifdef SQLITE_LOCK_TRACE
+/*
+** Print out information about all locking operations.
+**
+** This routine is used for troubleshooting locks on multithreaded
+** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
+** command-line option on the compiler.  This code is normally
+** turned off.
+*/
+static int lockTrace(int fd, int op, struct flock *p){
+  char *zOpName, *zType;
+  int s;
+  int savedErrno;
+  if( op==F_GETLK ){
+    zOpName = "GETLK";
+  }else if( op==F_SETLK ){
+    zOpName = "SETLK";
+  }else{
+    s = osFcntl(fd, op, p);
+    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
+    return s;
+  }
+  if( p->l_type==F_RDLCK ){
+    zType = "RDLCK";
+  }else if( p->l_type==F_WRLCK ){
+    zType = "WRLCK";
+  }else if( p->l_type==F_UNLCK ){
+    zType = "UNLCK";
+  }else{
+    assert( 0 );
+  }
+  assert( p->l_whence==SEEK_SET );
+  s = osFcntl(fd, op, p);
+  savedErrno = errno;
+  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
+     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
+     (int)p->l_pid, s);
+  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
+    struct flock l2;
+    l2 = *p;
+    osFcntl(fd, F_GETLK, &l2);
+    if( l2.l_type==F_RDLCK ){
+      zType = "RDLCK";
+    }else if( l2.l_type==F_WRLCK ){
+      zType = "WRLCK";
+    }else if( l2.l_type==F_UNLCK ){
+      zType = "UNLCK";
+    }else{
+      assert( 0 );
+    }
+    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
+       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
+  }
+  errno = savedErrno;
+  return s;
+}
+#undef osFcntl
+#define osFcntl lockTrace
+#endif /* SQLITE_LOCK_TRACE */
+
+/*
+** Retry ftruncate() calls that fail due to EINTR
+**
+** All calls to ftruncate() within this file should be made through this wrapper.
+** On the Android platform, bypassing the logic below could lead to a corrupt
+** database.
+*/
+static int robust_ftruncate(int h, sqlite3_int64 sz){
+  int rc;
+#ifdef __ANDROID__
+  /* On Android, ftruncate() always uses 32-bit offsets, even if 
+  ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to
+  ** truncate a file to any size larger than 2GiB. Silently ignore any
+  ** such attempts.  */
+  if( sz>(sqlite3_int64)0x7FFFFFFF ){
+    rc = SQLITE_OK;
+  }else
+#endif
+  do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );
+  return rc;
+}
+
+/*
+** This routine translates a standard POSIX errno code into something
+** useful to the clients of the sqlite3 functions.  Specifically, it is
+** intended to translate a variety of "try again" errors into SQLITE_BUSY
+** and a variety of "please close the file descriptor NOW" errors into 
+** SQLITE_IOERR
+** 
+** Errors during initialization of locks, or file system support for locks,
+** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
+*/
+static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
+  switch (posixError) {
+#if 0
+  /* At one point this code was not commented out. In theory, this branch
+  ** should never be hit, as this function should only be called after
+  ** a locking-related function (i.e. fcntl()) has returned non-zero with
+  ** the value of errno as the first argument. Since a system call has failed,
+  ** errno should be non-zero.
+  **
+  ** Despite this, if errno really is zero, we still don't want to return
+  ** SQLITE_OK. The system call failed, and *some* SQLite error should be
+  ** propagated back to the caller. Commenting this branch out means errno==0
+  ** will be handled by the "default:" case below.
+  */
+  case 0: 
+    return SQLITE_OK;
+#endif
+
+  case EAGAIN:
+  case ETIMEDOUT:
+  case EBUSY:
+  case EINTR:
+  case ENOLCK:  
+    /* random NFS retry error, unless during file system support 
+     * introspection, in which it actually means what it says */
+    return SQLITE_BUSY;
+    
+  case EACCES: 
+    /* EACCES is like EAGAIN during locking operations, but not any other time*/
+    if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 
+        (sqliteIOErr == SQLITE_IOERR_UNLOCK) || 
+        (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
+        (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
+      return SQLITE_BUSY;
+    }
+    /* else fall through */
+  case EPERM: 
+    return SQLITE_PERM;
+    
+#if EOPNOTSUPP!=ENOTSUP
+  case EOPNOTSUPP: 
+    /* something went terribly awry, unless during file system support 
+     * introspection, in which it actually means what it says */
+#endif
+#ifdef ENOTSUP
+  case ENOTSUP: 
+    /* invalid fd, unless during file system support introspection, in which 
+     * it actually means what it says */
+#endif
+  case EIO:
+  case EBADF:
+  case EINVAL:
+  case ENOTCONN:
+  case ENODEV:
+  case ENXIO:
+  case ENOENT:
+#ifdef ESTALE                     /* ESTALE is not defined on Interix systems */
+  case ESTALE:
+#endif
+  case ENOSYS:
+    /* these should force the client to close the file and reconnect */
+    
+  default: 
+    return sqliteIOErr;
+  }
+}
+
+
+/******************************************************************************
+****************** Begin Unique File ID Utility Used By VxWorks ***************
+**
+** On most versions of unix, we can get a unique ID for a file by concatenating
+** the device number and the inode number.  But this does not work on VxWorks.
+** On VxWorks, a unique file id must be based on the canonical filename.
+**
+** A pointer to an instance of the following structure can be used as a
+** unique file ID in VxWorks.  Each instance of this structure contains
+** a copy of the canonical filename.  There is also a reference count.  
+** The structure is reclaimed when the number of pointers to it drops to
+** zero.
+**
+** There are never very many files open at one time and lookups are not
+** a performance-critical path, so it is sufficient to put these
+** structures on a linked list.
+*/
+struct vxworksFileId {
+  struct vxworksFileId *pNext;  /* Next in a list of them all */
+  int nRef;                     /* Number of references to this one */
+  int nName;                    /* Length of the zCanonicalName[] string */
+  char *zCanonicalName;         /* Canonical filename */
+};
+
+#if OS_VXWORKS
+/* 
+** All unique filenames are held on a linked list headed by this
+** variable:
+*/
+static struct vxworksFileId *vxworksFileList = 0;
+
+/*
+** Simplify a filename into its canonical form
+** by making the following changes:
+**
+**  * removing any trailing and duplicate /
+**  * convert /./ into just /
+**  * convert /A/../ where A is any simple name into just /
+**
+** Changes are made in-place.  Return the new name length.
+**
+** The original filename is in z[0..n-1].  Return the number of
+** characters in the simplified name.
+*/
+static int vxworksSimplifyName(char *z, int n){
+  int i, j;
+  while( n>1 && z[n-1]=='/' ){ n--; }
+  for(i=j=0; i<n; i++){
+    if( z[i]=='/' ){
+      if( z[i+1]=='/' ) continue;
+      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
+        i += 1;
+        continue;
+      }
+      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
+        while( j>0 && z[j-1]!='/' ){ j--; }
+        if( j>0 ){ j--; }
+        i += 2;
+        continue;
+      }
+    }
+    z[j++] = z[i];
+  }
+  z[j] = 0;
+  return j;
+}
+
+/*
+** Find a unique file ID for the given absolute pathname.  Return
+** a pointer to the vxworksFileId object.  This pointer is the unique
+** file ID.
+**
+** The nRef field of the vxworksFileId object is incremented before
+** the object is returned.  A new vxworksFileId object is created
+** and added to the global list if necessary.
+**
+** If a memory allocation error occurs, return NULL.
+*/
+static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
+  struct vxworksFileId *pNew;         /* search key and new file ID */
+  struct vxworksFileId *pCandidate;   /* For looping over existing file IDs */
+  int n;                              /* Length of zAbsoluteName string */
+
+  assert( zAbsoluteName[0]=='/' );
+  n = (int)strlen(zAbsoluteName);
+  pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
+  if( pNew==0 ) return 0;
+  pNew->zCanonicalName = (char*)&pNew[1];
+  memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
+  n = vxworksSimplifyName(pNew->zCanonicalName, n);
+
+  /* Search for an existing entry that matching the canonical name.
+  ** If found, increment the reference count and return a pointer to
+  ** the existing file ID.
+  */
+  unixEnterMutex();
+  for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
+    if( pCandidate->nName==n 
+     && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
+    ){
+       sqlite3_free(pNew);
+       pCandidate->nRef++;
+       unixLeaveMutex();
+       return pCandidate;
+    }
+  }
+
+  /* No match was found.  We will make a new file ID */
+  pNew->nRef = 1;
+  pNew->nName = n;
+  pNew->pNext = vxworksFileList;
+  vxworksFileList = pNew;
+  unixLeaveMutex();
+  return pNew;
+}
+
+/*
+** Decrement the reference count on a vxworksFileId object.  Free
+** the object when the reference count reaches zero.
+*/
+static void vxworksReleaseFileId(struct vxworksFileId *pId){
+  unixEnterMutex();
+  assert( pId->nRef>0 );
+  pId->nRef--;
+  if( pId->nRef==0 ){
+    struct vxworksFileId **pp;
+    for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
+    assert( *pp==pId );
+    *pp = pId->pNext;
+    sqlite3_free(pId);
+  }
+  unixLeaveMutex();
+}
+#endif /* OS_VXWORKS */
+/*************** End of Unique File ID Utility Used By VxWorks ****************
+******************************************************************************/
+
+
+/******************************************************************************
+*************************** Posix Advisory Locking ****************************
+**
+** POSIX advisory locks are broken by design.  ANSI STD 1003.1 (1996)
+** section 6.5.2.2 lines 483 through 490 specify that when a process
+** sets or clears a lock, that operation overrides any prior locks set
+** by the same process.  It does not explicitly say so, but this implies
+** that it overrides locks set by the same process using a different
+** file descriptor.  Consider this test case:
+**
+**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
+**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
+**
+** Suppose ./file1 and ./file2 are really the same file (because
+** one is a hard or symbolic link to the other) then if you set
+** an exclusive lock on fd1, then try to get an exclusive lock
+** on fd2, it works.  I would have expected the second lock to
+** fail since there was already a lock on the file due to fd1.
+** But not so.  Since both locks came from the same process, the
+** second overrides the first, even though they were on different
+** file descriptors opened on different file names.
+**
+** This means that we cannot use POSIX locks to synchronize file access
+** among competing threads of the same process.  POSIX locks will work fine
+** to synchronize access for threads in separate processes, but not
+** threads within the same process.
+**
+** To work around the problem, SQLite has to manage file locks internally
+** on its own.  Whenever a new database is opened, we have to find the
+** specific inode of the database file (the inode is determined by the
+** st_dev and st_ino fields of the stat structure that fstat() fills in)
+** and check for locks already existing on that inode.  When locks are
+** created or removed, we have to look at our own internal record of the
+** locks to see if another thread has previously set a lock on that same
+** inode.
+**
+** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
+** For VxWorks, we have to use the alternative unique ID system based on
+** canonical filename and implemented in the previous division.)
+**
+** The sqlite3_file structure for POSIX is no longer just an integer file
+** descriptor.  It is now a structure that holds the integer file
+** descriptor and a pointer to a structure that describes the internal
+** locks on the corresponding inode.  There is one locking structure
+** per inode, so if the same inode is opened twice, both unixFile structures
+** point to the same locking structure.  The locking structure keeps
+** a reference count (so we will know when to delete it) and a "cnt"
+** field that tells us its internal lock status.  cnt==0 means the
+** file is unlocked.  cnt==-1 means the file has an exclusive lock.
+** cnt>0 means there are cnt shared locks on the file.
+**
+** Any attempt to lock or unlock a file first checks the locking
+** structure.  The fcntl() system call is only invoked to set a 
+** POSIX lock if the internal lock structure transitions between
+** a locked and an unlocked state.
+**
+** But wait:  there are yet more problems with POSIX advisory locks.
+**
+** If you close a file descriptor that points to a file that has locks,
+** all locks on that file that are owned by the current process are
+** released.  To work around this problem, each unixInodeInfo object
+** maintains a count of the number of pending locks on tha inode.
+** When an attempt is made to close an unixFile, if there are
+** other unixFile open on the same inode that are holding locks, the call
+** to close() the file descriptor is deferred until all of the locks clear.
+** The unixInodeInfo structure keeps a list of file descriptors that need to
+** be closed and that list is walked (and cleared) when the last lock
+** clears.
+**
+** Yet another problem:  LinuxThreads do not play well with posix locks.
+**
+** Many older versions of linux use the LinuxThreads library which is
+** not posix compliant.  Under LinuxThreads, a lock created by thread
+** A cannot be modified or overridden by a different thread B.
+** Only thread A can modify the lock.  Locking behavior is correct
+** if the appliation uses the newer Native Posix Thread Library (NPTL)
+** on linux - with NPTL a lock created by thread A can override locks
+** in thread B.  But there is no way to know at compile-time which
+** threading library is being used.  So there is no way to know at
+** compile-time whether or not thread A can override locks on thread B.
+** One has to do a run-time check to discover the behavior of the
+** current process.
+**
+** SQLite used to support LinuxThreads.  But support for LinuxThreads
+** was dropped beginning with version 3.7.0.  SQLite will still work with
+** LinuxThreads provided that (1) there is no more than one connection 
+** per database file in the same process and (2) database connections
+** do not move across threads.
+*/
+
+/*
+** An instance of the following structure serves as the key used
+** to locate a particular unixInodeInfo object.
+*/
+struct unixFileId {
+  dev_t dev;                  /* Device number */
+#if OS_VXWORKS
+  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
+#else
+  ino_t ino;                  /* Inode number */
+#endif
+};
+
+/*
+** An instance of the following structure is allocated for each open
+** inode.  Or, on LinuxThreads, there is one of these structures for
+** each inode opened by each thread.
+**
+** A single inode can have multiple file descriptors, so each unixFile
+** structure contains a pointer to an instance of this object and this
+** object keeps a count of the number of unixFile pointing to it.
+*/
+struct unixInodeInfo {
+  struct unixFileId fileId;       /* The lookup key */
+  int nShared;                    /* Number of SHARED locks held */
+  unsigned char eFileLock;        /* One of SHARED_LOCK, RESERVED_LOCK etc. */
+  unsigned char bProcessLock;     /* An exclusive process lock is held */
+  int nRef;                       /* Number of pointers to this structure */
+  unixShmNode *pShmNode;          /* Shared memory associated with this inode */
+  int nLock;                      /* Number of outstanding file locks */
+  UnixUnusedFd *pUnused;          /* Unused file descriptors to close */
+  unixInodeInfo *pNext;           /* List of all unixInodeInfo objects */
+  unixInodeInfo *pPrev;           /*    .... doubly linked */
+#if SQLITE_ENABLE_LOCKING_STYLE
+  unsigned long long sharedByte;  /* for AFP simulated shared lock */
+#endif
+#if OS_VXWORKS
+  sem_t *pSem;                    /* Named POSIX semaphore */
+  char aSemName[MAX_PATHNAME+2];  /* Name of that semaphore */
+#endif
+};
+
+/*
+** A lists of all unixInodeInfo objects.
+*/
+static unixInodeInfo *inodeList = 0;
+
+/*
+**
+** This function - unixLogError_x(), is only ever called via the macro
+** unixLogError().
+**
+** It is invoked after an error occurs in an OS function and errno has been
+** set. It logs a message using sqlite3_log() containing the current value of
+** errno and, if possible, the human-readable equivalent from strerror() or
+** strerror_r().
+**
+** The first argument passed to the macro should be the error code that
+** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). 
+** The two subsequent arguments should be the name of the OS function that
+** failed (e.g. "unlink", "open") and the associated file-system path,
+** if any.
+*/
+#define unixLogError(a,b,c)     unixLogErrorAtLine(a,b,c,__LINE__)
+static int unixLogErrorAtLine(
+  int errcode,                    /* SQLite error code */
+  const char *zFunc,              /* Name of OS function that failed */
+  const char *zPath,              /* File path associated with error */
+  int iLine                       /* Source line number where error occurred */
+){
+  char *zErr;                     /* Message from strerror() or equivalent */
+  int iErrno = errno;             /* Saved syscall error number */
+
+  /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use
+  ** the strerror() function to obtain the human-readable error message
+  ** equivalent to errno. Otherwise, use strerror_r().
+  */ 
+#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
+  char aErr[80];
+  memset(aErr, 0, sizeof(aErr));
+  zErr = aErr;
+
+  /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined,
+  ** assume that the system provides the GNU version of strerror_r() that
+  ** returns a pointer to a buffer containing the error message. That pointer 
+  ** may point to aErr[], or it may point to some static storage somewhere. 
+  ** Otherwise, assume that the system provides the POSIX version of 
+  ** strerror_r(), which always writes an error message into aErr[].
+  **
+  ** If the code incorrectly assumes that it is the POSIX version that is
+  ** available, the error message will often be an empty string. Not a
+  ** huge problem. Incorrectly concluding that the GNU version is available 
+  ** could lead to a segfault though.
+  */
+#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
+  zErr = 
+# endif
+  strerror_r(iErrno, aErr, sizeof(aErr)-1);
+
+#elif SQLITE_THREADSAFE
+  /* This is a threadsafe build, but strerror_r() is not available. */
+  zErr = "";
+#else
+  /* Non-threadsafe build, use strerror(). */
+  zErr = strerror(iErrno);
+#endif
+
+  if( zPath==0 ) zPath = "";
+  sqlite3_log(errcode,
+      "os_unix.c:%d: (%d) %s(%s) - %s",
+      iLine, iErrno, zFunc, zPath, zErr
+  );
+
+  return errcode;
+}
+
+/*
+** Close a file descriptor.
+**
+** We assume that close() almost always works, since it is only in a
+** very sick application or on a very sick platform that it might fail.
+** If it does fail, simply leak the file descriptor, but do log the
+** error.
+**
+** Note that it is not safe to retry close() after EINTR since the
+** file descriptor might have already been reused by another thread.
+** So we don't even try to recover from an EINTR.  Just log the error
+** and move on.
+*/
+static void robust_close(unixFile *pFile, int h, int lineno){
+  if( osClose(h) ){
+    unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close",
+                       pFile ? pFile->zPath : 0, lineno);
+  }
+}
+
+/*
+** Close all file descriptors accumuated in the unixInodeInfo->pUnused list.
+*/ 
+static void closePendingFds(unixFile *pFile){
+  unixInodeInfo *pInode = pFile->pInode;
+  UnixUnusedFd *p;
+  UnixUnusedFd *pNext;
+  for(p=pInode->pUnused; p; p=pNext){
+    pNext = p->pNext;
+    robust_close(pFile, p->fd, __LINE__);
+    sqlite3_free(p);
+  }
+  pInode->pUnused = 0;
+}
+
+/*
+** Release a unixInodeInfo structure previously allocated by findInodeInfo().
+**
+** The mutex entered using the unixEnterMutex() function must be held
+** when this function is called.
+*/
+static void releaseInodeInfo(unixFile *pFile){
+  unixInodeInfo *pInode = pFile->pInode;
+  assert( unixMutexHeld() );
+  if( ALWAYS(pInode) ){
+    pInode->nRef--;
+    if( pInode->nRef==0 ){
+      assert( pInode->pShmNode==0 );
+      closePendingFds(pFile);
+      if( pInode->pPrev ){
+        assert( pInode->pPrev->pNext==pInode );
+        pInode->pPrev->pNext = pInode->pNext;
+      }else{
+        assert( inodeList==pInode );
+        inodeList = pInode->pNext;
+      }
+      if( pInode->pNext ){
+        assert( pInode->pNext->pPrev==pInode );
+        pInode->pNext->pPrev = pInode->pPrev;
+      }
+      sqlite3_free(pInode);
+    }
+  }
+}
+
+/*
+** Given a file descriptor, locate the unixInodeInfo object that
+** describes that file descriptor.  Create a new one if necessary.  The
+** return value might be uninitialized if an error occurs.
+**
+** The mutex entered using the unixEnterMutex() function must be held
+** when this function is called.
+**
+** Return an appropriate error code.
+*/
+static int findInodeInfo(
+  unixFile *pFile,               /* Unix file with file desc used in the key */
+  unixInodeInfo **ppInode        /* Return the unixInodeInfo object here */
+){
+  int rc;                        /* System call return code */
+  int fd;                        /* The file descriptor for pFile */
+  struct unixFileId fileId;      /* Lookup key for the unixInodeInfo */
+  struct stat statbuf;           /* Low-level file information */
+  unixInodeInfo *pInode = 0;     /* Candidate unixInodeInfo object */
+
+  assert( unixMutexHeld() );
+
+  /* Get low-level information about the file that we can used to
+  ** create a unique name for the file.
+  */
+  fd = pFile->h;
+  rc = osFstat(fd, &statbuf);
+  if( rc!=0 ){
+    pFile->lastErrno = errno;
+#ifdef EOVERFLOW
+    if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
+#endif
+    return SQLITE_IOERR;
+  }
+
+#ifdef __APPLE__
+  /* On OS X on an msdos filesystem, the inode number is reported
+  ** incorrectly for zero-size files.  See ticket #3260.  To work
+  ** around this problem (we consider it a bug in OS X, not SQLite)
+  ** we always increase the file size to 1 by writing a single byte
+  ** prior to accessing the inode number.  The one byte written is
+  ** an ASCII 'S' character which also happens to be the first byte
+  ** in the header of every SQLite database.  In this way, if there
+  ** is a race condition such that another thread has already populated
+  ** the first page of the database, no damage is done.
+  */
+  if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
+    do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
+    if( rc!=1 ){
+      pFile->lastErrno = errno;
+      return SQLITE_IOERR;
+    }
+    rc = osFstat(fd, &statbuf);
+    if( rc!=0 ){
+      pFile->lastErrno = errno;
+      return SQLITE_IOERR;
+    }
+  }
+#endif
+
+  memset(&fileId, 0, sizeof(fileId));
+  fileId.dev = statbuf.st_dev;
+#if OS_VXWORKS
+  fileId.pId = pFile->pId;
+#else
+  fileId.ino = statbuf.st_ino;
+#endif
+  pInode = inodeList;
+  while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
+    pInode = pInode->pNext;
+  }
+  if( pInode==0 ){
+    pInode = sqlite3_malloc( sizeof(*pInode) );
+    if( pInode==0 ){
+      return SQLITE_NOMEM;
+    }
+    memset(pInode, 0, sizeof(*pInode));
+    memcpy(&pInode->fileId, &fileId, sizeof(fileId));
+    pInode->nRef = 1;
+    pInode->pNext = inodeList;
+    pInode->pPrev = 0;
+    if( inodeList ) inodeList->pPrev = pInode;
+    inodeList = pInode;
+  }else{
+    pInode->nRef++;
+  }
+  *ppInode = pInode;
+  return SQLITE_OK;
+}
+
+/*
+** Return TRUE if pFile has been renamed or unlinked since it was first opened.
+*/
+static int fileHasMoved(unixFile *pFile){
+#if OS_VXWORKS
+  return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
+#else
+  struct stat buf;
+  return pFile->pInode!=0 &&
+      (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino);
+#endif
+}
+
+
+/*
+** Check a unixFile that is a database.  Verify the following:
+**
+** (1) There is exactly one hard link on the file
+** (2) The file is not a symbolic link
+** (3) The file has not been renamed or unlinked
+**
+** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
+*/
+static void verifyDbFile(unixFile *pFile){
+  struct stat buf;
+  int rc;
+  if( pFile->ctrlFlags & UNIXFILE_WARNED ){
+    /* One or more of the following warnings have already been issued.  Do not
+    ** repeat them so as not to clutter the error log */
+    return;
+  }
+  rc = osFstat(pFile->h, &buf);
+  if( rc!=0 ){
+    sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
+    pFile->ctrlFlags |= UNIXFILE_WARNED;
+    return;
+  }
+  if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
+    sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
+    pFile->ctrlFlags |= UNIXFILE_WARNED;
+    return;
+  }
+  if( buf.st_nlink>1 ){
+    sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
+    pFile->ctrlFlags |= UNIXFILE_WARNED;
+    return;
+  }
+  if( fileHasMoved(pFile) ){
+    sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
+    pFile->ctrlFlags |= UNIXFILE_WARNED;
+    return;
+  }
+}
+
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+*/
+static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+
+  assert( pFile );
+  unixEnterMutex(); /* Because pFile->pInode is shared across threads */
+
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->pInode->eFileLock>SHARED_LOCK ){
+    reserved = 1;
+  }
+
+  /* Otherwise see if some other process holds it.
+  */
+#ifndef __DJGPP__
+  if( !reserved && !pFile->pInode->bProcessLock ){
+    struct flock lock;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = RESERVED_BYTE;
+    lock.l_len = 1;
+    lock.l_type = F_WRLCK;
+    if( osFcntl(pFile->h, F_GETLK, &lock) ){
+      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
+      pFile->lastErrno = errno;
+    } else if( lock.l_type!=F_UNLCK ){
+      reserved = 1;
+    }
+  }
+#endif
+  
+  unixLeaveMutex();
+  OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));
+
+  *pResOut = reserved;
+  return rc;
+}
+
+/*
+** Attempt to set a system-lock on the file pFile.  The lock is 
+** described by pLock.
+**
+** If the pFile was opened read/write from unix-excl, then the only lock
+** ever obtained is an exclusive lock, and it is obtained exactly once
+** the first time any lock is attempted.  All subsequent system locking
+** operations become no-ops.  Locking operations still happen internally,
+** in order to coordinate access between separate database connections
+** within this process, but all of that is handled in memory and the
+** operating system does not participate.
+**
+** This function is a pass-through to fcntl(F_SETLK) if pFile is using
+** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"
+** and is read-only.
+**
+** Zero is returned if the call completes successfully, or -1 if a call
+** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
+*/
+static int unixFileLock(unixFile *pFile, struct flock *pLock){
+  int rc;
+  unixInodeInfo *pInode = pFile->pInode;
+  assert( unixMutexHeld() );
+  assert( pInode!=0 );
+  if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock)
+   && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0)
+  ){
+    if( pInode->bProcessLock==0 ){
+      struct flock lock;
+      assert( pInode->nLock==0 );
+      lock.l_whence = SEEK_SET;
+      lock.l_start = SHARED_FIRST;
+      lock.l_len = SHARED_SIZE;
+      lock.l_type = F_WRLCK;
+      rc = osFcntl(pFile->h, F_SETLK, &lock);
+      if( rc<0 ) return rc;
+      pInode->bProcessLock = 1;
+      pInode->nLock++;
+    }else{
+      rc = 0;
+    }
+  }else{
+    rc = osFcntl(pFile->h, F_SETLK, pLock);
+  }
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+*/
+static int unixLock(sqlite3_file *id, int eFileLock){
+  /* The following describes the implementation of the various locks and
+  ** lock transitions in terms of the POSIX advisory shared and exclusive
+  ** lock primitives (called read-locks and write-locks below, to avoid
+  ** confusion with SQLite lock names). The algorithms are complicated
+  ** slightly in order to be compatible with windows systems simultaneously
+  ** accessing the same database file, in case that is ever required.
+  **
+  ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
+  ** byte', each single bytes at well known offsets, and the 'shared byte
+  ** range', a range of 510 bytes at a well known offset.
+  **
+  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
+  ** byte'.  If this is successful, a random byte from the 'shared byte
+  ** range' is read-locked and the lock on the 'pending byte' released.
+  **
+  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
+  ** A RESERVED lock is implemented by grabbing a write-lock on the
+  ** 'reserved byte'. 
+  **
+  ** A process may only obtain a PENDING lock after it has obtained a
+  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
+  ** on the 'pending byte'. This ensures that no new SHARED locks can be
+  ** obtained, but existing SHARED locks are allowed to persist. A process
+  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
+  ** This property is used by the algorithm for rolling back a journal file
+  ** after a crash.
+  **
+  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
+  ** implemented by obtaining a write-lock on the entire 'shared byte
+  ** range'. Since all other locks require a read-lock on one of the bytes
+  ** within this range, this ensures that no other locks are held on the
+  ** database. 
+  **
+  ** The reason a single byte cannot be used instead of the 'shared byte
+  ** range' is that some versions of windows do not support read-locks. By
+  ** locking a random byte from a range, concurrent SHARED locks may exist
+  ** even if the locking primitive used is always a write-lock.
+  */
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile*)id;
+  unixInodeInfo *pInode;
+  struct flock lock;
+  int tErrno = 0;
+
+  assert( pFile );
+  OSTRACE(("LOCK    %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
+      azFileLock(eFileLock), azFileLock(pFile->eFileLock),
+      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid()));
+
+  /* If there is already a lock of this type or more restrictive on the
+  ** unixFile, do nothing. Don't use the end_lock: exit path, as
+  ** unixEnterMutex() hasn't been called yet.
+  */
+  if( pFile->eFileLock>=eFileLock ){
+    OSTRACE(("LOCK    %d %s ok (already held) (unix)\n", pFile->h,
+            azFileLock(eFileLock)));
+    return SQLITE_OK;
+  }
+
+  /* Make sure the locking sequence is correct.
+  **  (1) We never move from unlocked to anything higher than shared lock.
+  **  (2) SQLite never explicitly requests a pendig lock.
+  **  (3) A shared lock is always held when a reserve lock is requested.
+  */
+  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
+  assert( eFileLock!=PENDING_LOCK );
+  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
+
+  /* This mutex is needed because pFile->pInode is shared across threads
+  */
+  unixEnterMutex();
+  pInode = pFile->pInode;
+
+  /* If some thread using this PID has a lock via a different unixFile*
+  ** handle that precludes the requested lock, return BUSY.
+  */
+  if( (pFile->eFileLock!=pInode->eFileLock && 
+          (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
+  ){
+    rc = SQLITE_BUSY;
+    goto end_lock;
+  }
+
+  /* If a SHARED lock is requested, and some thread using this PID already
+  ** has a SHARED or RESERVED lock, then increment reference counts and
+  ** return SQLITE_OK.
+  */
+  if( eFileLock==SHARED_LOCK && 
+      (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
+    assert( eFileLock==SHARED_LOCK );
+    assert( pFile->eFileLock==0 );
+    assert( pInode->nShared>0 );
+    pFile->eFileLock = SHARED_LOCK;
+    pInode->nShared++;
+    pInode->nLock++;
+    goto end_lock;
+  }
+
+
+  /* A PENDING lock is needed before acquiring a SHARED lock and before
+  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
+  ** be released.
+  */
+  lock.l_len = 1L;
+  lock.l_whence = SEEK_SET;
+  if( eFileLock==SHARED_LOCK 
+      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
+  ){
+    lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
+    lock.l_start = PENDING_BYTE;
+    if( unixFileLock(pFile, &lock) ){
+      tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( rc!=SQLITE_BUSY ){
+        pFile->lastErrno = tErrno;
+      }
+      goto end_lock;
+    }
+  }
+
+
+  /* If control gets to this point, then actually go ahead and make
+  ** operating system calls for the specified lock.
+  */
+  if( eFileLock==SHARED_LOCK ){
+    assert( pInode->nShared==0 );
+    assert( pInode->eFileLock==0 );
+    assert( rc==SQLITE_OK );
+
+    /* Now get the read-lock */
+    lock.l_start = SHARED_FIRST;
+    lock.l_len = SHARED_SIZE;
+    if( unixFileLock(pFile, &lock) ){
+      tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+    }
+
+    /* Drop the temporary PENDING lock */
+    lock.l_start = PENDING_BYTE;
+    lock.l_len = 1L;
+    lock.l_type = F_UNLCK;
+    if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
+      /* This could happen with a network mount */
+      tErrno = errno;
+      rc = SQLITE_IOERR_UNLOCK; 
+    }
+
+    if( rc ){
+      if( rc!=SQLITE_BUSY ){
+        pFile->lastErrno = tErrno;
+      }
+      goto end_lock;
+    }else{
+      pFile->eFileLock = SHARED_LOCK;
+      pInode->nLock++;
+      pInode->nShared = 1;
+    }
+  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
+    /* We are trying for an exclusive lock but another thread in this
+    ** same process is still holding a shared lock. */
+    rc = SQLITE_BUSY;
+  }else{
+    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
+    ** assumed that there is a SHARED or greater lock on the file
+    ** already.
+    */
+    assert( 0!=pFile->eFileLock );
+    lock.l_type = F_WRLCK;
+
+    assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
+    if( eFileLock==RESERVED_LOCK ){
+      lock.l_start = RESERVED_BYTE;
+      lock.l_len = 1L;
+    }else{
+      lock.l_start = SHARED_FIRST;
+      lock.l_len = SHARED_SIZE;
+    }
+
+    if( unixFileLock(pFile, &lock) ){
+      tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( rc!=SQLITE_BUSY ){
+        pFile->lastErrno = tErrno;
+      }
+    }
+  }
+  
+
+#ifdef SQLITE_DEBUG
+  /* Set up the transaction-counter change checking flags when
+  ** transitioning from a SHARED to a RESERVED lock.  The change
+  ** from SHARED to RESERVED marks the beginning of a normal
+  ** write operation (not a hot journal rollback).
+  */
+  if( rc==SQLITE_OK
+   && pFile->eFileLock<=SHARED_LOCK
+   && eFileLock==RESERVED_LOCK
+  ){
+    pFile->transCntrChng = 0;
+    pFile->dbUpdate = 0;
+    pFile->inNormalWrite = 1;
+  }
+#endif
+
+
+  if( rc==SQLITE_OK ){
+    pFile->eFileLock = eFileLock;
+    pInode->eFileLock = eFileLock;
+  }else if( eFileLock==EXCLUSIVE_LOCK ){
+    pFile->eFileLock = PENDING_LOCK;
+    pInode->eFileLock = PENDING_LOCK;
+  }
+
+end_lock:
+  unixLeaveMutex();
+  OSTRACE(("LOCK    %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), 
+      rc==SQLITE_OK ? "ok" : "failed"));
+  return rc;
+}
+
+/*
+** Add the file descriptor used by file handle pFile to the corresponding
+** pUnused list.
+*/
+static void setPendingFd(unixFile *pFile){
+  unixInodeInfo *pInode = pFile->pInode;
+  UnixUnusedFd *p = pFile->pUnused;
+  p->pNext = pInode->pUnused;
+  pInode->pUnused = p;
+  pFile->h = -1;
+  pFile->pUnused = 0;
+}
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+** 
+** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
+** the byte range is divided into 2 parts and the first part is unlocked then
+** set to a read lock, then the other part is simply unlocked.  This works 
+** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to 
+** remove the write lock on a region when a read lock is set.
+*/
+static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
+  unixFile *pFile = (unixFile*)id;
+  unixInodeInfo *pInode;
+  struct flock lock;
+  int rc = SQLITE_OK;
+
+  assert( pFile );
+  OSTRACE(("UNLOCK  %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
+      pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
+      getpid()));
+
+  assert( eFileLock<=SHARED_LOCK );
+  if( pFile->eFileLock<=eFileLock ){
+    return SQLITE_OK;
+  }
+  unixEnterMutex();
+  pInode = pFile->pInode;
+  assert( pInode->nShared!=0 );
+  if( pFile->eFileLock>SHARED_LOCK ){
+    assert( pInode->eFileLock==pFile->eFileLock );
+
+#ifdef SQLITE_DEBUG
+    /* When reducing a lock such that other processes can start
+    ** reading the database file again, make sure that the
+    ** transaction counter was updated if any part of the database
+    ** file changed.  If the transaction counter is not updated,
+    ** other connections to the same file might not realize that
+    ** the file has changed and hence might not know to flush their
+    ** cache.  The use of a stale cache can lead to database corruption.
+    */
+    pFile->inNormalWrite = 0;
+#endif
+
+    /* downgrading to a shared lock on NFS involves clearing the write lock
+    ** before establishing the readlock - to avoid a race condition we downgrade
+    ** the lock in 2 blocks, so that part of the range will be covered by a 
+    ** write lock until the rest is covered by a read lock:
+    **  1:   [WWWWW]
+    **  2:   [....W]
+    **  3:   [RRRRW]
+    **  4:   [RRRR.]
+    */
+    if( eFileLock==SHARED_LOCK ){
+
+#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
+      (void)handleNFSUnlock;
+      assert( handleNFSUnlock==0 );
+#endif
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+      if( handleNFSUnlock ){
+        int tErrno;               /* Error code from system call errors */
+        off_t divSize = SHARED_SIZE - 1;
+        
+        lock.l_type = F_UNLCK;
+        lock.l_whence = SEEK_SET;
+        lock.l_start = SHARED_FIRST;
+        lock.l_len = divSize;
+        if( unixFileLock(pFile, &lock)==(-1) ){
+          tErrno = errno;
+          rc = SQLITE_IOERR_UNLOCK;
+          if( IS_LOCK_ERROR(rc) ){
+            pFile->lastErrno = tErrno;
+          }
+          goto end_unlock;
+        }
+        lock.l_type = F_RDLCK;
+        lock.l_whence = SEEK_SET;
+        lock.l_start = SHARED_FIRST;
+        lock.l_len = divSize;
+        if( unixFileLock(pFile, &lock)==(-1) ){
+          tErrno = errno;
+          rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
+          if( IS_LOCK_ERROR(rc) ){
+            pFile->lastErrno = tErrno;
+          }
+          goto end_unlock;
+        }
+        lock.l_type = F_UNLCK;
+        lock.l_whence = SEEK_SET;
+        lock.l_start = SHARED_FIRST+divSize;
+        lock.l_len = SHARED_SIZE-divSize;
+        if( unixFileLock(pFile, &lock)==(-1) ){
+          tErrno = errno;
+          rc = SQLITE_IOERR_UNLOCK;
+          if( IS_LOCK_ERROR(rc) ){
+            pFile->lastErrno = tErrno;
+          }
+          goto end_unlock;
+        }
+      }else
+#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
+      {
+        lock.l_type = F_RDLCK;
+        lock.l_whence = SEEK_SET;
+        lock.l_start = SHARED_FIRST;
+        lock.l_len = SHARED_SIZE;
+        if( unixFileLock(pFile, &lock) ){
+          /* In theory, the call to unixFileLock() cannot fail because another
+          ** process is holding an incompatible lock. If it does, this 
+          ** indicates that the other process is not following the locking
+          ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
+          ** SQLITE_BUSY would confuse the upper layer (in practice it causes 
+          ** an assert to fail). */ 
+          rc = SQLITE_IOERR_RDLOCK;
+          pFile->lastErrno = errno;
+          goto end_unlock;
+        }
+      }
+    }
+    lock.l_type = F_UNLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = PENDING_BYTE;
+    lock.l_len = 2L;  assert( PENDING_BYTE+1==RESERVED_BYTE );
+    if( unixFileLock(pFile, &lock)==0 ){
+      pInode->eFileLock = SHARED_LOCK;
+    }else{
+      rc = SQLITE_IOERR_UNLOCK;
+      pFile->lastErrno = errno;
+      goto end_unlock;
+    }
+  }
+  if( eFileLock==NO_LOCK ){
+    /* Decrement the shared lock counter.  Release the lock using an
+    ** OS call only when all threads in this same process have released
+    ** the lock.
+    */
+    pInode->nShared--;
+    if( pInode->nShared==0 ){
+      lock.l_type = F_UNLCK;
+      lock.l_whence = SEEK_SET;
+      lock.l_start = lock.l_len = 0L;
+      if( unixFileLock(pFile, &lock)==0 ){
+        pInode->eFileLock = NO_LOCK;
+      }else{
+        rc = SQLITE_IOERR_UNLOCK;
+        pFile->lastErrno = errno;
+        pInode->eFileLock = NO_LOCK;
+        pFile->eFileLock = NO_LOCK;
+      }
+    }
+
+    /* Decrement the count of locks against this same file.  When the
+    ** count reaches zero, close any other file descriptors whose close
+    ** was deferred because of outstanding locks.
+    */
+    pInode->nLock--;
+    assert( pInode->nLock>=0 );
+    if( pInode->nLock==0 ){
+      closePendingFds(pFile);
+    }
+  }
+
+end_unlock:
+  unixLeaveMutex();
+  if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
+  return rc;
+}
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int unixUnlock(sqlite3_file *id, int eFileLock){
+#if SQLITE_MAX_MMAP_SIZE>0
+  assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
+#endif
+  return posixUnlock(id, eFileLock, 0);
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+static int unixMapfile(unixFile *pFd, i64 nByte);
+static void unixUnmapfile(unixFile *pFd);
+#endif
+
+/*
+** This function performs the parts of the "close file" operation 
+** common to all locking schemes. It closes the directory and file
+** handles, if they are valid, and sets all fields of the unixFile
+** structure to 0.
+**
+** It is *not* necessary to hold the mutex when this routine is called,
+** even on VxWorks.  A mutex will be acquired on VxWorks by the
+** vxworksReleaseFileId() routine.
+*/
+static int closeUnixFile(sqlite3_file *id){
+  unixFile *pFile = (unixFile*)id;
+#if SQLITE_MAX_MMAP_SIZE>0
+  unixUnmapfile(pFile);
+#endif
+  if( pFile->h>=0 ){
+    robust_close(pFile, pFile->h, __LINE__);
+    pFile->h = -1;
+  }
+#if OS_VXWORKS
+  if( pFile->pId ){
+    if( pFile->ctrlFlags & UNIXFILE_DELETE ){
+      osUnlink(pFile->pId->zCanonicalName);
+    }
+    vxworksReleaseFileId(pFile->pId);
+    pFile->pId = 0;
+  }
+#endif
+#ifdef SQLITE_UNLINK_AFTER_CLOSE
+  if( pFile->ctrlFlags & UNIXFILE_DELETE ){
+    osUnlink(pFile->zPath);
+    sqlite3_free(*(char**)&pFile->zPath);
+    pFile->zPath = 0;
+  }
+#endif
+  OSTRACE(("CLOSE   %-3d\n", pFile->h));
+  OpenCounter(-1);
+  sqlite3_free(pFile->pUnused);
+  memset(pFile, 0, sizeof(unixFile));
+  return SQLITE_OK;
+}
+
+/*
+** Close a file.
+*/
+static int unixClose(sqlite3_file *id){
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile *)id;
+  verifyDbFile(pFile);
+  unixUnlock(id, NO_LOCK);
+  unixEnterMutex();
+
+  /* unixFile.pInode is always valid here. Otherwise, a different close
+  ** routine (e.g. nolockClose()) would be called instead.
+  */
+  assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );
+  if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){
+    /* If there are outstanding locks, do not actually close the file just
+    ** yet because that would clear those locks.  Instead, add the file
+    ** descriptor to pInode->pUnused list.  It will be automatically closed 
+    ** when the last lock is cleared.
+    */
+    setPendingFd(pFile);
+  }
+  releaseInodeInfo(pFile);
+  rc = closeUnixFile(id);
+  unixLeaveMutex();
+  return rc;
+}
+
+/************** End of the posix advisory lock implementation *****************
+******************************************************************************/
+
+/******************************************************************************
+****************************** No-op Locking **********************************
+**
+** Of the various locking implementations available, this is by far the
+** simplest:  locking is ignored.  No attempt is made to lock the database
+** file for reading or writing.
+**
+** This locking mode is appropriate for use on read-only databases
+** (ex: databases that are burned into CD-ROM, for example.)  It can
+** also be used if the application employs some external mechanism to
+** prevent simultaneous access of the same database by two or more
+** database connections.  But there is a serious risk of database
+** corruption if this locking mode is used in situations where multiple
+** database connections are accessing the same database file at the same
+** time and one or more of those connections are writing.
+*/
+
+static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
+  UNUSED_PARAMETER(NotUsed);
+  *pResOut = 0;
+  return SQLITE_OK;
+}
+static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return SQLITE_OK;
+}
+static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return SQLITE_OK;
+}
+
+/*
+** Close the file.
+*/
+static int nolockClose(sqlite3_file *id) {
+  return closeUnixFile(id);
+}
+
+/******************* End of the no-op lock implementation *********************
+******************************************************************************/
+
+/******************************************************************************
+************************* Begin dot-file Locking ******************************
+**
+** The dotfile locking implementation uses the existence of separate lock
+** files (really a directory) to control access to the database.  This works
+** on just about every filesystem imaginable.  But there are serious downsides:
+**
+**    (1)  There is zero concurrency.  A single reader blocks all other
+**         connections from reading or writing the database.
+**
+**    (2)  An application crash or power loss can leave stale lock files
+**         sitting around that need to be cleared manually.
+**
+** Nevertheless, a dotlock is an appropriate locking mode for use if no
+** other locking strategy is available.
+**
+** Dotfile locking works by creating a subdirectory in the same directory as
+** the database and with the same name but with a ".lock" extension added.
+** The existence of a lock directory implies an EXCLUSIVE lock.  All other
+** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
+*/
+
+/*
+** The file suffix added to the data base filename in order to create the
+** lock directory.
+*/
+#define DOTLOCK_SUFFIX ".lock"
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+**
+** In dotfile locking, either a lock exists or it does not.  So in this
+** variation of CheckReservedLock(), *pResOut is set to true if any lock
+** is held on the file and false if the file is unlocked.
+*/
+static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  
+  assert( pFile );
+
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->eFileLock>SHARED_LOCK ){
+    /* Either this connection or some other connection in the same process
+    ** holds a lock on the file.  No need to check further. */
+    reserved = 1;
+  }else{
+    /* The lock is held if and only if the lockfile exists */
+    const char *zLockFile = (const char*)pFile->lockingContext;
+    reserved = osAccess(zLockFile, 0)==0;
+  }
+  OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved));
+  *pResOut = reserved;
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+**
+** With dotfile locking, we really only support state (4): EXCLUSIVE.
+** But we track the other locking levels internally.
+*/
+static int dotlockLock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  char *zLockFile = (char *)pFile->lockingContext;
+  int rc = SQLITE_OK;
+
+
+  /* If we have any lock, then the lock file already exists.  All we have
+  ** to do is adjust our internal record of the lock level.
+  */
+  if( pFile->eFileLock > NO_LOCK ){
+    pFile->eFileLock = eFileLock;
+    /* Always update the timestamp on the old file */
+#ifdef HAVE_UTIME
+    utime(zLockFile, NULL);
+#else
+    utimes(zLockFile, NULL);
+#endif
+    return SQLITE_OK;
+  }
+  
+  /* grab an exclusive lock */
+  rc = osMkdir(zLockFile, 0777);
+  if( rc<0 ){
+    /* failed to open/create the lock directory */
+    int tErrno = errno;
+    if( EEXIST == tErrno ){
+      rc = SQLITE_BUSY;
+    } else {
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( IS_LOCK_ERROR(rc) ){
+        pFile->lastErrno = tErrno;
+      }
+    }
+    return rc;
+  } 
+  
+  /* got it, set the type and return ok */
+  pFile->eFileLock = eFileLock;
+  return rc;
+}
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+**
+** When the locking level reaches NO_LOCK, delete the lock file.
+*/
+static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  char *zLockFile = (char *)pFile->lockingContext;
+  int rc;
+
+  assert( pFile );
+  OSTRACE(("UNLOCK  %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
+           pFile->eFileLock, getpid()));
+  assert( eFileLock<=SHARED_LOCK );
+  
+  /* no-op if possible */
+  if( pFile->eFileLock==eFileLock ){
+    return SQLITE_OK;
+  }
+
+  /* To downgrade to shared, simply update our internal notion of the
+  ** lock state.  No need to mess with the file on disk.
+  */
+  if( eFileLock==SHARED_LOCK ){
+    pFile->eFileLock = SHARED_LOCK;
+    return SQLITE_OK;
+  }
+  
+  /* To fully unlock the database, delete the lock file */
+  assert( eFileLock==NO_LOCK );
+  rc = osRmdir(zLockFile);
+  if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile);
+  if( rc<0 ){
+    int tErrno = errno;
+    rc = 0;
+    if( ENOENT != tErrno ){
+      rc = SQLITE_IOERR_UNLOCK;
+    }
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+    return rc; 
+  }
+  pFile->eFileLock = NO_LOCK;
+  return SQLITE_OK;
+}
+
+/*
+** Close a file.  Make sure the lock has been released before closing.
+*/
+static int dotlockClose(sqlite3_file *id) {
+  int rc = SQLITE_OK;
+  if( id ){
+    unixFile *pFile = (unixFile*)id;
+    dotlockUnlock(id, NO_LOCK);
+    sqlite3_free(pFile->lockingContext);
+    rc = closeUnixFile(id);
+  }
+  return rc;
+}
+/****************** End of the dot-file lock implementation *******************
+******************************************************************************/
+
+/******************************************************************************
+************************** Begin flock Locking ********************************
+**
+** Use the flock() system call to do file locking.
+**
+** flock() locking is like dot-file locking in that the various
+** fine-grain locking levels supported by SQLite are collapsed into
+** a single exclusive lock.  In other words, SHARED, RESERVED, and
+** PENDING locks are the same thing as an EXCLUSIVE lock.  SQLite
+** still works when you do this, but concurrency is reduced since
+** only a single process can be reading the database at a time.
+**
+** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
+** compiling for VXWORKS.
+*/
+#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
+
+/*
+** Retry flock() calls that fail with EINTR
+*/
+#ifdef EINTR
+static int robust_flock(int fd, int op){
+  int rc;
+  do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR );
+  return rc;
+}
+#else
+# define robust_flock(a,b) flock(a,b)
+#endif
+     
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+*/
+static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+  
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  
+  assert( pFile );
+  
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->eFileLock>SHARED_LOCK ){
+    reserved = 1;
+  }
+  
+  /* Otherwise see if some other process holds it. */
+  if( !reserved ){
+    /* attempt to get the lock */
+    int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB);
+    if( !lrc ){
+      /* got the lock, unlock it */
+      lrc = robust_flock(pFile->h, LOCK_UN);
+      if ( lrc ) {
+        int tErrno = errno;
+        /* unlock failed with an error */
+        lrc = SQLITE_IOERR_UNLOCK; 
+        if( IS_LOCK_ERROR(lrc) ){
+          pFile->lastErrno = tErrno;
+          rc = lrc;
+        }
+      }
+    } else {
+      int tErrno = errno;
+      reserved = 1;
+      /* someone else might have it reserved */
+      lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); 
+      if( IS_LOCK_ERROR(lrc) ){
+        pFile->lastErrno = tErrno;
+        rc = lrc;
+      }
+    }
+  }
+  OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved));
+
+#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
+  if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
+    rc = SQLITE_OK;
+    reserved=1;
+  }
+#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
+  *pResOut = reserved;
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** flock() only really support EXCLUSIVE locks.  We track intermediate
+** lock states in the sqlite3_file structure, but all locks SHARED or
+** above are really EXCLUSIVE locks and exclude all other processes from
+** access the file.
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+*/
+static int flockLock(sqlite3_file *id, int eFileLock) {
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile*)id;
+
+  assert( pFile );
+
+  /* if we already have a lock, it is exclusive.  
+  ** Just adjust level and punt on outta here. */
+  if (pFile->eFileLock > NO_LOCK) {
+    pFile->eFileLock = eFileLock;
+    return SQLITE_OK;
+  }
+  
+  /* grab an exclusive lock */
+  
+  if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) {
+    int tErrno = errno;
+    /* didn't get, must be busy */
+    rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+  } else {
+    /* got it, set the type and return ok */
+    pFile->eFileLock = eFileLock;
+  }
+  OSTRACE(("LOCK    %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), 
+           rc==SQLITE_OK ? "ok" : "failed"));
+#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
+  if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
+    rc = SQLITE_BUSY;
+  }
+#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
+  return rc;
+}
+
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int flockUnlock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  
+  assert( pFile );
+  OSTRACE(("UNLOCK  %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock,
+           pFile->eFileLock, getpid()));
+  assert( eFileLock<=SHARED_LOCK );
+  
+  /* no-op if possible */
+  if( pFile->eFileLock==eFileLock ){
+    return SQLITE_OK;
+  }
+  
+  /* shared can just be set because we always have an exclusive */
+  if (eFileLock==SHARED_LOCK) {
+    pFile->eFileLock = eFileLock;
+    return SQLITE_OK;
+  }
+  
+  /* no, really, unlock. */
+  if( robust_flock(pFile->h, LOCK_UN) ){
+#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
+    return SQLITE_OK;
+#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
+    return SQLITE_IOERR_UNLOCK;
+  }else{
+    pFile->eFileLock = NO_LOCK;
+    return SQLITE_OK;
+  }
+}
+
+/*
+** Close a file.
+*/
+static int flockClose(sqlite3_file *id) {
+  int rc = SQLITE_OK;
+  if( id ){
+    flockUnlock(id, NO_LOCK);
+    rc = closeUnixFile(id);
+  }
+  return rc;
+}
+
+#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */
+
+/******************* End of the flock lock implementation *********************
+******************************************************************************/
+
+/******************************************************************************
+************************ Begin Named Semaphore Locking ************************
+**
+** Named semaphore locking is only supported on VxWorks.
+**
+** Semaphore locking is like dot-lock and flock in that it really only
+** supports EXCLUSIVE locking.  Only a single process can read or write
+** the database file at a time.  This reduces potential concurrency, but
+** makes the lock implementation much easier.
+*/
+#if OS_VXWORKS
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+*/
+static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  
+  assert( pFile );
+
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->eFileLock>SHARED_LOCK ){
+    reserved = 1;
+  }
+  
+  /* Otherwise see if some other process holds it. */
+  if( !reserved ){
+    sem_t *pSem = pFile->pInode->pSem;
+
+    if( sem_trywait(pSem)==-1 ){
+      int tErrno = errno;
+      if( EAGAIN != tErrno ){
+        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
+        pFile->lastErrno = tErrno;
+      } else {
+        /* someone else has the lock when we are in NO_LOCK */
+        reserved = (pFile->eFileLock < SHARED_LOCK);
+      }
+    }else{
+      /* we could have it if we want it */
+      sem_post(pSem);
+    }
+  }
+  OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved));
+
+  *pResOut = reserved;
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** Semaphore locks only really support EXCLUSIVE locks.  We track intermediate
+** lock states in the sqlite3_file structure, but all locks SHARED or
+** above are really EXCLUSIVE locks and exclude all other processes from
+** access the file.
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+*/
+static int semLock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  sem_t *pSem = pFile->pInode->pSem;
+  int rc = SQLITE_OK;
+
+  /* if we already have a lock, it is exclusive.  
+  ** Just adjust level and punt on outta here. */
+  if (pFile->eFileLock > NO_LOCK) {
+    pFile->eFileLock = eFileLock;
+    rc = SQLITE_OK;
+    goto sem_end_lock;
+  }
+  
+  /* lock semaphore now but bail out when already locked. */
+  if( sem_trywait(pSem)==-1 ){
+    rc = SQLITE_BUSY;
+    goto sem_end_lock;
+  }
+
+  /* got it, set the type and return ok */
+  pFile->eFileLock = eFileLock;
+
+ sem_end_lock:
+  return rc;
+}
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int semUnlock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  sem_t *pSem = pFile->pInode->pSem;
+
+  assert( pFile );
+  assert( pSem );
+  OSTRACE(("UNLOCK  %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,
+           pFile->eFileLock, getpid()));
+  assert( eFileLock<=SHARED_LOCK );
+  
+  /* no-op if possible */
+  if( pFile->eFileLock==eFileLock ){
+    return SQLITE_OK;
+  }
+  
+  /* shared can just be set because we always have an exclusive */
+  if (eFileLock==SHARED_LOCK) {
+    pFile->eFileLock = eFileLock;
+    return SQLITE_OK;
+  }
+  
+  /* no, really unlock. */
+  if ( sem_post(pSem)==-1 ) {
+    int rc, tErrno = errno;
+    rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+    return rc; 
+  }
+  pFile->eFileLock = NO_LOCK;
+  return SQLITE_OK;
+}
+
+/*
+ ** Close a file.
+ */
+static int semClose(sqlite3_file *id) {
+  if( id ){
+    unixFile *pFile = (unixFile*)id;
+    semUnlock(id, NO_LOCK);
+    assert( pFile );
+    unixEnterMutex();
+    releaseInodeInfo(pFile);
+    unixLeaveMutex();
+    closeUnixFile(id);
+  }
+  return SQLITE_OK;
+}
+
+#endif /* OS_VXWORKS */
+/*
+** Named semaphore locking is only available on VxWorks.
+**
+*************** End of the named semaphore lock implementation ****************
+******************************************************************************/
+
+
+/******************************************************************************
+*************************** Begin AFP Locking *********************************
+**
+** AFP is the Apple Filing Protocol.  AFP is a network filesystem found
+** on Apple Macintosh computers - both OS9 and OSX.
+**
+** Third-party implementations of AFP are available.  But this code here
+** only works on OSX.
+*/
+
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+/*
+** The afpLockingContext structure contains all afp lock specific state
+*/
+typedef struct afpLockingContext afpLockingContext;
+struct afpLockingContext {
+  int reserved;
+  const char *dbPath;             /* Name of the open file */
+};
+
+struct ByteRangeLockPB2
+{
+  unsigned long long offset;        /* offset to first byte to lock */
+  unsigned long long length;        /* nbr of bytes to lock */
+  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
+  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
+  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
+  int fd;                           /* file desc to assoc this lock with */
+};
+
+#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
+
+/*
+** This is a utility for setting or clearing a bit-range lock on an
+** AFP filesystem.
+** 
+** Return SQLITE_OK on success, SQLITE_BUSY on failure.
+*/
+static int afpSetLock(
+  const char *path,              /* Name of the file to be locked or unlocked */
+  unixFile *pFile,               /* Open file descriptor on path */
+  unsigned long long offset,     /* First byte to be locked */
+  unsigned long long length,     /* Number of bytes to lock */
+  int setLockFlag                /* True to set lock.  False to clear lock */
+){
+  struct ByteRangeLockPB2 pb;
+  int err;
+  
+  pb.unLockFlag = setLockFlag ? 0 : 1;
+  pb.startEndFlag = 0;
+  pb.offset = offset;
+  pb.length = length; 
+  pb.fd = pFile->h;
+  
+  OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 
+    (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
+    offset, length));
+  err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
+  if ( err==-1 ) {
+    int rc;
+    int tErrno = errno;
+    OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
+             path, tErrno, strerror(tErrno)));
+#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
+    rc = SQLITE_BUSY;
+#else
+    rc = sqliteErrorFromPosixError(tErrno,
+                    setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
+#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+    return rc;
+  } else {
+    return SQLITE_OK;
+  }
+}
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+*/
+static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+  afpLockingContext *context;
+  
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  
+  assert( pFile );
+  context = (afpLockingContext *) pFile->lockingContext;
+  if( context->reserved ){
+    *pResOut = 1;
+    return SQLITE_OK;
+  }
+  unixEnterMutex(); /* Because pFile->pInode is shared across threads */
+  
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->pInode->eFileLock>SHARED_LOCK ){
+    reserved = 1;
+  }
+  
+  /* Otherwise see if some other process holds it.
+   */
+  if( !reserved ){
+    /* lock the RESERVED byte */
+    int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);  
+    if( SQLITE_OK==lrc ){
+      /* if we succeeded in taking the reserved lock, unlock it to restore
+      ** the original state */
+      lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
+    } else {
+      /* if we failed to get the lock then someone else must have it */
+      reserved = 1;
+    }
+    if( IS_LOCK_ERROR(lrc) ){
+      rc=lrc;
+    }
+  }
+  
+  unixLeaveMutex();
+  OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved));
+  
+  *pResOut = reserved;
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+*/
+static int afpLock(sqlite3_file *id, int eFileLock){
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile*)id;
+  unixInodeInfo *pInode = pFile->pInode;
+  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+  
+  assert( pFile );
+  OSTRACE(("LOCK    %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h,
+           azFileLock(eFileLock), azFileLock(pFile->eFileLock),
+           azFileLock(pInode->eFileLock), pInode->nShared , getpid()));
+
+  /* If there is already a lock of this type or more restrictive on the
+  ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
+  ** unixEnterMutex() hasn't been called yet.
+  */
+  if( pFile->eFileLock>=eFileLock ){
+    OSTRACE(("LOCK    %d %s ok (already held) (afp)\n", pFile->h,
+           azFileLock(eFileLock)));
+    return SQLITE_OK;
+  }
+
+  /* Make sure the locking sequence is correct
+  **  (1) We never move from unlocked to anything higher than shared lock.
+  **  (2) SQLite never explicitly requests a pendig lock.
+  **  (3) A shared lock is always held when a reserve lock is requested.
+  */
+  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
+  assert( eFileLock!=PENDING_LOCK );
+  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
+  
+  /* This mutex is needed because pFile->pInode is shared across threads
+  */
+  unixEnterMutex();
+  pInode = pFile->pInode;
+
+  /* If some thread using this PID has a lock via a different unixFile*
+  ** handle that precludes the requested lock, return BUSY.
+  */
+  if( (pFile->eFileLock!=pInode->eFileLock && 
+       (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
+     ){
+    rc = SQLITE_BUSY;
+    goto afp_end_lock;
+  }
+  
+  /* If a SHARED lock is requested, and some thread using this PID already
+  ** has a SHARED or RESERVED lock, then increment reference counts and
+  ** return SQLITE_OK.
+  */
+  if( eFileLock==SHARED_LOCK && 
+     (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
+    assert( eFileLock==SHARED_LOCK );
+    assert( pFile->eFileLock==0 );
+    assert( pInode->nShared>0 );
+    pFile->eFileLock = SHARED_LOCK;
+    pInode->nShared++;
+    pInode->nLock++;
+    goto afp_end_lock;
+  }
+    
+  /* A PENDING lock is needed before acquiring a SHARED lock and before
+  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
+  ** be released.
+  */
+  if( eFileLock==SHARED_LOCK 
+      || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
+  ){
+    int failed;
+    failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
+    if (failed) {
+      rc = failed;
+      goto afp_end_lock;
+    }
+  }
+  
+  /* If control gets to this point, then actually go ahead and make
+  ** operating system calls for the specified lock.
+  */
+  if( eFileLock==SHARED_LOCK ){
+    int lrc1, lrc2, lrc1Errno = 0;
+    long lk, mask;
+    
+    assert( pInode->nShared==0 );
+    assert( pInode->eFileLock==0 );
+        
+    mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;
+    /* Now get the read-lock SHARED_LOCK */
+    /* note that the quality of the randomness doesn't matter that much */
+    lk = random(); 
+    pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);
+    lrc1 = afpSetLock(context->dbPath, pFile, 
+          SHARED_FIRST+pInode->sharedByte, 1, 1);
+    if( IS_LOCK_ERROR(lrc1) ){
+      lrc1Errno = pFile->lastErrno;
+    }
+    /* Drop the temporary PENDING lock */
+    lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
+    
+    if( IS_LOCK_ERROR(lrc1) ) {
+      pFile->lastErrno = lrc1Errno;
+      rc = lrc1;
+      goto afp_end_lock;
+    } else if( IS_LOCK_ERROR(lrc2) ){
+      rc = lrc2;
+      goto afp_end_lock;
+    } else if( lrc1 != SQLITE_OK ) {
+      rc = lrc1;
+    } else {
+      pFile->eFileLock = SHARED_LOCK;
+      pInode->nLock++;
+      pInode->nShared = 1;
+    }
+  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
+    /* We are trying for an exclusive lock but another thread in this
+     ** same process is still holding a shared lock. */
+    rc = SQLITE_BUSY;
+  }else{
+    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
+    ** assumed that there is a SHARED or greater lock on the file
+    ** already.
+    */
+    int failed = 0;
+    assert( 0!=pFile->eFileLock );
+    if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) {
+        /* Acquire a RESERVED lock */
+        failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
+      if( !failed ){
+        context->reserved = 1;
+      }
+    }
+    if (!failed && eFileLock == EXCLUSIVE_LOCK) {
+      /* Acquire an EXCLUSIVE lock */
+        
+      /* Remove the shared lock before trying the range.  we'll need to 
+      ** reestablish the shared lock if we can't get the  afpUnlock
+      */
+      if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +
+                         pInode->sharedByte, 1, 0)) ){
+        int failed2 = SQLITE_OK;
+        /* now attemmpt to get the exclusive lock range */
+        failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, 
+                               SHARED_SIZE, 1);
+        if( failed && (failed2 = afpSetLock(context->dbPath, pFile, 
+                       SHARED_FIRST + pInode->sharedByte, 1, 1)) ){
+          /* Can't reestablish the shared lock.  Sqlite can't deal, this is
+          ** a critical I/O error
+          */
+          rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : 
+               SQLITE_IOERR_LOCK;
+          goto afp_end_lock;
+        } 
+      }else{
+        rc = failed; 
+      }
+    }
+    if( failed ){
+      rc = failed;
+    }
+  }
+  
+  if( rc==SQLITE_OK ){
+    pFile->eFileLock = eFileLock;
+    pInode->eFileLock = eFileLock;
+  }else if( eFileLock==EXCLUSIVE_LOCK ){
+    pFile->eFileLock = PENDING_LOCK;
+    pInode->eFileLock = PENDING_LOCK;
+  }
+  
+afp_end_lock:
+  unixLeaveMutex();
+  OSTRACE(("LOCK    %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), 
+         rc==SQLITE_OK ? "ok" : "failed"));
+  return rc;
+}
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int afpUnlock(sqlite3_file *id, int eFileLock) {
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile*)id;
+  unixInodeInfo *pInode;
+  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+  int skipShared = 0;
+#ifdef SQLITE_TEST
+  int h = pFile->h;
+#endif
+
+  assert( pFile );
+  OSTRACE(("UNLOCK  %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock,
+           pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
+           getpid()));
+
+  assert( eFileLock<=SHARED_LOCK );
+  if( pFile->eFileLock<=eFileLock ){
+    return SQLITE_OK;
+  }
+  unixEnterMutex();
+  pInode = pFile->pInode;
+  assert( pInode->nShared!=0 );
+  if( pFile->eFileLock>SHARED_LOCK ){
+    assert( pInode->eFileLock==pFile->eFileLock );
+    SimulateIOErrorBenign(1);
+    SimulateIOError( h=(-1) )
+    SimulateIOErrorBenign(0);
+    
+#ifdef SQLITE_DEBUG
+    /* When reducing a lock such that other processes can start
+    ** reading the database file again, make sure that the
+    ** transaction counter was updated if any part of the database
+    ** file changed.  If the transaction counter is not updated,
+    ** other connections to the same file might not realize that
+    ** the file has changed and hence might not know to flush their
+    ** cache.  The use of a stale cache can lead to database corruption.
+    */
+    assert( pFile->inNormalWrite==0
+           || pFile->dbUpdate==0
+           || pFile->transCntrChng==1 );
+    pFile->inNormalWrite = 0;
+#endif
+    
+    if( pFile->eFileLock==EXCLUSIVE_LOCK ){
+      rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
+      if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){
+        /* only re-establish the shared lock if necessary */
+        int sharedLockByte = SHARED_FIRST+pInode->sharedByte;
+        rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1);
+      } else {
+        skipShared = 1;
+      }
+    }
+    if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){
+      rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
+    } 
+    if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){
+      rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
+      if( !rc ){ 
+        context->reserved = 0; 
+      }
+    }
+    if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){
+      pInode->eFileLock = SHARED_LOCK;
+    }
+  }
+  if( rc==SQLITE_OK && eFileLock==NO_LOCK ){
+
+    /* Decrement the shared lock counter.  Release the lock using an
+    ** OS call only when all threads in this same process have released
+    ** the lock.
+    */
+    unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;
+    pInode->nShared--;
+    if( pInode->nShared==0 ){
+      SimulateIOErrorBenign(1);
+      SimulateIOError( h=(-1) )
+      SimulateIOErrorBenign(0);
+      if( !skipShared ){
+        rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0);
+      }
+      if( !rc ){
+        pInode->eFileLock = NO_LOCK;
+        pFile->eFileLock = NO_LOCK;
+      }
+    }
+    if( rc==SQLITE_OK ){
+      pInode->nLock--;
+      assert( pInode->nLock>=0 );
+      if( pInode->nLock==0 ){
+        closePendingFds(pFile);
+      }
+    }
+  }
+  
+  unixLeaveMutex();
+  if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
+  return rc;
+}
+
+/*
+** Close a file & cleanup AFP specific locking context 
+*/
+static int afpClose(sqlite3_file *id) {
+  int rc = SQLITE_OK;
+  if( id ){
+    unixFile *pFile = (unixFile*)id;
+    afpUnlock(id, NO_LOCK);
+    unixEnterMutex();
+    if( pFile->pInode && pFile->pInode->nLock ){
+      /* If there are outstanding locks, do not actually close the file just
+      ** yet because that would clear those locks.  Instead, add the file
+      ** descriptor to pInode->aPending.  It will be automatically closed when
+      ** the last lock is cleared.
+      */
+      setPendingFd(pFile);
+    }
+    releaseInodeInfo(pFile);
+    sqlite3_free(pFile->lockingContext);
+    rc = closeUnixFile(id);
+    unixLeaveMutex();
+  }
+  return rc;
+}
+
+#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The code above is the AFP lock implementation.  The code is specific
+** to MacOSX and does not work on other unix platforms.  No alternative
+** is available.  If you don't compile for a mac, then the "unix-afp"
+** VFS is not available.
+**
+********************* End of the AFP lock implementation **********************
+******************************************************************************/
+
+/******************************************************************************
+*************************** Begin NFS Locking ********************************/
+
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+/*
+ ** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+ ** must be either NO_LOCK or SHARED_LOCK.
+ **
+ ** If the locking level of the file descriptor is already at or below
+ ** the requested locking level, this routine is a no-op.
+ */
+static int nfsUnlock(sqlite3_file *id, int eFileLock){
+  return posixUnlock(id, eFileLock, 1);
+}
+
+#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The code above is the NFS lock implementation.  The code is specific
+** to MacOSX and does not work on other unix platforms.  No alternative
+** is available.  
+**
+********************* End of the NFS lock implementation **********************
+******************************************************************************/
+
+/******************************************************************************
+**************** Non-locking sqlite3_file methods *****************************
+**
+** The next division contains implementations for all methods of the 
+** sqlite3_file object other than the locking methods.  The locking
+** methods were defined in divisions above (one locking method per
+** division).  Those methods that are common to all locking modes
+** are gather together into this division.
+*/
+
+/*
+** Seek to the offset passed as the second argument, then read cnt 
+** bytes into pBuf. Return the number of bytes actually read.
+**
+** NB:  If you define USE_PREAD or USE_PREAD64, then it might also
+** be necessary to define _XOPEN_SOURCE to be 500.  This varies from
+** one system to another.  Since SQLite does not define USE_PREAD
+** in any form by default, we will not attempt to define _XOPEN_SOURCE.
+** See tickets #2741 and #2681.
+**
+** To avoid stomping the errno value on a failed read the lastErrno value
+** is set before returning.
+*/
+static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
+  int got;
+  int prior = 0;
+#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
+  i64 newOffset;
+#endif
+  TIMER_START;
+  assert( cnt==(cnt&0x1ffff) );
+  assert( id->h>2 );
+  cnt &= 0x1ffff;
+  do{
+#if defined(USE_PREAD)
+    got = osPread(id->h, pBuf, cnt, offset);
+    SimulateIOError( got = -1 );
+#elif defined(USE_PREAD64)
+    got = osPread64(id->h, pBuf, cnt, offset);
+    SimulateIOError( got = -1 );
+#else
+    newOffset = lseek(id->h, offset, SEEK_SET);
+    SimulateIOError( newOffset-- );
+    if( newOffset!=offset ){
+      if( newOffset == -1 ){
+        ((unixFile*)id)->lastErrno = errno;
+      }else{
+        ((unixFile*)id)->lastErrno = 0;
+      }
+      return -1;
+    }
+    got = osRead(id->h, pBuf, cnt);
+#endif
+    if( got==cnt ) break;
+    if( got<0 ){
+      if( errno==EINTR ){ got = 1; continue; }
+      prior = 0;
+      ((unixFile*)id)->lastErrno = errno;
+      break;
+    }else if( got>0 ){
+      cnt -= got;
+      offset += got;
+      prior += got;
+      pBuf = (void*)(got + (char*)pBuf);
+    }
+  }while( got>0 );
+  TIMER_END;
+  OSTRACE(("READ    %-3d %5d %7lld %llu\n",
+            id->h, got+prior, offset-prior, TIMER_ELAPSED));
+  return got+prior;
+}
+
+/*
+** Read data from a file into a buffer.  Return SQLITE_OK if all
+** bytes were read successfully and SQLITE_IOERR if anything goes
+** wrong.
+*/
+static int unixRead(
+  sqlite3_file *id, 
+  void *pBuf, 
+  int amt,
+  sqlite3_int64 offset
+){
+  unixFile *pFile = (unixFile *)id;
+  int got;
+  assert( id );
+  assert( offset>=0 );
+  assert( amt>0 );
+
+  /* If this is a database file (not a journal, master-journal or temp
+  ** file), the bytes in the locking range should never be read or written. */
+#if 0
+  assert( pFile->pUnused==0
+       || offset>=PENDING_BYTE+512
+       || offset+amt<=PENDING_BYTE 
+  );
+#endif
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* Deal with as much of this read request as possible by transfering
+  ** data from the memory mapping using memcpy().  */
+  if( offset<pFile->mmapSize ){
+    if( offset+amt <= pFile->mmapSize ){
+      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+      return SQLITE_OK;
+    }else{
+      int nCopy = pFile->mmapSize - offset;
+      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+      pBuf = &((u8 *)pBuf)[nCopy];
+      amt -= nCopy;
+      offset += nCopy;
+    }
+  }
+#endif
+
+  got = seekAndRead(pFile, offset, pBuf, amt);
+  if( got==amt ){
+    return SQLITE_OK;
+  }else if( got<0 ){
+    /* lastErrno set by seekAndRead */
+    return SQLITE_IOERR_READ;
+  }else{
+    pFile->lastErrno = 0; /* not a system error */
+    /* Unread parts of the buffer must be zero-filled */
+    memset(&((char*)pBuf)[got], 0, amt-got);
+    return SQLITE_IOERR_SHORT_READ;
+  }
+}
+
+/*
+** Attempt to seek the file-descriptor passed as the first argument to
+** absolute offset iOff, then attempt to write nBuf bytes of data from
+** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, 
+** return the actual number of bytes written (which may be less than
+** nBuf).
+*/
+static int seekAndWriteFd(
+  int fd,                         /* File descriptor to write to */
+  i64 iOff,                       /* File offset to begin writing at */
+  const void *pBuf,               /* Copy data from this buffer to the file */
+  int nBuf,                       /* Size of buffer pBuf in bytes */
+  int *piErrno                    /* OUT: Error number if error occurs */
+){
+  int rc = 0;                     /* Value returned by system call */
+
+  assert( nBuf==(nBuf&0x1ffff) );
+  assert( fd>2 );
+  nBuf &= 0x1ffff;
+  TIMER_START;
+
+#if defined(USE_PREAD)
+  do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
+#elif defined(USE_PREAD64)
+  do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
+#else
+  do{
+    i64 iSeek = lseek(fd, iOff, SEEK_SET);
+    SimulateIOError( iSeek-- );
+
+    if( iSeek!=iOff ){
+      if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);
+      return -1;
+    }
+    rc = osWrite(fd, pBuf, nBuf);
+  }while( rc<0 && errno==EINTR );
+#endif
+
+  TIMER_END;
+  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
+
+  if( rc<0 && piErrno ) *piErrno = errno;
+  return rc;
+}
+
+
+/*
+** Seek to the offset in id->offset then read cnt bytes into pBuf.
+** Return the number of bytes actually read.  Update the offset.
+**
+** To avoid stomping the errno value on a failed write the lastErrno value
+** is set before returning.
+*/
+static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
+  return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
+}
+
+
+/*
+** Write data from a buffer into a file.  Return SQLITE_OK on success
+** or some other error code on failure.
+*/
+static int unixWrite(
+  sqlite3_file *id, 
+  const void *pBuf, 
+  int amt,
+  sqlite3_int64 offset 
+){
+  unixFile *pFile = (unixFile*)id;
+  int wrote = 0;
+  assert( id );
+  assert( amt>0 );
+
+  /* If this is a database file (not a journal, master-journal or temp
+  ** file), the bytes in the locking range should never be read or written. */
+#if 0
+  assert( pFile->pUnused==0
+       || offset>=PENDING_BYTE+512
+       || offset+amt<=PENDING_BYTE 
+  );
+#endif
+
+#ifdef SQLITE_DEBUG
+  /* If we are doing a normal write to a database file (as opposed to
+  ** doing a hot-journal rollback or a write to some file other than a
+  ** normal database file) then record the fact that the database
+  ** has changed.  If the transaction counter is modified, record that
+  ** fact too.
+  */
+  if( pFile->inNormalWrite ){
+    pFile->dbUpdate = 1;  /* The database has been modified */
+    if( offset<=24 && offset+amt>=27 ){
+      int rc;
+      char oldCntr[4];
+      SimulateIOErrorBenign(1);
+      rc = seekAndRead(pFile, 24, oldCntr, 4);
+      SimulateIOErrorBenign(0);
+      if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
+        pFile->transCntrChng = 1;  /* The transaction counter has changed */
+      }
+    }
+  }
+#endif
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* Deal with as much of this write request as possible by transfering
+  ** data from the memory mapping using memcpy().  */
+  if( offset<pFile->mmapSize ){
+    if( offset+amt <= pFile->mmapSize ){
+      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+      return SQLITE_OK;
+    }else{
+      int nCopy = pFile->mmapSize - offset;
+      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+      pBuf = &((u8 *)pBuf)[nCopy];
+      amt -= nCopy;
+      offset += nCopy;
+    }
+  }
+#endif
+
+  while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
+    amt -= wrote;
+    offset += wrote;
+    pBuf = &((char*)pBuf)[wrote];
+  }
+  SimulateIOError(( wrote=(-1), amt=1 ));
+  SimulateDiskfullError(( wrote=0, amt=1 ));
+
+  if( amt>0 ){
+    if( wrote<0 && pFile->lastErrno!=ENOSPC ){
+      /* lastErrno set by seekAndWrite */
+      return SQLITE_IOERR_WRITE;
+    }else{
+      pFile->lastErrno = 0; /* not a system error */
+      return SQLITE_FULL;
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+#ifdef SQLITE_TEST
+/*
+** Count the number of fullsyncs and normal syncs.  This is used to test
+** that syncs and fullsyncs are occurring at the right times.
+*/
+SQLITE_API int sqlite3_sync_count = 0;
+SQLITE_API int sqlite3_fullsync_count = 0;
+#endif
+
+/*
+** We do not trust systems to provide a working fdatasync().  Some do.
+** Others do no.  To be safe, we will stick with the (slightly slower)
+** fsync(). If you know that your system does support fdatasync() correctly,
+** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC
+*/
+#if !defined(fdatasync) && !HAVE_FDATASYNC
+# define fdatasync fsync
+#endif
+
+/*
+** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
+** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
+** only available on Mac OS X.  But that could change.
+*/
+#ifdef F_FULLFSYNC
+# define HAVE_FULLFSYNC 1
+#else
+# define HAVE_FULLFSYNC 0
+#endif
+
+
+/*
+** The fsync() system call does not work as advertised on many
+** unix systems.  The following procedure is an attempt to make
+** it work better.
+**
+** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
+** for testing when we want to run through the test suite quickly.
+** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
+** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
+** or power failure will likely corrupt the database file.
+**
+** SQLite sets the dataOnly flag if the size of the file is unchanged.
+** The idea behind dataOnly is that it should only write the file content
+** to disk, not the inode.  We only set dataOnly if the file size is 
+** unchanged since the file size is part of the inode.  However, 
+** Ted Ts'o tells us that fdatasync() will also write the inode if the
+** file size has changed.  The only real difference between fdatasync()
+** and fsync(), Ted tells us, is that fdatasync() will not flush the
+** inode if the mtime or owner or other inode attributes have changed.
+** We only care about the file size, not the other file attributes, so
+** as far as SQLite is concerned, an fdatasync() is always adequate.
+** So, we always use fdatasync() if it is available, regardless of
+** the value of the dataOnly flag.
+*/
+static int full_fsync(int fd, int fullSync, int dataOnly){
+  int rc;
+
+  /* The following "ifdef/elif/else/" block has the same structure as
+  ** the one below. It is replicated here solely to avoid cluttering 
+  ** up the real code with the UNUSED_PARAMETER() macros.
+  */
+#ifdef SQLITE_NO_SYNC
+  UNUSED_PARAMETER(fd);
+  UNUSED_PARAMETER(fullSync);
+  UNUSED_PARAMETER(dataOnly);
+#elif HAVE_FULLFSYNC
+  UNUSED_PARAMETER(dataOnly);
+#else
+  UNUSED_PARAMETER(fullSync);
+  UNUSED_PARAMETER(dataOnly);
+#endif
+
+  /* Record the number of times that we do a normal fsync() and 
+  ** FULLSYNC.  This is used during testing to verify that this procedure
+  ** gets called with the correct arguments.
+  */
+#ifdef SQLITE_TEST
+  if( fullSync ) sqlite3_fullsync_count++;
+  sqlite3_sync_count++;
+#endif
+
+  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
+  ** no-op
+  */
+#ifdef SQLITE_NO_SYNC
+  rc = SQLITE_OK;
+#elif HAVE_FULLFSYNC
+  if( fullSync ){
+    rc = osFcntl(fd, F_FULLFSYNC, 0);
+  }else{
+    rc = 1;
+  }
+  /* If the FULLFSYNC failed, fall back to attempting an fsync().
+  ** It shouldn't be possible for fullfsync to fail on the local 
+  ** file system (on OSX), so failure indicates that FULLFSYNC
+  ** isn't supported for this file system. So, attempt an fsync 
+  ** and (for now) ignore the overhead of a superfluous fcntl call.  
+  ** It'd be better to detect fullfsync support once and avoid 
+  ** the fcntl call every time sync is called.
+  */
+  if( rc ) rc = fsync(fd);
+
+#elif defined(__APPLE__)
+  /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly
+  ** so currently we default to the macro that redefines fdatasync to fsync
+  */
+  rc = fsync(fd);
+#else 
+  rc = fdatasync(fd);
+#if OS_VXWORKS
+  if( rc==-1 && errno==ENOTSUP ){
+    rc = fsync(fd);
+  }
+#endif /* OS_VXWORKS */
+#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
+
+  if( OS_VXWORKS && rc!= -1 ){
+    rc = 0;
+  }
+  return rc;
+}
+
+/*
+** Open a file descriptor to the directory containing file zFilename.
+** If successful, *pFd is set to the opened file descriptor and
+** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
+** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
+** value.
+**
+** The directory file descriptor is used for only one thing - to
+** fsync() a directory to make sure file creation and deletion events
+** are flushed to disk.  Such fsyncs are not needed on newer
+** journaling filesystems, but are required on older filesystems.
+**
+** This routine can be overridden using the xSetSysCall interface.
+** The ability to override this routine was added in support of the
+** chromium sandbox.  Opening a directory is a security risk (we are
+** told) so making it overrideable allows the chromium sandbox to
+** replace this routine with a harmless no-op.  To make this routine
+** a no-op, replace it with a stub that returns SQLITE_OK but leaves
+** *pFd set to a negative number.
+**
+** If SQLITE_OK is returned, the caller is responsible for closing
+** the file descriptor *pFd using close().
+*/
+static int openDirectory(const char *zFilename, int *pFd){
+  int ii;
+  int fd = -1;
+  char zDirname[MAX_PATHNAME+1];
+
+  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
+  for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
+  if( ii>0 ){
+    zDirname[ii] = '\0';
+    fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
+    if( fd>=0 ){
+      OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
+    }
+  }
+  *pFd = fd;
+  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
+}
+
+/*
+** Make sure all writes to a particular file are committed to disk.
+**
+** If dataOnly==0 then both the file itself and its metadata (file
+** size, access time, etc) are synced.  If dataOnly!=0 then only the
+** file data is synced.
+**
+** Under Unix, also make sure that the directory entry for the file
+** has been created by fsync-ing the directory that contains the file.
+** If we do not do this and we encounter a power failure, the directory
+** entry for the journal might not exist after we reboot.  The next
+** SQLite to access the file will not know that the journal exists (because
+** the directory entry for the journal was never created) and the transaction
+** will not roll back - possibly leading to database corruption.
+*/
+static int unixSync(sqlite3_file *id, int flags){
+  int rc;
+  unixFile *pFile = (unixFile*)id;
+
+  int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
+  int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
+
+  /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
+  assert((flags&0x0F)==SQLITE_SYNC_NORMAL
+      || (flags&0x0F)==SQLITE_SYNC_FULL
+  );
+
+  /* Unix cannot, but some systems may return SQLITE_FULL from here. This
+  ** line is to test that doing so does not cause any problems.
+  */
+  SimulateDiskfullError( return SQLITE_FULL );
+
+  assert( pFile );
+  OSTRACE(("SYNC    %-3d\n", pFile->h));
+  rc = full_fsync(pFile->h, isFullsync, isDataOnly);
+  SimulateIOError( rc=1 );
+  if( rc ){
+    pFile->lastErrno = errno;
+    return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
+  }
+
+  /* Also fsync the directory containing the file if the DIRSYNC flag
+  ** is set.  This is a one-time occurrence.  Many systems (examples: AIX)
+  ** are unable to fsync a directory, so ignore errors on the fsync.
+  */
+  if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
+    int dirfd;
+    OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
+            HAVE_FULLFSYNC, isFullsync));
+    rc = osOpenDirectory(pFile->zPath, &dirfd);
+    if( rc==SQLITE_OK && dirfd>=0 ){
+      full_fsync(dirfd, 0, 0);
+      robust_close(pFile, dirfd, __LINE__);
+    }else if( rc==SQLITE_CANTOPEN ){
+      rc = SQLITE_OK;
+    }
+    pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
+  }
+  return rc;
+}
+
+/*
+** Truncate an open file to a specified size
+*/
+static int unixTruncate(sqlite3_file *id, i64 nByte){
+  unixFile *pFile = (unixFile *)id;
+  int rc;
+  assert( pFile );
+  SimulateIOError( return SQLITE_IOERR_TRUNCATE );
+
+  /* If the user has configured a chunk-size for this file, truncate the
+  ** file so that it consists of an integer number of chunks (i.e. the
+  ** actual file size after the operation may be larger than the requested
+  ** size).
+  */
+  if( pFile->szChunk>0 ){
+    nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
+  }
+
+  rc = robust_ftruncate(pFile->h, nByte);
+  if( rc ){
+    pFile->lastErrno = errno;
+    return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+  }else{
+#ifdef SQLITE_DEBUG
+    /* If we are doing a normal write to a database file (as opposed to
+    ** doing a hot-journal rollback or a write to some file other than a
+    ** normal database file) and we truncate the file to zero length,
+    ** that effectively updates the change counter.  This might happen
+    ** when restoring a database using the backup API from a zero-length
+    ** source.
+    */
+    if( pFile->inNormalWrite && nByte==0 ){
+      pFile->transCntrChng = 1;
+    }
+#endif
+
+#if SQLITE_MAX_MMAP_SIZE>0
+    /* If the file was just truncated to a size smaller than the currently
+    ** mapped region, reduce the effective mapping size as well. SQLite will
+    ** use read() and write() to access data beyond this point from now on.  
+    */
+    if( nByte<pFile->mmapSize ){
+      pFile->mmapSize = nByte;
+    }
+#endif
+
+    return SQLITE_OK;
+  }
+}
+
+/*
+** Determine the current size of a file in bytes
+*/
+static int unixFileSize(sqlite3_file *id, i64 *pSize){
+  int rc;
+  struct stat buf;
+  assert( id );
+  rc = osFstat(((unixFile*)id)->h, &buf);
+  SimulateIOError( rc=1 );
+  if( rc!=0 ){
+    ((unixFile*)id)->lastErrno = errno;
+    return SQLITE_IOERR_FSTAT;
+  }
+  *pSize = buf.st_size;
+
+  /* When opening a zero-size database, the findInodeInfo() procedure
+  ** writes a single byte into that file in order to work around a bug
+  ** in the OS-X msdos filesystem.  In order to avoid problems with upper
+  ** layers, we need to report this file size as zero even though it is
+  ** really 1.   Ticket #3260.
+  */
+  if( *pSize==1 ) *pSize = 0;
+
+
+  return SQLITE_OK;
+}
+
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
+/*
+** Handler for proxy-locking file-control verbs.  Defined below in the
+** proxying locking division.
+*/
+static int proxyFileControl(sqlite3_file*,int,void*);
+#endif
+
+/* 
+** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 
+** file-control operation.  Enlarge the database to nBytes in size
+** (rounded up to the next chunk-size).  If the database is already
+** nBytes or larger, this routine is a no-op.
+*/
+static int fcntlSizeHint(unixFile *pFile, i64 nByte){
+  if( pFile->szChunk>0 ){
+    i64 nSize;                    /* Required file size */
+    struct stat buf;              /* Used to hold return values of fstat() */
+   
+    if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;
+
+    nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
+    if( nSize>(i64)buf.st_size ){
+
+#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
+      /* The code below is handling the return value of osFallocate() 
+      ** correctly. posix_fallocate() is defined to "returns zero on success, 
+      ** or an error number on  failure". See the manpage for details. */
+      int err;
+      do{
+        err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);
+      }while( err==EINTR );
+      if( err ) return SQLITE_IOERR_WRITE;
+#else
+      /* If the OS does not have posix_fallocate(), fake it. Write a 
+      ** single byte to the last byte in each block that falls entirely
+      ** within the extended region. Then, if required, a single byte
+      ** at offset (nSize-1), to set the size of the file correctly.
+      ** This is a similar technique to that used by glibc on systems
+      ** that do not have a real fallocate() call.
+      */
+      int nBlk = buf.st_blksize;  /* File-system block size */
+      int nWrite = 0;             /* Number of bytes written by seekAndWrite */
+      i64 iWrite;                 /* Next offset to write to */
+
+      iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1;
+      assert( iWrite>=buf.st_size );
+      assert( (iWrite/nBlk)==((buf.st_size+nBlk-1)/nBlk) );
+      assert( ((iWrite+1)%nBlk)==0 );
+      for(/*no-op*/; iWrite<nSize; iWrite+=nBlk ){
+        nWrite = seekAndWrite(pFile, iWrite, "", 1);
+        if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
+      }
+      if( nWrite==0 || (nSize%nBlk) ){
+        nWrite = seekAndWrite(pFile, nSize-1, "", 1);
+        if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
+      }
+#endif
+    }
+  }
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
+    int rc;
+    if( pFile->szChunk<=0 ){
+      if( robust_ftruncate(pFile->h, nByte) ){
+        pFile->lastErrno = errno;
+        return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+      }
+    }
+
+    rc = unixMapfile(pFile, nByte);
+    return rc;
+  }
+#endif
+
+  return SQLITE_OK;
+}
+
+/*
+** If *pArg is initially negative then this is a query.  Set *pArg to
+** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
+**
+** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
+*/
+static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
+  if( *pArg<0 ){
+    *pArg = (pFile->ctrlFlags & mask)!=0;
+  }else if( (*pArg)==0 ){
+    pFile->ctrlFlags &= ~mask;
+  }else{
+    pFile->ctrlFlags |= mask;
+  }
+}
+
+/* Forward declaration */
+static int unixGetTempname(int nBuf, char *zBuf);
+
+/*
+** Information and control of an open file handle.
+*/
+static int unixFileControl(sqlite3_file *id, int op, void *pArg){
+  unixFile *pFile = (unixFile*)id;
+  switch( op ){
+    case SQLITE_FCNTL_LOCKSTATE: {
+      *(int*)pArg = pFile->eFileLock;
+      return SQLITE_OK;
+    }
+    case SQLITE_LAST_ERRNO: {
+      *(int*)pArg = pFile->lastErrno;
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_CHUNK_SIZE: {
+      pFile->szChunk = *(int *)pArg;
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_SIZE_HINT: {
+      int rc;
+      SimulateIOErrorBenign(1);
+      rc = fcntlSizeHint(pFile, *(i64 *)pArg);
+      SimulateIOErrorBenign(0);
+      return rc;
+    }
+    case SQLITE_FCNTL_PERSIST_WAL: {
+      unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
+      unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_VFSNAME: {
+      *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_TEMPFILENAME: {
+      char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );
+      if( zTFile ){
+        unixGetTempname(pFile->pVfs->mxPathname, zTFile);
+        *(char**)pArg = zTFile;
+      }
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_HAS_MOVED: {
+      *(int*)pArg = fileHasMoved(pFile);
+      return SQLITE_OK;
+    }
+#if SQLITE_MAX_MMAP_SIZE>0
+    case SQLITE_FCNTL_MMAP_SIZE: {
+      i64 newLimit = *(i64*)pArg;
+      int rc = SQLITE_OK;
+      if( newLimit>sqlite3GlobalConfig.mxMmap ){
+        newLimit = sqlite3GlobalConfig.mxMmap;
+      }
+      *(i64*)pArg = pFile->mmapSizeMax;
+      if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
+        pFile->mmapSizeMax = newLimit;
+        if( pFile->mmapSize>0 ){
+          unixUnmapfile(pFile);
+          rc = unixMapfile(pFile, -1);
+        }
+      }
+      return rc;
+    }
+#endif
+#ifdef SQLITE_DEBUG
+    /* The pager calls this method to signal that it has done
+    ** a rollback and that the database is therefore unchanged and
+    ** it hence it is OK for the transaction change counter to be
+    ** unchanged.
+    */
+    case SQLITE_FCNTL_DB_UNCHANGED: {
+      ((unixFile*)id)->dbUpdate = 0;
+      return SQLITE_OK;
+    }
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
+    case SQLITE_SET_LOCKPROXYFILE:
+    case SQLITE_GET_LOCKPROXYFILE: {
+      return proxyFileControl(id,op,pArg);
+    }
+#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
+  }
+  return SQLITE_NOTFOUND;
+}
+
+/*
+** Return the sector size in bytes of the underlying block device for
+** the specified file. This is almost always 512 bytes, but may be
+** larger for some devices.
+**
+** SQLite code assumes this function cannot fail. It also assumes that
+** if two files are created in the same file-system directory (i.e.
+** a database and its journal file) that the sector size will be the
+** same for both.
+*/
+#ifndef __QNXNTO__ 
+static int unixSectorSize(sqlite3_file *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  return SQLITE_DEFAULT_SECTOR_SIZE;
+}
+#endif
+
+/*
+** The following version of unixSectorSize() is optimized for QNX.
+*/
+#ifdef __QNXNTO__
+#include <sys/dcmd_blk.h>
+#include <sys/statvfs.h>
+static int unixSectorSize(sqlite3_file *id){
+  unixFile *pFile = (unixFile*)id;
+  if( pFile->sectorSize == 0 ){
+    struct statvfs fsInfo;
+       
+    /* Set defaults for non-supported filesystems */
+    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
+    pFile->deviceCharacteristics = 0;
+    if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
+      return pFile->sectorSize;
+    }
+
+    if( !strcmp(fsInfo.f_basetype, "tmp") ) {
+      pFile->sectorSize = fsInfo.f_bsize;
+      pFile->deviceCharacteristics =
+        SQLITE_IOCAP_ATOMIC4K |       /* All ram filesystem writes are atomic */
+        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
+                                      ** the write succeeds */
+        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
+                                      ** so it is ordered */
+        0;
+    }else if( strstr(fsInfo.f_basetype, "etfs") ){
+      pFile->sectorSize = fsInfo.f_bsize;
+      pFile->deviceCharacteristics =
+        /* etfs cluster size writes are atomic */
+        (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
+        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
+                                      ** the write succeeds */
+        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
+                                      ** so it is ordered */
+        0;
+    }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){
+      pFile->sectorSize = fsInfo.f_bsize;
+      pFile->deviceCharacteristics =
+        SQLITE_IOCAP_ATOMIC |         /* All filesystem writes are atomic */
+        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
+                                      ** the write succeeds */
+        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
+                                      ** so it is ordered */
+        0;
+    }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){
+      pFile->sectorSize = fsInfo.f_bsize;
+      pFile->deviceCharacteristics =
+        /* full bitset of atomics from max sector size and smaller */
+        ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
+        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
+                                      ** so it is ordered */
+        0;
+    }else if( strstr(fsInfo.f_basetype, "dos") ){
+      pFile->sectorSize = fsInfo.f_bsize;
+      pFile->deviceCharacteristics =
+        /* full bitset of atomics from max sector size and smaller */
+        ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
+        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
+                                      ** so it is ordered */
+        0;
+    }else{
+      pFile->deviceCharacteristics =
+        SQLITE_IOCAP_ATOMIC512 |      /* blocks are atomic */
+        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
+                                      ** the write succeeds */
+        0;
+    }
+  }
+  /* Last chance verification.  If the sector size isn't a multiple of 512
+  ** then it isn't valid.*/
+  if( pFile->sectorSize % 512 != 0 ){
+    pFile->deviceCharacteristics = 0;
+    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
+  }
+  return pFile->sectorSize;
+}
+#endif /* __QNXNTO__ */
+
+/*
+** Return the device characteristics for the file.
+**
+** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
+** However, that choice is controversial since technically the underlying
+** file system does not always provide powersafe overwrites.  (In other
+** words, after a power-loss event, parts of the file that were never
+** written might end up being altered.)  However, non-PSOW behavior is very,
+** very rare.  And asserting PSOW makes a large reduction in the amount
+** of required I/O for journaling, since a lot of padding is eliminated.
+**  Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
+** available to turn it off and URI query parameter available to turn it off.
+*/
+static int unixDeviceCharacteristics(sqlite3_file *id){
+  unixFile *p = (unixFile*)id;
+  int rc = 0;
+#ifdef __QNXNTO__
+  if( p->sectorSize==0 ) unixSectorSize(id);
+  rc = p->deviceCharacteristics;
+#endif
+  if( p->ctrlFlags & UNIXFILE_PSOW ){
+    rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
+  }
+  return rc;
+}
+
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+
+/*
+** Return the system page size.
+**
+** This function should not be called directly by other code in this file. 
+** Instead, it should be called via macro osGetpagesize().
+*/
+static int unixGetpagesize(void){
+#if defined(_BSD_SOURCE)
+  return getpagesize();
+#else
+  return (int)sysconf(_SC_PAGESIZE);
+#endif
+}
+
+#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */
+
+#ifndef SQLITE_OMIT_WAL
+
+/*
+** Object used to represent an shared memory buffer.  
+**
+** When multiple threads all reference the same wal-index, each thread
+** has its own unixShm object, but they all point to a single instance
+** of this unixShmNode object.  In other words, each wal-index is opened
+** only once per process.
+**
+** Each unixShmNode object is connected to a single unixInodeInfo object.
+** We could coalesce this object into unixInodeInfo, but that would mean
+** every open file that does not use shared memory (in other words, most
+** open files) would have to carry around this extra information.  So
+** the unixInodeInfo object contains a pointer to this unixShmNode object
+** and the unixShmNode object is created only when needed.
+**
+** unixMutexHeld() must be true when creating or destroying
+** this object or while reading or writing the following fields:
+**
+**      nRef
+**
+** The following fields are read-only after the object is created:
+** 
+**      fid
+**      zFilename
+**
+** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and
+** unixMutexHeld() is true when reading or writing any other field
+** in this structure.
+*/
+struct unixShmNode {
+  unixInodeInfo *pInode;     /* unixInodeInfo that owns this SHM node */
+  sqlite3_mutex *mutex;      /* Mutex to access this object */
+  char *zFilename;           /* Name of the mmapped file */
+  int h;                     /* Open file descriptor */
+  int szRegion;              /* Size of shared-memory regions */
+  u16 nRegion;               /* Size of array apRegion */
+  u8 isReadonly;             /* True if read-only */
+  char **apRegion;           /* Array of mapped shared-memory regions */
+  int nRef;                  /* Number of unixShm objects pointing to this */
+  unixShm *pFirst;           /* All unixShm objects pointing to this */
+#ifdef SQLITE_DEBUG
+  u8 exclMask;               /* Mask of exclusive locks held */
+  u8 sharedMask;             /* Mask of shared locks held */
+  u8 nextShmId;              /* Next available unixShm.id value */
+#endif
+};
+
+/*
+** Structure used internally by this VFS to record the state of an
+** open shared memory connection.
+**
+** The following fields are initialized when this object is created and
+** are read-only thereafter:
+**
+**    unixShm.pFile
+**    unixShm.id
+**
+** All other fields are read/write.  The unixShm.pFile->mutex must be held
+** while accessing any read/write fields.
+*/
+struct unixShm {
+  unixShmNode *pShmNode;     /* The underlying unixShmNode object */
+  unixShm *pNext;            /* Next unixShm with the same unixShmNode */
+  u8 hasMutex;               /* True if holding the unixShmNode mutex */
+  u8 id;                     /* Id of this connection within its unixShmNode */
+  u16 sharedMask;            /* Mask of shared locks held */
+  u16 exclMask;              /* Mask of exclusive locks held */
+};
+
+/*
+** Constants used for locking
+*/
+#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
+#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */
+
+/*
+** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
+**
+** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
+** otherwise.
+*/
+static int unixShmSystemLock(
+  unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */
+  int lockType,          /* F_UNLCK, F_RDLCK, or F_WRLCK */
+  int ofst,              /* First byte of the locking range */
+  int n                  /* Number of bytes to lock */
+){
+  struct flock f;       /* The posix advisory locking structure */
+  int rc = SQLITE_OK;   /* Result code form fcntl() */
+
+  /* Access to the unixShmNode object is serialized by the caller */
+  assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
+
+  /* Shared locks never span more than one byte */
+  assert( n==1 || lockType!=F_RDLCK );
+
+  /* Locks are within range */
+  assert( n>=1 && n<SQLITE_SHM_NLOCK );
+
+  if( pShmNode->h>=0 ){
+    /* Initialize the locking parameters */
+    memset(&f, 0, sizeof(f));
+    f.l_type = lockType;
+    f.l_whence = SEEK_SET;
+    f.l_start = ofst;
+    f.l_len = n;
+
+    rc = osFcntl(pShmNode->h, F_SETLK, &f);
+    rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
+  }
+
+  /* Update the global lock state and do debug tracing */
+#ifdef SQLITE_DEBUG
+  { u16 mask;
+  OSTRACE(("SHM-LOCK "));
+  mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);
+  if( rc==SQLITE_OK ){
+    if( lockType==F_UNLCK ){
+      OSTRACE(("unlock %d ok", ofst));
+      pShmNode->exclMask &= ~mask;
+      pShmNode->sharedMask &= ~mask;
+    }else if( lockType==F_RDLCK ){
+      OSTRACE(("read-lock %d ok", ofst));
+      pShmNode->exclMask &= ~mask;
+      pShmNode->sharedMask |= mask;
+    }else{
+      assert( lockType==F_WRLCK );
+      OSTRACE(("write-lock %d ok", ofst));
+      pShmNode->exclMask |= mask;
+      pShmNode->sharedMask &= ~mask;
+    }
+  }else{
+    if( lockType==F_UNLCK ){
+      OSTRACE(("unlock %d failed", ofst));
+    }else if( lockType==F_RDLCK ){
+      OSTRACE(("read-lock failed"));
+    }else{
+      assert( lockType==F_WRLCK );
+      OSTRACE(("write-lock %d failed", ofst));
+    }
+  }
+  OSTRACE((" - afterwards %03x,%03x\n",
+           pShmNode->sharedMask, pShmNode->exclMask));
+  }
+#endif
+
+  return rc;        
+}
+
+/*
+** Return the minimum number of 32KB shm regions that should be mapped at
+** a time, assuming that each mapping must be an integer multiple of the
+** current system page-size.
+**
+** Usually, this is 1. The exception seems to be systems that are configured
+** to use 64KB pages - in this case each mapping must cover at least two
+** shm regions.
+*/
+static int unixShmRegionPerMap(void){
+  int shmsz = 32*1024;            /* SHM region size */
+  int pgsz = osGetpagesize();   /* System page size */
+  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
+  if( pgsz<shmsz ) return 1;
+  return pgsz/shmsz;
+}
+
+/*
+** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
+**
+** This is not a VFS shared-memory method; it is a utility function called
+** by VFS shared-memory methods.
+*/
+static void unixShmPurge(unixFile *pFd){
+  unixShmNode *p = pFd->pInode->pShmNode;
+  assert( unixMutexHeld() );
+  if( p && p->nRef==0 ){
+    int nShmPerMap = unixShmRegionPerMap();
+    int i;
+    assert( p->pInode==pFd->pInode );
+    sqlite3_mutex_free(p->mutex);
+    for(i=0; i<p->nRegion; i+=nShmPerMap){
+      if( p->h>=0 ){
+        osMunmap(p->apRegion[i], p->szRegion);
+      }else{
+        sqlite3_free(p->apRegion[i]);
+      }
+    }
+    sqlite3_free(p->apRegion);
+    if( p->h>=0 ){
+      robust_close(pFd, p->h, __LINE__);
+      p->h = -1;
+    }
+    p->pInode->pShmNode = 0;
+    sqlite3_free(p);
+  }
+}
+
+/*
+** Open a shared-memory area associated with open database file pDbFd.  
+** This particular implementation uses mmapped files.
+**
+** The file used to implement shared-memory is in the same directory
+** as the open database file and has the same name as the open database
+** file with the "-shm" suffix added.  For example, if the database file
+** is "/home/user1/config.db" then the file that is created and mmapped
+** for shared memory will be called "/home/user1/config.db-shm".  
+**
+** Another approach to is to use files in /dev/shm or /dev/tmp or an
+** some other tmpfs mount. But if a file in a different directory
+** from the database file is used, then differing access permissions
+** or a chroot() might cause two different processes on the same
+** database to end up using different files for shared memory - 
+** meaning that their memory would not really be shared - resulting
+** in database corruption.  Nevertheless, this tmpfs file usage
+** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
+** or the equivalent.  The use of the SQLITE_SHM_DIRECTORY compile-time
+** option results in an incompatible build of SQLite;  builds of SQLite
+** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the
+** same database file at the same time, database corruption will likely
+** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
+** "unsupported" and may go away in a future SQLite release.
+**
+** When opening a new shared-memory file, if no other instances of that
+** file are currently open, in this process or in other processes, then
+** the file must be truncated to zero length or have its header cleared.
+**
+** If the original database file (pDbFd) is using the "unix-excl" VFS
+** that means that an exclusive lock is held on the database file and
+** that no other processes are able to read or write the database.  In
+** that case, we do not really need shared memory.  No shared memory
+** file is created.  The shared memory will be simulated with heap memory.
+*/
+static int unixOpenSharedMemory(unixFile *pDbFd){
+  struct unixShm *p = 0;          /* The connection to be opened */
+  struct unixShmNode *pShmNode;   /* The underlying mmapped file */
+  int rc;                         /* Result code */
+  unixInodeInfo *pInode;          /* The inode of fd */
+  char *zShmFilename;             /* Name of the file used for SHM */
+  int nShmFilename;               /* Size of the SHM filename in bytes */
+
+  /* Allocate space for the new unixShm object. */
+  p = sqlite3_malloc( sizeof(*p) );
+  if( p==0 ) return SQLITE_NOMEM;
+  memset(p, 0, sizeof(*p));
+  assert( pDbFd->pShm==0 );
+
+  /* Check to see if a unixShmNode object already exists. Reuse an existing
+  ** one if present. Create a new one if necessary.
+  */
+  unixEnterMutex();
+  pInode = pDbFd->pInode;
+  pShmNode = pInode->pShmNode;
+  if( pShmNode==0 ){
+    struct stat sStat;                 /* fstat() info for database file */
+
+    /* Call fstat() to figure out the permissions on the database file. If
+    ** a new *-shm file is created, an attempt will be made to create it
+    ** with the same permissions.
+    */
+    if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){
+      rc = SQLITE_IOERR_FSTAT;
+      goto shm_open_err;
+    }
+
+#ifdef SQLITE_SHM_DIRECTORY
+    nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
+#else
+    nShmFilename = 6 + (int)strlen(pDbFd->zPath);
+#endif
+    pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename );
+    if( pShmNode==0 ){
+      rc = SQLITE_NOMEM;
+      goto shm_open_err;
+    }
+    memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
+    zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
+#ifdef SQLITE_SHM_DIRECTORY
+    sqlite3_snprintf(nShmFilename, zShmFilename, 
+                     SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
+                     (u32)sStat.st_ino, (u32)sStat.st_dev);
+#else
+    sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath);
+    sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
+#endif
+    pShmNode->h = -1;
+    pDbFd->pInode->pShmNode = pShmNode;
+    pShmNode->pInode = pDbFd->pInode;
+    pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
+    if( pShmNode->mutex==0 ){
+      rc = SQLITE_NOMEM;
+      goto shm_open_err;
+    }
+
+    if( pInode->bProcessLock==0 ){
+      int openFlags = O_RDWR | O_CREAT;
+      if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
+        openFlags = O_RDONLY;
+        pShmNode->isReadonly = 1;
+      }
+      pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
+      if( pShmNode->h<0 ){
+        rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
+        goto shm_open_err;
+      }
+
+      /* If this process is running as root, make sure that the SHM file
+      ** is owned by the same user that owns the original database.  Otherwise,
+      ** the original owner will not be able to connect.
+      */
+      osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
+  
+      /* Check to see if another process is holding the dead-man switch.
+      ** If not, truncate the file to zero length. 
+      */
+      rc = SQLITE_OK;
+      if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
+        if( robust_ftruncate(pShmNode->h, 0) ){
+          rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
+        }
+      }
+      if( rc==SQLITE_OK ){
+        rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
+      }
+      if( rc ) goto shm_open_err;
+    }
+  }
+
+  /* Make the new connection a child of the unixShmNode */
+  p->pShmNode = pShmNode;
+#ifdef SQLITE_DEBUG
+  p->id = pShmNode->nextShmId++;
+#endif
+  pShmNode->nRef++;
+  pDbFd->pShm = p;
+  unixLeaveMutex();
+
+  /* The reference count on pShmNode has already been incremented under
+  ** the cover of the unixEnterMutex() mutex and the pointer from the
+  ** new (struct unixShm) object to the pShmNode has been set. All that is
+  ** left to do is to link the new object into the linked list starting
+  ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex 
+  ** mutex.
+  */
+  sqlite3_mutex_enter(pShmNode->mutex);
+  p->pNext = pShmNode->pFirst;
+  pShmNode->pFirst = p;
+  sqlite3_mutex_leave(pShmNode->mutex);
+  return SQLITE_OK;
+
+  /* Jump here on any error */
+shm_open_err:
+  unixShmPurge(pDbFd);       /* This call frees pShmNode if required */
+  sqlite3_free(p);
+  unixLeaveMutex();
+  return rc;
+}
+
+/*
+** This function is called to obtain a pointer to region iRegion of the 
+** shared-memory associated with the database file fd. Shared-memory regions 
+** are numbered starting from zero. Each shared-memory region is szRegion 
+** bytes in size.
+**
+** If an error occurs, an error code is returned and *pp is set to NULL.
+**
+** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
+** region has not been allocated (by any client, including one running in a
+** separate process), then *pp is set to NULL and SQLITE_OK returned. If 
+** bExtend is non-zero and the requested shared-memory region has not yet 
+** been allocated, it is allocated by this function.
+**
+** If the shared-memory region has already been allocated or is allocated by
+** this call as described above, then it is mapped into this processes 
+** address space (if it is not already), *pp is set to point to the mapped 
+** memory and SQLITE_OK returned.
+*/
+static int unixShmMap(
+  sqlite3_file *fd,               /* Handle open on database file */
+  int iRegion,                    /* Region to retrieve */
+  int szRegion,                   /* Size of regions */
+  int bExtend,                    /* True to extend file if necessary */
+  void volatile **pp              /* OUT: Mapped memory */
+){
+  unixFile *pDbFd = (unixFile*)fd;
+  unixShm *p;
+  unixShmNode *pShmNode;
+  int rc = SQLITE_OK;
+  int nShmPerMap = unixShmRegionPerMap();
+  int nReqRegion;
+
+  /* If the shared-memory file has not yet been opened, open it now. */
+  if( pDbFd->pShm==0 ){
+    rc = unixOpenSharedMemory(pDbFd);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  p = pDbFd->pShm;
+  pShmNode = p->pShmNode;
+  sqlite3_mutex_enter(pShmNode->mutex);
+  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
+  assert( pShmNode->pInode==pDbFd->pInode );
+  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
+  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
+
+  /* Minimum number of regions required to be mapped. */
+  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
+
+  if( pShmNode->nRegion<nReqRegion ){
+    char **apNew;                      /* New apRegion[] array */
+    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
+    struct stat sStat;                 /* Used by fstat() */
+
+    pShmNode->szRegion = szRegion;
+
+    if( pShmNode->h>=0 ){
+      /* The requested region is not mapped into this processes address space.
+      ** Check to see if it has been allocated (i.e. if the wal-index file is
+      ** large enough to contain the requested region).
+      */
+      if( osFstat(pShmNode->h, &sStat) ){
+        rc = SQLITE_IOERR_SHMSIZE;
+        goto shmpage_out;
+      }
+  
+      if( sStat.st_size<nByte ){
+        /* The requested memory region does not exist. If bExtend is set to
+        ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
+        */
+        if( !bExtend ){
+          goto shmpage_out;
+        }
+
+        /* Alternatively, if bExtend is true, extend the file. Do this by
+        ** writing a single byte to the end of each (OS) page being
+        ** allocated or extended. Technically, we need only write to the
+        ** last page in order to extend the file. But writing to all new
+        ** pages forces the OS to allocate them immediately, which reduces
+        ** the chances of SIGBUS while accessing the mapped region later on.
+        */
+        else{
+          static const int pgsz = 4096;
+          int iPg;
+
+          /* Write to the last byte of each newly allocated or extended page */
+          assert( (nByte % pgsz)==0 );
+          for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
+            if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){
+              const char *zFile = pShmNode->zFilename;
+              rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
+              goto shmpage_out;
+            }
+          }
+        }
+      }
+    }
+
+    /* Map the requested memory region into this processes address space. */
+    apNew = (char **)sqlite3_realloc(
+        pShmNode->apRegion, nReqRegion*sizeof(char *)
+    );
+    if( !apNew ){
+      rc = SQLITE_IOERR_NOMEM;
+      goto shmpage_out;
+    }
+    pShmNode->apRegion = apNew;
+    while( pShmNode->nRegion<nReqRegion ){
+      int nMap = szRegion*nShmPerMap;
+      int i;
+      void *pMem;
+      if( pShmNode->h>=0 ){
+        pMem = osMmap(0, nMap,
+            pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
+            MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
+        );
+        if( pMem==MAP_FAILED ){
+          rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
+          goto shmpage_out;
+        }
+      }else{
+        pMem = sqlite3_malloc(szRegion);
+        if( pMem==0 ){
+          rc = SQLITE_NOMEM;
+          goto shmpage_out;
+        }
+        memset(pMem, 0, szRegion);
+      }
+
+      for(i=0; i<nShmPerMap; i++){
+        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
+      }
+      pShmNode->nRegion += nShmPerMap;
+    }
+  }
+
+shmpage_out:
+  if( pShmNode->nRegion>iRegion ){
+    *pp = pShmNode->apRegion[iRegion];
+  }else{
+    *pp = 0;
+  }
+  if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
+  sqlite3_mutex_leave(pShmNode->mutex);
+  return rc;
+}
+
+/*
+** Change the lock state for a shared-memory segment.
+**
+** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
+** different here than in posix.  In xShmLock(), one can go from unlocked
+** to shared and back or from unlocked to exclusive and back.  But one may
+** not go from shared to exclusive or from exclusive to shared.
+*/
+static int unixShmLock(
+  sqlite3_file *fd,          /* Database file holding the shared memory */
+  int ofst,                  /* First lock to acquire or release */
+  int n,                     /* Number of locks to acquire or release */
+  int flags                  /* What to do with the lock */
+){
+  unixFile *pDbFd = (unixFile*)fd;      /* Connection holding shared memory */
+  unixShm *p = pDbFd->pShm;             /* The shared memory being locked */
+  unixShm *pX;                          /* For looping over all siblings */
+  unixShmNode *pShmNode = p->pShmNode;  /* The underlying file iNode */
+  int rc = SQLITE_OK;                   /* Result code */
+  u16 mask;                             /* Mask of locks to take or release */
+
+  assert( pShmNode==pDbFd->pInode->pShmNode );
+  assert( pShmNode->pInode==pDbFd->pInode );
+  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
+  assert( n>=1 );
+  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
+       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
+       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
+       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
+  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
+  assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
+  assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
+
+  mask = (1<<(ofst+n)) - (1<<ofst);
+  assert( n>1 || mask==(1<<ofst) );
+  sqlite3_mutex_enter(pShmNode->mutex);
+  if( flags & SQLITE_SHM_UNLOCK ){
+    u16 allMask = 0; /* Mask of locks held by siblings */
+
+    /* See if any siblings hold this same lock */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( pX==p ) continue;
+      assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
+      allMask |= pX->sharedMask;
+    }
+
+    /* Unlock the system-level locks */
+    if( (mask & allMask)==0 ){
+      rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n);
+    }else{
+      rc = SQLITE_OK;
+    }
+
+    /* Undo the local locks */
+    if( rc==SQLITE_OK ){
+      p->exclMask &= ~mask;
+      p->sharedMask &= ~mask;
+    } 
+  }else if( flags & SQLITE_SHM_SHARED ){
+    u16 allShared = 0;  /* Union of locks held by connections other than "p" */
+
+    /* Find out which shared locks are already held by sibling connections.
+    ** If any sibling already holds an exclusive lock, go ahead and return
+    ** SQLITE_BUSY.
+    */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( (pX->exclMask & mask)!=0 ){
+        rc = SQLITE_BUSY;
+        break;
+      }
+      allShared |= pX->sharedMask;
+    }
+
+    /* Get shared locks at the system level, if necessary */
+    if( rc==SQLITE_OK ){
+      if( (allShared & mask)==0 ){
+        rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n);
+      }else{
+        rc = SQLITE_OK;
+      }
+    }
+
+    /* Get the local shared locks */
+    if( rc==SQLITE_OK ){
+      p->sharedMask |= mask;
+    }
+  }else{
+    /* Make sure no sibling connections hold locks that will block this
+    ** lock.  If any do, return SQLITE_BUSY right away.
+    */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
+        rc = SQLITE_BUSY;
+        break;
+      }
+    }
+  
+    /* Get the exclusive locks at the system level.  Then if successful
+    ** also mark the local connection as being locked.
+    */
+    if( rc==SQLITE_OK ){
+      rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n);
+      if( rc==SQLITE_OK ){
+        assert( (p->sharedMask & mask)==0 );
+        p->exclMask |= mask;
+      }
+    }
+  }
+  sqlite3_mutex_leave(pShmNode->mutex);
+  OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
+           p->id, getpid(), p->sharedMask, p->exclMask));
+  return rc;
+}
+
+/*
+** Implement a memory barrier or memory fence on shared memory.  
+**
+** All loads and stores begun before the barrier must complete before
+** any load or store begun after the barrier.
+*/
+static void unixShmBarrier(
+  sqlite3_file *fd                /* Database file holding the shared memory */
+){
+  UNUSED_PARAMETER(fd);
+  unixEnterMutex();
+  unixLeaveMutex();
+}
+
+/*
+** Close a connection to shared-memory.  Delete the underlying 
+** storage if deleteFlag is true.
+**
+** If there is no shared memory associated with the connection then this
+** routine is a harmless no-op.
+*/
+static int unixShmUnmap(
+  sqlite3_file *fd,               /* The underlying database file */
+  int deleteFlag                  /* Delete shared-memory if true */
+){
+  unixShm *p;                     /* The connection to be closed */
+  unixShmNode *pShmNode;          /* The underlying shared-memory file */
+  unixShm **pp;                   /* For looping over sibling connections */
+  unixFile *pDbFd;                /* The underlying database file */
+
+  pDbFd = (unixFile*)fd;
+  p = pDbFd->pShm;
+  if( p==0 ) return SQLITE_OK;
+  pShmNode = p->pShmNode;
+
+  assert( pShmNode==pDbFd->pInode->pShmNode );
+  assert( pShmNode->pInode==pDbFd->pInode );
+
+  /* Remove connection p from the set of connections associated
+  ** with pShmNode */
+  sqlite3_mutex_enter(pShmNode->mutex);
+  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
+  *pp = p->pNext;
+
+  /* Free the connection p */
+  sqlite3_free(p);
+  pDbFd->pShm = 0;
+  sqlite3_mutex_leave(pShmNode->mutex);
+
+  /* If pShmNode->nRef has reached 0, then close the underlying
+  ** shared-memory file, too */
+  unixEnterMutex();
+  assert( pShmNode->nRef>0 );
+  pShmNode->nRef--;
+  if( pShmNode->nRef==0 ){
+    if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename);
+    unixShmPurge(pDbFd);
+  }
+  unixLeaveMutex();
+
+  return SQLITE_OK;
+}
+
+
+#else
+# define unixShmMap     0
+# define unixShmLock    0
+# define unixShmBarrier 0
+# define unixShmUnmap   0
+#endif /* #ifndef SQLITE_OMIT_WAL */
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** If it is currently memory mapped, unmap file pFd.
+*/
+static void unixUnmapfile(unixFile *pFd){
+  assert( pFd->nFetchOut==0 );
+  if( pFd->pMapRegion ){
+    osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
+    pFd->pMapRegion = 0;
+    pFd->mmapSize = 0;
+    pFd->mmapSizeActual = 0;
+  }
+}
+
+/*
+** Attempt to set the size of the memory mapping maintained by file 
+** descriptor pFd to nNew bytes. Any existing mapping is discarded.
+**
+** If successful, this function sets the following variables:
+**
+**       unixFile.pMapRegion
+**       unixFile.mmapSize
+**       unixFile.mmapSizeActual
+**
+** If unsuccessful, an error message is logged via sqlite3_log() and
+** the three variables above are zeroed. In this case SQLite should
+** continue accessing the database using the xRead() and xWrite()
+** methods.
+*/
+static void unixRemapfile(
+  unixFile *pFd,                  /* File descriptor object */
+  i64 nNew                        /* Required mapping size */
+){
+  const char *zErr = "mmap";
+  int h = pFd->h;                      /* File descriptor open on db file */
+  u8 *pOrig = (u8 *)pFd->pMapRegion;   /* Pointer to current file mapping */
+  i64 nOrig = pFd->mmapSizeActual;     /* Size of pOrig region in bytes */
+  u8 *pNew = 0;                        /* Location of new mapping */
+  int flags = PROT_READ;               /* Flags to pass to mmap() */
+
+  assert( pFd->nFetchOut==0 );
+  assert( nNew>pFd->mmapSize );
+  assert( nNew<=pFd->mmapSizeMax );
+  assert( nNew>0 );
+  assert( pFd->mmapSizeActual>=pFd->mmapSize );
+  assert( MAP_FAILED!=0 );
+
+  if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
+
+  if( pOrig ){
+#if HAVE_MREMAP
+    i64 nReuse = pFd->mmapSize;
+#else
+    const int szSyspage = osGetpagesize();
+    i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
+#endif
+    u8 *pReq = &pOrig[nReuse];
+
+    /* Unmap any pages of the existing mapping that cannot be reused. */
+    if( nReuse!=nOrig ){
+      osMunmap(pReq, nOrig-nReuse);
+    }
+
+#if HAVE_MREMAP
+    pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
+    zErr = "mremap";
+#else
+    pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
+    if( pNew!=MAP_FAILED ){
+      if( pNew!=pReq ){
+        osMunmap(pNew, nNew - nReuse);
+        pNew = 0;
+      }else{
+        pNew = pOrig;
+      }
+    }
+#endif
+
+    /* The attempt to extend the existing mapping failed. Free it. */
+    if( pNew==MAP_FAILED || pNew==0 ){
+      osMunmap(pOrig, nReuse);
+    }
+  }
+
+  /* If pNew is still NULL, try to create an entirely new mapping. */
+  if( pNew==0 ){
+    pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
+  }
+
+  if( pNew==MAP_FAILED ){
+    pNew = 0;
+    nNew = 0;
+    unixLogError(SQLITE_OK, zErr, pFd->zPath);
+
+    /* If the mmap() above failed, assume that all subsequent mmap() calls
+    ** will probably fail too. Fall back to using xRead/xWrite exclusively
+    ** in this case.  */
+    pFd->mmapSizeMax = 0;
+  }
+  pFd->pMapRegion = (void *)pNew;
+  pFd->mmapSize = pFd->mmapSizeActual = nNew;
+}
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if 
+** there already exists a mapping for this file, and there are still 
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of 
+** the mapping to create. Otherwise, if nByte is less than zero, then the 
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured 
+** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int unixMapfile(unixFile *pFd, i64 nByte){
+  i64 nMap = nByte;
+  int rc;
+
+  assert( nMap>=0 || pFd->nFetchOut==0 );
+  if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+  if( nMap<0 ){
+    struct stat statbuf;          /* Low-level file information */
+    rc = osFstat(pFd->h, &statbuf);
+    if( rc!=SQLITE_OK ){
+      return SQLITE_IOERR_FSTAT;
+    }
+    nMap = statbuf.st_size;
+  }
+  if( nMap>pFd->mmapSizeMax ){
+    nMap = pFd->mmapSizeMax;
+  }
+
+  if( nMap!=pFd->mmapSize ){
+    if( nMap>0 ){
+      unixRemapfile(pFd, nMap);
+    }else{
+      unixUnmapfile(pFd);
+    }
+  }
+
+  return SQLITE_OK;
+}
+#endif /* SQLITE_MAX_MMAP_SIZE>0 */
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually 
+** release the reference by calling unixUnfetch().
+*/
+static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+#if SQLITE_MAX_MMAP_SIZE>0
+  unixFile *pFd = (unixFile *)fd;   /* The underlying database file */
+#endif
+  *pp = 0;
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  if( pFd->mmapSizeMax>0 ){
+    if( pFd->pMapRegion==0 ){
+      int rc = unixMapfile(pFd, -1);
+      if( rc!=SQLITE_OK ) return rc;
+    }
+    if( pFd->mmapSize >= iOff+nAmt ){
+      *pp = &((u8 *)pFd->pMapRegion)[iOff];
+      pFd->nFetchOut++;
+    }
+  }
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a 
+** reference obtained by an earlier call to unixFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the unixFetch() invocation. 
+**
+** Or, if the third argument is NULL, then this function is being called 
+** to inform the VFS layer that, according to POSIX, any existing mapping 
+** may now be invalid and should be unmapped.
+*/
+static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+#if SQLITE_MAX_MMAP_SIZE>0
+  unixFile *pFd = (unixFile *)fd;   /* The underlying database file */
+  UNUSED_PARAMETER(iOff);
+
+  /* If p==0 (unmap the entire file) then there must be no outstanding 
+  ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+  ** then there must be at least one outstanding.  */
+  assert( (p==0)==(pFd->nFetchOut==0) );
+
+  /* If p!=0, it must match the iOff value. */
+  assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+  if( p ){
+    pFd->nFetchOut--;
+  }else{
+    unixUnmapfile(pFd);
+  }
+
+  assert( pFd->nFetchOut>=0 );
+#else
+  UNUSED_PARAMETER(fd);
+  UNUSED_PARAMETER(p);
+  UNUSED_PARAMETER(iOff);
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** Here ends the implementation of all sqlite3_file methods.
+**
+********************** End sqlite3_file Methods *******************************
+******************************************************************************/
+
+/*
+** This division contains definitions of sqlite3_io_methods objects that
+** implement various file locking strategies.  It also contains definitions
+** of "finder" functions.  A finder-function is used to locate the appropriate
+** sqlite3_io_methods object for a particular database file.  The pAppData
+** field of the sqlite3_vfs VFS objects are initialized to be pointers to
+** the correct finder-function for that VFS.
+**
+** Most finder functions return a pointer to a fixed sqlite3_io_methods
+** object.  The only interesting finder-function is autolockIoFinder, which
+** looks at the filesystem type and tries to guess the best locking
+** strategy from that.
+**
+** For finder-function F, two objects are created:
+**
+**    (1) The real finder-function named "FImpt()".
+**
+**    (2) A constant pointer to this function named just "F".
+**
+**
+** A pointer to the F pointer is used as the pAppData value for VFS
+** objects.  We have to do this instead of letting pAppData point
+** directly at the finder-function since C90 rules prevent a void*
+** from be cast into a function pointer.
+**
+**
+** Each instance of this macro generates two objects:
+**
+**   *  A constant sqlite3_io_methods object call METHOD that has locking
+**      methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
+**
+**   *  An I/O method finder function called FINDER that returns a pointer
+**      to the METHOD object in the previous bullet.
+*/
+#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK, SHMMAP) \
+static const sqlite3_io_methods METHOD = {                                   \
+   VERSION,                    /* iVersion */                                \
+   CLOSE,                      /* xClose */                                  \
+   unixRead,                   /* xRead */                                   \
+   unixWrite,                  /* xWrite */                                  \
+   unixTruncate,               /* xTruncate */                               \
+   unixSync,                   /* xSync */                                   \
+   unixFileSize,               /* xFileSize */                               \
+   LOCK,                       /* xLock */                                   \
+   UNLOCK,                     /* xUnlock */                                 \
+   CKLOCK,                     /* xCheckReservedLock */                      \
+   unixFileControl,            /* xFileControl */                            \
+   unixSectorSize,             /* xSectorSize */                             \
+   unixDeviceCharacteristics,  /* xDeviceCapabilities */                     \
+   SHMMAP,                     /* xShmMap */                                 \
+   unixShmLock,                /* xShmLock */                                \
+   unixShmBarrier,             /* xShmBarrier */                             \
+   unixShmUnmap,               /* xShmUnmap */                               \
+   unixFetch,                  /* xFetch */                                  \
+   unixUnfetch,                /* xUnfetch */                                \
+};                                                                           \
+static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
+  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
+  return &METHOD;                                                            \
+}                                                                            \
+static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
+    = FINDER##Impl;
+
+/*
+** Here are all of the sqlite3_io_methods objects for each of the
+** locking strategies.  Functions that return pointers to these methods
+** are also created.
+*/
+IOMETHODS(
+  posixIoFinder,            /* Finder function name */
+  posixIoMethods,           /* sqlite3_io_methods object name */
+  3,                        /* shared memory and mmap are enabled */
+  unixClose,                /* xClose method */
+  unixLock,                 /* xLock method */
+  unixUnlock,               /* xUnlock method */
+  unixCheckReservedLock,    /* xCheckReservedLock method */
+  unixShmMap                /* xShmMap method */
+)
+IOMETHODS(
+  nolockIoFinder,           /* Finder function name */
+  nolockIoMethods,          /* sqlite3_io_methods object name */
+  3,                        /* shared memory is disabled */
+  nolockClose,              /* xClose method */
+  nolockLock,               /* xLock method */
+  nolockUnlock,             /* xUnlock method */
+  nolockCheckReservedLock,  /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+IOMETHODS(
+  dotlockIoFinder,          /* Finder function name */
+  dotlockIoMethods,         /* sqlite3_io_methods object name */
+  1,                        /* shared memory is disabled */
+  dotlockClose,             /* xClose method */
+  dotlockLock,              /* xLock method */
+  dotlockUnlock,            /* xUnlock method */
+  dotlockCheckReservedLock, /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+
+#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
+IOMETHODS(
+  flockIoFinder,            /* Finder function name */
+  flockIoMethods,           /* sqlite3_io_methods object name */
+  1,                        /* shared memory is disabled */
+  flockClose,               /* xClose method */
+  flockLock,                /* xLock method */
+  flockUnlock,              /* xUnlock method */
+  flockCheckReservedLock,   /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+#endif
+
+#if OS_VXWORKS
+IOMETHODS(
+  semIoFinder,              /* Finder function name */
+  semIoMethods,             /* sqlite3_io_methods object name */
+  1,                        /* shared memory is disabled */
+  semClose,                 /* xClose method */
+  semLock,                  /* xLock method */
+  semUnlock,                /* xUnlock method */
+  semCheckReservedLock,     /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+#endif
+
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+IOMETHODS(
+  afpIoFinder,              /* Finder function name */
+  afpIoMethods,             /* sqlite3_io_methods object name */
+  1,                        /* shared memory is disabled */
+  afpClose,                 /* xClose method */
+  afpLock,                  /* xLock method */
+  afpUnlock,                /* xUnlock method */
+  afpCheckReservedLock,     /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+#endif
+
+/*
+** The proxy locking method is a "super-method" in the sense that it
+** opens secondary file descriptors for the conch and lock files and
+** it uses proxy, dot-file, AFP, and flock() locking methods on those
+** secondary files.  For this reason, the division that implements
+** proxy locking is located much further down in the file.  But we need
+** to go ahead and define the sqlite3_io_methods and finder function
+** for proxy locking here.  So we forward declare the I/O methods.
+*/
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+static int proxyClose(sqlite3_file*);
+static int proxyLock(sqlite3_file*, int);
+static int proxyUnlock(sqlite3_file*, int);
+static int proxyCheckReservedLock(sqlite3_file*, int*);
+IOMETHODS(
+  proxyIoFinder,            /* Finder function name */
+  proxyIoMethods,           /* sqlite3_io_methods object name */
+  1,                        /* shared memory is disabled */
+  proxyClose,               /* xClose method */
+  proxyLock,                /* xLock method */
+  proxyUnlock,              /* xUnlock method */
+  proxyCheckReservedLock,   /* xCheckReservedLock method */
+  0                         /* xShmMap method */
+)
+#endif
+
+/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+IOMETHODS(
+  nfsIoFinder,               /* Finder function name */
+  nfsIoMethods,              /* sqlite3_io_methods object name */
+  1,                         /* shared memory is disabled */
+  unixClose,                 /* xClose method */
+  unixLock,                  /* xLock method */
+  nfsUnlock,                 /* xUnlock method */
+  unixCheckReservedLock,     /* xCheckReservedLock method */
+  0                          /* xShmMap method */
+)
+#endif
+
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+/* 
+** This "finder" function attempts to determine the best locking strategy 
+** for the database file "filePath".  It then returns the sqlite3_io_methods
+** object that implements that strategy.
+**
+** This is for MacOSX only.
+*/
+static const sqlite3_io_methods *autolockIoFinderImpl(
+  const char *filePath,    /* name of the database file */
+  unixFile *pNew           /* open file object for the database file */
+){
+  static const struct Mapping {
+    const char *zFilesystem;              /* Filesystem type name */
+    const sqlite3_io_methods *pMethods;   /* Appropriate locking method */
+  } aMap[] = {
+    { "hfs",    &posixIoMethods },
+    { "ufs",    &posixIoMethods },
+    { "afpfs",  &afpIoMethods },
+    { "smbfs",  &afpIoMethods },
+    { "webdav", &nolockIoMethods },
+    { 0, 0 }
+  };
+  int i;
+  struct statfs fsInfo;
+  struct flock lockInfo;
+
+  if( !filePath ){
+    /* If filePath==NULL that means we are dealing with a transient file
+    ** that does not need to be locked. */
+    return &nolockIoMethods;
+  }
+  if( statfs(filePath, &fsInfo) != -1 ){
+    if( fsInfo.f_flags & MNT_RDONLY ){
+      return &nolockIoMethods;
+    }
+    for(i=0; aMap[i].zFilesystem; i++){
+      if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
+        return aMap[i].pMethods;
+      }
+    }
+  }
+
+  /* Default case. Handles, amongst others, "nfs".
+  ** Test byte-range lock using fcntl(). If the call succeeds, 
+  ** assume that the file-system supports POSIX style locks. 
+  */
+  lockInfo.l_len = 1;
+  lockInfo.l_start = 0;
+  lockInfo.l_whence = SEEK_SET;
+  lockInfo.l_type = F_RDLCK;
+  if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
+    if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){
+      return &nfsIoMethods;
+    } else {
+      return &posixIoMethods;
+    }
+  }else{
+    return &dotlockIoMethods;
+  }
+}
+static const sqlite3_io_methods 
+  *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
+
+#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
+
+#if OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE
+/* 
+** This "finder" function attempts to determine the best locking strategy 
+** for the database file "filePath".  It then returns the sqlite3_io_methods
+** object that implements that strategy.
+**
+** This is for VXWorks only.
+*/
+static const sqlite3_io_methods *autolockIoFinderImpl(
+  const char *filePath,    /* name of the database file */
+  unixFile *pNew           /* the open file object */
+){
+  struct flock lockInfo;
+
+  if( !filePath ){
+    /* If filePath==NULL that means we are dealing with a transient file
+    ** that does not need to be locked. */
+    return &nolockIoMethods;
+  }
+
+  /* Test if fcntl() is supported and use POSIX style locks.
+  ** Otherwise fall back to the named semaphore method.
+  */
+  lockInfo.l_len = 1;
+  lockInfo.l_start = 0;
+  lockInfo.l_whence = SEEK_SET;
+  lockInfo.l_type = F_RDLCK;
+  if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
+    return &posixIoMethods;
+  }else{
+    return &semIoMethods;
+  }
+}
+static const sqlite3_io_methods 
+  *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
+
+#endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */
+
+/*
+** An abstract type for a pointer to an IO method finder function:
+*/
+typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
+
+
+/****************************************************************************
+**************************** sqlite3_vfs methods ****************************
+**
+** This division contains the implementation of methods on the
+** sqlite3_vfs object.
+*/
+
+/*
+** Initialize the contents of the unixFile structure pointed to by pId.
+*/
+static int fillInUnixFile(
+  sqlite3_vfs *pVfs,      /* Pointer to vfs object */
+  int h,                  /* Open file descriptor of file being opened */
+  sqlite3_file *pId,      /* Write to the unixFile structure here */
+  const char *zFilename,  /* Name of the file being opened */
+  int ctrlFlags           /* Zero or more UNIXFILE_* values */
+){
+  const sqlite3_io_methods *pLockingStyle;
+  unixFile *pNew = (unixFile *)pId;
+  int rc = SQLITE_OK;
+
+  assert( pNew->pInode==NULL );
+
+  /* Usually the path zFilename should not be a relative pathname. The
+  ** exception is when opening the proxy "conch" file in builds that
+  ** include the special Apple locking styles.
+  */
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+  assert( zFilename==0 || zFilename[0]=='/' 
+    || pVfs->pAppData==(void*)&autolockIoFinder );
+#else
+  assert( zFilename==0 || zFilename[0]=='/' );
+#endif
+
+  /* No locking occurs in temporary files */
+  assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
+
+  OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
+  pNew->h = h;
+  pNew->pVfs = pVfs;
+  pNew->zPath = zFilename;
+  pNew->ctrlFlags = (u8)ctrlFlags;
+#if SQLITE_MAX_MMAP_SIZE>0
+  pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
+#endif
+  if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
+                           "psow", SQLITE_POWERSAFE_OVERWRITE) ){
+    pNew->ctrlFlags |= UNIXFILE_PSOW;
+  }
+  if( strcmp(pVfs->zName,"unix-excl")==0 ){
+    pNew->ctrlFlags |= UNIXFILE_EXCL;
+  }
+
+#if OS_VXWORKS
+  pNew->pId = vxworksFindFileId(zFilename);
+  if( pNew->pId==0 ){
+    ctrlFlags |= UNIXFILE_NOLOCK;
+    rc = SQLITE_NOMEM;
+  }
+#endif
+
+  if( ctrlFlags & UNIXFILE_NOLOCK ){
+    pLockingStyle = &nolockIoMethods;
+  }else{
+    pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
+#if SQLITE_ENABLE_LOCKING_STYLE
+    /* Cache zFilename in the locking context (AFP and dotlock override) for
+    ** proxyLock activation is possible (remote proxy is based on db name)
+    ** zFilename remains valid until file is closed, to support */
+    pNew->lockingContext = (void*)zFilename;
+#endif
+  }
+
+  if( pLockingStyle == &posixIoMethods
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+    || pLockingStyle == &nfsIoMethods
+#endif
+  ){
+    unixEnterMutex();
+    rc = findInodeInfo(pNew, &pNew->pInode);
+    if( rc!=SQLITE_OK ){
+      /* If an error occurred in findInodeInfo(), close the file descriptor
+      ** immediately, before releasing the mutex. findInodeInfo() may fail
+      ** in two scenarios:
+      **
+      **   (a) A call to fstat() failed.
+      **   (b) A malloc failed.
+      **
+      ** Scenario (b) may only occur if the process is holding no other
+      ** file descriptors open on the same file. If there were other file
+      ** descriptors on this file, then no malloc would be required by
+      ** findInodeInfo(). If this is the case, it is quite safe to close
+      ** handle h - as it is guaranteed that no posix locks will be released
+      ** by doing so.
+      **
+      ** If scenario (a) caused the error then things are not so safe. The
+      ** implicit assumption here is that if fstat() fails, things are in
+      ** such bad shape that dropping a lock or two doesn't matter much.
+      */
+      robust_close(pNew, h, __LINE__);
+      h = -1;
+    }
+    unixLeaveMutex();
+  }
+
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
+  else if( pLockingStyle == &afpIoMethods ){
+    /* AFP locking uses the file path so it needs to be included in
+    ** the afpLockingContext.
+    */
+    afpLockingContext *pCtx;
+    pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
+    if( pCtx==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      /* NB: zFilename exists and remains valid until the file is closed
+      ** according to requirement F11141.  So we do not need to make a
+      ** copy of the filename. */
+      pCtx->dbPath = zFilename;
+      pCtx->reserved = 0;
+      srandomdev();
+      unixEnterMutex();
+      rc = findInodeInfo(pNew, &pNew->pInode);
+      if( rc!=SQLITE_OK ){
+        sqlite3_free(pNew->lockingContext);
+        robust_close(pNew, h, __LINE__);
+        h = -1;
+      }
+      unixLeaveMutex();        
+    }
+  }
+#endif
+
+  else if( pLockingStyle == &dotlockIoMethods ){
+    /* Dotfile locking uses the file path so it needs to be included in
+    ** the dotlockLockingContext 
+    */
+    char *zLockFile;
+    int nFilename;
+    assert( zFilename!=0 );
+    nFilename = (int)strlen(zFilename) + 6;
+    zLockFile = (char *)sqlite3_malloc(nFilename);
+    if( zLockFile==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
+    }
+    pNew->lockingContext = zLockFile;
+  }
+
+#if OS_VXWORKS
+  else if( pLockingStyle == &semIoMethods ){
+    /* Named semaphore locking uses the file path so it needs to be
+    ** included in the semLockingContext
+    */
+    unixEnterMutex();
+    rc = findInodeInfo(pNew, &pNew->pInode);
+    if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){
+      char *zSemName = pNew->pInode->aSemName;
+      int n;
+      sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem",
+                       pNew->pId->zCanonicalName);
+      for( n=1; zSemName[n]; n++ )
+        if( zSemName[n]=='/' ) zSemName[n] = '_';
+      pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1);
+      if( pNew->pInode->pSem == SEM_FAILED ){
+        rc = SQLITE_NOMEM;
+        pNew->pInode->aSemName[0] = '\0';
+      }
+    }
+    unixLeaveMutex();
+  }
+#endif
+  
+  pNew->lastErrno = 0;
+#if OS_VXWORKS
+  if( rc!=SQLITE_OK ){
+    if( h>=0 ) robust_close(pNew, h, __LINE__);
+    h = -1;
+    osUnlink(zFilename);
+    pNew->ctrlFlags |= UNIXFILE_DELETE;
+  }
+#endif
+  if( rc!=SQLITE_OK ){
+    if( h>=0 ) robust_close(pNew, h, __LINE__);
+  }else{
+    pNew->pMethod = pLockingStyle;
+    OpenCounter(+1);
+    verifyDbFile(pNew);
+  }
+  return rc;
+}
+
+/*
+** Return the name of a directory in which to put temporary files.
+** If no suitable temporary file directory can be found, return NULL.
+*/
+static const char *unixTempFileDir(void){
+  static const char *azDirs[] = {
+     0,
+     0,
+     0,
+     "/var/tmp",
+     "/usr/tmp",
+     "/tmp",
+     0        /* List terminator */
+  };
+  unsigned int i;
+  struct stat buf;
+  const char *zDir = 0;
+
+  azDirs[0] = sqlite3_temp_directory;
+  if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR");
+  if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR");
+  for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
+    if( zDir==0 ) continue;
+    if( osStat(zDir, &buf) ) continue;
+    if( !S_ISDIR(buf.st_mode) ) continue;
+    if( osAccess(zDir, 07) ) continue;
+    break;
+  }
+  return zDir;
+}
+
+/*
+** Create a temporary file name in zBuf.  zBuf must be allocated
+** by the calling process and must be big enough to hold at least
+** pVfs->mxPathname bytes.
+*/
+static int unixGetTempname(int nBuf, char *zBuf){
+  static const unsigned char zChars[] =
+    "abcdefghijklmnopqrstuvwxyz"
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "0123456789";
+  unsigned int i, j;
+  const char *zDir;
+
+  /* It's odd to simulate an io-error here, but really this is just
+  ** using the io-error infrastructure to test that SQLite handles this
+  ** function failing. 
+  */
+  SimulateIOError( return SQLITE_IOERR );
+
+  zDir = unixTempFileDir();
+  if( zDir==0 ) zDir = ".";
+
+  /* Check that the output buffer is large enough for the temporary file 
+  ** name. If it is not, return SQLITE_ERROR.
+  */
+  if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){
+    return SQLITE_ERROR;
+  }
+
+  do{
+    sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
+    j = (int)strlen(zBuf);
+    sqlite3_randomness(15, &zBuf[j]);
+    for(i=0; i<15; i++, j++){
+      zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
+    }
+    zBuf[j] = 0;
+    zBuf[j+1] = 0;
+  }while( osAccess(zBuf,0)==0 );
+  return SQLITE_OK;
+}
+
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
+/*
+** Routine to transform a unixFile into a proxy-locking unixFile.
+** Implementation in the proxy-lock division, but used by unixOpen()
+** if SQLITE_PREFER_PROXY_LOCKING is defined.
+*/
+static int proxyTransformUnixFile(unixFile*, const char*);
+#endif
+
+/*
+** Search for an unused file descriptor that was opened on the database 
+** file (not a journal or master-journal file) identified by pathname
+** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
+** argument to this function.
+**
+** Such a file descriptor may exist if a database connection was closed
+** but the associated file descriptor could not be closed because some
+** other file descriptor open on the same file is holding a file-lock.
+** Refer to comments in the unixClose() function and the lengthy comment
+** describing "Posix Advisory Locking" at the start of this file for 
+** further details. Also, ticket #4018.
+**
+** If a suitable file descriptor is found, then it is returned. If no
+** such file descriptor is located, -1 is returned.
+*/
+static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
+  UnixUnusedFd *pUnused = 0;
+
+  /* Do not search for an unused file descriptor on vxworks. Not because
+  ** vxworks would not benefit from the change (it might, we're not sure),
+  ** but because no way to test it is currently available. It is better 
+  ** not to risk breaking vxworks support for the sake of such an obscure 
+  ** feature.  */
+#if !OS_VXWORKS
+  struct stat sStat;                   /* Results of stat() call */
+
+  /* A stat() call may fail for various reasons. If this happens, it is
+  ** almost certain that an open() call on the same path will also fail.
+  ** For this reason, if an error occurs in the stat() call here, it is
+  ** ignored and -1 is returned. The caller will try to open a new file
+  ** descriptor on the same path, fail, and return an error to SQLite.
+  **
+  ** Even if a subsequent open() call does succeed, the consequences of
+  ** not searching for a reusable file descriptor are not dire.  */
+  if( 0==osStat(zPath, &sStat) ){
+    unixInodeInfo *pInode;
+
+    unixEnterMutex();
+    pInode = inodeList;
+    while( pInode && (pInode->fileId.dev!=sStat.st_dev
+                     || pInode->fileId.ino!=sStat.st_ino) ){
+       pInode = pInode->pNext;
+    }
+    if( pInode ){
+      UnixUnusedFd **pp;
+      for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
+      pUnused = *pp;
+      if( pUnused ){
+        *pp = pUnused->pNext;
+      }
+    }
+    unixLeaveMutex();
+  }
+#endif    /* if !OS_VXWORKS */
+  return pUnused;
+}
+
+/*
+** This function is called by unixOpen() to determine the unix permissions
+** to create new files with. If no error occurs, then SQLITE_OK is returned
+** and a value suitable for passing as the third argument to open(2) is
+** written to *pMode. If an IO error occurs, an SQLite error code is 
+** returned and the value of *pMode is not modified.
+**
+** In most cases, this routine sets *pMode to 0, which will become
+** an indication to robust_open() to create the file using
+** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
+** But if the file being opened is a WAL or regular journal file, then 
+** this function queries the file-system for the permissions on the 
+** corresponding database file and sets *pMode to this value. Whenever 
+** possible, WAL and journal files are created using the same permissions 
+** as the associated database file.
+**
+** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
+** original filename is unavailable.  But 8_3_NAMES is only used for
+** FAT filesystems and permissions do not matter there, so just use
+** the default permissions.
+*/
+static int findCreateFileMode(
+  const char *zPath,              /* Path of file (possibly) being created */
+  int flags,                      /* Flags passed as 4th argument to xOpen() */
+  mode_t *pMode,                  /* OUT: Permissions to open file with */
+  uid_t *pUid,                    /* OUT: uid to set on the file */
+  gid_t *pGid                     /* OUT: gid to set on the file */
+){
+  int rc = SQLITE_OK;             /* Return Code */
+  *pMode = 0;
+  *pUid = 0;
+  *pGid = 0;
+  if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
+    char zDb[MAX_PATHNAME+1];     /* Database file path */
+    int nDb;                      /* Number of valid bytes in zDb */
+    struct stat sStat;            /* Output of stat() on database file */
+
+    /* zPath is a path to a WAL or journal file. The following block derives
+    ** the path to the associated database file from zPath. This block handles
+    ** the following naming conventions:
+    **
+    **   "<path to db>-journal"
+    **   "<path to db>-wal"
+    **   "<path to db>-journalNN"
+    **   "<path to db>-walNN"
+    **
+    ** where NN is a decimal number. The NN naming schemes are 
+    ** used by the test_multiplex.c module.
+    */
+    nDb = sqlite3Strlen30(zPath) - 1; 
+#ifdef SQLITE_ENABLE_8_3_NAMES
+    while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--;
+    if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK;
+#else
+    while( zPath[nDb]!='-' ){
+      assert( nDb>0 );
+      assert( zPath[nDb]!='\n' );
+      nDb--;
+    }
+#endif
+    memcpy(zDb, zPath, nDb);
+    zDb[nDb] = '\0';
+
+    if( 0==osStat(zDb, &sStat) ){
+      *pMode = sStat.st_mode & 0777;
+      *pUid = sStat.st_uid;
+      *pGid = sStat.st_gid;
+    }else{
+      rc = SQLITE_IOERR_FSTAT;
+    }
+  }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
+    *pMode = 0600;
+  }
+  return rc;
+}
+
+/*
+** Open the file zPath.
+** 
+** Previously, the SQLite OS layer used three functions in place of this
+** one:
+**
+**     sqlite3OsOpenReadWrite();
+**     sqlite3OsOpenReadOnly();
+**     sqlite3OsOpenExclusive();
+**
+** These calls correspond to the following combinations of flags:
+**
+**     ReadWrite() ->     (READWRITE | CREATE)
+**     ReadOnly()  ->     (READONLY) 
+**     OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
+**
+** The old OpenExclusive() accepted a boolean argument - "delFlag". If
+** true, the file was configured to be automatically deleted when the
+** file handle closed. To achieve the same effect using this new 
+** interface, add the DELETEONCLOSE flag to those specified above for 
+** OpenExclusive().
+*/
+static int unixOpen(
+  sqlite3_vfs *pVfs,           /* The VFS for which this is the xOpen method */
+  const char *zPath,           /* Pathname of file to be opened */
+  sqlite3_file *pFile,         /* The file descriptor to be filled in */
+  int flags,                   /* Input flags to control the opening */
+  int *pOutFlags               /* Output flags returned to SQLite core */
+){
+  unixFile *p = (unixFile *)pFile;
+  int fd = -1;                   /* File descriptor returned by open() */
+  int openFlags = 0;             /* Flags to pass to open() */
+  int eType = flags&0xFFFFFF00;  /* Type of file to open */
+  int noLock;                    /* True to omit locking primitives */
+  int rc = SQLITE_OK;            /* Function Return Code */
+  int ctrlFlags = 0;             /* UNIXFILE_* flags */
+
+  int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
+  int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
+  int isCreate     = (flags & SQLITE_OPEN_CREATE);
+  int isReadonly   = (flags & SQLITE_OPEN_READONLY);
+  int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);
+#if SQLITE_ENABLE_LOCKING_STYLE
+  int isAutoProxy  = (flags & SQLITE_OPEN_AUTOPROXY);
+#endif
+#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
+  struct statfs fsInfo;
+#endif
+
+  /* If creating a master or main-file journal, this function will open
+  ** a file-descriptor on the directory too. The first time unixSync()
+  ** is called the directory file descriptor will be fsync()ed and close()d.
+  */
+  int syncDir = (isCreate && (
+        eType==SQLITE_OPEN_MASTER_JOURNAL 
+     || eType==SQLITE_OPEN_MAIN_JOURNAL 
+     || eType==SQLITE_OPEN_WAL
+  ));
+
+  /* If argument zPath is a NULL pointer, this function is required to open
+  ** a temporary file. Use this buffer to store the file name in.
+  */
+  char zTmpname[MAX_PATHNAME+2];
+  const char *zName = zPath;
+
+  /* Check the following statements are true: 
+  **
+  **   (a) Exactly one of the READWRITE and READONLY flags must be set, and 
+  **   (b) if CREATE is set, then READWRITE must also be set, and
+  **   (c) if EXCLUSIVE is set, then CREATE must also be set.
+  **   (d) if DELETEONCLOSE is set, then CREATE must also be set.
+  */
+  assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
+  assert(isCreate==0 || isReadWrite);
+  assert(isExclusive==0 || isCreate);
+  assert(isDelete==0 || isCreate);
+
+  /* The main DB, main journal, WAL file and master journal are never 
+  ** automatically deleted. Nor are they ever temporary files.  */
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL );
+
+  /* Assert that the upper layer has set one of the "file-type" flags. */
+  assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
+       || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL 
+       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL 
+       || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
+  );
+
+  /* Detect a pid change and reset the PRNG.  There is a race condition
+  ** here such that two or more threads all trying to open databases at
+  ** the same instant might all reset the PRNG.  But multiple resets
+  ** are harmless.
+  */
+  if( randomnessPid!=getpid() ){
+    randomnessPid = getpid();
+    sqlite3_randomness(0,0);
+  }
+
+  memset(p, 0, sizeof(unixFile));
+
+  if( eType==SQLITE_OPEN_MAIN_DB ){
+    UnixUnusedFd *pUnused;
+    pUnused = findReusableFd(zName, flags);
+    if( pUnused ){
+      fd = pUnused->fd;
+    }else{
+      pUnused = sqlite3_malloc(sizeof(*pUnused));
+      if( !pUnused ){
+        return SQLITE_NOMEM;
+      }
+    }
+    p->pUnused = pUnused;
+
+    /* Database filenames are double-zero terminated if they are not
+    ** URIs with parameters.  Hence, they can always be passed into
+    ** sqlite3_uri_parameter(). */
+    assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
+
+  }else if( !zName ){
+    /* If zName is NULL, the upper layer is requesting a temp file. */
+    assert(isDelete && !syncDir);
+    rc = unixGetTempname(MAX_PATHNAME+2, zTmpname);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    zName = zTmpname;
+
+    /* Generated temporary filenames are always double-zero terminated
+    ** for use by sqlite3_uri_parameter(). */
+    assert( zName[strlen(zName)+1]==0 );
+  }
+
+  /* Determine the value of the flags parameter passed to POSIX function
+  ** open(). These must be calculated even if open() is not called, as
+  ** they may be stored as part of the file handle and used by the 
+  ** 'conch file' locking functions later on.  */
+  if( isReadonly )  openFlags |= O_RDONLY;
+  if( isReadWrite ) openFlags |= O_RDWR;
+  if( isCreate )    openFlags |= O_CREAT;
+  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
+  openFlags |= (O_LARGEFILE|O_BINARY);
+
+  if( fd<0 ){
+    mode_t openMode;              /* Permissions to create file with */
+    uid_t uid;                    /* Userid for the file */
+    gid_t gid;                    /* Groupid for the file */
+    rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
+    if( rc!=SQLITE_OK ){
+      assert( !p->pUnused );
+      assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
+      return rc;
+    }
+    fd = robust_open(zName, openFlags, openMode);
+    OSTRACE(("OPENX   %-3d %s 0%o\n", fd, zName, openFlags));
+    if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
+      /* Failed to open the file for read/write access. Try read-only. */
+      flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
+      openFlags &= ~(O_RDWR|O_CREAT);
+      flags |= SQLITE_OPEN_READONLY;
+      openFlags |= O_RDONLY;
+      isReadonly = 1;
+      fd = robust_open(zName, openFlags, openMode);
+    }
+    if( fd<0 ){
+      rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
+      goto open_finished;
+    }
+
+    /* If this process is running as root and if creating a new rollback
+    ** journal or WAL file, set the ownership of the journal or WAL to be
+    ** the same as the original database.
+    */
+    if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
+      osFchown(fd, uid, gid);
+    }
+  }
+  assert( fd>=0 );
+  if( pOutFlags ){
+    *pOutFlags = flags;
+  }
+
+  if( p->pUnused ){
+    p->pUnused->fd = fd;
+    p->pUnused->flags = flags;
+  }
+
+  if( isDelete ){
+#if OS_VXWORKS
+    zPath = zName;
+#elif defined(SQLITE_UNLINK_AFTER_CLOSE)
+    zPath = sqlite3_mprintf("%s", zName);
+    if( zPath==0 ){
+      robust_close(p, fd, __LINE__);
+      return SQLITE_NOMEM;
+    }
+#else
+    osUnlink(zName);
+#endif
+  }
+#if SQLITE_ENABLE_LOCKING_STYLE
+  else{
+    p->openFlags = openFlags;
+  }
+#endif
+
+  noLock = eType!=SQLITE_OPEN_MAIN_DB;
+
+  
+#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
+  if( fstatfs(fd, &fsInfo) == -1 ){
+    ((unixFile*)pFile)->lastErrno = errno;
+    robust_close(p, fd, __LINE__);
+    return SQLITE_IOERR_ACCESS;
+  }
+  if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
+    ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
+  }
+#endif
+
+  /* Set up appropriate ctrlFlags */
+  if( isDelete )                ctrlFlags |= UNIXFILE_DELETE;
+  if( isReadonly )              ctrlFlags |= UNIXFILE_RDONLY;
+  if( noLock )                  ctrlFlags |= UNIXFILE_NOLOCK;
+  if( syncDir )                 ctrlFlags |= UNIXFILE_DIRSYNC;
+  if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
+
+#if SQLITE_ENABLE_LOCKING_STYLE
+#if SQLITE_PREFER_PROXY_LOCKING
+  isAutoProxy = 1;
+#endif
+  if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){
+    char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
+    int useProxy = 0;
+
+    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 
+    ** never use proxy, NULL means use proxy for non-local files only.  */
+    if( envforce!=NULL ){
+      useProxy = atoi(envforce)>0;
+    }else{
+      if( statfs(zPath, &fsInfo) == -1 ){
+        /* In theory, the close(fd) call is sub-optimal. If the file opened
+        ** with fd is a database file, and there are other connections open
+        ** on that file that are currently holding advisory locks on it,
+        ** then the call to close() will cancel those locks. In practice,
+        ** we're assuming that statfs() doesn't fail very often. At least
+        ** not while other file descriptors opened by the same process on
+        ** the same file are working.  */
+        p->lastErrno = errno;
+        robust_close(p, fd, __LINE__);
+        rc = SQLITE_IOERR_ACCESS;
+        goto open_finished;
+      }
+      useProxy = !(fsInfo.f_flags&MNT_LOCAL);
+    }
+    if( useProxy ){
+      rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
+      if( rc==SQLITE_OK ){
+        rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
+        if( rc!=SQLITE_OK ){
+          /* Use unixClose to clean up the resources added in fillInUnixFile 
+          ** and clear all the structure's references.  Specifically, 
+          ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 
+          */
+          unixClose(pFile);
+          return rc;
+        }
+      }
+      goto open_finished;
+    }
+  }
+#endif
+  
+  rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
+
+open_finished:
+  if( rc!=SQLITE_OK ){
+    sqlite3_free(p->pUnused);
+  }
+  return rc;
+}
+
+
+/*
+** Delete the file at zPath. If the dirSync argument is true, fsync()
+** the directory after deleting the file.
+*/
+static int unixDelete(
+  sqlite3_vfs *NotUsed,     /* VFS containing this as the xDelete method */
+  const char *zPath,        /* Name of file to be deleted */
+  int dirSync               /* If true, fsync() directory after deleting file */
+){
+  int rc = SQLITE_OK;
+  UNUSED_PARAMETER(NotUsed);
+  SimulateIOError(return SQLITE_IOERR_DELETE);
+  if( osUnlink(zPath)==(-1) ){
+    if( errno==ENOENT
+#if OS_VXWORKS
+        || osAccess(zPath,0)!=0
+#endif
+    ){
+      rc = SQLITE_IOERR_DELETE_NOENT;
+    }else{
+      rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
+    }
+    return rc;
+  }
+#ifndef SQLITE_DISABLE_DIRSYNC
+  if( (dirSync & 1)!=0 ){
+    int fd;
+    rc = osOpenDirectory(zPath, &fd);
+    if( rc==SQLITE_OK ){
+#if OS_VXWORKS
+      if( fsync(fd)==-1 )
+#else
+      if( fsync(fd) )
+#endif
+      {
+        rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
+      }
+      robust_close(0, fd, __LINE__);
+    }else if( rc==SQLITE_CANTOPEN ){
+      rc = SQLITE_OK;
+    }
+  }
+#endif
+  return rc;
+}
+
+/*
+** Test the existence of or access permissions of file zPath. The
+** test performed depends on the value of flags:
+**
+**     SQLITE_ACCESS_EXISTS: Return 1 if the file exists
+**     SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
+**     SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
+**
+** Otherwise return 0.
+*/
+static int unixAccess(
+  sqlite3_vfs *NotUsed,   /* The VFS containing this xAccess method */
+  const char *zPath,      /* Path of the file to examine */
+  int flags,              /* What do we want to learn about the zPath file? */
+  int *pResOut            /* Write result boolean here */
+){
+  int amode = 0;
+  UNUSED_PARAMETER(NotUsed);
+  SimulateIOError( return SQLITE_IOERR_ACCESS; );
+  switch( flags ){
+    case SQLITE_ACCESS_EXISTS:
+      amode = F_OK;
+      break;
+    case SQLITE_ACCESS_READWRITE:
+      amode = W_OK|R_OK;
+      break;
+    case SQLITE_ACCESS_READ:
+      amode = R_OK;
+      break;
+
+    default:
+      assert(!"Invalid flags argument");
+  }
+  *pResOut = (osAccess(zPath, amode)==0);
+  if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){
+    struct stat buf;
+    if( 0==osStat(zPath, &buf) && buf.st_size==0 ){
+      *pResOut = 0;
+    }
+  }
+  return SQLITE_OK;
+}
+
+
+/*
+** Turn a relative pathname into a full pathname. The relative path
+** is stored as a nul-terminated string in the buffer pointed to by
+** zPath. 
+**
+** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes 
+** (in this case, MAX_PATHNAME bytes). The full-path is written to
+** this buffer before returning.
+*/
+static int unixFullPathname(
+  sqlite3_vfs *pVfs,            /* Pointer to vfs object */
+  const char *zPath,            /* Possibly relative input path */
+  int nOut,                     /* Size of output buffer in bytes */
+  char *zOut                    /* Output buffer */
+){
+
+  /* It's odd to simulate an io-error here, but really this is just
+  ** using the io-error infrastructure to test that SQLite handles this
+  ** function failing. This function could fail if, for example, the
+  ** current working directory has been unlinked.
+  */
+  SimulateIOError( return SQLITE_ERROR );
+
+  assert( pVfs->mxPathname==MAX_PATHNAME );
+  UNUSED_PARAMETER(pVfs);
+
+  zOut[nOut-1] = '\0';
+  if( zPath[0]=='/' ){
+    sqlite3_snprintf(nOut, zOut, "%s", zPath);
+  }else{
+    int nCwd;
+    if( osGetcwd(zOut, nOut-1)==0 ){
+      return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath);
+    }
+    nCwd = (int)strlen(zOut);
+    sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
+  }
+  return SQLITE_OK;
+}
+
+
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+/*
+** Interfaces for opening a shared library, finding entry points
+** within the shared library, and closing the shared library.
+*/
+#include <dlfcn.h>
+static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){
+  UNUSED_PARAMETER(NotUsed);
+  return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
+}
+
+/*
+** SQLite calls this function immediately after a call to unixDlSym() or
+** unixDlOpen() fails (returns a null pointer). If a more detailed error
+** message is available, it is written to zBufOut. If no error message
+** is available, zBufOut is left unmodified and SQLite uses a default
+** error message.
+*/
+static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){
+  const char *zErr;
+  UNUSED_PARAMETER(NotUsed);
+  unixEnterMutex();
+  zErr = dlerror();
+  if( zErr ){
+    sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
+  }
+  unixLeaveMutex();
+}
+static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){
+  /* 
+  ** GCC with -pedantic-errors says that C90 does not allow a void* to be
+  ** cast into a pointer to a function.  And yet the library dlsym() routine
+  ** returns a void* which is really a pointer to a function.  So how do we
+  ** use dlsym() with -pedantic-errors?
+  **
+  ** Variable x below is defined to be a pointer to a function taking
+  ** parameters void* and const char* and returning a pointer to a function.
+  ** We initialize x by assigning it a pointer to the dlsym() function.
+  ** (That assignment requires a cast.)  Then we call the function that
+  ** x points to.  
+  **
+  ** This work-around is unlikely to work correctly on any system where
+  ** you really cannot cast a function pointer into void*.  But then, on the
+  ** other hand, dlsym() will not work on such a system either, so we have
+  ** not really lost anything.
+  */
+  void (*(*x)(void*,const char*))(void);
+  UNUSED_PARAMETER(NotUsed);
+  x = (void(*(*)(void*,const char*))(void))dlsym;
+  return (*x)(p, zSym);
+}
+static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){
+  UNUSED_PARAMETER(NotUsed);
+  dlclose(pHandle);
+}
+#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
+  #define unixDlOpen  0
+  #define unixDlError 0
+  #define unixDlSym   0
+  #define unixDlClose 0
+#endif
+
+/*
+** Write nBuf bytes of random data to the supplied buffer zBuf.
+*/
+static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
+  UNUSED_PARAMETER(NotUsed);
+  assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));
+
+  /* We have to initialize zBuf to prevent valgrind from reporting
+  ** errors.  The reports issued by valgrind are incorrect - we would
+  ** prefer that the randomness be increased by making use of the
+  ** uninitialized space in zBuf - but valgrind errors tend to worry
+  ** some users.  Rather than argue, it seems easier just to initialize
+  ** the whole array and silence valgrind, even if that means less randomness
+  ** in the random seed.
+  **
+  ** When testing, initializing zBuf[] to zero is all we do.  That means
+  ** that we always use the same random number sequence.  This makes the
+  ** tests repeatable.
+  */
+  memset(zBuf, 0, nBuf);
+  randomnessPid = getpid();  
+#if !defined(SQLITE_TEST)
+  {
+    int fd, got;
+    fd = robust_open("/dev/urandom", O_RDONLY, 0);
+    if( fd<0 ){
+      time_t t;
+      time(&t);
+      memcpy(zBuf, &t, sizeof(t));
+      memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid));
+      assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf );
+      nBuf = sizeof(t) + sizeof(randomnessPid);
+    }else{
+      do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );
+      robust_close(0, fd, __LINE__);
+    }
+  }
+#endif
+  return nBuf;
+}
+
+
+/*
+** Sleep for a little while.  Return the amount of time slept.
+** The argument is the number of microseconds we want to sleep.
+** The return value is the number of microseconds of sleep actually
+** requested from the underlying operating system, a number which
+** might be greater than or equal to the argument, but not less
+** than the argument.
+*/
+static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){
+#if OS_VXWORKS
+  struct timespec sp;
+
+  sp.tv_sec = microseconds / 1000000;
+  sp.tv_nsec = (microseconds % 1000000) * 1000;
+  nanosleep(&sp, NULL);
+  UNUSED_PARAMETER(NotUsed);
+  return microseconds;
+#elif defined(HAVE_USLEEP) && HAVE_USLEEP
+  usleep(microseconds);
+  UNUSED_PARAMETER(NotUsed);
+  return microseconds;
+#else
+  int seconds = (microseconds+999999)/1000000;
+  sleep(seconds);
+  UNUSED_PARAMETER(NotUsed);
+  return seconds*1000000;
+#endif
+}
+
+/*
+** The following variable, if set to a non-zero value, is interpreted as
+** the number of seconds since 1970 and is used to set the result of
+** sqlite3OsCurrentTime() during testing.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_current_time = 0;  /* Fake system time in seconds since 1970. */
+#endif
+
+/*
+** Find the current time (in Universal Coordinated Time).  Write into *piNow
+** the current time and date as a Julian Day number times 86_400_000.  In
+** other words, write into *piNow the number of milliseconds since the Julian
+** epoch of noon in Greenwich on November 24, 4714 B.C according to the
+** proleptic Gregorian calendar.
+**
+** On success, return SQLITE_OK.  Return SQLITE_ERROR if the time and date 
+** cannot be found.
+*/
+static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
+  static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
+  int rc = SQLITE_OK;
+#if defined(NO_GETTOD)
+  time_t t;
+  time(&t);
+  *piNow = ((sqlite3_int64)t)*1000 + unixEpoch;
+#elif OS_VXWORKS
+  struct timespec sNow;
+  clock_gettime(CLOCK_REALTIME, &sNow);
+  *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
+#else
+  struct timeval sNow;
+  if( gettimeofday(&sNow, 0)==0 ){
+    *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
+  }else{
+    rc = SQLITE_ERROR;
+  }
+#endif
+
+#ifdef SQLITE_TEST
+  if( sqlite3_current_time ){
+    *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch;
+  }
+#endif
+  UNUSED_PARAMETER(NotUsed);
+  return rc;
+}
+
+/*
+** Find the current time (in Universal Coordinated Time).  Write the
+** current time and date as a Julian Day number into *prNow and
+** return 0.  Return 1 if the time and date cannot be found.
+*/
+static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
+  sqlite3_int64 i = 0;
+  int rc;
+  UNUSED_PARAMETER(NotUsed);
+  rc = unixCurrentTimeInt64(0, &i);
+  *prNow = i/86400000.0;
+  return rc;
+}
+
+/*
+** We added the xGetLastError() method with the intention of providing
+** better low-level error messages when operating-system problems come up
+** during SQLite operation.  But so far, none of that has been implemented
+** in the core.  So this routine is never called.  For now, it is merely
+** a place-holder.
+*/
+static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
+  UNUSED_PARAMETER(NotUsed);
+  UNUSED_PARAMETER(NotUsed2);
+  UNUSED_PARAMETER(NotUsed3);
+  return 0;
+}
+
+
+/*
+************************ End of sqlite3_vfs methods ***************************
+******************************************************************************/
+
+/******************************************************************************
+************************** Begin Proxy Locking ********************************
+**
+** Proxy locking is a "uber-locking-method" in this sense:  It uses the
+** other locking methods on secondary lock files.  Proxy locking is a
+** meta-layer over top of the primitive locking implemented above.  For
+** this reason, the division that implements of proxy locking is deferred
+** until late in the file (here) after all of the other I/O methods have
+** been defined - so that the primitive locking methods are available
+** as services to help with the implementation of proxy locking.
+**
+****
+**
+** The default locking schemes in SQLite use byte-range locks on the
+** database file to coordinate safe, concurrent access by multiple readers
+** and writers [http://sqlite.org/lockingv3.html].  The five file locking
+** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
+** as POSIX read & write locks over fixed set of locations (via fsctl),
+** on AFP and SMB only exclusive byte-range locks are available via fsctl
+** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.
+** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
+** address in the shared range is taken for a SHARED lock, the entire
+** shared range is taken for an EXCLUSIVE lock):
+**
+**      PENDING_BYTE        0x40000000
+**      RESERVED_BYTE       0x40000001
+**      SHARED_RANGE        0x40000002 -> 0x40000200
+**
+** This works well on the local file system, but shows a nearly 100x
+** slowdown in read performance on AFP because the AFP client disables
+** the read cache when byte-range locks are present.  Enabling the read
+** cache exposes a cache coherency problem that is present on all OS X
+** supported network file systems.  NFS and AFP both observe the
+** close-to-open semantics for ensuring cache coherency
+** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
+** address the requirements for concurrent database access by multiple
+** readers and writers
+** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
+**
+** To address the performance and cache coherency issues, proxy file locking
+** changes the way database access is controlled by limiting access to a
+** single host at a time and moving file locks off of the database file
+** and onto a proxy file on the local file system.  
+**
+**
+** Using proxy locks
+** -----------------
+**
+** C APIs
+**
+**  sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,
+**                       <proxy_path> | ":auto:");
+**  sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);
+**
+**
+** SQL pragmas
+**
+**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
+**  PRAGMA [database.]lock_proxy_file
+**
+** Specifying ":auto:" means that if there is a conch file with a matching
+** host ID in it, the proxy path in the conch file will be used, otherwise
+** a proxy path based on the user's temp dir
+** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
+** actual proxy file name is generated from the name and path of the
+** database file.  For example:
+**
+**       For database path "/Users/me/foo.db" 
+**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:")
+**
+** Once a lock proxy is configured for a database connection, it can not
+** be removed, however it may be switched to a different proxy path via
+** the above APIs (assuming the conch file is not being held by another
+** connection or process). 
+**
+**
+** How proxy locking works
+** -----------------------
+**
+** Proxy file locking relies primarily on two new supporting files: 
+**
+**   *  conch file to limit access to the database file to a single host
+**      at a time
+**
+**   *  proxy file to act as a proxy for the advisory locks normally
+**      taken on the database
+**
+** The conch file - to use a proxy file, sqlite must first "hold the conch"
+** by taking an sqlite-style shared lock on the conch file, reading the
+** contents and comparing the host's unique host ID (see below) and lock
+** proxy path against the values stored in the conch.  The conch file is
+** stored in the same directory as the database file and the file name
+** is patterned after the database file name as ".<databasename>-conch".
+** If the conch file does not exist, or its contents do not match the
+** host ID and/or proxy path, then the lock is escalated to an exclusive
+** lock and the conch file contents is updated with the host ID and proxy
+** path and the lock is downgraded to a shared lock again.  If the conch
+** is held by another process (with a shared lock), the exclusive lock
+** will fail and SQLITE_BUSY is returned.
+**
+** The proxy file - a single-byte file used for all advisory file locks
+** normally taken on the database file.   This allows for safe sharing
+** of the database file for multiple readers and writers on the same
+** host (the conch ensures that they all use the same local lock file).
+**
+** Requesting the lock proxy does not immediately take the conch, it is
+** only taken when the first request to lock database file is made.  
+** This matches the semantics of the traditional locking behavior, where
+** opening a connection to a database file does not take a lock on it.
+** The shared lock and an open file descriptor are maintained until 
+** the connection to the database is closed. 
+**
+** The proxy file and the lock file are never deleted so they only need
+** to be created the first time they are used.
+**
+** Configuration options
+** ---------------------
+**
+**  SQLITE_PREFER_PROXY_LOCKING
+**
+**       Database files accessed on non-local file systems are
+**       automatically configured for proxy locking, lock files are
+**       named automatically using the same logic as
+**       PRAGMA lock_proxy_file=":auto:"
+**    
+**  SQLITE_PROXY_DEBUG
+**
+**       Enables the logging of error messages during host id file
+**       retrieval and creation
+**
+**  LOCKPROXYDIR
+**
+**       Overrides the default directory used for lock proxy files that
+**       are named automatically via the ":auto:" setting
+**
+**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
+**
+**       Permissions to use when creating a directory for storing the
+**       lock proxy files, only used when LOCKPROXYDIR is not set.
+**    
+**    
+** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
+** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
+** force proxy locking to be used for every database file opened, and 0
+** will force automatic proxy locking to be disabled for all database
+** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or
+** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
+*/
+
+/*
+** Proxy locking is only available on MacOSX 
+*/
+#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
+
+/*
+** The proxyLockingContext has the path and file structures for the remote 
+** and local proxy files in it
+*/
+typedef struct proxyLockingContext proxyLockingContext;
+struct proxyLockingContext {
+  unixFile *conchFile;         /* Open conch file */
+  char *conchFilePath;         /* Name of the conch file */
+  unixFile *lockProxy;         /* Open proxy lock file */
+  char *lockProxyPath;         /* Name of the proxy lock file */
+  char *dbPath;                /* Name of the open file */
+  int conchHeld;               /* 1 if the conch is held, -1 if lockless */
+  void *oldLockingContext;     /* Original lockingcontext to restore on close */
+  sqlite3_io_methods const *pOldMethod;     /* Original I/O methods for close */
+};
+
+/* 
+** The proxy lock file path for the database at dbPath is written into lPath, 
+** which must point to valid, writable memory large enough for a maxLen length
+** file path. 
+*/
+static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){
+  int len;
+  int dbLen;
+  int i;
+
+#ifdef LOCKPROXYDIR
+  len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
+#else
+# ifdef _CS_DARWIN_USER_TEMP_DIR
+  {
+    if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){
+      OSTRACE(("GETLOCKPATH  failed %s errno=%d pid=%d\n",
+               lPath, errno, getpid()));
+      return SQLITE_IOERR_LOCK;
+    }
+    len = strlcat(lPath, "sqliteplocks", maxLen);    
+  }
+# else
+  len = strlcpy(lPath, "/tmp/", maxLen);
+# endif
+#endif
+
+  if( lPath[len-1]!='/' ){
+    len = strlcat(lPath, "/", maxLen);
+  }
+  
+  /* transform the db path to a unique cache name */
+  dbLen = (int)strlen(dbPath);
+  for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){
+    char c = dbPath[i];
+    lPath[i+len] = (c=='/')?'_':c;
+  }
+  lPath[i+len]='\0';
+  strlcat(lPath, ":auto:", maxLen);
+  OSTRACE(("GETLOCKPATH  proxy lock path=%s pid=%d\n", lPath, getpid()));
+  return SQLITE_OK;
+}
+
+/* 
+ ** Creates the lock file and any missing directories in lockPath
+ */
+static int proxyCreateLockPath(const char *lockPath){
+  int i, len;
+  char buf[MAXPATHLEN];
+  int start = 0;
+  
+  assert(lockPath!=NULL);
+  /* try to create all the intermediate directories */
+  len = (int)strlen(lockPath);
+  buf[0] = lockPath[0];
+  for( i=1; i<len; i++ ){
+    if( lockPath[i] == '/' && (i - start > 0) ){
+      /* only mkdir if leaf dir != "." or "/" or ".." */
+      if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') 
+         || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
+        buf[i]='\0';
+        if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
+          int err=errno;
+          if( err!=EEXIST ) {
+            OSTRACE(("CREATELOCKPATH  FAILED creating %s, "
+                     "'%s' proxy lock path=%s pid=%d\n",
+                     buf, strerror(err), lockPath, getpid()));
+            return err;
+          }
+        }
+      }
+      start=i+1;
+    }
+    buf[i] = lockPath[i];
+  }
+  OSTRACE(("CREATELOCKPATH  proxy lock path=%s pid=%d\n", lockPath, getpid()));
+  return 0;
+}
+
+/*
+** Create a new VFS file descriptor (stored in memory obtained from
+** sqlite3_malloc) and open the file named "path" in the file descriptor.
+**
+** The caller is responsible not only for closing the file descriptor
+** but also for freeing the memory associated with the file descriptor.
+*/
+static int proxyCreateUnixFile(
+    const char *path,        /* path for the new unixFile */
+    unixFile **ppFile,       /* unixFile created and returned by ref */
+    int islockfile           /* if non zero missing dirs will be created */
+) {
+  int fd = -1;
+  unixFile *pNew;
+  int rc = SQLITE_OK;
+  int openFlags = O_RDWR | O_CREAT;
+  sqlite3_vfs dummyVfs;
+  int terrno = 0;
+  UnixUnusedFd *pUnused = NULL;
+
+  /* 1. first try to open/create the file
+  ** 2. if that fails, and this is a lock file (not-conch), try creating
+  ** the parent directories and then try again.
+  ** 3. if that fails, try to open the file read-only
+  ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file
+  */
+  pUnused = findReusableFd(path, openFlags);
+  if( pUnused ){
+    fd = pUnused->fd;
+  }else{
+    pUnused = sqlite3_malloc(sizeof(*pUnused));
+    if( !pUnused ){
+      return SQLITE_NOMEM;
+    }
+  }
+  if( fd<0 ){
+    fd = robust_open(path, openFlags, 0);
+    terrno = errno;
+    if( fd<0 && errno==ENOENT && islockfile ){
+      if( proxyCreateLockPath(path) == SQLITE_OK ){
+        fd = robust_open(path, openFlags, 0);
+      }
+    }
+  }
+  if( fd<0 ){
+    openFlags = O_RDONLY;
+    fd = robust_open(path, openFlags, 0);
+    terrno = errno;
+  }
+  if( fd<0 ){
+    if( islockfile ){
+      return SQLITE_BUSY;
+    }
+    switch (terrno) {
+      case EACCES:
+        return SQLITE_PERM;
+      case EIO: 
+        return SQLITE_IOERR_LOCK; /* even though it is the conch */
+      default:
+        return SQLITE_CANTOPEN_BKPT;
+    }
+  }
+  
+  pNew = (unixFile *)sqlite3_malloc(sizeof(*pNew));
+  if( pNew==NULL ){
+    rc = SQLITE_NOMEM;
+    goto end_create_proxy;
+  }
+  memset(pNew, 0, sizeof(unixFile));
+  pNew->openFlags = openFlags;
+  memset(&dummyVfs, 0, sizeof(dummyVfs));
+  dummyVfs.pAppData = (void*)&autolockIoFinder;
+  dummyVfs.zName = "dummy";
+  pUnused->fd = fd;
+  pUnused->flags = openFlags;
+  pNew->pUnused = pUnused;
+  
+  rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);
+  if( rc==SQLITE_OK ){
+    *ppFile = pNew;
+    return SQLITE_OK;
+  }
+end_create_proxy:    
+  robust_close(pNew, fd, __LINE__);
+  sqlite3_free(pNew);
+  sqlite3_free(pUnused);
+  return rc;
+}
+
+#ifdef SQLITE_TEST
+/* simulate multiple hosts by creating unique hostid file paths */
+SQLITE_API int sqlite3_hostid_num = 0;
+#endif
+
+#define PROXY_HOSTIDLEN    16  /* conch file host id length */
+
+/* Not always defined in the headers as it ought to be */
+extern int gethostuuid(uuid_t id, const struct timespec *wait);
+
+/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN 
+** bytes of writable memory.
+*/
+static int proxyGetHostID(unsigned char *pHostID, int *pError){
+  assert(PROXY_HOSTIDLEN == sizeof(uuid_t));
+  memset(pHostID, 0, PROXY_HOSTIDLEN);
+#if defined(__MAX_OS_X_VERSION_MIN_REQUIRED)\
+               && __MAC_OS_X_VERSION_MIN_REQUIRED<1050
+  {
+    static const struct timespec timeout = {1, 0}; /* 1 sec timeout */
+    if( gethostuuid(pHostID, &timeout) ){
+      int err = errno;
+      if( pError ){
+        *pError = err;
+      }
+      return SQLITE_IOERR;
+    }
+  }
+#else
+  UNUSED_PARAMETER(pError);
+#endif
+#ifdef SQLITE_TEST
+  /* simulate multiple hosts by creating unique hostid file paths */
+  if( sqlite3_hostid_num != 0){
+    pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF));
+  }
+#endif
+  
+  return SQLITE_OK;
+}
+
+/* The conch file contains the header, host id and lock file path
+ */
+#define PROXY_CONCHVERSION 2   /* 1-byte header, 16-byte host id, path */
+#define PROXY_HEADERLEN    1   /* conch file header length */
+#define PROXY_PATHINDEX    (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
+#define PROXY_MAXCONCHLEN  (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)
+
+/* 
+** Takes an open conch file, copies the contents to a new path and then moves 
+** it back.  The newly created file's file descriptor is assigned to the
+** conch file structure and finally the original conch file descriptor is 
+** closed.  Returns zero if successful.
+*/
+static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
+  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 
+  unixFile *conchFile = pCtx->conchFile;
+  char tPath[MAXPATHLEN];
+  char buf[PROXY_MAXCONCHLEN];
+  char *cPath = pCtx->conchFilePath;
+  size_t readLen = 0;
+  size_t pathLen = 0;
+  char errmsg[64] = "";
+  int fd = -1;
+  int rc = -1;
+  UNUSED_PARAMETER(myHostID);
+
+  /* create a new path by replace the trailing '-conch' with '-break' */
+  pathLen = strlcpy(tPath, cPath, MAXPATHLEN);
+  if( pathLen>MAXPATHLEN || pathLen<6 || 
+     (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){
+    sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen);
+    goto end_breaklock;
+  }
+  /* read the conch content */
+  readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);
+  if( readLen<PROXY_PATHINDEX ){
+    sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);
+    goto end_breaklock;
+  }
+  /* write it out to the temporary break file */
+  fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0);
+  if( fd<0 ){
+    sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
+    goto end_breaklock;
+  }
+  if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){
+    sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);
+    goto end_breaklock;
+  }
+  if( rename(tPath, cPath) ){
+    sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);
+    goto end_breaklock;
+  }
+  rc = 0;
+  fprintf(stderr, "broke stale lock on %s\n", cPath);
+  robust_close(pFile, conchFile->h, __LINE__);
+  conchFile->h = fd;
+  conchFile->openFlags = O_RDWR | O_CREAT;
+
+end_breaklock:
+  if( rc ){
+    if( fd>=0 ){
+      osUnlink(tPath);
+      robust_close(pFile, fd, __LINE__);
+    }
+    fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
+  }
+  return rc;
+}
+
+/* Take the requested lock on the conch file and break a stale lock if the 
+** host id matches.
+*/
+static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
+  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 
+  unixFile *conchFile = pCtx->conchFile;
+  int rc = SQLITE_OK;
+  int nTries = 0;
+  struct timespec conchModTime;
+  
+  memset(&conchModTime, 0, sizeof(conchModTime));
+  do {
+    rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
+    nTries ++;
+    if( rc==SQLITE_BUSY ){
+      /* If the lock failed (busy):
+       * 1st try: get the mod time of the conch, wait 0.5s and try again. 
+       * 2nd try: fail if the mod time changed or host id is different, wait 
+       *           10 sec and try again
+       * 3rd try: break the lock unless the mod time has changed.
+       */
+      struct stat buf;
+      if( osFstat(conchFile->h, &buf) ){
+        pFile->lastErrno = errno;
+        return SQLITE_IOERR_LOCK;
+      }
+      
+      if( nTries==1 ){
+        conchModTime = buf.st_mtimespec;
+        usleep(500000); /* wait 0.5 sec and try the lock again*/
+        continue;  
+      }
+
+      assert( nTries>1 );
+      if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || 
+         conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){
+        return SQLITE_BUSY;
+      }
+      
+      if( nTries==2 ){  
+        char tBuf[PROXY_MAXCONCHLEN];
+        int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);
+        if( len<0 ){
+          pFile->lastErrno = errno;
+          return SQLITE_IOERR_LOCK;
+        }
+        if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){
+          /* don't break the lock if the host id doesn't match */
+          if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){
+            return SQLITE_BUSY;
+          }
+        }else{
+          /* don't break the lock on short read or a version mismatch */
+          return SQLITE_BUSY;
+        }
+        usleep(10000000); /* wait 10 sec and try the lock again */
+        continue; 
+      }
+      
+      assert( nTries==3 );
+      if( 0==proxyBreakConchLock(pFile, myHostID) ){
+        rc = SQLITE_OK;
+        if( lockType==EXCLUSIVE_LOCK ){
+          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);          
+        }
+        if( !rc ){
+          rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
+        }
+      }
+    }
+  } while( rc==SQLITE_BUSY && nTries<3 );
+  
+  return rc;
+}
+
+/* Takes the conch by taking a shared lock and read the contents conch, if 
+** lockPath is non-NULL, the host ID and lock file path must match.  A NULL 
+** lockPath means that the lockPath in the conch file will be used if the 
+** host IDs match, or a new lock path will be generated automatically 
+** and written to the conch file.
+*/
+static int proxyTakeConch(unixFile *pFile){
+  proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; 
+  
+  if( pCtx->conchHeld!=0 ){
+    return SQLITE_OK;
+  }else{
+    unixFile *conchFile = pCtx->conchFile;
+    uuid_t myHostID;
+    int pError = 0;
+    char readBuf[PROXY_MAXCONCHLEN];
+    char lockPath[MAXPATHLEN];
+    char *tempLockPath = NULL;
+    int rc = SQLITE_OK;
+    int createConch = 0;
+    int hostIdMatch = 0;
+    int readLen = 0;
+    int tryOldLockPath = 0;
+    int forceNewLockPath = 0;
+    
+    OSTRACE(("TAKECONCH  %d for %s pid=%d\n", conchFile->h,
+             (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), getpid()));
+
+    rc = proxyGetHostID(myHostID, &pError);
+    if( (rc&0xff)==SQLITE_IOERR ){
+      pFile->lastErrno = pError;
+      goto end_takeconch;
+    }
+    rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
+    if( rc!=SQLITE_OK ){
+      goto end_takeconch;
+    }
+    /* read the existing conch file */
+    readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
+    if( readLen<0 ){
+      /* I/O error: lastErrno set by seekAndRead */
+      pFile->lastErrno = conchFile->lastErrno;
+      rc = SQLITE_IOERR_READ;
+      goto end_takeconch;
+    }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || 
+             readBuf[0]!=(char)PROXY_CONCHVERSION ){
+      /* a short read or version format mismatch means we need to create a new 
+      ** conch file. 
+      */
+      createConch = 1;
+    }
+    /* if the host id matches and the lock path already exists in the conch
+    ** we'll try to use the path there, if we can't open that path, we'll 
+    ** retry with a new auto-generated path 
+    */
+    do { /* in case we need to try again for an :auto: named lock file */
+
+      if( !createConch && !forceNewLockPath ){
+        hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, 
+                                  PROXY_HOSTIDLEN);
+        /* if the conch has data compare the contents */
+        if( !pCtx->lockProxyPath ){
+          /* for auto-named local lock file, just check the host ID and we'll
+           ** use the local lock file path that's already in there
+           */
+          if( hostIdMatch ){
+            size_t pathLen = (readLen - PROXY_PATHINDEX);
+            
+            if( pathLen>=MAXPATHLEN ){
+              pathLen=MAXPATHLEN-1;
+            }
+            memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
+            lockPath[pathLen] = 0;
+            tempLockPath = lockPath;
+            tryOldLockPath = 1;
+            /* create a copy of the lock path if the conch is taken */
+            goto end_takeconch;
+          }
+        }else if( hostIdMatch
+               && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
+                           readLen-PROXY_PATHINDEX)
+        ){
+          /* conch host and lock path match */
+          goto end_takeconch; 
+        }
+      }
+      
+      /* if the conch isn't writable and doesn't match, we can't take it */
+      if( (conchFile->openFlags&O_RDWR) == 0 ){
+        rc = SQLITE_BUSY;
+        goto end_takeconch;
+      }
+      
+      /* either the conch didn't match or we need to create a new one */
+      if( !pCtx->lockProxyPath ){
+        proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
+        tempLockPath = lockPath;
+        /* create a copy of the lock path _only_ if the conch is taken */
+      }
+      
+      /* update conch with host and path (this will fail if other process
+      ** has a shared lock already), if the host id matches, use the big
+      ** stick.
+      */
+      futimes(conchFile->h, NULL);
+      if( hostIdMatch && !createConch ){
+        if( conchFile->pInode && conchFile->pInode->nShared>1 ){
+          /* We are trying for an exclusive lock but another thread in this
+           ** same process is still holding a shared lock. */
+          rc = SQLITE_BUSY;
+        } else {          
+          rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
+        }
+      }else{
+        rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, EXCLUSIVE_LOCK);
+      }
+      if( rc==SQLITE_OK ){
+        char writeBuffer[PROXY_MAXCONCHLEN];
+        int writeSize = 0;
+        
+        writeBuffer[0] = (char)PROXY_CONCHVERSION;
+        memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
+        if( pCtx->lockProxyPath!=NULL ){
+          strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN);
+        }else{
+          strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
+        }
+        writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
+        robust_ftruncate(conchFile->h, writeSize);
+        rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
+        fsync(conchFile->h);
+        /* If we created a new conch file (not just updated the contents of a 
+         ** valid conch file), try to match the permissions of the database 
+         */
+        if( rc==SQLITE_OK && createConch ){
+          struct stat buf;
+          int err = osFstat(pFile->h, &buf);
+          if( err==0 ){
+            mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
+                                        S_IROTH|S_IWOTH);
+            /* try to match the database file R/W permissions, ignore failure */
+#ifndef SQLITE_PROXY_DEBUG
+            osFchmod(conchFile->h, cmode);
+#else
+            do{
+              rc = osFchmod(conchFile->h, cmode);
+            }while( rc==(-1) && errno==EINTR );
+            if( rc!=0 ){
+              int code = errno;
+              fprintf(stderr, "fchmod %o FAILED with %d %s\n",
+                      cmode, code, strerror(code));
+            } else {
+              fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
+            }
+          }else{
+            int code = errno;
+            fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 
+                    err, code, strerror(code));
+#endif
+          }
+        }
+      }
+      conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
+      
+    end_takeconch:
+      OSTRACE(("TRANSPROXY: CLOSE  %d\n", pFile->h));
+      if( rc==SQLITE_OK && pFile->openFlags ){
+        int fd;
+        if( pFile->h>=0 ){
+          robust_close(pFile, pFile->h, __LINE__);
+        }
+        pFile->h = -1;
+        fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
+        OSTRACE(("TRANSPROXY: OPEN  %d\n", fd));
+        if( fd>=0 ){
+          pFile->h = fd;
+        }else{
+          rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
+           during locking */
+        }
+      }
+      if( rc==SQLITE_OK && !pCtx->lockProxy ){
+        char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
+        rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
+        if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
+          /* we couldn't create the proxy lock file with the old lock file path
+           ** so try again via auto-naming 
+           */
+          forceNewLockPath = 1;
+          tryOldLockPath = 0;
+          continue; /* go back to the do {} while start point, try again */
+        }
+      }
+      if( rc==SQLITE_OK ){
+        /* Need to make a copy of path if we extracted the value
+         ** from the conch file or the path was allocated on the stack
+         */
+        if( tempLockPath ){
+          pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
+          if( !pCtx->lockProxyPath ){
+            rc = SQLITE_NOMEM;
+          }
+        }
+      }
+      if( rc==SQLITE_OK ){
+        pCtx->conchHeld = 1;
+        
+        if( pCtx->lockProxy->pMethod == &afpIoMethods ){
+          afpLockingContext *afpCtx;
+          afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
+          afpCtx->dbPath = pCtx->lockProxyPath;
+        }
+      } else {
+        conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
+      }
+      OSTRACE(("TAKECONCH  %d %s\n", conchFile->h,
+               rc==SQLITE_OK?"ok":"failed"));
+      return rc;
+    } while (1); /* in case we need to retry the :auto: lock file - 
+                 ** we should never get here except via the 'continue' call. */
+  }
+}
+
+/*
+** If pFile holds a lock on a conch file, then release that lock.
+*/
+static int proxyReleaseConch(unixFile *pFile){
+  int rc = SQLITE_OK;         /* Subroutine return code */
+  proxyLockingContext *pCtx;  /* The locking context for the proxy lock */
+  unixFile *conchFile;        /* Name of the conch file */
+
+  pCtx = (proxyLockingContext *)pFile->lockingContext;
+  conchFile = pCtx->conchFile;
+  OSTRACE(("RELEASECONCH  %d for %s pid=%d\n", conchFile->h,
+           (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), 
+           getpid()));
+  if( pCtx->conchHeld>0 ){
+    rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
+  }
+  pCtx->conchHeld = 0;
+  OSTRACE(("RELEASECONCH  %d %s\n", conchFile->h,
+           (rc==SQLITE_OK ? "ok" : "failed")));
+  return rc;
+}
+
+/*
+** Given the name of a database file, compute the name of its conch file.
+** Store the conch filename in memory obtained from sqlite3_malloc().
+** Make *pConchPath point to the new name.  Return SQLITE_OK on success
+** or SQLITE_NOMEM if unable to obtain memory.
+**
+** The caller is responsible for ensuring that the allocated memory
+** space is eventually freed.
+**
+** *pConchPath is set to NULL if a memory allocation error occurs.
+*/
+static int proxyCreateConchPathname(char *dbPath, char **pConchPath){
+  int i;                        /* Loop counter */
+  int len = (int)strlen(dbPath); /* Length of database filename - dbPath */
+  char *conchPath;              /* buffer in which to construct conch name */
+
+  /* Allocate space for the conch filename and initialize the name to
+  ** the name of the original database file. */  
+  *pConchPath = conchPath = (char *)sqlite3_malloc(len + 8);
+  if( conchPath==0 ){
+    return SQLITE_NOMEM;
+  }
+  memcpy(conchPath, dbPath, len+1);
+  
+  /* now insert a "." before the last / character */
+  for( i=(len-1); i>=0; i-- ){
+    if( conchPath[i]=='/' ){
+      i++;
+      break;
+    }
+  }
+  conchPath[i]='.';
+  while ( i<len ){
+    conchPath[i+1]=dbPath[i];
+    i++;
+  }
+
+  /* append the "-conch" suffix to the file */
+  memcpy(&conchPath[i+1], "-conch", 7);
+  assert( (int)strlen(conchPath) == len+7 );
+
+  return SQLITE_OK;
+}
+
+
+/* Takes a fully configured proxy locking-style unix file and switches
+** the local lock file path 
+*/
+static int switchLockProxyPath(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
+  char *oldPath = pCtx->lockProxyPath;
+  int rc = SQLITE_OK;
+
+  if( pFile->eFileLock!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }  
+
+  /* nothing to do if the path is NULL, :auto: or matches the existing path */
+  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
+    (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
+    return SQLITE_OK;
+  }else{
+    unixFile *lockProxy = pCtx->lockProxy;
+    pCtx->lockProxy=NULL;
+    pCtx->conchHeld = 0;
+    if( lockProxy!=NULL ){
+      rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
+      if( rc ) return rc;
+      sqlite3_free(lockProxy);
+    }
+    sqlite3_free(oldPath);
+    pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
+  }
+  
+  return rc;
+}
+
+/*
+** pFile is a file that has been opened by a prior xOpen call.  dbPath
+** is a string buffer at least MAXPATHLEN+1 characters in size.
+**
+** This routine find the filename associated with pFile and writes it
+** int dbPath.
+*/
+static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){
+#if defined(__APPLE__)
+  if( pFile->pMethod == &afpIoMethods ){
+    /* afp style keeps a reference to the db path in the filePath field 
+    ** of the struct */
+    assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
+    strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN);
+  } else
+#endif
+  if( pFile->pMethod == &dotlockIoMethods ){
+    /* dot lock style uses the locking context to store the dot lock
+    ** file path */
+    int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
+    memcpy(dbPath, (char *)pFile->lockingContext, len + 1);
+  }else{
+    /* all other styles use the locking context to store the db file path */
+    assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
+    strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Takes an already filled in unix file and alters it so all file locking 
+** will be performed on the local proxy lock file.  The following fields
+** are preserved in the locking context so that they can be restored and 
+** the unix structure properly cleaned up at close time:
+**  ->lockingContext
+**  ->pMethod
+*/
+static int proxyTransformUnixFile(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx;
+  char dbPath[MAXPATHLEN+1];       /* Name of the database file */
+  char *lockPath=NULL;
+  int rc = SQLITE_OK;
+  
+  if( pFile->eFileLock!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }
+  proxyGetDbPathForUnixFile(pFile, dbPath);
+  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
+    lockPath=NULL;
+  }else{
+    lockPath=(char *)path;
+  }
+  
+  OSTRACE(("TRANSPROXY  %d for %s pid=%d\n", pFile->h,
+           (lockPath ? lockPath : ":auto:"), getpid()));
+
+  pCtx = sqlite3_malloc( sizeof(*pCtx) );
+  if( pCtx==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCtx, 0, sizeof(*pCtx));
+
+  rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);
+  if( rc==SQLITE_OK ){
+    rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0);
+    if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){
+      /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and
+      ** (c) the file system is read-only, then enable no-locking access.
+      ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts
+      ** that openFlags will have only one of O_RDONLY or O_RDWR.
+      */
+      struct statfs fsInfo;
+      struct stat conchInfo;
+      int goLockless = 0;
+
+      if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) {
+        int err = errno;
+        if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){
+          goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY;
+        }
+      }
+      if( goLockless ){
+        pCtx->conchHeld = -1; /* read only FS/ lockless */
+        rc = SQLITE_OK;
+      }
+    }
+  }  
+  if( rc==SQLITE_OK && lockPath ){
+    pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
+  }
+
+  if( rc==SQLITE_OK ){
+    pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
+    if( pCtx->dbPath==NULL ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+  if( rc==SQLITE_OK ){
+    /* all memory is allocated, proxys are created and assigned, 
+    ** switch the locking context and pMethod then return.
+    */
+    pCtx->oldLockingContext = pFile->lockingContext;
+    pFile->lockingContext = pCtx;
+    pCtx->pOldMethod = pFile->pMethod;
+    pFile->pMethod = &proxyIoMethods;
+  }else{
+    if( pCtx->conchFile ){ 
+      pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
+      sqlite3_free(pCtx->conchFile);
+    }
+    sqlite3DbFree(0, pCtx->lockProxyPath);
+    sqlite3_free(pCtx->conchFilePath); 
+    sqlite3_free(pCtx);
+  }
+  OSTRACE(("TRANSPROXY  %d %s\n", pFile->h,
+           (rc==SQLITE_OK ? "ok" : "failed")));
+  return rc;
+}
+
+
+/*
+** This routine handles sqlite3_file_control() calls that are specific
+** to proxy locking.
+*/
+static int proxyFileControl(sqlite3_file *id, int op, void *pArg){
+  switch( op ){
+    case SQLITE_GET_LOCKPROXYFILE: {
+      unixFile *pFile = (unixFile*)id;
+      if( pFile->pMethod == &proxyIoMethods ){
+        proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
+        proxyTakeConch(pFile);
+        if( pCtx->lockProxyPath ){
+          *(const char **)pArg = pCtx->lockProxyPath;
+        }else{
+          *(const char **)pArg = ":auto: (not held)";
+        }
+      } else {
+        *(const char **)pArg = NULL;
+      }
+      return SQLITE_OK;
+    }
+    case SQLITE_SET_LOCKPROXYFILE: {
+      unixFile *pFile = (unixFile*)id;
+      int rc = SQLITE_OK;
+      int isProxyStyle = (pFile->pMethod == &proxyIoMethods);
+      if( pArg==NULL || (const char *)pArg==0 ){
+        if( isProxyStyle ){
+          /* turn off proxy locking - not supported */
+          rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;
+        }else{
+          /* turn off proxy locking - already off - NOOP */
+          rc = SQLITE_OK;
+        }
+      }else{
+        const char *proxyPath = (const char *)pArg;
+        if( isProxyStyle ){
+          proxyLockingContext *pCtx = 
+            (proxyLockingContext*)pFile->lockingContext;
+          if( !strcmp(pArg, ":auto:") 
+           || (pCtx->lockProxyPath &&
+               !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))
+          ){
+            rc = SQLITE_OK;
+          }else{
+            rc = switchLockProxyPath(pFile, proxyPath);
+          }
+        }else{
+          /* turn on proxy file locking */
+          rc = proxyTransformUnixFile(pFile, proxyPath);
+        }
+      }
+      return rc;
+    }
+    default: {
+      assert( 0 );  /* The call assures that only valid opcodes are sent */
+    }
+  }
+  /*NOTREACHED*/
+  return SQLITE_ERROR;
+}
+
+/*
+** Within this division (the proxying locking implementation) the procedures
+** above this point are all utilities.  The lock-related methods of the
+** proxy-locking sqlite3_io_method object follow.
+*/
+
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+*/
+static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = proxyTakeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    if( pCtx->conchHeld>0 ){
+      unixFile *proxy = pCtx->lockProxy;
+      return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
+    }else{ /* conchHeld < 0 is lockless */
+      pResOut=0;
+    }
+  }
+  return rc;
+}
+
+/*
+** Lock the file with the lock specified by parameter eFileLock - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
+*/
+static int proxyLock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = proxyTakeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    if( pCtx->conchHeld>0 ){
+      unixFile *proxy = pCtx->lockProxy;
+      rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock);
+      pFile->eFileLock = proxy->eFileLock;
+    }else{
+      /* conchHeld < 0 is lockless */
+    }
+  }
+  return rc;
+}
+
+
+/*
+** Lower the locking level on file descriptor pFile to eFileLock.  eFileLock
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int proxyUnlock(sqlite3_file *id, int eFileLock) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = proxyTakeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    if( pCtx->conchHeld>0 ){
+      unixFile *proxy = pCtx->lockProxy;
+      rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock);
+      pFile->eFileLock = proxy->eFileLock;
+    }else{
+      /* conchHeld < 0 is lockless */
+    }
+  }
+  return rc;
+}
+
+/*
+** Close a file that uses proxy locks.
+*/
+static int proxyClose(sqlite3_file *id) {
+  if( id ){
+    unixFile *pFile = (unixFile*)id;
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    unixFile *lockProxy = pCtx->lockProxy;
+    unixFile *conchFile = pCtx->conchFile;
+    int rc = SQLITE_OK;
+    
+    if( lockProxy ){
+      rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
+      if( rc ) return rc;
+      rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
+      if( rc ) return rc;
+      sqlite3_free(lockProxy);
+      pCtx->lockProxy = 0;
+    }
+    if( conchFile ){
+      if( pCtx->conchHeld ){
+        rc = proxyReleaseConch(pFile);
+        if( rc ) return rc;
+      }
+      rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
+      if( rc ) return rc;
+      sqlite3_free(conchFile);
+    }
+    sqlite3DbFree(0, pCtx->lockProxyPath);
+    sqlite3_free(pCtx->conchFilePath);
+    sqlite3DbFree(0, pCtx->dbPath);
+    /* restore the original locking context and pMethod then close it */
+    pFile->lockingContext = pCtx->oldLockingContext;
+    pFile->pMethod = pCtx->pOldMethod;
+    sqlite3_free(pCtx);
+    return pFile->pMethod->xClose(id);
+  }
+  return SQLITE_OK;
+}
+
+
+
+#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The proxy locking style is intended for use with AFP filesystems.
+** And since AFP is only supported on MacOSX, the proxy locking is also
+** restricted to MacOSX.
+** 
+**
+******************* End of the proxy lock implementation **********************
+******************************************************************************/
+
+/*
+** Initialize the operating system interface.
+**
+** This routine registers all VFS implementations for unix-like operating
+** systems.  This routine, and the sqlite3_os_end() routine that follows,
+** should be the only routines in this file that are visible from other
+** files.
+**
+** This routine is called once during SQLite initialization and by a
+** single thread.  The memory allocation and mutex subsystems have not
+** necessarily been initialized when this routine is called, and so they
+** should not be used.
+*/
+SQLITE_API int sqlite3_os_init(void){ 
+  /* 
+  ** The following macro defines an initializer for an sqlite3_vfs object.
+  ** The name of the VFS is NAME.  The pAppData is a pointer to a pointer
+  ** to the "finder" function.  (pAppData is a pointer to a pointer because
+  ** silly C90 rules prohibit a void* from being cast to a function pointer
+  ** and so we have to go through the intermediate pointer to avoid problems
+  ** when compiling with -pedantic-errors on GCC.)
+  **
+  ** The FINDER parameter to this macro is the name of the pointer to the
+  ** finder-function.  The finder-function returns a pointer to the
+  ** sqlite_io_methods object that implements the desired locking
+  ** behaviors.  See the division above that contains the IOMETHODS
+  ** macro for addition information on finder-functions.
+  **
+  ** Most finders simply return a pointer to a fixed sqlite3_io_methods
+  ** object.  But the "autolockIoFinder" available on MacOSX does a little
+  ** more than that; it looks at the filesystem type that hosts the 
+  ** database file and tries to choose an locking method appropriate for
+  ** that filesystem time.
+  */
+  #define UNIXVFS(VFSNAME, FINDER) {                        \
+    3,                    /* iVersion */                    \
+    sizeof(unixFile),     /* szOsFile */                    \
+    MAX_PATHNAME,         /* mxPathname */                  \
+    0,                    /* pNext */                       \
+    VFSNAME,              /* zName */                       \
+    (void*)&FINDER,       /* pAppData */                    \
+    unixOpen,             /* xOpen */                       \
+    unixDelete,           /* xDelete */                     \
+    unixAccess,           /* xAccess */                     \
+    unixFullPathname,     /* xFullPathname */               \
+    unixDlOpen,           /* xDlOpen */                     \
+    unixDlError,          /* xDlError */                    \
+    unixDlSym,            /* xDlSym */                      \
+    unixDlClose,          /* xDlClose */                    \
+    unixRandomness,       /* xRandomness */                 \
+    unixSleep,            /* xSleep */                      \
+    unixCurrentTime,      /* xCurrentTime */                \
+    unixGetLastError,     /* xGetLastError */               \
+    unixCurrentTimeInt64, /* xCurrentTimeInt64 */           \
+    unixSetSystemCall,    /* xSetSystemCall */              \
+    unixGetSystemCall,    /* xGetSystemCall */              \
+    unixNextSystemCall,   /* xNextSystemCall */             \
+  }
+
+  /*
+  ** All default VFSes for unix are contained in the following array.
+  **
+  ** Note that the sqlite3_vfs.pNext field of the VFS object is modified
+  ** by the SQLite core when the VFS is registered.  So the following
+  ** array cannot be const.
+  */
+  static sqlite3_vfs aVfs[] = {
+#if SQLITE_ENABLE_LOCKING_STYLE && (OS_VXWORKS || defined(__APPLE__))
+    UNIXVFS("unix",          autolockIoFinder ),
+#else
+    UNIXVFS("unix",          posixIoFinder ),
+#endif
+    UNIXVFS("unix-none",     nolockIoFinder ),
+    UNIXVFS("unix-dotfile",  dotlockIoFinder ),
+    UNIXVFS("unix-excl",     posixIoFinder ),
+#if OS_VXWORKS
+    UNIXVFS("unix-namedsem", semIoFinder ),
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE
+    UNIXVFS("unix-posix",    posixIoFinder ),
+#if !OS_VXWORKS
+    UNIXVFS("unix-flock",    flockIoFinder ),
+#endif
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
+    UNIXVFS("unix-afp",      afpIoFinder ),
+    UNIXVFS("unix-nfs",      nfsIoFinder ),
+    UNIXVFS("unix-proxy",    proxyIoFinder ),
+#endif
+  };
+  unsigned int i;          /* Loop counter */
+
+  /* Double-check that the aSyscall[] array has been constructed
+  ** correctly.  See ticket [bb3a86e890c8e96ab] */
+  assert( ArraySize(aSyscall)==25 );
+
+  /* Register all VFSes defined in the aVfs[] array */
+  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
+    sqlite3_vfs_register(&aVfs[i], i==0);
+  }
+  return SQLITE_OK; 
+}
+
+/*
+** Shutdown the operating system interface.
+**
+** Some operating systems might need to do some cleanup in this routine,
+** to release dynamically allocated objects.  But not on unix.
+** This routine is a no-op for unix.
+*/
+SQLITE_API int sqlite3_os_end(void){ 
+  return SQLITE_OK; 
+}
+ 
+#endif /* SQLITE_OS_UNIX */
+
+/************** End of os_unix.c *********************************************/
+/************** Begin file os_win.c ******************************************/
+/*
+** 2004 May 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code that is specific to Windows.
+*/
+#if SQLITE_OS_WIN               /* This file is used for Windows only */
+
+/*
+** Include code that is common to all os_*.c files
+*/
+/************** Include os_common.h in the middle of os_win.c ****************/
+/************** Begin file os_common.h ***************************************/
+/*
+** 2004 May 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains macros and a little bit of code that is common to
+** all of the platform-specific files (os_*.c) and is #included into those
+** files.
+**
+** This file should be #included by the os_*.c files only.  It is not a
+** general purpose header file.
+*/
+#ifndef _OS_COMMON_H_
+#define _OS_COMMON_H_
+
+/*
+** At least two bugs have slipped in because we changed the MEMORY_DEBUG
+** macro to SQLITE_DEBUG and some older makefiles have not yet made the
+** switch.  The following code should catch this problem at compile-time.
+*/
+#ifdef MEMORY_DEBUG
+# error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
+#endif
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#else
+# define OSTRACE(X)
+#endif
+
+/*
+** Macros for performance tracing.  Normally turned off.  Only works
+** on i486 hardware.
+*/
+#ifdef SQLITE_PERFORMANCE_TRACE
+
+/* 
+** hwtime.h contains inline assembler code for implementing 
+** high-performance timing routines.
+*/
+/************** Include hwtime.h in the middle of os_common.h ****************/
+/************** Begin file hwtime.h ******************************************/
+/*
+** 2008 May 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains inline asm code for retrieving "high-performance"
+** counters for x86 class CPUs.
+*/
+#ifndef _HWTIME_H_
+#define _HWTIME_H_
+
+/*
+** The following routine only works on pentium-class (or newer) processors.
+** It uses the RDTSC opcode to read the cycle count value out of the
+** processor and returns that value.  This can be used for high-res
+** profiling.
+*/
+#if (defined(__GNUC__) || defined(_MSC_VER)) && \
+      (defined(i386) || defined(__i386__) || defined(_M_IX86))
+
+  #if defined(__GNUC__)
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+     unsigned int lo, hi;
+     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+     return (sqlite_uint64)hi << 32 | lo;
+  }
+
+  #elif defined(_MSC_VER)
+
+  __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){
+     __asm {
+        rdtsc
+        ret       ; return value at EDX:EAX
+     }
+  }
+
+  #endif
+
+#elif (defined(__GNUC__) && defined(__x86_64__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long val;
+      __asm__ __volatile__ ("rdtsc" : "=A" (val));
+      return val;
+  }
+ 
+#elif (defined(__GNUC__) && defined(__ppc__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long long retval;
+      unsigned long junk;
+      __asm__ __volatile__ ("\n\
+          1:      mftbu   %1\n\
+                  mftb    %L0\n\
+                  mftbu   %0\n\
+                  cmpw    %0,%1\n\
+                  bne     1b"
+                  : "=r" (retval), "=r" (junk));
+      return retval;
+  }
+
+#else
+
+  #error Need implementation of sqlite3Hwtime() for your platform.
+
+  /*
+  ** To compile without implementing sqlite3Hwtime() for your platform,
+  ** you can remove the above #error and use the following
+  ** stub function.  You will lose timing support for many
+  ** of the debugging and testing utilities, but it should at
+  ** least compile and run.
+  */
+SQLITE_PRIVATE   sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); }
+
+#endif
+
+#endif /* !defined(_HWTIME_H_) */
+
+/************** End of hwtime.h **********************************************/
+/************** Continuing where we left off in os_common.h ******************/
+
+static sqlite_uint64 g_start;
+static sqlite_uint64 g_elapsed;
+#define TIMER_START       g_start=sqlite3Hwtime()
+#define TIMER_END         g_elapsed=sqlite3Hwtime()-g_start
+#define TIMER_ELAPSED     g_elapsed
+#else
+#define TIMER_START
+#define TIMER_END
+#define TIMER_ELAPSED     ((sqlite_uint64)0)
+#endif
+
+/*
+** If we compile with the SQLITE_TEST macro set, then the following block
+** of code will give us the ability to simulate a disk I/O error.  This
+** is used for testing the I/O recovery logic.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_io_error_hit = 0;            /* Total number of I/O Errors */
+SQLITE_API int sqlite3_io_error_hardhit = 0;        /* Number of non-benign errors */
+SQLITE_API int sqlite3_io_error_pending = 0;        /* Count down to first I/O error */
+SQLITE_API int sqlite3_io_error_persist = 0;        /* True if I/O errors persist */
+SQLITE_API int sqlite3_io_error_benign = 0;         /* True if errors are benign */
+SQLITE_API int sqlite3_diskfull_pending = 0;
+SQLITE_API int sqlite3_diskfull = 0;
+#define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X)
+#define SimulateIOError(CODE)  \
+  if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \
+       || sqlite3_io_error_pending-- == 1 )  \
+              { local_ioerr(); CODE; }
+static void local_ioerr(){
+  IOTRACE(("IOERR\n"));
+  sqlite3_io_error_hit++;
+  if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++;
+}
+#define SimulateDiskfullError(CODE) \
+   if( sqlite3_diskfull_pending ){ \
+     if( sqlite3_diskfull_pending == 1 ){ \
+       local_ioerr(); \
+       sqlite3_diskfull = 1; \
+       sqlite3_io_error_hit = 1; \
+       CODE; \
+     }else{ \
+       sqlite3_diskfull_pending--; \
+     } \
+   }
+#else
+#define SimulateIOErrorBenign(X)
+#define SimulateIOError(A)
+#define SimulateDiskfullError(A)
+#endif
+
+/*
+** When testing, keep a count of the number of open files.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_open_file_count = 0;
+#define OpenCounter(X)  sqlite3_open_file_count+=(X)
+#else
+#define OpenCounter(X)
+#endif
+
+#endif /* !defined(_OS_COMMON_H_) */
+
+/************** End of os_common.h *******************************************/
+/************** Continuing where we left off in os_win.c *********************/
+
+/*
+** Include the header file for the Windows VFS.
+*/
+
+/*
+** Compiling and using WAL mode requires several APIs that are only
+** available in Windows platforms based on the NT kernel.
+*/
+#if !SQLITE_OS_WINNT && !defined(SQLITE_OMIT_WAL)
+#  error "WAL mode requires support from the Windows NT kernel, compile\
+ with SQLITE_OMIT_WAL."
+#endif
+
+#if !SQLITE_OS_WINNT && SQLITE_MAX_MMAP_SIZE>0
+#  error "Memory mapped files require support from the Windows NT kernel,\
+ compile with SQLITE_MAX_MMAP_SIZE=0."
+#endif
+
+/*
+** Are most of the Win32 ANSI APIs available (i.e. with certain exceptions
+** based on the sub-platform)?
+*/
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(SQLITE_WIN32_NO_ANSI)
+#  define SQLITE_WIN32_HAS_ANSI
+#endif
+
+/*
+** Are most of the Win32 Unicode APIs available (i.e. with certain exceptions
+** based on the sub-platform)?
+*/
+#if (SQLITE_OS_WINCE || SQLITE_OS_WINNT || SQLITE_OS_WINRT) && \
+    !defined(SQLITE_WIN32_NO_WIDE)
+#  define SQLITE_WIN32_HAS_WIDE
+#endif
+
+/*
+** Make sure at least one set of Win32 APIs is available.
+*/
+#if !defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_WIN32_HAS_WIDE)
+#  error "At least one of SQLITE_WIN32_HAS_ANSI and SQLITE_WIN32_HAS_WIDE\
+ must be defined."
+#endif
+
+/*
+** Define the required Windows SDK version constants if they are not
+** already available.
+*/
+#ifndef NTDDI_WIN8
+#  define NTDDI_WIN8                        0x06020000
+#endif
+
+#ifndef NTDDI_WINBLUE
+#  define NTDDI_WINBLUE                     0x06030000
+#endif
+
+/*
+** Check to see if the GetVersionEx[AW] functions are deprecated on the
+** target system.  GetVersionEx was first deprecated in Win8.1.
+*/
+#ifndef SQLITE_WIN32_GETVERSIONEX
+#  if defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WINBLUE
+#    define SQLITE_WIN32_GETVERSIONEX   0   /* GetVersionEx() is deprecated */
+#  else
+#    define SQLITE_WIN32_GETVERSIONEX   1   /* GetVersionEx() is current */
+#  endif
+#endif
+
+/*
+** This constant should already be defined (in the "WinDef.h" SDK file).
+*/
+#ifndef MAX_PATH
+#  define MAX_PATH                      (260)
+#endif
+
+/*
+** Maximum pathname length (in chars) for Win32.  This should normally be
+** MAX_PATH.
+*/
+#ifndef SQLITE_WIN32_MAX_PATH_CHARS
+#  define SQLITE_WIN32_MAX_PATH_CHARS   (MAX_PATH)
+#endif
+
+/*
+** This constant should already be defined (in the "WinNT.h" SDK file).
+*/
+#ifndef UNICODE_STRING_MAX_CHARS
+#  define UNICODE_STRING_MAX_CHARS      (32767)
+#endif
+
+/*
+** Maximum pathname length (in chars) for WinNT.  This should normally be
+** UNICODE_STRING_MAX_CHARS.
+*/
+#ifndef SQLITE_WINNT_MAX_PATH_CHARS
+#  define SQLITE_WINNT_MAX_PATH_CHARS   (UNICODE_STRING_MAX_CHARS)
+#endif
+
+/*
+** Maximum pathname length (in bytes) for Win32.  The MAX_PATH macro is in
+** characters, so we allocate 4 bytes per character assuming worst-case of
+** 4-bytes-per-character for UTF8.
+*/
+#ifndef SQLITE_WIN32_MAX_PATH_BYTES
+#  define SQLITE_WIN32_MAX_PATH_BYTES   (SQLITE_WIN32_MAX_PATH_CHARS*4)
+#endif
+
+/*
+** Maximum pathname length (in bytes) for WinNT.  This should normally be
+** UNICODE_STRING_MAX_CHARS * sizeof(WCHAR).
+*/
+#ifndef SQLITE_WINNT_MAX_PATH_BYTES
+#  define SQLITE_WINNT_MAX_PATH_BYTES   \
+                            (sizeof(WCHAR) * SQLITE_WINNT_MAX_PATH_CHARS)
+#endif
+
+/*
+** Maximum error message length (in chars) for WinRT.
+*/
+#ifndef SQLITE_WIN32_MAX_ERRMSG_CHARS
+#  define SQLITE_WIN32_MAX_ERRMSG_CHARS (1024)
+#endif
+
+/*
+** Returns non-zero if the character should be treated as a directory
+** separator.
+*/
+#ifndef winIsDirSep
+#  define winIsDirSep(a)                (((a) == '/') || ((a) == '\\'))
+#endif
+
+/*
+** This macro is used when a local variable is set to a value that is
+** [sometimes] not used by the code (e.g. via conditional compilation).
+*/
+#ifndef UNUSED_VARIABLE_VALUE
+#  define UNUSED_VARIABLE_VALUE(x)      (void)(x)
+#endif
+
+/*
+** Returns the character that should be used as the directory separator.
+*/
+#ifndef winGetDirSep
+#  define winGetDirSep()                '\\'
+#endif
+
+/*
+** Do we need to manually define the Win32 file mapping APIs for use with WAL
+** mode or memory mapped files (e.g. these APIs are available in the Windows
+** CE SDK; however, they are not present in the header file)?
+*/
+#if SQLITE_WIN32_FILEMAPPING_API && \
+        (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
+/*
+** Two of the file mapping APIs are different under WinRT.  Figure out which
+** set we need.
+*/
+#if SQLITE_OS_WINRT
+WINBASEAPI HANDLE WINAPI CreateFileMappingFromApp(HANDLE, \
+        LPSECURITY_ATTRIBUTES, ULONG, ULONG64, LPCWSTR);
+
+WINBASEAPI LPVOID WINAPI MapViewOfFileFromApp(HANDLE, ULONG, ULONG64, SIZE_T);
+#else
+#if defined(SQLITE_WIN32_HAS_ANSI)
+WINBASEAPI HANDLE WINAPI CreateFileMappingA(HANDLE, LPSECURITY_ATTRIBUTES, \
+        DWORD, DWORD, DWORD, LPCSTR);
+#endif /* defined(SQLITE_WIN32_HAS_ANSI) */
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+WINBASEAPI HANDLE WINAPI CreateFileMappingW(HANDLE, LPSECURITY_ATTRIBUTES, \
+        DWORD, DWORD, DWORD, LPCWSTR);
+#endif /* defined(SQLITE_WIN32_HAS_WIDE) */
+
+WINBASEAPI LPVOID WINAPI MapViewOfFile(HANDLE, DWORD, DWORD, DWORD, SIZE_T);
+#endif /* SQLITE_OS_WINRT */
+
+/*
+** This file mapping API is common to both Win32 and WinRT.
+*/
+WINBASEAPI BOOL WINAPI UnmapViewOfFile(LPCVOID);
+#endif /* SQLITE_WIN32_FILEMAPPING_API */
+
+/*
+** Some Microsoft compilers lack this definition.
+*/
+#ifndef INVALID_FILE_ATTRIBUTES
+# define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+#endif
+
+#ifndef FILE_FLAG_MASK
+# define FILE_FLAG_MASK          (0xFF3C0000)
+#endif
+
+#ifndef FILE_ATTRIBUTE_MASK
+# define FILE_ATTRIBUTE_MASK     (0x0003FFF7)
+#endif
+
+#ifndef SQLITE_OMIT_WAL
+/* Forward references to structures used for WAL */
+typedef struct winShm winShm;           /* A connection to shared-memory */
+typedef struct winShmNode winShmNode;   /* A region of shared-memory */
+#endif
+
+/*
+** WinCE lacks native support for file locking so we have to fake it
+** with some code of our own.
+*/
+#if SQLITE_OS_WINCE
+typedef struct winceLock {
+  int nReaders;       /* Number of reader locks obtained */
+  BOOL bPending;      /* Indicates a pending lock has been obtained */
+  BOOL bReserved;     /* Indicates a reserved lock has been obtained */
+  BOOL bExclusive;    /* Indicates an exclusive lock has been obtained */
+} winceLock;
+#endif
+
+/*
+** The winFile structure is a subclass of sqlite3_file* specific to the win32
+** portability layer.
+*/
+typedef struct winFile winFile;
+struct winFile {
+  const sqlite3_io_methods *pMethod; /*** Must be first ***/
+  sqlite3_vfs *pVfs;      /* The VFS used to open this file */
+  HANDLE h;               /* Handle for accessing the file */
+  u8 locktype;            /* Type of lock currently held on this file */
+  short sharedLockByte;   /* Randomly chosen byte used as a shared lock */
+  u8 ctrlFlags;           /* Flags.  See WINFILE_* below */
+  DWORD lastErrno;        /* The Windows errno from the last I/O error */
+#ifndef SQLITE_OMIT_WAL
+  winShm *pShm;           /* Instance of shared memory on this file */
+#endif
+  const char *zPath;      /* Full pathname of this file */
+  int szChunk;            /* Chunk size configured by FCNTL_CHUNK_SIZE */
+#if SQLITE_OS_WINCE
+  LPWSTR zDeleteOnClose;  /* Name of file to delete when closing */
+  HANDLE hMutex;          /* Mutex used to control access to shared lock */
+  HANDLE hShared;         /* Shared memory segment used for locking */
+  winceLock local;        /* Locks obtained by this instance of winFile */
+  winceLock *shared;      /* Global shared lock memory for the file  */
+#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+  int nFetchOut;                /* Number of outstanding xFetch references */
+  HANDLE hMap;                  /* Handle for accessing memory mapping */
+  void *pMapRegion;             /* Area memory mapped */
+  sqlite3_int64 mmapSize;       /* Usable size of mapped region */
+  sqlite3_int64 mmapSizeActual; /* Actual size of mapped region */
+  sqlite3_int64 mmapSizeMax;    /* Configured FCNTL_MMAP_SIZE value */
+#endif
+};
+
+/*
+** Allowed values for winFile.ctrlFlags
+*/
+#define WINFILE_RDONLY          0x02   /* Connection is read only */
+#define WINFILE_PERSIST_WAL     0x04   /* Persistent WAL mode */
+#define WINFILE_PSOW            0x10   /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
+
+/*
+ * The size of the buffer used by sqlite3_win32_write_debug().
+ */
+#ifndef SQLITE_WIN32_DBG_BUF_SIZE
+#  define SQLITE_WIN32_DBG_BUF_SIZE   ((int)(4096-sizeof(DWORD)))
+#endif
+
+/*
+ * The value used with sqlite3_win32_set_directory() to specify that
+ * the data directory should be changed.
+ */
+#ifndef SQLITE_WIN32_DATA_DIRECTORY_TYPE
+#  define SQLITE_WIN32_DATA_DIRECTORY_TYPE (1)
+#endif
+
+/*
+ * The value used with sqlite3_win32_set_directory() to specify that
+ * the temporary directory should be changed.
+ */
+#ifndef SQLITE_WIN32_TEMP_DIRECTORY_TYPE
+#  define SQLITE_WIN32_TEMP_DIRECTORY_TYPE (2)
+#endif
+
+/*
+ * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the
+ * various Win32 API heap functions instead of our own.
+ */
+#ifdef SQLITE_WIN32_MALLOC
+
+/*
+ * If this is non-zero, an isolated heap will be created by the native Win32
+ * allocator subsystem; otherwise, the default process heap will be used.  This
+ * setting has no effect when compiling for WinRT.  By default, this is enabled
+ * and an isolated heap will be created to store all allocated data.
+ *
+ ******************************************************************************
+ * WARNING: It is important to note that when this setting is non-zero and the
+ *          winMemShutdown function is called (e.g. by the sqlite3_shutdown
+ *          function), all data that was allocated using the isolated heap will
+ *          be freed immediately and any attempt to access any of that freed
+ *          data will almost certainly result in an immediate access violation.
+ ******************************************************************************
+ */
+#ifndef SQLITE_WIN32_HEAP_CREATE
+#  define SQLITE_WIN32_HEAP_CREATE    (TRUE)
+#endif
+
+/*
+ * The initial size of the Win32-specific heap.  This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_INIT_SIZE
+#  define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \
+                                       (SQLITE_DEFAULT_PAGE_SIZE) + 4194304)
+#endif
+
+/*
+ * The maximum size of the Win32-specific heap.  This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_MAX_SIZE
+#  define SQLITE_WIN32_HEAP_MAX_SIZE  (0)
+#endif
+
+/*
+ * The extra flags to use in calls to the Win32 heap APIs.  This value may be
+ * zero for the default behavior.
+ */
+#ifndef SQLITE_WIN32_HEAP_FLAGS
+#  define SQLITE_WIN32_HEAP_FLAGS     (0)
+#endif
+
+
+/*
+** The winMemData structure stores information required by the Win32-specific
+** sqlite3_mem_methods implementation.
+*/
+typedef struct winMemData winMemData;
+struct winMemData {
+#ifndef NDEBUG
+  u32 magic1;   /* Magic number to detect structure corruption. */
+#endif
+  HANDLE hHeap; /* The handle to our heap. */
+  BOOL bOwned;  /* Do we own the heap (i.e. destroy it on shutdown)? */
+#ifndef NDEBUG
+  u32 magic2;   /* Magic number to detect structure corruption. */
+#endif
+};
+
+#ifndef NDEBUG
+#define WINMEM_MAGIC1     0x42b2830b
+#define WINMEM_MAGIC2     0xbd4d7cf4
+#endif
+
+static struct winMemData win_mem_data = {
+#ifndef NDEBUG
+  WINMEM_MAGIC1,
+#endif
+  NULL, FALSE
+#ifndef NDEBUG
+  ,WINMEM_MAGIC2
+#endif
+};
+
+#ifndef NDEBUG
+#define winMemAssertMagic1() assert( win_mem_data.magic1==WINMEM_MAGIC1 )
+#define winMemAssertMagic2() assert( win_mem_data.magic2==WINMEM_MAGIC2 )
+#define winMemAssertMagic()  winMemAssertMagic1(); winMemAssertMagic2();
+#else
+#define winMemAssertMagic()
+#endif
+
+#define winMemGetDataPtr()  &win_mem_data
+#define winMemGetHeap()     win_mem_data.hHeap
+#define winMemGetOwned()    win_mem_data.bOwned
+
+static void *winMemMalloc(int nBytes);
+static void winMemFree(void *pPrior);
+static void *winMemRealloc(void *pPrior, int nBytes);
+static int winMemSize(void *p);
+static int winMemRoundup(int n);
+static int winMemInit(void *pAppData);
+static void winMemShutdown(void *pAppData);
+
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void);
+#endif /* SQLITE_WIN32_MALLOC */
+
+/*
+** The following variable is (normally) set once and never changes
+** thereafter.  It records whether the operating system is Win9x
+** or WinNT.
+**
+** 0:   Operating system unknown.
+** 1:   Operating system is Win9x.
+** 2:   Operating system is WinNT.
+**
+** In order to facilitate testing on a WinNT system, the test fixture
+** can manually set this value to 1 to emulate Win98 behavior.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0;
+#else
+static LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0;
+#endif
+
+#ifndef SYSCALL
+#  define SYSCALL sqlite3_syscall_ptr
+#endif
+
+/*
+** This function is not available on Windows CE or WinRT.
+ */
+
+#if SQLITE_OS_WINCE || SQLITE_OS_WINRT
+#  define osAreFileApisANSI()       1
+#endif
+
+/*
+** Many system calls are accessed through pointer-to-functions so that
+** they may be overridden at runtime to facilitate fault injection during
+** testing and sandboxing.  The following array holds the names and pointers
+** to all overrideable system calls.
+*/
+static struct win_syscall {
+  const char *zName;            /* Name of the system call */
+  sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
+  sqlite3_syscall_ptr pDefault; /* Default value */
+} aSyscall[] = {
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
+  { "AreFileApisANSI",         (SYSCALL)AreFileApisANSI,         0 },
+#else
+  { "AreFileApisANSI",         (SYSCALL)0,                       0 },
+#endif
+
+#ifndef osAreFileApisANSI
+#define osAreFileApisANSI ((BOOL(WINAPI*)(VOID))aSyscall[0].pCurrent)
+#endif
+
+#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE)
+  { "CharLowerW",              (SYSCALL)CharLowerW,              0 },
+#else
+  { "CharLowerW",              (SYSCALL)0,                       0 },
+#endif
+
+#define osCharLowerW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[1].pCurrent)
+
+#if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE)
+  { "CharUpperW",              (SYSCALL)CharUpperW,              0 },
+#else
+  { "CharUpperW",              (SYSCALL)0,                       0 },
+#endif
+
+#define osCharUpperW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[2].pCurrent)
+
+  { "CloseHandle",             (SYSCALL)CloseHandle,             0 },
+
+#define osCloseHandle ((BOOL(WINAPI*)(HANDLE))aSyscall[3].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "CreateFileA",             (SYSCALL)CreateFileA,             0 },
+#else
+  { "CreateFileA",             (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateFileA ((HANDLE(WINAPI*)(LPCSTR,DWORD,DWORD, \
+        LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[4].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "CreateFileW",             (SYSCALL)CreateFileW,             0 },
+#else
+  { "CreateFileW",             (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateFileW ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD, \
+        LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[5].pCurrent)
+
+#if (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_ANSI) && \
+        (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0))
+  { "CreateFileMappingA",      (SYSCALL)CreateFileMappingA,      0 },
+#else
+  { "CreateFileMappingA",      (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateFileMappingA ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \
+        DWORD,DWORD,DWORD,LPCSTR))aSyscall[6].pCurrent)
+
+#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \
+        (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0))
+  { "CreateFileMappingW",      (SYSCALL)CreateFileMappingW,      0 },
+#else
+  { "CreateFileMappingW",      (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateFileMappingW ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \
+        DWORD,DWORD,DWORD,LPCWSTR))aSyscall[7].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "CreateMutexW",            (SYSCALL)CreateMutexW,            0 },
+#else
+  { "CreateMutexW",            (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateMutexW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,BOOL, \
+        LPCWSTR))aSyscall[8].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "DeleteFileA",             (SYSCALL)DeleteFileA,             0 },
+#else
+  { "DeleteFileA",             (SYSCALL)0,                       0 },
+#endif
+
+#define osDeleteFileA ((BOOL(WINAPI*)(LPCSTR))aSyscall[9].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  { "DeleteFileW",             (SYSCALL)DeleteFileW,             0 },
+#else
+  { "DeleteFileW",             (SYSCALL)0,                       0 },
+#endif
+
+#define osDeleteFileW ((BOOL(WINAPI*)(LPCWSTR))aSyscall[10].pCurrent)
+
+#if SQLITE_OS_WINCE
+  { "FileTimeToLocalFileTime", (SYSCALL)FileTimeToLocalFileTime, 0 },
+#else
+  { "FileTimeToLocalFileTime", (SYSCALL)0,                       0 },
+#endif
+
+#define osFileTimeToLocalFileTime ((BOOL(WINAPI*)(CONST FILETIME*, \
+        LPFILETIME))aSyscall[11].pCurrent)
+
+#if SQLITE_OS_WINCE
+  { "FileTimeToSystemTime",    (SYSCALL)FileTimeToSystemTime,    0 },
+#else
+  { "FileTimeToSystemTime",    (SYSCALL)0,                       0 },
+#endif
+
+#define osFileTimeToSystemTime ((BOOL(WINAPI*)(CONST FILETIME*, \
+        LPSYSTEMTIME))aSyscall[12].pCurrent)
+
+  { "FlushFileBuffers",        (SYSCALL)FlushFileBuffers,        0 },
+
+#define osFlushFileBuffers ((BOOL(WINAPI*)(HANDLE))aSyscall[13].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "FormatMessageA",          (SYSCALL)FormatMessageA,          0 },
+#else
+  { "FormatMessageA",          (SYSCALL)0,                       0 },
+#endif
+
+#define osFormatMessageA ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPSTR, \
+        DWORD,va_list*))aSyscall[14].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  { "FormatMessageW",          (SYSCALL)FormatMessageW,          0 },
+#else
+  { "FormatMessageW",          (SYSCALL)0,                       0 },
+#endif
+
+#define osFormatMessageW ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPWSTR, \
+        DWORD,va_list*))aSyscall[15].pCurrent)
+
+#if !defined(SQLITE_OMIT_LOAD_EXTENSION)
+  { "FreeLibrary",             (SYSCALL)FreeLibrary,             0 },
+#else
+  { "FreeLibrary",             (SYSCALL)0,                       0 },
+#endif
+
+#define osFreeLibrary ((BOOL(WINAPI*)(HMODULE))aSyscall[16].pCurrent)
+
+  { "GetCurrentProcessId",     (SYSCALL)GetCurrentProcessId,     0 },
+
+#define osGetCurrentProcessId ((DWORD(WINAPI*)(VOID))aSyscall[17].pCurrent)
+
+#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI)
+  { "GetDiskFreeSpaceA",       (SYSCALL)GetDiskFreeSpaceA,       0 },
+#else
+  { "GetDiskFreeSpaceA",       (SYSCALL)0,                       0 },
+#endif
+
+#define osGetDiskFreeSpaceA ((BOOL(WINAPI*)(LPCSTR,LPDWORD,LPDWORD,LPDWORD, \
+        LPDWORD))aSyscall[18].pCurrent)
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "GetDiskFreeSpaceW",       (SYSCALL)GetDiskFreeSpaceW,       0 },
+#else
+  { "GetDiskFreeSpaceW",       (SYSCALL)0,                       0 },
+#endif
+
+#define osGetDiskFreeSpaceW ((BOOL(WINAPI*)(LPCWSTR,LPDWORD,LPDWORD,LPDWORD, \
+        LPDWORD))aSyscall[19].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "GetFileAttributesA",      (SYSCALL)GetFileAttributesA,      0 },
+#else
+  { "GetFileAttributesA",      (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFileAttributesA ((DWORD(WINAPI*)(LPCSTR))aSyscall[20].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "GetFileAttributesW",      (SYSCALL)GetFileAttributesW,      0 },
+#else
+  { "GetFileAttributesW",      (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFileAttributesW ((DWORD(WINAPI*)(LPCWSTR))aSyscall[21].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  { "GetFileAttributesExW",    (SYSCALL)GetFileAttributesExW,    0 },
+#else
+  { "GetFileAttributesExW",    (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFileAttributesExW ((BOOL(WINAPI*)(LPCWSTR,GET_FILEEX_INFO_LEVELS, \
+        LPVOID))aSyscall[22].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "GetFileSize",             (SYSCALL)GetFileSize,             0 },
+#else
+  { "GetFileSize",             (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFileSize ((DWORD(WINAPI*)(HANDLE,LPDWORD))aSyscall[23].pCurrent)
+
+#if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI)
+  { "GetFullPathNameA",        (SYSCALL)GetFullPathNameA,        0 },
+#else
+  { "GetFullPathNameA",        (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFullPathNameA ((DWORD(WINAPI*)(LPCSTR,DWORD,LPSTR, \
+        LPSTR*))aSyscall[24].pCurrent)
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "GetFullPathNameW",        (SYSCALL)GetFullPathNameW,        0 },
+#else
+  { "GetFullPathNameW",        (SYSCALL)0,                       0 },
+#endif
+
+#define osGetFullPathNameW ((DWORD(WINAPI*)(LPCWSTR,DWORD,LPWSTR, \
+        LPWSTR*))aSyscall[25].pCurrent)
+
+  { "GetLastError",            (SYSCALL)GetLastError,            0 },
+
+#define osGetLastError ((DWORD(WINAPI*)(VOID))aSyscall[26].pCurrent)
+
+#if !defined(SQLITE_OMIT_LOAD_EXTENSION)
+#if SQLITE_OS_WINCE
+  /* The GetProcAddressA() routine is only available on Windows CE. */
+  { "GetProcAddressA",         (SYSCALL)GetProcAddressA,         0 },
+#else
+  /* All other Windows platforms expect GetProcAddress() to take
+  ** an ANSI string regardless of the _UNICODE setting */
+  { "GetProcAddressA",         (SYSCALL)GetProcAddress,          0 },
+#endif
+#else
+  { "GetProcAddressA",         (SYSCALL)0,                       0 },
+#endif
+
+#define osGetProcAddressA ((FARPROC(WINAPI*)(HMODULE, \
+        LPCSTR))aSyscall[27].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "GetSystemInfo",           (SYSCALL)GetSystemInfo,           0 },
+#else
+  { "GetSystemInfo",           (SYSCALL)0,                       0 },
+#endif
+
+#define osGetSystemInfo ((VOID(WINAPI*)(LPSYSTEM_INFO))aSyscall[28].pCurrent)
+
+  { "GetSystemTime",           (SYSCALL)GetSystemTime,           0 },
+
+#define osGetSystemTime ((VOID(WINAPI*)(LPSYSTEMTIME))aSyscall[29].pCurrent)
+
+#if !SQLITE_OS_WINCE
+  { "GetSystemTimeAsFileTime", (SYSCALL)GetSystemTimeAsFileTime, 0 },
+#else
+  { "GetSystemTimeAsFileTime", (SYSCALL)0,                       0 },
+#endif
+
+#define osGetSystemTimeAsFileTime ((VOID(WINAPI*)( \
+        LPFILETIME))aSyscall[30].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "GetTempPathA",            (SYSCALL)GetTempPathA,            0 },
+#else
+  { "GetTempPathA",            (SYSCALL)0,                       0 },
+#endif
+
+#define osGetTempPathA ((DWORD(WINAPI*)(DWORD,LPSTR))aSyscall[31].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE)
+  { "GetTempPathW",            (SYSCALL)GetTempPathW,            0 },
+#else
+  { "GetTempPathW",            (SYSCALL)0,                       0 },
+#endif
+
+#define osGetTempPathW ((DWORD(WINAPI*)(DWORD,LPWSTR))aSyscall[32].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "GetTickCount",            (SYSCALL)GetTickCount,            0 },
+#else
+  { "GetTickCount",            (SYSCALL)0,                       0 },
+#endif
+
+#define osGetTickCount ((DWORD(WINAPI*)(VOID))aSyscall[33].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI) && defined(SQLITE_WIN32_GETVERSIONEX) && \
+        SQLITE_WIN32_GETVERSIONEX
+  { "GetVersionExA",           (SYSCALL)GetVersionExA,           0 },
+#else
+  { "GetVersionExA",           (SYSCALL)0,                       0 },
+#endif
+
+#define osGetVersionExA ((BOOL(WINAPI*)( \
+        LPOSVERSIONINFOA))aSyscall[34].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \
+        defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
+  { "GetVersionExW",           (SYSCALL)GetVersionExW,           0 },
+#else
+  { "GetVersionExW",           (SYSCALL)0,                       0 },
+#endif
+
+#define osGetVersionExW ((BOOL(WINAPI*)( \
+        LPOSVERSIONINFOW))aSyscall[35].pCurrent)
+
+  { "HeapAlloc",               (SYSCALL)HeapAlloc,               0 },
+
+#define osHeapAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD, \
+        SIZE_T))aSyscall[36].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "HeapCreate",              (SYSCALL)HeapCreate,              0 },
+#else
+  { "HeapCreate",              (SYSCALL)0,                       0 },
+#endif
+
+#define osHeapCreate ((HANDLE(WINAPI*)(DWORD,SIZE_T, \
+        SIZE_T))aSyscall[37].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "HeapDestroy",             (SYSCALL)HeapDestroy,             0 },
+#else
+  { "HeapDestroy",             (SYSCALL)0,                       0 },
+#endif
+
+#define osHeapDestroy ((BOOL(WINAPI*)(HANDLE))aSyscall[38].pCurrent)
+
+  { "HeapFree",                (SYSCALL)HeapFree,                0 },
+
+#define osHeapFree ((BOOL(WINAPI*)(HANDLE,DWORD,LPVOID))aSyscall[39].pCurrent)
+
+  { "HeapReAlloc",             (SYSCALL)HeapReAlloc,             0 },
+
+#define osHeapReAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD,LPVOID, \
+        SIZE_T))aSyscall[40].pCurrent)
+
+  { "HeapSize",                (SYSCALL)HeapSize,                0 },
+
+#define osHeapSize ((SIZE_T(WINAPI*)(HANDLE,DWORD, \
+        LPCVOID))aSyscall[41].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "HeapValidate",            (SYSCALL)HeapValidate,            0 },
+#else
+  { "HeapValidate",            (SYSCALL)0,                       0 },
+#endif
+
+#define osHeapValidate ((BOOL(WINAPI*)(HANDLE,DWORD, \
+        LPCVOID))aSyscall[42].pCurrent)
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
+  { "HeapCompact",             (SYSCALL)HeapCompact,             0 },
+#else
+  { "HeapCompact",             (SYSCALL)0,                       0 },
+#endif
+
+#define osHeapCompact ((UINT(WINAPI*)(HANDLE,DWORD))aSyscall[43].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_OMIT_LOAD_EXTENSION)
+  { "LoadLibraryA",            (SYSCALL)LoadLibraryA,            0 },
+#else
+  { "LoadLibraryA",            (SYSCALL)0,                       0 },
+#endif
+
+#define osLoadLibraryA ((HMODULE(WINAPI*)(LPCSTR))aSyscall[44].pCurrent)
+
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \
+        !defined(SQLITE_OMIT_LOAD_EXTENSION)
+  { "LoadLibraryW",            (SYSCALL)LoadLibraryW,            0 },
+#else
+  { "LoadLibraryW",            (SYSCALL)0,                       0 },
+#endif
+
+#define osLoadLibraryW ((HMODULE(WINAPI*)(LPCWSTR))aSyscall[45].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "LocalFree",               (SYSCALL)LocalFree,               0 },
+#else
+  { "LocalFree",               (SYSCALL)0,                       0 },
+#endif
+
+#define osLocalFree ((HLOCAL(WINAPI*)(HLOCAL))aSyscall[46].pCurrent)
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
+  { "LockFile",                (SYSCALL)LockFile,                0 },
+#else
+  { "LockFile",                (SYSCALL)0,                       0 },
+#endif
+
+#ifndef osLockFile
+#define osLockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \
+        DWORD))aSyscall[47].pCurrent)
+#endif
+
+#if !SQLITE_OS_WINCE
+  { "LockFileEx",              (SYSCALL)LockFileEx,              0 },
+#else
+  { "LockFileEx",              (SYSCALL)0,                       0 },
+#endif
+
+#ifndef osLockFileEx
+#define osLockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD,DWORD, \
+        LPOVERLAPPED))aSyscall[48].pCurrent)
+#endif
+
+#if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && \
+        (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0))
+  { "MapViewOfFile",           (SYSCALL)MapViewOfFile,           0 },
+#else
+  { "MapViewOfFile",           (SYSCALL)0,                       0 },
+#endif
+
+#define osMapViewOfFile ((LPVOID(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \
+        SIZE_T))aSyscall[49].pCurrent)
+
+  { "MultiByteToWideChar",     (SYSCALL)MultiByteToWideChar,     0 },
+
+#define osMultiByteToWideChar ((int(WINAPI*)(UINT,DWORD,LPCSTR,int,LPWSTR, \
+        int))aSyscall[50].pCurrent)
+
+  { "QueryPerformanceCounter", (SYSCALL)QueryPerformanceCounter, 0 },
+
+#define osQueryPerformanceCounter ((BOOL(WINAPI*)( \
+        LARGE_INTEGER*))aSyscall[51].pCurrent)
+
+  { "ReadFile",                (SYSCALL)ReadFile,                0 },
+
+#define osReadFile ((BOOL(WINAPI*)(HANDLE,LPVOID,DWORD,LPDWORD, \
+        LPOVERLAPPED))aSyscall[52].pCurrent)
+
+  { "SetEndOfFile",            (SYSCALL)SetEndOfFile,            0 },
+
+#define osSetEndOfFile ((BOOL(WINAPI*)(HANDLE))aSyscall[53].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "SetFilePointer",          (SYSCALL)SetFilePointer,          0 },
+#else
+  { "SetFilePointer",          (SYSCALL)0,                       0 },
+#endif
+
+#define osSetFilePointer ((DWORD(WINAPI*)(HANDLE,LONG,PLONG, \
+        DWORD))aSyscall[54].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "Sleep",                   (SYSCALL)Sleep,                   0 },
+#else
+  { "Sleep",                   (SYSCALL)0,                       0 },
+#endif
+
+#define osSleep ((VOID(WINAPI*)(DWORD))aSyscall[55].pCurrent)
+
+  { "SystemTimeToFileTime",    (SYSCALL)SystemTimeToFileTime,    0 },
+
+#define osSystemTimeToFileTime ((BOOL(WINAPI*)(CONST SYSTEMTIME*, \
+        LPFILETIME))aSyscall[56].pCurrent)
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
+  { "UnlockFile",              (SYSCALL)UnlockFile,              0 },
+#else
+  { "UnlockFile",              (SYSCALL)0,                       0 },
+#endif
+
+#ifndef osUnlockFile
+#define osUnlockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \
+        DWORD))aSyscall[57].pCurrent)
+#endif
+
+#if !SQLITE_OS_WINCE
+  { "UnlockFileEx",            (SYSCALL)UnlockFileEx,            0 },
+#else
+  { "UnlockFileEx",            (SYSCALL)0,                       0 },
+#endif
+
+#define osUnlockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \
+        LPOVERLAPPED))aSyscall[58].pCurrent)
+
+#if SQLITE_OS_WINCE || !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+  { "UnmapViewOfFile",         (SYSCALL)UnmapViewOfFile,         0 },
+#else
+  { "UnmapViewOfFile",         (SYSCALL)0,                       0 },
+#endif
+
+#define osUnmapViewOfFile ((BOOL(WINAPI*)(LPCVOID))aSyscall[59].pCurrent)
+
+  { "WideCharToMultiByte",     (SYSCALL)WideCharToMultiByte,     0 },
+
+#define osWideCharToMultiByte ((int(WINAPI*)(UINT,DWORD,LPCWSTR,int,LPSTR,int, \
+        LPCSTR,LPBOOL))aSyscall[60].pCurrent)
+
+  { "WriteFile",               (SYSCALL)WriteFile,               0 },
+
+#define osWriteFile ((BOOL(WINAPI*)(HANDLE,LPCVOID,DWORD,LPDWORD, \
+        LPOVERLAPPED))aSyscall[61].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "CreateEventExW",          (SYSCALL)CreateEventExW,          0 },
+#else
+  { "CreateEventExW",          (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateEventExW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,LPCWSTR, \
+        DWORD,DWORD))aSyscall[62].pCurrent)
+
+#if !SQLITE_OS_WINRT
+  { "WaitForSingleObject",     (SYSCALL)WaitForSingleObject,     0 },
+#else
+  { "WaitForSingleObject",     (SYSCALL)0,                       0 },
+#endif
+
+#define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \
+        DWORD))aSyscall[63].pCurrent)
+
+#if !SQLITE_OS_WINCE
+  { "WaitForSingleObjectEx",   (SYSCALL)WaitForSingleObjectEx,   0 },
+#else
+  { "WaitForSingleObjectEx",   (SYSCALL)0,                       0 },
+#endif
+
+#define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \
+        BOOL))aSyscall[64].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "SetFilePointerEx",        (SYSCALL)SetFilePointerEx,        0 },
+#else
+  { "SetFilePointerEx",        (SYSCALL)0,                       0 },
+#endif
+
+#define osSetFilePointerEx ((BOOL(WINAPI*)(HANDLE,LARGE_INTEGER, \
+        PLARGE_INTEGER,DWORD))aSyscall[65].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "GetFileInformationByHandleEx", (SYSCALL)GetFileInformationByHandleEx, 0 },
+#else
+  { "GetFileInformationByHandleEx", (SYSCALL)0,                  0 },
+#endif
+
+#define osGetFileInformationByHandleEx ((BOOL(WINAPI*)(HANDLE, \
+        FILE_INFO_BY_HANDLE_CLASS,LPVOID,DWORD))aSyscall[66].pCurrent)
+
+#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
+  { "MapViewOfFileFromApp",    (SYSCALL)MapViewOfFileFromApp,    0 },
+#else
+  { "MapViewOfFileFromApp",    (SYSCALL)0,                       0 },
+#endif
+
+#define osMapViewOfFileFromApp ((LPVOID(WINAPI*)(HANDLE,ULONG,ULONG64, \
+        SIZE_T))aSyscall[67].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "CreateFile2",             (SYSCALL)CreateFile2,             0 },
+#else
+  { "CreateFile2",             (SYSCALL)0,                       0 },
+#endif
+
+#define osCreateFile2 ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD,DWORD, \
+        LPCREATEFILE2_EXTENDED_PARAMETERS))aSyscall[68].pCurrent)
+
+#if SQLITE_OS_WINRT && !defined(SQLITE_OMIT_LOAD_EXTENSION)
+  { "LoadPackagedLibrary",     (SYSCALL)LoadPackagedLibrary,     0 },
+#else
+  { "LoadPackagedLibrary",     (SYSCALL)0,                       0 },
+#endif
+
+#define osLoadPackagedLibrary ((HMODULE(WINAPI*)(LPCWSTR, \
+        DWORD))aSyscall[69].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "GetTickCount64",          (SYSCALL)GetTickCount64,          0 },
+#else
+  { "GetTickCount64",          (SYSCALL)0,                       0 },
+#endif
+
+#define osGetTickCount64 ((ULONGLONG(WINAPI*)(VOID))aSyscall[70].pCurrent)
+
+#if SQLITE_OS_WINRT
+  { "GetNativeSystemInfo",     (SYSCALL)GetNativeSystemInfo,     0 },
+#else
+  { "GetNativeSystemInfo",     (SYSCALL)0,                       0 },
+#endif
+
+#define osGetNativeSystemInfo ((VOID(WINAPI*)( \
+        LPSYSTEM_INFO))aSyscall[71].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  { "OutputDebugStringA",      (SYSCALL)OutputDebugStringA,      0 },
+#else
+  { "OutputDebugStringA",      (SYSCALL)0,                       0 },
+#endif
+
+#define osOutputDebugStringA ((VOID(WINAPI*)(LPCSTR))aSyscall[72].pCurrent)
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  { "OutputDebugStringW",      (SYSCALL)OutputDebugStringW,      0 },
+#else
+  { "OutputDebugStringW",      (SYSCALL)0,                       0 },
+#endif
+
+#define osOutputDebugStringW ((VOID(WINAPI*)(LPCWSTR))aSyscall[73].pCurrent)
+
+  { "GetProcessHeap",          (SYSCALL)GetProcessHeap,          0 },
+
+#define osGetProcessHeap ((HANDLE(WINAPI*)(VOID))aSyscall[74].pCurrent)
+
+#if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
+  { "CreateFileMappingFromApp", (SYSCALL)CreateFileMappingFromApp, 0 },
+#else
+  { "CreateFileMappingFromApp", (SYSCALL)0,                      0 },
+#endif
+
+#define osCreateFileMappingFromApp ((HANDLE(WINAPI*)(HANDLE, \
+        LPSECURITY_ATTRIBUTES,ULONG,ULONG64,LPCWSTR))aSyscall[75].pCurrent)
+
+/*
+** NOTE: On some sub-platforms, the InterlockedCompareExchange "function"
+**       is really just a macro that uses a compiler intrinsic (e.g. x64).
+**       So do not try to make this is into a redefinable interface.
+*/
+#if defined(InterlockedCompareExchange)
+  { "InterlockedCompareExchange", (SYSCALL)0,                    0 },
+
+#define osInterlockedCompareExchange InterlockedCompareExchange
+#else
+  { "InterlockedCompareExchange", (SYSCALL)InterlockedCompareExchange, 0 },
+
+#define osInterlockedCompareExchange ((LONG(WINAPI*)(LONG \
+        SQLITE_WIN32_VOLATILE*, LONG,LONG))aSyscall[76].pCurrent)
+#endif /* defined(InterlockedCompareExchange) */
+
+}; /* End of the overrideable system calls */
+
+/*
+** This is the xSetSystemCall() method of sqlite3_vfs for all of the
+** "win32" VFSes.  Return SQLITE_OK opon successfully updating the
+** system call pointer, or SQLITE_NOTFOUND if there is no configurable
+** system call named zName.
+*/
+static int winSetSystemCall(
+  sqlite3_vfs *pNotUsed,        /* The VFS pointer.  Not used */
+  const char *zName,            /* Name of system call to override */
+  sqlite3_syscall_ptr pNewFunc  /* Pointer to new system call value */
+){
+  unsigned int i;
+  int rc = SQLITE_NOTFOUND;
+
+  UNUSED_PARAMETER(pNotUsed);
+  if( zName==0 ){
+    /* If no zName is given, restore all system calls to their default
+    ** settings and return NULL
+    */
+    rc = SQLITE_OK;
+    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+      if( aSyscall[i].pDefault ){
+        aSyscall[i].pCurrent = aSyscall[i].pDefault;
+      }
+    }
+  }else{
+    /* If zName is specified, operate on only the one system call
+    ** specified.
+    */
+    for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+      if( strcmp(zName, aSyscall[i].zName)==0 ){
+        if( aSyscall[i].pDefault==0 ){
+          aSyscall[i].pDefault = aSyscall[i].pCurrent;
+        }
+        rc = SQLITE_OK;
+        if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
+        aSyscall[i].pCurrent = pNewFunc;
+        break;
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Return the value of a system call.  Return NULL if zName is not a
+** recognized system call name.  NULL is also returned if the system call
+** is currently undefined.
+*/
+static sqlite3_syscall_ptr winGetSystemCall(
+  sqlite3_vfs *pNotUsed,
+  const char *zName
+){
+  unsigned int i;
+
+  UNUSED_PARAMETER(pNotUsed);
+  for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
+    if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
+  }
+  return 0;
+}
+
+/*
+** Return the name of the first system call after zName.  If zName==NULL
+** then return the name of the first system call.  Return NULL if zName
+** is the last system call or if zName is not the name of a valid
+** system call.
+*/
+static const char *winNextSystemCall(sqlite3_vfs *p, const char *zName){
+  int i = -1;
+
+  UNUSED_PARAMETER(p);
+  if( zName ){
+    for(i=0; i<ArraySize(aSyscall)-1; i++){
+      if( strcmp(zName, aSyscall[i].zName)==0 ) break;
+    }
+  }
+  for(i++; i<ArraySize(aSyscall); i++){
+    if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
+  }
+  return 0;
+}
+
+#ifdef SQLITE_WIN32_MALLOC
+/*
+** If a Win32 native heap has been configured, this function will attempt to
+** compact it.  Upon success, SQLITE_OK will be returned.  Upon failure, one
+** of SQLITE_NOMEM, SQLITE_ERROR, or SQLITE_NOTFOUND will be returned.  The
+** "pnLargest" argument, if non-zero, will be used to return the size of the
+** largest committed free block in the heap, in bytes.
+*/
+SQLITE_API int sqlite3_win32_compact_heap(LPUINT pnLargest){
+  int rc = SQLITE_OK;
+  UINT nLargest = 0;
+  HANDLE hHeap;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT
+  if( (nLargest=osHeapCompact(hHeap, SQLITE_WIN32_HEAP_FLAGS))==0 ){
+    DWORD lastErrno = osGetLastError();
+    if( lastErrno==NO_ERROR ){
+      sqlite3_log(SQLITE_NOMEM, "failed to HeapCompact (no space), heap=%p",
+                  (void*)hHeap);
+      rc = SQLITE_NOMEM;
+    }else{
+      sqlite3_log(SQLITE_ERROR, "failed to HeapCompact (%lu), heap=%p",
+                  osGetLastError(), (void*)hHeap);
+      rc = SQLITE_ERROR;
+    }
+  }
+#else
+  sqlite3_log(SQLITE_NOTFOUND, "failed to HeapCompact, heap=%p",
+              (void*)hHeap);
+  rc = SQLITE_NOTFOUND;
+#endif
+  if( pnLargest ) *pnLargest = nLargest;
+  return rc;
+}
+
+/*
+** If a Win32 native heap has been configured, this function will attempt to
+** destroy and recreate it.  If the Win32 native heap is not isolated and/or
+** the sqlite3_memory_used() function does not return zero, SQLITE_BUSY will
+** be returned and no changes will be made to the Win32 native heap.
+*/
+SQLITE_API int sqlite3_win32_reset_heap(){
+  int rc;
+  MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */
+  MUTEX_LOGIC( sqlite3_mutex *pMem; )    /* The memsys static mutex */
+  MUTEX_LOGIC( pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER); )
+  MUTEX_LOGIC( pMem = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM); )
+  sqlite3_mutex_enter(pMaster);
+  sqlite3_mutex_enter(pMem);
+  winMemAssertMagic();
+  if( winMemGetHeap()!=NULL && winMemGetOwned() && sqlite3_memory_used()==0 ){
+    /*
+    ** At this point, there should be no outstanding memory allocations on
+    ** the heap.  Also, since both the master and memsys locks are currently
+    ** being held by us, no other function (i.e. from another thread) should
+    ** be able to even access the heap.  Attempt to destroy and recreate our
+    ** isolated Win32 native heap now.
+    */
+    assert( winMemGetHeap()!=NULL );
+    assert( winMemGetOwned() );
+    assert( sqlite3_memory_used()==0 );
+    winMemShutdown(winMemGetDataPtr());
+    assert( winMemGetHeap()==NULL );
+    assert( !winMemGetOwned() );
+    assert( sqlite3_memory_used()==0 );
+    rc = winMemInit(winMemGetDataPtr());
+    assert( rc!=SQLITE_OK || winMemGetHeap()!=NULL );
+    assert( rc!=SQLITE_OK || winMemGetOwned() );
+    assert( rc!=SQLITE_OK || sqlite3_memory_used()==0 );
+  }else{
+    /*
+    ** The Win32 native heap cannot be modified because it may be in use.
+    */
+    rc = SQLITE_BUSY;
+  }
+  sqlite3_mutex_leave(pMem);
+  sqlite3_mutex_leave(pMaster);
+  return rc;
+}
+#endif /* SQLITE_WIN32_MALLOC */
+
+/*
+** This function outputs the specified (ANSI) string to the Win32 debugger
+** (if available).
+*/
+
+SQLITE_API void sqlite3_win32_write_debug(const char *zBuf, int nBuf){
+  char zDbgBuf[SQLITE_WIN32_DBG_BUF_SIZE];
+  int nMin = MIN(nBuf, (SQLITE_WIN32_DBG_BUF_SIZE - 1)); /* may be negative. */
+  if( nMin<-1 ) nMin = -1; /* all negative values become -1. */
+  assert( nMin==-1 || nMin==0 || nMin<SQLITE_WIN32_DBG_BUF_SIZE );
+#if defined(SQLITE_WIN32_HAS_ANSI)
+  if( nMin>0 ){
+    memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
+    memcpy(zDbgBuf, zBuf, nMin);
+    osOutputDebugStringA(zDbgBuf);
+  }else{
+    osOutputDebugStringA(zBuf);
+  }
+#elif defined(SQLITE_WIN32_HAS_WIDE)
+  memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
+  if ( osMultiByteToWideChar(
+          osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf,
+          nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){
+    return;
+  }
+  osOutputDebugStringW((LPCWSTR)zDbgBuf);
+#else
+  if( nMin>0 ){
+    memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
+    memcpy(zDbgBuf, zBuf, nMin);
+    fprintf(stderr, "%s", zDbgBuf);
+  }else{
+    fprintf(stderr, "%s", zBuf);
+  }
+#endif
+}
+
+/*
+** The following routine suspends the current thread for at least ms
+** milliseconds.  This is equivalent to the Win32 Sleep() interface.
+*/
+#if SQLITE_OS_WINRT
+static HANDLE sleepObj = NULL;
+#endif
+
+SQLITE_API void sqlite3_win32_sleep(DWORD milliseconds){
+#if SQLITE_OS_WINRT
+  if ( sleepObj==NULL ){
+    sleepObj = osCreateEventExW(NULL, NULL, CREATE_EVENT_MANUAL_RESET,
+                                SYNCHRONIZE);
+  }
+  assert( sleepObj!=NULL );
+  osWaitForSingleObjectEx(sleepObj, milliseconds, FALSE);
+#else
+  osSleep(milliseconds);
+#endif
+}
+
+#if SQLITE_MAX_WORKER_THREADS>0 && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \
+        SQLITE_THREADSAFE>0
+SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject){
+  DWORD rc;
+  while( (rc = osWaitForSingleObjectEx(hObject, INFINITE,
+                                       TRUE))==WAIT_IO_COMPLETION ){}
+  return rc;
+}
+#endif
+
+/*
+** Return true (non-zero) if we are running under WinNT, Win2K, WinXP,
+** or WinCE.  Return false (zero) for Win95, Win98, or WinME.
+**
+** Here is an interesting observation:  Win95, Win98, and WinME lack
+** the LockFileEx() API.  But we can still statically link against that
+** API as long as we don't call it when running Win95/98/ME.  A call to
+** this routine is used to determine if the host is Win95/98/ME or
+** WinNT/2K/XP so that we will know whether or not we can safely call
+** the LockFileEx() API.
+*/
+
+#if !defined(SQLITE_WIN32_GETVERSIONEX) || !SQLITE_WIN32_GETVERSIONEX
+# define osIsNT()  (1)
+#elif SQLITE_OS_WINCE || SQLITE_OS_WINRT || !defined(SQLITE_WIN32_HAS_ANSI)
+# define osIsNT()  (1)
+#elif !defined(SQLITE_WIN32_HAS_WIDE)
+# define osIsNT()  (0)
+#else
+# define osIsNT()  ((sqlite3_os_type==2) || sqlite3_win32_is_nt())
+#endif
+
+/*
+** This function determines if the machine is running a version of Windows
+** based on the NT kernel.
+*/
+SQLITE_API int sqlite3_win32_is_nt(void){
+#if SQLITE_OS_WINRT
+  /*
+  ** NOTE: The WinRT sub-platform is always assumed to be based on the NT
+  **       kernel.
+  */
+  return 1;
+#elif defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
+  if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){
+#if defined(SQLITE_WIN32_HAS_ANSI)
+    OSVERSIONINFOA sInfo;
+    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
+    osGetVersionExA(&sInfo);
+    osInterlockedCompareExchange(&sqlite3_os_type,
+        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
+#elif defined(SQLITE_WIN32_HAS_WIDE)
+    OSVERSIONINFOW sInfo;
+    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
+    osGetVersionExW(&sInfo);
+    osInterlockedCompareExchange(&sqlite3_os_type,
+        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
+#endif
+  }
+  return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
+#elif SQLITE_TEST
+  return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
+#else
+  /*
+  ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are
+  **       deprecated are always assumed to be based on the NT kernel.
+  */
+  return 1;
+#endif
+}
+
+#ifdef SQLITE_WIN32_MALLOC
+/*
+** Allocate nBytes of memory.
+*/
+static void *winMemMalloc(int nBytes){
+  HANDLE hHeap;
+  void *p;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+  assert( nBytes>=0 );
+  p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+  if( !p ){
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%lu), heap=%p",
+                nBytes, osGetLastError(), (void*)hHeap);
+  }
+  return p;
+}
+
+/*
+** Free memory.
+*/
+static void winMemFree(void *pPrior){
+  HANDLE hHeap;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+  if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */
+  if( !osHeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%lu), heap=%p",
+                pPrior, osGetLastError(), (void*)hHeap);
+  }
+}
+
+/*
+** Change the size of an existing memory allocation
+*/
+static void *winMemRealloc(void *pPrior, int nBytes){
+  HANDLE hHeap;
+  void *p;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+  assert( nBytes>=0 );
+  if( !pPrior ){
+    p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+  }else{
+    p = osHeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes);
+  }
+  if( !p ){
+    sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%lu), heap=%p",
+                pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, osGetLastError(),
+                (void*)hHeap);
+  }
+  return p;
+}
+
+/*
+** Return the size of an outstanding allocation, in bytes.
+*/
+static int winMemSize(void *p){
+  HANDLE hHeap;
+  SIZE_T n;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, p) );
+#endif
+  if( !p ) return 0;
+  n = osHeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p);
+  if( n==(SIZE_T)-1 ){
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%lu), heap=%p",
+                p, osGetLastError(), (void*)hHeap);
+    return 0;
+  }
+  return (int)n;
+}
+
+/*
+** Round up a request size to the next valid allocation size.
+*/
+static int winMemRoundup(int n){
+  return n;
+}
+
+/*
+** Initialize this module.
+*/
+static int winMemInit(void *pAppData){
+  winMemData *pWinMemData = (winMemData *)pAppData;
+
+  if( !pWinMemData ) return SQLITE_ERROR;
+  assert( pWinMemData->magic1==WINMEM_MAGIC1 );
+  assert( pWinMemData->magic2==WINMEM_MAGIC2 );
+
+#if !SQLITE_OS_WINRT && SQLITE_WIN32_HEAP_CREATE
+  if( !pWinMemData->hHeap ){
+    DWORD dwInitialSize = SQLITE_WIN32_HEAP_INIT_SIZE;
+    DWORD dwMaximumSize = (DWORD)sqlite3GlobalConfig.nHeap;
+    if( dwMaximumSize==0 ){
+      dwMaximumSize = SQLITE_WIN32_HEAP_MAX_SIZE;
+    }else if( dwInitialSize>dwMaximumSize ){
+      dwInitialSize = dwMaximumSize;
+    }
+    pWinMemData->hHeap = osHeapCreate(SQLITE_WIN32_HEAP_FLAGS,
+                                      dwInitialSize, dwMaximumSize);
+    if( !pWinMemData->hHeap ){
+      sqlite3_log(SQLITE_NOMEM,
+          "failed to HeapCreate (%lu), flags=%u, initSize=%lu, maxSize=%lu",
+          osGetLastError(), SQLITE_WIN32_HEAP_FLAGS, dwInitialSize,
+          dwMaximumSize);
+      return SQLITE_NOMEM;
+    }
+    pWinMemData->bOwned = TRUE;
+    assert( pWinMemData->bOwned );
+  }
+#else
+  pWinMemData->hHeap = osGetProcessHeap();
+  if( !pWinMemData->hHeap ){
+    sqlite3_log(SQLITE_NOMEM,
+        "failed to GetProcessHeap (%lu)", osGetLastError());
+    return SQLITE_NOMEM;
+  }
+  pWinMemData->bOwned = FALSE;
+  assert( !pWinMemData->bOwned );
+#endif
+  assert( pWinMemData->hHeap!=0 );
+  assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+  assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module.
+*/
+static void winMemShutdown(void *pAppData){
+  winMemData *pWinMemData = (winMemData *)pAppData;
+
+  if( !pWinMemData ) return;
+  assert( pWinMemData->magic1==WINMEM_MAGIC1 );
+  assert( pWinMemData->magic2==WINMEM_MAGIC2 );
+
+  if( pWinMemData->hHeap ){
+    assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE)
+    assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+    if( pWinMemData->bOwned ){
+      if( !osHeapDestroy(pWinMemData->hHeap) ){
+        sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%lu), heap=%p",
+                    osGetLastError(), (void*)pWinMemData->hHeap);
+      }
+      pWinMemData->bOwned = FALSE;
+    }
+    pWinMemData->hHeap = NULL;
+  }
+}
+
+/*
+** Populate the low-level memory allocation function pointers in
+** sqlite3GlobalConfig.m with pointers to the routines in this file. The
+** arguments specify the block of memory to manage.
+**
+** This routine is only called by sqlite3_config(), and therefore
+** is not required to be threadsafe (it is not).
+*/
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void){
+  static const sqlite3_mem_methods winMemMethods = {
+    winMemMalloc,
+    winMemFree,
+    winMemRealloc,
+    winMemSize,
+    winMemRoundup,
+    winMemInit,
+    winMemShutdown,
+    &win_mem_data
+  };
+  return &winMemMethods;
+}
+
+SQLITE_PRIVATE void sqlite3MemSetDefault(void){
+  sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32());
+}
+#endif /* SQLITE_WIN32_MALLOC */
+
+/*
+** Convert a UTF-8 string to Microsoft Unicode (UTF-16?).
+**
+** Space to hold the returned string is obtained from malloc.
+*/
+static LPWSTR winUtf8ToUnicode(const char *zFilename){
+  int nChar;
+  LPWSTR zWideFilename;
+
+  nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
+  if( nChar==0 ){
+    return 0;
+  }
+  zWideFilename = sqlite3MallocZero( nChar*sizeof(zWideFilename[0]) );
+  if( zWideFilename==0 ){
+    return 0;
+  }
+  nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename,
+                                nChar);
+  if( nChar==0 ){
+    sqlite3_free(zWideFilename);
+    zWideFilename = 0;
+  }
+  return zWideFilename;
+}
+
+/*
+** Convert Microsoft Unicode to UTF-8.  Space to hold the returned string is
+** obtained from sqlite3_malloc().
+*/
+static char *winUnicodeToUtf8(LPCWSTR zWideFilename){
+  int nByte;
+  char *zFilename;
+
+  nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
+  if( nByte == 0 ){
+    return 0;
+  }
+  zFilename = sqlite3MallocZero( nByte );
+  if( zFilename==0 ){
+    return 0;
+  }
+  nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte,
+                                0, 0);
+  if( nByte == 0 ){
+    sqlite3_free(zFilename);
+    zFilename = 0;
+  }
+  return zFilename;
+}
+
+/*
+** Convert an ANSI string to Microsoft Unicode, based on the
+** current codepage settings for file apis.
+**
+** Space to hold the returned string is obtained
+** from sqlite3_malloc.
+*/
+static LPWSTR winMbcsToUnicode(const char *zFilename){
+  int nByte;
+  LPWSTR zMbcsFilename;
+  int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP;
+
+  nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, NULL,
+                                0)*sizeof(WCHAR);
+  if( nByte==0 ){
+    return 0;
+  }
+  zMbcsFilename = sqlite3MallocZero( nByte*sizeof(zMbcsFilename[0]) );
+  if( zMbcsFilename==0 ){
+    return 0;
+  }
+  nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename,
+                                nByte);
+  if( nByte==0 ){
+    sqlite3_free(zMbcsFilename);
+    zMbcsFilename = 0;
+  }
+  return zMbcsFilename;
+}
+
+/*
+** Convert Microsoft Unicode to multi-byte character string, based on the
+** user's ANSI codepage.
+**
+** Space to hold the returned string is obtained from
+** sqlite3_malloc().
+*/
+static char *winUnicodeToMbcs(LPCWSTR zWideFilename){
+  int nByte;
+  char *zFilename;
+  int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP;
+
+  nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);
+  if( nByte == 0 ){
+    return 0;
+  }
+  zFilename = sqlite3MallocZero( nByte );
+  if( zFilename==0 ){
+    return 0;
+  }
+  nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename,
+                                nByte, 0, 0);
+  if( nByte == 0 ){
+    sqlite3_free(zFilename);
+    zFilename = 0;
+  }
+  return zFilename;
+}
+
+/*
+** Convert multibyte character string to UTF-8.  Space to hold the
+** returned string is obtained from sqlite3_malloc().
+*/
+SQLITE_API char *sqlite3_win32_mbcs_to_utf8(const char *zFilename){
+  char *zFilenameUtf8;
+  LPWSTR zTmpWide;
+
+  zTmpWide = winMbcsToUnicode(zFilename);
+  if( zTmpWide==0 ){
+    return 0;
+  }
+  zFilenameUtf8 = winUnicodeToUtf8(zTmpWide);
+  sqlite3_free(zTmpWide);
+  return zFilenameUtf8;
+}
+
+/*
+** Convert UTF-8 to multibyte character string.  Space to hold the
+** returned string is obtained from sqlite3_malloc().
+*/
+SQLITE_API char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){
+  char *zFilenameMbcs;
+  LPWSTR zTmpWide;
+
+  zTmpWide = winUtf8ToUnicode(zFilename);
+  if( zTmpWide==0 ){
+    return 0;
+  }
+  zFilenameMbcs = winUnicodeToMbcs(zTmpWide);
+  sqlite3_free(zTmpWide);
+  return zFilenameMbcs;
+}
+
+/*
+** This function sets the data directory or the temporary directory based on
+** the provided arguments.  The type argument must be 1 in order to set the
+** data directory or 2 in order to set the temporary directory.  The zValue
+** argument is the name of the directory to use.  The return value will be
+** SQLITE_OK if successful.
+*/
+SQLITE_API int sqlite3_win32_set_directory(DWORD type, LPCWSTR zValue){
+  char **ppDirectory = 0;
+#ifndef SQLITE_OMIT_AUTOINIT
+  int rc = sqlite3_initialize();
+  if( rc ) return rc;
+#endif
+  if( type==SQLITE_WIN32_DATA_DIRECTORY_TYPE ){
+    ppDirectory = &sqlite3_data_directory;
+  }else if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){
+    ppDirectory = &sqlite3_temp_directory;
+  }
+  assert( !ppDirectory || type==SQLITE_WIN32_DATA_DIRECTORY_TYPE
+          || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE
+  );
+  assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) );
+  if( ppDirectory ){
+    char *zValueUtf8 = 0;
+    if( zValue && zValue[0] ){
+      zValueUtf8 = winUnicodeToUtf8(zValue);
+      if ( zValueUtf8==0 ){
+        return SQLITE_NOMEM;
+      }
+    }
+    sqlite3_free(*ppDirectory);
+    *ppDirectory = zValueUtf8;
+    return SQLITE_OK;
+  }
+  return SQLITE_ERROR;
+}
+
+/*
+** The return value of winGetLastErrorMsg
+** is zero if the error message fits in the buffer, or non-zero
+** otherwise (if the message was truncated).
+*/
+static int winGetLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){
+  /* FormatMessage returns 0 on failure.  Otherwise it
+  ** returns the number of TCHARs written to the output
+  ** buffer, excluding the terminating null char.
+  */
+  DWORD dwLen = 0;
+  char *zOut = 0;
+
+  if( osIsNT() ){
+#if SQLITE_OS_WINRT
+    WCHAR zTempWide[SQLITE_WIN32_MAX_ERRMSG_CHARS+1];
+    dwLen = osFormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM |
+                             FORMAT_MESSAGE_IGNORE_INSERTS,
+                             NULL,
+                             lastErrno,
+                             0,
+                             zTempWide,
+                             SQLITE_WIN32_MAX_ERRMSG_CHARS,
+                             0);
+#else
+    LPWSTR zTempWide = NULL;
+    dwLen = osFormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                             FORMAT_MESSAGE_FROM_SYSTEM |
+                             FORMAT_MESSAGE_IGNORE_INSERTS,
+                             NULL,
+                             lastErrno,
+                             0,
+                             (LPWSTR) &zTempWide,
+                             0,
+                             0);
+#endif
+    if( dwLen > 0 ){
+      /* allocate a buffer and convert to UTF8 */
+      sqlite3BeginBenignMalloc();
+      zOut = winUnicodeToUtf8(zTempWide);
+      sqlite3EndBenignMalloc();
+#if !SQLITE_OS_WINRT
+      /* free the system buffer allocated by FormatMessage */
+      osLocalFree(zTempWide);
+#endif
+    }
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    char *zTemp = NULL;
+    dwLen = osFormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                             FORMAT_MESSAGE_FROM_SYSTEM |
+                             FORMAT_MESSAGE_IGNORE_INSERTS,
+                             NULL,
+                             lastErrno,
+                             0,
+                             (LPSTR) &zTemp,
+                             0,
+                             0);
+    if( dwLen > 0 ){
+      /* allocate a buffer and convert to UTF8 */
+      sqlite3BeginBenignMalloc();
+      zOut = sqlite3_win32_mbcs_to_utf8(zTemp);
+      sqlite3EndBenignMalloc();
+      /* free the system buffer allocated by FormatMessage */
+      osLocalFree(zTemp);
+    }
+  }
+#endif
+  if( 0 == dwLen ){
+    sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno);
+  }else{
+    /* copy a maximum of nBuf chars to output buffer */
+    sqlite3_snprintf(nBuf, zBuf, "%s", zOut);
+    /* free the UTF8 buffer */
+    sqlite3_free(zOut);
+  }
+  return 0;
+}
+
+/*
+**
+** This function - winLogErrorAtLine() - is only ever called via the macro
+** winLogError().
+**
+** This routine is invoked after an error occurs in an OS function.
+** It logs a message using sqlite3_log() containing the current value of
+** error code and, if possible, the human-readable equivalent from
+** FormatMessage.
+**
+** The first argument passed to the macro should be the error code that
+** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
+** The two subsequent arguments should be the name of the OS function that
+** failed and the associated file-system path, if any.
+*/
+#define winLogError(a,b,c,d)   winLogErrorAtLine(a,b,c,d,__LINE__)
+static int winLogErrorAtLine(
+  int errcode,                    /* SQLite error code */
+  DWORD lastErrno,                /* Win32 last error */
+  const char *zFunc,              /* Name of OS function that failed */
+  const char *zPath,              /* File path associated with error */
+  int iLine                       /* Source line number where error occurred */
+){
+  char zMsg[500];                 /* Human readable error text */
+  int i;                          /* Loop counter */
+
+  zMsg[0] = 0;
+  winGetLastErrorMsg(lastErrno, sizeof(zMsg), zMsg);
+  assert( errcode!=SQLITE_OK );
+  if( zPath==0 ) zPath = "";
+  for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){}
+  zMsg[i] = 0;
+  sqlite3_log(errcode,
+      "os_win.c:%d: (%lu) %s(%s) - %s",
+      iLine, lastErrno, zFunc, zPath, zMsg
+  );
+
+  return errcode;
+}
+
+/*
+** The number of times that a ReadFile(), WriteFile(), and DeleteFile()
+** will be retried following a locking error - probably caused by
+** antivirus software.  Also the initial delay before the first retry.
+** The delay increases linearly with each retry.
+*/
+#ifndef SQLITE_WIN32_IOERR_RETRY
+# define SQLITE_WIN32_IOERR_RETRY 10
+#endif
+#ifndef SQLITE_WIN32_IOERR_RETRY_DELAY
+# define SQLITE_WIN32_IOERR_RETRY_DELAY 25
+#endif
+static int winIoerrRetry = SQLITE_WIN32_IOERR_RETRY;
+static int winIoerrRetryDelay = SQLITE_WIN32_IOERR_RETRY_DELAY;
+
+/*
+** The "winIoerrCanRetry1" macro is used to determine if a particular I/O
+** error code obtained via GetLastError() is eligible to be retried.  It
+** must accept the error code DWORD as its only argument and should return
+** non-zero if the error code is transient in nature and the operation
+** responsible for generating the original error might succeed upon being
+** retried.  The argument to this macro should be a variable.
+**
+** Additionally, a macro named "winIoerrCanRetry2" may be defined.  If it
+** is defined, it will be consulted only when the macro "winIoerrCanRetry1"
+** returns zero.  The "winIoerrCanRetry2" macro is completely optional and
+** may be used to include additional error codes in the set that should
+** result in the failing I/O operation being retried by the caller.  If
+** defined, the "winIoerrCanRetry2" macro must exhibit external semantics
+** identical to those of the "winIoerrCanRetry1" macro.
+*/
+#if !defined(winIoerrCanRetry1)
+#define winIoerrCanRetry1(a) (((a)==ERROR_ACCESS_DENIED)        || \
+                              ((a)==ERROR_SHARING_VIOLATION)    || \
+                              ((a)==ERROR_LOCK_VIOLATION)       || \
+                              ((a)==ERROR_DEV_NOT_EXIST)        || \
+                              ((a)==ERROR_NETNAME_DELETED)      || \
+                              ((a)==ERROR_SEM_TIMEOUT)          || \
+                              ((a)==ERROR_NETWORK_UNREACHABLE))
+#endif
+
+/*
+** If a ReadFile() or WriteFile() error occurs, invoke this routine
+** to see if it should be retried.  Return TRUE to retry.  Return FALSE
+** to give up with an error.
+*/
+static int winRetryIoerr(int *pnRetry, DWORD *pError){
+  DWORD e = osGetLastError();
+  if( *pnRetry>=winIoerrRetry ){
+    if( pError ){
+      *pError = e;
+    }
+    return 0;
+  }
+  if( winIoerrCanRetry1(e) ){
+    sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry));
+    ++*pnRetry;
+    return 1;
+  }
+#if defined(winIoerrCanRetry2)
+  else if( winIoerrCanRetry2(e) ){
+    sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry));
+    ++*pnRetry;
+    return 1;
+  }
+#endif
+  if( pError ){
+    *pError = e;
+  }
+  return 0;
+}
+
+/*
+** Log a I/O error retry episode.
+*/
+static void winLogIoerr(int nRetry){
+  if( nRetry ){
+    sqlite3_log(SQLITE_IOERR,
+      "delayed %dms for lock/sharing conflict",
+      winIoerrRetryDelay*nRetry*(nRetry+1)/2
+    );
+  }
+}
+
+#if SQLITE_OS_WINCE
+/*************************************************************************
+** This section contains code for WinCE only.
+*/
+#if !defined(SQLITE_MSVC_LOCALTIME_API) || !SQLITE_MSVC_LOCALTIME_API
+/*
+** The MSVC CRT on Windows CE may not have a localtime() function.  So
+** create a substitute.
+*/
+/* #include <time.h> */
+struct tm *__cdecl localtime(const time_t *t)
+{
+  static struct tm y;
+  FILETIME uTm, lTm;
+  SYSTEMTIME pTm;
+  sqlite3_int64 t64;
+  t64 = *t;
+  t64 = (t64 + 11644473600)*10000000;
+  uTm.dwLowDateTime = (DWORD)(t64 & 0xFFFFFFFF);
+  uTm.dwHighDateTime= (DWORD)(t64 >> 32);
+  osFileTimeToLocalFileTime(&uTm,&lTm);
+  osFileTimeToSystemTime(&lTm,&pTm);
+  y.tm_year = pTm.wYear - 1900;
+  y.tm_mon = pTm.wMonth - 1;
+  y.tm_wday = pTm.wDayOfWeek;
+  y.tm_mday = pTm.wDay;
+  y.tm_hour = pTm.wHour;
+  y.tm_min = pTm.wMinute;
+  y.tm_sec = pTm.wSecond;
+  return &y;
+}
+#endif
+
+#define HANDLE_TO_WINFILE(a) (winFile*)&((char*)a)[-(int)offsetof(winFile,h)]
+
+/*
+** Acquire a lock on the handle h
+*/
+static void winceMutexAcquire(HANDLE h){
+   DWORD dwErr;
+   do {
+     dwErr = osWaitForSingleObject(h, INFINITE);
+   } while (dwErr != WAIT_OBJECT_0 && dwErr != WAIT_ABANDONED);
+}
+/*
+** Release a lock acquired by winceMutexAcquire()
+*/
+#define winceMutexRelease(h) ReleaseMutex(h)
+
+/*
+** Create the mutex and shared memory used for locking in the file
+** descriptor pFile
+*/
+static int winceCreateLock(const char *zFilename, winFile *pFile){
+  LPWSTR zTok;
+  LPWSTR zName;
+  DWORD lastErrno;
+  BOOL bLogged = FALSE;
+  BOOL bInit = TRUE;
+
+  zName = winUtf8ToUnicode(zFilename);
+  if( zName==0 ){
+    /* out of memory */
+    return SQLITE_IOERR_NOMEM;
+  }
+
+  /* Initialize the local lockdata */
+  memset(&pFile->local, 0, sizeof(pFile->local));
+
+  /* Replace the backslashes from the filename and lowercase it
+  ** to derive a mutex name. */
+  zTok = osCharLowerW(zName);
+  for (;*zTok;zTok++){
+    if (*zTok == '\\') *zTok = '_';
+  }
+
+  /* Create/open the named mutex */
+  pFile->hMutex = osCreateMutexW(NULL, FALSE, zName);
+  if (!pFile->hMutex){
+    pFile->lastErrno = osGetLastError();
+    sqlite3_free(zName);
+    return winLogError(SQLITE_IOERR, pFile->lastErrno,
+                       "winceCreateLock1", zFilename);
+  }
+
+  /* Acquire the mutex before continuing */
+  winceMutexAcquire(pFile->hMutex);
+
+  /* Since the names of named mutexes, semaphores, file mappings etc are
+  ** case-sensitive, take advantage of that by uppercasing the mutex name
+  ** and using that as the shared filemapping name.
+  */
+  osCharUpperW(zName);
+  pFile->hShared = osCreateFileMappingW(INVALID_HANDLE_VALUE, NULL,
+                                        PAGE_READWRITE, 0, sizeof(winceLock),
+                                        zName);
+
+  /* Set a flag that indicates we're the first to create the memory so it
+  ** must be zero-initialized */
+  lastErrno = osGetLastError();
+  if (lastErrno == ERROR_ALREADY_EXISTS){
+    bInit = FALSE;
+  }
+
+  sqlite3_free(zName);
+
+  /* If we succeeded in making the shared memory handle, map it. */
+  if( pFile->hShared ){
+    pFile->shared = (winceLock*)osMapViewOfFile(pFile->hShared,
+             FILE_MAP_READ|FILE_MAP_WRITE, 0, 0, sizeof(winceLock));
+    /* If mapping failed, close the shared memory handle and erase it */
+    if( !pFile->shared ){
+      pFile->lastErrno = osGetLastError();
+      winLogError(SQLITE_IOERR, pFile->lastErrno,
+                  "winceCreateLock2", zFilename);
+      bLogged = TRUE;
+      osCloseHandle(pFile->hShared);
+      pFile->hShared = NULL;
+    }
+  }
+
+  /* If shared memory could not be created, then close the mutex and fail */
+  if( pFile->hShared==NULL ){
+    if( !bLogged ){
+      pFile->lastErrno = lastErrno;
+      winLogError(SQLITE_IOERR, pFile->lastErrno,
+                  "winceCreateLock3", zFilename);
+      bLogged = TRUE;
+    }
+    winceMutexRelease(pFile->hMutex);
+    osCloseHandle(pFile->hMutex);
+    pFile->hMutex = NULL;
+    return SQLITE_IOERR;
+  }
+
+  /* Initialize the shared memory if we're supposed to */
+  if( bInit ){
+    memset(pFile->shared, 0, sizeof(winceLock));
+  }
+
+  winceMutexRelease(pFile->hMutex);
+  return SQLITE_OK;
+}
+
+/*
+** Destroy the part of winFile that deals with wince locks
+*/
+static void winceDestroyLock(winFile *pFile){
+  if (pFile->hMutex){
+    /* Acquire the mutex */
+    winceMutexAcquire(pFile->hMutex);
+
+    /* The following blocks should probably assert in debug mode, but they
+       are to cleanup in case any locks remained open */
+    if (pFile->local.nReaders){
+      pFile->shared->nReaders --;
+    }
+    if (pFile->local.bReserved){
+      pFile->shared->bReserved = FALSE;
+    }
+    if (pFile->local.bPending){
+      pFile->shared->bPending = FALSE;
+    }
+    if (pFile->local.bExclusive){
+      pFile->shared->bExclusive = FALSE;
+    }
+
+    /* De-reference and close our copy of the shared memory handle */
+    osUnmapViewOfFile(pFile->shared);
+    osCloseHandle(pFile->hShared);
+
+    /* Done with the mutex */
+    winceMutexRelease(pFile->hMutex);
+    osCloseHandle(pFile->hMutex);
+    pFile->hMutex = NULL;
+  }
+}
+
+/*
+** An implementation of the LockFile() API of Windows for CE
+*/
+static BOOL winceLockFile(
+  LPHANDLE phFile,
+  DWORD dwFileOffsetLow,
+  DWORD dwFileOffsetHigh,
+  DWORD nNumberOfBytesToLockLow,
+  DWORD nNumberOfBytesToLockHigh
+){
+  winFile *pFile = HANDLE_TO_WINFILE(phFile);
+  BOOL bReturn = FALSE;
+
+  UNUSED_PARAMETER(dwFileOffsetHigh);
+  UNUSED_PARAMETER(nNumberOfBytesToLockHigh);
+
+  if (!pFile->hMutex) return TRUE;
+  winceMutexAcquire(pFile->hMutex);
+
+  /* Wanting an exclusive lock? */
+  if (dwFileOffsetLow == (DWORD)SHARED_FIRST
+       && nNumberOfBytesToLockLow == (DWORD)SHARED_SIZE){
+    if (pFile->shared->nReaders == 0 && pFile->shared->bExclusive == 0){
+       pFile->shared->bExclusive = TRUE;
+       pFile->local.bExclusive = TRUE;
+       bReturn = TRUE;
+    }
+  }
+
+  /* Want a read-only lock? */
+  else if (dwFileOffsetLow == (DWORD)SHARED_FIRST &&
+           nNumberOfBytesToLockLow == 1){
+    if (pFile->shared->bExclusive == 0){
+      pFile->local.nReaders ++;
+      if (pFile->local.nReaders == 1){
+        pFile->shared->nReaders ++;
+      }
+      bReturn = TRUE;
+    }
+  }
+
+  /* Want a pending lock? */
+  else if (dwFileOffsetLow == (DWORD)PENDING_BYTE
+           && nNumberOfBytesToLockLow == 1){
+    /* If no pending lock has been acquired, then acquire it */
+    if (pFile->shared->bPending == 0) {
+      pFile->shared->bPending = TRUE;
+      pFile->local.bPending = TRUE;
+      bReturn = TRUE;
+    }
+  }
+
+  /* Want a reserved lock? */
+  else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE
+           && nNumberOfBytesToLockLow == 1){
+    if (pFile->shared->bReserved == 0) {
+      pFile->shared->bReserved = TRUE;
+      pFile->local.bReserved = TRUE;
+      bReturn = TRUE;
+    }
+  }
+
+  winceMutexRelease(pFile->hMutex);
+  return bReturn;
+}
+
+/*
+** An implementation of the UnlockFile API of Windows for CE
+*/
+static BOOL winceUnlockFile(
+  LPHANDLE phFile,
+  DWORD dwFileOffsetLow,
+  DWORD dwFileOffsetHigh,
+  DWORD nNumberOfBytesToUnlockLow,
+  DWORD nNumberOfBytesToUnlockHigh
+){
+  winFile *pFile = HANDLE_TO_WINFILE(phFile);
+  BOOL bReturn = FALSE;
+
+  UNUSED_PARAMETER(dwFileOffsetHigh);
+  UNUSED_PARAMETER(nNumberOfBytesToUnlockHigh);
+
+  if (!pFile->hMutex) return TRUE;
+  winceMutexAcquire(pFile->hMutex);
+
+  /* Releasing a reader lock or an exclusive lock */
+  if (dwFileOffsetLow == (DWORD)SHARED_FIRST){
+    /* Did we have an exclusive lock? */
+    if (pFile->local.bExclusive){
+      assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE);
+      pFile->local.bExclusive = FALSE;
+      pFile->shared->bExclusive = FALSE;
+      bReturn = TRUE;
+    }
+
+    /* Did we just have a reader lock? */
+    else if (pFile->local.nReaders){
+      assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE
+             || nNumberOfBytesToUnlockLow == 1);
+      pFile->local.nReaders --;
+      if (pFile->local.nReaders == 0)
+      {
+        pFile->shared->nReaders --;
+      }
+      bReturn = TRUE;
+    }
+  }
+
+  /* Releasing a pending lock */
+  else if (dwFileOffsetLow == (DWORD)PENDING_BYTE
+           && nNumberOfBytesToUnlockLow == 1){
+    if (pFile->local.bPending){
+      pFile->local.bPending = FALSE;
+      pFile->shared->bPending = FALSE;
+      bReturn = TRUE;
+    }
+  }
+  /* Releasing a reserved lock */
+  else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE
+           && nNumberOfBytesToUnlockLow == 1){
+    if (pFile->local.bReserved) {
+      pFile->local.bReserved = FALSE;
+      pFile->shared->bReserved = FALSE;
+      bReturn = TRUE;
+    }
+  }
+
+  winceMutexRelease(pFile->hMutex);
+  return bReturn;
+}
+/*
+** End of the special code for wince
+*****************************************************************************/
+#endif /* SQLITE_OS_WINCE */
+
+/*
+** Lock a file region.
+*/
+static BOOL winLockFile(
+  LPHANDLE phFile,
+  DWORD flags,
+  DWORD offsetLow,
+  DWORD offsetHigh,
+  DWORD numBytesLow,
+  DWORD numBytesHigh
+){
+#if SQLITE_OS_WINCE
+  /*
+  ** NOTE: Windows CE is handled differently here due its lack of the Win32
+  **       API LockFile.
+  */
+  return winceLockFile(phFile, offsetLow, offsetHigh,
+                       numBytesLow, numBytesHigh);
+#else
+  if( osIsNT() ){
+    OVERLAPPED ovlp;
+    memset(&ovlp, 0, sizeof(OVERLAPPED));
+    ovlp.Offset = offsetLow;
+    ovlp.OffsetHigh = offsetHigh;
+    return osLockFileEx(*phFile, flags, 0, numBytesLow, numBytesHigh, &ovlp);
+  }else{
+    return osLockFile(*phFile, offsetLow, offsetHigh, numBytesLow,
+                      numBytesHigh);
+  }
+#endif
+}
+
+/*
+** Unlock a file region.
+ */
+static BOOL winUnlockFile(
+  LPHANDLE phFile,
+  DWORD offsetLow,
+  DWORD offsetHigh,
+  DWORD numBytesLow,
+  DWORD numBytesHigh
+){
+#if SQLITE_OS_WINCE
+  /*
+  ** NOTE: Windows CE is handled differently here due its lack of the Win32
+  **       API UnlockFile.
+  */
+  return winceUnlockFile(phFile, offsetLow, offsetHigh,
+                         numBytesLow, numBytesHigh);
+#else
+  if( osIsNT() ){
+    OVERLAPPED ovlp;
+    memset(&ovlp, 0, sizeof(OVERLAPPED));
+    ovlp.Offset = offsetLow;
+    ovlp.OffsetHigh = offsetHigh;
+    return osUnlockFileEx(*phFile, 0, numBytesLow, numBytesHigh, &ovlp);
+  }else{
+    return osUnlockFile(*phFile, offsetLow, offsetHigh, numBytesLow,
+                        numBytesHigh);
+  }
+#endif
+}
+
+/*****************************************************************************
+** The next group of routines implement the I/O methods specified
+** by the sqlite3_io_methods object.
+******************************************************************************/
+
+/*
+** Some Microsoft compilers lack this definition.
+*/
+#ifndef INVALID_SET_FILE_POINTER
+# define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+
+/*
+** Move the current position of the file handle passed as the first
+** argument to offset iOffset within the file. If successful, return 0.
+** Otherwise, set pFile->lastErrno and return non-zero.
+*/
+static int winSeekFile(winFile *pFile, sqlite3_int64 iOffset){
+#if !SQLITE_OS_WINRT
+  LONG upperBits;                 /* Most sig. 32 bits of new offset */
+  LONG lowerBits;                 /* Least sig. 32 bits of new offset */
+  DWORD dwRet;                    /* Value returned by SetFilePointer() */
+  DWORD lastErrno;                /* Value returned by GetLastError() */
+
+  OSTRACE(("SEEK file=%p, offset=%lld\n", pFile->h, iOffset));
+
+  upperBits = (LONG)((iOffset>>32) & 0x7fffffff);
+  lowerBits = (LONG)(iOffset & 0xffffffff);
+
+  /* API oddity: If successful, SetFilePointer() returns a dword
+  ** containing the lower 32-bits of the new file-offset. Or, if it fails,
+  ** it returns INVALID_SET_FILE_POINTER. However according to MSDN,
+  ** INVALID_SET_FILE_POINTER may also be a valid new offset. So to determine
+  ** whether an error has actually occurred, it is also necessary to call
+  ** GetLastError().
+  */
+  dwRet = osSetFilePointer(pFile->h, lowerBits, &upperBits, FILE_BEGIN);
+
+  if( (dwRet==INVALID_SET_FILE_POINTER
+      && ((lastErrno = osGetLastError())!=NO_ERROR)) ){
+    pFile->lastErrno = lastErrno;
+    winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno,
+                "winSeekFile", pFile->zPath);
+    OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h));
+    return 1;
+  }
+
+  OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h));
+  return 0;
+#else
+  /*
+  ** Same as above, except that this implementation works for WinRT.
+  */
+
+  LARGE_INTEGER x;                /* The new offset */
+  BOOL bRet;                      /* Value returned by SetFilePointerEx() */
+
+  x.QuadPart = iOffset;
+  bRet = osSetFilePointerEx(pFile->h, x, 0, FILE_BEGIN);
+
+  if(!bRet){
+    pFile->lastErrno = osGetLastError();
+    winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno,
+                "winSeekFile", pFile->zPath);
+    OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h));
+    return 1;
+  }
+
+  OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h));
+  return 0;
+#endif
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/* Forward references to VFS helper methods used for memory mapped files */
+static int winMapfile(winFile*, sqlite3_int64);
+static int winUnmapfile(winFile*);
+#endif
+
+/*
+** Close a file.
+**
+** It is reported that an attempt to close a handle might sometimes
+** fail.  This is a very unreasonable result, but Windows is notorious
+** for being unreasonable so I do not doubt that it might happen.  If
+** the close fails, we pause for 100 milliseconds and try again.  As
+** many as MX_CLOSE_ATTEMPT attempts to close the handle are made before
+** giving up and returning an error.
+*/
+#define MX_CLOSE_ATTEMPT 3
+static int winClose(sqlite3_file *id){
+  int rc, cnt = 0;
+  winFile *pFile = (winFile*)id;
+
+  assert( id!=0 );
+#ifndef SQLITE_OMIT_WAL
+  assert( pFile->pShm==0 );
+#endif
+  assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE );
+  OSTRACE(("CLOSE file=%p\n", pFile->h));
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  winUnmapfile(pFile);
+#endif
+
+  do{
+    rc = osCloseHandle(pFile->h);
+    /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */
+  }while( rc==0 && ++cnt < MX_CLOSE_ATTEMPT && (sqlite3_win32_sleep(100), 1) );
+#if SQLITE_OS_WINCE
+#define WINCE_DELETION_ATTEMPTS 3
+  winceDestroyLock(pFile);
+  if( pFile->zDeleteOnClose ){
+    int cnt = 0;
+    while(
+           osDeleteFileW(pFile->zDeleteOnClose)==0
+        && osGetFileAttributesW(pFile->zDeleteOnClose)!=0xffffffff
+        && cnt++ < WINCE_DELETION_ATTEMPTS
+    ){
+       sqlite3_win32_sleep(100);  /* Wait a little before trying again */
+    }
+    sqlite3_free(pFile->zDeleteOnClose);
+  }
+#endif
+  if( rc ){
+    pFile->h = NULL;
+  }
+  OpenCounter(-1);
+  OSTRACE(("CLOSE file=%p, rc=%s\n", pFile->h, rc ? "ok" : "failed"));
+  return rc ? SQLITE_OK
+            : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(),
+                          "winClose", pFile->zPath);
+}
+
+/*
+** Read data from a file into a buffer.  Return SQLITE_OK if all
+** bytes were read successfully and SQLITE_IOERR if anything goes
+** wrong.
+*/
+static int winRead(
+  sqlite3_file *id,          /* File to read from */
+  void *pBuf,                /* Write content into this buffer */
+  int amt,                   /* Number of bytes to read */
+  sqlite3_int64 offset       /* Begin reading at this offset */
+){
+#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED)
+  OVERLAPPED overlapped;          /* The offset for ReadFile. */
+#endif
+  winFile *pFile = (winFile*)id;  /* file handle */
+  DWORD nRead;                    /* Number of bytes actually read from file */
+  int nRetry = 0;                 /* Number of retrys */
+
+  assert( id!=0 );
+  assert( amt>0 );
+  assert( offset>=0 );
+  SimulateIOError(return SQLITE_IOERR_READ);
+  OSTRACE(("READ file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n",
+           pFile->h, pBuf, amt, offset, pFile->locktype));
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* Deal with as much of this read request as possible by transfering
+  ** data from the memory mapping using memcpy().  */
+  if( offset<pFile->mmapSize ){
+    if( offset+amt <= pFile->mmapSize ){
+      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+      OSTRACE(("READ-MMAP file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }else{
+      int nCopy = (int)(pFile->mmapSize - offset);
+      memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+      pBuf = &((u8 *)pBuf)[nCopy];
+      amt -= nCopy;
+      offset += nCopy;
+    }
+  }
+#endif
+
+#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED)
+  if( winSeekFile(pFile, offset) ){
+    OSTRACE(("READ file=%p, rc=SQLITE_FULL\n", pFile->h));
+    return SQLITE_FULL;
+  }
+  while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){
+#else
+  memset(&overlapped, 0, sizeof(OVERLAPPED));
+  overlapped.Offset = (LONG)(offset & 0xffffffff);
+  overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff);
+  while( !osReadFile(pFile->h, pBuf, amt, &nRead, &overlapped) &&
+         osGetLastError()!=ERROR_HANDLE_EOF ){
+#endif
+    DWORD lastErrno;
+    if( winRetryIoerr(&nRetry, &lastErrno) ) continue;
+    pFile->lastErrno = lastErrno;
+    OSTRACE(("READ file=%p, rc=SQLITE_IOERR_READ\n", pFile->h));
+    return winLogError(SQLITE_IOERR_READ, pFile->lastErrno,
+                       "winRead", pFile->zPath);
+  }
+  winLogIoerr(nRetry);
+  if( nRead<(DWORD)amt ){
+    /* Unread parts of the buffer must be zero-filled */
+    memset(&((char*)pBuf)[nRead], 0, amt-nRead);
+    OSTRACE(("READ file=%p, rc=SQLITE_IOERR_SHORT_READ\n", pFile->h));
+    return SQLITE_IOERR_SHORT_READ;
+  }
+
+  OSTRACE(("READ file=%p, rc=SQLITE_OK\n", pFile->h));
+  return SQLITE_OK;
+}
+
+/*
+** Write data from a buffer into a file.  Return SQLITE_OK on success
+** or some other error code on failure.
+*/
+static int winWrite(
+  sqlite3_file *id,               /* File to write into */
+  const void *pBuf,               /* The bytes to be written */
+  int amt,                        /* Number of bytes to write */
+  sqlite3_int64 offset            /* Offset into the file to begin writing at */
+){
+  int rc = 0;                     /* True if error has occurred, else false */
+  winFile *pFile = (winFile*)id;  /* File handle */
+  int nRetry = 0;                 /* Number of retries */
+
+  assert( amt>0 );
+  assert( pFile );
+  SimulateIOError(return SQLITE_IOERR_WRITE);
+  SimulateDiskfullError(return SQLITE_FULL);
+
+  OSTRACE(("WRITE file=%p, buffer=%p, amount=%d, offset=%lld, lock=%d\n",
+           pFile->h, pBuf, amt, offset, pFile->locktype));
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* Deal with as much of this write request as possible by transfering
+  ** data from the memory mapping using memcpy().  */
+  if( offset<pFile->mmapSize ){
+    if( offset+amt <= pFile->mmapSize ){
+      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+      OSTRACE(("WRITE-MMAP file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }else{
+      int nCopy = (int)(pFile->mmapSize - offset);
+      memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+      pBuf = &((u8 *)pBuf)[nCopy];
+      amt -= nCopy;
+      offset += nCopy;
+    }
+  }
+#endif
+
+#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED)
+  rc = winSeekFile(pFile, offset);
+  if( rc==0 ){
+#else
+  {
+#endif
+#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED)
+    OVERLAPPED overlapped;        /* The offset for WriteFile. */
+#endif
+    u8 *aRem = (u8 *)pBuf;        /* Data yet to be written */
+    int nRem = amt;               /* Number of bytes yet to be written */
+    DWORD nWrite;                 /* Bytes written by each WriteFile() call */
+    DWORD lastErrno = NO_ERROR;   /* Value returned by GetLastError() */
+
+#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED)
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    overlapped.Offset = (LONG)(offset & 0xffffffff);
+    overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff);
+#endif
+
+    while( nRem>0 ){
+#if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED)
+      if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, 0) ){
+#else
+      if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, &overlapped) ){
+#endif
+        if( winRetryIoerr(&nRetry, &lastErrno) ) continue;
+        break;
+      }
+      assert( nWrite==0 || nWrite<=(DWORD)nRem );
+      if( nWrite==0 || nWrite>(DWORD)nRem ){
+        lastErrno = osGetLastError();
+        break;
+      }
+#if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED)
+      offset += nWrite;
+      overlapped.Offset = (LONG)(offset & 0xffffffff);
+      overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff);
+#endif
+      aRem += nWrite;
+      nRem -= nWrite;
+    }
+    if( nRem>0 ){
+      pFile->lastErrno = lastErrno;
+      rc = 1;
+    }
+  }
+
+  if( rc ){
+    if(   ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL )
+       || ( pFile->lastErrno==ERROR_DISK_FULL )){
+      OSTRACE(("WRITE file=%p, rc=SQLITE_FULL\n", pFile->h));
+      return winLogError(SQLITE_FULL, pFile->lastErrno,
+                         "winWrite1", pFile->zPath);
+    }
+    OSTRACE(("WRITE file=%p, rc=SQLITE_IOERR_WRITE\n", pFile->h));
+    return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno,
+                       "winWrite2", pFile->zPath);
+  }else{
+    winLogIoerr(nRetry);
+  }
+  OSTRACE(("WRITE file=%p, rc=SQLITE_OK\n", pFile->h));
+  return SQLITE_OK;
+}
+
+/*
+** Truncate an open file to a specified size
+*/
+static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){
+  winFile *pFile = (winFile*)id;  /* File handle object */
+  int rc = SQLITE_OK;             /* Return code for this function */
+  DWORD lastErrno;
+
+  assert( pFile );
+  SimulateIOError(return SQLITE_IOERR_TRUNCATE);
+  OSTRACE(("TRUNCATE file=%p, size=%lld, lock=%d\n",
+           pFile->h, nByte, pFile->locktype));
+
+  /* If the user has configured a chunk-size for this file, truncate the
+  ** file so that it consists of an integer number of chunks (i.e. the
+  ** actual file size after the operation may be larger than the requested
+  ** size).
+  */
+  if( pFile->szChunk>0 ){
+    nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
+  }
+
+  /* SetEndOfFile() returns non-zero when successful, or zero when it fails. */
+  if( winSeekFile(pFile, nByte) ){
+    rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
+                     "winTruncate1", pFile->zPath);
+  }else if( 0==osSetEndOfFile(pFile->h) &&
+            ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){
+    pFile->lastErrno = lastErrno;
+    rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno,
+                     "winTruncate2", pFile->zPath);
+  }
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  /* If the file was truncated to a size smaller than the currently
+  ** mapped region, reduce the effective mapping size as well. SQLite will
+  ** use read() and write() to access data beyond this point from now on.
+  */
+  if( pFile->pMapRegion && nByte<pFile->mmapSize ){
+    pFile->mmapSize = nByte;
+  }
+#endif
+
+  OSTRACE(("TRUNCATE file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
+  return rc;
+}
+
+#ifdef SQLITE_TEST
+/*
+** Count the number of fullsyncs and normal syncs.  This is used to test
+** that syncs and fullsyncs are occuring at the right times.
+*/
+SQLITE_API int sqlite3_sync_count = 0;
+SQLITE_API int sqlite3_fullsync_count = 0;
+#endif
+
+/*
+** Make sure all writes to a particular file are committed to disk.
+*/
+static int winSync(sqlite3_file *id, int flags){
+#ifndef SQLITE_NO_SYNC
+  /*
+  ** Used only when SQLITE_NO_SYNC is not defined.
+   */
+  BOOL rc;
+#endif
+#if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \
+    (defined(SQLITE_TEST) && defined(SQLITE_DEBUG))
+  /*
+  ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or
+  ** OSTRACE() macros.
+   */
+  winFile *pFile = (winFile*)id;
+#else
+  UNUSED_PARAMETER(id);
+#endif
+
+  assert( pFile );
+  /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
+  assert((flags&0x0F)==SQLITE_SYNC_NORMAL
+      || (flags&0x0F)==SQLITE_SYNC_FULL
+  );
+
+  /* Unix cannot, but some systems may return SQLITE_FULL from here. This
+  ** line is to test that doing so does not cause any problems.
+  */
+  SimulateDiskfullError( return SQLITE_FULL );
+
+  OSTRACE(("SYNC file=%p, flags=%x, lock=%d\n",
+           pFile->h, flags, pFile->locktype));
+
+#ifndef SQLITE_TEST
+  UNUSED_PARAMETER(flags);
+#else
+  if( (flags&0x0F)==SQLITE_SYNC_FULL ){
+    sqlite3_fullsync_count++;
+  }
+  sqlite3_sync_count++;
+#endif
+
+  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
+  ** no-op
+  */
+#ifdef SQLITE_NO_SYNC
+  OSTRACE(("SYNC-NOP file=%p, rc=SQLITE_OK\n", pFile->h));
+  return SQLITE_OK;
+#else
+  rc = osFlushFileBuffers(pFile->h);
+  SimulateIOError( rc=FALSE );
+  if( rc ){
+    OSTRACE(("SYNC file=%p, rc=SQLITE_OK\n", pFile->h));
+    return SQLITE_OK;
+  }else{
+    pFile->lastErrno = osGetLastError();
+    OSTRACE(("SYNC file=%p, rc=SQLITE_IOERR_FSYNC\n", pFile->h));
+    return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno,
+                       "winSync", pFile->zPath);
+  }
+#endif
+}
+
+/*
+** Determine the current size of a file in bytes
+*/
+static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){
+  winFile *pFile = (winFile*)id;
+  int rc = SQLITE_OK;
+
+  assert( id!=0 );
+  assert( pSize!=0 );
+  SimulateIOError(return SQLITE_IOERR_FSTAT);
+  OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize));
+
+#if SQLITE_OS_WINRT
+  {
+    FILE_STANDARD_INFO info;
+    if( osGetFileInformationByHandleEx(pFile->h, FileStandardInfo,
+                                     &info, sizeof(info)) ){
+      *pSize = info.EndOfFile.QuadPart;
+    }else{
+      pFile->lastErrno = osGetLastError();
+      rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno,
+                       "winFileSize", pFile->zPath);
+    }
+  }
+#else
+  {
+    DWORD upperBits;
+    DWORD lowerBits;
+    DWORD lastErrno;
+
+    lowerBits = osGetFileSize(pFile->h, &upperBits);
+    *pSize = (((sqlite3_int64)upperBits)<<32) + lowerBits;
+    if(   (lowerBits == INVALID_FILE_SIZE)
+       && ((lastErrno = osGetLastError())!=NO_ERROR) ){
+      pFile->lastErrno = lastErrno;
+      rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno,
+                       "winFileSize", pFile->zPath);
+    }
+  }
+#endif
+  OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n",
+           pFile->h, pSize, *pSize, sqlite3ErrName(rc)));
+  return rc;
+}
+
+/*
+** LOCKFILE_FAIL_IMMEDIATELY is undefined on some Windows systems.
+*/
+#ifndef LOCKFILE_FAIL_IMMEDIATELY
+# define LOCKFILE_FAIL_IMMEDIATELY 1
+#endif
+
+#ifndef LOCKFILE_EXCLUSIVE_LOCK
+# define LOCKFILE_EXCLUSIVE_LOCK 2
+#endif
+
+/*
+** Historically, SQLite has used both the LockFile and LockFileEx functions.
+** When the LockFile function was used, it was always expected to fail
+** immediately if the lock could not be obtained.  Also, it always expected to
+** obtain an exclusive lock.  These flags are used with the LockFileEx function
+** and reflect those expectations; therefore, they should not be changed.
+*/
+#ifndef SQLITE_LOCKFILE_FLAGS
+# define SQLITE_LOCKFILE_FLAGS   (LOCKFILE_FAIL_IMMEDIATELY | \
+                                  LOCKFILE_EXCLUSIVE_LOCK)
+#endif
+
+/*
+** Currently, SQLite never calls the LockFileEx function without wanting the
+** call to fail immediately if the lock cannot be obtained.
+*/
+#ifndef SQLITE_LOCKFILEEX_FLAGS
+# define SQLITE_LOCKFILEEX_FLAGS (LOCKFILE_FAIL_IMMEDIATELY)
+#endif
+
+/*
+** Acquire a reader lock.
+** Different API routines are called depending on whether or not this
+** is Win9x or WinNT.
+*/
+static int winGetReadLock(winFile *pFile){
+  int res;
+  OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
+  if( osIsNT() ){
+#if SQLITE_OS_WINCE
+    /*
+    ** NOTE: Windows CE is handled differently here due its lack of the Win32
+    **       API LockFileEx.
+    */
+    res = winceLockFile(&pFile->h, SHARED_FIRST, 0, 1, 0);
+#else
+    res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS, SHARED_FIRST, 0,
+                      SHARED_SIZE, 0);
+#endif
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    int lk;
+    sqlite3_randomness(sizeof(lk), &lk);
+    pFile->sharedLockByte = (short)((lk & 0x7fffffff)%(SHARED_SIZE - 1));
+    res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS,
+                      SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0);
+  }
+#endif
+  if( res == 0 ){
+    pFile->lastErrno = osGetLastError();
+    /* No need to log a failure to lock */
+  }
+  OSTRACE(("READ-LOCK file=%p, result=%d\n", pFile->h, res));
+  return res;
+}
+
+/*
+** Undo a readlock
+*/
+static int winUnlockReadLock(winFile *pFile){
+  int res;
+  DWORD lastErrno;
+  OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype));
+  if( osIsNT() ){
+    res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0);
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    res = winUnlockFile(&pFile->h, SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0);
+  }
+#endif
+  if( res==0 && ((lastErrno = osGetLastError())!=ERROR_NOT_LOCKED) ){
+    pFile->lastErrno = lastErrno;
+    winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno,
+                "winUnlockReadLock", pFile->zPath);
+  }
+  OSTRACE(("READ-UNLOCK file=%p, result=%d\n", pFile->h, res));
+  return res;
+}
+
+/*
+** Lock the file with the lock specified by parameter locktype - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  The winUnlock() routine
+** erases all locks at once and returns us immediately to locking level 0.
+** It is not possible to lower the locking level one step at a time.  You
+** must go straight to locking level 0.
+*/
+static int winLock(sqlite3_file *id, int locktype){
+  int rc = SQLITE_OK;    /* Return code from subroutines */
+  int res = 1;           /* Result of a Windows lock call */
+  int newLocktype;       /* Set pFile->locktype to this value before exiting */
+  int gotPendingLock = 0;/* True if we acquired a PENDING lock this time */
+  winFile *pFile = (winFile*)id;
+  DWORD lastErrno = NO_ERROR;
+
+  assert( id!=0 );
+  OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n",
+           pFile->h, pFile->locktype, pFile->sharedLockByte, locktype));
+
+  /* If there is already a lock of this type or more restrictive on the
+  ** OsFile, do nothing. Don't use the end_lock: exit path, as
+  ** sqlite3OsEnterMutex() hasn't been called yet.
+  */
+  if( pFile->locktype>=locktype ){
+    OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h));
+    return SQLITE_OK;
+  }
+
+  /* Make sure the locking sequence is correct
+  */
+  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
+  assert( locktype!=PENDING_LOCK );
+  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
+
+  /* Lock the PENDING_LOCK byte if we need to acquire a PENDING lock or
+  ** a SHARED lock.  If we are acquiring a SHARED lock, the acquisition of
+  ** the PENDING_LOCK byte is temporary.
+  */
+  newLocktype = pFile->locktype;
+  if(   (pFile->locktype==NO_LOCK)
+     || (   (locktype==EXCLUSIVE_LOCK)
+         && (pFile->locktype==RESERVED_LOCK))
+  ){
+    int cnt = 3;
+    while( cnt-->0 && (res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS,
+                                         PENDING_BYTE, 0, 1, 0))==0 ){
+      /* Try 3 times to get the pending lock.  This is needed to work
+      ** around problems caused by indexing and/or anti-virus software on
+      ** Windows systems.
+      ** If you are using this code as a model for alternative VFSes, do not
+      ** copy this retry logic.  It is a hack intended for Windows only.
+      */
+      lastErrno = osGetLastError();
+      OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, result=%d\n",
+               pFile->h, cnt, res));
+      if( lastErrno==ERROR_INVALID_HANDLE ){
+        pFile->lastErrno = lastErrno;
+        rc = SQLITE_IOERR_LOCK;
+        OSTRACE(("LOCK-FAIL file=%p, count=%d, rc=%s\n",
+                 pFile->h, cnt, sqlite3ErrName(rc)));
+        return rc;
+      }
+      if( cnt ) sqlite3_win32_sleep(1);
+    }
+    gotPendingLock = res;
+    if( !res ){
+      lastErrno = osGetLastError();
+    }
+  }
+
+  /* Acquire a shared lock
+  */
+  if( locktype==SHARED_LOCK && res ){
+    assert( pFile->locktype==NO_LOCK );
+    res = winGetReadLock(pFile);
+    if( res ){
+      newLocktype = SHARED_LOCK;
+    }else{
+      lastErrno = osGetLastError();
+    }
+  }
+
+  /* Acquire a RESERVED lock
+  */
+  if( locktype==RESERVED_LOCK && res ){
+    assert( pFile->locktype==SHARED_LOCK );
+    res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, RESERVED_BYTE, 0, 1, 0);
+    if( res ){
+      newLocktype = RESERVED_LOCK;
+    }else{
+      lastErrno = osGetLastError();
+    }
+  }
+
+  /* Acquire a PENDING lock
+  */
+  if( locktype==EXCLUSIVE_LOCK && res ){
+    newLocktype = PENDING_LOCK;
+    gotPendingLock = 0;
+  }
+
+  /* Acquire an EXCLUSIVE lock
+  */
+  if( locktype==EXCLUSIVE_LOCK && res ){
+    assert( pFile->locktype>=SHARED_LOCK );
+    res = winUnlockReadLock(pFile);
+    res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0,
+                      SHARED_SIZE, 0);
+    if( res ){
+      newLocktype = EXCLUSIVE_LOCK;
+    }else{
+      lastErrno = osGetLastError();
+      winGetReadLock(pFile);
+    }
+  }
+
+  /* If we are holding a PENDING lock that ought to be released, then
+  ** release it now.
+  */
+  if( gotPendingLock && locktype==SHARED_LOCK ){
+    winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0);
+  }
+
+  /* Update the state of the lock has held in the file descriptor then
+  ** return the appropriate result code.
+  */
+  if( res ){
+    rc = SQLITE_OK;
+  }else{
+    pFile->lastErrno = lastErrno;
+    rc = SQLITE_BUSY;
+    OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n",
+             pFile->h, locktype, newLocktype));
+  }
+  pFile->locktype = (u8)newLocktype;
+  OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n",
+           pFile->h, pFile->locktype, sqlite3ErrName(rc)));
+  return rc;
+}
+
+/*
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, return
+** non-zero, otherwise zero.
+*/
+static int winCheckReservedLock(sqlite3_file *id, int *pResOut){
+  int res;
+  winFile *pFile = (winFile*)id;
+
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut));
+
+  assert( id!=0 );
+  if( pFile->locktype>=RESERVED_LOCK ){
+    res = 1;
+    OSTRACE(("TEST-WR-LOCK file=%p, result=%d (local)\n", pFile->h, res));
+  }else{
+    res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE, 0, 1, 0);
+    if( res ){
+      winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0);
+    }
+    res = !res;
+    OSTRACE(("TEST-WR-LOCK file=%p, result=%d (remote)\n", pFile->h, res));
+  }
+  *pResOut = res;
+  OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n",
+           pFile->h, pResOut, *pResOut));
+  return SQLITE_OK;
+}
+
+/*
+** Lower the locking level on file descriptor id to locktype.  locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+**
+** It is not possible for this routine to fail if the second argument
+** is NO_LOCK.  If the second argument is SHARED_LOCK then this routine
+** might return SQLITE_IOERR;
+*/
+static int winUnlock(sqlite3_file *id, int locktype){
+  int type;
+  winFile *pFile = (winFile*)id;
+  int rc = SQLITE_OK;
+  assert( pFile!=0 );
+  assert( locktype<=SHARED_LOCK );
+  OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n",
+           pFile->h, pFile->locktype, pFile->sharedLockByte, locktype));
+  type = pFile->locktype;
+  if( type>=EXCLUSIVE_LOCK ){
+    winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0);
+    if( locktype==SHARED_LOCK && !winGetReadLock(pFile) ){
+      /* This should never happen.  We should always be able to
+      ** reacquire the read lock */
+      rc = winLogError(SQLITE_IOERR_UNLOCK, osGetLastError(),
+                       "winUnlock", pFile->zPath);
+    }
+  }
+  if( type>=RESERVED_LOCK ){
+    winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0);
+  }
+  if( locktype==NO_LOCK && type>=SHARED_LOCK ){
+    winUnlockReadLock(pFile);
+  }
+  if( type>=PENDING_LOCK ){
+    winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0);
+  }
+  pFile->locktype = (u8)locktype;
+  OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n",
+           pFile->h, pFile->locktype, sqlite3ErrName(rc)));
+  return rc;
+}
+
+/*
+** If *pArg is initially negative then this is a query.  Set *pArg to
+** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
+**
+** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
+*/
+static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){
+  if( *pArg<0 ){
+    *pArg = (pFile->ctrlFlags & mask)!=0;
+  }else if( (*pArg)==0 ){
+    pFile->ctrlFlags &= ~mask;
+  }else{
+    pFile->ctrlFlags |= mask;
+  }
+}
+
+/* Forward references to VFS helper methods used for temporary files */
+static int winGetTempname(sqlite3_vfs *, char **);
+static int winIsDir(const void *);
+static BOOL winIsDriveLetterAndColon(const char *);
+
+/*
+** Control and query of the open file handle.
+*/
+static int winFileControl(sqlite3_file *id, int op, void *pArg){
+  winFile *pFile = (winFile*)id;
+  OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg));
+  switch( op ){
+    case SQLITE_FCNTL_LOCKSTATE: {
+      *(int*)pArg = pFile->locktype;
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_LAST_ERRNO: {
+      *(int*)pArg = (int)pFile->lastErrno;
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_CHUNK_SIZE: {
+      pFile->szChunk = *(int *)pArg;
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_SIZE_HINT: {
+      if( pFile->szChunk>0 ){
+        sqlite3_int64 oldSz;
+        int rc = winFileSize(id, &oldSz);
+        if( rc==SQLITE_OK ){
+          sqlite3_int64 newSz = *(sqlite3_int64*)pArg;
+          if( newSz>oldSz ){
+            SimulateIOErrorBenign(1);
+            rc = winTruncate(id, newSz);
+            SimulateIOErrorBenign(0);
+          }
+        }
+        OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
+        return rc;
+      }
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_PERSIST_WAL: {
+      winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg);
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
+      winModeBit(pFile, WINFILE_PSOW, (int*)pArg);
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_VFSNAME: {
+      *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_WIN32_AV_RETRY: {
+      int *a = (int*)pArg;
+      if( a[0]>0 ){
+        winIoerrRetry = a[0];
+      }else{
+        a[0] = winIoerrRetry;
+      }
+      if( a[1]>0 ){
+        winIoerrRetryDelay = a[1];
+      }else{
+        a[1] = winIoerrRetryDelay;
+      }
+      OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h));
+      return SQLITE_OK;
+    }
+#ifdef SQLITE_TEST
+    case SQLITE_FCNTL_WIN32_SET_HANDLE: {
+      LPHANDLE phFile = (LPHANDLE)pArg;
+      HANDLE hOldFile = pFile->h;
+      pFile->h = *phFile;
+      *phFile = hOldFile;
+      OSTRACE(("FCNTL oldFile=%p, newFile=%p, rc=SQLITE_OK\n",
+               hOldFile, pFile->h));
+      return SQLITE_OK;
+    }
+#endif
+    case SQLITE_FCNTL_TEMPFILENAME: {
+      char *zTFile = 0;
+      int rc = winGetTempname(pFile->pVfs, &zTFile);
+      if( rc==SQLITE_OK ){
+        *(char**)pArg = zTFile;
+      }
+      OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
+      return rc;
+    }
+#if SQLITE_MAX_MMAP_SIZE>0
+    case SQLITE_FCNTL_MMAP_SIZE: {
+      i64 newLimit = *(i64*)pArg;
+      int rc = SQLITE_OK;
+      if( newLimit>sqlite3GlobalConfig.mxMmap ){
+        newLimit = sqlite3GlobalConfig.mxMmap;
+      }
+      *(i64*)pArg = pFile->mmapSizeMax;
+      if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
+        pFile->mmapSizeMax = newLimit;
+        if( pFile->mmapSize>0 ){
+          winUnmapfile(pFile);
+          rc = winMapfile(pFile, -1);
+        }
+      }
+      OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc)));
+      return rc;
+    }
+#endif
+  }
+  OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h));
+  return SQLITE_NOTFOUND;
+}
+
+/*
+** Return the sector size in bytes of the underlying block device for
+** the specified file. This is almost always 512 bytes, but may be
+** larger for some devices.
+**
+** SQLite code assumes this function cannot fail. It also assumes that
+** if two files are created in the same file-system directory (i.e.
+** a database and its journal file) that the sector size will be the
+** same for both.
+*/
+static int winSectorSize(sqlite3_file *id){
+  (void)id;
+  return SQLITE_DEFAULT_SECTOR_SIZE;
+}
+
+/*
+** Return a vector of device characteristics.
+*/
+static int winDeviceCharacteristics(sqlite3_file *id){
+  winFile *p = (winFile*)id;
+  return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN |
+         ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0);
+}
+
+/*
+** Windows will only let you create file view mappings
+** on allocation size granularity boundaries.
+** During sqlite3_os_init() we do a GetSystemInfo()
+** to get the granularity size.
+*/
+static SYSTEM_INFO winSysInfo;
+
+#ifndef SQLITE_OMIT_WAL
+
+/*
+** Helper functions to obtain and relinquish the global mutex. The
+** global mutex is used to protect the winLockInfo objects used by
+** this file, all of which may be shared by multiple threads.
+**
+** Function winShmMutexHeld() is used to assert() that the global mutex
+** is held when required. This function is only used as part of assert()
+** statements. e.g.
+**
+**   winShmEnterMutex()
+**     assert( winShmMutexHeld() );
+**   winShmLeaveMutex()
+*/
+static void winShmEnterMutex(void){
+  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+static void winShmLeaveMutex(void){
+  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+#ifndef NDEBUG
+static int winShmMutexHeld(void) {
+  return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+#endif
+
+/*
+** Object used to represent a single file opened and mmapped to provide
+** shared memory.  When multiple threads all reference the same
+** log-summary, each thread has its own winFile object, but they all
+** point to a single instance of this object.  In other words, each
+** log-summary is opened only once per process.
+**
+** winShmMutexHeld() must be true when creating or destroying
+** this object or while reading or writing the following fields:
+**
+**      nRef
+**      pNext
+**
+** The following fields are read-only after the object is created:
+**
+**      fid
+**      zFilename
+**
+** Either winShmNode.mutex must be held or winShmNode.nRef==0 and
+** winShmMutexHeld() is true when reading or writing any other field
+** in this structure.
+**
+*/
+struct winShmNode {
+  sqlite3_mutex *mutex;      /* Mutex to access this object */
+  char *zFilename;           /* Name of the file */
+  winFile hFile;             /* File handle from winOpen */
+
+  int szRegion;              /* Size of shared-memory regions */
+  int nRegion;               /* Size of array apRegion */
+  struct ShmRegion {
+    HANDLE hMap;             /* File handle from CreateFileMapping */
+    void *pMap;
+  } *aRegion;
+  DWORD lastErrno;           /* The Windows errno from the last I/O error */
+
+  int nRef;                  /* Number of winShm objects pointing to this */
+  winShm *pFirst;            /* All winShm objects pointing to this */
+  winShmNode *pNext;         /* Next in list of all winShmNode objects */
+#ifdef SQLITE_DEBUG
+  u8 nextShmId;              /* Next available winShm.id value */
+#endif
+};
+
+/*
+** A global array of all winShmNode objects.
+**
+** The winShmMutexHeld() must be true while reading or writing this list.
+*/
+static winShmNode *winShmNodeList = 0;
+
+/*
+** Structure used internally by this VFS to record the state of an
+** open shared memory connection.
+**
+** The following fields are initialized when this object is created and
+** are read-only thereafter:
+**
+**    winShm.pShmNode
+**    winShm.id
+**
+** All other fields are read/write.  The winShm.pShmNode->mutex must be held
+** while accessing any read/write fields.
+*/
+struct winShm {
+  winShmNode *pShmNode;      /* The underlying winShmNode object */
+  winShm *pNext;             /* Next winShm with the same winShmNode */
+  u8 hasMutex;               /* True if holding the winShmNode mutex */
+  u16 sharedMask;            /* Mask of shared locks held */
+  u16 exclMask;              /* Mask of exclusive locks held */
+#ifdef SQLITE_DEBUG
+  u8 id;                     /* Id of this connection with its winShmNode */
+#endif
+};
+
+/*
+** Constants used for locking
+*/
+#define WIN_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)        /* first lock byte */
+#define WIN_SHM_DMS    (WIN_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */
+
+/*
+** Apply advisory locks for all n bytes beginning at ofst.
+*/
+#define _SHM_UNLCK  1
+#define _SHM_RDLCK  2
+#define _SHM_WRLCK  3
+static int winShmSystemLock(
+  winShmNode *pFile,    /* Apply locks to this open shared-memory segment */
+  int lockType,         /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */
+  int ofst,             /* Offset to first byte to be locked/unlocked */
+  int nByte             /* Number of bytes to lock or unlock */
+){
+  int rc = 0;           /* Result code form Lock/UnlockFileEx() */
+
+  /* Access to the winShmNode object is serialized by the caller */
+  assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 );
+
+  OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n",
+           pFile->hFile.h, lockType, ofst, nByte));
+
+  /* Release/Acquire the system-level lock */
+  if( lockType==_SHM_UNLCK ){
+    rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0);
+  }else{
+    /* Initialize the locking parameters */
+    DWORD dwFlags = LOCKFILE_FAIL_IMMEDIATELY;
+    if( lockType == _SHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK;
+    rc = winLockFile(&pFile->hFile.h, dwFlags, ofst, 0, nByte, 0);
+  }
+
+  if( rc!= 0 ){
+    rc = SQLITE_OK;
+  }else{
+    pFile->lastErrno =  osGetLastError();
+    rc = SQLITE_BUSY;
+  }
+
+  OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n",
+           pFile->hFile.h, (lockType == _SHM_UNLCK) ? "winUnlockFile" :
+           "winLockFile", pFile->lastErrno, sqlite3ErrName(rc)));
+
+  return rc;
+}
+
+/* Forward references to VFS methods */
+static int winOpen(sqlite3_vfs*,const char*,sqlite3_file*,int,int*);
+static int winDelete(sqlite3_vfs *,const char*,int);
+
+/*
+** Purge the winShmNodeList list of all entries with winShmNode.nRef==0.
+**
+** This is not a VFS shared-memory method; it is a utility function called
+** by VFS shared-memory methods.
+*/
+static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){
+  winShmNode **pp;
+  winShmNode *p;
+  assert( winShmMutexHeld() );
+  OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n",
+           osGetCurrentProcessId(), deleteFlag));
+  pp = &winShmNodeList;
+  while( (p = *pp)!=0 ){
+    if( p->nRef==0 ){
+      int i;
+      if( p->mutex ){ sqlite3_mutex_free(p->mutex); }
+      for(i=0; i<p->nRegion; i++){
+        BOOL bRc = osUnmapViewOfFile(p->aRegion[i].pMap);
+        OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n",
+                 osGetCurrentProcessId(), i, bRc ? "ok" : "failed"));
+        UNUSED_VARIABLE_VALUE(bRc);
+        bRc = osCloseHandle(p->aRegion[i].hMap);
+        OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n",
+                 osGetCurrentProcessId(), i, bRc ? "ok" : "failed"));
+        UNUSED_VARIABLE_VALUE(bRc);
+      }
+      if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){
+        SimulateIOErrorBenign(1);
+        winClose((sqlite3_file *)&p->hFile);
+        SimulateIOErrorBenign(0);
+      }
+      if( deleteFlag ){
+        SimulateIOErrorBenign(1);
+        sqlite3BeginBenignMalloc();
+        winDelete(pVfs, p->zFilename, 0);
+        sqlite3EndBenignMalloc();
+        SimulateIOErrorBenign(0);
+      }
+      *pp = p->pNext;
+      sqlite3_free(p->aRegion);
+      sqlite3_free(p);
+    }else{
+      pp = &p->pNext;
+    }
+  }
+}
+
+/*
+** Open the shared-memory area associated with database file pDbFd.
+**
+** When opening a new shared-memory file, if no other instances of that
+** file are currently open, in this process or in other processes, then
+** the file must be truncated to zero length or have its header cleared.
+*/
+static int winOpenSharedMemory(winFile *pDbFd){
+  struct winShm *p;                  /* The connection to be opened */
+  struct winShmNode *pShmNode = 0;   /* The underlying mmapped file */
+  int rc;                            /* Result code */
+  struct winShmNode *pNew;           /* Newly allocated winShmNode */
+  int nName;                         /* Size of zName in bytes */
+
+  assert( pDbFd->pShm==0 );    /* Not previously opened */
+
+  /* Allocate space for the new sqlite3_shm object.  Also speculatively
+  ** allocate space for a new winShmNode and filename.
+  */
+  p = sqlite3MallocZero( sizeof(*p) );
+  if( p==0 ) return SQLITE_IOERR_NOMEM;
+  nName = sqlite3Strlen30(pDbFd->zPath);
+  pNew = sqlite3MallocZero( sizeof(*pShmNode) + nName + 17 );
+  if( pNew==0 ){
+    sqlite3_free(p);
+    return SQLITE_IOERR_NOMEM;
+  }
+  pNew->zFilename = (char*)&pNew[1];
+  sqlite3_snprintf(nName+15, pNew->zFilename, "%s-shm", pDbFd->zPath);
+  sqlite3FileSuffix3(pDbFd->zPath, pNew->zFilename);
+
+  /* Look to see if there is an existing winShmNode that can be used.
+  ** If no matching winShmNode currently exists, create a new one.
+  */
+  winShmEnterMutex();
+  for(pShmNode = winShmNodeList; pShmNode; pShmNode=pShmNode->pNext){
+    /* TBD need to come up with better match here.  Perhaps
+    ** use FILE_ID_BOTH_DIR_INFO Structure.
+    */
+    if( sqlite3StrICmp(pShmNode->zFilename, pNew->zFilename)==0 ) break;
+  }
+  if( pShmNode ){
+    sqlite3_free(pNew);
+  }else{
+    pShmNode = pNew;
+    pNew = 0;
+    ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE;
+    pShmNode->pNext = winShmNodeList;
+    winShmNodeList = pShmNode;
+
+    pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
+    if( pShmNode->mutex==0 ){
+      rc = SQLITE_IOERR_NOMEM;
+      goto shm_open_err;
+    }
+
+    rc = winOpen(pDbFd->pVfs,
+                 pShmNode->zFilename,             /* Name of the file (UTF-8) */
+                 (sqlite3_file*)&pShmNode->hFile,  /* File handle here */
+                 SQLITE_OPEN_WAL | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
+                 0);
+    if( SQLITE_OK!=rc ){
+      goto shm_open_err;
+    }
+
+    /* Check to see if another process is holding the dead-man switch.
+    ** If not, truncate the file to zero length.
+    */
+    if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){
+      rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0);
+      if( rc!=SQLITE_OK ){
+        rc = winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(),
+                         "winOpenShm", pDbFd->zPath);
+      }
+    }
+    if( rc==SQLITE_OK ){
+      winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1);
+      rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS, 1);
+    }
+    if( rc ) goto shm_open_err;
+  }
+
+  /* Make the new connection a child of the winShmNode */
+  p->pShmNode = pShmNode;
+#ifdef SQLITE_DEBUG
+  p->id = pShmNode->nextShmId++;
+#endif
+  pShmNode->nRef++;
+  pDbFd->pShm = p;
+  winShmLeaveMutex();
+
+  /* The reference count on pShmNode has already been incremented under
+  ** the cover of the winShmEnterMutex() mutex and the pointer from the
+  ** new (struct winShm) object to the pShmNode has been set. All that is
+  ** left to do is to link the new object into the linked list starting
+  ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex
+  ** mutex.
+  */
+  sqlite3_mutex_enter(pShmNode->mutex);
+  p->pNext = pShmNode->pFirst;
+  pShmNode->pFirst = p;
+  sqlite3_mutex_leave(pShmNode->mutex);
+  return SQLITE_OK;
+
+  /* Jump here on any error */
+shm_open_err:
+  winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1);
+  winShmPurge(pDbFd->pVfs, 0);      /* This call frees pShmNode if required */
+  sqlite3_free(p);
+  sqlite3_free(pNew);
+  winShmLeaveMutex();
+  return rc;
+}
+
+/*
+** Close a connection to shared-memory.  Delete the underlying
+** storage if deleteFlag is true.
+*/
+static int winShmUnmap(
+  sqlite3_file *fd,          /* Database holding shared memory */
+  int deleteFlag             /* Delete after closing if true */
+){
+  winFile *pDbFd;       /* Database holding shared-memory */
+  winShm *p;            /* The connection to be closed */
+  winShmNode *pShmNode; /* The underlying shared-memory file */
+  winShm **pp;          /* For looping over sibling connections */
+
+  pDbFd = (winFile*)fd;
+  p = pDbFd->pShm;
+  if( p==0 ) return SQLITE_OK;
+  pShmNode = p->pShmNode;
+
+  /* Remove connection p from the set of connections associated
+  ** with pShmNode */
+  sqlite3_mutex_enter(pShmNode->mutex);
+  for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
+  *pp = p->pNext;
+
+  /* Free the connection p */
+  sqlite3_free(p);
+  pDbFd->pShm = 0;
+  sqlite3_mutex_leave(pShmNode->mutex);
+
+  /* If pShmNode->nRef has reached 0, then close the underlying
+  ** shared-memory file, too */
+  winShmEnterMutex();
+  assert( pShmNode->nRef>0 );
+  pShmNode->nRef--;
+  if( pShmNode->nRef==0 ){
+    winShmPurge(pDbFd->pVfs, deleteFlag);
+  }
+  winShmLeaveMutex();
+
+  return SQLITE_OK;
+}
+
+/*
+** Change the lock state for a shared-memory segment.
+*/
+static int winShmLock(
+  sqlite3_file *fd,          /* Database file holding the shared memory */
+  int ofst,                  /* First lock to acquire or release */
+  int n,                     /* Number of locks to acquire or release */
+  int flags                  /* What to do with the lock */
+){
+  winFile *pDbFd = (winFile*)fd;        /* Connection holding shared memory */
+  winShm *p = pDbFd->pShm;              /* The shared memory being locked */
+  winShm *pX;                           /* For looping over all siblings */
+  winShmNode *pShmNode = p->pShmNode;
+  int rc = SQLITE_OK;                   /* Result code */
+  u16 mask;                             /* Mask of locks to take or release */
+
+  assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
+  assert( n>=1 );
+  assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
+       || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
+       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
+       || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
+  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
+
+  mask = (u16)((1U<<(ofst+n)) - (1U<<ofst));
+  assert( n>1 || mask==(1<<ofst) );
+  sqlite3_mutex_enter(pShmNode->mutex);
+  if( flags & SQLITE_SHM_UNLOCK ){
+    u16 allMask = 0; /* Mask of locks held by siblings */
+
+    /* See if any siblings hold this same lock */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( pX==p ) continue;
+      assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
+      allMask |= pX->sharedMask;
+    }
+
+    /* Unlock the system-level locks */
+    if( (mask & allMask)==0 ){
+      rc = winShmSystemLock(pShmNode, _SHM_UNLCK, ofst+WIN_SHM_BASE, n);
+    }else{
+      rc = SQLITE_OK;
+    }
+
+    /* Undo the local locks */
+    if( rc==SQLITE_OK ){
+      p->exclMask &= ~mask;
+      p->sharedMask &= ~mask;
+    }
+  }else if( flags & SQLITE_SHM_SHARED ){
+    u16 allShared = 0;  /* Union of locks held by connections other than "p" */
+
+    /* Find out which shared locks are already held by sibling connections.
+    ** If any sibling already holds an exclusive lock, go ahead and return
+    ** SQLITE_BUSY.
+    */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( (pX->exclMask & mask)!=0 ){
+        rc = SQLITE_BUSY;
+        break;
+      }
+      allShared |= pX->sharedMask;
+    }
+
+    /* Get shared locks at the system level, if necessary */
+    if( rc==SQLITE_OK ){
+      if( (allShared & mask)==0 ){
+        rc = winShmSystemLock(pShmNode, _SHM_RDLCK, ofst+WIN_SHM_BASE, n);
+      }else{
+        rc = SQLITE_OK;
+      }
+    }
+
+    /* Get the local shared locks */
+    if( rc==SQLITE_OK ){
+      p->sharedMask |= mask;
+    }
+  }else{
+    /* Make sure no sibling connections hold locks that will block this
+    ** lock.  If any do, return SQLITE_BUSY right away.
+    */
+    for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
+      if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
+        rc = SQLITE_BUSY;
+        break;
+      }
+    }
+
+    /* Get the exclusive locks at the system level.  Then if successful
+    ** also mark the local connection as being locked.
+    */
+    if( rc==SQLITE_OK ){
+      rc = winShmSystemLock(pShmNode, _SHM_WRLCK, ofst+WIN_SHM_BASE, n);
+      if( rc==SQLITE_OK ){
+        assert( (p->sharedMask & mask)==0 );
+        p->exclMask |= mask;
+      }
+    }
+  }
+  sqlite3_mutex_leave(pShmNode->mutex);
+  OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n",
+           osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask,
+           sqlite3ErrName(rc)));
+  return rc;
+}
+
+/*
+** Implement a memory barrier or memory fence on shared memory.
+**
+** All loads and stores begun before the barrier must complete before
+** any load or store begun after the barrier.
+*/
+static void winShmBarrier(
+  sqlite3_file *fd          /* Database holding the shared memory */
+){
+  UNUSED_PARAMETER(fd);
+  /* MemoryBarrier(); // does not work -- do not know why not */
+  winShmEnterMutex();
+  winShmLeaveMutex();
+}
+
+/*
+** This function is called to obtain a pointer to region iRegion of the
+** shared-memory associated with the database file fd. Shared-memory regions
+** are numbered starting from zero. Each shared-memory region is szRegion
+** bytes in size.
+**
+** If an error occurs, an error code is returned and *pp is set to NULL.
+**
+** Otherwise, if the isWrite parameter is 0 and the requested shared-memory
+** region has not been allocated (by any client, including one running in a
+** separate process), then *pp is set to NULL and SQLITE_OK returned. If
+** isWrite is non-zero and the requested shared-memory region has not yet
+** been allocated, it is allocated by this function.
+**
+** If the shared-memory region has already been allocated or is allocated by
+** this call as described above, then it is mapped into this processes
+** address space (if it is not already), *pp is set to point to the mapped
+** memory and SQLITE_OK returned.
+*/
+static int winShmMap(
+  sqlite3_file *fd,               /* Handle open on database file */
+  int iRegion,                    /* Region to retrieve */
+  int szRegion,                   /* Size of regions */
+  int isWrite,                    /* True to extend file if necessary */
+  void volatile **pp              /* OUT: Mapped memory */
+){
+  winFile *pDbFd = (winFile*)fd;
+  winShm *p = pDbFd->pShm;
+  winShmNode *pShmNode;
+  int rc = SQLITE_OK;
+
+  if( !p ){
+    rc = winOpenSharedMemory(pDbFd);
+    if( rc!=SQLITE_OK ) return rc;
+    p = pDbFd->pShm;
+  }
+  pShmNode = p->pShmNode;
+
+  sqlite3_mutex_enter(pShmNode->mutex);
+  assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
+
+  if( pShmNode->nRegion<=iRegion ){
+    struct ShmRegion *apNew;           /* New aRegion[] array */
+    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
+    sqlite3_int64 sz;                  /* Current size of wal-index file */
+
+    pShmNode->szRegion = szRegion;
+
+    /* The requested region is not mapped into this processes address space.
+    ** Check to see if it has been allocated (i.e. if the wal-index file is
+    ** large enough to contain the requested region).
+    */
+    rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz);
+    if( rc!=SQLITE_OK ){
+      rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(),
+                       "winShmMap1", pDbFd->zPath);
+      goto shmpage_out;
+    }
+
+    if( sz<nByte ){
+      /* The requested memory region does not exist. If isWrite is set to
+      ** zero, exit early. *pp will be set to NULL and SQLITE_OK returned.
+      **
+      ** Alternatively, if isWrite is non-zero, use ftruncate() to allocate
+      ** the requested memory region.
+      */
+      if( !isWrite ) goto shmpage_out;
+      rc = winTruncate((sqlite3_file *)&pShmNode->hFile, nByte);
+      if( rc!=SQLITE_OK ){
+        rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(),
+                         "winShmMap2", pDbFd->zPath);
+        goto shmpage_out;
+      }
+    }
+
+    /* Map the requested memory region into this processes address space. */
+    apNew = (struct ShmRegion *)sqlite3_realloc(
+        pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0])
+    );
+    if( !apNew ){
+      rc = SQLITE_IOERR_NOMEM;
+      goto shmpage_out;
+    }
+    pShmNode->aRegion = apNew;
+
+    while( pShmNode->nRegion<=iRegion ){
+      HANDLE hMap = NULL;         /* file-mapping handle */
+      void *pMap = 0;             /* Mapped memory region */
+
+#if SQLITE_OS_WINRT
+      hMap = osCreateFileMappingFromApp(pShmNode->hFile.h,
+          NULL, PAGE_READWRITE, nByte, NULL
+      );
+#elif defined(SQLITE_WIN32_HAS_WIDE)
+      hMap = osCreateFileMappingW(pShmNode->hFile.h,
+          NULL, PAGE_READWRITE, 0, nByte, NULL
+      );
+#elif defined(SQLITE_WIN32_HAS_ANSI)
+      hMap = osCreateFileMappingA(pShmNode->hFile.h,
+          NULL, PAGE_READWRITE, 0, nByte, NULL
+      );
+#endif
+      OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n",
+               osGetCurrentProcessId(), pShmNode->nRegion, nByte,
+               hMap ? "ok" : "failed"));
+      if( hMap ){
+        int iOffset = pShmNode->nRegion*szRegion;
+        int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity;
+#if SQLITE_OS_WINRT
+        pMap = osMapViewOfFileFromApp(hMap, FILE_MAP_WRITE | FILE_MAP_READ,
+            iOffset - iOffsetShift, szRegion + iOffsetShift
+        );
+#else
+        pMap = osMapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ,
+            0, iOffset - iOffsetShift, szRegion + iOffsetShift
+        );
+#endif
+        OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n",
+                 osGetCurrentProcessId(), pShmNode->nRegion, iOffset,
+                 szRegion, pMap ? "ok" : "failed"));
+      }
+      if( !pMap ){
+        pShmNode->lastErrno = osGetLastError();
+        rc = winLogError(SQLITE_IOERR_SHMMAP, pShmNode->lastErrno,
+                         "winShmMap3", pDbFd->zPath);
+        if( hMap ) osCloseHandle(hMap);
+        goto shmpage_out;
+      }
+
+      pShmNode->aRegion[pShmNode->nRegion].pMap = pMap;
+      pShmNode->aRegion[pShmNode->nRegion].hMap = hMap;
+      pShmNode->nRegion++;
+    }
+  }
+
+shmpage_out:
+  if( pShmNode->nRegion>iRegion ){
+    int iOffset = iRegion*szRegion;
+    int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity;
+    char *p = (char *)pShmNode->aRegion[iRegion].pMap;
+    *pp = (void *)&p[iOffsetShift];
+  }else{
+    *pp = 0;
+  }
+  sqlite3_mutex_leave(pShmNode->mutex);
+  return rc;
+}
+
+#else
+# define winShmMap     0
+# define winShmLock    0
+# define winShmBarrier 0
+# define winShmUnmap   0
+#endif /* #ifndef SQLITE_OMIT_WAL */
+
+/*
+** Cleans up the mapped region of the specified file, if any.
+*/
+#if SQLITE_MAX_MMAP_SIZE>0
+static int winUnmapfile(winFile *pFile){
+  assert( pFile!=0 );
+  OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, "
+           "mmapSize=%lld, mmapSizeActual=%lld, mmapSizeMax=%lld\n",
+           osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion,
+           pFile->mmapSize, pFile->mmapSizeActual, pFile->mmapSizeMax));
+  if( pFile->pMapRegion ){
+    if( !osUnmapViewOfFile(pFile->pMapRegion) ){
+      pFile->lastErrno = osGetLastError();
+      OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, "
+               "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile,
+               pFile->pMapRegion));
+      return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno,
+                         "winUnmapfile1", pFile->zPath);
+    }
+    pFile->pMapRegion = 0;
+    pFile->mmapSize = 0;
+    pFile->mmapSizeActual = 0;
+  }
+  if( pFile->hMap!=NULL ){
+    if( !osCloseHandle(pFile->hMap) ){
+      pFile->lastErrno = osGetLastError();
+      OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n",
+               osGetCurrentProcessId(), pFile, pFile->hMap));
+      return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno,
+                         "winUnmapfile2", pFile->zPath);
+    }
+    pFile->hMap = NULL;
+  }
+  OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n",
+           osGetCurrentProcessId(), pFile));
+  return SQLITE_OK;
+}
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if
+** there already exists a mapping for this file, and there are still
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of
+** the mapping to create. Otherwise, if nByte is less than zero, then the
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured
+** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int winMapfile(winFile *pFd, sqlite3_int64 nByte){
+  sqlite3_int64 nMap = nByte;
+  int rc;
+
+  assert( nMap>=0 || pFd->nFetchOut==0 );
+  OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n",
+           osGetCurrentProcessId(), pFd, nByte));
+
+  if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+  if( nMap<0 ){
+    rc = winFileSize((sqlite3_file*)pFd, &nMap);
+    if( rc ){
+      OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n",
+               osGetCurrentProcessId(), pFd));
+      return SQLITE_IOERR_FSTAT;
+    }
+  }
+  if( nMap>pFd->mmapSizeMax ){
+    nMap = pFd->mmapSizeMax;
+  }
+  nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1);
+
+  if( nMap==0 && pFd->mmapSize>0 ){
+    winUnmapfile(pFd);
+  }
+  if( nMap!=pFd->mmapSize ){
+    void *pNew = 0;
+    DWORD protect = PAGE_READONLY;
+    DWORD flags = FILE_MAP_READ;
+
+    winUnmapfile(pFd);
+    if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){
+      protect = PAGE_READWRITE;
+      flags |= FILE_MAP_WRITE;
+    }
+#if SQLITE_OS_WINRT
+    pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL);
+#elif defined(SQLITE_WIN32_HAS_WIDE)
+    pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect,
+                                (DWORD)((nMap>>32) & 0xffffffff),
+                                (DWORD)(nMap & 0xffffffff), NULL);
+#elif defined(SQLITE_WIN32_HAS_ANSI)
+    pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect,
+                                (DWORD)((nMap>>32) & 0xffffffff),
+                                (DWORD)(nMap & 0xffffffff), NULL);
+#endif
+    if( pFd->hMap==NULL ){
+      pFd->lastErrno = osGetLastError();
+      rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno,
+                       "winMapfile1", pFd->zPath);
+      /* Log the error, but continue normal operation using xRead/xWrite */
+      OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=%s\n",
+               osGetCurrentProcessId(), pFd, sqlite3ErrName(rc)));
+      return SQLITE_OK;
+    }
+    assert( (nMap % winSysInfo.dwPageSize)==0 );
+    assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff );
+#if SQLITE_OS_WINRT
+    pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, (SIZE_T)nMap);
+#else
+    pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap);
+#endif
+    if( pNew==NULL ){
+      osCloseHandle(pFd->hMap);
+      pFd->hMap = NULL;
+      pFd->lastErrno = osGetLastError();
+      rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno,
+                       "winMapfile2", pFd->zPath);
+      /* Log the error, but continue normal operation using xRead/xWrite */
+      OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=%s\n",
+               osGetCurrentProcessId(), pFd, sqlite3ErrName(rc)));
+      return SQLITE_OK;
+    }
+    pFd->pMapRegion = pNew;
+    pFd->mmapSize = nMap;
+    pFd->mmapSizeActual = nMap;
+  }
+
+  OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n",
+           osGetCurrentProcessId(), pFd));
+  return SQLITE_OK;
+}
+#endif /* SQLITE_MAX_MMAP_SIZE>0 */
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually
+** release the reference by calling winUnfetch().
+*/
+static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+#if SQLITE_MAX_MMAP_SIZE>0
+  winFile *pFd = (winFile*)fd;   /* The underlying database file */
+#endif
+  *pp = 0;
+
+  OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n",
+           osGetCurrentProcessId(), fd, iOff, nAmt, pp));
+
+#if SQLITE_MAX_MMAP_SIZE>0
+  if( pFd->mmapSizeMax>0 ){
+    if( pFd->pMapRegion==0 ){
+      int rc = winMapfile(pFd, -1);
+      if( rc!=SQLITE_OK ){
+        OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n",
+                 osGetCurrentProcessId(), pFd, sqlite3ErrName(rc)));
+        return rc;
+      }
+    }
+    if( pFd->mmapSize >= iOff+nAmt ){
+      *pp = &((u8 *)pFd->pMapRegion)[iOff];
+      pFd->nFetchOut++;
+    }
+  }
+#endif
+
+  OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n",
+           osGetCurrentProcessId(), fd, pp, *pp));
+  return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a
+** reference obtained by an earlier call to winFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the winFetch() invocation.
+**
+** Or, if the third argument is NULL, then this function is being called
+** to inform the VFS layer that, according to POSIX, any existing mapping
+** may now be invalid and should be unmapped.
+*/
+static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+#if SQLITE_MAX_MMAP_SIZE>0
+  winFile *pFd = (winFile*)fd;   /* The underlying database file */
+
+  /* If p==0 (unmap the entire file) then there must be no outstanding
+  ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+  ** then there must be at least one outstanding.  */
+  assert( (p==0)==(pFd->nFetchOut==0) );
+
+  /* If p!=0, it must match the iOff value. */
+  assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+  OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n",
+           osGetCurrentProcessId(), pFd, iOff, p));
+
+  if( p ){
+    pFd->nFetchOut--;
+  }else{
+    /* FIXME:  If Windows truly always prevents truncating or deleting a
+    ** file while a mapping is held, then the following winUnmapfile() call
+    ** is unnecessary can be omitted - potentially improving
+    ** performance.  */
+    winUnmapfile(pFd);
+  }
+
+  assert( pFd->nFetchOut>=0 );
+#endif
+
+  OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n",
+           osGetCurrentProcessId(), fd));
+  return SQLITE_OK;
+}
+
+/*
+** Here ends the implementation of all sqlite3_file methods.
+**
+********************** End sqlite3_file Methods *******************************
+******************************************************************************/
+
+/*
+** This vector defines all the methods that can operate on an
+** sqlite3_file for win32.
+*/
+static const sqlite3_io_methods winIoMethod = {
+  3,                              /* iVersion */
+  winClose,                       /* xClose */
+  winRead,                        /* xRead */
+  winWrite,                       /* xWrite */
+  winTruncate,                    /* xTruncate */
+  winSync,                        /* xSync */
+  winFileSize,                    /* xFileSize */
+  winLock,                        /* xLock */
+  winUnlock,                      /* xUnlock */
+  winCheckReservedLock,           /* xCheckReservedLock */
+  winFileControl,                 /* xFileControl */
+  winSectorSize,                  /* xSectorSize */
+  winDeviceCharacteristics,       /* xDeviceCharacteristics */
+  winShmMap,                      /* xShmMap */
+  winShmLock,                     /* xShmLock */
+  winShmBarrier,                  /* xShmBarrier */
+  winShmUnmap,                    /* xShmUnmap */
+  winFetch,                       /* xFetch */
+  winUnfetch                      /* xUnfetch */
+};
+
+/****************************************************************************
+**************************** sqlite3_vfs methods ****************************
+**
+** This division contains the implementation of methods on the
+** sqlite3_vfs object.
+*/
+
+#if defined(__CYGWIN__)
+/*
+** Convert a filename from whatever the underlying operating system
+** supports for filenames into UTF-8.  Space to hold the result is
+** obtained from malloc and must be freed by the calling function.
+*/
+static char *winConvertToUtf8Filename(const void *zFilename){
+  char *zConverted = 0;
+  if( osIsNT() ){
+    zConverted = winUnicodeToUtf8(zFilename);
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    zConverted = sqlite3_win32_mbcs_to_utf8(zFilename);
+  }
+#endif
+  /* caller will handle out of memory */
+  return zConverted;
+}
+#endif
+
+/*
+** Convert a UTF-8 filename into whatever form the underlying
+** operating system wants filenames in.  Space to hold the result
+** is obtained from malloc and must be freed by the calling
+** function.
+*/
+static void *winConvertFromUtf8Filename(const char *zFilename){
+  void *zConverted = 0;
+  if( osIsNT() ){
+    zConverted = winUtf8ToUnicode(zFilename);
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    zConverted = sqlite3_win32_utf8_to_mbcs(zFilename);
+  }
+#endif
+  /* caller will handle out of memory */
+  return zConverted;
+}
+
+/*
+** This function returns non-zero if the specified UTF-8 string buffer
+** ends with a directory separator character or one was successfully
+** added to it.
+*/
+static int winMakeEndInDirSep(int nBuf, char *zBuf){
+  if( zBuf ){
+    int nLen = sqlite3Strlen30(zBuf);
+    if( nLen>0 ){
+      if( winIsDirSep(zBuf[nLen-1]) ){
+        return 1;
+      }else if( nLen+1<nBuf ){
+        zBuf[nLen] = winGetDirSep();
+        zBuf[nLen+1] = '\0';
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** Create a temporary file name and store the resulting pointer into pzBuf.
+** The pointer returned in pzBuf must be freed via sqlite3_free().
+*/
+static int winGetTempname(sqlite3_vfs *pVfs, char **pzBuf){
+  static char zChars[] =
+    "abcdefghijklmnopqrstuvwxyz"
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "0123456789";
+  size_t i, j;
+  int nPre = sqlite3Strlen30(SQLITE_TEMP_FILE_PREFIX);
+  int nMax, nBuf, nDir, nLen;
+  char *zBuf;
+
+  /* It's odd to simulate an io-error here, but really this is just
+  ** using the io-error infrastructure to test that SQLite handles this
+  ** function failing.
+  */
+  SimulateIOError( return SQLITE_IOERR );
+
+  /* Allocate a temporary buffer to store the fully qualified file
+  ** name for the temporary file.  If this fails, we cannot continue.
+  */
+  nMax = pVfs->mxPathname; nBuf = nMax + 2;
+  zBuf = sqlite3MallocZero( nBuf );
+  if( !zBuf ){
+    OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+    return SQLITE_IOERR_NOMEM;
+  }
+
+  /* Figure out the effective temporary directory.  First, check if one
+  ** has been explicitly set by the application; otherwise, use the one
+  ** configured by the operating system.
+  */
+  nDir = nMax - (nPre + 15);
+  assert( nDir>0 );
+  if( sqlite3_temp_directory ){
+    int nDirLen = sqlite3Strlen30(sqlite3_temp_directory);
+    if( nDirLen>0 ){
+      if( !winIsDirSep(sqlite3_temp_directory[nDirLen-1]) ){
+        nDirLen++;
+      }
+      if( nDirLen>nDir ){
+        sqlite3_free(zBuf);
+        OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n"));
+        return winLogError(SQLITE_ERROR, 0, "winGetTempname1", 0);
+      }
+      sqlite3_snprintf(nMax, zBuf, "%s", sqlite3_temp_directory);
+    }
+  }
+#if defined(__CYGWIN__)
+  else{
+    static const char *azDirs[] = {
+       0, /* getenv("SQLITE_TMPDIR") */
+       0, /* getenv("TMPDIR") */
+       0, /* getenv("TMP") */
+       0, /* getenv("TEMP") */
+       0, /* getenv("USERPROFILE") */
+       "/var/tmp",
+       "/usr/tmp",
+       "/tmp",
+       ".",
+       0        /* List terminator */
+    };
+    unsigned int i;
+    const char *zDir = 0;
+
+    if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR");
+    if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR");
+    if( !azDirs[2] ) azDirs[2] = getenv("TMP");
+    if( !azDirs[3] ) azDirs[3] = getenv("TEMP");
+    if( !azDirs[4] ) azDirs[4] = getenv("USERPROFILE");
+    for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
+      void *zConverted;
+      if( zDir==0 ) continue;
+      /* If the path starts with a drive letter followed by the colon
+      ** character, assume it is already a native Win32 path; otherwise,
+      ** it must be converted to a native Win32 path via the Cygwin API
+      ** prior to using it.
+      */
+      if( winIsDriveLetterAndColon(zDir) ){
+        zConverted = winConvertFromUtf8Filename(zDir);
+        if( !zConverted ){
+          sqlite3_free(zBuf);
+          OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+          return SQLITE_IOERR_NOMEM;
+        }
+        if( winIsDir(zConverted) ){
+          sqlite3_snprintf(nMax, zBuf, "%s", zDir);
+          sqlite3_free(zConverted);
+          break;
+        }
+        sqlite3_free(zConverted);
+      }else{
+        zConverted = sqlite3MallocZero( nMax+1 );
+        if( !zConverted ){
+          sqlite3_free(zBuf);
+          OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+          return SQLITE_IOERR_NOMEM;
+        }
+        if( cygwin_conv_path(
+                osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A, zDir,
+                zConverted, nMax+1)<0 ){
+          sqlite3_free(zConverted);
+          sqlite3_free(zBuf);
+          OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_CONVPATH\n"));
+          return winLogError(SQLITE_IOERR_CONVPATH, (DWORD)errno,
+                             "winGetTempname2", zDir);
+        }
+        if( winIsDir(zConverted) ){
+          /* At this point, we know the candidate directory exists and should
+          ** be used.  However, we may need to convert the string containing
+          ** its name into UTF-8 (i.e. if it is UTF-16 right now).
+          */
+          char *zUtf8 = winConvertToUtf8Filename(zConverted);
+          if( !zUtf8 ){
+            sqlite3_free(zConverted);
+            sqlite3_free(zBuf);
+            OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+            return SQLITE_IOERR_NOMEM;
+          }
+          sqlite3_snprintf(nMax, zBuf, "%s", zUtf8);
+          sqlite3_free(zUtf8);
+          sqlite3_free(zConverted);
+          break;
+        }
+        sqlite3_free(zConverted);
+      }
+    }
+  }
+#elif !SQLITE_OS_WINRT && !defined(__CYGWIN__)
+  else if( osIsNT() ){
+    char *zMulti;
+    LPWSTR zWidePath = sqlite3MallocZero( nMax*sizeof(WCHAR) );
+    if( !zWidePath ){
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+      return SQLITE_IOERR_NOMEM;
+    }
+    if( osGetTempPathW(nMax, zWidePath)==0 ){
+      sqlite3_free(zWidePath);
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_GETTEMPPATH\n"));
+      return winLogError(SQLITE_IOERR_GETTEMPPATH, osGetLastError(),
+                         "winGetTempname2", 0);
+    }
+    zMulti = winUnicodeToUtf8(zWidePath);
+    if( zMulti ){
+      sqlite3_snprintf(nMax, zBuf, "%s", zMulti);
+      sqlite3_free(zMulti);
+      sqlite3_free(zWidePath);
+    }else{
+      sqlite3_free(zWidePath);
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+      return SQLITE_IOERR_NOMEM;
+    }
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    char *zUtf8;
+    char *zMbcsPath = sqlite3MallocZero( nMax );
+    if( !zMbcsPath ){
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+      return SQLITE_IOERR_NOMEM;
+    }
+    if( osGetTempPathA(nMax, zMbcsPath)==0 ){
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_GETTEMPPATH\n"));
+      return winLogError(SQLITE_IOERR_GETTEMPPATH, osGetLastError(),
+                         "winGetTempname3", 0);
+    }
+    zUtf8 = sqlite3_win32_mbcs_to_utf8(zMbcsPath);
+    if( zUtf8 ){
+      sqlite3_snprintf(nMax, zBuf, "%s", zUtf8);
+      sqlite3_free(zUtf8);
+    }else{
+      sqlite3_free(zBuf);
+      OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n"));
+      return SQLITE_IOERR_NOMEM;
+    }
+  }
+#endif /* SQLITE_WIN32_HAS_ANSI */
+#endif /* !SQLITE_OS_WINRT */
+
+  /*
+  ** Check to make sure the temporary directory ends with an appropriate
+  ** separator.  If it does not and there is not enough space left to add
+  ** one, fail.
+  */
+  if( !winMakeEndInDirSep(nDir+1, zBuf) ){
+    sqlite3_free(zBuf);
+    OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n"));
+    return winLogError(SQLITE_ERROR, 0, "winGetTempname4", 0);
+  }
+
+  /*
+  ** Check that the output buffer is large enough for the temporary file
+  ** name in the following format:
+  **
+  **   "<temporary_directory>/etilqs_XXXXXXXXXXXXXXX\0\0"
+  **
+  ** If not, return SQLITE_ERROR.  The number 17 is used here in order to
+  ** account for the space used by the 15 character random suffix and the
+  ** two trailing NUL characters.  The final directory separator character
+  ** has already added if it was not already present.
+  */
+  nLen = sqlite3Strlen30(zBuf);
+  if( (nLen + nPre + 17) > nBuf ){
+    sqlite3_free(zBuf);
+    OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n"));
+    return winLogError(SQLITE_ERROR, 0, "winGetTempname5", 0);
+  }
+
+  sqlite3_snprintf(nBuf-16-nLen, zBuf+nLen, SQLITE_TEMP_FILE_PREFIX);
+
+  j = sqlite3Strlen30(zBuf);
+  sqlite3_randomness(15, &zBuf[j]);
+  for(i=0; i<15; i++, j++){
+    zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
+  }
+  zBuf[j] = 0;
+  zBuf[j+1] = 0;
+  *pzBuf = zBuf;
+
+  OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf));
+  return SQLITE_OK;
+}
+
+/*
+** Return TRUE if the named file is really a directory.  Return false if
+** it is something other than a directory, or if there is any kind of memory
+** allocation failure.
+*/
+static int winIsDir(const void *zConverted){
+  DWORD attr;
+  int rc = 0;
+  DWORD lastErrno;
+
+  if( osIsNT() ){
+    int cnt = 0;
+    WIN32_FILE_ATTRIBUTE_DATA sAttrData;
+    memset(&sAttrData, 0, sizeof(sAttrData));
+    while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted,
+                             GetFileExInfoStandard,
+                             &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){}
+    if( !rc ){
+      return 0; /* Invalid name? */
+    }
+    attr = sAttrData.dwFileAttributes;
+#if SQLITE_OS_WINCE==0
+  }else{
+    attr = osGetFileAttributesA((char*)zConverted);
+#endif
+  }
+  return (attr!=INVALID_FILE_ATTRIBUTES) && (attr&FILE_ATTRIBUTE_DIRECTORY);
+}
+
+/*
+** Open a file.
+*/
+static int winOpen(
+  sqlite3_vfs *pVfs,        /* Used to get maximum path name length */
+  const char *zName,        /* Name of the file (UTF-8) */
+  sqlite3_file *id,         /* Write the SQLite file handle here */
+  int flags,                /* Open mode flags */
+  int *pOutFlags            /* Status return flags */
+){
+  HANDLE h;
+  DWORD lastErrno = 0;
+  DWORD dwDesiredAccess;
+  DWORD dwShareMode;
+  DWORD dwCreationDisposition;
+  DWORD dwFlagsAndAttributes = 0;
+#if SQLITE_OS_WINCE
+  int isTemp = 0;
+#endif
+  winFile *pFile = (winFile*)id;
+  void *zConverted;              /* Filename in OS encoding */
+  const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */
+  int cnt = 0;
+
+  /* If argument zPath is a NULL pointer, this function is required to open
+  ** a temporary file. Use this buffer to store the file name in.
+  */
+  char *zTmpname = 0; /* For temporary filename, if necessary. */
+
+  int rc = SQLITE_OK;            /* Function Return Code */
+#if !defined(NDEBUG) || SQLITE_OS_WINCE
+  int eType = flags&0xFFFFFF00;  /* Type of file to open */
+#endif
+
+  int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
+  int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
+  int isCreate     = (flags & SQLITE_OPEN_CREATE);
+  int isReadonly   = (flags & SQLITE_OPEN_READONLY);
+  int isReadWrite  = (flags & SQLITE_OPEN_READWRITE);
+
+#ifndef NDEBUG
+  int isOpenJournal = (isCreate && (
+        eType==SQLITE_OPEN_MASTER_JOURNAL
+     || eType==SQLITE_OPEN_MAIN_JOURNAL
+     || eType==SQLITE_OPEN_WAL
+  ));
+#endif
+
+  OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n",
+           zUtf8Name, id, flags, pOutFlags));
+
+  /* Check the following statements are true:
+  **
+  **   (a) Exactly one of the READWRITE and READONLY flags must be set, and
+  **   (b) if CREATE is set, then READWRITE must also be set, and
+  **   (c) if EXCLUSIVE is set, then CREATE must also be set.
+  **   (d) if DELETEONCLOSE is set, then CREATE must also be set.
+  */
+  assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
+  assert(isCreate==0 || isReadWrite);
+  assert(isExclusive==0 || isCreate);
+  assert(isDelete==0 || isCreate);
+
+  /* The main DB, main journal, WAL file and master journal are never
+  ** automatically deleted. Nor are they ever temporary files.  */
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL );
+
+  /* Assert that the upper layer has set one of the "file-type" flags. */
+  assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB
+       || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
+       || eType==SQLITE_OPEN_SUBJOURNAL   || eType==SQLITE_OPEN_MASTER_JOURNAL
+       || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
+  );
+
+  assert( pFile!=0 );
+  memset(pFile, 0, sizeof(winFile));
+  pFile->h = INVALID_HANDLE_VALUE;
+
+#if SQLITE_OS_WINRT
+  if( !zUtf8Name && !sqlite3_temp_directory ){
+    sqlite3_log(SQLITE_ERROR,
+        "sqlite3_temp_directory variable should be set for WinRT");
+  }
+#endif
+
+  /* If the second argument to this function is NULL, generate a
+  ** temporary file name to use
+  */
+  if( !zUtf8Name ){
+    assert( isDelete && !isOpenJournal );
+    rc = winGetTempname(pVfs, &zTmpname);
+    if( rc!=SQLITE_OK ){
+      OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc)));
+      return rc;
+    }
+    zUtf8Name = zTmpname;
+  }
+
+  /* Database filenames are double-zero terminated if they are not
+  ** URIs with parameters.  Hence, they can always be passed into
+  ** sqlite3_uri_parameter().
+  */
+  assert( (eType!=SQLITE_OPEN_MAIN_DB) || (flags & SQLITE_OPEN_URI) ||
+       zUtf8Name[sqlite3Strlen30(zUtf8Name)+1]==0 );
+
+  /* Convert the filename to the system encoding. */
+  zConverted = winConvertFromUtf8Filename(zUtf8Name);
+  if( zConverted==0 ){
+    sqlite3_free(zTmpname);
+    OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name));
+    return SQLITE_IOERR_NOMEM;
+  }
+
+  if( winIsDir(zConverted) ){
+    sqlite3_free(zConverted);
+    sqlite3_free(zTmpname);
+    OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name));
+    return SQLITE_CANTOPEN_ISDIR;
+  }
+
+  if( isReadWrite ){
+    dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
+  }else{
+    dwDesiredAccess = GENERIC_READ;
+  }
+
+  /* SQLITE_OPEN_EXCLUSIVE is used to make sure that a new file is
+  ** created. SQLite doesn't use it to indicate "exclusive access"
+  ** as it is usually understood.
+  */
+  if( isExclusive ){
+    /* Creates a new file, only if it does not already exist. */
+    /* If the file exists, it fails. */
+    dwCreationDisposition = CREATE_NEW;
+  }else if( isCreate ){
+    /* Open existing file, or create if it doesn't exist */
+    dwCreationDisposition = OPEN_ALWAYS;
+  }else{
+    /* Opens a file, only if it exists. */
+    dwCreationDisposition = OPEN_EXISTING;
+  }
+
+  dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+
+  if( isDelete ){
+#if SQLITE_OS_WINCE
+    dwFlagsAndAttributes = FILE_ATTRIBUTE_HIDDEN;
+    isTemp = 1;
+#else
+    dwFlagsAndAttributes = FILE_ATTRIBUTE_TEMPORARY
+                               | FILE_ATTRIBUTE_HIDDEN
+                               | FILE_FLAG_DELETE_ON_CLOSE;
+#endif
+  }else{
+    dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
+  }
+  /* Reports from the internet are that performance is always
+  ** better if FILE_FLAG_RANDOM_ACCESS is used.  Ticket #2699. */
+#if SQLITE_OS_WINCE
+  dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS;
+#endif
+
+  if( osIsNT() ){
+#if SQLITE_OS_WINRT
+    CREATEFILE2_EXTENDED_PARAMETERS extendedParameters;
+    extendedParameters.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS);
+    extendedParameters.dwFileAttributes =
+            dwFlagsAndAttributes & FILE_ATTRIBUTE_MASK;
+    extendedParameters.dwFileFlags = dwFlagsAndAttributes & FILE_FLAG_MASK;
+    extendedParameters.dwSecurityQosFlags = SECURITY_ANONYMOUS;
+    extendedParameters.lpSecurityAttributes = NULL;
+    extendedParameters.hTemplateFile = NULL;
+    while( (h = osCreateFile2((LPCWSTR)zConverted,
+                              dwDesiredAccess,
+                              dwShareMode,
+                              dwCreationDisposition,
+                              &extendedParameters))==INVALID_HANDLE_VALUE &&
+                              winRetryIoerr(&cnt, &lastErrno) ){
+               /* Noop */
+    }
+#else
+    while( (h = osCreateFileW((LPCWSTR)zConverted,
+                              dwDesiredAccess,
+                              dwShareMode, NULL,
+                              dwCreationDisposition,
+                              dwFlagsAndAttributes,
+                              NULL))==INVALID_HANDLE_VALUE &&
+                              winRetryIoerr(&cnt, &lastErrno) ){
+               /* Noop */
+    }
+#endif
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    while( (h = osCreateFileA((LPCSTR)zConverted,
+                              dwDesiredAccess,
+                              dwShareMode, NULL,
+                              dwCreationDisposition,
+                              dwFlagsAndAttributes,
+                              NULL))==INVALID_HANDLE_VALUE &&
+                              winRetryIoerr(&cnt, &lastErrno) ){
+               /* Noop */
+    }
+  }
+#endif
+  winLogIoerr(cnt);
+
+  OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name,
+           dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok"));
+
+  if( h==INVALID_HANDLE_VALUE ){
+    pFile->lastErrno = lastErrno;
+    winLogError(SQLITE_CANTOPEN, pFile->lastErrno, "winOpen", zUtf8Name);
+    sqlite3_free(zConverted);
+    sqlite3_free(zTmpname);
+    if( isReadWrite && !isExclusive ){
+      return winOpen(pVfs, zName, id,
+         ((flags|SQLITE_OPEN_READONLY) &
+                     ~(SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE)),
+         pOutFlags);
+    }else{
+      return SQLITE_CANTOPEN_BKPT;
+    }
+  }
+
+  if( pOutFlags ){
+    if( isReadWrite ){
+      *pOutFlags = SQLITE_OPEN_READWRITE;
+    }else{
+      *pOutFlags = SQLITE_OPEN_READONLY;
+    }
+  }
+
+  OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, "
+           "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ?
+           *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok"));
+
+#if SQLITE_OS_WINCE
+  if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB
+       && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK
+  ){
+    osCloseHandle(h);
+    sqlite3_free(zConverted);
+    sqlite3_free(zTmpname);
+    OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc)));
+    return rc;
+  }
+  if( isTemp ){
+    pFile->zDeleteOnClose = zConverted;
+  }else
+#endif
+  {
+    sqlite3_free(zConverted);
+  }
+
+  sqlite3_free(zTmpname);
+  pFile->pMethod = &winIoMethod;
+  pFile->pVfs = pVfs;
+  pFile->h = h;
+  if( isReadonly ){
+    pFile->ctrlFlags |= WINFILE_RDONLY;
+  }
+  if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){
+    pFile->ctrlFlags |= WINFILE_PSOW;
+  }
+  pFile->lastErrno = NO_ERROR;
+  pFile->zPath = zName;
+#if SQLITE_MAX_MMAP_SIZE>0
+  pFile->hMap = NULL;
+  pFile->pMapRegion = 0;
+  pFile->mmapSize = 0;
+  pFile->mmapSizeActual = 0;
+  pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap;
+#endif
+
+  OpenCounter(+1);
+  return rc;
+}
+
+/*
+** Delete the named file.
+**
+** Note that Windows does not allow a file to be deleted if some other
+** process has it open.  Sometimes a virus scanner or indexing program
+** will open a journal file shortly after it is created in order to do
+** whatever it does.  While this other process is holding the
+** file open, we will be unable to delete it.  To work around this
+** problem, we delay 100 milliseconds and try to delete again.  Up
+** to MX_DELETION_ATTEMPTs deletion attempts are run before giving
+** up and returning an error.
+*/
+static int winDelete(
+  sqlite3_vfs *pVfs,          /* Not used on win32 */
+  const char *zFilename,      /* Name of file to delete */
+  int syncDir                 /* Not used on win32 */
+){
+  int cnt = 0;
+  int rc;
+  DWORD attr;
+  DWORD lastErrno = 0;
+  void *zConverted;
+  UNUSED_PARAMETER(pVfs);
+  UNUSED_PARAMETER(syncDir);
+
+  SimulateIOError(return SQLITE_IOERR_DELETE);
+  OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir));
+
+  zConverted = winConvertFromUtf8Filename(zFilename);
+  if( zConverted==0 ){
+    OSTRACE(("DELETE name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename));
+    return SQLITE_IOERR_NOMEM;
+  }
+  if( osIsNT() ){
+    do {
+#if SQLITE_OS_WINRT
+      WIN32_FILE_ATTRIBUTE_DATA sAttrData;
+      memset(&sAttrData, 0, sizeof(sAttrData));
+      if ( osGetFileAttributesExW(zConverted, GetFileExInfoStandard,
+                                  &sAttrData) ){
+        attr = sAttrData.dwFileAttributes;
+      }else{
+        lastErrno = osGetLastError();
+        if( lastErrno==ERROR_FILE_NOT_FOUND
+         || lastErrno==ERROR_PATH_NOT_FOUND ){
+          rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */
+        }else{
+          rc = SQLITE_ERROR;
+        }
+        break;
+      }
+#else
+      attr = osGetFileAttributesW(zConverted);
+#endif
+      if ( attr==INVALID_FILE_ATTRIBUTES ){
+        lastErrno = osGetLastError();
+        if( lastErrno==ERROR_FILE_NOT_FOUND
+         || lastErrno==ERROR_PATH_NOT_FOUND ){
+          rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */
+        }else{
+          rc = SQLITE_ERROR;
+        }
+        break;
+      }
+      if ( attr&FILE_ATTRIBUTE_DIRECTORY ){
+        rc = SQLITE_ERROR; /* Files only. */
+        break;
+      }
+      if ( osDeleteFileW(zConverted) ){
+        rc = SQLITE_OK; /* Deleted OK. */
+        break;
+      }
+      if ( !winRetryIoerr(&cnt, &lastErrno) ){
+        rc = SQLITE_ERROR; /* No more retries. */
+        break;
+      }
+    } while(1);
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    do {
+      attr = osGetFileAttributesA(zConverted);
+      if ( attr==INVALID_FILE_ATTRIBUTES ){
+        lastErrno = osGetLastError();
+        if( lastErrno==ERROR_FILE_NOT_FOUND
+         || lastErrno==ERROR_PATH_NOT_FOUND ){
+          rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */
+        }else{
+          rc = SQLITE_ERROR;
+        }
+        break;
+      }
+      if ( attr&FILE_ATTRIBUTE_DIRECTORY ){
+        rc = SQLITE_ERROR; /* Files only. */
+        break;
+      }
+      if ( osDeleteFileA(zConverted) ){
+        rc = SQLITE_OK; /* Deleted OK. */
+        break;
+      }
+      if ( !winRetryIoerr(&cnt, &lastErrno) ){
+        rc = SQLITE_ERROR; /* No more retries. */
+        break;
+      }
+    } while(1);
+  }
+#endif
+  if( rc && rc!=SQLITE_IOERR_DELETE_NOENT ){
+    rc = winLogError(SQLITE_IOERR_DELETE, lastErrno, "winDelete", zFilename);
+  }else{
+    winLogIoerr(cnt);
+  }
+  sqlite3_free(zConverted);
+  OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc)));
+  return rc;
+}
+
+/*
+** Check the existence and status of a file.
+*/
+static int winAccess(
+  sqlite3_vfs *pVfs,         /* Not used on win32 */
+  const char *zFilename,     /* Name of file to check */
+  int flags,                 /* Type of test to make on this file */
+  int *pResOut               /* OUT: Result */
+){
+  DWORD attr;
+  int rc = 0;
+  DWORD lastErrno = 0;
+  void *zConverted;
+  UNUSED_PARAMETER(pVfs);
+
+  SimulateIOError( return SQLITE_IOERR_ACCESS; );
+  OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n",
+           zFilename, flags, pResOut));
+
+  zConverted = winConvertFromUtf8Filename(zFilename);
+  if( zConverted==0 ){
+    OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename));
+    return SQLITE_IOERR_NOMEM;
+  }
+  if( osIsNT() ){
+    int cnt = 0;
+    WIN32_FILE_ATTRIBUTE_DATA sAttrData;
+    memset(&sAttrData, 0, sizeof(sAttrData));
+    while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted,
+                             GetFileExInfoStandard,
+                             &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){}
+    if( rc ){
+      /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file
+      ** as if it does not exist.
+      */
+      if(    flags==SQLITE_ACCESS_EXISTS
+          && sAttrData.nFileSizeHigh==0
+          && sAttrData.nFileSizeLow==0 ){
+        attr = INVALID_FILE_ATTRIBUTES;
+      }else{
+        attr = sAttrData.dwFileAttributes;
+      }
+    }else{
+      winLogIoerr(cnt);
+      if( lastErrno!=ERROR_FILE_NOT_FOUND && lastErrno!=ERROR_PATH_NOT_FOUND ){
+        sqlite3_free(zConverted);
+        return winLogError(SQLITE_IOERR_ACCESS, lastErrno, "winAccess",
+                           zFilename);
+      }else{
+        attr = INVALID_FILE_ATTRIBUTES;
+      }
+    }
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    attr = osGetFileAttributesA((char*)zConverted);
+  }
+#endif
+  sqlite3_free(zConverted);
+  switch( flags ){
+    case SQLITE_ACCESS_READ:
+    case SQLITE_ACCESS_EXISTS:
+      rc = attr!=INVALID_FILE_ATTRIBUTES;
+      break;
+    case SQLITE_ACCESS_READWRITE:
+      rc = attr!=INVALID_FILE_ATTRIBUTES &&
+             (attr & FILE_ATTRIBUTE_READONLY)==0;
+      break;
+    default:
+      assert(!"Invalid flags argument");
+  }
+  *pResOut = rc;
+  OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n",
+           zFilename, pResOut, *pResOut));
+  return SQLITE_OK;
+}
+
+/*
+** Returns non-zero if the specified path name starts with a drive letter
+** followed by a colon character.
+*/
+static BOOL winIsDriveLetterAndColon(
+  const char *zPathname
+){
+  return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' );
+}
+
+/*
+** Returns non-zero if the specified path name should be used verbatim.  If
+** non-zero is returned from this function, the calling function must simply
+** use the provided path name verbatim -OR- resolve it into a full path name
+** using the GetFullPathName Win32 API function (if available).
+*/
+static BOOL winIsVerbatimPathname(
+  const char *zPathname
+){
+  /*
+  ** If the path name starts with a forward slash or a backslash, it is either
+  ** a legal UNC name, a volume relative path, or an absolute path name in the
+  ** "Unix" format on Windows.  There is no easy way to differentiate between
+  ** the final two cases; therefore, we return the safer return value of TRUE
+  ** so that callers of this function will simply use it verbatim.
+  */
+  if ( winIsDirSep(zPathname[0]) ){
+    return TRUE;
+  }
+
+  /*
+  ** If the path name starts with a letter and a colon it is either a volume
+  ** relative path or an absolute path.  Callers of this function must not
+  ** attempt to treat it as a relative path name (i.e. they should simply use
+  ** it verbatim).
+  */
+  if ( winIsDriveLetterAndColon(zPathname) ){
+    return TRUE;
+  }
+
+  /*
+  ** If we get to this point, the path name should almost certainly be a purely
+  ** relative one (i.e. not a UNC name, not absolute, and not volume relative).
+  */
+  return FALSE;
+}
+
+/*
+** Turn a relative pathname into a full pathname.  Write the full
+** pathname into zOut[].  zOut[] will be at least pVfs->mxPathname
+** bytes in size.
+*/
+static int winFullPathname(
+  sqlite3_vfs *pVfs,            /* Pointer to vfs object */
+  const char *zRelative,        /* Possibly relative input path */
+  int nFull,                    /* Size of output buffer in bytes */
+  char *zFull                   /* Output buffer */
+){
+
+#if defined(__CYGWIN__)
+  SimulateIOError( return SQLITE_ERROR );
+  UNUSED_PARAMETER(nFull);
+  assert( nFull>=pVfs->mxPathname );
+  if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){
+    /*
+    ** NOTE: We are dealing with a relative path name and the data
+    **       directory has been set.  Therefore, use it as the basis
+    **       for converting the relative path name to an absolute
+    **       one by prepending the data directory and a slash.
+    */
+    char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 );
+    if( !zOut ){
+      return SQLITE_IOERR_NOMEM;
+    }
+    if( cygwin_conv_path(
+            (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A) |
+            CCP_RELATIVE, zRelative, zOut, pVfs->mxPathname+1)<0 ){
+      sqlite3_free(zOut);
+      return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno,
+                         "winFullPathname1", zRelative);
+    }else{
+      char *zUtf8 = winConvertToUtf8Filename(zOut);
+      if( !zUtf8 ){
+        sqlite3_free(zOut);
+        return SQLITE_IOERR_NOMEM;
+      }
+      sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s",
+                       sqlite3_data_directory, winGetDirSep(), zUtf8);
+      sqlite3_free(zUtf8);
+      sqlite3_free(zOut);
+    }
+  }else{
+    char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 );
+    if( !zOut ){
+      return SQLITE_IOERR_NOMEM;
+    }
+    if( cygwin_conv_path(
+            (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A),
+            zRelative, zOut, pVfs->mxPathname+1)<0 ){
+      sqlite3_free(zOut);
+      return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno,
+                         "winFullPathname2", zRelative);
+    }else{
+      char *zUtf8 = winConvertToUtf8Filename(zOut);
+      if( !zUtf8 ){
+        sqlite3_free(zOut);
+        return SQLITE_IOERR_NOMEM;
+      }
+      sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zUtf8);
+      sqlite3_free(zUtf8);
+      sqlite3_free(zOut);
+    }
+  }
+  return SQLITE_OK;
+#endif
+
+#if (SQLITE_OS_WINCE || SQLITE_OS_WINRT) && !defined(__CYGWIN__)
+  SimulateIOError( return SQLITE_ERROR );
+  /* WinCE has no concept of a relative pathname, or so I am told. */
+  /* WinRT has no way to convert a relative path to an absolute one. */
+  if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){
+    /*
+    ** NOTE: We are dealing with a relative path name and the data
+    **       directory has been set.  Therefore, use it as the basis
+    **       for converting the relative path name to an absolute
+    **       one by prepending the data directory and a backslash.
+    */
+    sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s",
+                     sqlite3_data_directory, winGetDirSep(), zRelative);
+  }else{
+    sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zRelative);
+  }
+  return SQLITE_OK;
+#endif
+
+#if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(__CYGWIN__)
+  DWORD nByte;
+  void *zConverted;
+  char *zOut;
+
+  /* If this path name begins with "/X:", where "X" is any alphabetic
+  ** character, discard the initial "/" from the pathname.
+  */
+  if( zRelative[0]=='/' && winIsDriveLetterAndColon(zRelative+1) ){
+    zRelative++;
+  }
+
+  /* It's odd to simulate an io-error here, but really this is just
+  ** using the io-error infrastructure to test that SQLite handles this
+  ** function failing. This function could fail if, for example, the
+  ** current working directory has been unlinked.
+  */
+  SimulateIOError( return SQLITE_ERROR );
+  if ( sqlite3_data_directory && !winIsVerbatimPathname(zRelative) ){
+    /*
+    ** NOTE: We are dealing with a relative path name and the data
+    **       directory has been set.  Therefore, use it as the basis
+    **       for converting the relative path name to an absolute
+    **       one by prepending the data directory and a backslash.
+    */
+    sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s%c%s",
+                     sqlite3_data_directory, winGetDirSep(), zRelative);
+    return SQLITE_OK;
+  }
+  zConverted = winConvertFromUtf8Filename(zRelative);
+  if( zConverted==0 ){
+    return SQLITE_IOERR_NOMEM;
+  }
+  if( osIsNT() ){
+    LPWSTR zTemp;
+    nByte = osGetFullPathNameW((LPCWSTR)zConverted, 0, 0, 0);
+    if( nByte==0 ){
+      sqlite3_free(zConverted);
+      return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(),
+                         "winFullPathname1", zRelative);
+    }
+    nByte += 3;
+    zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) );
+    if( zTemp==0 ){
+      sqlite3_free(zConverted);
+      return SQLITE_IOERR_NOMEM;
+    }
+    nByte = osGetFullPathNameW((LPCWSTR)zConverted, nByte, zTemp, 0);
+    if( nByte==0 ){
+      sqlite3_free(zConverted);
+      sqlite3_free(zTemp);
+      return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(),
+                         "winFullPathname2", zRelative);
+    }
+    sqlite3_free(zConverted);
+    zOut = winUnicodeToUtf8(zTemp);
+    sqlite3_free(zTemp);
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    char *zTemp;
+    nByte = osGetFullPathNameA((char*)zConverted, 0, 0, 0);
+    if( nByte==0 ){
+      sqlite3_free(zConverted);
+      return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(),
+                         "winFullPathname3", zRelative);
+    }
+    nByte += 3;
+    zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) );
+    if( zTemp==0 ){
+      sqlite3_free(zConverted);
+      return SQLITE_IOERR_NOMEM;
+    }
+    nByte = osGetFullPathNameA((char*)zConverted, nByte, zTemp, 0);
+    if( nByte==0 ){
+      sqlite3_free(zConverted);
+      sqlite3_free(zTemp);
+      return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(),
+                         "winFullPathname4", zRelative);
+    }
+    sqlite3_free(zConverted);
+    zOut = sqlite3_win32_mbcs_to_utf8(zTemp);
+    sqlite3_free(zTemp);
+  }
+#endif
+  if( zOut ){
+    sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zOut);
+    sqlite3_free(zOut);
+    return SQLITE_OK;
+  }else{
+    return SQLITE_IOERR_NOMEM;
+  }
+#endif
+}
+
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+/*
+** Interfaces for opening a shared library, finding entry points
+** within the shared library, and closing the shared library.
+*/
+static void *winDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
+  HANDLE h;
+#if defined(__CYGWIN__)
+  int nFull = pVfs->mxPathname+1;
+  char *zFull = sqlite3MallocZero( nFull );
+  void *zConverted = 0;
+  if( zFull==0 ){
+    OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0));
+    return 0;
+  }
+  if( winFullPathname(pVfs, zFilename, nFull, zFull)!=SQLITE_OK ){
+    sqlite3_free(zFull);
+    OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0));
+    return 0;
+  }
+  zConverted = winConvertFromUtf8Filename(zFull);
+  sqlite3_free(zFull);
+#else
+  void *zConverted = winConvertFromUtf8Filename(zFilename);
+  UNUSED_PARAMETER(pVfs);
+#endif
+  if( zConverted==0 ){
+    OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)0));
+    return 0;
+  }
+  if( osIsNT() ){
+#if SQLITE_OS_WINRT
+    h = osLoadPackagedLibrary((LPCWSTR)zConverted, 0);
+#else
+    h = osLoadLibraryW((LPCWSTR)zConverted);
+#endif
+  }
+#ifdef SQLITE_WIN32_HAS_ANSI
+  else{
+    h = osLoadLibraryA((char*)zConverted);
+  }
+#endif
+  OSTRACE(("DLOPEN name=%s, handle=%p\n", zFilename, (void*)h));
+  sqlite3_free(zConverted);
+  return (void*)h;
+}
+static void winDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
+  UNUSED_PARAMETER(pVfs);
+  winGetLastErrorMsg(osGetLastError(), nBuf, zBufOut);
+}
+static void (*winDlSym(sqlite3_vfs *pVfs,void *pH,const char *zSym))(void){
+  FARPROC proc;
+  UNUSED_PARAMETER(pVfs);
+  proc = osGetProcAddressA((HANDLE)pH, zSym);
+  OSTRACE(("DLSYM handle=%p, symbol=%s, address=%p\n",
+           (void*)pH, zSym, (void*)proc));
+  return (void(*)(void))proc;
+}
+static void winDlClose(sqlite3_vfs *pVfs, void *pHandle){
+  UNUSED_PARAMETER(pVfs);
+  osFreeLibrary((HANDLE)pHandle);
+  OSTRACE(("DLCLOSE handle=%p\n", (void*)pHandle));
+}
+#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
+  #define winDlOpen  0
+  #define winDlError 0
+  #define winDlSym   0
+  #define winDlClose 0
+#endif
+
+
+/*
+** Write up to nBuf bytes of randomness into zBuf.
+*/
+static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
+  int n = 0;
+  UNUSED_PARAMETER(pVfs);
+#if defined(SQLITE_TEST)
+  n = nBuf;
+  memset(zBuf, 0, nBuf);
+#else
+  if( sizeof(SYSTEMTIME)<=nBuf-n ){
+    SYSTEMTIME x;
+    osGetSystemTime(&x);
+    memcpy(&zBuf[n], &x, sizeof(x));
+    n += sizeof(x);
+  }
+  if( sizeof(DWORD)<=nBuf-n ){
+    DWORD pid = osGetCurrentProcessId();
+    memcpy(&zBuf[n], &pid, sizeof(pid));
+    n += sizeof(pid);
+  }
+#if SQLITE_OS_WINRT
+  if( sizeof(ULONGLONG)<=nBuf-n ){
+    ULONGLONG cnt = osGetTickCount64();
+    memcpy(&zBuf[n], &cnt, sizeof(cnt));
+    n += sizeof(cnt);
+  }
+#else
+  if( sizeof(DWORD)<=nBuf-n ){
+    DWORD cnt = osGetTickCount();
+    memcpy(&zBuf[n], &cnt, sizeof(cnt));
+    n += sizeof(cnt);
+  }
+#endif
+  if( sizeof(LARGE_INTEGER)<=nBuf-n ){
+    LARGE_INTEGER i;
+    osQueryPerformanceCounter(&i);
+    memcpy(&zBuf[n], &i, sizeof(i));
+    n += sizeof(i);
+  }
+#endif
+  return n;
+}
+
+
+/*
+** Sleep for a little while.  Return the amount of time slept.
+*/
+static int winSleep(sqlite3_vfs *pVfs, int microsec){
+  sqlite3_win32_sleep((microsec+999)/1000);
+  UNUSED_PARAMETER(pVfs);
+  return ((microsec+999)/1000)*1000;
+}
+
+/*
+** The following variable, if set to a non-zero value, is interpreted as
+** the number of seconds since 1970 and is used to set the result of
+** sqlite3OsCurrentTime() during testing.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_current_time = 0;  /* Fake system time in seconds since 1970. */
+#endif
+
+/*
+** Find the current time (in Universal Coordinated Time).  Write into *piNow
+** the current time and date as a Julian Day number times 86_400_000.  In
+** other words, write into *piNow the number of milliseconds since the Julian
+** epoch of noon in Greenwich on November 24, 4714 B.C according to the
+** proleptic Gregorian calendar.
+**
+** On success, return SQLITE_OK.  Return SQLITE_ERROR if the time and date
+** cannot be found.
+*/
+static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){
+  /* FILETIME structure is a 64-bit value representing the number of
+     100-nanosecond intervals since January 1, 1601 (= JD 2305813.5).
+  */
+  FILETIME ft;
+  static const sqlite3_int64 winFiletimeEpoch = 23058135*(sqlite3_int64)8640000;
+#ifdef SQLITE_TEST
+  static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
+#endif
+  /* 2^32 - to avoid use of LL and warnings in gcc */
+  static const sqlite3_int64 max32BitValue =
+      (sqlite3_int64)2000000000 + (sqlite3_int64)2000000000 +
+      (sqlite3_int64)294967296;
+
+#if SQLITE_OS_WINCE
+  SYSTEMTIME time;
+  osGetSystemTime(&time);
+  /* if SystemTimeToFileTime() fails, it returns zero. */
+  if (!osSystemTimeToFileTime(&time,&ft)){
+    return SQLITE_ERROR;
+  }
+#else
+  osGetSystemTimeAsFileTime( &ft );
+#endif
+
+  *piNow = winFiletimeEpoch +
+            ((((sqlite3_int64)ft.dwHighDateTime)*max32BitValue) +
+               (sqlite3_int64)ft.dwLowDateTime)/(sqlite3_int64)10000;
+
+#ifdef SQLITE_TEST
+  if( sqlite3_current_time ){
+    *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch;
+  }
+#endif
+  UNUSED_PARAMETER(pVfs);
+  return SQLITE_OK;
+}
+
+/*
+** Find the current time (in Universal Coordinated Time).  Write the
+** current time and date as a Julian Day number into *prNow and
+** return 0.  Return 1 if the time and date cannot be found.
+*/
+static int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){
+  int rc;
+  sqlite3_int64 i;
+  rc = winCurrentTimeInt64(pVfs, &i);
+  if( !rc ){
+    *prNow = i/86400000.0;
+  }
+  return rc;
+}
+
+/*
+** The idea is that this function works like a combination of
+** GetLastError() and FormatMessage() on Windows (or errno and
+** strerror_r() on Unix). After an error is returned by an OS
+** function, SQLite calls this function with zBuf pointing to
+** a buffer of nBuf bytes. The OS layer should populate the
+** buffer with a nul-terminated UTF-8 encoded error message
+** describing the last IO error to have occurred within the calling
+** thread.
+**
+** If the error message is too large for the supplied buffer,
+** it should be truncated. The return value of xGetLastError
+** is zero if the error message fits in the buffer, or non-zero
+** otherwise (if the message was truncated). If non-zero is returned,
+** then it is not necessary to include the nul-terminator character
+** in the output buffer.
+**
+** Not supplying an error message will have no adverse effect
+** on SQLite. It is fine to have an implementation that never
+** returns an error message:
+**
+**   int xGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
+**     assert(zBuf[0]=='\0');
+**     return 0;
+**   }
+**
+** However if an error message is supplied, it will be incorporated
+** by sqlite into the error message available to the user using
+** sqlite3_errmsg(), possibly making IO errors easier to debug.
+*/
+static int winGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
+  UNUSED_PARAMETER(pVfs);
+  return winGetLastErrorMsg(osGetLastError(), nBuf, zBuf);
+}
+
+/*
+** Initialize and deinitialize the operating system interface.
+*/
+SQLITE_API int sqlite3_os_init(void){
+  static sqlite3_vfs winVfs = {
+    3,                   /* iVersion */
+    sizeof(winFile),     /* szOsFile */
+    SQLITE_WIN32_MAX_PATH_BYTES, /* mxPathname */
+    0,                   /* pNext */
+    "win32",             /* zName */
+    0,                   /* pAppData */
+    winOpen,             /* xOpen */
+    winDelete,           /* xDelete */
+    winAccess,           /* xAccess */
+    winFullPathname,     /* xFullPathname */
+    winDlOpen,           /* xDlOpen */
+    winDlError,          /* xDlError */
+    winDlSym,            /* xDlSym */
+    winDlClose,          /* xDlClose */
+    winRandomness,       /* xRandomness */
+    winSleep,            /* xSleep */
+    winCurrentTime,      /* xCurrentTime */
+    winGetLastError,     /* xGetLastError */
+    winCurrentTimeInt64, /* xCurrentTimeInt64 */
+    winSetSystemCall,    /* xSetSystemCall */
+    winGetSystemCall,    /* xGetSystemCall */
+    winNextSystemCall,   /* xNextSystemCall */
+  };
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  static sqlite3_vfs winLongPathVfs = {
+    3,                   /* iVersion */
+    sizeof(winFile),     /* szOsFile */
+    SQLITE_WINNT_MAX_PATH_BYTES, /* mxPathname */
+    0,                   /* pNext */
+    "win32-longpath",    /* zName */
+    0,                   /* pAppData */
+    winOpen,             /* xOpen */
+    winDelete,           /* xDelete */
+    winAccess,           /* xAccess */
+    winFullPathname,     /* xFullPathname */
+    winDlOpen,           /* xDlOpen */
+    winDlError,          /* xDlError */
+    winDlSym,            /* xDlSym */
+    winDlClose,          /* xDlClose */
+    winRandomness,       /* xRandomness */
+    winSleep,            /* xSleep */
+    winCurrentTime,      /* xCurrentTime */
+    winGetLastError,     /* xGetLastError */
+    winCurrentTimeInt64, /* xCurrentTimeInt64 */
+    winSetSystemCall,    /* xSetSystemCall */
+    winGetSystemCall,    /* xGetSystemCall */
+    winNextSystemCall,   /* xNextSystemCall */
+  };
+#endif
+
+  /* Double-check that the aSyscall[] array has been constructed
+  ** correctly.  See ticket [bb3a86e890c8e96ab] */
+  assert( ArraySize(aSyscall)==77 );
+
+  /* get memory map allocation granularity */
+  memset(&winSysInfo, 0, sizeof(SYSTEM_INFO));
+#if SQLITE_OS_WINRT
+  osGetNativeSystemInfo(&winSysInfo);
+#else
+  osGetSystemInfo(&winSysInfo);
+#endif
+  assert( winSysInfo.dwAllocationGranularity>0 );
+  assert( winSysInfo.dwPageSize>0 );
+
+  sqlite3_vfs_register(&winVfs, 1);
+
+#if defined(SQLITE_WIN32_HAS_WIDE)
+  sqlite3_vfs_register(&winLongPathVfs, 0);
+#endif
+
+  return SQLITE_OK;
+}
+
+SQLITE_API int sqlite3_os_end(void){
+#if SQLITE_OS_WINRT
+  if( sleepObj!=NULL ){
+    osCloseHandle(sleepObj);
+    sleepObj = NULL;
+  }
+#endif
+  return SQLITE_OK;
+}
+
+#endif /* SQLITE_OS_WIN */
+
+/************** End of os_win.c **********************************************/
+/************** Begin file bitvec.c ******************************************/
+/*
+** 2008 February 16
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements an object that represents a fixed-length
+** bitmap.  Bits are numbered starting with 1.
+**
+** A bitmap is used to record which pages of a database file have been
+** journalled during a transaction, or which pages have the "dont-write"
+** property.  Usually only a few pages are meet either condition.
+** So the bitmap is usually sparse and has low cardinality.
+** But sometimes (for example when during a DROP of a large table) most
+** or all of the pages in a database can get journalled.  In those cases, 
+** the bitmap becomes dense with high cardinality.  The algorithm needs 
+** to handle both cases well.
+**
+** The size of the bitmap is fixed when the object is created.
+**
+** All bits are clear when the bitmap is created.  Individual bits
+** may be set or cleared one at a time.
+**
+** Test operations are about 100 times more common that set operations.
+** Clear operations are exceedingly rare.  There are usually between
+** 5 and 500 set operations per Bitvec object, though the number of sets can
+** sometimes grow into tens of thousands or larger.  The size of the
+** Bitvec object is the number of pages in the database file at the
+** start of a transaction, and is thus usually less than a few thousand,
+** but can be as large as 2 billion for a really big database.
+*/
+
+/* Size of the Bitvec structure in bytes. */
+#define BITVEC_SZ        512
+
+/* Round the union size down to the nearest pointer boundary, since that's how 
+** it will be aligned within the Bitvec struct. */
+#define BITVEC_USIZE     (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*))
+
+/* Type of the array "element" for the bitmap representation. 
+** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE. 
+** Setting this to the "natural word" size of your CPU may improve
+** performance. */
+#define BITVEC_TELEM     u8
+/* Size, in bits, of the bitmap element. */
+#define BITVEC_SZELEM    8
+/* Number of elements in a bitmap array. */
+#define BITVEC_NELEM     (BITVEC_USIZE/sizeof(BITVEC_TELEM))
+/* Number of bits in the bitmap array. */
+#define BITVEC_NBIT      (BITVEC_NELEM*BITVEC_SZELEM)
+
+/* Number of u32 values in hash table. */
+#define BITVEC_NINT      (BITVEC_USIZE/sizeof(u32))
+/* Maximum number of entries in hash table before 
+** sub-dividing and re-hashing. */
+#define BITVEC_MXHASH    (BITVEC_NINT/2)
+/* Hashing function for the aHash representation.
+** Empirical testing showed that the *37 multiplier 
+** (an arbitrary prime)in the hash function provided 
+** no fewer collisions than the no-op *1. */
+#define BITVEC_HASH(X)   (((X)*1)%BITVEC_NINT)
+
+#define BITVEC_NPTR      (BITVEC_USIZE/sizeof(Bitvec *))
+
+
+/*
+** A bitmap is an instance of the following structure.
+**
+** This bitmap records the existence of zero or more bits
+** with values between 1 and iSize, inclusive.
+**
+** There are three possible representations of the bitmap.
+** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight
+** bitmap.  The least significant bit is bit 1.
+**
+** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is
+** a hash table that will hold up to BITVEC_MXHASH distinct values.
+**
+** Otherwise, the value i is redirected into one of BITVEC_NPTR
+** sub-bitmaps pointed to by Bitvec.u.apSub[].  Each subbitmap
+** handles up to iDivisor separate values of i.  apSub[0] holds
+** values between 1 and iDivisor.  apSub[1] holds values between
+** iDivisor+1 and 2*iDivisor.  apSub[N] holds values between
+** N*iDivisor+1 and (N+1)*iDivisor.  Each subbitmap is normalized
+** to hold deal with values between 1 and iDivisor.
+*/
+struct Bitvec {
+  u32 iSize;      /* Maximum bit index.  Max iSize is 4,294,967,296. */
+  u32 nSet;       /* Number of bits that are set - only valid for aHash
+                  ** element.  Max is BITVEC_NINT.  For BITVEC_SZ of 512,
+                  ** this would be 125. */
+  u32 iDivisor;   /* Number of bits handled by each apSub[] entry. */
+                  /* Should >=0 for apSub element. */
+                  /* Max iDivisor is max(u32) / BITVEC_NPTR + 1.  */
+                  /* For a BITVEC_SZ of 512, this would be 34,359,739. */
+  union {
+    BITVEC_TELEM aBitmap[BITVEC_NELEM];    /* Bitmap representation */
+    u32 aHash[BITVEC_NINT];      /* Hash table representation */
+    Bitvec *apSub[BITVEC_NPTR];  /* Recursive representation */
+  } u;
+};
+
+/*
+** Create a new bitmap object able to handle bits between 0 and iSize,
+** inclusive.  Return a pointer to the new object.  Return NULL if 
+** malloc fails.
+*/
+SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){
+  Bitvec *p;
+  assert( sizeof(*p)==BITVEC_SZ );
+  p = sqlite3MallocZero( sizeof(*p) );
+  if( p ){
+    p->iSize = iSize;
+  }
+  return p;
+}
+
+/*
+** Check to see if the i-th bit is set.  Return true or false.
+** If p is NULL (if the bitmap has not been created) or if
+** i is out of range, then return false.
+*/
+SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){
+  if( p==0 ) return 0;
+  if( i>p->iSize || i==0 ) return 0;
+  i--;
+  while( p->iDivisor ){
+    u32 bin = i/p->iDivisor;
+    i = i%p->iDivisor;
+    p = p->u.apSub[bin];
+    if (!p) {
+      return 0;
+    }
+  }
+  if( p->iSize<=BITVEC_NBIT ){
+    return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0;
+  } else{
+    u32 h = BITVEC_HASH(i++);
+    while( p->u.aHash[h] ){
+      if( p->u.aHash[h]==i ) return 1;
+      h = (h+1) % BITVEC_NINT;
+    }
+    return 0;
+  }
+}
+
+/*
+** Set the i-th bit.  Return 0 on success and an error code if
+** anything goes wrong.
+**
+** This routine might cause sub-bitmaps to be allocated.  Failing
+** to get the memory needed to hold the sub-bitmap is the only
+** that can go wrong with an insert, assuming p and i are valid.
+**
+** The calling function must ensure that p is a valid Bitvec object
+** and that the value for "i" is within range of the Bitvec object.
+** Otherwise the behavior is undefined.
+*/
+SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec *p, u32 i){
+  u32 h;
+  if( p==0 ) return SQLITE_OK;
+  assert( i>0 );
+  assert( i<=p->iSize );
+  i--;
+  while((p->iSize > BITVEC_NBIT) && p->iDivisor) {
+    u32 bin = i/p->iDivisor;
+    i = i%p->iDivisor;
+    if( p->u.apSub[bin]==0 ){
+      p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor );
+      if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM;
+    }
+    p = p->u.apSub[bin];
+  }
+  if( p->iSize<=BITVEC_NBIT ){
+    p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1));
+    return SQLITE_OK;
+  }
+  h = BITVEC_HASH(i++);
+  /* if there wasn't a hash collision, and this doesn't */
+  /* completely fill the hash, then just add it without */
+  /* worring about sub-dividing and re-hashing. */
+  if( !p->u.aHash[h] ){
+    if (p->nSet<(BITVEC_NINT-1)) {
+      goto bitvec_set_end;
+    } else {
+      goto bitvec_set_rehash;
+    }
+  }
+  /* there was a collision, check to see if it's already */
+  /* in hash, if not, try to find a spot for it */
+  do {
+    if( p->u.aHash[h]==i ) return SQLITE_OK;
+    h++;
+    if( h>=BITVEC_NINT ) h = 0;
+  } while( p->u.aHash[h] );
+  /* we didn't find it in the hash.  h points to the first */
+  /* available free spot. check to see if this is going to */
+  /* make our hash too "full".  */
+bitvec_set_rehash:
+  if( p->nSet>=BITVEC_MXHASH ){
+    unsigned int j;
+    int rc;
+    u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash));
+    if( aiValues==0 ){
+      return SQLITE_NOMEM;
+    }else{
+      memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
+      memset(p->u.apSub, 0, sizeof(p->u.apSub));
+      p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR;
+      rc = sqlite3BitvecSet(p, i);
+      for(j=0; j<BITVEC_NINT; j++){
+        if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]);
+      }
+      sqlite3StackFree(0, aiValues);
+      return rc;
+    }
+  }
+bitvec_set_end:
+  p->nSet++;
+  p->u.aHash[h] = i;
+  return SQLITE_OK;
+}
+
+/*
+** Clear the i-th bit.
+**
+** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage
+** that BitvecClear can use to rebuilt its hash table.
+*/
+SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){
+  if( p==0 ) return;
+  assert( i>0 );
+  i--;
+  while( p->iDivisor ){
+    u32 bin = i/p->iDivisor;
+    i = i%p->iDivisor;
+    p = p->u.apSub[bin];
+    if (!p) {
+      return;
+    }
+  }
+  if( p->iSize<=BITVEC_NBIT ){
+    p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1)));
+  }else{
+    unsigned int j;
+    u32 *aiValues = pBuf;
+    memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
+    memset(p->u.aHash, 0, sizeof(p->u.aHash));
+    p->nSet = 0;
+    for(j=0; j<BITVEC_NINT; j++){
+      if( aiValues[j] && aiValues[j]!=(i+1) ){
+        u32 h = BITVEC_HASH(aiValues[j]-1);
+        p->nSet++;
+        while( p->u.aHash[h] ){
+          h++;
+          if( h>=BITVEC_NINT ) h = 0;
+        }
+        p->u.aHash[h] = aiValues[j];
+      }
+    }
+  }
+}
+
+/*
+** Destroy a bitmap object.  Reclaim all memory used.
+*/
+SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec *p){
+  if( p==0 ) return;
+  if( p->iDivisor ){
+    unsigned int i;
+    for(i=0; i<BITVEC_NPTR; i++){
+      sqlite3BitvecDestroy(p->u.apSub[i]);
+    }
+  }
+  sqlite3_free(p);
+}
+
+/*
+** Return the value of the iSize parameter specified when Bitvec *p
+** was created.
+*/
+SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec *p){
+  return p->iSize;
+}
+
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+/*
+** Let V[] be an array of unsigned characters sufficient to hold
+** up to N bits.  Let I be an integer between 0 and N.  0<=I<N.
+** Then the following macros can be used to set, clear, or test
+** individual bits within V.
+*/
+#define SETBIT(V,I)      V[I>>3] |= (1<<(I&7))
+#define CLEARBIT(V,I)    V[I>>3] &= ~(1<<(I&7))
+#define TESTBIT(V,I)     (V[I>>3]&(1<<(I&7)))!=0
+
+/*
+** This routine runs an extensive test of the Bitvec code.
+**
+** The input is an array of integers that acts as a program
+** to test the Bitvec.  The integers are opcodes followed
+** by 0, 1, or 3 operands, depending on the opcode.  Another
+** opcode follows immediately after the last operand.
+**
+** There are 6 opcodes numbered from 0 through 5.  0 is the
+** "halt" opcode and causes the test to end.
+**
+**    0          Halt and return the number of errors
+**    1 N S X    Set N bits beginning with S and incrementing by X
+**    2 N S X    Clear N bits beginning with S and incrementing by X
+**    3 N        Set N randomly chosen bits
+**    4 N        Clear N randomly chosen bits
+**    5 N S X    Set N bits from S increment X in array only, not in bitvec
+**
+** The opcodes 1 through 4 perform set and clear operations are performed
+** on both a Bitvec object and on a linear array of bits obtained from malloc.
+** Opcode 5 works on the linear array only, not on the Bitvec.
+** Opcode 5 is used to deliberately induce a fault in order to
+** confirm that error detection works.
+**
+** At the conclusion of the test the linear array is compared
+** against the Bitvec object.  If there are any differences,
+** an error is returned.  If they are the same, zero is returned.
+**
+** If a memory allocation error occurs, return -1.
+*/
+SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int sz, int *aOp){
+  Bitvec *pBitvec = 0;
+  unsigned char *pV = 0;
+  int rc = -1;
+  int i, nx, pc, op;
+  void *pTmpSpace;
+
+  /* Allocate the Bitvec to be tested and a linear array of
+  ** bits to act as the reference */
+  pBitvec = sqlite3BitvecCreate( sz );
+  pV = sqlite3MallocZero( (sz+7)/8 + 1 );
+  pTmpSpace = sqlite3_malloc(BITVEC_SZ);
+  if( pBitvec==0 || pV==0 || pTmpSpace==0  ) goto bitvec_end;
+
+  /* NULL pBitvec tests */
+  sqlite3BitvecSet(0, 1);
+  sqlite3BitvecClear(0, 1, pTmpSpace);
+
+  /* Run the program */
+  pc = 0;
+  while( (op = aOp[pc])!=0 ){
+    switch( op ){
+      case 1:
+      case 2:
+      case 5: {
+        nx = 4;
+        i = aOp[pc+2] - 1;
+        aOp[pc+2] += aOp[pc+3];
+        break;
+      }
+      case 3:
+      case 4: 
+      default: {
+        nx = 2;
+        sqlite3_randomness(sizeof(i), &i);
+        break;
+      }
+    }
+    if( (--aOp[pc+1]) > 0 ) nx = 0;
+    pc += nx;
+    i = (i & 0x7fffffff)%sz;
+    if( (op & 1)!=0 ){
+      SETBIT(pV, (i+1));
+      if( op!=5 ){
+        if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end;
+      }
+    }else{
+      CLEARBIT(pV, (i+1));
+      sqlite3BitvecClear(pBitvec, i+1, pTmpSpace);
+    }
+  }
+
+  /* Test to make sure the linear array exactly matches the
+  ** Bitvec object.  Start with the assumption that they do
+  ** match (rc==0).  Change rc to non-zero if a discrepancy
+  ** is found.
+  */
+  rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1)
+          + sqlite3BitvecTest(pBitvec, 0)
+          + (sqlite3BitvecSize(pBitvec) - sz);
+  for(i=1; i<=sz; i++){
+    if(  (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){
+      rc = i;
+      break;
+    }
+  }
+
+  /* Free allocated structure */
+bitvec_end:
+  sqlite3_free(pTmpSpace);
+  sqlite3_free(pV);
+  sqlite3BitvecDestroy(pBitvec);
+  return rc;
+}
+#endif /* SQLITE_OMIT_BUILTIN_TEST */
+
+/************** End of bitvec.c **********************************************/
+/************** Begin file pcache.c ******************************************/
+/*
+** 2008 August 05
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements that page cache.
+*/
+
+/*
+** A complete page cache is an instance of this structure.
+*/
+struct PCache {
+  PgHdr *pDirty, *pDirtyTail;         /* List of dirty pages in LRU order */
+  PgHdr *pSynced;                     /* Last synced page in dirty page list */
+  int nRef;                           /* Number of referenced pages */
+  int szCache;                        /* Configured cache size */
+  int szPage;                         /* Size of every page in this cache */
+  int szExtra;                        /* Size of extra space for each page */
+  u8 bPurgeable;                      /* True if pages are on backing store */
+  u8 eCreate;                         /* eCreate value for for xFetch() */
+  int (*xStress)(void*,PgHdr*);       /* Call to try make a page clean */
+  void *pStress;                      /* Argument to xStress */
+  sqlite3_pcache *pCache;             /* Pluggable cache module */
+  PgHdr *pPage1;                      /* Reference to page 1 */
+};
+
+/********************************** Linked List Management ********************/
+
+/* Allowed values for second argument to pcacheManageDirtyList() */
+#define PCACHE_DIRTYLIST_REMOVE   1    /* Remove pPage from dirty list */
+#define PCACHE_DIRTYLIST_ADD      2    /* Add pPage to the dirty list */
+#define PCACHE_DIRTYLIST_FRONT    3    /* Move pPage to the front of the list */
+
+/*
+** Manage pPage's participation on the dirty list.  Bits of the addRemove
+** argument determines what operation to do.  The 0x01 bit means first
+** remove pPage from the dirty list.  The 0x02 means add pPage back to
+** the dirty list.  Doing both moves pPage to the front of the dirty list.
+*/
+static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){
+  PCache *p = pPage->pCache;
+
+  if( addRemove & PCACHE_DIRTYLIST_REMOVE ){
+    assert( pPage->pDirtyNext || pPage==p->pDirtyTail );
+    assert( pPage->pDirtyPrev || pPage==p->pDirty );
+  
+    /* Update the PCache1.pSynced variable if necessary. */
+    if( p->pSynced==pPage ){
+      PgHdr *pSynced = pPage->pDirtyPrev;
+      while( pSynced && (pSynced->flags&PGHDR_NEED_SYNC) ){
+        pSynced = pSynced->pDirtyPrev;
+      }
+      p->pSynced = pSynced;
+    }
+  
+    if( pPage->pDirtyNext ){
+      pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev;
+    }else{
+      assert( pPage==p->pDirtyTail );
+      p->pDirtyTail = pPage->pDirtyPrev;
+    }
+    if( pPage->pDirtyPrev ){
+      pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext;
+    }else{
+      assert( pPage==p->pDirty );
+      p->pDirty = pPage->pDirtyNext;
+      if( p->pDirty==0 && p->bPurgeable ){
+        assert( p->eCreate==1 );
+        p->eCreate = 2;
+      }
+    }
+    pPage->pDirtyNext = 0;
+    pPage->pDirtyPrev = 0;
+  }
+  if( addRemove & PCACHE_DIRTYLIST_ADD ){
+    assert( pPage->pDirtyNext==0 && pPage->pDirtyPrev==0 && p->pDirty!=pPage );
+  
+    pPage->pDirtyNext = p->pDirty;
+    if( pPage->pDirtyNext ){
+      assert( pPage->pDirtyNext->pDirtyPrev==0 );
+      pPage->pDirtyNext->pDirtyPrev = pPage;
+    }else{
+      p->pDirtyTail = pPage;
+      if( p->bPurgeable ){
+        assert( p->eCreate==2 );
+        p->eCreate = 1;
+      }
+    }
+    p->pDirty = pPage;
+    if( !p->pSynced && 0==(pPage->flags&PGHDR_NEED_SYNC) ){
+      p->pSynced = pPage;
+    }
+  }
+}
+
+/*
+** Wrapper around the pluggable caches xUnpin method. If the cache is
+** being used for an in-memory database, this function is a no-op.
+*/
+static void pcacheUnpin(PgHdr *p){
+  if( p->pCache->bPurgeable ){
+    if( p->pgno==1 ){
+      p->pCache->pPage1 = 0;
+    }
+    sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0);
+  }
+}
+
+/*
+** Compute the number of pages of cache requested.
+*/
+static int numberOfCachePages(PCache *p){
+  if( p->szCache>=0 ){
+    return p->szCache;
+  }else{
+    return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
+  }
+}
+
+/*************************************************** General Interfaces ******
+**
+** Initialize and shutdown the page cache subsystem. Neither of these 
+** functions are threadsafe.
+*/
+SQLITE_PRIVATE int sqlite3PcacheInitialize(void){
+  if( sqlite3GlobalConfig.pcache2.xInit==0 ){
+    /* IMPLEMENTATION-OF: R-26801-64137 If the xInit() method is NULL, then the
+    ** built-in default page cache is used instead of the application defined
+    ** page cache. */
+    sqlite3PCacheSetDefault();
+  }
+  return sqlite3GlobalConfig.pcache2.xInit(sqlite3GlobalConfig.pcache2.pArg);
+}
+SQLITE_PRIVATE void sqlite3PcacheShutdown(void){
+  if( sqlite3GlobalConfig.pcache2.xShutdown ){
+    /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */
+    sqlite3GlobalConfig.pcache2.xShutdown(sqlite3GlobalConfig.pcache2.pArg);
+  }
+}
+
+/*
+** Return the size in bytes of a PCache object.
+*/
+SQLITE_PRIVATE int sqlite3PcacheSize(void){ return sizeof(PCache); }
+
+/*
+** Create a new PCache object. Storage space to hold the object
+** has already been allocated and is passed in as the p pointer. 
+** The caller discovers how much space needs to be allocated by 
+** calling sqlite3PcacheSize().
+*/
+SQLITE_PRIVATE int sqlite3PcacheOpen(
+  int szPage,                  /* Size of every page */
+  int szExtra,                 /* Extra space associated with each page */
+  int bPurgeable,              /* True if pages are on backing store */
+  int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
+  void *pStress,               /* Argument to xStress */
+  PCache *p                    /* Preallocated space for the PCache */
+){
+  memset(p, 0, sizeof(PCache));
+  p->szPage = 1;
+  p->szExtra = szExtra;
+  p->bPurgeable = bPurgeable;
+  p->eCreate = 2;
+  p->xStress = xStress;
+  p->pStress = pStress;
+  p->szCache = 100;
+  return sqlite3PcacheSetPageSize(p, szPage);
+}
+
+/*
+** Change the page size for PCache object. The caller must ensure that there
+** are no outstanding page references when this function is called.
+*/
+SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
+  assert( pCache->nRef==0 && pCache->pDirty==0 );
+  if( pCache->szPage ){
+    sqlite3_pcache *pNew;
+    pNew = sqlite3GlobalConfig.pcache2.xCreate(
+                szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)),
+                pCache->bPurgeable
+    );
+    if( pNew==0 ) return SQLITE_NOMEM;
+    sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache));
+    if( pCache->pCache ){
+      sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
+    }
+    pCache->pCache = pNew;
+    pCache->pPage1 = 0;
+    pCache->szPage = szPage;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Try to obtain a page from the cache.
+**
+** This routine returns a pointer to an sqlite3_pcache_page object if
+** such an object is already in cache, or if a new one is created.
+** This routine returns a NULL pointer if the object was not in cache
+** and could not be created.
+**
+** The createFlags should be 0 to check for existing pages and should
+** be 3 (not 1, but 3) to try to create a new page.
+**
+** If the createFlag is 0, then NULL is always returned if the page
+** is not already in the cache.  If createFlag is 1, then a new page
+** is created only if that can be done without spilling dirty pages
+** and without exceeding the cache size limit.
+**
+** The caller needs to invoke sqlite3PcacheFetchFinish() to properly
+** initialize the sqlite3_pcache_page object and convert it into a
+** PgHdr object.  The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish()
+** routines are split this way for performance reasons. When separated
+** they can both (usually) operate without having to push values to
+** the stack on entry and pop them back off on exit, which saves a
+** lot of pushing and popping.
+*/
+SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch(
+  PCache *pCache,       /* Obtain the page from this cache */
+  Pgno pgno,            /* Page number to obtain */
+  int createFlag        /* If true, create page if it does not exist already */
+){
+  int eCreate;
+
+  assert( pCache!=0 );
+  assert( pCache->pCache!=0 );
+  assert( createFlag==3 || createFlag==0 );
+  assert( pgno>0 );
+
+  /* eCreate defines what to do if the page does not exist.
+  **    0     Do not allocate a new page.  (createFlag==0)
+  **    1     Allocate a new page if doing so is inexpensive.
+  **          (createFlag==1 AND bPurgeable AND pDirty)
+  **    2     Allocate a new page even it doing so is difficult.
+  **          (createFlag==1 AND !(bPurgeable AND pDirty)
+  */
+  eCreate = createFlag & pCache->eCreate;
+  assert( eCreate==0 || eCreate==1 || eCreate==2 );
+  assert( createFlag==0 || pCache->eCreate==eCreate );
+  assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) );
+  return sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
+}
+
+/*
+** If the sqlite3PcacheFetch() routine is unable to allocate a new
+** page because new clean pages are available for reuse and the cache
+** size limit has been reached, then this routine can be invoked to 
+** try harder to allocate a page.  This routine might invoke the stress
+** callback to spill dirty pages to the journal.  It will then try to
+** allocate the new page and will only fail to allocate a new page on
+** an OOM error.
+**
+** This routine should be invoked only after sqlite3PcacheFetch() fails.
+*/
+SQLITE_PRIVATE int sqlite3PcacheFetchStress(
+  PCache *pCache,                 /* Obtain the page from this cache */
+  Pgno pgno,                      /* Page number to obtain */
+  sqlite3_pcache_page **ppPage    /* Write result here */
+){
+  PgHdr *pPg;
+  if( pCache->eCreate==2 ) return 0;
+
+
+  /* Find a dirty page to write-out and recycle. First try to find a 
+  ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
+  ** cleared), but if that is not possible settle for any other 
+  ** unreferenced dirty page.
+  */
+  for(pPg=pCache->pSynced; 
+      pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
+      pPg=pPg->pDirtyPrev
+  );
+  pCache->pSynced = pPg;
+  if( !pPg ){
+    for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
+  }
+  if( pPg ){
+    int rc;
+#ifdef SQLITE_LOG_CACHE_SPILL
+    sqlite3_log(SQLITE_FULL, 
+                "spill page %d making room for %d - cache used: %d/%d",
+                pPg->pgno, pgno,
+                sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache),
+                numberOfCachePages(pCache));
+#endif
+    rc = pCache->xStress(pCache->pStress, pPg);
+    if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
+      return rc;
+    }
+  }
+  *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
+  return *ppPage==0 ? SQLITE_NOMEM : SQLITE_OK;
+}
+
+/*
+** This is a helper routine for sqlite3PcacheFetchFinish()
+**
+** In the uncommon case where the page being fetched has not been
+** initialized, this routine is invoked to do the initialization.
+** This routine is broken out into a separate function since it
+** requires extra stack manipulation that can be avoided in the common
+** case.
+*/
+static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit(
+  PCache *pCache,             /* Obtain the page from this cache */
+  Pgno pgno,                  /* Page number obtained */
+  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
+){
+  PgHdr *pPgHdr;
+  assert( pPage!=0 );
+  pPgHdr = (PgHdr*)pPage->pExtra;
+  assert( pPgHdr->pPage==0 );
+ memset(pPgHdr, 0, sizeof(PgHdr));
+  pPgHdr->pPage = pPage;
+  pPgHdr->pData = pPage->pBuf;
+  pPgHdr->pExtra = (void *)&pPgHdr[1];
+  memset(pPgHdr->pExtra, 0, pCache->szExtra);
+  pPgHdr->pCache = pCache;
+  pPgHdr->pgno = pgno;
+  return sqlite3PcacheFetchFinish(pCache,pgno,pPage);
+}
+
+/*
+** This routine converts the sqlite3_pcache_page object returned by
+** sqlite3PcacheFetch() into an initialized PgHdr object.  This routine
+** must be called after sqlite3PcacheFetch() in order to get a usable
+** result.
+*/
+SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish(
+  PCache *pCache,             /* Obtain the page from this cache */
+  Pgno pgno,                  /* Page number obtained */
+  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
+){
+  PgHdr *pPgHdr;
+
+  if( pPage==0 ) return 0;
+  pPgHdr = (PgHdr *)pPage->pExtra;
+
+  if( !pPgHdr->pPage ){
+    return pcacheFetchFinishWithInit(pCache, pgno, pPage);
+  }
+  if( 0==pPgHdr->nRef ){
+    pCache->nRef++;
+  }
+  pPgHdr->nRef++;
+  if( pgno==1 ){
+    pCache->pPage1 = pPgHdr;
+  }
+  return pPgHdr;
+}
+
+/*
+** Decrement the reference count on a page. If the page is clean and the
+** reference count drops to 0, then it is made eligible for recycling.
+*/
+SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
+  assert( p->nRef>0 );
+  p->nRef--;
+  if( p->nRef==0 ){
+    p->pCache->nRef--;
+    if( (p->flags&PGHDR_DIRTY)==0 ){
+      pcacheUnpin(p);
+    }else if( p->pDirtyPrev!=0 ){
+      /* Move the page to the head of the dirty list. */
+      pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT);
+    }
+  }
+}
+
+/*
+** Increase the reference count of a supplied page by 1.
+*/
+SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr *p){
+  assert(p->nRef>0);
+  p->nRef++;
+}
+
+/*
+** Drop a page from the cache. There must be exactly one reference to the
+** page. This function deletes that reference, so after it returns the
+** page pointed to by p is invalid.
+*/
+SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){
+  assert( p->nRef==1 );
+  if( p->flags&PGHDR_DIRTY ){
+    pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
+  }
+  p->pCache->nRef--;
+  if( p->pgno==1 ){
+    p->pCache->pPage1 = 0;
+  }
+  sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1);
+}
+
+/*
+** Make sure the page is marked as dirty. If it isn't dirty already,
+** make it so.
+*/
+SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){
+  p->flags &= ~PGHDR_DONT_WRITE;
+  assert( p->nRef>0 );
+  if( 0==(p->flags & PGHDR_DIRTY) ){
+    p->flags |= PGHDR_DIRTY;
+    pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD);
+  }
+}
+
+/*
+** Make sure the page is marked as clean. If it isn't clean already,
+** make it so.
+*/
+SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){
+  if( (p->flags & PGHDR_DIRTY) ){
+    pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
+    p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC);
+    if( p->nRef==0 ){
+      pcacheUnpin(p);
+    }
+  }
+}
+
+/*
+** Make every page in the cache clean.
+*/
+SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache *pCache){
+  PgHdr *p;
+  while( (p = pCache->pDirty)!=0 ){
+    sqlite3PcacheMakeClean(p);
+  }
+}
+
+/*
+** Clear the PGHDR_NEED_SYNC flag from all dirty pages.
+*/
+SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *pCache){
+  PgHdr *p;
+  for(p=pCache->pDirty; p; p=p->pDirtyNext){
+    p->flags &= ~PGHDR_NEED_SYNC;
+  }
+  pCache->pSynced = pCache->pDirtyTail;
+}
+
+/*
+** Change the page number of page p to newPgno. 
+*/
+SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){
+  PCache *pCache = p->pCache;
+  assert( p->nRef>0 );
+  assert( newPgno>0 );
+  sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno);
+  p->pgno = newPgno;
+  if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){
+    pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT);
+  }
+}
+
+/*
+** Drop every cache entry whose page number is greater than "pgno". The
+** caller must ensure that there are no outstanding references to any pages
+** other than page 1 with a page number greater than pgno.
+**
+** If there is a reference to page 1 and the pgno parameter passed to this
+** function is 0, then the data area associated with page 1 is zeroed, but
+** the page object is not dropped.
+*/
+SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){
+  if( pCache->pCache ){
+    PgHdr *p;
+    PgHdr *pNext;
+    for(p=pCache->pDirty; p; p=pNext){
+      pNext = p->pDirtyNext;
+      /* This routine never gets call with a positive pgno except right
+      ** after sqlite3PcacheCleanAll().  So if there are dirty pages,
+      ** it must be that pgno==0.
+      */
+      assert( p->pgno>0 );
+      if( ALWAYS(p->pgno>pgno) ){
+        assert( p->flags&PGHDR_DIRTY );
+        sqlite3PcacheMakeClean(p);
+      }
+    }
+    if( pgno==0 && pCache->pPage1 ){
+      memset(pCache->pPage1->pData, 0, pCache->szPage);
+      pgno = 1;
+    }
+    sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1);
+  }
+}
+
+/*
+** Close a cache.
+*/
+SQLITE_PRIVATE void sqlite3PcacheClose(PCache *pCache){
+  assert( pCache->pCache!=0 );
+  sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
+}
+
+/* 
+** Discard the contents of the cache.
+*/
+SQLITE_PRIVATE void sqlite3PcacheClear(PCache *pCache){
+  sqlite3PcacheTruncate(pCache, 0);
+}
+
+/*
+** Merge two lists of pages connected by pDirty and in pgno order.
+** Do not both fixing the pDirtyPrev pointers.
+*/
+static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){
+  PgHdr result, *pTail;
+  pTail = &result;
+  while( pA && pB ){
+    if( pA->pgno<pB->pgno ){
+      pTail->pDirty = pA;
+      pTail = pA;
+      pA = pA->pDirty;
+    }else{
+      pTail->pDirty = pB;
+      pTail = pB;
+      pB = pB->pDirty;
+    }
+  }
+  if( pA ){
+    pTail->pDirty = pA;
+  }else if( pB ){
+    pTail->pDirty = pB;
+  }else{
+    pTail->pDirty = 0;
+  }
+  return result.pDirty;
+}
+
+/*
+** Sort the list of pages in accending order by pgno.  Pages are
+** connected by pDirty pointers.  The pDirtyPrev pointers are
+** corrupted by this sort.
+**
+** Since there cannot be more than 2^31 distinct pages in a database,
+** there cannot be more than 31 buckets required by the merge sorter.
+** One extra bucket is added to catch overflow in case something
+** ever changes to make the previous sentence incorrect.
+*/
+#define N_SORT_BUCKET  32
+static PgHdr *pcacheSortDirtyList(PgHdr *pIn){
+  PgHdr *a[N_SORT_BUCKET], *p;
+  int i;
+  memset(a, 0, sizeof(a));
+  while( pIn ){
+    p = pIn;
+    pIn = p->pDirty;
+    p->pDirty = 0;
+    for(i=0; ALWAYS(i<N_SORT_BUCKET-1); i++){
+      if( a[i]==0 ){
+        a[i] = p;
+        break;
+      }else{
+        p = pcacheMergeDirtyList(a[i], p);
+        a[i] = 0;
+      }
+    }
+    if( NEVER(i==N_SORT_BUCKET-1) ){
+      /* To get here, there need to be 2^(N_SORT_BUCKET) elements in
+      ** the input list.  But that is impossible.
+      */
+      a[i] = pcacheMergeDirtyList(a[i], p);
+    }
+  }
+  p = a[0];
+  for(i=1; i<N_SORT_BUCKET; i++){
+    p = pcacheMergeDirtyList(p, a[i]);
+  }
+  return p;
+}
+
+/*
+** Return a list of all dirty pages in the cache, sorted by page number.
+*/
+SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache *pCache){
+  PgHdr *p;
+  for(p=pCache->pDirty; p; p=p->pDirtyNext){
+    p->pDirty = p->pDirtyNext;
+  }
+  return pcacheSortDirtyList(pCache->pDirty);
+}
+
+/* 
+** Return the total number of referenced pages held by the cache.
+*/
+SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache *pCache){
+  return pCache->nRef;
+}
+
+/*
+** Return the number of references to the page supplied as an argument.
+*/
+SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr *p){
+  return p->nRef;
+}
+
+/* 
+** Return the total number of pages in the cache.
+*/
+SQLITE_PRIVATE int sqlite3PcachePagecount(PCache *pCache){
+  assert( pCache->pCache!=0 );
+  return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
+}
+
+#ifdef SQLITE_TEST
+/*
+** Get the suggested cache-size value.
+*/
+SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *pCache){
+  return numberOfCachePages(pCache);
+}
+#endif
+
+/*
+** Set the suggested cache-size value.
+*/
+SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){
+  assert( pCache->pCache!=0 );
+  pCache->szCache = mxPage;
+  sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache,
+                                         numberOfCachePages(pCache));
+}
+
+/*
+** Free up as much memory as possible from the page cache.
+*/
+SQLITE_PRIVATE void sqlite3PcacheShrink(PCache *pCache){
+  assert( pCache->pCache!=0 );
+  sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache);
+}
+
+/*
+** Return the size of the header added by this middleware layer
+** in the page-cache hierarchy.
+*/
+SQLITE_PRIVATE int sqlite3HeaderSizePcache(void){ return ROUND8(sizeof(PgHdr)); }
+
+
+#if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG)
+/*
+** For all dirty pages currently in the cache, invoke the specified
+** callback. This is only used if the SQLITE_CHECK_PAGES macro is
+** defined.
+*/
+SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)){
+  PgHdr *pDirty;
+  for(pDirty=pCache->pDirty; pDirty; pDirty=pDirty->pDirtyNext){
+    xIter(pDirty);
+  }
+}
+#endif
+
+/************** End of pcache.c **********************************************/
+/************** Begin file pcache1.c *****************************************/
+/*
+** 2008 November 05
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file implements the default page cache implementation (the
+** sqlite3_pcache interface). It also contains part of the implementation
+** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features.
+** If the default page cache implementation is overridden, then neither of
+** these two features are available.
+*/
+
+
+typedef struct PCache1 PCache1;
+typedef struct PgHdr1 PgHdr1;
+typedef struct PgFreeslot PgFreeslot;
+typedef struct PGroup PGroup;
+
+/* Each page cache (or PCache) belongs to a PGroup.  A PGroup is a set 
+** of one or more PCaches that are able to recycle each other's unpinned
+** pages when they are under memory pressure.  A PGroup is an instance of
+** the following object.
+**
+** This page cache implementation works in one of two modes:
+**
+**   (1)  Every PCache is the sole member of its own PGroup.  There is
+**        one PGroup per PCache.
+**
+**   (2)  There is a single global PGroup that all PCaches are a member
+**        of.
+**
+** Mode 1 uses more memory (since PCache instances are not able to rob
+** unused pages from other PCaches) but it also operates without a mutex,
+** and is therefore often faster.  Mode 2 requires a mutex in order to be
+** threadsafe, but recycles pages more efficiently.
+**
+** For mode (1), PGroup.mutex is NULL.  For mode (2) there is only a single
+** PGroup which is the pcache1.grp global variable and its mutex is
+** SQLITE_MUTEX_STATIC_LRU.
+*/
+struct PGroup {
+  sqlite3_mutex *mutex;          /* MUTEX_STATIC_LRU or NULL */
+  unsigned int nMaxPage;         /* Sum of nMax for purgeable caches */
+  unsigned int nMinPage;         /* Sum of nMin for purgeable caches */
+  unsigned int mxPinned;         /* nMaxpage + 10 - nMinPage */
+  unsigned int nCurrentPage;     /* Number of purgeable pages allocated */
+  PgHdr1 *pLruHead, *pLruTail;   /* LRU list of unpinned pages */
+};
+
+/* Each page cache is an instance of the following object.  Every
+** open database file (including each in-memory database and each
+** temporary or transient database) has a single page cache which
+** is an instance of this object.
+**
+** Pointers to structures of this type are cast and returned as 
+** opaque sqlite3_pcache* handles.
+*/
+struct PCache1 {
+  /* Cache configuration parameters. Page size (szPage) and the purgeable
+  ** flag (bPurgeable) are set when the cache is created. nMax may be 
+  ** modified at any time by a call to the pcache1Cachesize() method.
+  ** The PGroup mutex must be held when accessing nMax.
+  */
+  PGroup *pGroup;                     /* PGroup this cache belongs to */
+  int szPage;                         /* Size of allocated pages in bytes */
+  int szExtra;                        /* Size of extra space in bytes */
+  int bPurgeable;                     /* True if cache is purgeable */
+  unsigned int nMin;                  /* Minimum number of pages reserved */
+  unsigned int nMax;                  /* Configured "cache_size" value */
+  unsigned int n90pct;                /* nMax*9/10 */
+  unsigned int iMaxKey;               /* Largest key seen since xTruncate() */
+
+  /* Hash table of all pages. The following variables may only be accessed
+  ** when the accessor is holding the PGroup mutex.
+  */
+  unsigned int nRecyclable;           /* Number of pages in the LRU list */
+  unsigned int nPage;                 /* Total number of pages in apHash */
+  unsigned int nHash;                 /* Number of slots in apHash[] */
+  PgHdr1 **apHash;                    /* Hash table for fast lookup by key */
+};
+
+/*
+** Each cache entry is represented by an instance of the following 
+** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of
+** PgHdr1.pCache->szPage bytes is allocated directly before this structure 
+** in memory.
+*/
+struct PgHdr1 {
+  sqlite3_pcache_page page;
+  unsigned int iKey;             /* Key value (page number) */
+  u8 isPinned;                   /* Page in use, not on the LRU list */
+  PgHdr1 *pNext;                 /* Next in hash table chain */
+  PCache1 *pCache;               /* Cache that currently owns this page */
+  PgHdr1 *pLruNext;              /* Next in LRU list of unpinned pages */
+  PgHdr1 *pLruPrev;              /* Previous in LRU list of unpinned pages */
+};
+
+/*
+** Free slots in the allocator used to divide up the buffer provided using
+** the SQLITE_CONFIG_PAGECACHE mechanism.
+*/
+struct PgFreeslot {
+  PgFreeslot *pNext;  /* Next free slot */
+};
+
+/*
+** Global data used by this cache.
+*/
+static SQLITE_WSD struct PCacheGlobal {
+  PGroup grp;                    /* The global PGroup for mode (2) */
+
+  /* Variables related to SQLITE_CONFIG_PAGECACHE settings.  The
+  ** szSlot, nSlot, pStart, pEnd, nReserve, and isInit values are all
+  ** fixed at sqlite3_initialize() time and do not require mutex protection.
+  ** The nFreeSlot and pFree values do require mutex protection.
+  */
+  int isInit;                    /* True if initialized */
+  int szSlot;                    /* Size of each free slot */
+  int nSlot;                     /* The number of pcache slots */
+  int nReserve;                  /* Try to keep nFreeSlot above this */
+  void *pStart, *pEnd;           /* Bounds of pagecache malloc range */
+  /* Above requires no mutex.  Use mutex below for variable that follow. */
+  sqlite3_mutex *mutex;          /* Mutex for accessing the following: */
+  PgFreeslot *pFree;             /* Free page blocks */
+  int nFreeSlot;                 /* Number of unused pcache slots */
+  /* The following value requires a mutex to change.  We skip the mutex on
+  ** reading because (1) most platforms read a 32-bit integer atomically and
+  ** (2) even if an incorrect value is read, no great harm is done since this
+  ** is really just an optimization. */
+  int bUnderPressure;            /* True if low on PAGECACHE memory */
+} pcache1_g;
+
+/*
+** All code in this file should access the global structure above via the
+** alias "pcache1". This ensures that the WSD emulation is used when
+** compiling for systems that do not support real WSD.
+*/
+#define pcache1 (GLOBAL(struct PCacheGlobal, pcache1_g))
+
+/*
+** Macros to enter and leave the PCache LRU mutex.
+*/
+#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex)
+#define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex)
+
+/******************************************************************************/
+/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/
+
+/*
+** This function is called during initialization if a static buffer is 
+** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE
+** verb to sqlite3_config(). Parameter pBuf points to an allocation large
+** enough to contain 'n' buffers of 'sz' bytes each.
+**
+** This routine is called from sqlite3_initialize() and so it is guaranteed
+** to be serialized already.  There is no need for further mutexing.
+*/
+SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){
+  if( pcache1.isInit ){
+    PgFreeslot *p;
+    sz = ROUNDDOWN8(sz);
+    pcache1.szSlot = sz;
+    pcache1.nSlot = pcache1.nFreeSlot = n;
+    pcache1.nReserve = n>90 ? 10 : (n/10 + 1);
+    pcache1.pStart = pBuf;
+    pcache1.pFree = 0;
+    pcache1.bUnderPressure = 0;
+    while( n-- ){
+      p = (PgFreeslot*)pBuf;
+      p->pNext = pcache1.pFree;
+      pcache1.pFree = p;
+      pBuf = (void*)&((char*)pBuf)[sz];
+    }
+    pcache1.pEnd = pBuf;
+  }
+}
+
+/*
+** Malloc function used within this file to allocate space from the buffer
+** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no 
+** such buffer exists or there is no space left in it, this function falls 
+** back to sqlite3Malloc().
+**
+** Multiple threads can run this routine at the same time.  Global variables
+** in pcache1 need to be protected via mutex.
+*/
+static void *pcache1Alloc(int nByte){
+  void *p = 0;
+  assert( sqlite3_mutex_notheld(pcache1.grp.mutex) );
+  sqlite3StatusSet(SQLITE_STATUS_PAGECACHE_SIZE, nByte);
+  if( nByte<=pcache1.szSlot ){
+    sqlite3_mutex_enter(pcache1.mutex);
+    p = (PgHdr1 *)pcache1.pFree;
+    if( p ){
+      pcache1.pFree = pcache1.pFree->pNext;
+      pcache1.nFreeSlot--;
+      pcache1.bUnderPressure = pcache1.nFreeSlot<pcache1.nReserve;
+      assert( pcache1.nFreeSlot>=0 );
+      sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_USED, 1);
+    }
+    sqlite3_mutex_leave(pcache1.mutex);
+  }
+  if( p==0 ){
+    /* Memory is not available in the SQLITE_CONFIG_PAGECACHE pool.  Get
+    ** it from sqlite3Malloc instead.
+    */
+    p = sqlite3Malloc(nByte);
+#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS
+    if( p ){
+      int sz = sqlite3MallocSize(p);
+      sqlite3_mutex_enter(pcache1.mutex);
+      sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz);
+      sqlite3_mutex_leave(pcache1.mutex);
+    }
+#endif
+    sqlite3MemdebugSetType(p, MEMTYPE_PCACHE);
+  }
+  return p;
+}
+
+/*
+** Free an allocated buffer obtained from pcache1Alloc().
+*/
+static int pcache1Free(void *p){
+  int nFreed = 0;
+  if( p==0 ) return 0;
+  if( p>=pcache1.pStart && p<pcache1.pEnd ){
+    PgFreeslot *pSlot;
+    sqlite3_mutex_enter(pcache1.mutex);
+    sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_USED, -1);
+    pSlot = (PgFreeslot*)p;
+    pSlot->pNext = pcache1.pFree;
+    pcache1.pFree = pSlot;
+    pcache1.nFreeSlot++;
+    pcache1.bUnderPressure = pcache1.nFreeSlot<pcache1.nReserve;
+    assert( pcache1.nFreeSlot<=pcache1.nSlot );
+    sqlite3_mutex_leave(pcache1.mutex);
+  }else{
+    assert( sqlite3MemdebugHasType(p, MEMTYPE_PCACHE) );
+    sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
+    nFreed = sqlite3MallocSize(p);
+#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS
+    sqlite3_mutex_enter(pcache1.mutex);
+    sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -nFreed);
+    sqlite3_mutex_leave(pcache1.mutex);
+#endif
+    sqlite3_free(p);
+  }
+  return nFreed;
+}
+
+#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
+/*
+** Return the size of a pcache allocation
+*/
+static int pcache1MemSize(void *p){
+  if( p>=pcache1.pStart && p<pcache1.pEnd ){
+    return pcache1.szSlot;
+  }else{
+    int iSize;
+    assert( sqlite3MemdebugHasType(p, MEMTYPE_PCACHE) );
+    sqlite3MemdebugSetType(p, MEMTYPE_HEAP);
+    iSize = sqlite3MallocSize(p);
+    sqlite3MemdebugSetType(p, MEMTYPE_PCACHE);
+    return iSize;
+  }
+}
+#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
+
+/*
+** Allocate a new page object initially associated with cache pCache.
+*/
+static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
+  PgHdr1 *p = 0;
+  void *pPg;
+
+  /* The group mutex must be released before pcache1Alloc() is called. This
+  ** is because it may call sqlite3_release_memory(), which assumes that 
+  ** this mutex is not held. */
+  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
+  pcache1LeaveMutex(pCache->pGroup);
+#ifdef SQLITE_PCACHE_SEPARATE_HEADER
+  pPg = pcache1Alloc(pCache->szPage);
+  p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra);
+  if( !pPg || !p ){
+    pcache1Free(pPg);
+    sqlite3_free(p);
+    pPg = 0;
+  }
+#else
+  pPg = pcache1Alloc(ROUND8(sizeof(PgHdr1)) + pCache->szPage + pCache->szExtra);
+  p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage];
+#endif
+  pcache1EnterMutex(pCache->pGroup);
+
+  if( pPg ){
+    p->page.pBuf = pPg;
+    p->page.pExtra = &p[1];
+    if( pCache->bPurgeable ){
+      pCache->pGroup->nCurrentPage++;
+    }
+    return p;
+  }
+  return 0;
+}
+
+/*
+** Free a page object allocated by pcache1AllocPage().
+**
+** The pointer is allowed to be NULL, which is prudent.  But it turns out
+** that the current implementation happens to never call this routine
+** with a NULL pointer, so we mark the NULL test with ALWAYS().
+*/
+static void pcache1FreePage(PgHdr1 *p){
+  if( ALWAYS(p) ){
+    PCache1 *pCache = p->pCache;
+    assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) );
+    pcache1Free(p->page.pBuf);
+#ifdef SQLITE_PCACHE_SEPARATE_HEADER
+    sqlite3_free(p);
+#endif
+    if( pCache->bPurgeable ){
+      pCache->pGroup->nCurrentPage--;
+    }
+  }
+}
+
+/*
+** Malloc function used by SQLite to obtain space from the buffer configured
+** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer
+** exists, this function falls back to sqlite3Malloc().
+*/
+SQLITE_PRIVATE void *sqlite3PageMalloc(int sz){
+  return pcache1Alloc(sz);
+}
+
+/*
+** Free an allocated buffer obtained from sqlite3PageMalloc().
+*/
+SQLITE_PRIVATE void sqlite3PageFree(void *p){
+  pcache1Free(p);
+}
+
+
+/*
+** Return true if it desirable to avoid allocating a new page cache
+** entry.
+**
+** If memory was allocated specifically to the page cache using
+** SQLITE_CONFIG_PAGECACHE but that memory has all been used, then
+** it is desirable to avoid allocating a new page cache entry because
+** presumably SQLITE_CONFIG_PAGECACHE was suppose to be sufficient
+** for all page cache needs and we should not need to spill the
+** allocation onto the heap.
+**
+** Or, the heap is used for all page cache memory but the heap is
+** under memory pressure, then again it is desirable to avoid
+** allocating a new page cache entry in order to avoid stressing
+** the heap even further.
+*/
+static int pcache1UnderMemoryPressure(PCache1 *pCache){
+  if( pcache1.nSlot && (pCache->szPage+pCache->szExtra)<=pcache1.szSlot ){
+    return pcache1.bUnderPressure;
+  }else{
+    return sqlite3HeapNearlyFull();
+  }
+}
+
+/******************************************************************************/
+/******** General Implementation Functions ************************************/
+
+/*
+** This function is used to resize the hash table used by the cache passed
+** as the first argument.
+**
+** The PCache mutex must be held when this function is called.
+*/
+static void pcache1ResizeHash(PCache1 *p){
+  PgHdr1 **apNew;
+  unsigned int nNew;
+  unsigned int i;
+
+  assert( sqlite3_mutex_held(p->pGroup->mutex) );
+
+  nNew = p->nHash*2;
+  if( nNew<256 ){
+    nNew = 256;
+  }
+
+  pcache1LeaveMutex(p->pGroup);
+  if( p->nHash ){ sqlite3BeginBenignMalloc(); }
+  apNew = (PgHdr1 **)sqlite3MallocZero(sizeof(PgHdr1 *)*nNew);
+  if( p->nHash ){ sqlite3EndBenignMalloc(); }
+  pcache1EnterMutex(p->pGroup);
+  if( apNew ){
+    for(i=0; i<p->nHash; i++){
+      PgHdr1 *pPage;
+      PgHdr1 *pNext = p->apHash[i];
+      while( (pPage = pNext)!=0 ){
+        unsigned int h = pPage->iKey % nNew;
+        pNext = pPage->pNext;
+        pPage->pNext = apNew[h];
+        apNew[h] = pPage;
+      }
+    }
+    sqlite3_free(p->apHash);
+    p->apHash = apNew;
+    p->nHash = nNew;
+  }
+}
+
+/*
+** This function is used internally to remove the page pPage from the 
+** PGroup LRU list, if is part of it. If pPage is not part of the PGroup
+** LRU list, then this function is a no-op.
+**
+** The PGroup mutex must be held when this function is called.
+*/
+static void pcache1PinPage(PgHdr1 *pPage){
+  PCache1 *pCache;
+  PGroup *pGroup;
+
+  assert( pPage!=0 );
+  assert( pPage->isPinned==0 );
+  pCache = pPage->pCache;
+  pGroup = pCache->pGroup;
+  assert( pPage->pLruNext || pPage==pGroup->pLruTail );
+  assert( pPage->pLruPrev || pPage==pGroup->pLruHead );
+  assert( sqlite3_mutex_held(pGroup->mutex) );
+  if( pPage->pLruPrev ){
+    pPage->pLruPrev->pLruNext = pPage->pLruNext;
+  }else{
+    pGroup->pLruHead = pPage->pLruNext;
+  }
+  if( pPage->pLruNext ){
+    pPage->pLruNext->pLruPrev = pPage->pLruPrev;
+  }else{
+    pGroup->pLruTail = pPage->pLruPrev;
+  }
+  pPage->pLruNext = 0;
+  pPage->pLruPrev = 0;
+  pPage->isPinned = 1;
+  pCache->nRecyclable--;
+}
+
+
+/*
+** Remove the page supplied as an argument from the hash table 
+** (PCache1.apHash structure) that it is currently stored in.
+**
+** The PGroup mutex must be held when this function is called.
+*/
+static void pcache1RemoveFromHash(PgHdr1 *pPage){
+  unsigned int h;
+  PCache1 *pCache = pPage->pCache;
+  PgHdr1 **pp;
+
+  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
+  h = pPage->iKey % pCache->nHash;
+  for(pp=&pCache->apHash[h]; (*pp)!=pPage; pp=&(*pp)->pNext);
+  *pp = (*pp)->pNext;
+
+  pCache->nPage--;
+}
+
+/*
+** If there are currently more than nMaxPage pages allocated, try
+** to recycle pages to reduce the number allocated to nMaxPage.
+*/
+static void pcache1EnforceMaxPage(PGroup *pGroup){
+  assert( sqlite3_mutex_held(pGroup->mutex) );
+  while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){
+    PgHdr1 *p = pGroup->pLruTail;
+    assert( p->pCache->pGroup==pGroup );
+    assert( p->isPinned==0 );
+    pcache1PinPage(p);
+    pcache1RemoveFromHash(p);
+    pcache1FreePage(p);
+  }
+}
+
+/*
+** Discard all pages from cache pCache with a page number (key value) 
+** greater than or equal to iLimit. Any pinned pages that meet this 
+** criteria are unpinned before they are discarded.
+**
+** The PCache mutex must be held when this function is called.
+*/
+static void pcache1TruncateUnsafe(
+  PCache1 *pCache,             /* The cache to truncate */
+  unsigned int iLimit          /* Drop pages with this pgno or larger */
+){
+  TESTONLY( unsigned int nPage = 0; )  /* To assert pCache->nPage is correct */
+  unsigned int h;
+  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
+  for(h=0; h<pCache->nHash; h++){
+    PgHdr1 **pp = &pCache->apHash[h]; 
+    PgHdr1 *pPage;
+    while( (pPage = *pp)!=0 ){
+      if( pPage->iKey>=iLimit ){
+        pCache->nPage--;
+        *pp = pPage->pNext;
+        if( !pPage->isPinned ) pcache1PinPage(pPage);
+        pcache1FreePage(pPage);
+      }else{
+        pp = &pPage->pNext;
+        TESTONLY( nPage++; )
+      }
+    }
+  }
+  assert( pCache->nPage==nPage );
+}
+
+/******************************************************************************/
+/******** sqlite3_pcache Methods **********************************************/
+
+/*
+** Implementation of the sqlite3_pcache.xInit method.
+*/
+static int pcache1Init(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  assert( pcache1.isInit==0 );
+  memset(&pcache1, 0, sizeof(pcache1));
+  if( sqlite3GlobalConfig.bCoreMutex ){
+    pcache1.grp.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU);
+    pcache1.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PMEM);
+  }
+  pcache1.grp.mxPinned = 10;
+  pcache1.isInit = 1;
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of the sqlite3_pcache.xShutdown method.
+** Note that the static mutex allocated in xInit does 
+** not need to be freed.
+*/
+static void pcache1Shutdown(void *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  assert( pcache1.isInit!=0 );
+  memset(&pcache1, 0, sizeof(pcache1));
+}
+
+/* forward declaration */
+static void pcache1Destroy(sqlite3_pcache *p);
+
+/*
+** Implementation of the sqlite3_pcache.xCreate method.
+**
+** Allocate a new cache.
+*/
+static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){
+  PCache1 *pCache;      /* The newly created page cache */
+  PGroup *pGroup;       /* The group the new page cache will belong to */
+  int sz;               /* Bytes of memory required to allocate the new cache */
+
+  /*
+  ** The separateCache variable is true if each PCache has its own private
+  ** PGroup.  In other words, separateCache is true for mode (1) where no
+  ** mutexing is required.
+  **
+  **   *  Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT
+  **
+  **   *  Always use a unified cache in single-threaded applications
+  **
+  **   *  Otherwise (if multi-threaded and ENABLE_MEMORY_MANAGEMENT is off)
+  **      use separate caches (mode-1)
+  */
+#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0
+  const int separateCache = 0;
+#else
+  int separateCache = sqlite3GlobalConfig.bCoreMutex>0;
+#endif
+
+  assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 );
+  assert( szExtra < 300 );
+
+  sz = sizeof(PCache1) + sizeof(PGroup)*separateCache;
+  pCache = (PCache1 *)sqlite3MallocZero(sz);
+  if( pCache ){
+    if( separateCache ){
+      pGroup = (PGroup*)&pCache[1];
+      pGroup->mxPinned = 10;
+    }else{
+      pGroup = &pcache1.grp;
+    }
+    pCache->pGroup = pGroup;
+    pCache->szPage = szPage;
+    pCache->szExtra = szExtra;
+    pCache->bPurgeable = (bPurgeable ? 1 : 0);
+    pcache1EnterMutex(pGroup);
+    pcache1ResizeHash(pCache);
+    if( bPurgeable ){
+      pCache->nMin = 10;
+      pGroup->nMinPage += pCache->nMin;
+      pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage;
+    }
+    pcache1LeaveMutex(pGroup);
+    if( pCache->nHash==0 ){
+      pcache1Destroy((sqlite3_pcache*)pCache);
+      pCache = 0;
+    }
+  }
+  return (sqlite3_pcache *)pCache;
+}
+
+/*
+** Implementation of the sqlite3_pcache.xCachesize method. 
+**
+** Configure the cache_size limit for a cache.
+*/
+static void pcache1Cachesize(sqlite3_pcache *p, int nMax){
+  PCache1 *pCache = (PCache1 *)p;
+  if( pCache->bPurgeable ){
+    PGroup *pGroup = pCache->pGroup;
+    pcache1EnterMutex(pGroup);
+    pGroup->nMaxPage += (nMax - pCache->nMax);
+    pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage;
+    pCache->nMax = nMax;
+    pCache->n90pct = pCache->nMax*9/10;
+    pcache1EnforceMaxPage(pGroup);
+    pcache1LeaveMutex(pGroup);
+  }
+}
+
+/*
+** Implementation of the sqlite3_pcache.xShrink method. 
+**
+** Free up as much memory as possible.
+*/
+static void pcache1Shrink(sqlite3_pcache *p){
+  PCache1 *pCache = (PCache1*)p;
+  if( pCache->bPurgeable ){
+    PGroup *pGroup = pCache->pGroup;
+    int savedMaxPage;
+    pcache1EnterMutex(pGroup);
+    savedMaxPage = pGroup->nMaxPage;
+    pGroup->nMaxPage = 0;
+    pcache1EnforceMaxPage(pGroup);
+    pGroup->nMaxPage = savedMaxPage;
+    pcache1LeaveMutex(pGroup);
+  }
+}
+
+/*
+** Implementation of the sqlite3_pcache.xPagecount method. 
+*/
+static int pcache1Pagecount(sqlite3_pcache *p){
+  int n;
+  PCache1 *pCache = (PCache1*)p;
+  pcache1EnterMutex(pCache->pGroup);
+  n = pCache->nPage;
+  pcache1LeaveMutex(pCache->pGroup);
+  return n;
+}
+
+
+/*
+** Implement steps 3, 4, and 5 of the pcache1Fetch() algorithm described
+** in the header of the pcache1Fetch() procedure.
+**
+** This steps are broken out into a separate procedure because they are
+** usually not needed, and by avoiding the stack initialization required
+** for these steps, the main pcache1Fetch() procedure can run faster.
+*/
+static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2(
+  PCache1 *pCache, 
+  unsigned int iKey, 
+  int createFlag
+){
+  unsigned int nPinned;
+  PGroup *pGroup = pCache->pGroup;
+  PgHdr1 *pPage = 0;
+
+  /* Step 3: Abort if createFlag is 1 but the cache is nearly full */
+  assert( pCache->nPage >= pCache->nRecyclable );
+  nPinned = pCache->nPage - pCache->nRecyclable;
+  assert( pGroup->mxPinned == pGroup->nMaxPage + 10 - pGroup->nMinPage );
+  assert( pCache->n90pct == pCache->nMax*9/10 );
+  if( createFlag==1 && (
+        nPinned>=pGroup->mxPinned
+     || nPinned>=pCache->n90pct
+     || (pcache1UnderMemoryPressure(pCache) && pCache->nRecyclable<nPinned)
+  )){
+    return 0;
+  }
+
+  if( pCache->nPage>=pCache->nHash ) pcache1ResizeHash(pCache);
+  assert( pCache->nHash>0 && pCache->apHash );
+
+  /* Step 4. Try to recycle a page. */
+  if( pCache->bPurgeable && pGroup->pLruTail && (
+         (pCache->nPage+1>=pCache->nMax)
+      || pGroup->nCurrentPage>=pGroup->nMaxPage
+      || pcache1UnderMemoryPressure(pCache)
+  )){
+    PCache1 *pOther;
+    pPage = pGroup->pLruTail;
+    assert( pPage->isPinned==0 );
+    pcache1RemoveFromHash(pPage);
+    pcache1PinPage(pPage);
+    pOther = pPage->pCache;
+
+    /* We want to verify that szPage and szExtra are the same for pOther
+    ** and pCache.  Assert that we can verify this by comparing sums. */
+    assert( (pCache->szPage & (pCache->szPage-1))==0 && pCache->szPage>=512 );
+    assert( pCache->szExtra<512 );
+    assert( (pOther->szPage & (pOther->szPage-1))==0 && pOther->szPage>=512 );
+    assert( pOther->szExtra<512 );
+
+    if( pOther->szPage+pOther->szExtra != pCache->szPage+pCache->szExtra ){
+      pcache1FreePage(pPage);
+      pPage = 0;
+    }else{
+      pGroup->nCurrentPage -= (pOther->bPurgeable - pCache->bPurgeable);
+    }
+  }
+
+  /* Step 5. If a usable page buffer has still not been found, 
+  ** attempt to allocate a new one. 
+  */
+  if( !pPage ){
+    if( createFlag==1 ) sqlite3BeginBenignMalloc();
+    pPage = pcache1AllocPage(pCache);
+    if( createFlag==1 ) sqlite3EndBenignMalloc();
+  }
+
+  if( pPage ){
+    unsigned int h = iKey % pCache->nHash;
+    pCache->nPage++;
+    pPage->iKey = iKey;
+    pPage->pNext = pCache->apHash[h];
+    pPage->pCache = pCache;
+    pPage->pLruPrev = 0;
+    pPage->pLruNext = 0;
+    pPage->isPinned = 1;
+    *(void **)pPage->page.pExtra = 0;
+    pCache->apHash[h] = pPage;
+    if( iKey>pCache->iMaxKey ){
+      pCache->iMaxKey = iKey;
+    }
+  }
+  return pPage;
+}
+
+/*
+** Implementation of the sqlite3_pcache.xFetch method. 
+**
+** Fetch a page by key value.
+**
+** Whether or not a new page may be allocated by this function depends on
+** the value of the createFlag argument.  0 means do not allocate a new
+** page.  1 means allocate a new page if space is easily available.  2 
+** means to try really hard to allocate a new page.
+**
+** For a non-purgeable cache (a cache used as the storage for an in-memory
+** database) there is really no difference between createFlag 1 and 2.  So
+** the calling function (pcache.c) will never have a createFlag of 1 on
+** a non-purgeable cache.
+**
+** There are three different approaches to obtaining space for a page,
+** depending on the value of parameter createFlag (which may be 0, 1 or 2).
+**
+**   1. Regardless of the value of createFlag, the cache is searched for a 
+**      copy of the requested page. If one is found, it is returned.
+**
+**   2. If createFlag==0 and the page is not already in the cache, NULL is
+**      returned.
+**
+**   3. If createFlag is 1, and the page is not already in the cache, then
+**      return NULL (do not allocate a new page) if any of the following
+**      conditions are true:
+**
+**       (a) the number of pages pinned by the cache is greater than
+**           PCache1.nMax, or
+**
+**       (b) the number of pages pinned by the cache is greater than
+**           the sum of nMax for all purgeable caches, less the sum of 
+**           nMin for all other purgeable caches, or
+**
+**   4. If none of the first three conditions apply and the cache is marked
+**      as purgeable, and if one of the following is true:
+**
+**       (a) The number of pages allocated for the cache is already 
+**           PCache1.nMax, or
+**
+**       (b) The number of pages allocated for all purgeable caches is
+**           already equal to or greater than the sum of nMax for all
+**           purgeable caches,
+**
+**       (c) The system is under memory pressure and wants to avoid
+**           unnecessary pages cache entry allocations
+**
+**      then attempt to recycle a page from the LRU list. If it is the right
+**      size, return the recycled buffer. Otherwise, free the buffer and
+**      proceed to step 5. 
+**
+**   5. Otherwise, allocate and return a new page buffer.
+*/
+static sqlite3_pcache_page *pcache1Fetch(
+  sqlite3_pcache *p, 
+  unsigned int iKey, 
+  int createFlag
+){
+  PCache1 *pCache = (PCache1 *)p;
+  PgHdr1 *pPage = 0;
+
+  assert( offsetof(PgHdr1,page)==0 );
+  assert( pCache->bPurgeable || createFlag!=1 );
+  assert( pCache->bPurgeable || pCache->nMin==0 );
+  assert( pCache->bPurgeable==0 || pCache->nMin==10 );
+  assert( pCache->nMin==0 || pCache->bPurgeable );
+  assert( pCache->nHash>0 );
+  pcache1EnterMutex(pCache->pGroup);
+
+  /* Step 1: Search the hash table for an existing entry. */
+  pPage = pCache->apHash[iKey % pCache->nHash];
+  while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; }
+
+  /* Step 2: Abort if no existing page is found and createFlag is 0 */
+  if( pPage ){
+    if( !pPage->isPinned ) pcache1PinPage(pPage);
+  }else if( createFlag ){
+    /* Steps 3, 4, and 5 implemented by this subroutine */
+    pPage = pcache1FetchStage2(pCache, iKey, createFlag);
+  }
+  assert( pPage==0 || pCache->iMaxKey>=iKey );
+  pcache1LeaveMutex(pCache->pGroup);
+  return (sqlite3_pcache_page*)pPage;
+}
+
+
+/*
+** Implementation of the sqlite3_pcache.xUnpin method.
+**
+** Mark a page as unpinned (eligible for asynchronous recycling).
+*/
+static void pcache1Unpin(
+  sqlite3_pcache *p, 
+  sqlite3_pcache_page *pPg, 
+  int reuseUnlikely
+){
+  PCache1 *pCache = (PCache1 *)p;
+  PgHdr1 *pPage = (PgHdr1 *)pPg;
+  PGroup *pGroup = pCache->pGroup;
+ 
+  assert( pPage->pCache==pCache );
+  pcache1EnterMutex(pGroup);
+
+  /* It is an error to call this function if the page is already 
+  ** part of the PGroup LRU list.
+  */
+  assert( pPage->pLruPrev==0 && pPage->pLruNext==0 );
+  assert( pGroup->pLruHead!=pPage && pGroup->pLruTail!=pPage );
+  assert( pPage->isPinned==1 );
+
+  if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){
+    pcache1RemoveFromHash(pPage);
+    pcache1FreePage(pPage);
+  }else{
+    /* Add the page to the PGroup LRU list. */
+    if( pGroup->pLruHead ){
+      pGroup->pLruHead->pLruPrev = pPage;
+      pPage->pLruNext = pGroup->pLruHead;
+      pGroup->pLruHead = pPage;
+    }else{
+      pGroup->pLruTail = pPage;
+      pGroup->pLruHead = pPage;
+    }
+    pCache->nRecyclable++;
+    pPage->isPinned = 0;
+  }
+
+  pcache1LeaveMutex(pCache->pGroup);
+}
+
+/*
+** Implementation of the sqlite3_pcache.xRekey method. 
+*/
+static void pcache1Rekey(
+  sqlite3_pcache *p,
+  sqlite3_pcache_page *pPg,
+  unsigned int iOld,
+  unsigned int iNew
+){
+  PCache1 *pCache = (PCache1 *)p;
+  PgHdr1 *pPage = (PgHdr1 *)pPg;
+  PgHdr1 **pp;
+  unsigned int h; 
+  assert( pPage->iKey==iOld );
+  assert( pPage->pCache==pCache );
+
+  pcache1EnterMutex(pCache->pGroup);
+
+  h = iOld%pCache->nHash;
+  pp = &pCache->apHash[h];
+  while( (*pp)!=pPage ){
+    pp = &(*pp)->pNext;
+  }
+  *pp = pPage->pNext;
+
+  h = iNew%pCache->nHash;
+  pPage->iKey = iNew;
+  pPage->pNext = pCache->apHash[h];
+  pCache->apHash[h] = pPage;
+  if( iNew>pCache->iMaxKey ){
+    pCache->iMaxKey = iNew;
+  }
+
+  pcache1LeaveMutex(pCache->pGroup);
+}
+
+/*
+** Implementation of the sqlite3_pcache.xTruncate method. 
+**
+** Discard all unpinned pages in the cache with a page number equal to
+** or greater than parameter iLimit. Any pinned pages with a page number
+** equal to or greater than iLimit are implicitly unpinned.
+*/
+static void pcache1Truncate(sqlite3_pcache *p, unsigned int iLimit){
+  PCache1 *pCache = (PCache1 *)p;
+  pcache1EnterMutex(pCache->pGroup);
+  if( iLimit<=pCache->iMaxKey ){
+    pcache1TruncateUnsafe(pCache, iLimit);
+    pCache->iMaxKey = iLimit-1;
+  }
+  pcache1LeaveMutex(pCache->pGroup);
+}
+
+/*
+** Implementation of the sqlite3_pcache.xDestroy method. 
+**
+** Destroy a cache allocated using pcache1Create().
+*/
+static void pcache1Destroy(sqlite3_pcache *p){
+  PCache1 *pCache = (PCache1 *)p;
+  PGroup *pGroup = pCache->pGroup;
+  assert( pCache->bPurgeable || (pCache->nMax==0 && pCache->nMin==0) );
+  pcache1EnterMutex(pGroup);
+  pcache1TruncateUnsafe(pCache, 0);
+  assert( pGroup->nMaxPage >= pCache->nMax );
+  pGroup->nMaxPage -= pCache->nMax;
+  assert( pGroup->nMinPage >= pCache->nMin );
+  pGroup->nMinPage -= pCache->nMin;
+  pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage;
+  pcache1EnforceMaxPage(pGroup);
+  pcache1LeaveMutex(pGroup);
+  sqlite3_free(pCache->apHash);
+  sqlite3_free(pCache);
+}
+
+/*
+** This function is called during initialization (sqlite3_initialize()) to
+** install the default pluggable cache module, assuming the user has not
+** already provided an alternative.
+*/
+SQLITE_PRIVATE void sqlite3PCacheSetDefault(void){
+  static const sqlite3_pcache_methods2 defaultMethods = {
+    1,                       /* iVersion */
+    0,                       /* pArg */
+    pcache1Init,             /* xInit */
+    pcache1Shutdown,         /* xShutdown */
+    pcache1Create,           /* xCreate */
+    pcache1Cachesize,        /* xCachesize */
+    pcache1Pagecount,        /* xPagecount */
+    pcache1Fetch,            /* xFetch */
+    pcache1Unpin,            /* xUnpin */
+    pcache1Rekey,            /* xRekey */
+    pcache1Truncate,         /* xTruncate */
+    pcache1Destroy,          /* xDestroy */
+    pcache1Shrink            /* xShrink */
+  };
+  sqlite3_config(SQLITE_CONFIG_PCACHE2, &defaultMethods);
+}
+
+/*
+** Return the size of the header on each page of this PCACHE implementation.
+*/
+SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void){ return ROUND8(sizeof(PgHdr1)); }
+
+#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
+/*
+** This function is called to free superfluous dynamically allocated memory
+** held by the pager system. Memory in use by any SQLite pager allocated
+** by the current thread may be sqlite3_free()ed.
+**
+** nReq is the number of bytes of memory required. Once this much has
+** been released, the function returns. The return value is the total number 
+** of bytes of memory released.
+*/
+SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){
+  int nFree = 0;
+  assert( sqlite3_mutex_notheld(pcache1.grp.mutex) );
+  assert( sqlite3_mutex_notheld(pcache1.mutex) );
+  if( pcache1.pStart==0 ){
+    PgHdr1 *p;
+    pcache1EnterMutex(&pcache1.grp);
+    while( (nReq<0 || nFree<nReq) && ((p=pcache1.grp.pLruTail)!=0) ){
+      nFree += pcache1MemSize(p->page.pBuf);
+#ifdef SQLITE_PCACHE_SEPARATE_HEADER
+      nFree += sqlite3MemSize(p);
+#endif
+      assert( p->isPinned==0 );
+      pcache1PinPage(p);
+      pcache1RemoveFromHash(p);
+      pcache1FreePage(p);
+    }
+    pcache1LeaveMutex(&pcache1.grp);
+  }
+  return nFree;
+}
+#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
+
+#ifdef SQLITE_TEST
+/*
+** This function is used by test procedures to inspect the internal state
+** of the global cache.
+*/
+SQLITE_PRIVATE void sqlite3PcacheStats(
+  int *pnCurrent,      /* OUT: Total number of pages cached */
+  int *pnMax,          /* OUT: Global maximum cache size */
+  int *pnMin,          /* OUT: Sum of PCache1.nMin for purgeable caches */
+  int *pnRecyclable    /* OUT: Total number of pages available for recycling */
+){
+  PgHdr1 *p;
+  int nRecyclable = 0;
+  for(p=pcache1.grp.pLruHead; p; p=p->pLruNext){
+    assert( p->isPinned==0 );
+    nRecyclable++;
+  }
+  *pnCurrent = pcache1.grp.nCurrentPage;
+  *pnMax = (int)pcache1.grp.nMaxPage;
+  *pnMin = (int)pcache1.grp.nMinPage;
+  *pnRecyclable = nRecyclable;
+}
+#endif
+
+/************** End of pcache1.c *********************************************/
+/************** Begin file rowset.c ******************************************/
+/*
+** 2008 December 3
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This module implements an object we call a "RowSet".
+**
+** The RowSet object is a collection of rowids.  Rowids
+** are inserted into the RowSet in an arbitrary order.  Inserts
+** can be intermixed with tests to see if a given rowid has been
+** previously inserted into the RowSet.
+**
+** After all inserts are finished, it is possible to extract the
+** elements of the RowSet in sorted order.  Once this extraction
+** process has started, no new elements may be inserted.
+**
+** Hence, the primitive operations for a RowSet are:
+**
+**    CREATE
+**    INSERT
+**    TEST
+**    SMALLEST
+**    DESTROY
+**
+** The CREATE and DESTROY primitives are the constructor and destructor,
+** obviously.  The INSERT primitive adds a new element to the RowSet.
+** TEST checks to see if an element is already in the RowSet.  SMALLEST
+** extracts the least value from the RowSet.
+**
+** The INSERT primitive might allocate additional memory.  Memory is
+** allocated in chunks so most INSERTs do no allocation.  There is an 
+** upper bound on the size of allocated memory.  No memory is freed
+** until DESTROY.
+**
+** The TEST primitive includes a "batch" number.  The TEST primitive
+** will only see elements that were inserted before the last change
+** in the batch number.  In other words, if an INSERT occurs between
+** two TESTs where the TESTs have the same batch nubmer, then the
+** value added by the INSERT will not be visible to the second TEST.
+** The initial batch number is zero, so if the very first TEST contains
+** a non-zero batch number, it will see all prior INSERTs.
+**
+** No INSERTs may occurs after a SMALLEST.  An assertion will fail if
+** that is attempted.
+**
+** The cost of an INSERT is roughly constant.  (Sometimes new memory
+** has to be allocated on an INSERT.)  The cost of a TEST with a new
+** batch number is O(NlogN) where N is the number of elements in the RowSet.
+** The cost of a TEST using the same batch number is O(logN).  The cost
+** of the first SMALLEST is O(NlogN).  Second and subsequent SMALLEST
+** primitives are constant time.  The cost of DESTROY is O(N).
+**
+** There is an added cost of O(N) when switching between TEST and
+** SMALLEST primitives.
+*/
+
+
+/*
+** Target size for allocation chunks.
+*/
+#define ROWSET_ALLOCATION_SIZE 1024
+
+/*
+** The number of rowset entries per allocation chunk.
+*/
+#define ROWSET_ENTRY_PER_CHUNK  \
+                       ((ROWSET_ALLOCATION_SIZE-8)/sizeof(struct RowSetEntry))
+
+/*
+** Each entry in a RowSet is an instance of the following object.
+**
+** This same object is reused to store a linked list of trees of RowSetEntry
+** objects.  In that alternative use, pRight points to the next entry
+** in the list, pLeft points to the tree, and v is unused.  The
+** RowSet.pForest value points to the head of this forest list.
+*/
+struct RowSetEntry {            
+  i64 v;                        /* ROWID value for this entry */
+  struct RowSetEntry *pRight;   /* Right subtree (larger entries) or list */
+  struct RowSetEntry *pLeft;    /* Left subtree (smaller entries) */
+};
+
+/*
+** RowSetEntry objects are allocated in large chunks (instances of the
+** following structure) to reduce memory allocation overhead.  The
+** chunks are kept on a linked list so that they can be deallocated
+** when the RowSet is destroyed.
+*/
+struct RowSetChunk {
+  struct RowSetChunk *pNextChunk;        /* Next chunk on list of them all */
+  struct RowSetEntry aEntry[ROWSET_ENTRY_PER_CHUNK]; /* Allocated entries */
+};
+
+/*
+** A RowSet in an instance of the following structure.
+**
+** A typedef of this structure if found in sqliteInt.h.
+*/
+struct RowSet {
+  struct RowSetChunk *pChunk;    /* List of all chunk allocations */
+  sqlite3 *db;                   /* The database connection */
+  struct RowSetEntry *pEntry;    /* List of entries using pRight */
+  struct RowSetEntry *pLast;     /* Last entry on the pEntry list */
+  struct RowSetEntry *pFresh;    /* Source of new entry objects */
+  struct RowSetEntry *pForest;   /* List of binary trees of entries */
+  u16 nFresh;                    /* Number of objects on pFresh */
+  u16 rsFlags;                   /* Various flags */
+  int iBatch;                    /* Current insert batch */
+};
+
+/*
+** Allowed values for RowSet.rsFlags
+*/
+#define ROWSET_SORTED  0x01   /* True if RowSet.pEntry is sorted */
+#define ROWSET_NEXT    0x02   /* True if sqlite3RowSetNext() has been called */
+
+/*
+** Turn bulk memory into a RowSet object.  N bytes of memory
+** are available at pSpace.  The db pointer is used as a memory context
+** for any subsequent allocations that need to occur.
+** Return a pointer to the new RowSet object.
+**
+** It must be the case that N is sufficient to make a Rowset.  If not
+** an assertion fault occurs.
+** 
+** If N is larger than the minimum, use the surplus as an initial
+** allocation of entries available to be filled.
+*/
+SQLITE_PRIVATE RowSet *sqlite3RowSetInit(sqlite3 *db, void *pSpace, unsigned int N){
+  RowSet *p;
+  assert( N >= ROUND8(sizeof(*p)) );
+  p = pSpace;
+  p->pChunk = 0;
+  p->db = db;
+  p->pEntry = 0;
+  p->pLast = 0;
+  p->pForest = 0;
+  p->pFresh = (struct RowSetEntry*)(ROUND8(sizeof(*p)) + (char*)p);
+  p->nFresh = (u16)((N - ROUND8(sizeof(*p)))/sizeof(struct RowSetEntry));
+  p->rsFlags = ROWSET_SORTED;
+  p->iBatch = 0;
+  return p;
+}
+
+/*
+** Deallocate all chunks from a RowSet.  This frees all memory that
+** the RowSet has allocated over its lifetime.  This routine is
+** the destructor for the RowSet.
+*/
+SQLITE_PRIVATE void sqlite3RowSetClear(RowSet *p){
+  struct RowSetChunk *pChunk, *pNextChunk;
+  for(pChunk=p->pChunk; pChunk; pChunk = pNextChunk){
+    pNextChunk = pChunk->pNextChunk;
+    sqlite3DbFree(p->db, pChunk);
+  }
+  p->pChunk = 0;
+  p->nFresh = 0;
+  p->pEntry = 0;
+  p->pLast = 0;
+  p->pForest = 0;
+  p->rsFlags = ROWSET_SORTED;
+}
+
+/*
+** Allocate a new RowSetEntry object that is associated with the
+** given RowSet.  Return a pointer to the new and completely uninitialized
+** objected.
+**
+** In an OOM situation, the RowSet.db->mallocFailed flag is set and this
+** routine returns NULL.
+*/
+static struct RowSetEntry *rowSetEntryAlloc(RowSet *p){
+  assert( p!=0 );
+  if( p->nFresh==0 ){
+    struct RowSetChunk *pNew;
+    pNew = sqlite3DbMallocRaw(p->db, sizeof(*pNew));
+    if( pNew==0 ){
+      return 0;
+    }
+    pNew->pNextChunk = p->pChunk;
+    p->pChunk = pNew;
+    p->pFresh = pNew->aEntry;
+    p->nFresh = ROWSET_ENTRY_PER_CHUNK;
+  }
+  p->nFresh--;
+  return p->pFresh++;
+}
+
+/*
+** Insert a new value into a RowSet.
+**
+** The mallocFailed flag of the database connection is set if a
+** memory allocation fails.
+*/
+SQLITE_PRIVATE void sqlite3RowSetInsert(RowSet *p, i64 rowid){
+  struct RowSetEntry *pEntry;  /* The new entry */
+  struct RowSetEntry *pLast;   /* The last prior entry */
+
+  /* This routine is never called after sqlite3RowSetNext() */
+  assert( p!=0 && (p->rsFlags & ROWSET_NEXT)==0 );
+
+  pEntry = rowSetEntryAlloc(p);
+  if( pEntry==0 ) return;
+  pEntry->v = rowid;
+  pEntry->pRight = 0;
+  pLast = p->pLast;
+  if( pLast ){
+    if( (p->rsFlags & ROWSET_SORTED)!=0 && rowid<=pLast->v ){
+      p->rsFlags &= ~ROWSET_SORTED;
+    }
+    pLast->pRight = pEntry;
+  }else{
+    p->pEntry = pEntry;
+  }
+  p->pLast = pEntry;
+}
+
+/*
+** Merge two lists of RowSetEntry objects.  Remove duplicates.
+**
+** The input lists are connected via pRight pointers and are 
+** assumed to each already be in sorted order.
+*/
+static struct RowSetEntry *rowSetEntryMerge(
+  struct RowSetEntry *pA,    /* First sorted list to be merged */
+  struct RowSetEntry *pB     /* Second sorted list to be merged */
+){
+  struct RowSetEntry head;
+  struct RowSetEntry *pTail;
+
+  pTail = &head;
+  while( pA && pB ){
+    assert( pA->pRight==0 || pA->v<=pA->pRight->v );
+    assert( pB->pRight==0 || pB->v<=pB->pRight->v );
+    if( pA->v<pB->v ){
+      pTail->pRight = pA;
+      pA = pA->pRight;
+      pTail = pTail->pRight;
+    }else if( pB->v<pA->v ){
+      pTail->pRight = pB;
+      pB = pB->pRight;
+      pTail = pTail->pRight;
+    }else{
+      pA = pA->pRight;
+    }
+  }
+  if( pA ){
+    assert( pA->pRight==0 || pA->v<=pA->pRight->v );
+    pTail->pRight = pA;
+  }else{
+    assert( pB==0 || pB->pRight==0 || pB->v<=pB->pRight->v );
+    pTail->pRight = pB;
+  }
+  return head.pRight;
+}
+
+/*
+** Sort all elements on the list of RowSetEntry objects into order of
+** increasing v.
+*/ 
+static struct RowSetEntry *rowSetEntrySort(struct RowSetEntry *pIn){
+  unsigned int i;
+  struct RowSetEntry *pNext, *aBucket[40];
+
+  memset(aBucket, 0, sizeof(aBucket));
+  while( pIn ){
+    pNext = pIn->pRight;
+    pIn->pRight = 0;
+    for(i=0; aBucket[i]; i++){
+      pIn = rowSetEntryMerge(aBucket[i], pIn);
+      aBucket[i] = 0;
+    }
+    aBucket[i] = pIn;
+    pIn = pNext;
+  }
+  pIn = 0;
+  for(i=0; i<sizeof(aBucket)/sizeof(aBucket[0]); i++){
+    pIn = rowSetEntryMerge(pIn, aBucket[i]);
+  }
+  return pIn;
+}
+
+
+/*
+** The input, pIn, is a binary tree (or subtree) of RowSetEntry objects.
+** Convert this tree into a linked list connected by the pRight pointers
+** and return pointers to the first and last elements of the new list.
+*/
+static void rowSetTreeToList(
+  struct RowSetEntry *pIn,         /* Root of the input tree */
+  struct RowSetEntry **ppFirst,    /* Write head of the output list here */
+  struct RowSetEntry **ppLast      /* Write tail of the output list here */
+){
+  assert( pIn!=0 );
+  if( pIn->pLeft ){
+    struct RowSetEntry *p;
+    rowSetTreeToList(pIn->pLeft, ppFirst, &p);
+    p->pRight = pIn;
+  }else{
+    *ppFirst = pIn;
+  }
+  if( pIn->pRight ){
+    rowSetTreeToList(pIn->pRight, &pIn->pRight, ppLast);
+  }else{
+    *ppLast = pIn;
+  }
+  assert( (*ppLast)->pRight==0 );
+}
+
+
+/*
+** Convert a sorted list of elements (connected by pRight) into a binary
+** tree with depth of iDepth.  A depth of 1 means the tree contains a single
+** node taken from the head of *ppList.  A depth of 2 means a tree with
+** three nodes.  And so forth.
+**
+** Use as many entries from the input list as required and update the
+** *ppList to point to the unused elements of the list.  If the input
+** list contains too few elements, then construct an incomplete tree
+** and leave *ppList set to NULL.
+**
+** Return a pointer to the root of the constructed binary tree.
+*/
+static struct RowSetEntry *rowSetNDeepTree(
+  struct RowSetEntry **ppList,
+  int iDepth
+){
+  struct RowSetEntry *p;         /* Root of the new tree */
+  struct RowSetEntry *pLeft;     /* Left subtree */
+  if( *ppList==0 ){
+    return 0;
+  }
+  if( iDepth==1 ){
+    p = *ppList;
+    *ppList = p->pRight;
+    p->pLeft = p->pRight = 0;
+    return p;
+  }
+  pLeft = rowSetNDeepTree(ppList, iDepth-1);
+  p = *ppList;
+  if( p==0 ){
+    return pLeft;
+  }
+  p->pLeft = pLeft;
+  *ppList = p->pRight;
+  p->pRight = rowSetNDeepTree(ppList, iDepth-1);
+  return p;
+}
+
+/*
+** Convert a sorted list of elements into a binary tree. Make the tree
+** as deep as it needs to be in order to contain the entire list.
+*/
+static struct RowSetEntry *rowSetListToTree(struct RowSetEntry *pList){
+  int iDepth;           /* Depth of the tree so far */
+  struct RowSetEntry *p;       /* Current tree root */
+  struct RowSetEntry *pLeft;   /* Left subtree */
+
+  assert( pList!=0 );
+  p = pList;
+  pList = p->pRight;
+  p->pLeft = p->pRight = 0;
+  for(iDepth=1; pList; iDepth++){
+    pLeft = p;
+    p = pList;
+    pList = p->pRight;
+    p->pLeft = pLeft;
+    p->pRight = rowSetNDeepTree(&pList, iDepth);
+  }
+  return p;
+}
+
+/*
+** Take all the entries on p->pEntry and on the trees in p->pForest and
+** sort them all together into one big ordered list on p->pEntry.
+**
+** This routine should only be called once in the life of a RowSet.
+*/
+static void rowSetToList(RowSet *p){
+
+  /* This routine is called only once */
+  assert( p!=0 && (p->rsFlags & ROWSET_NEXT)==0 );
+
+  if( (p->rsFlags & ROWSET_SORTED)==0 ){
+    p->pEntry = rowSetEntrySort(p->pEntry);
+  }
+
+  /* While this module could theoretically support it, sqlite3RowSetNext()
+  ** is never called after sqlite3RowSetText() for the same RowSet.  So
+  ** there is never a forest to deal with.  Should this change, simply
+  ** remove the assert() and the #if 0. */
+  assert( p->pForest==0 );
+#if 0
+  while( p->pForest ){
+    struct RowSetEntry *pTree = p->pForest->pLeft;
+    if( pTree ){
+      struct RowSetEntry *pHead, *pTail;
+      rowSetTreeToList(pTree, &pHead, &pTail);
+      p->pEntry = rowSetEntryMerge(p->pEntry, pHead);
+    }
+    p->pForest = p->pForest->pRight;
+  }
+#endif
+  p->rsFlags |= ROWSET_NEXT;  /* Verify this routine is never called again */
+}
+
+/*
+** Extract the smallest element from the RowSet.
+** Write the element into *pRowid.  Return 1 on success.  Return
+** 0 if the RowSet is already empty.
+**
+** After this routine has been called, the sqlite3RowSetInsert()
+** routine may not be called again.  
+*/
+SQLITE_PRIVATE int sqlite3RowSetNext(RowSet *p, i64 *pRowid){
+  assert( p!=0 );
+
+  /* Merge the forest into a single sorted list on first call */
+  if( (p->rsFlags & ROWSET_NEXT)==0 ) rowSetToList(p);
+
+  /* Return the next entry on the list */
+  if( p->pEntry ){
+    *pRowid = p->pEntry->v;
+    p->pEntry = p->pEntry->pRight;
+    if( p->pEntry==0 ){
+      sqlite3RowSetClear(p);
+    }
+    return 1;
+  }else{
+    return 0;
+  }
+}
+
+/*
+** Check to see if element iRowid was inserted into the rowset as
+** part of any insert batch prior to iBatch.  Return 1 or 0.
+**
+** If this is the first test of a new batch and if there exist entries
+** on pRowSet->pEntry, then sort those entries into the forest at
+** pRowSet->pForest so that they can be tested.
+*/
+SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 iRowid){
+  struct RowSetEntry *p, *pTree;
+
+  /* This routine is never called after sqlite3RowSetNext() */
+  assert( pRowSet!=0 && (pRowSet->rsFlags & ROWSET_NEXT)==0 );
+
+  /* Sort entries into the forest on the first test of a new batch 
+  */
+  if( iBatch!=pRowSet->iBatch ){
+    p = pRowSet->pEntry;
+    if( p ){
+      struct RowSetEntry **ppPrevTree = &pRowSet->pForest;
+      if( (pRowSet->rsFlags & ROWSET_SORTED)==0 ){
+        p = rowSetEntrySort(p);
+      }
+      for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){
+        ppPrevTree = &pTree->pRight;
+        if( pTree->pLeft==0 ){
+          pTree->pLeft = rowSetListToTree(p);
+          break;
+        }else{
+          struct RowSetEntry *pAux, *pTail;
+          rowSetTreeToList(pTree->pLeft, &pAux, &pTail);
+          pTree->pLeft = 0;
+          p = rowSetEntryMerge(pAux, p);
+        }
+      }
+      if( pTree==0 ){
+        *ppPrevTree = pTree = rowSetEntryAlloc(pRowSet);
+        if( pTree ){
+          pTree->v = 0;
+          pTree->pRight = 0;
+          pTree->pLeft = rowSetListToTree(p);
+        }
+      }
+      pRowSet->pEntry = 0;
+      pRowSet->pLast = 0;
+      pRowSet->rsFlags |= ROWSET_SORTED;
+    }
+    pRowSet->iBatch = iBatch;
+  }
+
+  /* Test to see if the iRowid value appears anywhere in the forest.
+  ** Return 1 if it does and 0 if not.
+  */
+  for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){
+    p = pTree->pLeft;
+    while( p ){
+      if( p->v<iRowid ){
+        p = p->pRight;
+      }else if( p->v>iRowid ){
+        p = p->pLeft;
+      }else{
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/************** End of rowset.c **********************************************/
+/************** Begin file pager.c *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the implementation of the page cache subsystem or "pager".
+** 
+** The pager is used to access a database disk file.  It implements
+** atomic commit and rollback through the use of a journal file that
+** is separate from the database file.  The pager also implements file
+** locking to prevent two processes from writing the same database
+** file simultaneously, or one process from reading the database while
+** another is writing.
+*/
+#ifndef SQLITE_OMIT_DISKIO
+/************** Include wal.h in the middle of pager.c ***********************/
+/************** Begin file wal.h *********************************************/
+/*
+** 2010 February 1
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface to the write-ahead logging 
+** system. Refer to the comments below and the header comment attached to 
+** the implementation of each function in log.c for further details.
+*/
+
+#ifndef _WAL_H_
+#define _WAL_H_
+
+
+/* Additional values that can be added to the sync_flags argument of
+** sqlite3WalFrames():
+*/
+#define WAL_SYNC_TRANSACTIONS  0x20   /* Sync at the end of each transaction */
+#define SQLITE_SYNC_MASK       0x13   /* Mask off the SQLITE_SYNC_* values */
+
+#ifdef SQLITE_OMIT_WAL
+# define sqlite3WalOpen(x,y,z)                   0
+# define sqlite3WalLimit(x,y)
+# define sqlite3WalClose(w,x,y,z)                0
+# define sqlite3WalBeginReadTransaction(y,z)     0
+# define sqlite3WalEndReadTransaction(z)
+# define sqlite3WalDbsize(y)                     0
+# define sqlite3WalBeginWriteTransaction(y)      0
+# define sqlite3WalEndWriteTransaction(x)        0
+# define sqlite3WalUndo(x,y,z)                   0
+# define sqlite3WalSavepoint(y,z)
+# define sqlite3WalSavepointUndo(y,z)            0
+# define sqlite3WalFrames(u,v,w,x,y,z)           0
+# define sqlite3WalCheckpoint(r,s,t,u,v,w,x,y,z) 0
+# define sqlite3WalCallback(z)                   0
+# define sqlite3WalExclusiveMode(y,z)            0
+# define sqlite3WalHeapMemory(z)                 0
+# define sqlite3WalFramesize(z)                  0
+# define sqlite3WalFindFrame(x,y,z)              0
+#else
+
+#define WAL_SAVEPOINT_NDATA 4
+
+/* Connection to a write-ahead log (WAL) file. 
+** There is one object of this type for each pager. 
+*/
+typedef struct Wal Wal;
+
+/* Open and close a connection to a write-ahead log. */
+SQLITE_PRIVATE int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**);
+SQLITE_PRIVATE int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *);
+
+/* Set the limiting size of a WAL file. */
+SQLITE_PRIVATE void sqlite3WalLimit(Wal*, i64);
+
+/* Used by readers to open (lock) and close (unlock) a snapshot.  A 
+** snapshot is like a read-transaction.  It is the state of the database
+** at an instant in time.  sqlite3WalOpenSnapshot gets a read lock and
+** preserves the current state even if the other threads or processes
+** write to or checkpoint the WAL.  sqlite3WalCloseSnapshot() closes the
+** transaction and releases the lock.
+*/
+SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *);
+SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal);
+
+/* Read a page from the write-ahead log, if it is present. */
+SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *);
+SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *);
+
+/* If the WAL is not empty, return the size of the database. */
+SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal);
+
+/* Obtain or release the WRITER lock. */
+SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal);
+SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal);
+
+/* Undo any frames written (but not committed) to the log */
+SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx);
+
+/* Return an integer that records the current (uncommitted) write
+** position in the WAL */
+SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData);
+
+/* Move the write position of the WAL back to iFrame.  Called in
+** response to a ROLLBACK TO command. */
+SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);
+
+/* Write a frame or frames to the log. */
+SQLITE_PRIVATE int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);
+
+/* Copy pages from the log to the database file */ 
+SQLITE_PRIVATE int sqlite3WalCheckpoint(
+  Wal *pWal,                      /* Write-ahead log connection */
+  int eMode,                      /* One of PASSIVE, FULL and RESTART */
+  int (*xBusy)(void*),            /* Function to call when busy */
+  void *pBusyArg,                 /* Context argument for xBusyHandler */
+  int sync_flags,                 /* Flags to sync db file with (or 0) */
+  int nBuf,                       /* Size of buffer nBuf */
+  u8 *zBuf,                       /* Temporary buffer to use */
+  int *pnLog,                     /* OUT: Number of frames in WAL */
+  int *pnCkpt                     /* OUT: Number of backfilled frames in WAL */
+);
+
+/* Return the value to pass to a sqlite3_wal_hook callback, the
+** number of frames in the WAL at the point of the last commit since
+** sqlite3WalCallback() was called.  If no commits have occurred since
+** the last call, then return 0.
+*/
+SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal);
+
+/* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released)
+** by the pager layer on the database file.
+*/
+SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op);
+
+/* Return true if the argument is non-NULL and the WAL module is using
+** heap-memory for the wal-index. Otherwise, if the argument is NULL or the
+** WAL module is using shared-memory, return false. 
+*/
+SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal);
+
+#ifdef SQLITE_ENABLE_ZIPVFS
+/* If the WAL file is not empty, return the number of bytes of content
+** stored in each frame (i.e. the db page-size when the WAL was created).
+*/
+SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal);
+#endif
+
+#endif /* ifndef SQLITE_OMIT_WAL */
+#endif /* _WAL_H_ */
+
+/************** End of wal.h *************************************************/
+/************** Continuing where we left off in pager.c **********************/
+
+
+/******************* NOTES ON THE DESIGN OF THE PAGER ************************
+**
+** This comment block describes invariants that hold when using a rollback
+** journal.  These invariants do not apply for journal_mode=WAL,
+** journal_mode=MEMORY, or journal_mode=OFF.
+**
+** Within this comment block, a page is deemed to have been synced
+** automatically as soon as it is written when PRAGMA synchronous=OFF.
+** Otherwise, the page is not synced until the xSync method of the VFS
+** is called successfully on the file containing the page.
+**
+** Definition:  A page of the database file is said to be "overwriteable" if
+** one or more of the following are true about the page:
+** 
+**     (a)  The original content of the page as it was at the beginning of
+**          the transaction has been written into the rollback journal and
+**          synced.
+** 
+**     (b)  The page was a freelist leaf page at the start of the transaction.
+** 
+**     (c)  The page number is greater than the largest page that existed in
+**          the database file at the start of the transaction.
+** 
+** (1) A page of the database file is never overwritten unless one of the
+**     following are true:
+** 
+**     (a) The page and all other pages on the same sector are overwriteable.
+** 
+**     (b) The atomic page write optimization is enabled, and the entire
+**         transaction other than the update of the transaction sequence
+**         number consists of a single page change.
+** 
+** (2) The content of a page written into the rollback journal exactly matches
+**     both the content in the database when the rollback journal was written
+**     and the content in the database at the beginning of the current
+**     transaction.
+** 
+** (3) Writes to the database file are an integer multiple of the page size
+**     in length and are aligned on a page boundary.
+** 
+** (4) Reads from the database file are either aligned on a page boundary and
+**     an integer multiple of the page size in length or are taken from the
+**     first 100 bytes of the database file.
+** 
+** (5) All writes to the database file are synced prior to the rollback journal
+**     being deleted, truncated, or zeroed.
+** 
+** (6) If a master journal file is used, then all writes to the database file
+**     are synced prior to the master journal being deleted.
+** 
+** Definition: Two databases (or the same database at two points it time)
+** are said to be "logically equivalent" if they give the same answer to
+** all queries.  Note in particular the content of freelist leaf
+** pages can be changed arbitrarily without affecting the logical equivalence
+** of the database.
+** 
+** (7) At any time, if any subset, including the empty set and the total set,
+**     of the unsynced changes to a rollback journal are removed and the 
+**     journal is rolled back, the resulting database file will be logically
+**     equivalent to the database file at the beginning of the transaction.
+** 
+** (8) When a transaction is rolled back, the xTruncate method of the VFS
+**     is called to restore the database file to the same size it was at
+**     the beginning of the transaction.  (In some VFSes, the xTruncate
+**     method is a no-op, but that does not change the fact the SQLite will
+**     invoke it.)
+** 
+** (9) Whenever the database file is modified, at least one bit in the range
+**     of bytes from 24 through 39 inclusive will be changed prior to releasing
+**     the EXCLUSIVE lock, thus signaling other connections on the same
+**     database to flush their caches.
+**
+** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less
+**      than one billion transactions.
+**
+** (11) A database file is well-formed at the beginning and at the conclusion
+**      of every transaction.
+**
+** (12) An EXCLUSIVE lock is held on the database file when writing to
+**      the database file.
+**
+** (13) A SHARED lock is held on the database file while reading any
+**      content out of the database file.
+**
+******************************************************************************/
+
+/*
+** Macros for troubleshooting.  Normally turned off
+*/
+#if 0
+int sqlite3PagerTrace=1;  /* True to enable tracing */
+#define sqlite3DebugPrintf printf
+#define PAGERTRACE(X)     if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; }
+#else
+#define PAGERTRACE(X)
+#endif
+
+/*
+** The following two macros are used within the PAGERTRACE() macros above
+** to print out file-descriptors. 
+**
+** PAGERID() takes a pointer to a Pager struct as its argument. The
+** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
+** struct as its argument.
+*/
+#define PAGERID(p) ((int)(p->fd))
+#define FILEHANDLEID(fd) ((int)fd)
+
+/*
+** The Pager.eState variable stores the current 'state' of a pager. A
+** pager may be in any one of the seven states shown in the following
+** state diagram.
+**
+**                            OPEN <------+------+
+**                              |         |      |
+**                              V         |      |
+**               +---------> READER-------+      |
+**               |              |                |
+**               |              V                |
+**               |<-------WRITER_LOCKED------> ERROR
+**               |              |                ^  
+**               |              V                |
+**               |<------WRITER_CACHEMOD-------->|
+**               |              |                |
+**               |              V                |
+**               |<-------WRITER_DBMOD---------->|
+**               |              |                |
+**               |              V                |
+**               +<------WRITER_FINISHED-------->+
+**
+**
+** List of state transitions and the C [function] that performs each:
+** 
+**   OPEN              -> READER              [sqlite3PagerSharedLock]
+**   READER            -> OPEN                [pager_unlock]
+**
+**   READER            -> WRITER_LOCKED       [sqlite3PagerBegin]
+**   WRITER_LOCKED     -> WRITER_CACHEMOD     [pager_open_journal]
+**   WRITER_CACHEMOD   -> WRITER_DBMOD        [syncJournal]
+**   WRITER_DBMOD      -> WRITER_FINISHED     [sqlite3PagerCommitPhaseOne]
+**   WRITER_***        -> READER              [pager_end_transaction]
+**
+**   WRITER_***        -> ERROR               [pager_error]
+**   ERROR             -> OPEN                [pager_unlock]
+** 
+**
+**  OPEN:
+**
+**    The pager starts up in this state. Nothing is guaranteed in this
+**    state - the file may or may not be locked and the database size is
+**    unknown. The database may not be read or written.
+**
+**    * No read or write transaction is active.
+**    * Any lock, or no lock at all, may be held on the database file.
+**    * The dbSize, dbOrigSize and dbFileSize variables may not be trusted.
+**
+**  READER:
+**
+**    In this state all the requirements for reading the database in 
+**    rollback (non-WAL) mode are met. Unless the pager is (or recently
+**    was) in exclusive-locking mode, a user-level read transaction is 
+**    open. The database size is known in this state.
+**
+**    A connection running with locking_mode=normal enters this state when
+**    it opens a read-transaction on the database and returns to state
+**    OPEN after the read-transaction is completed. However a connection
+**    running in locking_mode=exclusive (including temp databases) remains in
+**    this state even after the read-transaction is closed. The only way
+**    a locking_mode=exclusive connection can transition from READER to OPEN
+**    is via the ERROR state (see below).
+** 
+**    * A read transaction may be active (but a write-transaction cannot).
+**    * A SHARED or greater lock is held on the database file.
+**    * The dbSize variable may be trusted (even if a user-level read 
+**      transaction is not active). The dbOrigSize and dbFileSize variables
+**      may not be trusted at this point.
+**    * If the database is a WAL database, then the WAL connection is open.
+**    * Even if a read-transaction is not open, it is guaranteed that 
+**      there is no hot-journal in the file-system.
+**
+**  WRITER_LOCKED:
+**
+**    The pager moves to this state from READER when a write-transaction
+**    is first opened on the database. In WRITER_LOCKED state, all locks 
+**    required to start a write-transaction are held, but no actual 
+**    modifications to the cache or database have taken place.
+**
+**    In rollback mode, a RESERVED or (if the transaction was opened with 
+**    BEGIN EXCLUSIVE) EXCLUSIVE lock is obtained on the database file when
+**    moving to this state, but the journal file is not written to or opened 
+**    to in this state. If the transaction is committed or rolled back while 
+**    in WRITER_LOCKED state, all that is required is to unlock the database 
+**    file.
+**
+**    IN WAL mode, WalBeginWriteTransaction() is called to lock the log file.
+**    If the connection is running with locking_mode=exclusive, an attempt
+**    is made to obtain an EXCLUSIVE lock on the database file.
+**
+**    * A write transaction is active.
+**    * If the connection is open in rollback-mode, a RESERVED or greater 
+**      lock is held on the database file.
+**    * If the connection is open in WAL-mode, a WAL write transaction
+**      is open (i.e. sqlite3WalBeginWriteTransaction() has been successfully
+**      called).
+**    * The dbSize, dbOrigSize and dbFileSize variables are all valid.
+**    * The contents of the pager cache have not been modified.
+**    * The journal file may or may not be open.
+**    * Nothing (not even the first header) has been written to the journal.
+**
+**  WRITER_CACHEMOD:
+**
+**    A pager moves from WRITER_LOCKED state to this state when a page is
+**    first modified by the upper layer. In rollback mode the journal file
+**    is opened (if it is not already open) and a header written to the
+**    start of it. The database file on disk has not been modified.
+**
+**    * A write transaction is active.
+**    * A RESERVED or greater lock is held on the database file.
+**    * The journal file is open and the first header has been written 
+**      to it, but the header has not been synced to disk.
+**    * The contents of the page cache have been modified.
+**
+**  WRITER_DBMOD:
+**
+**    The pager transitions from WRITER_CACHEMOD into WRITER_DBMOD state
+**    when it modifies the contents of the database file. WAL connections
+**    never enter this state (since they do not modify the database file,
+**    just the log file).
+**
+**    * A write transaction is active.
+**    * An EXCLUSIVE or greater lock is held on the database file.
+**    * The journal file is open and the first header has been written 
+**      and synced to disk.
+**    * The contents of the page cache have been modified (and possibly
+**      written to disk).
+**
+**  WRITER_FINISHED:
+**
+**    It is not possible for a WAL connection to enter this state.
+**
+**    A rollback-mode pager changes to WRITER_FINISHED state from WRITER_DBMOD
+**    state after the entire transaction has been successfully written into the
+**    database file. In this state the transaction may be committed simply
+**    by finalizing the journal file. Once in WRITER_FINISHED state, it is 
+**    not possible to modify the database further. At this point, the upper 
+**    layer must either commit or rollback the transaction.
+**
+**    * A write transaction is active.
+**    * An EXCLUSIVE or greater lock is held on the database file.
+**    * All writing and syncing of journal and database data has finished.
+**      If no error occurred, all that remains is to finalize the journal to
+**      commit the transaction. If an error did occur, the caller will need
+**      to rollback the transaction. 
+**
+**  ERROR:
+**
+**    The ERROR state is entered when an IO or disk-full error (including
+**    SQLITE_IOERR_NOMEM) occurs at a point in the code that makes it 
+**    difficult to be sure that the in-memory pager state (cache contents, 
+**    db size etc.) are consistent with the contents of the file-system.
+**
+**    Temporary pager files may enter the ERROR state, but in-memory pagers
+**    cannot.
+**
+**    For example, if an IO error occurs while performing a rollback, 
+**    the contents of the page-cache may be left in an inconsistent state.
+**    At this point it would be dangerous to change back to READER state
+**    (as usually happens after a rollback). Any subsequent readers might
+**    report database corruption (due to the inconsistent cache), and if
+**    they upgrade to writers, they may inadvertently corrupt the database
+**    file. To avoid this hazard, the pager switches into the ERROR state
+**    instead of READER following such an error.
+**
+**    Once it has entered the ERROR state, any attempt to use the pager
+**    to read or write data returns an error. Eventually, once all 
+**    outstanding transactions have been abandoned, the pager is able to
+**    transition back to OPEN state, discarding the contents of the 
+**    page-cache and any other in-memory state at the same time. Everything
+**    is reloaded from disk (and, if necessary, hot-journal rollback peformed)
+**    when a read-transaction is next opened on the pager (transitioning
+**    the pager into READER state). At that point the system has recovered 
+**    from the error.
+**
+**    Specifically, the pager jumps into the ERROR state if:
+**
+**      1. An error occurs while attempting a rollback. This happens in
+**         function sqlite3PagerRollback().
+**
+**      2. An error occurs while attempting to finalize a journal file
+**         following a commit in function sqlite3PagerCommitPhaseTwo().
+**
+**      3. An error occurs while attempting to write to the journal or
+**         database file in function pagerStress() in order to free up
+**         memory.
+**
+**    In other cases, the error is returned to the b-tree layer. The b-tree
+**    layer then attempts a rollback operation. If the error condition 
+**    persists, the pager enters the ERROR state via condition (1) above.
+**
+**    Condition (3) is necessary because it can be triggered by a read-only
+**    statement executed within a transaction. In this case, if the error
+**    code were simply returned to the user, the b-tree layer would not
+**    automatically attempt a rollback, as it assumes that an error in a
+**    read-only statement cannot leave the pager in an internally inconsistent 
+**    state.
+**
+**    * The Pager.errCode variable is set to something other than SQLITE_OK.
+**    * There are one or more outstanding references to pages (after the
+**      last reference is dropped the pager should move back to OPEN state).
+**    * The pager is not an in-memory pager.
+**    
+**
+** Notes:
+**
+**   * A pager is never in WRITER_DBMOD or WRITER_FINISHED state if the
+**     connection is open in WAL mode. A WAL connection is always in one
+**     of the first four states.
+**
+**   * Normally, a connection open in exclusive mode is never in PAGER_OPEN
+**     state. There are two exceptions: immediately after exclusive-mode has
+**     been turned on (and before any read or write transactions are 
+**     executed), and when the pager is leaving the "error state".
+**
+**   * See also: assert_pager_state().
+*/
+#define PAGER_OPEN                  0
+#define PAGER_READER                1
+#define PAGER_WRITER_LOCKED         2
+#define PAGER_WRITER_CACHEMOD       3
+#define PAGER_WRITER_DBMOD          4
+#define PAGER_WRITER_FINISHED       5
+#define PAGER_ERROR                 6
+
+/*
+** The Pager.eLock variable is almost always set to one of the 
+** following locking-states, according to the lock currently held on
+** the database file: NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK.
+** This variable is kept up to date as locks are taken and released by
+** the pagerLockDb() and pagerUnlockDb() wrappers.
+**
+** If the VFS xLock() or xUnlock() returns an error other than SQLITE_BUSY
+** (i.e. one of the SQLITE_IOERR subtypes), it is not clear whether or not
+** the operation was successful. In these circumstances pagerLockDb() and
+** pagerUnlockDb() take a conservative approach - eLock is always updated
+** when unlocking the file, and only updated when locking the file if the
+** VFS call is successful. This way, the Pager.eLock variable may be set
+** to a less exclusive (lower) value than the lock that is actually held
+** at the system level, but it is never set to a more exclusive value.
+**
+** This is usually safe. If an xUnlock fails or appears to fail, there may 
+** be a few redundant xLock() calls or a lock may be held for longer than
+** required, but nothing really goes wrong.
+**
+** The exception is when the database file is unlocked as the pager moves
+** from ERROR to OPEN state. At this point there may be a hot-journal file 
+** in the file-system that needs to be rolled back (as part of an OPEN->SHARED
+** transition, by the same pager or any other). If the call to xUnlock()
+** fails at this point and the pager is left holding an EXCLUSIVE lock, this
+** can confuse the call to xCheckReservedLock() call made later as part
+** of hot-journal detection.
+**
+** xCheckReservedLock() is defined as returning true "if there is a RESERVED 
+** lock held by this process or any others". So xCheckReservedLock may 
+** return true because the caller itself is holding an EXCLUSIVE lock (but
+** doesn't know it because of a previous error in xUnlock). If this happens
+** a hot-journal may be mistaken for a journal being created by an active
+** transaction in another process, causing SQLite to read from the database
+** without rolling it back.
+**
+** To work around this, if a call to xUnlock() fails when unlocking the
+** database in the ERROR state, Pager.eLock is set to UNKNOWN_LOCK. It
+** is only changed back to a real locking state after a successful call
+** to xLock(EXCLUSIVE). Also, the code to do the OPEN->SHARED state transition
+** omits the check for a hot-journal if Pager.eLock is set to UNKNOWN_LOCK 
+** lock. Instead, it assumes a hot-journal exists and obtains an EXCLUSIVE
+** lock on the database file before attempting to roll it back. See function
+** PagerSharedLock() for more detail.
+**
+** Pager.eLock may only be set to UNKNOWN_LOCK when the pager is in 
+** PAGER_OPEN state.
+*/
+#define UNKNOWN_LOCK                (EXCLUSIVE_LOCK+1)
+
+/*
+** A macro used for invoking the codec if there is one
+*/
+#ifdef SQLITE_HAS_CODEC
+# define CODEC1(P,D,N,X,E) \
+    if( P->xCodec && P->xCodec(P->pCodec,D,N,X)==0 ){ E; }
+# define CODEC2(P,D,N,X,E,O) \
+    if( P->xCodec==0 ){ O=(char*)D; }else \
+    if( (O=(char*)(P->xCodec(P->pCodec,D,N,X)))==0 ){ E; }
+#else
+# define CODEC1(P,D,N,X,E)   /* NO-OP */
+# define CODEC2(P,D,N,X,E,O) O=(char*)D
+#endif
+
+/*
+** The maximum allowed sector size. 64KiB. If the xSectorsize() method 
+** returns a value larger than this, then MAX_SECTOR_SIZE is used instead.
+** This could conceivably cause corruption following a power failure on
+** such a system. This is currently an undocumented limit.
+*/
+#define MAX_SECTOR_SIZE 0x10000
+
+/*
+** An instance of the following structure is allocated for each active
+** savepoint and statement transaction in the system. All such structures
+** are stored in the Pager.aSavepoint[] array, which is allocated and
+** resized using sqlite3Realloc().
+**
+** When a savepoint is created, the PagerSavepoint.iHdrOffset field is
+** set to 0. If a journal-header is written into the main journal while
+** the savepoint is active, then iHdrOffset is set to the byte offset 
+** immediately following the last journal record written into the main
+** journal before the journal-header. This is required during savepoint
+** rollback (see pagerPlaybackSavepoint()).
+*/
+typedef struct PagerSavepoint PagerSavepoint;
+struct PagerSavepoint {
+  i64 iOffset;                 /* Starting offset in main journal */
+  i64 iHdrOffset;              /* See above */
+  Bitvec *pInSavepoint;        /* Set of pages in this savepoint */
+  Pgno nOrig;                  /* Original number of pages in file */
+  Pgno iSubRec;                /* Index of first record in sub-journal */
+#ifndef SQLITE_OMIT_WAL
+  u32 aWalData[WAL_SAVEPOINT_NDATA];        /* WAL savepoint context */
+#endif
+};
+
+/*
+** Bits of the Pager.doNotSpill flag.  See further description below.
+*/
+#define SPILLFLAG_OFF         0x01      /* Never spill cache.  Set via pragma */
+#define SPILLFLAG_ROLLBACK    0x02      /* Current rolling back, so do not spill */
+#define SPILLFLAG_NOSYNC      0x04      /* Spill is ok, but do not sync */
+
+/*
+** An open page cache is an instance of struct Pager. A description of
+** some of the more important member variables follows:
+**
+** eState
+**
+**   The current 'state' of the pager object. See the comment and state
+**   diagram above for a description of the pager state.
+**
+** eLock
+**
+**   For a real on-disk database, the current lock held on the database file -
+**   NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK.
+**
+**   For a temporary or in-memory database (neither of which require any
+**   locks), this variable is always set to EXCLUSIVE_LOCK. Since such
+**   databases always have Pager.exclusiveMode==1, this tricks the pager
+**   logic into thinking that it already has all the locks it will ever
+**   need (and no reason to release them).
+**
+**   In some (obscure) circumstances, this variable may also be set to
+**   UNKNOWN_LOCK. See the comment above the #define of UNKNOWN_LOCK for
+**   details.
+**
+** changeCountDone
+**
+**   This boolean variable is used to make sure that the change-counter 
+**   (the 4-byte header field at byte offset 24 of the database file) is 
+**   not updated more often than necessary. 
+**
+**   It is set to true when the change-counter field is updated, which 
+**   can only happen if an exclusive lock is held on the database file.
+**   It is cleared (set to false) whenever an exclusive lock is 
+**   relinquished on the database file. Each time a transaction is committed,
+**   The changeCountDone flag is inspected. If it is true, the work of
+**   updating the change-counter is omitted for the current transaction.
+**
+**   This mechanism means that when running in exclusive mode, a connection 
+**   need only update the change-counter once, for the first transaction
+**   committed.
+**
+** setMaster
+**
+**   When PagerCommitPhaseOne() is called to commit a transaction, it may
+**   (or may not) specify a master-journal name to be written into the 
+**   journal file before it is synced to disk.
+**
+**   Whether or not a journal file contains a master-journal pointer affects 
+**   the way in which the journal file is finalized after the transaction is 
+**   committed or rolled back when running in "journal_mode=PERSIST" mode.
+**   If a journal file does not contain a master-journal pointer, it is
+**   finalized by overwriting the first journal header with zeroes. If
+**   it does contain a master-journal pointer the journal file is finalized 
+**   by truncating it to zero bytes, just as if the connection were 
+**   running in "journal_mode=truncate" mode.
+**
+**   Journal files that contain master journal pointers cannot be finalized
+**   simply by overwriting the first journal-header with zeroes, as the
+**   master journal pointer could interfere with hot-journal rollback of any
+**   subsequently interrupted transaction that reuses the journal file.
+**
+**   The flag is cleared as soon as the journal file is finalized (either
+**   by PagerCommitPhaseTwo or PagerRollback). If an IO error prevents the
+**   journal file from being successfully finalized, the setMaster flag
+**   is cleared anyway (and the pager will move to ERROR state).
+**
+** doNotSpill
+**
+**   This variables control the behavior of cache-spills  (calls made by
+**   the pcache module to the pagerStress() routine to write cached data
+**   to the file-system in order to free up memory).
+**
+**   When bits SPILLFLAG_OFF or SPILLFLAG_ROLLBACK of doNotSpill are set,
+**   writing to the database from pagerStress() is disabled altogether.
+**   The SPILLFLAG_ROLLBACK case is done in a very obscure case that
+**   comes up during savepoint rollback that requires the pcache module
+**   to allocate a new page to prevent the journal file from being written
+**   while it is being traversed by code in pager_playback().  The SPILLFLAG_OFF
+**   case is a user preference.
+** 
+**   If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress()
+**   is permitted, but syncing the journal file is not. This flag is set
+**   by sqlite3PagerWrite() when the file-system sector-size is larger than
+**   the database page-size in order to prevent a journal sync from happening 
+**   in between the journalling of two pages on the same sector. 
+**
+** subjInMemory
+**
+**   This is a boolean variable. If true, then any required sub-journal
+**   is opened as an in-memory journal file. If false, then in-memory
+**   sub-journals are only used for in-memory pager files.
+**
+**   This variable is updated by the upper layer each time a new 
+**   write-transaction is opened.
+**
+** dbSize, dbOrigSize, dbFileSize
+**
+**   Variable dbSize is set to the number of pages in the database file.
+**   It is valid in PAGER_READER and higher states (all states except for
+**   OPEN and ERROR). 
+**
+**   dbSize is set based on the size of the database file, which may be 
+**   larger than the size of the database (the value stored at offset
+**   28 of the database header by the btree). If the size of the file
+**   is not an integer multiple of the page-size, the value stored in
+**   dbSize is rounded down (i.e. a 5KB file with 2K page-size has dbSize==2).
+**   Except, any file that is greater than 0 bytes in size is considered
+**   to have at least one page. (i.e. a 1KB file with 2K page-size leads
+**   to dbSize==1).
+**
+**   During a write-transaction, if pages with page-numbers greater than
+**   dbSize are modified in the cache, dbSize is updated accordingly.
+**   Similarly, if the database is truncated using PagerTruncateImage(), 
+**   dbSize is updated.
+**
+**   Variables dbOrigSize and dbFileSize are valid in states 
+**   PAGER_WRITER_LOCKED and higher. dbOrigSize is a copy of the dbSize
+**   variable at the start of the transaction. It is used during rollback,
+**   and to determine whether or not pages need to be journalled before
+**   being modified.
+**
+**   Throughout a write-transaction, dbFileSize contains the size of
+**   the file on disk in pages. It is set to a copy of dbSize when the
+**   write-transaction is first opened, and updated when VFS calls are made
+**   to write or truncate the database file on disk. 
+**
+**   The only reason the dbFileSize variable is required is to suppress 
+**   unnecessary calls to xTruncate() after committing a transaction. If, 
+**   when a transaction is committed, the dbFileSize variable indicates 
+**   that the database file is larger than the database image (Pager.dbSize), 
+**   pager_truncate() is called. The pager_truncate() call uses xFilesize()
+**   to measure the database file on disk, and then truncates it if required.
+**   dbFileSize is not used when rolling back a transaction. In this case
+**   pager_truncate() is called unconditionally (which means there may be
+**   a call to xFilesize() that is not strictly required). In either case,
+**   pager_truncate() may cause the file to become smaller or larger.
+**
+** dbHintSize
+**
+**   The dbHintSize variable is used to limit the number of calls made to
+**   the VFS xFileControl(FCNTL_SIZE_HINT) method. 
+**
+**   dbHintSize is set to a copy of the dbSize variable when a
+**   write-transaction is opened (at the same time as dbFileSize and
+**   dbOrigSize). If the xFileControl(FCNTL_SIZE_HINT) method is called,
+**   dbHintSize is increased to the number of pages that correspond to the
+**   size-hint passed to the method call. See pager_write_pagelist() for 
+**   details.
+**
+** errCode
+**
+**   The Pager.errCode variable is only ever used in PAGER_ERROR state. It
+**   is set to zero in all other states. In PAGER_ERROR state, Pager.errCode 
+**   is always set to SQLITE_FULL, SQLITE_IOERR or one of the SQLITE_IOERR_XXX 
+**   sub-codes.
+*/
+struct Pager {
+  sqlite3_vfs *pVfs;          /* OS functions to use for IO */
+  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
+  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
+  u8 useJournal;              /* Use a rollback journal on this file */
+  u8 noSync;                  /* Do not sync the journal if true */
+  u8 fullSync;                /* Do extra syncs of the journal for robustness */
+  u8 ckptSyncFlags;           /* SYNC_NORMAL or SYNC_FULL for checkpoint */
+  u8 walSyncFlags;            /* SYNC_NORMAL or SYNC_FULL for wal writes */
+  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
+  u8 tempFile;                /* zFilename is a temporary or immutable file */
+  u8 noLock;                  /* Do not lock (except in WAL mode) */
+  u8 readOnly;                /* True for a read-only database */
+  u8 memDb;                   /* True to inhibit all file I/O */
+
+  /**************************************************************************
+  ** The following block contains those class members that change during
+  ** routine operation.  Class members not in this block are either fixed
+  ** when the pager is first created or else only change when there is a
+  ** significant mode change (such as changing the page_size, locking_mode,
+  ** or the journal_mode).  From another view, these class members describe
+  ** the "state" of the pager, while other class members describe the
+  ** "configuration" of the pager.
+  */
+  u8 eState;                  /* Pager state (OPEN, READER, WRITER_LOCKED..) */
+  u8 eLock;                   /* Current lock held on database file */
+  u8 changeCountDone;         /* Set after incrementing the change-counter */
+  u8 setMaster;               /* True if a m-j name has been written to jrnl */
+  u8 doNotSpill;              /* Do not spill the cache when non-zero */
+  u8 subjInMemory;            /* True to use in-memory sub-journals */
+  u8 bUseFetch;               /* True to use xFetch() */
+  u8 hasBeenUsed;             /* True if any content previously read from this pager*/
+  Pgno dbSize;                /* Number of pages in the database */
+  Pgno dbOrigSize;            /* dbSize before the current transaction */
+  Pgno dbFileSize;            /* Number of pages in the database file */
+  Pgno dbHintSize;            /* Value passed to FCNTL_SIZE_HINT call */
+  int errCode;                /* One of several kinds of errors */
+  int nRec;                   /* Pages journalled since last j-header written */
+  u32 cksumInit;              /* Quasi-random value added to every checksum */
+  u32 nSubRec;                /* Number of records written to sub-journal */
+  Bitvec *pInJournal;         /* One bit for each page in the database file */
+  sqlite3_file *fd;           /* File descriptor for database */
+  sqlite3_file *jfd;          /* File descriptor for main journal */
+  sqlite3_file *sjfd;         /* File descriptor for sub-journal */
+  i64 journalOff;             /* Current write offset in the journal file */
+  i64 journalHdr;             /* Byte offset to previous journal header */
+  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
+  PagerSavepoint *aSavepoint; /* Array of active savepoints */
+  int nSavepoint;             /* Number of elements in aSavepoint[] */
+  u32 iDataVersion;           /* Changes whenever database content changes */
+  char dbFileVers[16];        /* Changes whenever database file changes */
+
+  int nMmapOut;               /* Number of mmap pages currently outstanding */
+  sqlite3_int64 szMmap;       /* Desired maximum mmap size */
+  PgHdr *pMmapFreelist;       /* List of free mmap page headers (pDirty) */
+  /*
+  ** End of the routinely-changing class members
+  ***************************************************************************/
+
+  u16 nExtra;                 /* Add this many bytes to each in-memory page */
+  i16 nReserve;               /* Number of unused bytes at end of each page */
+  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
+  u32 sectorSize;             /* Assumed sector size during rollback */
+  int pageSize;               /* Number of bytes in a page */
+  Pgno mxPgno;                /* Maximum allowed size of the database */
+  i64 journalSizeLimit;       /* Size limit for persistent journal files */
+  char *zFilename;            /* Name of the database file */
+  char *zJournal;             /* Name of the journal file */
+  int (*xBusyHandler)(void*); /* Function to call when busy */
+  void *pBusyHandlerArg;      /* Context argument for xBusyHandler */
+  int aStat[3];               /* Total cache hits, misses and writes */
+#ifdef SQLITE_TEST
+  int nRead;                  /* Database pages read */
+#endif
+  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
+#ifdef SQLITE_HAS_CODEC
+  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
+  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
+  void (*xCodecFree)(void*);             /* Destructor for the codec */
+  void *pCodec;               /* First argument to xCodec... methods */
+#endif
+  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
+  PCache *pPCache;            /* Pointer to page cache object */
+#ifndef SQLITE_OMIT_WAL
+  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
+  char *zWal;                 /* File name for write-ahead log */
+#endif
+};
+
+/*
+** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains
+** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS 
+** or CACHE_WRITE to sqlite3_db_status().
+*/
+#define PAGER_STAT_HIT   0
+#define PAGER_STAT_MISS  1
+#define PAGER_STAT_WRITE 2
+
+/*
+** The following global variables hold counters used for
+** testing purposes only.  These variables do not exist in
+** a non-testing build.  These variables are not thread-safe.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
+SQLITE_API int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
+SQLITE_API int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
+# define PAGER_INCR(v)  v++
+#else
+# define PAGER_INCR(v)
+#endif
+
+
+
+/*
+** Journal files begin with the following magic string.  The data
+** was obtained from /dev/random.  It is used only as a sanity check.
+**
+** Since version 2.8.0, the journal format contains additional sanity
+** checking information.  If the power fails while the journal is being
+** written, semi-random garbage data might appear in the journal
+** file after power is restored.  If an attempt is then made
+** to roll the journal back, the database could be corrupted.  The additional
+** sanity checking data is an attempt to discover the garbage in the
+** journal and ignore it.
+**
+** The sanity checking information for the new journal format consists
+** of a 32-bit checksum on each page of data.  The checksum covers both
+** the page number and the pPager->pageSize bytes of data for the page.
+** This cksum is initialized to a 32-bit random value that appears in the
+** journal file right after the header.  The random initializer is important,
+** because garbage data that appears at the end of a journal is likely
+** data that was once in other files that have now been deleted.  If the
+** garbage data came from an obsolete journal file, the checksums might
+** be correct.  But by initializing the checksum to random value which
+** is different for every journal, we minimize that risk.
+*/
+static const unsigned char aJournalMagic[] = {
+  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
+};
+
+/*
+** The size of the of each page record in the journal is given by
+** the following macro.
+*/
+#define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)
+
+/*
+** The journal header size for this pager. This is usually the same 
+** size as a single disk sector. See also setSectorSize().
+*/
+#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
+
+/*
+** The macro MEMDB is true if we are dealing with an in-memory database.
+** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
+** the value of MEMDB will be a constant and the compiler will optimize
+** out code that would never execute.
+*/
+#ifdef SQLITE_OMIT_MEMORYDB
+# define MEMDB 0
+#else
+# define MEMDB pPager->memDb
+#endif
+
+/*
+** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch
+** interfaces to access the database using memory-mapped I/O.
+*/
+#if SQLITE_MAX_MMAP_SIZE>0
+# define USEFETCH(x) ((x)->bUseFetch)
+#else
+# define USEFETCH(x) 0
+#endif
+
+/*
+** The maximum legal page number is (2^31 - 1).
+*/
+#define PAGER_MAX_PGNO 2147483647
+
+/*
+** The argument to this macro is a file descriptor (type sqlite3_file*).
+** Return 0 if it is not open, or non-zero (but not 1) if it is.
+**
+** This is so that expressions can be written as:
+**
+**   if( isOpen(pPager->jfd) ){ ...
+**
+** instead of
+**
+**   if( pPager->jfd->pMethods ){ ...
+*/
+#define isOpen(pFd) ((pFd)->pMethods)
+
+/*
+** Return true if this pager uses a write-ahead log instead of the usual
+** rollback journal. Otherwise false.
+*/
+#ifndef SQLITE_OMIT_WAL
+static int pagerUseWal(Pager *pPager){
+  return (pPager->pWal!=0);
+}
+#else
+# define pagerUseWal(x) 0
+# define pagerRollbackWal(x) 0
+# define pagerWalFrames(v,w,x,y) 0
+# define pagerOpenWalIfPresent(z) SQLITE_OK
+# define pagerBeginReadTransaction(z) SQLITE_OK
+#endif
+
+#ifndef NDEBUG 
+/*
+** Usage:
+**
+**   assert( assert_pager_state(pPager) );
+**
+** This function runs many asserts to try to find inconsistencies in
+** the internal state of the Pager object.
+*/
+static int assert_pager_state(Pager *p){
+  Pager *pPager = p;
+
+  /* State must be valid. */
+  assert( p->eState==PAGER_OPEN
+       || p->eState==PAGER_READER
+       || p->eState==PAGER_WRITER_LOCKED
+       || p->eState==PAGER_WRITER_CACHEMOD
+       || p->eState==PAGER_WRITER_DBMOD
+       || p->eState==PAGER_WRITER_FINISHED
+       || p->eState==PAGER_ERROR
+  );
+
+  /* Regardless of the current state, a temp-file connection always behaves
+  ** as if it has an exclusive lock on the database file. It never updates
+  ** the change-counter field, so the changeCountDone flag is always set.
+  */
+  assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK );
+  assert( p->tempFile==0 || pPager->changeCountDone );
+
+  /* If the useJournal flag is clear, the journal-mode must be "OFF". 
+  ** And if the journal-mode is "OFF", the journal file must not be open.
+  */
+  assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal );
+  assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) );
+
+  /* Check that MEMDB implies noSync. And an in-memory journal. Since 
+  ** this means an in-memory pager performs no IO at all, it cannot encounter 
+  ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing 
+  ** a journal file. (although the in-memory journal implementation may 
+  ** return SQLITE_IOERR_NOMEM while the journal file is being written). It 
+  ** is therefore not possible for an in-memory pager to enter the ERROR 
+  ** state.
+  */
+  if( MEMDB ){
+    assert( p->noSync );
+    assert( p->journalMode==PAGER_JOURNALMODE_OFF 
+         || p->journalMode==PAGER_JOURNALMODE_MEMORY 
+    );
+    assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN );
+    assert( pagerUseWal(p)==0 );
+  }
+
+  /* If changeCountDone is set, a RESERVED lock or greater must be held
+  ** on the file.
+  */
+  assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK );
+  assert( p->eLock!=PENDING_LOCK );
+
+  switch( p->eState ){
+    case PAGER_OPEN:
+      assert( !MEMDB );
+      assert( pPager->errCode==SQLITE_OK );
+      assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile );
+      break;
+
+    case PAGER_READER:
+      assert( pPager->errCode==SQLITE_OK );
+      assert( p->eLock!=UNKNOWN_LOCK );
+      assert( p->eLock>=SHARED_LOCK );
+      break;
+
+    case PAGER_WRITER_LOCKED:
+      assert( p->eLock!=UNKNOWN_LOCK );
+      assert( pPager->errCode==SQLITE_OK );
+      if( !pagerUseWal(pPager) ){
+        assert( p->eLock>=RESERVED_LOCK );
+      }
+      assert( pPager->dbSize==pPager->dbOrigSize );
+      assert( pPager->dbOrigSize==pPager->dbFileSize );
+      assert( pPager->dbOrigSize==pPager->dbHintSize );
+      assert( pPager->setMaster==0 );
+      break;
+
+    case PAGER_WRITER_CACHEMOD:
+      assert( p->eLock!=UNKNOWN_LOCK );
+      assert( pPager->errCode==SQLITE_OK );
+      if( !pagerUseWal(pPager) ){
+        /* It is possible that if journal_mode=wal here that neither the
+        ** journal file nor the WAL file are open. This happens during
+        ** a rollback transaction that switches from journal_mode=off
+        ** to journal_mode=wal.
+        */
+        assert( p->eLock>=RESERVED_LOCK );
+        assert( isOpen(p->jfd) 
+             || p->journalMode==PAGER_JOURNALMODE_OFF 
+             || p->journalMode==PAGER_JOURNALMODE_WAL 
+        );
+      }
+      assert( pPager->dbOrigSize==pPager->dbFileSize );
+      assert( pPager->dbOrigSize==pPager->dbHintSize );
+      break;
+
+    case PAGER_WRITER_DBMOD:
+      assert( p->eLock==EXCLUSIVE_LOCK );
+      assert( pPager->errCode==SQLITE_OK );
+      assert( !pagerUseWal(pPager) );
+      assert( p->eLock>=EXCLUSIVE_LOCK );
+      assert( isOpen(p->jfd) 
+           || p->journalMode==PAGER_JOURNALMODE_OFF 
+           || p->journalMode==PAGER_JOURNALMODE_WAL 
+      );
+      assert( pPager->dbOrigSize<=pPager->dbHintSize );
+      break;
+
+    case PAGER_WRITER_FINISHED:
+      assert( p->eLock==EXCLUSIVE_LOCK );
+      assert( pPager->errCode==SQLITE_OK );
+      assert( !pagerUseWal(pPager) );
+      assert( isOpen(p->jfd) 
+           || p->journalMode==PAGER_JOURNALMODE_OFF 
+           || p->journalMode==PAGER_JOURNALMODE_WAL 
+      );
+      break;
+
+    case PAGER_ERROR:
+      /* There must be at least one outstanding reference to the pager if
+      ** in ERROR state. Otherwise the pager should have already dropped
+      ** back to OPEN state.
+      */
+      assert( pPager->errCode!=SQLITE_OK );
+      assert( sqlite3PcacheRefCount(pPager->pPCache)>0 );
+      break;
+  }
+
+  return 1;
+}
+#endif /* ifndef NDEBUG */
+
+#ifdef SQLITE_DEBUG 
+/*
+** Return a pointer to a human readable string in a static buffer
+** containing the state of the Pager object passed as an argument. This
+** is intended to be used within debuggers. For example, as an alternative
+** to "print *pPager" in gdb:
+**
+** (gdb) printf "%s", print_pager_state(pPager)
+*/
+static char *print_pager_state(Pager *p){
+  static char zRet[1024];
+
+  sqlite3_snprintf(1024, zRet,
+      "Filename:      %s\n"
+      "State:         %s errCode=%d\n"
+      "Lock:          %s\n"
+      "Locking mode:  locking_mode=%s\n"
+      "Journal mode:  journal_mode=%s\n"
+      "Backing store: tempFile=%d memDb=%d useJournal=%d\n"
+      "Journal:       journalOff=%lld journalHdr=%lld\n"
+      "Size:          dbsize=%d dbOrigSize=%d dbFileSize=%d\n"
+      , p->zFilename
+      , p->eState==PAGER_OPEN            ? "OPEN" :
+        p->eState==PAGER_READER          ? "READER" :
+        p->eState==PAGER_WRITER_LOCKED   ? "WRITER_LOCKED" :
+        p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" :
+        p->eState==PAGER_WRITER_DBMOD    ? "WRITER_DBMOD" :
+        p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" :
+        p->eState==PAGER_ERROR           ? "ERROR" : "?error?"
+      , (int)p->errCode
+      , p->eLock==NO_LOCK         ? "NO_LOCK" :
+        p->eLock==RESERVED_LOCK   ? "RESERVED" :
+        p->eLock==EXCLUSIVE_LOCK  ? "EXCLUSIVE" :
+        p->eLock==SHARED_LOCK     ? "SHARED" :
+        p->eLock==UNKNOWN_LOCK    ? "UNKNOWN" : "?error?"
+      , p->exclusiveMode ? "exclusive" : "normal"
+      , p->journalMode==PAGER_JOURNALMODE_MEMORY   ? "memory" :
+        p->journalMode==PAGER_JOURNALMODE_OFF      ? "off" :
+        p->journalMode==PAGER_JOURNALMODE_DELETE   ? "delete" :
+        p->journalMode==PAGER_JOURNALMODE_PERSIST  ? "persist" :
+        p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" :
+        p->journalMode==PAGER_JOURNALMODE_WAL      ? "wal" : "?error?"
+      , (int)p->tempFile, (int)p->memDb, (int)p->useJournal
+      , p->journalOff, p->journalHdr
+      , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize
+  );
+
+  return zRet;
+}
+#endif
+
+/*
+** Return true if it is necessary to write page *pPg into the sub-journal.
+** A page needs to be written into the sub-journal if there exists one
+** or more open savepoints for which:
+**
+**   * The page-number is less than or equal to PagerSavepoint.nOrig, and
+**   * The bit corresponding to the page-number is not set in
+**     PagerSavepoint.pInSavepoint.
+*/
+static int subjRequiresPage(PgHdr *pPg){
+  Pager *pPager = pPg->pPager;
+  PagerSavepoint *p;
+  Pgno pgno = pPg->pgno;
+  int i;
+  for(i=0; i<pPager->nSavepoint; i++){
+    p = &pPager->aSavepoint[i];
+    if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Return true if the page is already in the journal file.
+*/
+static int pageInJournal(Pager *pPager, PgHdr *pPg){
+  return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno);
+}
+
+/*
+** Read a 32-bit integer from the given file descriptor.  Store the integer
+** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
+** error code is something goes wrong.
+**
+** All values are stored on disk as big-endian.
+*/
+static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
+  unsigned char ac[4];
+  int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
+  if( rc==SQLITE_OK ){
+    *pRes = sqlite3Get4byte(ac);
+  }
+  return rc;
+}
+
+/*
+** Write a 32-bit integer into a string buffer in big-endian byte order.
+*/
+#define put32bits(A,B)  sqlite3Put4byte((u8*)A,B)
+
+
+/*
+** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
+** on success or an error code is something goes wrong.
+*/
+static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
+  char ac[4];
+  put32bits(ac, val);
+  return sqlite3OsWrite(fd, ac, 4, offset);
+}
+
+/*
+** Unlock the database file to level eLock, which must be either NO_LOCK
+** or SHARED_LOCK. Regardless of whether or not the call to xUnlock()
+** succeeds, set the Pager.eLock variable to match the (attempted) new lock.
+**
+** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is
+** called, do not modify it. See the comment above the #define of 
+** UNKNOWN_LOCK for an explanation of this.
+*/
+static int pagerUnlockDb(Pager *pPager, int eLock){
+  int rc = SQLITE_OK;
+
+  assert( !pPager->exclusiveMode || pPager->eLock==eLock );
+  assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
+  assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );
+  if( isOpen(pPager->fd) ){
+    assert( pPager->eLock>=eLock );
+    rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock);
+    if( pPager->eLock!=UNKNOWN_LOCK ){
+      pPager->eLock = (u8)eLock;
+    }
+    IOTRACE(("UNLOCK %p %d\n", pPager, eLock))
+  }
+  return rc;
+}
+
+/*
+** Lock the database file to level eLock, which must be either SHARED_LOCK,
+** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the
+** Pager.eLock variable to the new locking state. 
+**
+** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is 
+** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. 
+** See the comment above the #define of UNKNOWN_LOCK for an explanation 
+** of this.
+*/
+static int pagerLockDb(Pager *pPager, int eLock){
+  int rc = SQLITE_OK;
+
+  assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK );
+  if( pPager->eLock<eLock || pPager->eLock==UNKNOWN_LOCK ){
+    rc = pPager->noLock ? SQLITE_OK : sqlite3OsLock(pPager->fd, eLock);
+    if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){
+      pPager->eLock = (u8)eLock;
+      IOTRACE(("LOCK %p %d\n", pPager, eLock))
+    }
+  }
+  return rc;
+}
+
+/*
+** This function determines whether or not the atomic-write optimization
+** can be used with this pager. The optimization can be used if:
+**
+**  (a) the value returned by OsDeviceCharacteristics() indicates that
+**      a database page may be written atomically, and
+**  (b) the value returned by OsSectorSize() is less than or equal
+**      to the page size.
+**
+** The optimization is also always enabled for temporary files. It is
+** an error to call this function if pPager is opened on an in-memory
+** database.
+**
+** If the optimization cannot be used, 0 is returned. If it can be used,
+** then the value returned is the size of the journal file when it
+** contains rollback data for exactly one page.
+*/
+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+static int jrnlBufferSize(Pager *pPager){
+  assert( !MEMDB );
+  if( !pPager->tempFile ){
+    int dc;                           /* Device characteristics */
+    int nSector;                      /* Sector size */
+    int szPage;                       /* Page size */
+
+    assert( isOpen(pPager->fd) );
+    dc = sqlite3OsDeviceCharacteristics(pPager->fd);
+    nSector = pPager->sectorSize;
+    szPage = pPager->pageSize;
+
+    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
+    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
+    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
+      return 0;
+    }
+  }
+
+  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
+}
+#endif
+
+/*
+** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
+** on the cache using a hash function.  This is used for testing
+** and debugging only.
+*/
+#ifdef SQLITE_CHECK_PAGES
+/*
+** Return a 32-bit hash of the page data for pPage.
+*/
+static u32 pager_datahash(int nByte, unsigned char *pData){
+  u32 hash = 0;
+  int i;
+  for(i=0; i<nByte; i++){
+    hash = (hash*1039) + pData[i];
+  }
+  return hash;
+}
+static u32 pager_pagehash(PgHdr *pPage){
+  return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
+}
+static void pager_set_pagehash(PgHdr *pPage){
+  pPage->pageHash = pager_pagehash(pPage);
+}
+
+/*
+** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
+** is defined, and NDEBUG is not defined, an assert() statement checks
+** that the page is either dirty or still matches the calculated page-hash.
+*/
+#define CHECK_PAGE(x) checkPage(x)
+static void checkPage(PgHdr *pPg){
+  Pager *pPager = pPg->pPager;
+  assert( pPager->eState!=PAGER_ERROR );
+  assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
+}
+
+#else
+#define pager_datahash(X,Y)  0
+#define pager_pagehash(X)  0
+#define pager_set_pagehash(X)
+#define CHECK_PAGE(x)
+#endif  /* SQLITE_CHECK_PAGES */
+
+/*
+** When this is called the journal file for pager pPager must be open.
+** This function attempts to read a master journal file name from the 
+** end of the file and, if successful, copies it into memory supplied 
+** by the caller. See comments above writeMasterJournal() for the format
+** used to store a master journal file name at the end of a journal file.
+**
+** zMaster must point to a buffer of at least nMaster bytes allocated by
+** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
+** enough space to write the master journal name). If the master journal
+** name in the journal is longer than nMaster bytes (including a
+** nul-terminator), then this is handled as if no master journal name
+** were present in the journal.
+**
+** If a master journal file name is present at the end of the journal
+** file, then it is copied into the buffer pointed to by zMaster. A
+** nul-terminator byte is appended to the buffer following the master
+** journal file name.
+**
+** If it is determined that no master journal file name is present 
+** zMaster[0] is set to 0 and SQLITE_OK returned.
+**
+** If an error occurs while reading from the journal file, an SQLite
+** error code is returned.
+*/
+static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){
+  int rc;                    /* Return code */
+  u32 len;                   /* Length in bytes of master journal name */
+  i64 szJ;                   /* Total size in bytes of journal file pJrnl */
+  u32 cksum;                 /* MJ checksum value read from journal */
+  u32 u;                     /* Unsigned loop counter */
+  unsigned char aMagic[8];   /* A buffer to hold the magic header */
+  zMaster[0] = '\0';
+
+  if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ))
+   || szJ<16
+   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len))
+   || len>=nMaster 
+   || len==0 
+   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum))
+   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8))
+   || memcmp(aMagic, aJournalMagic, 8)
+   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len))
+  ){
+    return rc;
+  }
+
+  /* See if the checksum matches the master journal name */
+  for(u=0; u<len; u++){
+    cksum -= zMaster[u];
+  }
+  if( cksum ){
+    /* If the checksum doesn't add up, then one or more of the disk sectors
+    ** containing the master journal filename is corrupted. This means
+    ** definitely roll back, so just return SQLITE_OK and report a (nul)
+    ** master-journal filename.
+    */
+    len = 0;
+  }
+  zMaster[len] = '\0';
+   
+  return SQLITE_OK;
+}
+
+/*
+** Return the offset of the sector boundary at or immediately 
+** following the value in pPager->journalOff, assuming a sector 
+** size of pPager->sectorSize bytes.
+**
+** i.e for a sector size of 512:
+**
+**   Pager.journalOff          Return value
+**   ---------------------------------------
+**   0                         0
+**   512                       512
+**   100                       512
+**   2000                      2048
+** 
+*/
+static i64 journalHdrOffset(Pager *pPager){
+  i64 offset = 0;
+  i64 c = pPager->journalOff;
+  if( c ){
+    offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
+  }
+  assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
+  assert( offset>=c );
+  assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
+  return offset;
+}
+
+/*
+** The journal file must be open when this function is called.
+**
+** This function is a no-op if the journal file has not been written to
+** within the current transaction (i.e. if Pager.journalOff==0).
+**
+** If doTruncate is non-zero or the Pager.journalSizeLimit variable is
+** set to 0, then truncate the journal file to zero bytes in size. Otherwise,
+** zero the 28-byte header at the start of the journal file. In either case, 
+** if the pager is not in no-sync mode, sync the journal file immediately 
+** after writing or truncating it.
+**
+** If Pager.journalSizeLimit is set to a positive, non-zero value, and
+** following the truncation or zeroing described above the size of the 
+** journal file in bytes is larger than this value, then truncate the
+** journal file to Pager.journalSizeLimit bytes. The journal file does
+** not need to be synced following this operation.
+**
+** If an IO error occurs, abandon processing and return the IO error code.
+** Otherwise, return SQLITE_OK.
+*/
+static int zeroJournalHdr(Pager *pPager, int doTruncate){
+  int rc = SQLITE_OK;                               /* Return code */
+  assert( isOpen(pPager->jfd) );
+  if( pPager->journalOff ){
+    const i64 iLimit = pPager->journalSizeLimit;    /* Local cache of jsl */
+
+    IOTRACE(("JZEROHDR %p\n", pPager))
+    if( doTruncate || iLimit==0 ){
+      rc = sqlite3OsTruncate(pPager->jfd, 0);
+    }else{
+      static const char zeroHdr[28] = {0};
+      rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
+    }
+    if( rc==SQLITE_OK && !pPager->noSync ){
+      rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags);
+    }
+
+    /* At this point the transaction is committed but the write lock 
+    ** is still held on the file. If there is a size limit configured for 
+    ** the persistent journal and the journal file currently consumes more
+    ** space than that limit allows for, truncate it now. There is no need
+    ** to sync the file following this operation.
+    */
+    if( rc==SQLITE_OK && iLimit>0 ){
+      i64 sz;
+      rc = sqlite3OsFileSize(pPager->jfd, &sz);
+      if( rc==SQLITE_OK && sz>iLimit ){
+        rc = sqlite3OsTruncate(pPager->jfd, iLimit);
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** The journal file must be open when this routine is called. A journal
+** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
+** current location.
+**
+** The format for the journal header is as follows:
+** - 8 bytes: Magic identifying journal format.
+** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
+** - 4 bytes: Random number used for page hash.
+** - 4 bytes: Initial database page count.
+** - 4 bytes: Sector size used by the process that wrote this journal.
+** - 4 bytes: Database page size.
+** 
+** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
+*/
+static int writeJournalHdr(Pager *pPager){
+  int rc = SQLITE_OK;                 /* Return code */
+  char *zHeader = pPager->pTmpSpace;  /* Temporary space used to build header */
+  u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */
+  u32 nWrite;                         /* Bytes of header sector written */
+  int ii;                             /* Loop counter */
+
+  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
+
+  if( nHeader>JOURNAL_HDR_SZ(pPager) ){
+    nHeader = JOURNAL_HDR_SZ(pPager);
+  }
+
+  /* If there are active savepoints and any of them were created 
+  ** since the most recent journal header was written, update the 
+  ** PagerSavepoint.iHdrOffset fields now.
+  */
+  for(ii=0; ii<pPager->nSavepoint; ii++){
+    if( pPager->aSavepoint[ii].iHdrOffset==0 ){
+      pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff;
+    }
+  }
+
+  pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager);
+
+  /* 
+  ** Write the nRec Field - the number of page records that follow this
+  ** journal header. Normally, zero is written to this value at this time.
+  ** After the records are added to the journal (and the journal synced, 
+  ** if in full-sync mode), the zero is overwritten with the true number
+  ** of records (see syncJournal()).
+  **
+  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
+  ** reading the journal this value tells SQLite to assume that the
+  ** rest of the journal file contains valid page records. This assumption
+  ** is dangerous, as if a failure occurred whilst writing to the journal
+  ** file it may contain some garbage data. There are two scenarios
+  ** where this risk can be ignored:
+  **
+  **   * When the pager is in no-sync mode. Corruption can follow a
+  **     power failure in this case anyway.
+  **
+  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
+  **     that garbage data is never appended to the journal file.
+  */
+  assert( isOpen(pPager->fd) || pPager->noSync );
+  if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
+   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
+  ){
+    memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
+    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
+  }else{
+    memset(zHeader, 0, sizeof(aJournalMagic)+4);
+  }
+
+  /* The random check-hash initializer */ 
+  sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
+  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
+  /* The initial database size */
+  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
+  /* The assumed sector size for this process */
+  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
+
+  /* The page size */
+  put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
+
+  /* Initializing the tail of the buffer is not necessary.  Everything
+  ** works find if the following memset() is omitted.  But initializing
+  ** the memory prevents valgrind from complaining, so we are willing to
+  ** take the performance hit.
+  */
+  memset(&zHeader[sizeof(aJournalMagic)+20], 0,
+         nHeader-(sizeof(aJournalMagic)+20));
+
+  /* In theory, it is only necessary to write the 28 bytes that the 
+  ** journal header consumes to the journal file here. Then increment the 
+  ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next 
+  ** record is written to the following sector (leaving a gap in the file
+  ** that will be implicitly filled in by the OS).
+  **
+  ** However it has been discovered that on some systems this pattern can 
+  ** be significantly slower than contiguously writing data to the file,
+  ** even if that means explicitly writing data to the block of 
+  ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what
+  ** is done. 
+  **
+  ** The loop is required here in case the sector-size is larger than the 
+  ** database page size. Since the zHeader buffer is only Pager.pageSize
+  ** bytes in size, more than one call to sqlite3OsWrite() may be required
+  ** to populate the entire journal header sector.
+  */ 
+  for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
+    IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
+    rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
+    assert( pPager->journalHdr <= pPager->journalOff );
+    pPager->journalOff += nHeader;
+  }
+
+  return rc;
+}
+
+/*
+** The journal file must be open when this is called. A journal header file
+** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
+** file. The current location in the journal file is given by
+** pPager->journalOff. See comments above function writeJournalHdr() for
+** a description of the journal header format.
+**
+** If the header is read successfully, *pNRec is set to the number of
+** page records following this header and *pDbSize is set to the size of the
+** database before the transaction began, in pages. Also, pPager->cksumInit
+** is set to the value read from the journal header. SQLITE_OK is returned
+** in this case.
+**
+** If the journal header file appears to be corrupted, SQLITE_DONE is
+** returned and *pNRec and *PDbSize are undefined.  If JOURNAL_HDR_SZ bytes
+** cannot be read from the journal file an error code is returned.
+*/
+static int readJournalHdr(
+  Pager *pPager,               /* Pager object */
+  int isHot,
+  i64 journalSize,             /* Size of the open journal file in bytes */
+  u32 *pNRec,                  /* OUT: Value read from the nRec field */
+  u32 *pDbSize                 /* OUT: Value of original database size field */
+){
+  int rc;                      /* Return code */
+  unsigned char aMagic[8];     /* A buffer to hold the magic header */
+  i64 iHdrOff;                 /* Offset of journal header being read */
+
+  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
+
+  /* Advance Pager.journalOff to the start of the next sector. If the
+  ** journal file is too small for there to be a header stored at this
+  ** point, return SQLITE_DONE.
+  */
+  pPager->journalOff = journalHdrOffset(pPager);
+  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
+    return SQLITE_DONE;
+  }
+  iHdrOff = pPager->journalOff;
+
+  /* Read in the first 8 bytes of the journal header. If they do not match
+  ** the  magic string found at the start of each journal header, return
+  ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise,
+  ** proceed.
+  */
+  if( isHot || iHdrOff!=pPager->journalHdr ){
+    rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff);
+    if( rc ){
+      return rc;
+    }
+    if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
+      return SQLITE_DONE;
+    }
+  }
+
+  /* Read the first three 32-bit fields of the journal header: The nRec
+  ** field, the checksum-initializer and the database size at the start
+  ** of the transaction. Return an error code if anything goes wrong.
+  */
+  if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec))
+   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit))
+   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize))
+  ){
+    return rc;
+  }
+
+  if( pPager->journalOff==0 ){
+    u32 iPageSize;               /* Page-size field of journal header */
+    u32 iSectorSize;             /* Sector-size field of journal header */
+
+    /* Read the page-size and sector-size journal header fields. */
+    if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize))
+     || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize))
+    ){
+      return rc;
+    }
+
+    /* Versions of SQLite prior to 3.5.8 set the page-size field of the
+    ** journal header to zero. In this case, assume that the Pager.pageSize
+    ** variable is already set to the correct page size.
+    */
+    if( iPageSize==0 ){
+      iPageSize = pPager->pageSize;
+    }
+
+    /* Check that the values read from the page-size and sector-size fields
+    ** are within range. To be 'in range', both values need to be a power
+    ** of two greater than or equal to 512 or 32, and not greater than their 
+    ** respective compile time maximum limits.
+    */
+    if( iPageSize<512                  || iSectorSize<32
+     || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE
+     || ((iPageSize-1)&iPageSize)!=0   || ((iSectorSize-1)&iSectorSize)!=0 
+    ){
+      /* If the either the page-size or sector-size in the journal-header is 
+      ** invalid, then the process that wrote the journal-header must have 
+      ** crashed before the header was synced. In this case stop reading 
+      ** the journal file here.
+      */
+      return SQLITE_DONE;
+    }
+
+    /* Update the page-size to match the value read from the journal. 
+    ** Use a testcase() macro to make sure that malloc failure within 
+    ** PagerSetPagesize() is tested.
+    */
+    rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1);
+    testcase( rc!=SQLITE_OK );
+
+    /* Update the assumed sector-size to match the value used by 
+    ** the process that created this journal. If this journal was
+    ** created by a process other than this one, then this routine
+    ** is being called from within pager_playback(). The local value
+    ** of Pager.sectorSize is restored at the end of that routine.
+    */
+    pPager->sectorSize = iSectorSize;
+  }
+
+  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
+  return rc;
+}
+
+
+/*
+** Write the supplied master journal name into the journal file for pager
+** pPager at the current location. The master journal name must be the last
+** thing written to a journal file. If the pager is in full-sync mode, the
+** journal file descriptor is advanced to the next sector boundary before
+** anything is written. The format is:
+**
+**   + 4 bytes: PAGER_MJ_PGNO.
+**   + N bytes: Master journal filename in utf-8.
+**   + 4 bytes: N (length of master journal name in bytes, no nul-terminator).
+**   + 4 bytes: Master journal name checksum.
+**   + 8 bytes: aJournalMagic[].
+**
+** The master journal page checksum is the sum of the bytes in the master
+** journal name, where each byte is interpreted as a signed 8-bit integer.
+**
+** If zMaster is a NULL pointer (occurs for a single database transaction), 
+** this call is a no-op.
+*/
+static int writeMasterJournal(Pager *pPager, const char *zMaster){
+  int rc;                          /* Return code */
+  int nMaster;                     /* Length of string zMaster */
+  i64 iHdrOff;                     /* Offset of header in journal file */
+  i64 jrnlSize;                    /* Size of journal file on disk */
+  u32 cksum = 0;                   /* Checksum of string zMaster */
+
+  assert( pPager->setMaster==0 );
+  assert( !pagerUseWal(pPager) );
+
+  if( !zMaster 
+   || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
+   || !isOpen(pPager->jfd)
+  ){
+    return SQLITE_OK;
+  }
+  pPager->setMaster = 1;
+  assert( pPager->journalHdr <= pPager->journalOff );
+
+  /* Calculate the length in bytes and the checksum of zMaster */
+  for(nMaster=0; zMaster[nMaster]; nMaster++){
+    cksum += zMaster[nMaster];
+  }
+
+  /* If in full-sync mode, advance to the next disk sector before writing
+  ** the master journal name. This is in case the previous page written to
+  ** the journal has already been synced.
+  */
+  if( pPager->fullSync ){
+    pPager->journalOff = journalHdrOffset(pPager);
+  }
+  iHdrOff = pPager->journalOff;
+
+  /* Write the master journal data to the end of the journal file. If
+  ** an error occurs, return the error code to the caller.
+  */
+  if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager))))
+   || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
+   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
+   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
+   || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8)))
+  ){
+    return rc;
+  }
+  pPager->journalOff += (nMaster+20);
+
+  /* If the pager is in peristent-journal mode, then the physical 
+  ** journal-file may extend past the end of the master-journal name
+  ** and 8 bytes of magic data just written to the file. This is 
+  ** dangerous because the code to rollback a hot-journal file
+  ** will not be able to find the master-journal name to determine 
+  ** whether or not the journal is hot. 
+  **
+  ** Easiest thing to do in this scenario is to truncate the journal 
+  ** file to the required size.
+  */ 
+  if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))
+   && jrnlSize>pPager->journalOff
+  ){
+    rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff);
+  }
+  return rc;
+}
+
+/*
+** Discard the entire contents of the in-memory page-cache.
+*/
+static void pager_reset(Pager *pPager){
+  pPager->iDataVersion++;
+  sqlite3BackupRestart(pPager->pBackup);
+  sqlite3PcacheClear(pPager->pPCache);
+}
+
+/*
+** Return the pPager->iDataVersion value
+*/
+SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager *pPager){
+  assert( pPager->eState>PAGER_OPEN );
+  return pPager->iDataVersion;
+}
+
+/*
+** Free all structures in the Pager.aSavepoint[] array and set both
+** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal
+** if it is open and the pager is not in exclusive mode.
+*/
+static void releaseAllSavepoints(Pager *pPager){
+  int ii;               /* Iterator for looping through Pager.aSavepoint */
+  for(ii=0; ii<pPager->nSavepoint; ii++){
+    sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
+  }
+  if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){
+    sqlite3OsClose(pPager->sjfd);
+  }
+  sqlite3_free(pPager->aSavepoint);
+  pPager->aSavepoint = 0;
+  pPager->nSavepoint = 0;
+  pPager->nSubRec = 0;
+}
+
+/*
+** Set the bit number pgno in the PagerSavepoint.pInSavepoint 
+** bitvecs of all open savepoints. Return SQLITE_OK if successful
+** or SQLITE_NOMEM if a malloc failure occurs.
+*/
+static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
+  int ii;                   /* Loop counter */
+  int rc = SQLITE_OK;       /* Result code */
+
+  for(ii=0; ii<pPager->nSavepoint; ii++){
+    PagerSavepoint *p = &pPager->aSavepoint[ii];
+    if( pgno<=p->nOrig ){
+      rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
+      testcase( rc==SQLITE_NOMEM );
+      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
+    }
+  }
+  return rc;
+}
+
+/*
+** This function is a no-op if the pager is in exclusive mode and not
+** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN
+** state.
+**
+** If the pager is not in exclusive-access mode, the database file is
+** completely unlocked. If the file is unlocked and the file-system does
+** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is
+** closed (if it is open).
+**
+** If the pager is in ERROR state when this function is called, the 
+** contents of the pager cache are discarded before switching back to 
+** the OPEN state. Regardless of whether the pager is in exclusive-mode
+** or not, any journal file left in the file-system will be treated
+** as a hot-journal and rolled back the next time a read-transaction
+** is opened (by this or by any other connection).
+*/
+static void pager_unlock(Pager *pPager){
+
+  assert( pPager->eState==PAGER_READER 
+       || pPager->eState==PAGER_OPEN 
+       || pPager->eState==PAGER_ERROR 
+  );
+
+  sqlite3BitvecDestroy(pPager->pInJournal);
+  pPager->pInJournal = 0;
+  releaseAllSavepoints(pPager);
+
+  if( pagerUseWal(pPager) ){
+    assert( !isOpen(pPager->jfd) );
+    sqlite3WalEndReadTransaction(pPager->pWal);
+    pPager->eState = PAGER_OPEN;
+  }else if( !pPager->exclusiveMode ){
+    int rc;                       /* Error code returned by pagerUnlockDb() */
+    int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;
+
+    /* If the operating system support deletion of open files, then
+    ** close the journal file when dropping the database lock.  Otherwise
+    ** another connection with journal_mode=delete might delete the file
+    ** out from under us.
+    */
+    assert( (PAGER_JOURNALMODE_MEMORY   & 5)!=1 );
+    assert( (PAGER_JOURNALMODE_OFF      & 5)!=1 );
+    assert( (PAGER_JOURNALMODE_WAL      & 5)!=1 );
+    assert( (PAGER_JOURNALMODE_DELETE   & 5)!=1 );
+    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
+    assert( (PAGER_JOURNALMODE_PERSIST  & 5)==1 );
+    if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN)
+     || 1!=(pPager->journalMode & 5)
+    ){
+      sqlite3OsClose(pPager->jfd);
+    }
+
+    /* If the pager is in the ERROR state and the call to unlock the database
+    ** file fails, set the current lock to UNKNOWN_LOCK. See the comment
+    ** above the #define for UNKNOWN_LOCK for an explanation of why this
+    ** is necessary.
+    */
+    rc = pagerUnlockDb(pPager, NO_LOCK);
+    if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){
+      pPager->eLock = UNKNOWN_LOCK;
+    }
+
+    /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here
+    ** without clearing the error code. This is intentional - the error
+    ** code is cleared and the cache reset in the block below.
+    */
+    assert( pPager->errCode || pPager->eState!=PAGER_ERROR );
+    pPager->changeCountDone = 0;
+    pPager->eState = PAGER_OPEN;
+  }
+
+  /* If Pager.errCode is set, the contents of the pager cache cannot be
+  ** trusted. Now that there are no outstanding references to the pager,
+  ** it can safely move back to PAGER_OPEN state. This happens in both
+  ** normal and exclusive-locking mode.
+  */
+  if( pPager->errCode ){
+    assert( !MEMDB );
+    pager_reset(pPager);
+    pPager->changeCountDone = pPager->tempFile;
+    pPager->eState = PAGER_OPEN;
+    pPager->errCode = SQLITE_OK;
+    if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0);
+  }
+
+  pPager->journalOff = 0;
+  pPager->journalHdr = 0;
+  pPager->setMaster = 0;
+}
+
+/*
+** This function is called whenever an IOERR or FULL error that requires
+** the pager to transition into the ERROR state may ahve occurred.
+** The first argument is a pointer to the pager structure, the second 
+** the error-code about to be returned by a pager API function. The 
+** value returned is a copy of the second argument to this function. 
+**
+** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the
+** IOERR sub-codes, the pager enters the ERROR state and the error code
+** is stored in Pager.errCode. While the pager remains in the ERROR state,
+** all major API calls on the Pager will immediately return Pager.errCode.
+**
+** The ERROR state indicates that the contents of the pager-cache 
+** cannot be trusted. This state can be cleared by completely discarding 
+** the contents of the pager-cache. If a transaction was active when
+** the persistent error occurred, then the rollback journal may need
+** to be replayed to restore the contents of the database file (as if
+** it were a hot-journal).
+*/
+static int pager_error(Pager *pPager, int rc){
+  int rc2 = rc & 0xff;
+  assert( rc==SQLITE_OK || !MEMDB );
+  assert(
+       pPager->errCode==SQLITE_FULL ||
+       pPager->errCode==SQLITE_OK ||
+       (pPager->errCode & 0xff)==SQLITE_IOERR
+  );
+  if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){
+    pPager->errCode = rc;
+    pPager->eState = PAGER_ERROR;
+  }
+  return rc;
+}
+
+static int pager_truncate(Pager *pPager, Pgno nPage);
+
+/*
+** This routine ends a transaction. A transaction is usually ended by 
+** either a COMMIT or a ROLLBACK operation. This routine may be called 
+** after rollback of a hot-journal, or if an error occurs while opening
+** the journal file or writing the very first journal-header of a
+** database transaction.
+** 
+** This routine is never called in PAGER_ERROR state. If it is called
+** in PAGER_NONE or PAGER_SHARED state and the lock held is less
+** exclusive than a RESERVED lock, it is a no-op.
+**
+** Otherwise, any active savepoints are released.
+**
+** If the journal file is open, then it is "finalized". Once a journal 
+** file has been finalized it is not possible to use it to roll back a 
+** transaction. Nor will it be considered to be a hot-journal by this
+** or any other database connection. Exactly how a journal is finalized
+** depends on whether or not the pager is running in exclusive mode and
+** the current journal-mode (Pager.journalMode value), as follows:
+**
+**   journalMode==MEMORY
+**     Journal file descriptor is simply closed. This destroys an 
+**     in-memory journal.
+**
+**   journalMode==TRUNCATE
+**     Journal file is truncated to zero bytes in size.
+**
+**   journalMode==PERSIST
+**     The first 28 bytes of the journal file are zeroed. This invalidates
+**     the first journal header in the file, and hence the entire journal
+**     file. An invalid journal file cannot be rolled back.
+**
+**   journalMode==DELETE
+**     The journal file is closed and deleted using sqlite3OsDelete().
+**
+**     If the pager is running in exclusive mode, this method of finalizing
+**     the journal file is never used. Instead, if the journalMode is
+**     DELETE and the pager is in exclusive mode, the method described under
+**     journalMode==PERSIST is used instead.
+**
+** After the journal is finalized, the pager moves to PAGER_READER state.
+** If running in non-exclusive rollback mode, the lock on the file is 
+** downgraded to a SHARED_LOCK.
+**
+** SQLITE_OK is returned if no error occurs. If an error occurs during
+** any of the IO operations to finalize the journal file or unlock the
+** database then the IO error code is returned to the user. If the 
+** operation to finalize the journal file fails, then the code still
+** tries to unlock the database file if not in exclusive mode. If the
+** unlock operation fails as well, then the first error code related
+** to the first error encountered (the journal finalization one) is
+** returned.
+*/
+static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){
+  int rc = SQLITE_OK;      /* Error code from journal finalization operation */
+  int rc2 = SQLITE_OK;     /* Error code from db file unlock operation */
+
+  /* Do nothing if the pager does not have an open write transaction
+  ** or at least a RESERVED lock. This function may be called when there
+  ** is no write-transaction active but a RESERVED or greater lock is
+  ** held under two circumstances:
+  **
+  **   1. After a successful hot-journal rollback, it is called with
+  **      eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK.
+  **
+  **   2. If a connection with locking_mode=exclusive holding an EXCLUSIVE 
+  **      lock switches back to locking_mode=normal and then executes a
+  **      read-transaction, this function is called with eState==PAGER_READER 
+  **      and eLock==EXCLUSIVE_LOCK when the read-transaction is closed.
+  */
+  assert( assert_pager_state(pPager) );
+  assert( pPager->eState!=PAGER_ERROR );
+  if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
+    return SQLITE_OK;
+  }
+
+  releaseAllSavepoints(pPager);
+  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
+  if( isOpen(pPager->jfd) ){
+    assert( !pagerUseWal(pPager) );
+
+    /* Finalize the journal file. */
+    if( sqlite3IsMemJournal(pPager->jfd) ){
+      assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
+      sqlite3OsClose(pPager->jfd);
+    }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
+      if( pPager->journalOff==0 ){
+        rc = SQLITE_OK;
+      }else{
+        rc = sqlite3OsTruncate(pPager->jfd, 0);
+        if( rc==SQLITE_OK && pPager->fullSync ){
+          /* Make sure the new file size is written into the inode right away.
+          ** Otherwise the journal might resurrect following a power loss and
+          ** cause the last transaction to roll back.  See
+          ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773
+          */
+          rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
+        }
+      }
+      pPager->journalOff = 0;
+    }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST
+      || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL)
+    ){
+      rc = zeroJournalHdr(pPager, hasMaster);
+      pPager->journalOff = 0;
+    }else{
+      /* This branch may be executed with Pager.journalMode==MEMORY if
+      ** a hot-journal was just rolled back. In this case the journal
+      ** file should be closed and deleted. If this connection writes to
+      ** the database file, it will do so using an in-memory journal. 
+      */
+      int bDelete = (!pPager->tempFile && sqlite3JournalExists(pPager->jfd));
+      assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE 
+           || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
+           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
+      );
+      sqlite3OsClose(pPager->jfd);
+      if( bDelete ){
+        rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
+      }
+    }
+  }
+
+#ifdef SQLITE_CHECK_PAGES
+  sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash);
+  if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){
+    PgHdr *p = sqlite3PagerLookup(pPager, 1);
+    if( p ){
+      p->pageHash = 0;
+      sqlite3PagerUnrefNotNull(p);
+    }
+  }
+#endif
+
+  sqlite3BitvecDestroy(pPager->pInJournal);
+  pPager->pInJournal = 0;
+  pPager->nRec = 0;
+  sqlite3PcacheCleanAll(pPager->pPCache);
+  sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
+
+  if( pagerUseWal(pPager) ){
+    /* Drop the WAL write-lock, if any. Also, if the connection was in 
+    ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE 
+    ** lock held on the database file.
+    */
+    rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
+    assert( rc2==SQLITE_OK );
+  }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){
+    /* This branch is taken when committing a transaction in rollback-journal
+    ** mode if the database file on disk is larger than the database image.
+    ** At this point the journal has been finalized and the transaction 
+    ** successfully committed, but the EXCLUSIVE lock is still held on the
+    ** file. So it is safe to truncate the database file to its minimum
+    ** required size.  */
+    assert( pPager->eLock==EXCLUSIVE_LOCK );
+    rc = pager_truncate(pPager, pPager->dbSize);
+  }
+
+  if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){
+    rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0);
+    if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
+  }
+
+  if( !pPager->exclusiveMode 
+   && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
+  ){
+    rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
+    pPager->changeCountDone = 0;
+  }
+  pPager->eState = PAGER_READER;
+  pPager->setMaster = 0;
+
+  return (rc==SQLITE_OK?rc2:rc);
+}
+
+/*
+** Execute a rollback if a transaction is active and unlock the 
+** database file. 
+**
+** If the pager has already entered the ERROR state, do not attempt 
+** the rollback at this time. Instead, pager_unlock() is called. The
+** call to pager_unlock() will discard all in-memory pages, unlock
+** the database file and move the pager back to OPEN state. If this 
+** means that there is a hot-journal left in the file-system, the next 
+** connection to obtain a shared lock on the pager (which may be this one) 
+** will roll it back.
+**
+** If the pager has not already entered the ERROR state, but an IO or
+** malloc error occurs during a rollback, then this will itself cause 
+** the pager to enter the ERROR state. Which will be cleared by the
+** call to pager_unlock(), as described above.
+*/
+static void pagerUnlockAndRollback(Pager *pPager){
+  if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){
+    assert( assert_pager_state(pPager) );
+    if( pPager->eState>=PAGER_WRITER_LOCKED ){
+      sqlite3BeginBenignMalloc();
+      sqlite3PagerRollback(pPager);
+      sqlite3EndBenignMalloc();
+    }else if( !pPager->exclusiveMode ){
+      assert( pPager->eState==PAGER_READER );
+      pager_end_transaction(pPager, 0, 0);
+    }
+  }
+  pager_unlock(pPager);
+}
+
+/*
+** Parameter aData must point to a buffer of pPager->pageSize bytes
+** of data. Compute and return a checksum based ont the contents of the 
+** page of data and the current value of pPager->cksumInit.
+**
+** This is not a real checksum. It is really just the sum of the 
+** random initial value (pPager->cksumInit) and every 200th byte
+** of the page data, starting with byte offset (pPager->pageSize%200).
+** Each byte is interpreted as an 8-bit unsigned integer.
+**
+** Changing the formula used to compute this checksum results in an
+** incompatible journal file format.
+**
+** If journal corruption occurs due to a power failure, the most likely 
+** scenario is that one end or the other of the record will be changed. 
+** It is much less likely that the two ends of the journal record will be
+** correct and the middle be corrupt.  Thus, this "checksum" scheme,
+** though fast and simple, catches the mostly likely kind of corruption.
+*/
+static u32 pager_cksum(Pager *pPager, const u8 *aData){
+  u32 cksum = pPager->cksumInit;         /* Checksum value to return */
+  int i = pPager->pageSize-200;          /* Loop counter */
+  while( i>0 ){
+    cksum += aData[i];
+    i -= 200;
+  }
+  return cksum;
+}
+
+/*
+** Report the current page size and number of reserved bytes back
+** to the codec.
+*/
+#ifdef SQLITE_HAS_CODEC
+static void pagerReportSize(Pager *pPager){
+  if( pPager->xCodecSizeChng ){
+    pPager->xCodecSizeChng(pPager->pCodec, pPager->pageSize,
+                           (int)pPager->nReserve);
+  }
+}
+#else
+# define pagerReportSize(X)     /* No-op if we do not support a codec */
+#endif
+
+/*
+** Read a single page from either the journal file (if isMainJrnl==1) or
+** from the sub-journal (if isMainJrnl==0) and playback that page.
+** The page begins at offset *pOffset into the file. The *pOffset
+** value is increased to the start of the next page in the journal.
+**
+** The main rollback journal uses checksums - the statement journal does 
+** not.
+**
+** If the page number of the page record read from the (sub-)journal file
+** is greater than the current value of Pager.dbSize, then playback is
+** skipped and SQLITE_OK is returned.
+**
+** If pDone is not NULL, then it is a record of pages that have already
+** been played back.  If the page at *pOffset has already been played back
+** (if the corresponding pDone bit is set) then skip the playback.
+** Make sure the pDone bit corresponding to the *pOffset page is set
+** prior to returning.
+**
+** If the page record is successfully read from the (sub-)journal file
+** and played back, then SQLITE_OK is returned. If an IO error occurs
+** while reading the record from the (sub-)journal file or while writing
+** to the database file, then the IO error code is returned. If data
+** is successfully read from the (sub-)journal file but appears to be
+** corrupted, SQLITE_DONE is returned. Data is considered corrupted in
+** two circumstances:
+** 
+**   * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or
+**   * If the record is being rolled back from the main journal file
+**     and the checksum field does not match the record content.
+**
+** Neither of these two scenarios are possible during a savepoint rollback.
+**
+** If this is a savepoint rollback, then memory may have to be dynamically
+** allocated by this function. If this is the case and an allocation fails,
+** SQLITE_NOMEM is returned.
+*/
+static int pager_playback_one_page(
+  Pager *pPager,                /* The pager being played back */
+  i64 *pOffset,                 /* Offset of record to playback */
+  Bitvec *pDone,                /* Bitvec of pages already played back */
+  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
+  int isSavepnt                 /* True for a savepoint rollback */
+){
+  int rc;
+  PgHdr *pPg;                   /* An existing page in the cache */
+  Pgno pgno;                    /* The page number of a page in journal */
+  u32 cksum;                    /* Checksum used for sanity checking */
+  char *aData;                  /* Temporary storage for the page */
+  sqlite3_file *jfd;            /* The file descriptor for the journal file */
+  int isSynced;                 /* True if journal page is synced */
+
+  assert( (isMainJrnl&~1)==0 );      /* isMainJrnl is 0 or 1 */
+  assert( (isSavepnt&~1)==0 );       /* isSavepnt is 0 or 1 */
+  assert( isMainJrnl || pDone );     /* pDone always used on sub-journals */
+  assert( isSavepnt || pDone==0 );   /* pDone never used on non-savepoint */
+
+  aData = pPager->pTmpSpace;
+  assert( aData );         /* Temp storage must have already been allocated */
+  assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) );
+
+  /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction 
+  ** or savepoint rollback done at the request of the caller) or this is
+  ** a hot-journal rollback. If it is a hot-journal rollback, the pager
+  ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback
+  ** only reads from the main journal, not the sub-journal.
+  */
+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD
+       || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK)
+  );
+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl );
+
+  /* Read the page number and page data from the journal or sub-journal
+  ** file. Return an error code to the caller if an IO error occurs.
+  */
+  jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;
+  rc = read32bits(jfd, *pOffset, &pgno);
+  if( rc!=SQLITE_OK ) return rc;
+  rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4);
+  if( rc!=SQLITE_OK ) return rc;
+  *pOffset += pPager->pageSize + 4 + isMainJrnl*4;
+
+  /* Sanity checking on the page.  This is more important that I originally
+  ** thought.  If a power failure occurs while the journal is being written,
+  ** it could cause invalid data to be written into the journal.  We need to
+  ** detect this invalid data (with high probability) and ignore it.
+  */
+  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
+    assert( !isSavepnt );
+    return SQLITE_DONE;
+  }
+  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
+    return SQLITE_OK;
+  }
+  if( isMainJrnl ){
+    rc = read32bits(jfd, (*pOffset)-4, &cksum);
+    if( rc ) return rc;
+    if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){
+      return SQLITE_DONE;
+    }
+  }
+
+  /* If this page has already been played by before during the current
+  ** rollback, then don't bother to play it back again.
+  */
+  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* When playing back page 1, restore the nReserve setting
+  */
+  if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){
+    pPager->nReserve = ((u8*)aData)[20];
+    pagerReportSize(pPager);
+  }
+
+  /* If the pager is in CACHEMOD state, then there must be a copy of this
+  ** page in the pager cache. In this case just update the pager cache,
+  ** not the database file. The page is left marked dirty in this case.
+  **
+  ** An exception to the above rule: If the database is in no-sync mode
+  ** and a page is moved during an incremental vacuum then the page may
+  ** not be in the pager cache. Later: if a malloc() or IO error occurs
+  ** during a Movepage() call, then the page may not be in the cache
+  ** either. So the condition described in the above paragraph is not
+  ** assert()able.
+  **
+  ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the
+  ** pager cache if it exists and the main file. The page is then marked 
+  ** not dirty. Since this code is only executed in PAGER_OPEN state for
+  ** a hot-journal rollback, it is guaranteed that the page-cache is empty
+  ** if the pager is in OPEN state.
+  **
+  ** Ticket #1171:  The statement journal might contain page content that is
+  ** different from the page content at the start of the transaction.
+  ** This occurs when a page is changed prior to the start of a statement
+  ** then changed again within the statement.  When rolling back such a
+  ** statement we must not write to the original database unless we know
+  ** for certain that original page contents are synced into the main rollback
+  ** journal.  Otherwise, a power loss might leave modified data in the
+  ** database file without an entry in the rollback journal that can
+  ** restore the database to its original form.  Two conditions must be
+  ** met before writing to the database files. (1) the database must be
+  ** locked.  (2) we know that the original page content is fully synced
+  ** in the main journal either because the page is not in cache or else
+  ** the page is marked as needSync==0.
+  **
+  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
+  ** is possible to fail a statement on a database that does not yet exist.
+  ** Do not attempt to write if database file has never been opened.
+  */
+  if( pagerUseWal(pPager) ){
+    pPg = 0;
+  }else{
+    pPg = sqlite3PagerLookup(pPager, pgno);
+  }
+  assert( pPg || !MEMDB );
+  assert( pPager->eState!=PAGER_OPEN || pPg==0 );
+  PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
+           PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData),
+           (isMainJrnl?"main-journal":"sub-journal")
+  ));
+  if( isMainJrnl ){
+    isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr);
+  }else{
+    isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC));
+  }
+  if( isOpen(pPager->fd)
+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
+   && isSynced
+  ){
+    i64 ofst = (pgno-1)*(i64)pPager->pageSize;
+    testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
+    assert( !pagerUseWal(pPager) );
+    rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst);
+    if( pgno>pPager->dbFileSize ){
+      pPager->dbFileSize = pgno;
+    }
+    if( pPager->pBackup ){
+      CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
+      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData);
+      CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData);
+    }
+  }else if( !isMainJrnl && pPg==0 ){
+    /* If this is a rollback of a savepoint and data was not written to
+    ** the database and the page is not in-memory, there is a potential
+    ** problem. When the page is next fetched by the b-tree layer, it 
+    ** will be read from the database file, which may or may not be 
+    ** current. 
+    **
+    ** There are a couple of different ways this can happen. All are quite
+    ** obscure. When running in synchronous mode, this can only happen 
+    ** if the page is on the free-list at the start of the transaction, then
+    ** populated, then moved using sqlite3PagerMovepage().
+    **
+    ** The solution is to add an in-memory page to the cache containing
+    ** the data just read from the sub-journal. Mark the page as dirty 
+    ** and if the pager requires a journal-sync, then mark the page as 
+    ** requiring a journal-sync before it is written.
+    */
+    assert( isSavepnt );
+    assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 );
+    pPager->doNotSpill |= SPILLFLAG_ROLLBACK;
+    rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1);
+    assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 );
+    pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK;
+    if( rc!=SQLITE_OK ) return rc;
+    pPg->flags &= ~PGHDR_NEED_READ;
+    sqlite3PcacheMakeDirty(pPg);
+  }
+  if( pPg ){
+    /* No page should ever be explicitly rolled back that is in use, except
+    ** for page 1 which is held in use in order to keep the lock on the
+    ** database active. However such a page may be rolled back as a result
+    ** of an internal error resulting in an automatic call to
+    ** sqlite3PagerRollback().
+    */
+    void *pData;
+    pData = pPg->pData;
+    memcpy(pData, (u8*)aData, pPager->pageSize);
+    pPager->xReiniter(pPg);
+    if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){
+      /* If the contents of this page were just restored from the main 
+      ** journal file, then its content must be as they were when the 
+      ** transaction was first opened. In this case we can mark the page
+      ** as clean, since there will be no need to write it out to the
+      ** database.
+      **
+      ** There is one exception to this rule. If the page is being rolled
+      ** back as part of a savepoint (or statement) rollback from an 
+      ** unsynced portion of the main journal file, then it is not safe
+      ** to mark the page as clean. This is because marking the page as
+      ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
+      ** already in the journal file (recorded in Pager.pInJournal) and
+      ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
+      ** again within this transaction, it will be marked as dirty but
+      ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
+      ** be written out into the database file before its journal file
+      ** segment is synced. If a crash occurs during or following this,
+      ** database corruption may ensue.
+      */
+      assert( !pagerUseWal(pPager) );
+      sqlite3PcacheMakeClean(pPg);
+    }
+    pager_set_pagehash(pPg);
+
+    /* If this was page 1, then restore the value of Pager.dbFileVers.
+    ** Do this before any decoding. */
+    if( pgno==1 ){
+      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
+    }
+
+    /* Decode the page just read from disk */
+    CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM);
+    sqlite3PcacheRelease(pPg);
+  }
+  return rc;
+}
+
+/*
+** Parameter zMaster is the name of a master journal file. A single journal
+** file that referred to the master journal file has just been rolled back.
+** This routine checks if it is possible to delete the master journal file,
+** and does so if it is.
+**
+** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
+** available for use within this function.
+**
+** When a master journal file is created, it is populated with the names 
+** of all of its child journals, one after another, formatted as utf-8 
+** encoded text. The end of each child journal file is marked with a 
+** nul-terminator byte (0x00). i.e. the entire contents of a master journal
+** file for a transaction involving two databases might be:
+**
+**   "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00"
+**
+** A master journal file may only be deleted once all of its child 
+** journals have been rolled back.
+**
+** This function reads the contents of the master-journal file into 
+** memory and loops through each of the child journal names. For
+** each child journal, it checks if:
+**
+**   * if the child journal exists, and if so
+**   * if the child journal contains a reference to master journal 
+**     file zMaster
+**
+** If a child journal can be found that matches both of the criteria
+** above, this function returns without doing anything. Otherwise, if
+** no such child journal can be found, file zMaster is deleted from
+** the file-system using sqlite3OsDelete().
+**
+** If an IO error within this function, an error code is returned. This
+** function allocates memory by calling sqlite3Malloc(). If an allocation
+** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors 
+** occur, SQLITE_OK is returned.
+**
+** TODO: This function allocates a single block of memory to load
+** the entire contents of the master journal file. This could be
+** a couple of kilobytes or so - potentially larger than the page 
+** size.
+*/
+static int pager_delmaster(Pager *pPager, const char *zMaster){
+  sqlite3_vfs *pVfs = pPager->pVfs;
+  int rc;                   /* Return code */
+  sqlite3_file *pMaster;    /* Malloc'd master-journal file descriptor */
+  sqlite3_file *pJournal;   /* Malloc'd child-journal file descriptor */
+  char *zMasterJournal = 0; /* Contents of master journal file */
+  i64 nMasterJournal;       /* Size of master journal file */
+  char *zJournal;           /* Pointer to one journal within MJ file */
+  char *zMasterPtr;         /* Space to hold MJ filename from a journal file */
+  int nMasterPtr;           /* Amount of space allocated to zMasterPtr[] */
+
+  /* Allocate space for both the pJournal and pMaster file descriptors.
+  ** If successful, open the master journal file for reading.
+  */
+  pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2);
+  pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
+  if( !pMaster ){
+    rc = SQLITE_NOMEM;
+  }else{
+    const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
+    rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
+  }
+  if( rc!=SQLITE_OK ) goto delmaster_out;
+
+  /* Load the entire master journal file into space obtained from
+  ** sqlite3_malloc() and pointed to by zMasterJournal.   Also obtain
+  ** sufficient space (in zMasterPtr) to hold the names of master
+  ** journal files extracted from regular rollback-journals.
+  */
+  rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
+  if( rc!=SQLITE_OK ) goto delmaster_out;
+  nMasterPtr = pVfs->mxPathname+1;
+  zMasterJournal = sqlite3Malloc(nMasterJournal + nMasterPtr + 1);
+  if( !zMasterJournal ){
+    rc = SQLITE_NOMEM;
+    goto delmaster_out;
+  }
+  zMasterPtr = &zMasterJournal[nMasterJournal+1];
+  rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0);
+  if( rc!=SQLITE_OK ) goto delmaster_out;
+  zMasterJournal[nMasterJournal] = 0;
+
+  zJournal = zMasterJournal;
+  while( (zJournal-zMasterJournal)<nMasterJournal ){
+    int exists;
+    rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
+    if( rc!=SQLITE_OK ){
+      goto delmaster_out;
+    }
+    if( exists ){
+      /* One of the journals pointed to by the master journal exists.
+      ** Open it and check if it points at the master journal. If
+      ** so, return without deleting the master journal file.
+      */
+      int c;
+      int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
+      rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
+      if( rc!=SQLITE_OK ){
+        goto delmaster_out;
+      }
+
+      rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
+      sqlite3OsClose(pJournal);
+      if( rc!=SQLITE_OK ){
+        goto delmaster_out;
+      }
+
+      c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
+      if( c ){
+        /* We have a match. Do not delete the master journal file. */
+        goto delmaster_out;
+      }
+    }
+    zJournal += (sqlite3Strlen30(zJournal)+1);
+  }
+ 
+  sqlite3OsClose(pMaster);
+  rc = sqlite3OsDelete(pVfs, zMaster, 0);
+
+delmaster_out:
+  sqlite3_free(zMasterJournal);
+  if( pMaster ){
+    sqlite3OsClose(pMaster);
+    assert( !isOpen(pJournal) );
+    sqlite3_free(pMaster);
+  }
+  return rc;
+}
+
+
+/*
+** This function is used to change the actual size of the database 
+** file in the file-system. This only happens when committing a transaction,
+** or rolling back a transaction (including rolling back a hot-journal).
+**
+** If the main database file is not open, or the pager is not in either
+** DBMOD or OPEN state, this function is a no-op. Otherwise, the size 
+** of the file is changed to nPage pages (nPage*pPager->pageSize bytes). 
+** If the file on disk is currently larger than nPage pages, then use the VFS
+** xTruncate() method to truncate it.
+**
+** Or, it might be the case that the file on disk is smaller than 
+** nPage pages. Some operating system implementations can get confused if 
+** you try to truncate a file to some size that is larger than it 
+** currently is, so detect this case and write a single zero byte to 
+** the end of the new file instead.
+**
+** If successful, return SQLITE_OK. If an IO error occurs while modifying
+** the database file, return the error code to the caller.
+*/
+static int pager_truncate(Pager *pPager, Pgno nPage){
+  int rc = SQLITE_OK;
+  assert( pPager->eState!=PAGER_ERROR );
+  assert( pPager->eState!=PAGER_READER );
+  
+  if( isOpen(pPager->fd) 
+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) 
+  ){
+    i64 currentSize, newSize;
+    int szPage = pPager->pageSize;
+    assert( pPager->eLock==EXCLUSIVE_LOCK );
+    /* TODO: Is it safe to use Pager.dbFileSize here? */
+    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
+    newSize = szPage*(i64)nPage;
+    if( rc==SQLITE_OK && currentSize!=newSize ){
+      if( currentSize>newSize ){
+        rc = sqlite3OsTruncate(pPager->fd, newSize);
+      }else if( (currentSize+szPage)<=newSize ){
+        char *pTmp = pPager->pTmpSpace;
+        memset(pTmp, 0, szPage);
+        testcase( (newSize-szPage) == currentSize );
+        testcase( (newSize-szPage) >  currentSize );
+        rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
+      }
+      if( rc==SQLITE_OK ){
+        pPager->dbFileSize = nPage;
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Return a sanitized version of the sector-size of OS file pFile. The
+** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE.
+*/
+SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *pFile){
+  int iRet = sqlite3OsSectorSize(pFile);
+  if( iRet<32 ){
+    iRet = 512;
+  }else if( iRet>MAX_SECTOR_SIZE ){
+    assert( MAX_SECTOR_SIZE>=512 );
+    iRet = MAX_SECTOR_SIZE;
+  }
+  return iRet;
+}
+
+/*
+** Set the value of the Pager.sectorSize variable for the given
+** pager based on the value returned by the xSectorSize method
+** of the open database file. The sector size will be used 
+** to determine the size and alignment of journal header and 
+** master journal pointers within created journal files.
+**
+** For temporary files the effective sector size is always 512 bytes.
+**
+** Otherwise, for non-temporary files, the effective sector size is
+** the value returned by the xSectorSize() method rounded up to 32 if
+** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
+** is greater than MAX_SECTOR_SIZE.
+**
+** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set
+** the effective sector size to its minimum value (512).  The purpose of
+** pPager->sectorSize is to define the "blast radius" of bytes that
+** might change if a crash occurs while writing to a single byte in
+** that range.  But with POWERSAFE_OVERWRITE, the blast radius is zero
+** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector
+** size.  For backwards compatibility of the rollback journal file format,
+** we cannot reduce the effective sector size below 512.
+*/
+static void setSectorSize(Pager *pPager){
+  assert( isOpen(pPager->fd) || pPager->tempFile );
+
+  if( pPager->tempFile
+   || (sqlite3OsDeviceCharacteristics(pPager->fd) & 
+              SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0
+  ){
+    /* Sector size doesn't matter for temporary files. Also, the file
+    ** may not have been opened yet, in which case the OsSectorSize()
+    ** call will segfault. */
+    pPager->sectorSize = 512;
+  }else{
+    pPager->sectorSize = sqlite3SectorSize(pPager->fd);
+  }
+}
+
+/*
+** Playback the journal and thus restore the database file to
+** the state it was in before we started making changes.  
+**
+** The journal file format is as follows: 
+**
+**  (1)  8 byte prefix.  A copy of aJournalMagic[].
+**  (2)  4 byte big-endian integer which is the number of valid page records
+**       in the journal.  If this value is 0xffffffff, then compute the
+**       number of page records from the journal size.
+**  (3)  4 byte big-endian integer which is the initial value for the 
+**       sanity checksum.
+**  (4)  4 byte integer which is the number of pages to truncate the
+**       database to during a rollback.
+**  (5)  4 byte big-endian integer which is the sector size.  The header
+**       is this many bytes in size.
+**  (6)  4 byte big-endian integer which is the page size.
+**  (7)  zero padding out to the next sector size.
+**  (8)  Zero or more pages instances, each as follows:
+**        +  4 byte page number.
+**        +  pPager->pageSize bytes of data.
+**        +  4 byte checksum
+**
+** When we speak of the journal header, we mean the first 7 items above.
+** Each entry in the journal is an instance of the 8th item.
+**
+** Call the value from the second bullet "nRec".  nRec is the number of
+** valid page entries in the journal.  In most cases, you can compute the
+** value of nRec from the size of the journal file.  But if a power
+** failure occurred while the journal was being written, it could be the
+** case that the size of the journal file had already been increased but
+** the extra entries had not yet made it safely to disk.  In such a case,
+** the value of nRec computed from the file size would be too large.  For
+** that reason, we always use the nRec value in the header.
+**
+** If the nRec value is 0xffffffff it means that nRec should be computed
+** from the file size.  This value is used when the user selects the
+** no-sync option for the journal.  A power failure could lead to corruption
+** in this case.  But for things like temporary table (which will be
+** deleted when the power is restored) we don't care.  
+**
+** If the file opened as the journal file is not a well-formed
+** journal file then all pages up to the first corrupted page are rolled
+** back (or no pages if the journal header is corrupted). The journal file
+** is then deleted and SQLITE_OK returned, just as if no corruption had
+** been encountered.
+**
+** If an I/O or malloc() error occurs, the journal-file is not deleted
+** and an error code is returned.
+**
+** The isHot parameter indicates that we are trying to rollback a journal
+** that might be a hot journal.  Or, it could be that the journal is 
+** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE.
+** If the journal really is hot, reset the pager cache prior rolling
+** back any content.  If the journal is merely persistent, no reset is
+** needed.
+*/
+static int pager_playback(Pager *pPager, int isHot){
+  sqlite3_vfs *pVfs = pPager->pVfs;
+  i64 szJ;                 /* Size of the journal file in bytes */
+  u32 nRec;                /* Number of Records in the journal */
+  u32 u;                   /* Unsigned loop counter */
+  Pgno mxPg = 0;           /* Size of the original file in pages */
+  int rc;                  /* Result code of a subroutine */
+  int res = 1;             /* Value returned by sqlite3OsAccess() */
+  char *zMaster = 0;       /* Name of master journal file if any */
+  int needPagerReset;      /* True to reset page prior to first page rollback */
+  int nPlayback = 0;       /* Total number of pages restored from journal */
+
+  /* Figure out how many records are in the journal.  Abort early if
+  ** the journal is empty.
+  */
+  assert( isOpen(pPager->jfd) );
+  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
+  if( rc!=SQLITE_OK ){
+    goto end_playback;
+  }
+
+  /* Read the master journal name from the journal, if it is present.
+  ** If a master journal file name is specified, but the file is not
+  ** present on disk, then the journal is not hot and does not need to be
+  ** played back.
+  **
+  ** TODO: Technically the following is an error because it assumes that
+  ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that
+  ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c,
+  **  mxPathname is 512, which is the same as the minimum allowable value
+  ** for pageSize.
+  */
+  zMaster = pPager->pTmpSpace;
+  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
+  if( rc==SQLITE_OK && zMaster[0] ){
+    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
+  }
+  zMaster = 0;
+  if( rc!=SQLITE_OK || !res ){
+    goto end_playback;
+  }
+  pPager->journalOff = 0;
+  needPagerReset = isHot;
+
+  /* This loop terminates either when a readJournalHdr() or 
+  ** pager_playback_one_page() call returns SQLITE_DONE or an IO error 
+  ** occurs. 
+  */
+  while( 1 ){
+    /* Read the next journal header from the journal file.  If there are
+    ** not enough bytes left in the journal file for a complete header, or
+    ** it is corrupted, then a process must have failed while writing it.
+    ** This indicates nothing more needs to be rolled back.
+    */
+    rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg);
+    if( rc!=SQLITE_OK ){ 
+      if( rc==SQLITE_DONE ){
+        rc = SQLITE_OK;
+      }
+      goto end_playback;
+    }
+
+    /* If nRec is 0xffffffff, then this journal was created by a process
+    ** working in no-sync mode. This means that the rest of the journal
+    ** file consists of pages, there are no more journal headers. Compute
+    ** the value of nRec based on this assumption.
+    */
+    if( nRec==0xffffffff ){
+      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
+      nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
+    }
+
+    /* If nRec is 0 and this rollback is of a transaction created by this
+    ** process and if this is the final header in the journal, then it means
+    ** that this part of the journal was being filled but has not yet been
+    ** synced to disk.  Compute the number of pages based on the remaining
+    ** size of the file.
+    **
+    ** The third term of the test was added to fix ticket #2565.
+    ** When rolling back a hot journal, nRec==0 always means that the next
+    ** chunk of the journal contains zero pages to be rolled back.  But
+    ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
+    ** the journal, it means that the journal might contain additional
+    ** pages that need to be rolled back and that the number of pages 
+    ** should be computed based on the journal file size.
+    */
+    if( nRec==0 && !isHot &&
+        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
+      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
+    }
+
+    /* If this is the first header read from the journal, truncate the
+    ** database file back to its original size.
+    */
+    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
+      rc = pager_truncate(pPager, mxPg);
+      if( rc!=SQLITE_OK ){
+        goto end_playback;
+      }
+      pPager->dbSize = mxPg;
+    }
+
+    /* Copy original pages out of the journal and back into the 
+    ** database file and/or page cache.
+    */
+    for(u=0; u<nRec; u++){
+      if( needPagerReset ){
+        pager_reset(pPager);
+        needPagerReset = 0;
+      }
+      rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
+      if( rc==SQLITE_OK ){
+        nPlayback++;
+      }else{
+        if( rc==SQLITE_DONE ){
+          pPager->journalOff = szJ;
+          break;
+        }else if( rc==SQLITE_IOERR_SHORT_READ ){
+          /* If the journal has been truncated, simply stop reading and
+          ** processing the journal. This might happen if the journal was
+          ** not completely written and synced prior to a crash.  In that
+          ** case, the database should have never been written in the
+          ** first place so it is OK to simply abandon the rollback. */
+          rc = SQLITE_OK;
+          goto end_playback;
+        }else{
+          /* If we are unable to rollback, quit and return the error
+          ** code.  This will cause the pager to enter the error state
+          ** so that no further harm will be done.  Perhaps the next
+          ** process to come along will be able to rollback the database.
+          */
+          goto end_playback;
+        }
+      }
+    }
+  }
+  /*NOTREACHED*/
+  assert( 0 );
+
+end_playback:
+  /* Following a rollback, the database file should be back in its original
+  ** state prior to the start of the transaction, so invoke the
+  ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the
+  ** assertion that the transaction counter was modified.
+  */
+#ifdef SQLITE_DEBUG
+  if( pPager->fd->pMethods ){
+    sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0);
+  }
+#endif
+
+  /* If this playback is happening automatically as a result of an IO or 
+  ** malloc error that occurred after the change-counter was updated but 
+  ** before the transaction was committed, then the change-counter 
+  ** modification may just have been reverted. If this happens in exclusive 
+  ** mode, then subsequent transactions performed by the connection will not
+  ** update the change-counter at all. This may lead to cache inconsistency
+  ** problems for other processes at some point in the future. So, just
+  ** in case this has happened, clear the changeCountDone flag now.
+  */
+  pPager->changeCountDone = pPager->tempFile;
+
+  if( rc==SQLITE_OK ){
+    zMaster = pPager->pTmpSpace;
+    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
+    testcase( rc!=SQLITE_OK );
+  }
+  if( rc==SQLITE_OK
+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
+  ){
+    rc = sqlite3PagerSync(pPager, 0);
+  }
+  if( rc==SQLITE_OK ){
+    rc = pager_end_transaction(pPager, zMaster[0]!='\0', 0);
+    testcase( rc!=SQLITE_OK );
+  }
+  if( rc==SQLITE_OK && zMaster[0] && res ){
+    /* If there was a master journal and this routine will return success,
+    ** see if it is possible to delete the master journal.
+    */
+    rc = pager_delmaster(pPager, zMaster);
+    testcase( rc!=SQLITE_OK );
+  }
+  if( isHot && nPlayback ){
+    sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s",
+                nPlayback, pPager->zJournal);
+  }
+
+  /* The Pager.sectorSize variable may have been updated while rolling
+  ** back a journal created by a process with a different sector size
+  ** value. Reset it to the correct value for this process.
+  */
+  setSectorSize(pPager);
+  return rc;
+}
+
+
+/*
+** Read the content for page pPg out of the database file and into 
+** pPg->pData. A shared lock or greater must be held on the database
+** file before this function is called.
+**
+** If page 1 is read, then the value of Pager.dbFileVers[] is set to
+** the value read from the database file.
+**
+** If an IO error occurs, then the IO error is returned to the caller.
+** Otherwise, SQLITE_OK is returned.
+*/
+static int readDbPage(PgHdr *pPg, u32 iFrame){
+  Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
+  Pgno pgno = pPg->pgno;       /* Page number to read */
+  int rc = SQLITE_OK;          /* Return code */
+  int pgsz = pPager->pageSize; /* Number of bytes to read */
+
+  assert( pPager->eState>=PAGER_READER && !MEMDB );
+  assert( isOpen(pPager->fd) );
+
+#ifndef SQLITE_OMIT_WAL
+  if( iFrame ){
+    /* Try to pull the page from the write-ahead log. */
+    rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData);
+  }else
+#endif
+  {
+    i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
+    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
+    if( rc==SQLITE_IOERR_SHORT_READ ){
+      rc = SQLITE_OK;
+    }
+  }
+
+  if( pgno==1 ){
+    if( rc ){
+      /* If the read is unsuccessful, set the dbFileVers[] to something
+      ** that will never be a valid file version.  dbFileVers[] is a copy
+      ** of bytes 24..39 of the database.  Bytes 28..31 should always be
+      ** zero or the size of the database in page. Bytes 32..35 and 35..39
+      ** should be page numbers which are never 0xffffffff.  So filling
+      ** pPager->dbFileVers[] with all 0xff bytes should suffice.
+      **
+      ** For an encrypted database, the situation is more complex:  bytes
+      ** 24..39 of the database are white noise.  But the probability of
+      ** white noise equaling 16 bytes of 0xff is vanishingly small so
+      ** we should still be ok.
+      */
+      memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
+    }else{
+      u8 *dbFileVers = &((u8*)pPg->pData)[24];
+      memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
+    }
+  }
+  CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);
+
+  PAGER_INCR(sqlite3_pager_readdb_count);
+  PAGER_INCR(pPager->nRead);
+  IOTRACE(("PGIN %p %d\n", pPager, pgno));
+  PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
+               PAGERID(pPager), pgno, pager_pagehash(pPg)));
+
+  return rc;
+}
+
+/*
+** Update the value of the change-counter at offsets 24 and 92 in
+** the header and the sqlite version number at offset 96.
+**
+** This is an unconditional update.  See also the pager_incr_changecounter()
+** routine which only updates the change-counter if the update is actually
+** needed, as determined by the pPager->changeCountDone state variable.
+*/
+static void pager_write_changecounter(PgHdr *pPg){
+  u32 change_counter;
+
+  /* Increment the value just read and write it back to byte 24. */
+  change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1;
+  put32bits(((char*)pPg->pData)+24, change_counter);
+
+  /* Also store the SQLite version number in bytes 96..99 and in
+  ** bytes 92..95 store the change counter for which the version number
+  ** is valid. */
+  put32bits(((char*)pPg->pData)+92, change_counter);
+  put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER);
+}
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** This function is invoked once for each page that has already been 
+** written into the log file when a WAL transaction is rolled back.
+** Parameter iPg is the page number of said page. The pCtx argument 
+** is actually a pointer to the Pager structure.
+**
+** If page iPg is present in the cache, and has no outstanding references,
+** it is discarded. Otherwise, if there are one or more outstanding
+** references, the page content is reloaded from the database. If the
+** attempt to reload content from the database is required and fails, 
+** return an SQLite error code. Otherwise, SQLITE_OK.
+*/
+static int pagerUndoCallback(void *pCtx, Pgno iPg){
+  int rc = SQLITE_OK;
+  Pager *pPager = (Pager *)pCtx;
+  PgHdr *pPg;
+
+  assert( pagerUseWal(pPager) );
+  pPg = sqlite3PagerLookup(pPager, iPg);
+  if( pPg ){
+    if( sqlite3PcachePageRefcount(pPg)==1 ){
+      sqlite3PcacheDrop(pPg);
+    }else{
+      u32 iFrame = 0;
+      rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame);
+      if( rc==SQLITE_OK ){
+        rc = readDbPage(pPg, iFrame);
+      }
+      if( rc==SQLITE_OK ){
+        pPager->xReiniter(pPg);
+      }
+      sqlite3PagerUnrefNotNull(pPg);
+    }
+  }
+
+  /* Normally, if a transaction is rolled back, any backup processes are
+  ** updated as data is copied out of the rollback journal and into the
+  ** database. This is not generally possible with a WAL database, as
+  ** rollback involves simply truncating the log file. Therefore, if one
+  ** or more frames have already been written to the log (and therefore 
+  ** also copied into the backup databases) as part of this transaction,
+  ** the backups must be restarted.
+  */
+  sqlite3BackupRestart(pPager->pBackup);
+
+  return rc;
+}
+
+/*
+** This function is called to rollback a transaction on a WAL database.
+*/
+static int pagerRollbackWal(Pager *pPager){
+  int rc;                         /* Return Code */
+  PgHdr *pList;                   /* List of dirty pages to revert */
+
+  /* For all pages in the cache that are currently dirty or have already
+  ** been written (but not committed) to the log file, do one of the 
+  ** following:
+  **
+  **   + Discard the cached page (if refcount==0), or
+  **   + Reload page content from the database (if refcount>0).
+  */
+  pPager->dbSize = pPager->dbOrigSize;
+  rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager);
+  pList = sqlite3PcacheDirtyList(pPager->pPCache);
+  while( pList && rc==SQLITE_OK ){
+    PgHdr *pNext = pList->pDirty;
+    rc = pagerUndoCallback((void *)pPager, pList->pgno);
+    pList = pNext;
+  }
+
+  return rc;
+}
+
+/*
+** This function is a wrapper around sqlite3WalFrames(). As well as logging
+** the contents of the list of pages headed by pList (connected by pDirty),
+** this function notifies any active backup processes that the pages have
+** changed. 
+**
+** The list of pages passed into this routine is always sorted by page number.
+** Hence, if page 1 appears anywhere on the list, it will be the first page.
+*/ 
+static int pagerWalFrames(
+  Pager *pPager,                  /* Pager object */
+  PgHdr *pList,                   /* List of frames to log */
+  Pgno nTruncate,                 /* Database size after this commit */
+  int isCommit                    /* True if this is a commit */
+){
+  int rc;                         /* Return code */
+  int nList;                      /* Number of pages in pList */
+#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
+  PgHdr *p;                       /* For looping over pages */
+#endif
+
+  assert( pPager->pWal );
+  assert( pList );
+#ifdef SQLITE_DEBUG
+  /* Verify that the page list is in accending order */
+  for(p=pList; p && p->pDirty; p=p->pDirty){
+    assert( p->pgno < p->pDirty->pgno );
+  }
+#endif
+
+  assert( pList->pDirty==0 || isCommit );
+  if( isCommit ){
+    /* If a WAL transaction is being committed, there is no point in writing
+    ** any pages with page numbers greater than nTruncate into the WAL file.
+    ** They will never be read by any client. So remove them from the pDirty
+    ** list here. */
+    PgHdr *p;
+    PgHdr **ppNext = &pList;
+    nList = 0;
+    for(p=pList; (*ppNext = p)!=0; p=p->pDirty){
+      if( p->pgno<=nTruncate ){
+        ppNext = &p->pDirty;
+        nList++;
+      }
+    }
+    assert( pList );
+  }else{
+    nList = 1;
+  }
+  pPager->aStat[PAGER_STAT_WRITE] += nList;
+
+  if( pList->pgno==1 ) pager_write_changecounter(pList);
+  rc = sqlite3WalFrames(pPager->pWal, 
+      pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags
+  );
+  if( rc==SQLITE_OK && pPager->pBackup ){
+    PgHdr *p;
+    for(p=pList; p; p=p->pDirty){
+      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
+    }
+  }
+
+#ifdef SQLITE_CHECK_PAGES
+  pList = sqlite3PcacheDirtyList(pPager->pPCache);
+  for(p=pList; p; p=p->pDirty){
+    pager_set_pagehash(p);
+  }
+#endif
+
+  return rc;
+}
+
+/*
+** Begin a read transaction on the WAL.
+**
+** This routine used to be called "pagerOpenSnapshot()" because it essentially
+** makes a snapshot of the database at the current point in time and preserves
+** that snapshot for use by the reader in spite of concurrently changes by
+** other writers or checkpointers.
+*/
+static int pagerBeginReadTransaction(Pager *pPager){
+  int rc;                         /* Return code */
+  int changed = 0;                /* True if cache must be reset */
+
+  assert( pagerUseWal(pPager) );
+  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
+
+  /* sqlite3WalEndReadTransaction() was not called for the previous
+  ** transaction in locking_mode=EXCLUSIVE.  So call it now.  If we
+  ** are in locking_mode=NORMAL and EndRead() was previously called,
+  ** the duplicate call is harmless.
+  */
+  sqlite3WalEndReadTransaction(pPager->pWal);
+
+  rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
+  if( rc!=SQLITE_OK || changed ){
+    pager_reset(pPager);
+    if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0);
+  }
+
+  return rc;
+}
+#endif
+
+/*
+** This function is called as part of the transition from PAGER_OPEN
+** to PAGER_READER state to determine the size of the database file
+** in pages (assuming the page size currently stored in Pager.pageSize).
+**
+** If no error occurs, SQLITE_OK is returned and the size of the database
+** in pages is stored in *pnPage. Otherwise, an error code (perhaps
+** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified.
+*/
+static int pagerPagecount(Pager *pPager, Pgno *pnPage){
+  Pgno nPage;                     /* Value to return via *pnPage */
+
+  /* Query the WAL sub-system for the database size. The WalDbsize()
+  ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or
+  ** if the database size is not available. The database size is not
+  ** available from the WAL sub-system if the log file is empty or
+  ** contains no valid committed transactions.
+  */
+  assert( pPager->eState==PAGER_OPEN );
+  assert( pPager->eLock>=SHARED_LOCK );
+  nPage = sqlite3WalDbsize(pPager->pWal);
+
+  /* If the database size was not available from the WAL sub-system,
+  ** determine it based on the size of the database file. If the size
+  ** of the database file is not an integer multiple of the page-size,
+  ** round down to the nearest page. Except, any file larger than 0
+  ** bytes in size is considered to contain at least one page.
+  */
+  if( nPage==0 ){
+    i64 n = 0;                    /* Size of db file in bytes */
+    assert( isOpen(pPager->fd) || pPager->tempFile );
+    if( isOpen(pPager->fd) ){
+      int rc = sqlite3OsFileSize(pPager->fd, &n);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+    }
+    nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize);
+  }
+
+  /* If the current number of pages in the file is greater than the
+  ** configured maximum pager number, increase the allowed limit so
+  ** that the file can be read.
+  */
+  if( nPage>pPager->mxPgno ){
+    pPager->mxPgno = (Pgno)nPage;
+  }
+
+  *pnPage = nPage;
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** Check if the *-wal file that corresponds to the database opened by pPager
+** exists if the database is not empy, or verify that the *-wal file does
+** not exist (by deleting it) if the database file is empty.
+**
+** If the database is not empty and the *-wal file exists, open the pager
+** in WAL mode.  If the database is empty or if no *-wal file exists and
+** if no error occurs, make sure Pager.journalMode is not set to
+** PAGER_JOURNALMODE_WAL.
+**
+** Return SQLITE_OK or an error code.
+**
+** The caller must hold a SHARED lock on the database file to call this
+** function. Because an EXCLUSIVE lock on the db file is required to delete 
+** a WAL on a none-empty database, this ensures there is no race condition 
+** between the xAccess() below and an xDelete() being executed by some 
+** other connection.
+*/
+static int pagerOpenWalIfPresent(Pager *pPager){
+  int rc = SQLITE_OK;
+  assert( pPager->eState==PAGER_OPEN );
+  assert( pPager->eLock>=SHARED_LOCK );
+
+  if( !pPager->tempFile ){
+    int isWal;                    /* True if WAL file exists */
+    Pgno nPage;                   /* Size of the database file */
+
+    rc = pagerPagecount(pPager, &nPage);
+    if( rc ) return rc;
+    if( nPage==0 ){
+      rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0);
+      if( rc==SQLITE_IOERR_DELETE_NOENT ) rc = SQLITE_OK;
+      isWal = 0;
+    }else{
+      rc = sqlite3OsAccess(
+          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal
+      );
+    }
+    if( rc==SQLITE_OK ){
+      if( isWal ){
+        testcase( sqlite3PcachePagecount(pPager->pPCache)==0 );
+        rc = sqlite3PagerOpenWal(pPager, 0);
+      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
+        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
+      }
+    }
+  }
+  return rc;
+}
+#endif
+
+/*
+** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
+** the entire master journal file. The case pSavepoint==NULL occurs when 
+** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction 
+** savepoint.
+**
+** When pSavepoint is not NULL (meaning a non-transaction savepoint is 
+** being rolled back), then the rollback consists of up to three stages,
+** performed in the order specified:
+**
+**   * Pages are played back from the main journal starting at byte
+**     offset PagerSavepoint.iOffset and continuing to 
+**     PagerSavepoint.iHdrOffset, or to the end of the main journal
+**     file if PagerSavepoint.iHdrOffset is zero.
+**
+**   * If PagerSavepoint.iHdrOffset is not zero, then pages are played
+**     back starting from the journal header immediately following 
+**     PagerSavepoint.iHdrOffset to the end of the main journal file.
+**
+**   * Pages are then played back from the sub-journal file, starting
+**     with the PagerSavepoint.iSubRec and continuing to the end of
+**     the journal file.
+**
+** Throughout the rollback process, each time a page is rolled back, the
+** corresponding bit is set in a bitvec structure (variable pDone in the
+** implementation below). This is used to ensure that a page is only
+** rolled back the first time it is encountered in either journal.
+**
+** If pSavepoint is NULL, then pages are only played back from the main
+** journal file. There is no need for a bitvec in this case.
+**
+** In either case, before playback commences the Pager.dbSize variable
+** is reset to the value that it held at the start of the savepoint 
+** (or transaction). No page with a page-number greater than this value
+** is played back. If one is encountered it is simply skipped.
+*/
+static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
+  i64 szJ;                 /* Effective size of the main journal */
+  i64 iHdrOff;             /* End of first segment of main-journal records */
+  int rc = SQLITE_OK;      /* Return code */
+  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */
+
+  assert( pPager->eState!=PAGER_ERROR );
+  assert( pPager->eState>=PAGER_WRITER_LOCKED );
+
+  /* Allocate a bitvec to use to store the set of pages rolled back */
+  if( pSavepoint ){
+    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
+    if( !pDone ){
+      return SQLITE_NOMEM;
+    }
+  }
+
+  /* Set the database size back to the value it was before the savepoint 
+  ** being reverted was opened.
+  */
+  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
+  pPager->changeCountDone = pPager->tempFile;
+
+  if( !pSavepoint && pagerUseWal(pPager) ){
+    return pagerRollbackWal(pPager);
+  }
+
+  /* Use pPager->journalOff as the effective size of the main rollback
+  ** journal.  The actual file might be larger than this in
+  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
+  ** past pPager->journalOff is off-limits to us.
+  */
+  szJ = pPager->journalOff;
+  assert( pagerUseWal(pPager)==0 || szJ==0 );
+
+  /* Begin by rolling back records from the main journal starting at
+  ** PagerSavepoint.iOffset and continuing to the next journal header.
+  ** There might be records in the main journal that have a page number
+  ** greater than the current database size (pPager->dbSize) but those
+  ** will be skipped automatically.  Pages are added to pDone as they
+  ** are played back.
+  */
+  if( pSavepoint && !pagerUseWal(pPager) ){
+    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
+    pPager->journalOff = pSavepoint->iOffset;
+    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
+      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
+    }
+    assert( rc!=SQLITE_DONE );
+  }else{
+    pPager->journalOff = 0;
+  }
+
+  /* Continue rolling back records out of the main journal starting at
+  ** the first journal header seen and continuing until the effective end
+  ** of the main journal file.  Continue to skip out-of-range pages and
+  ** continue adding pages rolled back to pDone.
+  */
+  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
+    u32 ii;            /* Loop counter */
+    u32 nJRec = 0;     /* Number of Journal Records */
+    u32 dummy;
+    rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy);
+    assert( rc!=SQLITE_DONE );
+
+    /*
+    ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
+    ** test is related to ticket #2565.  See the discussion in the
+    ** pager_playback() function for additional information.
+    */
+    if( nJRec==0 
+     && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
+    ){
+      nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
+    }
+    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
+      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
+    }
+    assert( rc!=SQLITE_DONE );
+  }
+  assert( rc!=SQLITE_OK || pPager->journalOff>=szJ );
+
+  /* Finally,  rollback pages from the sub-journal.  Page that were
+  ** previously rolled back out of the main journal (and are hence in pDone)
+  ** will be skipped.  Out-of-range pages are also skipped.
+  */
+  if( pSavepoint ){
+    u32 ii;            /* Loop counter */
+    i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize);
+
+    if( pagerUseWal(pPager) ){
+      rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData);
+    }
+    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
+      assert( offset==(i64)ii*(4+pPager->pageSize) );
+      rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
+    }
+    assert( rc!=SQLITE_DONE );
+  }
+
+  sqlite3BitvecDestroy(pDone);
+  if( rc==SQLITE_OK ){
+    pPager->journalOff = szJ;
+  }
+
+  return rc;
+}
+
+/*
+** Change the maximum number of in-memory pages that are allowed.
+*/
+SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
+  sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
+}
+
+/*
+** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap.
+*/
+static void pagerFixMaplimit(Pager *pPager){
+#if SQLITE_MAX_MMAP_SIZE>0
+  sqlite3_file *fd = pPager->fd;
+  if( isOpen(fd) && fd->pMethods->iVersion>=3 ){
+    sqlite3_int64 sz;
+    sz = pPager->szMmap;
+    pPager->bUseFetch = (sz>0);
+    sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz);
+  }
+#endif
+}
+
+/*
+** Change the maximum size of any memory mapping made of the database file.
+*/
+SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){
+  pPager->szMmap = szMmap;
+  pagerFixMaplimit(pPager);
+}
+
+/*
+** Free as much memory as possible from the pager.
+*/
+SQLITE_PRIVATE void sqlite3PagerShrink(Pager *pPager){
+  sqlite3PcacheShrink(pPager->pPCache);
+}
+
+/*
+** Adjust settings of the pager to those specified in the pgFlags parameter.
+**
+** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness
+** of the database to damage due to OS crashes or power failures by
+** changing the number of syncs()s when writing the journals.
+** There are three levels:
+**
+**    OFF       sqlite3OsSync() is never called.  This is the default
+**              for temporary and transient files.
+**
+**    NORMAL    The journal is synced once before writes begin on the
+**              database.  This is normally adequate protection, but
+**              it is theoretically possible, though very unlikely,
+**              that an inopertune power failure could leave the journal
+**              in a state which would cause damage to the database
+**              when it is rolled back.
+**
+**    FULL      The journal is synced twice before writes begin on the
+**              database (with some additional information - the nRec field
+**              of the journal header - being written in between the two
+**              syncs).  If we assume that writing a
+**              single disk sector is atomic, then this mode provides
+**              assurance that the journal will not be corrupted to the
+**              point of causing damage to the database during rollback.
+**
+** The above is for a rollback-journal mode.  For WAL mode, OFF continues
+** to mean that no syncs ever occur.  NORMAL means that the WAL is synced
+** prior to the start of checkpoint and that the database file is synced
+** at the conclusion of the checkpoint if the entire content of the WAL
+** was written back into the database.  But no sync operations occur for
+** an ordinary commit in NORMAL mode with WAL.  FULL means that the WAL
+** file is synced following each commit operation, in addition to the
+** syncs associated with NORMAL.
+**
+** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL.  The
+** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync
+** using fcntl(F_FULLFSYNC).  SQLITE_SYNC_NORMAL means to do an
+** ordinary fsync() call.  There is no difference between SQLITE_SYNC_FULL
+** and SQLITE_SYNC_NORMAL on platforms other than MacOSX.  But the
+** synchronous=FULL versus synchronous=NORMAL setting determines when
+** the xSync primitive is called and is relevant to all platforms.
+**
+** Numeric values associated with these states are OFF==1, NORMAL=2,
+** and FULL=3.
+*/
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+SQLITE_PRIVATE void sqlite3PagerSetFlags(
+  Pager *pPager,        /* The pager to set safety level for */
+  unsigned pgFlags      /* Various flags */
+){
+  unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK;
+  assert( level>=1 && level<=3 );
+  pPager->noSync =  (level==1 || pPager->tempFile) ?1:0;
+  pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0;
+  if( pPager->noSync ){
+    pPager->syncFlags = 0;
+    pPager->ckptSyncFlags = 0;
+  }else if( pgFlags & PAGER_FULLFSYNC ){
+    pPager->syncFlags = SQLITE_SYNC_FULL;
+    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
+  }else if( pgFlags & PAGER_CKPT_FULLFSYNC ){
+    pPager->syncFlags = SQLITE_SYNC_NORMAL;
+    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
+  }else{
+    pPager->syncFlags = SQLITE_SYNC_NORMAL;
+    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
+  }
+  pPager->walSyncFlags = pPager->syncFlags;
+  if( pPager->fullSync ){
+    pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS;
+  }
+  if( pgFlags & PAGER_CACHESPILL ){
+    pPager->doNotSpill &= ~SPILLFLAG_OFF;
+  }else{
+    pPager->doNotSpill |= SPILLFLAG_OFF;
+  }
+}
+#endif
+
+/*
+** The following global variable is incremented whenever the library
+** attempts to open a temporary file.  This information is used for
+** testing and analysis only.  
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_opentemp_count = 0;
+#endif
+
+/*
+** Open a temporary file.
+**
+** Write the file descriptor into *pFile. Return SQLITE_OK on success 
+** or some other error code if we fail. The OS will automatically 
+** delete the temporary file when it is closed.
+**
+** The flags passed to the VFS layer xOpen() call are those specified
+** by parameter vfsFlags ORed with the following:
+**
+**     SQLITE_OPEN_READWRITE
+**     SQLITE_OPEN_CREATE
+**     SQLITE_OPEN_EXCLUSIVE
+**     SQLITE_OPEN_DELETEONCLOSE
+*/
+static int pagerOpentemp(
+  Pager *pPager,        /* The pager object */
+  sqlite3_file *pFile,  /* Write the file descriptor here */
+  int vfsFlags          /* Flags passed through to the VFS */
+){
+  int rc;               /* Return code */
+
+#ifdef SQLITE_TEST
+  sqlite3_opentemp_count++;  /* Used for testing and analysis only */
+#endif
+
+  vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
+            SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
+  rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
+  assert( rc!=SQLITE_OK || isOpen(pFile) );
+  return rc;
+}
+
+/*
+** Set the busy handler function.
+**
+** The pager invokes the busy-handler if sqlite3OsLock() returns 
+** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock,
+** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE 
+** lock. It does *not* invoke the busy handler when upgrading from
+** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE
+** (which occurs during hot-journal rollback). Summary:
+**
+**   Transition                        | Invokes xBusyHandler
+**   --------------------------------------------------------
+**   NO_LOCK       -> SHARED_LOCK      | Yes
+**   SHARED_LOCK   -> RESERVED_LOCK    | No
+**   SHARED_LOCK   -> EXCLUSIVE_LOCK   | No
+**   RESERVED_LOCK -> EXCLUSIVE_LOCK   | Yes
+**
+** If the busy-handler callback returns non-zero, the lock is 
+** retried. If it returns zero, then the SQLITE_BUSY error is
+** returned to the caller of the pager API function.
+*/
+SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(
+  Pager *pPager,                       /* Pager object */
+  int (*xBusyHandler)(void *),         /* Pointer to busy-handler function */
+  void *pBusyHandlerArg                /* Argument to pass to xBusyHandler */
+){
+  pPager->xBusyHandler = xBusyHandler;
+  pPager->pBusyHandlerArg = pBusyHandlerArg;
+
+  if( isOpen(pPager->fd) ){
+    void **ap = (void **)&pPager->xBusyHandler;
+    assert( ((int(*)(void *))(ap[0]))==xBusyHandler );
+    assert( ap[1]==pBusyHandlerArg );
+    sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap);
+  }
+}
+
+/*
+** Change the page size used by the Pager object. The new page size 
+** is passed in *pPageSize.
+**
+** If the pager is in the error state when this function is called, it
+** is a no-op. The value returned is the error state error code (i.e. 
+** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL).
+**
+** Otherwise, if all of the following are true:
+**
+**   * the new page size (value of *pPageSize) is valid (a power 
+**     of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and
+**
+**   * there are no outstanding page references, and
+**
+**   * the database is either not an in-memory database or it is
+**     an in-memory database that currently consists of zero pages.
+**
+** then the pager object page size is set to *pPageSize.
+**
+** If the page size is changed, then this function uses sqlite3PagerMalloc() 
+** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt 
+** fails, SQLITE_NOMEM is returned and the page size remains unchanged. 
+** In all other cases, SQLITE_OK is returned.
+**
+** If the page size is not changed, either because one of the enumerated
+** conditions above is not true, the pager was in error state when this
+** function was called, or because the memory allocation attempt failed, 
+** then *pPageSize is set to the old, retained page size before returning.
+*/
+SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){
+  int rc = SQLITE_OK;
+
+  /* It is not possible to do a full assert_pager_state() here, as this
+  ** function may be called from within PagerOpen(), before the state
+  ** of the Pager object is internally consistent.
+  **
+  ** At one point this function returned an error if the pager was in 
+  ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that
+  ** there is at least one outstanding page reference, this function
+  ** is a no-op for that case anyhow.
+  */
+
+  u32 pageSize = *pPageSize;
+  assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
+  if( (pPager->memDb==0 || pPager->dbSize==0)
+   && sqlite3PcacheRefCount(pPager->pPCache)==0 
+   && pageSize && pageSize!=(u32)pPager->pageSize 
+  ){
+    char *pNew = NULL;             /* New temp space */
+    i64 nByte = 0;
+
+    if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){
+      rc = sqlite3OsFileSize(pPager->fd, &nByte);
+    }
+    if( rc==SQLITE_OK ){
+      pNew = (char *)sqlite3PageMalloc(pageSize);
+      if( !pNew ) rc = SQLITE_NOMEM;
+    }
+
+    if( rc==SQLITE_OK ){
+      pager_reset(pPager);
+      rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
+    }
+    if( rc==SQLITE_OK ){
+      sqlite3PageFree(pPager->pTmpSpace);
+      pPager->pTmpSpace = pNew;
+      pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize);
+      pPager->pageSize = pageSize;
+    }else{
+      sqlite3PageFree(pNew);
+    }
+  }
+
+  *pPageSize = pPager->pageSize;
+  if( rc==SQLITE_OK ){
+    if( nReserve<0 ) nReserve = pPager->nReserve;
+    assert( nReserve>=0 && nReserve<1000 );
+    pPager->nReserve = (i16)nReserve;
+    pagerReportSize(pPager);
+    pagerFixMaplimit(pPager);
+  }
+  return rc;
+}
+
+/*
+** Return a pointer to the "temporary page" buffer held internally
+** by the pager.  This is a buffer that is big enough to hold the
+** entire content of a database page.  This buffer is used internally
+** during rollback and will be overwritten whenever a rollback
+** occurs.  But other modules are free to use it too, as long as
+** no rollbacks are happening.
+*/
+SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){
+  return pPager->pTmpSpace;
+}
+
+/*
+** Attempt to set the maximum database page count if mxPage is positive. 
+** Make no changes if mxPage is zero or negative.  And never reduce the
+** maximum page count below the current size of the database.
+**
+** Regardless of mxPage, return the current maximum page count.
+*/
+SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
+  if( mxPage>0 ){
+    pPager->mxPgno = mxPage;
+  }
+  assert( pPager->eState!=PAGER_OPEN );      /* Called only by OP_MaxPgcnt */
+  assert( pPager->mxPgno>=pPager->dbSize );  /* OP_MaxPgcnt enforces this */
+  return pPager->mxPgno;
+}
+
+/*
+** The following set of routines are used to disable the simulated
+** I/O error mechanism.  These routines are used to avoid simulated
+** errors in places where we do not care about errors.
+**
+** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
+** and generate no code.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API extern int sqlite3_io_error_pending;
+SQLITE_API extern int sqlite3_io_error_hit;
+static int saved_cnt;
+void disable_simulated_io_errors(void){
+  saved_cnt = sqlite3_io_error_pending;
+  sqlite3_io_error_pending = -1;
+}
+void enable_simulated_io_errors(void){
+  sqlite3_io_error_pending = saved_cnt;
+}
+#else
+# define disable_simulated_io_errors()
+# define enable_simulated_io_errors()
+#endif
+
+/*
+** Read the first N bytes from the beginning of the file into memory
+** that pDest points to. 
+**
+** If the pager was opened on a transient file (zFilename==""), or
+** opened on a file less than N bytes in size, the output buffer is
+** zeroed and SQLITE_OK returned. The rationale for this is that this 
+** function is used to read database headers, and a new transient or
+** zero sized database has a header than consists entirely of zeroes.
+**
+** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered,
+** the error code is returned to the caller and the contents of the
+** output buffer undefined.
+*/
+SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
+  int rc = SQLITE_OK;
+  memset(pDest, 0, N);
+  assert( isOpen(pPager->fd) || pPager->tempFile );
+
+  /* This routine is only called by btree immediately after creating
+  ** the Pager object.  There has not been an opportunity to transition
+  ** to WAL mode yet.
+  */
+  assert( !pagerUseWal(pPager) );
+
+  if( isOpen(pPager->fd) ){
+    IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
+    rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
+    if( rc==SQLITE_IOERR_SHORT_READ ){
+      rc = SQLITE_OK;
+    }
+  }
+  return rc;
+}
+
+/*
+** This function may only be called when a read-transaction is open on
+** the pager. It returns the total number of pages in the database.
+**
+** However, if the file is between 1 and <page-size> bytes in size, then 
+** this is considered a 1 page file.
+*/
+SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){
+  assert( pPager->eState>=PAGER_READER );
+  assert( pPager->eState!=PAGER_WRITER_FINISHED );
+  *pnPage = (int)pPager->dbSize;
+}
+
+
+/*
+** Try to obtain a lock of type locktype on the database file. If
+** a similar or greater lock is already held, this function is a no-op
+** (returning SQLITE_OK immediately).
+**
+** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke 
+** the busy callback if the lock is currently not available. Repeat 
+** until the busy callback returns false or until the attempt to 
+** obtain the lock succeeds.
+**
+** Return SQLITE_OK on success and an error code if we cannot obtain
+** the lock. If the lock is obtained successfully, set the Pager.state 
+** variable to locktype before returning.
+*/
+static int pager_wait_on_lock(Pager *pPager, int locktype){
+  int rc;                              /* Return code */
+
+  /* Check that this is either a no-op (because the requested lock is 
+  ** already held), or one of the transitions that the busy-handler
+  ** may be invoked during, according to the comment above
+  ** sqlite3PagerSetBusyhandler().
+  */
+  assert( (pPager->eLock>=locktype)
+       || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK)
+       || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK)
+  );
+
+  do {
+    rc = pagerLockDb(pPager, locktype);
+  }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) );
+  return rc;
+}
+
+/*
+** Function assertTruncateConstraint(pPager) checks that one of the 
+** following is true for all dirty pages currently in the page-cache:
+**
+**   a) The page number is less than or equal to the size of the 
+**      current database image, in pages, OR
+**
+**   b) if the page content were written at this time, it would not
+**      be necessary to write the current content out to the sub-journal
+**      (as determined by function subjRequiresPage()).
+**
+** If the condition asserted by this function were not true, and the
+** dirty page were to be discarded from the cache via the pagerStress()
+** routine, pagerStress() would not write the current page content to
+** the database file. If a savepoint transaction were rolled back after
+** this happened, the correct behavior would be to restore the current
+** content of the page. However, since this content is not present in either
+** the database file or the portion of the rollback journal and 
+** sub-journal rolled back the content could not be restored and the
+** database image would become corrupt. It is therefore fortunate that 
+** this circumstance cannot arise.
+*/
+#if defined(SQLITE_DEBUG)
+static void assertTruncateConstraintCb(PgHdr *pPg){
+  assert( pPg->flags&PGHDR_DIRTY );
+  assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize );
+}
+static void assertTruncateConstraint(Pager *pPager){
+  sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb);
+}
+#else
+# define assertTruncateConstraint(pPager)
+#endif
+
+/*
+** Truncate the in-memory database file image to nPage pages. This 
+** function does not actually modify the database file on disk. It 
+** just sets the internal state of the pager object so that the 
+** truncation will be done when the current transaction is committed.
+**
+** This function is only called right before committing a transaction.
+** Once this function has been called, the transaction must either be
+** rolled back or committed. It is not safe to call this function and
+** then continue writing to the database.
+*/
+SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
+  assert( pPager->dbSize>=nPage );
+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
+  pPager->dbSize = nPage;
+
+  /* At one point the code here called assertTruncateConstraint() to
+  ** ensure that all pages being truncated away by this operation are,
+  ** if one or more savepoints are open, present in the savepoint 
+  ** journal so that they can be restored if the savepoint is rolled
+  ** back. This is no longer necessary as this function is now only
+  ** called right before committing a transaction. So although the 
+  ** Pager object may still have open savepoints (Pager.nSavepoint!=0), 
+  ** they cannot be rolled back. So the assertTruncateConstraint() call
+  ** is no longer correct. */
+}
+
+
+/*
+** This function is called before attempting a hot-journal rollback. It
+** syncs the journal file to disk, then sets pPager->journalHdr to the
+** size of the journal file so that the pager_playback() routine knows
+** that the entire journal file has been synced.
+**
+** Syncing a hot-journal to disk before attempting to roll it back ensures 
+** that if a power-failure occurs during the rollback, the process that
+** attempts rollback following system recovery sees the same journal
+** content as this process.
+**
+** If everything goes as planned, SQLITE_OK is returned. Otherwise, 
+** an SQLite error code.
+*/
+static int pagerSyncHotJournal(Pager *pPager){
+  int rc = SQLITE_OK;
+  if( !pPager->noSync ){
+    rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
+  }
+  return rc;
+}
+
+/*
+** Obtain a reference to a memory mapped page object for page number pgno. 
+** The new object will use the pointer pData, obtained from xFetch().
+** If successful, set *ppPage to point to the new page reference
+** and return SQLITE_OK. Otherwise, return an SQLite error code and set
+** *ppPage to zero.
+**
+** Page references obtained by calling this function should be released
+** by calling pagerReleaseMapPage().
+*/
+static int pagerAcquireMapPage(
+  Pager *pPager,                  /* Pager object */
+  Pgno pgno,                      /* Page number */
+  void *pData,                    /* xFetch()'d data for this page */
+  PgHdr **ppPage                  /* OUT: Acquired page object */
+){
+  PgHdr *p;                       /* Memory mapped page to return */
+  
+  if( pPager->pMmapFreelist ){
+    *ppPage = p = pPager->pMmapFreelist;
+    pPager->pMmapFreelist = p->pDirty;
+    p->pDirty = 0;
+    memset(p->pExtra, 0, pPager->nExtra);
+  }else{
+    *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra);
+    if( p==0 ){
+      sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData);
+      return SQLITE_NOMEM;
+    }
+    p->pExtra = (void *)&p[1];
+    p->flags = PGHDR_MMAP;
+    p->nRef = 1;
+    p->pPager = pPager;
+  }
+
+  assert( p->pExtra==(void *)&p[1] );
+  assert( p->pPage==0 );
+  assert( p->flags==PGHDR_MMAP );
+  assert( p->pPager==pPager );
+  assert( p->nRef==1 );
+
+  p->pgno = pgno;
+  p->pData = pData;
+  pPager->nMmapOut++;
+
+  return SQLITE_OK;
+}
+
+/*
+** Release a reference to page pPg. pPg must have been returned by an 
+** earlier call to pagerAcquireMapPage().
+*/
+static void pagerReleaseMapPage(PgHdr *pPg){
+  Pager *pPager = pPg->pPager;
+  pPager->nMmapOut--;
+  pPg->pDirty = pPager->pMmapFreelist;
+  pPager->pMmapFreelist = pPg;
+
+  assert( pPager->fd->pMethods->iVersion>=3 );
+  sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData);
+}
+
+/*
+** Free all PgHdr objects stored in the Pager.pMmapFreelist list.
+*/
+static void pagerFreeMapHdrs(Pager *pPager){
+  PgHdr *p;
+  PgHdr *pNext;
+  for(p=pPager->pMmapFreelist; p; p=pNext){
+    pNext = p->pDirty;
+    sqlite3_free(p);
+  }
+}
+
+
+/*
+** Shutdown the page cache.  Free all memory and close all files.
+**
+** If a transaction was in progress when this routine is called, that
+** transaction is rolled back.  All outstanding pages are invalidated
+** and their memory is freed.  Any attempt to use a page associated
+** with this page cache after this function returns will likely
+** result in a coredump.
+**
+** This function always succeeds. If a transaction is active an attempt
+** is made to roll it back. If an error occurs during the rollback 
+** a hot journal may be left in the filesystem but no error is returned
+** to the caller.
+*/
+SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){
+  u8 *pTmp = (u8 *)pPager->pTmpSpace;
+
+  assert( assert_pager_state(pPager) );
+  disable_simulated_io_errors();
+  sqlite3BeginBenignMalloc();
+  pagerFreeMapHdrs(pPager);
+  /* pPager->errCode = 0; */
+  pPager->exclusiveMode = 0;
+#ifndef SQLITE_OMIT_WAL
+  sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
+  pPager->pWal = 0;
+#endif
+  pager_reset(pPager);
+  if( MEMDB ){
+    pager_unlock(pPager);
+  }else{
+    /* If it is open, sync the journal file before calling UnlockAndRollback.
+    ** If this is not done, then an unsynced portion of the open journal 
+    ** file may be played back into the database. If a power failure occurs 
+    ** while this is happening, the database could become corrupt.
+    **
+    ** If an error occurs while trying to sync the journal, shift the pager
+    ** into the ERROR state. This causes UnlockAndRollback to unlock the
+    ** database and close the journal file without attempting to roll it
+    ** back or finalize it. The next database user will have to do hot-journal
+    ** rollback before accessing the database file.
+    */
+    if( isOpen(pPager->jfd) ){
+      pager_error(pPager, pagerSyncHotJournal(pPager));
+    }
+    pagerUnlockAndRollback(pPager);
+  }
+  sqlite3EndBenignMalloc();
+  enable_simulated_io_errors();
+  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
+  IOTRACE(("CLOSE %p\n", pPager))
+  sqlite3OsClose(pPager->jfd);
+  sqlite3OsClose(pPager->fd);
+  sqlite3PageFree(pTmp);
+  sqlite3PcacheClose(pPager->pPCache);
+
+#ifdef SQLITE_HAS_CODEC
+  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
+#endif
+
+  assert( !pPager->aSavepoint && !pPager->pInJournal );
+  assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) );
+
+  sqlite3_free(pPager);
+  return SQLITE_OK;
+}
+
+#if !defined(NDEBUG) || defined(SQLITE_TEST)
+/*
+** Return the page number for page pPg.
+*/
+SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){
+  return pPg->pgno;
+}
+#endif
+
+/*
+** Increment the reference count for page pPg.
+*/
+SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){
+  sqlite3PcacheRef(pPg);
+}
+
+/*
+** Sync the journal. In other words, make sure all the pages that have
+** been written to the journal have actually reached the surface of the
+** disk and can be restored in the event of a hot-journal rollback.
+**
+** If the Pager.noSync flag is set, then this function is a no-op.
+** Otherwise, the actions required depend on the journal-mode and the 
+** device characteristics of the file-system, as follows:
+**
+**   * If the journal file is an in-memory journal file, no action need
+**     be taken.
+**
+**   * Otherwise, if the device does not support the SAFE_APPEND property,
+**     then the nRec field of the most recently written journal header
+**     is updated to contain the number of journal records that have
+**     been written following it. If the pager is operating in full-sync
+**     mode, then the journal file is synced before this field is updated.
+**
+**   * If the device does not support the SEQUENTIAL property, then 
+**     journal file is synced.
+**
+** Or, in pseudo-code:
+**
+**   if( NOT <in-memory journal> ){
+**     if( NOT SAFE_APPEND ){
+**       if( <full-sync mode> ) xSync(<journal file>);
+**       <update nRec field>
+**     } 
+**     if( NOT SEQUENTIAL ) xSync(<journal file>);
+**   }
+**
+** If successful, this routine clears the PGHDR_NEED_SYNC flag of every 
+** page currently held in memory before returning SQLITE_OK. If an IO
+** error is encountered, then the IO error code is returned to the caller.
+*/
+static int syncJournal(Pager *pPager, int newHdr){
+  int rc;                         /* Return code */
+
+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
+       || pPager->eState==PAGER_WRITER_DBMOD
+  );
+  assert( assert_pager_state(pPager) );
+  assert( !pagerUseWal(pPager) );
+
+  rc = sqlite3PagerExclusiveLock(pPager);
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( !pPager->noSync ){
+    assert( !pPager->tempFile );
+    if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
+      const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
+      assert( isOpen(pPager->jfd) );
+
+      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
+        /* This block deals with an obscure problem. If the last connection
+        ** that wrote to this database was operating in persistent-journal
+        ** mode, then the journal file may at this point actually be larger
+        ** than Pager.journalOff bytes. If the next thing in the journal
+        ** file happens to be a journal-header (written as part of the
+        ** previous connection's transaction), and a crash or power-failure 
+        ** occurs after nRec is updated but before this connection writes 
+        ** anything else to the journal file (or commits/rolls back its 
+        ** transaction), then SQLite may become confused when doing the 
+        ** hot-journal rollback following recovery. It may roll back all
+        ** of this connections data, then proceed to rolling back the old,
+        ** out-of-date data that follows it. Database corruption.
+        **
+        ** To work around this, if the journal file does appear to contain
+        ** a valid header following Pager.journalOff, then write a 0x00
+        ** byte to the start of it to prevent it from being recognized.
+        **
+        ** Variable iNextHdrOffset is set to the offset at which this
+        ** problematic header will occur, if it exists. aMagic is used 
+        ** as a temporary buffer to inspect the first couple of bytes of
+        ** the potential journal header.
+        */
+        i64 iNextHdrOffset;
+        u8 aMagic[8];
+        u8 zHeader[sizeof(aJournalMagic)+4];
+
+        memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
+        put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec);
+
+        iNextHdrOffset = journalHdrOffset(pPager);
+        rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset);
+        if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){
+          static const u8 zerobyte = 0;
+          rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset);
+        }
+        if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
+          return rc;
+        }
+
+        /* Write the nRec value into the journal file header. If in
+        ** full-synchronous mode, sync the journal first. This ensures that
+        ** all data has really hit the disk before nRec is updated to mark
+        ** it as a candidate for rollback.
+        **
+        ** This is not required if the persistent media supports the
+        ** SAFE_APPEND property. Because in this case it is not possible 
+        ** for garbage data to be appended to the file, the nRec field
+        ** is populated with 0xFFFFFFFF when the journal header is written
+        ** and never needs to be updated.
+        */
+        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
+          PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
+          IOTRACE(("JSYNC %p\n", pPager))
+          rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
+          if( rc!=SQLITE_OK ) return rc;
+        }
+        IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr));
+        rc = sqlite3OsWrite(
+            pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr
+        );
+        if( rc!=SQLITE_OK ) return rc;
+      }
+      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
+        PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
+        IOTRACE(("JSYNC %p\n", pPager))
+        rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| 
+          (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
+        );
+        if( rc!=SQLITE_OK ) return rc;
+      }
+
+      pPager->journalHdr = pPager->journalOff;
+      if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
+        pPager->nRec = 0;
+        rc = writeJournalHdr(pPager);
+        if( rc!=SQLITE_OK ) return rc;
+      }
+    }else{
+      pPager->journalHdr = pPager->journalOff;
+    }
+  }
+
+  /* Unless the pager is in noSync mode, the journal file was just 
+  ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on 
+  ** all pages.
+  */
+  sqlite3PcacheClearSyncFlags(pPager->pPCache);
+  pPager->eState = PAGER_WRITER_DBMOD;
+  assert( assert_pager_state(pPager) );
+  return SQLITE_OK;
+}
+
+/*
+** The argument is the first in a linked list of dirty pages connected
+** by the PgHdr.pDirty pointer. This function writes each one of the
+** in-memory pages in the list to the database file. The argument may
+** be NULL, representing an empty list. In this case this function is
+** a no-op.
+**
+** The pager must hold at least a RESERVED lock when this function
+** is called. Before writing anything to the database file, this lock
+** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained,
+** SQLITE_BUSY is returned and no data is written to the database file.
+** 
+** If the pager is a temp-file pager and the actual file-system file
+** is not yet open, it is created and opened before any data is 
+** written out.
+**
+** Once the lock has been upgraded and, if necessary, the file opened,
+** the pages are written out to the database file in list order. Writing
+** a page is skipped if it meets either of the following criteria:
+**
+**   * The page number is greater than Pager.dbSize, or
+**   * The PGHDR_DONT_WRITE flag is set on the page.
+**
+** If writing out a page causes the database file to grow, Pager.dbFileSize
+** is updated accordingly. If page 1 is written out, then the value cached
+** in Pager.dbFileVers[] is updated to match the new value stored in
+** the database file.
+**
+** If everything is successful, SQLITE_OK is returned. If an IO error 
+** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot
+** be obtained, SQLITE_BUSY is returned.
+*/
+static int pager_write_pagelist(Pager *pPager, PgHdr *pList){
+  int rc = SQLITE_OK;                  /* Return code */
+
+  /* This function is only called for rollback pagers in WRITER_DBMOD state. */
+  assert( !pagerUseWal(pPager) );
+  assert( pPager->eState==PAGER_WRITER_DBMOD );
+  assert( pPager->eLock==EXCLUSIVE_LOCK );
+
+  /* If the file is a temp-file has not yet been opened, open it now. It
+  ** is not possible for rc to be other than SQLITE_OK if this branch
+  ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
+  */
+  if( !isOpen(pPager->fd) ){
+    assert( pPager->tempFile && rc==SQLITE_OK );
+    rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
+  }
+
+  /* Before the first write, give the VFS a hint of what the final
+  ** file size will be.
+  */
+  assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
+  if( rc==SQLITE_OK 
+   && pPager->dbHintSize<pPager->dbSize
+   && (pList->pDirty || pList->pgno>pPager->dbHintSize)
+  ){
+    sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
+    sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
+    pPager->dbHintSize = pPager->dbSize;
+  }
+
+  while( rc==SQLITE_OK && pList ){
+    Pgno pgno = pList->pgno;
+
+    /* If there are dirty pages in the page cache with page numbers greater
+    ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
+    ** make the file smaller (presumably by auto-vacuum code). Do not write
+    ** any such pages to the file.
+    **
+    ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag
+    ** set (set by sqlite3PagerDontWrite()).
+    */
+    if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
+      i64 offset = (pgno-1)*(i64)pPager->pageSize;   /* Offset to write */
+      char *pData;                                   /* Data to write */    
+
+      assert( (pList->flags&PGHDR_NEED_SYNC)==0 );
+      if( pList->pgno==1 ) pager_write_changecounter(pList);
+
+      /* Encode the database */
+      CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
+
+      /* Write out the page data. */
+      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
+
+      /* If page 1 was just written, update Pager.dbFileVers to match
+      ** the value now stored in the database file. If writing this 
+      ** page caused the database file to grow, update dbFileSize. 
+      */
+      if( pgno==1 ){
+        memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
+      }
+      if( pgno>pPager->dbFileSize ){
+        pPager->dbFileSize = pgno;
+      }
+      pPager->aStat[PAGER_STAT_WRITE]++;
+
+      /* Update any backup objects copying the contents of this pager. */
+      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData);
+
+      PAGERTRACE(("STORE %d page %d hash(%08x)\n",
+                   PAGERID(pPager), pgno, pager_pagehash(pList)));
+      IOTRACE(("PGOUT %p %d\n", pPager, pgno));
+      PAGER_INCR(sqlite3_pager_writedb_count);
+    }else{
+      PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno));
+    }
+    pager_set_pagehash(pList);
+    pList = pList->pDirty;
+  }
+
+  return rc;
+}
+
+/*
+** Ensure that the sub-journal file is open. If it is already open, this 
+** function is a no-op.
+**
+** SQLITE_OK is returned if everything goes according to plan. An 
+** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() 
+** fails.
+*/
+static int openSubJournal(Pager *pPager){
+  int rc = SQLITE_OK;
+  if( !isOpen(pPager->sjfd) ){
+    if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
+      sqlite3MemJournalOpen(pPager->sjfd);
+    }else{
+      rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
+    }
+  }
+  return rc;
+}
+
+/*
+** Append a record of the current state of page pPg to the sub-journal. 
+** It is the callers responsibility to use subjRequiresPage() to check 
+** that it is really required before calling this function.
+**
+** If successful, set the bit corresponding to pPg->pgno in the bitvecs
+** for all open savepoints before returning.
+**
+** This function returns SQLITE_OK if everything is successful, an IO
+** error code if the attempt to write to the sub-journal fails, or 
+** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint
+** bitvec.
+*/
+static int subjournalPage(PgHdr *pPg){
+  int rc = SQLITE_OK;
+  Pager *pPager = pPg->pPager;
+  if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
+
+    /* Open the sub-journal, if it has not already been opened */
+    assert( pPager->useJournal );
+    assert( isOpen(pPager->jfd) || pagerUseWal(pPager) );
+    assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 );
+    assert( pagerUseWal(pPager) 
+         || pageInJournal(pPager, pPg) 
+         || pPg->pgno>pPager->dbOrigSize 
+    );
+    rc = openSubJournal(pPager);
+
+    /* If the sub-journal was opened successfully (or was already open),
+    ** write the journal record into the file.  */
+    if( rc==SQLITE_OK ){
+      void *pData = pPg->pData;
+      i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize);
+      char *pData2;
+  
+      CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
+      PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
+      rc = write32bits(pPager->sjfd, offset, pPg->pgno);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
+      }
+    }
+  }
+  if( rc==SQLITE_OK ){
+    pPager->nSubRec++;
+    assert( pPager->nSavepoint>0 );
+    rc = addToSavepointBitvecs(pPager, pPg->pgno);
+  }
+  return rc;
+}
+
+/*
+** This function is called by the pcache layer when it has reached some
+** soft memory limit. The first argument is a pointer to a Pager object
+** (cast as a void*). The pager is always 'purgeable' (not an in-memory
+** database). The second argument is a reference to a page that is 
+** currently dirty but has no outstanding references. The page
+** is always associated with the Pager object passed as the first 
+** argument.
+**
+** The job of this function is to make pPg clean by writing its contents
+** out to the database file, if possible. This may involve syncing the
+** journal file. 
+**
+** If successful, sqlite3PcacheMakeClean() is called on the page and
+** SQLITE_OK returned. If an IO error occurs while trying to make the
+** page clean, the IO error code is returned. If the page cannot be
+** made clean for some other reason, but no error occurs, then SQLITE_OK
+** is returned by sqlite3PcacheMakeClean() is not called.
+*/
+static int pagerStress(void *p, PgHdr *pPg){
+  Pager *pPager = (Pager *)p;
+  int rc = SQLITE_OK;
+
+  assert( pPg->pPager==pPager );
+  assert( pPg->flags&PGHDR_DIRTY );
+
+  /* The doNotSpill NOSYNC bit is set during times when doing a sync of
+  ** journal (and adding a new header) is not allowed.  This occurs
+  ** during calls to sqlite3PagerWrite() while trying to journal multiple
+  ** pages belonging to the same sector.
+  **
+  ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling
+  ** regardless of whether or not a sync is required.  This is set during
+  ** a rollback or by user request, respectively.
+  **
+  ** Spilling is also prohibited when in an error state since that could
+  ** lead to database corruption.   In the current implementation it 
+  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3
+  ** while in the error state, hence it is impossible for this routine to
+  ** be called in the error state.  Nevertheless, we include a NEVER()
+  ** test for the error state as a safeguard against future changes.
+  */
+  if( NEVER(pPager->errCode) ) return SQLITE_OK;
+  testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK );
+  testcase( pPager->doNotSpill & SPILLFLAG_OFF );
+  testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC );
+  if( pPager->doNotSpill
+   && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0
+      || (pPg->flags & PGHDR_NEED_SYNC)!=0)
+  ){
+    return SQLITE_OK;
+  }
+
+  pPg->pDirty = 0;
+  if( pagerUseWal(pPager) ){
+    /* Write a single frame for this page to the log. */
+    if( subjRequiresPage(pPg) ){ 
+      rc = subjournalPage(pPg); 
+    }
+    if( rc==SQLITE_OK ){
+      rc = pagerWalFrames(pPager, pPg, 0, 0);
+    }
+  }else{
+  
+    /* Sync the journal file if required. */
+    if( pPg->flags&PGHDR_NEED_SYNC 
+     || pPager->eState==PAGER_WRITER_CACHEMOD
+    ){
+      rc = syncJournal(pPager, 1);
+    }
+  
+    /* If the page number of this page is larger than the current size of
+    ** the database image, it may need to be written to the sub-journal.
+    ** This is because the call to pager_write_pagelist() below will not
+    ** actually write data to the file in this case.
+    **
+    ** Consider the following sequence of events:
+    **
+    **   BEGIN;
+    **     <journal page X>
+    **     <modify page X>
+    **     SAVEPOINT sp;
+    **       <shrink database file to Y pages>
+    **       pagerStress(page X)
+    **     ROLLBACK TO sp;
+    **
+    ** If (X>Y), then when pagerStress is called page X will not be written
+    ** out to the database file, but will be dropped from the cache. Then,
+    ** following the "ROLLBACK TO sp" statement, reading page X will read
+    ** data from the database file. This will be the copy of page X as it
+    ** was when the transaction started, not as it was when "SAVEPOINT sp"
+    ** was executed.
+    **
+    ** The solution is to write the current data for page X into the 
+    ** sub-journal file now (if it is not already there), so that it will
+    ** be restored to its current value when the "ROLLBACK TO sp" is 
+    ** executed.
+    */
+    if( NEVER(
+        rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
+    ) ){
+      rc = subjournalPage(pPg);
+    }
+  
+    /* Write the contents of the page out to the database file. */
+    if( rc==SQLITE_OK ){
+      assert( (pPg->flags&PGHDR_NEED_SYNC)==0 );
+      rc = pager_write_pagelist(pPager, pPg);
+    }
+  }
+
+  /* Mark the page as clean. */
+  if( rc==SQLITE_OK ){
+    PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
+    sqlite3PcacheMakeClean(pPg);
+  }
+
+  return pager_error(pPager, rc); 
+}
+
+
+/*
+** Allocate and initialize a new Pager object and put a pointer to it
+** in *ppPager. The pager should eventually be freed by passing it
+** to sqlite3PagerClose().
+**
+** The zFilename argument is the path to the database file to open.
+** If zFilename is NULL then a randomly-named temporary file is created
+** and used as the file to be cached. Temporary files are be deleted
+** automatically when they are closed. If zFilename is ":memory:" then 
+** all information is held in cache. It is never written to disk. 
+** This can be used to implement an in-memory database.
+**
+** The nExtra parameter specifies the number of bytes of space allocated
+** along with each page reference. This space is available to the user
+** via the sqlite3PagerGetExtra() API.
+**
+** The flags argument is used to specify properties that affect the
+** operation of the pager. It should be passed some bitwise combination
+** of the PAGER_* flags.
+**
+** The vfsFlags parameter is a bitmask to pass to the flags parameter
+** of the xOpen() method of the supplied VFS when opening files. 
+**
+** If the pager object is allocated and the specified file opened 
+** successfully, SQLITE_OK is returned and *ppPager set to point to
+** the new pager object. If an error occurs, *ppPager is set to NULL
+** and error code returned. This function may return SQLITE_NOMEM
+** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or 
+** various SQLITE_IO_XXX errors.
+*/
+SQLITE_PRIVATE int sqlite3PagerOpen(
+  sqlite3_vfs *pVfs,       /* The virtual file system to use */
+  Pager **ppPager,         /* OUT: Return the Pager structure here */
+  const char *zFilename,   /* Name of the database file to open */
+  int nExtra,              /* Extra bytes append to each in-memory page */
+  int flags,               /* flags controlling this file */
+  int vfsFlags,            /* flags passed through to sqlite3_vfs.xOpen() */
+  void (*xReinit)(DbPage*) /* Function to reinitialize pages */
+){
+  u8 *pPtr;
+  Pager *pPager = 0;       /* Pager object to allocate and return */
+  int rc = SQLITE_OK;      /* Return code */
+  int tempFile = 0;        /* True for temp files (incl. in-memory files) */
+  int memDb = 0;           /* True if this is an in-memory file */
+  int readOnly = 0;        /* True if this is a read-only file */
+  int journalFileSize;     /* Bytes to allocate for each journal fd */
+  char *zPathname = 0;     /* Full path to database file */
+  int nPathname = 0;       /* Number of bytes in zPathname */
+  int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */
+  int pcacheSize = sqlite3PcacheSize();       /* Bytes to allocate for PCache */
+  u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;  /* Default page size */
+  const char *zUri = 0;    /* URI args to copy */
+  int nUri = 0;            /* Number of bytes of URI args at *zUri */
+
+  /* Figure out how much space is required for each journal file-handle
+  ** (there are two of them, the main journal and the sub-journal). This
+  ** is the maximum space required for an in-memory journal file handle 
+  ** and a regular journal file-handle. Note that a "regular journal-handle"
+  ** may be a wrapper capable of caching the first portion of the journal
+  ** file in memory to implement the atomic-write optimization (see 
+  ** source file journal.c).
+  */
+  if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
+    journalFileSize = ROUND8(sqlite3JournalSize(pVfs));
+  }else{
+    journalFileSize = ROUND8(sqlite3MemJournalSize());
+  }
+
+  /* Set the output variable to NULL in case an error occurs. */
+  *ppPager = 0;
+
+#ifndef SQLITE_OMIT_MEMORYDB
+  if( flags & PAGER_MEMORY ){
+    memDb = 1;
+    if( zFilename && zFilename[0] ){
+      zPathname = sqlite3DbStrDup(0, zFilename);
+      if( zPathname==0  ) return SQLITE_NOMEM;
+      nPathname = sqlite3Strlen30(zPathname);
+      zFilename = 0;
+    }
+  }
+#endif
+
+  /* Compute and store the full pathname in an allocated buffer pointed
+  ** to by zPathname, length nPathname. Or, if this is a temporary file,
+  ** leave both nPathname and zPathname set to 0.
+  */
+  if( zFilename && zFilename[0] ){
+    const char *z;
+    nPathname = pVfs->mxPathname+1;
+    zPathname = sqlite3DbMallocRaw(0, nPathname*2);
+    if( zPathname==0 ){
+      return SQLITE_NOMEM;
+    }
+    zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */
+    rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
+    nPathname = sqlite3Strlen30(zPathname);
+    z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1];
+    while( *z ){
+      z += sqlite3Strlen30(z)+1;
+      z += sqlite3Strlen30(z)+1;
+    }
+    nUri = (int)(&z[1] - zUri);
+    assert( nUri>=0 );
+    if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){
+      /* This branch is taken when the journal path required by
+      ** the database being opened will be more than pVfs->mxPathname
+      ** bytes in length. This means the database cannot be opened,
+      ** as it will not be possible to open the journal file or even
+      ** check for a hot-journal before reading.
+      */
+      rc = SQLITE_CANTOPEN_BKPT;
+    }
+    if( rc!=SQLITE_OK ){
+      sqlite3DbFree(0, zPathname);
+      return rc;
+    }
+  }
+
+  /* Allocate memory for the Pager structure, PCache object, the
+  ** three file descriptors, the database file name and the journal 
+  ** file name. The layout in memory is as follows:
+  **
+  **     Pager object                    (sizeof(Pager) bytes)
+  **     PCache object                   (sqlite3PcacheSize() bytes)
+  **     Database file handle            (pVfs->szOsFile bytes)
+  **     Sub-journal file handle         (journalFileSize bytes)
+  **     Main journal file handle        (journalFileSize bytes)
+  **     Database file name              (nPathname+1 bytes)
+  **     Journal file name               (nPathname+8+1 bytes)
+  */
+  pPtr = (u8 *)sqlite3MallocZero(
+    ROUND8(sizeof(*pPager)) +      /* Pager structure */
+    ROUND8(pcacheSize) +           /* PCache object */
+    ROUND8(pVfs->szOsFile) +       /* The main db file */
+    journalFileSize * 2 +          /* The two journal files */ 
+    nPathname + 1 + nUri +         /* zFilename */
+    nPathname + 8 + 2              /* zJournal */
+#ifndef SQLITE_OMIT_WAL
+    + nPathname + 4 + 2            /* zWal */
+#endif
+  );
+  assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) );
+  if( !pPtr ){
+    sqlite3DbFree(0, zPathname);
+    return SQLITE_NOMEM;
+  }
+  pPager =              (Pager*)(pPtr);
+  pPager->pPCache =    (PCache*)(pPtr += ROUND8(sizeof(*pPager)));
+  pPager->fd =   (sqlite3_file*)(pPtr += ROUND8(pcacheSize));
+  pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile));
+  pPager->jfd =  (sqlite3_file*)(pPtr += journalFileSize);
+  pPager->zFilename =    (char*)(pPtr += journalFileSize);
+  assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) );
+
+  /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */
+  if( zPathname ){
+    assert( nPathname>0 );
+    pPager->zJournal =   (char*)(pPtr += nPathname + 1 + nUri);
+    memcpy(pPager->zFilename, zPathname, nPathname);
+    if( nUri ) memcpy(&pPager->zFilename[nPathname+1], zUri, nUri);
+    memcpy(pPager->zJournal, zPathname, nPathname);
+    memcpy(&pPager->zJournal[nPathname], "-journal\000", 8+2);
+    sqlite3FileSuffix3(pPager->zFilename, pPager->zJournal);
+#ifndef SQLITE_OMIT_WAL
+    pPager->zWal = &pPager->zJournal[nPathname+8+1];
+    memcpy(pPager->zWal, zPathname, nPathname);
+    memcpy(&pPager->zWal[nPathname], "-wal\000", 4+1);
+    sqlite3FileSuffix3(pPager->zFilename, pPager->zWal);
+#endif
+    sqlite3DbFree(0, zPathname);
+  }
+  pPager->pVfs = pVfs;
+  pPager->vfsFlags = vfsFlags;
+
+  /* Open the pager file.
+  */
+  if( zFilename && zFilename[0] ){
+    int fout = 0;                    /* VFS flags returned by xOpen() */
+    rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout);
+    assert( !memDb );
+    readOnly = (fout&SQLITE_OPEN_READONLY);
+
+    /* If the file was successfully opened for read/write access,
+    ** choose a default page size in case we have to create the
+    ** database file. The default page size is the maximum of:
+    **
+    **    + SQLITE_DEFAULT_PAGE_SIZE,
+    **    + The value returned by sqlite3OsSectorSize()
+    **    + The largest page size that can be written atomically.
+    */
+    if( rc==SQLITE_OK ){
+      int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
+      if( !readOnly ){
+        setSectorSize(pPager);
+        assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE);
+        if( szPageDflt<pPager->sectorSize ){
+          if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
+            szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
+          }else{
+            szPageDflt = (u32)pPager->sectorSize;
+          }
+        }
+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+        {
+          int ii;
+          assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
+          assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
+          assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
+          for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
+            if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){
+              szPageDflt = ii;
+            }
+          }
+        }
+#endif
+      }
+      pPager->noLock = sqlite3_uri_boolean(zFilename, "nolock", 0);
+      if( (iDc & SQLITE_IOCAP_IMMUTABLE)!=0
+       || sqlite3_uri_boolean(zFilename, "immutable", 0) ){
+          vfsFlags |= SQLITE_OPEN_READONLY;
+          goto act_like_temp_file;
+      }
+    }
+  }else{
+    /* If a temporary file is requested, it is not opened immediately.
+    ** In this case we accept the default page size and delay actually
+    ** opening the file until the first call to OsWrite().
+    **
+    ** This branch is also run for an in-memory database. An in-memory
+    ** database is the same as a temp-file that is never written out to
+    ** disk and uses an in-memory rollback journal.
+    **
+    ** This branch also runs for files marked as immutable.
+    */ 
+act_like_temp_file:
+    tempFile = 1;
+    pPager->eState = PAGER_READER;     /* Pretend we already have a lock */
+    pPager->eLock = EXCLUSIVE_LOCK;    /* Pretend we are in EXCLUSIVE locking mode */
+    pPager->noLock = 1;                /* Do no locking */
+    readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
+  }
+
+  /* The following call to PagerSetPagesize() serves to set the value of 
+  ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer.
+  */
+  if( rc==SQLITE_OK ){
+    assert( pPager->memDb==0 );
+    rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
+    testcase( rc!=SQLITE_OK );
+  }
+
+  /* Initialize the PCache object. */
+  if( rc==SQLITE_OK ){
+    assert( nExtra<1000 );
+    nExtra = ROUND8(nExtra);
+    rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
+                           !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
+  }
+
+  /* If an error occurred above, free the  Pager structure and close the file.
+  */
+  if( rc!=SQLITE_OK ){
+    sqlite3OsClose(pPager->fd);
+    sqlite3PageFree(pPager->pTmpSpace);
+    sqlite3_free(pPager);
+    return rc;
+  }
+
+  PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
+  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
+
+  pPager->useJournal = (u8)useJournal;
+  /* pPager->stmtOpen = 0; */
+  /* pPager->stmtInUse = 0; */
+  /* pPager->nRef = 0; */
+  /* pPager->stmtSize = 0; */
+  /* pPager->stmtJSize = 0; */
+  /* pPager->nPage = 0; */
+  pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
+  /* pPager->state = PAGER_UNLOCK; */
+  /* pPager->errMask = 0; */
+  pPager->tempFile = (u8)tempFile;
+  assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
+          || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
+  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
+  pPager->exclusiveMode = (u8)tempFile; 
+  pPager->changeCountDone = pPager->tempFile;
+  pPager->memDb = (u8)memDb;
+  pPager->readOnly = (u8)readOnly;
+  assert( useJournal || pPager->tempFile );
+  pPager->noSync = pPager->tempFile;
+  if( pPager->noSync ){
+    assert( pPager->fullSync==0 );
+    assert( pPager->syncFlags==0 );
+    assert( pPager->walSyncFlags==0 );
+    assert( pPager->ckptSyncFlags==0 );
+  }else{
+    pPager->fullSync = 1;
+    pPager->syncFlags = SQLITE_SYNC_NORMAL;
+    pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS;
+    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
+  }
+  /* pPager->pFirst = 0; */
+  /* pPager->pFirstSynced = 0; */
+  /* pPager->pLast = 0; */
+  pPager->nExtra = (u16)nExtra;
+  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
+  assert( isOpen(pPager->fd) || tempFile );
+  setSectorSize(pPager);
+  if( !useJournal ){
+    pPager->journalMode = PAGER_JOURNALMODE_OFF;
+  }else if( memDb ){
+    pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
+  }
+  /* pPager->xBusyHandler = 0; */
+  /* pPager->pBusyHandlerArg = 0; */
+  pPager->xReiniter = xReinit;
+  /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
+  /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */
+
+  *ppPager = pPager;
+  return SQLITE_OK;
+}
+
+
+/* Verify that the database file has not be deleted or renamed out from
+** under the pager.  Return SQLITE_OK if the database is still were it ought
+** to be on disk.  Return non-zero (SQLITE_READONLY_DBMOVED or some other error
+** code from sqlite3OsAccess()) if the database has gone missing.
+*/
+static int databaseIsUnmoved(Pager *pPager){
+  int bHasMoved = 0;
+  int rc;
+
+  if( pPager->tempFile ) return SQLITE_OK;
+  if( pPager->dbSize==0 ) return SQLITE_OK;
+  assert( pPager->zFilename && pPager->zFilename[0] );
+  rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_HAS_MOVED, &bHasMoved);
+  if( rc==SQLITE_NOTFOUND ){
+    /* If the HAS_MOVED file-control is unimplemented, assume that the file
+    ** has not been moved.  That is the historical behavior of SQLite: prior to
+    ** version 3.8.3, it never checked */
+    rc = SQLITE_OK;
+  }else if( rc==SQLITE_OK && bHasMoved ){
+    rc = SQLITE_READONLY_DBMOVED;
+  }
+  return rc;
+}
+
+
+/*
+** This function is called after transitioning from PAGER_UNLOCK to
+** PAGER_SHARED state. It tests if there is a hot journal present in
+** the file-system for the given pager. A hot journal is one that 
+** needs to be played back. According to this function, a hot-journal
+** file exists if the following criteria are met:
+**
+**   * The journal file exists in the file system, and
+**   * No process holds a RESERVED or greater lock on the database file, and
+**   * The database file itself is greater than 0 bytes in size, and
+**   * The first byte of the journal file exists and is not 0x00.
+**
+** If the current size of the database file is 0 but a journal file
+** exists, that is probably an old journal left over from a prior
+** database with the same name. In this case the journal file is
+** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK
+** is returned.
+**
+** This routine does not check if there is a master journal filename
+** at the end of the file. If there is, and that master journal file
+** does not exist, then the journal file is not really hot. In this
+** case this routine will return a false-positive. The pager_playback()
+** routine will discover that the journal file is not really hot and 
+** will not roll it back. 
+**
+** If a hot-journal file is found to exist, *pExists is set to 1 and 
+** SQLITE_OK returned. If no hot-journal file is present, *pExists is
+** set to 0 and SQLITE_OK returned. If an IO error occurs while trying
+** to determine whether or not a hot-journal file exists, the IO error
+** code is returned and the value of *pExists is undefined.
+*/
+static int hasHotJournal(Pager *pPager, int *pExists){
+  sqlite3_vfs * const pVfs = pPager->pVfs;
+  int rc = SQLITE_OK;           /* Return code */
+  int exists = 1;               /* True if a journal file is present */
+  int jrnlOpen = !!isOpen(pPager->jfd);
+
+  assert( pPager->useJournal );
+  assert( isOpen(pPager->fd) );
+  assert( pPager->eState==PAGER_OPEN );
+
+  assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) &
+    SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
+  ));
+
+  *pExists = 0;
+  if( !jrnlOpen ){
+    rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
+  }
+  if( rc==SQLITE_OK && exists ){
+    int locked = 0;             /* True if some process holds a RESERVED lock */
+
+    /* Race condition here:  Another process might have been holding the
+    ** the RESERVED lock and have a journal open at the sqlite3OsAccess() 
+    ** call above, but then delete the journal and drop the lock before
+    ** we get to the following sqlite3OsCheckReservedLock() call.  If that
+    ** is the case, this routine might think there is a hot journal when
+    ** in fact there is none.  This results in a false-positive which will
+    ** be dealt with by the playback routine.  Ticket #3883.
+    */
+    rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
+    if( rc==SQLITE_OK && !locked ){
+      Pgno nPage;                 /* Number of pages in database file */
+
+      rc = pagerPagecount(pPager, &nPage);
+      if( rc==SQLITE_OK ){
+        /* If the database is zero pages in size, that means that either (1) the
+        ** journal is a remnant from a prior database with the same name where
+        ** the database file but not the journal was deleted, or (2) the initial
+        ** transaction that populates a new database is being rolled back.
+        ** In either case, the journal file can be deleted.  However, take care
+        ** not to delete the journal file if it is already open due to
+        ** journal_mode=PERSIST.
+        */
+        if( nPage==0 && !jrnlOpen ){
+          sqlite3BeginBenignMalloc();
+          if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){
+            sqlite3OsDelete(pVfs, pPager->zJournal, 0);
+            if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK);
+          }
+          sqlite3EndBenignMalloc();
+        }else{
+          /* The journal file exists and no other connection has a reserved
+          ** or greater lock on the database file. Now check that there is
+          ** at least one non-zero bytes at the start of the journal file.
+          ** If there is, then we consider this journal to be hot. If not, 
+          ** it can be ignored.
+          */
+          if( !jrnlOpen ){
+            int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL;
+            rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f);
+          }
+          if( rc==SQLITE_OK ){
+            u8 first = 0;
+            rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0);
+            if( rc==SQLITE_IOERR_SHORT_READ ){
+              rc = SQLITE_OK;
+            }
+            if( !jrnlOpen ){
+              sqlite3OsClose(pPager->jfd);
+            }
+            *pExists = (first!=0);
+          }else if( rc==SQLITE_CANTOPEN ){
+            /* If we cannot open the rollback journal file in order to see if
+            ** it has a zero header, that might be due to an I/O error, or
+            ** it might be due to the race condition described above and in
+            ** ticket #3883.  Either way, assume that the journal is hot.
+            ** This might be a false positive.  But if it is, then the
+            ** automatic journal playback and recovery mechanism will deal
+            ** with it under an EXCLUSIVE lock where we do not need to
+            ** worry so much with race conditions.
+            */
+            *pExists = 1;
+            rc = SQLITE_OK;
+          }
+        }
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
+** This function is called to obtain a shared lock on the database file.
+** It is illegal to call sqlite3PagerAcquire() until after this function
+** has been successfully called. If a shared-lock is already held when
+** this function is called, it is a no-op.
+**
+** The following operations are also performed by this function.
+**
+**   1) If the pager is currently in PAGER_OPEN state (no lock held
+**      on the database file), then an attempt is made to obtain a
+**      SHARED lock on the database file. Immediately after obtaining
+**      the SHARED lock, the file-system is checked for a hot-journal,
+**      which is played back if present. Following any hot-journal 
+**      rollback, the contents of the cache are validated by checking
+**      the 'change-counter' field of the database file header and
+**      discarded if they are found to be invalid.
+**
+**   2) If the pager is running in exclusive-mode, and there are currently
+**      no outstanding references to any pages, and is in the error state,
+**      then an attempt is made to clear the error state by discarding
+**      the contents of the page cache and rolling back any open journal
+**      file.
+**
+** If everything is successful, SQLITE_OK is returned. If an IO error 
+** occurs while locking the database, checking for a hot-journal file or 
+** rolling back a journal file, the IO error code is returned.
+*/
+SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){
+  int rc = SQLITE_OK;                /* Return code */
+
+  /* This routine is only called from b-tree and only when there are no
+  ** outstanding pages. This implies that the pager state should either
+  ** be OPEN or READER. READER is only possible if the pager is or was in 
+  ** exclusive access mode.
+  */
+  assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
+  assert( assert_pager_state(pPager) );
+  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
+  if( NEVER(MEMDB && pPager->errCode) ){ return pPager->errCode; }
+
+  if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){
+    int bHotJournal = 1;          /* True if there exists a hot journal-file */
+
+    assert( !MEMDB );
+
+    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
+    if( rc!=SQLITE_OK ){
+      assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK );
+      goto failed;
+    }
+
+    /* If a journal file exists, and there is no RESERVED lock on the
+    ** database file, then it either needs to be played back or deleted.
+    */
+    if( pPager->eLock<=SHARED_LOCK ){
+      rc = hasHotJournal(pPager, &bHotJournal);
+    }
+    if( rc!=SQLITE_OK ){
+      goto failed;
+    }
+    if( bHotJournal ){
+      if( pPager->readOnly ){
+        rc = SQLITE_READONLY_ROLLBACK;
+        goto failed;
+      }
+
+      /* Get an EXCLUSIVE lock on the database file. At this point it is
+      ** important that a RESERVED lock is not obtained on the way to the
+      ** EXCLUSIVE lock. If it were, another process might open the
+      ** database file, detect the RESERVED lock, and conclude that the
+      ** database is safe to read while this process is still rolling the 
+      ** hot-journal back.
+      ** 
+      ** Because the intermediate RESERVED lock is not requested, any
+      ** other process attempting to access the database file will get to 
+      ** this point in the code and fail to obtain its own EXCLUSIVE lock 
+      ** on the database file.
+      **
+      ** Unless the pager is in locking_mode=exclusive mode, the lock is
+      ** downgraded to SHARED_LOCK before this function returns.
+      */
+      rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
+      if( rc!=SQLITE_OK ){
+        goto failed;
+      }
+ 
+      /* If it is not already open and the file exists on disk, open the 
+      ** journal for read/write access. Write access is required because 
+      ** in exclusive-access mode the file descriptor will be kept open 
+      ** and possibly used for a transaction later on. Also, write-access 
+      ** is usually required to finalize the journal in journal_mode=persist 
+      ** mode (and also for journal_mode=truncate on some systems).
+      **
+      ** If the journal does not exist, it usually means that some 
+      ** other connection managed to get in and roll it back before 
+      ** this connection obtained the exclusive lock above. Or, it 
+      ** may mean that the pager was in the error-state when this
+      ** function was called and the journal file does not exist.
+      */
+      if( !isOpen(pPager->jfd) ){
+        sqlite3_vfs * const pVfs = pPager->pVfs;
+        int bExists;              /* True if journal file exists */
+        rc = sqlite3OsAccess(
+            pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists);
+        if( rc==SQLITE_OK && bExists ){
+          int fout = 0;
+          int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
+          assert( !pPager->tempFile );
+          rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
+          assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
+          if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
+            rc = SQLITE_CANTOPEN_BKPT;
+            sqlite3OsClose(pPager->jfd);
+          }
+        }
+      }
+ 
+      /* Playback and delete the journal.  Drop the database write
+      ** lock and reacquire the read lock. Purge the cache before
+      ** playing back the hot-journal so that we don't end up with
+      ** an inconsistent cache.  Sync the hot journal before playing
+      ** it back since the process that crashed and left the hot journal
+      ** probably did not sync it and we are required to always sync
+      ** the journal before playing it back.
+      */
+      if( isOpen(pPager->jfd) ){
+        assert( rc==SQLITE_OK );
+        rc = pagerSyncHotJournal(pPager);
+        if( rc==SQLITE_OK ){
+          rc = pager_playback(pPager, 1);
+          pPager->eState = PAGER_OPEN;
+        }
+      }else if( !pPager->exclusiveMode ){
+        pagerUnlockDb(pPager, SHARED_LOCK);
+      }
+
+      if( rc!=SQLITE_OK ){
+        /* This branch is taken if an error occurs while trying to open
+        ** or roll back a hot-journal while holding an EXCLUSIVE lock. The
+        ** pager_unlock() routine will be called before returning to unlock
+        ** the file. If the unlock attempt fails, then Pager.eLock must be
+        ** set to UNKNOWN_LOCK (see the comment above the #define for 
+        ** UNKNOWN_LOCK above for an explanation). 
+        **
+        ** In order to get pager_unlock() to do this, set Pager.eState to
+        ** PAGER_ERROR now. This is not actually counted as a transition
+        ** to ERROR state in the state diagram at the top of this file,
+        ** since we know that the same call to pager_unlock() will very
+        ** shortly transition the pager object to the OPEN state. Calling
+        ** assert_pager_state() would fail now, as it should not be possible
+        ** to be in ERROR state when there are zero outstanding page 
+        ** references.
+        */
+        pager_error(pPager, rc);
+        goto failed;
+      }
+
+      assert( pPager->eState==PAGER_OPEN );
+      assert( (pPager->eLock==SHARED_LOCK)
+           || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
+      );
+    }
+
+    if( !pPager->tempFile && pPager->hasBeenUsed ){
+      /* The shared-lock has just been acquired then check to
+      ** see if the database has been modified.  If the database has changed,
+      ** flush the cache.  The pPager->hasBeenUsed flag prevents this from
+      ** occurring on the very first access to a file, in order to save a
+      ** single unnecessary sqlite3OsRead() call at the start-up.
+      **
+      ** Database changes is detected by looking at 15 bytes beginning
+      ** at offset 24 into the file.  The first 4 of these 16 bytes are
+      ** a 32-bit counter that is incremented with each change.  The
+      ** other bytes change randomly with each file change when
+      ** a codec is in use.
+      ** 
+      ** There is a vanishingly small chance that a change will not be 
+      ** detected.  The chance of an undetected change is so small that
+      ** it can be neglected.
+      */
+      Pgno nPage = 0;
+      char dbFileVers[sizeof(pPager->dbFileVers)];
+
+      rc = pagerPagecount(pPager, &nPage);
+      if( rc ) goto failed;
+
+      if( nPage>0 ){
+        IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
+        rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
+        if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
+          goto failed;
+        }
+      }else{
+        memset(dbFileVers, 0, sizeof(dbFileVers));
+      }
+
+      if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
+        pager_reset(pPager);
+
+        /* Unmap the database file. It is possible that external processes
+        ** may have truncated the database file and then extended it back
+        ** to its original size while this process was not holding a lock.
+        ** In this case there may exist a Pager.pMap mapping that appears
+        ** to be the right size but is not actually valid. Avoid this
+        ** possibility by unmapping the db here. */
+        if( USEFETCH(pPager) ){
+          sqlite3OsUnfetch(pPager->fd, 0, 0);
+        }
+      }
+    }
+
+    /* If there is a WAL file in the file-system, open this database in WAL
+    ** mode. Otherwise, the following function call is a no-op.
+    */
+    rc = pagerOpenWalIfPresent(pPager);
+#ifndef SQLITE_OMIT_WAL
+    assert( pPager->pWal==0 || rc==SQLITE_OK );
+#endif
+  }
+
+  if( pagerUseWal(pPager) ){
+    assert( rc==SQLITE_OK );
+    rc = pagerBeginReadTransaction(pPager);
+  }
+
+  if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
+    rc = pagerPagecount(pPager, &pPager->dbSize);
+  }
+
+ failed:
+  if( rc!=SQLITE_OK ){
+    assert( !MEMDB );
+    pager_unlock(pPager);
+    assert( pPager->eState==PAGER_OPEN );
+  }else{
+    pPager->eState = PAGER_READER;
+  }
+  return rc;
+}
+
+/*
+** If the reference count has reached zero, rollback any active
+** transaction and unlock the pager.
+**
+** Except, in locking_mode=EXCLUSIVE when there is nothing to in
+** the rollback journal, the unlock is not performed and there is
+** nothing to rollback, so this routine is a no-op.
+*/ 
+static void pagerUnlockIfUnused(Pager *pPager){
+  if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
+    pagerUnlockAndRollback(pPager);
+  }
+}
+
+/*
+** Acquire a reference to page number pgno in pager pPager (a page
+** reference has type DbPage*). If the requested reference is 
+** successfully obtained, it is copied to *ppPage and SQLITE_OK returned.
+**
+** If the requested page is already in the cache, it is returned. 
+** Otherwise, a new page object is allocated and populated with data
+** read from the database file. In some cases, the pcache module may
+** choose not to allocate a new page object and may reuse an existing
+** object with no outstanding references.
+**
+** The extra data appended to a page is always initialized to zeros the 
+** first time a page is loaded into memory. If the page requested is 
+** already in the cache when this function is called, then the extra
+** data is left as it was when the page object was last used.
+**
+** If the database image is smaller than the requested page or if a 
+** non-zero value is passed as the noContent parameter and the 
+** requested page is not already stored in the cache, then no 
+** actual disk read occurs. In this case the memory image of the 
+** page is initialized to all zeros. 
+**
+** If noContent is true, it means that we do not care about the contents
+** of the page. This occurs in two scenarios:
+**
+**   a) When reading a free-list leaf page from the database, and
+**
+**   b) When a savepoint is being rolled back and we need to load
+**      a new page into the cache to be filled with the data read
+**      from the savepoint journal.
+**
+** If noContent is true, then the data returned is zeroed instead of
+** being read from the database. Additionally, the bits corresponding
+** to pgno in Pager.pInJournal (bitvec of pages already written to the
+** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open
+** savepoints are set. This means if the page is made writable at any
+** point in the future, using a call to sqlite3PagerWrite(), its contents
+** will not be journaled. This saves IO.
+**
+** The acquisition might fail for several reasons.  In all cases,
+** an appropriate error code is returned and *ppPage is set to NULL.
+**
+** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
+** to find a page in the in-memory cache first.  If the page is not already
+** in memory, this routine goes to disk to read it in whereas Lookup()
+** just returns 0.  This routine acquires a read-lock the first time it
+** has to go to disk, and could also playback an old journal if necessary.
+** Since Lookup() never goes to disk, it never has to deal with locks
+** or journal files.
+*/
+SQLITE_PRIVATE int sqlite3PagerAcquire(
+  Pager *pPager,      /* The pager open on the database file */
+  Pgno pgno,          /* Page number to fetch */
+  DbPage **ppPage,    /* Write a pointer to the page here */
+  int flags           /* PAGER_GET_XXX flags */
+){
+  int rc = SQLITE_OK;
+  PgHdr *pPg = 0;
+  u32 iFrame = 0;                 /* Frame to read from WAL file */
+  const int noContent = (flags & PAGER_GET_NOCONTENT);
+
+  /* It is acceptable to use a read-only (mmap) page for any page except
+  ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
+  ** flag was specified by the caller. And so long as the db is not a 
+  ** temporary or in-memory database.  */
+  const int bMmapOk = (pgno!=1 && USEFETCH(pPager)
+   && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY))
+#ifdef SQLITE_HAS_CODEC
+   && pPager->xCodec==0
+#endif
+  );
+
+  assert( pPager->eState>=PAGER_READER );
+  assert( assert_pager_state(pPager) );
+  assert( noContent==0 || bMmapOk==0 );
+
+  if( pgno==0 ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  pPager->hasBeenUsed = 1;
+
+  /* If the pager is in the error state, return an error immediately. 
+  ** Otherwise, request the page from the PCache layer. */
+  if( pPager->errCode!=SQLITE_OK ){
+    rc = pPager->errCode;
+  }else{
+    if( bMmapOk && pagerUseWal(pPager) ){
+      rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
+      if( rc!=SQLITE_OK ) goto pager_acquire_err;
+    }
+
+    if( bMmapOk && iFrame==0 ){
+      void *pData = 0;
+
+      rc = sqlite3OsFetch(pPager->fd, 
+          (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
+      );
+
+      if( rc==SQLITE_OK && pData ){
+        if( pPager->eState>PAGER_READER ){
+          pPg = sqlite3PagerLookup(pPager, pgno);
+        }
+        if( pPg==0 ){
+          rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
+        }else{
+          sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData);
+        }
+        if( pPg ){
+          assert( rc==SQLITE_OK );
+          *ppPage = pPg;
+          return SQLITE_OK;
+        }
+      }
+      if( rc!=SQLITE_OK ){
+        goto pager_acquire_err;
+      }
+    }
+
+    {
+      sqlite3_pcache_page *pBase;
+      pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3);
+      if( pBase==0 ){
+        rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase);
+        if( rc!=SQLITE_OK ) goto pager_acquire_err;
+      }
+      pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase);
+      if( pPg==0 ) rc = SQLITE_NOMEM;
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    /* Either the call to sqlite3PcacheFetch() returned an error or the
+    ** pager was already in the error-state when this function was called.
+    ** Set pPg to 0 and jump to the exception handler.  */
+    pPg = 0;
+    goto pager_acquire_err;
+  }
+  assert( (*ppPage)->pgno==pgno );
+  assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
+
+  if( (*ppPage)->pPager && !noContent ){
+    /* In this case the pcache already contains an initialized copy of
+    ** the page. Return without further ado.  */
+    assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
+    pPager->aStat[PAGER_STAT_HIT]++;
+    return SQLITE_OK;
+
+  }else{
+    /* The pager cache has created a new page. Its content needs to 
+    ** be initialized.  */
+
+    pPg = *ppPage;
+    pPg->pPager = pPager;
+
+    /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
+    ** number greater than this, or the unused locking-page, is requested. */
+    if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto pager_acquire_err;
+    }
+
+    if( MEMDB || pPager->dbSize<pgno || noContent || !isOpen(pPager->fd) ){
+      if( pgno>pPager->mxPgno ){
+        rc = SQLITE_FULL;
+        goto pager_acquire_err;
+      }
+      if( noContent ){
+        /* Failure to set the bits in the InJournal bit-vectors is benign.
+        ** It merely means that we might do some extra work to journal a 
+        ** page that does not need to be journaled.  Nevertheless, be sure 
+        ** to test the case where a malloc error occurs while trying to set 
+        ** a bit in a bit vector.
+        */
+        sqlite3BeginBenignMalloc();
+        if( pgno<=pPager->dbOrigSize ){
+          TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno);
+          testcase( rc==SQLITE_NOMEM );
+        }
+        TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno);
+        testcase( rc==SQLITE_NOMEM );
+        sqlite3EndBenignMalloc();
+      }
+      memset(pPg->pData, 0, pPager->pageSize);
+      IOTRACE(("ZERO %p %d\n", pPager, pgno));
+    }else{
+      if( pagerUseWal(pPager) && bMmapOk==0 ){
+        rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
+        if( rc!=SQLITE_OK ) goto pager_acquire_err;
+      }
+      assert( pPg->pPager==pPager );
+      pPager->aStat[PAGER_STAT_MISS]++;
+      rc = readDbPage(pPg, iFrame);
+      if( rc!=SQLITE_OK ){
+        goto pager_acquire_err;
+      }
+    }
+    pager_set_pagehash(pPg);
+  }
+
+  return SQLITE_OK;
+
+pager_acquire_err:
+  assert( rc!=SQLITE_OK );
+  if( pPg ){
+    sqlite3PcacheDrop(pPg);
+  }
+  pagerUnlockIfUnused(pPager);
+
+  *ppPage = 0;
+  return rc;
+}
+
+/*
+** Acquire a page if it is already in the in-memory cache.  Do
+** not read the page from disk.  Return a pointer to the page,
+** or 0 if the page is not in cache. 
+**
+** See also sqlite3PagerGet().  The difference between this routine
+** and sqlite3PagerGet() is that _get() will go to the disk and read
+** in the page if the page is not already in cache.  This routine
+** returns NULL if the page is not in cache or if a disk I/O error 
+** has ever happened.
+*/
+SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
+  sqlite3_pcache_page *pPage;
+  assert( pPager!=0 );
+  assert( pgno!=0 );
+  assert( pPager->pPCache!=0 );
+  pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
+  assert( pPage==0 || pPager->hasBeenUsed );
+  return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
+}
+
+/*
+** Release a page reference.
+**
+** If the number of references to the page drop to zero, then the
+** page is added to the LRU list.  When all references to all pages
+** are released, a rollback occurs and the lock on the database is
+** removed.
+*/
+SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage *pPg){
+  Pager *pPager;
+  assert( pPg!=0 );
+  pPager = pPg->pPager;
+  if( pPg->flags & PGHDR_MMAP ){
+    pagerReleaseMapPage(pPg);
+  }else{
+    sqlite3PcacheRelease(pPg);
+  }
+  pagerUnlockIfUnused(pPager);
+}
+SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){
+  if( pPg ) sqlite3PagerUnrefNotNull(pPg);
+}
+
+/*
+** This function is called at the start of every write transaction.
+** There must already be a RESERVED or EXCLUSIVE lock on the database 
+** file when this routine is called.
+**
+** Open the journal file for pager pPager and write a journal header
+** to the start of it. If there are active savepoints, open the sub-journal
+** as well. This function is only used when the journal file is being 
+** opened to write a rollback log for a transaction. It is not used 
+** when opening a hot journal file to roll it back.
+**
+** If the journal file is already open (as it may be in exclusive mode),
+** then this function just writes a journal header to the start of the
+** already open file. 
+**
+** Whether or not the journal file is opened by this function, the
+** Pager.pInJournal bitvec structure is allocated.
+**
+** Return SQLITE_OK if everything is successful. Otherwise, return 
+** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or 
+** an IO error code if opening or writing the journal file fails.
+*/
+static int pager_open_journal(Pager *pPager){
+  int rc = SQLITE_OK;                        /* Return code */
+  sqlite3_vfs * const pVfs = pPager->pVfs;   /* Local cache of vfs pointer */
+
+  assert( pPager->eState==PAGER_WRITER_LOCKED );
+  assert( assert_pager_state(pPager) );
+  assert( pPager->pInJournal==0 );
+  
+  /* If already in the error state, this function is a no-op.  But on
+  ** the other hand, this routine is never called if we are already in
+  ** an error state. */
+  if( NEVER(pPager->errCode) ) return pPager->errCode;
+
+  if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
+    pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
+    if( pPager->pInJournal==0 ){
+      return SQLITE_NOMEM;
+    }
+  
+    /* Open the journal file if it is not already open. */
+    if( !isOpen(pPager->jfd) ){
+      if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
+        sqlite3MemJournalOpen(pPager->jfd);
+      }else{
+        const int flags =                   /* VFS flags to open journal file */
+          SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
+          (pPager->tempFile ? 
+            (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL):
+            (SQLITE_OPEN_MAIN_JOURNAL)
+          );
+
+        /* Verify that the database still has the same name as it did when
+        ** it was originally opened. */
+        rc = databaseIsUnmoved(pPager);
+        if( rc==SQLITE_OK ){
+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+          rc = sqlite3JournalOpen(
+              pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
+          );
+#else
+          rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
+#endif
+        }
+      }
+      assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
+    }
+  
+  
+    /* Write the first journal header to the journal file and open 
+    ** the sub-journal if necessary.
+    */
+    if( rc==SQLITE_OK ){
+      /* TODO: Check if all of these are really required. */
+      pPager->nRec = 0;
+      pPager->journalOff = 0;
+      pPager->setMaster = 0;
+      pPager->journalHdr = 0;
+      rc = writeJournalHdr(pPager);
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3BitvecDestroy(pPager->pInJournal);
+    pPager->pInJournal = 0;
+  }else{
+    assert( pPager->eState==PAGER_WRITER_LOCKED );
+    pPager->eState = PAGER_WRITER_CACHEMOD;
+  }
+
+  return rc;
+}
+
+/*
+** Begin a write-transaction on the specified pager object. If a 
+** write-transaction has already been opened, this function is a no-op.
+**
+** If the exFlag argument is false, then acquire at least a RESERVED
+** lock on the database file. If exFlag is true, then acquire at least
+** an EXCLUSIVE lock. If such a lock is already held, no locking 
+** functions need be called.
+**
+** If the subjInMemory argument is non-zero, then any sub-journal opened
+** within this transaction will be opened as an in-memory file. This
+** has no effect if the sub-journal is already opened (as it may be when
+** running in exclusive mode) or if the transaction does not require a
+** sub-journal. If the subjInMemory argument is zero, then any required
+** sub-journal is implemented in-memory if pPager is an in-memory database, 
+** or using a temporary file otherwise.
+*/
+SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
+  int rc = SQLITE_OK;
+
+  if( pPager->errCode ) return pPager->errCode;
+  assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
+  pPager->subjInMemory = (u8)subjInMemory;
+
+  if( ALWAYS(pPager->eState==PAGER_READER) ){
+    assert( pPager->pInJournal==0 );
+
+    if( pagerUseWal(pPager) ){
+      /* If the pager is configured to use locking_mode=exclusive, and an
+      ** exclusive lock on the database is not already held, obtain it now.
+      */
+      if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){
+        rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+        sqlite3WalExclusiveMode(pPager->pWal, 1);
+      }
+
+      /* Grab the write lock on the log file. If successful, upgrade to
+      ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
+      ** The busy-handler is not invoked if another connection already
+      ** holds the write-lock. If possible, the upper layer will call it.
+      */
+      rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
+    }else{
+      /* Obtain a RESERVED lock on the database file. If the exFlag parameter
+      ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
+      ** busy-handler callback can be used when upgrading to the EXCLUSIVE
+      ** lock, but not when obtaining the RESERVED lock.
+      */
+      rc = pagerLockDb(pPager, RESERVED_LOCK);
+      if( rc==SQLITE_OK && exFlag ){
+        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      /* Change to WRITER_LOCKED state.
+      **
+      ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD
+      ** when it has an open transaction, but never to DBMOD or FINISHED.
+      ** This is because in those states the code to roll back savepoint 
+      ** transactions may copy data from the sub-journal into the database 
+      ** file as well as into the page cache. Which would be incorrect in 
+      ** WAL mode.
+      */
+      pPager->eState = PAGER_WRITER_LOCKED;
+      pPager->dbHintSize = pPager->dbSize;
+      pPager->dbFileSize = pPager->dbSize;
+      pPager->dbOrigSize = pPager->dbSize;
+      pPager->journalOff = 0;
+    }
+
+    assert( rc==SQLITE_OK || pPager->eState==PAGER_READER );
+    assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED );
+    assert( assert_pager_state(pPager) );
+  }
+
+  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
+  return rc;
+}
+
+/*
+** Mark a single data page as writeable. The page is written into the 
+** main journal or sub-journal as required. If the page is written into
+** one of the journals, the corresponding bit is set in the 
+** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
+** of any open savepoints as appropriate.
+*/
+static int pager_write(PgHdr *pPg){
+  Pager *pPager = pPg->pPager;
+  int rc = SQLITE_OK;
+  int inJournal;
+
+  /* This routine is not called unless a write-transaction has already 
+  ** been started. The journal file may or may not be open at this point.
+  ** It is never called in the ERROR state.
+  */
+  assert( pPager->eState==PAGER_WRITER_LOCKED
+       || pPager->eState==PAGER_WRITER_CACHEMOD
+       || pPager->eState==PAGER_WRITER_DBMOD
+  );
+  assert( assert_pager_state(pPager) );
+  assert( pPager->errCode==0 );
+  assert( pPager->readOnly==0 );
+
+  CHECK_PAGE(pPg);
+
+  /* The journal file needs to be opened. Higher level routines have already
+  ** obtained the necessary locks to begin the write-transaction, but the
+  ** rollback journal might not yet be open. Open it now if this is the case.
+  **
+  ** This is done before calling sqlite3PcacheMakeDirty() on the page. 
+  ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
+  ** an error might occur and the pager would end up in WRITER_LOCKED state
+  ** with pages marked as dirty in the cache.
+  */
+  if( pPager->eState==PAGER_WRITER_LOCKED ){
+    rc = pager_open_journal(pPager);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
+  assert( assert_pager_state(pPager) );
+
+  /* Mark the page as dirty.  If the page has already been written
+  ** to the journal then we can return right away.
+  */
+  sqlite3PcacheMakeDirty(pPg);
+  inJournal = pageInJournal(pPager, pPg);
+  if( inJournal && (pPager->nSavepoint==0 || !subjRequiresPage(pPg)) ){
+    assert( !pagerUseWal(pPager) );
+  }else{
+  
+    /* The transaction journal now exists and we have a RESERVED or an
+    ** EXCLUSIVE lock on the main database file.  Write the current page to
+    ** the transaction journal if it is not there already.
+    */
+    if( !inJournal && !pagerUseWal(pPager) ){
+      assert( pagerUseWal(pPager)==0 );
+      if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){
+        u32 cksum;
+        char *pData2;
+        i64 iOff = pPager->journalOff;
+
+        /* We should never write to the journal file the page that
+        ** contains the database locks.  The following assert verifies
+        ** that we do not. */
+        assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
+
+        assert( pPager->journalHdr<=pPager->journalOff );
+        CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
+        cksum = pager_cksum(pPager, (u8*)pData2);
+
+        /* Even if an IO or diskfull error occurs while journalling the
+        ** page in the block above, set the need-sync flag for the page.
+        ** Otherwise, when the transaction is rolled back, the logic in
+        ** playback_one_page() will think that the page needs to be restored
+        ** in the database file. And if an IO error occurs while doing so,
+        ** then corruption may follow.
+        */
+        pPg->flags |= PGHDR_NEED_SYNC;
+
+        rc = write32bits(pPager->jfd, iOff, pPg->pgno);
+        if( rc!=SQLITE_OK ) return rc;
+        rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4);
+        if( rc!=SQLITE_OK ) return rc;
+        rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum);
+        if( rc!=SQLITE_OK ) return rc;
+
+        IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
+                 pPager->journalOff, pPager->pageSize));
+        PAGER_INCR(sqlite3_pager_writej_count);
+        PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n",
+             PAGERID(pPager), pPg->pgno, 
+             ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)));
+
+        pPager->journalOff += 8 + pPager->pageSize;
+        pPager->nRec++;
+        assert( pPager->pInJournal!=0 );
+        rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
+        testcase( rc==SQLITE_NOMEM );
+        assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
+        rc |= addToSavepointBitvecs(pPager, pPg->pgno);
+        if( rc!=SQLITE_OK ){
+          assert( rc==SQLITE_NOMEM );
+          return rc;
+        }
+      }else{
+        if( pPager->eState!=PAGER_WRITER_DBMOD ){
+          pPg->flags |= PGHDR_NEED_SYNC;
+        }
+        PAGERTRACE(("APPEND %d page %d needSync=%d\n",
+                PAGERID(pPager), pPg->pgno,
+               ((pPg->flags&PGHDR_NEED_SYNC)?1:0)));
+      }
+    }
+  
+    /* If the statement journal is open and the page is not in it,
+    ** then write the current page to the statement journal.  Note that
+    ** the statement journal format differs from the standard journal format
+    ** in that it omits the checksums and the header.
+    */
+    if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){
+      rc = subjournalPage(pPg);
+    }
+  }
+
+  /* Update the database size and return.
+  */
+  if( pPager->dbSize<pPg->pgno ){
+    pPager->dbSize = pPg->pgno;
+  }
+  return rc;
+}
+
+/*
+** This is a variant of sqlite3PagerWrite() that runs when the sector size
+** is larger than the page size.  SQLite makes the (reasonable) assumption that
+** all bytes of a sector are written together by hardware.  Hence, all bytes of
+** a sector need to be journalled in case of a power loss in the middle of
+** a write.
+**
+** Usually, the sector size is less than or equal to the page size, in which
+** case pages can be individually written.  This routine only runs in the exceptional
+** case where the page size is smaller than the sector size.
+*/
+static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){
+  int rc = SQLITE_OK;            /* Return code */
+  Pgno nPageCount;               /* Total number of pages in database file */
+  Pgno pg1;                      /* First page of the sector pPg is located on. */
+  int nPage = 0;                 /* Number of pages starting at pg1 to journal */
+  int ii;                        /* Loop counter */
+  int needSync = 0;              /* True if any page has PGHDR_NEED_SYNC */
+  Pager *pPager = pPg->pPager;   /* The pager that owns pPg */
+  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
+
+  /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow
+  ** a journal header to be written between the pages journaled by
+  ** this function.
+  */
+  assert( !MEMDB );
+  assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 );
+  pPager->doNotSpill |= SPILLFLAG_NOSYNC;
+
+  /* This trick assumes that both the page-size and sector-size are
+  ** an integer power of 2. It sets variable pg1 to the identifier
+  ** of the first page of the sector pPg is located on.
+  */
+  pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
+
+  nPageCount = pPager->dbSize;
+  if( pPg->pgno>nPageCount ){
+    nPage = (pPg->pgno - pg1)+1;
+  }else if( (pg1+nPagePerSector-1)>nPageCount ){
+    nPage = nPageCount+1-pg1;
+  }else{
+    nPage = nPagePerSector;
+  }
+  assert(nPage>0);
+  assert(pg1<=pPg->pgno);
+  assert((pg1+nPage)>pPg->pgno);
+
+  for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
+    Pgno pg = pg1+ii;
+    PgHdr *pPage;
+    if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
+      if( pg!=PAGER_MJ_PGNO(pPager) ){
+        rc = sqlite3PagerGet(pPager, pg, &pPage);
+        if( rc==SQLITE_OK ){
+          rc = pager_write(pPage);
+          if( pPage->flags&PGHDR_NEED_SYNC ){
+            needSync = 1;
+          }
+          sqlite3PagerUnrefNotNull(pPage);
+        }
+      }
+    }else if( (pPage = sqlite3PagerLookup(pPager, pg))!=0 ){
+      if( pPage->flags&PGHDR_NEED_SYNC ){
+        needSync = 1;
+      }
+      sqlite3PagerUnrefNotNull(pPage);
+    }
+  }
+
+  /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages 
+  ** starting at pg1, then it needs to be set for all of them. Because
+  ** writing to any of these nPage pages may damage the others, the
+  ** journal file must contain sync()ed copies of all of them
+  ** before any of them can be written out to the database file.
+  */
+  if( rc==SQLITE_OK && needSync ){
+    assert( !MEMDB );
+    for(ii=0; ii<nPage; ii++){
+      PgHdr *pPage = sqlite3PagerLookup(pPager, pg1+ii);
+      if( pPage ){
+        pPage->flags |= PGHDR_NEED_SYNC;
+        sqlite3PagerUnrefNotNull(pPage);
+      }
+    }
+  }
+
+  assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 );
+  pPager->doNotSpill &= ~SPILLFLAG_NOSYNC;
+  return rc;
+}
+
+/*
+** Mark a data page as writeable. This routine must be called before 
+** making changes to a page. The caller must check the return value 
+** of this function and be careful not to change any page data unless 
+** this routine returns SQLITE_OK.
+**
+** The difference between this function and pager_write() is that this
+** function also deals with the special case where 2 or more pages
+** fit on a single disk sector. In this case all co-resident pages
+** must have been written to the journal file before returning.
+**
+** If an error occurs, SQLITE_NOMEM or an IO error code is returned
+** as appropriate. Otherwise, SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){
+  assert( (pPg->flags & PGHDR_MMAP)==0 );
+  assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED );
+  assert( pPg->pPager->eState!=PAGER_ERROR );
+  assert( assert_pager_state(pPg->pPager) );
+  if( pPg->pPager->sectorSize > (u32)pPg->pPager->pageSize ){
+    return pagerWriteLargeSector(pPg);
+  }else{
+    return pager_write(pPg);
+  }
+}
+
+/*
+** Return TRUE if the page given in the argument was previously passed
+** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
+** to change the content of the page.
+*/
+#ifndef NDEBUG
+SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){
+  return pPg->flags&PGHDR_DIRTY;
+}
+#endif
+
+/*
+** A call to this routine tells the pager that it is not necessary to
+** write the information on page pPg back to the disk, even though
+** that page might be marked as dirty.  This happens, for example, when
+** the page has been added as a leaf of the freelist and so its
+** content no longer matters.
+**
+** The overlying software layer calls this routine when all of the data
+** on the given page is unused. The pager marks the page as clean so
+** that it does not get written to disk.
+**
+** Tests show that this optimization can quadruple the speed of large 
+** DELETE operations.
+*/
+SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){
+  Pager *pPager = pPg->pPager;
+  if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
+    PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
+    IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
+    pPg->flags |= PGHDR_DONT_WRITE;
+    pager_set_pagehash(pPg);
+  }
+}
+
+/*
+** This routine is called to increment the value of the database file 
+** change-counter, stored as a 4-byte big-endian integer starting at 
+** byte offset 24 of the pager file.  The secondary change counter at
+** 92 is also updated, as is the SQLite version number at offset 96.
+**
+** But this only happens if the pPager->changeCountDone flag is false.
+** To avoid excess churning of page 1, the update only happens once.
+** See also the pager_write_changecounter() routine that does an 
+** unconditional update of the change counters.
+**
+** If the isDirectMode flag is zero, then this is done by calling 
+** sqlite3PagerWrite() on page 1, then modifying the contents of the
+** page data. In this case the file will be updated when the current
+** transaction is committed.
+**
+** The isDirectMode flag may only be non-zero if the library was compiled
+** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case,
+** if isDirect is non-zero, then the database file is updated directly
+** by writing an updated version of page 1 using a call to the 
+** sqlite3OsWrite() function.
+*/
+static int pager_incr_changecounter(Pager *pPager, int isDirectMode){
+  int rc = SQLITE_OK;
+
+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
+       || pPager->eState==PAGER_WRITER_DBMOD
+  );
+  assert( assert_pager_state(pPager) );
+
+  /* Declare and initialize constant integer 'isDirect'. If the
+  ** atomic-write optimization is enabled in this build, then isDirect
+  ** is initialized to the value passed as the isDirectMode parameter
+  ** to this function. Otherwise, it is always set to zero.
+  **
+  ** The idea is that if the atomic-write optimization is not
+  ** enabled at compile time, the compiler can omit the tests of
+  ** 'isDirect' below, as well as the block enclosed in the
+  ** "if( isDirect )" condition.
+  */
+#ifndef SQLITE_ENABLE_ATOMIC_WRITE
+# define DIRECT_MODE 0
+  assert( isDirectMode==0 );
+  UNUSED_PARAMETER(isDirectMode);
+#else
+# define DIRECT_MODE isDirectMode
+#endif
+
+  if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){
+    PgHdr *pPgHdr;                /* Reference to page 1 */
+
+    assert( !pPager->tempFile && isOpen(pPager->fd) );
+
+    /* Open page 1 of the file for writing. */
+    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
+    assert( pPgHdr==0 || rc==SQLITE_OK );
+
+    /* If page one was fetched successfully, and this function is not
+    ** operating in direct-mode, make page 1 writable.  When not in 
+    ** direct mode, page 1 is always held in cache and hence the PagerGet()
+    ** above is always successful - hence the ALWAYS on rc==SQLITE_OK.
+    */
+    if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){
+      rc = sqlite3PagerWrite(pPgHdr);
+    }
+
+    if( rc==SQLITE_OK ){
+      /* Actually do the update of the change counter */
+      pager_write_changecounter(pPgHdr);
+
+      /* If running in direct mode, write the contents of page 1 to the file. */
+      if( DIRECT_MODE ){
+        const void *zBuf;
+        assert( pPager->dbFileSize>0 );
+        CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf);
+        if( rc==SQLITE_OK ){
+          rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
+          pPager->aStat[PAGER_STAT_WRITE]++;
+        }
+        if( rc==SQLITE_OK ){
+          /* Update the pager's copy of the change-counter. Otherwise, the
+          ** next time a read transaction is opened the cache will be
+          ** flushed (as the change-counter values will not match).  */
+          const void *pCopy = (const void *)&((const char *)zBuf)[24];
+          memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers));
+          pPager->changeCountDone = 1;
+        }
+      }else{
+        pPager->changeCountDone = 1;
+      }
+    }
+
+    /* Release the page reference. */
+    sqlite3PagerUnref(pPgHdr);
+  }
+  return rc;
+}
+
+/*
+** Sync the database file to disk. This is a no-op for in-memory databases
+** or pages with the Pager.noSync flag set.
+**
+** If successful, or if called on a pager for which it is a no-op, this
+** function returns SQLITE_OK. Otherwise, an IO error code is returned.
+*/
+SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster){
+  int rc = SQLITE_OK;
+
+  if( isOpen(pPager->fd) ){
+    void *pArg = (void*)zMaster;
+    rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC, pArg);
+    if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
+  }
+  if( rc==SQLITE_OK && !pPager->noSync ){
+    assert( !MEMDB );
+    rc = sqlite3OsSync(pPager->fd, pPager->syncFlags);
+  }
+  return rc;
+}
+
+/*
+** This function may only be called while a write-transaction is active in
+** rollback. If the connection is in WAL mode, this call is a no-op. 
+** Otherwise, if the connection does not already have an EXCLUSIVE lock on 
+** the database file, an attempt is made to obtain one.
+**
+** If the EXCLUSIVE lock is already held or the attempt to obtain it is
+** successful, or the connection is in WAL mode, SQLITE_OK is returned.
+** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is 
+** returned.
+*/
+SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){
+  int rc = SQLITE_OK;
+  assert( pPager->eState==PAGER_WRITER_CACHEMOD 
+       || pPager->eState==PAGER_WRITER_DBMOD 
+       || pPager->eState==PAGER_WRITER_LOCKED 
+  );
+  assert( assert_pager_state(pPager) );
+  if( 0==pagerUseWal(pPager) ){
+    rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
+  }
+  return rc;
+}
+
+/*
+** Sync the database file for the pager pPager. zMaster points to the name
+** of a master journal file that should be written into the individual
+** journal file. zMaster may be NULL, which is interpreted as no master
+** journal (a single database transaction).
+**
+** This routine ensures that:
+**
+**   * The database file change-counter is updated,
+**   * the journal is synced (unless the atomic-write optimization is used),
+**   * all dirty pages are written to the database file, 
+**   * the database file is truncated (if required), and
+**   * the database file synced. 
+**
+** The only thing that remains to commit the transaction is to finalize 
+** (delete, truncate or zero the first part of) the journal file (or 
+** delete the master journal file if specified).
+**
+** Note that if zMaster==NULL, this does not overwrite a previous value
+** passed to an sqlite3PagerCommitPhaseOne() call.
+**
+** If the final parameter - noSync - is true, then the database file itself
+** is not synced. The caller must call sqlite3PagerSync() directly to
+** sync the database file before calling CommitPhaseTwo() to delete the
+** journal file in this case.
+*/
+SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(
+  Pager *pPager,                  /* Pager object */
+  const char *zMaster,            /* If not NULL, the master journal name */
+  int noSync                      /* True to omit the xSync on the db file */
+){
+  int rc = SQLITE_OK;             /* Return code */
+
+  assert( pPager->eState==PAGER_WRITER_LOCKED
+       || pPager->eState==PAGER_WRITER_CACHEMOD
+       || pPager->eState==PAGER_WRITER_DBMOD
+       || pPager->eState==PAGER_ERROR
+  );
+  assert( assert_pager_state(pPager) );
+
+  /* If a prior error occurred, report that error again. */
+  if( NEVER(pPager->errCode) ) return pPager->errCode;
+
+  PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", 
+      pPager->zFilename, zMaster, pPager->dbSize));
+
+  /* If no database changes have been made, return early. */
+  if( pPager->eState<PAGER_WRITER_CACHEMOD ) return SQLITE_OK;
+
+  if( MEMDB ){
+    /* If this is an in-memory db, or no pages have been written to, or this
+    ** function has already been called, it is mostly a no-op.  However, any
+    ** backup in progress needs to be restarted.
+    */
+    sqlite3BackupRestart(pPager->pBackup);
+  }else{
+    if( pagerUseWal(pPager) ){
+      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
+      PgHdr *pPageOne = 0;
+      if( pList==0 ){
+        /* Must have at least one page for the WAL commit flag.
+        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
+        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
+        pList = pPageOne;
+        pList->pDirty = 0;
+      }
+      assert( rc==SQLITE_OK );
+      if( ALWAYS(pList) ){
+        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1);
+      }
+      sqlite3PagerUnref(pPageOne);
+      if( rc==SQLITE_OK ){
+        sqlite3PcacheCleanAll(pPager->pPCache);
+      }
+    }else{
+      /* The following block updates the change-counter. Exactly how it
+      ** does this depends on whether or not the atomic-update optimization
+      ** was enabled at compile time, and if this transaction meets the 
+      ** runtime criteria to use the operation: 
+      **
+      **    * The file-system supports the atomic-write property for
+      **      blocks of size page-size, and 
+      **    * This commit is not part of a multi-file transaction, and
+      **    * Exactly one page has been modified and store in the journal file.
+      **
+      ** If the optimization was not enabled at compile time, then the
+      ** pager_incr_changecounter() function is called to update the change
+      ** counter in 'indirect-mode'. If the optimization is compiled in but
+      ** is not applicable to this transaction, call sqlite3JournalCreate()
+      ** to make sure the journal file has actually been created, then call
+      ** pager_incr_changecounter() to update the change-counter in indirect
+      ** mode. 
+      **
+      ** Otherwise, if the optimization is both enabled and applicable,
+      ** then call pager_incr_changecounter() to update the change-counter
+      ** in 'direct' mode. In this case the journal file will never be
+      ** created for this transaction.
+      */
+  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
+      PgHdr *pPg;
+      assert( isOpen(pPager->jfd) 
+           || pPager->journalMode==PAGER_JOURNALMODE_OFF 
+           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
+      );
+      if( !zMaster && isOpen(pPager->jfd) 
+       && pPager->journalOff==jrnlBufferSize(pPager) 
+       && pPager->dbSize>=pPager->dbOrigSize
+       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
+      ){
+        /* Update the db file change counter via the direct-write method. The 
+        ** following call will modify the in-memory representation of page 1 
+        ** to include the updated change counter and then write page 1 
+        ** directly to the database file. Because of the atomic-write 
+        ** property of the host file-system, this is safe.
+        */
+        rc = pager_incr_changecounter(pPager, 1);
+      }else{
+        rc = sqlite3JournalCreate(pPager->jfd);
+        if( rc==SQLITE_OK ){
+          rc = pager_incr_changecounter(pPager, 0);
+        }
+      }
+  #else
+      rc = pager_incr_changecounter(pPager, 0);
+  #endif
+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
+  
+      /* Write the master journal name into the journal file. If a master 
+      ** journal file name has already been written to the journal file, 
+      ** or if zMaster is NULL (no master journal), then this call is a no-op.
+      */
+      rc = writeMasterJournal(pPager, zMaster);
+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
+  
+      /* Sync the journal file and write all dirty pages to the database.
+      ** If the atomic-update optimization is being used, this sync will not 
+      ** create the journal file or perform any real IO.
+      **
+      ** Because the change-counter page was just modified, unless the
+      ** atomic-update optimization is used it is almost certain that the
+      ** journal requires a sync here. However, in locking_mode=exclusive
+      ** on a system under memory pressure it is just possible that this is 
+      ** not the case. In this case it is likely enough that the redundant
+      ** xSync() call will be changed to a no-op by the OS anyhow. 
+      */
+      rc = syncJournal(pPager, 0);
+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
+  
+      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
+      if( rc!=SQLITE_OK ){
+        assert( rc!=SQLITE_IOERR_BLOCKED );
+        goto commit_phase_one_exit;
+      }
+      sqlite3PcacheCleanAll(pPager->pPCache);
+
+      /* If the file on disk is smaller than the database image, use 
+      ** pager_truncate to grow the file here. This can happen if the database
+      ** image was extended as part of the current transaction and then the
+      ** last page in the db image moved to the free-list. In this case the
+      ** last page is never written out to disk, leaving the database file
+      ** undersized. Fix this now if it is the case.  */
+      if( pPager->dbSize>pPager->dbFileSize ){
+        Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
+        assert( pPager->eState==PAGER_WRITER_DBMOD );
+        rc = pager_truncate(pPager, nNew);
+        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
+      }
+  
+      /* Finally, sync the database file. */
+      if( !noSync ){
+        rc = sqlite3PagerSync(pPager, zMaster);
+      }
+      IOTRACE(("DBSYNC %p\n", pPager))
+    }
+  }
+
+commit_phase_one_exit:
+  if( rc==SQLITE_OK && !pagerUseWal(pPager) ){
+    pPager->eState = PAGER_WRITER_FINISHED;
+  }
+  return rc;
+}
+
+
+/*
+** When this function is called, the database file has been completely
+** updated to reflect the changes made by the current transaction and
+** synced to disk. The journal file still exists in the file-system 
+** though, and if a failure occurs at this point it will eventually
+** be used as a hot-journal and the current transaction rolled back.
+**
+** This function finalizes the journal file, either by deleting, 
+** truncating or partially zeroing it, so that it cannot be used 
+** for hot-journal rollback. Once this is done the transaction is
+** irrevocably committed.
+**
+** If an error occurs, an IO error code is returned and the pager
+** moves into the error state. Otherwise, SQLITE_OK is returned.
+*/
+SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){
+  int rc = SQLITE_OK;                  /* Return code */
+
+  /* This routine should not be called if a prior error has occurred.
+  ** But if (due to a coding error elsewhere in the system) it does get
+  ** called, just return the same error code without doing anything. */
+  if( NEVER(pPager->errCode) ) return pPager->errCode;
+
+  assert( pPager->eState==PAGER_WRITER_LOCKED
+       || pPager->eState==PAGER_WRITER_FINISHED
+       || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD)
+  );
+  assert( assert_pager_state(pPager) );
+
+  /* An optimization. If the database was not actually modified during
+  ** this transaction, the pager is running in exclusive-mode and is
+  ** using persistent journals, then this function is a no-op.
+  **
+  ** The start of the journal file currently contains a single journal 
+  ** header with the nRec field set to 0. If such a journal is used as
+  ** a hot-journal during hot-journal rollback, 0 changes will be made
+  ** to the database file. So there is no need to zero the journal 
+  ** header. Since the pager is in exclusive mode, there is no need
+  ** to drop any locks either.
+  */
+  if( pPager->eState==PAGER_WRITER_LOCKED 
+   && pPager->exclusiveMode 
+   && pPager->journalMode==PAGER_JOURNALMODE_PERSIST
+  ){
+    assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff );
+    pPager->eState = PAGER_READER;
+    return SQLITE_OK;
+  }
+
+  PAGERTRACE(("COMMIT %d\n", PAGERID(pPager)));
+  pPager->iDataVersion++;
+  rc = pager_end_transaction(pPager, pPager->setMaster, 1);
+  return pager_error(pPager, rc);
+}
+
+/*
+** If a write transaction is open, then all changes made within the 
+** transaction are reverted and the current write-transaction is closed.
+** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR
+** state if an error occurs.
+**
+** If the pager is already in PAGER_ERROR state when this function is called,
+** it returns Pager.errCode immediately. No work is performed in this case.
+**
+** Otherwise, in rollback mode, this function performs two functions:
+**
+**   1) It rolls back the journal file, restoring all database file and 
+**      in-memory cache pages to the state they were in when the transaction
+**      was opened, and
+**
+**   2) It finalizes the journal file, so that it is not used for hot
+**      rollback at any point in the future.
+**
+** Finalization of the journal file (task 2) is only performed if the 
+** rollback is successful.
+**
+** In WAL mode, all cache-entries containing data modified within the
+** current transaction are either expelled from the cache or reverted to
+** their pre-transaction state by re-reading data from the database or
+** WAL files. The WAL transaction is then closed.
+*/
+SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){
+  int rc = SQLITE_OK;                  /* Return code */
+  PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
+
+  /* PagerRollback() is a no-op if called in READER or OPEN state. If
+  ** the pager is already in the ERROR state, the rollback is not 
+  ** attempted here. Instead, the error code is returned to the caller.
+  */
+  assert( assert_pager_state(pPager) );
+  if( pPager->eState==PAGER_ERROR ) return pPager->errCode;
+  if( pPager->eState<=PAGER_READER ) return SQLITE_OK;
+
+  if( pagerUseWal(pPager) ){
+    int rc2;
+    rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1);
+    rc2 = pager_end_transaction(pPager, pPager->setMaster, 0);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){
+    int eState = pPager->eState;
+    rc = pager_end_transaction(pPager, 0, 0);
+    if( !MEMDB && eState>PAGER_WRITER_LOCKED ){
+      /* This can happen using journal_mode=off. Move the pager to the error 
+      ** state to indicate that the contents of the cache may not be trusted.
+      ** Any active readers will get SQLITE_ABORT.
+      */
+      pPager->errCode = SQLITE_ABORT;
+      pPager->eState = PAGER_ERROR;
+      return rc;
+    }
+  }else{
+    rc = pager_playback(pPager, 0);
+  }
+
+  assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK );
+  assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT
+          || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR 
+          || rc==SQLITE_CANTOPEN
+  );
+
+  /* If an error occurs during a ROLLBACK, we can no longer trust the pager
+  ** cache. So call pager_error() on the way out to make any error persistent.
+  */
+  return pager_error(pPager, rc);
+}
+
+/*
+** Return TRUE if the database file is opened read-only.  Return FALSE
+** if the database is (in theory) writable.
+*/
+SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){
+  return pPager->readOnly;
+}
+
+/*
+** Return the number of references to the pager.
+*/
+SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){
+  return sqlite3PcacheRefCount(pPager->pPCache);
+}
+
+/*
+** Return the approximate number of bytes of memory currently
+** used by the pager and its associated cache.
+*/
+SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){
+  int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr)
+                                     + 5*sizeof(void*);
+  return perPageSize*sqlite3PcachePagecount(pPager->pPCache)
+           + sqlite3MallocSize(pPager)
+           + pPager->pageSize;
+}
+
+/*
+** Return the number of references to the specified page.
+*/
+SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){
+  return sqlite3PcachePageRefcount(pPage);
+}
+
+#ifdef SQLITE_TEST
+/*
+** This routine is used for testing and analysis only.
+*/
+SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){
+  static int a[11];
+  a[0] = sqlite3PcacheRefCount(pPager->pPCache);
+  a[1] = sqlite3PcachePagecount(pPager->pPCache);
+  a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
+  a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize;
+  a[4] = pPager->eState;
+  a[5] = pPager->errCode;
+  a[6] = pPager->aStat[PAGER_STAT_HIT];
+  a[7] = pPager->aStat[PAGER_STAT_MISS];
+  a[8] = 0;  /* Used to be pPager->nOvfl */
+  a[9] = pPager->nRead;
+  a[10] = pPager->aStat[PAGER_STAT_WRITE];
+  return a;
+}
+#endif
+
+/*
+** Parameter eStat must be either SQLITE_DBSTATUS_CACHE_HIT or
+** SQLITE_DBSTATUS_CACHE_MISS. Before returning, *pnVal is incremented by the
+** current cache hit or miss count, according to the value of eStat. If the 
+** reset parameter is non-zero, the cache hit or miss count is zeroed before 
+** returning.
+*/
+SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){
+
+  assert( eStat==SQLITE_DBSTATUS_CACHE_HIT
+       || eStat==SQLITE_DBSTATUS_CACHE_MISS
+       || eStat==SQLITE_DBSTATUS_CACHE_WRITE
+  );
+
+  assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS );
+  assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE );
+  assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 && PAGER_STAT_WRITE==2 );
+
+  *pnVal += pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT];
+  if( reset ){
+    pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0;
+  }
+}
+
+/*
+** Return true if this is an in-memory pager.
+*/
+SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){
+  return MEMDB;
+}
+
+/*
+** Check that there are at least nSavepoint savepoints open. If there are
+** currently less than nSavepoints open, then open one or more savepoints
+** to make up the difference. If the number of savepoints is already
+** equal to nSavepoint, then this function is a no-op.
+**
+** If a memory allocation fails, SQLITE_NOMEM is returned. If an error 
+** occurs while opening the sub-journal file, then an IO error code is
+** returned. Otherwise, SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
+  int rc = SQLITE_OK;                       /* Return code */
+  int nCurrent = pPager->nSavepoint;        /* Current number of savepoints */
+
+  assert( pPager->eState>=PAGER_WRITER_LOCKED );
+  assert( assert_pager_state(pPager) );
+
+  if( nSavepoint>nCurrent && pPager->useJournal ){
+    int ii;                                 /* Iterator variable */
+    PagerSavepoint *aNew;                   /* New Pager.aSavepoint array */
+
+    /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
+    ** if the allocation fails. Otherwise, zero the new portion in case a 
+    ** malloc failure occurs while populating it in the for(...) loop below.
+    */
+    aNew = (PagerSavepoint *)sqlite3Realloc(
+        pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint
+    );
+    if( !aNew ){
+      return SQLITE_NOMEM;
+    }
+    memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint));
+    pPager->aSavepoint = aNew;
+
+    /* Populate the PagerSavepoint structures just allocated. */
+    for(ii=nCurrent; ii<nSavepoint; ii++){
+      aNew[ii].nOrig = pPager->dbSize;
+      if( isOpen(pPager->jfd) && pPager->journalOff>0 ){
+        aNew[ii].iOffset = pPager->journalOff;
+      }else{
+        aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager);
+      }
+      aNew[ii].iSubRec = pPager->nSubRec;
+      aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize);
+      if( !aNew[ii].pInSavepoint ){
+        return SQLITE_NOMEM;
+      }
+      if( pagerUseWal(pPager) ){
+        sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData);
+      }
+      pPager->nSavepoint = ii+1;
+    }
+    assert( pPager->nSavepoint==nSavepoint );
+    assertTruncateConstraint(pPager);
+  }
+
+  return rc;
+}
+
+/*
+** This function is called to rollback or release (commit) a savepoint.
+** The savepoint to release or rollback need not be the most recently 
+** created savepoint.
+**
+** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE.
+** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with
+** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes
+** that have occurred since the specified savepoint was created.
+**
+** The savepoint to rollback or release is identified by parameter 
+** iSavepoint. A value of 0 means to operate on the outermost savepoint
+** (the first created). A value of (Pager.nSavepoint-1) means operate
+** on the most recently created savepoint. If iSavepoint is greater than
+** (Pager.nSavepoint-1), then this function is a no-op.
+**
+** If a negative value is passed to this function, then the current
+** transaction is rolled back. This is different to calling 
+** sqlite3PagerRollback() because this function does not terminate
+** the transaction or unlock the database, it just restores the 
+** contents of the database to its original state. 
+**
+** In any case, all savepoints with an index greater than iSavepoint 
+** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE),
+** then savepoint iSavepoint is also destroyed.
+**
+** This function may return SQLITE_NOMEM if a memory allocation fails,
+** or an IO error code if an IO error occurs while rolling back a 
+** savepoint. If no errors occur, SQLITE_OK is returned.
+*/ 
+SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
+  int rc = pPager->errCode;       /* Return code */
+
+  assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
+  assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK );
+
+  if( rc==SQLITE_OK && iSavepoint<pPager->nSavepoint ){
+    int ii;            /* Iterator variable */
+    int nNew;          /* Number of remaining savepoints after this op. */
+
+    /* Figure out how many savepoints will still be active after this
+    ** operation. Store this value in nNew. Then free resources associated 
+    ** with any savepoints that are destroyed by this operation.
+    */
+    nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1);
+    for(ii=nNew; ii<pPager->nSavepoint; ii++){
+      sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
+    }
+    pPager->nSavepoint = nNew;
+
+    /* If this is a release of the outermost savepoint, truncate 
+    ** the sub-journal to zero bytes in size. */
+    if( op==SAVEPOINT_RELEASE ){
+      if( nNew==0 && isOpen(pPager->sjfd) ){
+        /* Only truncate if it is an in-memory sub-journal. */
+        if( sqlite3IsMemJournal(pPager->sjfd) ){
+          rc = sqlite3OsTruncate(pPager->sjfd, 0);
+          assert( rc==SQLITE_OK );
+        }
+        pPager->nSubRec = 0;
+      }
+    }
+    /* Else this is a rollback operation, playback the specified savepoint.
+    ** If this is a temp-file, it is possible that the journal file has
+    ** not yet been opened. In this case there have been no changes to
+    ** the database file, so the playback operation can be skipped.
+    */
+    else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){
+      PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
+      rc = pagerPlaybackSavepoint(pPager, pSavepoint);
+      assert(rc!=SQLITE_DONE);
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Return the full pathname of the database file.
+**
+** Except, if the pager is in-memory only, then return an empty string if
+** nullIfMemDb is true.  This routine is called with nullIfMemDb==1 when
+** used to report the filename to the user, for compatibility with legacy
+** behavior.  But when the Btree needs to know the filename for matching to
+** shared cache, it uses nullIfMemDb==0 so that in-memory databases can
+** participate in shared-cache.
+*/
+SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){
+  return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename;
+}
+
+/*
+** Return the VFS structure for the pager.
+*/
+SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
+  return pPager->pVfs;
+}
+
+/*
+** Return the file handle for the database file associated
+** with the pager.  This might return NULL if the file has
+** not yet been opened.
+*/
+SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){
+  return pPager->fd;
+}
+
+/*
+** Return the full pathname of the journal file.
+*/
+SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){
+  return pPager->zJournal;
+}
+
+/*
+** Return true if fsync() calls are disabled for this pager.  Return FALSE
+** if fsync()s are executed normally.
+*/
+SQLITE_PRIVATE int sqlite3PagerNosync(Pager *pPager){
+  return pPager->noSync;
+}
+
+#ifdef SQLITE_HAS_CODEC
+/*
+** Set or retrieve the codec for this pager
+*/
+SQLITE_PRIVATE void sqlite3PagerSetCodec(
+  Pager *pPager,
+  void *(*xCodec)(void*,void*,Pgno,int),
+  void (*xCodecSizeChng)(void*,int,int),
+  void (*xCodecFree)(void*),
+  void *pCodec
+){
+  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
+  pPager->xCodec = pPager->memDb ? 0 : xCodec;
+  pPager->xCodecSizeChng = xCodecSizeChng;
+  pPager->xCodecFree = xCodecFree;
+  pPager->pCodec = pCodec;
+  pagerReportSize(pPager);
+}
+SQLITE_PRIVATE void *sqlite3PagerGetCodec(Pager *pPager){
+  return pPager->pCodec;
+}
+
+/*
+** This function is called by the wal module when writing page content
+** into the log file.
+**
+** This function returns a pointer to a buffer containing the encrypted
+** page content. If a malloc fails, this function may return NULL.
+*/
+SQLITE_PRIVATE void *sqlite3PagerCodec(PgHdr *pPg){
+  void *aData = 0;
+  CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData);
+  return aData;
+}
+
+/*
+** Return the current pager state
+*/
+SQLITE_PRIVATE int sqlite3PagerState(Pager *pPager){
+  return pPager->eState;
+}
+#endif /* SQLITE_HAS_CODEC */
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** Move the page pPg to location pgno in the file.
+**
+** There must be no references to the page previously located at
+** pgno (which we call pPgOld) though that page is allowed to be
+** in cache.  If the page previously located at pgno is not already
+** in the rollback journal, it is not put there by by this routine.
+**
+** References to the page pPg remain valid. Updating any
+** meta-data associated with pPg (i.e. data stored in the nExtra bytes
+** allocated along with the page) is the responsibility of the caller.
+**
+** A transaction must be active when this routine is called. It used to be
+** required that a statement transaction was not active, but this restriction
+** has been removed (CREATE INDEX needs to move a page when a statement
+** transaction is active).
+**
+** If the fourth argument, isCommit, is non-zero, then this page is being
+** moved as part of a database reorganization just before the transaction 
+** is being committed. In this case, it is guaranteed that the database page 
+** pPg refers to will not be written to again within this transaction.
+**
+** This function may return SQLITE_NOMEM or an IO error code if an error
+** occurs. Otherwise, it returns SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
+  PgHdr *pPgOld;               /* The page being overwritten. */
+  Pgno needSyncPgno = 0;       /* Old value of pPg->pgno, if sync is required */
+  int rc;                      /* Return code */
+  Pgno origPgno;               /* The original page number */
+
+  assert( pPg->nRef>0 );
+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
+       || pPager->eState==PAGER_WRITER_DBMOD
+  );
+  assert( assert_pager_state(pPager) );
+
+  /* In order to be able to rollback, an in-memory database must journal
+  ** the page we are moving from.
+  */
+  if( MEMDB ){
+    rc = sqlite3PagerWrite(pPg);
+    if( rc ) return rc;
+  }
+
+  /* If the page being moved is dirty and has not been saved by the latest
+  ** savepoint, then save the current contents of the page into the 
+  ** sub-journal now. This is required to handle the following scenario:
+  **
+  **   BEGIN;
+  **     <journal page X, then modify it in memory>
+  **     SAVEPOINT one;
+  **       <Move page X to location Y>
+  **     ROLLBACK TO one;
+  **
+  ** If page X were not written to the sub-journal here, it would not
+  ** be possible to restore its contents when the "ROLLBACK TO one"
+  ** statement were is processed.
+  **
+  ** subjournalPage() may need to allocate space to store pPg->pgno into
+  ** one or more savepoint bitvecs. This is the reason this function
+  ** may return SQLITE_NOMEM.
+  */
+  if( pPg->flags&PGHDR_DIRTY
+   && subjRequiresPage(pPg)
+   && SQLITE_OK!=(rc = subjournalPage(pPg))
+  ){
+    return rc;
+  }
+
+  PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", 
+      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno));
+  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
+
+  /* If the journal needs to be sync()ed before page pPg->pgno can
+  ** be written to, store pPg->pgno in local variable needSyncPgno.
+  **
+  ** If the isCommit flag is set, there is no need to remember that
+  ** the journal needs to be sync()ed before database page pPg->pgno 
+  ** can be written to. The caller has already promised not to write to it.
+  */
+  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
+    needSyncPgno = pPg->pgno;
+    assert( pPager->journalMode==PAGER_JOURNALMODE_OFF ||
+            pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize );
+    assert( pPg->flags&PGHDR_DIRTY );
+  }
+
+  /* If the cache contains a page with page-number pgno, remove it
+  ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for 
+  ** page pgno before the 'move' operation, it needs to be retained 
+  ** for the page moved there.
+  */
+  pPg->flags &= ~PGHDR_NEED_SYNC;
+  pPgOld = sqlite3PagerLookup(pPager, pgno);
+  assert( !pPgOld || pPgOld->nRef==1 );
+  if( pPgOld ){
+    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
+    if( MEMDB ){
+      /* Do not discard pages from an in-memory database since we might
+      ** need to rollback later.  Just move the page out of the way. */
+      sqlite3PcacheMove(pPgOld, pPager->dbSize+1);
+    }else{
+      sqlite3PcacheDrop(pPgOld);
+    }
+  }
+
+  origPgno = pPg->pgno;
+  sqlite3PcacheMove(pPg, pgno);
+  sqlite3PcacheMakeDirty(pPg);
+
+  /* For an in-memory database, make sure the original page continues
+  ** to exist, in case the transaction needs to roll back.  Use pPgOld
+  ** as the original page since it has already been allocated.
+  */
+  if( MEMDB ){
+    assert( pPgOld );
+    sqlite3PcacheMove(pPgOld, origPgno);
+    sqlite3PagerUnrefNotNull(pPgOld);
+  }
+
+  if( needSyncPgno ){
+    /* If needSyncPgno is non-zero, then the journal file needs to be 
+    ** sync()ed before any data is written to database file page needSyncPgno.
+    ** Currently, no such page exists in the page-cache and the 
+    ** "is journaled" bitvec flag has been set. This needs to be remedied by
+    ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC
+    ** flag.
+    **
+    ** If the attempt to load the page into the page-cache fails, (due
+    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
+    ** array. Otherwise, if the page is loaded and written again in
+    ** this transaction, it may be written to the database file before
+    ** it is synced into the journal file. This way, it may end up in
+    ** the journal file twice, but that is not a problem.
+    */
+    PgHdr *pPgHdr;
+    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
+    if( rc!=SQLITE_OK ){
+      if( needSyncPgno<=pPager->dbOrigSize ){
+        assert( pPager->pTmpSpace!=0 );
+        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace);
+      }
+      return rc;
+    }
+    pPgHdr->flags |= PGHDR_NEED_SYNC;
+    sqlite3PcacheMakeDirty(pPgHdr);
+    sqlite3PagerUnrefNotNull(pPgHdr);
+  }
+
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** The page handle passed as the first argument refers to a dirty page 
+** with a page number other than iNew. This function changes the page's 
+** page number to iNew and sets the value of the PgHdr.flags field to 
+** the value passed as the third parameter.
+*/
+SQLITE_PRIVATE void sqlite3PagerRekey(DbPage *pPg, Pgno iNew, u16 flags){
+  assert( pPg->pgno!=iNew );
+  pPg->flags = flags;
+  sqlite3PcacheMove(pPg, iNew);
+}
+
+/*
+** Return a pointer to the data for the specified page.
+*/
+SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){
+  assert( pPg->nRef>0 || pPg->pPager->memDb );
+  return pPg->pData;
+}
+
+/*
+** Return a pointer to the Pager.nExtra bytes of "extra" space 
+** allocated along with the specified page.
+*/
+SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){
+  return pPg->pExtra;
+}
+
+/*
+** Get/set the locking-mode for this pager. Parameter eMode must be one
+** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
+** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
+** the locking-mode is set to the value specified.
+**
+** The returned value is either PAGER_LOCKINGMODE_NORMAL or
+** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
+** locking-mode.
+*/
+SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){
+  assert( eMode==PAGER_LOCKINGMODE_QUERY
+            || eMode==PAGER_LOCKINGMODE_NORMAL
+            || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
+  assert( PAGER_LOCKINGMODE_QUERY<0 );
+  assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
+  assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) );
+  if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){
+    pPager->exclusiveMode = (u8)eMode;
+  }
+  return (int)pPager->exclusiveMode;
+}
+
+/*
+** Set the journal-mode for this pager. Parameter eMode must be one of:
+**
+**    PAGER_JOURNALMODE_DELETE
+**    PAGER_JOURNALMODE_TRUNCATE
+**    PAGER_JOURNALMODE_PERSIST
+**    PAGER_JOURNALMODE_OFF
+**    PAGER_JOURNALMODE_MEMORY
+**    PAGER_JOURNALMODE_WAL
+**
+** The journalmode is set to the value specified if the change is allowed.
+** The change may be disallowed for the following reasons:
+**
+**   *  An in-memory database can only have its journal_mode set to _OFF
+**      or _MEMORY.
+**
+**   *  Temporary databases cannot have _WAL journalmode.
+**
+** The returned indicate the current (possibly updated) journal-mode.
+*/
+SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){
+  u8 eOld = pPager->journalMode;    /* Prior journalmode */
+
+#ifdef SQLITE_DEBUG
+  /* The print_pager_state() routine is intended to be used by the debugger
+  ** only.  We invoke it once here to suppress a compiler warning. */
+  print_pager_state(pPager);
+#endif
+
+
+  /* The eMode parameter is always valid */
+  assert(      eMode==PAGER_JOURNALMODE_DELETE
+            || eMode==PAGER_JOURNALMODE_TRUNCATE
+            || eMode==PAGER_JOURNALMODE_PERSIST
+            || eMode==PAGER_JOURNALMODE_OFF 
+            || eMode==PAGER_JOURNALMODE_WAL 
+            || eMode==PAGER_JOURNALMODE_MEMORY );
+
+  /* This routine is only called from the OP_JournalMode opcode, and
+  ** the logic there will never allow a temporary file to be changed
+  ** to WAL mode.
+  */
+  assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL );
+
+  /* Do allow the journalmode of an in-memory database to be set to
+  ** anything other than MEMORY or OFF
+  */
+  if( MEMDB ){
+    assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF );
+    if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){
+      eMode = eOld;
+    }
+  }
+
+  if( eMode!=eOld ){
+
+    /* Change the journal mode. */
+    assert( pPager->eState!=PAGER_ERROR );
+    pPager->journalMode = (u8)eMode;
+
+    /* When transistioning from TRUNCATE or PERSIST to any other journal
+    ** mode except WAL, unless the pager is in locking_mode=exclusive mode,
+    ** delete the journal file.
+    */
+    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
+    assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
+    assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
+    assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
+    assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
+    assert( (PAGER_JOURNALMODE_WAL & 5)==5 );
+
+    assert( isOpen(pPager->fd) || pPager->exclusiveMode );
+    if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){
+
+      /* In this case we would like to delete the journal file. If it is
+      ** not possible, then that is not a problem. Deleting the journal file
+      ** here is an optimization only.
+      **
+      ** Before deleting the journal file, obtain a RESERVED lock on the
+      ** database file. This ensures that the journal file is not deleted
+      ** while it is in use by some other client.
+      */
+      sqlite3OsClose(pPager->jfd);
+      if( pPager->eLock>=RESERVED_LOCK ){
+        sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
+      }else{
+        int rc = SQLITE_OK;
+        int state = pPager->eState;
+        assert( state==PAGER_OPEN || state==PAGER_READER );
+        if( state==PAGER_OPEN ){
+          rc = sqlite3PagerSharedLock(pPager);
+        }
+        if( pPager->eState==PAGER_READER ){
+          assert( rc==SQLITE_OK );
+          rc = pagerLockDb(pPager, RESERVED_LOCK);
+        }
+        if( rc==SQLITE_OK ){
+          sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
+        }
+        if( rc==SQLITE_OK && state==PAGER_READER ){
+          pagerUnlockDb(pPager, SHARED_LOCK);
+        }else if( state==PAGER_OPEN ){
+          pager_unlock(pPager);
+        }
+        assert( state==pPager->eState );
+      }
+    }
+  }
+
+  /* Return the new journal mode */
+  return (int)pPager->journalMode;
+}
+
+/*
+** Return the current journal mode.
+*/
+SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){
+  return (int)pPager->journalMode;
+}
+
+/*
+** Return TRUE if the pager is in a state where it is OK to change the
+** journalmode.  Journalmode changes can only happen when the database
+** is unmodified.
+*/
+SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){
+  assert( assert_pager_state(pPager) );
+  if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0;
+  if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0;
+  return 1;
+}
+
+/*
+** Get/set the size-limit used for persistent journal files.
+**
+** Setting the size limit to -1 means no limit is enforced.
+** An attempt to set a limit smaller than -1 is a no-op.
+*/
+SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
+  if( iLimit>=-1 ){
+    pPager->journalSizeLimit = iLimit;
+    sqlite3WalLimit(pPager->pWal, iLimit);
+  }
+  return pPager->journalSizeLimit;
+}
+
+/*
+** Return a pointer to the pPager->pBackup variable. The backup module
+** in backup.c maintains the content of this variable. This module
+** uses it opaquely as an argument to sqlite3BackupRestart() and
+** sqlite3BackupUpdate() only.
+*/
+SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
+  return &pPager->pBackup;
+}
+
+#ifndef SQLITE_OMIT_VACUUM
+/*
+** Unless this is an in-memory or temporary database, clear the pager cache.
+*/
+SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *pPager){
+  if( !MEMDB && pPager->tempFile==0 ) pager_reset(pPager);
+}
+#endif
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** This function is called when the user invokes "PRAGMA wal_checkpoint",
+** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint()
+** or wal_blocking_checkpoint() API functions.
+**
+** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
+*/
+SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){
+  int rc = SQLITE_OK;
+  if( pPager->pWal ){
+    rc = sqlite3WalCheckpoint(pPager->pWal, eMode,
+        (eMode==SQLITE_CHECKPOINT_PASSIVE ? 0 : pPager->xBusyHandler),
+        pPager->pBusyHandlerArg,
+        pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace,
+        pnLog, pnCkpt
+    );
+  }
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){
+  return sqlite3WalCallback(pPager->pWal);
+}
+
+/*
+** Return true if the underlying VFS for the given pager supports the
+** primitives necessary for write-ahead logging.
+*/
+SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){
+  const sqlite3_io_methods *pMethods = pPager->fd->pMethods;
+  return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap);
+}
+
+/*
+** Attempt to take an exclusive lock on the database file. If a PENDING lock
+** is obtained instead, immediately release it.
+*/
+static int pagerExclusiveLock(Pager *pPager){
+  int rc;                         /* Return code */
+
+  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
+  rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
+  if( rc!=SQLITE_OK ){
+    /* If the attempt to grab the exclusive lock failed, release the 
+    ** pending lock that may have been obtained instead.  */
+    pagerUnlockDb(pPager, SHARED_LOCK);
+  }
+
+  return rc;
+}
+
+/*
+** Call sqlite3WalOpen() to open the WAL handle. If the pager is in 
+** exclusive-locking mode when this function is called, take an EXCLUSIVE
+** lock on the database file and use heap-memory to store the wal-index
+** in. Otherwise, use the normal shared-memory.
+*/
+static int pagerOpenWal(Pager *pPager){
+  int rc = SQLITE_OK;
+
+  assert( pPager->pWal==0 && pPager->tempFile==0 );
+  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
+
+  /* If the pager is already in exclusive-mode, the WAL module will use 
+  ** heap-memory for the wal-index instead of the VFS shared-memory 
+  ** implementation. Take the exclusive lock now, before opening the WAL
+  ** file, to make sure this is safe.
+  */
+  if( pPager->exclusiveMode ){
+    rc = pagerExclusiveLock(pPager);
+  }
+
+  /* Open the connection to the log file. If this operation fails, 
+  ** (e.g. due to malloc() failure), return an error code.
+  */
+  if( rc==SQLITE_OK ){
+    rc = sqlite3WalOpen(pPager->pVfs,
+        pPager->fd, pPager->zWal, pPager->exclusiveMode,
+        pPager->journalSizeLimit, &pPager->pWal
+    );
+  }
+  pagerFixMaplimit(pPager);
+
+  return rc;
+}
+
+
+/*
+** The caller must be holding a SHARED lock on the database file to call
+** this function.
+**
+** If the pager passed as the first argument is open on a real database
+** file (not a temp file or an in-memory database), and the WAL file
+** is not already open, make an attempt to open it now. If successful,
+** return SQLITE_OK. If an error occurs or the VFS used by the pager does 
+** not support the xShmXXX() methods, return an error code. *pbOpen is
+** not modified in either case.
+**
+** If the pager is open on a temp-file (or in-memory database), or if
+** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK
+** without doing anything.
+*/
+SQLITE_PRIVATE int sqlite3PagerOpenWal(
+  Pager *pPager,                  /* Pager object */
+  int *pbOpen                     /* OUT: Set to true if call is a no-op */
+){
+  int rc = SQLITE_OK;             /* Return code */
+
+  assert( assert_pager_state(pPager) );
+  assert( pPager->eState==PAGER_OPEN   || pbOpen );
+  assert( pPager->eState==PAGER_READER || !pbOpen );
+  assert( pbOpen==0 || *pbOpen==0 );
+  assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) );
+
+  if( !pPager->tempFile && !pPager->pWal ){
+    if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
+
+    /* Close any rollback journal previously open */
+    sqlite3OsClose(pPager->jfd);
+
+    rc = pagerOpenWal(pPager);
+    if( rc==SQLITE_OK ){
+      pPager->journalMode = PAGER_JOURNALMODE_WAL;
+      pPager->eState = PAGER_OPEN;
+    }
+  }else{
+    *pbOpen = 1;
+  }
+
+  return rc;
+}
+
+/*
+** This function is called to close the connection to the log file prior
+** to switching from WAL to rollback mode.
+**
+** Before closing the log file, this function attempts to take an 
+** EXCLUSIVE lock on the database file. If this cannot be obtained, an
+** error (SQLITE_BUSY) is returned and the log connection is not closed.
+** If successful, the EXCLUSIVE lock is not released before returning.
+*/
+SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){
+  int rc = SQLITE_OK;
+
+  assert( pPager->journalMode==PAGER_JOURNALMODE_WAL );
+
+  /* If the log file is not already open, but does exist in the file-system,
+  ** it may need to be checkpointed before the connection can switch to
+  ** rollback mode. Open it now so this can happen.
+  */
+  if( !pPager->pWal ){
+    int logexists = 0;
+    rc = pagerLockDb(pPager, SHARED_LOCK);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3OsAccess(
+          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists
+      );
+    }
+    if( rc==SQLITE_OK && logexists ){
+      rc = pagerOpenWal(pPager);
+    }
+  }
+    
+  /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on
+  ** the database file, the log and log-summary files will be deleted.
+  */
+  if( rc==SQLITE_OK && pPager->pWal ){
+    rc = pagerExclusiveLock(pPager);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags,
+                           pPager->pageSize, (u8*)pPager->pTmpSpace);
+      pPager->pWal = 0;
+      pagerFixMaplimit(pPager);
+    }
+  }
+  return rc;
+}
+
+#endif /* !SQLITE_OMIT_WAL */
+
+#ifdef SQLITE_ENABLE_ZIPVFS
+/*
+** A read-lock must be held on the pager when this function is called. If
+** the pager is in WAL mode and the WAL file currently contains one or more
+** frames, return the size in bytes of the page images stored within the
+** WAL frames. Otherwise, if this is not a WAL database or the WAL file
+** is empty, return 0.
+*/
+SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){
+  assert( pPager->eState>=PAGER_READER );
+  return sqlite3WalFramesize(pPager->pWal);
+}
+#endif
+
+
+#endif /* SQLITE_OMIT_DISKIO */
+
+/************** End of pager.c ***********************************************/
+/************** Begin file wal.c *********************************************/
+/*
+** 2010 February 1
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains the implementation of a write-ahead log (WAL) used in 
+** "journal_mode=WAL" mode.
+**
+** WRITE-AHEAD LOG (WAL) FILE FORMAT
+**
+** A WAL file consists of a header followed by zero or more "frames".
+** Each frame records the revised content of a single page from the
+** database file.  All changes to the database are recorded by writing
+** frames into the WAL.  Transactions commit when a frame is written that
+** contains a commit marker.  A single WAL can and usually does record 
+** multiple transactions.  Periodically, the content of the WAL is
+** transferred back into the database file in an operation called a
+** "checkpoint".
+**
+** A single WAL file can be used multiple times.  In other words, the
+** WAL can fill up with frames and then be checkpointed and then new
+** frames can overwrite the old ones.  A WAL always grows from beginning
+** toward the end.  Checksums and counters attached to each frame are
+** used to determine which frames within the WAL are valid and which
+** are leftovers from prior checkpoints.
+**
+** The WAL header is 32 bytes in size and consists of the following eight
+** big-endian 32-bit unsigned integer values:
+**
+**     0: Magic number.  0x377f0682 or 0x377f0683
+**     4: File format version.  Currently 3007000
+**     8: Database page size.  Example: 1024
+**    12: Checkpoint sequence number
+**    16: Salt-1, random integer incremented with each checkpoint
+**    20: Salt-2, a different random integer changing with each ckpt
+**    24: Checksum-1 (first part of checksum for first 24 bytes of header).
+**    28: Checksum-2 (second part of checksum for first 24 bytes of header).
+**
+** Immediately following the wal-header are zero or more frames. Each
+** frame consists of a 24-byte frame-header followed by a <page-size> bytes
+** of page data. The frame-header is six big-endian 32-bit unsigned 
+** integer values, as follows:
+**
+**     0: Page number.
+**     4: For commit records, the size of the database image in pages 
+**        after the commit. For all other records, zero.
+**     8: Salt-1 (copied from the header)
+**    12: Salt-2 (copied from the header)
+**    16: Checksum-1.
+**    20: Checksum-2.
+**
+** A frame is considered valid if and only if the following conditions are
+** true:
+**
+**    (1) The salt-1 and salt-2 values in the frame-header match
+**        salt values in the wal-header
+**
+**    (2) The checksum values in the final 8 bytes of the frame-header
+**        exactly match the checksum computed consecutively on the
+**        WAL header and the first 8 bytes and the content of all frames
+**        up to and including the current frame.
+**
+** The checksum is computed using 32-bit big-endian integers if the
+** magic number in the first 4 bytes of the WAL is 0x377f0683 and it
+** is computed using little-endian if the magic number is 0x377f0682.
+** The checksum values are always stored in the frame header in a
+** big-endian format regardless of which byte order is used to compute
+** the checksum.  The checksum is computed by interpreting the input as
+** an even number of unsigned 32-bit integers: x[0] through x[N].  The
+** algorithm used for the checksum is as follows:
+** 
+**   for i from 0 to n-1 step 2:
+**     s0 += x[i] + s1;
+**     s1 += x[i+1] + s0;
+**   endfor
+**
+** Note that s0 and s1 are both weighted checksums using fibonacci weights
+** in reverse order (the largest fibonacci weight occurs on the first element
+** of the sequence being summed.)  The s1 value spans all 32-bit 
+** terms of the sequence whereas s0 omits the final term.
+**
+** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the
+** WAL is transferred into the database, then the database is VFS.xSync-ed.
+** The VFS.xSync operations serve as write barriers - all writes launched
+** before the xSync must complete before any write that launches after the
+** xSync begins.
+**
+** After each checkpoint, the salt-1 value is incremented and the salt-2
+** value is randomized.  This prevents old and new frames in the WAL from
+** being considered valid at the same time and being checkpointing together
+** following a crash.
+**
+** READER ALGORITHM
+**
+** To read a page from the database (call it page number P), a reader
+** first checks the WAL to see if it contains page P.  If so, then the
+** last valid instance of page P that is a followed by a commit frame
+** or is a commit frame itself becomes the value read.  If the WAL
+** contains no copies of page P that are valid and which are a commit
+** frame or are followed by a commit frame, then page P is read from
+** the database file.
+**
+** To start a read transaction, the reader records the index of the last
+** valid frame in the WAL.  The reader uses this recorded "mxFrame" value
+** for all subsequent read operations.  New transactions can be appended
+** to the WAL, but as long as the reader uses its original mxFrame value
+** and ignores the newly appended content, it will see a consistent snapshot
+** of the database from a single point in time.  This technique allows
+** multiple concurrent readers to view different versions of the database
+** content simultaneously.
+**
+** The reader algorithm in the previous paragraphs works correctly, but 
+** because frames for page P can appear anywhere within the WAL, the
+** reader has to scan the entire WAL looking for page P frames.  If the
+** WAL is large (multiple megabytes is typical) that scan can be slow,
+** and read performance suffers.  To overcome this problem, a separate
+** data structure called the wal-index is maintained to expedite the
+** search for frames of a particular page.
+** 
+** WAL-INDEX FORMAT
+**
+** Conceptually, the wal-index is shared memory, though VFS implementations
+** might choose to implement the wal-index using a mmapped file.  Because
+** the wal-index is shared memory, SQLite does not support journal_mode=WAL 
+** on a network filesystem.  All users of the database must be able to
+** share memory.
+**
+** The wal-index is transient.  After a crash, the wal-index can (and should
+** be) reconstructed from the original WAL file.  In fact, the VFS is required
+** to either truncate or zero the header of the wal-index when the last
+** connection to it closes.  Because the wal-index is transient, it can
+** use an architecture-specific format; it does not have to be cross-platform.
+** Hence, unlike the database and WAL file formats which store all values
+** as big endian, the wal-index can store multi-byte values in the native
+** byte order of the host computer.
+**
+** The purpose of the wal-index is to answer this question quickly:  Given
+** a page number P and a maximum frame index M, return the index of the 
+** last frame in the wal before frame M for page P in the WAL, or return
+** NULL if there are no frames for page P in the WAL prior to M.
+**
+** The wal-index consists of a header region, followed by an one or
+** more index blocks.  
+**
+** The wal-index header contains the total number of frames within the WAL
+** in the mxFrame field.
+**
+** Each index block except for the first contains information on 
+** HASHTABLE_NPAGE frames. The first index block contains information on
+** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and 
+** HASHTABLE_NPAGE are selected so that together the wal-index header and
+** first index block are the same size as all other index blocks in the
+** wal-index.
+**
+** Each index block contains two sections, a page-mapping that contains the
+** database page number associated with each wal frame, and a hash-table 
+** that allows readers to query an index block for a specific page number.
+** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE
+** for the first index block) 32-bit page numbers. The first entry in the 
+** first index-block contains the database page number corresponding to the
+** first frame in the WAL file. The first entry in the second index block
+** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in
+** the log, and so on.
+**
+** The last index block in a wal-index usually contains less than the full
+** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers,
+** depending on the contents of the WAL file. This does not change the
+** allocated size of the page-mapping array - the page-mapping array merely
+** contains unused entries.
+**
+** Even without using the hash table, the last frame for page P
+** can be found by scanning the page-mapping sections of each index block
+** starting with the last index block and moving toward the first, and
+** within each index block, starting at the end and moving toward the
+** beginning.  The first entry that equals P corresponds to the frame
+** holding the content for that page.
+**
+** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers.
+** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the
+** hash table for each page number in the mapping section, so the hash 
+** table is never more than half full.  The expected number of collisions 
+** prior to finding a match is 1.  Each entry of the hash table is an
+** 1-based index of an entry in the mapping section of the same
+** index block.   Let K be the 1-based index of the largest entry in
+** the mapping section.  (For index blocks other than the last, K will
+** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
+** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
+** contain a value of 0.
+**
+** To look for page P in the hash table, first compute a hash iKey on
+** P as follows:
+**
+**      iKey = (P * 383) % HASHTABLE_NSLOT
+**
+** Then start scanning entries of the hash table, starting with iKey
+** (wrapping around to the beginning when the end of the hash table is
+** reached) until an unused hash slot is found. Let the first unused slot
+** be at index iUnused.  (iUnused might be less than iKey if there was
+** wrap-around.) Because the hash table is never more than half full,
+** the search is guaranteed to eventually hit an unused entry.  Let 
+** iMax be the value between iKey and iUnused, closest to iUnused,
+** where aHash[iMax]==P.  If there is no iMax entry (if there exists
+** no hash slot such that aHash[i]==p) then page P is not in the
+** current index block.  Otherwise the iMax-th mapping entry of the
+** current index block corresponds to the last entry that references 
+** page P.
+**
+** A hash search begins with the last index block and moves toward the
+** first index block, looking for entries corresponding to page P.  On
+** average, only two or three slots in each index block need to be
+** examined in order to either find the last entry for page P, or to
+** establish that no such entry exists in the block.  Each index block
+** holds over 4000 entries.  So two or three index blocks are sufficient
+** to cover a typical 10 megabyte WAL file, assuming 1K pages.  8 or 10
+** comparisons (on average) suffice to either locate a frame in the
+** WAL or to establish that the frame does not exist in the WAL.  This
+** is much faster than scanning the entire 10MB WAL.
+**
+** Note that entries are added in order of increasing K.  Hence, one
+** reader might be using some value K0 and a second reader that started
+** at a later time (after additional transactions were added to the WAL
+** and to the wal-index) might be using a different value K1, where K1>K0.
+** Both readers can use the same hash table and mapping section to get
+** the correct result.  There may be entries in the hash table with
+** K>K0 but to the first reader, those entries will appear to be unused
+** slots in the hash table and so the first reader will get an answer as
+** if no values greater than K0 had ever been inserted into the hash table
+** in the first place - which is what reader one wants.  Meanwhile, the
+** second reader using K1 will see additional values that were inserted
+** later, which is exactly what reader two wants.  
+**
+** When a rollback occurs, the value of K is decreased. Hash table entries
+** that correspond to frames greater than the new K value are removed
+** from the hash table at this point.
+*/
+#ifndef SQLITE_OMIT_WAL
+
+
+/*
+** Trace output macros
+*/
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+SQLITE_PRIVATE int sqlite3WalTrace = 0;
+# define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
+#else
+# define WALTRACE(X)
+#endif
+
+/*
+** The maximum (and only) versions of the wal and wal-index formats
+** that may be interpreted by this version of SQLite.
+**
+** If a client begins recovering a WAL file and finds that (a) the checksum
+** values in the wal-header are correct and (b) the version field is not
+** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN.
+**
+** Similarly, if a client successfully reads a wal-index header (i.e. the 
+** checksum test is successful) and finds that the version field is not
+** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite
+** returns SQLITE_CANTOPEN.
+*/
+#define WAL_MAX_VERSION      3007000
+#define WALINDEX_MAX_VERSION 3007000
+
+/*
+** Indices of various locking bytes.   WAL_NREADER is the number
+** of available reader locks and should be at least 3.
+*/
+#define WAL_WRITE_LOCK         0
+#define WAL_ALL_BUT_WRITE      1
+#define WAL_CKPT_LOCK          1
+#define WAL_RECOVER_LOCK       2
+#define WAL_READ_LOCK(I)       (3+(I))
+#define WAL_NREADER            (SQLITE_SHM_NLOCK-3)
+
+
+/* Object declarations */
+typedef struct WalIndexHdr WalIndexHdr;
+typedef struct WalIterator WalIterator;
+typedef struct WalCkptInfo WalCkptInfo;
+
+
+/*
+** The following object holds a copy of the wal-index header content.
+**
+** The actual header in the wal-index consists of two copies of this
+** object.
+**
+** The szPage value can be any power of 2 between 512 and 32768, inclusive.
+** Or it can be 1 to represent a 65536-byte page.  The latter case was
+** added in 3.7.1 when support for 64K pages was added.  
+*/
+struct WalIndexHdr {
+  u32 iVersion;                   /* Wal-index version */
+  u32 unused;                     /* Unused (padding) field */
+  u32 iChange;                    /* Counter incremented each transaction */
+  u8 isInit;                      /* 1 when initialized */
+  u8 bigEndCksum;                 /* True if checksums in WAL are big-endian */
+  u16 szPage;                     /* Database page size in bytes. 1==64K */
+  u32 mxFrame;                    /* Index of last valid frame in the WAL */
+  u32 nPage;                      /* Size of database in pages */
+  u32 aFrameCksum[2];             /* Checksum of last frame in log */
+  u32 aSalt[2];                   /* Two salt values copied from WAL header */
+  u32 aCksum[2];                  /* Checksum over all prior fields */
+};
+
+/*
+** A copy of the following object occurs in the wal-index immediately
+** following the second copy of the WalIndexHdr.  This object stores
+** information used by checkpoint.
+**
+** nBackfill is the number of frames in the WAL that have been written
+** back into the database. (We call the act of moving content from WAL to
+** database "backfilling".)  The nBackfill number is never greater than
+** WalIndexHdr.mxFrame.  nBackfill can only be increased by threads
+** holding the WAL_CKPT_LOCK lock (which includes a recovery thread).
+** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from
+** mxFrame back to zero when the WAL is reset.
+**
+** There is one entry in aReadMark[] for each reader lock.  If a reader
+** holds read-lock K, then the value in aReadMark[K] is no greater than
+** the mxFrame for that reader.  The value READMARK_NOT_USED (0xffffffff)
+** for any aReadMark[] means that entry is unused.  aReadMark[0] is 
+** a special case; its value is never used and it exists as a place-holder
+** to avoid having to offset aReadMark[] indexs by one.  Readers holding
+** WAL_READ_LOCK(0) always ignore the entire WAL and read all content
+** directly from the database.
+**
+** The value of aReadMark[K] may only be changed by a thread that
+** is holding an exclusive lock on WAL_READ_LOCK(K).  Thus, the value of
+** aReadMark[K] cannot changed while there is a reader is using that mark
+** since the reader will be holding a shared lock on WAL_READ_LOCK(K).
+**
+** The checkpointer may only transfer frames from WAL to database where
+** the frame numbers are less than or equal to every aReadMark[] that is
+** in use (that is, every aReadMark[j] for which there is a corresponding
+** WAL_READ_LOCK(j)).  New readers (usually) pick the aReadMark[] with the
+** largest value and will increase an unused aReadMark[] to mxFrame if there
+** is not already an aReadMark[] equal to mxFrame.  The exception to the
+** previous sentence is when nBackfill equals mxFrame (meaning that everything
+** in the WAL has been backfilled into the database) then new readers
+** will choose aReadMark[0] which has value 0 and hence such reader will
+** get all their all content directly from the database file and ignore 
+** the WAL.
+**
+** Writers normally append new frames to the end of the WAL.  However,
+** if nBackfill equals mxFrame (meaning that all WAL content has been
+** written back into the database) and if no readers are using the WAL
+** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then
+** the writer will first "reset" the WAL back to the beginning and start
+** writing new content beginning at frame 1.
+**
+** We assume that 32-bit loads are atomic and so no locks are needed in
+** order to read from any aReadMark[] entries.
+*/
+struct WalCkptInfo {
+  u32 nBackfill;                  /* Number of WAL frames backfilled into DB */
+  u32 aReadMark[WAL_NREADER];     /* Reader marks */
+};
+#define READMARK_NOT_USED  0xffffffff
+
+
+/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
+** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
+** only support mandatory file-locks, we do not read or write data
+** from the region of the file on which locks are applied.
+*/
+#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo))
+#define WALINDEX_LOCK_RESERVED 16
+#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)
+
+/* Size of header before each frame in wal */
+#define WAL_FRAME_HDRSIZE 24
+
+/* Size of write ahead log header, including checksum. */
+/* #define WAL_HDRSIZE 24 */
+#define WAL_HDRSIZE 32
+
+/* WAL magic value. Either this value, or the same value with the least
+** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit
+** big-endian format in the first 4 bytes of a WAL file.
+**
+** If the LSB is set, then the checksums for each frame within the WAL
+** file are calculated by treating all data as an array of 32-bit 
+** big-endian words. Otherwise, they are calculated by interpreting 
+** all data as 32-bit little-endian words.
+*/
+#define WAL_MAGIC 0x377f0682
+
+/*
+** Return the offset of frame iFrame in the write-ahead log file, 
+** assuming a database page size of szPage bytes. The offset returned
+** is to the start of the write-ahead log frame-header.
+*/
+#define walFrameOffset(iFrame, szPage) (                               \
+  WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE)         \
+)
+
+/*
+** An open write-ahead log file is represented by an instance of the
+** following object.
+*/
+struct Wal {
+  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
+  sqlite3_file *pDbFd;       /* File handle for the database file */
+  sqlite3_file *pWalFd;      /* File handle for WAL file */
+  u32 iCallback;             /* Value to pass to log callback (or 0) */
+  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
+  int nWiData;               /* Size of array apWiData */
+  int szFirstBlock;          /* Size of first block written to WAL file */
+  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
+  u32 szPage;                /* Database page size */
+  i16 readLock;              /* Which read lock is being held.  -1 for none */
+  u8 syncFlags;              /* Flags to use to sync header writes */
+  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
+  u8 writeLock;              /* True if in a write transaction */
+  u8 ckptLock;               /* True if holding a checkpoint lock */
+  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
+  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
+  u8 syncHeader;             /* Fsync the WAL header if true */
+  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
+  WalIndexHdr hdr;           /* Wal-index header for current transaction */
+  const char *zWalName;      /* Name of WAL file */
+  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
+#ifdef SQLITE_DEBUG
+  u8 lockError;              /* True if a locking error has occurred */
+#endif
+};
+
+/*
+** Candidate values for Wal.exclusiveMode.
+*/
+#define WAL_NORMAL_MODE     0
+#define WAL_EXCLUSIVE_MODE  1     
+#define WAL_HEAPMEMORY_MODE 2
+
+/*
+** Possible values for WAL.readOnly
+*/
+#define WAL_RDWR        0    /* Normal read/write connection */
+#define WAL_RDONLY      1    /* The WAL file is readonly */
+#define WAL_SHM_RDONLY  2    /* The SHM file is readonly */
+
+/*
+** Each page of the wal-index mapping contains a hash-table made up of
+** an array of HASHTABLE_NSLOT elements of the following type.
+*/
+typedef u16 ht_slot;
+
+/*
+** This structure is used to implement an iterator that loops through
+** all frames in the WAL in database page order. Where two or more frames
+** correspond to the same database page, the iterator visits only the 
+** frame most recently written to the WAL (in other words, the frame with
+** the largest index).
+**
+** The internals of this structure are only accessed by:
+**
+**   walIteratorInit() - Create a new iterator,
+**   walIteratorNext() - Step an iterator,
+**   walIteratorFree() - Free an iterator.
+**
+** This functionality is used by the checkpoint code (see walCheckpoint()).
+*/
+struct WalIterator {
+  int iPrior;                     /* Last result returned from the iterator */
+  int nSegment;                   /* Number of entries in aSegment[] */
+  struct WalSegment {
+    int iNext;                    /* Next slot in aIndex[] not yet returned */
+    ht_slot *aIndex;              /* i0, i1, i2... such that aPgno[iN] ascend */
+    u32 *aPgno;                   /* Array of page numbers. */
+    int nEntry;                   /* Nr. of entries in aPgno[] and aIndex[] */
+    int iZero;                    /* Frame number associated with aPgno[0] */
+  } aSegment[1];                  /* One for every 32KB page in the wal-index */
+};
+
+/*
+** Define the parameters of the hash tables in the wal-index file. There
+** is a hash-table following every HASHTABLE_NPAGE page numbers in the
+** wal-index.
+**
+** Changing any of these constants will alter the wal-index format and
+** create incompatibilities.
+*/
+#define HASHTABLE_NPAGE      4096                 /* Must be power of 2 */
+#define HASHTABLE_HASH_1     383                  /* Should be prime */
+#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
+
+/* 
+** The block of page numbers associated with the first hash-table in a
+** wal-index is smaller than usual. This is so that there is a complete
+** hash-table on each aligned 32KB page of the wal-index.
+*/
+#define HASHTABLE_NPAGE_ONE  (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))
+
+/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
+#define WALINDEX_PGSZ   (                                         \
+    sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
+)
+
+/*
+** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
+** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
+** numbered from zero.
+**
+** If this call is successful, *ppPage is set to point to the wal-index
+** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
+** then an SQLite error code is returned and *ppPage is set to 0.
+*/
+static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
+  int rc = SQLITE_OK;
+
+  /* Enlarge the pWal->apWiData[] array if required */
+  if( pWal->nWiData<=iPage ){
+    int nByte = sizeof(u32*)*(iPage+1);
+    volatile u32 **apNew;
+    apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
+    if( !apNew ){
+      *ppPage = 0;
+      return SQLITE_NOMEM;
+    }
+    memset((void*)&apNew[pWal->nWiData], 0,
+           sizeof(u32*)*(iPage+1-pWal->nWiData));
+    pWal->apWiData = apNew;
+    pWal->nWiData = iPage+1;
+  }
+
+  /* Request a pointer to the required page from the VFS */
+  if( pWal->apWiData[iPage]==0 ){
+    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
+      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
+      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
+          pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
+      );
+      if( rc==SQLITE_READONLY ){
+        pWal->readOnly |= WAL_SHM_RDONLY;
+        rc = SQLITE_OK;
+      }
+    }
+  }
+
+  *ppPage = pWal->apWiData[iPage];
+  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
+  return rc;
+}
+
+/*
+** Return a pointer to the WalCkptInfo structure in the wal-index.
+*/
+static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
+  return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]);
+}
+
+/*
+** Return a pointer to the WalIndexHdr structure in the wal-index.
+*/
+static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
+  return (volatile WalIndexHdr*)pWal->apWiData[0];
+}
+
+/*
+** The argument to this macro must be of type u32. On a little-endian
+** architecture, it returns the u32 value that results from interpreting
+** the 4 bytes as a big-endian value. On a big-endian architecture, it
+** returns the value that would be produced by interpreting the 4 bytes
+** of the input value as a little-endian integer.
+*/
+#define BYTESWAP32(x) ( \
+    (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8)  \
+  + (((x)&0x00FF0000)>>8)  + (((x)&0xFF000000)>>24) \
+)
+
+/*
+** Generate or extend an 8 byte checksum based on the data in 
+** array aByte[] and the initial values of aIn[0] and aIn[1] (or
+** initial values of 0 and 0 if aIn==NULL).
+**
+** The checksum is written back into aOut[] before returning.
+**
+** nByte must be a positive multiple of 8.
+*/
+static void walChecksumBytes(
+  int nativeCksum, /* True for native byte-order, false for non-native */
+  u8 *a,           /* Content to be checksummed */
+  int nByte,       /* Bytes of content in a[].  Must be a multiple of 8. */
+  const u32 *aIn,  /* Initial checksum value input */
+  u32 *aOut        /* OUT: Final checksum value output */
+){
+  u32 s1, s2;
+  u32 *aData = (u32 *)a;
+  u32 *aEnd = (u32 *)&a[nByte];
+
+  if( aIn ){
+    s1 = aIn[0];
+    s2 = aIn[1];
+  }else{
+    s1 = s2 = 0;
+  }
+
+  assert( nByte>=8 );
+  assert( (nByte&0x00000007)==0 );
+
+  if( nativeCksum ){
+    do {
+      s1 += *aData++ + s2;
+      s2 += *aData++ + s1;
+    }while( aData<aEnd );
+  }else{
+    do {
+      s1 += BYTESWAP32(aData[0]) + s2;
+      s2 += BYTESWAP32(aData[1]) + s1;
+      aData += 2;
+    }while( aData<aEnd );
+  }
+
+  aOut[0] = s1;
+  aOut[1] = s2;
+}
+
+static void walShmBarrier(Wal *pWal){
+  if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
+    sqlite3OsShmBarrier(pWal->pDbFd);
+  }
+}
+
+/*
+** Write the header information in pWal->hdr into the wal-index.
+**
+** The checksum on pWal->hdr is updated before it is written.
+*/
+static void walIndexWriteHdr(Wal *pWal){
+  volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
+  const int nCksum = offsetof(WalIndexHdr, aCksum);
+
+  assert( pWal->writeLock );
+  pWal->hdr.isInit = 1;
+  pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
+  walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
+  memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr));
+  walShmBarrier(pWal);
+  memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr));
+}
+
+/*
+** This function encodes a single frame header and writes it to a buffer
+** supplied by the caller. A frame-header is made up of a series of 
+** 4-byte big-endian integers, as follows:
+**
+**     0: Page number.
+**     4: For commit records, the size of the database image in pages 
+**        after the commit. For all other records, zero.
+**     8: Salt-1 (copied from the wal-header)
+**    12: Salt-2 (copied from the wal-header)
+**    16: Checksum-1.
+**    20: Checksum-2.
+*/
+static void walEncodeFrame(
+  Wal *pWal,                      /* The write-ahead log */
+  u32 iPage,                      /* Database page number for frame */
+  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
+  u8 *aData,                      /* Pointer to page data */
+  u8 *aFrame                      /* OUT: Write encoded frame here */
+){
+  int nativeCksum;                /* True for native byte-order checksums */
+  u32 *aCksum = pWal->hdr.aFrameCksum;
+  assert( WAL_FRAME_HDRSIZE==24 );
+  sqlite3Put4byte(&aFrame[0], iPage);
+  sqlite3Put4byte(&aFrame[4], nTruncate);
+  memcpy(&aFrame[8], pWal->hdr.aSalt, 8);
+
+  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
+  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
+  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
+
+  sqlite3Put4byte(&aFrame[16], aCksum[0]);
+  sqlite3Put4byte(&aFrame[20], aCksum[1]);
+}
+
+/*
+** Check to see if the frame with header in aFrame[] and content
+** in aData[] is valid.  If it is a valid frame, fill *piPage and
+** *pnTruncate and return true.  Return if the frame is not valid.
+*/
+static int walDecodeFrame(
+  Wal *pWal,                      /* The write-ahead log */
+  u32 *piPage,                    /* OUT: Database page number for frame */
+  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
+  u8 *aData,                      /* Pointer to page data (for checksum) */
+  u8 *aFrame                      /* Frame data */
+){
+  int nativeCksum;                /* True for native byte-order checksums */
+  u32 *aCksum = pWal->hdr.aFrameCksum;
+  u32 pgno;                       /* Page number of the frame */
+  assert( WAL_FRAME_HDRSIZE==24 );
+
+  /* A frame is only valid if the salt values in the frame-header
+  ** match the salt values in the wal-header. 
+  */
+  if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){
+    return 0;
+  }
+
+  /* A frame is only valid if the page number is creater than zero.
+  */
+  pgno = sqlite3Get4byte(&aFrame[0]);
+  if( pgno==0 ){
+    return 0;
+  }
+
+  /* A frame is only valid if a checksum of the WAL header,
+  ** all prior frams, the first 16 bytes of this frame-header, 
+  ** and the frame-data matches the checksum in the last 8 
+  ** bytes of this frame-header.
+  */
+  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
+  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
+  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
+  if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) 
+   || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) 
+  ){
+    /* Checksum failed. */
+    return 0;
+  }
+
+  /* If we reach this point, the frame is valid.  Return the page number
+  ** and the new database size.
+  */
+  *piPage = pgno;
+  *pnTruncate = sqlite3Get4byte(&aFrame[4]);
+  return 1;
+}
+
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+/*
+** Names of locks.  This routine is used to provide debugging output and is not
+** a part of an ordinary build.
+*/
+static const char *walLockName(int lockIdx){
+  if( lockIdx==WAL_WRITE_LOCK ){
+    return "WRITE-LOCK";
+  }else if( lockIdx==WAL_CKPT_LOCK ){
+    return "CKPT-LOCK";
+  }else if( lockIdx==WAL_RECOVER_LOCK ){
+    return "RECOVER-LOCK";
+  }else{
+    static char zName[15];
+    sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]",
+                     lockIdx-WAL_READ_LOCK(0));
+    return zName;
+  }
+}
+#endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */
+    
+
+/*
+** Set or release locks on the WAL.  Locks are either shared or exclusive.
+** A lock cannot be moved directly between shared and exclusive - it must go
+** through the unlocked state first.
+**
+** In locking_mode=EXCLUSIVE, all of these routines become no-ops.
+*/
+static int walLockShared(Wal *pWal, int lockIdx){
+  int rc;
+  if( pWal->exclusiveMode ) return SQLITE_OK;
+  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
+                        SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
+  WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
+            walLockName(lockIdx), rc ? "failed" : "ok"));
+  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
+  return rc;
+}
+static void walUnlockShared(Wal *pWal, int lockIdx){
+  if( pWal->exclusiveMode ) return;
+  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
+                         SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
+  WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
+}
+static int walLockExclusive(Wal *pWal, int lockIdx, int n){
+  int rc;
+  if( pWal->exclusiveMode ) return SQLITE_OK;
+  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
+                        SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
+  WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
+            walLockName(lockIdx), n, rc ? "failed" : "ok"));
+  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
+  return rc;
+}
+static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
+  if( pWal->exclusiveMode ) return;
+  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
+                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
+  WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
+             walLockName(lockIdx), n));
+}
+
+/*
+** Compute a hash on a page number.  The resulting hash value must land
+** between 0 and (HASHTABLE_NSLOT-1).  The walHashNext() function advances
+** the hash to the next value in the event of a collision.
+*/
+static int walHash(u32 iPage){
+  assert( iPage>0 );
+  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
+  return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
+}
+static int walNextHash(int iPriorHash){
+  return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
+}
+
+/* 
+** Return pointers to the hash table and page number array stored on
+** page iHash of the wal-index. The wal-index is broken into 32KB pages
+** numbered starting from 0.
+**
+** Set output variable *paHash to point to the start of the hash table
+** in the wal-index file. Set *piZero to one less than the frame 
+** number of the first frame indexed by this hash table. If a
+** slot in the hash table is set to N, it refers to frame number 
+** (*piZero+N) in the log.
+**
+** Finally, set *paPgno so that *paPgno[1] is the page number of the
+** first frame indexed by the hash table, frame (*piZero+1).
+*/
+static int walHashGet(
+  Wal *pWal,                      /* WAL handle */
+  int iHash,                      /* Find the iHash'th table */
+  volatile ht_slot **paHash,      /* OUT: Pointer to hash index */
+  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
+  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
+){
+  int rc;                         /* Return code */
+  volatile u32 *aPgno;
+
+  rc = walIndexPage(pWal, iHash, &aPgno);
+  assert( rc==SQLITE_OK || iHash>0 );
+
+  if( rc==SQLITE_OK ){
+    u32 iZero;
+    volatile ht_slot *aHash;
+
+    aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE];
+    if( iHash==0 ){
+      aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)];
+      iZero = 0;
+    }else{
+      iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
+    }
+  
+    *paPgno = &aPgno[-1];
+    *paHash = aHash;
+    *piZero = iZero;
+  }
+  return rc;
+}
+
+/*
+** Return the number of the wal-index page that contains the hash-table
+** and page-number array that contain entries corresponding to WAL frame
+** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages 
+** are numbered starting from 0.
+*/
+static int walFramePage(u32 iFrame){
+  int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
+  assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
+       && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
+       && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
+       && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
+       && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
+  );
+  return iHash;
+}
+
+/*
+** Return the page number associated with frame iFrame in this WAL.
+*/
+static u32 walFramePgno(Wal *pWal, u32 iFrame){
+  int iHash = walFramePage(iFrame);
+  if( iHash==0 ){
+    return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
+  }
+  return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
+}
+
+/*
+** Remove entries from the hash table that point to WAL slots greater
+** than pWal->hdr.mxFrame.
+**
+** This function is called whenever pWal->hdr.mxFrame is decreased due
+** to a rollback or savepoint.
+**
+** At most only the hash table containing pWal->hdr.mxFrame needs to be
+** updated.  Any later hash tables will be automatically cleared when
+** pWal->hdr.mxFrame advances to the point where those hash tables are
+** actually needed.
+*/
+static void walCleanupHash(Wal *pWal){
+  volatile ht_slot *aHash = 0;    /* Pointer to hash table to clear */
+  volatile u32 *aPgno = 0;        /* Page number array for hash table */
+  u32 iZero = 0;                  /* frame == (aHash[x]+iZero) */
+  int iLimit = 0;                 /* Zero values greater than this */
+  int nByte;                      /* Number of bytes to zero in aPgno[] */
+  int i;                          /* Used to iterate through aHash[] */
+
+  assert( pWal->writeLock );
+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 );
+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE );
+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 );
+
+  if( pWal->hdr.mxFrame==0 ) return;
+
+  /* Obtain pointers to the hash-table and page-number array containing 
+  ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
+  ** that the page said hash-table and array reside on is already mapped.
+  */
+  assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
+  assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
+  walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);
+
+  /* Zero all hash-table entries that correspond to frame numbers greater
+  ** than pWal->hdr.mxFrame.
+  */
+  iLimit = pWal->hdr.mxFrame - iZero;
+  assert( iLimit>0 );
+  for(i=0; i<HASHTABLE_NSLOT; i++){
+    if( aHash[i]>iLimit ){
+      aHash[i] = 0;
+    }
+  }
+  
+  /* Zero the entries in the aPgno array that correspond to frames with
+  ** frame numbers greater than pWal->hdr.mxFrame. 
+  */
+  nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]);
+  memset((void *)&aPgno[iLimit+1], 0, nByte);
+
+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
+  /* Verify that the every entry in the mapping region is still reachable
+  ** via the hash table even after the cleanup.
+  */
+  if( iLimit ){
+    int i;           /* Loop counter */
+    int iKey;        /* Hash key */
+    for(i=1; i<=iLimit; i++){
+      for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
+        if( aHash[iKey]==i ) break;
+      }
+      assert( aHash[iKey]==i );
+    }
+  }
+#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
+}
+
+
+/*
+** Set an entry in the wal-index that will map database page number
+** pPage into WAL frame iFrame.
+*/
+static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
+  int rc;                         /* Return code */
+  u32 iZero = 0;                  /* One less than frame number of aPgno[1] */
+  volatile u32 *aPgno = 0;        /* Page number array */
+  volatile ht_slot *aHash = 0;    /* Hash table */
+
+  rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);
+
+  /* Assuming the wal-index file was successfully mapped, populate the
+  ** page number array and hash table entry.
+  */
+  if( rc==SQLITE_OK ){
+    int iKey;                     /* Hash table key */
+    int idx;                      /* Value to write to hash-table slot */
+    int nCollide;                 /* Number of hash collisions */
+
+    idx = iFrame - iZero;
+    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
+    
+    /* If this is the first entry to be added to this hash-table, zero the
+    ** entire hash table and aPgno[] array before proceeding. 
+    */
+    if( idx==1 ){
+      int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]);
+      memset((void*)&aPgno[1], 0, nByte);
+    }
+
+    /* If the entry in aPgno[] is already set, then the previous writer
+    ** must have exited unexpectedly in the middle of a transaction (after
+    ** writing one or more dirty pages to the WAL to free up memory). 
+    ** Remove the remnants of that writers uncommitted transaction from 
+    ** the hash-table before writing any new entries.
+    */
+    if( aPgno[idx] ){
+      walCleanupHash(pWal);
+      assert( !aPgno[idx] );
+    }
+
+    /* Write the aPgno[] array entry and the hash-table slot. */
+    nCollide = idx;
+    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
+      if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT;
+    }
+    aPgno[idx] = iPage;
+    aHash[iKey] = (ht_slot)idx;
+
+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
+    /* Verify that the number of entries in the hash table exactly equals
+    ** the number of entries in the mapping region.
+    */
+    {
+      int i;           /* Loop counter */
+      int nEntry = 0;  /* Number of entries in the hash table */
+      for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i] ) nEntry++; }
+      assert( nEntry==idx );
+    }
+
+    /* Verify that the every entry in the mapping region is reachable
+    ** via the hash table.  This turns out to be a really, really expensive
+    ** thing to check, so only do this occasionally - not on every
+    ** iteration.
+    */
+    if( (idx&0x3ff)==0 ){
+      int i;           /* Loop counter */
+      for(i=1; i<=idx; i++){
+        for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
+          if( aHash[iKey]==i ) break;
+        }
+        assert( aHash[iKey]==i );
+      }
+    }
+#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
+  }
+
+
+  return rc;
+}
+
+
+/*
+** Recover the wal-index by reading the write-ahead log file. 
+**
+** This routine first tries to establish an exclusive lock on the
+** wal-index to prevent other threads/processes from doing anything
+** with the WAL or wal-index while recovery is running.  The
+** WAL_RECOVER_LOCK is also held so that other threads will know
+** that this thread is running recovery.  If unable to establish
+** the necessary locks, this routine returns SQLITE_BUSY.
+*/
+static int walIndexRecover(Wal *pWal){
+  int rc;                         /* Return Code */
+  i64 nSize;                      /* Size of log file */
+  u32 aFrameCksum[2] = {0, 0};
+  int iLock;                      /* Lock offset to lock for checkpoint */
+  int nLock;                      /* Number of locks to hold */
+
+  /* Obtain an exclusive lock on all byte in the locking range not already
+  ** locked by the caller. The caller is guaranteed to have locked the
+  ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
+  ** If successful, the same bytes that are locked here are unlocked before
+  ** this function returns.
+  */
+  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
+  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
+  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
+  assert( pWal->writeLock );
+  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
+  nLock = SQLITE_SHM_NLOCK - iLock;
+  rc = walLockExclusive(pWal, iLock, nLock);
+  if( rc ){
+    return rc;
+  }
+  WALTRACE(("WAL%p: recovery begin...\n", pWal));
+
+  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
+
+  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
+  if( rc!=SQLITE_OK ){
+    goto recovery_error;
+  }
+
+  if( nSize>WAL_HDRSIZE ){
+    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
+    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
+    int szFrame;                  /* Number of bytes in buffer aFrame[] */
+    u8 *aData;                    /* Pointer to data part of aFrame buffer */
+    int iFrame;                   /* Index of last frame read */
+    i64 iOffset;                  /* Next offset to read from log file */
+    int szPage;                   /* Page size according to the log */
+    u32 magic;                    /* Magic value read from WAL header */
+    u32 version;                  /* Magic value read from WAL header */
+    int isValid;                  /* True if this frame is valid */
+
+    /* Read in the WAL header. */
+    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
+    if( rc!=SQLITE_OK ){
+      goto recovery_error;
+    }
+
+    /* If the database page size is not a power of two, or is greater than
+    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
+    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
+    ** WAL file.
+    */
+    magic = sqlite3Get4byte(&aBuf[0]);
+    szPage = sqlite3Get4byte(&aBuf[8]);
+    if( (magic&0xFFFFFFFE)!=WAL_MAGIC 
+     || szPage&(szPage-1) 
+     || szPage>SQLITE_MAX_PAGE_SIZE 
+     || szPage<512 
+    ){
+      goto finished;
+    }
+    pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
+    pWal->szPage = szPage;
+    pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
+    memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
+
+    /* Verify that the WAL header checksum is correct */
+    walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, 
+        aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
+    );
+    if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
+     || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
+    ){
+      goto finished;
+    }
+
+    /* Verify that the version number on the WAL format is one that
+    ** are able to understand */
+    version = sqlite3Get4byte(&aBuf[4]);
+    if( version!=WAL_MAX_VERSION ){
+      rc = SQLITE_CANTOPEN_BKPT;
+      goto finished;
+    }
+
+    /* Malloc a buffer to read frames into. */
+    szFrame = szPage + WAL_FRAME_HDRSIZE;
+    aFrame = (u8 *)sqlite3_malloc(szFrame);
+    if( !aFrame ){
+      rc = SQLITE_NOMEM;
+      goto recovery_error;
+    }
+    aData = &aFrame[WAL_FRAME_HDRSIZE];
+
+    /* Read all frames from the log file. */
+    iFrame = 0;
+    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
+      u32 pgno;                   /* Database page number for frame */
+      u32 nTruncate;              /* dbsize field from frame header */
+
+      /* Read and decode the next log frame. */
+      iFrame++;
+      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
+      if( rc!=SQLITE_OK ) break;
+      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
+      if( !isValid ) break;
+      rc = walIndexAppend(pWal, iFrame, pgno);
+      if( rc!=SQLITE_OK ) break;
+
+      /* If nTruncate is non-zero, this is a commit record. */
+      if( nTruncate ){
+        pWal->hdr.mxFrame = iFrame;
+        pWal->hdr.nPage = nTruncate;
+        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
+        testcase( szPage<=32768 );
+        testcase( szPage>=65536 );
+        aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
+        aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
+      }
+    }
+
+    sqlite3_free(aFrame);
+  }
+
+finished:
+  if( rc==SQLITE_OK ){
+    volatile WalCkptInfo *pInfo;
+    int i;
+    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
+    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
+    walIndexWriteHdr(pWal);
+
+    /* Reset the checkpoint-header. This is safe because this thread is 
+    ** currently holding locks that exclude all other readers, writers and
+    ** checkpointers.
+    */
+    pInfo = walCkptInfo(pWal);
+    pInfo->nBackfill = 0;
+    pInfo->aReadMark[0] = 0;
+    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
+    if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;
+
+    /* If more than one frame was recovered from the log file, report an
+    ** event via sqlite3_log(). This is to help with identifying performance
+    ** problems caused by applications routinely shutting down without
+    ** checkpointing the log file.
+    */
+    if( pWal->hdr.nPage ){
+      sqlite3_log(SQLITE_NOTICE_RECOVER_WAL,
+          "recovered %d frames from WAL file %s",
+          pWal->hdr.mxFrame, pWal->zWalName
+      );
+    }
+  }
+
+recovery_error:
+  WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
+  walUnlockExclusive(pWal, iLock, nLock);
+  return rc;
+}
+
+/*
+** Close an open wal-index.
+*/
+static void walIndexClose(Wal *pWal, int isDelete){
+  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
+    int i;
+    for(i=0; i<pWal->nWiData; i++){
+      sqlite3_free((void *)pWal->apWiData[i]);
+      pWal->apWiData[i] = 0;
+    }
+  }else{
+    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
+  }
+}
+
+/* 
+** Open a connection to the WAL file zWalName. The database file must 
+** already be opened on connection pDbFd. The buffer that zWalName points
+** to must remain valid for the lifetime of the returned Wal* handle.
+**
+** A SHARED lock should be held on the database file when this function
+** is called. The purpose of this SHARED lock is to prevent any other
+** client from unlinking the WAL or wal-index file. If another process
+** were to do this just after this client opened one of these files, the
+** system would be badly broken.
+**
+** If the log file is successfully opened, SQLITE_OK is returned and 
+** *ppWal is set to point to a new WAL handle. If an error occurs,
+** an SQLite error code is returned and *ppWal is left unmodified.
+*/
+SQLITE_PRIVATE int sqlite3WalOpen(
+  sqlite3_vfs *pVfs,              /* vfs module to open wal and wal-index */
+  sqlite3_file *pDbFd,            /* The open database file */
+  const char *zWalName,           /* Name of the WAL file */
+  int bNoShm,                     /* True to run in heap-memory mode */
+  i64 mxWalSize,                  /* Truncate WAL to this size on reset */
+  Wal **ppWal                     /* OUT: Allocated Wal handle */
+){
+  int rc;                         /* Return Code */
+  Wal *pRet;                      /* Object to allocate and return */
+  int flags;                      /* Flags passed to OsOpen() */
+
+  assert( zWalName && zWalName[0] );
+  assert( pDbFd );
+
+  /* In the amalgamation, the os_unix.c and os_win.c source files come before
+  ** this source file.  Verify that the #defines of the locking byte offsets
+  ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value.
+  */
+#ifdef WIN_SHM_BASE
+  assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET );
+#endif
+#ifdef UNIX_SHM_BASE
+  assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET );
+#endif
+
+
+  /* Allocate an instance of struct Wal to return. */
+  *ppWal = 0;
+  pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile);
+  if( !pRet ){
+    return SQLITE_NOMEM;
+  }
+
+  pRet->pVfs = pVfs;
+  pRet->pWalFd = (sqlite3_file *)&pRet[1];
+  pRet->pDbFd = pDbFd;
+  pRet->readLock = -1;
+  pRet->mxWalSize = mxWalSize;
+  pRet->zWalName = zWalName;
+  pRet->syncHeader = 1;
+  pRet->padToSectorBoundary = 1;
+  pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);
+
+  /* Open file handle on the write-ahead log file. */
+  flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
+  rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
+  if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
+    pRet->readOnly = WAL_RDONLY;
+  }
+
+  if( rc!=SQLITE_OK ){
+    walIndexClose(pRet, 0);
+    sqlite3OsClose(pRet->pWalFd);
+    sqlite3_free(pRet);
+  }else{
+    int iDC = sqlite3OsDeviceCharacteristics(pDbFd);
+    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; }
+    if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){
+      pRet->padToSectorBoundary = 0;
+    }
+    *ppWal = pRet;
+    WALTRACE(("WAL%d: opened\n", pRet));
+  }
+  return rc;
+}
+
+/*
+** Change the size to which the WAL file is trucated on each reset.
+*/
+SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){
+  if( pWal ) pWal->mxWalSize = iLimit;
+}
+
+/*
+** Find the smallest page number out of all pages held in the WAL that
+** has not been returned by any prior invocation of this method on the
+** same WalIterator object.   Write into *piFrame the frame index where
+** that page was last written into the WAL.  Write into *piPage the page
+** number.
+**
+** Return 0 on success.  If there are no pages in the WAL with a page
+** number larger than *piPage, then return 1.
+*/
+static int walIteratorNext(
+  WalIterator *p,               /* Iterator */
+  u32 *piPage,                  /* OUT: The page number of the next page */
+  u32 *piFrame                  /* OUT: Wal frame index of next page */
+){
+  u32 iMin;                     /* Result pgno must be greater than iMin */
+  u32 iRet = 0xFFFFFFFF;        /* 0xffffffff is never a valid page number */
+  int i;                        /* For looping through segments */
+
+  iMin = p->iPrior;
+  assert( iMin<0xffffffff );
+  for(i=p->nSegment-1; i>=0; i--){
+    struct WalSegment *pSegment = &p->aSegment[i];
+    while( pSegment->iNext<pSegment->nEntry ){
+      u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
+      if( iPg>iMin ){
+        if( iPg<iRet ){
+          iRet = iPg;
+          *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
+        }
+        break;
+      }
+      pSegment->iNext++;
+    }
+  }
+
+  *piPage = p->iPrior = iRet;
+  return (iRet==0xFFFFFFFF);
+}
+
+/*
+** This function merges two sorted lists into a single sorted list.
+**
+** aLeft[] and aRight[] are arrays of indices.  The sort key is
+** aContent[aLeft[]] and aContent[aRight[]].  Upon entry, the following
+** is guaranteed for all J<K:
+**
+**        aContent[aLeft[J]] < aContent[aLeft[K]]
+**        aContent[aRight[J]] < aContent[aRight[K]]
+**
+** This routine overwrites aRight[] with a new (probably longer) sequence
+** of indices such that the aRight[] contains every index that appears in
+** either aLeft[] or the old aRight[] and such that the second condition
+** above is still met.
+**
+** The aContent[aLeft[X]] values will be unique for all X.  And the
+** aContent[aRight[X]] values will be unique too.  But there might be
+** one or more combinations of X and Y such that
+**
+**      aLeft[X]!=aRight[Y]  &&  aContent[aLeft[X]] == aContent[aRight[Y]]
+**
+** When that happens, omit the aLeft[X] and use the aRight[Y] index.
+*/
+static void walMerge(
+  const u32 *aContent,            /* Pages in wal - keys for the sort */
+  ht_slot *aLeft,                 /* IN: Left hand input list */
+  int nLeft,                      /* IN: Elements in array *paLeft */
+  ht_slot **paRight,              /* IN/OUT: Right hand input list */
+  int *pnRight,                   /* IN/OUT: Elements in *paRight */
+  ht_slot *aTmp                   /* Temporary buffer */
+){
+  int iLeft = 0;                  /* Current index in aLeft */
+  int iRight = 0;                 /* Current index in aRight */
+  int iOut = 0;                   /* Current index in output buffer */
+  int nRight = *pnRight;
+  ht_slot *aRight = *paRight;
+
+  assert( nLeft>0 && nRight>0 );
+  while( iRight<nRight || iLeft<nLeft ){
+    ht_slot logpage;
+    Pgno dbpage;
+
+    if( (iLeft<nLeft) 
+     && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]])
+    ){
+      logpage = aLeft[iLeft++];
+    }else{
+      logpage = aRight[iRight++];
+    }
+    dbpage = aContent[logpage];
+
+    aTmp[iOut++] = logpage;
+    if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++;
+
+    assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage );
+    assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage );
+  }
+
+  *paRight = aLeft;
+  *pnRight = iOut;
+  memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut);
+}
+
+/*
+** Sort the elements in list aList using aContent[] as the sort key.
+** Remove elements with duplicate keys, preferring to keep the
+** larger aList[] values.
+**
+** The aList[] entries are indices into aContent[].  The values in
+** aList[] are to be sorted so that for all J<K:
+**
+**      aContent[aList[J]] < aContent[aList[K]]
+**
+** For any X and Y such that
+**
+**      aContent[aList[X]] == aContent[aList[Y]]
+**
+** Keep the larger of the two values aList[X] and aList[Y] and discard
+** the smaller.
+*/
+static void walMergesort(
+  const u32 *aContent,            /* Pages in wal */
+  ht_slot *aBuffer,               /* Buffer of at least *pnList items to use */
+  ht_slot *aList,                 /* IN/OUT: List to sort */
+  int *pnList                     /* IN/OUT: Number of elements in aList[] */
+){
+  struct Sublist {
+    int nList;                    /* Number of elements in aList */
+    ht_slot *aList;               /* Pointer to sub-list content */
+  };
+
+  const int nList = *pnList;      /* Size of input list */
+  int nMerge = 0;                 /* Number of elements in list aMerge */
+  ht_slot *aMerge = 0;            /* List to be merged */
+  int iList;                      /* Index into input list */
+  int iSub = 0;                   /* Index into aSub array */
+  struct Sublist aSub[13];        /* Array of sub-lists */
+
+  memset(aSub, 0, sizeof(aSub));
+  assert( nList<=HASHTABLE_NPAGE && nList>0 );
+  assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) );
+
+  for(iList=0; iList<nList; iList++){
+    nMerge = 1;
+    aMerge = &aList[iList];
+    for(iSub=0; iList & (1<<iSub); iSub++){
+      struct Sublist *p = &aSub[iSub];
+      assert( p->aList && p->nList<=(1<<iSub) );
+      assert( p->aList==&aList[iList&~((2<<iSub)-1)] );
+      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
+    }
+    aSub[iSub].aList = aMerge;
+    aSub[iSub].nList = nMerge;
+  }
+
+  for(iSub++; iSub<ArraySize(aSub); iSub++){
+    if( nList & (1<<iSub) ){
+      struct Sublist *p = &aSub[iSub];
+      assert( p->nList<=(1<<iSub) );
+      assert( p->aList==&aList[nList&~((2<<iSub)-1)] );
+      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
+    }
+  }
+  assert( aMerge==aList );
+  *pnList = nMerge;
+
+#ifdef SQLITE_DEBUG
+  {
+    int i;
+    for(i=1; i<*pnList; i++){
+      assert( aContent[aList[i]] > aContent[aList[i-1]] );
+    }
+  }
+#endif
+}
+
+/* 
+** Free an iterator allocated by walIteratorInit().
+*/
+static void walIteratorFree(WalIterator *p){
+  sqlite3_free(p);
+}
+
+/*
+** Construct a WalInterator object that can be used to loop over all 
+** pages in the WAL in ascending order. The caller must hold the checkpoint
+** lock.
+**
+** On success, make *pp point to the newly allocated WalInterator object
+** return SQLITE_OK. Otherwise, return an error code. If this routine
+** returns an error, the value of *pp is undefined.
+**
+** The calling routine should invoke walIteratorFree() to destroy the
+** WalIterator object when it has finished with it.
+*/
+static int walIteratorInit(Wal *pWal, WalIterator **pp){
+  WalIterator *p;                 /* Return value */
+  int nSegment;                   /* Number of segments to merge */
+  u32 iLast;                      /* Last frame in log */
+  int nByte;                      /* Number of bytes to allocate */
+  int i;                          /* Iterator variable */
+  ht_slot *aTmp;                  /* Temp space used by merge-sort */
+  int rc = SQLITE_OK;             /* Return Code */
+
+  /* This routine only runs while holding the checkpoint lock. And
+  ** it only runs if there is actually content in the log (mxFrame>0).
+  */
+  assert( pWal->ckptLock && pWal->hdr.mxFrame>0 );
+  iLast = pWal->hdr.mxFrame;
+
+  /* Allocate space for the WalIterator object. */
+  nSegment = walFramePage(iLast) + 1;
+  nByte = sizeof(WalIterator) 
+        + (nSegment-1)*sizeof(struct WalSegment)
+        + iLast*sizeof(ht_slot);
+  p = (WalIterator *)sqlite3_malloc(nByte);
+  if( !p ){
+    return SQLITE_NOMEM;
+  }
+  memset(p, 0, nByte);
+  p->nSegment = nSegment;
+
+  /* Allocate temporary space used by the merge-sort routine. This block
+  ** of memory will be freed before this function returns.
+  */
+  aTmp = (ht_slot *)sqlite3_malloc(
+      sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast)
+  );
+  if( !aTmp ){
+    rc = SQLITE_NOMEM;
+  }
+
+  for(i=0; rc==SQLITE_OK && i<nSegment; i++){
+    volatile ht_slot *aHash;
+    u32 iZero;
+    volatile u32 *aPgno;
+
+    rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);
+    if( rc==SQLITE_OK ){
+      int j;                      /* Counter variable */
+      int nEntry;                 /* Number of entries in this segment */
+      ht_slot *aIndex;            /* Sorted index for this segment */
+
+      aPgno++;
+      if( (i+1)==nSegment ){
+        nEntry = (int)(iLast - iZero);
+      }else{
+        nEntry = (int)((u32*)aHash - (u32*)aPgno);
+      }
+      aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero];
+      iZero++;
+  
+      for(j=0; j<nEntry; j++){
+        aIndex[j] = (ht_slot)j;
+      }
+      walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry);
+      p->aSegment[i].iZero = iZero;
+      p->aSegment[i].nEntry = nEntry;
+      p->aSegment[i].aIndex = aIndex;
+      p->aSegment[i].aPgno = (u32 *)aPgno;
+    }
+  }
+  sqlite3_free(aTmp);
+
+  if( rc!=SQLITE_OK ){
+    walIteratorFree(p);
+  }
+  *pp = p;
+  return rc;
+}
+
+/*
+** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and
+** n. If the attempt fails and parameter xBusy is not NULL, then it is a
+** busy-handler function. Invoke it and retry the lock until either the
+** lock is successfully obtained or the busy-handler returns 0.
+*/
+static int walBusyLock(
+  Wal *pWal,                      /* WAL connection */
+  int (*xBusy)(void*),            /* Function to call when busy */
+  void *pBusyArg,                 /* Context argument for xBusyHandler */
+  int lockIdx,                    /* Offset of first byte to lock */
+  int n                           /* Number of bytes to lock */
+){
+  int rc;
+  do {
+    rc = walLockExclusive(pWal, lockIdx, n);
+  }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
+  return rc;
+}
+
+/*
+** The cache of the wal-index header must be valid to call this function.
+** Return the page-size in bytes used by the database.
+*/
+static int walPagesize(Wal *pWal){
+  return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
+}
+
+/*
+** The following is guaranteed when this function is called:
+**
+**   a) the WRITER lock is held,
+**   b) the entire log file has been checkpointed, and
+**   c) any existing readers are reading exclusively from the database
+**      file - there are no readers that may attempt to read a frame from
+**      the log file.
+**
+** This function updates the shared-memory structures so that the next
+** client to write to the database (which may be this one) does so by
+** writing frames into the start of the log file.
+**
+** The value of parameter salt1 is used as the aSalt[1] value in the 
+** new wal-index header. It should be passed a pseudo-random value (i.e. 
+** one obtained from sqlite3_randomness()).
+*/
+static void walRestartHdr(Wal *pWal, u32 salt1){
+  volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
+  int i;                          /* Loop counter */
+  u32 *aSalt = pWal->hdr.aSalt;   /* Big-endian salt values */
+  pWal->nCkpt++;
+  pWal->hdr.mxFrame = 0;
+  sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
+  memcpy(&pWal->hdr.aSalt[1], &salt1, 4);
+  walIndexWriteHdr(pWal);
+  pInfo->nBackfill = 0;
+  pInfo->aReadMark[1] = 0;
+  for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
+  assert( pInfo->aReadMark[0]==0 );
+}
+
+/*
+** Copy as much content as we can from the WAL back into the database file
+** in response to an sqlite3_wal_checkpoint() request or the equivalent.
+**
+** The amount of information copies from WAL to database might be limited
+** by active readers.  This routine will never overwrite a database page
+** that a concurrent reader might be using.
+**
+** All I/O barrier operations (a.k.a fsyncs) occur in this routine when
+** SQLite is in WAL-mode in synchronous=NORMAL.  That means that if 
+** checkpoints are always run by a background thread or background 
+** process, foreground threads will never block on a lengthy fsync call.
+**
+** Fsync is called on the WAL before writing content out of the WAL and
+** into the database.  This ensures that if the new content is persistent
+** in the WAL and can be recovered following a power-loss or hard reset.
+**
+** Fsync is also called on the database file if (and only if) the entire
+** WAL content is copied into the database file.  This second fsync makes
+** it safe to delete the WAL since the new content will persist in the
+** database file.
+**
+** This routine uses and updates the nBackfill field of the wal-index header.
+** This is the only routine that will increase the value of nBackfill.  
+** (A WAL reset or recovery will revert nBackfill to zero, but not increase
+** its value.)
+**
+** The caller must be holding sufficient locks to ensure that no other
+** checkpoint is running (in any other thread or process) at the same
+** time.
+*/
+static int walCheckpoint(
+  Wal *pWal,                      /* Wal connection */
+  int eMode,                      /* One of PASSIVE, FULL or RESTART */
+  int (*xBusy)(void*),            /* Function to call when busy */
+  void *pBusyArg,                 /* Context argument for xBusyHandler */
+  int sync_flags,                 /* Flags for OsSync() (or 0) */
+  u8 *zBuf                        /* Temporary buffer to use */
+){
+  int rc;                         /* Return code */
+  int szPage;                     /* Database page-size */
+  WalIterator *pIter = 0;         /* Wal iterator context */
+  u32 iDbpage = 0;                /* Next database page to write */
+  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
+  u32 mxSafeFrame;                /* Max frame that can be backfilled */
+  u32 mxPage;                     /* Max database page to write */
+  int i;                          /* Loop counter */
+  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */
+
+  szPage = walPagesize(pWal);
+  testcase( szPage<=32768 );
+  testcase( szPage>=65536 );
+  pInfo = walCkptInfo(pWal);
+  if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK;
+
+  /* Allocate the iterator */
+  rc = walIteratorInit(pWal, &pIter);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  assert( pIter );
+
+  /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked
+  ** in the SQLITE_CHECKPOINT_PASSIVE mode. */
+  assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
+
+  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
+  ** safe to write into the database.  Frames beyond mxSafeFrame might
+  ** overwrite database pages that are in use by active readers and thus
+  ** cannot be backfilled from the WAL.
+  */
+  mxSafeFrame = pWal->hdr.mxFrame;
+  mxPage = pWal->hdr.nPage;
+  for(i=1; i<WAL_NREADER; i++){
+    u32 y = pInfo->aReadMark[i];
+    if( mxSafeFrame>y ){
+      assert( y<=pWal->hdr.mxFrame );
+      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
+      if( rc==SQLITE_OK ){
+        pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
+        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
+      }else if( rc==SQLITE_BUSY ){
+        mxSafeFrame = y;
+        xBusy = 0;
+      }else{
+        goto walcheckpoint_out;
+      }
+    }
+  }
+
+  if( pInfo->nBackfill<mxSafeFrame
+   && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK
+  ){
+    i64 nSize;                    /* Current size of database file */
+    u32 nBackfill = pInfo->nBackfill;
+
+    /* Sync the WAL to disk */
+    if( sync_flags ){
+      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
+    }
+
+    /* If the database may grow as a result of this checkpoint, hint
+    ** about the eventual size of the db file to the VFS layer.
+    */
+    if( rc==SQLITE_OK ){
+      i64 nReq = ((i64)mxPage * szPage);
+      rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
+      if( rc==SQLITE_OK && nSize<nReq ){
+        sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
+      }
+    }
+
+
+    /* Iterate through the contents of the WAL, copying data to the db file. */
+    while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
+      i64 iOffset;
+      assert( walFramePgno(pWal, iFrame)==iDbpage );
+      if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue;
+      iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
+      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */
+      rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
+      if( rc!=SQLITE_OK ) break;
+      iOffset = (iDbpage-1)*(i64)szPage;
+      testcase( IS_BIG_INT(iOffset) );
+      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
+      if( rc!=SQLITE_OK ) break;
+    }
+
+    /* If work was actually accomplished... */
+    if( rc==SQLITE_OK ){
+      if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
+        i64 szDb = pWal->hdr.nPage*(i64)szPage;
+        testcase( IS_BIG_INT(szDb) );
+        rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
+        if( rc==SQLITE_OK && sync_flags ){
+          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
+        }
+      }
+      if( rc==SQLITE_OK ){
+        pInfo->nBackfill = mxSafeFrame;
+      }
+    }
+
+    /* Release the reader lock held while backfilling */
+    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
+  }
+
+  if( rc==SQLITE_BUSY ){
+    /* Reset the return code so as not to report a checkpoint failure
+    ** just because there are active readers.  */
+    rc = SQLITE_OK;
+  }
+
+  /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the
+  ** entire wal file has been copied into the database file, then block 
+  ** until all readers have finished using the wal file. This ensures that 
+  ** the next process to write to the database restarts the wal file.
+  */
+  if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
+    assert( pWal->writeLock );
+    if( pInfo->nBackfill<pWal->hdr.mxFrame ){
+      rc = SQLITE_BUSY;
+    }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){
+      u32 salt1;
+      sqlite3_randomness(4, &salt1);
+      assert( mxSafeFrame==pWal->hdr.mxFrame );
+      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1);
+      if( rc==SQLITE_OK ){
+        if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){
+          /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as
+          ** SQLITE_CHECKPOINT_RESTART with the addition that it also
+          ** truncates the log file to zero bytes just prior to a
+          ** successful return.
+          **
+          ** In theory, it might be safe to do this without updating the
+          ** wal-index header in shared memory, as all subsequent reader or
+          ** writer clients should see that the entire log file has been
+          ** checkpointed and behave accordingly. This seems unsafe though,
+          ** as it would leave the system in a state where the contents of
+          ** the wal-index header do not match the contents of the 
+          ** file-system. To avoid this, update the wal-index header to
+          ** indicate that the log file contains zero valid frames.  */
+          walRestartHdr(pWal, salt1);
+          rc = sqlite3OsTruncate(pWal->pWalFd, 0);
+        }
+        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
+      }
+    }
+  }
+
+ walcheckpoint_out:
+  walIteratorFree(pIter);
+  return rc;
+}
+
+/*
+** If the WAL file is currently larger than nMax bytes in size, truncate
+** it to exactly nMax bytes. If an error occurs while doing so, ignore it.
+*/
+static void walLimitSize(Wal *pWal, i64 nMax){
+  i64 sz;
+  int rx;
+  sqlite3BeginBenignMalloc();
+  rx = sqlite3OsFileSize(pWal->pWalFd, &sz);
+  if( rx==SQLITE_OK && (sz > nMax ) ){
+    rx = sqlite3OsTruncate(pWal->pWalFd, nMax);
+  }
+  sqlite3EndBenignMalloc();
+  if( rx ){
+    sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName);
+  }
+}
+
+/*
+** Close a connection to a log file.
+*/
+SQLITE_PRIVATE int sqlite3WalClose(
+  Wal *pWal,                      /* Wal to close */
+  int sync_flags,                 /* Flags to pass to OsSync() (or 0) */
+  int nBuf,
+  u8 *zBuf                        /* Buffer of at least nBuf bytes */
+){
+  int rc = SQLITE_OK;
+  if( pWal ){
+    int isDelete = 0;             /* True to unlink wal and wal-index files */
+
+    /* If an EXCLUSIVE lock can be obtained on the database file (using the
+    ** ordinary, rollback-mode locking methods, this guarantees that the
+    ** connection associated with this log file is the only connection to
+    ** the database. In this case checkpoint the database and unlink both
+    ** the wal and wal-index files.
+    **
+    ** The EXCLUSIVE lock is not released before returning.
+    */
+    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
+    if( rc==SQLITE_OK ){
+      if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
+        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
+      }
+      rc = sqlite3WalCheckpoint(
+          pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
+      );
+      if( rc==SQLITE_OK ){
+        int bPersist = -1;
+        sqlite3OsFileControlHint(
+            pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
+        );
+        if( bPersist!=1 ){
+          /* Try to delete the WAL file if the checkpoint completed and
+          ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal
+          ** mode (!bPersist) */
+          isDelete = 1;
+        }else if( pWal->mxWalSize>=0 ){
+          /* Try to truncate the WAL file to zero bytes if the checkpoint
+          ** completed and fsynced (rc==SQLITE_OK) and we are in persistent
+          ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a
+          ** non-negative value (pWal->mxWalSize>=0).  Note that we truncate
+          ** to zero bytes as truncating to the journal_size_limit might
+          ** leave a corrupt WAL file on disk. */
+          walLimitSize(pWal, 0);
+        }
+      }
+    }
+
+    walIndexClose(pWal, isDelete);
+    sqlite3OsClose(pWal->pWalFd);
+    if( isDelete ){
+      sqlite3BeginBenignMalloc();
+      sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
+      sqlite3EndBenignMalloc();
+    }
+    WALTRACE(("WAL%p: closed\n", pWal));
+    sqlite3_free((void *)pWal->apWiData);
+    sqlite3_free(pWal);
+  }
+  return rc;
+}
+
+/*
+** Try to read the wal-index header.  Return 0 on success and 1 if
+** there is a problem.
+**
+** The wal-index is in shared memory.  Another thread or process might
+** be writing the header at the same time this procedure is trying to
+** read it, which might result in inconsistency.  A dirty read is detected
+** by verifying that both copies of the header are the same and also by
+** a checksum on the header.
+**
+** If and only if the read is consistent and the header is different from
+** pWal->hdr, then pWal->hdr is updated to the content of the new header
+** and *pChanged is set to 1.
+**
+** If the checksum cannot be verified return non-zero. If the header
+** is read successfully and the checksum verified, return zero.
+*/
+static int walIndexTryHdr(Wal *pWal, int *pChanged){
+  u32 aCksum[2];                  /* Checksum on the header content */
+  WalIndexHdr h1, h2;             /* Two copies of the header content */
+  WalIndexHdr volatile *aHdr;     /* Header in shared memory */
+
+  /* The first page of the wal-index must be mapped at this point. */
+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
+
+  /* Read the header. This might happen concurrently with a write to the
+  ** same area of shared memory on a different CPU in a SMP,
+  ** meaning it is possible that an inconsistent snapshot is read
+  ** from the file. If this happens, return non-zero.
+  **
+  ** There are two copies of the header at the beginning of the wal-index.
+  ** When reading, read [0] first then [1].  Writes are in the reverse order.
+  ** Memory barriers are used to prevent the compiler or the hardware from
+  ** reordering the reads and writes.
+  */
+  aHdr = walIndexHdr(pWal);
+  memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
+  walShmBarrier(pWal);
+  memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
+
+  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
+    return 1;   /* Dirty read */
+  }  
+  if( h1.isInit==0 ){
+    return 1;   /* Malformed header - probably all zeros */
+  }
+  walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
+  if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
+    return 1;   /* Checksum does not match */
+  }
+
+  if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){
+    *pChanged = 1;
+    memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr));
+    pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
+    testcase( pWal->szPage<=32768 );
+    testcase( pWal->szPage>=65536 );
+  }
+
+  /* The header was successfully read. Return zero. */
+  return 0;
+}
+
+/*
+** Read the wal-index header from the wal-index and into pWal->hdr.
+** If the wal-header appears to be corrupt, try to reconstruct the
+** wal-index from the WAL before returning.
+**
+** Set *pChanged to 1 if the wal-index header value in pWal->hdr is
+** changed by this operation.  If pWal->hdr is unchanged, set *pChanged
+** to 0.
+**
+** If the wal-index header is successfully read, return SQLITE_OK. 
+** Otherwise an SQLite error code.
+*/
+static int walIndexReadHdr(Wal *pWal, int *pChanged){
+  int rc;                         /* Return code */
+  int badHdr;                     /* True if a header read failed */
+  volatile u32 *page0;            /* Chunk of wal-index containing header */
+
+  /* Ensure that page 0 of the wal-index (the page that contains the 
+  ** wal-index header) is mapped. Return early if an error occurs here.
+  */
+  assert( pChanged );
+  rc = walIndexPage(pWal, 0, &page0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  };
+  assert( page0 || pWal->writeLock==0 );
+
+  /* If the first page of the wal-index has been mapped, try to read the
+  ** wal-index header immediately, without holding any lock. This usually
+  ** works, but may fail if the wal-index header is corrupt or currently 
+  ** being modified by another thread or process.
+  */
+  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);
+
+  /* If the first attempt failed, it might have been due to a race
+  ** with a writer.  So get a WRITE lock and try again.
+  */
+  assert( badHdr==0 || pWal->writeLock==0 );
+  if( badHdr ){
+    if( pWal->readOnly & WAL_SHM_RDONLY ){
+      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
+        walUnlockShared(pWal, WAL_WRITE_LOCK);
+        rc = SQLITE_READONLY_RECOVERY;
+      }
+    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
+      pWal->writeLock = 1;
+      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
+        badHdr = walIndexTryHdr(pWal, pChanged);
+        if( badHdr ){
+          /* If the wal-index header is still malformed even while holding
+          ** a WRITE lock, it can only mean that the header is corrupted and
+          ** needs to be reconstructed.  So run recovery to do exactly that.
+          */
+          rc = walIndexRecover(pWal);
+          *pChanged = 1;
+        }
+      }
+      pWal->writeLock = 0;
+      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
+    }
+  }
+
+  /* If the header is read successfully, check the version number to make
+  ** sure the wal-index was not constructed with some future format that
+  ** this version of SQLite cannot understand.
+  */
+  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
+    rc = SQLITE_CANTOPEN_BKPT;
+  }
+
+  return rc;
+}
+
+/*
+** This is the value that walTryBeginRead returns when it needs to
+** be retried.
+*/
+#define WAL_RETRY  (-1)
+
+/*
+** Attempt to start a read transaction.  This might fail due to a race or
+** other transient condition.  When that happens, it returns WAL_RETRY to
+** indicate to the caller that it is safe to retry immediately.
+**
+** On success return SQLITE_OK.  On a permanent failure (such an
+** I/O error or an SQLITE_BUSY because another process is running
+** recovery) return a positive error code.
+**
+** The useWal parameter is true to force the use of the WAL and disable
+** the case where the WAL is bypassed because it has been completely
+** checkpointed.  If useWal==0 then this routine calls walIndexReadHdr() 
+** to make a copy of the wal-index header into pWal->hdr.  If the 
+** wal-index header has changed, *pChanged is set to 1 (as an indication 
+** to the caller that the local paget cache is obsolete and needs to be 
+** flushed.)  When useWal==1, the wal-index header is assumed to already
+** be loaded and the pChanged parameter is unused.
+**
+** The caller must set the cnt parameter to the number of prior calls to
+** this routine during the current read attempt that returned WAL_RETRY.
+** This routine will start taking more aggressive measures to clear the
+** race conditions after multiple WAL_RETRY returns, and after an excessive
+** number of errors will ultimately return SQLITE_PROTOCOL.  The
+** SQLITE_PROTOCOL return indicates that some other process has gone rogue
+** and is not honoring the locking protocol.  There is a vanishingly small
+** chance that SQLITE_PROTOCOL could be returned because of a run of really
+** bad luck when there is lots of contention for the wal-index, but that
+** possibility is so small that it can be safely neglected, we believe.
+**
+** On success, this routine obtains a read lock on 
+** WAL_READ_LOCK(pWal->readLock).  The pWal->readLock integer is
+** in the range 0 <= pWal->readLock < WAL_NREADER.  If pWal->readLock==(-1)
+** that means the Wal does not hold any read lock.  The reader must not
+** access any database page that is modified by a WAL frame up to and
+** including frame number aReadMark[pWal->readLock].  The reader will
+** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0
+** Or if pWal->readLock==0, then the reader will ignore the WAL
+** completely and get all content directly from the database file.
+** If the useWal parameter is 1 then the WAL will never be ignored and
+** this routine will always set pWal->readLock>0 on success.
+** When the read transaction is completed, the caller must release the
+** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
+**
+** This routine uses the nBackfill and aReadMark[] fields of the header
+** to select a particular WAL_READ_LOCK() that strives to let the
+** checkpoint process do as much work as possible.  This routine might
+** update values of the aReadMark[] array in the header, but if it does
+** so it takes care to hold an exclusive lock on the corresponding
+** WAL_READ_LOCK() while changing values.
+*/
+static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
+  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
+  u32 mxReadMark;                 /* Largest aReadMark[] value */
+  int mxI;                        /* Index of largest aReadMark[] value */
+  int i;                          /* Loop counter */
+  int rc = SQLITE_OK;             /* Return code  */
+
+  assert( pWal->readLock<0 );     /* Not currently locked */
+
+  /* Take steps to avoid spinning forever if there is a protocol error.
+  **
+  ** Circumstances that cause a RETRY should only last for the briefest
+  ** instances of time.  No I/O or other system calls are done while the
+  ** locks are held, so the locks should not be held for very long. But 
+  ** if we are unlucky, another process that is holding a lock might get
+  ** paged out or take a page-fault that is time-consuming to resolve, 
+  ** during the few nanoseconds that it is holding the lock.  In that case,
+  ** it might take longer than normal for the lock to free.
+  **
+  ** After 5 RETRYs, we begin calling sqlite3OsSleep().  The first few
+  ** calls to sqlite3OsSleep() have a delay of 1 microsecond.  Really this
+  ** is more of a scheduler yield than an actual delay.  But on the 10th
+  ** an subsequent retries, the delays start becoming longer and longer, 
+  ** so that on the 100th (and last) RETRY we delay for 323 milliseconds.
+  ** The total delay time before giving up is less than 10 seconds.
+  */
+  if( cnt>5 ){
+    int nDelay = 1;                      /* Pause time in microseconds */
+    if( cnt>100 ){
+      VVA_ONLY( pWal->lockError = 1; )
+      return SQLITE_PROTOCOL;
+    }
+    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
+    sqlite3OsSleep(pWal->pVfs, nDelay);
+  }
+
+  if( !useWal ){
+    rc = walIndexReadHdr(pWal, pChanged);
+    if( rc==SQLITE_BUSY ){
+      /* If there is not a recovery running in another thread or process
+      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
+      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
+      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
+      ** would be technically correct.  But the race is benign since with
+      ** WAL_RETRY this routine will be called again and will probably be
+      ** right on the second iteration.
+      */
+      if( pWal->apWiData[0]==0 ){
+        /* This branch is taken when the xShmMap() method returns SQLITE_BUSY.
+        ** We assume this is a transient condition, so return WAL_RETRY. The
+        ** xShmMap() implementation used by the default unix and win32 VFS 
+        ** modules may return SQLITE_BUSY due to a race condition in the 
+        ** code that determines whether or not the shared-memory region 
+        ** must be zeroed before the requested page is returned.
+        */
+        rc = WAL_RETRY;
+      }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){
+        walUnlockShared(pWal, WAL_RECOVER_LOCK);
+        rc = WAL_RETRY;
+      }else if( rc==SQLITE_BUSY ){
+        rc = SQLITE_BUSY_RECOVERY;
+      }
+    }
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+  }
+
+  pInfo = walCkptInfo(pWal);
+  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
+    /* The WAL has been completely backfilled (or it is empty).
+    ** and can be safely ignored.
+    */
+    rc = walLockShared(pWal, WAL_READ_LOCK(0));
+    walShmBarrier(pWal);
+    if( rc==SQLITE_OK ){
+      if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
+        /* It is not safe to allow the reader to continue here if frames
+        ** may have been appended to the log before READ_LOCK(0) was obtained.
+        ** When holding READ_LOCK(0), the reader ignores the entire log file,
+        ** which implies that the database file contains a trustworthy
+        ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from
+        ** happening, this is usually correct.
+        **
+        ** However, if frames have been appended to the log (or if the log 
+        ** is wrapped and written for that matter) before the READ_LOCK(0)
+        ** is obtained, that is not necessarily true. A checkpointer may
+        ** have started to backfill the appended frames but crashed before
+        ** it finished. Leaving a corrupt image in the database file.
+        */
+        walUnlockShared(pWal, WAL_READ_LOCK(0));
+        return WAL_RETRY;
+      }
+      pWal->readLock = 0;
+      return SQLITE_OK;
+    }else if( rc!=SQLITE_BUSY ){
+      return rc;
+    }
+  }
+
+  /* If we get this far, it means that the reader will want to use
+  ** the WAL to get at content from recent commits.  The job now is
+  ** to select one of the aReadMark[] entries that is closest to
+  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
+  */
+  mxReadMark = 0;
+  mxI = 0;
+  for(i=1; i<WAL_NREADER; i++){
+    u32 thisMark = pInfo->aReadMark[i];
+    if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){
+      assert( thisMark!=READMARK_NOT_USED );
+      mxReadMark = thisMark;
+      mxI = i;
+    }
+  }
+  /* There was once an "if" here. The extra "{" is to preserve indentation. */
+  {
+    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
+     && (mxReadMark<pWal->hdr.mxFrame || mxI==0)
+    ){
+      for(i=1; i<WAL_NREADER; i++){
+        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
+        if( rc==SQLITE_OK ){
+          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
+          mxI = i;
+          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
+          break;
+        }else if( rc!=SQLITE_BUSY ){
+          return rc;
+        }
+      }
+    }
+    if( mxI==0 ){
+      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
+      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
+    }
+
+    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
+    if( rc ){
+      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
+    }
+    /* Now that the read-lock has been obtained, check that neither the
+    ** value in the aReadMark[] array or the contents of the wal-index
+    ** header have changed.
+    **
+    ** It is necessary to check that the wal-index header did not change
+    ** between the time it was read and when the shared-lock was obtained
+    ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
+    ** that the log file may have been wrapped by a writer, or that frames
+    ** that occur later in the log than pWal->hdr.mxFrame may have been
+    ** copied into the database by a checkpointer. If either of these things
+    ** happened, then reading the database with the current value of
+    ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
+    ** instead.
+    **
+    ** This does not guarantee that the copy of the wal-index header is up to
+    ** date before proceeding. That would not be possible without somehow
+    ** blocking writers. It only guarantees that a dangerous checkpoint or 
+    ** log-wrap (either of which would require an exclusive lock on
+    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
+    */
+    walShmBarrier(pWal);
+    if( pInfo->aReadMark[mxI]!=mxReadMark
+     || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
+    ){
+      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
+      return WAL_RETRY;
+    }else{
+      assert( mxReadMark<=pWal->hdr.mxFrame );
+      pWal->readLock = (i16)mxI;
+    }
+  }
+  return rc;
+}
+
+/*
+** Begin a read transaction on the database.
+**
+** This routine used to be called sqlite3OpenSnapshot() and with good reason:
+** it takes a snapshot of the state of the WAL and wal-index for the current
+** instant in time.  The current thread will continue to use this snapshot.
+** Other threads might append new content to the WAL and wal-index but
+** that extra content is ignored by the current thread.
+**
+** If the database contents have changes since the previous read
+** transaction, then *pChanged is set to 1 before returning.  The
+** Pager layer will use this to know that is cache is stale and
+** needs to be flushed.
+*/
+SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
+  int rc;                         /* Return code */
+  int cnt = 0;                    /* Number of TryBeginRead attempts */
+
+  do{
+    rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
+  }while( rc==WAL_RETRY );
+  testcase( (rc&0xff)==SQLITE_BUSY );
+  testcase( (rc&0xff)==SQLITE_IOERR );
+  testcase( rc==SQLITE_PROTOCOL );
+  testcase( rc==SQLITE_OK );
+  return rc;
+}
+
+/*
+** Finish with a read transaction.  All this does is release the
+** read-lock.
+*/
+SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){
+  sqlite3WalEndWriteTransaction(pWal);
+  if( pWal->readLock>=0 ){
+    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
+    pWal->readLock = -1;
+  }
+}
+
+/*
+** Search the wal file for page pgno. If found, set *piRead to the frame that
+** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
+** to zero.
+**
+** Return SQLITE_OK if successful, or an error code if an error occurs. If an
+** error does occur, the final value of *piRead is undefined.
+*/
+SQLITE_PRIVATE int sqlite3WalFindFrame(
+  Wal *pWal,                      /* WAL handle */
+  Pgno pgno,                      /* Database page number to read data for */
+  u32 *piRead                     /* OUT: Frame number (or zero) */
+){
+  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
+  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
+  int iHash;                      /* Used to loop through N hash tables */
+
+  /* This routine is only be called from within a read transaction. */
+  assert( pWal->readLock>=0 || pWal->lockError );
+
+  /* If the "last page" field of the wal-index header snapshot is 0, then
+  ** no data will be read from the wal under any circumstances. Return early
+  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
+  ** then the WAL is ignored by the reader so return early, as if the 
+  ** WAL were empty.
+  */
+  if( iLast==0 || pWal->readLock==0 ){
+    *piRead = 0;
+    return SQLITE_OK;
+  }
+
+  /* Search the hash table or tables for an entry matching page number
+  ** pgno. Each iteration of the following for() loop searches one
+  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
+  **
+  ** This code might run concurrently to the code in walIndexAppend()
+  ** that adds entries to the wal-index (and possibly to this hash 
+  ** table). This means the value just read from the hash 
+  ** slot (aHash[iKey]) may have been added before or after the 
+  ** current read transaction was opened. Values added after the
+  ** read transaction was opened may have been written incorrectly -
+  ** i.e. these slots may contain garbage data. However, we assume
+  ** that any slots written before the current read transaction was
+  ** opened remain unmodified.
+  **
+  ** For the reasons above, the if(...) condition featured in the inner
+  ** loop of the following block is more stringent that would be required 
+  ** if we had exclusive access to the hash-table:
+  **
+  **   (aPgno[iFrame]==pgno): 
+  **     This condition filters out normal hash-table collisions.
+  **
+  **   (iFrame<=iLast): 
+  **     This condition filters out entries that were added to the hash
+  **     table after the current read-transaction had started.
+  */
+  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
+    volatile ht_slot *aHash;      /* Pointer to hash table */
+    volatile u32 *aPgno;          /* Pointer to array of page numbers */
+    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
+    int iKey;                     /* Hash slot index */
+    int nCollide;                 /* Number of hash collisions remaining */
+    int rc;                       /* Error code */
+
+    rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    nCollide = HASHTABLE_NSLOT;
+    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
+      u32 iFrame = aHash[iKey] + iZero;
+      if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){
+        assert( iFrame>iRead || CORRUPT_DB );
+        iRead = iFrame;
+      }
+      if( (nCollide--)==0 ){
+        return SQLITE_CORRUPT_BKPT;
+      }
+    }
+  }
+
+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
+  /* If expensive assert() statements are available, do a linear search
+  ** of the wal-index file content. Make sure the results agree with the
+  ** result obtained using the hash indexes above.  */
+  {
+    u32 iRead2 = 0;
+    u32 iTest;
+    for(iTest=iLast; iTest>0; iTest--){
+      if( walFramePgno(pWal, iTest)==pgno ){
+        iRead2 = iTest;
+        break;
+      }
+    }
+    assert( iRead==iRead2 );
+  }
+#endif
+
+  *piRead = iRead;
+  return SQLITE_OK;
+}
+
+/*
+** Read the contents of frame iRead from the wal file into buffer pOut
+** (which is nOut bytes in size). Return SQLITE_OK if successful, or an
+** error code otherwise.
+*/
+SQLITE_PRIVATE int sqlite3WalReadFrame(
+  Wal *pWal,                      /* WAL handle */
+  u32 iRead,                      /* Frame to read */
+  int nOut,                       /* Size of buffer pOut in bytes */
+  u8 *pOut                        /* Buffer to write page data to */
+){
+  int sz;
+  i64 iOffset;
+  sz = pWal->hdr.szPage;
+  sz = (sz&0xfe00) + ((sz&0x0001)<<16);
+  testcase( sz<=32768 );
+  testcase( sz>=65536 );
+  iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
+  /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
+  return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
+}
+
+/* 
+** Return the size of the database in pages (or zero, if unknown).
+*/
+SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){
+  if( pWal && ALWAYS(pWal->readLock>=0) ){
+    return pWal->hdr.nPage;
+  }
+  return 0;
+}
+
+
+/* 
+** This function starts a write transaction on the WAL.
+**
+** A read transaction must have already been started by a prior call
+** to sqlite3WalBeginReadTransaction().
+**
+** If another thread or process has written into the database since
+** the read transaction was started, then it is not possible for this
+** thread to write as doing so would cause a fork.  So this routine
+** returns SQLITE_BUSY in that case and no write transaction is started.
+**
+** There can only be a single writer active at a time.
+*/
+SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){
+  int rc;
+
+  /* Cannot start a write transaction without first holding a read
+  ** transaction. */
+  assert( pWal->readLock>=0 );
+
+  if( pWal->readOnly ){
+    return SQLITE_READONLY;
+  }
+
+  /* Only one writer allowed at a time.  Get the write lock.  Return
+  ** SQLITE_BUSY if unable.
+  */
+  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
+  if( rc ){
+    return rc;
+  }
+  pWal->writeLock = 1;
+
+  /* If another connection has written to the database file since the
+  ** time the read transaction on this connection was started, then
+  ** the write is disallowed.
+  */
+  if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
+    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
+    pWal->writeLock = 0;
+    rc = SQLITE_BUSY_SNAPSHOT;
+  }
+
+  return rc;
+}
+
+/*
+** End a write transaction.  The commit has already been done.  This
+** routine merely releases the lock.
+*/
+SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){
+  if( pWal->writeLock ){
+    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
+    pWal->writeLock = 0;
+    pWal->truncateOnCommit = 0;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** If any data has been written (but not committed) to the log file, this
+** function moves the write-pointer back to the start of the transaction.
+**
+** Additionally, the callback function is invoked for each frame written
+** to the WAL since the start of the transaction. If the callback returns
+** other than SQLITE_OK, it is not invoked again and the error code is
+** returned to the caller.
+**
+** Otherwise, if the callback function does not return an error, this
+** function returns SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
+  int rc = SQLITE_OK;
+  if( ALWAYS(pWal->writeLock) ){
+    Pgno iMax = pWal->hdr.mxFrame;
+    Pgno iFrame;
+  
+    /* Restore the clients cache of the wal-index header to the state it
+    ** was in before the client began writing to the database. 
+    */
+    memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
+
+    for(iFrame=pWal->hdr.mxFrame+1; 
+        ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; 
+        iFrame++
+    ){
+      /* This call cannot fail. Unless the page for which the page number
+      ** is passed as the second argument is (a) in the cache and 
+      ** (b) has an outstanding reference, then xUndo is either a no-op
+      ** (if (a) is false) or simply expels the page from the cache (if (b)
+      ** is false).
+      **
+      ** If the upper layer is doing a rollback, it is guaranteed that there
+      ** are no outstanding references to any page other than page 1. And
+      ** page 1 is never written to the log until the transaction is
+      ** committed. As a result, the call to xUndo may not fail.
+      */
+      assert( walFramePgno(pWal, iFrame)!=1 );
+      rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
+    }
+    if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal);
+  }
+  return rc;
+}
+
+/* 
+** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
+** values. This function populates the array with values required to 
+** "rollback" the write position of the WAL handle back to the current 
+** point in the event of a savepoint rollback (via WalSavepointUndo()).
+*/
+SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
+  assert( pWal->writeLock );
+  aWalData[0] = pWal->hdr.mxFrame;
+  aWalData[1] = pWal->hdr.aFrameCksum[0];
+  aWalData[2] = pWal->hdr.aFrameCksum[1];
+  aWalData[3] = pWal->nCkpt;
+}
+
+/* 
+** Move the write position of the WAL back to the point identified by
+** the values in the aWalData[] array. aWalData must point to an array
+** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
+** by a call to WalSavepoint().
+*/
+SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
+  int rc = SQLITE_OK;
+
+  assert( pWal->writeLock );
+  assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame );
+
+  if( aWalData[3]!=pWal->nCkpt ){
+    /* This savepoint was opened immediately after the write-transaction
+    ** was started. Right after that, the writer decided to wrap around
+    ** to the start of the log. Update the savepoint values to match.
+    */
+    aWalData[0] = 0;
+    aWalData[3] = pWal->nCkpt;
+  }
+
+  if( aWalData[0]<pWal->hdr.mxFrame ){
+    pWal->hdr.mxFrame = aWalData[0];
+    pWal->hdr.aFrameCksum[0] = aWalData[1];
+    pWal->hdr.aFrameCksum[1] = aWalData[2];
+    walCleanupHash(pWal);
+  }
+
+  return rc;
+}
+
+/*
+** This function is called just before writing a set of frames to the log
+** file (see sqlite3WalFrames()). It checks to see if, instead of appending
+** to the current log file, it is possible to overwrite the start of the
+** existing log file with the new frames (i.e. "reset" the log). If so,
+** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left
+** unchanged.
+**
+** SQLITE_OK is returned if no error is encountered (regardless of whether
+** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
+** if an error occurs.
+*/
+static int walRestartLog(Wal *pWal){
+  int rc = SQLITE_OK;
+  int cnt;
+
+  if( pWal->readLock==0 ){
+    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
+    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
+    if( pInfo->nBackfill>0 ){
+      u32 salt1;
+      sqlite3_randomness(4, &salt1);
+      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
+      if( rc==SQLITE_OK ){
+        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
+        ** readers are currently using the WAL), then the transactions
+        ** frames will overwrite the start of the existing log. Update the
+        ** wal-index header to reflect this.
+        **
+        ** In theory it would be Ok to update the cache of the header only
+        ** at this point. But updating the actual wal-index header is also
+        ** safe and means there is no special case for sqlite3WalUndo()
+        ** to handle if this transaction is rolled back.  */
+        walRestartHdr(pWal, salt1);
+        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
+      }else if( rc!=SQLITE_BUSY ){
+        return rc;
+      }
+    }
+    walUnlockShared(pWal, WAL_READ_LOCK(0));
+    pWal->readLock = -1;
+    cnt = 0;
+    do{
+      int notUsed;
+      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
+    }while( rc==WAL_RETRY );
+    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
+    testcase( (rc&0xff)==SQLITE_IOERR );
+    testcase( rc==SQLITE_PROTOCOL );
+    testcase( rc==SQLITE_OK );
+  }
+  return rc;
+}
+
+/*
+** Information about the current state of the WAL file and where
+** the next fsync should occur - passed from sqlite3WalFrames() into
+** walWriteToLog().
+*/
+typedef struct WalWriter {
+  Wal *pWal;                   /* The complete WAL information */
+  sqlite3_file *pFd;           /* The WAL file to which we write */
+  sqlite3_int64 iSyncPoint;    /* Fsync at this offset */
+  int syncFlags;               /* Flags for the fsync */
+  int szPage;                  /* Size of one page */
+} WalWriter;
+
+/*
+** Write iAmt bytes of content into the WAL file beginning at iOffset.
+** Do a sync when crossing the p->iSyncPoint boundary.
+**
+** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt,
+** first write the part before iSyncPoint, then sync, then write the
+** rest.
+*/
+static int walWriteToLog(
+  WalWriter *p,              /* WAL to write to */
+  void *pContent,            /* Content to be written */
+  int iAmt,                  /* Number of bytes to write */
+  sqlite3_int64 iOffset      /* Start writing at this offset */
+){
+  int rc;
+  if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){
+    int iFirstAmt = (int)(p->iSyncPoint - iOffset);
+    rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset);
+    if( rc ) return rc;
+    iOffset += iFirstAmt;
+    iAmt -= iFirstAmt;
+    pContent = (void*)(iFirstAmt + (char*)pContent);
+    assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) );
+    rc = sqlite3OsSync(p->pFd, p->syncFlags & SQLITE_SYNC_MASK);
+    if( iAmt==0 || rc ) return rc;
+  }
+  rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset);
+  return rc;
+}
+
+/*
+** Write out a single frame of the WAL
+*/
+static int walWriteOneFrame(
+  WalWriter *p,               /* Where to write the frame */
+  PgHdr *pPage,               /* The page of the frame to be written */
+  int nTruncate,              /* The commit flag.  Usually 0.  >0 for commit */
+  sqlite3_int64 iOffset       /* Byte offset at which to write */
+){
+  int rc;                         /* Result code from subfunctions */
+  void *pData;                    /* Data actually written */
+  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
+#if defined(SQLITE_HAS_CODEC)
+  if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM;
+#else
+  pData = pPage->pData;
+#endif
+  walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame);
+  rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset);
+  if( rc ) return rc;
+  /* Write the page data */
+  rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame));
+  return rc;
+}
+
+/* 
+** Write a set of frames to the log. The caller must hold the write-lock
+** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
+*/
+SQLITE_PRIVATE int sqlite3WalFrames(
+  Wal *pWal,                      /* Wal handle to write to */
+  int szPage,                     /* Database page-size in bytes */
+  PgHdr *pList,                   /* List of dirty pages to write */
+  Pgno nTruncate,                 /* Database size after this commit */
+  int isCommit,                   /* True if this is a commit */
+  int sync_flags                  /* Flags to pass to OsSync() (or 0) */
+){
+  int rc;                         /* Used to catch return codes */
+  u32 iFrame;                     /* Next frame address */
+  PgHdr *p;                       /* Iterator to run through pList with. */
+  PgHdr *pLast = 0;               /* Last frame in list */
+  int nExtra = 0;                 /* Number of extra copies of last page */
+  int szFrame;                    /* The size of a single frame */
+  i64 iOffset;                    /* Next byte to write in WAL file */
+  WalWriter w;                    /* The writer */
+
+  assert( pList );
+  assert( pWal->writeLock );
+
+  /* If this frame set completes a transaction, then nTruncate>0.  If
+  ** nTruncate==0 then this frame set does not complete the transaction. */
+  assert( (isCommit!=0)==(nTruncate!=0) );
+
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
+    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
+              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
+  }
+#endif
+
+  /* See if it is possible to write these frames into the start of the
+  ** log file, instead of appending to it at pWal->hdr.mxFrame.
+  */
+  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
+    return rc;
+  }
+
+  /* If this is the first frame written into the log, write the WAL
+  ** header to the start of the WAL file. See comments at the top of
+  ** this source file for a description of the WAL header format.
+  */
+  iFrame = pWal->hdr.mxFrame;
+  if( iFrame==0 ){
+    u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
+    u32 aCksum[2];                /* Checksum for wal-header */
+
+    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
+    sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION);
+    sqlite3Put4byte(&aWalHdr[8], szPage);
+    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
+    if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt);
+    memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
+    walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum);
+    sqlite3Put4byte(&aWalHdr[24], aCksum[0]);
+    sqlite3Put4byte(&aWalHdr[28], aCksum[1]);
+    
+    pWal->szPage = szPage;
+    pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
+    pWal->hdr.aFrameCksum[0] = aCksum[0];
+    pWal->hdr.aFrameCksum[1] = aCksum[1];
+    pWal->truncateOnCommit = 1;
+
+    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
+    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+
+    /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless
+    ** all syncing is turned off by PRAGMA synchronous=OFF).  Otherwise
+    ** an out-of-order write following a WAL restart could result in
+    ** database corruption.  See the ticket:
+    **
+    **     http://localhost:591/sqlite/info/ff5be73dee
+    */
+    if( pWal->syncHeader && sync_flags ){
+      rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK);
+      if( rc ) return rc;
+    }
+  }
+  assert( (int)pWal->szPage==szPage );
+
+  /* Setup information needed to write frames into the WAL */
+  w.pWal = pWal;
+  w.pFd = pWal->pWalFd;
+  w.iSyncPoint = 0;
+  w.syncFlags = sync_flags;
+  w.szPage = szPage;
+  iOffset = walFrameOffset(iFrame+1, szPage);
+  szFrame = szPage + WAL_FRAME_HDRSIZE;
+
+  /* Write all frames into the log file exactly once */
+  for(p=pList; p; p=p->pDirty){
+    int nDbSize;   /* 0 normally.  Positive == commit flag */
+    iFrame++;
+    assert( iOffset==walFrameOffset(iFrame, szPage) );
+    nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0;
+    rc = walWriteOneFrame(&w, p, nDbSize, iOffset);
+    if( rc ) return rc;
+    pLast = p;
+    iOffset += szFrame;
+  }
+
+  /* If this is the end of a transaction, then we might need to pad
+  ** the transaction and/or sync the WAL file.
+  **
+  ** Padding and syncing only occur if this set of frames complete a
+  ** transaction and if PRAGMA synchronous=FULL.  If synchronous==NORMAL
+  ** or synchronous==OFF, then no padding or syncing are needed.
+  **
+  ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not
+  ** needed and only the sync is done.  If padding is needed, then the
+  ** final frame is repeated (with its commit mark) until the next sector
+  ** boundary is crossed.  Only the part of the WAL prior to the last
+  ** sector boundary is synced; the part of the last frame that extends
+  ** past the sector boundary is written after the sync.
+  */
+  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){
+    if( pWal->padToSectorBoundary ){
+      int sectorSize = sqlite3SectorSize(pWal->pWalFd);
+      w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize;
+      while( iOffset<w.iSyncPoint ){
+        rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset);
+        if( rc ) return rc;
+        iOffset += szFrame;
+        nExtra++;
+      }
+    }else{
+      rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK);
+    }
+  }
+
+  /* If this frame set completes the first transaction in the WAL and
+  ** if PRAGMA journal_size_limit is set, then truncate the WAL to the
+  ** journal size limit, if possible.
+  */
+  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
+    i64 sz = pWal->mxWalSize;
+    if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){
+      sz = walFrameOffset(iFrame+nExtra+1, szPage);
+    }
+    walLimitSize(pWal, sz);
+    pWal->truncateOnCommit = 0;
+  }
+
+  /* Append data to the wal-index. It is not necessary to lock the 
+  ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
+  ** guarantees that there are no other writers, and no data that may
+  ** be in use by existing readers is being overwritten.
+  */
+  iFrame = pWal->hdr.mxFrame;
+  for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){
+    iFrame++;
+    rc = walIndexAppend(pWal, iFrame, p->pgno);
+  }
+  while( rc==SQLITE_OK && nExtra>0 ){
+    iFrame++;
+    nExtra--;
+    rc = walIndexAppend(pWal, iFrame, pLast->pgno);
+  }
+
+  if( rc==SQLITE_OK ){
+    /* Update the private copy of the header. */
+    pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
+    testcase( szPage<=32768 );
+    testcase( szPage>=65536 );
+    pWal->hdr.mxFrame = iFrame;
+    if( isCommit ){
+      pWal->hdr.iChange++;
+      pWal->hdr.nPage = nTruncate;
+    }
+    /* If this is a commit, update the wal-index header too. */
+    if( isCommit ){
+      walIndexWriteHdr(pWal);
+      pWal->iCallback = iFrame;
+    }
+  }
+
+  WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
+  return rc;
+}
+
+/* 
+** This routine is called to implement sqlite3_wal_checkpoint() and
+** related interfaces.
+**
+** Obtain a CHECKPOINT lock and then backfill as much information as
+** we can from WAL into the database.
+**
+** If parameter xBusy is not NULL, it is a pointer to a busy-handler
+** callback. In this case this function runs a blocking checkpoint.
+*/
+SQLITE_PRIVATE int sqlite3WalCheckpoint(
+  Wal *pWal,                      /* Wal connection */
+  int eMode,                      /* PASSIVE, FULL, RESTART, or TRUNCATE */
+  int (*xBusy)(void*),            /* Function to call when busy */
+  void *pBusyArg,                 /* Context argument for xBusyHandler */
+  int sync_flags,                 /* Flags to sync db file with (or 0) */
+  int nBuf,                       /* Size of temporary buffer */
+  u8 *zBuf,                       /* Temporary buffer to use */
+  int *pnLog,                     /* OUT: Number of frames in WAL */
+  int *pnCkpt                     /* OUT: Number of backfilled frames in WAL */
+){
+  int rc;                         /* Return code */
+  int isChanged = 0;              /* True if a new wal-index header is loaded */
+  int eMode2 = eMode;             /* Mode to pass to walCheckpoint() */
+  int (*xBusy2)(void*) = xBusy;   /* Busy handler for eMode2 */
+
+  assert( pWal->ckptLock==0 );
+  assert( pWal->writeLock==0 );
+
+  /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked
+  ** in the SQLITE_CHECKPOINT_PASSIVE mode. */
+  assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
+
+  if( pWal->readOnly ) return SQLITE_READONLY;
+  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
+
+  /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive 
+  ** "checkpoint" lock on the database file. */
+  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
+  if( rc ){
+    /* EVIDENCE-OF: R-10421-19736 If any other process is running a
+    ** checkpoint operation at the same time, the lock cannot be obtained and
+    ** SQLITE_BUSY is returned.
+    ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured,
+    ** it will not be invoked in this case.
+    */
+    testcase( rc==SQLITE_BUSY );
+    testcase( xBusy!=0 );
+    return rc;
+  }
+  pWal->ckptLock = 1;
+
+  /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and
+  ** TRUNCATE modes also obtain the exclusive "writer" lock on the database
+  ** file.
+  **
+  ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained
+  ** immediately, and a busy-handler is configured, it is invoked and the
+  ** writer lock retried until either the busy-handler returns 0 or the
+  ** lock is successfully obtained.
+  */
+  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
+    rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);
+    if( rc==SQLITE_OK ){
+      pWal->writeLock = 1;
+    }else if( rc==SQLITE_BUSY ){
+      eMode2 = SQLITE_CHECKPOINT_PASSIVE;
+      xBusy2 = 0;
+      rc = SQLITE_OK;
+    }
+  }
+
+  /* Read the wal-index header. */
+  if( rc==SQLITE_OK ){
+    rc = walIndexReadHdr(pWal, &isChanged);
+    if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
+      sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
+    }
+  }
+
+  /* Copy data from the log to the database file. */
+  if( rc==SQLITE_OK ){
+    if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){
+      rc = SQLITE_CORRUPT_BKPT;
+    }else{
+      rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf);
+    }
+
+    /* If no error occurred, set the output variables. */
+    if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
+      if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame;
+      if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill);
+    }
+  }
+
+  if( isChanged ){
+    /* If a new wal-index header was loaded before the checkpoint was 
+    ** performed, then the pager-cache associated with pWal is now
+    ** out of date. So zero the cached wal-index header to ensure that
+    ** next time the pager opens a snapshot on this database it knows that
+    ** the cache needs to be reset.
+    */
+    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
+  }
+
+  /* Release the locks. */
+  sqlite3WalEndWriteTransaction(pWal);
+  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
+  pWal->ckptLock = 0;
+  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
+  return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc);
+}
+
+/* Return the value to pass to a sqlite3_wal_hook callback, the
+** number of frames in the WAL at the point of the last commit since
+** sqlite3WalCallback() was called.  If no commits have occurred since
+** the last call, then return 0.
+*/
+SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal){
+  u32 ret = 0;
+  if( pWal ){
+    ret = pWal->iCallback;
+    pWal->iCallback = 0;
+  }
+  return (int)ret;
+}
+
+/*
+** This function is called to change the WAL subsystem into or out
+** of locking_mode=EXCLUSIVE.
+**
+** If op is zero, then attempt to change from locking_mode=EXCLUSIVE
+** into locking_mode=NORMAL.  This means that we must acquire a lock
+** on the pWal->readLock byte.  If the WAL is already in locking_mode=NORMAL
+** or if the acquisition of the lock fails, then return 0.  If the
+** transition out of exclusive-mode is successful, return 1.  This
+** operation must occur while the pager is still holding the exclusive
+** lock on the main database file.
+**
+** If op is one, then change from locking_mode=NORMAL into 
+** locking_mode=EXCLUSIVE.  This means that the pWal->readLock must
+** be released.  Return 1 if the transition is made and 0 if the
+** WAL is already in exclusive-locking mode - meaning that this
+** routine is a no-op.  The pager must already hold the exclusive lock
+** on the main database file before invoking this operation.
+**
+** If op is negative, then do a dry-run of the op==1 case but do
+** not actually change anything. The pager uses this to see if it
+** should acquire the database exclusive lock prior to invoking
+** the op==1 case.
+*/
+SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){
+  int rc;
+  assert( pWal->writeLock==0 );
+  assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 );
+
+  /* pWal->readLock is usually set, but might be -1 if there was a 
+  ** prior error while attempting to acquire are read-lock. This cannot 
+  ** happen if the connection is actually in exclusive mode (as no xShmLock
+  ** locks are taken in this case). Nor should the pager attempt to
+  ** upgrade to exclusive-mode following such an error.
+  */
+  assert( pWal->readLock>=0 || pWal->lockError );
+  assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );
+
+  if( op==0 ){
+    if( pWal->exclusiveMode ){
+      pWal->exclusiveMode = 0;
+      if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
+        pWal->exclusiveMode = 1;
+      }
+      rc = pWal->exclusiveMode==0;
+    }else{
+      /* Already in locking_mode=NORMAL */
+      rc = 0;
+    }
+  }else if( op>0 ){
+    assert( pWal->exclusiveMode==0 );
+    assert( pWal->readLock>=0 );
+    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
+    pWal->exclusiveMode = 1;
+    rc = 1;
+  }else{
+    rc = pWal->exclusiveMode==0;
+  }
+  return rc;
+}
+
+/* 
+** Return true if the argument is non-NULL and the WAL module is using
+** heap-memory for the wal-index. Otherwise, if the argument is NULL or the
+** WAL module is using shared-memory, return false. 
+*/
+SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal){
+  return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE );
+}
+
+#ifdef SQLITE_ENABLE_ZIPVFS
+/*
+** If the argument is not NULL, it points to a Wal object that holds a
+** read-lock. This function returns the database page-size if it is known,
+** or zero if it is not (or if pWal is NULL).
+*/
+SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){
+  assert( pWal==0 || pWal->readLock>=0 );
+  return (pWal ? pWal->szPage : 0);
+}
+#endif
+
+#endif /* #ifndef SQLITE_OMIT_WAL */
+
+/************** End of wal.c *************************************************/
+/************** Begin file btmutex.c *****************************************/
+/*
+** 2007 August 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code used to implement mutexes on Btree objects.
+** This code really belongs in btree.c.  But btree.c is getting too
+** big and we want to break it down some.  This packaged seemed like
+** a good breakout.
+*/
+/************** Include btreeInt.h in the middle of btmutex.c ****************/
+/************** Begin file btreeInt.h ****************************************/
+/*
+** 2004 April 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements an external (disk-based) database using BTrees.
+** For a detailed discussion of BTrees, refer to
+**
+**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
+**     "Sorting And Searching", pages 473-480. Addison-Wesley
+**     Publishing Company, Reading, Massachusetts.
+**
+** The basic idea is that each page of the file contains N database
+** entries and N+1 pointers to subpages.
+**
+**   ----------------------------------------------------------------
+**   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) |
+**   ----------------------------------------------------------------
+**
+** All of the keys on the page that Ptr(0) points to have values less
+** than Key(0).  All of the keys on page Ptr(1) and its subpages have
+** values greater than Key(0) and less than Key(1).  All of the keys
+** on Ptr(N) and its subpages have values greater than Key(N-1).  And
+** so forth.
+**
+** Finding a particular key requires reading O(log(M)) pages from the 
+** disk where M is the number of entries in the tree.
+**
+** In this implementation, a single file can hold one or more separate 
+** BTrees.  Each BTree is identified by the index of its root page.  The
+** key and data for any entry are combined to form the "payload".  A
+** fixed amount of payload can be carried directly on the database
+** page.  If the payload is larger than the preset amount then surplus
+** bytes are stored on overflow pages.  The payload for an entry
+** and the preceding pointer are combined to form a "Cell".  Each 
+** page has a small header which contains the Ptr(N) pointer and other
+** information such as the size of key and data.
+**
+** FORMAT DETAILS
+**
+** The file is divided into pages.  The first page is called page 1,
+** the second is page 2, and so forth.  A page number of zero indicates
+** "no such page".  The page size can be any power of 2 between 512 and 65536.
+** Each page can be either a btree page, a freelist page, an overflow
+** page, or a pointer-map page.
+**
+** The first page is always a btree page.  The first 100 bytes of the first
+** page contain a special header (the "file header") that describes the file.
+** The format of the file header is as follows:
+**
+**   OFFSET   SIZE    DESCRIPTION
+**      0      16     Header string: "SQLite format 3\000"
+**     16       2     Page size in bytes.  (1 means 65536)
+**     18       1     File format write version
+**     19       1     File format read version
+**     20       1     Bytes of unused space at the end of each page
+**     21       1     Max embedded payload fraction (must be 64)
+**     22       1     Min embedded payload fraction (must be 32)
+**     23       1     Min leaf payload fraction (must be 32)
+**     24       4     File change counter
+**     28       4     Reserved for future use
+**     32       4     First freelist page
+**     36       4     Number of freelist pages in the file
+**     40      60     15 4-byte meta values passed to higher layers
+**
+**     40       4     Schema cookie
+**     44       4     File format of schema layer
+**     48       4     Size of page cache
+**     52       4     Largest root-page (auto/incr_vacuum)
+**     56       4     1=UTF-8 2=UTF16le 3=UTF16be
+**     60       4     User version
+**     64       4     Incremental vacuum mode
+**     68       4     Application-ID
+**     72      20     unused
+**     92       4     The version-valid-for number
+**     96       4     SQLITE_VERSION_NUMBER
+**
+** All of the integer values are big-endian (most significant byte first).
+**
+** The file change counter is incremented when the database is changed
+** This counter allows other processes to know when the file has changed
+** and thus when they need to flush their cache.
+**
+** The max embedded payload fraction is the amount of the total usable
+** space in a page that can be consumed by a single cell for standard
+** B-tree (non-LEAFDATA) tables.  A value of 255 means 100%.  The default
+** is to limit the maximum cell size so that at least 4 cells will fit
+** on one page.  Thus the default max embedded payload fraction is 64.
+**
+** If the payload for a cell is larger than the max payload, then extra
+** payload is spilled to overflow pages.  Once an overflow page is allocated,
+** as many bytes as possible are moved into the overflow pages without letting
+** the cell size drop below the min embedded payload fraction.
+**
+** The min leaf payload fraction is like the min embedded payload fraction
+** except that it applies to leaf nodes in a LEAFDATA tree.  The maximum
+** payload fraction for a LEAFDATA tree is always 100% (or 255) and it
+** not specified in the header.
+**
+** Each btree pages is divided into three sections:  The header, the
+** cell pointer array, and the cell content area.  Page 1 also has a 100-byte
+** file header that occurs before the page header.
+**
+**      |----------------|
+**      | file header    |   100 bytes.  Page 1 only.
+**      |----------------|
+**      | page header    |   8 bytes for leaves.  12 bytes for interior nodes
+**      |----------------|
+**      | cell pointer   |   |  2 bytes per cell.  Sorted order.
+**      | array          |   |  Grows downward
+**      |                |   v
+**      |----------------|
+**      | unallocated    |
+**      | space          |
+**      |----------------|   ^  Grows upwards
+**      | cell content   |   |  Arbitrary order interspersed with freeblocks.
+**      | area           |   |  and free space fragments.
+**      |----------------|
+**
+** The page headers looks like this:
+**
+**   OFFSET   SIZE     DESCRIPTION
+**      0       1      Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
+**      1       2      byte offset to the first freeblock
+**      3       2      number of cells on this page
+**      5       2      first byte of the cell content area
+**      7       1      number of fragmented free bytes
+**      8       4      Right child (the Ptr(N) value).  Omitted on leaves.
+**
+** The flags define the format of this btree page.  The leaf flag means that
+** this page has no children.  The zerodata flag means that this page carries
+** only keys and no data.  The intkey flag means that the key is an integer
+** which is stored in the key size entry of the cell header rather than in
+** the payload area.
+**
+** The cell pointer array begins on the first byte after the page header.
+** The cell pointer array contains zero or more 2-byte numbers which are
+** offsets from the beginning of the page to the cell content in the cell
+** content area.  The cell pointers occur in sorted order.  The system strives
+** to keep free space after the last cell pointer so that new cells can
+** be easily added without having to defragment the page.
+**
+** Cell content is stored at the very end of the page and grows toward the
+** beginning of the page.
+**
+** Unused space within the cell content area is collected into a linked list of
+** freeblocks.  Each freeblock is at least 4 bytes in size.  The byte offset
+** to the first freeblock is given in the header.  Freeblocks occur in
+** increasing order.  Because a freeblock must be at least 4 bytes in size,
+** any group of 3 or fewer unused bytes in the cell content area cannot
+** exist on the freeblock chain.  A group of 3 or fewer free bytes is called
+** a fragment.  The total number of bytes in all fragments is recorded.
+** in the page header at offset 7.
+**
+**    SIZE    DESCRIPTION
+**      2     Byte offset of the next freeblock
+**      2     Bytes in this freeblock
+**
+** Cells are of variable length.  Cells are stored in the cell content area at
+** the end of the page.  Pointers to the cells are in the cell pointer array
+** that immediately follows the page header.  Cells is not necessarily
+** contiguous or in order, but cell pointers are contiguous and in order.
+**
+** Cell content makes use of variable length integers.  A variable
+** length integer is 1 to 9 bytes where the lower 7 bits of each 
+** byte are used.  The integer consists of all bytes that have bit 8 set and
+** the first byte with bit 8 clear.  The most significant byte of the integer
+** appears first.  A variable-length integer may not be more than 9 bytes long.
+** As a special case, all 8 bytes of the 9th byte are used as data.  This
+** allows a 64-bit integer to be encoded in 9 bytes.
+**
+**    0x00                      becomes  0x00000000
+**    0x7f                      becomes  0x0000007f
+**    0x81 0x00                 becomes  0x00000080
+**    0x82 0x00                 becomes  0x00000100
+**    0x80 0x7f                 becomes  0x0000007f
+**    0x8a 0x91 0xd1 0xac 0x78  becomes  0x12345678
+**    0x81 0x81 0x81 0x81 0x01  becomes  0x10204081
+**
+** Variable length integers are used for rowids and to hold the number of
+** bytes of key and data in a btree cell.
+**
+** The content of a cell looks like this:
+**
+**    SIZE    DESCRIPTION
+**      4     Page number of the left child. Omitted if leaf flag is set.
+**     var    Number of bytes of data. Omitted if the zerodata flag is set.
+**     var    Number of bytes of key. Or the key itself if intkey flag is set.
+**      *     Payload
+**      4     First page of the overflow chain.  Omitted if no overflow
+**
+** Overflow pages form a linked list.  Each page except the last is completely
+** filled with data (pagesize - 4 bytes).  The last page can have as little
+** as 1 byte of data.
+**
+**    SIZE    DESCRIPTION
+**      4     Page number of next overflow page
+**      *     Data
+**
+** Freelist pages come in two subtypes: trunk pages and leaf pages.  The
+** file header points to the first in a linked list of trunk page.  Each trunk
+** page points to multiple leaf pages.  The content of a leaf page is
+** unspecified.  A trunk page looks like this:
+**
+**    SIZE    DESCRIPTION
+**      4     Page number of next trunk page
+**      4     Number of leaf pointers on this page
+**      *     zero or more pages numbers of leaves
+*/
+
+
+/* The following value is the maximum cell size assuming a maximum page
+** size give above.
+*/
+#define MX_CELL_SIZE(pBt)  ((int)(pBt->pageSize-8))
+
+/* The maximum number of cells on a single page of the database.  This
+** assumes a minimum cell size of 6 bytes  (4 bytes for the cell itself
+** plus 2 bytes for the index to the cell in the page header).  Such
+** small cells will be rare, but they are possible.
+*/
+#define MX_CELL(pBt) ((pBt->pageSize-8)/6)
+
+/* Forward declarations */
+typedef struct MemPage MemPage;
+typedef struct BtLock BtLock;
+
+/*
+** This is a magic string that appears at the beginning of every
+** SQLite database in order to identify the file as a real database.
+**
+** You can change this value at compile-time by specifying a
+** -DSQLITE_FILE_HEADER="..." on the compiler command-line.  The
+** header must be exactly 16 bytes including the zero-terminator so
+** the string itself should be 15 characters long.  If you change
+** the header, then your custom library will not be able to read 
+** databases generated by the standard tools and the standard tools
+** will not be able to read databases created by your custom library.
+*/
+#ifndef SQLITE_FILE_HEADER /* 123456789 123456 */
+#  define SQLITE_FILE_HEADER "SQLite format 3"
+#endif
+
+/*
+** Page type flags.  An ORed combination of these flags appear as the
+** first byte of on-disk image of every BTree page.
+*/
+#define PTF_INTKEY    0x01
+#define PTF_ZERODATA  0x02
+#define PTF_LEAFDATA  0x04
+#define PTF_LEAF      0x08
+
+/*
+** As each page of the file is loaded into memory, an instance of the following
+** structure is appended and initialized to zero.  This structure stores
+** information about the page that is decoded from the raw file page.
+**
+** The pParent field points back to the parent page.  This allows us to
+** walk up the BTree from any leaf to the root.  Care must be taken to
+** unref() the parent page pointer when this page is no longer referenced.
+** The pageDestructor() routine handles that chore.
+**
+** Access to all fields of this structure is controlled by the mutex
+** stored in MemPage.pBt->mutex.
+*/
+struct MemPage {
+  u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
+  u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
+  u8 intKey;           /* True if table b-trees.  False for index b-trees */
+  u8 intKeyLeaf;       /* True if the leaf of an intKey table */
+  u8 noPayload;        /* True if internal intKey page (thus w/o data) */
+  u8 leaf;             /* True if a leaf page */
+  u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
+  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
+  u8 max1bytePayload;  /* min(maxLocal,127) */
+  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
+  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
+  u16 cellOffset;      /* Index in aData of first cell pointer */
+  u16 nFree;           /* Number of free bytes on the page */
+  u16 nCell;           /* Number of cells on this page, local and ovfl */
+  u16 maskPage;        /* Mask for page offset */
+  u16 aiOvfl[5];       /* Insert the i-th overflow cell before the aiOvfl-th
+                       ** non-overflow cell */
+  u8 *apOvfl[5];       /* Pointers to the body of overflow cells */
+  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
+  u8 *aData;           /* Pointer to disk image of the page data */
+  u8 *aDataEnd;        /* One byte past the end of usable data */
+  u8 *aCellIdx;        /* The cell index area */
+  DbPage *pDbPage;     /* Pager page handle */
+  Pgno pgno;           /* Page number for this page */
+};
+
+/*
+** The in-memory image of a disk page has the auxiliary information appended
+** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
+** that extra information.
+*/
+#define EXTRA_SIZE sizeof(MemPage)
+
+/*
+** A linked list of the following structures is stored at BtShared.pLock.
+** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor 
+** is opened on the table with root page BtShared.iTable. Locks are removed
+** from this list when a transaction is committed or rolled back, or when
+** a btree handle is closed.
+*/
+struct BtLock {
+  Btree *pBtree;        /* Btree handle holding this lock */
+  Pgno iTable;          /* Root page of table */
+  u8 eLock;             /* READ_LOCK or WRITE_LOCK */
+  BtLock *pNext;        /* Next in BtShared.pLock list */
+};
+
+/* Candidate values for BtLock.eLock */
+#define READ_LOCK     1
+#define WRITE_LOCK    2
+
+/* A Btree handle
+**
+** A database connection contains a pointer to an instance of
+** this object for every database file that it has open.  This structure
+** is opaque to the database connection.  The database connection cannot
+** see the internals of this structure and only deals with pointers to
+** this structure.
+**
+** For some database files, the same underlying database cache might be 
+** shared between multiple connections.  In that case, each connection
+** has it own instance of this object.  But each instance of this object
+** points to the same BtShared object.  The database cache and the
+** schema associated with the database file are all contained within
+** the BtShared object.
+**
+** All fields in this structure are accessed under sqlite3.mutex.
+** The pBt pointer itself may not be changed while there exists cursors 
+** in the referenced BtShared that point back to this Btree since those
+** cursors have to go through this Btree to find their BtShared and
+** they often do so without holding sqlite3.mutex.
+*/
+struct Btree {
+  sqlite3 *db;       /* The database connection holding this btree */
+  BtShared *pBt;     /* Sharable content of this btree */
+  u8 inTrans;        /* TRANS_NONE, TRANS_READ or TRANS_WRITE */
+  u8 sharable;       /* True if we can share pBt with another db */
+  u8 locked;         /* True if db currently has pBt locked */
+  int wantToLock;    /* Number of nested calls to sqlite3BtreeEnter() */
+  int nBackup;       /* Number of backup operations reading this btree */
+  u32 iDataVersion;  /* Combines with pBt->pPager->iDataVersion */
+  Btree *pNext;      /* List of other sharable Btrees from the same db */
+  Btree *pPrev;      /* Back pointer of the same list */
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  BtLock lock;       /* Object used to lock page 1 */
+#endif
+};
+
+/*
+** Btree.inTrans may take one of the following values.
+**
+** If the shared-data extension is enabled, there may be multiple users
+** of the Btree structure. At most one of these may open a write transaction,
+** but any number may have active read transactions.
+*/
+#define TRANS_NONE  0
+#define TRANS_READ  1
+#define TRANS_WRITE 2
+
+/*
+** An instance of this object represents a single database file.
+** 
+** A single database file can be in use at the same time by two
+** or more database connections.  When two or more connections are
+** sharing the same database file, each connection has it own
+** private Btree object for the file and each of those Btrees points
+** to this one BtShared object.  BtShared.nRef is the number of
+** connections currently sharing this database file.
+**
+** Fields in this structure are accessed under the BtShared.mutex
+** mutex, except for nRef and pNext which are accessed under the
+** global SQLITE_MUTEX_STATIC_MASTER mutex.  The pPager field
+** may not be modified once it is initially set as long as nRef>0.
+** The pSchema field may be set once under BtShared.mutex and
+** thereafter is unchanged as long as nRef>0.
+**
+** isPending:
+**
+**   If a BtShared client fails to obtain a write-lock on a database
+**   table (because there exists one or more read-locks on the table),
+**   the shared-cache enters 'pending-lock' state and isPending is
+**   set to true.
+**
+**   The shared-cache leaves the 'pending lock' state when either of
+**   the following occur:
+**
+**     1) The current writer (BtShared.pWriter) concludes its transaction, OR
+**     2) The number of locks held by other connections drops to zero.
+**
+**   while in the 'pending-lock' state, no connection may start a new
+**   transaction.
+**
+**   This feature is included to help prevent writer-starvation.
+*/
+struct BtShared {
+  Pager *pPager;        /* The page cache */
+  sqlite3 *db;          /* Database connection currently using this Btree */
+  BtCursor *pCursor;    /* A list of all open cursors */
+  MemPage *pPage1;      /* First page of the database */
+  u8 openFlags;         /* Flags to sqlite3BtreeOpen() */
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  u8 autoVacuum;        /* True if auto-vacuum is enabled */
+  u8 incrVacuum;        /* True if incr-vacuum is enabled */
+  u8 bDoTruncate;       /* True to truncate db on commit */
+#endif
+  u8 inTransaction;     /* Transaction state */
+  u8 max1bytePayload;   /* Maximum first byte of cell for a 1-byte payload */
+  u16 btsFlags;         /* Boolean parameters.  See BTS_* macros below */
+  u16 maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
+  u16 minLocal;         /* Minimum local payload in non-LEAFDATA tables */
+  u16 maxLeaf;          /* Maximum local payload in a LEAFDATA table */
+  u16 minLeaf;          /* Minimum local payload in a LEAFDATA table */
+  u32 pageSize;         /* Total number of bytes on a page */
+  u32 usableSize;       /* Number of usable bytes on each page */
+  int nTransaction;     /* Number of open transactions (read + write) */
+  u32 nPage;            /* Number of pages in the database */
+  void *pSchema;        /* Pointer to space allocated by sqlite3BtreeSchema() */
+  void (*xFreeSchema)(void*);  /* Destructor for BtShared.pSchema */
+  sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */
+  Bitvec *pHasContent;  /* Set of pages moved to free-list this transaction */
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  int nRef;             /* Number of references to this structure */
+  BtShared *pNext;      /* Next on a list of sharable BtShared structs */
+  BtLock *pLock;        /* List of locks held on this shared-btree struct */
+  Btree *pWriter;       /* Btree with currently open write transaction */
+#endif
+  u8 *pTmpSpace;        /* Temp space sufficient to hold a single cell */
+};
+
+/*
+** Allowed values for BtShared.btsFlags
+*/
+#define BTS_READ_ONLY        0x0001   /* Underlying file is readonly */
+#define BTS_PAGESIZE_FIXED   0x0002   /* Page size can no longer be changed */
+#define BTS_SECURE_DELETE    0x0004   /* PRAGMA secure_delete is enabled */
+#define BTS_INITIALLY_EMPTY  0x0008   /* Database was empty at trans start */
+#define BTS_NO_WAL           0x0010   /* Do not open write-ahead-log files */
+#define BTS_EXCLUSIVE        0x0020   /* pWriter has an exclusive lock */
+#define BTS_PENDING          0x0040   /* Waiting for read-locks to clear */
+
+/*
+** An instance of the following structure is used to hold information
+** about a cell.  The parseCellPtr() function fills in this structure
+** based on information extract from the raw disk page.
+*/
+typedef struct CellInfo CellInfo;
+struct CellInfo {
+  i64 nKey;      /* The key for INTKEY tables, or nPayload otherwise */
+  u8 *pPayload;  /* Pointer to the start of payload */
+  u32 nPayload;  /* Bytes of payload */
+  u16 nLocal;    /* Amount of payload held locally, not on overflow */
+  u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
+  u16 nSize;     /* Size of the cell content on the main b-tree page */
+};
+
+/*
+** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
+** this will be declared corrupt. This value is calculated based on a
+** maximum database size of 2^31 pages a minimum fanout of 2 for a
+** root-node and 3 for all other internal nodes.
+**
+** If a tree that appears to be taller than this is encountered, it is
+** assumed that the database is corrupt.
+*/
+#define BTCURSOR_MAX_DEPTH 20
+
+/*
+** A cursor is a pointer to a particular entry within a particular
+** b-tree within a database file.
+**
+** The entry is identified by its MemPage and the index in
+** MemPage.aCell[] of the entry.
+**
+** A single database file can be shared by two more database connections,
+** but cursors cannot be shared.  Each cursor is associated with a
+** particular database connection identified BtCursor.pBtree.db.
+**
+** Fields in this structure are accessed under the BtShared.mutex
+** found at self->pBt->mutex. 
+**
+** skipNext meaning:
+**    eState==SKIPNEXT && skipNext>0:  Next sqlite3BtreeNext() is no-op.
+**    eState==SKIPNEXT && skipNext<0:  Next sqlite3BtreePrevious() is no-op.
+**    eState==FAULT:                   Cursor fault with skipNext as error code.
+*/
+struct BtCursor {
+  Btree *pBtree;            /* The Btree to which this cursor belongs */
+  BtShared *pBt;            /* The BtShared this cursor points to */
+  BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
+  struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
+  Pgno *aOverflow;          /* Cache of overflow page locations */
+  CellInfo info;            /* A parse of the cell we are pointing at */
+  i64 nKey;                 /* Size of pKey, or last integer key */
+  void *pKey;               /* Saved key that was cursor last known position */
+  Pgno pgnoRoot;            /* The root page of this tree */
+  int nOvflAlloc;           /* Allocated size of aOverflow[] array */
+  int skipNext;    /* Prev() is noop if negative. Next() is noop if positive.
+                   ** Error code if eState==CURSOR_FAULT */
+  u8 curFlags;              /* zero or more BTCF_* flags defined below */
+  u8 eState;                /* One of the CURSOR_XXX constants (see below) */
+  u8 hints;                             /* As configured by CursorSetHints() */
+  i16 iPage;                            /* Index of current page in apPage */
+  u16 aiIdx[BTCURSOR_MAX_DEPTH];        /* Current index in apPage[i] */
+  MemPage *apPage[BTCURSOR_MAX_DEPTH];  /* Pages from root to current page */
+};
+
+/*
+** Legal values for BtCursor.curFlags
+*/
+#define BTCF_WriteFlag    0x01   /* True if a write cursor */
+#define BTCF_ValidNKey    0x02   /* True if info.nKey is valid */
+#define BTCF_ValidOvfl    0x04   /* True if aOverflow is valid */
+#define BTCF_AtLast       0x08   /* Cursor is pointing ot the last entry */
+#define BTCF_Incrblob     0x10   /* True if an incremental I/O handle */
+
+/*
+** Potential values for BtCursor.eState.
+**
+** CURSOR_INVALID:
+**   Cursor does not point to a valid entry. This can happen (for example) 
+**   because the table is empty or because BtreeCursorFirst() has not been
+**   called.
+**
+** CURSOR_VALID:
+**   Cursor points to a valid entry. getPayload() etc. may be called.
+**
+** CURSOR_SKIPNEXT:
+**   Cursor is valid except that the Cursor.skipNext field is non-zero
+**   indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious()
+**   operation should be a no-op.
+**
+** CURSOR_REQUIRESEEK:
+**   The table that this cursor was opened on still exists, but has been 
+**   modified since the cursor was last used. The cursor position is saved
+**   in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in 
+**   this state, restoreCursorPosition() can be called to attempt to
+**   seek the cursor to the saved position.
+**
+** CURSOR_FAULT:
+**   An unrecoverable error (an I/O error or a malloc failure) has occurred
+**   on a different connection that shares the BtShared cache with this
+**   cursor.  The error has left the cache in an inconsistent state.
+**   Do nothing else with this cursor.  Any attempt to use the cursor
+**   should return the error code stored in BtCursor.skipNext
+*/
+#define CURSOR_INVALID           0
+#define CURSOR_VALID             1
+#define CURSOR_SKIPNEXT          2
+#define CURSOR_REQUIRESEEK       3
+#define CURSOR_FAULT             4
+
+/* 
+** The database page the PENDING_BYTE occupies. This page is never used.
+*/
+# define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt)
+
+/*
+** These macros define the location of the pointer-map entry for a 
+** database page. The first argument to each is the number of usable
+** bytes on each page of the database (often 1024). The second is the
+** page number to look up in the pointer map.
+**
+** PTRMAP_PAGENO returns the database page number of the pointer-map
+** page that stores the required pointer. PTRMAP_PTROFFSET returns
+** the offset of the requested map entry.
+**
+** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page,
+** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be
+** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements
+** this test.
+*/
+#define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno)
+#define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*(pgno-pgptrmap-1))
+#define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno))
+
+/*
+** The pointer map is a lookup table that identifies the parent page for
+** each child page in the database file.  The parent page is the page that
+** contains a pointer to the child.  Every page in the database contains
+** 0 or 1 parent pages.  (In this context 'database page' refers
+** to any page that is not part of the pointer map itself.)  Each pointer map
+** entry consists of a single byte 'type' and a 4 byte parent page number.
+** The PTRMAP_XXX identifiers below are the valid types.
+**
+** The purpose of the pointer map is to facility moving pages from one
+** position in the file to another as part of autovacuum.  When a page
+** is moved, the pointer in its parent must be updated to point to the
+** new location.  The pointer map is used to locate the parent page quickly.
+**
+** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not
+**                  used in this case.
+**
+** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number 
+**                  is not used in this case.
+**
+** PTRMAP_OVERFLOW1: The database page is the first page in a list of 
+**                   overflow pages. The page number identifies the page that
+**                   contains the cell with a pointer to this overflow page.
+**
+** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of
+**                   overflow pages. The page-number identifies the previous
+**                   page in the overflow page list.
+**
+** PTRMAP_BTREE: The database page is a non-root btree page. The page number
+**               identifies the parent page in the btree.
+*/
+#define PTRMAP_ROOTPAGE 1
+#define PTRMAP_FREEPAGE 2
+#define PTRMAP_OVERFLOW1 3
+#define PTRMAP_OVERFLOW2 4
+#define PTRMAP_BTREE 5
+
+/* A bunch of assert() statements to check the transaction state variables
+** of handle p (type Btree*) are internally consistent.
+*/
+#define btreeIntegrity(p) \
+  assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \
+  assert( p->pBt->inTransaction>=p->inTrans ); 
+
+
+/*
+** The ISAUTOVACUUM macro is used within balance_nonroot() to determine
+** if the database supports auto-vacuum or not. Because it is used
+** within an expression that is an argument to another macro 
+** (sqliteMallocRaw), it is not possible to use conditional compilation.
+** So, this macro is defined instead.
+*/
+#ifndef SQLITE_OMIT_AUTOVACUUM
+#define ISAUTOVACUUM (pBt->autoVacuum)
+#else
+#define ISAUTOVACUUM 0
+#endif
+
+
+/*
+** This structure is passed around through all the sanity checking routines
+** in order to keep track of some global state information.
+**
+** The aRef[] array is allocated so that there is 1 bit for each page in
+** the database. As the integrity-check proceeds, for each page used in
+** the database the corresponding bit is set. This allows integrity-check to 
+** detect pages that are used twice and orphaned pages (both of which 
+** indicate corruption).
+*/
+typedef struct IntegrityCk IntegrityCk;
+struct IntegrityCk {
+  BtShared *pBt;    /* The tree being checked out */
+  Pager *pPager;    /* The associated pager.  Also accessible by pBt->pPager */
+  u8 *aPgRef;       /* 1 bit per page in the db (see above) */
+  Pgno nPage;       /* Number of pages in the database */
+  int mxErr;        /* Stop accumulating errors when this reaches zero */
+  int nErr;         /* Number of messages written to zErrMsg so far */
+  int mallocFailed; /* A memory allocation error has occurred */
+  const char *zPfx; /* Error message prefix */
+  int v1, v2;       /* Values for up to two %d fields in zPfx */
+  StrAccum errMsg;  /* Accumulate the error message text here */
+};
+
+/*
+** Routines to read or write a two- and four-byte big-endian integer values.
+*/
+#define get2byte(x)   ((x)[0]<<8 | (x)[1])
+#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v))
+#define get4byte sqlite3Get4byte
+#define put4byte sqlite3Put4byte
+
+/************** End of btreeInt.h ********************************************/
+/************** Continuing where we left off in btmutex.c ********************/
+#ifndef SQLITE_OMIT_SHARED_CACHE
+#if SQLITE_THREADSAFE
+
+/*
+** Obtain the BtShared mutex associated with B-Tree handle p. Also,
+** set BtShared.db to the database handle associated with p and the
+** p->locked boolean to true.
+*/
+static void lockBtreeMutex(Btree *p){
+  assert( p->locked==0 );
+  assert( sqlite3_mutex_notheld(p->pBt->mutex) );
+  assert( sqlite3_mutex_held(p->db->mutex) );
+
+  sqlite3_mutex_enter(p->pBt->mutex);
+  p->pBt->db = p->db;
+  p->locked = 1;
+}
+
+/*
+** Release the BtShared mutex associated with B-Tree handle p and
+** clear the p->locked boolean.
+*/
+static void SQLITE_NOINLINE unlockBtreeMutex(Btree *p){
+  BtShared *pBt = p->pBt;
+  assert( p->locked==1 );
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  assert( p->db==pBt->db );
+
+  sqlite3_mutex_leave(pBt->mutex);
+  p->locked = 0;
+}
+
+/* Forward reference */
+static void SQLITE_NOINLINE btreeLockCarefully(Btree *p);
+
+/*
+** Enter a mutex on the given BTree object.
+**
+** If the object is not sharable, then no mutex is ever required
+** and this routine is a no-op.  The underlying mutex is non-recursive.
+** But we keep a reference count in Btree.wantToLock so the behavior
+** of this interface is recursive.
+**
+** To avoid deadlocks, multiple Btrees are locked in the same order
+** by all database connections.  The p->pNext is a list of other
+** Btrees belonging to the same database connection as the p Btree
+** which need to be locked after p.  If we cannot get a lock on
+** p, then first unlock all of the others on p->pNext, then wait
+** for the lock to become available on p, then relock all of the
+** subsequent Btrees that desire a lock.
+*/
+SQLITE_PRIVATE void sqlite3BtreeEnter(Btree *p){
+  /* Some basic sanity checking on the Btree.  The list of Btrees
+  ** connected by pNext and pPrev should be in sorted order by
+  ** Btree.pBt value. All elements of the list should belong to
+  ** the same connection. Only shared Btrees are on the list. */
+  assert( p->pNext==0 || p->pNext->pBt>p->pBt );
+  assert( p->pPrev==0 || p->pPrev->pBt<p->pBt );
+  assert( p->pNext==0 || p->pNext->db==p->db );
+  assert( p->pPrev==0 || p->pPrev->db==p->db );
+  assert( p->sharable || (p->pNext==0 && p->pPrev==0) );
+
+  /* Check for locking consistency */
+  assert( !p->locked || p->wantToLock>0 );
+  assert( p->sharable || p->wantToLock==0 );
+
+  /* We should already hold a lock on the database connection */
+  assert( sqlite3_mutex_held(p->db->mutex) );
+
+  /* Unless the database is sharable and unlocked, then BtShared.db
+  ** should already be set correctly. */
+  assert( (p->locked==0 && p->sharable) || p->pBt->db==p->db );
+
+  if( !p->sharable ) return;
+  p->wantToLock++;
+  if( p->locked ) return;
+  btreeLockCarefully(p);
+}
+
+/* This is a helper function for sqlite3BtreeLock(). By moving
+** complex, but seldom used logic, out of sqlite3BtreeLock() and
+** into this routine, we avoid unnecessary stack pointer changes
+** and thus help the sqlite3BtreeLock() routine to run much faster
+** in the common case.
+*/
+static void SQLITE_NOINLINE btreeLockCarefully(Btree *p){
+  Btree *pLater;
+
+  /* In most cases, we should be able to acquire the lock we
+  ** want without having to go through the ascending lock
+  ** procedure that follows.  Just be sure not to block.
+  */
+  if( sqlite3_mutex_try(p->pBt->mutex)==SQLITE_OK ){
+    p->pBt->db = p->db;
+    p->locked = 1;
+    return;
+  }
+
+  /* To avoid deadlock, first release all locks with a larger
+  ** BtShared address.  Then acquire our lock.  Then reacquire
+  ** the other BtShared locks that we used to hold in ascending
+  ** order.
+  */
+  for(pLater=p->pNext; pLater; pLater=pLater->pNext){
+    assert( pLater->sharable );
+    assert( pLater->pNext==0 || pLater->pNext->pBt>pLater->pBt );
+    assert( !pLater->locked || pLater->wantToLock>0 );
+    if( pLater->locked ){
+      unlockBtreeMutex(pLater);
+    }
+  }
+  lockBtreeMutex(p);
+  for(pLater=p->pNext; pLater; pLater=pLater->pNext){
+    if( pLater->wantToLock ){
+      lockBtreeMutex(pLater);
+    }
+  }
+}
+
+
+/*
+** Exit the recursive mutex on a Btree.
+*/
+SQLITE_PRIVATE void sqlite3BtreeLeave(Btree *p){
+  if( p->sharable ){
+    assert( p->wantToLock>0 );
+    p->wantToLock--;
+    if( p->wantToLock==0 ){
+      unlockBtreeMutex(p);
+    }
+  }
+}
+
+#ifndef NDEBUG
+/*
+** Return true if the BtShared mutex is held on the btree, or if the
+** B-Tree is not marked as sharable.
+**
+** This routine is used only from within assert() statements.
+*/
+SQLITE_PRIVATE int sqlite3BtreeHoldsMutex(Btree *p){
+  assert( p->sharable==0 || p->locked==0 || p->wantToLock>0 );
+  assert( p->sharable==0 || p->locked==0 || p->db==p->pBt->db );
+  assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->pBt->mutex) );
+  assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->db->mutex) );
+
+  return (p->sharable==0 || p->locked);
+}
+#endif
+
+
+#ifndef SQLITE_OMIT_INCRBLOB
+/*
+** Enter and leave a mutex on a Btree given a cursor owned by that
+** Btree.  These entry points are used by incremental I/O and can be
+** omitted if that module is not used.
+*/
+SQLITE_PRIVATE void sqlite3BtreeEnterCursor(BtCursor *pCur){
+  sqlite3BtreeEnter(pCur->pBtree);
+}
+SQLITE_PRIVATE void sqlite3BtreeLeaveCursor(BtCursor *pCur){
+  sqlite3BtreeLeave(pCur->pBtree);
+}
+#endif /* SQLITE_OMIT_INCRBLOB */
+
+
+/*
+** Enter the mutex on every Btree associated with a database
+** connection.  This is needed (for example) prior to parsing
+** a statement since we will be comparing table and column names
+** against all schemas and we do not want those schemas being
+** reset out from under us.
+**
+** There is a corresponding leave-all procedures.
+**
+** Enter the mutexes in accending order by BtShared pointer address
+** to avoid the possibility of deadlock when two threads with
+** two or more btrees in common both try to lock all their btrees
+** at the same instant.
+*/
+SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){
+  int i;
+  Btree *p;
+  assert( sqlite3_mutex_held(db->mutex) );
+  for(i=0; i<db->nDb; i++){
+    p = db->aDb[i].pBt;
+    if( p ) sqlite3BtreeEnter(p);
+  }
+}
+SQLITE_PRIVATE void sqlite3BtreeLeaveAll(sqlite3 *db){
+  int i;
+  Btree *p;
+  assert( sqlite3_mutex_held(db->mutex) );
+  for(i=0; i<db->nDb; i++){
+    p = db->aDb[i].pBt;
+    if( p ) sqlite3BtreeLeave(p);
+  }
+}
+
+/*
+** Return true if a particular Btree requires a lock.  Return FALSE if
+** no lock is ever required since it is not sharable.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSharable(Btree *p){
+  return p->sharable;
+}
+
+#ifndef NDEBUG
+/*
+** Return true if the current thread holds the database connection
+** mutex and all required BtShared mutexes.
+**
+** This routine is used inside assert() statements only.
+*/
+SQLITE_PRIVATE int sqlite3BtreeHoldsAllMutexes(sqlite3 *db){
+  int i;
+  if( !sqlite3_mutex_held(db->mutex) ){
+    return 0;
+  }
+  for(i=0; i<db->nDb; i++){
+    Btree *p;
+    p = db->aDb[i].pBt;
+    if( p && p->sharable &&
+         (p->wantToLock==0 || !sqlite3_mutex_held(p->pBt->mutex)) ){
+      return 0;
+    }
+  }
+  return 1;
+}
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/*
+** Return true if the correct mutexes are held for accessing the
+** db->aDb[iDb].pSchema structure.  The mutexes required for schema
+** access are:
+**
+**   (1) The mutex on db
+**   (2) if iDb!=1, then the mutex on db->aDb[iDb].pBt.
+**
+** If pSchema is not NULL, then iDb is computed from pSchema and
+** db using sqlite3SchemaToIndex().
+*/
+SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3 *db, int iDb, Schema *pSchema){
+  Btree *p;
+  assert( db!=0 );
+  if( pSchema ) iDb = sqlite3SchemaToIndex(db, pSchema);
+  assert( iDb>=0 && iDb<db->nDb );
+  if( !sqlite3_mutex_held(db->mutex) ) return 0;
+  if( iDb==1 ) return 1;
+  p = db->aDb[iDb].pBt;
+  assert( p!=0 );
+  return p->sharable==0 || p->locked==1;
+}
+#endif /* NDEBUG */
+
+#else /* SQLITE_THREADSAFE>0 above.  SQLITE_THREADSAFE==0 below */
+/*
+** The following are special cases for mutex enter routines for use
+** in single threaded applications that use shared cache.  Except for
+** these two routines, all mutex operations are no-ops in that case and
+** are null #defines in btree.h.
+**
+** If shared cache is disabled, then all btree mutex routines, including
+** the ones below, are no-ops and are null #defines in btree.h.
+*/
+
+SQLITE_PRIVATE void sqlite3BtreeEnter(Btree *p){
+  p->pBt->db = p->db;
+}
+SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){
+  int i;
+  for(i=0; i<db->nDb; i++){
+    Btree *p = db->aDb[i].pBt;
+    if( p ){
+      p->pBt->db = p->db;
+    }
+  }
+}
+#endif /* if SQLITE_THREADSAFE */
+#endif /* ifndef SQLITE_OMIT_SHARED_CACHE */
+
+/************** End of btmutex.c *********************************************/
+/************** Begin file btree.c *******************************************/
+/*
+** 2004 April 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements an external (disk-based) database using BTrees.
+** See the header comment on "btreeInt.h" for additional information.
+** Including a description of file format and an overview of operation.
+*/
+
+/*
+** The header string that appears at the beginning of every
+** SQLite database.
+*/
+static const char zMagicHeader[] = SQLITE_FILE_HEADER;
+
+/*
+** Set this global variable to 1 to enable tracing using the TRACE
+** macro.
+*/
+#if 0
+int sqlite3BtreeTrace=1;  /* True to enable tracing */
+# define TRACE(X)  if(sqlite3BtreeTrace){printf X;fflush(stdout);}
+#else
+# define TRACE(X)
+#endif
+
+/*
+** Extract a 2-byte big-endian integer from an array of unsigned bytes.
+** But if the value is zero, make it 65536.
+**
+** This routine is used to extract the "offset to cell content area" value
+** from the header of a btree page.  If the page size is 65536 and the page
+** is empty, the offset should be 65536, but the 2-byte value stores zero.
+** This routine makes the necessary adjustment to 65536.
+*/
+#define get2byteNotZero(X)  (((((int)get2byte(X))-1)&0xffff)+1)
+
+/*
+** Values passed as the 5th argument to allocateBtreePage()
+*/
+#define BTALLOC_ANY   0           /* Allocate any page */
+#define BTALLOC_EXACT 1           /* Allocate exact page if possible */
+#define BTALLOC_LE    2           /* Allocate any page <= the parameter */
+
+/*
+** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not 
+** defined, or 0 if it is. For example:
+**
+**   bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum);
+*/
+#ifndef SQLITE_OMIT_AUTOVACUUM
+#define IfNotOmitAV(expr) (expr)
+#else
+#define IfNotOmitAV(expr) 0
+#endif
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** A list of BtShared objects that are eligible for participation
+** in shared cache.  This variable has file scope during normal builds,
+** but the test harness needs to access it so we make it global for 
+** test builds.
+**
+** Access to this variable is protected by SQLITE_MUTEX_STATIC_MASTER.
+*/
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
+#else
+static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0;
+#endif
+#endif /* SQLITE_OMIT_SHARED_CACHE */
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** Enable or disable the shared pager and schema features.
+**
+** This routine has no effect on existing database connections.
+** The shared cache setting effects only future calls to
+** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
+*/
+SQLITE_API int sqlite3_enable_shared_cache(int enable){
+  sqlite3GlobalConfig.sharedCacheEnabled = enable;
+  return SQLITE_OK;
+}
+#endif
+
+
+
+#ifdef SQLITE_OMIT_SHARED_CACHE
+  /*
+  ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(),
+  ** and clearAllSharedCacheTableLocks()
+  ** manipulate entries in the BtShared.pLock linked list used to store
+  ** shared-cache table level locks. If the library is compiled with the
+  ** shared-cache feature disabled, then there is only ever one user
+  ** of each BtShared structure and so this locking is not necessary. 
+  ** So define the lock related functions as no-ops.
+  */
+  #define querySharedCacheTableLock(a,b,c) SQLITE_OK
+  #define setSharedCacheTableLock(a,b,c) SQLITE_OK
+  #define clearAllSharedCacheTableLocks(a)
+  #define downgradeAllSharedCacheTableLocks(a)
+  #define hasSharedCacheTableLock(a,b,c,d) 1
+  #define hasReadConflicts(a, b) 0
+#endif
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+
+#ifdef SQLITE_DEBUG
+/*
+**** This function is only used as part of an assert() statement. ***
+**
+** Check to see if pBtree holds the required locks to read or write to the 
+** table with root page iRoot.   Return 1 if it does and 0 if not.
+**
+** For example, when writing to a table with root-page iRoot via 
+** Btree connection pBtree:
+**
+**    assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) );
+**
+** When writing to an index that resides in a sharable database, the 
+** caller should have first obtained a lock specifying the root page of
+** the corresponding table. This makes things a bit more complicated,
+** as this module treats each table as a separate structure. To determine
+** the table corresponding to the index being written, this
+** function has to search through the database schema.
+**
+** Instead of a lock on the table/index rooted at page iRoot, the caller may
+** hold a write-lock on the schema table (root page 1). This is also
+** acceptable.
+*/
+static int hasSharedCacheTableLock(
+  Btree *pBtree,         /* Handle that must hold lock */
+  Pgno iRoot,            /* Root page of b-tree */
+  int isIndex,           /* True if iRoot is the root of an index b-tree */
+  int eLockType          /* Required lock type (READ_LOCK or WRITE_LOCK) */
+){
+  Schema *pSchema = (Schema *)pBtree->pBt->pSchema;
+  Pgno iTab = 0;
+  BtLock *pLock;
+
+  /* If this database is not shareable, or if the client is reading
+  ** and has the read-uncommitted flag set, then no lock is required. 
+  ** Return true immediately.
+  */
+  if( (pBtree->sharable==0)
+   || (eLockType==READ_LOCK && (pBtree->db->flags & SQLITE_ReadUncommitted))
+  ){
+    return 1;
+  }
+
+  /* If the client is reading  or writing an index and the schema is
+  ** not loaded, then it is too difficult to actually check to see if
+  ** the correct locks are held.  So do not bother - just return true.
+  ** This case does not come up very often anyhow.
+  */
+  if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){
+    return 1;
+  }
+
+  /* Figure out the root-page that the lock should be held on. For table
+  ** b-trees, this is just the root page of the b-tree being read or
+  ** written. For index b-trees, it is the root page of the associated
+  ** table.  */
+  if( isIndex ){
+    HashElem *p;
+    for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){
+      Index *pIdx = (Index *)sqliteHashData(p);
+      if( pIdx->tnum==(int)iRoot ){
+        iTab = pIdx->pTable->tnum;
+      }
+    }
+  }else{
+    iTab = iRoot;
+  }
+
+  /* Search for the required lock. Either a write-lock on root-page iTab, a 
+  ** write-lock on the schema table, or (if the client is reading) a
+  ** read-lock on iTab will suffice. Return 1 if any of these are found.  */
+  for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){
+    if( pLock->pBtree==pBtree 
+     && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1))
+     && pLock->eLock>=eLockType 
+    ){
+      return 1;
+    }
+  }
+
+  /* Failed to find the required lock. */
+  return 0;
+}
+#endif /* SQLITE_DEBUG */
+
+#ifdef SQLITE_DEBUG
+/*
+**** This function may be used as part of assert() statements only. ****
+**
+** Return true if it would be illegal for pBtree to write into the
+** table or index rooted at iRoot because other shared connections are
+** simultaneously reading that same table or index.
+**
+** It is illegal for pBtree to write if some other Btree object that
+** shares the same BtShared object is currently reading or writing
+** the iRoot table.  Except, if the other Btree object has the
+** read-uncommitted flag set, then it is OK for the other object to
+** have a read cursor.
+**
+** For example, before writing to any part of the table or index
+** rooted at page iRoot, one should call:
+**
+**    assert( !hasReadConflicts(pBtree, iRoot) );
+*/
+static int hasReadConflicts(Btree *pBtree, Pgno iRoot){
+  BtCursor *p;
+  for(p=pBtree->pBt->pCursor; p; p=p->pNext){
+    if( p->pgnoRoot==iRoot 
+     && p->pBtree!=pBtree
+     && 0==(p->pBtree->db->flags & SQLITE_ReadUncommitted)
+    ){
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif    /* #ifdef SQLITE_DEBUG */
+
+/*
+** Query to see if Btree handle p may obtain a lock of type eLock 
+** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
+** SQLITE_OK if the lock may be obtained (by calling
+** setSharedCacheTableLock()), or SQLITE_LOCKED if not.
+*/
+static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){
+  BtShared *pBt = p->pBt;
+  BtLock *pIter;
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
+  assert( p->db!=0 );
+  assert( !(p->db->flags&SQLITE_ReadUncommitted)||eLock==WRITE_LOCK||iTab==1 );
+  
+  /* If requesting a write-lock, then the Btree must have an open write
+  ** transaction on this file. And, obviously, for this to be so there 
+  ** must be an open write transaction on the file itself.
+  */
+  assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) );
+  assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE );
+  
+  /* This routine is a no-op if the shared-cache is not enabled */
+  if( !p->sharable ){
+    return SQLITE_OK;
+  }
+
+  /* If some other connection is holding an exclusive lock, the
+  ** requested lock may not be obtained.
+  */
+  if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){
+    sqlite3ConnectionBlocked(p->db, pBt->pWriter->db);
+    return SQLITE_LOCKED_SHAREDCACHE;
+  }
+
+  for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
+    /* The condition (pIter->eLock!=eLock) in the following if(...) 
+    ** statement is a simplification of:
+    **
+    **   (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK)
+    **
+    ** since we know that if eLock==WRITE_LOCK, then no other connection
+    ** may hold a WRITE_LOCK on any table in this file (since there can
+    ** only be a single writer).
+    */
+    assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK );
+    assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK);
+    if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){
+      sqlite3ConnectionBlocked(p->db, pIter->pBtree->db);
+      if( eLock==WRITE_LOCK ){
+        assert( p==pBt->pWriter );
+        pBt->btsFlags |= BTS_PENDING;
+      }
+      return SQLITE_LOCKED_SHAREDCACHE;
+    }
+  }
+  return SQLITE_OK;
+}
+#endif /* !SQLITE_OMIT_SHARED_CACHE */
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** Add a lock on the table with root-page iTable to the shared-btree used
+** by Btree handle p. Parameter eLock must be either READ_LOCK or 
+** WRITE_LOCK.
+**
+** This function assumes the following:
+**
+**   (a) The specified Btree object p is connected to a sharable
+**       database (one with the BtShared.sharable flag set), and
+**
+**   (b) No other Btree objects hold a lock that conflicts
+**       with the requested lock (i.e. querySharedCacheTableLock() has
+**       already been called and returned SQLITE_OK).
+**
+** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM 
+** is returned if a malloc attempt fails.
+*/
+static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){
+  BtShared *pBt = p->pBt;
+  BtLock *pLock = 0;
+  BtLock *pIter;
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( eLock==READ_LOCK || eLock==WRITE_LOCK );
+  assert( p->db!=0 );
+
+  /* A connection with the read-uncommitted flag set will never try to
+  ** obtain a read-lock using this function. The only read-lock obtained
+  ** by a connection in read-uncommitted mode is on the sqlite_master 
+  ** table, and that lock is obtained in BtreeBeginTrans().  */
+  assert( 0==(p->db->flags&SQLITE_ReadUncommitted) || eLock==WRITE_LOCK );
+
+  /* This function should only be called on a sharable b-tree after it 
+  ** has been determined that no other b-tree holds a conflicting lock.  */
+  assert( p->sharable );
+  assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) );
+
+  /* First search the list for an existing lock on this table. */
+  for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
+    if( pIter->iTable==iTable && pIter->pBtree==p ){
+      pLock = pIter;
+      break;
+    }
+  }
+
+  /* If the above search did not find a BtLock struct associating Btree p
+  ** with table iTable, allocate one and link it into the list.
+  */
+  if( !pLock ){
+    pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
+    if( !pLock ){
+      return SQLITE_NOMEM;
+    }
+    pLock->iTable = iTable;
+    pLock->pBtree = p;
+    pLock->pNext = pBt->pLock;
+    pBt->pLock = pLock;
+  }
+
+  /* Set the BtLock.eLock variable to the maximum of the current lock
+  ** and the requested lock. This means if a write-lock was already held
+  ** and a read-lock requested, we don't incorrectly downgrade the lock.
+  */
+  assert( WRITE_LOCK>READ_LOCK );
+  if( eLock>pLock->eLock ){
+    pLock->eLock = eLock;
+  }
+
+  return SQLITE_OK;
+}
+#endif /* !SQLITE_OMIT_SHARED_CACHE */
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** Release all the table locks (locks obtained via calls to
+** the setSharedCacheTableLock() procedure) held by Btree object p.
+**
+** This function assumes that Btree p has an open read or write 
+** transaction. If it does not, then the BTS_PENDING flag
+** may be incorrectly cleared.
+*/
+static void clearAllSharedCacheTableLocks(Btree *p){
+  BtShared *pBt = p->pBt;
+  BtLock **ppIter = &pBt->pLock;
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( p->sharable || 0==*ppIter );
+  assert( p->inTrans>0 );
+
+  while( *ppIter ){
+    BtLock *pLock = *ppIter;
+    assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree );
+    assert( pLock->pBtree->inTrans>=pLock->eLock );
+    if( pLock->pBtree==p ){
+      *ppIter = pLock->pNext;
+      assert( pLock->iTable!=1 || pLock==&p->lock );
+      if( pLock->iTable!=1 ){
+        sqlite3_free(pLock);
+      }
+    }else{
+      ppIter = &pLock->pNext;
+    }
+  }
+
+  assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter );
+  if( pBt->pWriter==p ){
+    pBt->pWriter = 0;
+    pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
+  }else if( pBt->nTransaction==2 ){
+    /* This function is called when Btree p is concluding its 
+    ** transaction. If there currently exists a writer, and p is not
+    ** that writer, then the number of locks held by connections other
+    ** than the writer must be about to drop to zero. In this case
+    ** set the BTS_PENDING flag to 0.
+    **
+    ** If there is not currently a writer, then BTS_PENDING must
+    ** be zero already. So this next line is harmless in that case.
+    */
+    pBt->btsFlags &= ~BTS_PENDING;
+  }
+}
+
+/*
+** This function changes all write-locks held by Btree p into read-locks.
+*/
+static void downgradeAllSharedCacheTableLocks(Btree *p){
+  BtShared *pBt = p->pBt;
+  if( pBt->pWriter==p ){
+    BtLock *pLock;
+    pBt->pWriter = 0;
+    pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING);
+    for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){
+      assert( pLock->eLock==READ_LOCK || pLock->pBtree==p );
+      pLock->eLock = READ_LOCK;
+    }
+  }
+}
+
+#endif /* SQLITE_OMIT_SHARED_CACHE */
+
+static void releasePage(MemPage *pPage);  /* Forward reference */
+
+/*
+***** This routine is used inside of assert() only ****
+**
+** Verify that the cursor holds the mutex on its BtShared
+*/
+#ifdef SQLITE_DEBUG
+static int cursorHoldsMutex(BtCursor *p){
+  return sqlite3_mutex_held(p->pBt->mutex);
+}
+#endif
+
+/*
+** Invalidate the overflow cache of the cursor passed as the first argument.
+** on the shared btree structure pBt.
+*/
+#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl)
+
+/*
+** Invalidate the overflow page-list cache for all cursors opened
+** on the shared btree structure pBt.
+*/
+static void invalidateAllOverflowCache(BtShared *pBt){
+  BtCursor *p;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  for(p=pBt->pCursor; p; p=p->pNext){
+    invalidateOverflowCache(p);
+  }
+}
+
+#ifndef SQLITE_OMIT_INCRBLOB
+/*
+** This function is called before modifying the contents of a table
+** to invalidate any incrblob cursors that are open on the
+** row or one of the rows being modified.
+**
+** If argument isClearTable is true, then the entire contents of the
+** table is about to be deleted. In this case invalidate all incrblob
+** cursors open on any row within the table with root-page pgnoRoot.
+**
+** Otherwise, if argument isClearTable is false, then the row with
+** rowid iRow is being replaced or deleted. In this case invalidate
+** only those incrblob cursors open on that specific row.
+*/
+static void invalidateIncrblobCursors(
+  Btree *pBtree,          /* The database file to check */
+  i64 iRow,               /* The rowid that might be changing */
+  int isClearTable        /* True if all rows are being deleted */
+){
+  BtCursor *p;
+  BtShared *pBt = pBtree->pBt;
+  assert( sqlite3BtreeHoldsMutex(pBtree) );
+  for(p=pBt->pCursor; p; p=p->pNext){
+    if( (p->curFlags & BTCF_Incrblob)!=0
+     && (isClearTable || p->info.nKey==iRow)
+    ){
+      p->eState = CURSOR_INVALID;
+    }
+  }
+}
+
+#else
+  /* Stub function when INCRBLOB is omitted */
+  #define invalidateIncrblobCursors(x,y,z)
+#endif /* SQLITE_OMIT_INCRBLOB */
+
+/*
+** Set bit pgno of the BtShared.pHasContent bitvec. This is called 
+** when a page that previously contained data becomes a free-list leaf 
+** page.
+**
+** The BtShared.pHasContent bitvec exists to work around an obscure
+** bug caused by the interaction of two useful IO optimizations surrounding
+** free-list leaf pages:
+**
+**   1) When all data is deleted from a page and the page becomes
+**      a free-list leaf page, the page is not written to the database
+**      (as free-list leaf pages contain no meaningful data). Sometimes
+**      such a page is not even journalled (as it will not be modified,
+**      why bother journalling it?).
+**
+**   2) When a free-list leaf page is reused, its content is not read
+**      from the database or written to the journal file (why should it
+**      be, if it is not at all meaningful?).
+**
+** By themselves, these optimizations work fine and provide a handy
+** performance boost to bulk delete or insert operations. However, if
+** a page is moved to the free-list and then reused within the same
+** transaction, a problem comes up. If the page is not journalled when
+** it is moved to the free-list and it is also not journalled when it
+** is extracted from the free-list and reused, then the original data
+** may be lost. In the event of a rollback, it may not be possible
+** to restore the database to its original configuration.
+**
+** The solution is the BtShared.pHasContent bitvec. Whenever a page is 
+** moved to become a free-list leaf page, the corresponding bit is
+** set in the bitvec. Whenever a leaf page is extracted from the free-list,
+** optimization 2 above is omitted if the corresponding bit is already
+** set in BtShared.pHasContent. The contents of the bitvec are cleared
+** at the end of every transaction.
+*/
+static int btreeSetHasContent(BtShared *pBt, Pgno pgno){
+  int rc = SQLITE_OK;
+  if( !pBt->pHasContent ){
+    assert( pgno<=pBt->nPage );
+    pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage);
+    if( !pBt->pHasContent ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+  if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){
+    rc = sqlite3BitvecSet(pBt->pHasContent, pgno);
+  }
+  return rc;
+}
+
+/*
+** Query the BtShared.pHasContent vector.
+**
+** This function is called when a free-list leaf page is removed from the
+** free-list for reuse. It returns false if it is safe to retrieve the
+** page from the pager layer with the 'no-content' flag set. True otherwise.
+*/
+static int btreeGetHasContent(BtShared *pBt, Pgno pgno){
+  Bitvec *p = pBt->pHasContent;
+  return (p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTest(p, pgno)));
+}
+
+/*
+** Clear (destroy) the BtShared.pHasContent bitvec. This should be
+** invoked at the conclusion of each write-transaction.
+*/
+static void btreeClearHasContent(BtShared *pBt){
+  sqlite3BitvecDestroy(pBt->pHasContent);
+  pBt->pHasContent = 0;
+}
+
+/*
+** Release all of the apPage[] pages for a cursor.
+*/
+static void btreeReleaseAllCursorPages(BtCursor *pCur){
+  int i;
+  for(i=0; i<=pCur->iPage; i++){
+    releasePage(pCur->apPage[i]);
+    pCur->apPage[i] = 0;
+  }
+  pCur->iPage = -1;
+}
+
+
+/*
+** Save the current cursor position in the variables BtCursor.nKey 
+** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
+**
+** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID)
+** prior to calling this routine.  
+*/
+static int saveCursorPosition(BtCursor *pCur){
+  int rc;
+
+  assert( CURSOR_VALID==pCur->eState );
+  assert( 0==pCur->pKey );
+  assert( cursorHoldsMutex(pCur) );
+
+  rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
+  assert( rc==SQLITE_OK );  /* KeySize() cannot fail */
+
+  /* If this is an intKey table, then the above call to BtreeKeySize()
+  ** stores the integer key in pCur->nKey. In this case this value is
+  ** all that is required. Otherwise, if pCur is not open on an intKey
+  ** table, then malloc space for and store the pCur->nKey bytes of key 
+  ** data.
+  */
+  if( 0==pCur->apPage[0]->intKey ){
+    void *pKey = sqlite3Malloc( pCur->nKey );
+    if( pKey ){
+      rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey);
+      if( rc==SQLITE_OK ){
+        pCur->pKey = pKey;
+      }else{
+        sqlite3_free(pKey);
+      }
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+  }
+  assert( !pCur->apPage[0]->intKey || !pCur->pKey );
+
+  if( rc==SQLITE_OK ){
+    btreeReleaseAllCursorPages(pCur);
+    pCur->eState = CURSOR_REQUIRESEEK;
+  }
+
+  invalidateOverflowCache(pCur);
+  return rc;
+}
+
+/* Forward reference */
+static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
+
+/*
+** Save the positions of all cursors (except pExcept) that are open on
+** the table with root-page iRoot.  "Saving the cursor position" means that
+** the location in the btree is remembered in such a way that it can be
+** moved back to the same spot after the btree has been modified.  This
+** routine is called just before cursor pExcept is used to modify the
+** table, for example in BtreeDelete() or BtreeInsert().
+**
+** Implementation note:  This routine merely checks to see if any cursors
+** need to be saved.  It calls out to saveCursorsOnList() in the (unusual)
+** event that cursors are in need to being saved.
+*/
+static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
+  BtCursor *p;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( pExcept==0 || pExcept->pBt==pBt );
+  for(p=pBt->pCursor; p; p=p->pNext){
+    if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break;
+  }
+  return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK;
+}
+
+/* This helper routine to saveAllCursors does the actual work of saving
+** the cursors if and when a cursor is found that actually requires saving.
+** The common case is that no cursors need to be saved, so this routine is
+** broken out from its caller to avoid unnecessary stack pointer movement.
+*/
+static int SQLITE_NOINLINE saveCursorsOnList(
+  BtCursor *p,         /* The first cursor that needs saving */
+  Pgno iRoot,          /* Only save cursor with this iRoot. Save all if zero */
+  BtCursor *pExcept    /* Do not save this cursor */
+){
+  do{
+    if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){
+      if( p->eState==CURSOR_VALID ){
+        int rc = saveCursorPosition(p);
+        if( SQLITE_OK!=rc ){
+          return rc;
+        }
+      }else{
+        testcase( p->iPage>0 );
+        btreeReleaseAllCursorPages(p);
+      }
+    }
+    p = p->pNext;
+  }while( p );
+  return SQLITE_OK;
+}
+
+/*
+** Clear the current cursor position.
+*/
+SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *pCur){
+  assert( cursorHoldsMutex(pCur) );
+  sqlite3_free(pCur->pKey);
+  pCur->pKey = 0;
+  pCur->eState = CURSOR_INVALID;
+}
+
+/*
+** In this version of BtreeMoveto, pKey is a packed index record
+** such as is generated by the OP_MakeRecord opcode.  Unpack the
+** record and then call BtreeMovetoUnpacked() to do the work.
+*/
+static int btreeMoveto(
+  BtCursor *pCur,     /* Cursor open on the btree to be searched */
+  const void *pKey,   /* Packed key if the btree is an index */
+  i64 nKey,           /* Integer key for tables.  Size of pKey for indices */
+  int bias,           /* Bias search to the high end */
+  int *pRes           /* Write search results here */
+){
+  int rc;                    /* Status code */
+  UnpackedRecord *pIdxKey;   /* Unpacked index key */
+  char aSpace[200];          /* Temp space for pIdxKey - to avoid a malloc */
+  char *pFree = 0;
+
+  if( pKey ){
+    assert( nKey==(i64)(int)nKey );
+    pIdxKey = sqlite3VdbeAllocUnpackedRecord(
+        pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree
+    );
+    if( pIdxKey==0 ) return SQLITE_NOMEM;
+    sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey);
+    if( pIdxKey->nField==0 ){
+      sqlite3DbFree(pCur->pKeyInfo->db, pFree);
+      return SQLITE_CORRUPT_BKPT;
+    }
+  }else{
+    pIdxKey = 0;
+  }
+  rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes);
+  if( pFree ){
+    sqlite3DbFree(pCur->pKeyInfo->db, pFree);
+  }
+  return rc;
+}
+
+/*
+** Restore the cursor to the position it was in (or as close to as possible)
+** when saveCursorPosition() was called. Note that this call deletes the 
+** saved position info stored by saveCursorPosition(), so there can be
+** at most one effective restoreCursorPosition() call after each 
+** saveCursorPosition().
+*/
+static int btreeRestoreCursorPosition(BtCursor *pCur){
+  int rc;
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState>=CURSOR_REQUIRESEEK );
+  if( pCur->eState==CURSOR_FAULT ){
+    return pCur->skipNext;
+  }
+  pCur->eState = CURSOR_INVALID;
+  rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skipNext);
+  if( rc==SQLITE_OK ){
+    sqlite3_free(pCur->pKey);
+    pCur->pKey = 0;
+    assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
+    if( pCur->skipNext && pCur->eState==CURSOR_VALID ){
+      pCur->eState = CURSOR_SKIPNEXT;
+    }
+  }
+  return rc;
+}
+
+#define restoreCursorPosition(p) \
+  (p->eState>=CURSOR_REQUIRESEEK ? \
+         btreeRestoreCursorPosition(p) : \
+         SQLITE_OK)
+
+/*
+** Determine whether or not a cursor has moved from the position where
+** it was last placed, or has been invalidated for any other reason.
+** Cursors can move when the row they are pointing at is deleted out
+** from under them, for example.  Cursor might also move if a btree
+** is rebalanced.
+**
+** Calling this routine with a NULL cursor pointer returns false.
+**
+** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor
+** back to where it ought to be if this routine returns true.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){
+  return pCur->eState!=CURSOR_VALID;
+}
+
+/*
+** This routine restores a cursor back to its original position after it
+** has been moved by some outside activity (such as a btree rebalance or
+** a row having been deleted out from under the cursor).  
+**
+** On success, the *pDifferentRow parameter is false if the cursor is left
+** pointing at exactly the same row.  *pDifferntRow is the row the cursor
+** was pointing to has been deleted, forcing the cursor to point to some
+** nearby row.
+**
+** This routine should only be called for a cursor that just returned
+** TRUE from sqlite3BtreeCursorHasMoved().
+*/
+SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
+  int rc;
+
+  assert( pCur!=0 );
+  assert( pCur->eState!=CURSOR_VALID );
+  rc = restoreCursorPosition(pCur);
+  if( rc ){
+    *pDifferentRow = 1;
+    return rc;
+  }
+  if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){
+    *pDifferentRow = 1;
+  }else{
+    *pDifferentRow = 0;
+  }
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** Given a page number of a regular database page, return the page
+** number for the pointer-map page that contains the entry for the
+** input page number.
+**
+** Return 0 (not a valid page) for pgno==1 since there is
+** no pointer map associated with page 1.  The integrity_check logic
+** requires that ptrmapPageno(*,1)!=1.
+*/
+static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
+  int nPagesPerMapPage;
+  Pgno iPtrMap, ret;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  if( pgno<2 ) return 0;
+  nPagesPerMapPage = (pBt->usableSize/5)+1;
+  iPtrMap = (pgno-2)/nPagesPerMapPage;
+  ret = (iPtrMap*nPagesPerMapPage) + 2; 
+  if( ret==PENDING_BYTE_PAGE(pBt) ){
+    ret++;
+  }
+  return ret;
+}
+
+/*
+** Write an entry into the pointer map.
+**
+** This routine updates the pointer map entry for page number 'key'
+** so that it maps to type 'eType' and parent page number 'pgno'.
+**
+** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is
+** a no-op.  If an error occurs, the appropriate error code is written
+** into *pRC.
+*/
+static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){
+  DbPage *pDbPage;  /* The pointer map page */
+  u8 *pPtrmap;      /* The pointer map data */
+  Pgno iPtrmap;     /* The pointer map page number */
+  int offset;       /* Offset in pointer map page */
+  int rc;           /* Return code from subfunctions */
+
+  if( *pRC ) return;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  /* The master-journal page number must never be used as a pointer map page */
+  assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
+
+  assert( pBt->autoVacuum );
+  if( key==0 ){
+    *pRC = SQLITE_CORRUPT_BKPT;
+    return;
+  }
+  iPtrmap = PTRMAP_PAGENO(pBt, key);
+  rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
+  if( rc!=SQLITE_OK ){
+    *pRC = rc;
+    return;
+  }
+  offset = PTRMAP_PTROFFSET(iPtrmap, key);
+  if( offset<0 ){
+    *pRC = SQLITE_CORRUPT_BKPT;
+    goto ptrmap_exit;
+  }
+  assert( offset <= (int)pBt->usableSize-5 );
+  pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
+
+  if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
+    TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
+    *pRC= rc = sqlite3PagerWrite(pDbPage);
+    if( rc==SQLITE_OK ){
+      pPtrmap[offset] = eType;
+      put4byte(&pPtrmap[offset+1], parent);
+    }
+  }
+
+ptrmap_exit:
+  sqlite3PagerUnref(pDbPage);
+}
+
+/*
+** Read an entry from the pointer map.
+**
+** This routine retrieves the pointer map entry for page 'key', writing
+** the type and parent page number to *pEType and *pPgno respectively.
+** An error code is returned if something goes wrong, otherwise SQLITE_OK.
+*/
+static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
+  DbPage *pDbPage;   /* The pointer map page */
+  int iPtrmap;       /* Pointer map page index */
+  u8 *pPtrmap;       /* Pointer map page data */
+  int offset;        /* Offset of entry in pointer map */
+  int rc;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+
+  iPtrmap = PTRMAP_PAGENO(pBt, key);
+  rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
+  if( rc!=0 ){
+    return rc;
+  }
+  pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
+
+  offset = PTRMAP_PTROFFSET(iPtrmap, key);
+  if( offset<0 ){
+    sqlite3PagerUnref(pDbPage);
+    return SQLITE_CORRUPT_BKPT;
+  }
+  assert( offset <= (int)pBt->usableSize-5 );
+  assert( pEType!=0 );
+  *pEType = pPtrmap[offset];
+  if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
+
+  sqlite3PagerUnref(pDbPage);
+  if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
+  return SQLITE_OK;
+}
+
+#else /* if defined SQLITE_OMIT_AUTOVACUUM */
+  #define ptrmapPut(w,x,y,z,rc)
+  #define ptrmapGet(w,x,y,z) SQLITE_OK
+  #define ptrmapPutOvflPtr(x, y, rc)
+#endif
+
+/*
+** Given a btree page and a cell index (0 means the first cell on
+** the page, 1 means the second cell, and so forth) return a pointer
+** to the cell content.
+**
+** This routine works only for pages that do not contain overflow cells.
+*/
+#define findCell(P,I) \
+  ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
+#define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I)))))
+
+
+/*
+** This a more complex version of findCell() that works for
+** pages that do contain overflow cells.
+*/
+static u8 *findOverflowCell(MemPage *pPage, int iCell){
+  int i;
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  for(i=pPage->nOverflow-1; i>=0; i--){
+    int k;
+    k = pPage->aiOvfl[i];
+    if( k<=iCell ){
+      if( k==iCell ){
+        return pPage->apOvfl[i];
+      }
+      iCell--;
+    }
+  }
+  return findCell(pPage, iCell);
+}
+
+/*
+** Parse a cell content block and fill in the CellInfo structure.  There
+** are two versions of this function.  btreeParseCell() takes a 
+** cell index as the second argument and btreeParseCellPtr() 
+** takes a pointer to the body of the cell as its second argument.
+*/
+static void btreeParseCellPtr(
+  MemPage *pPage,         /* Page containing the cell */
+  u8 *pCell,              /* Pointer to the cell text. */
+  CellInfo *pInfo         /* Fill in this structure */
+){
+  u8 *pIter;              /* For scanning through pCell */
+  u32 nPayload;           /* Number of bytes of cell payload */
+
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( pPage->leaf==0 || pPage->leaf==1 );
+  if( pPage->intKeyLeaf ){
+    assert( pPage->childPtrSize==0 );
+    pIter = pCell + getVarint32(pCell, nPayload);
+    pIter += getVarint(pIter, (u64*)&pInfo->nKey);
+  }else if( pPage->noPayload ){
+    assert( pPage->childPtrSize==4 );
+    pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey);
+    pInfo->nPayload = 0;
+    pInfo->nLocal = 0;
+    pInfo->iOverflow = 0;
+    pInfo->pPayload = 0;
+    return;
+  }else{
+    pIter = pCell + pPage->childPtrSize;
+    pIter += getVarint32(pIter, nPayload);
+    pInfo->nKey = nPayload;
+  }
+  pInfo->nPayload = nPayload;
+  pInfo->pPayload = pIter;
+  testcase( nPayload==pPage->maxLocal );
+  testcase( nPayload==pPage->maxLocal+1 );
+  if( nPayload<=pPage->maxLocal ){
+    /* This is the (easy) common case where the entire payload fits
+    ** on the local page.  No overflow is required.
+    */
+    pInfo->nSize = nPayload + (u16)(pIter - pCell);
+    if( pInfo->nSize<4 ) pInfo->nSize = 4;
+    pInfo->nLocal = (u16)nPayload;
+    pInfo->iOverflow = 0;
+  }else{
+    /* If the payload will not fit completely on the local page, we have
+    ** to decide how much to store locally and how much to spill onto
+    ** overflow pages.  The strategy is to minimize the amount of unused
+    ** space on overflow pages while keeping the amount of local storage
+    ** in between minLocal and maxLocal.
+    **
+    ** Warning:  changing the way overflow payload is distributed in any
+    ** way will result in an incompatible file format.
+    */
+    int minLocal;  /* Minimum amount of payload held locally */
+    int maxLocal;  /* Maximum amount of payload held locally */
+    int surplus;   /* Overflow payload available for local storage */
+
+    minLocal = pPage->minLocal;
+    maxLocal = pPage->maxLocal;
+    surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
+    testcase( surplus==maxLocal );
+    testcase( surplus==maxLocal+1 );
+    if( surplus <= maxLocal ){
+      pInfo->nLocal = (u16)surplus;
+    }else{
+      pInfo->nLocal = (u16)minLocal;
+    }
+    pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell);
+    pInfo->nSize = pInfo->iOverflow + 4;
+  }
+}
+static void btreeParseCell(
+  MemPage *pPage,         /* Page containing the cell */
+  int iCell,              /* The cell index.  First cell is 0 */
+  CellInfo *pInfo         /* Fill in this structure */
+){
+  btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo);
+}
+
+/*
+** Compute the total number of bytes that a Cell needs in the cell
+** data area of the btree-page.  The return number includes the cell
+** data header and the local payload, but not any overflow page or
+** the space used by the cell pointer.
+*/
+static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
+  u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */
+  u8 *pEnd;                                /* End mark for a varint */
+  u32 nSize;                               /* Size value to return */
+
+#ifdef SQLITE_DEBUG
+  /* The value returned by this function should always be the same as
+  ** the (CellInfo.nSize) value found by doing a full parse of the
+  ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
+  ** this function verifies that this invariant is not violated. */
+  CellInfo debuginfo;
+  btreeParseCellPtr(pPage, pCell, &debuginfo);
+#endif
+
+  if( pPage->noPayload ){
+    pEnd = &pIter[9];
+    while( (*pIter++)&0x80 && pIter<pEnd );
+    assert( pPage->childPtrSize==4 );
+    return (u16)(pIter - pCell);
+  }
+  nSize = *pIter;
+  if( nSize>=0x80 ){
+    pEnd = &pIter[9];
+    nSize &= 0x7f;
+    do{
+      nSize = (nSize<<7) | (*++pIter & 0x7f);
+    }while( *(pIter)>=0x80 && pIter<pEnd );
+  }
+  pIter++;
+  if( pPage->intKey ){
+    /* pIter now points at the 64-bit integer key value, a variable length 
+    ** integer. The following block moves pIter to point at the first byte
+    ** past the end of the key value. */
+    pEnd = &pIter[9];
+    while( (*pIter++)&0x80 && pIter<pEnd );
+  }
+  testcase( nSize==pPage->maxLocal );
+  testcase( nSize==pPage->maxLocal+1 );
+  if( nSize<=pPage->maxLocal ){
+    nSize += (u32)(pIter - pCell);
+    if( nSize<4 ) nSize = 4;
+  }else{
+    int minLocal = pPage->minLocal;
+    nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4);
+    testcase( nSize==pPage->maxLocal );
+    testcase( nSize==pPage->maxLocal+1 );
+    if( nSize>pPage->maxLocal ){
+      nSize = minLocal;
+    }
+    nSize += 4 + (u16)(pIter - pCell);
+  }
+  assert( nSize==debuginfo.nSize || CORRUPT_DB );
+  return (u16)nSize;
+}
+
+#ifdef SQLITE_DEBUG
+/* This variation on cellSizePtr() is used inside of assert() statements
+** only. */
+static u16 cellSize(MemPage *pPage, int iCell){
+  return cellSizePtr(pPage, findCell(pPage, iCell));
+}
+#endif
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** If the cell pCell, part of page pPage contains a pointer
+** to an overflow page, insert an entry into the pointer-map
+** for the overflow page.
+*/
+static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){
+  CellInfo info;
+  if( *pRC ) return;
+  assert( pCell!=0 );
+  btreeParseCellPtr(pPage, pCell, &info);
+  if( info.iOverflow ){
+    Pgno ovfl = get4byte(&pCell[info.iOverflow]);
+    ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC);
+  }
+}
+#endif
+
+
+/*
+** Defragment the page given.  All Cells are moved to the
+** end of the page and all free space is collected into one
+** big FreeBlk that occurs in between the header and cell
+** pointer array and the cell content area.
+**
+** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a
+** b-tree page so that there are no freeblocks or fragment bytes, all
+** unused bytes are contained in the unallocated space region, and all
+** cells are packed tightly at the end of the page.
+*/
+static int defragmentPage(MemPage *pPage){
+  int i;                     /* Loop counter */
+  int pc;                    /* Address of the i-th cell */
+  int hdr;                   /* Offset to the page header */
+  int size;                  /* Size of a cell */
+  int usableSize;            /* Number of usable bytes on a page */
+  int cellOffset;            /* Offset to the cell pointer array */
+  int cbrk;                  /* Offset to the cell content area */
+  int nCell;                 /* Number of cells on the page */
+  unsigned char *data;       /* The page data */
+  unsigned char *temp;       /* Temp area for cell content */
+  unsigned char *src;        /* Source of content */
+  int iCellFirst;            /* First allowable cell index */
+  int iCellLast;             /* Last possible cell index */
+
+
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  assert( pPage->pBt!=0 );
+  assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
+  assert( pPage->nOverflow==0 );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  temp = 0;
+  src = data = pPage->aData;
+  hdr = pPage->hdrOffset;
+  cellOffset = pPage->cellOffset;
+  nCell = pPage->nCell;
+  assert( nCell==get2byte(&data[hdr+3]) );
+  usableSize = pPage->pBt->usableSize;
+  cbrk = usableSize;
+  iCellFirst = cellOffset + 2*nCell;
+  iCellLast = usableSize - 4;
+  for(i=0; i<nCell; i++){
+    u8 *pAddr;     /* The i-th cell pointer */
+    pAddr = &data[cellOffset + i*2];
+    pc = get2byte(pAddr);
+    testcase( pc==iCellFirst );
+    testcase( pc==iCellLast );
+#if !defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
+    /* These conditions have already been verified in btreeInitPage()
+    ** if SQLITE_ENABLE_OVERSIZE_CELL_CHECK is defined 
+    */
+    if( pc<iCellFirst || pc>iCellLast ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+#endif
+    assert( pc>=iCellFirst && pc<=iCellLast );
+    size = cellSizePtr(pPage, &src[pc]);
+    cbrk -= size;
+#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
+    if( cbrk<iCellFirst ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+#else
+    if( cbrk<iCellFirst || pc+size>usableSize ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+#endif
+    assert( cbrk+size<=usableSize && cbrk>=iCellFirst );
+    testcase( cbrk+size==usableSize );
+    testcase( pc+size==usableSize );
+    put2byte(pAddr, cbrk);
+    if( temp==0 ){
+      int x;
+      if( cbrk==pc ) continue;
+      temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
+      x = get2byte(&data[hdr+5]);
+      memcpy(&temp[x], &data[x], (cbrk+size) - x);
+      src = temp;
+    }
+    memcpy(&data[cbrk], &src[pc], size);
+  }
+  assert( cbrk>=iCellFirst );
+  put2byte(&data[hdr+5], cbrk);
+  data[hdr+1] = 0;
+  data[hdr+2] = 0;
+  data[hdr+7] = 0;
+  memset(&data[iCellFirst], 0, cbrk-iCellFirst);
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  if( cbrk-iCellFirst!=pPage->nFree ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Search the free-list on page pPg for space to store a cell nByte bytes in
+** size. If one can be found, return a pointer to the space and remove it
+** from the free-list.
+**
+** If no suitable space can be found on the free-list, return NULL.
+**
+** This function may detect corruption within pPg.  If corruption is
+** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
+**
+** If a slot of at least nByte bytes is found but cannot be used because 
+** there are already at least 60 fragmented bytes on the page, return NULL.
+** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true.
+*/
+static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
+  const int hdr = pPg->hdrOffset;
+  u8 * const aData = pPg->aData;
+  int iAddr;
+  int pc;
+  int usableSize = pPg->pBt->usableSize;
+
+  for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){
+    int size;            /* Size of the free slot */
+    /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
+    ** increasing offset. */
+    if( pc>usableSize-4 || pc<iAddr+4 ){
+      *pRc = SQLITE_CORRUPT_BKPT;
+      return 0;
+    }
+    /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each
+    ** freeblock form a big-endian integer which is the size of the freeblock
+    ** in bytes, including the 4-byte header. */
+    size = get2byte(&aData[pc+2]);
+    if( size>=nByte ){
+      int x = size - nByte;
+      testcase( x==4 );
+      testcase( x==3 );
+      if( x<4 ){
+        /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
+        ** number of bytes in fragments may not exceed 60. */
+        if( aData[hdr+7]>=60 ){
+          if( pbDefrag ) *pbDefrag = 1;
+          return 0;
+        }
+        /* Remove the slot from the free-list. Update the number of
+        ** fragmented bytes within the page. */
+        memcpy(&aData[iAddr], &aData[pc], 2);
+        aData[hdr+7] += (u8)x;
+      }else if( size+pc > usableSize ){
+        *pRc = SQLITE_CORRUPT_BKPT;
+        return 0;
+      }else{
+        /* The slot remains on the free-list. Reduce its size to account
+         ** for the portion used by the new allocation. */
+        put2byte(&aData[pc+2], x);
+      }
+      return &aData[pc + x];
+    }
+  }
+
+  return 0;
+}
+
+/*
+** Allocate nByte bytes of space from within the B-Tree page passed
+** as the first argument. Write into *pIdx the index into pPage->aData[]
+** of the first byte of allocated space. Return either SQLITE_OK or
+** an error code (usually SQLITE_CORRUPT).
+**
+** The caller guarantees that there is sufficient space to make the
+** allocation.  This routine might need to defragment in order to bring
+** all the space together, however.  This routine will avoid using
+** the first two bytes past the cell pointer area since presumably this
+** allocation is being made in order to insert a new cell, so we will
+** also end up needing a new cell pointer.
+*/
+static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
+  const int hdr = pPage->hdrOffset;    /* Local cache of pPage->hdrOffset */
+  u8 * const data = pPage->aData;      /* Local cache of pPage->aData */
+  int top;                             /* First byte of cell content area */
+  int rc = SQLITE_OK;                  /* Integer return code */
+  int gap;        /* First byte of gap between cell pointers and cell content */
+  
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  assert( pPage->pBt );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( nByte>=0 );  /* Minimum cell size is 4 */
+  assert( pPage->nFree>=nByte );
+  assert( pPage->nOverflow==0 );
+  assert( nByte < (int)(pPage->pBt->usableSize-8) );
+
+  assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
+  gap = pPage->cellOffset + 2*pPage->nCell;
+  assert( gap<=65536 );
+  /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
+  ** and the reserved space is zero (the usual value for reserved space)
+  ** then the cell content offset of an empty page wants to be 65536.
+  ** However, that integer is too large to be stored in a 2-byte unsigned
+  ** integer, so a value of 0 is used in its place. */
+  top = get2byteNotZero(&data[hdr+5]);
+  if( gap>top ) return SQLITE_CORRUPT_BKPT;
+
+  /* If there is enough space between gap and top for one more cell pointer
+  ** array entry offset, and if the freelist is not empty, then search the
+  ** freelist looking for a free slot big enough to satisfy the request.
+  */
+  testcase( gap+2==top );
+  testcase( gap+1==top );
+  testcase( gap==top );
+  if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){
+    int bDefrag = 0;
+    u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag);
+    if( rc ) return rc;
+    if( bDefrag ) goto defragment_page;
+    if( pSpace ){
+      assert( pSpace>=data && (pSpace - data)<65536 );
+      *pIdx = (int)(pSpace - data);
+      return SQLITE_OK;
+    }
+  }
+
+  /* The request could not be fulfilled using a freelist slot.  Check
+  ** to see if defragmentation is necessary.
+  */
+  testcase( gap+2+nByte==top );
+  if( gap+2+nByte>top ){
+ defragment_page:
+    assert( pPage->nCell>0 || CORRUPT_DB );
+    rc = defragmentPage(pPage);
+    if( rc ) return rc;
+    top = get2byteNotZero(&data[hdr+5]);
+    assert( gap+nByte<=top );
+  }
+
+
+  /* Allocate memory from the gap in between the cell pointer array
+  ** and the cell content area.  The btreeInitPage() call has already
+  ** validated the freelist.  Given that the freelist is valid, there
+  ** is no way that the allocation can extend off the end of the page.
+  ** The assert() below verifies the previous sentence.
+  */
+  top -= nByte;
+  put2byte(&data[hdr+5], top);
+  assert( top+nByte <= (int)pPage->pBt->usableSize );
+  *pIdx = top;
+  return SQLITE_OK;
+}
+
+/*
+** Return a section of the pPage->aData to the freelist.
+** The first byte of the new free block is pPage->aData[iStart]
+** and the size of the block is iSize bytes.
+**
+** Adjacent freeblocks are coalesced.
+**
+** Note that even though the freeblock list was checked by btreeInitPage(),
+** that routine will not detect overlap between cells or freeblocks.  Nor
+** does it detect cells or freeblocks that encrouch into the reserved bytes
+** at the end of the page.  So do additional corruption checks inside this
+** routine and return SQLITE_CORRUPT if any problems are found.
+*/
+static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){
+  u16 iPtr;                             /* Address of ptr to next freeblock */
+  u16 iFreeBlk;                         /* Address of the next freeblock */
+  u8 hdr;                               /* Page header size.  0 or 100 */
+  u8 nFrag = 0;                         /* Reduction in fragmentation */
+  u16 iOrigSize = iSize;                /* Original value of iSize */
+  u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */
+  u32 iEnd = iStart + iSize;            /* First byte past the iStart buffer */
+  unsigned char *data = pPage->aData;   /* Page content */
+
+  assert( pPage->pBt!=0 );
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
+  assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( iSize>=4 );   /* Minimum cell size is 4 */
+  assert( iStart<=iLast );
+
+  /* Overwrite deleted information with zeros when the secure_delete
+  ** option is enabled */
+  if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){
+    memset(&data[iStart], 0, iSize);
+  }
+
+  /* The list of freeblocks must be in ascending order.  Find the 
+  ** spot on the list where iStart should be inserted.
+  */
+  hdr = pPage->hdrOffset;
+  iPtr = hdr + 1;
+  if( data[iPtr+1]==0 && data[iPtr]==0 ){
+    iFreeBlk = 0;  /* Shortcut for the case when the freelist is empty */
+  }else{
+    while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){
+      if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT;
+      iPtr = iFreeBlk;
+    }
+    if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT;
+    assert( iFreeBlk>iPtr || iFreeBlk==0 );
+  
+    /* At this point:
+    **    iFreeBlk:   First freeblock after iStart, or zero if none
+    **    iPtr:       The address of a pointer iFreeBlk
+    **
+    ** Check to see if iFreeBlk should be coalesced onto the end of iStart.
+    */
+    if( iFreeBlk && iEnd+3>=iFreeBlk ){
+      nFrag = iFreeBlk - iEnd;
+      if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT;
+      iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
+      iSize = iEnd - iStart;
+      iFreeBlk = get2byte(&data[iFreeBlk]);
+    }
+  
+    /* If iPtr is another freeblock (that is, if iPtr is not the freelist
+    ** pointer in the page header) then check to see if iStart should be
+    ** coalesced onto the end of iPtr.
+    */
+    if( iPtr>hdr+1 ){
+      int iPtrEnd = iPtr + get2byte(&data[iPtr+2]);
+      if( iPtrEnd+3>=iStart ){
+        if( iPtrEnd>iStart ) return SQLITE_CORRUPT_BKPT;
+        nFrag += iStart - iPtrEnd;
+        iSize = iEnd - iPtr;
+        iStart = iPtr;
+      }
+    }
+    if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_BKPT;
+    data[hdr+7] -= nFrag;
+  }
+  if( iStart==get2byte(&data[hdr+5]) ){
+    /* The new freeblock is at the beginning of the cell content area,
+    ** so just extend the cell content area rather than create another
+    ** freelist entry */
+    if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_BKPT;
+    put2byte(&data[hdr+1], iFreeBlk);
+    put2byte(&data[hdr+5], iEnd);
+  }else{
+    /* Insert the new freeblock into the freelist */
+    put2byte(&data[iPtr], iStart);
+    put2byte(&data[iStart], iFreeBlk);
+    put2byte(&data[iStart+2], iSize);
+  }
+  pPage->nFree += iOrigSize;
+  return SQLITE_OK;
+}
+
+/*
+** Decode the flags byte (the first byte of the header) for a page
+** and initialize fields of the MemPage structure accordingly.
+**
+** Only the following combinations are supported.  Anything different
+** indicates a corrupt database files:
+**
+**         PTF_ZERODATA
+**         PTF_ZERODATA | PTF_LEAF
+**         PTF_LEAFDATA | PTF_INTKEY
+**         PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
+*/
+static int decodeFlags(MemPage *pPage, int flagByte){
+  BtShared *pBt;     /* A copy of pPage->pBt */
+
+  assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  pPage->leaf = (u8)(flagByte>>3);  assert( PTF_LEAF == 1<<3 );
+  flagByte &= ~PTF_LEAF;
+  pPage->childPtrSize = 4-4*pPage->leaf;
+  pBt = pPage->pBt;
+  if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
+    /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior
+    ** table b-tree page. */
+    assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
+    /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf
+    ** table b-tree page. */
+    assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
+    pPage->intKey = 1;
+    pPage->intKeyLeaf = pPage->leaf;
+    pPage->noPayload = !pPage->leaf;
+    pPage->maxLocal = pBt->maxLeaf;
+    pPage->minLocal = pBt->minLeaf;
+  }else if( flagByte==PTF_ZERODATA ){
+    /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior
+    ** index b-tree page. */
+    assert( (PTF_ZERODATA)==2 );
+    /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf
+    ** index b-tree page. */
+    assert( (PTF_ZERODATA|PTF_LEAF)==10 );
+    pPage->intKey = 0;
+    pPage->intKeyLeaf = 0;
+    pPage->noPayload = 0;
+    pPage->maxLocal = pBt->maxLocal;
+    pPage->minLocal = pBt->minLocal;
+  }else{
+    /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
+    ** an error. */
+    return SQLITE_CORRUPT_BKPT;
+  }
+  pPage->max1bytePayload = pBt->max1bytePayload;
+  return SQLITE_OK;
+}
+
+/*
+** Initialize the auxiliary information for a disk block.
+**
+** Return SQLITE_OK on success.  If we see that the page does
+** not contain a well-formed database page, then return 
+** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
+** guarantee that the page is well-formed.  It only shows that
+** we failed to detect any corruption.
+*/
+static int btreeInitPage(MemPage *pPage){
+
+  assert( pPage->pBt!=0 );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
+  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
+  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
+
+  if( !pPage->isInit ){
+    u16 pc;            /* Address of a freeblock within pPage->aData[] */
+    u8 hdr;            /* Offset to beginning of page header */
+    u8 *data;          /* Equal to pPage->aData */
+    BtShared *pBt;        /* The main btree structure */
+    int usableSize;    /* Amount of usable space on each page */
+    u16 cellOffset;    /* Offset from start of page to first cell pointer */
+    int nFree;         /* Number of unused bytes on the page */
+    int top;           /* First byte of the cell content area */
+    int iCellFirst;    /* First allowable cell or freeblock offset */
+    int iCellLast;     /* Last possible cell or freeblock offset */
+
+    pBt = pPage->pBt;
+
+    hdr = pPage->hdrOffset;
+    data = pPage->aData;
+    /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
+    ** the b-tree page type. */
+    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
+    assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
+    pPage->maskPage = (u16)(pBt->pageSize - 1);
+    pPage->nOverflow = 0;
+    usableSize = pBt->usableSize;
+    pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
+    pPage->aDataEnd = &data[usableSize];
+    pPage->aCellIdx = &data[cellOffset];
+    /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
+    ** the start of the cell content area. A zero value for this integer is
+    ** interpreted as 65536. */
+    top = get2byteNotZero(&data[hdr+5]);
+    /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
+    ** number of cells on the page. */
+    pPage->nCell = get2byte(&data[hdr+3]);
+    if( pPage->nCell>MX_CELL(pBt) ){
+      /* To many cells for a single page.  The page must be corrupt */
+      return SQLITE_CORRUPT_BKPT;
+    }
+    testcase( pPage->nCell==MX_CELL(pBt) );
+    /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
+    ** possible for a root page of a table that contains no rows) then the
+    ** offset to the cell content area will equal the page size minus the
+    ** bytes of reserved space. */
+    assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB );
+
+    /* A malformed database page might cause us to read past the end
+    ** of page when parsing a cell.  
+    **
+    ** The following block of code checks early to see if a cell extends
+    ** past the end of a page boundary and causes SQLITE_CORRUPT to be 
+    ** returned if it does.
+    */
+    iCellFirst = cellOffset + 2*pPage->nCell;
+    iCellLast = usableSize - 4;
+#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
+    {
+      int i;            /* Index into the cell pointer array */
+      int sz;           /* Size of a cell */
+
+      if( !pPage->leaf ) iCellLast--;
+      for(i=0; i<pPage->nCell; i++){
+        pc = get2byte(&data[cellOffset+i*2]);
+        testcase( pc==iCellFirst );
+        testcase( pc==iCellLast );
+        if( pc<iCellFirst || pc>iCellLast ){
+          return SQLITE_CORRUPT_BKPT;
+        }
+        sz = cellSizePtr(pPage, &data[pc]);
+        testcase( pc+sz==usableSize );
+        if( pc+sz>usableSize ){
+          return SQLITE_CORRUPT_BKPT;
+        }
+      }
+      if( !pPage->leaf ) iCellLast++;
+    }  
+#endif
+
+    /* Compute the total free space on the page
+    ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
+    ** start of the first freeblock on the page, or is zero if there are no
+    ** freeblocks. */
+    pc = get2byte(&data[hdr+1]);
+    nFree = data[hdr+7] + top;  /* Init nFree to non-freeblock free space */
+    while( pc>0 ){
+      u16 next, size;
+      if( pc<iCellFirst || pc>iCellLast ){
+        /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
+        ** always be at least one cell before the first freeblock.
+        **
+        ** Or, the freeblock is off the end of the page
+        */
+        return SQLITE_CORRUPT_BKPT; 
+      }
+      next = get2byte(&data[pc]);
+      size = get2byte(&data[pc+2]);
+      if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){
+        /* Free blocks must be in ascending order. And the last byte of
+        ** the free-block must lie on the database page.  */
+        return SQLITE_CORRUPT_BKPT; 
+      }
+      nFree = nFree + size;
+      pc = next;
+    }
+
+    /* At this point, nFree contains the sum of the offset to the start
+    ** of the cell-content area plus the number of free bytes within
+    ** the cell-content area. If this is greater than the usable-size
+    ** of the page, then the page must be corrupted. This check also
+    ** serves to verify that the offset to the start of the cell-content
+    ** area, according to the page header, lies within the page.
+    */
+    if( nFree>usableSize ){
+      return SQLITE_CORRUPT_BKPT; 
+    }
+    pPage->nFree = (u16)(nFree - iCellFirst);
+    pPage->isInit = 1;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Set up a raw page so that it looks like a database page holding
+** no entries.
+*/
+static void zeroPage(MemPage *pPage, int flags){
+  unsigned char *data = pPage->aData;
+  BtShared *pBt = pPage->pBt;
+  u8 hdr = pPage->hdrOffset;
+  u16 first;
+
+  assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
+  assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
+  assert( sqlite3PagerGetData(pPage->pDbPage) == data );
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  if( pBt->btsFlags & BTS_SECURE_DELETE ){
+    memset(&data[hdr], 0, pBt->usableSize - hdr);
+  }
+  data[hdr] = (char)flags;
+  first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8);
+  memset(&data[hdr+1], 0, 4);
+  data[hdr+7] = 0;
+  put2byte(&data[hdr+5], pBt->usableSize);
+  pPage->nFree = (u16)(pBt->usableSize - first);
+  decodeFlags(pPage, flags);
+  pPage->cellOffset = first;
+  pPage->aDataEnd = &data[pBt->usableSize];
+  pPage->aCellIdx = &data[first];
+  pPage->nOverflow = 0;
+  assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
+  pPage->maskPage = (u16)(pBt->pageSize - 1);
+  pPage->nCell = 0;
+  pPage->isInit = 1;
+}
+
+
+/*
+** Convert a DbPage obtained from the pager into a MemPage used by
+** the btree layer.
+*/
+static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
+  MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
+  pPage->aData = sqlite3PagerGetData(pDbPage);
+  pPage->pDbPage = pDbPage;
+  pPage->pBt = pBt;
+  pPage->pgno = pgno;
+  pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
+  return pPage; 
+}
+
+/*
+** Get a page from the pager.  Initialize the MemPage.pBt and
+** MemPage.aData elements if needed.
+**
+** If the noContent flag is set, it means that we do not care about
+** the content of the page at this time.  So do not go to the disk
+** to fetch the content.  Just fill in the content with zeros for now.
+** If in the future we call sqlite3PagerWrite() on this page, that
+** means we have started to be concerned about content and the disk
+** read should occur at that point.
+*/
+static int btreeGetPage(
+  BtShared *pBt,       /* The btree */
+  Pgno pgno,           /* Number of the page to fetch */
+  MemPage **ppPage,    /* Return the page in this parameter */
+  int flags            /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
+){
+  int rc;
+  DbPage *pDbPage;
+
+  assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY );
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
+  if( rc ) return rc;
+  *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
+  return SQLITE_OK;
+}
+
+/*
+** Retrieve a page from the pager cache. If the requested page is not
+** already in the pager cache return NULL. Initialize the MemPage.pBt and
+** MemPage.aData elements if needed.
+*/
+static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){
+  DbPage *pDbPage;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
+  if( pDbPage ){
+    return btreePageFromDbPage(pDbPage, pgno, pBt);
+  }
+  return 0;
+}
+
+/*
+** Return the size of the database file in pages. If there is any kind of
+** error, return ((unsigned int)-1).
+*/
+static Pgno btreePagecount(BtShared *pBt){
+  return pBt->nPage;
+}
+SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree *p){
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( ((p->pBt->nPage)&0x8000000)==0 );
+  return btreePagecount(p->pBt);
+}
+
+/*
+** Get a page from the pager and initialize it.  This routine is just a
+** convenience wrapper around separate calls to btreeGetPage() and 
+** btreeInitPage().
+**
+** If an error occurs, then the value *ppPage is set to is undefined. It
+** may remain unchanged, or it may be set to an invalid value.
+*/
+static int getAndInitPage(
+  BtShared *pBt,                  /* The database file */
+  Pgno pgno,                      /* Number of the page to get */
+  MemPage **ppPage,               /* Write the page pointer here */
+  int bReadonly                   /* PAGER_GET_READONLY or 0 */
+){
+  int rc;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 );
+
+  if( pgno>btreePagecount(pBt) ){
+    rc = SQLITE_CORRUPT_BKPT;
+  }else{
+    rc = btreeGetPage(pBt, pgno, ppPage, bReadonly);
+    if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
+      rc = btreeInitPage(*ppPage);
+      if( rc!=SQLITE_OK ){
+        releasePage(*ppPage);
+      }
+    }
+  }
+
+  testcase( pgno==0 );
+  assert( pgno!=0 || rc==SQLITE_CORRUPT );
+  return rc;
+}
+
+/*
+** Release a MemPage.  This should be called once for each prior
+** call to btreeGetPage.
+*/
+static void releasePage(MemPage *pPage){
+  if( pPage ){
+    assert( pPage->aData );
+    assert( pPage->pBt );
+    assert( pPage->pDbPage!=0 );
+    assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
+    assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
+    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+    sqlite3PagerUnrefNotNull(pPage->pDbPage);
+  }
+}
+
+/*
+** During a rollback, when the pager reloads information into the cache
+** so that the cache is restored to its original state at the start of
+** the transaction, for each page restored this routine is called.
+**
+** This routine needs to reset the extra data section at the end of the
+** page to agree with the restored data.
+*/
+static void pageReinit(DbPage *pData){
+  MemPage *pPage;
+  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
+  assert( sqlite3PagerPageRefcount(pData)>0 );
+  if( pPage->isInit ){
+    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+    pPage->isInit = 0;
+    if( sqlite3PagerPageRefcount(pData)>1 ){
+      /* pPage might not be a btree page;  it might be an overflow page
+      ** or ptrmap page or a free page.  In those cases, the following
+      ** call to btreeInitPage() will likely return SQLITE_CORRUPT.
+      ** But no harm is done by this.  And it is very important that
+      ** btreeInitPage() be called on every btree page so we make
+      ** the call for every page that comes in for re-initing. */
+      btreeInitPage(pPage);
+    }
+  }
+}
+
+/*
+** Invoke the busy handler for a btree.
+*/
+static int btreeInvokeBusyHandler(void *pArg){
+  BtShared *pBt = (BtShared*)pArg;
+  assert( pBt->db );
+  assert( sqlite3_mutex_held(pBt->db->mutex) );
+  return sqlite3InvokeBusyHandler(&pBt->db->busyHandler);
+}
+
+/*
+** Open a database file.
+** 
+** zFilename is the name of the database file.  If zFilename is NULL
+** then an ephemeral database is created.  The ephemeral database might
+** be exclusively in memory, or it might use a disk-based memory cache.
+** Either way, the ephemeral database will be automatically deleted 
+** when sqlite3BtreeClose() is called.
+**
+** If zFilename is ":memory:" then an in-memory database is created
+** that is automatically destroyed when it is closed.
+**
+** The "flags" parameter is a bitmask that might contain bits like
+** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY.
+**
+** If the database is already opened in the same database connection
+** and we are in shared cache mode, then the open will fail with an
+** SQLITE_CONSTRAINT error.  We cannot allow two or more BtShared
+** objects in the same database connection since doing so will lead
+** to problems with locking.
+*/
+SQLITE_PRIVATE int sqlite3BtreeOpen(
+  sqlite3_vfs *pVfs,      /* VFS to use for this b-tree */
+  const char *zFilename,  /* Name of the file containing the BTree database */
+  sqlite3 *db,            /* Associated database handle */
+  Btree **ppBtree,        /* Pointer to new Btree object written here */
+  int flags,              /* Options */
+  int vfsFlags            /* Flags passed through to sqlite3_vfs.xOpen() */
+){
+  BtShared *pBt = 0;             /* Shared part of btree structure */
+  Btree *p;                      /* Handle to return */
+  sqlite3_mutex *mutexOpen = 0;  /* Prevents a race condition. Ticket #3537 */
+  int rc = SQLITE_OK;            /* Result code from this function */
+  u8 nReserve;                   /* Byte of unused space on each page */
+  unsigned char zDbHeader[100];  /* Database header content */
+
+  /* True if opening an ephemeral, temporary database */
+  const int isTempDb = zFilename==0 || zFilename[0]==0;
+
+  /* Set the variable isMemdb to true for an in-memory database, or 
+  ** false for a file-based database.
+  */
+#ifdef SQLITE_OMIT_MEMORYDB
+  const int isMemdb = 0;
+#else
+  const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0)
+                       || (isTempDb && sqlite3TempInMemory(db))
+                       || (vfsFlags & SQLITE_OPEN_MEMORY)!=0;
+#endif
+
+  assert( db!=0 );
+  assert( pVfs!=0 );
+  assert( sqlite3_mutex_held(db->mutex) );
+  assert( (flags&0xff)==flags );   /* flags fit in 8 bits */
+
+  /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */
+  assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 );
+
+  /* A BTREE_SINGLE database is always a temporary and/or ephemeral */
+  assert( (flags & BTREE_SINGLE)==0 || isTempDb );
+
+  if( isMemdb ){
+    flags |= BTREE_MEMORY;
+  }
+  if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
+    vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
+  }
+  p = sqlite3MallocZero(sizeof(Btree));
+  if( !p ){
+    return SQLITE_NOMEM;
+  }
+  p->inTrans = TRANS_NONE;
+  p->db = db;
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  p->lock.pBtree = p;
+  p->lock.iTable = 1;
+#endif
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
+  /*
+  ** If this Btree is a candidate for shared cache, try to find an
+  ** existing BtShared object that we can share with
+  */
+  if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
+    if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
+      int nFullPathname = pVfs->mxPathname+1;
+      char *zFullPathname = sqlite3Malloc(nFullPathname);
+      MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
+      p->sharable = 1;
+      if( !zFullPathname ){
+        sqlite3_free(p);
+        return SQLITE_NOMEM;
+      }
+      if( isMemdb ){
+        memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1);
+      }else{
+        rc = sqlite3OsFullPathname(pVfs, zFilename,
+                                   nFullPathname, zFullPathname);
+        if( rc ){
+          sqlite3_free(zFullPathname);
+          sqlite3_free(p);
+          return rc;
+        }
+      }
+#if SQLITE_THREADSAFE
+      mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
+      sqlite3_mutex_enter(mutexOpen);
+      mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+      sqlite3_mutex_enter(mutexShared);
+#endif
+      for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
+        assert( pBt->nRef>0 );
+        if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0))
+                 && sqlite3PagerVfs(pBt->pPager)==pVfs ){
+          int iDb;
+          for(iDb=db->nDb-1; iDb>=0; iDb--){
+            Btree *pExisting = db->aDb[iDb].pBt;
+            if( pExisting && pExisting->pBt==pBt ){
+              sqlite3_mutex_leave(mutexShared);
+              sqlite3_mutex_leave(mutexOpen);
+              sqlite3_free(zFullPathname);
+              sqlite3_free(p);
+              return SQLITE_CONSTRAINT;
+            }
+          }
+          p->pBt = pBt;
+          pBt->nRef++;
+          break;
+        }
+      }
+      sqlite3_mutex_leave(mutexShared);
+      sqlite3_free(zFullPathname);
+    }
+#ifdef SQLITE_DEBUG
+    else{
+      /* In debug mode, we mark all persistent databases as sharable
+      ** even when they are not.  This exercises the locking code and
+      ** gives more opportunity for asserts(sqlite3_mutex_held())
+      ** statements to find locking problems.
+      */
+      p->sharable = 1;
+    }
+#endif
+  }
+#endif
+  if( pBt==0 ){
+    /*
+    ** The following asserts make sure that structures used by the btree are
+    ** the right size.  This is to guard against size changes that result
+    ** when compiling on a different architecture.
+    */
+    assert( sizeof(i64)==8 || sizeof(i64)==4 );
+    assert( sizeof(u64)==8 || sizeof(u64)==4 );
+    assert( sizeof(u32)==4 );
+    assert( sizeof(u16)==2 );
+    assert( sizeof(Pgno)==4 );
+  
+    pBt = sqlite3MallocZero( sizeof(*pBt) );
+    if( pBt==0 ){
+      rc = SQLITE_NOMEM;
+      goto btree_open_out;
+    }
+    rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
+                          EXTRA_SIZE, flags, vfsFlags, pageReinit);
+    if( rc==SQLITE_OK ){
+      sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
+      rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
+    }
+    if( rc!=SQLITE_OK ){
+      goto btree_open_out;
+    }
+    pBt->openFlags = (u8)flags;
+    pBt->db = db;
+    sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
+    p->pBt = pBt;
+  
+    pBt->pCursor = 0;
+    pBt->pPage1 = 0;
+    if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
+#ifdef SQLITE_SECURE_DELETE
+    pBt->btsFlags |= BTS_SECURE_DELETE;
+#endif
+    /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
+    ** determined by the 2-byte integer located at an offset of 16 bytes from
+    ** the beginning of the database file. */
+    pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16);
+    if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
+         || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
+      pBt->pageSize = 0;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      /* If the magic name ":memory:" will create an in-memory database, then
+      ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
+      ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
+      ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
+      ** regular file-name. In this case the auto-vacuum applies as per normal.
+      */
+      if( zFilename && !isMemdb ){
+        pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
+        pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
+      }
+#endif
+      nReserve = 0;
+    }else{
+      /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is
+      ** determined by the one-byte unsigned integer found at an offset of 20
+      ** into the database file header. */
+      nReserve = zDbHeader[20];
+      pBt->btsFlags |= BTS_PAGESIZE_FIXED;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
+      pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
+#endif
+    }
+    rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
+    if( rc ) goto btree_open_out;
+    pBt->usableSize = pBt->pageSize - nReserve;
+    assert( (pBt->pageSize & 7)==0 );  /* 8-byte alignment of pageSize */
+   
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
+    /* Add the new BtShared object to the linked list sharable BtShareds.
+    */
+    if( p->sharable ){
+      MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
+      pBt->nRef = 1;
+      MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);)
+      if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
+        pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
+        if( pBt->mutex==0 ){
+          rc = SQLITE_NOMEM;
+          db->mallocFailed = 0;
+          goto btree_open_out;
+        }
+      }
+      sqlite3_mutex_enter(mutexShared);
+      pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList);
+      GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt;
+      sqlite3_mutex_leave(mutexShared);
+    }
+#endif
+  }
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
+  /* If the new Btree uses a sharable pBtShared, then link the new
+  ** Btree into the list of all sharable Btrees for the same connection.
+  ** The list is kept in ascending order by pBt address.
+  */
+  if( p->sharable ){
+    int i;
+    Btree *pSib;
+    for(i=0; i<db->nDb; i++){
+      if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
+        while( pSib->pPrev ){ pSib = pSib->pPrev; }
+        if( p->pBt<pSib->pBt ){
+          p->pNext = pSib;
+          p->pPrev = 0;
+          pSib->pPrev = p;
+        }else{
+          while( pSib->pNext && pSib->pNext->pBt<p->pBt ){
+            pSib = pSib->pNext;
+          }
+          p->pNext = pSib->pNext;
+          p->pPrev = pSib;
+          if( p->pNext ){
+            p->pNext->pPrev = p;
+          }
+          pSib->pNext = p;
+        }
+        break;
+      }
+    }
+  }
+#endif
+  *ppBtree = p;
+
+btree_open_out:
+  if( rc!=SQLITE_OK ){
+    if( pBt && pBt->pPager ){
+      sqlite3PagerClose(pBt->pPager);
+    }
+    sqlite3_free(pBt);
+    sqlite3_free(p);
+    *ppBtree = 0;
+  }else{
+    /* If the B-Tree was successfully opened, set the pager-cache size to the
+    ** default value. Except, when opening on an existing shared pager-cache,
+    ** do not change the pager-cache size.
+    */
+    if( sqlite3BtreeSchema(p, 0, 0)==0 ){
+      sqlite3PagerSetCachesize(p->pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE);
+    }
+  }
+  if( mutexOpen ){
+    assert( sqlite3_mutex_held(mutexOpen) );
+    sqlite3_mutex_leave(mutexOpen);
+  }
+  return rc;
+}
+
+/*
+** Decrement the BtShared.nRef counter.  When it reaches zero,
+** remove the BtShared structure from the sharing list.  Return
+** true if the BtShared.nRef counter reaches zero and return
+** false if it is still positive.
+*/
+static int removeFromSharingList(BtShared *pBt){
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  MUTEX_LOGIC( sqlite3_mutex *pMaster; )
+  BtShared *pList;
+  int removed = 0;
+
+  assert( sqlite3_mutex_notheld(pBt->mutex) );
+  MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
+  sqlite3_mutex_enter(pMaster);
+  pBt->nRef--;
+  if( pBt->nRef<=0 ){
+    if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){
+      GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext;
+    }else{
+      pList = GLOBAL(BtShared*,sqlite3SharedCacheList);
+      while( ALWAYS(pList) && pList->pNext!=pBt ){
+        pList=pList->pNext;
+      }
+      if( ALWAYS(pList) ){
+        pList->pNext = pBt->pNext;
+      }
+    }
+    if( SQLITE_THREADSAFE ){
+      sqlite3_mutex_free(pBt->mutex);
+    }
+    removed = 1;
+  }
+  sqlite3_mutex_leave(pMaster);
+  return removed;
+#else
+  return 1;
+#endif
+}
+
+/*
+** Make sure pBt->pTmpSpace points to an allocation of 
+** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child
+** pointer.
+*/
+static void allocateTempSpace(BtShared *pBt){
+  if( !pBt->pTmpSpace ){
+    pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize );
+
+    /* One of the uses of pBt->pTmpSpace is to format cells before
+    ** inserting them into a leaf page (function fillInCell()). If
+    ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes
+    ** by the various routines that manipulate binary cells. Which
+    ** can mean that fillInCell() only initializes the first 2 or 3
+    ** bytes of pTmpSpace, but that the first 4 bytes are copied from
+    ** it into a database page. This is not actually a problem, but it
+    ** does cause a valgrind error when the 1 or 2 bytes of unitialized 
+    ** data is passed to system call write(). So to avoid this error,
+    ** zero the first 4 bytes of temp space here.
+    **
+    ** Also:  Provide four bytes of initialized space before the
+    ** beginning of pTmpSpace as an area available to prepend the
+    ** left-child pointer to the beginning of a cell.
+    */
+    if( pBt->pTmpSpace ){
+      memset(pBt->pTmpSpace, 0, 8);
+      pBt->pTmpSpace += 4;
+    }
+  }
+}
+
+/*
+** Free the pBt->pTmpSpace allocation
+*/
+static void freeTempSpace(BtShared *pBt){
+  if( pBt->pTmpSpace ){
+    pBt->pTmpSpace -= 4;
+    sqlite3PageFree(pBt->pTmpSpace);
+    pBt->pTmpSpace = 0;
+  }
+}
+
+/*
+** Close an open database and invalidate all cursors.
+*/
+SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){
+  BtShared *pBt = p->pBt;
+  BtCursor *pCur;
+
+  /* Close all cursors opened via this handle.  */
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  sqlite3BtreeEnter(p);
+  pCur = pBt->pCursor;
+  while( pCur ){
+    BtCursor *pTmp = pCur;
+    pCur = pCur->pNext;
+    if( pTmp->pBtree==p ){
+      sqlite3BtreeCloseCursor(pTmp);
+    }
+  }
+
+  /* Rollback any active transaction and free the handle structure.
+  ** The call to sqlite3BtreeRollback() drops any table-locks held by
+  ** this handle.
+  */
+  sqlite3BtreeRollback(p, SQLITE_OK, 0);
+  sqlite3BtreeLeave(p);
+
+  /* If there are still other outstanding references to the shared-btree
+  ** structure, return now. The remainder of this procedure cleans 
+  ** up the shared-btree.
+  */
+  assert( p->wantToLock==0 && p->locked==0 );
+  if( !p->sharable || removeFromSharingList(pBt) ){
+    /* The pBt is no longer on the sharing list, so we can access
+    ** it without having to hold the mutex.
+    **
+    ** Clean out and delete the BtShared object.
+    */
+    assert( !pBt->pCursor );
+    sqlite3PagerClose(pBt->pPager);
+    if( pBt->xFreeSchema && pBt->pSchema ){
+      pBt->xFreeSchema(pBt->pSchema);
+    }
+    sqlite3DbFree(0, pBt->pSchema);
+    freeTempSpace(pBt);
+    sqlite3_free(pBt);
+  }
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  assert( p->wantToLock==0 );
+  assert( p->locked==0 );
+  if( p->pPrev ) p->pPrev->pNext = p->pNext;
+  if( p->pNext ) p->pNext->pPrev = p->pPrev;
+#endif
+
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Change the limit on the number of pages allowed in the cache.
+**
+** The maximum number of cache pages is set to the absolute
+** value of mxPage.  If mxPage is negative, the pager will
+** operate asynchronously - it will not stop to do fsync()s
+** to insure data is written to the disk surface before
+** continuing.  Transactions still work if synchronous is off,
+** and the database cannot be corrupted if this program
+** crashes.  But if the operating system crashes or there is
+** an abrupt power failure when synchronous is off, the database
+** could be left in an inconsistent and unrecoverable state.
+** Synchronous is on by default so database corruption is not
+** normally a worry.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
+  BtShared *pBt = p->pBt;
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  sqlite3BtreeEnter(p);
+  sqlite3PagerSetCachesize(pBt->pPager, mxPage);
+  sqlite3BtreeLeave(p);
+  return SQLITE_OK;
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** Change the limit on the amount of the database file that may be
+** memory mapped.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){
+  BtShared *pBt = p->pBt;
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  sqlite3BtreeEnter(p);
+  sqlite3PagerSetMmapLimit(pBt->pPager, szMmap);
+  sqlite3BtreeLeave(p);
+  return SQLITE_OK;
+}
+#endif /* SQLITE_MAX_MMAP_SIZE>0 */
+
+/*
+** Change the way data is synced to disk in order to increase or decrease
+** how well the database resists damage due to OS crashes and power
+** failures.  Level 1 is the same as asynchronous (no syncs() occur and
+** there is a high probability of damage)  Level 2 is the default.  There
+** is a very low but non-zero probability of damage.  Level 3 reduces the
+** probability of damage to near zero but with a write performance reduction.
+*/
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags(
+  Btree *p,              /* The btree to set the safety level on */
+  unsigned pgFlags       /* Various PAGER_* flags */
+){
+  BtShared *pBt = p->pBt;
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  sqlite3BtreeEnter(p);
+  sqlite3PagerSetFlags(pBt->pPager, pgFlags);
+  sqlite3BtreeLeave(p);
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** Return TRUE if the given btree is set to safety level 1.  In other
+** words, return TRUE if no sync() occurs on the disk files.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSyncDisabled(Btree *p){
+  BtShared *pBt = p->pBt;
+  int rc;
+  assert( sqlite3_mutex_held(p->db->mutex) );  
+  sqlite3BtreeEnter(p);
+  assert( pBt && pBt->pPager );
+  rc = sqlite3PagerNosync(pBt->pPager);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Change the default pages size and the number of reserved bytes per page.
+** Or, if the page size has already been fixed, return SQLITE_READONLY 
+** without changing anything.
+**
+** The page size must be a power of 2 between 512 and 65536.  If the page
+** size supplied does not meet this constraint then the page size is not
+** changed.
+**
+** Page sizes are constrained to be a power of two so that the region
+** of the database file used for locking (beginning at PENDING_BYTE,
+** the first byte past the 1GB boundary, 0x40000000) needs to occur
+** at the beginning of a page.
+**
+** If parameter nReserve is less than zero, then the number of reserved
+** bytes per page is left unchanged.
+**
+** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size
+** and autovacuum mode can no longer be changed.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){
+  int rc = SQLITE_OK;
+  BtShared *pBt = p->pBt;
+  assert( nReserve>=-1 && nReserve<=255 );
+  sqlite3BtreeEnter(p);
+  if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){
+    sqlite3BtreeLeave(p);
+    return SQLITE_READONLY;
+  }
+  if( nReserve<0 ){
+    nReserve = pBt->pageSize - pBt->usableSize;
+  }
+  assert( nReserve>=0 && nReserve<=255 );
+  if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
+        ((pageSize-1)&pageSize)==0 ){
+    assert( (pageSize & 7)==0 );
+    assert( !pBt->pPage1 && !pBt->pCursor );
+    pBt->pageSize = (u32)pageSize;
+    freeTempSpace(pBt);
+  }
+  rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
+  pBt->usableSize = pBt->pageSize - (u16)nReserve;
+  if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED;
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Return the currently defined page size
+*/
+SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree *p){
+  return p->pBt->pageSize;
+}
+
+#if defined(SQLITE_HAS_CODEC) || defined(SQLITE_DEBUG)
+/*
+** This function is similar to sqlite3BtreeGetReserve(), except that it
+** may only be called if it is guaranteed that the b-tree mutex is already
+** held.
+**
+** This is useful in one special case in the backup API code where it is
+** known that the shared b-tree mutex is held, but the mutex on the 
+** database handle that owns *p is not. In this case if sqlite3BtreeEnter()
+** were to be called, it might collide with some other operation on the
+** database handle that owns *p, causing undefined behavior.
+*/
+SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p){
+  assert( sqlite3_mutex_held(p->pBt->mutex) );
+  return p->pBt->pageSize - p->pBt->usableSize;
+}
+#endif /* SQLITE_HAS_CODEC || SQLITE_DEBUG */
+
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
+/*
+** Return the number of bytes of space at the end of every page that
+** are intentually left unused.  This is the "reserved" space that is
+** sometimes used by extensions.
+*/
+SQLITE_PRIVATE int sqlite3BtreeGetReserve(Btree *p){
+  int n;
+  sqlite3BtreeEnter(p);
+  n = p->pBt->pageSize - p->pBt->usableSize;
+  sqlite3BtreeLeave(p);
+  return n;
+}
+
+/*
+** Set the maximum page count for a database if mxPage is positive.
+** No changes are made if mxPage is 0 or negative.
+** Regardless of the value of mxPage, return the maximum page count.
+*/
+SQLITE_PRIVATE int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
+  int n;
+  sqlite3BtreeEnter(p);
+  n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
+  sqlite3BtreeLeave(p);
+  return n;
+}
+
+/*
+** Set the BTS_SECURE_DELETE flag if newFlag is 0 or 1.  If newFlag is -1,
+** then make no changes.  Always return the value of the BTS_SECURE_DELETE
+** setting after the change.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree *p, int newFlag){
+  int b;
+  if( p==0 ) return 0;
+  sqlite3BtreeEnter(p);
+  if( newFlag>=0 ){
+    p->pBt->btsFlags &= ~BTS_SECURE_DELETE;
+    if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE;
+  } 
+  b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0;
+  sqlite3BtreeLeave(p);
+  return b;
+}
+#endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
+
+/*
+** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
+** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
+** is disabled. The default value for the auto-vacuum property is 
+** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  return SQLITE_READONLY;
+#else
+  BtShared *pBt = p->pBt;
+  int rc = SQLITE_OK;
+  u8 av = (u8)autoVacuum;
+
+  sqlite3BtreeEnter(p);
+  if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){
+    rc = SQLITE_READONLY;
+  }else{
+    pBt->autoVacuum = av ?1:0;
+    pBt->incrVacuum = av==2 ?1:0;
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+#endif
+}
+
+/*
+** Return the value of the 'auto-vacuum' property. If auto-vacuum is 
+** enabled 1 is returned. Otherwise 0.
+*/
+SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *p){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  return BTREE_AUTOVACUUM_NONE;
+#else
+  int rc;
+  sqlite3BtreeEnter(p);
+  rc = (
+    (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
+    (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
+    BTREE_AUTOVACUUM_INCR
+  );
+  sqlite3BtreeLeave(p);
+  return rc;
+#endif
+}
+
+
+/*
+** Get a reference to pPage1 of the database file.  This will
+** also acquire a readlock on that file.
+**
+** SQLITE_OK is returned on success.  If the file is not a
+** well-formed database file, then SQLITE_CORRUPT is returned.
+** SQLITE_BUSY is returned if the database is locked.  SQLITE_NOMEM
+** is returned if we run out of memory. 
+*/
+static int lockBtree(BtShared *pBt){
+  int rc;              /* Result code from subfunctions */
+  MemPage *pPage1;     /* Page 1 of the database file */
+  int nPage;           /* Number of pages in the database */
+  int nPageFile = 0;   /* Number of pages in the database file */
+  int nPageHeader;     /* Number of pages in the database according to hdr */
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( pBt->pPage1==0 );
+  rc = sqlite3PagerSharedLock(pBt->pPager);
+  if( rc!=SQLITE_OK ) return rc;
+  rc = btreeGetPage(pBt, 1, &pPage1, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Do some checking to help insure the file we opened really is
+  ** a valid database file. 
+  */
+  nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
+  sqlite3PagerPagecount(pBt->pPager, &nPageFile);
+  if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){
+    nPage = nPageFile;
+  }
+  if( nPage>0 ){
+    u32 pageSize;
+    u32 usableSize;
+    u8 *page1 = pPage1->aData;
+    rc = SQLITE_NOTADB;
+    /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins
+    ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d
+    ** 61 74 20 33 00. */
+    if( memcmp(page1, zMagicHeader, 16)!=0 ){
+      goto page1_init_failed;
+    }
+
+#ifdef SQLITE_OMIT_WAL
+    if( page1[18]>1 ){
+      pBt->btsFlags |= BTS_READ_ONLY;
+    }
+    if( page1[19]>1 ){
+      goto page1_init_failed;
+    }
+#else
+    if( page1[18]>2 ){
+      pBt->btsFlags |= BTS_READ_ONLY;
+    }
+    if( page1[19]>2 ){
+      goto page1_init_failed;
+    }
+
+    /* If the write version is set to 2, this database should be accessed
+    ** in WAL mode. If the log is not already open, open it now. Then 
+    ** return SQLITE_OK and return without populating BtShared.pPage1.
+    ** The caller detects this and calls this function again. This is
+    ** required as the version of page 1 currently in the page1 buffer
+    ** may not be the latest version - there may be a newer one in the log
+    ** file.
+    */
+    if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){
+      int isOpen = 0;
+      rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen);
+      if( rc!=SQLITE_OK ){
+        goto page1_init_failed;
+      }else if( isOpen==0 ){
+        releasePage(pPage1);
+        return SQLITE_OK;
+      }
+      rc = SQLITE_NOTADB;
+    }
+#endif
+
+    /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload
+    ** fractions and the leaf payload fraction values must be 64, 32, and 32.
+    **
+    ** The original design allowed these amounts to vary, but as of
+    ** version 3.6.0, we require them to be fixed.
+    */
+    if( memcmp(&page1[21], "\100\040\040",3)!=0 ){
+      goto page1_init_failed;
+    }
+    /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
+    ** determined by the 2-byte integer located at an offset of 16 bytes from
+    ** the beginning of the database file. */
+    pageSize = (page1[16]<<8) | (page1[17]<<16);
+    /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two
+    ** between 512 and 65536 inclusive. */
+    if( ((pageSize-1)&pageSize)!=0
+     || pageSize>SQLITE_MAX_PAGE_SIZE 
+     || pageSize<=256 
+    ){
+      goto page1_init_failed;
+    }
+    assert( (pageSize & 7)==0 );
+    /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte
+    ** integer at offset 20 is the number of bytes of space at the end of
+    ** each page to reserve for extensions. 
+    **
+    ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is
+    ** determined by the one-byte unsigned integer found at an offset of 20
+    ** into the database file header. */
+    usableSize = pageSize - page1[20];
+    if( (u32)pageSize!=pBt->pageSize ){
+      /* After reading the first page of the database assuming a page size
+      ** of BtShared.pageSize, we have discovered that the page-size is
+      ** actually pageSize. Unlock the database, leave pBt->pPage1 at
+      ** zero and return SQLITE_OK. The caller will call this function
+      ** again with the correct page-size.
+      */
+      releasePage(pPage1);
+      pBt->usableSize = usableSize;
+      pBt->pageSize = pageSize;
+      freeTempSpace(pBt);
+      rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize,
+                                   pageSize-usableSize);
+      return rc;
+    }
+    if( (pBt->db->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto page1_init_failed;
+    }
+    /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to
+    ** be less than 480. In other words, if the page size is 512, then the
+    ** reserved space size cannot exceed 32. */
+    if( usableSize<480 ){
+      goto page1_init_failed;
+    }
+    pBt->pageSize = pageSize;
+    pBt->usableSize = usableSize;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
+    pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
+#endif
+  }
+
+  /* maxLocal is the maximum amount of payload to store locally for
+  ** a cell.  Make sure it is small enough so that at least minFanout
+  ** cells can will fit on one page.  We assume a 10-byte page header.
+  ** Besides the payload, the cell must store:
+  **     2-byte pointer to the cell
+  **     4-byte child pointer
+  **     9-byte nKey value
+  **     4-byte nData value
+  **     4-byte overflow page pointer
+  ** So a cell consists of a 2-byte pointer, a header which is as much as
+  ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
+  ** page pointer.
+  */
+  pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23);
+  pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23);
+  pBt->maxLeaf = (u16)(pBt->usableSize - 35);
+  pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23);
+  if( pBt->maxLocal>127 ){
+    pBt->max1bytePayload = 127;
+  }else{
+    pBt->max1bytePayload = (u8)pBt->maxLocal;
+  }
+  assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
+  pBt->pPage1 = pPage1;
+  pBt->nPage = nPage;
+  return SQLITE_OK;
+
+page1_init_failed:
+  releasePage(pPage1);
+  pBt->pPage1 = 0;
+  return rc;
+}
+
+#ifndef NDEBUG
+/*
+** Return the number of cursors open on pBt. This is for use
+** in assert() expressions, so it is only compiled if NDEBUG is not
+** defined.
+**
+** Only write cursors are counted if wrOnly is true.  If wrOnly is
+** false then all cursors are counted.
+**
+** For the purposes of this routine, a cursor is any cursor that
+** is capable of reading or writing to the database.  Cursors that
+** have been tripped into the CURSOR_FAULT state are not counted.
+*/
+static int countValidCursors(BtShared *pBt, int wrOnly){
+  BtCursor *pCur;
+  int r = 0;
+  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
+    if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0)
+     && pCur->eState!=CURSOR_FAULT ) r++; 
+  }
+  return r;
+}
+#endif
+
+/*
+** If there are no outstanding cursors and we are not in the middle
+** of a transaction but there is a read lock on the database, then
+** this routine unrefs the first page of the database file which 
+** has the effect of releasing the read lock.
+**
+** If there is a transaction in progress, this routine is a no-op.
+*/
+static void unlockBtreeIfUnused(BtShared *pBt){
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE );
+  if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){
+    MemPage *pPage1 = pBt->pPage1;
+    assert( pPage1->aData );
+    assert( sqlite3PagerRefcount(pBt->pPager)==1 );
+    pBt->pPage1 = 0;
+    releasePage(pPage1);
+  }
+}
+
+/*
+** If pBt points to an empty file then convert that empty file
+** into a new empty database by initializing the first page of
+** the database.
+*/
+static int newDatabase(BtShared *pBt){
+  MemPage *pP1;
+  unsigned char *data;
+  int rc;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  if( pBt->nPage>0 ){
+    return SQLITE_OK;
+  }
+  pP1 = pBt->pPage1;
+  assert( pP1!=0 );
+  data = pP1->aData;
+  rc = sqlite3PagerWrite(pP1->pDbPage);
+  if( rc ) return rc;
+  memcpy(data, zMagicHeader, sizeof(zMagicHeader));
+  assert( sizeof(zMagicHeader)==16 );
+  data[16] = (u8)((pBt->pageSize>>8)&0xff);
+  data[17] = (u8)((pBt->pageSize>>16)&0xff);
+  data[18] = 1;
+  data[19] = 1;
+  assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize);
+  data[20] = (u8)(pBt->pageSize - pBt->usableSize);
+  data[21] = 64;
+  data[22] = 32;
+  data[23] = 32;
+  memset(&data[24], 0, 100-24);
+  zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
+  pBt->btsFlags |= BTS_PAGESIZE_FIXED;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
+  assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
+  put4byte(&data[36 + 4*4], pBt->autoVacuum);
+  put4byte(&data[36 + 7*4], pBt->incrVacuum);
+#endif
+  pBt->nPage = 1;
+  data[31] = 1;
+  return SQLITE_OK;
+}
+
+/*
+** Initialize the first page of the database file (creating a database
+** consisting of a single page and no schema objects). Return SQLITE_OK
+** if successful, or an SQLite error code otherwise.
+*/
+SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p){
+  int rc;
+  sqlite3BtreeEnter(p);
+  p->pBt->nPage = 0;
+  rc = newDatabase(p->pBt);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Attempt to start a new transaction. A write-transaction
+** is started if the second argument is nonzero, otherwise a read-
+** transaction.  If the second argument is 2 or more and exclusive
+** transaction is started, meaning that no other process is allowed
+** to access the database.  A preexisting transaction may not be
+** upgraded to exclusive by calling this routine a second time - the
+** exclusivity flag only works for a new transaction.
+**
+** A write-transaction must be started before attempting any 
+** changes to the database.  None of the following routines 
+** will work unless a transaction is started first:
+**
+**      sqlite3BtreeCreateTable()
+**      sqlite3BtreeCreateIndex()
+**      sqlite3BtreeClearTable()
+**      sqlite3BtreeDropTable()
+**      sqlite3BtreeInsert()
+**      sqlite3BtreeDelete()
+**      sqlite3BtreeUpdateMeta()
+**
+** If an initial attempt to acquire the lock fails because of lock contention
+** and the database was previously unlocked, then invoke the busy handler
+** if there is one.  But if there was previously a read-lock, do not
+** invoke the busy handler - just return SQLITE_BUSY.  SQLITE_BUSY is 
+** returned when there is already a read-lock in order to avoid a deadlock.
+**
+** Suppose there are two processes A and B.  A has a read lock and B has
+** a reserved lock.  B tries to promote to exclusive but is blocked because
+** of A's read lock.  A tries to promote to reserved but is blocked by B.
+** One or the other of the two processes must give way or there can be
+** no progress.  By returning SQLITE_BUSY and not invoking the busy callback
+** when A already has a read lock, we encourage A to give up and let B
+** proceed.
+*/
+SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
+  sqlite3 *pBlock = 0;
+  BtShared *pBt = p->pBt;
+  int rc = SQLITE_OK;
+
+  sqlite3BtreeEnter(p);
+  btreeIntegrity(p);
+
+  /* If the btree is already in a write-transaction, or it
+  ** is already in a read-transaction and a read-transaction
+  ** is requested, this is a no-op.
+  */
+  if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
+    goto trans_begun;
+  }
+  assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 );
+
+  /* Write transactions are not possible on a read-only database */
+  if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){
+    rc = SQLITE_READONLY;
+    goto trans_begun;
+  }
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  /* If another database handle has already opened a write transaction 
+  ** on this shared-btree structure and a second write transaction is
+  ** requested, return SQLITE_LOCKED.
+  */
+  if( (wrflag && pBt->inTransaction==TRANS_WRITE)
+   || (pBt->btsFlags & BTS_PENDING)!=0
+  ){
+    pBlock = pBt->pWriter->db;
+  }else if( wrflag>1 ){
+    BtLock *pIter;
+    for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
+      if( pIter->pBtree!=p ){
+        pBlock = pIter->pBtree->db;
+        break;
+      }
+    }
+  }
+  if( pBlock ){
+    sqlite3ConnectionBlocked(p->db, pBlock);
+    rc = SQLITE_LOCKED_SHAREDCACHE;
+    goto trans_begun;
+  }
+#endif
+
+  /* Any read-only or read-write transaction implies a read-lock on 
+  ** page 1. So if some other shared-cache client already has a write-lock 
+  ** on page 1, the transaction cannot be opened. */
+  rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK);
+  if( SQLITE_OK!=rc ) goto trans_begun;
+
+  pBt->btsFlags &= ~BTS_INITIALLY_EMPTY;
+  if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY;
+  do {
+    /* Call lockBtree() until either pBt->pPage1 is populated or
+    ** lockBtree() returns something other than SQLITE_OK. lockBtree()
+    ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after
+    ** reading page 1 it discovers that the page-size of the database 
+    ** file is not pBt->pageSize. In this case lockBtree() will update
+    ** pBt->pageSize to the page-size of the file on disk.
+    */
+    while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) );
+
+    if( rc==SQLITE_OK && wrflag ){
+      if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){
+        rc = SQLITE_READONLY;
+      }else{
+        rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,sqlite3TempInMemory(p->db));
+        if( rc==SQLITE_OK ){
+          rc = newDatabase(pBt);
+        }
+      }
+    }
+  
+    if( rc!=SQLITE_OK ){
+      unlockBtreeIfUnused(pBt);
+    }
+  }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
+          btreeInvokeBusyHandler(pBt) );
+
+  if( rc==SQLITE_OK ){
+    if( p->inTrans==TRANS_NONE ){
+      pBt->nTransaction++;
+#ifndef SQLITE_OMIT_SHARED_CACHE
+      if( p->sharable ){
+        assert( p->lock.pBtree==p && p->lock.iTable==1 );
+        p->lock.eLock = READ_LOCK;
+        p->lock.pNext = pBt->pLock;
+        pBt->pLock = &p->lock;
+      }
+#endif
+    }
+    p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
+    if( p->inTrans>pBt->inTransaction ){
+      pBt->inTransaction = p->inTrans;
+    }
+    if( wrflag ){
+      MemPage *pPage1 = pBt->pPage1;
+#ifndef SQLITE_OMIT_SHARED_CACHE
+      assert( !pBt->pWriter );
+      pBt->pWriter = p;
+      pBt->btsFlags &= ~BTS_EXCLUSIVE;
+      if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE;
+#endif
+
+      /* If the db-size header field is incorrect (as it may be if an old
+      ** client has been writing the database file), update it now. Doing
+      ** this sooner rather than later means the database size can safely 
+      ** re-read the database size from page 1 if a savepoint or transaction
+      ** rollback occurs within the transaction.
+      */
+      if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){
+        rc = sqlite3PagerWrite(pPage1->pDbPage);
+        if( rc==SQLITE_OK ){
+          put4byte(&pPage1->aData[28], pBt->nPage);
+        }
+      }
+    }
+  }
+
+
+trans_begun:
+  if( rc==SQLITE_OK && wrflag ){
+    /* This call makes sure that the pager has the correct number of
+    ** open savepoints. If the second parameter is greater than 0 and
+    ** the sub-journal is not already open, then it will be opened here.
+    */
+    rc = sqlite3PagerOpenSavepoint(pBt->pPager, p->db->nSavepoint);
+  }
+
+  btreeIntegrity(p);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+
+/*
+** Set the pointer-map entries for all children of page pPage. Also, if
+** pPage contains cells that point to overflow pages, set the pointer
+** map entries for the overflow pages as well.
+*/
+static int setChildPtrmaps(MemPage *pPage){
+  int i;                             /* Counter variable */
+  int nCell;                         /* Number of cells in page pPage */
+  int rc;                            /* Return code */
+  BtShared *pBt = pPage->pBt;
+  u8 isInitOrig = pPage->isInit;
+  Pgno pgno = pPage->pgno;
+
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  rc = btreeInitPage(pPage);
+  if( rc!=SQLITE_OK ){
+    goto set_child_ptrmaps_out;
+  }
+  nCell = pPage->nCell;
+
+  for(i=0; i<nCell; i++){
+    u8 *pCell = findCell(pPage, i);
+
+    ptrmapPutOvflPtr(pPage, pCell, &rc);
+
+    if( !pPage->leaf ){
+      Pgno childPgno = get4byte(pCell);
+      ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
+    }
+  }
+
+  if( !pPage->leaf ){
+    Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+    ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
+  }
+
+set_child_ptrmaps_out:
+  pPage->isInit = isInitOrig;
+  return rc;
+}
+
+/*
+** Somewhere on pPage is a pointer to page iFrom.  Modify this pointer so
+** that it points to iTo. Parameter eType describes the type of pointer to
+** be modified, as  follows:
+**
+** PTRMAP_BTREE:     pPage is a btree-page. The pointer points at a child 
+**                   page of pPage.
+**
+** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
+**                   page pointed to by one of the cells on pPage.
+**
+** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
+**                   overflow page in the list.
+*/
+static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  if( eType==PTRMAP_OVERFLOW2 ){
+    /* The pointer is always the first 4 bytes of the page in this case.  */
+    if( get4byte(pPage->aData)!=iFrom ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+    put4byte(pPage->aData, iTo);
+  }else{
+    u8 isInitOrig = pPage->isInit;
+    int i;
+    int nCell;
+
+    btreeInitPage(pPage);
+    nCell = pPage->nCell;
+
+    for(i=0; i<nCell; i++){
+      u8 *pCell = findCell(pPage, i);
+      if( eType==PTRMAP_OVERFLOW1 ){
+        CellInfo info;
+        btreeParseCellPtr(pPage, pCell, &info);
+        if( info.iOverflow
+         && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage
+         && iFrom==get4byte(&pCell[info.iOverflow])
+        ){
+          put4byte(&pCell[info.iOverflow], iTo);
+          break;
+        }
+      }else{
+        if( get4byte(pCell)==iFrom ){
+          put4byte(pCell, iTo);
+          break;
+        }
+      }
+    }
+  
+    if( i==nCell ){
+      if( eType!=PTRMAP_BTREE || 
+          get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
+        return SQLITE_CORRUPT_BKPT;
+      }
+      put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
+    }
+
+    pPage->isInit = isInitOrig;
+  }
+  return SQLITE_OK;
+}
+
+
+/*
+** Move the open database page pDbPage to location iFreePage in the 
+** database. The pDbPage reference remains valid.
+**
+** The isCommit flag indicates that there is no need to remember that
+** the journal needs to be sync()ed before database page pDbPage->pgno 
+** can be written to. The caller has already promised not to write to that
+** page.
+*/
+static int relocatePage(
+  BtShared *pBt,           /* Btree */
+  MemPage *pDbPage,        /* Open page to move */
+  u8 eType,                /* Pointer map 'type' entry for pDbPage */
+  Pgno iPtrPage,           /* Pointer map 'page-no' entry for pDbPage */
+  Pgno iFreePage,          /* The location to move pDbPage to */
+  int isCommit             /* isCommit flag passed to sqlite3PagerMovepage */
+){
+  MemPage *pPtrPage;   /* The page that contains a pointer to pDbPage */
+  Pgno iDbPage = pDbPage->pgno;
+  Pager *pPager = pBt->pPager;
+  int rc;
+
+  assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || 
+      eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( pDbPage->pBt==pBt );
+
+  /* Move page iDbPage from its current location to page number iFreePage */
+  TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", 
+      iDbPage, iFreePage, iPtrPage, eType));
+  rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  pDbPage->pgno = iFreePage;
+
+  /* If pDbPage was a btree-page, then it may have child pages and/or cells
+  ** that point to overflow pages. The pointer map entries for all these
+  ** pages need to be changed.
+  **
+  ** If pDbPage is an overflow page, then the first 4 bytes may store a
+  ** pointer to a subsequent overflow page. If this is the case, then
+  ** the pointer map needs to be updated for the subsequent overflow page.
+  */
+  if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
+    rc = setChildPtrmaps(pDbPage);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+  }else{
+    Pgno nextOvfl = get4byte(pDbPage->aData);
+    if( nextOvfl!=0 ){
+      ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+    }
+  }
+
+  /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
+  ** that it points at iFreePage. Also fix the pointer map entry for
+  ** iPtrPage.
+  */
+  if( eType!=PTRMAP_ROOTPAGE ){
+    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    rc = sqlite3PagerWrite(pPtrPage->pDbPage);
+    if( rc!=SQLITE_OK ){
+      releasePage(pPtrPage);
+      return rc;
+    }
+    rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
+    releasePage(pPtrPage);
+    if( rc==SQLITE_OK ){
+      ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc);
+    }
+  }
+  return rc;
+}
+
+/* Forward declaration required by incrVacuumStep(). */
+static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
+
+/*
+** Perform a single step of an incremental-vacuum. If successful, return
+** SQLITE_OK. If there is no work to do (and therefore no point in 
+** calling this function again), return SQLITE_DONE. Or, if an error 
+** occurs, return some other error code.
+**
+** More specifically, this function attempts to re-organize the database so 
+** that the last page of the file currently in use is no longer in use.
+**
+** Parameter nFin is the number of pages that this database would contain
+** were this function called until it returns SQLITE_DONE.
+**
+** If the bCommit parameter is non-zero, this function assumes that the 
+** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE 
+** or an error. bCommit is passed true for an auto-vacuum-on-commit 
+** operation, or false for an incremental vacuum.
+*/
+static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){
+  Pgno nFreeList;           /* Number of pages still on the free-list */
+  int rc;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( iLastPg>nFin );
+
+  if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
+    u8 eType;
+    Pgno iPtrPage;
+
+    nFreeList = get4byte(&pBt->pPage1->aData[36]);
+    if( nFreeList==0 ){
+      return SQLITE_DONE;
+    }
+
+    rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    if( eType==PTRMAP_ROOTPAGE ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+
+    if( eType==PTRMAP_FREEPAGE ){
+      if( bCommit==0 ){
+        /* Remove the page from the files free-list. This is not required
+        ** if bCommit is non-zero. In that case, the free-list will be
+        ** truncated to zero after this function returns, so it doesn't 
+        ** matter if it still contains some garbage entries.
+        */
+        Pgno iFreePg;
+        MemPage *pFreePg;
+        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+        assert( iFreePg==iLastPg );
+        releasePage(pFreePg);
+      }
+    } else {
+      Pgno iFreePg;             /* Index of free page to move pLastPg to */
+      MemPage *pLastPg;
+      u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */
+      Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */
+
+      rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+
+      /* If bCommit is zero, this loop runs exactly once and page pLastPg
+      ** is swapped with the first free page pulled off the free list.
+      **
+      ** On the other hand, if bCommit is greater than zero, then keep
+      ** looping until a free-page located within the first nFin pages
+      ** of the file is found.
+      */
+      if( bCommit==0 ){
+        eMode = BTALLOC_LE;
+        iNear = nFin;
+      }
+      do {
+        MemPage *pFreePg;
+        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode);
+        if( rc!=SQLITE_OK ){
+          releasePage(pLastPg);
+          return rc;
+        }
+        releasePage(pFreePg);
+      }while( bCommit && iFreePg>nFin );
+      assert( iFreePg<iLastPg );
+      
+      rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, bCommit);
+      releasePage(pLastPg);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+    }
+  }
+
+  if( bCommit==0 ){
+    do {
+      iLastPg--;
+    }while( iLastPg==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, iLastPg) );
+    pBt->bDoTruncate = 1;
+    pBt->nPage = iLastPg;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** The database opened by the first argument is an auto-vacuum database
+** nOrig pages in size containing nFree free pages. Return the expected 
+** size of the database in pages following an auto-vacuum operation.
+*/
+static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){
+  int nEntry;                     /* Number of entries on one ptrmap page */
+  Pgno nPtrmap;                   /* Number of PtrMap pages to be freed */
+  Pgno nFin;                      /* Return value */
+
+  nEntry = pBt->usableSize/5;
+  nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry;
+  nFin = nOrig - nFree - nPtrmap;
+  if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){
+    nFin--;
+  }
+  while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
+    nFin--;
+  }
+
+  return nFin;
+}
+
+/*
+** A write-transaction must be opened before calling this function.
+** It performs a single unit of work towards an incremental vacuum.
+**
+** If the incremental vacuum is finished after this function has run,
+** SQLITE_DONE is returned. If it is not finished, but no error occurred,
+** SQLITE_OK is returned. Otherwise an SQLite error code. 
+*/
+SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){
+  int rc;
+  BtShared *pBt = p->pBt;
+
+  sqlite3BtreeEnter(p);
+  assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
+  if( !pBt->autoVacuum ){
+    rc = SQLITE_DONE;
+  }else{
+    Pgno nOrig = btreePagecount(pBt);
+    Pgno nFree = get4byte(&pBt->pPage1->aData[36]);
+    Pgno nFin = finalDbSize(pBt, nOrig, nFree);
+
+    if( nOrig<nFin ){
+      rc = SQLITE_CORRUPT_BKPT;
+    }else if( nFree>0 ){
+      rc = saveAllCursors(pBt, 0, 0);
+      if( rc==SQLITE_OK ){
+        invalidateAllOverflowCache(pBt);
+        rc = incrVacuumStep(pBt, nFin, nOrig, 0);
+      }
+      if( rc==SQLITE_OK ){
+        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
+        put4byte(&pBt->pPage1->aData[28], pBt->nPage);
+      }
+    }else{
+      rc = SQLITE_DONE;
+    }
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** This routine is called prior to sqlite3PagerCommit when a transaction
+** is committed for an auto-vacuum database.
+**
+** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
+** the database file should be truncated to during the commit process. 
+** i.e. the database has been reorganized so that only the first *pnTrunc
+** pages are in use.
+*/
+static int autoVacuumCommit(BtShared *pBt){
+  int rc = SQLITE_OK;
+  Pager *pPager = pBt->pPager;
+  VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) );
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  invalidateAllOverflowCache(pBt);
+  assert(pBt->autoVacuum);
+  if( !pBt->incrVacuum ){
+    Pgno nFin;         /* Number of pages in database after autovacuuming */
+    Pgno nFree;        /* Number of pages on the freelist initially */
+    Pgno iFree;        /* The next page to be freed */
+    Pgno nOrig;        /* Database size before freeing */
+
+    nOrig = btreePagecount(pBt);
+    if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){
+      /* It is not possible to create a database for which the final page
+      ** is either a pointer-map page or the pending-byte page. If one
+      ** is encountered, this indicates corruption.
+      */
+      return SQLITE_CORRUPT_BKPT;
+    }
+
+    nFree = get4byte(&pBt->pPage1->aData[36]);
+    nFin = finalDbSize(pBt, nOrig, nFree);
+    if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;
+    if( nFin<nOrig ){
+      rc = saveAllCursors(pBt, 0, 0);
+    }
+    for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
+      rc = incrVacuumStep(pBt, nFin, iFree, 1);
+    }
+    if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
+      rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
+      put4byte(&pBt->pPage1->aData[32], 0);
+      put4byte(&pBt->pPage1->aData[36], 0);
+      put4byte(&pBt->pPage1->aData[28], nFin);
+      pBt->bDoTruncate = 1;
+      pBt->nPage = nFin;
+    }
+    if( rc!=SQLITE_OK ){
+      sqlite3PagerRollback(pPager);
+    }
+  }
+
+  assert( nRef>=sqlite3PagerRefcount(pPager) );
+  return rc;
+}
+
+#else /* ifndef SQLITE_OMIT_AUTOVACUUM */
+# define setChildPtrmaps(x) SQLITE_OK
+#endif
+
+/*
+** This routine does the first phase of a two-phase commit.  This routine
+** causes a rollback journal to be created (if it does not already exist)
+** and populated with enough information so that if a power loss occurs
+** the database can be restored to its original state by playing back
+** the journal.  Then the contents of the journal are flushed out to
+** the disk.  After the journal is safely on oxide, the changes to the
+** database are written into the database file and flushed to oxide.
+** At the end of this call, the rollback journal still exists on the
+** disk and we are still holding all locks, so the transaction has not
+** committed.  See sqlite3BtreeCommitPhaseTwo() for the second phase of the
+** commit process.
+**
+** This call is a no-op if no write-transaction is currently active on pBt.
+**
+** Otherwise, sync the database file for the btree pBt. zMaster points to
+** the name of a master journal file that should be written into the
+** individual journal file, or is NULL, indicating no master journal file 
+** (single database transaction).
+**
+** When this is called, the master journal should already have been
+** created, populated with this journal pointer and synced to disk.
+**
+** Once this is routine has returned, the only thing required to commit
+** the write-transaction for this database file is to delete the journal.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
+  int rc = SQLITE_OK;
+  if( p->inTrans==TRANS_WRITE ){
+    BtShared *pBt = p->pBt;
+    sqlite3BtreeEnter(p);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( pBt->autoVacuum ){
+      rc = autoVacuumCommit(pBt);
+      if( rc!=SQLITE_OK ){
+        sqlite3BtreeLeave(p);
+        return rc;
+      }
+    }
+    if( pBt->bDoTruncate ){
+      sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage);
+    }
+#endif
+    rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0);
+    sqlite3BtreeLeave(p);
+  }
+  return rc;
+}
+
+/*
+** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback()
+** at the conclusion of a transaction.
+*/
+static void btreeEndTransaction(Btree *p){
+  BtShared *pBt = p->pBt;
+  sqlite3 *db = p->db;
+  assert( sqlite3BtreeHoldsMutex(p) );
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  pBt->bDoTruncate = 0;
+#endif
+  if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){
+    /* If there are other active statements that belong to this database
+    ** handle, downgrade to a read-only transaction. The other statements
+    ** may still be reading from the database.  */
+    downgradeAllSharedCacheTableLocks(p);
+    p->inTrans = TRANS_READ;
+  }else{
+    /* If the handle had any kind of transaction open, decrement the 
+    ** transaction count of the shared btree. If the transaction count 
+    ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused()
+    ** call below will unlock the pager.  */
+    if( p->inTrans!=TRANS_NONE ){
+      clearAllSharedCacheTableLocks(p);
+      pBt->nTransaction--;
+      if( 0==pBt->nTransaction ){
+        pBt->inTransaction = TRANS_NONE;
+      }
+    }
+
+    /* Set the current transaction state to TRANS_NONE and unlock the 
+    ** pager if this call closed the only read or write transaction.  */
+    p->inTrans = TRANS_NONE;
+    unlockBtreeIfUnused(pBt);
+  }
+
+  btreeIntegrity(p);
+}
+
+/*
+** Commit the transaction currently in progress.
+**
+** This routine implements the second phase of a 2-phase commit.  The
+** sqlite3BtreeCommitPhaseOne() routine does the first phase and should
+** be invoked prior to calling this routine.  The sqlite3BtreeCommitPhaseOne()
+** routine did all the work of writing information out to disk and flushing the
+** contents so that they are written onto the disk platter.  All this
+** routine has to do is delete or truncate or zero the header in the
+** the rollback journal (which causes the transaction to commit) and
+** drop locks.
+**
+** Normally, if an error occurs while the pager layer is attempting to 
+** finalize the underlying journal file, this function returns an error and
+** the upper layer will attempt a rollback. However, if the second argument
+** is non-zero then this b-tree transaction is part of a multi-file 
+** transaction. In this case, the transaction has already been committed 
+** (by deleting a master journal file) and the caller will ignore this 
+** functions return code. So, even if an error occurs in the pager layer,
+** reset the b-tree objects internal state to indicate that the write
+** transaction has been closed. This is quite safe, as the pager will have
+** transitioned to the error state.
+**
+** This will release the write lock on the database file.  If there
+** are no active cursors, it also releases the read lock.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
+
+  if( p->inTrans==TRANS_NONE ) return SQLITE_OK;
+  sqlite3BtreeEnter(p);
+  btreeIntegrity(p);
+
+  /* If the handle has a write-transaction open, commit the shared-btrees 
+  ** transaction and set the shared state to TRANS_READ.
+  */
+  if( p->inTrans==TRANS_WRITE ){
+    int rc;
+    BtShared *pBt = p->pBt;
+    assert( pBt->inTransaction==TRANS_WRITE );
+    assert( pBt->nTransaction>0 );
+    rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
+    if( rc!=SQLITE_OK && bCleanup==0 ){
+      sqlite3BtreeLeave(p);
+      return rc;
+    }
+    p->iDataVersion--;  /* Compensate for pPager->iDataVersion++; */
+    pBt->inTransaction = TRANS_READ;
+    btreeClearHasContent(pBt);
+  }
+
+  btreeEndTransaction(p);
+  sqlite3BtreeLeave(p);
+  return SQLITE_OK;
+}
+
+/*
+** Do both phases of a commit.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){
+  int rc;
+  sqlite3BtreeEnter(p);
+  rc = sqlite3BtreeCommitPhaseOne(p, 0);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3BtreeCommitPhaseTwo(p, 0);
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** This routine sets the state to CURSOR_FAULT and the error
+** code to errCode for every cursor on any BtShared that pBtree
+** references.  Or if the writeOnly flag is set to 1, then only
+** trip write cursors and leave read cursors unchanged.
+**
+** Every cursor is a candidate to be tripped, including cursors
+** that belong to other database connections that happen to be
+** sharing the cache with pBtree.
+**
+** This routine gets called when a rollback occurs. If the writeOnly
+** flag is true, then only write-cursors need be tripped - read-only
+** cursors save their current positions so that they may continue 
+** following the rollback. Or, if writeOnly is false, all cursors are 
+** tripped. In general, writeOnly is false if the transaction being
+** rolled back modified the database schema. In this case b-tree root
+** pages may be moved or deleted from the database altogether, making
+** it unsafe for read cursors to continue.
+**
+** If the writeOnly flag is true and an error is encountered while 
+** saving the current position of a read-only cursor, all cursors, 
+** including all read-cursors are tripped.
+**
+** SQLITE_OK is returned if successful, or if an error occurs while
+** saving a cursor position, an SQLite error code.
+*/
+SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){
+  BtCursor *p;
+  int rc = SQLITE_OK;
+
+  assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 );
+  if( pBtree ){
+    sqlite3BtreeEnter(pBtree);
+    for(p=pBtree->pBt->pCursor; p; p=p->pNext){
+      int i;
+      if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){
+        if( p->eState==CURSOR_VALID ){
+          rc = saveCursorPosition(p);
+          if( rc!=SQLITE_OK ){
+            (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0);
+            break;
+          }
+        }
+      }else{
+        sqlite3BtreeClearCursor(p);
+        p->eState = CURSOR_FAULT;
+        p->skipNext = errCode;
+      }
+      for(i=0; i<=p->iPage; i++){
+        releasePage(p->apPage[i]);
+        p->apPage[i] = 0;
+      }
+    }
+    sqlite3BtreeLeave(pBtree);
+  }
+  return rc;
+}
+
+/*
+** Rollback the transaction in progress.
+**
+** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped).
+** Only write cursors are tripped if writeOnly is true but all cursors are
+** tripped if writeOnly is false.  Any attempt to use
+** a tripped cursor will result in an error.
+**
+** This will release the write lock on the database file.  If there
+** are no active cursors, it also releases the read lock.
+*/
+SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){
+  int rc;
+  BtShared *pBt = p->pBt;
+  MemPage *pPage1;
+
+  assert( writeOnly==1 || writeOnly==0 );
+  assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK );
+  sqlite3BtreeEnter(p);
+  if( tripCode==SQLITE_OK ){
+    rc = tripCode = saveAllCursors(pBt, 0, 0);
+    if( rc ) writeOnly = 0;
+  }else{
+    rc = SQLITE_OK;
+  }
+  if( tripCode ){
+    int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly);
+    assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) );
+    if( rc2!=SQLITE_OK ) rc = rc2;
+  }
+  btreeIntegrity(p);
+
+  if( p->inTrans==TRANS_WRITE ){
+    int rc2;
+
+    assert( TRANS_WRITE==pBt->inTransaction );
+    rc2 = sqlite3PagerRollback(pBt->pPager);
+    if( rc2!=SQLITE_OK ){
+      rc = rc2;
+    }
+
+    /* The rollback may have destroyed the pPage1->aData value.  So
+    ** call btreeGetPage() on page 1 again to make
+    ** sure pPage1->aData is set correctly. */
+    if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
+      int nPage = get4byte(28+(u8*)pPage1->aData);
+      testcase( nPage==0 );
+      if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage);
+      testcase( pBt->nPage!=nPage );
+      pBt->nPage = nPage;
+      releasePage(pPage1);
+    }
+    assert( countValidCursors(pBt, 1)==0 );
+    pBt->inTransaction = TRANS_READ;
+    btreeClearHasContent(pBt);
+  }
+
+  btreeEndTransaction(p);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Start a statement subtransaction. The subtransaction can be rolled
+** back independently of the main transaction. You must start a transaction 
+** before starting a subtransaction. The subtransaction is ended automatically 
+** if the main transaction commits or rolls back.
+**
+** Statement subtransactions are used around individual SQL statements
+** that are contained within a BEGIN...COMMIT block.  If a constraint
+** error occurs within the statement, the effect of that one statement
+** can be rolled back without having to rollback the entire transaction.
+**
+** A statement sub-transaction is implemented as an anonymous savepoint. The
+** value passed as the second parameter is the total number of savepoints,
+** including the new anonymous savepoint, open on the B-Tree. i.e. if there
+** are no active savepoints and no other statement-transactions open,
+** iStatement is 1. This anonymous savepoint can be released or rolled back
+** using the sqlite3BtreeSavepoint() function.
+*/
+SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree *p, int iStatement){
+  int rc;
+  BtShared *pBt = p->pBt;
+  sqlite3BtreeEnter(p);
+  assert( p->inTrans==TRANS_WRITE );
+  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
+  assert( iStatement>0 );
+  assert( iStatement>p->db->nSavepoint );
+  assert( pBt->inTransaction==TRANS_WRITE );
+  /* At the pager level, a statement transaction is a savepoint with
+  ** an index greater than all savepoints created explicitly using
+  ** SQL statements. It is illegal to open, release or rollback any
+  ** such savepoints while the statement transaction savepoint is active.
+  */
+  rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** The second argument to this function, op, is always SAVEPOINT_ROLLBACK
+** or SAVEPOINT_RELEASE. This function either releases or rolls back the
+** savepoint identified by parameter iSavepoint, depending on the value 
+** of op.
+**
+** Normally, iSavepoint is greater than or equal to zero. However, if op is
+** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the 
+** contents of the entire transaction are rolled back. This is different
+** from a normal transaction rollback, as no locks are released and the
+** transaction remains open.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
+  int rc = SQLITE_OK;
+  if( p && p->inTrans==TRANS_WRITE ){
+    BtShared *pBt = p->pBt;
+    assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
+    assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) );
+    sqlite3BtreeEnter(p);
+    rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint);
+    if( rc==SQLITE_OK ){
+      if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){
+        pBt->nPage = 0;
+      }
+      rc = newDatabase(pBt);
+      pBt->nPage = get4byte(28 + pBt->pPage1->aData);
+
+      /* The database size was written into the offset 28 of the header
+      ** when the transaction started, so we know that the value at offset
+      ** 28 is nonzero. */
+      assert( pBt->nPage>0 );
+    }
+    sqlite3BtreeLeave(p);
+  }
+  return rc;
+}
+
+/*
+** Create a new cursor for the BTree whose root is on the page
+** iTable. If a read-only cursor is requested, it is assumed that
+** the caller already has at least a read-only transaction open
+** on the database already. If a write-cursor is requested, then
+** the caller is assumed to have an open write transaction.
+**
+** If wrFlag==0, then the cursor can only be used for reading.
+** If wrFlag==1, then the cursor can be used for reading or for
+** writing if other conditions for writing are also met.  These
+** are the conditions that must be met in order for writing to
+** be allowed:
+**
+** 1:  The cursor must have been opened with wrFlag==1
+**
+** 2:  Other database connections that share the same pager cache
+**     but which are not in the READ_UNCOMMITTED state may not have
+**     cursors open with wrFlag==0 on the same table.  Otherwise
+**     the changes made by this write cursor would be visible to
+**     the read cursors in the other database connection.
+**
+** 3:  The database must be writable (not on read-only media)
+**
+** 4:  There must be an active transaction.
+**
+** No checking is done to make sure that page iTable really is the
+** root page of a b-tree.  If it is not, then the cursor acquired
+** will not work correctly.
+**
+** It is assumed that the sqlite3BtreeCursorZero() has been called
+** on pCur to initialize the memory space prior to invoking this routine.
+*/
+static int btreeCursor(
+  Btree *p,                              /* The btree */
+  int iTable,                            /* Root page of table to open */
+  int wrFlag,                            /* 1 to write. 0 read-only */
+  struct KeyInfo *pKeyInfo,              /* First arg to comparison function */
+  BtCursor *pCur                         /* Space for new cursor */
+){
+  BtShared *pBt = p->pBt;                /* Shared b-tree handle */
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( wrFlag==0 || wrFlag==1 );
+
+  /* The following assert statements verify that if this is a sharable 
+  ** b-tree database, the connection is holding the required table locks, 
+  ** and that no other connection has any open cursor that conflicts with 
+  ** this lock.  */
+  assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, wrFlag+1) );
+  assert( wrFlag==0 || !hasReadConflicts(p, iTable) );
+
+  /* Assert that the caller has opened the required transaction. */
+  assert( p->inTrans>TRANS_NONE );
+  assert( wrFlag==0 || p->inTrans==TRANS_WRITE );
+  assert( pBt->pPage1 && pBt->pPage1->aData );
+
+  if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){
+    return SQLITE_READONLY;
+  }
+  if( wrFlag ){
+    allocateTempSpace(pBt);
+    if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM;
+  }
+  if( iTable==1 && btreePagecount(pBt)==0 ){
+    assert( wrFlag==0 );
+    iTable = 0;
+  }
+
+  /* Now that no other errors can occur, finish filling in the BtCursor
+  ** variables and link the cursor into the BtShared list.  */
+  pCur->pgnoRoot = (Pgno)iTable;
+  pCur->iPage = -1;
+  pCur->pKeyInfo = pKeyInfo;
+  pCur->pBtree = p;
+  pCur->pBt = pBt;
+  assert( wrFlag==0 || wrFlag==BTCF_WriteFlag );
+  pCur->curFlags = wrFlag;
+  pCur->pNext = pBt->pCursor;
+  if( pCur->pNext ){
+    pCur->pNext->pPrev = pCur;
+  }
+  pBt->pCursor = pCur;
+  pCur->eState = CURSOR_INVALID;
+  return SQLITE_OK;
+}
+SQLITE_PRIVATE int sqlite3BtreeCursor(
+  Btree *p,                                   /* The btree */
+  int iTable,                                 /* Root page of table to open */
+  int wrFlag,                                 /* 1 to write. 0 read-only */
+  struct KeyInfo *pKeyInfo,                   /* First arg to xCompare() */
+  BtCursor *pCur                              /* Write new cursor here */
+){
+  int rc;
+  sqlite3BtreeEnter(p);
+  rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Return the size of a BtCursor object in bytes.
+**
+** This interfaces is needed so that users of cursors can preallocate
+** sufficient storage to hold a cursor.  The BtCursor object is opaque
+** to users so they cannot do the sizeof() themselves - they must call
+** this routine.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){
+  return ROUND8(sizeof(BtCursor));
+}
+
+/*
+** Initialize memory that will be converted into a BtCursor object.
+**
+** The simple approach here would be to memset() the entire object
+** to zero.  But it turns out that the apPage[] and aiIdx[] arrays
+** do not need to be zeroed and they are large, so we can save a lot
+** of run-time by skipping the initialization of those elements.
+*/
+SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor *p){
+  memset(p, 0, offsetof(BtCursor, iPage));
+}
+
+/*
+** Close a cursor.  The read lock on the database file is released
+** when the last cursor is closed.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){
+  Btree *pBtree = pCur->pBtree;
+  if( pBtree ){
+    int i;
+    BtShared *pBt = pCur->pBt;
+    sqlite3BtreeEnter(pBtree);
+    sqlite3BtreeClearCursor(pCur);
+    if( pCur->pPrev ){
+      pCur->pPrev->pNext = pCur->pNext;
+    }else{
+      pBt->pCursor = pCur->pNext;
+    }
+    if( pCur->pNext ){
+      pCur->pNext->pPrev = pCur->pPrev;
+    }
+    for(i=0; i<=pCur->iPage; i++){
+      releasePage(pCur->apPage[i]);
+    }
+    unlockBtreeIfUnused(pBt);
+    sqlite3_free(pCur->aOverflow);
+    /* sqlite3_free(pCur); */
+    sqlite3BtreeLeave(pBtree);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Make sure the BtCursor* given in the argument has a valid
+** BtCursor.info structure.  If it is not already valid, call
+** btreeParseCell() to fill it in.
+**
+** BtCursor.info is a cache of the information in the current cell.
+** Using this cache reduces the number of calls to btreeParseCell().
+**
+** 2007-06-25:  There is a bug in some versions of MSVC that cause the
+** compiler to crash when getCellInfo() is implemented as a macro.
+** But there is a measureable speed advantage to using the macro on gcc
+** (when less compiler optimizations like -Os or -O0 are used and the
+** compiler is not doing aggressive inlining.)  So we use a real function
+** for MSVC and a macro for everything else.  Ticket #2457.
+*/
+#ifndef NDEBUG
+  static void assertCellInfo(BtCursor *pCur){
+    CellInfo info;
+    int iPage = pCur->iPage;
+    memset(&info, 0, sizeof(info));
+    btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info);
+    assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 );
+  }
+#else
+  #define assertCellInfo(x)
+#endif
+#ifdef _MSC_VER
+  /* Use a real function in MSVC to work around bugs in that compiler. */
+  static void getCellInfo(BtCursor *pCur){
+    if( pCur->info.nSize==0 ){
+      int iPage = pCur->iPage;
+      btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info);
+      pCur->curFlags |= BTCF_ValidNKey;
+    }else{
+      assertCellInfo(pCur);
+    }
+  }
+#else /* if not _MSC_VER */
+  /* Use a macro in all other compilers so that the function is inlined */
+#define getCellInfo(pCur)                                                      \
+  if( pCur->info.nSize==0 ){                                                   \
+    int iPage = pCur->iPage;                                                   \
+    btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info);        \
+    pCur->curFlags |= BTCF_ValidNKey;                                          \
+  }else{                                                                       \
+    assertCellInfo(pCur);                                                      \
+  }
+#endif /* _MSC_VER */
+
+#ifndef NDEBUG  /* The next routine used only within assert() statements */
+/*
+** Return true if the given BtCursor is valid.  A valid cursor is one
+** that is currently pointing to a row in a (non-empty) table.
+** This is a verification routine is used only within assert() statements.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCursorIsValid(BtCursor *pCur){
+  return pCur && pCur->eState==CURSOR_VALID;
+}
+#endif /* NDEBUG */
+
+/*
+** Set *pSize to the size of the buffer needed to hold the value of
+** the key for the current entry.  If the cursor is not pointing
+** to a valid entry, *pSize is set to 0. 
+**
+** For a table with the INTKEY flag set, this routine returns the key
+** itself, not the number of bytes in the key.
+**
+** The caller must position the cursor prior to invoking this routine.
+** 
+** This routine cannot fail.  It always returns SQLITE_OK.  
+*/
+SQLITE_PRIVATE int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  getCellInfo(pCur);
+  *pSize = pCur->info.nKey;
+  return SQLITE_OK;
+}
+
+/*
+** Set *pSize to the number of bytes of data in the entry the
+** cursor currently points to.
+**
+** The caller must guarantee that the cursor is pointing to a non-NULL
+** valid entry.  In other words, the calling procedure must guarantee
+** that the cursor has Cursor.eState==CURSOR_VALID.
+**
+** Failure is not possible.  This function always returns SQLITE_OK.
+** It might just as well be a procedure (returning void) but we continue
+** to return an integer result code for historical reasons.
+*/
+SQLITE_PRIVATE int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 );
+  getCellInfo(pCur);
+  *pSize = pCur->info.nPayload;
+  return SQLITE_OK;
+}
+
+/*
+** Given the page number of an overflow page in the database (parameter
+** ovfl), this function finds the page number of the next page in the 
+** linked list of overflow pages. If possible, it uses the auto-vacuum
+** pointer-map data instead of reading the content of page ovfl to do so. 
+**
+** If an error occurs an SQLite error code is returned. Otherwise:
+**
+** The page number of the next overflow page in the linked list is 
+** written to *pPgnoNext. If page ovfl is the last page in its linked 
+** list, *pPgnoNext is set to zero. 
+**
+** If ppPage is not NULL, and a reference to the MemPage object corresponding
+** to page number pOvfl was obtained, then *ppPage is set to point to that
+** reference. It is the responsibility of the caller to call releasePage()
+** on *ppPage to free the reference. In no reference was obtained (because
+** the pointer-map was used to obtain the value for *pPgnoNext), then
+** *ppPage is set to zero.
+*/
+static int getOverflowPage(
+  BtShared *pBt,               /* The database file */
+  Pgno ovfl,                   /* Current overflow page number */
+  MemPage **ppPage,            /* OUT: MemPage handle (may be NULL) */
+  Pgno *pPgnoNext              /* OUT: Next overflow page number */
+){
+  Pgno next = 0;
+  MemPage *pPage = 0;
+  int rc = SQLITE_OK;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert(pPgnoNext);
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  /* Try to find the next page in the overflow list using the
+  ** autovacuum pointer-map pages. Guess that the next page in 
+  ** the overflow list is page number (ovfl+1). If that guess turns 
+  ** out to be wrong, fall back to loading the data of page 
+  ** number ovfl to determine the next page number.
+  */
+  if( pBt->autoVacuum ){
+    Pgno pgno;
+    Pgno iGuess = ovfl+1;
+    u8 eType;
+
+    while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){
+      iGuess++;
+    }
+
+    if( iGuess<=btreePagecount(pBt) ){
+      rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
+      if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){
+        next = iGuess;
+        rc = SQLITE_DONE;
+      }
+    }
+  }
+#endif
+
+  assert( next==0 || rc==SQLITE_DONE );
+  if( rc==SQLITE_OK ){
+    rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0);
+    assert( rc==SQLITE_OK || pPage==0 );
+    if( rc==SQLITE_OK ){
+      next = get4byte(pPage->aData);
+    }
+  }
+
+  *pPgnoNext = next;
+  if( ppPage ){
+    *ppPage = pPage;
+  }else{
+    releasePage(pPage);
+  }
+  return (rc==SQLITE_DONE ? SQLITE_OK : rc);
+}
+
+/*
+** Copy data from a buffer to a page, or from a page to a buffer.
+**
+** pPayload is a pointer to data stored on database page pDbPage.
+** If argument eOp is false, then nByte bytes of data are copied
+** from pPayload to the buffer pointed at by pBuf. If eOp is true,
+** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
+** of data are copied from the buffer pBuf to pPayload.
+**
+** SQLITE_OK is returned on success, otherwise an error code.
+*/
+static int copyPayload(
+  void *pPayload,           /* Pointer to page data */
+  void *pBuf,               /* Pointer to buffer */
+  int nByte,                /* Number of bytes to copy */
+  int eOp,                  /* 0 -> copy from page, 1 -> copy to page */
+  DbPage *pDbPage           /* Page containing pPayload */
+){
+  if( eOp ){
+    /* Copy data from buffer to page (a write operation) */
+    int rc = sqlite3PagerWrite(pDbPage);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    memcpy(pPayload, pBuf, nByte);
+  }else{
+    /* Copy data from page to buffer (a read operation) */
+    memcpy(pBuf, pPayload, nByte);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** This function is used to read or overwrite payload information
+** for the entry that the pCur cursor is pointing to. The eOp
+** argument is interpreted as follows:
+**
+**   0: The operation is a read. Populate the overflow cache.
+**   1: The operation is a write. Populate the overflow cache.
+**   2: The operation is a read. Do not populate the overflow cache.
+**
+** A total of "amt" bytes are read or written beginning at "offset".
+** Data is read to or from the buffer pBuf.
+**
+** The content being read or written might appear on the main page
+** or be scattered out on multiple overflow pages.
+**
+** If the current cursor entry uses one or more overflow pages and the
+** eOp argument is not 2, this function may allocate space for and lazily 
+** populates the overflow page-list cache array (BtCursor.aOverflow). 
+** Subsequent calls use this cache to make seeking to the supplied offset 
+** more efficient.
+**
+** Once an overflow page-list cache has been allocated, it may be
+** invalidated if some other cursor writes to the same table, or if
+** the cursor is moved to a different row. Additionally, in auto-vacuum
+** mode, the following events may invalidate an overflow page-list cache.
+**
+**   * An incremental vacuum,
+**   * A commit in auto_vacuum="full" mode,
+**   * Creating a table (may require moving an overflow page).
+*/
+static int accessPayload(
+  BtCursor *pCur,      /* Cursor pointing to entry to read from */
+  u32 offset,          /* Begin reading this far into payload */
+  u32 amt,             /* Read this many bytes */
+  unsigned char *pBuf, /* Write the bytes into this buffer */ 
+  int eOp              /* zero to read. non-zero to write. */
+){
+  unsigned char *aPayload;
+  int rc = SQLITE_OK;
+  int iIdx = 0;
+  MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */
+  BtShared *pBt = pCur->pBt;                  /* Btree this cursor belongs to */
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+  unsigned char * const pBufStart = pBuf;
+  int bEnd;                                 /* True if reading to end of data */
+#endif
+
+  assert( pPage );
+  assert( pCur->eState==CURSOR_VALID );
+  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
+  assert( cursorHoldsMutex(pCur) );
+  assert( eOp!=2 || offset==0 );    /* Always start from beginning for eOp==2 */
+
+  getCellInfo(pCur);
+  aPayload = pCur->info.pPayload;
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+  bEnd = offset+amt==pCur->info.nPayload;
+#endif
+  assert( offset+amt <= pCur->info.nPayload );
+
+  if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){
+    /* Trying to read or write past the end of the data is an error */
+    return SQLITE_CORRUPT_BKPT;
+  }
+
+  /* Check if data must be read/written to/from the btree page itself. */
+  if( offset<pCur->info.nLocal ){
+    int a = amt;
+    if( a+offset>pCur->info.nLocal ){
+      a = pCur->info.nLocal - offset;
+    }
+    rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage);
+    offset = 0;
+    pBuf += a;
+    amt -= a;
+  }else{
+    offset -= pCur->info.nLocal;
+  }
+
+
+  if( rc==SQLITE_OK && amt>0 ){
+    const u32 ovflSize = pBt->usableSize - 4;  /* Bytes content per ovfl page */
+    Pgno nextPage;
+
+    nextPage = get4byte(&aPayload[pCur->info.nLocal]);
+
+    /* If the BtCursor.aOverflow[] has not been allocated, allocate it now.
+    ** Except, do not allocate aOverflow[] for eOp==2.
+    **
+    ** The aOverflow[] array is sized at one entry for each overflow page
+    ** in the overflow chain. The page number of the first overflow page is
+    ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array
+    ** means "not yet known" (the cache is lazily populated).
+    */
+    if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){
+      int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
+      if( nOvfl>pCur->nOvflAlloc ){
+        Pgno *aNew = (Pgno*)sqlite3Realloc(
+            pCur->aOverflow, nOvfl*2*sizeof(Pgno)
+        );
+        if( aNew==0 ){
+          rc = SQLITE_NOMEM;
+        }else{
+          pCur->nOvflAlloc = nOvfl*2;
+          pCur->aOverflow = aNew;
+        }
+      }
+      if( rc==SQLITE_OK ){
+        memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
+        pCur->curFlags |= BTCF_ValidOvfl;
+      }
+    }
+
+    /* If the overflow page-list cache has been allocated and the
+    ** entry for the first required overflow page is valid, skip
+    ** directly to it.
+    */
+    if( (pCur->curFlags & BTCF_ValidOvfl)!=0
+     && pCur->aOverflow[offset/ovflSize]
+    ){
+      iIdx = (offset/ovflSize);
+      nextPage = pCur->aOverflow[iIdx];
+      offset = (offset%ovflSize);
+    }
+
+    for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
+
+      /* If required, populate the overflow page-list cache. */
+      if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){
+        assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
+        pCur->aOverflow[iIdx] = nextPage;
+      }
+
+      if( offset>=ovflSize ){
+        /* The only reason to read this page is to obtain the page
+        ** number for the next page in the overflow chain. The page
+        ** data is not required. So first try to lookup the overflow
+        ** page-list cache, if any, then fall back to the getOverflowPage()
+        ** function.
+        **
+        ** Note that the aOverflow[] array must be allocated because eOp!=2
+        ** here.  If eOp==2, then offset==0 and this branch is never taken.
+        */
+        assert( eOp!=2 );
+        assert( pCur->curFlags & BTCF_ValidOvfl );
+        assert( pCur->pBtree->db==pBt->db );
+        if( pCur->aOverflow[iIdx+1] ){
+          nextPage = pCur->aOverflow[iIdx+1];
+        }else{
+          rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
+        }
+        offset -= ovflSize;
+      }else{
+        /* Need to read this page properly. It contains some of the
+        ** range of data that is being read (eOp==0) or written (eOp!=0).
+        */
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+        sqlite3_file *fd;
+#endif
+        int a = amt;
+        if( a + offset > ovflSize ){
+          a = ovflSize - offset;
+        }
+
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+        /* If all the following are true:
+        **
+        **   1) this is a read operation, and 
+        **   2) data is required from the start of this overflow page, and
+        **   3) the database is file-backed, and
+        **   4) there is no open write-transaction, and
+        **   5) the database is not a WAL database,
+        **   6) all data from the page is being read.
+        **   7) at least 4 bytes have already been read into the output buffer 
+        **
+        ** then data can be read directly from the database file into the
+        ** output buffer, bypassing the page-cache altogether. This speeds
+        ** up loading large records that span many overflow pages.
+        */
+        if( (eOp&0x01)==0                                      /* (1) */
+         && offset==0                                          /* (2) */
+         && (bEnd || a==ovflSize)                              /* (6) */
+         && pBt->inTransaction==TRANS_READ                     /* (4) */
+         && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (3) */
+         && pBt->pPage1->aData[19]==0x01                       /* (5) */
+         && &pBuf[-4]>=pBufStart                               /* (7) */
+        ){
+          u8 aSave[4];
+          u8 *aWrite = &pBuf[-4];
+          assert( aWrite>=pBufStart );                         /* hence (7) */
+          memcpy(aSave, aWrite, 4);
+          rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1));
+          nextPage = get4byte(aWrite);
+          memcpy(aWrite, aSave, 4);
+        }else
+#endif
+
+        {
+          DbPage *pDbPage;
+          rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage,
+              ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0)
+          );
+          if( rc==SQLITE_OK ){
+            aPayload = sqlite3PagerGetData(pDbPage);
+            nextPage = get4byte(aPayload);
+            rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage);
+            sqlite3PagerUnref(pDbPage);
+            offset = 0;
+          }
+        }
+        amt -= a;
+        pBuf += a;
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK && amt>0 ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  return rc;
+}
+
+/*
+** Read part of the key associated with cursor pCur.  Exactly
+** "amt" bytes will be transferred into pBuf[].  The transfer
+** begins at "offset".
+**
+** The caller must ensure that pCur is pointing to a valid row
+** in the table.
+**
+** Return SQLITE_OK on success or an error code if anything goes
+** wrong.  An error is returned if "offset+amt" is larger than
+** the available payload.
+*/
+SQLITE_PRIVATE int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] );
+  assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
+  return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
+}
+
+/*
+** Read part of the data associated with cursor pCur.  Exactly
+** "amt" bytes will be transfered into pBuf[].  The transfer
+** begins at "offset".
+**
+** Return SQLITE_OK on success or an error code if anything goes
+** wrong.  An error is returned if "offset+amt" is larger than
+** the available payload.
+*/
+SQLITE_PRIVATE int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
+  int rc;
+
+#ifndef SQLITE_OMIT_INCRBLOB
+  if ( pCur->eState==CURSOR_INVALID ){
+    return SQLITE_ABORT;
+  }
+#endif
+
+  assert( cursorHoldsMutex(pCur) );
+  rc = restoreCursorPosition(pCur);
+  if( rc==SQLITE_OK ){
+    assert( pCur->eState==CURSOR_VALID );
+    assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] );
+    assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
+    rc = accessPayload(pCur, offset, amt, pBuf, 0);
+  }
+  return rc;
+}
+
+/*
+** Return a pointer to payload information from the entry that the 
+** pCur cursor is pointing to.  The pointer is to the beginning of
+** the key if index btrees (pPage->intKey==0) and is the data for
+** table btrees (pPage->intKey==1). The number of bytes of available
+** key/data is written into *pAmt.  If *pAmt==0, then the value
+** returned will not be a valid pointer.
+**
+** This routine is an optimization.  It is common for the entire key
+** and data to fit on the local page and for there to be no overflow
+** pages.  When that is so, this routine can be used to access the
+** key and data without making a copy.  If the key and/or data spills
+** onto overflow pages, then accessPayload() must be used to reassemble
+** the key/data and copy it into a preallocated buffer.
+**
+** The pointer returned by this routine looks directly into the cached
+** page of the database.  The data might change or move the next time
+** any btree routine is called.
+*/
+static const void *fetchPayload(
+  BtCursor *pCur,      /* Cursor pointing to entry to read from */
+  u32 *pAmt            /* Write the number of available bytes here */
+){
+  assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]);
+  assert( pCur->eState==CURSOR_VALID );
+  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
+  assert( pCur->info.nSize>0 );
+  *pAmt = pCur->info.nLocal;
+  return (void*)pCur->info.pPayload;
+}
+
+
+/*
+** For the entry that cursor pCur is point to, return as
+** many bytes of the key or data as are available on the local
+** b-tree page.  Write the number of available bytes into *pAmt.
+**
+** The pointer returned is ephemeral.  The key/data may move
+** or be destroyed on the next call to any Btree routine,
+** including calls from other threads against the same cache.
+** Hence, a mutex on the BtShared should be held prior to calling
+** this routine.
+**
+** These routines is used to get quick access to key and data
+** in the common case where no overflow pages are used.
+*/
+SQLITE_PRIVATE const void *sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){
+  return fetchPayload(pCur, pAmt);
+}
+SQLITE_PRIVATE const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){
+  return fetchPayload(pCur, pAmt);
+}
+
+
+/*
+** Move the cursor down to a new child page.  The newPgno argument is the
+** page number of the child page to move to.
+**
+** This function returns SQLITE_CORRUPT if the page-header flags field of
+** the new child page does not match the flags field of the parent (i.e.
+** if an intkey page appears to be the parent of a non-intkey page, or
+** vice-versa).
+*/
+static int moveToChild(BtCursor *pCur, u32 newPgno){
+  int rc;
+  int i = pCur->iPage;
+  MemPage *pNewPage;
+  BtShared *pBt = pCur->pBt;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
+  assert( pCur->iPage>=0 );
+  if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  rc = getAndInitPage(pBt, newPgno, &pNewPage,
+               (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0);
+  if( rc ) return rc;
+  pCur->apPage[i+1] = pNewPage;
+  pCur->aiIdx[i+1] = 0;
+  pCur->iPage++;
+
+  pCur->info.nSize = 0;
+  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+  if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  return SQLITE_OK;
+}
+
+#if 0
+/*
+** Page pParent is an internal (non-leaf) tree page. This function 
+** asserts that page number iChild is the left-child if the iIdx'th
+** cell in page pParent. Or, if iIdx is equal to the total number of
+** cells in pParent, that page number iChild is the right-child of
+** the page.
+*/
+static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){
+  assert( iIdx<=pParent->nCell );
+  if( iIdx==pParent->nCell ){
+    assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild );
+  }else{
+    assert( get4byte(findCell(pParent, iIdx))==iChild );
+  }
+}
+#else
+#  define assertParentIndex(x,y,z) 
+#endif
+
+/*
+** Move the cursor up to the parent page.
+**
+** pCur->idx is set to the cell index that contains the pointer
+** to the page we are coming from.  If we are coming from the
+** right-most child page then pCur->idx is set to one more than
+** the largest cell index.
+*/
+static void moveToParent(BtCursor *pCur){
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  assert( pCur->iPage>0 );
+  assert( pCur->apPage[pCur->iPage] );
+
+  /* UPDATE: It is actually possible for the condition tested by the assert
+  ** below to be untrue if the database file is corrupt. This can occur if
+  ** one cursor has modified page pParent while a reference to it is held 
+  ** by a second cursor. Which can only happen if a single page is linked
+  ** into more than one b-tree structure in a corrupt database.  */
+#if 0
+  assertParentIndex(
+    pCur->apPage[pCur->iPage-1], 
+    pCur->aiIdx[pCur->iPage-1], 
+    pCur->apPage[pCur->iPage]->pgno
+  );
+#endif
+  testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
+
+  releasePage(pCur->apPage[pCur->iPage]);
+  pCur->iPage--;
+  pCur->info.nSize = 0;
+  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+}
+
+/*
+** Move the cursor to point to the root page of its b-tree structure.
+**
+** If the table has a virtual root page, then the cursor is moved to point
+** to the virtual root page instead of the actual root page. A table has a
+** virtual root page when the actual root page contains no cells and a 
+** single child page. This can only happen with the table rooted at page 1.
+**
+** If the b-tree structure is empty, the cursor state is set to 
+** CURSOR_INVALID. Otherwise, the cursor is set to point to the first
+** cell located on the root (or virtual root) page and the cursor state
+** is set to CURSOR_VALID.
+**
+** If this function returns successfully, it may be assumed that the
+** page-header flags indicate that the [virtual] root-page is the expected 
+** kind of b-tree page (i.e. if when opening the cursor the caller did not
+** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D,
+** indicating a table b-tree, or if the caller did specify a KeyInfo 
+** structure the flags byte is set to 0x02 or 0x0A, indicating an index
+** b-tree).
+*/
+static int moveToRoot(BtCursor *pCur){
+  MemPage *pRoot;
+  int rc = SQLITE_OK;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
+  assert( CURSOR_VALID   < CURSOR_REQUIRESEEK );
+  assert( CURSOR_FAULT   > CURSOR_REQUIRESEEK );
+  if( pCur->eState>=CURSOR_REQUIRESEEK ){
+    if( pCur->eState==CURSOR_FAULT ){
+      assert( pCur->skipNext!=SQLITE_OK );
+      return pCur->skipNext;
+    }
+    sqlite3BtreeClearCursor(pCur);
+  }
+
+  if( pCur->iPage>=0 ){
+    while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]);
+  }else if( pCur->pgnoRoot==0 ){
+    pCur->eState = CURSOR_INVALID;
+    return SQLITE_OK;
+  }else{
+    rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0],
+                 (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0);
+    if( rc!=SQLITE_OK ){
+      pCur->eState = CURSOR_INVALID;
+      return rc;
+    }
+    pCur->iPage = 0;
+  }
+  pRoot = pCur->apPage[0];
+  assert( pRoot->pgno==pCur->pgnoRoot );
+
+  /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
+  ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is
+  ** NULL, the caller expects a table b-tree. If this is not the case,
+  ** return an SQLITE_CORRUPT error. 
+  **
+  ** Earlier versions of SQLite assumed that this test could not fail
+  ** if the root page was already loaded when this function was called (i.e.
+  ** if pCur->iPage>=0). But this is not so if the database is corrupted 
+  ** in such a way that page pRoot is linked into a second b-tree table 
+  ** (or the freelist).  */
+  assert( pRoot->intKey==1 || pRoot->intKey==0 );
+  if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+
+  pCur->aiIdx[0] = 0;
+  pCur->info.nSize = 0;
+  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl);
+
+  if( pRoot->nCell>0 ){
+    pCur->eState = CURSOR_VALID;
+  }else if( !pRoot->leaf ){
+    Pgno subpage;
+    if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT;
+    subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
+    pCur->eState = CURSOR_VALID;
+    rc = moveToChild(pCur, subpage);
+  }else{
+    pCur->eState = CURSOR_INVALID;
+  }
+  return rc;
+}
+
+/*
+** Move the cursor down to the left-most leaf entry beneath the
+** entry to which it is currently pointing.
+**
+** The left-most leaf is the one with the smallest key - the first
+** in ascending order.
+*/
+static int moveToLeftmost(BtCursor *pCur){
+  Pgno pgno;
+  int rc = SQLITE_OK;
+  MemPage *pPage;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
+    assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
+    pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage]));
+    rc = moveToChild(pCur, pgno);
+  }
+  return rc;
+}
+
+/*
+** Move the cursor down to the right-most leaf entry beneath the
+** page to which it is currently pointing.  Notice the difference
+** between moveToLeftmost() and moveToRightmost().  moveToLeftmost()
+** finds the left-most entry beneath the *entry* whereas moveToRightmost()
+** finds the right-most entry beneath the *page*.
+**
+** The right-most entry is the one with the largest key - the last
+** key in ascending order.
+*/
+static int moveToRightmost(BtCursor *pCur){
+  Pgno pgno;
+  int rc = SQLITE_OK;
+  MemPage *pPage = 0;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->eState==CURSOR_VALID );
+  while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){
+    pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+    pCur->aiIdx[pCur->iPage] = pPage->nCell;
+    rc = moveToChild(pCur, pgno);
+    if( rc ) return rc;
+  }
+  pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
+  assert( pCur->info.nSize==0 );
+  assert( (pCur->curFlags & BTCF_ValidNKey)==0 );
+  return SQLITE_OK;
+}
+
+/* Move the cursor to the first entry in the table.  Return SQLITE_OK
+** on success.  Set *pRes to 0 if the cursor actually points to something
+** or set *pRes to 1 if the table is empty.
+*/
+SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
+  int rc;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+  rc = moveToRoot(pCur);
+  if( rc==SQLITE_OK ){
+    if( pCur->eState==CURSOR_INVALID ){
+      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
+      *pRes = 1;
+    }else{
+      assert( pCur->apPage[pCur->iPage]->nCell>0 );
+      *pRes = 0;
+      rc = moveToLeftmost(pCur);
+    }
+  }
+  return rc;
+}
+
+/* Move the cursor to the last entry in the table.  Return SQLITE_OK
+** on success.  Set *pRes to 0 if the cursor actually points to something
+** or set *pRes to 1 if the table is empty.
+*/
+SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
+  int rc;
+ 
+  assert( cursorHoldsMutex(pCur) );
+  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+
+  /* If the cursor already points to the last entry, this is a no-op. */
+  if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){
+#ifdef SQLITE_DEBUG
+    /* This block serves to assert() that the cursor really does point 
+    ** to the last entry in the b-tree. */
+    int ii;
+    for(ii=0; ii<pCur->iPage; ii++){
+      assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell );
+    }
+    assert( pCur->aiIdx[pCur->iPage]==pCur->apPage[pCur->iPage]->nCell-1 );
+    assert( pCur->apPage[pCur->iPage]->leaf );
+#endif
+    return SQLITE_OK;
+  }
+
+  rc = moveToRoot(pCur);
+  if( rc==SQLITE_OK ){
+    if( CURSOR_INVALID==pCur->eState ){
+      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
+      *pRes = 1;
+    }else{
+      assert( pCur->eState==CURSOR_VALID );
+      *pRes = 0;
+      rc = moveToRightmost(pCur);
+      if( rc==SQLITE_OK ){
+        pCur->curFlags |= BTCF_AtLast;
+      }else{
+        pCur->curFlags &= ~BTCF_AtLast;
+      }
+   
+    }
+  }
+  return rc;
+}
+
+/* Move the cursor so that it points to an entry near the key 
+** specified by pIdxKey or intKey.   Return a success code.
+**
+** For INTKEY tables, the intKey parameter is used.  pIdxKey 
+** must be NULL.  For index tables, pIdxKey is used and intKey
+** is ignored.
+**
+** If an exact match is not found, then the cursor is always
+** left pointing at a leaf page which would hold the entry if it
+** were present.  The cursor might point to an entry that comes
+** before or after the key.
+**
+** An integer is written into *pRes which is the result of
+** comparing the key with the entry to which the cursor is 
+** pointing.  The meaning of the integer written into
+** *pRes is as follows:
+**
+**     *pRes<0      The cursor is left pointing at an entry that
+**                  is smaller than intKey/pIdxKey or if the table is empty
+**                  and the cursor is therefore left point to nothing.
+**
+**     *pRes==0     The cursor is left pointing at an entry that
+**                  exactly matches intKey/pIdxKey.
+**
+**     *pRes>0      The cursor is left pointing at an entry that
+**                  is larger than intKey/pIdxKey.
+**
+*/
+SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked(
+  BtCursor *pCur,          /* The cursor to be moved */
+  UnpackedRecord *pIdxKey, /* Unpacked index key */
+  i64 intKey,              /* The table key */
+  int biasRight,           /* If true, bias the search to the high end */
+  int *pRes                /* Write search results here */
+){
+  int rc;
+  RecordCompare xRecordCompare;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+  assert( pRes );
+  assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
+
+  /* If the cursor is already positioned at the point we are trying
+  ** to move to, then just return without doing any work */
+  if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
+   && pCur->apPage[0]->intKey 
+  ){
+    if( pCur->info.nKey==intKey ){
+      *pRes = 0;
+      return SQLITE_OK;
+    }
+    if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){
+      *pRes = -1;
+      return SQLITE_OK;
+    }
+  }
+
+  if( pIdxKey ){
+    xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
+    pIdxKey->errCode = 0;
+    assert( pIdxKey->default_rc==1 
+         || pIdxKey->default_rc==0 
+         || pIdxKey->default_rc==-1
+    );
+  }else{
+    xRecordCompare = 0; /* All keys are integers */
+  }
+
+  rc = moveToRoot(pCur);
+  if( rc ){
+    return rc;
+  }
+  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] );
+  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit );
+  assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 );
+  if( pCur->eState==CURSOR_INVALID ){
+    *pRes = -1;
+    assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
+    return SQLITE_OK;
+  }
+  assert( pCur->apPage[0]->intKey || pIdxKey );
+  for(;;){
+    int lwr, upr, idx, c;
+    Pgno chldPg;
+    MemPage *pPage = pCur->apPage[pCur->iPage];
+    u8 *pCell;                          /* Pointer to current cell in pPage */
+
+    /* pPage->nCell must be greater than zero. If this is the root-page
+    ** the cursor would have been INVALID above and this for(;;) loop
+    ** not run. If this is not the root-page, then the moveToChild() routine
+    ** would have already detected db corruption. Similarly, pPage must
+    ** be the right kind (index or table) of b-tree page. Otherwise
+    ** a moveToChild() or moveToRoot() call would have detected corruption.  */
+    assert( pPage->nCell>0 );
+    assert( pPage->intKey==(pIdxKey==0) );
+    lwr = 0;
+    upr = pPage->nCell-1;
+    assert( biasRight==0 || biasRight==1 );
+    idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
+    pCur->aiIdx[pCur->iPage] = (u16)idx;
+    if( xRecordCompare==0 ){
+      for(;;){
+        i64 nCellKey;
+        pCell = findCell(pPage, idx) + pPage->childPtrSize;
+        if( pPage->intKeyLeaf ){
+          while( 0x80 <= *(pCell++) ){
+            if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT;
+          }
+        }
+        getVarint(pCell, (u64*)&nCellKey);
+        if( nCellKey<intKey ){
+          lwr = idx+1;
+          if( lwr>upr ){ c = -1; break; }
+        }else if( nCellKey>intKey ){
+          upr = idx-1;
+          if( lwr>upr ){ c = +1; break; }
+        }else{
+          assert( nCellKey==intKey );
+          pCur->curFlags |= BTCF_ValidNKey;
+          pCur->info.nKey = nCellKey;
+          pCur->aiIdx[pCur->iPage] = (u16)idx;
+          if( !pPage->leaf ){
+            lwr = idx;
+            goto moveto_next_layer;
+          }else{
+            *pRes = 0;
+            rc = SQLITE_OK;
+            goto moveto_finish;
+          }
+        }
+        assert( lwr+upr>=0 );
+        idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2; */
+      }
+    }else{
+      for(;;){
+        int nCell;
+        pCell = findCell(pPage, idx) + pPage->childPtrSize;
+
+        /* The maximum supported page-size is 65536 bytes. This means that
+        ** the maximum number of record bytes stored on an index B-Tree
+        ** page is less than 16384 bytes and may be stored as a 2-byte
+        ** varint. This information is used to attempt to avoid parsing 
+        ** the entire cell by checking for the cases where the record is 
+        ** stored entirely within the b-tree page by inspecting the first 
+        ** 2 bytes of the cell.
+        */
+        nCell = pCell[0];
+        if( nCell<=pPage->max1bytePayload ){
+          /* This branch runs if the record-size field of the cell is a
+          ** single byte varint and the record fits entirely on the main
+          ** b-tree page.  */
+          testcase( pCell+nCell+1==pPage->aDataEnd );
+          c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
+        }else if( !(pCell[1] & 0x80) 
+          && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
+        ){
+          /* The record-size field is a 2 byte varint and the record 
+          ** fits entirely on the main b-tree page.  */
+          testcase( pCell+nCell+2==pPage->aDataEnd );
+          c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
+        }else{
+          /* The record flows over onto one or more overflow pages. In
+          ** this case the whole cell needs to be parsed, a buffer allocated
+          ** and accessPayload() used to retrieve the record into the
+          ** buffer before VdbeRecordCompare() can be called. */
+          void *pCellKey;
+          u8 * const pCellBody = pCell - pPage->childPtrSize;
+          btreeParseCellPtr(pPage, pCellBody, &pCur->info);
+          nCell = (int)pCur->info.nKey;
+          pCellKey = sqlite3Malloc( nCell );
+          if( pCellKey==0 ){
+            rc = SQLITE_NOMEM;
+            goto moveto_finish;
+          }
+          pCur->aiIdx[pCur->iPage] = (u16)idx;
+          rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2);
+          if( rc ){
+            sqlite3_free(pCellKey);
+            goto moveto_finish;
+          }
+          c = xRecordCompare(nCell, pCellKey, pIdxKey);
+          sqlite3_free(pCellKey);
+        }
+        assert( 
+            (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
+         && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
+        );
+        if( c<0 ){
+          lwr = idx+1;
+        }else if( c>0 ){
+          upr = idx-1;
+        }else{
+          assert( c==0 );
+          *pRes = 0;
+          rc = SQLITE_OK;
+          pCur->aiIdx[pCur->iPage] = (u16)idx;
+          if( pIdxKey->errCode ) rc = SQLITE_CORRUPT;
+          goto moveto_finish;
+        }
+        if( lwr>upr ) break;
+        assert( lwr+upr>=0 );
+        idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2 */
+      }
+    }
+    assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
+    assert( pPage->isInit );
+    if( pPage->leaf ){
+      assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
+      pCur->aiIdx[pCur->iPage] = (u16)idx;
+      *pRes = c;
+      rc = SQLITE_OK;
+      goto moveto_finish;
+    }
+moveto_next_layer:
+    if( lwr>=pPage->nCell ){
+      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+    }else{
+      chldPg = get4byte(findCell(pPage, lwr));
+    }
+    pCur->aiIdx[pCur->iPage] = (u16)lwr;
+    rc = moveToChild(pCur, chldPg);
+    if( rc ) break;
+  }
+moveto_finish:
+  pCur->info.nSize = 0;
+  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+  return rc;
+}
+
+
+/*
+** Return TRUE if the cursor is not pointing at an entry of the table.
+**
+** TRUE will be returned after a call to sqlite3BtreeNext() moves
+** past the last entry in the table or sqlite3BtreePrev() moves past
+** the first entry.  TRUE is also returned if the table is empty.
+*/
+SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor *pCur){
+  /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
+  ** have been deleted? This API will need to change to return an error code
+  ** as well as the boolean result value.
+  */
+  return (CURSOR_VALID!=pCur->eState);
+}
+
+/*
+** Advance the cursor to the next entry in the database.  If
+** successful then set *pRes=0.  If the cursor
+** was already pointing to the last entry in the database before
+** this routine was called, then set *pRes=1.
+**
+** The main entry point is sqlite3BtreeNext().  That routine is optimized
+** for the common case of merely incrementing the cell counter BtCursor.aiIdx
+** to the next cell on the current page.  The (slower) btreeNext() helper
+** routine is called when it is necessary to move to a different page or
+** to restore the cursor.
+**
+** The calling function will set *pRes to 0 or 1.  The initial *pRes value
+** will be 1 if the cursor being stepped corresponds to an SQL index and
+** if this routine could have been skipped if that SQL index had been
+** a unique index.  Otherwise the caller will have set *pRes to zero.
+** Zero is the common case. The btree implementation is free to use the
+** initial *pRes value as a hint to improve performance, but the current
+** SQLite btree implementation does not. (Note that the comdb2 btree
+** implementation does use this hint, however.)
+*/
+static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){
+  int rc;
+  int idx;
+  MemPage *pPage;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
+  assert( *pRes==0 );
+  if( pCur->eState!=CURSOR_VALID ){
+    assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
+    rc = restoreCursorPosition(pCur);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    if( CURSOR_INVALID==pCur->eState ){
+      *pRes = 1;
+      return SQLITE_OK;
+    }
+    if( pCur->skipNext ){
+      assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
+      pCur->eState = CURSOR_VALID;
+      if( pCur->skipNext>0 ){
+        pCur->skipNext = 0;
+        return SQLITE_OK;
+      }
+      pCur->skipNext = 0;
+    }
+  }
+
+  pPage = pCur->apPage[pCur->iPage];
+  idx = ++pCur->aiIdx[pCur->iPage];
+  assert( pPage->isInit );
+
+  /* If the database file is corrupt, it is possible for the value of idx 
+  ** to be invalid here. This can only occur if a second cursor modifies
+  ** the page while cursor pCur is holding a reference to it. Which can
+  ** only happen if the database is corrupt in such a way as to link the
+  ** page into more than one b-tree structure. */
+  testcase( idx>pPage->nCell );
+
+  if( idx>=pPage->nCell ){
+    if( !pPage->leaf ){
+      rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
+      if( rc ) return rc;
+      return moveToLeftmost(pCur);
+    }
+    do{
+      if( pCur->iPage==0 ){
+        *pRes = 1;
+        pCur->eState = CURSOR_INVALID;
+        return SQLITE_OK;
+      }
+      moveToParent(pCur);
+      pPage = pCur->apPage[pCur->iPage];
+    }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell );
+    if( pPage->intKey ){
+      return sqlite3BtreeNext(pCur, pRes);
+    }else{
+      return SQLITE_OK;
+    }
+  }
+  if( pPage->leaf ){
+    return SQLITE_OK;
+  }else{
+    return moveToLeftmost(pCur);
+  }
+}
+SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
+  MemPage *pPage;
+  assert( cursorHoldsMutex(pCur) );
+  assert( pRes!=0 );
+  assert( *pRes==0 || *pRes==1 );
+  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
+  pCur->info.nSize = 0;
+  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
+  *pRes = 0;
+  if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes);
+  pPage = pCur->apPage[pCur->iPage];
+  if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){
+    pCur->aiIdx[pCur->iPage]--;
+    return btreeNext(pCur, pRes);
+  }
+  if( pPage->leaf ){
+    return SQLITE_OK;
+  }else{
+    return moveToLeftmost(pCur);
+  }
+}
+
+/*
+** Step the cursor to the back to the previous entry in the database.  If
+** successful then set *pRes=0.  If the cursor
+** was already pointing to the first entry in the database before
+** this routine was called, then set *pRes=1.
+**
+** The main entry point is sqlite3BtreePrevious().  That routine is optimized
+** for the common case of merely decrementing the cell counter BtCursor.aiIdx
+** to the previous cell on the current page.  The (slower) btreePrevious()
+** helper routine is called when it is necessary to move to a different page
+** or to restore the cursor.
+**
+** The calling function will set *pRes to 0 or 1.  The initial *pRes value
+** will be 1 if the cursor being stepped corresponds to an SQL index and
+** if this routine could have been skipped if that SQL index had been
+** a unique index.  Otherwise the caller will have set *pRes to zero.
+** Zero is the common case. The btree implementation is free to use the
+** initial *pRes value as a hint to improve performance, but the current
+** SQLite btree implementation does not. (Note that the comdb2 btree
+** implementation does use this hint, however.)
+*/
+static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){
+  int rc;
+  MemPage *pPage;
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pRes!=0 );
+  assert( *pRes==0 );
+  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
+  assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 );
+  assert( pCur->info.nSize==0 );
+  if( pCur->eState!=CURSOR_VALID ){
+    rc = restoreCursorPosition(pCur);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    if( CURSOR_INVALID==pCur->eState ){
+      *pRes = 1;
+      return SQLITE_OK;
+    }
+    if( pCur->skipNext ){
+      assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
+      pCur->eState = CURSOR_VALID;
+      if( pCur->skipNext<0 ){
+        pCur->skipNext = 0;
+        return SQLITE_OK;
+      }
+      pCur->skipNext = 0;
+    }
+  }
+
+  pPage = pCur->apPage[pCur->iPage];
+  assert( pPage->isInit );
+  if( !pPage->leaf ){
+    int idx = pCur->aiIdx[pCur->iPage];
+    rc = moveToChild(pCur, get4byte(findCell(pPage, idx)));
+    if( rc ) return rc;
+    rc = moveToRightmost(pCur);
+  }else{
+    while( pCur->aiIdx[pCur->iPage]==0 ){
+      if( pCur->iPage==0 ){
+        pCur->eState = CURSOR_INVALID;
+        *pRes = 1;
+        return SQLITE_OK;
+      }
+      moveToParent(pCur);
+    }
+    assert( pCur->info.nSize==0 );
+    assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 );
+
+    pCur->aiIdx[pCur->iPage]--;
+    pPage = pCur->apPage[pCur->iPage];
+    if( pPage->intKey && !pPage->leaf ){
+      rc = sqlite3BtreePrevious(pCur, pRes);
+    }else{
+      rc = SQLITE_OK;
+    }
+  }
+  return rc;
+}
+SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
+  assert( cursorHoldsMutex(pCur) );
+  assert( pRes!=0 );
+  assert( *pRes==0 || *pRes==1 );
+  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
+  *pRes = 0;
+  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
+  pCur->info.nSize = 0;
+  if( pCur->eState!=CURSOR_VALID
+   || pCur->aiIdx[pCur->iPage]==0
+   || pCur->apPage[pCur->iPage]->leaf==0
+  ){
+    return btreePrevious(pCur, pRes);
+  }
+  pCur->aiIdx[pCur->iPage]--;
+  return SQLITE_OK;
+}
+
+/*
+** Allocate a new page from the database file.
+**
+** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
+** has already been called on the new page.)  The new page has also
+** been referenced and the calling routine is responsible for calling
+** sqlite3PagerUnref() on the new page when it is done.
+**
+** SQLITE_OK is returned on success.  Any other return value indicates
+** an error.  *ppPage and *pPgno are undefined in the event of an error.
+** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
+**
+** If the "nearby" parameter is not 0, then an effort is made to 
+** locate a page close to the page number "nearby".  This can be used in an
+** attempt to keep related pages close to each other in the database file,
+** which in turn can make database access faster.
+**
+** If the eMode parameter is BTALLOC_EXACT and the nearby page exists
+** anywhere on the free-list, then it is guaranteed to be returned.  If
+** eMode is BTALLOC_LT then the page returned will be less than or equal
+** to nearby if any such page exists.  If eMode is BTALLOC_ANY then there
+** are no restrictions on which page is returned.
+*/
+static int allocateBtreePage(
+  BtShared *pBt,         /* The btree */
+  MemPage **ppPage,      /* Store pointer to the allocated page here */
+  Pgno *pPgno,           /* Store the page number here */
+  Pgno nearby,           /* Search for a page near this one */
+  u8 eMode               /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */
+){
+  MemPage *pPage1;
+  int rc;
+  u32 n;     /* Number of pages on the freelist */
+  u32 k;     /* Number of leaves on the trunk of the freelist */
+  MemPage *pTrunk = 0;
+  MemPage *pPrevTrunk = 0;
+  Pgno mxPage;     /* Total size of the database file */
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
+  pPage1 = pBt->pPage1;
+  mxPage = btreePagecount(pBt);
+  /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36
+  ** stores stores the total number of pages on the freelist. */
+  n = get4byte(&pPage1->aData[36]);
+  testcase( n==mxPage-1 );
+  if( n>=mxPage ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  if( n>0 ){
+    /* There are pages on the freelist.  Reuse one of those pages. */
+    Pgno iTrunk;
+    u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
+    
+    /* If eMode==BTALLOC_EXACT and a query of the pointer-map
+    ** shows that the page 'nearby' is somewhere on the free-list, then
+    ** the entire-list will be searched for that page.
+    */
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( eMode==BTALLOC_EXACT ){
+      if( nearby<=mxPage ){
+        u8 eType;
+        assert( nearby>0 );
+        assert( pBt->autoVacuum );
+        rc = ptrmapGet(pBt, nearby, &eType, 0);
+        if( rc ) return rc;
+        if( eType==PTRMAP_FREEPAGE ){
+          searchList = 1;
+        }
+      }
+    }else if( eMode==BTALLOC_LE ){
+      searchList = 1;
+    }
+#endif
+
+    /* Decrement the free-list count by 1. Set iTrunk to the index of the
+    ** first free-list trunk page. iPrevTrunk is initially 1.
+    */
+    rc = sqlite3PagerWrite(pPage1->pDbPage);
+    if( rc ) return rc;
+    put4byte(&pPage1->aData[36], n-1);
+
+    /* The code within this loop is run only once if the 'searchList' variable
+    ** is not true. Otherwise, it runs once for each trunk-page on the
+    ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT)
+    ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT)
+    */
+    do {
+      pPrevTrunk = pTrunk;
+      if( pPrevTrunk ){
+        /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page
+        ** is the page number of the next freelist trunk page in the list or
+        ** zero if this is the last freelist trunk page. */
+        iTrunk = get4byte(&pPrevTrunk->aData[0]);
+      }else{
+        /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32
+        ** stores the page number of the first page of the freelist, or zero if
+        ** the freelist is empty. */
+        iTrunk = get4byte(&pPage1->aData[32]);
+      }
+      testcase( iTrunk==mxPage );
+      if( iTrunk>mxPage ){
+        rc = SQLITE_CORRUPT_BKPT;
+      }else{
+        rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
+      }
+      if( rc ){
+        pTrunk = 0;
+        goto end_allocate_page;
+      }
+      assert( pTrunk!=0 );
+      assert( pTrunk->aData!=0 );
+      /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page
+      ** is the number of leaf page pointers to follow. */
+      k = get4byte(&pTrunk->aData[4]);
+      if( k==0 && !searchList ){
+        /* The trunk has no leaves and the list is not being searched. 
+        ** So extract the trunk page itself and use it as the newly 
+        ** allocated page */
+        assert( pPrevTrunk==0 );
+        rc = sqlite3PagerWrite(pTrunk->pDbPage);
+        if( rc ){
+          goto end_allocate_page;
+        }
+        *pPgno = iTrunk;
+        memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
+        *ppPage = pTrunk;
+        pTrunk = 0;
+        TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
+      }else if( k>(u32)(pBt->usableSize/4 - 2) ){
+        /* Value of k is out of range.  Database corruption */
+        rc = SQLITE_CORRUPT_BKPT;
+        goto end_allocate_page;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      }else if( searchList 
+            && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE)) 
+      ){
+        /* The list is being searched and this trunk page is the page
+        ** to allocate, regardless of whether it has leaves.
+        */
+        *pPgno = iTrunk;
+        *ppPage = pTrunk;
+        searchList = 0;
+        rc = sqlite3PagerWrite(pTrunk->pDbPage);
+        if( rc ){
+          goto end_allocate_page;
+        }
+        if( k==0 ){
+          if( !pPrevTrunk ){
+            memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
+          }else{
+            rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
+            if( rc!=SQLITE_OK ){
+              goto end_allocate_page;
+            }
+            memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
+          }
+        }else{
+          /* The trunk page is required by the caller but it contains 
+          ** pointers to free-list leaves. The first leaf becomes a trunk
+          ** page in this case.
+          */
+          MemPage *pNewTrunk;
+          Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
+          if( iNewTrunk>mxPage ){ 
+            rc = SQLITE_CORRUPT_BKPT;
+            goto end_allocate_page;
+          }
+          testcase( iNewTrunk==mxPage );
+          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
+          if( rc!=SQLITE_OK ){
+            goto end_allocate_page;
+          }
+          rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
+          if( rc!=SQLITE_OK ){
+            releasePage(pNewTrunk);
+            goto end_allocate_page;
+          }
+          memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
+          put4byte(&pNewTrunk->aData[4], k-1);
+          memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
+          releasePage(pNewTrunk);
+          if( !pPrevTrunk ){
+            assert( sqlite3PagerIswriteable(pPage1->pDbPage) );
+            put4byte(&pPage1->aData[32], iNewTrunk);
+          }else{
+            rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
+            if( rc ){
+              goto end_allocate_page;
+            }
+            put4byte(&pPrevTrunk->aData[0], iNewTrunk);
+          }
+        }
+        pTrunk = 0;
+        TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
+#endif
+      }else if( k>0 ){
+        /* Extract a leaf from the trunk */
+        u32 closest;
+        Pgno iPage;
+        unsigned char *aData = pTrunk->aData;
+        if( nearby>0 ){
+          u32 i;
+          closest = 0;
+          if( eMode==BTALLOC_LE ){
+            for(i=0; i<k; i++){
+              iPage = get4byte(&aData[8+i*4]);
+              if( iPage<=nearby ){
+                closest = i;
+                break;
+              }
+            }
+          }else{
+            int dist;
+            dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby);
+            for(i=1; i<k; i++){
+              int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby);
+              if( d2<dist ){
+                closest = i;
+                dist = d2;
+              }
+            }
+          }
+        }else{
+          closest = 0;
+        }
+
+        iPage = get4byte(&aData[8+closest*4]);
+        testcase( iPage==mxPage );
+        if( iPage>mxPage ){
+          rc = SQLITE_CORRUPT_BKPT;
+          goto end_allocate_page;
+        }
+        testcase( iPage==mxPage );
+        if( !searchList 
+         || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE)) 
+        ){
+          int noContent;
+          *pPgno = iPage;
+          TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
+                 ": %d more free pages\n",
+                 *pPgno, closest+1, k, pTrunk->pgno, n-1));
+          rc = sqlite3PagerWrite(pTrunk->pDbPage);
+          if( rc ) goto end_allocate_page;
+          if( closest<k-1 ){
+            memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
+          }
+          put4byte(&aData[4], k-1);
+          noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0;
+          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent);
+          if( rc==SQLITE_OK ){
+            rc = sqlite3PagerWrite((*ppPage)->pDbPage);
+            if( rc!=SQLITE_OK ){
+              releasePage(*ppPage);
+            }
+          }
+          searchList = 0;
+        }
+      }
+      releasePage(pPrevTrunk);
+      pPrevTrunk = 0;
+    }while( searchList );
+  }else{
+    /* There are no pages on the freelist, so append a new page to the
+    ** database image.
+    **
+    ** Normally, new pages allocated by this block can be requested from the
+    ** pager layer with the 'no-content' flag set. This prevents the pager
+    ** from trying to read the pages content from disk. However, if the
+    ** current transaction has already run one or more incremental-vacuum
+    ** steps, then the page we are about to allocate may contain content
+    ** that is required in the event of a rollback. In this case, do
+    ** not set the no-content flag. This causes the pager to load and journal
+    ** the current page content before overwriting it.
+    **
+    ** Note that the pager will not actually attempt to load or journal 
+    ** content for any page that really does lie past the end of the database
+    ** file on disk. So the effects of disabling the no-content optimization
+    ** here are confined to those pages that lie between the end of the
+    ** database image and the end of the database file.
+    */
+    int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0;
+
+    rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
+    if( rc ) return rc;
+    pBt->nPage++;
+    if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){
+      /* If *pPgno refers to a pointer-map page, allocate two new pages
+      ** at the end of the file instead of one. The first allocated page
+      ** becomes a new pointer-map page, the second is used by the caller.
+      */
+      MemPage *pPg = 0;
+      TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
+      assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
+      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3PagerWrite(pPg->pDbPage);
+        releasePage(pPg);
+      }
+      if( rc ) return rc;
+      pBt->nPage++;
+      if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
+    }
+#endif
+    put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
+    *pPgno = pBt->nPage;
+
+    assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
+    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent);
+    if( rc ) return rc;
+    rc = sqlite3PagerWrite((*ppPage)->pDbPage);
+    if( rc!=SQLITE_OK ){
+      releasePage(*ppPage);
+    }
+    TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
+  }
+
+  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
+
+end_allocate_page:
+  releasePage(pTrunk);
+  releasePage(pPrevTrunk);
+  if( rc==SQLITE_OK ){
+    if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){
+      releasePage(*ppPage);
+      *ppPage = 0;
+      return SQLITE_CORRUPT_BKPT;
+    }
+    (*ppPage)->isInit = 0;
+  }else{
+    *ppPage = 0;
+  }
+  assert( rc!=SQLITE_OK || sqlite3PagerIswriteable((*ppPage)->pDbPage) );
+  return rc;
+}
+
+/*
+** This function is used to add page iPage to the database file free-list. 
+** It is assumed that the page is not already a part of the free-list.
+**
+** The value passed as the second argument to this function is optional.
+** If the caller happens to have a pointer to the MemPage object 
+** corresponding to page iPage handy, it may pass it as the second value. 
+** Otherwise, it may pass NULL.
+**
+** If a pointer to a MemPage object is passed as the second argument,
+** its reference count is not altered by this function.
+*/
+static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
+  MemPage *pTrunk = 0;                /* Free-list trunk page */
+  Pgno iTrunk = 0;                    /* Page number of free-list trunk page */ 
+  MemPage *pPage1 = pBt->pPage1;      /* Local reference to page 1 */
+  MemPage *pPage;                     /* Page being freed. May be NULL. */
+  int rc;                             /* Return Code */
+  int nFree;                          /* Initial number of pages on free-list */
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( iPage>1 );
+  assert( !pMemPage || pMemPage->pgno==iPage );
+
+  if( pMemPage ){
+    pPage = pMemPage;
+    sqlite3PagerRef(pPage->pDbPage);
+  }else{
+    pPage = btreePageLookup(pBt, iPage);
+  }
+
+  /* Increment the free page count on pPage1 */
+  rc = sqlite3PagerWrite(pPage1->pDbPage);
+  if( rc ) goto freepage_out;
+  nFree = get4byte(&pPage1->aData[36]);
+  put4byte(&pPage1->aData[36], nFree+1);
+
+  if( pBt->btsFlags & BTS_SECURE_DELETE ){
+    /* If the secure_delete option is enabled, then
+    ** always fully overwrite deleted information with zeros.
+    */
+    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
+     ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
+    ){
+      goto freepage_out;
+    }
+    memset(pPage->aData, 0, pPage->pBt->pageSize);
+  }
+
+  /* If the database supports auto-vacuum, write an entry in the pointer-map
+  ** to indicate that the page is free.
+  */
+  if( ISAUTOVACUUM ){
+    ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc);
+    if( rc ) goto freepage_out;
+  }
+
+  /* Now manipulate the actual database free-list structure. There are two
+  ** possibilities. If the free-list is currently empty, or if the first
+  ** trunk page in the free-list is full, then this page will become a
+  ** new free-list trunk page. Otherwise, it will become a leaf of the
+  ** first trunk page in the current free-list. This block tests if it
+  ** is possible to add the page as a new free-list leaf.
+  */
+  if( nFree!=0 ){
+    u32 nLeaf;                /* Initial number of leaf cells on trunk page */
+
+    iTrunk = get4byte(&pPage1->aData[32]);
+    rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0);
+    if( rc!=SQLITE_OK ){
+      goto freepage_out;
+    }
+
+    nLeaf = get4byte(&pTrunk->aData[4]);
+    assert( pBt->usableSize>32 );
+    if( nLeaf > (u32)pBt->usableSize/4 - 2 ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto freepage_out;
+    }
+    if( nLeaf < (u32)pBt->usableSize/4 - 8 ){
+      /* In this case there is room on the trunk page to insert the page
+      ** being freed as a new leaf.
+      **
+      ** Note that the trunk page is not really full until it contains
+      ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have
+      ** coded.  But due to a coding error in versions of SQLite prior to
+      ** 3.6.0, databases with freelist trunk pages holding more than
+      ** usableSize/4 - 8 entries will be reported as corrupt.  In order
+      ** to maintain backwards compatibility with older versions of SQLite,
+      ** we will continue to restrict the number of entries to usableSize/4 - 8
+      ** for now.  At some point in the future (once everyone has upgraded
+      ** to 3.6.0 or later) we should consider fixing the conditional above
+      ** to read "usableSize/4-2" instead of "usableSize/4-8".
+      **
+      ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still
+      ** avoid using the last six entries in the freelist trunk page array in
+      ** order that database files created by newer versions of SQLite can be
+      ** read by older versions of SQLite.
+      */
+      rc = sqlite3PagerWrite(pTrunk->pDbPage);
+      if( rc==SQLITE_OK ){
+        put4byte(&pTrunk->aData[4], nLeaf+1);
+        put4byte(&pTrunk->aData[8+nLeaf*4], iPage);
+        if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){
+          sqlite3PagerDontWrite(pPage->pDbPage);
+        }
+        rc = btreeSetHasContent(pBt, iPage);
+      }
+      TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
+      goto freepage_out;
+    }
+  }
+
+  /* If control flows to this point, then it was not possible to add the
+  ** the page being freed as a leaf page of the first trunk in the free-list.
+  ** Possibly because the free-list is empty, or possibly because the 
+  ** first trunk in the free-list is full. Either way, the page being freed
+  ** will become the new first trunk page in the free-list.
+  */
+  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
+    goto freepage_out;
+  }
+  rc = sqlite3PagerWrite(pPage->pDbPage);
+  if( rc!=SQLITE_OK ){
+    goto freepage_out;
+  }
+  put4byte(pPage->aData, iTrunk);
+  put4byte(&pPage->aData[4], 0);
+  put4byte(&pPage1->aData[32], iPage);
+  TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk));
+
+freepage_out:
+  if( pPage ){
+    pPage->isInit = 0;
+  }
+  releasePage(pPage);
+  releasePage(pTrunk);
+  return rc;
+}
+static void freePage(MemPage *pPage, int *pRC){
+  if( (*pRC)==SQLITE_OK ){
+    *pRC = freePage2(pPage->pBt, pPage, pPage->pgno);
+  }
+}
+
+/*
+** Free any overflow pages associated with the given Cell.  Write the
+** local Cell size (the number of bytes on the original page, omitting
+** overflow) into *pnSize.
+*/
+static int clearCell(
+  MemPage *pPage,          /* The page that contains the Cell */
+  unsigned char *pCell,    /* First byte of the Cell */
+  u16 *pnSize              /* Write the size of the Cell here */
+){
+  BtShared *pBt = pPage->pBt;
+  CellInfo info;
+  Pgno ovflPgno;
+  int rc;
+  int nOvfl;
+  u32 ovflPageSize;
+
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  btreeParseCellPtr(pPage, pCell, &info);
+  *pnSize = info.nSize;
+  if( info.iOverflow==0 ){
+    return SQLITE_OK;  /* No overflow pages. Return without doing anything */
+  }
+  if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){
+    return SQLITE_CORRUPT_BKPT;  /* Cell extends past end of page */
+  }
+  ovflPgno = get4byte(&pCell[info.iOverflow]);
+  assert( pBt->usableSize > 4 );
+  ovflPageSize = pBt->usableSize - 4;
+  nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
+  assert( ovflPgno==0 || nOvfl>0 );
+  while( nOvfl-- ){
+    Pgno iNext = 0;
+    MemPage *pOvfl = 0;
+    if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){
+      /* 0 is not a legal page number and page 1 cannot be an 
+      ** overflow page. Therefore if ovflPgno<2 or past the end of the 
+      ** file the database must be corrupt. */
+      return SQLITE_CORRUPT_BKPT;
+    }
+    if( nOvfl ){
+      rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext);
+      if( rc ) return rc;
+    }
+
+    if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) )
+     && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1
+    ){
+      /* There is no reason any cursor should have an outstanding reference 
+      ** to an overflow page belonging to a cell that is being deleted/updated.
+      ** So if there exists more than one reference to this page, then it 
+      ** must not really be an overflow page and the database must be corrupt. 
+      ** It is helpful to detect this before calling freePage2(), as 
+      ** freePage2() may zero the page contents if secure-delete mode is
+      ** enabled. If this 'overflow' page happens to be a page that the
+      ** caller is iterating through or using in some other way, this
+      ** can be problematic.
+      */
+      rc = SQLITE_CORRUPT_BKPT;
+    }else{
+      rc = freePage2(pBt, pOvfl, ovflPgno);
+    }
+
+    if( pOvfl ){
+      sqlite3PagerUnref(pOvfl->pDbPage);
+    }
+    if( rc ) return rc;
+    ovflPgno = iNext;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Create the byte sequence used to represent a cell on page pPage
+** and write that byte sequence into pCell[].  Overflow pages are
+** allocated and filled in as necessary.  The calling procedure
+** is responsible for making sure sufficient space has been allocated
+** for pCell[].
+**
+** Note that pCell does not necessary need to point to the pPage->aData
+** area.  pCell might point to some temporary storage.  The cell will
+** be constructed in this temporary area then copied into pPage->aData
+** later.
+*/
+static int fillInCell(
+  MemPage *pPage,                /* The page that contains the cell */
+  unsigned char *pCell,          /* Complete text of the cell */
+  const void *pKey, i64 nKey,    /* The key */
+  const void *pData,int nData,   /* The data */
+  int nZero,                     /* Extra zero bytes to append to pData */
+  int *pnSize                    /* Write cell size here */
+){
+  int nPayload;
+  const u8 *pSrc;
+  int nSrc, n, rc;
+  int spaceLeft;
+  MemPage *pOvfl = 0;
+  MemPage *pToRelease = 0;
+  unsigned char *pPrior;
+  unsigned char *pPayload;
+  BtShared *pBt = pPage->pBt;
+  Pgno pgnoOvfl = 0;
+  int nHeader;
+
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+
+  /* pPage is not necessarily writeable since pCell might be auxiliary
+  ** buffer space that is separate from the pPage buffer area */
+  assert( pCell<pPage->aData || pCell>=&pPage->aData[pBt->pageSize]
+            || sqlite3PagerIswriteable(pPage->pDbPage) );
+
+  /* Fill in the header. */
+  nHeader = pPage->childPtrSize;
+  nPayload = nData + nZero;
+  if( pPage->intKeyLeaf ){
+    nHeader += putVarint32(&pCell[nHeader], nPayload);
+  }else{
+    assert( nData==0 );
+    assert( nZero==0 );
+  }
+  nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
+  
+  /* Fill in the payload size */
+  if( pPage->intKey ){
+    pSrc = pData;
+    nSrc = nData;
+    nData = 0;
+  }else{ 
+    if( NEVER(nKey>0x7fffffff || pKey==0) ){
+      return SQLITE_CORRUPT_BKPT;
+    }
+    nPayload = (int)nKey;
+    pSrc = pKey;
+    nSrc = (int)nKey;
+  }
+  if( nPayload<=pPage->maxLocal ){
+    n = nHeader + nPayload;
+    testcase( n==3 );
+    testcase( n==4 );
+    if( n<4 ) n = 4;
+    *pnSize = n;
+    spaceLeft = nPayload;
+    pPrior = pCell;
+  }else{
+    int mn = pPage->minLocal;
+    n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4);
+    testcase( n==pPage->maxLocal );
+    testcase( n==pPage->maxLocal+1 );
+    if( n > pPage->maxLocal ) n = mn;
+    spaceLeft = n;
+    *pnSize = n + nHeader + 4;
+    pPrior = &pCell[nHeader+n];
+  }
+  pPayload = &pCell[nHeader];
+
+  /* At this point variables should be set as follows:
+  **
+  **   nPayload           Total payload size in bytes
+  **   pPayload           Begin writing payload here
+  **   spaceLeft          Space available at pPayload.  If nPayload>spaceLeft,
+  **                      that means content must spill into overflow pages.
+  **   *pnSize            Size of the local cell (not counting overflow pages)
+  **   pPrior             Where to write the pgno of the first overflow page
+  **
+  ** Use a call to btreeParseCellPtr() to verify that the values above
+  ** were computed correctly.
+  */
+#if SQLITE_DEBUG
+  {
+    CellInfo info;
+    btreeParseCellPtr(pPage, pCell, &info);
+    assert( nHeader=(int)(info.pPayload - pCell) );
+    assert( info.nKey==nKey );
+    assert( *pnSize == info.nSize );
+    assert( spaceLeft == info.nLocal );
+    assert( pPrior == &pCell[info.iOverflow] );
+  }
+#endif
+
+  /* Write the payload into the local Cell and any extra into overflow pages */
+  while( nPayload>0 ){
+    if( spaceLeft==0 ){
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
+      if( pBt->autoVacuum ){
+        do{
+          pgnoOvfl++;
+        } while( 
+          PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) 
+        );
+      }
+#endif
+      rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      /* If the database supports auto-vacuum, and the second or subsequent
+      ** overflow page is being allocated, add an entry to the pointer-map
+      ** for that page now. 
+      **
+      ** If this is the first overflow page, then write a partial entry 
+      ** to the pointer-map. If we write nothing to this pointer-map slot,
+      ** then the optimistic overflow chain processing in clearCell()
+      ** may misinterpret the uninitialized values and delete the
+      ** wrong pages from the database.
+      */
+      if( pBt->autoVacuum && rc==SQLITE_OK ){
+        u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
+        ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc);
+        if( rc ){
+          releasePage(pOvfl);
+        }
+      }
+#endif
+      if( rc ){
+        releasePage(pToRelease);
+        return rc;
+      }
+
+      /* If pToRelease is not zero than pPrior points into the data area
+      ** of pToRelease.  Make sure pToRelease is still writeable. */
+      assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) );
+
+      /* If pPrior is part of the data area of pPage, then make sure pPage
+      ** is still writeable */
+      assert( pPrior<pPage->aData || pPrior>=&pPage->aData[pBt->pageSize]
+            || sqlite3PagerIswriteable(pPage->pDbPage) );
+
+      put4byte(pPrior, pgnoOvfl);
+      releasePage(pToRelease);
+      pToRelease = pOvfl;
+      pPrior = pOvfl->aData;
+      put4byte(pPrior, 0);
+      pPayload = &pOvfl->aData[4];
+      spaceLeft = pBt->usableSize - 4;
+    }
+    n = nPayload;
+    if( n>spaceLeft ) n = spaceLeft;
+
+    /* If pToRelease is not zero than pPayload points into the data area
+    ** of pToRelease.  Make sure pToRelease is still writeable. */
+    assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) );
+
+    /* If pPayload is part of the data area of pPage, then make sure pPage
+    ** is still writeable */
+    assert( pPayload<pPage->aData || pPayload>=&pPage->aData[pBt->pageSize]
+            || sqlite3PagerIswriteable(pPage->pDbPage) );
+
+    if( nSrc>0 ){
+      if( n>nSrc ) n = nSrc;
+      assert( pSrc );
+      memcpy(pPayload, pSrc, n);
+    }else{
+      memset(pPayload, 0, n);
+    }
+    nPayload -= n;
+    pPayload += n;
+    pSrc += n;
+    nSrc -= n;
+    spaceLeft -= n;
+    if( nSrc==0 ){
+      nSrc = nData;
+      pSrc = pData;
+    }
+  }
+  releasePage(pToRelease);
+  return SQLITE_OK;
+}
+
+/*
+** Remove the i-th cell from pPage.  This routine effects pPage only.
+** The cell content is not freed or deallocated.  It is assumed that
+** the cell content has been copied someplace else.  This routine just
+** removes the reference to the cell from pPage.
+**
+** "sz" must be the number of bytes in the cell.
+*/
+static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
+  u32 pc;         /* Offset to cell content of cell being deleted */
+  u8 *data;       /* pPage->aData */
+  u8 *ptr;        /* Used to move bytes around within data[] */
+  int rc;         /* The return code */
+  int hdr;        /* Beginning of the header.  0 most pages.  100 page 1 */
+
+  if( *pRC ) return;
+
+  assert( idx>=0 && idx<pPage->nCell );
+  assert( sz==cellSize(pPage, idx) );
+  assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  data = pPage->aData;
+  ptr = &pPage->aCellIdx[2*idx];
+  pc = get2byte(ptr);
+  hdr = pPage->hdrOffset;
+  testcase( pc==get2byte(&data[hdr+5]) );
+  testcase( pc+sz==pPage->pBt->usableSize );
+  if( pc < (u32)get2byte(&data[hdr+5]) || pc+sz > pPage->pBt->usableSize ){
+    *pRC = SQLITE_CORRUPT_BKPT;
+    return;
+  }
+  rc = freeSpace(pPage, pc, sz);
+  if( rc ){
+    *pRC = rc;
+    return;
+  }
+  pPage->nCell--;
+  if( pPage->nCell==0 ){
+    memset(&data[hdr+1], 0, 4);
+    data[hdr+7] = 0;
+    put2byte(&data[hdr+5], pPage->pBt->usableSize);
+    pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset
+                       - pPage->childPtrSize - 8;
+  }else{
+    memmove(ptr, ptr+2, 2*(pPage->nCell - idx));
+    put2byte(&data[hdr+3], pPage->nCell);
+    pPage->nFree += 2;
+  }
+}
+
+/*
+** Insert a new cell on pPage at cell index "i".  pCell points to the
+** content of the cell.
+**
+** If the cell content will fit on the page, then put it there.  If it
+** will not fit, then make a copy of the cell content into pTemp if
+** pTemp is not null.  Regardless of pTemp, allocate a new entry
+** in pPage->apOvfl[] and make it point to the cell content (either
+** in pTemp or the original pCell) and also record its index. 
+** Allocating a new entry in pPage->aCell[] implies that 
+** pPage->nOverflow is incremented.
+*/
+static void insertCell(
+  MemPage *pPage,   /* Page into which we are copying */
+  int i,            /* New cell becomes the i-th cell of the page */
+  u8 *pCell,        /* Content of the new cell */
+  int sz,           /* Bytes of content in pCell */
+  u8 *pTemp,        /* Temp storage space for pCell, if needed */
+  Pgno iChild,      /* If non-zero, replace first 4 bytes with this value */
+  int *pRC          /* Read and write return code from here */
+){
+  int idx = 0;      /* Where to write new cell content in data[] */
+  int j;            /* Loop counter */
+  int end;          /* First byte past the last cell pointer in data[] */
+  int ins;          /* Index in data[] where new cell pointer is inserted */
+  int cellOffset;   /* Address of first cell pointer in data[] */
+  u8 *data;         /* The content of the whole page */
+
+  if( *pRC ) return;
+
+  assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
+  assert( MX_CELL(pPage->pBt)<=10921 );
+  assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
+  assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
+  assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  /* The cell should normally be sized correctly.  However, when moving a
+  ** malformed cell from a leaf page to an interior page, if the cell size
+  ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size
+  ** might be less than 8 (leaf-size + pointer) on the interior node.  Hence
+  ** the term after the || in the following assert(). */
+  assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) );
+  if( pPage->nOverflow || sz+2>pPage->nFree ){
+    if( pTemp ){
+      memcpy(pTemp, pCell, sz);
+      pCell = pTemp;
+    }
+    if( iChild ){
+      put4byte(pCell, iChild);
+    }
+    j = pPage->nOverflow++;
+    assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) );
+    pPage->apOvfl[j] = pCell;
+    pPage->aiOvfl[j] = (u16)i;
+  }else{
+    int rc = sqlite3PagerWrite(pPage->pDbPage);
+    if( rc!=SQLITE_OK ){
+      *pRC = rc;
+      return;
+    }
+    assert( sqlite3PagerIswriteable(pPage->pDbPage) );
+    data = pPage->aData;
+    cellOffset = pPage->cellOffset;
+    end = cellOffset + 2*pPage->nCell;
+    ins = cellOffset + 2*i;
+    rc = allocateSpace(pPage, sz, &idx);
+    if( rc ){ *pRC = rc; return; }
+    /* The allocateSpace() routine guarantees the following two properties
+    ** if it returns success */
+    assert( idx >= end+2 );
+    assert( idx+sz <= (int)pPage->pBt->usableSize );
+    pPage->nCell++;
+    pPage->nFree -= (u16)(2 + sz);
+    memcpy(&data[idx], pCell, sz);
+    if( iChild ){
+      put4byte(&data[idx], iChild);
+    }
+    memmove(&data[ins+2], &data[ins], end-ins);
+    put2byte(&data[ins], idx);
+    put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( pPage->pBt->autoVacuum ){
+      /* The cell may contain a pointer to an overflow page. If so, write
+      ** the entry for the overflow page into the pointer map.
+      */
+      ptrmapPutOvflPtr(pPage, pCell, pRC);
+    }
+#endif
+  }
+}
+
+/*
+** Array apCell[] contains pointers to nCell b-tree page cells. The 
+** szCell[] array contains the size in bytes of each cell. This function
+** replaces the current contents of page pPg with the contents of the cell
+** array.
+**
+** Some of the cells in apCell[] may currently be stored in pPg. This
+** function works around problems caused by this by making a copy of any 
+** such cells before overwriting the page data.
+**
+** The MemPage.nFree field is invalidated by this function. It is the 
+** responsibility of the caller to set it correctly.
+*/
+static void rebuildPage(
+  MemPage *pPg,                   /* Edit this page */
+  int nCell,                      /* Final number of cells on page */
+  u8 **apCell,                    /* Array of cells */
+  u16 *szCell                     /* Array of cell sizes */
+){
+  const int hdr = pPg->hdrOffset;          /* Offset of header on pPg */
+  u8 * const aData = pPg->aData;           /* Pointer to data for pPg */
+  const int usableSize = pPg->pBt->usableSize;
+  u8 * const pEnd = &aData[usableSize];
+  int i;
+  u8 *pCellptr = pPg->aCellIdx;
+  u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
+  u8 *pData;
+
+  i = get2byte(&aData[hdr+5]);
+  memcpy(&pTmp[i], &aData[i], usableSize - i);
+
+  pData = pEnd;
+  for(i=0; i<nCell; i++){
+    u8 *pCell = apCell[i];
+    if( pCell>aData && pCell<pEnd ){
+      pCell = &pTmp[pCell - aData];
+    }
+    pData -= szCell[i];
+    memcpy(pData, pCell, szCell[i]);
+    put2byte(pCellptr, (pData - aData));
+    pCellptr += 2;
+    assert( szCell[i]==cellSizePtr(pPg, pCell) );
+  }
+
+  /* The pPg->nFree field is now set incorrectly. The caller will fix it. */
+  pPg->nCell = nCell;
+  pPg->nOverflow = 0;
+
+  put2byte(&aData[hdr+1], 0);
+  put2byte(&aData[hdr+3], pPg->nCell);
+  put2byte(&aData[hdr+5], pData - aData);
+  aData[hdr+7] = 0x00;
+}
+
+/*
+** Array apCell[] contains nCell pointers to b-tree cells. Array szCell
+** contains the size in bytes of each such cell. This function attempts to 
+** add the cells stored in the array to page pPg. If it cannot (because 
+** the page needs to be defragmented before the cells will fit), non-zero
+** is returned. Otherwise, if the cells are added successfully, zero is
+** returned.
+**
+** Argument pCellptr points to the first entry in the cell-pointer array
+** (part of page pPg) to populate. After cell apCell[0] is written to the
+** page body, a 16-bit offset is written to pCellptr. And so on, for each
+** cell in the array. It is the responsibility of the caller to ensure
+** that it is safe to overwrite this part of the cell-pointer array.
+**
+** When this function is called, *ppData points to the start of the 
+** content area on page pPg. If the size of the content area is extended,
+** *ppData is updated to point to the new start of the content area
+** before returning.
+**
+** Finally, argument pBegin points to the byte immediately following the
+** end of the space required by this page for the cell-pointer area (for
+** all cells - not just those inserted by the current call). If the content
+** area must be extended to before this point in order to accomodate all
+** cells in apCell[], then the cells do not fit and non-zero is returned.
+*/
+static int pageInsertArray(
+  MemPage *pPg,                   /* Page to add cells to */
+  u8 *pBegin,                     /* End of cell-pointer array */
+  u8 **ppData,                    /* IN/OUT: Page content -area pointer */
+  u8 *pCellptr,                   /* Pointer to cell-pointer area */
+  int nCell,                      /* Number of cells to add to pPg */
+  u8 **apCell,                    /* Array of cells */
+  u16 *szCell                     /* Array of cell sizes */
+){
+  int i;
+  u8 *aData = pPg->aData;
+  u8 *pData = *ppData;
+  const int bFreelist = aData[1] || aData[2];
+  assert( CORRUPT_DB || pPg->hdrOffset==0 );    /* Never called on page 1 */
+  for(i=0; i<nCell; i++){
+    int sz = szCell[i];
+    int rc;
+    u8 *pSlot;
+    if( bFreelist==0 || (pSlot = pageFindSlot(pPg, sz, &rc, 0))==0 ){
+      pData -= sz;
+      if( pData<pBegin ) return 1;
+      pSlot = pData;
+    }
+    memcpy(pSlot, apCell[i], sz);
+    put2byte(pCellptr, (pSlot - aData));
+    pCellptr += 2;
+  }
+  *ppData = pData;
+  return 0;
+}
+
+/*
+** Array apCell[] contains nCell pointers to b-tree cells. Array szCell 
+** contains the size in bytes of each such cell. This function adds the
+** space associated with each cell in the array that is currently stored 
+** within the body of pPg to the pPg free-list. The cell-pointers and other
+** fields of the page are not updated.
+**
+** This function returns the total number of cells added to the free-list.
+*/
+static int pageFreeArray(
+  MemPage *pPg,                   /* Page to edit */
+  int nCell,                      /* Cells to delete */
+  u8 **apCell,                    /* Array of cells */
+  u16 *szCell                     /* Array of cell sizes */
+){
+  u8 * const aData = pPg->aData;
+  u8 * const pEnd = &aData[pPg->pBt->usableSize];
+  u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
+  int nRet = 0;
+  int i;
+  u8 *pFree = 0;
+  int szFree = 0;
+
+  for(i=0; i<nCell; i++){
+    u8 *pCell = apCell[i];
+    if( pCell>=pStart && pCell<pEnd ){
+      int sz = szCell[i];
+      if( pFree!=(pCell + sz) ){
+        if( pFree ){
+          assert( pFree>aData && (pFree - aData)<65536 );
+          freeSpace(pPg, (u16)(pFree - aData), szFree);
+        }
+        pFree = pCell;
+        szFree = sz;
+        if( pFree+sz>pEnd ) return 0;
+      }else{
+        pFree = pCell;
+        szFree += sz;
+      }
+      nRet++;
+    }
+  }
+  if( pFree ){
+    assert( pFree>aData && (pFree - aData)<65536 );
+    freeSpace(pPg, (u16)(pFree - aData), szFree);
+  }
+  return nRet;
+}
+
+/*
+** apCell[] and szCell[] contains pointers to and sizes of all cells in the
+** pages being balanced.  The current page, pPg, has pPg->nCell cells starting
+** with apCell[iOld].  After balancing, this page should hold nNew cells
+** starting at apCell[iNew].
+**
+** This routine makes the necessary adjustments to pPg so that it contains
+** the correct cells after being balanced.
+**
+** The pPg->nFree field is invalid when this function returns. It is the
+** responsibility of the caller to set it correctly.
+*/
+static void editPage(
+  MemPage *pPg,                   /* Edit this page */
+  int iOld,                       /* Index of first cell currently on page */
+  int iNew,                       /* Index of new first cell on page */
+  int nNew,                       /* Final number of cells on page */
+  u8 **apCell,                    /* Array of cells */
+  u16 *szCell                     /* Array of cell sizes */
+){
+  u8 * const aData = pPg->aData;
+  const int hdr = pPg->hdrOffset;
+  u8 *pBegin = &pPg->aCellIdx[nNew * 2];
+  int nCell = pPg->nCell;       /* Cells stored on pPg */
+  u8 *pData;
+  u8 *pCellptr;
+  int i;
+  int iOldEnd = iOld + pPg->nCell + pPg->nOverflow;
+  int iNewEnd = iNew + nNew;
+
+#ifdef SQLITE_DEBUG
+  u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
+  memcpy(pTmp, aData, pPg->pBt->usableSize);
+#endif
+
+  /* Remove cells from the start and end of the page */
+  if( iOld<iNew ){
+    int nShift = pageFreeArray(
+        pPg, iNew-iOld, &apCell[iOld], &szCell[iOld]
+    );
+    memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
+    nCell -= nShift;
+  }
+  if( iNewEnd < iOldEnd ){
+    nCell -= pageFreeArray(
+        pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd]
+    );
+  }
+
+  pData = &aData[get2byteNotZero(&aData[hdr+5])];
+  if( pData<pBegin ) goto editpage_fail;
+
+  /* Add cells to the start of the page */
+  if( iNew<iOld ){
+    int nAdd = MIN(nNew,iOld-iNew);
+    assert( (iOld-iNew)<nNew || nCell==0 || CORRUPT_DB );
+    pCellptr = pPg->aCellIdx;
+    memmove(&pCellptr[nAdd*2], pCellptr, nCell*2);
+    if( pageInsertArray(
+          pPg, pBegin, &pData, pCellptr,
+          nAdd, &apCell[iNew], &szCell[iNew]
+    ) ) goto editpage_fail;
+    nCell += nAdd;
+  }
+
+  /* Add any overflow cells */
+  for(i=0; i<pPg->nOverflow; i++){
+    int iCell = (iOld + pPg->aiOvfl[i]) - iNew;
+    if( iCell>=0 && iCell<nNew ){
+      pCellptr = &pPg->aCellIdx[iCell * 2];
+      memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2);
+      nCell++;
+      if( pageInsertArray(
+            pPg, pBegin, &pData, pCellptr,
+            1, &apCell[iCell + iNew], &szCell[iCell + iNew]
+      ) ) goto editpage_fail;
+    }
+  }
+
+  /* Append cells to the end of the page */
+  pCellptr = &pPg->aCellIdx[nCell*2];
+  if( pageInsertArray(
+        pPg, pBegin, &pData, pCellptr,
+        nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell]
+  ) ) goto editpage_fail;
+
+  pPg->nCell = nNew;
+  pPg->nOverflow = 0;
+
+  put2byte(&aData[hdr+3], pPg->nCell);
+  put2byte(&aData[hdr+5], pData - aData);
+
+#ifdef SQLITE_DEBUG
+  for(i=0; i<nNew && !CORRUPT_DB; i++){
+    u8 *pCell = apCell[i+iNew];
+    int iOff = get2byte(&pPg->aCellIdx[i*2]);
+    if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
+      pCell = &pTmp[pCell - aData];
+    }
+    assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) );
+  }
+#endif
+
+  return;
+ editpage_fail:
+  /* Unable to edit this page. Rebuild it from scratch instead. */
+  rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]);
+}
+
+/*
+** The following parameters determine how many adjacent pages get involved
+** in a balancing operation.  NN is the number of neighbors on either side
+** of the page that participate in the balancing operation.  NB is the
+** total number of pages that participate, including the target page and
+** NN neighbors on either side.
+**
+** The minimum value of NN is 1 (of course).  Increasing NN above 1
+** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
+** in exchange for a larger degradation in INSERT and UPDATE performance.
+** The value of NN appears to give the best results overall.
+*/
+#define NN 1             /* Number of neighbors on either side of pPage */
+#define NB (NN*2+1)      /* Total pages involved in the balance */
+
+
+#ifndef SQLITE_OMIT_QUICKBALANCE
+/*
+** This version of balance() handles the common special case where
+** a new entry is being inserted on the extreme right-end of the
+** tree, in other words, when the new entry will become the largest
+** entry in the tree.
+**
+** Instead of trying to balance the 3 right-most leaf pages, just add
+** a new page to the right-hand side and put the one new entry in
+** that page.  This leaves the right side of the tree somewhat
+** unbalanced.  But odds are that we will be inserting new entries
+** at the end soon afterwards so the nearly empty page will quickly
+** fill up.  On average.
+**
+** pPage is the leaf page which is the right-most page in the tree.
+** pParent is its parent.  pPage must have a single overflow entry
+** which is also the right-most entry on the page.
+**
+** The pSpace buffer is used to store a temporary copy of the divider
+** cell that will be inserted into pParent. Such a cell consists of a 4
+** byte page number followed by a variable length integer. In other
+** words, at most 13 bytes. Hence the pSpace buffer must be at
+** least 13 bytes in size.
+*/
+static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){
+  BtShared *const pBt = pPage->pBt;    /* B-Tree Database */
+  MemPage *pNew;                       /* Newly allocated page */
+  int rc;                              /* Return Code */
+  Pgno pgnoNew;                        /* Page number of pNew */
+
+  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
+  assert( sqlite3PagerIswriteable(pParent->pDbPage) );
+  assert( pPage->nOverflow==1 );
+
+  /* This error condition is now caught prior to reaching this function */
+  if( NEVER(pPage->nCell==0) ) return SQLITE_CORRUPT_BKPT;
+
+  /* Allocate a new page. This page will become the right-sibling of 
+  ** pPage. Make the parent page writable, so that the new divider cell
+  ** may be inserted. If both these operations are successful, proceed.
+  */
+  rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
+
+  if( rc==SQLITE_OK ){
+
+    u8 *pOut = &pSpace[4];
+    u8 *pCell = pPage->apOvfl[0];
+    u16 szCell = cellSizePtr(pPage, pCell);
+    u8 *pStop;
+
+    assert( sqlite3PagerIswriteable(pNew->pDbPage) );
+    assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
+    zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
+    rebuildPage(pNew, 1, &pCell, &szCell);
+    pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
+
+    /* If this is an auto-vacuum database, update the pointer map
+    ** with entries for the new page, and any pointer from the 
+    ** cell on the page to an overflow page. If either of these
+    ** operations fails, the return code is set, but the contents
+    ** of the parent page are still manipulated by thh code below.
+    ** That is Ok, at this point the parent page is guaranteed to
+    ** be marked as dirty. Returning an error code will cause a
+    ** rollback, undoing any changes made to the parent page.
+    */
+    if( ISAUTOVACUUM ){
+      ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc);
+      if( szCell>pNew->minLocal ){
+        ptrmapPutOvflPtr(pNew, pCell, &rc);
+      }
+    }
+  
+    /* Create a divider cell to insert into pParent. The divider cell
+    ** consists of a 4-byte page number (the page number of pPage) and
+    ** a variable length key value (which must be the same value as the
+    ** largest key on pPage).
+    **
+    ** To find the largest key value on pPage, first find the right-most 
+    ** cell on pPage. The first two fields of this cell are the 
+    ** record-length (a variable length integer at most 32-bits in size)
+    ** and the key value (a variable length integer, may have any value).
+    ** The first of the while(...) loops below skips over the record-length
+    ** field. The second while(...) loop copies the key value from the
+    ** cell on pPage into the pSpace buffer.
+    */
+    pCell = findCell(pPage, pPage->nCell-1);
+    pStop = &pCell[9];
+    while( (*(pCell++)&0x80) && pCell<pStop );
+    pStop = &pCell[9];
+    while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop );
+
+    /* Insert the new divider cell into pParent. */
+    insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
+               0, pPage->pgno, &rc);
+
+    /* Set the right-child pointer of pParent to point to the new page. */
+    put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
+  
+    /* Release the reference to the new page. */
+    releasePage(pNew);
+  }
+
+  return rc;
+}
+#endif /* SQLITE_OMIT_QUICKBALANCE */
+
+#if 0
+/*
+** This function does not contribute anything to the operation of SQLite.
+** it is sometimes activated temporarily while debugging code responsible 
+** for setting pointer-map entries.
+*/
+static int ptrmapCheckPages(MemPage **apPage, int nPage){
+  int i, j;
+  for(i=0; i<nPage; i++){
+    Pgno n;
+    u8 e;
+    MemPage *pPage = apPage[i];
+    BtShared *pBt = pPage->pBt;
+    assert( pPage->isInit );
+
+    for(j=0; j<pPage->nCell; j++){
+      CellInfo info;
+      u8 *z;
+     
+      z = findCell(pPage, j);
+      btreeParseCellPtr(pPage, z, &info);
+      if( info.iOverflow ){
+        Pgno ovfl = get4byte(&z[info.iOverflow]);
+        ptrmapGet(pBt, ovfl, &e, &n);
+        assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 );
+      }
+      if( !pPage->leaf ){
+        Pgno child = get4byte(z);
+        ptrmapGet(pBt, child, &e, &n);
+        assert( n==pPage->pgno && e==PTRMAP_BTREE );
+      }
+    }
+    if( !pPage->leaf ){
+      Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+      ptrmapGet(pBt, child, &e, &n);
+      assert( n==pPage->pgno && e==PTRMAP_BTREE );
+    }
+  }
+  return 1;
+}
+#endif
+
+/*
+** This function is used to copy the contents of the b-tree node stored 
+** on page pFrom to page pTo. If page pFrom was not a leaf page, then
+** the pointer-map entries for each child page are updated so that the
+** parent page stored in the pointer map is page pTo. If pFrom contained
+** any cells with overflow page pointers, then the corresponding pointer
+** map entries are also updated so that the parent page is page pTo.
+**
+** If pFrom is currently carrying any overflow cells (entries in the
+** MemPage.apOvfl[] array), they are not copied to pTo. 
+**
+** Before returning, page pTo is reinitialized using btreeInitPage().
+**
+** The performance of this function is not critical. It is only used by 
+** the balance_shallower() and balance_deeper() procedures, neither of
+** which are called often under normal circumstances.
+*/
+static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){
+  if( (*pRC)==SQLITE_OK ){
+    BtShared * const pBt = pFrom->pBt;
+    u8 * const aFrom = pFrom->aData;
+    u8 * const aTo = pTo->aData;
+    int const iFromHdr = pFrom->hdrOffset;
+    int const iToHdr = ((pTo->pgno==1) ? 100 : 0);
+    int rc;
+    int iData;
+  
+  
+    assert( pFrom->isInit );
+    assert( pFrom->nFree>=iToHdr );
+    assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize );
+  
+    /* Copy the b-tree node content from page pFrom to page pTo. */
+    iData = get2byte(&aFrom[iFromHdr+5]);
+    memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData);
+    memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell);
+  
+    /* Reinitialize page pTo so that the contents of the MemPage structure
+    ** match the new data. The initialization of pTo can actually fail under
+    ** fairly obscure circumstances, even though it is a copy of initialized 
+    ** page pFrom.
+    */
+    pTo->isInit = 0;
+    rc = btreeInitPage(pTo);
+    if( rc!=SQLITE_OK ){
+      *pRC = rc;
+      return;
+    }
+  
+    /* If this is an auto-vacuum database, update the pointer-map entries
+    ** for any b-tree or overflow pages that pTo now contains the pointers to.
+    */
+    if( ISAUTOVACUUM ){
+      *pRC = setChildPtrmaps(pTo);
+    }
+  }
+}
+
+/*
+** This routine redistributes cells on the iParentIdx'th child of pParent
+** (hereafter "the page") and up to 2 siblings so that all pages have about the
+** same amount of free space. Usually a single sibling on either side of the
+** page are used in the balancing, though both siblings might come from one
+** side if the page is the first or last child of its parent. If the page 
+** has fewer than 2 siblings (something which can only happen if the page
+** is a root page or a child of a root page) then all available siblings
+** participate in the balancing.
+**
+** The number of siblings of the page might be increased or decreased by 
+** one or two in an effort to keep pages nearly full but not over full. 
+**
+** Note that when this routine is called, some of the cells on the page
+** might not actually be stored in MemPage.aData[]. This can happen
+** if the page is overfull. This routine ensures that all cells allocated
+** to the page and its siblings fit into MemPage.aData[] before returning.
+**
+** In the course of balancing the page and its siblings, cells may be
+** inserted into or removed from the parent page (pParent). Doing so
+** may cause the parent page to become overfull or underfull. If this
+** happens, it is the responsibility of the caller to invoke the correct
+** balancing routine to fix this problem (see the balance() routine). 
+**
+** If this routine fails for any reason, it might leave the database
+** in a corrupted state. So if this routine fails, the database should
+** be rolled back.
+**
+** The third argument to this function, aOvflSpace, is a pointer to a
+** buffer big enough to hold one page. If while inserting cells into the parent
+** page (pParent) the parent page becomes overfull, this buffer is
+** used to store the parent's overflow cells. Because this function inserts
+** a maximum of four divider cells into the parent page, and the maximum
+** size of a cell stored within an internal node is always less than 1/4
+** of the page-size, the aOvflSpace[] buffer is guaranteed to be large
+** enough for all overflow cells.
+**
+** If aOvflSpace is set to a null pointer, this function returns 
+** SQLITE_NOMEM.
+*/
+#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
+#pragma optimize("", off)
+#endif
+static int balance_nonroot(
+  MemPage *pParent,               /* Parent page of siblings being balanced */
+  int iParentIdx,                 /* Index of "the page" in pParent */
+  u8 *aOvflSpace,                 /* page-size bytes of space for parent ovfl */
+  int isRoot,                     /* True if pParent is a root-page */
+  int bBulk                       /* True if this call is part of a bulk load */
+){
+  BtShared *pBt;               /* The whole database */
+  int nCell = 0;               /* Number of cells in apCell[] */
+  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
+  int nNew = 0;                /* Number of pages in apNew[] */
+  int nOld;                    /* Number of pages in apOld[] */
+  int i, j, k;                 /* Loop counters */
+  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
+  int rc = SQLITE_OK;          /* The return code */
+  u16 leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
+  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
+  int usableSpace;             /* Bytes in pPage beyond the header */
+  int pageFlags;               /* Value of pPage->aData[0] */
+  int subtotal;                /* Subtotal of bytes in cells on one page */
+  int iSpace1 = 0;             /* First unused byte of aSpace1[] */
+  int iOvflSpace = 0;          /* First unused byte of aOvflSpace[] */
+  int szScratch;               /* Size of scratch memory requested */
+  MemPage *apOld[NB];          /* pPage and up to two siblings */
+  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
+  u8 *pRight;                  /* Location in parent of right-sibling pointer */
+  u8 *apDiv[NB-1];             /* Divider cells in pParent */
+  int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */
+  int cntOld[NB+2];            /* Old index in aCell[] after i-th page */
+  int szNew[NB+2];             /* Combined size of cells placed on i-th page */
+  u8 **apCell = 0;             /* All cells begin balanced */
+  u16 *szCell;                 /* Local size of all cells in apCell[] */
+  u8 *aSpace1;                 /* Space for copies of dividers cells */
+  Pgno pgno;                   /* Temp var to store a page number in */
+  u8 abDone[NB+2];             /* True after i'th new page is populated */
+  Pgno aPgno[NB+2];            /* Page numbers of new pages before shuffling */
+  Pgno aPgOrder[NB+2];         /* Copy of aPgno[] used for sorting pages */
+  u16 aPgFlags[NB+2];          /* flags field of new pages before shuffling */
+
+  memset(abDone, 0, sizeof(abDone));
+  pBt = pParent->pBt;
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  assert( sqlite3PagerIswriteable(pParent->pDbPage) );
+
+#if 0
+  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
+#endif
+
+  /* At this point pParent may have at most one overflow cell. And if
+  ** this overflow cell is present, it must be the cell with 
+  ** index iParentIdx. This scenario comes about when this function
+  ** is called (indirectly) from sqlite3BtreeDelete().
+  */
+  assert( pParent->nOverflow==0 || pParent->nOverflow==1 );
+  assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx );
+
+  if( !aOvflSpace ){
+    return SQLITE_NOMEM;
+  }
+
+  /* Find the sibling pages to balance. Also locate the cells in pParent 
+  ** that divide the siblings. An attempt is made to find NN siblings on 
+  ** either side of pPage. More siblings are taken from one side, however, 
+  ** if there are fewer than NN siblings on the other side. If pParent
+  ** has NB or fewer children then all children of pParent are taken.  
+  **
+  ** This loop also drops the divider cells from the parent page. This
+  ** way, the remainder of the function does not have to deal with any
+  ** overflow cells in the parent page, since if any existed they will
+  ** have already been removed.
+  */
+  i = pParent->nOverflow + pParent->nCell;
+  if( i<2 ){
+    nxDiv = 0;
+  }else{
+    assert( bBulk==0 || bBulk==1 );
+    if( iParentIdx==0 ){                 
+      nxDiv = 0;
+    }else if( iParentIdx==i ){
+      nxDiv = i-2+bBulk;
+    }else{
+      assert( bBulk==0 );
+      nxDiv = iParentIdx-1;
+    }
+    i = 2-bBulk;
+  }
+  nOld = i+1;
+  if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){
+    pRight = &pParent->aData[pParent->hdrOffset+8];
+  }else{
+    pRight = findCell(pParent, i+nxDiv-pParent->nOverflow);
+  }
+  pgno = get4byte(pRight);
+  while( 1 ){
+    rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
+    if( rc ){
+      memset(apOld, 0, (i+1)*sizeof(MemPage*));
+      goto balance_cleanup;
+    }
+    nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
+    if( (i--)==0 ) break;
+
+    if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){
+      apDiv[i] = pParent->apOvfl[0];
+      pgno = get4byte(apDiv[i]);
+      szNew[i] = cellSizePtr(pParent, apDiv[i]);
+      pParent->nOverflow = 0;
+    }else{
+      apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow);
+      pgno = get4byte(apDiv[i]);
+      szNew[i] = cellSizePtr(pParent, apDiv[i]);
+
+      /* Drop the cell from the parent page. apDiv[i] still points to
+      ** the cell within the parent, even though it has been dropped.
+      ** This is safe because dropping a cell only overwrites the first
+      ** four bytes of it, and this function does not need the first
+      ** four bytes of the divider cell. So the pointer is safe to use
+      ** later on.  
+      **
+      ** But not if we are in secure-delete mode. In secure-delete mode,
+      ** the dropCell() routine will overwrite the entire cell with zeroes.
+      ** In this case, temporarily copy the cell into the aOvflSpace[]
+      ** buffer. It will be copied out again as soon as the aSpace[] buffer
+      ** is allocated.  */
+      if( pBt->btsFlags & BTS_SECURE_DELETE ){
+        int iOff;
+
+        iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData);
+        if( (iOff+szNew[i])>(int)pBt->usableSize ){
+          rc = SQLITE_CORRUPT_BKPT;
+          memset(apOld, 0, (i+1)*sizeof(MemPage*));
+          goto balance_cleanup;
+        }else{
+          memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]);
+          apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData];
+        }
+      }
+      dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc);
+    }
+  }
+
+  /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
+  ** alignment */
+  nMaxCells = (nMaxCells + 3)&~3;
+
+  /*
+  ** Allocate space for memory structures
+  */
+  szScratch =
+       nMaxCells*sizeof(u8*)                       /* apCell */
+     + nMaxCells*sizeof(u16)                       /* szCell */
+     + pBt->pageSize;                              /* aSpace1 */
+
+  /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer
+  ** that is more than 6 times the database page size. */
+  assert( szScratch<=6*(int)pBt->pageSize );
+  apCell = sqlite3ScratchMalloc( szScratch ); 
+  if( apCell==0 ){
+    rc = SQLITE_NOMEM;
+    goto balance_cleanup;
+  }
+  szCell = (u16*)&apCell[nMaxCells];
+  aSpace1 = (u8*)&szCell[nMaxCells];
+  assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
+
+  /*
+  ** Load pointers to all cells on sibling pages and the divider cells
+  ** into the local apCell[] array.  Make copies of the divider cells
+  ** into space obtained from aSpace1[]. The divider cells have already
+  ** been removed from pParent.
+  **
+  ** If the siblings are on leaf pages, then the child pointers of the
+  ** divider cells are stripped from the cells before they are copied
+  ** into aSpace1[].  In this way, all cells in apCell[] are without
+  ** child pointers.  If siblings are not leaves, then all cell in
+  ** apCell[] include child pointers.  Either way, all cells in apCell[]
+  ** are alike.
+  **
+  ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
+  **       leafData:  1 if pPage holds key+data and pParent holds only keys.
+  */
+  leafCorrection = apOld[0]->leaf*4;
+  leafData = apOld[0]->intKeyLeaf;
+  for(i=0; i<nOld; i++){
+    int limit;
+    MemPage *pOld = apOld[i];
+
+    limit = pOld->nCell+pOld->nOverflow;
+    if( pOld->nOverflow>0 ){
+      for(j=0; j<limit; j++){
+        assert( nCell<nMaxCells );
+        apCell[nCell] = findOverflowCell(pOld, j);
+        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
+        nCell++;
+      }
+    }else{
+      u8 *aData = pOld->aData;
+      u16 maskPage = pOld->maskPage;
+      u16 cellOffset = pOld->cellOffset;
+      for(j=0; j<limit; j++){
+        assert( nCell<nMaxCells );
+        apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j);
+        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
+        nCell++;
+      }
+    }       
+    cntOld[i] = nCell;
+    if( i<nOld-1 && !leafData){
+      u16 sz = (u16)szNew[i];
+      u8 *pTemp;
+      assert( nCell<nMaxCells );
+      szCell[nCell] = sz;
+      pTemp = &aSpace1[iSpace1];
+      iSpace1 += sz;
+      assert( sz<=pBt->maxLocal+23 );
+      assert( iSpace1 <= (int)pBt->pageSize );
+      memcpy(pTemp, apDiv[i], sz);
+      apCell[nCell] = pTemp+leafCorrection;
+      assert( leafCorrection==0 || leafCorrection==4 );
+      szCell[nCell] = szCell[nCell] - leafCorrection;
+      if( !pOld->leaf ){
+        assert( leafCorrection==0 );
+        assert( pOld->hdrOffset==0 );
+        /* The right pointer of the child page pOld becomes the left
+        ** pointer of the divider cell */
+        memcpy(apCell[nCell], &pOld->aData[8], 4);
+      }else{
+        assert( leafCorrection==4 );
+        if( szCell[nCell]<4 ){
+          /* Do not allow any cells smaller than 4 bytes. If a smaller cell
+          ** does exist, pad it with 0x00 bytes. */
+          assert( szCell[nCell]==3 );
+          assert( apCell[nCell]==&aSpace1[iSpace1-3] );
+          aSpace1[iSpace1++] = 0x00;
+          szCell[nCell] = 4;
+        }
+      }
+      nCell++;
+    }
+  }
+
+  /*
+  ** Figure out the number of pages needed to hold all nCell cells.
+  ** Store this number in "k".  Also compute szNew[] which is the total
+  ** size of all cells on the i-th page and cntNew[] which is the index
+  ** in apCell[] of the cell that divides page i from page i+1.  
+  ** cntNew[k] should equal nCell.
+  **
+  ** Values computed by this block:
+  **
+  **           k: The total number of sibling pages
+  **    szNew[i]: Spaced used on the i-th sibling page.
+  **   cntNew[i]: Index in apCell[] and szCell[] for the first cell to
+  **              the right of the i-th sibling page.
+  ** usableSpace: Number of bytes of space available on each sibling.
+  ** 
+  */
+  usableSpace = pBt->usableSize - 12 + leafCorrection;
+  for(subtotal=k=i=0; i<nCell; i++){
+    assert( i<nMaxCells );
+    subtotal += szCell[i] + 2;
+    if( subtotal > usableSpace ){
+      szNew[k] = subtotal - szCell[i] - 2;
+      cntNew[k] = i;
+      if( leafData ){ i--; }
+      subtotal = 0;
+      k++;
+      if( k>NB+1 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
+    }
+  }
+  szNew[k] = subtotal;
+  cntNew[k] = nCell;
+  k++;
+
+  /*
+  ** The packing computed by the previous block is biased toward the siblings
+  ** on the left side (siblings with smaller keys). The left siblings are
+  ** always nearly full, while the right-most sibling might be nearly empty.
+  ** The next block of code attempts to adjust the packing of siblings to
+  ** get a better balance.
+  **
+  ** This adjustment is more than an optimization.  The packing above might
+  ** be so out of balance as to be illegal.  For example, the right-most
+  ** sibling might be completely empty.  This adjustment is not optional.
+  */
+  for(i=k-1; i>0; i--){
+    int szRight = szNew[i];  /* Size of sibling on the right */
+    int szLeft = szNew[i-1]; /* Size of sibling on the left */
+    int r;              /* Index of right-most cell in left sibling */
+    int d;              /* Index of first cell to the left of right sibling */
+
+    r = cntNew[i-1] - 1;
+    d = r + 1 - leafData;
+    assert( d<nMaxCells );
+    assert( r<nMaxCells );
+    while( szRight==0 
+       || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) 
+    ){
+      szRight += szCell[d] + 2;
+      szLeft -= szCell[r] + 2;
+      cntNew[i-1]--;
+      r = cntNew[i-1] - 1;
+      d = r + 1 - leafData;
+    }
+    szNew[i] = szRight;
+    szNew[i-1] = szLeft;
+  }
+
+  /* Sanity check:  For a non-corrupt database file one of the follwing
+  ** must be true:
+  **    (1) We found one or more cells (cntNew[0])>0), or
+  **    (2) pPage is a virtual root page.  A virtual root page is when
+  **        the real root page is page 1 and we are the only child of
+  **        that page.
+  */
+  assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB);
+  TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
+    apOld[0]->pgno, apOld[0]->nCell,
+    nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
+    nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
+  ));
+
+  /*
+  ** Allocate k new pages.  Reuse old pages where possible.
+  */
+  if( apOld[0]->pgno<=1 ){
+    rc = SQLITE_CORRUPT_BKPT;
+    goto balance_cleanup;
+  }
+  pageFlags = apOld[0]->aData[0];
+  for(i=0; i<k; i++){
+    MemPage *pNew;
+    if( i<nOld ){
+      pNew = apNew[i] = apOld[i];
+      apOld[i] = 0;
+      rc = sqlite3PagerWrite(pNew->pDbPage);
+      nNew++;
+      if( rc ) goto balance_cleanup;
+    }else{
+      assert( i>0 );
+      rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
+      if( rc ) goto balance_cleanup;
+      zeroPage(pNew, pageFlags);
+      apNew[i] = pNew;
+      nNew++;
+      cntOld[i] = nCell;
+
+      /* Set the pointer-map entry for the new sibling page. */
+      if( ISAUTOVACUUM ){
+        ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
+        if( rc!=SQLITE_OK ){
+          goto balance_cleanup;
+        }
+      }
+    }
+  }
+
+  /*
+  ** Reassign page numbers so that the new pages are in ascending order. 
+  ** This helps to keep entries in the disk file in order so that a scan
+  ** of the table is closer to a linear scan through the file. That in turn 
+  ** helps the operating system to deliver pages from the disk more rapidly.
+  **
+  ** An O(n^2) insertion sort algorithm is used, but since n is never more 
+  ** than (NB+2) (a small constant), that should not be a problem.
+  **
+  ** When NB==3, this one optimization makes the database about 25% faster 
+  ** for large insertions and deletions.
+  */
+  for(i=0; i<nNew; i++){
+    aPgOrder[i] = aPgno[i] = apNew[i]->pgno;
+    aPgFlags[i] = apNew[i]->pDbPage->flags;
+    for(j=0; j<i; j++){
+      if( aPgno[j]==aPgno[i] ){
+        /* This branch is taken if the set of sibling pages somehow contains
+        ** duplicate entries. This can happen if the database is corrupt. 
+        ** It would be simpler to detect this as part of the loop below, but
+        ** we do the detection here in order to avoid populating the pager
+        ** cache with two separate objects associated with the same
+        ** page number.  */
+        assert( CORRUPT_DB );
+        rc = SQLITE_CORRUPT_BKPT;
+        goto balance_cleanup;
+      }
+    }
+  }
+  for(i=0; i<nNew; i++){
+    int iBest = 0;                /* aPgno[] index of page number to use */
+    for(j=1; j<nNew; j++){
+      if( aPgOrder[j]<aPgOrder[iBest] ) iBest = j;
+    }
+    pgno = aPgOrder[iBest];
+    aPgOrder[iBest] = 0xffffffff;
+    if( iBest!=i ){
+      if( iBest>i ){
+        sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0);
+      }
+      sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]);
+      apNew[i]->pgno = pgno;
+    }
+  }
+
+  TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
+         "%d(%d nc=%d) %d(%d nc=%d)\n",
+    apNew[0]->pgno, szNew[0], cntNew[0],
+    nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
+    nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
+    nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0,
+    nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0,
+    nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0,
+    nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0,
+    nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0,
+    nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0
+  ));
+
+  assert( sqlite3PagerIswriteable(pParent->pDbPage) );
+  put4byte(pRight, apNew[nNew-1]->pgno);
+
+  /* If the sibling pages are not leaves, ensure that the right-child pointer
+  ** of the right-most new sibling page is set to the value that was 
+  ** originally in the same field of the right-most old sibling page. */
+  if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
+    MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
+    memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
+  }
+
+  /* Make any required updates to pointer map entries associated with 
+  ** cells stored on sibling pages following the balance operation. Pointer
+  ** map entries associated with divider cells are set by the insertCell()
+  ** routine. The associated pointer map entries are:
+  **
+  **   a) if the cell contains a reference to an overflow chain, the
+  **      entry associated with the first page in the overflow chain, and
+  **
+  **   b) if the sibling pages are not leaves, the child page associated
+  **      with the cell.
+  **
+  ** If the sibling pages are not leaves, then the pointer map entry 
+  ** associated with the right-child of each sibling may also need to be 
+  ** updated. This happens below, after the sibling pages have been 
+  ** populated, not here.
+  */
+  if( ISAUTOVACUUM ){
+    MemPage *pNew = apNew[0];
+    u8 *aOld = pNew->aData;
+    int cntOldNext = pNew->nCell + pNew->nOverflow;
+    int usableSize = pBt->usableSize;
+    int iNew = 0;
+    int iOld = 0;
+
+    for(i=0; i<nCell; i++){
+      u8 *pCell = apCell[i];
+      if( i==cntOldNext ){
+        MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld];
+        cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
+        aOld = pOld->aData;
+      }
+      if( i==cntNew[iNew] ){
+        pNew = apNew[++iNew];
+        if( !leafData ) continue;
+      }
+
+      /* Cell pCell is destined for new sibling page pNew. Originally, it
+      ** was either part of sibling page iOld (possibly an overflow cell), 
+      ** or else the divider cell to the left of sibling page iOld. So,
+      ** if sibling page iOld had the same page number as pNew, and if
+      ** pCell really was a part of sibling page iOld (not a divider or
+      ** overflow cell), we can skip updating the pointer map entries.  */
+      if( iOld>=nNew
+       || pNew->pgno!=aPgno[iOld]
+       || pCell<aOld
+       || pCell>=&aOld[usableSize]
+      ){
+        if( !leafCorrection ){
+          ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
+        }
+        if( szCell[i]>pNew->minLocal ){
+          ptrmapPutOvflPtr(pNew, pCell, &rc);
+        }
+      }
+    }
+  }
+
+  /* Insert new divider cells into pParent. */
+  for(i=0; i<nNew-1; i++){
+    u8 *pCell;
+    u8 *pTemp;
+    int sz;
+    MemPage *pNew = apNew[i];
+    j = cntNew[i];
+
+    assert( j<nMaxCells );
+    pCell = apCell[j];
+    sz = szCell[j] + leafCorrection;
+    pTemp = &aOvflSpace[iOvflSpace];
+    if( !pNew->leaf ){
+      memcpy(&pNew->aData[8], pCell, 4);
+    }else if( leafData ){
+      /* If the tree is a leaf-data tree, and the siblings are leaves, 
+      ** then there is no divider cell in apCell[]. Instead, the divider 
+      ** cell consists of the integer key for the right-most cell of 
+      ** the sibling-page assembled above only.
+      */
+      CellInfo info;
+      j--;
+      btreeParseCellPtr(pNew, apCell[j], &info);
+      pCell = pTemp;
+      sz = 4 + putVarint(&pCell[4], info.nKey);
+      pTemp = 0;
+    }else{
+      pCell -= 4;
+      /* Obscure case for non-leaf-data trees: If the cell at pCell was
+      ** previously stored on a leaf node, and its reported size was 4
+      ** bytes, then it may actually be smaller than this 
+      ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
+      ** any cell). But it is important to pass the correct size to 
+      ** insertCell(), so reparse the cell now.
+      **
+      ** Note that this can never happen in an SQLite data file, as all
+      ** cells are at least 4 bytes. It only happens in b-trees used
+      ** to evaluate "IN (SELECT ...)" and similar clauses.
+      */
+      if( szCell[j]==4 ){
+        assert(leafCorrection==4);
+        sz = cellSizePtr(pParent, pCell);
+      }
+    }
+    iOvflSpace += sz;
+    assert( sz<=pBt->maxLocal+23 );
+    assert( iOvflSpace <= (int)pBt->pageSize );
+    insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
+    if( rc!=SQLITE_OK ) goto balance_cleanup;
+    assert( sqlite3PagerIswriteable(pParent->pDbPage) );
+  }
+
+  /* Now update the actual sibling pages. The order in which they are updated
+  ** is important, as this code needs to avoid disrupting any page from which
+  ** cells may still to be read. In practice, this means:
+  **
+  **  (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1])
+  **      then it is not safe to update page apNew[iPg] until after
+  **      the left-hand sibling apNew[iPg-1] has been updated.
+  **
+  **  (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1])
+  **      then it is not safe to update page apNew[iPg] until after
+  **      the right-hand sibling apNew[iPg+1] has been updated.
+  **
+  ** If neither of the above apply, the page is safe to update.
+  **
+  ** The iPg value in the following loop starts at nNew-1 goes down
+  ** to 0, then back up to nNew-1 again, thus making two passes over
+  ** the pages.  On the initial downward pass, only condition (1) above
+  ** needs to be tested because (2) will always be true from the previous
+  ** step.  On the upward pass, both conditions are always true, so the
+  ** upwards pass simply processes pages that were missed on the downward
+  ** pass.
+  */
+  for(i=1-nNew; i<nNew; i++){
+    int iPg = i<0 ? -i : i;
+    assert( iPg>=0 && iPg<nNew );
+    if( abDone[iPg] ) continue;         /* Skip pages already processed */
+    if( i>=0                            /* On the upwards pass, or... */
+     || cntOld[iPg-1]>=cntNew[iPg-1]    /* Condition (1) is true */
+    ){
+      int iNew;
+      int iOld;
+      int nNewCell;
+
+      /* Verify condition (1):  If cells are moving left, update iPg
+      ** only after iPg-1 has already been updated. */
+      assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] );
+
+      /* Verify condition (2):  If cells are moving right, update iPg
+      ** only after iPg+1 has already been updated. */
+      assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] );
+
+      if( iPg==0 ){
+        iNew = iOld = 0;
+        nNewCell = cntNew[0];
+      }else{
+        iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell;
+        iNew = cntNew[iPg-1] + !leafData;
+        nNewCell = cntNew[iPg] - iNew;
+      }
+
+      editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell);
+      abDone[iPg]++;
+      apNew[iPg]->nFree = usableSpace-szNew[iPg];
+      assert( apNew[iPg]->nOverflow==0 );
+      assert( apNew[iPg]->nCell==nNewCell );
+    }
+  }
+
+  /* All pages have been processed exactly once */
+  assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );
+
+  assert( nOld>0 );
+  assert( nNew>0 );
+
+  if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){
+    /* The root page of the b-tree now contains no cells. The only sibling
+    ** page is the right-child of the parent. Copy the contents of the
+    ** child page into the parent, decreasing the overall height of the
+    ** b-tree structure by one. This is described as the "balance-shallower"
+    ** sub-algorithm in some documentation.
+    **
+    ** If this is an auto-vacuum database, the call to copyNodeContent() 
+    ** sets all pointer-map entries corresponding to database image pages 
+    ** for which the pointer is stored within the content being copied.
+    **
+    ** It is critical that the child page be defragmented before being
+    ** copied into the parent, because if the parent is page 1 then it will
+    ** by smaller than the child due to the database header, and so all the
+    ** free space needs to be up front.
+    */
+    assert( nNew==1 );
+    rc = defragmentPage(apNew[0]);
+    testcase( rc!=SQLITE_OK );
+    assert( apNew[0]->nFree == 
+        (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2)
+      || rc!=SQLITE_OK
+    );
+    copyNodeContent(apNew[0], pParent, &rc);
+    freePage(apNew[0], &rc);
+  }else if( ISAUTOVACUUM && !leafCorrection ){
+    /* Fix the pointer map entries associated with the right-child of each
+    ** sibling page. All other pointer map entries have already been taken
+    ** care of.  */
+    for(i=0; i<nNew; i++){
+      u32 key = get4byte(&apNew[i]->aData[8]);
+      ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
+    }
+  }
+
+  assert( pParent->isInit );
+  TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
+          nOld, nNew, nCell));
+
+  /* Free any old pages that were not reused as new pages.
+  */
+  for(i=nNew; i<nOld; i++){
+    freePage(apOld[i], &rc);
+  }
+
+#if 0
+  if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
+    /* The ptrmapCheckPages() contains assert() statements that verify that
+    ** all pointer map pages are set correctly. This is helpful while 
+    ** debugging. This is usually disabled because a corrupt database may
+    ** cause an assert() statement to fail.  */
+    ptrmapCheckPages(apNew, nNew);
+    ptrmapCheckPages(&pParent, 1);
+  }
+#endif
+
+  /*
+  ** Cleanup before returning.
+  */
+balance_cleanup:
+  sqlite3ScratchFree(apCell);
+  for(i=0; i<nOld; i++){
+    releasePage(apOld[i]);
+  }
+  for(i=0; i<nNew; i++){
+    releasePage(apNew[i]);
+  }
+
+  return rc;
+}
+#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
+#pragma optimize("", on)
+#endif
+
+
+/*
+** This function is called when the root page of a b-tree structure is
+** overfull (has one or more overflow pages).
+**
+** A new child page is allocated and the contents of the current root
+** page, including overflow cells, are copied into the child. The root
+** page is then overwritten to make it an empty page with the right-child 
+** pointer pointing to the new page.
+**
+** Before returning, all pointer-map entries corresponding to pages 
+** that the new child-page now contains pointers to are updated. The
+** entry corresponding to the new right-child pointer of the root
+** page is also updated.
+**
+** If successful, *ppChild is set to contain a reference to the child 
+** page and SQLITE_OK is returned. In this case the caller is required
+** to call releasePage() on *ppChild exactly once. If an error occurs,
+** an error code is returned and *ppChild is set to 0.
+*/
+static int balance_deeper(MemPage *pRoot, MemPage **ppChild){
+  int rc;                        /* Return value from subprocedures */
+  MemPage *pChild = 0;           /* Pointer to a new child page */
+  Pgno pgnoChild = 0;            /* Page number of the new child page */
+  BtShared *pBt = pRoot->pBt;    /* The BTree */
+
+  assert( pRoot->nOverflow>0 );
+  assert( sqlite3_mutex_held(pBt->mutex) );
+
+  /* Make pRoot, the root page of the b-tree, writable. Allocate a new 
+  ** page that will become the new right-child of pPage. Copy the contents
+  ** of the node stored on pRoot into the new child page.
+  */
+  rc = sqlite3PagerWrite(pRoot->pDbPage);
+  if( rc==SQLITE_OK ){
+    rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0);
+    copyNodeContent(pRoot, pChild, &rc);
+    if( ISAUTOVACUUM ){
+      ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc);
+    }
+  }
+  if( rc ){
+    *ppChild = 0;
+    releasePage(pChild);
+    return rc;
+  }
+  assert( sqlite3PagerIswriteable(pChild->pDbPage) );
+  assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
+  assert( pChild->nCell==pRoot->nCell );
+
+  TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno));
+
+  /* Copy the overflow cells from pRoot to pChild */
+  memcpy(pChild->aiOvfl, pRoot->aiOvfl,
+         pRoot->nOverflow*sizeof(pRoot->aiOvfl[0]));
+  memcpy(pChild->apOvfl, pRoot->apOvfl,
+         pRoot->nOverflow*sizeof(pRoot->apOvfl[0]));
+  pChild->nOverflow = pRoot->nOverflow;
+
+  /* Zero the contents of pRoot. Then install pChild as the right-child. */
+  zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF);
+  put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild);
+
+  *ppChild = pChild;
+  return SQLITE_OK;
+}
+
+/*
+** The page that pCur currently points to has just been modified in
+** some way. This function figures out if this modification means the
+** tree needs to be balanced, and if so calls the appropriate balancing 
+** routine. Balancing routines are:
+**
+**   balance_quick()
+**   balance_deeper()
+**   balance_nonroot()
+*/
+static int balance(BtCursor *pCur){
+  int rc = SQLITE_OK;
+  const int nMin = pCur->pBt->usableSize * 2 / 3;
+  u8 aBalanceQuickSpace[13];
+  u8 *pFree = 0;
+
+  TESTONLY( int balance_quick_called = 0 );
+  TESTONLY( int balance_deeper_called = 0 );
+
+  do {
+    int iPage = pCur->iPage;
+    MemPage *pPage = pCur->apPage[iPage];
+
+    if( iPage==0 ){
+      if( pPage->nOverflow ){
+        /* The root page of the b-tree is overfull. In this case call the
+        ** balance_deeper() function to create a new child for the root-page
+        ** and copy the current contents of the root-page to it. The
+        ** next iteration of the do-loop will balance the child page.
+        */ 
+        assert( (balance_deeper_called++)==0 );
+        rc = balance_deeper(pPage, &pCur->apPage[1]);
+        if( rc==SQLITE_OK ){
+          pCur->iPage = 1;
+          pCur->aiIdx[0] = 0;
+          pCur->aiIdx[1] = 0;
+          assert( pCur->apPage[1]->nOverflow );
+        }
+      }else{
+        break;
+      }
+    }else if( pPage->nOverflow==0 && pPage->nFree<=nMin ){
+      break;
+    }else{
+      MemPage * const pParent = pCur->apPage[iPage-1];
+      int const iIdx = pCur->aiIdx[iPage-1];
+
+      rc = sqlite3PagerWrite(pParent->pDbPage);
+      if( rc==SQLITE_OK ){
+#ifndef SQLITE_OMIT_QUICKBALANCE
+        if( pPage->intKeyLeaf
+         && pPage->nOverflow==1
+         && pPage->aiOvfl[0]==pPage->nCell
+         && pParent->pgno!=1
+         && pParent->nCell==iIdx
+        ){
+          /* Call balance_quick() to create a new sibling of pPage on which
+          ** to store the overflow cell. balance_quick() inserts a new cell
+          ** into pParent, which may cause pParent overflow. If this
+          ** happens, the next iteration of the do-loop will balance pParent 
+          ** use either balance_nonroot() or balance_deeper(). Until this
+          ** happens, the overflow cell is stored in the aBalanceQuickSpace[]
+          ** buffer. 
+          **
+          ** The purpose of the following assert() is to check that only a
+          ** single call to balance_quick() is made for each call to this
+          ** function. If this were not verified, a subtle bug involving reuse
+          ** of the aBalanceQuickSpace[] might sneak in.
+          */
+          assert( (balance_quick_called++)==0 );
+          rc = balance_quick(pParent, pPage, aBalanceQuickSpace);
+        }else
+#endif
+        {
+          /* In this case, call balance_nonroot() to redistribute cells
+          ** between pPage and up to 2 of its sibling pages. This involves
+          ** modifying the contents of pParent, which may cause pParent to
+          ** become overfull or underfull. The next iteration of the do-loop
+          ** will balance the parent page to correct this.
+          ** 
+          ** If the parent page becomes overfull, the overflow cell or cells
+          ** are stored in the pSpace buffer allocated immediately below. 
+          ** A subsequent iteration of the do-loop will deal with this by
+          ** calling balance_nonroot() (balance_deeper() may be called first,
+          ** but it doesn't deal with overflow cells - just moves them to a
+          ** different page). Once this subsequent call to balance_nonroot() 
+          ** has completed, it is safe to release the pSpace buffer used by
+          ** the previous call, as the overflow cell data will have been 
+          ** copied either into the body of a database page or into the new
+          ** pSpace buffer passed to the latter call to balance_nonroot().
+          */
+          u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize);
+          rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, pCur->hints);
+          if( pFree ){
+            /* If pFree is not NULL, it points to the pSpace buffer used 
+            ** by a previous call to balance_nonroot(). Its contents are
+            ** now stored either on real database pages or within the 
+            ** new pSpace buffer, so it may be safely freed here. */
+            sqlite3PageFree(pFree);
+          }
+
+          /* The pSpace buffer will be freed after the next call to
+          ** balance_nonroot(), or just before this function returns, whichever
+          ** comes first. */
+          pFree = pSpace;
+        }
+      }
+
+      pPage->nOverflow = 0;
+
+      /* The next iteration of the do-loop balances the parent page. */
+      releasePage(pPage);
+      pCur->iPage--;
+    }
+  }while( rc==SQLITE_OK );
+
+  if( pFree ){
+    sqlite3PageFree(pFree);
+  }
+  return rc;
+}
+
+
+/*
+** Insert a new record into the BTree.  The key is given by (pKey,nKey)
+** and the data is given by (pData,nData).  The cursor is used only to
+** define what table the record should be inserted into.  The cursor
+** is left pointing at a random location.
+**
+** For an INTKEY table, only the nKey value of the key is used.  pKey is
+** ignored.  For a ZERODATA table, the pData and nData are both ignored.
+**
+** If the seekResult parameter is non-zero, then a successful call to
+** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already
+** been performed. seekResult is the search result returned (a negative
+** number if pCur points at an entry that is smaller than (pKey, nKey), or
+** a positive value if pCur points at an entry that is larger than 
+** (pKey, nKey)). 
+**
+** If the seekResult parameter is non-zero, then the caller guarantees that
+** cursor pCur is pointing at the existing copy of a row that is to be
+** overwritten.  If the seekResult parameter is 0, then cursor pCur may
+** point to any entry or to no entry at all and so this function has to seek
+** the cursor before the new key can be inserted.
+*/
+SQLITE_PRIVATE int sqlite3BtreeInsert(
+  BtCursor *pCur,                /* Insert data into the table of this cursor */
+  const void *pKey, i64 nKey,    /* The key of the new record */
+  const void *pData, int nData,  /* The data of the new record */
+  int nZero,                     /* Number of extra 0 bytes to append to data */
+  int appendBias,                /* True if this is likely an append */
+  int seekResult                 /* Result of prior MovetoUnpacked() call */
+){
+  int rc;
+  int loc = seekResult;          /* -1: before desired location  +1: after */
+  int szNew = 0;
+  int idx;
+  MemPage *pPage;
+  Btree *p = pCur->pBtree;
+  BtShared *pBt = p->pBt;
+  unsigned char *oldCell;
+  unsigned char *newCell = 0;
+
+  if( pCur->eState==CURSOR_FAULT ){
+    assert( pCur->skipNext!=SQLITE_OK );
+    return pCur->skipNext;
+  }
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( (pCur->curFlags & BTCF_WriteFlag)!=0
+              && pBt->inTransaction==TRANS_WRITE
+              && (pBt->btsFlags & BTS_READ_ONLY)==0 );
+  assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
+
+  /* Assert that the caller has been consistent. If this cursor was opened
+  ** expecting an index b-tree, then the caller should be inserting blob
+  ** keys with no associated data. If the cursor was opened expecting an
+  ** intkey table, the caller should be inserting integer keys with a
+  ** blob of associated data.  */
+  assert( (pKey==0)==(pCur->pKeyInfo==0) );
+
+  /* Save the positions of any other cursors open on this table.
+  **
+  ** In some cases, the call to btreeMoveto() below is a no-op. For
+  ** example, when inserting data into a table with auto-generated integer
+  ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the 
+  ** integer key to use. It then calls this function to actually insert the 
+  ** data into the intkey B-Tree. In this case btreeMoveto() recognizes
+  ** that the cursor is already where it needs to be and returns without
+  ** doing any work. To avoid thwarting these optimizations, it is important
+  ** not to clear the cursor here.
+  */
+  rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
+  if( rc ) return rc;
+
+  if( pCur->pKeyInfo==0 ){
+    /* If this is an insert into a table b-tree, invalidate any incrblob 
+    ** cursors open on the row being replaced */
+    invalidateIncrblobCursors(p, nKey, 0);
+
+    /* If the cursor is currently on the last row and we are appending a
+    ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto()
+    ** call */
+    if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0
+      && pCur->info.nKey==nKey-1 ){
+      loc = -1;
+    }
+  }
+
+  if( !loc ){
+    rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc);
+    if( rc ) return rc;
+  }
+  assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) );
+
+  pPage = pCur->apPage[pCur->iPage];
+  assert( pPage->intKey || nKey>=0 );
+  assert( pPage->leaf || !pPage->intKey );
+
+  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
+          pCur->pgnoRoot, nKey, nData, pPage->pgno,
+          loc==0 ? "overwrite" : "new entry"));
+  assert( pPage->isInit );
+  newCell = pBt->pTmpSpace;
+  assert( newCell!=0 );
+  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
+  if( rc ) goto end_insert;
+  assert( szNew==cellSizePtr(pPage, newCell) );
+  assert( szNew <= MX_CELL_SIZE(pBt) );
+  idx = pCur->aiIdx[pCur->iPage];
+  if( loc==0 ){
+    u16 szOld;
+    assert( idx<pPage->nCell );
+    rc = sqlite3PagerWrite(pPage->pDbPage);
+    if( rc ){
+      goto end_insert;
+    }
+    oldCell = findCell(pPage, idx);
+    if( !pPage->leaf ){
+      memcpy(newCell, oldCell, 4);
+    }
+    rc = clearCell(pPage, oldCell, &szOld);
+    dropCell(pPage, idx, szOld, &rc);
+    if( rc ) goto end_insert;
+  }else if( loc<0 && pPage->nCell>0 ){
+    assert( pPage->leaf );
+    idx = ++pCur->aiIdx[pCur->iPage];
+  }else{
+    assert( pPage->leaf );
+  }
+  insertCell(pPage, idx, newCell, szNew, 0, 0, &rc);
+  assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 );
+
+  /* If no error has occurred and pPage has an overflow cell, call balance() 
+  ** to redistribute the cells within the tree. Since balance() may move
+  ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey
+  ** variables.
+  **
+  ** Previous versions of SQLite called moveToRoot() to move the cursor
+  ** back to the root page as balance() used to invalidate the contents
+  ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that,
+  ** set the cursor state to "invalid". This makes common insert operations
+  ** slightly faster.
+  **
+  ** There is a subtle but important optimization here too. When inserting
+  ** multiple records into an intkey b-tree using a single cursor (as can
+  ** happen while processing an "INSERT INTO ... SELECT" statement), it
+  ** is advantageous to leave the cursor pointing to the last entry in
+  ** the b-tree if possible. If the cursor is left pointing to the last
+  ** entry in the table, and the next row inserted has an integer key
+  ** larger than the largest existing key, it is possible to insert the
+  ** row without seeking the cursor. This can be a big performance boost.
+  */
+  pCur->info.nSize = 0;
+  if( rc==SQLITE_OK && pPage->nOverflow ){
+    pCur->curFlags &= ~(BTCF_ValidNKey);
+    rc = balance(pCur);
+
+    /* Must make sure nOverflow is reset to zero even if the balance()
+    ** fails. Internal data structure corruption will result otherwise. 
+    ** Also, set the cursor state to invalid. This stops saveCursorPosition()
+    ** from trying to save the current position of the cursor.  */
+    pCur->apPage[pCur->iPage]->nOverflow = 0;
+    pCur->eState = CURSOR_INVALID;
+  }
+  assert( pCur->apPage[pCur->iPage]->nOverflow==0 );
+
+end_insert:
+  return rc;
+}
+
+/*
+** Delete the entry that the cursor is pointing to.  The cursor
+** is left pointing at an arbitrary location.
+*/
+SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){
+  Btree *p = pCur->pBtree;
+  BtShared *pBt = p->pBt;              
+  int rc;                              /* Return code */
+  MemPage *pPage;                      /* Page to delete cell from */
+  unsigned char *pCell;                /* Pointer to cell to delete */
+  int iCellIdx;                        /* Index of cell to delete */
+  int iCellDepth;                      /* Depth of node containing pCell */ 
+  u16 szCell;                          /* Size of the cell being deleted */
+
+  assert( cursorHoldsMutex(pCur) );
+  assert( pBt->inTransaction==TRANS_WRITE );
+  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
+  assert( pCur->curFlags & BTCF_WriteFlag );
+  assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
+  assert( !hasReadConflicts(p, pCur->pgnoRoot) );
+
+  if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell) 
+   || NEVER(pCur->eState!=CURSOR_VALID)
+  ){
+    return SQLITE_ERROR;  /* Something has gone awry. */
+  }
+
+  iCellDepth = pCur->iPage;
+  iCellIdx = pCur->aiIdx[iCellDepth];
+  pPage = pCur->apPage[iCellDepth];
+  pCell = findCell(pPage, iCellIdx);
+
+  /* If the page containing the entry to delete is not a leaf page, move
+  ** the cursor to the largest entry in the tree that is smaller than
+  ** the entry being deleted. This cell will replace the cell being deleted
+  ** from the internal node. The 'previous' entry is used for this instead
+  ** of the 'next' entry, as the previous entry is always a part of the
+  ** sub-tree headed by the child page of the cell being deleted. This makes
+  ** balancing the tree following the delete operation easier.  */
+  if( !pPage->leaf ){
+    int notUsed = 0;
+    rc = sqlite3BtreePrevious(pCur, &notUsed);
+    if( rc ) return rc;
+  }
+
+  /* Save the positions of any other cursors open on this table before
+  ** making any modifications. Make the page containing the entry to be 
+  ** deleted writable. Then free any overflow pages associated with the 
+  ** entry and finally remove the cell itself from within the page.  
+  */
+  rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
+  if( rc ) return rc;
+
+  /* If this is a delete operation to remove a row from a table b-tree,
+  ** invalidate any incrblob cursors open on the row being deleted.  */
+  if( pCur->pKeyInfo==0 ){
+    invalidateIncrblobCursors(p, pCur->info.nKey, 0);
+  }
+
+  rc = sqlite3PagerWrite(pPage->pDbPage);
+  if( rc ) return rc;
+  rc = clearCell(pPage, pCell, &szCell);
+  dropCell(pPage, iCellIdx, szCell, &rc);
+  if( rc ) return rc;
+
+  /* If the cell deleted was not located on a leaf page, then the cursor
+  ** is currently pointing to the largest entry in the sub-tree headed
+  ** by the child-page of the cell that was just deleted from an internal
+  ** node. The cell from the leaf node needs to be moved to the internal
+  ** node to replace the deleted cell.  */
+  if( !pPage->leaf ){
+    MemPage *pLeaf = pCur->apPage[pCur->iPage];
+    int nCell;
+    Pgno n = pCur->apPage[iCellDepth+1]->pgno;
+    unsigned char *pTmp;
+
+    pCell = findCell(pLeaf, pLeaf->nCell-1);
+    nCell = cellSizePtr(pLeaf, pCell);
+    assert( MX_CELL_SIZE(pBt) >= nCell );
+    pTmp = pBt->pTmpSpace;
+    assert( pTmp!=0 );
+    rc = sqlite3PagerWrite(pLeaf->pDbPage);
+    insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
+    dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
+    if( rc ) return rc;
+  }
+
+  /* Balance the tree. If the entry deleted was located on a leaf page,
+  ** then the cursor still points to that page. In this case the first
+  ** call to balance() repairs the tree, and the if(...) condition is
+  ** never true.
+  **
+  ** Otherwise, if the entry deleted was on an internal node page, then
+  ** pCur is pointing to the leaf page from which a cell was removed to
+  ** replace the cell deleted from the internal node. This is slightly
+  ** tricky as the leaf node may be underfull, and the internal node may
+  ** be either under or overfull. In this case run the balancing algorithm
+  ** on the leaf node first. If the balance proceeds far enough up the
+  ** tree that we can be sure that any problem in the internal node has
+  ** been corrected, so be it. Otherwise, after balancing the leaf node,
+  ** walk the cursor up the tree to the internal node and balance it as 
+  ** well.  */
+  rc = balance(pCur);
+  if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
+    while( pCur->iPage>iCellDepth ){
+      releasePage(pCur->apPage[pCur->iPage--]);
+    }
+    rc = balance(pCur);
+  }
+
+  if( rc==SQLITE_OK ){
+    moveToRoot(pCur);
+  }
+  return rc;
+}
+
+/*
+** Create a new BTree table.  Write into *piTable the page
+** number for the root page of the new table.
+**
+** The type of type is determined by the flags parameter.  Only the
+** following values of flags are currently in use.  Other values for
+** flags might not work:
+**
+**     BTREE_INTKEY|BTREE_LEAFDATA     Used for SQL tables with rowid keys
+**     BTREE_ZERODATA                  Used for SQL indices
+*/
+static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){
+  BtShared *pBt = p->pBt;
+  MemPage *pRoot;
+  Pgno pgnoRoot;
+  int rc;
+  int ptfFlags;          /* Page-type flage for the root page of new table */
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( pBt->inTransaction==TRANS_WRITE );
+  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
+
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
+  if( rc ){
+    return rc;
+  }
+#else
+  if( pBt->autoVacuum ){
+    Pgno pgnoMove;      /* Move a page here to make room for the root-page */
+    MemPage *pPageMove; /* The page to move to. */
+
+    /* Creating a new table may probably require moving an existing database
+    ** to make room for the new tables root page. In case this page turns
+    ** out to be an overflow page, delete all overflow page-map caches
+    ** held by open cursors.
+    */
+    invalidateAllOverflowCache(pBt);
+
+    /* Read the value of meta[3] from the database to determine where the
+    ** root page of the new table should go. meta[3] is the largest root-page
+    ** created so far, so the new root-page is (meta[3]+1).
+    */
+    sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot);
+    pgnoRoot++;
+
+    /* The new root-page may not be allocated on a pointer-map page, or the
+    ** PENDING_BYTE page.
+    */
+    while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
+        pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
+      pgnoRoot++;
+    }
+    assert( pgnoRoot>=3 );
+
+    /* Allocate a page. The page that currently resides at pgnoRoot will
+    ** be moved to the allocated page (unless the allocated page happens
+    ** to reside at pgnoRoot).
+    */
+    rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+
+    if( pgnoMove!=pgnoRoot ){
+      /* pgnoRoot is the page that will be used for the root-page of
+      ** the new table (assuming an error did not occur). But we were
+      ** allocated pgnoMove. If required (i.e. if it was not allocated
+      ** by extending the file), the current page at position pgnoMove
+      ** is already journaled.
+      */
+      u8 eType = 0;
+      Pgno iPtrPage = 0;
+
+      /* Save the positions of any open cursors. This is required in
+      ** case they are holding a reference to an xFetch reference
+      ** corresponding to page pgnoRoot.  */
+      rc = saveAllCursors(pBt, 0, 0);
+      releasePage(pPageMove);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+
+      /* Move the page currently at pgnoRoot to pgnoMove. */
+      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+      rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
+      if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
+        rc = SQLITE_CORRUPT_BKPT;
+      }
+      if( rc!=SQLITE_OK ){
+        releasePage(pRoot);
+        return rc;
+      }
+      assert( eType!=PTRMAP_ROOTPAGE );
+      assert( eType!=PTRMAP_FREEPAGE );
+      rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0);
+      releasePage(pRoot);
+
+      /* Obtain the page at pgnoRoot */
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+      rc = sqlite3PagerWrite(pRoot->pDbPage);
+      if( rc!=SQLITE_OK ){
+        releasePage(pRoot);
+        return rc;
+      }
+    }else{
+      pRoot = pPageMove;
+    } 
+
+    /* Update the pointer-map and meta-data with the new root-page number. */
+    ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc);
+    if( rc ){
+      releasePage(pRoot);
+      return rc;
+    }
+
+    /* When the new root page was allocated, page 1 was made writable in
+    ** order either to increase the database filesize, or to decrement the
+    ** freelist count.  Hence, the sqlite3BtreeUpdateMeta() call cannot fail.
+    */
+    assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) );
+    rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
+    if( NEVER(rc) ){
+      releasePage(pRoot);
+      return rc;
+    }
+
+  }else{
+    rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
+    if( rc ) return rc;
+  }
+#endif
+  assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
+  if( createTabFlags & BTREE_INTKEY ){
+    ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF;
+  }else{
+    ptfFlags = PTF_ZERODATA | PTF_LEAF;
+  }
+  zeroPage(pRoot, ptfFlags);
+  sqlite3PagerUnref(pRoot->pDbPage);
+  assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 );
+  *piTable = (int)pgnoRoot;
+  return SQLITE_OK;
+}
+SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
+  int rc;
+  sqlite3BtreeEnter(p);
+  rc = btreeCreateTable(p, piTable, flags);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Erase the given database page and all its children.  Return
+** the page to the freelist.
+*/
+static int clearDatabasePage(
+  BtShared *pBt,           /* The BTree that contains the table */
+  Pgno pgno,               /* Page number to clear */
+  int freePageFlag,        /* Deallocate page if true */
+  int *pnChange            /* Add number of Cells freed to this counter */
+){
+  MemPage *pPage;
+  int rc;
+  unsigned char *pCell;
+  int i;
+  int hdr;
+  u16 szCell;
+
+  assert( sqlite3_mutex_held(pBt->mutex) );
+  if( pgno>btreePagecount(pBt) ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+
+  rc = getAndInitPage(pBt, pgno, &pPage, 0);
+  if( rc ) return rc;
+  hdr = pPage->hdrOffset;
+  for(i=0; i<pPage->nCell; i++){
+    pCell = findCell(pPage, i);
+    if( !pPage->leaf ){
+      rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
+      if( rc ) goto cleardatabasepage_out;
+    }
+    rc = clearCell(pPage, pCell, &szCell);
+    if( rc ) goto cleardatabasepage_out;
+  }
+  if( !pPage->leaf ){
+    rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange);
+    if( rc ) goto cleardatabasepage_out;
+  }else if( pnChange ){
+    assert( pPage->intKey );
+    *pnChange += pPage->nCell;
+  }
+  if( freePageFlag ){
+    freePage(pPage, &rc);
+  }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
+    zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF);
+  }
+
+cleardatabasepage_out:
+  releasePage(pPage);
+  return rc;
+}
+
+/*
+** Delete all information from a single table in the database.  iTable is
+** the page number of the root of the table.  After this routine returns,
+** the root page is empty, but still exists.
+**
+** This routine will fail with SQLITE_LOCKED if there are any open
+** read cursors on the table.  Open write cursors are moved to the
+** root of the table.
+**
+** If pnChange is not NULL, then table iTable must be an intkey table. The
+** integer value pointed to by pnChange is incremented by the number of
+** entries in the table.
+*/
+SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){
+  int rc;
+  BtShared *pBt = p->pBt;
+  sqlite3BtreeEnter(p);
+  assert( p->inTrans==TRANS_WRITE );
+
+  rc = saveAllCursors(pBt, (Pgno)iTable, 0);
+
+  if( SQLITE_OK==rc ){
+    /* Invalidate all incrblob cursors open on table iTable (assuming iTable
+    ** is the root of a table b-tree - if it is not, the following call is
+    ** a no-op).  */
+    invalidateIncrblobCursors(p, 0, 1);
+    rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange);
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+/*
+** Delete all information from the single table that pCur is open on.
+**
+** This routine only work for pCur on an ephemeral table.
+*/
+SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){
+  return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0);
+}
+
+/*
+** Erase all information in a table and add the root of the table to
+** the freelist.  Except, the root of the principle table (the one on
+** page 1) is never added to the freelist.
+**
+** This routine will fail with SQLITE_LOCKED if there are any open
+** cursors on the table.
+**
+** If AUTOVACUUM is enabled and the page at iTable is not the last
+** root page in the database file, then the last root page 
+** in the database file is moved into the slot formerly occupied by
+** iTable and that last slot formerly occupied by the last root page
+** is added to the freelist instead of iTable.  In this say, all
+** root pages are kept at the beginning of the database file, which
+** is necessary for AUTOVACUUM to work right.  *piMoved is set to the 
+** page number that used to be the last root page in the file before
+** the move.  If no page gets moved, *piMoved is set to 0.
+** The last root page is recorded in meta[3] and the value of
+** meta[3] is updated by this procedure.
+*/
+static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
+  int rc;
+  MemPage *pPage = 0;
+  BtShared *pBt = p->pBt;
+
+  assert( sqlite3BtreeHoldsMutex(p) );
+  assert( p->inTrans==TRANS_WRITE );
+
+  /* It is illegal to drop a table if any cursors are open on the
+  ** database. This is because in auto-vacuum mode the backend may
+  ** need to move another root-page to fill a gap left by the deleted
+  ** root page. If an open cursor was using this page a problem would 
+  ** occur.
+  **
+  ** This error is caught long before control reaches this point.
+  */
+  if( NEVER(pBt->pCursor) ){
+    sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db);
+    return SQLITE_LOCKED_SHAREDCACHE;
+  }
+
+  rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
+  if( rc ) return rc;
+  rc = sqlite3BtreeClearTable(p, iTable, 0);
+  if( rc ){
+    releasePage(pPage);
+    return rc;
+  }
+
+  *piMoved = 0;
+
+  if( iTable>1 ){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+    freePage(pPage, &rc);
+    releasePage(pPage);
+#else
+    if( pBt->autoVacuum ){
+      Pgno maxRootPgno;
+      sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno);
+
+      if( iTable==maxRootPgno ){
+        /* If the table being dropped is the table with the largest root-page
+        ** number in the database, put the root page on the free list. 
+        */
+        freePage(pPage, &rc);
+        releasePage(pPage);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+      }else{
+        /* The table being dropped does not have the largest root-page
+        ** number in the database. So move the page that does into the 
+        ** gap left by the deleted root-page.
+        */
+        MemPage *pMove;
+        releasePage(pPage);
+        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+        rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
+        releasePage(pMove);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+        pMove = 0;
+        rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
+        freePage(pMove, &rc);
+        releasePage(pMove);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+        *piMoved = maxRootPgno;
+      }
+
+      /* Set the new 'max-root-page' value in the database header. This
+      ** is the old value less one, less one more if that happens to
+      ** be a root-page number, less one again if that is the
+      ** PENDING_BYTE_PAGE.
+      */
+      maxRootPgno--;
+      while( maxRootPgno==PENDING_BYTE_PAGE(pBt)
+             || PTRMAP_ISPAGE(pBt, maxRootPgno) ){
+        maxRootPgno--;
+      }
+      assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
+
+      rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
+    }else{
+      freePage(pPage, &rc);
+      releasePage(pPage);
+    }
+#endif
+  }else{
+    /* If sqlite3BtreeDropTable was called on page 1.
+    ** This really never should happen except in a corrupt
+    ** database. 
+    */
+    zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
+    releasePage(pPage);
+  }
+  return rc;  
+}
+SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
+  int rc;
+  sqlite3BtreeEnter(p);
+  rc = btreeDropTable(p, iTable, piMoved);
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+
+/*
+** This function may only be called if the b-tree connection already
+** has a read or write transaction open on the database.
+**
+** Read the meta-information out of a database file.  Meta[0]
+** is the number of free pages currently in the database.  Meta[1]
+** through meta[15] are available for use by higher layers.  Meta[0]
+** is read-only, the others are read/write.
+** 
+** The schema layer numbers meta values differently.  At the schema
+** layer (and the SetCookie and ReadCookie opcodes) the number of
+** free pages is not visible.  So Cookie[0] is the same as Meta[1].
+**
+** This routine treats Meta[BTREE_DATA_VERSION] as a special case.  Instead
+** of reading the value out of the header, it instead loads the "DataVersion"
+** from the pager.  The BTREE_DATA_VERSION value is not actually stored in the
+** database file.  It is a number computed by the pager.  But its access
+** pattern is the same as header meta values, and so it is convenient to
+** read it from this routine.
+*/
+SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
+  BtShared *pBt = p->pBt;
+
+  sqlite3BtreeEnter(p);
+  assert( p->inTrans>TRANS_NONE );
+  assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) );
+  assert( pBt->pPage1 );
+  assert( idx>=0 && idx<=15 );
+
+  if( idx==BTREE_DATA_VERSION ){
+    *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion;
+  }else{
+    *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
+  }
+
+  /* If auto-vacuum is disabled in this build and this is an auto-vacuum
+  ** database, mark the database as read-only.  */
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){
+    pBt->btsFlags |= BTS_READ_ONLY;
+  }
+#endif
+
+  sqlite3BtreeLeave(p);
+}
+
+/*
+** Write meta-information back into the database.  Meta[0] is
+** read-only and may not be written.
+*/
+SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
+  BtShared *pBt = p->pBt;
+  unsigned char *pP1;
+  int rc;
+  assert( idx>=1 && idx<=15 );
+  sqlite3BtreeEnter(p);
+  assert( p->inTrans==TRANS_WRITE );
+  assert( pBt->pPage1!=0 );
+  pP1 = pBt->pPage1->aData;
+  rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
+  if( rc==SQLITE_OK ){
+    put4byte(&pP1[36 + idx*4], iMeta);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( idx==BTREE_INCR_VACUUM ){
+      assert( pBt->autoVacuum || iMeta==0 );
+      assert( iMeta==0 || iMeta==1 );
+      pBt->incrVacuum = (u8)iMeta;
+    }
+#endif
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_BTREECOUNT
+/*
+** The first argument, pCur, is a cursor opened on some b-tree. Count the
+** number of entries in the b-tree and write the result to *pnEntry.
+**
+** SQLITE_OK is returned if the operation is successfully executed. 
+** Otherwise, if an error is encountered (i.e. an IO error or database
+** corruption) an SQLite error code is returned.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){
+  i64 nEntry = 0;                      /* Value to return in *pnEntry */
+  int rc;                              /* Return code */
+
+  if( pCur->pgnoRoot==0 ){
+    *pnEntry = 0;
+    return SQLITE_OK;
+  }
+  rc = moveToRoot(pCur);
+
+  /* Unless an error occurs, the following loop runs one iteration for each
+  ** page in the B-Tree structure (not including overflow pages). 
+  */
+  while( rc==SQLITE_OK ){
+    int iIdx;                          /* Index of child node in parent */
+    MemPage *pPage;                    /* Current page of the b-tree */
+
+    /* If this is a leaf page or the tree is not an int-key tree, then 
+    ** this page contains countable entries. Increment the entry counter
+    ** accordingly.
+    */
+    pPage = pCur->apPage[pCur->iPage];
+    if( pPage->leaf || !pPage->intKey ){
+      nEntry += pPage->nCell;
+    }
+
+    /* pPage is a leaf node. This loop navigates the cursor so that it 
+    ** points to the first interior cell that it points to the parent of
+    ** the next page in the tree that has not yet been visited. The
+    ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell
+    ** of the page, or to the number of cells in the page if the next page
+    ** to visit is the right-child of its parent.
+    **
+    ** If all pages in the tree have been visited, return SQLITE_OK to the
+    ** caller.
+    */
+    if( pPage->leaf ){
+      do {
+        if( pCur->iPage==0 ){
+          /* All pages of the b-tree have been visited. Return successfully. */
+          *pnEntry = nEntry;
+          return moveToRoot(pCur);
+        }
+        moveToParent(pCur);
+      }while ( pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell );
+
+      pCur->aiIdx[pCur->iPage]++;
+      pPage = pCur->apPage[pCur->iPage];
+    }
+
+    /* Descend to the child node of the cell that the cursor currently 
+    ** points at. This is the right-child if (iIdx==pPage->nCell).
+    */
+    iIdx = pCur->aiIdx[pCur->iPage];
+    if( iIdx==pPage->nCell ){
+      rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
+    }else{
+      rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx)));
+    }
+  }
+
+  /* An error has occurred. Return an error code. */
+  return rc;
+}
+#endif
+
+/*
+** Return the pager associated with a BTree.  This routine is used for
+** testing and debugging only.
+*/
+SQLITE_PRIVATE Pager *sqlite3BtreePager(Btree *p){
+  return p->pBt->pPager;
+}
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+/*
+** Append a message to the error message string.
+*/
+static void checkAppendMsg(
+  IntegrityCk *pCheck,
+  const char *zFormat,
+  ...
+){
+  va_list ap;
+  char zBuf[200];
+  if( !pCheck->mxErr ) return;
+  pCheck->mxErr--;
+  pCheck->nErr++;
+  va_start(ap, zFormat);
+  if( pCheck->errMsg.nChar ){
+    sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1);
+  }
+  if( pCheck->zPfx ){
+    sqlite3_snprintf(sizeof(zBuf), zBuf, pCheck->zPfx, pCheck->v1, pCheck->v2);
+    sqlite3StrAccumAppendAll(&pCheck->errMsg, zBuf);
+  }
+  sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap);
+  va_end(ap);
+  if( pCheck->errMsg.accError==STRACCUM_NOMEM ){
+    pCheck->mallocFailed = 1;
+  }
+}
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+
+/*
+** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that
+** corresponds to page iPg is already set.
+*/
+static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){
+  assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
+  return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07)));
+}
+
+/*
+** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg.
+*/
+static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){
+  assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 );
+  pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07));
+}
+
+
+/*
+** Add 1 to the reference count for page iPage.  If this is the second
+** reference to the page, add an error message to pCheck->zErrMsg.
+** Return 1 if there are 2 or more references to the page and 0 if
+** if this is the first reference to the page.
+**
+** Also check that the page number is in bounds.
+*/
+static int checkRef(IntegrityCk *pCheck, Pgno iPage){
+  if( iPage==0 ) return 1;
+  if( iPage>pCheck->nPage ){
+    checkAppendMsg(pCheck, "invalid page number %d", iPage);
+    return 1;
+  }
+  if( getPageReferenced(pCheck, iPage) ){
+    checkAppendMsg(pCheck, "2nd reference to page %d", iPage);
+    return 1;
+  }
+  setPageReferenced(pCheck, iPage);
+  return 0;
+}
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** Check that the entry in the pointer-map for page iChild maps to 
+** page iParent, pointer type ptrType. If not, append an error message
+** to pCheck.
+*/
+static void checkPtrmap(
+  IntegrityCk *pCheck,   /* Integrity check context */
+  Pgno iChild,           /* Child page number */
+  u8 eType,              /* Expected pointer map type */
+  Pgno iParent           /* Expected pointer map parent page number */
+){
+  int rc;
+  u8 ePtrmapType;
+  Pgno iPtrmapParent;
+
+  rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
+  if( rc!=SQLITE_OK ){
+    if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1;
+    checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild);
+    return;
+  }
+
+  if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
+    checkAppendMsg(pCheck,
+      "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", 
+      iChild, eType, iParent, ePtrmapType, iPtrmapParent);
+  }
+}
+#endif
+
+/*
+** Check the integrity of the freelist or of an overflow page list.
+** Verify that the number of pages on the list is N.
+*/
+static void checkList(
+  IntegrityCk *pCheck,  /* Integrity checking context */
+  int isFreeList,       /* True for a freelist.  False for overflow page list */
+  int iPage,            /* Page number for first page in the list */
+  int N                 /* Expected number of pages in the list */
+){
+  int i;
+  int expected = N;
+  int iFirst = iPage;
+  while( N-- > 0 && pCheck->mxErr ){
+    DbPage *pOvflPage;
+    unsigned char *pOvflData;
+    if( iPage<1 ){
+      checkAppendMsg(pCheck,
+         "%d of %d pages missing from overflow list starting at %d",
+          N+1, expected, iFirst);
+      break;
+    }
+    if( checkRef(pCheck, iPage) ) break;
+    if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){
+      checkAppendMsg(pCheck, "failed to get page %d", iPage);
+      break;
+    }
+    pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
+    if( isFreeList ){
+      int n = get4byte(&pOvflData[4]);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      if( pCheck->pBt->autoVacuum ){
+        checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0);
+      }
+#endif
+      if( n>(int)pCheck->pBt->usableSize/4-2 ){
+        checkAppendMsg(pCheck,
+           "freelist leaf count too big on page %d", iPage);
+        N--;
+      }else{
+        for(i=0; i<n; i++){
+          Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+          if( pCheck->pBt->autoVacuum ){
+            checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0);
+          }
+#endif
+          checkRef(pCheck, iFreePage);
+        }
+        N -= n;
+      }
+    }
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    else{
+      /* If this database supports auto-vacuum and iPage is not the last
+      ** page in this overflow list, check that the pointer-map entry for
+      ** the following page matches iPage.
+      */
+      if( pCheck->pBt->autoVacuum && N>0 ){
+        i = get4byte(pOvflData);
+        checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage);
+      }
+    }
+#endif
+    iPage = get4byte(pOvflData);
+    sqlite3PagerUnref(pOvflPage);
+  }
+}
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+/*
+** Do various sanity checks on a single page of a tree.  Return
+** the tree depth.  Root pages return 0.  Parents of root pages
+** return 1, and so forth.
+** 
+** These checks are done:
+**
+**      1.  Make sure that cells and freeblocks do not overlap
+**          but combine to completely cover the page.
+**  NO  2.  Make sure cell keys are in order.
+**  NO  3.  Make sure no key is less than or equal to zLowerBound.
+**  NO  4.  Make sure no key is greater than or equal to zUpperBound.
+**      5.  Check the integrity of overflow pages.
+**      6.  Recursively call checkTreePage on all children.
+**      7.  Verify that the depth of all children is the same.
+**      8.  Make sure this page is at least 33% full or else it is
+**          the root of the tree.
+*/
+static int checkTreePage(
+  IntegrityCk *pCheck,  /* Context for the sanity check */
+  int iPage,            /* Page number of the page to check */
+  i64 *pnParentMinKey, 
+  i64 *pnParentMaxKey
+){
+  MemPage *pPage;
+  int i, rc, depth, d2, pgno, cnt;
+  int hdr, cellStart;
+  int nCell;
+  u8 *data;
+  BtShared *pBt;
+  int usableSize;
+  char *hit = 0;
+  i64 nMinKey = 0;
+  i64 nMaxKey = 0;
+  const char *saved_zPfx = pCheck->zPfx;
+  int saved_v1 = pCheck->v1;
+  int saved_v2 = pCheck->v2;
+
+  /* Check that the page exists
+  */
+  pBt = pCheck->pBt;
+  usableSize = pBt->usableSize;
+  if( iPage==0 ) return 0;
+  if( checkRef(pCheck, iPage) ) return 0;
+  pCheck->zPfx = "Page %d: ";
+  pCheck->v1 = iPage;
+  if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
+    checkAppendMsg(pCheck,
+       "unable to get the page. error code=%d", rc);
+    depth = -1;
+    goto end_of_check;
+  }
+
+  /* Clear MemPage.isInit to make sure the corruption detection code in
+  ** btreeInitPage() is executed.  */
+  pPage->isInit = 0;
+  if( (rc = btreeInitPage(pPage))!=0 ){
+    assert( rc==SQLITE_CORRUPT );  /* The only possible error from InitPage */
+    checkAppendMsg(pCheck,
+                   "btreeInitPage() returns error code %d", rc);
+    releasePage(pPage);
+    depth = -1;
+    goto end_of_check;
+  }
+
+  /* Check out all the cells.
+  */
+  depth = 0;
+  for(i=0; i<pPage->nCell && pCheck->mxErr; i++){
+    u8 *pCell;
+    u32 sz;
+    CellInfo info;
+
+    /* Check payload overflow pages
+    */
+    pCheck->zPfx = "On tree page %d cell %d: ";
+    pCheck->v1 = iPage;
+    pCheck->v2 = i;
+    pCell = findCell(pPage,i);
+    btreeParseCellPtr(pPage, pCell, &info);
+    sz = info.nPayload;
+    /* For intKey pages, check that the keys are in order.
+    */
+    if( pPage->intKey ){
+      if( i==0 ){
+        nMinKey = nMaxKey = info.nKey;
+      }else if( info.nKey <= nMaxKey ){
+        checkAppendMsg(pCheck,
+           "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey);
+      }
+      nMaxKey = info.nKey;
+    }
+    if( (sz>info.nLocal) 
+     && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize])
+    ){
+      int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
+      Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      if( pBt->autoVacuum ){
+        checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage);
+      }
+#endif
+      checkList(pCheck, 0, pgnoOvfl, nPage);
+    }
+
+    /* Check sanity of left child page.
+    */
+    if( !pPage->leaf ){
+      pgno = get4byte(pCell);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+      if( pBt->autoVacuum ){
+        checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
+      }
+#endif
+      d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey);
+      if( i>0 && d2!=depth ){
+        checkAppendMsg(pCheck, "Child page depth differs");
+      }
+      depth = d2;
+    }
+  }
+
+  if( !pPage->leaf ){
+    pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
+    pCheck->zPfx = "On page %d at right child: ";
+    pCheck->v1 = iPage;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( pBt->autoVacuum ){
+      checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
+    }
+#endif
+    checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey);
+  }
+ 
+  /* For intKey leaf pages, check that the min/max keys are in order
+  ** with any left/parent/right pages.
+  */
+  pCheck->zPfx = "Page %d: ";
+  pCheck->v1 = iPage;
+  if( pPage->leaf && pPage->intKey ){
+    /* if we are a left child page */
+    if( pnParentMinKey ){
+      /* if we are the left most child page */
+      if( !pnParentMaxKey ){
+        if( nMaxKey > *pnParentMinKey ){
+          checkAppendMsg(pCheck,
+              "Rowid %lld out of order (max larger than parent min of %lld)",
+              nMaxKey, *pnParentMinKey);
+        }
+      }else{
+        if( nMinKey <= *pnParentMinKey ){
+          checkAppendMsg(pCheck,
+              "Rowid %lld out of order (min less than parent min of %lld)",
+              nMinKey, *pnParentMinKey);
+        }
+        if( nMaxKey > *pnParentMaxKey ){
+          checkAppendMsg(pCheck,
+              "Rowid %lld out of order (max larger than parent max of %lld)",
+              nMaxKey, *pnParentMaxKey);
+        }
+        *pnParentMinKey = nMaxKey;
+      }
+    /* else if we're a right child page */
+    } else if( pnParentMaxKey ){
+      if( nMinKey <= *pnParentMaxKey ){
+        checkAppendMsg(pCheck,
+            "Rowid %lld out of order (min less than parent max of %lld)",
+            nMinKey, *pnParentMaxKey);
+      }
+    }
+  }
+
+  /* Check for complete coverage of the page
+  */
+  data = pPage->aData;
+  hdr = pPage->hdrOffset;
+  hit = sqlite3PageMalloc( pBt->pageSize );
+  pCheck->zPfx = 0;
+  if( hit==0 ){
+    pCheck->mallocFailed = 1;
+  }else{
+    int contentOffset = get2byteNotZero(&data[hdr+5]);
+    assert( contentOffset<=usableSize );  /* Enforced by btreeInitPage() */
+    memset(hit+contentOffset, 0, usableSize-contentOffset);
+    memset(hit, 1, contentOffset);
+    /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
+    ** number of cells on the page. */
+    nCell = get2byte(&data[hdr+3]);
+    /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
+    ** immediately follows the b-tree page header. */
+    cellStart = hdr + 12 - 4*pPage->leaf;
+    /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
+    ** integer offsets to the cell contents. */
+    for(i=0; i<nCell; i++){
+      int pc = get2byte(&data[cellStart+i*2]);
+      u32 size = 65536;
+      int j;
+      if( pc<=usableSize-4 ){
+        size = cellSizePtr(pPage, &data[pc]);
+      }
+      if( (int)(pc+size-1)>=usableSize ){
+        pCheck->zPfx = 0;
+        checkAppendMsg(pCheck,
+            "Corruption detected in cell %d on page %d",i,iPage);
+      }else{
+        for(j=pc+size-1; j>=pc; j--) hit[j]++;
+      }
+    }
+    /* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
+    ** is the offset of the first freeblock, or zero if there are no
+    ** freeblocks on the page. */
+    i = get2byte(&data[hdr+1]);
+    while( i>0 ){
+      int size, j;
+      assert( i<=usableSize-4 );     /* Enforced by btreeInitPage() */
+      size = get2byte(&data[i+2]);
+      assert( i+size<=usableSize );  /* Enforced by btreeInitPage() */
+      for(j=i+size-1; j>=i; j--) hit[j]++;
+      /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a
+      ** big-endian integer which is the offset in the b-tree page of the next
+      ** freeblock in the chain, or zero if the freeblock is the last on the
+      ** chain. */
+      j = get2byte(&data[i]);
+      /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
+      ** increasing offset. */
+      assert( j==0 || j>i+size );  /* Enforced by btreeInitPage() */
+      assert( j<=usableSize-4 );   /* Enforced by btreeInitPage() */
+      i = j;
+    }
+    for(i=cnt=0; i<usableSize; i++){
+      if( hit[i]==0 ){
+        cnt++;
+      }else if( hit[i]>1 ){
+        checkAppendMsg(pCheck,
+          "Multiple uses for byte %d of page %d", i, iPage);
+        break;
+      }
+    }
+    /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments
+    ** is stored in the fifth field of the b-tree page header.
+    ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the
+    ** number of fragmented free bytes within the cell content area.
+    */
+    if( cnt!=data[hdr+7] ){
+      checkAppendMsg(pCheck,
+          "Fragmentation of %d bytes reported as %d on page %d",
+          cnt, data[hdr+7], iPage);
+    }
+  }
+  sqlite3PageFree(hit);
+  releasePage(pPage);
+
+end_of_check:
+  pCheck->zPfx = saved_zPfx;
+  pCheck->v1 = saved_v1;
+  pCheck->v2 = saved_v2;
+  return depth+1;
+}
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+/*
+** This routine does a complete check of the given BTree file.  aRoot[] is
+** an array of pages numbers were each page number is the root page of
+** a table.  nRoot is the number of entries in aRoot.
+**
+** A read-only or read-write transaction must be opened before calling
+** this function.
+**
+** Write the number of error seen in *pnErr.  Except for some memory
+** allocation errors,  an error message held in memory obtained from
+** malloc is returned if *pnErr is non-zero.  If *pnErr==0 then NULL is
+** returned.  If a memory allocation error occurs, NULL is returned.
+*/
+SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck(
+  Btree *p,     /* The btree to be checked */
+  int *aRoot,   /* An array of root pages numbers for individual trees */
+  int nRoot,    /* Number of entries in aRoot[] */
+  int mxErr,    /* Stop reporting errors after this many */
+  int *pnErr    /* Write number of errors seen to this variable */
+){
+  Pgno i;
+  int nRef;
+  IntegrityCk sCheck;
+  BtShared *pBt = p->pBt;
+  char zErr[100];
+
+  sqlite3BtreeEnter(p);
+  assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE );
+  nRef = sqlite3PagerRefcount(pBt->pPager);
+  sCheck.pBt = pBt;
+  sCheck.pPager = pBt->pPager;
+  sCheck.nPage = btreePagecount(sCheck.pBt);
+  sCheck.mxErr = mxErr;
+  sCheck.nErr = 0;
+  sCheck.mallocFailed = 0;
+  sCheck.zPfx = 0;
+  sCheck.v1 = 0;
+  sCheck.v2 = 0;
+  *pnErr = 0;
+  if( sCheck.nPage==0 ){
+    sqlite3BtreeLeave(p);
+    return 0;
+  }
+
+  sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1);
+  if( !sCheck.aPgRef ){
+    *pnErr = 1;
+    sqlite3BtreeLeave(p);
+    return 0;
+  }
+  i = PENDING_BYTE_PAGE(pBt);
+  if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);
+  sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
+  sCheck.errMsg.useMalloc = 2;
+
+  /* Check the integrity of the freelist
+  */
+  sCheck.zPfx = "Main freelist: ";
+  checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
+            get4byte(&pBt->pPage1->aData[36]));
+  sCheck.zPfx = 0;
+
+  /* Check all the tables.
+  */
+  for(i=0; (int)i<nRoot && sCheck.mxErr; i++){
+    if( aRoot[i]==0 ) continue;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( pBt->autoVacuum && aRoot[i]>1 ){
+      checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0);
+    }
+#endif
+    sCheck.zPfx = "List of tree roots: ";
+    checkTreePage(&sCheck, aRoot[i], NULL, NULL);
+    sCheck.zPfx = 0;
+  }
+
+  /* Make sure every page in the file is referenced
+  */
+  for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+    if( getPageReferenced(&sCheck, i)==0 ){
+      checkAppendMsg(&sCheck, "Page %d is never used", i);
+    }
+#else
+    /* If the database supports auto-vacuum, make sure no tables contain
+    ** references to pointer-map pages.
+    */
+    if( getPageReferenced(&sCheck, i)==0 && 
+       (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
+      checkAppendMsg(&sCheck, "Page %d is never used", i);
+    }
+    if( getPageReferenced(&sCheck, i)!=0 && 
+       (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
+      checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i);
+    }
+#endif
+  }
+
+  /* Make sure this analysis did not leave any unref() pages.
+  ** This is an internal consistency check; an integrity check
+  ** of the integrity check.
+  */
+  if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){
+    checkAppendMsg(&sCheck,
+      "Outstanding page count goes from %d to %d during this analysis",
+      nRef, sqlite3PagerRefcount(pBt->pPager)
+    );
+  }
+
+  /* Clean  up and report errors.
+  */
+  sqlite3BtreeLeave(p);
+  sqlite3_free(sCheck.aPgRef);
+  if( sCheck.mallocFailed ){
+    sqlite3StrAccumReset(&sCheck.errMsg);
+    *pnErr = sCheck.nErr+1;
+    return 0;
+  }
+  *pnErr = sCheck.nErr;
+  if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg);
+  return sqlite3StrAccumFinish(&sCheck.errMsg);
+}
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+/*
+** Return the full pathname of the underlying database file.  Return
+** an empty string if the database is in-memory or a TEMP database.
+**
+** The pager filename is invariant as long as the pager is
+** open so it is safe to access without the BtShared mutex.
+*/
+SQLITE_PRIVATE const char *sqlite3BtreeGetFilename(Btree *p){
+  assert( p->pBt->pPager!=0 );
+  return sqlite3PagerFilename(p->pBt->pPager, 1);
+}
+
+/*
+** Return the pathname of the journal file for this database. The return
+** value of this routine is the same regardless of whether the journal file
+** has been created or not.
+**
+** The pager journal filename is invariant as long as the pager is
+** open so it is safe to access without the BtShared mutex.
+*/
+SQLITE_PRIVATE const char *sqlite3BtreeGetJournalname(Btree *p){
+  assert( p->pBt->pPager!=0 );
+  return sqlite3PagerJournalname(p->pBt->pPager);
+}
+
+/*
+** Return non-zero if a transaction is active.
+*/
+SQLITE_PRIVATE int sqlite3BtreeIsInTrans(Btree *p){
+  assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
+  return (p && (p->inTrans==TRANS_WRITE));
+}
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** Run a checkpoint on the Btree passed as the first argument.
+**
+** Return SQLITE_LOCKED if this or any other connection has an open 
+** transaction on the shared-cache the argument Btree is connected to.
+**
+** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){
+  int rc = SQLITE_OK;
+  if( p ){
+    BtShared *pBt = p->pBt;
+    sqlite3BtreeEnter(p);
+    if( pBt->inTransaction!=TRANS_NONE ){
+      rc = SQLITE_LOCKED;
+    }else{
+      rc = sqlite3PagerCheckpoint(pBt->pPager, eMode, pnLog, pnCkpt);
+    }
+    sqlite3BtreeLeave(p);
+  }
+  return rc;
+}
+#endif
+
+/*
+** Return non-zero if a read (or write) transaction is active.
+*/
+SQLITE_PRIVATE int sqlite3BtreeIsInReadTrans(Btree *p){
+  assert( p );
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  return p->inTrans!=TRANS_NONE;
+}
+
+SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree *p){
+  assert( p );
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  return p->nBackup!=0;
+}
+
+/*
+** This function returns a pointer to a blob of memory associated with
+** a single shared-btree. The memory is used by client code for its own
+** purposes (for example, to store a high-level schema associated with 
+** the shared-btree). The btree layer manages reference counting issues.
+**
+** The first time this is called on a shared-btree, nBytes bytes of memory
+** are allocated, zeroed, and returned to the caller. For each subsequent 
+** call the nBytes parameter is ignored and a pointer to the same blob
+** of memory returned. 
+**
+** If the nBytes parameter is 0 and the blob of memory has not yet been
+** allocated, a null pointer is returned. If the blob has already been
+** allocated, it is returned as normal.
+**
+** Just before the shared-btree is closed, the function passed as the 
+** xFree argument when the memory allocation was made is invoked on the 
+** blob of allocated memory. The xFree function should not call sqlite3_free()
+** on the memory, the btree layer does that.
+*/
+SQLITE_PRIVATE void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
+  BtShared *pBt = p->pBt;
+  sqlite3BtreeEnter(p);
+  if( !pBt->pSchema && nBytes ){
+    pBt->pSchema = sqlite3DbMallocZero(0, nBytes);
+    pBt->xFreeSchema = xFree;
+  }
+  sqlite3BtreeLeave(p);
+  return pBt->pSchema;
+}
+
+/*
+** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared 
+** btree as the argument handle holds an exclusive lock on the 
+** sqlite_master table. Otherwise SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *p){
+  int rc;
+  assert( sqlite3_mutex_held(p->db->mutex) );
+  sqlite3BtreeEnter(p);
+  rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK);
+  assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE );
+  sqlite3BtreeLeave(p);
+  return rc;
+}
+
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** Obtain a lock on the table whose root page is iTab.  The
+** lock is a write lock if isWritelock is true or a read lock
+** if it is false.
+*/
+SQLITE_PRIVATE int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
+  int rc = SQLITE_OK;
+  assert( p->inTrans!=TRANS_NONE );
+  if( p->sharable ){
+    u8 lockType = READ_LOCK + isWriteLock;
+    assert( READ_LOCK+1==WRITE_LOCK );
+    assert( isWriteLock==0 || isWriteLock==1 );
+
+    sqlite3BtreeEnter(p);
+    rc = querySharedCacheTableLock(p, iTab, lockType);
+    if( rc==SQLITE_OK ){
+      rc = setSharedCacheTableLock(p, iTab, lockType);
+    }
+    sqlite3BtreeLeave(p);
+  }
+  return rc;
+}
+#endif
+
+#ifndef SQLITE_OMIT_INCRBLOB
+/*
+** Argument pCsr must be a cursor opened for writing on an 
+** INTKEY table currently pointing at a valid table entry. 
+** This function modifies the data stored as part of that entry.
+**
+** Only the data content may only be modified, it is not possible to 
+** change the length of the data stored. If this function is called with
+** parameters that attempt to write past the end of the existing data,
+** no modifications are made and SQLITE_CORRUPT is returned.
+*/
+SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
+  int rc;
+  assert( cursorHoldsMutex(pCsr) );
+  assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
+  assert( pCsr->curFlags & BTCF_Incrblob );
+
+  rc = restoreCursorPosition(pCsr);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  assert( pCsr->eState!=CURSOR_REQUIRESEEK );
+  if( pCsr->eState!=CURSOR_VALID ){
+    return SQLITE_ABORT;
+  }
+
+  /* Save the positions of all other cursors open on this table. This is
+  ** required in case any of them are holding references to an xFetch
+  ** version of the b-tree page modified by the accessPayload call below.
+  **
+  ** Note that pCsr must be open on a INTKEY table and saveCursorPosition()
+  ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence
+  ** saveAllCursors can only return SQLITE_OK.
+  */
+  VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr);
+  assert( rc==SQLITE_OK );
+
+  /* Check some assumptions: 
+  **   (a) the cursor is open for writing,
+  **   (b) there is a read/write transaction open,
+  **   (c) the connection holds a write-lock on the table (if required),
+  **   (d) there are no conflicting read-locks, and
+  **   (e) the cursor points at a valid row of an intKey table.
+  */
+  if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){
+    return SQLITE_READONLY;
+  }
+  assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0
+              && pCsr->pBt->inTransaction==TRANS_WRITE );
+  assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) );
+  assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) );
+  assert( pCsr->apPage[pCsr->iPage]->intKey );
+
+  return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1);
+}
+
+/* 
+** Mark this cursor as an incremental blob cursor.
+*/
+SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
+  pCur->curFlags |= BTCF_Incrblob;
+}
+#endif
+
+/*
+** Set both the "read version" (single byte at byte offset 18) and 
+** "write version" (single byte at byte offset 19) fields in the database
+** header to iVersion.
+*/
+SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
+  BtShared *pBt = pBtree->pBt;
+  int rc;                         /* Return code */
+ 
+  assert( iVersion==1 || iVersion==2 );
+
+  /* If setting the version fields to 1, do not automatically open the
+  ** WAL connection, even if the version fields are currently set to 2.
+  */
+  pBt->btsFlags &= ~BTS_NO_WAL;
+  if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL;
+
+  rc = sqlite3BtreeBeginTrans(pBtree, 0);
+  if( rc==SQLITE_OK ){
+    u8 *aData = pBt->pPage1->aData;
+    if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){
+      rc = sqlite3BtreeBeginTrans(pBtree, 2);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
+        if( rc==SQLITE_OK ){
+          aData[18] = (u8)iVersion;
+          aData[19] = (u8)iVersion;
+        }
+      }
+    }
+  }
+
+  pBt->btsFlags &= ~BTS_NO_WAL;
+  return rc;
+}
+
+/*
+** set the mask of hint flags for cursor pCsr. Currently the only valid
+** values are 0 and BTREE_BULKLOAD.
+*/
+SQLITE_PRIVATE void sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){
+  assert( mask==BTREE_BULKLOAD || mask==0 );
+  pCsr->hints = mask;
+}
+
+/*
+** Return true if the given Btree is read-only.
+*/
+SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *p){
+  return (p->pBt->btsFlags & BTS_READ_ONLY)!=0;
+}
+
+/*
+** Return the size of the header added to each page by this module.
+*/
+SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); }
+
+/************** End of btree.c ***********************************************/
+/************** Begin file backup.c ******************************************/
+/*
+** 2009 January 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the implementation of the sqlite3_backup_XXX() 
+** API functions and the related features.
+*/
+
+/*
+** Structure allocated for each backup operation.
+*/
+struct sqlite3_backup {
+  sqlite3* pDestDb;        /* Destination database handle */
+  Btree *pDest;            /* Destination b-tree file */
+  u32 iDestSchema;         /* Original schema cookie in destination */
+  int bDestLocked;         /* True once a write-transaction is open on pDest */
+
+  Pgno iNext;              /* Page number of the next source page to copy */
+  sqlite3* pSrcDb;         /* Source database handle */
+  Btree *pSrc;             /* Source b-tree file */
+
+  int rc;                  /* Backup process error code */
+
+  /* These two variables are set by every call to backup_step(). They are
+  ** read by calls to backup_remaining() and backup_pagecount().
+  */
+  Pgno nRemaining;         /* Number of pages left to copy */
+  Pgno nPagecount;         /* Total number of pages to copy */
+
+  int isAttached;          /* True once backup has been registered with pager */
+  sqlite3_backup *pNext;   /* Next backup associated with source pager */
+};
+
+/*
+** THREAD SAFETY NOTES:
+**
+**   Once it has been created using backup_init(), a single sqlite3_backup
+**   structure may be accessed via two groups of thread-safe entry points:
+**
+**     * Via the sqlite3_backup_XXX() API function backup_step() and 
+**       backup_finish(). Both these functions obtain the source database
+**       handle mutex and the mutex associated with the source BtShared 
+**       structure, in that order.
+**
+**     * Via the BackupUpdate() and BackupRestart() functions, which are
+**       invoked by the pager layer to report various state changes in
+**       the page cache associated with the source database. The mutex
+**       associated with the source database BtShared structure will always 
+**       be held when either of these functions are invoked.
+**
+**   The other sqlite3_backup_XXX() API functions, backup_remaining() and
+**   backup_pagecount() are not thread-safe functions. If they are called
+**   while some other thread is calling backup_step() or backup_finish(),
+**   the values returned may be invalid. There is no way for a call to
+**   BackupUpdate() or BackupRestart() to interfere with backup_remaining()
+**   or backup_pagecount().
+**
+**   Depending on the SQLite configuration, the database handles and/or
+**   the Btree objects may have their own mutexes that require locking.
+**   Non-sharable Btrees (in-memory databases for example), do not have
+**   associated mutexes.
+*/
+
+/*
+** Return a pointer corresponding to database zDb (i.e. "main", "temp")
+** in connection handle pDb. If such a database cannot be found, return
+** a NULL pointer and write an error message to pErrorDb.
+**
+** If the "temp" database is requested, it may need to be opened by this 
+** function. If an error occurs while doing so, return 0 and write an 
+** error message to pErrorDb.
+*/
+static Btree *findBtree(sqlite3 *pErrorDb, sqlite3 *pDb, const char *zDb){
+  int i = sqlite3FindDbName(pDb, zDb);
+
+  if( i==1 ){
+    Parse *pParse;
+    int rc = 0;
+    pParse = sqlite3StackAllocZero(pErrorDb, sizeof(*pParse));
+    if( pParse==0 ){
+      sqlite3ErrorWithMsg(pErrorDb, SQLITE_NOMEM, "out of memory");
+      rc = SQLITE_NOMEM;
+    }else{
+      pParse->db = pDb;
+      if( sqlite3OpenTempDatabase(pParse) ){
+        sqlite3ErrorWithMsg(pErrorDb, pParse->rc, "%s", pParse->zErrMsg);
+        rc = SQLITE_ERROR;
+      }
+      sqlite3DbFree(pErrorDb, pParse->zErrMsg);
+      sqlite3ParserReset(pParse);
+      sqlite3StackFree(pErrorDb, pParse);
+    }
+    if( rc ){
+      return 0;
+    }
+  }
+
+  if( i<0 ){
+    sqlite3ErrorWithMsg(pErrorDb, SQLITE_ERROR, "unknown database %s", zDb);
+    return 0;
+  }
+
+  return pDb->aDb[i].pBt;
+}
+
+/*
+** Attempt to set the page size of the destination to match the page size
+** of the source.
+*/
+static int setDestPgsz(sqlite3_backup *p){
+  int rc;
+  rc = sqlite3BtreeSetPageSize(p->pDest,sqlite3BtreeGetPageSize(p->pSrc),-1,0);
+  return rc;
+}
+
+/*
+** Check that there is no open read-transaction on the b-tree passed as the
+** second argument. If there is not, return SQLITE_OK. Otherwise, if there
+** is an open read-transaction, return SQLITE_ERROR and leave an error 
+** message in database handle db.
+*/
+static int checkReadTransaction(sqlite3 *db, Btree *p){
+  if( sqlite3BtreeIsInReadTrans(p) ){
+    sqlite3ErrorWithMsg(db, SQLITE_ERROR, "destination database is in use");
+    return SQLITE_ERROR;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Create an sqlite3_backup process to copy the contents of zSrcDb from
+** connection handle pSrcDb to zDestDb in pDestDb. If successful, return
+** a pointer to the new sqlite3_backup object.
+**
+** If an error occurs, NULL is returned and an error code and error message
+** stored in database handle pDestDb.
+*/
+SQLITE_API sqlite3_backup *sqlite3_backup_init(
+  sqlite3* pDestDb,                     /* Database to write to */
+  const char *zDestDb,                  /* Name of database within pDestDb */
+  sqlite3* pSrcDb,                      /* Database connection to read from */
+  const char *zSrcDb                    /* Name of database within pSrcDb */
+){
+  sqlite3_backup *p;                    /* Value to return */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(pSrcDb)||!sqlite3SafetyCheckOk(pDestDb) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+
+  /* Lock the source database handle. The destination database
+  ** handle is not locked in this routine, but it is locked in
+  ** sqlite3_backup_step(). The user is required to ensure that no
+  ** other thread accesses the destination handle for the duration
+  ** of the backup operation.  Any attempt to use the destination
+  ** database connection while a backup is in progress may cause
+  ** a malfunction or a deadlock.
+  */
+  sqlite3_mutex_enter(pSrcDb->mutex);
+  sqlite3_mutex_enter(pDestDb->mutex);
+
+  if( pSrcDb==pDestDb ){
+    sqlite3ErrorWithMsg(
+        pDestDb, SQLITE_ERROR, "source and destination must be distinct"
+    );
+    p = 0;
+  }else {
+    /* Allocate space for a new sqlite3_backup object...
+    ** EVIDENCE-OF: R-64852-21591 The sqlite3_backup object is created by a
+    ** call to sqlite3_backup_init() and is destroyed by a call to
+    ** sqlite3_backup_finish(). */
+    p = (sqlite3_backup *)sqlite3MallocZero(sizeof(sqlite3_backup));
+    if( !p ){
+      sqlite3Error(pDestDb, SQLITE_NOMEM);
+    }
+  }
+
+  /* If the allocation succeeded, populate the new object. */
+  if( p ){
+    p->pSrc = findBtree(pDestDb, pSrcDb, zSrcDb);
+    p->pDest = findBtree(pDestDb, pDestDb, zDestDb);
+    p->pDestDb = pDestDb;
+    p->pSrcDb = pSrcDb;
+    p->iNext = 1;
+    p->isAttached = 0;
+
+    if( 0==p->pSrc || 0==p->pDest 
+     || setDestPgsz(p)==SQLITE_NOMEM 
+     || checkReadTransaction(pDestDb, p->pDest)!=SQLITE_OK 
+     ){
+      /* One (or both) of the named databases did not exist or an OOM
+      ** error was hit. Or there is a transaction open on the destination
+      ** database. The error has already been written into the pDestDb 
+      ** handle. All that is left to do here is free the sqlite3_backup 
+      ** structure.  */
+      sqlite3_free(p);
+      p = 0;
+    }
+  }
+  if( p ){
+    p->pSrc->nBackup++;
+  }
+
+  sqlite3_mutex_leave(pDestDb->mutex);
+  sqlite3_mutex_leave(pSrcDb->mutex);
+  return p;
+}
+
+/*
+** Argument rc is an SQLite error code. Return true if this error is 
+** considered fatal if encountered during a backup operation. All errors
+** are considered fatal except for SQLITE_BUSY and SQLITE_LOCKED.
+*/
+static int isFatalError(int rc){
+  return (rc!=SQLITE_OK && rc!=SQLITE_BUSY && ALWAYS(rc!=SQLITE_LOCKED));
+}
+
+/*
+** Parameter zSrcData points to a buffer containing the data for 
+** page iSrcPg from the source database. Copy this data into the 
+** destination database.
+*/
+static int backupOnePage(
+  sqlite3_backup *p,              /* Backup handle */
+  Pgno iSrcPg,                    /* Source database page to backup */
+  const u8 *zSrcData,             /* Source database page data */
+  int bUpdate                     /* True for an update, false otherwise */
+){
+  Pager * const pDestPager = sqlite3BtreePager(p->pDest);
+  const int nSrcPgsz = sqlite3BtreeGetPageSize(p->pSrc);
+  int nDestPgsz = sqlite3BtreeGetPageSize(p->pDest);
+  const int nCopy = MIN(nSrcPgsz, nDestPgsz);
+  const i64 iEnd = (i64)iSrcPg*(i64)nSrcPgsz;
+#ifdef SQLITE_HAS_CODEC
+  /* Use BtreeGetReserveNoMutex() for the source b-tree, as although it is
+  ** guaranteed that the shared-mutex is held by this thread, handle
+  ** p->pSrc may not actually be the owner.  */
+  int nSrcReserve = sqlite3BtreeGetReserveNoMutex(p->pSrc);
+  int nDestReserve = sqlite3BtreeGetReserve(p->pDest);
+#endif
+  int rc = SQLITE_OK;
+  i64 iOff;
+
+  assert( sqlite3BtreeGetReserveNoMutex(p->pSrc)>=0 );
+  assert( p->bDestLocked );
+  assert( !isFatalError(p->rc) );
+  assert( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) );
+  assert( zSrcData );
+
+  /* Catch the case where the destination is an in-memory database and the
+  ** page sizes of the source and destination differ. 
+  */
+  if( nSrcPgsz!=nDestPgsz && sqlite3PagerIsMemdb(pDestPager) ){
+    rc = SQLITE_READONLY;
+  }
+
+#ifdef SQLITE_HAS_CODEC
+  /* Backup is not possible if the page size of the destination is changing
+  ** and a codec is in use.
+  */
+  if( nSrcPgsz!=nDestPgsz && sqlite3PagerGetCodec(pDestPager)!=0 ){
+    rc = SQLITE_READONLY;
+  }
+
+  /* Backup is not possible if the number of bytes of reserve space differ
+  ** between source and destination.  If there is a difference, try to
+  ** fix the destination to agree with the source.  If that is not possible,
+  ** then the backup cannot proceed.
+  */
+  if( nSrcReserve!=nDestReserve ){
+    u32 newPgsz = nSrcPgsz;
+    rc = sqlite3PagerSetPagesize(pDestPager, &newPgsz, nSrcReserve);
+    if( rc==SQLITE_OK && newPgsz!=nSrcPgsz ) rc = SQLITE_READONLY;
+  }
+#endif
+
+  /* This loop runs once for each destination page spanned by the source 
+  ** page. For each iteration, variable iOff is set to the byte offset
+  ** of the destination page.
+  */
+  for(iOff=iEnd-(i64)nSrcPgsz; rc==SQLITE_OK && iOff<iEnd; iOff+=nDestPgsz){
+    DbPage *pDestPg = 0;
+    Pgno iDest = (Pgno)(iOff/nDestPgsz)+1;
+    if( iDest==PENDING_BYTE_PAGE(p->pDest->pBt) ) continue;
+    if( SQLITE_OK==(rc = sqlite3PagerGet(pDestPager, iDest, &pDestPg))
+     && SQLITE_OK==(rc = sqlite3PagerWrite(pDestPg))
+    ){
+      const u8 *zIn = &zSrcData[iOff%nSrcPgsz];
+      u8 *zDestData = sqlite3PagerGetData(pDestPg);
+      u8 *zOut = &zDestData[iOff%nDestPgsz];
+
+      /* Copy the data from the source page into the destination page.
+      ** Then clear the Btree layer MemPage.isInit flag. Both this module
+      ** and the pager code use this trick (clearing the first byte
+      ** of the page 'extra' space to invalidate the Btree layers
+      ** cached parse of the page). MemPage.isInit is marked 
+      ** "MUST BE FIRST" for this purpose.
+      */
+      memcpy(zOut, zIn, nCopy);
+      ((u8 *)sqlite3PagerGetExtra(pDestPg))[0] = 0;
+      if( iOff==0 && bUpdate==0 ){
+        sqlite3Put4byte(&zOut[28], sqlite3BtreeLastPage(p->pSrc));
+      }
+    }
+    sqlite3PagerUnref(pDestPg);
+  }
+
+  return rc;
+}
+
+/*
+** If pFile is currently larger than iSize bytes, then truncate it to
+** exactly iSize bytes. If pFile is not larger than iSize bytes, then
+** this function is a no-op.
+**
+** Return SQLITE_OK if everything is successful, or an SQLite error 
+** code if an error occurs.
+*/
+static int backupTruncateFile(sqlite3_file *pFile, i64 iSize){
+  i64 iCurrent;
+  int rc = sqlite3OsFileSize(pFile, &iCurrent);
+  if( rc==SQLITE_OK && iCurrent>iSize ){
+    rc = sqlite3OsTruncate(pFile, iSize);
+  }
+  return rc;
+}
+
+/*
+** Register this backup object with the associated source pager for
+** callbacks when pages are changed or the cache invalidated.
+*/
+static void attachBackupObject(sqlite3_backup *p){
+  sqlite3_backup **pp;
+  assert( sqlite3BtreeHoldsMutex(p->pSrc) );
+  pp = sqlite3PagerBackupPtr(sqlite3BtreePager(p->pSrc));
+  p->pNext = *pp;
+  *pp = p;
+  p->isAttached = 1;
+}
+
+/*
+** Copy nPage pages from the source b-tree to the destination.
+*/
+SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){
+  int rc;
+  int destMode;       /* Destination journal mode */
+  int pgszSrc = 0;    /* Source page size */
+  int pgszDest = 0;   /* Destination page size */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( p==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(p->pSrcDb->mutex);
+  sqlite3BtreeEnter(p->pSrc);
+  if( p->pDestDb ){
+    sqlite3_mutex_enter(p->pDestDb->mutex);
+  }
+
+  rc = p->rc;
+  if( !isFatalError(rc) ){
+    Pager * const pSrcPager = sqlite3BtreePager(p->pSrc);     /* Source pager */
+    Pager * const pDestPager = sqlite3BtreePager(p->pDest);   /* Dest pager */
+    int ii;                            /* Iterator variable */
+    int nSrcPage = -1;                 /* Size of source db in pages */
+    int bCloseTrans = 0;               /* True if src db requires unlocking */
+
+    /* If the source pager is currently in a write-transaction, return
+    ** SQLITE_BUSY immediately.
+    */
+    if( p->pDestDb && p->pSrc->pBt->inTransaction==TRANS_WRITE ){
+      rc = SQLITE_BUSY;
+    }else{
+      rc = SQLITE_OK;
+    }
+
+    /* Lock the destination database, if it is not locked already. */
+    if( SQLITE_OK==rc && p->bDestLocked==0
+     && SQLITE_OK==(rc = sqlite3BtreeBeginTrans(p->pDest, 2)) 
+    ){
+      p->bDestLocked = 1;
+      sqlite3BtreeGetMeta(p->pDest, BTREE_SCHEMA_VERSION, &p->iDestSchema);
+    }
+
+    /* If there is no open read-transaction on the source database, open
+    ** one now. If a transaction is opened here, then it will be closed
+    ** before this function exits.
+    */
+    if( rc==SQLITE_OK && 0==sqlite3BtreeIsInReadTrans(p->pSrc) ){
+      rc = sqlite3BtreeBeginTrans(p->pSrc, 0);
+      bCloseTrans = 1;
+    }
+
+    /* Do not allow backup if the destination database is in WAL mode
+    ** and the page sizes are different between source and destination */
+    pgszSrc = sqlite3BtreeGetPageSize(p->pSrc);
+    pgszDest = sqlite3BtreeGetPageSize(p->pDest);
+    destMode = sqlite3PagerGetJournalMode(sqlite3BtreePager(p->pDest));
+    if( SQLITE_OK==rc && destMode==PAGER_JOURNALMODE_WAL && pgszSrc!=pgszDest ){
+      rc = SQLITE_READONLY;
+    }
+  
+    /* Now that there is a read-lock on the source database, query the
+    ** source pager for the number of pages in the database.
+    */
+    nSrcPage = (int)sqlite3BtreeLastPage(p->pSrc);
+    assert( nSrcPage>=0 );
+    for(ii=0; (nPage<0 || ii<nPage) && p->iNext<=(Pgno)nSrcPage && !rc; ii++){
+      const Pgno iSrcPg = p->iNext;                 /* Source page number */
+      if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){
+        DbPage *pSrcPg;                             /* Source page object */
+        rc = sqlite3PagerAcquire(pSrcPager, iSrcPg, &pSrcPg,
+                                 PAGER_GET_READONLY);
+        if( rc==SQLITE_OK ){
+          rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0);
+          sqlite3PagerUnref(pSrcPg);
+        }
+      }
+      p->iNext++;
+    }
+    if( rc==SQLITE_OK ){
+      p->nPagecount = nSrcPage;
+      p->nRemaining = nSrcPage+1-p->iNext;
+      if( p->iNext>(Pgno)nSrcPage ){
+        rc = SQLITE_DONE;
+      }else if( !p->isAttached ){
+        attachBackupObject(p);
+      }
+    }
+  
+    /* Update the schema version field in the destination database. This
+    ** is to make sure that the schema-version really does change in
+    ** the case where the source and destination databases have the
+    ** same schema version.
+    */
+    if( rc==SQLITE_DONE ){
+      if( nSrcPage==0 ){
+        rc = sqlite3BtreeNewDb(p->pDest);
+        nSrcPage = 1;
+      }
+      if( rc==SQLITE_OK || rc==SQLITE_DONE ){
+        rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1);
+      }
+      if( rc==SQLITE_OK ){
+        if( p->pDestDb ){
+          sqlite3ResetAllSchemasOfConnection(p->pDestDb);
+        }
+        if( destMode==PAGER_JOURNALMODE_WAL ){
+          rc = sqlite3BtreeSetVersion(p->pDest, 2);
+        }
+      }
+      if( rc==SQLITE_OK ){
+        int nDestTruncate;
+        /* Set nDestTruncate to the final number of pages in the destination
+        ** database. The complication here is that the destination page
+        ** size may be different to the source page size. 
+        **
+        ** If the source page size is smaller than the destination page size, 
+        ** round up. In this case the call to sqlite3OsTruncate() below will
+        ** fix the size of the file. However it is important to call
+        ** sqlite3PagerTruncateImage() here so that any pages in the 
+        ** destination file that lie beyond the nDestTruncate page mark are
+        ** journalled by PagerCommitPhaseOne() before they are destroyed
+        ** by the file truncation.
+        */
+        assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) );
+        assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) );
+        if( pgszSrc<pgszDest ){
+          int ratio = pgszDest/pgszSrc;
+          nDestTruncate = (nSrcPage+ratio-1)/ratio;
+          if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){
+            nDestTruncate--;
+          }
+        }else{
+          nDestTruncate = nSrcPage * (pgszSrc/pgszDest);
+        }
+        assert( nDestTruncate>0 );
+
+        if( pgszSrc<pgszDest ){
+          /* If the source page-size is smaller than the destination page-size,
+          ** two extra things may need to happen:
+          **
+          **   * The destination may need to be truncated, and
+          **
+          **   * Data stored on the pages immediately following the 
+          **     pending-byte page in the source database may need to be
+          **     copied into the destination database.
+          */
+          const i64 iSize = (i64)pgszSrc * (i64)nSrcPage;
+          sqlite3_file * const pFile = sqlite3PagerFile(pDestPager);
+          Pgno iPg;
+          int nDstPage;
+          i64 iOff;
+          i64 iEnd;
+
+          assert( pFile );
+          assert( nDestTruncate==0 
+              || (i64)nDestTruncate*(i64)pgszDest >= iSize || (
+                nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1)
+             && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest
+          ));
+
+          /* This block ensures that all data required to recreate the original
+          ** database has been stored in the journal for pDestPager and the
+          ** journal synced to disk. So at this point we may safely modify
+          ** the database file in any way, knowing that if a power failure
+          ** occurs, the original database will be reconstructed from the 
+          ** journal file.  */
+          sqlite3PagerPagecount(pDestPager, &nDstPage);
+          for(iPg=nDestTruncate; rc==SQLITE_OK && iPg<=(Pgno)nDstPage; iPg++){
+            if( iPg!=PENDING_BYTE_PAGE(p->pDest->pBt) ){
+              DbPage *pPg;
+              rc = sqlite3PagerGet(pDestPager, iPg, &pPg);
+              if( rc==SQLITE_OK ){
+                rc = sqlite3PagerWrite(pPg);
+                sqlite3PagerUnref(pPg);
+              }
+            }
+          }
+          if( rc==SQLITE_OK ){
+            rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1);
+          }
+
+          /* Write the extra pages and truncate the database file as required */
+          iEnd = MIN(PENDING_BYTE + pgszDest, iSize);
+          for(
+            iOff=PENDING_BYTE+pgszSrc; 
+            rc==SQLITE_OK && iOff<iEnd; 
+            iOff+=pgszSrc
+          ){
+            PgHdr *pSrcPg = 0;
+            const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
+            rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
+            if( rc==SQLITE_OK ){
+              u8 *zData = sqlite3PagerGetData(pSrcPg);
+              rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
+            }
+            sqlite3PagerUnref(pSrcPg);
+          }
+          if( rc==SQLITE_OK ){
+            rc = backupTruncateFile(pFile, iSize);
+          }
+
+          /* Sync the database file to disk. */
+          if( rc==SQLITE_OK ){
+            rc = sqlite3PagerSync(pDestPager, 0);
+          }
+        }else{
+          sqlite3PagerTruncateImage(pDestPager, nDestTruncate);
+          rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
+        }
+    
+        /* Finish committing the transaction to the destination database. */
+        if( SQLITE_OK==rc
+         && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
+        ){
+          rc = SQLITE_DONE;
+        }
+      }
+    }
+  
+    /* If bCloseTrans is true, then this function opened a read transaction
+    ** on the source database. Close the read transaction here. There is
+    ** no need to check the return values of the btree methods here, as
+    ** "committing" a read-only transaction cannot fail.
+    */
+    if( bCloseTrans ){
+      TESTONLY( int rc2 );
+      TESTONLY( rc2  = ) sqlite3BtreeCommitPhaseOne(p->pSrc, 0);
+      TESTONLY( rc2 |= ) sqlite3BtreeCommitPhaseTwo(p->pSrc, 0);
+      assert( rc2==SQLITE_OK );
+    }
+  
+    if( rc==SQLITE_IOERR_NOMEM ){
+      rc = SQLITE_NOMEM;
+    }
+    p->rc = rc;
+  }
+  if( p->pDestDb ){
+    sqlite3_mutex_leave(p->pDestDb->mutex);
+  }
+  sqlite3BtreeLeave(p->pSrc);
+  sqlite3_mutex_leave(p->pSrcDb->mutex);
+  return rc;
+}
+
+/*
+** Release all resources associated with an sqlite3_backup* handle.
+*/
+SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p){
+  sqlite3_backup **pp;                 /* Ptr to head of pagers backup list */
+  sqlite3 *pSrcDb;                     /* Source database connection */
+  int rc;                              /* Value to return */
+
+  /* Enter the mutexes */
+  if( p==0 ) return SQLITE_OK;
+  pSrcDb = p->pSrcDb;
+  sqlite3_mutex_enter(pSrcDb->mutex);
+  sqlite3BtreeEnter(p->pSrc);
+  if( p->pDestDb ){
+    sqlite3_mutex_enter(p->pDestDb->mutex);
+  }
+
+  /* Detach this backup from the source pager. */
+  if( p->pDestDb ){
+    p->pSrc->nBackup--;
+  }
+  if( p->isAttached ){
+    pp = sqlite3PagerBackupPtr(sqlite3BtreePager(p->pSrc));
+    while( *pp!=p ){
+      pp = &(*pp)->pNext;
+    }
+    *pp = p->pNext;
+  }
+
+  /* If a transaction is still open on the Btree, roll it back. */
+  sqlite3BtreeRollback(p->pDest, SQLITE_OK, 0);
+
+  /* Set the error code of the destination database handle. */
+  rc = (p->rc==SQLITE_DONE) ? SQLITE_OK : p->rc;
+  if( p->pDestDb ){
+    sqlite3Error(p->pDestDb, rc);
+
+    /* Exit the mutexes and free the backup context structure. */
+    sqlite3LeaveMutexAndCloseZombie(p->pDestDb);
+  }
+  sqlite3BtreeLeave(p->pSrc);
+  if( p->pDestDb ){
+    /* EVIDENCE-OF: R-64852-21591 The sqlite3_backup object is created by a
+    ** call to sqlite3_backup_init() and is destroyed by a call to
+    ** sqlite3_backup_finish(). */
+    sqlite3_free(p);
+  }
+  sqlite3LeaveMutexAndCloseZombie(pSrcDb);
+  return rc;
+}
+
+/*
+** Return the number of pages still to be backed up as of the most recent
+** call to sqlite3_backup_step().
+*/
+SQLITE_API int sqlite3_backup_remaining(sqlite3_backup *p){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( p==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return p->nRemaining;
+}
+
+/*
+** Return the total number of pages in the source database as of the most 
+** recent call to sqlite3_backup_step().
+*/
+SQLITE_API int sqlite3_backup_pagecount(sqlite3_backup *p){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( p==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return p->nPagecount;
+}
+
+/*
+** This function is called after the contents of page iPage of the
+** source database have been modified. If page iPage has already been 
+** copied into the destination database, then the data written to the
+** destination is now invalidated. The destination copy of iPage needs
+** to be updated with the new data before the backup operation is
+** complete.
+**
+** It is assumed that the mutex associated with the BtShared object
+** corresponding to the source database is held when this function is
+** called.
+*/
+SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){
+  sqlite3_backup *p;                   /* Iterator variable */
+  for(p=pBackup; p; p=p->pNext){
+    assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) );
+    if( !isFatalError(p->rc) && iPage<p->iNext ){
+      /* The backup process p has already copied page iPage. But now it
+      ** has been modified by a transaction on the source pager. Copy
+      ** the new data into the backup.
+      */
+      int rc;
+      assert( p->pDestDb );
+      sqlite3_mutex_enter(p->pDestDb->mutex);
+      rc = backupOnePage(p, iPage, aData, 1);
+      sqlite3_mutex_leave(p->pDestDb->mutex);
+      assert( rc!=SQLITE_BUSY && rc!=SQLITE_LOCKED );
+      if( rc!=SQLITE_OK ){
+        p->rc = rc;
+      }
+    }
+  }
+}
+
+/*
+** Restart the backup process. This is called when the pager layer
+** detects that the database has been modified by an external database
+** connection. In this case there is no way of knowing which of the
+** pages that have been copied into the destination database are still 
+** valid and which are not, so the entire process needs to be restarted.
+**
+** It is assumed that the mutex associated with the BtShared object
+** corresponding to the source database is held when this function is
+** called.
+*/
+SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *pBackup){
+  sqlite3_backup *p;                   /* Iterator variable */
+  for(p=pBackup; p; p=p->pNext){
+    assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) );
+    p->iNext = 1;
+  }
+}
+
+#ifndef SQLITE_OMIT_VACUUM
+/*
+** Copy the complete content of pBtFrom into pBtTo.  A transaction
+** must be active for both files.
+**
+** The size of file pTo may be reduced by this operation. If anything 
+** goes wrong, the transaction on pTo is rolled back. If successful, the 
+** transaction is committed before returning.
+*/
+SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
+  int rc;
+  sqlite3_file *pFd;              /* File descriptor for database pTo */
+  sqlite3_backup b;
+  sqlite3BtreeEnter(pTo);
+  sqlite3BtreeEnter(pFrom);
+
+  assert( sqlite3BtreeIsInTrans(pTo) );
+  pFd = sqlite3PagerFile(sqlite3BtreePager(pTo));
+  if( pFd->pMethods ){
+    i64 nByte = sqlite3BtreeGetPageSize(pFrom)*(i64)sqlite3BtreeLastPage(pFrom);
+    rc = sqlite3OsFileControl(pFd, SQLITE_FCNTL_OVERWRITE, &nByte);
+    if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
+    if( rc ) goto copy_finished;
+  }
+
+  /* Set up an sqlite3_backup object. sqlite3_backup.pDestDb must be set
+  ** to 0. This is used by the implementations of sqlite3_backup_step()
+  ** and sqlite3_backup_finish() to detect that they are being called
+  ** from this function, not directly by the user.
+  */
+  memset(&b, 0, sizeof(b));
+  b.pSrcDb = pFrom->db;
+  b.pSrc = pFrom;
+  b.pDest = pTo;
+  b.iNext = 1;
+
+  /* 0x7FFFFFFF is the hard limit for the number of pages in a database
+  ** file. By passing this as the number of pages to copy to
+  ** sqlite3_backup_step(), we can guarantee that the copy finishes 
+  ** within a single call (unless an error occurs). The assert() statement
+  ** checks this assumption - (p->rc) should be set to either SQLITE_DONE 
+  ** or an error code.
+  */
+  sqlite3_backup_step(&b, 0x7FFFFFFF);
+  assert( b.rc!=SQLITE_OK );
+  rc = sqlite3_backup_finish(&b);
+  if( rc==SQLITE_OK ){
+    pTo->pBt->btsFlags &= ~BTS_PAGESIZE_FIXED;
+  }else{
+    sqlite3PagerClearCache(sqlite3BtreePager(b.pDest));
+  }
+
+  assert( sqlite3BtreeIsInTrans(pTo)==0 );
+copy_finished:
+  sqlite3BtreeLeave(pFrom);
+  sqlite3BtreeLeave(pTo);
+  return rc;
+}
+#endif /* SQLITE_OMIT_VACUUM */
+
+/************** End of backup.c **********************************************/
+/************** Begin file vdbemem.c *****************************************/
+/*
+** 2004 May 26
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code use to manipulate "Mem" structure.  A "Mem"
+** stores a single value in the VDBE.  Mem is an opaque structure visible
+** only within the VDBE.  Interface routines refer to a Mem using the
+** name sqlite_value
+*/
+
+#ifdef SQLITE_DEBUG
+/*
+** Check invariants on a Mem object.
+**
+** This routine is intended for use inside of assert() statements, like
+** this:    assert( sqlite3VdbeCheckMemInvariants(pMem) );
+*/
+SQLITE_PRIVATE int sqlite3VdbeCheckMemInvariants(Mem *p){
+  /* If MEM_Dyn is set then Mem.xDel!=0.  
+  ** Mem.xDel is might not be initialized if MEM_Dyn is clear.
+  */
+  assert( (p->flags & MEM_Dyn)==0 || p->xDel!=0 );
+
+  /* MEM_Dyn may only be set if Mem.szMalloc==0.  In this way we
+  ** ensure that if Mem.szMalloc>0 then it is safe to do
+  ** Mem.z = Mem.zMalloc without having to check Mem.flags&MEM_Dyn.
+  ** That saves a few cycles in inner loops. */
+  assert( (p->flags & MEM_Dyn)==0 || p->szMalloc==0 );
+
+  /* Cannot be both MEM_Int and MEM_Real at the same time */
+  assert( (p->flags & (MEM_Int|MEM_Real))!=(MEM_Int|MEM_Real) );
+
+  /* The szMalloc field holds the correct memory allocation size */
+  assert( p->szMalloc==0
+       || p->szMalloc==sqlite3DbMallocSize(p->db,p->zMalloc) );
+
+  /* If p holds a string or blob, the Mem.z must point to exactly
+  ** one of the following:
+  **
+  **   (1) Memory in Mem.zMalloc and managed by the Mem object
+  **   (2) Memory to be freed using Mem.xDel
+  **   (3) An ephemeral string or blob
+  **   (4) A static string or blob
+  */
+  if( (p->flags & (MEM_Str|MEM_Blob)) && p->n>0 ){
+    assert( 
+      ((p->szMalloc>0 && p->z==p->zMalloc)? 1 : 0) +
+      ((p->flags&MEM_Dyn)!=0 ? 1 : 0) +
+      ((p->flags&MEM_Ephem)!=0 ? 1 : 0) +
+      ((p->flags&MEM_Static)!=0 ? 1 : 0) == 1
+    );
+  }
+  return 1;
+}
+#endif
+
+
+/*
+** If pMem is an object with a valid string representation, this routine
+** ensures the internal encoding for the string representation is
+** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE.
+**
+** If pMem is not a string object, or the encoding of the string
+** representation is already stored using the requested encoding, then this
+** routine is a no-op.
+**
+** SQLITE_OK is returned if the conversion is successful (or not required).
+** SQLITE_NOMEM may be returned if a malloc() fails during conversion
+** between formats.
+*/
+SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){
+#ifndef SQLITE_OMIT_UTF16
+  int rc;
+#endif
+  assert( (pMem->flags&MEM_RowSet)==0 );
+  assert( desiredEnc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF16LE
+           || desiredEnc==SQLITE_UTF16BE );
+  if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){
+    return SQLITE_OK;
+  }
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+#ifdef SQLITE_OMIT_UTF16
+  return SQLITE_ERROR;
+#else
+
+  /* MemTranslate() may return SQLITE_OK or SQLITE_NOMEM. If NOMEM is returned,
+  ** then the encoding of the value may not have changed.
+  */
+  rc = sqlite3VdbeMemTranslate(pMem, (u8)desiredEnc);
+  assert(rc==SQLITE_OK    || rc==SQLITE_NOMEM);
+  assert(rc==SQLITE_OK    || pMem->enc!=desiredEnc);
+  assert(rc==SQLITE_NOMEM || pMem->enc==desiredEnc);
+  return rc;
+#endif
+}
+
+/*
+** Make sure pMem->z points to a writable allocation of at least 
+** min(n,32) bytes.
+**
+** If the bPreserve argument is true, then copy of the content of
+** pMem->z into the new allocation.  pMem must be either a string or
+** blob if bPreserve is true.  If bPreserve is false, any prior content
+** in pMem->z is discarded.
+*/
+SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemGrow(Mem *pMem, int n, int bPreserve){
+  assert( sqlite3VdbeCheckMemInvariants(pMem) );
+  assert( (pMem->flags&MEM_RowSet)==0 );
+
+  /* If the bPreserve flag is set to true, then the memory cell must already
+  ** contain a valid string or blob value.  */
+  assert( bPreserve==0 || pMem->flags&(MEM_Blob|MEM_Str) );
+  testcase( bPreserve && pMem->z==0 );
+
+  assert( pMem->szMalloc==0
+       || pMem->szMalloc==sqlite3DbMallocSize(pMem->db, pMem->zMalloc) );
+  if( pMem->szMalloc<n ){
+    if( n<32 ) n = 32;
+    if( bPreserve && pMem->szMalloc>0 && pMem->z==pMem->zMalloc ){
+      pMem->z = pMem->zMalloc = sqlite3DbReallocOrFree(pMem->db, pMem->z, n);
+      bPreserve = 0;
+    }else{
+      if( pMem->szMalloc>0 ) sqlite3DbFree(pMem->db, pMem->zMalloc);
+      pMem->zMalloc = sqlite3DbMallocRaw(pMem->db, n);
+    }
+    if( pMem->zMalloc==0 ){
+      sqlite3VdbeMemSetNull(pMem);
+      pMem->z = 0;
+      pMem->szMalloc = 0;
+      return SQLITE_NOMEM;
+    }else{
+      pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->zMalloc);
+    }
+  }
+
+  if( bPreserve && pMem->z && pMem->z!=pMem->zMalloc ){
+    memcpy(pMem->zMalloc, pMem->z, pMem->n);
+  }
+  if( (pMem->flags&MEM_Dyn)!=0 ){
+    assert( pMem->xDel!=0 && pMem->xDel!=SQLITE_DYNAMIC );
+    pMem->xDel((void *)(pMem->z));
+  }
+
+  pMem->z = pMem->zMalloc;
+  pMem->flags &= ~(MEM_Dyn|MEM_Ephem|MEM_Static);
+  return SQLITE_OK;
+}
+
+/*
+** Change the pMem->zMalloc allocation to be at least szNew bytes.
+** If pMem->zMalloc already meets or exceeds the requested size, this
+** routine is a no-op.
+**
+** Any prior string or blob content in the pMem object may be discarded.
+** The pMem->xDel destructor is called, if it exists.  Though MEM_Str
+** and MEM_Blob values may be discarded, MEM_Int, MEM_Real, and MEM_Null
+** values are preserved.
+**
+** Return SQLITE_OK on success or an error code (probably SQLITE_NOMEM)
+** if unable to complete the resizing.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int szNew){
+  assert( szNew>0 );
+  assert( (pMem->flags & MEM_Dyn)==0 || pMem->szMalloc==0 );
+  if( pMem->szMalloc<szNew ){
+    return sqlite3VdbeMemGrow(pMem, szNew, 0);
+  }
+  assert( (pMem->flags & MEM_Dyn)==0 );
+  pMem->z = pMem->zMalloc;
+  pMem->flags &= (MEM_Null|MEM_Int|MEM_Real);
+  return SQLITE_OK;
+}
+
+/*
+** Change pMem so that its MEM_Str or MEM_Blob value is stored in
+** MEM.zMalloc, where it can be safely written.
+**
+** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem *pMem){
+  int f;
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( (pMem->flags&MEM_RowSet)==0 );
+  ExpandBlob(pMem);
+  f = pMem->flags;
+  if( (f&(MEM_Str|MEM_Blob)) && (pMem->szMalloc==0 || pMem->z!=pMem->zMalloc) ){
+    if( sqlite3VdbeMemGrow(pMem, pMem->n + 2, 1) ){
+      return SQLITE_NOMEM;
+    }
+    pMem->z[pMem->n] = 0;
+    pMem->z[pMem->n+1] = 0;
+    pMem->flags |= MEM_Term;
+#ifdef SQLITE_DEBUG
+    pMem->pScopyFrom = 0;
+#endif
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** If the given Mem* has a zero-filled tail, turn it into an ordinary
+** blob stored in dynamically allocated space.
+*/
+#ifndef SQLITE_OMIT_INCRBLOB
+SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *pMem){
+  if( pMem->flags & MEM_Zero ){
+    int nByte;
+    assert( pMem->flags&MEM_Blob );
+    assert( (pMem->flags&MEM_RowSet)==0 );
+    assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+
+    /* Set nByte to the number of bytes required to store the expanded blob. */
+    nByte = pMem->n + pMem->u.nZero;
+    if( nByte<=0 ){
+      nByte = 1;
+    }
+    if( sqlite3VdbeMemGrow(pMem, nByte, 1) ){
+      return SQLITE_NOMEM;
+    }
+
+    memset(&pMem->z[pMem->n], 0, pMem->u.nZero);
+    pMem->n += pMem->u.nZero;
+    pMem->flags &= ~(MEM_Zero|MEM_Term);
+  }
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** It is already known that pMem contains an unterminated string.
+** Add the zero terminator.
+*/
+static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){
+  if( sqlite3VdbeMemGrow(pMem, pMem->n+2, 1) ){
+    return SQLITE_NOMEM;
+  }
+  pMem->z[pMem->n] = 0;
+  pMem->z[pMem->n+1] = 0;
+  pMem->flags |= MEM_Term;
+  return SQLITE_OK;
+}
+
+/*
+** Make sure the given Mem is \u0000 terminated.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem *pMem){
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) );
+  testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 );
+  if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){
+    return SQLITE_OK;   /* Nothing to do */
+  }else{
+    return vdbeMemAddTerminator(pMem);
+  }
+}
+
+/*
+** Add MEM_Str to the set of representations for the given Mem.  Numbers
+** are converted using sqlite3_snprintf().  Converting a BLOB to a string
+** is a no-op.
+**
+** Existing representations MEM_Int and MEM_Real are invalidated if
+** bForce is true but are retained if bForce is false.
+**
+** A MEM_Null value will never be passed to this function. This function is
+** used for converting values to text for returning to the user (i.e. via
+** sqlite3_value_text()), or for ensuring that values to be used as btree
+** keys are strings. In the former case a NULL pointer is returned the
+** user and the latter is an internal programming error.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem *pMem, u8 enc, u8 bForce){
+  int fg = pMem->flags;
+  const int nByte = 32;
+
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( !(fg&MEM_Zero) );
+  assert( !(fg&(MEM_Str|MEM_Blob)) );
+  assert( fg&(MEM_Int|MEM_Real) );
+  assert( (pMem->flags&MEM_RowSet)==0 );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+
+
+  if( sqlite3VdbeMemClearAndResize(pMem, nByte) ){
+    return SQLITE_NOMEM;
+  }
+
+  /* For a Real or Integer, use sqlite3_snprintf() to produce the UTF-8
+  ** string representation of the value. Then, if the required encoding
+  ** is UTF-16le or UTF-16be do a translation.
+  ** 
+  ** FIX ME: It would be better if sqlite3_snprintf() could do UTF-16.
+  */
+  if( fg & MEM_Int ){
+    sqlite3_snprintf(nByte, pMem->z, "%lld", pMem->u.i);
+  }else{
+    assert( fg & MEM_Real );
+    sqlite3_snprintf(nByte, pMem->z, "%!.15g", pMem->u.r);
+  }
+  pMem->n = sqlite3Strlen30(pMem->z);
+  pMem->enc = SQLITE_UTF8;
+  pMem->flags |= MEM_Str|MEM_Term;
+  if( bForce ) pMem->flags &= ~(MEM_Int|MEM_Real);
+  sqlite3VdbeChangeEncoding(pMem, enc);
+  return SQLITE_OK;
+}
+
+/*
+** Memory cell pMem contains the context of an aggregate function.
+** This routine calls the finalize method for that function.  The
+** result of the aggregate is stored back into pMem.
+**
+** Return SQLITE_ERROR if the finalizer reports an error.  SQLITE_OK
+** otherwise.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){
+  int rc = SQLITE_OK;
+  if( ALWAYS(pFunc && pFunc->xFinalize) ){
+    sqlite3_context ctx;
+    Mem t;
+    assert( (pMem->flags & MEM_Null)!=0 || pFunc==pMem->u.pDef );
+    assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+    memset(&ctx, 0, sizeof(ctx));
+    memset(&t, 0, sizeof(t));
+    t.flags = MEM_Null;
+    t.db = pMem->db;
+    ctx.pOut = &t;
+    ctx.pMem = pMem;
+    ctx.pFunc = pFunc;
+    pFunc->xFinalize(&ctx); /* IMP: R-24505-23230 */
+    assert( (pMem->flags & MEM_Dyn)==0 );
+    if( pMem->szMalloc>0 ) sqlite3DbFree(pMem->db, pMem->zMalloc);
+    memcpy(pMem, &t, sizeof(t));
+    rc = ctx.isError;
+  }
+  return rc;
+}
+
+/*
+** If the memory cell contains a value that must be freed by
+** invoking the external callback in Mem.xDel, then this routine
+** will free that value.  It also sets Mem.flags to MEM_Null.
+**
+** This is a helper routine for sqlite3VdbeMemSetNull() and
+** for sqlite3VdbeMemRelease().  Use those other routines as the
+** entry point for releasing Mem resources.
+*/
+static SQLITE_NOINLINE void vdbeMemClearExternAndSetNull(Mem *p){
+  assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) );
+  assert( VdbeMemDynamic(p) );
+  if( p->flags&MEM_Agg ){
+    sqlite3VdbeMemFinalize(p, p->u.pDef);
+    assert( (p->flags & MEM_Agg)==0 );
+    testcase( p->flags & MEM_Dyn );
+  }
+  if( p->flags&MEM_Dyn ){
+    assert( (p->flags&MEM_RowSet)==0 );
+    assert( p->xDel!=SQLITE_DYNAMIC && p->xDel!=0 );
+    p->xDel((void *)p->z);
+  }else if( p->flags&MEM_RowSet ){
+    sqlite3RowSetClear(p->u.pRowSet);
+  }else if( p->flags&MEM_Frame ){
+    VdbeFrame *pFrame = p->u.pFrame;
+    pFrame->pParent = pFrame->v->pDelFrame;
+    pFrame->v->pDelFrame = pFrame;
+  }
+  p->flags = MEM_Null;
+}
+
+/*
+** Release memory held by the Mem p, both external memory cleared
+** by p->xDel and memory in p->zMalloc.
+**
+** This is a helper routine invoked by sqlite3VdbeMemRelease() in
+** the unusual case where there really is memory in p that needs
+** to be freed.
+*/
+static SQLITE_NOINLINE void vdbeMemClear(Mem *p){
+  if( VdbeMemDynamic(p) ){
+    vdbeMemClearExternAndSetNull(p);
+  }
+  if( p->szMalloc ){
+    sqlite3DbFree(p->db, p->zMalloc);
+    p->szMalloc = 0;
+  }
+  p->z = 0;
+}
+
+/*
+** Release any memory resources held by the Mem.  Both the memory that is
+** free by Mem.xDel and the Mem.zMalloc allocation are freed.
+**
+** Use this routine prior to clean up prior to abandoning a Mem, or to
+** reset a Mem back to its minimum memory utilization.
+**
+** Use sqlite3VdbeMemSetNull() to release just the Mem.xDel space
+** prior to inserting new content into the Mem.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p){
+  assert( sqlite3VdbeCheckMemInvariants(p) );
+  if( VdbeMemDynamic(p) || p->szMalloc ){
+    vdbeMemClear(p);
+  }
+}
+
+/*
+** Convert a 64-bit IEEE double into a 64-bit signed integer.
+** If the double is out of range of a 64-bit signed integer then
+** return the closest available 64-bit signed integer.
+*/
+static i64 doubleToInt64(double r){
+#ifdef SQLITE_OMIT_FLOATING_POINT
+  /* When floating-point is omitted, double and int64 are the same thing */
+  return r;
+#else
+  /*
+  ** Many compilers we encounter do not define constants for the
+  ** minimum and maximum 64-bit integers, or they define them
+  ** inconsistently.  And many do not understand the "LL" notation.
+  ** So we define our own static constants here using nothing
+  ** larger than a 32-bit integer constant.
+  */
+  static const i64 maxInt = LARGEST_INT64;
+  static const i64 minInt = SMALLEST_INT64;
+
+  if( r<=(double)minInt ){
+    return minInt;
+  }else if( r>=(double)maxInt ){
+    return maxInt;
+  }else{
+    return (i64)r;
+  }
+#endif
+}
+
+/*
+** Return some kind of integer value which is the best we can do
+** at representing the value that *pMem describes as an integer.
+** If pMem is an integer, then the value is exact.  If pMem is
+** a floating-point then the value returned is the integer part.
+** If pMem is a string or blob, then we make an attempt to convert
+** it into an integer and return that.  If pMem represents an
+** an SQL-NULL value, return 0.
+**
+** If pMem represents a string value, its encoding might be changed.
+*/
+SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem *pMem){
+  int flags;
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+  flags = pMem->flags;
+  if( flags & MEM_Int ){
+    return pMem->u.i;
+  }else if( flags & MEM_Real ){
+    return doubleToInt64(pMem->u.r);
+  }else if( flags & (MEM_Str|MEM_Blob) ){
+    i64 value = 0;
+    assert( pMem->z || pMem->n==0 );
+    sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc);
+    return value;
+  }else{
+    return 0;
+  }
+}
+
+/*
+** Return the best representation of pMem that we can get into a
+** double.  If pMem is already a double or an integer, return its
+** value.  If it is a string or blob, try to convert it to a double.
+** If it is a NULL, return 0.0.
+*/
+SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem *pMem){
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+  if( pMem->flags & MEM_Real ){
+    return pMem->u.r;
+  }else if( pMem->flags & MEM_Int ){
+    return (double)pMem->u.i;
+  }else if( pMem->flags & (MEM_Str|MEM_Blob) ){
+    /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */
+    double val = (double)0;
+    sqlite3AtoF(pMem->z, &val, pMem->n, pMem->enc);
+    return val;
+  }else{
+    /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */
+    return (double)0;
+  }
+}
+
+/*
+** The MEM structure is already a MEM_Real.  Try to also make it a
+** MEM_Int if we can.
+*/
+SQLITE_PRIVATE void sqlite3VdbeIntegerAffinity(Mem *pMem){
+  i64 ix;
+  assert( pMem->flags & MEM_Real );
+  assert( (pMem->flags & MEM_RowSet)==0 );
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+
+  ix = doubleToInt64(pMem->u.r);
+
+  /* Only mark the value as an integer if
+  **
+  **    (1) the round-trip conversion real->int->real is a no-op, and
+  **    (2) The integer is neither the largest nor the smallest
+  **        possible integer (ticket #3922)
+  **
+  ** The second and third terms in the following conditional enforces
+  ** the second condition under the assumption that addition overflow causes
+  ** values to wrap around.
+  */
+  if( pMem->u.r==ix && ix>SMALLEST_INT64 && ix<LARGEST_INT64 ){
+    pMem->u.i = ix;
+    MemSetTypeFlag(pMem, MEM_Int);
+  }
+}
+
+/*
+** Convert pMem to type integer.  Invalidate any prior representations.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemIntegerify(Mem *pMem){
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( (pMem->flags & MEM_RowSet)==0 );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+
+  pMem->u.i = sqlite3VdbeIntValue(pMem);
+  MemSetTypeFlag(pMem, MEM_Int);
+  return SQLITE_OK;
+}
+
+/*
+** Convert pMem so that it is of type MEM_Real.
+** Invalidate any prior representations.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemRealify(Mem *pMem){
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+
+  pMem->u.r = sqlite3VdbeRealValue(pMem);
+  MemSetTypeFlag(pMem, MEM_Real);
+  return SQLITE_OK;
+}
+
+/*
+** Convert pMem so that it has types MEM_Real or MEM_Int or both.
+** Invalidate any prior representations.
+**
+** Every effort is made to force the conversion, even if the input
+** is a string that does not look completely like a number.  Convert
+** as much of the string as we can and ignore the rest.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem *pMem){
+  if( (pMem->flags & (MEM_Int|MEM_Real|MEM_Null))==0 ){
+    assert( (pMem->flags & (MEM_Blob|MEM_Str))!=0 );
+    assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+    if( 0==sqlite3Atoi64(pMem->z, &pMem->u.i, pMem->n, pMem->enc) ){
+      MemSetTypeFlag(pMem, MEM_Int);
+    }else{
+      pMem->u.r = sqlite3VdbeRealValue(pMem);
+      MemSetTypeFlag(pMem, MEM_Real);
+      sqlite3VdbeIntegerAffinity(pMem);
+    }
+  }
+  assert( (pMem->flags & (MEM_Int|MEM_Real|MEM_Null))!=0 );
+  pMem->flags &= ~(MEM_Str|MEM_Blob);
+  return SQLITE_OK;
+}
+
+/*
+** Cast the datatype of the value in pMem according to the affinity
+** "aff".  Casting is different from applying affinity in that a cast
+** is forced.  In other words, the value is converted into the desired
+** affinity even if that results in loss of data.  This routine is
+** used (for example) to implement the SQL "cast()" operator.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemCast(Mem *pMem, u8 aff, u8 encoding){
+  if( pMem->flags & MEM_Null ) return;
+  switch( aff ){
+    case SQLITE_AFF_NONE: {   /* Really a cast to BLOB */
+      if( (pMem->flags & MEM_Blob)==0 ){
+        sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding);
+        assert( pMem->flags & MEM_Str || pMem->db->mallocFailed );
+        MemSetTypeFlag(pMem, MEM_Blob);
+      }else{
+        pMem->flags &= ~(MEM_TypeMask&~MEM_Blob);
+      }
+      break;
+    }
+    case SQLITE_AFF_NUMERIC: {
+      sqlite3VdbeMemNumerify(pMem);
+      break;
+    }
+    case SQLITE_AFF_INTEGER: {
+      sqlite3VdbeMemIntegerify(pMem);
+      break;
+    }
+    case SQLITE_AFF_REAL: {
+      sqlite3VdbeMemRealify(pMem);
+      break;
+    }
+    default: {
+      assert( aff==SQLITE_AFF_TEXT );
+      assert( MEM_Str==(MEM_Blob>>3) );
+      pMem->flags |= (pMem->flags&MEM_Blob)>>3;
+      sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding);
+      assert( pMem->flags & MEM_Str || pMem->db->mallocFailed );
+      pMem->flags &= ~(MEM_Int|MEM_Real|MEM_Blob|MEM_Zero);
+      break;
+    }
+  }
+}
+
+/*
+** Initialize bulk memory to be a consistent Mem object.
+**
+** The minimum amount of initialization feasible is performed.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem *pMem, sqlite3 *db, u16 flags){
+  assert( (flags & ~MEM_TypeMask)==0 );
+  pMem->flags = flags;
+  pMem->db = db;
+  pMem->szMalloc = 0;
+}
+
+
+/*
+** Delete any previous value and set the value stored in *pMem to NULL.
+**
+** This routine calls the Mem.xDel destructor to dispose of values that
+** require the destructor.  But it preserves the Mem.zMalloc memory allocation.
+** To free all resources, use sqlite3VdbeMemRelease(), which both calls this
+** routine to invoke the destructor and deallocates Mem.zMalloc.
+**
+** Use this routine to reset the Mem prior to insert a new value.
+**
+** Use sqlite3VdbeMemRelease() to complete erase the Mem prior to abandoning it.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem *pMem){
+  if( VdbeMemDynamic(pMem) ){
+    vdbeMemClearExternAndSetNull(pMem);
+  }else{
+    pMem->flags = MEM_Null;
+  }
+}
+SQLITE_PRIVATE void sqlite3ValueSetNull(sqlite3_value *p){
+  sqlite3VdbeMemSetNull((Mem*)p); 
+}
+
+/*
+** Delete any previous value and set the value to be a BLOB of length
+** n containing all zeros.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemSetZeroBlob(Mem *pMem, int n){
+  sqlite3VdbeMemRelease(pMem);
+  pMem->flags = MEM_Blob|MEM_Zero;
+  pMem->n = 0;
+  if( n<0 ) n = 0;
+  pMem->u.nZero = n;
+  pMem->enc = SQLITE_UTF8;
+  pMem->z = 0;
+}
+
+/*
+** The pMem is known to contain content that needs to be destroyed prior
+** to a value change.  So invoke the destructor, then set the value to
+** a 64-bit integer.
+*/
+static SQLITE_NOINLINE void vdbeReleaseAndSetInt64(Mem *pMem, i64 val){
+  sqlite3VdbeMemSetNull(pMem);
+  pMem->u.i = val;
+  pMem->flags = MEM_Int;
+}
+
+/*
+** Delete any previous value and set the value stored in *pMem to val,
+** manifest type INTEGER.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemSetInt64(Mem *pMem, i64 val){
+  if( VdbeMemDynamic(pMem) ){
+    vdbeReleaseAndSetInt64(pMem, val);
+  }else{
+    pMem->u.i = val;
+    pMem->flags = MEM_Int;
+  }
+}
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/*
+** Delete any previous value and set the value stored in *pMem to val,
+** manifest type REAL.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemSetDouble(Mem *pMem, double val){
+  sqlite3VdbeMemSetNull(pMem);
+  if( !sqlite3IsNaN(val) ){
+    pMem->u.r = val;
+    pMem->flags = MEM_Real;
+  }
+}
+#endif
+
+/*
+** Delete any previous value and set the value of pMem to be an
+** empty boolean index.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemSetRowSet(Mem *pMem){
+  sqlite3 *db = pMem->db;
+  assert( db!=0 );
+  assert( (pMem->flags & MEM_RowSet)==0 );
+  sqlite3VdbeMemRelease(pMem);
+  pMem->zMalloc = sqlite3DbMallocRaw(db, 64);
+  if( db->mallocFailed ){
+    pMem->flags = MEM_Null;
+    pMem->szMalloc = 0;
+  }else{
+    assert( pMem->zMalloc );
+    pMem->szMalloc = sqlite3DbMallocSize(db, pMem->zMalloc);
+    pMem->u.pRowSet = sqlite3RowSetInit(db, pMem->zMalloc, pMem->szMalloc);
+    assert( pMem->u.pRowSet!=0 );
+    pMem->flags = MEM_RowSet;
+  }
+}
+
+/*
+** Return true if the Mem object contains a TEXT or BLOB that is
+** too large - whose size exceeds SQLITE_MAX_LENGTH.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemTooBig(Mem *p){
+  assert( p->db!=0 );
+  if( p->flags & (MEM_Str|MEM_Blob) ){
+    int n = p->n;
+    if( p->flags & MEM_Zero ){
+      n += p->u.nZero;
+    }
+    return n>p->db->aLimit[SQLITE_LIMIT_LENGTH];
+  }
+  return 0; 
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** This routine prepares a memory cell for modification by breaking
+** its link to a shallow copy and by marking any current shallow
+** copies of this cell as invalid.
+**
+** This is used for testing and debugging only - to make sure shallow
+** copies are not misused.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){
+  int i;
+  Mem *pX;
+  for(i=1, pX=&pVdbe->aMem[1]; i<=pVdbe->nMem; i++, pX++){
+    if( pX->pScopyFrom==pMem ){
+      pX->flags |= MEM_Undefined;
+      pX->pScopyFrom = 0;
+    }
+  }
+  pMem->pScopyFrom = 0;
+}
+#endif /* SQLITE_DEBUG */
+
+/*
+** Size of struct Mem not including the Mem.zMalloc member.
+*/
+#define MEMCELLSIZE offsetof(Mem,zMalloc)
+
+/*
+** Make an shallow copy of pFrom into pTo.  Prior contents of
+** pTo are freed.  The pFrom->z field is not duplicated.  If
+** pFrom->z is used, then pTo->z points to the same thing as pFrom->z
+** and flags gets srcType (either MEM_Ephem or MEM_Static).
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){
+  assert( (pFrom->flags & MEM_RowSet)==0 );
+  assert( pTo->db==pFrom->db );
+  if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo);
+  memcpy(pTo, pFrom, MEMCELLSIZE);
+  if( (pFrom->flags&MEM_Static)==0 ){
+    pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem);
+    assert( srcType==MEM_Ephem || srcType==MEM_Static );
+    pTo->flags |= srcType;
+  }
+}
+
+/*
+** Make a full copy of pFrom into pTo.  Prior contents of pTo are
+** freed before the copy is made.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){
+  int rc = SQLITE_OK;
+
+  assert( pTo->db==pFrom->db );
+  assert( (pFrom->flags & MEM_RowSet)==0 );
+  if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo);
+  memcpy(pTo, pFrom, MEMCELLSIZE);
+  pTo->flags &= ~MEM_Dyn;
+  if( pTo->flags&(MEM_Str|MEM_Blob) ){
+    if( 0==(pFrom->flags&MEM_Static) ){
+      pTo->flags |= MEM_Ephem;
+      rc = sqlite3VdbeMemMakeWriteable(pTo);
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Transfer the contents of pFrom to pTo. Any existing value in pTo is
+** freed. If pFrom contains ephemeral data, a copy is made.
+**
+** pFrom contains an SQL NULL when this routine returns.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemMove(Mem *pTo, Mem *pFrom){
+  assert( pFrom->db==0 || sqlite3_mutex_held(pFrom->db->mutex) );
+  assert( pTo->db==0 || sqlite3_mutex_held(pTo->db->mutex) );
+  assert( pFrom->db==0 || pTo->db==0 || pFrom->db==pTo->db );
+
+  sqlite3VdbeMemRelease(pTo);
+  memcpy(pTo, pFrom, sizeof(Mem));
+  pFrom->flags = MEM_Null;
+  pFrom->szMalloc = 0;
+}
+
+/*
+** Change the value of a Mem to be a string or a BLOB.
+**
+** The memory management strategy depends on the value of the xDel
+** parameter. If the value passed is SQLITE_TRANSIENT, then the 
+** string is copied into a (possibly existing) buffer managed by the 
+** Mem structure. Otherwise, any existing buffer is freed and the
+** pointer copied.
+**
+** If the string is too large (if it exceeds the SQLITE_LIMIT_LENGTH
+** size limit) then no memory allocation occurs.  If the string can be
+** stored without allocating memory, then it is.  If a memory allocation
+** is required to store the string, then value of pMem is unchanged.  In
+** either case, SQLITE_TOOBIG is returned.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemSetStr(
+  Mem *pMem,          /* Memory cell to set to string value */
+  const char *z,      /* String pointer */
+  int n,              /* Bytes in string, or negative */
+  u8 enc,             /* Encoding of z.  0 for BLOBs */
+  void (*xDel)(void*) /* Destructor function */
+){
+  int nByte = n;      /* New value for pMem->n */
+  int iLimit;         /* Maximum allowed string or blob size */
+  u16 flags = 0;      /* New value for pMem->flags */
+
+  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
+  assert( (pMem->flags & MEM_RowSet)==0 );
+
+  /* If z is a NULL pointer, set pMem to contain an SQL NULL. */
+  if( !z ){
+    sqlite3VdbeMemSetNull(pMem);
+    return SQLITE_OK;
+  }
+
+  if( pMem->db ){
+    iLimit = pMem->db->aLimit[SQLITE_LIMIT_LENGTH];
+  }else{
+    iLimit = SQLITE_MAX_LENGTH;
+  }
+  flags = (enc==0?MEM_Blob:MEM_Str);
+  if( nByte<0 ){
+    assert( enc!=0 );
+    if( enc==SQLITE_UTF8 ){
+      nByte = sqlite3Strlen30(z);
+      if( nByte>iLimit ) nByte = iLimit+1;
+    }else{
+      for(nByte=0; nByte<=iLimit && (z[nByte] | z[nByte+1]); nByte+=2){}
+    }
+    flags |= MEM_Term;
+  }
+
+  /* The following block sets the new values of Mem.z and Mem.xDel. It
+  ** also sets a flag in local variable "flags" to indicate the memory
+  ** management (one of MEM_Dyn or MEM_Static).
+  */
+  if( xDel==SQLITE_TRANSIENT ){
+    int nAlloc = nByte;
+    if( flags&MEM_Term ){
+      nAlloc += (enc==SQLITE_UTF8?1:2);
+    }
+    if( nByte>iLimit ){
+      return SQLITE_TOOBIG;
+    }
+    testcase( nAlloc==0 );
+    testcase( nAlloc==31 );
+    testcase( nAlloc==32 );
+    if( sqlite3VdbeMemClearAndResize(pMem, MAX(nAlloc,32)) ){
+      return SQLITE_NOMEM;
+    }
+    memcpy(pMem->z, z, nAlloc);
+  }else if( xDel==SQLITE_DYNAMIC ){
+    sqlite3VdbeMemRelease(pMem);
+    pMem->zMalloc = pMem->z = (char *)z;
+    pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->zMalloc);
+  }else{
+    sqlite3VdbeMemRelease(pMem);
+    pMem->z = (char *)z;
+    pMem->xDel = xDel;
+    flags |= ((xDel==SQLITE_STATIC)?MEM_Static:MEM_Dyn);
+  }
+
+  pMem->n = nByte;
+  pMem->flags = flags;
+  pMem->enc = (enc==0 ? SQLITE_UTF8 : enc);
+
+#ifndef SQLITE_OMIT_UTF16
+  if( pMem->enc!=SQLITE_UTF8 && sqlite3VdbeMemHandleBom(pMem) ){
+    return SQLITE_NOMEM;
+  }
+#endif
+
+  if( nByte>iLimit ){
+    return SQLITE_TOOBIG;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Move data out of a btree key or data field and into a Mem structure.
+** The data or key is taken from the entry that pCur is currently pointing
+** to.  offset and amt determine what portion of the data or key to retrieve.
+** key is true to get the key or false to get data.  The result is written
+** into the pMem element.
+**
+** The pMem object must have been initialized.  This routine will use
+** pMem->zMalloc to hold the content from the btree, if possible.  New
+** pMem->zMalloc space will be allocated if necessary.  The calling routine
+** is responsible for making sure that the pMem object is eventually
+** destroyed.
+**
+** If this routine fails for any reason (malloc returns NULL or unable
+** to read from the disk) then the pMem is left in an inconsistent state.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMemFromBtree(
+  BtCursor *pCur,   /* Cursor pointing at record to retrieve. */
+  u32 offset,       /* Offset from the start of data to return bytes from. */
+  u32 amt,          /* Number of bytes to return. */
+  int key,          /* If true, retrieve from the btree key, not data. */
+  Mem *pMem         /* OUT: Return data in this Mem structure. */
+){
+  char *zData;        /* Data from the btree layer */
+  u32 available = 0;  /* Number of bytes available on the local btree page */
+  int rc = SQLITE_OK; /* Return code */
+
+  assert( sqlite3BtreeCursorIsValid(pCur) );
+  assert( !VdbeMemDynamic(pMem) );
+
+  /* Note: the calls to BtreeKeyFetch() and DataFetch() below assert() 
+  ** that both the BtShared and database handle mutexes are held. */
+  assert( (pMem->flags & MEM_RowSet)==0 );
+  if( key ){
+    zData = (char *)sqlite3BtreeKeyFetch(pCur, &available);
+  }else{
+    zData = (char *)sqlite3BtreeDataFetch(pCur, &available);
+  }
+  assert( zData!=0 );
+
+  if( offset+amt<=available ){
+    pMem->z = &zData[offset];
+    pMem->flags = MEM_Blob|MEM_Ephem;
+    pMem->n = (int)amt;
+  }else{
+    pMem->flags = MEM_Null;
+    if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){
+      if( key ){
+        rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z);
+      }else{
+        rc = sqlite3BtreeData(pCur, offset, amt, pMem->z);
+      }
+      if( rc==SQLITE_OK ){
+        pMem->z[amt] = 0;
+        pMem->z[amt+1] = 0;
+        pMem->flags = MEM_Blob|MEM_Term;
+        pMem->n = (int)amt;
+      }else{
+        sqlite3VdbeMemRelease(pMem);
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
+** The pVal argument is known to be a value other than NULL.
+** Convert it into a string with encoding enc and return a pointer
+** to a zero-terminated version of that string.
+*/
+static SQLITE_NOINLINE const void *valueToText(sqlite3_value* pVal, u8 enc){
+  assert( pVal!=0 );
+  assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
+  assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
+  assert( (pVal->flags & MEM_RowSet)==0 );
+  assert( (pVal->flags & (MEM_Null))==0 );
+  if( pVal->flags & (MEM_Blob|MEM_Str) ){
+    pVal->flags |= MEM_Str;
+    if( pVal->flags & MEM_Zero ){
+      sqlite3VdbeMemExpandBlob(pVal);
+    }
+    if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){
+      sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED);
+    }
+    if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){
+      assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 );
+      if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){
+        return 0;
+      }
+    }
+    sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */
+  }else{
+    sqlite3VdbeMemStringify(pVal, enc, 0);
+    assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) );
+  }
+  assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0
+              || pVal->db->mallocFailed );
+  if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){
+    return pVal->z;
+  }else{
+    return 0;
+  }
+}
+
+/* This function is only available internally, it is not part of the
+** external API. It works in a similar way to sqlite3_value_text(),
+** except the data returned is in the encoding specified by the second
+** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
+** SQLITE_UTF8.
+**
+** (2006-02-16:)  The enc value can be or-ed with SQLITE_UTF16_ALIGNED.
+** If that is the case, then the result must be aligned on an even byte
+** boundary.
+*/
+SQLITE_PRIVATE const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
+  if( !pVal ) return 0;
+  assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
+  assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
+  assert( (pVal->flags & MEM_RowSet)==0 );
+  if( (pVal->flags&(MEM_Str|MEM_Term))==(MEM_Str|MEM_Term) && pVal->enc==enc ){
+    return pVal->z;
+  }
+  if( pVal->flags&MEM_Null ){
+    return 0;
+  }
+  return valueToText(pVal, enc);
+}
+
+/*
+** Create a new sqlite3_value object.
+*/
+SQLITE_PRIVATE sqlite3_value *sqlite3ValueNew(sqlite3 *db){
+  Mem *p = sqlite3DbMallocZero(db, sizeof(*p));
+  if( p ){
+    p->flags = MEM_Null;
+    p->db = db;
+  }
+  return p;
+}
+
+/*
+** Context object passed by sqlite3Stat4ProbeSetValue() through to 
+** valueNew(). See comments above valueNew() for details.
+*/
+struct ValueNewStat4Ctx {
+  Parse *pParse;
+  Index *pIdx;
+  UnpackedRecord **ppRec;
+  int iVal;
+};
+
+/*
+** Allocate and return a pointer to a new sqlite3_value object. If
+** the second argument to this function is NULL, the object is allocated
+** by calling sqlite3ValueNew().
+**
+** Otherwise, if the second argument is non-zero, then this function is 
+** being called indirectly by sqlite3Stat4ProbeSetValue(). If it has not
+** already been allocated, allocate the UnpackedRecord structure that 
+** that function will return to its caller here. Then return a pointer 
+** an sqlite3_value within the UnpackedRecord.a[] array.
+*/
+static sqlite3_value *valueNew(sqlite3 *db, struct ValueNewStat4Ctx *p){
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( p ){
+    UnpackedRecord *pRec = p->ppRec[0];
+
+    if( pRec==0 ){
+      Index *pIdx = p->pIdx;      /* Index being probed */
+      int nByte;                  /* Bytes of space to allocate */
+      int i;                      /* Counter variable */
+      int nCol = pIdx->nColumn;   /* Number of index columns including rowid */
+  
+      nByte = sizeof(Mem) * nCol + ROUND8(sizeof(UnpackedRecord));
+      pRec = (UnpackedRecord*)sqlite3DbMallocZero(db, nByte);
+      if( pRec ){
+        pRec->pKeyInfo = sqlite3KeyInfoOfIndex(p->pParse, pIdx);
+        if( pRec->pKeyInfo ){
+          assert( pRec->pKeyInfo->nField+pRec->pKeyInfo->nXField==nCol );
+          assert( pRec->pKeyInfo->enc==ENC(db) );
+          pRec->aMem = (Mem *)((u8*)pRec + ROUND8(sizeof(UnpackedRecord)));
+          for(i=0; i<nCol; i++){
+            pRec->aMem[i].flags = MEM_Null;
+            pRec->aMem[i].db = db;
+          }
+        }else{
+          sqlite3DbFree(db, pRec);
+          pRec = 0;
+        }
+      }
+      if( pRec==0 ) return 0;
+      p->ppRec[0] = pRec;
+    }
+  
+    pRec->nField = p->iVal+1;
+    return &pRec->aMem[p->iVal];
+  }
+#else
+  UNUSED_PARAMETER(p);
+#endif /* defined(SQLITE_ENABLE_STAT3_OR_STAT4) */
+  return sqlite3ValueNew(db);
+}
+
+/*
+** Extract a value from the supplied expression in the manner described
+** above sqlite3ValueFromExpr(). Allocate the sqlite3_value object
+** using valueNew().
+**
+** If pCtx is NULL and an error occurs after the sqlite3_value object
+** has been allocated, it is freed before returning. Or, if pCtx is not
+** NULL, it is assumed that the caller will free any allocated object
+** in all cases.
+*/
+static int valueFromExpr(
+  sqlite3 *db,                    /* The database connection */
+  Expr *pExpr,                    /* The expression to evaluate */
+  u8 enc,                         /* Encoding to use */
+  u8 affinity,                    /* Affinity to use */
+  sqlite3_value **ppVal,          /* Write the new value here */
+  struct ValueNewStat4Ctx *pCtx   /* Second argument for valueNew() */
+){
+  int op;
+  char *zVal = 0;
+  sqlite3_value *pVal = 0;
+  int negInt = 1;
+  const char *zNeg = "";
+  int rc = SQLITE_OK;
+
+  if( !pExpr ){
+    *ppVal = 0;
+    return SQLITE_OK;
+  }
+  while( (op = pExpr->op)==TK_UPLUS ) pExpr = pExpr->pLeft;
+  if( NEVER(op==TK_REGISTER) ) op = pExpr->op2;
+
+  if( op==TK_CAST ){
+    u8 aff = sqlite3AffinityType(pExpr->u.zToken,0);
+    rc = valueFromExpr(db, pExpr->pLeft, enc, aff, ppVal, pCtx);
+    testcase( rc!=SQLITE_OK );
+    if( *ppVal ){
+      sqlite3VdbeMemCast(*ppVal, aff, SQLITE_UTF8);
+      sqlite3ValueApplyAffinity(*ppVal, affinity, SQLITE_UTF8);
+    }
+    return rc;
+  }
+
+  /* Handle negative integers in a single step.  This is needed in the
+  ** case when the value is -9223372036854775808.
+  */
+  if( op==TK_UMINUS
+   && (pExpr->pLeft->op==TK_INTEGER || pExpr->pLeft->op==TK_FLOAT) ){
+    pExpr = pExpr->pLeft;
+    op = pExpr->op;
+    negInt = -1;
+    zNeg = "-";
+  }
+
+  if( op==TK_STRING || op==TK_FLOAT || op==TK_INTEGER ){
+    pVal = valueNew(db, pCtx);
+    if( pVal==0 ) goto no_mem;
+    if( ExprHasProperty(pExpr, EP_IntValue) ){
+      sqlite3VdbeMemSetInt64(pVal, (i64)pExpr->u.iValue*negInt);
+    }else{
+      zVal = sqlite3MPrintf(db, "%s%s", zNeg, pExpr->u.zToken);
+      if( zVal==0 ) goto no_mem;
+      sqlite3ValueSetStr(pVal, -1, zVal, SQLITE_UTF8, SQLITE_DYNAMIC);
+    }
+    if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_NONE ){
+      sqlite3ValueApplyAffinity(pVal, SQLITE_AFF_NUMERIC, SQLITE_UTF8);
+    }else{
+      sqlite3ValueApplyAffinity(pVal, affinity, SQLITE_UTF8);
+    }
+    if( pVal->flags & (MEM_Int|MEM_Real) ) pVal->flags &= ~MEM_Str;
+    if( enc!=SQLITE_UTF8 ){
+      rc = sqlite3VdbeChangeEncoding(pVal, enc);
+    }
+  }else if( op==TK_UMINUS ) {
+    /* This branch happens for multiple negative signs.  Ex: -(-5) */
+    if( SQLITE_OK==sqlite3ValueFromExpr(db,pExpr->pLeft,enc,affinity,&pVal) 
+     && pVal!=0
+    ){
+      sqlite3VdbeMemNumerify(pVal);
+      if( pVal->flags & MEM_Real ){
+        pVal->u.r = -pVal->u.r;
+      }else if( pVal->u.i==SMALLEST_INT64 ){
+        pVal->u.r = -(double)SMALLEST_INT64;
+        MemSetTypeFlag(pVal, MEM_Real);
+      }else{
+        pVal->u.i = -pVal->u.i;
+      }
+      sqlite3ValueApplyAffinity(pVal, affinity, enc);
+    }
+  }else if( op==TK_NULL ){
+    pVal = valueNew(db, pCtx);
+    if( pVal==0 ) goto no_mem;
+  }
+#ifndef SQLITE_OMIT_BLOB_LITERAL
+  else if( op==TK_BLOB ){
+    int nVal;
+    assert( pExpr->u.zToken[0]=='x' || pExpr->u.zToken[0]=='X' );
+    assert( pExpr->u.zToken[1]=='\'' );
+    pVal = valueNew(db, pCtx);
+    if( !pVal ) goto no_mem;
+    zVal = &pExpr->u.zToken[2];
+    nVal = sqlite3Strlen30(zVal)-1;
+    assert( zVal[nVal]=='\'' );
+    sqlite3VdbeMemSetStr(pVal, sqlite3HexToBlob(db, zVal, nVal), nVal/2,
+                         0, SQLITE_DYNAMIC);
+  }
+#endif
+
+  *ppVal = pVal;
+  return rc;
+
+no_mem:
+  db->mallocFailed = 1;
+  sqlite3DbFree(db, zVal);
+  assert( *ppVal==0 );
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( pCtx==0 ) sqlite3ValueFree(pVal);
+#else
+  assert( pCtx==0 ); sqlite3ValueFree(pVal);
+#endif
+  return SQLITE_NOMEM;
+}
+
+/*
+** Create a new sqlite3_value object, containing the value of pExpr.
+**
+** This only works for very simple expressions that consist of one constant
+** token (i.e. "5", "5.1", "'a string'"). If the expression can
+** be converted directly into a value, then the value is allocated and
+** a pointer written to *ppVal. The caller is responsible for deallocating
+** the value by passing it to sqlite3ValueFree() later on. If the expression
+** cannot be converted to a value, then *ppVal is set to NULL.
+*/
+SQLITE_PRIVATE int sqlite3ValueFromExpr(
+  sqlite3 *db,              /* The database connection */
+  Expr *pExpr,              /* The expression to evaluate */
+  u8 enc,                   /* Encoding to use */
+  u8 affinity,              /* Affinity to use */
+  sqlite3_value **ppVal     /* Write the new value here */
+){
+  return valueFromExpr(db, pExpr, enc, affinity, ppVal, 0);
+}
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** The implementation of the sqlite_record() function. This function accepts
+** a single argument of any type. The return value is a formatted database 
+** record (a blob) containing the argument value.
+**
+** This is used to convert the value stored in the 'sample' column of the
+** sqlite_stat3 table to the record format SQLite uses internally.
+*/
+static void recordFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const int file_format = 1;
+  int iSerial;                    /* Serial type */
+  int nSerial;                    /* Bytes of space for iSerial as varint */
+  int nVal;                       /* Bytes of space required for argv[0] */
+  int nRet;
+  sqlite3 *db;
+  u8 *aRet;
+
+  UNUSED_PARAMETER( argc );
+  iSerial = sqlite3VdbeSerialType(argv[0], file_format);
+  nSerial = sqlite3VarintLen(iSerial);
+  nVal = sqlite3VdbeSerialTypeLen(iSerial);
+  db = sqlite3_context_db_handle(context);
+
+  nRet = 1 + nSerial + nVal;
+  aRet = sqlite3DbMallocRaw(db, nRet);
+  if( aRet==0 ){
+    sqlite3_result_error_nomem(context);
+  }else{
+    aRet[0] = nSerial+1;
+    putVarint32(&aRet[1], iSerial);
+    sqlite3VdbeSerialPut(&aRet[1+nSerial], argv[0], iSerial);
+    sqlite3_result_blob(context, aRet, nRet, SQLITE_TRANSIENT);
+    sqlite3DbFree(db, aRet);
+  }
+}
+
+/*
+** Register built-in functions used to help read ANALYZE data.
+*/
+SQLITE_PRIVATE void sqlite3AnalyzeFunctions(void){
+  static SQLITE_WSD FuncDef aAnalyzeTableFuncs[] = {
+    FUNCTION(sqlite_record,   1, 0, 0, recordFunc),
+  };
+  int i;
+  FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+  FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aAnalyzeTableFuncs);
+  for(i=0; i<ArraySize(aAnalyzeTableFuncs); i++){
+    sqlite3FuncDefInsert(pHash, &aFunc[i]);
+  }
+}
+
+/*
+** Attempt to extract a value from pExpr and use it to construct *ppVal.
+**
+** If pAlloc is not NULL, then an UnpackedRecord object is created for
+** pAlloc if one does not exist and the new value is added to the
+** UnpackedRecord object.
+**
+** A value is extracted in the following cases:
+**
+**  * (pExpr==0). In this case the value is assumed to be an SQL NULL,
+**
+**  * The expression is a bound variable, and this is a reprepare, or
+**
+**  * The expression is a literal value.
+**
+** On success, *ppVal is made to point to the extracted value.  The caller
+** is responsible for ensuring that the value is eventually freed.
+*/
+static int stat4ValueFromExpr(
+  Parse *pParse,                  /* Parse context */
+  Expr *pExpr,                    /* The expression to extract a value from */
+  u8 affinity,                    /* Affinity to use */
+  struct ValueNewStat4Ctx *pAlloc,/* How to allocate space.  Or NULL */
+  sqlite3_value **ppVal           /* OUT: New value object (or NULL) */
+){
+  int rc = SQLITE_OK;
+  sqlite3_value *pVal = 0;
+  sqlite3 *db = pParse->db;
+
+  /* Skip over any TK_COLLATE nodes */
+  pExpr = sqlite3ExprSkipCollate(pExpr);
+
+  if( !pExpr ){
+    pVal = valueNew(db, pAlloc);
+    if( pVal ){
+      sqlite3VdbeMemSetNull((Mem*)pVal);
+    }
+  }else if( pExpr->op==TK_VARIABLE
+        || NEVER(pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE)
+  ){
+    Vdbe *v;
+    int iBindVar = pExpr->iColumn;
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iBindVar);
+    if( (v = pParse->pReprepare)!=0 ){
+      pVal = valueNew(db, pAlloc);
+      if( pVal ){
+        rc = sqlite3VdbeMemCopy((Mem*)pVal, &v->aVar[iBindVar-1]);
+        if( rc==SQLITE_OK ){
+          sqlite3ValueApplyAffinity(pVal, affinity, ENC(db));
+        }
+        pVal->db = pParse->db;
+      }
+    }
+  }else{
+    rc = valueFromExpr(db, pExpr, ENC(db), affinity, &pVal, pAlloc);
+  }
+
+  assert( pVal==0 || pVal->db==db );
+  *ppVal = pVal;
+  return rc;
+}
+
+/*
+** This function is used to allocate and populate UnpackedRecord 
+** structures intended to be compared against sample index keys stored 
+** in the sqlite_stat4 table.
+**
+** A single call to this function attempts to populates field iVal (leftmost 
+** is 0 etc.) of the unpacked record with a value extracted from expression
+** pExpr. Extraction of values is possible if:
+**
+**  * (pExpr==0). In this case the value is assumed to be an SQL NULL,
+**
+**  * The expression is a bound variable, and this is a reprepare, or
+**
+**  * The sqlite3ValueFromExpr() function is able to extract a value 
+**    from the expression (i.e. the expression is a literal value).
+**
+** If a value can be extracted, the affinity passed as the 5th argument
+** is applied to it before it is copied into the UnpackedRecord. Output
+** parameter *pbOk is set to true if a value is extracted, or false 
+** otherwise.
+**
+** When this function is called, *ppRec must either point to an object
+** allocated by an earlier call to this function, or must be NULL. If it
+** is NULL and a value can be successfully extracted, a new UnpackedRecord
+** is allocated (and *ppRec set to point to it) before returning.
+**
+** Unless an error is encountered, SQLITE_OK is returned. It is not an
+** error if a value cannot be extracted from pExpr. If an error does
+** occur, an SQLite error code is returned.
+*/
+SQLITE_PRIVATE int sqlite3Stat4ProbeSetValue(
+  Parse *pParse,                  /* Parse context */
+  Index *pIdx,                    /* Index being probed */
+  UnpackedRecord **ppRec,         /* IN/OUT: Probe record */
+  Expr *pExpr,                    /* The expression to extract a value from */
+  u8 affinity,                    /* Affinity to use */
+  int iVal,                       /* Array element to populate */
+  int *pbOk                       /* OUT: True if value was extracted */
+){
+  int rc;
+  sqlite3_value *pVal = 0;
+  struct ValueNewStat4Ctx alloc;
+
+  alloc.pParse = pParse;
+  alloc.pIdx = pIdx;
+  alloc.ppRec = ppRec;
+  alloc.iVal = iVal;
+
+  rc = stat4ValueFromExpr(pParse, pExpr, affinity, &alloc, &pVal);
+  assert( pVal==0 || pVal->db==pParse->db );
+  *pbOk = (pVal!=0);
+  return rc;
+}
+
+/*
+** Attempt to extract a value from expression pExpr using the methods
+** as described for sqlite3Stat4ProbeSetValue() above. 
+**
+** If successful, set *ppVal to point to a new value object and return 
+** SQLITE_OK. If no value can be extracted, but no other error occurs
+** (e.g. OOM), return SQLITE_OK and set *ppVal to NULL. Or, if an error
+** does occur, return an SQLite error code. The final value of *ppVal
+** is undefined in this case.
+*/
+SQLITE_PRIVATE int sqlite3Stat4ValueFromExpr(
+  Parse *pParse,                  /* Parse context */
+  Expr *pExpr,                    /* The expression to extract a value from */
+  u8 affinity,                    /* Affinity to use */
+  sqlite3_value **ppVal           /* OUT: New value object (or NULL) */
+){
+  return stat4ValueFromExpr(pParse, pExpr, affinity, 0, ppVal);
+}
+
+/*
+** Extract the iCol-th column from the nRec-byte record in pRec.  Write
+** the column value into *ppVal.  If *ppVal is initially NULL then a new
+** sqlite3_value object is allocated.
+**
+** If *ppVal is initially NULL then the caller is responsible for 
+** ensuring that the value written into *ppVal is eventually freed.
+*/
+SQLITE_PRIVATE int sqlite3Stat4Column(
+  sqlite3 *db,                    /* Database handle */
+  const void *pRec,               /* Pointer to buffer containing record */
+  int nRec,                       /* Size of buffer pRec in bytes */
+  int iCol,                       /* Column to extract */
+  sqlite3_value **ppVal           /* OUT: Extracted value */
+){
+  u32 t;                          /* a column type code */
+  int nHdr;                       /* Size of the header in the record */
+  int iHdr;                       /* Next unread header byte */
+  int iField;                     /* Next unread data byte */
+  int szField;                    /* Size of the current data field */
+  int i;                          /* Column index */
+  u8 *a = (u8*)pRec;              /* Typecast byte array */
+  Mem *pMem = *ppVal;             /* Write result into this Mem object */
+
+  assert( iCol>0 );
+  iHdr = getVarint32(a, nHdr);
+  if( nHdr>nRec || iHdr>=nHdr ) return SQLITE_CORRUPT_BKPT;
+  iField = nHdr;
+  for(i=0; i<=iCol; i++){
+    iHdr += getVarint32(&a[iHdr], t);
+    testcase( iHdr==nHdr );
+    testcase( iHdr==nHdr+1 );
+    if( iHdr>nHdr ) return SQLITE_CORRUPT_BKPT;
+    szField = sqlite3VdbeSerialTypeLen(t);
+    iField += szField;
+  }
+  testcase( iField==nRec );
+  testcase( iField==nRec+1 );
+  if( iField>nRec ) return SQLITE_CORRUPT_BKPT;
+  if( pMem==0 ){
+    pMem = *ppVal = sqlite3ValueNew(db);
+    if( pMem==0 ) return SQLITE_NOMEM;
+  }
+  sqlite3VdbeSerialGet(&a[iField-szField], t, pMem);
+  pMem->enc = ENC(db);
+  return SQLITE_OK;
+}
+
+/*
+** Unless it is NULL, the argument must be an UnpackedRecord object returned
+** by an earlier call to sqlite3Stat4ProbeSetValue(). This call deletes
+** the object.
+*/
+SQLITE_PRIVATE void sqlite3Stat4ProbeFree(UnpackedRecord *pRec){
+  if( pRec ){
+    int i;
+    int nCol = pRec->pKeyInfo->nField+pRec->pKeyInfo->nXField;
+    Mem *aMem = pRec->aMem;
+    sqlite3 *db = aMem[0].db;
+    for(i=0; i<nCol; i++){
+      if( aMem[i].szMalloc ) sqlite3DbFree(db, aMem[i].zMalloc);
+    }
+    sqlite3KeyInfoUnref(pRec->pKeyInfo);
+    sqlite3DbFree(db, pRec);
+  }
+}
+#endif /* ifdef SQLITE_ENABLE_STAT4 */
+
+/*
+** Change the string value of an sqlite3_value object
+*/
+SQLITE_PRIVATE void sqlite3ValueSetStr(
+  sqlite3_value *v,     /* Value to be set */
+  int n,                /* Length of string z */
+  const void *z,        /* Text of the new string */
+  u8 enc,               /* Encoding to use */
+  void (*xDel)(void*)   /* Destructor for the string */
+){
+  if( v ) sqlite3VdbeMemSetStr((Mem *)v, z, n, enc, xDel);
+}
+
+/*
+** Free an sqlite3_value object
+*/
+SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value *v){
+  if( !v ) return;
+  sqlite3VdbeMemRelease((Mem *)v);
+  sqlite3DbFree(((Mem*)v)->db, v);
+}
+
+/*
+** Return the number of bytes in the sqlite3_value object assuming
+** that it uses the encoding "enc"
+*/
+SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
+  Mem *p = (Mem*)pVal;
+  if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){
+    if( p->flags & MEM_Zero ){
+      return p->n + p->u.nZero;
+    }else{
+      return p->n;
+    }
+  }
+  return 0;
+}
+
+/************** End of vdbemem.c *********************************************/
+/************** Begin file vdbeaux.c *****************************************/
+/*
+** 2003 September 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used for creating, destroying, and populating
+** a VDBE (or an "sqlite3_stmt" as it is known to the outside world.) 
+*/
+
+/*
+** Create a new virtual database engine.
+*/
+SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse *pParse){
+  sqlite3 *db = pParse->db;
+  Vdbe *p;
+  p = sqlite3DbMallocZero(db, sizeof(Vdbe) );
+  if( p==0 ) return 0;
+  p->db = db;
+  if( db->pVdbe ){
+    db->pVdbe->pPrev = p;
+  }
+  p->pNext = db->pVdbe;
+  p->pPrev = 0;
+  db->pVdbe = p;
+  p->magic = VDBE_MAGIC_INIT;
+  p->pParse = pParse;
+  assert( pParse->aLabel==0 );
+  assert( pParse->nLabel==0 );
+  assert( pParse->nOpAlloc==0 );
+  return p;
+}
+
+/*
+** Remember the SQL string for a prepared statement.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetSql(Vdbe *p, const char *z, int n, int isPrepareV2){
+  assert( isPrepareV2==1 || isPrepareV2==0 );
+  if( p==0 ) return;
+#if defined(SQLITE_OMIT_TRACE) && !defined(SQLITE_ENABLE_SQLLOG)
+  if( !isPrepareV2 ) return;
+#endif
+  assert( p->zSql==0 );
+  p->zSql = sqlite3DbStrNDup(p->db, z, n);
+  p->isPrepareV2 = (u8)isPrepareV2;
+}
+
+/*
+** Return the SQL associated with a prepared statement
+*/
+SQLITE_API const char *sqlite3_sql(sqlite3_stmt *pStmt){
+  Vdbe *p = (Vdbe *)pStmt;
+  return (p && p->isPrepareV2) ? p->zSql : 0;
+}
+
+/*
+** Swap all content between two VDBE structures.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSwap(Vdbe *pA, Vdbe *pB){
+  Vdbe tmp, *pTmp;
+  char *zTmp;
+  tmp = *pA;
+  *pA = *pB;
+  *pB = tmp;
+  pTmp = pA->pNext;
+  pA->pNext = pB->pNext;
+  pB->pNext = pTmp;
+  pTmp = pA->pPrev;
+  pA->pPrev = pB->pPrev;
+  pB->pPrev = pTmp;
+  zTmp = pA->zSql;
+  pA->zSql = pB->zSql;
+  pB->zSql = zTmp;
+  pB->isPrepareV2 = pA->isPrepareV2;
+}
+
+/*
+** Resize the Vdbe.aOp array so that it is at least nOp elements larger 
+** than its current size. nOp is guaranteed to be less than or equal
+** to 1024/sizeof(Op).
+**
+** If an out-of-memory error occurs while resizing the array, return
+** SQLITE_NOMEM. In this case Vdbe.aOp and Parse.nOpAlloc remain 
+** unchanged (this is so that any opcodes already allocated can be 
+** correctly deallocated along with the rest of the Vdbe).
+*/
+static int growOpArray(Vdbe *v, int nOp){
+  VdbeOp *pNew;
+  Parse *p = v->pParse;
+
+  /* The SQLITE_TEST_REALLOC_STRESS compile-time option is designed to force
+  ** more frequent reallocs and hence provide more opportunities for 
+  ** simulated OOM faults.  SQLITE_TEST_REALLOC_STRESS is generally used
+  ** during testing only.  With SQLITE_TEST_REALLOC_STRESS grow the op array
+  ** by the minimum* amount required until the size reaches 512.  Normal
+  ** operation (without SQLITE_TEST_REALLOC_STRESS) is to double the current
+  ** size of the op array or add 1KB of space, whichever is smaller. */
+#ifdef SQLITE_TEST_REALLOC_STRESS
+  int nNew = (p->nOpAlloc>=512 ? p->nOpAlloc*2 : p->nOpAlloc+nOp);
+#else
+  int nNew = (p->nOpAlloc ? p->nOpAlloc*2 : (int)(1024/sizeof(Op)));
+  UNUSED_PARAMETER(nOp);
+#endif
+
+  assert( nOp<=(1024/sizeof(Op)) );
+  assert( nNew>=(p->nOpAlloc+nOp) );
+  pNew = sqlite3DbRealloc(p->db, v->aOp, nNew*sizeof(Op));
+  if( pNew ){
+    p->nOpAlloc = sqlite3DbMallocSize(p->db, pNew)/sizeof(Op);
+    v->aOp = pNew;
+  }
+  return (pNew ? SQLITE_OK : SQLITE_NOMEM);
+}
+
+#ifdef SQLITE_DEBUG
+/* This routine is just a convenient place to set a breakpoint that will
+** fire after each opcode is inserted and displayed using
+** "PRAGMA vdbe_addoptrace=on".
+*/
+static void test_addop_breakpoint(void){
+  static int n = 0;
+  n++;
+}
+#endif
+
+/*
+** Add a new instruction to the list of instructions current in the
+** VDBE.  Return the address of the new instruction.
+**
+** Parameters:
+**
+**    p               Pointer to the VDBE
+**
+**    op              The opcode for this instruction
+**
+**    p1, p2, p3      Operands
+**
+** Use the sqlite3VdbeResolveLabel() function to fix an address and
+** the sqlite3VdbeChangeP4() function to change the value of the P4
+** operand.
+*/
+SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe *p, int op, int p1, int p2, int p3){
+  int i;
+  VdbeOp *pOp;
+
+  i = p->nOp;
+  assert( p->magic==VDBE_MAGIC_INIT );
+  assert( op>0 && op<0xff );
+  if( p->pParse->nOpAlloc<=i ){
+    if( growOpArray(p, 1) ){
+      return 1;
+    }
+  }
+  p->nOp++;
+  pOp = &p->aOp[i];
+  pOp->opcode = (u8)op;
+  pOp->p5 = 0;
+  pOp->p1 = p1;
+  pOp->p2 = p2;
+  pOp->p3 = p3;
+  pOp->p4.p = 0;
+  pOp->p4type = P4_NOTUSED;
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+  pOp->zComment = 0;
+#endif
+#ifdef SQLITE_DEBUG
+  if( p->db->flags & SQLITE_VdbeAddopTrace ){
+    int jj, kk;
+    Parse *pParse = p->pParse;
+    for(jj=kk=0; jj<SQLITE_N_COLCACHE; jj++){
+      struct yColCache *x = pParse->aColCache + jj;
+      if( x->iLevel>pParse->iCacheLevel || x->iReg==0 ) continue;
+      printf(" r[%d]={%d:%d}", x->iReg, x->iTable, x->iColumn);
+      kk++;
+    }
+    if( kk ) printf("\n");
+    sqlite3VdbePrintOp(0, i, &p->aOp[i]);
+    test_addop_breakpoint();
+  }
+#endif
+#ifdef VDBE_PROFILE
+  pOp->cycles = 0;
+  pOp->cnt = 0;
+#endif
+#ifdef SQLITE_VDBE_COVERAGE
+  pOp->iSrcLine = 0;
+#endif
+  return i;
+}
+SQLITE_PRIVATE int sqlite3VdbeAddOp0(Vdbe *p, int op){
+  return sqlite3VdbeAddOp3(p, op, 0, 0, 0);
+}
+SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe *p, int op, int p1){
+  return sqlite3VdbeAddOp3(p, op, p1, 0, 0);
+}
+SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe *p, int op, int p1, int p2){
+  return sqlite3VdbeAddOp3(p, op, p1, p2, 0);
+}
+
+
+/*
+** Add an opcode that includes the p4 value as a pointer.
+*/
+SQLITE_PRIVATE int sqlite3VdbeAddOp4(
+  Vdbe *p,            /* Add the opcode to this VM */
+  int op,             /* The new opcode */
+  int p1,             /* The P1 operand */
+  int p2,             /* The P2 operand */
+  int p3,             /* The P3 operand */
+  const char *zP4,    /* The P4 operand */
+  int p4type          /* P4 operand type */
+){
+  int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3);
+  sqlite3VdbeChangeP4(p, addr, zP4, p4type);
+  return addr;
+}
+
+/*
+** Add an OP_ParseSchema opcode.  This routine is broken out from
+** sqlite3VdbeAddOp4() since it needs to also needs to mark all btrees
+** as having been used.
+**
+** The zWhere string must have been obtained from sqlite3_malloc().
+** This routine will take ownership of the allocated memory.
+*/
+SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe *p, int iDb, char *zWhere){
+  int j;
+  int addr = sqlite3VdbeAddOp3(p, OP_ParseSchema, iDb, 0, 0);
+  sqlite3VdbeChangeP4(p, addr, zWhere, P4_DYNAMIC);
+  for(j=0; j<p->db->nDb; j++) sqlite3VdbeUsesBtree(p, j);
+}
+
+/*
+** Add an opcode that includes the p4 value as an integer.
+*/
+SQLITE_PRIVATE int sqlite3VdbeAddOp4Int(
+  Vdbe *p,            /* Add the opcode to this VM */
+  int op,             /* The new opcode */
+  int p1,             /* The P1 operand */
+  int p2,             /* The P2 operand */
+  int p3,             /* The P3 operand */
+  int p4              /* The P4 operand as an integer */
+){
+  int addr = sqlite3VdbeAddOp3(p, op, p1, p2, p3);
+  sqlite3VdbeChangeP4(p, addr, SQLITE_INT_TO_PTR(p4), P4_INT32);
+  return addr;
+}
+
+/*
+** Create a new symbolic label for an instruction that has yet to be
+** coded.  The symbolic label is really just a negative number.  The
+** label can be used as the P2 value of an operation.  Later, when
+** the label is resolved to a specific address, the VDBE will scan
+** through its operation list and change all values of P2 which match
+** the label into the resolved address.
+**
+** The VDBE knows that a P2 value is a label because labels are
+** always negative and P2 values are suppose to be non-negative.
+** Hence, a negative P2 value is a label that has yet to be resolved.
+**
+** Zero is returned if a malloc() fails.
+*/
+SQLITE_PRIVATE int sqlite3VdbeMakeLabel(Vdbe *v){
+  Parse *p = v->pParse;
+  int i = p->nLabel++;
+  assert( v->magic==VDBE_MAGIC_INIT );
+  if( (i & (i-1))==0 ){
+    p->aLabel = sqlite3DbReallocOrFree(p->db, p->aLabel, 
+                                       (i*2+1)*sizeof(p->aLabel[0]));
+  }
+  if( p->aLabel ){
+    p->aLabel[i] = -1;
+  }
+  return -1-i;
+}
+
+/*
+** Resolve label "x" to be the address of the next instruction to
+** be inserted.  The parameter "x" must have been obtained from
+** a prior call to sqlite3VdbeMakeLabel().
+*/
+SQLITE_PRIVATE void sqlite3VdbeResolveLabel(Vdbe *v, int x){
+  Parse *p = v->pParse;
+  int j = -1-x;
+  assert( v->magic==VDBE_MAGIC_INIT );
+  assert( j<p->nLabel );
+  if( ALWAYS(j>=0) && p->aLabel ){
+    p->aLabel[j] = v->nOp;
+  }
+  p->iFixedOp = v->nOp - 1;
+}
+
+/*
+** Mark the VDBE as one that can only be run one time.
+*/
+SQLITE_PRIVATE void sqlite3VdbeRunOnlyOnce(Vdbe *p){
+  p->runOnlyOnce = 1;
+}
+
+#ifdef SQLITE_DEBUG /* sqlite3AssertMayAbort() logic */
+
+/*
+** The following type and function are used to iterate through all opcodes
+** in a Vdbe main program and each of the sub-programs (triggers) it may 
+** invoke directly or indirectly. It should be used as follows:
+**
+**   Op *pOp;
+**   VdbeOpIter sIter;
+**
+**   memset(&sIter, 0, sizeof(sIter));
+**   sIter.v = v;                            // v is of type Vdbe* 
+**   while( (pOp = opIterNext(&sIter)) ){
+**     // Do something with pOp
+**   }
+**   sqlite3DbFree(v->db, sIter.apSub);
+** 
+*/
+typedef struct VdbeOpIter VdbeOpIter;
+struct VdbeOpIter {
+  Vdbe *v;                   /* Vdbe to iterate through the opcodes of */
+  SubProgram **apSub;        /* Array of subprograms */
+  int nSub;                  /* Number of entries in apSub */
+  int iAddr;                 /* Address of next instruction to return */
+  int iSub;                  /* 0 = main program, 1 = first sub-program etc. */
+};
+static Op *opIterNext(VdbeOpIter *p){
+  Vdbe *v = p->v;
+  Op *pRet = 0;
+  Op *aOp;
+  int nOp;
+
+  if( p->iSub<=p->nSub ){
+
+    if( p->iSub==0 ){
+      aOp = v->aOp;
+      nOp = v->nOp;
+    }else{
+      aOp = p->apSub[p->iSub-1]->aOp;
+      nOp = p->apSub[p->iSub-1]->nOp;
+    }
+    assert( p->iAddr<nOp );
+
+    pRet = &aOp[p->iAddr];
+    p->iAddr++;
+    if( p->iAddr==nOp ){
+      p->iSub++;
+      p->iAddr = 0;
+    }
+  
+    if( pRet->p4type==P4_SUBPROGRAM ){
+      int nByte = (p->nSub+1)*sizeof(SubProgram*);
+      int j;
+      for(j=0; j<p->nSub; j++){
+        if( p->apSub[j]==pRet->p4.pProgram ) break;
+      }
+      if( j==p->nSub ){
+        p->apSub = sqlite3DbReallocOrFree(v->db, p->apSub, nByte);
+        if( !p->apSub ){
+          pRet = 0;
+        }else{
+          p->apSub[p->nSub++] = pRet->p4.pProgram;
+        }
+      }
+    }
+  }
+
+  return pRet;
+}
+
+/*
+** Check if the program stored in the VM associated with pParse may
+** throw an ABORT exception (causing the statement, but not entire transaction
+** to be rolled back). This condition is true if the main program or any
+** sub-programs contains any of the following:
+**
+**   *  OP_Halt with P1=SQLITE_CONSTRAINT and P2=OE_Abort.
+**   *  OP_HaltIfNull with P1=SQLITE_CONSTRAINT and P2=OE_Abort.
+**   *  OP_Destroy
+**   *  OP_VUpdate
+**   *  OP_VRename
+**   *  OP_FkCounter with P2==0 (immediate foreign key constraint)
+**
+** Then check that the value of Parse.mayAbort is true if an
+** ABORT may be thrown, or false otherwise. Return true if it does
+** match, or false otherwise. This function is intended to be used as
+** part of an assert statement in the compiler. Similar to:
+**
+**   assert( sqlite3VdbeAssertMayAbort(pParse->pVdbe, pParse->mayAbort) );
+*/
+SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){
+  int hasAbort = 0;
+  int hasFkCounter = 0;
+  Op *pOp;
+  VdbeOpIter sIter;
+  memset(&sIter, 0, sizeof(sIter));
+  sIter.v = v;
+
+  while( (pOp = opIterNext(&sIter))!=0 ){
+    int opcode = pOp->opcode;
+    if( opcode==OP_Destroy || opcode==OP_VUpdate || opcode==OP_VRename 
+     || ((opcode==OP_Halt || opcode==OP_HaltIfNull) 
+      && ((pOp->p1&0xff)==SQLITE_CONSTRAINT && pOp->p2==OE_Abort))
+    ){
+      hasAbort = 1;
+      break;
+    }
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+    if( opcode==OP_FkCounter && pOp->p1==0 && pOp->p2==1 ){
+      hasFkCounter = 1;
+    }
+#endif
+  }
+  sqlite3DbFree(v->db, sIter.apSub);
+
+  /* Return true if hasAbort==mayAbort. Or if a malloc failure occurred.
+  ** If malloc failed, then the while() loop above may not have iterated
+  ** through all opcodes and hasAbort may be set incorrectly. Return
+  ** true for this case to prevent the assert() in the callers frame
+  ** from failing.  */
+  return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter );
+}
+#endif /* SQLITE_DEBUG - the sqlite3AssertMayAbort() function */
+
+/*
+** Loop through the program looking for P2 values that are negative
+** on jump instructions.  Each such value is a label.  Resolve the
+** label by setting the P2 value to its correct non-zero value.
+**
+** This routine is called once after all opcodes have been inserted.
+**
+** Variable *pMaxFuncArgs is set to the maximum value of any P2 argument 
+** to an OP_Function, OP_AggStep or OP_VFilter opcode. This is used by 
+** sqlite3VdbeMakeReady() to size the Vdbe.apArg[] array.
+**
+** The Op.opflags field is set on all opcodes.
+*/
+static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){
+  int i;
+  int nMaxArgs = *pMaxFuncArgs;
+  Op *pOp;
+  Parse *pParse = p->pParse;
+  int *aLabel = pParse->aLabel;
+  p->readOnly = 1;
+  p->bIsReader = 0;
+  for(pOp=p->aOp, i=p->nOp-1; i>=0; i--, pOp++){
+    u8 opcode = pOp->opcode;
+
+    /* NOTE: Be sure to update mkopcodeh.awk when adding or removing
+    ** cases from this switch! */
+    switch( opcode ){
+      case OP_Function:
+      case OP_AggStep: {
+        if( pOp->p5>nMaxArgs ) nMaxArgs = pOp->p5;
+        break;
+      }
+      case OP_Transaction: {
+        if( pOp->p2!=0 ) p->readOnly = 0;
+        /* fall thru */
+      }
+      case OP_AutoCommit:
+      case OP_Savepoint: {
+        p->bIsReader = 1;
+        break;
+      }
+#ifndef SQLITE_OMIT_WAL
+      case OP_Checkpoint:
+#endif
+      case OP_Vacuum:
+      case OP_JournalMode: {
+        p->readOnly = 0;
+        p->bIsReader = 1;
+        break;
+      }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+      case OP_VUpdate: {
+        if( pOp->p2>nMaxArgs ) nMaxArgs = pOp->p2;
+        break;
+      }
+      case OP_VFilter: {
+        int n;
+        assert( p->nOp - i >= 3 );
+        assert( pOp[-1].opcode==OP_Integer );
+        n = pOp[-1].p1;
+        if( n>nMaxArgs ) nMaxArgs = n;
+        break;
+      }
+#endif
+      case OP_Next:
+      case OP_NextIfOpen:
+      case OP_SorterNext: {
+        pOp->p4.xAdvance = sqlite3BtreeNext;
+        pOp->p4type = P4_ADVANCE;
+        break;
+      }
+      case OP_Prev:
+      case OP_PrevIfOpen: {
+        pOp->p4.xAdvance = sqlite3BtreePrevious;
+        pOp->p4type = P4_ADVANCE;
+        break;
+      }
+    }
+
+    pOp->opflags = sqlite3OpcodeProperty[opcode];
+    if( (pOp->opflags & OPFLG_JUMP)!=0 && pOp->p2<0 ){
+      assert( -1-pOp->p2<pParse->nLabel );
+      pOp->p2 = aLabel[-1-pOp->p2];
+    }
+  }
+  sqlite3DbFree(p->db, pParse->aLabel);
+  pParse->aLabel = 0;
+  pParse->nLabel = 0;
+  *pMaxFuncArgs = nMaxArgs;
+  assert( p->bIsReader!=0 || DbMaskAllZero(p->btreeMask) );
+}
+
+/*
+** Return the address of the next instruction to be inserted.
+*/
+SQLITE_PRIVATE int sqlite3VdbeCurrentAddr(Vdbe *p){
+  assert( p->magic==VDBE_MAGIC_INIT );
+  return p->nOp;
+}
+
+/*
+** This function returns a pointer to the array of opcodes associated with
+** the Vdbe passed as the first argument. It is the callers responsibility
+** to arrange for the returned array to be eventually freed using the 
+** vdbeFreeOpArray() function.
+**
+** Before returning, *pnOp is set to the number of entries in the returned
+** array. Also, *pnMaxArg is set to the larger of its current value and 
+** the number of entries in the Vdbe.apArg[] array required to execute the 
+** returned program.
+*/
+SQLITE_PRIVATE VdbeOp *sqlite3VdbeTakeOpArray(Vdbe *p, int *pnOp, int *pnMaxArg){
+  VdbeOp *aOp = p->aOp;
+  assert( aOp && !p->db->mallocFailed );
+
+  /* Check that sqlite3VdbeUsesBtree() was not called on this VM */
+  assert( DbMaskAllZero(p->btreeMask) );
+
+  resolveP2Values(p, pnMaxArg);
+  *pnOp = p->nOp;
+  p->aOp = 0;
+  return aOp;
+}
+
+/*
+** Add a whole list of operations to the operation stack.  Return the
+** address of the first operation added.
+*/
+SQLITE_PRIVATE int sqlite3VdbeAddOpList(Vdbe *p, int nOp, VdbeOpList const *aOp, int iLineno){
+  int addr;
+  assert( p->magic==VDBE_MAGIC_INIT );
+  if( p->nOp + nOp > p->pParse->nOpAlloc && growOpArray(p, nOp) ){
+    return 0;
+  }
+  addr = p->nOp;
+  if( ALWAYS(nOp>0) ){
+    int i;
+    VdbeOpList const *pIn = aOp;
+    for(i=0; i<nOp; i++, pIn++){
+      int p2 = pIn->p2;
+      VdbeOp *pOut = &p->aOp[i+addr];
+      pOut->opcode = pIn->opcode;
+      pOut->p1 = pIn->p1;
+      if( p2<0 ){
+        assert( sqlite3OpcodeProperty[pOut->opcode] & OPFLG_JUMP );
+        pOut->p2 = addr + ADDR(p2);
+      }else{
+        pOut->p2 = p2;
+      }
+      pOut->p3 = pIn->p3;
+      pOut->p4type = P4_NOTUSED;
+      pOut->p4.p = 0;
+      pOut->p5 = 0;
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+      pOut->zComment = 0;
+#endif
+#ifdef SQLITE_VDBE_COVERAGE
+      pOut->iSrcLine = iLineno+i;
+#else
+      (void)iLineno;
+#endif
+#ifdef SQLITE_DEBUG
+      if( p->db->flags & SQLITE_VdbeAddopTrace ){
+        sqlite3VdbePrintOp(0, i+addr, &p->aOp[i+addr]);
+      }
+#endif
+    }
+    p->nOp += nOp;
+  }
+  return addr;
+}
+
+#if defined(SQLITE_ENABLE_STMT_SCANSTATUS)
+/*
+** Add an entry to the array of counters managed by sqlite3_stmt_scanstatus().
+*/
+SQLITE_PRIVATE void sqlite3VdbeScanStatus(
+  Vdbe *p,                        /* VM to add scanstatus() to */
+  int addrExplain,                /* Address of OP_Explain (or 0) */
+  int addrLoop,                   /* Address of loop counter */ 
+  int addrVisit,                  /* Address of rows visited counter */
+  LogEst nEst,                    /* Estimated number of output rows */
+  const char *zName               /* Name of table or index being scanned */
+){
+  int nByte = (p->nScan+1) * sizeof(ScanStatus);
+  ScanStatus *aNew;
+  aNew = (ScanStatus*)sqlite3DbRealloc(p->db, p->aScan, nByte);
+  if( aNew ){
+    ScanStatus *pNew = &aNew[p->nScan++];
+    pNew->addrExplain = addrExplain;
+    pNew->addrLoop = addrLoop;
+    pNew->addrVisit = addrVisit;
+    pNew->nEst = nEst;
+    pNew->zName = sqlite3DbStrDup(p->db, zName);
+    p->aScan = aNew;
+  }
+}
+#endif
+
+
+/*
+** Change the value of the P1 operand for a specific instruction.
+** This routine is useful when a large program is loaded from a
+** static array using sqlite3VdbeAddOpList but we want to make a
+** few minor changes to the program.
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, u32 addr, int val){
+  assert( p!=0 );
+  if( ((u32)p->nOp)>addr ){
+    p->aOp[addr].p1 = val;
+  }
+}
+
+/*
+** Change the value of the P2 operand for a specific instruction.
+** This routine is useful for setting a jump destination.
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, u32 addr, int val){
+  assert( p!=0 );
+  if( ((u32)p->nOp)>addr ){
+    p->aOp[addr].p2 = val;
+  }
+}
+
+/*
+** Change the value of the P3 operand for a specific instruction.
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe *p, u32 addr, int val){
+  assert( p!=0 );
+  if( ((u32)p->nOp)>addr ){
+    p->aOp[addr].p3 = val;
+  }
+}
+
+/*
+** Change the value of the P5 operand for the most recently
+** added operation.
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe *p, u8 val){
+  assert( p!=0 );
+  if( p->aOp ){
+    assert( p->nOp>0 );
+    p->aOp[p->nOp-1].p5 = val;
+  }
+}
+
+/*
+** Change the P2 operand of instruction addr so that it points to
+** the address of the next instruction to be coded.
+*/
+SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe *p, int addr){
+  sqlite3VdbeChangeP2(p, addr, p->nOp);
+  p->pParse->iFixedOp = p->nOp - 1;
+}
+
+
+/*
+** If the input FuncDef structure is ephemeral, then free it.  If
+** the FuncDef is not ephermal, then do nothing.
+*/
+static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef){
+  if( ALWAYS(pDef) && (pDef->funcFlags & SQLITE_FUNC_EPHEM)!=0 ){
+    sqlite3DbFree(db, pDef);
+  }
+}
+
+static void vdbeFreeOpArray(sqlite3 *, Op *, int);
+
+/*
+** Delete a P4 value if necessary.
+*/
+static void freeP4(sqlite3 *db, int p4type, void *p4){
+  if( p4 ){
+    assert( db );
+    switch( p4type ){
+      case P4_REAL:
+      case P4_INT64:
+      case P4_DYNAMIC:
+      case P4_INTARRAY: {
+        sqlite3DbFree(db, p4);
+        break;
+      }
+      case P4_KEYINFO: {
+        if( db->pnBytesFreed==0 ) sqlite3KeyInfoUnref((KeyInfo*)p4);
+        break;
+      }
+      case P4_MPRINTF: {
+        if( db->pnBytesFreed==0 ) sqlite3_free(p4);
+        break;
+      }
+      case P4_FUNCDEF: {
+        freeEphemeralFunction(db, (FuncDef*)p4);
+        break;
+      }
+      case P4_MEM: {
+        if( db->pnBytesFreed==0 ){
+          sqlite3ValueFree((sqlite3_value*)p4);
+        }else{
+          Mem *p = (Mem*)p4;
+          if( p->szMalloc ) sqlite3DbFree(db, p->zMalloc);
+          sqlite3DbFree(db, p);
+        }
+        break;
+      }
+      case P4_VTAB : {
+        if( db->pnBytesFreed==0 ) sqlite3VtabUnlock((VTable *)p4);
+        break;
+      }
+    }
+  }
+}
+
+/*
+** Free the space allocated for aOp and any p4 values allocated for the
+** opcodes contained within. If aOp is not NULL it is assumed to contain 
+** nOp entries. 
+*/
+static void vdbeFreeOpArray(sqlite3 *db, Op *aOp, int nOp){
+  if( aOp ){
+    Op *pOp;
+    for(pOp=aOp; pOp<&aOp[nOp]; pOp++){
+      freeP4(db, pOp->p4type, pOp->p4.p);
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+      sqlite3DbFree(db, pOp->zComment);
+#endif     
+    }
+  }
+  sqlite3DbFree(db, aOp);
+}
+
+/*
+** Link the SubProgram object passed as the second argument into the linked
+** list at Vdbe.pSubProgram. This list is used to delete all sub-program
+** objects when the VM is no longer required.
+*/
+SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *pVdbe, SubProgram *p){
+  p->pNext = pVdbe->pProgram;
+  pVdbe->pProgram = p;
+}
+
+/*
+** Change the opcode at addr into OP_Noop
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe *p, int addr){
+  if( addr<p->nOp ){
+    VdbeOp *pOp = &p->aOp[addr];
+    sqlite3 *db = p->db;
+    freeP4(db, pOp->p4type, pOp->p4.p);
+    memset(pOp, 0, sizeof(pOp[0]));
+    pOp->opcode = OP_Noop;
+    if( addr==p->nOp-1 ) p->nOp--;
+  }
+}
+
+/*
+** If the last opcode is "op" and it is not a jump destination,
+** then remove it.  Return true if and only if an opcode was removed.
+*/
+SQLITE_PRIVATE int sqlite3VdbeDeletePriorOpcode(Vdbe *p, u8 op){
+  if( (p->nOp-1)>(p->pParse->iFixedOp) && p->aOp[p->nOp-1].opcode==op ){
+    sqlite3VdbeChangeToNoop(p, p->nOp-1);
+    return 1;
+  }else{
+    return 0;
+  }
+}
+
+/*
+** Change the value of the P4 operand for a specific instruction.
+** This routine is useful when a large program is loaded from a
+** static array using sqlite3VdbeAddOpList but we want to make a
+** few minor changes to the program.
+**
+** If n>=0 then the P4 operand is dynamic, meaning that a copy of
+** the string is made into memory obtained from sqlite3_malloc().
+** A value of n==0 means copy bytes of zP4 up to and including the
+** first null byte.  If n>0 then copy n+1 bytes of zP4.
+** 
+** Other values of n (P4_STATIC, P4_COLLSEQ etc.) indicate that zP4 points
+** to a string or structure that is guaranteed to exist for the lifetime of
+** the Vdbe. In these cases we can just copy the pointer.
+**
+** If addr<0 then change P4 on the most recently inserted instruction.
+*/
+SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe *p, int addr, const char *zP4, int n){
+  Op *pOp;
+  sqlite3 *db;
+  assert( p!=0 );
+  db = p->db;
+  assert( p->magic==VDBE_MAGIC_INIT );
+  if( p->aOp==0 || db->mallocFailed ){
+    if( n!=P4_VTAB ){
+      freeP4(db, n, (void*)*(char**)&zP4);
+    }
+    return;
+  }
+  assert( p->nOp>0 );
+  assert( addr<p->nOp );
+  if( addr<0 ){
+    addr = p->nOp - 1;
+  }
+  pOp = &p->aOp[addr];
+  assert( pOp->p4type==P4_NOTUSED
+       || pOp->p4type==P4_INT32
+       || pOp->p4type==P4_KEYINFO );
+  freeP4(db, pOp->p4type, pOp->p4.p);
+  pOp->p4.p = 0;
+  if( n==P4_INT32 ){
+    /* Note: this cast is safe, because the origin data point was an int
+    ** that was cast to a (const char *). */
+    pOp->p4.i = SQLITE_PTR_TO_INT(zP4);
+    pOp->p4type = P4_INT32;
+  }else if( zP4==0 ){
+    pOp->p4.p = 0;
+    pOp->p4type = P4_NOTUSED;
+  }else if( n==P4_KEYINFO ){
+    pOp->p4.p = (void*)zP4;
+    pOp->p4type = P4_KEYINFO;
+  }else if( n==P4_VTAB ){
+    pOp->p4.p = (void*)zP4;
+    pOp->p4type = P4_VTAB;
+    sqlite3VtabLock((VTable *)zP4);
+    assert( ((VTable *)zP4)->db==p->db );
+  }else if( n<0 ){
+    pOp->p4.p = (void*)zP4;
+    pOp->p4type = (signed char)n;
+  }else{
+    if( n==0 ) n = sqlite3Strlen30(zP4);
+    pOp->p4.z = sqlite3DbStrNDup(p->db, zP4, n);
+    pOp->p4type = P4_DYNAMIC;
+  }
+}
+
+/*
+** Set the P4 on the most recently added opcode to the KeyInfo for the
+** index given.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetP4KeyInfo(Parse *pParse, Index *pIdx){
+  Vdbe *v = pParse->pVdbe;
+  assert( v!=0 );
+  assert( pIdx!=0 );
+  sqlite3VdbeChangeP4(v, -1, (char*)sqlite3KeyInfoOfIndex(pParse, pIdx),
+                      P4_KEYINFO);
+}
+
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+/*
+** Change the comment on the most recently coded instruction.  Or
+** insert a No-op and add the comment to that new instruction.  This
+** makes the code easier to read during debugging.  None of this happens
+** in a production build.
+*/
+static void vdbeVComment(Vdbe *p, const char *zFormat, va_list ap){
+  assert( p->nOp>0 || p->aOp==0 );
+  assert( p->aOp==0 || p->aOp[p->nOp-1].zComment==0 || p->db->mallocFailed );
+  if( p->nOp ){
+    assert( p->aOp );
+    sqlite3DbFree(p->db, p->aOp[p->nOp-1].zComment);
+    p->aOp[p->nOp-1].zComment = sqlite3VMPrintf(p->db, zFormat, ap);
+  }
+}
+SQLITE_PRIVATE void sqlite3VdbeComment(Vdbe *p, const char *zFormat, ...){
+  va_list ap;
+  if( p ){
+    va_start(ap, zFormat);
+    vdbeVComment(p, zFormat, ap);
+    va_end(ap);
+  }
+}
+SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe *p, const char *zFormat, ...){
+  va_list ap;
+  if( p ){
+    sqlite3VdbeAddOp0(p, OP_Noop);
+    va_start(ap, zFormat);
+    vdbeVComment(p, zFormat, ap);
+    va_end(ap);
+  }
+}
+#endif  /* NDEBUG */
+
+#ifdef SQLITE_VDBE_COVERAGE
+/*
+** Set the value if the iSrcLine field for the previously coded instruction.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetLineNumber(Vdbe *v, int iLine){
+  sqlite3VdbeGetOp(v,-1)->iSrcLine = iLine;
+}
+#endif /* SQLITE_VDBE_COVERAGE */
+
+/*
+** Return the opcode for a given address.  If the address is -1, then
+** return the most recently inserted opcode.
+**
+** If a memory allocation error has occurred prior to the calling of this
+** routine, then a pointer to a dummy VdbeOp will be returned.  That opcode
+** is readable but not writable, though it is cast to a writable value.
+** The return of a dummy opcode allows the call to continue functioning
+** after an OOM fault without having to check to see if the return from 
+** this routine is a valid pointer.  But because the dummy.opcode is 0,
+** dummy will never be written to.  This is verified by code inspection and
+** by running with Valgrind.
+*/
+SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe *p, int addr){
+  /* C89 specifies that the constant "dummy" will be initialized to all
+  ** zeros, which is correct.  MSVC generates a warning, nevertheless. */
+  static VdbeOp dummy;  /* Ignore the MSVC warning about no initializer */
+  assert( p->magic==VDBE_MAGIC_INIT );
+  if( addr<0 ){
+    addr = p->nOp - 1;
+  }
+  assert( (addr>=0 && addr<p->nOp) || p->db->mallocFailed );
+  if( p->db->mallocFailed ){
+    return (VdbeOp*)&dummy;
+  }else{
+    return &p->aOp[addr];
+  }
+}
+
+#if defined(SQLITE_ENABLE_EXPLAIN_COMMENTS)
+/*
+** Return an integer value for one of the parameters to the opcode pOp
+** determined by character c.
+*/
+static int translateP(char c, const Op *pOp){
+  if( c=='1' ) return pOp->p1;
+  if( c=='2' ) return pOp->p2;
+  if( c=='3' ) return pOp->p3;
+  if( c=='4' ) return pOp->p4.i;
+  return pOp->p5;
+}
+
+/*
+** Compute a string for the "comment" field of a VDBE opcode listing.
+**
+** The Synopsis: field in comments in the vdbe.c source file gets converted
+** to an extra string that is appended to the sqlite3OpcodeName().  In the
+** absence of other comments, this synopsis becomes the comment on the opcode.
+** Some translation occurs:
+**
+**       "PX"      ->  "r[X]"
+**       "PX@PY"   ->  "r[X..X+Y-1]"  or "r[x]" if y is 0 or 1
+**       "PX@PY+1" ->  "r[X..X+Y]"    or "r[x]" if y is 0
+**       "PY..PY"  ->  "r[X..Y]"      or "r[x]" if y<=x
+*/
+static int displayComment(
+  const Op *pOp,     /* The opcode to be commented */
+  const char *zP4,   /* Previously obtained value for P4 */
+  char *zTemp,       /* Write result here */
+  int nTemp          /* Space available in zTemp[] */
+){
+  const char *zOpName;
+  const char *zSynopsis;
+  int nOpName;
+  int ii, jj;
+  zOpName = sqlite3OpcodeName(pOp->opcode);
+  nOpName = sqlite3Strlen30(zOpName);
+  if( zOpName[nOpName+1] ){
+    int seenCom = 0;
+    char c;
+    zSynopsis = zOpName += nOpName + 1;
+    for(ii=jj=0; jj<nTemp-1 && (c = zSynopsis[ii])!=0; ii++){
+      if( c=='P' ){
+        c = zSynopsis[++ii];
+        if( c=='4' ){
+          sqlite3_snprintf(nTemp-jj, zTemp+jj, "%s", zP4);
+        }else if( c=='X' ){
+          sqlite3_snprintf(nTemp-jj, zTemp+jj, "%s", pOp->zComment);
+          seenCom = 1;
+        }else{
+          int v1 = translateP(c, pOp);
+          int v2;
+          sqlite3_snprintf(nTemp-jj, zTemp+jj, "%d", v1);
+          if( strncmp(zSynopsis+ii+1, "@P", 2)==0 ){
+            ii += 3;
+            jj += sqlite3Strlen30(zTemp+jj);
+            v2 = translateP(zSynopsis[ii], pOp);
+            if( strncmp(zSynopsis+ii+1,"+1",2)==0 ){
+              ii += 2;
+              v2++;
+            }
+            if( v2>1 ){
+              sqlite3_snprintf(nTemp-jj, zTemp+jj, "..%d", v1+v2-1);
+            }
+          }else if( strncmp(zSynopsis+ii+1, "..P3", 4)==0 && pOp->p3==0 ){
+            ii += 4;
+          }
+        }
+        jj += sqlite3Strlen30(zTemp+jj);
+      }else{
+        zTemp[jj++] = c;
+      }
+    }
+    if( !seenCom && jj<nTemp-5 && pOp->zComment ){
+      sqlite3_snprintf(nTemp-jj, zTemp+jj, "; %s", pOp->zComment);
+      jj += sqlite3Strlen30(zTemp+jj);
+    }
+    if( jj<nTemp ) zTemp[jj] = 0;
+  }else if( pOp->zComment ){
+    sqlite3_snprintf(nTemp, zTemp, "%s", pOp->zComment);
+    jj = sqlite3Strlen30(zTemp);
+  }else{
+    zTemp[0] = 0;
+    jj = 0;
+  }
+  return jj;
+}
+#endif /* SQLITE_DEBUG */
+
+
+#if !defined(SQLITE_OMIT_EXPLAIN) || !defined(NDEBUG) \
+     || defined(VDBE_PROFILE) || defined(SQLITE_DEBUG)
+/*
+** Compute a string that describes the P4 parameter for an opcode.
+** Use zTemp for any required temporary buffer space.
+*/
+static char *displayP4(Op *pOp, char *zTemp, int nTemp){
+  char *zP4 = zTemp;
+  assert( nTemp>=20 );
+  switch( pOp->p4type ){
+    case P4_KEYINFO: {
+      int i, j;
+      KeyInfo *pKeyInfo = pOp->p4.pKeyInfo;
+      assert( pKeyInfo->aSortOrder!=0 );
+      sqlite3_snprintf(nTemp, zTemp, "k(%d", pKeyInfo->nField);
+      i = sqlite3Strlen30(zTemp);
+      for(j=0; j<pKeyInfo->nField; j++){
+        CollSeq *pColl = pKeyInfo->aColl[j];
+        const char *zColl = pColl ? pColl->zName : "nil";
+        int n = sqlite3Strlen30(zColl);
+        if( n==6 && memcmp(zColl,"BINARY",6)==0 ){
+          zColl = "B";
+          n = 1;
+        }
+        if( i+n>nTemp-6 ){
+          memcpy(&zTemp[i],",...",4);
+          break;
+        }
+        zTemp[i++] = ',';
+        if( pKeyInfo->aSortOrder[j] ){
+          zTemp[i++] = '-';
+        }
+        memcpy(&zTemp[i], zColl, n+1);
+        i += n;
+      }
+      zTemp[i++] = ')';
+      zTemp[i] = 0;
+      assert( i<nTemp );
+      break;
+    }
+    case P4_COLLSEQ: {
+      CollSeq *pColl = pOp->p4.pColl;
+      sqlite3_snprintf(nTemp, zTemp, "(%.20s)", pColl->zName);
+      break;
+    }
+    case P4_FUNCDEF: {
+      FuncDef *pDef = pOp->p4.pFunc;
+      sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg);
+      break;
+    }
+    case P4_INT64: {
+      sqlite3_snprintf(nTemp, zTemp, "%lld", *pOp->p4.pI64);
+      break;
+    }
+    case P4_INT32: {
+      sqlite3_snprintf(nTemp, zTemp, "%d", pOp->p4.i);
+      break;
+    }
+    case P4_REAL: {
+      sqlite3_snprintf(nTemp, zTemp, "%.16g", *pOp->p4.pReal);
+      break;
+    }
+    case P4_MEM: {
+      Mem *pMem = pOp->p4.pMem;
+      if( pMem->flags & MEM_Str ){
+        zP4 = pMem->z;
+      }else if( pMem->flags & MEM_Int ){
+        sqlite3_snprintf(nTemp, zTemp, "%lld", pMem->u.i);
+      }else if( pMem->flags & MEM_Real ){
+        sqlite3_snprintf(nTemp, zTemp, "%.16g", pMem->u.r);
+      }else if( pMem->flags & MEM_Null ){
+        sqlite3_snprintf(nTemp, zTemp, "NULL");
+      }else{
+        assert( pMem->flags & MEM_Blob );
+        zP4 = "(blob)";
+      }
+      break;
+    }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    case P4_VTAB: {
+      sqlite3_vtab *pVtab = pOp->p4.pVtab->pVtab;
+      sqlite3_snprintf(nTemp, zTemp, "vtab:%p:%p", pVtab, pVtab->pModule);
+      break;
+    }
+#endif
+    case P4_INTARRAY: {
+      sqlite3_snprintf(nTemp, zTemp, "intarray");
+      break;
+    }
+    case P4_SUBPROGRAM: {
+      sqlite3_snprintf(nTemp, zTemp, "program");
+      break;
+    }
+    case P4_ADVANCE: {
+      zTemp[0] = 0;
+      break;
+    }
+    default: {
+      zP4 = pOp->p4.z;
+      if( zP4==0 ){
+        zP4 = zTemp;
+        zTemp[0] = 0;
+      }
+    }
+  }
+  assert( zP4!=0 );
+  return zP4;
+}
+#endif
+
+/*
+** Declare to the Vdbe that the BTree object at db->aDb[i] is used.
+**
+** The prepared statements need to know in advance the complete set of
+** attached databases that will be use.  A mask of these databases
+** is maintained in p->btreeMask.  The p->lockMask value is the subset of
+** p->btreeMask of databases that will require a lock.
+*/
+SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe *p, int i){
+  assert( i>=0 && i<p->db->nDb && i<(int)sizeof(yDbMask)*8 );
+  assert( i<(int)sizeof(p->btreeMask)*8 );
+  DbMaskSet(p->btreeMask, i);
+  if( i!=1 && sqlite3BtreeSharable(p->db->aDb[i].pBt) ){
+    DbMaskSet(p->lockMask, i);
+  }
+}
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
+/*
+** If SQLite is compiled to support shared-cache mode and to be threadsafe,
+** this routine obtains the mutex associated with each BtShared structure
+** that may be accessed by the VM passed as an argument. In doing so it also
+** sets the BtShared.db member of each of the BtShared structures, ensuring
+** that the correct busy-handler callback is invoked if required.
+**
+** If SQLite is not threadsafe but does support shared-cache mode, then
+** sqlite3BtreeEnter() is invoked to set the BtShared.db variables
+** of all of BtShared structures accessible via the database handle 
+** associated with the VM.
+**
+** If SQLite is not threadsafe and does not support shared-cache mode, this
+** function is a no-op.
+**
+** The p->btreeMask field is a bitmask of all btrees that the prepared 
+** statement p will ever use.  Let N be the number of bits in p->btreeMask
+** corresponding to btrees that use shared cache.  Then the runtime of
+** this routine is N*N.  But as N is rarely more than 1, this should not
+** be a problem.
+*/
+SQLITE_PRIVATE void sqlite3VdbeEnter(Vdbe *p){
+  int i;
+  sqlite3 *db;
+  Db *aDb;
+  int nDb;
+  if( DbMaskAllZero(p->lockMask) ) return;  /* The common case */
+  db = p->db;
+  aDb = db->aDb;
+  nDb = db->nDb;
+  for(i=0; i<nDb; i++){
+    if( i!=1 && DbMaskTest(p->lockMask,i) && ALWAYS(aDb[i].pBt!=0) ){
+      sqlite3BtreeEnter(aDb[i].pBt);
+    }
+  }
+}
+#endif
+
+#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
+/*
+** Unlock all of the btrees previously locked by a call to sqlite3VdbeEnter().
+*/
+SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){
+  int i;
+  sqlite3 *db;
+  Db *aDb;
+  int nDb;
+  if( DbMaskAllZero(p->lockMask) ) return;  /* The common case */
+  db = p->db;
+  aDb = db->aDb;
+  nDb = db->nDb;
+  for(i=0; i<nDb; i++){
+    if( i!=1 && DbMaskTest(p->lockMask,i) && ALWAYS(aDb[i].pBt!=0) ){
+      sqlite3BtreeLeave(aDb[i].pBt);
+    }
+  }
+}
+#endif
+
+#if defined(VDBE_PROFILE) || defined(SQLITE_DEBUG)
+/*
+** Print a single opcode.  This routine is used for debugging only.
+*/
+SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE *pOut, int pc, Op *pOp){
+  char *zP4;
+  char zPtr[50];
+  char zCom[100];
+  static const char *zFormat1 = "%4d %-13s %4d %4d %4d %-13s %.2X %s\n";
+  if( pOut==0 ) pOut = stdout;
+  zP4 = displayP4(pOp, zPtr, sizeof(zPtr));
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+  displayComment(pOp, zP4, zCom, sizeof(zCom));
+#else
+  zCom[0] = 0;
+#endif
+  /* NB:  The sqlite3OpcodeName() function is implemented by code created
+  ** by the mkopcodeh.awk and mkopcodec.awk scripts which extract the
+  ** information from the vdbe.c source text */
+  fprintf(pOut, zFormat1, pc, 
+      sqlite3OpcodeName(pOp->opcode), pOp->p1, pOp->p2, pOp->p3, zP4, pOp->p5,
+      zCom
+  );
+  fflush(pOut);
+}
+#endif
+
+/*
+** Release an array of N Mem elements
+*/
+static void releaseMemArray(Mem *p, int N){
+  if( p && N ){
+    Mem *pEnd = &p[N];
+    sqlite3 *db = p->db;
+    u8 malloc_failed = db->mallocFailed;
+    if( db->pnBytesFreed ){
+      do{
+        if( p->szMalloc ) sqlite3DbFree(db, p->zMalloc);
+      }while( (++p)<pEnd );
+      return;
+    }
+    do{
+      assert( (&p[1])==pEnd || p[0].db==p[1].db );
+      assert( sqlite3VdbeCheckMemInvariants(p) );
+
+      /* This block is really an inlined version of sqlite3VdbeMemRelease()
+      ** that takes advantage of the fact that the memory cell value is 
+      ** being set to NULL after releasing any dynamic resources.
+      **
+      ** The justification for duplicating code is that according to 
+      ** callgrind, this causes a certain test case to hit the CPU 4.7 
+      ** percent less (x86 linux, gcc version 4.1.2, -O6) than if 
+      ** sqlite3MemRelease() were called from here. With -O2, this jumps
+      ** to 6.6 percent. The test case is inserting 1000 rows into a table 
+      ** with no indexes using a single prepared INSERT statement, bind() 
+      ** and reset(). Inserts are grouped into a transaction.
+      */
+      testcase( p->flags & MEM_Agg );
+      testcase( p->flags & MEM_Dyn );
+      testcase( p->flags & MEM_Frame );
+      testcase( p->flags & MEM_RowSet );
+      if( p->flags&(MEM_Agg|MEM_Dyn|MEM_Frame|MEM_RowSet) ){
+        sqlite3VdbeMemRelease(p);
+      }else if( p->szMalloc ){
+        sqlite3DbFree(db, p->zMalloc);
+        p->szMalloc = 0;
+      }
+
+      p->flags = MEM_Undefined;
+    }while( (++p)<pEnd );
+    db->mallocFailed = malloc_failed;
+  }
+}
+
+/*
+** Delete a VdbeFrame object and its contents. VdbeFrame objects are
+** allocated by the OP_Program opcode in sqlite3VdbeExec().
+*/
+SQLITE_PRIVATE void sqlite3VdbeFrameDelete(VdbeFrame *p){
+  int i;
+  Mem *aMem = VdbeFrameMem(p);
+  VdbeCursor **apCsr = (VdbeCursor **)&aMem[p->nChildMem];
+  for(i=0; i<p->nChildCsr; i++){
+    sqlite3VdbeFreeCursor(p->v, apCsr[i]);
+  }
+  releaseMemArray(aMem, p->nChildMem);
+  sqlite3DbFree(p->v->db, p);
+}
+
+#ifndef SQLITE_OMIT_EXPLAIN
+/*
+** Give a listing of the program in the virtual machine.
+**
+** The interface is the same as sqlite3VdbeExec().  But instead of
+** running the code, it invokes the callback once for each instruction.
+** This feature is used to implement "EXPLAIN".
+**
+** When p->explain==1, each instruction is listed.  When
+** p->explain==2, only OP_Explain instructions are listed and these
+** are shown in a different format.  p->explain==2 is used to implement
+** EXPLAIN QUERY PLAN.
+**
+** When p->explain==1, first the main program is listed, then each of
+** the trigger subprograms are listed one by one.
+*/
+SQLITE_PRIVATE int sqlite3VdbeList(
+  Vdbe *p                   /* The VDBE */
+){
+  int nRow;                            /* Stop when row count reaches this */
+  int nSub = 0;                        /* Number of sub-vdbes seen so far */
+  SubProgram **apSub = 0;              /* Array of sub-vdbes */
+  Mem *pSub = 0;                       /* Memory cell hold array of subprogs */
+  sqlite3 *db = p->db;                 /* The database connection */
+  int i;                               /* Loop counter */
+  int rc = SQLITE_OK;                  /* Return code */
+  Mem *pMem = &p->aMem[1];             /* First Mem of result set */
+
+  assert( p->explain );
+  assert( p->magic==VDBE_MAGIC_RUN );
+  assert( p->rc==SQLITE_OK || p->rc==SQLITE_BUSY || p->rc==SQLITE_NOMEM );
+
+  /* Even though this opcode does not use dynamic strings for
+  ** the result, result columns may become dynamic if the user calls
+  ** sqlite3_column_text16(), causing a translation to UTF-16 encoding.
+  */
+  releaseMemArray(pMem, 8);
+  p->pResultSet = 0;
+
+  if( p->rc==SQLITE_NOMEM ){
+    /* This happens if a malloc() inside a call to sqlite3_column_text() or
+    ** sqlite3_column_text16() failed.  */
+    db->mallocFailed = 1;
+    return SQLITE_ERROR;
+  }
+
+  /* When the number of output rows reaches nRow, that means the
+  ** listing has finished and sqlite3_step() should return SQLITE_DONE.
+  ** nRow is the sum of the number of rows in the main program, plus
+  ** the sum of the number of rows in all trigger subprograms encountered
+  ** so far.  The nRow value will increase as new trigger subprograms are
+  ** encountered, but p->pc will eventually catch up to nRow.
+  */
+  nRow = p->nOp;
+  if( p->explain==1 ){
+    /* The first 8 memory cells are used for the result set.  So we will
+    ** commandeer the 9th cell to use as storage for an array of pointers
+    ** to trigger subprograms.  The VDBE is guaranteed to have at least 9
+    ** cells.  */
+    assert( p->nMem>9 );
+    pSub = &p->aMem[9];
+    if( pSub->flags&MEM_Blob ){
+      /* On the first call to sqlite3_step(), pSub will hold a NULL.  It is
+      ** initialized to a BLOB by the P4_SUBPROGRAM processing logic below */
+      nSub = pSub->n/sizeof(Vdbe*);
+      apSub = (SubProgram **)pSub->z;
+    }
+    for(i=0; i<nSub; i++){
+      nRow += apSub[i]->nOp;
+    }
+  }
+
+  do{
+    i = p->pc++;
+  }while( i<nRow && p->explain==2 && p->aOp[i].opcode!=OP_Explain );
+  if( i>=nRow ){
+    p->rc = SQLITE_OK;
+    rc = SQLITE_DONE;
+  }else if( db->u1.isInterrupted ){
+    p->rc = SQLITE_INTERRUPT;
+    rc = SQLITE_ERROR;
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(p->rc));
+  }else{
+    char *zP4;
+    Op *pOp;
+    if( i<p->nOp ){
+      /* The output line number is small enough that we are still in the
+      ** main program. */
+      pOp = &p->aOp[i];
+    }else{
+      /* We are currently listing subprograms.  Figure out which one and
+      ** pick up the appropriate opcode. */
+      int j;
+      i -= p->nOp;
+      for(j=0; i>=apSub[j]->nOp; j++){
+        i -= apSub[j]->nOp;
+      }
+      pOp = &apSub[j]->aOp[i];
+    }
+    if( p->explain==1 ){
+      pMem->flags = MEM_Int;
+      pMem->u.i = i;                                /* Program counter */
+      pMem++;
+  
+      pMem->flags = MEM_Static|MEM_Str|MEM_Term;
+      pMem->z = (char*)sqlite3OpcodeName(pOp->opcode); /* Opcode */
+      assert( pMem->z!=0 );
+      pMem->n = sqlite3Strlen30(pMem->z);
+      pMem->enc = SQLITE_UTF8;
+      pMem++;
+
+      /* When an OP_Program opcode is encounter (the only opcode that has
+      ** a P4_SUBPROGRAM argument), expand the size of the array of subprograms
+      ** kept in p->aMem[9].z to hold the new program - assuming this subprogram
+      ** has not already been seen.
+      */
+      if( pOp->p4type==P4_SUBPROGRAM ){
+        int nByte = (nSub+1)*sizeof(SubProgram*);
+        int j;
+        for(j=0; j<nSub; j++){
+          if( apSub[j]==pOp->p4.pProgram ) break;
+        }
+        if( j==nSub && SQLITE_OK==sqlite3VdbeMemGrow(pSub, nByte, nSub!=0) ){
+          apSub = (SubProgram **)pSub->z;
+          apSub[nSub++] = pOp->p4.pProgram;
+          pSub->flags |= MEM_Blob;
+          pSub->n = nSub*sizeof(SubProgram*);
+        }
+      }
+    }
+
+    pMem->flags = MEM_Int;
+    pMem->u.i = pOp->p1;                          /* P1 */
+    pMem++;
+
+    pMem->flags = MEM_Int;
+    pMem->u.i = pOp->p2;                          /* P2 */
+    pMem++;
+
+    pMem->flags = MEM_Int;
+    pMem->u.i = pOp->p3;                          /* P3 */
+    pMem++;
+
+    if( sqlite3VdbeMemClearAndResize(pMem, 32) ){ /* P4 */
+      assert( p->db->mallocFailed );
+      return SQLITE_ERROR;
+    }
+    pMem->flags = MEM_Str|MEM_Term;
+    zP4 = displayP4(pOp, pMem->z, 32);
+    if( zP4!=pMem->z ){
+      sqlite3VdbeMemSetStr(pMem, zP4, -1, SQLITE_UTF8, 0);
+    }else{
+      assert( pMem->z!=0 );
+      pMem->n = sqlite3Strlen30(pMem->z);
+      pMem->enc = SQLITE_UTF8;
+    }
+    pMem++;
+
+    if( p->explain==1 ){
+      if( sqlite3VdbeMemClearAndResize(pMem, 4) ){
+        assert( p->db->mallocFailed );
+        return SQLITE_ERROR;
+      }
+      pMem->flags = MEM_Str|MEM_Term;
+      pMem->n = 2;
+      sqlite3_snprintf(3, pMem->z, "%.2x", pOp->p5);   /* P5 */
+      pMem->enc = SQLITE_UTF8;
+      pMem++;
+  
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+      if( sqlite3VdbeMemClearAndResize(pMem, 500) ){
+        assert( p->db->mallocFailed );
+        return SQLITE_ERROR;
+      }
+      pMem->flags = MEM_Str|MEM_Term;
+      pMem->n = displayComment(pOp, zP4, pMem->z, 500);
+      pMem->enc = SQLITE_UTF8;
+#else
+      pMem->flags = MEM_Null;                       /* Comment */
+#endif
+    }
+
+    p->nResColumn = 8 - 4*(p->explain-1);
+    p->pResultSet = &p->aMem[1];
+    p->rc = SQLITE_OK;
+    rc = SQLITE_ROW;
+  }
+  return rc;
+}
+#endif /* SQLITE_OMIT_EXPLAIN */
+
+#ifdef SQLITE_DEBUG
+/*
+** Print the SQL that was used to generate a VDBE program.
+*/
+SQLITE_PRIVATE void sqlite3VdbePrintSql(Vdbe *p){
+  const char *z = 0;
+  if( p->zSql ){
+    z = p->zSql;
+  }else if( p->nOp>=1 ){
+    const VdbeOp *pOp = &p->aOp[0];
+    if( pOp->opcode==OP_Init && pOp->p4.z!=0 ){
+      z = pOp->p4.z;
+      while( sqlite3Isspace(*z) ) z++;
+    }
+  }
+  if( z ) printf("SQL: [%s]\n", z);
+}
+#endif
+
+#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE)
+/*
+** Print an IOTRACE message showing SQL content.
+*/
+SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe *p){
+  int nOp = p->nOp;
+  VdbeOp *pOp;
+  if( sqlite3IoTrace==0 ) return;
+  if( nOp<1 ) return;
+  pOp = &p->aOp[0];
+  if( pOp->opcode==OP_Init && pOp->p4.z!=0 ){
+    int i, j;
+    char z[1000];
+    sqlite3_snprintf(sizeof(z), z, "%s", pOp->p4.z);
+    for(i=0; sqlite3Isspace(z[i]); i++){}
+    for(j=0; z[i]; i++){
+      if( sqlite3Isspace(z[i]) ){
+        if( z[i-1]!=' ' ){
+          z[j++] = ' ';
+        }
+      }else{
+        z[j++] = z[i];
+      }
+    }
+    z[j] = 0;
+    sqlite3IoTrace("SQL %s\n", z);
+  }
+}
+#endif /* !SQLITE_OMIT_TRACE && SQLITE_ENABLE_IOTRACE */
+
+/*
+** Allocate space from a fixed size buffer and return a pointer to
+** that space.  If insufficient space is available, return NULL.
+**
+** The pBuf parameter is the initial value of a pointer which will
+** receive the new memory.  pBuf is normally NULL.  If pBuf is not
+** NULL, it means that memory space has already been allocated and that
+** this routine should not allocate any new memory.  When pBuf is not
+** NULL simply return pBuf.  Only allocate new memory space when pBuf
+** is NULL.
+**
+** nByte is the number of bytes of space needed.
+**
+** *ppFrom points to available space and pEnd points to the end of the
+** available space.  When space is allocated, *ppFrom is advanced past
+** the end of the allocated space.
+**
+** *pnByte is a counter of the number of bytes of space that have failed
+** to allocate.  If there is insufficient space in *ppFrom to satisfy the
+** request, then increment *pnByte by the amount of the request.
+*/
+static void *allocSpace(
+  void *pBuf,          /* Where return pointer will be stored */
+  int nByte,           /* Number of bytes to allocate */
+  u8 **ppFrom,         /* IN/OUT: Allocate from *ppFrom */
+  u8 *pEnd,            /* Pointer to 1 byte past the end of *ppFrom buffer */
+  int *pnByte          /* If allocation cannot be made, increment *pnByte */
+){
+  assert( EIGHT_BYTE_ALIGNMENT(*ppFrom) );
+  if( pBuf ) return pBuf;
+  nByte = ROUND8(nByte);
+  if( &(*ppFrom)[nByte] <= pEnd ){
+    pBuf = (void*)*ppFrom;
+    *ppFrom += nByte;
+  }else{
+    *pnByte += nByte;
+  }
+  return pBuf;
+}
+
+/*
+** Rewind the VDBE back to the beginning in preparation for
+** running it.
+*/
+SQLITE_PRIVATE void sqlite3VdbeRewind(Vdbe *p){
+#if defined(SQLITE_DEBUG) || defined(VDBE_PROFILE)
+  int i;
+#endif
+  assert( p!=0 );
+  assert( p->magic==VDBE_MAGIC_INIT );
+
+  /* There should be at least one opcode.
+  */
+  assert( p->nOp>0 );
+
+  /* Set the magic to VDBE_MAGIC_RUN sooner rather than later. */
+  p->magic = VDBE_MAGIC_RUN;
+
+#ifdef SQLITE_DEBUG
+  for(i=1; i<p->nMem; i++){
+    assert( p->aMem[i].db==p->db );
+  }
+#endif
+  p->pc = -1;
+  p->rc = SQLITE_OK;
+  p->errorAction = OE_Abort;
+  p->magic = VDBE_MAGIC_RUN;
+  p->nChange = 0;
+  p->cacheCtr = 1;
+  p->minWriteFileFormat = 255;
+  p->iStatement = 0;
+  p->nFkConstraint = 0;
+#ifdef VDBE_PROFILE
+  for(i=0; i<p->nOp; i++){
+    p->aOp[i].cnt = 0;
+    p->aOp[i].cycles = 0;
+  }
+#endif
+}
+
+/*
+** Prepare a virtual machine for execution for the first time after
+** creating the virtual machine.  This involves things such
+** as allocating registers and initializing the program counter.
+** After the VDBE has be prepped, it can be executed by one or more
+** calls to sqlite3VdbeExec().  
+**
+** This function may be called exactly once on each virtual machine.
+** After this routine is called the VM has been "packaged" and is ready
+** to run.  After this routine is called, further calls to 
+** sqlite3VdbeAddOp() functions are prohibited.  This routine disconnects
+** the Vdbe from the Parse object that helped generate it so that the
+** the Vdbe becomes an independent entity and the Parse object can be
+** destroyed.
+**
+** Use the sqlite3VdbeRewind() procedure to restore a virtual machine back
+** to its initial state after it has been run.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMakeReady(
+  Vdbe *p,                       /* The VDBE */
+  Parse *pParse                  /* Parsing context */
+){
+  sqlite3 *db;                   /* The database connection */
+  int nVar;                      /* Number of parameters */
+  int nMem;                      /* Number of VM memory registers */
+  int nCursor;                   /* Number of cursors required */
+  int nArg;                      /* Number of arguments in subprograms */
+  int nOnce;                     /* Number of OP_Once instructions */
+  int n;                         /* Loop counter */
+  u8 *zCsr;                      /* Memory available for allocation */
+  u8 *zEnd;                      /* First byte past allocated memory */
+  int nByte;                     /* How much extra memory is needed */
+
+  assert( p!=0 );
+  assert( p->nOp>0 );
+  assert( pParse!=0 );
+  assert( p->magic==VDBE_MAGIC_INIT );
+  assert( pParse==p->pParse );
+  db = p->db;
+  assert( db->mallocFailed==0 );
+  nVar = pParse->nVar;
+  nMem = pParse->nMem;
+  nCursor = pParse->nTab;
+  nArg = pParse->nMaxArg;
+  nOnce = pParse->nOnce;
+  if( nOnce==0 ) nOnce = 1; /* Ensure at least one byte in p->aOnceFlag[] */
+  
+  /* For each cursor required, also allocate a memory cell. Memory
+  ** cells (nMem+1-nCursor)..nMem, inclusive, will never be used by
+  ** the vdbe program. Instead they are used to allocate space for
+  ** VdbeCursor/BtCursor structures. The blob of memory associated with 
+  ** cursor 0 is stored in memory cell nMem. Memory cell (nMem-1)
+  ** stores the blob of memory associated with cursor 1, etc.
+  **
+  ** See also: allocateCursor().
+  */
+  nMem += nCursor;
+
+  /* Allocate space for memory registers, SQL variables, VDBE cursors and 
+  ** an array to marshal SQL function arguments in.
+  */
+  zCsr = (u8*)&p->aOp[p->nOp];            /* Memory avaliable for allocation */
+  zEnd = (u8*)&p->aOp[pParse->nOpAlloc];  /* First byte past end of zCsr[] */
+
+  resolveP2Values(p, &nArg);
+  p->usesStmtJournal = (u8)(pParse->isMultiWrite && pParse->mayAbort);
+  if( pParse->explain && nMem<10 ){
+    nMem = 10;
+  }
+  memset(zCsr, 0, zEnd-zCsr);
+  zCsr += (zCsr - (u8*)0)&7;
+  assert( EIGHT_BYTE_ALIGNMENT(zCsr) );
+  p->expired = 0;
+
+  /* Memory for registers, parameters, cursor, etc, is allocated in two
+  ** passes.  On the first pass, we try to reuse unused space at the 
+  ** end of the opcode array.  If we are unable to satisfy all memory
+  ** requirements by reusing the opcode array tail, then the second
+  ** pass will fill in the rest using a fresh allocation.  
+  **
+  ** This two-pass approach that reuses as much memory as possible from
+  ** the leftover space at the end of the opcode array can significantly
+  ** reduce the amount of memory held by a prepared statement.
+  */
+  do {
+    nByte = 0;
+    p->aMem = allocSpace(p->aMem, nMem*sizeof(Mem), &zCsr, zEnd, &nByte);
+    p->aVar = allocSpace(p->aVar, nVar*sizeof(Mem), &zCsr, zEnd, &nByte);
+    p->apArg = allocSpace(p->apArg, nArg*sizeof(Mem*), &zCsr, zEnd, &nByte);
+    p->azVar = allocSpace(p->azVar, nVar*sizeof(char*), &zCsr, zEnd, &nByte);
+    p->apCsr = allocSpace(p->apCsr, nCursor*sizeof(VdbeCursor*),
+                          &zCsr, zEnd, &nByte);
+    p->aOnceFlag = allocSpace(p->aOnceFlag, nOnce, &zCsr, zEnd, &nByte);
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+    p->anExec = allocSpace(p->anExec, p->nOp*sizeof(i64), &zCsr, zEnd, &nByte);
+#endif
+    if( nByte ){
+      p->pFree = sqlite3DbMallocZero(db, nByte);
+    }
+    zCsr = p->pFree;
+    zEnd = &zCsr[nByte];
+  }while( nByte && !db->mallocFailed );
+
+  p->nCursor = nCursor;
+  p->nOnceFlag = nOnce;
+  if( p->aVar ){
+    p->nVar = (ynVar)nVar;
+    for(n=0; n<nVar; n++){
+      p->aVar[n].flags = MEM_Null;
+      p->aVar[n].db = db;
+    }
+  }
+  if( p->azVar && pParse->nzVar>0 ){
+    p->nzVar = pParse->nzVar;
+    memcpy(p->azVar, pParse->azVar, p->nzVar*sizeof(p->azVar[0]));
+    memset(pParse->azVar, 0, pParse->nzVar*sizeof(pParse->azVar[0]));
+  }
+  if( p->aMem ){
+    p->aMem--;                      /* aMem[] goes from 1..nMem */
+    p->nMem = nMem;                 /*       not from 0..nMem-1 */
+    for(n=1; n<=nMem; n++){
+      p->aMem[n].flags = MEM_Undefined;
+      p->aMem[n].db = db;
+    }
+  }
+  p->explain = pParse->explain;
+  sqlite3VdbeRewind(p);
+}
+
+/*
+** Close a VDBE cursor and release all the resources that cursor 
+** happens to hold.
+*/
+SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
+  if( pCx==0 ){
+    return;
+  }
+  sqlite3VdbeSorterClose(p->db, pCx);
+  if( pCx->pBt ){
+    sqlite3BtreeClose(pCx->pBt);
+    /* The pCx->pCursor will be close automatically, if it exists, by
+    ** the call above. */
+  }else if( pCx->pCursor ){
+    sqlite3BtreeCloseCursor(pCx->pCursor);
+  }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  else if( pCx->pVtabCursor ){
+    sqlite3_vtab_cursor *pVtabCursor = pCx->pVtabCursor;
+    const sqlite3_module *pModule = pVtabCursor->pVtab->pModule;
+    p->inVtabMethod = 1;
+    pModule->xClose(pVtabCursor);
+    p->inVtabMethod = 0;
+  }
+#endif
+}
+
+/*
+** Copy the values stored in the VdbeFrame structure to its Vdbe. This
+** is used, for example, when a trigger sub-program is halted to restore
+** control to the main program.
+*/
+SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *pFrame){
+  Vdbe *v = pFrame->v;
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  v->anExec = pFrame->anExec;
+#endif
+  v->aOnceFlag = pFrame->aOnceFlag;
+  v->nOnceFlag = pFrame->nOnceFlag;
+  v->aOp = pFrame->aOp;
+  v->nOp = pFrame->nOp;
+  v->aMem = pFrame->aMem;
+  v->nMem = pFrame->nMem;
+  v->apCsr = pFrame->apCsr;
+  v->nCursor = pFrame->nCursor;
+  v->db->lastRowid = pFrame->lastRowid;
+  v->nChange = pFrame->nChange;
+  v->db->nChange = pFrame->nDbChange;
+  return pFrame->pc;
+}
+
+/*
+** Close all cursors.
+**
+** Also release any dynamic memory held by the VM in the Vdbe.aMem memory 
+** cell array. This is necessary as the memory cell array may contain
+** pointers to VdbeFrame objects, which may in turn contain pointers to
+** open cursors.
+*/
+static void closeAllCursors(Vdbe *p){
+  if( p->pFrame ){
+    VdbeFrame *pFrame;
+    for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent);
+    sqlite3VdbeFrameRestore(pFrame);
+    p->pFrame = 0;
+    p->nFrame = 0;
+  }
+  assert( p->nFrame==0 );
+
+  if( p->apCsr ){
+    int i;
+    for(i=0; i<p->nCursor; i++){
+      VdbeCursor *pC = p->apCsr[i];
+      if( pC ){
+        sqlite3VdbeFreeCursor(p, pC);
+        p->apCsr[i] = 0;
+      }
+    }
+  }
+  if( p->aMem ){
+    releaseMemArray(&p->aMem[1], p->nMem);
+  }
+  while( p->pDelFrame ){
+    VdbeFrame *pDel = p->pDelFrame;
+    p->pDelFrame = pDel->pParent;
+    sqlite3VdbeFrameDelete(pDel);
+  }
+
+  /* Delete any auxdata allocations made by the VM */
+  if( p->pAuxData ) sqlite3VdbeDeleteAuxData(p, -1, 0);
+  assert( p->pAuxData==0 );
+}
+
+/*
+** Clean up the VM after a single run.
+*/
+static void Cleanup(Vdbe *p){
+  sqlite3 *db = p->db;
+
+#ifdef SQLITE_DEBUG
+  /* Execute assert() statements to ensure that the Vdbe.apCsr[] and 
+  ** Vdbe.aMem[] arrays have already been cleaned up.  */
+  int i;
+  if( p->apCsr ) for(i=0; i<p->nCursor; i++) assert( p->apCsr[i]==0 );
+  if( p->aMem ){
+    for(i=1; i<=p->nMem; i++) assert( p->aMem[i].flags==MEM_Undefined );
+  }
+#endif
+
+  sqlite3DbFree(db, p->zErrMsg);
+  p->zErrMsg = 0;
+  p->pResultSet = 0;
+}
+
+/*
+** Set the number of result columns that will be returned by this SQL
+** statement. This is now set at compile time, rather than during
+** execution of the vdbe program so that sqlite3_column_count() can
+** be called on an SQL statement before sqlite3_step().
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetNumCols(Vdbe *p, int nResColumn){
+  Mem *pColName;
+  int n;
+  sqlite3 *db = p->db;
+
+  releaseMemArray(p->aColName, p->nResColumn*COLNAME_N);
+  sqlite3DbFree(db, p->aColName);
+  n = nResColumn*COLNAME_N;
+  p->nResColumn = (u16)nResColumn;
+  p->aColName = pColName = (Mem*)sqlite3DbMallocZero(db, sizeof(Mem)*n );
+  if( p->aColName==0 ) return;
+  while( n-- > 0 ){
+    pColName->flags = MEM_Null;
+    pColName->db = p->db;
+    pColName++;
+  }
+}
+
+/*
+** Set the name of the idx'th column to be returned by the SQL statement.
+** zName must be a pointer to a nul terminated string.
+**
+** This call must be made after a call to sqlite3VdbeSetNumCols().
+**
+** The final parameter, xDel, must be one of SQLITE_DYNAMIC, SQLITE_STATIC
+** or SQLITE_TRANSIENT. If it is SQLITE_DYNAMIC, then the buffer pointed
+** to by zName will be freed by sqlite3DbFree() when the vdbe is destroyed.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSetColName(
+  Vdbe *p,                         /* Vdbe being configured */
+  int idx,                         /* Index of column zName applies to */
+  int var,                         /* One of the COLNAME_* constants */
+  const char *zName,               /* Pointer to buffer containing name */
+  void (*xDel)(void*)              /* Memory management strategy for zName */
+){
+  int rc;
+  Mem *pColName;
+  assert( idx<p->nResColumn );
+  assert( var<COLNAME_N );
+  if( p->db->mallocFailed ){
+    assert( !zName || xDel!=SQLITE_DYNAMIC );
+    return SQLITE_NOMEM;
+  }
+  assert( p->aColName!=0 );
+  pColName = &(p->aColName[idx+var*p->nResColumn]);
+  rc = sqlite3VdbeMemSetStr(pColName, zName, -1, SQLITE_UTF8, xDel);
+  assert( rc!=0 || !zName || (pColName->flags&MEM_Term)!=0 );
+  return rc;
+}
+
+/*
+** A read or write transaction may or may not be active on database handle
+** db. If a transaction is active, commit it. If there is a
+** write-transaction spanning more than one database file, this routine
+** takes care of the master journal trickery.
+*/
+static int vdbeCommit(sqlite3 *db, Vdbe *p){
+  int i;
+  int nTrans = 0;  /* Number of databases with an active write-transaction */
+  int rc = SQLITE_OK;
+  int needXcommit = 0;
+
+#ifdef SQLITE_OMIT_VIRTUALTABLE
+  /* With this option, sqlite3VtabSync() is defined to be simply 
+  ** SQLITE_OK so p is not used. 
+  */
+  UNUSED_PARAMETER(p);
+#endif
+
+  /* Before doing anything else, call the xSync() callback for any
+  ** virtual module tables written in this transaction. This has to
+  ** be done before determining whether a master journal file is 
+  ** required, as an xSync() callback may add an attached database
+  ** to the transaction.
+  */
+  rc = sqlite3VtabSync(db, p);
+
+  /* This loop determines (a) if the commit hook should be invoked and
+  ** (b) how many database files have open write transactions, not 
+  ** including the temp database. (b) is important because if more than 
+  ** one database file has an open write transaction, a master journal
+  ** file is required for an atomic commit.
+  */ 
+  for(i=0; rc==SQLITE_OK && i<db->nDb; i++){ 
+    Btree *pBt = db->aDb[i].pBt;
+    if( sqlite3BtreeIsInTrans(pBt) ){
+      needXcommit = 1;
+      if( i!=1 ) nTrans++;
+      sqlite3BtreeEnter(pBt);
+      rc = sqlite3PagerExclusiveLock(sqlite3BtreePager(pBt));
+      sqlite3BtreeLeave(pBt);
+    }
+  }
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* If there are any write-transactions at all, invoke the commit hook */
+  if( needXcommit && db->xCommitCallback ){
+    rc = db->xCommitCallback(db->pCommitArg);
+    if( rc ){
+      return SQLITE_CONSTRAINT_COMMITHOOK;
+    }
+  }
+
+  /* The simple case - no more than one database file (not counting the
+  ** TEMP database) has a transaction active.   There is no need for the
+  ** master-journal.
+  **
+  ** If the return value of sqlite3BtreeGetFilename() is a zero length
+  ** string, it means the main database is :memory: or a temp file.  In 
+  ** that case we do not support atomic multi-file commits, so use the 
+  ** simple case then too.
+  */
+  if( 0==sqlite3Strlen30(sqlite3BtreeGetFilename(db->aDb[0].pBt))
+   || nTrans<=1
+  ){
+    for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
+      Btree *pBt = db->aDb[i].pBt;
+      if( pBt ){
+        rc = sqlite3BtreeCommitPhaseOne(pBt, 0);
+      }
+    }
+
+    /* Do the commit only if all databases successfully complete phase 1. 
+    ** If one of the BtreeCommitPhaseOne() calls fails, this indicates an
+    ** IO error while deleting or truncating a journal file. It is unlikely,
+    ** but could happen. In this case abandon processing and return the error.
+    */
+    for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
+      Btree *pBt = db->aDb[i].pBt;
+      if( pBt ){
+        rc = sqlite3BtreeCommitPhaseTwo(pBt, 0);
+      }
+    }
+    if( rc==SQLITE_OK ){
+      sqlite3VtabCommit(db);
+    }
+  }
+
+  /* The complex case - There is a multi-file write-transaction active.
+  ** This requires a master journal file to ensure the transaction is
+  ** committed atomically.
+  */
+#ifndef SQLITE_OMIT_DISKIO
+  else{
+    sqlite3_vfs *pVfs = db->pVfs;
+    int needSync = 0;
+    char *zMaster = 0;   /* File-name for the master journal */
+    char const *zMainFile = sqlite3BtreeGetFilename(db->aDb[0].pBt);
+    sqlite3_file *pMaster = 0;
+    i64 offset = 0;
+    int res;
+    int retryCount = 0;
+    int nMainFile;
+
+    /* Select a master journal file name */
+    nMainFile = sqlite3Strlen30(zMainFile);
+    zMaster = sqlite3MPrintf(db, "%s-mjXXXXXX9XXz", zMainFile);
+    if( zMaster==0 ) return SQLITE_NOMEM;
+    do {
+      u32 iRandom;
+      if( retryCount ){
+        if( retryCount>100 ){
+          sqlite3_log(SQLITE_FULL, "MJ delete: %s", zMaster);
+          sqlite3OsDelete(pVfs, zMaster, 0);
+          break;
+        }else if( retryCount==1 ){
+          sqlite3_log(SQLITE_FULL, "MJ collide: %s", zMaster);
+        }
+      }
+      retryCount++;
+      sqlite3_randomness(sizeof(iRandom), &iRandom);
+      sqlite3_snprintf(13, &zMaster[nMainFile], "-mj%06X9%02X",
+                               (iRandom>>8)&0xffffff, iRandom&0xff);
+      /* The antipenultimate character of the master journal name must
+      ** be "9" to avoid name collisions when using 8+3 filenames. */
+      assert( zMaster[sqlite3Strlen30(zMaster)-3]=='9' );
+      sqlite3FileSuffix3(zMainFile, zMaster);
+      rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
+    }while( rc==SQLITE_OK && res );
+    if( rc==SQLITE_OK ){
+      /* Open the master journal. */
+      rc = sqlite3OsOpenMalloc(pVfs, zMaster, &pMaster, 
+          SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
+          SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_MASTER_JOURNAL, 0
+      );
+    }
+    if( rc!=SQLITE_OK ){
+      sqlite3DbFree(db, zMaster);
+      return rc;
+    }
+ 
+    /* Write the name of each database file in the transaction into the new
+    ** master journal file. If an error occurs at this point close
+    ** and delete the master journal file. All the individual journal files
+    ** still have 'null' as the master journal pointer, so they will roll
+    ** back independently if a failure occurs.
+    */
+    for(i=0; i<db->nDb; i++){
+      Btree *pBt = db->aDb[i].pBt;
+      if( sqlite3BtreeIsInTrans(pBt) ){
+        char const *zFile = sqlite3BtreeGetJournalname(pBt);
+        if( zFile==0 ){
+          continue;  /* Ignore TEMP and :memory: databases */
+        }
+        assert( zFile[0]!=0 );
+        if( !needSync && !sqlite3BtreeSyncDisabled(pBt) ){
+          needSync = 1;
+        }
+        rc = sqlite3OsWrite(pMaster, zFile, sqlite3Strlen30(zFile)+1, offset);
+        offset += sqlite3Strlen30(zFile)+1;
+        if( rc!=SQLITE_OK ){
+          sqlite3OsCloseFree(pMaster);
+          sqlite3OsDelete(pVfs, zMaster, 0);
+          sqlite3DbFree(db, zMaster);
+          return rc;
+        }
+      }
+    }
+
+    /* Sync the master journal file. If the IOCAP_SEQUENTIAL device
+    ** flag is set this is not required.
+    */
+    if( needSync 
+     && 0==(sqlite3OsDeviceCharacteristics(pMaster)&SQLITE_IOCAP_SEQUENTIAL)
+     && SQLITE_OK!=(rc = sqlite3OsSync(pMaster, SQLITE_SYNC_NORMAL))
+    ){
+      sqlite3OsCloseFree(pMaster);
+      sqlite3OsDelete(pVfs, zMaster, 0);
+      sqlite3DbFree(db, zMaster);
+      return rc;
+    }
+
+    /* Sync all the db files involved in the transaction. The same call
+    ** sets the master journal pointer in each individual journal. If
+    ** an error occurs here, do not delete the master journal file.
+    **
+    ** If the error occurs during the first call to
+    ** sqlite3BtreeCommitPhaseOne(), then there is a chance that the
+    ** master journal file will be orphaned. But we cannot delete it,
+    ** in case the master journal file name was written into the journal
+    ** file before the failure occurred.
+    */
+    for(i=0; rc==SQLITE_OK && i<db->nDb; i++){ 
+      Btree *pBt = db->aDb[i].pBt;
+      if( pBt ){
+        rc = sqlite3BtreeCommitPhaseOne(pBt, zMaster);
+      }
+    }
+    sqlite3OsCloseFree(pMaster);
+    assert( rc!=SQLITE_BUSY );
+    if( rc!=SQLITE_OK ){
+      sqlite3DbFree(db, zMaster);
+      return rc;
+    }
+
+    /* Delete the master journal file. This commits the transaction. After
+    ** doing this the directory is synced again before any individual
+    ** transaction files are deleted.
+    */
+    rc = sqlite3OsDelete(pVfs, zMaster, 1);
+    sqlite3DbFree(db, zMaster);
+    zMaster = 0;
+    if( rc ){
+      return rc;
+    }
+
+    /* All files and directories have already been synced, so the following
+    ** calls to sqlite3BtreeCommitPhaseTwo() are only closing files and
+    ** deleting or truncating journals. If something goes wrong while
+    ** this is happening we don't really care. The integrity of the
+    ** transaction is already guaranteed, but some stray 'cold' journals
+    ** may be lying around. Returning an error code won't help matters.
+    */
+    disable_simulated_io_errors();
+    sqlite3BeginBenignMalloc();
+    for(i=0; i<db->nDb; i++){ 
+      Btree *pBt = db->aDb[i].pBt;
+      if( pBt ){
+        sqlite3BtreeCommitPhaseTwo(pBt, 1);
+      }
+    }
+    sqlite3EndBenignMalloc();
+    enable_simulated_io_errors();
+
+    sqlite3VtabCommit(db);
+  }
+#endif
+
+  return rc;
+}
+
+/* 
+** This routine checks that the sqlite3.nVdbeActive count variable
+** matches the number of vdbe's in the list sqlite3.pVdbe that are
+** currently active. An assertion fails if the two counts do not match.
+** This is an internal self-check only - it is not an essential processing
+** step.
+**
+** This is a no-op if NDEBUG is defined.
+*/
+#ifndef NDEBUG
+static void checkActiveVdbeCnt(sqlite3 *db){
+  Vdbe *p;
+  int cnt = 0;
+  int nWrite = 0;
+  int nRead = 0;
+  p = db->pVdbe;
+  while( p ){
+    if( sqlite3_stmt_busy((sqlite3_stmt*)p) ){
+      cnt++;
+      if( p->readOnly==0 ) nWrite++;
+      if( p->bIsReader ) nRead++;
+    }
+    p = p->pNext;
+  }
+  assert( cnt==db->nVdbeActive );
+  assert( nWrite==db->nVdbeWrite );
+  assert( nRead==db->nVdbeRead );
+}
+#else
+#define checkActiveVdbeCnt(x)
+#endif
+
+/*
+** If the Vdbe passed as the first argument opened a statement-transaction,
+** close it now. Argument eOp must be either SAVEPOINT_ROLLBACK or
+** SAVEPOINT_RELEASE. If it is SAVEPOINT_ROLLBACK, then the statement
+** transaction is rolled back. If eOp is SAVEPOINT_RELEASE, then the 
+** statement transaction is committed.
+**
+** If an IO error occurs, an SQLITE_IOERR_XXX error code is returned. 
+** Otherwise SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3VdbeCloseStatement(Vdbe *p, int eOp){
+  sqlite3 *const db = p->db;
+  int rc = SQLITE_OK;
+
+  /* If p->iStatement is greater than zero, then this Vdbe opened a 
+  ** statement transaction that should be closed here. The only exception
+  ** is that an IO error may have occurred, causing an emergency rollback.
+  ** In this case (db->nStatement==0), and there is nothing to do.
+  */
+  if( db->nStatement && p->iStatement ){
+    int i;
+    const int iSavepoint = p->iStatement-1;
+
+    assert( eOp==SAVEPOINT_ROLLBACK || eOp==SAVEPOINT_RELEASE);
+    assert( db->nStatement>0 );
+    assert( p->iStatement==(db->nStatement+db->nSavepoint) );
+
+    for(i=0; i<db->nDb; i++){ 
+      int rc2 = SQLITE_OK;
+      Btree *pBt = db->aDb[i].pBt;
+      if( pBt ){
+        if( eOp==SAVEPOINT_ROLLBACK ){
+          rc2 = sqlite3BtreeSavepoint(pBt, SAVEPOINT_ROLLBACK, iSavepoint);
+        }
+        if( rc2==SQLITE_OK ){
+          rc2 = sqlite3BtreeSavepoint(pBt, SAVEPOINT_RELEASE, iSavepoint);
+        }
+        if( rc==SQLITE_OK ){
+          rc = rc2;
+        }
+      }
+    }
+    db->nStatement--;
+    p->iStatement = 0;
+
+    if( rc==SQLITE_OK ){
+      if( eOp==SAVEPOINT_ROLLBACK ){
+        rc = sqlite3VtabSavepoint(db, SAVEPOINT_ROLLBACK, iSavepoint);
+      }
+      if( rc==SQLITE_OK ){
+        rc = sqlite3VtabSavepoint(db, SAVEPOINT_RELEASE, iSavepoint);
+      }
+    }
+
+    /* If the statement transaction is being rolled back, also restore the 
+    ** database handles deferred constraint counter to the value it had when 
+    ** the statement transaction was opened.  */
+    if( eOp==SAVEPOINT_ROLLBACK ){
+      db->nDeferredCons = p->nStmtDefCons;
+      db->nDeferredImmCons = p->nStmtDefImmCons;
+    }
+  }
+  return rc;
+}
+
+/*
+** This function is called when a transaction opened by the database 
+** handle associated with the VM passed as an argument is about to be 
+** committed. If there are outstanding deferred foreign key constraint
+** violations, return SQLITE_ERROR. Otherwise, SQLITE_OK.
+**
+** If there are outstanding FK violations and this function returns 
+** SQLITE_ERROR, set the result of the VM to SQLITE_CONSTRAINT_FOREIGNKEY
+** and write an error message to it. Then return SQLITE_ERROR.
+*/
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *p, int deferred){
+  sqlite3 *db = p->db;
+  if( (deferred && (db->nDeferredCons+db->nDeferredImmCons)>0) 
+   || (!deferred && p->nFkConstraint>0) 
+  ){
+    p->rc = SQLITE_CONSTRAINT_FOREIGNKEY;
+    p->errorAction = OE_Abort;
+    sqlite3SetString(&p->zErrMsg, db, "FOREIGN KEY constraint failed");
+    return SQLITE_ERROR;
+  }
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** This routine is called the when a VDBE tries to halt.  If the VDBE
+** has made changes and is in autocommit mode, then commit those
+** changes.  If a rollback is needed, then do the rollback.
+**
+** This routine is the only way to move the state of a VM from
+** SQLITE_MAGIC_RUN to SQLITE_MAGIC_HALT.  It is harmless to
+** call this on a VM that is in the SQLITE_MAGIC_HALT state.
+**
+** Return an error code.  If the commit could not complete because of
+** lock contention, return SQLITE_BUSY.  If SQLITE_BUSY is returned, it
+** means the close did not happen and needs to be repeated.
+*/
+SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){
+  int rc;                         /* Used to store transient return codes */
+  sqlite3 *db = p->db;
+
+  /* This function contains the logic that determines if a statement or
+  ** transaction will be committed or rolled back as a result of the
+  ** execution of this virtual machine. 
+  **
+  ** If any of the following errors occur:
+  **
+  **     SQLITE_NOMEM
+  **     SQLITE_IOERR
+  **     SQLITE_FULL
+  **     SQLITE_INTERRUPT
+  **
+  ** Then the internal cache might have been left in an inconsistent
+  ** state.  We need to rollback the statement transaction, if there is
+  ** one, or the complete transaction if there is no statement transaction.
+  */
+
+  if( p->db->mallocFailed ){
+    p->rc = SQLITE_NOMEM;
+  }
+  if( p->aOnceFlag ) memset(p->aOnceFlag, 0, p->nOnceFlag);
+  closeAllCursors(p);
+  if( p->magic!=VDBE_MAGIC_RUN ){
+    return SQLITE_OK;
+  }
+  checkActiveVdbeCnt(db);
+
+  /* No commit or rollback needed if the program never started or if the
+  ** SQL statement does not read or write a database file.  */
+  if( p->pc>=0 && p->bIsReader ){
+    int mrc;   /* Primary error code from p->rc */
+    int eStatementOp = 0;
+    int isSpecialError;            /* Set to true if a 'special' error */
+
+    /* Lock all btrees used by the statement */
+    sqlite3VdbeEnter(p);
+
+    /* Check for one of the special errors */
+    mrc = p->rc & 0xff;
+    isSpecialError = mrc==SQLITE_NOMEM || mrc==SQLITE_IOERR
+                     || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL;
+    if( isSpecialError ){
+      /* If the query was read-only and the error code is SQLITE_INTERRUPT, 
+      ** no rollback is necessary. Otherwise, at least a savepoint 
+      ** transaction must be rolled back to restore the database to a 
+      ** consistent state.
+      **
+      ** Even if the statement is read-only, it is important to perform
+      ** a statement or transaction rollback operation. If the error 
+      ** occurred while writing to the journal, sub-journal or database
+      ** file as part of an effort to free up cache space (see function
+      ** pagerStress() in pager.c), the rollback is required to restore 
+      ** the pager to a consistent state.
+      */
+      if( !p->readOnly || mrc!=SQLITE_INTERRUPT ){
+        if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){
+          eStatementOp = SAVEPOINT_ROLLBACK;
+        }else{
+          /* We are forced to roll back the active transaction. Before doing
+          ** so, abort any other statements this handle currently has active.
+          */
+          sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);
+          sqlite3CloseSavepoints(db);
+          db->autoCommit = 1;
+          p->nChange = 0;
+        }
+      }
+    }
+
+    /* Check for immediate foreign key violations. */
+    if( p->rc==SQLITE_OK ){
+      sqlite3VdbeCheckFk(p, 0);
+    }
+  
+    /* If the auto-commit flag is set and this is the only active writer 
+    ** VM, then we do either a commit or rollback of the current transaction. 
+    **
+    ** Note: This block also runs if one of the special errors handled 
+    ** above has occurred. 
+    */
+    if( !sqlite3VtabInSync(db) 
+     && db->autoCommit 
+     && db->nVdbeWrite==(p->readOnly==0) 
+    ){
+      if( p->rc==SQLITE_OK || (p->errorAction==OE_Fail && !isSpecialError) ){
+        rc = sqlite3VdbeCheckFk(p, 1);
+        if( rc!=SQLITE_OK ){
+          if( NEVER(p->readOnly) ){
+            sqlite3VdbeLeave(p);
+            return SQLITE_ERROR;
+          }
+          rc = SQLITE_CONSTRAINT_FOREIGNKEY;
+        }else{ 
+          /* The auto-commit flag is true, the vdbe program was successful 
+          ** or hit an 'OR FAIL' constraint and there are no deferred foreign
+          ** key constraints to hold up the transaction. This means a commit 
+          ** is required. */
+          rc = vdbeCommit(db, p);
+        }
+        if( rc==SQLITE_BUSY && p->readOnly ){
+          sqlite3VdbeLeave(p);
+          return SQLITE_BUSY;
+        }else if( rc!=SQLITE_OK ){
+          p->rc = rc;
+          sqlite3RollbackAll(db, SQLITE_OK);
+          p->nChange = 0;
+        }else{
+          db->nDeferredCons = 0;
+          db->nDeferredImmCons = 0;
+          db->flags &= ~SQLITE_DeferFKs;
+          sqlite3CommitInternalChanges(db);
+        }
+      }else{
+        sqlite3RollbackAll(db, SQLITE_OK);
+        p->nChange = 0;
+      }
+      db->nStatement = 0;
+    }else if( eStatementOp==0 ){
+      if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
+        eStatementOp = SAVEPOINT_RELEASE;
+      }else if( p->errorAction==OE_Abort ){
+        eStatementOp = SAVEPOINT_ROLLBACK;
+      }else{
+        sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);
+        sqlite3CloseSavepoints(db);
+        db->autoCommit = 1;
+        p->nChange = 0;
+      }
+    }
+  
+    /* If eStatementOp is non-zero, then a statement transaction needs to
+    ** be committed or rolled back. Call sqlite3VdbeCloseStatement() to
+    ** do so. If this operation returns an error, and the current statement
+    ** error code is SQLITE_OK or SQLITE_CONSTRAINT, then promote the
+    ** current statement error code.
+    */
+    if( eStatementOp ){
+      rc = sqlite3VdbeCloseStatement(p, eStatementOp);
+      if( rc ){
+        if( p->rc==SQLITE_OK || (p->rc&0xff)==SQLITE_CONSTRAINT ){
+          p->rc = rc;
+          sqlite3DbFree(db, p->zErrMsg);
+          p->zErrMsg = 0;
+        }
+        sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);
+        sqlite3CloseSavepoints(db);
+        db->autoCommit = 1;
+        p->nChange = 0;
+      }
+    }
+  
+    /* If this was an INSERT, UPDATE or DELETE and no statement transaction
+    ** has been rolled back, update the database connection change-counter. 
+    */
+    if( p->changeCntOn ){
+      if( eStatementOp!=SAVEPOINT_ROLLBACK ){
+        sqlite3VdbeSetChanges(db, p->nChange);
+      }else{
+        sqlite3VdbeSetChanges(db, 0);
+      }
+      p->nChange = 0;
+    }
+
+    /* Release the locks */
+    sqlite3VdbeLeave(p);
+  }
+
+  /* We have successfully halted and closed the VM.  Record this fact. */
+  if( p->pc>=0 ){
+    db->nVdbeActive--;
+    if( !p->readOnly ) db->nVdbeWrite--;
+    if( p->bIsReader ) db->nVdbeRead--;
+    assert( db->nVdbeActive>=db->nVdbeRead );
+    assert( db->nVdbeRead>=db->nVdbeWrite );
+    assert( db->nVdbeWrite>=0 );
+  }
+  p->magic = VDBE_MAGIC_HALT;
+  checkActiveVdbeCnt(db);
+  if( p->db->mallocFailed ){
+    p->rc = SQLITE_NOMEM;
+  }
+
+  /* If the auto-commit flag is set to true, then any locks that were held
+  ** by connection db have now been released. Call sqlite3ConnectionUnlocked() 
+  ** to invoke any required unlock-notify callbacks.
+  */
+  if( db->autoCommit ){
+    sqlite3ConnectionUnlocked(db);
+  }
+
+  assert( db->nVdbeActive>0 || db->autoCommit==0 || db->nStatement==0 );
+  return (p->rc==SQLITE_BUSY ? SQLITE_BUSY : SQLITE_OK);
+}
+
+
+/*
+** Each VDBE holds the result of the most recent sqlite3_step() call
+** in p->rc.  This routine sets that result back to SQLITE_OK.
+*/
+SQLITE_PRIVATE void sqlite3VdbeResetStepResult(Vdbe *p){
+  p->rc = SQLITE_OK;
+}
+
+/*
+** Copy the error code and error message belonging to the VDBE passed
+** as the first argument to its database handle (so that they will be 
+** returned by calls to sqlite3_errcode() and sqlite3_errmsg()).
+**
+** This function does not clear the VDBE error code or message, just
+** copies them to the database handle.
+*/
+SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p){
+  sqlite3 *db = p->db;
+  int rc = p->rc;
+  if( p->zErrMsg ){
+    u8 mallocFailed = db->mallocFailed;
+    sqlite3BeginBenignMalloc();
+    if( db->pErr==0 ) db->pErr = sqlite3ValueNew(db);
+    sqlite3ValueSetStr(db->pErr, -1, p->zErrMsg, SQLITE_UTF8, SQLITE_TRANSIENT);
+    sqlite3EndBenignMalloc();
+    db->mallocFailed = mallocFailed;
+    db->errCode = rc;
+  }else{
+    sqlite3Error(db, rc);
+  }
+  return rc;
+}
+
+#ifdef SQLITE_ENABLE_SQLLOG
+/*
+** If an SQLITE_CONFIG_SQLLOG hook is registered and the VM has been run, 
+** invoke it.
+*/
+static void vdbeInvokeSqllog(Vdbe *v){
+  if( sqlite3GlobalConfig.xSqllog && v->rc==SQLITE_OK && v->zSql && v->pc>=0 ){
+    char *zExpanded = sqlite3VdbeExpandSql(v, v->zSql);
+    assert( v->db->init.busy==0 );
+    if( zExpanded ){
+      sqlite3GlobalConfig.xSqllog(
+          sqlite3GlobalConfig.pSqllogArg, v->db, zExpanded, 1
+      );
+      sqlite3DbFree(v->db, zExpanded);
+    }
+  }
+}
+#else
+# define vdbeInvokeSqllog(x)
+#endif
+
+/*
+** Clean up a VDBE after execution but do not delete the VDBE just yet.
+** Write any error messages into *pzErrMsg.  Return the result code.
+**
+** After this routine is run, the VDBE should be ready to be executed
+** again.
+**
+** To look at it another way, this routine resets the state of the
+** virtual machine from VDBE_MAGIC_RUN or VDBE_MAGIC_HALT back to
+** VDBE_MAGIC_INIT.
+*/
+SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe *p){
+  sqlite3 *db;
+  db = p->db;
+
+  /* If the VM did not run to completion or if it encountered an
+  ** error, then it might not have been halted properly.  So halt
+  ** it now.
+  */
+  sqlite3VdbeHalt(p);
+
+  /* If the VDBE has be run even partially, then transfer the error code
+  ** and error message from the VDBE into the main database structure.  But
+  ** if the VDBE has just been set to run but has not actually executed any
+  ** instructions yet, leave the main database error information unchanged.
+  */
+  if( p->pc>=0 ){
+    vdbeInvokeSqllog(p);
+    sqlite3VdbeTransferError(p);
+    sqlite3DbFree(db, p->zErrMsg);
+    p->zErrMsg = 0;
+    if( p->runOnlyOnce ) p->expired = 1;
+  }else if( p->rc && p->expired ){
+    /* The expired flag was set on the VDBE before the first call
+    ** to sqlite3_step(). For consistency (since sqlite3_step() was
+    ** called), set the database error in this case as well.
+    */
+    sqlite3ErrorWithMsg(db, p->rc, p->zErrMsg ? "%s" : 0, p->zErrMsg);
+    sqlite3DbFree(db, p->zErrMsg);
+    p->zErrMsg = 0;
+  }
+
+  /* Reclaim all memory used by the VDBE
+  */
+  Cleanup(p);
+
+  /* Save profiling information from this VDBE run.
+  */
+#ifdef VDBE_PROFILE
+  {
+    FILE *out = fopen("vdbe_profile.out", "a");
+    if( out ){
+      int i;
+      fprintf(out, "---- ");
+      for(i=0; i<p->nOp; i++){
+        fprintf(out, "%02x", p->aOp[i].opcode);
+      }
+      fprintf(out, "\n");
+      if( p->zSql ){
+        char c, pc = 0;
+        fprintf(out, "-- ");
+        for(i=0; (c = p->zSql[i])!=0; i++){
+          if( pc=='\n' ) fprintf(out, "-- ");
+          putc(c, out);
+          pc = c;
+        }
+        if( pc!='\n' ) fprintf(out, "\n");
+      }
+      for(i=0; i<p->nOp; i++){
+        char zHdr[100];
+        sqlite3_snprintf(sizeof(zHdr), zHdr, "%6u %12llu %8llu ",
+           p->aOp[i].cnt,
+           p->aOp[i].cycles,
+           p->aOp[i].cnt>0 ? p->aOp[i].cycles/p->aOp[i].cnt : 0
+        );
+        fprintf(out, "%s", zHdr);
+        sqlite3VdbePrintOp(out, i, &p->aOp[i]);
+      }
+      fclose(out);
+    }
+  }
+#endif
+  p->iCurrentTime = 0;
+  p->magic = VDBE_MAGIC_INIT;
+  return p->rc & db->errMask;
+}
+ 
+/*
+** Clean up and delete a VDBE after execution.  Return an integer which is
+** the result code.  Write any error message text into *pzErrMsg.
+*/
+SQLITE_PRIVATE int sqlite3VdbeFinalize(Vdbe *p){
+  int rc = SQLITE_OK;
+  if( p->magic==VDBE_MAGIC_RUN || p->magic==VDBE_MAGIC_HALT ){
+    rc = sqlite3VdbeReset(p);
+    assert( (rc & p->db->errMask)==rc );
+  }
+  sqlite3VdbeDelete(p);
+  return rc;
+}
+
+/*
+** If parameter iOp is less than zero, then invoke the destructor for
+** all auxiliary data pointers currently cached by the VM passed as
+** the first argument.
+**
+** Or, if iOp is greater than or equal to zero, then the destructor is
+** only invoked for those auxiliary data pointers created by the user 
+** function invoked by the OP_Function opcode at instruction iOp of 
+** VM pVdbe, and only then if:
+**
+**    * the associated function parameter is the 32nd or later (counting
+**      from left to right), or
+**
+**    * the corresponding bit in argument mask is clear (where the first
+**      function parameter corresponds to bit 0 etc.).
+*/
+SQLITE_PRIVATE void sqlite3VdbeDeleteAuxData(Vdbe *pVdbe, int iOp, int mask){
+  AuxData **pp = &pVdbe->pAuxData;
+  while( *pp ){
+    AuxData *pAux = *pp;
+    if( (iOp<0)
+     || (pAux->iOp==iOp && (pAux->iArg>31 || !(mask & MASKBIT32(pAux->iArg))))
+    ){
+      testcase( pAux->iArg==31 );
+      if( pAux->xDelete ){
+        pAux->xDelete(pAux->pAux);
+      }
+      *pp = pAux->pNext;
+      sqlite3DbFree(pVdbe->db, pAux);
+    }else{
+      pp= &pAux->pNext;
+    }
+  }
+}
+
+/*
+** Free all memory associated with the Vdbe passed as the second argument,
+** except for object itself, which is preserved.
+**
+** The difference between this function and sqlite3VdbeDelete() is that
+** VdbeDelete() also unlinks the Vdbe from the list of VMs associated with
+** the database connection and frees the object itself.
+*/
+SQLITE_PRIVATE void sqlite3VdbeClearObject(sqlite3 *db, Vdbe *p){
+  SubProgram *pSub, *pNext;
+  int i;
+  assert( p->db==0 || p->db==db );
+  releaseMemArray(p->aVar, p->nVar);
+  releaseMemArray(p->aColName, p->nResColumn*COLNAME_N);
+  for(pSub=p->pProgram; pSub; pSub=pNext){
+    pNext = pSub->pNext;
+    vdbeFreeOpArray(db, pSub->aOp, pSub->nOp);
+    sqlite3DbFree(db, pSub);
+  }
+  for(i=p->nzVar-1; i>=0; i--) sqlite3DbFree(db, p->azVar[i]);
+  vdbeFreeOpArray(db, p->aOp, p->nOp);
+  sqlite3DbFree(db, p->aColName);
+  sqlite3DbFree(db, p->zSql);
+  sqlite3DbFree(db, p->pFree);
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  for(i=0; i<p->nScan; i++){
+    sqlite3DbFree(db, p->aScan[i].zName);
+  }
+  sqlite3DbFree(db, p->aScan);
+#endif
+}
+
+/*
+** Delete an entire VDBE.
+*/
+SQLITE_PRIVATE void sqlite3VdbeDelete(Vdbe *p){
+  sqlite3 *db;
+
+  if( NEVER(p==0) ) return;
+  db = p->db;
+  assert( sqlite3_mutex_held(db->mutex) );
+  sqlite3VdbeClearObject(db, p);
+  if( p->pPrev ){
+    p->pPrev->pNext = p->pNext;
+  }else{
+    assert( db->pVdbe==p );
+    db->pVdbe = p->pNext;
+  }
+  if( p->pNext ){
+    p->pNext->pPrev = p->pPrev;
+  }
+  p->magic = VDBE_MAGIC_DEAD;
+  p->db = 0;
+  sqlite3DbFree(db, p);
+}
+
+/*
+** The cursor "p" has a pending seek operation that has not yet been
+** carried out.  Seek the cursor now.  If an error occurs, return
+** the appropriate error code.
+*/
+static int SQLITE_NOINLINE handleDeferredMoveto(VdbeCursor *p){
+  int res, rc;
+#ifdef SQLITE_TEST
+  extern int sqlite3_search_count;
+#endif
+  assert( p->deferredMoveto );
+  assert( p->isTable );
+  rc = sqlite3BtreeMovetoUnpacked(p->pCursor, 0, p->movetoTarget, 0, &res);
+  if( rc ) return rc;
+  if( res!=0 ) return SQLITE_CORRUPT_BKPT;
+#ifdef SQLITE_TEST
+  sqlite3_search_count++;
+#endif
+  p->deferredMoveto = 0;
+  p->cacheStatus = CACHE_STALE;
+  return SQLITE_OK;
+}
+
+/*
+** Something has moved cursor "p" out of place.  Maybe the row it was
+** pointed to was deleted out from under it.  Or maybe the btree was
+** rebalanced.  Whatever the cause, try to restore "p" to the place it
+** is supposed to be pointing.  If the row was deleted out from under the
+** cursor, set the cursor to point to a NULL row.
+*/
+static int SQLITE_NOINLINE handleMovedCursor(VdbeCursor *p){
+  int isDifferentRow, rc;
+  assert( p->pCursor!=0 );
+  assert( sqlite3BtreeCursorHasMoved(p->pCursor) );
+  rc = sqlite3BtreeCursorRestore(p->pCursor, &isDifferentRow);
+  p->cacheStatus = CACHE_STALE;
+  if( isDifferentRow ) p->nullRow = 1;
+  return rc;
+}
+
+/*
+** Check to ensure that the cursor is valid.  Restore the cursor
+** if need be.  Return any I/O error from the restore operation.
+*/
+SQLITE_PRIVATE int sqlite3VdbeCursorRestore(VdbeCursor *p){
+  if( sqlite3BtreeCursorHasMoved(p->pCursor) ){
+    return handleMovedCursor(p);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Make sure the cursor p is ready to read or write the row to which it
+** was last positioned.  Return an error code if an OOM fault or I/O error
+** prevents us from positioning the cursor to its correct position.
+**
+** If a MoveTo operation is pending on the given cursor, then do that
+** MoveTo now.  If no move is pending, check to see if the row has been
+** deleted out from under the cursor and if it has, mark the row as
+** a NULL row.
+**
+** If the cursor is already pointing to the correct row and that row has
+** not been deleted out from under the cursor, then this routine is a no-op.
+*/
+SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor *p){
+  if( p->deferredMoveto ){
+    return handleDeferredMoveto(p);
+  }
+  if( p->pCursor && sqlite3BtreeCursorHasMoved(p->pCursor) ){
+    return handleMovedCursor(p);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** The following functions:
+**
+** sqlite3VdbeSerialType()
+** sqlite3VdbeSerialTypeLen()
+** sqlite3VdbeSerialLen()
+** sqlite3VdbeSerialPut()
+** sqlite3VdbeSerialGet()
+**
+** encapsulate the code that serializes values for storage in SQLite
+** data and index records. Each serialized value consists of a
+** 'serial-type' and a blob of data. The serial type is an 8-byte unsigned
+** integer, stored as a varint.
+**
+** In an SQLite index record, the serial type is stored directly before
+** the blob of data that it corresponds to. In a table record, all serial
+** types are stored at the start of the record, and the blobs of data at
+** the end. Hence these functions allow the caller to handle the
+** serial-type and data blob separately.
+**
+** The following table describes the various storage classes for data:
+**
+**   serial type        bytes of data      type
+**   --------------     ---------------    ---------------
+**      0                     0            NULL
+**      1                     1            signed integer
+**      2                     2            signed integer
+**      3                     3            signed integer
+**      4                     4            signed integer
+**      5                     6            signed integer
+**      6                     8            signed integer
+**      7                     8            IEEE float
+**      8                     0            Integer constant 0
+**      9                     0            Integer constant 1
+**     10,11                               reserved for expansion
+**    N>=12 and even       (N-12)/2        BLOB
+**    N>=13 and odd        (N-13)/2        text
+**
+** The 8 and 9 types were added in 3.3.0, file format 4.  Prior versions
+** of SQLite will not understand those serial types.
+*/
+
+/*
+** Return the serial-type for the value stored in pMem.
+*/
+SQLITE_PRIVATE u32 sqlite3VdbeSerialType(Mem *pMem, int file_format){
+  int flags = pMem->flags;
+  u32 n;
+
+  if( flags&MEM_Null ){
+    return 0;
+  }
+  if( flags&MEM_Int ){
+    /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */
+#   define MAX_6BYTE ((((i64)0x00008000)<<32)-1)
+    i64 i = pMem->u.i;
+    u64 u;
+    if( i<0 ){
+      u = ~i;
+    }else{
+      u = i;
+    }
+    if( u<=127 ){
+      return ((i&1)==i && file_format>=4) ? 8+(u32)u : 1;
+    }
+    if( u<=32767 ) return 2;
+    if( u<=8388607 ) return 3;
+    if( u<=2147483647 ) return 4;
+    if( u<=MAX_6BYTE ) return 5;
+    return 6;
+  }
+  if( flags&MEM_Real ){
+    return 7;
+  }
+  assert( pMem->db->mallocFailed || flags&(MEM_Str|MEM_Blob) );
+  assert( pMem->n>=0 );
+  n = (u32)pMem->n;
+  if( flags & MEM_Zero ){
+    n += pMem->u.nZero;
+  }
+  return ((n*2) + 12 + ((flags&MEM_Str)!=0));
+}
+
+/*
+** Return the length of the data corresponding to the supplied serial-type.
+*/
+SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32 serial_type){
+  if( serial_type>=12 ){
+    return (serial_type-12)/2;
+  }else{
+    static const u8 aSize[] = { 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 };
+    return aSize[serial_type];
+  }
+}
+
+/*
+** If we are on an architecture with mixed-endian floating 
+** points (ex: ARM7) then swap the lower 4 bytes with the 
+** upper 4 bytes.  Return the result.
+**
+** For most architectures, this is a no-op.
+**
+** (later):  It is reported to me that the mixed-endian problem
+** on ARM7 is an issue with GCC, not with the ARM7 chip.  It seems
+** that early versions of GCC stored the two words of a 64-bit
+** float in the wrong order.  And that error has been propagated
+** ever since.  The blame is not necessarily with GCC, though.
+** GCC might have just copying the problem from a prior compiler.
+** I am also told that newer versions of GCC that follow a different
+** ABI get the byte order right.
+**
+** Developers using SQLite on an ARM7 should compile and run their
+** application using -DSQLITE_DEBUG=1 at least once.  With DEBUG
+** enabled, some asserts below will ensure that the byte order of
+** floating point values is correct.
+**
+** (2007-08-30)  Frank van Vugt has studied this problem closely
+** and has send his findings to the SQLite developers.  Frank
+** writes that some Linux kernels offer floating point hardware
+** emulation that uses only 32-bit mantissas instead of a full 
+** 48-bits as required by the IEEE standard.  (This is the
+** CONFIG_FPE_FASTFPE option.)  On such systems, floating point
+** byte swapping becomes very complicated.  To avoid problems,
+** the necessary byte swapping is carried out using a 64-bit integer
+** rather than a 64-bit float.  Frank assures us that the code here
+** works for him.  We, the developers, have no way to independently
+** verify this, but Frank seems to know what he is talking about
+** so we trust him.
+*/
+#ifdef SQLITE_MIXED_ENDIAN_64BIT_FLOAT
+static u64 floatSwap(u64 in){
+  union {
+    u64 r;
+    u32 i[2];
+  } u;
+  u32 t;
+
+  u.r = in;
+  t = u.i[0];
+  u.i[0] = u.i[1];
+  u.i[1] = t;
+  return u.r;
+}
+# define swapMixedEndianFloat(X)  X = floatSwap(X)
+#else
+# define swapMixedEndianFloat(X)
+#endif
+
+/*
+** Write the serialized data blob for the value stored in pMem into 
+** buf. It is assumed that the caller has allocated sufficient space.
+** Return the number of bytes written.
+**
+** nBuf is the amount of space left in buf[].  The caller is responsible
+** for allocating enough space to buf[] to hold the entire field, exclusive
+** of the pMem->u.nZero bytes for a MEM_Zero value.
+**
+** Return the number of bytes actually written into buf[].  The number
+** of bytes in the zero-filled tail is included in the return value only
+** if those bytes were zeroed in buf[].
+*/ 
+SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){
+  u32 len;
+
+  /* Integer and Real */
+  if( serial_type<=7 && serial_type>0 ){
+    u64 v;
+    u32 i;
+    if( serial_type==7 ){
+      assert( sizeof(v)==sizeof(pMem->u.r) );
+      memcpy(&v, &pMem->u.r, sizeof(v));
+      swapMixedEndianFloat(v);
+    }else{
+      v = pMem->u.i;
+    }
+    len = i = sqlite3VdbeSerialTypeLen(serial_type);
+    assert( i>0 );
+    do{
+      buf[--i] = (u8)(v&0xFF);
+      v >>= 8;
+    }while( i );
+    return len;
+  }
+
+  /* String or blob */
+  if( serial_type>=12 ){
+    assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0)
+             == (int)sqlite3VdbeSerialTypeLen(serial_type) );
+    len = pMem->n;
+    memcpy(buf, pMem->z, len);
+    return len;
+  }
+
+  /* NULL or constants 0 or 1 */
+  return 0;
+}
+
+/* Input "x" is a sequence of unsigned characters that represent a
+** big-endian integer.  Return the equivalent native integer
+*/
+#define ONE_BYTE_INT(x)    ((i8)(x)[0])
+#define TWO_BYTE_INT(x)    (256*(i8)((x)[0])|(x)[1])
+#define THREE_BYTE_INT(x)  (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2])
+#define FOUR_BYTE_UINT(x)  (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
+#define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
+
+/*
+** Deserialize the data blob pointed to by buf as serial type serial_type
+** and store the result in pMem.  Return the number of bytes read.
+**
+** This function is implemented as two separate routines for performance.
+** The few cases that require local variables are broken out into a separate
+** routine so that in most cases the overhead of moving the stack pointer
+** is avoided.
+*/ 
+static u32 SQLITE_NOINLINE serialGet(
+  const unsigned char *buf,     /* Buffer to deserialize from */
+  u32 serial_type,              /* Serial type to deserialize */
+  Mem *pMem                     /* Memory cell to write value into */
+){
+  u64 x = FOUR_BYTE_UINT(buf);
+  u32 y = FOUR_BYTE_UINT(buf+4);
+  x = (x<<32) + y;
+  if( serial_type==6 ){
+    /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit
+    ** twos-complement integer. */
+    pMem->u.i = *(i64*)&x;
+    pMem->flags = MEM_Int;
+    testcase( pMem->u.i<0 );
+  }else{
+    /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit
+    ** floating point number. */
+#if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT)
+    /* Verify that integers and floating point values use the same
+    ** byte order.  Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is
+    ** defined that 64-bit floating point values really are mixed
+    ** endian.
+    */
+    static const u64 t1 = ((u64)0x3ff00000)<<32;
+    static const double r1 = 1.0;
+    u64 t2 = t1;
+    swapMixedEndianFloat(t2);
+    assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );
+#endif
+    assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 );
+    swapMixedEndianFloat(x);
+    memcpy(&pMem->u.r, &x, sizeof(x));
+    pMem->flags = sqlite3IsNaN(pMem->u.r) ? MEM_Null : MEM_Real;
+  }
+  return 8;
+}
+SQLITE_PRIVATE u32 sqlite3VdbeSerialGet(
+  const unsigned char *buf,     /* Buffer to deserialize from */
+  u32 serial_type,              /* Serial type to deserialize */
+  Mem *pMem                     /* Memory cell to write value into */
+){
+  switch( serial_type ){
+    case 10:   /* Reserved for future use */
+    case 11:   /* Reserved for future use */
+    case 0: {  /* Null */
+      /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */
+      pMem->flags = MEM_Null;
+      break;
+    }
+    case 1: {
+      /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement
+      ** integer. */
+      pMem->u.i = ONE_BYTE_INT(buf);
+      pMem->flags = MEM_Int;
+      testcase( pMem->u.i<0 );
+      return 1;
+    }
+    case 2: { /* 2-byte signed integer */
+      /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit
+      ** twos-complement integer. */
+      pMem->u.i = TWO_BYTE_INT(buf);
+      pMem->flags = MEM_Int;
+      testcase( pMem->u.i<0 );
+      return 2;
+    }
+    case 3: { /* 3-byte signed integer */
+      /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit
+      ** twos-complement integer. */
+      pMem->u.i = THREE_BYTE_INT(buf);
+      pMem->flags = MEM_Int;
+      testcase( pMem->u.i<0 );
+      return 3;
+    }
+    case 4: { /* 4-byte signed integer */
+      /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit
+      ** twos-complement integer. */
+      pMem->u.i = FOUR_BYTE_INT(buf);
+      pMem->flags = MEM_Int;
+      testcase( pMem->u.i<0 );
+      return 4;
+    }
+    case 5: { /* 6-byte signed integer */
+      /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit
+      ** twos-complement integer. */
+      pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf);
+      pMem->flags = MEM_Int;
+      testcase( pMem->u.i<0 );
+      return 6;
+    }
+    case 6:   /* 8-byte signed integer */
+    case 7: { /* IEEE floating point */
+      /* These use local variables, so do them in a separate routine
+      ** to avoid having to move the frame pointer in the common case */
+      return serialGet(buf,serial_type,pMem);
+    }
+    case 8:    /* Integer 0 */
+    case 9: {  /* Integer 1 */
+      /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */
+      /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */
+      pMem->u.i = serial_type-8;
+      pMem->flags = MEM_Int;
+      return 0;
+    }
+    default: {
+      /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in
+      ** length.
+      ** EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and
+      ** (N-13)/2 bytes in length. */
+      static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem };
+      pMem->z = (char *)buf;
+      pMem->n = (serial_type-12)/2;
+      pMem->flags = aFlag[serial_type&1];
+      return pMem->n;
+    }
+  }
+  return 0;
+}
+/*
+** This routine is used to allocate sufficient space for an UnpackedRecord
+** structure large enough to be used with sqlite3VdbeRecordUnpack() if
+** the first argument is a pointer to KeyInfo structure pKeyInfo.
+**
+** The space is either allocated using sqlite3DbMallocRaw() or from within
+** the unaligned buffer passed via the second and third arguments (presumably
+** stack space). If the former, then *ppFree is set to a pointer that should
+** be eventually freed by the caller using sqlite3DbFree(). Or, if the 
+** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL
+** before returning.
+**
+** If an OOM error occurs, NULL is returned.
+*/
+SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(
+  KeyInfo *pKeyInfo,              /* Description of the record */
+  char *pSpace,                   /* Unaligned space available */
+  int szSpace,                    /* Size of pSpace[] in bytes */
+  char **ppFree                   /* OUT: Caller should free this pointer */
+){
+  UnpackedRecord *p;              /* Unpacked record to return */
+  int nOff;                       /* Increment pSpace by nOff to align it */
+  int nByte;                      /* Number of bytes required for *p */
+
+  /* We want to shift the pointer pSpace up such that it is 8-byte aligned.
+  ** Thus, we need to calculate a value, nOff, between 0 and 7, to shift 
+  ** it by.  If pSpace is already 8-byte aligned, nOff should be zero.
+  */
+  nOff = (8 - (SQLITE_PTR_TO_INT(pSpace) & 7)) & 7;
+  nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nField+1);
+  if( nByte>szSpace+nOff ){
+    p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->db, nByte);
+    *ppFree = (char *)p;
+    if( !p ) return 0;
+  }else{
+    p = (UnpackedRecord*)&pSpace[nOff];
+    *ppFree = 0;
+  }
+
+  p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))];
+  assert( pKeyInfo->aSortOrder!=0 );
+  p->pKeyInfo = pKeyInfo;
+  p->nField = pKeyInfo->nField + 1;
+  return p;
+}
+
+/*
+** Given the nKey-byte encoding of a record in pKey[], populate the 
+** UnpackedRecord structure indicated by the fourth argument with the
+** contents of the decoded record.
+*/ 
+SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(
+  KeyInfo *pKeyInfo,     /* Information about the record format */
+  int nKey,              /* Size of the binary record */
+  const void *pKey,      /* The binary record */
+  UnpackedRecord *p      /* Populate this structure before returning. */
+){
+  const unsigned char *aKey = (const unsigned char *)pKey;
+  int d; 
+  u32 idx;                        /* Offset in aKey[] to read from */
+  u16 u;                          /* Unsigned loop counter */
+  u32 szHdr;
+  Mem *pMem = p->aMem;
+
+  p->default_rc = 0;
+  assert( EIGHT_BYTE_ALIGNMENT(pMem) );
+  idx = getVarint32(aKey, szHdr);
+  d = szHdr;
+  u = 0;
+  while( idx<szHdr && d<=nKey ){
+    u32 serial_type;
+
+    idx += getVarint32(&aKey[idx], serial_type);
+    pMem->enc = pKeyInfo->enc;
+    pMem->db = pKeyInfo->db;
+    /* pMem->flags = 0; // sqlite3VdbeSerialGet() will set this for us */
+    pMem->szMalloc = 0;
+    d += sqlite3VdbeSerialGet(&aKey[d], serial_type, pMem);
+    pMem++;
+    if( (++u)>=p->nField ) break;
+  }
+  assert( u<=pKeyInfo->nField + 1 );
+  p->nField = u;
+}
+
+#if SQLITE_DEBUG
+/*
+** This function compares two index or table record keys in the same way
+** as the sqlite3VdbeRecordCompare() routine. Unlike VdbeRecordCompare(),
+** this function deserializes and compares values using the
+** sqlite3VdbeSerialGet() and sqlite3MemCompare() functions. It is used
+** in assert() statements to ensure that the optimized code in
+** sqlite3VdbeRecordCompare() returns results with these two primitives.
+**
+** Return true if the result of comparison is equivalent to desiredResult.
+** Return false if there is a disagreement.
+*/
+static int vdbeRecordCompareDebug(
+  int nKey1, const void *pKey1, /* Left key */
+  const UnpackedRecord *pPKey2, /* Right key */
+  int desiredResult             /* Correct answer */
+){
+  u32 d1;            /* Offset into aKey[] of next data element */
+  u32 idx1;          /* Offset into aKey[] of next header element */
+  u32 szHdr1;        /* Number of bytes in header */
+  int i = 0;
+  int rc = 0;
+  const unsigned char *aKey1 = (const unsigned char *)pKey1;
+  KeyInfo *pKeyInfo;
+  Mem mem1;
+
+  pKeyInfo = pPKey2->pKeyInfo;
+  if( pKeyInfo->db==0 ) return 1;
+  mem1.enc = pKeyInfo->enc;
+  mem1.db = pKeyInfo->db;
+  /* mem1.flags = 0;  // Will be initialized by sqlite3VdbeSerialGet() */
+  VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */
+
+  /* Compilers may complain that mem1.u.i is potentially uninitialized.
+  ** We could initialize it, as shown here, to silence those complaints.
+  ** But in fact, mem1.u.i will never actually be used uninitialized, and doing 
+  ** the unnecessary initialization has a measurable negative performance
+  ** impact, since this routine is a very high runner.  And so, we choose
+  ** to ignore the compiler warnings and leave this variable uninitialized.
+  */
+  /*  mem1.u.i = 0;  // not needed, here to silence compiler warning */
+  
+  idx1 = getVarint32(aKey1, szHdr1);
+  d1 = szHdr1;
+  assert( pKeyInfo->nField+pKeyInfo->nXField>=pPKey2->nField || CORRUPT_DB );
+  assert( pKeyInfo->aSortOrder!=0 );
+  assert( pKeyInfo->nField>0 );
+  assert( idx1<=szHdr1 || CORRUPT_DB );
+  do{
+    u32 serial_type1;
+
+    /* Read the serial types for the next element in each key. */
+    idx1 += getVarint32( aKey1+idx1, serial_type1 );
+
+    /* Verify that there is enough key space remaining to avoid
+    ** a buffer overread.  The "d1+serial_type1+2" subexpression will
+    ** always be greater than or equal to the amount of required key space.
+    ** Use that approximation to avoid the more expensive call to
+    ** sqlite3VdbeSerialTypeLen() in the common case.
+    */
+    if( d1+serial_type1+2>(u32)nKey1
+     && d1+sqlite3VdbeSerialTypeLen(serial_type1)>(u32)nKey1 
+    ){
+      break;
+    }
+
+    /* Extract the values to be compared.
+    */
+    d1 += sqlite3VdbeSerialGet(&aKey1[d1], serial_type1, &mem1);
+
+    /* Do the comparison
+    */
+    rc = sqlite3MemCompare(&mem1, &pPKey2->aMem[i], pKeyInfo->aColl[i]);
+    if( rc!=0 ){
+      assert( mem1.szMalloc==0 );  /* See comment below */
+      if( pKeyInfo->aSortOrder[i] ){
+        rc = -rc;  /* Invert the result for DESC sort order. */
+      }
+      goto debugCompareEnd;
+    }
+    i++;
+  }while( idx1<szHdr1 && i<pPKey2->nField );
+
+  /* No memory allocation is ever used on mem1.  Prove this using
+  ** the following assert().  If the assert() fails, it indicates a
+  ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).
+  */
+  assert( mem1.szMalloc==0 );
+
+  /* rc==0 here means that one of the keys ran out of fields and
+  ** all the fields up to that point were equal. Return the default_rc
+  ** value.  */
+  rc = pPKey2->default_rc;
+
+debugCompareEnd:
+  if( desiredResult==0 && rc==0 ) return 1;
+  if( desiredResult<0 && rc<0 ) return 1;
+  if( desiredResult>0 && rc>0 ) return 1;
+  if( CORRUPT_DB ) return 1;
+  if( pKeyInfo->db->mallocFailed ) return 1;
+  return 0;
+}
+#endif
+
+#if SQLITE_DEBUG
+/*
+** Count the number of fields (a.k.a. columns) in the record given by
+** pKey,nKey.  The verify that this count is less than or equal to the
+** limit given by pKeyInfo->nField + pKeyInfo->nXField.
+**
+** If this constraint is not satisfied, it means that the high-speed
+** vdbeRecordCompareInt() and vdbeRecordCompareString() routines will
+** not work correctly.  If this assert() ever fires, it probably means
+** that the KeyInfo.nField or KeyInfo.nXField values were computed
+** incorrectly.
+*/
+static void vdbeAssertFieldCountWithinLimits(
+  int nKey, const void *pKey,   /* The record to verify */ 
+  const KeyInfo *pKeyInfo       /* Compare size with this KeyInfo */
+){
+  int nField = 0;
+  u32 szHdr;
+  u32 idx;
+  u32 notUsed;
+  const unsigned char *aKey = (const unsigned char*)pKey;
+
+  if( CORRUPT_DB ) return;
+  idx = getVarint32(aKey, szHdr);
+  assert( szHdr<=nKey );
+  while( idx<szHdr ){
+    idx += getVarint32(aKey+idx, notUsed);
+    nField++;
+  }
+  assert( nField <= pKeyInfo->nField+pKeyInfo->nXField );
+}
+#else
+# define vdbeAssertFieldCountWithinLimits(A,B,C)
+#endif
+
+/*
+** Both *pMem1 and *pMem2 contain string values. Compare the two values
+** using the collation sequence pColl. As usual, return a negative , zero
+** or positive value if *pMem1 is less than, equal to or greater than 
+** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);".
+*/
+static int vdbeCompareMemString(
+  const Mem *pMem1,
+  const Mem *pMem2,
+  const CollSeq *pColl,
+  u8 *prcErr                      /* If an OOM occurs, set to SQLITE_NOMEM */
+){
+  if( pMem1->enc==pColl->enc ){
+    /* The strings are already in the correct encoding.  Call the
+     ** comparison function directly */
+    return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
+  }else{
+    int rc;
+    const void *v1, *v2;
+    int n1, n2;
+    Mem c1;
+    Mem c2;
+    sqlite3VdbeMemInit(&c1, pMem1->db, MEM_Null);
+    sqlite3VdbeMemInit(&c2, pMem1->db, MEM_Null);
+    sqlite3VdbeMemShallowCopy(&c1, pMem1, MEM_Ephem);
+    sqlite3VdbeMemShallowCopy(&c2, pMem2, MEM_Ephem);
+    v1 = sqlite3ValueText((sqlite3_value*)&c1, pColl->enc);
+    n1 = v1==0 ? 0 : c1.n;
+    v2 = sqlite3ValueText((sqlite3_value*)&c2, pColl->enc);
+    n2 = v2==0 ? 0 : c2.n;
+    rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2);
+    sqlite3VdbeMemRelease(&c1);
+    sqlite3VdbeMemRelease(&c2);
+    if( (v1==0 || v2==0) && prcErr ) *prcErr = SQLITE_NOMEM;
+    return rc;
+  }
+}
+
+/*
+** Compare two blobs.  Return negative, zero, or positive if the first
+** is less than, equal to, or greater than the second, respectively.
+** If one blob is a prefix of the other, then the shorter is the lessor.
+*/
+static SQLITE_NOINLINE int sqlite3BlobCompare(const Mem *pB1, const Mem *pB2){
+  int c = memcmp(pB1->z, pB2->z, pB1->n>pB2->n ? pB2->n : pB1->n);
+  if( c ) return c;
+  return pB1->n - pB2->n;
+}
+
+
+/*
+** Compare the values contained by the two memory cells, returning
+** negative, zero or positive if pMem1 is less than, equal to, or greater
+** than pMem2. Sorting order is NULL's first, followed by numbers (integers
+** and reals) sorted numerically, followed by text ordered by the collating
+** sequence pColl and finally blob's ordered by memcmp().
+**
+** Two NULL values are considered equal by this function.
+*/
+SQLITE_PRIVATE int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){
+  int f1, f2;
+  int combined_flags;
+
+  f1 = pMem1->flags;
+  f2 = pMem2->flags;
+  combined_flags = f1|f2;
+  assert( (combined_flags & MEM_RowSet)==0 );
+ 
+  /* If one value is NULL, it is less than the other. If both values
+  ** are NULL, return 0.
+  */
+  if( combined_flags&MEM_Null ){
+    return (f2&MEM_Null) - (f1&MEM_Null);
+  }
+
+  /* If one value is a number and the other is not, the number is less.
+  ** If both are numbers, compare as reals if one is a real, or as integers
+  ** if both values are integers.
+  */
+  if( combined_flags&(MEM_Int|MEM_Real) ){
+    double r1, r2;
+    if( (f1 & f2 & MEM_Int)!=0 ){
+      if( pMem1->u.i < pMem2->u.i ) return -1;
+      if( pMem1->u.i > pMem2->u.i ) return 1;
+      return 0;
+    }
+    if( (f1&MEM_Real)!=0 ){
+      r1 = pMem1->u.r;
+    }else if( (f1&MEM_Int)!=0 ){
+      r1 = (double)pMem1->u.i;
+    }else{
+      return 1;
+    }
+    if( (f2&MEM_Real)!=0 ){
+      r2 = pMem2->u.r;
+    }else if( (f2&MEM_Int)!=0 ){
+      r2 = (double)pMem2->u.i;
+    }else{
+      return -1;
+    }
+    if( r1<r2 ) return -1;
+    if( r1>r2 ) return 1;
+    return 0;
+  }
+
+  /* If one value is a string and the other is a blob, the string is less.
+  ** If both are strings, compare using the collating functions.
+  */
+  if( combined_flags&MEM_Str ){
+    if( (f1 & MEM_Str)==0 ){
+      return 1;
+    }
+    if( (f2 & MEM_Str)==0 ){
+      return -1;
+    }
+
+    assert( pMem1->enc==pMem2->enc );
+    assert( pMem1->enc==SQLITE_UTF8 || 
+            pMem1->enc==SQLITE_UTF16LE || pMem1->enc==SQLITE_UTF16BE );
+
+    /* The collation sequence must be defined at this point, even if
+    ** the user deletes the collation sequence after the vdbe program is
+    ** compiled (this was not always the case).
+    */
+    assert( !pColl || pColl->xCmp );
+
+    if( pColl ){
+      return vdbeCompareMemString(pMem1, pMem2, pColl, 0);
+    }
+    /* If a NULL pointer was passed as the collate function, fall through
+    ** to the blob case and use memcmp().  */
+  }
+ 
+  /* Both values must be blobs.  Compare using memcmp().  */
+  return sqlite3BlobCompare(pMem1, pMem2);
+}
+
+
+/*
+** The first argument passed to this function is a serial-type that
+** corresponds to an integer - all values between 1 and 9 inclusive 
+** except 7. The second points to a buffer containing an integer value
+** serialized according to serial_type. This function deserializes
+** and returns the value.
+*/
+static i64 vdbeRecordDecodeInt(u32 serial_type, const u8 *aKey){
+  u32 y;
+  assert( CORRUPT_DB || (serial_type>=1 && serial_type<=9 && serial_type!=7) );
+  switch( serial_type ){
+    case 0:
+    case 1:
+      testcase( aKey[0]&0x80 );
+      return ONE_BYTE_INT(aKey);
+    case 2:
+      testcase( aKey[0]&0x80 );
+      return TWO_BYTE_INT(aKey);
+    case 3:
+      testcase( aKey[0]&0x80 );
+      return THREE_BYTE_INT(aKey);
+    case 4: {
+      testcase( aKey[0]&0x80 );
+      y = FOUR_BYTE_UINT(aKey);
+      return (i64)*(int*)&y;
+    }
+    case 5: {
+      testcase( aKey[0]&0x80 );
+      return FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey);
+    }
+    case 6: {
+      u64 x = FOUR_BYTE_UINT(aKey);
+      testcase( aKey[0]&0x80 );
+      x = (x<<32) | FOUR_BYTE_UINT(aKey+4);
+      return (i64)*(i64*)&x;
+    }
+  }
+
+  return (serial_type - 8);
+}
+
+/*
+** This function compares the two table rows or index records
+** specified by {nKey1, pKey1} and pPKey2.  It returns a negative, zero
+** or positive integer if key1 is less than, equal to or 
+** greater than key2.  The {nKey1, pKey1} key must be a blob
+** created by the OP_MakeRecord opcode of the VDBE.  The pPKey2
+** key must be a parsed key such as obtained from
+** sqlite3VdbeParseRecord.
+**
+** If argument bSkip is non-zero, it is assumed that the caller has already
+** determined that the first fields of the keys are equal.
+**
+** Key1 and Key2 do not have to contain the same number of fields. If all 
+** fields that appear in both keys are equal, then pPKey2->default_rc is 
+** returned.
+**
+** If database corruption is discovered, set pPKey2->errCode to 
+** SQLITE_CORRUPT and return 0. If an OOM error is encountered, 
+** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the
+** malloc-failed flag set on database handle (pPKey2->pKeyInfo->db).
+*/
+static int vdbeRecordCompareWithSkip(
+  int nKey1, const void *pKey1,   /* Left key */
+  UnpackedRecord *pPKey2,         /* Right key */
+  int bSkip                       /* If true, skip the first field */
+){
+  u32 d1;                         /* Offset into aKey[] of next data element */
+  int i;                          /* Index of next field to compare */
+  u32 szHdr1;                     /* Size of record header in bytes */
+  u32 idx1;                       /* Offset of first type in header */
+  int rc = 0;                     /* Return value */
+  Mem *pRhs = pPKey2->aMem;       /* Next field of pPKey2 to compare */
+  KeyInfo *pKeyInfo = pPKey2->pKeyInfo;
+  const unsigned char *aKey1 = (const unsigned char *)pKey1;
+  Mem mem1;
+
+  /* If bSkip is true, then the caller has already determined that the first
+  ** two elements in the keys are equal. Fix the various stack variables so
+  ** that this routine begins comparing at the second field. */
+  if( bSkip ){
+    u32 s1;
+    idx1 = 1 + getVarint32(&aKey1[1], s1);
+    szHdr1 = aKey1[0];
+    d1 = szHdr1 + sqlite3VdbeSerialTypeLen(s1);
+    i = 1;
+    pRhs++;
+  }else{
+    idx1 = getVarint32(aKey1, szHdr1);
+    d1 = szHdr1;
+    if( d1>(unsigned)nKey1 ){ 
+      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
+      return 0;  /* Corruption */
+    }
+    i = 0;
+  }
+
+  VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */
+  assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField 
+       || CORRUPT_DB );
+  assert( pPKey2->pKeyInfo->aSortOrder!=0 );
+  assert( pPKey2->pKeyInfo->nField>0 );
+  assert( idx1<=szHdr1 || CORRUPT_DB );
+  do{
+    u32 serial_type;
+
+    /* RHS is an integer */
+    if( pRhs->flags & MEM_Int ){
+      serial_type = aKey1[idx1];
+      testcase( serial_type==12 );
+      if( serial_type>=12 ){
+        rc = +1;
+      }else if( serial_type==0 ){
+        rc = -1;
+      }else if( serial_type==7 ){
+        double rhs = (double)pRhs->u.i;
+        sqlite3VdbeSerialGet(&aKey1[d1], serial_type, &mem1);
+        if( mem1.u.r<rhs ){
+          rc = -1;
+        }else if( mem1.u.r>rhs ){
+          rc = +1;
+        }
+      }else{
+        i64 lhs = vdbeRecordDecodeInt(serial_type, &aKey1[d1]);
+        i64 rhs = pRhs->u.i;
+        if( lhs<rhs ){
+          rc = -1;
+        }else if( lhs>rhs ){
+          rc = +1;
+        }
+      }
+    }
+
+    /* RHS is real */
+    else if( pRhs->flags & MEM_Real ){
+      serial_type = aKey1[idx1];
+      if( serial_type>=12 ){
+        rc = +1;
+      }else if( serial_type==0 ){
+        rc = -1;
+      }else{
+        double rhs = pRhs->u.r;
+        double lhs;
+        sqlite3VdbeSerialGet(&aKey1[d1], serial_type, &mem1);
+        if( serial_type==7 ){
+          lhs = mem1.u.r;
+        }else{
+          lhs = (double)mem1.u.i;
+        }
+        if( lhs<rhs ){
+          rc = -1;
+        }else if( lhs>rhs ){
+          rc = +1;
+        }
+      }
+    }
+
+    /* RHS is a string */
+    else if( pRhs->flags & MEM_Str ){
+      getVarint32(&aKey1[idx1], serial_type);
+      testcase( serial_type==12 );
+      if( serial_type<12 ){
+        rc = -1;
+      }else if( !(serial_type & 0x01) ){
+        rc = +1;
+      }else{
+        mem1.n = (serial_type - 12) / 2;
+        testcase( (d1+mem1.n)==(unsigned)nKey1 );
+        testcase( (d1+mem1.n+1)==(unsigned)nKey1 );
+        if( (d1+mem1.n) > (unsigned)nKey1 ){
+          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
+          return 0;                /* Corruption */
+        }else if( pKeyInfo->aColl[i] ){
+          mem1.enc = pKeyInfo->enc;
+          mem1.db = pKeyInfo->db;
+          mem1.flags = MEM_Str;
+          mem1.z = (char*)&aKey1[d1];
+          rc = vdbeCompareMemString(
+              &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode
+          );
+        }else{
+          int nCmp = MIN(mem1.n, pRhs->n);
+          rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
+          if( rc==0 ) rc = mem1.n - pRhs->n; 
+        }
+      }
+    }
+
+    /* RHS is a blob */
+    else if( pRhs->flags & MEM_Blob ){
+      getVarint32(&aKey1[idx1], serial_type);
+      testcase( serial_type==12 );
+      if( serial_type<12 || (serial_type & 0x01) ){
+        rc = -1;
+      }else{
+        int nStr = (serial_type - 12) / 2;
+        testcase( (d1+nStr)==(unsigned)nKey1 );
+        testcase( (d1+nStr+1)==(unsigned)nKey1 );
+        if( (d1+nStr) > (unsigned)nKey1 ){
+          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
+          return 0;                /* Corruption */
+        }else{
+          int nCmp = MIN(nStr, pRhs->n);
+          rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
+          if( rc==0 ) rc = nStr - pRhs->n;
+        }
+      }
+    }
+
+    /* RHS is null */
+    else{
+      serial_type = aKey1[idx1];
+      rc = (serial_type!=0);
+    }
+
+    if( rc!=0 ){
+      if( pKeyInfo->aSortOrder[i] ){
+        rc = -rc;
+      }
+      assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) );
+      assert( mem1.szMalloc==0 );  /* See comment below */
+      return rc;
+    }
+
+    i++;
+    pRhs++;
+    d1 += sqlite3VdbeSerialTypeLen(serial_type);
+    idx1 += sqlite3VarintLen(serial_type);
+  }while( idx1<(unsigned)szHdr1 && i<pPKey2->nField && d1<=(unsigned)nKey1 );
+
+  /* No memory allocation is ever used on mem1.  Prove this using
+  ** the following assert().  If the assert() fails, it indicates a
+  ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).  */
+  assert( mem1.szMalloc==0 );
+
+  /* rc==0 here means that one or both of the keys ran out of fields and
+  ** all the fields up to that point were equal. Return the default_rc
+  ** value.  */
+  assert( CORRUPT_DB 
+       || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) 
+       || pKeyInfo->db->mallocFailed
+  );
+  return pPKey2->default_rc;
+}
+SQLITE_PRIVATE int sqlite3VdbeRecordCompare(
+  int nKey1, const void *pKey1,   /* Left key */
+  UnpackedRecord *pPKey2          /* Right key */
+){
+  return vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0);
+}
+
+
+/*
+** This function is an optimized version of sqlite3VdbeRecordCompare() 
+** that (a) the first field of pPKey2 is an integer, and (b) the 
+** size-of-header varint at the start of (pKey1/nKey1) fits in a single
+** byte (i.e. is less than 128).
+**
+** To avoid concerns about buffer overreads, this routine is only used
+** on schemas where the maximum valid header size is 63 bytes or less.
+*/
+static int vdbeRecordCompareInt(
+  int nKey1, const void *pKey1, /* Left key */
+  UnpackedRecord *pPKey2        /* Right key */
+){
+  const u8 *aKey = &((const u8*)pKey1)[*(const u8*)pKey1 & 0x3F];
+  int serial_type = ((const u8*)pKey1)[1];
+  int res;
+  u32 y;
+  u64 x;
+  i64 v = pPKey2->aMem[0].u.i;
+  i64 lhs;
+
+  vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo);
+  assert( (*(u8*)pKey1)<=0x3F || CORRUPT_DB );
+  switch( serial_type ){
+    case 1: { /* 1-byte signed integer */
+      lhs = ONE_BYTE_INT(aKey);
+      testcase( lhs<0 );
+      break;
+    }
+    case 2: { /* 2-byte signed integer */
+      lhs = TWO_BYTE_INT(aKey);
+      testcase( lhs<0 );
+      break;
+    }
+    case 3: { /* 3-byte signed integer */
+      lhs = THREE_BYTE_INT(aKey);
+      testcase( lhs<0 );
+      break;
+    }
+    case 4: { /* 4-byte signed integer */
+      y = FOUR_BYTE_UINT(aKey);
+      lhs = (i64)*(int*)&y;
+      testcase( lhs<0 );
+      break;
+    }
+    case 5: { /* 6-byte signed integer */
+      lhs = FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey);
+      testcase( lhs<0 );
+      break;
+    }
+    case 6: { /* 8-byte signed integer */
+      x = FOUR_BYTE_UINT(aKey);
+      x = (x<<32) | FOUR_BYTE_UINT(aKey+4);
+      lhs = *(i64*)&x;
+      testcase( lhs<0 );
+      break;
+    }
+    case 8: 
+      lhs = 0;
+      break;
+    case 9:
+      lhs = 1;
+      break;
+
+    /* This case could be removed without changing the results of running
+    ** this code. Including it causes gcc to generate a faster switch 
+    ** statement (since the range of switch targets now starts at zero and
+    ** is contiguous) but does not cause any duplicate code to be generated
+    ** (as gcc is clever enough to combine the two like cases). Other 
+    ** compilers might be similar.  */ 
+    case 0: case 7:
+      return sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2);
+
+    default:
+      return sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2);
+  }
+
+  if( v>lhs ){
+    res = pPKey2->r1;
+  }else if( v<lhs ){
+    res = pPKey2->r2;
+  }else if( pPKey2->nField>1 ){
+    /* The first fields of the two keys are equal. Compare the trailing 
+    ** fields.  */
+    res = vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1);
+  }else{
+    /* The first fields of the two keys are equal and there are no trailing
+    ** fields. Return pPKey2->default_rc in this case. */
+    res = pPKey2->default_rc;
+  }
+
+  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) );
+  return res;
+}
+
+/*
+** This function is an optimized version of sqlite3VdbeRecordCompare() 
+** that (a) the first field of pPKey2 is a string, that (b) the first field
+** uses the collation sequence BINARY and (c) that the size-of-header varint 
+** at the start of (pKey1/nKey1) fits in a single byte.
+*/
+static int vdbeRecordCompareString(
+  int nKey1, const void *pKey1, /* Left key */
+  UnpackedRecord *pPKey2        /* Right key */
+){
+  const u8 *aKey1 = (const u8*)pKey1;
+  int serial_type;
+  int res;
+
+  vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo);
+  getVarint32(&aKey1[1], serial_type);
+  if( serial_type<12 ){
+    res = pPKey2->r1;      /* (pKey1/nKey1) is a number or a null */
+  }else if( !(serial_type & 0x01) ){ 
+    res = pPKey2->r2;      /* (pKey1/nKey1) is a blob */
+  }else{
+    int nCmp;
+    int nStr;
+    int szHdr = aKey1[0];
+
+    nStr = (serial_type-12) / 2;
+    if( (szHdr + nStr) > nKey1 ){
+      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
+      return 0;    /* Corruption */
+    }
+    nCmp = MIN( pPKey2->aMem[0].n, nStr );
+    res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp);
+
+    if( res==0 ){
+      res = nStr - pPKey2->aMem[0].n;
+      if( res==0 ){
+        if( pPKey2->nField>1 ){
+          res = vdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1);
+        }else{
+          res = pPKey2->default_rc;
+        }
+      }else if( res>0 ){
+        res = pPKey2->r2;
+      }else{
+        res = pPKey2->r1;
+      }
+    }else if( res>0 ){
+      res = pPKey2->r2;
+    }else{
+      res = pPKey2->r1;
+    }
+  }
+
+  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res)
+       || CORRUPT_DB
+       || pPKey2->pKeyInfo->db->mallocFailed
+  );
+  return res;
+}
+
+/*
+** Return a pointer to an sqlite3VdbeRecordCompare() compatible function
+** suitable for comparing serialized records to the unpacked record passed
+** as the only argument.
+*/
+SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord *p){
+  /* varintRecordCompareInt() and varintRecordCompareString() both assume
+  ** that the size-of-header varint that occurs at the start of each record
+  ** fits in a single byte (i.e. is 127 or less). varintRecordCompareInt()
+  ** also assumes that it is safe to overread a buffer by at least the 
+  ** maximum possible legal header size plus 8 bytes. Because there is
+  ** guaranteed to be at least 74 (but not 136) bytes of padding following each
+  ** buffer passed to varintRecordCompareInt() this makes it convenient to
+  ** limit the size of the header to 64 bytes in cases where the first field
+  ** is an integer.
+  **
+  ** The easiest way to enforce this limit is to consider only records with
+  ** 13 fields or less. If the first field is an integer, the maximum legal
+  ** header size is (12*5 + 1 + 1) bytes.  */
+  if( (p->pKeyInfo->nField + p->pKeyInfo->nXField)<=13 ){
+    int flags = p->aMem[0].flags;
+    if( p->pKeyInfo->aSortOrder[0] ){
+      p->r1 = 1;
+      p->r2 = -1;
+    }else{
+      p->r1 = -1;
+      p->r2 = 1;
+    }
+    if( (flags & MEM_Int) ){
+      return vdbeRecordCompareInt;
+    }
+    testcase( flags & MEM_Real );
+    testcase( flags & MEM_Null );
+    testcase( flags & MEM_Blob );
+    if( (flags & (MEM_Real|MEM_Null|MEM_Blob))==0 && p->pKeyInfo->aColl[0]==0 ){
+      assert( flags & MEM_Str );
+      return vdbeRecordCompareString;
+    }
+  }
+
+  return sqlite3VdbeRecordCompare;
+}
+
+/*
+** pCur points at an index entry created using the OP_MakeRecord opcode.
+** Read the rowid (the last field in the record) and store it in *rowid.
+** Return SQLITE_OK if everything works, or an error code otherwise.
+**
+** pCur might be pointing to text obtained from a corrupt database file.
+** So the content cannot be trusted.  Do appropriate checks on the content.
+*/
+SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){
+  i64 nCellKey = 0;
+  int rc;
+  u32 szHdr;        /* Size of the header */
+  u32 typeRowid;    /* Serial type of the rowid */
+  u32 lenRowid;     /* Size of the rowid */
+  Mem m, v;
+
+  /* Get the size of the index entry.  Only indices entries of less
+  ** than 2GiB are support - anything large must be database corruption.
+  ** Any corruption is detected in sqlite3BtreeParseCellPtr(), though, so
+  ** this code can safely assume that nCellKey is 32-bits  
+  */
+  assert( sqlite3BtreeCursorIsValid(pCur) );
+  VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey);
+  assert( rc==SQLITE_OK );     /* pCur is always valid so KeySize cannot fail */
+  assert( (nCellKey & SQLITE_MAX_U32)==(u64)nCellKey );
+
+  /* Read in the complete content of the index entry */
+  sqlite3VdbeMemInit(&m, db, 0);
+  rc = sqlite3VdbeMemFromBtree(pCur, 0, (u32)nCellKey, 1, &m);
+  if( rc ){
+    return rc;
+  }
+
+  /* The index entry must begin with a header size */
+  (void)getVarint32((u8*)m.z, szHdr);
+  testcase( szHdr==3 );
+  testcase( szHdr==m.n );
+  if( unlikely(szHdr<3 || (int)szHdr>m.n) ){
+    goto idx_rowid_corruption;
+  }
+
+  /* The last field of the index should be an integer - the ROWID.
+  ** Verify that the last entry really is an integer. */
+  (void)getVarint32((u8*)&m.z[szHdr-1], typeRowid);
+  testcase( typeRowid==1 );
+  testcase( typeRowid==2 );
+  testcase( typeRowid==3 );
+  testcase( typeRowid==4 );
+  testcase( typeRowid==5 );
+  testcase( typeRowid==6 );
+  testcase( typeRowid==8 );
+  testcase( typeRowid==9 );
+  if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){
+    goto idx_rowid_corruption;
+  }
+  lenRowid = sqlite3VdbeSerialTypeLen(typeRowid);
+  testcase( (u32)m.n==szHdr+lenRowid );
+  if( unlikely((u32)m.n<szHdr+lenRowid) ){
+    goto idx_rowid_corruption;
+  }
+
+  /* Fetch the integer off the end of the index record */
+  sqlite3VdbeSerialGet((u8*)&m.z[m.n-lenRowid], typeRowid, &v);
+  *rowid = v.u.i;
+  sqlite3VdbeMemRelease(&m);
+  return SQLITE_OK;
+
+  /* Jump here if database corruption is detected after m has been
+  ** allocated.  Free the m object and return SQLITE_CORRUPT. */
+idx_rowid_corruption:
+  testcase( m.szMalloc!=0 );
+  sqlite3VdbeMemRelease(&m);
+  return SQLITE_CORRUPT_BKPT;
+}
+
+/*
+** Compare the key of the index entry that cursor pC is pointing to against
+** the key string in pUnpacked.  Write into *pRes a number
+** that is negative, zero, or positive if pC is less than, equal to,
+** or greater than pUnpacked.  Return SQLITE_OK on success.
+**
+** pUnpacked is either created without a rowid or is truncated so that it
+** omits the rowid at the end.  The rowid at the end of the index entry
+** is ignored as well.  Hence, this routine only compares the prefixes 
+** of the keys prior to the final rowid, not the entire key.
+*/
+SQLITE_PRIVATE int sqlite3VdbeIdxKeyCompare(
+  sqlite3 *db,                     /* Database connection */
+  VdbeCursor *pC,                  /* The cursor to compare against */
+  UnpackedRecord *pUnpacked,       /* Unpacked version of key */
+  int *res                         /* Write the comparison result here */
+){
+  i64 nCellKey = 0;
+  int rc;
+  BtCursor *pCur = pC->pCursor;
+  Mem m;
+
+  assert( sqlite3BtreeCursorIsValid(pCur) );
+  VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey);
+  assert( rc==SQLITE_OK );    /* pCur is always valid so KeySize cannot fail */
+  /* nCellKey will always be between 0 and 0xffffffff because of the way
+  ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */
+  if( nCellKey<=0 || nCellKey>0x7fffffff ){
+    *res = 0;
+    return SQLITE_CORRUPT_BKPT;
+  }
+  sqlite3VdbeMemInit(&m, db, 0);
+  rc = sqlite3VdbeMemFromBtree(pC->pCursor, 0, (u32)nCellKey, 1, &m);
+  if( rc ){
+    return rc;
+  }
+  *res = sqlite3VdbeRecordCompare(m.n, m.z, pUnpacked);
+  sqlite3VdbeMemRelease(&m);
+  return SQLITE_OK;
+}
+
+/*
+** This routine sets the value to be returned by subsequent calls to
+** sqlite3_changes() on the database handle 'db'. 
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetChanges(sqlite3 *db, int nChange){
+  assert( sqlite3_mutex_held(db->mutex) );
+  db->nChange = nChange;
+  db->nTotalChange += nChange;
+}
+
+/*
+** Set a flag in the vdbe to update the change counter when it is finalised
+** or reset.
+*/
+SQLITE_PRIVATE void sqlite3VdbeCountChanges(Vdbe *v){
+  v->changeCntOn = 1;
+}
+
+/*
+** Mark every prepared statement associated with a database connection
+** as expired.
+**
+** An expired statement means that recompilation of the statement is
+** recommend.  Statements expire when things happen that make their
+** programs obsolete.  Removing user-defined functions or collating
+** sequences, or changing an authorization function are the types of
+** things that make prepared statements obsolete.
+*/
+SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3 *db){
+  Vdbe *p;
+  for(p = db->pVdbe; p; p=p->pNext){
+    p->expired = 1;
+  }
+}
+
+/*
+** Return the database associated with the Vdbe.
+*/
+SQLITE_PRIVATE sqlite3 *sqlite3VdbeDb(Vdbe *v){
+  return v->db;
+}
+
+/*
+** Return a pointer to an sqlite3_value structure containing the value bound
+** parameter iVar of VM v. Except, if the value is an SQL NULL, return 
+** 0 instead. Unless it is NULL, apply affinity aff (one of the SQLITE_AFF_*
+** constants) to the value before returning it.
+**
+** The returned value must be freed by the caller using sqlite3ValueFree().
+*/
+SQLITE_PRIVATE sqlite3_value *sqlite3VdbeGetBoundValue(Vdbe *v, int iVar, u8 aff){
+  assert( iVar>0 );
+  if( v ){
+    Mem *pMem = &v->aVar[iVar-1];
+    if( 0==(pMem->flags & MEM_Null) ){
+      sqlite3_value *pRet = sqlite3ValueNew(v->db);
+      if( pRet ){
+        sqlite3VdbeMemCopy((Mem *)pRet, pMem);
+        sqlite3ValueApplyAffinity(pRet, aff, SQLITE_UTF8);
+      }
+      return pRet;
+    }
+  }
+  return 0;
+}
+
+/*
+** Configure SQL variable iVar so that binding a new value to it signals
+** to sqlite3_reoptimize() that re-preparing the statement may result
+** in a better query plan.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSetVarmask(Vdbe *v, int iVar){
+  assert( iVar>0 );
+  if( iVar>32 ){
+    v->expmask = 0xffffffff;
+  }else{
+    v->expmask |= ((u32)1 << (iVar-1));
+  }
+}
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Transfer error message text from an sqlite3_vtab.zErrMsg (text stored
+** in memory obtained from sqlite3_malloc) into a Vdbe.zErrMsg (text stored
+** in memory obtained from sqlite3DbMalloc).
+*/
+SQLITE_PRIVATE void sqlite3VtabImportErrmsg(Vdbe *p, sqlite3_vtab *pVtab){
+  sqlite3 *db = p->db;
+  sqlite3DbFree(db, p->zErrMsg);
+  p->zErrMsg = sqlite3DbStrDup(db, pVtab->zErrMsg);
+  sqlite3_free(pVtab->zErrMsg);
+  pVtab->zErrMsg = 0;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/************** End of vdbeaux.c *********************************************/
+/************** Begin file vdbeapi.c *****************************************/
+/*
+** 2004 May 26
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code use to implement APIs that are part of the
+** VDBE.
+*/
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** Return TRUE (non-zero) of the statement supplied as an argument needs
+** to be recompiled.  A statement needs to be recompiled whenever the
+** execution environment changes in a way that would alter the program
+** that sqlite3_prepare() generates.  For example, if new functions or
+** collating sequences are registered or if an authorizer function is
+** added or changed.
+*/
+SQLITE_API int sqlite3_expired(sqlite3_stmt *pStmt){
+  Vdbe *p = (Vdbe*)pStmt;
+  return p==0 || p->expired;
+}
+#endif
+
+/*
+** Check on a Vdbe to make sure it has not been finalized.  Log
+** an error and return true if it has been finalized (or is otherwise
+** invalid).  Return false if it is ok.
+*/
+static int vdbeSafety(Vdbe *p){
+  if( p->db==0 ){
+    sqlite3_log(SQLITE_MISUSE, "API called with finalized prepared statement");
+    return 1;
+  }else{
+    return 0;
+  }
+}
+static int vdbeSafetyNotNull(Vdbe *p){
+  if( p==0 ){
+    sqlite3_log(SQLITE_MISUSE, "API called with NULL prepared statement");
+    return 1;
+  }else{
+    return vdbeSafety(p);
+  }
+}
+
+/*
+** The following routine destroys a virtual machine that is created by
+** the sqlite3_compile() routine. The integer returned is an SQLITE_
+** success/failure code that describes the result of executing the virtual
+** machine.
+**
+** This routine sets the error code and string returned by
+** sqlite3_errcode(), sqlite3_errmsg() and sqlite3_errmsg16().
+*/
+SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt){
+  int rc;
+  if( pStmt==0 ){
+    /* IMPLEMENTATION-OF: R-57228-12904 Invoking sqlite3_finalize() on a NULL
+    ** pointer is a harmless no-op. */
+    rc = SQLITE_OK;
+  }else{
+    Vdbe *v = (Vdbe*)pStmt;
+    sqlite3 *db = v->db;
+    if( vdbeSafety(v) ) return SQLITE_MISUSE_BKPT;
+    sqlite3_mutex_enter(db->mutex);
+    rc = sqlite3VdbeFinalize(v);
+    rc = sqlite3ApiExit(db, rc);
+    sqlite3LeaveMutexAndCloseZombie(db);
+  }
+  return rc;
+}
+
+/*
+** Terminate the current execution of an SQL statement and reset it
+** back to its starting state so that it can be reused. A success code from
+** the prior execution is returned.
+**
+** This routine sets the error code and string returned by
+** sqlite3_errcode(), sqlite3_errmsg() and sqlite3_errmsg16().
+*/
+SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt){
+  int rc;
+  if( pStmt==0 ){
+    rc = SQLITE_OK;
+  }else{
+    Vdbe *v = (Vdbe*)pStmt;
+    sqlite3_mutex_enter(v->db->mutex);
+    rc = sqlite3VdbeReset(v);
+    sqlite3VdbeRewind(v);
+    assert( (rc & (v->db->errMask))==rc );
+    rc = sqlite3ApiExit(v->db, rc);
+    sqlite3_mutex_leave(v->db->mutex);
+  }
+  return rc;
+}
+
+/*
+** Set all the parameters in the compiled SQL statement to NULL.
+*/
+SQLITE_API int sqlite3_clear_bindings(sqlite3_stmt *pStmt){
+  int i;
+  int rc = SQLITE_OK;
+  Vdbe *p = (Vdbe*)pStmt;
+#if SQLITE_THREADSAFE
+  sqlite3_mutex *mutex = ((Vdbe*)pStmt)->db->mutex;
+#endif
+  sqlite3_mutex_enter(mutex);
+  for(i=0; i<p->nVar; i++){
+    sqlite3VdbeMemRelease(&p->aVar[i]);
+    p->aVar[i].flags = MEM_Null;
+  }
+  if( p->isPrepareV2 && p->expmask ){
+    p->expired = 1;
+  }
+  sqlite3_mutex_leave(mutex);
+  return rc;
+}
+
+
+/**************************** sqlite3_value_  *******************************
+** The following routines extract information from a Mem or sqlite3_value
+** structure.
+*/
+SQLITE_API const void *sqlite3_value_blob(sqlite3_value *pVal){
+  Mem *p = (Mem*)pVal;
+  if( p->flags & (MEM_Blob|MEM_Str) ){
+    sqlite3VdbeMemExpandBlob(p);
+    p->flags |= MEM_Blob;
+    return p->n ? p->z : 0;
+  }else{
+    return sqlite3_value_text(pVal);
+  }
+}
+SQLITE_API int sqlite3_value_bytes(sqlite3_value *pVal){
+  return sqlite3ValueBytes(pVal, SQLITE_UTF8);
+}
+SQLITE_API int sqlite3_value_bytes16(sqlite3_value *pVal){
+  return sqlite3ValueBytes(pVal, SQLITE_UTF16NATIVE);
+}
+SQLITE_API double sqlite3_value_double(sqlite3_value *pVal){
+  return sqlite3VdbeRealValue((Mem*)pVal);
+}
+SQLITE_API int sqlite3_value_int(sqlite3_value *pVal){
+  return (int)sqlite3VdbeIntValue((Mem*)pVal);
+}
+SQLITE_API sqlite_int64 sqlite3_value_int64(sqlite3_value *pVal){
+  return sqlite3VdbeIntValue((Mem*)pVal);
+}
+SQLITE_API const unsigned char *sqlite3_value_text(sqlite3_value *pVal){
+  return (const unsigned char *)sqlite3ValueText(pVal, SQLITE_UTF8);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_value_text16(sqlite3_value* pVal){
+  return sqlite3ValueText(pVal, SQLITE_UTF16NATIVE);
+}
+SQLITE_API const void *sqlite3_value_text16be(sqlite3_value *pVal){
+  return sqlite3ValueText(pVal, SQLITE_UTF16BE);
+}
+SQLITE_API const void *sqlite3_value_text16le(sqlite3_value *pVal){
+  return sqlite3ValueText(pVal, SQLITE_UTF16LE);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+SQLITE_API int sqlite3_value_type(sqlite3_value* pVal){
+  static const u8 aType[] = {
+     SQLITE_BLOB,     /* 0x00 */
+     SQLITE_NULL,     /* 0x01 */
+     SQLITE_TEXT,     /* 0x02 */
+     SQLITE_NULL,     /* 0x03 */
+     SQLITE_INTEGER,  /* 0x04 */
+     SQLITE_NULL,     /* 0x05 */
+     SQLITE_INTEGER,  /* 0x06 */
+     SQLITE_NULL,     /* 0x07 */
+     SQLITE_FLOAT,    /* 0x08 */
+     SQLITE_NULL,     /* 0x09 */
+     SQLITE_FLOAT,    /* 0x0a */
+     SQLITE_NULL,     /* 0x0b */
+     SQLITE_INTEGER,  /* 0x0c */
+     SQLITE_NULL,     /* 0x0d */
+     SQLITE_INTEGER,  /* 0x0e */
+     SQLITE_NULL,     /* 0x0f */
+     SQLITE_BLOB,     /* 0x10 */
+     SQLITE_NULL,     /* 0x11 */
+     SQLITE_TEXT,     /* 0x12 */
+     SQLITE_NULL,     /* 0x13 */
+     SQLITE_INTEGER,  /* 0x14 */
+     SQLITE_NULL,     /* 0x15 */
+     SQLITE_INTEGER,  /* 0x16 */
+     SQLITE_NULL,     /* 0x17 */
+     SQLITE_FLOAT,    /* 0x18 */
+     SQLITE_NULL,     /* 0x19 */
+     SQLITE_FLOAT,    /* 0x1a */
+     SQLITE_NULL,     /* 0x1b */
+     SQLITE_INTEGER,  /* 0x1c */
+     SQLITE_NULL,     /* 0x1d */
+     SQLITE_INTEGER,  /* 0x1e */
+     SQLITE_NULL,     /* 0x1f */
+  };
+  return aType[pVal->flags&MEM_AffMask];
+}
+
+/**************************** sqlite3_result_  *******************************
+** The following routines are used by user-defined functions to specify
+** the function result.
+**
+** The setStrOrError() function calls sqlite3VdbeMemSetStr() to store the
+** result as a string or blob but if the string or blob is too large, it
+** then sets the error code to SQLITE_TOOBIG
+**
+** The invokeValueDestructor(P,X) routine invokes destructor function X()
+** on value P is not going to be used and need to be destroyed.
+*/
+static void setResultStrOrError(
+  sqlite3_context *pCtx,  /* Function context */
+  const char *z,          /* String pointer */
+  int n,                  /* Bytes in string, or negative */
+  u8 enc,                 /* Encoding of z.  0 for BLOBs */
+  void (*xDel)(void*)     /* Destructor function */
+){
+  if( sqlite3VdbeMemSetStr(pCtx->pOut, z, n, enc, xDel)==SQLITE_TOOBIG ){
+    sqlite3_result_error_toobig(pCtx);
+  }
+}
+static int invokeValueDestructor(
+  const void *p,             /* Value to destroy */
+  void (*xDel)(void*),       /* The destructor */
+  sqlite3_context *pCtx      /* Set a SQLITE_TOOBIG error if no NULL */
+){
+  assert( xDel!=SQLITE_DYNAMIC );
+  if( xDel==0 ){
+    /* noop */
+  }else if( xDel==SQLITE_TRANSIENT ){
+    /* noop */
+  }else{
+    xDel((void*)p);
+  }
+  if( pCtx ) sqlite3_result_error_toobig(pCtx);
+  return SQLITE_TOOBIG;
+}
+SQLITE_API void sqlite3_result_blob(
+  sqlite3_context *pCtx, 
+  const void *z, 
+  int n, 
+  void (*xDel)(void *)
+){
+  assert( n>=0 );
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  setResultStrOrError(pCtx, z, n, 0, xDel);
+}
+SQLITE_API void sqlite3_result_blob64(
+  sqlite3_context *pCtx, 
+  const void *z, 
+  sqlite3_uint64 n,
+  void (*xDel)(void *)
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  assert( xDel!=SQLITE_DYNAMIC );
+  if( n>0x7fffffff ){
+    (void)invokeValueDestructor(z, xDel, pCtx);
+  }else{
+    setResultStrOrError(pCtx, z, (int)n, 0, xDel);
+  }
+}
+SQLITE_API void sqlite3_result_double(sqlite3_context *pCtx, double rVal){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetDouble(pCtx->pOut, rVal);
+}
+SQLITE_API void sqlite3_result_error(sqlite3_context *pCtx, const char *z, int n){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  pCtx->isError = SQLITE_ERROR;
+  pCtx->fErrorOrAux = 1;
+  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF8, SQLITE_TRANSIENT);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API void sqlite3_result_error16(sqlite3_context *pCtx, const void *z, int n){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  pCtx->isError = SQLITE_ERROR;
+  pCtx->fErrorOrAux = 1;
+  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT);
+}
+#endif
+SQLITE_API void sqlite3_result_int(sqlite3_context *pCtx, int iVal){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetInt64(pCtx->pOut, (i64)iVal);
+}
+SQLITE_API void sqlite3_result_int64(sqlite3_context *pCtx, i64 iVal){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetInt64(pCtx->pOut, iVal);
+}
+SQLITE_API void sqlite3_result_null(sqlite3_context *pCtx){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetNull(pCtx->pOut);
+}
+SQLITE_API void sqlite3_result_text(
+  sqlite3_context *pCtx, 
+  const char *z, 
+  int n,
+  void (*xDel)(void *)
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  setResultStrOrError(pCtx, z, n, SQLITE_UTF8, xDel);
+}
+SQLITE_API void sqlite3_result_text64(
+  sqlite3_context *pCtx, 
+  const char *z, 
+  sqlite3_uint64 n,
+  void (*xDel)(void *),
+  unsigned char enc
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  assert( xDel!=SQLITE_DYNAMIC );
+  if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE;
+  if( n>0x7fffffff ){
+    (void)invokeValueDestructor(z, xDel, pCtx);
+  }else{
+    setResultStrOrError(pCtx, z, (int)n, enc, xDel);
+  }
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API void sqlite3_result_text16(
+  sqlite3_context *pCtx, 
+  const void *z, 
+  int n, 
+  void (*xDel)(void *)
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  setResultStrOrError(pCtx, z, n, SQLITE_UTF16NATIVE, xDel);
+}
+SQLITE_API void sqlite3_result_text16be(
+  sqlite3_context *pCtx, 
+  const void *z, 
+  int n, 
+  void (*xDel)(void *)
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  setResultStrOrError(pCtx, z, n, SQLITE_UTF16BE, xDel);
+}
+SQLITE_API void sqlite3_result_text16le(
+  sqlite3_context *pCtx, 
+  const void *z, 
+  int n, 
+  void (*xDel)(void *)
+){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  setResultStrOrError(pCtx, z, n, SQLITE_UTF16LE, xDel);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+SQLITE_API void sqlite3_result_value(sqlite3_context *pCtx, sqlite3_value *pValue){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemCopy(pCtx->pOut, pValue);
+}
+SQLITE_API void sqlite3_result_zeroblob(sqlite3_context *pCtx, int n){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n);
+}
+SQLITE_API void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){
+  pCtx->isError = errCode;
+  pCtx->fErrorOrAux = 1;
+  if( pCtx->pOut->flags & MEM_Null ){
+    sqlite3VdbeMemSetStr(pCtx->pOut, sqlite3ErrStr(errCode), -1, 
+                         SQLITE_UTF8, SQLITE_STATIC);
+  }
+}
+
+/* Force an SQLITE_TOOBIG error. */
+SQLITE_API void sqlite3_result_error_toobig(sqlite3_context *pCtx){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  pCtx->isError = SQLITE_TOOBIG;
+  pCtx->fErrorOrAux = 1;
+  sqlite3VdbeMemSetStr(pCtx->pOut, "string or blob too big", -1, 
+                       SQLITE_UTF8, SQLITE_STATIC);
+}
+
+/* An SQLITE_NOMEM error. */
+SQLITE_API void sqlite3_result_error_nomem(sqlite3_context *pCtx){
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  sqlite3VdbeMemSetNull(pCtx->pOut);
+  pCtx->isError = SQLITE_NOMEM;
+  pCtx->fErrorOrAux = 1;
+  pCtx->pOut->db->mallocFailed = 1;
+}
+
+/*
+** This function is called after a transaction has been committed. It 
+** invokes callbacks registered with sqlite3_wal_hook() as required.
+*/
+static int doWalCallbacks(sqlite3 *db){
+  int rc = SQLITE_OK;
+#ifndef SQLITE_OMIT_WAL
+  int i;
+  for(i=0; i<db->nDb; i++){
+    Btree *pBt = db->aDb[i].pBt;
+    if( pBt ){
+      int nEntry;
+      sqlite3BtreeEnter(pBt);
+      nEntry = sqlite3PagerWalCallback(sqlite3BtreePager(pBt));
+      sqlite3BtreeLeave(pBt);
+      if( db->xWalCallback && nEntry>0 && rc==SQLITE_OK ){
+        rc = db->xWalCallback(db->pWalArg, db, db->aDb[i].zName, nEntry);
+      }
+    }
+  }
+#endif
+  return rc;
+}
+
+/*
+** Execute the statement pStmt, either until a row of data is ready, the
+** statement is completely executed or an error occurs.
+**
+** This routine implements the bulk of the logic behind the sqlite_step()
+** API.  The only thing omitted is the automatic recompile if a 
+** schema change has occurred.  That detail is handled by the
+** outer sqlite3_step() wrapper procedure.
+*/
+static int sqlite3Step(Vdbe *p){
+  sqlite3 *db;
+  int rc;
+
+  assert(p);
+  if( p->magic!=VDBE_MAGIC_RUN ){
+    /* We used to require that sqlite3_reset() be called before retrying
+    ** sqlite3_step() after any error or after SQLITE_DONE.  But beginning
+    ** with version 3.7.0, we changed this so that sqlite3_reset() would
+    ** be called automatically instead of throwing the SQLITE_MISUSE error.
+    ** This "automatic-reset" change is not technically an incompatibility, 
+    ** since any application that receives an SQLITE_MISUSE is broken by
+    ** definition.
+    **
+    ** Nevertheless, some published applications that were originally written
+    ** for version 3.6.23 or earlier do in fact depend on SQLITE_MISUSE 
+    ** returns, and those were broken by the automatic-reset change.  As a
+    ** a work-around, the SQLITE_OMIT_AUTORESET compile-time restores the
+    ** legacy behavior of returning SQLITE_MISUSE for cases where the 
+    ** previous sqlite3_step() returned something other than a SQLITE_LOCKED
+    ** or SQLITE_BUSY error.
+    */
+#ifdef SQLITE_OMIT_AUTORESET
+    if( p->rc==SQLITE_BUSY || p->rc==SQLITE_LOCKED ){
+      sqlite3_reset((sqlite3_stmt*)p);
+    }else{
+      return SQLITE_MISUSE_BKPT;
+    }
+#else
+    sqlite3_reset((sqlite3_stmt*)p);
+#endif
+  }
+
+  /* Check that malloc() has not failed. If it has, return early. */
+  db = p->db;
+  if( db->mallocFailed ){
+    p->rc = SQLITE_NOMEM;
+    return SQLITE_NOMEM;
+  }
+
+  if( p->pc<=0 && p->expired ){
+    p->rc = SQLITE_SCHEMA;
+    rc = SQLITE_ERROR;
+    goto end_of_step;
+  }
+  if( p->pc<0 ){
+    /* If there are no other statements currently running, then
+    ** reset the interrupt flag.  This prevents a call to sqlite3_interrupt
+    ** from interrupting a statement that has not yet started.
+    */
+    if( db->nVdbeActive==0 ){
+      db->u1.isInterrupted = 0;
+    }
+
+    assert( db->nVdbeWrite>0 || db->autoCommit==0 
+        || (db->nDeferredCons==0 && db->nDeferredImmCons==0)
+    );
+
+#ifndef SQLITE_OMIT_TRACE
+    if( db->xProfile && !db->init.busy ){
+      sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime);
+    }
+#endif
+
+    db->nVdbeActive++;
+    if( p->readOnly==0 ) db->nVdbeWrite++;
+    if( p->bIsReader ) db->nVdbeRead++;
+    p->pc = 0;
+  }
+#ifndef SQLITE_OMIT_EXPLAIN
+  if( p->explain ){
+    rc = sqlite3VdbeList(p);
+  }else
+#endif /* SQLITE_OMIT_EXPLAIN */
+  {
+    db->nVdbeExec++;
+    rc = sqlite3VdbeExec(p);
+    db->nVdbeExec--;
+  }
+
+#ifndef SQLITE_OMIT_TRACE
+  /* Invoke the profile callback if there is one
+  */
+  if( rc!=SQLITE_ROW && db->xProfile && !db->init.busy && p->zSql ){
+    sqlite3_int64 iNow;
+    sqlite3OsCurrentTimeInt64(db->pVfs, &iNow);
+    db->xProfile(db->pProfileArg, p->zSql, (iNow - p->startTime)*1000000);
+  }
+#endif
+
+  if( rc==SQLITE_DONE ){
+    assert( p->rc==SQLITE_OK );
+    p->rc = doWalCallbacks(db);
+    if( p->rc!=SQLITE_OK ){
+      rc = SQLITE_ERROR;
+    }
+  }
+
+  db->errCode = rc;
+  if( SQLITE_NOMEM==sqlite3ApiExit(p->db, p->rc) ){
+    p->rc = SQLITE_NOMEM;
+  }
+end_of_step:
+  /* At this point local variable rc holds the value that should be 
+  ** returned if this statement was compiled using the legacy 
+  ** sqlite3_prepare() interface. According to the docs, this can only
+  ** be one of the values in the first assert() below. Variable p->rc 
+  ** contains the value that would be returned if sqlite3_finalize() 
+  ** were called on statement p.
+  */
+  assert( rc==SQLITE_ROW  || rc==SQLITE_DONE   || rc==SQLITE_ERROR 
+       || rc==SQLITE_BUSY || rc==SQLITE_MISUSE
+  );
+  assert( p->rc!=SQLITE_ROW && p->rc!=SQLITE_DONE );
+  if( p->isPrepareV2 && rc!=SQLITE_ROW && rc!=SQLITE_DONE ){
+    /* If this statement was prepared using sqlite3_prepare_v2(), and an
+    ** error has occurred, then return the error code in p->rc to the
+    ** caller. Set the error code in the database handle to the same value.
+    */ 
+    rc = sqlite3VdbeTransferError(p);
+  }
+  return (rc&db->errMask);
+}
+
+/*
+** This is the top-level implementation of sqlite3_step().  Call
+** sqlite3Step() to do most of the work.  If a schema error occurs,
+** call sqlite3Reprepare() and try again.
+*/
+SQLITE_API int sqlite3_step(sqlite3_stmt *pStmt){
+  int rc = SQLITE_OK;      /* Result from sqlite3Step() */
+  int rc2 = SQLITE_OK;     /* Result from sqlite3Reprepare() */
+  Vdbe *v = (Vdbe*)pStmt;  /* the prepared statement */
+  int cnt = 0;             /* Counter to prevent infinite loop of reprepares */
+  sqlite3 *db;             /* The database connection */
+
+  if( vdbeSafetyNotNull(v) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  db = v->db;
+  sqlite3_mutex_enter(db->mutex);
+  v->doingRerun = 0;
+  while( (rc = sqlite3Step(v))==SQLITE_SCHEMA
+         && cnt++ < SQLITE_MAX_SCHEMA_RETRY ){
+    int savedPc = v->pc;
+    rc2 = rc = sqlite3Reprepare(v);
+    if( rc!=SQLITE_OK) break;
+    sqlite3_reset(pStmt);
+    if( savedPc>=0 ) v->doingRerun = 1;
+    assert( v->expired==0 );
+  }
+  if( rc2!=SQLITE_OK ){
+    /* This case occurs after failing to recompile an sql statement. 
+    ** The error message from the SQL compiler has already been loaded 
+    ** into the database handle. This block copies the error message 
+    ** from the database handle into the statement and sets the statement
+    ** program counter to 0 to ensure that when the statement is 
+    ** finalized or reset the parser error message is available via
+    ** sqlite3_errmsg() and sqlite3_errcode().
+    */
+    const char *zErr = (const char *)sqlite3_value_text(db->pErr); 
+    sqlite3DbFree(db, v->zErrMsg);
+    if( !db->mallocFailed ){
+      v->zErrMsg = sqlite3DbStrDup(db, zErr);
+      v->rc = rc2;
+    } else {
+      v->zErrMsg = 0;
+      v->rc = rc = SQLITE_NOMEM;
+    }
+  }
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+
+/*
+** Extract the user data from a sqlite3_context structure and return a
+** pointer to it.
+*/
+SQLITE_API void *sqlite3_user_data(sqlite3_context *p){
+  assert( p && p->pFunc );
+  return p->pFunc->pUserData;
+}
+
+/*
+** Extract the user data from a sqlite3_context structure and return a
+** pointer to it.
+**
+** IMPLEMENTATION-OF: R-46798-50301 The sqlite3_context_db_handle() interface
+** returns a copy of the pointer to the database connection (the 1st
+** parameter) of the sqlite3_create_function() and
+** sqlite3_create_function16() routines that originally registered the
+** application defined function.
+*/
+SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){
+  assert( p && p->pFunc );
+  return p->pOut->db;
+}
+
+/*
+** Return the current time for a statement
+*/
+SQLITE_PRIVATE sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context *p){
+  Vdbe *v = p->pVdbe;
+  int rc;
+  if( v->iCurrentTime==0 ){
+    rc = sqlite3OsCurrentTimeInt64(p->pOut->db->pVfs, &v->iCurrentTime);
+    if( rc ) v->iCurrentTime = 0;
+  }
+  return v->iCurrentTime;
+}
+
+/*
+** The following is the implementation of an SQL function that always
+** fails with an error message stating that the function is used in the
+** wrong context.  The sqlite3_overload_function() API might construct
+** SQL function that use this routine so that the functions will exist
+** for name resolution but are actually overloaded by the xFindFunction
+** method of virtual tables.
+*/
+SQLITE_PRIVATE void sqlite3InvalidFunction(
+  sqlite3_context *context,  /* The function calling context */
+  int NotUsed,               /* Number of arguments to the function */
+  sqlite3_value **NotUsed2   /* Value of each argument */
+){
+  const char *zName = context->pFunc->zName;
+  char *zErr;
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  zErr = sqlite3_mprintf(
+      "unable to use function %s in the requested context", zName);
+  sqlite3_result_error(context, zErr, -1);
+  sqlite3_free(zErr);
+}
+
+/*
+** Create a new aggregate context for p and return a pointer to
+** its pMem->z element.
+*/
+static SQLITE_NOINLINE void *createAggContext(sqlite3_context *p, int nByte){
+  Mem *pMem = p->pMem;
+  assert( (pMem->flags & MEM_Agg)==0 );
+  if( nByte<=0 ){
+    sqlite3VdbeMemSetNull(pMem);
+    pMem->z = 0;
+  }else{
+    sqlite3VdbeMemClearAndResize(pMem, nByte);
+    pMem->flags = MEM_Agg;
+    pMem->u.pDef = p->pFunc;
+    if( pMem->z ){
+      memset(pMem->z, 0, nByte);
+    }
+  }
+  return (void*)pMem->z;
+}
+
+/*
+** Allocate or return the aggregate context for a user function.  A new
+** context is allocated on the first call.  Subsequent calls return the
+** same context that was returned on prior calls.
+*/
+SQLITE_API void *sqlite3_aggregate_context(sqlite3_context *p, int nByte){
+  assert( p && p->pFunc && p->pFunc->xStep );
+  assert( sqlite3_mutex_held(p->pOut->db->mutex) );
+  testcase( nByte<0 );
+  if( (p->pMem->flags & MEM_Agg)==0 ){
+    return createAggContext(p, nByte);
+  }else{
+    return (void*)p->pMem->z;
+  }
+}
+
+/*
+** Return the auxiliary data pointer, if any, for the iArg'th argument to
+** the user-function defined by pCtx.
+*/
+SQLITE_API void *sqlite3_get_auxdata(sqlite3_context *pCtx, int iArg){
+  AuxData *pAuxData;
+
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  for(pAuxData=pCtx->pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
+    if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
+  }
+
+  return (pAuxData ? pAuxData->pAux : 0);
+}
+
+/*
+** Set the auxiliary data pointer and delete function, for the iArg'th
+** argument to the user-function defined by pCtx. Any previous value is
+** deleted by calling the delete function specified when it was set.
+*/
+SQLITE_API void sqlite3_set_auxdata(
+  sqlite3_context *pCtx, 
+  int iArg, 
+  void *pAux, 
+  void (*xDelete)(void*)
+){
+  AuxData *pAuxData;
+  Vdbe *pVdbe = pCtx->pVdbe;
+
+  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
+  if( iArg<0 ) goto failed;
+
+  for(pAuxData=pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
+    if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
+  }
+  if( pAuxData==0 ){
+    pAuxData = sqlite3DbMallocZero(pVdbe->db, sizeof(AuxData));
+    if( !pAuxData ) goto failed;
+    pAuxData->iOp = pCtx->iOp;
+    pAuxData->iArg = iArg;
+    pAuxData->pNext = pVdbe->pAuxData;
+    pVdbe->pAuxData = pAuxData;
+    if( pCtx->fErrorOrAux==0 ){
+      pCtx->isError = 0;
+      pCtx->fErrorOrAux = 1;
+    }
+  }else if( pAuxData->xDelete ){
+    pAuxData->xDelete(pAuxData->pAux);
+  }
+
+  pAuxData->pAux = pAux;
+  pAuxData->xDelete = xDelete;
+  return;
+
+failed:
+  if( xDelete ){
+    xDelete(pAux);
+  }
+}
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** Return the number of times the Step function of an aggregate has been 
+** called.
+**
+** This function is deprecated.  Do not use it for new code.  It is
+** provide only to avoid breaking legacy code.  New aggregate function
+** implementations should keep their own counts within their aggregate
+** context.
+*/
+SQLITE_API int sqlite3_aggregate_count(sqlite3_context *p){
+  assert( p && p->pMem && p->pFunc && p->pFunc->xStep );
+  return p->pMem->n;
+}
+#endif
+
+/*
+** Return the number of columns in the result set for the statement pStmt.
+*/
+SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt){
+  Vdbe *pVm = (Vdbe *)pStmt;
+  return pVm ? pVm->nResColumn : 0;
+}
+
+/*
+** Return the number of values available from the current row of the
+** currently executing statement pStmt.
+*/
+SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt){
+  Vdbe *pVm = (Vdbe *)pStmt;
+  if( pVm==0 || pVm->pResultSet==0 ) return 0;
+  return pVm->nResColumn;
+}
+
+/*
+** Return a pointer to static memory containing an SQL NULL value.
+*/
+static const Mem *columnNullValue(void){
+  /* Even though the Mem structure contains an element
+  ** of type i64, on certain architectures (x86) with certain compiler
+  ** switches (-Os), gcc may align this Mem object on a 4-byte boundary
+  ** instead of an 8-byte one. This all works fine, except that when
+  ** running with SQLITE_DEBUG defined the SQLite code sometimes assert()s
+  ** that a Mem structure is located on an 8-byte boundary. To prevent
+  ** these assert()s from failing, when building with SQLITE_DEBUG defined
+  ** using gcc, we force nullMem to be 8-byte aligned using the magical
+  ** __attribute__((aligned(8))) macro.  */
+  static const Mem nullMem 
+#if defined(SQLITE_DEBUG) && defined(__GNUC__)
+    __attribute__((aligned(8))) 
+#endif
+    = {
+        /* .u          = */ {0},
+        /* .flags      = */ MEM_Null,
+        /* .enc        = */ 0,
+        /* .n          = */ 0,
+        /* .z          = */ 0,
+        /* .zMalloc    = */ 0,
+        /* .szMalloc   = */ 0,
+        /* .iPadding1  = */ 0,
+        /* .db         = */ 0,
+        /* .xDel       = */ 0,
+#ifdef SQLITE_DEBUG
+        /* .pScopyFrom = */ 0,
+        /* .pFiller    = */ 0,
+#endif
+      };
+  return &nullMem;
+}
+
+/*
+** Check to see if column iCol of the given statement is valid.  If
+** it is, return a pointer to the Mem for the value of that column.
+** If iCol is not valid, return a pointer to a Mem which has a value
+** of NULL.
+*/
+static Mem *columnMem(sqlite3_stmt *pStmt, int i){
+  Vdbe *pVm;
+  Mem *pOut;
+
+  pVm = (Vdbe *)pStmt;
+  if( pVm && pVm->pResultSet!=0 && i<pVm->nResColumn && i>=0 ){
+    sqlite3_mutex_enter(pVm->db->mutex);
+    pOut = &pVm->pResultSet[i];
+  }else{
+    if( pVm && ALWAYS(pVm->db) ){
+      sqlite3_mutex_enter(pVm->db->mutex);
+      sqlite3Error(pVm->db, SQLITE_RANGE);
+    }
+    pOut = (Mem*)columnNullValue();
+  }
+  return pOut;
+}
+
+/*
+** This function is called after invoking an sqlite3_value_XXX function on a 
+** column value (i.e. a value returned by evaluating an SQL expression in the
+** select list of a SELECT statement) that may cause a malloc() failure. If 
+** malloc() has failed, the threads mallocFailed flag is cleared and the result
+** code of statement pStmt set to SQLITE_NOMEM.
+**
+** Specifically, this is called from within:
+**
+**     sqlite3_column_int()
+**     sqlite3_column_int64()
+**     sqlite3_column_text()
+**     sqlite3_column_text16()
+**     sqlite3_column_real()
+**     sqlite3_column_bytes()
+**     sqlite3_column_bytes16()
+**     sqiite3_column_blob()
+*/
+static void columnMallocFailure(sqlite3_stmt *pStmt)
+{
+  /* If malloc() failed during an encoding conversion within an
+  ** sqlite3_column_XXX API, then set the return code of the statement to
+  ** SQLITE_NOMEM. The next call to _step() (if any) will return SQLITE_ERROR
+  ** and _finalize() will return NOMEM.
+  */
+  Vdbe *p = (Vdbe *)pStmt;
+  if( p ){
+    p->rc = sqlite3ApiExit(p->db, p->rc);
+    sqlite3_mutex_leave(p->db->mutex);
+  }
+}
+
+/**************************** sqlite3_column_  *******************************
+** The following routines are used to access elements of the current row
+** in the result set.
+*/
+SQLITE_API const void *sqlite3_column_blob(sqlite3_stmt *pStmt, int i){
+  const void *val;
+  val = sqlite3_value_blob( columnMem(pStmt,i) );
+  /* Even though there is no encoding conversion, value_blob() might
+  ** need to call malloc() to expand the result of a zeroblob() 
+  ** expression. 
+  */
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API int sqlite3_column_bytes(sqlite3_stmt *pStmt, int i){
+  int val = sqlite3_value_bytes( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API int sqlite3_column_bytes16(sqlite3_stmt *pStmt, int i){
+  int val = sqlite3_value_bytes16( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API double sqlite3_column_double(sqlite3_stmt *pStmt, int i){
+  double val = sqlite3_value_double( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API int sqlite3_column_int(sqlite3_stmt *pStmt, int i){
+  int val = sqlite3_value_int( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API sqlite_int64 sqlite3_column_int64(sqlite3_stmt *pStmt, int i){
+  sqlite_int64 val = sqlite3_value_int64( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API const unsigned char *sqlite3_column_text(sqlite3_stmt *pStmt, int i){
+  const unsigned char *val = sqlite3_value_text( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+SQLITE_API sqlite3_value *sqlite3_column_value(sqlite3_stmt *pStmt, int i){
+  Mem *pOut = columnMem(pStmt, i);
+  if( pOut->flags&MEM_Static ){
+    pOut->flags &= ~MEM_Static;
+    pOut->flags |= MEM_Ephem;
+  }
+  columnMallocFailure(pStmt);
+  return (sqlite3_value *)pOut;
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_text16(sqlite3_stmt *pStmt, int i){
+  const void *val = sqlite3_value_text16( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return val;
+}
+#endif /* SQLITE_OMIT_UTF16 */
+SQLITE_API int sqlite3_column_type(sqlite3_stmt *pStmt, int i){
+  int iType = sqlite3_value_type( columnMem(pStmt,i) );
+  columnMallocFailure(pStmt);
+  return iType;
+}
+
+/*
+** Convert the N-th element of pStmt->pColName[] into a string using
+** xFunc() then return that string.  If N is out of range, return 0.
+**
+** There are up to 5 names for each column.  useType determines which
+** name is returned.  Here are the names:
+**
+**    0      The column name as it should be displayed for output
+**    1      The datatype name for the column
+**    2      The name of the database that the column derives from
+**    3      The name of the table that the column derives from
+**    4      The name of the table column that the result column derives from
+**
+** If the result is not a simple column reference (if it is an expression
+** or a constant) then useTypes 2, 3, and 4 return NULL.
+*/
+static const void *columnName(
+  sqlite3_stmt *pStmt,
+  int N,
+  const void *(*xFunc)(Mem*),
+  int useType
+){
+  const void *ret;
+  Vdbe *p;
+  int n;
+  sqlite3 *db;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( pStmt==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  ret = 0;
+  p = (Vdbe *)pStmt;
+  db = p->db;
+  assert( db!=0 );
+  n = sqlite3_column_count(pStmt);
+  if( N<n && N>=0 ){
+    N += useType*n;
+    sqlite3_mutex_enter(db->mutex);
+    assert( db->mallocFailed==0 );
+    ret = xFunc(&p->aColName[N]);
+     /* A malloc may have failed inside of the xFunc() call. If this
+    ** is the case, clear the mallocFailed flag and return NULL.
+    */
+    if( db->mallocFailed ){
+      db->mallocFailed = 0;
+      ret = 0;
+    }
+    sqlite3_mutex_leave(db->mutex);
+  }
+  return ret;
+}
+
+/*
+** Return the name of the Nth column of the result set returned by SQL
+** statement pStmt.
+*/
+SQLITE_API const char *sqlite3_column_name(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text, COLNAME_NAME);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_name16(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text16, COLNAME_NAME);
+}
+#endif
+
+/*
+** Constraint:  If you have ENABLE_COLUMN_METADATA then you must
+** not define OMIT_DECLTYPE.
+*/
+#if defined(SQLITE_OMIT_DECLTYPE) && defined(SQLITE_ENABLE_COLUMN_METADATA)
+# error "Must not define both SQLITE_OMIT_DECLTYPE \
+         and SQLITE_ENABLE_COLUMN_METADATA"
+#endif
+
+#ifndef SQLITE_OMIT_DECLTYPE
+/*
+** Return the column declaration type (if applicable) of the 'i'th column
+** of the result set of SQL statement pStmt.
+*/
+SQLITE_API const char *sqlite3_column_decltype(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text, COLNAME_DECLTYPE);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_decltype16(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text16, COLNAME_DECLTYPE);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+#endif /* SQLITE_OMIT_DECLTYPE */
+
+#ifdef SQLITE_ENABLE_COLUMN_METADATA
+/*
+** Return the name of the database from which a result column derives.
+** NULL is returned if the result column is an expression or constant or
+** anything else which is not an unambiguous reference to a database column.
+*/
+SQLITE_API const char *sqlite3_column_database_name(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text, COLNAME_DATABASE);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_database_name16(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text16, COLNAME_DATABASE);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** Return the name of the table from which a result column derives.
+** NULL is returned if the result column is an expression or constant or
+** anything else which is not an unambiguous reference to a database column.
+*/
+SQLITE_API const char *sqlite3_column_table_name(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text, COLNAME_TABLE);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_table_name16(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text16, COLNAME_TABLE);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** Return the name of the table column from which a result column derives.
+** NULL is returned if the result column is an expression or constant or
+** anything else which is not an unambiguous reference to a database column.
+*/
+SQLITE_API const char *sqlite3_column_origin_name(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text, COLNAME_COLUMN);
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){
+  return columnName(
+      pStmt, N, (const void*(*)(Mem*))sqlite3_value_text16, COLNAME_COLUMN);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+#endif /* SQLITE_ENABLE_COLUMN_METADATA */
+
+
+/******************************* sqlite3_bind_  ***************************
+** 
+** Routines used to attach values to wildcards in a compiled SQL statement.
+*/
+/*
+** Unbind the value bound to variable i in virtual machine p. This is the 
+** the same as binding a NULL value to the column. If the "i" parameter is
+** out of range, then SQLITE_RANGE is returned. Othewise SQLITE_OK.
+**
+** A successful evaluation of this routine acquires the mutex on p.
+** the mutex is released if any kind of error occurs.
+**
+** The error code stored in database p->db is overwritten with the return
+** value in any case.
+*/
+static int vdbeUnbind(Vdbe *p, int i){
+  Mem *pVar;
+  if( vdbeSafetyNotNull(p) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  sqlite3_mutex_enter(p->db->mutex);
+  if( p->magic!=VDBE_MAGIC_RUN || p->pc>=0 ){
+    sqlite3Error(p->db, SQLITE_MISUSE);
+    sqlite3_mutex_leave(p->db->mutex);
+    sqlite3_log(SQLITE_MISUSE, 
+        "bind on a busy prepared statement: [%s]", p->zSql);
+    return SQLITE_MISUSE_BKPT;
+  }
+  if( i<1 || i>p->nVar ){
+    sqlite3Error(p->db, SQLITE_RANGE);
+    sqlite3_mutex_leave(p->db->mutex);
+    return SQLITE_RANGE;
+  }
+  i--;
+  pVar = &p->aVar[i];
+  sqlite3VdbeMemRelease(pVar);
+  pVar->flags = MEM_Null;
+  sqlite3Error(p->db, SQLITE_OK);
+
+  /* If the bit corresponding to this variable in Vdbe.expmask is set, then 
+  ** binding a new value to this variable invalidates the current query plan.
+  **
+  ** IMPLEMENTATION-OF: R-48440-37595 If the specific value bound to host
+  ** parameter in the WHERE clause might influence the choice of query plan
+  ** for a statement, then the statement will be automatically recompiled,
+  ** as if there had been a schema change, on the first sqlite3_step() call
+  ** following any change to the bindings of that parameter.
+  */
+  if( p->isPrepareV2 &&
+     ((i<32 && p->expmask & ((u32)1 << i)) || p->expmask==0xffffffff)
+  ){
+    p->expired = 1;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Bind a text or BLOB value.
+*/
+static int bindText(
+  sqlite3_stmt *pStmt,   /* The statement to bind against */
+  int i,                 /* Index of the parameter to bind */
+  const void *zData,     /* Pointer to the data to be bound */
+  int nData,             /* Number of bytes of data to be bound */
+  void (*xDel)(void*),   /* Destructor for the data */
+  u8 encoding            /* Encoding for the data */
+){
+  Vdbe *p = (Vdbe *)pStmt;
+  Mem *pVar;
+  int rc;
+
+  rc = vdbeUnbind(p, i);
+  if( rc==SQLITE_OK ){
+    if( zData!=0 ){
+      pVar = &p->aVar[i-1];
+      rc = sqlite3VdbeMemSetStr(pVar, zData, nData, encoding, xDel);
+      if( rc==SQLITE_OK && encoding!=0 ){
+        rc = sqlite3VdbeChangeEncoding(pVar, ENC(p->db));
+      }
+      sqlite3Error(p->db, rc);
+      rc = sqlite3ApiExit(p->db, rc);
+    }
+    sqlite3_mutex_leave(p->db->mutex);
+  }else if( xDel!=SQLITE_STATIC && xDel!=SQLITE_TRANSIENT ){
+    xDel((void*)zData);
+  }
+  return rc;
+}
+
+
+/*
+** Bind a blob value to an SQL statement variable.
+*/
+SQLITE_API int sqlite3_bind_blob(
+  sqlite3_stmt *pStmt, 
+  int i, 
+  const void *zData, 
+  int nData, 
+  void (*xDel)(void*)
+){
+  return bindText(pStmt, i, zData, nData, xDel, 0);
+}
+SQLITE_API int sqlite3_bind_blob64(
+  sqlite3_stmt *pStmt, 
+  int i, 
+  const void *zData, 
+  sqlite3_uint64 nData, 
+  void (*xDel)(void*)
+){
+  assert( xDel!=SQLITE_DYNAMIC );
+  if( nData>0x7fffffff ){
+    return invokeValueDestructor(zData, xDel, 0);
+  }else{
+    return bindText(pStmt, i, zData, (int)nData, xDel, 0);
+  }
+}
+SQLITE_API int sqlite3_bind_double(sqlite3_stmt *pStmt, int i, double rValue){
+  int rc;
+  Vdbe *p = (Vdbe *)pStmt;
+  rc = vdbeUnbind(p, i);
+  if( rc==SQLITE_OK ){
+    sqlite3VdbeMemSetDouble(&p->aVar[i-1], rValue);
+    sqlite3_mutex_leave(p->db->mutex);
+  }
+  return rc;
+}
+SQLITE_API int sqlite3_bind_int(sqlite3_stmt *p, int i, int iValue){
+  return sqlite3_bind_int64(p, i, (i64)iValue);
+}
+SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValue){
+  int rc;
+  Vdbe *p = (Vdbe *)pStmt;
+  rc = vdbeUnbind(p, i);
+  if( rc==SQLITE_OK ){
+    sqlite3VdbeMemSetInt64(&p->aVar[i-1], iValue);
+    sqlite3_mutex_leave(p->db->mutex);
+  }
+  return rc;
+}
+SQLITE_API int sqlite3_bind_null(sqlite3_stmt *pStmt, int i){
+  int rc;
+  Vdbe *p = (Vdbe*)pStmt;
+  rc = vdbeUnbind(p, i);
+  if( rc==SQLITE_OK ){
+    sqlite3_mutex_leave(p->db->mutex);
+  }
+  return rc;
+}
+SQLITE_API int sqlite3_bind_text( 
+  sqlite3_stmt *pStmt, 
+  int i, 
+  const char *zData, 
+  int nData, 
+  void (*xDel)(void*)
+){
+  return bindText(pStmt, i, zData, nData, xDel, SQLITE_UTF8);
+}
+SQLITE_API int sqlite3_bind_text64( 
+  sqlite3_stmt *pStmt, 
+  int i, 
+  const char *zData, 
+  sqlite3_uint64 nData, 
+  void (*xDel)(void*),
+  unsigned char enc
+){
+  assert( xDel!=SQLITE_DYNAMIC );
+  if( nData>0x7fffffff ){
+    return invokeValueDestructor(zData, xDel, 0);
+  }else{
+    if( enc==SQLITE_UTF16 ) enc = SQLITE_UTF16NATIVE;
+    return bindText(pStmt, i, zData, (int)nData, xDel, enc);
+  }
+}
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API int sqlite3_bind_text16(
+  sqlite3_stmt *pStmt, 
+  int i, 
+  const void *zData, 
+  int nData, 
+  void (*xDel)(void*)
+){
+  return bindText(pStmt, i, zData, nData, xDel, SQLITE_UTF16NATIVE);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+SQLITE_API int sqlite3_bind_value(sqlite3_stmt *pStmt, int i, const sqlite3_value *pValue){
+  int rc;
+  switch( sqlite3_value_type((sqlite3_value*)pValue) ){
+    case SQLITE_INTEGER: {
+      rc = sqlite3_bind_int64(pStmt, i, pValue->u.i);
+      break;
+    }
+    case SQLITE_FLOAT: {
+      rc = sqlite3_bind_double(pStmt, i, pValue->u.r);
+      break;
+    }
+    case SQLITE_BLOB: {
+      if( pValue->flags & MEM_Zero ){
+        rc = sqlite3_bind_zeroblob(pStmt, i, pValue->u.nZero);
+      }else{
+        rc = sqlite3_bind_blob(pStmt, i, pValue->z, pValue->n,SQLITE_TRANSIENT);
+      }
+      break;
+    }
+    case SQLITE_TEXT: {
+      rc = bindText(pStmt,i,  pValue->z, pValue->n, SQLITE_TRANSIENT,
+                              pValue->enc);
+      break;
+    }
+    default: {
+      rc = sqlite3_bind_null(pStmt, i);
+      break;
+    }
+  }
+  return rc;
+}
+SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){
+  int rc;
+  Vdbe *p = (Vdbe *)pStmt;
+  rc = vdbeUnbind(p, i);
+  if( rc==SQLITE_OK ){
+    sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n);
+    sqlite3_mutex_leave(p->db->mutex);
+  }
+  return rc;
+}
+
+/*
+** Return the number of wildcards that can be potentially bound to.
+** This routine is added to support DBD::SQLite.  
+*/
+SQLITE_API int sqlite3_bind_parameter_count(sqlite3_stmt *pStmt){
+  Vdbe *p = (Vdbe*)pStmt;
+  return p ? p->nVar : 0;
+}
+
+/*
+** Return the name of a wildcard parameter.  Return NULL if the index
+** is out of range or if the wildcard is unnamed.
+**
+** The result is always UTF-8.
+*/
+SQLITE_API const char *sqlite3_bind_parameter_name(sqlite3_stmt *pStmt, int i){
+  Vdbe *p = (Vdbe*)pStmt;
+  if( p==0 || i<1 || i>p->nzVar ){
+    return 0;
+  }
+  return p->azVar[i-1];
+}
+
+/*
+** Given a wildcard parameter name, return the index of the variable
+** with that name.  If there is no variable with the given name,
+** return 0.
+*/
+SQLITE_PRIVATE int sqlite3VdbeParameterIndex(Vdbe *p, const char *zName, int nName){
+  int i;
+  if( p==0 ){
+    return 0;
+  }
+  if( zName ){
+    for(i=0; i<p->nzVar; i++){
+      const char *z = p->azVar[i];
+      if( z && strncmp(z,zName,nName)==0 && z[nName]==0 ){
+        return i+1;
+      }
+    }
+  }
+  return 0;
+}
+SQLITE_API int sqlite3_bind_parameter_index(sqlite3_stmt *pStmt, const char *zName){
+  return sqlite3VdbeParameterIndex((Vdbe*)pStmt, zName, sqlite3Strlen30(zName));
+}
+
+/*
+** Transfer all bindings from the first statement over to the second.
+*/
+SQLITE_PRIVATE int sqlite3TransferBindings(sqlite3_stmt *pFromStmt, sqlite3_stmt *pToStmt){
+  Vdbe *pFrom = (Vdbe*)pFromStmt;
+  Vdbe *pTo = (Vdbe*)pToStmt;
+  int i;
+  assert( pTo->db==pFrom->db );
+  assert( pTo->nVar==pFrom->nVar );
+  sqlite3_mutex_enter(pTo->db->mutex);
+  for(i=0; i<pFrom->nVar; i++){
+    sqlite3VdbeMemMove(&pTo->aVar[i], &pFrom->aVar[i]);
+  }
+  sqlite3_mutex_leave(pTo->db->mutex);
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** Deprecated external interface.  Internal/core SQLite code
+** should call sqlite3TransferBindings.
+**
+** It is misuse to call this routine with statements from different
+** database connections.  But as this is a deprecated interface, we
+** will not bother to check for that condition.
+**
+** If the two statements contain a different number of bindings, then
+** an SQLITE_ERROR is returned.  Nothing else can go wrong, so otherwise
+** SQLITE_OK is returned.
+*/
+SQLITE_API int sqlite3_transfer_bindings(sqlite3_stmt *pFromStmt, sqlite3_stmt *pToStmt){
+  Vdbe *pFrom = (Vdbe*)pFromStmt;
+  Vdbe *pTo = (Vdbe*)pToStmt;
+  if( pFrom->nVar!=pTo->nVar ){
+    return SQLITE_ERROR;
+  }
+  if( pTo->isPrepareV2 && pTo->expmask ){
+    pTo->expired = 1;
+  }
+  if( pFrom->isPrepareV2 && pFrom->expmask ){
+    pFrom->expired = 1;
+  }
+  return sqlite3TransferBindings(pFromStmt, pToStmt);
+}
+#endif
+
+/*
+** Return the sqlite3* database handle to which the prepared statement given
+** in the argument belongs.  This is the same database handle that was
+** the first argument to the sqlite3_prepare() that was used to create
+** the statement in the first place.
+*/
+SQLITE_API sqlite3 *sqlite3_db_handle(sqlite3_stmt *pStmt){
+  return pStmt ? ((Vdbe*)pStmt)->db : 0;
+}
+
+/*
+** Return true if the prepared statement is guaranteed to not modify the
+** database.
+*/
+SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt){
+  return pStmt ? ((Vdbe*)pStmt)->readOnly : 1;
+}
+
+/*
+** Return true if the prepared statement is in need of being reset.
+*/
+SQLITE_API int sqlite3_stmt_busy(sqlite3_stmt *pStmt){
+  Vdbe *v = (Vdbe*)pStmt;
+  return v!=0 && v->pc>=0 && v->magic==VDBE_MAGIC_RUN;
+}
+
+/*
+** Return a pointer to the next prepared statement after pStmt associated
+** with database connection pDb.  If pStmt is NULL, return the first
+** prepared statement for the database connection.  Return NULL if there
+** are no more.
+*/
+SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt){
+  sqlite3_stmt *pNext;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(pDb) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(pDb->mutex);
+  if( pStmt==0 ){
+    pNext = (sqlite3_stmt*)pDb->pVdbe;
+  }else{
+    pNext = (sqlite3_stmt*)((Vdbe*)pStmt)->pNext;
+  }
+  sqlite3_mutex_leave(pDb->mutex);
+  return pNext;
+}
+
+/*
+** Return the value of a status counter for a prepared statement
+*/
+SQLITE_API int sqlite3_stmt_status(sqlite3_stmt *pStmt, int op, int resetFlag){
+  Vdbe *pVdbe = (Vdbe*)pStmt;
+  u32 v;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !pStmt ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  v = pVdbe->aCounter[op];
+  if( resetFlag ) pVdbe->aCounter[op] = 0;
+  return (int)v;
+}
+
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+/*
+** Return status data for a single loop within query pStmt.
+*/
+SQLITE_API int sqlite3_stmt_scanstatus(
+  sqlite3_stmt *pStmt,            /* Prepared statement being queried */
+  int idx,                        /* Index of loop to report on */
+  int iScanStatusOp,              /* Which metric to return */
+  void *pOut                      /* OUT: Write the answer here */
+){
+  Vdbe *p = (Vdbe*)pStmt;
+  ScanStatus *pScan;
+  if( idx<0 || idx>=p->nScan ) return 1;
+  pScan = &p->aScan[idx];
+  switch( iScanStatusOp ){
+    case SQLITE_SCANSTAT_NLOOP: {
+      *(sqlite3_int64*)pOut = p->anExec[pScan->addrLoop];
+      break;
+    }
+    case SQLITE_SCANSTAT_NVISIT: {
+      *(sqlite3_int64*)pOut = p->anExec[pScan->addrVisit];
+      break;
+    }
+    case SQLITE_SCANSTAT_EST: {
+      double r = 1.0;
+      LogEst x = pScan->nEst;
+      while( x<100 ){
+        x += 10;
+        r *= 0.5;
+      }
+      *(double*)pOut = r*sqlite3LogEstToInt(x);
+      break;
+    }
+    case SQLITE_SCANSTAT_NAME: {
+      *(const char**)pOut = pScan->zName;
+      break;
+    }
+    case SQLITE_SCANSTAT_EXPLAIN: {
+      if( pScan->addrExplain ){
+        *(const char**)pOut = p->aOp[ pScan->addrExplain ].p4.z;
+      }else{
+        *(const char**)pOut = 0;
+      }
+      break;
+    }
+    case SQLITE_SCANSTAT_SELECTID: {
+      if( pScan->addrExplain ){
+        *(int*)pOut = p->aOp[ pScan->addrExplain ].p1;
+      }else{
+        *(int*)pOut = -1;
+      }
+      break;
+    }
+    default: {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Zero all counters associated with the sqlite3_stmt_scanstatus() data.
+*/
+SQLITE_API void sqlite3_stmt_scanstatus_reset(sqlite3_stmt *pStmt){
+  Vdbe *p = (Vdbe*)pStmt;
+  memset(p->anExec, 0, p->nOp * sizeof(i64));
+}
+#endif /* SQLITE_ENABLE_STMT_SCANSTATUS */
+
+/************** End of vdbeapi.c *********************************************/
+/************** Begin file vdbetrace.c ***************************************/
+/*
+** 2009 November 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code used to insert the values of host parameters
+** (aka "wildcards") into the SQL text output by sqlite3_trace().
+**
+** The Vdbe parse-tree explainer is also found here.
+*/
+
+#ifndef SQLITE_OMIT_TRACE
+
+/*
+** zSql is a zero-terminated string of UTF-8 SQL text.  Return the number of
+** bytes in this text up to but excluding the first character in
+** a host parameter.  If the text contains no host parameters, return
+** the total number of bytes in the text.
+*/
+static int findNextHostParameter(const char *zSql, int *pnToken){
+  int tokenType;
+  int nTotal = 0;
+  int n;
+
+  *pnToken = 0;
+  while( zSql[0] ){
+    n = sqlite3GetToken((u8*)zSql, &tokenType);
+    assert( n>0 && tokenType!=TK_ILLEGAL );
+    if( tokenType==TK_VARIABLE ){
+      *pnToken = n;
+      break;
+    }
+    nTotal += n;
+    zSql += n;
+  }
+  return nTotal;
+}
+
+/*
+** This function returns a pointer to a nul-terminated string in memory
+** obtained from sqlite3DbMalloc(). If sqlite3.nVdbeExec is 1, then the
+** string contains a copy of zRawSql but with host parameters expanded to 
+** their current bindings. Or, if sqlite3.nVdbeExec is greater than 1, 
+** then the returned string holds a copy of zRawSql with "-- " prepended
+** to each line of text.
+**
+** If the SQLITE_TRACE_SIZE_LIMIT macro is defined to an integer, then
+** then long strings and blobs are truncated to that many bytes.  This
+** can be used to prevent unreasonably large trace strings when dealing
+** with large (multi-megabyte) strings and blobs.
+**
+** The calling function is responsible for making sure the memory returned
+** is eventually freed.
+**
+** ALGORITHM:  Scan the input string looking for host parameters in any of
+** these forms:  ?, ?N, $A, @A, :A.  Take care to avoid text within
+** string literals, quoted identifier names, and comments.  For text forms,
+** the host parameter index is found by scanning the prepared
+** statement for the corresponding OP_Variable opcode.  Once the host
+** parameter index is known, locate the value in p->aVar[].  Then render
+** the value as a literal in place of the host parameter name.
+*/
+SQLITE_PRIVATE char *sqlite3VdbeExpandSql(
+  Vdbe *p,                 /* The prepared statement being evaluated */
+  const char *zRawSql      /* Raw text of the SQL statement */
+){
+  sqlite3 *db;             /* The database connection */
+  int idx = 0;             /* Index of a host parameter */
+  int nextIndex = 1;       /* Index of next ? host parameter */
+  int n;                   /* Length of a token prefix */
+  int nToken;              /* Length of the parameter token */
+  int i;                   /* Loop counter */
+  Mem *pVar;               /* Value of a host parameter */
+  StrAccum out;            /* Accumulate the output here */
+  char zBase[100];         /* Initial working space */
+
+  db = p->db;
+  sqlite3StrAccumInit(&out, zBase, sizeof(zBase), 
+                      db->aLimit[SQLITE_LIMIT_LENGTH]);
+  out.db = db;
+  if( db->nVdbeExec>1 ){
+    while( *zRawSql ){
+      const char *zStart = zRawSql;
+      while( *(zRawSql++)!='\n' && *zRawSql );
+      sqlite3StrAccumAppend(&out, "-- ", 3);
+      assert( (zRawSql - zStart) > 0 );
+      sqlite3StrAccumAppend(&out, zStart, (int)(zRawSql-zStart));
+    }
+  }else{
+    while( zRawSql[0] ){
+      n = findNextHostParameter(zRawSql, &nToken);
+      assert( n>0 );
+      sqlite3StrAccumAppend(&out, zRawSql, n);
+      zRawSql += n;
+      assert( zRawSql[0] || nToken==0 );
+      if( nToken==0 ) break;
+      if( zRawSql[0]=='?' ){
+        if( nToken>1 ){
+          assert( sqlite3Isdigit(zRawSql[1]) );
+          sqlite3GetInt32(&zRawSql[1], &idx);
+        }else{
+          idx = nextIndex;
+        }
+      }else{
+        assert( zRawSql[0]==':' || zRawSql[0]=='$' || zRawSql[0]=='@' );
+        testcase( zRawSql[0]==':' );
+        testcase( zRawSql[0]=='$' );
+        testcase( zRawSql[0]=='@' );
+        idx = sqlite3VdbeParameterIndex(p, zRawSql, nToken);
+        assert( idx>0 );
+      }
+      zRawSql += nToken;
+      nextIndex = idx + 1;
+      assert( idx>0 && idx<=p->nVar );
+      pVar = &p->aVar[idx-1];
+      if( pVar->flags & MEM_Null ){
+        sqlite3StrAccumAppend(&out, "NULL", 4);
+      }else if( pVar->flags & MEM_Int ){
+        sqlite3XPrintf(&out, 0, "%lld", pVar->u.i);
+      }else if( pVar->flags & MEM_Real ){
+        sqlite3XPrintf(&out, 0, "%!.15g", pVar->u.r);
+      }else if( pVar->flags & MEM_Str ){
+        int nOut;  /* Number of bytes of the string text to include in output */
+#ifndef SQLITE_OMIT_UTF16
+        u8 enc = ENC(db);
+        Mem utf8;
+        if( enc!=SQLITE_UTF8 ){
+          memset(&utf8, 0, sizeof(utf8));
+          utf8.db = db;
+          sqlite3VdbeMemSetStr(&utf8, pVar->z, pVar->n, enc, SQLITE_STATIC);
+          sqlite3VdbeChangeEncoding(&utf8, SQLITE_UTF8);
+          pVar = &utf8;
+        }
+#endif
+        nOut = pVar->n;
+#ifdef SQLITE_TRACE_SIZE_LIMIT
+        if( nOut>SQLITE_TRACE_SIZE_LIMIT ){
+          nOut = SQLITE_TRACE_SIZE_LIMIT;
+          while( nOut<pVar->n && (pVar->z[nOut]&0xc0)==0x80 ){ nOut++; }
+        }
+#endif    
+        sqlite3XPrintf(&out, 0, "'%.*q'", nOut, pVar->z);
+#ifdef SQLITE_TRACE_SIZE_LIMIT
+        if( nOut<pVar->n ){
+          sqlite3XPrintf(&out, 0, "/*+%d bytes*/", pVar->n-nOut);
+        }
+#endif
+#ifndef SQLITE_OMIT_UTF16
+        if( enc!=SQLITE_UTF8 ) sqlite3VdbeMemRelease(&utf8);
+#endif
+      }else if( pVar->flags & MEM_Zero ){
+        sqlite3XPrintf(&out, 0, "zeroblob(%d)", pVar->u.nZero);
+      }else{
+        int nOut;  /* Number of bytes of the blob to include in output */
+        assert( pVar->flags & MEM_Blob );
+        sqlite3StrAccumAppend(&out, "x'", 2);
+        nOut = pVar->n;
+#ifdef SQLITE_TRACE_SIZE_LIMIT
+        if( nOut>SQLITE_TRACE_SIZE_LIMIT ) nOut = SQLITE_TRACE_SIZE_LIMIT;
+#endif
+        for(i=0; i<nOut; i++){
+          sqlite3XPrintf(&out, 0, "%02x", pVar->z[i]&0xff);
+        }
+        sqlite3StrAccumAppend(&out, "'", 1);
+#ifdef SQLITE_TRACE_SIZE_LIMIT
+        if( nOut<pVar->n ){
+          sqlite3XPrintf(&out, 0, "/*+%d bytes*/", pVar->n-nOut);
+        }
+#endif
+      }
+    }
+  }
+  return sqlite3StrAccumFinish(&out);
+}
+
+#endif /* #ifndef SQLITE_OMIT_TRACE */
+
+/************** End of vdbetrace.c *******************************************/
+/************** Begin file vdbe.c ********************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** The code in this file implements the function that runs the
+** bytecode of a prepared statement.
+**
+** Various scripts scan this source file in order to generate HTML
+** documentation, headers files, or other derived files.  The formatting
+** of the code in this file is, therefore, important.  See other comments
+** in this file for details.  If in doubt, do not deviate from existing
+** commenting and indentation practices when changing or adding code.
+*/
+
+/*
+** Invoke this macro on memory cells just prior to changing the
+** value of the cell.  This macro verifies that shallow copies are
+** not misused.  A shallow copy of a string or blob just copies a
+** pointer to the string or blob, not the content.  If the original
+** is changed while the copy is still in use, the string or blob might
+** be changed out from under the copy.  This macro verifies that nothing
+** like that ever happens.
+*/
+#ifdef SQLITE_DEBUG
+# define memAboutToChange(P,M) sqlite3VdbeMemAboutToChange(P,M)
+#else
+# define memAboutToChange(P,M)
+#endif
+
+/*
+** The following global variable is incremented every time a cursor
+** moves, either by the OP_SeekXX, OP_Next, or OP_Prev opcodes.  The test
+** procedures use this information to make sure that indices are
+** working correctly.  This variable has no function other than to
+** help verify the correct operation of the library.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_search_count = 0;
+#endif
+
+/*
+** When this global variable is positive, it gets decremented once before
+** each instruction in the VDBE.  When it reaches zero, the u1.isInterrupted
+** field of the sqlite3 structure is set in order to simulate an interrupt.
+**
+** This facility is used for testing purposes only.  It does not function
+** in an ordinary build.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_interrupt_count = 0;
+#endif
+
+/*
+** The next global variable is incremented each type the OP_Sort opcode
+** is executed.  The test procedures use this information to make sure that
+** sorting is occurring or not occurring at appropriate times.   This variable
+** has no function other than to help verify the correct operation of the
+** library.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_sort_count = 0;
+#endif
+
+/*
+** The next global variable records the size of the largest MEM_Blob
+** or MEM_Str that has been used by a VDBE opcode.  The test procedures
+** use this information to make sure that the zero-blob functionality
+** is working correctly.   This variable has no function other than to
+** help verify the correct operation of the library.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_max_blobsize = 0;
+static void updateMaxBlobsize(Mem *p){
+  if( (p->flags & (MEM_Str|MEM_Blob))!=0 && p->n>sqlite3_max_blobsize ){
+    sqlite3_max_blobsize = p->n;
+  }
+}
+#endif
+
+/*
+** The next global variable is incremented each time the OP_Found opcode
+** is executed. This is used to test whether or not the foreign key
+** operation implemented using OP_FkIsZero is working. This variable
+** has no function other than to help verify the correct operation of the
+** library.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_found_count = 0;
+#endif
+
+/*
+** Test a register to see if it exceeds the current maximum blob size.
+** If it does, record the new maximum blob size.
+*/
+#if defined(SQLITE_TEST) && !defined(SQLITE_OMIT_BUILTIN_TEST)
+# define UPDATE_MAX_BLOBSIZE(P)  updateMaxBlobsize(P)
+#else
+# define UPDATE_MAX_BLOBSIZE(P)
+#endif
+
+/*
+** Invoke the VDBE coverage callback, if that callback is defined.  This
+** feature is used for test suite validation only and does not appear an
+** production builds.
+**
+** M is an integer, 2 or 3, that indices how many different ways the
+** branch can go.  It is usually 2.  "I" is the direction the branch
+** goes.  0 means falls through.  1 means branch is taken.  2 means the
+** second alternative branch is taken.
+**
+** iSrcLine is the source code line (from the __LINE__ macro) that
+** generated the VDBE instruction.  This instrumentation assumes that all
+** source code is in a single file (the amalgamation).  Special values 1
+** and 2 for the iSrcLine parameter mean that this particular branch is
+** always taken or never taken, respectively.
+*/
+#if !defined(SQLITE_VDBE_COVERAGE)
+# define VdbeBranchTaken(I,M)
+#else
+# define VdbeBranchTaken(I,M) vdbeTakeBranch(pOp->iSrcLine,I,M)
+  static void vdbeTakeBranch(int iSrcLine, u8 I, u8 M){
+    if( iSrcLine<=2 && ALWAYS(iSrcLine>0) ){
+      M = iSrcLine;
+      /* Assert the truth of VdbeCoverageAlwaysTaken() and 
+      ** VdbeCoverageNeverTaken() */
+      assert( (M & I)==I );
+    }else{
+      if( sqlite3GlobalConfig.xVdbeBranch==0 ) return;  /*NO_TEST*/
+      sqlite3GlobalConfig.xVdbeBranch(sqlite3GlobalConfig.pVdbeBranchArg,
+                                      iSrcLine,I,M);
+    }
+  }
+#endif
+
+/*
+** Convert the given register into a string if it isn't one
+** already. Return non-zero if a malloc() fails.
+*/
+#define Stringify(P, enc) \
+   if(((P)->flags&(MEM_Str|MEM_Blob))==0 && sqlite3VdbeMemStringify(P,enc,0)) \
+     { goto no_mem; }
+
+/*
+** An ephemeral string value (signified by the MEM_Ephem flag) contains
+** a pointer to a dynamically allocated string where some other entity
+** is responsible for deallocating that string.  Because the register
+** does not control the string, it might be deleted without the register
+** knowing it.
+**
+** This routine converts an ephemeral string into a dynamically allocated
+** string that the register itself controls.  In other words, it
+** converts an MEM_Ephem string into a string with P.z==P.zMalloc.
+*/
+#define Deephemeralize(P) \
+   if( ((P)->flags&MEM_Ephem)!=0 \
+       && sqlite3VdbeMemMakeWriteable(P) ){ goto no_mem;}
+
+/* Return true if the cursor was opened using the OP_OpenSorter opcode. */
+#define isSorter(x) ((x)->pSorter!=0)
+
+/*
+** Allocate VdbeCursor number iCur.  Return a pointer to it.  Return NULL
+** if we run out of memory.
+*/
+static VdbeCursor *allocateCursor(
+  Vdbe *p,              /* The virtual machine */
+  int iCur,             /* Index of the new VdbeCursor */
+  int nField,           /* Number of fields in the table or index */
+  int iDb,              /* Database the cursor belongs to, or -1 */
+  int isBtreeCursor     /* True for B-Tree.  False for pseudo-table or vtab */
+){
+  /* Find the memory cell that will be used to store the blob of memory
+  ** required for this VdbeCursor structure. It is convenient to use a 
+  ** vdbe memory cell to manage the memory allocation required for a
+  ** VdbeCursor structure for the following reasons:
+  **
+  **   * Sometimes cursor numbers are used for a couple of different
+  **     purposes in a vdbe program. The different uses might require
+  **     different sized allocations. Memory cells provide growable
+  **     allocations.
+  **
+  **   * When using ENABLE_MEMORY_MANAGEMENT, memory cell buffers can
+  **     be freed lazily via the sqlite3_release_memory() API. This
+  **     minimizes the number of malloc calls made by the system.
+  **
+  ** Memory cells for cursors are allocated at the top of the address
+  ** space. Memory cell (p->nMem) corresponds to cursor 0. Space for
+  ** cursor 1 is managed by memory cell (p->nMem-1), etc.
+  */
+  Mem *pMem = &p->aMem[p->nMem-iCur];
+
+  int nByte;
+  VdbeCursor *pCx = 0;
+  nByte = 
+      ROUND8(sizeof(VdbeCursor)) + 2*sizeof(u32)*nField + 
+      (isBtreeCursor?sqlite3BtreeCursorSize():0);
+
+  assert( iCur<p->nCursor );
+  if( p->apCsr[iCur] ){
+    sqlite3VdbeFreeCursor(p, p->apCsr[iCur]);
+    p->apCsr[iCur] = 0;
+  }
+  if( SQLITE_OK==sqlite3VdbeMemClearAndResize(pMem, nByte) ){
+    p->apCsr[iCur] = pCx = (VdbeCursor*)pMem->z;
+    memset(pCx, 0, sizeof(VdbeCursor));
+    pCx->iDb = iDb;
+    pCx->nField = nField;
+    pCx->aOffset = &pCx->aType[nField];
+    if( isBtreeCursor ){
+      pCx->pCursor = (BtCursor*)
+          &pMem->z[ROUND8(sizeof(VdbeCursor))+2*sizeof(u32)*nField];
+      sqlite3BtreeCursorZero(pCx->pCursor);
+    }
+  }
+  return pCx;
+}
+
+/*
+** Try to convert a value into a numeric representation if we can
+** do so without loss of information.  In other words, if the string
+** looks like a number, convert it into a number.  If it does not
+** look like a number, leave it alone.
+**
+** If the bTryForInt flag is true, then extra effort is made to give
+** an integer representation.  Strings that look like floating point
+** values but which have no fractional component (example: '48.00')
+** will have a MEM_Int representation when bTryForInt is true.
+**
+** If bTryForInt is false, then if the input string contains a decimal
+** point or exponential notation, the result is only MEM_Real, even
+** if there is an exact integer representation of the quantity.
+*/
+static void applyNumericAffinity(Mem *pRec, int bTryForInt){
+  double rValue;
+  i64 iValue;
+  u8 enc = pRec->enc;
+  assert( (pRec->flags & (MEM_Str|MEM_Int|MEM_Real))==MEM_Str );
+  if( sqlite3AtoF(pRec->z, &rValue, pRec->n, enc)==0 ) return;
+  if( 0==sqlite3Atoi64(pRec->z, &iValue, pRec->n, enc) ){
+    pRec->u.i = iValue;
+    pRec->flags |= MEM_Int;
+  }else{
+    pRec->u.r = rValue;
+    pRec->flags |= MEM_Real;
+    if( bTryForInt ) sqlite3VdbeIntegerAffinity(pRec);
+  }
+}
+
+/*
+** Processing is determine by the affinity parameter:
+**
+** SQLITE_AFF_INTEGER:
+** SQLITE_AFF_REAL:
+** SQLITE_AFF_NUMERIC:
+**    Try to convert pRec to an integer representation or a 
+**    floating-point representation if an integer representation
+**    is not possible.  Note that the integer representation is
+**    always preferred, even if the affinity is REAL, because
+**    an integer representation is more space efficient on disk.
+**
+** SQLITE_AFF_TEXT:
+**    Convert pRec to a text representation.
+**
+** SQLITE_AFF_NONE:
+**    No-op.  pRec is unchanged.
+*/
+static void applyAffinity(
+  Mem *pRec,          /* The value to apply affinity to */
+  char affinity,      /* The affinity to be applied */
+  u8 enc              /* Use this text encoding */
+){
+  if( affinity>=SQLITE_AFF_NUMERIC ){
+    assert( affinity==SQLITE_AFF_INTEGER || affinity==SQLITE_AFF_REAL
+             || affinity==SQLITE_AFF_NUMERIC );
+    if( (pRec->flags & MEM_Int)==0 ){
+      if( (pRec->flags & MEM_Real)==0 ){
+        if( pRec->flags & MEM_Str ) applyNumericAffinity(pRec,1);
+      }else{
+        sqlite3VdbeIntegerAffinity(pRec);
+      }
+    }
+  }else if( affinity==SQLITE_AFF_TEXT ){
+    /* Only attempt the conversion to TEXT if there is an integer or real
+    ** representation (blob and NULL do not get converted) but no string
+    ** representation.
+    */
+    if( 0==(pRec->flags&MEM_Str) && (pRec->flags&(MEM_Real|MEM_Int)) ){
+      sqlite3VdbeMemStringify(pRec, enc, 1);
+    }
+  }
+}
+
+/*
+** Try to convert the type of a function argument or a result column
+** into a numeric representation.  Use either INTEGER or REAL whichever
+** is appropriate.  But only do the conversion if it is possible without
+** loss of information and return the revised type of the argument.
+*/
+SQLITE_API int sqlite3_value_numeric_type(sqlite3_value *pVal){
+  int eType = sqlite3_value_type(pVal);
+  if( eType==SQLITE_TEXT ){
+    Mem *pMem = (Mem*)pVal;
+    applyNumericAffinity(pMem, 0);
+    eType = sqlite3_value_type(pVal);
+  }
+  return eType;
+}
+
+/*
+** Exported version of applyAffinity(). This one works on sqlite3_value*, 
+** not the internal Mem* type.
+*/
+SQLITE_PRIVATE void sqlite3ValueApplyAffinity(
+  sqlite3_value *pVal, 
+  u8 affinity, 
+  u8 enc
+){
+  applyAffinity((Mem *)pVal, affinity, enc);
+}
+
+/*
+** pMem currently only holds a string type (or maybe a BLOB that we can
+** interpret as a string if we want to).  Compute its corresponding
+** numeric type, if has one.  Set the pMem->u.r and pMem->u.i fields
+** accordingly.
+*/
+static u16 SQLITE_NOINLINE computeNumericType(Mem *pMem){
+  assert( (pMem->flags & (MEM_Int|MEM_Real))==0 );
+  assert( (pMem->flags & (MEM_Str|MEM_Blob))!=0 );
+  if( sqlite3AtoF(pMem->z, &pMem->u.r, pMem->n, pMem->enc)==0 ){
+    return 0;
+  }
+  if( sqlite3Atoi64(pMem->z, &pMem->u.i, pMem->n, pMem->enc)==SQLITE_OK ){
+    return MEM_Int;
+  }
+  return MEM_Real;
+}
+
+/*
+** Return the numeric type for pMem, either MEM_Int or MEM_Real or both or
+** none.  
+**
+** Unlike applyNumericAffinity(), this routine does not modify pMem->flags.
+** But it does set pMem->u.r and pMem->u.i appropriately.
+*/
+static u16 numericType(Mem *pMem){
+  if( pMem->flags & (MEM_Int|MEM_Real) ){
+    return pMem->flags & (MEM_Int|MEM_Real);
+  }
+  if( pMem->flags & (MEM_Str|MEM_Blob) ){
+    return computeNumericType(pMem);
+  }
+  return 0;
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** Write a nice string representation of the contents of cell pMem
+** into buffer zBuf, length nBuf.
+*/
+SQLITE_PRIVATE void sqlite3VdbeMemPrettyPrint(Mem *pMem, char *zBuf){
+  char *zCsr = zBuf;
+  int f = pMem->flags;
+
+  static const char *const encnames[] = {"(X)", "(8)", "(16LE)", "(16BE)"};
+
+  if( f&MEM_Blob ){
+    int i;
+    char c;
+    if( f & MEM_Dyn ){
+      c = 'z';
+      assert( (f & (MEM_Static|MEM_Ephem))==0 );
+    }else if( f & MEM_Static ){
+      c = 't';
+      assert( (f & (MEM_Dyn|MEM_Ephem))==0 );
+    }else if( f & MEM_Ephem ){
+      c = 'e';
+      assert( (f & (MEM_Static|MEM_Dyn))==0 );
+    }else{
+      c = 's';
+    }
+
+    sqlite3_snprintf(100, zCsr, "%c", c);
+    zCsr += sqlite3Strlen30(zCsr);
+    sqlite3_snprintf(100, zCsr, "%d[", pMem->n);
+    zCsr += sqlite3Strlen30(zCsr);
+    for(i=0; i<16 && i<pMem->n; i++){
+      sqlite3_snprintf(100, zCsr, "%02X", ((int)pMem->z[i] & 0xFF));
+      zCsr += sqlite3Strlen30(zCsr);
+    }
+    for(i=0; i<16 && i<pMem->n; i++){
+      char z = pMem->z[i];
+      if( z<32 || z>126 ) *zCsr++ = '.';
+      else *zCsr++ = z;
+    }
+
+    sqlite3_snprintf(100, zCsr, "]%s", encnames[pMem->enc]);
+    zCsr += sqlite3Strlen30(zCsr);
+    if( f & MEM_Zero ){
+      sqlite3_snprintf(100, zCsr,"+%dz",pMem->u.nZero);
+      zCsr += sqlite3Strlen30(zCsr);
+    }
+    *zCsr = '\0';
+  }else if( f & MEM_Str ){
+    int j, k;
+    zBuf[0] = ' ';
+    if( f & MEM_Dyn ){
+      zBuf[1] = 'z';
+      assert( (f & (MEM_Static|MEM_Ephem))==0 );
+    }else if( f & MEM_Static ){
+      zBuf[1] = 't';
+      assert( (f & (MEM_Dyn|MEM_Ephem))==0 );
+    }else if( f & MEM_Ephem ){
+      zBuf[1] = 'e';
+      assert( (f & (MEM_Static|MEM_Dyn))==0 );
+    }else{
+      zBuf[1] = 's';
+    }
+    k = 2;
+    sqlite3_snprintf(100, &zBuf[k], "%d", pMem->n);
+    k += sqlite3Strlen30(&zBuf[k]);
+    zBuf[k++] = '[';
+    for(j=0; j<15 && j<pMem->n; j++){
+      u8 c = pMem->z[j];
+      if( c>=0x20 && c<0x7f ){
+        zBuf[k++] = c;
+      }else{
+        zBuf[k++] = '.';
+      }
+    }
+    zBuf[k++] = ']';
+    sqlite3_snprintf(100,&zBuf[k], encnames[pMem->enc]);
+    k += sqlite3Strlen30(&zBuf[k]);
+    zBuf[k++] = 0;
+  }
+}
+#endif
+
+#ifdef SQLITE_DEBUG
+/*
+** Print the value of a register for tracing purposes:
+*/
+static void memTracePrint(Mem *p){
+  if( p->flags & MEM_Undefined ){
+    printf(" undefined");
+  }else if( p->flags & MEM_Null ){
+    printf(" NULL");
+  }else if( (p->flags & (MEM_Int|MEM_Str))==(MEM_Int|MEM_Str) ){
+    printf(" si:%lld", p->u.i);
+  }else if( p->flags & MEM_Int ){
+    printf(" i:%lld", p->u.i);
+#ifndef SQLITE_OMIT_FLOATING_POINT
+  }else if( p->flags & MEM_Real ){
+    printf(" r:%g", p->u.r);
+#endif
+  }else if( p->flags & MEM_RowSet ){
+    printf(" (rowset)");
+  }else{
+    char zBuf[200];
+    sqlite3VdbeMemPrettyPrint(p, zBuf);
+    printf(" %s", zBuf);
+  }
+}
+static void registerTrace(int iReg, Mem *p){
+  printf("REG[%d] = ", iReg);
+  memTracePrint(p);
+  printf("\n");
+}
+#endif
+
+#ifdef SQLITE_DEBUG
+#  define REGISTER_TRACE(R,M) if(db->flags&SQLITE_VdbeTrace)registerTrace(R,M)
+#else
+#  define REGISTER_TRACE(R,M)
+#endif
+
+
+#ifdef VDBE_PROFILE
+
+/* 
+** hwtime.h contains inline assembler code for implementing 
+** high-performance timing routines.
+*/
+/************** Include hwtime.h in the middle of vdbe.c *********************/
+/************** Begin file hwtime.h ******************************************/
+/*
+** 2008 May 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains inline asm code for retrieving "high-performance"
+** counters for x86 class CPUs.
+*/
+#ifndef _HWTIME_H_
+#define _HWTIME_H_
+
+/*
+** The following routine only works on pentium-class (or newer) processors.
+** It uses the RDTSC opcode to read the cycle count value out of the
+** processor and returns that value.  This can be used for high-res
+** profiling.
+*/
+#if (defined(__GNUC__) || defined(_MSC_VER)) && \
+      (defined(i386) || defined(__i386__) || defined(_M_IX86))
+
+  #if defined(__GNUC__)
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+     unsigned int lo, hi;
+     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+     return (sqlite_uint64)hi << 32 | lo;
+  }
+
+  #elif defined(_MSC_VER)
+
+  __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){
+     __asm {
+        rdtsc
+        ret       ; return value at EDX:EAX
+     }
+  }
+
+  #endif
+
+#elif (defined(__GNUC__) && defined(__x86_64__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long val;
+      __asm__ __volatile__ ("rdtsc" : "=A" (val));
+      return val;
+  }
+ 
+#elif (defined(__GNUC__) && defined(__ppc__))
+
+  __inline__ sqlite_uint64 sqlite3Hwtime(void){
+      unsigned long long retval;
+      unsigned long junk;
+      __asm__ __volatile__ ("\n\
+          1:      mftbu   %1\n\
+                  mftb    %L0\n\
+                  mftbu   %0\n\
+                  cmpw    %0,%1\n\
+                  bne     1b"
+                  : "=r" (retval), "=r" (junk));
+      return retval;
+  }
+
+#else
+
+  #error Need implementation of sqlite3Hwtime() for your platform.
+
+  /*
+  ** To compile without implementing sqlite3Hwtime() for your platform,
+  ** you can remove the above #error and use the following
+  ** stub function.  You will lose timing support for many
+  ** of the debugging and testing utilities, but it should at
+  ** least compile and run.
+  */
+SQLITE_PRIVATE   sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); }
+
+#endif
+
+#endif /* !defined(_HWTIME_H_) */
+
+/************** End of hwtime.h **********************************************/
+/************** Continuing where we left off in vdbe.c ***********************/
+
+#endif
+
+#ifndef NDEBUG
+/*
+** This function is only called from within an assert() expression. It
+** checks that the sqlite3.nTransaction variable is correctly set to
+** the number of non-transaction savepoints currently in the 
+** linked list starting at sqlite3.pSavepoint.
+** 
+** Usage:
+**
+**     assert( checkSavepointCount(db) );
+*/
+static int checkSavepointCount(sqlite3 *db){
+  int n = 0;
+  Savepoint *p;
+  for(p=db->pSavepoint; p; p=p->pNext) n++;
+  assert( n==(db->nSavepoint + db->isTransactionSavepoint) );
+  return 1;
+}
+#endif
+
+
+/*
+** Execute as much of a VDBE program as we can.
+** This is the core of sqlite3_step().  
+*/
+SQLITE_PRIVATE int sqlite3VdbeExec(
+  Vdbe *p                    /* The VDBE */
+){
+  int pc=0;                  /* The program counter */
+  Op *aOp = p->aOp;          /* Copy of p->aOp */
+  Op *pOp;                   /* Current operation */
+  int rc = SQLITE_OK;        /* Value to return */
+  sqlite3 *db = p->db;       /* The database */
+  u8 resetSchemaOnFault = 0; /* Reset schema after an error if positive */
+  u8 encoding = ENC(db);     /* The database encoding */
+  int iCompare = 0;          /* Result of last OP_Compare operation */
+  unsigned nVmStep = 0;      /* Number of virtual machine steps */
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+  unsigned nProgressLimit = 0;/* Invoke xProgress() when nVmStep reaches this */
+#endif
+  Mem *aMem = p->aMem;       /* Copy of p->aMem */
+  Mem *pIn1 = 0;             /* 1st input operand */
+  Mem *pIn2 = 0;             /* 2nd input operand */
+  Mem *pIn3 = 0;             /* 3rd input operand */
+  Mem *pOut = 0;             /* Output operand */
+  int *aPermute = 0;         /* Permutation of columns for OP_Compare */
+  i64 lastRowid = db->lastRowid;  /* Saved value of the last insert ROWID */
+#ifdef VDBE_PROFILE
+  u64 start;                 /* CPU clock count at start of opcode */
+#endif
+  /*** INSERT STACK UNION HERE ***/
+
+  assert( p->magic==VDBE_MAGIC_RUN );  /* sqlite3_step() verifies this */
+  sqlite3VdbeEnter(p);
+  if( p->rc==SQLITE_NOMEM ){
+    /* This happens if a malloc() inside a call to sqlite3_column_text() or
+    ** sqlite3_column_text16() failed.  */
+    goto no_mem;
+  }
+  assert( p->rc==SQLITE_OK || p->rc==SQLITE_BUSY );
+  assert( p->bIsReader || p->readOnly!=0 );
+  p->rc = SQLITE_OK;
+  p->iCurrentTime = 0;
+  assert( p->explain==0 );
+  p->pResultSet = 0;
+  db->busyHandler.nBusy = 0;
+  if( db->u1.isInterrupted ) goto abort_due_to_interrupt;
+  sqlite3VdbeIOTraceSql(p);
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+  if( db->xProgress ){
+    assert( 0 < db->nProgressOps );
+    nProgressLimit = (unsigned)p->aCounter[SQLITE_STMTSTATUS_VM_STEP];
+    if( nProgressLimit==0 ){
+      nProgressLimit = db->nProgressOps;
+    }else{
+      nProgressLimit %= (unsigned)db->nProgressOps;
+    }
+  }
+#endif
+#ifdef SQLITE_DEBUG
+  sqlite3BeginBenignMalloc();
+  if( p->pc==0
+   && (p->db->flags & (SQLITE_VdbeListing|SQLITE_VdbeEQP|SQLITE_VdbeTrace))!=0
+  ){
+    int i;
+    int once = 1;
+    sqlite3VdbePrintSql(p);
+    if( p->db->flags & SQLITE_VdbeListing ){
+      printf("VDBE Program Listing:\n");
+      for(i=0; i<p->nOp; i++){
+        sqlite3VdbePrintOp(stdout, i, &aOp[i]);
+      }
+    }
+    if( p->db->flags & SQLITE_VdbeEQP ){
+      for(i=0; i<p->nOp; i++){
+        if( aOp[i].opcode==OP_Explain ){
+          if( once ) printf("VDBE Query Plan:\n");
+          printf("%s\n", aOp[i].p4.z);
+          once = 0;
+        }
+      }
+    }
+    if( p->db->flags & SQLITE_VdbeTrace )  printf("VDBE Trace:\n");
+  }
+  sqlite3EndBenignMalloc();
+#endif
+  for(pc=p->pc; rc==SQLITE_OK; pc++){
+    assert( pc>=0 && pc<p->nOp );
+    if( db->mallocFailed ) goto no_mem;
+#ifdef VDBE_PROFILE
+    start = sqlite3Hwtime();
+#endif
+    nVmStep++;
+    pOp = &aOp[pc];
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+    if( p->anExec ) p->anExec[pc]++;
+#endif
+
+    /* Only allow tracing if SQLITE_DEBUG is defined.
+    */
+#ifdef SQLITE_DEBUG
+    if( db->flags & SQLITE_VdbeTrace ){
+      sqlite3VdbePrintOp(stdout, pc, pOp);
+    }
+#endif
+      
+
+    /* Check to see if we need to simulate an interrupt.  This only happens
+    ** if we have a special test build.
+    */
+#ifdef SQLITE_TEST
+    if( sqlite3_interrupt_count>0 ){
+      sqlite3_interrupt_count--;
+      if( sqlite3_interrupt_count==0 ){
+        sqlite3_interrupt(db);
+      }
+    }
+#endif
+
+    /* On any opcode with the "out2-prerelease" tag, free any
+    ** external allocations out of mem[p2] and set mem[p2] to be
+    ** an undefined integer.  Opcodes will either fill in the integer
+    ** value or convert mem[p2] to a different type.
+    */
+    assert( pOp->opflags==sqlite3OpcodeProperty[pOp->opcode] );
+    if( pOp->opflags & OPFLG_OUT2_PRERELEASE ){
+      assert( pOp->p2>0 );
+      assert( pOp->p2<=(p->nMem-p->nCursor) );
+      pOut = &aMem[pOp->p2];
+      memAboutToChange(p, pOut);
+      if( VdbeMemDynamic(pOut) ) sqlite3VdbeMemSetNull(pOut);
+      pOut->flags = MEM_Int;
+    }
+
+    /* Sanity checking on other operands */
+#ifdef SQLITE_DEBUG
+    if( (pOp->opflags & OPFLG_IN1)!=0 ){
+      assert( pOp->p1>0 );
+      assert( pOp->p1<=(p->nMem-p->nCursor) );
+      assert( memIsValid(&aMem[pOp->p1]) );
+      assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p1]) );
+      REGISTER_TRACE(pOp->p1, &aMem[pOp->p1]);
+    }
+    if( (pOp->opflags & OPFLG_IN2)!=0 ){
+      assert( pOp->p2>0 );
+      assert( pOp->p2<=(p->nMem-p->nCursor) );
+      assert( memIsValid(&aMem[pOp->p2]) );
+      assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p2]) );
+      REGISTER_TRACE(pOp->p2, &aMem[pOp->p2]);
+    }
+    if( (pOp->opflags & OPFLG_IN3)!=0 ){
+      assert( pOp->p3>0 );
+      assert( pOp->p3<=(p->nMem-p->nCursor) );
+      assert( memIsValid(&aMem[pOp->p3]) );
+      assert( sqlite3VdbeCheckMemInvariants(&aMem[pOp->p3]) );
+      REGISTER_TRACE(pOp->p3, &aMem[pOp->p3]);
+    }
+    if( (pOp->opflags & OPFLG_OUT2)!=0 ){
+      assert( pOp->p2>0 );
+      assert( pOp->p2<=(p->nMem-p->nCursor) );
+      memAboutToChange(p, &aMem[pOp->p2]);
+    }
+    if( (pOp->opflags & OPFLG_OUT3)!=0 ){
+      assert( pOp->p3>0 );
+      assert( pOp->p3<=(p->nMem-p->nCursor) );
+      memAboutToChange(p, &aMem[pOp->p3]);
+    }
+#endif
+  
+    switch( pOp->opcode ){
+
+/*****************************************************************************
+** What follows is a massive switch statement where each case implements a
+** separate instruction in the virtual machine.  If we follow the usual
+** indentation conventions, each case should be indented by 6 spaces.  But
+** that is a lot of wasted space on the left margin.  So the code within
+** the switch statement will break with convention and be flush-left. Another
+** big comment (similar to this one) will mark the point in the code where
+** we transition back to normal indentation.
+**
+** The formatting of each case is important.  The makefile for SQLite
+** generates two C files "opcodes.h" and "opcodes.c" by scanning this
+** file looking for lines that begin with "case OP_".  The opcodes.h files
+** will be filled with #defines that give unique integer values to each
+** opcode and the opcodes.c file is filled with an array of strings where
+** each string is the symbolic name for the corresponding opcode.  If the
+** case statement is followed by a comment of the form "/# same as ... #/"
+** that comment is used to determine the particular value of the opcode.
+**
+** Other keywords in the comment that follows each case are used to
+** construct the OPFLG_INITIALIZER value that initializes opcodeProperty[].
+** Keywords include: in1, in2, in3, out2_prerelease, out2, out3.  See
+** the mkopcodeh.awk script for additional information.
+**
+** Documentation about VDBE opcodes is generated by scanning this file
+** for lines of that contain "Opcode:".  That line and all subsequent
+** comment lines are used in the generation of the opcode.html documentation
+** file.
+**
+** SUMMARY:
+**
+**     Formatting is important to scripts that scan this file.
+**     Do not deviate from the formatting style currently in use.
+**
+*****************************************************************************/
+
+/* Opcode:  Goto * P2 * * *
+**
+** An unconditional jump to address P2.
+** The next instruction executed will be 
+** the one at index P2 from the beginning of
+** the program.
+**
+** The P1 parameter is not actually used by this opcode.  However, it
+** is sometimes set to 1 instead of 0 as a hint to the command-line shell
+** that this Goto is the bottom of a loop and that the lines from P2 down
+** to the current line should be indented for EXPLAIN output.
+*/
+case OP_Goto: {             /* jump */
+  pc = pOp->p2 - 1;
+
+  /* Opcodes that are used as the bottom of a loop (OP_Next, OP_Prev,
+  ** OP_VNext, OP_RowSetNext, or OP_SorterNext) all jump here upon
+  ** completion.  Check to see if sqlite3_interrupt() has been called
+  ** or if the progress callback needs to be invoked. 
+  **
+  ** This code uses unstructured "goto" statements and does not look clean.
+  ** But that is not due to sloppy coding habits. The code is written this
+  ** way for performance, to avoid having to run the interrupt and progress
+  ** checks on every opcode.  This helps sqlite3_step() to run about 1.5%
+  ** faster according to "valgrind --tool=cachegrind" */
+check_for_interrupt:
+  if( db->u1.isInterrupted ) goto abort_due_to_interrupt;
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+  /* Call the progress callback if it is configured and the required number
+  ** of VDBE ops have been executed (either since this invocation of
+  ** sqlite3VdbeExec() or since last time the progress callback was called).
+  ** If the progress callback returns non-zero, exit the virtual machine with
+  ** a return code SQLITE_ABORT.
+  */
+  if( db->xProgress!=0 && nVmStep>=nProgressLimit ){
+    assert( db->nProgressOps!=0 );
+    nProgressLimit = nVmStep + db->nProgressOps - (nVmStep%db->nProgressOps);
+    if( db->xProgress(db->pProgressArg) ){
+      rc = SQLITE_INTERRUPT;
+      goto vdbe_error_halt;
+    }
+  }
+#endif
+  
+  break;
+}
+
+/* Opcode:  Gosub P1 P2 * * *
+**
+** Write the current address onto register P1
+** and then jump to address P2.
+*/
+case OP_Gosub: {            /* jump */
+  assert( pOp->p1>0 && pOp->p1<=(p->nMem-p->nCursor) );
+  pIn1 = &aMem[pOp->p1];
+  assert( VdbeMemDynamic(pIn1)==0 );
+  memAboutToChange(p, pIn1);
+  pIn1->flags = MEM_Int;
+  pIn1->u.i = pc;
+  REGISTER_TRACE(pOp->p1, pIn1);
+  pc = pOp->p2 - 1;
+  break;
+}
+
+/* Opcode:  Return P1 * * * *
+**
+** Jump to the next instruction after the address in register P1.  After
+** the jump, register P1 becomes undefined.
+*/
+case OP_Return: {           /* in1 */
+  pIn1 = &aMem[pOp->p1];
+  assert( pIn1->flags==MEM_Int );
+  pc = (int)pIn1->u.i;
+  pIn1->flags = MEM_Undefined;
+  break;
+}
+
+/* Opcode: InitCoroutine P1 P2 P3 * *
+**
+** Set up register P1 so that it will Yield to the coroutine
+** located at address P3.
+**
+** If P2!=0 then the coroutine implementation immediately follows
+** this opcode.  So jump over the coroutine implementation to
+** address P2.
+**
+** See also: EndCoroutine
+*/
+case OP_InitCoroutine: {     /* jump */
+  assert( pOp->p1>0 &&  pOp->p1<=(p->nMem-p->nCursor) );
+  assert( pOp->p2>=0 && pOp->p2<p->nOp );
+  assert( pOp->p3>=0 && pOp->p3<p->nOp );
+  pOut = &aMem[pOp->p1];
+  assert( !VdbeMemDynamic(pOut) );
+  pOut->u.i = pOp->p3 - 1;
+  pOut->flags = MEM_Int;
+  if( pOp->p2 ) pc = pOp->p2 - 1;
+  break;
+}
+
+/* Opcode:  EndCoroutine P1 * * * *
+**
+** The instruction at the address in register P1 is a Yield.
+** Jump to the P2 parameter of that Yield.
+** After the jump, register P1 becomes undefined.
+**
+** See also: InitCoroutine
+*/
+case OP_EndCoroutine: {           /* in1 */
+  VdbeOp *pCaller;
+  pIn1 = &aMem[pOp->p1];
+  assert( pIn1->flags==MEM_Int );
+  assert( pIn1->u.i>=0 && pIn1->u.i<p->nOp );
+  pCaller = &aOp[pIn1->u.i];
+  assert( pCaller->opcode==OP_Yield );
+  assert( pCaller->p2>=0 && pCaller->p2<p->nOp );
+  pc = pCaller->p2 - 1;
+  pIn1->flags = MEM_Undefined;
+  break;
+}
+
+/* Opcode:  Yield P1 P2 * * *
+**
+** Swap the program counter with the value in register P1.  This
+** has the effect of yielding to a coroutine.
+**
+** If the coroutine that is launched by this instruction ends with
+** Yield or Return then continue to the next instruction.  But if
+** the coroutine launched by this instruction ends with
+** EndCoroutine, then jump to P2 rather than continuing with the
+** next instruction.
+**
+** See also: InitCoroutine
+*/
+case OP_Yield: {            /* in1, jump */
+  int pcDest;
+  pIn1 = &aMem[pOp->p1];
+  assert( VdbeMemDynamic(pIn1)==0 );
+  pIn1->flags = MEM_Int;
+  pcDest = (int)pIn1->u.i;
+  pIn1->u.i = pc;
+  REGISTER_TRACE(pOp->p1, pIn1);
+  pc = pcDest;
+  break;
+}
+
+/* Opcode:  HaltIfNull  P1 P2 P3 P4 P5
+** Synopsis:  if r[P3]=null halt
+**
+** Check the value in register P3.  If it is NULL then Halt using
+** parameter P1, P2, and P4 as if this were a Halt instruction.  If the
+** value in register P3 is not NULL, then this routine is a no-op.
+** The P5 parameter should be 1.
+*/
+case OP_HaltIfNull: {      /* in3 */
+  pIn3 = &aMem[pOp->p3];
+  if( (pIn3->flags & MEM_Null)==0 ) break;
+  /* Fall through into OP_Halt */
+}
+
+/* Opcode:  Halt P1 P2 * P4 P5
+**
+** Exit immediately.  All open cursors, etc are closed
+** automatically.
+**
+** P1 is the result code returned by sqlite3_exec(), sqlite3_reset(),
+** or sqlite3_finalize().  For a normal halt, this should be SQLITE_OK (0).
+** For errors, it can be some other value.  If P1!=0 then P2 will determine
+** whether or not to rollback the current transaction.  Do not rollback
+** if P2==OE_Fail. Do the rollback if P2==OE_Rollback.  If P2==OE_Abort,
+** then back out all changes that have occurred during this execution of the
+** VDBE, but do not rollback the transaction. 
+**
+** If P4 is not null then it is an error message string.
+**
+** P5 is a value between 0 and 4, inclusive, that modifies the P4 string.
+**
+**    0:  (no change)
+**    1:  NOT NULL contraint failed: P4
+**    2:  UNIQUE constraint failed: P4
+**    3:  CHECK constraint failed: P4
+**    4:  FOREIGN KEY constraint failed: P4
+**
+** If P5 is not zero and P4 is NULL, then everything after the ":" is
+** omitted.
+**
+** There is an implied "Halt 0 0 0" instruction inserted at the very end of
+** every program.  So a jump past the last instruction of the program
+** is the same as executing Halt.
+*/
+case OP_Halt: {
+  const char *zType;
+  const char *zLogFmt;
+
+  if( pOp->p1==SQLITE_OK && p->pFrame ){
+    /* Halt the sub-program. Return control to the parent frame. */
+    VdbeFrame *pFrame = p->pFrame;
+    p->pFrame = pFrame->pParent;
+    p->nFrame--;
+    sqlite3VdbeSetChanges(db, p->nChange);
+    pc = sqlite3VdbeFrameRestore(pFrame);
+    lastRowid = db->lastRowid;
+    if( pOp->p2==OE_Ignore ){
+      /* Instruction pc is the OP_Program that invoked the sub-program 
+      ** currently being halted. If the p2 instruction of this OP_Halt
+      ** instruction is set to OE_Ignore, then the sub-program is throwing
+      ** an IGNORE exception. In this case jump to the address specified
+      ** as the p2 of the calling OP_Program.  */
+      pc = p->aOp[pc].p2-1;
+    }
+    aOp = p->aOp;
+    aMem = p->aMem;
+    break;
+  }
+  p->rc = pOp->p1;
+  p->errorAction = (u8)pOp->p2;
+  p->pc = pc;
+  if( p->rc ){
+    if( pOp->p5 ){
+      static const char * const azType[] = { "NOT NULL", "UNIQUE", "CHECK",
+                                             "FOREIGN KEY" };
+      assert( pOp->p5>=1 && pOp->p5<=4 );
+      testcase( pOp->p5==1 );
+      testcase( pOp->p5==2 );
+      testcase( pOp->p5==3 );
+      testcase( pOp->p5==4 );
+      zType = azType[pOp->p5-1];
+    }else{
+      zType = 0;
+    }
+    assert( zType!=0 || pOp->p4.z!=0 );
+    zLogFmt = "abort at %d in [%s]: %s";
+    if( zType && pOp->p4.z ){
+      sqlite3SetString(&p->zErrMsg, db, "%s constraint failed: %s", 
+                       zType, pOp->p4.z);
+    }else if( pOp->p4.z ){
+      sqlite3SetString(&p->zErrMsg, db, "%s", pOp->p4.z);
+    }else{
+      sqlite3SetString(&p->zErrMsg, db, "%s constraint failed", zType);
+    }
+    sqlite3_log(pOp->p1, zLogFmt, pc, p->zSql, p->zErrMsg);
+  }
+  rc = sqlite3VdbeHalt(p);
+  assert( rc==SQLITE_BUSY || rc==SQLITE_OK || rc==SQLITE_ERROR );
+  if( rc==SQLITE_BUSY ){
+    p->rc = rc = SQLITE_BUSY;
+  }else{
+    assert( rc==SQLITE_OK || (p->rc&0xff)==SQLITE_CONSTRAINT );
+    assert( rc==SQLITE_OK || db->nDeferredCons>0 || db->nDeferredImmCons>0 );
+    rc = p->rc ? SQLITE_ERROR : SQLITE_DONE;
+  }
+  goto vdbe_return;
+}
+
+/* Opcode: Integer P1 P2 * * *
+** Synopsis: r[P2]=P1
+**
+** The 32-bit integer value P1 is written into register P2.
+*/
+case OP_Integer: {         /* out2-prerelease */
+  pOut->u.i = pOp->p1;
+  break;
+}
+
+/* Opcode: Int64 * P2 * P4 *
+** Synopsis: r[P2]=P4
+**
+** P4 is a pointer to a 64-bit integer value.
+** Write that value into register P2.
+*/
+case OP_Int64: {           /* out2-prerelease */
+  assert( pOp->p4.pI64!=0 );
+  pOut->u.i = *pOp->p4.pI64;
+  break;
+}
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/* Opcode: Real * P2 * P4 *
+** Synopsis: r[P2]=P4
+**
+** P4 is a pointer to a 64-bit floating point value.
+** Write that value into register P2.
+*/
+case OP_Real: {            /* same as TK_FLOAT, out2-prerelease */
+  pOut->flags = MEM_Real;
+  assert( !sqlite3IsNaN(*pOp->p4.pReal) );
+  pOut->u.r = *pOp->p4.pReal;
+  break;
+}
+#endif
+
+/* Opcode: String8 * P2 * P4 *
+** Synopsis: r[P2]='P4'
+**
+** P4 points to a nul terminated UTF-8 string. This opcode is transformed 
+** into a String before it is executed for the first time.  During
+** this transformation, the length of string P4 is computed and stored
+** as the P1 parameter.
+*/
+case OP_String8: {         /* same as TK_STRING, out2-prerelease */
+  assert( pOp->p4.z!=0 );
+  pOp->opcode = OP_String;
+  pOp->p1 = sqlite3Strlen30(pOp->p4.z);
+
+#ifndef SQLITE_OMIT_UTF16
+  if( encoding!=SQLITE_UTF8 ){
+    rc = sqlite3VdbeMemSetStr(pOut, pOp->p4.z, -1, SQLITE_UTF8, SQLITE_STATIC);
+    if( rc==SQLITE_TOOBIG ) goto too_big;
+    if( SQLITE_OK!=sqlite3VdbeChangeEncoding(pOut, encoding) ) goto no_mem;
+    assert( pOut->szMalloc>0 && pOut->zMalloc==pOut->z );
+    assert( VdbeMemDynamic(pOut)==0 );
+    pOut->szMalloc = 0;
+    pOut->flags |= MEM_Static;
+    if( pOp->p4type==P4_DYNAMIC ){
+      sqlite3DbFree(db, pOp->p4.z);
+    }
+    pOp->p4type = P4_DYNAMIC;
+    pOp->p4.z = pOut->z;
+    pOp->p1 = pOut->n;
+  }
+#endif
+  if( pOp->p1>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    goto too_big;
+  }
+  /* Fall through to the next case, OP_String */
+}
+  
+/* Opcode: String P1 P2 * P4 *
+** Synopsis: r[P2]='P4' (len=P1)
+**
+** The string value P4 of length P1 (bytes) is stored in register P2.
+*/
+case OP_String: {          /* out2-prerelease */
+  assert( pOp->p4.z!=0 );
+  pOut->flags = MEM_Str|MEM_Static|MEM_Term;
+  pOut->z = pOp->p4.z;
+  pOut->n = pOp->p1;
+  pOut->enc = encoding;
+  UPDATE_MAX_BLOBSIZE(pOut);
+  break;
+}
+
+/* Opcode: Null P1 P2 P3 * *
+** Synopsis:  r[P2..P3]=NULL
+**
+** Write a NULL into registers P2.  If P3 greater than P2, then also write
+** NULL into register P3 and every register in between P2 and P3.  If P3
+** is less than P2 (typically P3 is zero) then only register P2 is
+** set to NULL.
+**
+** If the P1 value is non-zero, then also set the MEM_Cleared flag so that
+** NULL values will not compare equal even if SQLITE_NULLEQ is set on
+** OP_Ne or OP_Eq.
+*/
+case OP_Null: {           /* out2-prerelease */
+  int cnt;
+  u16 nullFlag;
+  cnt = pOp->p3-pOp->p2;
+  assert( pOp->p3<=(p->nMem-p->nCursor) );
+  pOut->flags = nullFlag = pOp->p1 ? (MEM_Null|MEM_Cleared) : MEM_Null;
+  while( cnt>0 ){
+    pOut++;
+    memAboutToChange(p, pOut);
+    sqlite3VdbeMemSetNull(pOut);
+    pOut->flags = nullFlag;
+    cnt--;
+  }
+  break;
+}
+
+/* Opcode: SoftNull P1 * * * *
+** Synopsis:  r[P1]=NULL
+**
+** Set register P1 to have the value NULL as seen by the OP_MakeRecord
+** instruction, but do not free any string or blob memory associated with
+** the register, so that if the value was a string or blob that was
+** previously copied using OP_SCopy, the copies will continue to be valid.
+*/
+case OP_SoftNull: {
+  assert( pOp->p1>0 && pOp->p1<=(p->nMem-p->nCursor) );
+  pOut = &aMem[pOp->p1];
+  pOut->flags = (pOut->flags|MEM_Null)&~MEM_Undefined;
+  break;
+}
+
+/* Opcode: Blob P1 P2 * P4 *
+** Synopsis: r[P2]=P4 (len=P1)
+**
+** P4 points to a blob of data P1 bytes long.  Store this
+** blob in register P2.
+*/
+case OP_Blob: {                /* out2-prerelease */
+  assert( pOp->p1 <= SQLITE_MAX_LENGTH );
+  sqlite3VdbeMemSetStr(pOut, pOp->p4.z, pOp->p1, 0, 0);
+  pOut->enc = encoding;
+  UPDATE_MAX_BLOBSIZE(pOut);
+  break;
+}
+
+/* Opcode: Variable P1 P2 * P4 *
+** Synopsis: r[P2]=parameter(P1,P4)
+**
+** Transfer the values of bound parameter P1 into register P2
+**
+** If the parameter is named, then its name appears in P4.
+** The P4 value is used by sqlite3_bind_parameter_name().
+*/
+case OP_Variable: {            /* out2-prerelease */
+  Mem *pVar;       /* Value being transferred */
+
+  assert( pOp->p1>0 && pOp->p1<=p->nVar );
+  assert( pOp->p4.z==0 || pOp->p4.z==p->azVar[pOp->p1-1] );
+  pVar = &p->aVar[pOp->p1 - 1];
+  if( sqlite3VdbeMemTooBig(pVar) ){
+    goto too_big;
+  }
+  sqlite3VdbeMemShallowCopy(pOut, pVar, MEM_Static);
+  UPDATE_MAX_BLOBSIZE(pOut);
+  break;
+}
+
+/* Opcode: Move P1 P2 P3 * *
+** Synopsis:  r[P2@P3]=r[P1@P3]
+**
+** Move the P3 values in register P1..P1+P3-1 over into
+** registers P2..P2+P3-1.  Registers P1..P1+P3-1 are
+** left holding a NULL.  It is an error for register ranges
+** P1..P1+P3-1 and P2..P2+P3-1 to overlap.  It is an error
+** for P3 to be less than 1.
+*/
+case OP_Move: {
+  int n;           /* Number of registers left to copy */
+  int p1;          /* Register to copy from */
+  int p2;          /* Register to copy to */
+
+  n = pOp->p3;
+  p1 = pOp->p1;
+  p2 = pOp->p2;
+  assert( n>0 && p1>0 && p2>0 );
+  assert( p1+n<=p2 || p2+n<=p1 );
+
+  pIn1 = &aMem[p1];
+  pOut = &aMem[p2];
+  do{
+    assert( pOut<=&aMem[(p->nMem-p->nCursor)] );
+    assert( pIn1<=&aMem[(p->nMem-p->nCursor)] );
+    assert( memIsValid(pIn1) );
+    memAboutToChange(p, pOut);
+    sqlite3VdbeMemMove(pOut, pIn1);
+#ifdef SQLITE_DEBUG
+    if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){
+      pOut->pScopyFrom += p1 - pOp->p2;
+    }
+#endif
+    REGISTER_TRACE(p2++, pOut);
+    pIn1++;
+    pOut++;
+  }while( --n );
+  break;
+}
+
+/* Opcode: Copy P1 P2 P3 * *
+** Synopsis: r[P2@P3+1]=r[P1@P3+1]
+**
+** Make a copy of registers P1..P1+P3 into registers P2..P2+P3.
+**
+** This instruction makes a deep copy of the value.  A duplicate
+** is made of any string or blob constant.  See also OP_SCopy.
+*/
+case OP_Copy: {
+  int n;
+
+  n = pOp->p3;
+  pIn1 = &aMem[pOp->p1];
+  pOut = &aMem[pOp->p2];
+  assert( pOut!=pIn1 );
+  while( 1 ){
+    sqlite3VdbeMemShallowCopy(pOut, pIn1, MEM_Ephem);
+    Deephemeralize(pOut);
+#ifdef SQLITE_DEBUG
+    pOut->pScopyFrom = 0;
+#endif
+    REGISTER_TRACE(pOp->p2+pOp->p3-n, pOut);
+    if( (n--)==0 ) break;
+    pOut++;
+    pIn1++;
+  }
+  break;
+}
+
+/* Opcode: SCopy P1 P2 * * *
+** Synopsis: r[P2]=r[P1]
+**
+** Make a shallow copy of register P1 into register P2.
+**
+** This instruction makes a shallow copy of the value.  If the value
+** is a string or blob, then the copy is only a pointer to the
+** original and hence if the original changes so will the copy.
+** Worse, if the original is deallocated, the copy becomes invalid.
+** Thus the program must guarantee that the original will not change
+** during the lifetime of the copy.  Use OP_Copy to make a complete
+** copy.
+*/
+case OP_SCopy: {            /* out2 */
+  pIn1 = &aMem[pOp->p1];
+  pOut = &aMem[pOp->p2];
+  assert( pOut!=pIn1 );
+  sqlite3VdbeMemShallowCopy(pOut, pIn1, MEM_Ephem);
+#ifdef SQLITE_DEBUG
+  if( pOut->pScopyFrom==0 ) pOut->pScopyFrom = pIn1;
+#endif
+  break;
+}
+
+/* Opcode: ResultRow P1 P2 * * *
+** Synopsis:  output=r[P1@P2]
+**
+** The registers P1 through P1+P2-1 contain a single row of
+** results. This opcode causes the sqlite3_step() call to terminate
+** with an SQLITE_ROW return code and it sets up the sqlite3_stmt
+** structure to provide access to the r(P1)..r(P1+P2-1) values as
+** the result row.
+*/
+case OP_ResultRow: {
+  Mem *pMem;
+  int i;
+  assert( p->nResColumn==pOp->p2 );
+  assert( pOp->p1>0 );
+  assert( pOp->p1+pOp->p2<=(p->nMem-p->nCursor)+1 );
+
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+  /* Run the progress counter just before returning.
+  */
+  if( db->xProgress!=0
+   && nVmStep>=nProgressLimit
+   && db->xProgress(db->pProgressArg)!=0
+  ){
+    rc = SQLITE_INTERRUPT;
+    goto vdbe_error_halt;
+  }
+#endif
+
+  /* If this statement has violated immediate foreign key constraints, do
+  ** not return the number of rows modified. And do not RELEASE the statement
+  ** transaction. It needs to be rolled back.  */
+  if( SQLITE_OK!=(rc = sqlite3VdbeCheckFk(p, 0)) ){
+    assert( db->flags&SQLITE_CountRows );
+    assert( p->usesStmtJournal );
+    break;
+  }
+
+  /* If the SQLITE_CountRows flag is set in sqlite3.flags mask, then 
+  ** DML statements invoke this opcode to return the number of rows 
+  ** modified to the user. This is the only way that a VM that
+  ** opens a statement transaction may invoke this opcode.
+  **
+  ** In case this is such a statement, close any statement transaction
+  ** opened by this VM before returning control to the user. This is to
+  ** ensure that statement-transactions are always nested, not overlapping.
+  ** If the open statement-transaction is not closed here, then the user
+  ** may step another VM that opens its own statement transaction. This
+  ** may lead to overlapping statement transactions.
+  **
+  ** The statement transaction is never a top-level transaction.  Hence
+  ** the RELEASE call below can never fail.
+  */
+  assert( p->iStatement==0 || db->flags&SQLITE_CountRows );
+  rc = sqlite3VdbeCloseStatement(p, SAVEPOINT_RELEASE);
+  if( NEVER(rc!=SQLITE_OK) ){
+    break;
+  }
+
+  /* Invalidate all ephemeral cursor row caches */
+  p->cacheCtr = (p->cacheCtr + 2)|1;
+
+  /* Make sure the results of the current row are \000 terminated
+  ** and have an assigned type.  The results are de-ephemeralized as
+  ** a side effect.
+  */
+  pMem = p->pResultSet = &aMem[pOp->p1];
+  for(i=0; i<pOp->p2; i++){
+    assert( memIsValid(&pMem[i]) );
+    Deephemeralize(&pMem[i]);
+    assert( (pMem[i].flags & MEM_Ephem)==0
+            || (pMem[i].flags & (MEM_Str|MEM_Blob))==0 );
+    sqlite3VdbeMemNulTerminate(&pMem[i]);
+    REGISTER_TRACE(pOp->p1+i, &pMem[i]);
+  }
+  if( db->mallocFailed ) goto no_mem;
+
+  /* Return SQLITE_ROW
+  */
+  p->pc = pc + 1;
+  rc = SQLITE_ROW;
+  goto vdbe_return;
+}
+
+/* Opcode: Concat P1 P2 P3 * *
+** Synopsis: r[P3]=r[P2]+r[P1]
+**
+** Add the text in register P1 onto the end of the text in
+** register P2 and store the result in register P3.
+** If either the P1 or P2 text are NULL then store NULL in P3.
+**
+**   P3 = P2 || P1
+**
+** It is illegal for P1 and P3 to be the same register. Sometimes,
+** if P3 is the same register as P2, the implementation is able
+** to avoid a memcpy().
+*/
+case OP_Concat: {           /* same as TK_CONCAT, in1, in2, out3 */
+  i64 nByte;
+
+  pIn1 = &aMem[pOp->p1];
+  pIn2 = &aMem[pOp->p2];
+  pOut = &aMem[pOp->p3];
+  assert( pIn1!=pOut );
+  if( (pIn1->flags | pIn2->flags) & MEM_Null ){
+    sqlite3VdbeMemSetNull(pOut);
+    break;
+  }
+  if( ExpandBlob(pIn1) || ExpandBlob(pIn2) ) goto no_mem;
+  Stringify(pIn1, encoding);
+  Stringify(pIn2, encoding);
+  nByte = pIn1->n + pIn2->n;
+  if( nByte>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    goto too_big;
+  }
+  if( sqlite3VdbeMemGrow(pOut, (int)nByte+2, pOut==pIn2) ){
+    goto no_mem;
+  }
+  MemSetTypeFlag(pOut, MEM_Str);
+  if( pOut!=pIn2 ){
+    memcpy(pOut->z, pIn2->z, pIn2->n);
+  }
+  memcpy(&pOut->z[pIn2->n], pIn1->z, pIn1->n);
+  pOut->z[nByte]=0;
+  pOut->z[nByte+1] = 0;
+  pOut->flags |= MEM_Term;
+  pOut->n = (int)nByte;
+  pOut->enc = encoding;
+  UPDATE_MAX_BLOBSIZE(pOut);
+  break;
+}
+
+/* Opcode: Add P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P1]+r[P2]
+**
+** Add the value in register P1 to the value in register P2
+** and store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: Multiply P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P1]*r[P2]
+**
+**
+** Multiply the value in register P1 by the value in register P2
+** and store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: Subtract P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P2]-r[P1]
+**
+** Subtract the value in register P1 from the value in register P2
+** and store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: Divide P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P2]/r[P1]
+**
+** Divide the value in register P1 by the value in register P2
+** and store the result in register P3 (P3=P2/P1). If the value in 
+** register P1 is zero, then the result is NULL. If either input is 
+** NULL, the result is NULL.
+*/
+/* Opcode: Remainder P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P2]%r[P1]
+**
+** Compute the remainder after integer register P2 is divided by 
+** register P1 and store the result in register P3. 
+** If the value in register P1 is zero the result is NULL.
+** If either operand is NULL, the result is NULL.
+*/
+case OP_Add:                   /* same as TK_PLUS, in1, in2, out3 */
+case OP_Subtract:              /* same as TK_MINUS, in1, in2, out3 */
+case OP_Multiply:              /* same as TK_STAR, in1, in2, out3 */
+case OP_Divide:                /* same as TK_SLASH, in1, in2, out3 */
+case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
+  char bIntint;   /* Started out as two integer operands */
+  u16 flags;      /* Combined MEM_* flags from both inputs */
+  u16 type1;      /* Numeric type of left operand */
+  u16 type2;      /* Numeric type of right operand */
+  i64 iA;         /* Integer value of left operand */
+  i64 iB;         /* Integer value of right operand */
+  double rA;      /* Real value of left operand */
+  double rB;      /* Real value of right operand */
+
+  pIn1 = &aMem[pOp->p1];
+  type1 = numericType(pIn1);
+  pIn2 = &aMem[pOp->p2];
+  type2 = numericType(pIn2);
+  pOut = &aMem[pOp->p3];
+  flags = pIn1->flags | pIn2->flags;
+  if( (flags & MEM_Null)!=0 ) goto arithmetic_result_is_null;
+  if( (type1 & type2 & MEM_Int)!=0 ){
+    iA = pIn1->u.i;
+    iB = pIn2->u.i;
+    bIntint = 1;
+    switch( pOp->opcode ){
+      case OP_Add:       if( sqlite3AddInt64(&iB,iA) ) goto fp_math;  break;
+      case OP_Subtract:  if( sqlite3SubInt64(&iB,iA) ) goto fp_math;  break;
+      case OP_Multiply:  if( sqlite3MulInt64(&iB,iA) ) goto fp_math;  break;
+      case OP_Divide: {
+        if( iA==0 ) goto arithmetic_result_is_null;
+        if( iA==-1 && iB==SMALLEST_INT64 ) goto fp_math;
+        iB /= iA;
+        break;
+      }
+      default: {
+        if( iA==0 ) goto arithmetic_result_is_null;
+        if( iA==-1 ) iA = 1;
+        iB %= iA;
+        break;
+      }
+    }
+    pOut->u.i = iB;
+    MemSetTypeFlag(pOut, MEM_Int);
+  }else{
+    bIntint = 0;
+fp_math:
+    rA = sqlite3VdbeRealValue(pIn1);
+    rB = sqlite3VdbeRealValue(pIn2);
+    switch( pOp->opcode ){
+      case OP_Add:         rB += rA;       break;
+      case OP_Subtract:    rB -= rA;       break;
+      case OP_Multiply:    rB *= rA;       break;
+      case OP_Divide: {
+        /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */
+        if( rA==(double)0 ) goto arithmetic_result_is_null;
+        rB /= rA;
+        break;
+      }
+      default: {
+        iA = (i64)rA;
+        iB = (i64)rB;
+        if( iA==0 ) goto arithmetic_result_is_null;
+        if( iA==-1 ) iA = 1;
+        rB = (double)(iB % iA);
+        break;
+      }
+    }
+#ifdef SQLITE_OMIT_FLOATING_POINT
+    pOut->u.i = rB;
+    MemSetTypeFlag(pOut, MEM_Int);
+#else
+    if( sqlite3IsNaN(rB) ){
+      goto arithmetic_result_is_null;
+    }
+    pOut->u.r = rB;
+    MemSetTypeFlag(pOut, MEM_Real);
+    if( ((type1|type2)&MEM_Real)==0 && !bIntint ){
+      sqlite3VdbeIntegerAffinity(pOut);
+    }
+#endif
+  }
+  break;
+
+arithmetic_result_is_null:
+  sqlite3VdbeMemSetNull(pOut);
+  break;
+}
+
+/* Opcode: CollSeq P1 * * P4
+**
+** P4 is a pointer to a CollSeq struct. If the next call to a user function
+** or aggregate calls sqlite3GetFuncCollSeq(), this collation sequence will
+** be returned. This is used by the built-in min(), max() and nullif()
+** functions.
+**
+** If P1 is not zero, then it is a register that a subsequent min() or
+** max() aggregate will set to 1 if the current row is not the minimum or
+** maximum.  The P1 register is initialized to 0 by this instruction.
+**
+** The interface used by the implementation of the aforementioned functions
+** to retrieve the collation sequence set by this opcode is not available
+** publicly, only to user functions defined in func.c.
+*/
+case OP_CollSeq: {
+  assert( pOp->p4type==P4_COLLSEQ );
+  if( pOp->p1 ){
+    sqlite3VdbeMemSetInt64(&aMem[pOp->p1], 0);
+  }
+  break;
+}
+
+/* Opcode: Function P1 P2 P3 P4 P5
+** Synopsis: r[P3]=func(r[P2@P5])
+**
+** Invoke a user function (P4 is a pointer to a Function structure that
+** defines the function) with P5 arguments taken from register P2 and
+** successors.  The result of the function is stored in register P3.
+** Register P3 must not be one of the function inputs.
+**
+** P1 is a 32-bit bitmask indicating whether or not each argument to the 
+** function was determined to be constant at compile time. If the first
+** argument was constant then bit 0 of P1 is set. This is used to determine
+** whether meta data associated with a user function argument using the
+** sqlite3_set_auxdata() API may be safely retained until the next
+** invocation of this opcode.
+**
+** See also: AggStep and AggFinal
+*/
+case OP_Function: {
+  int i;
+  Mem *pArg;
+  sqlite3_context ctx;
+  sqlite3_value **apVal;
+  int n;
+
+  n = pOp->p5;
+  apVal = p->apArg;
+  assert( apVal || n==0 );
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  ctx.pOut = &aMem[pOp->p3];
+  memAboutToChange(p, ctx.pOut);
+
+  assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) );
+  assert( pOp->p3<pOp->p2 || pOp->p3>=pOp->p2+n );
+  pArg = &aMem[pOp->p2];
+  for(i=0; i<n; i++, pArg++){
+    assert( memIsValid(pArg) );
+    apVal[i] = pArg;
+    Deephemeralize(pArg);
+    REGISTER_TRACE(pOp->p2+i, pArg);
+  }
+
+  assert( pOp->p4type==P4_FUNCDEF );
+  ctx.pFunc = pOp->p4.pFunc;
+  ctx.iOp = pc;
+  ctx.pVdbe = p;
+  MemSetTypeFlag(ctx.pOut, MEM_Null);
+  ctx.fErrorOrAux = 0;
+  db->lastRowid = lastRowid;
+  (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */
+  lastRowid = db->lastRowid;  /* Remember rowid changes made by xFunc */
+
+  /* If the function returned an error, throw an exception */
+  if( ctx.fErrorOrAux ){
+    if( ctx.isError ){
+      sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(ctx.pOut));
+      rc = ctx.isError;
+    }
+    sqlite3VdbeDeleteAuxData(p, pc, pOp->p1);
+  }
+
+  /* Copy the result of the function into register P3 */
+  sqlite3VdbeChangeEncoding(ctx.pOut, encoding);
+  if( sqlite3VdbeMemTooBig(ctx.pOut) ){
+    goto too_big;
+  }
+
+  REGISTER_TRACE(pOp->p3, ctx.pOut);
+  UPDATE_MAX_BLOBSIZE(ctx.pOut);
+  break;
+}
+
+/* Opcode: BitAnd P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P1]&r[P2]
+**
+** Take the bit-wise AND of the values in register P1 and P2 and
+** store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: BitOr P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P1]|r[P2]
+**
+** Take the bit-wise OR of the values in register P1 and P2 and
+** store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: ShiftLeft P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P2]<<r[P1]
+**
+** Shift the integer value in register P2 to the left by the
+** number of bits specified by the integer in register P1.
+** Store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+/* Opcode: ShiftRight P1 P2 P3 * *
+** Synopsis:  r[P3]=r[P2]>>r[P1]
+**
+** Shift the integer value in register P2 to the right by the
+** number of bits specified by the integer in register P1.
+** Store the result in register P3.
+** If either input is NULL, the result is NULL.
+*/
+case OP_BitAnd:                 /* same as TK_BITAND, in1, in2, out3 */
+case OP_BitOr:                  /* same as TK_BITOR, in1, in2, out3 */
+case OP_ShiftLeft:              /* same as TK_LSHIFT, in1, in2, out3 */
+case OP_ShiftRight: {           /* same as TK_RSHIFT, in1, in2, out3 */
+  i64 iA;
+  u64 uA;
+  i64 iB;
+  u8 op;
+
+  pIn1 = &aMem[pOp->p1];
+  pIn2 = &aMem[pOp->p2];
+  pOut = &aMem[pOp->p3];
+  if( (pIn1->flags | pIn2->flags) & MEM_Null ){
+    sqlite3VdbeMemSetNull(pOut);
+    break;
+  }
+  iA = sqlite3VdbeIntValue(pIn2);
+  iB = sqlite3VdbeIntValue(pIn1);
+  op = pOp->opcode;
+  if( op==OP_BitAnd ){
+    iA &= iB;
+  }else if( op==OP_BitOr ){
+    iA |= iB;
+  }else if( iB!=0 ){
+    assert( op==OP_ShiftRight || op==OP_ShiftLeft );
+
+    /* If shifting by a negative amount, shift in the other direction */
+    if( iB<0 ){
+      assert( OP_ShiftRight==OP_ShiftLeft+1 );
+      op = 2*OP_ShiftLeft + 1 - op;
+      iB = iB>(-64) ? -iB : 64;
+    }
+
+    if( iB>=64 ){
+      iA = (iA>=0 || op==OP_ShiftLeft) ? 0 : -1;
+    }else{
+      memcpy(&uA, &iA, sizeof(uA));
+      if( op==OP_ShiftLeft ){
+        uA <<= iB;
+      }else{
+        uA >>= iB;
+        /* Sign-extend on a right shift of a negative number */
+        if( iA<0 ) uA |= ((((u64)0xffffffff)<<32)|0xffffffff) << (64-iB);
+      }
+      memcpy(&iA, &uA, sizeof(iA));
+    }
+  }
+  pOut->u.i = iA;
+  MemSetTypeFlag(pOut, MEM_Int);
+  break;
+}
+
+/* Opcode: AddImm  P1 P2 * * *
+** Synopsis:  r[P1]=r[P1]+P2
+** 
+** Add the constant P2 to the value in register P1.
+** The result is always an integer.
+**
+** To force any register to be an integer, just add 0.
+*/
+case OP_AddImm: {            /* in1 */
+  pIn1 = &aMem[pOp->p1];
+  memAboutToChange(p, pIn1);
+  sqlite3VdbeMemIntegerify(pIn1);
+  pIn1->u.i += pOp->p2;
+  break;
+}
+
+/* Opcode: MustBeInt P1 P2 * * *
+** 
+** Force the value in register P1 to be an integer.  If the value
+** in P1 is not an integer and cannot be converted into an integer
+** without data loss, then jump immediately to P2, or if P2==0
+** raise an SQLITE_MISMATCH exception.
+*/
+case OP_MustBeInt: {            /* jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  if( (pIn1->flags & MEM_Int)==0 ){
+    applyAffinity(pIn1, SQLITE_AFF_NUMERIC, encoding);
+    VdbeBranchTaken((pIn1->flags&MEM_Int)==0, 2);
+    if( (pIn1->flags & MEM_Int)==0 ){
+      if( pOp->p2==0 ){
+        rc = SQLITE_MISMATCH;
+        goto abort_due_to_error;
+      }else{
+        pc = pOp->p2 - 1;
+        break;
+      }
+    }
+  }
+  MemSetTypeFlag(pIn1, MEM_Int);
+  break;
+}
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/* Opcode: RealAffinity P1 * * * *
+**
+** If register P1 holds an integer convert it to a real value.
+**
+** This opcode is used when extracting information from a column that
+** has REAL affinity.  Such column values may still be stored as
+** integers, for space efficiency, but after extraction we want them
+** to have only a real value.
+*/
+case OP_RealAffinity: {                  /* in1 */
+  pIn1 = &aMem[pOp->p1];
+  if( pIn1->flags & MEM_Int ){
+    sqlite3VdbeMemRealify(pIn1);
+  }
+  break;
+}
+#endif
+
+#ifndef SQLITE_OMIT_CAST
+/* Opcode: Cast P1 P2 * * *
+** Synopsis: affinity(r[P1])
+**
+** Force the value in register P1 to be the type defined by P2.
+** 
+** <ul>
+** <li value="97"> TEXT
+** <li value="98"> BLOB
+** <li value="99"> NUMERIC
+** <li value="100"> INTEGER
+** <li value="101"> REAL
+** </ul>
+**
+** A NULL value is not changed by this routine.  It remains NULL.
+*/
+case OP_Cast: {                  /* in1 */
+  assert( pOp->p2>=SQLITE_AFF_NONE && pOp->p2<=SQLITE_AFF_REAL );
+  testcase( pOp->p2==SQLITE_AFF_TEXT );
+  testcase( pOp->p2==SQLITE_AFF_NONE );
+  testcase( pOp->p2==SQLITE_AFF_NUMERIC );
+  testcase( pOp->p2==SQLITE_AFF_INTEGER );
+  testcase( pOp->p2==SQLITE_AFF_REAL );
+  pIn1 = &aMem[pOp->p1];
+  memAboutToChange(p, pIn1);
+  rc = ExpandBlob(pIn1);
+  sqlite3VdbeMemCast(pIn1, pOp->p2, encoding);
+  UPDATE_MAX_BLOBSIZE(pIn1);
+  break;
+}
+#endif /* SQLITE_OMIT_CAST */
+
+/* Opcode: Lt P1 P2 P3 P4 P5
+** Synopsis: if r[P1]<r[P3] goto P2
+**
+** Compare the values in register P1 and P3.  If reg(P3)<reg(P1) then
+** jump to address P2.  
+**
+** If the SQLITE_JUMPIFNULL bit of P5 is set and either reg(P1) or
+** reg(P3) is NULL then take the jump.  If the SQLITE_JUMPIFNULL 
+** bit is clear then fall through if either operand is NULL.
+**
+** The SQLITE_AFF_MASK portion of P5 must be an affinity character -
+** SQLITE_AFF_TEXT, SQLITE_AFF_INTEGER, and so forth. An attempt is made 
+** to coerce both inputs according to this affinity before the
+** comparison is made. If the SQLITE_AFF_MASK is 0x00, then numeric
+** affinity is used. Note that the affinity conversions are stored
+** back into the input registers P1 and P3.  So this opcode can cause
+** persistent changes to registers P1 and P3.
+**
+** Once any conversions have taken place, and neither value is NULL, 
+** the values are compared. If both values are blobs then memcmp() is
+** used to determine the results of the comparison.  If both values
+** are text, then the appropriate collating function specified in
+** P4 is  used to do the comparison.  If P4 is not specified then
+** memcmp() is used to compare text string.  If both values are
+** numeric, then a numeric comparison is used. If the two values
+** are of different types, then numbers are considered less than
+** strings and strings are considered less than blobs.
+**
+** If the SQLITE_STOREP2 bit of P5 is set, then do not jump.  Instead,
+** store a boolean result (either 0, or 1, or NULL) in register P2.
+**
+** If the SQLITE_NULLEQ bit is set in P5, then NULL values are considered
+** equal to one another, provided that they do not have their MEM_Cleared
+** bit set.
+*/
+/* Opcode: Ne P1 P2 P3 P4 P5
+** Synopsis: if r[P1]!=r[P3] goto P2
+**
+** This works just like the Lt opcode except that the jump is taken if
+** the operands in registers P1 and P3 are not equal.  See the Lt opcode for
+** additional information.
+**
+** If SQLITE_NULLEQ is set in P5 then the result of comparison is always either
+** true or false and is never NULL.  If both operands are NULL then the result
+** of comparison is false.  If either operand is NULL then the result is true.
+** If neither operand is NULL the result is the same as it would be if
+** the SQLITE_NULLEQ flag were omitted from P5.
+*/
+/* Opcode: Eq P1 P2 P3 P4 P5
+** Synopsis: if r[P1]==r[P3] goto P2
+**
+** This works just like the Lt opcode except that the jump is taken if
+** the operands in registers P1 and P3 are equal.
+** See the Lt opcode for additional information.
+**
+** If SQLITE_NULLEQ is set in P5 then the result of comparison is always either
+** true or false and is never NULL.  If both operands are NULL then the result
+** of comparison is true.  If either operand is NULL then the result is false.
+** If neither operand is NULL the result is the same as it would be if
+** the SQLITE_NULLEQ flag were omitted from P5.
+*/
+/* Opcode: Le P1 P2 P3 P4 P5
+** Synopsis: if r[P1]<=r[P3] goto P2
+**
+** This works just like the Lt opcode except that the jump is taken if
+** the content of register P3 is less than or equal to the content of
+** register P1.  See the Lt opcode for additional information.
+*/
+/* Opcode: Gt P1 P2 P3 P4 P5
+** Synopsis: if r[P1]>r[P3] goto P2
+**
+** This works just like the Lt opcode except that the jump is taken if
+** the content of register P3 is greater than the content of
+** register P1.  See the Lt opcode for additional information.
+*/
+/* Opcode: Ge P1 P2 P3 P4 P5
+** Synopsis: if r[P1]>=r[P3] goto P2
+**
+** This works just like the Lt opcode except that the jump is taken if
+** the content of register P3 is greater than or equal to the content of
+** register P1.  See the Lt opcode for additional information.
+*/
+case OP_Eq:               /* same as TK_EQ, jump, in1, in3 */
+case OP_Ne:               /* same as TK_NE, jump, in1, in3 */
+case OP_Lt:               /* same as TK_LT, jump, in1, in3 */
+case OP_Le:               /* same as TK_LE, jump, in1, in3 */
+case OP_Gt:               /* same as TK_GT, jump, in1, in3 */
+case OP_Ge: {             /* same as TK_GE, jump, in1, in3 */
+  int res;            /* Result of the comparison of pIn1 against pIn3 */
+  char affinity;      /* Affinity to use for comparison */
+  u16 flags1;         /* Copy of initial value of pIn1->flags */
+  u16 flags3;         /* Copy of initial value of pIn3->flags */
+
+  pIn1 = &aMem[pOp->p1];
+  pIn3 = &aMem[pOp->p3];
+  flags1 = pIn1->flags;
+  flags3 = pIn3->flags;
+  if( (flags1 | flags3)&MEM_Null ){
+    /* One or both operands are NULL */
+    if( pOp->p5 & SQLITE_NULLEQ ){
+      /* If SQLITE_NULLEQ is set (which will only happen if the operator is
+      ** OP_Eq or OP_Ne) then take the jump or not depending on whether
+      ** or not both operands are null.
+      */
+      assert( pOp->opcode==OP_Eq || pOp->opcode==OP_Ne );
+      assert( (flags1 & MEM_Cleared)==0 );
+      assert( (pOp->p5 & SQLITE_JUMPIFNULL)==0 );
+      if( (flags1&MEM_Null)!=0
+       && (flags3&MEM_Null)!=0
+       && (flags3&MEM_Cleared)==0
+      ){
+        res = 0;  /* Results are equal */
+      }else{
+        res = 1;  /* Results are not equal */
+      }
+    }else{
+      /* SQLITE_NULLEQ is clear and at least one operand is NULL,
+      ** then the result is always NULL.
+      ** The jump is taken if the SQLITE_JUMPIFNULL bit is set.
+      */
+      if( pOp->p5 & SQLITE_STOREP2 ){
+        pOut = &aMem[pOp->p2];
+        MemSetTypeFlag(pOut, MEM_Null);
+        REGISTER_TRACE(pOp->p2, pOut);
+      }else{
+        VdbeBranchTaken(2,3);
+        if( pOp->p5 & SQLITE_JUMPIFNULL ){
+          pc = pOp->p2-1;
+        }
+      }
+      break;
+    }
+  }else{
+    /* Neither operand is NULL.  Do a comparison. */
+    affinity = pOp->p5 & SQLITE_AFF_MASK;
+    if( affinity>=SQLITE_AFF_NUMERIC ){
+      if( (pIn1->flags & (MEM_Int|MEM_Real|MEM_Str))==MEM_Str ){
+        applyNumericAffinity(pIn1,0);
+      }
+      if( (pIn3->flags & (MEM_Int|MEM_Real|MEM_Str))==MEM_Str ){
+        applyNumericAffinity(pIn3,0);
+      }
+    }else if( affinity==SQLITE_AFF_TEXT ){
+      if( (pIn1->flags & MEM_Str)==0 && (pIn1->flags & (MEM_Int|MEM_Real))!=0 ){
+        testcase( pIn1->flags & MEM_Int );
+        testcase( pIn1->flags & MEM_Real );
+        sqlite3VdbeMemStringify(pIn1, encoding, 1);
+      }
+      if( (pIn3->flags & MEM_Str)==0 && (pIn3->flags & (MEM_Int|MEM_Real))!=0 ){
+        testcase( pIn3->flags & MEM_Int );
+        testcase( pIn3->flags & MEM_Real );
+        sqlite3VdbeMemStringify(pIn3, encoding, 1);
+      }
+    }
+    assert( pOp->p4type==P4_COLLSEQ || pOp->p4.pColl==0 );
+    if( pIn1->flags & MEM_Zero ){
+      sqlite3VdbeMemExpandBlob(pIn1);
+      flags1 &= ~MEM_Zero;
+    }
+    if( pIn3->flags & MEM_Zero ){
+      sqlite3VdbeMemExpandBlob(pIn3);
+      flags3 &= ~MEM_Zero;
+    }
+    if( db->mallocFailed ) goto no_mem;
+    res = sqlite3MemCompare(pIn3, pIn1, pOp->p4.pColl);
+  }
+  switch( pOp->opcode ){
+    case OP_Eq:    res = res==0;     break;
+    case OP_Ne:    res = res!=0;     break;
+    case OP_Lt:    res = res<0;      break;
+    case OP_Le:    res = res<=0;     break;
+    case OP_Gt:    res = res>0;      break;
+    default:       res = res>=0;     break;
+  }
+
+  if( pOp->p5 & SQLITE_STOREP2 ){
+    pOut = &aMem[pOp->p2];
+    memAboutToChange(p, pOut);
+    MemSetTypeFlag(pOut, MEM_Int);
+    pOut->u.i = res;
+    REGISTER_TRACE(pOp->p2, pOut);
+  }else{
+    VdbeBranchTaken(res!=0, (pOp->p5 & SQLITE_NULLEQ)?2:3);
+    if( res ){
+      pc = pOp->p2-1;
+    }
+  }
+  /* Undo any changes made by applyAffinity() to the input registers. */
+  pIn1->flags = flags1;
+  pIn3->flags = flags3;
+  break;
+}
+
+/* Opcode: Permutation * * * P4 *
+**
+** Set the permutation used by the OP_Compare operator to be the array
+** of integers in P4.
+**
+** The permutation is only valid until the next OP_Compare that has
+** the OPFLAG_PERMUTE bit set in P5. Typically the OP_Permutation should 
+** occur immediately prior to the OP_Compare.
+*/
+case OP_Permutation: {
+  assert( pOp->p4type==P4_INTARRAY );
+  assert( pOp->p4.ai );
+  aPermute = pOp->p4.ai;
+  break;
+}
+
+/* Opcode: Compare P1 P2 P3 P4 P5
+** Synopsis: r[P1@P3] <-> r[P2@P3]
+**
+** Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this
+** vector "A") and in reg(P2)..reg(P2+P3-1) ("B").  Save the result of
+** the comparison for use by the next OP_Jump instruct.
+**
+** If P5 has the OPFLAG_PERMUTE bit set, then the order of comparison is
+** determined by the most recent OP_Permutation operator.  If the
+** OPFLAG_PERMUTE bit is clear, then register are compared in sequential
+** order.
+**
+** P4 is a KeyInfo structure that defines collating sequences and sort
+** orders for the comparison.  The permutation applies to registers
+** only.  The KeyInfo elements are used sequentially.
+**
+** The comparison is a sort comparison, so NULLs compare equal,
+** NULLs are less than numbers, numbers are less than strings,
+** and strings are less than blobs.
+*/
+case OP_Compare: {
+  int n;
+  int i;
+  int p1;
+  int p2;
+  const KeyInfo *pKeyInfo;
+  int idx;
+  CollSeq *pColl;    /* Collating sequence to use on this term */
+  int bRev;          /* True for DESCENDING sort order */
+
+  if( (pOp->p5 & OPFLAG_PERMUTE)==0 ) aPermute = 0;
+  n = pOp->p3;
+  pKeyInfo = pOp->p4.pKeyInfo;
+  assert( n>0 );
+  assert( pKeyInfo!=0 );
+  p1 = pOp->p1;
+  p2 = pOp->p2;
+#if SQLITE_DEBUG
+  if( aPermute ){
+    int k, mx = 0;
+    for(k=0; k<n; k++) if( aPermute[k]>mx ) mx = aPermute[k];
+    assert( p1>0 && p1+mx<=(p->nMem-p->nCursor)+1 );
+    assert( p2>0 && p2+mx<=(p->nMem-p->nCursor)+1 );
+  }else{
+    assert( p1>0 && p1+n<=(p->nMem-p->nCursor)+1 );
+    assert( p2>0 && p2+n<=(p->nMem-p->nCursor)+1 );
+  }
+#endif /* SQLITE_DEBUG */
+  for(i=0; i<n; i++){
+    idx = aPermute ? aPermute[i] : i;
+    assert( memIsValid(&aMem[p1+idx]) );
+    assert( memIsValid(&aMem[p2+idx]) );
+    REGISTER_TRACE(p1+idx, &aMem[p1+idx]);
+    REGISTER_TRACE(p2+idx, &aMem[p2+idx]);
+    assert( i<pKeyInfo->nField );
+    pColl = pKeyInfo->aColl[i];
+    bRev = pKeyInfo->aSortOrder[i];
+    iCompare = sqlite3MemCompare(&aMem[p1+idx], &aMem[p2+idx], pColl);
+    if( iCompare ){
+      if( bRev ) iCompare = -iCompare;
+      break;
+    }
+  }
+  aPermute = 0;
+  break;
+}
+
+/* Opcode: Jump P1 P2 P3 * *
+**
+** Jump to the instruction at address P1, P2, or P3 depending on whether
+** in the most recent OP_Compare instruction the P1 vector was less than
+** equal to, or greater than the P2 vector, respectively.
+*/
+case OP_Jump: {             /* jump */
+  if( iCompare<0 ){
+    pc = pOp->p1 - 1;  VdbeBranchTaken(0,3);
+  }else if( iCompare==0 ){
+    pc = pOp->p2 - 1;  VdbeBranchTaken(1,3);
+  }else{
+    pc = pOp->p3 - 1;  VdbeBranchTaken(2,3);
+  }
+  break;
+}
+
+/* Opcode: And P1 P2 P3 * *
+** Synopsis: r[P3]=(r[P1] && r[P2])
+**
+** Take the logical AND of the values in registers P1 and P2 and
+** write the result into register P3.
+**
+** If either P1 or P2 is 0 (false) then the result is 0 even if
+** the other input is NULL.  A NULL and true or two NULLs give
+** a NULL output.
+*/
+/* Opcode: Or P1 P2 P3 * *
+** Synopsis: r[P3]=(r[P1] || r[P2])
+**
+** Take the logical OR of the values in register P1 and P2 and
+** store the answer in register P3.
+**
+** If either P1 or P2 is nonzero (true) then the result is 1 (true)
+** even if the other input is NULL.  A NULL and false or two NULLs
+** give a NULL output.
+*/
+case OP_And:              /* same as TK_AND, in1, in2, out3 */
+case OP_Or: {             /* same as TK_OR, in1, in2, out3 */
+  int v1;    /* Left operand:  0==FALSE, 1==TRUE, 2==UNKNOWN or NULL */
+  int v2;    /* Right operand: 0==FALSE, 1==TRUE, 2==UNKNOWN or NULL */
+
+  pIn1 = &aMem[pOp->p1];
+  if( pIn1->flags & MEM_Null ){
+    v1 = 2;
+  }else{
+    v1 = sqlite3VdbeIntValue(pIn1)!=0;
+  }
+  pIn2 = &aMem[pOp->p2];
+  if( pIn2->flags & MEM_Null ){
+    v2 = 2;
+  }else{
+    v2 = sqlite3VdbeIntValue(pIn2)!=0;
+  }
+  if( pOp->opcode==OP_And ){
+    static const unsigned char and_logic[] = { 0, 0, 0, 0, 1, 2, 0, 2, 2 };
+    v1 = and_logic[v1*3+v2];
+  }else{
+    static const unsigned char or_logic[] = { 0, 1, 2, 1, 1, 1, 2, 1, 2 };
+    v1 = or_logic[v1*3+v2];
+  }
+  pOut = &aMem[pOp->p3];
+  if( v1==2 ){
+    MemSetTypeFlag(pOut, MEM_Null);
+  }else{
+    pOut->u.i = v1;
+    MemSetTypeFlag(pOut, MEM_Int);
+  }
+  break;
+}
+
+/* Opcode: Not P1 P2 * * *
+** Synopsis: r[P2]= !r[P1]
+**
+** Interpret the value in register P1 as a boolean value.  Store the
+** boolean complement in register P2.  If the value in register P1 is 
+** NULL, then a NULL is stored in P2.
+*/
+case OP_Not: {                /* same as TK_NOT, in1, out2 */
+  pIn1 = &aMem[pOp->p1];
+  pOut = &aMem[pOp->p2];
+  sqlite3VdbeMemSetNull(pOut);
+  if( (pIn1->flags & MEM_Null)==0 ){
+    pOut->flags = MEM_Int;
+    pOut->u.i = !sqlite3VdbeIntValue(pIn1);
+  }
+  break;
+}
+
+/* Opcode: BitNot P1 P2 * * *
+** Synopsis: r[P1]= ~r[P1]
+**
+** Interpret the content of register P1 as an integer.  Store the
+** ones-complement of the P1 value into register P2.  If P1 holds
+** a NULL then store a NULL in P2.
+*/
+case OP_BitNot: {             /* same as TK_BITNOT, in1, out2 */
+  pIn1 = &aMem[pOp->p1];
+  pOut = &aMem[pOp->p2];
+  sqlite3VdbeMemSetNull(pOut);
+  if( (pIn1->flags & MEM_Null)==0 ){
+    pOut->flags = MEM_Int;
+    pOut->u.i = ~sqlite3VdbeIntValue(pIn1);
+  }
+  break;
+}
+
+/* Opcode: Once P1 P2 * * *
+**
+** Check the "once" flag number P1. If it is set, jump to instruction P2. 
+** Otherwise, set the flag and fall through to the next instruction.
+** In other words, this opcode causes all following opcodes up through P2
+** (but not including P2) to run just once and to be skipped on subsequent
+** times through the loop.
+**
+** All "once" flags are initially cleared whenever a prepared statement
+** first begins to run.
+*/
+case OP_Once: {             /* jump */
+  assert( pOp->p1<p->nOnceFlag );
+  VdbeBranchTaken(p->aOnceFlag[pOp->p1]!=0, 2);
+  if( p->aOnceFlag[pOp->p1] ){
+    pc = pOp->p2-1;
+  }else{
+    p->aOnceFlag[pOp->p1] = 1;
+  }
+  break;
+}
+
+/* Opcode: If P1 P2 P3 * *
+**
+** Jump to P2 if the value in register P1 is true.  The value
+** is considered true if it is numeric and non-zero.  If the value
+** in P1 is NULL then take the jump if and only if P3 is non-zero.
+*/
+/* Opcode: IfNot P1 P2 P3 * *
+**
+** Jump to P2 if the value in register P1 is False.  The value
+** is considered false if it has a numeric value of zero.  If the value
+** in P1 is NULL then take the jump if and only if P3 is non-zero.
+*/
+case OP_If:                 /* jump, in1 */
+case OP_IfNot: {            /* jump, in1 */
+  int c;
+  pIn1 = &aMem[pOp->p1];
+  if( pIn1->flags & MEM_Null ){
+    c = pOp->p3;
+  }else{
+#ifdef SQLITE_OMIT_FLOATING_POINT
+    c = sqlite3VdbeIntValue(pIn1)!=0;
+#else
+    c = sqlite3VdbeRealValue(pIn1)!=0.0;
+#endif
+    if( pOp->opcode==OP_IfNot ) c = !c;
+  }
+  VdbeBranchTaken(c!=0, 2);
+  if( c ){
+    pc = pOp->p2-1;
+  }
+  break;
+}
+
+/* Opcode: IsNull P1 P2 * * *
+** Synopsis:  if r[P1]==NULL goto P2
+**
+** Jump to P2 if the value in register P1 is NULL.
+*/
+case OP_IsNull: {            /* same as TK_ISNULL, jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  VdbeBranchTaken( (pIn1->flags & MEM_Null)!=0, 2);
+  if( (pIn1->flags & MEM_Null)!=0 ){
+    pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: NotNull P1 P2 * * *
+** Synopsis: if r[P1]!=NULL goto P2
+**
+** Jump to P2 if the value in register P1 is not NULL.  
+*/
+case OP_NotNull: {            /* same as TK_NOTNULL, jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  VdbeBranchTaken( (pIn1->flags & MEM_Null)==0, 2);
+  if( (pIn1->flags & MEM_Null)==0 ){
+    pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: Column P1 P2 P3 P4 P5
+** Synopsis:  r[P3]=PX
+**
+** Interpret the data that cursor P1 points to as a structure built using
+** the MakeRecord instruction.  (See the MakeRecord opcode for additional
+** information about the format of the data.)  Extract the P2-th column
+** from this record.  If there are less that (P2+1) 
+** values in the record, extract a NULL.
+**
+** The value extracted is stored in register P3.
+**
+** If the column contains fewer than P2 fields, then extract a NULL.  Or,
+** if the P4 argument is a P4_MEM use the value of the P4 argument as
+** the result.
+**
+** If the OPFLAG_CLEARCACHE bit is set on P5 and P1 is a pseudo-table cursor,
+** then the cache of the cursor is reset prior to extracting the column.
+** The first OP_Column against a pseudo-table after the value of the content
+** register has changed should have this bit set.
+**
+** If the OPFLAG_LENGTHARG and OPFLAG_TYPEOFARG bits are set on P5 when
+** the result is guaranteed to only be used as the argument of a length()
+** or typeof() function, respectively.  The loading of large blobs can be
+** skipped for length() and all content loading can be skipped for typeof().
+*/
+case OP_Column: {
+  i64 payloadSize64; /* Number of bytes in the record */
+  int p2;            /* column number to retrieve */
+  VdbeCursor *pC;    /* The VDBE cursor */
+  BtCursor *pCrsr;   /* The BTree cursor */
+  u32 *aOffset;      /* aOffset[i] is offset to start of data for i-th column */
+  int len;           /* The length of the serialized data for the column */
+  int i;             /* Loop counter */
+  Mem *pDest;        /* Where to write the extracted value */
+  Mem sMem;          /* For storing the record being decoded */
+  const u8 *zData;   /* Part of the record being decoded */
+  const u8 *zHdr;    /* Next unparsed byte of the header */
+  const u8 *zEndHdr; /* Pointer to first byte after the header */
+  u32 offset;        /* Offset into the data */
+  u32 szField;       /* Number of bytes in the content of a field */
+  u32 avail;         /* Number of bytes of available data */
+  u32 t;             /* A type code from the record header */
+  u16 fx;            /* pDest->flags value */
+  Mem *pReg;         /* PseudoTable input register */
+
+  p2 = pOp->p2;
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  pDest = &aMem[pOp->p3];
+  memAboutToChange(p, pDest);
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( p2<pC->nField );
+  aOffset = pC->aOffset;
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  assert( pC->pVtabCursor==0 ); /* OP_Column never called on virtual table */
+#endif
+  pCrsr = pC->pCursor;
+  assert( pCrsr!=0 || pC->pseudoTableReg>0 ); /* pCrsr NULL on PseudoTables */
+  assert( pCrsr!=0 || pC->nullRow );          /* pC->nullRow on PseudoTables */
+
+  /* If the cursor cache is stale, bring it up-to-date */
+  rc = sqlite3VdbeCursorMoveto(pC);
+  if( rc ) goto abort_due_to_error;
+  if( pC->cacheStatus!=p->cacheCtr ){
+    if( pC->nullRow ){
+      if( pCrsr==0 ){
+        assert( pC->pseudoTableReg>0 );
+        pReg = &aMem[pC->pseudoTableReg];
+        assert( pReg->flags & MEM_Blob );
+        assert( memIsValid(pReg) );
+        pC->payloadSize = pC->szRow = avail = pReg->n;
+        pC->aRow = (u8*)pReg->z;
+      }else{
+        sqlite3VdbeMemSetNull(pDest);
+        goto op_column_out;
+      }
+    }else{
+      assert( pCrsr );
+      if( pC->isTable==0 ){
+        assert( sqlite3BtreeCursorIsValid(pCrsr) );
+        VVA_ONLY(rc =) sqlite3BtreeKeySize(pCrsr, &payloadSize64);
+        assert( rc==SQLITE_OK ); /* True because of CursorMoveto() call above */
+        /* sqlite3BtreeParseCellPtr() uses getVarint32() to extract the
+        ** payload size, so it is impossible for payloadSize64 to be
+        ** larger than 32 bits. */
+        assert( (payloadSize64 & SQLITE_MAX_U32)==(u64)payloadSize64 );
+        pC->aRow = sqlite3BtreeKeyFetch(pCrsr, &avail);
+        pC->payloadSize = (u32)payloadSize64;
+      }else{
+        assert( sqlite3BtreeCursorIsValid(pCrsr) );
+        VVA_ONLY(rc =) sqlite3BtreeDataSize(pCrsr, &pC->payloadSize);
+        assert( rc==SQLITE_OK );   /* DataSize() cannot fail */
+        pC->aRow = sqlite3BtreeDataFetch(pCrsr, &avail);
+      }
+      assert( avail<=65536 );  /* Maximum page size is 64KiB */
+      if( pC->payloadSize <= (u32)avail ){
+        pC->szRow = pC->payloadSize;
+      }else{
+        pC->szRow = avail;
+      }
+      if( pC->payloadSize > (u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){
+        goto too_big;
+      }
+    }
+    pC->cacheStatus = p->cacheCtr;
+    pC->iHdrOffset = getVarint32(pC->aRow, offset);
+    pC->nHdrParsed = 0;
+    aOffset[0] = offset;
+
+    /* Make sure a corrupt database has not given us an oversize header.
+    ** Do this now to avoid an oversize memory allocation.
+    **
+    ** Type entries can be between 1 and 5 bytes each.  But 4 and 5 byte
+    ** types use so much data space that there can only be 4096 and 32 of
+    ** them, respectively.  So the maximum header length results from a
+    ** 3-byte type for each of the maximum of 32768 columns plus three
+    ** extra bytes for the header length itself.  32768*3 + 3 = 98307.
+    */
+    if( offset > 98307 || offset > pC->payloadSize ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto op_column_error;
+    }
+
+    if( avail<offset ){
+      /* pC->aRow does not have to hold the entire row, but it does at least
+      ** need to cover the header of the record.  If pC->aRow does not contain
+      ** the complete header, then set it to zero, forcing the header to be
+      ** dynamically allocated. */
+      pC->aRow = 0;
+      pC->szRow = 0;
+    }
+
+    /* The following goto is an optimization.  It can be omitted and
+    ** everything will still work.  But OP_Column is measurably faster
+    ** by skipping the subsequent conditional, which is always true.
+    */
+    assert( pC->nHdrParsed<=p2 );         /* Conditional skipped */
+    goto op_column_read_header;
+  }
+
+  /* Make sure at least the first p2+1 entries of the header have been
+  ** parsed and valid information is in aOffset[] and pC->aType[].
+  */
+  if( pC->nHdrParsed<=p2 ){
+    /* If there is more header available for parsing in the record, try
+    ** to extract additional fields up through the p2+1-th field 
+    */
+    op_column_read_header:
+    if( pC->iHdrOffset<aOffset[0] ){
+      /* Make sure zData points to enough of the record to cover the header. */
+      if( pC->aRow==0 ){
+        memset(&sMem, 0, sizeof(sMem));
+        rc = sqlite3VdbeMemFromBtree(pCrsr, 0, aOffset[0], 
+                                     !pC->isTable, &sMem);
+        if( rc!=SQLITE_OK ){
+          goto op_column_error;
+        }
+        zData = (u8*)sMem.z;
+      }else{
+        zData = pC->aRow;
+      }
+  
+      /* Fill in pC->aType[i] and aOffset[i] values through the p2-th field. */
+      i = pC->nHdrParsed;
+      offset = aOffset[i];
+      zHdr = zData + pC->iHdrOffset;
+      zEndHdr = zData + aOffset[0];
+      assert( i<=p2 && zHdr<zEndHdr );
+      do{
+        if( zHdr[0]<0x80 ){
+          t = zHdr[0];
+          zHdr++;
+        }else{
+          zHdr += sqlite3GetVarint32(zHdr, &t);
+        }
+        pC->aType[i] = t;
+        szField = sqlite3VdbeSerialTypeLen(t);
+        offset += szField;
+        if( offset<szField ){  /* True if offset overflows */
+          zHdr = &zEndHdr[1];  /* Forces SQLITE_CORRUPT return below */
+          break;
+        }
+        i++;
+        aOffset[i] = offset;
+      }while( i<=p2 && zHdr<zEndHdr );
+      pC->nHdrParsed = i;
+      pC->iHdrOffset = (u32)(zHdr - zData);
+      if( pC->aRow==0 ){
+        sqlite3VdbeMemRelease(&sMem);
+        sMem.flags = MEM_Null;
+      }
+  
+      /* The record is corrupt if any of the following are true:
+      ** (1) the bytes of the header extend past the declared header size
+      **          (zHdr>zEndHdr)
+      ** (2) the entire header was used but not all data was used
+      **          (zHdr==zEndHdr && offset!=pC->payloadSize)
+      ** (3) the end of the data extends beyond the end of the record.
+      **          (offset > pC->payloadSize)
+      */
+      if( (zHdr>=zEndHdr && (zHdr>zEndHdr || offset!=pC->payloadSize))
+       || (offset > pC->payloadSize)
+      ){
+        rc = SQLITE_CORRUPT_BKPT;
+        goto op_column_error;
+      }
+    }
+
+    /* If after trying to extra new entries from the header, nHdrParsed is
+    ** still not up to p2, that means that the record has fewer than p2
+    ** columns.  So the result will be either the default value or a NULL.
+    */
+    if( pC->nHdrParsed<=p2 ){
+      if( pOp->p4type==P4_MEM ){
+        sqlite3VdbeMemShallowCopy(pDest, pOp->p4.pMem, MEM_Static);
+      }else{
+        sqlite3VdbeMemSetNull(pDest);
+      }
+      goto op_column_out;
+    }
+  }
+
+  /* Extract the content for the p2+1-th column.  Control can only
+  ** reach this point if aOffset[p2], aOffset[p2+1], and pC->aType[p2] are
+  ** all valid.
+  */
+  assert( p2<pC->nHdrParsed );
+  assert( rc==SQLITE_OK );
+  assert( sqlite3VdbeCheckMemInvariants(pDest) );
+  if( VdbeMemDynamic(pDest) ) sqlite3VdbeMemSetNull(pDest);
+  t = pC->aType[p2];
+  if( pC->szRow>=aOffset[p2+1] ){
+    /* This is the common case where the desired content fits on the original
+    ** page - where the content is not on an overflow page */
+    sqlite3VdbeSerialGet(pC->aRow+aOffset[p2], t, pDest);
+  }else{
+    /* This branch happens only when content is on overflow pages */
+    if( ((pOp->p5 & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG))!=0
+          && ((t>=12 && (t&1)==0) || (pOp->p5 & OPFLAG_TYPEOFARG)!=0))
+     || (len = sqlite3VdbeSerialTypeLen(t))==0
+    ){
+      /* Content is irrelevant for
+      **    1. the typeof() function,
+      **    2. the length(X) function if X is a blob, and
+      **    3. if the content length is zero.
+      ** So we might as well use bogus content rather than reading
+      ** content from disk.  NULL will work for the value for strings
+      ** and blobs and whatever is in the payloadSize64 variable
+      ** will work for everything else. */
+      sqlite3VdbeSerialGet(t<=13 ? (u8*)&payloadSize64 : 0, t, pDest);
+    }else{
+      rc = sqlite3VdbeMemFromBtree(pCrsr, aOffset[p2], len, !pC->isTable,
+                                   pDest);
+      if( rc!=SQLITE_OK ){
+        goto op_column_error;
+      }
+      sqlite3VdbeSerialGet((const u8*)pDest->z, t, pDest);
+      pDest->flags &= ~MEM_Ephem;
+    }
+  }
+  pDest->enc = encoding;
+
+op_column_out:
+  /* If the column value is an ephemeral string, go ahead and persist
+  ** that string in case the cursor moves before the column value is
+  ** used.  The following code does the equivalent of Deephemeralize()
+  ** but does it faster. */
+  if( (pDest->flags & MEM_Ephem)!=0 && pDest->z ){
+    fx = pDest->flags & (MEM_Str|MEM_Blob);
+    assert( fx!=0 );
+    zData = (const u8*)pDest->z;
+    len = pDest->n;
+    if( sqlite3VdbeMemClearAndResize(pDest, len+2) ) goto no_mem;
+    memcpy(pDest->z, zData, len);
+    pDest->z[len] = 0;
+    pDest->z[len+1] = 0;
+    pDest->flags = fx|MEM_Term;
+  }
+op_column_error:
+  UPDATE_MAX_BLOBSIZE(pDest);
+  REGISTER_TRACE(pOp->p3, pDest);
+  break;
+}
+
+/* Opcode: Affinity P1 P2 * P4 *
+** Synopsis: affinity(r[P1@P2])
+**
+** Apply affinities to a range of P2 registers starting with P1.
+**
+** P4 is a string that is P2 characters long. The nth character of the
+** string indicates the column affinity that should be used for the nth
+** memory cell in the range.
+*/
+case OP_Affinity: {
+  const char *zAffinity;   /* The affinity to be applied */
+  char cAff;               /* A single character of affinity */
+
+  zAffinity = pOp->p4.z;
+  assert( zAffinity!=0 );
+  assert( zAffinity[pOp->p2]==0 );
+  pIn1 = &aMem[pOp->p1];
+  while( (cAff = *(zAffinity++))!=0 ){
+    assert( pIn1 <= &p->aMem[(p->nMem-p->nCursor)] );
+    assert( memIsValid(pIn1) );
+    applyAffinity(pIn1, cAff, encoding);
+    pIn1++;
+  }
+  break;
+}
+
+/* Opcode: MakeRecord P1 P2 P3 P4 *
+** Synopsis: r[P3]=mkrec(r[P1@P2])
+**
+** Convert P2 registers beginning with P1 into the [record format]
+** use as a data record in a database table or as a key
+** in an index.  The OP_Column opcode can decode the record later.
+**
+** P4 may be a string that is P2 characters long.  The nth character of the
+** string indicates the column affinity that should be used for the nth
+** field of the index key.
+**
+** The mapping from character to affinity is given by the SQLITE_AFF_
+** macros defined in sqliteInt.h.
+**
+** If P4 is NULL then all index fields have the affinity NONE.
+*/
+case OP_MakeRecord: {
+  u8 *zNewRecord;        /* A buffer to hold the data for the new record */
+  Mem *pRec;             /* The new record */
+  u64 nData;             /* Number of bytes of data space */
+  int nHdr;              /* Number of bytes of header space */
+  i64 nByte;             /* Data space required for this record */
+  int nZero;             /* Number of zero bytes at the end of the record */
+  int nVarint;           /* Number of bytes in a varint */
+  u32 serial_type;       /* Type field */
+  Mem *pData0;           /* First field to be combined into the record */
+  Mem *pLast;            /* Last field of the record */
+  int nField;            /* Number of fields in the record */
+  char *zAffinity;       /* The affinity string for the record */
+  int file_format;       /* File format to use for encoding */
+  int i;                 /* Space used in zNewRecord[] header */
+  int j;                 /* Space used in zNewRecord[] content */
+  int len;               /* Length of a field */
+
+  /* Assuming the record contains N fields, the record format looks
+  ** like this:
+  **
+  ** ------------------------------------------------------------------------
+  ** | hdr-size | type 0 | type 1 | ... | type N-1 | data0 | ... | data N-1 | 
+  ** ------------------------------------------------------------------------
+  **
+  ** Data(0) is taken from register P1.  Data(1) comes from register P1+1
+  ** and so forth.
+  **
+  ** Each type field is a varint representing the serial type of the 
+  ** corresponding data element (see sqlite3VdbeSerialType()). The
+  ** hdr-size field is also a varint which is the offset from the beginning
+  ** of the record to data0.
+  */
+  nData = 0;         /* Number of bytes of data space */
+  nHdr = 0;          /* Number of bytes of header space */
+  nZero = 0;         /* Number of zero bytes at the end of the record */
+  nField = pOp->p1;
+  zAffinity = pOp->p4.z;
+  assert( nField>0 && pOp->p2>0 && pOp->p2+nField<=(p->nMem-p->nCursor)+1 );
+  pData0 = &aMem[nField];
+  nField = pOp->p2;
+  pLast = &pData0[nField-1];
+  file_format = p->minWriteFileFormat;
+
+  /* Identify the output register */
+  assert( pOp->p3<pOp->p1 || pOp->p3>=pOp->p1+pOp->p2 );
+  pOut = &aMem[pOp->p3];
+  memAboutToChange(p, pOut);
+
+  /* Apply the requested affinity to all inputs
+  */
+  assert( pData0<=pLast );
+  if( zAffinity ){
+    pRec = pData0;
+    do{
+      applyAffinity(pRec++, *(zAffinity++), encoding);
+      assert( zAffinity[0]==0 || pRec<=pLast );
+    }while( zAffinity[0] );
+  }
+
+  /* Loop through the elements that will make up the record to figure
+  ** out how much space is required for the new record.
+  */
+  pRec = pLast;
+  do{
+    assert( memIsValid(pRec) );
+    pRec->uTemp = serial_type = sqlite3VdbeSerialType(pRec, file_format);
+    len = sqlite3VdbeSerialTypeLen(serial_type);
+    if( pRec->flags & MEM_Zero ){
+      if( nData ){
+        sqlite3VdbeMemExpandBlob(pRec);
+      }else{
+        nZero += pRec->u.nZero;
+        len -= pRec->u.nZero;
+      }
+    }
+    nData += len;
+    testcase( serial_type==127 );
+    testcase( serial_type==128 );
+    nHdr += serial_type<=127 ? 1 : sqlite3VarintLen(serial_type);
+  }while( (--pRec)>=pData0 );
+
+  /* EVIDENCE-OF: R-22564-11647 The header begins with a single varint
+  ** which determines the total number of bytes in the header. The varint
+  ** value is the size of the header in bytes including the size varint
+  ** itself. */
+  testcase( nHdr==126 );
+  testcase( nHdr==127 );
+  if( nHdr<=126 ){
+    /* The common case */
+    nHdr += 1;
+  }else{
+    /* Rare case of a really large header */
+    nVarint = sqlite3VarintLen(nHdr);
+    nHdr += nVarint;
+    if( nVarint<sqlite3VarintLen(nHdr) ) nHdr++;
+  }
+  nByte = nHdr+nData;
+  if( nByte>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    goto too_big;
+  }
+
+  /* Make sure the output register has a buffer large enough to store 
+  ** the new record. The output register (pOp->p3) is not allowed to
+  ** be one of the input registers (because the following call to
+  ** sqlite3VdbeMemClearAndResize() could clobber the value before it is used).
+  */
+  if( sqlite3VdbeMemClearAndResize(pOut, (int)nByte) ){
+    goto no_mem;
+  }
+  zNewRecord = (u8 *)pOut->z;
+
+  /* Write the record */
+  i = putVarint32(zNewRecord, nHdr);
+  j = nHdr;
+  assert( pData0<=pLast );
+  pRec = pData0;
+  do{
+    serial_type = pRec->uTemp;
+    /* EVIDENCE-OF: R-06529-47362 Following the size varint are one or more
+    ** additional varints, one per column. */
+    i += putVarint32(&zNewRecord[i], serial_type);            /* serial type */
+    /* EVIDENCE-OF: R-64536-51728 The values for each column in the record
+    ** immediately follow the header. */
+    j += sqlite3VdbeSerialPut(&zNewRecord[j], pRec, serial_type); /* content */
+  }while( (++pRec)<=pLast );
+  assert( i==nHdr );
+  assert( j==nByte );
+
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  pOut->n = (int)nByte;
+  pOut->flags = MEM_Blob;
+  if( nZero ){
+    pOut->u.nZero = nZero;
+    pOut->flags |= MEM_Zero;
+  }
+  pOut->enc = SQLITE_UTF8;  /* In case the blob is ever converted to text */
+  REGISTER_TRACE(pOp->p3, pOut);
+  UPDATE_MAX_BLOBSIZE(pOut);
+  break;
+}
+
+/* Opcode: Count P1 P2 * * *
+** Synopsis: r[P2]=count()
+**
+** Store the number of entries (an integer value) in the table or index 
+** opened by cursor P1 in register P2
+*/
+#ifndef SQLITE_OMIT_BTREECOUNT
+case OP_Count: {         /* out2-prerelease */
+  i64 nEntry;
+  BtCursor *pCrsr;
+
+  pCrsr = p->apCsr[pOp->p1]->pCursor;
+  assert( pCrsr );
+  nEntry = 0;  /* Not needed.  Only used to silence a warning. */
+  rc = sqlite3BtreeCount(pCrsr, &nEntry);
+  pOut->u.i = nEntry;
+  break;
+}
+#endif
+
+/* Opcode: Savepoint P1 * * P4 *
+**
+** Open, release or rollback the savepoint named by parameter P4, depending
+** on the value of P1. To open a new savepoint, P1==0. To release (commit) an
+** existing savepoint, P1==1, or to rollback an existing savepoint P1==2.
+*/
+case OP_Savepoint: {
+  int p1;                         /* Value of P1 operand */
+  char *zName;                    /* Name of savepoint */
+  int nName;
+  Savepoint *pNew;
+  Savepoint *pSavepoint;
+  Savepoint *pTmp;
+  int iSavepoint;
+  int ii;
+
+  p1 = pOp->p1;
+  zName = pOp->p4.z;
+
+  /* Assert that the p1 parameter is valid. Also that if there is no open
+  ** transaction, then there cannot be any savepoints. 
+  */
+  assert( db->pSavepoint==0 || db->autoCommit==0 );
+  assert( p1==SAVEPOINT_BEGIN||p1==SAVEPOINT_RELEASE||p1==SAVEPOINT_ROLLBACK );
+  assert( db->pSavepoint || db->isTransactionSavepoint==0 );
+  assert( checkSavepointCount(db) );
+  assert( p->bIsReader );
+
+  if( p1==SAVEPOINT_BEGIN ){
+    if( db->nVdbeWrite>0 ){
+      /* A new savepoint cannot be created if there are active write 
+      ** statements (i.e. open read/write incremental blob handles).
+      */
+      sqlite3SetString(&p->zErrMsg, db, "cannot open savepoint - "
+        "SQL statements in progress");
+      rc = SQLITE_BUSY;
+    }else{
+      nName = sqlite3Strlen30(zName);
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+      /* This call is Ok even if this savepoint is actually a transaction
+      ** savepoint (and therefore should not prompt xSavepoint()) callbacks.
+      ** If this is a transaction savepoint being opened, it is guaranteed
+      ** that the db->aVTrans[] array is empty.  */
+      assert( db->autoCommit==0 || db->nVTrans==0 );
+      rc = sqlite3VtabSavepoint(db, SAVEPOINT_BEGIN,
+                                db->nStatement+db->nSavepoint);
+      if( rc!=SQLITE_OK ) goto abort_due_to_error;
+#endif
+
+      /* Create a new savepoint structure. */
+      pNew = sqlite3DbMallocRaw(db, sizeof(Savepoint)+nName+1);
+      if( pNew ){
+        pNew->zName = (char *)&pNew[1];
+        memcpy(pNew->zName, zName, nName+1);
+    
+        /* If there is no open transaction, then mark this as a special
+        ** "transaction savepoint". */
+        if( db->autoCommit ){
+          db->autoCommit = 0;
+          db->isTransactionSavepoint = 1;
+        }else{
+          db->nSavepoint++;
+        }
+    
+        /* Link the new savepoint into the database handle's list. */
+        pNew->pNext = db->pSavepoint;
+        db->pSavepoint = pNew;
+        pNew->nDeferredCons = db->nDeferredCons;
+        pNew->nDeferredImmCons = db->nDeferredImmCons;
+      }
+    }
+  }else{
+    iSavepoint = 0;
+
+    /* Find the named savepoint. If there is no such savepoint, then an
+    ** an error is returned to the user.  */
+    for(
+      pSavepoint = db->pSavepoint; 
+      pSavepoint && sqlite3StrICmp(pSavepoint->zName, zName);
+      pSavepoint = pSavepoint->pNext
+    ){
+      iSavepoint++;
+    }
+    if( !pSavepoint ){
+      sqlite3SetString(&p->zErrMsg, db, "no such savepoint: %s", zName);
+      rc = SQLITE_ERROR;
+    }else if( db->nVdbeWrite>0 && p1==SAVEPOINT_RELEASE ){
+      /* It is not possible to release (commit) a savepoint if there are 
+      ** active write statements.
+      */
+      sqlite3SetString(&p->zErrMsg, db, 
+        "cannot release savepoint - SQL statements in progress"
+      );
+      rc = SQLITE_BUSY;
+    }else{
+
+      /* Determine whether or not this is a transaction savepoint. If so,
+      ** and this is a RELEASE command, then the current transaction 
+      ** is committed. 
+      */
+      int isTransaction = pSavepoint->pNext==0 && db->isTransactionSavepoint;
+      if( isTransaction && p1==SAVEPOINT_RELEASE ){
+        if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){
+          goto vdbe_return;
+        }
+        db->autoCommit = 1;
+        if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){
+          p->pc = pc;
+          db->autoCommit = 0;
+          p->rc = rc = SQLITE_BUSY;
+          goto vdbe_return;
+        }
+        db->isTransactionSavepoint = 0;
+        rc = p->rc;
+      }else{
+        int isSchemaChange;
+        iSavepoint = db->nSavepoint - iSavepoint - 1;
+        if( p1==SAVEPOINT_ROLLBACK ){
+          isSchemaChange = (db->flags & SQLITE_InternChanges)!=0;
+          for(ii=0; ii<db->nDb; ii++){
+            rc = sqlite3BtreeTripAllCursors(db->aDb[ii].pBt,
+                                       SQLITE_ABORT_ROLLBACK,
+                                       isSchemaChange==0);
+            if( rc!=SQLITE_OK ) goto abort_due_to_error;
+          }
+        }else{
+          isSchemaChange = 0;
+        }
+        for(ii=0; ii<db->nDb; ii++){
+          rc = sqlite3BtreeSavepoint(db->aDb[ii].pBt, p1, iSavepoint);
+          if( rc!=SQLITE_OK ){
+            goto abort_due_to_error;
+          }
+        }
+        if( isSchemaChange ){
+          sqlite3ExpirePreparedStatements(db);
+          sqlite3ResetAllSchemasOfConnection(db);
+          db->flags = (db->flags | SQLITE_InternChanges);
+        }
+      }
+  
+      /* Regardless of whether this is a RELEASE or ROLLBACK, destroy all 
+      ** savepoints nested inside of the savepoint being operated on. */
+      while( db->pSavepoint!=pSavepoint ){
+        pTmp = db->pSavepoint;
+        db->pSavepoint = pTmp->pNext;
+        sqlite3DbFree(db, pTmp);
+        db->nSavepoint--;
+      }
+
+      /* If it is a RELEASE, then destroy the savepoint being operated on 
+      ** too. If it is a ROLLBACK TO, then set the number of deferred 
+      ** constraint violations present in the database to the value stored
+      ** when the savepoint was created.  */
+      if( p1==SAVEPOINT_RELEASE ){
+        assert( pSavepoint==db->pSavepoint );
+        db->pSavepoint = pSavepoint->pNext;
+        sqlite3DbFree(db, pSavepoint);
+        if( !isTransaction ){
+          db->nSavepoint--;
+        }
+      }else{
+        db->nDeferredCons = pSavepoint->nDeferredCons;
+        db->nDeferredImmCons = pSavepoint->nDeferredImmCons;
+      }
+
+      if( !isTransaction ){
+        rc = sqlite3VtabSavepoint(db, p1, iSavepoint);
+        if( rc!=SQLITE_OK ) goto abort_due_to_error;
+      }
+    }
+  }
+
+  break;
+}
+
+/* Opcode: AutoCommit P1 P2 * * *
+**
+** Set the database auto-commit flag to P1 (1 or 0). If P2 is true, roll
+** back any currently active btree transactions. If there are any active
+** VMs (apart from this one), then a ROLLBACK fails.  A COMMIT fails if
+** there are active writing VMs or active VMs that use shared cache.
+**
+** This instruction causes the VM to halt.
+*/
+case OP_AutoCommit: {
+  int desiredAutoCommit;
+  int iRollback;
+  int turnOnAC;
+
+  desiredAutoCommit = pOp->p1;
+  iRollback = pOp->p2;
+  turnOnAC = desiredAutoCommit && !db->autoCommit;
+  assert( desiredAutoCommit==1 || desiredAutoCommit==0 );
+  assert( desiredAutoCommit==1 || iRollback==0 );
+  assert( db->nVdbeActive>0 );  /* At least this one VM is active */
+  assert( p->bIsReader );
+
+#if 0
+  if( turnOnAC && iRollback && db->nVdbeActive>1 ){
+    /* If this instruction implements a ROLLBACK and other VMs are
+    ** still running, and a transaction is active, return an error indicating
+    ** that the other VMs must complete first. 
+    */
+    sqlite3SetString(&p->zErrMsg, db, "cannot rollback transaction - "
+        "SQL statements in progress");
+    rc = SQLITE_BUSY;
+  }else
+#endif
+  if( turnOnAC && !iRollback && db->nVdbeWrite>0 ){
+    /* If this instruction implements a COMMIT and other VMs are writing
+    ** return an error indicating that the other VMs must complete first. 
+    */
+    sqlite3SetString(&p->zErrMsg, db, "cannot commit transaction - "
+        "SQL statements in progress");
+    rc = SQLITE_BUSY;
+  }else if( desiredAutoCommit!=db->autoCommit ){
+    if( iRollback ){
+      assert( desiredAutoCommit==1 );
+      sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK);
+      db->autoCommit = 1;
+    }else if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){
+      goto vdbe_return;
+    }else{
+      db->autoCommit = (u8)desiredAutoCommit;
+      if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){
+        p->pc = pc;
+        db->autoCommit = (u8)(1-desiredAutoCommit);
+        p->rc = rc = SQLITE_BUSY;
+        goto vdbe_return;
+      }
+    }
+    assert( db->nStatement==0 );
+    sqlite3CloseSavepoints(db);
+    if( p->rc==SQLITE_OK ){
+      rc = SQLITE_DONE;
+    }else{
+      rc = SQLITE_ERROR;
+    }
+    goto vdbe_return;
+  }else{
+    sqlite3SetString(&p->zErrMsg, db,
+        (!desiredAutoCommit)?"cannot start a transaction within a transaction":(
+        (iRollback)?"cannot rollback - no transaction is active":
+                   "cannot commit - no transaction is active"));
+         
+    rc = SQLITE_ERROR;
+  }
+  break;
+}
+
+/* Opcode: Transaction P1 P2 P3 P4 P5
+**
+** Begin a transaction on database P1 if a transaction is not already
+** active.
+** If P2 is non-zero, then a write-transaction is started, or if a 
+** read-transaction is already active, it is upgraded to a write-transaction.
+** If P2 is zero, then a read-transaction is started.
+**
+** P1 is the index of the database file on which the transaction is
+** started.  Index 0 is the main database file and index 1 is the
+** file used for temporary tables.  Indices of 2 or more are used for
+** attached databases.
+**
+** If a write-transaction is started and the Vdbe.usesStmtJournal flag is
+** true (this flag is set if the Vdbe may modify more than one row and may
+** throw an ABORT exception), a statement transaction may also be opened.
+** More specifically, a statement transaction is opened iff the database
+** connection is currently not in autocommit mode, or if there are other
+** active statements. A statement transaction allows the changes made by this
+** VDBE to be rolled back after an error without having to roll back the
+** entire transaction. If no error is encountered, the statement transaction
+** will automatically commit when the VDBE halts.
+**
+** If P5!=0 then this opcode also checks the schema cookie against P3
+** and the schema generation counter against P4.
+** The cookie changes its value whenever the database schema changes.
+** This operation is used to detect when that the cookie has changed
+** and that the current process needs to reread the schema.  If the schema
+** cookie in P3 differs from the schema cookie in the database header or
+** if the schema generation counter in P4 differs from the current
+** generation counter, then an SQLITE_SCHEMA error is raised and execution
+** halts.  The sqlite3_step() wrapper function might then reprepare the
+** statement and rerun it from the beginning.
+*/
+case OP_Transaction: {
+  Btree *pBt;
+  int iMeta;
+  int iGen;
+
+  assert( p->bIsReader );
+  assert( p->readOnly==0 || pOp->p2==0 );
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  assert( DbMaskTest(p->btreeMask, pOp->p1) );
+  if( pOp->p2 && (db->flags & SQLITE_QueryOnly)!=0 ){
+    rc = SQLITE_READONLY;
+    goto abort_due_to_error;
+  }
+  pBt = db->aDb[pOp->p1].pBt;
+
+  if( pBt ){
+    rc = sqlite3BtreeBeginTrans(pBt, pOp->p2);
+    if( rc==SQLITE_BUSY ){
+      p->pc = pc;
+      p->rc = rc = SQLITE_BUSY;
+      goto vdbe_return;
+    }
+    if( rc!=SQLITE_OK ){
+      goto abort_due_to_error;
+    }
+
+    if( pOp->p2 && p->usesStmtJournal 
+     && (db->autoCommit==0 || db->nVdbeRead>1) 
+    ){
+      assert( sqlite3BtreeIsInTrans(pBt) );
+      if( p->iStatement==0 ){
+        assert( db->nStatement>=0 && db->nSavepoint>=0 );
+        db->nStatement++; 
+        p->iStatement = db->nSavepoint + db->nStatement;
+      }
+
+      rc = sqlite3VtabSavepoint(db, SAVEPOINT_BEGIN, p->iStatement-1);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3BtreeBeginStmt(pBt, p->iStatement);
+      }
+
+      /* Store the current value of the database handles deferred constraint
+      ** counter. If the statement transaction needs to be rolled back,
+      ** the value of this counter needs to be restored too.  */
+      p->nStmtDefCons = db->nDeferredCons;
+      p->nStmtDefImmCons = db->nDeferredImmCons;
+    }
+
+    /* Gather the schema version number for checking */
+    sqlite3BtreeGetMeta(pBt, BTREE_SCHEMA_VERSION, (u32 *)&iMeta);
+    iGen = db->aDb[pOp->p1].pSchema->iGeneration;
+  }else{
+    iGen = iMeta = 0;
+  }
+  assert( pOp->p5==0 || pOp->p4type==P4_INT32 );
+  if( pOp->p5 && (iMeta!=pOp->p3 || iGen!=pOp->p4.i) ){
+    sqlite3DbFree(db, p->zErrMsg);
+    p->zErrMsg = sqlite3DbStrDup(db, "database schema has changed");
+    /* If the schema-cookie from the database file matches the cookie 
+    ** stored with the in-memory representation of the schema, do
+    ** not reload the schema from the database file.
+    **
+    ** If virtual-tables are in use, this is not just an optimization.
+    ** Often, v-tables store their data in other SQLite tables, which
+    ** are queried from within xNext() and other v-table methods using
+    ** prepared queries. If such a query is out-of-date, we do not want to
+    ** discard the database schema, as the user code implementing the
+    ** v-table would have to be ready for the sqlite3_vtab structure itself
+    ** to be invalidated whenever sqlite3_step() is called from within 
+    ** a v-table method.
+    */
+    if( db->aDb[pOp->p1].pSchema->schema_cookie!=iMeta ){
+      sqlite3ResetOneSchema(db, pOp->p1);
+    }
+    p->expired = 1;
+    rc = SQLITE_SCHEMA;
+  }
+  break;
+}
+
+/* Opcode: ReadCookie P1 P2 P3 * *
+**
+** Read cookie number P3 from database P1 and write it into register P2.
+** P3==1 is the schema version.  P3==2 is the database format.
+** P3==3 is the recommended pager cache size, and so forth.  P1==0 is
+** the main database file and P1==1 is the database file used to store
+** temporary tables.
+**
+** There must be a read-lock on the database (either a transaction
+** must be started or there must be an open cursor) before
+** executing this instruction.
+*/
+case OP_ReadCookie: {               /* out2-prerelease */
+  int iMeta;
+  int iDb;
+  int iCookie;
+
+  assert( p->bIsReader );
+  iDb = pOp->p1;
+  iCookie = pOp->p3;
+  assert( pOp->p3<SQLITE_N_BTREE_META );
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( db->aDb[iDb].pBt!=0 );
+  assert( DbMaskTest(p->btreeMask, iDb) );
+
+  sqlite3BtreeGetMeta(db->aDb[iDb].pBt, iCookie, (u32 *)&iMeta);
+  pOut->u.i = iMeta;
+  break;
+}
+
+/* Opcode: SetCookie P1 P2 P3 * *
+**
+** Write the content of register P3 (interpreted as an integer)
+** into cookie number P2 of database P1.  P2==1 is the schema version.  
+** P2==2 is the database format. P2==3 is the recommended pager cache 
+** size, and so forth.  P1==0 is the main database file and P1==1 is the 
+** database file used to store temporary tables.
+**
+** A transaction must be started before executing this opcode.
+*/
+case OP_SetCookie: {       /* in3 */
+  Db *pDb;
+  assert( pOp->p2<SQLITE_N_BTREE_META );
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  assert( DbMaskTest(p->btreeMask, pOp->p1) );
+  assert( p->readOnly==0 );
+  pDb = &db->aDb[pOp->p1];
+  assert( pDb->pBt!=0 );
+  assert( sqlite3SchemaMutexHeld(db, pOp->p1, 0) );
+  pIn3 = &aMem[pOp->p3];
+  sqlite3VdbeMemIntegerify(pIn3);
+  /* See note about index shifting on OP_ReadCookie */
+  rc = sqlite3BtreeUpdateMeta(pDb->pBt, pOp->p2, (int)pIn3->u.i);
+  if( pOp->p2==BTREE_SCHEMA_VERSION ){
+    /* When the schema cookie changes, record the new cookie internally */
+    pDb->pSchema->schema_cookie = (int)pIn3->u.i;
+    db->flags |= SQLITE_InternChanges;
+  }else if( pOp->p2==BTREE_FILE_FORMAT ){
+    /* Record changes in the file format */
+    pDb->pSchema->file_format = (u8)pIn3->u.i;
+  }
+  if( pOp->p1==1 ){
+    /* Invalidate all prepared statements whenever the TEMP database
+    ** schema is changed.  Ticket #1644 */
+    sqlite3ExpirePreparedStatements(db);
+    p->expired = 0;
+  }
+  break;
+}
+
+/* Opcode: OpenRead P1 P2 P3 P4 P5
+** Synopsis: root=P2 iDb=P3
+**
+** Open a read-only cursor for the database table whose root page is
+** P2 in a database file.  The database file is determined by P3. 
+** P3==0 means the main database, P3==1 means the database used for 
+** temporary tables, and P3>1 means used the corresponding attached
+** database.  Give the new cursor an identifier of P1.  The P1
+** values need not be contiguous but all P1 values should be small integers.
+** It is an error for P1 to be negative.
+**
+** If P5!=0 then use the content of register P2 as the root page, not
+** the value of P2 itself.
+**
+** There will be a read lock on the database whenever there is an
+** open cursor.  If the database was unlocked prior to this instruction
+** then a read lock is acquired as part of this instruction.  A read
+** lock allows other processes to read the database but prohibits
+** any other process from modifying the database.  The read lock is
+** released when all cursors are closed.  If this instruction attempts
+** to get a read lock but fails, the script terminates with an
+** SQLITE_BUSY error code.
+**
+** The P4 value may be either an integer (P4_INT32) or a pointer to
+** a KeyInfo structure (P4_KEYINFO). If it is a pointer to a KeyInfo 
+** structure, then said structure defines the content and collating 
+** sequence of the index being opened. Otherwise, if P4 is an integer 
+** value, it is set to the number of columns in the table.
+**
+** See also: OpenWrite, ReopenIdx
+*/
+/* Opcode: ReopenIdx P1 P2 P3 P4 P5
+** Synopsis: root=P2 iDb=P3
+**
+** The ReopenIdx opcode works exactly like ReadOpen except that it first
+** checks to see if the cursor on P1 is already open with a root page
+** number of P2 and if it is this opcode becomes a no-op.  In other words,
+** if the cursor is already open, do not reopen it.
+**
+** The ReopenIdx opcode may only be used with P5==0 and with P4 being
+** a P4_KEYINFO object.  Furthermore, the P3 value must be the same as
+** every other ReopenIdx or OpenRead for the same cursor number.
+**
+** See the OpenRead opcode documentation for additional information.
+*/
+/* Opcode: OpenWrite P1 P2 P3 P4 P5
+** Synopsis: root=P2 iDb=P3
+**
+** Open a read/write cursor named P1 on the table or index whose root
+** page is P2.  Or if P5!=0 use the content of register P2 to find the
+** root page.
+**
+** The P4 value may be either an integer (P4_INT32) or a pointer to
+** a KeyInfo structure (P4_KEYINFO). If it is a pointer to a KeyInfo 
+** structure, then said structure defines the content and collating 
+** sequence of the index being opened. Otherwise, if P4 is an integer 
+** value, it is set to the number of columns in the table, or to the
+** largest index of any column of the table that is actually used.
+**
+** This instruction works just like OpenRead except that it opens the cursor
+** in read/write mode.  For a given table, there can be one or more read-only
+** cursors or a single read/write cursor but not both.
+**
+** See also OpenRead.
+*/
+case OP_ReopenIdx: {
+  VdbeCursor *pCur;
+
+  assert( pOp->p5==0 );
+  assert( pOp->p4type==P4_KEYINFO );
+  pCur = p->apCsr[pOp->p1];
+  if( pCur && pCur->pgnoRoot==(u32)pOp->p2 ){
+    assert( pCur->iDb==pOp->p3 );      /* Guaranteed by the code generator */
+    break;
+  }
+  /* If the cursor is not currently open or is open on a different
+  ** index, then fall through into OP_OpenRead to force a reopen */
+}
+case OP_OpenRead:
+case OP_OpenWrite: {
+  int nField;
+  KeyInfo *pKeyInfo;
+  int p2;
+  int iDb;
+  int wrFlag;
+  Btree *pX;
+  VdbeCursor *pCur;
+  Db *pDb;
+
+  assert( (pOp->p5&(OPFLAG_P2ISREG|OPFLAG_BULKCSR))==pOp->p5 );
+  assert( pOp->opcode==OP_OpenWrite || pOp->p5==0 );
+  assert( p->bIsReader );
+  assert( pOp->opcode==OP_OpenRead || pOp->opcode==OP_ReopenIdx
+          || p->readOnly==0 );
+
+  if( p->expired ){
+    rc = SQLITE_ABORT_ROLLBACK;
+    break;
+  }
+
+  nField = 0;
+  pKeyInfo = 0;
+  p2 = pOp->p2;
+  iDb = pOp->p3;
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( DbMaskTest(p->btreeMask, iDb) );
+  pDb = &db->aDb[iDb];
+  pX = pDb->pBt;
+  assert( pX!=0 );
+  if( pOp->opcode==OP_OpenWrite ){
+    wrFlag = 1;
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    if( pDb->pSchema->file_format < p->minWriteFileFormat ){
+      p->minWriteFileFormat = pDb->pSchema->file_format;
+    }
+  }else{
+    wrFlag = 0;
+  }
+  if( pOp->p5 & OPFLAG_P2ISREG ){
+    assert( p2>0 );
+    assert( p2<=(p->nMem-p->nCursor) );
+    pIn2 = &aMem[p2];
+    assert( memIsValid(pIn2) );
+    assert( (pIn2->flags & MEM_Int)!=0 );
+    sqlite3VdbeMemIntegerify(pIn2);
+    p2 = (int)pIn2->u.i;
+    /* The p2 value always comes from a prior OP_CreateTable opcode and
+    ** that opcode will always set the p2 value to 2 or more or else fail.
+    ** If there were a failure, the prepared statement would have halted
+    ** before reaching this instruction. */
+    if( NEVER(p2<2) ) {
+      rc = SQLITE_CORRUPT_BKPT;
+      goto abort_due_to_error;
+    }
+  }
+  if( pOp->p4type==P4_KEYINFO ){
+    pKeyInfo = pOp->p4.pKeyInfo;
+    assert( pKeyInfo->enc==ENC(db) );
+    assert( pKeyInfo->db==db );
+    nField = pKeyInfo->nField+pKeyInfo->nXField;
+  }else if( pOp->p4type==P4_INT32 ){
+    nField = pOp->p4.i;
+  }
+  assert( pOp->p1>=0 );
+  assert( nField>=0 );
+  testcase( nField==0 );  /* Table with INTEGER PRIMARY KEY and nothing else */
+  pCur = allocateCursor(p, pOp->p1, nField, iDb, 1);
+  if( pCur==0 ) goto no_mem;
+  pCur->nullRow = 1;
+  pCur->isOrdered = 1;
+  pCur->pgnoRoot = p2;
+  rc = sqlite3BtreeCursor(pX, p2, wrFlag, pKeyInfo, pCur->pCursor);
+  pCur->pKeyInfo = pKeyInfo;
+  assert( OPFLAG_BULKCSR==BTREE_BULKLOAD );
+  sqlite3BtreeCursorHints(pCur->pCursor, (pOp->p5 & OPFLAG_BULKCSR));
+
+  /* Set the VdbeCursor.isTable variable. Previous versions of
+  ** SQLite used to check if the root-page flags were sane at this point
+  ** and report database corruption if they were not, but this check has
+  ** since moved into the btree layer.  */  
+  pCur->isTable = pOp->p4type!=P4_KEYINFO;
+  break;
+}
+
+/* Opcode: OpenEphemeral P1 P2 * P4 P5
+** Synopsis: nColumn=P2
+**
+** Open a new cursor P1 to a transient table.
+** The cursor is always opened read/write even if 
+** the main database is read-only.  The ephemeral
+** table is deleted automatically when the cursor is closed.
+**
+** P2 is the number of columns in the ephemeral table.
+** The cursor points to a BTree table if P4==0 and to a BTree index
+** if P4 is not 0.  If P4 is not NULL, it points to a KeyInfo structure
+** that defines the format of keys in the index.
+**
+** The P5 parameter can be a mask of the BTREE_* flags defined
+** in btree.h.  These flags control aspects of the operation of
+** the btree.  The BTREE_OMIT_JOURNAL and BTREE_SINGLE flags are
+** added automatically.
+*/
+/* Opcode: OpenAutoindex P1 P2 * P4 *
+** Synopsis: nColumn=P2
+**
+** This opcode works the same as OP_OpenEphemeral.  It has a
+** different name to distinguish its use.  Tables created using
+** by this opcode will be used for automatically created transient
+** indices in joins.
+*/
+case OP_OpenAutoindex: 
+case OP_OpenEphemeral: {
+  VdbeCursor *pCx;
+  KeyInfo *pKeyInfo;
+
+  static const int vfsFlags = 
+      SQLITE_OPEN_READWRITE |
+      SQLITE_OPEN_CREATE |
+      SQLITE_OPEN_EXCLUSIVE |
+      SQLITE_OPEN_DELETEONCLOSE |
+      SQLITE_OPEN_TRANSIENT_DB;
+  assert( pOp->p1>=0 );
+  assert( pOp->p2>=0 );
+  pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
+  if( pCx==0 ) goto no_mem;
+  pCx->nullRow = 1;
+  pCx->isEphemeral = 1;
+  rc = sqlite3BtreeOpen(db->pVfs, 0, db, &pCx->pBt, 
+                        BTREE_OMIT_JOURNAL | BTREE_SINGLE | pOp->p5, vfsFlags);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3BtreeBeginTrans(pCx->pBt, 1);
+  }
+  if( rc==SQLITE_OK ){
+    /* If a transient index is required, create it by calling
+    ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before
+    ** opening it. If a transient table is required, just use the
+    ** automatically created table with root-page 1 (an BLOB_INTKEY table).
+    */
+    if( (pKeyInfo = pOp->p4.pKeyInfo)!=0 ){
+      int pgno;
+      assert( pOp->p4type==P4_KEYINFO );
+      rc = sqlite3BtreeCreateTable(pCx->pBt, &pgno, BTREE_BLOBKEY | pOp->p5); 
+      if( rc==SQLITE_OK ){
+        assert( pgno==MASTER_ROOT+1 );
+        assert( pKeyInfo->db==db );
+        assert( pKeyInfo->enc==ENC(db) );
+        pCx->pKeyInfo = pKeyInfo;
+        rc = sqlite3BtreeCursor(pCx->pBt, pgno, 1, pKeyInfo, pCx->pCursor);
+      }
+      pCx->isTable = 0;
+    }else{
+      rc = sqlite3BtreeCursor(pCx->pBt, MASTER_ROOT, 1, 0, pCx->pCursor);
+      pCx->isTable = 1;
+    }
+  }
+  pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
+  break;
+}
+
+/* Opcode: SorterOpen P1 P2 P3 P4 *
+**
+** This opcode works like OP_OpenEphemeral except that it opens
+** a transient index that is specifically designed to sort large
+** tables using an external merge-sort algorithm.
+**
+** If argument P3 is non-zero, then it indicates that the sorter may
+** assume that a stable sort considering the first P3 fields of each
+** key is sufficient to produce the required results.
+*/
+case OP_SorterOpen: {
+  VdbeCursor *pCx;
+
+  assert( pOp->p1>=0 );
+  assert( pOp->p2>=0 );
+  pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
+  if( pCx==0 ) goto no_mem;
+  pCx->pKeyInfo = pOp->p4.pKeyInfo;
+  assert( pCx->pKeyInfo->db==db );
+  assert( pCx->pKeyInfo->enc==ENC(db) );
+  rc = sqlite3VdbeSorterInit(db, pOp->p3, pCx);
+  break;
+}
+
+/* Opcode: SequenceTest P1 P2 * * *
+** Synopsis: if( cursor[P1].ctr++ ) pc = P2
+**
+** P1 is a sorter cursor. If the sequence counter is currently zero, jump
+** to P2. Regardless of whether or not the jump is taken, increment the
+** the sequence value.
+*/
+case OP_SequenceTest: {
+  VdbeCursor *pC;
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC->pSorter );
+  if( (pC->seqCount++)==0 ){
+    pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: OpenPseudo P1 P2 P3 * *
+** Synopsis: P3 columns in r[P2]
+**
+** Open a new cursor that points to a fake table that contains a single
+** row of data.  The content of that one row is the content of memory
+** register P2.  In other words, cursor P1 becomes an alias for the 
+** MEM_Blob content contained in register P2.
+**
+** A pseudo-table created by this opcode is used to hold a single
+** row output from the sorter so that the row can be decomposed into
+** individual columns using the OP_Column opcode.  The OP_Column opcode
+** is the only cursor opcode that works with a pseudo-table.
+**
+** P3 is the number of fields in the records that will be stored by
+** the pseudo-table.
+*/
+case OP_OpenPseudo: {
+  VdbeCursor *pCx;
+
+  assert( pOp->p1>=0 );
+  assert( pOp->p3>=0 );
+  pCx = allocateCursor(p, pOp->p1, pOp->p3, -1, 0);
+  if( pCx==0 ) goto no_mem;
+  pCx->nullRow = 1;
+  pCx->pseudoTableReg = pOp->p2;
+  pCx->isTable = 1;
+  assert( pOp->p5==0 );
+  break;
+}
+
+/* Opcode: Close P1 * * * *
+**
+** Close a cursor previously opened as P1.  If P1 is not
+** currently open, this instruction is a no-op.
+*/
+case OP_Close: {
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  sqlite3VdbeFreeCursor(p, p->apCsr[pOp->p1]);
+  p->apCsr[pOp->p1] = 0;
+  break;
+}
+
+/* Opcode: SeekGE P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), 
+** use the value in register P3 as the key.  If cursor P1 refers 
+** to an SQL index, then P3 is the first in an array of P4 registers 
+** that are used as an unpacked index key. 
+**
+** Reposition cursor P1 so that  it points to the smallest entry that 
+** is greater than or equal to the key value. If there are no records 
+** greater than or equal to the key and P2 is not zero, then jump to P2.
+**
+** This opcode leaves the cursor configured to move in forward order,
+** from the beginning toward the end.  In other words, the cursor is
+** configured to use Next, not Prev.
+**
+** See also: Found, NotFound, SeekLt, SeekGt, SeekLe
+*/
+/* Opcode: SeekGT P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), 
+** use the value in register P3 as a key. If cursor P1 refers 
+** to an SQL index, then P3 is the first in an array of P4 registers 
+** that are used as an unpacked index key. 
+**
+** Reposition cursor P1 so that  it points to the smallest entry that 
+** is greater than the key value. If there are no records greater than 
+** the key and P2 is not zero, then jump to P2.
+**
+** This opcode leaves the cursor configured to move in forward order,
+** from the beginning toward the end.  In other words, the cursor is
+** configured to use Next, not Prev.
+**
+** See also: Found, NotFound, SeekLt, SeekGe, SeekLe
+*/
+/* Opcode: SeekLT P1 P2 P3 P4 * 
+** Synopsis: key=r[P3@P4]
+**
+** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), 
+** use the value in register P3 as a key. If cursor P1 refers 
+** to an SQL index, then P3 is the first in an array of P4 registers 
+** that are used as an unpacked index key. 
+**
+** Reposition cursor P1 so that  it points to the largest entry that 
+** is less than the key value. If there are no records less than 
+** the key and P2 is not zero, then jump to P2.
+**
+** This opcode leaves the cursor configured to move in reverse order,
+** from the end toward the beginning.  In other words, the cursor is
+** configured to use Prev, not Next.
+**
+** See also: Found, NotFound, SeekGt, SeekGe, SeekLe
+*/
+/* Opcode: SeekLE P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If cursor P1 refers to an SQL table (B-Tree that uses integer keys), 
+** use the value in register P3 as a key. If cursor P1 refers 
+** to an SQL index, then P3 is the first in an array of P4 registers 
+** that are used as an unpacked index key. 
+**
+** Reposition cursor P1 so that it points to the largest entry that 
+** is less than or equal to the key value. If there are no records 
+** less than or equal to the key and P2 is not zero, then jump to P2.
+**
+** This opcode leaves the cursor configured to move in reverse order,
+** from the end toward the beginning.  In other words, the cursor is
+** configured to use Prev, not Next.
+**
+** See also: Found, NotFound, SeekGt, SeekGe, SeekLt
+*/
+case OP_SeekLT:         /* jump, in3 */
+case OP_SeekLE:         /* jump, in3 */
+case OP_SeekGE:         /* jump, in3 */
+case OP_SeekGT: {       /* jump, in3 */
+  int res;
+  int oc;
+  VdbeCursor *pC;
+  UnpackedRecord r;
+  int nField;
+  i64 iKey;      /* The rowid we are to seek to */
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  assert( pOp->p2!=0 );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->pseudoTableReg==0 );
+  assert( OP_SeekLE == OP_SeekLT+1 );
+  assert( OP_SeekGE == OP_SeekLT+2 );
+  assert( OP_SeekGT == OP_SeekLT+3 );
+  assert( pC->isOrdered );
+  assert( pC->pCursor!=0 );
+  oc = pOp->opcode;
+  pC->nullRow = 0;
+#ifdef SQLITE_DEBUG
+  pC->seekOp = pOp->opcode;
+#endif
+  if( pC->isTable ){
+    /* The input value in P3 might be of any type: integer, real, string,
+    ** blob, or NULL.  But it needs to be an integer before we can do
+    ** the seek, so convert it. */
+    pIn3 = &aMem[pOp->p3];
+    if( (pIn3->flags & (MEM_Int|MEM_Real|MEM_Str))==MEM_Str ){
+      applyNumericAffinity(pIn3, 0);
+    }
+    iKey = sqlite3VdbeIntValue(pIn3);
+
+    /* If the P3 value could not be converted into an integer without
+    ** loss of information, then special processing is required... */
+    if( (pIn3->flags & MEM_Int)==0 ){
+      if( (pIn3->flags & MEM_Real)==0 ){
+        /* If the P3 value cannot be converted into any kind of a number,
+        ** then the seek is not possible, so jump to P2 */
+        pc = pOp->p2 - 1;  VdbeBranchTaken(1,2);
+        break;
+      }
+
+      /* If the approximation iKey is larger than the actual real search
+      ** term, substitute >= for > and < for <=. e.g. if the search term
+      ** is 4.9 and the integer approximation 5:
+      **
+      **        (x >  4.9)    ->     (x >= 5)
+      **        (x <= 4.9)    ->     (x <  5)
+      */
+      if( pIn3->u.r<(double)iKey ){
+        assert( OP_SeekGE==(OP_SeekGT-1) );
+        assert( OP_SeekLT==(OP_SeekLE-1) );
+        assert( (OP_SeekLE & 0x0001)==(OP_SeekGT & 0x0001) );
+        if( (oc & 0x0001)==(OP_SeekGT & 0x0001) ) oc--;
+      }
+
+      /* If the approximation iKey is smaller than the actual real search
+      ** term, substitute <= for < and > for >=.  */
+      else if( pIn3->u.r>(double)iKey ){
+        assert( OP_SeekLE==(OP_SeekLT+1) );
+        assert( OP_SeekGT==(OP_SeekGE+1) );
+        assert( (OP_SeekLT & 0x0001)==(OP_SeekGE & 0x0001) );
+        if( (oc & 0x0001)==(OP_SeekLT & 0x0001) ) oc++;
+      }
+    } 
+    rc = sqlite3BtreeMovetoUnpacked(pC->pCursor, 0, (u64)iKey, 0, &res);
+    pC->movetoTarget = iKey;  /* Used by OP_Delete */
+    if( rc!=SQLITE_OK ){
+      goto abort_due_to_error;
+    }
+  }else{
+    nField = pOp->p4.i;
+    assert( pOp->p4type==P4_INT32 );
+    assert( nField>0 );
+    r.pKeyInfo = pC->pKeyInfo;
+    r.nField = (u16)nField;
+
+    /* The next line of code computes as follows, only faster:
+    **   if( oc==OP_SeekGT || oc==OP_SeekLE ){
+    **     r.default_rc = -1;
+    **   }else{
+    **     r.default_rc = +1;
+    **   }
+    */
+    r.default_rc = ((1 & (oc - OP_SeekLT)) ? -1 : +1);
+    assert( oc!=OP_SeekGT || r.default_rc==-1 );
+    assert( oc!=OP_SeekLE || r.default_rc==-1 );
+    assert( oc!=OP_SeekGE || r.default_rc==+1 );
+    assert( oc!=OP_SeekLT || r.default_rc==+1 );
+
+    r.aMem = &aMem[pOp->p3];
+#ifdef SQLITE_DEBUG
+    { int i; for(i=0; i<r.nField; i++) assert( memIsValid(&r.aMem[i]) ); }
+#endif
+    ExpandBlob(r.aMem);
+    rc = sqlite3BtreeMovetoUnpacked(pC->pCursor, &r, 0, 0, &res);
+    if( rc!=SQLITE_OK ){
+      goto abort_due_to_error;
+    }
+  }
+  pC->deferredMoveto = 0;
+  pC->cacheStatus = CACHE_STALE;
+#ifdef SQLITE_TEST
+  sqlite3_search_count++;
+#endif
+  if( oc>=OP_SeekGE ){  assert( oc==OP_SeekGE || oc==OP_SeekGT );
+    if( res<0 || (res==0 && oc==OP_SeekGT) ){
+      res = 0;
+      rc = sqlite3BtreeNext(pC->pCursor, &res);
+      if( rc!=SQLITE_OK ) goto abort_due_to_error;
+    }else{
+      res = 0;
+    }
+  }else{
+    assert( oc==OP_SeekLT || oc==OP_SeekLE );
+    if( res>0 || (res==0 && oc==OP_SeekLT) ){
+      res = 0;
+      rc = sqlite3BtreePrevious(pC->pCursor, &res);
+      if( rc!=SQLITE_OK ) goto abort_due_to_error;
+    }else{
+      /* res might be negative because the table is empty.  Check to
+      ** see if this is the case.
+      */
+      res = sqlite3BtreeEof(pC->pCursor);
+    }
+  }
+  assert( pOp->p2>0 );
+  VdbeBranchTaken(res!=0,2);
+  if( res ){
+    pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: Seek P1 P2 * * *
+** Synopsis:  intkey=r[P2]
+**
+** P1 is an open table cursor and P2 is a rowid integer.  Arrange
+** for P1 to move so that it points to the rowid given by P2.
+**
+** This is actually a deferred seek.  Nothing actually happens until
+** the cursor is used to read a record.  That way, if no reads
+** occur, no unnecessary I/O happens.
+*/
+case OP_Seek: {    /* in2 */
+  VdbeCursor *pC;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->pCursor!=0 );
+  assert( pC->isTable );
+  pC->nullRow = 0;
+  pIn2 = &aMem[pOp->p2];
+  pC->movetoTarget = sqlite3VdbeIntValue(pIn2);
+  pC->deferredMoveto = 1;
+  break;
+}
+  
+
+/* Opcode: Found P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If P4==0 then register P3 holds a blob constructed by MakeRecord.  If
+** P4>0 then register P3 is the first of P4 registers that form an unpacked
+** record.
+**
+** Cursor P1 is on an index btree.  If the record identified by P3 and P4
+** is a prefix of any entry in P1 then a jump is made to P2 and
+** P1 is left pointing at the matching entry.
+**
+** This operation leaves the cursor in a state where it can be
+** advanced in the forward direction.  The Next instruction will work,
+** but not the Prev instruction.
+**
+** See also: NotFound, NoConflict, NotExists. SeekGe
+*/
+/* Opcode: NotFound P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If P4==0 then register P3 holds a blob constructed by MakeRecord.  If
+** P4>0 then register P3 is the first of P4 registers that form an unpacked
+** record.
+** 
+** Cursor P1 is on an index btree.  If the record identified by P3 and P4
+** is not the prefix of any entry in P1 then a jump is made to P2.  If P1 
+** does contain an entry whose prefix matches the P3/P4 record then control
+** falls through to the next instruction and P1 is left pointing at the
+** matching entry.
+**
+** This operation leaves the cursor in a state where it cannot be
+** advanced in either direction.  In other words, the Next and Prev
+** opcodes do not work after this operation.
+**
+** See also: Found, NotExists, NoConflict
+*/
+/* Opcode: NoConflict P1 P2 P3 P4 *
+** Synopsis: key=r[P3@P4]
+**
+** If P4==0 then register P3 holds a blob constructed by MakeRecord.  If
+** P4>0 then register P3 is the first of P4 registers that form an unpacked
+** record.
+** 
+** Cursor P1 is on an index btree.  If the record identified by P3 and P4
+** contains any NULL value, jump immediately to P2.  If all terms of the
+** record are not-NULL then a check is done to determine if any row in the
+** P1 index btree has a matching key prefix.  If there are no matches, jump
+** immediately to P2.  If there is a match, fall through and leave the P1
+** cursor pointing to the matching row.
+**
+** This opcode is similar to OP_NotFound with the exceptions that the
+** branch is always taken if any part of the search key input is NULL.
+**
+** This operation leaves the cursor in a state where it cannot be
+** advanced in either direction.  In other words, the Next and Prev
+** opcodes do not work after this operation.
+**
+** See also: NotFound, Found, NotExists
+*/
+case OP_NoConflict:     /* jump, in3 */
+case OP_NotFound:       /* jump, in3 */
+case OP_Found: {        /* jump, in3 */
+  int alreadyExists;
+  int ii;
+  VdbeCursor *pC;
+  int res;
+  char *pFree;
+  UnpackedRecord *pIdxKey;
+  UnpackedRecord r;
+  char aTempRec[ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*4 + 7];
+
+#ifdef SQLITE_TEST
+  if( pOp->opcode!=OP_NoConflict ) sqlite3_found_count++;
+#endif
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  assert( pOp->p4type==P4_INT32 );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+#ifdef SQLITE_DEBUG
+  pC->seekOp = pOp->opcode;
+#endif
+  pIn3 = &aMem[pOp->p3];
+  assert( pC->pCursor!=0 );
+  assert( pC->isTable==0 );
+  pFree = 0;  /* Not needed.  Only used to suppress a compiler warning. */
+  if( pOp->p4.i>0 ){
+    r.pKeyInfo = pC->pKeyInfo;
+    r.nField = (u16)pOp->p4.i;
+    r.aMem = pIn3;
+    for(ii=0; ii<r.nField; ii++){
+      assert( memIsValid(&r.aMem[ii]) );
+      ExpandBlob(&r.aMem[ii]);
+#ifdef SQLITE_DEBUG
+      if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]);
+#endif
+    }
+    pIdxKey = &r;
+  }else{
+    pIdxKey = sqlite3VdbeAllocUnpackedRecord(
+        pC->pKeyInfo, aTempRec, sizeof(aTempRec), &pFree
+    );
+    if( pIdxKey==0 ) goto no_mem;
+    assert( pIn3->flags & MEM_Blob );
+    ExpandBlob(pIn3);
+    sqlite3VdbeRecordUnpack(pC->pKeyInfo, pIn3->n, pIn3->z, pIdxKey);
+  }
+  pIdxKey->default_rc = 0;
+  if( pOp->opcode==OP_NoConflict ){
+    /* For the OP_NoConflict opcode, take the jump if any of the
+    ** input fields are NULL, since any key with a NULL will not
+    ** conflict */
+    for(ii=0; ii<pIdxKey->nField; ii++){
+      if( pIdxKey->aMem[ii].flags & MEM_Null ){
+        pc = pOp->p2 - 1; VdbeBranchTaken(1,2);
+        break;
+      }
+    }
+  }
+  rc = sqlite3BtreeMovetoUnpacked(pC->pCursor, pIdxKey, 0, 0, &res);
+  if( pOp->p4.i==0 ){
+    sqlite3DbFree(db, pFree);
+  }
+  if( rc!=SQLITE_OK ){
+    break;
+  }
+  pC->seekResult = res;
+  alreadyExists = (res==0);
+  pC->nullRow = 1-alreadyExists;
+  pC->deferredMoveto = 0;
+  pC->cacheStatus = CACHE_STALE;
+  if( pOp->opcode==OP_Found ){
+    VdbeBranchTaken(alreadyExists!=0,2);
+    if( alreadyExists ) pc = pOp->p2 - 1;
+  }else{
+    VdbeBranchTaken(alreadyExists==0,2);
+    if( !alreadyExists ) pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: NotExists P1 P2 P3 * *
+** Synopsis: intkey=r[P3]
+**
+** P1 is the index of a cursor open on an SQL table btree (with integer
+** keys).  P3 is an integer rowid.  If P1 does not contain a record with
+** rowid P3 then jump immediately to P2.  If P1 does contain a record
+** with rowid P3 then leave the cursor pointing at that record and fall
+** through to the next instruction.
+**
+** The OP_NotFound opcode performs the same operation on index btrees
+** (with arbitrary multi-value keys).
+**
+** This opcode leaves the cursor in a state where it cannot be advanced
+** in either direction.  In other words, the Next and Prev opcodes will
+** not work following this opcode.
+**
+** See also: Found, NotFound, NoConflict
+*/
+case OP_NotExists: {        /* jump, in3 */
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  int res;
+  u64 iKey;
+
+  pIn3 = &aMem[pOp->p3];
+  assert( pIn3->flags & MEM_Int );
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+#ifdef SQLITE_DEBUG
+  pC->seekOp = 0;
+#endif
+  assert( pC->isTable );
+  assert( pC->pseudoTableReg==0 );
+  pCrsr = pC->pCursor;
+  assert( pCrsr!=0 );
+  res = 0;
+  iKey = pIn3->u.i;
+  rc = sqlite3BtreeMovetoUnpacked(pCrsr, 0, iKey, 0, &res);
+  pC->movetoTarget = iKey;  /* Used by OP_Delete */
+  pC->nullRow = 0;
+  pC->cacheStatus = CACHE_STALE;
+  pC->deferredMoveto = 0;
+  VdbeBranchTaken(res!=0,2);
+  if( res!=0 ){
+    pc = pOp->p2 - 1;
+  }
+  pC->seekResult = res;
+  break;
+}
+
+/* Opcode: Sequence P1 P2 * * *
+** Synopsis: r[P2]=cursor[P1].ctr++
+**
+** Find the next available sequence number for cursor P1.
+** Write the sequence number into register P2.
+** The sequence number on the cursor is incremented after this
+** instruction.  
+*/
+case OP_Sequence: {           /* out2-prerelease */
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  assert( p->apCsr[pOp->p1]!=0 );
+  pOut->u.i = p->apCsr[pOp->p1]->seqCount++;
+  break;
+}
+
+
+/* Opcode: NewRowid P1 P2 P3 * *
+** Synopsis: r[P2]=rowid
+**
+** Get a new integer record number (a.k.a "rowid") used as the key to a table.
+** The record number is not previously used as a key in the database
+** table that cursor P1 points to.  The new record number is written
+** written to register P2.
+**
+** If P3>0 then P3 is a register in the root frame of this VDBE that holds 
+** the largest previously generated record number. No new record numbers are
+** allowed to be less than this value. When this value reaches its maximum, 
+** an SQLITE_FULL error is generated. The P3 register is updated with the '
+** generated record number. This P3 mechanism is used to help implement the
+** AUTOINCREMENT feature.
+*/
+case OP_NewRowid: {           /* out2-prerelease */
+  i64 v;                 /* The new rowid */
+  VdbeCursor *pC;        /* Cursor of table to get the new rowid */
+  int res;               /* Result of an sqlite3BtreeLast() */
+  int cnt;               /* Counter to limit the number of searches */
+  Mem *pMem;             /* Register holding largest rowid for AUTOINCREMENT */
+  VdbeFrame *pFrame;     /* Root frame of VDBE */
+
+  v = 0;
+  res = 0;
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  if( NEVER(pC->pCursor==0) ){
+    /* The zero initialization above is all that is needed */
+  }else{
+    /* The next rowid or record number (different terms for the same
+    ** thing) is obtained in a two-step algorithm.
+    **
+    ** First we attempt to find the largest existing rowid and add one
+    ** to that.  But if the largest existing rowid is already the maximum
+    ** positive integer, we have to fall through to the second
+    ** probabilistic algorithm
+    **
+    ** The second algorithm is to select a rowid at random and see if
+    ** it already exists in the table.  If it does not exist, we have
+    ** succeeded.  If the random rowid does exist, we select a new one
+    ** and try again, up to 100 times.
+    */
+    assert( pC->isTable );
+
+#ifdef SQLITE_32BIT_ROWID
+#   define MAX_ROWID 0x7fffffff
+#else
+    /* Some compilers complain about constants of the form 0x7fffffffffffffff.
+    ** Others complain about 0x7ffffffffffffffffLL.  The following macro seems
+    ** to provide the constant while making all compilers happy.
+    */
+#   define MAX_ROWID  (i64)( (((u64)0x7fffffff)<<32) | (u64)0xffffffff )
+#endif
+
+    if( !pC->useRandomRowid ){
+      rc = sqlite3BtreeLast(pC->pCursor, &res);
+      if( rc!=SQLITE_OK ){
+        goto abort_due_to_error;
+      }
+      if( res ){
+        v = 1;   /* IMP: R-61914-48074 */
+      }else{
+        assert( sqlite3BtreeCursorIsValid(pC->pCursor) );
+        rc = sqlite3BtreeKeySize(pC->pCursor, &v);
+        assert( rc==SQLITE_OK );   /* Cannot fail following BtreeLast() */
+        if( v>=MAX_ROWID ){
+          pC->useRandomRowid = 1;
+        }else{
+          v++;   /* IMP: R-29538-34987 */
+        }
+      }
+    }
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+    if( pOp->p3 ){
+      /* Assert that P3 is a valid memory cell. */
+      assert( pOp->p3>0 );
+      if( p->pFrame ){
+        for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent);
+        /* Assert that P3 is a valid memory cell. */
+        assert( pOp->p3<=pFrame->nMem );
+        pMem = &pFrame->aMem[pOp->p3];
+      }else{
+        /* Assert that P3 is a valid memory cell. */
+        assert( pOp->p3<=(p->nMem-p->nCursor) );
+        pMem = &aMem[pOp->p3];
+        memAboutToChange(p, pMem);
+      }
+      assert( memIsValid(pMem) );
+
+      REGISTER_TRACE(pOp->p3, pMem);
+      sqlite3VdbeMemIntegerify(pMem);
+      assert( (pMem->flags & MEM_Int)!=0 );  /* mem(P3) holds an integer */
+      if( pMem->u.i==MAX_ROWID || pC->useRandomRowid ){
+        rc = SQLITE_FULL;   /* IMP: R-12275-61338 */
+        goto abort_due_to_error;
+      }
+      if( v<pMem->u.i+1 ){
+        v = pMem->u.i + 1;
+      }
+      pMem->u.i = v;
+    }
+#endif
+    if( pC->useRandomRowid ){
+      /* IMPLEMENTATION-OF: R-07677-41881 If the largest ROWID is equal to the
+      ** largest possible integer (9223372036854775807) then the database
+      ** engine starts picking positive candidate ROWIDs at random until
+      ** it finds one that is not previously used. */
+      assert( pOp->p3==0 );  /* We cannot be in random rowid mode if this is
+                             ** an AUTOINCREMENT table. */
+      cnt = 0;
+      do{
+        sqlite3_randomness(sizeof(v), &v);
+        v &= (MAX_ROWID>>1); v++;  /* Ensure that v is greater than zero */
+      }while(  ((rc = sqlite3BtreeMovetoUnpacked(pC->pCursor, 0, (u64)v,
+                                                 0, &res))==SQLITE_OK)
+            && (res==0)
+            && (++cnt<100));
+      if( rc==SQLITE_OK && res==0 ){
+        rc = SQLITE_FULL;   /* IMP: R-38219-53002 */
+        goto abort_due_to_error;
+      }
+      assert( v>0 );  /* EV: R-40812-03570 */
+    }
+    pC->deferredMoveto = 0;
+    pC->cacheStatus = CACHE_STALE;
+  }
+  pOut->u.i = v;
+  break;
+}
+
+/* Opcode: Insert P1 P2 P3 P4 P5
+** Synopsis: intkey=r[P3] data=r[P2]
+**
+** Write an entry into the table of cursor P1.  A new entry is
+** created if it doesn't already exist or the data for an existing
+** entry is overwritten.  The data is the value MEM_Blob stored in register
+** number P2. The key is stored in register P3. The key must
+** be a MEM_Int.
+**
+** If the OPFLAG_NCHANGE flag of P5 is set, then the row change count is
+** incremented (otherwise not).  If the OPFLAG_LASTROWID flag of P5 is set,
+** then rowid is stored for subsequent return by the
+** sqlite3_last_insert_rowid() function (otherwise it is unmodified).
+**
+** If the OPFLAG_USESEEKRESULT flag of P5 is set and if the result of
+** the last seek operation (OP_NotExists) was a success, then this
+** operation will not attempt to find the appropriate row before doing
+** the insert but will instead overwrite the row that the cursor is
+** currently pointing to.  Presumably, the prior OP_NotExists opcode
+** has already positioned the cursor correctly.  This is an optimization
+** that boosts performance by avoiding redundant seeks.
+**
+** If the OPFLAG_ISUPDATE flag is set, then this opcode is part of an
+** UPDATE operation.  Otherwise (if the flag is clear) then this opcode
+** is part of an INSERT operation.  The difference is only important to
+** the update hook.
+**
+** Parameter P4 may point to a string containing the table-name, or
+** may be NULL. If it is not NULL, then the update-hook 
+** (sqlite3.xUpdateCallback) is invoked following a successful insert.
+**
+** (WARNING/TODO: If P1 is a pseudo-cursor and P2 is dynamically
+** allocated, then ownership of P2 is transferred to the pseudo-cursor
+** and register P2 becomes ephemeral.  If the cursor is changed, the
+** value of register P2 will then change.  Make sure this does not
+** cause any problems.)
+**
+** This instruction only works on tables.  The equivalent instruction
+** for indices is OP_IdxInsert.
+*/
+/* Opcode: InsertInt P1 P2 P3 P4 P5
+** Synopsis:  intkey=P3 data=r[P2]
+**
+** This works exactly like OP_Insert except that the key is the
+** integer value P3, not the value of the integer stored in register P3.
+*/
+case OP_Insert: 
+case OP_InsertInt: {
+  Mem *pData;       /* MEM cell holding data for the record to be inserted */
+  Mem *pKey;        /* MEM cell holding key  for the record */
+  i64 iKey;         /* The integer ROWID or key for the record to be inserted */
+  VdbeCursor *pC;   /* Cursor to table into which insert is written */
+  int nZero;        /* Number of zero-bytes to append */
+  int seekResult;   /* Result of prior seek or 0 if no USESEEKRESULT flag */
+  const char *zDb;  /* database name - used by the update hook */
+  const char *zTbl; /* Table name - used by the opdate hook */
+  int op;           /* Opcode for update hook: SQLITE_UPDATE or SQLITE_INSERT */
+
+  pData = &aMem[pOp->p2];
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  assert( memIsValid(pData) );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->pCursor!=0 );
+  assert( pC->pseudoTableReg==0 );
+  assert( pC->isTable );
+  REGISTER_TRACE(pOp->p2, pData);
+
+  if( pOp->opcode==OP_Insert ){
+    pKey = &aMem[pOp->p3];
+    assert( pKey->flags & MEM_Int );
+    assert( memIsValid(pKey) );
+    REGISTER_TRACE(pOp->p3, pKey);
+    iKey = pKey->u.i;
+  }else{
+    assert( pOp->opcode==OP_InsertInt );
+    iKey = pOp->p3;
+  }
+
+  if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
+  if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = lastRowid = iKey;
+  if( pData->flags & MEM_Null ){
+    pData->z = 0;
+    pData->n = 0;
+  }else{
+    assert( pData->flags & (MEM_Blob|MEM_Str) );
+  }
+  seekResult = ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0);
+  if( pData->flags & MEM_Zero ){
+    nZero = pData->u.nZero;
+  }else{
+    nZero = 0;
+  }
+  rc = sqlite3BtreeInsert(pC->pCursor, 0, iKey,
+                          pData->z, pData->n, nZero,
+                          (pOp->p5 & OPFLAG_APPEND)!=0, seekResult
+  );
+  pC->deferredMoveto = 0;
+  pC->cacheStatus = CACHE_STALE;
+
+  /* Invoke the update-hook if required. */
+  if( rc==SQLITE_OK && db->xUpdateCallback && pOp->p4.z ){
+    zDb = db->aDb[pC->iDb].zName;
+    zTbl = pOp->p4.z;
+    op = ((pOp->p5 & OPFLAG_ISUPDATE) ? SQLITE_UPDATE : SQLITE_INSERT);
+    assert( pC->isTable );
+    db->xUpdateCallback(db->pUpdateArg, op, zDb, zTbl, iKey);
+    assert( pC->iDb>=0 );
+  }
+  break;
+}
+
+/* Opcode: Delete P1 P2 * P4 *
+**
+** Delete the record at which the P1 cursor is currently pointing.
+**
+** The cursor will be left pointing at either the next or the previous
+** record in the table. If it is left pointing at the next record, then
+** the next Next instruction will be a no-op.  Hence it is OK to delete
+** a record from within a Next loop.
+**
+** If the OPFLAG_NCHANGE flag of P2 is set, then the row change count is
+** incremented (otherwise not).
+**
+** P1 must not be pseudo-table.  It has to be a real table with
+** multiple rows.
+**
+** If P4 is not NULL, then it is the name of the table that P1 is
+** pointing to.  The update hook will be invoked, if it exists.
+** If P4 is not NULL then the P1 cursor must have been positioned
+** using OP_NotFound prior to invoking this opcode.
+*/
+case OP_Delete: {
+  VdbeCursor *pC;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->pCursor!=0 );  /* Only valid for real tables, no pseudotables */
+  assert( pC->deferredMoveto==0 );
+
+#ifdef SQLITE_DEBUG
+  /* The seek operation that positioned the cursor prior to OP_Delete will
+  ** have also set the pC->movetoTarget field to the rowid of the row that
+  ** is being deleted */
+  if( pOp->p4.z && pC->isTable ){
+    i64 iKey = 0;
+    sqlite3BtreeKeySize(pC->pCursor, &iKey);
+    assert( pC->movetoTarget==iKey ); 
+  }
+#endif
+ 
+  rc = sqlite3BtreeDelete(pC->pCursor);
+  pC->cacheStatus = CACHE_STALE;
+
+  /* Invoke the update-hook if required. */
+  if( rc==SQLITE_OK && db->xUpdateCallback && pOp->p4.z && pC->isTable ){
+    db->xUpdateCallback(db->pUpdateArg, SQLITE_DELETE,
+                        db->aDb[pC->iDb].zName, pOp->p4.z, pC->movetoTarget);
+    assert( pC->iDb>=0 );
+  }
+  if( pOp->p2 & OPFLAG_NCHANGE ) p->nChange++;
+  break;
+}
+/* Opcode: ResetCount * * * * *
+**
+** The value of the change counter is copied to the database handle
+** change counter (returned by subsequent calls to sqlite3_changes()).
+** Then the VMs internal change counter resets to 0.
+** This is used by trigger programs.
+*/
+case OP_ResetCount: {
+  sqlite3VdbeSetChanges(db, p->nChange);
+  p->nChange = 0;
+  break;
+}
+
+/* Opcode: SorterCompare P1 P2 P3 P4
+** Synopsis:  if key(P1)!=trim(r[P3],P4) goto P2
+**
+** P1 is a sorter cursor. This instruction compares a prefix of the
+** record blob in register P3 against a prefix of the entry that 
+** the sorter cursor currently points to.  Only the first P4 fields
+** of r[P3] and the sorter record are compared.
+**
+** If either P3 or the sorter contains a NULL in one of their significant
+** fields (not counting the P4 fields at the end which are ignored) then
+** the comparison is assumed to be equal.
+**
+** Fall through to next instruction if the two records compare equal to
+** each other.  Jump to P2 if they are different.
+*/
+case OP_SorterCompare: {
+  VdbeCursor *pC;
+  int res;
+  int nKeyCol;
+
+  pC = p->apCsr[pOp->p1];
+  assert( isSorter(pC) );
+  assert( pOp->p4type==P4_INT32 );
+  pIn3 = &aMem[pOp->p3];
+  nKeyCol = pOp->p4.i;
+  res = 0;
+  rc = sqlite3VdbeSorterCompare(pC, pIn3, nKeyCol, &res);
+  VdbeBranchTaken(res!=0,2);
+  if( res ){
+    pc = pOp->p2-1;
+  }
+  break;
+};
+
+/* Opcode: SorterData P1 P2 P3 * *
+** Synopsis: r[P2]=data
+**
+** Write into register P2 the current sorter data for sorter cursor P1.
+** Then clear the column header cache on cursor P3.
+**
+** This opcode is normally use to move a record out of the sorter and into
+** a register that is the source for a pseudo-table cursor created using
+** OpenPseudo.  That pseudo-table cursor is the one that is identified by
+** parameter P3.  Clearing the P3 column cache as part of this opcode saves
+** us from having to issue a separate NullRow instruction to clear that cache.
+*/
+case OP_SorterData: {
+  VdbeCursor *pC;
+
+  pOut = &aMem[pOp->p2];
+  pC = p->apCsr[pOp->p1];
+  assert( isSorter(pC) );
+  rc = sqlite3VdbeSorterRowkey(pC, pOut);
+  assert( rc!=SQLITE_OK || (pOut->flags & MEM_Blob) );
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  p->apCsr[pOp->p3]->cacheStatus = CACHE_STALE;
+  break;
+}
+
+/* Opcode: RowData P1 P2 * * *
+** Synopsis: r[P2]=data
+**
+** Write into register P2 the complete row data for cursor P1.
+** There is no interpretation of the data.  
+** It is just copied onto the P2 register exactly as 
+** it is found in the database file.
+**
+** If the P1 cursor must be pointing to a valid row (not a NULL row)
+** of a real table, not a pseudo-table.
+*/
+/* Opcode: RowKey P1 P2 * * *
+** Synopsis: r[P2]=key
+**
+** Write into register P2 the complete row key for cursor P1.
+** There is no interpretation of the data.  
+** The key is copied onto the P2 register exactly as 
+** it is found in the database file.
+**
+** If the P1 cursor must be pointing to a valid row (not a NULL row)
+** of a real table, not a pseudo-table.
+*/
+case OP_RowKey:
+case OP_RowData: {
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  u32 n;
+  i64 n64;
+
+  pOut = &aMem[pOp->p2];
+  memAboutToChange(p, pOut);
+
+  /* Note that RowKey and RowData are really exactly the same instruction */
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( isSorter(pC)==0 );
+  assert( pC->isTable || pOp->opcode!=OP_RowData );
+  assert( pC->isTable==0 || pOp->opcode==OP_RowData );
+  assert( pC!=0 );
+  assert( pC->nullRow==0 );
+  assert( pC->pseudoTableReg==0 );
+  assert( pC->pCursor!=0 );
+  pCrsr = pC->pCursor;
+
+  /* The OP_RowKey and OP_RowData opcodes always follow OP_NotExists or
+  ** OP_Rewind/Op_Next with no intervening instructions that might invalidate
+  ** the cursor.  If this where not the case, on of the following assert()s
+  ** would fail.  Should this ever change (because of changes in the code
+  ** generator) then the fix would be to insert a call to
+  ** sqlite3VdbeCursorMoveto().
+  */
+  assert( pC->deferredMoveto==0 );
+  assert( sqlite3BtreeCursorIsValid(pCrsr) );
+#if 0  /* Not required due to the previous to assert() statements */
+  rc = sqlite3VdbeCursorMoveto(pC);
+  if( rc!=SQLITE_OK ) goto abort_due_to_error;
+#endif
+
+  if( pC->isTable==0 ){
+    assert( !pC->isTable );
+    VVA_ONLY(rc =) sqlite3BtreeKeySize(pCrsr, &n64);
+    assert( rc==SQLITE_OK );    /* True because of CursorMoveto() call above */
+    if( n64>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+      goto too_big;
+    }
+    n = (u32)n64;
+  }else{
+    VVA_ONLY(rc =) sqlite3BtreeDataSize(pCrsr, &n);
+    assert( rc==SQLITE_OK );    /* DataSize() cannot fail */
+    if( n>(u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){
+      goto too_big;
+    }
+  }
+  testcase( n==0 );
+  if( sqlite3VdbeMemClearAndResize(pOut, MAX(n,32)) ){
+    goto no_mem;
+  }
+  pOut->n = n;
+  MemSetTypeFlag(pOut, MEM_Blob);
+  if( pC->isTable==0 ){
+    rc = sqlite3BtreeKey(pCrsr, 0, n, pOut->z);
+  }else{
+    rc = sqlite3BtreeData(pCrsr, 0, n, pOut->z);
+  }
+  pOut->enc = SQLITE_UTF8;  /* In case the blob is ever cast to text */
+  UPDATE_MAX_BLOBSIZE(pOut);
+  REGISTER_TRACE(pOp->p2, pOut);
+  break;
+}
+
+/* Opcode: Rowid P1 P2 * * *
+** Synopsis: r[P2]=rowid
+**
+** Store in register P2 an integer which is the key of the table entry that
+** P1 is currently point to.
+**
+** P1 can be either an ordinary table or a virtual table.  There used to
+** be a separate OP_VRowid opcode for use with virtual tables, but this
+** one opcode now works for both table types.
+*/
+case OP_Rowid: {                 /* out2-prerelease */
+  VdbeCursor *pC;
+  i64 v;
+  sqlite3_vtab *pVtab;
+  const sqlite3_module *pModule;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->pseudoTableReg==0 || pC->nullRow );
+  if( pC->nullRow ){
+    pOut->flags = MEM_Null;
+    break;
+  }else if( pC->deferredMoveto ){
+    v = pC->movetoTarget;
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  }else if( pC->pVtabCursor ){
+    pVtab = pC->pVtabCursor->pVtab;
+    pModule = pVtab->pModule;
+    assert( pModule->xRowid );
+    rc = pModule->xRowid(pC->pVtabCursor, &v);
+    sqlite3VtabImportErrmsg(p, pVtab);
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+  }else{
+    assert( pC->pCursor!=0 );
+    rc = sqlite3VdbeCursorRestore(pC);
+    if( rc ) goto abort_due_to_error;
+    if( pC->nullRow ){
+      pOut->flags = MEM_Null;
+      break;
+    }
+    rc = sqlite3BtreeKeySize(pC->pCursor, &v);
+    assert( rc==SQLITE_OK );  /* Always so because of CursorRestore() above */
+  }
+  pOut->u.i = v;
+  break;
+}
+
+/* Opcode: NullRow P1 * * * *
+**
+** Move the cursor P1 to a null row.  Any OP_Column operations
+** that occur while the cursor is on the null row will always
+** write a NULL.
+*/
+case OP_NullRow: {
+  VdbeCursor *pC;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  pC->nullRow = 1;
+  pC->cacheStatus = CACHE_STALE;
+  if( pC->pCursor ){
+    sqlite3BtreeClearCursor(pC->pCursor);
+  }
+  break;
+}
+
+/* Opcode: Last P1 P2 * * *
+**
+** The next use of the Rowid or Column or Prev instruction for P1 
+** will refer to the last entry in the database table or index.
+** If the table or index is empty and P2>0, then jump immediately to P2.
+** If P2 is 0 or if the table or index is not empty, fall through
+** to the following instruction.
+**
+** This opcode leaves the cursor configured to move in reverse order,
+** from the end toward the beginning.  In other words, the cursor is
+** configured to use Prev, not Next.
+*/
+case OP_Last: {        /* jump */
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  int res;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  pCrsr = pC->pCursor;
+  res = 0;
+  assert( pCrsr!=0 );
+  rc = sqlite3BtreeLast(pCrsr, &res);
+  pC->nullRow = (u8)res;
+  pC->deferredMoveto = 0;
+  pC->cacheStatus = CACHE_STALE;
+#ifdef SQLITE_DEBUG
+  pC->seekOp = OP_Last;
+#endif
+  if( pOp->p2>0 ){
+    VdbeBranchTaken(res!=0,2);
+    if( res ) pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+
+/* Opcode: Sort P1 P2 * * *
+**
+** This opcode does exactly the same thing as OP_Rewind except that
+** it increments an undocumented global variable used for testing.
+**
+** Sorting is accomplished by writing records into a sorting index,
+** then rewinding that index and playing it back from beginning to
+** end.  We use the OP_Sort opcode instead of OP_Rewind to do the
+** rewinding so that the global variable will be incremented and
+** regression tests can determine whether or not the optimizer is
+** correctly optimizing out sorts.
+*/
+case OP_SorterSort:    /* jump */
+case OP_Sort: {        /* jump */
+#ifdef SQLITE_TEST
+  sqlite3_sort_count++;
+  sqlite3_search_count--;
+#endif
+  p->aCounter[SQLITE_STMTSTATUS_SORT]++;
+  /* Fall through into OP_Rewind */
+}
+/* Opcode: Rewind P1 P2 * * *
+**
+** The next use of the Rowid or Column or Next instruction for P1 
+** will refer to the first entry in the database table or index.
+** If the table or index is empty, jump immediately to P2.
+** If the table or index is not empty, fall through to the following 
+** instruction.
+**
+** This opcode leaves the cursor configured to move in forward order,
+** from the beginning toward the end.  In other words, the cursor is
+** configured to use Next, not Prev.
+*/
+case OP_Rewind: {        /* jump */
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  int res;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( isSorter(pC)==(pOp->opcode==OP_SorterSort) );
+  res = 1;
+#ifdef SQLITE_DEBUG
+  pC->seekOp = OP_Rewind;
+#endif
+  if( isSorter(pC) ){
+    rc = sqlite3VdbeSorterRewind(pC, &res);
+  }else{
+    pCrsr = pC->pCursor;
+    assert( pCrsr );
+    rc = sqlite3BtreeFirst(pCrsr, &res);
+    pC->deferredMoveto = 0;
+    pC->cacheStatus = CACHE_STALE;
+  }
+  pC->nullRow = (u8)res;
+  assert( pOp->p2>0 && pOp->p2<p->nOp );
+  VdbeBranchTaken(res!=0,2);
+  if( res ){
+    pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: Next P1 P2 P3 P4 P5
+**
+** Advance cursor P1 so that it points to the next key/data pair in its
+** table or index.  If there are no more key/value pairs then fall through
+** to the following instruction.  But if the cursor advance was successful,
+** jump immediately to P2.
+**
+** The Next opcode is only valid following an SeekGT, SeekGE, or
+** OP_Rewind opcode used to position the cursor.  Next is not allowed
+** to follow SeekLT, SeekLE, or OP_Last.
+**
+** The P1 cursor must be for a real table, not a pseudo-table.  P1 must have
+** been opened prior to this opcode or the program will segfault.
+**
+** The P3 value is a hint to the btree implementation. If P3==1, that
+** means P1 is an SQL index and that this instruction could have been
+** omitted if that index had been unique.  P3 is usually 0.  P3 is
+** always either 0 or 1.
+**
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreeNext().
+**
+** If P5 is positive and the jump is taken, then event counter
+** number P5-1 in the prepared statement is incremented.
+**
+** See also: Prev, NextIfOpen
+*/
+/* Opcode: NextIfOpen P1 P2 P3 P4 P5
+**
+** This opcode works just like Next except that if cursor P1 is not
+** open it behaves a no-op.
+*/
+/* Opcode: Prev P1 P2 P3 P4 P5
+**
+** Back up cursor P1 so that it points to the previous key/data pair in its
+** table or index.  If there is no previous key/value pairs then fall through
+** to the following instruction.  But if the cursor backup was successful,
+** jump immediately to P2.
+**
+**
+** The Prev opcode is only valid following an SeekLT, SeekLE, or
+** OP_Last opcode used to position the cursor.  Prev is not allowed
+** to follow SeekGT, SeekGE, or OP_Rewind.
+**
+** The P1 cursor must be for a real table, not a pseudo-table.  If P1 is
+** not open then the behavior is undefined.
+**
+** The P3 value is a hint to the btree implementation. If P3==1, that
+** means P1 is an SQL index and that this instruction could have been
+** omitted if that index had been unique.  P3 is usually 0.  P3 is
+** always either 0 or 1.
+**
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreePrevious().
+**
+** If P5 is positive and the jump is taken, then event counter
+** number P5-1 in the prepared statement is incremented.
+*/
+/* Opcode: PrevIfOpen P1 P2 P3 P4 P5
+**
+** This opcode works just like Prev except that if cursor P1 is not
+** open it behaves a no-op.
+*/
+case OP_SorterNext: {  /* jump */
+  VdbeCursor *pC;
+  int res;
+
+  pC = p->apCsr[pOp->p1];
+  assert( isSorter(pC) );
+  res = 0;
+  rc = sqlite3VdbeSorterNext(db, pC, &res);
+  goto next_tail;
+case OP_PrevIfOpen:    /* jump */
+case OP_NextIfOpen:    /* jump */
+  if( p->apCsr[pOp->p1]==0 ) break;
+  /* Fall through */
+case OP_Prev:          /* jump */
+case OP_Next:          /* jump */
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  assert( pOp->p5<ArraySize(p->aCounter) );
+  pC = p->apCsr[pOp->p1];
+  res = pOp->p3;
+  assert( pC!=0 );
+  assert( pC->deferredMoveto==0 );
+  assert( pC->pCursor );
+  assert( res==0 || (res==1 && pC->isTable==0) );
+  testcase( res==1 );
+  assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext );
+  assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious );
+  assert( pOp->opcode!=OP_NextIfOpen || pOp->p4.xAdvance==sqlite3BtreeNext );
+  assert( pOp->opcode!=OP_PrevIfOpen || pOp->p4.xAdvance==sqlite3BtreePrevious);
+
+  /* The Next opcode is only used after SeekGT, SeekGE, and Rewind.
+  ** The Prev opcode is only used after SeekLT, SeekLE, and Last. */
+  assert( pOp->opcode!=OP_Next || pOp->opcode!=OP_NextIfOpen
+       || pC->seekOp==OP_SeekGT || pC->seekOp==OP_SeekGE
+       || pC->seekOp==OP_Rewind || pC->seekOp==OP_Found);
+  assert( pOp->opcode!=OP_Prev || pOp->opcode!=OP_PrevIfOpen
+       || pC->seekOp==OP_SeekLT || pC->seekOp==OP_SeekLE
+       || pC->seekOp==OP_Last );
+
+  rc = pOp->p4.xAdvance(pC->pCursor, &res);
+next_tail:
+  pC->cacheStatus = CACHE_STALE;
+  VdbeBranchTaken(res==0,2);
+  if( res==0 ){
+    pC->nullRow = 0;
+    pc = pOp->p2 - 1;
+    p->aCounter[pOp->p5]++;
+#ifdef SQLITE_TEST
+    sqlite3_search_count++;
+#endif
+  }else{
+    pC->nullRow = 1;
+  }
+  goto check_for_interrupt;
+}
+
+/* Opcode: IdxInsert P1 P2 P3 * P5
+** Synopsis: key=r[P2]
+**
+** Register P2 holds an SQL index key made using the
+** MakeRecord instructions.  This opcode writes that key
+** into the index P1.  Data for the entry is nil.
+**
+** P3 is a flag that provides a hint to the b-tree layer that this
+** insert is likely to be an append.
+**
+** If P5 has the OPFLAG_NCHANGE bit set, then the change counter is
+** incremented by this instruction.  If the OPFLAG_NCHANGE bit is clear,
+** then the change counter is unchanged.
+**
+** If P5 has the OPFLAG_USESEEKRESULT bit set, then the cursor must have
+** just done a seek to the spot where the new entry is to be inserted.
+** This flag avoids doing an extra seek.
+**
+** This instruction only works for indices.  The equivalent instruction
+** for tables is OP_Insert.
+*/
+case OP_SorterInsert:       /* in2 */
+case OP_IdxInsert: {        /* in2 */
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  int nKey;
+  const char *zKey;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( isSorter(pC)==(pOp->opcode==OP_SorterInsert) );
+  pIn2 = &aMem[pOp->p2];
+  assert( pIn2->flags & MEM_Blob );
+  pCrsr = pC->pCursor;
+  if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
+  assert( pCrsr!=0 );
+  assert( pC->isTable==0 );
+  rc = ExpandBlob(pIn2);
+  if( rc==SQLITE_OK ){
+    if( isSorter(pC) ){
+      rc = sqlite3VdbeSorterWrite(pC, pIn2);
+    }else{
+      nKey = pIn2->n;
+      zKey = pIn2->z;
+      rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, 
+          ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
+          );
+      assert( pC->deferredMoveto==0 );
+      pC->cacheStatus = CACHE_STALE;
+    }
+  }
+  break;
+}
+
+/* Opcode: IdxDelete P1 P2 P3 * *
+** Synopsis: key=r[P2@P3]
+**
+** The content of P3 registers starting at register P2 form
+** an unpacked index key. This opcode removes that entry from the 
+** index opened by cursor P1.
+*/
+case OP_IdxDelete: {
+  VdbeCursor *pC;
+  BtCursor *pCrsr;
+  int res;
+  UnpackedRecord r;
+
+  assert( pOp->p3>0 );
+  assert( pOp->p2>0 && pOp->p2+pOp->p3<=(p->nMem-p->nCursor)+1 );
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  pCrsr = pC->pCursor;
+  assert( pCrsr!=0 );
+  assert( pOp->p5==0 );
+  r.pKeyInfo = pC->pKeyInfo;
+  r.nField = (u16)pOp->p3;
+  r.default_rc = 0;
+  r.aMem = &aMem[pOp->p2];
+#ifdef SQLITE_DEBUG
+  { int i; for(i=0; i<r.nField; i++) assert( memIsValid(&r.aMem[i]) ); }
+#endif
+  rc = sqlite3BtreeMovetoUnpacked(pCrsr, &r, 0, 0, &res);
+  if( rc==SQLITE_OK && res==0 ){
+    rc = sqlite3BtreeDelete(pCrsr);
+  }
+  assert( pC->deferredMoveto==0 );
+  pC->cacheStatus = CACHE_STALE;
+  break;
+}
+
+/* Opcode: IdxRowid P1 P2 * * *
+** Synopsis: r[P2]=rowid
+**
+** Write into register P2 an integer which is the last entry in the record at
+** the end of the index key pointed to by cursor P1.  This integer should be
+** the rowid of the table entry to which this index entry points.
+**
+** See also: Rowid, MakeRecord.
+*/
+case OP_IdxRowid: {              /* out2-prerelease */
+  BtCursor *pCrsr;
+  VdbeCursor *pC;
+  i64 rowid;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  pCrsr = pC->pCursor;
+  assert( pCrsr!=0 );
+  pOut->flags = MEM_Null;
+  assert( pC->isTable==0 );
+  assert( pC->deferredMoveto==0 );
+
+  /* sqlite3VbeCursorRestore() can only fail if the record has been deleted
+  ** out from under the cursor.  That will never happend for an IdxRowid
+  ** opcode, hence the NEVER() arround the check of the return value.
+  */
+  rc = sqlite3VdbeCursorRestore(pC);
+  if( NEVER(rc!=SQLITE_OK) ) goto abort_due_to_error;
+
+  if( !pC->nullRow ){
+    rowid = 0;  /* Not needed.  Only used to silence a warning. */
+    rc = sqlite3VdbeIdxRowid(db, pCrsr, &rowid);
+    if( rc!=SQLITE_OK ){
+      goto abort_due_to_error;
+    }
+    pOut->u.i = rowid;
+    pOut->flags = MEM_Int;
+  }
+  break;
+}
+
+/* Opcode: IdxGE P1 P2 P3 P4 P5
+** Synopsis: key=r[P3@P4]
+**
+** The P4 register values beginning with P3 form an unpacked index 
+** key that omits the PRIMARY KEY.  Compare this key value against the index 
+** that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID 
+** fields at the end.
+**
+** If the P1 index entry is greater than or equal to the key value
+** then jump to P2.  Otherwise fall through to the next instruction.
+*/
+/* Opcode: IdxGT P1 P2 P3 P4 P5
+** Synopsis: key=r[P3@P4]
+**
+** The P4 register values beginning with P3 form an unpacked index 
+** key that omits the PRIMARY KEY.  Compare this key value against the index 
+** that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID 
+** fields at the end.
+**
+** If the P1 index entry is greater than the key value
+** then jump to P2.  Otherwise fall through to the next instruction.
+*/
+/* Opcode: IdxLT P1 P2 P3 P4 P5
+** Synopsis: key=r[P3@P4]
+**
+** The P4 register values beginning with P3 form an unpacked index 
+** key that omits the PRIMARY KEY or ROWID.  Compare this key value against
+** the index that P1 is currently pointing to, ignoring the PRIMARY KEY or
+** ROWID on the P1 index.
+**
+** If the P1 index entry is less than the key value then jump to P2.
+** Otherwise fall through to the next instruction.
+*/
+/* Opcode: IdxLE P1 P2 P3 P4 P5
+** Synopsis: key=r[P3@P4]
+**
+** The P4 register values beginning with P3 form an unpacked index 
+** key that omits the PRIMARY KEY or ROWID.  Compare this key value against
+** the index that P1 is currently pointing to, ignoring the PRIMARY KEY or
+** ROWID on the P1 index.
+**
+** If the P1 index entry is less than or equal to the key value then jump
+** to P2. Otherwise fall through to the next instruction.
+*/
+case OP_IdxLE:          /* jump */
+case OP_IdxGT:          /* jump */
+case OP_IdxLT:          /* jump */
+case OP_IdxGE:  {       /* jump */
+  VdbeCursor *pC;
+  int res;
+  UnpackedRecord r;
+
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  assert( pC->isOrdered );
+  assert( pC->pCursor!=0);
+  assert( pC->deferredMoveto==0 );
+  assert( pOp->p5==0 || pOp->p5==1 );
+  assert( pOp->p4type==P4_INT32 );
+  r.pKeyInfo = pC->pKeyInfo;
+  r.nField = (u16)pOp->p4.i;
+  if( pOp->opcode<OP_IdxLT ){
+    assert( pOp->opcode==OP_IdxLE || pOp->opcode==OP_IdxGT );
+    r.default_rc = -1;
+  }else{
+    assert( pOp->opcode==OP_IdxGE || pOp->opcode==OP_IdxLT );
+    r.default_rc = 0;
+  }
+  r.aMem = &aMem[pOp->p3];
+#ifdef SQLITE_DEBUG
+  { int i; for(i=0; i<r.nField; i++) assert( memIsValid(&r.aMem[i]) ); }
+#endif
+  res = 0;  /* Not needed.  Only used to silence a warning. */
+  rc = sqlite3VdbeIdxKeyCompare(db, pC, &r, &res);
+  assert( (OP_IdxLE&1)==(OP_IdxLT&1) && (OP_IdxGE&1)==(OP_IdxGT&1) );
+  if( (pOp->opcode&1)==(OP_IdxLT&1) ){
+    assert( pOp->opcode==OP_IdxLE || pOp->opcode==OP_IdxLT );
+    res = -res;
+  }else{
+    assert( pOp->opcode==OP_IdxGE || pOp->opcode==OP_IdxGT );
+    res++;
+  }
+  VdbeBranchTaken(res>0,2);
+  if( res>0 ){
+    pc = pOp->p2 - 1 ;
+  }
+  break;
+}
+
+/* Opcode: Destroy P1 P2 P3 * *
+**
+** Delete an entire database table or index whose root page in the database
+** file is given by P1.
+**
+** The table being destroyed is in the main database file if P3==0.  If
+** P3==1 then the table to be clear is in the auxiliary database file
+** that is used to store tables create using CREATE TEMPORARY TABLE.
+**
+** If AUTOVACUUM is enabled then it is possible that another root page
+** might be moved into the newly deleted root page in order to keep all
+** root pages contiguous at the beginning of the database.  The former
+** value of the root page that moved - its value before the move occurred -
+** is stored in register P2.  If no page 
+** movement was required (because the table being dropped was already 
+** the last one in the database) then a zero is stored in register P2.
+** If AUTOVACUUM is disabled then a zero is stored in register P2.
+**
+** See also: Clear
+*/
+case OP_Destroy: {     /* out2-prerelease */
+  int iMoved;
+  int iCnt;
+  Vdbe *pVdbe;
+  int iDb;
+
+  assert( p->readOnly==0 );
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  iCnt = 0;
+  for(pVdbe=db->pVdbe; pVdbe; pVdbe = pVdbe->pNext){
+    if( pVdbe->magic==VDBE_MAGIC_RUN && pVdbe->bIsReader 
+     && pVdbe->inVtabMethod<2 && pVdbe->pc>=0 
+    ){
+      iCnt++;
+    }
+  }
+#else
+  iCnt = db->nVdbeRead;
+#endif
+  pOut->flags = MEM_Null;
+  if( iCnt>1 ){
+    rc = SQLITE_LOCKED;
+    p->errorAction = OE_Abort;
+  }else{
+    iDb = pOp->p3;
+    assert( iCnt==1 );
+    assert( DbMaskTest(p->btreeMask, iDb) );
+    iMoved = 0;  /* Not needed.  Only to silence a warning. */
+    rc = sqlite3BtreeDropTable(db->aDb[iDb].pBt, pOp->p1, &iMoved);
+    pOut->flags = MEM_Int;
+    pOut->u.i = iMoved;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    if( rc==SQLITE_OK && iMoved!=0 ){
+      sqlite3RootPageMoved(db, iDb, iMoved, pOp->p1);
+      /* All OP_Destroy operations occur on the same btree */
+      assert( resetSchemaOnFault==0 || resetSchemaOnFault==iDb+1 );
+      resetSchemaOnFault = iDb+1;
+    }
+#endif
+  }
+  break;
+}
+
+/* Opcode: Clear P1 P2 P3
+**
+** Delete all contents of the database table or index whose root page
+** in the database file is given by P1.  But, unlike Destroy, do not
+** remove the table or index from the database file.
+**
+** The table being clear is in the main database file if P2==0.  If
+** P2==1 then the table to be clear is in the auxiliary database file
+** that is used to store tables create using CREATE TEMPORARY TABLE.
+**
+** If the P3 value is non-zero, then the table referred to must be an
+** intkey table (an SQL table, not an index). In this case the row change 
+** count is incremented by the number of rows in the table being cleared. 
+** If P3 is greater than zero, then the value stored in register P3 is
+** also incremented by the number of rows in the table being cleared.
+**
+** See also: Destroy
+*/
+case OP_Clear: {
+  int nChange;
+ 
+  nChange = 0;
+  assert( p->readOnly==0 );
+  assert( DbMaskTest(p->btreeMask, pOp->p2) );
+  rc = sqlite3BtreeClearTable(
+      db->aDb[pOp->p2].pBt, pOp->p1, (pOp->p3 ? &nChange : 0)
+  );
+  if( pOp->p3 ){
+    p->nChange += nChange;
+    if( pOp->p3>0 ){
+      assert( memIsValid(&aMem[pOp->p3]) );
+      memAboutToChange(p, &aMem[pOp->p3]);
+      aMem[pOp->p3].u.i += nChange;
+    }
+  }
+  break;
+}
+
+/* Opcode: ResetSorter P1 * * * *
+**
+** Delete all contents from the ephemeral table or sorter
+** that is open on cursor P1.
+**
+** This opcode only works for cursors used for sorting and
+** opened with OP_OpenEphemeral or OP_SorterOpen.
+*/
+case OP_ResetSorter: {
+  VdbeCursor *pC;
+ 
+  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
+  pC = p->apCsr[pOp->p1];
+  assert( pC!=0 );
+  if( pC->pSorter ){
+    sqlite3VdbeSorterReset(db, pC->pSorter);
+  }else{
+    assert( pC->isEphemeral );
+    rc = sqlite3BtreeClearTableOfCursor(pC->pCursor);
+  }
+  break;
+}
+
+/* Opcode: CreateTable P1 P2 * * *
+** Synopsis: r[P2]=root iDb=P1
+**
+** Allocate a new table in the main database file if P1==0 or in the
+** auxiliary database file if P1==1 or in an attached database if
+** P1>1.  Write the root page number of the new table into
+** register P2
+**
+** The difference between a table and an index is this:  A table must
+** have a 4-byte integer key and can have arbitrary data.  An index
+** has an arbitrary key but no data.
+**
+** See also: CreateIndex
+*/
+/* Opcode: CreateIndex P1 P2 * * *
+** Synopsis: r[P2]=root iDb=P1
+**
+** Allocate a new index in the main database file if P1==0 or in the
+** auxiliary database file if P1==1 or in an attached database if
+** P1>1.  Write the root page number of the new table into
+** register P2.
+**
+** See documentation on OP_CreateTable for additional information.
+*/
+case OP_CreateIndex:            /* out2-prerelease */
+case OP_CreateTable: {          /* out2-prerelease */
+  int pgno;
+  int flags;
+  Db *pDb;
+
+  pgno = 0;
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  assert( DbMaskTest(p->btreeMask, pOp->p1) );
+  assert( p->readOnly==0 );
+  pDb = &db->aDb[pOp->p1];
+  assert( pDb->pBt!=0 );
+  if( pOp->opcode==OP_CreateTable ){
+    /* flags = BTREE_INTKEY; */
+    flags = BTREE_INTKEY;
+  }else{
+    flags = BTREE_BLOBKEY;
+  }
+  rc = sqlite3BtreeCreateTable(pDb->pBt, &pgno, flags);
+  pOut->u.i = pgno;
+  break;
+}
+
+/* Opcode: ParseSchema P1 * * P4 *
+**
+** Read and parse all entries from the SQLITE_MASTER table of database P1
+** that match the WHERE clause P4. 
+**
+** This opcode invokes the parser to create a new virtual machine,
+** then runs the new virtual machine.  It is thus a re-entrant opcode.
+*/
+case OP_ParseSchema: {
+  int iDb;
+  const char *zMaster;
+  char *zSql;
+  InitData initData;
+
+  /* Any prepared statement that invokes this opcode will hold mutexes
+  ** on every btree.  This is a prerequisite for invoking 
+  ** sqlite3InitCallback().
+  */
+#ifdef SQLITE_DEBUG
+  for(iDb=0; iDb<db->nDb; iDb++){
+    assert( iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[iDb].pBt) );
+  }
+#endif
+
+  iDb = pOp->p1;
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( DbHasProperty(db, iDb, DB_SchemaLoaded) );
+  /* Used to be a conditional */ {
+    zMaster = SCHEMA_TABLE(iDb);
+    initData.db = db;
+    initData.iDb = pOp->p1;
+    initData.pzErrMsg = &p->zErrMsg;
+    zSql = sqlite3MPrintf(db,
+       "SELECT name, rootpage, sql FROM '%q'.%s WHERE %s ORDER BY rowid",
+       db->aDb[iDb].zName, zMaster, pOp->p4.z);
+    if( zSql==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      assert( db->init.busy==0 );
+      db->init.busy = 1;
+      initData.rc = SQLITE_OK;
+      assert( !db->mallocFailed );
+      rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0);
+      if( rc==SQLITE_OK ) rc = initData.rc;
+      sqlite3DbFree(db, zSql);
+      db->init.busy = 0;
+    }
+  }
+  if( rc ) sqlite3ResetAllSchemasOfConnection(db);
+  if( rc==SQLITE_NOMEM ){
+    goto no_mem;
+  }
+  break;  
+}
+
+#if !defined(SQLITE_OMIT_ANALYZE)
+/* Opcode: LoadAnalysis P1 * * * *
+**
+** Read the sqlite_stat1 table for database P1 and load the content
+** of that table into the internal index hash table.  This will cause
+** the analysis to be used when preparing all subsequent queries.
+*/
+case OP_LoadAnalysis: {
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  rc = sqlite3AnalysisLoad(db, pOp->p1);
+  break;  
+}
+#endif /* !defined(SQLITE_OMIT_ANALYZE) */
+
+/* Opcode: DropTable P1 * * P4 *
+**
+** Remove the internal (in-memory) data structures that describe
+** the table named P4 in database P1.  This is called after a table
+** is dropped from disk (using the Destroy opcode) in order to keep 
+** the internal representation of the
+** schema consistent with what is on disk.
+*/
+case OP_DropTable: {
+  sqlite3UnlinkAndDeleteTable(db, pOp->p1, pOp->p4.z);
+  break;
+}
+
+/* Opcode: DropIndex P1 * * P4 *
+**
+** Remove the internal (in-memory) data structures that describe
+** the index named P4 in database P1.  This is called after an index
+** is dropped from disk (using the Destroy opcode)
+** in order to keep the internal representation of the
+** schema consistent with what is on disk.
+*/
+case OP_DropIndex: {
+  sqlite3UnlinkAndDeleteIndex(db, pOp->p1, pOp->p4.z);
+  break;
+}
+
+/* Opcode: DropTrigger P1 * * P4 *
+**
+** Remove the internal (in-memory) data structures that describe
+** the trigger named P4 in database P1.  This is called after a trigger
+** is dropped from disk (using the Destroy opcode) in order to keep 
+** the internal representation of the
+** schema consistent with what is on disk.
+*/
+case OP_DropTrigger: {
+  sqlite3UnlinkAndDeleteTrigger(db, pOp->p1, pOp->p4.z);
+  break;
+}
+
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+/* Opcode: IntegrityCk P1 P2 P3 * P5
+**
+** Do an analysis of the currently open database.  Store in
+** register P1 the text of an error message describing any problems.
+** If no problems are found, store a NULL in register P1.
+**
+** The register P3 contains the maximum number of allowed errors.
+** At most reg(P3) errors will be reported.
+** In other words, the analysis stops as soon as reg(P1) errors are 
+** seen.  Reg(P1) is updated with the number of errors remaining.
+**
+** The root page numbers of all tables in the database are integer
+** stored in reg(P1), reg(P1+1), reg(P1+2), ....  There are P2 tables
+** total.
+**
+** If P5 is not zero, the check is done on the auxiliary database
+** file, not the main database file.
+**
+** This opcode is used to implement the integrity_check pragma.
+*/
+case OP_IntegrityCk: {
+  int nRoot;      /* Number of tables to check.  (Number of root pages.) */
+  int *aRoot;     /* Array of rootpage numbers for tables to be checked */
+  int j;          /* Loop counter */
+  int nErr;       /* Number of errors reported */
+  char *z;        /* Text of the error report */
+  Mem *pnErr;     /* Register keeping track of errors remaining */
+
+  assert( p->bIsReader );
+  nRoot = pOp->p2;
+  assert( nRoot>0 );
+  aRoot = sqlite3DbMallocRaw(db, sizeof(int)*(nRoot+1) );
+  if( aRoot==0 ) goto no_mem;
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  pnErr = &aMem[pOp->p3];
+  assert( (pnErr->flags & MEM_Int)!=0 );
+  assert( (pnErr->flags & (MEM_Str|MEM_Blob))==0 );
+  pIn1 = &aMem[pOp->p1];
+  for(j=0; j<nRoot; j++){
+    aRoot[j] = (int)sqlite3VdbeIntValue(&pIn1[j]);
+  }
+  aRoot[j] = 0;
+  assert( pOp->p5<db->nDb );
+  assert( DbMaskTest(p->btreeMask, pOp->p5) );
+  z = sqlite3BtreeIntegrityCheck(db->aDb[pOp->p5].pBt, aRoot, nRoot,
+                                 (int)pnErr->u.i, &nErr);
+  sqlite3DbFree(db, aRoot);
+  pnErr->u.i -= nErr;
+  sqlite3VdbeMemSetNull(pIn1);
+  if( nErr==0 ){
+    assert( z==0 );
+  }else if( z==0 ){
+    goto no_mem;
+  }else{
+    sqlite3VdbeMemSetStr(pIn1, z, -1, SQLITE_UTF8, sqlite3_free);
+  }
+  UPDATE_MAX_BLOBSIZE(pIn1);
+  sqlite3VdbeChangeEncoding(pIn1, encoding);
+  break;
+}
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+/* Opcode: RowSetAdd P1 P2 * * *
+** Synopsis:  rowset(P1)=r[P2]
+**
+** Insert the integer value held by register P2 into a boolean index
+** held in register P1.
+**
+** An assertion fails if P2 is not an integer.
+*/
+case OP_RowSetAdd: {       /* in1, in2 */
+  pIn1 = &aMem[pOp->p1];
+  pIn2 = &aMem[pOp->p2];
+  assert( (pIn2->flags & MEM_Int)!=0 );
+  if( (pIn1->flags & MEM_RowSet)==0 ){
+    sqlite3VdbeMemSetRowSet(pIn1);
+    if( (pIn1->flags & MEM_RowSet)==0 ) goto no_mem;
+  }
+  sqlite3RowSetInsert(pIn1->u.pRowSet, pIn2->u.i);
+  break;
+}
+
+/* Opcode: RowSetRead P1 P2 P3 * *
+** Synopsis:  r[P3]=rowset(P1)
+**
+** Extract the smallest value from boolean index P1 and put that value into
+** register P3.  Or, if boolean index P1 is initially empty, leave P3
+** unchanged and jump to instruction P2.
+*/
+case OP_RowSetRead: {       /* jump, in1, out3 */
+  i64 val;
+
+  pIn1 = &aMem[pOp->p1];
+  if( (pIn1->flags & MEM_RowSet)==0 
+   || sqlite3RowSetNext(pIn1->u.pRowSet, &val)==0
+  ){
+    /* The boolean index is empty */
+    sqlite3VdbeMemSetNull(pIn1);
+    pc = pOp->p2 - 1;
+    VdbeBranchTaken(1,2);
+  }else{
+    /* A value was pulled from the index */
+    sqlite3VdbeMemSetInt64(&aMem[pOp->p3], val);
+    VdbeBranchTaken(0,2);
+  }
+  goto check_for_interrupt;
+}
+
+/* Opcode: RowSetTest P1 P2 P3 P4
+** Synopsis: if r[P3] in rowset(P1) goto P2
+**
+** Register P3 is assumed to hold a 64-bit integer value. If register P1
+** contains a RowSet object and that RowSet object contains
+** the value held in P3, jump to register P2. Otherwise, insert the
+** integer in P3 into the RowSet and continue on to the
+** next opcode.
+**
+** The RowSet object is optimized for the case where successive sets
+** of integers, where each set contains no duplicates. Each set
+** of values is identified by a unique P4 value. The first set
+** must have P4==0, the final set P4=-1.  P4 must be either -1 or
+** non-negative.  For non-negative values of P4 only the lower 4
+** bits are significant.
+**
+** This allows optimizations: (a) when P4==0 there is no need to test
+** the rowset object for P3, as it is guaranteed not to contain it,
+** (b) when P4==-1 there is no need to insert the value, as it will
+** never be tested for, and (c) when a value that is part of set X is
+** inserted, there is no need to search to see if the same value was
+** previously inserted as part of set X (only if it was previously
+** inserted as part of some other set).
+*/
+case OP_RowSetTest: {                     /* jump, in1, in3 */
+  int iSet;
+  int exists;
+
+  pIn1 = &aMem[pOp->p1];
+  pIn3 = &aMem[pOp->p3];
+  iSet = pOp->p4.i;
+  assert( pIn3->flags&MEM_Int );
+
+  /* If there is anything other than a rowset object in memory cell P1,
+  ** delete it now and initialize P1 with an empty rowset
+  */
+  if( (pIn1->flags & MEM_RowSet)==0 ){
+    sqlite3VdbeMemSetRowSet(pIn1);
+    if( (pIn1->flags & MEM_RowSet)==0 ) goto no_mem;
+  }
+
+  assert( pOp->p4type==P4_INT32 );
+  assert( iSet==-1 || iSet>=0 );
+  if( iSet ){
+    exists = sqlite3RowSetTest(pIn1->u.pRowSet, iSet, pIn3->u.i);
+    VdbeBranchTaken(exists!=0,2);
+    if( exists ){
+      pc = pOp->p2 - 1;
+      break;
+    }
+  }
+  if( iSet>=0 ){
+    sqlite3RowSetInsert(pIn1->u.pRowSet, pIn3->u.i);
+  }
+  break;
+}
+
+
+#ifndef SQLITE_OMIT_TRIGGER
+
+/* Opcode: Program P1 P2 P3 P4 P5
+**
+** Execute the trigger program passed as P4 (type P4_SUBPROGRAM). 
+**
+** P1 contains the address of the memory cell that contains the first memory 
+** cell in an array of values used as arguments to the sub-program. P2 
+** contains the address to jump to if the sub-program throws an IGNORE 
+** exception using the RAISE() function. Register P3 contains the address 
+** of a memory cell in this (the parent) VM that is used to allocate the 
+** memory required by the sub-vdbe at runtime.
+**
+** P4 is a pointer to the VM containing the trigger program.
+**
+** If P5 is non-zero, then recursive program invocation is enabled.
+*/
+case OP_Program: {        /* jump */
+  int nMem;               /* Number of memory registers for sub-program */
+  int nByte;              /* Bytes of runtime space required for sub-program */
+  Mem *pRt;               /* Register to allocate runtime space */
+  Mem *pMem;              /* Used to iterate through memory cells */
+  Mem *pEnd;              /* Last memory cell in new array */
+  VdbeFrame *pFrame;      /* New vdbe frame to execute in */
+  SubProgram *pProgram;   /* Sub-program to execute */
+  void *t;                /* Token identifying trigger */
+
+  pProgram = pOp->p4.pProgram;
+  pRt = &aMem[pOp->p3];
+  assert( pProgram->nOp>0 );
+  
+  /* If the p5 flag is clear, then recursive invocation of triggers is 
+  ** disabled for backwards compatibility (p5 is set if this sub-program
+  ** is really a trigger, not a foreign key action, and the flag set
+  ** and cleared by the "PRAGMA recursive_triggers" command is clear).
+  ** 
+  ** It is recursive invocation of triggers, at the SQL level, that is 
+  ** disabled. In some cases a single trigger may generate more than one 
+  ** SubProgram (if the trigger may be executed with more than one different 
+  ** ON CONFLICT algorithm). SubProgram structures associated with a
+  ** single trigger all have the same value for the SubProgram.token 
+  ** variable.  */
+  if( pOp->p5 ){
+    t = pProgram->token;
+    for(pFrame=p->pFrame; pFrame && pFrame->token!=t; pFrame=pFrame->pParent);
+    if( pFrame ) break;
+  }
+
+  if( p->nFrame>=db->aLimit[SQLITE_LIMIT_TRIGGER_DEPTH] ){
+    rc = SQLITE_ERROR;
+    sqlite3SetString(&p->zErrMsg, db, "too many levels of trigger recursion");
+    break;
+  }
+
+  /* Register pRt is used to store the memory required to save the state
+  ** of the current program, and the memory required at runtime to execute
+  ** the trigger program. If this trigger has been fired before, then pRt 
+  ** is already allocated. Otherwise, it must be initialized.  */
+  if( (pRt->flags&MEM_Frame)==0 ){
+    /* SubProgram.nMem is set to the number of memory cells used by the 
+    ** program stored in SubProgram.aOp. As well as these, one memory
+    ** cell is required for each cursor used by the program. Set local
+    ** variable nMem (and later, VdbeFrame.nChildMem) to this value.
+    */
+    nMem = pProgram->nMem + pProgram->nCsr;
+    nByte = ROUND8(sizeof(VdbeFrame))
+              + nMem * sizeof(Mem)
+              + pProgram->nCsr * sizeof(VdbeCursor *)
+              + pProgram->nOnce * sizeof(u8);
+    pFrame = sqlite3DbMallocZero(db, nByte);
+    if( !pFrame ){
+      goto no_mem;
+    }
+    sqlite3VdbeMemRelease(pRt);
+    pRt->flags = MEM_Frame;
+    pRt->u.pFrame = pFrame;
+
+    pFrame->v = p;
+    pFrame->nChildMem = nMem;
+    pFrame->nChildCsr = pProgram->nCsr;
+    pFrame->pc = pc;
+    pFrame->aMem = p->aMem;
+    pFrame->nMem = p->nMem;
+    pFrame->apCsr = p->apCsr;
+    pFrame->nCursor = p->nCursor;
+    pFrame->aOp = p->aOp;
+    pFrame->nOp = p->nOp;
+    pFrame->token = pProgram->token;
+    pFrame->aOnceFlag = p->aOnceFlag;
+    pFrame->nOnceFlag = p->nOnceFlag;
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+    pFrame->anExec = p->anExec;
+#endif
+
+    pEnd = &VdbeFrameMem(pFrame)[pFrame->nChildMem];
+    for(pMem=VdbeFrameMem(pFrame); pMem!=pEnd; pMem++){
+      pMem->flags = MEM_Undefined;
+      pMem->db = db;
+    }
+  }else{
+    pFrame = pRt->u.pFrame;
+    assert( pProgram->nMem+pProgram->nCsr==pFrame->nChildMem );
+    assert( pProgram->nCsr==pFrame->nChildCsr );
+    assert( pc==pFrame->pc );
+  }
+
+  p->nFrame++;
+  pFrame->pParent = p->pFrame;
+  pFrame->lastRowid = lastRowid;
+  pFrame->nChange = p->nChange;
+  pFrame->nDbChange = p->db->nChange;
+  p->nChange = 0;
+  p->pFrame = pFrame;
+  p->aMem = aMem = &VdbeFrameMem(pFrame)[-1];
+  p->nMem = pFrame->nChildMem;
+  p->nCursor = (u16)pFrame->nChildCsr;
+  p->apCsr = (VdbeCursor **)&aMem[p->nMem+1];
+  p->aOp = aOp = pProgram->aOp;
+  p->nOp = pProgram->nOp;
+  p->aOnceFlag = (u8 *)&p->apCsr[p->nCursor];
+  p->nOnceFlag = pProgram->nOnce;
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  p->anExec = 0;
+#endif
+  pc = -1;
+  memset(p->aOnceFlag, 0, p->nOnceFlag);
+
+  break;
+}
+
+/* Opcode: Param P1 P2 * * *
+**
+** This opcode is only ever present in sub-programs called via the 
+** OP_Program instruction. Copy a value currently stored in a memory 
+** cell of the calling (parent) frame to cell P2 in the current frames 
+** address space. This is used by trigger programs to access the new.* 
+** and old.* values.
+**
+** The address of the cell in the parent frame is determined by adding
+** the value of the P1 argument to the value of the P1 argument to the
+** calling OP_Program instruction.
+*/
+case OP_Param: {           /* out2-prerelease */
+  VdbeFrame *pFrame;
+  Mem *pIn;
+  pFrame = p->pFrame;
+  pIn = &pFrame->aMem[pOp->p1 + pFrame->aOp[pFrame->pc].p1];   
+  sqlite3VdbeMemShallowCopy(pOut, pIn, MEM_Ephem);
+  break;
+}
+
+#endif /* #ifndef SQLITE_OMIT_TRIGGER */
+
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+/* Opcode: FkCounter P1 P2 * * *
+** Synopsis: fkctr[P1]+=P2
+**
+** Increment a "constraint counter" by P2 (P2 may be negative or positive).
+** If P1 is non-zero, the database constraint counter is incremented 
+** (deferred foreign key constraints). Otherwise, if P1 is zero, the 
+** statement counter is incremented (immediate foreign key constraints).
+*/
+case OP_FkCounter: {
+  if( db->flags & SQLITE_DeferFKs ){
+    db->nDeferredImmCons += pOp->p2;
+  }else if( pOp->p1 ){
+    db->nDeferredCons += pOp->p2;
+  }else{
+    p->nFkConstraint += pOp->p2;
+  }
+  break;
+}
+
+/* Opcode: FkIfZero P1 P2 * * *
+** Synopsis: if fkctr[P1]==0 goto P2
+**
+** This opcode tests if a foreign key constraint-counter is currently zero.
+** If so, jump to instruction P2. Otherwise, fall through to the next 
+** instruction.
+**
+** If P1 is non-zero, then the jump is taken if the database constraint-counter
+** is zero (the one that counts deferred constraint violations). If P1 is
+** zero, the jump is taken if the statement constraint-counter is zero
+** (immediate foreign key constraint violations).
+*/
+case OP_FkIfZero: {         /* jump */
+  if( pOp->p1 ){
+    VdbeBranchTaken(db->nDeferredCons==0 && db->nDeferredImmCons==0, 2);
+    if( db->nDeferredCons==0 && db->nDeferredImmCons==0 ) pc = pOp->p2-1;
+  }else{
+    VdbeBranchTaken(p->nFkConstraint==0 && db->nDeferredImmCons==0, 2);
+    if( p->nFkConstraint==0 && db->nDeferredImmCons==0 ) pc = pOp->p2-1;
+  }
+  break;
+}
+#endif /* #ifndef SQLITE_OMIT_FOREIGN_KEY */
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+/* Opcode: MemMax P1 P2 * * *
+** Synopsis: r[P1]=max(r[P1],r[P2])
+**
+** P1 is a register in the root frame of this VM (the root frame is
+** different from the current frame if this instruction is being executed
+** within a sub-program). Set the value of register P1 to the maximum of 
+** its current value and the value in register P2.
+**
+** This instruction throws an error if the memory cell is not initially
+** an integer.
+*/
+case OP_MemMax: {        /* in2 */
+  VdbeFrame *pFrame;
+  if( p->pFrame ){
+    for(pFrame=p->pFrame; pFrame->pParent; pFrame=pFrame->pParent);
+    pIn1 = &pFrame->aMem[pOp->p1];
+  }else{
+    pIn1 = &aMem[pOp->p1];
+  }
+  assert( memIsValid(pIn1) );
+  sqlite3VdbeMemIntegerify(pIn1);
+  pIn2 = &aMem[pOp->p2];
+  sqlite3VdbeMemIntegerify(pIn2);
+  if( pIn1->u.i<pIn2->u.i){
+    pIn1->u.i = pIn2->u.i;
+  }
+  break;
+}
+#endif /* SQLITE_OMIT_AUTOINCREMENT */
+
+/* Opcode: IfPos P1 P2 * * *
+** Synopsis: if r[P1]>0 goto P2
+**
+** If the value of register P1 is 1 or greater, jump to P2.
+**
+** It is illegal to use this instruction on a register that does
+** not contain an integer.  An assertion fault will result if you try.
+*/
+case OP_IfPos: {        /* jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  assert( pIn1->flags&MEM_Int );
+  VdbeBranchTaken( pIn1->u.i>0, 2);
+  if( pIn1->u.i>0 ){
+     pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: IfNeg P1 P2 P3 * *
+** Synopsis: r[P1]+=P3, if r[P1]<0 goto P2
+**
+** Register P1 must contain an integer.  Add literal P3 to the value in
+** register P1 then if the value of register P1 is less than zero, jump to P2. 
+*/
+case OP_IfNeg: {        /* jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  assert( pIn1->flags&MEM_Int );
+  pIn1->u.i += pOp->p3;
+  VdbeBranchTaken(pIn1->u.i<0, 2);
+  if( pIn1->u.i<0 ){
+     pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: IfZero P1 P2 P3 * *
+** Synopsis: r[P1]+=P3, if r[P1]==0 goto P2
+**
+** The register P1 must contain an integer.  Add literal P3 to the
+** value in register P1.  If the result is exactly 0, jump to P2. 
+*/
+case OP_IfZero: {        /* jump, in1 */
+  pIn1 = &aMem[pOp->p1];
+  assert( pIn1->flags&MEM_Int );
+  pIn1->u.i += pOp->p3;
+  VdbeBranchTaken(pIn1->u.i==0, 2);
+  if( pIn1->u.i==0 ){
+     pc = pOp->p2 - 1;
+  }
+  break;
+}
+
+/* Opcode: AggStep * P2 P3 P4 P5
+** Synopsis: accum=r[P3] step(r[P2@P5])
+**
+** Execute the step function for an aggregate.  The
+** function has P5 arguments.   P4 is a pointer to the FuncDef
+** structure that specifies the function.  Use register
+** P3 as the accumulator.
+**
+** The P5 arguments are taken from register P2 and its
+** successors.
+*/
+case OP_AggStep: {
+  int n;
+  int i;
+  Mem *pMem;
+  Mem *pRec;
+  Mem t;
+  sqlite3_context ctx;
+  sqlite3_value **apVal;
+
+  n = pOp->p5;
+  assert( n>=0 );
+  pRec = &aMem[pOp->p2];
+  apVal = p->apArg;
+  assert( apVal || n==0 );
+  for(i=0; i<n; i++, pRec++){
+    assert( memIsValid(pRec) );
+    apVal[i] = pRec;
+    memAboutToChange(p, pRec);
+  }
+  ctx.pFunc = pOp->p4.pFunc;
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  ctx.pMem = pMem = &aMem[pOp->p3];
+  pMem->n++;
+  sqlite3VdbeMemInit(&t, db, MEM_Null);
+  ctx.pOut = &t;
+  ctx.isError = 0;
+  ctx.pVdbe = p;
+  ctx.iOp = pc;
+  ctx.skipFlag = 0;
+  (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */
+  if( ctx.isError ){
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&t));
+    rc = ctx.isError;
+  }
+  if( ctx.skipFlag ){
+    assert( pOp[-1].opcode==OP_CollSeq );
+    i = pOp[-1].p1;
+    if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1);
+  }
+  sqlite3VdbeMemRelease(&t);
+  break;
+}
+
+/* Opcode: AggFinal P1 P2 * P4 *
+** Synopsis: accum=r[P1] N=P2
+**
+** Execute the finalizer function for an aggregate.  P1 is
+** the memory location that is the accumulator for the aggregate.
+**
+** P2 is the number of arguments that the step function takes and
+** P4 is a pointer to the FuncDef for this function.  The P2
+** argument is not used by this opcode.  It is only there to disambiguate
+** functions that can take varying numbers of arguments.  The
+** P4 argument is only needed for the degenerate case where
+** the step function was not previously called.
+*/
+case OP_AggFinal: {
+  Mem *pMem;
+  assert( pOp->p1>0 && pOp->p1<=(p->nMem-p->nCursor) );
+  pMem = &aMem[pOp->p1];
+  assert( (pMem->flags & ~(MEM_Null|MEM_Agg))==0 );
+  rc = sqlite3VdbeMemFinalize(pMem, pOp->p4.pFunc);
+  if( rc ){
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(pMem));
+  }
+  sqlite3VdbeChangeEncoding(pMem, encoding);
+  UPDATE_MAX_BLOBSIZE(pMem);
+  if( sqlite3VdbeMemTooBig(pMem) ){
+    goto too_big;
+  }
+  break;
+}
+
+#ifndef SQLITE_OMIT_WAL
+/* Opcode: Checkpoint P1 P2 P3 * *
+**
+** Checkpoint database P1. This is a no-op if P1 is not currently in
+** WAL mode. Parameter P2 is one of SQLITE_CHECKPOINT_PASSIVE, FULL,
+** RESTART, or TRUNCATE.  Write 1 or 0 into mem[P3] if the checkpoint returns
+** SQLITE_BUSY or not, respectively.  Write the number of pages in the
+** WAL after the checkpoint into mem[P3+1] and the number of pages
+** in the WAL that have been checkpointed after the checkpoint
+** completes into mem[P3+2].  However on an error, mem[P3+1] and
+** mem[P3+2] are initialized to -1.
+*/
+case OP_Checkpoint: {
+  int i;                          /* Loop counter */
+  int aRes[3];                    /* Results */
+  Mem *pMem;                      /* Write results here */
+
+  assert( p->readOnly==0 );
+  aRes[0] = 0;
+  aRes[1] = aRes[2] = -1;
+  assert( pOp->p2==SQLITE_CHECKPOINT_PASSIVE
+       || pOp->p2==SQLITE_CHECKPOINT_FULL
+       || pOp->p2==SQLITE_CHECKPOINT_RESTART
+       || pOp->p2==SQLITE_CHECKPOINT_TRUNCATE
+  );
+  rc = sqlite3Checkpoint(db, pOp->p1, pOp->p2, &aRes[1], &aRes[2]);
+  if( rc==SQLITE_BUSY ){
+    rc = SQLITE_OK;
+    aRes[0] = 1;
+  }
+  for(i=0, pMem = &aMem[pOp->p3]; i<3; i++, pMem++){
+    sqlite3VdbeMemSetInt64(pMem, (i64)aRes[i]);
+  }    
+  break;
+};  
+#endif
+
+#ifndef SQLITE_OMIT_PRAGMA
+/* Opcode: JournalMode P1 P2 P3 * *
+**
+** Change the journal mode of database P1 to P3. P3 must be one of the
+** PAGER_JOURNALMODE_XXX values. If changing between the various rollback
+** modes (delete, truncate, persist, off and memory), this is a simple
+** operation. No IO is required.
+**
+** If changing into or out of WAL mode the procedure is more complicated.
+**
+** Write a string containing the final journal-mode to register P2.
+*/
+case OP_JournalMode: {    /* out2-prerelease */
+  Btree *pBt;                     /* Btree to change journal mode of */
+  Pager *pPager;                  /* Pager associated with pBt */
+  int eNew;                       /* New journal mode */
+  int eOld;                       /* The old journal mode */
+#ifndef SQLITE_OMIT_WAL
+  const char *zFilename;          /* Name of database file for pPager */
+#endif
+
+  eNew = pOp->p3;
+  assert( eNew==PAGER_JOURNALMODE_DELETE 
+       || eNew==PAGER_JOURNALMODE_TRUNCATE 
+       || eNew==PAGER_JOURNALMODE_PERSIST 
+       || eNew==PAGER_JOURNALMODE_OFF
+       || eNew==PAGER_JOURNALMODE_MEMORY
+       || eNew==PAGER_JOURNALMODE_WAL
+       || eNew==PAGER_JOURNALMODE_QUERY
+  );
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  assert( p->readOnly==0 );
+
+  pBt = db->aDb[pOp->p1].pBt;
+  pPager = sqlite3BtreePager(pBt);
+  eOld = sqlite3PagerGetJournalMode(pPager);
+  if( eNew==PAGER_JOURNALMODE_QUERY ) eNew = eOld;
+  if( !sqlite3PagerOkToChangeJournalMode(pPager) ) eNew = eOld;
+
+#ifndef SQLITE_OMIT_WAL
+  zFilename = sqlite3PagerFilename(pPager, 1);
+
+  /* Do not allow a transition to journal_mode=WAL for a database
+  ** in temporary storage or if the VFS does not support shared memory 
+  */
+  if( eNew==PAGER_JOURNALMODE_WAL
+   && (sqlite3Strlen30(zFilename)==0           /* Temp file */
+       || !sqlite3PagerWalSupported(pPager))   /* No shared-memory support */
+  ){
+    eNew = eOld;
+  }
+
+  if( (eNew!=eOld)
+   && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL)
+  ){
+    if( !db->autoCommit || db->nVdbeRead>1 ){
+      rc = SQLITE_ERROR;
+      sqlite3SetString(&p->zErrMsg, db, 
+          "cannot change %s wal mode from within a transaction",
+          (eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of")
+      );
+      break;
+    }else{
+ 
+      if( eOld==PAGER_JOURNALMODE_WAL ){
+        /* If leaving WAL mode, close the log file. If successful, the call
+        ** to PagerCloseWal() checkpoints and deletes the write-ahead-log 
+        ** file. An EXCLUSIVE lock may still be held on the database file 
+        ** after a successful return. 
+        */
+        rc = sqlite3PagerCloseWal(pPager);
+        if( rc==SQLITE_OK ){
+          sqlite3PagerSetJournalMode(pPager, eNew);
+        }
+      }else if( eOld==PAGER_JOURNALMODE_MEMORY ){
+        /* Cannot transition directly from MEMORY to WAL.  Use mode OFF
+        ** as an intermediate */
+        sqlite3PagerSetJournalMode(pPager, PAGER_JOURNALMODE_OFF);
+      }
+  
+      /* Open a transaction on the database file. Regardless of the journal
+      ** mode, this transaction always uses a rollback journal.
+      */
+      assert( sqlite3BtreeIsInTrans(pBt)==0 );
+      if( rc==SQLITE_OK ){
+        rc = sqlite3BtreeSetVersion(pBt, (eNew==PAGER_JOURNALMODE_WAL ? 2 : 1));
+      }
+    }
+  }
+#endif /* ifndef SQLITE_OMIT_WAL */
+
+  if( rc ){
+    eNew = eOld;
+  }
+  eNew = sqlite3PagerSetJournalMode(pPager, eNew);
+
+  pOut = &aMem[pOp->p2];
+  pOut->flags = MEM_Str|MEM_Static|MEM_Term;
+  pOut->z = (char *)sqlite3JournalModename(eNew);
+  pOut->n = sqlite3Strlen30(pOut->z);
+  pOut->enc = SQLITE_UTF8;
+  sqlite3VdbeChangeEncoding(pOut, encoding);
+  break;
+};
+#endif /* SQLITE_OMIT_PRAGMA */
+
+#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH)
+/* Opcode: Vacuum * * * * *
+**
+** Vacuum the entire database.  This opcode will cause other virtual
+** machines to be created and run.  It may not be called from within
+** a transaction.
+*/
+case OP_Vacuum: {
+  assert( p->readOnly==0 );
+  rc = sqlite3RunVacuum(&p->zErrMsg, db);
+  break;
+}
+#endif
+
+#if !defined(SQLITE_OMIT_AUTOVACUUM)
+/* Opcode: IncrVacuum P1 P2 * * *
+**
+** Perform a single step of the incremental vacuum procedure on
+** the P1 database. If the vacuum has finished, jump to instruction
+** P2. Otherwise, fall through to the next instruction.
+*/
+case OP_IncrVacuum: {        /* jump */
+  Btree *pBt;
+
+  assert( pOp->p1>=0 && pOp->p1<db->nDb );
+  assert( DbMaskTest(p->btreeMask, pOp->p1) );
+  assert( p->readOnly==0 );
+  pBt = db->aDb[pOp->p1].pBt;
+  rc = sqlite3BtreeIncrVacuum(pBt);
+  VdbeBranchTaken(rc==SQLITE_DONE,2);
+  if( rc==SQLITE_DONE ){
+    pc = pOp->p2 - 1;
+    rc = SQLITE_OK;
+  }
+  break;
+}
+#endif
+
+/* Opcode: Expire P1 * * * *
+**
+** Cause precompiled statements to expire.  When an expired statement
+** is executed using sqlite3_step() it will either automatically
+** reprepare itself (if it was originally created using sqlite3_prepare_v2())
+** or it will fail with SQLITE_SCHEMA.
+** 
+** If P1 is 0, then all SQL statements become expired. If P1 is non-zero,
+** then only the currently executing statement is expired.
+*/
+case OP_Expire: {
+  if( !pOp->p1 ){
+    sqlite3ExpirePreparedStatements(db);
+  }else{
+    p->expired = 1;
+  }
+  break;
+}
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/* Opcode: TableLock P1 P2 P3 P4 *
+** Synopsis: iDb=P1 root=P2 write=P3
+**
+** Obtain a lock on a particular table. This instruction is only used when
+** the shared-cache feature is enabled. 
+**
+** P1 is the index of the database in sqlite3.aDb[] of the database
+** on which the lock is acquired.  A readlock is obtained if P3==0 or
+** a write lock if P3==1.
+**
+** P2 contains the root-page of the table to lock.
+**
+** P4 contains a pointer to the name of the table being locked. This is only
+** used to generate an error message if the lock cannot be obtained.
+*/
+case OP_TableLock: {
+  u8 isWriteLock = (u8)pOp->p3;
+  if( isWriteLock || 0==(db->flags&SQLITE_ReadUncommitted) ){
+    int p1 = pOp->p1; 
+    assert( p1>=0 && p1<db->nDb );
+    assert( DbMaskTest(p->btreeMask, p1) );
+    assert( isWriteLock==0 || isWriteLock==1 );
+    rc = sqlite3BtreeLockTable(db->aDb[p1].pBt, pOp->p2, isWriteLock);
+    if( (rc&0xFF)==SQLITE_LOCKED ){
+      const char *z = pOp->p4.z;
+      sqlite3SetString(&p->zErrMsg, db, "database table is locked: %s", z);
+    }
+  }
+  break;
+}
+#endif /* SQLITE_OMIT_SHARED_CACHE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VBegin * * * P4 *
+**
+** P4 may be a pointer to an sqlite3_vtab structure. If so, call the 
+** xBegin method for that table.
+**
+** Also, whether or not P4 is set, check that this is not being called from
+** within a callback to a virtual table xSync() method. If it is, the error
+** code will be set to SQLITE_LOCKED.
+*/
+case OP_VBegin: {
+  VTable *pVTab;
+  pVTab = pOp->p4.pVtab;
+  rc = sqlite3VtabBegin(db, pVTab);
+  if( pVTab ) sqlite3VtabImportErrmsg(p, pVTab->pVtab);
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VCreate P1 * * P4 *
+**
+** P4 is the name of a virtual table in database P1. Call the xCreate method
+** for that table.
+*/
+case OP_VCreate: {
+  rc = sqlite3VtabCallCreate(db, pOp->p1, pOp->p4.z, &p->zErrMsg);
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VDestroy P1 * * P4 *
+**
+** P4 is the name of a virtual table in database P1.  Call the xDestroy method
+** of that table.
+*/
+case OP_VDestroy: {
+  p->inVtabMethod = 2;
+  rc = sqlite3VtabCallDestroy(db, pOp->p1, pOp->p4.z);
+  p->inVtabMethod = 0;
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VOpen P1 * * P4 *
+**
+** P4 is a pointer to a virtual table object, an sqlite3_vtab structure.
+** P1 is a cursor number.  This opcode opens a cursor to the virtual
+** table and stores that cursor in P1.
+*/
+case OP_VOpen: {
+  VdbeCursor *pCur;
+  sqlite3_vtab_cursor *pVtabCursor;
+  sqlite3_vtab *pVtab;
+  sqlite3_module *pModule;
+
+  assert( p->bIsReader );
+  pCur = 0;
+  pVtabCursor = 0;
+  pVtab = pOp->p4.pVtab->pVtab;
+  pModule = (sqlite3_module *)pVtab->pModule;
+  assert(pVtab && pModule);
+  rc = pModule->xOpen(pVtab, &pVtabCursor);
+  sqlite3VtabImportErrmsg(p, pVtab);
+  if( SQLITE_OK==rc ){
+    /* Initialize sqlite3_vtab_cursor base class */
+    pVtabCursor->pVtab = pVtab;
+
+    /* Initialize vdbe cursor object */
+    pCur = allocateCursor(p, pOp->p1, 0, -1, 0);
+    if( pCur ){
+      pCur->pVtabCursor = pVtabCursor;
+    }else{
+      db->mallocFailed = 1;
+      pModule->xClose(pVtabCursor);
+    }
+  }
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VFilter P1 P2 P3 P4 *
+** Synopsis: iplan=r[P3] zplan='P4'
+**
+** P1 is a cursor opened using VOpen.  P2 is an address to jump to if
+** the filtered result set is empty.
+**
+** P4 is either NULL or a string that was generated by the xBestIndex
+** method of the module.  The interpretation of the P4 string is left
+** to the module implementation.
+**
+** This opcode invokes the xFilter method on the virtual table specified
+** by P1.  The integer query plan parameter to xFilter is stored in register
+** P3. Register P3+1 stores the argc parameter to be passed to the
+** xFilter method. Registers P3+2..P3+1+argc are the argc
+** additional parameters which are passed to
+** xFilter as argv. Register P3+2 becomes argv[0] when passed to xFilter.
+**
+** A jump is made to P2 if the result set after filtering would be empty.
+*/
+case OP_VFilter: {   /* jump */
+  int nArg;
+  int iQuery;
+  const sqlite3_module *pModule;
+  Mem *pQuery;
+  Mem *pArgc;
+  sqlite3_vtab_cursor *pVtabCursor;
+  sqlite3_vtab *pVtab;
+  VdbeCursor *pCur;
+  int res;
+  int i;
+  Mem **apArg;
+
+  pQuery = &aMem[pOp->p3];
+  pArgc = &pQuery[1];
+  pCur = p->apCsr[pOp->p1];
+  assert( memIsValid(pQuery) );
+  REGISTER_TRACE(pOp->p3, pQuery);
+  assert( pCur->pVtabCursor );
+  pVtabCursor = pCur->pVtabCursor;
+  pVtab = pVtabCursor->pVtab;
+  pModule = pVtab->pModule;
+
+  /* Grab the index number and argc parameters */
+  assert( (pQuery->flags&MEM_Int)!=0 && pArgc->flags==MEM_Int );
+  nArg = (int)pArgc->u.i;
+  iQuery = (int)pQuery->u.i;
+
+  /* Invoke the xFilter method */
+  {
+    res = 0;
+    apArg = p->apArg;
+    for(i = 0; i<nArg; i++){
+      apArg[i] = &pArgc[i+1];
+    }
+
+    p->inVtabMethod = 1;
+    rc = pModule->xFilter(pVtabCursor, iQuery, pOp->p4.z, nArg, apArg);
+    p->inVtabMethod = 0;
+    sqlite3VtabImportErrmsg(p, pVtab);
+    if( rc==SQLITE_OK ){
+      res = pModule->xEof(pVtabCursor);
+    }
+    VdbeBranchTaken(res!=0,2);
+    if( res ){
+      pc = pOp->p2 - 1;
+    }
+  }
+  pCur->nullRow = 0;
+
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VColumn P1 P2 P3 * *
+** Synopsis: r[P3]=vcolumn(P2)
+**
+** Store the value of the P2-th column of
+** the row of the virtual-table that the 
+** P1 cursor is pointing to into register P3.
+*/
+case OP_VColumn: {
+  sqlite3_vtab *pVtab;
+  const sqlite3_module *pModule;
+  Mem *pDest;
+  sqlite3_context sContext;
+
+  VdbeCursor *pCur = p->apCsr[pOp->p1];
+  assert( pCur->pVtabCursor );
+  assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
+  pDest = &aMem[pOp->p3];
+  memAboutToChange(p, pDest);
+  if( pCur->nullRow ){
+    sqlite3VdbeMemSetNull(pDest);
+    break;
+  }
+  pVtab = pCur->pVtabCursor->pVtab;
+  pModule = pVtab->pModule;
+  assert( pModule->xColumn );
+  memset(&sContext, 0, sizeof(sContext));
+  sContext.pOut = pDest;
+  MemSetTypeFlag(pDest, MEM_Null);
+  rc = pModule->xColumn(pCur->pVtabCursor, &sContext, pOp->p2);
+  sqlite3VtabImportErrmsg(p, pVtab);
+  if( sContext.isError ){
+    rc = sContext.isError;
+  }
+  sqlite3VdbeChangeEncoding(pDest, encoding);
+  REGISTER_TRACE(pOp->p3, pDest);
+  UPDATE_MAX_BLOBSIZE(pDest);
+
+  if( sqlite3VdbeMemTooBig(pDest) ){
+    goto too_big;
+  }
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VNext P1 P2 * * *
+**
+** Advance virtual table P1 to the next row in its result set and
+** jump to instruction P2.  Or, if the virtual table has reached
+** the end of its result set, then fall through to the next instruction.
+*/
+case OP_VNext: {   /* jump */
+  sqlite3_vtab *pVtab;
+  const sqlite3_module *pModule;
+  int res;
+  VdbeCursor *pCur;
+
+  res = 0;
+  pCur = p->apCsr[pOp->p1];
+  assert( pCur->pVtabCursor );
+  if( pCur->nullRow ){
+    break;
+  }
+  pVtab = pCur->pVtabCursor->pVtab;
+  pModule = pVtab->pModule;
+  assert( pModule->xNext );
+
+  /* Invoke the xNext() method of the module. There is no way for the
+  ** underlying implementation to return an error if one occurs during
+  ** xNext(). Instead, if an error occurs, true is returned (indicating that 
+  ** data is available) and the error code returned when xColumn or
+  ** some other method is next invoked on the save virtual table cursor.
+  */
+  p->inVtabMethod = 1;
+  rc = pModule->xNext(pCur->pVtabCursor);
+  p->inVtabMethod = 0;
+  sqlite3VtabImportErrmsg(p, pVtab);
+  if( rc==SQLITE_OK ){
+    res = pModule->xEof(pCur->pVtabCursor);
+  }
+  VdbeBranchTaken(!res,2);
+  if( !res ){
+    /* If there is data, jump to P2 */
+    pc = pOp->p2 - 1;
+  }
+  goto check_for_interrupt;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VRename P1 * * P4 *
+**
+** P4 is a pointer to a virtual table object, an sqlite3_vtab structure.
+** This opcode invokes the corresponding xRename method. The value
+** in register P1 is passed as the zName argument to the xRename method.
+*/
+case OP_VRename: {
+  sqlite3_vtab *pVtab;
+  Mem *pName;
+
+  pVtab = pOp->p4.pVtab->pVtab;
+  pName = &aMem[pOp->p1];
+  assert( pVtab->pModule->xRename );
+  assert( memIsValid(pName) );
+  assert( p->readOnly==0 );
+  REGISTER_TRACE(pOp->p1, pName);
+  assert( pName->flags & MEM_Str );
+  testcase( pName->enc==SQLITE_UTF8 );
+  testcase( pName->enc==SQLITE_UTF16BE );
+  testcase( pName->enc==SQLITE_UTF16LE );
+  rc = sqlite3VdbeChangeEncoding(pName, SQLITE_UTF8);
+  if( rc==SQLITE_OK ){
+    rc = pVtab->pModule->xRename(pVtab, pName->z);
+    sqlite3VtabImportErrmsg(p, pVtab);
+    p->expired = 0;
+  }
+  break;
+}
+#endif
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Opcode: VUpdate P1 P2 P3 P4 P5
+** Synopsis: data=r[P3@P2]
+**
+** P4 is a pointer to a virtual table object, an sqlite3_vtab structure.
+** This opcode invokes the corresponding xUpdate method. P2 values
+** are contiguous memory cells starting at P3 to pass to the xUpdate 
+** invocation. The value in register (P3+P2-1) corresponds to the 
+** p2th element of the argv array passed to xUpdate.
+**
+** The xUpdate method will do a DELETE or an INSERT or both.
+** The argv[0] element (which corresponds to memory cell P3)
+** is the rowid of a row to delete.  If argv[0] is NULL then no 
+** deletion occurs.  The argv[1] element is the rowid of the new 
+** row.  This can be NULL to have the virtual table select the new 
+** rowid for itself.  The subsequent elements in the array are 
+** the values of columns in the new row.
+**
+** If P2==1 then no insert is performed.  argv[0] is the rowid of
+** a row to delete.
+**
+** P1 is a boolean flag. If it is set to true and the xUpdate call
+** is successful, then the value returned by sqlite3_last_insert_rowid() 
+** is set to the value of the rowid for the row just inserted.
+**
+** P5 is the error actions (OE_Replace, OE_Fail, OE_Ignore, etc) to
+** apply in the case of a constraint failure on an insert or update.
+*/
+case OP_VUpdate: {
+  sqlite3_vtab *pVtab;
+  sqlite3_module *pModule;
+  int nArg;
+  int i;
+  sqlite_int64 rowid;
+  Mem **apArg;
+  Mem *pX;
+
+  assert( pOp->p2==1        || pOp->p5==OE_Fail   || pOp->p5==OE_Rollback 
+       || pOp->p5==OE_Abort || pOp->p5==OE_Ignore || pOp->p5==OE_Replace
+  );
+  assert( p->readOnly==0 );
+  pVtab = pOp->p4.pVtab->pVtab;
+  pModule = (sqlite3_module *)pVtab->pModule;
+  nArg = pOp->p2;
+  assert( pOp->p4type==P4_VTAB );
+  if( ALWAYS(pModule->xUpdate) ){
+    u8 vtabOnConflict = db->vtabOnConflict;
+    apArg = p->apArg;
+    pX = &aMem[pOp->p3];
+    for(i=0; i<nArg; i++){
+      assert( memIsValid(pX) );
+      memAboutToChange(p, pX);
+      apArg[i] = pX;
+      pX++;
+    }
+    db->vtabOnConflict = pOp->p5;
+    rc = pModule->xUpdate(pVtab, nArg, apArg, &rowid);
+    db->vtabOnConflict = vtabOnConflict;
+    sqlite3VtabImportErrmsg(p, pVtab);
+    if( rc==SQLITE_OK && pOp->p1 ){
+      assert( nArg>1 && apArg[0] && (apArg[0]->flags&MEM_Null) );
+      db->lastRowid = lastRowid = rowid;
+    }
+    if( (rc&0xff)==SQLITE_CONSTRAINT && pOp->p4.pVtab->bConstraint ){
+      if( pOp->p5==OE_Ignore ){
+        rc = SQLITE_OK;
+      }else{
+        p->errorAction = ((pOp->p5==OE_Replace) ? OE_Abort : pOp->p5);
+      }
+    }else{
+      p->nChange++;
+    }
+  }
+  break;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifndef  SQLITE_OMIT_PAGER_PRAGMAS
+/* Opcode: Pagecount P1 P2 * * *
+**
+** Write the current number of pages in database P1 to memory cell P2.
+*/
+case OP_Pagecount: {            /* out2-prerelease */
+  pOut->u.i = sqlite3BtreeLastPage(db->aDb[pOp->p1].pBt);
+  break;
+}
+#endif
+
+
+#ifndef  SQLITE_OMIT_PAGER_PRAGMAS
+/* Opcode: MaxPgcnt P1 P2 P3 * *
+**
+** Try to set the maximum page count for database P1 to the value in P3.
+** Do not let the maximum page count fall below the current page count and
+** do not change the maximum page count value if P3==0.
+**
+** Store the maximum page count after the change in register P2.
+*/
+case OP_MaxPgcnt: {            /* out2-prerelease */
+  unsigned int newMax;
+  Btree *pBt;
+
+  pBt = db->aDb[pOp->p1].pBt;
+  newMax = 0;
+  if( pOp->p3 ){
+    newMax = sqlite3BtreeLastPage(pBt);
+    if( newMax < (unsigned)pOp->p3 ) newMax = (unsigned)pOp->p3;
+  }
+  pOut->u.i = sqlite3BtreeMaxPageCount(pBt, newMax);
+  break;
+}
+#endif
+
+
+/* Opcode: Init * P2 * P4 *
+** Synopsis:  Start at P2
+**
+** Programs contain a single instance of this opcode as the very first
+** opcode.
+**
+** If tracing is enabled (by the sqlite3_trace()) interface, then
+** the UTF-8 string contained in P4 is emitted on the trace callback.
+** Or if P4 is blank, use the string returned by sqlite3_sql().
+**
+** If P2 is not zero, jump to instruction P2.
+*/
+case OP_Init: {          /* jump */
+  char *zTrace;
+  char *z;
+
+  if( pOp->p2 ){
+    pc = pOp->p2 - 1;
+  }
+#ifndef SQLITE_OMIT_TRACE
+  if( db->xTrace
+   && !p->doingRerun
+   && (zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0
+  ){
+    z = sqlite3VdbeExpandSql(p, zTrace);
+    db->xTrace(db->pTraceArg, z);
+    sqlite3DbFree(db, z);
+  }
+#ifdef SQLITE_USE_FCNTL_TRACE
+  zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql);
+  if( zTrace ){
+    int i;
+    for(i=0; i<db->nDb; i++){
+      if( DbMaskTest(p->btreeMask, i)==0 ) continue;
+      sqlite3_file_control(db, db->aDb[i].zName, SQLITE_FCNTL_TRACE, zTrace);
+    }
+  }
+#endif /* SQLITE_USE_FCNTL_TRACE */
+#ifdef SQLITE_DEBUG
+  if( (db->flags & SQLITE_SqlTrace)!=0
+   && (zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0
+  ){
+    sqlite3DebugPrintf("SQL-trace: %s\n", zTrace);
+  }
+#endif /* SQLITE_DEBUG */
+#endif /* SQLITE_OMIT_TRACE */
+  break;
+}
+
+
+/* Opcode: Noop * * * * *
+**
+** Do nothing.  This instruction is often useful as a jump
+** destination.
+*/
+/*
+** The magic Explain opcode are only inserted when explain==2 (which
+** is to say when the EXPLAIN QUERY PLAN syntax is used.)
+** This opcode records information from the optimizer.  It is the
+** the same as a no-op.  This opcodesnever appears in a real VM program.
+*/
+default: {          /* This is really OP_Noop and OP_Explain */
+  assert( pOp->opcode==OP_Noop || pOp->opcode==OP_Explain );
+  break;
+}
+
+/*****************************************************************************
+** The cases of the switch statement above this line should all be indented
+** by 6 spaces.  But the left-most 6 spaces have been removed to improve the
+** readability.  From this point on down, the normal indentation rules are
+** restored.
+*****************************************************************************/
+    }
+
+#ifdef VDBE_PROFILE
+    {
+      u64 endTime = sqlite3Hwtime();
+      if( endTime>start ) pOp->cycles += endTime - start;
+      pOp->cnt++;
+    }
+#endif
+
+    /* The following code adds nothing to the actual functionality
+    ** of the program.  It is only here for testing and debugging.
+    ** On the other hand, it does burn CPU cycles every time through
+    ** the evaluator loop.  So we can leave it out when NDEBUG is defined.
+    */
+#ifndef NDEBUG
+    assert( pc>=-1 && pc<p->nOp );
+
+#ifdef SQLITE_DEBUG
+    if( db->flags & SQLITE_VdbeTrace ){
+      if( rc!=0 ) printf("rc=%d\n",rc);
+      if( pOp->opflags & (OPFLG_OUT2_PRERELEASE|OPFLG_OUT2) ){
+        registerTrace(pOp->p2, &aMem[pOp->p2]);
+      }
+      if( pOp->opflags & OPFLG_OUT3 ){
+        registerTrace(pOp->p3, &aMem[pOp->p3]);
+      }
+    }
+#endif  /* SQLITE_DEBUG */
+#endif  /* NDEBUG */
+  }  /* The end of the for(;;) loop the loops through opcodes */
+
+  /* If we reach this point, it means that execution is finished with
+  ** an error of some kind.
+  */
+vdbe_error_halt:
+  assert( rc );
+  p->rc = rc;
+  testcase( sqlite3GlobalConfig.xLog!=0 );
+  sqlite3_log(rc, "statement aborts at %d: [%s] %s", 
+                   pc, p->zSql, p->zErrMsg);
+  sqlite3VdbeHalt(p);
+  if( rc==SQLITE_IOERR_NOMEM ) db->mallocFailed = 1;
+  rc = SQLITE_ERROR;
+  if( resetSchemaOnFault>0 ){
+    sqlite3ResetOneSchema(db, resetSchemaOnFault-1);
+  }
+
+  /* This is the only way out of this procedure.  We have to
+  ** release the mutexes on btrees that were acquired at the
+  ** top. */
+vdbe_return:
+  db->lastRowid = lastRowid;
+  testcase( nVmStep>0 );
+  p->aCounter[SQLITE_STMTSTATUS_VM_STEP] += (int)nVmStep;
+  sqlite3VdbeLeave(p);
+  return rc;
+
+  /* Jump to here if a string or blob larger than SQLITE_MAX_LENGTH
+  ** is encountered.
+  */
+too_big:
+  sqlite3SetString(&p->zErrMsg, db, "string or blob too big");
+  rc = SQLITE_TOOBIG;
+  goto vdbe_error_halt;
+
+  /* Jump to here if a malloc() fails.
+  */
+no_mem:
+  db->mallocFailed = 1;
+  sqlite3SetString(&p->zErrMsg, db, "out of memory");
+  rc = SQLITE_NOMEM;
+  goto vdbe_error_halt;
+
+  /* Jump to here for any other kind of fatal error.  The "rc" variable
+  ** should hold the error number.
+  */
+abort_due_to_error:
+  assert( p->zErrMsg==0 );
+  if( db->mallocFailed ) rc = SQLITE_NOMEM;
+  if( rc!=SQLITE_IOERR_NOMEM ){
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc));
+  }
+  goto vdbe_error_halt;
+
+  /* Jump to here if the sqlite3_interrupt() API sets the interrupt
+  ** flag.
+  */
+abort_due_to_interrupt:
+  assert( db->u1.isInterrupted );
+  rc = SQLITE_INTERRUPT;
+  p->rc = rc;
+  sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc));
+  goto vdbe_error_halt;
+}
+
+
+/************** End of vdbe.c ************************************************/
+/************** Begin file vdbeblob.c ****************************************/
+/*
+** 2007 May 1
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code used to implement incremental BLOB I/O.
+*/
+
+
+#ifndef SQLITE_OMIT_INCRBLOB
+
+/*
+** Valid sqlite3_blob* handles point to Incrblob structures.
+*/
+typedef struct Incrblob Incrblob;
+struct Incrblob {
+  int flags;              /* Copy of "flags" passed to sqlite3_blob_open() */
+  int nByte;              /* Size of open blob, in bytes */
+  int iOffset;            /* Byte offset of blob in cursor data */
+  int iCol;               /* Table column this handle is open on */
+  BtCursor *pCsr;         /* Cursor pointing at blob row */
+  sqlite3_stmt *pStmt;    /* Statement holding cursor open */
+  sqlite3 *db;            /* The associated database */
+};
+
+
+/*
+** This function is used by both blob_open() and blob_reopen(). It seeks
+** the b-tree cursor associated with blob handle p to point to row iRow.
+** If successful, SQLITE_OK is returned and subsequent calls to
+** sqlite3_blob_read() or sqlite3_blob_write() access the specified row.
+**
+** If an error occurs, or if the specified row does not exist or does not
+** contain a value of type TEXT or BLOB in the column nominated when the
+** blob handle was opened, then an error code is returned and *pzErr may
+** be set to point to a buffer containing an error message. It is the
+** responsibility of the caller to free the error message buffer using
+** sqlite3DbFree().
+**
+** If an error does occur, then the b-tree cursor is closed. All subsequent
+** calls to sqlite3_blob_read(), blob_write() or blob_reopen() will 
+** immediately return SQLITE_ABORT.
+*/
+static int blobSeekToRow(Incrblob *p, sqlite3_int64 iRow, char **pzErr){
+  int rc;                         /* Error code */
+  char *zErr = 0;                 /* Error message */
+  Vdbe *v = (Vdbe *)p->pStmt;
+
+  /* Set the value of the SQL statements only variable to integer iRow. 
+  ** This is done directly instead of using sqlite3_bind_int64() to avoid 
+  ** triggering asserts related to mutexes.
+  */
+  assert( v->aVar[0].flags&MEM_Int );
+  v->aVar[0].u.i = iRow;
+
+  rc = sqlite3_step(p->pStmt);
+  if( rc==SQLITE_ROW ){
+    VdbeCursor *pC = v->apCsr[0];
+    u32 type = pC->aType[p->iCol];
+    if( type<12 ){
+      zErr = sqlite3MPrintf(p->db, "cannot open value of type %s",
+          type==0?"null": type==7?"real": "integer"
+      );
+      rc = SQLITE_ERROR;
+      sqlite3_finalize(p->pStmt);
+      p->pStmt = 0;
+    }else{
+      p->iOffset = pC->aType[p->iCol + pC->nField];
+      p->nByte = sqlite3VdbeSerialTypeLen(type);
+      p->pCsr =  pC->pCursor;
+      sqlite3BtreeIncrblobCursor(p->pCsr);
+    }
+  }
+
+  if( rc==SQLITE_ROW ){
+    rc = SQLITE_OK;
+  }else if( p->pStmt ){
+    rc = sqlite3_finalize(p->pStmt);
+    p->pStmt = 0;
+    if( rc==SQLITE_OK ){
+      zErr = sqlite3MPrintf(p->db, "no such rowid: %lld", iRow);
+      rc = SQLITE_ERROR;
+    }else{
+      zErr = sqlite3MPrintf(p->db, "%s", sqlite3_errmsg(p->db));
+    }
+  }
+
+  assert( rc!=SQLITE_OK || zErr==0 );
+  assert( rc!=SQLITE_ROW && rc!=SQLITE_DONE );
+
+  *pzErr = zErr;
+  return rc;
+}
+
+/*
+** Open a blob handle.
+*/
+SQLITE_API int sqlite3_blob_open(
+  sqlite3* db,            /* The database connection */
+  const char *zDb,        /* The attached database containing the blob */
+  const char *zTable,     /* The table containing the blob */
+  const char *zColumn,    /* The column containing the blob */
+  sqlite_int64 iRow,      /* The row containing the glob */
+  int flags,              /* True -> read/write access, false -> read-only */
+  sqlite3_blob **ppBlob   /* Handle for accessing the blob returned here */
+){
+  int nAttempt = 0;
+  int iCol;               /* Index of zColumn in row-record */
+
+  /* This VDBE program seeks a btree cursor to the identified 
+  ** db/table/row entry. The reason for using a vdbe program instead
+  ** of writing code to use the b-tree layer directly is that the
+  ** vdbe program will take advantage of the various transaction,
+  ** locking and error handling infrastructure built into the vdbe.
+  **
+  ** After seeking the cursor, the vdbe executes an OP_ResultRow.
+  ** Code external to the Vdbe then "borrows" the b-tree cursor and
+  ** uses it to implement the blob_read(), blob_write() and 
+  ** blob_bytes() functions.
+  **
+  ** The sqlite3_blob_close() function finalizes the vdbe program,
+  ** which closes the b-tree cursor and (possibly) commits the 
+  ** transaction.
+  */
+  static const int iLn = VDBE_OFFSET_LINENO(4);
+  static const VdbeOpList openBlob[] = {
+    /* {OP_Transaction, 0, 0, 0},  // 0: Inserted separately */
+    {OP_TableLock, 0, 0, 0},       /* 1: Acquire a read or write lock */
+    /* One of the following two instructions is replaced by an OP_Noop. */
+    {OP_OpenRead, 0, 0, 0},        /* 2: Open cursor 0 for reading */
+    {OP_OpenWrite, 0, 0, 0},       /* 3: Open cursor 0 for read/write */
+    {OP_Variable, 1, 1, 1},        /* 4: Push the rowid to the stack */
+    {OP_NotExists, 0, 10, 1},      /* 5: Seek the cursor */
+    {OP_Column, 0, 0, 1},          /* 6  */
+    {OP_ResultRow, 1, 0, 0},       /* 7  */
+    {OP_Goto, 0, 4, 0},            /* 8  */
+    {OP_Close, 0, 0, 0},           /* 9  */
+    {OP_Halt, 0, 0, 0},            /* 10 */
+  };
+
+  int rc = SQLITE_OK;
+  char *zErr = 0;
+  Table *pTab;
+  Parse *pParse = 0;
+  Incrblob *pBlob = 0;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || ppBlob==0 || zTable==0 ){
+    return SQLITE_MISUSE_BKPT;
+  }
+#endif
+  flags = !!flags;                /* flags = (flags ? 1 : 0); */
+  *ppBlob = 0;
+
+  sqlite3_mutex_enter(db->mutex);
+
+  pBlob = (Incrblob *)sqlite3DbMallocZero(db, sizeof(Incrblob));
+  if( !pBlob ) goto blob_open_out;
+  pParse = sqlite3StackAllocRaw(db, sizeof(*pParse));
+  if( !pParse ) goto blob_open_out;
+
+  do {
+    memset(pParse, 0, sizeof(Parse));
+    pParse->db = db;
+    sqlite3DbFree(db, zErr);
+    zErr = 0;
+
+    sqlite3BtreeEnterAll(db);
+    pTab = sqlite3LocateTable(pParse, 0, zTable, zDb);
+    if( pTab && IsVirtual(pTab) ){
+      pTab = 0;
+      sqlite3ErrorMsg(pParse, "cannot open virtual table: %s", zTable);
+    }
+    if( pTab && !HasRowid(pTab) ){
+      pTab = 0;
+      sqlite3ErrorMsg(pParse, "cannot open table without rowid: %s", zTable);
+    }
+#ifndef SQLITE_OMIT_VIEW
+    if( pTab && pTab->pSelect ){
+      pTab = 0;
+      sqlite3ErrorMsg(pParse, "cannot open view: %s", zTable);
+    }
+#endif
+    if( !pTab ){
+      if( pParse->zErrMsg ){
+        sqlite3DbFree(db, zErr);
+        zErr = pParse->zErrMsg;
+        pParse->zErrMsg = 0;
+      }
+      rc = SQLITE_ERROR;
+      sqlite3BtreeLeaveAll(db);
+      goto blob_open_out;
+    }
+
+    /* Now search pTab for the exact column. */
+    for(iCol=0; iCol<pTab->nCol; iCol++) {
+      if( sqlite3StrICmp(pTab->aCol[iCol].zName, zColumn)==0 ){
+        break;
+      }
+    }
+    if( iCol==pTab->nCol ){
+      sqlite3DbFree(db, zErr);
+      zErr = sqlite3MPrintf(db, "no such column: \"%s\"", zColumn);
+      rc = SQLITE_ERROR;
+      sqlite3BtreeLeaveAll(db);
+      goto blob_open_out;
+    }
+
+    /* If the value is being opened for writing, check that the
+    ** column is not indexed, and that it is not part of a foreign key. 
+    ** It is against the rules to open a column to which either of these
+    ** descriptions applies for writing.  */
+    if( flags ){
+      const char *zFault = 0;
+      Index *pIdx;
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+      if( db->flags&SQLITE_ForeignKeys ){
+        /* Check that the column is not part of an FK child key definition. It
+        ** is not necessary to check if it is part of a parent key, as parent
+        ** key columns must be indexed. The check below will pick up this 
+        ** case.  */
+        FKey *pFKey;
+        for(pFKey=pTab->pFKey; pFKey; pFKey=pFKey->pNextFrom){
+          int j;
+          for(j=0; j<pFKey->nCol; j++){
+            if( pFKey->aCol[j].iFrom==iCol ){
+              zFault = "foreign key";
+            }
+          }
+        }
+      }
+#endif
+      for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+        int j;
+        for(j=0; j<pIdx->nKeyCol; j++){
+          if( pIdx->aiColumn[j]==iCol ){
+            zFault = "indexed";
+          }
+        }
+      }
+      if( zFault ){
+        sqlite3DbFree(db, zErr);
+        zErr = sqlite3MPrintf(db, "cannot open %s column for writing", zFault);
+        rc = SQLITE_ERROR;
+        sqlite3BtreeLeaveAll(db);
+        goto blob_open_out;
+      }
+    }
+
+    pBlob->pStmt = (sqlite3_stmt *)sqlite3VdbeCreate(pParse);
+    assert( pBlob->pStmt || db->mallocFailed );
+    if( pBlob->pStmt ){
+      Vdbe *v = (Vdbe *)pBlob->pStmt;
+      int iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+
+
+      sqlite3VdbeAddOp4Int(v, OP_Transaction, iDb, flags, 
+                           pTab->pSchema->schema_cookie,
+                           pTab->pSchema->iGeneration);
+      sqlite3VdbeChangeP5(v, 1);     
+      sqlite3VdbeAddOpList(v, ArraySize(openBlob), openBlob, iLn);
+
+      /* Make sure a mutex is held on the table to be accessed */
+      sqlite3VdbeUsesBtree(v, iDb); 
+
+      /* Configure the OP_TableLock instruction */
+#ifdef SQLITE_OMIT_SHARED_CACHE
+      sqlite3VdbeChangeToNoop(v, 1);
+#else
+      sqlite3VdbeChangeP1(v, 1, iDb);
+      sqlite3VdbeChangeP2(v, 1, pTab->tnum);
+      sqlite3VdbeChangeP3(v, 1, flags);
+      sqlite3VdbeChangeP4(v, 1, pTab->zName, P4_TRANSIENT);
+#endif
+
+      /* Remove either the OP_OpenWrite or OpenRead. Set the P2 
+      ** parameter of the other to pTab->tnum.  */
+      sqlite3VdbeChangeToNoop(v, 3 - flags);
+      sqlite3VdbeChangeP2(v, 2 + flags, pTab->tnum);
+      sqlite3VdbeChangeP3(v, 2 + flags, iDb);
+
+      /* Configure the number of columns. Configure the cursor to
+      ** think that the table has one more column than it really
+      ** does. An OP_Column to retrieve this imaginary column will
+      ** always return an SQL NULL. This is useful because it means
+      ** we can invoke OP_Column to fill in the vdbe cursors type 
+      ** and offset cache without causing any IO.
+      */
+      sqlite3VdbeChangeP4(v, 2+flags, SQLITE_INT_TO_PTR(pTab->nCol+1),P4_INT32);
+      sqlite3VdbeChangeP2(v, 6, pTab->nCol);
+      if( !db->mallocFailed ){
+        pParse->nVar = 1;
+        pParse->nMem = 1;
+        pParse->nTab = 1;
+        sqlite3VdbeMakeReady(v, pParse);
+      }
+    }
+   
+    pBlob->flags = flags;
+    pBlob->iCol = iCol;
+    pBlob->db = db;
+    sqlite3BtreeLeaveAll(db);
+    if( db->mallocFailed ){
+      goto blob_open_out;
+    }
+    sqlite3_bind_int64(pBlob->pStmt, 1, iRow);
+    rc = blobSeekToRow(pBlob, iRow, &zErr);
+  } while( (++nAttempt)<SQLITE_MAX_SCHEMA_RETRY && rc==SQLITE_SCHEMA );
+
+blob_open_out:
+  if( rc==SQLITE_OK && db->mallocFailed==0 ){
+    *ppBlob = (sqlite3_blob *)pBlob;
+  }else{
+    if( pBlob && pBlob->pStmt ) sqlite3VdbeFinalize((Vdbe *)pBlob->pStmt);
+    sqlite3DbFree(db, pBlob);
+  }
+  sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr);
+  sqlite3DbFree(db, zErr);
+  sqlite3ParserReset(pParse);
+  sqlite3StackFree(db, pParse);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** Close a blob handle that was previously created using
+** sqlite3_blob_open().
+*/
+SQLITE_API int sqlite3_blob_close(sqlite3_blob *pBlob){
+  Incrblob *p = (Incrblob *)pBlob;
+  int rc;
+  sqlite3 *db;
+
+  if( p ){
+    db = p->db;
+    sqlite3_mutex_enter(db->mutex);
+    rc = sqlite3_finalize(p->pStmt);
+    sqlite3DbFree(db, p);
+    sqlite3_mutex_leave(db->mutex);
+  }else{
+    rc = SQLITE_OK;
+  }
+  return rc;
+}
+
+/*
+** Perform a read or write operation on a blob
+*/
+static int blobReadWrite(
+  sqlite3_blob *pBlob, 
+  void *z, 
+  int n, 
+  int iOffset, 
+  int (*xCall)(BtCursor*, u32, u32, void*)
+){
+  int rc;
+  Incrblob *p = (Incrblob *)pBlob;
+  Vdbe *v;
+  sqlite3 *db;
+
+  if( p==0 ) return SQLITE_MISUSE_BKPT;
+  db = p->db;
+  sqlite3_mutex_enter(db->mutex);
+  v = (Vdbe*)p->pStmt;
+
+  if( n<0 || iOffset<0 || (iOffset+n)>p->nByte ){
+    /* Request is out of range. Return a transient error. */
+    rc = SQLITE_ERROR;
+  }else if( v==0 ){
+    /* If there is no statement handle, then the blob-handle has
+    ** already been invalidated. Return SQLITE_ABORT in this case.
+    */
+    rc = SQLITE_ABORT;
+  }else{
+    /* Call either BtreeData() or BtreePutData(). If SQLITE_ABORT is
+    ** returned, clean-up the statement handle.
+    */
+    assert( db == v->db );
+    sqlite3BtreeEnterCursor(p->pCsr);
+    rc = xCall(p->pCsr, iOffset+p->iOffset, n, z);
+    sqlite3BtreeLeaveCursor(p->pCsr);
+    if( rc==SQLITE_ABORT ){
+      sqlite3VdbeFinalize(v);
+      p->pStmt = 0;
+    }else{
+      v->rc = rc;
+    }
+  }
+  sqlite3Error(db, rc);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** Read data from a blob handle.
+*/
+SQLITE_API int sqlite3_blob_read(sqlite3_blob *pBlob, void *z, int n, int iOffset){
+  return blobReadWrite(pBlob, z, n, iOffset, sqlite3BtreeData);
+}
+
+/*
+** Write data to a blob handle.
+*/
+SQLITE_API int sqlite3_blob_write(sqlite3_blob *pBlob, const void *z, int n, int iOffset){
+  return blobReadWrite(pBlob, (void *)z, n, iOffset, sqlite3BtreePutData);
+}
+
+/*
+** Query a blob handle for the size of the data.
+**
+** The Incrblob.nByte field is fixed for the lifetime of the Incrblob
+** so no mutex is required for access.
+*/
+SQLITE_API int sqlite3_blob_bytes(sqlite3_blob *pBlob){
+  Incrblob *p = (Incrblob *)pBlob;
+  return (p && p->pStmt) ? p->nByte : 0;
+}
+
+/*
+** Move an existing blob handle to point to a different row of the same
+** database table.
+**
+** If an error occurs, or if the specified row does not exist or does not
+** contain a blob or text value, then an error code is returned and the
+** database handle error code and message set. If this happens, then all 
+** subsequent calls to sqlite3_blob_xxx() functions (except blob_close()) 
+** immediately return SQLITE_ABORT.
+*/
+SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){
+  int rc;
+  Incrblob *p = (Incrblob *)pBlob;
+  sqlite3 *db;
+
+  if( p==0 ) return SQLITE_MISUSE_BKPT;
+  db = p->db;
+  sqlite3_mutex_enter(db->mutex);
+
+  if( p->pStmt==0 ){
+    /* If there is no statement handle, then the blob-handle has
+    ** already been invalidated. Return SQLITE_ABORT in this case.
+    */
+    rc = SQLITE_ABORT;
+  }else{
+    char *zErr;
+    rc = blobSeekToRow(p, iRow, &zErr);
+    if( rc!=SQLITE_OK ){
+      sqlite3ErrorWithMsg(db, rc, (zErr ? "%s" : 0), zErr);
+      sqlite3DbFree(db, zErr);
+    }
+    assert( rc!=SQLITE_SCHEMA );
+  }
+
+  rc = sqlite3ApiExit(db, rc);
+  assert( rc==SQLITE_OK || p->pStmt==0 );
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+#endif /* #ifndef SQLITE_OMIT_INCRBLOB */
+
+/************** End of vdbeblob.c ********************************************/
+/************** Begin file vdbesort.c ****************************************/
+/*
+** 2011-07-09
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code for the VdbeSorter object, used in concert with
+** a VdbeCursor to sort large numbers of keys for CREATE INDEX statements
+** or by SELECT statements with ORDER BY clauses that cannot be satisfied
+** using indexes and without LIMIT clauses.
+**
+** The VdbeSorter object implements a multi-threaded external merge sort
+** algorithm that is efficient even if the number of elements being sorted
+** exceeds the available memory.
+**
+** Here is the (internal, non-API) interface between this module and the
+** rest of the SQLite system:
+**
+**    sqlite3VdbeSorterInit()       Create a new VdbeSorter object.
+**
+**    sqlite3VdbeSorterWrite()      Add a single new row to the VdbeSorter
+**                                  object.  The row is a binary blob in the
+**                                  OP_MakeRecord format that contains both
+**                                  the ORDER BY key columns and result columns
+**                                  in the case of a SELECT w/ ORDER BY, or
+**                                  the complete record for an index entry
+**                                  in the case of a CREATE INDEX.
+**
+**    sqlite3VdbeSorterRewind()     Sort all content previously added.
+**                                  Position the read cursor on the
+**                                  first sorted element.
+**
+**    sqlite3VdbeSorterNext()       Advance the read cursor to the next sorted
+**                                  element.
+**
+**    sqlite3VdbeSorterRowkey()     Return the complete binary blob for the
+**                                  row currently under the read cursor.
+**
+**    sqlite3VdbeSorterCompare()    Compare the binary blob for the row
+**                                  currently under the read cursor against
+**                                  another binary blob X and report if
+**                                  X is strictly less than the read cursor.
+**                                  Used to enforce uniqueness in a
+**                                  CREATE UNIQUE INDEX statement.
+**
+**    sqlite3VdbeSorterClose()      Close the VdbeSorter object and reclaim
+**                                  all resources.
+**
+**    sqlite3VdbeSorterReset()      Refurbish the VdbeSorter for reuse.  This
+**                                  is like Close() followed by Init() only
+**                                  much faster.
+**
+** The interfaces above must be called in a particular order.  Write() can 
+** only occur in between Init()/Reset() and Rewind().  Next(), Rowkey(), and
+** Compare() can only occur in between Rewind() and Close()/Reset(). i.e.
+**
+**   Init()
+**   for each record: Write()
+**   Rewind()
+**     Rowkey()/Compare()
+**   Next() 
+**   Close()
+**
+** Algorithm:
+**
+** Records passed to the sorter via calls to Write() are initially held 
+** unsorted in main memory. Assuming the amount of memory used never exceeds
+** a threshold, when Rewind() is called the set of records is sorted using
+** an in-memory merge sort. In this case, no temporary files are required
+** and subsequent calls to Rowkey(), Next() and Compare() read records 
+** directly from main memory.
+**
+** If the amount of space used to store records in main memory exceeds the
+** threshold, then the set of records currently in memory are sorted and
+** written to a temporary file in "Packed Memory Array" (PMA) format.
+** A PMA created at this point is known as a "level-0 PMA". Higher levels
+** of PMAs may be created by merging existing PMAs together - for example
+** merging two or more level-0 PMAs together creates a level-1 PMA.
+**
+** The threshold for the amount of main memory to use before flushing 
+** records to a PMA is roughly the same as the limit configured for the
+** page-cache of the main database. Specifically, the threshold is set to 
+** the value returned by "PRAGMA main.page_size" multipled by 
+** that returned by "PRAGMA main.cache_size", in bytes.
+**
+** If the sorter is running in single-threaded mode, then all PMAs generated
+** are appended to a single temporary file. Or, if the sorter is running in
+** multi-threaded mode then up to (N+1) temporary files may be opened, where
+** N is the configured number of worker threads. In this case, instead of
+** sorting the records and writing the PMA to a temporary file itself, the
+** calling thread usually launches a worker thread to do so. Except, if
+** there are already N worker threads running, the main thread does the work
+** itself.
+**
+** The sorter is running in multi-threaded mode if (a) the library was built
+** with pre-processor symbol SQLITE_MAX_WORKER_THREADS set to a value greater
+** than zero, and (b) worker threads have been enabled at runtime by calling
+** "PRAGMA threads=N" with some value of N greater than 0.
+**
+** When Rewind() is called, any data remaining in memory is flushed to a 
+** final PMA. So at this point the data is stored in some number of sorted
+** PMAs within temporary files on disk.
+**
+** If there are fewer than SORTER_MAX_MERGE_COUNT PMAs in total and the
+** sorter is running in single-threaded mode, then these PMAs are merged
+** incrementally as keys are retreived from the sorter by the VDBE.  The
+** MergeEngine object, described in further detail below, performs this
+** merge.
+**
+** Or, if running in multi-threaded mode, then a background thread is
+** launched to merge the existing PMAs. Once the background thread has
+** merged T bytes of data into a single sorted PMA, the main thread 
+** begins reading keys from that PMA while the background thread proceeds
+** with merging the next T bytes of data. And so on.
+**
+** Parameter T is set to half the value of the memory threshold used 
+** by Write() above to determine when to create a new PMA.
+**
+** If there are more than SORTER_MAX_MERGE_COUNT PMAs in total when 
+** Rewind() is called, then a hierarchy of incremental-merges is used. 
+** First, T bytes of data from the first SORTER_MAX_MERGE_COUNT PMAs on 
+** disk are merged together. Then T bytes of data from the second set, and
+** so on, such that no operation ever merges more than SORTER_MAX_MERGE_COUNT
+** PMAs at a time. This done is to improve locality.
+**
+** If running in multi-threaded mode and there are more than
+** SORTER_MAX_MERGE_COUNT PMAs on disk when Rewind() is called, then more
+** than one background thread may be created. Specifically, there may be
+** one background thread for each temporary file on disk, and one background
+** thread to merge the output of each of the others to a single PMA for
+** the main thread to read from.
+*/
+
+/* 
+** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various
+** messages to stderr that may be helpful in understanding the performance
+** characteristics of the sorter in multi-threaded mode.
+*/
+#if 0
+# define SQLITE_DEBUG_SORTER_THREADS 1
+#endif
+
+/*
+** Hard-coded maximum amount of data to accumulate in memory before flushing
+** to a level 0 PMA. The purpose of this limit is to prevent various integer
+** overflows. 512MiB.
+*/
+#define SQLITE_MAX_PMASZ    (1<<29)
+
+/*
+** Private objects used by the sorter
+*/
+typedef struct MergeEngine MergeEngine;     /* Merge PMAs together */
+typedef struct PmaReader PmaReader;         /* Incrementally read one PMA */
+typedef struct PmaWriter PmaWriter;         /* Incrementally write one PMA */
+typedef struct SorterRecord SorterRecord;   /* A record being sorted */
+typedef struct SortSubtask SortSubtask;     /* A sub-task in the sort process */
+typedef struct SorterFile SorterFile;       /* Temporary file object wrapper */
+typedef struct SorterList SorterList;       /* In-memory list of records */
+typedef struct IncrMerger IncrMerger;       /* Read & merge multiple PMAs */
+
+/*
+** A container for a temp file handle and the current amount of data 
+** stored in the file.
+*/
+struct SorterFile {
+  sqlite3_file *pFd;              /* File handle */
+  i64 iEof;                       /* Bytes of data stored in pFd */
+};
+
+/*
+** An in-memory list of objects to be sorted.
+**
+** If aMemory==0 then each object is allocated separately and the objects
+** are connected using SorterRecord.u.pNext.  If aMemory!=0 then all objects
+** are stored in the aMemory[] bulk memory, one right after the other, and
+** are connected using SorterRecord.u.iNext.
+*/
+struct SorterList {
+  SorterRecord *pList;            /* Linked list of records */
+  u8 *aMemory;                    /* If non-NULL, bulk memory to hold pList */
+  int szPMA;                      /* Size of pList as PMA in bytes */
+};
+
+/*
+** The MergeEngine object is used to combine two or more smaller PMAs into
+** one big PMA using a merge operation.  Separate PMAs all need to be
+** combined into one big PMA in order to be able to step through the sorted
+** records in order.
+**
+** The aReadr[] array contains a PmaReader object for each of the PMAs being
+** merged.  An aReadr[] object either points to a valid key or else is at EOF.
+** ("EOF" means "End Of File".  When aReadr[] is at EOF there is no more data.)
+** For the purposes of the paragraphs below, we assume that the array is
+** actually N elements in size, where N is the smallest power of 2 greater
+** to or equal to the number of PMAs being merged. The extra aReadr[] elements
+** are treated as if they are empty (always at EOF).
+**
+** The aTree[] array is also N elements in size. The value of N is stored in
+** the MergeEngine.nTree variable.
+**
+** The final (N/2) elements of aTree[] contain the results of comparing
+** pairs of PMA keys together. Element i contains the result of 
+** comparing aReadr[2*i-N] and aReadr[2*i-N+1]. Whichever key is smaller, the
+** aTree element is set to the index of it. 
+**
+** For the purposes of this comparison, EOF is considered greater than any
+** other key value. If the keys are equal (only possible with two EOF
+** values), it doesn't matter which index is stored.
+**
+** The (N/4) elements of aTree[] that precede the final (N/2) described 
+** above contains the index of the smallest of each block of 4 PmaReaders
+** And so on. So that aTree[1] contains the index of the PmaReader that 
+** currently points to the smallest key value. aTree[0] is unused.
+**
+** Example:
+**
+**     aReadr[0] -> Banana
+**     aReadr[1] -> Feijoa
+**     aReadr[2] -> Elderberry
+**     aReadr[3] -> Currant
+**     aReadr[4] -> Grapefruit
+**     aReadr[5] -> Apple
+**     aReadr[6] -> Durian
+**     aReadr[7] -> EOF
+**
+**     aTree[] = { X, 5   0, 5    0, 3, 5, 6 }
+**
+** The current element is "Apple" (the value of the key indicated by 
+** PmaReader 5). When the Next() operation is invoked, PmaReader 5 will
+** be advanced to the next key in its segment. Say the next key is
+** "Eggplant":
+**
+**     aReadr[5] -> Eggplant
+**
+** The contents of aTree[] are updated first by comparing the new PmaReader
+** 5 key to the current key of PmaReader 4 (still "Grapefruit"). The PmaReader
+** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
+** The value of PmaReader 6 - "Durian" - is now smaller than that of PmaReader
+** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
+** so the value written into element 1 of the array is 0. As follows:
+**
+**     aTree[] = { X, 0   0, 6    0, 3, 5, 6 }
+**
+** In other words, each time we advance to the next sorter element, log2(N)
+** key comparison operations are required, where N is the number of segments
+** being merged (rounded up to the next power of 2).
+*/
+struct MergeEngine {
+  int nTree;                 /* Used size of aTree/aReadr (power of 2) */
+  SortSubtask *pTask;        /* Used by this thread only */
+  int *aTree;                /* Current state of incremental merge */
+  PmaReader *aReadr;         /* Array of PmaReaders to merge data from */
+};
+
+/*
+** This object represents a single thread of control in a sort operation.
+** Exactly VdbeSorter.nTask instances of this object are allocated
+** as part of each VdbeSorter object. Instances are never allocated any
+** other way. VdbeSorter.nTask is set to the number of worker threads allowed
+** (see SQLITE_CONFIG_WORKER_THREADS) plus one (the main thread).  Thus for
+** single-threaded operation, there is exactly one instance of this object
+** and for multi-threaded operation there are two or more instances.
+**
+** Essentially, this structure contains all those fields of the VdbeSorter
+** structure for which each thread requires a separate instance. For example,
+** each thread requries its own UnpackedRecord object to unpack records in
+** as part of comparison operations.
+**
+** Before a background thread is launched, variable bDone is set to 0. Then, 
+** right before it exits, the thread itself sets bDone to 1. This is used for 
+** two purposes:
+**
+**   1. When flushing the contents of memory to a level-0 PMA on disk, to
+**      attempt to select a SortSubtask for which there is not already an
+**      active background thread (since doing so causes the main thread
+**      to block until it finishes).
+**
+**   2. If SQLITE_DEBUG_SORTER_THREADS is defined, to determine if a call
+**      to sqlite3ThreadJoin() is likely to block. Cases that are likely to
+**      block provoke debugging output.
+**
+** In both cases, the effects of the main thread seeing (bDone==0) even
+** after the thread has finished are not dire. So we don't worry about
+** memory barriers and such here.
+*/
+struct SortSubtask {
+  SQLiteThread *pThread;          /* Background thread, if any */
+  int bDone;                      /* Set if thread is finished but not joined */
+  VdbeSorter *pSorter;            /* Sorter that owns this sub-task */
+  UnpackedRecord *pUnpacked;      /* Space to unpack a record */
+  SorterList list;                /* List for thread to write to a PMA */
+  int nPMA;                       /* Number of PMAs currently in file */
+  SorterFile file;                /* Temp file for level-0 PMAs */
+  SorterFile file2;               /* Space for other PMAs */
+};
+
+/*
+** Main sorter structure. A single instance of this is allocated for each 
+** sorter cursor created by the VDBE.
+**
+** mxKeysize:
+**   As records are added to the sorter by calls to sqlite3VdbeSorterWrite(),
+**   this variable is updated so as to be set to the size on disk of the
+**   largest record in the sorter.
+*/
+struct VdbeSorter {
+  int mnPmaSize;                  /* Minimum PMA size, in bytes */
+  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
+  int mxKeysize;                  /* Largest serialized key seen so far */
+  int pgsz;                       /* Main database page size */
+  PmaReader *pReader;             /* Readr data from here after Rewind() */
+  MergeEngine *pMerger;           /* Or here, if bUseThreads==0 */
+  sqlite3 *db;                    /* Database connection */
+  KeyInfo *pKeyInfo;              /* How to compare records */
+  UnpackedRecord *pUnpacked;      /* Used by VdbeSorterCompare() */
+  SorterList list;                /* List of in-memory records */
+  int iMemory;                    /* Offset of free space in list.aMemory */
+  int nMemory;                    /* Size of list.aMemory allocation in bytes */
+  u8 bUsePMA;                     /* True if one or more PMAs created */
+  u8 bUseThreads;                 /* True to use background threads */
+  u8 iPrev;                       /* Previous thread used to flush PMA */
+  u8 nTask;                       /* Size of aTask[] array */
+  SortSubtask aTask[1];           /* One or more subtasks */
+};
+
+/*
+** An instance of the following object is used to read records out of a
+** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
+** aKey might point into aMap or into aBuffer.  If neither of those locations
+** contain a contiguous representation of the key, then aAlloc is allocated
+** and the key is copied into aAlloc and aKey is made to poitn to aAlloc.
+**
+** pFd==0 at EOF.
+*/
+struct PmaReader {
+  i64 iReadOff;               /* Current read offset */
+  i64 iEof;                   /* 1 byte past EOF for this PmaReader */
+  int nAlloc;                 /* Bytes of space at aAlloc */
+  int nKey;                   /* Number of bytes in key */
+  sqlite3_file *pFd;          /* File handle we are reading from */
+  u8 *aAlloc;                 /* Space for aKey if aBuffer and pMap wont work */
+  u8 *aKey;                   /* Pointer to current key */
+  u8 *aBuffer;                /* Current read buffer */
+  int nBuffer;                /* Size of read buffer in bytes */
+  u8 *aMap;                   /* Pointer to mapping of entire file */
+  IncrMerger *pIncr;          /* Incremental merger */
+};
+
+/*
+** Normally, a PmaReader object iterates through an existing PMA stored 
+** within a temp file. However, if the PmaReader.pIncr variable points to
+** an object of the following type, it may be used to iterate/merge through
+** multiple PMAs simultaneously.
+**
+** There are two types of IncrMerger object - single (bUseThread==0) and 
+** multi-threaded (bUseThread==1). 
+**
+** A multi-threaded IncrMerger object uses two temporary files - aFile[0] 
+** and aFile[1]. Neither file is allowed to grow to more than mxSz bytes in 
+** size. When the IncrMerger is initialized, it reads enough data from 
+** pMerger to populate aFile[0]. It then sets variables within the 
+** corresponding PmaReader object to read from that file and kicks off 
+** a background thread to populate aFile[1] with the next mxSz bytes of 
+** sorted record data from pMerger. 
+**
+** When the PmaReader reaches the end of aFile[0], it blocks until the
+** background thread has finished populating aFile[1]. It then exchanges
+** the contents of the aFile[0] and aFile[1] variables within this structure,
+** sets the PmaReader fields to read from the new aFile[0] and kicks off
+** another background thread to populate the new aFile[1]. And so on, until
+** the contents of pMerger are exhausted.
+**
+** A single-threaded IncrMerger does not open any temporary files of its
+** own. Instead, it has exclusive access to mxSz bytes of space beginning
+** at offset iStartOff of file pTask->file2. And instead of using a 
+** background thread to prepare data for the PmaReader, with a single
+** threaded IncrMerger the allocate part of pTask->file2 is "refilled" with
+** keys from pMerger by the calling thread whenever the PmaReader runs out
+** of data.
+*/
+struct IncrMerger {
+  SortSubtask *pTask;             /* Task that owns this merger */
+  MergeEngine *pMerger;           /* Merge engine thread reads data from */
+  i64 iStartOff;                  /* Offset to start writing file at */
+  int mxSz;                       /* Maximum bytes of data to store */
+  int bEof;                       /* Set to true when merge is finished */
+  int bUseThread;                 /* True to use a bg thread for this object */
+  SorterFile aFile[2];            /* aFile[0] for reading, [1] for writing */
+};
+
+/*
+** An instance of this object is used for writing a PMA.
+**
+** The PMA is written one record at a time.  Each record is of an arbitrary
+** size.  But I/O is more efficient if it occurs in page-sized blocks where
+** each block is aligned on a page boundary.  This object caches writes to
+** the PMA so that aligned, page-size blocks are written.
+*/
+struct PmaWriter {
+  int eFWErr;                     /* Non-zero if in an error state */
+  u8 *aBuffer;                    /* Pointer to write buffer */
+  int nBuffer;                    /* Size of write buffer in bytes */
+  int iBufStart;                  /* First byte of buffer to write */
+  int iBufEnd;                    /* Last byte of buffer to write */
+  i64 iWriteOff;                  /* Offset of start of buffer in file */
+  sqlite3_file *pFd;              /* File handle to write to */
+};
+
+/*
+** This object is the header on a single record while that record is being
+** held in memory and prior to being written out as part of a PMA.
+**
+** How the linked list is connected depends on how memory is being managed
+** by this module. If using a separate allocation for each in-memory record
+** (VdbeSorter.list.aMemory==0), then the list is always connected using the
+** SorterRecord.u.pNext pointers.
+**
+** Or, if using the single large allocation method (VdbeSorter.list.aMemory!=0),
+** then while records are being accumulated the list is linked using the
+** SorterRecord.u.iNext offset. This is because the aMemory[] array may
+** be sqlite3Realloc()ed while records are being accumulated. Once the VM
+** has finished passing records to the sorter, or when the in-memory buffer
+** is full, the list is sorted. As part of the sorting process, it is
+** converted to use the SorterRecord.u.pNext pointers. See function
+** vdbeSorterSort() for details.
+*/
+struct SorterRecord {
+  int nVal;                       /* Size of the record in bytes */
+  union {
+    SorterRecord *pNext;          /* Pointer to next record in list */
+    int iNext;                    /* Offset within aMemory of next record */
+  } u;
+  /* The data for the record immediately follows this header */
+};
+
+/* Return a pointer to the buffer containing the record data for SorterRecord
+** object p. Should be used as if:
+**
+**   void *SRVAL(SorterRecord *p) { return (void*)&p[1]; }
+*/
+#define SRVAL(p) ((void*)((SorterRecord*)(p) + 1))
+
+
+/* Maximum number of PMAs that a single MergeEngine can merge */
+#define SORTER_MAX_MERGE_COUNT 16
+
+static int vdbeIncrSwap(IncrMerger*);
+static void vdbeIncrFree(IncrMerger *);
+
+/*
+** Free all memory belonging to the PmaReader object passed as the
+** argument. All structure fields are set to zero before returning.
+*/
+static void vdbePmaReaderClear(PmaReader *pReadr){
+  sqlite3_free(pReadr->aAlloc);
+  sqlite3_free(pReadr->aBuffer);
+  if( pReadr->aMap ) sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
+  vdbeIncrFree(pReadr->pIncr);
+  memset(pReadr, 0, sizeof(PmaReader));
+}
+
+/*
+** Read the next nByte bytes of data from the PMA p.
+** If successful, set *ppOut to point to a buffer containing the data
+** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite
+** error code.
+**
+** The buffer returned in *ppOut is only valid until the
+** next call to this function.
+*/
+static int vdbePmaReadBlob(
+  PmaReader *p,                   /* PmaReader from which to take the blob */
+  int nByte,                      /* Bytes of data to read */
+  u8 **ppOut                      /* OUT: Pointer to buffer containing data */
+){
+  int iBuf;                       /* Offset within buffer to read from */
+  int nAvail;                     /* Bytes of data available in buffer */
+
+  if( p->aMap ){
+    *ppOut = &p->aMap[p->iReadOff];
+    p->iReadOff += nByte;
+    return SQLITE_OK;
+  }
+
+  assert( p->aBuffer );
+
+  /* If there is no more data to be read from the buffer, read the next 
+  ** p->nBuffer bytes of data from the file into it. Or, if there are less
+  ** than p->nBuffer bytes remaining in the PMA, read all remaining data.  */
+  iBuf = p->iReadOff % p->nBuffer;
+  if( iBuf==0 ){
+    int nRead;                    /* Bytes to read from disk */
+    int rc;                       /* sqlite3OsRead() return code */
+
+    /* Determine how many bytes of data to read. */
+    if( (p->iEof - p->iReadOff) > (i64)p->nBuffer ){
+      nRead = p->nBuffer;
+    }else{
+      nRead = (int)(p->iEof - p->iReadOff);
+    }
+    assert( nRead>0 );
+
+    /* Readr data from the file. Return early if an error occurs. */
+    rc = sqlite3OsRead(p->pFd, p->aBuffer, nRead, p->iReadOff);
+    assert( rc!=SQLITE_IOERR_SHORT_READ );
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  nAvail = p->nBuffer - iBuf; 
+
+  if( nByte<=nAvail ){
+    /* The requested data is available in the in-memory buffer. In this
+    ** case there is no need to make a copy of the data, just return a 
+    ** pointer into the buffer to the caller.  */
+    *ppOut = &p->aBuffer[iBuf];
+    p->iReadOff += nByte;
+  }else{
+    /* The requested data is not all available in the in-memory buffer.
+    ** In this case, allocate space at p->aAlloc[] to copy the requested
+    ** range into. Then return a copy of pointer p->aAlloc to the caller.  */
+    int nRem;                     /* Bytes remaining to copy */
+
+    /* Extend the p->aAlloc[] allocation if required. */
+    if( p->nAlloc<nByte ){
+      u8 *aNew;
+      int nNew = MAX(128, p->nAlloc*2);
+      while( nByte>nNew ) nNew = nNew*2;
+      aNew = sqlite3Realloc(p->aAlloc, nNew);
+      if( !aNew ) return SQLITE_NOMEM;
+      p->nAlloc = nNew;
+      p->aAlloc = aNew;
+    }
+
+    /* Copy as much data as is available in the buffer into the start of
+    ** p->aAlloc[].  */
+    memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail);
+    p->iReadOff += nAvail;
+    nRem = nByte - nAvail;
+
+    /* The following loop copies up to p->nBuffer bytes per iteration into
+    ** the p->aAlloc[] buffer.  */
+    while( nRem>0 ){
+      int rc;                     /* vdbePmaReadBlob() return code */
+      int nCopy;                  /* Number of bytes to copy */
+      u8 *aNext;                  /* Pointer to buffer to copy data from */
+
+      nCopy = nRem;
+      if( nRem>p->nBuffer ) nCopy = p->nBuffer;
+      rc = vdbePmaReadBlob(p, nCopy, &aNext);
+      if( rc!=SQLITE_OK ) return rc;
+      assert( aNext!=p->aAlloc );
+      memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy);
+      nRem -= nCopy;
+    }
+
+    *ppOut = p->aAlloc;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Read a varint from the stream of data accessed by p. Set *pnOut to
+** the value read.
+*/
+static int vdbePmaReadVarint(PmaReader *p, u64 *pnOut){
+  int iBuf;
+
+  if( p->aMap ){
+    p->iReadOff += sqlite3GetVarint(&p->aMap[p->iReadOff], pnOut);
+  }else{
+    iBuf = p->iReadOff % p->nBuffer;
+    if( iBuf && (p->nBuffer-iBuf)>=9 ){
+      p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
+    }else{
+      u8 aVarint[16], *a;
+      int i = 0, rc;
+      do{
+        rc = vdbePmaReadBlob(p, 1, &a);
+        if( rc ) return rc;
+        aVarint[(i++)&0xf] = a[0];
+      }while( (a[0]&0x80)!=0 );
+      sqlite3GetVarint(aVarint, pnOut);
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Attempt to memory map file pFile. If successful, set *pp to point to the
+** new mapping and return SQLITE_OK. If the mapping is not attempted 
+** (because the file is too large or the VFS layer is configured not to use
+** mmap), return SQLITE_OK and set *pp to NULL.
+**
+** Or, if an error occurs, return an SQLite error code. The final value of
+** *pp is undefined in this case.
+*/
+static int vdbeSorterMapFile(SortSubtask *pTask, SorterFile *pFile, u8 **pp){
+  int rc = SQLITE_OK;
+  if( pFile->iEof<=(i64)(pTask->pSorter->db->nMaxSorterMmap) ){
+    sqlite3_file *pFd = pFile->pFd;
+    if( pFd->pMethods->iVersion>=3 ){
+      rc = sqlite3OsFetch(pFd, 0, (int)pFile->iEof, (void**)pp);
+      testcase( rc!=SQLITE_OK );
+    }
+  }
+  return rc;
+}
+
+/*
+** Attach PmaReader pReadr to file pFile (if it is not already attached to
+** that file) and seek it to offset iOff within the file.  Return SQLITE_OK 
+** if successful, or an SQLite error code if an error occurs.
+*/
+static int vdbePmaReaderSeek(
+  SortSubtask *pTask,             /* Task context */
+  PmaReader *pReadr,              /* Reader whose cursor is to be moved */
+  SorterFile *pFile,              /* Sorter file to read from */
+  i64 iOff                        /* Offset in pFile */
+){
+  int rc = SQLITE_OK;
+
+  assert( pReadr->pIncr==0 || pReadr->pIncr->bEof==0 );
+
+  if( sqlite3FaultSim(201) ) return SQLITE_IOERR_READ;
+  if( pReadr->aMap ){
+    sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
+    pReadr->aMap = 0;
+  }
+  pReadr->iReadOff = iOff;
+  pReadr->iEof = pFile->iEof;
+  pReadr->pFd = pFile->pFd;
+
+  rc = vdbeSorterMapFile(pTask, pFile, &pReadr->aMap);
+  if( rc==SQLITE_OK && pReadr->aMap==0 ){
+    int pgsz = pTask->pSorter->pgsz;
+    int iBuf = pReadr->iReadOff % pgsz;
+    if( pReadr->aBuffer==0 ){
+      pReadr->aBuffer = (u8*)sqlite3Malloc(pgsz);
+      if( pReadr->aBuffer==0 ) rc = SQLITE_NOMEM;
+      pReadr->nBuffer = pgsz;
+    }
+    if( rc==SQLITE_OK && iBuf ){
+      int nRead = pgsz - iBuf;
+      if( (pReadr->iReadOff + nRead) > pReadr->iEof ){
+        nRead = (int)(pReadr->iEof - pReadr->iReadOff);
+      }
+      rc = sqlite3OsRead(
+          pReadr->pFd, &pReadr->aBuffer[iBuf], nRead, pReadr->iReadOff
+      );
+      testcase( rc!=SQLITE_OK );
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Advance PmaReader pReadr to the next key in its PMA. Return SQLITE_OK if
+** no error occurs, or an SQLite error code if one does.
+*/
+static int vdbePmaReaderNext(PmaReader *pReadr){
+  int rc = SQLITE_OK;             /* Return Code */
+  u64 nRec = 0;                   /* Size of record in bytes */
+
+
+  if( pReadr->iReadOff>=pReadr->iEof ){
+    IncrMerger *pIncr = pReadr->pIncr;
+    int bEof = 1;
+    if( pIncr ){
+      rc = vdbeIncrSwap(pIncr);
+      if( rc==SQLITE_OK && pIncr->bEof==0 ){
+        rc = vdbePmaReaderSeek(
+            pIncr->pTask, pReadr, &pIncr->aFile[0], pIncr->iStartOff
+        );
+        bEof = 0;
+      }
+    }
+
+    if( bEof ){
+      /* This is an EOF condition */
+      vdbePmaReaderClear(pReadr);
+      testcase( rc!=SQLITE_OK );
+      return rc;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = vdbePmaReadVarint(pReadr, &nRec);
+  }
+  if( rc==SQLITE_OK ){
+    pReadr->nKey = (int)nRec;
+    rc = vdbePmaReadBlob(pReadr, (int)nRec, &pReadr->aKey);
+    testcase( rc!=SQLITE_OK );
+  }
+
+  return rc;
+}
+
+/*
+** Initialize PmaReader pReadr to scan through the PMA stored in file pFile
+** starting at offset iStart and ending at offset iEof-1. This function 
+** leaves the PmaReader pointing to the first key in the PMA (or EOF if the 
+** PMA is empty).
+**
+** If the pnByte parameter is NULL, then it is assumed that the file 
+** contains a single PMA, and that that PMA omits the initial length varint.
+*/
+static int vdbePmaReaderInit(
+  SortSubtask *pTask,             /* Task context */
+  SorterFile *pFile,              /* Sorter file to read from */
+  i64 iStart,                     /* Start offset in pFile */
+  PmaReader *pReadr,              /* PmaReader to populate */
+  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
+){
+  int rc;
+
+  assert( pFile->iEof>iStart );
+  assert( pReadr->aAlloc==0 && pReadr->nAlloc==0 );
+  assert( pReadr->aBuffer==0 );
+  assert( pReadr->aMap==0 );
+
+  rc = vdbePmaReaderSeek(pTask, pReadr, pFile, iStart);
+  if( rc==SQLITE_OK ){
+    u64 nByte;                    /* Size of PMA in bytes */
+    rc = vdbePmaReadVarint(pReadr, &nByte);
+    pReadr->iEof = pReadr->iReadOff + nByte;
+    *pnByte += nByte;
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = vdbePmaReaderNext(pReadr);
+  }
+  return rc;
+}
+
+
+/*
+** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
+** size nKey2 bytes). Use (pTask->pKeyInfo) for the collation sequences
+** used by the comparison. Return the result of the comparison.
+**
+** Before returning, object (pTask->pUnpacked) is populated with the
+** unpacked version of key2. Or, if pKey2 is passed a NULL pointer, then it 
+** is assumed that the (pTask->pUnpacked) structure already contains the 
+** unpacked key to use as key2.
+**
+** If an OOM error is encountered, (pTask->pUnpacked->error_rc) is set
+** to SQLITE_NOMEM.
+*/
+static int vdbeSorterCompare(
+  SortSubtask *pTask,             /* Subtask context (for pKeyInfo) */
+  const void *pKey1, int nKey1,   /* Left side of comparison */
+  const void *pKey2, int nKey2    /* Right side of comparison */
+){
+  UnpackedRecord *r2 = pTask->pUnpacked;
+  if( pKey2 ){
+    sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2);
+  }
+  return sqlite3VdbeRecordCompare(nKey1, pKey1, r2);
+}
+
+/*
+** Initialize the temporary index cursor just opened as a sorter cursor.
+**
+** Usually, the sorter module uses the value of (pCsr->pKeyInfo->nField)
+** to determine the number of fields that should be compared from the
+** records being sorted. However, if the value passed as argument nField
+** is non-zero and the sorter is able to guarantee a stable sort, nField
+** is used instead. This is used when sorting records for a CREATE INDEX
+** statement. In this case, keys are always delivered to the sorter in
+** order of the primary key, which happens to be make up the final part 
+** of the records being sorted. So if the sort is stable, there is never
+** any reason to compare PK fields and they can be ignored for a small
+** performance boost.
+**
+** The sorter can guarantee a stable sort when running in single-threaded
+** mode, but not in multi-threaded mode.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterInit(
+  sqlite3 *db,                    /* Database connection (for malloc()) */
+  int nField,                     /* Number of key fields in each record */
+  VdbeCursor *pCsr                /* Cursor that holds the new sorter */
+){
+  int pgsz;                       /* Page size of main database */
+  int i;                          /* Used to iterate through aTask[] */
+  int mxCache;                    /* Cache size */
+  VdbeSorter *pSorter;            /* The new sorter */
+  KeyInfo *pKeyInfo;              /* Copy of pCsr->pKeyInfo with db==0 */
+  int szKeyInfo;                  /* Size of pCsr->pKeyInfo in bytes */
+  int sz;                         /* Size of pSorter in bytes */
+  int rc = SQLITE_OK;
+#if SQLITE_MAX_WORKER_THREADS==0
+# define nWorker 0
+#else
+  int nWorker;
+#endif
+
+  /* Initialize the upper limit on the number of worker threads */
+#if SQLITE_MAX_WORKER_THREADS>0
+  if( sqlite3TempInMemory(db) || sqlite3GlobalConfig.bCoreMutex==0 ){
+    nWorker = 0;
+  }else{
+    nWorker = db->aLimit[SQLITE_LIMIT_WORKER_THREADS];
+  }
+#endif
+
+  /* Do not allow the total number of threads (main thread + all workers)
+  ** to exceed the maximum merge count */
+#if SQLITE_MAX_WORKER_THREADS>=SORTER_MAX_MERGE_COUNT
+  if( nWorker>=SORTER_MAX_MERGE_COUNT ){
+    nWorker = SORTER_MAX_MERGE_COUNT-1;
+  }
+#endif
+
+  assert( pCsr->pKeyInfo && pCsr->pBt==0 );
+  szKeyInfo = sizeof(KeyInfo) + (pCsr->pKeyInfo->nField-1)*sizeof(CollSeq*);
+  sz = sizeof(VdbeSorter) + nWorker * sizeof(SortSubtask);
+
+  pSorter = (VdbeSorter*)sqlite3DbMallocZero(db, sz + szKeyInfo);
+  pCsr->pSorter = pSorter;
+  if( pSorter==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    pSorter->pKeyInfo = pKeyInfo = (KeyInfo*)((u8*)pSorter + sz);
+    memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo);
+    pKeyInfo->db = 0;
+    if( nField && nWorker==0 ) pKeyInfo->nField = nField;
+    pSorter->pgsz = pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
+    pSorter->nTask = nWorker + 1;
+    pSorter->bUseThreads = (pSorter->nTask>1);
+    pSorter->db = db;
+    for(i=0; i<pSorter->nTask; i++){
+      SortSubtask *pTask = &pSorter->aTask[i];
+      pTask->pSorter = pSorter;
+    }
+
+    if( !sqlite3TempInMemory(db) ){
+      u32 szPma = sqlite3GlobalConfig.szPma;
+      pSorter->mnPmaSize = szPma * pgsz;
+      mxCache = db->aDb[0].pSchema->cache_size;
+      if( mxCache<(int)szPma ) mxCache = (int)szPma;
+      pSorter->mxPmaSize = MIN((i64)mxCache*pgsz, SQLITE_MAX_PMASZ);
+
+      /* EVIDENCE-OF: R-26747-61719 When the application provides any amount of
+      ** scratch memory using SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary
+      ** large heap allocations.
+      */
+      if( sqlite3GlobalConfig.pScratch==0 ){
+        assert( pSorter->iMemory==0 );
+        pSorter->nMemory = pgsz;
+        pSorter->list.aMemory = (u8*)sqlite3Malloc(pgsz);
+        if( !pSorter->list.aMemory ) rc = SQLITE_NOMEM;
+      }
+    }
+  }
+
+  return rc;
+}
+#undef nWorker   /* Defined at the top of this function */
+
+/*
+** Free the list of sorted records starting at pRecord.
+*/
+static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
+  SorterRecord *p;
+  SorterRecord *pNext;
+  for(p=pRecord; p; p=pNext){
+    pNext = p->u.pNext;
+    sqlite3DbFree(db, p);
+  }
+}
+
+/*
+** Free all resources owned by the object indicated by argument pTask. All 
+** fields of *pTask are zeroed before returning.
+*/
+static void vdbeSortSubtaskCleanup(sqlite3 *db, SortSubtask *pTask){
+  sqlite3DbFree(db, pTask->pUnpacked);
+  pTask->pUnpacked = 0;
+#if SQLITE_MAX_WORKER_THREADS>0
+  /* pTask->list.aMemory can only be non-zero if it was handed memory
+  ** from the main thread.  That only occurs SQLITE_MAX_WORKER_THREADS>0 */
+  if( pTask->list.aMemory ){
+    sqlite3_free(pTask->list.aMemory);
+    pTask->list.aMemory = 0;
+  }else
+#endif
+  {
+    assert( pTask->list.aMemory==0 );
+    vdbeSorterRecordFree(0, pTask->list.pList);
+  }
+  pTask->list.pList = 0;
+  if( pTask->file.pFd ){
+    sqlite3OsCloseFree(pTask->file.pFd);
+    pTask->file.pFd = 0;
+    pTask->file.iEof = 0;
+  }
+  if( pTask->file2.pFd ){
+    sqlite3OsCloseFree(pTask->file2.pFd);
+    pTask->file2.pFd = 0;
+    pTask->file2.iEof = 0;
+  }
+}
+
+#ifdef SQLITE_DEBUG_SORTER_THREADS
+static void vdbeSorterWorkDebug(SortSubtask *pTask, const char *zEvent){
+  i64 t;
+  int iTask = (pTask - pTask->pSorter->aTask);
+  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
+  fprintf(stderr, "%lld:%d %s\n", t, iTask, zEvent);
+}
+static void vdbeSorterRewindDebug(const char *zEvent){
+  i64 t;
+  sqlite3OsCurrentTimeInt64(sqlite3_vfs_find(0), &t);
+  fprintf(stderr, "%lld:X %s\n", t, zEvent);
+}
+static void vdbeSorterPopulateDebug(
+  SortSubtask *pTask,
+  const char *zEvent
+){
+  i64 t;
+  int iTask = (pTask - pTask->pSorter->aTask);
+  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
+  fprintf(stderr, "%lld:bg%d %s\n", t, iTask, zEvent);
+}
+static void vdbeSorterBlockDebug(
+  SortSubtask *pTask,
+  int bBlocked,
+  const char *zEvent
+){
+  if( bBlocked ){
+    i64 t;
+    sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
+    fprintf(stderr, "%lld:main %s\n", t, zEvent);
+  }
+}
+#else
+# define vdbeSorterWorkDebug(x,y)
+# define vdbeSorterRewindDebug(y)
+# define vdbeSorterPopulateDebug(x,y)
+# define vdbeSorterBlockDebug(x,y,z)
+#endif
+
+#if SQLITE_MAX_WORKER_THREADS>0
+/*
+** Join thread pTask->thread.
+*/
+static int vdbeSorterJoinThread(SortSubtask *pTask){
+  int rc = SQLITE_OK;
+  if( pTask->pThread ){
+#ifdef SQLITE_DEBUG_SORTER_THREADS
+    int bDone = pTask->bDone;
+#endif
+    void *pRet = SQLITE_INT_TO_PTR(SQLITE_ERROR);
+    vdbeSorterBlockDebug(pTask, !bDone, "enter");
+    (void)sqlite3ThreadJoin(pTask->pThread, &pRet);
+    vdbeSorterBlockDebug(pTask, !bDone, "exit");
+    rc = SQLITE_PTR_TO_INT(pRet);
+    assert( pTask->bDone==1 );
+    pTask->bDone = 0;
+    pTask->pThread = 0;
+  }
+  return rc;
+}
+
+/*
+** Launch a background thread to run xTask(pIn).
+*/
+static int vdbeSorterCreateThread(
+  SortSubtask *pTask,             /* Thread will use this task object */
+  void *(*xTask)(void*),          /* Routine to run in a separate thread */
+  void *pIn                       /* Argument passed into xTask() */
+){
+  assert( pTask->pThread==0 && pTask->bDone==0 );
+  return sqlite3ThreadCreate(&pTask->pThread, xTask, pIn);
+}
+
+/*
+** Join all outstanding threads launched by SorterWrite() to create 
+** level-0 PMAs.
+*/
+static int vdbeSorterJoinAll(VdbeSorter *pSorter, int rcin){
+  int rc = rcin;
+  int i;
+
+  /* This function is always called by the main user thread.
+  **
+  ** If this function is being called after SorterRewind() has been called, 
+  ** it is possible that thread pSorter->aTask[pSorter->nTask-1].pThread
+  ** is currently attempt to join one of the other threads. To avoid a race
+  ** condition where this thread also attempts to join the same object, join 
+  ** thread pSorter->aTask[pSorter->nTask-1].pThread first. */
+  for(i=pSorter->nTask-1; i>=0; i--){
+    SortSubtask *pTask = &pSorter->aTask[i];
+    int rc2 = vdbeSorterJoinThread(pTask);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+  return rc;
+}
+#else
+# define vdbeSorterJoinAll(x,rcin) (rcin)
+# define vdbeSorterJoinThread(pTask) SQLITE_OK
+#endif
+
+/*
+** Allocate a new MergeEngine object capable of handling up to
+** nReader PmaReader inputs.
+**
+** nReader is automatically rounded up to the next power of two.
+** nReader may not exceed SORTER_MAX_MERGE_COUNT even after rounding up.
+*/
+static MergeEngine *vdbeMergeEngineNew(int nReader){
+  int N = 2;                      /* Smallest power of two >= nReader */
+  int nByte;                      /* Total bytes of space to allocate */
+  MergeEngine *pNew;              /* Pointer to allocated object to return */
+
+  assert( nReader<=SORTER_MAX_MERGE_COUNT );
+
+  while( N<nReader ) N += N;
+  nByte = sizeof(MergeEngine) + N * (sizeof(int) + sizeof(PmaReader));
+
+  pNew = sqlite3FaultSim(100) ? 0 : (MergeEngine*)sqlite3MallocZero(nByte);
+  if( pNew ){
+    pNew->nTree = N;
+    pNew->pTask = 0;
+    pNew->aReadr = (PmaReader*)&pNew[1];
+    pNew->aTree = (int*)&pNew->aReadr[N];
+  }
+  return pNew;
+}
+
+/*
+** Free the MergeEngine object passed as the only argument.
+*/
+static void vdbeMergeEngineFree(MergeEngine *pMerger){
+  int i;
+  if( pMerger ){
+    for(i=0; i<pMerger->nTree; i++){
+      vdbePmaReaderClear(&pMerger->aReadr[i]);
+    }
+  }
+  sqlite3_free(pMerger);
+}
+
+/*
+** Free all resources associated with the IncrMerger object indicated by
+** the first argument.
+*/
+static void vdbeIncrFree(IncrMerger *pIncr){
+  if( pIncr ){
+#if SQLITE_MAX_WORKER_THREADS>0
+    if( pIncr->bUseThread ){
+      vdbeSorterJoinThread(pIncr->pTask);
+      if( pIncr->aFile[0].pFd ) sqlite3OsCloseFree(pIncr->aFile[0].pFd);
+      if( pIncr->aFile[1].pFd ) sqlite3OsCloseFree(pIncr->aFile[1].pFd);
+    }
+#endif
+    vdbeMergeEngineFree(pIncr->pMerger);
+    sqlite3_free(pIncr);
+  }
+}
+
+/*
+** Reset a sorting cursor back to its original empty state.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSorterReset(sqlite3 *db, VdbeSorter *pSorter){
+  int i;
+  (void)vdbeSorterJoinAll(pSorter, SQLITE_OK);
+  assert( pSorter->bUseThreads || pSorter->pReader==0 );
+#if SQLITE_MAX_WORKER_THREADS>0
+  if( pSorter->pReader ){
+    vdbePmaReaderClear(pSorter->pReader);
+    sqlite3DbFree(db, pSorter->pReader);
+    pSorter->pReader = 0;
+  }
+#endif
+  vdbeMergeEngineFree(pSorter->pMerger);
+  pSorter->pMerger = 0;
+  for(i=0; i<pSorter->nTask; i++){
+    SortSubtask *pTask = &pSorter->aTask[i];
+    vdbeSortSubtaskCleanup(db, pTask);
+  }
+  if( pSorter->list.aMemory==0 ){
+    vdbeSorterRecordFree(0, pSorter->list.pList);
+  }
+  pSorter->list.pList = 0;
+  pSorter->list.szPMA = 0;
+  pSorter->bUsePMA = 0;
+  pSorter->iMemory = 0;
+  pSorter->mxKeysize = 0;
+  sqlite3DbFree(db, pSorter->pUnpacked);
+  pSorter->pUnpacked = 0;
+}
+
+/*
+** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  if( pSorter ){
+    sqlite3VdbeSorterReset(db, pSorter);
+    sqlite3_free(pSorter->list.aMemory);
+    sqlite3DbFree(db, pSorter);
+    pCsr->pSorter = 0;
+  }
+}
+
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** The first argument is a file-handle open on a temporary file. The file
+** is guaranteed to be nByte bytes or smaller in size. This function
+** attempts to extend the file to nByte bytes in size and to ensure that
+** the VFS has memory mapped it.
+**
+** Whether or not the file does end up memory mapped of course depends on
+** the specific VFS implementation.
+*/
+static void vdbeSorterExtendFile(sqlite3 *db, sqlite3_file *pFd, i64 nByte){
+  if( nByte<=(i64)(db->nMaxSorterMmap) && pFd->pMethods->iVersion>=3 ){
+    void *p = 0;
+    int chunksize = 4*1024;
+    sqlite3OsFileControlHint(pFd, SQLITE_FCNTL_CHUNK_SIZE, &chunksize);
+    sqlite3OsFileControlHint(pFd, SQLITE_FCNTL_SIZE_HINT, &nByte);
+    sqlite3OsFetch(pFd, 0, (int)nByte, &p);
+    sqlite3OsUnfetch(pFd, 0, p);
+  }
+}
+#else
+# define vdbeSorterExtendFile(x,y,z)
+#endif
+
+/*
+** Allocate space for a file-handle and open a temporary file. If successful,
+** set *ppFd to point to the malloc'd file-handle and return SQLITE_OK.
+** Otherwise, set *ppFd to 0 and return an SQLite error code.
+*/
+static int vdbeSorterOpenTempFile(
+  sqlite3 *db,                    /* Database handle doing sort */
+  i64 nExtend,                    /* Attempt to extend file to this size */
+  sqlite3_file **ppFd
+){
+  int rc;
+  rc = sqlite3OsOpenMalloc(db->pVfs, 0, ppFd,
+      SQLITE_OPEN_TEMP_JOURNAL |
+      SQLITE_OPEN_READWRITE    | SQLITE_OPEN_CREATE |
+      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &rc
+  );
+  if( rc==SQLITE_OK ){
+    i64 max = SQLITE_MAX_MMAP_SIZE;
+    sqlite3OsFileControlHint(*ppFd, SQLITE_FCNTL_MMAP_SIZE, (void*)&max);
+    if( nExtend>0 ){
+      vdbeSorterExtendFile(db, *ppFd, nExtend);
+    }
+  }
+  return rc;
+}
+
+/*
+** If it has not already been allocated, allocate the UnpackedRecord 
+** structure at pTask->pUnpacked. Return SQLITE_OK if successful (or 
+** if no allocation was required), or SQLITE_NOMEM otherwise.
+*/
+static int vdbeSortAllocUnpacked(SortSubtask *pTask){
+  if( pTask->pUnpacked==0 ){
+    char *pFree;
+    pTask->pUnpacked = sqlite3VdbeAllocUnpackedRecord(
+        pTask->pSorter->pKeyInfo, 0, 0, &pFree
+    );
+    assert( pTask->pUnpacked==(UnpackedRecord*)pFree );
+    if( pFree==0 ) return SQLITE_NOMEM;
+    pTask->pUnpacked->nField = pTask->pSorter->pKeyInfo->nField;
+    pTask->pUnpacked->errCode = 0;
+  }
+  return SQLITE_OK;
+}
+
+
+/*
+** Merge the two sorted lists p1 and p2 into a single list.
+** Set *ppOut to the head of the new list.
+*/
+static void vdbeSorterMerge(
+  SortSubtask *pTask,             /* Calling thread context */
+  SorterRecord *p1,               /* First list to merge */
+  SorterRecord *p2,               /* Second list to merge */
+  SorterRecord **ppOut            /* OUT: Head of merged list */
+){
+  SorterRecord *pFinal = 0;
+  SorterRecord **pp = &pFinal;
+  void *pVal2 = p2 ? SRVAL(p2) : 0;
+
+  while( p1 && p2 ){
+    int res;
+    res = vdbeSorterCompare(pTask, SRVAL(p1), p1->nVal, pVal2, p2->nVal);
+    if( res<=0 ){
+      *pp = p1;
+      pp = &p1->u.pNext;
+      p1 = p1->u.pNext;
+      pVal2 = 0;
+    }else{
+      *pp = p2;
+       pp = &p2->u.pNext;
+      p2 = p2->u.pNext;
+      if( p2==0 ) break;
+      pVal2 = SRVAL(p2);
+    }
+  }
+  *pp = p1 ? p1 : p2;
+  *ppOut = pFinal;
+}
+
+/*
+** Sort the linked list of records headed at pTask->pList. Return 
+** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if 
+** an error occurs.
+*/
+static int vdbeSorterSort(SortSubtask *pTask, SorterList *pList){
+  int i;
+  SorterRecord **aSlot;
+  SorterRecord *p;
+  int rc;
+
+  rc = vdbeSortAllocUnpacked(pTask);
+  if( rc!=SQLITE_OK ) return rc;
+
+  aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *));
+  if( !aSlot ){
+    return SQLITE_NOMEM;
+  }
+
+  p = pList->pList;
+  while( p ){
+    SorterRecord *pNext;
+    if( pList->aMemory ){
+      if( (u8*)p==pList->aMemory ){
+        pNext = 0;
+      }else{
+        assert( p->u.iNext<sqlite3MallocSize(pList->aMemory) );
+        pNext = (SorterRecord*)&pList->aMemory[p->u.iNext];
+      }
+    }else{
+      pNext = p->u.pNext;
+    }
+
+    p->u.pNext = 0;
+    for(i=0; aSlot[i]; i++){
+      vdbeSorterMerge(pTask, p, aSlot[i], &p);
+      aSlot[i] = 0;
+    }
+    aSlot[i] = p;
+    p = pNext;
+  }
+
+  p = 0;
+  for(i=0; i<64; i++){
+    vdbeSorterMerge(pTask, p, aSlot[i], &p);
+  }
+  pList->pList = p;
+
+  sqlite3_free(aSlot);
+  assert( pTask->pUnpacked->errCode==SQLITE_OK 
+       || pTask->pUnpacked->errCode==SQLITE_NOMEM 
+  );
+  return pTask->pUnpacked->errCode;
+}
+
+/*
+** Initialize a PMA-writer object.
+*/
+static void vdbePmaWriterInit(
+  sqlite3_file *pFd,              /* File handle to write to */
+  PmaWriter *p,                   /* Object to populate */
+  int nBuf,                       /* Buffer size */
+  i64 iStart                      /* Offset of pFd to begin writing at */
+){
+  memset(p, 0, sizeof(PmaWriter));
+  p->aBuffer = (u8*)sqlite3Malloc(nBuf);
+  if( !p->aBuffer ){
+    p->eFWErr = SQLITE_NOMEM;
+  }else{
+    p->iBufEnd = p->iBufStart = (iStart % nBuf);
+    p->iWriteOff = iStart - p->iBufStart;
+    p->nBuffer = nBuf;
+    p->pFd = pFd;
+  }
+}
+
+/*
+** Write nData bytes of data to the PMA. Return SQLITE_OK
+** if successful, or an SQLite error code if an error occurs.
+*/
+static void vdbePmaWriteBlob(PmaWriter *p, u8 *pData, int nData){
+  int nRem = nData;
+  while( nRem>0 && p->eFWErr==0 ){
+    int nCopy = nRem;
+    if( nCopy>(p->nBuffer - p->iBufEnd) ){
+      nCopy = p->nBuffer - p->iBufEnd;
+    }
+
+    memcpy(&p->aBuffer[p->iBufEnd], &pData[nData-nRem], nCopy);
+    p->iBufEnd += nCopy;
+    if( p->iBufEnd==p->nBuffer ){
+      p->eFWErr = sqlite3OsWrite(p->pFd, 
+          &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
+          p->iWriteOff + p->iBufStart
+      );
+      p->iBufStart = p->iBufEnd = 0;
+      p->iWriteOff += p->nBuffer;
+    }
+    assert( p->iBufEnd<p->nBuffer );
+
+    nRem -= nCopy;
+  }
+}
+
+/*
+** Flush any buffered data to disk and clean up the PMA-writer object.
+** The results of using the PMA-writer after this call are undefined.
+** Return SQLITE_OK if flushing the buffered data succeeds or is not 
+** required. Otherwise, return an SQLite error code.
+**
+** Before returning, set *piEof to the offset immediately following the
+** last byte written to the file.
+*/
+static int vdbePmaWriterFinish(PmaWriter *p, i64 *piEof){
+  int rc;
+  if( p->eFWErr==0 && ALWAYS(p->aBuffer) && p->iBufEnd>p->iBufStart ){
+    p->eFWErr = sqlite3OsWrite(p->pFd, 
+        &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
+        p->iWriteOff + p->iBufStart
+    );
+  }
+  *piEof = (p->iWriteOff + p->iBufEnd);
+  sqlite3_free(p->aBuffer);
+  rc = p->eFWErr;
+  memset(p, 0, sizeof(PmaWriter));
+  return rc;
+}
+
+/*
+** Write value iVal encoded as a varint to the PMA. Return 
+** SQLITE_OK if successful, or an SQLite error code if an error occurs.
+*/
+static void vdbePmaWriteVarint(PmaWriter *p, u64 iVal){
+  int nByte; 
+  u8 aByte[10];
+  nByte = sqlite3PutVarint(aByte, iVal);
+  vdbePmaWriteBlob(p, aByte, nByte);
+}
+
+/*
+** Write the current contents of in-memory linked-list pList to a level-0
+** PMA in the temp file belonging to sub-task pTask. Return SQLITE_OK if 
+** successful, or an SQLite error code otherwise.
+**
+** The format of a PMA is:
+**
+**     * A varint. This varint contains the total number of bytes of content
+**       in the PMA (not including the varint itself).
+**
+**     * One or more records packed end-to-end in order of ascending keys. 
+**       Each record consists of a varint followed by a blob of data (the 
+**       key). The varint is the number of bytes in the blob of data.
+*/
+static int vdbeSorterListToPMA(SortSubtask *pTask, SorterList *pList){
+  sqlite3 *db = pTask->pSorter->db;
+  int rc = SQLITE_OK;             /* Return code */
+  PmaWriter writer;               /* Object used to write to the file */
+
+#ifdef SQLITE_DEBUG
+  /* Set iSz to the expected size of file pTask->file after writing the PMA. 
+  ** This is used by an assert() statement at the end of this function.  */
+  i64 iSz = pList->szPMA + sqlite3VarintLen(pList->szPMA) + pTask->file.iEof;
+#endif
+
+  vdbeSorterWorkDebug(pTask, "enter");
+  memset(&writer, 0, sizeof(PmaWriter));
+  assert( pList->szPMA>0 );
+
+  /* If the first temporary PMA file has not been opened, open it now. */
+  if( pTask->file.pFd==0 ){
+    rc = vdbeSorterOpenTempFile(db, 0, &pTask->file.pFd);
+    assert( rc!=SQLITE_OK || pTask->file.pFd );
+    assert( pTask->file.iEof==0 );
+    assert( pTask->nPMA==0 );
+  }
+
+  /* Try to get the file to memory map */
+  if( rc==SQLITE_OK ){
+    vdbeSorterExtendFile(db, pTask->file.pFd, pTask->file.iEof+pList->szPMA+9);
+  }
+
+  /* Sort the list */
+  if( rc==SQLITE_OK ){
+    rc = vdbeSorterSort(pTask, pList);
+  }
+
+  if( rc==SQLITE_OK ){
+    SorterRecord *p;
+    SorterRecord *pNext = 0;
+
+    vdbePmaWriterInit(pTask->file.pFd, &writer, pTask->pSorter->pgsz,
+                      pTask->file.iEof);
+    pTask->nPMA++;
+    vdbePmaWriteVarint(&writer, pList->szPMA);
+    for(p=pList->pList; p; p=pNext){
+      pNext = p->u.pNext;
+      vdbePmaWriteVarint(&writer, p->nVal);
+      vdbePmaWriteBlob(&writer, SRVAL(p), p->nVal);
+      if( pList->aMemory==0 ) sqlite3_free(p);
+    }
+    pList->pList = p;
+    rc = vdbePmaWriterFinish(&writer, &pTask->file.iEof);
+  }
+
+  vdbeSorterWorkDebug(pTask, "exit");
+  assert( rc!=SQLITE_OK || pList->pList==0 );
+  assert( rc!=SQLITE_OK || pTask->file.iEof==iSz );
+  return rc;
+}
+
+/*
+** Advance the MergeEngine to its next entry.
+** Set *pbEof to true there is no next entry because
+** the MergeEngine has reached the end of all its inputs.
+**
+** Return SQLITE_OK if successful or an error code if an error occurs.
+*/
+static int vdbeMergeEngineStep(
+  MergeEngine *pMerger,      /* The merge engine to advance to the next row */
+  int *pbEof                 /* Set TRUE at EOF.  Set false for more content */
+){
+  int rc;
+  int iPrev = pMerger->aTree[1];/* Index of PmaReader to advance */
+  SortSubtask *pTask = pMerger->pTask;
+
+  /* Advance the current PmaReader */
+  rc = vdbePmaReaderNext(&pMerger->aReadr[iPrev]);
+
+  /* Update contents of aTree[] */
+  if( rc==SQLITE_OK ){
+    int i;                      /* Index of aTree[] to recalculate */
+    PmaReader *pReadr1;         /* First PmaReader to compare */
+    PmaReader *pReadr2;         /* Second PmaReader to compare */
+    u8 *pKey2;                  /* To pReadr2->aKey, or 0 if record cached */
+
+    /* Find the first two PmaReaders to compare. The one that was just
+    ** advanced (iPrev) and the one next to it in the array.  */
+    pReadr1 = &pMerger->aReadr[(iPrev & 0xFFFE)];
+    pReadr2 = &pMerger->aReadr[(iPrev | 0x0001)];
+    pKey2 = pReadr2->aKey;
+
+    for(i=(pMerger->nTree+iPrev)/2; i>0; i=i/2){
+      /* Compare pReadr1 and pReadr2. Store the result in variable iRes. */
+      int iRes;
+      if( pReadr1->pFd==0 ){
+        iRes = +1;
+      }else if( pReadr2->pFd==0 ){
+        iRes = -1;
+      }else{
+        iRes = vdbeSorterCompare(pTask, 
+            pReadr1->aKey, pReadr1->nKey, pKey2, pReadr2->nKey
+        );
+      }
+
+      /* If pReadr1 contained the smaller value, set aTree[i] to its index.
+      ** Then set pReadr2 to the next PmaReader to compare to pReadr1. In this
+      ** case there is no cache of pReadr2 in pTask->pUnpacked, so set
+      ** pKey2 to point to the record belonging to pReadr2.
+      **
+      ** Alternatively, if pReadr2 contains the smaller of the two values,
+      ** set aTree[i] to its index and update pReadr1. If vdbeSorterCompare()
+      ** was actually called above, then pTask->pUnpacked now contains
+      ** a value equivalent to pReadr2. So set pKey2 to NULL to prevent
+      ** vdbeSorterCompare() from decoding pReadr2 again.
+      **
+      ** If the two values were equal, then the value from the oldest
+      ** PMA should be considered smaller. The VdbeSorter.aReadr[] array
+      ** is sorted from oldest to newest, so pReadr1 contains older values
+      ** than pReadr2 iff (pReadr1<pReadr2).  */
+      if( iRes<0 || (iRes==0 && pReadr1<pReadr2) ){
+        pMerger->aTree[i] = (int)(pReadr1 - pMerger->aReadr);
+        pReadr2 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
+        pKey2 = pReadr2->aKey;
+      }else{
+        if( pReadr1->pFd ) pKey2 = 0;
+        pMerger->aTree[i] = (int)(pReadr2 - pMerger->aReadr);
+        pReadr1 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
+      }
+    }
+    *pbEof = (pMerger->aReadr[pMerger->aTree[1]].pFd==0);
+  }
+
+  return (rc==SQLITE_OK ? pTask->pUnpacked->errCode : rc);
+}
+
+#if SQLITE_MAX_WORKER_THREADS>0
+/*
+** The main routine for background threads that write level-0 PMAs.
+*/
+static void *vdbeSorterFlushThread(void *pCtx){
+  SortSubtask *pTask = (SortSubtask*)pCtx;
+  int rc;                         /* Return code */
+  assert( pTask->bDone==0 );
+  rc = vdbeSorterListToPMA(pTask, &pTask->list);
+  pTask->bDone = 1;
+  return SQLITE_INT_TO_PTR(rc);
+}
+#endif /* SQLITE_MAX_WORKER_THREADS>0 */
+
+/*
+** Flush the current contents of VdbeSorter.list to a new PMA, possibly
+** using a background thread.
+*/
+static int vdbeSorterFlushPMA(VdbeSorter *pSorter){
+#if SQLITE_MAX_WORKER_THREADS==0
+  pSorter->bUsePMA = 1;
+  return vdbeSorterListToPMA(&pSorter->aTask[0], &pSorter->list);
+#else
+  int rc = SQLITE_OK;
+  int i;
+  SortSubtask *pTask = 0;    /* Thread context used to create new PMA */
+  int nWorker = (pSorter->nTask-1);
+
+  /* Set the flag to indicate that at least one PMA has been written. 
+  ** Or will be, anyhow.  */
+  pSorter->bUsePMA = 1;
+
+  /* Select a sub-task to sort and flush the current list of in-memory
+  ** records to disk. If the sorter is running in multi-threaded mode,
+  ** round-robin between the first (pSorter->nTask-1) tasks. Except, if
+  ** the background thread from a sub-tasks previous turn is still running,
+  ** skip it. If the first (pSorter->nTask-1) sub-tasks are all still busy,
+  ** fall back to using the final sub-task. The first (pSorter->nTask-1)
+  ** sub-tasks are prefered as they use background threads - the final 
+  ** sub-task uses the main thread. */
+  for(i=0; i<nWorker; i++){
+    int iTest = (pSorter->iPrev + i + 1) % nWorker;
+    pTask = &pSorter->aTask[iTest];
+    if( pTask->bDone ){
+      rc = vdbeSorterJoinThread(pTask);
+    }
+    if( rc!=SQLITE_OK || pTask->pThread==0 ) break;
+  }
+
+  if( rc==SQLITE_OK ){
+    if( i==nWorker ){
+      /* Use the foreground thread for this operation */
+      rc = vdbeSorterListToPMA(&pSorter->aTask[nWorker], &pSorter->list);
+    }else{
+      /* Launch a background thread for this operation */
+      u8 *aMem = pTask->list.aMemory;
+      void *pCtx = (void*)pTask;
+
+      assert( pTask->pThread==0 && pTask->bDone==0 );
+      assert( pTask->list.pList==0 );
+      assert( pTask->list.aMemory==0 || pSorter->list.aMemory!=0 );
+
+      pSorter->iPrev = (u8)(pTask - pSorter->aTask);
+      pTask->list = pSorter->list;
+      pSorter->list.pList = 0;
+      pSorter->list.szPMA = 0;
+      if( aMem ){
+        pSorter->list.aMemory = aMem;
+        pSorter->nMemory = sqlite3MallocSize(aMem);
+      }else if( pSorter->list.aMemory ){
+        pSorter->list.aMemory = sqlite3Malloc(pSorter->nMemory);
+        if( !pSorter->list.aMemory ) return SQLITE_NOMEM;
+      }
+
+      rc = vdbeSorterCreateThread(pTask, vdbeSorterFlushThread, pCtx);
+    }
+  }
+
+  return rc;
+#endif /* SQLITE_MAX_WORKER_THREADS!=0 */
+}
+
+/*
+** Add a record to the sorter.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterWrite(
+  const VdbeCursor *pCsr,         /* Sorter cursor */
+  Mem *pVal                       /* Memory cell containing record */
+){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc = SQLITE_OK;             /* Return Code */
+  SorterRecord *pNew;             /* New list element */
+
+  int bFlush;                     /* True to flush contents of memory to PMA */
+  int nReq;                       /* Bytes of memory required */
+  int nPMA;                       /* Bytes of PMA space required */
+
+  assert( pSorter );
+
+  /* Figure out whether or not the current contents of memory should be
+  ** flushed to a PMA before continuing. If so, do so.
+  **
+  ** If using the single large allocation mode (pSorter->aMemory!=0), then
+  ** flush the contents of memory to a new PMA if (a) at least one value is
+  ** already in memory and (b) the new value will not fit in memory.
+  ** 
+  ** Or, if using separate allocations for each record, flush the contents
+  ** of memory to a PMA if either of the following are true:
+  **
+  **   * The total memory allocated for the in-memory list is greater 
+  **     than (page-size * cache-size), or
+  **
+  **   * The total memory allocated for the in-memory list is greater 
+  **     than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
+  */
+  nReq = pVal->n + sizeof(SorterRecord);
+  nPMA = pVal->n + sqlite3VarintLen(pVal->n);
+  if( pSorter->mxPmaSize ){
+    if( pSorter->list.aMemory ){
+      bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize;
+    }else{
+      bFlush = (
+          (pSorter->list.szPMA > pSorter->mxPmaSize)
+       || (pSorter->list.szPMA > pSorter->mnPmaSize && sqlite3HeapNearlyFull())
+      );
+    }
+    if( bFlush ){
+      rc = vdbeSorterFlushPMA(pSorter);
+      pSorter->list.szPMA = 0;
+      pSorter->iMemory = 0;
+      assert( rc!=SQLITE_OK || pSorter->list.pList==0 );
+    }
+  }
+
+  pSorter->list.szPMA += nPMA;
+  if( nPMA>pSorter->mxKeysize ){
+    pSorter->mxKeysize = nPMA;
+  }
+
+  if( pSorter->list.aMemory ){
+    int nMin = pSorter->iMemory + nReq;
+
+    if( nMin>pSorter->nMemory ){
+      u8 *aNew;
+      int nNew = pSorter->nMemory * 2;
+      while( nNew < nMin ) nNew = nNew*2;
+      if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize;
+      if( nNew < nMin ) nNew = nMin;
+
+      aNew = sqlite3Realloc(pSorter->list.aMemory, nNew);
+      if( !aNew ) return SQLITE_NOMEM;
+      pSorter->list.pList = (SorterRecord*)(
+          aNew + ((u8*)pSorter->list.pList - pSorter->list.aMemory)
+      );
+      pSorter->list.aMemory = aNew;
+      pSorter->nMemory = nNew;
+    }
+
+    pNew = (SorterRecord*)&pSorter->list.aMemory[pSorter->iMemory];
+    pSorter->iMemory += ROUND8(nReq);
+    pNew->u.iNext = (int)((u8*)(pSorter->list.pList) - pSorter->list.aMemory);
+  }else{
+    pNew = (SorterRecord *)sqlite3Malloc(nReq);
+    if( pNew==0 ){
+      return SQLITE_NOMEM;
+    }
+    pNew->u.pNext = pSorter->list.pList;
+  }
+
+  memcpy(SRVAL(pNew), pVal->z, pVal->n);
+  pNew->nVal = pVal->n;
+  pSorter->list.pList = pNew;
+
+  return rc;
+}
+
+/*
+** Read keys from pIncr->pMerger and populate pIncr->aFile[1]. The format
+** of the data stored in aFile[1] is the same as that used by regular PMAs,
+** except that the number-of-bytes varint is omitted from the start.
+*/
+static int vdbeIncrPopulate(IncrMerger *pIncr){
+  int rc = SQLITE_OK;
+  int rc2;
+  i64 iStart = pIncr->iStartOff;
+  SorterFile *pOut = &pIncr->aFile[1];
+  SortSubtask *pTask = pIncr->pTask;
+  MergeEngine *pMerger = pIncr->pMerger;
+  PmaWriter writer;
+  assert( pIncr->bEof==0 );
+
+  vdbeSorterPopulateDebug(pTask, "enter");
+
+  vdbePmaWriterInit(pOut->pFd, &writer, pTask->pSorter->pgsz, iStart);
+  while( rc==SQLITE_OK ){
+    int dummy;
+    PmaReader *pReader = &pMerger->aReadr[ pMerger->aTree[1] ];
+    int nKey = pReader->nKey;
+    i64 iEof = writer.iWriteOff + writer.iBufEnd;
+
+    /* Check if the output file is full or if the input has been exhausted.
+    ** In either case exit the loop. */
+    if( pReader->pFd==0 ) break;
+    if( (iEof + nKey + sqlite3VarintLen(nKey))>(iStart + pIncr->mxSz) ) break;
+
+    /* Write the next key to the output. */
+    vdbePmaWriteVarint(&writer, nKey);
+    vdbePmaWriteBlob(&writer, pReader->aKey, nKey);
+    assert( pIncr->pMerger->pTask==pTask );
+    rc = vdbeMergeEngineStep(pIncr->pMerger, &dummy);
+  }
+
+  rc2 = vdbePmaWriterFinish(&writer, &pOut->iEof);
+  if( rc==SQLITE_OK ) rc = rc2;
+  vdbeSorterPopulateDebug(pTask, "exit");
+  return rc;
+}
+
+#if SQLITE_MAX_WORKER_THREADS>0
+/*
+** The main routine for background threads that populate aFile[1] of
+** multi-threaded IncrMerger objects.
+*/
+static void *vdbeIncrPopulateThread(void *pCtx){
+  IncrMerger *pIncr = (IncrMerger*)pCtx;
+  void *pRet = SQLITE_INT_TO_PTR( vdbeIncrPopulate(pIncr) );
+  pIncr->pTask->bDone = 1;
+  return pRet;
+}
+
+/*
+** Launch a background thread to populate aFile[1] of pIncr.
+*/
+static int vdbeIncrBgPopulate(IncrMerger *pIncr){
+  void *p = (void*)pIncr;
+  assert( pIncr->bUseThread );
+  return vdbeSorterCreateThread(pIncr->pTask, vdbeIncrPopulateThread, p);
+}
+#endif
+
+/*
+** This function is called when the PmaReader corresponding to pIncr has
+** finished reading the contents of aFile[0]. Its purpose is to "refill"
+** aFile[0] such that the PmaReader should start rereading it from the
+** beginning.
+**
+** For single-threaded objects, this is accomplished by literally reading 
+** keys from pIncr->pMerger and repopulating aFile[0]. 
+**
+** For multi-threaded objects, all that is required is to wait until the 
+** background thread is finished (if it is not already) and then swap 
+** aFile[0] and aFile[1] in place. If the contents of pMerger have not
+** been exhausted, this function also launches a new background thread
+** to populate the new aFile[1].
+**
+** SQLITE_OK is returned on success, or an SQLite error code otherwise.
+*/
+static int vdbeIncrSwap(IncrMerger *pIncr){
+  int rc = SQLITE_OK;
+
+#if SQLITE_MAX_WORKER_THREADS>0
+  if( pIncr->bUseThread ){
+    rc = vdbeSorterJoinThread(pIncr->pTask);
+
+    if( rc==SQLITE_OK ){
+      SorterFile f0 = pIncr->aFile[0];
+      pIncr->aFile[0] = pIncr->aFile[1];
+      pIncr->aFile[1] = f0;
+    }
+
+    if( rc==SQLITE_OK ){
+      if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
+        pIncr->bEof = 1;
+      }else{
+        rc = vdbeIncrBgPopulate(pIncr);
+      }
+    }
+  }else
+#endif
+  {
+    rc = vdbeIncrPopulate(pIncr);
+    pIncr->aFile[0] = pIncr->aFile[1];
+    if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
+      pIncr->bEof = 1;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Allocate and return a new IncrMerger object to read data from pMerger.
+**
+** If an OOM condition is encountered, return NULL. In this case free the
+** pMerger argument before returning.
+*/
+static int vdbeIncrMergerNew(
+  SortSubtask *pTask,     /* The thread that will be using the new IncrMerger */
+  MergeEngine *pMerger,   /* The MergeEngine that the IncrMerger will control */
+  IncrMerger **ppOut      /* Write the new IncrMerger here */
+){
+  int rc = SQLITE_OK;
+  IncrMerger *pIncr = *ppOut = (IncrMerger*)
+       (sqlite3FaultSim(100) ? 0 : sqlite3MallocZero(sizeof(*pIncr)));
+  if( pIncr ){
+    pIncr->pMerger = pMerger;
+    pIncr->pTask = pTask;
+    pIncr->mxSz = MAX(pTask->pSorter->mxKeysize+9,pTask->pSorter->mxPmaSize/2);
+    pTask->file2.iEof += pIncr->mxSz;
+  }else{
+    vdbeMergeEngineFree(pMerger);
+    rc = SQLITE_NOMEM;
+  }
+  return rc;
+}
+
+#if SQLITE_MAX_WORKER_THREADS>0
+/*
+** Set the "use-threads" flag on object pIncr.
+*/
+static void vdbeIncrMergerSetThreads(IncrMerger *pIncr){
+  pIncr->bUseThread = 1;
+  pIncr->pTask->file2.iEof -= pIncr->mxSz;
+}
+#endif /* SQLITE_MAX_WORKER_THREADS>0 */
+
+
+
+/*
+** Recompute pMerger->aTree[iOut] by comparing the next keys on the
+** two PmaReaders that feed that entry.  Neither of the PmaReaders
+** are advanced.  This routine merely does the comparison.
+*/
+static void vdbeMergeEngineCompare(
+  MergeEngine *pMerger,  /* Merge engine containing PmaReaders to compare */
+  int iOut               /* Store the result in pMerger->aTree[iOut] */
+){
+  int i1;
+  int i2;
+  int iRes;
+  PmaReader *p1;
+  PmaReader *p2;
+
+  assert( iOut<pMerger->nTree && iOut>0 );
+
+  if( iOut>=(pMerger->nTree/2) ){
+    i1 = (iOut - pMerger->nTree/2) * 2;
+    i2 = i1 + 1;
+  }else{
+    i1 = pMerger->aTree[iOut*2];
+    i2 = pMerger->aTree[iOut*2+1];
+  }
+
+  p1 = &pMerger->aReadr[i1];
+  p2 = &pMerger->aReadr[i2];
+
+  if( p1->pFd==0 ){
+    iRes = i2;
+  }else if( p2->pFd==0 ){
+    iRes = i1;
+  }else{
+    int res;
+    assert( pMerger->pTask->pUnpacked!=0 );  /* from vdbeSortSubtaskMain() */
+    res = vdbeSorterCompare(
+        pMerger->pTask, p1->aKey, p1->nKey, p2->aKey, p2->nKey
+    );
+    if( res<=0 ){
+      iRes = i1;
+    }else{
+      iRes = i2;
+    }
+  }
+
+  pMerger->aTree[iOut] = iRes;
+}
+
+/*
+** Allowed values for the eMode parameter to vdbeMergeEngineInit()
+** and vdbePmaReaderIncrMergeInit().
+**
+** Only INCRINIT_NORMAL is valid in single-threaded builds (when
+** SQLITE_MAX_WORKER_THREADS==0).  The other values are only used
+** when there exists one or more separate worker threads.
+*/
+#define INCRINIT_NORMAL 0
+#define INCRINIT_TASK   1
+#define INCRINIT_ROOT   2
+
+/* Forward reference.
+** The vdbeIncrMergeInit() and vdbePmaReaderIncrMergeInit() routines call each
+** other (when building a merge tree).
+*/
+static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode);
+
+/*
+** Initialize the MergeEngine object passed as the second argument. Once this
+** function returns, the first key of merged data may be read from the 
+** MergeEngine object in the usual fashion.
+**
+** If argument eMode is INCRINIT_ROOT, then it is assumed that any IncrMerge
+** objects attached to the PmaReader objects that the merger reads from have
+** already been populated, but that they have not yet populated aFile[0] and
+** set the PmaReader objects up to read from it. In this case all that is
+** required is to call vdbePmaReaderNext() on each PmaReader to point it at
+** its first key.
+**
+** Otherwise, if eMode is any value other than INCRINIT_ROOT, then use 
+** vdbePmaReaderIncrMergeInit() to initialize each PmaReader that feeds data 
+** to pMerger.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+static int vdbeMergeEngineInit(
+  SortSubtask *pTask,             /* Thread that will run pMerger */
+  MergeEngine *pMerger,           /* MergeEngine to initialize */
+  int eMode                       /* One of the INCRINIT_XXX constants */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  int i;                          /* For looping over PmaReader objects */
+  int nTree = pMerger->nTree;
+
+  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
+  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
+
+  /* Verify that the MergeEngine is assigned to a single thread */
+  assert( pMerger->pTask==0 );
+  pMerger->pTask = pTask;
+
+  for(i=0; i<nTree; i++){
+    if( SQLITE_MAX_WORKER_THREADS>0 && eMode==INCRINIT_ROOT ){
+      /* PmaReaders should be normally initialized in order, as if they are
+      ** reading from the same temp file this makes for more linear file IO.
+      ** However, in the INCRINIT_ROOT case, if PmaReader aReadr[nTask-1] is
+      ** in use it will block the vdbePmaReaderNext() call while it uses
+      ** the main thread to fill its buffer. So calling PmaReaderNext()
+      ** on this PmaReader before any of the multi-threaded PmaReaders takes
+      ** better advantage of multi-processor hardware. */
+      rc = vdbePmaReaderNext(&pMerger->aReadr[nTree-i-1]);
+    }else{
+      rc = vdbePmaReaderIncrMergeInit(&pMerger->aReadr[i], INCRINIT_NORMAL);
+    }
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  for(i=pMerger->nTree-1; i>0; i--){
+    vdbeMergeEngineCompare(pMerger, i);
+  }
+  return pTask->pUnpacked->errCode;
+}
+
+/*
+** Initialize the IncrMerge field of a PmaReader.
+**
+** If the PmaReader passed as the first argument is not an incremental-reader
+** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it serves
+** to open and/or initialize the temp file related fields of the IncrMerge
+** object at (pReadr->pIncr).
+**
+** If argument eMode is set to INCRINIT_NORMAL, then all PmaReaders
+** in the sub-tree headed by pReadr are also initialized. Data is then loaded
+** into the buffers belonging to pReadr and it is set to
+** point to the first key in its range.
+**
+** If argument eMode is set to INCRINIT_TASK, then pReadr is guaranteed
+** to be a multi-threaded PmaReader and this function is being called in a
+** background thread. In this case all PmaReaders in the sub-tree are 
+** initialized as for INCRINIT_NORMAL and the aFile[1] buffer belonging to
+** pReadr is populated. However, pReadr itself is not set up to point
+** to its first key. A call to vdbePmaReaderNext() is still required to do
+** that. 
+**
+** The reason this function does not call vdbePmaReaderNext() immediately 
+** in the INCRINIT_TASK case is that vdbePmaReaderNext() assumes that it has
+** to block on thread (pTask->thread) before accessing aFile[1]. But, since
+** this entire function is being run by thread (pTask->thread), that will
+** lead to the current background thread attempting to join itself.
+**
+** Finally, if argument eMode is set to INCRINIT_ROOT, it may be assumed
+** that pReadr->pIncr is a multi-threaded IncrMerge objects, and that all
+** child-trees have already been initialized using IncrInit(INCRINIT_TASK).
+** In this case vdbePmaReaderNext() is called on all child PmaReaders and
+** the current PmaReader set to point to the first key in its range.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){
+  int rc = SQLITE_OK;
+  IncrMerger *pIncr = pReadr->pIncr;
+
+  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
+  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
+
+  if( pIncr ){
+    SortSubtask *pTask = pIncr->pTask;
+    sqlite3 *db = pTask->pSorter->db;
+
+    rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode);
+
+    /* Set up the required files for pIncr. A multi-theaded IncrMerge object
+    ** requires two temp files to itself, whereas a single-threaded object
+    ** only requires a region of pTask->file2. */
+    if( rc==SQLITE_OK ){
+      int mxSz = pIncr->mxSz;
+#if SQLITE_MAX_WORKER_THREADS>0
+      if( pIncr->bUseThread ){
+        rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd);
+        if( rc==SQLITE_OK ){
+          rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd);
+        }
+      }else
+#endif
+      /*if( !pIncr->bUseThread )*/{
+        if( pTask->file2.pFd==0 ){
+          assert( pTask->file2.iEof>0 );
+          rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd);
+          pTask->file2.iEof = 0;
+        }
+        if( rc==SQLITE_OK ){
+          pIncr->aFile[1].pFd = pTask->file2.pFd;
+          pIncr->iStartOff = pTask->file2.iEof;
+          pTask->file2.iEof += mxSz;
+        }
+      }
+    }
+
+#if SQLITE_MAX_WORKER_THREADS>0
+    if( rc==SQLITE_OK && pIncr->bUseThread ){
+      /* Use the current thread to populate aFile[1], even though this
+      ** PmaReader is multi-threaded. The reason being that this function
+      ** is already running in background thread pIncr->pTask->thread. */
+      assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK );
+      rc = vdbeIncrPopulate(pIncr);
+    }
+#endif
+
+    if( rc==SQLITE_OK
+     && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK)
+    ){
+      rc = vdbePmaReaderNext(pReadr);
+    }
+  }
+  return rc;
+}
+
+#if SQLITE_MAX_WORKER_THREADS>0
+/*
+** The main routine for vdbePmaReaderIncrMergeInit() operations run in 
+** background threads.
+*/
+static void *vdbePmaReaderBgInit(void *pCtx){
+  PmaReader *pReader = (PmaReader*)pCtx;
+  void *pRet = SQLITE_INT_TO_PTR(
+                  vdbePmaReaderIncrMergeInit(pReader,INCRINIT_TASK)
+               );
+  pReader->pIncr->pTask->bDone = 1;
+  return pRet;
+}
+
+/*
+** Use a background thread to invoke vdbePmaReaderIncrMergeInit(INCRINIT_TASK) 
+** on the PmaReader object passed as the first argument.
+**
+** This call will initialize the various fields of the pReadr->pIncr 
+** structure and, if it is a multi-threaded IncrMerger, launch a 
+** background thread to populate aFile[1].
+*/
+static int vdbePmaReaderBgIncrInit(PmaReader *pReadr){
+  void *pCtx = (void*)pReadr;
+  return vdbeSorterCreateThread(pReadr->pIncr->pTask, vdbePmaReaderBgInit, pCtx);
+}
+#endif
+
+/*
+** Allocate a new MergeEngine object to merge the contents of nPMA level-0
+** PMAs from pTask->file. If no error occurs, set *ppOut to point to
+** the new object and return SQLITE_OK. Or, if an error does occur, set *ppOut
+** to NULL and return an SQLite error code.
+**
+** When this function is called, *piOffset is set to the offset of the
+** first PMA to read from pTask->file. Assuming no error occurs, it is 
+** set to the offset immediately following the last byte of the last
+** PMA before returning. If an error does occur, then the final value of
+** *piOffset is undefined.
+*/
+static int vdbeMergeEngineLevel0(
+  SortSubtask *pTask,             /* Sorter task to read from */
+  int nPMA,                       /* Number of PMAs to read */
+  i64 *piOffset,                  /* IN/OUT: Readr offset in pTask->file */
+  MergeEngine **ppOut             /* OUT: New merge-engine */
+){
+  MergeEngine *pNew;              /* Merge engine to return */
+  i64 iOff = *piOffset;
+  int i;
+  int rc = SQLITE_OK;
+
+  *ppOut = pNew = vdbeMergeEngineNew(nPMA);
+  if( pNew==0 ) rc = SQLITE_NOMEM;
+
+  for(i=0; i<nPMA && rc==SQLITE_OK; i++){
+    i64 nDummy;
+    PmaReader *pReadr = &pNew->aReadr[i];
+    rc = vdbePmaReaderInit(pTask, &pTask->file, iOff, pReadr, &nDummy);
+    iOff = pReadr->iEof;
+  }
+
+  if( rc!=SQLITE_OK ){
+    vdbeMergeEngineFree(pNew);
+    *ppOut = 0;
+  }
+  *piOffset = iOff;
+  return rc;
+}
+
+/*
+** Return the depth of a tree comprising nPMA PMAs, assuming a fanout of
+** SORTER_MAX_MERGE_COUNT. The returned value does not include leaf nodes.
+**
+** i.e.
+**
+**   nPMA<=16    -> TreeDepth() == 0
+**   nPMA<=256   -> TreeDepth() == 1
+**   nPMA<=65536 -> TreeDepth() == 2
+*/
+static int vdbeSorterTreeDepth(int nPMA){
+  int nDepth = 0;
+  i64 nDiv = SORTER_MAX_MERGE_COUNT;
+  while( nDiv < (i64)nPMA ){
+    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
+    nDepth++;
+  }
+  return nDepth;
+}
+
+/*
+** pRoot is the root of an incremental merge-tree with depth nDepth (according
+** to vdbeSorterTreeDepth()). pLeaf is the iSeq'th leaf to be added to the
+** tree, counting from zero. This function adds pLeaf to the tree.
+**
+** If successful, SQLITE_OK is returned. If an error occurs, an SQLite error
+** code is returned and pLeaf is freed.
+*/
+static int vdbeSorterAddToTree(
+  SortSubtask *pTask,             /* Task context */
+  int nDepth,                     /* Depth of tree according to TreeDepth() */
+  int iSeq,                       /* Sequence number of leaf within tree */
+  MergeEngine *pRoot,             /* Root of tree */
+  MergeEngine *pLeaf              /* Leaf to add to tree */
+){
+  int rc = SQLITE_OK;
+  int nDiv = 1;
+  int i;
+  MergeEngine *p = pRoot;
+  IncrMerger *pIncr;
+
+  rc = vdbeIncrMergerNew(pTask, pLeaf, &pIncr);
+
+  for(i=1; i<nDepth; i++){
+    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
+  }
+
+  for(i=1; i<nDepth && rc==SQLITE_OK; i++){
+    int iIter = (iSeq / nDiv) % SORTER_MAX_MERGE_COUNT;
+    PmaReader *pReadr = &p->aReadr[iIter];
+
+    if( pReadr->pIncr==0 ){
+      MergeEngine *pNew = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
+      if( pNew==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        rc = vdbeIncrMergerNew(pTask, pNew, &pReadr->pIncr);
+      }
+    }
+    if( rc==SQLITE_OK ){
+      p = pReadr->pIncr->pMerger;
+      nDiv = nDiv / SORTER_MAX_MERGE_COUNT;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    p->aReadr[iSeq % SORTER_MAX_MERGE_COUNT].pIncr = pIncr;
+  }else{
+    vdbeIncrFree(pIncr);
+  }
+  return rc;
+}
+
+/*
+** This function is called as part of a SorterRewind() operation on a sorter
+** that has already written two or more level-0 PMAs to one or more temp
+** files. It builds a tree of MergeEngine/IncrMerger/PmaReader objects that 
+** can be used to incrementally merge all PMAs on disk.
+**
+** If successful, SQLITE_OK is returned and *ppOut set to point to the
+** MergeEngine object at the root of the tree before returning. Or, if an
+** error occurs, an SQLite error code is returned and the final value 
+** of *ppOut is undefined.
+*/
+static int vdbeSorterMergeTreeBuild(
+  VdbeSorter *pSorter,       /* The VDBE cursor that implements the sort */
+  MergeEngine **ppOut        /* Write the MergeEngine here */
+){
+  MergeEngine *pMain = 0;
+  int rc = SQLITE_OK;
+  int iTask;
+
+#if SQLITE_MAX_WORKER_THREADS>0
+  /* If the sorter uses more than one task, then create the top-level 
+  ** MergeEngine here. This MergeEngine will read data from exactly 
+  ** one PmaReader per sub-task.  */
+  assert( pSorter->bUseThreads || pSorter->nTask==1 );
+  if( pSorter->nTask>1 ){
+    pMain = vdbeMergeEngineNew(pSorter->nTask);
+    if( pMain==0 ) rc = SQLITE_NOMEM;
+  }
+#endif
+
+  for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
+    SortSubtask *pTask = &pSorter->aTask[iTask];
+    assert( pTask->nPMA>0 || SQLITE_MAX_WORKER_THREADS>0 );
+    if( SQLITE_MAX_WORKER_THREADS==0 || pTask->nPMA ){
+      MergeEngine *pRoot = 0;     /* Root node of tree for this task */
+      int nDepth = vdbeSorterTreeDepth(pTask->nPMA);
+      i64 iReadOff = 0;
+
+      if( pTask->nPMA<=SORTER_MAX_MERGE_COUNT ){
+        rc = vdbeMergeEngineLevel0(pTask, pTask->nPMA, &iReadOff, &pRoot);
+      }else{
+        int i;
+        int iSeq = 0;
+        pRoot = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
+        if( pRoot==0 ) rc = SQLITE_NOMEM;
+        for(i=0; i<pTask->nPMA && rc==SQLITE_OK; i += SORTER_MAX_MERGE_COUNT){
+          MergeEngine *pMerger = 0; /* New level-0 PMA merger */
+          int nReader;              /* Number of level-0 PMAs to merge */
+
+          nReader = MIN(pTask->nPMA - i, SORTER_MAX_MERGE_COUNT);
+          rc = vdbeMergeEngineLevel0(pTask, nReader, &iReadOff, &pMerger);
+          if( rc==SQLITE_OK ){
+            rc = vdbeSorterAddToTree(pTask, nDepth, iSeq++, pRoot, pMerger);
+          }
+        }
+      }
+
+      if( rc==SQLITE_OK ){
+#if SQLITE_MAX_WORKER_THREADS>0
+        if( pMain!=0 ){
+          rc = vdbeIncrMergerNew(pTask, pRoot, &pMain->aReadr[iTask].pIncr);
+        }else
+#endif
+        {
+          assert( pMain==0 );
+          pMain = pRoot;
+        }
+      }else{
+        vdbeMergeEngineFree(pRoot);
+      }
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    vdbeMergeEngineFree(pMain);
+    pMain = 0;
+  }
+  *ppOut = pMain;
+  return rc;
+}
+
+/*
+** This function is called as part of an sqlite3VdbeSorterRewind() operation
+** on a sorter that has written two or more PMAs to temporary files. It sets
+** up either VdbeSorter.pMerger (for single threaded sorters) or pReader
+** (for multi-threaded sorters) so that it can be used to iterate through
+** all records stored in the sorter.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+static int vdbeSorterSetupMerge(VdbeSorter *pSorter){
+  int rc;                         /* Return code */
+  SortSubtask *pTask0 = &pSorter->aTask[0];
+  MergeEngine *pMain = 0;
+#if SQLITE_MAX_WORKER_THREADS
+  sqlite3 *db = pTask0->pSorter->db;
+#endif
+
+  rc = vdbeSorterMergeTreeBuild(pSorter, &pMain);
+  if( rc==SQLITE_OK ){
+#if SQLITE_MAX_WORKER_THREADS
+    assert( pSorter->bUseThreads==0 || pSorter->nTask>1 );
+    if( pSorter->bUseThreads ){
+      int iTask;
+      PmaReader *pReadr = 0;
+      SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1];
+      rc = vdbeSortAllocUnpacked(pLast);
+      if( rc==SQLITE_OK ){
+        pReadr = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader));
+        pSorter->pReader = pReadr;
+        if( pReadr==0 ) rc = SQLITE_NOMEM;
+      }
+      if( rc==SQLITE_OK ){
+        rc = vdbeIncrMergerNew(pLast, pMain, &pReadr->pIncr);
+        if( rc==SQLITE_OK ){
+          vdbeIncrMergerSetThreads(pReadr->pIncr);
+          for(iTask=0; iTask<(pSorter->nTask-1); iTask++){
+            IncrMerger *pIncr;
+            if( (pIncr = pMain->aReadr[iTask].pIncr) ){
+              vdbeIncrMergerSetThreads(pIncr);
+              assert( pIncr->pTask!=pLast );
+            }
+          }
+          for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
+            PmaReader *p = &pMain->aReadr[iTask];
+            assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] );
+            if( p->pIncr ){ 
+              if( iTask==pSorter->nTask-1 ){
+                rc = vdbePmaReaderIncrMergeInit(p, INCRINIT_TASK);
+              }else{
+                rc = vdbePmaReaderBgIncrInit(p);
+              }
+            }
+          }
+        }
+        pMain = 0;
+      }
+      if( rc==SQLITE_OK ){
+        rc = vdbePmaReaderIncrMergeInit(pReadr, INCRINIT_ROOT);
+      }
+    }else
+#endif
+    {
+      rc = vdbeMergeEngineInit(pTask0, pMain, INCRINIT_NORMAL);
+      pSorter->pMerger = pMain;
+      pMain = 0;
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    vdbeMergeEngineFree(pMain);
+  }
+  return rc;
+}
+
+
+/*
+** Once the sorter has been populated by calls to sqlite3VdbeSorterWrite,
+** this function is called to prepare for iterating through the records
+** in sorted order.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterRewind(const VdbeCursor *pCsr, int *pbEof){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc = SQLITE_OK;             /* Return code */
+
+  assert( pSorter );
+
+  /* If no data has been written to disk, then do not do so now. Instead,
+  ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
+  ** from the in-memory list.  */
+  if( pSorter->bUsePMA==0 ){
+    if( pSorter->list.pList ){
+      *pbEof = 0;
+      rc = vdbeSorterSort(&pSorter->aTask[0], &pSorter->list);
+    }else{
+      *pbEof = 1;
+    }
+    return rc;
+  }
+
+  /* Write the current in-memory list to a PMA. When the VdbeSorterWrite() 
+  ** function flushes the contents of memory to disk, it immediately always
+  ** creates a new list consisting of a single key immediately afterwards.
+  ** So the list is never empty at this point.  */
+  assert( pSorter->list.pList );
+  rc = vdbeSorterFlushPMA(pSorter);
+
+  /* Join all threads */
+  rc = vdbeSorterJoinAll(pSorter, rc);
+
+  vdbeSorterRewindDebug("rewind");
+
+  /* Assuming no errors have occurred, set up a merger structure to 
+  ** incrementally read and merge all remaining PMAs.  */
+  assert( pSorter->pReader==0 );
+  if( rc==SQLITE_OK ){
+    rc = vdbeSorterSetupMerge(pSorter);
+    *pbEof = 0;
+  }
+
+  vdbeSorterRewindDebug("rewinddone");
+  return rc;
+}
+
+/*
+** Advance to the next element in the sorter.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc;                         /* Return code */
+
+  assert( pSorter->bUsePMA || (pSorter->pReader==0 && pSorter->pMerger==0) );
+  if( pSorter->bUsePMA ){
+    assert( pSorter->pReader==0 || pSorter->pMerger==0 );
+    assert( pSorter->bUseThreads==0 || pSorter->pReader );
+    assert( pSorter->bUseThreads==1 || pSorter->pMerger );
+#if SQLITE_MAX_WORKER_THREADS>0
+    if( pSorter->bUseThreads ){
+      rc = vdbePmaReaderNext(pSorter->pReader);
+      *pbEof = (pSorter->pReader->pFd==0);
+    }else
+#endif
+    /*if( !pSorter->bUseThreads )*/ {
+      assert( pSorter->pMerger!=0 );
+      assert( pSorter->pMerger->pTask==(&pSorter->aTask[0]) );
+      rc = vdbeMergeEngineStep(pSorter->pMerger, pbEof);
+    }
+  }else{
+    SorterRecord *pFree = pSorter->list.pList;
+    pSorter->list.pList = pFree->u.pNext;
+    pFree->u.pNext = 0;
+    if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree);
+    *pbEof = !pSorter->list.pList;
+    rc = SQLITE_OK;
+  }
+  return rc;
+}
+
+/*
+** Return a pointer to a buffer owned by the sorter that contains the 
+** current key.
+*/
+static void *vdbeSorterRowkey(
+  const VdbeSorter *pSorter,      /* Sorter object */
+  int *pnKey                      /* OUT: Size of current key in bytes */
+){
+  void *pKey;
+  if( pSorter->bUsePMA ){
+    PmaReader *pReader;
+#if SQLITE_MAX_WORKER_THREADS>0
+    if( pSorter->bUseThreads ){
+      pReader = pSorter->pReader;
+    }else
+#endif
+    /*if( !pSorter->bUseThreads )*/{
+      pReader = &pSorter->pMerger->aReadr[pSorter->pMerger->aTree[1]];
+    }
+    *pnKey = pReader->nKey;
+    pKey = pReader->aKey;
+  }else{
+    *pnKey = pSorter->list.pList->nVal;
+    pKey = SRVAL(pSorter->list.pList);
+  }
+  return pKey;
+}
+
+/*
+** Copy the current sorter key into the memory cell pOut.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(const VdbeCursor *pCsr, Mem *pOut){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  void *pKey; int nKey;           /* Sorter key to copy into pOut */
+
+  pKey = vdbeSorterRowkey(pSorter, &nKey);
+  if( sqlite3VdbeMemClearAndResize(pOut, nKey) ){
+    return SQLITE_NOMEM;
+  }
+  pOut->n = nKey;
+  MemSetTypeFlag(pOut, MEM_Blob);
+  memcpy(pOut->z, pKey, nKey);
+
+  return SQLITE_OK;
+}
+
+/*
+** Compare the key in memory cell pVal with the key that the sorter cursor
+** passed as the first argument currently points to. For the purposes of
+** the comparison, ignore the rowid field at the end of each record.
+**
+** If the sorter cursor key contains any NULL values, consider it to be
+** less than pVal. Even if pVal also contains NULL values.
+**
+** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
+** Otherwise, set *pRes to a negative, zero or positive value if the
+** key in pVal is smaller than, equal to or larger than the current sorter
+** key.
+**
+** This routine forms the core of the OP_SorterCompare opcode, which in
+** turn is used to verify uniqueness when constructing a UNIQUE INDEX.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterCompare(
+  const VdbeCursor *pCsr,         /* Sorter cursor */
+  Mem *pVal,                      /* Value to compare to current sorter key */
+  int nKeyCol,                    /* Compare this many columns */
+  int *pRes                       /* OUT: Result of comparison */
+){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  UnpackedRecord *r2 = pSorter->pUnpacked;
+  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
+  int i;
+  void *pKey; int nKey;           /* Sorter key to compare pVal with */
+
+  if( r2==0 ){
+    char *p;
+    r2 = pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pKeyInfo,0,0,&p);
+    assert( pSorter->pUnpacked==(UnpackedRecord*)p );
+    if( r2==0 ) return SQLITE_NOMEM;
+    r2->nField = nKeyCol;
+  }
+  assert( r2->nField==nKeyCol );
+
+  pKey = vdbeSorterRowkey(pSorter, &nKey);
+  sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, r2);
+  for(i=0; i<nKeyCol; i++){
+    if( r2->aMem[i].flags & MEM_Null ){
+      *pRes = -1;
+      return SQLITE_OK;
+    }
+  }
+
+  *pRes = sqlite3VdbeRecordCompare(pVal->n, pVal->z, r2);
+  return SQLITE_OK;
+}
+
+/************** End of vdbesort.c ********************************************/
+/************** Begin file journal.c *****************************************/
+/*
+** 2007 August 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file implements a special kind of sqlite3_file object used
+** by SQLite to create journal files if the atomic-write optimization
+** is enabled.
+**
+** The distinctive characteristic of this sqlite3_file is that the
+** actual on disk file is created lazily. When the file is created,
+** the caller specifies a buffer size for an in-memory buffer to
+** be used to service read() and write() requests. The actual file
+** on disk is not created or populated until either:
+**
+**   1) The in-memory representation grows too large for the allocated 
+**      buffer, or
+**   2) The sqlite3JournalCreate() function is called.
+*/
+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+
+
+/*
+** A JournalFile object is a subclass of sqlite3_file used by
+** as an open file handle for journal files.
+*/
+struct JournalFile {
+  sqlite3_io_methods *pMethod;    /* I/O methods on journal files */
+  int nBuf;                       /* Size of zBuf[] in bytes */
+  char *zBuf;                     /* Space to buffer journal writes */
+  int iSize;                      /* Amount of zBuf[] currently used */
+  int flags;                      /* xOpen flags */
+  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
+  sqlite3_file *pReal;            /* The "real" underlying file descriptor */
+  const char *zJournal;           /* Name of the journal file */
+};
+typedef struct JournalFile JournalFile;
+
+/*
+** If it does not already exists, create and populate the on-disk file 
+** for JournalFile p.
+*/
+static int createFile(JournalFile *p){
+  int rc = SQLITE_OK;
+  if( !p->pReal ){
+    sqlite3_file *pReal = (sqlite3_file *)&p[1];
+    rc = sqlite3OsOpen(p->pVfs, p->zJournal, pReal, p->flags, 0);
+    if( rc==SQLITE_OK ){
+      p->pReal = pReal;
+      if( p->iSize>0 ){
+        assert(p->iSize<=p->nBuf);
+        rc = sqlite3OsWrite(p->pReal, p->zBuf, p->iSize, 0);
+      }
+      if( rc!=SQLITE_OK ){
+        /* If an error occurred while writing to the file, close it before
+        ** returning. This way, SQLite uses the in-memory journal data to 
+        ** roll back changes made to the internal page-cache before this
+        ** function was called.  */
+        sqlite3OsClose(pReal);
+        p->pReal = 0;
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Close the file.
+*/
+static int jrnlClose(sqlite3_file *pJfd){
+  JournalFile *p = (JournalFile *)pJfd;
+  if( p->pReal ){
+    sqlite3OsClose(p->pReal);
+  }
+  sqlite3_free(p->zBuf);
+  return SQLITE_OK;
+}
+
+/*
+** Read data from the file.
+*/
+static int jrnlRead(
+  sqlite3_file *pJfd,    /* The journal file from which to read */
+  void *zBuf,            /* Put the results here */
+  int iAmt,              /* Number of bytes to read */
+  sqlite_int64 iOfst     /* Begin reading at this offset */
+){
+  int rc = SQLITE_OK;
+  JournalFile *p = (JournalFile *)pJfd;
+  if( p->pReal ){
+    rc = sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst);
+  }else if( (iAmt+iOfst)>p->iSize ){
+    rc = SQLITE_IOERR_SHORT_READ;
+  }else{
+    memcpy(zBuf, &p->zBuf[iOfst], iAmt);
+  }
+  return rc;
+}
+
+/*
+** Write data to the file.
+*/
+static int jrnlWrite(
+  sqlite3_file *pJfd,    /* The journal file into which to write */
+  const void *zBuf,      /* Take data to be written from here */
+  int iAmt,              /* Number of bytes to write */
+  sqlite_int64 iOfst     /* Begin writing at this offset into the file */
+){
+  int rc = SQLITE_OK;
+  JournalFile *p = (JournalFile *)pJfd;
+  if( !p->pReal && (iOfst+iAmt)>p->nBuf ){
+    rc = createFile(p);
+  }
+  if( rc==SQLITE_OK ){
+    if( p->pReal ){
+      rc = sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst);
+    }else{
+      memcpy(&p->zBuf[iOfst], zBuf, iAmt);
+      if( p->iSize<(iOfst+iAmt) ){
+        p->iSize = (iOfst+iAmt);
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Truncate the file.
+*/
+static int jrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
+  int rc = SQLITE_OK;
+  JournalFile *p = (JournalFile *)pJfd;
+  if( p->pReal ){
+    rc = sqlite3OsTruncate(p->pReal, size);
+  }else if( size<p->iSize ){
+    p->iSize = size;
+  }
+  return rc;
+}
+
+/*
+** Sync the file.
+*/
+static int jrnlSync(sqlite3_file *pJfd, int flags){
+  int rc;
+  JournalFile *p = (JournalFile *)pJfd;
+  if( p->pReal ){
+    rc = sqlite3OsSync(p->pReal, flags);
+  }else{
+    rc = SQLITE_OK;
+  }
+  return rc;
+}
+
+/*
+** Query the size of the file in bytes.
+*/
+static int jrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
+  int rc = SQLITE_OK;
+  JournalFile *p = (JournalFile *)pJfd;
+  if( p->pReal ){
+    rc = sqlite3OsFileSize(p->pReal, pSize);
+  }else{
+    *pSize = (sqlite_int64) p->iSize;
+  }
+  return rc;
+}
+
+/*
+** Table of methods for JournalFile sqlite3_file object.
+*/
+static struct sqlite3_io_methods JournalFileMethods = {
+  1,             /* iVersion */
+  jrnlClose,     /* xClose */
+  jrnlRead,      /* xRead */
+  jrnlWrite,     /* xWrite */
+  jrnlTruncate,  /* xTruncate */
+  jrnlSync,      /* xSync */
+  jrnlFileSize,  /* xFileSize */
+  0,             /* xLock */
+  0,             /* xUnlock */
+  0,             /* xCheckReservedLock */
+  0,             /* xFileControl */
+  0,             /* xSectorSize */
+  0,             /* xDeviceCharacteristics */
+  0,             /* xShmMap */
+  0,             /* xShmLock */
+  0,             /* xShmBarrier */
+  0              /* xShmUnmap */
+};
+
+/* 
+** Open a journal file.
+*/
+SQLITE_PRIVATE int sqlite3JournalOpen(
+  sqlite3_vfs *pVfs,         /* The VFS to use for actual file I/O */
+  const char *zName,         /* Name of the journal file */
+  sqlite3_file *pJfd,        /* Preallocated, blank file handle */
+  int flags,                 /* Opening flags */
+  int nBuf                   /* Bytes buffered before opening the file */
+){
+  JournalFile *p = (JournalFile *)pJfd;
+  memset(p, 0, sqlite3JournalSize(pVfs));
+  if( nBuf>0 ){
+    p->zBuf = sqlite3MallocZero(nBuf);
+    if( !p->zBuf ){
+      return SQLITE_NOMEM;
+    }
+  }else{
+    return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
+  }
+  p->pMethod = &JournalFileMethods;
+  p->nBuf = nBuf;
+  p->flags = flags;
+  p->zJournal = zName;
+  p->pVfs = pVfs;
+  return SQLITE_OK;
+}
+
+/*
+** If the argument p points to a JournalFile structure, and the underlying
+** file has not yet been created, create it now.
+*/
+SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *p){
+  if( p->pMethods!=&JournalFileMethods ){
+    return SQLITE_OK;
+  }
+  return createFile((JournalFile *)p);
+}
+
+/*
+** The file-handle passed as the only argument is guaranteed to be an open
+** file. It may or may not be of class JournalFile. If the file is a
+** JournalFile, and the underlying file on disk has not yet been opened,
+** return 0. Otherwise, return 1.
+*/
+SQLITE_PRIVATE int sqlite3JournalExists(sqlite3_file *p){
+  return (p->pMethods!=&JournalFileMethods || ((JournalFile *)p)->pReal!=0);
+}
+
+/* 
+** Return the number of bytes required to store a JournalFile that uses vfs
+** pVfs to create the underlying on-disk files.
+*/
+SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){
+  return (pVfs->szOsFile+sizeof(JournalFile));
+}
+#endif
+
+/************** End of journal.c *********************************************/
+/************** Begin file memjournal.c **************************************/
+/*
+** 2008 October 7
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains code use to implement an in-memory rollback journal.
+** The in-memory rollback journal is used to journal transactions for
+** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
+*/
+
+/* Forward references to internal structures */
+typedef struct MemJournal MemJournal;
+typedef struct FilePoint FilePoint;
+typedef struct FileChunk FileChunk;
+
+/* Space to hold the rollback journal is allocated in increments of
+** this many bytes.
+**
+** The size chosen is a little less than a power of two.  That way,
+** the FileChunk object will have a size that almost exactly fills
+** a power-of-two allocation.  This minimizes wasted space in power-of-two
+** memory allocators.
+*/
+#define JOURNAL_CHUNKSIZE ((int)(1024-sizeof(FileChunk*)))
+
+/*
+** The rollback journal is composed of a linked list of these structures.
+*/
+struct FileChunk {
+  FileChunk *pNext;               /* Next chunk in the journal */
+  u8 zChunk[JOURNAL_CHUNKSIZE];   /* Content of this chunk */
+};
+
+/*
+** An instance of this object serves as a cursor into the rollback journal.
+** The cursor can be either for reading or writing.
+*/
+struct FilePoint {
+  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
+  FileChunk *pChunk;              /* Specific chunk into which cursor points */
+};
+
+/*
+** This subclass is a subclass of sqlite3_file.  Each open memory-journal
+** is an instance of this class.
+*/
+struct MemJournal {
+  sqlite3_io_methods *pMethod;    /* Parent class. MUST BE FIRST */
+  FileChunk *pFirst;              /* Head of in-memory chunk-list */
+  FilePoint endpoint;             /* Pointer to the end of the file */
+  FilePoint readpoint;            /* Pointer to the end of the last xRead() */
+};
+
+/*
+** Read data from the in-memory journal file.  This is the implementation
+** of the sqlite3_vfs.xRead method.
+*/
+static int memjrnlRead(
+  sqlite3_file *pJfd,    /* The journal file from which to read */
+  void *zBuf,            /* Put the results here */
+  int iAmt,              /* Number of bytes to read */
+  sqlite_int64 iOfst     /* Begin reading at this offset */
+){
+  MemJournal *p = (MemJournal *)pJfd;
+  u8 *zOut = zBuf;
+  int nRead = iAmt;
+  int iChunkOffset;
+  FileChunk *pChunk;
+
+  /* SQLite never tries to read past the end of a rollback journal file */
+  assert( iOfst+iAmt<=p->endpoint.iOffset );
+
+  if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
+    sqlite3_int64 iOff = 0;
+    for(pChunk=p->pFirst; 
+        ALWAYS(pChunk) && (iOff+JOURNAL_CHUNKSIZE)<=iOfst;
+        pChunk=pChunk->pNext
+    ){
+      iOff += JOURNAL_CHUNKSIZE;
+    }
+  }else{
+    pChunk = p->readpoint.pChunk;
+  }
+
+  iChunkOffset = (int)(iOfst%JOURNAL_CHUNKSIZE);
+  do {
+    int iSpace = JOURNAL_CHUNKSIZE - iChunkOffset;
+    int nCopy = MIN(nRead, (JOURNAL_CHUNKSIZE - iChunkOffset));
+    memcpy(zOut, &pChunk->zChunk[iChunkOffset], nCopy);
+    zOut += nCopy;
+    nRead -= iSpace;
+    iChunkOffset = 0;
+  } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
+  p->readpoint.iOffset = iOfst+iAmt;
+  p->readpoint.pChunk = pChunk;
+
+  return SQLITE_OK;
+}
+
+/*
+** Write data to the file.
+*/
+static int memjrnlWrite(
+  sqlite3_file *pJfd,    /* The journal file into which to write */
+  const void *zBuf,      /* Take data to be written from here */
+  int iAmt,              /* Number of bytes to write */
+  sqlite_int64 iOfst     /* Begin writing at this offset into the file */
+){
+  MemJournal *p = (MemJournal *)pJfd;
+  int nWrite = iAmt;
+  u8 *zWrite = (u8 *)zBuf;
+
+  /* An in-memory journal file should only ever be appended to. Random
+  ** access writes are not required by sqlite.
+  */
+  assert( iOfst==p->endpoint.iOffset );
+  UNUSED_PARAMETER(iOfst);
+
+  while( nWrite>0 ){
+    FileChunk *pChunk = p->endpoint.pChunk;
+    int iChunkOffset = (int)(p->endpoint.iOffset%JOURNAL_CHUNKSIZE);
+    int iSpace = MIN(nWrite, JOURNAL_CHUNKSIZE - iChunkOffset);
+
+    if( iChunkOffset==0 ){
+      /* New chunk is required to extend the file. */
+      FileChunk *pNew = sqlite3_malloc(sizeof(FileChunk));
+      if( !pNew ){
+        return SQLITE_IOERR_NOMEM;
+      }
+      pNew->pNext = 0;
+      if( pChunk ){
+        assert( p->pFirst );
+        pChunk->pNext = pNew;
+      }else{
+        assert( !p->pFirst );
+        p->pFirst = pNew;
+      }
+      p->endpoint.pChunk = pNew;
+    }
+
+    memcpy(&p->endpoint.pChunk->zChunk[iChunkOffset], zWrite, iSpace);
+    zWrite += iSpace;
+    nWrite -= iSpace;
+    p->endpoint.iOffset += iSpace;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Truncate the file.
+*/
+static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
+  MemJournal *p = (MemJournal *)pJfd;
+  FileChunk *pChunk;
+  assert(size==0);
+  UNUSED_PARAMETER(size);
+  pChunk = p->pFirst;
+  while( pChunk ){
+    FileChunk *pTmp = pChunk;
+    pChunk = pChunk->pNext;
+    sqlite3_free(pTmp);
+  }
+  sqlite3MemJournalOpen(pJfd);
+  return SQLITE_OK;
+}
+
+/*
+** Close the file.
+*/
+static int memjrnlClose(sqlite3_file *pJfd){
+  memjrnlTruncate(pJfd, 0);
+  return SQLITE_OK;
+}
+
+
+/*
+** Sync the file.
+**
+** Syncing an in-memory journal is a no-op.  And, in fact, this routine
+** is never called in a working implementation.  This implementation
+** exists purely as a contingency, in case some malfunction in some other
+** part of SQLite causes Sync to be called by mistake.
+*/
+static int memjrnlSync(sqlite3_file *NotUsed, int NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return SQLITE_OK;
+}
+
+/*
+** Query the size of the file in bytes.
+*/
+static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
+  MemJournal *p = (MemJournal *)pJfd;
+  *pSize = (sqlite_int64) p->endpoint.iOffset;
+  return SQLITE_OK;
+}
+
+/*
+** Table of methods for MemJournal sqlite3_file object.
+*/
+static const struct sqlite3_io_methods MemJournalMethods = {
+  1,                /* iVersion */
+  memjrnlClose,     /* xClose */
+  memjrnlRead,      /* xRead */
+  memjrnlWrite,     /* xWrite */
+  memjrnlTruncate,  /* xTruncate */
+  memjrnlSync,      /* xSync */
+  memjrnlFileSize,  /* xFileSize */
+  0,                /* xLock */
+  0,                /* xUnlock */
+  0,                /* xCheckReservedLock */
+  0,                /* xFileControl */
+  0,                /* xSectorSize */
+  0,                /* xDeviceCharacteristics */
+  0,                /* xShmMap */
+  0,                /* xShmLock */
+  0,                /* xShmBarrier */
+  0,                /* xShmUnmap */
+  0,                /* xFetch */
+  0                 /* xUnfetch */
+};
+
+/* 
+** Open a journal file.
+*/
+SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *pJfd){
+  MemJournal *p = (MemJournal *)pJfd;
+  assert( EIGHT_BYTE_ALIGNMENT(p) );
+  memset(p, 0, sqlite3MemJournalSize());
+  p->pMethod = (sqlite3_io_methods*)&MemJournalMethods;
+}
+
+/*
+** Return true if the file-handle passed as an argument is 
+** an in-memory journal 
+*/
+SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *pJfd){
+  return pJfd->pMethods==&MemJournalMethods;
+}
+
+/* 
+** Return the number of bytes required to store a MemJournal file descriptor.
+*/
+SQLITE_PRIVATE int sqlite3MemJournalSize(void){
+  return sizeof(MemJournal);
+}
+
+/************** End of memjournal.c ******************************************/
+/************** Begin file walker.c ******************************************/
+/*
+** 2008 August 16
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains routines used for walking the parser tree for
+** an SQL statement.
+*/
+/* #include <stdlib.h> */
+/* #include <string.h> */
+
+
+/*
+** Walk an expression tree.  Invoke the callback once for each node
+** of the expression, while descending.  (In other words, the callback
+** is invoked before visiting children.)
+**
+** The return value from the callback should be one of the WRC_*
+** constants to specify how to proceed with the walk.
+**
+**    WRC_Continue      Continue descending down the tree.
+**
+**    WRC_Prune         Do not descend into child nodes.  But allow
+**                      the walk to continue with sibling nodes.
+**
+**    WRC_Abort         Do no more callbacks.  Unwind the stack and
+**                      return the top-level walk call.
+**
+** The return value from this routine is WRC_Abort to abandon the tree walk
+** and WRC_Continue to continue.
+*/
+SQLITE_PRIVATE int sqlite3WalkExpr(Walker *pWalker, Expr *pExpr){
+  int rc;
+  if( pExpr==0 ) return WRC_Continue;
+  testcase( ExprHasProperty(pExpr, EP_TokenOnly) );
+  testcase( ExprHasProperty(pExpr, EP_Reduced) );
+  rc = pWalker->xExprCallback(pWalker, pExpr);
+  if( rc==WRC_Continue
+              && !ExprHasProperty(pExpr,EP_TokenOnly) ){
+    if( sqlite3WalkExpr(pWalker, pExpr->pLeft) ) return WRC_Abort;
+    if( sqlite3WalkExpr(pWalker, pExpr->pRight) ) return WRC_Abort;
+    if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+      if( sqlite3WalkSelect(pWalker, pExpr->x.pSelect) ) return WRC_Abort;
+    }else{
+      if( sqlite3WalkExprList(pWalker, pExpr->x.pList) ) return WRC_Abort;
+    }
+  }
+  return rc & WRC_Abort;
+}
+
+/*
+** Call sqlite3WalkExpr() for every expression in list p or until
+** an abort request is seen.
+*/
+SQLITE_PRIVATE int sqlite3WalkExprList(Walker *pWalker, ExprList *p){
+  int i;
+  struct ExprList_item *pItem;
+  if( p ){
+    for(i=p->nExpr, pItem=p->a; i>0; i--, pItem++){
+      if( sqlite3WalkExpr(pWalker, pItem->pExpr) ) return WRC_Abort;
+    }
+  }
+  return WRC_Continue;
+}
+
+/*
+** Walk all expressions associated with SELECT statement p.  Do
+** not invoke the SELECT callback on p, but do (of course) invoke
+** any expr callbacks and SELECT callbacks that come from subqueries.
+** Return WRC_Abort or WRC_Continue.
+*/
+SQLITE_PRIVATE int sqlite3WalkSelectExpr(Walker *pWalker, Select *p){
+  if( sqlite3WalkExprList(pWalker, p->pEList) ) return WRC_Abort;
+  if( sqlite3WalkExpr(pWalker, p->pWhere) ) return WRC_Abort;
+  if( sqlite3WalkExprList(pWalker, p->pGroupBy) ) return WRC_Abort;
+  if( sqlite3WalkExpr(pWalker, p->pHaving) ) return WRC_Abort;
+  if( sqlite3WalkExprList(pWalker, p->pOrderBy) ) return WRC_Abort;
+  if( sqlite3WalkExpr(pWalker, p->pLimit) ) return WRC_Abort;
+  if( sqlite3WalkExpr(pWalker, p->pOffset) ) return WRC_Abort;
+  return WRC_Continue;
+}
+
+/*
+** Walk the parse trees associated with all subqueries in the
+** FROM clause of SELECT statement p.  Do not invoke the select
+** callback on p, but do invoke it on each FROM clause subquery
+** and on any subqueries further down in the tree.  Return 
+** WRC_Abort or WRC_Continue;
+*/
+SQLITE_PRIVATE int sqlite3WalkSelectFrom(Walker *pWalker, Select *p){
+  SrcList *pSrc;
+  int i;
+  struct SrcList_item *pItem;
+
+  pSrc = p->pSrc;
+  if( ALWAYS(pSrc) ){
+    for(i=pSrc->nSrc, pItem=pSrc->a; i>0; i--, pItem++){
+      if( sqlite3WalkSelect(pWalker, pItem->pSelect) ){
+        return WRC_Abort;
+      }
+    }
+  }
+  return WRC_Continue;
+} 
+
+/*
+** Call sqlite3WalkExpr() for every expression in Select statement p.
+** Invoke sqlite3WalkSelect() for subqueries in the FROM clause and
+** on the compound select chain, p->pPrior. 
+**
+** If it is not NULL, the xSelectCallback() callback is invoked before
+** the walk of the expressions and FROM clause. The xSelectCallback2()
+** method, if it is not NULL, is invoked following the walk of the 
+** expressions and FROM clause.
+**
+** Return WRC_Continue under normal conditions.  Return WRC_Abort if
+** there is an abort request.
+**
+** If the Walker does not have an xSelectCallback() then this routine
+** is a no-op returning WRC_Continue.
+*/
+SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){
+  int rc;
+  if( p==0 || (pWalker->xSelectCallback==0 && pWalker->xSelectCallback2==0) ){
+    return WRC_Continue;
+  }
+  rc = WRC_Continue;
+  pWalker->walkerDepth++;
+  while( p ){
+    if( pWalker->xSelectCallback ){
+       rc = pWalker->xSelectCallback(pWalker, p);
+       if( rc ) break;
+    }
+    if( sqlite3WalkSelectExpr(pWalker, p)
+     || sqlite3WalkSelectFrom(pWalker, p)
+    ){
+      pWalker->walkerDepth--;
+      return WRC_Abort;
+    }
+    if( pWalker->xSelectCallback2 ){
+      pWalker->xSelectCallback2(pWalker, p);
+    }
+    p = p->pPrior;
+  }
+  pWalker->walkerDepth--;
+  return rc & WRC_Abort;
+}
+
+/************** End of walker.c **********************************************/
+/************** Begin file resolve.c *****************************************/
+/*
+** 2008 August 18
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains routines used for walking the parser tree and
+** resolve all identifiers by associating them with a particular
+** table and column.
+*/
+/* #include <stdlib.h> */
+/* #include <string.h> */
+
+/*
+** Walk the expression tree pExpr and increase the aggregate function
+** depth (the Expr.op2 field) by N on every TK_AGG_FUNCTION node.
+** This needs to occur when copying a TK_AGG_FUNCTION node from an
+** outer query into an inner subquery.
+**
+** incrAggFunctionDepth(pExpr,n) is the main routine.  incrAggDepth(..)
+** is a helper function - a callback for the tree walker.
+*/
+static int incrAggDepth(Walker *pWalker, Expr *pExpr){
+  if( pExpr->op==TK_AGG_FUNCTION ) pExpr->op2 += pWalker->u.n;
+  return WRC_Continue;
+}
+static void incrAggFunctionDepth(Expr *pExpr, int N){
+  if( N>0 ){
+    Walker w;
+    memset(&w, 0, sizeof(w));
+    w.xExprCallback = incrAggDepth;
+    w.u.n = N;
+    sqlite3WalkExpr(&w, pExpr);
+  }
+}
+
+/*
+** Turn the pExpr expression into an alias for the iCol-th column of the
+** result set in pEList.
+**
+** If the result set column is a simple column reference, then this routine
+** makes an exact copy.  But for any other kind of expression, this
+** routine make a copy of the result set column as the argument to the
+** TK_AS operator.  The TK_AS operator causes the expression to be
+** evaluated just once and then reused for each alias.
+**
+** The reason for suppressing the TK_AS term when the expression is a simple
+** column reference is so that the column reference will be recognized as
+** usable by indices within the WHERE clause processing logic. 
+**
+** The TK_AS operator is inhibited if zType[0]=='G'.  This means
+** that in a GROUP BY clause, the expression is evaluated twice.  Hence:
+**
+**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY x
+**
+** Is equivalent to:
+**
+**     SELECT random()%5 AS x, count(*) FROM tab GROUP BY random()%5
+**
+** The result of random()%5 in the GROUP BY clause is probably different
+** from the result in the result-set.  On the other hand Standard SQL does
+** not allow the GROUP BY clause to contain references to result-set columns.
+** So this should never come up in well-formed queries.
+**
+** If the reference is followed by a COLLATE operator, then make sure
+** the COLLATE operator is preserved.  For example:
+**
+**     SELECT a+b, c+d FROM t1 ORDER BY 1 COLLATE nocase;
+**
+** Should be transformed into:
+**
+**     SELECT a+b, c+d FROM t1 ORDER BY (a+b) COLLATE nocase;
+**
+** The nSubquery parameter specifies how many levels of subquery the
+** alias is removed from the original expression.  The usually value is
+** zero but it might be more if the alias is contained within a subquery
+** of the original expression.  The Expr.op2 field of TK_AGG_FUNCTION
+** structures must be increased by the nSubquery amount.
+*/
+static void resolveAlias(
+  Parse *pParse,         /* Parsing context */
+  ExprList *pEList,      /* A result set */
+  int iCol,              /* A column in the result set.  0..pEList->nExpr-1 */
+  Expr *pExpr,           /* Transform this into an alias to the result set */
+  const char *zType,     /* "GROUP" or "ORDER" or "" */
+  int nSubquery          /* Number of subqueries that the label is moving */
+){
+  Expr *pOrig;           /* The iCol-th column of the result set */
+  Expr *pDup;            /* Copy of pOrig */
+  sqlite3 *db;           /* The database connection */
+
+  assert( iCol>=0 && iCol<pEList->nExpr );
+  pOrig = pEList->a[iCol].pExpr;
+  assert( pOrig!=0 );
+  assert( pOrig->flags & EP_Resolved );
+  db = pParse->db;
+  pDup = sqlite3ExprDup(db, pOrig, 0);
+  if( pDup==0 ) return;
+  if( pOrig->op!=TK_COLUMN && zType[0]!='G' ){
+    incrAggFunctionDepth(pDup, nSubquery);
+    pDup = sqlite3PExpr(pParse, TK_AS, pDup, 0, 0);
+    if( pDup==0 ) return;
+    ExprSetProperty(pDup, EP_Skip);
+    if( pEList->a[iCol].u.x.iAlias==0 ){
+      pEList->a[iCol].u.x.iAlias = (u16)(++pParse->nAlias);
+    }
+    pDup->iTable = pEList->a[iCol].u.x.iAlias;
+  }
+  if( pExpr->op==TK_COLLATE ){
+    pDup = sqlite3ExprAddCollateString(pParse, pDup, pExpr->u.zToken);
+  }
+
+  /* Before calling sqlite3ExprDelete(), set the EP_Static flag. This 
+  ** prevents ExprDelete() from deleting the Expr structure itself,
+  ** allowing it to be repopulated by the memcpy() on the following line.
+  ** The pExpr->u.zToken might point into memory that will be freed by the
+  ** sqlite3DbFree(db, pDup) on the last line of this block, so be sure to
+  ** make a copy of the token before doing the sqlite3DbFree().
+  */
+  ExprSetProperty(pExpr, EP_Static);
+  sqlite3ExprDelete(db, pExpr);
+  memcpy(pExpr, pDup, sizeof(*pExpr));
+  if( !ExprHasProperty(pExpr, EP_IntValue) && pExpr->u.zToken!=0 ){
+    assert( (pExpr->flags & (EP_Reduced|EP_TokenOnly))==0 );
+    pExpr->u.zToken = sqlite3DbStrDup(db, pExpr->u.zToken);
+    pExpr->flags |= EP_MemToken;
+  }
+  sqlite3DbFree(db, pDup);
+}
+
+
+/*
+** Return TRUE if the name zCol occurs anywhere in the USING clause.
+**
+** Return FALSE if the USING clause is NULL or if it does not contain
+** zCol.
+*/
+static int nameInUsingClause(IdList *pUsing, const char *zCol){
+  if( pUsing ){
+    int k;
+    for(k=0; k<pUsing->nId; k++){
+      if( sqlite3StrICmp(pUsing->a[k].zName, zCol)==0 ) return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Subqueries stores the original database, table and column names for their
+** result sets in ExprList.a[].zSpan, in the form "DATABASE.TABLE.COLUMN".
+** Check to see if the zSpan given to this routine matches the zDb, zTab,
+** and zCol.  If any of zDb, zTab, and zCol are NULL then those fields will
+** match anything.
+*/
+SQLITE_PRIVATE int sqlite3MatchSpanName(
+  const char *zSpan,
+  const char *zCol,
+  const char *zTab,
+  const char *zDb
+){
+  int n;
+  for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){}
+  if( zDb && (sqlite3StrNICmp(zSpan, zDb, n)!=0 || zDb[n]!=0) ){
+    return 0;
+  }
+  zSpan += n+1;
+  for(n=0; ALWAYS(zSpan[n]) && zSpan[n]!='.'; n++){}
+  if( zTab && (sqlite3StrNICmp(zSpan, zTab, n)!=0 || zTab[n]!=0) ){
+    return 0;
+  }
+  zSpan += n+1;
+  if( zCol && sqlite3StrICmp(zSpan, zCol)!=0 ){
+    return 0;
+  }
+  return 1;
+}
+
+/*
+** Given the name of a column of the form X.Y.Z or Y.Z or just Z, look up
+** that name in the set of source tables in pSrcList and make the pExpr 
+** expression node refer back to that source column.  The following changes
+** are made to pExpr:
+**
+**    pExpr->iDb           Set the index in db->aDb[] of the database X
+**                         (even if X is implied).
+**    pExpr->iTable        Set to the cursor number for the table obtained
+**                         from pSrcList.
+**    pExpr->pTab          Points to the Table structure of X.Y (even if
+**                         X and/or Y are implied.)
+**    pExpr->iColumn       Set to the column number within the table.
+**    pExpr->op            Set to TK_COLUMN.
+**    pExpr->pLeft         Any expression this points to is deleted
+**    pExpr->pRight        Any expression this points to is deleted.
+**
+** The zDb variable is the name of the database (the "X").  This value may be
+** NULL meaning that name is of the form Y.Z or Z.  Any available database
+** can be used.  The zTable variable is the name of the table (the "Y").  This
+** value can be NULL if zDb is also NULL.  If zTable is NULL it
+** means that the form of the name is Z and that columns from any table
+** can be used.
+**
+** If the name cannot be resolved unambiguously, leave an error message
+** in pParse and return WRC_Abort.  Return WRC_Prune on success.
+*/
+static int lookupName(
+  Parse *pParse,       /* The parsing context */
+  const char *zDb,     /* Name of the database containing table, or NULL */
+  const char *zTab,    /* Name of table containing column, or NULL */
+  const char *zCol,    /* Name of the column. */
+  NameContext *pNC,    /* The name context used to resolve the name */
+  Expr *pExpr          /* Make this EXPR node point to the selected column */
+){
+  int i, j;                         /* Loop counters */
+  int cnt = 0;                      /* Number of matching column names */
+  int cntTab = 0;                   /* Number of matching table names */
+  int nSubquery = 0;                /* How many levels of subquery */
+  sqlite3 *db = pParse->db;         /* The database connection */
+  struct SrcList_item *pItem;       /* Use for looping over pSrcList items */
+  struct SrcList_item *pMatch = 0;  /* The matching pSrcList item */
+  NameContext *pTopNC = pNC;        /* First namecontext in the list */
+  Schema *pSchema = 0;              /* Schema of the expression */
+  int isTrigger = 0;                /* True if resolved to a trigger column */
+  Table *pTab = 0;                  /* Table hold the row */
+  Column *pCol;                     /* A column of pTab */
+
+  assert( pNC );     /* the name context cannot be NULL. */
+  assert( zCol );    /* The Z in X.Y.Z cannot be NULL */
+  assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) );
+
+  /* Initialize the node to no-match */
+  pExpr->iTable = -1;
+  pExpr->pTab = 0;
+  ExprSetVVAProperty(pExpr, EP_NoReduce);
+
+  /* Translate the schema name in zDb into a pointer to the corresponding
+  ** schema.  If not found, pSchema will remain NULL and nothing will match
+  ** resulting in an appropriate error message toward the end of this routine
+  */
+  if( zDb ){
+    testcase( pNC->ncFlags & NC_PartIdx );
+    testcase( pNC->ncFlags & NC_IsCheck );
+    if( (pNC->ncFlags & (NC_PartIdx|NC_IsCheck))!=0 ){
+      /* Silently ignore database qualifiers inside CHECK constraints and partial
+      ** indices.  Do not raise errors because that might break legacy and
+      ** because it does not hurt anything to just ignore the database name. */
+      zDb = 0;
+    }else{
+      for(i=0; i<db->nDb; i++){
+        assert( db->aDb[i].zName );
+        if( sqlite3StrICmp(db->aDb[i].zName,zDb)==0 ){
+          pSchema = db->aDb[i].pSchema;
+          break;
+        }
+      }
+    }
+  }
+
+  /* Start at the inner-most context and move outward until a match is found */
+  while( pNC && cnt==0 ){
+    ExprList *pEList;
+    SrcList *pSrcList = pNC->pSrcList;
+
+    if( pSrcList ){
+      for(i=0, pItem=pSrcList->a; i<pSrcList->nSrc; i++, pItem++){
+        pTab = pItem->pTab;
+        assert( pTab!=0 && pTab->zName!=0 );
+        assert( pTab->nCol>0 );
+        if( pItem->pSelect && (pItem->pSelect->selFlags & SF_NestedFrom)!=0 ){
+          int hit = 0;
+          pEList = pItem->pSelect->pEList;
+          for(j=0; j<pEList->nExpr; j++){
+            if( sqlite3MatchSpanName(pEList->a[j].zSpan, zCol, zTab, zDb) ){
+              cnt++;
+              cntTab = 2;
+              pMatch = pItem;
+              pExpr->iColumn = j;
+              hit = 1;
+            }
+          }
+          if( hit || zTab==0 ) continue;
+        }
+        if( zDb && pTab->pSchema!=pSchema ){
+          continue;
+        }
+        if( zTab ){
+          const char *zTabName = pItem->zAlias ? pItem->zAlias : pTab->zName;
+          assert( zTabName!=0 );
+          if( sqlite3StrICmp(zTabName, zTab)!=0 ){
+            continue;
+          }
+        }
+        if( 0==(cntTab++) ){
+          pMatch = pItem;
+        }
+        for(j=0, pCol=pTab->aCol; j<pTab->nCol; j++, pCol++){
+          if( sqlite3StrICmp(pCol->zName, zCol)==0 ){
+            /* If there has been exactly one prior match and this match
+            ** is for the right-hand table of a NATURAL JOIN or is in a 
+            ** USING clause, then skip this match.
+            */
+            if( cnt==1 ){
+              if( pItem->jointype & JT_NATURAL ) continue;
+              if( nameInUsingClause(pItem->pUsing, zCol) ) continue;
+            }
+            cnt++;
+            pMatch = pItem;
+            /* Substitute the rowid (column -1) for the INTEGER PRIMARY KEY */
+            pExpr->iColumn = j==pTab->iPKey ? -1 : (i16)j;
+            break;
+          }
+        }
+      }
+      if( pMatch ){
+        pExpr->iTable = pMatch->iCursor;
+        pExpr->pTab = pMatch->pTab;
+        assert( (pMatch->jointype & JT_RIGHT)==0 ); /* RIGHT JOIN not (yet) supported */
+        if( (pMatch->jointype & JT_LEFT)!=0 ){
+          ExprSetProperty(pExpr, EP_CanBeNull);
+        }
+        pSchema = pExpr->pTab->pSchema;
+      }
+    } /* if( pSrcList ) */
+
+#ifndef SQLITE_OMIT_TRIGGER
+    /* If we have not already resolved the name, then maybe 
+    ** it is a new.* or old.* trigger argument reference
+    */
+    if( zDb==0 && zTab!=0 && cntTab==0 && pParse->pTriggerTab!=0 ){
+      int op = pParse->eTriggerOp;
+      assert( op==TK_DELETE || op==TK_UPDATE || op==TK_INSERT );
+      if( op!=TK_DELETE && sqlite3StrICmp("new",zTab) == 0 ){
+        pExpr->iTable = 1;
+        pTab = pParse->pTriggerTab;
+      }else if( op!=TK_INSERT && sqlite3StrICmp("old",zTab)==0 ){
+        pExpr->iTable = 0;
+        pTab = pParse->pTriggerTab;
+      }else{
+        pTab = 0;
+      }
+
+      if( pTab ){ 
+        int iCol;
+        pSchema = pTab->pSchema;
+        cntTab++;
+        for(iCol=0, pCol=pTab->aCol; iCol<pTab->nCol; iCol++, pCol++){
+          if( sqlite3StrICmp(pCol->zName, zCol)==0 ){
+            if( iCol==pTab->iPKey ){
+              iCol = -1;
+            }
+            break;
+          }
+        }
+        if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && HasRowid(pTab) ){
+          /* IMP: R-51414-32910 */
+          /* IMP: R-44911-55124 */
+          iCol = -1;
+        }
+        if( iCol<pTab->nCol ){
+          cnt++;
+          if( iCol<0 ){
+            pExpr->affinity = SQLITE_AFF_INTEGER;
+          }else if( pExpr->iTable==0 ){
+            testcase( iCol==31 );
+            testcase( iCol==32 );
+            pParse->oldmask |= (iCol>=32 ? 0xffffffff : (((u32)1)<<iCol));
+          }else{
+            testcase( iCol==31 );
+            testcase( iCol==32 );
+            pParse->newmask |= (iCol>=32 ? 0xffffffff : (((u32)1)<<iCol));
+          }
+          pExpr->iColumn = (i16)iCol;
+          pExpr->pTab = pTab;
+          isTrigger = 1;
+        }
+      }
+    }
+#endif /* !defined(SQLITE_OMIT_TRIGGER) */
+
+    /*
+    ** Perhaps the name is a reference to the ROWID
+    */
+    if( cnt==0 && cntTab==1 && pMatch && sqlite3IsRowid(zCol)
+     && HasRowid(pMatch->pTab) ){
+      cnt = 1;
+      pExpr->iColumn = -1;     /* IMP: R-44911-55124 */
+      pExpr->affinity = SQLITE_AFF_INTEGER;
+    }
+
+    /*
+    ** If the input is of the form Z (not Y.Z or X.Y.Z) then the name Z
+    ** might refer to an result-set alias.  This happens, for example, when
+    ** we are resolving names in the WHERE clause of the following command:
+    **
+    **     SELECT a+b AS x FROM table WHERE x<10;
+    **
+    ** In cases like this, replace pExpr with a copy of the expression that
+    ** forms the result set entry ("a+b" in the example) and return immediately.
+    ** Note that the expression in the result set should have already been
+    ** resolved by the time the WHERE clause is resolved.
+    **
+    ** The ability to use an output result-set column in the WHERE, GROUP BY,
+    ** or HAVING clauses, or as part of a larger expression in the ORDRE BY
+    ** clause is not standard SQL.  This is a (goofy) SQLite extension, that
+    ** is supported for backwards compatibility only.  TO DO: Issue a warning
+    ** on sqlite3_log() whenever the capability is used.
+    */
+    if( (pEList = pNC->pEList)!=0
+     && zTab==0
+     && cnt==0
+    ){
+      for(j=0; j<pEList->nExpr; j++){
+        char *zAs = pEList->a[j].zName;
+        if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
+          Expr *pOrig;
+          assert( pExpr->pLeft==0 && pExpr->pRight==0 );
+          assert( pExpr->x.pList==0 );
+          assert( pExpr->x.pSelect==0 );
+          pOrig = pEList->a[j].pExpr;
+          if( (pNC->ncFlags&NC_AllowAgg)==0 && ExprHasProperty(pOrig, EP_Agg) ){
+            sqlite3ErrorMsg(pParse, "misuse of aliased aggregate %s", zAs);
+            return WRC_Abort;
+          }
+          resolveAlias(pParse, pEList, j, pExpr, "", nSubquery);
+          cnt = 1;
+          pMatch = 0;
+          assert( zTab==0 && zDb==0 );
+          goto lookupname_end;
+        }
+      } 
+    }
+
+    /* Advance to the next name context.  The loop will exit when either
+    ** we have a match (cnt>0) or when we run out of name contexts.
+    */
+    if( cnt==0 ){
+      pNC = pNC->pNext;
+      nSubquery++;
+    }
+  }
+
+  /*
+  ** If X and Y are NULL (in other words if only the column name Z is
+  ** supplied) and the value of Z is enclosed in double-quotes, then
+  ** Z is a string literal if it doesn't match any column names.  In that
+  ** case, we need to return right away and not make any changes to
+  ** pExpr.
+  **
+  ** Because no reference was made to outer contexts, the pNC->nRef
+  ** fields are not changed in any context.
+  */
+  if( cnt==0 && zTab==0 && ExprHasProperty(pExpr,EP_DblQuoted) ){
+    pExpr->op = TK_STRING;
+    pExpr->pTab = 0;
+    return WRC_Prune;
+  }
+
+  /*
+  ** cnt==0 means there was not match.  cnt>1 means there were two or
+  ** more matches.  Either way, we have an error.
+  */
+  if( cnt!=1 ){
+    const char *zErr;
+    zErr = cnt==0 ? "no such column" : "ambiguous column name";
+    if( zDb ){
+      sqlite3ErrorMsg(pParse, "%s: %s.%s.%s", zErr, zDb, zTab, zCol);
+    }else if( zTab ){
+      sqlite3ErrorMsg(pParse, "%s: %s.%s", zErr, zTab, zCol);
+    }else{
+      sqlite3ErrorMsg(pParse, "%s: %s", zErr, zCol);
+    }
+    pParse->checkSchema = 1;
+    pTopNC->nErr++;
+  }
+
+  /* If a column from a table in pSrcList is referenced, then record
+  ** this fact in the pSrcList.a[].colUsed bitmask.  Column 0 causes
+  ** bit 0 to be set.  Column 1 sets bit 1.  And so forth.  If the
+  ** column number is greater than the number of bits in the bitmask
+  ** then set the high-order bit of the bitmask.
+  */
+  if( pExpr->iColumn>=0 && pMatch!=0 ){
+    int n = pExpr->iColumn;
+    testcase( n==BMS-1 );
+    if( n>=BMS ){
+      n = BMS-1;
+    }
+    assert( pMatch->iCursor==pExpr->iTable );
+    pMatch->colUsed |= ((Bitmask)1)<<n;
+  }
+
+  /* Clean up and return
+  */
+  sqlite3ExprDelete(db, pExpr->pLeft);
+  pExpr->pLeft = 0;
+  sqlite3ExprDelete(db, pExpr->pRight);
+  pExpr->pRight = 0;
+  pExpr->op = (isTrigger ? TK_TRIGGER : TK_COLUMN);
+lookupname_end:
+  if( cnt==1 ){
+    assert( pNC!=0 );
+    if( pExpr->op!=TK_AS ){
+      sqlite3AuthRead(pParse, pExpr, pSchema, pNC->pSrcList);
+    }
+    /* Increment the nRef value on all name contexts from TopNC up to
+    ** the point where the name matched. */
+    for(;;){
+      assert( pTopNC!=0 );
+      pTopNC->nRef++;
+      if( pTopNC==pNC ) break;
+      pTopNC = pTopNC->pNext;
+    }
+    return WRC_Prune;
+  } else {
+    return WRC_Abort;
+  }
+}
+
+/*
+** Allocate and return a pointer to an expression to load the column iCol
+** from datasource iSrc in SrcList pSrc.
+*/
+SQLITE_PRIVATE Expr *sqlite3CreateColumnExpr(sqlite3 *db, SrcList *pSrc, int iSrc, int iCol){
+  Expr *p = sqlite3ExprAlloc(db, TK_COLUMN, 0, 0);
+  if( p ){
+    struct SrcList_item *pItem = &pSrc->a[iSrc];
+    p->pTab = pItem->pTab;
+    p->iTable = pItem->iCursor;
+    if( p->pTab->iPKey==iCol ){
+      p->iColumn = -1;
+    }else{
+      p->iColumn = (ynVar)iCol;
+      testcase( iCol==BMS );
+      testcase( iCol==BMS-1 );
+      pItem->colUsed |= ((Bitmask)1)<<(iCol>=BMS ? BMS-1 : iCol);
+    }
+    ExprSetProperty(p, EP_Resolved);
+  }
+  return p;
+}
+
+/*
+** Report an error that an expression is not valid for a partial index WHERE
+** clause.
+*/
+static void notValidPartIdxWhere(
+  Parse *pParse,       /* Leave error message here */
+  NameContext *pNC,    /* The name context */
+  const char *zMsg     /* Type of error */
+){
+  if( (pNC->ncFlags & NC_PartIdx)!=0 ){
+    sqlite3ErrorMsg(pParse, "%s prohibited in partial index WHERE clauses",
+                    zMsg);
+  }
+}
+
+#ifndef SQLITE_OMIT_CHECK
+/*
+** Report an error that an expression is not valid for a CHECK constraint.
+*/
+static void notValidCheckConstraint(
+  Parse *pParse,       /* Leave error message here */
+  NameContext *pNC,    /* The name context */
+  const char *zMsg     /* Type of error */
+){
+  if( (pNC->ncFlags & NC_IsCheck)!=0 ){
+    sqlite3ErrorMsg(pParse,"%s prohibited in CHECK constraints", zMsg);
+  }
+}
+#else
+# define notValidCheckConstraint(P,N,M)
+#endif
+
+/*
+** Expression p should encode a floating point value between 1.0 and 0.0.
+** Return 1024 times this value.  Or return -1 if p is not a floating point
+** value between 1.0 and 0.0.
+*/
+static int exprProbability(Expr *p){
+  double r = -1.0;
+  if( p->op!=TK_FLOAT ) return -1;
+  sqlite3AtoF(p->u.zToken, &r, sqlite3Strlen30(p->u.zToken), SQLITE_UTF8);
+  assert( r>=0.0 );
+  if( r>1.0 ) return -1;
+  return (int)(r*134217728.0);
+}
+
+/*
+** This routine is callback for sqlite3WalkExpr().
+**
+** Resolve symbolic names into TK_COLUMN operators for the current
+** node in the expression tree.  Return 0 to continue the search down
+** the tree or 2 to abort the tree walk.
+**
+** This routine also does error checking and name resolution for
+** function names.  The operator for aggregate functions is changed
+** to TK_AGG_FUNCTION.
+*/
+static int resolveExprStep(Walker *pWalker, Expr *pExpr){
+  NameContext *pNC;
+  Parse *pParse;
+
+  pNC = pWalker->u.pNC;
+  assert( pNC!=0 );
+  pParse = pNC->pParse;
+  assert( pParse==pWalker->pParse );
+
+  if( ExprHasProperty(pExpr, EP_Resolved) ) return WRC_Prune;
+  ExprSetProperty(pExpr, EP_Resolved);
+#ifndef NDEBUG
+  if( pNC->pSrcList && pNC->pSrcList->nAlloc>0 ){
+    SrcList *pSrcList = pNC->pSrcList;
+    int i;
+    for(i=0; i<pNC->pSrcList->nSrc; i++){
+      assert( pSrcList->a[i].iCursor>=0 && pSrcList->a[i].iCursor<pParse->nTab);
+    }
+  }
+#endif
+  switch( pExpr->op ){
+
+#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY)
+    /* The special operator TK_ROW means use the rowid for the first
+    ** column in the FROM clause.  This is used by the LIMIT and ORDER BY
+    ** clause processing on UPDATE and DELETE statements.
+    */
+    case TK_ROW: {
+      SrcList *pSrcList = pNC->pSrcList;
+      struct SrcList_item *pItem;
+      assert( pSrcList && pSrcList->nSrc==1 );
+      pItem = pSrcList->a; 
+      pExpr->op = TK_COLUMN;
+      pExpr->pTab = pItem->pTab;
+      pExpr->iTable = pItem->iCursor;
+      pExpr->iColumn = -1;
+      pExpr->affinity = SQLITE_AFF_INTEGER;
+      break;
+    }
+#endif /* defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY) */
+
+    /* A lone identifier is the name of a column.
+    */
+    case TK_ID: {
+      return lookupName(pParse, 0, 0, pExpr->u.zToken, pNC, pExpr);
+    }
+  
+    /* A table name and column name:     ID.ID
+    ** Or a database, table and column:  ID.ID.ID
+    */
+    case TK_DOT: {
+      const char *zColumn;
+      const char *zTable;
+      const char *zDb;
+      Expr *pRight;
+
+      /* if( pSrcList==0 ) break; */
+      pRight = pExpr->pRight;
+      if( pRight->op==TK_ID ){
+        zDb = 0;
+        zTable = pExpr->pLeft->u.zToken;
+        zColumn = pRight->u.zToken;
+      }else{
+        assert( pRight->op==TK_DOT );
+        zDb = pExpr->pLeft->u.zToken;
+        zTable = pRight->pLeft->u.zToken;
+        zColumn = pRight->pRight->u.zToken;
+      }
+      return lookupName(pParse, zDb, zTable, zColumn, pNC, pExpr);
+    }
+
+    /* Resolve function names
+    */
+    case TK_FUNCTION: {
+      ExprList *pList = pExpr->x.pList;    /* The argument list */
+      int n = pList ? pList->nExpr : 0;    /* Number of arguments */
+      int no_such_func = 0;       /* True if no such function exists */
+      int wrong_num_args = 0;     /* True if wrong number of arguments */
+      int is_agg = 0;             /* True if is an aggregate function */
+      int auth;                   /* Authorization to use the function */
+      int nId;                    /* Number of characters in function name */
+      const char *zId;            /* The function name. */
+      FuncDef *pDef;              /* Information about the function */
+      u8 enc = ENC(pParse->db);   /* The database encoding */
+
+      assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+      notValidPartIdxWhere(pParse, pNC, "functions");
+      zId = pExpr->u.zToken;
+      nId = sqlite3Strlen30(zId);
+      pDef = sqlite3FindFunction(pParse->db, zId, nId, n, enc, 0);
+      if( pDef==0 ){
+        pDef = sqlite3FindFunction(pParse->db, zId, nId, -2, enc, 0);
+        if( pDef==0 ){
+          no_such_func = 1;
+        }else{
+          wrong_num_args = 1;
+        }
+      }else{
+        is_agg = pDef->xFunc==0;
+        if( pDef->funcFlags & SQLITE_FUNC_UNLIKELY ){
+          ExprSetProperty(pExpr, EP_Unlikely|EP_Skip);
+          if( n==2 ){
+            pExpr->iTable = exprProbability(pList->a[1].pExpr);
+            if( pExpr->iTable<0 ){
+              sqlite3ErrorMsg(pParse, "second argument to likelihood() must be a "
+                                      "constant between 0.0 and 1.0");
+              pNC->nErr++;
+            }
+          }else{
+            /* EVIDENCE-OF: R-61304-29449 The unlikely(X) function is equivalent to
+            ** likelihood(X, 0.0625).
+            ** EVIDENCE-OF: R-01283-11636 The unlikely(X) function is short-hand for
+            ** likelihood(X,0.0625).
+            ** EVIDENCE-OF: R-36850-34127 The likely(X) function is short-hand for
+            ** likelihood(X,0.9375).
+            ** EVIDENCE-OF: R-53436-40973 The likely(X) function is equivalent to
+            ** likelihood(X,0.9375). */
+            /* TUNING: unlikely() probability is 0.0625.  likely() is 0.9375 */
+            pExpr->iTable = pDef->zName[0]=='u' ? 8388608 : 125829120;
+          }             
+        }
+#ifndef SQLITE_OMIT_AUTHORIZATION
+        auth = sqlite3AuthCheck(pParse, SQLITE_FUNCTION, 0, pDef->zName, 0);
+        if( auth!=SQLITE_OK ){
+          if( auth==SQLITE_DENY ){
+            sqlite3ErrorMsg(pParse, "not authorized to use function: %s",
+                                    pDef->zName);
+            pNC->nErr++;
+          }
+          pExpr->op = TK_NULL;
+          return WRC_Prune;
+        }
+#endif
+        if( pDef->funcFlags & SQLITE_FUNC_CONSTANT ) ExprSetProperty(pExpr,EP_Constant);
+      }
+      if( is_agg && (pNC->ncFlags & NC_AllowAgg)==0 ){
+        sqlite3ErrorMsg(pParse, "misuse of aggregate function %.*s()", nId,zId);
+        pNC->nErr++;
+        is_agg = 0;
+      }else if( no_such_func && pParse->db->init.busy==0 ){
+        sqlite3ErrorMsg(pParse, "no such function: %.*s", nId, zId);
+        pNC->nErr++;
+      }else if( wrong_num_args ){
+        sqlite3ErrorMsg(pParse,"wrong number of arguments to function %.*s()",
+             nId, zId);
+        pNC->nErr++;
+      }
+      if( is_agg ) pNC->ncFlags &= ~NC_AllowAgg;
+      sqlite3WalkExprList(pWalker, pList);
+      if( is_agg ){
+        NameContext *pNC2 = pNC;
+        pExpr->op = TK_AGG_FUNCTION;
+        pExpr->op2 = 0;
+        while( pNC2 && !sqlite3FunctionUsesThisSrc(pExpr, pNC2->pSrcList) ){
+          pExpr->op2++;
+          pNC2 = pNC2->pNext;
+        }
+        assert( pDef!=0 );
+        if( pNC2 ){
+          assert( SQLITE_FUNC_MINMAX==NC_MinMaxAgg );
+          testcase( (pDef->funcFlags & SQLITE_FUNC_MINMAX)!=0 );
+          pNC2->ncFlags |= NC_HasAgg | (pDef->funcFlags & SQLITE_FUNC_MINMAX);
+
+        }
+        pNC->ncFlags |= NC_AllowAgg;
+      }
+      /* FIX ME:  Compute pExpr->affinity based on the expected return
+      ** type of the function 
+      */
+      return WRC_Prune;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_SELECT:
+    case TK_EXISTS:  testcase( pExpr->op==TK_EXISTS );
+#endif
+    case TK_IN: {
+      testcase( pExpr->op==TK_IN );
+      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+        int nRef = pNC->nRef;
+        notValidCheckConstraint(pParse, pNC, "subqueries");
+        notValidPartIdxWhere(pParse, pNC, "subqueries");
+        sqlite3WalkSelect(pWalker, pExpr->x.pSelect);
+        assert( pNC->nRef>=nRef );
+        if( nRef!=pNC->nRef ){
+          ExprSetProperty(pExpr, EP_VarSelect);
+        }
+      }
+      break;
+    }
+    case TK_VARIABLE: {
+      notValidCheckConstraint(pParse, pNC, "parameters");
+      notValidPartIdxWhere(pParse, pNC, "parameters");
+      break;
+    }
+  }
+  return (pParse->nErr || pParse->db->mallocFailed) ? WRC_Abort : WRC_Continue;
+}
+
+/*
+** pEList is a list of expressions which are really the result set of the
+** a SELECT statement.  pE is a term in an ORDER BY or GROUP BY clause.
+** This routine checks to see if pE is a simple identifier which corresponds
+** to the AS-name of one of the terms of the expression list.  If it is,
+** this routine return an integer between 1 and N where N is the number of
+** elements in pEList, corresponding to the matching entry.  If there is
+** no match, or if pE is not a simple identifier, then this routine
+** return 0.
+**
+** pEList has been resolved.  pE has not.
+*/
+static int resolveAsName(
+  Parse *pParse,     /* Parsing context for error messages */
+  ExprList *pEList,  /* List of expressions to scan */
+  Expr *pE           /* Expression we are trying to match */
+){
+  int i;             /* Loop counter */
+
+  UNUSED_PARAMETER(pParse);
+
+  if( pE->op==TK_ID ){
+    char *zCol = pE->u.zToken;
+    for(i=0; i<pEList->nExpr; i++){
+      char *zAs = pEList->a[i].zName;
+      if( zAs!=0 && sqlite3StrICmp(zAs, zCol)==0 ){
+        return i+1;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** pE is a pointer to an expression which is a single term in the
+** ORDER BY of a compound SELECT.  The expression has not been
+** name resolved.
+**
+** At the point this routine is called, we already know that the
+** ORDER BY term is not an integer index into the result set.  That
+** case is handled by the calling routine.
+**
+** Attempt to match pE against result set columns in the left-most
+** SELECT statement.  Return the index i of the matching column,
+** as an indication to the caller that it should sort by the i-th column.
+** The left-most column is 1.  In other words, the value returned is the
+** same integer value that would be used in the SQL statement to indicate
+** the column.
+**
+** If there is no match, return 0.  Return -1 if an error occurs.
+*/
+static int resolveOrderByTermToExprList(
+  Parse *pParse,     /* Parsing context for error messages */
+  Select *pSelect,   /* The SELECT statement with the ORDER BY clause */
+  Expr *pE           /* The specific ORDER BY term */
+){
+  int i;             /* Loop counter */
+  ExprList *pEList;  /* The columns of the result set */
+  NameContext nc;    /* Name context for resolving pE */
+  sqlite3 *db;       /* Database connection */
+  int rc;            /* Return code from subprocedures */
+  u8 savedSuppErr;   /* Saved value of db->suppressErr */
+
+  assert( sqlite3ExprIsInteger(pE, &i)==0 );
+  pEList = pSelect->pEList;
+
+  /* Resolve all names in the ORDER BY term expression
+  */
+  memset(&nc, 0, sizeof(nc));
+  nc.pParse = pParse;
+  nc.pSrcList = pSelect->pSrc;
+  nc.pEList = pEList;
+  nc.ncFlags = NC_AllowAgg;
+  nc.nErr = 0;
+  db = pParse->db;
+  savedSuppErr = db->suppressErr;
+  db->suppressErr = 1;
+  rc = sqlite3ResolveExprNames(&nc, pE);
+  db->suppressErr = savedSuppErr;
+  if( rc ) return 0;
+
+  /* Try to match the ORDER BY expression against an expression
+  ** in the result set.  Return an 1-based index of the matching
+  ** result-set entry.
+  */
+  for(i=0; i<pEList->nExpr; i++){
+    if( sqlite3ExprCompare(pEList->a[i].pExpr, pE, -1)<2 ){
+      return i+1;
+    }
+  }
+
+  /* If no match, return 0. */
+  return 0;
+}
+
+/*
+** Generate an ORDER BY or GROUP BY term out-of-range error.
+*/
+static void resolveOutOfRangeError(
+  Parse *pParse,         /* The error context into which to write the error */
+  const char *zType,     /* "ORDER" or "GROUP" */
+  int i,                 /* The index (1-based) of the term out of range */
+  int mx                 /* Largest permissible value of i */
+){
+  sqlite3ErrorMsg(pParse, 
+    "%r %s BY term out of range - should be "
+    "between 1 and %d", i, zType, mx);
+}
+
+/*
+** Analyze the ORDER BY clause in a compound SELECT statement.   Modify
+** each term of the ORDER BY clause is a constant integer between 1
+** and N where N is the number of columns in the compound SELECT.
+**
+** ORDER BY terms that are already an integer between 1 and N are
+** unmodified.  ORDER BY terms that are integers outside the range of
+** 1 through N generate an error.  ORDER BY terms that are expressions
+** are matched against result set expressions of compound SELECT
+** beginning with the left-most SELECT and working toward the right.
+** At the first match, the ORDER BY expression is transformed into
+** the integer column number.
+**
+** Return the number of errors seen.
+*/
+static int resolveCompoundOrderBy(
+  Parse *pParse,        /* Parsing context.  Leave error messages here */
+  Select *pSelect       /* The SELECT statement containing the ORDER BY */
+){
+  int i;
+  ExprList *pOrderBy;
+  ExprList *pEList;
+  sqlite3 *db;
+  int moreToDo = 1;
+
+  pOrderBy = pSelect->pOrderBy;
+  if( pOrderBy==0 ) return 0;
+  db = pParse->db;
+#if SQLITE_MAX_COLUMN
+  if( pOrderBy->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){
+    sqlite3ErrorMsg(pParse, "too many terms in ORDER BY clause");
+    return 1;
+  }
+#endif
+  for(i=0; i<pOrderBy->nExpr; i++){
+    pOrderBy->a[i].done = 0;
+  }
+  pSelect->pNext = 0;
+  while( pSelect->pPrior ){
+    pSelect->pPrior->pNext = pSelect;
+    pSelect = pSelect->pPrior;
+  }
+  while( pSelect && moreToDo ){
+    struct ExprList_item *pItem;
+    moreToDo = 0;
+    pEList = pSelect->pEList;
+    assert( pEList!=0 );
+    for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
+      int iCol = -1;
+      Expr *pE, *pDup;
+      if( pItem->done ) continue;
+      pE = sqlite3ExprSkipCollate(pItem->pExpr);
+      if( sqlite3ExprIsInteger(pE, &iCol) ){
+        if( iCol<=0 || iCol>pEList->nExpr ){
+          resolveOutOfRangeError(pParse, "ORDER", i+1, pEList->nExpr);
+          return 1;
+        }
+      }else{
+        iCol = resolveAsName(pParse, pEList, pE);
+        if( iCol==0 ){
+          pDup = sqlite3ExprDup(db, pE, 0);
+          if( !db->mallocFailed ){
+            assert(pDup);
+            iCol = resolveOrderByTermToExprList(pParse, pSelect, pDup);
+          }
+          sqlite3ExprDelete(db, pDup);
+        }
+      }
+      if( iCol>0 ){
+        /* Convert the ORDER BY term into an integer column number iCol,
+        ** taking care to preserve the COLLATE clause if it exists */
+        Expr *pNew = sqlite3Expr(db, TK_INTEGER, 0);
+        if( pNew==0 ) return 1;
+        pNew->flags |= EP_IntValue;
+        pNew->u.iValue = iCol;
+        if( pItem->pExpr==pE ){
+          pItem->pExpr = pNew;
+        }else{
+          assert( pItem->pExpr->op==TK_COLLATE );
+          assert( pItem->pExpr->pLeft==pE );
+          pItem->pExpr->pLeft = pNew;
+        }
+        sqlite3ExprDelete(db, pE);
+        pItem->u.x.iOrderByCol = (u16)iCol;
+        pItem->done = 1;
+      }else{
+        moreToDo = 1;
+      }
+    }
+    pSelect = pSelect->pNext;
+  }
+  for(i=0; i<pOrderBy->nExpr; i++){
+    if( pOrderBy->a[i].done==0 ){
+      sqlite3ErrorMsg(pParse, "%r ORDER BY term does not match any "
+            "column in the result set", i+1);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Check every term in the ORDER BY or GROUP BY clause pOrderBy of
+** the SELECT statement pSelect.  If any term is reference to a
+** result set expression (as determined by the ExprList.a.u.x.iOrderByCol
+** field) then convert that term into a copy of the corresponding result set
+** column.
+**
+** If any errors are detected, add an error message to pParse and
+** return non-zero.  Return zero if no errors are seen.
+*/
+SQLITE_PRIVATE int sqlite3ResolveOrderGroupBy(
+  Parse *pParse,        /* Parsing context.  Leave error messages here */
+  Select *pSelect,      /* The SELECT statement containing the clause */
+  ExprList *pOrderBy,   /* The ORDER BY or GROUP BY clause to be processed */
+  const char *zType     /* "ORDER" or "GROUP" */
+){
+  int i;
+  sqlite3 *db = pParse->db;
+  ExprList *pEList;
+  struct ExprList_item *pItem;
+
+  if( pOrderBy==0 || pParse->db->mallocFailed ) return 0;
+#if SQLITE_MAX_COLUMN
+  if( pOrderBy->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){
+    sqlite3ErrorMsg(pParse, "too many terms in %s BY clause", zType);
+    return 1;
+  }
+#endif
+  pEList = pSelect->pEList;
+  assert( pEList!=0 );  /* sqlite3SelectNew() guarantees this */
+  for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
+    if( pItem->u.x.iOrderByCol ){
+      if( pItem->u.x.iOrderByCol>pEList->nExpr ){
+        resolveOutOfRangeError(pParse, zType, i+1, pEList->nExpr);
+        return 1;
+      }
+      resolveAlias(pParse, pEList, pItem->u.x.iOrderByCol-1, pItem->pExpr, zType,0);
+    }
+  }
+  return 0;
+}
+
+/*
+** pOrderBy is an ORDER BY or GROUP BY clause in SELECT statement pSelect.
+** The Name context of the SELECT statement is pNC.  zType is either
+** "ORDER" or "GROUP" depending on which type of clause pOrderBy is.
+**
+** This routine resolves each term of the clause into an expression.
+** If the order-by term is an integer I between 1 and N (where N is the
+** number of columns in the result set of the SELECT) then the expression
+** in the resolution is a copy of the I-th result-set expression.  If
+** the order-by term is an identifier that corresponds to the AS-name of
+** a result-set expression, then the term resolves to a copy of the
+** result-set expression.  Otherwise, the expression is resolved in
+** the usual way - using sqlite3ResolveExprNames().
+**
+** This routine returns the number of errors.  If errors occur, then
+** an appropriate error message might be left in pParse.  (OOM errors
+** excepted.)
+*/
+static int resolveOrderGroupBy(
+  NameContext *pNC,     /* The name context of the SELECT statement */
+  Select *pSelect,      /* The SELECT statement holding pOrderBy */
+  ExprList *pOrderBy,   /* An ORDER BY or GROUP BY clause to resolve */
+  const char *zType     /* Either "ORDER" or "GROUP", as appropriate */
+){
+  int i, j;                      /* Loop counters */
+  int iCol;                      /* Column number */
+  struct ExprList_item *pItem;   /* A term of the ORDER BY clause */
+  Parse *pParse;                 /* Parsing context */
+  int nResult;                   /* Number of terms in the result set */
+
+  if( pOrderBy==0 ) return 0;
+  nResult = pSelect->pEList->nExpr;
+  pParse = pNC->pParse;
+  for(i=0, pItem=pOrderBy->a; i<pOrderBy->nExpr; i++, pItem++){
+    Expr *pE = pItem->pExpr;
+    Expr *pE2 = sqlite3ExprSkipCollate(pE);
+    if( zType[0]!='G' ){
+      iCol = resolveAsName(pParse, pSelect->pEList, pE2);
+      if( iCol>0 ){
+        /* If an AS-name match is found, mark this ORDER BY column as being
+        ** a copy of the iCol-th result-set column.  The subsequent call to
+        ** sqlite3ResolveOrderGroupBy() will convert the expression to a
+        ** copy of the iCol-th result-set expression. */
+        pItem->u.x.iOrderByCol = (u16)iCol;
+        continue;
+      }
+    }
+    if( sqlite3ExprIsInteger(pE2, &iCol) ){
+      /* The ORDER BY term is an integer constant.  Again, set the column
+      ** number so that sqlite3ResolveOrderGroupBy() will convert the
+      ** order-by term to a copy of the result-set expression */
+      if( iCol<1 || iCol>0xffff ){
+        resolveOutOfRangeError(pParse, zType, i+1, nResult);
+        return 1;
+      }
+      pItem->u.x.iOrderByCol = (u16)iCol;
+      continue;
+    }
+
+    /* Otherwise, treat the ORDER BY term as an ordinary expression */
+    pItem->u.x.iOrderByCol = 0;
+    if( sqlite3ResolveExprNames(pNC, pE) ){
+      return 1;
+    }
+    for(j=0; j<pSelect->pEList->nExpr; j++){
+      if( sqlite3ExprCompare(pE, pSelect->pEList->a[j].pExpr, -1)==0 ){
+        pItem->u.x.iOrderByCol = j+1;
+      }
+    }
+  }
+  return sqlite3ResolveOrderGroupBy(pParse, pSelect, pOrderBy, zType);
+}
+
+/*
+** Resolve names in the SELECT statement p and all of its descendants.
+*/
+static int resolveSelectStep(Walker *pWalker, Select *p){
+  NameContext *pOuterNC;  /* Context that contains this SELECT */
+  NameContext sNC;        /* Name context of this SELECT */
+  int isCompound;         /* True if p is a compound select */
+  int nCompound;          /* Number of compound terms processed so far */
+  Parse *pParse;          /* Parsing context */
+  ExprList *pEList;       /* Result set expression list */
+  int i;                  /* Loop counter */
+  ExprList *pGroupBy;     /* The GROUP BY clause */
+  Select *pLeftmost;      /* Left-most of SELECT of a compound */
+  sqlite3 *db;            /* Database connection */
+  
+
+  assert( p!=0 );
+  if( p->selFlags & SF_Resolved ){
+    return WRC_Prune;
+  }
+  pOuterNC = pWalker->u.pNC;
+  pParse = pWalker->pParse;
+  db = pParse->db;
+
+  /* Normally sqlite3SelectExpand() will be called first and will have
+  ** already expanded this SELECT.  However, if this is a subquery within
+  ** an expression, sqlite3ResolveExprNames() will be called without a
+  ** prior call to sqlite3SelectExpand().  When that happens, let
+  ** sqlite3SelectPrep() do all of the processing for this SELECT.
+  ** sqlite3SelectPrep() will invoke both sqlite3SelectExpand() and
+  ** this routine in the correct order.
+  */
+  if( (p->selFlags & SF_Expanded)==0 ){
+    sqlite3SelectPrep(pParse, p, pOuterNC);
+    return (pParse->nErr || db->mallocFailed) ? WRC_Abort : WRC_Prune;
+  }
+
+  isCompound = p->pPrior!=0;
+  nCompound = 0;
+  pLeftmost = p;
+  while( p ){
+    assert( (p->selFlags & SF_Expanded)!=0 );
+    assert( (p->selFlags & SF_Resolved)==0 );
+    p->selFlags |= SF_Resolved;
+
+    /* Resolve the expressions in the LIMIT and OFFSET clauses. These
+    ** are not allowed to refer to any names, so pass an empty NameContext.
+    */
+    memset(&sNC, 0, sizeof(sNC));
+    sNC.pParse = pParse;
+    if( sqlite3ResolveExprNames(&sNC, p->pLimit) ||
+        sqlite3ResolveExprNames(&sNC, p->pOffset) ){
+      return WRC_Abort;
+    }
+  
+    /* Recursively resolve names in all subqueries
+    */
+    for(i=0; i<p->pSrc->nSrc; i++){
+      struct SrcList_item *pItem = &p->pSrc->a[i];
+      if( pItem->pSelect ){
+        NameContext *pNC;         /* Used to iterate name contexts */
+        int nRef = 0;             /* Refcount for pOuterNC and outer contexts */
+        const char *zSavedContext = pParse->zAuthContext;
+
+        /* Count the total number of references to pOuterNC and all of its
+        ** parent contexts. After resolving references to expressions in
+        ** pItem->pSelect, check if this value has changed. If so, then
+        ** SELECT statement pItem->pSelect must be correlated. Set the
+        ** pItem->isCorrelated flag if this is the case. */
+        for(pNC=pOuterNC; pNC; pNC=pNC->pNext) nRef += pNC->nRef;
+
+        if( pItem->zName ) pParse->zAuthContext = pItem->zName;
+        sqlite3ResolveSelectNames(pParse, pItem->pSelect, pOuterNC);
+        pParse->zAuthContext = zSavedContext;
+        if( pParse->nErr || db->mallocFailed ) return WRC_Abort;
+
+        for(pNC=pOuterNC; pNC; pNC=pNC->pNext) nRef -= pNC->nRef;
+        assert( pItem->isCorrelated==0 && nRef<=0 );
+        pItem->isCorrelated = (nRef!=0);
+      }
+    }
+  
+    /* Set up the local name-context to pass to sqlite3ResolveExprNames() to
+    ** resolve the result-set expression list.
+    */
+    sNC.ncFlags = NC_AllowAgg;
+    sNC.pSrcList = p->pSrc;
+    sNC.pNext = pOuterNC;
+  
+    /* Resolve names in the result set. */
+    pEList = p->pEList;
+    assert( pEList!=0 );
+    for(i=0; i<pEList->nExpr; i++){
+      Expr *pX = pEList->a[i].pExpr;
+      if( sqlite3ResolveExprNames(&sNC, pX) ){
+        return WRC_Abort;
+      }
+    }
+  
+    /* If there are no aggregate functions in the result-set, and no GROUP BY 
+    ** expression, do not allow aggregates in any of the other expressions.
+    */
+    assert( (p->selFlags & SF_Aggregate)==0 );
+    pGroupBy = p->pGroupBy;
+    if( pGroupBy || (sNC.ncFlags & NC_HasAgg)!=0 ){
+      assert( NC_MinMaxAgg==SF_MinMaxAgg );
+      p->selFlags |= SF_Aggregate | (sNC.ncFlags&NC_MinMaxAgg);
+    }else{
+      sNC.ncFlags &= ~NC_AllowAgg;
+    }
+  
+    /* If a HAVING clause is present, then there must be a GROUP BY clause.
+    */
+    if( p->pHaving && !pGroupBy ){
+      sqlite3ErrorMsg(pParse, "a GROUP BY clause is required before HAVING");
+      return WRC_Abort;
+    }
+  
+    /* Add the output column list to the name-context before parsing the
+    ** other expressions in the SELECT statement. This is so that
+    ** expressions in the WHERE clause (etc.) can refer to expressions by
+    ** aliases in the result set.
+    **
+    ** Minor point: If this is the case, then the expression will be
+    ** re-evaluated for each reference to it.
+    */
+    sNC.pEList = p->pEList;
+    if( sqlite3ResolveExprNames(&sNC, p->pHaving) ) return WRC_Abort;
+    if( sqlite3ResolveExprNames(&sNC, p->pWhere) ) return WRC_Abort;
+
+    /* The ORDER BY and GROUP BY clauses may not refer to terms in
+    ** outer queries 
+    */
+    sNC.pNext = 0;
+    sNC.ncFlags |= NC_AllowAgg;
+
+    /* Process the ORDER BY clause for singleton SELECT statements.
+    ** The ORDER BY clause for compounds SELECT statements is handled
+    ** below, after all of the result-sets for all of the elements of
+    ** the compound have been resolved.
+    */
+    if( !isCompound && resolveOrderGroupBy(&sNC, p, p->pOrderBy, "ORDER") ){
+      return WRC_Abort;
+    }
+    if( db->mallocFailed ){
+      return WRC_Abort;
+    }
+  
+    /* Resolve the GROUP BY clause.  At the same time, make sure 
+    ** the GROUP BY clause does not contain aggregate functions.
+    */
+    if( pGroupBy ){
+      struct ExprList_item *pItem;
+    
+      if( resolveOrderGroupBy(&sNC, p, pGroupBy, "GROUP") || db->mallocFailed ){
+        return WRC_Abort;
+      }
+      for(i=0, pItem=pGroupBy->a; i<pGroupBy->nExpr; i++, pItem++){
+        if( ExprHasProperty(pItem->pExpr, EP_Agg) ){
+          sqlite3ErrorMsg(pParse, "aggregate functions are not allowed in "
+              "the GROUP BY clause");
+          return WRC_Abort;
+        }
+      }
+    }
+
+    /* Advance to the next term of the compound
+    */
+    p = p->pPrior;
+    nCompound++;
+  }
+
+  /* Resolve the ORDER BY on a compound SELECT after all terms of
+  ** the compound have been resolved.
+  */
+  if( isCompound && resolveCompoundOrderBy(pParse, pLeftmost) ){
+    return WRC_Abort;
+  }
+
+  return WRC_Prune;
+}
+
+/*
+** This routine walks an expression tree and resolves references to
+** table columns and result-set columns.  At the same time, do error
+** checking on function usage and set a flag if any aggregate functions
+** are seen.
+**
+** To resolve table columns references we look for nodes (or subtrees) of the 
+** form X.Y.Z or Y.Z or just Z where
+**
+**      X:   The name of a database.  Ex:  "main" or "temp" or
+**           the symbolic name assigned to an ATTACH-ed database.
+**
+**      Y:   The name of a table in a FROM clause.  Or in a trigger
+**           one of the special names "old" or "new".
+**
+**      Z:   The name of a column in table Y.
+**
+** The node at the root of the subtree is modified as follows:
+**
+**    Expr.op        Changed to TK_COLUMN
+**    Expr.pTab      Points to the Table object for X.Y
+**    Expr.iColumn   The column index in X.Y.  -1 for the rowid.
+**    Expr.iTable    The VDBE cursor number for X.Y
+**
+**
+** To resolve result-set references, look for expression nodes of the
+** form Z (with no X and Y prefix) where the Z matches the right-hand
+** size of an AS clause in the result-set of a SELECT.  The Z expression
+** is replaced by a copy of the left-hand side of the result-set expression.
+** Table-name and function resolution occurs on the substituted expression
+** tree.  For example, in:
+**
+**      SELECT a+b AS x, c+d AS y FROM t1 ORDER BY x;
+**
+** The "x" term of the order by is replaced by "a+b" to render:
+**
+**      SELECT a+b AS x, c+d AS y FROM t1 ORDER BY a+b;
+**
+** Function calls are checked to make sure that the function is 
+** defined and that the correct number of arguments are specified.
+** If the function is an aggregate function, then the NC_HasAgg flag is
+** set and the opcode is changed from TK_FUNCTION to TK_AGG_FUNCTION.
+** If an expression contains aggregate functions then the EP_Agg
+** property on the expression is set.
+**
+** An error message is left in pParse if anything is amiss.  The number
+** if errors is returned.
+*/
+SQLITE_PRIVATE int sqlite3ResolveExprNames( 
+  NameContext *pNC,       /* Namespace to resolve expressions in. */
+  Expr *pExpr             /* The expression to be analyzed. */
+){
+  u16 savedHasAgg;
+  Walker w;
+
+  if( pExpr==0 ) return 0;
+#if SQLITE_MAX_EXPR_DEPTH>0
+  {
+    Parse *pParse = pNC->pParse;
+    if( sqlite3ExprCheckHeight(pParse, pExpr->nHeight+pNC->pParse->nHeight) ){
+      return 1;
+    }
+    pParse->nHeight += pExpr->nHeight;
+  }
+#endif
+  savedHasAgg = pNC->ncFlags & (NC_HasAgg|NC_MinMaxAgg);
+  pNC->ncFlags &= ~(NC_HasAgg|NC_MinMaxAgg);
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = resolveExprStep;
+  w.xSelectCallback = resolveSelectStep;
+  w.pParse = pNC->pParse;
+  w.u.pNC = pNC;
+  sqlite3WalkExpr(&w, pExpr);
+#if SQLITE_MAX_EXPR_DEPTH>0
+  pNC->pParse->nHeight -= pExpr->nHeight;
+#endif
+  if( pNC->nErr>0 || w.pParse->nErr>0 ){
+    ExprSetProperty(pExpr, EP_Error);
+  }
+  if( pNC->ncFlags & NC_HasAgg ){
+    ExprSetProperty(pExpr, EP_Agg);
+  }
+  pNC->ncFlags |= savedHasAgg;
+  return ExprHasProperty(pExpr, EP_Error);
+}
+
+
+/*
+** Resolve all names in all expressions of a SELECT and in all
+** decendents of the SELECT, including compounds off of p->pPrior,
+** subqueries in expressions, and subqueries used as FROM clause
+** terms.
+**
+** See sqlite3ResolveExprNames() for a description of the kinds of
+** transformations that occur.
+**
+** All SELECT statements should have been expanded using
+** sqlite3SelectExpand() prior to invoking this routine.
+*/
+SQLITE_PRIVATE void sqlite3ResolveSelectNames(
+  Parse *pParse,         /* The parser context */
+  Select *p,             /* The SELECT statement being coded. */
+  NameContext *pOuterNC  /* Name context for parent SELECT statement */
+){
+  Walker w;
+
+  assert( p!=0 );
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = resolveExprStep;
+  w.xSelectCallback = resolveSelectStep;
+  w.pParse = pParse;
+  w.u.pNC = pOuterNC;
+  sqlite3WalkSelect(&w, p);
+}
+
+/*
+** Resolve names in expressions that can only reference a single table:
+**
+**    *   CHECK constraints
+**    *   WHERE clauses on partial indices
+**
+** The Expr.iTable value for Expr.op==TK_COLUMN nodes of the expression
+** is set to -1 and the Expr.iColumn value is set to the column number.
+**
+** Any errors cause an error message to be set in pParse.
+*/
+SQLITE_PRIVATE void sqlite3ResolveSelfReference(
+  Parse *pParse,      /* Parsing context */
+  Table *pTab,        /* The table being referenced */
+  int type,           /* NC_IsCheck or NC_PartIdx */
+  Expr *pExpr,        /* Expression to resolve.  May be NULL. */
+  ExprList *pList     /* Expression list to resolve.  May be NUL. */
+){
+  SrcList sSrc;                   /* Fake SrcList for pParse->pNewTable */
+  NameContext sNC;                /* Name context for pParse->pNewTable */
+  int i;                          /* Loop counter */
+
+  assert( type==NC_IsCheck || type==NC_PartIdx );
+  memset(&sNC, 0, sizeof(sNC));
+  memset(&sSrc, 0, sizeof(sSrc));
+  sSrc.nSrc = 1;
+  sSrc.a[0].zName = pTab->zName;
+  sSrc.a[0].pTab = pTab;
+  sSrc.a[0].iCursor = -1;
+  sNC.pParse = pParse;
+  sNC.pSrcList = &sSrc;
+  sNC.ncFlags = type;
+  if( sqlite3ResolveExprNames(&sNC, pExpr) ) return;
+  if( pList ){
+    for(i=0; i<pList->nExpr; i++){
+      if( sqlite3ResolveExprNames(&sNC, pList->a[i].pExpr) ){
+        return;
+      }
+    }
+  }
+}
+
+/************** End of resolve.c *********************************************/
+/************** Begin file expr.c ********************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains routines used for analyzing expressions and
+** for generating VDBE code that evaluates expressions in SQLite.
+*/
+
+/*
+** Return the 'affinity' of the expression pExpr if any.
+**
+** If pExpr is a column, a reference to a column via an 'AS' alias,
+** or a sub-select with a column as the return value, then the 
+** affinity of that column is returned. Otherwise, 0x00 is returned,
+** indicating no affinity for the expression.
+**
+** i.e. the WHERE clause expressions in the following statements all
+** have an affinity:
+**
+** CREATE TABLE t1(a);
+** SELECT * FROM t1 WHERE a;
+** SELECT a AS b FROM t1 WHERE b;
+** SELECT * FROM t1 WHERE (select a from t1);
+*/
+SQLITE_PRIVATE char sqlite3ExprAffinity(Expr *pExpr){
+  int op;
+  pExpr = sqlite3ExprSkipCollate(pExpr);
+  if( pExpr->flags & EP_Generic ) return 0;
+  op = pExpr->op;
+  if( op==TK_SELECT ){
+    assert( pExpr->flags&EP_xIsSelect );
+    return sqlite3ExprAffinity(pExpr->x.pSelect->pEList->a[0].pExpr);
+  }
+#ifndef SQLITE_OMIT_CAST
+  if( op==TK_CAST ){
+    assert( !ExprHasProperty(pExpr, EP_IntValue) );
+    return sqlite3AffinityType(pExpr->u.zToken, 0);
+  }
+#endif
+  if( (op==TK_AGG_COLUMN || op==TK_COLUMN || op==TK_REGISTER) 
+   && pExpr->pTab!=0
+  ){
+    /* op==TK_REGISTER && pExpr->pTab!=0 happens when pExpr was originally
+    ** a TK_COLUMN but was previously evaluated and cached in a register */
+    int j = pExpr->iColumn;
+    if( j<0 ) return SQLITE_AFF_INTEGER;
+    assert( pExpr->pTab && j<pExpr->pTab->nCol );
+    return pExpr->pTab->aCol[j].affinity;
+  }
+  return pExpr->affinity;
+}
+
+/*
+** Set the collating sequence for expression pExpr to be the collating
+** sequence named by pToken.   Return a pointer to a new Expr node that
+** implements the COLLATE operator.
+**
+** If a memory allocation error occurs, that fact is recorded in pParse->db
+** and the pExpr parameter is returned unchanged.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprAddCollateToken(
+  Parse *pParse,           /* Parsing context */
+  Expr *pExpr,             /* Add the "COLLATE" clause to this expression */
+  const Token *pCollName   /* Name of collating sequence */
+){
+  if( pCollName->n>0 ){
+    Expr *pNew = sqlite3ExprAlloc(pParse->db, TK_COLLATE, pCollName, 1);
+    if( pNew ){
+      pNew->pLeft = pExpr;
+      pNew->flags |= EP_Collate|EP_Skip;
+      pExpr = pNew;
+    }
+  }
+  return pExpr;
+}
+SQLITE_PRIVATE Expr *sqlite3ExprAddCollateString(Parse *pParse, Expr *pExpr, const char *zC){
+  Token s;
+  assert( zC!=0 );
+  s.z = zC;
+  s.n = sqlite3Strlen30(s.z);
+  return sqlite3ExprAddCollateToken(pParse, pExpr, &s);
+}
+
+/*
+** Skip over any TK_COLLATE or TK_AS operators and any unlikely()
+** or likelihood() function at the root of an expression.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprSkipCollate(Expr *pExpr){
+  while( pExpr && ExprHasProperty(pExpr, EP_Skip) ){
+    if( ExprHasProperty(pExpr, EP_Unlikely) ){
+      assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+      assert( pExpr->x.pList->nExpr>0 );
+      assert( pExpr->op==TK_FUNCTION );
+      pExpr = pExpr->x.pList->a[0].pExpr;
+    }else{
+      assert( pExpr->op==TK_COLLATE || pExpr->op==TK_AS );
+      pExpr = pExpr->pLeft;
+    }
+  }   
+  return pExpr;
+}
+
+/*
+** Return the collation sequence for the expression pExpr. If
+** there is no defined collating sequence, return NULL.
+**
+** The collating sequence might be determined by a COLLATE operator
+** or by the presence of a column with a defined collating sequence.
+** COLLATE operators take first precedence.  Left operands take
+** precedence over right operands.
+*/
+SQLITE_PRIVATE CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr){
+  sqlite3 *db = pParse->db;
+  CollSeq *pColl = 0;
+  Expr *p = pExpr;
+  while( p ){
+    int op = p->op;
+    if( p->flags & EP_Generic ) break;
+    if( op==TK_CAST || op==TK_UPLUS ){
+      p = p->pLeft;
+      continue;
+    }
+    if( op==TK_COLLATE || (op==TK_REGISTER && p->op2==TK_COLLATE) ){
+      pColl = sqlite3GetCollSeq(pParse, ENC(db), 0, p->u.zToken);
+      break;
+    }
+    if( p->pTab!=0
+     && (op==TK_AGG_COLUMN || op==TK_COLUMN
+          || op==TK_REGISTER || op==TK_TRIGGER)
+    ){
+      /* op==TK_REGISTER && p->pTab!=0 happens when pExpr was originally
+      ** a TK_COLUMN but was previously evaluated and cached in a register */
+      int j = p->iColumn;
+      if( j>=0 ){
+        const char *zColl = p->pTab->aCol[j].zColl;
+        pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0);
+      }
+      break;
+    }
+    if( p->flags & EP_Collate ){
+      if( ALWAYS(p->pLeft) && (p->pLeft->flags & EP_Collate)!=0 ){
+        p = p->pLeft;
+      }else{
+        p = p->pRight;
+      }
+    }else{
+      break;
+    }
+  }
+  if( sqlite3CheckCollSeq(pParse, pColl) ){ 
+    pColl = 0;
+  }
+  return pColl;
+}
+
+/*
+** pExpr is an operand of a comparison operator.  aff2 is the
+** type affinity of the other operand.  This routine returns the
+** type affinity that should be used for the comparison operator.
+*/
+SQLITE_PRIVATE char sqlite3CompareAffinity(Expr *pExpr, char aff2){
+  char aff1 = sqlite3ExprAffinity(pExpr);
+  if( aff1 && aff2 ){
+    /* Both sides of the comparison are columns. If one has numeric
+    ** affinity, use that. Otherwise use no affinity.
+    */
+    if( sqlite3IsNumericAffinity(aff1) || sqlite3IsNumericAffinity(aff2) ){
+      return SQLITE_AFF_NUMERIC;
+    }else{
+      return SQLITE_AFF_NONE;
+    }
+  }else if( !aff1 && !aff2 ){
+    /* Neither side of the comparison is a column.  Compare the
+    ** results directly.
+    */
+    return SQLITE_AFF_NONE;
+  }else{
+    /* One side is a column, the other is not. Use the columns affinity. */
+    assert( aff1==0 || aff2==0 );
+    return (aff1 + aff2);
+  }
+}
+
+/*
+** pExpr is a comparison operator.  Return the type affinity that should
+** be applied to both operands prior to doing the comparison.
+*/
+static char comparisonAffinity(Expr *pExpr){
+  char aff;
+  assert( pExpr->op==TK_EQ || pExpr->op==TK_IN || pExpr->op==TK_LT ||
+          pExpr->op==TK_GT || pExpr->op==TK_GE || pExpr->op==TK_LE ||
+          pExpr->op==TK_NE || pExpr->op==TK_IS || pExpr->op==TK_ISNOT );
+  assert( pExpr->pLeft );
+  aff = sqlite3ExprAffinity(pExpr->pLeft);
+  if( pExpr->pRight ){
+    aff = sqlite3CompareAffinity(pExpr->pRight, aff);
+  }else if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+    aff = sqlite3CompareAffinity(pExpr->x.pSelect->pEList->a[0].pExpr, aff);
+  }else if( !aff ){
+    aff = SQLITE_AFF_NONE;
+  }
+  return aff;
+}
+
+/*
+** pExpr is a comparison expression, eg. '=', '<', IN(...) etc.
+** idx_affinity is the affinity of an indexed column. Return true
+** if the index with affinity idx_affinity may be used to implement
+** the comparison in pExpr.
+*/
+SQLITE_PRIVATE int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity){
+  char aff = comparisonAffinity(pExpr);
+  switch( aff ){
+    case SQLITE_AFF_NONE:
+      return 1;
+    case SQLITE_AFF_TEXT:
+      return idx_affinity==SQLITE_AFF_TEXT;
+    default:
+      return sqlite3IsNumericAffinity(idx_affinity);
+  }
+}
+
+/*
+** Return the P5 value that should be used for a binary comparison
+** opcode (OP_Eq, OP_Ge etc.) used to compare pExpr1 and pExpr2.
+*/
+static u8 binaryCompareP5(Expr *pExpr1, Expr *pExpr2, int jumpIfNull){
+  u8 aff = (char)sqlite3ExprAffinity(pExpr2);
+  aff = (u8)sqlite3CompareAffinity(pExpr1, aff) | (u8)jumpIfNull;
+  return aff;
+}
+
+/*
+** Return a pointer to the collation sequence that should be used by
+** a binary comparison operator comparing pLeft and pRight.
+**
+** If the left hand expression has a collating sequence type, then it is
+** used. Otherwise the collation sequence for the right hand expression
+** is used, or the default (BINARY) if neither expression has a collating
+** type.
+**
+** Argument pRight (but not pLeft) may be a null pointer. In this case,
+** it is not considered.
+*/
+SQLITE_PRIVATE CollSeq *sqlite3BinaryCompareCollSeq(
+  Parse *pParse, 
+  Expr *pLeft, 
+  Expr *pRight
+){
+  CollSeq *pColl;
+  assert( pLeft );
+  if( pLeft->flags & EP_Collate ){
+    pColl = sqlite3ExprCollSeq(pParse, pLeft);
+  }else if( pRight && (pRight->flags & EP_Collate)!=0 ){
+    pColl = sqlite3ExprCollSeq(pParse, pRight);
+  }else{
+    pColl = sqlite3ExprCollSeq(pParse, pLeft);
+    if( !pColl ){
+      pColl = sqlite3ExprCollSeq(pParse, pRight);
+    }
+  }
+  return pColl;
+}
+
+/*
+** Generate code for a comparison operator.
+*/
+static int codeCompare(
+  Parse *pParse,    /* The parsing (and code generating) context */
+  Expr *pLeft,      /* The left operand */
+  Expr *pRight,     /* The right operand */
+  int opcode,       /* The comparison opcode */
+  int in1, int in2, /* Register holding operands */
+  int dest,         /* Jump here if true.  */
+  int jumpIfNull    /* If true, jump if either operand is NULL */
+){
+  int p5;
+  int addr;
+  CollSeq *p4;
+
+  p4 = sqlite3BinaryCompareCollSeq(pParse, pLeft, pRight);
+  p5 = binaryCompareP5(pLeft, pRight, jumpIfNull);
+  addr = sqlite3VdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1,
+                           (void*)p4, P4_COLLSEQ);
+  sqlite3VdbeChangeP5(pParse->pVdbe, (u8)p5);
+  return addr;
+}
+
+#if SQLITE_MAX_EXPR_DEPTH>0
+/*
+** Check that argument nHeight is less than or equal to the maximum
+** expression depth allowed. If it is not, leave an error message in
+** pParse.
+*/
+SQLITE_PRIVATE int sqlite3ExprCheckHeight(Parse *pParse, int nHeight){
+  int rc = SQLITE_OK;
+  int mxHeight = pParse->db->aLimit[SQLITE_LIMIT_EXPR_DEPTH];
+  if( nHeight>mxHeight ){
+    sqlite3ErrorMsg(pParse, 
+       "Expression tree is too large (maximum depth %d)", mxHeight
+    );
+    rc = SQLITE_ERROR;
+  }
+  return rc;
+}
+
+/* The following three functions, heightOfExpr(), heightOfExprList()
+** and heightOfSelect(), are used to determine the maximum height
+** of any expression tree referenced by the structure passed as the
+** first argument.
+**
+** If this maximum height is greater than the current value pointed
+** to by pnHeight, the second parameter, then set *pnHeight to that
+** value.
+*/
+static void heightOfExpr(Expr *p, int *pnHeight){
+  if( p ){
+    if( p->nHeight>*pnHeight ){
+      *pnHeight = p->nHeight;
+    }
+  }
+}
+static void heightOfExprList(ExprList *p, int *pnHeight){
+  if( p ){
+    int i;
+    for(i=0; i<p->nExpr; i++){
+      heightOfExpr(p->a[i].pExpr, pnHeight);
+    }
+  }
+}
+static void heightOfSelect(Select *p, int *pnHeight){
+  if( p ){
+    heightOfExpr(p->pWhere, pnHeight);
+    heightOfExpr(p->pHaving, pnHeight);
+    heightOfExpr(p->pLimit, pnHeight);
+    heightOfExpr(p->pOffset, pnHeight);
+    heightOfExprList(p->pEList, pnHeight);
+    heightOfExprList(p->pGroupBy, pnHeight);
+    heightOfExprList(p->pOrderBy, pnHeight);
+    heightOfSelect(p->pPrior, pnHeight);
+  }
+}
+
+/*
+** Set the Expr.nHeight variable in the structure passed as an 
+** argument. An expression with no children, Expr.pList or 
+** Expr.pSelect member has a height of 1. Any other expression
+** has a height equal to the maximum height of any other 
+** referenced Expr plus one.
+*/
+static void exprSetHeight(Expr *p){
+  int nHeight = 0;
+  heightOfExpr(p->pLeft, &nHeight);
+  heightOfExpr(p->pRight, &nHeight);
+  if( ExprHasProperty(p, EP_xIsSelect) ){
+    heightOfSelect(p->x.pSelect, &nHeight);
+  }else{
+    heightOfExprList(p->x.pList, &nHeight);
+  }
+  p->nHeight = nHeight + 1;
+}
+
+/*
+** Set the Expr.nHeight variable using the exprSetHeight() function. If
+** the height is greater than the maximum allowed expression depth,
+** leave an error in pParse.
+*/
+SQLITE_PRIVATE void sqlite3ExprSetHeight(Parse *pParse, Expr *p){
+  exprSetHeight(p);
+  sqlite3ExprCheckHeight(pParse, p->nHeight);
+}
+
+/*
+** Return the maximum height of any expression tree referenced
+** by the select statement passed as an argument.
+*/
+SQLITE_PRIVATE int sqlite3SelectExprHeight(Select *p){
+  int nHeight = 0;
+  heightOfSelect(p, &nHeight);
+  return nHeight;
+}
+#else
+  #define exprSetHeight(y)
+#endif /* SQLITE_MAX_EXPR_DEPTH>0 */
+
+/*
+** This routine is the core allocator for Expr nodes.
+**
+** Construct a new expression node and return a pointer to it.  Memory
+** for this node and for the pToken argument is a single allocation
+** obtained from sqlite3DbMalloc().  The calling function
+** is responsible for making sure the node eventually gets freed.
+**
+** If dequote is true, then the token (if it exists) is dequoted.
+** If dequote is false, no dequoting is performance.  The deQuote
+** parameter is ignored if pToken is NULL or if the token does not
+** appear to be quoted.  If the quotes were of the form "..." (double-quotes)
+** then the EP_DblQuoted flag is set on the expression node.
+**
+** Special case:  If op==TK_INTEGER and pToken points to a string that
+** can be translated into a 32-bit integer, then the token is not
+** stored in u.zToken.  Instead, the integer values is written
+** into u.iValue and the EP_IntValue flag is set.  No extra storage
+** is allocated to hold the integer text and the dequote flag is ignored.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprAlloc(
+  sqlite3 *db,            /* Handle for sqlite3DbMallocZero() (may be null) */
+  int op,                 /* Expression opcode */
+  const Token *pToken,    /* Token argument.  Might be NULL */
+  int dequote             /* True to dequote */
+){
+  Expr *pNew;
+  int nExtra = 0;
+  int iValue = 0;
+
+  if( pToken ){
+    if( op!=TK_INTEGER || pToken->z==0
+          || sqlite3GetInt32(pToken->z, &iValue)==0 ){
+      nExtra = pToken->n+1;
+      assert( iValue>=0 );
+    }
+  }
+  pNew = sqlite3DbMallocZero(db, sizeof(Expr)+nExtra);
+  if( pNew ){
+    pNew->op = (u8)op;
+    pNew->iAgg = -1;
+    if( pToken ){
+      if( nExtra==0 ){
+        pNew->flags |= EP_IntValue;
+        pNew->u.iValue = iValue;
+      }else{
+        int c;
+        pNew->u.zToken = (char*)&pNew[1];
+        assert( pToken->z!=0 || pToken->n==0 );
+        if( pToken->n ) memcpy(pNew->u.zToken, pToken->z, pToken->n);
+        pNew->u.zToken[pToken->n] = 0;
+        if( dequote && nExtra>=3 
+             && ((c = pToken->z[0])=='\'' || c=='"' || c=='[' || c=='`') ){
+          sqlite3Dequote(pNew->u.zToken);
+          if( c=='"' ) pNew->flags |= EP_DblQuoted;
+        }
+      }
+    }
+#if SQLITE_MAX_EXPR_DEPTH>0
+    pNew->nHeight = 1;
+#endif  
+  }
+  return pNew;
+}
+
+/*
+** Allocate a new expression node from a zero-terminated token that has
+** already been dequoted.
+*/
+SQLITE_PRIVATE Expr *sqlite3Expr(
+  sqlite3 *db,            /* Handle for sqlite3DbMallocZero() (may be null) */
+  int op,                 /* Expression opcode */
+  const char *zToken      /* Token argument.  Might be NULL */
+){
+  Token x;
+  x.z = zToken;
+  x.n = zToken ? sqlite3Strlen30(zToken) : 0;
+  return sqlite3ExprAlloc(db, op, &x, 0);
+}
+
+/*
+** Attach subtrees pLeft and pRight to the Expr node pRoot.
+**
+** If pRoot==NULL that means that a memory allocation error has occurred.
+** In that case, delete the subtrees pLeft and pRight.
+*/
+SQLITE_PRIVATE void sqlite3ExprAttachSubtrees(
+  sqlite3 *db,
+  Expr *pRoot,
+  Expr *pLeft,
+  Expr *pRight
+){
+  if( pRoot==0 ){
+    assert( db->mallocFailed );
+    sqlite3ExprDelete(db, pLeft);
+    sqlite3ExprDelete(db, pRight);
+  }else{
+    if( pRight ){
+      pRoot->pRight = pRight;
+      pRoot->flags |= EP_Collate & pRight->flags;
+    }
+    if( pLeft ){
+      pRoot->pLeft = pLeft;
+      pRoot->flags |= EP_Collate & pLeft->flags;
+    }
+    exprSetHeight(pRoot);
+  }
+}
+
+/*
+** Allocate an Expr node which joins as many as two subtrees.
+**
+** One or both of the subtrees can be NULL.  Return a pointer to the new
+** Expr node.  Or, if an OOM error occurs, set pParse->db->mallocFailed,
+** free the subtrees and return NULL.
+*/
+SQLITE_PRIVATE Expr *sqlite3PExpr(
+  Parse *pParse,          /* Parsing context */
+  int op,                 /* Expression opcode */
+  Expr *pLeft,            /* Left operand */
+  Expr *pRight,           /* Right operand */
+  const Token *pToken     /* Argument token */
+){
+  Expr *p;
+  if( op==TK_AND && pLeft && pRight && pParse->nErr==0 ){
+    /* Take advantage of short-circuit false optimization for AND */
+    p = sqlite3ExprAnd(pParse->db, pLeft, pRight);
+  }else{
+    p = sqlite3ExprAlloc(pParse->db, op, pToken, 1);
+    sqlite3ExprAttachSubtrees(pParse->db, p, pLeft, pRight);
+  }
+  if( p ) {
+    sqlite3ExprCheckHeight(pParse, p->nHeight);
+  }
+  return p;
+}
+
+/*
+** If the expression is always either TRUE or FALSE (respectively),
+** then return 1.  If one cannot determine the truth value of the
+** expression at compile-time return 0.
+**
+** This is an optimization.  If is OK to return 0 here even if
+** the expression really is always false or false (a false negative).
+** But it is a bug to return 1 if the expression might have different
+** boolean values in different circumstances (a false positive.)
+**
+** Note that if the expression is part of conditional for a
+** LEFT JOIN, then we cannot determine at compile-time whether or not
+** is it true or false, so always return 0.
+*/
+static int exprAlwaysTrue(Expr *p){
+  int v = 0;
+  if( ExprHasProperty(p, EP_FromJoin) ) return 0;
+  if( !sqlite3ExprIsInteger(p, &v) ) return 0;
+  return v!=0;
+}
+static int exprAlwaysFalse(Expr *p){
+  int v = 0;
+  if( ExprHasProperty(p, EP_FromJoin) ) return 0;
+  if( !sqlite3ExprIsInteger(p, &v) ) return 0;
+  return v==0;
+}
+
+/*
+** Join two expressions using an AND operator.  If either expression is
+** NULL, then just return the other expression.
+**
+** If one side or the other of the AND is known to be false, then instead
+** of returning an AND expression, just return a constant expression with
+** a value of false.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprAnd(sqlite3 *db, Expr *pLeft, Expr *pRight){
+  if( pLeft==0 ){
+    return pRight;
+  }else if( pRight==0 ){
+    return pLeft;
+  }else if( exprAlwaysFalse(pLeft) || exprAlwaysFalse(pRight) ){
+    sqlite3ExprDelete(db, pLeft);
+    sqlite3ExprDelete(db, pRight);
+    return sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[0], 0);
+  }else{
+    Expr *pNew = sqlite3ExprAlloc(db, TK_AND, 0, 0);
+    sqlite3ExprAttachSubtrees(db, pNew, pLeft, pRight);
+    return pNew;
+  }
+}
+
+/*
+** Construct a new expression node for a function with multiple
+** arguments.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse *pParse, ExprList *pList, Token *pToken){
+  Expr *pNew;
+  sqlite3 *db = pParse->db;
+  assert( pToken );
+  pNew = sqlite3ExprAlloc(db, TK_FUNCTION, pToken, 1);
+  if( pNew==0 ){
+    sqlite3ExprListDelete(db, pList); /* Avoid memory leak when malloc fails */
+    return 0;
+  }
+  pNew->x.pList = pList;
+  assert( !ExprHasProperty(pNew, EP_xIsSelect) );
+  sqlite3ExprSetHeight(pParse, pNew);
+  return pNew;
+}
+
+/*
+** Assign a variable number to an expression that encodes a wildcard
+** in the original SQL statement.  
+**
+** Wildcards consisting of a single "?" are assigned the next sequential
+** variable number.
+**
+** Wildcards of the form "?nnn" are assigned the number "nnn".  We make
+** sure "nnn" is not too be to avoid a denial of service attack when
+** the SQL statement comes from an external source.
+**
+** Wildcards of the form ":aaa", "@aaa", or "$aaa" are assigned the same number
+** as the previous instance of the same wildcard.  Or if this is the first
+** instance of the wildcard, the next sequential variable number is
+** assigned.
+*/
+SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse *pParse, Expr *pExpr){
+  sqlite3 *db = pParse->db;
+  const char *z;
+
+  if( pExpr==0 ) return;
+  assert( !ExprHasProperty(pExpr, EP_IntValue|EP_Reduced|EP_TokenOnly) );
+  z = pExpr->u.zToken;
+  assert( z!=0 );
+  assert( z[0]!=0 );
+  if( z[1]==0 ){
+    /* Wildcard of the form "?".  Assign the next variable number */
+    assert( z[0]=='?' );
+    pExpr->iColumn = (ynVar)(++pParse->nVar);
+  }else{
+    ynVar x = 0;
+    u32 n = sqlite3Strlen30(z);
+    if( z[0]=='?' ){
+      /* Wildcard of the form "?nnn".  Convert "nnn" to an integer and
+      ** use it as the variable number */
+      i64 i;
+      int bOk = 0==sqlite3Atoi64(&z[1], &i, n-1, SQLITE_UTF8);
+      pExpr->iColumn = x = (ynVar)i;
+      testcase( i==0 );
+      testcase( i==1 );
+      testcase( i==db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER]-1 );
+      testcase( i==db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] );
+      if( bOk==0 || i<1 || i>db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] ){
+        sqlite3ErrorMsg(pParse, "variable number must be between ?1 and ?%d",
+            db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER]);
+        x = 0;
+      }
+      if( i>pParse->nVar ){
+        pParse->nVar = (int)i;
+      }
+    }else{
+      /* Wildcards like ":aaa", "$aaa" or "@aaa".  Reuse the same variable
+      ** number as the prior appearance of the same name, or if the name
+      ** has never appeared before, reuse the same variable number
+      */
+      ynVar i;
+      for(i=0; i<pParse->nzVar; i++){
+        if( pParse->azVar[i] && strcmp(pParse->azVar[i],z)==0 ){
+          pExpr->iColumn = x = (ynVar)i+1;
+          break;
+        }
+      }
+      if( x==0 ) x = pExpr->iColumn = (ynVar)(++pParse->nVar);
+    }
+    if( x>0 ){
+      if( x>pParse->nzVar ){
+        char **a;
+        a = sqlite3DbRealloc(db, pParse->azVar, x*sizeof(a[0]));
+        if( a==0 ) return;  /* Error reported through db->mallocFailed */
+        pParse->azVar = a;
+        memset(&a[pParse->nzVar], 0, (x-pParse->nzVar)*sizeof(a[0]));
+        pParse->nzVar = x;
+      }
+      if( z[0]!='?' || pParse->azVar[x-1]==0 ){
+        sqlite3DbFree(db, pParse->azVar[x-1]);
+        pParse->azVar[x-1] = sqlite3DbStrNDup(db, z, n);
+      }
+    }
+  } 
+  if( !pParse->nErr && pParse->nVar>db->aLimit[SQLITE_LIMIT_VARIABLE_NUMBER] ){
+    sqlite3ErrorMsg(pParse, "too many SQL variables");
+  }
+}
+
+/*
+** Recursively delete an expression tree.
+*/
+SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3 *db, Expr *p){
+  if( p==0 ) return;
+  /* Sanity check: Assert that the IntValue is non-negative if it exists */
+  assert( !ExprHasProperty(p, EP_IntValue) || p->u.iValue>=0 );
+  if( !ExprHasProperty(p, EP_TokenOnly) ){
+    /* The Expr.x union is never used at the same time as Expr.pRight */
+    assert( p->x.pList==0 || p->pRight==0 );
+    sqlite3ExprDelete(db, p->pLeft);
+    sqlite3ExprDelete(db, p->pRight);
+    if( ExprHasProperty(p, EP_MemToken) ) sqlite3DbFree(db, p->u.zToken);
+    if( ExprHasProperty(p, EP_xIsSelect) ){
+      sqlite3SelectDelete(db, p->x.pSelect);
+    }else{
+      sqlite3ExprListDelete(db, p->x.pList);
+    }
+  }
+  if( !ExprHasProperty(p, EP_Static) ){
+    sqlite3DbFree(db, p);
+  }
+}
+
+/*
+** Return the number of bytes allocated for the expression structure 
+** passed as the first argument. This is always one of EXPR_FULLSIZE,
+** EXPR_REDUCEDSIZE or EXPR_TOKENONLYSIZE.
+*/
+static int exprStructSize(Expr *p){
+  if( ExprHasProperty(p, EP_TokenOnly) ) return EXPR_TOKENONLYSIZE;
+  if( ExprHasProperty(p, EP_Reduced) ) return EXPR_REDUCEDSIZE;
+  return EXPR_FULLSIZE;
+}
+
+/*
+** The dupedExpr*Size() routines each return the number of bytes required
+** to store a copy of an expression or expression tree.  They differ in
+** how much of the tree is measured.
+**
+**     dupedExprStructSize()     Size of only the Expr structure 
+**     dupedExprNodeSize()       Size of Expr + space for token
+**     dupedExprSize()           Expr + token + subtree components
+**
+***************************************************************************
+**
+** The dupedExprStructSize() function returns two values OR-ed together:  
+** (1) the space required for a copy of the Expr structure only and 
+** (2) the EP_xxx flags that indicate what the structure size should be.
+** The return values is always one of:
+**
+**      EXPR_FULLSIZE
+**      EXPR_REDUCEDSIZE   | EP_Reduced
+**      EXPR_TOKENONLYSIZE | EP_TokenOnly
+**
+** The size of the structure can be found by masking the return value
+** of this routine with 0xfff.  The flags can be found by masking the
+** return value with EP_Reduced|EP_TokenOnly.
+**
+** Note that with flags==EXPRDUP_REDUCE, this routines works on full-size
+** (unreduced) Expr objects as they or originally constructed by the parser.
+** During expression analysis, extra information is computed and moved into
+** later parts of teh Expr object and that extra information might get chopped
+** off if the expression is reduced.  Note also that it does not work to
+** make an EXPRDUP_REDUCE copy of a reduced expression.  It is only legal
+** to reduce a pristine expression tree from the parser.  The implementation
+** of dupedExprStructSize() contain multiple assert() statements that attempt
+** to enforce this constraint.
+*/
+static int dupedExprStructSize(Expr *p, int flags){
+  int nSize;
+  assert( flags==EXPRDUP_REDUCE || flags==0 ); /* Only one flag value allowed */
+  assert( EXPR_FULLSIZE<=0xfff );
+  assert( (0xfff & (EP_Reduced|EP_TokenOnly))==0 );
+  if( 0==(flags&EXPRDUP_REDUCE) ){
+    nSize = EXPR_FULLSIZE;
+  }else{
+    assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) );
+    assert( !ExprHasProperty(p, EP_FromJoin) ); 
+    assert( !ExprHasProperty(p, EP_MemToken) );
+    assert( !ExprHasProperty(p, EP_NoReduce) );
+    if( p->pLeft || p->x.pList ){
+      nSize = EXPR_REDUCEDSIZE | EP_Reduced;
+    }else{
+      assert( p->pRight==0 );
+      nSize = EXPR_TOKENONLYSIZE | EP_TokenOnly;
+    }
+  }
+  return nSize;
+}
+
+/*
+** This function returns the space in bytes required to store the copy 
+** of the Expr structure and a copy of the Expr.u.zToken string (if that
+** string is defined.)
+*/
+static int dupedExprNodeSize(Expr *p, int flags){
+  int nByte = dupedExprStructSize(p, flags) & 0xfff;
+  if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){
+    nByte += sqlite3Strlen30(p->u.zToken)+1;
+  }
+  return ROUND8(nByte);
+}
+
+/*
+** Return the number of bytes required to create a duplicate of the 
+** expression passed as the first argument. The second argument is a
+** mask containing EXPRDUP_XXX flags.
+**
+** The value returned includes space to create a copy of the Expr struct
+** itself and the buffer referred to by Expr.u.zToken, if any.
+**
+** If the EXPRDUP_REDUCE flag is set, then the return value includes 
+** space to duplicate all Expr nodes in the tree formed by Expr.pLeft 
+** and Expr.pRight variables (but not for any structures pointed to or 
+** descended from the Expr.x.pList or Expr.x.pSelect variables).
+*/
+static int dupedExprSize(Expr *p, int flags){
+  int nByte = 0;
+  if( p ){
+    nByte = dupedExprNodeSize(p, flags);
+    if( flags&EXPRDUP_REDUCE ){
+      nByte += dupedExprSize(p->pLeft, flags) + dupedExprSize(p->pRight, flags);
+    }
+  }
+  return nByte;
+}
+
+/*
+** This function is similar to sqlite3ExprDup(), except that if pzBuffer 
+** is not NULL then *pzBuffer is assumed to point to a buffer large enough 
+** to store the copy of expression p, the copies of p->u.zToken
+** (if applicable), and the copies of the p->pLeft and p->pRight expressions,
+** if any. Before returning, *pzBuffer is set to the first byte past the
+** portion of the buffer copied into by this function.
+*/
+static Expr *exprDup(sqlite3 *db, Expr *p, int flags, u8 **pzBuffer){
+  Expr *pNew = 0;                      /* Value to return */
+  if( p ){
+    const int isReduced = (flags&EXPRDUP_REDUCE);
+    u8 *zAlloc;
+    u32 staticFlag = 0;
+
+    assert( pzBuffer==0 || isReduced );
+
+    /* Figure out where to write the new Expr structure. */
+    if( pzBuffer ){
+      zAlloc = *pzBuffer;
+      staticFlag = EP_Static;
+    }else{
+      zAlloc = sqlite3DbMallocRaw(db, dupedExprSize(p, flags));
+    }
+    pNew = (Expr *)zAlloc;
+
+    if( pNew ){
+      /* Set nNewSize to the size allocated for the structure pointed to
+      ** by pNew. This is either EXPR_FULLSIZE, EXPR_REDUCEDSIZE or
+      ** EXPR_TOKENONLYSIZE. nToken is set to the number of bytes consumed
+      ** by the copy of the p->u.zToken string (if any).
+      */
+      const unsigned nStructSize = dupedExprStructSize(p, flags);
+      const int nNewSize = nStructSize & 0xfff;
+      int nToken;
+      if( !ExprHasProperty(p, EP_IntValue) && p->u.zToken ){
+        nToken = sqlite3Strlen30(p->u.zToken) + 1;
+      }else{
+        nToken = 0;
+      }
+      if( isReduced ){
+        assert( ExprHasProperty(p, EP_Reduced)==0 );
+        memcpy(zAlloc, p, nNewSize);
+      }else{
+        int nSize = exprStructSize(p);
+        memcpy(zAlloc, p, nSize);
+        memset(&zAlloc[nSize], 0, EXPR_FULLSIZE-nSize);
+      }
+
+      /* Set the EP_Reduced, EP_TokenOnly, and EP_Static flags appropriately. */
+      pNew->flags &= ~(EP_Reduced|EP_TokenOnly|EP_Static|EP_MemToken);
+      pNew->flags |= nStructSize & (EP_Reduced|EP_TokenOnly);
+      pNew->flags |= staticFlag;
+
+      /* Copy the p->u.zToken string, if any. */
+      if( nToken ){
+        char *zToken = pNew->u.zToken = (char*)&zAlloc[nNewSize];
+        memcpy(zToken, p->u.zToken, nToken);
+      }
+
+      if( 0==((p->flags|pNew->flags) & EP_TokenOnly) ){
+        /* Fill in the pNew->x.pSelect or pNew->x.pList member. */
+        if( ExprHasProperty(p, EP_xIsSelect) ){
+          pNew->x.pSelect = sqlite3SelectDup(db, p->x.pSelect, isReduced);
+        }else{
+          pNew->x.pList = sqlite3ExprListDup(db, p->x.pList, isReduced);
+        }
+      }
+
+      /* Fill in pNew->pLeft and pNew->pRight. */
+      if( ExprHasProperty(pNew, EP_Reduced|EP_TokenOnly) ){
+        zAlloc += dupedExprNodeSize(p, flags);
+        if( ExprHasProperty(pNew, EP_Reduced) ){
+          pNew->pLeft = exprDup(db, p->pLeft, EXPRDUP_REDUCE, &zAlloc);
+          pNew->pRight = exprDup(db, p->pRight, EXPRDUP_REDUCE, &zAlloc);
+        }
+        if( pzBuffer ){
+          *pzBuffer = zAlloc;
+        }
+      }else{
+        if( !ExprHasProperty(p, EP_TokenOnly) ){
+          pNew->pLeft = sqlite3ExprDup(db, p->pLeft, 0);
+          pNew->pRight = sqlite3ExprDup(db, p->pRight, 0);
+        }
+      }
+
+    }
+  }
+  return pNew;
+}
+
+/*
+** Create and return a deep copy of the object passed as the second 
+** argument. If an OOM condition is encountered, NULL is returned
+** and the db->mallocFailed flag set.
+*/
+#ifndef SQLITE_OMIT_CTE
+static With *withDup(sqlite3 *db, With *p){
+  With *pRet = 0;
+  if( p ){
+    int nByte = sizeof(*p) + sizeof(p->a[0]) * (p->nCte-1);
+    pRet = sqlite3DbMallocZero(db, nByte);
+    if( pRet ){
+      int i;
+      pRet->nCte = p->nCte;
+      for(i=0; i<p->nCte; i++){
+        pRet->a[i].pSelect = sqlite3SelectDup(db, p->a[i].pSelect, 0);
+        pRet->a[i].pCols = sqlite3ExprListDup(db, p->a[i].pCols, 0);
+        pRet->a[i].zName = sqlite3DbStrDup(db, p->a[i].zName);
+      }
+    }
+  }
+  return pRet;
+}
+#else
+# define withDup(x,y) 0
+#endif
+
+/*
+** The following group of routines make deep copies of expressions,
+** expression lists, ID lists, and select statements.  The copies can
+** be deleted (by being passed to their respective ...Delete() routines)
+** without effecting the originals.
+**
+** The expression list, ID, and source lists return by sqlite3ExprListDup(),
+** sqlite3IdListDup(), and sqlite3SrcListDup() can not be further expanded 
+** by subsequent calls to sqlite*ListAppend() routines.
+**
+** Any tables that the SrcList might point to are not duplicated.
+**
+** The flags parameter contains a combination of the EXPRDUP_XXX flags.
+** If the EXPRDUP_REDUCE flag is set, then the structure returned is a
+** truncated version of the usual Expr structure that will be stored as
+** part of the in-memory representation of the database schema.
+*/
+SQLITE_PRIVATE Expr *sqlite3ExprDup(sqlite3 *db, Expr *p, int flags){
+  return exprDup(db, p, flags, 0);
+}
+SQLITE_PRIVATE ExprList *sqlite3ExprListDup(sqlite3 *db, ExprList *p, int flags){
+  ExprList *pNew;
+  struct ExprList_item *pItem, *pOldItem;
+  int i;
+  if( p==0 ) return 0;
+  pNew = sqlite3DbMallocRaw(db, sizeof(*pNew) );
+  if( pNew==0 ) return 0;
+  pNew->nExpr = i = p->nExpr;
+  if( (flags & EXPRDUP_REDUCE)==0 ) for(i=1; i<p->nExpr; i+=i){}
+  pNew->a = pItem = sqlite3DbMallocRaw(db,  i*sizeof(p->a[0]) );
+  if( pItem==0 ){
+    sqlite3DbFree(db, pNew);
+    return 0;
+  } 
+  pOldItem = p->a;
+  for(i=0; i<p->nExpr; i++, pItem++, pOldItem++){
+    Expr *pOldExpr = pOldItem->pExpr;
+    pItem->pExpr = sqlite3ExprDup(db, pOldExpr, flags);
+    pItem->zName = sqlite3DbStrDup(db, pOldItem->zName);
+    pItem->zSpan = sqlite3DbStrDup(db, pOldItem->zSpan);
+    pItem->sortOrder = pOldItem->sortOrder;
+    pItem->done = 0;
+    pItem->bSpanIsTab = pOldItem->bSpanIsTab;
+    pItem->u = pOldItem->u;
+  }
+  return pNew;
+}
+
+/*
+** If cursors, triggers, views and subqueries are all omitted from
+** the build, then none of the following routines, except for 
+** sqlite3SelectDup(), can be called. sqlite3SelectDup() is sometimes
+** called with a NULL argument.
+*/
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER) \
+ || !defined(SQLITE_OMIT_SUBQUERY)
+SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3 *db, SrcList *p, int flags){
+  SrcList *pNew;
+  int i;
+  int nByte;
+  if( p==0 ) return 0;
+  nByte = sizeof(*p) + (p->nSrc>0 ? sizeof(p->a[0]) * (p->nSrc-1) : 0);
+  pNew = sqlite3DbMallocRaw(db, nByte );
+  if( pNew==0 ) return 0;
+  pNew->nSrc = pNew->nAlloc = p->nSrc;
+  for(i=0; i<p->nSrc; i++){
+    struct SrcList_item *pNewItem = &pNew->a[i];
+    struct SrcList_item *pOldItem = &p->a[i];
+    Table *pTab;
+    pNewItem->pSchema = pOldItem->pSchema;
+    pNewItem->zDatabase = sqlite3DbStrDup(db, pOldItem->zDatabase);
+    pNewItem->zName = sqlite3DbStrDup(db, pOldItem->zName);
+    pNewItem->zAlias = sqlite3DbStrDup(db, pOldItem->zAlias);
+    pNewItem->jointype = pOldItem->jointype;
+    pNewItem->iCursor = pOldItem->iCursor;
+    pNewItem->addrFillSub = pOldItem->addrFillSub;
+    pNewItem->regReturn = pOldItem->regReturn;
+    pNewItem->isCorrelated = pOldItem->isCorrelated;
+    pNewItem->viaCoroutine = pOldItem->viaCoroutine;
+    pNewItem->isRecursive = pOldItem->isRecursive;
+    pNewItem->zIndex = sqlite3DbStrDup(db, pOldItem->zIndex);
+    pNewItem->notIndexed = pOldItem->notIndexed;
+    pNewItem->pIndex = pOldItem->pIndex;
+    pTab = pNewItem->pTab = pOldItem->pTab;
+    if( pTab ){
+      pTab->nRef++;
+    }
+    pNewItem->pSelect = sqlite3SelectDup(db, pOldItem->pSelect, flags);
+    pNewItem->pOn = sqlite3ExprDup(db, pOldItem->pOn, flags);
+    pNewItem->pUsing = sqlite3IdListDup(db, pOldItem->pUsing);
+    pNewItem->colUsed = pOldItem->colUsed;
+  }
+  return pNew;
+}
+SQLITE_PRIVATE IdList *sqlite3IdListDup(sqlite3 *db, IdList *p){
+  IdList *pNew;
+  int i;
+  if( p==0 ) return 0;
+  pNew = sqlite3DbMallocRaw(db, sizeof(*pNew) );
+  if( pNew==0 ) return 0;
+  pNew->nId = p->nId;
+  pNew->a = sqlite3DbMallocRaw(db, p->nId*sizeof(p->a[0]) );
+  if( pNew->a==0 ){
+    sqlite3DbFree(db, pNew);
+    return 0;
+  }
+  /* Note that because the size of the allocation for p->a[] is not
+  ** necessarily a power of two, sqlite3IdListAppend() may not be called
+  ** on the duplicate created by this function. */
+  for(i=0; i<p->nId; i++){
+    struct IdList_item *pNewItem = &pNew->a[i];
+    struct IdList_item *pOldItem = &p->a[i];
+    pNewItem->zName = sqlite3DbStrDup(db, pOldItem->zName);
+    pNewItem->idx = pOldItem->idx;
+  }
+  return pNew;
+}
+SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3 *db, Select *p, int flags){
+  Select *pNew, *pPrior;
+  if( p==0 ) return 0;
+  pNew = sqlite3DbMallocRaw(db, sizeof(*p) );
+  if( pNew==0 ) return 0;
+  pNew->pEList = sqlite3ExprListDup(db, p->pEList, flags);
+  pNew->pSrc = sqlite3SrcListDup(db, p->pSrc, flags);
+  pNew->pWhere = sqlite3ExprDup(db, p->pWhere, flags);
+  pNew->pGroupBy = sqlite3ExprListDup(db, p->pGroupBy, flags);
+  pNew->pHaving = sqlite3ExprDup(db, p->pHaving, flags);
+  pNew->pOrderBy = sqlite3ExprListDup(db, p->pOrderBy, flags);
+  pNew->op = p->op;
+  pNew->pPrior = pPrior = sqlite3SelectDup(db, p->pPrior, flags);
+  if( pPrior ) pPrior->pNext = pNew;
+  pNew->pNext = 0;
+  pNew->pLimit = sqlite3ExprDup(db, p->pLimit, flags);
+  pNew->pOffset = sqlite3ExprDup(db, p->pOffset, flags);
+  pNew->iLimit = 0;
+  pNew->iOffset = 0;
+  pNew->selFlags = p->selFlags & ~SF_UsesEphemeral;
+  pNew->addrOpenEphm[0] = -1;
+  pNew->addrOpenEphm[1] = -1;
+  pNew->nSelectRow = p->nSelectRow;
+  pNew->pWith = withDup(db, p->pWith);
+  sqlite3SelectSetName(pNew, p->zSelName);
+  return pNew;
+}
+#else
+SQLITE_PRIVATE Select *sqlite3SelectDup(sqlite3 *db, Select *p, int flags){
+  assert( p==0 );
+  return 0;
+}
+#endif
+
+
+/*
+** Add a new element to the end of an expression list.  If pList is
+** initially NULL, then create a new expression list.
+**
+** If a memory allocation error occurs, the entire list is freed and
+** NULL is returned.  If non-NULL is returned, then it is guaranteed
+** that the new entry was successfully appended.
+*/
+SQLITE_PRIVATE ExprList *sqlite3ExprListAppend(
+  Parse *pParse,          /* Parsing context */
+  ExprList *pList,        /* List to which to append. Might be NULL */
+  Expr *pExpr             /* Expression to be appended. Might be NULL */
+){
+  sqlite3 *db = pParse->db;
+  if( pList==0 ){
+    pList = sqlite3DbMallocZero(db, sizeof(ExprList) );
+    if( pList==0 ){
+      goto no_mem;
+    }
+    pList->a = sqlite3DbMallocRaw(db, sizeof(pList->a[0]));
+    if( pList->a==0 ) goto no_mem;
+  }else if( (pList->nExpr & (pList->nExpr-1))==0 ){
+    struct ExprList_item *a;
+    assert( pList->nExpr>0 );
+    a = sqlite3DbRealloc(db, pList->a, pList->nExpr*2*sizeof(pList->a[0]));
+    if( a==0 ){
+      goto no_mem;
+    }
+    pList->a = a;
+  }
+  assert( pList->a!=0 );
+  if( 1 ){
+    struct ExprList_item *pItem = &pList->a[pList->nExpr++];
+    memset(pItem, 0, sizeof(*pItem));
+    pItem->pExpr = pExpr;
+  }
+  return pList;
+
+no_mem:     
+  /* Avoid leaking memory if malloc has failed. */
+  sqlite3ExprDelete(db, pExpr);
+  sqlite3ExprListDelete(db, pList);
+  return 0;
+}
+
+/*
+** Set the ExprList.a[].zName element of the most recently added item
+** on the expression list.
+**
+** pList might be NULL following an OOM error.  But pName should never be
+** NULL.  If a memory allocation fails, the pParse->db->mallocFailed flag
+** is set.
+*/
+SQLITE_PRIVATE void sqlite3ExprListSetName(
+  Parse *pParse,          /* Parsing context */
+  ExprList *pList,        /* List to which to add the span. */
+  Token *pName,           /* Name to be added */
+  int dequote             /* True to cause the name to be dequoted */
+){
+  assert( pList!=0 || pParse->db->mallocFailed!=0 );
+  if( pList ){
+    struct ExprList_item *pItem;
+    assert( pList->nExpr>0 );
+    pItem = &pList->a[pList->nExpr-1];
+    assert( pItem->zName==0 );
+    pItem->zName = sqlite3DbStrNDup(pParse->db, pName->z, pName->n);
+    if( dequote && pItem->zName ) sqlite3Dequote(pItem->zName);
+  }
+}
+
+/*
+** Set the ExprList.a[].zSpan element of the most recently added item
+** on the expression list.
+**
+** pList might be NULL following an OOM error.  But pSpan should never be
+** NULL.  If a memory allocation fails, the pParse->db->mallocFailed flag
+** is set.
+*/
+SQLITE_PRIVATE void sqlite3ExprListSetSpan(
+  Parse *pParse,          /* Parsing context */
+  ExprList *pList,        /* List to which to add the span. */
+  ExprSpan *pSpan         /* The span to be added */
+){
+  sqlite3 *db = pParse->db;
+  assert( pList!=0 || db->mallocFailed!=0 );
+  if( pList ){
+    struct ExprList_item *pItem = &pList->a[pList->nExpr-1];
+    assert( pList->nExpr>0 );
+    assert( db->mallocFailed || pItem->pExpr==pSpan->pExpr );
+    sqlite3DbFree(db, pItem->zSpan);
+    pItem->zSpan = sqlite3DbStrNDup(db, (char*)pSpan->zStart,
+                                    (int)(pSpan->zEnd - pSpan->zStart));
+  }
+}
+
+/*
+** If the expression list pEList contains more than iLimit elements,
+** leave an error message in pParse.
+*/
+SQLITE_PRIVATE void sqlite3ExprListCheckLength(
+  Parse *pParse,
+  ExprList *pEList,
+  const char *zObject
+){
+  int mx = pParse->db->aLimit[SQLITE_LIMIT_COLUMN];
+  testcase( pEList && pEList->nExpr==mx );
+  testcase( pEList && pEList->nExpr==mx+1 );
+  if( pEList && pEList->nExpr>mx ){
+    sqlite3ErrorMsg(pParse, "too many columns in %s", zObject);
+  }
+}
+
+/*
+** Delete an entire expression list.
+*/
+SQLITE_PRIVATE void sqlite3ExprListDelete(sqlite3 *db, ExprList *pList){
+  int i;
+  struct ExprList_item *pItem;
+  if( pList==0 ) return;
+  assert( pList->a!=0 || pList->nExpr==0 );
+  for(pItem=pList->a, i=0; i<pList->nExpr; i++, pItem++){
+    sqlite3ExprDelete(db, pItem->pExpr);
+    sqlite3DbFree(db, pItem->zName);
+    sqlite3DbFree(db, pItem->zSpan);
+  }
+  sqlite3DbFree(db, pList->a);
+  sqlite3DbFree(db, pList);
+}
+
+/*
+** These routines are Walker callbacks used to check expressions to
+** see if they are "constant" for some definition of constant.  The
+** Walker.eCode value determines the type of "constant" we are looking
+** for.
+**
+** These callback routines are used to implement the following:
+**
+**     sqlite3ExprIsConstant()                  pWalker->eCode==1
+**     sqlite3ExprIsConstantNotJoin()           pWalker->eCode==2
+**     sqlite3ExprRefOneTableOnly()             pWalker->eCode==3
+**     sqlite3ExprIsConstantOrFunction()        pWalker->eCode==4 or 5
+**
+** In all cases, the callbacks set Walker.eCode=0 and abort if the expression
+** is found to not be a constant.
+**
+** The sqlite3ExprIsConstantOrFunction() is used for evaluating expressions
+** in a CREATE TABLE statement.  The Walker.eCode value is 5 when parsing
+** an existing schema and 4 when processing a new statement.  A bound
+** parameter raises an error for new statements, but is silently converted
+** to NULL for existing schemas.  This allows sqlite_master tables that 
+** contain a bound parameter because they were generated by older versions
+** of SQLite to be parsed by newer versions of SQLite without raising a
+** malformed schema error.
+*/
+static int exprNodeIsConstant(Walker *pWalker, Expr *pExpr){
+
+  /* If pWalker->eCode is 2 then any term of the expression that comes from
+  ** the ON or USING clauses of a left join disqualifies the expression
+  ** from being considered constant. */
+  if( pWalker->eCode==2 && ExprHasProperty(pExpr, EP_FromJoin) ){
+    pWalker->eCode = 0;
+    return WRC_Abort;
+  }
+
+  switch( pExpr->op ){
+    /* Consider functions to be constant if all their arguments are constant
+    ** and either pWalker->eCode==4 or 5 or the function has the
+    ** SQLITE_FUNC_CONST flag. */
+    case TK_FUNCTION:
+      if( pWalker->eCode>=4 || ExprHasProperty(pExpr,EP_Constant) ){
+        return WRC_Continue;
+      }else{
+        pWalker->eCode = 0;
+        return WRC_Abort;
+      }
+    case TK_ID:
+    case TK_COLUMN:
+    case TK_AGG_FUNCTION:
+    case TK_AGG_COLUMN:
+      testcase( pExpr->op==TK_ID );
+      testcase( pExpr->op==TK_COLUMN );
+      testcase( pExpr->op==TK_AGG_FUNCTION );
+      testcase( pExpr->op==TK_AGG_COLUMN );
+      if( pWalker->eCode==3 && pExpr->iTable==pWalker->u.iCur ){
+        return WRC_Continue;
+      }else{
+        pWalker->eCode = 0;
+        return WRC_Abort;
+      }
+    case TK_VARIABLE:
+      if( pWalker->eCode==5 ){
+        /* Silently convert bound parameters that appear inside of CREATE
+        ** statements into a NULL when parsing the CREATE statement text out
+        ** of the sqlite_master table */
+        pExpr->op = TK_NULL;
+      }else if( pWalker->eCode==4 ){
+        /* A bound parameter in a CREATE statement that originates from
+        ** sqlite3_prepare() causes an error */
+        pWalker->eCode = 0;
+        return WRC_Abort;
+      }
+      /* Fall through */
+    default:
+      testcase( pExpr->op==TK_SELECT ); /* selectNodeIsConstant will disallow */
+      testcase( pExpr->op==TK_EXISTS ); /* selectNodeIsConstant will disallow */
+      return WRC_Continue;
+  }
+}
+static int selectNodeIsConstant(Walker *pWalker, Select *NotUsed){
+  UNUSED_PARAMETER(NotUsed);
+  pWalker->eCode = 0;
+  return WRC_Abort;
+}
+static int exprIsConst(Expr *p, int initFlag, int iCur){
+  Walker w;
+  memset(&w, 0, sizeof(w));
+  w.eCode = initFlag;
+  w.xExprCallback = exprNodeIsConstant;
+  w.xSelectCallback = selectNodeIsConstant;
+  w.u.iCur = iCur;
+  sqlite3WalkExpr(&w, p);
+  return w.eCode;
+}
+
+/*
+** Walk an expression tree.  Return non-zero if the expression is constant
+** and 0 if it involves variables or function calls.
+**
+** For the purposes of this function, a double-quoted string (ex: "abc")
+** is considered a variable but a single-quoted string (ex: 'abc') is
+** a constant.
+*/
+SQLITE_PRIVATE int sqlite3ExprIsConstant(Expr *p){
+  return exprIsConst(p, 1, 0);
+}
+
+/*
+** Walk an expression tree.  Return non-zero if the expression is constant
+** that does no originate from the ON or USING clauses of a join.
+** Return 0 if it involves variables or function calls or terms from
+** an ON or USING clause.
+*/
+SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr *p){
+  return exprIsConst(p, 2, 0);
+}
+
+/*
+** Walk an expression tree.  Return non-zero if the expression constant
+** for any single row of the table with cursor iCur.  In other words, the
+** expression must not refer to any non-deterministic function nor any
+** table other than iCur.
+*/
+SQLITE_PRIVATE int sqlite3ExprIsTableConstant(Expr *p, int iCur){
+  return exprIsConst(p, 3, iCur);
+}
+
+/*
+** Walk an expression tree.  Return non-zero if the expression is constant
+** or a function call with constant arguments.  Return and 0 if there
+** are any variables.
+**
+** For the purposes of this function, a double-quoted string (ex: "abc")
+** is considered a variable but a single-quoted string (ex: 'abc') is
+** a constant.
+*/
+SQLITE_PRIVATE int sqlite3ExprIsConstantOrFunction(Expr *p, u8 isInit){
+  assert( isInit==0 || isInit==1 );
+  return exprIsConst(p, 4+isInit, 0);
+}
+
+/*
+** If the expression p codes a constant integer that is small enough
+** to fit in a 32-bit integer, return 1 and put the value of the integer
+** in *pValue.  If the expression is not an integer or if it is too big
+** to fit in a signed 32-bit integer, return 0 and leave *pValue unchanged.
+*/
+SQLITE_PRIVATE int sqlite3ExprIsInteger(Expr *p, int *pValue){
+  int rc = 0;
+
+  /* If an expression is an integer literal that fits in a signed 32-bit
+  ** integer, then the EP_IntValue flag will have already been set */
+  assert( p->op!=TK_INTEGER || (p->flags & EP_IntValue)!=0
+           || sqlite3GetInt32(p->u.zToken, &rc)==0 );
+
+  if( p->flags & EP_IntValue ){
+    *pValue = p->u.iValue;
+    return 1;
+  }
+  switch( p->op ){
+    case TK_UPLUS: {
+      rc = sqlite3ExprIsInteger(p->pLeft, pValue);
+      break;
+    }
+    case TK_UMINUS: {
+      int v;
+      if( sqlite3ExprIsInteger(p->pLeft, &v) ){
+        assert( v!=(-2147483647-1) );
+        *pValue = -v;
+        rc = 1;
+      }
+      break;
+    }
+    default: break;
+  }
+  return rc;
+}
+
+/*
+** Return FALSE if there is no chance that the expression can be NULL.
+**
+** If the expression might be NULL or if the expression is too complex
+** to tell return TRUE.  
+**
+** This routine is used as an optimization, to skip OP_IsNull opcodes
+** when we know that a value cannot be NULL.  Hence, a false positive
+** (returning TRUE when in fact the expression can never be NULL) might
+** be a small performance hit but is otherwise harmless.  On the other
+** hand, a false negative (returning FALSE when the result could be NULL)
+** will likely result in an incorrect answer.  So when in doubt, return
+** TRUE.
+*/
+SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr *p){
+  u8 op;
+  while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ p = p->pLeft; }
+  op = p->op;
+  if( op==TK_REGISTER ) op = p->op2;
+  switch( op ){
+    case TK_INTEGER:
+    case TK_STRING:
+    case TK_FLOAT:
+    case TK_BLOB:
+      return 0;
+    case TK_COLUMN:
+      assert( p->pTab!=0 );
+      return ExprHasProperty(p, EP_CanBeNull) ||
+             (p->iColumn>=0 && p->pTab->aCol[p->iColumn].notNull==0);
+    default:
+      return 1;
+  }
+}
+
+/*
+** Return TRUE if the given expression is a constant which would be
+** unchanged by OP_Affinity with the affinity given in the second
+** argument.
+**
+** This routine is used to determine if the OP_Affinity operation
+** can be omitted.  When in doubt return FALSE.  A false negative
+** is harmless.  A false positive, however, can result in the wrong
+** answer.
+*/
+SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr *p, char aff){
+  u8 op;
+  if( aff==SQLITE_AFF_NONE ) return 1;
+  while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ p = p->pLeft; }
+  op = p->op;
+  if( op==TK_REGISTER ) op = p->op2;
+  switch( op ){
+    case TK_INTEGER: {
+      return aff==SQLITE_AFF_INTEGER || aff==SQLITE_AFF_NUMERIC;
+    }
+    case TK_FLOAT: {
+      return aff==SQLITE_AFF_REAL || aff==SQLITE_AFF_NUMERIC;
+    }
+    case TK_STRING: {
+      return aff==SQLITE_AFF_TEXT;
+    }
+    case TK_BLOB: {
+      return 1;
+    }
+    case TK_COLUMN: {
+      assert( p->iTable>=0 );  /* p cannot be part of a CHECK constraint */
+      return p->iColumn<0
+          && (aff==SQLITE_AFF_INTEGER || aff==SQLITE_AFF_NUMERIC);
+    }
+    default: {
+      return 0;
+    }
+  }
+}
+
+/*
+** Return TRUE if the given string is a row-id column name.
+*/
+SQLITE_PRIVATE int sqlite3IsRowid(const char *z){
+  if( sqlite3StrICmp(z, "_ROWID_")==0 ) return 1;
+  if( sqlite3StrICmp(z, "ROWID")==0 ) return 1;
+  if( sqlite3StrICmp(z, "OID")==0 ) return 1;
+  return 0;
+}
+
+/*
+** Return true if we are able to the IN operator optimization on a
+** query of the form
+**
+**       x IN (SELECT ...)
+**
+** Where the SELECT... clause is as specified by the parameter to this
+** routine.
+**
+** The Select object passed in has already been preprocessed and no
+** errors have been found.
+*/
+#ifndef SQLITE_OMIT_SUBQUERY
+static int isCandidateForInOpt(Select *p){
+  SrcList *pSrc;
+  ExprList *pEList;
+  Table *pTab;
+  if( p==0 ) return 0;                   /* right-hand side of IN is SELECT */
+  if( p->pPrior ) return 0;              /* Not a compound SELECT */
+  if( p->selFlags & (SF_Distinct|SF_Aggregate) ){
+    testcase( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct );
+    testcase( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate );
+    return 0; /* No DISTINCT keyword and no aggregate functions */
+  }
+  assert( p->pGroupBy==0 );              /* Has no GROUP BY clause */
+  if( p->pLimit ) return 0;              /* Has no LIMIT clause */
+  assert( p->pOffset==0 );               /* No LIMIT means no OFFSET */
+  if( p->pWhere ) return 0;              /* Has no WHERE clause */
+  pSrc = p->pSrc;
+  assert( pSrc!=0 );
+  if( pSrc->nSrc!=1 ) return 0;          /* Single term in FROM clause */
+  if( pSrc->a[0].pSelect ) return 0;     /* FROM is not a subquery or view */
+  pTab = pSrc->a[0].pTab;
+  if( NEVER(pTab==0) ) return 0;
+  assert( pTab->pSelect==0 );            /* FROM clause is not a view */
+  if( IsVirtual(pTab) ) return 0;        /* FROM clause not a virtual table */
+  pEList = p->pEList;
+  if( pEList->nExpr!=1 ) return 0;       /* One column in the result set */
+  if( pEList->a[0].pExpr->op!=TK_COLUMN ) return 0; /* Result is a column */
+  return 1;
+}
+#endif /* SQLITE_OMIT_SUBQUERY */
+
+/*
+** Code an OP_Once instruction and allocate space for its flag. Return the 
+** address of the new instruction.
+*/
+SQLITE_PRIVATE int sqlite3CodeOnce(Parse *pParse){
+  Vdbe *v = sqlite3GetVdbe(pParse);      /* Virtual machine being coded */
+  return sqlite3VdbeAddOp1(v, OP_Once, pParse->nOnce++);
+}
+
+/*
+** Generate code that checks the left-most column of index table iCur to see if
+** it contains any NULL entries.  Cause the register at regHasNull to be set
+** to a non-NULL value if iCur contains no NULLs.  Cause register regHasNull
+** to be set to NULL if iCur contains one or more NULL values.
+*/
+static void sqlite3SetHasNullFlag(Vdbe *v, int iCur, int regHasNull){
+  int j1;
+  sqlite3VdbeAddOp2(v, OP_Integer, 0, regHasNull);
+  j1 = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v);
+  sqlite3VdbeAddOp3(v, OP_Column, iCur, 0, regHasNull);
+  sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG);
+  VdbeComment((v, "first_entry_in(%d)", iCur));
+  sqlite3VdbeJumpHere(v, j1);
+}
+
+
+#ifndef SQLITE_OMIT_SUBQUERY
+/*
+** The argument is an IN operator with a list (not a subquery) on the 
+** right-hand side.  Return TRUE if that list is constant.
+*/
+static int sqlite3InRhsIsConstant(Expr *pIn){
+  Expr *pLHS;
+  int res;
+  assert( !ExprHasProperty(pIn, EP_xIsSelect) );
+  pLHS = pIn->pLeft;
+  pIn->pLeft = 0;
+  res = sqlite3ExprIsConstant(pIn);
+  pIn->pLeft = pLHS;
+  return res;
+}
+#endif
+
+/*
+** This function is used by the implementation of the IN (...) operator.
+** The pX parameter is the expression on the RHS of the IN operator, which
+** might be either a list of expressions or a subquery.
+**
+** The job of this routine is to find or create a b-tree object that can
+** be used either to test for membership in the RHS set or to iterate through
+** all members of the RHS set, skipping duplicates.
+**
+** A cursor is opened on the b-tree object that is the RHS of the IN operator
+** and pX->iTable is set to the index of that cursor.
+**
+** The returned value of this function indicates the b-tree type, as follows:
+**
+**   IN_INDEX_ROWID      - The cursor was opened on a database table.
+**   IN_INDEX_INDEX_ASC  - The cursor was opened on an ascending index.
+**   IN_INDEX_INDEX_DESC - The cursor was opened on a descending index.
+**   IN_INDEX_EPH        - The cursor was opened on a specially created and
+**                         populated epheremal table.
+**   IN_INDEX_NOOP       - No cursor was allocated.  The IN operator must be
+**                         implemented as a sequence of comparisons.
+**
+** An existing b-tree might be used if the RHS expression pX is a simple
+** subquery such as:
+**
+**     SELECT <column> FROM <table>
+**
+** If the RHS of the IN operator is a list or a more complex subquery, then
+** an ephemeral table might need to be generated from the RHS and then
+** pX->iTable made to point to the ephemeral table instead of an
+** existing table.
+**
+** The inFlags parameter must contain exactly one of the bits
+** IN_INDEX_MEMBERSHIP or IN_INDEX_LOOP.  If inFlags contains
+** IN_INDEX_MEMBERSHIP, then the generated table will be used for a
+** fast membership test.  When the IN_INDEX_LOOP bit is set, the
+** IN index will be used to loop over all values of the RHS of the
+** IN operator.
+**
+** When IN_INDEX_LOOP is used (and the b-tree will be used to iterate
+** through the set members) then the b-tree must not contain duplicates.
+** An epheremal table must be used unless the selected <column> is guaranteed
+** to be unique - either because it is an INTEGER PRIMARY KEY or it
+** has a UNIQUE constraint or UNIQUE index.
+**
+** When IN_INDEX_MEMBERSHIP is used (and the b-tree will be used 
+** for fast set membership tests) then an epheremal table must 
+** be used unless <column> is an INTEGER PRIMARY KEY or an index can 
+** be found with <column> as its left-most column.
+**
+** If the IN_INDEX_NOOP_OK and IN_INDEX_MEMBERSHIP are both set and
+** if the RHS of the IN operator is a list (not a subquery) then this
+** routine might decide that creating an ephemeral b-tree for membership
+** testing is too expensive and return IN_INDEX_NOOP.  In that case, the
+** calling routine should implement the IN operator using a sequence
+** of Eq or Ne comparison operations.
+**
+** When the b-tree is being used for membership tests, the calling function
+** might need to know whether or not the RHS side of the IN operator
+** contains a NULL.  If prRhsHasNull is not a NULL pointer and 
+** if there is any chance that the (...) might contain a NULL value at
+** runtime, then a register is allocated and the register number written
+** to *prRhsHasNull. If there is no chance that the (...) contains a
+** NULL value, then *prRhsHasNull is left unchanged.
+**
+** If a register is allocated and its location stored in *prRhsHasNull, then
+** the value in that register will be NULL if the b-tree contains one or more
+** NULL values, and it will be some non-NULL value if the b-tree contains no
+** NULL values.
+*/
+#ifndef SQLITE_OMIT_SUBQUERY
+SQLITE_PRIVATE int sqlite3FindInIndex(Parse *pParse, Expr *pX, u32 inFlags, int *prRhsHasNull){
+  Select *p;                            /* SELECT to the right of IN operator */
+  int eType = 0;                        /* Type of RHS table. IN_INDEX_* */
+  int iTab = pParse->nTab++;            /* Cursor of the RHS table */
+  int mustBeUnique;                     /* True if RHS must be unique */
+  Vdbe *v = sqlite3GetVdbe(pParse);     /* Virtual machine being coded */
+
+  assert( pX->op==TK_IN );
+  mustBeUnique = (inFlags & IN_INDEX_LOOP)!=0;
+
+  /* Check to see if an existing table or index can be used to
+  ** satisfy the query.  This is preferable to generating a new 
+  ** ephemeral table.
+  */
+  p = (ExprHasProperty(pX, EP_xIsSelect) ? pX->x.pSelect : 0);
+  if( ALWAYS(pParse->nErr==0) && isCandidateForInOpt(p) ){
+    sqlite3 *db = pParse->db;              /* Database connection */
+    Table *pTab;                           /* Table <table>. */
+    Expr *pExpr;                           /* Expression <column> */
+    i16 iCol;                              /* Index of column <column> */
+    i16 iDb;                               /* Database idx for pTab */
+
+    assert( p );                        /* Because of isCandidateForInOpt(p) */
+    assert( p->pEList!=0 );             /* Because of isCandidateForInOpt(p) */
+    assert( p->pEList->a[0].pExpr!=0 ); /* Because of isCandidateForInOpt(p) */
+    assert( p->pSrc!=0 );               /* Because of isCandidateForInOpt(p) */
+    pTab = p->pSrc->a[0].pTab;
+    pExpr = p->pEList->a[0].pExpr;
+    iCol = (i16)pExpr->iColumn;
+   
+    /* Code an OP_Transaction and OP_TableLock for <table>. */
+    iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+    sqlite3CodeVerifySchema(pParse, iDb);
+    sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
+
+    /* This function is only called from two places. In both cases the vdbe
+    ** has already been allocated. So assume sqlite3GetVdbe() is always
+    ** successful here.
+    */
+    assert(v);
+    if( iCol<0 ){
+      int iAddr = sqlite3CodeOnce(pParse);
+      VdbeCoverage(v);
+
+      sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
+      eType = IN_INDEX_ROWID;
+
+      sqlite3VdbeJumpHere(v, iAddr);
+    }else{
+      Index *pIdx;                         /* Iterator variable */
+
+      /* The collation sequence used by the comparison. If an index is to
+      ** be used in place of a temp-table, it must be ordered according
+      ** to this collation sequence.  */
+      CollSeq *pReq = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pExpr);
+
+      /* Check that the affinity that will be used to perform the 
+      ** comparison is the same as the affinity of the column. If
+      ** it is not, it is not possible to use any index.
+      */
+      int affinity_ok = sqlite3IndexAffinityOk(pX, pTab->aCol[iCol].affinity);
+
+      for(pIdx=pTab->pIndex; pIdx && eType==0 && affinity_ok; pIdx=pIdx->pNext){
+        if( (pIdx->aiColumn[0]==iCol)
+         && sqlite3FindCollSeq(db, ENC(db), pIdx->azColl[0], 0)==pReq
+         && (!mustBeUnique || (pIdx->nKeyCol==1 && IsUniqueIndex(pIdx)))
+        ){
+          int iAddr = sqlite3CodeOnce(pParse); VdbeCoverage(v);
+          sqlite3VdbeAddOp3(v, OP_OpenRead, iTab, pIdx->tnum, iDb);
+          sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+          VdbeComment((v, "%s", pIdx->zName));
+          assert( IN_INDEX_INDEX_DESC == IN_INDEX_INDEX_ASC+1 );
+          eType = IN_INDEX_INDEX_ASC + pIdx->aSortOrder[0];
+
+          if( prRhsHasNull && !pTab->aCol[iCol].notNull ){
+            *prRhsHasNull = ++pParse->nMem;
+            sqlite3SetHasNullFlag(v, iTab, *prRhsHasNull);
+          }
+          sqlite3VdbeJumpHere(v, iAddr);
+        }
+      }
+    }
+  }
+
+  /* If no preexisting index is available for the IN clause
+  ** and IN_INDEX_NOOP is an allowed reply
+  ** and the RHS of the IN operator is a list, not a subquery
+  ** and the RHS is not contant or has two or fewer terms,
+  ** then it is not worth creating an ephemeral table to evaluate
+  ** the IN operator so return IN_INDEX_NOOP.
+  */
+  if( eType==0
+   && (inFlags & IN_INDEX_NOOP_OK)
+   && !ExprHasProperty(pX, EP_xIsSelect)
+   && (!sqlite3InRhsIsConstant(pX) || pX->x.pList->nExpr<=2)
+  ){
+    eType = IN_INDEX_NOOP;
+  }
+     
+
+  if( eType==0 ){
+    /* Could not find an existing table or index to use as the RHS b-tree.
+    ** We will have to generate an ephemeral table to do the job.
+    */
+    u32 savedNQueryLoop = pParse->nQueryLoop;
+    int rMayHaveNull = 0;
+    eType = IN_INDEX_EPH;
+    if( inFlags & IN_INDEX_LOOP ){
+      pParse->nQueryLoop = 0;
+      if( pX->pLeft->iColumn<0 && !ExprHasProperty(pX, EP_xIsSelect) ){
+        eType = IN_INDEX_ROWID;
+      }
+    }else if( prRhsHasNull ){
+      *prRhsHasNull = rMayHaveNull = ++pParse->nMem;
+    }
+    sqlite3CodeSubselect(pParse, pX, rMayHaveNull, eType==IN_INDEX_ROWID);
+    pParse->nQueryLoop = savedNQueryLoop;
+  }else{
+    pX->iTable = iTab;
+  }
+  return eType;
+}
+#endif
+
+/*
+** Generate code for scalar subqueries used as a subquery expression, EXISTS,
+** or IN operators.  Examples:
+**
+**     (SELECT a FROM b)          -- subquery
+**     EXISTS (SELECT a FROM b)   -- EXISTS subquery
+**     x IN (4,5,11)              -- IN operator with list on right-hand side
+**     x IN (SELECT a FROM b)     -- IN operator with subquery on the right
+**
+** The pExpr parameter describes the expression that contains the IN
+** operator or subquery.
+**
+** If parameter isRowid is non-zero, then expression pExpr is guaranteed
+** to be of the form "<rowid> IN (?, ?, ?)", where <rowid> is a reference
+** to some integer key column of a table B-Tree. In this case, use an
+** intkey B-Tree to store the set of IN(...) values instead of the usual
+** (slower) variable length keys B-Tree.
+**
+** If rMayHaveNull is non-zero, that means that the operation is an IN
+** (not a SELECT or EXISTS) and that the RHS might contains NULLs.
+** All this routine does is initialize the register given by rMayHaveNull
+** to NULL.  Calling routines will take care of changing this register
+** value to non-NULL if the RHS is NULL-free.
+**
+** For a SELECT or EXISTS operator, return the register that holds the
+** result.  For IN operators or if an error occurs, the return value is 0.
+*/
+#ifndef SQLITE_OMIT_SUBQUERY
+SQLITE_PRIVATE int sqlite3CodeSubselect(
+  Parse *pParse,          /* Parsing context */
+  Expr *pExpr,            /* The IN, SELECT, or EXISTS operator */
+  int rHasNullFlag,       /* Register that records whether NULLs exist in RHS */
+  int isRowid             /* If true, LHS of IN operator is a rowid */
+){
+  int jmpIfDynamic = -1;                      /* One-time test address */
+  int rReg = 0;                           /* Register storing resulting */
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  if( NEVER(v==0) ) return 0;
+  sqlite3ExprCachePush(pParse);
+
+  /* This code must be run in its entirety every time it is encountered
+  ** if any of the following is true:
+  **
+  **    *  The right-hand side is a correlated subquery
+  **    *  The right-hand side is an expression list containing variables
+  **    *  We are inside a trigger
+  **
+  ** If all of the above are false, then we can run this code just once
+  ** save the results, and reuse the same result on subsequent invocations.
+  */
+  if( !ExprHasProperty(pExpr, EP_VarSelect) ){
+    jmpIfDynamic = sqlite3CodeOnce(pParse); VdbeCoverage(v);
+  }
+
+#ifndef SQLITE_OMIT_EXPLAIN
+  if( pParse->explain==2 ){
+    char *zMsg = sqlite3MPrintf(
+        pParse->db, "EXECUTE %s%s SUBQUERY %d", jmpIfDynamic>=0?"":"CORRELATED ",
+        pExpr->op==TK_IN?"LIST":"SCALAR", pParse->iNextSelectId
+    );
+    sqlite3VdbeAddOp4(v, OP_Explain, pParse->iSelectId, 0, 0, zMsg, P4_DYNAMIC);
+  }
+#endif
+
+  switch( pExpr->op ){
+    case TK_IN: {
+      char affinity;              /* Affinity of the LHS of the IN */
+      int addr;                   /* Address of OP_OpenEphemeral instruction */
+      Expr *pLeft = pExpr->pLeft; /* the LHS of the IN operator */
+      KeyInfo *pKeyInfo = 0;      /* Key information */
+
+      affinity = sqlite3ExprAffinity(pLeft);
+
+      /* Whether this is an 'x IN(SELECT...)' or an 'x IN(<exprlist>)'
+      ** expression it is handled the same way.  An ephemeral table is 
+      ** filled with single-field index keys representing the results
+      ** from the SELECT or the <exprlist>.
+      **
+      ** If the 'x' expression is a column value, or the SELECT...
+      ** statement returns a column value, then the affinity of that
+      ** column is used to build the index keys. If both 'x' and the
+      ** SELECT... statement are columns, then numeric affinity is used
+      ** if either column has NUMERIC or INTEGER affinity. If neither
+      ** 'x' nor the SELECT... statement are columns, then numeric affinity
+      ** is used.
+      */
+      pExpr->iTable = pParse->nTab++;
+      addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pExpr->iTable, !isRowid);
+      pKeyInfo = isRowid ? 0 : sqlite3KeyInfoAlloc(pParse->db, 1, 1);
+
+      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+        /* Case 1:     expr IN (SELECT ...)
+        **
+        ** Generate code to write the results of the select into the temporary
+        ** table allocated and opened above.
+        */
+        Select *pSelect = pExpr->x.pSelect;
+        SelectDest dest;
+        ExprList *pEList;
+
+        assert( !isRowid );
+        sqlite3SelectDestInit(&dest, SRT_Set, pExpr->iTable);
+        dest.affSdst = (u8)affinity;
+        assert( (pExpr->iTable&0x0000FFFF)==pExpr->iTable );
+        pSelect->iLimit = 0;
+        testcase( pSelect->selFlags & SF_Distinct );
+        testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */
+        if( sqlite3Select(pParse, pSelect, &dest) ){
+          sqlite3KeyInfoUnref(pKeyInfo);
+          return 0;
+        }
+        pEList = pSelect->pEList;
+        assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */
+        assert( pEList!=0 );
+        assert( pEList->nExpr>0 );
+        assert( sqlite3KeyInfoIsWriteable(pKeyInfo) );
+        pKeyInfo->aColl[0] = sqlite3BinaryCompareCollSeq(pParse, pExpr->pLeft,
+                                                         pEList->a[0].pExpr);
+      }else if( ALWAYS(pExpr->x.pList!=0) ){
+        /* Case 2:     expr IN (exprlist)
+        **
+        ** For each expression, build an index key from the evaluation and
+        ** store it in the temporary table. If <expr> is a column, then use
+        ** that columns affinity when building index keys. If <expr> is not
+        ** a column, use numeric affinity.
+        */
+        int i;
+        ExprList *pList = pExpr->x.pList;
+        struct ExprList_item *pItem;
+        int r1, r2, r3;
+
+        if( !affinity ){
+          affinity = SQLITE_AFF_NONE;
+        }
+        if( pKeyInfo ){
+          assert( sqlite3KeyInfoIsWriteable(pKeyInfo) );
+          pKeyInfo->aColl[0] = sqlite3ExprCollSeq(pParse, pExpr->pLeft);
+        }
+
+        /* Loop through each expression in <exprlist>. */
+        r1 = sqlite3GetTempReg(pParse);
+        r2 = sqlite3GetTempReg(pParse);
+        if( isRowid ) sqlite3VdbeAddOp2(v, OP_Null, 0, r2);
+        for(i=pList->nExpr, pItem=pList->a; i>0; i--, pItem++){
+          Expr *pE2 = pItem->pExpr;
+          int iValToIns;
+
+          /* If the expression is not constant then we will need to
+          ** disable the test that was generated above that makes sure
+          ** this code only executes once.  Because for a non-constant
+          ** expression we need to rerun this code each time.
+          */
+          if( jmpIfDynamic>=0 && !sqlite3ExprIsConstant(pE2) ){
+            sqlite3VdbeChangeToNoop(v, jmpIfDynamic);
+            jmpIfDynamic = -1;
+          }
+
+          /* Evaluate the expression and insert it into the temp table */
+          if( isRowid && sqlite3ExprIsInteger(pE2, &iValToIns) ){
+            sqlite3VdbeAddOp3(v, OP_InsertInt, pExpr->iTable, r2, iValToIns);
+          }else{
+            r3 = sqlite3ExprCodeTarget(pParse, pE2, r1);
+            if( isRowid ){
+              sqlite3VdbeAddOp2(v, OP_MustBeInt, r3,
+                                sqlite3VdbeCurrentAddr(v)+2);
+              VdbeCoverage(v);
+              sqlite3VdbeAddOp3(v, OP_Insert, pExpr->iTable, r2, r3);
+            }else{
+              sqlite3VdbeAddOp4(v, OP_MakeRecord, r3, 1, r2, &affinity, 1);
+              sqlite3ExprCacheAffinityChange(pParse, r3, 1);
+              sqlite3VdbeAddOp2(v, OP_IdxInsert, pExpr->iTable, r2);
+            }
+          }
+        }
+        sqlite3ReleaseTempReg(pParse, r1);
+        sqlite3ReleaseTempReg(pParse, r2);
+      }
+      if( pKeyInfo ){
+        sqlite3VdbeChangeP4(v, addr, (void *)pKeyInfo, P4_KEYINFO);
+      }
+      break;
+    }
+
+    case TK_EXISTS:
+    case TK_SELECT:
+    default: {
+      /* If this has to be a scalar SELECT.  Generate code to put the
+      ** value of this select in a memory cell and record the number
+      ** of the memory cell in iColumn.  If this is an EXISTS, write
+      ** an integer 0 (not exists) or 1 (exists) into a memory cell
+      ** and record that memory cell in iColumn.
+      */
+      Select *pSel;                         /* SELECT statement to encode */
+      SelectDest dest;                      /* How to deal with SELECt result */
+
+      testcase( pExpr->op==TK_EXISTS );
+      testcase( pExpr->op==TK_SELECT );
+      assert( pExpr->op==TK_EXISTS || pExpr->op==TK_SELECT );
+
+      assert( ExprHasProperty(pExpr, EP_xIsSelect) );
+      pSel = pExpr->x.pSelect;
+      sqlite3SelectDestInit(&dest, 0, ++pParse->nMem);
+      if( pExpr->op==TK_SELECT ){
+        dest.eDest = SRT_Mem;
+        dest.iSdst = dest.iSDParm;
+        sqlite3VdbeAddOp2(v, OP_Null, 0, dest.iSDParm);
+        VdbeComment((v, "Init subquery result"));
+      }else{
+        dest.eDest = SRT_Exists;
+        sqlite3VdbeAddOp2(v, OP_Integer, 0, dest.iSDParm);
+        VdbeComment((v, "Init EXISTS result"));
+      }
+      sqlite3ExprDelete(pParse->db, pSel->pLimit);
+      pSel->pLimit = sqlite3PExpr(pParse, TK_INTEGER, 0, 0,
+                                  &sqlite3IntTokens[1]);
+      pSel->iLimit = 0;
+      if( sqlite3Select(pParse, pSel, &dest) ){
+        return 0;
+      }
+      rReg = dest.iSDParm;
+      ExprSetVVAProperty(pExpr, EP_NoReduce);
+      break;
+    }
+  }
+
+  if( rHasNullFlag ){
+    sqlite3SetHasNullFlag(v, pExpr->iTable, rHasNullFlag);
+  }
+
+  if( jmpIfDynamic>=0 ){
+    sqlite3VdbeJumpHere(v, jmpIfDynamic);
+  }
+  sqlite3ExprCachePop(pParse);
+
+  return rReg;
+}
+#endif /* SQLITE_OMIT_SUBQUERY */
+
+#ifndef SQLITE_OMIT_SUBQUERY
+/*
+** Generate code for an IN expression.
+**
+**      x IN (SELECT ...)
+**      x IN (value, value, ...)
+**
+** The left-hand side (LHS) is a scalar expression.  The right-hand side (RHS)
+** is an array of zero or more values.  The expression is true if the LHS is
+** contained within the RHS.  The value of the expression is unknown (NULL)
+** if the LHS is NULL or if the LHS is not contained within the RHS and the
+** RHS contains one or more NULL values.
+**
+** This routine generates code that jumps to destIfFalse if the LHS is not 
+** contained within the RHS.  If due to NULLs we cannot determine if the LHS
+** is contained in the RHS then jump to destIfNull.  If the LHS is contained
+** within the RHS then fall through.
+*/
+static void sqlite3ExprCodeIN(
+  Parse *pParse,        /* Parsing and code generating context */
+  Expr *pExpr,          /* The IN expression */
+  int destIfFalse,      /* Jump here if LHS is not contained in the RHS */
+  int destIfNull        /* Jump here if the results are unknown due to NULLs */
+){
+  int rRhsHasNull = 0;  /* Register that is true if RHS contains NULL values */
+  char affinity;        /* Comparison affinity to use */
+  int eType;            /* Type of the RHS */
+  int r1;               /* Temporary use register */
+  Vdbe *v;              /* Statement under construction */
+
+  /* Compute the RHS.   After this step, the table with cursor
+  ** pExpr->iTable will contains the values that make up the RHS.
+  */
+  v = pParse->pVdbe;
+  assert( v!=0 );       /* OOM detected prior to this routine */
+  VdbeNoopComment((v, "begin IN expr"));
+  eType = sqlite3FindInIndex(pParse, pExpr,
+                             IN_INDEX_MEMBERSHIP | IN_INDEX_NOOP_OK,
+                             destIfFalse==destIfNull ? 0 : &rRhsHasNull);
+
+  /* Figure out the affinity to use to create a key from the results
+  ** of the expression. affinityStr stores a static string suitable for
+  ** P4 of OP_MakeRecord.
+  */
+  affinity = comparisonAffinity(pExpr);
+
+  /* Code the LHS, the <expr> from "<expr> IN (...)".
+  */
+  sqlite3ExprCachePush(pParse);
+  r1 = sqlite3GetTempReg(pParse);
+  sqlite3ExprCode(pParse, pExpr->pLeft, r1);
+
+  /* If sqlite3FindInIndex() did not find or create an index that is
+  ** suitable for evaluating the IN operator, then evaluate using a
+  ** sequence of comparisons.
+  */
+  if( eType==IN_INDEX_NOOP ){
+    ExprList *pList = pExpr->x.pList;
+    CollSeq *pColl = sqlite3ExprCollSeq(pParse, pExpr->pLeft);
+    int labelOk = sqlite3VdbeMakeLabel(v);
+    int r2, regToFree;
+    int regCkNull = 0;
+    int ii;
+    assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+    if( destIfNull!=destIfFalse ){
+      regCkNull = sqlite3GetTempReg(pParse);
+      sqlite3VdbeAddOp3(v, OP_BitAnd, r1, r1, regCkNull);
+    }
+    for(ii=0; ii<pList->nExpr; ii++){
+      r2 = sqlite3ExprCodeTemp(pParse, pList->a[ii].pExpr, &regToFree);
+      if( regCkNull && sqlite3ExprCanBeNull(pList->a[ii].pExpr) ){
+        sqlite3VdbeAddOp3(v, OP_BitAnd, regCkNull, r2, regCkNull);
+      }
+      if( ii<pList->nExpr-1 || destIfNull!=destIfFalse ){
+        sqlite3VdbeAddOp4(v, OP_Eq, r1, labelOk, r2,
+                          (void*)pColl, P4_COLLSEQ);
+        VdbeCoverageIf(v, ii<pList->nExpr-1);
+        VdbeCoverageIf(v, ii==pList->nExpr-1);
+        sqlite3VdbeChangeP5(v, affinity);
+      }else{
+        assert( destIfNull==destIfFalse );
+        sqlite3VdbeAddOp4(v, OP_Ne, r1, destIfFalse, r2,
+                          (void*)pColl, P4_COLLSEQ); VdbeCoverage(v);
+        sqlite3VdbeChangeP5(v, affinity | SQLITE_JUMPIFNULL);
+      }
+      sqlite3ReleaseTempReg(pParse, regToFree);
+    }
+    if( regCkNull ){
+      sqlite3VdbeAddOp2(v, OP_IsNull, regCkNull, destIfNull); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfFalse);
+    }
+    sqlite3VdbeResolveLabel(v, labelOk);
+    sqlite3ReleaseTempReg(pParse, regCkNull);
+  }else{
+  
+    /* If the LHS is NULL, then the result is either false or NULL depending
+    ** on whether the RHS is empty or not, respectively.
+    */
+    if( sqlite3ExprCanBeNull(pExpr->pLeft) ){
+      if( destIfNull==destIfFalse ){
+        /* Shortcut for the common case where the false and NULL outcomes are
+        ** the same. */
+        sqlite3VdbeAddOp2(v, OP_IsNull, r1, destIfNull); VdbeCoverage(v);
+      }else{
+        int addr1 = sqlite3VdbeAddOp1(v, OP_NotNull, r1); VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_Rewind, pExpr->iTable, destIfFalse);
+        VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfNull);
+        sqlite3VdbeJumpHere(v, addr1);
+      }
+    }
+  
+    if( eType==IN_INDEX_ROWID ){
+      /* In this case, the RHS is the ROWID of table b-tree
+      */
+      sqlite3VdbeAddOp2(v, OP_MustBeInt, r1, destIfFalse); VdbeCoverage(v);
+      sqlite3VdbeAddOp3(v, OP_NotExists, pExpr->iTable, destIfFalse, r1);
+      VdbeCoverage(v);
+    }else{
+      /* In this case, the RHS is an index b-tree.
+      */
+      sqlite3VdbeAddOp4(v, OP_Affinity, r1, 1, 0, &affinity, 1);
+  
+      /* If the set membership test fails, then the result of the 
+      ** "x IN (...)" expression must be either 0 or NULL. If the set
+      ** contains no NULL values, then the result is 0. If the set 
+      ** contains one or more NULL values, then the result of the
+      ** expression is also NULL.
+      */
+      assert( destIfFalse!=destIfNull || rRhsHasNull==0 );
+      if( rRhsHasNull==0 ){
+        /* This branch runs if it is known at compile time that the RHS
+        ** cannot contain NULL values. This happens as the result
+        ** of a "NOT NULL" constraint in the database schema.
+        **
+        ** Also run this branch if NULL is equivalent to FALSE
+        ** for this particular IN operator.
+        */
+        sqlite3VdbeAddOp4Int(v, OP_NotFound, pExpr->iTable, destIfFalse, r1, 1);
+        VdbeCoverage(v);
+      }else{
+        /* In this branch, the RHS of the IN might contain a NULL and
+        ** the presence of a NULL on the RHS makes a difference in the
+        ** outcome.
+        */
+        int j1;
+  
+        /* First check to see if the LHS is contained in the RHS.  If so,
+        ** then the answer is TRUE the presence of NULLs in the RHS does
+        ** not matter.  If the LHS is not contained in the RHS, then the
+        ** answer is NULL if the RHS contains NULLs and the answer is
+        ** FALSE if the RHS is NULL-free.
+        */
+        j1 = sqlite3VdbeAddOp4Int(v, OP_Found, pExpr->iTable, 0, r1, 1);
+        VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_IsNull, rRhsHasNull, destIfNull);
+        VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, destIfFalse);
+        sqlite3VdbeJumpHere(v, j1);
+      }
+    }
+  }
+  sqlite3ReleaseTempReg(pParse, r1);
+  sqlite3ExprCachePop(pParse);
+  VdbeComment((v, "end IN expr"));
+}
+#endif /* SQLITE_OMIT_SUBQUERY */
+
+/*
+** Duplicate an 8-byte value
+*/
+static char *dup8bytes(Vdbe *v, const char *in){
+  char *out = sqlite3DbMallocRaw(sqlite3VdbeDb(v), 8);
+  if( out ){
+    memcpy(out, in, 8);
+  }
+  return out;
+}
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+/*
+** Generate an instruction that will put the floating point
+** value described by z[0..n-1] into register iMem.
+**
+** The z[] string will probably not be zero-terminated.  But the 
+** z[n] character is guaranteed to be something that does not look
+** like the continuation of the number.
+*/
+static void codeReal(Vdbe *v, const char *z, int negateFlag, int iMem){
+  if( ALWAYS(z!=0) ){
+    double value;
+    char *zV;
+    sqlite3AtoF(z, &value, sqlite3Strlen30(z), SQLITE_UTF8);
+    assert( !sqlite3IsNaN(value) ); /* The new AtoF never returns NaN */
+    if( negateFlag ) value = -value;
+    zV = dup8bytes(v, (char*)&value);
+    sqlite3VdbeAddOp4(v, OP_Real, 0, iMem, 0, zV, P4_REAL);
+  }
+}
+#endif
+
+
+/*
+** Generate an instruction that will put the integer describe by
+** text z[0..n-1] into register iMem.
+**
+** Expr.u.zToken is always UTF8 and zero-terminated.
+*/
+static void codeInteger(Parse *pParse, Expr *pExpr, int negFlag, int iMem){
+  Vdbe *v = pParse->pVdbe;
+  if( pExpr->flags & EP_IntValue ){
+    int i = pExpr->u.iValue;
+    assert( i>=0 );
+    if( negFlag ) i = -i;
+    sqlite3VdbeAddOp2(v, OP_Integer, i, iMem);
+  }else{
+    int c;
+    i64 value;
+    const char *z = pExpr->u.zToken;
+    assert( z!=0 );
+    c = sqlite3DecOrHexToI64(z, &value);
+    if( c==0 || (c==2 && negFlag) ){
+      char *zV;
+      if( negFlag ){ value = c==2 ? SMALLEST_INT64 : -value; }
+      zV = dup8bytes(v, (char*)&value);
+      sqlite3VdbeAddOp4(v, OP_Int64, 0, iMem, 0, zV, P4_INT64);
+    }else{
+#ifdef SQLITE_OMIT_FLOATING_POINT
+      sqlite3ErrorMsg(pParse, "oversized integer: %s%s", negFlag ? "-" : "", z);
+#else
+#ifndef SQLITE_OMIT_HEX_INTEGER
+      if( sqlite3_strnicmp(z,"0x",2)==0 ){
+        sqlite3ErrorMsg(pParse, "hex literal too big: %s", z);
+      }else
+#endif
+      {
+        codeReal(v, z, negFlag, iMem);
+      }
+#endif
+    }
+  }
+}
+
+/*
+** Clear a cache entry.
+*/
+static void cacheEntryClear(Parse *pParse, struct yColCache *p){
+  if( p->tempReg ){
+    if( pParse->nTempReg<ArraySize(pParse->aTempReg) ){
+      pParse->aTempReg[pParse->nTempReg++] = p->iReg;
+    }
+    p->tempReg = 0;
+  }
+}
+
+
+/*
+** Record in the column cache that a particular column from a
+** particular table is stored in a particular register.
+*/
+SQLITE_PRIVATE void sqlite3ExprCacheStore(Parse *pParse, int iTab, int iCol, int iReg){
+  int i;
+  int minLru;
+  int idxLru;
+  struct yColCache *p;
+
+  assert( iReg>0 );  /* Register numbers are always positive */
+  assert( iCol>=-1 && iCol<32768 );  /* Finite column numbers */
+
+  /* The SQLITE_ColumnCache flag disables the column cache.  This is used
+  ** for testing only - to verify that SQLite always gets the same answer
+  ** with and without the column cache.
+  */
+  if( OptimizationDisabled(pParse->db, SQLITE_ColumnCache) ) return;
+
+  /* First replace any existing entry.
+  **
+  ** Actually, the way the column cache is currently used, we are guaranteed
+  ** that the object will never already be in cache.  Verify this guarantee.
+  */
+#ifndef NDEBUG
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    assert( p->iReg==0 || p->iTable!=iTab || p->iColumn!=iCol );
+  }
+#endif
+
+  /* Find an empty slot and replace it */
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->iReg==0 ){
+      p->iLevel = pParse->iCacheLevel;
+      p->iTable = iTab;
+      p->iColumn = iCol;
+      p->iReg = iReg;
+      p->tempReg = 0;
+      p->lru = pParse->iCacheCnt++;
+      return;
+    }
+  }
+
+  /* Replace the last recently used */
+  minLru = 0x7fffffff;
+  idxLru = -1;
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->lru<minLru ){
+      idxLru = i;
+      minLru = p->lru;
+    }
+  }
+  if( ALWAYS(idxLru>=0) ){
+    p = &pParse->aColCache[idxLru];
+    p->iLevel = pParse->iCacheLevel;
+    p->iTable = iTab;
+    p->iColumn = iCol;
+    p->iReg = iReg;
+    p->tempReg = 0;
+    p->lru = pParse->iCacheCnt++;
+    return;
+  }
+}
+
+/*
+** Indicate that registers between iReg..iReg+nReg-1 are being overwritten.
+** Purge the range of registers from the column cache.
+*/
+SQLITE_PRIVATE void sqlite3ExprCacheRemove(Parse *pParse, int iReg, int nReg){
+  int i;
+  int iLast = iReg + nReg - 1;
+  struct yColCache *p;
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    int r = p->iReg;
+    if( r>=iReg && r<=iLast ){
+      cacheEntryClear(pParse, p);
+      p->iReg = 0;
+    }
+  }
+}
+
+/*
+** Remember the current column cache context.  Any new entries added
+** added to the column cache after this call are removed when the
+** corresponding pop occurs.
+*/
+SQLITE_PRIVATE void sqlite3ExprCachePush(Parse *pParse){
+  pParse->iCacheLevel++;
+#ifdef SQLITE_DEBUG
+  if( pParse->db->flags & SQLITE_VdbeAddopTrace ){
+    printf("PUSH to %d\n", pParse->iCacheLevel);
+  }
+#endif
+}
+
+/*
+** Remove from the column cache any entries that were added since the
+** the previous sqlite3ExprCachePush operation.  In other words, restore
+** the cache to the state it was in prior the most recent Push.
+*/
+SQLITE_PRIVATE void sqlite3ExprCachePop(Parse *pParse){
+  int i;
+  struct yColCache *p;
+  assert( pParse->iCacheLevel>=1 );
+  pParse->iCacheLevel--;
+#ifdef SQLITE_DEBUG
+  if( pParse->db->flags & SQLITE_VdbeAddopTrace ){
+    printf("POP  to %d\n", pParse->iCacheLevel);
+  }
+#endif
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->iReg && p->iLevel>pParse->iCacheLevel ){
+      cacheEntryClear(pParse, p);
+      p->iReg = 0;
+    }
+  }
+}
+
+/*
+** When a cached column is reused, make sure that its register is
+** no longer available as a temp register.  ticket #3879:  that same
+** register might be in the cache in multiple places, so be sure to
+** get them all.
+*/
+static void sqlite3ExprCachePinRegister(Parse *pParse, int iReg){
+  int i;
+  struct yColCache *p;
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->iReg==iReg ){
+      p->tempReg = 0;
+    }
+  }
+}
+
+/*
+** Generate code to extract the value of the iCol-th column of a table.
+*/
+SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable(
+  Vdbe *v,        /* The VDBE under construction */
+  Table *pTab,    /* The table containing the value */
+  int iTabCur,    /* The table cursor.  Or the PK cursor for WITHOUT ROWID */
+  int iCol,       /* Index of the column to extract */
+  int regOut      /* Extract the value into this register */
+){
+  if( iCol<0 || iCol==pTab->iPKey ){
+    sqlite3VdbeAddOp2(v, OP_Rowid, iTabCur, regOut);
+  }else{
+    int op = IsVirtual(pTab) ? OP_VColumn : OP_Column;
+    int x = iCol;
+    if( !HasRowid(pTab) ){
+      x = sqlite3ColumnOfIndex(sqlite3PrimaryKeyIndex(pTab), iCol);
+    }
+    sqlite3VdbeAddOp3(v, op, iTabCur, x, regOut);
+  }
+  if( iCol>=0 ){
+    sqlite3ColumnDefault(v, pTab, iCol, regOut);
+  }
+}
+
+/*
+** Generate code that will extract the iColumn-th column from
+** table pTab and store the column value in a register.  An effort
+** is made to store the column value in register iReg, but this is
+** not guaranteed.  The location of the column value is returned.
+**
+** There must be an open cursor to pTab in iTable when this routine
+** is called.  If iColumn<0 then code is generated that extracts the rowid.
+*/
+SQLITE_PRIVATE int sqlite3ExprCodeGetColumn(
+  Parse *pParse,   /* Parsing and code generating context */
+  Table *pTab,     /* Description of the table we are reading from */
+  int iColumn,     /* Index of the table column */
+  int iTable,      /* The cursor pointing to the table */
+  int iReg,        /* Store results here */
+  u8 p5            /* P5 value for OP_Column */
+){
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  struct yColCache *p;
+
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->iReg>0 && p->iTable==iTable && p->iColumn==iColumn ){
+      p->lru = pParse->iCacheCnt++;
+      sqlite3ExprCachePinRegister(pParse, p->iReg);
+      return p->iReg;
+    }
+  }  
+  assert( v!=0 );
+  sqlite3ExprCodeGetColumnOfTable(v, pTab, iTable, iColumn, iReg);
+  if( p5 ){
+    sqlite3VdbeChangeP5(v, p5);
+  }else{   
+    sqlite3ExprCacheStore(pParse, iTable, iColumn, iReg);
+  }
+  return iReg;
+}
+
+/*
+** Clear all column cache entries.
+*/
+SQLITE_PRIVATE void sqlite3ExprCacheClear(Parse *pParse){
+  int i;
+  struct yColCache *p;
+
+#if SQLITE_DEBUG
+  if( pParse->db->flags & SQLITE_VdbeAddopTrace ){
+    printf("CLEAR\n");
+  }
+#endif
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    if( p->iReg ){
+      cacheEntryClear(pParse, p);
+      p->iReg = 0;
+    }
+  }
+}
+
+/*
+** Record the fact that an affinity change has occurred on iCount
+** registers starting with iStart.
+*/
+SQLITE_PRIVATE void sqlite3ExprCacheAffinityChange(Parse *pParse, int iStart, int iCount){
+  sqlite3ExprCacheRemove(pParse, iStart, iCount);
+}
+
+/*
+** Generate code to move content from registers iFrom...iFrom+nReg-1
+** over to iTo..iTo+nReg-1. Keep the column cache up-to-date.
+*/
+SQLITE_PRIVATE void sqlite3ExprCodeMove(Parse *pParse, int iFrom, int iTo, int nReg){
+  assert( iFrom>=iTo+nReg || iFrom+nReg<=iTo );
+  sqlite3VdbeAddOp3(pParse->pVdbe, OP_Move, iFrom, iTo, nReg);
+  sqlite3ExprCacheRemove(pParse, iFrom, nReg);
+}
+
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
+/*
+** Return true if any register in the range iFrom..iTo (inclusive)
+** is used as part of the column cache.
+**
+** This routine is used within assert() and testcase() macros only
+** and does not appear in a normal build.
+*/
+static int usedAsColumnCache(Parse *pParse, int iFrom, int iTo){
+  int i;
+  struct yColCache *p;
+  for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+    int r = p->iReg;
+    if( r>=iFrom && r<=iTo ) return 1;    /*NO_TEST*/
+  }
+  return 0;
+}
+#endif /* SQLITE_DEBUG || SQLITE_COVERAGE_TEST */
+
+/*
+** Convert an expression node to a TK_REGISTER
+*/
+static void exprToRegister(Expr *p, int iReg){
+  p->op2 = p->op;
+  p->op = TK_REGISTER;
+  p->iTable = iReg;
+  ExprClearProperty(p, EP_Skip);
+}
+
+/*
+** Generate code into the current Vdbe to evaluate the given
+** expression.  Attempt to store the results in register "target".
+** Return the register where results are stored.
+**
+** With this routine, there is no guarantee that results will
+** be stored in target.  The result might be stored in some other
+** register if it is convenient to do so.  The calling function
+** must check the return code and move the results to the desired
+** register.
+*/
+SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target){
+  Vdbe *v = pParse->pVdbe;  /* The VM under construction */
+  int op;                   /* The opcode being coded */
+  int inReg = target;       /* Results stored in register inReg */
+  int regFree1 = 0;         /* If non-zero free this temporary register */
+  int regFree2 = 0;         /* If non-zero free this temporary register */
+  int r1, r2, r3, r4;       /* Various register numbers */
+  sqlite3 *db = pParse->db; /* The database connection */
+  Expr tempX;               /* Temporary expression node */
+
+  assert( target>0 && target<=pParse->nMem );
+  if( v==0 ){
+    assert( pParse->db->mallocFailed );
+    return 0;
+  }
+
+  if( pExpr==0 ){
+    op = TK_NULL;
+  }else{
+    op = pExpr->op;
+  }
+  switch( op ){
+    case TK_AGG_COLUMN: {
+      AggInfo *pAggInfo = pExpr->pAggInfo;
+      struct AggInfo_col *pCol = &pAggInfo->aCol[pExpr->iAgg];
+      if( !pAggInfo->directMode ){
+        assert( pCol->iMem>0 );
+        inReg = pCol->iMem;
+        break;
+      }else if( pAggInfo->useSortingIdx ){
+        sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab,
+                              pCol->iSorterColumn, target);
+        break;
+      }
+      /* Otherwise, fall thru into the TK_COLUMN case */
+    }
+    case TK_COLUMN: {
+      int iTab = pExpr->iTable;
+      if( iTab<0 ){
+        if( pParse->ckBase>0 ){
+          /* Generating CHECK constraints or inserting into partial index */
+          inReg = pExpr->iColumn + pParse->ckBase;
+          break;
+        }else{
+          /* Deleting from a partial index */
+          iTab = pParse->iPartIdxTab;
+        }
+      }
+      inReg = sqlite3ExprCodeGetColumn(pParse, pExpr->pTab,
+                               pExpr->iColumn, iTab, target,
+                               pExpr->op2);
+      break;
+    }
+    case TK_INTEGER: {
+      codeInteger(pParse, pExpr, 0, target);
+      break;
+    }
+#ifndef SQLITE_OMIT_FLOATING_POINT
+    case TK_FLOAT: {
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      codeReal(v, pExpr->u.zToken, 0, target);
+      break;
+    }
+#endif
+    case TK_STRING: {
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      sqlite3VdbeAddOp4(v, OP_String8, 0, target, 0, pExpr->u.zToken, 0);
+      break;
+    }
+    case TK_NULL: {
+      sqlite3VdbeAddOp2(v, OP_Null, 0, target);
+      break;
+    }
+#ifndef SQLITE_OMIT_BLOB_LITERAL
+    case TK_BLOB: {
+      int n;
+      const char *z;
+      char *zBlob;
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      assert( pExpr->u.zToken[0]=='x' || pExpr->u.zToken[0]=='X' );
+      assert( pExpr->u.zToken[1]=='\'' );
+      z = &pExpr->u.zToken[2];
+      n = sqlite3Strlen30(z) - 1;
+      assert( z[n]=='\'' );
+      zBlob = sqlite3HexToBlob(sqlite3VdbeDb(v), z, n);
+      sqlite3VdbeAddOp4(v, OP_Blob, n/2, target, 0, zBlob, P4_DYNAMIC);
+      break;
+    }
+#endif
+    case TK_VARIABLE: {
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      assert( pExpr->u.zToken!=0 );
+      assert( pExpr->u.zToken[0]!=0 );
+      sqlite3VdbeAddOp2(v, OP_Variable, pExpr->iColumn, target);
+      if( pExpr->u.zToken[1]!=0 ){
+        assert( pExpr->u.zToken[0]=='?' 
+             || strcmp(pExpr->u.zToken, pParse->azVar[pExpr->iColumn-1])==0 );
+        sqlite3VdbeChangeP4(v, -1, pParse->azVar[pExpr->iColumn-1], P4_STATIC);
+      }
+      break;
+    }
+    case TK_REGISTER: {
+      inReg = pExpr->iTable;
+      break;
+    }
+    case TK_AS: {
+      inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target);
+      break;
+    }
+#ifndef SQLITE_OMIT_CAST
+    case TK_CAST: {
+      /* Expressions of the form:   CAST(pLeft AS token) */
+      inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target);
+      if( inReg!=target ){
+        sqlite3VdbeAddOp2(v, OP_SCopy, inReg, target);
+        inReg = target;
+      }
+      sqlite3VdbeAddOp2(v, OP_Cast, target,
+                        sqlite3AffinityType(pExpr->u.zToken, 0));
+      testcase( usedAsColumnCache(pParse, inReg, inReg) );
+      sqlite3ExprCacheAffinityChange(pParse, inReg, 1);
+      break;
+    }
+#endif /* SQLITE_OMIT_CAST */
+    case TK_LT:
+    case TK_LE:
+    case TK_GT:
+    case TK_GE:
+    case TK_NE:
+    case TK_EQ: {
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, inReg, SQLITE_STOREP2);
+      assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt);
+      assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le);
+      assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt);
+      assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge);
+      assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); VdbeCoverageIf(v,op==OP_Eq);
+      assert(TK_NE==OP_Ne); testcase(op==OP_Ne); VdbeCoverageIf(v,op==OP_Ne);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_IS:
+    case TK_ISNOT: {
+      testcase( op==TK_IS );
+      testcase( op==TK_ISNOT );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      op = (op==TK_IS) ? TK_EQ : TK_NE;
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, inReg, SQLITE_STOREP2 | SQLITE_NULLEQ);
+      VdbeCoverageIf(v, op==TK_EQ);
+      VdbeCoverageIf(v, op==TK_NE);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_AND:
+    case TK_OR:
+    case TK_PLUS:
+    case TK_STAR:
+    case TK_MINUS:
+    case TK_REM:
+    case TK_BITAND:
+    case TK_BITOR:
+    case TK_SLASH:
+    case TK_LSHIFT:
+    case TK_RSHIFT: 
+    case TK_CONCAT: {
+      assert( TK_AND==OP_And );            testcase( op==TK_AND );
+      assert( TK_OR==OP_Or );              testcase( op==TK_OR );
+      assert( TK_PLUS==OP_Add );           testcase( op==TK_PLUS );
+      assert( TK_MINUS==OP_Subtract );     testcase( op==TK_MINUS );
+      assert( TK_REM==OP_Remainder );      testcase( op==TK_REM );
+      assert( TK_BITAND==OP_BitAnd );      testcase( op==TK_BITAND );
+      assert( TK_BITOR==OP_BitOr );        testcase( op==TK_BITOR );
+      assert( TK_SLASH==OP_Divide );       testcase( op==TK_SLASH );
+      assert( TK_LSHIFT==OP_ShiftLeft );   testcase( op==TK_LSHIFT );
+      assert( TK_RSHIFT==OP_ShiftRight );  testcase( op==TK_RSHIFT );
+      assert( TK_CONCAT==OP_Concat );      testcase( op==TK_CONCAT );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      sqlite3VdbeAddOp3(v, op, r2, r1, target);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_UMINUS: {
+      Expr *pLeft = pExpr->pLeft;
+      assert( pLeft );
+      if( pLeft->op==TK_INTEGER ){
+        codeInteger(pParse, pLeft, 1, target);
+#ifndef SQLITE_OMIT_FLOATING_POINT
+      }else if( pLeft->op==TK_FLOAT ){
+        assert( !ExprHasProperty(pExpr, EP_IntValue) );
+        codeReal(v, pLeft->u.zToken, 1, target);
+#endif
+      }else{
+        tempX.op = TK_INTEGER;
+        tempX.flags = EP_IntValue|EP_TokenOnly;
+        tempX.u.iValue = 0;
+        r1 = sqlite3ExprCodeTemp(pParse, &tempX, &regFree1);
+        r2 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree2);
+        sqlite3VdbeAddOp3(v, OP_Subtract, r2, r1, target);
+        testcase( regFree2==0 );
+      }
+      inReg = target;
+      break;
+    }
+    case TK_BITNOT:
+    case TK_NOT: {
+      assert( TK_BITNOT==OP_BitNot );   testcase( op==TK_BITNOT );
+      assert( TK_NOT==OP_Not );         testcase( op==TK_NOT );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      testcase( regFree1==0 );
+      inReg = target;
+      sqlite3VdbeAddOp2(v, op, r1, inReg);
+      break;
+    }
+    case TK_ISNULL:
+    case TK_NOTNULL: {
+      int addr;
+      assert( TK_ISNULL==OP_IsNull );   testcase( op==TK_ISNULL );
+      assert( TK_NOTNULL==OP_NotNull ); testcase( op==TK_NOTNULL );
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, target);
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      testcase( regFree1==0 );
+      addr = sqlite3VdbeAddOp1(v, op, r1);
+      VdbeCoverageIf(v, op==TK_ISNULL);
+      VdbeCoverageIf(v, op==TK_NOTNULL);
+      sqlite3VdbeAddOp2(v, OP_Integer, 0, target);
+      sqlite3VdbeJumpHere(v, addr);
+      break;
+    }
+    case TK_AGG_FUNCTION: {
+      AggInfo *pInfo = pExpr->pAggInfo;
+      if( pInfo==0 ){
+        assert( !ExprHasProperty(pExpr, EP_IntValue) );
+        sqlite3ErrorMsg(pParse, "misuse of aggregate: %s()", pExpr->u.zToken);
+      }else{
+        inReg = pInfo->aFunc[pExpr->iAgg].iMem;
+      }
+      break;
+    }
+    case TK_FUNCTION: {
+      ExprList *pFarg;       /* List of function arguments */
+      int nFarg;             /* Number of function arguments */
+      FuncDef *pDef;         /* The function definition object */
+      int nId;               /* Length of the function name in bytes */
+      const char *zId;       /* The function name */
+      u32 constMask = 0;     /* Mask of function arguments that are constant */
+      int i;                 /* Loop counter */
+      u8 enc = ENC(db);      /* The text encoding used by this database */
+      CollSeq *pColl = 0;    /* A collating sequence */
+
+      assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+      if( ExprHasProperty(pExpr, EP_TokenOnly) ){
+        pFarg = 0;
+      }else{
+        pFarg = pExpr->x.pList;
+      }
+      nFarg = pFarg ? pFarg->nExpr : 0;
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      zId = pExpr->u.zToken;
+      nId = sqlite3Strlen30(zId);
+      pDef = sqlite3FindFunction(db, zId, nId, nFarg, enc, 0);
+      if( pDef==0 || pDef->xFunc==0 ){
+        sqlite3ErrorMsg(pParse, "unknown function: %.*s()", nId, zId);
+        break;
+      }
+
+      /* Attempt a direct implementation of the built-in COALESCE() and
+      ** IFNULL() functions.  This avoids unnecessary evaluation of
+      ** arguments past the first non-NULL argument.
+      */
+      if( pDef->funcFlags & SQLITE_FUNC_COALESCE ){
+        int endCoalesce = sqlite3VdbeMakeLabel(v);
+        assert( nFarg>=2 );
+        sqlite3ExprCode(pParse, pFarg->a[0].pExpr, target);
+        for(i=1; i<nFarg; i++){
+          sqlite3VdbeAddOp2(v, OP_NotNull, target, endCoalesce);
+          VdbeCoverage(v);
+          sqlite3ExprCacheRemove(pParse, target, 1);
+          sqlite3ExprCachePush(pParse);
+          sqlite3ExprCode(pParse, pFarg->a[i].pExpr, target);
+          sqlite3ExprCachePop(pParse);
+        }
+        sqlite3VdbeResolveLabel(v, endCoalesce);
+        break;
+      }
+
+      /* The UNLIKELY() function is a no-op.  The result is the value
+      ** of the first argument.
+      */
+      if( pDef->funcFlags & SQLITE_FUNC_UNLIKELY ){
+        assert( nFarg>=1 );
+        sqlite3ExprCode(pParse, pFarg->a[0].pExpr, target);
+        break;
+      }
+
+      for(i=0; i<nFarg; i++){
+        if( i<32 && sqlite3ExprIsConstant(pFarg->a[i].pExpr) ){
+          testcase( i==31 );
+          constMask |= MASKBIT32(i);
+        }
+        if( (pDef->funcFlags & SQLITE_FUNC_NEEDCOLL)!=0 && !pColl ){
+          pColl = sqlite3ExprCollSeq(pParse, pFarg->a[i].pExpr);
+        }
+      }
+      if( pFarg ){
+        if( constMask ){
+          r1 = pParse->nMem+1;
+          pParse->nMem += nFarg;
+        }else{
+          r1 = sqlite3GetTempRange(pParse, nFarg);
+        }
+
+        /* For length() and typeof() functions with a column argument,
+        ** set the P5 parameter to the OP_Column opcode to OPFLAG_LENGTHARG
+        ** or OPFLAG_TYPEOFARG respectively, to avoid unnecessary data
+        ** loading.
+        */
+        if( (pDef->funcFlags & (SQLITE_FUNC_LENGTH|SQLITE_FUNC_TYPEOF))!=0 ){
+          u8 exprOp;
+          assert( nFarg==1 );
+          assert( pFarg->a[0].pExpr!=0 );
+          exprOp = pFarg->a[0].pExpr->op;
+          if( exprOp==TK_COLUMN || exprOp==TK_AGG_COLUMN ){
+            assert( SQLITE_FUNC_LENGTH==OPFLAG_LENGTHARG );
+            assert( SQLITE_FUNC_TYPEOF==OPFLAG_TYPEOFARG );
+            testcase( pDef->funcFlags & OPFLAG_LENGTHARG );
+            pFarg->a[0].pExpr->op2 = 
+                  pDef->funcFlags & (OPFLAG_LENGTHARG|OPFLAG_TYPEOFARG);
+          }
+        }
+
+        sqlite3ExprCachePush(pParse);     /* Ticket 2ea2425d34be */
+        sqlite3ExprCodeExprList(pParse, pFarg, r1,
+                                SQLITE_ECEL_DUP|SQLITE_ECEL_FACTOR);
+        sqlite3ExprCachePop(pParse);      /* Ticket 2ea2425d34be */
+      }else{
+        r1 = 0;
+      }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+      /* Possibly overload the function if the first argument is
+      ** a virtual table column.
+      **
+      ** For infix functions (LIKE, GLOB, REGEXP, and MATCH) use the
+      ** second argument, not the first, as the argument to test to
+      ** see if it is a column in a virtual table.  This is done because
+      ** the left operand of infix functions (the operand we want to
+      ** control overloading) ends up as the second argument to the
+      ** function.  The expression "A glob B" is equivalent to 
+      ** "glob(B,A).  We want to use the A in "A glob B" to test
+      ** for function overloading.  But we use the B term in "glob(B,A)".
+      */
+      if( nFarg>=2 && (pExpr->flags & EP_InfixFunc) ){
+        pDef = sqlite3VtabOverloadFunction(db, pDef, nFarg, pFarg->a[1].pExpr);
+      }else if( nFarg>0 ){
+        pDef = sqlite3VtabOverloadFunction(db, pDef, nFarg, pFarg->a[0].pExpr);
+      }
+#endif
+      if( pDef->funcFlags & SQLITE_FUNC_NEEDCOLL ){
+        if( !pColl ) pColl = db->pDfltColl; 
+        sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ);
+      }
+      sqlite3VdbeAddOp4(v, OP_Function, constMask, r1, target,
+                        (char*)pDef, P4_FUNCDEF);
+      sqlite3VdbeChangeP5(v, (u8)nFarg);
+      if( nFarg && constMask==0 ){
+        sqlite3ReleaseTempRange(pParse, r1, nFarg);
+      }
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_EXISTS:
+    case TK_SELECT: {
+      testcase( op==TK_EXISTS );
+      testcase( op==TK_SELECT );
+      inReg = sqlite3CodeSubselect(pParse, pExpr, 0, 0);
+      break;
+    }
+    case TK_IN: {
+      int destIfFalse = sqlite3VdbeMakeLabel(v);
+      int destIfNull = sqlite3VdbeMakeLabel(v);
+      sqlite3VdbeAddOp2(v, OP_Null, 0, target);
+      sqlite3ExprCodeIN(pParse, pExpr, destIfFalse, destIfNull);
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, target);
+      sqlite3VdbeResolveLabel(v, destIfFalse);
+      sqlite3VdbeAddOp2(v, OP_AddImm, target, 0);
+      sqlite3VdbeResolveLabel(v, destIfNull);
+      break;
+    }
+#endif /* SQLITE_OMIT_SUBQUERY */
+
+
+    /*
+    **    x BETWEEN y AND z
+    **
+    ** This is equivalent to
+    **
+    **    x>=y AND x<=z
+    **
+    ** X is stored in pExpr->pLeft.
+    ** Y is stored in pExpr->pList->a[0].pExpr.
+    ** Z is stored in pExpr->pList->a[1].pExpr.
+    */
+    case TK_BETWEEN: {
+      Expr *pLeft = pExpr->pLeft;
+      struct ExprList_item *pLItem = pExpr->x.pList->a;
+      Expr *pRight = pLItem->pExpr;
+
+      r1 = sqlite3ExprCodeTemp(pParse, pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pRight, &regFree2);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      r3 = sqlite3GetTempReg(pParse);
+      r4 = sqlite3GetTempReg(pParse);
+      codeCompare(pParse, pLeft, pRight, OP_Ge,
+                  r1, r2, r3, SQLITE_STOREP2);  VdbeCoverage(v);
+      pLItem++;
+      pRight = pLItem->pExpr;
+      sqlite3ReleaseTempReg(pParse, regFree2);
+      r2 = sqlite3ExprCodeTemp(pParse, pRight, &regFree2);
+      testcase( regFree2==0 );
+      codeCompare(pParse, pLeft, pRight, OP_Le, r1, r2, r4, SQLITE_STOREP2);
+      VdbeCoverage(v);
+      sqlite3VdbeAddOp3(v, OP_And, r3, r4, target);
+      sqlite3ReleaseTempReg(pParse, r3);
+      sqlite3ReleaseTempReg(pParse, r4);
+      break;
+    }
+    case TK_COLLATE: 
+    case TK_UPLUS: {
+      inReg = sqlite3ExprCodeTarget(pParse, pExpr->pLeft, target);
+      break;
+    }
+
+    case TK_TRIGGER: {
+      /* If the opcode is TK_TRIGGER, then the expression is a reference
+      ** to a column in the new.* or old.* pseudo-tables available to
+      ** trigger programs. In this case Expr.iTable is set to 1 for the
+      ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn
+      ** is set to the column of the pseudo-table to read, or to -1 to
+      ** read the rowid field.
+      **
+      ** The expression is implemented using an OP_Param opcode. The p1
+      ** parameter is set to 0 for an old.rowid reference, or to (i+1)
+      ** to reference another column of the old.* pseudo-table, where 
+      ** i is the index of the column. For a new.rowid reference, p1 is
+      ** set to (n+1), where n is the number of columns in each pseudo-table.
+      ** For a reference to any other column in the new.* pseudo-table, p1
+      ** is set to (n+2+i), where n and i are as defined previously. For
+      ** example, if the table on which triggers are being fired is
+      ** declared as:
+      **
+      **   CREATE TABLE t1(a, b);
+      **
+      ** Then p1 is interpreted as follows:
+      **
+      **   p1==0   ->    old.rowid     p1==3   ->    new.rowid
+      **   p1==1   ->    old.a         p1==4   ->    new.a
+      **   p1==2   ->    old.b         p1==5   ->    new.b       
+      */
+      Table *pTab = pExpr->pTab;
+      int p1 = pExpr->iTable * (pTab->nCol+1) + 1 + pExpr->iColumn;
+
+      assert( pExpr->iTable==0 || pExpr->iTable==1 );
+      assert( pExpr->iColumn>=-1 && pExpr->iColumn<pTab->nCol );
+      assert( pTab->iPKey<0 || pExpr->iColumn!=pTab->iPKey );
+      assert( p1>=0 && p1<(pTab->nCol*2+2) );
+
+      sqlite3VdbeAddOp2(v, OP_Param, p1, target);
+      VdbeComment((v, "%s.%s -> $%d",
+        (pExpr->iTable ? "new" : "old"),
+        (pExpr->iColumn<0 ? "rowid" : pExpr->pTab->aCol[pExpr->iColumn].zName),
+        target
+      ));
+
+#ifndef SQLITE_OMIT_FLOATING_POINT
+      /* If the column has REAL affinity, it may currently be stored as an
+      ** integer. Use OP_RealAffinity to make sure it is really real.
+      **
+      ** EVIDENCE-OF: R-60985-57662 SQLite will convert the value back to
+      ** floating point when extracting it from the record.  */
+      if( pExpr->iColumn>=0 
+       && pTab->aCol[pExpr->iColumn].affinity==SQLITE_AFF_REAL
+      ){
+        sqlite3VdbeAddOp1(v, OP_RealAffinity, target);
+      }
+#endif
+      break;
+    }
+
+
+    /*
+    ** Form A:
+    **   CASE x WHEN e1 THEN r1 WHEN e2 THEN r2 ... WHEN eN THEN rN ELSE y END
+    **
+    ** Form B:
+    **   CASE WHEN e1 THEN r1 WHEN e2 THEN r2 ... WHEN eN THEN rN ELSE y END
+    **
+    ** Form A is can be transformed into the equivalent form B as follows:
+    **   CASE WHEN x=e1 THEN r1 WHEN x=e2 THEN r2 ...
+    **        WHEN x=eN THEN rN ELSE y END
+    **
+    ** X (if it exists) is in pExpr->pLeft.
+    ** Y is in the last element of pExpr->x.pList if pExpr->x.pList->nExpr is
+    ** odd.  The Y is also optional.  If the number of elements in x.pList
+    ** is even, then Y is omitted and the "otherwise" result is NULL.
+    ** Ei is in pExpr->pList->a[i*2] and Ri is pExpr->pList->a[i*2+1].
+    **
+    ** The result of the expression is the Ri for the first matching Ei,
+    ** or if there is no matching Ei, the ELSE term Y, or if there is
+    ** no ELSE term, NULL.
+    */
+    default: assert( op==TK_CASE ); {
+      int endLabel;                     /* GOTO label for end of CASE stmt */
+      int nextCase;                     /* GOTO label for next WHEN clause */
+      int nExpr;                        /* 2x number of WHEN terms */
+      int i;                            /* Loop counter */
+      ExprList *pEList;                 /* List of WHEN terms */
+      struct ExprList_item *aListelem;  /* Array of WHEN terms */
+      Expr opCompare;                   /* The X==Ei expression */
+      Expr *pX;                         /* The X expression */
+      Expr *pTest = 0;                  /* X==Ei (form A) or just Ei (form B) */
+      VVA_ONLY( int iCacheLevel = pParse->iCacheLevel; )
+
+      assert( !ExprHasProperty(pExpr, EP_xIsSelect) && pExpr->x.pList );
+      assert(pExpr->x.pList->nExpr > 0);
+      pEList = pExpr->x.pList;
+      aListelem = pEList->a;
+      nExpr = pEList->nExpr;
+      endLabel = sqlite3VdbeMakeLabel(v);
+      if( (pX = pExpr->pLeft)!=0 ){
+        tempX = *pX;
+        testcase( pX->op==TK_COLUMN );
+        exprToRegister(&tempX, sqlite3ExprCodeTemp(pParse, pX, &regFree1));
+        testcase( regFree1==0 );
+        opCompare.op = TK_EQ;
+        opCompare.pLeft = &tempX;
+        pTest = &opCompare;
+        /* Ticket b351d95f9cd5ef17e9d9dbae18f5ca8611190001:
+        ** The value in regFree1 might get SCopy-ed into the file result.
+        ** So make sure that the regFree1 register is not reused for other
+        ** purposes and possibly overwritten.  */
+        regFree1 = 0;
+      }
+      for(i=0; i<nExpr-1; i=i+2){
+        sqlite3ExprCachePush(pParse);
+        if( pX ){
+          assert( pTest!=0 );
+          opCompare.pRight = aListelem[i].pExpr;
+        }else{
+          pTest = aListelem[i].pExpr;
+        }
+        nextCase = sqlite3VdbeMakeLabel(v);
+        testcase( pTest->op==TK_COLUMN );
+        sqlite3ExprIfFalse(pParse, pTest, nextCase, SQLITE_JUMPIFNULL);
+        testcase( aListelem[i+1].pExpr->op==TK_COLUMN );
+        sqlite3ExprCode(pParse, aListelem[i+1].pExpr, target);
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, endLabel);
+        sqlite3ExprCachePop(pParse);
+        sqlite3VdbeResolveLabel(v, nextCase);
+      }
+      if( (nExpr&1)!=0 ){
+        sqlite3ExprCachePush(pParse);
+        sqlite3ExprCode(pParse, pEList->a[nExpr-1].pExpr, target);
+        sqlite3ExprCachePop(pParse);
+      }else{
+        sqlite3VdbeAddOp2(v, OP_Null, 0, target);
+      }
+      assert( db->mallocFailed || pParse->nErr>0 
+           || pParse->iCacheLevel==iCacheLevel );
+      sqlite3VdbeResolveLabel(v, endLabel);
+      break;
+    }
+#ifndef SQLITE_OMIT_TRIGGER
+    case TK_RAISE: {
+      assert( pExpr->affinity==OE_Rollback 
+           || pExpr->affinity==OE_Abort
+           || pExpr->affinity==OE_Fail
+           || pExpr->affinity==OE_Ignore
+      );
+      if( !pParse->pTriggerTab ){
+        sqlite3ErrorMsg(pParse,
+                       "RAISE() may only be used within a trigger-program");
+        return 0;
+      }
+      if( pExpr->affinity==OE_Abort ){
+        sqlite3MayAbort(pParse);
+      }
+      assert( !ExprHasProperty(pExpr, EP_IntValue) );
+      if( pExpr->affinity==OE_Ignore ){
+        sqlite3VdbeAddOp4(
+            v, OP_Halt, SQLITE_OK, OE_Ignore, 0, pExpr->u.zToken,0);
+        VdbeCoverage(v);
+      }else{
+        sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_TRIGGER,
+                              pExpr->affinity, pExpr->u.zToken, 0, 0);
+      }
+
+      break;
+    }
+#endif
+  }
+  sqlite3ReleaseTempReg(pParse, regFree1);
+  sqlite3ReleaseTempReg(pParse, regFree2);
+  return inReg;
+}
+
+/*
+** Factor out the code of the given expression to initialization time.
+*/
+SQLITE_PRIVATE void sqlite3ExprCodeAtInit(
+  Parse *pParse,    /* Parsing context */
+  Expr *pExpr,      /* The expression to code when the VDBE initializes */
+  int regDest,      /* Store the value in this register */
+  u8 reusable       /* True if this expression is reusable */
+){
+  ExprList *p;
+  assert( ConstFactorOk(pParse) );
+  p = pParse->pConstExpr;
+  pExpr = sqlite3ExprDup(pParse->db, pExpr, 0);
+  p = sqlite3ExprListAppend(pParse, p, pExpr);
+  if( p ){
+     struct ExprList_item *pItem = &p->a[p->nExpr-1];
+     pItem->u.iConstExprReg = regDest;
+     pItem->reusable = reusable;
+  }
+  pParse->pConstExpr = p;
+}
+
+/*
+** Generate code to evaluate an expression and store the results
+** into a register.  Return the register number where the results
+** are stored.
+**
+** If the register is a temporary register that can be deallocated,
+** then write its number into *pReg.  If the result register is not
+** a temporary, then set *pReg to zero.
+**
+** If pExpr is a constant, then this routine might generate this
+** code to fill the register in the initialization section of the
+** VDBE program, in order to factor it out of the evaluation loop.
+*/
+SQLITE_PRIVATE int sqlite3ExprCodeTemp(Parse *pParse, Expr *pExpr, int *pReg){
+  int r2;
+  pExpr = sqlite3ExprSkipCollate(pExpr);
+  if( ConstFactorOk(pParse)
+   && pExpr->op!=TK_REGISTER
+   && sqlite3ExprIsConstantNotJoin(pExpr)
+  ){
+    ExprList *p = pParse->pConstExpr;
+    int i;
+    *pReg  = 0;
+    if( p ){
+      struct ExprList_item *pItem;
+      for(pItem=p->a, i=p->nExpr; i>0; pItem++, i--){
+        if( pItem->reusable && sqlite3ExprCompare(pItem->pExpr,pExpr,-1)==0 ){
+          return pItem->u.iConstExprReg;
+        }
+      }
+    }
+    r2 = ++pParse->nMem;
+    sqlite3ExprCodeAtInit(pParse, pExpr, r2, 1);
+  }else{
+    int r1 = sqlite3GetTempReg(pParse);
+    r2 = sqlite3ExprCodeTarget(pParse, pExpr, r1);
+    if( r2==r1 ){
+      *pReg = r1;
+    }else{
+      sqlite3ReleaseTempReg(pParse, r1);
+      *pReg = 0;
+    }
+  }
+  return r2;
+}
+
+/*
+** Generate code that will evaluate expression pExpr and store the
+** results in register target.  The results are guaranteed to appear
+** in register target.
+*/
+SQLITE_PRIVATE void sqlite3ExprCode(Parse *pParse, Expr *pExpr, int target){
+  int inReg;
+
+  assert( target>0 && target<=pParse->nMem );
+  if( pExpr && pExpr->op==TK_REGISTER ){
+    sqlite3VdbeAddOp2(pParse->pVdbe, OP_Copy, pExpr->iTable, target);
+  }else{
+    inReg = sqlite3ExprCodeTarget(pParse, pExpr, target);
+    assert( pParse->pVdbe || pParse->db->mallocFailed );
+    if( inReg!=target && pParse->pVdbe ){
+      sqlite3VdbeAddOp2(pParse->pVdbe, OP_SCopy, inReg, target);
+    }
+  }
+}
+
+/*
+** Generate code that will evaluate expression pExpr and store the
+** results in register target.  The results are guaranteed to appear
+** in register target.  If the expression is constant, then this routine
+** might choose to code the expression at initialization time.
+*/
+SQLITE_PRIVATE void sqlite3ExprCodeFactorable(Parse *pParse, Expr *pExpr, int target){
+  if( pParse->okConstFactor && sqlite3ExprIsConstant(pExpr) ){
+    sqlite3ExprCodeAtInit(pParse, pExpr, target, 0);
+  }else{
+    sqlite3ExprCode(pParse, pExpr, target);
+  }
+}
+
+/*
+** Generate code that evaluates the given expression and puts the result
+** in register target.
+**
+** Also make a copy of the expression results into another "cache" register
+** and modify the expression so that the next time it is evaluated,
+** the result is a copy of the cache register.
+**
+** This routine is used for expressions that are used multiple 
+** times.  They are evaluated once and the results of the expression
+** are reused.
+*/
+SQLITE_PRIVATE void sqlite3ExprCodeAndCache(Parse *pParse, Expr *pExpr, int target){
+  Vdbe *v = pParse->pVdbe;
+  int iMem;
+
+  assert( target>0 );
+  assert( pExpr->op!=TK_REGISTER );
+  sqlite3ExprCode(pParse, pExpr, target);
+  iMem = ++pParse->nMem;
+  sqlite3VdbeAddOp2(v, OP_Copy, target, iMem);
+  exprToRegister(pExpr, iMem);
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** Generate a human-readable explanation of an expression tree.
+*/
+SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){
+  const char *zBinOp = 0;   /* Binary operator */
+  const char *zUniOp = 0;   /* Unary operator */
+  pView = sqlite3TreeViewPush(pView, moreToFollow);
+  if( pExpr==0 ){
+    sqlite3TreeViewLine(pView, "nil");
+    sqlite3TreeViewPop(pView);
+    return;
+  }
+  switch( pExpr->op ){
+    case TK_AGG_COLUMN: {
+      sqlite3TreeViewLine(pView, "AGG{%d:%d}",
+            pExpr->iTable, pExpr->iColumn);
+      break;
+    }
+    case TK_COLUMN: {
+      if( pExpr->iTable<0 ){
+        /* This only happens when coding check constraints */
+        sqlite3TreeViewLine(pView, "COLUMN(%d)", pExpr->iColumn);
+      }else{
+        sqlite3TreeViewLine(pView, "{%d:%d}",
+                             pExpr->iTable, pExpr->iColumn);
+      }
+      break;
+    }
+    case TK_INTEGER: {
+      if( pExpr->flags & EP_IntValue ){
+        sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue);
+      }else{
+        sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken);
+      }
+      break;
+    }
+#ifndef SQLITE_OMIT_FLOATING_POINT
+    case TK_FLOAT: {
+      sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken);
+      break;
+    }
+#endif
+    case TK_STRING: {
+      sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken);
+      break;
+    }
+    case TK_NULL: {
+      sqlite3TreeViewLine(pView,"NULL");
+      break;
+    }
+#ifndef SQLITE_OMIT_BLOB_LITERAL
+    case TK_BLOB: {
+      sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken);
+      break;
+    }
+#endif
+    case TK_VARIABLE: {
+      sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)",
+                          pExpr->u.zToken, pExpr->iColumn);
+      break;
+    }
+    case TK_REGISTER: {
+      sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable);
+      break;
+    }
+    case TK_AS: {
+      sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken);
+      sqlite3TreeViewExpr(pView, pExpr->pLeft, 0);
+      break;
+    }
+    case TK_ID: {
+      sqlite3TreeViewLine(pView,"ID %Q", pExpr->u.zToken);
+      break;
+    }
+#ifndef SQLITE_OMIT_CAST
+    case TK_CAST: {
+      /* Expressions of the form:   CAST(pLeft AS token) */
+      sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken);
+      sqlite3TreeViewExpr(pView, pExpr->pLeft, 0);
+      break;
+    }
+#endif /* SQLITE_OMIT_CAST */
+    case TK_LT:      zBinOp = "LT";     break;
+    case TK_LE:      zBinOp = "LE";     break;
+    case TK_GT:      zBinOp = "GT";     break;
+    case TK_GE:      zBinOp = "GE";     break;
+    case TK_NE:      zBinOp = "NE";     break;
+    case TK_EQ:      zBinOp = "EQ";     break;
+    case TK_IS:      zBinOp = "IS";     break;
+    case TK_ISNOT:   zBinOp = "ISNOT";  break;
+    case TK_AND:     zBinOp = "AND";    break;
+    case TK_OR:      zBinOp = "OR";     break;
+    case TK_PLUS:    zBinOp = "ADD";    break;
+    case TK_STAR:    zBinOp = "MUL";    break;
+    case TK_MINUS:   zBinOp = "SUB";    break;
+    case TK_REM:     zBinOp = "REM";    break;
+    case TK_BITAND:  zBinOp = "BITAND"; break;
+    case TK_BITOR:   zBinOp = "BITOR";  break;
+    case TK_SLASH:   zBinOp = "DIV";    break;
+    case TK_LSHIFT:  zBinOp = "LSHIFT"; break;
+    case TK_RSHIFT:  zBinOp = "RSHIFT"; break;
+    case TK_CONCAT:  zBinOp = "CONCAT"; break;
+    case TK_DOT:     zBinOp = "DOT";    break;
+
+    case TK_UMINUS:  zUniOp = "UMINUS"; break;
+    case TK_UPLUS:   zUniOp = "UPLUS";  break;
+    case TK_BITNOT:  zUniOp = "BITNOT"; break;
+    case TK_NOT:     zUniOp = "NOT";    break;
+    case TK_ISNULL:  zUniOp = "ISNULL"; break;
+    case TK_NOTNULL: zUniOp = "NOTNULL"; break;
+
+    case TK_COLLATE: {
+      sqlite3TreeViewLine(pView, "COLLATE %Q", pExpr->u.zToken);
+      sqlite3TreeViewExpr(pView, pExpr->pLeft, 0);
+      break;
+    }
+
+    case TK_AGG_FUNCTION:
+    case TK_FUNCTION: {
+      ExprList *pFarg;       /* List of function arguments */
+      if( ExprHasProperty(pExpr, EP_TokenOnly) ){
+        pFarg = 0;
+      }else{
+        pFarg = pExpr->x.pList;
+      }
+      if( pExpr->op==TK_AGG_FUNCTION ){
+        sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q",
+                             pExpr->op2, pExpr->u.zToken);
+      }else{
+        sqlite3TreeViewLine(pView, "FUNCTION %Q", pExpr->u.zToken);
+      }
+      if( pFarg ){
+        sqlite3TreeViewExprList(pView, pFarg, 0, 0);
+      }
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_EXISTS: {
+      sqlite3TreeViewLine(pView, "EXISTS-expr");
+      sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0);
+      break;
+    }
+    case TK_SELECT: {
+      sqlite3TreeViewLine(pView, "SELECT-expr");
+      sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0);
+      break;
+    }
+    case TK_IN: {
+      sqlite3TreeViewLine(pView, "IN");
+      sqlite3TreeViewExpr(pView, pExpr->pLeft, 1);
+      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+        sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0);
+      }else{
+        sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0);
+      }
+      break;
+    }
+#endif /* SQLITE_OMIT_SUBQUERY */
+
+    /*
+    **    x BETWEEN y AND z
+    **
+    ** This is equivalent to
+    **
+    **    x>=y AND x<=z
+    **
+    ** X is stored in pExpr->pLeft.
+    ** Y is stored in pExpr->pList->a[0].pExpr.
+    ** Z is stored in pExpr->pList->a[1].pExpr.
+    */
+    case TK_BETWEEN: {
+      Expr *pX = pExpr->pLeft;
+      Expr *pY = pExpr->x.pList->a[0].pExpr;
+      Expr *pZ = pExpr->x.pList->a[1].pExpr;
+      sqlite3TreeViewLine(pView, "BETWEEN");
+      sqlite3TreeViewExpr(pView, pX, 1);
+      sqlite3TreeViewExpr(pView, pY, 1);
+      sqlite3TreeViewExpr(pView, pZ, 0);
+      break;
+    }
+    case TK_TRIGGER: {
+      /* If the opcode is TK_TRIGGER, then the expression is a reference
+      ** to a column in the new.* or old.* pseudo-tables available to
+      ** trigger programs. In this case Expr.iTable is set to 1 for the
+      ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn
+      ** is set to the column of the pseudo-table to read, or to -1 to
+      ** read the rowid field.
+      */
+      sqlite3TreeViewLine(pView, "%s(%d)", 
+          pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn);
+      break;
+    }
+    case TK_CASE: {
+      sqlite3TreeViewLine(pView, "CASE");
+      sqlite3TreeViewExpr(pView, pExpr->pLeft, 1);
+      sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0);
+      break;
+    }
+#ifndef SQLITE_OMIT_TRIGGER
+    case TK_RAISE: {
+      const char *zType = "unk";
+      switch( pExpr->affinity ){
+        case OE_Rollback:   zType = "rollback";  break;
+        case OE_Abort:      zType = "abort";     break;
+        case OE_Fail:       zType = "fail";      break;
+        case OE_Ignore:     zType = "ignore";    break;
+      }
+      sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken);
+      break;
+    }
+#endif
+    default: {
+      sqlite3TreeViewLine(pView, "op=%d", pExpr->op);
+      break;
+    }
+  }
+  if( zBinOp ){
+    sqlite3TreeViewLine(pView, "%s", zBinOp);
+    sqlite3TreeViewExpr(pView, pExpr->pLeft, 1);
+    sqlite3TreeViewExpr(pView, pExpr->pRight, 0);
+  }else if( zUniOp ){
+    sqlite3TreeViewLine(pView, "%s", zUniOp);
+    sqlite3TreeViewExpr(pView, pExpr->pLeft, 0);
+  }
+  sqlite3TreeViewPop(pView);
+}
+#endif /* SQLITE_DEBUG */
+
+#ifdef SQLITE_DEBUG
+/*
+** Generate a human-readable explanation of an expression list.
+*/
+SQLITE_PRIVATE void sqlite3TreeViewExprList(
+  TreeView *pView,
+  const ExprList *pList,
+  u8 moreToFollow,
+  const char *zLabel
+){
+  int i;
+  pView = sqlite3TreeViewPush(pView, moreToFollow);
+  if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST";
+  if( pList==0 ){
+    sqlite3TreeViewLine(pView, "%s (empty)", zLabel);
+  }else{
+    sqlite3TreeViewLine(pView, "%s", zLabel);
+    for(i=0; i<pList->nExpr; i++){
+      sqlite3TreeViewExpr(pView, pList->a[i].pExpr, i<pList->nExpr-1);
+#if 0
+     if( pList->a[i].zName ){
+        sqlite3ExplainPrintf(pOut, " AS %s", pList->a[i].zName);
+      }
+      if( pList->a[i].bSpanIsTab ){
+        sqlite3ExplainPrintf(pOut, " (%s)", pList->a[i].zSpan);
+      }
+#endif
+    }
+  }
+  sqlite3TreeViewPop(pView);
+}
+#endif /* SQLITE_DEBUG */
+
+/*
+** Generate code that pushes the value of every element of the given
+** expression list into a sequence of registers beginning at target.
+**
+** Return the number of elements evaluated.
+**
+** The SQLITE_ECEL_DUP flag prevents the arguments from being
+** filled using OP_SCopy.  OP_Copy must be used instead.
+**
+** The SQLITE_ECEL_FACTOR argument allows constant arguments to be
+** factored out into initialization code.
+*/
+SQLITE_PRIVATE int sqlite3ExprCodeExprList(
+  Parse *pParse,     /* Parsing context */
+  ExprList *pList,   /* The expression list to be coded */
+  int target,        /* Where to write results */
+  u8 flags           /* SQLITE_ECEL_* flags */
+){
+  struct ExprList_item *pItem;
+  int i, n;
+  u8 copyOp = (flags & SQLITE_ECEL_DUP) ? OP_Copy : OP_SCopy;
+  assert( pList!=0 );
+  assert( target>0 );
+  assert( pParse->pVdbe!=0 );  /* Never gets this far otherwise */
+  n = pList->nExpr;
+  if( !ConstFactorOk(pParse) ) flags &= ~SQLITE_ECEL_FACTOR;
+  for(pItem=pList->a, i=0; i<n; i++, pItem++){
+    Expr *pExpr = pItem->pExpr;
+    if( (flags & SQLITE_ECEL_FACTOR)!=0 && sqlite3ExprIsConstant(pExpr) ){
+      sqlite3ExprCodeAtInit(pParse, pExpr, target+i, 0);
+    }else{
+      int inReg = sqlite3ExprCodeTarget(pParse, pExpr, target+i);
+      if( inReg!=target+i ){
+        VdbeOp *pOp;
+        Vdbe *v = pParse->pVdbe;
+        if( copyOp==OP_Copy
+         && (pOp=sqlite3VdbeGetOp(v, -1))->opcode==OP_Copy
+         && pOp->p1+pOp->p3+1==inReg
+         && pOp->p2+pOp->p3+1==target+i
+        ){
+          pOp->p3++;
+        }else{
+          sqlite3VdbeAddOp2(v, copyOp, inReg, target+i);
+        }
+      }
+    }
+  }
+  return n;
+}
+
+/*
+** Generate code for a BETWEEN operator.
+**
+**    x BETWEEN y AND z
+**
+** The above is equivalent to 
+**
+**    x>=y AND x<=z
+**
+** Code it as such, taking care to do the common subexpression
+** elimination of x.
+*/
+static void exprCodeBetween(
+  Parse *pParse,    /* Parsing and code generating context */
+  Expr *pExpr,      /* The BETWEEN expression */
+  int dest,         /* Jump here if the jump is taken */
+  int jumpIfTrue,   /* Take the jump if the BETWEEN is true */
+  int jumpIfNull    /* Take the jump if the BETWEEN is NULL */
+){
+  Expr exprAnd;     /* The AND operator in  x>=y AND x<=z  */
+  Expr compLeft;    /* The  x>=y  term */
+  Expr compRight;   /* The  x<=z  term */
+  Expr exprX;       /* The  x  subexpression */
+  int regFree1 = 0; /* Temporary use register */
+
+  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+  exprX = *pExpr->pLeft;
+  exprAnd.op = TK_AND;
+  exprAnd.pLeft = &compLeft;
+  exprAnd.pRight = &compRight;
+  compLeft.op = TK_GE;
+  compLeft.pLeft = &exprX;
+  compLeft.pRight = pExpr->x.pList->a[0].pExpr;
+  compRight.op = TK_LE;
+  compRight.pLeft = &exprX;
+  compRight.pRight = pExpr->x.pList->a[1].pExpr;
+  exprToRegister(&exprX, sqlite3ExprCodeTemp(pParse, &exprX, &regFree1));
+  if( jumpIfTrue ){
+    sqlite3ExprIfTrue(pParse, &exprAnd, dest, jumpIfNull);
+  }else{
+    sqlite3ExprIfFalse(pParse, &exprAnd, dest, jumpIfNull);
+  }
+  sqlite3ReleaseTempReg(pParse, regFree1);
+
+  /* Ensure adequate test coverage */
+  testcase( jumpIfTrue==0 && jumpIfNull==0 && regFree1==0 );
+  testcase( jumpIfTrue==0 && jumpIfNull==0 && regFree1!=0 );
+  testcase( jumpIfTrue==0 && jumpIfNull!=0 && regFree1==0 );
+  testcase( jumpIfTrue==0 && jumpIfNull!=0 && regFree1!=0 );
+  testcase( jumpIfTrue!=0 && jumpIfNull==0 && regFree1==0 );
+  testcase( jumpIfTrue!=0 && jumpIfNull==0 && regFree1!=0 );
+  testcase( jumpIfTrue!=0 && jumpIfNull!=0 && regFree1==0 );
+  testcase( jumpIfTrue!=0 && jumpIfNull!=0 && regFree1!=0 );
+}
+
+/*
+** Generate code for a boolean expression such that a jump is made
+** to the label "dest" if the expression is true but execution
+** continues straight thru if the expression is false.
+**
+** If the expression evaluates to NULL (neither true nor false), then
+** take the jump if the jumpIfNull flag is SQLITE_JUMPIFNULL.
+**
+** This code depends on the fact that certain token values (ex: TK_EQ)
+** are the same as opcode values (ex: OP_Eq) that implement the corresponding
+** operation.  Special comments in vdbe.c and the mkopcodeh.awk script in
+** the make process cause these values to align.  Assert()s in the code
+** below verify that the numbers are aligned correctly.
+*/
+SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse *pParse, Expr *pExpr, int dest, int jumpIfNull){
+  Vdbe *v = pParse->pVdbe;
+  int op = 0;
+  int regFree1 = 0;
+  int regFree2 = 0;
+  int r1, r2;
+
+  assert( jumpIfNull==SQLITE_JUMPIFNULL || jumpIfNull==0 );
+  if( NEVER(v==0) )     return;  /* Existence of VDBE checked by caller */
+  if( NEVER(pExpr==0) ) return;  /* No way this can happen */
+  op = pExpr->op;
+  switch( op ){
+    case TK_AND: {
+      int d2 = sqlite3VdbeMakeLabel(v);
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfFalse(pParse, pExpr->pLeft, d2,jumpIfNull^SQLITE_JUMPIFNULL);
+      sqlite3ExprCachePush(pParse);
+      sqlite3ExprIfTrue(pParse, pExpr->pRight, dest, jumpIfNull);
+      sqlite3VdbeResolveLabel(v, d2);
+      sqlite3ExprCachePop(pParse);
+      break;
+    }
+    case TK_OR: {
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, jumpIfNull);
+      sqlite3ExprCachePush(pParse);
+      sqlite3ExprIfTrue(pParse, pExpr->pRight, dest, jumpIfNull);
+      sqlite3ExprCachePop(pParse);
+      break;
+    }
+    case TK_NOT: {
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, jumpIfNull);
+      break;
+    }
+    case TK_LT:
+    case TK_LE:
+    case TK_GT:
+    case TK_GE:
+    case TK_NE:
+    case TK_EQ: {
+      testcase( jumpIfNull==0 );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, dest, jumpIfNull);
+      assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt);
+      assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le);
+      assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt);
+      assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge);
+      assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); VdbeCoverageIf(v,op==OP_Eq);
+      assert(TK_NE==OP_Ne); testcase(op==OP_Ne); VdbeCoverageIf(v,op==OP_Ne);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_IS:
+    case TK_ISNOT: {
+      testcase( op==TK_IS );
+      testcase( op==TK_ISNOT );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      op = (op==TK_IS) ? TK_EQ : TK_NE;
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, dest, SQLITE_NULLEQ);
+      VdbeCoverageIf(v, op==TK_EQ);
+      VdbeCoverageIf(v, op==TK_NE);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_ISNULL:
+    case TK_NOTNULL: {
+      assert( TK_ISNULL==OP_IsNull );   testcase( op==TK_ISNULL );
+      assert( TK_NOTNULL==OP_NotNull ); testcase( op==TK_NOTNULL );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      sqlite3VdbeAddOp2(v, op, r1, dest);
+      VdbeCoverageIf(v, op==TK_ISNULL);
+      VdbeCoverageIf(v, op==TK_NOTNULL);
+      testcase( regFree1==0 );
+      break;
+    }
+    case TK_BETWEEN: {
+      testcase( jumpIfNull==0 );
+      exprCodeBetween(pParse, pExpr, dest, 1, jumpIfNull);
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_IN: {
+      int destIfFalse = sqlite3VdbeMakeLabel(v);
+      int destIfNull = jumpIfNull ? dest : destIfFalse;
+      sqlite3ExprCodeIN(pParse, pExpr, destIfFalse, destIfNull);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, dest);
+      sqlite3VdbeResolveLabel(v, destIfFalse);
+      break;
+    }
+#endif
+    default: {
+      if( exprAlwaysTrue(pExpr) ){
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, dest);
+      }else if( exprAlwaysFalse(pExpr) ){
+        /* No-op */
+      }else{
+        r1 = sqlite3ExprCodeTemp(pParse, pExpr, &regFree1);
+        sqlite3VdbeAddOp3(v, OP_If, r1, dest, jumpIfNull!=0);
+        VdbeCoverage(v);
+        testcase( regFree1==0 );
+        testcase( jumpIfNull==0 );
+      }
+      break;
+    }
+  }
+  sqlite3ReleaseTempReg(pParse, regFree1);
+  sqlite3ReleaseTempReg(pParse, regFree2);  
+}
+
+/*
+** Generate code for a boolean expression such that a jump is made
+** to the label "dest" if the expression is false but execution
+** continues straight thru if the expression is true.
+**
+** If the expression evaluates to NULL (neither true nor false) then
+** jump if jumpIfNull is SQLITE_JUMPIFNULL or fall through if jumpIfNull
+** is 0.
+*/
+SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse *pParse, Expr *pExpr, int dest, int jumpIfNull){
+  Vdbe *v = pParse->pVdbe;
+  int op = 0;
+  int regFree1 = 0;
+  int regFree2 = 0;
+  int r1, r2;
+
+  assert( jumpIfNull==SQLITE_JUMPIFNULL || jumpIfNull==0 );
+  if( NEVER(v==0) ) return; /* Existence of VDBE checked by caller */
+  if( pExpr==0 )    return;
+
+  /* The value of pExpr->op and op are related as follows:
+  **
+  **       pExpr->op            op
+  **       ---------          ----------
+  **       TK_ISNULL          OP_NotNull
+  **       TK_NOTNULL         OP_IsNull
+  **       TK_NE              OP_Eq
+  **       TK_EQ              OP_Ne
+  **       TK_GT              OP_Le
+  **       TK_LE              OP_Gt
+  **       TK_GE              OP_Lt
+  **       TK_LT              OP_Ge
+  **
+  ** For other values of pExpr->op, op is undefined and unused.
+  ** The value of TK_ and OP_ constants are arranged such that we
+  ** can compute the mapping above using the following expression.
+  ** Assert()s verify that the computation is correct.
+  */
+  op = ((pExpr->op+(TK_ISNULL&1))^1)-(TK_ISNULL&1);
+
+  /* Verify correct alignment of TK_ and OP_ constants
+  */
+  assert( pExpr->op!=TK_ISNULL || op==OP_NotNull );
+  assert( pExpr->op!=TK_NOTNULL || op==OP_IsNull );
+  assert( pExpr->op!=TK_NE || op==OP_Eq );
+  assert( pExpr->op!=TK_EQ || op==OP_Ne );
+  assert( pExpr->op!=TK_LT || op==OP_Ge );
+  assert( pExpr->op!=TK_LE || op==OP_Gt );
+  assert( pExpr->op!=TK_GT || op==OP_Le );
+  assert( pExpr->op!=TK_GE || op==OP_Lt );
+
+  switch( pExpr->op ){
+    case TK_AND: {
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfFalse(pParse, pExpr->pLeft, dest, jumpIfNull);
+      sqlite3ExprCachePush(pParse);
+      sqlite3ExprIfFalse(pParse, pExpr->pRight, dest, jumpIfNull);
+      sqlite3ExprCachePop(pParse);
+      break;
+    }
+    case TK_OR: {
+      int d2 = sqlite3VdbeMakeLabel(v);
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfTrue(pParse, pExpr->pLeft, d2, jumpIfNull^SQLITE_JUMPIFNULL);
+      sqlite3ExprCachePush(pParse);
+      sqlite3ExprIfFalse(pParse, pExpr->pRight, dest, jumpIfNull);
+      sqlite3VdbeResolveLabel(v, d2);
+      sqlite3ExprCachePop(pParse);
+      break;
+    }
+    case TK_NOT: {
+      testcase( jumpIfNull==0 );
+      sqlite3ExprIfTrue(pParse, pExpr->pLeft, dest, jumpIfNull);
+      break;
+    }
+    case TK_LT:
+    case TK_LE:
+    case TK_GT:
+    case TK_GE:
+    case TK_NE:
+    case TK_EQ: {
+      testcase( jumpIfNull==0 );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, dest, jumpIfNull);
+      assert(TK_LT==OP_Lt); testcase(op==OP_Lt); VdbeCoverageIf(v,op==OP_Lt);
+      assert(TK_LE==OP_Le); testcase(op==OP_Le); VdbeCoverageIf(v,op==OP_Le);
+      assert(TK_GT==OP_Gt); testcase(op==OP_Gt); VdbeCoverageIf(v,op==OP_Gt);
+      assert(TK_GE==OP_Ge); testcase(op==OP_Ge); VdbeCoverageIf(v,op==OP_Ge);
+      assert(TK_EQ==OP_Eq); testcase(op==OP_Eq); VdbeCoverageIf(v,op==OP_Eq);
+      assert(TK_NE==OP_Ne); testcase(op==OP_Ne); VdbeCoverageIf(v,op==OP_Ne);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_IS:
+    case TK_ISNOT: {
+      testcase( pExpr->op==TK_IS );
+      testcase( pExpr->op==TK_ISNOT );
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      r2 = sqlite3ExprCodeTemp(pParse, pExpr->pRight, &regFree2);
+      op = (pExpr->op==TK_IS) ? TK_NE : TK_EQ;
+      codeCompare(pParse, pExpr->pLeft, pExpr->pRight, op,
+                  r1, r2, dest, SQLITE_NULLEQ);
+      VdbeCoverageIf(v, op==TK_EQ);
+      VdbeCoverageIf(v, op==TK_NE);
+      testcase( regFree1==0 );
+      testcase( regFree2==0 );
+      break;
+    }
+    case TK_ISNULL:
+    case TK_NOTNULL: {
+      r1 = sqlite3ExprCodeTemp(pParse, pExpr->pLeft, &regFree1);
+      sqlite3VdbeAddOp2(v, op, r1, dest);
+      testcase( op==TK_ISNULL );   VdbeCoverageIf(v, op==TK_ISNULL);
+      testcase( op==TK_NOTNULL );  VdbeCoverageIf(v, op==TK_NOTNULL);
+      testcase( regFree1==0 );
+      break;
+    }
+    case TK_BETWEEN: {
+      testcase( jumpIfNull==0 );
+      exprCodeBetween(pParse, pExpr, dest, 0, jumpIfNull);
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_IN: {
+      if( jumpIfNull ){
+        sqlite3ExprCodeIN(pParse, pExpr, dest, dest);
+      }else{
+        int destIfNull = sqlite3VdbeMakeLabel(v);
+        sqlite3ExprCodeIN(pParse, pExpr, dest, destIfNull);
+        sqlite3VdbeResolveLabel(v, destIfNull);
+      }
+      break;
+    }
+#endif
+    default: {
+      if( exprAlwaysFalse(pExpr) ){
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, dest);
+      }else if( exprAlwaysTrue(pExpr) ){
+        /* no-op */
+      }else{
+        r1 = sqlite3ExprCodeTemp(pParse, pExpr, &regFree1);
+        sqlite3VdbeAddOp3(v, OP_IfNot, r1, dest, jumpIfNull!=0);
+        VdbeCoverage(v);
+        testcase( regFree1==0 );
+        testcase( jumpIfNull==0 );
+      }
+      break;
+    }
+  }
+  sqlite3ReleaseTempReg(pParse, regFree1);
+  sqlite3ReleaseTempReg(pParse, regFree2);
+}
+
+/*
+** Do a deep comparison of two expression trees.  Return 0 if the two
+** expressions are completely identical.  Return 1 if they differ only
+** by a COLLATE operator at the top level.  Return 2 if there are differences
+** other than the top-level COLLATE operator.
+**
+** If any subelement of pB has Expr.iTable==(-1) then it is allowed
+** to compare equal to an equivalent element in pA with Expr.iTable==iTab.
+**
+** The pA side might be using TK_REGISTER.  If that is the case and pB is
+** not using TK_REGISTER but is otherwise equivalent, then still return 0.
+**
+** Sometimes this routine will return 2 even if the two expressions
+** really are equivalent.  If we cannot prove that the expressions are
+** identical, we return 2 just to be safe.  So if this routine
+** returns 2, then you do not really know for certain if the two
+** expressions are the same.  But if you get a 0 or 1 return, then you
+** can be sure the expressions are the same.  In the places where
+** this routine is used, it does not hurt to get an extra 2 - that
+** just might result in some slightly slower code.  But returning
+** an incorrect 0 or 1 could lead to a malfunction.
+*/
+SQLITE_PRIVATE int sqlite3ExprCompare(Expr *pA, Expr *pB, int iTab){
+  u32 combinedFlags;
+  if( pA==0 || pB==0 ){
+    return pB==pA ? 0 : 2;
+  }
+  combinedFlags = pA->flags | pB->flags;
+  if( combinedFlags & EP_IntValue ){
+    if( (pA->flags&pB->flags&EP_IntValue)!=0 && pA->u.iValue==pB->u.iValue ){
+      return 0;
+    }
+    return 2;
+  }
+  if( pA->op!=pB->op ){
+    if( pA->op==TK_COLLATE && sqlite3ExprCompare(pA->pLeft, pB, iTab)<2 ){
+      return 1;
+    }
+    if( pB->op==TK_COLLATE && sqlite3ExprCompare(pA, pB->pLeft, iTab)<2 ){
+      return 1;
+    }
+    return 2;
+  }
+  if( pA->op!=TK_COLUMN && ALWAYS(pA->op!=TK_AGG_COLUMN) && pA->u.zToken ){
+    if( strcmp(pA->u.zToken,pB->u.zToken)!=0 ){
+      return pA->op==TK_COLLATE ? 1 : 2;
+    }
+  }
+  if( (pA->flags & EP_Distinct)!=(pB->flags & EP_Distinct) ) return 2;
+  if( ALWAYS((combinedFlags & EP_TokenOnly)==0) ){
+    if( combinedFlags & EP_xIsSelect ) return 2;
+    if( sqlite3ExprCompare(pA->pLeft, pB->pLeft, iTab) ) return 2;
+    if( sqlite3ExprCompare(pA->pRight, pB->pRight, iTab) ) return 2;
+    if( sqlite3ExprListCompare(pA->x.pList, pB->x.pList, iTab) ) return 2;
+    if( ALWAYS((combinedFlags & EP_Reduced)==0) ){
+      if( pA->iColumn!=pB->iColumn ) return 2;
+      if( pA->iTable!=pB->iTable 
+       && (pA->iTable!=iTab || NEVER(pB->iTable>=0)) ) return 2;
+    }
+  }
+  return 0;
+}
+
+/*
+** Compare two ExprList objects.  Return 0 if they are identical and 
+** non-zero if they differ in any way.
+**
+** If any subelement of pB has Expr.iTable==(-1) then it is allowed
+** to compare equal to an equivalent element in pA with Expr.iTable==iTab.
+**
+** This routine might return non-zero for equivalent ExprLists.  The
+** only consequence will be disabled optimizations.  But this routine
+** must never return 0 if the two ExprList objects are different, or
+** a malfunction will result.
+**
+** Two NULL pointers are considered to be the same.  But a NULL pointer
+** always differs from a non-NULL pointer.
+*/
+SQLITE_PRIVATE int sqlite3ExprListCompare(ExprList *pA, ExprList *pB, int iTab){
+  int i;
+  if( pA==0 && pB==0 ) return 0;
+  if( pA==0 || pB==0 ) return 1;
+  if( pA->nExpr!=pB->nExpr ) return 1;
+  for(i=0; i<pA->nExpr; i++){
+    Expr *pExprA = pA->a[i].pExpr;
+    Expr *pExprB = pB->a[i].pExpr;
+    if( pA->a[i].sortOrder!=pB->a[i].sortOrder ) return 1;
+    if( sqlite3ExprCompare(pExprA, pExprB, iTab) ) return 1;
+  }
+  return 0;
+}
+
+/*
+** Return true if we can prove the pE2 will always be true if pE1 is
+** true.  Return false if we cannot complete the proof or if pE2 might
+** be false.  Examples:
+**
+**     pE1: x==5       pE2: x==5             Result: true
+**     pE1: x>0        pE2: x==5             Result: false
+**     pE1: x=21       pE2: x=21 OR y=43     Result: true
+**     pE1: x!=123     pE2: x IS NOT NULL    Result: true
+**     pE1: x!=?1      pE2: x IS NOT NULL    Result: true
+**     pE1: x IS NULL  pE2: x IS NOT NULL    Result: false
+**     pE1: x IS ?2    pE2: x IS NOT NULL    Reuslt: false
+**
+** When comparing TK_COLUMN nodes between pE1 and pE2, if pE2 has
+** Expr.iTable<0 then assume a table number given by iTab.
+**
+** When in doubt, return false.  Returning true might give a performance
+** improvement.  Returning false might cause a performance reduction, but
+** it will always give the correct answer and is hence always safe.
+*/
+SQLITE_PRIVATE int sqlite3ExprImpliesExpr(Expr *pE1, Expr *pE2, int iTab){
+  if( sqlite3ExprCompare(pE1, pE2, iTab)==0 ){
+    return 1;
+  }
+  if( pE2->op==TK_OR
+   && (sqlite3ExprImpliesExpr(pE1, pE2->pLeft, iTab)
+             || sqlite3ExprImpliesExpr(pE1, pE2->pRight, iTab) )
+  ){
+    return 1;
+  }
+  if( pE2->op==TK_NOTNULL
+   && sqlite3ExprCompare(pE1->pLeft, pE2->pLeft, iTab)==0
+   && (pE1->op!=TK_ISNULL && pE1->op!=TK_IS)
+  ){
+    return 1;
+  }
+  return 0;
+}
+
+/*
+** An instance of the following structure is used by the tree walker
+** to count references to table columns in the arguments of an 
+** aggregate function, in order to implement the
+** sqlite3FunctionThisSrc() routine.
+*/
+struct SrcCount {
+  SrcList *pSrc;   /* One particular FROM clause in a nested query */
+  int nThis;       /* Number of references to columns in pSrcList */
+  int nOther;      /* Number of references to columns in other FROM clauses */
+};
+
+/*
+** Count the number of references to columns.
+*/
+static int exprSrcCount(Walker *pWalker, Expr *pExpr){
+  /* The NEVER() on the second term is because sqlite3FunctionUsesThisSrc()
+  ** is always called before sqlite3ExprAnalyzeAggregates() and so the
+  ** TK_COLUMNs have not yet been converted into TK_AGG_COLUMN.  If
+  ** sqlite3FunctionUsesThisSrc() is used differently in the future, the
+  ** NEVER() will need to be removed. */
+  if( pExpr->op==TK_COLUMN || NEVER(pExpr->op==TK_AGG_COLUMN) ){
+    int i;
+    struct SrcCount *p = pWalker->u.pSrcCount;
+    SrcList *pSrc = p->pSrc;
+    int nSrc = pSrc ? pSrc->nSrc : 0;
+    for(i=0; i<nSrc; i++){
+      if( pExpr->iTable==pSrc->a[i].iCursor ) break;
+    }
+    if( i<nSrc ){
+      p->nThis++;
+    }else{
+      p->nOther++;
+    }
+  }
+  return WRC_Continue;
+}
+
+/*
+** Determine if any of the arguments to the pExpr Function reference
+** pSrcList.  Return true if they do.  Also return true if the function
+** has no arguments or has only constant arguments.  Return false if pExpr
+** references columns but not columns of tables found in pSrcList.
+*/
+SQLITE_PRIVATE int sqlite3FunctionUsesThisSrc(Expr *pExpr, SrcList *pSrcList){
+  Walker w;
+  struct SrcCount cnt;
+  assert( pExpr->op==TK_AGG_FUNCTION );
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = exprSrcCount;
+  w.u.pSrcCount = &cnt;
+  cnt.pSrc = pSrcList;
+  cnt.nThis = 0;
+  cnt.nOther = 0;
+  sqlite3WalkExprList(&w, pExpr->x.pList);
+  return cnt.nThis>0 || cnt.nOther==0;
+}
+
+/*
+** Add a new element to the pAggInfo->aCol[] array.  Return the index of
+** the new element.  Return a negative number if malloc fails.
+*/
+static int addAggInfoColumn(sqlite3 *db, AggInfo *pInfo){
+  int i;
+  pInfo->aCol = sqlite3ArrayAllocate(
+       db,
+       pInfo->aCol,
+       sizeof(pInfo->aCol[0]),
+       &pInfo->nColumn,
+       &i
+  );
+  return i;
+}    
+
+/*
+** Add a new element to the pAggInfo->aFunc[] array.  Return the index of
+** the new element.  Return a negative number if malloc fails.
+*/
+static int addAggInfoFunc(sqlite3 *db, AggInfo *pInfo){
+  int i;
+  pInfo->aFunc = sqlite3ArrayAllocate(
+       db, 
+       pInfo->aFunc,
+       sizeof(pInfo->aFunc[0]),
+       &pInfo->nFunc,
+       &i
+  );
+  return i;
+}    
+
+/*
+** This is the xExprCallback for a tree walker.  It is used to
+** implement sqlite3ExprAnalyzeAggregates().  See sqlite3ExprAnalyzeAggregates
+** for additional information.
+*/
+static int analyzeAggregate(Walker *pWalker, Expr *pExpr){
+  int i;
+  NameContext *pNC = pWalker->u.pNC;
+  Parse *pParse = pNC->pParse;
+  SrcList *pSrcList = pNC->pSrcList;
+  AggInfo *pAggInfo = pNC->pAggInfo;
+
+  switch( pExpr->op ){
+    case TK_AGG_COLUMN:
+    case TK_COLUMN: {
+      testcase( pExpr->op==TK_AGG_COLUMN );
+      testcase( pExpr->op==TK_COLUMN );
+      /* Check to see if the column is in one of the tables in the FROM
+      ** clause of the aggregate query */
+      if( ALWAYS(pSrcList!=0) ){
+        struct SrcList_item *pItem = pSrcList->a;
+        for(i=0; i<pSrcList->nSrc; i++, pItem++){
+          struct AggInfo_col *pCol;
+          assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) );
+          if( pExpr->iTable==pItem->iCursor ){
+            /* If we reach this point, it means that pExpr refers to a table
+            ** that is in the FROM clause of the aggregate query.  
+            **
+            ** Make an entry for the column in pAggInfo->aCol[] if there
+            ** is not an entry there already.
+            */
+            int k;
+            pCol = pAggInfo->aCol;
+            for(k=0; k<pAggInfo->nColumn; k++, pCol++){
+              if( pCol->iTable==pExpr->iTable &&
+                  pCol->iColumn==pExpr->iColumn ){
+                break;
+              }
+            }
+            if( (k>=pAggInfo->nColumn)
+             && (k = addAggInfoColumn(pParse->db, pAggInfo))>=0 
+            ){
+              pCol = &pAggInfo->aCol[k];
+              pCol->pTab = pExpr->pTab;
+              pCol->iTable = pExpr->iTable;
+              pCol->iColumn = pExpr->iColumn;
+              pCol->iMem = ++pParse->nMem;
+              pCol->iSorterColumn = -1;
+              pCol->pExpr = pExpr;
+              if( pAggInfo->pGroupBy ){
+                int j, n;
+                ExprList *pGB = pAggInfo->pGroupBy;
+                struct ExprList_item *pTerm = pGB->a;
+                n = pGB->nExpr;
+                for(j=0; j<n; j++, pTerm++){
+                  Expr *pE = pTerm->pExpr;
+                  if( pE->op==TK_COLUMN && pE->iTable==pExpr->iTable &&
+                      pE->iColumn==pExpr->iColumn ){
+                    pCol->iSorterColumn = j;
+                    break;
+                  }
+                }
+              }
+              if( pCol->iSorterColumn<0 ){
+                pCol->iSorterColumn = pAggInfo->nSortingColumn++;
+              }
+            }
+            /* There is now an entry for pExpr in pAggInfo->aCol[] (either
+            ** because it was there before or because we just created it).
+            ** Convert the pExpr to be a TK_AGG_COLUMN referring to that
+            ** pAggInfo->aCol[] entry.
+            */
+            ExprSetVVAProperty(pExpr, EP_NoReduce);
+            pExpr->pAggInfo = pAggInfo;
+            pExpr->op = TK_AGG_COLUMN;
+            pExpr->iAgg = (i16)k;
+            break;
+          } /* endif pExpr->iTable==pItem->iCursor */
+        } /* end loop over pSrcList */
+      }
+      return WRC_Prune;
+    }
+    case TK_AGG_FUNCTION: {
+      if( (pNC->ncFlags & NC_InAggFunc)==0
+       && pWalker->walkerDepth==pExpr->op2
+      ){
+        /* Check to see if pExpr is a duplicate of another aggregate 
+        ** function that is already in the pAggInfo structure
+        */
+        struct AggInfo_func *pItem = pAggInfo->aFunc;
+        for(i=0; i<pAggInfo->nFunc; i++, pItem++){
+          if( sqlite3ExprCompare(pItem->pExpr, pExpr, -1)==0 ){
+            break;
+          }
+        }
+        if( i>=pAggInfo->nFunc ){
+          /* pExpr is original.  Make a new entry in pAggInfo->aFunc[]
+          */
+          u8 enc = ENC(pParse->db);
+          i = addAggInfoFunc(pParse->db, pAggInfo);
+          if( i>=0 ){
+            assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+            pItem = &pAggInfo->aFunc[i];
+            pItem->pExpr = pExpr;
+            pItem->iMem = ++pParse->nMem;
+            assert( !ExprHasProperty(pExpr, EP_IntValue) );
+            pItem->pFunc = sqlite3FindFunction(pParse->db,
+                   pExpr->u.zToken, sqlite3Strlen30(pExpr->u.zToken),
+                   pExpr->x.pList ? pExpr->x.pList->nExpr : 0, enc, 0);
+            if( pExpr->flags & EP_Distinct ){
+              pItem->iDistinct = pParse->nTab++;
+            }else{
+              pItem->iDistinct = -1;
+            }
+          }
+        }
+        /* Make pExpr point to the appropriate pAggInfo->aFunc[] entry
+        */
+        assert( !ExprHasProperty(pExpr, EP_TokenOnly|EP_Reduced) );
+        ExprSetVVAProperty(pExpr, EP_NoReduce);
+        pExpr->iAgg = (i16)i;
+        pExpr->pAggInfo = pAggInfo;
+        return WRC_Prune;
+      }else{
+        return WRC_Continue;
+      }
+    }
+  }
+  return WRC_Continue;
+}
+static int analyzeAggregatesInSelect(Walker *pWalker, Select *pSelect){
+  UNUSED_PARAMETER(pWalker);
+  UNUSED_PARAMETER(pSelect);
+  return WRC_Continue;
+}
+
+/*
+** Analyze the pExpr expression looking for aggregate functions and
+** for variables that need to be added to AggInfo object that pNC->pAggInfo
+** points to.  Additional entries are made on the AggInfo object as
+** necessary.
+**
+** This routine should only be called after the expression has been
+** analyzed by sqlite3ResolveExprNames().
+*/
+SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext *pNC, Expr *pExpr){
+  Walker w;
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = analyzeAggregate;
+  w.xSelectCallback = analyzeAggregatesInSelect;
+  w.u.pNC = pNC;
+  assert( pNC->pSrcList!=0 );
+  sqlite3WalkExpr(&w, pExpr);
+}
+
+/*
+** Call sqlite3ExprAnalyzeAggregates() for every expression in an
+** expression list.  Return the number of errors.
+**
+** If an error is found, the analysis is cut short.
+*/
+SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext *pNC, ExprList *pList){
+  struct ExprList_item *pItem;
+  int i;
+  if( pList ){
+    for(pItem=pList->a, i=0; i<pList->nExpr; i++, pItem++){
+      sqlite3ExprAnalyzeAggregates(pNC, pItem->pExpr);
+    }
+  }
+}
+
+/*
+** Allocate a single new register for use to hold some intermediate result.
+*/
+SQLITE_PRIVATE int sqlite3GetTempReg(Parse *pParse){
+  if( pParse->nTempReg==0 ){
+    return ++pParse->nMem;
+  }
+  return pParse->aTempReg[--pParse->nTempReg];
+}
+
+/*
+** Deallocate a register, making available for reuse for some other
+** purpose.
+**
+** If a register is currently being used by the column cache, then
+** the deallocation is deferred until the column cache line that uses
+** the register becomes stale.
+*/
+SQLITE_PRIVATE void sqlite3ReleaseTempReg(Parse *pParse, int iReg){
+  if( iReg && pParse->nTempReg<ArraySize(pParse->aTempReg) ){
+    int i;
+    struct yColCache *p;
+    for(i=0, p=pParse->aColCache; i<SQLITE_N_COLCACHE; i++, p++){
+      if( p->iReg==iReg ){
+        p->tempReg = 1;
+        return;
+      }
+    }
+    pParse->aTempReg[pParse->nTempReg++] = iReg;
+  }
+}
+
+/*
+** Allocate or deallocate a block of nReg consecutive registers
+*/
+SQLITE_PRIVATE int sqlite3GetTempRange(Parse *pParse, int nReg){
+  int i, n;
+  i = pParse->iRangeReg;
+  n = pParse->nRangeReg;
+  if( nReg<=n ){
+    assert( !usedAsColumnCache(pParse, i, i+n-1) );
+    pParse->iRangeReg += nReg;
+    pParse->nRangeReg -= nReg;
+  }else{
+    i = pParse->nMem+1;
+    pParse->nMem += nReg;
+  }
+  return i;
+}
+SQLITE_PRIVATE void sqlite3ReleaseTempRange(Parse *pParse, int iReg, int nReg){
+  sqlite3ExprCacheRemove(pParse, iReg, nReg);
+  if( nReg>pParse->nRangeReg ){
+    pParse->nRangeReg = nReg;
+    pParse->iRangeReg = iReg;
+  }
+}
+
+/*
+** Mark all temporary registers as being unavailable for reuse.
+*/
+SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse *pParse){
+  pParse->nTempReg = 0;
+  pParse->nRangeReg = 0;
+}
+
+/************** End of expr.c ************************************************/
+/************** Begin file alter.c *******************************************/
+/*
+** 2005 February 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that used to generate VDBE code
+** that implements the ALTER TABLE command.
+*/
+
+/*
+** The code in this file only exists if we are not omitting the
+** ALTER TABLE logic from the build.
+*/
+#ifndef SQLITE_OMIT_ALTERTABLE
+
+
+/*
+** This function is used by SQL generated to implement the 
+** ALTER TABLE command. The first argument is the text of a CREATE TABLE or
+** CREATE INDEX command. The second is a table name. The table name in 
+** the CREATE TABLE or CREATE INDEX statement is replaced with the third
+** argument and the result returned. Examples:
+**
+** sqlite_rename_table('CREATE TABLE abc(a, b, c)', 'def')
+**     -> 'CREATE TABLE def(a, b, c)'
+**
+** sqlite_rename_table('CREATE INDEX i ON abc(a)', 'def')
+**     -> 'CREATE INDEX i ON def(a, b, c)'
+*/
+static void renameTableFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  unsigned char const *zSql = sqlite3_value_text(argv[0]);
+  unsigned char const *zTableName = sqlite3_value_text(argv[1]);
+
+  int token;
+  Token tname;
+  unsigned char const *zCsr = zSql;
+  int len = 0;
+  char *zRet;
+
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  UNUSED_PARAMETER(NotUsed);
+
+  /* The principle used to locate the table name in the CREATE TABLE 
+  ** statement is that the table name is the first non-space token that
+  ** is immediately followed by a TK_LP or TK_USING token.
+  */
+  if( zSql ){
+    do {
+      if( !*zCsr ){
+        /* Ran out of input before finding an opening bracket. Return NULL. */
+        return;
+      }
+
+      /* Store the token that zCsr points to in tname. */
+      tname.z = (char*)zCsr;
+      tname.n = len;
+
+      /* Advance zCsr to the next token. Store that token type in 'token',
+      ** and its length in 'len' (to be used next iteration of this loop).
+      */
+      do {
+        zCsr += len;
+        len = sqlite3GetToken(zCsr, &token);
+      } while( token==TK_SPACE );
+      assert( len>0 );
+    } while( token!=TK_LP && token!=TK_USING );
+
+    zRet = sqlite3MPrintf(db, "%.*s\"%w\"%s", (int)(((u8*)tname.z) - zSql),
+       zSql, zTableName, tname.z+tname.n);
+    sqlite3_result_text(context, zRet, -1, SQLITE_DYNAMIC);
+  }
+}
+
+/*
+** This C function implements an SQL user function that is used by SQL code
+** generated by the ALTER TABLE ... RENAME command to modify the definition
+** of any foreign key constraints that use the table being renamed as the 
+** parent table. It is passed three arguments:
+**
+**   1) The complete text of the CREATE TABLE statement being modified,
+**   2) The old name of the table being renamed, and
+**   3) The new name of the table being renamed.
+**
+** It returns the new CREATE TABLE statement. For example:
+**
+**   sqlite_rename_parent('CREATE TABLE t1(a REFERENCES t2)', 't2', 't3')
+**       -> 'CREATE TABLE t1(a REFERENCES t3)'
+*/
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+static void renameParentFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  char *zOutput = 0;
+  char *zResult;
+  unsigned char const *zInput = sqlite3_value_text(argv[0]);
+  unsigned char const *zOld = sqlite3_value_text(argv[1]);
+  unsigned char const *zNew = sqlite3_value_text(argv[2]);
+
+  unsigned const char *z;         /* Pointer to token */
+  int n;                          /* Length of token z */
+  int token;                      /* Type of token */
+
+  UNUSED_PARAMETER(NotUsed);
+  if( zInput==0 || zOld==0 ) return;
+  for(z=zInput; *z; z=z+n){
+    n = sqlite3GetToken(z, &token);
+    if( token==TK_REFERENCES ){
+      char *zParent;
+      do {
+        z += n;
+        n = sqlite3GetToken(z, &token);
+      }while( token==TK_SPACE );
+
+      zParent = sqlite3DbStrNDup(db, (const char *)z, n);
+      if( zParent==0 ) break;
+      sqlite3Dequote(zParent);
+      if( 0==sqlite3StrICmp((const char *)zOld, zParent) ){
+        char *zOut = sqlite3MPrintf(db, "%s%.*s\"%w\"", 
+            (zOutput?zOutput:""), (int)(z-zInput), zInput, (const char *)zNew
+        );
+        sqlite3DbFree(db, zOutput);
+        zOutput = zOut;
+        zInput = &z[n];
+      }
+      sqlite3DbFree(db, zParent);
+    }
+  }
+
+  zResult = sqlite3MPrintf(db, "%s%s", (zOutput?zOutput:""), zInput), 
+  sqlite3_result_text(context, zResult, -1, SQLITE_DYNAMIC);
+  sqlite3DbFree(db, zOutput);
+}
+#endif
+
+#ifndef SQLITE_OMIT_TRIGGER
+/* This function is used by SQL generated to implement the
+** ALTER TABLE command. The first argument is the text of a CREATE TRIGGER 
+** statement. The second is a table name. The table name in the CREATE 
+** TRIGGER statement is replaced with the third argument and the result 
+** returned. This is analagous to renameTableFunc() above, except for CREATE
+** TRIGGER, not CREATE INDEX and CREATE TABLE.
+*/
+static void renameTriggerFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  unsigned char const *zSql = sqlite3_value_text(argv[0]);
+  unsigned char const *zTableName = sqlite3_value_text(argv[1]);
+
+  int token;
+  Token tname;
+  int dist = 3;
+  unsigned char const *zCsr = zSql;
+  int len = 0;
+  char *zRet;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  UNUSED_PARAMETER(NotUsed);
+
+  /* The principle used to locate the table name in the CREATE TRIGGER 
+  ** statement is that the table name is the first token that is immediately
+  ** preceded by either TK_ON or TK_DOT and immediately followed by one
+  ** of TK_WHEN, TK_BEGIN or TK_FOR.
+  */
+  if( zSql ){
+    do {
+
+      if( !*zCsr ){
+        /* Ran out of input before finding the table name. Return NULL. */
+        return;
+      }
+
+      /* Store the token that zCsr points to in tname. */
+      tname.z = (char*)zCsr;
+      tname.n = len;
+
+      /* Advance zCsr to the next token. Store that token type in 'token',
+      ** and its length in 'len' (to be used next iteration of this loop).
+      */
+      do {
+        zCsr += len;
+        len = sqlite3GetToken(zCsr, &token);
+      }while( token==TK_SPACE );
+      assert( len>0 );
+
+      /* Variable 'dist' stores the number of tokens read since the most
+      ** recent TK_DOT or TK_ON. This means that when a WHEN, FOR or BEGIN 
+      ** token is read and 'dist' equals 2, the condition stated above
+      ** to be met.
+      **
+      ** Note that ON cannot be a database, table or column name, so
+      ** there is no need to worry about syntax like 
+      ** "CREATE TRIGGER ... ON ON.ON BEGIN ..." etc.
+      */
+      dist++;
+      if( token==TK_DOT || token==TK_ON ){
+        dist = 0;
+      }
+    } while( dist!=2 || (token!=TK_WHEN && token!=TK_FOR && token!=TK_BEGIN) );
+
+    /* Variable tname now contains the token that is the old table-name
+    ** in the CREATE TRIGGER statement.
+    */
+    zRet = sqlite3MPrintf(db, "%.*s\"%w\"%s", (int)(((u8*)tname.z) - zSql),
+       zSql, zTableName, tname.z+tname.n);
+    sqlite3_result_text(context, zRet, -1, SQLITE_DYNAMIC);
+  }
+}
+#endif   /* !SQLITE_OMIT_TRIGGER */
+
+/*
+** Register built-in functions used to help implement ALTER TABLE
+*/
+SQLITE_PRIVATE void sqlite3AlterFunctions(void){
+  static SQLITE_WSD FuncDef aAlterTableFuncs[] = {
+    FUNCTION(sqlite_rename_table,   2, 0, 0, renameTableFunc),
+#ifndef SQLITE_OMIT_TRIGGER
+    FUNCTION(sqlite_rename_trigger, 2, 0, 0, renameTriggerFunc),
+#endif
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+    FUNCTION(sqlite_rename_parent,  3, 0, 0, renameParentFunc),
+#endif
+  };
+  int i;
+  FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+  FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aAlterTableFuncs);
+
+  for(i=0; i<ArraySize(aAlterTableFuncs); i++){
+    sqlite3FuncDefInsert(pHash, &aFunc[i]);
+  }
+}
+
+/*
+** This function is used to create the text of expressions of the form:
+**
+**   name=<constant1> OR name=<constant2> OR ...
+**
+** If argument zWhere is NULL, then a pointer string containing the text 
+** "name=<constant>" is returned, where <constant> is the quoted version
+** of the string passed as argument zConstant. The returned buffer is
+** allocated using sqlite3DbMalloc(). It is the responsibility of the
+** caller to ensure that it is eventually freed.
+**
+** If argument zWhere is not NULL, then the string returned is 
+** "<where> OR name=<constant>", where <where> is the contents of zWhere.
+** In this case zWhere is passed to sqlite3DbFree() before returning.
+** 
+*/
+static char *whereOrName(sqlite3 *db, char *zWhere, char *zConstant){
+  char *zNew;
+  if( !zWhere ){
+    zNew = sqlite3MPrintf(db, "name=%Q", zConstant);
+  }else{
+    zNew = sqlite3MPrintf(db, "%s OR name=%Q", zWhere, zConstant);
+    sqlite3DbFree(db, zWhere);
+  }
+  return zNew;
+}
+
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+/*
+** Generate the text of a WHERE expression which can be used to select all
+** tables that have foreign key constraints that refer to table pTab (i.e.
+** constraints for which pTab is the parent table) from the sqlite_master
+** table.
+*/
+static char *whereForeignKeys(Parse *pParse, Table *pTab){
+  FKey *p;
+  char *zWhere = 0;
+  for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){
+    zWhere = whereOrName(pParse->db, zWhere, p->pFrom->zName);
+  }
+  return zWhere;
+}
+#endif
+
+/*
+** Generate the text of a WHERE expression which can be used to select all
+** temporary triggers on table pTab from the sqlite_temp_master table. If
+** table pTab has no temporary triggers, or is itself stored in the 
+** temporary database, NULL is returned.
+*/
+static char *whereTempTriggers(Parse *pParse, Table *pTab){
+  Trigger *pTrig;
+  char *zWhere = 0;
+  const Schema *pTempSchema = pParse->db->aDb[1].pSchema; /* Temp db schema */
+
+  /* If the table is not located in the temp-db (in which case NULL is 
+  ** returned, loop through the tables list of triggers. For each trigger
+  ** that is not part of the temp-db schema, add a clause to the WHERE 
+  ** expression being built up in zWhere.
+  */
+  if( pTab->pSchema!=pTempSchema ){
+    sqlite3 *db = pParse->db;
+    for(pTrig=sqlite3TriggerList(pParse, pTab); pTrig; pTrig=pTrig->pNext){
+      if( pTrig->pSchema==pTempSchema ){
+        zWhere = whereOrName(db, zWhere, pTrig->zName);
+      }
+    }
+  }
+  if( zWhere ){
+    char *zNew = sqlite3MPrintf(pParse->db, "type='trigger' AND (%s)", zWhere);
+    sqlite3DbFree(pParse->db, zWhere);
+    zWhere = zNew;
+  }
+  return zWhere;
+}
+
+/*
+** Generate code to drop and reload the internal representation of table
+** pTab from the database, including triggers and temporary triggers.
+** Argument zName is the name of the table in the database schema at
+** the time the generated code is executed. This can be different from
+** pTab->zName if this function is being called to code part of an 
+** "ALTER TABLE RENAME TO" statement.
+*/
+static void reloadTableSchema(Parse *pParse, Table *pTab, const char *zName){
+  Vdbe *v;
+  char *zWhere;
+  int iDb;                   /* Index of database containing pTab */
+#ifndef SQLITE_OMIT_TRIGGER
+  Trigger *pTrig;
+#endif
+
+  v = sqlite3GetVdbe(pParse);
+  if( NEVER(v==0) ) return;
+  assert( sqlite3BtreeHoldsAllMutexes(pParse->db) );
+  iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+  assert( iDb>=0 );
+
+#ifndef SQLITE_OMIT_TRIGGER
+  /* Drop any table triggers from the internal schema. */
+  for(pTrig=sqlite3TriggerList(pParse, pTab); pTrig; pTrig=pTrig->pNext){
+    int iTrigDb = sqlite3SchemaToIndex(pParse->db, pTrig->pSchema);
+    assert( iTrigDb==iDb || iTrigDb==1 );
+    sqlite3VdbeAddOp4(v, OP_DropTrigger, iTrigDb, 0, 0, pTrig->zName, 0);
+  }
+#endif
+
+  /* Drop the table and index from the internal schema.  */
+  sqlite3VdbeAddOp4(v, OP_DropTable, iDb, 0, 0, pTab->zName, 0);
+
+  /* Reload the table, index and permanent trigger schemas. */
+  zWhere = sqlite3MPrintf(pParse->db, "tbl_name=%Q", zName);
+  if( !zWhere ) return;
+  sqlite3VdbeAddParseSchemaOp(v, iDb, zWhere);
+
+#ifndef SQLITE_OMIT_TRIGGER
+  /* Now, if the table is not stored in the temp database, reload any temp 
+  ** triggers. Don't use IN(...) in case SQLITE_OMIT_SUBQUERY is defined. 
+  */
+  if( (zWhere=whereTempTriggers(pParse, pTab))!=0 ){
+    sqlite3VdbeAddParseSchemaOp(v, 1, zWhere);
+  }
+#endif
+}
+
+/*
+** Parameter zName is the name of a table that is about to be altered
+** (either with ALTER TABLE ... RENAME TO or ALTER TABLE ... ADD COLUMN).
+** If the table is a system table, this function leaves an error message
+** in pParse->zErr (system tables may not be altered) and returns non-zero.
+**
+** Or, if zName is not a system table, zero is returned.
+*/
+static int isSystemTable(Parse *pParse, const char *zName){
+  if( sqlite3Strlen30(zName)>6 && 0==sqlite3StrNICmp(zName, "sqlite_", 7) ){
+    sqlite3ErrorMsg(pParse, "table %s may not be altered", zName);
+    return 1;
+  }
+  return 0;
+}
+
+/*
+** Generate code to implement the "ALTER TABLE xxx RENAME TO yyy" 
+** command. 
+*/
+SQLITE_PRIVATE void sqlite3AlterRenameTable(
+  Parse *pParse,            /* Parser context. */
+  SrcList *pSrc,            /* The table to rename. */
+  Token *pName              /* The new table name. */
+){
+  int iDb;                  /* Database that contains the table */
+  char *zDb;                /* Name of database iDb */
+  Table *pTab;              /* Table being renamed */
+  char *zName = 0;          /* NULL-terminated version of pName */ 
+  sqlite3 *db = pParse->db; /* Database connection */
+  int nTabName;             /* Number of UTF-8 characters in zTabName */
+  const char *zTabName;     /* Original name of the table */
+  Vdbe *v;
+#ifndef SQLITE_OMIT_TRIGGER
+  char *zWhere = 0;         /* Where clause to locate temp triggers */
+#endif
+  VTable *pVTab = 0;        /* Non-zero if this is a v-tab with an xRename() */
+  int savedDbFlags;         /* Saved value of db->flags */
+
+  savedDbFlags = db->flags;  
+  if( NEVER(db->mallocFailed) ) goto exit_rename_table;
+  assert( pSrc->nSrc==1 );
+  assert( sqlite3BtreeHoldsAllMutexes(pParse->db) );
+
+  pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]);
+  if( !pTab ) goto exit_rename_table;
+  iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+  zDb = db->aDb[iDb].zName;
+  db->flags |= SQLITE_PreferBuiltin;
+
+  /* Get a NULL terminated version of the new table name. */
+  zName = sqlite3NameFromToken(db, pName);
+  if( !zName ) goto exit_rename_table;
+
+  /* Check that a table or index named 'zName' does not already exist
+  ** in database iDb. If so, this is an error.
+  */
+  if( sqlite3FindTable(db, zName, zDb) || sqlite3FindIndex(db, zName, zDb) ){
+    sqlite3ErrorMsg(pParse, 
+        "there is already another table or index with this name: %s", zName);
+    goto exit_rename_table;
+  }
+
+  /* Make sure it is not a system table being altered, or a reserved name
+  ** that the table is being renamed to.
+  */
+  if( SQLITE_OK!=isSystemTable(pParse, pTab->zName) ){
+    goto exit_rename_table;
+  }
+  if( SQLITE_OK!=sqlite3CheckObjectName(pParse, zName) ){ goto
+    exit_rename_table;
+  }
+
+#ifndef SQLITE_OMIT_VIEW
+  if( pTab->pSelect ){
+    sqlite3ErrorMsg(pParse, "view %s may not be altered", pTab->zName);
+    goto exit_rename_table;
+  }
+#endif
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  /* Invoke the authorization callback. */
+  if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){
+    goto exit_rename_table;
+  }
+#endif
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( sqlite3ViewGetColumnNames(pParse, pTab) ){
+    goto exit_rename_table;
+  }
+  if( IsVirtual(pTab) ){
+    pVTab = sqlite3GetVTable(db, pTab);
+    if( pVTab->pVtab->pModule->xRename==0 ){
+      pVTab = 0;
+    }
+  }
+#endif
+
+  /* Begin a transaction for database iDb. 
+  ** Then modify the schema cookie (since the ALTER TABLE modifies the
+  ** schema). Open a statement transaction if the table is a virtual
+  ** table.
+  */
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ){
+    goto exit_rename_table;
+  }
+  sqlite3BeginWriteOperation(pParse, pVTab!=0, iDb);
+  sqlite3ChangeCookie(pParse, iDb);
+
+  /* If this is a virtual table, invoke the xRename() function if
+  ** one is defined. The xRename() callback will modify the names
+  ** of any resources used by the v-table implementation (including other
+  ** SQLite tables) that are identified by the name of the virtual table.
+  */
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( pVTab ){
+    int i = ++pParse->nMem;
+    sqlite3VdbeAddOp4(v, OP_String8, 0, i, 0, zName, 0);
+    sqlite3VdbeAddOp4(v, OP_VRename, i, 0, 0,(const char*)pVTab, P4_VTAB);
+    sqlite3MayAbort(pParse);
+  }
+#endif
+
+  /* figure out how many UTF-8 characters are in zName */
+  zTabName = pTab->zName;
+  nTabName = sqlite3Utf8CharLen(zTabName, -1);
+
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+  if( db->flags&SQLITE_ForeignKeys ){
+    /* If foreign-key support is enabled, rewrite the CREATE TABLE 
+    ** statements corresponding to all child tables of foreign key constraints
+    ** for which the renamed table is the parent table.  */
+    if( (zWhere=whereForeignKeys(pParse, pTab))!=0 ){
+      sqlite3NestedParse(pParse, 
+          "UPDATE \"%w\".%s SET "
+              "sql = sqlite_rename_parent(sql, %Q, %Q) "
+              "WHERE %s;", zDb, SCHEMA_TABLE(iDb), zTabName, zName, zWhere);
+      sqlite3DbFree(db, zWhere);
+    }
+  }
+#endif
+
+  /* Modify the sqlite_master table to use the new table name. */
+  sqlite3NestedParse(pParse,
+      "UPDATE %Q.%s SET "
+#ifdef SQLITE_OMIT_TRIGGER
+          "sql = sqlite_rename_table(sql, %Q), "
+#else
+          "sql = CASE "
+            "WHEN type = 'trigger' THEN sqlite_rename_trigger(sql, %Q)"
+            "ELSE sqlite_rename_table(sql, %Q) END, "
+#endif
+          "tbl_name = %Q, "
+          "name = CASE "
+            "WHEN type='table' THEN %Q "
+            "WHEN name LIKE 'sqlite_autoindex%%' AND type='index' THEN "
+             "'sqlite_autoindex_' || %Q || substr(name,%d+18) "
+            "ELSE name END "
+      "WHERE tbl_name=%Q COLLATE nocase AND "
+          "(type='table' OR type='index' OR type='trigger');", 
+      zDb, SCHEMA_TABLE(iDb), zName, zName, zName, 
+#ifndef SQLITE_OMIT_TRIGGER
+      zName,
+#endif
+      zName, nTabName, zTabName
+  );
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+  /* If the sqlite_sequence table exists in this database, then update 
+  ** it with the new table name.
+  */
+  if( sqlite3FindTable(db, "sqlite_sequence", zDb) ){
+    sqlite3NestedParse(pParse,
+        "UPDATE \"%w\".sqlite_sequence set name = %Q WHERE name = %Q",
+        zDb, zName, pTab->zName);
+  }
+#endif
+
+#ifndef SQLITE_OMIT_TRIGGER
+  /* If there are TEMP triggers on this table, modify the sqlite_temp_master
+  ** table. Don't do this if the table being ALTERed is itself located in
+  ** the temp database.
+  */
+  if( (zWhere=whereTempTriggers(pParse, pTab))!=0 ){
+    sqlite3NestedParse(pParse, 
+        "UPDATE sqlite_temp_master SET "
+            "sql = sqlite_rename_trigger(sql, %Q), "
+            "tbl_name = %Q "
+            "WHERE %s;", zName, zName, zWhere);
+    sqlite3DbFree(db, zWhere);
+  }
+#endif
+
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+  if( db->flags&SQLITE_ForeignKeys ){
+    FKey *p;
+    for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){
+      Table *pFrom = p->pFrom;
+      if( pFrom!=pTab ){
+        reloadTableSchema(pParse, p->pFrom, pFrom->zName);
+      }
+    }
+  }
+#endif
+
+  /* Drop and reload the internal table schema. */
+  reloadTableSchema(pParse, pTab, zName);
+
+exit_rename_table:
+  sqlite3SrcListDelete(db, pSrc);
+  sqlite3DbFree(db, zName);
+  db->flags = savedDbFlags;
+}
+
+
+/*
+** Generate code to make sure the file format number is at least minFormat.
+** The generated code will increase the file format number if necessary.
+*/
+SQLITE_PRIVATE void sqlite3MinimumFileFormat(Parse *pParse, int iDb, int minFormat){
+  Vdbe *v;
+  v = sqlite3GetVdbe(pParse);
+  /* The VDBE should have been allocated before this routine is called.
+  ** If that allocation failed, we would have quit before reaching this
+  ** point */
+  if( ALWAYS(v) ){
+    int r1 = sqlite3GetTempReg(pParse);
+    int r2 = sqlite3GetTempReg(pParse);
+    int j1;
+    sqlite3VdbeAddOp3(v, OP_ReadCookie, iDb, r1, BTREE_FILE_FORMAT);
+    sqlite3VdbeUsesBtree(v, iDb);
+    sqlite3VdbeAddOp2(v, OP_Integer, minFormat, r2);
+    j1 = sqlite3VdbeAddOp3(v, OP_Ge, r2, 0, r1);
+    sqlite3VdbeChangeP5(v, SQLITE_NOTNULL); VdbeCoverage(v);
+    sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_FILE_FORMAT, r2);
+    sqlite3VdbeJumpHere(v, j1);
+    sqlite3ReleaseTempReg(pParse, r1);
+    sqlite3ReleaseTempReg(pParse, r2);
+  }
+}
+
+/*
+** This function is called after an "ALTER TABLE ... ADD" statement
+** has been parsed. Argument pColDef contains the text of the new
+** column definition.
+**
+** The Table structure pParse->pNewTable was extended to include
+** the new column during parsing.
+*/
+SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *pParse, Token *pColDef){
+  Table *pNew;              /* Copy of pParse->pNewTable */
+  Table *pTab;              /* Table being altered */
+  int iDb;                  /* Database number */
+  const char *zDb;          /* Database name */
+  const char *zTab;         /* Table name */
+  char *zCol;               /* Null-terminated column definition */
+  Column *pCol;             /* The new column */
+  Expr *pDflt;              /* Default value for the new column */
+  sqlite3 *db;              /* The database connection; */
+
+  db = pParse->db;
+  if( pParse->nErr || db->mallocFailed ) return;
+  pNew = pParse->pNewTable;
+  assert( pNew );
+
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  iDb = sqlite3SchemaToIndex(db, pNew->pSchema);
+  zDb = db->aDb[iDb].zName;
+  zTab = &pNew->zName[16];  /* Skip the "sqlite_altertab_" prefix on the name */
+  pCol = &pNew->aCol[pNew->nCol-1];
+  pDflt = pCol->pDflt;
+  pTab = sqlite3FindTable(db, zTab, zDb);
+  assert( pTab );
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  /* Invoke the authorization callback. */
+  if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){
+    return;
+  }
+#endif
+
+  /* If the default value for the new column was specified with a 
+  ** literal NULL, then set pDflt to 0. This simplifies checking
+  ** for an SQL NULL default below.
+  */
+  if( pDflt && pDflt->op==TK_NULL ){
+    pDflt = 0;
+  }
+
+  /* Check that the new column is not specified as PRIMARY KEY or UNIQUE.
+  ** If there is a NOT NULL constraint, then the default value for the
+  ** column must not be NULL.
+  */
+  if( pCol->colFlags & COLFLAG_PRIMKEY ){
+    sqlite3ErrorMsg(pParse, "Cannot add a PRIMARY KEY column");
+    return;
+  }
+  if( pNew->pIndex ){
+    sqlite3ErrorMsg(pParse, "Cannot add a UNIQUE column");
+    return;
+  }
+  if( (db->flags&SQLITE_ForeignKeys) && pNew->pFKey && pDflt ){
+    sqlite3ErrorMsg(pParse, 
+        "Cannot add a REFERENCES column with non-NULL default value");
+    return;
+  }
+  if( pCol->notNull && !pDflt ){
+    sqlite3ErrorMsg(pParse, 
+        "Cannot add a NOT NULL column with default value NULL");
+    return;
+  }
+
+  /* Ensure the default expression is something that sqlite3ValueFromExpr()
+  ** can handle (i.e. not CURRENT_TIME etc.)
+  */
+  if( pDflt ){
+    sqlite3_value *pVal = 0;
+    if( sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_NONE, &pVal) ){
+      db->mallocFailed = 1;
+      return;
+    }
+    if( !pVal ){
+      sqlite3ErrorMsg(pParse, "Cannot add a column with non-constant default");
+      return;
+    }
+    sqlite3ValueFree(pVal);
+  }
+
+  /* Modify the CREATE TABLE statement. */
+  zCol = sqlite3DbStrNDup(db, (char*)pColDef->z, pColDef->n);
+  if( zCol ){
+    char *zEnd = &zCol[pColDef->n-1];
+    int savedDbFlags = db->flags;
+    while( zEnd>zCol && (*zEnd==';' || sqlite3Isspace(*zEnd)) ){
+      *zEnd-- = '\0';
+    }
+    db->flags |= SQLITE_PreferBuiltin;
+    sqlite3NestedParse(pParse, 
+        "UPDATE \"%w\".%s SET "
+          "sql = substr(sql,1,%d) || ', ' || %Q || substr(sql,%d) "
+        "WHERE type = 'table' AND name = %Q", 
+      zDb, SCHEMA_TABLE(iDb), pNew->addColOffset, zCol, pNew->addColOffset+1,
+      zTab
+    );
+    sqlite3DbFree(db, zCol);
+    db->flags = savedDbFlags;
+  }
+
+  /* If the default value of the new column is NULL, then set the file
+  ** format to 2. If the default value of the new column is not NULL,
+  ** the file format becomes 3.
+  */
+  sqlite3MinimumFileFormat(pParse, iDb, pDflt ? 3 : 2);
+
+  /* Reload the schema of the modified table. */
+  reloadTableSchema(pParse, pTab, pTab->zName);
+}
+
+/*
+** This function is called by the parser after the table-name in
+** an "ALTER TABLE <table-name> ADD" statement is parsed. Argument 
+** pSrc is the full-name of the table being altered.
+**
+** This routine makes a (partial) copy of the Table structure
+** for the table being altered and sets Parse.pNewTable to point
+** to it. Routines called by the parser as the column definition
+** is parsed (i.e. sqlite3AddColumn()) add the new Column data to 
+** the copy. The copy of the Table structure is deleted by tokenize.c 
+** after parsing is finished.
+**
+** Routine sqlite3AlterFinishAddColumn() will be called to complete
+** coding the "ALTER TABLE ... ADD" statement.
+*/
+SQLITE_PRIVATE void sqlite3AlterBeginAddColumn(Parse *pParse, SrcList *pSrc){
+  Table *pNew;
+  Table *pTab;
+  Vdbe *v;
+  int iDb;
+  int i;
+  int nAlloc;
+  sqlite3 *db = pParse->db;
+
+  /* Look up the table being altered. */
+  assert( pParse->pNewTable==0 );
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  if( db->mallocFailed ) goto exit_begin_add_column;
+  pTab = sqlite3LocateTableItem(pParse, 0, &pSrc->a[0]);
+  if( !pTab ) goto exit_begin_add_column;
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( IsVirtual(pTab) ){
+    sqlite3ErrorMsg(pParse, "virtual tables may not be altered");
+    goto exit_begin_add_column;
+  }
+#endif
+
+  /* Make sure this is not an attempt to ALTER a view. */
+  if( pTab->pSelect ){
+    sqlite3ErrorMsg(pParse, "Cannot add a column to a view");
+    goto exit_begin_add_column;
+  }
+  if( SQLITE_OK!=isSystemTable(pParse, pTab->zName) ){
+    goto exit_begin_add_column;
+  }
+
+  assert( pTab->addColOffset>0 );
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+
+  /* Put a copy of the Table struct in Parse.pNewTable for the
+  ** sqlite3AddColumn() function and friends to modify.  But modify
+  ** the name by adding an "sqlite_altertab_" prefix.  By adding this
+  ** prefix, we insure that the name will not collide with an existing
+  ** table because user table are not allowed to have the "sqlite_"
+  ** prefix on their name.
+  */
+  pNew = (Table*)sqlite3DbMallocZero(db, sizeof(Table));
+  if( !pNew ) goto exit_begin_add_column;
+  pParse->pNewTable = pNew;
+  pNew->nRef = 1;
+  pNew->nCol = pTab->nCol;
+  assert( pNew->nCol>0 );
+  nAlloc = (((pNew->nCol-1)/8)*8)+8;
+  assert( nAlloc>=pNew->nCol && nAlloc%8==0 && nAlloc-pNew->nCol<8 );
+  pNew->aCol = (Column*)sqlite3DbMallocZero(db, sizeof(Column)*nAlloc);
+  pNew->zName = sqlite3MPrintf(db, "sqlite_altertab_%s", pTab->zName);
+  if( !pNew->aCol || !pNew->zName ){
+    db->mallocFailed = 1;
+    goto exit_begin_add_column;
+  }
+  memcpy(pNew->aCol, pTab->aCol, sizeof(Column)*pNew->nCol);
+  for(i=0; i<pNew->nCol; i++){
+    Column *pCol = &pNew->aCol[i];
+    pCol->zName = sqlite3DbStrDup(db, pCol->zName);
+    pCol->zColl = 0;
+    pCol->zType = 0;
+    pCol->pDflt = 0;
+    pCol->zDflt = 0;
+  }
+  pNew->pSchema = db->aDb[iDb].pSchema;
+  pNew->addColOffset = pTab->addColOffset;
+  pNew->nRef = 1;
+
+  /* Begin a transaction and increment the schema cookie.  */
+  sqlite3BeginWriteOperation(pParse, 0, iDb);
+  v = sqlite3GetVdbe(pParse);
+  if( !v ) goto exit_begin_add_column;
+  sqlite3ChangeCookie(pParse, iDb);
+
+exit_begin_add_column:
+  sqlite3SrcListDelete(db, pSrc);
+  return;
+}
+#endif  /* SQLITE_ALTER_TABLE */
+
+/************** End of alter.c ***********************************************/
+/************** Begin file analyze.c *****************************************/
+/*
+** 2005-07-08
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code associated with the ANALYZE command.
+**
+** The ANALYZE command gather statistics about the content of tables
+** and indices.  These statistics are made available to the query planner
+** to help it make better decisions about how to perform queries.
+**
+** The following system tables are or have been supported:
+**
+**    CREATE TABLE sqlite_stat1(tbl, idx, stat);
+**    CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample);
+**    CREATE TABLE sqlite_stat3(tbl, idx, nEq, nLt, nDLt, sample);
+**    CREATE TABLE sqlite_stat4(tbl, idx, nEq, nLt, nDLt, sample);
+**
+** Additional tables might be added in future releases of SQLite.
+** The sqlite_stat2 table is not created or used unless the SQLite version
+** is between 3.6.18 and 3.7.8, inclusive, and unless SQLite is compiled
+** with SQLITE_ENABLE_STAT2.  The sqlite_stat2 table is deprecated.
+** The sqlite_stat2 table is superseded by sqlite_stat3, which is only
+** created and used by SQLite versions 3.7.9 and later and with
+** SQLITE_ENABLE_STAT3 defined.  The functionality of sqlite_stat3
+** is a superset of sqlite_stat2.  The sqlite_stat4 is an enhanced
+** version of sqlite_stat3 and is only available when compiled with
+** SQLITE_ENABLE_STAT4 and in SQLite versions 3.8.1 and later.  It is
+** not possible to enable both STAT3 and STAT4 at the same time.  If they
+** are both enabled, then STAT4 takes precedence.
+**
+** For most applications, sqlite_stat1 provides all the statistics required
+** for the query planner to make good choices.
+**
+** Format of sqlite_stat1:
+**
+** There is normally one row per index, with the index identified by the
+** name in the idx column.  The tbl column is the name of the table to
+** which the index belongs.  In each such row, the stat column will be
+** a string consisting of a list of integers.  The first integer in this
+** list is the number of rows in the index.  (This is the same as the
+** number of rows in the table, except for partial indices.)  The second
+** integer is the average number of rows in the index that have the same
+** value in the first column of the index.  The third integer is the average
+** number of rows in the index that have the same value for the first two
+** columns.  The N-th integer (for N>1) is the average number of rows in 
+** the index which have the same value for the first N-1 columns.  For
+** a K-column index, there will be K+1 integers in the stat column.  If
+** the index is unique, then the last integer will be 1.
+**
+** The list of integers in the stat column can optionally be followed
+** by the keyword "unordered".  The "unordered" keyword, if it is present,
+** must be separated from the last integer by a single space.  If the
+** "unordered" keyword is present, then the query planner assumes that
+** the index is unordered and will not use the index for a range query.
+** 
+** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat
+** column contains a single integer which is the (estimated) number of
+** rows in the table identified by sqlite_stat1.tbl.
+**
+** Format of sqlite_stat2:
+**
+** The sqlite_stat2 is only created and is only used if SQLite is compiled
+** with SQLITE_ENABLE_STAT2 and if the SQLite version number is between
+** 3.6.18 and 3.7.8.  The "stat2" table contains additional information
+** about the distribution of keys within an index.  The index is identified by
+** the "idx" column and the "tbl" column is the name of the table to which
+** the index belongs.  There are usually 10 rows in the sqlite_stat2
+** table for each index.
+**
+** The sqlite_stat2 entries for an index that have sampleno between 0 and 9
+** inclusive are samples of the left-most key value in the index taken at
+** evenly spaced points along the index.  Let the number of samples be S
+** (10 in the standard build) and let C be the number of rows in the index.
+** Then the sampled rows are given by:
+**
+**     rownumber = (i*C*2 + C)/(S*2)
+**
+** For i between 0 and S-1.  Conceptually, the index space is divided into
+** S uniform buckets and the samples are the middle row from each bucket.
+**
+** The format for sqlite_stat2 is recorded here for legacy reference.  This
+** version of SQLite does not support sqlite_stat2.  It neither reads nor
+** writes the sqlite_stat2 table.  This version of SQLite only supports
+** sqlite_stat3.
+**
+** Format for sqlite_stat3:
+**
+** The sqlite_stat3 format is a subset of sqlite_stat4.  Hence, the
+** sqlite_stat4 format will be described first.  Further information
+** about sqlite_stat3 follows the sqlite_stat4 description.
+**
+** Format for sqlite_stat4:
+**
+** As with sqlite_stat2, the sqlite_stat4 table contains histogram data
+** to aid the query planner in choosing good indices based on the values
+** that indexed columns are compared against in the WHERE clauses of
+** queries.
+**
+** The sqlite_stat4 table contains multiple entries for each index.
+** The idx column names the index and the tbl column is the table of the
+** index.  If the idx and tbl columns are the same, then the sample is
+** of the INTEGER PRIMARY KEY.  The sample column is a blob which is the
+** binary encoding of a key from the index.  The nEq column is a
+** list of integers.  The first integer is the approximate number
+** of entries in the index whose left-most column exactly matches
+** the left-most column of the sample.  The second integer in nEq
+** is the approximate number of entries in the index where the
+** first two columns match the first two columns of the sample.
+** And so forth.  nLt is another list of integers that show the approximate
+** number of entries that are strictly less than the sample.  The first
+** integer in nLt contains the number of entries in the index where the
+** left-most column is less than the left-most column of the sample.
+** The K-th integer in the nLt entry is the number of index entries 
+** where the first K columns are less than the first K columns of the
+** sample.  The nDLt column is like nLt except that it contains the 
+** number of distinct entries in the index that are less than the
+** sample.
+**
+** There can be an arbitrary number of sqlite_stat4 entries per index.
+** The ANALYZE command will typically generate sqlite_stat4 tables
+** that contain between 10 and 40 samples which are distributed across
+** the key space, though not uniformly, and which include samples with
+** large nEq values.
+**
+** Format for sqlite_stat3 redux:
+**
+** The sqlite_stat3 table is like sqlite_stat4 except that it only
+** looks at the left-most column of the index.  The sqlite_stat3.sample
+** column contains the actual value of the left-most column instead
+** of a blob encoding of the complete index key as is found in
+** sqlite_stat4.sample.  The nEq, nLt, and nDLt entries of sqlite_stat3
+** all contain just a single integer which is the same as the first
+** integer in the equivalent columns in sqlite_stat4.
+*/
+#ifndef SQLITE_OMIT_ANALYZE
+
+#if defined(SQLITE_ENABLE_STAT4)
+# define IsStat4     1
+# define IsStat3     0
+#elif defined(SQLITE_ENABLE_STAT3)
+# define IsStat4     0
+# define IsStat3     1
+#else
+# define IsStat4     0
+# define IsStat3     0
+# undef SQLITE_STAT4_SAMPLES
+# define SQLITE_STAT4_SAMPLES 1
+#endif
+#define IsStat34    (IsStat3+IsStat4)  /* 1 for STAT3 or STAT4. 0 otherwise */
+
+/*
+** This routine generates code that opens the sqlite_statN tables.
+** The sqlite_stat1 table is always relevant.  sqlite_stat2 is now
+** obsolete.  sqlite_stat3 and sqlite_stat4 are only opened when
+** appropriate compile-time options are provided.
+**
+** If the sqlite_statN tables do not previously exist, it is created.
+**
+** Argument zWhere may be a pointer to a buffer containing a table name,
+** or it may be a NULL pointer. If it is not NULL, then all entries in
+** the sqlite_statN tables associated with the named table are deleted.
+** If zWhere==0, then code is generated to delete all stat table entries.
+*/
+static void openStatTable(
+  Parse *pParse,          /* Parsing context */
+  int iDb,                /* The database we are looking in */
+  int iStatCur,           /* Open the sqlite_stat1 table on this cursor */
+  const char *zWhere,     /* Delete entries for this table or index */
+  const char *zWhereType  /* Either "tbl" or "idx" */
+){
+  static const struct {
+    const char *zName;
+    const char *zCols;
+  } aTable[] = {
+    { "sqlite_stat1", "tbl,idx,stat" },
+#if defined(SQLITE_ENABLE_STAT4)
+    { "sqlite_stat4", "tbl,idx,neq,nlt,ndlt,sample" },
+    { "sqlite_stat3", 0 },
+#elif defined(SQLITE_ENABLE_STAT3)
+    { "sqlite_stat3", "tbl,idx,neq,nlt,ndlt,sample" },
+    { "sqlite_stat4", 0 },
+#else
+    { "sqlite_stat3", 0 },
+    { "sqlite_stat4", 0 },
+#endif
+  };
+  int i;
+  sqlite3 *db = pParse->db;
+  Db *pDb;
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  int aRoot[ArraySize(aTable)];
+  u8 aCreateTbl[ArraySize(aTable)];
+
+  if( v==0 ) return;
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  assert( sqlite3VdbeDb(v)==db );
+  pDb = &db->aDb[iDb];
+
+  /* Create new statistic tables if they do not exist, or clear them
+  ** if they do already exist.
+  */
+  for(i=0; i<ArraySize(aTable); i++){
+    const char *zTab = aTable[i].zName;
+    Table *pStat;
+    if( (pStat = sqlite3FindTable(db, zTab, pDb->zName))==0 ){
+      if( aTable[i].zCols ){
+        /* The sqlite_statN table does not exist. Create it. Note that a 
+        ** side-effect of the CREATE TABLE statement is to leave the rootpage 
+        ** of the new table in register pParse->regRoot. This is important 
+        ** because the OpenWrite opcode below will be needing it. */
+        sqlite3NestedParse(pParse,
+            "CREATE TABLE %Q.%s(%s)", pDb->zName, zTab, aTable[i].zCols
+        );
+        aRoot[i] = pParse->regRoot;
+        aCreateTbl[i] = OPFLAG_P2ISREG;
+      }
+    }else{
+      /* The table already exists. If zWhere is not NULL, delete all entries 
+      ** associated with the table zWhere. If zWhere is NULL, delete the
+      ** entire contents of the table. */
+      aRoot[i] = pStat->tnum;
+      aCreateTbl[i] = 0;
+      sqlite3TableLock(pParse, iDb, aRoot[i], 1, zTab);
+      if( zWhere ){
+        sqlite3NestedParse(pParse,
+           "DELETE FROM %Q.%s WHERE %s=%Q",
+           pDb->zName, zTab, zWhereType, zWhere
+        );
+      }else{
+        /* The sqlite_stat[134] table already exists.  Delete all rows. */
+        sqlite3VdbeAddOp2(v, OP_Clear, aRoot[i], iDb);
+      }
+    }
+  }
+
+  /* Open the sqlite_stat[134] tables for writing. */
+  for(i=0; aTable[i].zCols; i++){
+    assert( i<ArraySize(aTable) );
+    sqlite3VdbeAddOp4Int(v, OP_OpenWrite, iStatCur+i, aRoot[i], iDb, 3);
+    sqlite3VdbeChangeP5(v, aCreateTbl[i]);
+    VdbeComment((v, aTable[i].zName));
+  }
+}
+
+/*
+** Recommended number of samples for sqlite_stat4
+*/
+#ifndef SQLITE_STAT4_SAMPLES
+# define SQLITE_STAT4_SAMPLES 24
+#endif
+
+/*
+** Three SQL functions - stat_init(), stat_push(), and stat_get() -
+** share an instance of the following structure to hold their state
+** information.
+*/
+typedef struct Stat4Accum Stat4Accum;
+typedef struct Stat4Sample Stat4Sample;
+struct Stat4Sample {
+  tRowcnt *anEq;                  /* sqlite_stat4.nEq */
+  tRowcnt *anDLt;                 /* sqlite_stat4.nDLt */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  tRowcnt *anLt;                  /* sqlite_stat4.nLt */
+  union {
+    i64 iRowid;                     /* Rowid in main table of the key */
+    u8 *aRowid;                     /* Key for WITHOUT ROWID tables */
+  } u;
+  u32 nRowid;                     /* Sizeof aRowid[] */
+  u8 isPSample;                   /* True if a periodic sample */
+  int iCol;                       /* If !isPSample, the reason for inclusion */
+  u32 iHash;                      /* Tiebreaker hash */
+#endif
+};                                                    
+struct Stat4Accum {
+  tRowcnt nRow;             /* Number of rows in the entire table */
+  tRowcnt nPSample;         /* How often to do a periodic sample */
+  int nCol;                 /* Number of columns in index + pk/rowid */
+  int nKeyCol;              /* Number of index columns w/o the pk/rowid */
+  int mxSample;             /* Maximum number of samples to accumulate */
+  Stat4Sample current;      /* Current row as a Stat4Sample */
+  u32 iPrn;                 /* Pseudo-random number used for sampling */
+  Stat4Sample *aBest;       /* Array of nCol best samples */
+  int iMin;                 /* Index in a[] of entry with minimum score */
+  int nSample;              /* Current number of samples */
+  int iGet;                 /* Index of current sample accessed by stat_get() */
+  Stat4Sample *a;           /* Array of mxSample Stat4Sample objects */
+  sqlite3 *db;              /* Database connection, for malloc() */
+};
+
+/* Reclaim memory used by a Stat4Sample
+*/
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+static void sampleClear(sqlite3 *db, Stat4Sample *p){
+  assert( db!=0 );
+  if( p->nRowid ){
+    sqlite3DbFree(db, p->u.aRowid);
+    p->nRowid = 0;
+  }
+}
+#endif
+
+/* Initialize the BLOB value of a ROWID
+*/
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+static void sampleSetRowid(sqlite3 *db, Stat4Sample *p, int n, const u8 *pData){
+  assert( db!=0 );
+  if( p->nRowid ) sqlite3DbFree(db, p->u.aRowid);
+  p->u.aRowid = sqlite3DbMallocRaw(db, n);
+  if( p->u.aRowid ){
+    p->nRowid = n;
+    memcpy(p->u.aRowid, pData, n);
+  }else{
+    p->nRowid = 0;
+  }
+}
+#endif
+
+/* Initialize the INTEGER value of a ROWID.
+*/
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+static void sampleSetRowidInt64(sqlite3 *db, Stat4Sample *p, i64 iRowid){
+  assert( db!=0 );
+  if( p->nRowid ) sqlite3DbFree(db, p->u.aRowid);
+  p->nRowid = 0;
+  p->u.iRowid = iRowid;
+}
+#endif
+
+
+/*
+** Copy the contents of object (*pFrom) into (*pTo).
+*/
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+static void sampleCopy(Stat4Accum *p, Stat4Sample *pTo, Stat4Sample *pFrom){
+  pTo->isPSample = pFrom->isPSample;
+  pTo->iCol = pFrom->iCol;
+  pTo->iHash = pFrom->iHash;
+  memcpy(pTo->anEq, pFrom->anEq, sizeof(tRowcnt)*p->nCol);
+  memcpy(pTo->anLt, pFrom->anLt, sizeof(tRowcnt)*p->nCol);
+  memcpy(pTo->anDLt, pFrom->anDLt, sizeof(tRowcnt)*p->nCol);
+  if( pFrom->nRowid ){
+    sampleSetRowid(p->db, pTo, pFrom->nRowid, pFrom->u.aRowid);
+  }else{
+    sampleSetRowidInt64(p->db, pTo, pFrom->u.iRowid);
+  }
+}
+#endif
+
+/*
+** Reclaim all memory of a Stat4Accum structure.
+*/
+static void stat4Destructor(void *pOld){
+  Stat4Accum *p = (Stat4Accum*)pOld;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  int i;
+  for(i=0; i<p->nCol; i++) sampleClear(p->db, p->aBest+i);
+  for(i=0; i<p->mxSample; i++) sampleClear(p->db, p->a+i);
+  sampleClear(p->db, &p->current);
+#endif
+  sqlite3DbFree(p->db, p);
+}
+
+/*
+** Implementation of the stat_init(N,K,C) SQL function. The three parameters
+** are:
+**     N:    The number of columns in the index including the rowid/pk (note 1)
+**     K:    The number of columns in the index excluding the rowid/pk.
+**     C:    The number of rows in the index (note 2)
+**
+** Note 1:  In the special case of the covering index that implements a
+** WITHOUT ROWID table, N is the number of PRIMARY KEY columns, not the
+** total number of columns in the table.
+**
+** Note 2:  C is only used for STAT3 and STAT4.
+**
+** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
+** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
+** PRIMARY KEY of the table.  The covering index that implements the
+** original WITHOUT ROWID table as N==K as a special case.
+**
+** This routine allocates the Stat4Accum object in heap memory. The return 
+** value is a pointer to the Stat4Accum object.  The datatype of the
+** return value is BLOB, but it is really just a pointer to the Stat4Accum
+** object.
+*/
+static void statInit(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Stat4Accum *p;
+  int nCol;                       /* Number of columns in index being sampled */
+  int nKeyCol;                    /* Number of key columns */
+  int nColUp;                     /* nCol rounded up for alignment */
+  int n;                          /* Bytes of space to allocate */
+  sqlite3 *db;                    /* Database connection */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  int mxSample = SQLITE_STAT4_SAMPLES;
+#endif
+
+  /* Decode the three function arguments */
+  UNUSED_PARAMETER(argc);
+  nCol = sqlite3_value_int(argv[0]);
+  assert( nCol>0 );
+  nColUp = sizeof(tRowcnt)<8 ? (nCol+1)&~1 : nCol;
+  nKeyCol = sqlite3_value_int(argv[1]);
+  assert( nKeyCol<=nCol );
+  assert( nKeyCol>0 );
+
+  /* Allocate the space required for the Stat4Accum object */
+  n = sizeof(*p) 
+    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anEq */
+    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anDLt */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    + sizeof(tRowcnt)*nColUp                  /* Stat4Accum.anLt */
+    + sizeof(Stat4Sample)*(nCol+mxSample)     /* Stat4Accum.aBest[], a[] */
+    + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample)
+#endif
+  ;
+  db = sqlite3_context_db_handle(context);
+  p = sqlite3DbMallocZero(db, n);
+  if( p==0 ){
+    sqlite3_result_error_nomem(context);
+    return;
+  }
+
+  p->db = db;
+  p->nRow = 0;
+  p->nCol = nCol;
+  p->nKeyCol = nKeyCol;
+  p->current.anDLt = (tRowcnt*)&p[1];
+  p->current.anEq = &p->current.anDLt[nColUp];
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  {
+    u8 *pSpace;                     /* Allocated space not yet assigned */
+    int i;                          /* Used to iterate through p->aSample[] */
+
+    p->iGet = -1;
+    p->mxSample = mxSample;
+    p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1);
+    p->current.anLt = &p->current.anEq[nColUp];
+    p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]);
+  
+    /* Set up the Stat4Accum.a[] and aBest[] arrays */
+    p->a = (struct Stat4Sample*)&p->current.anLt[nColUp];
+    p->aBest = &p->a[mxSample];
+    pSpace = (u8*)(&p->a[mxSample+nCol]);
+    for(i=0; i<(mxSample+nCol); i++){
+      p->a[i].anEq = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp);
+      p->a[i].anLt = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp);
+      p->a[i].anDLt = (tRowcnt *)pSpace; pSpace += (sizeof(tRowcnt) * nColUp);
+    }
+    assert( (pSpace - (u8*)p)==n );
+  
+    for(i=0; i<nCol; i++){
+      p->aBest[i].iCol = i;
+    }
+  }
+#endif
+
+  /* Return a pointer to the allocated object to the caller.  Note that
+  ** only the pointer (the 2nd parameter) matters.  The size of the object
+  ** (given by the 3rd parameter) is never used and can be any positive
+  ** value. */
+  sqlite3_result_blob(context, p, sizeof(*p), stat4Destructor);
+}
+static const FuncDef statInitFuncdef = {
+  2+IsStat34,      /* nArg */
+  SQLITE_UTF8,     /* funcFlags */
+  0,               /* pUserData */
+  0,               /* pNext */
+  statInit,        /* xFunc */
+  0,               /* xStep */
+  0,               /* xFinalize */
+  "stat_init",     /* zName */
+  0,               /* pHash */
+  0                /* pDestructor */
+};
+
+#ifdef SQLITE_ENABLE_STAT4
+/*
+** pNew and pOld are both candidate non-periodic samples selected for 
+** the same column (pNew->iCol==pOld->iCol). Ignoring this column and 
+** considering only any trailing columns and the sample hash value, this
+** function returns true if sample pNew is to be preferred over pOld.
+** In other words, if we assume that the cardinalities of the selected
+** column for pNew and pOld are equal, is pNew to be preferred over pOld.
+**
+** This function assumes that for each argument sample, the contents of
+** the anEq[] array from pSample->anEq[pSample->iCol+1] onwards are valid. 
+*/
+static int sampleIsBetterPost(
+  Stat4Accum *pAccum, 
+  Stat4Sample *pNew, 
+  Stat4Sample *pOld
+){
+  int nCol = pAccum->nCol;
+  int i;
+  assert( pNew->iCol==pOld->iCol );
+  for(i=pNew->iCol+1; i<nCol; i++){
+    if( pNew->anEq[i]>pOld->anEq[i] ) return 1;
+    if( pNew->anEq[i]<pOld->anEq[i] ) return 0;
+  }
+  if( pNew->iHash>pOld->iHash ) return 1;
+  return 0;
+}
+#endif
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** Return true if pNew is to be preferred over pOld.
+**
+** This function assumes that for each argument sample, the contents of
+** the anEq[] array from pSample->anEq[pSample->iCol] onwards are valid. 
+*/
+static int sampleIsBetter(
+  Stat4Accum *pAccum, 
+  Stat4Sample *pNew, 
+  Stat4Sample *pOld
+){
+  tRowcnt nEqNew = pNew->anEq[pNew->iCol];
+  tRowcnt nEqOld = pOld->anEq[pOld->iCol];
+
+  assert( pOld->isPSample==0 && pNew->isPSample==0 );
+  assert( IsStat4 || (pNew->iCol==0 && pOld->iCol==0) );
+
+  if( (nEqNew>nEqOld) ) return 1;
+#ifdef SQLITE_ENABLE_STAT4
+  if( nEqNew==nEqOld ){
+    if( pNew->iCol<pOld->iCol ) return 1;
+    return (pNew->iCol==pOld->iCol && sampleIsBetterPost(pAccum, pNew, pOld));
+  }
+  return 0;
+#else
+  return (nEqNew==nEqOld && pNew->iHash>pOld->iHash);
+#endif
+}
+
+/*
+** Copy the contents of sample *pNew into the p->a[] array. If necessary,
+** remove the least desirable sample from p->a[] to make room.
+*/
+static void sampleInsert(Stat4Accum *p, Stat4Sample *pNew, int nEqZero){
+  Stat4Sample *pSample = 0;
+  int i;
+
+  assert( IsStat4 || nEqZero==0 );
+
+#ifdef SQLITE_ENABLE_STAT4
+  if( pNew->isPSample==0 ){
+    Stat4Sample *pUpgrade = 0;
+    assert( pNew->anEq[pNew->iCol]>0 );
+
+    /* This sample is being added because the prefix that ends in column 
+    ** iCol occurs many times in the table. However, if we have already
+    ** added a sample that shares this prefix, there is no need to add
+    ** this one. Instead, upgrade the priority of the highest priority
+    ** existing sample that shares this prefix.  */
+    for(i=p->nSample-1; i>=0; i--){
+      Stat4Sample *pOld = &p->a[i];
+      if( pOld->anEq[pNew->iCol]==0 ){
+        if( pOld->isPSample ) return;
+        assert( pOld->iCol>pNew->iCol );
+        assert( sampleIsBetter(p, pNew, pOld) );
+        if( pUpgrade==0 || sampleIsBetter(p, pOld, pUpgrade) ){
+          pUpgrade = pOld;
+        }
+      }
+    }
+    if( pUpgrade ){
+      pUpgrade->iCol = pNew->iCol;
+      pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol];
+      goto find_new_min;
+    }
+  }
+#endif
+
+  /* If necessary, remove sample iMin to make room for the new sample. */
+  if( p->nSample>=p->mxSample ){
+    Stat4Sample *pMin = &p->a[p->iMin];
+    tRowcnt *anEq = pMin->anEq;
+    tRowcnt *anLt = pMin->anLt;
+    tRowcnt *anDLt = pMin->anDLt;
+    sampleClear(p->db, pMin);
+    memmove(pMin, &pMin[1], sizeof(p->a[0])*(p->nSample-p->iMin-1));
+    pSample = &p->a[p->nSample-1];
+    pSample->nRowid = 0;
+    pSample->anEq = anEq;
+    pSample->anDLt = anDLt;
+    pSample->anLt = anLt;
+    p->nSample = p->mxSample-1;
+  }
+
+  /* The "rows less-than" for the rowid column must be greater than that
+  ** for the last sample in the p->a[] array. Otherwise, the samples would
+  ** be out of order. */
+#ifdef SQLITE_ENABLE_STAT4
+  assert( p->nSample==0 
+       || pNew->anLt[p->nCol-1] > p->a[p->nSample-1].anLt[p->nCol-1] );
+#endif
+
+  /* Insert the new sample */
+  pSample = &p->a[p->nSample];
+  sampleCopy(p, pSample, pNew);
+  p->nSample++;
+
+  /* Zero the first nEqZero entries in the anEq[] array. */
+  memset(pSample->anEq, 0, sizeof(tRowcnt)*nEqZero);
+
+#ifdef SQLITE_ENABLE_STAT4
+ find_new_min:
+#endif
+  if( p->nSample>=p->mxSample ){
+    int iMin = -1;
+    for(i=0; i<p->mxSample; i++){
+      if( p->a[i].isPSample ) continue;
+      if( iMin<0 || sampleIsBetter(p, &p->a[iMin], &p->a[i]) ){
+        iMin = i;
+      }
+    }
+    assert( iMin>=0 );
+    p->iMin = iMin;
+  }
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+/*
+** Field iChng of the index being scanned has changed. So at this point
+** p->current contains a sample that reflects the previous row of the
+** index. The value of anEq[iChng] and subsequent anEq[] elements are
+** correct at this point.
+*/
+static void samplePushPrevious(Stat4Accum *p, int iChng){
+#ifdef SQLITE_ENABLE_STAT4
+  int i;
+
+  /* Check if any samples from the aBest[] array should be pushed
+  ** into IndexSample.a[] at this point.  */
+  for(i=(p->nCol-2); i>=iChng; i--){
+    Stat4Sample *pBest = &p->aBest[i];
+    pBest->anEq[i] = p->current.anEq[i];
+    if( p->nSample<p->mxSample || sampleIsBetter(p, pBest, &p->a[p->iMin]) ){
+      sampleInsert(p, pBest, i);
+    }
+  }
+
+  /* Update the anEq[] fields of any samples already collected. */
+  for(i=p->nSample-1; i>=0; i--){
+    int j;
+    for(j=iChng; j<p->nCol; j++){
+      if( p->a[i].anEq[j]==0 ) p->a[i].anEq[j] = p->current.anEq[j];
+    }
+  }
+#endif
+
+#if defined(SQLITE_ENABLE_STAT3) && !defined(SQLITE_ENABLE_STAT4)
+  if( iChng==0 ){
+    tRowcnt nLt = p->current.anLt[0];
+    tRowcnt nEq = p->current.anEq[0];
+
+    /* Check if this is to be a periodic sample. If so, add it. */
+    if( (nLt/p->nPSample)!=(nLt+nEq)/p->nPSample ){
+      p->current.isPSample = 1;
+      sampleInsert(p, &p->current, 0);
+      p->current.isPSample = 0;
+    }else 
+
+    /* Or if it is a non-periodic sample. Add it in this case too. */
+    if( p->nSample<p->mxSample 
+     || sampleIsBetter(p, &p->current, &p->a[p->iMin]) 
+    ){
+      sampleInsert(p, &p->current, 0);
+    }
+  }
+#endif
+
+#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
+  UNUSED_PARAMETER( p );
+  UNUSED_PARAMETER( iChng );
+#endif
+}
+
+/*
+** Implementation of the stat_push SQL function:  stat_push(P,C,R)
+** Arguments:
+**
+**    P     Pointer to the Stat4Accum object created by stat_init()
+**    C     Index of left-most column to differ from previous row
+**    R     Rowid for the current row.  Might be a key record for
+**          WITHOUT ROWID tables.
+**
+** This SQL function always returns NULL.  It's purpose it to accumulate
+** statistical data and/or samples in the Stat4Accum object about the
+** index being analyzed.  The stat_get() SQL function will later be used to
+** extract relevant information for constructing the sqlite_statN tables.
+**
+** The R parameter is only used for STAT3 and STAT4
+*/
+static void statPush(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int i;
+
+  /* The three function arguments */
+  Stat4Accum *p = (Stat4Accum*)sqlite3_value_blob(argv[0]);
+  int iChng = sqlite3_value_int(argv[1]);
+
+  UNUSED_PARAMETER( argc );
+  UNUSED_PARAMETER( context );
+  assert( p->nCol>0 );
+  assert( iChng<p->nCol );
+
+  if( p->nRow==0 ){
+    /* This is the first call to this function. Do initialization. */
+    for(i=0; i<p->nCol; i++) p->current.anEq[i] = 1;
+  }else{
+    /* Second and subsequent calls get processed here */
+    samplePushPrevious(p, iChng);
+
+    /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply
+    ** to the current row of the index. */
+    for(i=0; i<iChng; i++){
+      p->current.anEq[i]++;
+    }
+    for(i=iChng; i<p->nCol; i++){
+      p->current.anDLt[i]++;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+      p->current.anLt[i] += p->current.anEq[i];
+#endif
+      p->current.anEq[i] = 1;
+    }
+  }
+  p->nRow++;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( sqlite3_value_type(argv[2])==SQLITE_INTEGER ){
+    sampleSetRowidInt64(p->db, &p->current, sqlite3_value_int64(argv[2]));
+  }else{
+    sampleSetRowid(p->db, &p->current, sqlite3_value_bytes(argv[2]),
+                                       sqlite3_value_blob(argv[2]));
+  }
+  p->current.iHash = p->iPrn = p->iPrn*1103515245 + 12345;
+#endif
+
+#ifdef SQLITE_ENABLE_STAT4
+  {
+    tRowcnt nLt = p->current.anLt[p->nCol-1];
+
+    /* Check if this is to be a periodic sample. If so, add it. */
+    if( (nLt/p->nPSample)!=(nLt+1)/p->nPSample ){
+      p->current.isPSample = 1;
+      p->current.iCol = 0;
+      sampleInsert(p, &p->current, p->nCol-1);
+      p->current.isPSample = 0;
+    }
+
+    /* Update the aBest[] array. */
+    for(i=0; i<(p->nCol-1); i++){
+      p->current.iCol = i;
+      if( i>=iChng || sampleIsBetterPost(p, &p->current, &p->aBest[i]) ){
+        sampleCopy(p, &p->aBest[i], &p->current);
+      }
+    }
+  }
+#endif
+}
+static const FuncDef statPushFuncdef = {
+  2+IsStat34,      /* nArg */
+  SQLITE_UTF8,     /* funcFlags */
+  0,               /* pUserData */
+  0,               /* pNext */
+  statPush,        /* xFunc */
+  0,               /* xStep */
+  0,               /* xFinalize */
+  "stat_push",     /* zName */
+  0,               /* pHash */
+  0                /* pDestructor */
+};
+
+#define STAT_GET_STAT1 0          /* "stat" column of stat1 table */
+#define STAT_GET_ROWID 1          /* "rowid" column of stat[34] entry */
+#define STAT_GET_NEQ   2          /* "neq" column of stat[34] entry */
+#define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
+#define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */
+
+/*
+** Implementation of the stat_get(P,J) SQL function.  This routine is
+** used to query statistical information that has been gathered into
+** the Stat4Accum object by prior calls to stat_push().  The P parameter
+** has type BLOB but it is really just a pointer to the Stat4Accum object.
+** The content to returned is determined by the parameter J
+** which is one of the STAT_GET_xxxx values defined above.
+**
+** If neither STAT3 nor STAT4 are enabled, then J is always
+** STAT_GET_STAT1 and is hence omitted and this routine becomes
+** a one-parameter function, stat_get(P), that always returns the
+** stat1 table entry information.
+*/
+static void statGet(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Stat4Accum *p = (Stat4Accum*)sqlite3_value_blob(argv[0]);
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  /* STAT3 and STAT4 have a parameter on this routine. */
+  int eCall = sqlite3_value_int(argv[1]);
+  assert( argc==2 );
+  assert( eCall==STAT_GET_STAT1 || eCall==STAT_GET_NEQ 
+       || eCall==STAT_GET_ROWID || eCall==STAT_GET_NLT
+       || eCall==STAT_GET_NDLT 
+  );
+  if( eCall==STAT_GET_STAT1 )
+#else
+  assert( argc==1 );
+#endif
+  {
+    /* Return the value to store in the "stat" column of the sqlite_stat1
+    ** table for this index.
+    **
+    ** The value is a string composed of a list of integers describing 
+    ** the index. The first integer in the list is the total number of 
+    ** entries in the index. There is one additional integer in the list 
+    ** for each indexed column. This additional integer is an estimate of
+    ** the number of rows matched by a stabbing query on the index using
+    ** a key with the corresponding number of fields. In other words,
+    ** if the index is on columns (a,b) and the sqlite_stat1 value is 
+    ** "100 10 2", then SQLite estimates that:
+    **
+    **   * the index contains 100 rows,
+    **   * "WHERE a=?" matches 10 rows, and
+    **   * "WHERE a=? AND b=?" matches 2 rows.
+    **
+    ** If D is the count of distinct values and K is the total number of 
+    ** rows, then each estimate is computed as:
+    **
+    **        I = (K+D-1)/D
+    */
+    char *z;
+    int i;
+
+    char *zRet = sqlite3MallocZero( (p->nKeyCol+1)*25 );
+    if( zRet==0 ){
+      sqlite3_result_error_nomem(context);
+      return;
+    }
+
+    sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow);
+    z = zRet + sqlite3Strlen30(zRet);
+    for(i=0; i<p->nKeyCol; i++){
+      u64 nDistinct = p->current.anDLt[i] + 1;
+      u64 iVal = (p->nRow + nDistinct - 1) / nDistinct;
+      sqlite3_snprintf(24, z, " %llu", iVal);
+      z += sqlite3Strlen30(z);
+      assert( p->current.anEq[i] );
+    }
+    assert( z[0]=='\0' && z>zRet );
+
+    sqlite3_result_text(context, zRet, -1, sqlite3_free);
+  }
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  else if( eCall==STAT_GET_ROWID ){
+    if( p->iGet<0 ){
+      samplePushPrevious(p, 0);
+      p->iGet = 0;
+    }
+    if( p->iGet<p->nSample ){
+      Stat4Sample *pS = p->a + p->iGet;
+      if( pS->nRowid==0 ){
+        sqlite3_result_int64(context, pS->u.iRowid);
+      }else{
+        sqlite3_result_blob(context, pS->u.aRowid, pS->nRowid,
+                            SQLITE_TRANSIENT);
+      }
+    }
+  }else{
+    tRowcnt *aCnt = 0;
+
+    assert( p->iGet<p->nSample );
+    switch( eCall ){
+      case STAT_GET_NEQ:  aCnt = p->a[p->iGet].anEq; break;
+      case STAT_GET_NLT:  aCnt = p->a[p->iGet].anLt; break;
+      default: {
+        aCnt = p->a[p->iGet].anDLt; 
+        p->iGet++;
+        break;
+      }
+    }
+
+    if( IsStat3 ){
+      sqlite3_result_int64(context, (i64)aCnt[0]);
+    }else{
+      char *zRet = sqlite3MallocZero(p->nCol * 25);
+      if( zRet==0 ){
+        sqlite3_result_error_nomem(context);
+      }else{
+        int i;
+        char *z = zRet;
+        for(i=0; i<p->nCol; i++){
+          sqlite3_snprintf(24, z, "%llu ", (u64)aCnt[i]);
+          z += sqlite3Strlen30(z);
+        }
+        assert( z[0]=='\0' && z>zRet );
+        z[-1] = '\0';
+        sqlite3_result_text(context, zRet, -1, sqlite3_free);
+      }
+    }
+  }
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+#ifndef SQLITE_DEBUG
+  UNUSED_PARAMETER( argc );
+#endif
+}
+static const FuncDef statGetFuncdef = {
+  1+IsStat34,      /* nArg */
+  SQLITE_UTF8,     /* funcFlags */
+  0,               /* pUserData */
+  0,               /* pNext */
+  statGet,         /* xFunc */
+  0,               /* xStep */
+  0,               /* xFinalize */
+  "stat_get",      /* zName */
+  0,               /* pHash */
+  0                /* pDestructor */
+};
+
+static void callStatGet(Vdbe *v, int regStat4, int iParam, int regOut){
+  assert( regOut!=regStat4 && regOut!=regStat4+1 );
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  sqlite3VdbeAddOp2(v, OP_Integer, iParam, regStat4+1);
+#elif SQLITE_DEBUG
+  assert( iParam==STAT_GET_STAT1 );
+#else
+  UNUSED_PARAMETER( iParam );
+#endif
+  sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4, regOut);
+  sqlite3VdbeChangeP4(v, -1, (char*)&statGetFuncdef, P4_FUNCDEF);
+  sqlite3VdbeChangeP5(v, 1 + IsStat34);
+}
+
+/*
+** Generate code to do an analysis of all indices associated with
+** a single table.
+*/
+static void analyzeOneTable(
+  Parse *pParse,   /* Parser context */
+  Table *pTab,     /* Table whose indices are to be analyzed */
+  Index *pOnlyIdx, /* If not NULL, only analyze this one index */
+  int iStatCur,    /* Index of VdbeCursor that writes the sqlite_stat1 table */
+  int iMem,        /* Available memory locations begin here */
+  int iTab         /* Next available cursor */
+){
+  sqlite3 *db = pParse->db;    /* Database handle */
+  Index *pIdx;                 /* An index to being analyzed */
+  int iIdxCur;                 /* Cursor open on index being analyzed */
+  int iTabCur;                 /* Table cursor */
+  Vdbe *v;                     /* The virtual machine being built up */
+  int i;                       /* Loop counter */
+  int jZeroRows = -1;          /* Jump from here if number of rows is zero */
+  int iDb;                     /* Index of database containing pTab */
+  u8 needTableCnt = 1;         /* True to count the table */
+  int regNewRowid = iMem++;    /* Rowid for the inserted record */
+  int regStat4 = iMem++;       /* Register to hold Stat4Accum object */
+  int regChng = iMem++;        /* Index of changed index field */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  int regRowid = iMem++;       /* Rowid argument passed to stat_push() */
+#endif
+  int regTemp = iMem++;        /* Temporary use register */
+  int regTabname = iMem++;     /* Register containing table name */
+  int regIdxname = iMem++;     /* Register containing index name */
+  int regStat1 = iMem++;       /* Value for the stat column of sqlite_stat1 */
+  int regPrev = iMem;          /* MUST BE LAST (see below) */
+
+  pParse->nMem = MAX(pParse->nMem, iMem);
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 || NEVER(pTab==0) ){
+    return;
+  }
+  if( pTab->tnum==0 ){
+    /* Do not gather statistics on views or virtual tables */
+    return;
+  }
+  if( sqlite3_strnicmp(pTab->zName, "sqlite_", 7)==0 ){
+    /* Do not gather statistics on system tables */
+    return;
+  }
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  assert( iDb>=0 );
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  if( sqlite3AuthCheck(pParse, SQLITE_ANALYZE, pTab->zName, 0,
+      db->aDb[iDb].zName ) ){
+    return;
+  }
+#endif
+
+  /* Establish a read-lock on the table at the shared-cache level. 
+  ** Open a read-only cursor on the table. Also allocate a cursor number
+  ** to use for scanning indexes (iIdxCur). No index cursor is opened at
+  ** this time though.  */
+  sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
+  iTabCur = iTab++;
+  iIdxCur = iTab++;
+  pParse->nTab = MAX(pParse->nTab, iTab);
+  sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
+  sqlite3VdbeAddOp4(v, OP_String8, 0, regTabname, 0, pTab->zName, 0);
+
+  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+    int nCol;                     /* Number of columns in pIdx. "N" */
+    int addrRewind;               /* Address of "OP_Rewind iIdxCur" */
+    int addrNextRow;              /* Address of "next_row:" */
+    const char *zIdxName;         /* Name of the index */
+    int nColTest;                 /* Number of columns to test for changes */
+
+    if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
+    if( pIdx->pPartIdxWhere==0 ) needTableCnt = 0;
+    if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIdx) ){
+      nCol = pIdx->nKeyCol;
+      zIdxName = pTab->zName;
+      nColTest = nCol - 1;
+    }else{
+      nCol = pIdx->nColumn;
+      zIdxName = pIdx->zName;
+      nColTest = pIdx->uniqNotNull ? pIdx->nKeyCol-1 : nCol-1;
+    }
+
+    /* Populate the register containing the index name. */
+    sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, zIdxName, 0);
+    VdbeComment((v, "Analysis for %s.%s", pTab->zName, zIdxName));
+
+    /*
+    ** Pseudo-code for loop that calls stat_push():
+    **
+    **   Rewind csr
+    **   if eof(csr) goto end_of_scan;
+    **   regChng = 0
+    **   goto chng_addr_0;
+    **
+    **  next_row:
+    **   regChng = 0
+    **   if( idx(0) != regPrev(0) ) goto chng_addr_0
+    **   regChng = 1
+    **   if( idx(1) != regPrev(1) ) goto chng_addr_1
+    **   ...
+    **   regChng = N
+    **   goto chng_addr_N
+    **
+    **  chng_addr_0:
+    **   regPrev(0) = idx(0)
+    **  chng_addr_1:
+    **   regPrev(1) = idx(1)
+    **  ...
+    **
+    **  endDistinctTest:
+    **   regRowid = idx(rowid)
+    **   stat_push(P, regChng, regRowid)
+    **   Next csr
+    **   if !eof(csr) goto next_row;
+    **
+    **  end_of_scan:
+    */
+
+    /* Make sure there are enough memory cells allocated to accommodate 
+    ** the regPrev array and a trailing rowid (the rowid slot is required
+    ** when building a record to insert into the sample column of 
+    ** the sqlite_stat4 table.  */
+    pParse->nMem = MAX(pParse->nMem, regPrev+nColTest);
+
+    /* Open a read-only cursor on the index being analyzed. */
+    assert( iDb==sqlite3SchemaToIndex(db, pIdx->pSchema) );
+    sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pIdx->tnum, iDb);
+    sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+    VdbeComment((v, "%s", pIdx->zName));
+
+    /* Invoke the stat_init() function. The arguments are:
+    ** 
+    **    (1) the number of columns in the index including the rowid
+    **        (or for a WITHOUT ROWID table, the number of PK columns),
+    **    (2) the number of columns in the key without the rowid/pk
+    **    (3) the number of rows in the index,
+    **
+    **
+    ** The third argument is only used for STAT3 and STAT4
+    */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat4+3);
+#endif
+    sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
+    sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
+    sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
+    sqlite3VdbeChangeP4(v, -1, (char*)&statInitFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 2+IsStat34);
+
+    /* Implementation of the following:
+    **
+    **   Rewind csr
+    **   if eof(csr) goto end_of_scan;
+    **   regChng = 0
+    **   goto next_push_0;
+    **
+    */
+    addrRewind = sqlite3VdbeAddOp1(v, OP_Rewind, iIdxCur);
+    VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regChng);
+    addrNextRow = sqlite3VdbeCurrentAddr(v);
+
+    if( nColTest>0 ){
+      int endDistinctTest = sqlite3VdbeMakeLabel(v);
+      int *aGotoChng;               /* Array of jump instruction addresses */
+      aGotoChng = sqlite3DbMallocRaw(db, sizeof(int)*nColTest);
+      if( aGotoChng==0 ) continue;
+
+      /*
+      **  next_row:
+      **   regChng = 0
+      **   if( idx(0) != regPrev(0) ) goto chng_addr_0
+      **   regChng = 1
+      **   if( idx(1) != regPrev(1) ) goto chng_addr_1
+      **   ...
+      **   regChng = N
+      **   goto endDistinctTest
+      */
+      sqlite3VdbeAddOp0(v, OP_Goto);
+      addrNextRow = sqlite3VdbeCurrentAddr(v);
+      if( nColTest==1 && pIdx->nKeyCol==1 && IsUniqueIndex(pIdx) ){
+        /* For a single-column UNIQUE index, once we have found a non-NULL
+        ** row, we know that all the rest will be distinct, so skip 
+        ** subsequent distinctness tests. */
+        sqlite3VdbeAddOp2(v, OP_NotNull, regPrev, endDistinctTest);
+        VdbeCoverage(v);
+      }
+      for(i=0; i<nColTest; i++){
+        char *pColl = (char*)sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
+        sqlite3VdbeAddOp2(v, OP_Integer, i, regChng);
+        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regTemp);
+        aGotoChng[i] = 
+        sqlite3VdbeAddOp4(v, OP_Ne, regTemp, 0, regPrev+i, pColl, P4_COLLSEQ);
+        sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
+        VdbeCoverage(v);
+      }
+      sqlite3VdbeAddOp2(v, OP_Integer, nColTest, regChng);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, endDistinctTest);
+  
+  
+      /*
+      **  chng_addr_0:
+      **   regPrev(0) = idx(0)
+      **  chng_addr_1:
+      **   regPrev(1) = idx(1)
+      **  ...
+      */
+      sqlite3VdbeJumpHere(v, addrNextRow-1);
+      for(i=0; i<nColTest; i++){
+        sqlite3VdbeJumpHere(v, aGotoChng[i]);
+        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regPrev+i);
+      }
+      sqlite3VdbeResolveLabel(v, endDistinctTest);
+      sqlite3DbFree(db, aGotoChng);
+    }
+  
+    /*
+    **  chng_addr_N:
+    **   regRowid = idx(rowid)            // STAT34 only
+    **   stat_push(P, regChng, regRowid)  // 3rd parameter STAT34 only
+    **   Next csr
+    **   if !eof(csr) goto next_row;
+    */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    assert( regRowid==(regStat4+2) );
+    if( HasRowid(pTab) ){
+      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, regRowid);
+    }else{
+      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
+      int j, k, regKey;
+      regKey = sqlite3GetTempRange(pParse, pPk->nKeyCol);
+      for(j=0; j<pPk->nKeyCol; j++){
+        k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]);
+        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, regKey+j);
+        VdbeComment((v, "%s", pTab->aCol[pPk->aiColumn[j]].zName));
+      }
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regKey, pPk->nKeyCol, regRowid);
+      sqlite3ReleaseTempRange(pParse, regKey, pPk->nKeyCol);
+    }
+#endif
+    assert( regChng==(regStat4+1) );
+    sqlite3VdbeAddOp3(v, OP_Function, 1, regStat4, regTemp);
+    sqlite3VdbeChangeP4(v, -1, (char*)&statPushFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 2+IsStat34);
+    sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v);
+
+    /* Add the entry to the stat1 table. */
+    callStatGet(v, regStat4, STAT_GET_STAT1, regStat1);
+    assert( "BBB"[0]==SQLITE_AFF_TEXT );
+    sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regTemp, "BBB", 0);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid);
+    sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regTemp, regNewRowid);
+    sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+
+    /* Add the entries to the stat3 or stat4 table. */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    {
+      int regEq = regStat1;
+      int regLt = regStat1+1;
+      int regDLt = regStat1+2;
+      int regSample = regStat1+3;
+      int regCol = regStat1+4;
+      int regSampleRowid = regCol + nCol;
+      int addrNext;
+      int addrIsNull;
+      u8 seekOp = HasRowid(pTab) ? OP_NotExists : OP_NotFound;
+
+      pParse->nMem = MAX(pParse->nMem, regCol+nCol);
+
+      addrNext = sqlite3VdbeCurrentAddr(v);
+      callStatGet(v, regStat4, STAT_GET_ROWID, regSampleRowid);
+      addrIsNull = sqlite3VdbeAddOp1(v, OP_IsNull, regSampleRowid);
+      VdbeCoverage(v);
+      callStatGet(v, regStat4, STAT_GET_NEQ, regEq);
+      callStatGet(v, regStat4, STAT_GET_NLT, regLt);
+      callStatGet(v, regStat4, STAT_GET_NDLT, regDLt);
+      sqlite3VdbeAddOp4Int(v, seekOp, iTabCur, addrNext, regSampleRowid, 0);
+      /* We know that the regSampleRowid row exists because it was read by
+      ** the previous loop.  Thus the not-found jump of seekOp will never
+      ** be taken */
+      VdbeCoverageNeverTaken(v);
+#ifdef SQLITE_ENABLE_STAT3
+      sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, 
+                                      pIdx->aiColumn[0], regSample);
+#else
+      for(i=0; i<nCol; i++){
+        i16 iCol = pIdx->aiColumn[i];
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur, iCol, regCol+i);
+      }
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regCol, nCol, regSample);
+#endif
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regTabname, 6, regTemp);
+      sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur+1, regNewRowid);
+      sqlite3VdbeAddOp3(v, OP_Insert, iStatCur+1, regTemp, regNewRowid);
+      sqlite3VdbeAddOp2(v, OP_Goto, 1, addrNext); /* P1==1 for end-of-loop */
+      sqlite3VdbeJumpHere(v, addrIsNull);
+    }
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+    /* End of analysis */
+    sqlite3VdbeJumpHere(v, addrRewind);
+  }
+
+
+  /* Create a single sqlite_stat1 entry containing NULL as the index
+  ** name and the row count as the content.
+  */
+  if( pOnlyIdx==0 && needTableCnt ){
+    VdbeComment((v, "%s", pTab->zName));
+    sqlite3VdbeAddOp2(v, OP_Count, iTabCur, regStat1);
+    jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, regStat1); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Null, 0, regIdxname);
+    assert( "BBB"[0]==SQLITE_AFF_TEXT );
+    sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regTemp, "BBB", 0);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid);
+    sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regTemp, regNewRowid);
+    sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+    sqlite3VdbeJumpHere(v, jZeroRows);
+  }
+}
+
+
+/*
+** Generate code that will cause the most recent index analysis to
+** be loaded into internal hash tables where is can be used.
+*/
+static void loadAnalysis(Parse *pParse, int iDb){
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3VdbeAddOp1(v, OP_LoadAnalysis, iDb);
+  }
+}
+
+/*
+** Generate code that will do an analysis of an entire database
+*/
+static void analyzeDatabase(Parse *pParse, int iDb){
+  sqlite3 *db = pParse->db;
+  Schema *pSchema = db->aDb[iDb].pSchema;    /* Schema of database iDb */
+  HashElem *k;
+  int iStatCur;
+  int iMem;
+  int iTab;
+
+  sqlite3BeginWriteOperation(pParse, 0, iDb);
+  iStatCur = pParse->nTab;
+  pParse->nTab += 3;
+  openStatTable(pParse, iDb, iStatCur, 0, 0);
+  iMem = pParse->nMem+1;
+  iTab = pParse->nTab;
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){
+    Table *pTab = (Table*)sqliteHashData(k);
+    analyzeOneTable(pParse, pTab, 0, iStatCur, iMem, iTab);
+  }
+  loadAnalysis(pParse, iDb);
+}
+
+/*
+** Generate code that will do an analysis of a single table in
+** a database.  If pOnlyIdx is not NULL then it is a single index
+** in pTab that should be analyzed.
+*/
+static void analyzeTable(Parse *pParse, Table *pTab, Index *pOnlyIdx){
+  int iDb;
+  int iStatCur;
+
+  assert( pTab!=0 );
+  assert( sqlite3BtreeHoldsAllMutexes(pParse->db) );
+  iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+  sqlite3BeginWriteOperation(pParse, 0, iDb);
+  iStatCur = pParse->nTab;
+  pParse->nTab += 3;
+  if( pOnlyIdx ){
+    openStatTable(pParse, iDb, iStatCur, pOnlyIdx->zName, "idx");
+  }else{
+    openStatTable(pParse, iDb, iStatCur, pTab->zName, "tbl");
+  }
+  analyzeOneTable(pParse, pTab, pOnlyIdx, iStatCur,pParse->nMem+1,pParse->nTab);
+  loadAnalysis(pParse, iDb);
+}
+
+/*
+** Generate code for the ANALYZE command.  The parser calls this routine
+** when it recognizes an ANALYZE command.
+**
+**        ANALYZE                            -- 1
+**        ANALYZE  <database>                -- 2
+**        ANALYZE  ?<database>.?<tablename>  -- 3
+**
+** Form 1 causes all indices in all attached databases to be analyzed.
+** Form 2 analyzes all indices the single database named.
+** Form 3 analyzes all indices associated with the named table.
+*/
+SQLITE_PRIVATE void sqlite3Analyze(Parse *pParse, Token *pName1, Token *pName2){
+  sqlite3 *db = pParse->db;
+  int iDb;
+  int i;
+  char *z, *zDb;
+  Table *pTab;
+  Index *pIdx;
+  Token *pTableName;
+  Vdbe *v;
+
+  /* Read the database schema. If an error occurs, leave an error message
+  ** and code in pParse and return NULL. */
+  assert( sqlite3BtreeHoldsAllMutexes(pParse->db) );
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    return;
+  }
+
+  assert( pName2!=0 || pName1==0 );
+  if( pName1==0 ){
+    /* Form 1:  Analyze everything */
+    for(i=0; i<db->nDb; i++){
+      if( i==1 ) continue;  /* Do not analyze the TEMP database */
+      analyzeDatabase(pParse, i);
+    }
+  }else if( pName2->n==0 ){
+    /* Form 2:  Analyze the database or table named */
+    iDb = sqlite3FindDb(db, pName1);
+    if( iDb>=0 ){
+      analyzeDatabase(pParse, iDb);
+    }else{
+      z = sqlite3NameFromToken(db, pName1);
+      if( z ){
+        if( (pIdx = sqlite3FindIndex(db, z, 0))!=0 ){
+          analyzeTable(pParse, pIdx->pTable, pIdx);
+        }else if( (pTab = sqlite3LocateTable(pParse, 0, z, 0))!=0 ){
+          analyzeTable(pParse, pTab, 0);
+        }
+        sqlite3DbFree(db, z);
+      }
+    }
+  }else{
+    /* Form 3: Analyze the fully qualified table name */
+    iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pTableName);
+    if( iDb>=0 ){
+      zDb = db->aDb[iDb].zName;
+      z = sqlite3NameFromToken(db, pTableName);
+      if( z ){
+        if( (pIdx = sqlite3FindIndex(db, z, zDb))!=0 ){
+          analyzeTable(pParse, pIdx->pTable, pIdx);
+        }else if( (pTab = sqlite3LocateTable(pParse, 0, z, zDb))!=0 ){
+          analyzeTable(pParse, pTab, 0);
+        }
+        sqlite3DbFree(db, z);
+      }
+    }   
+  }
+  v = sqlite3GetVdbe(pParse);
+  if( v ) sqlite3VdbeAddOp0(v, OP_Expire);
+}
+
+/*
+** Used to pass information from the analyzer reader through to the
+** callback routine.
+*/
+typedef struct analysisInfo analysisInfo;
+struct analysisInfo {
+  sqlite3 *db;
+  const char *zDatabase;
+};
+
+/*
+** The first argument points to a nul-terminated string containing a
+** list of space separated integers. Read the first nOut of these into
+** the array aOut[].
+*/
+static void decodeIntArray(
+  char *zIntArray,       /* String containing int array to decode */
+  int nOut,              /* Number of slots in aOut[] */
+  tRowcnt *aOut,         /* Store integers here */
+  LogEst *aLog,          /* Or, if aOut==0, here */
+  Index *pIndex          /* Handle extra flags for this index, if not NULL */
+){
+  char *z = zIntArray;
+  int c;
+  int i;
+  tRowcnt v;
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( z==0 ) z = "";
+#else
+  assert( z!=0 );
+#endif
+  for(i=0; *z && i<nOut; i++){
+    v = 0;
+    while( (c=z[0])>='0' && c<='9' ){
+      v = v*10 + c - '0';
+      z++;
+    }
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    if( aOut ) aOut[i] = v;
+    if( aLog ) aLog[i] = sqlite3LogEst(v);
+#else
+    assert( aOut==0 );
+    UNUSED_PARAMETER(aOut);
+    assert( aLog!=0 );
+    aLog[i] = sqlite3LogEst(v);
+#endif
+    if( *z==' ' ) z++;
+  }
+#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
+  assert( pIndex!=0 ); {
+#else
+  if( pIndex ){
+#endif
+    pIndex->bUnordered = 0;
+    pIndex->noSkipScan = 0;
+    while( z[0] ){
+      if( sqlite3_strglob("unordered*", z)==0 ){
+        pIndex->bUnordered = 1;
+      }else if( sqlite3_strglob("sz=[0-9]*", z)==0 ){
+        pIndex->szIdxRow = sqlite3LogEst(sqlite3Atoi(z+3));
+      }else if( sqlite3_strglob("noskipscan*", z)==0 ){
+        pIndex->noSkipScan = 1;
+      }
+#ifdef SQLITE_ENABLE_COSTMULT
+      else if( sqlite3_strglob("costmult=[0-9]*",z)==0 ){
+        pIndex->pTable->costMult = sqlite3LogEst(sqlite3Atoi(z+9));
+      }
+#endif
+      while( z[0]!=0 && z[0]!=' ' ) z++;
+      while( z[0]==' ' ) z++;
+    }
+  }
+}
+
+/*
+** This callback is invoked once for each index when reading the
+** sqlite_stat1 table.  
+**
+**     argv[0] = name of the table
+**     argv[1] = name of the index (might be NULL)
+**     argv[2] = results of analysis - on integer for each column
+**
+** Entries for which argv[1]==NULL simply record the number of rows in
+** the table.
+*/
+static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
+  analysisInfo *pInfo = (analysisInfo*)pData;
+  Index *pIndex;
+  Table *pTable;
+  const char *z;
+
+  assert( argc==3 );
+  UNUSED_PARAMETER2(NotUsed, argc);
+
+  if( argv==0 || argv[0]==0 || argv[2]==0 ){
+    return 0;
+  }
+  pTable = sqlite3FindTable(pInfo->db, argv[0], pInfo->zDatabase);
+  if( pTable==0 ){
+    return 0;
+  }
+  if( argv[1]==0 ){
+    pIndex = 0;
+  }else if( sqlite3_stricmp(argv[0],argv[1])==0 ){
+    pIndex = sqlite3PrimaryKeyIndex(pTable);
+  }else{
+    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
+  }
+  z = argv[2];
+
+  if( pIndex ){
+    int nCol = pIndex->nKeyCol+1;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3MallocZero(
+        sizeof(tRowcnt) * nCol
+    );
+    if( aiRowEst==0 ) pInfo->db->mallocFailed = 1;
+#else
+    tRowcnt * const aiRowEst = 0;
+#endif
+    pIndex->bUnordered = 0;
+    decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
+    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
+  }else{
+    Index fakeIdx;
+    fakeIdx.szIdxRow = pTable->szTabRow;
+#ifdef SQLITE_ENABLE_COSTMULT
+    fakeIdx.pTable = pTable;
+#endif
+    decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx);
+    pTable->szTabRow = fakeIdx.szIdxRow;
+  }
+
+  return 0;
+}
+
+/*
+** If the Index.aSample variable is not NULL, delete the aSample[] array
+** and its contents.
+*/
+SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( pIdx->aSample ){
+    int j;
+    for(j=0; j<pIdx->nSample; j++){
+      IndexSample *p = &pIdx->aSample[j];
+      sqlite3DbFree(db, p->p);
+    }
+    sqlite3DbFree(db, pIdx->aSample);
+  }
+  if( db && db->pnBytesFreed==0 ){
+    pIdx->nSample = 0;
+    pIdx->aSample = 0;
+  }
+#else
+  UNUSED_PARAMETER(db);
+  UNUSED_PARAMETER(pIdx);
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+}
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** Populate the pIdx->aAvgEq[] array based on the samples currently
+** stored in pIdx->aSample[]. 
+*/
+static void initAvgEq(Index *pIdx){
+  if( pIdx ){
+    IndexSample *aSample = pIdx->aSample;
+    IndexSample *pFinal = &aSample[pIdx->nSample-1];
+    int iCol;
+    int nCol = 1;
+    if( pIdx->nSampleCol>1 ){
+      /* If this is stat4 data, then calculate aAvgEq[] values for all
+      ** sample columns except the last. The last is always set to 1, as
+      ** once the trailing PK fields are considered all index keys are
+      ** unique.  */
+      nCol = pIdx->nSampleCol-1;
+      pIdx->aAvgEq[nCol] = 1;
+    }
+    for(iCol=0; iCol<nCol; iCol++){
+      int nSample = pIdx->nSample;
+      int i;                    /* Used to iterate through samples */
+      tRowcnt sumEq = 0;        /* Sum of the nEq values */
+      tRowcnt avgEq = 0;
+      tRowcnt nRow;             /* Number of rows in index */
+      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
+      i64 nDist100;             /* Number of distinct values in index */
+
+      if( !pIdx->aiRowEst || iCol>=pIdx->nKeyCol || pIdx->aiRowEst[iCol+1]==0 ){
+        nRow = pFinal->anLt[iCol];
+        nDist100 = (i64)100 * pFinal->anDLt[iCol];
+        nSample--;
+      }else{
+        nRow = pIdx->aiRowEst[0];
+        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
+      }
+      pIdx->nRowEst0 = nRow;
+
+      /* Set nSum to the number of distinct (iCol+1) field prefixes that
+      ** occur in the stat4 table for this index. Set sumEq to the sum of 
+      ** the nEq values for column iCol for the same set (adding the value 
+      ** only once where there exist duplicate prefixes).  */
+      for(i=0; i<nSample; i++){
+        if( i==(pIdx->nSample-1)
+         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
+        ){
+          sumEq += aSample[i].anEq[iCol];
+          nSum100 += 100;
+        }
+      }
+
+      if( nDist100>nSum100 ){
+        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
+      }
+      if( avgEq==0 ) avgEq = 1;
+      pIdx->aAvgEq[iCol] = avgEq;
+    }
+  }
+}
+
+/*
+** Look up an index by name.  Or, if the name of a WITHOUT ROWID table
+** is supplied instead, find the PRIMARY KEY index for that table.
+*/
+static Index *findIndexOrPrimaryKey(
+  sqlite3 *db,
+  const char *zName,
+  const char *zDb
+){
+  Index *pIdx = sqlite3FindIndex(db, zName, zDb);
+  if( pIdx==0 ){
+    Table *pTab = sqlite3FindTable(db, zName, zDb);
+    if( pTab && !HasRowid(pTab) ) pIdx = sqlite3PrimaryKeyIndex(pTab);
+  }
+  return pIdx;
+}
+
+/*
+** Load the content from either the sqlite_stat4 or sqlite_stat3 table 
+** into the relevant Index.aSample[] arrays.
+**
+** Arguments zSql1 and zSql2 must point to SQL statements that return
+** data equivalent to the following (statements are different for stat3,
+** see the caller of this function for details):
+**
+**    zSql1: SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx
+**    zSql2: SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat4
+**
+** where %Q is replaced with the database name before the SQL is executed.
+*/
+static int loadStatTbl(
+  sqlite3 *db,                  /* Database handle */
+  int bStat3,                   /* Assume single column records only */
+  const char *zSql1,            /* SQL statement 1 (see above) */
+  const char *zSql2,            /* SQL statement 2 (see above) */
+  const char *zDb               /* Database name (e.g. "main") */
+){
+  int rc;                       /* Result codes from subroutines */
+  sqlite3_stmt *pStmt = 0;      /* An SQL statement being run */
+  char *zSql;                   /* Text of the SQL statement */
+  Index *pPrevIdx = 0;          /* Previous index in the loop */
+  IndexSample *pSample;         /* A slot in pIdx->aSample[] */
+
+  assert( db->lookaside.bEnabled==0 );
+  zSql = sqlite3MPrintf(db, zSql1, zDb);
+  if( !zSql ){
+    return SQLITE_NOMEM;
+  }
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  sqlite3DbFree(db, zSql);
+  if( rc ) return rc;
+
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    int nIdxCol = 1;              /* Number of columns in stat4 records */
+
+    char *zIndex;   /* Index name */
+    Index *pIdx;    /* Pointer to the index object */
+    int nSample;    /* Number of samples */
+    int nByte;      /* Bytes of space required */
+    int i;          /* Bytes of space required */
+    tRowcnt *pSpace;
+
+    zIndex = (char *)sqlite3_column_text(pStmt, 0);
+    if( zIndex==0 ) continue;
+    nSample = sqlite3_column_int(pStmt, 1);
+    pIdx = findIndexOrPrimaryKey(db, zIndex, zDb);
+    assert( pIdx==0 || bStat3 || pIdx->nSample==0 );
+    /* Index.nSample is non-zero at this point if data has already been
+    ** loaded from the stat4 table. In this case ignore stat3 data.  */
+    if( pIdx==0 || pIdx->nSample ) continue;
+    if( bStat3==0 ){
+      assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 );
+      if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){
+        nIdxCol = pIdx->nKeyCol;
+      }else{
+        nIdxCol = pIdx->nColumn;
+      }
+    }
+    pIdx->nSampleCol = nIdxCol;
+    nByte = sizeof(IndexSample) * nSample;
+    nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample;
+    nByte += nIdxCol * sizeof(tRowcnt);     /* Space for Index.aAvgEq[] */
+
+    pIdx->aSample = sqlite3DbMallocZero(db, nByte);
+    if( pIdx->aSample==0 ){
+      sqlite3_finalize(pStmt);
+      return SQLITE_NOMEM;
+    }
+    pSpace = (tRowcnt*)&pIdx->aSample[nSample];
+    pIdx->aAvgEq = pSpace; pSpace += nIdxCol;
+    for(i=0; i<nSample; i++){
+      pIdx->aSample[i].anEq = pSpace; pSpace += nIdxCol;
+      pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol;
+      pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol;
+    }
+    assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) );
+  }
+  rc = sqlite3_finalize(pStmt);
+  if( rc ) return rc;
+
+  zSql = sqlite3MPrintf(db, zSql2, zDb);
+  if( !zSql ){
+    return SQLITE_NOMEM;
+  }
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  sqlite3DbFree(db, zSql);
+  if( rc ) return rc;
+
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    char *zIndex;                 /* Index name */
+    Index *pIdx;                  /* Pointer to the index object */
+    int nCol = 1;                 /* Number of columns in index */
+
+    zIndex = (char *)sqlite3_column_text(pStmt, 0);
+    if( zIndex==0 ) continue;
+    pIdx = findIndexOrPrimaryKey(db, zIndex, zDb);
+    if( pIdx==0 ) continue;
+    /* This next condition is true if data has already been loaded from 
+    ** the sqlite_stat4 table. In this case ignore stat3 data.  */
+    nCol = pIdx->nSampleCol;
+    if( bStat3 && nCol>1 ) continue;
+    if( pIdx!=pPrevIdx ){
+      initAvgEq(pPrevIdx);
+      pPrevIdx = pIdx;
+    }
+    pSample = &pIdx->aSample[pIdx->nSample];
+    decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0);
+    decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0);
+    decodeIntArray((char*)sqlite3_column_text(pStmt,3),nCol,pSample->anDLt,0,0);
+
+    /* Take a copy of the sample. Add two 0x00 bytes the end of the buffer.
+    ** This is in case the sample record is corrupted. In that case, the
+    ** sqlite3VdbeRecordCompare() may read up to two varints past the
+    ** end of the allocated buffer before it realizes it is dealing with
+    ** a corrupt record. Adding the two 0x00 bytes prevents this from causing
+    ** a buffer overread.  */
+    pSample->n = sqlite3_column_bytes(pStmt, 4);
+    pSample->p = sqlite3DbMallocZero(db, pSample->n + 2);
+    if( pSample->p==0 ){
+      sqlite3_finalize(pStmt);
+      return SQLITE_NOMEM;
+    }
+    memcpy(pSample->p, sqlite3_column_blob(pStmt, 4), pSample->n);
+    pIdx->nSample++;
+  }
+  rc = sqlite3_finalize(pStmt);
+  if( rc==SQLITE_OK ) initAvgEq(pPrevIdx);
+  return rc;
+}
+
+/*
+** Load content from the sqlite_stat4 and sqlite_stat3 tables into 
+** the Index.aSample[] arrays of all indices.
+*/
+static int loadStat4(sqlite3 *db, const char *zDb){
+  int rc = SQLITE_OK;             /* Result codes from subroutines */
+
+  assert( db->lookaside.bEnabled==0 );
+  if( sqlite3FindTable(db, "sqlite_stat4", zDb) ){
+    rc = loadStatTbl(db, 0,
+      "SELECT idx,count(*) FROM %Q.sqlite_stat4 GROUP BY idx", 
+      "SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat4",
+      zDb
+    );
+  }
+
+  if( rc==SQLITE_OK && sqlite3FindTable(db, "sqlite_stat3", zDb) ){
+    rc = loadStatTbl(db, 1,
+      "SELECT idx,count(*) FROM %Q.sqlite_stat3 GROUP BY idx", 
+      "SELECT idx,neq,nlt,ndlt,sqlite_record(sample) FROM %Q.sqlite_stat3",
+      zDb
+    );
+  }
+
+  return rc;
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+/*
+** Load the content of the sqlite_stat1 and sqlite_stat3/4 tables. The
+** contents of sqlite_stat1 are used to populate the Index.aiRowEst[]
+** arrays. The contents of sqlite_stat3/4 are used to populate the
+** Index.aSample[] arrays.
+**
+** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR
+** is returned. In this case, even if SQLITE_ENABLE_STAT3/4 was defined 
+** during compilation and the sqlite_stat3/4 table is present, no data is 
+** read from it.
+**
+** If SQLITE_ENABLE_STAT3/4 was defined during compilation and the 
+** sqlite_stat4 table is not present in the database, SQLITE_ERROR is
+** returned. However, in this case, data is read from the sqlite_stat1
+** table (if it is present) before returning.
+**
+** If an OOM error occurs, this function always sets db->mallocFailed.
+** This means if the caller does not care about other errors, the return
+** code may be ignored.
+*/
+SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
+  analysisInfo sInfo;
+  HashElem *i;
+  char *zSql;
+  int rc;
+
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( db->aDb[iDb].pBt!=0 );
+
+  /* Clear any prior statistics */
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
+    Index *pIdx = sqliteHashData(i);
+    sqlite3DefaultRowEst(pIdx);
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    sqlite3DeleteIndexSamples(db, pIdx);
+    pIdx->aSample = 0;
+#endif
+  }
+
+  /* Check to make sure the sqlite_stat1 table exists */
+  sInfo.db = db;
+  sInfo.zDatabase = db->aDb[iDb].zName;
+  if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){
+    return SQLITE_ERROR;
+  }
+
+  /* Load new statistics out of the sqlite_stat1 table */
+  zSql = sqlite3MPrintf(db, 
+      "SELECT tbl,idx,stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_exec(db, zSql, analysisLoader, &sInfo, 0);
+    sqlite3DbFree(db, zSql);
+  }
+
+
+  /* Load the statistics from the sqlite_stat4 table. */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  if( rc==SQLITE_OK && OptimizationEnabled(db, SQLITE_Stat34) ){
+    int lookasideEnabled = db->lookaside.bEnabled;
+    db->lookaside.bEnabled = 0;
+    rc = loadStat4(db, sInfo.zDatabase);
+    db->lookaside.bEnabled = lookasideEnabled;
+  }
+  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
+    Index *pIdx = sqliteHashData(i);
+    sqlite3_free(pIdx->aiRowEst);
+    pIdx->aiRowEst = 0;
+  }
+#endif
+
+  if( rc==SQLITE_NOMEM ){
+    db->mallocFailed = 1;
+  }
+  return rc;
+}
+
+
+#endif /* SQLITE_OMIT_ANALYZE */
+
+/************** End of analyze.c *********************************************/
+/************** Begin file attach.c ******************************************/
+/*
+** 2003 April 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to implement the ATTACH and DETACH commands.
+*/
+
+#ifndef SQLITE_OMIT_ATTACH
+/*
+** Resolve an expression that was part of an ATTACH or DETACH statement. This
+** is slightly different from resolving a normal SQL expression, because simple
+** identifiers are treated as strings, not possible column names or aliases.
+**
+** i.e. if the parser sees:
+**
+**     ATTACH DATABASE abc AS def
+**
+** it treats the two expressions as literal strings 'abc' and 'def' instead of
+** looking for columns of the same name.
+**
+** This only applies to the root node of pExpr, so the statement:
+**
+**     ATTACH DATABASE abc||def AS 'db2'
+**
+** will fail because neither abc or def can be resolved.
+*/
+static int resolveAttachExpr(NameContext *pName, Expr *pExpr)
+{
+  int rc = SQLITE_OK;
+  if( pExpr ){
+    if( pExpr->op!=TK_ID ){
+      rc = sqlite3ResolveExprNames(pName, pExpr);
+    }else{
+      pExpr->op = TK_STRING;
+    }
+  }
+  return rc;
+}
+
+/*
+** An SQL user-function registered to do the work of an ATTACH statement. The
+** three arguments to the function come directly from an attach statement:
+**
+**     ATTACH DATABASE x AS y KEY z
+**
+**     SELECT sqlite_attach(x, y, z)
+**
+** If the optional "KEY z" syntax is omitted, an SQL NULL is passed as the
+** third argument.
+*/
+static void attachFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  int i;
+  int rc = 0;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  const char *zName;
+  const char *zFile;
+  char *zPath = 0;
+  char *zErr = 0;
+  unsigned int flags;
+  Db *aNew;
+  char *zErrDyn = 0;
+  sqlite3_vfs *pVfs;
+
+  UNUSED_PARAMETER(NotUsed);
+
+  zFile = (const char *)sqlite3_value_text(argv[0]);
+  zName = (const char *)sqlite3_value_text(argv[1]);
+  if( zFile==0 ) zFile = "";
+  if( zName==0 ) zName = "";
+
+  /* Check for the following errors:
+  **
+  **     * Too many attached databases,
+  **     * Transaction currently open
+  **     * Specified database name already being used.
+  */
+  if( db->nDb>=db->aLimit[SQLITE_LIMIT_ATTACHED]+2 ){
+    zErrDyn = sqlite3MPrintf(db, "too many attached databases - max %d", 
+      db->aLimit[SQLITE_LIMIT_ATTACHED]
+    );
+    goto attach_error;
+  }
+  if( !db->autoCommit ){
+    zErrDyn = sqlite3MPrintf(db, "cannot ATTACH database within transaction");
+    goto attach_error;
+  }
+  for(i=0; i<db->nDb; i++){
+    char *z = db->aDb[i].zName;
+    assert( z && zName );
+    if( sqlite3StrICmp(z, zName)==0 ){
+      zErrDyn = sqlite3MPrintf(db, "database %s is already in use", zName);
+      goto attach_error;
+    }
+  }
+
+  /* Allocate the new entry in the db->aDb[] array and initialize the schema
+  ** hash tables.
+  */
+  if( db->aDb==db->aDbStatic ){
+    aNew = sqlite3DbMallocRaw(db, sizeof(db->aDb[0])*3 );
+    if( aNew==0 ) return;
+    memcpy(aNew, db->aDb, sizeof(db->aDb[0])*2);
+  }else{
+    aNew = sqlite3DbRealloc(db, db->aDb, sizeof(db->aDb[0])*(db->nDb+1) );
+    if( aNew==0 ) return;
+  }
+  db->aDb = aNew;
+  aNew = &db->aDb[db->nDb];
+  memset(aNew, 0, sizeof(*aNew));
+
+  /* Open the database file. If the btree is successfully opened, use
+  ** it to obtain the database schema. At this point the schema may
+  ** or may not be initialized.
+  */
+  flags = db->openFlags;
+  rc = sqlite3ParseUri(db->pVfs->zName, zFile, &flags, &pVfs, &zPath, &zErr);
+  if( rc!=SQLITE_OK ){
+    if( rc==SQLITE_NOMEM ) db->mallocFailed = 1;
+    sqlite3_result_error(context, zErr, -1);
+    sqlite3_free(zErr);
+    return;
+  }
+  assert( pVfs );
+  flags |= SQLITE_OPEN_MAIN_DB;
+  rc = sqlite3BtreeOpen(pVfs, zPath, db, &aNew->pBt, 0, flags);
+  sqlite3_free( zPath );
+  db->nDb++;
+  if( rc==SQLITE_CONSTRAINT ){
+    rc = SQLITE_ERROR;
+    zErrDyn = sqlite3MPrintf(db, "database is already attached");
+  }else if( rc==SQLITE_OK ){
+    Pager *pPager;
+    aNew->pSchema = sqlite3SchemaGet(db, aNew->pBt);
+    if( !aNew->pSchema ){
+      rc = SQLITE_NOMEM;
+    }else if( aNew->pSchema->file_format && aNew->pSchema->enc!=ENC(db) ){
+      zErrDyn = sqlite3MPrintf(db, 
+        "attached databases must use the same text encoding as main database");
+      rc = SQLITE_ERROR;
+    }
+    sqlite3BtreeEnter(aNew->pBt);
+    pPager = sqlite3BtreePager(aNew->pBt);
+    sqlite3PagerLockingMode(pPager, db->dfltLockMode);
+    sqlite3BtreeSecureDelete(aNew->pBt,
+                             sqlite3BtreeSecureDelete(db->aDb[0].pBt,-1) );
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+    sqlite3BtreeSetPagerFlags(aNew->pBt, 3 | (db->flags & PAGER_FLAGS_MASK));
+#endif
+    sqlite3BtreeLeave(aNew->pBt);
+  }
+  aNew->safety_level = 3;
+  aNew->zName = sqlite3DbStrDup(db, zName);
+  if( rc==SQLITE_OK && aNew->zName==0 ){
+    rc = SQLITE_NOMEM;
+  }
+
+
+#ifdef SQLITE_HAS_CODEC
+  if( rc==SQLITE_OK ){
+    extern int sqlite3CodecAttach(sqlite3*, int, const void*, int);
+    extern void sqlite3CodecGetKey(sqlite3*, int, void**, int*);
+    int nKey;
+    char *zKey;
+    int t = sqlite3_value_type(argv[2]);
+    switch( t ){
+      case SQLITE_INTEGER:
+      case SQLITE_FLOAT:
+        zErrDyn = sqlite3DbStrDup(db, "Invalid key value");
+        rc = SQLITE_ERROR;
+        break;
+        
+      case SQLITE_TEXT:
+      case SQLITE_BLOB:
+        nKey = sqlite3_value_bytes(argv[2]);
+        zKey = (char *)sqlite3_value_blob(argv[2]);
+        rc = sqlite3CodecAttach(db, db->nDb-1, zKey, nKey);
+        break;
+
+      case SQLITE_NULL:
+        /* No key specified.  Use the key from the main database */
+        sqlite3CodecGetKey(db, 0, (void**)&zKey, &nKey);
+        if( nKey>0 || sqlite3BtreeGetReserve(db->aDb[0].pBt)>0 ){
+          rc = sqlite3CodecAttach(db, db->nDb-1, zKey, nKey);
+        }
+        break;
+    }
+  }
+#endif
+
+  /* If the file was opened successfully, read the schema for the new database.
+  ** If this fails, or if opening the file failed, then close the file and 
+  ** remove the entry from the db->aDb[] array. i.e. put everything back the way
+  ** we found it.
+  */
+  if( rc==SQLITE_OK ){
+    sqlite3BtreeEnterAll(db);
+    rc = sqlite3Init(db, &zErrDyn);
+    sqlite3BtreeLeaveAll(db);
+  }
+#ifdef SQLITE_USER_AUTHENTICATION
+  if( rc==SQLITE_OK ){
+    u8 newAuth = 0;
+    rc = sqlite3UserAuthCheckLogin(db, zName, &newAuth);
+    if( newAuth<db->auth.authLevel ){
+      rc = SQLITE_AUTH_USER;
+    }
+  }
+#endif
+  if( rc ){
+    int iDb = db->nDb - 1;
+    assert( iDb>=2 );
+    if( db->aDb[iDb].pBt ){
+      sqlite3BtreeClose(db->aDb[iDb].pBt);
+      db->aDb[iDb].pBt = 0;
+      db->aDb[iDb].pSchema = 0;
+    }
+    sqlite3ResetAllSchemasOfConnection(db);
+    db->nDb = iDb;
+    if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){
+      db->mallocFailed = 1;
+      sqlite3DbFree(db, zErrDyn);
+      zErrDyn = sqlite3MPrintf(db, "out of memory");
+    }else if( zErrDyn==0 ){
+      zErrDyn = sqlite3MPrintf(db, "unable to open database: %s", zFile);
+    }
+    goto attach_error;
+  }
+  
+  return;
+
+attach_error:
+  /* Return an error if we get here */
+  if( zErrDyn ){
+    sqlite3_result_error(context, zErrDyn, -1);
+    sqlite3DbFree(db, zErrDyn);
+  }
+  if( rc ) sqlite3_result_error_code(context, rc);
+}
+
+/*
+** An SQL user-function registered to do the work of an DETACH statement. The
+** three arguments to the function come directly from a detach statement:
+**
+**     DETACH DATABASE x
+**
+**     SELECT sqlite_detach(x)
+*/
+static void detachFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  const char *zName = (const char *)sqlite3_value_text(argv[0]);
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  int i;
+  Db *pDb = 0;
+  char zErr[128];
+
+  UNUSED_PARAMETER(NotUsed);
+
+  if( zName==0 ) zName = "";
+  for(i=0; i<db->nDb; i++){
+    pDb = &db->aDb[i];
+    if( pDb->pBt==0 ) continue;
+    if( sqlite3StrICmp(pDb->zName, zName)==0 ) break;
+  }
+
+  if( i>=db->nDb ){
+    sqlite3_snprintf(sizeof(zErr),zErr, "no such database: %s", zName);
+    goto detach_error;
+  }
+  if( i<2 ){
+    sqlite3_snprintf(sizeof(zErr),zErr, "cannot detach database %s", zName);
+    goto detach_error;
+  }
+  if( !db->autoCommit ){
+    sqlite3_snprintf(sizeof(zErr), zErr,
+                     "cannot DETACH database within transaction");
+    goto detach_error;
+  }
+  if( sqlite3BtreeIsInReadTrans(pDb->pBt) || sqlite3BtreeIsInBackup(pDb->pBt) ){
+    sqlite3_snprintf(sizeof(zErr),zErr, "database %s is locked", zName);
+    goto detach_error;
+  }
+
+  sqlite3BtreeClose(pDb->pBt);
+  pDb->pBt = 0;
+  pDb->pSchema = 0;
+  sqlite3ResetAllSchemasOfConnection(db);
+  return;
+
+detach_error:
+  sqlite3_result_error(context, zErr, -1);
+}
+
+/*
+** This procedure generates VDBE code for a single invocation of either the
+** sqlite_detach() or sqlite_attach() SQL user functions.
+*/
+static void codeAttach(
+  Parse *pParse,       /* The parser context */
+  int type,            /* Either SQLITE_ATTACH or SQLITE_DETACH */
+  FuncDef const *pFunc,/* FuncDef wrapper for detachFunc() or attachFunc() */
+  Expr *pAuthArg,      /* Expression to pass to authorization callback */
+  Expr *pFilename,     /* Name of database file */
+  Expr *pDbname,       /* Name of the database to use internally */
+  Expr *pKey           /* Database key for encryption extension */
+){
+  int rc;
+  NameContext sName;
+  Vdbe *v;
+  sqlite3* db = pParse->db;
+  int regArgs;
+
+  memset(&sName, 0, sizeof(NameContext));
+  sName.pParse = pParse;
+
+  if( 
+      SQLITE_OK!=(rc = resolveAttachExpr(&sName, pFilename)) ||
+      SQLITE_OK!=(rc = resolveAttachExpr(&sName, pDbname)) ||
+      SQLITE_OK!=(rc = resolveAttachExpr(&sName, pKey))
+  ){
+    pParse->nErr++;
+    goto attach_end;
+  }
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  if( pAuthArg ){
+    char *zAuthArg;
+    if( pAuthArg->op==TK_STRING ){
+      zAuthArg = pAuthArg->u.zToken;
+    }else{
+      zAuthArg = 0;
+    }
+    rc = sqlite3AuthCheck(pParse, type, zAuthArg, 0, 0);
+    if(rc!=SQLITE_OK ){
+      goto attach_end;
+    }
+  }
+#endif /* SQLITE_OMIT_AUTHORIZATION */
+
+
+  v = sqlite3GetVdbe(pParse);
+  regArgs = sqlite3GetTempRange(pParse, 4);
+  sqlite3ExprCode(pParse, pFilename, regArgs);
+  sqlite3ExprCode(pParse, pDbname, regArgs+1);
+  sqlite3ExprCode(pParse, pKey, regArgs+2);
+
+  assert( v || db->mallocFailed );
+  if( v ){
+    sqlite3VdbeAddOp3(v, OP_Function, 0, regArgs+3-pFunc->nArg, regArgs+3);
+    assert( pFunc->nArg==-1 || (pFunc->nArg&0xff)==pFunc->nArg );
+    sqlite3VdbeChangeP5(v, (u8)(pFunc->nArg));
+    sqlite3VdbeChangeP4(v, -1, (char *)pFunc, P4_FUNCDEF);
+
+    /* Code an OP_Expire. For an ATTACH statement, set P1 to true (expire this
+    ** statement only). For DETACH, set it to false (expire all existing
+    ** statements).
+    */
+    sqlite3VdbeAddOp1(v, OP_Expire, (type==SQLITE_ATTACH));
+  }
+  
+attach_end:
+  sqlite3ExprDelete(db, pFilename);
+  sqlite3ExprDelete(db, pDbname);
+  sqlite3ExprDelete(db, pKey);
+}
+
+/*
+** Called by the parser to compile a DETACH statement.
+**
+**     DETACH pDbname
+*/
+SQLITE_PRIVATE void sqlite3Detach(Parse *pParse, Expr *pDbname){
+  static const FuncDef detach_func = {
+    1,                /* nArg */
+    SQLITE_UTF8,      /* funcFlags */
+    0,                /* pUserData */
+    0,                /* pNext */
+    detachFunc,       /* xFunc */
+    0,                /* xStep */
+    0,                /* xFinalize */
+    "sqlite_detach",  /* zName */
+    0,                /* pHash */
+    0                 /* pDestructor */
+  };
+  codeAttach(pParse, SQLITE_DETACH, &detach_func, pDbname, 0, 0, pDbname);
+}
+
+/*
+** Called by the parser to compile an ATTACH statement.
+**
+**     ATTACH p AS pDbname KEY pKey
+*/
+SQLITE_PRIVATE void sqlite3Attach(Parse *pParse, Expr *p, Expr *pDbname, Expr *pKey){
+  static const FuncDef attach_func = {
+    3,                /* nArg */
+    SQLITE_UTF8,      /* funcFlags */
+    0,                /* pUserData */
+    0,                /* pNext */
+    attachFunc,       /* xFunc */
+    0,                /* xStep */
+    0,                /* xFinalize */
+    "sqlite_attach",  /* zName */
+    0,                /* pHash */
+    0                 /* pDestructor */
+  };
+  codeAttach(pParse, SQLITE_ATTACH, &attach_func, p, p, pDbname, pKey);
+}
+#endif /* SQLITE_OMIT_ATTACH */
+
+/*
+** Initialize a DbFixer structure.  This routine must be called prior
+** to passing the structure to one of the sqliteFixAAAA() routines below.
+*/
+SQLITE_PRIVATE void sqlite3FixInit(
+  DbFixer *pFix,      /* The fixer to be initialized */
+  Parse *pParse,      /* Error messages will be written here */
+  int iDb,            /* This is the database that must be used */
+  const char *zType,  /* "view", "trigger", or "index" */
+  const Token *pName  /* Name of the view, trigger, or index */
+){
+  sqlite3 *db;
+
+  db = pParse->db;
+  assert( db->nDb>iDb );
+  pFix->pParse = pParse;
+  pFix->zDb = db->aDb[iDb].zName;
+  pFix->pSchema = db->aDb[iDb].pSchema;
+  pFix->zType = zType;
+  pFix->pName = pName;
+  pFix->bVarOnly = (iDb==1);
+}
+
+/*
+** The following set of routines walk through the parse tree and assign
+** a specific database to all table references where the database name
+** was left unspecified in the original SQL statement.  The pFix structure
+** must have been initialized by a prior call to sqlite3FixInit().
+**
+** These routines are used to make sure that an index, trigger, or
+** view in one database does not refer to objects in a different database.
+** (Exception: indices, triggers, and views in the TEMP database are
+** allowed to refer to anything.)  If a reference is explicitly made
+** to an object in a different database, an error message is added to
+** pParse->zErrMsg and these routines return non-zero.  If everything
+** checks out, these routines return 0.
+*/
+SQLITE_PRIVATE int sqlite3FixSrcList(
+  DbFixer *pFix,       /* Context of the fixation */
+  SrcList *pList       /* The Source list to check and modify */
+){
+  int i;
+  const char *zDb;
+  struct SrcList_item *pItem;
+
+  if( NEVER(pList==0) ) return 0;
+  zDb = pFix->zDb;
+  for(i=0, pItem=pList->a; i<pList->nSrc; i++, pItem++){
+    if( pFix->bVarOnly==0 ){
+      if( pItem->zDatabase && sqlite3StrICmp(pItem->zDatabase, zDb) ){
+        sqlite3ErrorMsg(pFix->pParse,
+            "%s %T cannot reference objects in database %s",
+            pFix->zType, pFix->pName, pItem->zDatabase);
+        return 1;
+      }
+      sqlite3DbFree(pFix->pParse->db, pItem->zDatabase);
+      pItem->zDatabase = 0;
+      pItem->pSchema = pFix->pSchema;
+    }
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER)
+    if( sqlite3FixSelect(pFix, pItem->pSelect) ) return 1;
+    if( sqlite3FixExpr(pFix, pItem->pOn) ) return 1;
+#endif
+  }
+  return 0;
+}
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_TRIGGER)
+SQLITE_PRIVATE int sqlite3FixSelect(
+  DbFixer *pFix,       /* Context of the fixation */
+  Select *pSelect      /* The SELECT statement to be fixed to one database */
+){
+  while( pSelect ){
+    if( sqlite3FixExprList(pFix, pSelect->pEList) ){
+      return 1;
+    }
+    if( sqlite3FixSrcList(pFix, pSelect->pSrc) ){
+      return 1;
+    }
+    if( sqlite3FixExpr(pFix, pSelect->pWhere) ){
+      return 1;
+    }
+    if( sqlite3FixExprList(pFix, pSelect->pGroupBy) ){
+      return 1;
+    }
+    if( sqlite3FixExpr(pFix, pSelect->pHaving) ){
+      return 1;
+    }
+    if( sqlite3FixExprList(pFix, pSelect->pOrderBy) ){
+      return 1;
+    }
+    if( sqlite3FixExpr(pFix, pSelect->pLimit) ){
+      return 1;
+    }
+    if( sqlite3FixExpr(pFix, pSelect->pOffset) ){
+      return 1;
+    }
+    pSelect = pSelect->pPrior;
+  }
+  return 0;
+}
+SQLITE_PRIVATE int sqlite3FixExpr(
+  DbFixer *pFix,     /* Context of the fixation */
+  Expr *pExpr        /* The expression to be fixed to one database */
+){
+  while( pExpr ){
+    if( pExpr->op==TK_VARIABLE ){
+      if( pFix->pParse->db->init.busy ){
+        pExpr->op = TK_NULL;
+      }else{
+        sqlite3ErrorMsg(pFix->pParse, "%s cannot use variables", pFix->zType);
+        return 1;
+      }
+    }
+    if( ExprHasProperty(pExpr, EP_TokenOnly) ) break;
+    if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+      if( sqlite3FixSelect(pFix, pExpr->x.pSelect) ) return 1;
+    }else{
+      if( sqlite3FixExprList(pFix, pExpr->x.pList) ) return 1;
+    }
+    if( sqlite3FixExpr(pFix, pExpr->pRight) ){
+      return 1;
+    }
+    pExpr = pExpr->pLeft;
+  }
+  return 0;
+}
+SQLITE_PRIVATE int sqlite3FixExprList(
+  DbFixer *pFix,     /* Context of the fixation */
+  ExprList *pList    /* The expression to be fixed to one database */
+){
+  int i;
+  struct ExprList_item *pItem;
+  if( pList==0 ) return 0;
+  for(i=0, pItem=pList->a; i<pList->nExpr; i++, pItem++){
+    if( sqlite3FixExpr(pFix, pItem->pExpr) ){
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
+#ifndef SQLITE_OMIT_TRIGGER
+SQLITE_PRIVATE int sqlite3FixTriggerStep(
+  DbFixer *pFix,     /* Context of the fixation */
+  TriggerStep *pStep /* The trigger step be fixed to one database */
+){
+  while( pStep ){
+    if( sqlite3FixSelect(pFix, pStep->pSelect) ){
+      return 1;
+    }
+    if( sqlite3FixExpr(pFix, pStep->pWhere) ){
+      return 1;
+    }
+    if( sqlite3FixExprList(pFix, pStep->pExprList) ){
+      return 1;
+    }
+    pStep = pStep->pNext;
+  }
+  return 0;
+}
+#endif
+
+/************** End of attach.c **********************************************/
+/************** Begin file auth.c ********************************************/
+/*
+** 2003 January 11
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to implement the sqlite3_set_authorizer()
+** API.  This facility is an optional feature of the library.  Embedded
+** systems that do not need this facility may omit it by recompiling
+** the library with -DSQLITE_OMIT_AUTHORIZATION=1
+*/
+
+/*
+** All of the code in this file may be omitted by defining a single
+** macro.
+*/
+#ifndef SQLITE_OMIT_AUTHORIZATION
+
+/*
+** Set or clear the access authorization function.
+**
+** The access authorization function is be called during the compilation
+** phase to verify that the user has read and/or write access permission on
+** various fields of the database.  The first argument to the auth function
+** is a copy of the 3rd argument to this routine.  The second argument
+** to the auth function is one of these constants:
+**
+**       SQLITE_CREATE_INDEX
+**       SQLITE_CREATE_TABLE
+**       SQLITE_CREATE_TEMP_INDEX
+**       SQLITE_CREATE_TEMP_TABLE
+**       SQLITE_CREATE_TEMP_TRIGGER
+**       SQLITE_CREATE_TEMP_VIEW
+**       SQLITE_CREATE_TRIGGER
+**       SQLITE_CREATE_VIEW
+**       SQLITE_DELETE
+**       SQLITE_DROP_INDEX
+**       SQLITE_DROP_TABLE
+**       SQLITE_DROP_TEMP_INDEX
+**       SQLITE_DROP_TEMP_TABLE
+**       SQLITE_DROP_TEMP_TRIGGER
+**       SQLITE_DROP_TEMP_VIEW
+**       SQLITE_DROP_TRIGGER
+**       SQLITE_DROP_VIEW
+**       SQLITE_INSERT
+**       SQLITE_PRAGMA
+**       SQLITE_READ
+**       SQLITE_SELECT
+**       SQLITE_TRANSACTION
+**       SQLITE_UPDATE
+**
+** The third and fourth arguments to the auth function are the name of
+** the table and the column that are being accessed.  The auth function
+** should return either SQLITE_OK, SQLITE_DENY, or SQLITE_IGNORE.  If
+** SQLITE_OK is returned, it means that access is allowed.  SQLITE_DENY
+** means that the SQL statement will never-run - the sqlite3_exec() call
+** will return with an error.  SQLITE_IGNORE means that the SQL statement
+** should run but attempts to read the specified column will return NULL
+** and attempts to write the column will be ignored.
+**
+** Setting the auth function to NULL disables this hook.  The default
+** setting of the auth function is NULL.
+*/
+SQLITE_API int sqlite3_set_authorizer(
+  sqlite3 *db,
+  int (*xAuth)(void*,int,const char*,const char*,const char*,const char*),
+  void *pArg
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  db->xAuth = (sqlite3_xauth)xAuth;
+  db->pAuthArg = pArg;
+  sqlite3ExpirePreparedStatements(db);
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+/*
+** Write an error message into pParse->zErrMsg that explains that the
+** user-supplied authorization function returned an illegal value.
+*/
+static void sqliteAuthBadReturnCode(Parse *pParse){
+  sqlite3ErrorMsg(pParse, "authorizer malfunction");
+  pParse->rc = SQLITE_ERROR;
+}
+
+/*
+** Invoke the authorization callback for permission to read column zCol from
+** table zTab in database zDb. This function assumes that an authorization
+** callback has been registered (i.e. that sqlite3.xAuth is not NULL).
+**
+** If SQLITE_IGNORE is returned and pExpr is not NULL, then pExpr is changed
+** to an SQL NULL expression. Otherwise, if pExpr is NULL, then SQLITE_IGNORE
+** is treated as SQLITE_DENY. In this case an error is left in pParse.
+*/
+SQLITE_PRIVATE int sqlite3AuthReadCol(
+  Parse *pParse,                  /* The parser context */
+  const char *zTab,               /* Table name */
+  const char *zCol,               /* Column name */
+  int iDb                         /* Index of containing database. */
+){
+  sqlite3 *db = pParse->db;       /* Database handle */
+  char *zDb = db->aDb[iDb].zName; /* Name of attached database */
+  int rc;                         /* Auth callback return code */
+
+  rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext
+#ifdef SQLITE_USER_AUTHENTICATION
+                 ,db->auth.zAuthUser
+#endif
+                );
+  if( rc==SQLITE_DENY ){
+    if( db->nDb>2 || iDb!=0 ){
+      sqlite3ErrorMsg(pParse, "access to %s.%s.%s is prohibited",zDb,zTab,zCol);
+    }else{
+      sqlite3ErrorMsg(pParse, "access to %s.%s is prohibited", zTab, zCol);
+    }
+    pParse->rc = SQLITE_AUTH;
+  }else if( rc!=SQLITE_IGNORE && rc!=SQLITE_OK ){
+    sqliteAuthBadReturnCode(pParse);
+  }
+  return rc;
+}
+
+/*
+** The pExpr should be a TK_COLUMN expression.  The table referred to
+** is in pTabList or else it is the NEW or OLD table of a trigger.  
+** Check to see if it is OK to read this particular column.
+**
+** If the auth function returns SQLITE_IGNORE, change the TK_COLUMN 
+** instruction into a TK_NULL.  If the auth function returns SQLITE_DENY,
+** then generate an error.
+*/
+SQLITE_PRIVATE void sqlite3AuthRead(
+  Parse *pParse,        /* The parser context */
+  Expr *pExpr,          /* The expression to check authorization on */
+  Schema *pSchema,      /* The schema of the expression */
+  SrcList *pTabList     /* All table that pExpr might refer to */
+){
+  sqlite3 *db = pParse->db;
+  Table *pTab = 0;      /* The table being read */
+  const char *zCol;     /* Name of the column of the table */
+  int iSrc;             /* Index in pTabList->a[] of table being read */
+  int iDb;              /* The index of the database the expression refers to */
+  int iCol;             /* Index of column in table */
+
+  if( db->xAuth==0 ) return;
+  iDb = sqlite3SchemaToIndex(pParse->db, pSchema);
+  if( iDb<0 ){
+    /* An attempt to read a column out of a subquery or other
+    ** temporary table. */
+    return;
+  }
+
+  assert( pExpr->op==TK_COLUMN || pExpr->op==TK_TRIGGER );
+  if( pExpr->op==TK_TRIGGER ){
+    pTab = pParse->pTriggerTab;
+  }else{
+    assert( pTabList );
+    for(iSrc=0; ALWAYS(iSrc<pTabList->nSrc); iSrc++){
+      if( pExpr->iTable==pTabList->a[iSrc].iCursor ){
+        pTab = pTabList->a[iSrc].pTab;
+        break;
+      }
+    }
+  }
+  iCol = pExpr->iColumn;
+  if( NEVER(pTab==0) ) return;
+
+  if( iCol>=0 ){
+    assert( iCol<pTab->nCol );
+    zCol = pTab->aCol[iCol].zName;
+  }else if( pTab->iPKey>=0 ){
+    assert( pTab->iPKey<pTab->nCol );
+    zCol = pTab->aCol[pTab->iPKey].zName;
+  }else{
+    zCol = "ROWID";
+  }
+  assert( iDb>=0 && iDb<db->nDb );
+  if( SQLITE_IGNORE==sqlite3AuthReadCol(pParse, pTab->zName, zCol, iDb) ){
+    pExpr->op = TK_NULL;
+  }
+}
+
+/*
+** Do an authorization check using the code and arguments given.  Return
+** either SQLITE_OK (zero) or SQLITE_IGNORE or SQLITE_DENY.  If SQLITE_DENY
+** is returned, then the error count and error message in pParse are
+** modified appropriately.
+*/
+SQLITE_PRIVATE int sqlite3AuthCheck(
+  Parse *pParse,
+  int code,
+  const char *zArg1,
+  const char *zArg2,
+  const char *zArg3
+){
+  sqlite3 *db = pParse->db;
+  int rc;
+
+  /* Don't do any authorization checks if the database is initialising
+  ** or if the parser is being invoked from within sqlite3_declare_vtab.
+  */
+  if( db->init.busy || IN_DECLARE_VTAB ){
+    return SQLITE_OK;
+  }
+
+  if( db->xAuth==0 ){
+    return SQLITE_OK;
+  }
+  rc = db->xAuth(db->pAuthArg, code, zArg1, zArg2, zArg3, pParse->zAuthContext
+#ifdef SQLITE_USER_AUTHENTICATION
+                 ,db->auth.zAuthUser
+#endif
+                );
+  if( rc==SQLITE_DENY ){
+    sqlite3ErrorMsg(pParse, "not authorized");
+    pParse->rc = SQLITE_AUTH;
+  }else if( rc!=SQLITE_OK && rc!=SQLITE_IGNORE ){
+    rc = SQLITE_DENY;
+    sqliteAuthBadReturnCode(pParse);
+  }
+  return rc;
+}
+
+/*
+** Push an authorization context.  After this routine is called, the
+** zArg3 argument to authorization callbacks will be zContext until
+** popped.  Or if pParse==0, this routine is a no-op.
+*/
+SQLITE_PRIVATE void sqlite3AuthContextPush(
+  Parse *pParse,
+  AuthContext *pContext, 
+  const char *zContext
+){
+  assert( pParse );
+  pContext->pParse = pParse;
+  pContext->zAuthContext = pParse->zAuthContext;
+  pParse->zAuthContext = zContext;
+}
+
+/*
+** Pop an authorization context that was previously pushed
+** by sqlite3AuthContextPush
+*/
+SQLITE_PRIVATE void sqlite3AuthContextPop(AuthContext *pContext){
+  if( pContext->pParse ){
+    pContext->pParse->zAuthContext = pContext->zAuthContext;
+    pContext->pParse = 0;
+  }
+}
+
+#endif /* SQLITE_OMIT_AUTHORIZATION */
+
+/************** End of auth.c ************************************************/
+/************** Begin file build.c *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that are called by the SQLite parser
+** when syntax rules are reduced.  The routines in this file handle the
+** following kinds of SQL syntax:
+**
+**     CREATE TABLE
+**     DROP TABLE
+**     CREATE INDEX
+**     DROP INDEX
+**     creating ID lists
+**     BEGIN TRANSACTION
+**     COMMIT
+**     ROLLBACK
+*/
+
+/*
+** This routine is called when a new SQL statement is beginning to
+** be parsed.  Initialize the pParse structure as needed.
+*/
+SQLITE_PRIVATE void sqlite3BeginParse(Parse *pParse, int explainFlag){
+  pParse->explain = (u8)explainFlag;
+  pParse->nVar = 0;
+}
+
+#ifndef SQLITE_OMIT_SHARED_CACHE
+/*
+** The TableLock structure is only used by the sqlite3TableLock() and
+** codeTableLocks() functions.
+*/
+struct TableLock {
+  int iDb;             /* The database containing the table to be locked */
+  int iTab;            /* The root page of the table to be locked */
+  u8 isWriteLock;      /* True for write lock.  False for a read lock */
+  const char *zName;   /* Name of the table */
+};
+
+/*
+** Record the fact that we want to lock a table at run-time.  
+**
+** The table to be locked has root page iTab and is found in database iDb.
+** A read or a write lock can be taken depending on isWritelock.
+**
+** This routine just records the fact that the lock is desired.  The
+** code to make the lock occur is generated by a later call to
+** codeTableLocks() which occurs during sqlite3FinishCoding().
+*/
+SQLITE_PRIVATE void sqlite3TableLock(
+  Parse *pParse,     /* Parsing context */
+  int iDb,           /* Index of the database containing the table to lock */
+  int iTab,          /* Root page number of the table to be locked */
+  u8 isWriteLock,    /* True for a write lock */
+  const char *zName  /* Name of the table to be locked */
+){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  int i;
+  int nBytes;
+  TableLock *p;
+  assert( iDb>=0 );
+
+  for(i=0; i<pToplevel->nTableLock; i++){
+    p = &pToplevel->aTableLock[i];
+    if( p->iDb==iDb && p->iTab==iTab ){
+      p->isWriteLock = (p->isWriteLock || isWriteLock);
+      return;
+    }
+  }
+
+  nBytes = sizeof(TableLock) * (pToplevel->nTableLock+1);
+  pToplevel->aTableLock =
+      sqlite3DbReallocOrFree(pToplevel->db, pToplevel->aTableLock, nBytes);
+  if( pToplevel->aTableLock ){
+    p = &pToplevel->aTableLock[pToplevel->nTableLock++];
+    p->iDb = iDb;
+    p->iTab = iTab;
+    p->isWriteLock = isWriteLock;
+    p->zName = zName;
+  }else{
+    pToplevel->nTableLock = 0;
+    pToplevel->db->mallocFailed = 1;
+  }
+}
+
+/*
+** Code an OP_TableLock instruction for each table locked by the
+** statement (configured by calls to sqlite3TableLock()).
+*/
+static void codeTableLocks(Parse *pParse){
+  int i;
+  Vdbe *pVdbe; 
+
+  pVdbe = sqlite3GetVdbe(pParse);
+  assert( pVdbe!=0 ); /* sqlite3GetVdbe cannot fail: VDBE already allocated */
+
+  for(i=0; i<pParse->nTableLock; i++){
+    TableLock *p = &pParse->aTableLock[i];
+    int p1 = p->iDb;
+    sqlite3VdbeAddOp4(pVdbe, OP_TableLock, p1, p->iTab, p->isWriteLock,
+                      p->zName, P4_STATIC);
+  }
+}
+#else
+  #define codeTableLocks(x)
+#endif
+
+/*
+** Return TRUE if the given yDbMask object is empty - if it contains no
+** 1 bits.  This routine is used by the DbMaskAllZero() and DbMaskNotZero()
+** macros when SQLITE_MAX_ATTACHED is greater than 30.
+*/
+#if SQLITE_MAX_ATTACHED>30
+SQLITE_PRIVATE int sqlite3DbMaskAllZero(yDbMask m){
+  int i;
+  for(i=0; i<sizeof(yDbMask); i++) if( m[i] ) return 0;
+  return 1;
+}
+#endif
+
+/*
+** This routine is called after a single SQL statement has been
+** parsed and a VDBE program to execute that statement has been
+** prepared.  This routine puts the finishing touches on the
+** VDBE program and resets the pParse structure for the next
+** parse.
+**
+** Note that if an error occurred, it might be the case that
+** no VDBE code was generated.
+*/
+SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){
+  sqlite3 *db;
+  Vdbe *v;
+
+  assert( pParse->pToplevel==0 );
+  db = pParse->db;
+  if( db->mallocFailed ) return;
+  if( pParse->nested ) return;
+  if( pParse->nErr ) return;
+
+  /* Begin by generating some termination code at the end of the
+  ** vdbe program
+  */
+  v = sqlite3GetVdbe(pParse);
+  assert( !pParse->isMultiWrite 
+       || sqlite3VdbeAssertMayAbort(v, pParse->mayAbort));
+  if( v ){
+    while( sqlite3VdbeDeletePriorOpcode(v, OP_Close) ){}
+    sqlite3VdbeAddOp0(v, OP_Halt);
+
+#if SQLITE_USER_AUTHENTICATION
+    if( pParse->nTableLock>0 && db->init.busy==0 ){
+      sqlite3UserAuthInit(db);
+      if( db->auth.authLevel<UAUTH_User ){
+        pParse->rc = SQLITE_AUTH_USER;
+        sqlite3ErrorMsg(pParse, "user not authenticated");
+        return;
+      }
+    }
+#endif
+
+    /* The cookie mask contains one bit for each database file open.
+    ** (Bit 0 is for main, bit 1 is for temp, and so forth.)  Bits are
+    ** set for each database that is used.  Generate code to start a
+    ** transaction on each used database and to verify the schema cookie
+    ** on each used database.
+    */
+    if( db->mallocFailed==0 
+     && (DbMaskNonZero(pParse->cookieMask) || pParse->pConstExpr)
+    ){
+      int iDb, i;
+      assert( sqlite3VdbeGetOp(v, 0)->opcode==OP_Init );
+      sqlite3VdbeJumpHere(v, 0);
+      for(iDb=0; iDb<db->nDb; iDb++){
+        if( DbMaskTest(pParse->cookieMask, iDb)==0 ) continue;
+        sqlite3VdbeUsesBtree(v, iDb);
+        sqlite3VdbeAddOp4Int(v,
+          OP_Transaction,                    /* Opcode */
+          iDb,                               /* P1 */
+          DbMaskTest(pParse->writeMask,iDb), /* P2 */
+          pParse->cookieValue[iDb],          /* P3 */
+          db->aDb[iDb].pSchema->iGeneration  /* P4 */
+        );
+        if( db->init.busy==0 ) sqlite3VdbeChangeP5(v, 1);
+      }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+      for(i=0; i<pParse->nVtabLock; i++){
+        char *vtab = (char *)sqlite3GetVTable(db, pParse->apVtabLock[i]);
+        sqlite3VdbeAddOp4(v, OP_VBegin, 0, 0, 0, vtab, P4_VTAB);
+      }
+      pParse->nVtabLock = 0;
+#endif
+
+      /* Once all the cookies have been verified and transactions opened, 
+      ** obtain the required table-locks. This is a no-op unless the 
+      ** shared-cache feature is enabled.
+      */
+      codeTableLocks(pParse);
+
+      /* Initialize any AUTOINCREMENT data structures required.
+      */
+      sqlite3AutoincrementBegin(pParse);
+
+      /* Code constant expressions that where factored out of inner loops */
+      if( pParse->pConstExpr ){
+        ExprList *pEL = pParse->pConstExpr;
+        pParse->okConstFactor = 0;
+        for(i=0; i<pEL->nExpr; i++){
+          sqlite3ExprCode(pParse, pEL->a[i].pExpr, pEL->a[i].u.iConstExprReg);
+        }
+      }
+
+      /* Finally, jump back to the beginning of the executable code. */
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, 1);
+    }
+  }
+
+
+  /* Get the VDBE program ready for execution
+  */
+  if( v && ALWAYS(pParse->nErr==0) && !db->mallocFailed ){
+    assert( pParse->iCacheLevel==0 );  /* Disables and re-enables match */
+    /* A minimum of one cursor is required if autoincrement is used
+    *  See ticket [a696379c1f08866] */
+    if( pParse->pAinc!=0 && pParse->nTab==0 ) pParse->nTab = 1;
+    sqlite3VdbeMakeReady(v, pParse);
+    pParse->rc = SQLITE_DONE;
+    pParse->colNamesSet = 0;
+  }else{
+    pParse->rc = SQLITE_ERROR;
+  }
+  pParse->nTab = 0;
+  pParse->nMem = 0;
+  pParse->nSet = 0;
+  pParse->nVar = 0;
+  DbMaskZero(pParse->cookieMask);
+}
+
+/*
+** Run the parser and code generator recursively in order to generate
+** code for the SQL statement given onto the end of the pParse context
+** currently under construction.  When the parser is run recursively
+** this way, the final OP_Halt is not appended and other initialization
+** and finalization steps are omitted because those are handling by the
+** outermost parser.
+**
+** Not everything is nestable.  This facility is designed to permit
+** INSERT, UPDATE, and DELETE operations against SQLITE_MASTER.  Use
+** care if you decide to try to use this routine for some other purposes.
+*/
+SQLITE_PRIVATE void sqlite3NestedParse(Parse *pParse, const char *zFormat, ...){
+  va_list ap;
+  char *zSql;
+  char *zErrMsg = 0;
+  sqlite3 *db = pParse->db;
+# define SAVE_SZ  (sizeof(Parse) - offsetof(Parse,nVar))
+  char saveBuf[SAVE_SZ];
+
+  if( pParse->nErr ) return;
+  assert( pParse->nested<10 );  /* Nesting should only be of limited depth */
+  va_start(ap, zFormat);
+  zSql = sqlite3VMPrintf(db, zFormat, ap);
+  va_end(ap);
+  if( zSql==0 ){
+    return;   /* A malloc must have failed */
+  }
+  pParse->nested++;
+  memcpy(saveBuf, &pParse->nVar, SAVE_SZ);
+  memset(&pParse->nVar, 0, SAVE_SZ);
+  sqlite3RunParser(pParse, zSql, &zErrMsg);
+  sqlite3DbFree(db, zErrMsg);
+  sqlite3DbFree(db, zSql);
+  memcpy(&pParse->nVar, saveBuf, SAVE_SZ);
+  pParse->nested--;
+}
+
+#if SQLITE_USER_AUTHENTICATION
+/*
+** Return TRUE if zTable is the name of the system table that stores the
+** list of users and their access credentials.
+*/
+SQLITE_PRIVATE int sqlite3UserAuthTable(const char *zTable){
+  return sqlite3_stricmp(zTable, "sqlite_user")==0;
+}
+#endif
+
+/*
+** Locate the in-memory structure that describes a particular database
+** table given the name of that table and (optionally) the name of the
+** database containing the table.  Return NULL if not found.
+**
+** If zDatabase is 0, all databases are searched for the table and the
+** first matching table is returned.  (No checking for duplicate table
+** names is done.)  The search order is TEMP first, then MAIN, then any
+** auxiliary databases added using the ATTACH command.
+**
+** See also sqlite3LocateTable().
+*/
+SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3 *db, const char *zName, const char *zDatabase){
+  Table *p = 0;
+  int i;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 ) return 0;
+#endif
+
+  /* All mutexes are required for schema access.  Make sure we hold them. */
+  assert( zDatabase!=0 || sqlite3BtreeHoldsAllMutexes(db) );
+#if SQLITE_USER_AUTHENTICATION
+  /* Only the admin user is allowed to know that the sqlite_user table
+  ** exists */
+  if( db->auth.authLevel<UAUTH_Admin && sqlite3UserAuthTable(zName)!=0 ){
+    return 0;
+  }
+#endif
+  for(i=OMIT_TEMPDB; i<db->nDb; i++){
+    int j = (i<2) ? i^1 : i;   /* Search TEMP before MAIN */
+    if( zDatabase!=0 && sqlite3StrICmp(zDatabase, db->aDb[j].zName) ) continue;
+    assert( sqlite3SchemaMutexHeld(db, j, 0) );
+    p = sqlite3HashFind(&db->aDb[j].pSchema->tblHash, zName);
+    if( p ) break;
+  }
+  return p;
+}
+
+/*
+** Locate the in-memory structure that describes a particular database
+** table given the name of that table and (optionally) the name of the
+** database containing the table.  Return NULL if not found.  Also leave an
+** error message in pParse->zErrMsg.
+**
+** The difference between this routine and sqlite3FindTable() is that this
+** routine leaves an error message in pParse->zErrMsg where
+** sqlite3FindTable() does not.
+*/
+SQLITE_PRIVATE Table *sqlite3LocateTable(
+  Parse *pParse,         /* context in which to report errors */
+  int isView,            /* True if looking for a VIEW rather than a TABLE */
+  const char *zName,     /* Name of the table we are looking for */
+  const char *zDbase     /* Name of the database.  Might be NULL */
+){
+  Table *p;
+
+  /* Read the database schema. If an error occurs, leave an error message
+  ** and code in pParse and return NULL. */
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    return 0;
+  }
+
+  p = sqlite3FindTable(pParse->db, zName, zDbase);
+  if( p==0 ){
+    const char *zMsg = isView ? "no such view" : "no such table";
+    if( zDbase ){
+      sqlite3ErrorMsg(pParse, "%s: %s.%s", zMsg, zDbase, zName);
+    }else{
+      sqlite3ErrorMsg(pParse, "%s: %s", zMsg, zName);
+    }
+    pParse->checkSchema = 1;
+  }
+#if SQLITE_USER_AUTHENICATION
+  else if( pParse->db->auth.authLevel<UAUTH_User ){
+    sqlite3ErrorMsg(pParse, "user not authenticated");
+    p = 0;
+  }
+#endif
+  return p;
+}
+
+/*
+** Locate the table identified by *p.
+**
+** This is a wrapper around sqlite3LocateTable(). The difference between
+** sqlite3LocateTable() and this function is that this function restricts
+** the search to schema (p->pSchema) if it is not NULL. p->pSchema may be
+** non-NULL if it is part of a view or trigger program definition. See
+** sqlite3FixSrcList() for details.
+*/
+SQLITE_PRIVATE Table *sqlite3LocateTableItem(
+  Parse *pParse, 
+  int isView, 
+  struct SrcList_item *p
+){
+  const char *zDb;
+  assert( p->pSchema==0 || p->zDatabase==0 );
+  if( p->pSchema ){
+    int iDb = sqlite3SchemaToIndex(pParse->db, p->pSchema);
+    zDb = pParse->db->aDb[iDb].zName;
+  }else{
+    zDb = p->zDatabase;
+  }
+  return sqlite3LocateTable(pParse, isView, p->zName, zDb);
+}
+
+/*
+** Locate the in-memory structure that describes 
+** a particular index given the name of that index
+** and the name of the database that contains the index.
+** Return NULL if not found.
+**
+** If zDatabase is 0, all databases are searched for the
+** table and the first matching index is returned.  (No checking
+** for duplicate index names is done.)  The search order is
+** TEMP first, then MAIN, then any auxiliary databases added
+** using the ATTACH command.
+*/
+SQLITE_PRIVATE Index *sqlite3FindIndex(sqlite3 *db, const char *zName, const char *zDb){
+  Index *p = 0;
+  int i;
+  /* All mutexes are required for schema access.  Make sure we hold them. */
+  assert( zDb!=0 || sqlite3BtreeHoldsAllMutexes(db) );
+  for(i=OMIT_TEMPDB; i<db->nDb; i++){
+    int j = (i<2) ? i^1 : i;  /* Search TEMP before MAIN */
+    Schema *pSchema = db->aDb[j].pSchema;
+    assert( pSchema );
+    if( zDb && sqlite3StrICmp(zDb, db->aDb[j].zName) ) continue;
+    assert( sqlite3SchemaMutexHeld(db, j, 0) );
+    p = sqlite3HashFind(&pSchema->idxHash, zName);
+    if( p ) break;
+  }
+  return p;
+}
+
+/*
+** Reclaim the memory used by an index
+*/
+static void freeIndex(sqlite3 *db, Index *p){
+#ifndef SQLITE_OMIT_ANALYZE
+  sqlite3DeleteIndexSamples(db, p);
+#endif
+  sqlite3ExprDelete(db, p->pPartIdxWhere);
+  sqlite3DbFree(db, p->zColAff);
+  if( p->isResized ) sqlite3DbFree(db, p->azColl);
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  sqlite3_free(p->aiRowEst);
+#endif
+  sqlite3DbFree(db, p);
+}
+
+/*
+** For the index called zIdxName which is found in the database iDb,
+** unlike that index from its Table then remove the index from
+** the index hash table and free all memory structures associated
+** with the index.
+*/
+SQLITE_PRIVATE void sqlite3UnlinkAndDeleteIndex(sqlite3 *db, int iDb, const char *zIdxName){
+  Index *pIndex;
+  Hash *pHash;
+
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  pHash = &db->aDb[iDb].pSchema->idxHash;
+  pIndex = sqlite3HashInsert(pHash, zIdxName, 0);
+  if( ALWAYS(pIndex) ){
+    if( pIndex->pTable->pIndex==pIndex ){
+      pIndex->pTable->pIndex = pIndex->pNext;
+    }else{
+      Index *p;
+      /* Justification of ALWAYS();  The index must be on the list of
+      ** indices. */
+      p = pIndex->pTable->pIndex;
+      while( ALWAYS(p) && p->pNext!=pIndex ){ p = p->pNext; }
+      if( ALWAYS(p && p->pNext==pIndex) ){
+        p->pNext = pIndex->pNext;
+      }
+    }
+    freeIndex(db, pIndex);
+  }
+  db->flags |= SQLITE_InternChanges;
+}
+
+/*
+** Look through the list of open database files in db->aDb[] and if
+** any have been closed, remove them from the list.  Reallocate the
+** db->aDb[] structure to a smaller size, if possible.
+**
+** Entry 0 (the "main" database) and entry 1 (the "temp" database)
+** are never candidates for being collapsed.
+*/
+SQLITE_PRIVATE void sqlite3CollapseDatabaseArray(sqlite3 *db){
+  int i, j;
+  for(i=j=2; i<db->nDb; i++){
+    struct Db *pDb = &db->aDb[i];
+    if( pDb->pBt==0 ){
+      sqlite3DbFree(db, pDb->zName);
+      pDb->zName = 0;
+      continue;
+    }
+    if( j<i ){
+      db->aDb[j] = db->aDb[i];
+    }
+    j++;
+  }
+  memset(&db->aDb[j], 0, (db->nDb-j)*sizeof(db->aDb[j]));
+  db->nDb = j;
+  if( db->nDb<=2 && db->aDb!=db->aDbStatic ){
+    memcpy(db->aDbStatic, db->aDb, 2*sizeof(db->aDb[0]));
+    sqlite3DbFree(db, db->aDb);
+    db->aDb = db->aDbStatic;
+  }
+}
+
+/*
+** Reset the schema for the database at index iDb.  Also reset the
+** TEMP schema.
+*/
+SQLITE_PRIVATE void sqlite3ResetOneSchema(sqlite3 *db, int iDb){
+  Db *pDb;
+  assert( iDb<db->nDb );
+
+  /* Case 1:  Reset the single schema identified by iDb */
+  pDb = &db->aDb[iDb];
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  assert( pDb->pSchema!=0 );
+  sqlite3SchemaClear(pDb->pSchema);
+
+  /* If any database other than TEMP is reset, then also reset TEMP
+  ** since TEMP might be holding triggers that reference tables in the
+  ** other database.
+  */
+  if( iDb!=1 ){
+    pDb = &db->aDb[1];
+    assert( pDb->pSchema!=0 );
+    sqlite3SchemaClear(pDb->pSchema);
+  }
+  return;
+}
+
+/*
+** Erase all schema information from all attached databases (including
+** "main" and "temp") for a single database connection.
+*/
+SQLITE_PRIVATE void sqlite3ResetAllSchemasOfConnection(sqlite3 *db){
+  int i;
+  sqlite3BtreeEnterAll(db);
+  for(i=0; i<db->nDb; i++){
+    Db *pDb = &db->aDb[i];
+    if( pDb->pSchema ){
+      sqlite3SchemaClear(pDb->pSchema);
+    }
+  }
+  db->flags &= ~SQLITE_InternChanges;
+  sqlite3VtabUnlockList(db);
+  sqlite3BtreeLeaveAll(db);
+  sqlite3CollapseDatabaseArray(db);
+}
+
+/*
+** This routine is called when a commit occurs.
+*/
+SQLITE_PRIVATE void sqlite3CommitInternalChanges(sqlite3 *db){
+  db->flags &= ~SQLITE_InternChanges;
+}
+
+/*
+** Delete memory allocated for the column names of a table or view (the
+** Table.aCol[] array).
+*/
+static void sqliteDeleteColumnNames(sqlite3 *db, Table *pTable){
+  int i;
+  Column *pCol;
+  assert( pTable!=0 );
+  if( (pCol = pTable->aCol)!=0 ){
+    for(i=0; i<pTable->nCol; i++, pCol++){
+      sqlite3DbFree(db, pCol->zName);
+      sqlite3ExprDelete(db, pCol->pDflt);
+      sqlite3DbFree(db, pCol->zDflt);
+      sqlite3DbFree(db, pCol->zType);
+      sqlite3DbFree(db, pCol->zColl);
+    }
+    sqlite3DbFree(db, pTable->aCol);
+  }
+}
+
+/*
+** Remove the memory data structures associated with the given
+** Table.  No changes are made to disk by this routine.
+**
+** This routine just deletes the data structure.  It does not unlink
+** the table data structure from the hash table.  But it does destroy
+** memory structures of the indices and foreign keys associated with 
+** the table.
+**
+** The db parameter is optional.  It is needed if the Table object 
+** contains lookaside memory.  (Table objects in the schema do not use
+** lookaside memory, but some ephemeral Table objects do.)  Or the
+** db parameter can be used with db->pnBytesFreed to measure the memory
+** used by the Table object.
+*/
+SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3 *db, Table *pTable){
+  Index *pIndex, *pNext;
+  TESTONLY( int nLookaside; ) /* Used to verify lookaside not used for schema */
+
+  assert( !pTable || pTable->nRef>0 );
+
+  /* Do not delete the table until the reference count reaches zero. */
+  if( !pTable ) return;
+  if( ((!db || db->pnBytesFreed==0) && (--pTable->nRef)>0) ) return;
+
+  /* Record the number of outstanding lookaside allocations in schema Tables
+  ** prior to doing any free() operations.  Since schema Tables do not use
+  ** lookaside, this number should not change. */
+  TESTONLY( nLookaside = (db && (pTable->tabFlags & TF_Ephemeral)==0) ?
+                         db->lookaside.nOut : 0 );
+
+  /* Delete all indices associated with this table. */
+  for(pIndex = pTable->pIndex; pIndex; pIndex=pNext){
+    pNext = pIndex->pNext;
+    assert( pIndex->pSchema==pTable->pSchema );
+    if( !db || db->pnBytesFreed==0 ){
+      char *zName = pIndex->zName; 
+      TESTONLY ( Index *pOld = ) sqlite3HashInsert(
+         &pIndex->pSchema->idxHash, zName, 0
+      );
+      assert( db==0 || sqlite3SchemaMutexHeld(db, 0, pIndex->pSchema) );
+      assert( pOld==pIndex || pOld==0 );
+    }
+    freeIndex(db, pIndex);
+  }
+
+  /* Delete any foreign keys attached to this table. */
+  sqlite3FkDelete(db, pTable);
+
+  /* Delete the Table structure itself.
+  */
+  sqliteDeleteColumnNames(db, pTable);
+  sqlite3DbFree(db, pTable->zName);
+  sqlite3DbFree(db, pTable->zColAff);
+  sqlite3SelectDelete(db, pTable->pSelect);
+#ifndef SQLITE_OMIT_CHECK
+  sqlite3ExprListDelete(db, pTable->pCheck);
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  sqlite3VtabClear(db, pTable);
+#endif
+  sqlite3DbFree(db, pTable);
+
+  /* Verify that no lookaside memory was used by schema tables */
+  assert( nLookaside==0 || nLookaside==db->lookaside.nOut );
+}
+
+/*
+** Unlink the given table from the hash tables and the delete the
+** table structure with all its indices and foreign keys.
+*/
+SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTable(sqlite3 *db, int iDb, const char *zTabName){
+  Table *p;
+  Db *pDb;
+
+  assert( db!=0 );
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( zTabName );
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  testcase( zTabName[0]==0 );  /* Zero-length table names are allowed */
+  pDb = &db->aDb[iDb];
+  p = sqlite3HashInsert(&pDb->pSchema->tblHash, zTabName, 0);
+  sqlite3DeleteTable(db, p);
+  db->flags |= SQLITE_InternChanges;
+}
+
+/*
+** Given a token, return a string that consists of the text of that
+** token.  Space to hold the returned string
+** is obtained from sqliteMalloc() and must be freed by the calling
+** function.
+**
+** Any quotation marks (ex:  "name", 'name', [name], or `name`) that
+** surround the body of the token are removed.
+**
+** Tokens are often just pointers into the original SQL text and so
+** are not \000 terminated and are not persistent.  The returned string
+** is \000 terminated and is persistent.
+*/
+SQLITE_PRIVATE char *sqlite3NameFromToken(sqlite3 *db, Token *pName){
+  char *zName;
+  if( pName ){
+    zName = sqlite3DbStrNDup(db, (char*)pName->z, pName->n);
+    sqlite3Dequote(zName);
+  }else{
+    zName = 0;
+  }
+  return zName;
+}
+
+/*
+** Open the sqlite_master table stored in database number iDb for
+** writing. The table is opened using cursor 0.
+*/
+SQLITE_PRIVATE void sqlite3OpenMasterTable(Parse *p, int iDb){
+  Vdbe *v = sqlite3GetVdbe(p);
+  sqlite3TableLock(p, iDb, MASTER_ROOT, 1, SCHEMA_TABLE(iDb));
+  sqlite3VdbeAddOp4Int(v, OP_OpenWrite, 0, MASTER_ROOT, iDb, 5);
+  if( p->nTab==0 ){
+    p->nTab = 1;
+  }
+}
+
+/*
+** Parameter zName points to a nul-terminated buffer containing the name
+** of a database ("main", "temp" or the name of an attached db). This
+** function returns the index of the named database in db->aDb[], or
+** -1 if the named db cannot be found.
+*/
+SQLITE_PRIVATE int sqlite3FindDbName(sqlite3 *db, const char *zName){
+  int i = -1;         /* Database number */
+  if( zName ){
+    Db *pDb;
+    int n = sqlite3Strlen30(zName);
+    for(i=(db->nDb-1), pDb=&db->aDb[i]; i>=0; i--, pDb--){
+      if( (!OMIT_TEMPDB || i!=1 ) && n==sqlite3Strlen30(pDb->zName) && 
+          0==sqlite3StrICmp(pDb->zName, zName) ){
+        break;
+      }
+    }
+  }
+  return i;
+}
+
+/*
+** The token *pName contains the name of a database (either "main" or
+** "temp" or the name of an attached db). This routine returns the
+** index of the named database in db->aDb[], or -1 if the named db 
+** does not exist.
+*/
+SQLITE_PRIVATE int sqlite3FindDb(sqlite3 *db, Token *pName){
+  int i;                               /* Database number */
+  char *zName;                         /* Name we are searching for */
+  zName = sqlite3NameFromToken(db, pName);
+  i = sqlite3FindDbName(db, zName);
+  sqlite3DbFree(db, zName);
+  return i;
+}
+
+/* The table or view or trigger name is passed to this routine via tokens
+** pName1 and pName2. If the table name was fully qualified, for example:
+**
+** CREATE TABLE xxx.yyy (...);
+** 
+** Then pName1 is set to "xxx" and pName2 "yyy". On the other hand if
+** the table name is not fully qualified, i.e.:
+**
+** CREATE TABLE yyy(...);
+**
+** Then pName1 is set to "yyy" and pName2 is "".
+**
+** This routine sets the *ppUnqual pointer to point at the token (pName1 or
+** pName2) that stores the unqualified table name.  The index of the
+** database "xxx" is returned.
+*/
+SQLITE_PRIVATE int sqlite3TwoPartName(
+  Parse *pParse,      /* Parsing and code generating context */
+  Token *pName1,      /* The "xxx" in the name "xxx.yyy" or "xxx" */
+  Token *pName2,      /* The "yyy" in the name "xxx.yyy" */
+  Token **pUnqual     /* Write the unqualified object name here */
+){
+  int iDb;                    /* Database holding the object */
+  sqlite3 *db = pParse->db;
+
+  if( ALWAYS(pName2!=0) && pName2->n>0 ){
+    if( db->init.busy ) {
+      sqlite3ErrorMsg(pParse, "corrupt database");
+      pParse->nErr++;
+      return -1;
+    }
+    *pUnqual = pName2;
+    iDb = sqlite3FindDb(db, pName1);
+    if( iDb<0 ){
+      sqlite3ErrorMsg(pParse, "unknown database %T", pName1);
+      pParse->nErr++;
+      return -1;
+    }
+  }else{
+    assert( db->init.iDb==0 || db->init.busy );
+    iDb = db->init.iDb;
+    *pUnqual = pName1;
+  }
+  return iDb;
+}
+
+/*
+** This routine is used to check if the UTF-8 string zName is a legal
+** unqualified name for a new schema object (table, index, view or
+** trigger). All names are legal except those that begin with the string
+** "sqlite_" (in upper, lower or mixed case). This portion of the namespace
+** is reserved for internal use.
+*/
+SQLITE_PRIVATE int sqlite3CheckObjectName(Parse *pParse, const char *zName){
+  if( !pParse->db->init.busy && pParse->nested==0 
+          && (pParse->db->flags & SQLITE_WriteSchema)==0
+          && 0==sqlite3StrNICmp(zName, "sqlite_", 7) ){
+    sqlite3ErrorMsg(pParse, "object name reserved for internal use: %s", zName);
+    return SQLITE_ERROR;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Return the PRIMARY KEY index of a table
+*/
+SQLITE_PRIVATE Index *sqlite3PrimaryKeyIndex(Table *pTab){
+  Index *p;
+  for(p=pTab->pIndex; p && !IsPrimaryKeyIndex(p); p=p->pNext){}
+  return p;
+}
+
+/*
+** Return the column of index pIdx that corresponds to table
+** column iCol.  Return -1 if not found.
+*/
+SQLITE_PRIVATE i16 sqlite3ColumnOfIndex(Index *pIdx, i16 iCol){
+  int i;
+  for(i=0; i<pIdx->nColumn; i++){
+    if( iCol==pIdx->aiColumn[i] ) return i;
+  }
+  return -1;
+}
+
+/*
+** Begin constructing a new table representation in memory.  This is
+** the first of several action routines that get called in response
+** to a CREATE TABLE statement.  In particular, this routine is called
+** after seeing tokens "CREATE" and "TABLE" and the table name. The isTemp
+** flag is true if the table should be stored in the auxiliary database
+** file instead of in the main database file.  This is normally the case
+** when the "TEMP" or "TEMPORARY" keyword occurs in between
+** CREATE and TABLE.
+**
+** The new table record is initialized and put in pParse->pNewTable.
+** As more of the CREATE TABLE statement is parsed, additional action
+** routines will be called to add more information to this record.
+** At the end of the CREATE TABLE statement, the sqlite3EndTable() routine
+** is called to complete the construction of the new table record.
+*/
+SQLITE_PRIVATE void sqlite3StartTable(
+  Parse *pParse,   /* Parser context */
+  Token *pName1,   /* First part of the name of the table or view */
+  Token *pName2,   /* Second part of the name of the table or view */
+  int isTemp,      /* True if this is a TEMP table */
+  int isView,      /* True if this is a VIEW */
+  int isVirtual,   /* True if this is a VIRTUAL table */
+  int noErr        /* Do nothing if table already exists */
+){
+  Table *pTable;
+  char *zName = 0; /* The name of the new table */
+  sqlite3 *db = pParse->db;
+  Vdbe *v;
+  int iDb;         /* Database number to create the table in */
+  Token *pName;    /* Unqualified name of the table to create */
+
+  /* The table or view name to create is passed to this routine via tokens
+  ** pName1 and pName2. If the table name was fully qualified, for example:
+  **
+  ** CREATE TABLE xxx.yyy (...);
+  ** 
+  ** Then pName1 is set to "xxx" and pName2 "yyy". On the other hand if
+  ** the table name is not fully qualified, i.e.:
+  **
+  ** CREATE TABLE yyy(...);
+  **
+  ** Then pName1 is set to "yyy" and pName2 is "".
+  **
+  ** The call below sets the pName pointer to point at the token (pName1 or
+  ** pName2) that stores the unqualified table name. The variable iDb is
+  ** set to the index of the database that the table or view is to be
+  ** created in.
+  */
+  iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName);
+  if( iDb<0 ) return;
+  if( !OMIT_TEMPDB && isTemp && pName2->n>0 && iDb!=1 ){
+    /* If creating a temp table, the name may not be qualified. Unless 
+    ** the database name is "temp" anyway.  */
+    sqlite3ErrorMsg(pParse, "temporary table name must be unqualified");
+    return;
+  }
+  if( !OMIT_TEMPDB && isTemp ) iDb = 1;
+
+  pParse->sNameToken = *pName;
+  zName = sqlite3NameFromToken(db, pName);
+  if( zName==0 ) return;
+  if( SQLITE_OK!=sqlite3CheckObjectName(pParse, zName) ){
+    goto begin_table_error;
+  }
+  if( db->init.iDb==1 ) isTemp = 1;
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  assert( (isTemp & 1)==isTemp );
+  {
+    int code;
+    char *zDb = db->aDb[iDb].zName;
+    if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(isTemp), 0, zDb) ){
+      goto begin_table_error;
+    }
+    if( isView ){
+      if( !OMIT_TEMPDB && isTemp ){
+        code = SQLITE_CREATE_TEMP_VIEW;
+      }else{
+        code = SQLITE_CREATE_VIEW;
+      }
+    }else{
+      if( !OMIT_TEMPDB && isTemp ){
+        code = SQLITE_CREATE_TEMP_TABLE;
+      }else{
+        code = SQLITE_CREATE_TABLE;
+      }
+    }
+    if( !isVirtual && sqlite3AuthCheck(pParse, code, zName, 0, zDb) ){
+      goto begin_table_error;
+    }
+  }
+#endif
+
+  /* Make sure the new table name does not collide with an existing
+  ** index or table name in the same database.  Issue an error message if
+  ** it does. The exception is if the statement being parsed was passed
+  ** to an sqlite3_declare_vtab() call. In that case only the column names
+  ** and types will be used, so there is no need to test for namespace
+  ** collisions.
+  */
+  if( !IN_DECLARE_VTAB ){
+    char *zDb = db->aDb[iDb].zName;
+    if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+      goto begin_table_error;
+    }
+    pTable = sqlite3FindTable(db, zName, zDb);
+    if( pTable ){
+      if( !noErr ){
+        sqlite3ErrorMsg(pParse, "table %T already exists", pName);
+      }else{
+        assert( !db->init.busy );
+        sqlite3CodeVerifySchema(pParse, iDb);
+      }
+      goto begin_table_error;
+    }
+    if( sqlite3FindIndex(db, zName, zDb)!=0 ){
+      sqlite3ErrorMsg(pParse, "there is already an index named %s", zName);
+      goto begin_table_error;
+    }
+  }
+
+  pTable = sqlite3DbMallocZero(db, sizeof(Table));
+  if( pTable==0 ){
+    db->mallocFailed = 1;
+    pParse->rc = SQLITE_NOMEM;
+    pParse->nErr++;
+    goto begin_table_error;
+  }
+  pTable->zName = zName;
+  pTable->iPKey = -1;
+  pTable->pSchema = db->aDb[iDb].pSchema;
+  pTable->nRef = 1;
+  pTable->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
+  assert( pParse->pNewTable==0 );
+  pParse->pNewTable = pTable;
+
+  /* If this is the magic sqlite_sequence table used by autoincrement,
+  ** then record a pointer to this table in the main database structure
+  ** so that INSERT can find the table easily.
+  */
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+  if( !pParse->nested && strcmp(zName, "sqlite_sequence")==0 ){
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    pTable->pSchema->pSeqTab = pTable;
+  }
+#endif
+
+  /* Begin generating the code that will insert the table record into
+  ** the SQLITE_MASTER table.  Note in particular that we must go ahead
+  ** and allocate the record number for the table entry now.  Before any
+  ** PRIMARY KEY or UNIQUE keywords are parsed.  Those keywords will cause
+  ** indices to be created and the table record must come before the 
+  ** indices.  Hence, the record number for the table must be allocated
+  ** now.
+  */
+  if( !db->init.busy && (v = sqlite3GetVdbe(pParse))!=0 ){
+    int j1;
+    int fileFormat;
+    int reg1, reg2, reg3;
+    sqlite3BeginWriteOperation(pParse, 0, iDb);
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    if( isVirtual ){
+      sqlite3VdbeAddOp0(v, OP_VBegin);
+    }
+#endif
+
+    /* If the file format and encoding in the database have not been set, 
+    ** set them now.
+    */
+    reg1 = pParse->regRowid = ++pParse->nMem;
+    reg2 = pParse->regRoot = ++pParse->nMem;
+    reg3 = ++pParse->nMem;
+    sqlite3VdbeAddOp3(v, OP_ReadCookie, iDb, reg3, BTREE_FILE_FORMAT);
+    sqlite3VdbeUsesBtree(v, iDb);
+    j1 = sqlite3VdbeAddOp1(v, OP_If, reg3); VdbeCoverage(v);
+    fileFormat = (db->flags & SQLITE_LegacyFileFmt)!=0 ?
+                  1 : SQLITE_MAX_FILE_FORMAT;
+    sqlite3VdbeAddOp2(v, OP_Integer, fileFormat, reg3);
+    sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_FILE_FORMAT, reg3);
+    sqlite3VdbeAddOp2(v, OP_Integer, ENC(db), reg3);
+    sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_TEXT_ENCODING, reg3);
+    sqlite3VdbeJumpHere(v, j1);
+
+    /* This just creates a place-holder record in the sqlite_master table.
+    ** The record created does not contain anything yet.  It will be replaced
+    ** by the real entry in code generated at sqlite3EndTable().
+    **
+    ** The rowid for the new entry is left in register pParse->regRowid.
+    ** The root page number of the new table is left in reg pParse->regRoot.
+    ** The rowid and root page number values are needed by the code that
+    ** sqlite3EndTable will generate.
+    */
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE)
+    if( isView || isVirtual ){
+      sqlite3VdbeAddOp2(v, OP_Integer, 0, reg2);
+    }else
+#endif
+    {
+      pParse->addrCrTab = sqlite3VdbeAddOp2(v, OP_CreateTable, iDb, reg2);
+    }
+    sqlite3OpenMasterTable(pParse, iDb);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, 0, reg1);
+    sqlite3VdbeAddOp2(v, OP_Null, 0, reg3);
+    sqlite3VdbeAddOp3(v, OP_Insert, 0, reg3, reg1);
+    sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+    sqlite3VdbeAddOp0(v, OP_Close);
+  }
+
+  /* Normal (non-error) return. */
+  return;
+
+  /* If an error occurs, we jump here */
+begin_table_error:
+  sqlite3DbFree(db, zName);
+  return;
+}
+
+/*
+** This macro is used to compare two strings in a case-insensitive manner.
+** It is slightly faster than calling sqlite3StrICmp() directly, but
+** produces larger code.
+**
+** WARNING: This macro is not compatible with the strcmp() family. It
+** returns true if the two strings are equal, otherwise false.
+*/
+#define STRICMP(x, y) (\
+sqlite3UpperToLower[*(unsigned char *)(x)]==   \
+sqlite3UpperToLower[*(unsigned char *)(y)]     \
+&& sqlite3StrICmp((x)+1,(y)+1)==0 )
+
+/*
+** Add a new column to the table currently being constructed.
+**
+** The parser calls this routine once for each column declaration
+** in a CREATE TABLE statement.  sqlite3StartTable() gets called
+** first to get things going.  Then this routine is called for each
+** column.
+*/
+SQLITE_PRIVATE void sqlite3AddColumn(Parse *pParse, Token *pName){
+  Table *p;
+  int i;
+  char *z;
+  Column *pCol;
+  sqlite3 *db = pParse->db;
+  if( (p = pParse->pNewTable)==0 ) return;
+#if SQLITE_MAX_COLUMN
+  if( p->nCol+1>db->aLimit[SQLITE_LIMIT_COLUMN] ){
+    sqlite3ErrorMsg(pParse, "too many columns on %s", p->zName);
+    return;
+  }
+#endif
+  z = sqlite3NameFromToken(db, pName);
+  if( z==0 ) return;
+  for(i=0; i<p->nCol; i++){
+    if( STRICMP(z, p->aCol[i].zName) ){
+      sqlite3ErrorMsg(pParse, "duplicate column name: %s", z);
+      sqlite3DbFree(db, z);
+      return;
+    }
+  }
+  if( (p->nCol & 0x7)==0 ){
+    Column *aNew;
+    aNew = sqlite3DbRealloc(db,p->aCol,(p->nCol+8)*sizeof(p->aCol[0]));
+    if( aNew==0 ){
+      sqlite3DbFree(db, z);
+      return;
+    }
+    p->aCol = aNew;
+  }
+  pCol = &p->aCol[p->nCol];
+  memset(pCol, 0, sizeof(p->aCol[0]));
+  pCol->zName = z;
+ 
+  /* If there is no type specified, columns have the default affinity
+  ** 'NONE'. If there is a type specified, then sqlite3AddColumnType() will
+  ** be called next to set pCol->affinity correctly.
+  */
+  pCol->affinity = SQLITE_AFF_NONE;
+  pCol->szEst = 1;
+  p->nCol++;
+}
+
+/*
+** This routine is called by the parser while in the middle of
+** parsing a CREATE TABLE statement.  A "NOT NULL" constraint has
+** been seen on a column.  This routine sets the notNull flag on
+** the column currently under construction.
+*/
+SQLITE_PRIVATE void sqlite3AddNotNull(Parse *pParse, int onError){
+  Table *p;
+  p = pParse->pNewTable;
+  if( p==0 || NEVER(p->nCol<1) ) return;
+  p->aCol[p->nCol-1].notNull = (u8)onError;
+}
+
+/*
+** Scan the column type name zType (length nType) and return the
+** associated affinity type.
+**
+** This routine does a case-independent search of zType for the 
+** substrings in the following table. If one of the substrings is
+** found, the corresponding affinity is returned. If zType contains
+** more than one of the substrings, entries toward the top of 
+** the table take priority. For example, if zType is 'BLOBINT', 
+** SQLITE_AFF_INTEGER is returned.
+**
+** Substring     | Affinity
+** --------------------------------
+** 'INT'         | SQLITE_AFF_INTEGER
+** 'CHAR'        | SQLITE_AFF_TEXT
+** 'CLOB'        | SQLITE_AFF_TEXT
+** 'TEXT'        | SQLITE_AFF_TEXT
+** 'BLOB'        | SQLITE_AFF_NONE
+** 'REAL'        | SQLITE_AFF_REAL
+** 'FLOA'        | SQLITE_AFF_REAL
+** 'DOUB'        | SQLITE_AFF_REAL
+**
+** If none of the substrings in the above table are found,
+** SQLITE_AFF_NUMERIC is returned.
+*/
+SQLITE_PRIVATE char sqlite3AffinityType(const char *zIn, u8 *pszEst){
+  u32 h = 0;
+  char aff = SQLITE_AFF_NUMERIC;
+  const char *zChar = 0;
+
+  if( zIn==0 ) return aff;
+  while( zIn[0] ){
+    h = (h<<8) + sqlite3UpperToLower[(*zIn)&0xff];
+    zIn++;
+    if( h==(('c'<<24)+('h'<<16)+('a'<<8)+'r') ){             /* CHAR */
+      aff = SQLITE_AFF_TEXT;
+      zChar = zIn;
+    }else if( h==(('c'<<24)+('l'<<16)+('o'<<8)+'b') ){       /* CLOB */
+      aff = SQLITE_AFF_TEXT;
+    }else if( h==(('t'<<24)+('e'<<16)+('x'<<8)+'t') ){       /* TEXT */
+      aff = SQLITE_AFF_TEXT;
+    }else if( h==(('b'<<24)+('l'<<16)+('o'<<8)+'b')          /* BLOB */
+        && (aff==SQLITE_AFF_NUMERIC || aff==SQLITE_AFF_REAL) ){
+      aff = SQLITE_AFF_NONE;
+      if( zIn[0]=='(' ) zChar = zIn;
+#ifndef SQLITE_OMIT_FLOATING_POINT
+    }else if( h==(('r'<<24)+('e'<<16)+('a'<<8)+'l')          /* REAL */
+        && aff==SQLITE_AFF_NUMERIC ){
+      aff = SQLITE_AFF_REAL;
+    }else if( h==(('f'<<24)+('l'<<16)+('o'<<8)+'a')          /* FLOA */
+        && aff==SQLITE_AFF_NUMERIC ){
+      aff = SQLITE_AFF_REAL;
+    }else if( h==(('d'<<24)+('o'<<16)+('u'<<8)+'b')          /* DOUB */
+        && aff==SQLITE_AFF_NUMERIC ){
+      aff = SQLITE_AFF_REAL;
+#endif
+    }else if( (h&0x00FFFFFF)==(('i'<<16)+('n'<<8)+'t') ){    /* INT */
+      aff = SQLITE_AFF_INTEGER;
+      break;
+    }
+  }
+
+  /* If pszEst is not NULL, store an estimate of the field size.  The
+  ** estimate is scaled so that the size of an integer is 1.  */
+  if( pszEst ){
+    *pszEst = 1;   /* default size is approx 4 bytes */
+    if( aff<SQLITE_AFF_NUMERIC ){
+      if( zChar ){
+        while( zChar[0] ){
+          if( sqlite3Isdigit(zChar[0]) ){
+            int v = 0;
+            sqlite3GetInt32(zChar, &v);
+            v = v/4 + 1;
+            if( v>255 ) v = 255;
+            *pszEst = v; /* BLOB(k), VARCHAR(k), CHAR(k) -> r=(k/4+1) */
+            break;
+          }
+          zChar++;
+        }
+      }else{
+        *pszEst = 5;   /* BLOB, TEXT, CLOB -> r=5  (approx 20 bytes)*/
+      }
+    }
+  }
+  return aff;
+}
+
+/*
+** This routine is called by the parser while in the middle of
+** parsing a CREATE TABLE statement.  The pFirst token is the first
+** token in the sequence of tokens that describe the type of the
+** column currently under construction.   pLast is the last token
+** in the sequence.  Use this information to construct a string
+** that contains the typename of the column and store that string
+** in zType.
+*/ 
+SQLITE_PRIVATE void sqlite3AddColumnType(Parse *pParse, Token *pType){
+  Table *p;
+  Column *pCol;
+
+  p = pParse->pNewTable;
+  if( p==0 || NEVER(p->nCol<1) ) return;
+  pCol = &p->aCol[p->nCol-1];
+  assert( pCol->zType==0 );
+  pCol->zType = sqlite3NameFromToken(pParse->db, pType);
+  pCol->affinity = sqlite3AffinityType(pCol->zType, &pCol->szEst);
+}
+
+/*
+** The expression is the default value for the most recently added column
+** of the table currently under construction.
+**
+** Default value expressions must be constant.  Raise an exception if this
+** is not the case.
+**
+** This routine is called by the parser while in the middle of
+** parsing a CREATE TABLE statement.
+*/
+SQLITE_PRIVATE void sqlite3AddDefaultValue(Parse *pParse, ExprSpan *pSpan){
+  Table *p;
+  Column *pCol;
+  sqlite3 *db = pParse->db;
+  p = pParse->pNewTable;
+  if( p!=0 ){
+    pCol = &(p->aCol[p->nCol-1]);
+    if( !sqlite3ExprIsConstantOrFunction(pSpan->pExpr, db->init.busy) ){
+      sqlite3ErrorMsg(pParse, "default value of column [%s] is not constant",
+          pCol->zName);
+    }else{
+      /* A copy of pExpr is used instead of the original, as pExpr contains
+      ** tokens that point to volatile memory. The 'span' of the expression
+      ** is required by pragma table_info.
+      */
+      sqlite3ExprDelete(db, pCol->pDflt);
+      pCol->pDflt = sqlite3ExprDup(db, pSpan->pExpr, EXPRDUP_REDUCE);
+      sqlite3DbFree(db, pCol->zDflt);
+      pCol->zDflt = sqlite3DbStrNDup(db, (char*)pSpan->zStart,
+                                     (int)(pSpan->zEnd - pSpan->zStart));
+    }
+  }
+  sqlite3ExprDelete(db, pSpan->pExpr);
+}
+
+/*
+** Designate the PRIMARY KEY for the table.  pList is a list of names 
+** of columns that form the primary key.  If pList is NULL, then the
+** most recently added column of the table is the primary key.
+**
+** A table can have at most one primary key.  If the table already has
+** a primary key (and this is the second primary key) then create an
+** error.
+**
+** If the PRIMARY KEY is on a single column whose datatype is INTEGER,
+** then we will try to use that column as the rowid.  Set the Table.iPKey
+** field of the table under construction to be the index of the
+** INTEGER PRIMARY KEY column.  Table.iPKey is set to -1 if there is
+** no INTEGER PRIMARY KEY.
+**
+** If the key is not an INTEGER PRIMARY KEY, then create a unique
+** index for the key.  No index is created for INTEGER PRIMARY KEYs.
+*/
+SQLITE_PRIVATE void sqlite3AddPrimaryKey(
+  Parse *pParse,    /* Parsing context */
+  ExprList *pList,  /* List of field names to be indexed */
+  int onError,      /* What to do with a uniqueness conflict */
+  int autoInc,      /* True if the AUTOINCREMENT keyword is present */
+  int sortOrder     /* SQLITE_SO_ASC or SQLITE_SO_DESC */
+){
+  Table *pTab = pParse->pNewTable;
+  char *zType = 0;
+  int iCol = -1, i;
+  int nTerm;
+  if( pTab==0 || IN_DECLARE_VTAB ) goto primary_key_exit;
+  if( pTab->tabFlags & TF_HasPrimaryKey ){
+    sqlite3ErrorMsg(pParse, 
+      "table \"%s\" has more than one primary key", pTab->zName);
+    goto primary_key_exit;
+  }
+  pTab->tabFlags |= TF_HasPrimaryKey;
+  if( pList==0 ){
+    iCol = pTab->nCol - 1;
+    pTab->aCol[iCol].colFlags |= COLFLAG_PRIMKEY;
+    zType = pTab->aCol[iCol].zType;
+    nTerm = 1;
+  }else{
+    nTerm = pList->nExpr;
+    for(i=0; i<nTerm; i++){
+      for(iCol=0; iCol<pTab->nCol; iCol++){
+        if( sqlite3StrICmp(pList->a[i].zName, pTab->aCol[iCol].zName)==0 ){
+          pTab->aCol[iCol].colFlags |= COLFLAG_PRIMKEY;
+          zType = pTab->aCol[iCol].zType;
+          break;
+        }
+      }
+    }
+  }
+  if( nTerm==1
+   && zType && sqlite3StrICmp(zType, "INTEGER")==0
+   && sortOrder==SQLITE_SO_ASC
+  ){
+    pTab->iPKey = iCol;
+    pTab->keyConf = (u8)onError;
+    assert( autoInc==0 || autoInc==1 );
+    pTab->tabFlags |= autoInc*TF_Autoincrement;
+    if( pList ) pParse->iPkSortOrder = pList->a[0].sortOrder;
+  }else if( autoInc ){
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+    sqlite3ErrorMsg(pParse, "AUTOINCREMENT is only allowed on an "
+       "INTEGER PRIMARY KEY");
+#endif
+  }else{
+    Vdbe *v = pParse->pVdbe;
+    Index *p;
+    if( v ) pParse->addrSkipPK = sqlite3VdbeAddOp0(v, OP_Noop);
+    p = sqlite3CreateIndex(pParse, 0, 0, 0, pList, onError, 0,
+                           0, sortOrder, 0);
+    if( p ){
+      p->idxType = SQLITE_IDXTYPE_PRIMARYKEY;
+      if( v ) sqlite3VdbeJumpHere(v, pParse->addrSkipPK);
+    }
+    pList = 0;
+  }
+
+primary_key_exit:
+  sqlite3ExprListDelete(pParse->db, pList);
+  return;
+}
+
+/*
+** Add a new CHECK constraint to the table currently under construction.
+*/
+SQLITE_PRIVATE void sqlite3AddCheckConstraint(
+  Parse *pParse,    /* Parsing context */
+  Expr *pCheckExpr  /* The check expression */
+){
+#ifndef SQLITE_OMIT_CHECK
+  Table *pTab = pParse->pNewTable;
+  sqlite3 *db = pParse->db;
+  if( pTab && !IN_DECLARE_VTAB
+   && !sqlite3BtreeIsReadonly(db->aDb[db->init.iDb].pBt)
+  ){
+    pTab->pCheck = sqlite3ExprListAppend(pParse, pTab->pCheck, pCheckExpr);
+    if( pParse->constraintName.n ){
+      sqlite3ExprListSetName(pParse, pTab->pCheck, &pParse->constraintName, 1);
+    }
+  }else
+#endif
+  {
+    sqlite3ExprDelete(pParse->db, pCheckExpr);
+  }
+}
+
+/*
+** Set the collation function of the most recently parsed table column
+** to the CollSeq given.
+*/
+SQLITE_PRIVATE void sqlite3AddCollateType(Parse *pParse, Token *pToken){
+  Table *p;
+  int i;
+  char *zColl;              /* Dequoted name of collation sequence */
+  sqlite3 *db;
+
+  if( (p = pParse->pNewTable)==0 ) return;
+  i = p->nCol-1;
+  db = pParse->db;
+  zColl = sqlite3NameFromToken(db, pToken);
+  if( !zColl ) return;
+
+  if( sqlite3LocateCollSeq(pParse, zColl) ){
+    Index *pIdx;
+    sqlite3DbFree(db, p->aCol[i].zColl);
+    p->aCol[i].zColl = zColl;
+  
+    /* If the column is declared as "<name> PRIMARY KEY COLLATE <type>",
+    ** then an index may have been created on this column before the
+    ** collation type was added. Correct this if it is the case.
+    */
+    for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){
+      assert( pIdx->nKeyCol==1 );
+      if( pIdx->aiColumn[0]==i ){
+        pIdx->azColl[0] = p->aCol[i].zColl;
+      }
+    }
+  }else{
+    sqlite3DbFree(db, zColl);
+  }
+}
+
+/*
+** This function returns the collation sequence for database native text
+** encoding identified by the string zName, length nName.
+**
+** If the requested collation sequence is not available, or not available
+** in the database native encoding, the collation factory is invoked to
+** request it. If the collation factory does not supply such a sequence,
+** and the sequence is available in another text encoding, then that is
+** returned instead.
+**
+** If no versions of the requested collations sequence are available, or
+** another error occurs, NULL is returned and an error message written into
+** pParse.
+**
+** This routine is a wrapper around sqlite3FindCollSeq().  This routine
+** invokes the collation factory if the named collation cannot be found
+** and generates an error message.
+**
+** See also: sqlite3FindCollSeq(), sqlite3GetCollSeq()
+*/
+SQLITE_PRIVATE CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName){
+  sqlite3 *db = pParse->db;
+  u8 enc = ENC(db);
+  u8 initbusy = db->init.busy;
+  CollSeq *pColl;
+
+  pColl = sqlite3FindCollSeq(db, enc, zName, initbusy);
+  if( !initbusy && (!pColl || !pColl->xCmp) ){
+    pColl = sqlite3GetCollSeq(pParse, enc, pColl, zName);
+  }
+
+  return pColl;
+}
+
+
+/*
+** Generate code that will increment the schema cookie.
+**
+** The schema cookie is used to determine when the schema for the
+** database changes.  After each schema change, the cookie value
+** changes.  When a process first reads the schema it records the
+** cookie.  Thereafter, whenever it goes to access the database,
+** it checks the cookie to make sure the schema has not changed
+** since it was last read.
+**
+** This plan is not completely bullet-proof.  It is possible for
+** the schema to change multiple times and for the cookie to be
+** set back to prior value.  But schema changes are infrequent
+** and the probability of hitting the same cookie value is only
+** 1 chance in 2^32.  So we're safe enough.
+*/
+SQLITE_PRIVATE void sqlite3ChangeCookie(Parse *pParse, int iDb){
+  int r1 = sqlite3GetTempReg(pParse);
+  sqlite3 *db = pParse->db;
+  Vdbe *v = pParse->pVdbe;
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  sqlite3VdbeAddOp2(v, OP_Integer, db->aDb[iDb].pSchema->schema_cookie+1, r1);
+  sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_SCHEMA_VERSION, r1);
+  sqlite3ReleaseTempReg(pParse, r1);
+}
+
+/*
+** Measure the number of characters needed to output the given
+** identifier.  The number returned includes any quotes used
+** but does not include the null terminator.
+**
+** The estimate is conservative.  It might be larger that what is
+** really needed.
+*/
+static int identLength(const char *z){
+  int n;
+  for(n=0; *z; n++, z++){
+    if( *z=='"' ){ n++; }
+  }
+  return n + 2;
+}
+
+/*
+** The first parameter is a pointer to an output buffer. The second 
+** parameter is a pointer to an integer that contains the offset at
+** which to write into the output buffer. This function copies the
+** nul-terminated string pointed to by the third parameter, zSignedIdent,
+** to the specified offset in the buffer and updates *pIdx to refer
+** to the first byte after the last byte written before returning.
+** 
+** If the string zSignedIdent consists entirely of alpha-numeric
+** characters, does not begin with a digit and is not an SQL keyword,
+** then it is copied to the output buffer exactly as it is. Otherwise,
+** it is quoted using double-quotes.
+*/
+static void identPut(char *z, int *pIdx, char *zSignedIdent){
+  unsigned char *zIdent = (unsigned char*)zSignedIdent;
+  int i, j, needQuote;
+  i = *pIdx;
+
+  for(j=0; zIdent[j]; j++){
+    if( !sqlite3Isalnum(zIdent[j]) && zIdent[j]!='_' ) break;
+  }
+  needQuote = sqlite3Isdigit(zIdent[0])
+            || sqlite3KeywordCode(zIdent, j)!=TK_ID
+            || zIdent[j]!=0
+            || j==0;
+
+  if( needQuote ) z[i++] = '"';
+  for(j=0; zIdent[j]; j++){
+    z[i++] = zIdent[j];
+    if( zIdent[j]=='"' ) z[i++] = '"';
+  }
+  if( needQuote ) z[i++] = '"';
+  z[i] = 0;
+  *pIdx = i;
+}
+
+/*
+** Generate a CREATE TABLE statement appropriate for the given
+** table.  Memory to hold the text of the statement is obtained
+** from sqliteMalloc() and must be freed by the calling function.
+*/
+static char *createTableStmt(sqlite3 *db, Table *p){
+  int i, k, n;
+  char *zStmt;
+  char *zSep, *zSep2, *zEnd;
+  Column *pCol;
+  n = 0;
+  for(pCol = p->aCol, i=0; i<p->nCol; i++, pCol++){
+    n += identLength(pCol->zName) + 5;
+  }
+  n += identLength(p->zName);
+  if( n<50 ){ 
+    zSep = "";
+    zSep2 = ",";
+    zEnd = ")";
+  }else{
+    zSep = "\n  ";
+    zSep2 = ",\n  ";
+    zEnd = "\n)";
+  }
+  n += 35 + 6*p->nCol;
+  zStmt = sqlite3DbMallocRaw(0, n);
+  if( zStmt==0 ){
+    db->mallocFailed = 1;
+    return 0;
+  }
+  sqlite3_snprintf(n, zStmt, "CREATE TABLE ");
+  k = sqlite3Strlen30(zStmt);
+  identPut(zStmt, &k, p->zName);
+  zStmt[k++] = '(';
+  for(pCol=p->aCol, i=0; i<p->nCol; i++, pCol++){
+    static const char * const azType[] = {
+        /* SQLITE_AFF_NONE    */ "",
+        /* SQLITE_AFF_TEXT    */ " TEXT",
+        /* SQLITE_AFF_NUMERIC */ " NUM",
+        /* SQLITE_AFF_INTEGER */ " INT",
+        /* SQLITE_AFF_REAL    */ " REAL"
+    };
+    int len;
+    const char *zType;
+
+    sqlite3_snprintf(n-k, &zStmt[k], zSep);
+    k += sqlite3Strlen30(&zStmt[k]);
+    zSep = zSep2;
+    identPut(zStmt, &k, pCol->zName);
+    assert( pCol->affinity-SQLITE_AFF_NONE >= 0 );
+    assert( pCol->affinity-SQLITE_AFF_NONE < ArraySize(azType) );
+    testcase( pCol->affinity==SQLITE_AFF_NONE );
+    testcase( pCol->affinity==SQLITE_AFF_TEXT );
+    testcase( pCol->affinity==SQLITE_AFF_NUMERIC );
+    testcase( pCol->affinity==SQLITE_AFF_INTEGER );
+    testcase( pCol->affinity==SQLITE_AFF_REAL );
+    
+    zType = azType[pCol->affinity - SQLITE_AFF_NONE];
+    len = sqlite3Strlen30(zType);
+    assert( pCol->affinity==SQLITE_AFF_NONE 
+            || pCol->affinity==sqlite3AffinityType(zType, 0) );
+    memcpy(&zStmt[k], zType, len);
+    k += len;
+    assert( k<=n );
+  }
+  sqlite3_snprintf(n-k, &zStmt[k], "%s", zEnd);
+  return zStmt;
+}
+
+/*
+** Resize an Index object to hold N columns total.  Return SQLITE_OK
+** on success and SQLITE_NOMEM on an OOM error.
+*/
+static int resizeIndexObject(sqlite3 *db, Index *pIdx, int N){
+  char *zExtra;
+  int nByte;
+  if( pIdx->nColumn>=N ) return SQLITE_OK;
+  assert( pIdx->isResized==0 );
+  nByte = (sizeof(char*) + sizeof(i16) + 1)*N;
+  zExtra = sqlite3DbMallocZero(db, nByte);
+  if( zExtra==0 ) return SQLITE_NOMEM;
+  memcpy(zExtra, pIdx->azColl, sizeof(char*)*pIdx->nColumn);
+  pIdx->azColl = (char**)zExtra;
+  zExtra += sizeof(char*)*N;
+  memcpy(zExtra, pIdx->aiColumn, sizeof(i16)*pIdx->nColumn);
+  pIdx->aiColumn = (i16*)zExtra;
+  zExtra += sizeof(i16)*N;
+  memcpy(zExtra, pIdx->aSortOrder, pIdx->nColumn);
+  pIdx->aSortOrder = (u8*)zExtra;
+  pIdx->nColumn = N;
+  pIdx->isResized = 1;
+  return SQLITE_OK;
+}
+
+/*
+** Estimate the total row width for a table.
+*/
+static void estimateTableWidth(Table *pTab){
+  unsigned wTable = 0;
+  const Column *pTabCol;
+  int i;
+  for(i=pTab->nCol, pTabCol=pTab->aCol; i>0; i--, pTabCol++){
+    wTable += pTabCol->szEst;
+  }
+  if( pTab->iPKey<0 ) wTable++;
+  pTab->szTabRow = sqlite3LogEst(wTable*4);
+}
+
+/*
+** Estimate the average size of a row for an index.
+*/
+static void estimateIndexWidth(Index *pIdx){
+  unsigned wIndex = 0;
+  int i;
+  const Column *aCol = pIdx->pTable->aCol;
+  for(i=0; i<pIdx->nColumn; i++){
+    i16 x = pIdx->aiColumn[i];
+    assert( x<pIdx->pTable->nCol );
+    wIndex += x<0 ? 1 : aCol[pIdx->aiColumn[i]].szEst;
+  }
+  pIdx->szIdxRow = sqlite3LogEst(wIndex*4);
+}
+
+/* Return true if value x is found any of the first nCol entries of aiCol[]
+*/
+static int hasColumn(const i16 *aiCol, int nCol, int x){
+  while( nCol-- > 0 ) if( x==*(aiCol++) ) return 1;
+  return 0;
+}
+
+/*
+** This routine runs at the end of parsing a CREATE TABLE statement that
+** has a WITHOUT ROWID clause.  The job of this routine is to convert both
+** internal schema data structures and the generated VDBE code so that they
+** are appropriate for a WITHOUT ROWID table instead of a rowid table.
+** Changes include:
+**
+**     (1)  Convert the OP_CreateTable into an OP_CreateIndex.  There is
+**          no rowid btree for a WITHOUT ROWID.  Instead, the canonical
+**          data storage is a covering index btree.
+**     (2)  Bypass the creation of the sqlite_master table entry
+**          for the PRIMARY KEY as the primary key index is now
+**          identified by the sqlite_master table entry of the table itself.
+**     (3)  Set the Index.tnum of the PRIMARY KEY Index object in the
+**          schema to the rootpage from the main table.
+**     (4)  Set all columns of the PRIMARY KEY schema object to be NOT NULL.
+**     (5)  Add all table columns to the PRIMARY KEY Index object
+**          so that the PRIMARY KEY is a covering index.  The surplus
+**          columns are part of KeyInfo.nXField and are not used for
+**          sorting or lookup or uniqueness checks.
+**     (6)  Replace the rowid tail on all automatically generated UNIQUE
+**          indices with the PRIMARY KEY columns.
+*/
+static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){
+  Index *pIdx;
+  Index *pPk;
+  int nPk;
+  int i, j;
+  sqlite3 *db = pParse->db;
+  Vdbe *v = pParse->pVdbe;
+
+  /* Convert the OP_CreateTable opcode that would normally create the
+  ** root-page for the table into an OP_CreateIndex opcode.  The index
+  ** created will become the PRIMARY KEY index.
+  */
+  if( pParse->addrCrTab ){
+    assert( v );
+    sqlite3VdbeGetOp(v, pParse->addrCrTab)->opcode = OP_CreateIndex;
+  }
+
+  /* Bypass the creation of the PRIMARY KEY btree and the sqlite_master
+  ** table entry.
+  */
+  if( pParse->addrSkipPK ){
+    assert( v );
+    sqlite3VdbeGetOp(v, pParse->addrSkipPK)->opcode = OP_Goto;
+  }
+
+  /* Locate the PRIMARY KEY index.  Or, if this table was originally
+  ** an INTEGER PRIMARY KEY table, create a new PRIMARY KEY index. 
+  */
+  if( pTab->iPKey>=0 ){
+    ExprList *pList;
+    pList = sqlite3ExprListAppend(pParse, 0, 0);
+    if( pList==0 ) return;
+    pList->a[0].zName = sqlite3DbStrDup(pParse->db,
+                                        pTab->aCol[pTab->iPKey].zName);
+    pList->a[0].sortOrder = pParse->iPkSortOrder;
+    assert( pParse->pNewTable==pTab );
+    pPk = sqlite3CreateIndex(pParse, 0, 0, 0, pList, pTab->keyConf, 0, 0, 0, 0);
+    if( pPk==0 ) return;
+    pPk->idxType = SQLITE_IDXTYPE_PRIMARYKEY;
+    pTab->iPKey = -1;
+  }else{
+    pPk = sqlite3PrimaryKeyIndex(pTab);
+    /*
+    ** Remove all redundant columns from the PRIMARY KEY.  For example, change
+    ** "PRIMARY KEY(a,b,a,b,c,b,c,d)" into just "PRIMARY KEY(a,b,c,d)".  Later
+    ** code assumes the PRIMARY KEY contains no repeated columns.
+    */
+    for(i=j=1; i<pPk->nKeyCol; i++){
+      if( hasColumn(pPk->aiColumn, j, pPk->aiColumn[i]) ){
+        pPk->nColumn--;
+      }else{
+        pPk->aiColumn[j++] = pPk->aiColumn[i];
+      }
+    }
+    pPk->nKeyCol = j;
+  }
+  pPk->isCovering = 1;
+  assert( pPk!=0 );
+  nPk = pPk->nKeyCol;
+
+  /* Make sure every column of the PRIMARY KEY is NOT NULL */
+  for(i=0; i<nPk; i++){
+    pTab->aCol[pPk->aiColumn[i]].notNull = 1;
+  }
+  pPk->uniqNotNull = 1;
+
+  /* The root page of the PRIMARY KEY is the table root page */
+  pPk->tnum = pTab->tnum;
+
+  /* Update the in-memory representation of all UNIQUE indices by converting
+  ** the final rowid column into one or more columns of the PRIMARY KEY.
+  */
+  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+    int n;
+    if( IsPrimaryKeyIndex(pIdx) ) continue;
+    for(i=n=0; i<nPk; i++){
+      if( !hasColumn(pIdx->aiColumn, pIdx->nKeyCol, pPk->aiColumn[i]) ) n++;
+    }
+    if( n==0 ){
+      /* This index is a superset of the primary key */
+      pIdx->nColumn = pIdx->nKeyCol;
+      continue;
+    }
+    if( resizeIndexObject(db, pIdx, pIdx->nKeyCol+n) ) return;
+    for(i=0, j=pIdx->nKeyCol; i<nPk; i++){
+      if( !hasColumn(pIdx->aiColumn, pIdx->nKeyCol, pPk->aiColumn[i]) ){
+        pIdx->aiColumn[j] = pPk->aiColumn[i];
+        pIdx->azColl[j] = pPk->azColl[i];
+        j++;
+      }
+    }
+    assert( pIdx->nColumn>=pIdx->nKeyCol+n );
+    assert( pIdx->nColumn>=j );
+  }
+
+  /* Add all table columns to the PRIMARY KEY index
+  */
+  if( nPk<pTab->nCol ){
+    if( resizeIndexObject(db, pPk, pTab->nCol) ) return;
+    for(i=0, j=nPk; i<pTab->nCol; i++){
+      if( !hasColumn(pPk->aiColumn, j, i) ){
+        assert( j<pPk->nColumn );
+        pPk->aiColumn[j] = i;
+        pPk->azColl[j] = "BINARY";
+        j++;
+      }
+    }
+    assert( pPk->nColumn==j );
+    assert( pTab->nCol==j );
+  }else{
+    pPk->nColumn = pTab->nCol;
+  }
+}
+
+/*
+** This routine is called to report the final ")" that terminates
+** a CREATE TABLE statement.
+**
+** The table structure that other action routines have been building
+** is added to the internal hash tables, assuming no errors have
+** occurred.
+**
+** An entry for the table is made in the master table on disk, unless
+** this is a temporary table or db->init.busy==1.  When db->init.busy==1
+** it means we are reading the sqlite_master table because we just
+** connected to the database or because the sqlite_master table has
+** recently changed, so the entry for this table already exists in
+** the sqlite_master table.  We do not want to create it again.
+**
+** If the pSelect argument is not NULL, it means that this routine
+** was called to create a table generated from a 
+** "CREATE TABLE ... AS SELECT ..." statement.  The column names of
+** the new table will match the result set of the SELECT.
+*/
+SQLITE_PRIVATE void sqlite3EndTable(
+  Parse *pParse,          /* Parse context */
+  Token *pCons,           /* The ',' token after the last column defn. */
+  Token *pEnd,            /* The ')' before options in the CREATE TABLE */
+  u8 tabOpts,             /* Extra table options. Usually 0. */
+  Select *pSelect         /* Select from a "CREATE ... AS SELECT" */
+){
+  Table *p;                 /* The new table */
+  sqlite3 *db = pParse->db; /* The database connection */
+  int iDb;                  /* Database in which the table lives */
+  Index *pIdx;              /* An implied index of the table */
+
+  if( (pEnd==0 && pSelect==0) || db->mallocFailed ){
+    return;
+  }
+  p = pParse->pNewTable;
+  if( p==0 ) return;
+
+  assert( !db->init.busy || !pSelect );
+
+  /* If the db->init.busy is 1 it means we are reading the SQL off the
+  ** "sqlite_master" or "sqlite_temp_master" table on the disk.
+  ** So do not write to the disk again.  Extract the root page number
+  ** for the table from the db->init.newTnum field.  (The page number
+  ** should have been put there by the sqliteOpenCb routine.)
+  */
+  if( db->init.busy ){
+    p->tnum = db->init.newTnum;
+  }
+
+  /* Special processing for WITHOUT ROWID Tables */
+  if( tabOpts & TF_WithoutRowid ){
+    if( (p->tabFlags & TF_Autoincrement) ){
+      sqlite3ErrorMsg(pParse,
+          "AUTOINCREMENT not allowed on WITHOUT ROWID tables");
+      return;
+    }
+    if( (p->tabFlags & TF_HasPrimaryKey)==0 ){
+      sqlite3ErrorMsg(pParse, "PRIMARY KEY missing on table %s", p->zName);
+    }else{
+      p->tabFlags |= TF_WithoutRowid;
+      convertToWithoutRowidTable(pParse, p);
+    }
+  }
+
+  iDb = sqlite3SchemaToIndex(db, p->pSchema);
+
+#ifndef SQLITE_OMIT_CHECK
+  /* Resolve names in all CHECK constraint expressions.
+  */
+  if( p->pCheck ){
+    sqlite3ResolveSelfReference(pParse, p, NC_IsCheck, 0, p->pCheck);
+  }
+#endif /* !defined(SQLITE_OMIT_CHECK) */
+
+  /* Estimate the average row size for the table and for all implied indices */
+  estimateTableWidth(p);
+  for(pIdx=p->pIndex; pIdx; pIdx=pIdx->pNext){
+    estimateIndexWidth(pIdx);
+  }
+
+  /* If not initializing, then create a record for the new table
+  ** in the SQLITE_MASTER table of the database.
+  **
+  ** If this is a TEMPORARY table, write the entry into the auxiliary
+  ** file instead of into the main database file.
+  */
+  if( !db->init.busy ){
+    int n;
+    Vdbe *v;
+    char *zType;    /* "view" or "table" */
+    char *zType2;   /* "VIEW" or "TABLE" */
+    char *zStmt;    /* Text of the CREATE TABLE or CREATE VIEW statement */
+
+    v = sqlite3GetVdbe(pParse);
+    if( NEVER(v==0) ) return;
+
+    sqlite3VdbeAddOp1(v, OP_Close, 0);
+
+    /* 
+    ** Initialize zType for the new view or table.
+    */
+    if( p->pSelect==0 ){
+      /* A regular table */
+      zType = "table";
+      zType2 = "TABLE";
+#ifndef SQLITE_OMIT_VIEW
+    }else{
+      /* A view */
+      zType = "view";
+      zType2 = "VIEW";
+#endif
+    }
+
+    /* If this is a CREATE TABLE xx AS SELECT ..., execute the SELECT
+    ** statement to populate the new table. The root-page number for the
+    ** new table is in register pParse->regRoot.
+    **
+    ** Once the SELECT has been coded by sqlite3Select(), it is in a
+    ** suitable state to query for the column names and types to be used
+    ** by the new table.
+    **
+    ** A shared-cache write-lock is not required to write to the new table,
+    ** as a schema-lock must have already been obtained to create it. Since
+    ** a schema-lock excludes all other database users, the write-lock would
+    ** be redundant.
+    */
+    if( pSelect ){
+      SelectDest dest;
+      Table *pSelTab;
+
+      assert(pParse->nTab==1);
+      sqlite3VdbeAddOp3(v, OP_OpenWrite, 1, pParse->regRoot, iDb);
+      sqlite3VdbeChangeP5(v, OPFLAG_P2ISREG);
+      pParse->nTab = 2;
+      sqlite3SelectDestInit(&dest, SRT_Table, 1);
+      sqlite3Select(pParse, pSelect, &dest);
+      sqlite3VdbeAddOp1(v, OP_Close, 1);
+      if( pParse->nErr==0 ){
+        pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect);
+        if( pSelTab==0 ) return;
+        assert( p->aCol==0 );
+        p->nCol = pSelTab->nCol;
+        p->aCol = pSelTab->aCol;
+        pSelTab->nCol = 0;
+        pSelTab->aCol = 0;
+        sqlite3DeleteTable(db, pSelTab);
+      }
+    }
+
+    /* Compute the complete text of the CREATE statement */
+    if( pSelect ){
+      zStmt = createTableStmt(db, p);
+    }else{
+      Token *pEnd2 = tabOpts ? &pParse->sLastToken : pEnd;
+      n = (int)(pEnd2->z - pParse->sNameToken.z);
+      if( pEnd2->z[0]!=';' ) n += pEnd2->n;
+      zStmt = sqlite3MPrintf(db, 
+          "CREATE %s %.*s", zType2, n, pParse->sNameToken.z
+      );
+    }
+
+    /* A slot for the record has already been allocated in the 
+    ** SQLITE_MASTER table.  We just need to update that slot with all
+    ** the information we've collected.
+    */
+    sqlite3NestedParse(pParse,
+      "UPDATE %Q.%s "
+         "SET type='%s', name=%Q, tbl_name=%Q, rootpage=#%d, sql=%Q "
+       "WHERE rowid=#%d",
+      db->aDb[iDb].zName, SCHEMA_TABLE(iDb),
+      zType,
+      p->zName,
+      p->zName,
+      pParse->regRoot,
+      zStmt,
+      pParse->regRowid
+    );
+    sqlite3DbFree(db, zStmt);
+    sqlite3ChangeCookie(pParse, iDb);
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+    /* Check to see if we need to create an sqlite_sequence table for
+    ** keeping track of autoincrement keys.
+    */
+    if( p->tabFlags & TF_Autoincrement ){
+      Db *pDb = &db->aDb[iDb];
+      assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+      if( pDb->pSchema->pSeqTab==0 ){
+        sqlite3NestedParse(pParse,
+          "CREATE TABLE %Q.sqlite_sequence(name,seq)",
+          pDb->zName
+        );
+      }
+    }
+#endif
+
+    /* Reparse everything to update our internal data structures */
+    sqlite3VdbeAddParseSchemaOp(v, iDb,
+           sqlite3MPrintf(db, "tbl_name='%q' AND type!='trigger'", p->zName));
+  }
+
+
+  /* Add the table to the in-memory representation of the database.
+  */
+  if( db->init.busy ){
+    Table *pOld;
+    Schema *pSchema = p->pSchema;
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    pOld = sqlite3HashInsert(&pSchema->tblHash, p->zName, p);
+    if( pOld ){
+      assert( p==pOld );  /* Malloc must have failed inside HashInsert() */
+      db->mallocFailed = 1;
+      return;
+    }
+    pParse->pNewTable = 0;
+    db->flags |= SQLITE_InternChanges;
+
+#ifndef SQLITE_OMIT_ALTERTABLE
+    if( !p->pSelect ){
+      const char *zName = (const char *)pParse->sNameToken.z;
+      int nName;
+      assert( !pSelect && pCons && pEnd );
+      if( pCons->z==0 ){
+        pCons = pEnd;
+      }
+      nName = (int)((const char *)pCons->z - zName);
+      p->addColOffset = 13 + sqlite3Utf8CharLen(zName, nName);
+    }
+#endif
+  }
+}
+
+#ifndef SQLITE_OMIT_VIEW
+/*
+** The parser calls this routine in order to create a new VIEW
+*/
+SQLITE_PRIVATE void sqlite3CreateView(
+  Parse *pParse,     /* The parsing context */
+  Token *pBegin,     /* The CREATE token that begins the statement */
+  Token *pName1,     /* The token that holds the name of the view */
+  Token *pName2,     /* The token that holds the name of the view */
+  Select *pSelect,   /* A SELECT statement that will become the new view */
+  int isTemp,        /* TRUE for a TEMPORARY view */
+  int noErr          /* Suppress error messages if VIEW already exists */
+){
+  Table *p;
+  int n;
+  const char *z;
+  Token sEnd;
+  DbFixer sFix;
+  Token *pName = 0;
+  int iDb;
+  sqlite3 *db = pParse->db;
+
+  if( pParse->nVar>0 ){
+    sqlite3ErrorMsg(pParse, "parameters are not allowed in views");
+    sqlite3SelectDelete(db, pSelect);
+    return;
+  }
+  sqlite3StartTable(pParse, pName1, pName2, isTemp, 1, 0, noErr);
+  p = pParse->pNewTable;
+  if( p==0 || pParse->nErr ){
+    sqlite3SelectDelete(db, pSelect);
+    return;
+  }
+  sqlite3TwoPartName(pParse, pName1, pName2, &pName);
+  iDb = sqlite3SchemaToIndex(db, p->pSchema);
+  sqlite3FixInit(&sFix, pParse, iDb, "view", pName);
+  if( sqlite3FixSelect(&sFix, pSelect) ){
+    sqlite3SelectDelete(db, pSelect);
+    return;
+  }
+
+  /* Make a copy of the entire SELECT statement that defines the view.
+  ** This will force all the Expr.token.z values to be dynamically
+  ** allocated rather than point to the input string - which means that
+  ** they will persist after the current sqlite3_exec() call returns.
+  */
+  p->pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE);
+  sqlite3SelectDelete(db, pSelect);
+  if( db->mallocFailed ){
+    return;
+  }
+  if( !db->init.busy ){
+    sqlite3ViewGetColumnNames(pParse, p);
+  }
+
+  /* Locate the end of the CREATE VIEW statement.  Make sEnd point to
+  ** the end.
+  */
+  sEnd = pParse->sLastToken;
+  if( ALWAYS(sEnd.z[0]!=0) && sEnd.z[0]!=';' ){
+    sEnd.z += sEnd.n;
+  }
+  sEnd.n = 0;
+  n = (int)(sEnd.z - pBegin->z);
+  z = pBegin->z;
+  while( ALWAYS(n>0) && sqlite3Isspace(z[n-1]) ){ n--; }
+  sEnd.z = &z[n-1];
+  sEnd.n = 1;
+
+  /* Use sqlite3EndTable() to add the view to the SQLITE_MASTER table */
+  sqlite3EndTable(pParse, 0, &sEnd, 0, 0);
+  return;
+}
+#endif /* SQLITE_OMIT_VIEW */
+
+#if !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE)
+/*
+** The Table structure pTable is really a VIEW.  Fill in the names of
+** the columns of the view in the pTable structure.  Return the number
+** of errors.  If an error is seen leave an error message in pParse->zErrMsg.
+*/
+SQLITE_PRIVATE int sqlite3ViewGetColumnNames(Parse *pParse, Table *pTable){
+  Table *pSelTab;   /* A fake table from which we get the result set */
+  Select *pSel;     /* Copy of the SELECT that implements the view */
+  int nErr = 0;     /* Number of errors encountered */
+  int n;            /* Temporarily holds the number of cursors assigned */
+  sqlite3 *db = pParse->db;  /* Database connection for malloc errors */
+  sqlite3_xauth xAuth;       /* Saved xAuth pointer */
+
+  assert( pTable );
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( sqlite3VtabCallConnect(pParse, pTable) ){
+    return SQLITE_ERROR;
+  }
+  if( IsVirtual(pTable) ) return 0;
+#endif
+
+#ifndef SQLITE_OMIT_VIEW
+  /* A positive nCol means the columns names for this view are
+  ** already known.
+  */
+  if( pTable->nCol>0 ) return 0;
+
+  /* A negative nCol is a special marker meaning that we are currently
+  ** trying to compute the column names.  If we enter this routine with
+  ** a negative nCol, it means two or more views form a loop, like this:
+  **
+  **     CREATE VIEW one AS SELECT * FROM two;
+  **     CREATE VIEW two AS SELECT * FROM one;
+  **
+  ** Actually, the error above is now caught prior to reaching this point.
+  ** But the following test is still important as it does come up
+  ** in the following:
+  ** 
+  **     CREATE TABLE main.ex1(a);
+  **     CREATE TEMP VIEW ex1 AS SELECT a FROM ex1;
+  **     SELECT * FROM temp.ex1;
+  */
+  if( pTable->nCol<0 ){
+    sqlite3ErrorMsg(pParse, "view %s is circularly defined", pTable->zName);
+    return 1;
+  }
+  assert( pTable->nCol>=0 );
+
+  /* If we get this far, it means we need to compute the table names.
+  ** Note that the call to sqlite3ResultSetOfSelect() will expand any
+  ** "*" elements in the results set of the view and will assign cursors
+  ** to the elements of the FROM clause.  But we do not want these changes
+  ** to be permanent.  So the computation is done on a copy of the SELECT
+  ** statement that defines the view.
+  */
+  assert( pTable->pSelect );
+  pSel = sqlite3SelectDup(db, pTable->pSelect, 0);
+  if( pSel ){
+    u8 enableLookaside = db->lookaside.bEnabled;
+    n = pParse->nTab;
+    sqlite3SrcListAssignCursors(pParse, pSel->pSrc);
+    pTable->nCol = -1;
+    db->lookaside.bEnabled = 0;
+#ifndef SQLITE_OMIT_AUTHORIZATION
+    xAuth = db->xAuth;
+    db->xAuth = 0;
+    pSelTab = sqlite3ResultSetOfSelect(pParse, pSel);
+    db->xAuth = xAuth;
+#else
+    pSelTab = sqlite3ResultSetOfSelect(pParse, pSel);
+#endif
+    db->lookaside.bEnabled = enableLookaside;
+    pParse->nTab = n;
+    if( pSelTab ){
+      assert( pTable->aCol==0 );
+      pTable->nCol = pSelTab->nCol;
+      pTable->aCol = pSelTab->aCol;
+      pSelTab->nCol = 0;
+      pSelTab->aCol = 0;
+      sqlite3DeleteTable(db, pSelTab);
+      assert( sqlite3SchemaMutexHeld(db, 0, pTable->pSchema) );
+      pTable->pSchema->schemaFlags |= DB_UnresetViews;
+    }else{
+      pTable->nCol = 0;
+      nErr++;
+    }
+    sqlite3SelectDelete(db, pSel);
+  } else {
+    nErr++;
+  }
+#endif /* SQLITE_OMIT_VIEW */
+  return nErr;  
+}
+#endif /* !defined(SQLITE_OMIT_VIEW) || !defined(SQLITE_OMIT_VIRTUALTABLE) */
+
+#ifndef SQLITE_OMIT_VIEW
+/*
+** Clear the column names from every VIEW in database idx.
+*/
+static void sqliteViewResetAll(sqlite3 *db, int idx){
+  HashElem *i;
+  assert( sqlite3SchemaMutexHeld(db, idx, 0) );
+  if( !DbHasProperty(db, idx, DB_UnresetViews) ) return;
+  for(i=sqliteHashFirst(&db->aDb[idx].pSchema->tblHash); i;i=sqliteHashNext(i)){
+    Table *pTab = sqliteHashData(i);
+    if( pTab->pSelect ){
+      sqliteDeleteColumnNames(db, pTab);
+      pTab->aCol = 0;
+      pTab->nCol = 0;
+    }
+  }
+  DbClearProperty(db, idx, DB_UnresetViews);
+}
+#else
+# define sqliteViewResetAll(A,B)
+#endif /* SQLITE_OMIT_VIEW */
+
+/*
+** This function is called by the VDBE to adjust the internal schema
+** used by SQLite when the btree layer moves a table root page. The
+** root-page of a table or index in database iDb has changed from iFrom
+** to iTo.
+**
+** Ticket #1728:  The symbol table might still contain information
+** on tables and/or indices that are the process of being deleted.
+** If you are unlucky, one of those deleted indices or tables might
+** have the same rootpage number as the real table or index that is
+** being moved.  So we cannot stop searching after the first match 
+** because the first match might be for one of the deleted indices
+** or tables and not the table/index that is actually being moved.
+** We must continue looping until all tables and indices with
+** rootpage==iFrom have been converted to have a rootpage of iTo
+** in order to be certain that we got the right one.
+*/
+#ifndef SQLITE_OMIT_AUTOVACUUM
+SQLITE_PRIVATE void sqlite3RootPageMoved(sqlite3 *db, int iDb, int iFrom, int iTo){
+  HashElem *pElem;
+  Hash *pHash;
+  Db *pDb;
+
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  pDb = &db->aDb[iDb];
+  pHash = &pDb->pSchema->tblHash;
+  for(pElem=sqliteHashFirst(pHash); pElem; pElem=sqliteHashNext(pElem)){
+    Table *pTab = sqliteHashData(pElem);
+    if( pTab->tnum==iFrom ){
+      pTab->tnum = iTo;
+    }
+  }
+  pHash = &pDb->pSchema->idxHash;
+  for(pElem=sqliteHashFirst(pHash); pElem; pElem=sqliteHashNext(pElem)){
+    Index *pIdx = sqliteHashData(pElem);
+    if( pIdx->tnum==iFrom ){
+      pIdx->tnum = iTo;
+    }
+  }
+}
+#endif
+
+/*
+** Write code to erase the table with root-page iTable from database iDb.
+** Also write code to modify the sqlite_master table and internal schema
+** if a root-page of another table is moved by the btree-layer whilst
+** erasing iTable (this can happen with an auto-vacuum database).
+*/ 
+static void destroyRootPage(Parse *pParse, int iTable, int iDb){
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  int r1 = sqlite3GetTempReg(pParse);
+  sqlite3VdbeAddOp3(v, OP_Destroy, iTable, r1, iDb);
+  sqlite3MayAbort(pParse);
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  /* OP_Destroy stores an in integer r1. If this integer
+  ** is non-zero, then it is the root page number of a table moved to
+  ** location iTable. The following code modifies the sqlite_master table to
+  ** reflect this.
+  **
+  ** The "#NNN" in the SQL is a special constant that means whatever value
+  ** is in register NNN.  See grammar rules associated with the TK_REGISTER
+  ** token for additional information.
+  */
+  sqlite3NestedParse(pParse, 
+     "UPDATE %Q.%s SET rootpage=%d WHERE #%d AND rootpage=#%d",
+     pParse->db->aDb[iDb].zName, SCHEMA_TABLE(iDb), iTable, r1, r1);
+#endif
+  sqlite3ReleaseTempReg(pParse, r1);
+}
+
+/*
+** Write VDBE code to erase table pTab and all associated indices on disk.
+** Code to update the sqlite_master tables and internal schema definitions
+** in case a root-page belonging to another table is moved by the btree layer
+** is also added (this can happen with an auto-vacuum database).
+*/
+static void destroyTable(Parse *pParse, Table *pTab){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  Index *pIdx;
+  int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+  destroyRootPage(pParse, pTab->tnum, iDb);
+  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+    destroyRootPage(pParse, pIdx->tnum, iDb);
+  }
+#else
+  /* If the database may be auto-vacuum capable (if SQLITE_OMIT_AUTOVACUUM
+  ** is not defined), then it is important to call OP_Destroy on the
+  ** table and index root-pages in order, starting with the numerically 
+  ** largest root-page number. This guarantees that none of the root-pages
+  ** to be destroyed is relocated by an earlier OP_Destroy. i.e. if the
+  ** following were coded:
+  **
+  ** OP_Destroy 4 0
+  ** ...
+  ** OP_Destroy 5 0
+  **
+  ** and root page 5 happened to be the largest root-page number in the
+  ** database, then root page 5 would be moved to page 4 by the 
+  ** "OP_Destroy 4 0" opcode. The subsequent "OP_Destroy 5 0" would hit
+  ** a free-list page.
+  */
+  int iTab = pTab->tnum;
+  int iDestroyed = 0;
+
+  while( 1 ){
+    Index *pIdx;
+    int iLargest = 0;
+
+    if( iDestroyed==0 || iTab<iDestroyed ){
+      iLargest = iTab;
+    }
+    for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      int iIdx = pIdx->tnum;
+      assert( pIdx->pSchema==pTab->pSchema );
+      if( (iDestroyed==0 || (iIdx<iDestroyed)) && iIdx>iLargest ){
+        iLargest = iIdx;
+      }
+    }
+    if( iLargest==0 ){
+      return;
+    }else{
+      int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+      assert( iDb>=0 && iDb<pParse->db->nDb );
+      destroyRootPage(pParse, iLargest, iDb);
+      iDestroyed = iLargest;
+    }
+  }
+#endif
+}
+
+/*
+** Remove entries from the sqlite_statN tables (for N in (1,2,3))
+** after a DROP INDEX or DROP TABLE command.
+*/
+static void sqlite3ClearStatTables(
+  Parse *pParse,         /* The parsing context */
+  int iDb,               /* The database number */
+  const char *zType,     /* "idx" or "tbl" */
+  const char *zName      /* Name of index or table */
+){
+  int i;
+  const char *zDbName = pParse->db->aDb[iDb].zName;
+  for(i=1; i<=4; i++){
+    char zTab[24];
+    sqlite3_snprintf(sizeof(zTab),zTab,"sqlite_stat%d",i);
+    if( sqlite3FindTable(pParse->db, zTab, zDbName) ){
+      sqlite3NestedParse(pParse,
+        "DELETE FROM %Q.%s WHERE %s=%Q",
+        zDbName, zTab, zType, zName
+      );
+    }
+  }
+}
+
+/*
+** Generate code to drop a table.
+*/
+SQLITE_PRIVATE void sqlite3CodeDropTable(Parse *pParse, Table *pTab, int iDb, int isView){
+  Vdbe *v;
+  sqlite3 *db = pParse->db;
+  Trigger *pTrigger;
+  Db *pDb = &db->aDb[iDb];
+
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );
+  sqlite3BeginWriteOperation(pParse, 1, iDb);
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( IsVirtual(pTab) ){
+    sqlite3VdbeAddOp0(v, OP_VBegin);
+  }
+#endif
+
+  /* Drop all triggers associated with the table being dropped. Code
+  ** is generated to remove entries from sqlite_master and/or
+  ** sqlite_temp_master if required.
+  */
+  pTrigger = sqlite3TriggerList(pParse, pTab);
+  while( pTrigger ){
+    assert( pTrigger->pSchema==pTab->pSchema || 
+        pTrigger->pSchema==db->aDb[1].pSchema );
+    sqlite3DropTriggerPtr(pParse, pTrigger);
+    pTrigger = pTrigger->pNext;
+  }
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+  /* Remove any entries of the sqlite_sequence table associated with
+  ** the table being dropped. This is done before the table is dropped
+  ** at the btree level, in case the sqlite_sequence table needs to
+  ** move as a result of the drop (can happen in auto-vacuum mode).
+  */
+  if( pTab->tabFlags & TF_Autoincrement ){
+    sqlite3NestedParse(pParse,
+      "DELETE FROM %Q.sqlite_sequence WHERE name=%Q",
+      pDb->zName, pTab->zName
+    );
+  }
+#endif
+
+  /* Drop all SQLITE_MASTER table and index entries that refer to the
+  ** table. The program name loops through the master table and deletes
+  ** every row that refers to a table of the same name as the one being
+  ** dropped. Triggers are handled separately because a trigger can be
+  ** created in the temp database that refers to a table in another
+  ** database.
+  */
+  sqlite3NestedParse(pParse, 
+      "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
+      pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);
+  if( !isView && !IsVirtual(pTab) ){
+    destroyTable(pParse, pTab);
+  }
+
+  /* Remove the table entry from SQLite's internal schema and modify
+  ** the schema cookie.
+  */
+  if( IsVirtual(pTab) ){
+    sqlite3VdbeAddOp4(v, OP_VDestroy, iDb, 0, 0, pTab->zName, 0);
+  }
+  sqlite3VdbeAddOp4(v, OP_DropTable, iDb, 0, 0, pTab->zName, 0);
+  sqlite3ChangeCookie(pParse, iDb);
+  sqliteViewResetAll(db, iDb);
+}
+
+/*
+** This routine is called to do the work of a DROP TABLE statement.
+** pName is the name of the table to be dropped.
+*/
+SQLITE_PRIVATE void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView, int noErr){
+  Table *pTab;
+  Vdbe *v;
+  sqlite3 *db = pParse->db;
+  int iDb;
+
+  if( db->mallocFailed ){
+    goto exit_drop_table;
+  }
+  assert( pParse->nErr==0 );
+  assert( pName->nSrc==1 );
+  if( noErr ) db->suppressErr++;
+  pTab = sqlite3LocateTableItem(pParse, isView, &pName->a[0]);
+  if( noErr ) db->suppressErr--;
+
+  if( pTab==0 ){
+    if( noErr ) sqlite3CodeVerifyNamedSchema(pParse, pName->a[0].zDatabase);
+    goto exit_drop_table;
+  }
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  assert( iDb>=0 && iDb<db->nDb );
+
+  /* If pTab is a virtual table, call ViewGetColumnNames() to ensure
+  ** it is initialized.
+  */
+  if( IsVirtual(pTab) && sqlite3ViewGetColumnNames(pParse, pTab) ){
+    goto exit_drop_table;
+  }
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  {
+    int code;
+    const char *zTab = SCHEMA_TABLE(iDb);
+    const char *zDb = db->aDb[iDb].zName;
+    const char *zArg2 = 0;
+    if( sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb)){
+      goto exit_drop_table;
+    }
+    if( isView ){
+      if( !OMIT_TEMPDB && iDb==1 ){
+        code = SQLITE_DROP_TEMP_VIEW;
+      }else{
+        code = SQLITE_DROP_VIEW;
+      }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    }else if( IsVirtual(pTab) ){
+      code = SQLITE_DROP_VTABLE;
+      zArg2 = sqlite3GetVTable(db, pTab)->pMod->zName;
+#endif
+    }else{
+      if( !OMIT_TEMPDB && iDb==1 ){
+        code = SQLITE_DROP_TEMP_TABLE;
+      }else{
+        code = SQLITE_DROP_TABLE;
+      }
+    }
+    if( sqlite3AuthCheck(pParse, code, pTab->zName, zArg2, zDb) ){
+      goto exit_drop_table;
+    }
+    if( sqlite3AuthCheck(pParse, SQLITE_DELETE, pTab->zName, 0, zDb) ){
+      goto exit_drop_table;
+    }
+  }
+#endif
+  if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 
+    && sqlite3StrNICmp(pTab->zName, "sqlite_stat", 11)!=0 ){
+    sqlite3ErrorMsg(pParse, "table %s may not be dropped", pTab->zName);
+    goto exit_drop_table;
+  }
+
+#ifndef SQLITE_OMIT_VIEW
+  /* Ensure DROP TABLE is not used on a view, and DROP VIEW is not used
+  ** on a table.
+  */
+  if( isView && pTab->pSelect==0 ){
+    sqlite3ErrorMsg(pParse, "use DROP TABLE to delete table %s", pTab->zName);
+    goto exit_drop_table;
+  }
+  if( !isView && pTab->pSelect ){
+    sqlite3ErrorMsg(pParse, "use DROP VIEW to delete view %s", pTab->zName);
+    goto exit_drop_table;
+  }
+#endif
+
+  /* Generate code to remove the table from the master table
+  ** on disk.
+  */
+  v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3BeginWriteOperation(pParse, 1, iDb);
+    sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName);
+    sqlite3FkDropTable(pParse, pName, pTab);
+    sqlite3CodeDropTable(pParse, pTab, iDb, isView);
+  }
+
+exit_drop_table:
+  sqlite3SrcListDelete(db, pName);
+}
+
+/*
+** This routine is called to create a new foreign key on the table
+** currently under construction.  pFromCol determines which columns
+** in the current table point to the foreign key.  If pFromCol==0 then
+** connect the key to the last column inserted.  pTo is the name of
+** the table referred to (a.k.a the "parent" table).  pToCol is a list
+** of tables in the parent pTo table.  flags contains all
+** information about the conflict resolution algorithms specified
+** in the ON DELETE, ON UPDATE and ON INSERT clauses.
+**
+** An FKey structure is created and added to the table currently
+** under construction in the pParse->pNewTable field.
+**
+** The foreign key is set for IMMEDIATE processing.  A subsequent call
+** to sqlite3DeferForeignKey() might change this to DEFERRED.
+*/
+SQLITE_PRIVATE void sqlite3CreateForeignKey(
+  Parse *pParse,       /* Parsing context */
+  ExprList *pFromCol,  /* Columns in this table that point to other table */
+  Token *pTo,          /* Name of the other table */
+  ExprList *pToCol,    /* Columns in the other table */
+  int flags            /* Conflict resolution algorithms. */
+){
+  sqlite3 *db = pParse->db;
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+  FKey *pFKey = 0;
+  FKey *pNextTo;
+  Table *p = pParse->pNewTable;
+  int nByte;
+  int i;
+  int nCol;
+  char *z;
+
+  assert( pTo!=0 );
+  if( p==0 || IN_DECLARE_VTAB ) goto fk_end;
+  if( pFromCol==0 ){
+    int iCol = p->nCol-1;
+    if( NEVER(iCol<0) ) goto fk_end;
+    if( pToCol && pToCol->nExpr!=1 ){
+      sqlite3ErrorMsg(pParse, "foreign key on %s"
+         " should reference only one column of table %T",
+         p->aCol[iCol].zName, pTo);
+      goto fk_end;
+    }
+    nCol = 1;
+  }else if( pToCol && pToCol->nExpr!=pFromCol->nExpr ){
+    sqlite3ErrorMsg(pParse,
+        "number of columns in foreign key does not match the number of "
+        "columns in the referenced table");
+    goto fk_end;
+  }else{
+    nCol = pFromCol->nExpr;
+  }
+  nByte = sizeof(*pFKey) + (nCol-1)*sizeof(pFKey->aCol[0]) + pTo->n + 1;
+  if( pToCol ){
+    for(i=0; i<pToCol->nExpr; i++){
+      nByte += sqlite3Strlen30(pToCol->a[i].zName) + 1;
+    }
+  }
+  pFKey = sqlite3DbMallocZero(db, nByte );
+  if( pFKey==0 ){
+    goto fk_end;
+  }
+  pFKey->pFrom = p;
+  pFKey->pNextFrom = p->pFKey;
+  z = (char*)&pFKey->aCol[nCol];
+  pFKey->zTo = z;
+  memcpy(z, pTo->z, pTo->n);
+  z[pTo->n] = 0;
+  sqlite3Dequote(z);
+  z += pTo->n+1;
+  pFKey->nCol = nCol;
+  if( pFromCol==0 ){
+    pFKey->aCol[0].iFrom = p->nCol-1;
+  }else{
+    for(i=0; i<nCol; i++){
+      int j;
+      for(j=0; j<p->nCol; j++){
+        if( sqlite3StrICmp(p->aCol[j].zName, pFromCol->a[i].zName)==0 ){
+          pFKey->aCol[i].iFrom = j;
+          break;
+        }
+      }
+      if( j>=p->nCol ){
+        sqlite3ErrorMsg(pParse, 
+          "unknown column \"%s\" in foreign key definition", 
+          pFromCol->a[i].zName);
+        goto fk_end;
+      }
+    }
+  }
+  if( pToCol ){
+    for(i=0; i<nCol; i++){
+      int n = sqlite3Strlen30(pToCol->a[i].zName);
+      pFKey->aCol[i].zCol = z;
+      memcpy(z, pToCol->a[i].zName, n);
+      z[n] = 0;
+      z += n+1;
+    }
+  }
+  pFKey->isDeferred = 0;
+  pFKey->aAction[0] = (u8)(flags & 0xff);            /* ON DELETE action */
+  pFKey->aAction[1] = (u8)((flags >> 8 ) & 0xff);    /* ON UPDATE action */
+
+  assert( sqlite3SchemaMutexHeld(db, 0, p->pSchema) );
+  pNextTo = (FKey *)sqlite3HashInsert(&p->pSchema->fkeyHash, 
+      pFKey->zTo, (void *)pFKey
+  );
+  if( pNextTo==pFKey ){
+    db->mallocFailed = 1;
+    goto fk_end;
+  }
+  if( pNextTo ){
+    assert( pNextTo->pPrevTo==0 );
+    pFKey->pNextTo = pNextTo;
+    pNextTo->pPrevTo = pFKey;
+  }
+
+  /* Link the foreign key to the table as the last step.
+  */
+  p->pFKey = pFKey;
+  pFKey = 0;
+
+fk_end:
+  sqlite3DbFree(db, pFKey);
+#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */
+  sqlite3ExprListDelete(db, pFromCol);
+  sqlite3ExprListDelete(db, pToCol);
+}
+
+/*
+** This routine is called when an INITIALLY IMMEDIATE or INITIALLY DEFERRED
+** clause is seen as part of a foreign key definition.  The isDeferred
+** parameter is 1 for INITIALLY DEFERRED and 0 for INITIALLY IMMEDIATE.
+** The behavior of the most recently created foreign key is adjusted
+** accordingly.
+*/
+SQLITE_PRIVATE void sqlite3DeferForeignKey(Parse *pParse, int isDeferred){
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+  Table *pTab;
+  FKey *pFKey;
+  if( (pTab = pParse->pNewTable)==0 || (pFKey = pTab->pFKey)==0 ) return;
+  assert( isDeferred==0 || isDeferred==1 ); /* EV: R-30323-21917 */
+  pFKey->isDeferred = (u8)isDeferred;
+#endif
+}
+
+/*
+** Generate code that will erase and refill index *pIdx.  This is
+** used to initialize a newly created index or to recompute the
+** content of an index in response to a REINDEX command.
+**
+** if memRootPage is not negative, it means that the index is newly
+** created.  The register specified by memRootPage contains the
+** root page number of the index.  If memRootPage is negative, then
+** the index already exists and must be cleared before being refilled and
+** the root page number of the index is taken from pIndex->tnum.
+*/
+static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
+  Table *pTab = pIndex->pTable;  /* The table that is indexed */
+  int iTab = pParse->nTab++;     /* Btree cursor used for pTab */
+  int iIdx = pParse->nTab++;     /* Btree cursor used for pIndex */
+  int iSorter;                   /* Cursor opened by OpenSorter (if in use) */
+  int addr1;                     /* Address of top of loop */
+  int addr2;                     /* Address to jump to for next iteration */
+  int tnum;                      /* Root page of index */
+  int iPartIdxLabel;             /* Jump to this label to skip a row */
+  Vdbe *v;                       /* Generate code into this virtual machine */
+  KeyInfo *pKey;                 /* KeyInfo for index */
+  int regRecord;                 /* Register holding assembled index record */
+  sqlite3 *db = pParse->db;      /* The database connection */
+  int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0,
+      db->aDb[iDb].zName ) ){
+    return;
+  }
+#endif
+
+  /* Require a write-lock on the table to perform this operation */
+  sqlite3TableLock(pParse, iDb, pTab->tnum, 1, pTab->zName);
+
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ) return;
+  if( memRootPage>=0 ){
+    tnum = memRootPage;
+  }else{
+    tnum = pIndex->tnum;
+  }
+  pKey = sqlite3KeyInfoOfIndex(pParse, pIndex);
+
+  /* Open the sorter cursor if we are to use one. */
+  iSorter = pParse->nTab++;
+  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, pIndex->nKeyCol, (char*)
+                    sqlite3KeyInfoRef(pKey), P4_KEYINFO);
+
+  /* Open the table. Loop through all rows of the table, inserting index
+  ** records into the sorter. */
+  sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
+  addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v);
+  regRecord = sqlite3GetTempReg(pParse);
+
+  sqlite3GenerateIndexKey(pParse,pIndex,iTab,regRecord,0,&iPartIdxLabel,0,0);
+  sqlite3VdbeAddOp2(v, OP_SorterInsert, iSorter, regRecord);
+  sqlite3ResolvePartIdxLabel(pParse, iPartIdxLabel);
+  sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); VdbeCoverage(v);
+  sqlite3VdbeJumpHere(v, addr1);
+  if( memRootPage<0 ) sqlite3VdbeAddOp2(v, OP_Clear, tnum, iDb);
+  sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, tnum, iDb, 
+                    (char *)pKey, P4_KEYINFO);
+  sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR|((memRootPage>=0)?OPFLAG_P2ISREG:0));
+
+  addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0); VdbeCoverage(v);
+  assert( pKey!=0 || db->mallocFailed || pParse->nErr );
+  if( IsUniqueIndex(pIndex) && pKey!=0 ){
+    int j2 = sqlite3VdbeCurrentAddr(v) + 3;
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, j2);
+    addr2 = sqlite3VdbeCurrentAddr(v);
+    sqlite3VdbeAddOp4Int(v, OP_SorterCompare, iSorter, j2, regRecord,
+                         pIndex->nKeyCol); VdbeCoverage(v);
+    sqlite3UniqueConstraint(pParse, OE_Abort, pIndex);
+  }else{
+    addr2 = sqlite3VdbeCurrentAddr(v);
+  }
+  sqlite3VdbeAddOp3(v, OP_SorterData, iSorter, regRecord, iIdx);
+  sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 1);
+  sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+  sqlite3ReleaseTempReg(pParse, regRecord);
+  sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2); VdbeCoverage(v);
+  sqlite3VdbeJumpHere(v, addr1);
+
+  sqlite3VdbeAddOp1(v, OP_Close, iTab);
+  sqlite3VdbeAddOp1(v, OP_Close, iIdx);
+  sqlite3VdbeAddOp1(v, OP_Close, iSorter);
+}
+
+/*
+** Allocate heap space to hold an Index object with nCol columns.
+**
+** Increase the allocation size to provide an extra nExtra bytes
+** of 8-byte aligned space after the Index object and return a
+** pointer to this extra space in *ppExtra.
+*/
+SQLITE_PRIVATE Index *sqlite3AllocateIndexObject(
+  sqlite3 *db,         /* Database connection */
+  i16 nCol,            /* Total number of columns in the index */
+  int nExtra,          /* Number of bytes of extra space to alloc */
+  char **ppExtra       /* Pointer to the "extra" space */
+){
+  Index *p;            /* Allocated index object */
+  int nByte;           /* Bytes of space for Index object + arrays */
+
+  nByte = ROUND8(sizeof(Index)) +              /* Index structure  */
+          ROUND8(sizeof(char*)*nCol) +         /* Index.azColl     */
+          ROUND8(sizeof(LogEst)*(nCol+1) +     /* Index.aiRowLogEst   */
+                 sizeof(i16)*nCol +            /* Index.aiColumn   */
+                 sizeof(u8)*nCol);             /* Index.aSortOrder */
+  p = sqlite3DbMallocZero(db, nByte + nExtra);
+  if( p ){
+    char *pExtra = ((char*)p)+ROUND8(sizeof(Index));
+    p->azColl = (char**)pExtra;       pExtra += ROUND8(sizeof(char*)*nCol);
+    p->aiRowLogEst = (LogEst*)pExtra; pExtra += sizeof(LogEst)*(nCol+1);
+    p->aiColumn = (i16*)pExtra;       pExtra += sizeof(i16)*nCol;
+    p->aSortOrder = (u8*)pExtra;
+    p->nColumn = nCol;
+    p->nKeyCol = nCol - 1;
+    *ppExtra = ((char*)p) + nByte;
+  }
+  return p;
+}
+
+/*
+** Create a new index for an SQL table.  pName1.pName2 is the name of the index 
+** and pTblList is the name of the table that is to be indexed.  Both will 
+** be NULL for a primary key or an index that is created to satisfy a
+** UNIQUE constraint.  If pTable and pIndex are NULL, use pParse->pNewTable
+** as the table to be indexed.  pParse->pNewTable is a table that is
+** currently being constructed by a CREATE TABLE statement.
+**
+** pList is a list of columns to be indexed.  pList will be NULL if this
+** is a primary key or unique-constraint on the most recent column added
+** to the table currently under construction.  
+**
+** If the index is created successfully, return a pointer to the new Index
+** structure. This is used by sqlite3AddPrimaryKey() to mark the index
+** as the tables primary key (Index.idxType==SQLITE_IDXTYPE_PRIMARYKEY)
+*/
+SQLITE_PRIVATE Index *sqlite3CreateIndex(
+  Parse *pParse,     /* All information about this parse */
+  Token *pName1,     /* First part of index name. May be NULL */
+  Token *pName2,     /* Second part of index name. May be NULL */
+  SrcList *pTblName, /* Table to index. Use pParse->pNewTable if 0 */
+  ExprList *pList,   /* A list of columns to be indexed */
+  int onError,       /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
+  Token *pStart,     /* The CREATE token that begins this statement */
+  Expr *pPIWhere,    /* WHERE clause for partial indices */
+  int sortOrder,     /* Sort order of primary key when pList==NULL */
+  int ifNotExist     /* Omit error if index already exists */
+){
+  Index *pRet = 0;     /* Pointer to return */
+  Table *pTab = 0;     /* Table to be indexed */
+  Index *pIndex = 0;   /* The index to be created */
+  char *zName = 0;     /* Name of the index */
+  int nName;           /* Number of characters in zName */
+  int i, j;
+  DbFixer sFix;        /* For assigning database names to pTable */
+  int sortOrderMask;   /* 1 to honor DESC in index.  0 to ignore. */
+  sqlite3 *db = pParse->db;
+  Db *pDb;             /* The specific table containing the indexed database */
+  int iDb;             /* Index of the database that is being written */
+  Token *pName = 0;    /* Unqualified name of the index to create */
+  struct ExprList_item *pListItem; /* For looping over pList */
+  const Column *pTabCol;           /* A column in the table */
+  int nExtra = 0;                  /* Space allocated for zExtra[] */
+  int nExtraCol;                   /* Number of extra columns needed */
+  char *zExtra = 0;                /* Extra space after the Index object */
+  Index *pPk = 0;      /* PRIMARY KEY index for WITHOUT ROWID tables */
+
+  assert( pParse->nErr==0 );      /* Never called with prior errors */
+  if( db->mallocFailed || IN_DECLARE_VTAB ){
+    goto exit_create_index;
+  }
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    goto exit_create_index;
+  }
+
+  /*
+  ** Find the table that is to be indexed.  Return early if not found.
+  */
+  if( pTblName!=0 ){
+
+    /* Use the two-part index name to determine the database 
+    ** to search for the table. 'Fix' the table name to this db
+    ** before looking up the table.
+    */
+    assert( pName1 && pName2 );
+    iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName);
+    if( iDb<0 ) goto exit_create_index;
+    assert( pName && pName->z );
+
+#ifndef SQLITE_OMIT_TEMPDB
+    /* If the index name was unqualified, check if the table
+    ** is a temp table. If so, set the database to 1. Do not do this
+    ** if initialising a database schema.
+    */
+    if( !db->init.busy ){
+      pTab = sqlite3SrcListLookup(pParse, pTblName);
+      if( pName2->n==0 && pTab && pTab->pSchema==db->aDb[1].pSchema ){
+        iDb = 1;
+      }
+    }
+#endif
+
+    sqlite3FixInit(&sFix, pParse, iDb, "index", pName);
+    if( sqlite3FixSrcList(&sFix, pTblName) ){
+      /* Because the parser constructs pTblName from a single identifier,
+      ** sqlite3FixSrcList can never fail. */
+      assert(0);
+    }
+    pTab = sqlite3LocateTableItem(pParse, 0, &pTblName->a[0]);
+    assert( db->mallocFailed==0 || pTab==0 );
+    if( pTab==0 ) goto exit_create_index;
+    if( iDb==1 && db->aDb[iDb].pSchema!=pTab->pSchema ){
+      sqlite3ErrorMsg(pParse, 
+           "cannot create a TEMP index on non-TEMP table \"%s\"",
+           pTab->zName);
+      goto exit_create_index;
+    }
+    if( !HasRowid(pTab) ) pPk = sqlite3PrimaryKeyIndex(pTab);
+  }else{
+    assert( pName==0 );
+    assert( pStart==0 );
+    pTab = pParse->pNewTable;
+    if( !pTab ) goto exit_create_index;
+    iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  }
+  pDb = &db->aDb[iDb];
+
+  assert( pTab!=0 );
+  assert( pParse->nErr==0 );
+  if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 
+       && db->init.busy==0
+#if SQLITE_USER_AUTHENTICATION
+       && sqlite3UserAuthTable(pTab->zName)==0
+#endif
+       && sqlite3StrNICmp(&pTab->zName[7],"altertab_",9)!=0 ){
+    sqlite3ErrorMsg(pParse, "table %s may not be indexed", pTab->zName);
+    goto exit_create_index;
+  }
+#ifndef SQLITE_OMIT_VIEW
+  if( pTab->pSelect ){
+    sqlite3ErrorMsg(pParse, "views may not be indexed");
+    goto exit_create_index;
+  }
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( IsVirtual(pTab) ){
+    sqlite3ErrorMsg(pParse, "virtual tables may not be indexed");
+    goto exit_create_index;
+  }
+#endif
+
+  /*
+  ** Find the name of the index.  Make sure there is not already another
+  ** index or table with the same name.  
+  **
+  ** Exception:  If we are reading the names of permanent indices from the
+  ** sqlite_master table (because some other process changed the schema) and
+  ** one of the index names collides with the name of a temporary table or
+  ** index, then we will continue to process this index.
+  **
+  ** If pName==0 it means that we are
+  ** dealing with a primary key or UNIQUE constraint.  We have to invent our
+  ** own name.
+  */
+  if( pName ){
+    zName = sqlite3NameFromToken(db, pName);
+    if( zName==0 ) goto exit_create_index;
+    assert( pName->z!=0 );
+    if( SQLITE_OK!=sqlite3CheckObjectName(pParse, zName) ){
+      goto exit_create_index;
+    }
+    if( !db->init.busy ){
+      if( sqlite3FindTable(db, zName, 0)!=0 ){
+        sqlite3ErrorMsg(pParse, "there is already a table named %s", zName);
+        goto exit_create_index;
+      }
+    }
+    if( sqlite3FindIndex(db, zName, pDb->zName)!=0 ){
+      if( !ifNotExist ){
+        sqlite3ErrorMsg(pParse, "index %s already exists", zName);
+      }else{
+        assert( !db->init.busy );
+        sqlite3CodeVerifySchema(pParse, iDb);
+      }
+      goto exit_create_index;
+    }
+  }else{
+    int n;
+    Index *pLoop;
+    for(pLoop=pTab->pIndex, n=1; pLoop; pLoop=pLoop->pNext, n++){}
+    zName = sqlite3MPrintf(db, "sqlite_autoindex_%s_%d", pTab->zName, n);
+    if( zName==0 ){
+      goto exit_create_index;
+    }
+  }
+
+  /* Check for authorization to create an index.
+  */
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  {
+    const char *zDb = pDb->zName;
+    if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(iDb), 0, zDb) ){
+      goto exit_create_index;
+    }
+    i = SQLITE_CREATE_INDEX;
+    if( !OMIT_TEMPDB && iDb==1 ) i = SQLITE_CREATE_TEMP_INDEX;
+    if( sqlite3AuthCheck(pParse, i, zName, pTab->zName, zDb) ){
+      goto exit_create_index;
+    }
+  }
+#endif
+
+  /* If pList==0, it means this routine was called to make a primary
+  ** key out of the last column added to the table under construction.
+  ** So create a fake list to simulate this.
+  */
+  if( pList==0 ){
+    pList = sqlite3ExprListAppend(pParse, 0, 0);
+    if( pList==0 ) goto exit_create_index;
+    pList->a[0].zName = sqlite3DbStrDup(pParse->db,
+                                        pTab->aCol[pTab->nCol-1].zName);
+    pList->a[0].sortOrder = (u8)sortOrder;
+  }
+
+  /* Figure out how many bytes of space are required to store explicitly
+  ** specified collation sequence names.
+  */
+  for(i=0; i<pList->nExpr; i++){
+    Expr *pExpr = pList->a[i].pExpr;
+    if( pExpr ){
+      assert( pExpr->op==TK_COLLATE );
+      nExtra += (1 + sqlite3Strlen30(pExpr->u.zToken));
+    }
+  }
+
+  /* 
+  ** Allocate the index structure. 
+  */
+  nName = sqlite3Strlen30(zName);
+  nExtraCol = pPk ? pPk->nKeyCol : 1;
+  pIndex = sqlite3AllocateIndexObject(db, pList->nExpr + nExtraCol,
+                                      nName + nExtra + 1, &zExtra);
+  if( db->mallocFailed ){
+    goto exit_create_index;
+  }
+  assert( EIGHT_BYTE_ALIGNMENT(pIndex->aiRowLogEst) );
+  assert( EIGHT_BYTE_ALIGNMENT(pIndex->azColl) );
+  pIndex->zName = zExtra;
+  zExtra += nName + 1;
+  memcpy(pIndex->zName, zName, nName+1);
+  pIndex->pTable = pTab;
+  pIndex->onError = (u8)onError;
+  pIndex->uniqNotNull = onError!=OE_None;
+  pIndex->idxType = pName ? SQLITE_IDXTYPE_APPDEF : SQLITE_IDXTYPE_UNIQUE;
+  pIndex->pSchema = db->aDb[iDb].pSchema;
+  pIndex->nKeyCol = pList->nExpr;
+  if( pPIWhere ){
+    sqlite3ResolveSelfReference(pParse, pTab, NC_PartIdx, pPIWhere, 0);
+    pIndex->pPartIdxWhere = pPIWhere;
+    pPIWhere = 0;
+  }
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+
+  /* Check to see if we should honor DESC requests on index columns
+  */
+  if( pDb->pSchema->file_format>=4 ){
+    sortOrderMask = -1;   /* Honor DESC */
+  }else{
+    sortOrderMask = 0;    /* Ignore DESC */
+  }
+
+  /* Scan the names of the columns of the table to be indexed and
+  ** load the column indices into the Index structure.  Report an error
+  ** if any column is not found.
+  **
+  ** TODO:  Add a test to make sure that the same column is not named
+  ** more than once within the same index.  Only the first instance of
+  ** the column will ever be used by the optimizer.  Note that using the
+  ** same column more than once cannot be an error because that would 
+  ** break backwards compatibility - it needs to be a warning.
+  */
+  for(i=0, pListItem=pList->a; i<pList->nExpr; i++, pListItem++){
+    const char *zColName = pListItem->zName;
+    int requestedSortOrder;
+    char *zColl;                   /* Collation sequence name */
+
+    for(j=0, pTabCol=pTab->aCol; j<pTab->nCol; j++, pTabCol++){
+      if( sqlite3StrICmp(zColName, pTabCol->zName)==0 ) break;
+    }
+    if( j>=pTab->nCol ){
+      sqlite3ErrorMsg(pParse, "table %s has no column named %s",
+        pTab->zName, zColName);
+      pParse->checkSchema = 1;
+      goto exit_create_index;
+    }
+    assert( j<=0x7fff );
+    pIndex->aiColumn[i] = (i16)j;
+    if( pListItem->pExpr ){
+      int nColl;
+      assert( pListItem->pExpr->op==TK_COLLATE );
+      zColl = pListItem->pExpr->u.zToken;
+      nColl = sqlite3Strlen30(zColl) + 1;
+      assert( nExtra>=nColl );
+      memcpy(zExtra, zColl, nColl);
+      zColl = zExtra;
+      zExtra += nColl;
+      nExtra -= nColl;
+    }else{
+      zColl = pTab->aCol[j].zColl;
+      if( !zColl ) zColl = "BINARY";
+    }
+    if( !db->init.busy && !sqlite3LocateCollSeq(pParse, zColl) ){
+      goto exit_create_index;
+    }
+    pIndex->azColl[i] = zColl;
+    requestedSortOrder = pListItem->sortOrder & sortOrderMask;
+    pIndex->aSortOrder[i] = (u8)requestedSortOrder;
+    if( pTab->aCol[j].notNull==0 ) pIndex->uniqNotNull = 0;
+  }
+  if( pPk ){
+    for(j=0; j<pPk->nKeyCol; j++){
+      int x = pPk->aiColumn[j];
+      if( hasColumn(pIndex->aiColumn, pIndex->nKeyCol, x) ){
+        pIndex->nColumn--; 
+      }else{
+        pIndex->aiColumn[i] = x;
+        pIndex->azColl[i] = pPk->azColl[j];
+        pIndex->aSortOrder[i] = pPk->aSortOrder[j];
+        i++;
+      }
+    }
+    assert( i==pIndex->nColumn );
+  }else{
+    pIndex->aiColumn[i] = -1;
+    pIndex->azColl[i] = "BINARY";
+  }
+  sqlite3DefaultRowEst(pIndex);
+  if( pParse->pNewTable==0 ) estimateIndexWidth(pIndex);
+
+  if( pTab==pParse->pNewTable ){
+    /* This routine has been called to create an automatic index as a
+    ** result of a PRIMARY KEY or UNIQUE clause on a column definition, or
+    ** a PRIMARY KEY or UNIQUE clause following the column definitions.
+    ** i.e. one of:
+    **
+    ** CREATE TABLE t(x PRIMARY KEY, y);
+    ** CREATE TABLE t(x, y, UNIQUE(x, y));
+    **
+    ** Either way, check to see if the table already has such an index. If
+    ** so, don't bother creating this one. This only applies to
+    ** automatically created indices. Users can do as they wish with
+    ** explicit indices.
+    **
+    ** Two UNIQUE or PRIMARY KEY constraints are considered equivalent
+    ** (and thus suppressing the second one) even if they have different
+    ** sort orders.
+    **
+    ** If there are different collating sequences or if the columns of
+    ** the constraint occur in different orders, then the constraints are
+    ** considered distinct and both result in separate indices.
+    */
+    Index *pIdx;
+    for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      int k;
+      assert( IsUniqueIndex(pIdx) );
+      assert( pIdx->idxType!=SQLITE_IDXTYPE_APPDEF );
+      assert( IsUniqueIndex(pIndex) );
+
+      if( pIdx->nKeyCol!=pIndex->nKeyCol ) continue;
+      for(k=0; k<pIdx->nKeyCol; k++){
+        const char *z1;
+        const char *z2;
+        if( pIdx->aiColumn[k]!=pIndex->aiColumn[k] ) break;
+        z1 = pIdx->azColl[k];
+        z2 = pIndex->azColl[k];
+        if( z1!=z2 && sqlite3StrICmp(z1, z2) ) break;
+      }
+      if( k==pIdx->nKeyCol ){
+        if( pIdx->onError!=pIndex->onError ){
+          /* This constraint creates the same index as a previous
+          ** constraint specified somewhere in the CREATE TABLE statement.
+          ** However the ON CONFLICT clauses are different. If both this 
+          ** constraint and the previous equivalent constraint have explicit
+          ** ON CONFLICT clauses this is an error. Otherwise, use the
+          ** explicitly specified behavior for the index.
+          */
+          if( !(pIdx->onError==OE_Default || pIndex->onError==OE_Default) ){
+            sqlite3ErrorMsg(pParse, 
+                "conflicting ON CONFLICT clauses specified", 0);
+          }
+          if( pIdx->onError==OE_Default ){
+            pIdx->onError = pIndex->onError;
+          }
+        }
+        goto exit_create_index;
+      }
+    }
+  }
+
+  /* Link the new Index structure to its table and to the other
+  ** in-memory database structures. 
+  */
+  if( db->init.busy ){
+    Index *p;
+    assert( sqlite3SchemaMutexHeld(db, 0, pIndex->pSchema) );
+    p = sqlite3HashInsert(&pIndex->pSchema->idxHash, 
+                          pIndex->zName, pIndex);
+    if( p ){
+      assert( p==pIndex );  /* Malloc must have failed */
+      db->mallocFailed = 1;
+      goto exit_create_index;
+    }
+    db->flags |= SQLITE_InternChanges;
+    if( pTblName!=0 ){
+      pIndex->tnum = db->init.newTnum;
+    }
+  }
+
+  /* If this is the initial CREATE INDEX statement (or CREATE TABLE if the
+  ** index is an implied index for a UNIQUE or PRIMARY KEY constraint) then
+  ** emit code to allocate the index rootpage on disk and make an entry for
+  ** the index in the sqlite_master table and populate the index with
+  ** content.  But, do not do this if we are simply reading the sqlite_master
+  ** table to parse the schema, or if this index is the PRIMARY KEY index
+  ** of a WITHOUT ROWID table.
+  **
+  ** If pTblName==0 it means this index is generated as an implied PRIMARY KEY
+  ** or UNIQUE index in a CREATE TABLE statement.  Since the table
+  ** has just been created, it contains no data and the index initialization
+  ** step can be skipped.
+  */
+  else if( pParse->nErr==0 && (HasRowid(pTab) || pTblName!=0) ){
+    Vdbe *v;
+    char *zStmt;
+    int iMem = ++pParse->nMem;
+
+    v = sqlite3GetVdbe(pParse);
+    if( v==0 ) goto exit_create_index;
+
+
+    /* Create the rootpage for the index
+    */
+    sqlite3BeginWriteOperation(pParse, 1, iDb);
+    sqlite3VdbeAddOp2(v, OP_CreateIndex, iDb, iMem);
+
+    /* Gather the complete text of the CREATE INDEX statement into
+    ** the zStmt variable
+    */
+    if( pStart ){
+      int n = (int)(pParse->sLastToken.z - pName->z) + pParse->sLastToken.n;
+      if( pName->z[n-1]==';' ) n--;
+      /* A named index with an explicit CREATE INDEX statement */
+      zStmt = sqlite3MPrintf(db, "CREATE%s INDEX %.*s",
+        onError==OE_None ? "" : " UNIQUE", n, pName->z);
+    }else{
+      /* An automatic index created by a PRIMARY KEY or UNIQUE constraint */
+      /* zStmt = sqlite3MPrintf(""); */
+      zStmt = 0;
+    }
+
+    /* Add an entry in sqlite_master for this index
+    */
+    sqlite3NestedParse(pParse, 
+        "INSERT INTO %Q.%s VALUES('index',%Q,%Q,#%d,%Q);",
+        db->aDb[iDb].zName, SCHEMA_TABLE(iDb),
+        pIndex->zName,
+        pTab->zName,
+        iMem,
+        zStmt
+    );
+    sqlite3DbFree(db, zStmt);
+
+    /* Fill the index with data and reparse the schema. Code an OP_Expire
+    ** to invalidate all pre-compiled statements.
+    */
+    if( pTblName ){
+      sqlite3RefillIndex(pParse, pIndex, iMem);
+      sqlite3ChangeCookie(pParse, iDb);
+      sqlite3VdbeAddParseSchemaOp(v, iDb,
+         sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName));
+      sqlite3VdbeAddOp1(v, OP_Expire, 0);
+    }
+  }
+
+  /* When adding an index to the list of indices for a table, make
+  ** sure all indices labeled OE_Replace come after all those labeled
+  ** OE_Ignore.  This is necessary for the correct constraint check
+  ** processing (in sqlite3GenerateConstraintChecks()) as part of
+  ** UPDATE and INSERT statements.  
+  */
+  if( db->init.busy || pTblName==0 ){
+    if( onError!=OE_Replace || pTab->pIndex==0
+         || pTab->pIndex->onError==OE_Replace){
+      pIndex->pNext = pTab->pIndex;
+      pTab->pIndex = pIndex;
+    }else{
+      Index *pOther = pTab->pIndex;
+      while( pOther->pNext && pOther->pNext->onError!=OE_Replace ){
+        pOther = pOther->pNext;
+      }
+      pIndex->pNext = pOther->pNext;
+      pOther->pNext = pIndex;
+    }
+    pRet = pIndex;
+    pIndex = 0;
+  }
+
+  /* Clean up before exiting */
+exit_create_index:
+  if( pIndex ) freeIndex(db, pIndex);
+  sqlite3ExprDelete(db, pPIWhere);
+  sqlite3ExprListDelete(db, pList);
+  sqlite3SrcListDelete(db, pTblName);
+  sqlite3DbFree(db, zName);
+  return pRet;
+}
+
+/*
+** Fill the Index.aiRowEst[] array with default information - information
+** to be used when we have not run the ANALYZE command.
+**
+** aiRowEst[0] is supposed to contain the number of elements in the index.
+** Since we do not know, guess 1 million.  aiRowEst[1] is an estimate of the
+** number of rows in the table that match any particular value of the
+** first column of the index.  aiRowEst[2] is an estimate of the number
+** of rows that match any particular combination of the first 2 columns
+** of the index.  And so forth.  It must always be the case that
+*
+**           aiRowEst[N]<=aiRowEst[N-1]
+**           aiRowEst[N]>=1
+**
+** Apart from that, we have little to go on besides intuition as to
+** how aiRowEst[] should be initialized.  The numbers generated here
+** are based on typical values found in actual indices.
+*/
+SQLITE_PRIVATE void sqlite3DefaultRowEst(Index *pIdx){
+  /*                10,  9,  8,  7,  6 */
+  LogEst aVal[] = { 33, 32, 30, 28, 26 };
+  LogEst *a = pIdx->aiRowLogEst;
+  int nCopy = MIN(ArraySize(aVal), pIdx->nKeyCol);
+  int i;
+
+  /* Set the first entry (number of rows in the index) to the estimated 
+  ** number of rows in the table. Or 10, if the estimated number of rows 
+  ** in the table is less than that.  */
+  a[0] = pIdx->pTable->nRowLogEst;
+  if( a[0]<33 ) a[0] = 33;        assert( 33==sqlite3LogEst(10) );
+
+  /* Estimate that a[1] is 10, a[2] is 9, a[3] is 8, a[4] is 7, a[5] is
+  ** 6 and each subsequent value (if any) is 5.  */
+  memcpy(&a[1], aVal, nCopy*sizeof(LogEst));
+  for(i=nCopy+1; i<=pIdx->nKeyCol; i++){
+    a[i] = 23;                    assert( 23==sqlite3LogEst(5) );
+  }
+
+  assert( 0==sqlite3LogEst(1) );
+  if( IsUniqueIndex(pIdx) ) a[pIdx->nKeyCol] = 0;
+}
+
+/*
+** This routine will drop an existing named index.  This routine
+** implements the DROP INDEX statement.
+*/
+SQLITE_PRIVATE void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists){
+  Index *pIndex;
+  Vdbe *v;
+  sqlite3 *db = pParse->db;
+  int iDb;
+
+  assert( pParse->nErr==0 );   /* Never called with prior errors */
+  if( db->mallocFailed ){
+    goto exit_drop_index;
+  }
+  assert( pName->nSrc==1 );
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    goto exit_drop_index;
+  }
+  pIndex = sqlite3FindIndex(db, pName->a[0].zName, pName->a[0].zDatabase);
+  if( pIndex==0 ){
+    if( !ifExists ){
+      sqlite3ErrorMsg(pParse, "no such index: %S", pName, 0);
+    }else{
+      sqlite3CodeVerifyNamedSchema(pParse, pName->a[0].zDatabase);
+    }
+    pParse->checkSchema = 1;
+    goto exit_drop_index;
+  }
+  if( pIndex->idxType!=SQLITE_IDXTYPE_APPDEF ){
+    sqlite3ErrorMsg(pParse, "index associated with UNIQUE "
+      "or PRIMARY KEY constraint cannot be dropped", 0);
+    goto exit_drop_index;
+  }
+  iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  {
+    int code = SQLITE_DROP_INDEX;
+    Table *pTab = pIndex->pTable;
+    const char *zDb = db->aDb[iDb].zName;
+    const char *zTab = SCHEMA_TABLE(iDb);
+    if( sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb) ){
+      goto exit_drop_index;
+    }
+    if( !OMIT_TEMPDB && iDb ) code = SQLITE_DROP_TEMP_INDEX;
+    if( sqlite3AuthCheck(pParse, code, pIndex->zName, pTab->zName, zDb) ){
+      goto exit_drop_index;
+    }
+  }
+#endif
+
+  /* Generate code to remove the index and from the master table */
+  v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3BeginWriteOperation(pParse, 1, iDb);
+    sqlite3NestedParse(pParse,
+       "DELETE FROM %Q.%s WHERE name=%Q AND type='index'",
+       db->aDb[iDb].zName, SCHEMA_TABLE(iDb), pIndex->zName
+    );
+    sqlite3ClearStatTables(pParse, iDb, "idx", pIndex->zName);
+    sqlite3ChangeCookie(pParse, iDb);
+    destroyRootPage(pParse, pIndex->tnum, iDb);
+    sqlite3VdbeAddOp4(v, OP_DropIndex, iDb, 0, 0, pIndex->zName, 0);
+  }
+
+exit_drop_index:
+  sqlite3SrcListDelete(db, pName);
+}
+
+/*
+** pArray is a pointer to an array of objects. Each object in the
+** array is szEntry bytes in size. This routine uses sqlite3DbRealloc()
+** to extend the array so that there is space for a new object at the end.
+**
+** When this function is called, *pnEntry contains the current size of
+** the array (in entries - so the allocation is ((*pnEntry) * szEntry) bytes
+** in total).
+**
+** If the realloc() is successful (i.e. if no OOM condition occurs), the
+** space allocated for the new object is zeroed, *pnEntry updated to
+** reflect the new size of the array and a pointer to the new allocation
+** returned. *pIdx is set to the index of the new array entry in this case.
+**
+** Otherwise, if the realloc() fails, *pIdx is set to -1, *pnEntry remains
+** unchanged and a copy of pArray returned.
+*/
+SQLITE_PRIVATE void *sqlite3ArrayAllocate(
+  sqlite3 *db,      /* Connection to notify of malloc failures */
+  void *pArray,     /* Array of objects.  Might be reallocated */
+  int szEntry,      /* Size of each object in the array */
+  int *pnEntry,     /* Number of objects currently in use */
+  int *pIdx         /* Write the index of a new slot here */
+){
+  char *z;
+  int n = *pnEntry;
+  if( (n & (n-1))==0 ){
+    int sz = (n==0) ? 1 : 2*n;
+    void *pNew = sqlite3DbRealloc(db, pArray, sz*szEntry);
+    if( pNew==0 ){
+      *pIdx = -1;
+      return pArray;
+    }
+    pArray = pNew;
+  }
+  z = (char*)pArray;
+  memset(&z[n * szEntry], 0, szEntry);
+  *pIdx = n;
+  ++*pnEntry;
+  return pArray;
+}
+
+/*
+** Append a new element to the given IdList.  Create a new IdList if
+** need be.
+**
+** A new IdList is returned, or NULL if malloc() fails.
+*/
+SQLITE_PRIVATE IdList *sqlite3IdListAppend(sqlite3 *db, IdList *pList, Token *pToken){
+  int i;
+  if( pList==0 ){
+    pList = sqlite3DbMallocZero(db, sizeof(IdList) );
+    if( pList==0 ) return 0;
+  }
+  pList->a = sqlite3ArrayAllocate(
+      db,
+      pList->a,
+      sizeof(pList->a[0]),
+      &pList->nId,
+      &i
+  );
+  if( i<0 ){
+    sqlite3IdListDelete(db, pList);
+    return 0;
+  }
+  pList->a[i].zName = sqlite3NameFromToken(db, pToken);
+  return pList;
+}
+
+/*
+** Delete an IdList.
+*/
+SQLITE_PRIVATE void sqlite3IdListDelete(sqlite3 *db, IdList *pList){
+  int i;
+  if( pList==0 ) return;
+  for(i=0; i<pList->nId; i++){
+    sqlite3DbFree(db, pList->a[i].zName);
+  }
+  sqlite3DbFree(db, pList->a);
+  sqlite3DbFree(db, pList);
+}
+
+/*
+** Return the index in pList of the identifier named zId.  Return -1
+** if not found.
+*/
+SQLITE_PRIVATE int sqlite3IdListIndex(IdList *pList, const char *zName){
+  int i;
+  if( pList==0 ) return -1;
+  for(i=0; i<pList->nId; i++){
+    if( sqlite3StrICmp(pList->a[i].zName, zName)==0 ) return i;
+  }
+  return -1;
+}
+
+/*
+** Expand the space allocated for the given SrcList object by
+** creating nExtra new slots beginning at iStart.  iStart is zero based.
+** New slots are zeroed.
+**
+** For example, suppose a SrcList initially contains two entries: A,B.
+** To append 3 new entries onto the end, do this:
+**
+**    sqlite3SrcListEnlarge(db, pSrclist, 3, 2);
+**
+** After the call above it would contain:  A, B, nil, nil, nil.
+** If the iStart argument had been 1 instead of 2, then the result
+** would have been:  A, nil, nil, nil, B.  To prepend the new slots,
+** the iStart value would be 0.  The result then would
+** be: nil, nil, nil, A, B.
+**
+** If a memory allocation fails the SrcList is unchanged.  The
+** db->mallocFailed flag will be set to true.
+*/
+SQLITE_PRIVATE SrcList *sqlite3SrcListEnlarge(
+  sqlite3 *db,       /* Database connection to notify of OOM errors */
+  SrcList *pSrc,     /* The SrcList to be enlarged */
+  int nExtra,        /* Number of new slots to add to pSrc->a[] */
+  int iStart         /* Index in pSrc->a[] of first new slot */
+){
+  int i;
+
+  /* Sanity checking on calling parameters */
+  assert( iStart>=0 );
+  assert( nExtra>=1 );
+  assert( pSrc!=0 );
+  assert( iStart<=pSrc->nSrc );
+
+  /* Allocate additional space if needed */
+  if( (u32)pSrc->nSrc+nExtra>pSrc->nAlloc ){
+    SrcList *pNew;
+    int nAlloc = pSrc->nSrc+nExtra;
+    int nGot;
+    pNew = sqlite3DbRealloc(db, pSrc,
+               sizeof(*pSrc) + (nAlloc-1)*sizeof(pSrc->a[0]) );
+    if( pNew==0 ){
+      assert( db->mallocFailed );
+      return pSrc;
+    }
+    pSrc = pNew;
+    nGot = (sqlite3DbMallocSize(db, pNew) - sizeof(*pSrc))/sizeof(pSrc->a[0])+1;
+    pSrc->nAlloc = nGot;
+  }
+
+  /* Move existing slots that come after the newly inserted slots
+  ** out of the way */
+  for(i=pSrc->nSrc-1; i>=iStart; i--){
+    pSrc->a[i+nExtra] = pSrc->a[i];
+  }
+  pSrc->nSrc += nExtra;
+
+  /* Zero the newly allocated slots */
+  memset(&pSrc->a[iStart], 0, sizeof(pSrc->a[0])*nExtra);
+  for(i=iStart; i<iStart+nExtra; i++){
+    pSrc->a[i].iCursor = -1;
+  }
+
+  /* Return a pointer to the enlarged SrcList */
+  return pSrc;
+}
+
+
+/*
+** Append a new table name to the given SrcList.  Create a new SrcList if
+** need be.  A new entry is created in the SrcList even if pTable is NULL.
+**
+** A SrcList is returned, or NULL if there is an OOM error.  The returned
+** SrcList might be the same as the SrcList that was input or it might be
+** a new one.  If an OOM error does occurs, then the prior value of pList
+** that is input to this routine is automatically freed.
+**
+** If pDatabase is not null, it means that the table has an optional
+** database name prefix.  Like this:  "database.table".  The pDatabase
+** points to the table name and the pTable points to the database name.
+** The SrcList.a[].zName field is filled with the table name which might
+** come from pTable (if pDatabase is NULL) or from pDatabase.  
+** SrcList.a[].zDatabase is filled with the database name from pTable,
+** or with NULL if no database is specified.
+**
+** In other words, if call like this:
+**
+**         sqlite3SrcListAppend(D,A,B,0);
+**
+** Then B is a table name and the database name is unspecified.  If called
+** like this:
+**
+**         sqlite3SrcListAppend(D,A,B,C);
+**
+** Then C is the table name and B is the database name.  If C is defined
+** then so is B.  In other words, we never have a case where:
+**
+**         sqlite3SrcListAppend(D,A,0,C);
+**
+** Both pTable and pDatabase are assumed to be quoted.  They are dequoted
+** before being added to the SrcList.
+*/
+SQLITE_PRIVATE SrcList *sqlite3SrcListAppend(
+  sqlite3 *db,        /* Connection to notify of malloc failures */
+  SrcList *pList,     /* Append to this SrcList. NULL creates a new SrcList */
+  Token *pTable,      /* Table to append */
+  Token *pDatabase    /* Database of the table */
+){
+  struct SrcList_item *pItem;
+  assert( pDatabase==0 || pTable!=0 );  /* Cannot have C without B */
+  if( pList==0 ){
+    pList = sqlite3DbMallocZero(db, sizeof(SrcList) );
+    if( pList==0 ) return 0;
+    pList->nAlloc = 1;
+  }
+  pList = sqlite3SrcListEnlarge(db, pList, 1, pList->nSrc);
+  if( db->mallocFailed ){
+    sqlite3SrcListDelete(db, pList);
+    return 0;
+  }
+  pItem = &pList->a[pList->nSrc-1];
+  if( pDatabase && pDatabase->z==0 ){
+    pDatabase = 0;
+  }
+  if( pDatabase ){
+    Token *pTemp = pDatabase;
+    pDatabase = pTable;
+    pTable = pTemp;
+  }
+  pItem->zName = sqlite3NameFromToken(db, pTable);
+  pItem->zDatabase = sqlite3NameFromToken(db, pDatabase);
+  return pList;
+}
+
+/*
+** Assign VdbeCursor index numbers to all tables in a SrcList
+*/
+SQLITE_PRIVATE void sqlite3SrcListAssignCursors(Parse *pParse, SrcList *pList){
+  int i;
+  struct SrcList_item *pItem;
+  assert(pList || pParse->db->mallocFailed );
+  if( pList ){
+    for(i=0, pItem=pList->a; i<pList->nSrc; i++, pItem++){
+      if( pItem->iCursor>=0 ) break;
+      pItem->iCursor = pParse->nTab++;
+      if( pItem->pSelect ){
+        sqlite3SrcListAssignCursors(pParse, pItem->pSelect->pSrc);
+      }
+    }
+  }
+}
+
+/*
+** Delete an entire SrcList including all its substructure.
+*/
+SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){
+  int i;
+  struct SrcList_item *pItem;
+  if( pList==0 ) return;
+  for(pItem=pList->a, i=0; i<pList->nSrc; i++, pItem++){
+    sqlite3DbFree(db, pItem->zDatabase);
+    sqlite3DbFree(db, pItem->zName);
+    sqlite3DbFree(db, pItem->zAlias);
+    sqlite3DbFree(db, pItem->zIndex);
+    sqlite3DeleteTable(db, pItem->pTab);
+    sqlite3SelectDelete(db, pItem->pSelect);
+    sqlite3ExprDelete(db, pItem->pOn);
+    sqlite3IdListDelete(db, pItem->pUsing);
+  }
+  sqlite3DbFree(db, pList);
+}
+
+/*
+** This routine is called by the parser to add a new term to the
+** end of a growing FROM clause.  The "p" parameter is the part of
+** the FROM clause that has already been constructed.  "p" is NULL
+** if this is the first term of the FROM clause.  pTable and pDatabase
+** are the name of the table and database named in the FROM clause term.
+** pDatabase is NULL if the database name qualifier is missing - the
+** usual case.  If the term has an alias, then pAlias points to the
+** alias token.  If the term is a subquery, then pSubquery is the
+** SELECT statement that the subquery encodes.  The pTable and
+** pDatabase parameters are NULL for subqueries.  The pOn and pUsing
+** parameters are the content of the ON and USING clauses.
+**
+** Return a new SrcList which encodes is the FROM with the new
+** term added.
+*/
+SQLITE_PRIVATE SrcList *sqlite3SrcListAppendFromTerm(
+  Parse *pParse,          /* Parsing context */
+  SrcList *p,             /* The left part of the FROM clause already seen */
+  Token *pTable,          /* Name of the table to add to the FROM clause */
+  Token *pDatabase,       /* Name of the database containing pTable */
+  Token *pAlias,          /* The right-hand side of the AS subexpression */
+  Select *pSubquery,      /* A subquery used in place of a table name */
+  Expr *pOn,              /* The ON clause of a join */
+  IdList *pUsing          /* The USING clause of a join */
+){
+  struct SrcList_item *pItem;
+  sqlite3 *db = pParse->db;
+  if( !p && (pOn || pUsing) ){
+    sqlite3ErrorMsg(pParse, "a JOIN clause is required before %s", 
+      (pOn ? "ON" : "USING")
+    );
+    goto append_from_error;
+  }
+  p = sqlite3SrcListAppend(db, p, pTable, pDatabase);
+  if( p==0 || NEVER(p->nSrc==0) ){
+    goto append_from_error;
+  }
+  pItem = &p->a[p->nSrc-1];
+  assert( pAlias!=0 );
+  if( pAlias->n ){
+    pItem->zAlias = sqlite3NameFromToken(db, pAlias);
+  }
+  pItem->pSelect = pSubquery;
+  pItem->pOn = pOn;
+  pItem->pUsing = pUsing;
+  return p;
+
+ append_from_error:
+  assert( p==0 );
+  sqlite3ExprDelete(db, pOn);
+  sqlite3IdListDelete(db, pUsing);
+  sqlite3SelectDelete(db, pSubquery);
+  return 0;
+}
+
+/*
+** Add an INDEXED BY or NOT INDEXED clause to the most recently added 
+** element of the source-list passed as the second argument.
+*/
+SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pIndexedBy){
+  assert( pIndexedBy!=0 );
+  if( p && ALWAYS(p->nSrc>0) ){
+    struct SrcList_item *pItem = &p->a[p->nSrc-1];
+    assert( pItem->notIndexed==0 && pItem->zIndex==0 );
+    if( pIndexedBy->n==1 && !pIndexedBy->z ){
+      /* A "NOT INDEXED" clause was supplied. See parse.y 
+      ** construct "indexed_opt" for details. */
+      pItem->notIndexed = 1;
+    }else{
+      pItem->zIndex = sqlite3NameFromToken(pParse->db, pIndexedBy);
+    }
+  }
+}
+
+/*
+** When building up a FROM clause in the parser, the join operator
+** is initially attached to the left operand.  But the code generator
+** expects the join operator to be on the right operand.  This routine
+** Shifts all join operators from left to right for an entire FROM
+** clause.
+**
+** Example: Suppose the join is like this:
+**
+**           A natural cross join B
+**
+** The operator is "natural cross join".  The A and B operands are stored
+** in p->a[0] and p->a[1], respectively.  The parser initially stores the
+** operator with A.  This routine shifts that operator over to B.
+*/
+SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList *p){
+  if( p ){
+    int i;
+    assert( p->a || p->nSrc==0 );
+    for(i=p->nSrc-1; i>0; i--){
+      p->a[i].jointype = p->a[i-1].jointype;
+    }
+    p->a[0].jointype = 0;
+  }
+}
+
+/*
+** Begin a transaction
+*/
+SQLITE_PRIVATE void sqlite3BeginTransaction(Parse *pParse, int type){
+  sqlite3 *db;
+  Vdbe *v;
+  int i;
+
+  assert( pParse!=0 );
+  db = pParse->db;
+  assert( db!=0 );
+/*  if( db->aDb[0].pBt==0 ) return; */
+  if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "BEGIN", 0, 0) ){
+    return;
+  }
+  v = sqlite3GetVdbe(pParse);
+  if( !v ) return;
+  if( type!=TK_DEFERRED ){
+    for(i=0; i<db->nDb; i++){
+      sqlite3VdbeAddOp2(v, OP_Transaction, i, (type==TK_EXCLUSIVE)+1);
+      sqlite3VdbeUsesBtree(v, i);
+    }
+  }
+  sqlite3VdbeAddOp2(v, OP_AutoCommit, 0, 0);
+}
+
+/*
+** Commit a transaction
+*/
+SQLITE_PRIVATE void sqlite3CommitTransaction(Parse *pParse){
+  Vdbe *v;
+
+  assert( pParse!=0 );
+  assert( pParse->db!=0 );
+  if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "COMMIT", 0, 0) ){
+    return;
+  }
+  v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3VdbeAddOp2(v, OP_AutoCommit, 1, 0);
+  }
+}
+
+/*
+** Rollback a transaction
+*/
+SQLITE_PRIVATE void sqlite3RollbackTransaction(Parse *pParse){
+  Vdbe *v;
+
+  assert( pParse!=0 );
+  assert( pParse->db!=0 );
+  if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "ROLLBACK", 0, 0) ){
+    return;
+  }
+  v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3VdbeAddOp2(v, OP_AutoCommit, 1, 1);
+  }
+}
+
+/*
+** This function is called by the parser when it parses a command to create,
+** release or rollback an SQL savepoint. 
+*/
+SQLITE_PRIVATE void sqlite3Savepoint(Parse *pParse, int op, Token *pName){
+  char *zName = sqlite3NameFromToken(pParse->db, pName);
+  if( zName ){
+    Vdbe *v = sqlite3GetVdbe(pParse);
+#ifndef SQLITE_OMIT_AUTHORIZATION
+    static const char * const az[] = { "BEGIN", "RELEASE", "ROLLBACK" };
+    assert( !SAVEPOINT_BEGIN && SAVEPOINT_RELEASE==1 && SAVEPOINT_ROLLBACK==2 );
+#endif
+    if( !v || sqlite3AuthCheck(pParse, SQLITE_SAVEPOINT, az[op], zName, 0) ){
+      sqlite3DbFree(pParse->db, zName);
+      return;
+    }
+    sqlite3VdbeAddOp4(v, OP_Savepoint, op, 0, 0, zName, P4_DYNAMIC);
+  }
+}
+
+/*
+** Make sure the TEMP database is open and available for use.  Return
+** the number of errors.  Leave any error messages in the pParse structure.
+*/
+SQLITE_PRIVATE int sqlite3OpenTempDatabase(Parse *pParse){
+  sqlite3 *db = pParse->db;
+  if( db->aDb[1].pBt==0 && !pParse->explain ){
+    int rc;
+    Btree *pBt;
+    static const int flags = 
+          SQLITE_OPEN_READWRITE |
+          SQLITE_OPEN_CREATE |
+          SQLITE_OPEN_EXCLUSIVE |
+          SQLITE_OPEN_DELETEONCLOSE |
+          SQLITE_OPEN_TEMP_DB;
+
+    rc = sqlite3BtreeOpen(db->pVfs, 0, db, &pBt, 0, flags);
+    if( rc!=SQLITE_OK ){
+      sqlite3ErrorMsg(pParse, "unable to open a temporary database "
+        "file for storing temporary tables");
+      pParse->rc = rc;
+      return 1;
+    }
+    db->aDb[1].pBt = pBt;
+    assert( db->aDb[1].pSchema );
+    if( SQLITE_NOMEM==sqlite3BtreeSetPageSize(pBt, db->nextPagesize, -1, 0) ){
+      db->mallocFailed = 1;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Record the fact that the schema cookie will need to be verified
+** for database iDb.  The code to actually verify the schema cookie
+** will occur at the end of the top-level VDBE and will be generated
+** later, by sqlite3FinishCoding().
+*/
+SQLITE_PRIVATE void sqlite3CodeVerifySchema(Parse *pParse, int iDb){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  sqlite3 *db = pToplevel->db;
+
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( db->aDb[iDb].pBt!=0 || iDb==1 );
+  assert( iDb<SQLITE_MAX_ATTACHED+2 );
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  if( DbMaskTest(pToplevel->cookieMask, iDb)==0 ){
+    DbMaskSet(pToplevel->cookieMask, iDb);
+    pToplevel->cookieValue[iDb] = db->aDb[iDb].pSchema->schema_cookie;
+    if( !OMIT_TEMPDB && iDb==1 ){
+      sqlite3OpenTempDatabase(pToplevel);
+    }
+  }
+}
+
+/*
+** If argument zDb is NULL, then call sqlite3CodeVerifySchema() for each 
+** attached database. Otherwise, invoke it for the database named zDb only.
+*/
+SQLITE_PRIVATE void sqlite3CodeVerifyNamedSchema(Parse *pParse, const char *zDb){
+  sqlite3 *db = pParse->db;
+  int i;
+  for(i=0; i<db->nDb; i++){
+    Db *pDb = &db->aDb[i];
+    if( pDb->pBt && (!zDb || 0==sqlite3StrICmp(zDb, pDb->zName)) ){
+      sqlite3CodeVerifySchema(pParse, i);
+    }
+  }
+}
+
+/*
+** Generate VDBE code that prepares for doing an operation that
+** might change the database.
+**
+** This routine starts a new transaction if we are not already within
+** a transaction.  If we are already within a transaction, then a checkpoint
+** is set if the setStatement parameter is true.  A checkpoint should
+** be set for operations that might fail (due to a constraint) part of
+** the way through and which will need to undo some writes without having to
+** rollback the whole transaction.  For operations where all constraints
+** can be checked before any changes are made to the database, it is never
+** necessary to undo a write and the checkpoint should not be set.
+*/
+SQLITE_PRIVATE void sqlite3BeginWriteOperation(Parse *pParse, int setStatement, int iDb){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  sqlite3CodeVerifySchema(pParse, iDb);
+  DbMaskSet(pToplevel->writeMask, iDb);
+  pToplevel->isMultiWrite |= setStatement;
+}
+
+/*
+** Indicate that the statement currently under construction might write
+** more than one entry (example: deleting one row then inserting another,
+** inserting multiple rows in a table, or inserting a row and index entries.)
+** If an abort occurs after some of these writes have completed, then it will
+** be necessary to undo the completed writes.
+*/
+SQLITE_PRIVATE void sqlite3MultiWrite(Parse *pParse){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  pToplevel->isMultiWrite = 1;
+}
+
+/* 
+** The code generator calls this routine if is discovers that it is
+** possible to abort a statement prior to completion.  In order to 
+** perform this abort without corrupting the database, we need to make
+** sure that the statement is protected by a statement transaction.
+**
+** Technically, we only need to set the mayAbort flag if the
+** isMultiWrite flag was previously set.  There is a time dependency
+** such that the abort must occur after the multiwrite.  This makes
+** some statements involving the REPLACE conflict resolution algorithm
+** go a little faster.  But taking advantage of this time dependency
+** makes it more difficult to prove that the code is correct (in 
+** particular, it prevents us from writing an effective
+** implementation of sqlite3AssertMayAbort()) and so we have chosen
+** to take the safe route and skip the optimization.
+*/
+SQLITE_PRIVATE void sqlite3MayAbort(Parse *pParse){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  pToplevel->mayAbort = 1;
+}
+
+/*
+** Code an OP_Halt that causes the vdbe to return an SQLITE_CONSTRAINT
+** error. The onError parameter determines which (if any) of the statement
+** and/or current transaction is rolled back.
+*/
+SQLITE_PRIVATE void sqlite3HaltConstraint(
+  Parse *pParse,    /* Parsing context */
+  int errCode,      /* extended error code */
+  int onError,      /* Constraint type */
+  char *p4,         /* Error message */
+  i8 p4type,        /* P4_STATIC or P4_TRANSIENT */
+  u8 p5Errmsg       /* P5_ErrMsg type */
+){
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  assert( (errCode&0xff)==SQLITE_CONSTRAINT );
+  if( onError==OE_Abort ){
+    sqlite3MayAbort(pParse);
+  }
+  sqlite3VdbeAddOp4(v, OP_Halt, errCode, onError, 0, p4, p4type);
+  if( p5Errmsg ) sqlite3VdbeChangeP5(v, p5Errmsg);
+}
+
+/*
+** Code an OP_Halt due to UNIQUE or PRIMARY KEY constraint violation.
+*/
+SQLITE_PRIVATE void sqlite3UniqueConstraint(
+  Parse *pParse,    /* Parsing context */
+  int onError,      /* Constraint type */
+  Index *pIdx       /* The index that triggers the constraint */
+){
+  char *zErr;
+  int j;
+  StrAccum errMsg;
+  Table *pTab = pIdx->pTable;
+
+  sqlite3StrAccumInit(&errMsg, 0, 0, 200);
+  errMsg.db = pParse->db;
+  for(j=0; j<pIdx->nKeyCol; j++){
+    char *zCol = pTab->aCol[pIdx->aiColumn[j]].zName;
+    if( j ) sqlite3StrAccumAppend(&errMsg, ", ", 2);
+    sqlite3StrAccumAppendAll(&errMsg, pTab->zName);
+    sqlite3StrAccumAppend(&errMsg, ".", 1);
+    sqlite3StrAccumAppendAll(&errMsg, zCol);
+  }
+  zErr = sqlite3StrAccumFinish(&errMsg);
+  sqlite3HaltConstraint(pParse, 
+    IsPrimaryKeyIndex(pIdx) ? SQLITE_CONSTRAINT_PRIMARYKEY 
+                            : SQLITE_CONSTRAINT_UNIQUE,
+    onError, zErr, P4_DYNAMIC, P5_ConstraintUnique);
+}
+
+
+/*
+** Code an OP_Halt due to non-unique rowid.
+*/
+SQLITE_PRIVATE void sqlite3RowidConstraint(
+  Parse *pParse,    /* Parsing context */
+  int onError,      /* Conflict resolution algorithm */
+  Table *pTab       /* The table with the non-unique rowid */ 
+){
+  char *zMsg;
+  int rc;
+  if( pTab->iPKey>=0 ){
+    zMsg = sqlite3MPrintf(pParse->db, "%s.%s", pTab->zName,
+                          pTab->aCol[pTab->iPKey].zName);
+    rc = SQLITE_CONSTRAINT_PRIMARYKEY;
+  }else{
+    zMsg = sqlite3MPrintf(pParse->db, "%s.rowid", pTab->zName);
+    rc = SQLITE_CONSTRAINT_ROWID;
+  }
+  sqlite3HaltConstraint(pParse, rc, onError, zMsg, P4_DYNAMIC,
+                        P5_ConstraintUnique);
+}
+
+/*
+** Check to see if pIndex uses the collating sequence pColl.  Return
+** true if it does and false if it does not.
+*/
+#ifndef SQLITE_OMIT_REINDEX
+static int collationMatch(const char *zColl, Index *pIndex){
+  int i;
+  assert( zColl!=0 );
+  for(i=0; i<pIndex->nColumn; i++){
+    const char *z = pIndex->azColl[i];
+    assert( z!=0 || pIndex->aiColumn[i]<0 );
+    if( pIndex->aiColumn[i]>=0 && 0==sqlite3StrICmp(z, zColl) ){
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif
+
+/*
+** Recompute all indices of pTab that use the collating sequence pColl.
+** If pColl==0 then recompute all indices of pTab.
+*/
+#ifndef SQLITE_OMIT_REINDEX
+static void reindexTable(Parse *pParse, Table *pTab, char const *zColl){
+  Index *pIndex;              /* An index associated with pTab */
+
+  for(pIndex=pTab->pIndex; pIndex; pIndex=pIndex->pNext){
+    if( zColl==0 || collationMatch(zColl, pIndex) ){
+      int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+      sqlite3BeginWriteOperation(pParse, 0, iDb);
+      sqlite3RefillIndex(pParse, pIndex, -1);
+    }
+  }
+}
+#endif
+
+/*
+** Recompute all indices of all tables in all databases where the
+** indices use the collating sequence pColl.  If pColl==0 then recompute
+** all indices everywhere.
+*/
+#ifndef SQLITE_OMIT_REINDEX
+static void reindexDatabases(Parse *pParse, char const *zColl){
+  Db *pDb;                    /* A single database */
+  int iDb;                    /* The database index number */
+  sqlite3 *db = pParse->db;   /* The database connection */
+  HashElem *k;                /* For looping over tables in pDb */
+  Table *pTab;                /* A table in the database */
+
+  assert( sqlite3BtreeHoldsAllMutexes(db) );  /* Needed for schema access */
+  for(iDb=0, pDb=db->aDb; iDb<db->nDb; iDb++, pDb++){
+    assert( pDb!=0 );
+    for(k=sqliteHashFirst(&pDb->pSchema->tblHash);  k; k=sqliteHashNext(k)){
+      pTab = (Table*)sqliteHashData(k);
+      reindexTable(pParse, pTab, zColl);
+    }
+  }
+}
+#endif
+
+/*
+** Generate code for the REINDEX command.
+**
+**        REINDEX                            -- 1
+**        REINDEX  <collation>               -- 2
+**        REINDEX  ?<database>.?<tablename>  -- 3
+**        REINDEX  ?<database>.?<indexname>  -- 4
+**
+** Form 1 causes all indices in all attached databases to be rebuilt.
+** Form 2 rebuilds all indices in all databases that use the named
+** collating function.  Forms 3 and 4 rebuild the named index or all
+** indices associated with the named table.
+*/
+#ifndef SQLITE_OMIT_REINDEX
+SQLITE_PRIVATE void sqlite3Reindex(Parse *pParse, Token *pName1, Token *pName2){
+  CollSeq *pColl;             /* Collating sequence to be reindexed, or NULL */
+  char *z;                    /* Name of a table or index */
+  const char *zDb;            /* Name of the database */
+  Table *pTab;                /* A table in the database */
+  Index *pIndex;              /* An index associated with pTab */
+  int iDb;                    /* The database index number */
+  sqlite3 *db = pParse->db;   /* The database connection */
+  Token *pObjName;            /* Name of the table or index to be reindexed */
+
+  /* Read the database schema. If an error occurs, leave an error message
+  ** and code in pParse and return NULL. */
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    return;
+  }
+
+  if( pName1==0 ){
+    reindexDatabases(pParse, 0);
+    return;
+  }else if( NEVER(pName2==0) || pName2->z==0 ){
+    char *zColl;
+    assert( pName1->z );
+    zColl = sqlite3NameFromToken(pParse->db, pName1);
+    if( !zColl ) return;
+    pColl = sqlite3FindCollSeq(db, ENC(db), zColl, 0);
+    if( pColl ){
+      reindexDatabases(pParse, zColl);
+      sqlite3DbFree(db, zColl);
+      return;
+    }
+    sqlite3DbFree(db, zColl);
+  }
+  iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pObjName);
+  if( iDb<0 ) return;
+  z = sqlite3NameFromToken(db, pObjName);
+  if( z==0 ) return;
+  zDb = db->aDb[iDb].zName;
+  pTab = sqlite3FindTable(db, z, zDb);
+  if( pTab ){
+    reindexTable(pParse, pTab, 0);
+    sqlite3DbFree(db, z);
+    return;
+  }
+  pIndex = sqlite3FindIndex(db, z, zDb);
+  sqlite3DbFree(db, z);
+  if( pIndex ){
+    sqlite3BeginWriteOperation(pParse, 0, iDb);
+    sqlite3RefillIndex(pParse, pIndex, -1);
+    return;
+  }
+  sqlite3ErrorMsg(pParse, "unable to identify the object to be reindexed");
+}
+#endif
+
+/*
+** Return a KeyInfo structure that is appropriate for the given Index.
+**
+** The KeyInfo structure for an index is cached in the Index object.
+** So there might be multiple references to the returned pointer.  The
+** caller should not try to modify the KeyInfo object.
+**
+** The caller should invoke sqlite3KeyInfoUnref() on the returned object
+** when it has finished using it.
+*/
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){
+  int i;
+  int nCol = pIdx->nColumn;
+  int nKey = pIdx->nKeyCol;
+  KeyInfo *pKey;
+  if( pParse->nErr ) return 0;
+  if( pIdx->uniqNotNull ){
+    pKey = sqlite3KeyInfoAlloc(pParse->db, nKey, nCol-nKey);
+  }else{
+    pKey = sqlite3KeyInfoAlloc(pParse->db, nCol, 0);
+  }
+  if( pKey ){
+    assert( sqlite3KeyInfoIsWriteable(pKey) );
+    for(i=0; i<nCol; i++){
+      char *zColl = pIdx->azColl[i];
+      assert( zColl!=0 );
+      pKey->aColl[i] = strcmp(zColl,"BINARY")==0 ? 0 :
+                        sqlite3LocateCollSeq(pParse, zColl);
+      pKey->aSortOrder[i] = pIdx->aSortOrder[i];
+    }
+    if( pParse->nErr ){
+      sqlite3KeyInfoUnref(pKey);
+      pKey = 0;
+    }
+  }
+  return pKey;
+}
+
+#ifndef SQLITE_OMIT_CTE
+/* 
+** This routine is invoked once per CTE by the parser while parsing a 
+** WITH clause. 
+*/
+SQLITE_PRIVATE With *sqlite3WithAdd(
+  Parse *pParse,          /* Parsing context */
+  With *pWith,            /* Existing WITH clause, or NULL */
+  Token *pName,           /* Name of the common-table */
+  ExprList *pArglist,     /* Optional column name list for the table */
+  Select *pQuery          /* Query used to initialize the table */
+){
+  sqlite3 *db = pParse->db;
+  With *pNew;
+  char *zName;
+
+  /* Check that the CTE name is unique within this WITH clause. If
+  ** not, store an error in the Parse structure. */
+  zName = sqlite3NameFromToken(pParse->db, pName);
+  if( zName && pWith ){
+    int i;
+    for(i=0; i<pWith->nCte; i++){
+      if( sqlite3StrICmp(zName, pWith->a[i].zName)==0 ){
+        sqlite3ErrorMsg(pParse, "duplicate WITH table name: %s", zName);
+      }
+    }
+  }
+
+  if( pWith ){
+    int nByte = sizeof(*pWith) + (sizeof(pWith->a[1]) * pWith->nCte);
+    pNew = sqlite3DbRealloc(db, pWith, nByte);
+  }else{
+    pNew = sqlite3DbMallocZero(db, sizeof(*pWith));
+  }
+  assert( zName!=0 || pNew==0 );
+  assert( db->mallocFailed==0 || pNew==0 );
+
+  if( pNew==0 ){
+    sqlite3ExprListDelete(db, pArglist);
+    sqlite3SelectDelete(db, pQuery);
+    sqlite3DbFree(db, zName);
+    pNew = pWith;
+  }else{
+    pNew->a[pNew->nCte].pSelect = pQuery;
+    pNew->a[pNew->nCte].pCols = pArglist;
+    pNew->a[pNew->nCte].zName = zName;
+    pNew->a[pNew->nCte].zErr = 0;
+    pNew->nCte++;
+  }
+
+  return pNew;
+}
+
+/*
+** Free the contents of the With object passed as the second argument.
+*/
+SQLITE_PRIVATE void sqlite3WithDelete(sqlite3 *db, With *pWith){
+  if( pWith ){
+    int i;
+    for(i=0; i<pWith->nCte; i++){
+      struct Cte *pCte = &pWith->a[i];
+      sqlite3ExprListDelete(db, pCte->pCols);
+      sqlite3SelectDelete(db, pCte->pSelect);
+      sqlite3DbFree(db, pCte->zName);
+    }
+    sqlite3DbFree(db, pWith);
+  }
+}
+#endif /* !defined(SQLITE_OMIT_CTE) */
+
+/************** End of build.c ***********************************************/
+/************** Begin file callback.c ****************************************/
+/*
+** 2005 May 23 
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains functions used to access the internal hash tables
+** of user defined functions and collation sequences.
+*/
+
+
+/*
+** Invoke the 'collation needed' callback to request a collation sequence
+** in the encoding enc of name zName, length nName.
+*/
+static void callCollNeeded(sqlite3 *db, int enc, const char *zName){
+  assert( !db->xCollNeeded || !db->xCollNeeded16 );
+  if( db->xCollNeeded ){
+    char *zExternal = sqlite3DbStrDup(db, zName);
+    if( !zExternal ) return;
+    db->xCollNeeded(db->pCollNeededArg, db, enc, zExternal);
+    sqlite3DbFree(db, zExternal);
+  }
+#ifndef SQLITE_OMIT_UTF16
+  if( db->xCollNeeded16 ){
+    char const *zExternal;
+    sqlite3_value *pTmp = sqlite3ValueNew(db);
+    sqlite3ValueSetStr(pTmp, -1, zName, SQLITE_UTF8, SQLITE_STATIC);
+    zExternal = sqlite3ValueText(pTmp, SQLITE_UTF16NATIVE);
+    if( zExternal ){
+      db->xCollNeeded16(db->pCollNeededArg, db, (int)ENC(db), zExternal);
+    }
+    sqlite3ValueFree(pTmp);
+  }
+#endif
+}
+
+/*
+** This routine is called if the collation factory fails to deliver a
+** collation function in the best encoding but there may be other versions
+** of this collation function (for other text encodings) available. Use one
+** of these instead if they exist. Avoid a UTF-8 <-> UTF-16 conversion if
+** possible.
+*/
+static int synthCollSeq(sqlite3 *db, CollSeq *pColl){
+  CollSeq *pColl2;
+  char *z = pColl->zName;
+  int i;
+  static const u8 aEnc[] = { SQLITE_UTF16BE, SQLITE_UTF16LE, SQLITE_UTF8 };
+  for(i=0; i<3; i++){
+    pColl2 = sqlite3FindCollSeq(db, aEnc[i], z, 0);
+    if( pColl2->xCmp!=0 ){
+      memcpy(pColl, pColl2, sizeof(CollSeq));
+      pColl->xDel = 0;         /* Do not copy the destructor */
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_ERROR;
+}
+
+/*
+** This function is responsible for invoking the collation factory callback
+** or substituting a collation sequence of a different encoding when the
+** requested collation sequence is not available in the desired encoding.
+** 
+** If it is not NULL, then pColl must point to the database native encoding 
+** collation sequence with name zName, length nName.
+**
+** The return value is either the collation sequence to be used in database
+** db for collation type name zName, length nName, or NULL, if no collation
+** sequence can be found.  If no collation is found, leave an error message.
+**
+** See also: sqlite3LocateCollSeq(), sqlite3FindCollSeq()
+*/
+SQLITE_PRIVATE CollSeq *sqlite3GetCollSeq(
+  Parse *pParse,        /* Parsing context */
+  u8 enc,               /* The desired encoding for the collating sequence */
+  CollSeq *pColl,       /* Collating sequence with native encoding, or NULL */
+  const char *zName     /* Collating sequence name */
+){
+  CollSeq *p;
+  sqlite3 *db = pParse->db;
+
+  p = pColl;
+  if( !p ){
+    p = sqlite3FindCollSeq(db, enc, zName, 0);
+  }
+  if( !p || !p->xCmp ){
+    /* No collation sequence of this type for this encoding is registered.
+    ** Call the collation factory to see if it can supply us with one.
+    */
+    callCollNeeded(db, enc, zName);
+    p = sqlite3FindCollSeq(db, enc, zName, 0);
+  }
+  if( p && !p->xCmp && synthCollSeq(db, p) ){
+    p = 0;
+  }
+  assert( !p || p->xCmp );
+  if( p==0 ){
+    sqlite3ErrorMsg(pParse, "no such collation sequence: %s", zName);
+  }
+  return p;
+}
+
+/*
+** This routine is called on a collation sequence before it is used to
+** check that it is defined. An undefined collation sequence exists when
+** a database is loaded that contains references to collation sequences
+** that have not been defined by sqlite3_create_collation() etc.
+**
+** If required, this routine calls the 'collation needed' callback to
+** request a definition of the collating sequence. If this doesn't work, 
+** an equivalent collating sequence that uses a text encoding different
+** from the main database is substituted, if one is available.
+*/
+SQLITE_PRIVATE int sqlite3CheckCollSeq(Parse *pParse, CollSeq *pColl){
+  if( pColl ){
+    const char *zName = pColl->zName;
+    sqlite3 *db = pParse->db;
+    CollSeq *p = sqlite3GetCollSeq(pParse, ENC(db), pColl, zName);
+    if( !p ){
+      return SQLITE_ERROR;
+    }
+    assert( p==pColl );
+  }
+  return SQLITE_OK;
+}
+
+
+
+/*
+** Locate and return an entry from the db.aCollSeq hash table. If the entry
+** specified by zName and nName is not found and parameter 'create' is
+** true, then create a new entry. Otherwise return NULL.
+**
+** Each pointer stored in the sqlite3.aCollSeq hash table contains an
+** array of three CollSeq structures. The first is the collation sequence
+** preferred for UTF-8, the second UTF-16le, and the third UTF-16be.
+**
+** Stored immediately after the three collation sequences is a copy of
+** the collation sequence name. A pointer to this string is stored in
+** each collation sequence structure.
+*/
+static CollSeq *findCollSeqEntry(
+  sqlite3 *db,          /* Database connection */
+  const char *zName,    /* Name of the collating sequence */
+  int create            /* Create a new entry if true */
+){
+  CollSeq *pColl;
+  pColl = sqlite3HashFind(&db->aCollSeq, zName);
+
+  if( 0==pColl && create ){
+    int nName = sqlite3Strlen30(zName);
+    pColl = sqlite3DbMallocZero(db, 3*sizeof(*pColl) + nName + 1);
+    if( pColl ){
+      CollSeq *pDel = 0;
+      pColl[0].zName = (char*)&pColl[3];
+      pColl[0].enc = SQLITE_UTF8;
+      pColl[1].zName = (char*)&pColl[3];
+      pColl[1].enc = SQLITE_UTF16LE;
+      pColl[2].zName = (char*)&pColl[3];
+      pColl[2].enc = SQLITE_UTF16BE;
+      memcpy(pColl[0].zName, zName, nName);
+      pColl[0].zName[nName] = 0;
+      pDel = sqlite3HashInsert(&db->aCollSeq, pColl[0].zName, pColl);
+
+      /* If a malloc() failure occurred in sqlite3HashInsert(), it will 
+      ** return the pColl pointer to be deleted (because it wasn't added
+      ** to the hash table).
+      */
+      assert( pDel==0 || pDel==pColl );
+      if( pDel!=0 ){
+        db->mallocFailed = 1;
+        sqlite3DbFree(db, pDel);
+        pColl = 0;
+      }
+    }
+  }
+  return pColl;
+}
+
+/*
+** Parameter zName points to a UTF-8 encoded string nName bytes long.
+** Return the CollSeq* pointer for the collation sequence named zName
+** for the encoding 'enc' from the database 'db'.
+**
+** If the entry specified is not found and 'create' is true, then create a
+** new entry.  Otherwise return NULL.
+**
+** A separate function sqlite3LocateCollSeq() is a wrapper around
+** this routine.  sqlite3LocateCollSeq() invokes the collation factory
+** if necessary and generates an error message if the collating sequence
+** cannot be found.
+**
+** See also: sqlite3LocateCollSeq(), sqlite3GetCollSeq()
+*/
+SQLITE_PRIVATE CollSeq *sqlite3FindCollSeq(
+  sqlite3 *db,
+  u8 enc,
+  const char *zName,
+  int create
+){
+  CollSeq *pColl;
+  if( zName ){
+    pColl = findCollSeqEntry(db, zName, create);
+  }else{
+    pColl = db->pDfltColl;
+  }
+  assert( SQLITE_UTF8==1 && SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
+  assert( enc>=SQLITE_UTF8 && enc<=SQLITE_UTF16BE );
+  if( pColl ) pColl += enc-1;
+  return pColl;
+}
+
+/* During the search for the best function definition, this procedure
+** is called to test how well the function passed as the first argument
+** matches the request for a function with nArg arguments in a system
+** that uses encoding enc. The value returned indicates how well the
+** request is matched. A higher value indicates a better match.
+**
+** If nArg is -1 that means to only return a match (non-zero) if p->nArg
+** is also -1.  In other words, we are searching for a function that
+** takes a variable number of arguments.
+**
+** If nArg is -2 that means that we are searching for any function 
+** regardless of the number of arguments it uses, so return a positive
+** match score for any
+**
+** The returned value is always between 0 and 6, as follows:
+**
+** 0: Not a match.
+** 1: UTF8/16 conversion required and function takes any number of arguments.
+** 2: UTF16 byte order change required and function takes any number of args.
+** 3: encoding matches and function takes any number of arguments
+** 4: UTF8/16 conversion required - argument count matches exactly
+** 5: UTF16 byte order conversion required - argument count matches exactly
+** 6: Perfect match:  encoding and argument count match exactly.
+**
+** If nArg==(-2) then any function with a non-null xStep or xFunc is
+** a perfect match and any function with both xStep and xFunc NULL is
+** a non-match.
+*/
+#define FUNC_PERFECT_MATCH 6  /* The score for a perfect match */
+static int matchQuality(
+  FuncDef *p,     /* The function we are evaluating for match quality */
+  int nArg,       /* Desired number of arguments.  (-1)==any */
+  u8 enc          /* Desired text encoding */
+){
+  int match;
+
+  /* nArg of -2 is a special case */
+  if( nArg==(-2) ) return (p->xFunc==0 && p->xStep==0) ? 0 : FUNC_PERFECT_MATCH;
+
+  /* Wrong number of arguments means "no match" */
+  if( p->nArg!=nArg && p->nArg>=0 ) return 0;
+
+  /* Give a better score to a function with a specific number of arguments
+  ** than to function that accepts any number of arguments. */
+  if( p->nArg==nArg ){
+    match = 4;
+  }else{
+    match = 1;
+  }
+
+  /* Bonus points if the text encoding matches */
+  if( enc==(p->funcFlags & SQLITE_FUNC_ENCMASK) ){
+    match += 2;  /* Exact encoding match */
+  }else if( (enc & p->funcFlags & 2)!=0 ){
+    match += 1;  /* Both are UTF16, but with different byte orders */
+  }
+
+  return match;
+}
+
+/*
+** Search a FuncDefHash for a function with the given name.  Return
+** a pointer to the matching FuncDef if found, or 0 if there is no match.
+*/
+static FuncDef *functionSearch(
+  FuncDefHash *pHash,  /* Hash table to search */
+  int h,               /* Hash of the name */
+  const char *zFunc,   /* Name of function */
+  int nFunc            /* Number of bytes in zFunc */
+){
+  FuncDef *p;
+  for(p=pHash->a[h]; p; p=p->pHash){
+    if( sqlite3StrNICmp(p->zName, zFunc, nFunc)==0 && p->zName[nFunc]==0 ){
+      return p;
+    }
+  }
+  return 0;
+}
+
+/*
+** Insert a new FuncDef into a FuncDefHash hash table.
+*/
+SQLITE_PRIVATE void sqlite3FuncDefInsert(
+  FuncDefHash *pHash,  /* The hash table into which to insert */
+  FuncDef *pDef        /* The function definition to insert */
+){
+  FuncDef *pOther;
+  int nName = sqlite3Strlen30(pDef->zName);
+  u8 c1 = (u8)pDef->zName[0];
+  int h = (sqlite3UpperToLower[c1] + nName) % ArraySize(pHash->a);
+  pOther = functionSearch(pHash, h, pDef->zName, nName);
+  if( pOther ){
+    assert( pOther!=pDef && pOther->pNext!=pDef );
+    pDef->pNext = pOther->pNext;
+    pOther->pNext = pDef;
+  }else{
+    pDef->pNext = 0;
+    pDef->pHash = pHash->a[h];
+    pHash->a[h] = pDef;
+  }
+}
+  
+  
+
+/*
+** Locate a user function given a name, a number of arguments and a flag
+** indicating whether the function prefers UTF-16 over UTF-8.  Return a
+** pointer to the FuncDef structure that defines that function, or return
+** NULL if the function does not exist.
+**
+** If the createFlag argument is true, then a new (blank) FuncDef
+** structure is created and liked into the "db" structure if a
+** no matching function previously existed.
+**
+** If nArg is -2, then the first valid function found is returned.  A
+** function is valid if either xFunc or xStep is non-zero.  The nArg==(-2)
+** case is used to see if zName is a valid function name for some number
+** of arguments.  If nArg is -2, then createFlag must be 0.
+**
+** If createFlag is false, then a function with the required name and
+** number of arguments may be returned even if the eTextRep flag does not
+** match that requested.
+*/
+SQLITE_PRIVATE FuncDef *sqlite3FindFunction(
+  sqlite3 *db,       /* An open database */
+  const char *zName, /* Name of the function.  Not null-terminated */
+  int nName,         /* Number of characters in the name */
+  int nArg,          /* Number of arguments.  -1 means any number */
+  u8 enc,            /* Preferred text encoding */
+  u8 createFlag      /* Create new entry if true and does not otherwise exist */
+){
+  FuncDef *p;         /* Iterator variable */
+  FuncDef *pBest = 0; /* Best match found so far */
+  int bestScore = 0;  /* Score of best match */
+  int h;              /* Hash value */
+
+  assert( nArg>=(-2) );
+  assert( nArg>=(-1) || createFlag==0 );
+  h = (sqlite3UpperToLower[(u8)zName[0]] + nName) % ArraySize(db->aFunc.a);
+
+  /* First search for a match amongst the application-defined functions.
+  */
+  p = functionSearch(&db->aFunc, h, zName, nName);
+  while( p ){
+    int score = matchQuality(p, nArg, enc);
+    if( score>bestScore ){
+      pBest = p;
+      bestScore = score;
+    }
+    p = p->pNext;
+  }
+
+  /* If no match is found, search the built-in functions.
+  **
+  ** If the SQLITE_PreferBuiltin flag is set, then search the built-in
+  ** functions even if a prior app-defined function was found.  And give
+  ** priority to built-in functions.
+  **
+  ** Except, if createFlag is true, that means that we are trying to
+  ** install a new function.  Whatever FuncDef structure is returned it will
+  ** have fields overwritten with new information appropriate for the
+  ** new function.  But the FuncDefs for built-in functions are read-only.
+  ** So we must not search for built-ins when creating a new function.
+  */ 
+  if( !createFlag && (pBest==0 || (db->flags & SQLITE_PreferBuiltin)!=0) ){
+    FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+    bestScore = 0;
+    p = functionSearch(pHash, h, zName, nName);
+    while( p ){
+      int score = matchQuality(p, nArg, enc);
+      if( score>bestScore ){
+        pBest = p;
+        bestScore = score;
+      }
+      p = p->pNext;
+    }
+  }
+
+  /* If the createFlag parameter is true and the search did not reveal an
+  ** exact match for the name, number of arguments and encoding, then add a
+  ** new entry to the hash table and return it.
+  */
+  if( createFlag && bestScore<FUNC_PERFECT_MATCH && 
+      (pBest = sqlite3DbMallocZero(db, sizeof(*pBest)+nName+1))!=0 ){
+    pBest->zName = (char *)&pBest[1];
+    pBest->nArg = (u16)nArg;
+    pBest->funcFlags = enc;
+    memcpy(pBest->zName, zName, nName);
+    pBest->zName[nName] = 0;
+    sqlite3FuncDefInsert(&db->aFunc, pBest);
+  }
+
+  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
+    return pBest;
+  }
+  return 0;
+}
+
+/*
+** Free all resources held by the schema structure. The void* argument points
+** at a Schema struct. This function does not call sqlite3DbFree(db, ) on the 
+** pointer itself, it just cleans up subsidiary resources (i.e. the contents
+** of the schema hash tables).
+**
+** The Schema.cache_size variable is not cleared.
+*/
+SQLITE_PRIVATE void sqlite3SchemaClear(void *p){
+  Hash temp1;
+  Hash temp2;
+  HashElem *pElem;
+  Schema *pSchema = (Schema *)p;
+
+  temp1 = pSchema->tblHash;
+  temp2 = pSchema->trigHash;
+  sqlite3HashInit(&pSchema->trigHash);
+  sqlite3HashClear(&pSchema->idxHash);
+  for(pElem=sqliteHashFirst(&temp2); pElem; pElem=sqliteHashNext(pElem)){
+    sqlite3DeleteTrigger(0, (Trigger*)sqliteHashData(pElem));
+  }
+  sqlite3HashClear(&temp2);
+  sqlite3HashInit(&pSchema->tblHash);
+  for(pElem=sqliteHashFirst(&temp1); pElem; pElem=sqliteHashNext(pElem)){
+    Table *pTab = sqliteHashData(pElem);
+    sqlite3DeleteTable(0, pTab);
+  }
+  sqlite3HashClear(&temp1);
+  sqlite3HashClear(&pSchema->fkeyHash);
+  pSchema->pSeqTab = 0;
+  if( pSchema->schemaFlags & DB_SchemaLoaded ){
+    pSchema->iGeneration++;
+    pSchema->schemaFlags &= ~DB_SchemaLoaded;
+  }
+}
+
+/*
+** Find and return the schema associated with a BTree.  Create
+** a new one if necessary.
+*/
+SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *db, Btree *pBt){
+  Schema * p;
+  if( pBt ){
+    p = (Schema *)sqlite3BtreeSchema(pBt, sizeof(Schema), sqlite3SchemaClear);
+  }else{
+    p = (Schema *)sqlite3DbMallocZero(0, sizeof(Schema));
+  }
+  if( !p ){
+    db->mallocFailed = 1;
+  }else if ( 0==p->file_format ){
+    sqlite3HashInit(&p->tblHash);
+    sqlite3HashInit(&p->idxHash);
+    sqlite3HashInit(&p->trigHash);
+    sqlite3HashInit(&p->fkeyHash);
+    p->enc = SQLITE_UTF8;
+  }
+  return p;
+}
+
+/************** End of callback.c ********************************************/
+/************** Begin file delete.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that are called by the parser
+** in order to generate code for DELETE FROM statements.
+*/
+
+/*
+** While a SrcList can in general represent multiple tables and subqueries
+** (as in the FROM clause of a SELECT statement) in this case it contains
+** the name of a single table, as one might find in an INSERT, DELETE,
+** or UPDATE statement.  Look up that table in the symbol table and
+** return a pointer.  Set an error message and return NULL if the table 
+** name is not found or if any other error occurs.
+**
+** The following fields are initialized appropriate in pSrc:
+**
+**    pSrc->a[0].pTab       Pointer to the Table object
+**    pSrc->a[0].pIndex     Pointer to the INDEXED BY index, if there is one
+**
+*/
+SQLITE_PRIVATE Table *sqlite3SrcListLookup(Parse *pParse, SrcList *pSrc){
+  struct SrcList_item *pItem = pSrc->a;
+  Table *pTab;
+  assert( pItem && pSrc->nSrc==1 );
+  pTab = sqlite3LocateTableItem(pParse, 0, pItem);
+  sqlite3DeleteTable(pParse->db, pItem->pTab);
+  pItem->pTab = pTab;
+  if( pTab ){
+    pTab->nRef++;
+  }
+  if( sqlite3IndexedByLookup(pParse, pItem) ){
+    pTab = 0;
+  }
+  return pTab;
+}
+
+/*
+** Check to make sure the given table is writable.  If it is not
+** writable, generate an error message and return 1.  If it is
+** writable return 0;
+*/
+SQLITE_PRIVATE int sqlite3IsReadOnly(Parse *pParse, Table *pTab, int viewOk){
+  /* A table is not writable under the following circumstances:
+  **
+  **   1) It is a virtual table and no implementation of the xUpdate method
+  **      has been provided, or
+  **   2) It is a system table (i.e. sqlite_master), this call is not
+  **      part of a nested parse and writable_schema pragma has not 
+  **      been specified.
+  **
+  ** In either case leave an error message in pParse and return non-zero.
+  */
+  if( ( IsVirtual(pTab) 
+     && sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0 )
+   || ( (pTab->tabFlags & TF_Readonly)!=0
+     && (pParse->db->flags & SQLITE_WriteSchema)==0
+     && pParse->nested==0 )
+  ){
+    sqlite3ErrorMsg(pParse, "table %s may not be modified", pTab->zName);
+    return 1;
+  }
+
+#ifndef SQLITE_OMIT_VIEW
+  if( !viewOk && pTab->pSelect ){
+    sqlite3ErrorMsg(pParse,"cannot modify %s because it is a view",pTab->zName);
+    return 1;
+  }
+#endif
+  return 0;
+}
+
+
+#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER)
+/*
+** Evaluate a view and store its result in an ephemeral table.  The
+** pWhere argument is an optional WHERE clause that restricts the
+** set of rows in the view that are to be added to the ephemeral table.
+*/
+SQLITE_PRIVATE void sqlite3MaterializeView(
+  Parse *pParse,       /* Parsing context */
+  Table *pView,        /* View definition */
+  Expr *pWhere,        /* Optional WHERE clause to be added */
+  int iCur             /* Cursor number for ephemeral table */
+){
+  SelectDest dest;
+  Select *pSel;
+  SrcList *pFrom;
+  sqlite3 *db = pParse->db;
+  int iDb = sqlite3SchemaToIndex(db, pView->pSchema);
+  pWhere = sqlite3ExprDup(db, pWhere, 0);
+  pFrom = sqlite3SrcListAppend(db, 0, 0, 0);
+  if( pFrom ){
+    assert( pFrom->nSrc==1 );
+    pFrom->a[0].zName = sqlite3DbStrDup(db, pView->zName);
+    pFrom->a[0].zDatabase = sqlite3DbStrDup(db, db->aDb[iDb].zName);
+    assert( pFrom->a[0].pOn==0 );
+    assert( pFrom->a[0].pUsing==0 );
+  }
+  pSel = sqlite3SelectNew(pParse, 0, pFrom, pWhere, 0, 0, 0, 0, 0, 0);
+  sqlite3SelectDestInit(&dest, SRT_EphemTab, iCur);
+  sqlite3Select(pParse, pSel, &dest);
+  sqlite3SelectDelete(db, pSel);
+}
+#endif /* !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER) */
+
+#if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY)
+/*
+** Generate an expression tree to implement the WHERE, ORDER BY,
+** and LIMIT/OFFSET portion of DELETE and UPDATE statements.
+**
+**     DELETE FROM table_wxyz WHERE a<5 ORDER BY a LIMIT 1;
+**                            \__________________________/
+**                               pLimitWhere (pInClause)
+*/
+SQLITE_PRIVATE Expr *sqlite3LimitWhere(
+  Parse *pParse,               /* The parser context */
+  SrcList *pSrc,               /* the FROM clause -- which tables to scan */
+  Expr *pWhere,                /* The WHERE clause.  May be null */
+  ExprList *pOrderBy,          /* The ORDER BY clause.  May be null */
+  Expr *pLimit,                /* The LIMIT clause.  May be null */
+  Expr *pOffset,               /* The OFFSET clause.  May be null */
+  char *zStmtType              /* Either DELETE or UPDATE.  For err msgs. */
+){
+  Expr *pWhereRowid = NULL;    /* WHERE rowid .. */
+  Expr *pInClause = NULL;      /* WHERE rowid IN ( select ) */
+  Expr *pSelectRowid = NULL;   /* SELECT rowid ... */
+  ExprList *pEList = NULL;     /* Expression list contaning only pSelectRowid */
+  SrcList *pSelectSrc = NULL;  /* SELECT rowid FROM x ... (dup of pSrc) */
+  Select *pSelect = NULL;      /* Complete SELECT tree */
+
+  /* Check that there isn't an ORDER BY without a LIMIT clause.
+  */
+  if( pOrderBy && (pLimit == 0) ) {
+    sqlite3ErrorMsg(pParse, "ORDER BY without LIMIT on %s", zStmtType);
+    goto limit_where_cleanup_2;
+  }
+
+  /* We only need to generate a select expression if there
+  ** is a limit/offset term to enforce.
+  */
+  if( pLimit == 0 ) {
+    /* if pLimit is null, pOffset will always be null as well. */
+    assert( pOffset == 0 );
+    return pWhere;
+  }
+
+  /* Generate a select expression tree to enforce the limit/offset 
+  ** term for the DELETE or UPDATE statement.  For example:
+  **   DELETE FROM table_a WHERE col1=1 ORDER BY col2 LIMIT 1 OFFSET 1
+  ** becomes:
+  **   DELETE FROM table_a WHERE rowid IN ( 
+  **     SELECT rowid FROM table_a WHERE col1=1 ORDER BY col2 LIMIT 1 OFFSET 1
+  **   );
+  */
+
+  pSelectRowid = sqlite3PExpr(pParse, TK_ROW, 0, 0, 0);
+  if( pSelectRowid == 0 ) goto limit_where_cleanup_2;
+  pEList = sqlite3ExprListAppend(pParse, 0, pSelectRowid);
+  if( pEList == 0 ) goto limit_where_cleanup_2;
+
+  /* duplicate the FROM clause as it is needed by both the DELETE/UPDATE tree
+  ** and the SELECT subtree. */
+  pSelectSrc = sqlite3SrcListDup(pParse->db, pSrc, 0);
+  if( pSelectSrc == 0 ) {
+    sqlite3ExprListDelete(pParse->db, pEList);
+    goto limit_where_cleanup_2;
+  }
+
+  /* generate the SELECT expression tree. */
+  pSelect = sqlite3SelectNew(pParse,pEList,pSelectSrc,pWhere,0,0,
+                             pOrderBy,0,pLimit,pOffset);
+  if( pSelect == 0 ) return 0;
+
+  /* now generate the new WHERE rowid IN clause for the DELETE/UDPATE */
+  pWhereRowid = sqlite3PExpr(pParse, TK_ROW, 0, 0, 0);
+  if( pWhereRowid == 0 ) goto limit_where_cleanup_1;
+  pInClause = sqlite3PExpr(pParse, TK_IN, pWhereRowid, 0, 0);
+  if( pInClause == 0 ) goto limit_where_cleanup_1;
+
+  pInClause->x.pSelect = pSelect;
+  pInClause->flags |= EP_xIsSelect;
+  sqlite3ExprSetHeight(pParse, pInClause);
+  return pInClause;
+
+  /* something went wrong. clean up anything allocated. */
+limit_where_cleanup_1:
+  sqlite3SelectDelete(pParse->db, pSelect);
+  return 0;
+
+limit_where_cleanup_2:
+  sqlite3ExprDelete(pParse->db, pWhere);
+  sqlite3ExprListDelete(pParse->db, pOrderBy);
+  sqlite3ExprDelete(pParse->db, pLimit);
+  sqlite3ExprDelete(pParse->db, pOffset);
+  return 0;
+}
+#endif /* defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) */
+       /*      && !defined(SQLITE_OMIT_SUBQUERY) */
+
+/*
+** Generate code for a DELETE FROM statement.
+**
+**     DELETE FROM table_wxyz WHERE a<5 AND b NOT NULL;
+**                 \________/       \________________/
+**                  pTabList              pWhere
+*/
+SQLITE_PRIVATE void sqlite3DeleteFrom(
+  Parse *pParse,         /* The parser context */
+  SrcList *pTabList,     /* The table from which we should delete things */
+  Expr *pWhere           /* The WHERE clause.  May be null */
+){
+  Vdbe *v;               /* The virtual database engine */
+  Table *pTab;           /* The table from which records will be deleted */
+  const char *zDb;       /* Name of database holding pTab */
+  int i;                 /* Loop counter */
+  WhereInfo *pWInfo;     /* Information about the WHERE clause */
+  Index *pIdx;           /* For looping over indices of the table */
+  int iTabCur;           /* Cursor number for the table */
+  int iDataCur = 0;      /* VDBE cursor for the canonical data source */
+  int iIdxCur = 0;       /* Cursor number of the first index */
+  int nIdx;              /* Number of indices */
+  sqlite3 *db;           /* Main database structure */
+  AuthContext sContext;  /* Authorization context */
+  NameContext sNC;       /* Name context to resolve expressions in */
+  int iDb;               /* Database number */
+  int memCnt = -1;       /* Memory cell used for change counting */
+  int rcauth;            /* Value returned by authorization callback */
+  int okOnePass;         /* True for one-pass algorithm without the FIFO */
+  int aiCurOnePass[2];   /* The write cursors opened by WHERE_ONEPASS */
+  u8 *aToOpen = 0;       /* Open cursor iTabCur+j if aToOpen[j] is true */
+  Index *pPk;            /* The PRIMARY KEY index on the table */
+  int iPk = 0;           /* First of nPk registers holding PRIMARY KEY value */
+  i16 nPk = 1;           /* Number of columns in the PRIMARY KEY */
+  int iKey;              /* Memory cell holding key of row to be deleted */
+  i16 nKey;              /* Number of memory cells in the row key */
+  int iEphCur = 0;       /* Ephemeral table holding all primary key values */
+  int iRowSet = 0;       /* Register for rowset of rows to delete */
+  int addrBypass = 0;    /* Address of jump over the delete logic */
+  int addrLoop = 0;      /* Top of the delete loop */
+  int addrDelete = 0;    /* Jump directly to the delete logic */
+  int addrEphOpen = 0;   /* Instruction to open the Ephemeral table */
+ 
+#ifndef SQLITE_OMIT_TRIGGER
+  int isView;                  /* True if attempting to delete from a view */
+  Trigger *pTrigger;           /* List of table triggers, if required */
+#endif
+
+  memset(&sContext, 0, sizeof(sContext));
+  db = pParse->db;
+  if( pParse->nErr || db->mallocFailed ){
+    goto delete_from_cleanup;
+  }
+  assert( pTabList->nSrc==1 );
+
+  /* Locate the table which we want to delete.  This table has to be
+  ** put in an SrcList structure because some of the subroutines we
+  ** will be calling are designed to work with multiple tables and expect
+  ** an SrcList* parameter instead of just a Table* parameter.
+  */
+  pTab = sqlite3SrcListLookup(pParse, pTabList);
+  if( pTab==0 )  goto delete_from_cleanup;
+
+  /* Figure out if we have any triggers and if the table being
+  ** deleted from is a view
+  */
+#ifndef SQLITE_OMIT_TRIGGER
+  pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0);
+  isView = pTab->pSelect!=0;
+#else
+# define pTrigger 0
+# define isView 0
+#endif
+#ifdef SQLITE_OMIT_VIEW
+# undef isView
+# define isView 0
+#endif
+
+  /* If pTab is really a view, make sure it has been initialized.
+  */
+  if( sqlite3ViewGetColumnNames(pParse, pTab) ){
+    goto delete_from_cleanup;
+  }
+
+  if( sqlite3IsReadOnly(pParse, pTab, (pTrigger?1:0)) ){
+    goto delete_from_cleanup;
+  }
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  assert( iDb<db->nDb );
+  zDb = db->aDb[iDb].zName;
+  rcauth = sqlite3AuthCheck(pParse, SQLITE_DELETE, pTab->zName, 0, zDb);
+  assert( rcauth==SQLITE_OK || rcauth==SQLITE_DENY || rcauth==SQLITE_IGNORE );
+  if( rcauth==SQLITE_DENY ){
+    goto delete_from_cleanup;
+  }
+  assert(!isView || pTrigger);
+
+  /* Assign cursor numbers to the table and all its indices.
+  */
+  assert( pTabList->nSrc==1 );
+  iTabCur = pTabList->a[0].iCursor = pParse->nTab++;
+  for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){
+    pParse->nTab++;
+  }
+
+  /* Start the view context
+  */
+  if( isView ){
+    sqlite3AuthContextPush(pParse, &sContext, pTab->zName);
+  }
+
+  /* Begin generating code.
+  */
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ){
+    goto delete_from_cleanup;
+  }
+  if( pParse->nested==0 ) sqlite3VdbeCountChanges(v);
+  sqlite3BeginWriteOperation(pParse, 1, iDb);
+
+  /* If we are trying to delete from a view, realize that view into
+  ** an ephemeral table.
+  */
+#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER)
+  if( isView ){
+    sqlite3MaterializeView(pParse, pTab, pWhere, iTabCur);
+    iDataCur = iIdxCur = iTabCur;
+  }
+#endif
+
+  /* Resolve the column names in the WHERE clause.
+  */
+  memset(&sNC, 0, sizeof(sNC));
+  sNC.pParse = pParse;
+  sNC.pSrcList = pTabList;
+  if( sqlite3ResolveExprNames(&sNC, pWhere) ){
+    goto delete_from_cleanup;
+  }
+
+  /* Initialize the counter of the number of rows deleted, if
+  ** we are counting rows.
+  */
+  if( db->flags & SQLITE_CountRows ){
+    memCnt = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, memCnt);
+  }
+
+#ifndef SQLITE_OMIT_TRUNCATE_OPTIMIZATION
+  /* Special case: A DELETE without a WHERE clause deletes everything.
+  ** It is easier just to erase the whole table. Prior to version 3.6.5,
+  ** this optimization caused the row change count (the value returned by 
+  ** API function sqlite3_count_changes) to be set incorrectly.  */
+  if( rcauth==SQLITE_OK && pWhere==0 && !pTrigger && !IsVirtual(pTab) 
+   && 0==sqlite3FkRequired(pParse, pTab, 0, 0)
+  ){
+    assert( !isView );
+    sqlite3TableLock(pParse, iDb, pTab->tnum, 1, pTab->zName);
+    if( HasRowid(pTab) ){
+      sqlite3VdbeAddOp4(v, OP_Clear, pTab->tnum, iDb, memCnt,
+                        pTab->zName, P4_STATIC);
+    }
+    for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      assert( pIdx->pSchema==pTab->pSchema );
+      sqlite3VdbeAddOp2(v, OP_Clear, pIdx->tnum, iDb);
+    }
+  }else
+#endif /* SQLITE_OMIT_TRUNCATE_OPTIMIZATION */
+  {
+    if( HasRowid(pTab) ){
+      /* For a rowid table, initialize the RowSet to an empty set */
+      pPk = 0;
+      nPk = 1;
+      iRowSet = ++pParse->nMem;
+      sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet);
+    }else{
+      /* For a WITHOUT ROWID table, create an ephemeral table used to
+      ** hold all primary keys for rows to be deleted. */
+      pPk = sqlite3PrimaryKeyIndex(pTab);
+      assert( pPk!=0 );
+      nPk = pPk->nKeyCol;
+      iPk = pParse->nMem+1;
+      pParse->nMem += nPk;
+      iEphCur = pParse->nTab++;
+      addrEphOpen = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iEphCur, nPk);
+      sqlite3VdbeSetP4KeyInfo(pParse, pPk);
+    }
+  
+    /* Construct a query to find the rowid or primary key for every row
+    ** to be deleted, based on the WHERE clause.
+    */
+    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0, 
+                               WHERE_ONEPASS_DESIRED|WHERE_DUPLICATES_OK,
+                               iTabCur+1);
+    if( pWInfo==0 ) goto delete_from_cleanup;
+    okOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass);
+  
+    /* Keep track of the number of rows to be deleted */
+    if( db->flags & SQLITE_CountRows ){
+      sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1);
+    }
+  
+    /* Extract the rowid or primary key for the current row */
+    if( pPk ){
+      for(i=0; i<nPk; i++){
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iTabCur,
+                                        pPk->aiColumn[i], iPk+i);
+      }
+      iKey = iPk;
+    }else{
+      iKey = pParse->nMem + 1;
+      iKey = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iTabCur, iKey, 0);
+      if( iKey>pParse->nMem ) pParse->nMem = iKey;
+    }
+  
+    if( okOnePass ){
+      /* For ONEPASS, no need to store the rowid/primary-key.  There is only
+      ** one, so just keep it in its register(s) and fall through to the
+      ** delete code.
+      */
+      nKey = nPk; /* OP_Found will use an unpacked key */
+      aToOpen = sqlite3DbMallocRaw(db, nIdx+2);
+      if( aToOpen==0 ){
+        sqlite3WhereEnd(pWInfo);
+        goto delete_from_cleanup;
+      }
+      memset(aToOpen, 1, nIdx+1);
+      aToOpen[nIdx+1] = 0;
+      if( aiCurOnePass[0]>=0 ) aToOpen[aiCurOnePass[0]-iTabCur] = 0;
+      if( aiCurOnePass[1]>=0 ) aToOpen[aiCurOnePass[1]-iTabCur] = 0;
+      if( addrEphOpen ) sqlite3VdbeChangeToNoop(v, addrEphOpen);
+      addrDelete = sqlite3VdbeAddOp0(v, OP_Goto); /* Jump to DELETE logic */
+    }else if( pPk ){
+      /* Construct a composite key for the row to be deleted and remember it */
+      iKey = ++pParse->nMem;
+      nKey = 0;   /* Zero tells OP_Found to use a composite key */
+      sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, iKey,
+                        sqlite3IndexAffinityStr(v, pPk), nPk);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, iEphCur, iKey);
+    }else{
+      /* Get the rowid of the row to be deleted and remember it in the RowSet */
+      nKey = 1;  /* OP_Seek always uses a single rowid */
+      sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, iKey);
+    }
+  
+    /* End of the WHERE loop */
+    sqlite3WhereEnd(pWInfo);
+    if( okOnePass ){
+      /* Bypass the delete logic below if the WHERE loop found zero rows */
+      addrBypass = sqlite3VdbeMakeLabel(v);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, addrBypass);
+      sqlite3VdbeJumpHere(v, addrDelete);
+    }
+  
+    /* Unless this is a view, open cursors for the table we are 
+    ** deleting from and all its indices. If this is a view, then the
+    ** only effect this statement has is to fire the INSTEAD OF 
+    ** triggers.
+    */
+    if( !isView ){
+      testcase( IsVirtual(pTab) );
+      sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, iTabCur, aToOpen,
+                                 &iDataCur, &iIdxCur);
+      assert( pPk || IsVirtual(pTab) || iDataCur==iTabCur );
+      assert( pPk || IsVirtual(pTab) || iIdxCur==iDataCur+1 );
+    }
+  
+    /* Set up a loop over the rowids/primary-keys that were found in the
+    ** where-clause loop above.
+    */
+    if( okOnePass ){
+      /* Just one row.  Hence the top-of-loop is a no-op */
+      assert( nKey==nPk );  /* OP_Found will use an unpacked key */
+      assert( !IsVirtual(pTab) );
+      if( aToOpen[iDataCur-iTabCur] ){
+        assert( pPk!=0 || pTab->pSelect!=0 );
+        sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, addrBypass, iKey, nKey);
+        VdbeCoverage(v);
+      }
+    }else if( pPk ){
+      addrLoop = sqlite3VdbeAddOp1(v, OP_Rewind, iEphCur); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_RowKey, iEphCur, iKey);
+      assert( nKey==0 );  /* OP_Found will use a composite key */
+    }else{
+      addrLoop = sqlite3VdbeAddOp3(v, OP_RowSetRead, iRowSet, 0, iKey);
+      VdbeCoverage(v);
+      assert( nKey==1 );
+    }  
+  
+    /* Delete the row */
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    if( IsVirtual(pTab) ){
+      const char *pVTab = (const char *)sqlite3GetVTable(db, pTab);
+      sqlite3VtabMakeWritable(pParse, pTab);
+      sqlite3VdbeAddOp4(v, OP_VUpdate, 0, 1, iKey, pVTab, P4_VTAB);
+      sqlite3VdbeChangeP5(v, OE_Abort);
+      sqlite3MayAbort(pParse);
+    }else
+#endif
+    {
+      int count = (pParse->nested==0);    /* True to count changes */
+      sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur,
+                               iKey, nKey, count, OE_Default, okOnePass);
+    }
+  
+    /* End of the loop over all rowids/primary-keys. */
+    if( okOnePass ){
+      sqlite3VdbeResolveLabel(v, addrBypass);
+    }else if( pPk ){
+      sqlite3VdbeAddOp2(v, OP_Next, iEphCur, addrLoop+1); VdbeCoverage(v);
+      sqlite3VdbeJumpHere(v, addrLoop);
+    }else{
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, addrLoop);
+      sqlite3VdbeJumpHere(v, addrLoop);
+    }     
+  
+    /* Close the cursors open on the table and its indexes. */
+    if( !isView && !IsVirtual(pTab) ){
+      if( !pPk ) sqlite3VdbeAddOp1(v, OP_Close, iDataCur);
+      for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){
+        sqlite3VdbeAddOp1(v, OP_Close, iIdxCur + i);
+      }
+    }
+  } /* End non-truncate path */
+
+  /* Update the sqlite_sequence table by storing the content of the
+  ** maximum rowid counter values recorded while inserting into
+  ** autoincrement tables.
+  */
+  if( pParse->nested==0 && pParse->pTriggerTab==0 ){
+    sqlite3AutoincrementEnd(pParse);
+  }
+
+  /* Return the number of rows that were deleted. If this routine is 
+  ** generating code because of a call to sqlite3NestedParse(), do not
+  ** invoke the callback function.
+  */
+  if( (db->flags&SQLITE_CountRows) && !pParse->nested && !pParse->pTriggerTab ){
+    sqlite3VdbeAddOp2(v, OP_ResultRow, memCnt, 1);
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "rows deleted", SQLITE_STATIC);
+  }
+
+delete_from_cleanup:
+  sqlite3AuthContextPop(&sContext);
+  sqlite3SrcListDelete(db, pTabList);
+  sqlite3ExprDelete(db, pWhere);
+  sqlite3DbFree(db, aToOpen);
+  return;
+}
+/* Make sure "isView" and other macros defined above are undefined. Otherwise
+** they may interfere with compilation of other functions in this file
+** (or in another file, if this file becomes part of the amalgamation).  */
+#ifdef isView
+ #undef isView
+#endif
+#ifdef pTrigger
+ #undef pTrigger
+#endif
+
+/*
+** This routine generates VDBE code that causes a single row of a
+** single table to be deleted.  Both the original table entry and
+** all indices are removed.
+**
+** Preconditions:
+**
+**   1.  iDataCur is an open cursor on the btree that is the canonical data
+**       store for the table.  (This will be either the table itself,
+**       in the case of a rowid table, or the PRIMARY KEY index in the case
+**       of a WITHOUT ROWID table.)
+**
+**   2.  Read/write cursors for all indices of pTab must be open as
+**       cursor number iIdxCur+i for the i-th index.
+**
+**   3.  The primary key for the row to be deleted must be stored in a
+**       sequence of nPk memory cells starting at iPk.  If nPk==0 that means
+**       that a search record formed from OP_MakeRecord is contained in the
+**       single memory location iPk.
+*/
+SQLITE_PRIVATE void sqlite3GenerateRowDelete(
+  Parse *pParse,     /* Parsing context */
+  Table *pTab,       /* Table containing the row to be deleted */
+  Trigger *pTrigger, /* List of triggers to (potentially) fire */
+  int iDataCur,      /* Cursor from which column data is extracted */
+  int iIdxCur,       /* First index cursor */
+  int iPk,           /* First memory cell containing the PRIMARY KEY */
+  i16 nPk,           /* Number of PRIMARY KEY memory cells */
+  u8 count,          /* If non-zero, increment the row change counter */
+  u8 onconf,         /* Default ON CONFLICT policy for triggers */
+  u8 bNoSeek         /* iDataCur is already pointing to the row to delete */
+){
+  Vdbe *v = pParse->pVdbe;        /* Vdbe */
+  int iOld = 0;                   /* First register in OLD.* array */
+  int iLabel;                     /* Label resolved to end of generated code */
+  u8 opSeek;                      /* Seek opcode */
+
+  /* Vdbe is guaranteed to have been allocated by this stage. */
+  assert( v );
+  VdbeModuleComment((v, "BEGIN: GenRowDel(%d,%d,%d,%d)",
+                         iDataCur, iIdxCur, iPk, (int)nPk));
+
+  /* Seek cursor iCur to the row to delete. If this row no longer exists 
+  ** (this can happen if a trigger program has already deleted it), do
+  ** not attempt to delete it or fire any DELETE triggers.  */
+  iLabel = sqlite3VdbeMakeLabel(v);
+  opSeek = HasRowid(pTab) ? OP_NotExists : OP_NotFound;
+  if( !bNoSeek ){
+    sqlite3VdbeAddOp4Int(v, opSeek, iDataCur, iLabel, iPk, nPk);
+    VdbeCoverageIf(v, opSeek==OP_NotExists);
+    VdbeCoverageIf(v, opSeek==OP_NotFound);
+  }
+ 
+  /* If there are any triggers to fire, allocate a range of registers to
+  ** use for the old.* references in the triggers.  */
+  if( sqlite3FkRequired(pParse, pTab, 0, 0) || pTrigger ){
+    u32 mask;                     /* Mask of OLD.* columns in use */
+    int iCol;                     /* Iterator used while populating OLD.* */
+    int addrStart;                /* Start of BEFORE trigger programs */
+
+    /* TODO: Could use temporary registers here. Also could attempt to
+    ** avoid copying the contents of the rowid register.  */
+    mask = sqlite3TriggerColmask(
+        pParse, pTrigger, 0, 0, TRIGGER_BEFORE|TRIGGER_AFTER, pTab, onconf
+    );
+    mask |= sqlite3FkOldmask(pParse, pTab);
+    iOld = pParse->nMem+1;
+    pParse->nMem += (1 + pTab->nCol);
+
+    /* Populate the OLD.* pseudo-table register array. These values will be 
+    ** used by any BEFORE and AFTER triggers that exist.  */
+    sqlite3VdbeAddOp2(v, OP_Copy, iPk, iOld);
+    for(iCol=0; iCol<pTab->nCol; iCol++){
+      testcase( mask!=0xffffffff && iCol==31 );
+      testcase( mask!=0xffffffff && iCol==32 );
+      if( mask==0xffffffff || (iCol<=31 && (mask & MASKBIT32(iCol))!=0) ){
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, iCol, iOld+iCol+1);
+      }
+    }
+
+    /* Invoke BEFORE DELETE trigger programs. */
+    addrStart = sqlite3VdbeCurrentAddr(v);
+    sqlite3CodeRowTrigger(pParse, pTrigger, 
+        TK_DELETE, 0, TRIGGER_BEFORE, pTab, iOld, onconf, iLabel
+    );
+
+    /* If any BEFORE triggers were coded, then seek the cursor to the 
+    ** row to be deleted again. It may be that the BEFORE triggers moved
+    ** the cursor or of already deleted the row that the cursor was
+    ** pointing to.
+    */
+    if( addrStart<sqlite3VdbeCurrentAddr(v) ){
+      sqlite3VdbeAddOp4Int(v, opSeek, iDataCur, iLabel, iPk, nPk);
+      VdbeCoverageIf(v, opSeek==OP_NotExists);
+      VdbeCoverageIf(v, opSeek==OP_NotFound);
+    }
+
+    /* Do FK processing. This call checks that any FK constraints that
+    ** refer to this table (i.e. constraints attached to other tables) 
+    ** are not violated by deleting this row.  */
+    sqlite3FkCheck(pParse, pTab, iOld, 0, 0, 0);
+  }
+
+  /* Delete the index and table entries. Skip this step if pTab is really
+  ** a view (in which case the only effect of the DELETE statement is to
+  ** fire the INSTEAD OF triggers).  */ 
+  if( pTab->pSelect==0 ){
+    sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, 0);
+    sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, (count?OPFLAG_NCHANGE:0));
+    if( count ){
+      sqlite3VdbeChangeP4(v, -1, pTab->zName, P4_TRANSIENT);
+    }
+  }
+
+  /* Do any ON CASCADE, SET NULL or SET DEFAULT operations required to
+  ** handle rows (possibly in other tables) that refer via a foreign key
+  ** to the row just deleted. */ 
+  sqlite3FkActions(pParse, pTab, 0, iOld, 0, 0);
+
+  /* Invoke AFTER DELETE trigger programs. */
+  sqlite3CodeRowTrigger(pParse, pTrigger, 
+      TK_DELETE, 0, TRIGGER_AFTER, pTab, iOld, onconf, iLabel
+  );
+
+  /* Jump here if the row had already been deleted before any BEFORE
+  ** trigger programs were invoked. Or if a trigger program throws a 
+  ** RAISE(IGNORE) exception.  */
+  sqlite3VdbeResolveLabel(v, iLabel);
+  VdbeModuleComment((v, "END: GenRowDel()"));
+}
+
+/*
+** This routine generates VDBE code that causes the deletion of all
+** index entries associated with a single row of a single table, pTab
+**
+** Preconditions:
+**
+**   1.  A read/write cursor "iDataCur" must be open on the canonical storage
+**       btree for the table pTab.  (This will be either the table itself
+**       for rowid tables or to the primary key index for WITHOUT ROWID
+**       tables.)
+**
+**   2.  Read/write cursors for all indices of pTab must be open as
+**       cursor number iIdxCur+i for the i-th index.  (The pTab->pIndex
+**       index is the 0-th index.)
+**
+**   3.  The "iDataCur" cursor must be already be positioned on the row
+**       that is to be deleted.
+*/
+SQLITE_PRIVATE void sqlite3GenerateRowIndexDelete(
+  Parse *pParse,     /* Parsing and code generating context */
+  Table *pTab,       /* Table containing the row to be deleted */
+  int iDataCur,      /* Cursor of table holding data. */
+  int iIdxCur,       /* First index cursor */
+  int *aRegIdx       /* Only delete if aRegIdx!=0 && aRegIdx[i]>0 */
+){
+  int i;             /* Index loop counter */
+  int r1 = -1;       /* Register holding an index key */
+  int iPartIdxLabel; /* Jump destination for skipping partial index entries */
+  Index *pIdx;       /* Current index */
+  Index *pPrior = 0; /* Prior index */
+  Vdbe *v;           /* The prepared statement under construction */
+  Index *pPk;        /* PRIMARY KEY index, or NULL for rowid tables */
+
+  v = pParse->pVdbe;
+  pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab);
+  for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){
+    assert( iIdxCur+i!=iDataCur || pPk==pIdx );
+    if( aRegIdx!=0 && aRegIdx[i]==0 ) continue;
+    if( pIdx==pPk ) continue;
+    VdbeModuleComment((v, "GenRowIdxDel for %s", pIdx->zName));
+    r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 1,
+                                 &iPartIdxLabel, pPrior, r1);
+    sqlite3VdbeAddOp3(v, OP_IdxDelete, iIdxCur+i, r1,
+                      pIdx->uniqNotNull ? pIdx->nKeyCol : pIdx->nColumn);
+    sqlite3ResolvePartIdxLabel(pParse, iPartIdxLabel);
+    pPrior = pIdx;
+  }
+}
+
+/*
+** Generate code that will assemble an index key and stores it in register
+** regOut.  The key with be for index pIdx which is an index on pTab.
+** iCur is the index of a cursor open on the pTab table and pointing to
+** the entry that needs indexing.  If pTab is a WITHOUT ROWID table, then
+** iCur must be the cursor of the PRIMARY KEY index.
+**
+** Return a register number which is the first in a block of
+** registers that holds the elements of the index key.  The
+** block of registers has already been deallocated by the time
+** this routine returns.
+**
+** If *piPartIdxLabel is not NULL, fill it in with a label and jump
+** to that label if pIdx is a partial index that should be skipped.
+** The label should be resolved using sqlite3ResolvePartIdxLabel().
+** A partial index should be skipped if its WHERE clause evaluates
+** to false or null.  If pIdx is not a partial index, *piPartIdxLabel
+** will be set to zero which is an empty label that is ignored by
+** sqlite3ResolvePartIdxLabel().
+**
+** The pPrior and regPrior parameters are used to implement a cache to
+** avoid unnecessary register loads.  If pPrior is not NULL, then it is
+** a pointer to a different index for which an index key has just been
+** computed into register regPrior.  If the current pIdx index is generating
+** its key into the same sequence of registers and if pPrior and pIdx share
+** a column in common, then the register corresponding to that column already
+** holds the correct value and the loading of that register is skipped.
+** This optimization is helpful when doing a DELETE or an INTEGRITY_CHECK 
+** on a table with multiple indices, and especially with the ROWID or
+** PRIMARY KEY columns of the index.
+*/
+SQLITE_PRIVATE int sqlite3GenerateIndexKey(
+  Parse *pParse,       /* Parsing context */
+  Index *pIdx,         /* The index for which to generate a key */
+  int iDataCur,        /* Cursor number from which to take column data */
+  int regOut,          /* Put the new key into this register if not 0 */
+  int prefixOnly,      /* Compute only a unique prefix of the key */
+  int *piPartIdxLabel, /* OUT: Jump to this label to skip partial index */
+  Index *pPrior,       /* Previously generated index key */
+  int regPrior         /* Register holding previous generated key */
+){
+  Vdbe *v = pParse->pVdbe;
+  int j;
+  Table *pTab = pIdx->pTable;
+  int regBase;
+  int nCol;
+
+  if( piPartIdxLabel ){
+    if( pIdx->pPartIdxWhere ){
+      *piPartIdxLabel = sqlite3VdbeMakeLabel(v);
+      pParse->iPartIdxTab = iDataCur;
+      sqlite3ExprCachePush(pParse);
+      sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, 
+                         SQLITE_JUMPIFNULL);
+    }else{
+      *piPartIdxLabel = 0;
+    }
+  }
+  nCol = (prefixOnly && pIdx->uniqNotNull) ? pIdx->nKeyCol : pIdx->nColumn;
+  regBase = sqlite3GetTempRange(pParse, nCol);
+  if( pPrior && (regBase!=regPrior || pPrior->pPartIdxWhere) ) pPrior = 0;
+  for(j=0; j<nCol; j++){
+    if( pPrior && pPrior->aiColumn[j]==pIdx->aiColumn[j] ) continue;
+    sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, pIdx->aiColumn[j],
+                                    regBase+j);
+    /* If the column affinity is REAL but the number is an integer, then it
+    ** might be stored in the table as an integer (using a compact
+    ** representation) then converted to REAL by an OP_RealAffinity opcode.
+    ** But we are getting ready to store this value back into an index, where
+    ** it should be converted by to INTEGER again.  So omit the OP_RealAffinity
+    ** opcode if it is present */
+    sqlite3VdbeDeletePriorOpcode(v, OP_RealAffinity);
+  }
+  if( regOut ){
+    sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regOut);
+  }
+  sqlite3ReleaseTempRange(pParse, regBase, nCol);
+  return regBase;
+}
+
+/*
+** If a prior call to sqlite3GenerateIndexKey() generated a jump-over label
+** because it was a partial index, then this routine should be called to
+** resolve that label.
+*/
+SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse *pParse, int iLabel){
+  if( iLabel ){
+    sqlite3VdbeResolveLabel(pParse->pVdbe, iLabel);
+    sqlite3ExprCachePop(pParse);
+  }
+}
+
+/************** End of delete.c **********************************************/
+/************** Begin file func.c ********************************************/
+/*
+** 2002 February 23
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the C-language implementations for many of the SQL
+** functions of SQLite.  (Some function, and in particular the date and
+** time functions, are implemented separately.)
+*/
+/* #include <stdlib.h> */
+/* #include <assert.h> */
+
+/*
+** Return the collating function associated with a function.
+*/
+static CollSeq *sqlite3GetFuncCollSeq(sqlite3_context *context){
+  VdbeOp *pOp = &context->pVdbe->aOp[context->iOp-1];
+  assert( pOp->opcode==OP_CollSeq );
+  assert( pOp->p4type==P4_COLLSEQ );
+  return pOp->p4.pColl;
+}
+
+/*
+** Indicate that the accumulator load should be skipped on this
+** iteration of the aggregate loop.
+*/
+static void sqlite3SkipAccumulatorLoad(sqlite3_context *context){
+  context->skipFlag = 1;
+}
+
+/*
+** Implementation of the non-aggregate min() and max() functions
+*/
+static void minmaxFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int i;
+  int mask;    /* 0 for min() or 0xffffffff for max() */
+  int iBest;
+  CollSeq *pColl;
+
+  assert( argc>1 );
+  mask = sqlite3_user_data(context)==0 ? 0 : -1;
+  pColl = sqlite3GetFuncCollSeq(context);
+  assert( pColl );
+  assert( mask==-1 || mask==0 );
+  iBest = 0;
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+  for(i=1; i<argc; i++){
+    if( sqlite3_value_type(argv[i])==SQLITE_NULL ) return;
+    if( (sqlite3MemCompare(argv[iBest], argv[i], pColl)^mask)>=0 ){
+      testcase( mask==0 );
+      iBest = i;
+    }
+  }
+  sqlite3_result_value(context, argv[iBest]);
+}
+
+/*
+** Return the type of the argument.
+*/
+static void typeofFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  const char *z = 0;
+  UNUSED_PARAMETER(NotUsed);
+  switch( sqlite3_value_type(argv[0]) ){
+    case SQLITE_INTEGER: z = "integer"; break;
+    case SQLITE_TEXT:    z = "text";    break;
+    case SQLITE_FLOAT:   z = "real";    break;
+    case SQLITE_BLOB:    z = "blob";    break;
+    default:             z = "null";    break;
+  }
+  sqlite3_result_text(context, z, -1, SQLITE_STATIC);
+}
+
+
+/*
+** Implementation of the length() function
+*/
+static void lengthFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int len;
+
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  switch( sqlite3_value_type(argv[0]) ){
+    case SQLITE_BLOB:
+    case SQLITE_INTEGER:
+    case SQLITE_FLOAT: {
+      sqlite3_result_int(context, sqlite3_value_bytes(argv[0]));
+      break;
+    }
+    case SQLITE_TEXT: {
+      const unsigned char *z = sqlite3_value_text(argv[0]);
+      if( z==0 ) return;
+      len = 0;
+      while( *z ){
+        len++;
+        SQLITE_SKIP_UTF8(z);
+      }
+      sqlite3_result_int(context, len);
+      break;
+    }
+    default: {
+      sqlite3_result_null(context);
+      break;
+    }
+  }
+}
+
+/*
+** Implementation of the abs() function.
+**
+** IMP: R-23979-26855 The abs(X) function returns the absolute value of
+** the numeric argument X. 
+*/
+static void absFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  switch( sqlite3_value_type(argv[0]) ){
+    case SQLITE_INTEGER: {
+      i64 iVal = sqlite3_value_int64(argv[0]);
+      if( iVal<0 ){
+        if( iVal==SMALLEST_INT64 ){
+          /* IMP: R-31676-45509 If X is the integer -9223372036854775808
+          ** then abs(X) throws an integer overflow error since there is no
+          ** equivalent positive 64-bit two complement value. */
+          sqlite3_result_error(context, "integer overflow", -1);
+          return;
+        }
+        iVal = -iVal;
+      } 
+      sqlite3_result_int64(context, iVal);
+      break;
+    }
+    case SQLITE_NULL: {
+      /* IMP: R-37434-19929 Abs(X) returns NULL if X is NULL. */
+      sqlite3_result_null(context);
+      break;
+    }
+    default: {
+      /* Because sqlite3_value_double() returns 0.0 if the argument is not
+      ** something that can be converted into a number, we have:
+      ** IMP: R-01992-00519 Abs(X) returns 0.0 if X is a string or blob
+      ** that cannot be converted to a numeric value.
+      */
+      double rVal = sqlite3_value_double(argv[0]);
+      if( rVal<0 ) rVal = -rVal;
+      sqlite3_result_double(context, rVal);
+      break;
+    }
+  }
+}
+
+/*
+** Implementation of the instr() function.
+**
+** instr(haystack,needle) finds the first occurrence of needle
+** in haystack and returns the number of previous characters plus 1,
+** or 0 if needle does not occur within haystack.
+**
+** If both haystack and needle are BLOBs, then the result is one more than
+** the number of bytes in haystack prior to the first occurrence of needle,
+** or 0 if needle never occurs in haystack.
+*/
+static void instrFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const unsigned char *zHaystack;
+  const unsigned char *zNeedle;
+  int nHaystack;
+  int nNeedle;
+  int typeHaystack, typeNeedle;
+  int N = 1;
+  int isText;
+
+  UNUSED_PARAMETER(argc);
+  typeHaystack = sqlite3_value_type(argv[0]);
+  typeNeedle = sqlite3_value_type(argv[1]);
+  if( typeHaystack==SQLITE_NULL || typeNeedle==SQLITE_NULL ) return;
+  nHaystack = sqlite3_value_bytes(argv[0]);
+  nNeedle = sqlite3_value_bytes(argv[1]);
+  if( typeHaystack==SQLITE_BLOB && typeNeedle==SQLITE_BLOB ){
+    zHaystack = sqlite3_value_blob(argv[0]);
+    zNeedle = sqlite3_value_blob(argv[1]);
+    isText = 0;
+  }else{
+    zHaystack = sqlite3_value_text(argv[0]);
+    zNeedle = sqlite3_value_text(argv[1]);
+    isText = 1;
+  }
+  while( nNeedle<=nHaystack && memcmp(zHaystack, zNeedle, nNeedle)!=0 ){
+    N++;
+    do{
+      nHaystack--;
+      zHaystack++;
+    }while( isText && (zHaystack[0]&0xc0)==0x80 );
+  }
+  if( nNeedle>nHaystack ) N = 0;
+  sqlite3_result_int(context, N);
+}
+
+/*
+** Implementation of the printf() function.
+*/
+static void printfFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  PrintfArguments x;
+  StrAccum str;
+  const char *zFormat;
+  int n;
+
+  if( argc>=1 && (zFormat = (const char*)sqlite3_value_text(argv[0]))!=0 ){
+    x.nArg = argc-1;
+    x.nUsed = 0;
+    x.apArg = argv+1;
+    sqlite3StrAccumInit(&str, 0, 0, SQLITE_MAX_LENGTH);
+    str.db = sqlite3_context_db_handle(context);
+    sqlite3XPrintf(&str, SQLITE_PRINTF_SQLFUNC, zFormat, &x);
+    n = str.nChar;
+    sqlite3_result_text(context, sqlite3StrAccumFinish(&str), n,
+                        SQLITE_DYNAMIC);
+  }
+}
+
+/*
+** Implementation of the substr() function.
+**
+** substr(x,p1,p2)  returns p2 characters of x[] beginning with p1.
+** p1 is 1-indexed.  So substr(x,1,1) returns the first character
+** of x.  If x is text, then we actually count UTF-8 characters.
+** If x is a blob, then we count bytes.
+**
+** If p1 is negative, then we begin abs(p1) from the end of x[].
+**
+** If p2 is negative, return the p2 characters preceding p1.
+*/
+static void substrFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const unsigned char *z;
+  const unsigned char *z2;
+  int len;
+  int p0type;
+  i64 p1, p2;
+  int negP2 = 0;
+
+  assert( argc==3 || argc==2 );
+  if( sqlite3_value_type(argv[1])==SQLITE_NULL
+   || (argc==3 && sqlite3_value_type(argv[2])==SQLITE_NULL)
+  ){
+    return;
+  }
+  p0type = sqlite3_value_type(argv[0]);
+  p1 = sqlite3_value_int(argv[1]);
+  if( p0type==SQLITE_BLOB ){
+    len = sqlite3_value_bytes(argv[0]);
+    z = sqlite3_value_blob(argv[0]);
+    if( z==0 ) return;
+    assert( len==sqlite3_value_bytes(argv[0]) );
+  }else{
+    z = sqlite3_value_text(argv[0]);
+    if( z==0 ) return;
+    len = 0;
+    if( p1<0 ){
+      for(z2=z; *z2; len++){
+        SQLITE_SKIP_UTF8(z2);
+      }
+    }
+  }
+  if( argc==3 ){
+    p2 = sqlite3_value_int(argv[2]);
+    if( p2<0 ){
+      p2 = -p2;
+      negP2 = 1;
+    }
+  }else{
+    p2 = sqlite3_context_db_handle(context)->aLimit[SQLITE_LIMIT_LENGTH];
+  }
+  if( p1<0 ){
+    p1 += len;
+    if( p1<0 ){
+      p2 += p1;
+      if( p2<0 ) p2 = 0;
+      p1 = 0;
+    }
+  }else if( p1>0 ){
+    p1--;
+  }else if( p2>0 ){
+    p2--;
+  }
+  if( negP2 ){
+    p1 -= p2;
+    if( p1<0 ){
+      p2 += p1;
+      p1 = 0;
+    }
+  }
+  assert( p1>=0 && p2>=0 );
+  if( p0type!=SQLITE_BLOB ){
+    while( *z && p1 ){
+      SQLITE_SKIP_UTF8(z);
+      p1--;
+    }
+    for(z2=z; *z2 && p2; p2--){
+      SQLITE_SKIP_UTF8(z2);
+    }
+    sqlite3_result_text64(context, (char*)z, z2-z, SQLITE_TRANSIENT,
+                          SQLITE_UTF8);
+  }else{
+    if( p1+p2>len ){
+      p2 = len-p1;
+      if( p2<0 ) p2 = 0;
+    }
+    sqlite3_result_blob64(context, (char*)&z[p1], (u64)p2, SQLITE_TRANSIENT);
+  }
+}
+
+/*
+** Implementation of the round() function
+*/
+#ifndef SQLITE_OMIT_FLOATING_POINT
+static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
+  int n = 0;
+  double r;
+  char *zBuf;
+  assert( argc==1 || argc==2 );
+  if( argc==2 ){
+    if( SQLITE_NULL==sqlite3_value_type(argv[1]) ) return;
+    n = sqlite3_value_int(argv[1]);
+    if( n>30 ) n = 30;
+    if( n<0 ) n = 0;
+  }
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+  r = sqlite3_value_double(argv[0]);
+  /* If Y==0 and X will fit in a 64-bit int,
+  ** handle the rounding directly,
+  ** otherwise use printf.
+  */
+  if( n==0 && r>=0 && r<LARGEST_INT64-1 ){
+    r = (double)((sqlite_int64)(r+0.5));
+  }else if( n==0 && r<0 && (-r)<LARGEST_INT64-1 ){
+    r = -(double)((sqlite_int64)((-r)+0.5));
+  }else{
+    zBuf = sqlite3_mprintf("%.*f",n,r);
+    if( zBuf==0 ){
+      sqlite3_result_error_nomem(context);
+      return;
+    }
+    sqlite3AtoF(zBuf, &r, sqlite3Strlen30(zBuf), SQLITE_UTF8);
+    sqlite3_free(zBuf);
+  }
+  sqlite3_result_double(context, r);
+}
+#endif
+
+/*
+** Allocate nByte bytes of space using sqlite3_malloc(). If the
+** allocation fails, call sqlite3_result_error_nomem() to notify
+** the database handle that malloc() has failed and return NULL.
+** If nByte is larger than the maximum string or blob length, then
+** raise an SQLITE_TOOBIG exception and return NULL.
+*/
+static void *contextMalloc(sqlite3_context *context, i64 nByte){
+  char *z;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  assert( nByte>0 );
+  testcase( nByte==db->aLimit[SQLITE_LIMIT_LENGTH] );
+  testcase( nByte==db->aLimit[SQLITE_LIMIT_LENGTH]+1 );
+  if( nByte>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    sqlite3_result_error_toobig(context);
+    z = 0;
+  }else{
+    z = sqlite3Malloc(nByte);
+    if( !z ){
+      sqlite3_result_error_nomem(context);
+    }
+  }
+  return z;
+}
+
+/*
+** Implementation of the upper() and lower() SQL functions.
+*/
+static void upperFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
+  char *z1;
+  const char *z2;
+  int i, n;
+  UNUSED_PARAMETER(argc);
+  z2 = (char*)sqlite3_value_text(argv[0]);
+  n = sqlite3_value_bytes(argv[0]);
+  /* Verify that the call to _bytes() does not invalidate the _text() pointer */
+  assert( z2==(char*)sqlite3_value_text(argv[0]) );
+  if( z2 ){
+    z1 = contextMalloc(context, ((i64)n)+1);
+    if( z1 ){
+      for(i=0; i<n; i++){
+        z1[i] = (char)sqlite3Toupper(z2[i]);
+      }
+      sqlite3_result_text(context, z1, n, sqlite3_free);
+    }
+  }
+}
+static void lowerFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
+  char *z1;
+  const char *z2;
+  int i, n;
+  UNUSED_PARAMETER(argc);
+  z2 = (char*)sqlite3_value_text(argv[0]);
+  n = sqlite3_value_bytes(argv[0]);
+  /* Verify that the call to _bytes() does not invalidate the _text() pointer */
+  assert( z2==(char*)sqlite3_value_text(argv[0]) );
+  if( z2 ){
+    z1 = contextMalloc(context, ((i64)n)+1);
+    if( z1 ){
+      for(i=0; i<n; i++){
+        z1[i] = sqlite3Tolower(z2[i]);
+      }
+      sqlite3_result_text(context, z1, n, sqlite3_free);
+    }
+  }
+}
+
+/*
+** Some functions like COALESCE() and IFNULL() and UNLIKELY() are implemented
+** as VDBE code so that unused argument values do not have to be computed.
+** However, we still need some kind of function implementation for this
+** routines in the function table.  The noopFunc macro provides this.
+** noopFunc will never be called so it doesn't matter what the implementation
+** is.  We might as well use the "version()" function as a substitute.
+*/
+#define noopFunc versionFunc   /* Substitute function - never called */
+
+/*
+** Implementation of random().  Return a random integer.  
+*/
+static void randomFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  sqlite_int64 r;
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  sqlite3_randomness(sizeof(r), &r);
+  if( r<0 ){
+    /* We need to prevent a random number of 0x8000000000000000 
+    ** (or -9223372036854775808) since when you do abs() of that
+    ** number of you get the same value back again.  To do this
+    ** in a way that is testable, mask the sign bit off of negative
+    ** values, resulting in a positive value.  Then take the 
+    ** 2s complement of that positive value.  The end result can
+    ** therefore be no less than -9223372036854775807.
+    */
+    r = -(r & LARGEST_INT64);
+  }
+  sqlite3_result_int64(context, r);
+}
+
+/*
+** Implementation of randomblob(N).  Return a random blob
+** that is N bytes long.
+*/
+static void randomBlob(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int n;
+  unsigned char *p;
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  n = sqlite3_value_int(argv[0]);
+  if( n<1 ){
+    n = 1;
+  }
+  p = contextMalloc(context, n);
+  if( p ){
+    sqlite3_randomness(n, p);
+    sqlite3_result_blob(context, (char*)p, n, sqlite3_free);
+  }
+}
+
+/*
+** Implementation of the last_insert_rowid() SQL function.  The return
+** value is the same as the sqlite3_last_insert_rowid() API function.
+*/
+static void last_insert_rowid(
+  sqlite3_context *context, 
+  int NotUsed, 
+  sqlite3_value **NotUsed2
+){
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  /* IMP: R-51513-12026 The last_insert_rowid() SQL function is a
+  ** wrapper around the sqlite3_last_insert_rowid() C/C++ interface
+  ** function. */
+  sqlite3_result_int64(context, sqlite3_last_insert_rowid(db));
+}
+
+/*
+** Implementation of the changes() SQL function.
+**
+** IMP: R-62073-11209 The changes() SQL function is a wrapper
+** around the sqlite3_changes() C/C++ function and hence follows the same
+** rules for counting changes.
+*/
+static void changes(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  sqlite3_result_int(context, sqlite3_changes(db));
+}
+
+/*
+** Implementation of the total_changes() SQL function.  The return value is
+** the same as the sqlite3_total_changes() API function.
+*/
+static void total_changes(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  /* IMP: R-52756-41993 This function is a wrapper around the
+  ** sqlite3_total_changes() C/C++ interface. */
+  sqlite3_result_int(context, sqlite3_total_changes(db));
+}
+
+/*
+** A structure defining how to do GLOB-style comparisons.
+*/
+struct compareInfo {
+  u8 matchAll;
+  u8 matchOne;
+  u8 matchSet;
+  u8 noCase;
+};
+
+/*
+** For LIKE and GLOB matching on EBCDIC machines, assume that every
+** character is exactly one byte in size.  Also, all characters are
+** able to participate in upper-case-to-lower-case mappings in EBCDIC
+** whereas only characters less than 0x80 do in ASCII.
+*/
+#if defined(SQLITE_EBCDIC)
+# define sqlite3Utf8Read(A)        (*((*A)++))
+# define GlobUpperToLower(A)       A = sqlite3UpperToLower[A]
+# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
+#else
+# define GlobUpperToLower(A)       if( A<=0x7f ){ A = sqlite3UpperToLower[A]; }
+# define GlobUpperToLowerAscii(A)  A = sqlite3UpperToLower[A]
+#endif
+
+static const struct compareInfo globInfo = { '*', '?', '[', 0 };
+/* The correct SQL-92 behavior is for the LIKE operator to ignore
+** case.  Thus  'a' LIKE 'A' would be true. */
+static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
+/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
+** is case sensitive causing 'a' LIKE 'A' to be false */
+static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };
+
+/*
+** Compare two UTF-8 strings for equality where the first string can
+** potentially be a "glob" or "like" expression.  Return true (1) if they
+** are the same and false (0) if they are different.
+**
+** Globbing rules:
+**
+**      '*'       Matches any sequence of zero or more characters.
+**
+**      '?'       Matches exactly one character.
+**
+**     [...]      Matches one character from the enclosed list of
+**                characters.
+**
+**     [^...]     Matches one character not in the enclosed list.
+**
+** With the [...] and [^...] matching, a ']' character can be included
+** in the list by making it the first character after '[' or '^'.  A
+** range of characters can be specified using '-'.  Example:
+** "[a-z]" matches any single lower-case letter.  To match a '-', make
+** it the last character in the list.
+**
+** Like matching rules:
+** 
+**      '%'       Matches any sequence of zero or more characters
+**
+***     '_'       Matches any one character
+**
+**      Ec        Where E is the "esc" character and c is any other
+**                character, including '%', '_', and esc, match exactly c.
+**
+** The comments through this routine usually assume glob matching.
+**
+** This routine is usually quick, but can be N**2 in the worst case.
+*/
+static int patternCompare(
+  const u8 *zPattern,              /* The glob pattern */
+  const u8 *zString,               /* The string to compare against the glob */
+  const struct compareInfo *pInfo, /* Information about how to do the compare */
+  u32 esc                          /* The escape character */
+){
+  u32 c, c2;                       /* Next pattern and input string chars */
+  u32 matchOne = pInfo->matchOne;  /* "?" or "_" */
+  u32 matchAll = pInfo->matchAll;  /* "*" or "%" */
+  u32 matchOther;                  /* "[" or the escape character */
+  u8 noCase = pInfo->noCase;       /* True if uppercase==lowercase */
+  const u8 *zEscaped = 0;          /* One past the last escaped input char */
+  
+  /* The GLOB operator does not have an ESCAPE clause.  And LIKE does not
+  ** have the matchSet operator.  So we either have to look for one or
+  ** the other, never both.  Hence the single variable matchOther is used
+  ** to store the one we have to look for.
+  */
+  matchOther = esc ? esc : pInfo->matchSet;
+
+  while( (c = sqlite3Utf8Read(&zPattern))!=0 ){
+    if( c==matchAll ){  /* Match "*" */
+      /* Skip over multiple "*" characters in the pattern.  If there
+      ** are also "?" characters, skip those as well, but consume a
+      ** single character of the input string for each "?" skipped */
+      while( (c=sqlite3Utf8Read(&zPattern)) == matchAll
+               || c == matchOne ){
+        if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
+          return 0;
+        }
+      }
+      if( c==0 ){
+        return 1;   /* "*" at the end of the pattern matches */
+      }else if( c==matchOther ){
+        if( esc ){
+          c = sqlite3Utf8Read(&zPattern);
+          if( c==0 ) return 0;
+        }else{
+          /* "[...]" immediately follows the "*".  We have to do a slow
+          ** recursive search in this case, but it is an unusual case. */
+          assert( matchOther<0x80 );  /* '[' is a single-byte character */
+          while( *zString
+                 && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
+            SQLITE_SKIP_UTF8(zString);
+          }
+          return *zString!=0;
+        }
+      }
+
+      /* At this point variable c contains the first character of the
+      ** pattern string past the "*".  Search in the input string for the
+      ** first matching character and recursively contine the match from
+      ** that point.
+      **
+      ** For a case-insensitive search, set variable cx to be the same as
+      ** c but in the other case and search the input string for either
+      ** c or cx.
+      */
+      if( c<=0x80 ){
+        u32 cx;
+        if( noCase ){
+          cx = sqlite3Toupper(c);
+          c = sqlite3Tolower(c);
+        }else{
+          cx = c;
+        }
+        while( (c2 = *(zString++))!=0 ){
+          if( c2!=c && c2!=cx ) continue;
+          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
+        }
+      }else{
+        while( (c2 = sqlite3Utf8Read(&zString))!=0 ){
+          if( c2!=c ) continue;
+          if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
+        }
+      }
+      return 0;
+    }
+    if( c==matchOther ){
+      if( esc ){
+        c = sqlite3Utf8Read(&zPattern);
+        if( c==0 ) return 0;
+        zEscaped = zPattern;
+      }else{
+        u32 prior_c = 0;
+        int seen = 0;
+        int invert = 0;
+        c = sqlite3Utf8Read(&zString);
+        if( c==0 ) return 0;
+        c2 = sqlite3Utf8Read(&zPattern);
+        if( c2=='^' ){
+          invert = 1;
+          c2 = sqlite3Utf8Read(&zPattern);
+        }
+        if( c2==']' ){
+          if( c==']' ) seen = 1;
+          c2 = sqlite3Utf8Read(&zPattern);
+        }
+        while( c2 && c2!=']' ){
+          if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){
+            c2 = sqlite3Utf8Read(&zPattern);
+            if( c>=prior_c && c<=c2 ) seen = 1;
+            prior_c = 0;
+          }else{
+            if( c==c2 ){
+              seen = 1;
+            }
+            prior_c = c2;
+          }
+          c2 = sqlite3Utf8Read(&zPattern);
+        }
+        if( c2==0 || (seen ^ invert)==0 ){
+          return 0;
+        }
+        continue;
+      }
+    }
+    c2 = sqlite3Utf8Read(&zString);
+    if( c==c2 ) continue;
+    if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){
+      continue;
+    }
+    if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
+    return 0;
+  }
+  return *zString==0;
+}
+
+/*
+** The sqlite3_strglob() interface.
+*/
+SQLITE_API int sqlite3_strglob(const char *zGlobPattern, const char *zString){
+  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, 0)==0;
+}
+
+/*
+** Count the number of times that the LIKE operator (or GLOB which is
+** just a variation of LIKE) gets called.  This is used for testing
+** only.
+*/
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_like_count = 0;
+#endif
+
+
+/*
+** Implementation of the like() SQL function.  This function implements
+** the build-in LIKE operator.  The first argument to the function is the
+** pattern and the second argument is the string.  So, the SQL statements:
+**
+**       A LIKE B
+**
+** is implemented as like(B,A).
+**
+** This same function (with a different compareInfo structure) computes
+** the GLOB operator.
+*/
+static void likeFunc(
+  sqlite3_context *context, 
+  int argc, 
+  sqlite3_value **argv
+){
+  const unsigned char *zA, *zB;
+  u32 escape = 0;
+  int nPat;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  zB = sqlite3_value_text(argv[0]);
+  zA = sqlite3_value_text(argv[1]);
+
+  /* Limit the length of the LIKE or GLOB pattern to avoid problems
+  ** of deep recursion and N*N behavior in patternCompare().
+  */
+  nPat = sqlite3_value_bytes(argv[0]);
+  testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] );
+  testcase( nPat==db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]+1 );
+  if( nPat > db->aLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH] ){
+    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
+    return;
+  }
+  assert( zB==sqlite3_value_text(argv[0]) );  /* Encoding did not change */
+
+  if( argc==3 ){
+    /* The escape character string must consist of a single UTF-8 character.
+    ** Otherwise, return an error.
+    */
+    const unsigned char *zEsc = sqlite3_value_text(argv[2]);
+    if( zEsc==0 ) return;
+    if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){
+      sqlite3_result_error(context, 
+          "ESCAPE expression must be a single character", -1);
+      return;
+    }
+    escape = sqlite3Utf8Read(&zEsc);
+  }
+  if( zA && zB ){
+    struct compareInfo *pInfo = sqlite3_user_data(context);
+#ifdef SQLITE_TEST
+    sqlite3_like_count++;
+#endif
+    
+    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape));
+  }
+}
+
+/*
+** Implementation of the NULLIF(x,y) function.  The result is the first
+** argument if the arguments are different.  The result is NULL if the
+** arguments are equal to each other.
+*/
+static void nullifFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **argv
+){
+  CollSeq *pColl = sqlite3GetFuncCollSeq(context);
+  UNUSED_PARAMETER(NotUsed);
+  if( sqlite3MemCompare(argv[0], argv[1], pColl)!=0 ){
+    sqlite3_result_value(context, argv[0]);
+  }
+}
+
+/*
+** Implementation of the sqlite_version() function.  The result is the version
+** of the SQLite library that is running.
+*/
+static void versionFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  /* IMP: R-48699-48617 This function is an SQL wrapper around the
+  ** sqlite3_libversion() C-interface. */
+  sqlite3_result_text(context, sqlite3_libversion(), -1, SQLITE_STATIC);
+}
+
+/*
+** Implementation of the sqlite_source_id() function. The result is a string
+** that identifies the particular version of the source code used to build
+** SQLite.
+*/
+static void sourceidFunc(
+  sqlite3_context *context,
+  int NotUsed,
+  sqlite3_value **NotUsed2
+){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  /* IMP: R-24470-31136 This function is an SQL wrapper around the
+  ** sqlite3_sourceid() C interface. */
+  sqlite3_result_text(context, sqlite3_sourceid(), -1, SQLITE_STATIC);
+}
+
+/*
+** Implementation of the sqlite_log() function.  This is a wrapper around
+** sqlite3_log().  The return value is NULL.  The function exists purely for
+** its side-effects.
+*/
+static void errlogFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(context);
+  sqlite3_log(sqlite3_value_int(argv[0]), "%s", sqlite3_value_text(argv[1]));
+}
+
+/*
+** Implementation of the sqlite_compileoption_used() function.
+** The result is an integer that identifies if the compiler option
+** was used to build SQLite.
+*/
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+static void compileoptionusedFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const char *zOptName;
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  /* IMP: R-39564-36305 The sqlite_compileoption_used() SQL
+  ** function is a wrapper around the sqlite3_compileoption_used() C/C++
+  ** function.
+  */
+  if( (zOptName = (const char*)sqlite3_value_text(argv[0]))!=0 ){
+    sqlite3_result_int(context, sqlite3_compileoption_used(zOptName));
+  }
+}
+#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */
+
+/*
+** Implementation of the sqlite_compileoption_get() function. 
+** The result is a string that identifies the compiler options 
+** used to build SQLite.
+*/
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+static void compileoptiongetFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int n;
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  /* IMP: R-04922-24076 The sqlite_compileoption_get() SQL function
+  ** is a wrapper around the sqlite3_compileoption_get() C/C++ function.
+  */
+  n = sqlite3_value_int(argv[0]);
+  sqlite3_result_text(context, sqlite3_compileoption_get(n), -1, SQLITE_STATIC);
+}
+#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */
+
+/* Array for converting from half-bytes (nybbles) into ASCII hex
+** digits. */
+static const char hexdigits[] = {
+  '0', '1', '2', '3', '4', '5', '6', '7',
+  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 
+};
+
+/*
+** Implementation of the QUOTE() function.  This function takes a single
+** argument.  If the argument is numeric, the return value is the same as
+** the argument.  If the argument is NULL, the return value is the string
+** "NULL".  Otherwise, the argument is enclosed in single quotes with
+** single-quote escapes.
+*/
+static void quoteFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  switch( sqlite3_value_type(argv[0]) ){
+    case SQLITE_FLOAT: {
+      double r1, r2;
+      char zBuf[50];
+      r1 = sqlite3_value_double(argv[0]);
+      sqlite3_snprintf(sizeof(zBuf), zBuf, "%!.15g", r1);
+      sqlite3AtoF(zBuf, &r2, 20, SQLITE_UTF8);
+      if( r1!=r2 ){
+        sqlite3_snprintf(sizeof(zBuf), zBuf, "%!.20e", r1);
+      }
+      sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+      break;
+    }
+    case SQLITE_INTEGER: {
+      sqlite3_result_value(context, argv[0]);
+      break;
+    }
+    case SQLITE_BLOB: {
+      char *zText = 0;
+      char const *zBlob = sqlite3_value_blob(argv[0]);
+      int nBlob = sqlite3_value_bytes(argv[0]);
+      assert( zBlob==sqlite3_value_blob(argv[0]) ); /* No encoding change */
+      zText = (char *)contextMalloc(context, (2*(i64)nBlob)+4); 
+      if( zText ){
+        int i;
+        for(i=0; i<nBlob; i++){
+          zText[(i*2)+2] = hexdigits[(zBlob[i]>>4)&0x0F];
+          zText[(i*2)+3] = hexdigits[(zBlob[i])&0x0F];
+        }
+        zText[(nBlob*2)+2] = '\'';
+        zText[(nBlob*2)+3] = '\0';
+        zText[0] = 'X';
+        zText[1] = '\'';
+        sqlite3_result_text(context, zText, -1, SQLITE_TRANSIENT);
+        sqlite3_free(zText);
+      }
+      break;
+    }
+    case SQLITE_TEXT: {
+      int i,j;
+      u64 n;
+      const unsigned char *zArg = sqlite3_value_text(argv[0]);
+      char *z;
+
+      if( zArg==0 ) return;
+      for(i=0, n=0; zArg[i]; i++){ if( zArg[i]=='\'' ) n++; }
+      z = contextMalloc(context, ((i64)i)+((i64)n)+3);
+      if( z ){
+        z[0] = '\'';
+        for(i=0, j=1; zArg[i]; i++){
+          z[j++] = zArg[i];
+          if( zArg[i]=='\'' ){
+            z[j++] = '\'';
+          }
+        }
+        z[j++] = '\'';
+        z[j] = 0;
+        sqlite3_result_text(context, z, j, sqlite3_free);
+      }
+      break;
+    }
+    default: {
+      assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
+      sqlite3_result_text(context, "NULL", 4, SQLITE_STATIC);
+      break;
+    }
+  }
+}
+
+/*
+** The unicode() function.  Return the integer unicode code-point value
+** for the first character of the input string. 
+*/
+static void unicodeFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const unsigned char *z = sqlite3_value_text(argv[0]);
+  (void)argc;
+  if( z && z[0] ) sqlite3_result_int(context, sqlite3Utf8Read(&z));
+}
+
+/*
+** The char() function takes zero or more arguments, each of which is
+** an integer.  It constructs a string where each character of the string
+** is the unicode character for the corresponding integer argument.
+*/
+static void charFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  unsigned char *z, *zOut;
+  int i;
+  zOut = z = sqlite3_malloc( argc*4+1 );
+  if( z==0 ){
+    sqlite3_result_error_nomem(context);
+    return;
+  }
+  for(i=0; i<argc; i++){
+    sqlite3_int64 x;
+    unsigned c;
+    x = sqlite3_value_int64(argv[i]);
+    if( x<0 || x>0x10ffff ) x = 0xfffd;
+    c = (unsigned)(x & 0x1fffff);
+    if( c<0x00080 ){
+      *zOut++ = (u8)(c&0xFF);
+    }else if( c<0x00800 ){
+      *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);
+      *zOut++ = 0x80 + (u8)(c & 0x3F);
+    }else if( c<0x10000 ){
+      *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);
+      *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);
+      *zOut++ = 0x80 + (u8)(c & 0x3F);
+    }else{
+      *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);
+      *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);
+      *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);
+      *zOut++ = 0x80 + (u8)(c & 0x3F);
+    }                                                    \
+  }
+  sqlite3_result_text64(context, (char*)z, zOut-z, sqlite3_free, SQLITE_UTF8);
+}
+
+/*
+** The hex() function.  Interpret the argument as a blob.  Return
+** a hexadecimal rendering as text.
+*/
+static void hexFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int i, n;
+  const unsigned char *pBlob;
+  char *zHex, *z;
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  pBlob = sqlite3_value_blob(argv[0]);
+  n = sqlite3_value_bytes(argv[0]);
+  assert( pBlob==sqlite3_value_blob(argv[0]) );  /* No encoding change */
+  z = zHex = contextMalloc(context, ((i64)n)*2 + 1);
+  if( zHex ){
+    for(i=0; i<n; i++, pBlob++){
+      unsigned char c = *pBlob;
+      *(z++) = hexdigits[(c>>4)&0xf];
+      *(z++) = hexdigits[c&0xf];
+    }
+    *z = 0;
+    sqlite3_result_text(context, zHex, n*2, sqlite3_free);
+  }
+}
+
+/*
+** The zeroblob(N) function returns a zero-filled blob of size N bytes.
+*/
+static void zeroblobFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  i64 n;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  n = sqlite3_value_int64(argv[0]);
+  testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH] );
+  testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH]+1 );
+  if( n>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    sqlite3_result_error_toobig(context);
+  }else{
+    sqlite3_result_zeroblob(context, (int)n); /* IMP: R-00293-64994 */
+  }
+}
+
+/*
+** The replace() function.  Three arguments are all strings: call
+** them A, B, and C. The result is also a string which is derived
+** from A by replacing every occurrence of B with C.  The match
+** must be exact.  Collating sequences are not used.
+*/
+static void replaceFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const unsigned char *zStr;        /* The input string A */
+  const unsigned char *zPattern;    /* The pattern string B */
+  const unsigned char *zRep;        /* The replacement string C */
+  unsigned char *zOut;              /* The output */
+  int nStr;                /* Size of zStr */
+  int nPattern;            /* Size of zPattern */
+  int nRep;                /* Size of zRep */
+  i64 nOut;                /* Maximum size of zOut */
+  int loopLimit;           /* Last zStr[] that might match zPattern[] */
+  int i, j;                /* Loop counters */
+
+  assert( argc==3 );
+  UNUSED_PARAMETER(argc);
+  zStr = sqlite3_value_text(argv[0]);
+  if( zStr==0 ) return;
+  nStr = sqlite3_value_bytes(argv[0]);
+  assert( zStr==sqlite3_value_text(argv[0]) );  /* No encoding change */
+  zPattern = sqlite3_value_text(argv[1]);
+  if( zPattern==0 ){
+    assert( sqlite3_value_type(argv[1])==SQLITE_NULL
+            || sqlite3_context_db_handle(context)->mallocFailed );
+    return;
+  }
+  if( zPattern[0]==0 ){
+    assert( sqlite3_value_type(argv[1])!=SQLITE_NULL );
+    sqlite3_result_value(context, argv[0]);
+    return;
+  }
+  nPattern = sqlite3_value_bytes(argv[1]);
+  assert( zPattern==sqlite3_value_text(argv[1]) );  /* No encoding change */
+  zRep = sqlite3_value_text(argv[2]);
+  if( zRep==0 ) return;
+  nRep = sqlite3_value_bytes(argv[2]);
+  assert( zRep==sqlite3_value_text(argv[2]) );
+  nOut = nStr + 1;
+  assert( nOut<SQLITE_MAX_LENGTH );
+  zOut = contextMalloc(context, (i64)nOut);
+  if( zOut==0 ){
+    return;
+  }
+  loopLimit = nStr - nPattern;  
+  for(i=j=0; i<=loopLimit; i++){
+    if( zStr[i]!=zPattern[0] || memcmp(&zStr[i], zPattern, nPattern) ){
+      zOut[j++] = zStr[i];
+    }else{
+      u8 *zOld;
+      sqlite3 *db = sqlite3_context_db_handle(context);
+      nOut += nRep - nPattern;
+      testcase( nOut-1==db->aLimit[SQLITE_LIMIT_LENGTH] );
+      testcase( nOut-2==db->aLimit[SQLITE_LIMIT_LENGTH] );
+      if( nOut-1>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+        sqlite3_result_error_toobig(context);
+        sqlite3_free(zOut);
+        return;
+      }
+      zOld = zOut;
+      zOut = sqlite3_realloc(zOut, (int)nOut);
+      if( zOut==0 ){
+        sqlite3_result_error_nomem(context);
+        sqlite3_free(zOld);
+        return;
+      }
+      memcpy(&zOut[j], zRep, nRep);
+      j += nRep;
+      i += nPattern-1;
+    }
+  }
+  assert( j+nStr-i+1==nOut );
+  memcpy(&zOut[j], &zStr[i], nStr-i);
+  j += nStr - i;
+  assert( j<=nOut );
+  zOut[j] = 0;
+  sqlite3_result_text(context, (char*)zOut, j, sqlite3_free);
+}
+
+/*
+** Implementation of the TRIM(), LTRIM(), and RTRIM() functions.
+** The userdata is 0x1 for left trim, 0x2 for right trim, 0x3 for both.
+*/
+static void trimFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const unsigned char *zIn;         /* Input string */
+  const unsigned char *zCharSet;    /* Set of characters to trim */
+  int nIn;                          /* Number of bytes in input */
+  int flags;                        /* 1: trimleft  2: trimright  3: trim */
+  int i;                            /* Loop counter */
+  unsigned char *aLen = 0;          /* Length of each character in zCharSet */
+  unsigned char **azChar = 0;       /* Individual characters in zCharSet */
+  int nChar;                        /* Number of characters in zCharSet */
+
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
+    return;
+  }
+  zIn = sqlite3_value_text(argv[0]);
+  if( zIn==0 ) return;
+  nIn = sqlite3_value_bytes(argv[0]);
+  assert( zIn==sqlite3_value_text(argv[0]) );
+  if( argc==1 ){
+    static const unsigned char lenOne[] = { 1 };
+    static unsigned char * const azOne[] = { (u8*)" " };
+    nChar = 1;
+    aLen = (u8*)lenOne;
+    azChar = (unsigned char **)azOne;
+    zCharSet = 0;
+  }else if( (zCharSet = sqlite3_value_text(argv[1]))==0 ){
+    return;
+  }else{
+    const unsigned char *z;
+    for(z=zCharSet, nChar=0; *z; nChar++){
+      SQLITE_SKIP_UTF8(z);
+    }
+    if( nChar>0 ){
+      azChar = contextMalloc(context, ((i64)nChar)*(sizeof(char*)+1));
+      if( azChar==0 ){
+        return;
+      }
+      aLen = (unsigned char*)&azChar[nChar];
+      for(z=zCharSet, nChar=0; *z; nChar++){
+        azChar[nChar] = (unsigned char *)z;
+        SQLITE_SKIP_UTF8(z);
+        aLen[nChar] = (u8)(z - azChar[nChar]);
+      }
+    }
+  }
+  if( nChar>0 ){
+    flags = SQLITE_PTR_TO_INT(sqlite3_user_data(context));
+    if( flags & 1 ){
+      while( nIn>0 ){
+        int len = 0;
+        for(i=0; i<nChar; i++){
+          len = aLen[i];
+          if( len<=nIn && memcmp(zIn, azChar[i], len)==0 ) break;
+        }
+        if( i>=nChar ) break;
+        zIn += len;
+        nIn -= len;
+      }
+    }
+    if( flags & 2 ){
+      while( nIn>0 ){
+        int len = 0;
+        for(i=0; i<nChar; i++){
+          len = aLen[i];
+          if( len<=nIn && memcmp(&zIn[nIn-len],azChar[i],len)==0 ) break;
+        }
+        if( i>=nChar ) break;
+        nIn -= len;
+      }
+    }
+    if( zCharSet ){
+      sqlite3_free(azChar);
+    }
+  }
+  sqlite3_result_text(context, (char*)zIn, nIn, SQLITE_TRANSIENT);
+}
+
+
+/* IMP: R-25361-16150 This function is omitted from SQLite by default. It
+** is only available if the SQLITE_SOUNDEX compile-time option is used
+** when SQLite is built.
+*/
+#ifdef SQLITE_SOUNDEX
+/*
+** Compute the soundex encoding of a word.
+**
+** IMP: R-59782-00072 The soundex(X) function returns a string that is the
+** soundex encoding of the string X. 
+*/
+static void soundexFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  char zResult[8];
+  const u8 *zIn;
+  int i, j;
+  static const unsigned char iCode[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0,
+    1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0,
+    0, 0, 1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0,
+    1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 0,
+  };
+  assert( argc==1 );
+  zIn = (u8*)sqlite3_value_text(argv[0]);
+  if( zIn==0 ) zIn = (u8*)"";
+  for(i=0; zIn[i] && !sqlite3Isalpha(zIn[i]); i++){}
+  if( zIn[i] ){
+    u8 prevcode = iCode[zIn[i]&0x7f];
+    zResult[0] = sqlite3Toupper(zIn[i]);
+    for(j=1; j<4 && zIn[i]; i++){
+      int code = iCode[zIn[i]&0x7f];
+      if( code>0 ){
+        if( code!=prevcode ){
+          prevcode = code;
+          zResult[j++] = code + '0';
+        }
+      }else{
+        prevcode = 0;
+      }
+    }
+    while( j<4 ){
+      zResult[j++] = '0';
+    }
+    zResult[j] = 0;
+    sqlite3_result_text(context, zResult, 4, SQLITE_TRANSIENT);
+  }else{
+    /* IMP: R-64894-50321 The string "?000" is returned if the argument
+    ** is NULL or contains no ASCII alphabetic characters. */
+    sqlite3_result_text(context, "?000", 4, SQLITE_STATIC);
+  }
+}
+#endif /* SQLITE_SOUNDEX */
+
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+/*
+** A function that loads a shared-library extension then returns NULL.
+*/
+static void loadExt(sqlite3_context *context, int argc, sqlite3_value **argv){
+  const char *zFile = (const char *)sqlite3_value_text(argv[0]);
+  const char *zProc;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+  char *zErrMsg = 0;
+
+  if( argc==2 ){
+    zProc = (const char *)sqlite3_value_text(argv[1]);
+  }else{
+    zProc = 0;
+  }
+  if( zFile && sqlite3_load_extension(db, zFile, zProc, &zErrMsg) ){
+    sqlite3_result_error(context, zErrMsg, -1);
+    sqlite3_free(zErrMsg);
+  }
+}
+#endif
+
+
+/*
+** An instance of the following structure holds the context of a
+** sum() or avg() aggregate computation.
+*/
+typedef struct SumCtx SumCtx;
+struct SumCtx {
+  double rSum;      /* Floating point sum */
+  i64 iSum;         /* Integer sum */   
+  i64 cnt;          /* Number of elements summed */
+  u8 overflow;      /* True if integer overflow seen */
+  u8 approx;        /* True if non-integer value was input to the sum */
+};
+
+/*
+** Routines used to compute the sum, average, and total.
+**
+** The SUM() function follows the (broken) SQL standard which means
+** that it returns NULL if it sums over no inputs.  TOTAL returns
+** 0.0 in that case.  In addition, TOTAL always returns a float where
+** SUM might return an integer if it never encounters a floating point
+** value.  TOTAL never fails, but SUM might through an exception if
+** it overflows an integer.
+*/
+static void sumStep(sqlite3_context *context, int argc, sqlite3_value **argv){
+  SumCtx *p;
+  int type;
+  assert( argc==1 );
+  UNUSED_PARAMETER(argc);
+  p = sqlite3_aggregate_context(context, sizeof(*p));
+  type = sqlite3_value_numeric_type(argv[0]);
+  if( p && type!=SQLITE_NULL ){
+    p->cnt++;
+    if( type==SQLITE_INTEGER ){
+      i64 v = sqlite3_value_int64(argv[0]);
+      p->rSum += v;
+      if( (p->approx|p->overflow)==0 && sqlite3AddInt64(&p->iSum, v) ){
+        p->overflow = 1;
+      }
+    }else{
+      p->rSum += sqlite3_value_double(argv[0]);
+      p->approx = 1;
+    }
+  }
+}
+static void sumFinalize(sqlite3_context *context){
+  SumCtx *p;
+  p = sqlite3_aggregate_context(context, 0);
+  if( p && p->cnt>0 ){
+    if( p->overflow ){
+      sqlite3_result_error(context,"integer overflow",-1);
+    }else if( p->approx ){
+      sqlite3_result_double(context, p->rSum);
+    }else{
+      sqlite3_result_int64(context, p->iSum);
+    }
+  }
+}
+static void avgFinalize(sqlite3_context *context){
+  SumCtx *p;
+  p = sqlite3_aggregate_context(context, 0);
+  if( p && p->cnt>0 ){
+    sqlite3_result_double(context, p->rSum/(double)p->cnt);
+  }
+}
+static void totalFinalize(sqlite3_context *context){
+  SumCtx *p;
+  p = sqlite3_aggregate_context(context, 0);
+  /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */
+  sqlite3_result_double(context, p ? p->rSum : (double)0);
+}
+
+/*
+** The following structure keeps track of state information for the
+** count() aggregate function.
+*/
+typedef struct CountCtx CountCtx;
+struct CountCtx {
+  i64 n;
+};
+
+/*
+** Routines to implement the count() aggregate function.
+*/
+static void countStep(sqlite3_context *context, int argc, sqlite3_value **argv){
+  CountCtx *p;
+  p = sqlite3_aggregate_context(context, sizeof(*p));
+  if( (argc==0 || SQLITE_NULL!=sqlite3_value_type(argv[0])) && p ){
+    p->n++;
+  }
+
+#ifndef SQLITE_OMIT_DEPRECATED
+  /* The sqlite3_aggregate_count() function is deprecated.  But just to make
+  ** sure it still operates correctly, verify that its count agrees with our 
+  ** internal count when using count(*) and when the total count can be
+  ** expressed as a 32-bit integer. */
+  assert( argc==1 || p==0 || p->n>0x7fffffff
+          || p->n==sqlite3_aggregate_count(context) );
+#endif
+}   
+static void countFinalize(sqlite3_context *context){
+  CountCtx *p;
+  p = sqlite3_aggregate_context(context, 0);
+  sqlite3_result_int64(context, p ? p->n : 0);
+}
+
+/*
+** Routines to implement min() and max() aggregate functions.
+*/
+static void minmaxStep(
+  sqlite3_context *context, 
+  int NotUsed, 
+  sqlite3_value **argv
+){
+  Mem *pArg  = (Mem *)argv[0];
+  Mem *pBest;
+  UNUSED_PARAMETER(NotUsed);
+
+  pBest = (Mem *)sqlite3_aggregate_context(context, sizeof(*pBest));
+  if( !pBest ) return;
+
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
+    if( pBest->flags ) sqlite3SkipAccumulatorLoad(context);
+  }else if( pBest->flags ){
+    int max;
+    int cmp;
+    CollSeq *pColl = sqlite3GetFuncCollSeq(context);
+    /* This step function is used for both the min() and max() aggregates,
+    ** the only difference between the two being that the sense of the
+    ** comparison is inverted. For the max() aggregate, the
+    ** sqlite3_user_data() function returns (void *)-1. For min() it
+    ** returns (void *)db, where db is the sqlite3* database pointer.
+    ** Therefore the next statement sets variable 'max' to 1 for the max()
+    ** aggregate, or 0 for min().
+    */
+    max = sqlite3_user_data(context)!=0;
+    cmp = sqlite3MemCompare(pBest, pArg, pColl);
+    if( (max && cmp<0) || (!max && cmp>0) ){
+      sqlite3VdbeMemCopy(pBest, pArg);
+    }else{
+      sqlite3SkipAccumulatorLoad(context);
+    }
+  }else{
+    pBest->db = sqlite3_context_db_handle(context);
+    sqlite3VdbeMemCopy(pBest, pArg);
+  }
+}
+static void minMaxFinalize(sqlite3_context *context){
+  sqlite3_value *pRes;
+  pRes = (sqlite3_value *)sqlite3_aggregate_context(context, 0);
+  if( pRes ){
+    if( pRes->flags ){
+      sqlite3_result_value(context, pRes);
+    }
+    sqlite3VdbeMemRelease(pRes);
+  }
+}
+
+/*
+** group_concat(EXPR, ?SEPARATOR?)
+*/
+static void groupConcatStep(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const char *zVal;
+  StrAccum *pAccum;
+  const char *zSep;
+  int nVal, nSep;
+  assert( argc==1 || argc==2 );
+  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+  pAccum = (StrAccum*)sqlite3_aggregate_context(context, sizeof(*pAccum));
+
+  if( pAccum ){
+    sqlite3 *db = sqlite3_context_db_handle(context);
+    int firstTerm = pAccum->useMalloc==0;
+    pAccum->useMalloc = 2;
+    pAccum->mxAlloc = db->aLimit[SQLITE_LIMIT_LENGTH];
+    if( !firstTerm ){
+      if( argc==2 ){
+        zSep = (char*)sqlite3_value_text(argv[1]);
+        nSep = sqlite3_value_bytes(argv[1]);
+      }else{
+        zSep = ",";
+        nSep = 1;
+      }
+      if( nSep ) sqlite3StrAccumAppend(pAccum, zSep, nSep);
+    }
+    zVal = (char*)sqlite3_value_text(argv[0]);
+    nVal = sqlite3_value_bytes(argv[0]);
+    if( zVal ) sqlite3StrAccumAppend(pAccum, zVal, nVal);
+  }
+}
+static void groupConcatFinalize(sqlite3_context *context){
+  StrAccum *pAccum;
+  pAccum = sqlite3_aggregate_context(context, 0);
+  if( pAccum ){
+    if( pAccum->accError==STRACCUM_TOOBIG ){
+      sqlite3_result_error_toobig(context);
+    }else if( pAccum->accError==STRACCUM_NOMEM ){
+      sqlite3_result_error_nomem(context);
+    }else{    
+      sqlite3_result_text(context, sqlite3StrAccumFinish(pAccum), -1, 
+                          sqlite3_free);
+    }
+  }
+}
+
+/*
+** This routine does per-connection function registration.  Most
+** of the built-in functions above are part of the global function set.
+** This routine only deals with those that are not global.
+*/
+SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(sqlite3 *db){
+  int rc = sqlite3_overload_function(db, "MATCH", 2);
+  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
+  if( rc==SQLITE_NOMEM ){
+    db->mallocFailed = 1;
+  }
+}
+
+/*
+** Set the LIKEOPT flag on the 2-argument function with the given name.
+*/
+static void setLikeOptFlag(sqlite3 *db, const char *zName, u8 flagVal){
+  FuncDef *pDef;
+  pDef = sqlite3FindFunction(db, zName, sqlite3Strlen30(zName),
+                             2, SQLITE_UTF8, 0);
+  if( ALWAYS(pDef) ){
+    pDef->funcFlags |= flagVal;
+  }
+}
+
+/*
+** Register the built-in LIKE and GLOB functions.  The caseSensitive
+** parameter determines whether or not the LIKE operator is case
+** sensitive.  GLOB is always case sensitive.
+*/
+SQLITE_PRIVATE void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){
+  struct compareInfo *pInfo;
+  if( caseSensitive ){
+    pInfo = (struct compareInfo*)&likeInfoAlt;
+  }else{
+    pInfo = (struct compareInfo*)&likeInfoNorm;
+  }
+  sqlite3CreateFunc(db, "like", 2, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0);
+  sqlite3CreateFunc(db, "like", 3, SQLITE_UTF8, pInfo, likeFunc, 0, 0, 0);
+  sqlite3CreateFunc(db, "glob", 2, SQLITE_UTF8, 
+      (struct compareInfo*)&globInfo, likeFunc, 0, 0, 0);
+  setLikeOptFlag(db, "glob", SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE);
+  setLikeOptFlag(db, "like", 
+      caseSensitive ? (SQLITE_FUNC_LIKE | SQLITE_FUNC_CASE) : SQLITE_FUNC_LIKE);
+}
+
+/*
+** pExpr points to an expression which implements a function.  If
+** it is appropriate to apply the LIKE optimization to that function
+** then set aWc[0] through aWc[2] to the wildcard characters and
+** return TRUE.  If the function is not a LIKE-style function then
+** return FALSE.
+*/
+SQLITE_PRIVATE int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, int *pIsNocase, char *aWc){
+  FuncDef *pDef;
+  if( pExpr->op!=TK_FUNCTION 
+   || !pExpr->x.pList 
+   || pExpr->x.pList->nExpr!=2
+  ){
+    return 0;
+  }
+  assert( !ExprHasProperty(pExpr, EP_xIsSelect) );
+  pDef = sqlite3FindFunction(db, pExpr->u.zToken, 
+                             sqlite3Strlen30(pExpr->u.zToken),
+                             2, SQLITE_UTF8, 0);
+  if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_FUNC_LIKE)==0 ){
+    return 0;
+  }
+
+  /* The memcpy() statement assumes that the wildcard characters are
+  ** the first three statements in the compareInfo structure.  The
+  ** asserts() that follow verify that assumption
+  */
+  memcpy(aWc, pDef->pUserData, 3);
+  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );
+  assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne );
+  assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet );
+  *pIsNocase = (pDef->funcFlags & SQLITE_FUNC_CASE)==0;
+  return 1;
+}
+
+/*
+** All of the FuncDef structures in the aBuiltinFunc[] array above
+** to the global function hash table.  This occurs at start-time (as
+** a consequence of calling sqlite3_initialize()).
+**
+** After this routine runs
+*/
+SQLITE_PRIVATE void sqlite3RegisterGlobalFunctions(void){
+  /*
+  ** The following array holds FuncDef structures for all of the functions
+  ** defined in this file.
+  **
+  ** The array cannot be constant since changes are made to the
+  ** FuncDef.pHash elements at start-time.  The elements of this array
+  ** are read-only after initialization is complete.
+  */
+  static SQLITE_WSD FuncDef aBuiltinFunc[] = {
+    FUNCTION(ltrim,              1, 1, 0, trimFunc         ),
+    FUNCTION(ltrim,              2, 1, 0, trimFunc         ),
+    FUNCTION(rtrim,              1, 2, 0, trimFunc         ),
+    FUNCTION(rtrim,              2, 2, 0, trimFunc         ),
+    FUNCTION(trim,               1, 3, 0, trimFunc         ),
+    FUNCTION(trim,               2, 3, 0, trimFunc         ),
+    FUNCTION(min,               -1, 0, 1, minmaxFunc       ),
+    FUNCTION(min,                0, 0, 1, 0                ),
+    AGGREGATE2(min,              1, 0, 1, minmaxStep,      minMaxFinalize,
+                                          SQLITE_FUNC_MINMAX ),
+    FUNCTION(max,               -1, 1, 1, minmaxFunc       ),
+    FUNCTION(max,                0, 1, 1, 0                ),
+    AGGREGATE2(max,              1, 1, 1, minmaxStep,      minMaxFinalize,
+                                          SQLITE_FUNC_MINMAX ),
+    FUNCTION2(typeof,            1, 0, 0, typeofFunc,  SQLITE_FUNC_TYPEOF),
+    FUNCTION2(length,            1, 0, 0, lengthFunc,  SQLITE_FUNC_LENGTH),
+    FUNCTION(instr,              2, 0, 0, instrFunc        ),
+    FUNCTION(substr,             2, 0, 0, substrFunc       ),
+    FUNCTION(substr,             3, 0, 0, substrFunc       ),
+    FUNCTION(printf,            -1, 0, 0, printfFunc       ),
+    FUNCTION(unicode,            1, 0, 0, unicodeFunc      ),
+    FUNCTION(char,              -1, 0, 0, charFunc         ),
+    FUNCTION(abs,                1, 0, 0, absFunc          ),
+#ifndef SQLITE_OMIT_FLOATING_POINT
+    FUNCTION(round,              1, 0, 0, roundFunc        ),
+    FUNCTION(round,              2, 0, 0, roundFunc        ),
+#endif
+    FUNCTION(upper,              1, 0, 0, upperFunc        ),
+    FUNCTION(lower,              1, 0, 0, lowerFunc        ),
+    FUNCTION(coalesce,           1, 0, 0, 0                ),
+    FUNCTION(coalesce,           0, 0, 0, 0                ),
+    FUNCTION2(coalesce,         -1, 0, 0, noopFunc,  SQLITE_FUNC_COALESCE),
+    FUNCTION(hex,                1, 0, 0, hexFunc          ),
+    FUNCTION2(ifnull,            2, 0, 0, noopFunc,  SQLITE_FUNC_COALESCE),
+    FUNCTION2(unlikely,          1, 0, 0, noopFunc,  SQLITE_FUNC_UNLIKELY),
+    FUNCTION2(likelihood,        2, 0, 0, noopFunc,  SQLITE_FUNC_UNLIKELY),
+    FUNCTION2(likely,            1, 0, 0, noopFunc,  SQLITE_FUNC_UNLIKELY),
+    VFUNCTION(random,            0, 0, 0, randomFunc       ),
+    VFUNCTION(randomblob,        1, 0, 0, randomBlob       ),
+    FUNCTION(nullif,             2, 0, 1, nullifFunc       ),
+    FUNCTION(sqlite_version,     0, 0, 0, versionFunc      ),
+    FUNCTION(sqlite_source_id,   0, 0, 0, sourceidFunc     ),
+    FUNCTION(sqlite_log,         2, 0, 0, errlogFunc       ),
+#if SQLITE_USER_AUTHENTICATION
+    FUNCTION(sqlite_crypt,       2, 0, 0, sqlite3CryptFunc ),
+#endif
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+    FUNCTION(sqlite_compileoption_used,1, 0, 0, compileoptionusedFunc  ),
+    FUNCTION(sqlite_compileoption_get, 1, 0, 0, compileoptiongetFunc  ),
+#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */
+    FUNCTION(quote,              1, 0, 0, quoteFunc        ),
+    VFUNCTION(last_insert_rowid, 0, 0, 0, last_insert_rowid),
+    VFUNCTION(changes,           0, 0, 0, changes          ),
+    VFUNCTION(total_changes,     0, 0, 0, total_changes    ),
+    FUNCTION(replace,            3, 0, 0, replaceFunc      ),
+    FUNCTION(zeroblob,           1, 0, 0, zeroblobFunc     ),
+  #ifdef SQLITE_SOUNDEX
+    FUNCTION(soundex,            1, 0, 0, soundexFunc      ),
+  #endif
+  #ifndef SQLITE_OMIT_LOAD_EXTENSION
+    FUNCTION(load_extension,     1, 0, 0, loadExt          ),
+    FUNCTION(load_extension,     2, 0, 0, loadExt          ),
+  #endif
+    AGGREGATE(sum,               1, 0, 0, sumStep,         sumFinalize    ),
+    AGGREGATE(total,             1, 0, 0, sumStep,         totalFinalize    ),
+    AGGREGATE(avg,               1, 0, 0, sumStep,         avgFinalize    ),
+    AGGREGATE2(count,            0, 0, 0, countStep,       countFinalize,
+               SQLITE_FUNC_COUNT  ),
+    AGGREGATE(count,             1, 0, 0, countStep,       countFinalize  ),
+    AGGREGATE(group_concat,      1, 0, 0, groupConcatStep, groupConcatFinalize),
+    AGGREGATE(group_concat,      2, 0, 0, groupConcatStep, groupConcatFinalize),
+  
+    LIKEFUNC(glob, 2, &globInfo, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE),
+  #ifdef SQLITE_CASE_SENSITIVE_LIKE
+    LIKEFUNC(like, 2, &likeInfoAlt, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE),
+    LIKEFUNC(like, 3, &likeInfoAlt, SQLITE_FUNC_LIKE|SQLITE_FUNC_CASE),
+  #else
+    LIKEFUNC(like, 2, &likeInfoNorm, SQLITE_FUNC_LIKE),
+    LIKEFUNC(like, 3, &likeInfoNorm, SQLITE_FUNC_LIKE),
+  #endif
+  };
+
+  int i;
+  FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+  FuncDef *aFunc = (FuncDef*)&GLOBAL(FuncDef, aBuiltinFunc);
+
+  for(i=0; i<ArraySize(aBuiltinFunc); i++){
+    sqlite3FuncDefInsert(pHash, &aFunc[i]);
+  }
+  sqlite3RegisterDateTimeFunctions();
+#ifndef SQLITE_OMIT_ALTERTABLE
+  sqlite3AlterFunctions();
+#endif
+#if defined(SQLITE_ENABLE_STAT3) || defined(SQLITE_ENABLE_STAT4)
+  sqlite3AnalyzeFunctions();
+#endif
+}
+
+/************** End of func.c ************************************************/
+/************** Begin file fkey.c ********************************************/
+/*
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used by the compiler to add foreign key
+** support to compiled SQL statements.
+*/
+
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+#ifndef SQLITE_OMIT_TRIGGER
+
+/*
+** Deferred and Immediate FKs
+** --------------------------
+**
+** Foreign keys in SQLite come in two flavours: deferred and immediate.
+** If an immediate foreign key constraint is violated,
+** SQLITE_CONSTRAINT_FOREIGNKEY is returned and the current
+** statement transaction rolled back. If a 
+** deferred foreign key constraint is violated, no action is taken 
+** immediately. However if the application attempts to commit the 
+** transaction before fixing the constraint violation, the attempt fails.
+**
+** Deferred constraints are implemented using a simple counter associated
+** with the database handle. The counter is set to zero each time a 
+** database transaction is opened. Each time a statement is executed 
+** that causes a foreign key violation, the counter is incremented. Each
+** time a statement is executed that removes an existing violation from
+** the database, the counter is decremented. When the transaction is
+** committed, the commit fails if the current value of the counter is
+** greater than zero. This scheme has two big drawbacks:
+**
+**   * When a commit fails due to a deferred foreign key constraint, 
+**     there is no way to tell which foreign constraint is not satisfied,
+**     or which row it is not satisfied for.
+**
+**   * If the database contains foreign key violations when the 
+**     transaction is opened, this may cause the mechanism to malfunction.
+**
+** Despite these problems, this approach is adopted as it seems simpler
+** than the alternatives.
+**
+** INSERT operations:
+**
+**   I.1) For each FK for which the table is the child table, search
+**        the parent table for a match. If none is found increment the
+**        constraint counter.
+**
+**   I.2) For each FK for which the table is the parent table, 
+**        search the child table for rows that correspond to the new
+**        row in the parent table. Decrement the counter for each row
+**        found (as the constraint is now satisfied).
+**
+** DELETE operations:
+**
+**   D.1) For each FK for which the table is the child table, 
+**        search the parent table for a row that corresponds to the 
+**        deleted row in the child table. If such a row is not found, 
+**        decrement the counter.
+**
+**   D.2) For each FK for which the table is the parent table, search 
+**        the child table for rows that correspond to the deleted row 
+**        in the parent table. For each found increment the counter.
+**
+** UPDATE operations:
+**
+**   An UPDATE command requires that all 4 steps above are taken, but only
+**   for FK constraints for which the affected columns are actually 
+**   modified (values must be compared at runtime).
+**
+** Note that I.1 and D.1 are very similar operations, as are I.2 and D.2.
+** This simplifies the implementation a bit.
+**
+** For the purposes of immediate FK constraints, the OR REPLACE conflict
+** resolution is considered to delete rows before the new row is inserted.
+** If a delete caused by OR REPLACE violates an FK constraint, an exception
+** is thrown, even if the FK constraint would be satisfied after the new 
+** row is inserted.
+**
+** Immediate constraints are usually handled similarly. The only difference 
+** is that the counter used is stored as part of each individual statement
+** object (struct Vdbe). If, after the statement has run, its immediate
+** constraint counter is greater than zero,
+** it returns SQLITE_CONSTRAINT_FOREIGNKEY
+** and the statement transaction is rolled back. An exception is an INSERT
+** statement that inserts a single row only (no triggers). In this case,
+** instead of using a counter, an exception is thrown immediately if the
+** INSERT violates a foreign key constraint. This is necessary as such
+** an INSERT does not open a statement transaction.
+**
+** TODO: How should dropping a table be handled? How should renaming a 
+** table be handled?
+**
+**
+** Query API Notes
+** ---------------
+**
+** Before coding an UPDATE or DELETE row operation, the code-generator
+** for those two operations needs to know whether or not the operation
+** requires any FK processing and, if so, which columns of the original
+** row are required by the FK processing VDBE code (i.e. if FKs were
+** implemented using triggers, which of the old.* columns would be 
+** accessed). No information is required by the code-generator before
+** coding an INSERT operation. The functions used by the UPDATE/DELETE
+** generation code to query for this information are:
+**
+**   sqlite3FkRequired() - Test to see if FK processing is required.
+**   sqlite3FkOldmask()  - Query for the set of required old.* columns.
+**
+**
+** Externally accessible module functions
+** --------------------------------------
+**
+**   sqlite3FkCheck()    - Check for foreign key violations.
+**   sqlite3FkActions()  - Code triggers for ON UPDATE/ON DELETE actions.
+**   sqlite3FkDelete()   - Delete an FKey structure.
+*/
+
+/*
+** VDBE Calling Convention
+** -----------------------
+**
+** Example:
+**
+**   For the following INSERT statement:
+**
+**     CREATE TABLE t1(a, b INTEGER PRIMARY KEY, c);
+**     INSERT INTO t1 VALUES(1, 2, 3.1);
+**
+**   Register (x):        2    (type integer)
+**   Register (x+1):      1    (type integer)
+**   Register (x+2):      NULL (type NULL)
+**   Register (x+3):      3.1  (type real)
+*/
+
+/*
+** A foreign key constraint requires that the key columns in the parent
+** table are collectively subject to a UNIQUE or PRIMARY KEY constraint.
+** Given that pParent is the parent table for foreign key constraint pFKey, 
+** search the schema for a unique index on the parent key columns. 
+**
+** If successful, zero is returned. If the parent key is an INTEGER PRIMARY 
+** KEY column, then output variable *ppIdx is set to NULL. Otherwise, *ppIdx 
+** is set to point to the unique index. 
+** 
+** If the parent key consists of a single column (the foreign key constraint
+** is not a composite foreign key), output variable *paiCol is set to NULL.
+** Otherwise, it is set to point to an allocated array of size N, where
+** N is the number of columns in the parent key. The first element of the
+** array is the index of the child table column that is mapped by the FK
+** constraint to the parent table column stored in the left-most column
+** of index *ppIdx. The second element of the array is the index of the
+** child table column that corresponds to the second left-most column of
+** *ppIdx, and so on.
+**
+** If the required index cannot be found, either because:
+**
+**   1) The named parent key columns do not exist, or
+**
+**   2) The named parent key columns do exist, but are not subject to a
+**      UNIQUE or PRIMARY KEY constraint, or
+**
+**   3) No parent key columns were provided explicitly as part of the
+**      foreign key definition, and the parent table does not have a
+**      PRIMARY KEY, or
+**
+**   4) No parent key columns were provided explicitly as part of the
+**      foreign key definition, and the PRIMARY KEY of the parent table 
+**      consists of a different number of columns to the child key in 
+**      the child table.
+**
+** then non-zero is returned, and a "foreign key mismatch" error loaded
+** into pParse. If an OOM error occurs, non-zero is returned and the
+** pParse->db->mallocFailed flag is set.
+*/
+SQLITE_PRIVATE int sqlite3FkLocateIndex(
+  Parse *pParse,                  /* Parse context to store any error in */
+  Table *pParent,                 /* Parent table of FK constraint pFKey */
+  FKey *pFKey,                    /* Foreign key to find index for */
+  Index **ppIdx,                  /* OUT: Unique index on parent table */
+  int **paiCol                    /* OUT: Map of index columns in pFKey */
+){
+  Index *pIdx = 0;                    /* Value to return via *ppIdx */
+  int *aiCol = 0;                     /* Value to return via *paiCol */
+  int nCol = pFKey->nCol;             /* Number of columns in parent key */
+  char *zKey = pFKey->aCol[0].zCol;   /* Name of left-most parent key column */
+
+  /* The caller is responsible for zeroing output parameters. */
+  assert( ppIdx && *ppIdx==0 );
+  assert( !paiCol || *paiCol==0 );
+  assert( pParse );
+
+  /* If this is a non-composite (single column) foreign key, check if it 
+  ** maps to the INTEGER PRIMARY KEY of table pParent. If so, leave *ppIdx 
+  ** and *paiCol set to zero and return early. 
+  **
+  ** Otherwise, for a composite foreign key (more than one column), allocate
+  ** space for the aiCol array (returned via output parameter *paiCol).
+  ** Non-composite foreign keys do not require the aiCol array.
+  */
+  if( nCol==1 ){
+    /* The FK maps to the IPK if any of the following are true:
+    **
+    **   1) There is an INTEGER PRIMARY KEY column and the FK is implicitly 
+    **      mapped to the primary key of table pParent, or
+    **   2) The FK is explicitly mapped to a column declared as INTEGER
+    **      PRIMARY KEY.
+    */
+    if( pParent->iPKey>=0 ){
+      if( !zKey ) return 0;
+      if( !sqlite3StrICmp(pParent->aCol[pParent->iPKey].zName, zKey) ) return 0;
+    }
+  }else if( paiCol ){
+    assert( nCol>1 );
+    aiCol = (int *)sqlite3DbMallocRaw(pParse->db, nCol*sizeof(int));
+    if( !aiCol ) return 1;
+    *paiCol = aiCol;
+  }
+
+  for(pIdx=pParent->pIndex; pIdx; pIdx=pIdx->pNext){
+    if( pIdx->nKeyCol==nCol && IsUniqueIndex(pIdx) ){ 
+      /* pIdx is a UNIQUE index (or a PRIMARY KEY) and has the right number
+      ** of columns. If each indexed column corresponds to a foreign key
+      ** column of pFKey, then this index is a winner.  */
+
+      if( zKey==0 ){
+        /* If zKey is NULL, then this foreign key is implicitly mapped to 
+        ** the PRIMARY KEY of table pParent. The PRIMARY KEY index may be 
+        ** identified by the test.  */
+        if( IsPrimaryKeyIndex(pIdx) ){
+          if( aiCol ){
+            int i;
+            for(i=0; i<nCol; i++) aiCol[i] = pFKey->aCol[i].iFrom;
+          }
+          break;
+        }
+      }else{
+        /* If zKey is non-NULL, then this foreign key was declared to
+        ** map to an explicit list of columns in table pParent. Check if this
+        ** index matches those columns. Also, check that the index uses
+        ** the default collation sequences for each column. */
+        int i, j;
+        for(i=0; i<nCol; i++){
+          i16 iCol = pIdx->aiColumn[i];     /* Index of column in parent tbl */
+          char *zDfltColl;                  /* Def. collation for column */
+          char *zIdxCol;                    /* Name of indexed column */
+
+          /* If the index uses a collation sequence that is different from
+          ** the default collation sequence for the column, this index is
+          ** unusable. Bail out early in this case.  */
+          zDfltColl = pParent->aCol[iCol].zColl;
+          if( !zDfltColl ){
+            zDfltColl = "BINARY";
+          }
+          if( sqlite3StrICmp(pIdx->azColl[i], zDfltColl) ) break;
+
+          zIdxCol = pParent->aCol[iCol].zName;
+          for(j=0; j<nCol; j++){
+            if( sqlite3StrICmp(pFKey->aCol[j].zCol, zIdxCol)==0 ){
+              if( aiCol ) aiCol[i] = pFKey->aCol[j].iFrom;
+              break;
+            }
+          }
+          if( j==nCol ) break;
+        }
+        if( i==nCol ) break;      /* pIdx is usable */
+      }
+    }
+  }
+
+  if( !pIdx ){
+    if( !pParse->disableTriggers ){
+      sqlite3ErrorMsg(pParse,
+           "foreign key mismatch - \"%w\" referencing \"%w\"",
+           pFKey->pFrom->zName, pFKey->zTo);
+    }
+    sqlite3DbFree(pParse->db, aiCol);
+    return 1;
+  }
+
+  *ppIdx = pIdx;
+  return 0;
+}
+
+/*
+** This function is called when a row is inserted into or deleted from the 
+** child table of foreign key constraint pFKey. If an SQL UPDATE is executed 
+** on the child table of pFKey, this function is invoked twice for each row
+** affected - once to "delete" the old row, and then again to "insert" the
+** new row.
+**
+** Each time it is called, this function generates VDBE code to locate the
+** row in the parent table that corresponds to the row being inserted into 
+** or deleted from the child table. If the parent row can be found, no 
+** special action is taken. Otherwise, if the parent row can *not* be
+** found in the parent table:
+**
+**   Operation | FK type   | Action taken
+**   --------------------------------------------------------------------------
+**   INSERT      immediate   Increment the "immediate constraint counter".
+**
+**   DELETE      immediate   Decrement the "immediate constraint counter".
+**
+**   INSERT      deferred    Increment the "deferred constraint counter".
+**
+**   DELETE      deferred    Decrement the "deferred constraint counter".
+**
+** These operations are identified in the comment at the top of this file 
+** (fkey.c) as "I.1" and "D.1".
+*/
+static void fkLookupParent(
+  Parse *pParse,        /* Parse context */
+  int iDb,              /* Index of database housing pTab */
+  Table *pTab,          /* Parent table of FK pFKey */
+  Index *pIdx,          /* Unique index on parent key columns in pTab */
+  FKey *pFKey,          /* Foreign key constraint */
+  int *aiCol,           /* Map from parent key columns to child table columns */
+  int regData,          /* Address of array containing child table row */
+  int nIncr,            /* Increment constraint counter by this */
+  int isIgnore          /* If true, pretend pTab contains all NULL values */
+){
+  int i;                                    /* Iterator variable */
+  Vdbe *v = sqlite3GetVdbe(pParse);         /* Vdbe to add code to */
+  int iCur = pParse->nTab - 1;              /* Cursor number to use */
+  int iOk = sqlite3VdbeMakeLabel(v);        /* jump here if parent key found */
+
+  /* If nIncr is less than zero, then check at runtime if there are any
+  ** outstanding constraints to resolve. If there are not, there is no need
+  ** to check if deleting this row resolves any outstanding violations.
+  **
+  ** Check if any of the key columns in the child table row are NULL. If 
+  ** any are, then the constraint is considered satisfied. No need to 
+  ** search for a matching row in the parent table.  */
+  if( nIncr<0 ){
+    sqlite3VdbeAddOp2(v, OP_FkIfZero, pFKey->isDeferred, iOk);
+    VdbeCoverage(v);
+  }
+  for(i=0; i<pFKey->nCol; i++){
+    int iReg = aiCol[i] + regData + 1;
+    sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iOk); VdbeCoverage(v);
+  }
+
+  if( isIgnore==0 ){
+    if( pIdx==0 ){
+      /* If pIdx is NULL, then the parent key is the INTEGER PRIMARY KEY
+      ** column of the parent table (table pTab).  */
+      int iMustBeInt;               /* Address of MustBeInt instruction */
+      int regTemp = sqlite3GetTempReg(pParse);
+  
+      /* Invoke MustBeInt to coerce the child key value to an integer (i.e. 
+      ** apply the affinity of the parent key). If this fails, then there
+      ** is no matching parent key. Before using MustBeInt, make a copy of
+      ** the value. Otherwise, the value inserted into the child key column
+      ** will have INTEGER affinity applied to it, which may not be correct.  */
+      sqlite3VdbeAddOp2(v, OP_SCopy, aiCol[0]+1+regData, regTemp);
+      iMustBeInt = sqlite3VdbeAddOp2(v, OP_MustBeInt, regTemp, 0);
+      VdbeCoverage(v);
+  
+      /* If the parent table is the same as the child table, and we are about
+      ** to increment the constraint-counter (i.e. this is an INSERT operation),
+      ** then check if the row being inserted matches itself. If so, do not
+      ** increment the constraint-counter.  */
+      if( pTab==pFKey->pFrom && nIncr==1 ){
+        sqlite3VdbeAddOp3(v, OP_Eq, regData, iOk, regTemp); VdbeCoverage(v);
+        sqlite3VdbeChangeP5(v, SQLITE_NOTNULL);
+      }
+  
+      sqlite3OpenTable(pParse, iCur, iDb, pTab, OP_OpenRead);
+      sqlite3VdbeAddOp3(v, OP_NotExists, iCur, 0, regTemp); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, iOk);
+      sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2);
+      sqlite3VdbeJumpHere(v, iMustBeInt);
+      sqlite3ReleaseTempReg(pParse, regTemp);
+    }else{
+      int nCol = pFKey->nCol;
+      int regTemp = sqlite3GetTempRange(pParse, nCol);
+      int regRec = sqlite3GetTempReg(pParse);
+  
+      sqlite3VdbeAddOp3(v, OP_OpenRead, iCur, pIdx->tnum, iDb);
+      sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+      for(i=0; i<nCol; i++){
+        sqlite3VdbeAddOp2(v, OP_Copy, aiCol[i]+1+regData, regTemp+i);
+      }
+  
+      /* If the parent table is the same as the child table, and we are about
+      ** to increment the constraint-counter (i.e. this is an INSERT operation),
+      ** then check if the row being inserted matches itself. If so, do not
+      ** increment the constraint-counter. 
+      **
+      ** If any of the parent-key values are NULL, then the row cannot match 
+      ** itself. So set JUMPIFNULL to make sure we do the OP_Found if any
+      ** of the parent-key values are NULL (at this point it is known that
+      ** none of the child key values are).
+      */
+      if( pTab==pFKey->pFrom && nIncr==1 ){
+        int iJump = sqlite3VdbeCurrentAddr(v) + nCol + 1;
+        for(i=0; i<nCol; i++){
+          int iChild = aiCol[i]+1+regData;
+          int iParent = pIdx->aiColumn[i]+1+regData;
+          assert( aiCol[i]!=pTab->iPKey );
+          if( pIdx->aiColumn[i]==pTab->iPKey ){
+            /* The parent key is a composite key that includes the IPK column */
+            iParent = regData;
+          }
+          sqlite3VdbeAddOp3(v, OP_Ne, iChild, iJump, iParent); VdbeCoverage(v);
+          sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL);
+        }
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, iOk);
+      }
+  
+      sqlite3VdbeAddOp4(v, OP_MakeRecord, regTemp, nCol, regRec,
+                        sqlite3IndexAffinityStr(v,pIdx), nCol);
+      sqlite3VdbeAddOp4Int(v, OP_Found, iCur, iOk, regRec, 0); VdbeCoverage(v);
+  
+      sqlite3ReleaseTempReg(pParse, regRec);
+      sqlite3ReleaseTempRange(pParse, regTemp, nCol);
+    }
+  }
+
+  if( !pFKey->isDeferred && !(pParse->db->flags & SQLITE_DeferFKs)
+   && !pParse->pToplevel 
+   && !pParse->isMultiWrite 
+  ){
+    /* Special case: If this is an INSERT statement that will insert exactly
+    ** one row into the table, raise a constraint immediately instead of
+    ** incrementing a counter. This is necessary as the VM code is being
+    ** generated for will not open a statement transaction.  */
+    assert( nIncr==1 );
+    sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_FOREIGNKEY,
+        OE_Abort, 0, P4_STATIC, P5_ConstraintFK);
+  }else{
+    if( nIncr>0 && pFKey->isDeferred==0 ){
+      sqlite3MayAbort(pParse);
+    }
+    sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr);
+  }
+
+  sqlite3VdbeResolveLabel(v, iOk);
+  sqlite3VdbeAddOp1(v, OP_Close, iCur);
+}
+
+
+/*
+** Return an Expr object that refers to a memory register corresponding
+** to column iCol of table pTab.
+**
+** regBase is the first of an array of register that contains the data
+** for pTab.  regBase itself holds the rowid.  regBase+1 holds the first
+** column.  regBase+2 holds the second column, and so forth.
+*/
+static Expr *exprTableRegister(
+  Parse *pParse,     /* Parsing and code generating context */
+  Table *pTab,       /* The table whose content is at r[regBase]... */
+  int regBase,       /* Contents of table pTab */
+  i16 iCol           /* Which column of pTab is desired */
+){
+  Expr *pExpr;
+  Column *pCol;
+  const char *zColl;
+  sqlite3 *db = pParse->db;
+
+  pExpr = sqlite3Expr(db, TK_REGISTER, 0);
+  if( pExpr ){
+    if( iCol>=0 && iCol!=pTab->iPKey ){
+      pCol = &pTab->aCol[iCol];
+      pExpr->iTable = regBase + iCol + 1;
+      pExpr->affinity = pCol->affinity;
+      zColl = pCol->zColl;
+      if( zColl==0 ) zColl = db->pDfltColl->zName;
+      pExpr = sqlite3ExprAddCollateString(pParse, pExpr, zColl);
+    }else{
+      pExpr->iTable = regBase;
+      pExpr->affinity = SQLITE_AFF_INTEGER;
+    }
+  }
+  return pExpr;
+}
+
+/*
+** Return an Expr object that refers to column iCol of table pTab which
+** has cursor iCur.
+*/
+static Expr *exprTableColumn(
+  sqlite3 *db,      /* The database connection */
+  Table *pTab,      /* The table whose column is desired */
+  int iCursor,      /* The open cursor on the table */
+  i16 iCol          /* The column that is wanted */
+){
+  Expr *pExpr = sqlite3Expr(db, TK_COLUMN, 0);
+  if( pExpr ){
+    pExpr->pTab = pTab;
+    pExpr->iTable = iCursor;
+    pExpr->iColumn = iCol;
+  }
+  return pExpr;
+}
+
+/*
+** This function is called to generate code executed when a row is deleted
+** from the parent table of foreign key constraint pFKey and, if pFKey is 
+** deferred, when a row is inserted into the same table. When generating
+** code for an SQL UPDATE operation, this function may be called twice -
+** once to "delete" the old row and once to "insert" the new row.
+**
+** Parameter nIncr is passed -1 when inserting a row (as this may decrease
+** the number of FK violations in the db) or +1 when deleting one (as this
+** may increase the number of FK constraint problems).
+**
+** The code generated by this function scans through the rows in the child
+** table that correspond to the parent table row being deleted or inserted.
+** For each child row found, one of the following actions is taken:
+**
+**   Operation | FK type   | Action taken
+**   --------------------------------------------------------------------------
+**   DELETE      immediate   Increment the "immediate constraint counter".
+**                           Or, if the ON (UPDATE|DELETE) action is RESTRICT,
+**                           throw a "FOREIGN KEY constraint failed" exception.
+**
+**   INSERT      immediate   Decrement the "immediate constraint counter".
+**
+**   DELETE      deferred    Increment the "deferred constraint counter".
+**                           Or, if the ON (UPDATE|DELETE) action is RESTRICT,
+**                           throw a "FOREIGN KEY constraint failed" exception.
+**
+**   INSERT      deferred    Decrement the "deferred constraint counter".
+**
+** These operations are identified in the comment at the top of this file 
+** (fkey.c) as "I.2" and "D.2".
+*/
+static void fkScanChildren(
+  Parse *pParse,                  /* Parse context */
+  SrcList *pSrc,                  /* The child table to be scanned */
+  Table *pTab,                    /* The parent table */
+  Index *pIdx,                    /* Index on parent covering the foreign key */
+  FKey *pFKey,                    /* The foreign key linking pSrc to pTab */
+  int *aiCol,                     /* Map from pIdx cols to child table cols */
+  int regData,                    /* Parent row data starts here */
+  int nIncr                       /* Amount to increment deferred counter by */
+){
+  sqlite3 *db = pParse->db;       /* Database handle */
+  int i;                          /* Iterator variable */
+  Expr *pWhere = 0;               /* WHERE clause to scan with */
+  NameContext sNameContext;       /* Context used to resolve WHERE clause */
+  WhereInfo *pWInfo;              /* Context used by sqlite3WhereXXX() */
+  int iFkIfZero = 0;              /* Address of OP_FkIfZero */
+  Vdbe *v = sqlite3GetVdbe(pParse);
+
+  assert( pIdx==0 || pIdx->pTable==pTab );
+  assert( pIdx==0 || pIdx->nKeyCol==pFKey->nCol );
+  assert( pIdx!=0 || pFKey->nCol==1 );
+  assert( pIdx!=0 || HasRowid(pTab) );
+
+  if( nIncr<0 ){
+    iFkIfZero = sqlite3VdbeAddOp2(v, OP_FkIfZero, pFKey->isDeferred, 0);
+    VdbeCoverage(v);
+  }
+
+  /* Create an Expr object representing an SQL expression like:
+  **
+  **   <parent-key1> = <child-key1> AND <parent-key2> = <child-key2> ...
+  **
+  ** The collation sequence used for the comparison should be that of
+  ** the parent key columns. The affinity of the parent key column should
+  ** be applied to each child key value before the comparison takes place.
+  */
+  for(i=0; i<pFKey->nCol; i++){
+    Expr *pLeft;                  /* Value from parent table row */
+    Expr *pRight;                 /* Column ref to child table */
+    Expr *pEq;                    /* Expression (pLeft = pRight) */
+    i16 iCol;                     /* Index of column in child table */ 
+    const char *zCol;             /* Name of column in child table */
+
+    iCol = pIdx ? pIdx->aiColumn[i] : -1;
+    pLeft = exprTableRegister(pParse, pTab, regData, iCol);
+    iCol = aiCol ? aiCol[i] : pFKey->aCol[0].iFrom;
+    assert( iCol>=0 );
+    zCol = pFKey->pFrom->aCol[iCol].zName;
+    pRight = sqlite3Expr(db, TK_ID, zCol);
+    pEq = sqlite3PExpr(pParse, TK_EQ, pLeft, pRight, 0);
+    pWhere = sqlite3ExprAnd(db, pWhere, pEq);
+  }
+
+  /* If the child table is the same as the parent table, then add terms
+  ** to the WHERE clause that prevent this entry from being scanned.
+  ** The added WHERE clause terms are like this:
+  **
+  **     $current_rowid!=rowid
+  **     NOT( $current_a==a AND $current_b==b AND ... )
+  **
+  ** The first form is used for rowid tables.  The second form is used
+  ** for WITHOUT ROWID tables.  In the second form, the primary key is
+  ** (a,b,...)
+  */
+  if( pTab==pFKey->pFrom && nIncr>0 ){
+    Expr *pNe;                    /* Expression (pLeft != pRight) */
+    Expr *pLeft;                  /* Value from parent table row */
+    Expr *pRight;                 /* Column ref to child table */
+    if( HasRowid(pTab) ){
+      pLeft = exprTableRegister(pParse, pTab, regData, -1);
+      pRight = exprTableColumn(db, pTab, pSrc->a[0].iCursor, -1);
+      pNe = sqlite3PExpr(pParse, TK_NE, pLeft, pRight, 0);
+    }else{
+      Expr *pEq, *pAll = 0;
+      Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+      assert( pIdx!=0 );
+      for(i=0; i<pPk->nKeyCol; i++){
+        i16 iCol = pIdx->aiColumn[i];
+        pLeft = exprTableRegister(pParse, pTab, regData, iCol);
+        pRight = exprTableColumn(db, pTab, pSrc->a[0].iCursor, iCol);
+        pEq = sqlite3PExpr(pParse, TK_EQ, pLeft, pRight, 0);
+        pAll = sqlite3ExprAnd(db, pAll, pEq);
+      }
+      pNe = sqlite3PExpr(pParse, TK_NOT, pAll, 0, 0);
+    }
+    pWhere = sqlite3ExprAnd(db, pWhere, pNe);
+  }
+
+  /* Resolve the references in the WHERE clause. */
+  memset(&sNameContext, 0, sizeof(NameContext));
+  sNameContext.pSrcList = pSrc;
+  sNameContext.pParse = pParse;
+  sqlite3ResolveExprNames(&sNameContext, pWhere);
+
+  /* Create VDBE to loop through the entries in pSrc that match the WHERE
+  ** clause. For each row found, increment either the deferred or immediate
+  ** foreign key constraint counter. */
+  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0, 0);
+  sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr);
+  if( pWInfo ){
+    sqlite3WhereEnd(pWInfo);
+  }
+
+  /* Clean up the WHERE clause constructed above. */
+  sqlite3ExprDelete(db, pWhere);
+  if( iFkIfZero ){
+    sqlite3VdbeJumpHere(v, iFkIfZero);
+  }
+}
+
+/*
+** This function returns a linked list of FKey objects (connected by
+** FKey.pNextTo) holding all children of table pTab.  For example,
+** given the following schema:
+**
+**   CREATE TABLE t1(a PRIMARY KEY);
+**   CREATE TABLE t2(b REFERENCES t1(a);
+**
+** Calling this function with table "t1" as an argument returns a pointer
+** to the FKey structure representing the foreign key constraint on table
+** "t2". Calling this function with "t2" as the argument would return a
+** NULL pointer (as there are no FK constraints for which t2 is the parent
+** table).
+*/
+SQLITE_PRIVATE FKey *sqlite3FkReferences(Table *pTab){
+  return (FKey *)sqlite3HashFind(&pTab->pSchema->fkeyHash, pTab->zName);
+}
+
+/*
+** The second argument is a Trigger structure allocated by the 
+** fkActionTrigger() routine. This function deletes the Trigger structure
+** and all of its sub-components.
+**
+** The Trigger structure or any of its sub-components may be allocated from
+** the lookaside buffer belonging to database handle dbMem.
+*/
+static void fkTriggerDelete(sqlite3 *dbMem, Trigger *p){
+  if( p ){
+    TriggerStep *pStep = p->step_list;
+    sqlite3ExprDelete(dbMem, pStep->pWhere);
+    sqlite3ExprListDelete(dbMem, pStep->pExprList);
+    sqlite3SelectDelete(dbMem, pStep->pSelect);
+    sqlite3ExprDelete(dbMem, p->pWhen);
+    sqlite3DbFree(dbMem, p);
+  }
+}
+
+/*
+** This function is called to generate code that runs when table pTab is
+** being dropped from the database. The SrcList passed as the second argument
+** to this function contains a single entry guaranteed to resolve to
+** table pTab.
+**
+** Normally, no code is required. However, if either
+**
+**   (a) The table is the parent table of a FK constraint, or
+**   (b) The table is the child table of a deferred FK constraint and it is
+**       determined at runtime that there are outstanding deferred FK 
+**       constraint violations in the database,
+**
+** then the equivalent of "DELETE FROM <tbl>" is executed before dropping
+** the table from the database. Triggers are disabled while running this
+** DELETE, but foreign key actions are not.
+*/
+SQLITE_PRIVATE void sqlite3FkDropTable(Parse *pParse, SrcList *pName, Table *pTab){
+  sqlite3 *db = pParse->db;
+  if( (db->flags&SQLITE_ForeignKeys) && !IsVirtual(pTab) && !pTab->pSelect ){
+    int iSkip = 0;
+    Vdbe *v = sqlite3GetVdbe(pParse);
+
+    assert( v );                  /* VDBE has already been allocated */
+    if( sqlite3FkReferences(pTab)==0 ){
+      /* Search for a deferred foreign key constraint for which this table
+      ** is the child table. If one cannot be found, return without 
+      ** generating any VDBE code. If one can be found, then jump over
+      ** the entire DELETE if there are no outstanding deferred constraints
+      ** when this statement is run.  */
+      FKey *p;
+      for(p=pTab->pFKey; p; p=p->pNextFrom){
+        if( p->isDeferred || (db->flags & SQLITE_DeferFKs) ) break;
+      }
+      if( !p ) return;
+      iSkip = sqlite3VdbeMakeLabel(v);
+      sqlite3VdbeAddOp2(v, OP_FkIfZero, 1, iSkip); VdbeCoverage(v);
+    }
+
+    pParse->disableTriggers = 1;
+    sqlite3DeleteFrom(pParse, sqlite3SrcListDup(db, pName, 0), 0);
+    pParse->disableTriggers = 0;
+
+    /* If the DELETE has generated immediate foreign key constraint 
+    ** violations, halt the VDBE and return an error at this point, before
+    ** any modifications to the schema are made. This is because statement
+    ** transactions are not able to rollback schema changes.  
+    **
+    ** If the SQLITE_DeferFKs flag is set, then this is not required, as
+    ** the statement transaction will not be rolled back even if FK
+    ** constraints are violated.
+    */
+    if( (db->flags & SQLITE_DeferFKs)==0 ){
+      sqlite3VdbeAddOp2(v, OP_FkIfZero, 0, sqlite3VdbeCurrentAddr(v)+2);
+      VdbeCoverage(v);
+      sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_FOREIGNKEY,
+          OE_Abort, 0, P4_STATIC, P5_ConstraintFK);
+    }
+
+    if( iSkip ){
+      sqlite3VdbeResolveLabel(v, iSkip);
+    }
+  }
+}
+
+
+/*
+** The second argument points to an FKey object representing a foreign key
+** for which pTab is the child table. An UPDATE statement against pTab
+** is currently being processed. For each column of the table that is 
+** actually updated, the corresponding element in the aChange[] array
+** is zero or greater (if a column is unmodified the corresponding element
+** is set to -1). If the rowid column is modified by the UPDATE statement
+** the bChngRowid argument is non-zero.
+**
+** This function returns true if any of the columns that are part of the
+** child key for FK constraint *p are modified.
+*/
+static int fkChildIsModified(
+  Table *pTab,                    /* Table being updated */
+  FKey *p,                        /* Foreign key for which pTab is the child */
+  int *aChange,                   /* Array indicating modified columns */
+  int bChngRowid                  /* True if rowid is modified by this update */
+){
+  int i;
+  for(i=0; i<p->nCol; i++){
+    int iChildKey = p->aCol[i].iFrom;
+    if( aChange[iChildKey]>=0 ) return 1;
+    if( iChildKey==pTab->iPKey && bChngRowid ) return 1;
+  }
+  return 0;
+}
+
+/*
+** The second argument points to an FKey object representing a foreign key
+** for which pTab is the parent table. An UPDATE statement against pTab
+** is currently being processed. For each column of the table that is 
+** actually updated, the corresponding element in the aChange[] array
+** is zero or greater (if a column is unmodified the corresponding element
+** is set to -1). If the rowid column is modified by the UPDATE statement
+** the bChngRowid argument is non-zero.
+**
+** This function returns true if any of the columns that are part of the
+** parent key for FK constraint *p are modified.
+*/
+static int fkParentIsModified(
+  Table *pTab, 
+  FKey *p, 
+  int *aChange, 
+  int bChngRowid
+){
+  int i;
+  for(i=0; i<p->nCol; i++){
+    char *zKey = p->aCol[i].zCol;
+    int iKey;
+    for(iKey=0; iKey<pTab->nCol; iKey++){
+      if( aChange[iKey]>=0 || (iKey==pTab->iPKey && bChngRowid) ){
+        Column *pCol = &pTab->aCol[iKey];
+        if( zKey ){
+          if( 0==sqlite3StrICmp(pCol->zName, zKey) ) return 1;
+        }else if( pCol->colFlags & COLFLAG_PRIMKEY ){
+          return 1;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** Return true if the parser passed as the first argument is being
+** used to code a trigger that is really a "SET NULL" action belonging
+** to trigger pFKey.
+*/
+static int isSetNullAction(Parse *pParse, FKey *pFKey){
+  Parse *pTop = sqlite3ParseToplevel(pParse);
+  if( pTop->pTriggerPrg ){
+    Trigger *p = pTop->pTriggerPrg->pTrigger;
+    if( (p==pFKey->apTrigger[0] && pFKey->aAction[0]==OE_SetNull)
+     || (p==pFKey->apTrigger[1] && pFKey->aAction[1]==OE_SetNull)
+    ){
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** This function is called when inserting, deleting or updating a row of
+** table pTab to generate VDBE code to perform foreign key constraint 
+** processing for the operation.
+**
+** For a DELETE operation, parameter regOld is passed the index of the
+** first register in an array of (pTab->nCol+1) registers containing the
+** rowid of the row being deleted, followed by each of the column values
+** of the row being deleted, from left to right. Parameter regNew is passed
+** zero in this case.
+**
+** For an INSERT operation, regOld is passed zero and regNew is passed the
+** first register of an array of (pTab->nCol+1) registers containing the new
+** row data.
+**
+** For an UPDATE operation, this function is called twice. Once before
+** the original record is deleted from the table using the calling convention
+** described for DELETE. Then again after the original record is deleted
+** but before the new record is inserted using the INSERT convention. 
+*/
+SQLITE_PRIVATE void sqlite3FkCheck(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab,                    /* Row is being deleted from this table */ 
+  int regOld,                     /* Previous row data is stored here */
+  int regNew,                     /* New row data is stored here */
+  int *aChange,                   /* Array indicating UPDATEd columns (or 0) */
+  int bChngRowid                  /* True if rowid is UPDATEd */
+){
+  sqlite3 *db = pParse->db;       /* Database handle */
+  FKey *pFKey;                    /* Used to iterate through FKs */
+  int iDb;                        /* Index of database containing pTab */
+  const char *zDb;                /* Name of database containing pTab */
+  int isIgnoreErrors = pParse->disableTriggers;
+
+  /* Exactly one of regOld and regNew should be non-zero. */
+  assert( (regOld==0)!=(regNew==0) );
+
+  /* If foreign-keys are disabled, this function is a no-op. */
+  if( (db->flags&SQLITE_ForeignKeys)==0 ) return;
+
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  zDb = db->aDb[iDb].zName;
+
+  /* Loop through all the foreign key constraints for which pTab is the
+  ** child table (the table that the foreign key definition is part of).  */
+  for(pFKey=pTab->pFKey; pFKey; pFKey=pFKey->pNextFrom){
+    Table *pTo;                   /* Parent table of foreign key pFKey */
+    Index *pIdx = 0;              /* Index on key columns in pTo */
+    int *aiFree = 0;
+    int *aiCol;
+    int iCol;
+    int i;
+    int bIgnore = 0;
+
+    if( aChange 
+     && sqlite3_stricmp(pTab->zName, pFKey->zTo)!=0
+     && fkChildIsModified(pTab, pFKey, aChange, bChngRowid)==0 
+    ){
+      continue;
+    }
+
+    /* Find the parent table of this foreign key. Also find a unique index 
+    ** on the parent key columns in the parent table. If either of these 
+    ** schema items cannot be located, set an error in pParse and return 
+    ** early.  */
+    if( pParse->disableTriggers ){
+      pTo = sqlite3FindTable(db, pFKey->zTo, zDb);
+    }else{
+      pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb);
+    }
+    if( !pTo || sqlite3FkLocateIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){
+      assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) );
+      if( !isIgnoreErrors || db->mallocFailed ) return;
+      if( pTo==0 ){
+        /* If isIgnoreErrors is true, then a table is being dropped. In this
+        ** case SQLite runs a "DELETE FROM xxx" on the table being dropped
+        ** before actually dropping it in order to check FK constraints.
+        ** If the parent table of an FK constraint on the current table is
+        ** missing, behave as if it is empty. i.e. decrement the relevant
+        ** FK counter for each row of the current table with non-NULL keys.
+        */
+        Vdbe *v = sqlite3GetVdbe(pParse);
+        int iJump = sqlite3VdbeCurrentAddr(v) + pFKey->nCol + 1;
+        for(i=0; i<pFKey->nCol; i++){
+          int iReg = pFKey->aCol[i].iFrom + regOld + 1;
+          sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iJump); VdbeCoverage(v);
+        }
+        sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, -1);
+      }
+      continue;
+    }
+    assert( pFKey->nCol==1 || (aiFree && pIdx) );
+
+    if( aiFree ){
+      aiCol = aiFree;
+    }else{
+      iCol = pFKey->aCol[0].iFrom;
+      aiCol = &iCol;
+    }
+    for(i=0; i<pFKey->nCol; i++){
+      if( aiCol[i]==pTab->iPKey ){
+        aiCol[i] = -1;
+      }
+#ifndef SQLITE_OMIT_AUTHORIZATION
+      /* Request permission to read the parent key columns. If the 
+      ** authorization callback returns SQLITE_IGNORE, behave as if any
+      ** values read from the parent table are NULL. */
+      if( db->xAuth ){
+        int rcauth;
+        char *zCol = pTo->aCol[pIdx ? pIdx->aiColumn[i] : pTo->iPKey].zName;
+        rcauth = sqlite3AuthReadCol(pParse, pTo->zName, zCol, iDb);
+        bIgnore = (rcauth==SQLITE_IGNORE);
+      }
+#endif
+    }
+
+    /* Take a shared-cache advisory read-lock on the parent table. Allocate 
+    ** a cursor to use to search the unique index on the parent key columns 
+    ** in the parent table.  */
+    sqlite3TableLock(pParse, iDb, pTo->tnum, 0, pTo->zName);
+    pParse->nTab++;
+
+    if( regOld!=0 ){
+      /* A row is being removed from the child table. Search for the parent.
+      ** If the parent does not exist, removing the child row resolves an 
+      ** outstanding foreign key constraint violation. */
+      fkLookupParent(pParse, iDb, pTo, pIdx, pFKey, aiCol, regOld, -1, bIgnore);
+    }
+    if( regNew!=0 && !isSetNullAction(pParse, pFKey) ){
+      /* A row is being added to the child table. If a parent row cannot
+      ** be found, adding the child row has violated the FK constraint. 
+      **
+      ** If this operation is being performed as part of a trigger program
+      ** that is actually a "SET NULL" action belonging to this very 
+      ** foreign key, then omit this scan altogether. As all child key
+      ** values are guaranteed to be NULL, it is not possible for adding
+      ** this row to cause an FK violation.  */
+      fkLookupParent(pParse, iDb, pTo, pIdx, pFKey, aiCol, regNew, +1, bIgnore);
+    }
+
+    sqlite3DbFree(db, aiFree);
+  }
+
+  /* Loop through all the foreign key constraints that refer to this table.
+  ** (the "child" constraints) */
+  for(pFKey = sqlite3FkReferences(pTab); pFKey; pFKey=pFKey->pNextTo){
+    Index *pIdx = 0;              /* Foreign key index for pFKey */
+    SrcList *pSrc;
+    int *aiCol = 0;
+
+    if( aChange && fkParentIsModified(pTab, pFKey, aChange, bChngRowid)==0 ){
+      continue;
+    }
+
+    if( !pFKey->isDeferred && !(db->flags & SQLITE_DeferFKs) 
+     && !pParse->pToplevel && !pParse->isMultiWrite 
+    ){
+      assert( regOld==0 && regNew!=0 );
+      /* Inserting a single row into a parent table cannot cause (or fix)
+      ** an immediate foreign key violation. So do nothing in this case.  */
+      continue;
+    }
+
+    if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ){
+      if( !isIgnoreErrors || db->mallocFailed ) return;
+      continue;
+    }
+    assert( aiCol || pFKey->nCol==1 );
+
+    /* Create a SrcList structure containing the child table.  We need the
+    ** child table as a SrcList for sqlite3WhereBegin() */
+    pSrc = sqlite3SrcListAppend(db, 0, 0, 0);
+    if( pSrc ){
+      struct SrcList_item *pItem = pSrc->a;
+      pItem->pTab = pFKey->pFrom;
+      pItem->zName = pFKey->pFrom->zName;
+      pItem->pTab->nRef++;
+      pItem->iCursor = pParse->nTab++;
+  
+      if( regNew!=0 ){
+        fkScanChildren(pParse, pSrc, pTab, pIdx, pFKey, aiCol, regNew, -1);
+      }
+      if( regOld!=0 ){
+        int eAction = pFKey->aAction[aChange!=0];
+        fkScanChildren(pParse, pSrc, pTab, pIdx, pFKey, aiCol, regOld, 1);
+        /* If this is a deferred FK constraint, or a CASCADE or SET NULL
+        ** action applies, then any foreign key violations caused by
+        ** removing the parent key will be rectified by the action trigger.
+        ** So do not set the "may-abort" flag in this case.
+        **
+        ** Note 1: If the FK is declared "ON UPDATE CASCADE", then the
+        ** may-abort flag will eventually be set on this statement anyway
+        ** (when this function is called as part of processing the UPDATE
+        ** within the action trigger).
+        **
+        ** Note 2: At first glance it may seem like SQLite could simply omit
+        ** all OP_FkCounter related scans when either CASCADE or SET NULL
+        ** applies. The trouble starts if the CASCADE or SET NULL action 
+        ** trigger causes other triggers or action rules attached to the 
+        ** child table to fire. In these cases the fk constraint counters
+        ** might be set incorrectly if any OP_FkCounter related scans are 
+        ** omitted.  */
+        if( !pFKey->isDeferred && eAction!=OE_Cascade && eAction!=OE_SetNull ){
+          sqlite3MayAbort(pParse);
+        }
+      }
+      pItem->zName = 0;
+      sqlite3SrcListDelete(db, pSrc);
+    }
+    sqlite3DbFree(db, aiCol);
+  }
+}
+
+#define COLUMN_MASK(x) (((x)>31) ? 0xffffffff : ((u32)1<<(x)))
+
+/*
+** This function is called before generating code to update or delete a 
+** row contained in table pTab.
+*/
+SQLITE_PRIVATE u32 sqlite3FkOldmask(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab                     /* Table being modified */
+){
+  u32 mask = 0;
+  if( pParse->db->flags&SQLITE_ForeignKeys ){
+    FKey *p;
+    int i;
+    for(p=pTab->pFKey; p; p=p->pNextFrom){
+      for(i=0; i<p->nCol; i++) mask |= COLUMN_MASK(p->aCol[i].iFrom);
+    }
+    for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){
+      Index *pIdx = 0;
+      sqlite3FkLocateIndex(pParse, pTab, p, &pIdx, 0);
+      if( pIdx ){
+        for(i=0; i<pIdx->nKeyCol; i++) mask |= COLUMN_MASK(pIdx->aiColumn[i]);
+      }
+    }
+  }
+  return mask;
+}
+
+
+/*
+** This function is called before generating code to update or delete a 
+** row contained in table pTab. If the operation is a DELETE, then
+** parameter aChange is passed a NULL value. For an UPDATE, aChange points
+** to an array of size N, where N is the number of columns in table pTab.
+** If the i'th column is not modified by the UPDATE, then the corresponding 
+** entry in the aChange[] array is set to -1. If the column is modified,
+** the value is 0 or greater. Parameter chngRowid is set to true if the
+** UPDATE statement modifies the rowid fields of the table.
+**
+** If any foreign key processing will be required, this function returns
+** true. If there is no foreign key related processing, this function 
+** returns false.
+*/
+SQLITE_PRIVATE int sqlite3FkRequired(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab,                    /* Table being modified */
+  int *aChange,                   /* Non-NULL for UPDATE operations */
+  int chngRowid                   /* True for UPDATE that affects rowid */
+){
+  if( pParse->db->flags&SQLITE_ForeignKeys ){
+    if( !aChange ){
+      /* A DELETE operation. Foreign key processing is required if the 
+      ** table in question is either the child or parent table for any 
+      ** foreign key constraint.  */
+      return (sqlite3FkReferences(pTab) || pTab->pFKey);
+    }else{
+      /* This is an UPDATE. Foreign key processing is only required if the
+      ** operation modifies one or more child or parent key columns. */
+      FKey *p;
+
+      /* Check if any child key columns are being modified. */
+      for(p=pTab->pFKey; p; p=p->pNextFrom){
+        if( fkChildIsModified(pTab, p, aChange, chngRowid) ) return 1;
+      }
+
+      /* Check if any parent key columns are being modified. */
+      for(p=sqlite3FkReferences(pTab); p; p=p->pNextTo){
+        if( fkParentIsModified(pTab, p, aChange, chngRowid) ) return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** This function is called when an UPDATE or DELETE operation is being 
+** compiled on table pTab, which is the parent table of foreign-key pFKey.
+** If the current operation is an UPDATE, then the pChanges parameter is
+** passed a pointer to the list of columns being modified. If it is a
+** DELETE, pChanges is passed a NULL pointer.
+**
+** It returns a pointer to a Trigger structure containing a trigger
+** equivalent to the ON UPDATE or ON DELETE action specified by pFKey.
+** If the action is "NO ACTION" or "RESTRICT", then a NULL pointer is
+** returned (these actions require no special handling by the triggers
+** sub-system, code for them is created by fkScanChildren()).
+**
+** For example, if pFKey is the foreign key and pTab is table "p" in 
+** the following schema:
+**
+**   CREATE TABLE p(pk PRIMARY KEY);
+**   CREATE TABLE c(ck REFERENCES p ON DELETE CASCADE);
+**
+** then the returned trigger structure is equivalent to:
+**
+**   CREATE TRIGGER ... DELETE ON p BEGIN
+**     DELETE FROM c WHERE ck = old.pk;
+**   END;
+**
+** The returned pointer is cached as part of the foreign key object. It
+** is eventually freed along with the rest of the foreign key object by 
+** sqlite3FkDelete().
+*/
+static Trigger *fkActionTrigger(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab,                    /* Table being updated or deleted from */
+  FKey *pFKey,                    /* Foreign key to get action for */
+  ExprList *pChanges              /* Change-list for UPDATE, NULL for DELETE */
+){
+  sqlite3 *db = pParse->db;       /* Database handle */
+  int action;                     /* One of OE_None, OE_Cascade etc. */
+  Trigger *pTrigger;              /* Trigger definition to return */
+  int iAction = (pChanges!=0);    /* 1 for UPDATE, 0 for DELETE */
+
+  action = pFKey->aAction[iAction];
+  pTrigger = pFKey->apTrigger[iAction];
+
+  if( action!=OE_None && !pTrigger ){
+    u8 enableLookaside;           /* Copy of db->lookaside.bEnabled */
+    char const *zFrom;            /* Name of child table */
+    int nFrom;                    /* Length in bytes of zFrom */
+    Index *pIdx = 0;              /* Parent key index for this FK */
+    int *aiCol = 0;               /* child table cols -> parent key cols */
+    TriggerStep *pStep = 0;        /* First (only) step of trigger program */
+    Expr *pWhere = 0;             /* WHERE clause of trigger step */
+    ExprList *pList = 0;          /* Changes list if ON UPDATE CASCADE */
+    Select *pSelect = 0;          /* If RESTRICT, "SELECT RAISE(...)" */
+    int i;                        /* Iterator variable */
+    Expr *pWhen = 0;              /* WHEN clause for the trigger */
+
+    if( sqlite3FkLocateIndex(pParse, pTab, pFKey, &pIdx, &aiCol) ) return 0;
+    assert( aiCol || pFKey->nCol==1 );
+
+    for(i=0; i<pFKey->nCol; i++){
+      Token tOld = { "old", 3 };  /* Literal "old" token */
+      Token tNew = { "new", 3 };  /* Literal "new" token */
+      Token tFromCol;             /* Name of column in child table */
+      Token tToCol;               /* Name of column in parent table */
+      int iFromCol;               /* Idx of column in child table */
+      Expr *pEq;                  /* tFromCol = OLD.tToCol */
+
+      iFromCol = aiCol ? aiCol[i] : pFKey->aCol[0].iFrom;
+      assert( iFromCol>=0 );
+      tToCol.z = pIdx ? pTab->aCol[pIdx->aiColumn[i]].zName : "oid";
+      tFromCol.z = pFKey->pFrom->aCol[iFromCol].zName;
+
+      tToCol.n = sqlite3Strlen30(tToCol.z);
+      tFromCol.n = sqlite3Strlen30(tFromCol.z);
+
+      /* Create the expression "OLD.zToCol = zFromCol". It is important
+      ** that the "OLD.zToCol" term is on the LHS of the = operator, so
+      ** that the affinity and collation sequence associated with the
+      ** parent table are used for the comparison. */
+      pEq = sqlite3PExpr(pParse, TK_EQ,
+          sqlite3PExpr(pParse, TK_DOT, 
+            sqlite3PExpr(pParse, TK_ID, 0, 0, &tOld),
+            sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol)
+          , 0),
+          sqlite3PExpr(pParse, TK_ID, 0, 0, &tFromCol)
+      , 0);
+      pWhere = sqlite3ExprAnd(db, pWhere, pEq);
+
+      /* For ON UPDATE, construct the next term of the WHEN clause.
+      ** The final WHEN clause will be like this:
+      **
+      **    WHEN NOT(old.col1 IS new.col1 AND ... AND old.colN IS new.colN)
+      */
+      if( pChanges ){
+        pEq = sqlite3PExpr(pParse, TK_IS,
+            sqlite3PExpr(pParse, TK_DOT, 
+              sqlite3PExpr(pParse, TK_ID, 0, 0, &tOld),
+              sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol),
+              0),
+            sqlite3PExpr(pParse, TK_DOT, 
+              sqlite3PExpr(pParse, TK_ID, 0, 0, &tNew),
+              sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol),
+              0),
+            0);
+        pWhen = sqlite3ExprAnd(db, pWhen, pEq);
+      }
+  
+      if( action!=OE_Restrict && (action!=OE_Cascade || pChanges) ){
+        Expr *pNew;
+        if( action==OE_Cascade ){
+          pNew = sqlite3PExpr(pParse, TK_DOT, 
+            sqlite3PExpr(pParse, TK_ID, 0, 0, &tNew),
+            sqlite3PExpr(pParse, TK_ID, 0, 0, &tToCol)
+          , 0);
+        }else if( action==OE_SetDflt ){
+          Expr *pDflt = pFKey->pFrom->aCol[iFromCol].pDflt;
+          if( pDflt ){
+            pNew = sqlite3ExprDup(db, pDflt, 0);
+          }else{
+            pNew = sqlite3PExpr(pParse, TK_NULL, 0, 0, 0);
+          }
+        }else{
+          pNew = sqlite3PExpr(pParse, TK_NULL, 0, 0, 0);
+        }
+        pList = sqlite3ExprListAppend(pParse, pList, pNew);
+        sqlite3ExprListSetName(pParse, pList, &tFromCol, 0);
+      }
+    }
+    sqlite3DbFree(db, aiCol);
+
+    zFrom = pFKey->pFrom->zName;
+    nFrom = sqlite3Strlen30(zFrom);
+
+    if( action==OE_Restrict ){
+      Token tFrom;
+      Expr *pRaise; 
+
+      tFrom.z = zFrom;
+      tFrom.n = nFrom;
+      pRaise = sqlite3Expr(db, TK_RAISE, "FOREIGN KEY constraint failed");
+      if( pRaise ){
+        pRaise->affinity = OE_Abort;
+      }
+      pSelect = sqlite3SelectNew(pParse, 
+          sqlite3ExprListAppend(pParse, 0, pRaise),
+          sqlite3SrcListAppend(db, 0, &tFrom, 0),
+          pWhere,
+          0, 0, 0, 0, 0, 0
+      );
+      pWhere = 0;
+    }
+
+    /* Disable lookaside memory allocation */
+    enableLookaside = db->lookaside.bEnabled;
+    db->lookaside.bEnabled = 0;
+
+    pTrigger = (Trigger *)sqlite3DbMallocZero(db, 
+        sizeof(Trigger) +         /* struct Trigger */
+        sizeof(TriggerStep) +     /* Single step in trigger program */
+        nFrom + 1                 /* Space for pStep->target.z */
+    );
+    if( pTrigger ){
+      pStep = pTrigger->step_list = (TriggerStep *)&pTrigger[1];
+      pStep->target.z = (char *)&pStep[1];
+      pStep->target.n = nFrom;
+      memcpy((char *)pStep->target.z, zFrom, nFrom);
+  
+      pStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE);
+      pStep->pExprList = sqlite3ExprListDup(db, pList, EXPRDUP_REDUCE);
+      pStep->pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE);
+      if( pWhen ){
+        pWhen = sqlite3PExpr(pParse, TK_NOT, pWhen, 0, 0);
+        pTrigger->pWhen = sqlite3ExprDup(db, pWhen, EXPRDUP_REDUCE);
+      }
+    }
+
+    /* Re-enable the lookaside buffer, if it was disabled earlier. */
+    db->lookaside.bEnabled = enableLookaside;
+
+    sqlite3ExprDelete(db, pWhere);
+    sqlite3ExprDelete(db, pWhen);
+    sqlite3ExprListDelete(db, pList);
+    sqlite3SelectDelete(db, pSelect);
+    if( db->mallocFailed==1 ){
+      fkTriggerDelete(db, pTrigger);
+      return 0;
+    }
+    assert( pStep!=0 );
+
+    switch( action ){
+      case OE_Restrict:
+        pStep->op = TK_SELECT; 
+        break;
+      case OE_Cascade: 
+        if( !pChanges ){ 
+          pStep->op = TK_DELETE; 
+          break; 
+        }
+      default:
+        pStep->op = TK_UPDATE;
+    }
+    pStep->pTrig = pTrigger;
+    pTrigger->pSchema = pTab->pSchema;
+    pTrigger->pTabSchema = pTab->pSchema;
+    pFKey->apTrigger[iAction] = pTrigger;
+    pTrigger->op = (pChanges ? TK_UPDATE : TK_DELETE);
+  }
+
+  return pTrigger;
+}
+
+/*
+** This function is called when deleting or updating a row to implement
+** any required CASCADE, SET NULL or SET DEFAULT actions.
+*/
+SQLITE_PRIVATE void sqlite3FkActions(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab,                    /* Table being updated or deleted from */
+  ExprList *pChanges,             /* Change-list for UPDATE, NULL for DELETE */
+  int regOld,                     /* Address of array containing old row */
+  int *aChange,                   /* Array indicating UPDATEd columns (or 0) */
+  int bChngRowid                  /* True if rowid is UPDATEd */
+){
+  /* If foreign-key support is enabled, iterate through all FKs that 
+  ** refer to table pTab. If there is an action associated with the FK 
+  ** for this operation (either update or delete), invoke the associated 
+  ** trigger sub-program.  */
+  if( pParse->db->flags&SQLITE_ForeignKeys ){
+    FKey *pFKey;                  /* Iterator variable */
+    for(pFKey = sqlite3FkReferences(pTab); pFKey; pFKey=pFKey->pNextTo){
+      if( aChange==0 || fkParentIsModified(pTab, pFKey, aChange, bChngRowid) ){
+        Trigger *pAct = fkActionTrigger(pParse, pTab, pFKey, pChanges);
+        if( pAct ){
+          sqlite3CodeRowTriggerDirect(pParse, pAct, pTab, regOld, OE_Abort, 0);
+        }
+      }
+    }
+  }
+}
+
+#endif /* ifndef SQLITE_OMIT_TRIGGER */
+
+/*
+** Free all memory associated with foreign key definitions attached to
+** table pTab. Remove the deleted foreign keys from the Schema.fkeyHash
+** hash table.
+*/
+SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *db, Table *pTab){
+  FKey *pFKey;                    /* Iterator variable */
+  FKey *pNext;                    /* Copy of pFKey->pNextFrom */
+
+  assert( db==0 || sqlite3SchemaMutexHeld(db, 0, pTab->pSchema) );
+  for(pFKey=pTab->pFKey; pFKey; pFKey=pNext){
+
+    /* Remove the FK from the fkeyHash hash table. */
+    if( !db || db->pnBytesFreed==0 ){
+      if( pFKey->pPrevTo ){
+        pFKey->pPrevTo->pNextTo = pFKey->pNextTo;
+      }else{
+        void *p = (void *)pFKey->pNextTo;
+        const char *z = (p ? pFKey->pNextTo->zTo : pFKey->zTo);
+        sqlite3HashInsert(&pTab->pSchema->fkeyHash, z, p);
+      }
+      if( pFKey->pNextTo ){
+        pFKey->pNextTo->pPrevTo = pFKey->pPrevTo;
+      }
+    }
+
+    /* EV: R-30323-21917 Each foreign key constraint in SQLite is
+    ** classified as either immediate or deferred.
+    */
+    assert( pFKey->isDeferred==0 || pFKey->isDeferred==1 );
+
+    /* Delete any triggers created to implement actions for this FK. */
+#ifndef SQLITE_OMIT_TRIGGER
+    fkTriggerDelete(db, pFKey->apTrigger[0]);
+    fkTriggerDelete(db, pFKey->apTrigger[1]);
+#endif
+
+    pNext = pFKey->pNextFrom;
+    sqlite3DbFree(db, pFKey);
+  }
+}
+#endif /* ifndef SQLITE_OMIT_FOREIGN_KEY */
+
+/************** End of fkey.c ************************************************/
+/************** Begin file insert.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that are called by the parser
+** to handle INSERT statements in SQLite.
+*/
+
+/*
+** Generate code that will 
+**
+**   (1) acquire a lock for table pTab then
+**   (2) open pTab as cursor iCur.
+**
+** If pTab is a WITHOUT ROWID table, then it is the PRIMARY KEY index
+** for that table that is actually opened.
+*/
+SQLITE_PRIVATE void sqlite3OpenTable(
+  Parse *pParse,  /* Generate code into this VDBE */
+  int iCur,       /* The cursor number of the table */
+  int iDb,        /* The database index in sqlite3.aDb[] */
+  Table *pTab,    /* The table to be opened */
+  int opcode      /* OP_OpenRead or OP_OpenWrite */
+){
+  Vdbe *v;
+  assert( !IsVirtual(pTab) );
+  v = sqlite3GetVdbe(pParse);
+  assert( opcode==OP_OpenWrite || opcode==OP_OpenRead );
+  sqlite3TableLock(pParse, iDb, pTab->tnum, 
+                   (opcode==OP_OpenWrite)?1:0, pTab->zName);
+  if( HasRowid(pTab) ){
+    sqlite3VdbeAddOp4Int(v, opcode, iCur, pTab->tnum, iDb, pTab->nCol);
+    VdbeComment((v, "%s", pTab->zName));
+  }else{
+    Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+    assert( pPk!=0 );
+    assert( pPk->tnum=pTab->tnum );
+    sqlite3VdbeAddOp3(v, opcode, iCur, pPk->tnum, iDb);
+    sqlite3VdbeSetP4KeyInfo(pParse, pPk);
+    VdbeComment((v, "%s", pTab->zName));
+  }
+}
+
+/*
+** Return a pointer to the column affinity string associated with index
+** pIdx. A column affinity string has one character for each column in 
+** the table, according to the affinity of the column:
+**
+**  Character      Column affinity
+**  ------------------------------
+**  'A'            NONE
+**  'B'            TEXT
+**  'C'            NUMERIC
+**  'D'            INTEGER
+**  'F'            REAL
+**
+** An extra 'D' is appended to the end of the string to cover the
+** rowid that appears as the last column in every index.
+**
+** Memory for the buffer containing the column index affinity string
+** is managed along with the rest of the Index structure. It will be
+** released when sqlite3DeleteIndex() is called.
+*/
+SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *v, Index *pIdx){
+  if( !pIdx->zColAff ){
+    /* The first time a column affinity string for a particular index is
+    ** required, it is allocated and populated here. It is then stored as
+    ** a member of the Index structure for subsequent use.
+    **
+    ** The column affinity string will eventually be deleted by
+    ** sqliteDeleteIndex() when the Index structure itself is cleaned
+    ** up.
+    */
+    int n;
+    Table *pTab = pIdx->pTable;
+    sqlite3 *db = sqlite3VdbeDb(v);
+    pIdx->zColAff = (char *)sqlite3DbMallocRaw(0, pIdx->nColumn+1);
+    if( !pIdx->zColAff ){
+      db->mallocFailed = 1;
+      return 0;
+    }
+    for(n=0; n<pIdx->nColumn; n++){
+      i16 x = pIdx->aiColumn[n];
+      pIdx->zColAff[n] = x<0 ? SQLITE_AFF_INTEGER : pTab->aCol[x].affinity;
+    }
+    pIdx->zColAff[n] = 0;
+  }
+ 
+  return pIdx->zColAff;
+}
+
+/*
+** Compute the affinity string for table pTab, if it has not already been
+** computed.  As an optimization, omit trailing SQLITE_AFF_NONE affinities.
+**
+** If the affinity exists (if it is no entirely SQLITE_AFF_NONE values) and
+** if iReg>0 then code an OP_Affinity opcode that will set the affinities
+** for register iReg and following.  Or if affinities exists and iReg==0,
+** then just set the P4 operand of the previous opcode (which should  be
+** an OP_MakeRecord) to the affinity string.
+**
+** A column affinity string has one character per column:
+**
+**  Character      Column affinity
+**  ------------------------------
+**  'A'            NONE
+**  'B'            TEXT
+**  'C'            NUMERIC
+**  'D'            INTEGER
+**  'E'            REAL
+*/
+SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){
+  int i;
+  char *zColAff = pTab->zColAff;
+  if( zColAff==0 ){
+    sqlite3 *db = sqlite3VdbeDb(v);
+    zColAff = (char *)sqlite3DbMallocRaw(0, pTab->nCol+1);
+    if( !zColAff ){
+      db->mallocFailed = 1;
+      return;
+    }
+
+    for(i=0; i<pTab->nCol; i++){
+      zColAff[i] = pTab->aCol[i].affinity;
+    }
+    do{
+      zColAff[i--] = 0;
+    }while( i>=0 && zColAff[i]==SQLITE_AFF_NONE );
+    pTab->zColAff = zColAff;
+  }
+  i = sqlite3Strlen30(zColAff);
+  if( i ){
+    if( iReg ){
+      sqlite3VdbeAddOp4(v, OP_Affinity, iReg, i, 0, zColAff, i);
+    }else{
+      sqlite3VdbeChangeP4(v, -1, zColAff, i);
+    }
+  }
+}
+
+/*
+** Return non-zero if the table pTab in database iDb or any of its indices
+** have been opened at any point in the VDBE program. This is used to see if 
+** a statement of the form  "INSERT INTO <iDb, pTab> SELECT ..." can 
+** run without using a temporary table for the results of the SELECT. 
+*/
+static int readsTable(Parse *p, int iDb, Table *pTab){
+  Vdbe *v = sqlite3GetVdbe(p);
+  int i;
+  int iEnd = sqlite3VdbeCurrentAddr(v);
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  VTable *pVTab = IsVirtual(pTab) ? sqlite3GetVTable(p->db, pTab) : 0;
+#endif
+
+  for(i=1; i<iEnd; i++){
+    VdbeOp *pOp = sqlite3VdbeGetOp(v, i);
+    assert( pOp!=0 );
+    if( pOp->opcode==OP_OpenRead && pOp->p3==iDb ){
+      Index *pIndex;
+      int tnum = pOp->p2;
+      if( tnum==pTab->tnum ){
+        return 1;
+      }
+      for(pIndex=pTab->pIndex; pIndex; pIndex=pIndex->pNext){
+        if( tnum==pIndex->tnum ){
+          return 1;
+        }
+      }
+    }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    if( pOp->opcode==OP_VOpen && pOp->p4.pVtab==pVTab ){
+      assert( pOp->p4.pVtab!=0 );
+      assert( pOp->p4type==P4_VTAB );
+      return 1;
+    }
+#endif
+  }
+  return 0;
+}
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+/*
+** Locate or create an AutoincInfo structure associated with table pTab
+** which is in database iDb.  Return the register number for the register
+** that holds the maximum rowid.
+**
+** There is at most one AutoincInfo structure per table even if the
+** same table is autoincremented multiple times due to inserts within
+** triggers.  A new AutoincInfo structure is created if this is the
+** first use of table pTab.  On 2nd and subsequent uses, the original
+** AutoincInfo structure is used.
+**
+** Three memory locations are allocated:
+**
+**   (1)  Register to hold the name of the pTab table.
+**   (2)  Register to hold the maximum ROWID of pTab.
+**   (3)  Register to hold the rowid in sqlite_sequence of pTab
+**
+** The 2nd register is the one that is returned.  That is all the
+** insert routine needs to know about.
+*/
+static int autoIncBegin(
+  Parse *pParse,      /* Parsing context */
+  int iDb,            /* Index of the database holding pTab */
+  Table *pTab         /* The table we are writing to */
+){
+  int memId = 0;      /* Register holding maximum rowid */
+  if( pTab->tabFlags & TF_Autoincrement ){
+    Parse *pToplevel = sqlite3ParseToplevel(pParse);
+    AutoincInfo *pInfo;
+
+    pInfo = pToplevel->pAinc;
+    while( pInfo && pInfo->pTab!=pTab ){ pInfo = pInfo->pNext; }
+    if( pInfo==0 ){
+      pInfo = sqlite3DbMallocRaw(pParse->db, sizeof(*pInfo));
+      if( pInfo==0 ) return 0;
+      pInfo->pNext = pToplevel->pAinc;
+      pToplevel->pAinc = pInfo;
+      pInfo->pTab = pTab;
+      pInfo->iDb = iDb;
+      pToplevel->nMem++;                  /* Register to hold name of table */
+      pInfo->regCtr = ++pToplevel->nMem;  /* Max rowid register */
+      pToplevel->nMem++;                  /* Rowid in sqlite_sequence */
+    }
+    memId = pInfo->regCtr;
+  }
+  return memId;
+}
+
+/*
+** This routine generates code that will initialize all of the
+** register used by the autoincrement tracker.  
+*/
+SQLITE_PRIVATE void sqlite3AutoincrementBegin(Parse *pParse){
+  AutoincInfo *p;            /* Information about an AUTOINCREMENT */
+  sqlite3 *db = pParse->db;  /* The database connection */
+  Db *pDb;                   /* Database only autoinc table */
+  int memId;                 /* Register holding max rowid */
+  int addr;                  /* A VDBE address */
+  Vdbe *v = pParse->pVdbe;   /* VDBE under construction */
+
+  /* This routine is never called during trigger-generation.  It is
+  ** only called from the top-level */
+  assert( pParse->pTriggerTab==0 );
+  assert( pParse==sqlite3ParseToplevel(pParse) );
+
+  assert( v );   /* We failed long ago if this is not so */
+  for(p = pParse->pAinc; p; p = p->pNext){
+    pDb = &db->aDb[p->iDb];
+    memId = p->regCtr;
+    assert( sqlite3SchemaMutexHeld(db, 0, pDb->pSchema) );
+    sqlite3OpenTable(pParse, 0, p->iDb, pDb->pSchema->pSeqTab, OP_OpenRead);
+    sqlite3VdbeAddOp3(v, OP_Null, 0, memId, memId+1);
+    addr = sqlite3VdbeCurrentAddr(v);
+    sqlite3VdbeAddOp4(v, OP_String8, 0, memId-1, 0, p->pTab->zName, 0);
+    sqlite3VdbeAddOp2(v, OP_Rewind, 0, addr+9); VdbeCoverage(v);
+    sqlite3VdbeAddOp3(v, OP_Column, 0, 0, memId);
+    sqlite3VdbeAddOp3(v, OP_Ne, memId-1, addr+7, memId); VdbeCoverage(v);
+    sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL);
+    sqlite3VdbeAddOp2(v, OP_Rowid, 0, memId+1);
+    sqlite3VdbeAddOp3(v, OP_Column, 0, 1, memId);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, addr+9);
+    sqlite3VdbeAddOp2(v, OP_Next, 0, addr+2); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, memId);
+    sqlite3VdbeAddOp0(v, OP_Close);
+  }
+}
+
+/*
+** Update the maximum rowid for an autoincrement calculation.
+**
+** This routine should be called when the top of the stack holds a
+** new rowid that is about to be inserted.  If that new rowid is
+** larger than the maximum rowid in the memId memory cell, then the
+** memory cell is updated.  The stack is unchanged.
+*/
+static void autoIncStep(Parse *pParse, int memId, int regRowid){
+  if( memId>0 ){
+    sqlite3VdbeAddOp2(pParse->pVdbe, OP_MemMax, memId, regRowid);
+  }
+}
+
+/*
+** This routine generates the code needed to write autoincrement
+** maximum rowid values back into the sqlite_sequence register.
+** Every statement that might do an INSERT into an autoincrement
+** table (either directly or through triggers) needs to call this
+** routine just before the "exit" code.
+*/
+SQLITE_PRIVATE void sqlite3AutoincrementEnd(Parse *pParse){
+  AutoincInfo *p;
+  Vdbe *v = pParse->pVdbe;
+  sqlite3 *db = pParse->db;
+
+  assert( v );
+  for(p = pParse->pAinc; p; p = p->pNext){
+    Db *pDb = &db->aDb[p->iDb];
+    int j1;
+    int iRec;
+    int memId = p->regCtr;
+
+    iRec = sqlite3GetTempReg(pParse);
+    assert( sqlite3SchemaMutexHeld(db, 0, pDb->pSchema) );
+    sqlite3OpenTable(pParse, 0, p->iDb, pDb->pSchema->pSeqTab, OP_OpenWrite);
+    j1 = sqlite3VdbeAddOp1(v, OP_NotNull, memId+1); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, 0, memId+1);
+    sqlite3VdbeJumpHere(v, j1);
+    sqlite3VdbeAddOp3(v, OP_MakeRecord, memId-1, 2, iRec);
+    sqlite3VdbeAddOp3(v, OP_Insert, 0, iRec, memId+1);
+    sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+    sqlite3VdbeAddOp0(v, OP_Close);
+    sqlite3ReleaseTempReg(pParse, iRec);
+  }
+}
+#else
+/*
+** If SQLITE_OMIT_AUTOINCREMENT is defined, then the three routines
+** above are all no-ops
+*/
+# define autoIncBegin(A,B,C) (0)
+# define autoIncStep(A,B,C)
+#endif /* SQLITE_OMIT_AUTOINCREMENT */
+
+
+/* Forward declaration */
+static int xferOptimization(
+  Parse *pParse,        /* Parser context */
+  Table *pDest,         /* The table we are inserting into */
+  Select *pSelect,      /* A SELECT statement to use as the data source */
+  int onError,          /* How to handle constraint errors */
+  int iDbDest           /* The database of pDest */
+);
+
+/*
+** This routine is called to handle SQL of the following forms:
+**
+**    insert into TABLE (IDLIST) values(EXPRLIST)
+**    insert into TABLE (IDLIST) select
+**
+** The IDLIST following the table name is always optional.  If omitted,
+** then a list of all columns for the table is substituted.  The IDLIST
+** appears in the pColumn parameter.  pColumn is NULL if IDLIST is omitted.
+**
+** The pList parameter holds EXPRLIST in the first form of the INSERT
+** statement above, and pSelect is NULL.  For the second form, pList is
+** NULL and pSelect is a pointer to the select statement used to generate
+** data for the insert.
+**
+** The code generated follows one of four templates.  For a simple
+** insert with data coming from a VALUES clause, the code executes
+** once straight down through.  Pseudo-code follows (we call this
+** the "1st template"):
+**
+**         open write cursor to <table> and its indices
+**         put VALUES clause expressions into registers
+**         write the resulting record into <table>
+**         cleanup
+**
+** The three remaining templates assume the statement is of the form
+**
+**   INSERT INTO <table> SELECT ...
+**
+** If the SELECT clause is of the restricted form "SELECT * FROM <table2>" -
+** in other words if the SELECT pulls all columns from a single table
+** and there is no WHERE or LIMIT or GROUP BY or ORDER BY clauses, and
+** if <table2> and <table1> are distinct tables but have identical
+** schemas, including all the same indices, then a special optimization
+** is invoked that copies raw records from <table2> over to <table1>.
+** See the xferOptimization() function for the implementation of this
+** template.  This is the 2nd template.
+**
+**         open a write cursor to <table>
+**         open read cursor on <table2>
+**         transfer all records in <table2> over to <table>
+**         close cursors
+**         foreach index on <table>
+**           open a write cursor on the <table> index
+**           open a read cursor on the corresponding <table2> index
+**           transfer all records from the read to the write cursors
+**           close cursors
+**         end foreach
+**
+** The 3rd template is for when the second template does not apply
+** and the SELECT clause does not read from <table> at any time.
+** The generated code follows this template:
+**
+**         X <- A
+**         goto B
+**      A: setup for the SELECT
+**         loop over the rows in the SELECT
+**           load values into registers R..R+n
+**           yield X
+**         end loop
+**         cleanup after the SELECT
+**         end-coroutine X
+**      B: open write cursor to <table> and its indices
+**      C: yield X, at EOF goto D
+**         insert the select result into <table> from R..R+n
+**         goto C
+**      D: cleanup
+**
+** The 4th template is used if the insert statement takes its
+** values from a SELECT but the data is being inserted into a table
+** that is also read as part of the SELECT.  In the third form,
+** we have to use an intermediate table to store the results of
+** the select.  The template is like this:
+**
+**         X <- A
+**         goto B
+**      A: setup for the SELECT
+**         loop over the tables in the SELECT
+**           load value into register R..R+n
+**           yield X
+**         end loop
+**         cleanup after the SELECT
+**         end co-routine R
+**      B: open temp table
+**      L: yield X, at EOF goto M
+**         insert row from R..R+n into temp table
+**         goto L
+**      M: open write cursor to <table> and its indices
+**         rewind temp table
+**      C: loop over rows of intermediate table
+**           transfer values form intermediate table into <table>
+**         end loop
+**      D: cleanup
+*/
+SQLITE_PRIVATE void sqlite3Insert(
+  Parse *pParse,        /* Parser context */
+  SrcList *pTabList,    /* Name of table into which we are inserting */
+  Select *pSelect,      /* A SELECT statement to use as the data source */
+  IdList *pColumn,      /* Column names corresponding to IDLIST. */
+  int onError           /* How to handle constraint errors */
+){
+  sqlite3 *db;          /* The main database structure */
+  Table *pTab;          /* The table to insert into.  aka TABLE */
+  char *zTab;           /* Name of the table into which we are inserting */
+  const char *zDb;      /* Name of the database holding this table */
+  int i, j, idx;        /* Loop counters */
+  Vdbe *v;              /* Generate code into this virtual machine */
+  Index *pIdx;          /* For looping over indices of the table */
+  int nColumn;          /* Number of columns in the data */
+  int nHidden = 0;      /* Number of hidden columns if TABLE is virtual */
+  int iDataCur = 0;     /* VDBE cursor that is the main data repository */
+  int iIdxCur = 0;      /* First index cursor */
+  int ipkColumn = -1;   /* Column that is the INTEGER PRIMARY KEY */
+  int endOfLoop;        /* Label for the end of the insertion loop */
+  int srcTab = 0;       /* Data comes from this temporary cursor if >=0 */
+  int addrInsTop = 0;   /* Jump to label "D" */
+  int addrCont = 0;     /* Top of insert loop. Label "C" in templates 3 and 4 */
+  SelectDest dest;      /* Destination for SELECT on rhs of INSERT */
+  int iDb;              /* Index of database holding TABLE */
+  Db *pDb;              /* The database containing table being inserted into */
+  u8 useTempTable = 0;  /* Store SELECT results in intermediate table */
+  u8 appendFlag = 0;    /* True if the insert is likely to be an append */
+  u8 withoutRowid;      /* 0 for normal table.  1 for WITHOUT ROWID table */
+  u8 bIdListInOrder = 1; /* True if IDLIST is in table order */
+  ExprList *pList = 0;  /* List of VALUES() to be inserted  */
+
+  /* Register allocations */
+  int regFromSelect = 0;/* Base register for data coming from SELECT */
+  int regAutoinc = 0;   /* Register holding the AUTOINCREMENT counter */
+  int regRowCount = 0;  /* Memory cell used for the row counter */
+  int regIns;           /* Block of regs holding rowid+data being inserted */
+  int regRowid;         /* registers holding insert rowid */
+  int regData;          /* register holding first column to insert */
+  int *aRegIdx = 0;     /* One register allocated to each index */
+
+#ifndef SQLITE_OMIT_TRIGGER
+  int isView;                 /* True if attempting to insert into a view */
+  Trigger *pTrigger;          /* List of triggers on pTab, if required */
+  int tmask;                  /* Mask of trigger times */
+#endif
+
+  db = pParse->db;
+  memset(&dest, 0, sizeof(dest));
+  if( pParse->nErr || db->mallocFailed ){
+    goto insert_cleanup;
+  }
+
+  /* If the Select object is really just a simple VALUES() list with a
+  ** single row values (the common case) then keep that one row of values
+  ** and go ahead and discard the Select object
+  */
+  if( pSelect && (pSelect->selFlags & SF_Values)!=0 && pSelect->pPrior==0 ){
+    pList = pSelect->pEList;
+    pSelect->pEList = 0;
+    sqlite3SelectDelete(db, pSelect);
+    pSelect = 0;
+  }
+
+  /* Locate the table into which we will be inserting new information.
+  */
+  assert( pTabList->nSrc==1 );
+  zTab = pTabList->a[0].zName;
+  if( NEVER(zTab==0) ) goto insert_cleanup;
+  pTab = sqlite3SrcListLookup(pParse, pTabList);
+  if( pTab==0 ){
+    goto insert_cleanup;
+  }
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  assert( iDb<db->nDb );
+  pDb = &db->aDb[iDb];
+  zDb = pDb->zName;
+  if( sqlite3AuthCheck(pParse, SQLITE_INSERT, pTab->zName, 0, zDb) ){
+    goto insert_cleanup;
+  }
+  withoutRowid = !HasRowid(pTab);
+
+  /* Figure out if we have any triggers and if the table being
+  ** inserted into is a view
+  */
+#ifndef SQLITE_OMIT_TRIGGER
+  pTrigger = sqlite3TriggersExist(pParse, pTab, TK_INSERT, 0, &tmask);
+  isView = pTab->pSelect!=0;
+#else
+# define pTrigger 0
+# define tmask 0
+# define isView 0
+#endif
+#ifdef SQLITE_OMIT_VIEW
+# undef isView
+# define isView 0
+#endif
+  assert( (pTrigger && tmask) || (pTrigger==0 && tmask==0) );
+
+  /* If pTab is really a view, make sure it has been initialized.
+  ** ViewGetColumnNames() is a no-op if pTab is not a view.
+  */
+  if( sqlite3ViewGetColumnNames(pParse, pTab) ){
+    goto insert_cleanup;
+  }
+
+  /* Cannot insert into a read-only table.
+  */
+  if( sqlite3IsReadOnly(pParse, pTab, tmask) ){
+    goto insert_cleanup;
+  }
+
+  /* Allocate a VDBE
+  */
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ) goto insert_cleanup;
+  if( pParse->nested==0 ) sqlite3VdbeCountChanges(v);
+  sqlite3BeginWriteOperation(pParse, pSelect || pTrigger, iDb);
+
+#ifndef SQLITE_OMIT_XFER_OPT
+  /* If the statement is of the form
+  **
+  **       INSERT INTO <table1> SELECT * FROM <table2>;
+  **
+  ** Then special optimizations can be applied that make the transfer
+  ** very fast and which reduce fragmentation of indices.
+  **
+  ** This is the 2nd template.
+  */
+  if( pColumn==0 && xferOptimization(pParse, pTab, pSelect, onError, iDb) ){
+    assert( !pTrigger );
+    assert( pList==0 );
+    goto insert_end;
+  }
+#endif /* SQLITE_OMIT_XFER_OPT */
+
+  /* If this is an AUTOINCREMENT table, look up the sequence number in the
+  ** sqlite_sequence table and store it in memory cell regAutoinc.
+  */
+  regAutoinc = autoIncBegin(pParse, iDb, pTab);
+
+  /* Allocate registers for holding the rowid of the new row,
+  ** the content of the new row, and the assembled row record.
+  */
+  regRowid = regIns = pParse->nMem+1;
+  pParse->nMem += pTab->nCol + 1;
+  if( IsVirtual(pTab) ){
+    regRowid++;
+    pParse->nMem++;
+  }
+  regData = regRowid+1;
+
+  /* If the INSERT statement included an IDLIST term, then make sure
+  ** all elements of the IDLIST really are columns of the table and 
+  ** remember the column indices.
+  **
+  ** If the table has an INTEGER PRIMARY KEY column and that column
+  ** is named in the IDLIST, then record in the ipkColumn variable
+  ** the index into IDLIST of the primary key column.  ipkColumn is
+  ** the index of the primary key as it appears in IDLIST, not as
+  ** is appears in the original table.  (The index of the INTEGER
+  ** PRIMARY KEY in the original table is pTab->iPKey.)
+  */
+  if( pColumn ){
+    for(i=0; i<pColumn->nId; i++){
+      pColumn->a[i].idx = -1;
+    }
+    for(i=0; i<pColumn->nId; i++){
+      for(j=0; j<pTab->nCol; j++){
+        if( sqlite3StrICmp(pColumn->a[i].zName, pTab->aCol[j].zName)==0 ){
+          pColumn->a[i].idx = j;
+          if( i!=j ) bIdListInOrder = 0;
+          if( j==pTab->iPKey ){
+            ipkColumn = i;  assert( !withoutRowid );
+          }
+          break;
+        }
+      }
+      if( j>=pTab->nCol ){
+        if( sqlite3IsRowid(pColumn->a[i].zName) && !withoutRowid ){
+          ipkColumn = i;
+          bIdListInOrder = 0;
+        }else{
+          sqlite3ErrorMsg(pParse, "table %S has no column named %s",
+              pTabList, 0, pColumn->a[i].zName);
+          pParse->checkSchema = 1;
+          goto insert_cleanup;
+        }
+      }
+    }
+  }
+
+  /* Figure out how many columns of data are supplied.  If the data
+  ** is coming from a SELECT statement, then generate a co-routine that
+  ** produces a single row of the SELECT on each invocation.  The
+  ** co-routine is the common header to the 3rd and 4th templates.
+  */
+  if( pSelect ){
+    /* Data is coming from a SELECT.  Generate a co-routine to run the SELECT */
+    int regYield;       /* Register holding co-routine entry-point */
+    int addrTop;        /* Top of the co-routine */
+    int rc;             /* Result code */
+
+    regYield = ++pParse->nMem;
+    addrTop = sqlite3VdbeCurrentAddr(v) + 1;
+    sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, addrTop);
+    sqlite3SelectDestInit(&dest, SRT_Coroutine, regYield);
+    dest.iSdst = bIdListInOrder ? regData : 0;
+    dest.nSdst = pTab->nCol;
+    rc = sqlite3Select(pParse, pSelect, &dest);
+    regFromSelect = dest.iSdst;
+    assert( pParse->nErr==0 || rc );
+    if( rc || db->mallocFailed ) goto insert_cleanup;
+    sqlite3VdbeAddOp1(v, OP_EndCoroutine, regYield);
+    sqlite3VdbeJumpHere(v, addrTop - 1);                       /* label B: */
+    assert( pSelect->pEList );
+    nColumn = pSelect->pEList->nExpr;
+
+    /* Set useTempTable to TRUE if the result of the SELECT statement
+    ** should be written into a temporary table (template 4).  Set to
+    ** FALSE if each output row of the SELECT can be written directly into
+    ** the destination table (template 3).
+    **
+    ** A temp table must be used if the table being updated is also one
+    ** of the tables being read by the SELECT statement.  Also use a 
+    ** temp table in the case of row triggers.
+    */
+    if( pTrigger || readsTable(pParse, iDb, pTab) ){
+      useTempTable = 1;
+    }
+
+    if( useTempTable ){
+      /* Invoke the coroutine to extract information from the SELECT
+      ** and add it to a transient table srcTab.  The code generated
+      ** here is from the 4th template:
+      **
+      **      B: open temp table
+      **      L: yield X, goto M at EOF
+      **         insert row from R..R+n into temp table
+      **         goto L
+      **      M: ...
+      */
+      int regRec;          /* Register to hold packed record */
+      int regTempRowid;    /* Register to hold temp table ROWID */
+      int addrL;           /* Label "L" */
+
+      srcTab = pParse->nTab++;
+      regRec = sqlite3GetTempReg(pParse);
+      regTempRowid = sqlite3GetTempReg(pParse);
+      sqlite3VdbeAddOp2(v, OP_OpenEphemeral, srcTab, nColumn);
+      addrL = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); VdbeCoverage(v);
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regFromSelect, nColumn, regRec);
+      sqlite3VdbeAddOp2(v, OP_NewRowid, srcTab, regTempRowid);
+      sqlite3VdbeAddOp3(v, OP_Insert, srcTab, regRec, regTempRowid);
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, addrL);
+      sqlite3VdbeJumpHere(v, addrL);
+      sqlite3ReleaseTempReg(pParse, regRec);
+      sqlite3ReleaseTempReg(pParse, regTempRowid);
+    }
+  }else{
+    /* This is the case if the data for the INSERT is coming from a VALUES
+    ** clause
+    */
+    NameContext sNC;
+    memset(&sNC, 0, sizeof(sNC));
+    sNC.pParse = pParse;
+    srcTab = -1;
+    assert( useTempTable==0 );
+    nColumn = pList ? pList->nExpr : 0;
+    for(i=0; i<nColumn; i++){
+      if( sqlite3ResolveExprNames(&sNC, pList->a[i].pExpr) ){
+        goto insert_cleanup;
+      }
+    }
+  }
+
+  /* If there is no IDLIST term but the table has an integer primary
+  ** key, the set the ipkColumn variable to the integer primary key 
+  ** column index in the original table definition.
+  */
+  if( pColumn==0 && nColumn>0 ){
+    ipkColumn = pTab->iPKey;
+  }
+
+  /* Make sure the number of columns in the source data matches the number
+  ** of columns to be inserted into the table.
+  */
+  if( IsVirtual(pTab) ){
+    for(i=0; i<pTab->nCol; i++){
+      nHidden += (IsHiddenColumn(&pTab->aCol[i]) ? 1 : 0);
+    }
+  }
+  if( pColumn==0 && nColumn && nColumn!=(pTab->nCol-nHidden) ){
+    sqlite3ErrorMsg(pParse, 
+       "table %S has %d columns but %d values were supplied",
+       pTabList, 0, pTab->nCol-nHidden, nColumn);
+    goto insert_cleanup;
+  }
+  if( pColumn!=0 && nColumn!=pColumn->nId ){
+    sqlite3ErrorMsg(pParse, "%d values for %d columns", nColumn, pColumn->nId);
+    goto insert_cleanup;
+  }
+    
+  /* Initialize the count of rows to be inserted
+  */
+  if( db->flags & SQLITE_CountRows ){
+    regRowCount = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regRowCount);
+  }
+
+  /* If this is not a view, open the table and and all indices */
+  if( !isView ){
+    int nIdx;
+    nIdx = sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, -1, 0,
+                                      &iDataCur, &iIdxCur);
+    aRegIdx = sqlite3DbMallocRaw(db, sizeof(int)*(nIdx+1));
+    if( aRegIdx==0 ){
+      goto insert_cleanup;
+    }
+    for(i=0; i<nIdx; i++){
+      aRegIdx[i] = ++pParse->nMem;
+    }
+  }
+
+  /* This is the top of the main insertion loop */
+  if( useTempTable ){
+    /* This block codes the top of loop only.  The complete loop is the
+    ** following pseudocode (template 4):
+    **
+    **         rewind temp table, if empty goto D
+    **      C: loop over rows of intermediate table
+    **           transfer values form intermediate table into <table>
+    **         end loop
+    **      D: ...
+    */
+    addrInsTop = sqlite3VdbeAddOp1(v, OP_Rewind, srcTab); VdbeCoverage(v);
+    addrCont = sqlite3VdbeCurrentAddr(v);
+  }else if( pSelect ){
+    /* This block codes the top of loop only.  The complete loop is the
+    ** following pseudocode (template 3):
+    **
+    **      C: yield X, at EOF goto D
+    **         insert the select result into <table> from R..R+n
+    **         goto C
+    **      D: ...
+    */
+    addrInsTop = addrCont = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm);
+    VdbeCoverage(v);
+  }
+
+  /* Run the BEFORE and INSTEAD OF triggers, if there are any
+  */
+  endOfLoop = sqlite3VdbeMakeLabel(v);
+  if( tmask & TRIGGER_BEFORE ){
+    int regCols = sqlite3GetTempRange(pParse, pTab->nCol+1);
+
+    /* build the NEW.* reference row.  Note that if there is an INTEGER
+    ** PRIMARY KEY into which a NULL is being inserted, that NULL will be
+    ** translated into a unique ID for the row.  But on a BEFORE trigger,
+    ** we do not know what the unique ID will be (because the insert has
+    ** not happened yet) so we substitute a rowid of -1
+    */
+    if( ipkColumn<0 ){
+      sqlite3VdbeAddOp2(v, OP_Integer, -1, regCols);
+    }else{
+      int j1;
+      assert( !withoutRowid );
+      if( useTempTable ){
+        sqlite3VdbeAddOp3(v, OP_Column, srcTab, ipkColumn, regCols);
+      }else{
+        assert( pSelect==0 );  /* Otherwise useTempTable is true */
+        sqlite3ExprCode(pParse, pList->a[ipkColumn].pExpr, regCols);
+      }
+      j1 = sqlite3VdbeAddOp1(v, OP_NotNull, regCols); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Integer, -1, regCols);
+      sqlite3VdbeJumpHere(v, j1);
+      sqlite3VdbeAddOp1(v, OP_MustBeInt, regCols); VdbeCoverage(v);
+    }
+
+    /* Cannot have triggers on a virtual table. If it were possible,
+    ** this block would have to account for hidden column.
+    */
+    assert( !IsVirtual(pTab) );
+
+    /* Create the new column data
+    */
+    for(i=0; i<pTab->nCol; i++){
+      if( pColumn==0 ){
+        j = i;
+      }else{
+        for(j=0; j<pColumn->nId; j++){
+          if( pColumn->a[j].idx==i ) break;
+        }
+      }
+      if( (!useTempTable && !pList) || (pColumn && j>=pColumn->nId) ){
+        sqlite3ExprCode(pParse, pTab->aCol[i].pDflt, regCols+i+1);
+      }else if( useTempTable ){
+        sqlite3VdbeAddOp3(v, OP_Column, srcTab, j, regCols+i+1); 
+      }else{
+        assert( pSelect==0 ); /* Otherwise useTempTable is true */
+        sqlite3ExprCodeAndCache(pParse, pList->a[j].pExpr, regCols+i+1);
+      }
+    }
+
+    /* If this is an INSERT on a view with an INSTEAD OF INSERT trigger,
+    ** do not attempt any conversions before assembling the record.
+    ** If this is a real table, attempt conversions as required by the
+    ** table column affinities.
+    */
+    if( !isView ){
+      sqlite3TableAffinity(v, pTab, regCols+1);
+    }
+
+    /* Fire BEFORE or INSTEAD OF triggers */
+    sqlite3CodeRowTrigger(pParse, pTrigger, TK_INSERT, 0, TRIGGER_BEFORE, 
+        pTab, regCols-pTab->nCol-1, onError, endOfLoop);
+
+    sqlite3ReleaseTempRange(pParse, regCols, pTab->nCol+1);
+  }
+
+  /* Compute the content of the next row to insert into a range of
+  ** registers beginning at regIns.
+  */
+  if( !isView ){
+    if( IsVirtual(pTab) ){
+      /* The row that the VUpdate opcode will delete: none */
+      sqlite3VdbeAddOp2(v, OP_Null, 0, regIns);
+    }
+    if( ipkColumn>=0 ){
+      if( useTempTable ){
+        sqlite3VdbeAddOp3(v, OP_Column, srcTab, ipkColumn, regRowid);
+      }else if( pSelect ){
+        sqlite3VdbeAddOp2(v, OP_Copy, regFromSelect+ipkColumn, regRowid);
+      }else{
+        VdbeOp *pOp;
+        sqlite3ExprCode(pParse, pList->a[ipkColumn].pExpr, regRowid);
+        pOp = sqlite3VdbeGetOp(v, -1);
+        if( ALWAYS(pOp) && pOp->opcode==OP_Null && !IsVirtual(pTab) ){
+          appendFlag = 1;
+          pOp->opcode = OP_NewRowid;
+          pOp->p1 = iDataCur;
+          pOp->p2 = regRowid;
+          pOp->p3 = regAutoinc;
+        }
+      }
+      /* If the PRIMARY KEY expression is NULL, then use OP_NewRowid
+      ** to generate a unique primary key value.
+      */
+      if( !appendFlag ){
+        int j1;
+        if( !IsVirtual(pTab) ){
+          j1 = sqlite3VdbeAddOp1(v, OP_NotNull, regRowid); VdbeCoverage(v);
+          sqlite3VdbeAddOp3(v, OP_NewRowid, iDataCur, regRowid, regAutoinc);
+          sqlite3VdbeJumpHere(v, j1);
+        }else{
+          j1 = sqlite3VdbeCurrentAddr(v);
+          sqlite3VdbeAddOp2(v, OP_IsNull, regRowid, j1+2); VdbeCoverage(v);
+        }
+        sqlite3VdbeAddOp1(v, OP_MustBeInt, regRowid); VdbeCoverage(v);
+      }
+    }else if( IsVirtual(pTab) || withoutRowid ){
+      sqlite3VdbeAddOp2(v, OP_Null, 0, regRowid);
+    }else{
+      sqlite3VdbeAddOp3(v, OP_NewRowid, iDataCur, regRowid, regAutoinc);
+      appendFlag = 1;
+    }
+    autoIncStep(pParse, regAutoinc, regRowid);
+
+    /* Compute data for all columns of the new entry, beginning
+    ** with the first column.
+    */
+    nHidden = 0;
+    for(i=0; i<pTab->nCol; i++){
+      int iRegStore = regRowid+1+i;
+      if( i==pTab->iPKey ){
+        /* The value of the INTEGER PRIMARY KEY column is always a NULL.
+        ** Whenever this column is read, the rowid will be substituted
+        ** in its place.  Hence, fill this column with a NULL to avoid
+        ** taking up data space with information that will never be used.
+        ** As there may be shallow copies of this value, make it a soft-NULL */
+        sqlite3VdbeAddOp1(v, OP_SoftNull, iRegStore);
+        continue;
+      }
+      if( pColumn==0 ){
+        if( IsHiddenColumn(&pTab->aCol[i]) ){
+          assert( IsVirtual(pTab) );
+          j = -1;
+          nHidden++;
+        }else{
+          j = i - nHidden;
+        }
+      }else{
+        for(j=0; j<pColumn->nId; j++){
+          if( pColumn->a[j].idx==i ) break;
+        }
+      }
+      if( j<0 || nColumn==0 || (pColumn && j>=pColumn->nId) ){
+        sqlite3ExprCodeFactorable(pParse, pTab->aCol[i].pDflt, iRegStore);
+      }else if( useTempTable ){
+        sqlite3VdbeAddOp3(v, OP_Column, srcTab, j, iRegStore); 
+      }else if( pSelect ){
+        if( regFromSelect!=regData ){
+          sqlite3VdbeAddOp2(v, OP_SCopy, regFromSelect+j, iRegStore);
+        }
+      }else{
+        sqlite3ExprCode(pParse, pList->a[j].pExpr, iRegStore);
+      }
+    }
+
+    /* Generate code to check constraints and generate index keys and
+    ** do the insertion.
+    */
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    if( IsVirtual(pTab) ){
+      const char *pVTab = (const char *)sqlite3GetVTable(db, pTab);
+      sqlite3VtabMakeWritable(pParse, pTab);
+      sqlite3VdbeAddOp4(v, OP_VUpdate, 1, pTab->nCol+2, regIns, pVTab, P4_VTAB);
+      sqlite3VdbeChangeP5(v, onError==OE_Default ? OE_Abort : onError);
+      sqlite3MayAbort(pParse);
+    }else
+#endif
+    {
+      int isReplace;    /* Set to true if constraints may cause a replace */
+      sqlite3GenerateConstraintChecks(pParse, pTab, aRegIdx, iDataCur, iIdxCur,
+          regIns, 0, ipkColumn>=0, onError, endOfLoop, &isReplace
+      );
+      sqlite3FkCheck(pParse, pTab, 0, regIns, 0, 0);
+      sqlite3CompleteInsertion(pParse, pTab, iDataCur, iIdxCur,
+                               regIns, aRegIdx, 0, appendFlag, isReplace==0);
+    }
+  }
+
+  /* Update the count of rows that are inserted
+  */
+  if( (db->flags & SQLITE_CountRows)!=0 ){
+    sqlite3VdbeAddOp2(v, OP_AddImm, regRowCount, 1);
+  }
+
+  if( pTrigger ){
+    /* Code AFTER triggers */
+    sqlite3CodeRowTrigger(pParse, pTrigger, TK_INSERT, 0, TRIGGER_AFTER, 
+        pTab, regData-2-pTab->nCol, onError, endOfLoop);
+  }
+
+  /* The bottom of the main insertion loop, if the data source
+  ** is a SELECT statement.
+  */
+  sqlite3VdbeResolveLabel(v, endOfLoop);
+  if( useTempTable ){
+    sqlite3VdbeAddOp2(v, OP_Next, srcTab, addrCont); VdbeCoverage(v);
+    sqlite3VdbeJumpHere(v, addrInsTop);
+    sqlite3VdbeAddOp1(v, OP_Close, srcTab);
+  }else if( pSelect ){
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, addrCont);
+    sqlite3VdbeJumpHere(v, addrInsTop);
+  }
+
+  if( !IsVirtual(pTab) && !isView ){
+    /* Close all tables opened */
+    if( iDataCur<iIdxCur ) sqlite3VdbeAddOp1(v, OP_Close, iDataCur);
+    for(idx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, idx++){
+      sqlite3VdbeAddOp1(v, OP_Close, idx+iIdxCur);
+    }
+  }
+
+insert_end:
+  /* Update the sqlite_sequence table by storing the content of the
+  ** maximum rowid counter values recorded while inserting into
+  ** autoincrement tables.
+  */
+  if( pParse->nested==0 && pParse->pTriggerTab==0 ){
+    sqlite3AutoincrementEnd(pParse);
+  }
+
+  /*
+  ** Return the number of rows inserted. If this routine is 
+  ** generating code because of a call to sqlite3NestedParse(), do not
+  ** invoke the callback function.
+  */
+  if( (db->flags&SQLITE_CountRows) && !pParse->nested && !pParse->pTriggerTab ){
+    sqlite3VdbeAddOp2(v, OP_ResultRow, regRowCount, 1);
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "rows inserted", SQLITE_STATIC);
+  }
+
+insert_cleanup:
+  sqlite3SrcListDelete(db, pTabList);
+  sqlite3ExprListDelete(db, pList);
+  sqlite3SelectDelete(db, pSelect);
+  sqlite3IdListDelete(db, pColumn);
+  sqlite3DbFree(db, aRegIdx);
+}
+
+/* Make sure "isView" and other macros defined above are undefined. Otherwise
+** they may interfere with compilation of other functions in this file
+** (or in another file, if this file becomes part of the amalgamation).  */
+#ifdef isView
+ #undef isView
+#endif
+#ifdef pTrigger
+ #undef pTrigger
+#endif
+#ifdef tmask
+ #undef tmask
+#endif
+
+/*
+** Generate code to do constraint checks prior to an INSERT or an UPDATE
+** on table pTab.
+**
+** The regNewData parameter is the first register in a range that contains
+** the data to be inserted or the data after the update.  There will be
+** pTab->nCol+1 registers in this range.  The first register (the one
+** that regNewData points to) will contain the new rowid, or NULL in the
+** case of a WITHOUT ROWID table.  The second register in the range will
+** contain the content of the first table column.  The third register will
+** contain the content of the second table column.  And so forth.
+**
+** The regOldData parameter is similar to regNewData except that it contains
+** the data prior to an UPDATE rather than afterwards.  regOldData is zero
+** for an INSERT.  This routine can distinguish between UPDATE and INSERT by
+** checking regOldData for zero.
+**
+** For an UPDATE, the pkChng boolean is true if the true primary key (the
+** rowid for a normal table or the PRIMARY KEY for a WITHOUT ROWID table)
+** might be modified by the UPDATE.  If pkChng is false, then the key of
+** the iDataCur content table is guaranteed to be unchanged by the UPDATE.
+**
+** For an INSERT, the pkChng boolean indicates whether or not the rowid
+** was explicitly specified as part of the INSERT statement.  If pkChng
+** is zero, it means that the either rowid is computed automatically or
+** that the table is a WITHOUT ROWID table and has no rowid.  On an INSERT,
+** pkChng will only be true if the INSERT statement provides an integer
+** value for either the rowid column or its INTEGER PRIMARY KEY alias.
+**
+** The code generated by this routine will store new index entries into
+** registers identified by aRegIdx[].  No index entry is created for
+** indices where aRegIdx[i]==0.  The order of indices in aRegIdx[] is
+** the same as the order of indices on the linked list of indices
+** at pTab->pIndex.
+**
+** The caller must have already opened writeable cursors on the main
+** table and all applicable indices (that is to say, all indices for which
+** aRegIdx[] is not zero).  iDataCur is the cursor for the main table when
+** inserting or updating a rowid table, or the cursor for the PRIMARY KEY
+** index when operating on a WITHOUT ROWID table.  iIdxCur is the cursor
+** for the first index in the pTab->pIndex list.  Cursors for other indices
+** are at iIdxCur+N for the N-th element of the pTab->pIndex list.
+**
+** This routine also generates code to check constraints.  NOT NULL,
+** CHECK, and UNIQUE constraints are all checked.  If a constraint fails,
+** then the appropriate action is performed.  There are five possible
+** actions: ROLLBACK, ABORT, FAIL, REPLACE, and IGNORE.
+**
+**  Constraint type  Action       What Happens
+**  ---------------  ----------   ----------------------------------------
+**  any              ROLLBACK     The current transaction is rolled back and
+**                                sqlite3_step() returns immediately with a
+**                                return code of SQLITE_CONSTRAINT.
+**
+**  any              ABORT        Back out changes from the current command
+**                                only (do not do a complete rollback) then
+**                                cause sqlite3_step() to return immediately
+**                                with SQLITE_CONSTRAINT.
+**
+**  any              FAIL         Sqlite3_step() returns immediately with a
+**                                return code of SQLITE_CONSTRAINT.  The
+**                                transaction is not rolled back and any
+**                                changes to prior rows are retained.
+**
+**  any              IGNORE       The attempt in insert or update the current
+**                                row is skipped, without throwing an error.
+**                                Processing continues with the next row.
+**                                (There is an immediate jump to ignoreDest.)
+**
+**  NOT NULL         REPLACE      The NULL value is replace by the default
+**                                value for that column.  If the default value
+**                                is NULL, the action is the same as ABORT.
+**
+**  UNIQUE           REPLACE      The other row that conflicts with the row
+**                                being inserted is removed.
+**
+**  CHECK            REPLACE      Illegal.  The results in an exception.
+**
+** Which action to take is determined by the overrideError parameter.
+** Or if overrideError==OE_Default, then the pParse->onError parameter
+** is used.  Or if pParse->onError==OE_Default then the onError value
+** for the constraint is used.
+*/
+SQLITE_PRIVATE void sqlite3GenerateConstraintChecks(
+  Parse *pParse,       /* The parser context */
+  Table *pTab,         /* The table being inserted or updated */
+  int *aRegIdx,        /* Use register aRegIdx[i] for index i.  0 for unused */
+  int iDataCur,        /* Canonical data cursor (main table or PK index) */
+  int iIdxCur,         /* First index cursor */
+  int regNewData,      /* First register in a range holding values to insert */
+  int regOldData,      /* Previous content.  0 for INSERTs */
+  u8 pkChng,           /* Non-zero if the rowid or PRIMARY KEY changed */
+  u8 overrideError,    /* Override onError to this if not OE_Default */
+  int ignoreDest,      /* Jump to this label on an OE_Ignore resolution */
+  int *pbMayReplace    /* OUT: Set to true if constraint may cause a replace */
+){
+  Vdbe *v;             /* VDBE under constrution */
+  Index *pIdx;         /* Pointer to one of the indices */
+  Index *pPk = 0;      /* The PRIMARY KEY index */
+  sqlite3 *db;         /* Database connection */
+  int i;               /* loop counter */
+  int ix;              /* Index loop counter */
+  int nCol;            /* Number of columns */
+  int onError;         /* Conflict resolution strategy */
+  int j1;              /* Address of jump instruction */
+  int seenReplace = 0; /* True if REPLACE is used to resolve INT PK conflict */
+  int nPkField;        /* Number of fields in PRIMARY KEY. 1 for ROWID tables */
+  int ipkTop = 0;      /* Top of the rowid change constraint check */
+  int ipkBottom = 0;   /* Bottom of the rowid change constraint check */
+  u8 isUpdate;         /* True if this is an UPDATE operation */
+  u8 bAffinityDone = 0;  /* True if the OP_Affinity operation has been run */
+  int regRowid = -1;   /* Register holding ROWID value */
+
+  isUpdate = regOldData!=0;
+  db = pParse->db;
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );
+  assert( pTab->pSelect==0 );  /* This table is not a VIEW */
+  nCol = pTab->nCol;
+  
+  /* pPk is the PRIMARY KEY index for WITHOUT ROWID tables and NULL for
+  ** normal rowid tables.  nPkField is the number of key fields in the 
+  ** pPk index or 1 for a rowid table.  In other words, nPkField is the
+  ** number of fields in the true primary key of the table. */
+  if( HasRowid(pTab) ){
+    pPk = 0;
+    nPkField = 1;
+  }else{
+    pPk = sqlite3PrimaryKeyIndex(pTab);
+    nPkField = pPk->nKeyCol;
+  }
+
+  /* Record that this module has started */
+  VdbeModuleComment((v, "BEGIN: GenCnstCks(%d,%d,%d,%d,%d)",
+                     iDataCur, iIdxCur, regNewData, regOldData, pkChng));
+
+  /* Test all NOT NULL constraints.
+  */
+  for(i=0; i<nCol; i++){
+    if( i==pTab->iPKey ){
+      continue;
+    }
+    onError = pTab->aCol[i].notNull;
+    if( onError==OE_None ) continue;
+    if( overrideError!=OE_Default ){
+      onError = overrideError;
+    }else if( onError==OE_Default ){
+      onError = OE_Abort;
+    }
+    if( onError==OE_Replace && pTab->aCol[i].pDflt==0 ){
+      onError = OE_Abort;
+    }
+    assert( onError==OE_Rollback || onError==OE_Abort || onError==OE_Fail
+        || onError==OE_Ignore || onError==OE_Replace );
+    switch( onError ){
+      case OE_Abort:
+        sqlite3MayAbort(pParse);
+        /* Fall through */
+      case OE_Rollback:
+      case OE_Fail: {
+        char *zMsg = sqlite3MPrintf(db, "%s.%s", pTab->zName,
+                                    pTab->aCol[i].zName);
+        sqlite3VdbeAddOp4(v, OP_HaltIfNull, SQLITE_CONSTRAINT_NOTNULL, onError,
+                          regNewData+1+i, zMsg, P4_DYNAMIC);
+        sqlite3VdbeChangeP5(v, P5_ConstraintNotNull);
+        VdbeCoverage(v);
+        break;
+      }
+      case OE_Ignore: {
+        sqlite3VdbeAddOp2(v, OP_IsNull, regNewData+1+i, ignoreDest);
+        VdbeCoverage(v);
+        break;
+      }
+      default: {
+        assert( onError==OE_Replace );
+        j1 = sqlite3VdbeAddOp1(v, OP_NotNull, regNewData+1+i); VdbeCoverage(v);
+        sqlite3ExprCode(pParse, pTab->aCol[i].pDflt, regNewData+1+i);
+        sqlite3VdbeJumpHere(v, j1);
+        break;
+      }
+    }
+  }
+
+  /* Test all CHECK constraints
+  */
+#ifndef SQLITE_OMIT_CHECK
+  if( pTab->pCheck && (db->flags & SQLITE_IgnoreChecks)==0 ){
+    ExprList *pCheck = pTab->pCheck;
+    pParse->ckBase = regNewData+1;
+    onError = overrideError!=OE_Default ? overrideError : OE_Abort;
+    for(i=0; i<pCheck->nExpr; i++){
+      int allOk = sqlite3VdbeMakeLabel(v);
+      sqlite3ExprIfTrue(pParse, pCheck->a[i].pExpr, allOk, SQLITE_JUMPIFNULL);
+      if( onError==OE_Ignore ){
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, ignoreDest);
+      }else{
+        char *zName = pCheck->a[i].zName;
+        if( zName==0 ) zName = pTab->zName;
+        if( onError==OE_Replace ) onError = OE_Abort; /* IMP: R-15569-63625 */
+        sqlite3HaltConstraint(pParse, SQLITE_CONSTRAINT_CHECK,
+                              onError, zName, P4_TRANSIENT,
+                              P5_ConstraintCheck);
+      }
+      sqlite3VdbeResolveLabel(v, allOk);
+    }
+  }
+#endif /* !defined(SQLITE_OMIT_CHECK) */
+
+  /* If rowid is changing, make sure the new rowid does not previously
+  ** exist in the table.
+  */
+  if( pkChng && pPk==0 ){
+    int addrRowidOk = sqlite3VdbeMakeLabel(v);
+
+    /* Figure out what action to take in case of a rowid collision */
+    onError = pTab->keyConf;
+    if( overrideError!=OE_Default ){
+      onError = overrideError;
+    }else if( onError==OE_Default ){
+      onError = OE_Abort;
+    }
+
+    if( isUpdate ){
+      /* pkChng!=0 does not mean that the rowid has change, only that
+      ** it might have changed.  Skip the conflict logic below if the rowid
+      ** is unchanged. */
+      sqlite3VdbeAddOp3(v, OP_Eq, regNewData, addrRowidOk, regOldData);
+      sqlite3VdbeChangeP5(v, SQLITE_NOTNULL);
+      VdbeCoverage(v);
+    }
+
+    /* If the response to a rowid conflict is REPLACE but the response
+    ** to some other UNIQUE constraint is FAIL or IGNORE, then we need
+    ** to defer the running of the rowid conflict checking until after
+    ** the UNIQUE constraints have run.
+    */
+    if( onError==OE_Replace && overrideError!=OE_Replace ){
+      for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+        if( pIdx->onError==OE_Ignore || pIdx->onError==OE_Fail ){
+          ipkTop = sqlite3VdbeAddOp0(v, OP_Goto);
+          break;
+        }
+      }
+    }
+
+    /* Check to see if the new rowid already exists in the table.  Skip
+    ** the following conflict logic if it does not. */
+    sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData);
+    VdbeCoverage(v);
+
+    /* Generate code that deals with a rowid collision */
+    switch( onError ){
+      default: {
+        onError = OE_Abort;
+        /* Fall thru into the next case */
+      }
+      case OE_Rollback:
+      case OE_Abort:
+      case OE_Fail: {
+        sqlite3RowidConstraint(pParse, onError, pTab);
+        break;
+      }
+      case OE_Replace: {
+        /* If there are DELETE triggers on this table and the
+        ** recursive-triggers flag is set, call GenerateRowDelete() to
+        ** remove the conflicting row from the table. This will fire
+        ** the triggers and remove both the table and index b-tree entries.
+        **
+        ** Otherwise, if there are no triggers or the recursive-triggers
+        ** flag is not set, but the table has one or more indexes, call 
+        ** GenerateRowIndexDelete(). This removes the index b-tree entries 
+        ** only. The table b-tree entry will be replaced by the new entry 
+        ** when it is inserted.  
+        **
+        ** If either GenerateRowDelete() or GenerateRowIndexDelete() is called,
+        ** also invoke MultiWrite() to indicate that this VDBE may require
+        ** statement rollback (if the statement is aborted after the delete
+        ** takes place). Earlier versions called sqlite3MultiWrite() regardless,
+        ** but being more selective here allows statements like:
+        **
+        **   REPLACE INTO t(rowid) VALUES($newrowid)
+        **
+        ** to run without a statement journal if there are no indexes on the
+        ** table.
+        */
+        Trigger *pTrigger = 0;
+        if( db->flags&SQLITE_RecTriggers ){
+          pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0);
+        }
+        if( pTrigger || sqlite3FkRequired(pParse, pTab, 0, 0) ){
+          sqlite3MultiWrite(pParse);
+          sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur,
+                                   regNewData, 1, 0, OE_Replace, 1);
+        }else if( pTab->pIndex ){
+          sqlite3MultiWrite(pParse);
+          sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, 0);
+        }
+        seenReplace = 1;
+        break;
+      }
+      case OE_Ignore: {
+        /*assert( seenReplace==0 );*/
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, ignoreDest);
+        break;
+      }
+    }
+    sqlite3VdbeResolveLabel(v, addrRowidOk);
+    if( ipkTop ){
+      ipkBottom = sqlite3VdbeAddOp0(v, OP_Goto);
+      sqlite3VdbeJumpHere(v, ipkTop);
+    }
+  }
+
+  /* Test all UNIQUE constraints by creating entries for each UNIQUE
+  ** index and making sure that duplicate entries do not already exist.
+  ** Compute the revised record entries for indices as we go.
+  **
+  ** This loop also handles the case of the PRIMARY KEY index for a
+  ** WITHOUT ROWID table.
+  */
+  for(ix=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, ix++){
+    int regIdx;          /* Range of registers hold conent for pIdx */
+    int regR;            /* Range of registers holding conflicting PK */
+    int iThisCur;        /* Cursor for this UNIQUE index */
+    int addrUniqueOk;    /* Jump here if the UNIQUE constraint is satisfied */
+
+    if( aRegIdx[ix]==0 ) continue;  /* Skip indices that do not change */
+    if( bAffinityDone==0 ){
+      sqlite3TableAffinity(v, pTab, regNewData+1);
+      bAffinityDone = 1;
+    }
+    iThisCur = iIdxCur+ix;
+    addrUniqueOk = sqlite3VdbeMakeLabel(v);
+
+    /* Skip partial indices for which the WHERE clause is not true */
+    if( pIdx->pPartIdxWhere ){
+      sqlite3VdbeAddOp2(v, OP_Null, 0, aRegIdx[ix]);
+      pParse->ckBase = regNewData+1;
+      sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, addrUniqueOk,
+                         SQLITE_JUMPIFNULL);
+      pParse->ckBase = 0;
+    }
+
+    /* Create a record for this index entry as it should appear after
+    ** the insert or update.  Store that record in the aRegIdx[ix] register
+    */
+    regIdx = sqlite3GetTempRange(pParse, pIdx->nColumn);
+    for(i=0; i<pIdx->nColumn; i++){
+      int iField = pIdx->aiColumn[i];
+      int x;
+      if( iField<0 || iField==pTab->iPKey ){
+        if( regRowid==regIdx+i ) continue; /* ROWID already in regIdx+i */
+        x = regNewData;
+        regRowid =  pIdx->pPartIdxWhere ? -1 : regIdx+i;
+      }else{
+        x = iField + regNewData + 1;
+      }
+      sqlite3VdbeAddOp2(v, OP_SCopy, x, regIdx+i);
+      VdbeComment((v, "%s", iField<0 ? "rowid" : pTab->aCol[iField].zName));
+    }
+    sqlite3VdbeAddOp3(v, OP_MakeRecord, regIdx, pIdx->nColumn, aRegIdx[ix]);
+    VdbeComment((v, "for %s", pIdx->zName));
+    sqlite3ExprCacheAffinityChange(pParse, regIdx, pIdx->nColumn);
+
+    /* In an UPDATE operation, if this index is the PRIMARY KEY index 
+    ** of a WITHOUT ROWID table and there has been no change the
+    ** primary key, then no collision is possible.  The collision detection
+    ** logic below can all be skipped. */
+    if( isUpdate && pPk==pIdx && pkChng==0 ){
+      sqlite3VdbeResolveLabel(v, addrUniqueOk);
+      continue;
+    }
+
+    /* Find out what action to take in case there is a uniqueness conflict */
+    onError = pIdx->onError;
+    if( onError==OE_None ){ 
+      sqlite3ReleaseTempRange(pParse, regIdx, pIdx->nColumn);
+      sqlite3VdbeResolveLabel(v, addrUniqueOk);
+      continue;  /* pIdx is not a UNIQUE index */
+    }
+    if( overrideError!=OE_Default ){
+      onError = overrideError;
+    }else if( onError==OE_Default ){
+      onError = OE_Abort;
+    }
+    
+    /* Check to see if the new index entry will be unique */
+    sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk,
+                         regIdx, pIdx->nKeyCol); VdbeCoverage(v);
+
+    /* Generate code to handle collisions */
+    regR = (pIdx==pPk) ? regIdx : sqlite3GetTempRange(pParse, nPkField);
+    if( isUpdate || onError==OE_Replace ){
+      if( HasRowid(pTab) ){
+        sqlite3VdbeAddOp2(v, OP_IdxRowid, iThisCur, regR);
+        /* Conflict only if the rowid of the existing index entry
+        ** is different from old-rowid */
+        if( isUpdate ){
+          sqlite3VdbeAddOp3(v, OP_Eq, regR, addrUniqueOk, regOldData);
+          sqlite3VdbeChangeP5(v, SQLITE_NOTNULL);
+          VdbeCoverage(v);
+        }
+      }else{
+        int x;
+        /* Extract the PRIMARY KEY from the end of the index entry and
+        ** store it in registers regR..regR+nPk-1 */
+        if( pIdx!=pPk ){
+          for(i=0; i<pPk->nKeyCol; i++){
+            x = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[i]);
+            sqlite3VdbeAddOp3(v, OP_Column, iThisCur, x, regR+i);
+            VdbeComment((v, "%s.%s", pTab->zName,
+                         pTab->aCol[pPk->aiColumn[i]].zName));
+          }
+        }
+        if( isUpdate ){
+          /* If currently processing the PRIMARY KEY of a WITHOUT ROWID 
+          ** table, only conflict if the new PRIMARY KEY values are actually
+          ** different from the old.
+          **
+          ** For a UNIQUE index, only conflict if the PRIMARY KEY values
+          ** of the matched index row are different from the original PRIMARY
+          ** KEY values of this row before the update.  */
+          int addrJump = sqlite3VdbeCurrentAddr(v)+pPk->nKeyCol;
+          int op = OP_Ne;
+          int regCmp = (IsPrimaryKeyIndex(pIdx) ? regIdx : regR);
+  
+          for(i=0; i<pPk->nKeyCol; i++){
+            char *p4 = (char*)sqlite3LocateCollSeq(pParse, pPk->azColl[i]);
+            x = pPk->aiColumn[i];
+            if( i==(pPk->nKeyCol-1) ){
+              addrJump = addrUniqueOk;
+              op = OP_Eq;
+            }
+            sqlite3VdbeAddOp4(v, op, 
+                regOldData+1+x, addrJump, regCmp+i, p4, P4_COLLSEQ
+            );
+            sqlite3VdbeChangeP5(v, SQLITE_NOTNULL);
+            VdbeCoverageIf(v, op==OP_Eq);
+            VdbeCoverageIf(v, op==OP_Ne);
+          }
+        }
+      }
+    }
+
+    /* Generate code that executes if the new index entry is not unique */
+    assert( onError==OE_Rollback || onError==OE_Abort || onError==OE_Fail
+        || onError==OE_Ignore || onError==OE_Replace );
+    switch( onError ){
+      case OE_Rollback:
+      case OE_Abort:
+      case OE_Fail: {
+        sqlite3UniqueConstraint(pParse, onError, pIdx);
+        break;
+      }
+      case OE_Ignore: {
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, ignoreDest);
+        break;
+      }
+      default: {
+        Trigger *pTrigger = 0;
+        assert( onError==OE_Replace );
+        sqlite3MultiWrite(pParse);
+        if( db->flags&SQLITE_RecTriggers ){
+          pTrigger = sqlite3TriggersExist(pParse, pTab, TK_DELETE, 0, 0);
+        }
+        sqlite3GenerateRowDelete(pParse, pTab, pTrigger, iDataCur, iIdxCur,
+                                 regR, nPkField, 0, OE_Replace, pIdx==pPk);
+        seenReplace = 1;
+        break;
+      }
+    }
+    sqlite3VdbeResolveLabel(v, addrUniqueOk);
+    sqlite3ReleaseTempRange(pParse, regIdx, pIdx->nColumn);
+    if( regR!=regIdx ) sqlite3ReleaseTempRange(pParse, regR, nPkField);
+  }
+  if( ipkTop ){
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, ipkTop+1);
+    sqlite3VdbeJumpHere(v, ipkBottom);
+  }
+  
+  *pbMayReplace = seenReplace;
+  VdbeModuleComment((v, "END: GenCnstCks(%d)", seenReplace));
+}
+
+/*
+** This routine generates code to finish the INSERT or UPDATE operation
+** that was started by a prior call to sqlite3GenerateConstraintChecks.
+** A consecutive range of registers starting at regNewData contains the
+** rowid and the content to be inserted.
+**
+** The arguments to this routine should be the same as the first six
+** arguments to sqlite3GenerateConstraintChecks.
+*/
+SQLITE_PRIVATE void sqlite3CompleteInsertion(
+  Parse *pParse,      /* The parser context */
+  Table *pTab,        /* the table into which we are inserting */
+  int iDataCur,       /* Cursor of the canonical data source */
+  int iIdxCur,        /* First index cursor */
+  int regNewData,     /* Range of content */
+  int *aRegIdx,       /* Register used by each index.  0 for unused indices */
+  int isUpdate,       /* True for UPDATE, False for INSERT */
+  int appendBias,     /* True if this is likely to be an append */
+  int useSeekResult   /* True to set the USESEEKRESULT flag on OP_[Idx]Insert */
+){
+  Vdbe *v;            /* Prepared statements under construction */
+  Index *pIdx;        /* An index being inserted or updated */
+  u8 pik_flags;       /* flag values passed to the btree insert */
+  int regData;        /* Content registers (after the rowid) */
+  int regRec;         /* Register holding assembled record for the table */
+  int i;              /* Loop counter */
+  u8 bAffinityDone = 0; /* True if OP_Affinity has been run already */
+
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );
+  assert( pTab->pSelect==0 );  /* This table is not a VIEW */
+  for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){
+    if( aRegIdx[i]==0 ) continue;
+    bAffinityDone = 1;
+    if( pIdx->pPartIdxWhere ){
+      sqlite3VdbeAddOp2(v, OP_IsNull, aRegIdx[i], sqlite3VdbeCurrentAddr(v)+2);
+      VdbeCoverage(v);
+    }
+    sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdxCur+i, aRegIdx[i]);
+    pik_flags = 0;
+    if( useSeekResult ) pik_flags = OPFLAG_USESEEKRESULT;
+    if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) ){
+      assert( pParse->nested==0 );
+      pik_flags |= OPFLAG_NCHANGE;
+    }
+    if( pik_flags )  sqlite3VdbeChangeP5(v, pik_flags);
+  }
+  if( !HasRowid(pTab) ) return;
+  regData = regNewData + 1;
+  regRec = sqlite3GetTempReg(pParse);
+  sqlite3VdbeAddOp3(v, OP_MakeRecord, regData, pTab->nCol, regRec);
+  if( !bAffinityDone ) sqlite3TableAffinity(v, pTab, 0);
+  sqlite3ExprCacheAffinityChange(pParse, regData, pTab->nCol);
+  if( pParse->nested ){
+    pik_flags = 0;
+  }else{
+    pik_flags = OPFLAG_NCHANGE;
+    pik_flags |= (isUpdate?OPFLAG_ISUPDATE:OPFLAG_LASTROWID);
+  }
+  if( appendBias ){
+    pik_flags |= OPFLAG_APPEND;
+  }
+  if( useSeekResult ){
+    pik_flags |= OPFLAG_USESEEKRESULT;
+  }
+  sqlite3VdbeAddOp3(v, OP_Insert, iDataCur, regRec, regNewData);
+  if( !pParse->nested ){
+    sqlite3VdbeChangeP4(v, -1, pTab->zName, P4_TRANSIENT);
+  }
+  sqlite3VdbeChangeP5(v, pik_flags);
+}
+
+/*
+** Allocate cursors for the pTab table and all its indices and generate
+** code to open and initialized those cursors.
+**
+** The cursor for the object that contains the complete data (normally
+** the table itself, but the PRIMARY KEY index in the case of a WITHOUT
+** ROWID table) is returned in *piDataCur.  The first index cursor is
+** returned in *piIdxCur.  The number of indices is returned.
+**
+** Use iBase as the first cursor (either the *piDataCur for rowid tables
+** or the first index for WITHOUT ROWID tables) if it is non-negative.
+** If iBase is negative, then allocate the next available cursor.
+**
+** For a rowid table, *piDataCur will be exactly one less than *piIdxCur.
+** For a WITHOUT ROWID table, *piDataCur will be somewhere in the range
+** of *piIdxCurs, depending on where the PRIMARY KEY index appears on the
+** pTab->pIndex list.
+**
+** If pTab is a virtual table, then this routine is a no-op and the
+** *piDataCur and *piIdxCur values are left uninitialized.
+*/
+SQLITE_PRIVATE int sqlite3OpenTableAndIndices(
+  Parse *pParse,   /* Parsing context */
+  Table *pTab,     /* Table to be opened */
+  int op,          /* OP_OpenRead or OP_OpenWrite */
+  int iBase,       /* Use this for the table cursor, if there is one */
+  u8 *aToOpen,     /* If not NULL: boolean for each table and index */
+  int *piDataCur,  /* Write the database source cursor number here */
+  int *piIdxCur    /* Write the first index cursor number here */
+){
+  int i;
+  int iDb;
+  int iDataCur;
+  Index *pIdx;
+  Vdbe *v;
+
+  assert( op==OP_OpenRead || op==OP_OpenWrite );
+  if( IsVirtual(pTab) ){
+    /* This routine is a no-op for virtual tables. Leave the output
+    ** variables *piDataCur and *piIdxCur uninitialized so that valgrind
+    ** can detect if they are used by mistake in the caller. */
+    return 0;
+  }
+  iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );
+  if( iBase<0 ) iBase = pParse->nTab;
+  iDataCur = iBase++;
+  if( piDataCur ) *piDataCur = iDataCur;
+  if( HasRowid(pTab) && (aToOpen==0 || aToOpen[0]) ){
+    sqlite3OpenTable(pParse, iDataCur, iDb, pTab, op);
+  }else{
+    sqlite3TableLock(pParse, iDb, pTab->tnum, op==OP_OpenWrite, pTab->zName);
+  }
+  if( piIdxCur ) *piIdxCur = iBase;
+  for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){
+    int iIdxCur = iBase++;
+    assert( pIdx->pSchema==pTab->pSchema );
+    if( IsPrimaryKeyIndex(pIdx) && !HasRowid(pTab) && piDataCur ){
+      *piDataCur = iIdxCur;
+    }
+    if( aToOpen==0 || aToOpen[i+1] ){
+      sqlite3VdbeAddOp3(v, op, iIdxCur, pIdx->tnum, iDb);
+      sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+      VdbeComment((v, "%s", pIdx->zName));
+    }
+  }
+  if( iBase>pParse->nTab ) pParse->nTab = iBase;
+  return i;
+}
+
+
+#ifdef SQLITE_TEST
+/*
+** The following global variable is incremented whenever the
+** transfer optimization is used.  This is used for testing
+** purposes only - to make sure the transfer optimization really
+** is happening when it is supposed to.
+*/
+SQLITE_API int sqlite3_xferopt_count;
+#endif /* SQLITE_TEST */
+
+
+#ifndef SQLITE_OMIT_XFER_OPT
+/*
+** Check to collation names to see if they are compatible.
+*/
+static int xferCompatibleCollation(const char *z1, const char *z2){
+  if( z1==0 ){
+    return z2==0;
+  }
+  if( z2==0 ){
+    return 0;
+  }
+  return sqlite3StrICmp(z1, z2)==0;
+}
+
+
+/*
+** Check to see if index pSrc is compatible as a source of data
+** for index pDest in an insert transfer optimization.  The rules
+** for a compatible index:
+**
+**    *   The index is over the same set of columns
+**    *   The same DESC and ASC markings occurs on all columns
+**    *   The same onError processing (OE_Abort, OE_Ignore, etc)
+**    *   The same collating sequence on each column
+**    *   The index has the exact same WHERE clause
+*/
+static int xferCompatibleIndex(Index *pDest, Index *pSrc){
+  int i;
+  assert( pDest && pSrc );
+  assert( pDest->pTable!=pSrc->pTable );
+  if( pDest->nKeyCol!=pSrc->nKeyCol ){
+    return 0;   /* Different number of columns */
+  }
+  if( pDest->onError!=pSrc->onError ){
+    return 0;   /* Different conflict resolution strategies */
+  }
+  for(i=0; i<pSrc->nKeyCol; i++){
+    if( pSrc->aiColumn[i]!=pDest->aiColumn[i] ){
+      return 0;   /* Different columns indexed */
+    }
+    if( pSrc->aSortOrder[i]!=pDest->aSortOrder[i] ){
+      return 0;   /* Different sort orders */
+    }
+    if( !xferCompatibleCollation(pSrc->azColl[i],pDest->azColl[i]) ){
+      return 0;   /* Different collating sequences */
+    }
+  }
+  if( sqlite3ExprCompare(pSrc->pPartIdxWhere, pDest->pPartIdxWhere, -1) ){
+    return 0;     /* Different WHERE clauses */
+  }
+
+  /* If no test above fails then the indices must be compatible */
+  return 1;
+}
+
+/*
+** Attempt the transfer optimization on INSERTs of the form
+**
+**     INSERT INTO tab1 SELECT * FROM tab2;
+**
+** The xfer optimization transfers raw records from tab2 over to tab1.  
+** Columns are not decoded and reassembled, which greatly improves
+** performance.  Raw index records are transferred in the same way.
+**
+** The xfer optimization is only attempted if tab1 and tab2 are compatible.
+** There are lots of rules for determining compatibility - see comments
+** embedded in the code for details.
+**
+** This routine returns TRUE if the optimization is guaranteed to be used.
+** Sometimes the xfer optimization will only work if the destination table
+** is empty - a factor that can only be determined at run-time.  In that
+** case, this routine generates code for the xfer optimization but also
+** does a test to see if the destination table is empty and jumps over the
+** xfer optimization code if the test fails.  In that case, this routine
+** returns FALSE so that the caller will know to go ahead and generate
+** an unoptimized transfer.  This routine also returns FALSE if there
+** is no chance that the xfer optimization can be applied.
+**
+** This optimization is particularly useful at making VACUUM run faster.
+*/
+static int xferOptimization(
+  Parse *pParse,        /* Parser context */
+  Table *pDest,         /* The table we are inserting into */
+  Select *pSelect,      /* A SELECT statement to use as the data source */
+  int onError,          /* How to handle constraint errors */
+  int iDbDest           /* The database of pDest */
+){
+  ExprList *pEList;                /* The result set of the SELECT */
+  Table *pSrc;                     /* The table in the FROM clause of SELECT */
+  Index *pSrcIdx, *pDestIdx;       /* Source and destination indices */
+  struct SrcList_item *pItem;      /* An element of pSelect->pSrc */
+  int i;                           /* Loop counter */
+  int iDbSrc;                      /* The database of pSrc */
+  int iSrc, iDest;                 /* Cursors from source and destination */
+  int addr1, addr2;                /* Loop addresses */
+  int emptyDestTest = 0;           /* Address of test for empty pDest */
+  int emptySrcTest = 0;            /* Address of test for empty pSrc */
+  Vdbe *v;                         /* The VDBE we are building */
+  int regAutoinc;                  /* Memory register used by AUTOINC */
+  int destHasUniqueIdx = 0;        /* True if pDest has a UNIQUE index */
+  int regData, regRowid;           /* Registers holding data and rowid */
+
+  if( pSelect==0 ){
+    return 0;   /* Must be of the form  INSERT INTO ... SELECT ... */
+  }
+  if( pParse->pWith || pSelect->pWith ){
+    /* Do not attempt to process this query if there are an WITH clauses
+    ** attached to it. Proceeding may generate a false "no such table: xxx"
+    ** error if pSelect reads from a CTE named "xxx".  */
+    return 0;
+  }
+  if( sqlite3TriggerList(pParse, pDest) ){
+    return 0;   /* tab1 must not have triggers */
+  }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( pDest->tabFlags & TF_Virtual ){
+    return 0;   /* tab1 must not be a virtual table */
+  }
+#endif
+  if( onError==OE_Default ){
+    if( pDest->iPKey>=0 ) onError = pDest->keyConf;
+    if( onError==OE_Default ) onError = OE_Abort;
+  }
+  assert(pSelect->pSrc);   /* allocated even if there is no FROM clause */
+  if( pSelect->pSrc->nSrc!=1 ){
+    return 0;   /* FROM clause must have exactly one term */
+  }
+  if( pSelect->pSrc->a[0].pSelect ){
+    return 0;   /* FROM clause cannot contain a subquery */
+  }
+  if( pSelect->pWhere ){
+    return 0;   /* SELECT may not have a WHERE clause */
+  }
+  if( pSelect->pOrderBy ){
+    return 0;   /* SELECT may not have an ORDER BY clause */
+  }
+  /* Do not need to test for a HAVING clause.  If HAVING is present but
+  ** there is no ORDER BY, we will get an error. */
+  if( pSelect->pGroupBy ){
+    return 0;   /* SELECT may not have a GROUP BY clause */
+  }
+  if( pSelect->pLimit ){
+    return 0;   /* SELECT may not have a LIMIT clause */
+  }
+  assert( pSelect->pOffset==0 );  /* Must be so if pLimit==0 */
+  if( pSelect->pPrior ){
+    return 0;   /* SELECT may not be a compound query */
+  }
+  if( pSelect->selFlags & SF_Distinct ){
+    return 0;   /* SELECT may not be DISTINCT */
+  }
+  pEList = pSelect->pEList;
+  assert( pEList!=0 );
+  if( pEList->nExpr!=1 ){
+    return 0;   /* The result set must have exactly one column */
+  }
+  assert( pEList->a[0].pExpr );
+  if( pEList->a[0].pExpr->op!=TK_ALL ){
+    return 0;   /* The result set must be the special operator "*" */
+  }
+
+  /* At this point we have established that the statement is of the
+  ** correct syntactic form to participate in this optimization.  Now
+  ** we have to check the semantics.
+  */
+  pItem = pSelect->pSrc->a;
+  pSrc = sqlite3LocateTableItem(pParse, 0, pItem);
+  if( pSrc==0 ){
+    return 0;   /* FROM clause does not contain a real table */
+  }
+  if( pSrc==pDest ){
+    return 0;   /* tab1 and tab2 may not be the same table */
+  }
+  if( HasRowid(pDest)!=HasRowid(pSrc) ){
+    return 0;   /* source and destination must both be WITHOUT ROWID or not */
+  }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( pSrc->tabFlags & TF_Virtual ){
+    return 0;   /* tab2 must not be a virtual table */
+  }
+#endif
+  if( pSrc->pSelect ){
+    return 0;   /* tab2 may not be a view */
+  }
+  if( pDest->nCol!=pSrc->nCol ){
+    return 0;   /* Number of columns must be the same in tab1 and tab2 */
+  }
+  if( pDest->iPKey!=pSrc->iPKey ){
+    return 0;   /* Both tables must have the same INTEGER PRIMARY KEY */
+  }
+  for(i=0; i<pDest->nCol; i++){
+    Column *pDestCol = &pDest->aCol[i];
+    Column *pSrcCol = &pSrc->aCol[i];
+    if( pDestCol->affinity!=pSrcCol->affinity ){
+      return 0;    /* Affinity must be the same on all columns */
+    }
+    if( !xferCompatibleCollation(pDestCol->zColl, pSrcCol->zColl) ){
+      return 0;    /* Collating sequence must be the same on all columns */
+    }
+    if( pDestCol->notNull && !pSrcCol->notNull ){
+      return 0;    /* tab2 must be NOT NULL if tab1 is */
+    }
+    /* Default values for second and subsequent columns need to match. */
+    if( i>0
+     && ((pDestCol->zDflt==0)!=(pSrcCol->zDflt==0) 
+         || (pDestCol->zDflt && strcmp(pDestCol->zDflt, pSrcCol->zDflt)!=0))
+    ){
+      return 0;    /* Default values must be the same for all columns */
+    }
+  }
+  for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){
+    if( IsUniqueIndex(pDestIdx) ){
+      destHasUniqueIdx = 1;
+    }
+    for(pSrcIdx=pSrc->pIndex; pSrcIdx; pSrcIdx=pSrcIdx->pNext){
+      if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break;
+    }
+    if( pSrcIdx==0 ){
+      return 0;    /* pDestIdx has no corresponding index in pSrc */
+    }
+  }
+#ifndef SQLITE_OMIT_CHECK
+  if( pDest->pCheck && sqlite3ExprListCompare(pSrc->pCheck,pDest->pCheck,-1) ){
+    return 0;   /* Tables have different CHECK constraints.  Ticket #2252 */
+  }
+#endif
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+  /* Disallow the transfer optimization if the destination table constains
+  ** any foreign key constraints.  This is more restrictive than necessary.
+  ** But the main beneficiary of the transfer optimization is the VACUUM 
+  ** command, and the VACUUM command disables foreign key constraints.  So
+  ** the extra complication to make this rule less restrictive is probably
+  ** not worth the effort.  Ticket [6284df89debdfa61db8073e062908af0c9b6118e]
+  */
+  if( (pParse->db->flags & SQLITE_ForeignKeys)!=0 && pDest->pFKey!=0 ){
+    return 0;
+  }
+#endif
+  if( (pParse->db->flags & SQLITE_CountRows)!=0 ){
+    return 0;  /* xfer opt does not play well with PRAGMA count_changes */
+  }
+
+  /* If we get this far, it means that the xfer optimization is at
+  ** least a possibility, though it might only work if the destination
+  ** table (tab1) is initially empty.
+  */
+#ifdef SQLITE_TEST
+  sqlite3_xferopt_count++;
+#endif
+  iDbSrc = sqlite3SchemaToIndex(pParse->db, pSrc->pSchema);
+  v = sqlite3GetVdbe(pParse);
+  sqlite3CodeVerifySchema(pParse, iDbSrc);
+  iSrc = pParse->nTab++;
+  iDest = pParse->nTab++;
+  regAutoinc = autoIncBegin(pParse, iDbDest, pDest);
+  regData = sqlite3GetTempReg(pParse);
+  regRowid = sqlite3GetTempReg(pParse);
+  sqlite3OpenTable(pParse, iDest, iDbDest, pDest, OP_OpenWrite);
+  assert( HasRowid(pDest) || destHasUniqueIdx );
+  if( (pDest->iPKey<0 && pDest->pIndex!=0)          /* (1) */
+   || destHasUniqueIdx                              /* (2) */
+   || (onError!=OE_Abort && onError!=OE_Rollback)   /* (3) */
+  ){
+    /* In some circumstances, we are able to run the xfer optimization
+    ** only if the destination table is initially empty.  This code makes
+    ** that determination.  Conditions under which the destination must
+    ** be empty:
+    **
+    ** (1) There is no INTEGER PRIMARY KEY but there are indices.
+    **     (If the destination is not initially empty, the rowid fields
+    **     of index entries might need to change.)
+    **
+    ** (2) The destination has a unique index.  (The xfer optimization 
+    **     is unable to test uniqueness.)
+    **
+    ** (3) onError is something other than OE_Abort and OE_Rollback.
+    */
+    addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iDest, 0); VdbeCoverage(v);
+    emptyDestTest = sqlite3VdbeAddOp2(v, OP_Goto, 0, 0);
+    sqlite3VdbeJumpHere(v, addr1);
+  }
+  if( HasRowid(pSrc) ){
+    sqlite3OpenTable(pParse, iSrc, iDbSrc, pSrc, OP_OpenRead);
+    emptySrcTest = sqlite3VdbeAddOp2(v, OP_Rewind, iSrc, 0); VdbeCoverage(v);
+    if( pDest->iPKey>=0 ){
+      addr1 = sqlite3VdbeAddOp2(v, OP_Rowid, iSrc, regRowid);
+      addr2 = sqlite3VdbeAddOp3(v, OP_NotExists, iDest, 0, regRowid);
+      VdbeCoverage(v);
+      sqlite3RowidConstraint(pParse, onError, pDest);
+      sqlite3VdbeJumpHere(v, addr2);
+      autoIncStep(pParse, regAutoinc, regRowid);
+    }else if( pDest->pIndex==0 ){
+      addr1 = sqlite3VdbeAddOp2(v, OP_NewRowid, iDest, regRowid);
+    }else{
+      addr1 = sqlite3VdbeAddOp2(v, OP_Rowid, iSrc, regRowid);
+      assert( (pDest->tabFlags & TF_Autoincrement)==0 );
+    }
+    sqlite3VdbeAddOp2(v, OP_RowData, iSrc, regData);
+    sqlite3VdbeAddOp3(v, OP_Insert, iDest, regData, regRowid);
+    sqlite3VdbeChangeP5(v, OPFLAG_NCHANGE|OPFLAG_LASTROWID|OPFLAG_APPEND);
+    sqlite3VdbeChangeP4(v, -1, pDest->zName, 0);
+    sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0);
+    sqlite3VdbeAddOp2(v, OP_Close, iDest, 0);
+  }else{
+    sqlite3TableLock(pParse, iDbDest, pDest->tnum, 1, pDest->zName);
+    sqlite3TableLock(pParse, iDbSrc, pSrc->tnum, 0, pSrc->zName);
+  }
+  for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){
+    for(pSrcIdx=pSrc->pIndex; ALWAYS(pSrcIdx); pSrcIdx=pSrcIdx->pNext){
+      if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break;
+    }
+    assert( pSrcIdx );
+    sqlite3VdbeAddOp3(v, OP_OpenRead, iSrc, pSrcIdx->tnum, iDbSrc);
+    sqlite3VdbeSetP4KeyInfo(pParse, pSrcIdx);
+    VdbeComment((v, "%s", pSrcIdx->zName));
+    sqlite3VdbeAddOp3(v, OP_OpenWrite, iDest, pDestIdx->tnum, iDbDest);
+    sqlite3VdbeSetP4KeyInfo(pParse, pDestIdx);
+    sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR);
+    VdbeComment((v, "%s", pDestIdx->zName));
+    addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iSrc, 0); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_RowKey, iSrc, regData);
+    sqlite3VdbeAddOp3(v, OP_IdxInsert, iDest, regData, 1);
+    sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1+1); VdbeCoverage(v);
+    sqlite3VdbeJumpHere(v, addr1);
+    sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0);
+    sqlite3VdbeAddOp2(v, OP_Close, iDest, 0);
+  }
+  if( emptySrcTest ) sqlite3VdbeJumpHere(v, emptySrcTest);
+  sqlite3ReleaseTempReg(pParse, regRowid);
+  sqlite3ReleaseTempReg(pParse, regData);
+  if( emptyDestTest ){
+    sqlite3VdbeAddOp2(v, OP_Halt, SQLITE_OK, 0);
+    sqlite3VdbeJumpHere(v, emptyDestTest);
+    sqlite3VdbeAddOp2(v, OP_Close, iDest, 0);
+    return 0;
+  }else{
+    return 1;
+  }
+}
+#endif /* SQLITE_OMIT_XFER_OPT */
+
+/************** End of insert.c **********************************************/
+/************** Begin file legacy.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Main file for the SQLite library.  The routines in this file
+** implement the programmer interface to the library.  Routines in
+** other files are for internal use by SQLite and should not be
+** accessed by users of the library.
+*/
+
+
+/*
+** Execute SQL code.  Return one of the SQLITE_ success/failure
+** codes.  Also write an error message into memory obtained from
+** malloc() and make *pzErrMsg point to that message.
+**
+** If the SQL is a query, then for each row in the query result
+** the xCallback() function is called.  pArg becomes the first
+** argument to xCallback().  If xCallback=NULL then no callback
+** is invoked, even for queries.
+*/
+SQLITE_API int sqlite3_exec(
+  sqlite3 *db,                /* The database on which the SQL executes */
+  const char *zSql,           /* The SQL to be executed */
+  sqlite3_callback xCallback, /* Invoke this callback routine */
+  void *pArg,                 /* First argument to xCallback() */
+  char **pzErrMsg             /* Write error messages here */
+){
+  int rc = SQLITE_OK;         /* Return code */
+  const char *zLeftover;      /* Tail of unprocessed SQL */
+  sqlite3_stmt *pStmt = 0;    /* The current SQL statement */
+  char **azCols = 0;          /* Names of result columns */
+  int callbackIsInit;         /* True if callback data is initialized */
+
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+  if( zSql==0 ) zSql = "";
+
+  sqlite3_mutex_enter(db->mutex);
+  sqlite3Error(db, SQLITE_OK);
+  while( rc==SQLITE_OK && zSql[0] ){
+    int nCol;
+    char **azVals = 0;
+
+    pStmt = 0;
+    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover);
+    assert( rc==SQLITE_OK || pStmt==0 );
+    if( rc!=SQLITE_OK ){
+      continue;
+    }
+    if( !pStmt ){
+      /* this happens for a comment or white-space */
+      zSql = zLeftover;
+      continue;
+    }
+
+    callbackIsInit = 0;
+    nCol = sqlite3_column_count(pStmt);
+
+    while( 1 ){
+      int i;
+      rc = sqlite3_step(pStmt);
+
+      /* Invoke the callback function if required */
+      if( xCallback && (SQLITE_ROW==rc || 
+          (SQLITE_DONE==rc && !callbackIsInit
+                           && db->flags&SQLITE_NullCallback)) ){
+        if( !callbackIsInit ){
+          azCols = sqlite3DbMallocZero(db, 2*nCol*sizeof(const char*) + 1);
+          if( azCols==0 ){
+            goto exec_out;
+          }
+          for(i=0; i<nCol; i++){
+            azCols[i] = (char *)sqlite3_column_name(pStmt, i);
+            /* sqlite3VdbeSetColName() installs column names as UTF8
+            ** strings so there is no way for sqlite3_column_name() to fail. */
+            assert( azCols[i]!=0 );
+          }
+          callbackIsInit = 1;
+        }
+        if( rc==SQLITE_ROW ){
+          azVals = &azCols[nCol];
+          for(i=0; i<nCol; i++){
+            azVals[i] = (char *)sqlite3_column_text(pStmt, i);
+            if( !azVals[i] && sqlite3_column_type(pStmt, i)!=SQLITE_NULL ){
+              db->mallocFailed = 1;
+              goto exec_out;
+            }
+          }
+        }
+        if( xCallback(pArg, nCol, azVals, azCols) ){
+          /* EVIDENCE-OF: R-38229-40159 If the callback function to
+          ** sqlite3_exec() returns non-zero, then sqlite3_exec() will
+          ** return SQLITE_ABORT. */
+          rc = SQLITE_ABORT;
+          sqlite3VdbeFinalize((Vdbe *)pStmt);
+          pStmt = 0;
+          sqlite3Error(db, SQLITE_ABORT);
+          goto exec_out;
+        }
+      }
+
+      if( rc!=SQLITE_ROW ){
+        rc = sqlite3VdbeFinalize((Vdbe *)pStmt);
+        pStmt = 0;
+        zSql = zLeftover;
+        while( sqlite3Isspace(zSql[0]) ) zSql++;
+        break;
+      }
+    }
+
+    sqlite3DbFree(db, azCols);
+    azCols = 0;
+  }
+
+exec_out:
+  if( pStmt ) sqlite3VdbeFinalize((Vdbe *)pStmt);
+  sqlite3DbFree(db, azCols);
+
+  rc = sqlite3ApiExit(db, rc);
+  if( rc!=SQLITE_OK && pzErrMsg ){
+    int nErrMsg = 1 + sqlite3Strlen30(sqlite3_errmsg(db));
+    *pzErrMsg = sqlite3Malloc(nErrMsg);
+    if( *pzErrMsg ){
+      memcpy(*pzErrMsg, sqlite3_errmsg(db), nErrMsg);
+    }else{
+      rc = SQLITE_NOMEM;
+      sqlite3Error(db, SQLITE_NOMEM);
+    }
+  }else if( pzErrMsg ){
+    *pzErrMsg = 0;
+  }
+
+  assert( (rc&db->errMask)==rc );
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/************** End of legacy.c **********************************************/
+/************** Begin file loadext.c *****************************************/
+/*
+** 2006 June 7
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to dynamically load extensions into
+** the SQLite library.
+*/
+
+#ifndef SQLITE_CORE
+  #define SQLITE_CORE 1  /* Disable the API redefinition in sqlite3ext.h */
+#endif
+/************** Include sqlite3ext.h in the middle of loadext.c **************/
+/************** Begin file sqlite3ext.h **************************************/
+/*
+** 2006 June 7
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the SQLite interface for use by
+** shared libraries that want to be imported as extensions into
+** an SQLite instance.  Shared libraries that intend to be loaded
+** as extensions by SQLite should #include this file instead of 
+** sqlite3.h.
+*/
+#ifndef _SQLITE3EXT_H_
+#define _SQLITE3EXT_H_
+
+typedef struct sqlite3_api_routines sqlite3_api_routines;
+
+/*
+** The following structure holds pointers to all of the SQLite API
+** routines.
+**
+** WARNING:  In order to maintain backwards compatibility, add new
+** interfaces to the end of this structure only.  If you insert new
+** interfaces in the middle of this structure, then older different
+** versions of SQLite will not be able to load each other's shared
+** libraries!
+*/
+struct sqlite3_api_routines {
+  void * (*aggregate_context)(sqlite3_context*,int nBytes);
+  int  (*aggregate_count)(sqlite3_context*);
+  int  (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*));
+  int  (*bind_double)(sqlite3_stmt*,int,double);
+  int  (*bind_int)(sqlite3_stmt*,int,int);
+  int  (*bind_int64)(sqlite3_stmt*,int,sqlite_int64);
+  int  (*bind_null)(sqlite3_stmt*,int);
+  int  (*bind_parameter_count)(sqlite3_stmt*);
+  int  (*bind_parameter_index)(sqlite3_stmt*,const char*zName);
+  const char * (*bind_parameter_name)(sqlite3_stmt*,int);
+  int  (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*));
+  int  (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*));
+  int  (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*);
+  int  (*busy_handler)(sqlite3*,int(*)(void*,int),void*);
+  int  (*busy_timeout)(sqlite3*,int ms);
+  int  (*changes)(sqlite3*);
+  int  (*close)(sqlite3*);
+  int  (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*,
+                           int eTextRep,const char*));
+  int  (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*,
+                             int eTextRep,const void*));
+  const void * (*column_blob)(sqlite3_stmt*,int iCol);
+  int  (*column_bytes)(sqlite3_stmt*,int iCol);
+  int  (*column_bytes16)(sqlite3_stmt*,int iCol);
+  int  (*column_count)(sqlite3_stmt*pStmt);
+  const char * (*column_database_name)(sqlite3_stmt*,int);
+  const void * (*column_database_name16)(sqlite3_stmt*,int);
+  const char * (*column_decltype)(sqlite3_stmt*,int i);
+  const void * (*column_decltype16)(sqlite3_stmt*,int);
+  double  (*column_double)(sqlite3_stmt*,int iCol);
+  int  (*column_int)(sqlite3_stmt*,int iCol);
+  sqlite_int64  (*column_int64)(sqlite3_stmt*,int iCol);
+  const char * (*column_name)(sqlite3_stmt*,int);
+  const void * (*column_name16)(sqlite3_stmt*,int);
+  const char * (*column_origin_name)(sqlite3_stmt*,int);
+  const void * (*column_origin_name16)(sqlite3_stmt*,int);
+  const char * (*column_table_name)(sqlite3_stmt*,int);
+  const void * (*column_table_name16)(sqlite3_stmt*,int);
+  const unsigned char * (*column_text)(sqlite3_stmt*,int iCol);
+  const void * (*column_text16)(sqlite3_stmt*,int iCol);
+  int  (*column_type)(sqlite3_stmt*,int iCol);
+  sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol);
+  void * (*commit_hook)(sqlite3*,int(*)(void*),void*);
+  int  (*complete)(const char*sql);
+  int  (*complete16)(const void*sql);
+  int  (*create_collation)(sqlite3*,const char*,int,void*,
+                           int(*)(void*,int,const void*,int,const void*));
+  int  (*create_collation16)(sqlite3*,const void*,int,void*,
+                             int(*)(void*,int,const void*,int,const void*));
+  int  (*create_function)(sqlite3*,const char*,int,int,void*,
+                          void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                          void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                          void (*xFinal)(sqlite3_context*));
+  int  (*create_function16)(sqlite3*,const void*,int,int,void*,
+                            void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xFinal)(sqlite3_context*));
+  int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
+  int  (*data_count)(sqlite3_stmt*pStmt);
+  sqlite3 * (*db_handle)(sqlite3_stmt*);
+  int (*declare_vtab)(sqlite3*,const char*);
+  int  (*enable_shared_cache)(int);
+  int  (*errcode)(sqlite3*db);
+  const char * (*errmsg)(sqlite3*);
+  const void * (*errmsg16)(sqlite3*);
+  int  (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**);
+  int  (*expired)(sqlite3_stmt*);
+  int  (*finalize)(sqlite3_stmt*pStmt);
+  void  (*free)(void*);
+  void  (*free_table)(char**result);
+  int  (*get_autocommit)(sqlite3*);
+  void * (*get_auxdata)(sqlite3_context*,int);
+  int  (*get_table)(sqlite3*,const char*,char***,int*,int*,char**);
+  int  (*global_recover)(void);
+  void  (*interruptx)(sqlite3*);
+  sqlite_int64  (*last_insert_rowid)(sqlite3*);
+  const char * (*libversion)(void);
+  int  (*libversion_number)(void);
+  void *(*malloc)(int);
+  char * (*mprintf)(const char*,...);
+  int  (*open)(const char*,sqlite3**);
+  int  (*open16)(const void*,sqlite3**);
+  int  (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**);
+  int  (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**);
+  void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*);
+  void  (*progress_handler)(sqlite3*,int,int(*)(void*),void*);
+  void *(*realloc)(void*,int);
+  int  (*reset)(sqlite3_stmt*pStmt);
+  void  (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*));
+  void  (*result_double)(sqlite3_context*,double);
+  void  (*result_error)(sqlite3_context*,const char*,int);
+  void  (*result_error16)(sqlite3_context*,const void*,int);
+  void  (*result_int)(sqlite3_context*,int);
+  void  (*result_int64)(sqlite3_context*,sqlite_int64);
+  void  (*result_null)(sqlite3_context*);
+  void  (*result_text)(sqlite3_context*,const char*,int,void(*)(void*));
+  void  (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*));
+  void  (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*));
+  void  (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*));
+  void  (*result_value)(sqlite3_context*,sqlite3_value*);
+  void * (*rollback_hook)(sqlite3*,void(*)(void*),void*);
+  int  (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*,
+                         const char*,const char*),void*);
+  void  (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*));
+  char * (*snprintf)(int,char*,const char*,...);
+  int  (*step)(sqlite3_stmt*);
+  int  (*table_column_metadata)(sqlite3*,const char*,const char*,const char*,
+                                char const**,char const**,int*,int*,int*);
+  void  (*thread_cleanup)(void);
+  int  (*total_changes)(sqlite3*);
+  void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*);
+  int  (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*);
+  void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*,
+                                         sqlite_int64),void*);
+  void * (*user_data)(sqlite3_context*);
+  const void * (*value_blob)(sqlite3_value*);
+  int  (*value_bytes)(sqlite3_value*);
+  int  (*value_bytes16)(sqlite3_value*);
+  double  (*value_double)(sqlite3_value*);
+  int  (*value_int)(sqlite3_value*);
+  sqlite_int64  (*value_int64)(sqlite3_value*);
+  int  (*value_numeric_type)(sqlite3_value*);
+  const unsigned char * (*value_text)(sqlite3_value*);
+  const void * (*value_text16)(sqlite3_value*);
+  const void * (*value_text16be)(sqlite3_value*);
+  const void * (*value_text16le)(sqlite3_value*);
+  int  (*value_type)(sqlite3_value*);
+  char *(*vmprintf)(const char*,va_list);
+  /* Added ??? */
+  int (*overload_function)(sqlite3*, const char *zFuncName, int nArg);
+  /* Added by 3.3.13 */
+  int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**);
+  int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**);
+  int (*clear_bindings)(sqlite3_stmt*);
+  /* Added by 3.4.1 */
+  int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,
+                          void (*xDestroy)(void *));
+  /* Added by 3.5.0 */
+  int (*bind_zeroblob)(sqlite3_stmt*,int,int);
+  int (*blob_bytes)(sqlite3_blob*);
+  int (*blob_close)(sqlite3_blob*);
+  int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64,
+                   int,sqlite3_blob**);
+  int (*blob_read)(sqlite3_blob*,void*,int,int);
+  int (*blob_write)(sqlite3_blob*,const void*,int,int);
+  int (*create_collation_v2)(sqlite3*,const char*,int,void*,
+                             int(*)(void*,int,const void*,int,const void*),
+                             void(*)(void*));
+  int (*file_control)(sqlite3*,const char*,int,void*);
+  sqlite3_int64 (*memory_highwater)(int);
+  sqlite3_int64 (*memory_used)(void);
+  sqlite3_mutex *(*mutex_alloc)(int);
+  void (*mutex_enter)(sqlite3_mutex*);
+  void (*mutex_free)(sqlite3_mutex*);
+  void (*mutex_leave)(sqlite3_mutex*);
+  int (*mutex_try)(sqlite3_mutex*);
+  int (*open_v2)(const char*,sqlite3**,int,const char*);
+  int (*release_memory)(int);
+  void (*result_error_nomem)(sqlite3_context*);
+  void (*result_error_toobig)(sqlite3_context*);
+  int (*sleep)(int);
+  void (*soft_heap_limit)(int);
+  sqlite3_vfs *(*vfs_find)(const char*);
+  int (*vfs_register)(sqlite3_vfs*,int);
+  int (*vfs_unregister)(sqlite3_vfs*);
+  int (*xthreadsafe)(void);
+  void (*result_zeroblob)(sqlite3_context*,int);
+  void (*result_error_code)(sqlite3_context*,int);
+  int (*test_control)(int, ...);
+  void (*randomness)(int,void*);
+  sqlite3 *(*context_db_handle)(sqlite3_context*);
+  int (*extended_result_codes)(sqlite3*,int);
+  int (*limit)(sqlite3*,int,int);
+  sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*);
+  const char *(*sql)(sqlite3_stmt*);
+  int (*status)(int,int*,int*,int);
+  int (*backup_finish)(sqlite3_backup*);
+  sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*);
+  int (*backup_pagecount)(sqlite3_backup*);
+  int (*backup_remaining)(sqlite3_backup*);
+  int (*backup_step)(sqlite3_backup*,int);
+  const char *(*compileoption_get)(int);
+  int (*compileoption_used)(const char*);
+  int (*create_function_v2)(sqlite3*,const char*,int,int,void*,
+                            void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xFinal)(sqlite3_context*),
+                            void(*xDestroy)(void*));
+  int (*db_config)(sqlite3*,int,...);
+  sqlite3_mutex *(*db_mutex)(sqlite3*);
+  int (*db_status)(sqlite3*,int,int*,int*,int);
+  int (*extended_errcode)(sqlite3*);
+  void (*log)(int,const char*,...);
+  sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64);
+  const char *(*sourceid)(void);
+  int (*stmt_status)(sqlite3_stmt*,int,int);
+  int (*strnicmp)(const char*,const char*,int);
+  int (*unlock_notify)(sqlite3*,void(*)(void**,int),void*);
+  int (*wal_autocheckpoint)(sqlite3*,int);
+  int (*wal_checkpoint)(sqlite3*,const char*);
+  void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*);
+  int (*blob_reopen)(sqlite3_blob*,sqlite3_int64);
+  int (*vtab_config)(sqlite3*,int op,...);
+  int (*vtab_on_conflict)(sqlite3*);
+  /* Version 3.7.16 and later */
+  int (*close_v2)(sqlite3*);
+  const char *(*db_filename)(sqlite3*,const char*);
+  int (*db_readonly)(sqlite3*,const char*);
+  int (*db_release_memory)(sqlite3*);
+  const char *(*errstr)(int);
+  int (*stmt_busy)(sqlite3_stmt*);
+  int (*stmt_readonly)(sqlite3_stmt*);
+  int (*stricmp)(const char*,const char*);
+  int (*uri_boolean)(const char*,const char*,int);
+  sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64);
+  const char *(*uri_parameter)(const char*,const char*);
+  char *(*vsnprintf)(int,char*,const char*,va_list);
+  int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*);
+  /* Version 3.8.7 and later */
+  int (*auto_extension)(void(*)(void));
+  int (*bind_blob64)(sqlite3_stmt*,int,const void*,sqlite3_uint64,
+                     void(*)(void*));
+  int (*bind_text64)(sqlite3_stmt*,int,const char*,sqlite3_uint64,
+                      void(*)(void*),unsigned char);
+  int (*cancel_auto_extension)(void(*)(void));
+  int (*load_extension)(sqlite3*,const char*,const char*,char**);
+  void *(*malloc64)(sqlite3_uint64);
+  sqlite3_uint64 (*msize)(void*);
+  void *(*realloc64)(void*,sqlite3_uint64);
+  void (*reset_auto_extension)(void);
+  void (*result_blob64)(sqlite3_context*,const void*,sqlite3_uint64,
+                        void(*)(void*));
+  void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64,
+                         void(*)(void*), unsigned char);
+  int (*strglob)(const char*,const char*);
+};
+
+/*
+** The following macros redefine the API routines so that they are
+** redirected through the global sqlite3_api structure.
+**
+** This header file is also used by the loadext.c source file
+** (part of the main SQLite library - not an extension) so that
+** it can get access to the sqlite3_api_routines structure
+** definition.  But the main library does not want to redefine
+** the API.  So the redefinition macros are only valid if the
+** SQLITE_CORE macros is undefined.
+*/
+#ifndef SQLITE_CORE
+#define sqlite3_aggregate_context      sqlite3_api->aggregate_context
+#ifndef SQLITE_OMIT_DEPRECATED
+#define sqlite3_aggregate_count        sqlite3_api->aggregate_count
+#endif
+#define sqlite3_bind_blob              sqlite3_api->bind_blob
+#define sqlite3_bind_double            sqlite3_api->bind_double
+#define sqlite3_bind_int               sqlite3_api->bind_int
+#define sqlite3_bind_int64             sqlite3_api->bind_int64
+#define sqlite3_bind_null              sqlite3_api->bind_null
+#define sqlite3_bind_parameter_count   sqlite3_api->bind_parameter_count
+#define sqlite3_bind_parameter_index   sqlite3_api->bind_parameter_index
+#define sqlite3_bind_parameter_name    sqlite3_api->bind_parameter_name
+#define sqlite3_bind_text              sqlite3_api->bind_text
+#define sqlite3_bind_text16            sqlite3_api->bind_text16
+#define sqlite3_bind_value             sqlite3_api->bind_value
+#define sqlite3_busy_handler           sqlite3_api->busy_handler
+#define sqlite3_busy_timeout           sqlite3_api->busy_timeout
+#define sqlite3_changes                sqlite3_api->changes
+#define sqlite3_close                  sqlite3_api->close
+#define sqlite3_collation_needed       sqlite3_api->collation_needed
+#define sqlite3_collation_needed16     sqlite3_api->collation_needed16
+#define sqlite3_column_blob            sqlite3_api->column_blob
+#define sqlite3_column_bytes           sqlite3_api->column_bytes
+#define sqlite3_column_bytes16         sqlite3_api->column_bytes16
+#define sqlite3_column_count           sqlite3_api->column_count
+#define sqlite3_column_database_name   sqlite3_api->column_database_name
+#define sqlite3_column_database_name16 sqlite3_api->column_database_name16
+#define sqlite3_column_decltype        sqlite3_api->column_decltype
+#define sqlite3_column_decltype16      sqlite3_api->column_decltype16
+#define sqlite3_column_double          sqlite3_api->column_double
+#define sqlite3_column_int             sqlite3_api->column_int
+#define sqlite3_column_int64           sqlite3_api->column_int64
+#define sqlite3_column_name            sqlite3_api->column_name
+#define sqlite3_column_name16          sqlite3_api->column_name16
+#define sqlite3_column_origin_name     sqlite3_api->column_origin_name
+#define sqlite3_column_origin_name16   sqlite3_api->column_origin_name16
+#define sqlite3_column_table_name      sqlite3_api->column_table_name
+#define sqlite3_column_table_name16    sqlite3_api->column_table_name16
+#define sqlite3_column_text            sqlite3_api->column_text
+#define sqlite3_column_text16          sqlite3_api->column_text16
+#define sqlite3_column_type            sqlite3_api->column_type
+#define sqlite3_column_value           sqlite3_api->column_value
+#define sqlite3_commit_hook            sqlite3_api->commit_hook
+#define sqlite3_complete               sqlite3_api->complete
+#define sqlite3_complete16             sqlite3_api->complete16
+#define sqlite3_create_collation       sqlite3_api->create_collation
+#define sqlite3_create_collation16     sqlite3_api->create_collation16
+#define sqlite3_create_function        sqlite3_api->create_function
+#define sqlite3_create_function16      sqlite3_api->create_function16
+#define sqlite3_create_module          sqlite3_api->create_module
+#define sqlite3_create_module_v2       sqlite3_api->create_module_v2
+#define sqlite3_data_count             sqlite3_api->data_count
+#define sqlite3_db_handle              sqlite3_api->db_handle
+#define sqlite3_declare_vtab           sqlite3_api->declare_vtab
+#define sqlite3_enable_shared_cache    sqlite3_api->enable_shared_cache
+#define sqlite3_errcode                sqlite3_api->errcode
+#define sqlite3_errmsg                 sqlite3_api->errmsg
+#define sqlite3_errmsg16               sqlite3_api->errmsg16
+#define sqlite3_exec                   sqlite3_api->exec
+#ifndef SQLITE_OMIT_DEPRECATED
+#define sqlite3_expired                sqlite3_api->expired
+#endif
+#define sqlite3_finalize               sqlite3_api->finalize
+#define sqlite3_free                   sqlite3_api->free
+#define sqlite3_free_table             sqlite3_api->free_table
+#define sqlite3_get_autocommit         sqlite3_api->get_autocommit
+#define sqlite3_get_auxdata            sqlite3_api->get_auxdata
+#define sqlite3_get_table              sqlite3_api->get_table
+#ifndef SQLITE_OMIT_DEPRECATED
+#define sqlite3_global_recover         sqlite3_api->global_recover
+#endif
+#define sqlite3_interrupt              sqlite3_api->interruptx
+#define sqlite3_last_insert_rowid      sqlite3_api->last_insert_rowid
+#define sqlite3_libversion             sqlite3_api->libversion
+#define sqlite3_libversion_number      sqlite3_api->libversion_number
+#define sqlite3_malloc                 sqlite3_api->malloc
+#define sqlite3_mprintf                sqlite3_api->mprintf
+#define sqlite3_open                   sqlite3_api->open
+#define sqlite3_open16                 sqlite3_api->open16
+#define sqlite3_prepare                sqlite3_api->prepare
+#define sqlite3_prepare16              sqlite3_api->prepare16
+#define sqlite3_prepare_v2             sqlite3_api->prepare_v2
+#define sqlite3_prepare16_v2           sqlite3_api->prepare16_v2
+#define sqlite3_profile                sqlite3_api->profile
+#define sqlite3_progress_handler       sqlite3_api->progress_handler
+#define sqlite3_realloc                sqlite3_api->realloc
+#define sqlite3_reset                  sqlite3_api->reset
+#define sqlite3_result_blob            sqlite3_api->result_blob
+#define sqlite3_result_double          sqlite3_api->result_double
+#define sqlite3_result_error           sqlite3_api->result_error
+#define sqlite3_result_error16         sqlite3_api->result_error16
+#define sqlite3_result_int             sqlite3_api->result_int
+#define sqlite3_result_int64           sqlite3_api->result_int64
+#define sqlite3_result_null            sqlite3_api->result_null
+#define sqlite3_result_text            sqlite3_api->result_text
+#define sqlite3_result_text16          sqlite3_api->result_text16
+#define sqlite3_result_text16be        sqlite3_api->result_text16be
+#define sqlite3_result_text16le        sqlite3_api->result_text16le
+#define sqlite3_result_value           sqlite3_api->result_value
+#define sqlite3_rollback_hook          sqlite3_api->rollback_hook
+#define sqlite3_set_authorizer         sqlite3_api->set_authorizer
+#define sqlite3_set_auxdata            sqlite3_api->set_auxdata
+#define sqlite3_snprintf               sqlite3_api->snprintf
+#define sqlite3_step                   sqlite3_api->step
+#define sqlite3_table_column_metadata  sqlite3_api->table_column_metadata
+#define sqlite3_thread_cleanup         sqlite3_api->thread_cleanup
+#define sqlite3_total_changes          sqlite3_api->total_changes
+#define sqlite3_trace                  sqlite3_api->trace
+#ifndef SQLITE_OMIT_DEPRECATED
+#define sqlite3_transfer_bindings      sqlite3_api->transfer_bindings
+#endif
+#define sqlite3_update_hook            sqlite3_api->update_hook
+#define sqlite3_user_data              sqlite3_api->user_data
+#define sqlite3_value_blob             sqlite3_api->value_blob
+#define sqlite3_value_bytes            sqlite3_api->value_bytes
+#define sqlite3_value_bytes16          sqlite3_api->value_bytes16
+#define sqlite3_value_double           sqlite3_api->value_double
+#define sqlite3_value_int              sqlite3_api->value_int
+#define sqlite3_value_int64            sqlite3_api->value_int64
+#define sqlite3_value_numeric_type     sqlite3_api->value_numeric_type
+#define sqlite3_value_text             sqlite3_api->value_text
+#define sqlite3_value_text16           sqlite3_api->value_text16
+#define sqlite3_value_text16be         sqlite3_api->value_text16be
+#define sqlite3_value_text16le         sqlite3_api->value_text16le
+#define sqlite3_value_type             sqlite3_api->value_type
+#define sqlite3_vmprintf               sqlite3_api->vmprintf
+#define sqlite3_overload_function      sqlite3_api->overload_function
+#define sqlite3_prepare_v2             sqlite3_api->prepare_v2
+#define sqlite3_prepare16_v2           sqlite3_api->prepare16_v2
+#define sqlite3_clear_bindings         sqlite3_api->clear_bindings
+#define sqlite3_bind_zeroblob          sqlite3_api->bind_zeroblob
+#define sqlite3_blob_bytes             sqlite3_api->blob_bytes
+#define sqlite3_blob_close             sqlite3_api->blob_close
+#define sqlite3_blob_open              sqlite3_api->blob_open
+#define sqlite3_blob_read              sqlite3_api->blob_read
+#define sqlite3_blob_write             sqlite3_api->blob_write
+#define sqlite3_create_collation_v2    sqlite3_api->create_collation_v2
+#define sqlite3_file_control           sqlite3_api->file_control
+#define sqlite3_memory_highwater       sqlite3_api->memory_highwater
+#define sqlite3_memory_used            sqlite3_api->memory_used
+#define sqlite3_mutex_alloc            sqlite3_api->mutex_alloc
+#define sqlite3_mutex_enter            sqlite3_api->mutex_enter
+#define sqlite3_mutex_free             sqlite3_api->mutex_free
+#define sqlite3_mutex_leave            sqlite3_api->mutex_leave
+#define sqlite3_mutex_try              sqlite3_api->mutex_try
+#define sqlite3_open_v2                sqlite3_api->open_v2
+#define sqlite3_release_memory         sqlite3_api->release_memory
+#define sqlite3_result_error_nomem     sqlite3_api->result_error_nomem
+#define sqlite3_result_error_toobig    sqlite3_api->result_error_toobig
+#define sqlite3_sleep                  sqlite3_api->sleep
+#define sqlite3_soft_heap_limit        sqlite3_api->soft_heap_limit
+#define sqlite3_vfs_find               sqlite3_api->vfs_find
+#define sqlite3_vfs_register           sqlite3_api->vfs_register
+#define sqlite3_vfs_unregister         sqlite3_api->vfs_unregister
+#define sqlite3_threadsafe             sqlite3_api->xthreadsafe
+#define sqlite3_result_zeroblob        sqlite3_api->result_zeroblob
+#define sqlite3_result_error_code      sqlite3_api->result_error_code
+#define sqlite3_test_control           sqlite3_api->test_control
+#define sqlite3_randomness             sqlite3_api->randomness
+#define sqlite3_context_db_handle      sqlite3_api->context_db_handle
+#define sqlite3_extended_result_codes  sqlite3_api->extended_result_codes
+#define sqlite3_limit                  sqlite3_api->limit
+#define sqlite3_next_stmt              sqlite3_api->next_stmt
+#define sqlite3_sql                    sqlite3_api->sql
+#define sqlite3_status                 sqlite3_api->status
+#define sqlite3_backup_finish          sqlite3_api->backup_finish
+#define sqlite3_backup_init            sqlite3_api->backup_init
+#define sqlite3_backup_pagecount       sqlite3_api->backup_pagecount
+#define sqlite3_backup_remaining       sqlite3_api->backup_remaining
+#define sqlite3_backup_step            sqlite3_api->backup_step
+#define sqlite3_compileoption_get      sqlite3_api->compileoption_get
+#define sqlite3_compileoption_used     sqlite3_api->compileoption_used
+#define sqlite3_create_function_v2     sqlite3_api->create_function_v2
+#define sqlite3_db_config              sqlite3_api->db_config
+#define sqlite3_db_mutex               sqlite3_api->db_mutex
+#define sqlite3_db_status              sqlite3_api->db_status
+#define sqlite3_extended_errcode       sqlite3_api->extended_errcode
+#define sqlite3_log                    sqlite3_api->log
+#define sqlite3_soft_heap_limit64      sqlite3_api->soft_heap_limit64
+#define sqlite3_sourceid               sqlite3_api->sourceid
+#define sqlite3_stmt_status            sqlite3_api->stmt_status
+#define sqlite3_strnicmp               sqlite3_api->strnicmp
+#define sqlite3_unlock_notify          sqlite3_api->unlock_notify
+#define sqlite3_wal_autocheckpoint     sqlite3_api->wal_autocheckpoint
+#define sqlite3_wal_checkpoint         sqlite3_api->wal_checkpoint
+#define sqlite3_wal_hook               sqlite3_api->wal_hook
+#define sqlite3_blob_reopen            sqlite3_api->blob_reopen
+#define sqlite3_vtab_config            sqlite3_api->vtab_config
+#define sqlite3_vtab_on_conflict       sqlite3_api->vtab_on_conflict
+/* Version 3.7.16 and later */
+#define sqlite3_close_v2               sqlite3_api->close_v2
+#define sqlite3_db_filename            sqlite3_api->db_filename
+#define sqlite3_db_readonly            sqlite3_api->db_readonly
+#define sqlite3_db_release_memory      sqlite3_api->db_release_memory
+#define sqlite3_errstr                 sqlite3_api->errstr
+#define sqlite3_stmt_busy              sqlite3_api->stmt_busy
+#define sqlite3_stmt_readonly          sqlite3_api->stmt_readonly
+#define sqlite3_stricmp                sqlite3_api->stricmp
+#define sqlite3_uri_boolean            sqlite3_api->uri_boolean
+#define sqlite3_uri_int64              sqlite3_api->uri_int64
+#define sqlite3_uri_parameter          sqlite3_api->uri_parameter
+#define sqlite3_uri_vsnprintf          sqlite3_api->vsnprintf
+#define sqlite3_wal_checkpoint_v2      sqlite3_api->wal_checkpoint_v2
+/* Version 3.8.7 and later */
+#define sqlite3_auto_extension         sqlite3_api->auto_extension
+#define sqlite3_bind_blob64            sqlite3_api->bind_blob64
+#define sqlite3_bind_text64            sqlite3_api->bind_text64
+#define sqlite3_cancel_auto_extension  sqlite3_api->cancel_auto_extension
+#define sqlite3_load_extension         sqlite3_api->load_extension
+#define sqlite3_malloc64               sqlite3_api->malloc64
+#define sqlite3_msize                  sqlite3_api->msize
+#define sqlite3_realloc64              sqlite3_api->realloc64
+#define sqlite3_reset_auto_extension   sqlite3_api->reset_auto_extension
+#define sqlite3_result_blob64          sqlite3_api->result_blob64
+#define sqlite3_result_text64          sqlite3_api->result_text64
+#define sqlite3_strglob                sqlite3_api->strglob
+#endif /* SQLITE_CORE */
+
+#ifndef SQLITE_CORE
+  /* This case when the file really is being compiled as a loadable 
+  ** extension */
+# define SQLITE_EXTENSION_INIT1     const sqlite3_api_routines *sqlite3_api=0;
+# define SQLITE_EXTENSION_INIT2(v)  sqlite3_api=v;
+# define SQLITE_EXTENSION_INIT3     \
+    extern const sqlite3_api_routines *sqlite3_api;
+#else
+  /* This case when the file is being statically linked into the 
+  ** application */
+# define SQLITE_EXTENSION_INIT1     /*no-op*/
+# define SQLITE_EXTENSION_INIT2(v)  (void)v; /* unused parameter */
+# define SQLITE_EXTENSION_INIT3     /*no-op*/
+#endif
+
+#endif /* _SQLITE3EXT_H_ */
+
+/************** End of sqlite3ext.h ******************************************/
+/************** Continuing where we left off in loadext.c ********************/
+/* #include <string.h> */
+
+#ifndef SQLITE_OMIT_LOAD_EXTENSION
+
+/*
+** Some API routines are omitted when various features are
+** excluded from a build of SQLite.  Substitute a NULL pointer
+** for any missing APIs.
+*/
+#ifndef SQLITE_ENABLE_COLUMN_METADATA
+# define sqlite3_column_database_name   0
+# define sqlite3_column_database_name16 0
+# define sqlite3_column_table_name      0
+# define sqlite3_column_table_name16    0
+# define sqlite3_column_origin_name     0
+# define sqlite3_column_origin_name16   0
+#endif
+
+#ifdef SQLITE_OMIT_AUTHORIZATION
+# define sqlite3_set_authorizer         0
+#endif
+
+#ifdef SQLITE_OMIT_UTF16
+# define sqlite3_bind_text16            0
+# define sqlite3_collation_needed16     0
+# define sqlite3_column_decltype16      0
+# define sqlite3_column_name16          0
+# define sqlite3_column_text16          0
+# define sqlite3_complete16             0
+# define sqlite3_create_collation16     0
+# define sqlite3_create_function16      0
+# define sqlite3_errmsg16               0
+# define sqlite3_open16                 0
+# define sqlite3_prepare16              0
+# define sqlite3_prepare16_v2           0
+# define sqlite3_result_error16         0
+# define sqlite3_result_text16          0
+# define sqlite3_result_text16be        0
+# define sqlite3_result_text16le        0
+# define sqlite3_value_text16           0
+# define sqlite3_value_text16be         0
+# define sqlite3_value_text16le         0
+# define sqlite3_column_database_name16 0
+# define sqlite3_column_table_name16    0
+# define sqlite3_column_origin_name16   0
+#endif
+
+#ifdef SQLITE_OMIT_COMPLETE
+# define sqlite3_complete 0
+# define sqlite3_complete16 0
+#endif
+
+#ifdef SQLITE_OMIT_DECLTYPE
+# define sqlite3_column_decltype16      0
+# define sqlite3_column_decltype        0
+#endif
+
+#ifdef SQLITE_OMIT_PROGRESS_CALLBACK
+# define sqlite3_progress_handler 0
+#endif
+
+#ifdef SQLITE_OMIT_VIRTUALTABLE
+# define sqlite3_create_module 0
+# define sqlite3_create_module_v2 0
+# define sqlite3_declare_vtab 0
+# define sqlite3_vtab_config 0
+# define sqlite3_vtab_on_conflict 0
+#endif
+
+#ifdef SQLITE_OMIT_SHARED_CACHE
+# define sqlite3_enable_shared_cache 0
+#endif
+
+#ifdef SQLITE_OMIT_TRACE
+# define sqlite3_profile       0
+# define sqlite3_trace         0
+#endif
+
+#ifdef SQLITE_OMIT_GET_TABLE
+# define sqlite3_free_table    0
+# define sqlite3_get_table     0
+#endif
+
+#ifdef SQLITE_OMIT_INCRBLOB
+#define sqlite3_bind_zeroblob  0
+#define sqlite3_blob_bytes     0
+#define sqlite3_blob_close     0
+#define sqlite3_blob_open      0
+#define sqlite3_blob_read      0
+#define sqlite3_blob_write     0
+#define sqlite3_blob_reopen    0
+#endif
+
+/*
+** The following structure contains pointers to all SQLite API routines.
+** A pointer to this structure is passed into extensions when they are
+** loaded so that the extension can make calls back into the SQLite
+** library.
+**
+** When adding new APIs, add them to the bottom of this structure
+** in order to preserve backwards compatibility.
+**
+** Extensions that use newer APIs should first call the
+** sqlite3_libversion_number() to make sure that the API they
+** intend to use is supported by the library.  Extensions should
+** also check to make sure that the pointer to the function is
+** not NULL before calling it.
+*/
+static const sqlite3_api_routines sqlite3Apis = {
+  sqlite3_aggregate_context,
+#ifndef SQLITE_OMIT_DEPRECATED
+  sqlite3_aggregate_count,
+#else
+  0,
+#endif
+  sqlite3_bind_blob,
+  sqlite3_bind_double,
+  sqlite3_bind_int,
+  sqlite3_bind_int64,
+  sqlite3_bind_null,
+  sqlite3_bind_parameter_count,
+  sqlite3_bind_parameter_index,
+  sqlite3_bind_parameter_name,
+  sqlite3_bind_text,
+  sqlite3_bind_text16,
+  sqlite3_bind_value,
+  sqlite3_busy_handler,
+  sqlite3_busy_timeout,
+  sqlite3_changes,
+  sqlite3_close,
+  sqlite3_collation_needed,
+  sqlite3_collation_needed16,
+  sqlite3_column_blob,
+  sqlite3_column_bytes,
+  sqlite3_column_bytes16,
+  sqlite3_column_count,
+  sqlite3_column_database_name,
+  sqlite3_column_database_name16,
+  sqlite3_column_decltype,
+  sqlite3_column_decltype16,
+  sqlite3_column_double,
+  sqlite3_column_int,
+  sqlite3_column_int64,
+  sqlite3_column_name,
+  sqlite3_column_name16,
+  sqlite3_column_origin_name,
+  sqlite3_column_origin_name16,
+  sqlite3_column_table_name,
+  sqlite3_column_table_name16,
+  sqlite3_column_text,
+  sqlite3_column_text16,
+  sqlite3_column_type,
+  sqlite3_column_value,
+  sqlite3_commit_hook,
+  sqlite3_complete,
+  sqlite3_complete16,
+  sqlite3_create_collation,
+  sqlite3_create_collation16,
+  sqlite3_create_function,
+  sqlite3_create_function16,
+  sqlite3_create_module,
+  sqlite3_data_count,
+  sqlite3_db_handle,
+  sqlite3_declare_vtab,
+  sqlite3_enable_shared_cache,
+  sqlite3_errcode,
+  sqlite3_errmsg,
+  sqlite3_errmsg16,
+  sqlite3_exec,
+#ifndef SQLITE_OMIT_DEPRECATED
+  sqlite3_expired,
+#else
+  0,
+#endif
+  sqlite3_finalize,
+  sqlite3_free,
+  sqlite3_free_table,
+  sqlite3_get_autocommit,
+  sqlite3_get_auxdata,
+  sqlite3_get_table,
+  0,     /* Was sqlite3_global_recover(), but that function is deprecated */
+  sqlite3_interrupt,
+  sqlite3_last_insert_rowid,
+  sqlite3_libversion,
+  sqlite3_libversion_number,
+  sqlite3_malloc,
+  sqlite3_mprintf,
+  sqlite3_open,
+  sqlite3_open16,
+  sqlite3_prepare,
+  sqlite3_prepare16,
+  sqlite3_profile,
+  sqlite3_progress_handler,
+  sqlite3_realloc,
+  sqlite3_reset,
+  sqlite3_result_blob,
+  sqlite3_result_double,
+  sqlite3_result_error,
+  sqlite3_result_error16,
+  sqlite3_result_int,
+  sqlite3_result_int64,
+  sqlite3_result_null,
+  sqlite3_result_text,
+  sqlite3_result_text16,
+  sqlite3_result_text16be,
+  sqlite3_result_text16le,
+  sqlite3_result_value,
+  sqlite3_rollback_hook,
+  sqlite3_set_authorizer,
+  sqlite3_set_auxdata,
+  sqlite3_snprintf,
+  sqlite3_step,
+  sqlite3_table_column_metadata,
+#ifndef SQLITE_OMIT_DEPRECATED
+  sqlite3_thread_cleanup,
+#else
+  0,
+#endif
+  sqlite3_total_changes,
+  sqlite3_trace,
+#ifndef SQLITE_OMIT_DEPRECATED
+  sqlite3_transfer_bindings,
+#else
+  0,
+#endif
+  sqlite3_update_hook,
+  sqlite3_user_data,
+  sqlite3_value_blob,
+  sqlite3_value_bytes,
+  sqlite3_value_bytes16,
+  sqlite3_value_double,
+  sqlite3_value_int,
+  sqlite3_value_int64,
+  sqlite3_value_numeric_type,
+  sqlite3_value_text,
+  sqlite3_value_text16,
+  sqlite3_value_text16be,
+  sqlite3_value_text16le,
+  sqlite3_value_type,
+  sqlite3_vmprintf,
+  /*
+  ** The original API set ends here.  All extensions can call any
+  ** of the APIs above provided that the pointer is not NULL.  But
+  ** before calling APIs that follow, extension should check the
+  ** sqlite3_libversion_number() to make sure they are dealing with
+  ** a library that is new enough to support that API.
+  *************************************************************************
+  */
+  sqlite3_overload_function,
+
+  /*
+  ** Added after 3.3.13
+  */
+  sqlite3_prepare_v2,
+  sqlite3_prepare16_v2,
+  sqlite3_clear_bindings,
+
+  /*
+  ** Added for 3.4.1
+  */
+  sqlite3_create_module_v2,
+
+  /*
+  ** Added for 3.5.0
+  */
+  sqlite3_bind_zeroblob,
+  sqlite3_blob_bytes,
+  sqlite3_blob_close,
+  sqlite3_blob_open,
+  sqlite3_blob_read,
+  sqlite3_blob_write,
+  sqlite3_create_collation_v2,
+  sqlite3_file_control,
+  sqlite3_memory_highwater,
+  sqlite3_memory_used,
+#ifdef SQLITE_MUTEX_OMIT
+  0, 
+  0, 
+  0,
+  0,
+  0,
+#else
+  sqlite3_mutex_alloc,
+  sqlite3_mutex_enter,
+  sqlite3_mutex_free,
+  sqlite3_mutex_leave,
+  sqlite3_mutex_try,
+#endif
+  sqlite3_open_v2,
+  sqlite3_release_memory,
+  sqlite3_result_error_nomem,
+  sqlite3_result_error_toobig,
+  sqlite3_sleep,
+  sqlite3_soft_heap_limit,
+  sqlite3_vfs_find,
+  sqlite3_vfs_register,
+  sqlite3_vfs_unregister,
+
+  /*
+  ** Added for 3.5.8
+  */
+  sqlite3_threadsafe,
+  sqlite3_result_zeroblob,
+  sqlite3_result_error_code,
+  sqlite3_test_control,
+  sqlite3_randomness,
+  sqlite3_context_db_handle,
+
+  /*
+  ** Added for 3.6.0
+  */
+  sqlite3_extended_result_codes,
+  sqlite3_limit,
+  sqlite3_next_stmt,
+  sqlite3_sql,
+  sqlite3_status,
+
+  /*
+  ** Added for 3.7.4
+  */
+  sqlite3_backup_finish,
+  sqlite3_backup_init,
+  sqlite3_backup_pagecount,
+  sqlite3_backup_remaining,
+  sqlite3_backup_step,
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+  sqlite3_compileoption_get,
+  sqlite3_compileoption_used,
+#else
+  0,
+  0,
+#endif
+  sqlite3_create_function_v2,
+  sqlite3_db_config,
+  sqlite3_db_mutex,
+  sqlite3_db_status,
+  sqlite3_extended_errcode,
+  sqlite3_log,
+  sqlite3_soft_heap_limit64,
+  sqlite3_sourceid,
+  sqlite3_stmt_status,
+  sqlite3_strnicmp,
+#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY
+  sqlite3_unlock_notify,
+#else
+  0,
+#endif
+#ifndef SQLITE_OMIT_WAL
+  sqlite3_wal_autocheckpoint,
+  sqlite3_wal_checkpoint,
+  sqlite3_wal_hook,
+#else
+  0,
+  0,
+  0,
+#endif
+  sqlite3_blob_reopen,
+  sqlite3_vtab_config,
+  sqlite3_vtab_on_conflict,
+  sqlite3_close_v2,
+  sqlite3_db_filename,
+  sqlite3_db_readonly,
+  sqlite3_db_release_memory,
+  sqlite3_errstr,
+  sqlite3_stmt_busy,
+  sqlite3_stmt_readonly,
+  sqlite3_stricmp,
+  sqlite3_uri_boolean,
+  sqlite3_uri_int64,
+  sqlite3_uri_parameter,
+  sqlite3_vsnprintf,
+  sqlite3_wal_checkpoint_v2,
+  /* Version 3.8.7 and later */
+  sqlite3_auto_extension,
+  sqlite3_bind_blob64,
+  sqlite3_bind_text64,
+  sqlite3_cancel_auto_extension,
+  sqlite3_load_extension,
+  sqlite3_malloc64,
+  sqlite3_msize,
+  sqlite3_realloc64,
+  sqlite3_reset_auto_extension,
+  sqlite3_result_blob64,
+  sqlite3_result_text64,
+  sqlite3_strglob
+};
+
+/*
+** Attempt to load an SQLite extension library contained in the file
+** zFile.  The entry point is zProc.  zProc may be 0 in which case a
+** default entry point name (sqlite3_extension_init) is used.  Use
+** of the default name is recommended.
+**
+** Return SQLITE_OK on success and SQLITE_ERROR if something goes wrong.
+**
+** If an error occurs and pzErrMsg is not 0, then fill *pzErrMsg with 
+** error message text.  The calling function should free this memory
+** by calling sqlite3DbFree(db, ).
+*/
+static int sqlite3LoadExtension(
+  sqlite3 *db,          /* Load the extension into this database connection */
+  const char *zFile,    /* Name of the shared library containing extension */
+  const char *zProc,    /* Entry point.  Use "sqlite3_extension_init" if 0 */
+  char **pzErrMsg       /* Put error message here if not 0 */
+){
+  sqlite3_vfs *pVfs = db->pVfs;
+  void *handle;
+  int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*);
+  char *zErrmsg = 0;
+  const char *zEntry;
+  char *zAltEntry = 0;
+  void **aHandle;
+  int nMsg = 300 + sqlite3Strlen30(zFile);
+  int ii;
+
+  /* Shared library endings to try if zFile cannot be loaded as written */
+  static const char *azEndings[] = {
+#if SQLITE_OS_WIN
+     "dll"   
+#elif defined(__APPLE__)
+     "dylib"
+#else
+     "so"
+#endif
+  };
+
+
+  if( pzErrMsg ) *pzErrMsg = 0;
+
+  /* Ticket #1863.  To avoid a creating security problems for older
+  ** applications that relink against newer versions of SQLite, the
+  ** ability to run load_extension is turned off by default.  One
+  ** must call sqlite3_enable_load_extension() to turn on extension
+  ** loading.  Otherwise you get the following error.
+  */
+  if( (db->flags & SQLITE_LoadExtension)==0 ){
+    if( pzErrMsg ){
+      *pzErrMsg = sqlite3_mprintf("not authorized");
+    }
+    return SQLITE_ERROR;
+  }
+
+  zEntry = zProc ? zProc : "sqlite3_extension_init";
+
+  handle = sqlite3OsDlOpen(pVfs, zFile);
+#if SQLITE_OS_UNIX || SQLITE_OS_WIN
+  for(ii=0; ii<ArraySize(azEndings) && handle==0; ii++){
+    char *zAltFile = sqlite3_mprintf("%s.%s", zFile, azEndings[ii]);
+    if( zAltFile==0 ) return SQLITE_NOMEM;
+    handle = sqlite3OsDlOpen(pVfs, zAltFile);
+    sqlite3_free(zAltFile);
+  }
+#endif
+  if( handle==0 ){
+    if( pzErrMsg ){
+      *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg);
+      if( zErrmsg ){
+        sqlite3_snprintf(nMsg, zErrmsg, 
+            "unable to open shared library [%s]", zFile);
+        sqlite3OsDlError(pVfs, nMsg-1, zErrmsg);
+      }
+    }
+    return SQLITE_ERROR;
+  }
+  xInit = (int(*)(sqlite3*,char**,const sqlite3_api_routines*))
+                   sqlite3OsDlSym(pVfs, handle, zEntry);
+
+  /* If no entry point was specified and the default legacy
+  ** entry point name "sqlite3_extension_init" was not found, then
+  ** construct an entry point name "sqlite3_X_init" where the X is
+  ** replaced by the lowercase value of every ASCII alphabetic 
+  ** character in the filename after the last "/" upto the first ".",
+  ** and eliding the first three characters if they are "lib".  
+  ** Examples:
+  **
+  **    /usr/local/lib/libExample5.4.3.so ==>  sqlite3_example_init
+  **    C:/lib/mathfuncs.dll              ==>  sqlite3_mathfuncs_init
+  */
+  if( xInit==0 && zProc==0 ){
+    int iFile, iEntry, c;
+    int ncFile = sqlite3Strlen30(zFile);
+    zAltEntry = sqlite3_malloc(ncFile+30);
+    if( zAltEntry==0 ){
+      sqlite3OsDlClose(pVfs, handle);
+      return SQLITE_NOMEM;
+    }
+    memcpy(zAltEntry, "sqlite3_", 8);
+    for(iFile=ncFile-1; iFile>=0 && zFile[iFile]!='/'; iFile--){}
+    iFile++;
+    if( sqlite3_strnicmp(zFile+iFile, "lib", 3)==0 ) iFile += 3;
+    for(iEntry=8; (c = zFile[iFile])!=0 && c!='.'; iFile++){
+      if( sqlite3Isalpha(c) ){
+        zAltEntry[iEntry++] = (char)sqlite3UpperToLower[(unsigned)c];
+      }
+    }
+    memcpy(zAltEntry+iEntry, "_init", 6);
+    zEntry = zAltEntry;
+    xInit = (int(*)(sqlite3*,char**,const sqlite3_api_routines*))
+                     sqlite3OsDlSym(pVfs, handle, zEntry);
+  }
+  if( xInit==0 ){
+    if( pzErrMsg ){
+      nMsg += sqlite3Strlen30(zEntry);
+      *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg);
+      if( zErrmsg ){
+        sqlite3_snprintf(nMsg, zErrmsg,
+            "no entry point [%s] in shared library [%s]", zEntry, zFile);
+        sqlite3OsDlError(pVfs, nMsg-1, zErrmsg);
+      }
+    }
+    sqlite3OsDlClose(pVfs, handle);
+    sqlite3_free(zAltEntry);
+    return SQLITE_ERROR;
+  }
+  sqlite3_free(zAltEntry);
+  if( xInit(db, &zErrmsg, &sqlite3Apis) ){
+    if( pzErrMsg ){
+      *pzErrMsg = sqlite3_mprintf("error during initialization: %s", zErrmsg);
+    }
+    sqlite3_free(zErrmsg);
+    sqlite3OsDlClose(pVfs, handle);
+    return SQLITE_ERROR;
+  }
+
+  /* Append the new shared library handle to the db->aExtension array. */
+  aHandle = sqlite3DbMallocZero(db, sizeof(handle)*(db->nExtension+1));
+  if( aHandle==0 ){
+    return SQLITE_NOMEM;
+  }
+  if( db->nExtension>0 ){
+    memcpy(aHandle, db->aExtension, sizeof(handle)*db->nExtension);
+  }
+  sqlite3DbFree(db, db->aExtension);
+  db->aExtension = aHandle;
+
+  db->aExtension[db->nExtension++] = handle;
+  return SQLITE_OK;
+}
+SQLITE_API int sqlite3_load_extension(
+  sqlite3 *db,          /* Load the extension into this database connection */
+  const char *zFile,    /* Name of the shared library containing extension */
+  const char *zProc,    /* Entry point.  Use "sqlite3_extension_init" if 0 */
+  char **pzErrMsg       /* Put error message here if not 0 */
+){
+  int rc;
+  sqlite3_mutex_enter(db->mutex);
+  rc = sqlite3LoadExtension(db, zFile, zProc, pzErrMsg);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** Call this routine when the database connection is closing in order
+** to clean up loaded extensions
+*/
+SQLITE_PRIVATE void sqlite3CloseExtensions(sqlite3 *db){
+  int i;
+  assert( sqlite3_mutex_held(db->mutex) );
+  for(i=0; i<db->nExtension; i++){
+    sqlite3OsDlClose(db->pVfs, db->aExtension[i]);
+  }
+  sqlite3DbFree(db, db->aExtension);
+}
+
+/*
+** Enable or disable extension loading.  Extension loading is disabled by
+** default so as not to open security holes in older applications.
+*/
+SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff){
+  sqlite3_mutex_enter(db->mutex);
+  if( onoff ){
+    db->flags |= SQLITE_LoadExtension;
+  }else{
+    db->flags &= ~SQLITE_LoadExtension;
+  }
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+#endif /* SQLITE_OMIT_LOAD_EXTENSION */
+
+/*
+** The auto-extension code added regardless of whether or not extension
+** loading is supported.  We need a dummy sqlite3Apis pointer for that
+** code if regular extension loading is not available.  This is that
+** dummy pointer.
+*/
+#ifdef SQLITE_OMIT_LOAD_EXTENSION
+static const sqlite3_api_routines sqlite3Apis = { 0 };
+#endif
+
+
+/*
+** The following object holds the list of automatically loaded
+** extensions.
+**
+** This list is shared across threads.  The SQLITE_MUTEX_STATIC_MASTER
+** mutex must be held while accessing this list.
+*/
+typedef struct sqlite3AutoExtList sqlite3AutoExtList;
+static SQLITE_WSD struct sqlite3AutoExtList {
+  int nExt;              /* Number of entries in aExt[] */          
+  void (**aExt)(void);   /* Pointers to the extension init functions */
+} sqlite3Autoext = { 0, 0 };
+
+/* The "wsdAutoext" macro will resolve to the autoextension
+** state vector.  If writable static data is unsupported on the target,
+** we have to locate the state vector at run-time.  In the more common
+** case where writable static data is supported, wsdStat can refer directly
+** to the "sqlite3Autoext" state vector declared above.
+*/
+#ifdef SQLITE_OMIT_WSD
+# define wsdAutoextInit \
+  sqlite3AutoExtList *x = &GLOBAL(sqlite3AutoExtList,sqlite3Autoext)
+# define wsdAutoext x[0]
+#else
+# define wsdAutoextInit
+# define wsdAutoext sqlite3Autoext
+#endif
+
+
+/*
+** Register a statically linked extension that is automatically
+** loaded by every new database connection.
+*/
+SQLITE_API int sqlite3_auto_extension(void (*xInit)(void)){
+  int rc = SQLITE_OK;
+#ifndef SQLITE_OMIT_AUTOINIT
+  rc = sqlite3_initialize();
+  if( rc ){
+    return rc;
+  }else
+#endif
+  {
+    int i;
+#if SQLITE_THREADSAFE
+    sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+    wsdAutoextInit;
+    sqlite3_mutex_enter(mutex);
+    for(i=0; i<wsdAutoext.nExt; i++){
+      if( wsdAutoext.aExt[i]==xInit ) break;
+    }
+    if( i==wsdAutoext.nExt ){
+      int nByte = (wsdAutoext.nExt+1)*sizeof(wsdAutoext.aExt[0]);
+      void (**aNew)(void);
+      aNew = sqlite3_realloc(wsdAutoext.aExt, nByte);
+      if( aNew==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        wsdAutoext.aExt = aNew;
+        wsdAutoext.aExt[wsdAutoext.nExt] = xInit;
+        wsdAutoext.nExt++;
+      }
+    }
+    sqlite3_mutex_leave(mutex);
+    assert( (rc&0xff)==rc );
+    return rc;
+  }
+}
+
+/*
+** Cancel a prior call to sqlite3_auto_extension.  Remove xInit from the
+** set of routines that is invoked for each new database connection, if it
+** is currently on the list.  If xInit is not on the list, then this
+** routine is a no-op.
+**
+** Return 1 if xInit was found on the list and removed.  Return 0 if xInit
+** was not on the list.
+*/
+SQLITE_API int sqlite3_cancel_auto_extension(void (*xInit)(void)){
+#if SQLITE_THREADSAFE
+  sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+  int i;
+  int n = 0;
+  wsdAutoextInit;
+  sqlite3_mutex_enter(mutex);
+  for(i=wsdAutoext.nExt-1; i>=0; i--){
+    if( wsdAutoext.aExt[i]==xInit ){
+      wsdAutoext.nExt--;
+      wsdAutoext.aExt[i] = wsdAutoext.aExt[wsdAutoext.nExt];
+      n++;
+      break;
+    }
+  }
+  sqlite3_mutex_leave(mutex);
+  return n;
+}
+
+/*
+** Reset the automatic extension loading mechanism.
+*/
+SQLITE_API void sqlite3_reset_auto_extension(void){
+#ifndef SQLITE_OMIT_AUTOINIT
+  if( sqlite3_initialize()==SQLITE_OK )
+#endif
+  {
+#if SQLITE_THREADSAFE
+    sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+    wsdAutoextInit;
+    sqlite3_mutex_enter(mutex);
+    sqlite3_free(wsdAutoext.aExt);
+    wsdAutoext.aExt = 0;
+    wsdAutoext.nExt = 0;
+    sqlite3_mutex_leave(mutex);
+  }
+}
+
+/*
+** Load all automatic extensions.
+**
+** If anything goes wrong, set an error in the database connection.
+*/
+SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3 *db){
+  int i;
+  int go = 1;
+  int rc;
+  int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*);
+
+  wsdAutoextInit;
+  if( wsdAutoext.nExt==0 ){
+    /* Common case: early out without every having to acquire a mutex */
+    return;
+  }
+  for(i=0; go; i++){
+    char *zErrmsg;
+#if SQLITE_THREADSAFE
+    sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
+    sqlite3_mutex_enter(mutex);
+    if( i>=wsdAutoext.nExt ){
+      xInit = 0;
+      go = 0;
+    }else{
+      xInit = (int(*)(sqlite3*,char**,const sqlite3_api_routines*))
+              wsdAutoext.aExt[i];
+    }
+    sqlite3_mutex_leave(mutex);
+    zErrmsg = 0;
+    if( xInit && (rc = xInit(db, &zErrmsg, &sqlite3Apis))!=0 ){
+      sqlite3ErrorWithMsg(db, rc,
+            "automatic extension loading failed: %s", zErrmsg);
+      go = 0;
+    }
+    sqlite3_free(zErrmsg);
+  }
+}
+
+/************** End of loadext.c *********************************************/
+/************** Begin file pragma.c ******************************************/
+/*
+** 2003 April 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to implement the PRAGMA command.
+*/
+
+#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
+#  if defined(__APPLE__)
+#    define SQLITE_ENABLE_LOCKING_STYLE 1
+#  else
+#    define SQLITE_ENABLE_LOCKING_STYLE 0
+#  endif
+#endif
+
+/***************************************************************************
+** The next block of code, including the PragTyp_XXXX macro definitions and
+** the aPragmaName[] object is composed of generated code. DO NOT EDIT.
+**
+** To add new pragmas, edit the code in ../tool/mkpragmatab.tcl and rerun
+** that script.  Then copy/paste the output in place of the following:
+*/
+#define PragTyp_HEADER_VALUE                   0
+#define PragTyp_AUTO_VACUUM                    1
+#define PragTyp_FLAG                           2
+#define PragTyp_BUSY_TIMEOUT                   3
+#define PragTyp_CACHE_SIZE                     4
+#define PragTyp_CASE_SENSITIVE_LIKE            5
+#define PragTyp_COLLATION_LIST                 6
+#define PragTyp_COMPILE_OPTIONS                7
+#define PragTyp_DATA_STORE_DIRECTORY           8
+#define PragTyp_DATABASE_LIST                  9
+#define PragTyp_DEFAULT_CACHE_SIZE            10
+#define PragTyp_ENCODING                      11
+#define PragTyp_FOREIGN_KEY_CHECK             12
+#define PragTyp_FOREIGN_KEY_LIST              13
+#define PragTyp_INCREMENTAL_VACUUM            14
+#define PragTyp_INDEX_INFO                    15
+#define PragTyp_INDEX_LIST                    16
+#define PragTyp_INTEGRITY_CHECK               17
+#define PragTyp_JOURNAL_MODE                  18
+#define PragTyp_JOURNAL_SIZE_LIMIT            19
+#define PragTyp_LOCK_PROXY_FILE               20
+#define PragTyp_LOCKING_MODE                  21
+#define PragTyp_PAGE_COUNT                    22
+#define PragTyp_MMAP_SIZE                     23
+#define PragTyp_PAGE_SIZE                     24
+#define PragTyp_SECURE_DELETE                 25
+#define PragTyp_SHRINK_MEMORY                 26
+#define PragTyp_SOFT_HEAP_LIMIT               27
+#define PragTyp_STATS                         28
+#define PragTyp_SYNCHRONOUS                   29
+#define PragTyp_TABLE_INFO                    30
+#define PragTyp_TEMP_STORE                    31
+#define PragTyp_TEMP_STORE_DIRECTORY          32
+#define PragTyp_THREADS                       33
+#define PragTyp_WAL_AUTOCHECKPOINT            34
+#define PragTyp_WAL_CHECKPOINT                35
+#define PragTyp_ACTIVATE_EXTENSIONS           36
+#define PragTyp_HEXKEY                        37
+#define PragTyp_KEY                           38
+#define PragTyp_REKEY                         39
+#define PragTyp_LOCK_STATUS                   40
+#define PragTyp_PARSER_TRACE                  41
+#define PragFlag_NeedSchema           0x01
+#define PragFlag_ReadOnly             0x02
+static const struct sPragmaNames {
+  const char *const zName;  /* Name of pragma */
+  u8 ePragTyp;              /* PragTyp_XXX value */
+  u8 mPragFlag;             /* Zero or more PragFlag_XXX values */
+  u32 iArg;                 /* Extra argument */
+} aPragmaNames[] = {
+#if defined(SQLITE_HAS_CODEC) || defined(SQLITE_ENABLE_CEROD)
+  { /* zName:     */ "activate_extensions",
+    /* ePragTyp:  */ PragTyp_ACTIVATE_EXTENSIONS,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
+  { /* zName:     */ "application_id",
+    /* ePragTyp:  */ PragTyp_HEADER_VALUE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ BTREE_APPLICATION_ID },
+#endif
+#if !defined(SQLITE_OMIT_AUTOVACUUM)
+  { /* zName:     */ "auto_vacuum",
+    /* ePragTyp:  */ PragTyp_AUTO_VACUUM,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if !defined(SQLITE_OMIT_AUTOMATIC_INDEX)
+  { /* zName:     */ "automatic_index",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_AutoIndex },
+#endif
+#endif
+  { /* zName:     */ "busy_timeout",
+    /* ePragTyp:  */ PragTyp_BUSY_TIMEOUT,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "cache_size",
+    /* ePragTyp:  */ PragTyp_CACHE_SIZE,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "cache_spill",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_CacheSpill },
+#endif
+  { /* zName:     */ "case_sensitive_like",
+    /* ePragTyp:  */ PragTyp_CASE_SENSITIVE_LIKE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "checkpoint_fullfsync",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_CkptFullFSync },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)
+  { /* zName:     */ "collation_list",
+    /* ePragTyp:  */ PragTyp_COLLATION_LIST,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_COMPILEOPTION_DIAGS)
+  { /* zName:     */ "compile_options",
+    /* ePragTyp:  */ PragTyp_COMPILE_OPTIONS,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "count_changes",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_CountRows },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && SQLITE_OS_WIN
+  { /* zName:     */ "data_store_directory",
+    /* ePragTyp:  */ PragTyp_DATA_STORE_DIRECTORY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
+  { /* zName:     */ "data_version",
+    /* ePragTyp:  */ PragTyp_HEADER_VALUE,
+    /* ePragFlag: */ PragFlag_ReadOnly,
+    /* iArg:      */ BTREE_DATA_VERSION },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)
+  { /* zName:     */ "database_list",
+    /* ePragTyp:  */ PragTyp_DATABASE_LIST,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && !defined(SQLITE_OMIT_DEPRECATED)
+  { /* zName:     */ "default_cache_size",
+    /* ePragTyp:  */ PragTyp_DEFAULT_CACHE_SIZE,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+  { /* zName:     */ "defer_foreign_keys",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_DeferFKs },
+#endif
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "empty_result_callbacks",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_NullCallback },
+#endif
+#if !defined(SQLITE_OMIT_UTF16)
+  { /* zName:     */ "encoding",
+    /* ePragTyp:  */ PragTyp_ENCODING,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+  { /* zName:     */ "foreign_key_check",
+    /* ePragTyp:  */ PragTyp_FOREIGN_KEY_CHECK,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FOREIGN_KEY)
+  { /* zName:     */ "foreign_key_list",
+    /* ePragTyp:  */ PragTyp_FOREIGN_KEY_LIST,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER)
+  { /* zName:     */ "foreign_keys",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_ForeignKeys },
+#endif
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
+  { /* zName:     */ "freelist_count",
+    /* ePragTyp:  */ PragTyp_HEADER_VALUE,
+    /* ePragFlag: */ PragFlag_ReadOnly,
+    /* iArg:      */ BTREE_FREE_PAGE_COUNT },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "full_column_names",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_FullColNames },
+  { /* zName:     */ "fullfsync",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_FullFSync },
+#endif
+#if defined(SQLITE_HAS_CODEC)
+  { /* zName:     */ "hexkey",
+    /* ePragTyp:  */ PragTyp_HEXKEY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "hexrekey",
+    /* ePragTyp:  */ PragTyp_HEXKEY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if !defined(SQLITE_OMIT_CHECK)
+  { /* zName:     */ "ignore_check_constraints",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_IgnoreChecks },
+#endif
+#endif
+#if !defined(SQLITE_OMIT_AUTOVACUUM)
+  { /* zName:     */ "incremental_vacuum",
+    /* ePragTyp:  */ PragTyp_INCREMENTAL_VACUUM,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)
+  { /* zName:     */ "index_info",
+    /* ePragTyp:  */ PragTyp_INDEX_INFO,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "index_list",
+    /* ePragTyp:  */ PragTyp_INDEX_LIST,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_INTEGRITY_CHECK)
+  { /* zName:     */ "integrity_check",
+    /* ePragTyp:  */ PragTyp_INTEGRITY_CHECK,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "journal_mode",
+    /* ePragTyp:  */ PragTyp_JOURNAL_MODE,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "journal_size_limit",
+    /* ePragTyp:  */ PragTyp_JOURNAL_SIZE_LIMIT,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if defined(SQLITE_HAS_CODEC)
+  { /* zName:     */ "key",
+    /* ePragTyp:  */ PragTyp_KEY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "legacy_file_format",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_LegacyFileFmt },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && SQLITE_ENABLE_LOCKING_STYLE
+  { /* zName:     */ "lock_proxy_file",
+    /* ePragTyp:  */ PragTyp_LOCK_PROXY_FILE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
+  { /* zName:     */ "lock_status",
+    /* ePragTyp:  */ PragTyp_LOCK_STATUS,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "locking_mode",
+    /* ePragTyp:  */ PragTyp_LOCKING_MODE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "max_page_count",
+    /* ePragTyp:  */ PragTyp_PAGE_COUNT,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "mmap_size",
+    /* ePragTyp:  */ PragTyp_MMAP_SIZE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "page_count",
+    /* ePragTyp:  */ PragTyp_PAGE_COUNT,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "page_size",
+    /* ePragTyp:  */ PragTyp_PAGE_SIZE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if defined(SQLITE_DEBUG)
+  { /* zName:     */ "parser_trace",
+    /* ePragTyp:  */ PragTyp_PARSER_TRACE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "query_only",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_QueryOnly },
+#endif
+#if !defined(SQLITE_OMIT_INTEGRITY_CHECK)
+  { /* zName:     */ "quick_check",
+    /* ePragTyp:  */ PragTyp_INTEGRITY_CHECK,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "read_uncommitted",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_ReadUncommitted },
+  { /* zName:     */ "recursive_triggers",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_RecTriggers },
+#endif
+#if defined(SQLITE_HAS_CODEC)
+  { /* zName:     */ "rekey",
+    /* ePragTyp:  */ PragTyp_REKEY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "reverse_unordered_selects",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_ReverseOrder },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
+  { /* zName:     */ "schema_version",
+    /* ePragTyp:  */ PragTyp_HEADER_VALUE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ BTREE_SCHEMA_VERSION },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "secure_delete",
+    /* ePragTyp:  */ PragTyp_SECURE_DELETE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "short_column_names",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_ShortColNames },
+#endif
+  { /* zName:     */ "shrink_memory",
+    /* ePragTyp:  */ PragTyp_SHRINK_MEMORY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "soft_heap_limit",
+    /* ePragTyp:  */ PragTyp_SOFT_HEAP_LIMIT,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if defined(SQLITE_DEBUG)
+  { /* zName:     */ "sql_trace",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_SqlTrace },
+#endif
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)
+  { /* zName:     */ "stats",
+    /* ePragTyp:  */ PragTyp_STATS,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "synchronous",
+    /* ePragTyp:  */ PragTyp_SYNCHRONOUS,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS)
+  { /* zName:     */ "table_info",
+    /* ePragTyp:  */ PragTyp_TABLE_INFO,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  { /* zName:     */ "temp_store",
+    /* ePragTyp:  */ PragTyp_TEMP_STORE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "temp_store_directory",
+    /* ePragTyp:  */ PragTyp_TEMP_STORE_DIRECTORY,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#endif
+  { /* zName:     */ "threads",
+    /* ePragTyp:  */ PragTyp_THREADS,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+#if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
+  { /* zName:     */ "user_version",
+    /* ePragTyp:  */ PragTyp_HEADER_VALUE,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ BTREE_USER_VERSION },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+#if defined(SQLITE_DEBUG)
+  { /* zName:     */ "vdbe_addoptrace",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_VdbeAddopTrace },
+  { /* zName:     */ "vdbe_debug",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_SqlTrace|SQLITE_VdbeListing|SQLITE_VdbeTrace },
+  { /* zName:     */ "vdbe_eqp",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_VdbeEQP },
+  { /* zName:     */ "vdbe_listing",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_VdbeListing },
+  { /* zName:     */ "vdbe_trace",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_VdbeTrace },
+#endif
+#endif
+#if !defined(SQLITE_OMIT_WAL)
+  { /* zName:     */ "wal_autocheckpoint",
+    /* ePragTyp:  */ PragTyp_WAL_AUTOCHECKPOINT,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
+  { /* zName:     */ "wal_checkpoint",
+    /* ePragTyp:  */ PragTyp_WAL_CHECKPOINT,
+    /* ePragFlag: */ PragFlag_NeedSchema,
+    /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
+  { /* zName:     */ "writable_schema",
+    /* ePragTyp:  */ PragTyp_FLAG,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ SQLITE_WriteSchema|SQLITE_RecoveryMode },
+#endif
+};
+/* Number of pragmas: 58 on by default, 71 total. */
+/* End of the automatically generated pragma table.
+***************************************************************************/
+
+/*
+** Interpret the given string as a safety level.  Return 0 for OFF,
+** 1 for ON or NORMAL and 2 for FULL.  Return 1 for an empty or 
+** unrecognized string argument.  The FULL option is disallowed
+** if the omitFull parameter it 1.
+**
+** Note that the values returned are one less that the values that
+** should be passed into sqlite3BtreeSetSafetyLevel().  The is done
+** to support legacy SQL code.  The safety level used to be boolean
+** and older scripts may have used numbers 0 for OFF and 1 for ON.
+*/
+static u8 getSafetyLevel(const char *z, int omitFull, u8 dflt){
+                             /* 123456789 123456789 */
+  static const char zText[] = "onoffalseyestruefull";
+  static const u8 iOffset[] = {0, 1, 2, 4, 9, 12, 16};
+  static const u8 iLength[] = {2, 2, 3, 5, 3, 4, 4};
+  static const u8 iValue[] =  {1, 0, 0, 0, 1, 1, 2};
+  int i, n;
+  if( sqlite3Isdigit(*z) ){
+    return (u8)sqlite3Atoi(z);
+  }
+  n = sqlite3Strlen30(z);
+  for(i=0; i<ArraySize(iLength)-omitFull; i++){
+    if( iLength[i]==n && sqlite3StrNICmp(&zText[iOffset[i]],z,n)==0 ){
+      return iValue[i];
+    }
+  }
+  return dflt;
+}
+
+/*
+** Interpret the given string as a boolean value.
+*/
+SQLITE_PRIVATE u8 sqlite3GetBoolean(const char *z, u8 dflt){
+  return getSafetyLevel(z,1,dflt)!=0;
+}
+
+/* The sqlite3GetBoolean() function is used by other modules but the
+** remainder of this file is specific to PRAGMA processing.  So omit
+** the rest of the file if PRAGMAs are omitted from the build.
+*/
+#if !defined(SQLITE_OMIT_PRAGMA)
+
+/*
+** Interpret the given string as a locking mode value.
+*/
+static int getLockingMode(const char *z){
+  if( z ){
+    if( 0==sqlite3StrICmp(z, "exclusive") ) return PAGER_LOCKINGMODE_EXCLUSIVE;
+    if( 0==sqlite3StrICmp(z, "normal") ) return PAGER_LOCKINGMODE_NORMAL;
+  }
+  return PAGER_LOCKINGMODE_QUERY;
+}
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+/*
+** Interpret the given string as an auto-vacuum mode value.
+**
+** The following strings, "none", "full" and "incremental" are 
+** acceptable, as are their numeric equivalents: 0, 1 and 2 respectively.
+*/
+static int getAutoVacuum(const char *z){
+  int i;
+  if( 0==sqlite3StrICmp(z, "none") ) return BTREE_AUTOVACUUM_NONE;
+  if( 0==sqlite3StrICmp(z, "full") ) return BTREE_AUTOVACUUM_FULL;
+  if( 0==sqlite3StrICmp(z, "incremental") ) return BTREE_AUTOVACUUM_INCR;
+  i = sqlite3Atoi(z);
+  return (u8)((i>=0&&i<=2)?i:0);
+}
+#endif /* ifndef SQLITE_OMIT_AUTOVACUUM */
+
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+/*
+** Interpret the given string as a temp db location. Return 1 for file
+** backed temporary databases, 2 for the Red-Black tree in memory database
+** and 0 to use the compile-time default.
+*/
+static int getTempStore(const char *z){
+  if( z[0]>='0' && z[0]<='2' ){
+    return z[0] - '0';
+  }else if( sqlite3StrICmp(z, "file")==0 ){
+    return 1;
+  }else if( sqlite3StrICmp(z, "memory")==0 ){
+    return 2;
+  }else{
+    return 0;
+  }
+}
+#endif /* SQLITE_PAGER_PRAGMAS */
+
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+/*
+** Invalidate temp storage, either when the temp storage is changed
+** from default, or when 'file' and the temp_store_directory has changed
+*/
+static int invalidateTempStorage(Parse *pParse){
+  sqlite3 *db = pParse->db;
+  if( db->aDb[1].pBt!=0 ){
+    if( !db->autoCommit || sqlite3BtreeIsInReadTrans(db->aDb[1].pBt) ){
+      sqlite3ErrorMsg(pParse, "temporary storage cannot be changed "
+        "from within a transaction");
+      return SQLITE_ERROR;
+    }
+    sqlite3BtreeClose(db->aDb[1].pBt);
+    db->aDb[1].pBt = 0;
+    sqlite3ResetAllSchemasOfConnection(db);
+  }
+  return SQLITE_OK;
+}
+#endif /* SQLITE_PAGER_PRAGMAS */
+
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+/*
+** If the TEMP database is open, close it and mark the database schema
+** as needing reloading.  This must be done when using the SQLITE_TEMP_STORE
+** or DEFAULT_TEMP_STORE pragmas.
+*/
+static int changeTempStorage(Parse *pParse, const char *zStorageType){
+  int ts = getTempStore(zStorageType);
+  sqlite3 *db = pParse->db;
+  if( db->temp_store==ts ) return SQLITE_OK;
+  if( invalidateTempStorage( pParse ) != SQLITE_OK ){
+    return SQLITE_ERROR;
+  }
+  db->temp_store = (u8)ts;
+  return SQLITE_OK;
+}
+#endif /* SQLITE_PAGER_PRAGMAS */
+
+/*
+** Generate code to return a single integer value.
+*/
+static void returnSingleInt(Parse *pParse, const char *zLabel, i64 value){
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  int mem = ++pParse->nMem;
+  i64 *pI64 = sqlite3DbMallocRaw(pParse->db, sizeof(value));
+  if( pI64 ){
+    memcpy(pI64, &value, sizeof(value));
+  }
+  sqlite3VdbeAddOp4(v, OP_Int64, 0, mem, 0, (char*)pI64, P4_INT64);
+  sqlite3VdbeSetNumCols(v, 1);
+  sqlite3VdbeSetColName(v, 0, COLNAME_NAME, zLabel, SQLITE_STATIC);
+  sqlite3VdbeAddOp2(v, OP_ResultRow, mem, 1);
+}
+
+
+/*
+** Set the safety_level and pager flags for pager iDb.  Or if iDb<0
+** set these values for all pagers.
+*/
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+static void setAllPagerFlags(sqlite3 *db){
+  if( db->autoCommit ){
+    Db *pDb = db->aDb;
+    int n = db->nDb;
+    assert( SQLITE_FullFSync==PAGER_FULLFSYNC );
+    assert( SQLITE_CkptFullFSync==PAGER_CKPT_FULLFSYNC );
+    assert( SQLITE_CacheSpill==PAGER_CACHESPILL );
+    assert( (PAGER_FULLFSYNC | PAGER_CKPT_FULLFSYNC | PAGER_CACHESPILL)
+             ==  PAGER_FLAGS_MASK );
+    assert( (pDb->safety_level & PAGER_SYNCHRONOUS_MASK)==pDb->safety_level );
+    while( (n--) > 0 ){
+      if( pDb->pBt ){
+        sqlite3BtreeSetPagerFlags(pDb->pBt,
+                 pDb->safety_level | (db->flags & PAGER_FLAGS_MASK) );
+      }
+      pDb++;
+    }
+  }
+}
+#else
+# define setAllPagerFlags(X)  /* no-op */
+#endif
+
+
+/*
+** Return a human-readable name for a constraint resolution action.
+*/
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+static const char *actionName(u8 action){
+  const char *zName;
+  switch( action ){
+    case OE_SetNull:  zName = "SET NULL";        break;
+    case OE_SetDflt:  zName = "SET DEFAULT";     break;
+    case OE_Cascade:  zName = "CASCADE";         break;
+    case OE_Restrict: zName = "RESTRICT";        break;
+    default:          zName = "NO ACTION";  
+                      assert( action==OE_None ); break;
+  }
+  return zName;
+}
+#endif
+
+
+/*
+** Parameter eMode must be one of the PAGER_JOURNALMODE_XXX constants
+** defined in pager.h. This function returns the associated lowercase
+** journal-mode name.
+*/
+SQLITE_PRIVATE const char *sqlite3JournalModename(int eMode){
+  static char * const azModeName[] = {
+    "delete", "persist", "off", "truncate", "memory"
+#ifndef SQLITE_OMIT_WAL
+     , "wal"
+#endif
+  };
+  assert( PAGER_JOURNALMODE_DELETE==0 );
+  assert( PAGER_JOURNALMODE_PERSIST==1 );
+  assert( PAGER_JOURNALMODE_OFF==2 );
+  assert( PAGER_JOURNALMODE_TRUNCATE==3 );
+  assert( PAGER_JOURNALMODE_MEMORY==4 );
+  assert( PAGER_JOURNALMODE_WAL==5 );
+  assert( eMode>=0 && eMode<=ArraySize(azModeName) );
+
+  if( eMode==ArraySize(azModeName) ) return 0;
+  return azModeName[eMode];
+}
+
+/*
+** Process a pragma statement.  
+**
+** Pragmas are of this form:
+**
+**      PRAGMA [database.]id [= value]
+**
+** The identifier might also be a string.  The value is a string, and
+** identifier, or a number.  If minusFlag is true, then the value is
+** a number that was preceded by a minus sign.
+**
+** If the left side is "database.id" then pId1 is the database name
+** and pId2 is the id.  If the left side is just "id" then pId1 is the
+** id and pId2 is any empty string.
+*/
+SQLITE_PRIVATE void sqlite3Pragma(
+  Parse *pParse, 
+  Token *pId1,        /* First part of [database.]id field */
+  Token *pId2,        /* Second part of [database.]id field, or NULL */
+  Token *pValue,      /* Token for <value>, or NULL */
+  int minusFlag       /* True if a '-' sign preceded <value> */
+){
+  char *zLeft = 0;       /* Nul-terminated UTF-8 string <id> */
+  char *zRight = 0;      /* Nul-terminated UTF-8 string <value>, or NULL */
+  const char *zDb = 0;   /* The database name */
+  Token *pId;            /* Pointer to <id> token */
+  char *aFcntl[4];       /* Argument to SQLITE_FCNTL_PRAGMA */
+  int iDb;               /* Database index for <database> */
+  int lwr, upr, mid = 0;       /* Binary search bounds */
+  int rc;                      /* return value form SQLITE_FCNTL_PRAGMA */
+  sqlite3 *db = pParse->db;    /* The database connection */
+  Db *pDb;                     /* The specific database being pragmaed */
+  Vdbe *v = sqlite3GetVdbe(pParse);  /* Prepared statement */
+
+  if( v==0 ) return;
+  sqlite3VdbeRunOnlyOnce(v);
+  pParse->nMem = 2;
+
+  /* Interpret the [database.] part of the pragma statement. iDb is the
+  ** index of the database this pragma is being applied to in db.aDb[]. */
+  iDb = sqlite3TwoPartName(pParse, pId1, pId2, &pId);
+  if( iDb<0 ) return;
+  pDb = &db->aDb[iDb];
+
+  /* If the temp database has been explicitly named as part of the 
+  ** pragma, make sure it is open. 
+  */
+  if( iDb==1 && sqlite3OpenTempDatabase(pParse) ){
+    return;
+  }
+
+  zLeft = sqlite3NameFromToken(db, pId);
+  if( !zLeft ) return;
+  if( minusFlag ){
+    zRight = sqlite3MPrintf(db, "-%T", pValue);
+  }else{
+    zRight = sqlite3NameFromToken(db, pValue);
+  }
+
+  assert( pId2 );
+  zDb = pId2->n>0 ? pDb->zName : 0;
+  if( sqlite3AuthCheck(pParse, SQLITE_PRAGMA, zLeft, zRight, zDb) ){
+    goto pragma_out;
+  }
+
+  /* Send an SQLITE_FCNTL_PRAGMA file-control to the underlying VFS
+  ** connection.  If it returns SQLITE_OK, then assume that the VFS
+  ** handled the pragma and generate a no-op prepared statement.
+  */
+  aFcntl[0] = 0;
+  aFcntl[1] = zLeft;
+  aFcntl[2] = zRight;
+  aFcntl[3] = 0;
+  db->busyHandler.nBusy = 0;
+  rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl);
+  if( rc==SQLITE_OK ){
+    if( aFcntl[0] ){
+      int mem = ++pParse->nMem;
+      sqlite3VdbeAddOp4(v, OP_String8, 0, mem, 0, aFcntl[0], 0);
+      sqlite3VdbeSetNumCols(v, 1);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "result", SQLITE_STATIC);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, mem, 1);
+      sqlite3_free(aFcntl[0]);
+    }
+    goto pragma_out;
+  }
+  if( rc!=SQLITE_NOTFOUND ){
+    if( aFcntl[0] ){
+      sqlite3ErrorMsg(pParse, "%s", aFcntl[0]);
+      sqlite3_free(aFcntl[0]);
+    }
+    pParse->nErr++;
+    pParse->rc = rc;
+    goto pragma_out;
+  }
+
+  /* Locate the pragma in the lookup table */
+  lwr = 0;
+  upr = ArraySize(aPragmaNames)-1;
+  while( lwr<=upr ){
+    mid = (lwr+upr)/2;
+    rc = sqlite3_stricmp(zLeft, aPragmaNames[mid].zName);
+    if( rc==0 ) break;
+    if( rc<0 ){
+      upr = mid - 1;
+    }else{
+      lwr = mid + 1;
+    }
+  }
+  if( lwr>upr ) goto pragma_out;
+
+  /* Make sure the database schema is loaded if the pragma requires that */
+  if( (aPragmaNames[mid].mPragFlag & PragFlag_NeedSchema)!=0 ){
+    if( sqlite3ReadSchema(pParse) ) goto pragma_out;
+  }
+
+  /* Jump to the appropriate pragma handler */
+  switch( aPragmaNames[mid].ePragTyp ){
+  
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && !defined(SQLITE_OMIT_DEPRECATED)
+  /*
+  **  PRAGMA [database.]default_cache_size
+  **  PRAGMA [database.]default_cache_size=N
+  **
+  ** The first form reports the current persistent setting for the
+  ** page cache size.  The value returned is the maximum number of
+  ** pages in the page cache.  The second form sets both the current
+  ** page cache size value and the persistent page cache size value
+  ** stored in the database file.
+  **
+  ** Older versions of SQLite would set the default cache size to a
+  ** negative number to indicate synchronous=OFF.  These days, synchronous
+  ** is always on by default regardless of the sign of the default cache
+  ** size.  But continue to take the absolute value of the default cache
+  ** size of historical compatibility.
+  */
+  case PragTyp_DEFAULT_CACHE_SIZE: {
+    static const int iLn = VDBE_OFFSET_LINENO(2);
+    static const VdbeOpList getCacheSize[] = {
+      { OP_Transaction, 0, 0,        0},                         /* 0 */
+      { OP_ReadCookie,  0, 1,        BTREE_DEFAULT_CACHE_SIZE},  /* 1 */
+      { OP_IfPos,       1, 8,        0},
+      { OP_Integer,     0, 2,        0},
+      { OP_Subtract,    1, 2,        1},
+      { OP_IfPos,       1, 8,        0},
+      { OP_Integer,     0, 1,        0},                         /* 6 */
+      { OP_Noop,        0, 0,        0},
+      { OP_ResultRow,   1, 1,        0},
+    };
+    int addr;
+    sqlite3VdbeUsesBtree(v, iDb);
+    if( !zRight ){
+      sqlite3VdbeSetNumCols(v, 1);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "cache_size", SQLITE_STATIC);
+      pParse->nMem += 2;
+      addr = sqlite3VdbeAddOpList(v, ArraySize(getCacheSize), getCacheSize,iLn);
+      sqlite3VdbeChangeP1(v, addr, iDb);
+      sqlite3VdbeChangeP1(v, addr+1, iDb);
+      sqlite3VdbeChangeP1(v, addr+6, SQLITE_DEFAULT_CACHE_SIZE);
+    }else{
+      int size = sqlite3AbsInt32(sqlite3Atoi(zRight));
+      sqlite3BeginWriteOperation(pParse, 0, iDb);
+      sqlite3VdbeAddOp2(v, OP_Integer, size, 1);
+      sqlite3VdbeAddOp3(v, OP_SetCookie, iDb, BTREE_DEFAULT_CACHE_SIZE, 1);
+      assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+      pDb->pSchema->cache_size = size;
+      sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size);
+    }
+    break;
+  }
+#endif /* !SQLITE_OMIT_PAGER_PRAGMAS && !SQLITE_OMIT_DEPRECATED */
+
+#if !defined(SQLITE_OMIT_PAGER_PRAGMAS)
+  /*
+  **  PRAGMA [database.]page_size
+  **  PRAGMA [database.]page_size=N
+  **
+  ** The first form reports the current setting for the
+  ** database page size in bytes.  The second form sets the
+  ** database page size value.  The value can only be set if
+  ** the database has not yet been created.
+  */
+  case PragTyp_PAGE_SIZE: {
+    Btree *pBt = pDb->pBt;
+    assert( pBt!=0 );
+    if( !zRight ){
+      int size = ALWAYS(pBt) ? sqlite3BtreeGetPageSize(pBt) : 0;
+      returnSingleInt(pParse, "page_size", size);
+    }else{
+      /* Malloc may fail when setting the page-size, as there is an internal
+      ** buffer that the pager module resizes using sqlite3_realloc().
+      */
+      db->nextPagesize = sqlite3Atoi(zRight);
+      if( SQLITE_NOMEM==sqlite3BtreeSetPageSize(pBt, db->nextPagesize,-1,0) ){
+        db->mallocFailed = 1;
+      }
+    }
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]secure_delete
+  **  PRAGMA [database.]secure_delete=ON/OFF
+  **
+  ** The first form reports the current setting for the
+  ** secure_delete flag.  The second form changes the secure_delete
+  ** flag setting and reports thenew value.
+  */
+  case PragTyp_SECURE_DELETE: {
+    Btree *pBt = pDb->pBt;
+    int b = -1;
+    assert( pBt!=0 );
+    if( zRight ){
+      b = sqlite3GetBoolean(zRight, 0);
+    }
+    if( pId2->n==0 && b>=0 ){
+      int ii;
+      for(ii=0; ii<db->nDb; ii++){
+        sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b);
+      }
+    }
+    b = sqlite3BtreeSecureDelete(pBt, b);
+    returnSingleInt(pParse, "secure_delete", b);
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]max_page_count
+  **  PRAGMA [database.]max_page_count=N
+  **
+  ** The first form reports the current setting for the
+  ** maximum number of pages in the database file.  The 
+  ** second form attempts to change this setting.  Both
+  ** forms return the current setting.
+  **
+  ** The absolute value of N is used.  This is undocumented and might
+  ** change.  The only purpose is to provide an easy way to test
+  ** the sqlite3AbsInt32() function.
+  **
+  **  PRAGMA [database.]page_count
+  **
+  ** Return the number of pages in the specified database.
+  */
+  case PragTyp_PAGE_COUNT: {
+    int iReg;
+    sqlite3CodeVerifySchema(pParse, iDb);
+    iReg = ++pParse->nMem;
+    if( sqlite3Tolower(zLeft[0])=='p' ){
+      sqlite3VdbeAddOp2(v, OP_Pagecount, iDb, iReg);
+    }else{
+      sqlite3VdbeAddOp3(v, OP_MaxPgcnt, iDb, iReg, 
+                        sqlite3AbsInt32(sqlite3Atoi(zRight)));
+    }
+    sqlite3VdbeAddOp2(v, OP_ResultRow, iReg, 1);
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, zLeft, SQLITE_TRANSIENT);
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]locking_mode
+  **  PRAGMA [database.]locking_mode = (normal|exclusive)
+  */
+  case PragTyp_LOCKING_MODE: {
+    const char *zRet = "normal";
+    int eMode = getLockingMode(zRight);
+
+    if( pId2->n==0 && eMode==PAGER_LOCKINGMODE_QUERY ){
+      /* Simple "PRAGMA locking_mode;" statement. This is a query for
+      ** the current default locking mode (which may be different to
+      ** the locking-mode of the main database).
+      */
+      eMode = db->dfltLockMode;
+    }else{
+      Pager *pPager;
+      if( pId2->n==0 ){
+        /* This indicates that no database name was specified as part
+        ** of the PRAGMA command. In this case the locking-mode must be
+        ** set on all attached databases, as well as the main db file.
+        **
+        ** Also, the sqlite3.dfltLockMode variable is set so that
+        ** any subsequently attached databases also use the specified
+        ** locking mode.
+        */
+        int ii;
+        assert(pDb==&db->aDb[0]);
+        for(ii=2; ii<db->nDb; ii++){
+          pPager = sqlite3BtreePager(db->aDb[ii].pBt);
+          sqlite3PagerLockingMode(pPager, eMode);
+        }
+        db->dfltLockMode = (u8)eMode;
+      }
+      pPager = sqlite3BtreePager(pDb->pBt);
+      eMode = sqlite3PagerLockingMode(pPager, eMode);
+    }
+
+    assert( eMode==PAGER_LOCKINGMODE_NORMAL
+            || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
+    if( eMode==PAGER_LOCKINGMODE_EXCLUSIVE ){
+      zRet = "exclusive";
+    }
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "locking_mode", SQLITE_STATIC);
+    sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, zRet, 0);
+    sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]journal_mode
+  **  PRAGMA [database.]journal_mode =
+  **                      (delete|persist|off|truncate|memory|wal|off)
+  */
+  case PragTyp_JOURNAL_MODE: {
+    int eMode;        /* One of the PAGER_JOURNALMODE_XXX symbols */
+    int ii;           /* Loop counter */
+
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC);
+
+    if( zRight==0 ){
+      /* If there is no "=MODE" part of the pragma, do a query for the
+      ** current mode */
+      eMode = PAGER_JOURNALMODE_QUERY;
+    }else{
+      const char *zMode;
+      int n = sqlite3Strlen30(zRight);
+      for(eMode=0; (zMode = sqlite3JournalModename(eMode))!=0; eMode++){
+        if( sqlite3StrNICmp(zRight, zMode, n)==0 ) break;
+      }
+      if( !zMode ){
+        /* If the "=MODE" part does not match any known journal mode,
+        ** then do a query */
+        eMode = PAGER_JOURNALMODE_QUERY;
+      }
+    }
+    if( eMode==PAGER_JOURNALMODE_QUERY && pId2->n==0 ){
+      /* Convert "PRAGMA journal_mode" into "PRAGMA main.journal_mode" */
+      iDb = 0;
+      pId2->n = 1;
+    }
+    for(ii=db->nDb-1; ii>=0; ii--){
+      if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){
+        sqlite3VdbeUsesBtree(v, ii);
+        sqlite3VdbeAddOp3(v, OP_JournalMode, ii, 1, eMode);
+      }
+    }
+    sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]journal_size_limit
+  **  PRAGMA [database.]journal_size_limit=N
+  **
+  ** Get or set the size limit on rollback journal files.
+  */
+  case PragTyp_JOURNAL_SIZE_LIMIT: {
+    Pager *pPager = sqlite3BtreePager(pDb->pBt);
+    i64 iLimit = -2;
+    if( zRight ){
+      sqlite3DecOrHexToI64(zRight, &iLimit);
+      if( iLimit<-1 ) iLimit = -1;
+    }
+    iLimit = sqlite3PagerJournalSizeLimit(pPager, iLimit);
+    returnSingleInt(pParse, "journal_size_limit", iLimit);
+    break;
+  }
+
+#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
+
+  /*
+  **  PRAGMA [database.]auto_vacuum
+  **  PRAGMA [database.]auto_vacuum=N
+  **
+  ** Get or set the value of the database 'auto-vacuum' parameter.
+  ** The value is one of:  0 NONE 1 FULL 2 INCREMENTAL
+  */
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  case PragTyp_AUTO_VACUUM: {
+    Btree *pBt = pDb->pBt;
+    assert( pBt!=0 );
+    if( !zRight ){
+      returnSingleInt(pParse, "auto_vacuum", sqlite3BtreeGetAutoVacuum(pBt));
+    }else{
+      int eAuto = getAutoVacuum(zRight);
+      assert( eAuto>=0 && eAuto<=2 );
+      db->nextAutovac = (u8)eAuto;
+      /* Call SetAutoVacuum() to set initialize the internal auto and
+      ** incr-vacuum flags. This is required in case this connection
+      ** creates the database file. It is important that it is created
+      ** as an auto-vacuum capable db.
+      */
+      rc = sqlite3BtreeSetAutoVacuum(pBt, eAuto);
+      if( rc==SQLITE_OK && (eAuto==1 || eAuto==2) ){
+        /* When setting the auto_vacuum mode to either "full" or 
+        ** "incremental", write the value of meta[6] in the database
+        ** file. Before writing to meta[6], check that meta[3] indicates
+        ** that this really is an auto-vacuum capable database.
+        */
+        static const int iLn = VDBE_OFFSET_LINENO(2);
+        static const VdbeOpList setMeta6[] = {
+          { OP_Transaction,    0,         1,                 0},    /* 0 */
+          { OP_ReadCookie,     0,         1,         BTREE_LARGEST_ROOT_PAGE},
+          { OP_If,             1,         0,                 0},    /* 2 */
+          { OP_Halt,           SQLITE_OK, OE_Abort,          0},    /* 3 */
+          { OP_Integer,        0,         1,                 0},    /* 4 */
+          { OP_SetCookie,      0,         BTREE_INCR_VACUUM, 1},    /* 5 */
+        };
+        int iAddr;
+        iAddr = sqlite3VdbeAddOpList(v, ArraySize(setMeta6), setMeta6, iLn);
+        sqlite3VdbeChangeP1(v, iAddr, iDb);
+        sqlite3VdbeChangeP1(v, iAddr+1, iDb);
+        sqlite3VdbeChangeP2(v, iAddr+2, iAddr+4);
+        sqlite3VdbeChangeP1(v, iAddr+4, eAuto-1);
+        sqlite3VdbeChangeP1(v, iAddr+5, iDb);
+        sqlite3VdbeUsesBtree(v, iDb);
+      }
+    }
+    break;
+  }
+#endif
+
+  /*
+  **  PRAGMA [database.]incremental_vacuum(N)
+  **
+  ** Do N steps of incremental vacuuming on a database.
+  */
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  case PragTyp_INCREMENTAL_VACUUM: {
+    int iLimit, addr;
+    if( zRight==0 || !sqlite3GetInt32(zRight, &iLimit) || iLimit<=0 ){
+      iLimit = 0x7fffffff;
+    }
+    sqlite3BeginWriteOperation(pParse, 0, iDb);
+    sqlite3VdbeAddOp2(v, OP_Integer, iLimit, 1);
+    addr = sqlite3VdbeAddOp1(v, OP_IncrVacuum, iDb); VdbeCoverage(v);
+    sqlite3VdbeAddOp1(v, OP_ResultRow, 1);
+    sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1);
+    sqlite3VdbeAddOp2(v, OP_IfPos, 1, addr); VdbeCoverage(v);
+    sqlite3VdbeJumpHere(v, addr);
+    break;
+  }
+#endif
+
+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
+  /*
+  **  PRAGMA [database.]cache_size
+  **  PRAGMA [database.]cache_size=N
+  **
+  ** The first form reports the current local setting for the
+  ** page cache size. The second form sets the local
+  ** page cache size value.  If N is positive then that is the
+  ** number of pages in the cache.  If N is negative, then the
+  ** number of pages is adjusted so that the cache uses -N kibibytes
+  ** of memory.
+  */
+  case PragTyp_CACHE_SIZE: {
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    if( !zRight ){
+      returnSingleInt(pParse, "cache_size", pDb->pSchema->cache_size);
+    }else{
+      int size = sqlite3Atoi(zRight);
+      pDb->pSchema->cache_size = size;
+      sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size);
+    }
+    break;
+  }
+
+  /*
+  **  PRAGMA [database.]mmap_size(N)
+  **
+  ** Used to set mapping size limit. The mapping size limit is
+  ** used to limit the aggregate size of all memory mapped regions of the
+  ** database file. If this parameter is set to zero, then memory mapping
+  ** is not used at all.  If N is negative, then the default memory map
+  ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_SIZE) is set.
+  ** The parameter N is measured in bytes.
+  **
+  ** This value is advisory.  The underlying VFS is free to memory map
+  ** as little or as much as it wants.  Except, if N is set to 0 then the
+  ** upper layers will never invoke the xFetch interfaces to the VFS.
+  */
+  case PragTyp_MMAP_SIZE: {
+    sqlite3_int64 sz;
+#if SQLITE_MAX_MMAP_SIZE>0
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    if( zRight ){
+      int ii;
+      sqlite3DecOrHexToI64(zRight, &sz);
+      if( sz<0 ) sz = sqlite3GlobalConfig.szMmap;
+      if( pId2->n==0 ) db->szMmap = sz;
+      for(ii=db->nDb-1; ii>=0; ii--){
+        if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){
+          sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, sz);
+        }
+      }
+    }
+    sz = -1;
+    rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_MMAP_SIZE, &sz);
+#else
+    sz = 0;
+    rc = SQLITE_OK;
+#endif
+    if( rc==SQLITE_OK ){
+      returnSingleInt(pParse, "mmap_size", sz);
+    }else if( rc!=SQLITE_NOTFOUND ){
+      pParse->nErr++;
+      pParse->rc = rc;
+    }
+    break;
+  }
+
+  /*
+  **   PRAGMA temp_store
+  **   PRAGMA temp_store = "default"|"memory"|"file"
+  **
+  ** Return or set the local value of the temp_store flag.  Changing
+  ** the local value does not make changes to the disk file and the default
+  ** value will be restored the next time the database is opened.
+  **
+  ** Note that it is possible for the library compile-time options to
+  ** override this setting
+  */
+  case PragTyp_TEMP_STORE: {
+    if( !zRight ){
+      returnSingleInt(pParse, "temp_store", db->temp_store);
+    }else{
+      changeTempStorage(pParse, zRight);
+    }
+    break;
+  }
+
+  /*
+  **   PRAGMA temp_store_directory
+  **   PRAGMA temp_store_directory = ""|"directory_name"
+  **
+  ** Return or set the local value of the temp_store_directory flag.  Changing
+  ** the value sets a specific directory to be used for temporary files.
+  ** Setting to a null string reverts to the default temporary directory search.
+  ** If temporary directory is changed, then invalidateTempStorage.
+  **
+  */
+  case PragTyp_TEMP_STORE_DIRECTORY: {
+    if( !zRight ){
+      if( sqlite3_temp_directory ){
+        sqlite3VdbeSetNumCols(v, 1);
+        sqlite3VdbeSetColName(v, 0, COLNAME_NAME, 
+            "temp_store_directory", SQLITE_STATIC);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, sqlite3_temp_directory, 0);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+      }
+    }else{
+#ifndef SQLITE_OMIT_WSD
+      if( zRight[0] ){
+        int res;
+        rc = sqlite3OsAccess(db->pVfs, zRight, SQLITE_ACCESS_READWRITE, &res);
+        if( rc!=SQLITE_OK || res==0 ){
+          sqlite3ErrorMsg(pParse, "not a writable directory");
+          goto pragma_out;
+        }
+      }
+      if( SQLITE_TEMP_STORE==0
+       || (SQLITE_TEMP_STORE==1 && db->temp_store<=1)
+       || (SQLITE_TEMP_STORE==2 && db->temp_store==1)
+      ){
+        invalidateTempStorage(pParse);
+      }
+      sqlite3_free(sqlite3_temp_directory);
+      if( zRight[0] ){
+        sqlite3_temp_directory = sqlite3_mprintf("%s", zRight);
+      }else{
+        sqlite3_temp_directory = 0;
+      }
+#endif /* SQLITE_OMIT_WSD */
+    }
+    break;
+  }
+
+#if SQLITE_OS_WIN
+  /*
+  **   PRAGMA data_store_directory
+  **   PRAGMA data_store_directory = ""|"directory_name"
+  **
+  ** Return or set the local value of the data_store_directory flag.  Changing
+  ** the value sets a specific directory to be used for database files that
+  ** were specified with a relative pathname.  Setting to a null string reverts
+  ** to the default database directory, which for database files specified with
+  ** a relative path will probably be based on the current directory for the
+  ** process.  Database file specified with an absolute path are not impacted
+  ** by this setting, regardless of its value.
+  **
+  */
+  case PragTyp_DATA_STORE_DIRECTORY: {
+    if( !zRight ){
+      if( sqlite3_data_directory ){
+        sqlite3VdbeSetNumCols(v, 1);
+        sqlite3VdbeSetColName(v, 0, COLNAME_NAME, 
+            "data_store_directory", SQLITE_STATIC);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, sqlite3_data_directory, 0);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+      }
+    }else{
+#ifndef SQLITE_OMIT_WSD
+      if( zRight[0] ){
+        int res;
+        rc = sqlite3OsAccess(db->pVfs, zRight, SQLITE_ACCESS_READWRITE, &res);
+        if( rc!=SQLITE_OK || res==0 ){
+          sqlite3ErrorMsg(pParse, "not a writable directory");
+          goto pragma_out;
+        }
+      }
+      sqlite3_free(sqlite3_data_directory);
+      if( zRight[0] ){
+        sqlite3_data_directory = sqlite3_mprintf("%s", zRight);
+      }else{
+        sqlite3_data_directory = 0;
+      }
+#endif /* SQLITE_OMIT_WSD */
+    }
+    break;
+  }
+#endif
+
+#if SQLITE_ENABLE_LOCKING_STYLE
+  /*
+  **   PRAGMA [database.]lock_proxy_file
+  **   PRAGMA [database.]lock_proxy_file = ":auto:"|"lock_file_path"
+  **
+  ** Return or set the value of the lock_proxy_file flag.  Changing
+  ** the value sets a specific file to be used for database access locks.
+  **
+  */
+  case PragTyp_LOCK_PROXY_FILE: {
+    if( !zRight ){
+      Pager *pPager = sqlite3BtreePager(pDb->pBt);
+      char *proxy_file_path = NULL;
+      sqlite3_file *pFile = sqlite3PagerFile(pPager);
+      sqlite3OsFileControlHint(pFile, SQLITE_GET_LOCKPROXYFILE, 
+                           &proxy_file_path);
+      
+      if( proxy_file_path ){
+        sqlite3VdbeSetNumCols(v, 1);
+        sqlite3VdbeSetColName(v, 0, COLNAME_NAME, 
+                              "lock_proxy_file", SQLITE_STATIC);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, proxy_file_path, 0);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+      }
+    }else{
+      Pager *pPager = sqlite3BtreePager(pDb->pBt);
+      sqlite3_file *pFile = sqlite3PagerFile(pPager);
+      int res;
+      if( zRight[0] ){
+        res=sqlite3OsFileControl(pFile, SQLITE_SET_LOCKPROXYFILE, 
+                                     zRight);
+      } else {
+        res=sqlite3OsFileControl(pFile, SQLITE_SET_LOCKPROXYFILE, 
+                                     NULL);
+      }
+      if( res!=SQLITE_OK ){
+        sqlite3ErrorMsg(pParse, "failed to set lock proxy file");
+        goto pragma_out;
+      }
+    }
+    break;
+  }
+#endif /* SQLITE_ENABLE_LOCKING_STYLE */      
+    
+  /*
+  **   PRAGMA [database.]synchronous
+  **   PRAGMA [database.]synchronous=OFF|ON|NORMAL|FULL
+  **
+  ** Return or set the local value of the synchronous flag.  Changing
+  ** the local value does not make changes to the disk file and the
+  ** default value will be restored the next time the database is
+  ** opened.
+  */
+  case PragTyp_SYNCHRONOUS: {
+    if( !zRight ){
+      returnSingleInt(pParse, "synchronous", pDb->safety_level-1);
+    }else{
+      if( !db->autoCommit ){
+        sqlite3ErrorMsg(pParse, 
+            "Safety level may not be changed inside a transaction");
+      }else{
+        pDb->safety_level = getSafetyLevel(zRight,0,1)+1;
+        setAllPagerFlags(db);
+      }
+    }
+    break;
+  }
+#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
+
+#ifndef SQLITE_OMIT_FLAG_PRAGMAS
+  case PragTyp_FLAG: {
+    if( zRight==0 ){
+      returnSingleInt(pParse, aPragmaNames[mid].zName,
+                     (db->flags & aPragmaNames[mid].iArg)!=0 );
+    }else{
+      int mask = aPragmaNames[mid].iArg;    /* Mask of bits to set or clear. */
+      if( db->autoCommit==0 ){
+        /* Foreign key support may not be enabled or disabled while not
+        ** in auto-commit mode.  */
+        mask &= ~(SQLITE_ForeignKeys);
+      }
+#if SQLITE_USER_AUTHENTICATION
+      if( db->auth.authLevel==UAUTH_User ){
+        /* Do not allow non-admin users to modify the schema arbitrarily */
+        mask &= ~(SQLITE_WriteSchema);
+      }
+#endif
+
+      if( sqlite3GetBoolean(zRight, 0) ){
+        db->flags |= mask;
+      }else{
+        db->flags &= ~mask;
+        if( mask==SQLITE_DeferFKs ) db->nDeferredImmCons = 0;
+      }
+
+      /* Many of the flag-pragmas modify the code generated by the SQL 
+      ** compiler (eg. count_changes). So add an opcode to expire all
+      ** compiled SQL statements after modifying a pragma value.
+      */
+      sqlite3VdbeAddOp2(v, OP_Expire, 0, 0);
+      setAllPagerFlags(db);
+    }
+    break;
+  }
+#endif /* SQLITE_OMIT_FLAG_PRAGMAS */
+
+#ifndef SQLITE_OMIT_SCHEMA_PRAGMAS
+  /*
+  **   PRAGMA table_info(<table>)
+  **
+  ** Return a single row for each column of the named table. The columns of
+  ** the returned data set are:
+  **
+  ** cid:        Column id (numbered from left to right, starting at 0)
+  ** name:       Column name
+  ** type:       Column declaration type.
+  ** notnull:    True if 'NOT NULL' is part of column declaration
+  ** dflt_value: The default value for the column, if any.
+  */
+  case PragTyp_TABLE_INFO: if( zRight ){
+    Table *pTab;
+    pTab = sqlite3FindTable(db, zRight, zDb);
+    if( pTab ){
+      int i, k;
+      int nHidden = 0;
+      Column *pCol;
+      Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+      sqlite3VdbeSetNumCols(v, 6);
+      pParse->nMem = 6;
+      sqlite3CodeVerifySchema(pParse, iDb);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "cid", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "type", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "notnull", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 4, COLNAME_NAME, "dflt_value", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 5, COLNAME_NAME, "pk", SQLITE_STATIC);
+      sqlite3ViewGetColumnNames(pParse, pTab);
+      for(i=0, pCol=pTab->aCol; i<pTab->nCol; i++, pCol++){
+        if( IsHiddenColumn(pCol) ){
+          nHidden++;
+          continue;
+        }
+        sqlite3VdbeAddOp2(v, OP_Integer, i-nHidden, 1);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pCol->zName, 0);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0,
+           pCol->zType ? pCol->zType : "", 0);
+        sqlite3VdbeAddOp2(v, OP_Integer, (pCol->notNull ? 1 : 0), 4);
+        if( pCol->zDflt ){
+          sqlite3VdbeAddOp4(v, OP_String8, 0, 5, 0, (char*)pCol->zDflt, 0);
+        }else{
+          sqlite3VdbeAddOp2(v, OP_Null, 0, 5);
+        }
+        if( (pCol->colFlags & COLFLAG_PRIMKEY)==0 ){
+          k = 0;
+        }else if( pPk==0 ){
+          k = 1;
+        }else{
+          for(k=1; ALWAYS(k<=pTab->nCol) && pPk->aiColumn[k-1]!=i; k++){}
+        }
+        sqlite3VdbeAddOp2(v, OP_Integer, k, 6);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 6);
+      }
+    }
+  }
+  break;
+
+  case PragTyp_STATS: {
+    Index *pIdx;
+    HashElem *i;
+    v = sqlite3GetVdbe(pParse);
+    sqlite3VdbeSetNumCols(v, 4);
+    pParse->nMem = 4;
+    sqlite3CodeVerifySchema(pParse, iDb);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "table", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "index", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "width", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "height", SQLITE_STATIC);
+    for(i=sqliteHashFirst(&pDb->pSchema->tblHash); i; i=sqliteHashNext(i)){
+      Table *pTab = sqliteHashData(i);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, pTab->zName, 0);
+      sqlite3VdbeAddOp2(v, OP_Null, 0, 2);
+      sqlite3VdbeAddOp2(v, OP_Integer,
+                           (int)sqlite3LogEstToInt(pTab->szTabRow), 3);
+      sqlite3VdbeAddOp2(v, OP_Integer, 
+          (int)sqlite3LogEstToInt(pTab->nRowLogEst), 4);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
+      for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0);
+        sqlite3VdbeAddOp2(v, OP_Integer,
+                             (int)sqlite3LogEstToInt(pIdx->szIdxRow), 3);
+        sqlite3VdbeAddOp2(v, OP_Integer, 
+            (int)sqlite3LogEstToInt(pIdx->aiRowLogEst[0]), 4);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 4);
+      }
+    }
+  }
+  break;
+
+  case PragTyp_INDEX_INFO: if( zRight ){
+    Index *pIdx;
+    Table *pTab;
+    pIdx = sqlite3FindIndex(db, zRight, zDb);
+    if( pIdx ){
+      int i;
+      pTab = pIdx->pTable;
+      sqlite3VdbeSetNumCols(v, 3);
+      pParse->nMem = 3;
+      sqlite3CodeVerifySchema(pParse, iDb);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seqno", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "cid", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "name", SQLITE_STATIC);
+      for(i=0; i<pIdx->nKeyCol; i++){
+        i16 cnum = pIdx->aiColumn[i];
+        sqlite3VdbeAddOp2(v, OP_Integer, i, 1);
+        sqlite3VdbeAddOp2(v, OP_Integer, cnum, 2);
+        assert( pTab->nCol>cnum );
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, pTab->aCol[cnum].zName, 0);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 3);
+      }
+    }
+  }
+  break;
+
+  case PragTyp_INDEX_LIST: if( zRight ){
+    Index *pIdx;
+    Table *pTab;
+    int i;
+    pTab = sqlite3FindTable(db, zRight, zDb);
+    if( pTab ){
+      v = sqlite3GetVdbe(pParse);
+      sqlite3VdbeSetNumCols(v, 3);
+      pParse->nMem = 3;
+      sqlite3CodeVerifySchema(pParse, iDb);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seq", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC);
+      sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "unique", SQLITE_STATIC);
+      for(pIdx=pTab->pIndex, i=0; pIdx; pIdx=pIdx->pNext, i++){
+        sqlite3VdbeAddOp2(v, OP_Integer, i, 1);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pIdx->zName, 0);
+        sqlite3VdbeAddOp2(v, OP_Integer, IsUniqueIndex(pIdx), 3);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 3);
+      }
+    }
+  }
+  break;
+
+  case PragTyp_DATABASE_LIST: {
+    int i;
+    sqlite3VdbeSetNumCols(v, 3);
+    pParse->nMem = 3;
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seq", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "file", SQLITE_STATIC);
+    for(i=0; i<db->nDb; i++){
+      if( db->aDb[i].pBt==0 ) continue;
+      assert( db->aDb[i].zName!=0 );
+      sqlite3VdbeAddOp2(v, OP_Integer, i, 1);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, db->aDb[i].zName, 0);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0,
+           sqlite3BtreeGetFilename(db->aDb[i].pBt), 0);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 3);
+    }
+  }
+  break;
+
+  case PragTyp_COLLATION_LIST: {
+    int i = 0;
+    HashElem *p;
+    sqlite3VdbeSetNumCols(v, 2);
+    pParse->nMem = 2;
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "seq", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "name", SQLITE_STATIC);
+    for(p=sqliteHashFirst(&db->aCollSeq); p; p=sqliteHashNext(p)){
+      CollSeq *pColl = (CollSeq *)sqliteHashData(p);
+      sqlite3VdbeAddOp2(v, OP_Integer, i++, 1);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, pColl->zName, 0);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 2);
+    }
+  }
+  break;
+#endif /* SQLITE_OMIT_SCHEMA_PRAGMAS */
+
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+  case PragTyp_FOREIGN_KEY_LIST: if( zRight ){
+    FKey *pFK;
+    Table *pTab;
+    pTab = sqlite3FindTable(db, zRight, zDb);
+    if( pTab ){
+      v = sqlite3GetVdbe(pParse);
+      pFK = pTab->pFKey;
+      if( pFK ){
+        int i = 0; 
+        sqlite3VdbeSetNumCols(v, 8);
+        pParse->nMem = 8;
+        sqlite3CodeVerifySchema(pParse, iDb);
+        sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "id", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "seq", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "table", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "from", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 4, COLNAME_NAME, "to", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 5, COLNAME_NAME, "on_update", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 6, COLNAME_NAME, "on_delete", SQLITE_STATIC);
+        sqlite3VdbeSetColName(v, 7, COLNAME_NAME, "match", SQLITE_STATIC);
+        while(pFK){
+          int j;
+          for(j=0; j<pFK->nCol; j++){
+            char *zCol = pFK->aCol[j].zCol;
+            char *zOnDelete = (char *)actionName(pFK->aAction[0]);
+            char *zOnUpdate = (char *)actionName(pFK->aAction[1]);
+            sqlite3VdbeAddOp2(v, OP_Integer, i, 1);
+            sqlite3VdbeAddOp2(v, OP_Integer, j, 2);
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, pFK->zTo, 0);
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 4, 0,
+                              pTab->aCol[pFK->aCol[j].iFrom].zName, 0);
+            sqlite3VdbeAddOp4(v, zCol ? OP_String8 : OP_Null, 0, 5, 0, zCol, 0);
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 6, 0, zOnUpdate, 0);
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 7, 0, zOnDelete, 0);
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 8, 0, "NONE", 0);
+            sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 8);
+          }
+          ++i;
+          pFK = pFK->pNextFrom;
+        }
+      }
+    }
+  }
+  break;
+#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */
+
+#ifndef SQLITE_OMIT_FOREIGN_KEY
+#ifndef SQLITE_OMIT_TRIGGER
+  case PragTyp_FOREIGN_KEY_CHECK: {
+    FKey *pFK;             /* A foreign key constraint */
+    Table *pTab;           /* Child table contain "REFERENCES" keyword */
+    Table *pParent;        /* Parent table that child points to */
+    Index *pIdx;           /* Index in the parent table */
+    int i;                 /* Loop counter:  Foreign key number for pTab */
+    int j;                 /* Loop counter:  Field of the foreign key */
+    HashElem *k;           /* Loop counter:  Next table in schema */
+    int x;                 /* result variable */
+    int regResult;         /* 3 registers to hold a result row */
+    int regKey;            /* Register to hold key for checking the FK */
+    int regRow;            /* Registers to hold a row from pTab */
+    int addrTop;           /* Top of a loop checking foreign keys */
+    int addrOk;            /* Jump here if the key is OK */
+    int *aiCols;           /* child to parent column mapping */
+
+    regResult = pParse->nMem+1;
+    pParse->nMem += 4;
+    regKey = ++pParse->nMem;
+    regRow = ++pParse->nMem;
+    v = sqlite3GetVdbe(pParse);
+    sqlite3VdbeSetNumCols(v, 4);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "table", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "rowid", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "parent", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 3, COLNAME_NAME, "fkid", SQLITE_STATIC);
+    sqlite3CodeVerifySchema(pParse, iDb);
+    k = sqliteHashFirst(&db->aDb[iDb].pSchema->tblHash);
+    while( k ){
+      if( zRight ){
+        pTab = sqlite3LocateTable(pParse, 0, zRight, zDb);
+        k = 0;
+      }else{
+        pTab = (Table*)sqliteHashData(k);
+        k = sqliteHashNext(k);
+      }
+      if( pTab==0 || pTab->pFKey==0 ) continue;
+      sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
+      if( pTab->nCol+regRow>pParse->nMem ) pParse->nMem = pTab->nCol + regRow;
+      sqlite3OpenTable(pParse, 0, iDb, pTab, OP_OpenRead);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, regResult, 0, pTab->zName,
+                        P4_TRANSIENT);
+      for(i=1, pFK=pTab->pFKey; pFK; i++, pFK=pFK->pNextFrom){
+        pParent = sqlite3FindTable(db, pFK->zTo, zDb);
+        if( pParent==0 ) continue;
+        pIdx = 0;
+        sqlite3TableLock(pParse, iDb, pParent->tnum, 0, pParent->zName);
+        x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, 0);
+        if( x==0 ){
+          if( pIdx==0 ){
+            sqlite3OpenTable(pParse, i, iDb, pParent, OP_OpenRead);
+          }else{
+            sqlite3VdbeAddOp3(v, OP_OpenRead, i, pIdx->tnum, iDb);
+            sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+          }
+        }else{
+          k = 0;
+          break;
+        }
+      }
+      assert( pParse->nErr>0 || pFK==0 );
+      if( pFK ) break;
+      if( pParse->nTab<i ) pParse->nTab = i;
+      addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, 0); VdbeCoverage(v);
+      for(i=1, pFK=pTab->pFKey; pFK; i++, pFK=pFK->pNextFrom){
+        pParent = sqlite3FindTable(db, pFK->zTo, zDb);
+        pIdx = 0;
+        aiCols = 0;
+        if( pParent ){
+          x = sqlite3FkLocateIndex(pParse, pParent, pFK, &pIdx, &aiCols);
+          assert( x==0 );
+        }
+        addrOk = sqlite3VdbeMakeLabel(v);
+        if( pParent && pIdx==0 ){
+          int iKey = pFK->aCol[0].iFrom;
+          assert( iKey>=0 && iKey<pTab->nCol );
+          if( iKey!=pTab->iPKey ){
+            sqlite3VdbeAddOp3(v, OP_Column, 0, iKey, regRow);
+            sqlite3ColumnDefault(v, pTab, iKey, regRow);
+            sqlite3VdbeAddOp2(v, OP_IsNull, regRow, addrOk); VdbeCoverage(v);
+            sqlite3VdbeAddOp2(v, OP_MustBeInt, regRow, 
+               sqlite3VdbeCurrentAddr(v)+3); VdbeCoverage(v);
+          }else{
+            sqlite3VdbeAddOp2(v, OP_Rowid, 0, regRow);
+          }
+          sqlite3VdbeAddOp3(v, OP_NotExists, i, 0, regRow); VdbeCoverage(v);
+          sqlite3VdbeAddOp2(v, OP_Goto, 0, addrOk);
+          sqlite3VdbeJumpHere(v, sqlite3VdbeCurrentAddr(v)-2);
+        }else{
+          for(j=0; j<pFK->nCol; j++){
+            sqlite3ExprCodeGetColumnOfTable(v, pTab, 0,
+                            aiCols ? aiCols[j] : pFK->aCol[j].iFrom, regRow+j);
+            sqlite3VdbeAddOp2(v, OP_IsNull, regRow+j, addrOk); VdbeCoverage(v);
+          }
+          if( pParent ){
+            sqlite3VdbeAddOp4(v, OP_MakeRecord, regRow, pFK->nCol, regKey,
+                              sqlite3IndexAffinityStr(v,pIdx), pFK->nCol);
+            sqlite3VdbeAddOp4Int(v, OP_Found, i, addrOk, regKey, 0);
+            VdbeCoverage(v);
+          }
+        }
+        sqlite3VdbeAddOp2(v, OP_Rowid, 0, regResult+1);
+        sqlite3VdbeAddOp4(v, OP_String8, 0, regResult+2, 0, 
+                          pFK->zTo, P4_TRANSIENT);
+        sqlite3VdbeAddOp2(v, OP_Integer, i-1, regResult+3);
+        sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, 4);
+        sqlite3VdbeResolveLabel(v, addrOk);
+        sqlite3DbFree(db, aiCols);
+      }
+      sqlite3VdbeAddOp2(v, OP_Next, 0, addrTop+1); VdbeCoverage(v);
+      sqlite3VdbeJumpHere(v, addrTop);
+    }
+  }
+  break;
+#endif /* !defined(SQLITE_OMIT_TRIGGER) */
+#endif /* !defined(SQLITE_OMIT_FOREIGN_KEY) */
+
+#ifndef NDEBUG
+  case PragTyp_PARSER_TRACE: {
+    if( zRight ){
+      if( sqlite3GetBoolean(zRight, 0) ){
+        sqlite3ParserTrace(stderr, "parser: ");
+      }else{
+        sqlite3ParserTrace(0, 0);
+      }
+    }
+  }
+  break;
+#endif
+
+  /* Reinstall the LIKE and GLOB functions.  The variant of LIKE
+  ** used will be case sensitive or not depending on the RHS.
+  */
+  case PragTyp_CASE_SENSITIVE_LIKE: {
+    if( zRight ){
+      sqlite3RegisterLikeFunctions(db, sqlite3GetBoolean(zRight, 0));
+    }
+  }
+  break;
+
+#ifndef SQLITE_INTEGRITY_CHECK_ERROR_MAX
+# define SQLITE_INTEGRITY_CHECK_ERROR_MAX 100
+#endif
+
+#ifndef SQLITE_OMIT_INTEGRITY_CHECK
+  /* Pragma "quick_check" is reduced version of 
+  ** integrity_check designed to detect most database corruption
+  ** without most of the overhead of a full integrity-check.
+  */
+  case PragTyp_INTEGRITY_CHECK: {
+    int i, j, addr, mxErr;
+
+    /* Code that appears at the end of the integrity check.  If no error
+    ** messages have been generated, output OK.  Otherwise output the
+    ** error message
+    */
+    static const int iLn = VDBE_OFFSET_LINENO(2);
+    static const VdbeOpList endCode[] = {
+      { OP_IfNeg,       1, 0,        0},    /* 0 */
+      { OP_String8,     0, 3,        0},    /* 1 */
+      { OP_ResultRow,   3, 1,        0},
+    };
+
+    int isQuick = (sqlite3Tolower(zLeft[0])=='q');
+
+    /* If the PRAGMA command was of the form "PRAGMA <db>.integrity_check",
+    ** then iDb is set to the index of the database identified by <db>.
+    ** In this case, the integrity of database iDb only is verified by
+    ** the VDBE created below.
+    **
+    ** Otherwise, if the command was simply "PRAGMA integrity_check" (or
+    ** "PRAGMA quick_check"), then iDb is set to 0. In this case, set iDb
+    ** to -1 here, to indicate that the VDBE should verify the integrity
+    ** of all attached databases.  */
+    assert( iDb>=0 );
+    assert( iDb==0 || pId2->z );
+    if( pId2->z==0 ) iDb = -1;
+
+    /* Initialize the VDBE program */
+    pParse->nMem = 6;
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "integrity_check", SQLITE_STATIC);
+
+    /* Set the maximum error count */
+    mxErr = SQLITE_INTEGRITY_CHECK_ERROR_MAX;
+    if( zRight ){
+      sqlite3GetInt32(zRight, &mxErr);
+      if( mxErr<=0 ){
+        mxErr = SQLITE_INTEGRITY_CHECK_ERROR_MAX;
+      }
+    }
+    sqlite3VdbeAddOp2(v, OP_Integer, mxErr, 1);  /* reg[1] holds errors left */
+
+    /* Do an integrity check on each database file */
+    for(i=0; i<db->nDb; i++){
+      HashElem *x;
+      Hash *pTbls;
+      int cnt = 0;
+
+      if( OMIT_TEMPDB && i==1 ) continue;
+      if( iDb>=0 && i!=iDb ) continue;
+
+      sqlite3CodeVerifySchema(pParse, i);
+      addr = sqlite3VdbeAddOp1(v, OP_IfPos, 1); /* Halt if out of errors */
+      VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Halt, 0, 0);
+      sqlite3VdbeJumpHere(v, addr);
+
+      /* Do an integrity check of the B-Tree
+      **
+      ** Begin by filling registers 2, 3, ... with the root pages numbers
+      ** for all tables and indices in the database.
+      */
+      assert( sqlite3SchemaMutexHeld(db, i, 0) );
+      pTbls = &db->aDb[i].pSchema->tblHash;
+      for(x=sqliteHashFirst(pTbls); x; x=sqliteHashNext(x)){
+        Table *pTab = sqliteHashData(x);
+        Index *pIdx;
+        if( HasRowid(pTab) ){
+          sqlite3VdbeAddOp2(v, OP_Integer, pTab->tnum, 2+cnt);
+          VdbeComment((v, "%s", pTab->zName));
+          cnt++;
+        }
+        for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+          sqlite3VdbeAddOp2(v, OP_Integer, pIdx->tnum, 2+cnt);
+          VdbeComment((v, "%s", pIdx->zName));
+          cnt++;
+        }
+      }
+
+      /* Make sure sufficient number of registers have been allocated */
+      pParse->nMem = MAX( pParse->nMem, cnt+8 );
+
+      /* Do the b-tree integrity checks */
+      sqlite3VdbeAddOp3(v, OP_IntegrityCk, 2, cnt, 1);
+      sqlite3VdbeChangeP5(v, (u8)i);
+      addr = sqlite3VdbeAddOp1(v, OP_IsNull, 2); VdbeCoverage(v);
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0,
+         sqlite3MPrintf(db, "*** in database %s ***\n", db->aDb[i].zName),
+         P4_DYNAMIC);
+      sqlite3VdbeAddOp3(v, OP_Move, 2, 4, 1);
+      sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 2);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 2, 1);
+      sqlite3VdbeJumpHere(v, addr);
+
+      /* Make sure all the indices are constructed correctly.
+      */
+      for(x=sqliteHashFirst(pTbls); x && !isQuick; x=sqliteHashNext(x)){
+        Table *pTab = sqliteHashData(x);
+        Index *pIdx, *pPk;
+        Index *pPrior = 0;
+        int loopTop;
+        int iDataCur, iIdxCur;
+        int r1 = -1;
+
+        if( pTab->pIndex==0 ) continue;
+        pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab);
+        addr = sqlite3VdbeAddOp1(v, OP_IfPos, 1);  /* Stop if out of errors */
+        VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_Halt, 0, 0);
+        sqlite3VdbeJumpHere(v, addr);
+        sqlite3ExprCacheClear(pParse);
+        sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenRead,
+                                   1, 0, &iDataCur, &iIdxCur);
+        sqlite3VdbeAddOp2(v, OP_Integer, 0, 7);
+        for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
+          sqlite3VdbeAddOp2(v, OP_Integer, 0, 8+j); /* index entries counter */
+        }
+        pParse->nMem = MAX(pParse->nMem, 8+j);
+        sqlite3VdbeAddOp2(v, OP_Rewind, iDataCur, 0); VdbeCoverage(v);
+        loopTop = sqlite3VdbeAddOp2(v, OP_AddImm, 7, 1);
+        /* Verify that all NOT NULL columns really are NOT NULL */
+        for(j=0; j<pTab->nCol; j++){
+          char *zErr;
+          int jmp2, jmp3;
+          if( j==pTab->iPKey ) continue;
+          if( pTab->aCol[j].notNull==0 ) continue;
+          sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, j, 3);
+          sqlite3VdbeChangeP5(v, OPFLAG_TYPEOFARG);
+          jmp2 = sqlite3VdbeAddOp1(v, OP_NotNull, 3); VdbeCoverage(v);
+          sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1); /* Decrement error limit */
+          zErr = sqlite3MPrintf(db, "NULL value in %s.%s", pTab->zName,
+                              pTab->aCol[j].zName);
+          sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, zErr, P4_DYNAMIC);
+          sqlite3VdbeAddOp2(v, OP_ResultRow, 3, 1);
+          jmp3 = sqlite3VdbeAddOp1(v, OP_IfPos, 1); VdbeCoverage(v);
+          sqlite3VdbeAddOp0(v, OP_Halt);
+          sqlite3VdbeJumpHere(v, jmp2);
+          sqlite3VdbeJumpHere(v, jmp3);
+        }
+        /* Validate index entries for the current row */
+        for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
+          int jmp2, jmp3, jmp4, jmp5;
+          int ckUniq = sqlite3VdbeMakeLabel(v);
+          if( pPk==pIdx ) continue;
+          r1 = sqlite3GenerateIndexKey(pParse, pIdx, iDataCur, 0, 0, &jmp3,
+                                       pPrior, r1);
+          pPrior = pIdx;
+          sqlite3VdbeAddOp2(v, OP_AddImm, 8+j, 1);  /* increment entry count */
+          /* Verify that an index entry exists for the current table row */
+          jmp2 = sqlite3VdbeAddOp4Int(v, OP_Found, iIdxCur+j, ckUniq, r1,
+                                      pIdx->nColumn); VdbeCoverage(v);
+          sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1); /* Decrement error limit */
+          sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, "row ", P4_STATIC);
+          sqlite3VdbeAddOp3(v, OP_Concat, 7, 3, 3);
+          sqlite3VdbeAddOp4(v, OP_String8, 0, 4, 0, 
+                            " missing from index ", P4_STATIC);
+          sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 3);
+          jmp5 = sqlite3VdbeAddOp4(v, OP_String8, 0, 4, 0,
+                                   pIdx->zName, P4_TRANSIENT);
+          sqlite3VdbeAddOp3(v, OP_Concat, 4, 3, 3);
+          sqlite3VdbeAddOp2(v, OP_ResultRow, 3, 1);
+          jmp4 = sqlite3VdbeAddOp1(v, OP_IfPos, 1); VdbeCoverage(v);
+          sqlite3VdbeAddOp0(v, OP_Halt);
+          sqlite3VdbeJumpHere(v, jmp2);
+          /* For UNIQUE indexes, verify that only one entry exists with the
+          ** current key.  The entry is unique if (1) any column is NULL
+          ** or (2) the next entry has a different key */
+          if( IsUniqueIndex(pIdx) ){
+            int uniqOk = sqlite3VdbeMakeLabel(v);
+            int jmp6;
+            int kk;
+            for(kk=0; kk<pIdx->nKeyCol; kk++){
+              int iCol = pIdx->aiColumn[kk];
+              assert( iCol>=0 && iCol<pTab->nCol );
+              if( pTab->aCol[iCol].notNull ) continue;
+              sqlite3VdbeAddOp2(v, OP_IsNull, r1+kk, uniqOk);
+              VdbeCoverage(v);
+            }
+            jmp6 = sqlite3VdbeAddOp1(v, OP_Next, iIdxCur+j); VdbeCoverage(v);
+            sqlite3VdbeAddOp2(v, OP_Goto, 0, uniqOk);
+            sqlite3VdbeJumpHere(v, jmp6);
+            sqlite3VdbeAddOp4Int(v, OP_IdxGT, iIdxCur+j, uniqOk, r1,
+                                 pIdx->nKeyCol); VdbeCoverage(v);
+            sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1); /* Decrement error limit */
+            sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0,
+                              "non-unique entry in index ", P4_STATIC);
+            sqlite3VdbeAddOp2(v, OP_Goto, 0, jmp5);
+            sqlite3VdbeResolveLabel(v, uniqOk);
+          }
+          sqlite3VdbeJumpHere(v, jmp4);
+          sqlite3ResolvePartIdxLabel(pParse, jmp3);
+        }
+        sqlite3VdbeAddOp2(v, OP_Next, iDataCur, loopTop); VdbeCoverage(v);
+        sqlite3VdbeJumpHere(v, loopTop-1);
+#ifndef SQLITE_OMIT_BTREECOUNT
+        sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, 
+                     "wrong # of entries in index ", P4_STATIC);
+        for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
+          if( pPk==pIdx ) continue;
+          addr = sqlite3VdbeCurrentAddr(v);
+          sqlite3VdbeAddOp2(v, OP_IfPos, 1, addr+2); VdbeCoverage(v);
+          sqlite3VdbeAddOp2(v, OP_Halt, 0, 0);
+          sqlite3VdbeAddOp2(v, OP_Count, iIdxCur+j, 3);
+          sqlite3VdbeAddOp3(v, OP_Eq, 8+j, addr+8, 3); VdbeCoverage(v);
+          sqlite3VdbeChangeP5(v, SQLITE_NOTNULL);
+          sqlite3VdbeAddOp2(v, OP_AddImm, 1, -1);
+          sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, pIdx->zName, P4_TRANSIENT);
+          sqlite3VdbeAddOp3(v, OP_Concat, 3, 2, 7);
+          sqlite3VdbeAddOp2(v, OP_ResultRow, 7, 1);
+        }
+#endif /* SQLITE_OMIT_BTREECOUNT */
+      } 
+    }
+    addr = sqlite3VdbeAddOpList(v, ArraySize(endCode), endCode, iLn);
+    sqlite3VdbeChangeP3(v, addr, -mxErr);
+    sqlite3VdbeJumpHere(v, addr);
+    sqlite3VdbeChangeP4(v, addr+1, "ok", P4_STATIC);
+  }
+  break;
+#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
+
+#ifndef SQLITE_OMIT_UTF16
+  /*
+  **   PRAGMA encoding
+  **   PRAGMA encoding = "utf-8"|"utf-16"|"utf-16le"|"utf-16be"
+  **
+  ** In its first form, this pragma returns the encoding of the main
+  ** database. If the database is not initialized, it is initialized now.
+  **
+  ** The second form of this pragma is a no-op if the main database file
+  ** has not already been initialized. In this case it sets the default
+  ** encoding that will be used for the main database file if a new file
+  ** is created. If an existing main database file is opened, then the
+  ** default text encoding for the existing database is used.
+  ** 
+  ** In all cases new databases created using the ATTACH command are
+  ** created to use the same default text encoding as the main database. If
+  ** the main database has not been initialized and/or created when ATTACH
+  ** is executed, this is done before the ATTACH operation.
+  **
+  ** In the second form this pragma sets the text encoding to be used in
+  ** new database files created using this database handle. It is only
+  ** useful if invoked immediately after the main database i
+  */
+  case PragTyp_ENCODING: {
+    static const struct EncName {
+      char *zName;
+      u8 enc;
+    } encnames[] = {
+      { "UTF8",     SQLITE_UTF8        },
+      { "UTF-8",    SQLITE_UTF8        },  /* Must be element [1] */
+      { "UTF-16le", SQLITE_UTF16LE     },  /* Must be element [2] */
+      { "UTF-16be", SQLITE_UTF16BE     },  /* Must be element [3] */
+      { "UTF16le",  SQLITE_UTF16LE     },
+      { "UTF16be",  SQLITE_UTF16BE     },
+      { "UTF-16",   0                  }, /* SQLITE_UTF16NATIVE */
+      { "UTF16",    0                  }, /* SQLITE_UTF16NATIVE */
+      { 0, 0 }
+    };
+    const struct EncName *pEnc;
+    if( !zRight ){    /* "PRAGMA encoding" */
+      if( sqlite3ReadSchema(pParse) ) goto pragma_out;
+      sqlite3VdbeSetNumCols(v, 1);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "encoding", SQLITE_STATIC);
+      sqlite3VdbeAddOp2(v, OP_String8, 0, 1);
+      assert( encnames[SQLITE_UTF8].enc==SQLITE_UTF8 );
+      assert( encnames[SQLITE_UTF16LE].enc==SQLITE_UTF16LE );
+      assert( encnames[SQLITE_UTF16BE].enc==SQLITE_UTF16BE );
+      sqlite3VdbeChangeP4(v, -1, encnames[ENC(pParse->db)].zName, P4_STATIC);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+    }else{                        /* "PRAGMA encoding = XXX" */
+      /* Only change the value of sqlite.enc if the database handle is not
+      ** initialized. If the main database exists, the new sqlite.enc value
+      ** will be overwritten when the schema is next loaded. If it does not
+      ** already exists, it will be created to use the new encoding value.
+      */
+      if( 
+        !(DbHasProperty(db, 0, DB_SchemaLoaded)) || 
+        DbHasProperty(db, 0, DB_Empty) 
+      ){
+        for(pEnc=&encnames[0]; pEnc->zName; pEnc++){
+          if( 0==sqlite3StrICmp(zRight, pEnc->zName) ){
+            SCHEMA_ENC(db) = ENC(db) =
+                pEnc->enc ? pEnc->enc : SQLITE_UTF16NATIVE;
+            break;
+          }
+        }
+        if( !pEnc->zName ){
+          sqlite3ErrorMsg(pParse, "unsupported encoding: %s", zRight);
+        }
+      }
+    }
+  }
+  break;
+#endif /* SQLITE_OMIT_UTF16 */
+
+#ifndef SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS
+  /*
+  **   PRAGMA [database.]schema_version
+  **   PRAGMA [database.]schema_version = <integer>
+  **
+  **   PRAGMA [database.]user_version
+  **   PRAGMA [database.]user_version = <integer>
+  **
+  **   PRAGMA [database.]freelist_count = <integer>
+  **
+  **   PRAGMA [database.]application_id
+  **   PRAGMA [database.]application_id = <integer>
+  **
+  ** The pragma's schema_version and user_version are used to set or get
+  ** the value of the schema-version and user-version, respectively. Both
+  ** the schema-version and the user-version are 32-bit signed integers
+  ** stored in the database header.
+  **
+  ** The schema-cookie is usually only manipulated internally by SQLite. It
+  ** is incremented by SQLite whenever the database schema is modified (by
+  ** creating or dropping a table or index). The schema version is used by
+  ** SQLite each time a query is executed to ensure that the internal cache
+  ** of the schema used when compiling the SQL query matches the schema of
+  ** the database against which the compiled query is actually executed.
+  ** Subverting this mechanism by using "PRAGMA schema_version" to modify
+  ** the schema-version is potentially dangerous and may lead to program
+  ** crashes or database corruption. Use with caution!
+  **
+  ** The user-version is not used internally by SQLite. It may be used by
+  ** applications for any purpose.
+  */
+  case PragTyp_HEADER_VALUE: {
+    int iCookie = aPragmaNames[mid].iArg;  /* Which cookie to read or write */
+    sqlite3VdbeUsesBtree(v, iDb);
+    if( zRight && (aPragmaNames[mid].mPragFlag & PragFlag_ReadOnly)==0 ){
+      /* Write the specified cookie value */
+      static const VdbeOpList setCookie[] = {
+        { OP_Transaction,    0,  1,  0},    /* 0 */
+        { OP_Integer,        0,  1,  0},    /* 1 */
+        { OP_SetCookie,      0,  0,  1},    /* 2 */
+      };
+      int addr = sqlite3VdbeAddOpList(v, ArraySize(setCookie), setCookie, 0);
+      sqlite3VdbeChangeP1(v, addr, iDb);
+      sqlite3VdbeChangeP1(v, addr+1, sqlite3Atoi(zRight));
+      sqlite3VdbeChangeP1(v, addr+2, iDb);
+      sqlite3VdbeChangeP2(v, addr+2, iCookie);
+    }else{
+      /* Read the specified cookie value */
+      static const VdbeOpList readCookie[] = {
+        { OP_Transaction,     0,  0,  0},    /* 0 */
+        { OP_ReadCookie,      0,  1,  0},    /* 1 */
+        { OP_ResultRow,       1,  1,  0}
+      };
+      int addr = sqlite3VdbeAddOpList(v, ArraySize(readCookie), readCookie, 0);
+      sqlite3VdbeChangeP1(v, addr, iDb);
+      sqlite3VdbeChangeP1(v, addr+1, iDb);
+      sqlite3VdbeChangeP3(v, addr+1, iCookie);
+      sqlite3VdbeSetNumCols(v, 1);
+      sqlite3VdbeSetColName(v, 0, COLNAME_NAME, zLeft, SQLITE_TRANSIENT);
+    }
+  }
+  break;
+#endif /* SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS */
+
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+  /*
+  **   PRAGMA compile_options
+  **
+  ** Return the names of all compile-time options used in this build,
+  ** one option per row.
+  */
+  case PragTyp_COMPILE_OPTIONS: {
+    int i = 0;
+    const char *zOpt;
+    sqlite3VdbeSetNumCols(v, 1);
+    pParse->nMem = 1;
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "compile_option", SQLITE_STATIC);
+    while( (zOpt = sqlite3_compileoption_get(i++))!=0 ){
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, zOpt, 0);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
+    }
+  }
+  break;
+#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */
+
+#ifndef SQLITE_OMIT_WAL
+  /*
+  **   PRAGMA [database.]wal_checkpoint = passive|full|restart|truncate
+  **
+  ** Checkpoint the database.
+  */
+  case PragTyp_WAL_CHECKPOINT: {
+    int iBt = (pId2->z?iDb:SQLITE_MAX_ATTACHED);
+    int eMode = SQLITE_CHECKPOINT_PASSIVE;
+    if( zRight ){
+      if( sqlite3StrICmp(zRight, "full")==0 ){
+        eMode = SQLITE_CHECKPOINT_FULL;
+      }else if( sqlite3StrICmp(zRight, "restart")==0 ){
+        eMode = SQLITE_CHECKPOINT_RESTART;
+      }else if( sqlite3StrICmp(zRight, "truncate")==0 ){
+        eMode = SQLITE_CHECKPOINT_TRUNCATE;
+      }
+    }
+    sqlite3VdbeSetNumCols(v, 3);
+    pParse->nMem = 3;
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "busy", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "log", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 2, COLNAME_NAME, "checkpointed", SQLITE_STATIC);
+
+    sqlite3VdbeAddOp3(v, OP_Checkpoint, iBt, eMode, 1);
+    sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 3);
+  }
+  break;
+
+  /*
+  **   PRAGMA wal_autocheckpoint
+  **   PRAGMA wal_autocheckpoint = N
+  **
+  ** Configure a database connection to automatically checkpoint a database
+  ** after accumulating N frames in the log. Or query for the current value
+  ** of N.
+  */
+  case PragTyp_WAL_AUTOCHECKPOINT: {
+    if( zRight ){
+      sqlite3_wal_autocheckpoint(db, sqlite3Atoi(zRight));
+    }
+    returnSingleInt(pParse, "wal_autocheckpoint", 
+       db->xWalCallback==sqlite3WalDefaultHook ? 
+           SQLITE_PTR_TO_INT(db->pWalArg) : 0);
+  }
+  break;
+#endif
+
+  /*
+  **  PRAGMA shrink_memory
+  **
+  ** This pragma attempts to free as much memory as possible from the
+  ** current database connection.
+  */
+  case PragTyp_SHRINK_MEMORY: {
+    sqlite3_db_release_memory(db);
+    break;
+  }
+
+  /*
+  **   PRAGMA busy_timeout
+  **   PRAGMA busy_timeout = N
+  **
+  ** Call sqlite3_busy_timeout(db, N).  Return the current timeout value
+  ** if one is set.  If no busy handler or a different busy handler is set
+  ** then 0 is returned.  Setting the busy_timeout to 0 or negative
+  ** disables the timeout.
+  */
+  /*case PragTyp_BUSY_TIMEOUT*/ default: {
+    assert( aPragmaNames[mid].ePragTyp==PragTyp_BUSY_TIMEOUT );
+    if( zRight ){
+      sqlite3_busy_timeout(db, sqlite3Atoi(zRight));
+    }
+    returnSingleInt(pParse, "timeout",  db->busyTimeout);
+    break;
+  }
+
+  /*
+  **   PRAGMA soft_heap_limit
+  **   PRAGMA soft_heap_limit = N
+  **
+  ** Call sqlite3_soft_heap_limit64(N).  Return the result.  If N is omitted,
+  ** use -1.
+  */
+  case PragTyp_SOFT_HEAP_LIMIT: {
+    sqlite3_int64 N;
+    if( zRight && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK ){
+      sqlite3_soft_heap_limit64(N);
+    }
+    returnSingleInt(pParse, "soft_heap_limit",  sqlite3_soft_heap_limit64(-1));
+    break;
+  }
+
+  /*
+  **   PRAGMA threads
+  **   PRAGMA threads = N
+  **
+  ** Configure the maximum number of worker threads.  Return the new
+  ** maximum, which might be less than requested.
+  */
+  case PragTyp_THREADS: {
+    sqlite3_int64 N;
+    if( zRight
+     && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK
+     && N>=0
+    ){
+      sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, (int)(N&0x7fffffff));
+    }
+    returnSingleInt(pParse, "threads",
+                    sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, -1));
+    break;
+  }
+
+#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
+  /*
+  ** Report the current state of file logs for all databases
+  */
+  case PragTyp_LOCK_STATUS: {
+    static const char *const azLockName[] = {
+      "unlocked", "shared", "reserved", "pending", "exclusive"
+    };
+    int i;
+    sqlite3VdbeSetNumCols(v, 2);
+    pParse->nMem = 2;
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "database", SQLITE_STATIC);
+    sqlite3VdbeSetColName(v, 1, COLNAME_NAME, "status", SQLITE_STATIC);
+    for(i=0; i<db->nDb; i++){
+      Btree *pBt;
+      const char *zState = "unknown";
+      int j;
+      if( db->aDb[i].zName==0 ) continue;
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, db->aDb[i].zName, P4_STATIC);
+      pBt = db->aDb[i].pBt;
+      if( pBt==0 || sqlite3BtreePager(pBt)==0 ){
+        zState = "closed";
+      }else if( sqlite3_file_control(db, i ? db->aDb[i].zName : 0, 
+                                     SQLITE_FCNTL_LOCKSTATE, &j)==SQLITE_OK ){
+         zState = azLockName[j];
+      }
+      sqlite3VdbeAddOp4(v, OP_String8, 0, 2, 0, zState, P4_STATIC);
+      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 2);
+    }
+    break;
+  }
+#endif
+
+#ifdef SQLITE_HAS_CODEC
+  case PragTyp_KEY: {
+    if( zRight ) sqlite3_key_v2(db, zDb, zRight, sqlite3Strlen30(zRight));
+    break;
+  }
+  case PragTyp_REKEY: {
+    if( zRight ) sqlite3_rekey_v2(db, zDb, zRight, sqlite3Strlen30(zRight));
+    break;
+  }
+  case PragTyp_HEXKEY: {
+    if( zRight ){
+      u8 iByte;
+      int i;
+      char zKey[40];
+      for(i=0, iByte=0; i<sizeof(zKey)*2 && sqlite3Isxdigit(zRight[i]); i++){
+        iByte = (iByte<<4) + sqlite3HexToInt(zRight[i]);
+        if( (i&1)!=0 ) zKey[i/2] = iByte;
+      }
+      if( (zLeft[3] & 0xf)==0xb ){
+        sqlite3_key_v2(db, zDb, zKey, i/2);
+      }else{
+        sqlite3_rekey_v2(db, zDb, zKey, i/2);
+      }
+    }
+    break;
+  }
+#endif
+#if defined(SQLITE_HAS_CODEC) || defined(SQLITE_ENABLE_CEROD)
+  case PragTyp_ACTIVATE_EXTENSIONS: if( zRight ){
+#ifdef SQLITE_HAS_CODEC
+    if( sqlite3StrNICmp(zRight, "see-", 4)==0 ){
+      sqlite3_activate_see(&zRight[4]);
+    }
+#endif
+#ifdef SQLITE_ENABLE_CEROD
+    if( sqlite3StrNICmp(zRight, "cerod-", 6)==0 ){
+      sqlite3_activate_cerod(&zRight[6]);
+    }
+#endif
+  }
+  break;
+#endif
+
+  } /* End of the PRAGMA switch */
+
+pragma_out:
+  sqlite3DbFree(db, zLeft);
+  sqlite3DbFree(db, zRight);
+}
+
+#endif /* SQLITE_OMIT_PRAGMA */
+
+/************** End of pragma.c **********************************************/
+/************** Begin file prepare.c *****************************************/
+/*
+** 2005 May 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the implementation of the sqlite3_prepare()
+** interface, and routines that contribute to loading the database schema
+** from disk.
+*/
+
+/*
+** Fill the InitData structure with an error message that indicates
+** that the database is corrupt.
+*/
+static void corruptSchema(
+  InitData *pData,     /* Initialization context */
+  const char *zObj,    /* Object being parsed at the point of error */
+  const char *zExtra   /* Error information */
+){
+  sqlite3 *db = pData->db;
+  if( !db->mallocFailed && (db->flags & SQLITE_RecoveryMode)==0 ){
+    if( zObj==0 ) zObj = "?";
+    sqlite3SetString(pData->pzErrMsg, db,
+      "malformed database schema (%s)", zObj);
+    if( zExtra ){
+      *pData->pzErrMsg = sqlite3MAppendf(db, *pData->pzErrMsg, 
+                                 "%s - %s", *pData->pzErrMsg, zExtra);
+    }
+  }
+  pData->rc = db->mallocFailed ? SQLITE_NOMEM : SQLITE_CORRUPT_BKPT;
+}
+
+/*
+** This is the callback routine for the code that initializes the
+** database.  See sqlite3Init() below for additional information.
+** This routine is also called from the OP_ParseSchema opcode of the VDBE.
+**
+** Each callback contains the following information:
+**
+**     argv[0] = name of thing being created
+**     argv[1] = root page number for table or index. 0 for trigger or view.
+**     argv[2] = SQL text for the CREATE statement.
+**
+*/
+SQLITE_PRIVATE int sqlite3InitCallback(void *pInit, int argc, char **argv, char **NotUsed){
+  InitData *pData = (InitData*)pInit;
+  sqlite3 *db = pData->db;
+  int iDb = pData->iDb;
+
+  assert( argc==3 );
+  UNUSED_PARAMETER2(NotUsed, argc);
+  assert( sqlite3_mutex_held(db->mutex) );
+  DbClearProperty(db, iDb, DB_Empty);
+  if( db->mallocFailed ){
+    corruptSchema(pData, argv[0], 0);
+    return 1;
+  }
+
+  assert( iDb>=0 && iDb<db->nDb );
+  if( argv==0 ) return 0;   /* Might happen if EMPTY_RESULT_CALLBACKS are on */
+  if( argv[1]==0 ){
+    corruptSchema(pData, argv[0], 0);
+  }else if( argv[2] && argv[2][0] ){
+    /* Call the parser to process a CREATE TABLE, INDEX or VIEW.
+    ** But because db->init.busy is set to 1, no VDBE code is generated
+    ** or executed.  All the parser does is build the internal data
+    ** structures that describe the table, index, or view.
+    */
+    int rc;
+    sqlite3_stmt *pStmt;
+    TESTONLY(int rcp);            /* Return code from sqlite3_prepare() */
+
+    assert( db->init.busy );
+    db->init.iDb = iDb;
+    db->init.newTnum = sqlite3Atoi(argv[1]);
+    db->init.orphanTrigger = 0;
+    TESTONLY(rcp = ) sqlite3_prepare(db, argv[2], -1, &pStmt, 0);
+    rc = db->errCode;
+    assert( (rc&0xFF)==(rcp&0xFF) );
+    db->init.iDb = 0;
+    if( SQLITE_OK!=rc ){
+      if( db->init.orphanTrigger ){
+        assert( iDb==1 );
+      }else{
+        pData->rc = rc;
+        if( rc==SQLITE_NOMEM ){
+          db->mallocFailed = 1;
+        }else if( rc!=SQLITE_INTERRUPT && (rc&0xFF)!=SQLITE_LOCKED ){
+          corruptSchema(pData, argv[0], sqlite3_errmsg(db));
+        }
+      }
+    }
+    sqlite3_finalize(pStmt);
+  }else if( argv[0]==0 ){
+    corruptSchema(pData, 0, 0);
+  }else{
+    /* If the SQL column is blank it means this is an index that
+    ** was created to be the PRIMARY KEY or to fulfill a UNIQUE
+    ** constraint for a CREATE TABLE.  The index should have already
+    ** been created when we processed the CREATE TABLE.  All we have
+    ** to do here is record the root page number for that index.
+    */
+    Index *pIndex;
+    pIndex = sqlite3FindIndex(db, argv[0], db->aDb[iDb].zName);
+    if( pIndex==0 ){
+      /* This can occur if there exists an index on a TEMP table which
+      ** has the same name as another index on a permanent index.  Since
+      ** the permanent table is hidden by the TEMP table, we can also
+      ** safely ignore the index on the permanent table.
+      */
+      /* Do Nothing */;
+    }else if( sqlite3GetInt32(argv[1], &pIndex->tnum)==0 ){
+      corruptSchema(pData, argv[0], "invalid rootpage");
+    }
+  }
+  return 0;
+}
+
+/*
+** Attempt to read the database schema and initialize internal
+** data structures for a single database file.  The index of the
+** database file is given by iDb.  iDb==0 is used for the main
+** database.  iDb==1 should never be used.  iDb>=2 is used for
+** auxiliary databases.  Return one of the SQLITE_ error codes to
+** indicate success or failure.
+*/
+static int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg){
+  int rc;
+  int i;
+#ifndef SQLITE_OMIT_DEPRECATED
+  int size;
+#endif
+  Table *pTab;
+  Db *pDb;
+  char const *azArg[4];
+  int meta[5];
+  InitData initData;
+  char const *zMasterSchema;
+  char const *zMasterName;
+  int openedTransaction = 0;
+
+  /*
+  ** The master database table has a structure like this
+  */
+  static const char master_schema[] = 
+     "CREATE TABLE sqlite_master(\n"
+     "  type text,\n"
+     "  name text,\n"
+     "  tbl_name text,\n"
+     "  rootpage integer,\n"
+     "  sql text\n"
+     ")"
+  ;
+#ifndef SQLITE_OMIT_TEMPDB
+  static const char temp_master_schema[] = 
+     "CREATE TEMP TABLE sqlite_temp_master(\n"
+     "  type text,\n"
+     "  name text,\n"
+     "  tbl_name text,\n"
+     "  rootpage integer,\n"
+     "  sql text\n"
+     ")"
+  ;
+#else
+  #define temp_master_schema 0
+#endif
+
+  assert( iDb>=0 && iDb<db->nDb );
+  assert( db->aDb[iDb].pSchema );
+  assert( sqlite3_mutex_held(db->mutex) );
+  assert( iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[iDb].pBt) );
+
+  /* zMasterSchema and zInitScript are set to point at the master schema
+  ** and initialisation script appropriate for the database being
+  ** initialized. zMasterName is the name of the master table.
+  */
+  if( !OMIT_TEMPDB && iDb==1 ){
+    zMasterSchema = temp_master_schema;
+  }else{
+    zMasterSchema = master_schema;
+  }
+  zMasterName = SCHEMA_TABLE(iDb);
+
+  /* Construct the schema tables.  */
+  azArg[0] = zMasterName;
+  azArg[1] = "1";
+  azArg[2] = zMasterSchema;
+  azArg[3] = 0;
+  initData.db = db;
+  initData.iDb = iDb;
+  initData.rc = SQLITE_OK;
+  initData.pzErrMsg = pzErrMsg;
+  sqlite3InitCallback(&initData, 3, (char **)azArg, 0);
+  if( initData.rc ){
+    rc = initData.rc;
+    goto error_out;
+  }
+  pTab = sqlite3FindTable(db, zMasterName, db->aDb[iDb].zName);
+  if( ALWAYS(pTab) ){
+    pTab->tabFlags |= TF_Readonly;
+  }
+
+  /* Create a cursor to hold the database open
+  */
+  pDb = &db->aDb[iDb];
+  if( pDb->pBt==0 ){
+    if( !OMIT_TEMPDB && ALWAYS(iDb==1) ){
+      DbSetProperty(db, 1, DB_SchemaLoaded);
+    }
+    return SQLITE_OK;
+  }
+
+  /* If there is not already a read-only (or read-write) transaction opened
+  ** on the b-tree database, open one now. If a transaction is opened, it 
+  ** will be closed before this function returns.  */
+  sqlite3BtreeEnter(pDb->pBt);
+  if( !sqlite3BtreeIsInReadTrans(pDb->pBt) ){
+    rc = sqlite3BtreeBeginTrans(pDb->pBt, 0);
+    if( rc!=SQLITE_OK ){
+      sqlite3SetString(pzErrMsg, db, "%s", sqlite3ErrStr(rc));
+      goto initone_error_out;
+    }
+    openedTransaction = 1;
+  }
+
+  /* Get the database meta information.
+  **
+  ** Meta values are as follows:
+  **    meta[0]   Schema cookie.  Changes with each schema change.
+  **    meta[1]   File format of schema layer.
+  **    meta[2]   Size of the page cache.
+  **    meta[3]   Largest rootpage (auto/incr_vacuum mode)
+  **    meta[4]   Db text encoding. 1:UTF-8 2:UTF-16LE 3:UTF-16BE
+  **    meta[5]   User version
+  **    meta[6]   Incremental vacuum mode
+  **    meta[7]   unused
+  **    meta[8]   unused
+  **    meta[9]   unused
+  **
+  ** Note: The #defined SQLITE_UTF* symbols in sqliteInt.h correspond to
+  ** the possible values of meta[4].
+  */
+  for(i=0; i<ArraySize(meta); i++){
+    sqlite3BtreeGetMeta(pDb->pBt, i+1, (u32 *)&meta[i]);
+  }
+  pDb->pSchema->schema_cookie = meta[BTREE_SCHEMA_VERSION-1];
+
+  /* If opening a non-empty database, check the text encoding. For the
+  ** main database, set sqlite3.enc to the encoding of the main database.
+  ** For an attached db, it is an error if the encoding is not the same
+  ** as sqlite3.enc.
+  */
+  if( meta[BTREE_TEXT_ENCODING-1] ){  /* text encoding */
+    if( iDb==0 ){
+#ifndef SQLITE_OMIT_UTF16
+      u8 encoding;
+      /* If opening the main database, set ENC(db). */
+      encoding = (u8)meta[BTREE_TEXT_ENCODING-1] & 3;
+      if( encoding==0 ) encoding = SQLITE_UTF8;
+      ENC(db) = encoding;
+#else
+      ENC(db) = SQLITE_UTF8;
+#endif
+    }else{
+      /* If opening an attached database, the encoding much match ENC(db) */
+      if( meta[BTREE_TEXT_ENCODING-1]!=ENC(db) ){
+        sqlite3SetString(pzErrMsg, db, "attached databases must use the same"
+            " text encoding as main database");
+        rc = SQLITE_ERROR;
+        goto initone_error_out;
+      }
+    }
+  }else{
+    DbSetProperty(db, iDb, DB_Empty);
+  }
+  pDb->pSchema->enc = ENC(db);
+
+  if( pDb->pSchema->cache_size==0 ){
+#ifndef SQLITE_OMIT_DEPRECATED
+    size = sqlite3AbsInt32(meta[BTREE_DEFAULT_CACHE_SIZE-1]);
+    if( size==0 ){ size = SQLITE_DEFAULT_CACHE_SIZE; }
+    pDb->pSchema->cache_size = size;
+#else
+    pDb->pSchema->cache_size = SQLITE_DEFAULT_CACHE_SIZE;
+#endif
+    sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size);
+  }
+
+  /*
+  ** file_format==1    Version 3.0.0.
+  ** file_format==2    Version 3.1.3.  // ALTER TABLE ADD COLUMN
+  ** file_format==3    Version 3.1.4.  // ditto but with non-NULL defaults
+  ** file_format==4    Version 3.3.0.  // DESC indices.  Boolean constants
+  */
+  pDb->pSchema->file_format = (u8)meta[BTREE_FILE_FORMAT-1];
+  if( pDb->pSchema->file_format==0 ){
+    pDb->pSchema->file_format = 1;
+  }
+  if( pDb->pSchema->file_format>SQLITE_MAX_FILE_FORMAT ){
+    sqlite3SetString(pzErrMsg, db, "unsupported file format");
+    rc = SQLITE_ERROR;
+    goto initone_error_out;
+  }
+
+  /* Ticket #2804:  When we open a database in the newer file format,
+  ** clear the legacy_file_format pragma flag so that a VACUUM will
+  ** not downgrade the database and thus invalidate any descending
+  ** indices that the user might have created.
+  */
+  if( iDb==0 && meta[BTREE_FILE_FORMAT-1]>=4 ){
+    db->flags &= ~SQLITE_LegacyFileFmt;
+  }
+
+  /* Read the schema information out of the schema tables
+  */
+  assert( db->init.busy );
+  {
+    char *zSql;
+    zSql = sqlite3MPrintf(db, 
+        "SELECT name, rootpage, sql FROM '%q'.%s ORDER BY rowid",
+        db->aDb[iDb].zName, zMasterName);
+#ifndef SQLITE_OMIT_AUTHORIZATION
+    {
+      sqlite3_xauth xAuth;
+      xAuth = db->xAuth;
+      db->xAuth = 0;
+#endif
+      rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0);
+#ifndef SQLITE_OMIT_AUTHORIZATION
+      db->xAuth = xAuth;
+    }
+#endif
+    if( rc==SQLITE_OK ) rc = initData.rc;
+    sqlite3DbFree(db, zSql);
+#ifndef SQLITE_OMIT_ANALYZE
+    if( rc==SQLITE_OK ){
+      sqlite3AnalysisLoad(db, iDb);
+    }
+#endif
+  }
+  if( db->mallocFailed ){
+    rc = SQLITE_NOMEM;
+    sqlite3ResetAllSchemasOfConnection(db);
+  }
+  if( rc==SQLITE_OK || (db->flags&SQLITE_RecoveryMode)){
+    /* Black magic: If the SQLITE_RecoveryMode flag is set, then consider
+    ** the schema loaded, even if errors occurred. In this situation the 
+    ** current sqlite3_prepare() operation will fail, but the following one
+    ** will attempt to compile the supplied statement against whatever subset
+    ** of the schema was loaded before the error occurred. The primary
+    ** purpose of this is to allow access to the sqlite_master table
+    ** even when its contents have been corrupted.
+    */
+    DbSetProperty(db, iDb, DB_SchemaLoaded);
+    rc = SQLITE_OK;
+  }
+
+  /* Jump here for an error that occurs after successfully allocating
+  ** curMain and calling sqlite3BtreeEnter(). For an error that occurs
+  ** before that point, jump to error_out.
+  */
+initone_error_out:
+  if( openedTransaction ){
+    sqlite3BtreeCommit(pDb->pBt);
+  }
+  sqlite3BtreeLeave(pDb->pBt);
+
+error_out:
+  if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){
+    db->mallocFailed = 1;
+  }
+  return rc;
+}
+
+/*
+** Initialize all database files - the main database file, the file
+** used to store temporary tables, and any additional database files
+** created using ATTACH statements.  Return a success code.  If an
+** error occurs, write an error message into *pzErrMsg.
+**
+** After a database is initialized, the DB_SchemaLoaded bit is set
+** bit is set in the flags field of the Db structure. If the database
+** file was of zero-length, then the DB_Empty flag is also set.
+*/
+SQLITE_PRIVATE int sqlite3Init(sqlite3 *db, char **pzErrMsg){
+  int i, rc;
+  int commit_internal = !(db->flags&SQLITE_InternChanges);
+  
+  assert( sqlite3_mutex_held(db->mutex) );
+  assert( sqlite3BtreeHoldsMutex(db->aDb[0].pBt) );
+  assert( db->init.busy==0 );
+  rc = SQLITE_OK;
+  db->init.busy = 1;
+  ENC(db) = SCHEMA_ENC(db);
+  for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
+    if( DbHasProperty(db, i, DB_SchemaLoaded) || i==1 ) continue;
+    rc = sqlite3InitOne(db, i, pzErrMsg);
+    if( rc ){
+      sqlite3ResetOneSchema(db, i);
+    }
+  }
+
+  /* Once all the other databases have been initialized, load the schema
+  ** for the TEMP database. This is loaded last, as the TEMP database
+  ** schema may contain references to objects in other databases.
+  */
+#ifndef SQLITE_OMIT_TEMPDB
+  assert( db->nDb>1 );
+  if( rc==SQLITE_OK && !DbHasProperty(db, 1, DB_SchemaLoaded) ){
+    rc = sqlite3InitOne(db, 1, pzErrMsg);
+    if( rc ){
+      sqlite3ResetOneSchema(db, 1);
+    }
+  }
+#endif
+
+  db->init.busy = 0;
+  if( rc==SQLITE_OK && commit_internal ){
+    sqlite3CommitInternalChanges(db);
+  }
+
+  return rc; 
+}
+
+/*
+** This routine is a no-op if the database schema is already initialized.
+** Otherwise, the schema is loaded. An error code is returned.
+*/
+SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse){
+  int rc = SQLITE_OK;
+  sqlite3 *db = pParse->db;
+  assert( sqlite3_mutex_held(db->mutex) );
+  if( !db->init.busy ){
+    rc = sqlite3Init(db, &pParse->zErrMsg);
+  }
+  if( rc!=SQLITE_OK ){
+    pParse->rc = rc;
+    pParse->nErr++;
+  }
+  return rc;
+}
+
+
+/*
+** Check schema cookies in all databases.  If any cookie is out
+** of date set pParse->rc to SQLITE_SCHEMA.  If all schema cookies
+** make no changes to pParse->rc.
+*/
+static void schemaIsValid(Parse *pParse){
+  sqlite3 *db = pParse->db;
+  int iDb;
+  int rc;
+  int cookie;
+
+  assert( pParse->checkSchema );
+  assert( sqlite3_mutex_held(db->mutex) );
+  for(iDb=0; iDb<db->nDb; iDb++){
+    int openedTransaction = 0;         /* True if a transaction is opened */
+    Btree *pBt = db->aDb[iDb].pBt;     /* Btree database to read cookie from */
+    if( pBt==0 ) continue;
+
+    /* If there is not already a read-only (or read-write) transaction opened
+    ** on the b-tree database, open one now. If a transaction is opened, it 
+    ** will be closed immediately after reading the meta-value. */
+    if( !sqlite3BtreeIsInReadTrans(pBt) ){
+      rc = sqlite3BtreeBeginTrans(pBt, 0);
+      if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){
+        db->mallocFailed = 1;
+      }
+      if( rc!=SQLITE_OK ) return;
+      openedTransaction = 1;
+    }
+
+    /* Read the schema cookie from the database. If it does not match the 
+    ** value stored as part of the in-memory schema representation,
+    ** set Parse.rc to SQLITE_SCHEMA. */
+    sqlite3BtreeGetMeta(pBt, BTREE_SCHEMA_VERSION, (u32 *)&cookie);
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    if( cookie!=db->aDb[iDb].pSchema->schema_cookie ){
+      sqlite3ResetOneSchema(db, iDb);
+      pParse->rc = SQLITE_SCHEMA;
+    }
+
+    /* Close the transaction, if one was opened. */
+    if( openedTransaction ){
+      sqlite3BtreeCommit(pBt);
+    }
+  }
+}
+
+/*
+** Convert a schema pointer into the iDb index that indicates
+** which database file in db->aDb[] the schema refers to.
+**
+** If the same database is attached more than once, the first
+** attached database is returned.
+*/
+SQLITE_PRIVATE int sqlite3SchemaToIndex(sqlite3 *db, Schema *pSchema){
+  int i = -1000000;
+
+  /* If pSchema is NULL, then return -1000000. This happens when code in 
+  ** expr.c is trying to resolve a reference to a transient table (i.e. one
+  ** created by a sub-select). In this case the return value of this 
+  ** function should never be used.
+  **
+  ** We return -1000000 instead of the more usual -1 simply because using
+  ** -1000000 as the incorrect index into db->aDb[] is much 
+  ** more likely to cause a segfault than -1 (of course there are assert()
+  ** statements too, but it never hurts to play the odds).
+  */
+  assert( sqlite3_mutex_held(db->mutex) );
+  if( pSchema ){
+    for(i=0; ALWAYS(i<db->nDb); i++){
+      if( db->aDb[i].pSchema==pSchema ){
+        break;
+      }
+    }
+    assert( i>=0 && i<db->nDb );
+  }
+  return i;
+}
+
+/*
+** Free all memory allocations in the pParse object
+*/
+SQLITE_PRIVATE void sqlite3ParserReset(Parse *pParse){
+  if( pParse ){
+    sqlite3 *db = pParse->db;
+    sqlite3DbFree(db, pParse->aLabel);
+    sqlite3ExprListDelete(db, pParse->pConstExpr);
+  }
+}
+
+/*
+** Compile the UTF-8 encoded SQL statement zSql into a statement handle.
+*/
+static int sqlite3Prepare(
+  sqlite3 *db,              /* Database handle. */
+  const char *zSql,         /* UTF-8 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  int saveSqlFlag,          /* True to copy SQL text into the sqlite3_stmt */
+  Vdbe *pReprepare,         /* VM being reprepared */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const char **pzTail       /* OUT: End of parsed string */
+){
+  Parse *pParse;            /* Parsing context */
+  char *zErrMsg = 0;        /* Error message */
+  int rc = SQLITE_OK;       /* Result code */
+  int i;                    /* Loop counter */
+
+  /* Allocate the parsing context */
+  pParse = sqlite3StackAllocZero(db, sizeof(*pParse));
+  if( pParse==0 ){
+    rc = SQLITE_NOMEM;
+    goto end_prepare;
+  }
+  pParse->pReprepare = pReprepare;
+  assert( ppStmt && *ppStmt==0 );
+  assert( !db->mallocFailed );
+  assert( sqlite3_mutex_held(db->mutex) );
+
+  /* Check to verify that it is possible to get a read lock on all
+  ** database schemas.  The inability to get a read lock indicates that
+  ** some other database connection is holding a write-lock, which in
+  ** turn means that the other connection has made uncommitted changes
+  ** to the schema.
+  **
+  ** Were we to proceed and prepare the statement against the uncommitted
+  ** schema changes and if those schema changes are subsequently rolled
+  ** back and different changes are made in their place, then when this
+  ** prepared statement goes to run the schema cookie would fail to detect
+  ** the schema change.  Disaster would follow.
+  **
+  ** This thread is currently holding mutexes on all Btrees (because
+  ** of the sqlite3BtreeEnterAll() in sqlite3LockAndPrepare()) so it
+  ** is not possible for another thread to start a new schema change
+  ** while this routine is running.  Hence, we do not need to hold 
+  ** locks on the schema, we just need to make sure nobody else is 
+  ** holding them.
+  **
+  ** Note that setting READ_UNCOMMITTED overrides most lock detection,
+  ** but it does *not* override schema lock detection, so this all still
+  ** works even if READ_UNCOMMITTED is set.
+  */
+  for(i=0; i<db->nDb; i++) {
+    Btree *pBt = db->aDb[i].pBt;
+    if( pBt ){
+      assert( sqlite3BtreeHoldsMutex(pBt) );
+      rc = sqlite3BtreeSchemaLocked(pBt);
+      if( rc ){
+        const char *zDb = db->aDb[i].zName;
+        sqlite3ErrorWithMsg(db, rc, "database schema is locked: %s", zDb);
+        testcase( db->flags & SQLITE_ReadUncommitted );
+        goto end_prepare;
+      }
+    }
+  }
+
+  sqlite3VtabUnlockList(db);
+
+  pParse->db = db;
+  pParse->nQueryLoop = 0;  /* Logarithmic, so 0 really means 1 */
+  if( nBytes>=0 && (nBytes==0 || zSql[nBytes-1]!=0) ){
+    char *zSqlCopy;
+    int mxLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
+    testcase( nBytes==mxLen );
+    testcase( nBytes==mxLen+1 );
+    if( nBytes>mxLen ){
+      sqlite3ErrorWithMsg(db, SQLITE_TOOBIG, "statement too long");
+      rc = sqlite3ApiExit(db, SQLITE_TOOBIG);
+      goto end_prepare;
+    }
+    zSqlCopy = sqlite3DbStrNDup(db, zSql, nBytes);
+    if( zSqlCopy ){
+      sqlite3RunParser(pParse, zSqlCopy, &zErrMsg);
+      sqlite3DbFree(db, zSqlCopy);
+      pParse->zTail = &zSql[pParse->zTail-zSqlCopy];
+    }else{
+      pParse->zTail = &zSql[nBytes];
+    }
+  }else{
+    sqlite3RunParser(pParse, zSql, &zErrMsg);
+  }
+  assert( 0==pParse->nQueryLoop );
+
+  if( db->mallocFailed ){
+    pParse->rc = SQLITE_NOMEM;
+  }
+  if( pParse->rc==SQLITE_DONE ) pParse->rc = SQLITE_OK;
+  if( pParse->checkSchema ){
+    schemaIsValid(pParse);
+  }
+  if( db->mallocFailed ){
+    pParse->rc = SQLITE_NOMEM;
+  }
+  if( pzTail ){
+    *pzTail = pParse->zTail;
+  }
+  rc = pParse->rc;
+
+#ifndef SQLITE_OMIT_EXPLAIN
+  if( rc==SQLITE_OK && pParse->pVdbe && pParse->explain ){
+    static const char * const azColName[] = {
+       "addr", "opcode", "p1", "p2", "p3", "p4", "p5", "comment",
+       "selectid", "order", "from", "detail"
+    };
+    int iFirst, mx;
+    if( pParse->explain==2 ){
+      sqlite3VdbeSetNumCols(pParse->pVdbe, 4);
+      iFirst = 8;
+      mx = 12;
+    }else{
+      sqlite3VdbeSetNumCols(pParse->pVdbe, 8);
+      iFirst = 0;
+      mx = 8;
+    }
+    for(i=iFirst; i<mx; i++){
+      sqlite3VdbeSetColName(pParse->pVdbe, i-iFirst, COLNAME_NAME,
+                            azColName[i], SQLITE_STATIC);
+    }
+  }
+#endif
+
+  if( db->init.busy==0 ){
+    Vdbe *pVdbe = pParse->pVdbe;
+    sqlite3VdbeSetSql(pVdbe, zSql, (int)(pParse->zTail-zSql), saveSqlFlag);
+  }
+  if( pParse->pVdbe && (rc!=SQLITE_OK || db->mallocFailed) ){
+    sqlite3VdbeFinalize(pParse->pVdbe);
+    assert(!(*ppStmt));
+  }else{
+    *ppStmt = (sqlite3_stmt*)pParse->pVdbe;
+  }
+
+  if( zErrMsg ){
+    sqlite3ErrorWithMsg(db, rc, "%s", zErrMsg);
+    sqlite3DbFree(db, zErrMsg);
+  }else{
+    sqlite3Error(db, rc);
+  }
+
+  /* Delete any TriggerPrg structures allocated while parsing this statement. */
+  while( pParse->pTriggerPrg ){
+    TriggerPrg *pT = pParse->pTriggerPrg;
+    pParse->pTriggerPrg = pT->pNext;
+    sqlite3DbFree(db, pT);
+  }
+
+end_prepare:
+
+  sqlite3ParserReset(pParse);
+  sqlite3StackFree(db, pParse);
+  rc = sqlite3ApiExit(db, rc);
+  assert( (rc&db->errMask)==rc );
+  return rc;
+}
+static int sqlite3LockAndPrepare(
+  sqlite3 *db,              /* Database handle. */
+  const char *zSql,         /* UTF-8 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  int saveSqlFlag,          /* True to copy SQL text into the sqlite3_stmt */
+  Vdbe *pOld,               /* VM being reprepared */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const char **pzTail       /* OUT: End of parsed string */
+){
+  int rc;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( ppStmt==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *ppStmt = 0;
+  if( !sqlite3SafetyCheckOk(db)||zSql==0 ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  sqlite3_mutex_enter(db->mutex);
+  sqlite3BtreeEnterAll(db);
+  rc = sqlite3Prepare(db, zSql, nBytes, saveSqlFlag, pOld, ppStmt, pzTail);
+  if( rc==SQLITE_SCHEMA ){
+    sqlite3_finalize(*ppStmt);
+    rc = sqlite3Prepare(db, zSql, nBytes, saveSqlFlag, pOld, ppStmt, pzTail);
+  }
+  sqlite3BtreeLeaveAll(db);
+  sqlite3_mutex_leave(db->mutex);
+  assert( rc==SQLITE_OK || *ppStmt==0 );
+  return rc;
+}
+
+/*
+** Rerun the compilation of a statement after a schema change.
+**
+** If the statement is successfully recompiled, return SQLITE_OK. Otherwise,
+** if the statement cannot be recompiled because another connection has
+** locked the sqlite3_master table, return SQLITE_LOCKED. If any other error
+** occurs, return SQLITE_SCHEMA.
+*/
+SQLITE_PRIVATE int sqlite3Reprepare(Vdbe *p){
+  int rc;
+  sqlite3_stmt *pNew;
+  const char *zSql;
+  sqlite3 *db;
+
+  assert( sqlite3_mutex_held(sqlite3VdbeDb(p)->mutex) );
+  zSql = sqlite3_sql((sqlite3_stmt *)p);
+  assert( zSql!=0 );  /* Reprepare only called for prepare_v2() statements */
+  db = sqlite3VdbeDb(p);
+  assert( sqlite3_mutex_held(db->mutex) );
+  rc = sqlite3LockAndPrepare(db, zSql, -1, 0, p, &pNew, 0);
+  if( rc ){
+    if( rc==SQLITE_NOMEM ){
+      db->mallocFailed = 1;
+    }
+    assert( pNew==0 );
+    return rc;
+  }else{
+    assert( pNew!=0 );
+  }
+  sqlite3VdbeSwap((Vdbe*)pNew, p);
+  sqlite3TransferBindings(pNew, (sqlite3_stmt*)p);
+  sqlite3VdbeResetStepResult((Vdbe*)pNew);
+  sqlite3VdbeFinalize((Vdbe*)pNew);
+  return SQLITE_OK;
+}
+
+
+/*
+** Two versions of the official API.  Legacy and new use.  In the legacy
+** version, the original SQL text is not saved in the prepared statement
+** and so if a schema change occurs, SQLITE_SCHEMA is returned by
+** sqlite3_step().  In the new version, the original SQL text is retained
+** and the statement is automatically recompiled if an schema change
+** occurs.
+*/
+SQLITE_API int sqlite3_prepare(
+  sqlite3 *db,              /* Database handle. */
+  const char *zSql,         /* UTF-8 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const char **pzTail       /* OUT: End of parsed string */
+){
+  int rc;
+  rc = sqlite3LockAndPrepare(db,zSql,nBytes,0,0,ppStmt,pzTail);
+  assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 );  /* VERIFY: F13021 */
+  return rc;
+}
+SQLITE_API int sqlite3_prepare_v2(
+  sqlite3 *db,              /* Database handle. */
+  const char *zSql,         /* UTF-8 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const char **pzTail       /* OUT: End of parsed string */
+){
+  int rc;
+  rc = sqlite3LockAndPrepare(db,zSql,nBytes,1,0,ppStmt,pzTail);
+  assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 );  /* VERIFY: F13021 */
+  return rc;
+}
+
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Compile the UTF-16 encoded SQL statement zSql into a statement handle.
+*/
+static int sqlite3Prepare16(
+  sqlite3 *db,              /* Database handle. */ 
+  const void *zSql,         /* UTF-16 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  int saveSqlFlag,          /* True to save SQL text into the sqlite3_stmt */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const void **pzTail       /* OUT: End of parsed string */
+){
+  /* This function currently works by first transforming the UTF-16
+  ** encoded string to UTF-8, then invoking sqlite3_prepare(). The
+  ** tricky bit is figuring out the pointer to return in *pzTail.
+  */
+  char *zSql8;
+  const char *zTail8 = 0;
+  int rc = SQLITE_OK;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( ppStmt==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *ppStmt = 0;
+  if( !sqlite3SafetyCheckOk(db)||zSql==0 ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  if( nBytes>=0 ){
+    int sz;
+    const char *z = (const char*)zSql;
+    for(sz=0; sz<nBytes && (z[sz]!=0 || z[sz+1]!=0); sz += 2){}
+    nBytes = sz;
+  }
+  sqlite3_mutex_enter(db->mutex);
+  zSql8 = sqlite3Utf16to8(db, zSql, nBytes, SQLITE_UTF16NATIVE);
+  if( zSql8 ){
+    rc = sqlite3LockAndPrepare(db, zSql8, -1, saveSqlFlag, 0, ppStmt, &zTail8);
+  }
+
+  if( zTail8 && pzTail ){
+    /* If sqlite3_prepare returns a tail pointer, we calculate the
+    ** equivalent pointer into the UTF-16 string by counting the unicode
+    ** characters between zSql8 and zTail8, and then returning a pointer
+    ** the same number of characters into the UTF-16 string.
+    */
+    int chars_parsed = sqlite3Utf8CharLen(zSql8, (int)(zTail8-zSql8));
+    *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, chars_parsed);
+  }
+  sqlite3DbFree(db, zSql8); 
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** Two versions of the official API.  Legacy and new use.  In the legacy
+** version, the original SQL text is not saved in the prepared statement
+** and so if a schema change occurs, SQLITE_SCHEMA is returned by
+** sqlite3_step().  In the new version, the original SQL text is retained
+** and the statement is automatically recompiled if an schema change
+** occurs.
+*/
+SQLITE_API int sqlite3_prepare16(
+  sqlite3 *db,              /* Database handle. */ 
+  const void *zSql,         /* UTF-16 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const void **pzTail       /* OUT: End of parsed string */
+){
+  int rc;
+  rc = sqlite3Prepare16(db,zSql,nBytes,0,ppStmt,pzTail);
+  assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 );  /* VERIFY: F13021 */
+  return rc;
+}
+SQLITE_API int sqlite3_prepare16_v2(
+  sqlite3 *db,              /* Database handle. */ 
+  const void *zSql,         /* UTF-16 encoded SQL statement. */
+  int nBytes,               /* Length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,    /* OUT: A pointer to the prepared statement */
+  const void **pzTail       /* OUT: End of parsed string */
+){
+  int rc;
+  rc = sqlite3Prepare16(db,zSql,nBytes,1,ppStmt,pzTail);
+  assert( rc==SQLITE_OK || ppStmt==0 || *ppStmt==0 );  /* VERIFY: F13021 */
+  return rc;
+}
+
+#endif /* SQLITE_OMIT_UTF16 */
+
+/************** End of prepare.c *********************************************/
+/************** Begin file select.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that are called by the parser
+** to handle SELECT statements in SQLite.
+*/
+
+/*
+** Trace output macros
+*/
+#if SELECTTRACE_ENABLED
+/***/ int sqlite3SelectTrace = 0;
+# define SELECTTRACE(K,P,S,X)  \
+  if(sqlite3SelectTrace&(K))   \
+    sqlite3DebugPrintf("%*s%s.%p: ",(P)->nSelectIndent*2-2,"",(S)->zSelName,(S)),\
+    sqlite3DebugPrintf X
+#else
+# define SELECTTRACE(K,P,S,X)
+#endif
+
+
+/*
+** An instance of the following object is used to record information about
+** how to process the DISTINCT keyword, to simplify passing that information
+** into the selectInnerLoop() routine.
+*/
+typedef struct DistinctCtx DistinctCtx;
+struct DistinctCtx {
+  u8 isTnct;      /* True if the DISTINCT keyword is present */
+  u8 eTnctType;   /* One of the WHERE_DISTINCT_* operators */
+  int tabTnct;    /* Ephemeral table used for DISTINCT processing */
+  int addrTnct;   /* Address of OP_OpenEphemeral opcode for tabTnct */
+};
+
+/*
+** An instance of the following object is used to record information about
+** the ORDER BY (or GROUP BY) clause of query is being coded.
+*/
+typedef struct SortCtx SortCtx;
+struct SortCtx {
+  ExprList *pOrderBy;   /* The ORDER BY (or GROUP BY clause) */
+  int nOBSat;           /* Number of ORDER BY terms satisfied by indices */
+  int iECursor;         /* Cursor number for the sorter */
+  int regReturn;        /* Register holding block-output return address */
+  int labelBkOut;       /* Start label for the block-output subroutine */
+  int addrSortIndex;    /* Address of the OP_SorterOpen or OP_OpenEphemeral */
+  u8 sortFlags;         /* Zero or more SORTFLAG_* bits */
+};
+#define SORTFLAG_UseSorter  0x01   /* Use SorterOpen instead of OpenEphemeral */
+
+/*
+** Delete all the content of a Select structure.  Deallocate the structure
+** itself only if bFree is true.
+*/
+static void clearSelect(sqlite3 *db, Select *p, int bFree){
+  while( p ){
+    Select *pPrior = p->pPrior;
+    sqlite3ExprListDelete(db, p->pEList);
+    sqlite3SrcListDelete(db, p->pSrc);
+    sqlite3ExprDelete(db, p->pWhere);
+    sqlite3ExprListDelete(db, p->pGroupBy);
+    sqlite3ExprDelete(db, p->pHaving);
+    sqlite3ExprListDelete(db, p->pOrderBy);
+    sqlite3ExprDelete(db, p->pLimit);
+    sqlite3ExprDelete(db, p->pOffset);
+    sqlite3WithDelete(db, p->pWith);
+    if( bFree ) sqlite3DbFree(db, p);
+    p = pPrior;
+    bFree = 1;
+  }
+}
+
+/*
+** Initialize a SelectDest structure.
+*/
+SQLITE_PRIVATE void sqlite3SelectDestInit(SelectDest *pDest, int eDest, int iParm){
+  pDest->eDest = (u8)eDest;
+  pDest->iSDParm = iParm;
+  pDest->affSdst = 0;
+  pDest->iSdst = 0;
+  pDest->nSdst = 0;
+}
+
+
+/*
+** Allocate a new Select structure and return a pointer to that
+** structure.
+*/
+SQLITE_PRIVATE Select *sqlite3SelectNew(
+  Parse *pParse,        /* Parsing context */
+  ExprList *pEList,     /* which columns to include in the result */
+  SrcList *pSrc,        /* the FROM clause -- which tables to scan */
+  Expr *pWhere,         /* the WHERE clause */
+  ExprList *pGroupBy,   /* the GROUP BY clause */
+  Expr *pHaving,        /* the HAVING clause */
+  ExprList *pOrderBy,   /* the ORDER BY clause */
+  u16 selFlags,         /* Flag parameters, such as SF_Distinct */
+  Expr *pLimit,         /* LIMIT value.  NULL means not used */
+  Expr *pOffset         /* OFFSET value.  NULL means no offset */
+){
+  Select *pNew;
+  Select standin;
+  sqlite3 *db = pParse->db;
+  pNew = sqlite3DbMallocZero(db, sizeof(*pNew) );
+  assert( db->mallocFailed || !pOffset || pLimit ); /* OFFSET implies LIMIT */
+  if( pNew==0 ){
+    assert( db->mallocFailed );
+    pNew = &standin;
+    memset(pNew, 0, sizeof(*pNew));
+  }
+  if( pEList==0 ){
+    pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db,TK_ALL,0));
+  }
+  pNew->pEList = pEList;
+  if( pSrc==0 ) pSrc = sqlite3DbMallocZero(db, sizeof(*pSrc));
+  pNew->pSrc = pSrc;
+  pNew->pWhere = pWhere;
+  pNew->pGroupBy = pGroupBy;
+  pNew->pHaving = pHaving;
+  pNew->pOrderBy = pOrderBy;
+  pNew->selFlags = selFlags;
+  pNew->op = TK_SELECT;
+  pNew->pLimit = pLimit;
+  pNew->pOffset = pOffset;
+  assert( pOffset==0 || pLimit!=0 );
+  pNew->addrOpenEphm[0] = -1;
+  pNew->addrOpenEphm[1] = -1;
+  if( db->mallocFailed ) {
+    clearSelect(db, pNew, pNew!=&standin);
+    pNew = 0;
+  }else{
+    assert( pNew->pSrc!=0 || pParse->nErr>0 );
+  }
+  assert( pNew!=&standin );
+  return pNew;
+}
+
+#if SELECTTRACE_ENABLED
+/*
+** Set the name of a Select object
+*/
+SQLITE_PRIVATE void sqlite3SelectSetName(Select *p, const char *zName){
+  if( p && zName ){
+    sqlite3_snprintf(sizeof(p->zSelName), p->zSelName, "%s", zName);
+  }
+}
+#endif
+
+
+/*
+** Delete the given Select structure and all of its substructures.
+*/
+SQLITE_PRIVATE void sqlite3SelectDelete(sqlite3 *db, Select *p){
+  clearSelect(db, p, 1);
+}
+
+/*
+** Return a pointer to the right-most SELECT statement in a compound.
+*/
+static Select *findRightmost(Select *p){
+  while( p->pNext ) p = p->pNext;
+  return p;
+}
+
+/*
+** Given 1 to 3 identifiers preceding the JOIN keyword, determine the
+** type of join.  Return an integer constant that expresses that type
+** in terms of the following bit values:
+**
+**     JT_INNER
+**     JT_CROSS
+**     JT_OUTER
+**     JT_NATURAL
+**     JT_LEFT
+**     JT_RIGHT
+**
+** A full outer join is the combination of JT_LEFT and JT_RIGHT.
+**
+** If an illegal or unsupported join type is seen, then still return
+** a join type, but put an error in the pParse structure.
+*/
+SQLITE_PRIVATE int sqlite3JoinType(Parse *pParse, Token *pA, Token *pB, Token *pC){
+  int jointype = 0;
+  Token *apAll[3];
+  Token *p;
+                             /*   0123456789 123456789 123456789 123 */
+  static const char zKeyText[] = "naturaleftouterightfullinnercross";
+  static const struct {
+    u8 i;        /* Beginning of keyword text in zKeyText[] */
+    u8 nChar;    /* Length of the keyword in characters */
+    u8 code;     /* Join type mask */
+  } aKeyword[] = {
+    /* natural */ { 0,  7, JT_NATURAL                },
+    /* left    */ { 6,  4, JT_LEFT|JT_OUTER          },
+    /* outer   */ { 10, 5, JT_OUTER                  },
+    /* right   */ { 14, 5, JT_RIGHT|JT_OUTER         },
+    /* full    */ { 19, 4, JT_LEFT|JT_RIGHT|JT_OUTER },
+    /* inner   */ { 23, 5, JT_INNER                  },
+    /* cross   */ { 28, 5, JT_INNER|JT_CROSS         },
+  };
+  int i, j;
+  apAll[0] = pA;
+  apAll[1] = pB;
+  apAll[2] = pC;
+  for(i=0; i<3 && apAll[i]; i++){
+    p = apAll[i];
+    for(j=0; j<ArraySize(aKeyword); j++){
+      if( p->n==aKeyword[j].nChar 
+          && sqlite3StrNICmp((char*)p->z, &zKeyText[aKeyword[j].i], p->n)==0 ){
+        jointype |= aKeyword[j].code;
+        break;
+      }
+    }
+    testcase( j==0 || j==1 || j==2 || j==3 || j==4 || j==5 || j==6 );
+    if( j>=ArraySize(aKeyword) ){
+      jointype |= JT_ERROR;
+      break;
+    }
+  }
+  if(
+     (jointype & (JT_INNER|JT_OUTER))==(JT_INNER|JT_OUTER) ||
+     (jointype & JT_ERROR)!=0
+  ){
+    const char *zSp = " ";
+    assert( pB!=0 );
+    if( pC==0 ){ zSp++; }
+    sqlite3ErrorMsg(pParse, "unknown or unsupported join type: "
+       "%T %T%s%T", pA, pB, zSp, pC);
+    jointype = JT_INNER;
+  }else if( (jointype & JT_OUTER)!=0 
+         && (jointype & (JT_LEFT|JT_RIGHT))!=JT_LEFT ){
+    sqlite3ErrorMsg(pParse, 
+      "RIGHT and FULL OUTER JOINs are not currently supported");
+    jointype = JT_INNER;
+  }
+  return jointype;
+}
+
+/*
+** Return the index of a column in a table.  Return -1 if the column
+** is not contained in the table.
+*/
+static int columnIndex(Table *pTab, const char *zCol){
+  int i;
+  for(i=0; i<pTab->nCol; i++){
+    if( sqlite3StrICmp(pTab->aCol[i].zName, zCol)==0 ) return i;
+  }
+  return -1;
+}
+
+/*
+** Search the first N tables in pSrc, from left to right, looking for a
+** table that has a column named zCol.  
+**
+** When found, set *piTab and *piCol to the table index and column index
+** of the matching column and return TRUE.
+**
+** If not found, return FALSE.
+*/
+static int tableAndColumnIndex(
+  SrcList *pSrc,       /* Array of tables to search */
+  int N,               /* Number of tables in pSrc->a[] to search */
+  const char *zCol,    /* Name of the column we are looking for */
+  int *piTab,          /* Write index of pSrc->a[] here */
+  int *piCol           /* Write index of pSrc->a[*piTab].pTab->aCol[] here */
+){
+  int i;               /* For looping over tables in pSrc */
+  int iCol;            /* Index of column matching zCol */
+
+  assert( (piTab==0)==(piCol==0) );  /* Both or neither are NULL */
+  for(i=0; i<N; i++){
+    iCol = columnIndex(pSrc->a[i].pTab, zCol);
+    if( iCol>=0 ){
+      if( piTab ){
+        *piTab = i;
+        *piCol = iCol;
+      }
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** This function is used to add terms implied by JOIN syntax to the
+** WHERE clause expression of a SELECT statement. The new term, which
+** is ANDed with the existing WHERE clause, is of the form:
+**
+**    (tab1.col1 = tab2.col2)
+**
+** where tab1 is the iSrc'th table in SrcList pSrc and tab2 is the 
+** (iSrc+1)'th. Column col1 is column iColLeft of tab1, and col2 is
+** column iColRight of tab2.
+*/
+static void addWhereTerm(
+  Parse *pParse,                  /* Parsing context */
+  SrcList *pSrc,                  /* List of tables in FROM clause */
+  int iLeft,                      /* Index of first table to join in pSrc */
+  int iColLeft,                   /* Index of column in first table */
+  int iRight,                     /* Index of second table in pSrc */
+  int iColRight,                  /* Index of column in second table */
+  int isOuterJoin,                /* True if this is an OUTER join */
+  Expr **ppWhere                  /* IN/OUT: The WHERE clause to add to */
+){
+  sqlite3 *db = pParse->db;
+  Expr *pE1;
+  Expr *pE2;
+  Expr *pEq;
+
+  assert( iLeft<iRight );
+  assert( pSrc->nSrc>iRight );
+  assert( pSrc->a[iLeft].pTab );
+  assert( pSrc->a[iRight].pTab );
+
+  pE1 = sqlite3CreateColumnExpr(db, pSrc, iLeft, iColLeft);
+  pE2 = sqlite3CreateColumnExpr(db, pSrc, iRight, iColRight);
+
+  pEq = sqlite3PExpr(pParse, TK_EQ, pE1, pE2, 0);
+  if( pEq && isOuterJoin ){
+    ExprSetProperty(pEq, EP_FromJoin);
+    assert( !ExprHasProperty(pEq, EP_TokenOnly|EP_Reduced) );
+    ExprSetVVAProperty(pEq, EP_NoReduce);
+    pEq->iRightJoinTable = (i16)pE2->iTable;
+  }
+  *ppWhere = sqlite3ExprAnd(db, *ppWhere, pEq);
+}
+
+/*
+** Set the EP_FromJoin property on all terms of the given expression.
+** And set the Expr.iRightJoinTable to iTable for every term in the
+** expression.
+**
+** The EP_FromJoin property is used on terms of an expression to tell
+** the LEFT OUTER JOIN processing logic that this term is part of the
+** join restriction specified in the ON or USING clause and not a part
+** of the more general WHERE clause.  These terms are moved over to the
+** WHERE clause during join processing but we need to remember that they
+** originated in the ON or USING clause.
+**
+** The Expr.iRightJoinTable tells the WHERE clause processing that the
+** expression depends on table iRightJoinTable even if that table is not
+** explicitly mentioned in the expression.  That information is needed
+** for cases like this:
+**
+**    SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.b AND t1.x=5
+**
+** The where clause needs to defer the handling of the t1.x=5
+** term until after the t2 loop of the join.  In that way, a
+** NULL t2 row will be inserted whenever t1.x!=5.  If we do not
+** defer the handling of t1.x=5, it will be processed immediately
+** after the t1 loop and rows with t1.x!=5 will never appear in
+** the output, which is incorrect.
+*/
+static void setJoinExpr(Expr *p, int iTable){
+  while( p ){
+    ExprSetProperty(p, EP_FromJoin);
+    assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) );
+    ExprSetVVAProperty(p, EP_NoReduce);
+    p->iRightJoinTable = (i16)iTable;
+    setJoinExpr(p->pLeft, iTable);
+    p = p->pRight;
+  } 
+}
+
+/*
+** This routine processes the join information for a SELECT statement.
+** ON and USING clauses are converted into extra terms of the WHERE clause.
+** NATURAL joins also create extra WHERE clause terms.
+**
+** The terms of a FROM clause are contained in the Select.pSrc structure.
+** The left most table is the first entry in Select.pSrc.  The right-most
+** table is the last entry.  The join operator is held in the entry to
+** the left.  Thus entry 0 contains the join operator for the join between
+** entries 0 and 1.  Any ON or USING clauses associated with the join are
+** also attached to the left entry.
+**
+** This routine returns the number of errors encountered.
+*/
+static int sqliteProcessJoin(Parse *pParse, Select *p){
+  SrcList *pSrc;                  /* All tables in the FROM clause */
+  int i, j;                       /* Loop counters */
+  struct SrcList_item *pLeft;     /* Left table being joined */
+  struct SrcList_item *pRight;    /* Right table being joined */
+
+  pSrc = p->pSrc;
+  pLeft = &pSrc->a[0];
+  pRight = &pLeft[1];
+  for(i=0; i<pSrc->nSrc-1; i++, pRight++, pLeft++){
+    Table *pLeftTab = pLeft->pTab;
+    Table *pRightTab = pRight->pTab;
+    int isOuter;
+
+    if( NEVER(pLeftTab==0 || pRightTab==0) ) continue;
+    isOuter = (pRight->jointype & JT_OUTER)!=0;
+
+    /* When the NATURAL keyword is present, add WHERE clause terms for
+    ** every column that the two tables have in common.
+    */
+    if( pRight->jointype & JT_NATURAL ){
+      if( pRight->pOn || pRight->pUsing ){
+        sqlite3ErrorMsg(pParse, "a NATURAL join may not have "
+           "an ON or USING clause", 0);
+        return 1;
+      }
+      for(j=0; j<pRightTab->nCol; j++){
+        char *zName;   /* Name of column in the right table */
+        int iLeft;     /* Matching left table */
+        int iLeftCol;  /* Matching column in the left table */
+
+        zName = pRightTab->aCol[j].zName;
+        if( tableAndColumnIndex(pSrc, i+1, zName, &iLeft, &iLeftCol) ){
+          addWhereTerm(pParse, pSrc, iLeft, iLeftCol, i+1, j,
+                       isOuter, &p->pWhere);
+        }
+      }
+    }
+
+    /* Disallow both ON and USING clauses in the same join
+    */
+    if( pRight->pOn && pRight->pUsing ){
+      sqlite3ErrorMsg(pParse, "cannot have both ON and USING "
+        "clauses in the same join");
+      return 1;
+    }
+
+    /* Add the ON clause to the end of the WHERE clause, connected by
+    ** an AND operator.
+    */
+    if( pRight->pOn ){
+      if( isOuter ) setJoinExpr(pRight->pOn, pRight->iCursor);
+      p->pWhere = sqlite3ExprAnd(pParse->db, p->pWhere, pRight->pOn);
+      pRight->pOn = 0;
+    }
+
+    /* Create extra terms on the WHERE clause for each column named
+    ** in the USING clause.  Example: If the two tables to be joined are 
+    ** A and B and the USING clause names X, Y, and Z, then add this
+    ** to the WHERE clause:    A.X=B.X AND A.Y=B.Y AND A.Z=B.Z
+    ** Report an error if any column mentioned in the USING clause is
+    ** not contained in both tables to be joined.
+    */
+    if( pRight->pUsing ){
+      IdList *pList = pRight->pUsing;
+      for(j=0; j<pList->nId; j++){
+        char *zName;     /* Name of the term in the USING clause */
+        int iLeft;       /* Table on the left with matching column name */
+        int iLeftCol;    /* Column number of matching column on the left */
+        int iRightCol;   /* Column number of matching column on the right */
+
+        zName = pList->a[j].zName;
+        iRightCol = columnIndex(pRightTab, zName);
+        if( iRightCol<0
+         || !tableAndColumnIndex(pSrc, i+1, zName, &iLeft, &iLeftCol)
+        ){
+          sqlite3ErrorMsg(pParse, "cannot join using column %s - column "
+            "not present in both tables", zName);
+          return 1;
+        }
+        addWhereTerm(pParse, pSrc, iLeft, iLeftCol, i+1, iRightCol,
+                     isOuter, &p->pWhere);
+      }
+    }
+  }
+  return 0;
+}
+
+/* Forward reference */
+static KeyInfo *keyInfoFromExprList(
+  Parse *pParse,       /* Parsing context */
+  ExprList *pList,     /* Form the KeyInfo object from this ExprList */
+  int iStart,          /* Begin with this column of pList */
+  int nExtra           /* Add this many extra columns to the end */
+);
+
+/*
+** Generate code that will push the record in registers regData
+** through regData+nData-1 onto the sorter.
+*/
+static void pushOntoSorter(
+  Parse *pParse,         /* Parser context */
+  SortCtx *pSort,        /* Information about the ORDER BY clause */
+  Select *pSelect,       /* The whole SELECT statement */
+  int regData,           /* First register holding data to be sorted */
+  int nData,             /* Number of elements in the data array */
+  int nPrefixReg         /* No. of reg prior to regData available for use */
+){
+  Vdbe *v = pParse->pVdbe;                         /* Stmt under construction */
+  int bSeq = ((pSort->sortFlags & SORTFLAG_UseSorter)==0);
+  int nExpr = pSort->pOrderBy->nExpr;              /* No. of ORDER BY terms */
+  int nBase = nExpr + bSeq + nData;                /* Fields in sorter record */
+  int regBase;                                     /* Regs for sorter record */
+  int regRecord = ++pParse->nMem;                  /* Assembled sorter record */
+  int nOBSat = pSort->nOBSat;                      /* ORDER BY terms to skip */
+  int op;                            /* Opcode to add sorter record to sorter */
+
+  assert( bSeq==0 || bSeq==1 );
+  if( nPrefixReg ){
+    assert( nPrefixReg==nExpr+bSeq );
+    regBase = regData - nExpr - bSeq;
+  }else{
+    regBase = pParse->nMem + 1;
+    pParse->nMem += nBase;
+  }
+  sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, SQLITE_ECEL_DUP);
+  if( bSeq ){
+    sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr);
+  }
+  if( nPrefixReg==0 ){
+    sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+bSeq, nData);
+  }
+
+  sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nBase-nOBSat, regRecord);
+  if( nOBSat>0 ){
+    int regPrevKey;   /* The first nOBSat columns of the previous row */
+    int addrFirst;    /* Address of the OP_IfNot opcode */
+    int addrJmp;      /* Address of the OP_Jump opcode */
+    VdbeOp *pOp;      /* Opcode that opens the sorter */
+    int nKey;         /* Number of sorting key columns, including OP_Sequence */
+    KeyInfo *pKI;     /* Original KeyInfo on the sorter table */
+
+    regPrevKey = pParse->nMem+1;
+    pParse->nMem += pSort->nOBSat;
+    nKey = nExpr - pSort->nOBSat + bSeq;
+    if( bSeq ){
+      addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); 
+    }else{
+      addrFirst = sqlite3VdbeAddOp1(v, OP_SequenceTest, pSort->iECursor);
+    }
+    VdbeCoverage(v);
+    sqlite3VdbeAddOp3(v, OP_Compare, regPrevKey, regBase, pSort->nOBSat);
+    pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex);
+    if( pParse->db->mallocFailed ) return;
+    pOp->p2 = nKey + nData;
+    pKI = pOp->p4.pKeyInfo;
+    memset(pKI->aSortOrder, 0, pKI->nField); /* Makes OP_Jump below testable */
+    sqlite3VdbeChangeP4(v, -1, (char*)pKI, P4_KEYINFO);
+    testcase( pKI->nXField>2 );
+    pOp->p4.pKeyInfo = keyInfoFromExprList(pParse, pSort->pOrderBy, nOBSat,
+                                           pKI->nXField-1);
+    addrJmp = sqlite3VdbeCurrentAddr(v);
+    sqlite3VdbeAddOp3(v, OP_Jump, addrJmp+1, 0, addrJmp+1); VdbeCoverage(v);
+    pSort->labelBkOut = sqlite3VdbeMakeLabel(v);
+    pSort->regReturn = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut);
+    sqlite3VdbeAddOp1(v, OP_ResetSorter, pSort->iECursor);
+    sqlite3VdbeJumpHere(v, addrFirst);
+    sqlite3ExprCodeMove(pParse, regBase, regPrevKey, pSort->nOBSat);
+    sqlite3VdbeJumpHere(v, addrJmp);
+  }
+  if( pSort->sortFlags & SORTFLAG_UseSorter ){
+    op = OP_SorterInsert;
+  }else{
+    op = OP_IdxInsert;
+  }
+  sqlite3VdbeAddOp2(v, op, pSort->iECursor, regRecord);
+  if( pSelect->iLimit ){
+    int addr1, addr2;
+    int iLimit;
+    if( pSelect->iOffset ){
+      iLimit = pSelect->iOffset+1;
+    }else{
+      iLimit = pSelect->iLimit;
+    }
+    addr1 = sqlite3VdbeAddOp1(v, OP_IfZero, iLimit); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_AddImm, iLimit, -1);
+    addr2 = sqlite3VdbeAddOp0(v, OP_Goto);
+    sqlite3VdbeJumpHere(v, addr1);
+    sqlite3VdbeAddOp1(v, OP_Last, pSort->iECursor);
+    sqlite3VdbeAddOp1(v, OP_Delete, pSort->iECursor);
+    sqlite3VdbeJumpHere(v, addr2);
+  }
+}
+
+/*
+** Add code to implement the OFFSET
+*/
+static void codeOffset(
+  Vdbe *v,          /* Generate code into this VM */
+  int iOffset,      /* Register holding the offset counter */
+  int iContinue     /* Jump here to skip the current record */
+){
+  if( iOffset>0 ){
+    int addr;
+    addr = sqlite3VdbeAddOp3(v, OP_IfNeg, iOffset, 0, -1); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, iContinue);
+    VdbeComment((v, "skip OFFSET records"));
+    sqlite3VdbeJumpHere(v, addr);
+  }
+}
+
+/*
+** Add code that will check to make sure the N registers starting at iMem
+** form a distinct entry.  iTab is a sorting index that holds previously
+** seen combinations of the N values.  A new entry is made in iTab
+** if the current N values are new.
+**
+** A jump to addrRepeat is made and the N+1 values are popped from the
+** stack if the top N elements are not distinct.
+*/
+static void codeDistinct(
+  Parse *pParse,     /* Parsing and code generating context */
+  int iTab,          /* A sorting index used to test for distinctness */
+  int addrRepeat,    /* Jump to here if not distinct */
+  int N,             /* Number of elements */
+  int iMem           /* First element */
+){
+  Vdbe *v;
+  int r1;
+
+  v = pParse->pVdbe;
+  r1 = sqlite3GetTempReg(pParse);
+  sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, iMem, N); VdbeCoverage(v);
+  sqlite3VdbeAddOp3(v, OP_MakeRecord, iMem, N, r1);
+  sqlite3VdbeAddOp2(v, OP_IdxInsert, iTab, r1);
+  sqlite3ReleaseTempReg(pParse, r1);
+}
+
+#ifndef SQLITE_OMIT_SUBQUERY
+/*
+** Generate an error message when a SELECT is used within a subexpression
+** (example:  "a IN (SELECT * FROM table)") but it has more than 1 result
+** column.  We do this in a subroutine because the error used to occur
+** in multiple places.  (The error only occurs in one place now, but we
+** retain the subroutine to minimize code disruption.)
+*/
+static int checkForMultiColumnSelectError(
+  Parse *pParse,       /* Parse context. */
+  SelectDest *pDest,   /* Destination of SELECT results */
+  int nExpr            /* Number of result columns returned by SELECT */
+){
+  int eDest = pDest->eDest;
+  if( nExpr>1 && (eDest==SRT_Mem || eDest==SRT_Set) ){
+    sqlite3ErrorMsg(pParse, "only a single result allowed for "
+       "a SELECT that is part of an expression");
+    return 1;
+  }else{
+    return 0;
+  }
+}
+#endif
+
+/*
+** This routine generates the code for the inside of the inner loop
+** of a SELECT.
+**
+** If srcTab is negative, then the pEList expressions
+** are evaluated in order to get the data for this row.  If srcTab is
+** zero or more, then data is pulled from srcTab and pEList is used only 
+** to get number columns and the datatype for each column.
+*/
+static void selectInnerLoop(
+  Parse *pParse,          /* The parser context */
+  Select *p,              /* The complete select statement being coded */
+  ExprList *pEList,       /* List of values being extracted */
+  int srcTab,             /* Pull data from this table */
+  SortCtx *pSort,         /* If not NULL, info on how to process ORDER BY */
+  DistinctCtx *pDistinct, /* If not NULL, info on how to process DISTINCT */
+  SelectDest *pDest,      /* How to dispose of the results */
+  int iContinue,          /* Jump here to continue with next row */
+  int iBreak              /* Jump here to break out of the inner loop */
+){
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  int hasDistinct;        /* True if the DISTINCT keyword is present */
+  int regResult;              /* Start of memory holding result set */
+  int eDest = pDest->eDest;   /* How to dispose of results */
+  int iParm = pDest->iSDParm; /* First argument to disposal method */
+  int nResultCol;             /* Number of result columns */
+  int nPrefixReg = 0;         /* Number of extra registers before regResult */
+
+  assert( v );
+  assert( pEList!=0 );
+  hasDistinct = pDistinct ? pDistinct->eTnctType : WHERE_DISTINCT_NOOP;
+  if( pSort && pSort->pOrderBy==0 ) pSort = 0;
+  if( pSort==0 && !hasDistinct ){
+    assert( iContinue!=0 );
+    codeOffset(v, p->iOffset, iContinue);
+  }
+
+  /* Pull the requested columns.
+  */
+  nResultCol = pEList->nExpr;
+
+  if( pDest->iSdst==0 ){
+    if( pSort ){
+      nPrefixReg = pSort->pOrderBy->nExpr;
+      if( !(pSort->sortFlags & SORTFLAG_UseSorter) ) nPrefixReg++;
+      pParse->nMem += nPrefixReg;
+    }
+    pDest->iSdst = pParse->nMem+1;
+    pParse->nMem += nResultCol;
+  }else if( pDest->iSdst+nResultCol > pParse->nMem ){
+    /* This is an error condition that can result, for example, when a SELECT
+    ** on the right-hand side of an INSERT contains more result columns than
+    ** there are columns in the table on the left.  The error will be caught
+    ** and reported later.  But we need to make sure enough memory is allocated
+    ** to avoid other spurious errors in the meantime. */
+    pParse->nMem += nResultCol;
+  }
+  pDest->nSdst = nResultCol;
+  regResult = pDest->iSdst;
+  if( srcTab>=0 ){
+    for(i=0; i<nResultCol; i++){
+      sqlite3VdbeAddOp3(v, OP_Column, srcTab, i, regResult+i);
+      VdbeComment((v, "%s", pEList->a[i].zName));
+    }
+  }else if( eDest!=SRT_Exists ){
+    /* If the destination is an EXISTS(...) expression, the actual
+    ** values returned by the SELECT are not required.
+    */
+    sqlite3ExprCodeExprList(pParse, pEList, regResult,
+                  (eDest==SRT_Output||eDest==SRT_Coroutine)?SQLITE_ECEL_DUP:0);
+  }
+
+  /* If the DISTINCT keyword was present on the SELECT statement
+  ** and this row has been seen before, then do not make this row
+  ** part of the result.
+  */
+  if( hasDistinct ){
+    switch( pDistinct->eTnctType ){
+      case WHERE_DISTINCT_ORDERED: {
+        VdbeOp *pOp;            /* No longer required OpenEphemeral instr. */
+        int iJump;              /* Jump destination */
+        int regPrev;            /* Previous row content */
+
+        /* Allocate space for the previous row */
+        regPrev = pParse->nMem+1;
+        pParse->nMem += nResultCol;
+
+        /* Change the OP_OpenEphemeral coded earlier to an OP_Null
+        ** sets the MEM_Cleared bit on the first register of the
+        ** previous value.  This will cause the OP_Ne below to always
+        ** fail on the first iteration of the loop even if the first
+        ** row is all NULLs.
+        */
+        sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
+        pOp = sqlite3VdbeGetOp(v, pDistinct->addrTnct);
+        pOp->opcode = OP_Null;
+        pOp->p1 = 1;
+        pOp->p2 = regPrev;
+
+        iJump = sqlite3VdbeCurrentAddr(v) + nResultCol;
+        for(i=0; i<nResultCol; i++){
+          CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[i].pExpr);
+          if( i<nResultCol-1 ){
+            sqlite3VdbeAddOp3(v, OP_Ne, regResult+i, iJump, regPrev+i);
+            VdbeCoverage(v);
+          }else{
+            sqlite3VdbeAddOp3(v, OP_Eq, regResult+i, iContinue, regPrev+i);
+            VdbeCoverage(v);
+           }
+          sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
+          sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
+        }
+        assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed );
+        sqlite3VdbeAddOp3(v, OP_Copy, regResult, regPrev, nResultCol-1);
+        break;
+      }
+
+      case WHERE_DISTINCT_UNIQUE: {
+        sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
+        break;
+      }
+
+      default: {
+        assert( pDistinct->eTnctType==WHERE_DISTINCT_UNORDERED );
+        codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol, regResult);
+        break;
+      }
+    }
+    if( pSort==0 ){
+      codeOffset(v, p->iOffset, iContinue);
+    }
+  }
+
+  switch( eDest ){
+    /* In this mode, write each query result to the key of the temporary
+    ** table iParm.
+    */
+#ifndef SQLITE_OMIT_COMPOUND_SELECT
+    case SRT_Union: {
+      int r1;
+      r1 = sqlite3GetTempReg(pParse);
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1);
+      sqlite3ReleaseTempReg(pParse, r1);
+      break;
+    }
+
+    /* Construct a record from the query result, but instead of
+    ** saving that record, use it as a key to delete elements from
+    ** the temporary table iParm.
+    */
+    case SRT_Except: {
+      sqlite3VdbeAddOp3(v, OP_IdxDelete, iParm, regResult, nResultCol);
+      break;
+    }
+#endif /* SQLITE_OMIT_COMPOUND_SELECT */
+
+    /* Store the result as data using a unique key.
+    */
+    case SRT_Fifo:
+    case SRT_DistFifo:
+    case SRT_Table:
+    case SRT_EphemTab: {
+      int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1);
+      testcase( eDest==SRT_Table );
+      testcase( eDest==SRT_EphemTab );
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg);
+#ifndef SQLITE_OMIT_CTE
+      if( eDest==SRT_DistFifo ){
+        /* If the destination is DistFifo, then cursor (iParm+1) is open
+        ** on an ephemeral index. If the current row is already present
+        ** in the index, do not write it to the output. If not, add the
+        ** current row to the index and proceed with writing it to the
+        ** output table as well.  */
+        int addr = sqlite3VdbeCurrentAddr(v) + 4;
+        sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); VdbeCoverage(v);
+        sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1);
+        assert( pSort==0 );
+      }
+#endif
+      if( pSort ){
+        pushOntoSorter(pParse, pSort, p, r1+nPrefixReg, 1, nPrefixReg);
+      }else{
+        int r2 = sqlite3GetTempReg(pParse);
+        sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, r2);
+        sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, r2);
+        sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+        sqlite3ReleaseTempReg(pParse, r2);
+      }
+      sqlite3ReleaseTempRange(pParse, r1, nPrefixReg+1);
+      break;
+    }
+
+#ifndef SQLITE_OMIT_SUBQUERY
+    /* If we are creating a set for an "expr IN (SELECT ...)" construct,
+    ** then there should be a single item on the stack.  Write this
+    ** item into the set table with bogus data.
+    */
+    case SRT_Set: {
+      assert( nResultCol==1 );
+      pDest->affSdst =
+                  sqlite3CompareAffinity(pEList->a[0].pExpr, pDest->affSdst);
+      if( pSort ){
+        /* At first glance you would think we could optimize out the
+        ** ORDER BY in this case since the order of entries in the set
+        ** does not matter.  But there might be a LIMIT clause, in which
+        ** case the order does matter */
+        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
+      }else{
+        int r1 = sqlite3GetTempReg(pParse);
+        sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult,1,r1, &pDest->affSdst, 1);
+        sqlite3ExprCacheAffinityChange(pParse, regResult, 1);
+        sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1);
+        sqlite3ReleaseTempReg(pParse, r1);
+      }
+      break;
+    }
+
+    /* If any row exist in the result set, record that fact and abort.
+    */
+    case SRT_Exists: {
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, iParm);
+      /* The LIMIT clause will terminate the loop for us */
+      break;
+    }
+
+    /* If this is a scalar select that is part of an expression, then
+    ** store the results in the appropriate memory cell and break out
+    ** of the scan loop.
+    */
+    case SRT_Mem: {
+      assert( nResultCol==1 );
+      if( pSort ){
+        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
+      }else{
+        assert( regResult==iParm );
+        /* The LIMIT clause will jump out of the loop for us */
+      }
+      break;
+    }
+#endif /* #ifndef SQLITE_OMIT_SUBQUERY */
+
+    case SRT_Coroutine:       /* Send data to a co-routine */
+    case SRT_Output: {        /* Return the results */
+      testcase( eDest==SRT_Coroutine );
+      testcase( eDest==SRT_Output );
+      if( pSort ){
+        pushOntoSorter(pParse, pSort, p, regResult, nResultCol, nPrefixReg);
+      }else if( eDest==SRT_Coroutine ){
+        sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
+      }else{
+        sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, nResultCol);
+        sqlite3ExprCacheAffinityChange(pParse, regResult, nResultCol);
+      }
+      break;
+    }
+
+#ifndef SQLITE_OMIT_CTE
+    /* Write the results into a priority queue that is order according to
+    ** pDest->pOrderBy (in pSO).  pDest->iSDParm (in iParm) is the cursor for an
+    ** index with pSO->nExpr+2 columns.  Build a key using pSO for the first
+    ** pSO->nExpr columns, then make sure all keys are unique by adding a
+    ** final OP_Sequence column.  The last column is the record as a blob.
+    */
+    case SRT_DistQueue:
+    case SRT_Queue: {
+      int nKey;
+      int r1, r2, r3;
+      int addrTest = 0;
+      ExprList *pSO;
+      pSO = pDest->pOrderBy;
+      assert( pSO );
+      nKey = pSO->nExpr;
+      r1 = sqlite3GetTempReg(pParse);
+      r2 = sqlite3GetTempRange(pParse, nKey+2);
+      r3 = r2+nKey+1;
+      if( eDest==SRT_DistQueue ){
+        /* If the destination is DistQueue, then cursor (iParm+1) is open
+        ** on a second ephemeral index that holds all values every previously
+        ** added to the queue. */
+        addrTest = sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, 0, 
+                                        regResult, nResultCol);
+        VdbeCoverage(v);
+      }
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r3);
+      if( eDest==SRT_DistQueue ){
+        sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r3);
+        sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+      }
+      for(i=0; i<nKey; i++){
+        sqlite3VdbeAddOp2(v, OP_SCopy,
+                          regResult + pSO->a[i].u.x.iOrderByCol - 1,
+                          r2+i);
+      }
+      sqlite3VdbeAddOp2(v, OP_Sequence, iParm, r2+nKey);
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, r2, nKey+2, r1);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1);
+      if( addrTest ) sqlite3VdbeJumpHere(v, addrTest);
+      sqlite3ReleaseTempReg(pParse, r1);
+      sqlite3ReleaseTempRange(pParse, r2, nKey+2);
+      break;
+    }
+#endif /* SQLITE_OMIT_CTE */
+
+
+
+#if !defined(SQLITE_OMIT_TRIGGER)
+    /* Discard the results.  This is used for SELECT statements inside
+    ** the body of a TRIGGER.  The purpose of such selects is to call
+    ** user-defined functions that have side effects.  We do not care
+    ** about the actual results of the select.
+    */
+    default: {
+      assert( eDest==SRT_Discard );
+      break;
+    }
+#endif
+  }
+
+  /* Jump to the end of the loop if the LIMIT is reached.  Except, if
+  ** there is a sorter, in which case the sorter has already limited
+  ** the output for us.
+  */
+  if( pSort==0 && p->iLimit ){
+    sqlite3VdbeAddOp3(v, OP_IfZero, p->iLimit, iBreak, -1); VdbeCoverage(v);
+  }
+}
+
+/*
+** Allocate a KeyInfo object sufficient for an index of N key columns and
+** X extra columns.
+*/
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){
+  KeyInfo *p = sqlite3DbMallocZero(0, 
+                   sizeof(KeyInfo) + (N+X)*(sizeof(CollSeq*)+1));
+  if( p ){
+    p->aSortOrder = (u8*)&p->aColl[N+X];
+    p->nField = (u16)N;
+    p->nXField = (u16)X;
+    p->enc = ENC(db);
+    p->db = db;
+    p->nRef = 1;
+  }else{
+    db->mallocFailed = 1;
+  }
+  return p;
+}
+
+/*
+** Deallocate a KeyInfo object
+*/
+SQLITE_PRIVATE void sqlite3KeyInfoUnref(KeyInfo *p){
+  if( p ){
+    assert( p->nRef>0 );
+    p->nRef--;
+    if( p->nRef==0 ) sqlite3DbFree(0, p);
+  }
+}
+
+/*
+** Make a new pointer to a KeyInfo object
+*/
+SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoRef(KeyInfo *p){
+  if( p ){
+    assert( p->nRef>0 );
+    p->nRef++;
+  }
+  return p;
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** Return TRUE if a KeyInfo object can be change.  The KeyInfo object
+** can only be changed if this is just a single reference to the object.
+**
+** This routine is used only inside of assert() statements.
+*/
+SQLITE_PRIVATE int sqlite3KeyInfoIsWriteable(KeyInfo *p){ return p->nRef==1; }
+#endif /* SQLITE_DEBUG */
+
+/*
+** Given an expression list, generate a KeyInfo structure that records
+** the collating sequence for each expression in that expression list.
+**
+** If the ExprList is an ORDER BY or GROUP BY clause then the resulting
+** KeyInfo structure is appropriate for initializing a virtual index to
+** implement that clause.  If the ExprList is the result set of a SELECT
+** then the KeyInfo structure is appropriate for initializing a virtual
+** index to implement a DISTINCT test.
+**
+** Space to hold the KeyInfo structure is obtained from malloc.  The calling
+** function is responsible for seeing that this structure is eventually
+** freed.
+*/
+static KeyInfo *keyInfoFromExprList(
+  Parse *pParse,       /* Parsing context */
+  ExprList *pList,     /* Form the KeyInfo object from this ExprList */
+  int iStart,          /* Begin with this column of pList */
+  int nExtra           /* Add this many extra columns to the end */
+){
+  int nExpr;
+  KeyInfo *pInfo;
+  struct ExprList_item *pItem;
+  sqlite3 *db = pParse->db;
+  int i;
+
+  nExpr = pList->nExpr;
+  pInfo = sqlite3KeyInfoAlloc(db, nExpr-iStart, nExtra+1);
+  if( pInfo ){
+    assert( sqlite3KeyInfoIsWriteable(pInfo) );
+    for(i=iStart, pItem=pList->a+iStart; i<nExpr; i++, pItem++){
+      CollSeq *pColl;
+      pColl = sqlite3ExprCollSeq(pParse, pItem->pExpr);
+      if( !pColl ) pColl = db->pDfltColl;
+      pInfo->aColl[i-iStart] = pColl;
+      pInfo->aSortOrder[i-iStart] = pItem->sortOrder;
+    }
+  }
+  return pInfo;
+}
+
+#ifndef SQLITE_OMIT_COMPOUND_SELECT
+/*
+** Name of the connection operator, used for error messages.
+*/
+static const char *selectOpName(int id){
+  char *z;
+  switch( id ){
+    case TK_ALL:       z = "UNION ALL";   break;
+    case TK_INTERSECT: z = "INTERSECT";   break;
+    case TK_EXCEPT:    z = "EXCEPT";      break;
+    default:           z = "UNION";       break;
+  }
+  return z;
+}
+#endif /* SQLITE_OMIT_COMPOUND_SELECT */
+
+#ifndef SQLITE_OMIT_EXPLAIN
+/*
+** Unless an "EXPLAIN QUERY PLAN" command is being processed, this function
+** is a no-op. Otherwise, it adds a single row of output to the EQP result,
+** where the caption is of the form:
+**
+**   "USE TEMP B-TREE FOR xxx"
+**
+** where xxx is one of "DISTINCT", "ORDER BY" or "GROUP BY". Exactly which
+** is determined by the zUsage argument.
+*/
+static void explainTempTable(Parse *pParse, const char *zUsage){
+  if( pParse->explain==2 ){
+    Vdbe *v = pParse->pVdbe;
+    char *zMsg = sqlite3MPrintf(pParse->db, "USE TEMP B-TREE FOR %s", zUsage);
+    sqlite3VdbeAddOp4(v, OP_Explain, pParse->iSelectId, 0, 0, zMsg, P4_DYNAMIC);
+  }
+}
+
+/*
+** Assign expression b to lvalue a. A second, no-op, version of this macro
+** is provided when SQLITE_OMIT_EXPLAIN is defined. This allows the code
+** in sqlite3Select() to assign values to structure member variables that
+** only exist if SQLITE_OMIT_EXPLAIN is not defined without polluting the
+** code with #ifndef directives.
+*/
+# define explainSetInteger(a, b) a = b
+
+#else
+/* No-op versions of the explainXXX() functions and macros. */
+# define explainTempTable(y,z)
+# define explainSetInteger(y,z)
+#endif
+
+#if !defined(SQLITE_OMIT_EXPLAIN) && !defined(SQLITE_OMIT_COMPOUND_SELECT)
+/*
+** Unless an "EXPLAIN QUERY PLAN" command is being processed, this function
+** is a no-op. Otherwise, it adds a single row of output to the EQP result,
+** where the caption is of one of the two forms:
+**
+**   "COMPOSITE SUBQUERIES iSub1 and iSub2 (op)"
+**   "COMPOSITE SUBQUERIES iSub1 and iSub2 USING TEMP B-TREE (op)"
+**
+** where iSub1 and iSub2 are the integers passed as the corresponding
+** function parameters, and op is the text representation of the parameter
+** of the same name. The parameter "op" must be one of TK_UNION, TK_EXCEPT,
+** TK_INTERSECT or TK_ALL. The first form is used if argument bUseTmp is 
+** false, or the second form if it is true.
+*/
+static void explainComposite(
+  Parse *pParse,                  /* Parse context */
+  int op,                         /* One of TK_UNION, TK_EXCEPT etc. */
+  int iSub1,                      /* Subquery id 1 */
+  int iSub2,                      /* Subquery id 2 */
+  int bUseTmp                     /* True if a temp table was used */
+){
+  assert( op==TK_UNION || op==TK_EXCEPT || op==TK_INTERSECT || op==TK_ALL );
+  if( pParse->explain==2 ){
+    Vdbe *v = pParse->pVdbe;
+    char *zMsg = sqlite3MPrintf(
+        pParse->db, "COMPOUND SUBQUERIES %d AND %d %s(%s)", iSub1, iSub2,
+        bUseTmp?"USING TEMP B-TREE ":"", selectOpName(op)
+    );
+    sqlite3VdbeAddOp4(v, OP_Explain, pParse->iSelectId, 0, 0, zMsg, P4_DYNAMIC);
+  }
+}
+#else
+/* No-op versions of the explainXXX() functions and macros. */
+# define explainComposite(v,w,x,y,z)
+#endif
+
+/*
+** If the inner loop was generated using a non-null pOrderBy argument,
+** then the results were placed in a sorter.  After the loop is terminated
+** we need to run the sorter and output the results.  The following
+** routine generates the code needed to do that.
+*/
+static void generateSortTail(
+  Parse *pParse,    /* Parsing context */
+  Select *p,        /* The SELECT statement */
+  SortCtx *pSort,   /* Information on the ORDER BY clause */
+  int nColumn,      /* Number of columns of data */
+  SelectDest *pDest /* Write the sorted results here */
+){
+  Vdbe *v = pParse->pVdbe;                     /* The prepared statement */
+  int addrBreak = sqlite3VdbeMakeLabel(v);     /* Jump here to exit loop */
+  int addrContinue = sqlite3VdbeMakeLabel(v);  /* Jump here for next cycle */
+  int addr;
+  int addrOnce = 0;
+  int iTab;
+  ExprList *pOrderBy = pSort->pOrderBy;
+  int eDest = pDest->eDest;
+  int iParm = pDest->iSDParm;
+  int regRow;
+  int regRowid;
+  int nKey;
+  int iSortTab;                   /* Sorter cursor to read from */
+  int nSortData;                  /* Trailing values to read from sorter */
+  int i;
+  int bSeq;                       /* True if sorter record includes seq. no. */
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+  struct ExprList_item *aOutEx = p->pEList->a;
+#endif
+
+  if( pSort->labelBkOut ){
+    sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, addrBreak);
+    sqlite3VdbeResolveLabel(v, pSort->labelBkOut);
+  }
+  iTab = pSort->iECursor;
+  if( eDest==SRT_Output || eDest==SRT_Coroutine ){
+    regRowid = 0;
+    regRow = pDest->iSdst;
+    nSortData = nColumn;
+  }else{
+    regRowid = sqlite3GetTempReg(pParse);
+    regRow = sqlite3GetTempReg(pParse);
+    nSortData = 1;
+  }
+  nKey = pOrderBy->nExpr - pSort->nOBSat;
+  if( pSort->sortFlags & SORTFLAG_UseSorter ){
+    int regSortOut = ++pParse->nMem;
+    iSortTab = pParse->nTab++;
+    if( pSort->labelBkOut ){
+      addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v);
+    }
+    sqlite3VdbeAddOp3(v, OP_OpenPseudo, iSortTab, regSortOut, nKey+1+nSortData);
+    if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce);
+    addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak);
+    VdbeCoverage(v);
+    codeOffset(v, p->iOffset, addrContinue);
+    sqlite3VdbeAddOp3(v, OP_SorterData, iTab, regSortOut, iSortTab);
+    bSeq = 0;
+  }else{
+    addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); VdbeCoverage(v);
+    codeOffset(v, p->iOffset, addrContinue);
+    iSortTab = iTab;
+    bSeq = 1;
+  }
+  for(i=0; i<nSortData; i++){
+    sqlite3VdbeAddOp3(v, OP_Column, iSortTab, nKey+bSeq+i, regRow+i);
+    VdbeComment((v, "%s", aOutEx[i].zName ? aOutEx[i].zName : aOutEx[i].zSpan));
+  }
+  switch( eDest ){
+    case SRT_Table:
+    case SRT_EphemTab: {
+      testcase( eDest==SRT_Table );
+      testcase( eDest==SRT_EphemTab );
+      sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid);
+      sqlite3VdbeAddOp3(v, OP_Insert, iParm, regRow, regRowid);
+      sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case SRT_Set: {
+      assert( nColumn==1 );
+      sqlite3VdbeAddOp4(v, OP_MakeRecord, regRow, 1, regRowid,
+                        &pDest->affSdst, 1);
+      sqlite3ExprCacheAffinityChange(pParse, regRow, 1);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, regRowid);
+      break;
+    }
+    case SRT_Mem: {
+      assert( nColumn==1 );
+      sqlite3ExprCodeMove(pParse, regRow, iParm, 1);
+      /* The LIMIT clause will terminate the loop for us */
+      break;
+    }
+#endif
+    default: {
+      assert( eDest==SRT_Output || eDest==SRT_Coroutine ); 
+      testcase( eDest==SRT_Output );
+      testcase( eDest==SRT_Coroutine );
+      if( eDest==SRT_Output ){
+        sqlite3VdbeAddOp2(v, OP_ResultRow, pDest->iSdst, nColumn);
+        sqlite3ExprCacheAffinityChange(pParse, pDest->iSdst, nColumn);
+      }else{
+        sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
+      }
+      break;
+    }
+  }
+  if( regRowid ){
+    sqlite3ReleaseTempReg(pParse, regRow);
+    sqlite3ReleaseTempReg(pParse, regRowid);
+  }
+  /* The bottom of the loop
+  */
+  sqlite3VdbeResolveLabel(v, addrContinue);
+  if( pSort->sortFlags & SORTFLAG_UseSorter ){
+    sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); VdbeCoverage(v);
+  }else{
+    sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v);
+  }
+  if( pSort->regReturn ) sqlite3VdbeAddOp1(v, OP_Return, pSort->regReturn);
+  sqlite3VdbeResolveLabel(v, addrBreak);
+}
+
+/*
+** Return a pointer to a string containing the 'declaration type' of the
+** expression pExpr. The string may be treated as static by the caller.
+**
+** Also try to estimate the size of the returned value and return that
+** result in *pEstWidth.
+**
+** The declaration type is the exact datatype definition extracted from the
+** original CREATE TABLE statement if the expression is a column. The
+** declaration type for a ROWID field is INTEGER. Exactly when an expression
+** is considered a column can be complex in the presence of subqueries. The
+** result-set expression in all of the following SELECT statements is 
+** considered a column by this function.
+**
+**   SELECT col FROM tbl;
+**   SELECT (SELECT col FROM tbl;
+**   SELECT (SELECT col FROM tbl);
+**   SELECT abc FROM (SELECT col AS abc FROM tbl);
+** 
+** The declaration type for any expression other than a column is NULL.
+**
+** This routine has either 3 or 6 parameters depending on whether or not
+** the SQLITE_ENABLE_COLUMN_METADATA compile-time option is used.
+*/
+#ifdef SQLITE_ENABLE_COLUMN_METADATA
+# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,C,D,E,F)
+static const char *columnTypeImpl(
+  NameContext *pNC, 
+  Expr *pExpr,
+  const char **pzOrigDb,
+  const char **pzOrigTab,
+  const char **pzOrigCol,
+  u8 *pEstWidth
+){
+  char const *zOrigDb = 0;
+  char const *zOrigTab = 0;
+  char const *zOrigCol = 0;
+#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */
+# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F)
+static const char *columnTypeImpl(
+  NameContext *pNC, 
+  Expr *pExpr,
+  u8 *pEstWidth
+){
+#endif /* !defined(SQLITE_ENABLE_COLUMN_METADATA) */
+  char const *zType = 0;
+  int j;
+  u8 estWidth = 1;
+
+  if( NEVER(pExpr==0) || pNC->pSrcList==0 ) return 0;
+  switch( pExpr->op ){
+    case TK_AGG_COLUMN:
+    case TK_COLUMN: {
+      /* The expression is a column. Locate the table the column is being
+      ** extracted from in NameContext.pSrcList. This table may be real
+      ** database table or a subquery.
+      */
+      Table *pTab = 0;            /* Table structure column is extracted from */
+      Select *pS = 0;             /* Select the column is extracted from */
+      int iCol = pExpr->iColumn;  /* Index of column in pTab */
+      testcase( pExpr->op==TK_AGG_COLUMN );
+      testcase( pExpr->op==TK_COLUMN );
+      while( pNC && !pTab ){
+        SrcList *pTabList = pNC->pSrcList;
+        for(j=0;j<pTabList->nSrc && pTabList->a[j].iCursor!=pExpr->iTable;j++);
+        if( j<pTabList->nSrc ){
+          pTab = pTabList->a[j].pTab;
+          pS = pTabList->a[j].pSelect;
+        }else{
+          pNC = pNC->pNext;
+        }
+      }
+
+      if( pTab==0 ){
+        /* At one time, code such as "SELECT new.x" within a trigger would
+        ** cause this condition to run.  Since then, we have restructured how
+        ** trigger code is generated and so this condition is no longer 
+        ** possible. However, it can still be true for statements like
+        ** the following:
+        **
+        **   CREATE TABLE t1(col INTEGER);
+        **   SELECT (SELECT t1.col) FROM FROM t1;
+        **
+        ** when columnType() is called on the expression "t1.col" in the 
+        ** sub-select. In this case, set the column type to NULL, even
+        ** though it should really be "INTEGER".
+        **
+        ** This is not a problem, as the column type of "t1.col" is never
+        ** used. When columnType() is called on the expression 
+        ** "(SELECT t1.col)", the correct type is returned (see the TK_SELECT
+        ** branch below.  */
+        break;
+      }
+
+      assert( pTab && pExpr->pTab==pTab );
+      if( pS ){
+        /* The "table" is actually a sub-select or a view in the FROM clause
+        ** of the SELECT statement. Return the declaration type and origin
+        ** data for the result-set column of the sub-select.
+        */
+        if( iCol>=0 && ALWAYS(iCol<pS->pEList->nExpr) ){
+          /* If iCol is less than zero, then the expression requests the
+          ** rowid of the sub-select or view. This expression is legal (see 
+          ** test case misc2.2.2) - it always evaluates to NULL.
+          */
+          NameContext sNC;
+          Expr *p = pS->pEList->a[iCol].pExpr;
+          sNC.pSrcList = pS->pSrc;
+          sNC.pNext = pNC;
+          sNC.pParse = pNC->pParse;
+          zType = columnType(&sNC, p,&zOrigDb,&zOrigTab,&zOrigCol, &estWidth); 
+        }
+      }else if( pTab->pSchema ){
+        /* A real table */
+        assert( !pS );
+        if( iCol<0 ) iCol = pTab->iPKey;
+        assert( iCol==-1 || (iCol>=0 && iCol<pTab->nCol) );
+#ifdef SQLITE_ENABLE_COLUMN_METADATA
+        if( iCol<0 ){
+          zType = "INTEGER";
+          zOrigCol = "rowid";
+        }else{
+          zType = pTab->aCol[iCol].zType;
+          zOrigCol = pTab->aCol[iCol].zName;
+          estWidth = pTab->aCol[iCol].szEst;
+        }
+        zOrigTab = pTab->zName;
+        if( pNC->pParse ){
+          int iDb = sqlite3SchemaToIndex(pNC->pParse->db, pTab->pSchema);
+          zOrigDb = pNC->pParse->db->aDb[iDb].zName;
+        }
+#else
+        if( iCol<0 ){
+          zType = "INTEGER";
+        }else{
+          zType = pTab->aCol[iCol].zType;
+          estWidth = pTab->aCol[iCol].szEst;
+        }
+#endif
+      }
+      break;
+    }
+#ifndef SQLITE_OMIT_SUBQUERY
+    case TK_SELECT: {
+      /* The expression is a sub-select. Return the declaration type and
+      ** origin info for the single column in the result set of the SELECT
+      ** statement.
+      */
+      NameContext sNC;
+      Select *pS = pExpr->x.pSelect;
+      Expr *p = pS->pEList->a[0].pExpr;
+      assert( ExprHasProperty(pExpr, EP_xIsSelect) );
+      sNC.pSrcList = pS->pSrc;
+      sNC.pNext = pNC;
+      sNC.pParse = pNC->pParse;
+      zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol, &estWidth); 
+      break;
+    }
+#endif
+  }
+
+#ifdef SQLITE_ENABLE_COLUMN_METADATA  
+  if( pzOrigDb ){
+    assert( pzOrigTab && pzOrigCol );
+    *pzOrigDb = zOrigDb;
+    *pzOrigTab = zOrigTab;
+    *pzOrigCol = zOrigCol;
+  }
+#endif
+  if( pEstWidth ) *pEstWidth = estWidth;
+  return zType;
+}
+
+/*
+** Generate code that will tell the VDBE the declaration types of columns
+** in the result set.
+*/
+static void generateColumnTypes(
+  Parse *pParse,      /* Parser context */
+  SrcList *pTabList,  /* List of tables */
+  ExprList *pEList    /* Expressions defining the result set */
+){
+#ifndef SQLITE_OMIT_DECLTYPE
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  NameContext sNC;
+  sNC.pSrcList = pTabList;
+  sNC.pParse = pParse;
+  for(i=0; i<pEList->nExpr; i++){
+    Expr *p = pEList->a[i].pExpr;
+    const char *zType;
+#ifdef SQLITE_ENABLE_COLUMN_METADATA
+    const char *zOrigDb = 0;
+    const char *zOrigTab = 0;
+    const char *zOrigCol = 0;
+    zType = columnType(&sNC, p, &zOrigDb, &zOrigTab, &zOrigCol, 0);
+
+    /* The vdbe must make its own copy of the column-type and other 
+    ** column specific strings, in case the schema is reset before this
+    ** virtual machine is deleted.
+    */
+    sqlite3VdbeSetColName(v, i, COLNAME_DATABASE, zOrigDb, SQLITE_TRANSIENT);
+    sqlite3VdbeSetColName(v, i, COLNAME_TABLE, zOrigTab, SQLITE_TRANSIENT);
+    sqlite3VdbeSetColName(v, i, COLNAME_COLUMN, zOrigCol, SQLITE_TRANSIENT);
+#else
+    zType = columnType(&sNC, p, 0, 0, 0, 0);
+#endif
+    sqlite3VdbeSetColName(v, i, COLNAME_DECLTYPE, zType, SQLITE_TRANSIENT);
+  }
+#endif /* !defined(SQLITE_OMIT_DECLTYPE) */
+}
+
+/*
+** Generate code that will tell the VDBE the names of columns
+** in the result set.  This information is used to provide the
+** azCol[] values in the callback.
+*/
+static void generateColumnNames(
+  Parse *pParse,      /* Parser context */
+  SrcList *pTabList,  /* List of tables */
+  ExprList *pEList    /* Expressions defining the result set */
+){
+  Vdbe *v = pParse->pVdbe;
+  int i, j;
+  sqlite3 *db = pParse->db;
+  int fullNames, shortNames;
+
+#ifndef SQLITE_OMIT_EXPLAIN
+  /* If this is an EXPLAIN, skip this step */
+  if( pParse->explain ){
+    return;
+  }
+#endif
+
+  if( pParse->colNamesSet || NEVER(v==0) || db->mallocFailed ) return;
+  pParse->colNamesSet = 1;
+  fullNames = (db->flags & SQLITE_FullColNames)!=0;
+  shortNames = (db->flags & SQLITE_ShortColNames)!=0;
+  sqlite3VdbeSetNumCols(v, pEList->nExpr);
+  for(i=0; i<pEList->nExpr; i++){
+    Expr *p;
+    p = pEList->a[i].pExpr;
+    if( NEVER(p==0) ) continue;
+    if( pEList->a[i].zName ){
+      char *zName = pEList->a[i].zName;
+      sqlite3VdbeSetColName(v, i, COLNAME_NAME, zName, SQLITE_TRANSIENT);
+    }else if( (p->op==TK_COLUMN || p->op==TK_AGG_COLUMN) && pTabList ){
+      Table *pTab;
+      char *zCol;
+      int iCol = p->iColumn;
+      for(j=0; ALWAYS(j<pTabList->nSrc); j++){
+        if( pTabList->a[j].iCursor==p->iTable ) break;
+      }
+      assert( j<pTabList->nSrc );
+      pTab = pTabList->a[j].pTab;
+      if( iCol<0 ) iCol = pTab->iPKey;
+      assert( iCol==-1 || (iCol>=0 && iCol<pTab->nCol) );
+      if( iCol<0 ){
+        zCol = "rowid";
+      }else{
+        zCol = pTab->aCol[iCol].zName;
+      }
+      if( !shortNames && !fullNames ){
+        sqlite3VdbeSetColName(v, i, COLNAME_NAME, 
+            sqlite3DbStrDup(db, pEList->a[i].zSpan), SQLITE_DYNAMIC);
+      }else if( fullNames ){
+        char *zName = 0;
+        zName = sqlite3MPrintf(db, "%s.%s", pTab->zName, zCol);
+        sqlite3VdbeSetColName(v, i, COLNAME_NAME, zName, SQLITE_DYNAMIC);
+      }else{
+        sqlite3VdbeSetColName(v, i, COLNAME_NAME, zCol, SQLITE_TRANSIENT);
+      }
+    }else{
+      const char *z = pEList->a[i].zSpan;
+      z = z==0 ? sqlite3MPrintf(db, "column%d", i+1) : sqlite3DbStrDup(db, z);
+      sqlite3VdbeSetColName(v, i, COLNAME_NAME, z, SQLITE_DYNAMIC);
+    }
+  }
+  generateColumnTypes(pParse, pTabList, pEList);
+}
+
+/*
+** Given an expression list (which is really the list of expressions
+** that form the result set of a SELECT statement) compute appropriate
+** column names for a table that would hold the expression list.
+**
+** All column names will be unique.
+**
+** Only the column names are computed.  Column.zType, Column.zColl,
+** and other fields of Column are zeroed.
+**
+** Return SQLITE_OK on success.  If a memory allocation error occurs,
+** store NULL in *paCol and 0 in *pnCol and return SQLITE_NOMEM.
+*/
+static int selectColumnsFromExprList(
+  Parse *pParse,          /* Parsing context */
+  ExprList *pEList,       /* Expr list from which to derive column names */
+  i16 *pnCol,             /* Write the number of columns here */
+  Column **paCol          /* Write the new column list here */
+){
+  sqlite3 *db = pParse->db;   /* Database connection */
+  int i, j;                   /* Loop counters */
+  int cnt;                    /* Index added to make the name unique */
+  Column *aCol, *pCol;        /* For looping over result columns */
+  int nCol;                   /* Number of columns in the result set */
+  Expr *p;                    /* Expression for a single result column */
+  char *zName;                /* Column name */
+  int nName;                  /* Size of name in zName[] */
+
+  if( pEList ){
+    nCol = pEList->nExpr;
+    aCol = sqlite3DbMallocZero(db, sizeof(aCol[0])*nCol);
+    testcase( aCol==0 );
+  }else{
+    nCol = 0;
+    aCol = 0;
+  }
+  *pnCol = nCol;
+  *paCol = aCol;
+
+  for(i=0, pCol=aCol; i<nCol; i++, pCol++){
+    /* Get an appropriate name for the column
+    */
+    p = sqlite3ExprSkipCollate(pEList->a[i].pExpr);
+    if( (zName = pEList->a[i].zName)!=0 ){
+      /* If the column contains an "AS <name>" phrase, use <name> as the name */
+      zName = sqlite3DbStrDup(db, zName);
+    }else{
+      Expr *pColExpr = p;  /* The expression that is the result column name */
+      Table *pTab;         /* Table associated with this expression */
+      while( pColExpr->op==TK_DOT ){
+        pColExpr = pColExpr->pRight;
+        assert( pColExpr!=0 );
+      }
+      if( pColExpr->op==TK_COLUMN && ALWAYS(pColExpr->pTab!=0) ){
+        /* For columns use the column name name */
+        int iCol = pColExpr->iColumn;
+        pTab = pColExpr->pTab;
+        if( iCol<0 ) iCol = pTab->iPKey;
+        zName = sqlite3MPrintf(db, "%s",
+                 iCol>=0 ? pTab->aCol[iCol].zName : "rowid");
+      }else if( pColExpr->op==TK_ID ){
+        assert( !ExprHasProperty(pColExpr, EP_IntValue) );
+        zName = sqlite3MPrintf(db, "%s", pColExpr->u.zToken);
+      }else{
+        /* Use the original text of the column expression as its name */
+        zName = sqlite3MPrintf(db, "%s", pEList->a[i].zSpan);
+      }
+    }
+    if( db->mallocFailed ){
+      sqlite3DbFree(db, zName);
+      break;
+    }
+
+    /* Make sure the column name is unique.  If the name is not unique,
+    ** append an integer to the name so that it becomes unique.
+    */
+    nName = sqlite3Strlen30(zName);
+    for(j=cnt=0; j<i; j++){
+      if( sqlite3StrICmp(aCol[j].zName, zName)==0 ){
+        char *zNewName;
+        int k;
+        for(k=nName-1; k>1 && sqlite3Isdigit(zName[k]); k--){}
+        if( k>=0 && zName[k]==':' ) nName = k;
+        zName[nName] = 0;
+        zNewName = sqlite3MPrintf(db, "%s:%d", zName, ++cnt);
+        sqlite3DbFree(db, zName);
+        zName = zNewName;
+        j = -1;
+        if( zName==0 ) break;
+      }
+    }
+    pCol->zName = zName;
+  }
+  if( db->mallocFailed ){
+    for(j=0; j<i; j++){
+      sqlite3DbFree(db, aCol[j].zName);
+    }
+    sqlite3DbFree(db, aCol);
+    *paCol = 0;
+    *pnCol = 0;
+    return SQLITE_NOMEM;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Add type and collation information to a column list based on
+** a SELECT statement.
+** 
+** The column list presumably came from selectColumnNamesFromExprList().
+** The column list has only names, not types or collations.  This
+** routine goes through and adds the types and collations.
+**
+** This routine requires that all identifiers in the SELECT
+** statement be resolved.
+*/
+static void selectAddColumnTypeAndCollation(
+  Parse *pParse,        /* Parsing contexts */
+  Table *pTab,          /* Add column type information to this table */
+  Select *pSelect       /* SELECT used to determine types and collations */
+){
+  sqlite3 *db = pParse->db;
+  NameContext sNC;
+  Column *pCol;
+  CollSeq *pColl;
+  int i;
+  Expr *p;
+  struct ExprList_item *a;
+  u64 szAll = 0;
+
+  assert( pSelect!=0 );
+  assert( (pSelect->selFlags & SF_Resolved)!=0 );
+  assert( pTab->nCol==pSelect->pEList->nExpr || db->mallocFailed );
+  if( db->mallocFailed ) return;
+  memset(&sNC, 0, sizeof(sNC));
+  sNC.pSrcList = pSelect->pSrc;
+  a = pSelect->pEList->a;
+  for(i=0, pCol=pTab->aCol; i<pTab->nCol; i++, pCol++){
+    p = a[i].pExpr;
+    pCol->zType = sqlite3DbStrDup(db, columnType(&sNC, p,0,0,0, &pCol->szEst));
+    szAll += pCol->szEst;
+    pCol->affinity = sqlite3ExprAffinity(p);
+    if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_NONE;
+    pColl = sqlite3ExprCollSeq(pParse, p);
+    if( pColl ){
+      pCol->zColl = sqlite3DbStrDup(db, pColl->zName);
+    }
+  }
+  pTab->szTabRow = sqlite3LogEst(szAll*4);
+}
+
+/*
+** Given a SELECT statement, generate a Table structure that describes
+** the result set of that SELECT.
+*/
+SQLITE_PRIVATE Table *sqlite3ResultSetOfSelect(Parse *pParse, Select *pSelect){
+  Table *pTab;
+  sqlite3 *db = pParse->db;
+  int savedFlags;
+
+  savedFlags = db->flags;
+  db->flags &= ~SQLITE_FullColNames;
+  db->flags |= SQLITE_ShortColNames;
+  sqlite3SelectPrep(pParse, pSelect, 0);
+  if( pParse->nErr ) return 0;
+  while( pSelect->pPrior ) pSelect = pSelect->pPrior;
+  db->flags = savedFlags;
+  pTab = sqlite3DbMallocZero(db, sizeof(Table) );
+  if( pTab==0 ){
+    return 0;
+  }
+  /* The sqlite3ResultSetOfSelect() is only used n contexts where lookaside
+  ** is disabled */
+  assert( db->lookaside.bEnabled==0 );
+  pTab->nRef = 1;
+  pTab->zName = 0;
+  pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
+  selectColumnsFromExprList(pParse, pSelect->pEList, &pTab->nCol, &pTab->aCol);
+  selectAddColumnTypeAndCollation(pParse, pTab, pSelect);
+  pTab->iPKey = -1;
+  if( db->mallocFailed ){
+    sqlite3DeleteTable(db, pTab);
+    return 0;
+  }
+  return pTab;
+}
+
+/*
+** Get a VDBE for the given parser context.  Create a new one if necessary.
+** If an error occurs, return NULL and leave a message in pParse.
+*/
+SQLITE_PRIVATE Vdbe *sqlite3GetVdbe(Parse *pParse){
+  Vdbe *v = pParse->pVdbe;
+  if( v==0 ){
+    v = pParse->pVdbe = sqlite3VdbeCreate(pParse);
+    if( v ) sqlite3VdbeAddOp0(v, OP_Init);
+    if( pParse->pToplevel==0
+     && OptimizationEnabled(pParse->db,SQLITE_FactorOutConst)
+    ){
+      pParse->okConstFactor = 1;
+    }
+
+  }
+  return v;
+}
+
+
+/*
+** Compute the iLimit and iOffset fields of the SELECT based on the
+** pLimit and pOffset expressions.  pLimit and pOffset hold the expressions
+** that appear in the original SQL statement after the LIMIT and OFFSET
+** keywords.  Or NULL if those keywords are omitted. iLimit and iOffset 
+** are the integer memory register numbers for counters used to compute 
+** the limit and offset.  If there is no limit and/or offset, then 
+** iLimit and iOffset are negative.
+**
+** This routine changes the values of iLimit and iOffset only if
+** a limit or offset is defined by pLimit and pOffset.  iLimit and
+** iOffset should have been preset to appropriate default values (zero)
+** prior to calling this routine.
+**
+** The iOffset register (if it exists) is initialized to the value
+** of the OFFSET.  The iLimit register is initialized to LIMIT.  Register
+** iOffset+1 is initialized to LIMIT+OFFSET.
+**
+** Only if pLimit!=0 or pOffset!=0 do the limit registers get
+** redefined.  The UNION ALL operator uses this property to force
+** the reuse of the same limit and offset registers across multiple
+** SELECT statements.
+*/
+static void computeLimitRegisters(Parse *pParse, Select *p, int iBreak){
+  Vdbe *v = 0;
+  int iLimit = 0;
+  int iOffset;
+  int addr1, n;
+  if( p->iLimit ) return;
+
+  /* 
+  ** "LIMIT -1" always shows all rows.  There is some
+  ** controversy about what the correct behavior should be.
+  ** The current implementation interprets "LIMIT 0" to mean
+  ** no rows.
+  */
+  sqlite3ExprCacheClear(pParse);
+  assert( p->pOffset==0 || p->pLimit!=0 );
+  if( p->pLimit ){
+    p->iLimit = iLimit = ++pParse->nMem;
+    v = sqlite3GetVdbe(pParse);
+    assert( v!=0 );
+    if( sqlite3ExprIsInteger(p->pLimit, &n) ){
+      sqlite3VdbeAddOp2(v, OP_Integer, n, iLimit);
+      VdbeComment((v, "LIMIT counter"));
+      if( n==0 ){
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, iBreak);
+      }else if( n>=0 && p->nSelectRow>(u64)n ){
+        p->nSelectRow = n;
+      }
+    }else{
+      sqlite3ExprCode(pParse, p->pLimit, iLimit);
+      sqlite3VdbeAddOp1(v, OP_MustBeInt, iLimit); VdbeCoverage(v);
+      VdbeComment((v, "LIMIT counter"));
+      sqlite3VdbeAddOp2(v, OP_IfZero, iLimit, iBreak); VdbeCoverage(v);
+    }
+    if( p->pOffset ){
+      p->iOffset = iOffset = ++pParse->nMem;
+      pParse->nMem++;   /* Allocate an extra register for limit+offset */
+      sqlite3ExprCode(pParse, p->pOffset, iOffset);
+      sqlite3VdbeAddOp1(v, OP_MustBeInt, iOffset); VdbeCoverage(v);
+      VdbeComment((v, "OFFSET counter"));
+      addr1 = sqlite3VdbeAddOp1(v, OP_IfPos, iOffset); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Integer, 0, iOffset);
+      sqlite3VdbeJumpHere(v, addr1);
+      sqlite3VdbeAddOp3(v, OP_Add, iLimit, iOffset, iOffset+1);
+      VdbeComment((v, "LIMIT+OFFSET"));
+      addr1 = sqlite3VdbeAddOp1(v, OP_IfPos, iLimit); VdbeCoverage(v);
+      sqlite3VdbeAddOp2(v, OP_Integer, -1, iOffset+1);
+      sqlite3VdbeJumpHere(v, addr1);
+    }
+  }
+}
+
+#ifndef SQLITE_OMIT_COMPOUND_SELECT
+/*
+** Return the appropriate collating sequence for the iCol-th column of
+** the result set for the compound-select statement "p".  Return NULL if
+** the column has no default collating sequence.
+**
+** The collating sequence for the compound select is taken from the
+** left-most term of the select that has a collating sequence.
+*/
+static CollSeq *multiSelectCollSeq(Parse *pParse, Select *p, int iCol){
+  CollSeq *pRet;
+  if( p->pPrior ){
+    pRet = multiSelectCollSeq(pParse, p->pPrior, iCol);
+  }else{
+    pRet = 0;
+  }
+  assert( iCol>=0 );
+  if( pRet==0 && iCol<p->pEList->nExpr ){
+    pRet = sqlite3ExprCollSeq(pParse, p->pEList->a[iCol].pExpr);
+  }
+  return pRet;
+}
+
+/*
+** The select statement passed as the second parameter is a compound SELECT
+** with an ORDER BY clause. This function allocates and returns a KeyInfo
+** structure suitable for implementing the ORDER BY.
+**
+** Space to hold the KeyInfo structure is obtained from malloc. The calling
+** function is responsible for ensuring that this structure is eventually
+** freed.
+*/
+static KeyInfo *multiSelectOrderByKeyInfo(Parse *pParse, Select *p, int nExtra){
+  ExprList *pOrderBy = p->pOrderBy;
+  int nOrderBy = p->pOrderBy->nExpr;
+  sqlite3 *db = pParse->db;
+  KeyInfo *pRet = sqlite3KeyInfoAlloc(db, nOrderBy+nExtra, 1);
+  if( pRet ){
+    int i;
+    for(i=0; i<nOrderBy; i++){
+      struct ExprList_item *pItem = &pOrderBy->a[i];
+      Expr *pTerm = pItem->pExpr;
+      CollSeq *pColl;
+
+      if( pTerm->flags & EP_Collate ){
+        pColl = sqlite3ExprCollSeq(pParse, pTerm);
+      }else{
+        pColl = multiSelectCollSeq(pParse, p, pItem->u.x.iOrderByCol-1);
+        if( pColl==0 ) pColl = db->pDfltColl;
+        pOrderBy->a[i].pExpr =
+          sqlite3ExprAddCollateString(pParse, pTerm, pColl->zName);
+      }
+      assert( sqlite3KeyInfoIsWriteable(pRet) );
+      pRet->aColl[i] = pColl;
+      pRet->aSortOrder[i] = pOrderBy->a[i].sortOrder;
+    }
+  }
+
+  return pRet;
+}
+
+#ifndef SQLITE_OMIT_CTE
+/*
+** This routine generates VDBE code to compute the content of a WITH RECURSIVE
+** query of the form:
+**
+**   <recursive-table> AS (<setup-query> UNION [ALL] <recursive-query>)
+**                         \___________/             \_______________/
+**                           p->pPrior                      p
+**
+**
+** There is exactly one reference to the recursive-table in the FROM clause
+** of recursive-query, marked with the SrcList->a[].isRecursive flag.
+**
+** The setup-query runs once to generate an initial set of rows that go
+** into a Queue table.  Rows are extracted from the Queue table one by
+** one.  Each row extracted from Queue is output to pDest.  Then the single
+** extracted row (now in the iCurrent table) becomes the content of the
+** recursive-table for a recursive-query run.  The output of the recursive-query
+** is added back into the Queue table.  Then another row is extracted from Queue
+** and the iteration continues until the Queue table is empty.
+**
+** If the compound query operator is UNION then no duplicate rows are ever
+** inserted into the Queue table.  The iDistinct table keeps a copy of all rows
+** that have ever been inserted into Queue and causes duplicates to be
+** discarded.  If the operator is UNION ALL, then duplicates are allowed.
+** 
+** If the query has an ORDER BY, then entries in the Queue table are kept in
+** ORDER BY order and the first entry is extracted for each cycle.  Without
+** an ORDER BY, the Queue table is just a FIFO.
+**
+** If a LIMIT clause is provided, then the iteration stops after LIMIT rows
+** have been output to pDest.  A LIMIT of zero means to output no rows and a
+** negative LIMIT means to output all rows.  If there is also an OFFSET clause
+** with a positive value, then the first OFFSET outputs are discarded rather
+** than being sent to pDest.  The LIMIT count does not begin until after OFFSET
+** rows have been skipped.
+*/
+static void generateWithRecursiveQuery(
+  Parse *pParse,        /* Parsing context */
+  Select *p,            /* The recursive SELECT to be coded */
+  SelectDest *pDest     /* What to do with query results */
+){
+  SrcList *pSrc = p->pSrc;      /* The FROM clause of the recursive query */
+  int nCol = p->pEList->nExpr;  /* Number of columns in the recursive table */
+  Vdbe *v = pParse->pVdbe;      /* The prepared statement under construction */
+  Select *pSetup = p->pPrior;   /* The setup query */
+  int addrTop;                  /* Top of the loop */
+  int addrCont, addrBreak;      /* CONTINUE and BREAK addresses */
+  int iCurrent = 0;             /* The Current table */
+  int regCurrent;               /* Register holding Current table */
+  int iQueue;                   /* The Queue table */
+  int iDistinct = 0;            /* To ensure unique results if UNION */
+  int eDest = SRT_Fifo;         /* How to write to Queue */
+  SelectDest destQueue;         /* SelectDest targetting the Queue table */
+  int i;                        /* Loop counter */
+  int rc;                       /* Result code */
+  ExprList *pOrderBy;           /* The ORDER BY clause */
+  Expr *pLimit, *pOffset;       /* Saved LIMIT and OFFSET */
+  int regLimit, regOffset;      /* Registers used by LIMIT and OFFSET */
+
+  /* Obtain authorization to do a recursive query */
+  if( sqlite3AuthCheck(pParse, SQLITE_RECURSIVE, 0, 0, 0) ) return;
+
+  /* Process the LIMIT and OFFSET clauses, if they exist */
+  addrBreak = sqlite3VdbeMakeLabel(v);
+  computeLimitRegisters(pParse, p, addrBreak);
+  pLimit = p->pLimit;
+  pOffset = p->pOffset;
+  regLimit = p->iLimit;
+  regOffset = p->iOffset;
+  p->pLimit = p->pOffset = 0;
+  p->iLimit = p->iOffset = 0;
+  pOrderBy = p->pOrderBy;
+
+  /* Locate the cursor number of the Current table */
+  for(i=0; ALWAYS(i<pSrc->nSrc); i++){
+    if( pSrc->a[i].isRecursive ){
+      iCurrent = pSrc->a[i].iCursor;
+      break;
+    }
+  }
+
+  /* Allocate cursors numbers for Queue and Distinct.  The cursor number for
+  ** the Distinct table must be exactly one greater than Queue in order
+  ** for the SRT_DistFifo and SRT_DistQueue destinations to work. */
+  iQueue = pParse->nTab++;
+  if( p->op==TK_UNION ){
+    eDest = pOrderBy ? SRT_DistQueue : SRT_DistFifo;
+    iDistinct = pParse->nTab++;
+  }else{
+    eDest = pOrderBy ? SRT_Queue : SRT_Fifo;
+  }
+  sqlite3SelectDestInit(&destQueue, eDest, iQueue);
+
+  /* Allocate cursors for Current, Queue, and Distinct. */
+  regCurrent = ++pParse->nMem;
+  sqlite3VdbeAddOp3(v, OP_OpenPseudo, iCurrent, regCurrent, nCol);
+  if( pOrderBy ){
+    KeyInfo *pKeyInfo = multiSelectOrderByKeyInfo(pParse, p, 1);
+    sqlite3VdbeAddOp4(v, OP_OpenEphemeral, iQueue, pOrderBy->nExpr+2, 0,
+                      (char*)pKeyInfo, P4_KEYINFO);
+    destQueue.pOrderBy = pOrderBy;
+  }else{
+    sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iQueue, nCol);
+  }
+  VdbeComment((v, "Queue table"));
+  if( iDistinct ){
+    p->addrOpenEphm[0] = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iDistinct, 0);
+    p->selFlags |= SF_UsesEphemeral;
+  }
+
+  /* Detach the ORDER BY clause from the compound SELECT */
+  p->pOrderBy = 0;
+
+  /* Store the results of the setup-query in Queue. */
+  pSetup->pNext = 0;
+  rc = sqlite3Select(pParse, pSetup, &destQueue);
+  pSetup->pNext = p;
+  if( rc ) goto end_of_recursive_query;
+
+  /* Find the next row in the Queue and output that row */
+  addrTop = sqlite3VdbeAddOp2(v, OP_Rewind, iQueue, addrBreak); VdbeCoverage(v);
+
+  /* Transfer the next row in Queue over to Current */
+  sqlite3VdbeAddOp1(v, OP_NullRow, iCurrent); /* To reset column cache */
+  if( pOrderBy ){
+    sqlite3VdbeAddOp3(v, OP_Column, iQueue, pOrderBy->nExpr+1, regCurrent);
+  }else{
+    sqlite3VdbeAddOp2(v, OP_RowData, iQueue, regCurrent);
+  }
+  sqlite3VdbeAddOp1(v, OP_Delete, iQueue);
+
+  /* Output the single row in Current */
+  addrCont = sqlite3VdbeMakeLabel(v);
+  codeOffset(v, regOffset, addrCont);
+  selectInnerLoop(pParse, p, p->pEList, iCurrent,
+      0, 0, pDest, addrCont, addrBreak);
+  if( regLimit ){
+    sqlite3VdbeAddOp3(v, OP_IfZero, regLimit, addrBreak, -1);
+    VdbeCoverage(v);
+  }
+  sqlite3VdbeResolveLabel(v, addrCont);
+
+  /* Execute the recursive SELECT taking the single row in Current as
+  ** the value for the recursive-table. Store the results in the Queue.
+  */
+  p->pPrior = 0;
+  sqlite3Select(pParse, p, &destQueue);
+  assert( p->pPrior==0 );
+  p->pPrior = pSetup;
+
+  /* Keep running the loop until the Queue is empty */
+  sqlite3VdbeAddOp2(v, OP_Goto, 0, addrTop);
+  sqlite3VdbeResolveLabel(v, addrBreak);
+
+end_of_recursive_query:
+  sqlite3ExprListDelete(pParse->db, p->pOrderBy);
+  p->pOrderBy = pOrderBy;
+  p->pLimit = pLimit;
+  p->pOffset = pOffset;
+  return;
+}
+#endif /* SQLITE_OMIT_CTE */
+
+/* Forward references */
+static int multiSelectOrderBy(
+  Parse *pParse,        /* Parsing context */
+  Select *p,            /* The right-most of SELECTs to be coded */
+  SelectDest *pDest     /* What to do with query results */
+);
+
+/*
+** Error message for when two or more terms of a compound select have different
+** size result sets.
+*/
+static void selectWrongNumTermsError(Parse *pParse, Select *p){
+  if( p->selFlags & SF_Values ){
+    sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms");
+  }else{
+    sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s"
+      " do not have the same number of result columns", selectOpName(p->op));
+  }
+}
+
+/*
+** Handle the special case of a compound-select that originates from a
+** VALUES clause.  By handling this as a special case, we avoid deep
+** recursion, and thus do not need to enforce the SQLITE_LIMIT_COMPOUND_SELECT
+** on a VALUES clause.
+**
+** Because the Select object originates from a VALUES clause:
+**   (1) It has no LIMIT or OFFSET
+**   (2) All terms are UNION ALL
+**   (3) There is no ORDER BY clause
+*/
+static int multiSelectValues(
+  Parse *pParse,        /* Parsing context */
+  Select *p,            /* The right-most of SELECTs to be coded */
+  SelectDest *pDest     /* What to do with query results */
+){
+  Select *pPrior;
+  int nExpr = p->pEList->nExpr;
+  int nRow = 1;
+  int rc = 0;
+  assert( p->pNext==0 );
+  assert( p->selFlags & SF_AllValues );
+  do{
+    assert( p->selFlags & SF_Values );
+    assert( p->op==TK_ALL || (p->op==TK_SELECT && p->pPrior==0) );
+    assert( p->pLimit==0 );
+    assert( p->pOffset==0 );
+    if( p->pEList->nExpr!=nExpr ){
+      selectWrongNumTermsError(pParse, p);
+      return 1;
+    }
+    if( p->pPrior==0 ) break;
+    assert( p->pPrior->pNext==p );
+    p = p->pPrior;
+    nRow++;
+  }while(1);
+  while( p ){
+    pPrior = p->pPrior;
+    p->pPrior = 0;
+    rc = sqlite3Select(pParse, p, pDest);
+    p->pPrior = pPrior;
+    if( rc ) break;
+    p->nSelectRow = nRow;
+    p = p->pNext;
+  }
+  return rc;
+}
+
+/*
+** This routine is called to process a compound query form from
+** two or more separate queries using UNION, UNION ALL, EXCEPT, or
+** INTERSECT
+**
+** "p" points to the right-most of the two queries.  the query on the
+** left is p->pPrior.  The left query could also be a compound query
+** in which case this routine will be called recursively. 
+**
+** The results of the total query are to be written into a destination
+** of type eDest with parameter iParm.
+**
+** Example 1:  Consider a three-way compound SQL statement.
+**
+**     SELECT a FROM t1 UNION SELECT b FROM t2 UNION SELECT c FROM t3
+**
+** This statement is parsed up as follows:
+**
+**     SELECT c FROM t3
+**      |
+**      `----->  SELECT b FROM t2
+**                |
+**                `------>  SELECT a FROM t1
+**
+** The arrows in the diagram above represent the Select.pPrior pointer.
+** So if this routine is called with p equal to the t3 query, then
+** pPrior will be the t2 query.  p->op will be TK_UNION in this case.
+**
+** Notice that because of the way SQLite parses compound SELECTs, the
+** individual selects always group from left to right.
+*/
+static int multiSelect(
+  Parse *pParse,        /* Parsing context */
+  Select *p,            /* The right-most of SELECTs to be coded */
+  SelectDest *pDest     /* What to do with query results */
+){
+  int rc = SQLITE_OK;   /* Success code from a subroutine */
+  Select *pPrior;       /* Another SELECT immediately to our left */
+  Vdbe *v;              /* Generate code to this VDBE */
+  SelectDest dest;      /* Alternative data destination */
+  Select *pDelete = 0;  /* Chain of simple selects to delete */
+  sqlite3 *db;          /* Database connection */
+#ifndef SQLITE_OMIT_EXPLAIN
+  int iSub1 = 0;        /* EQP id of left-hand query */
+  int iSub2 = 0;        /* EQP id of right-hand query */
+#endif
+
+  /* Make sure there is no ORDER BY or LIMIT clause on prior SELECTs.  Only
+  ** the last (right-most) SELECT in the series may have an ORDER BY or LIMIT.
+  */
+  assert( p && p->pPrior );  /* Calling function guarantees this much */
+  assert( (p->selFlags & SF_Recursive)==0 || p->op==TK_ALL || p->op==TK_UNION );
+  db = pParse->db;
+  pPrior = p->pPrior;
+  dest = *pDest;
+  if( pPrior->pOrderBy ){
+    sqlite3ErrorMsg(pParse,"ORDER BY clause should come after %s not before",
+      selectOpName(p->op));
+    rc = 1;
+    goto multi_select_end;
+  }
+  if( pPrior->pLimit ){
+    sqlite3ErrorMsg(pParse,"LIMIT clause should come after %s not before",
+      selectOpName(p->op));
+    rc = 1;
+    goto multi_select_end;
+  }
+
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );  /* The VDBE already created by calling function */
+
+  /* Create the destination temporary table if necessary
+  */
+  if( dest.eDest==SRT_EphemTab ){
+    assert( p->pEList );
+    sqlite3VdbeAddOp2(v, OP_OpenEphemeral, dest.iSDParm, p->pEList->nExpr);
+    sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
+    dest.eDest = SRT_Table;
+  }
+
+  /* Special handling for a compound-select that originates as a VALUES clause.
+  */
+  if( p->selFlags & SF_AllValues ){
+    rc = multiSelectValues(pParse, p, &dest);
+    goto multi_select_end;
+  }
+
+  /* Make sure all SELECTs in the statement have the same number of elements
+  ** in their result sets.
+  */
+  assert( p->pEList && pPrior->pEList );
+  if( p->pEList->nExpr!=pPrior->pEList->nExpr ){
+    selectWrongNumTermsError(pParse, p);
+    rc = 1;
+    goto multi_select_end;
+  }
+
+#ifndef SQLITE_OMIT_CTE
+  if( p->selFlags & SF_Recursive ){
+    generateWithRecursiveQuery(pParse, p, &dest);
+  }else
+#endif
+
+  /* Compound SELECTs that have an ORDER BY clause are handled separately.
+  */
+  if( p->pOrderBy ){
+    return multiSelectOrderBy(pParse, p, pDest);
+  }else
+
+  /* Generate code for the left and right SELECT statements.
+  */
+  switch( p->op ){
+    case TK_ALL: {
+      int addr = 0;
+      int nLimit;
+      assert( !pPrior->pLimit );
+      pPrior->iLimit = p->iLimit;
+      pPrior->iOffset = p->iOffset;
+      pPrior->pLimit = p->pLimit;
+      pPrior->pOffset = p->pOffset;
+      explainSetInteger(iSub1, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, pPrior, &dest);
+      p->pLimit = 0;
+      p->pOffset = 0;
+      if( rc ){
+        goto multi_select_end;
+      }
+      p->pPrior = 0;
+      p->iLimit = pPrior->iLimit;
+      p->iOffset = pPrior->iOffset;
+      if( p->iLimit ){
+        addr = sqlite3VdbeAddOp1(v, OP_IfZero, p->iLimit); VdbeCoverage(v);
+        VdbeComment((v, "Jump ahead if LIMIT reached"));
+      }
+      explainSetInteger(iSub2, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, p, &dest);
+      testcase( rc!=SQLITE_OK );
+      pDelete = p->pPrior;
+      p->pPrior = pPrior;
+      p->nSelectRow += pPrior->nSelectRow;
+      if( pPrior->pLimit
+       && sqlite3ExprIsInteger(pPrior->pLimit, &nLimit)
+       && nLimit>0 && p->nSelectRow > (u64)nLimit 
+      ){
+        p->nSelectRow = nLimit;
+      }
+      if( addr ){
+        sqlite3VdbeJumpHere(v, addr);
+      }
+      break;
+    }
+    case TK_EXCEPT:
+    case TK_UNION: {
+      int unionTab;    /* Cursor number of the temporary table holding result */
+      u8 op = 0;       /* One of the SRT_ operations to apply to self */
+      int priorOp;     /* The SRT_ operation to apply to prior selects */
+      Expr *pLimit, *pOffset; /* Saved values of p->nLimit and p->nOffset */
+      int addr;
+      SelectDest uniondest;
+
+      testcase( p->op==TK_EXCEPT );
+      testcase( p->op==TK_UNION );
+      priorOp = SRT_Union;
+      if( dest.eDest==priorOp ){
+        /* We can reuse a temporary table generated by a SELECT to our
+        ** right.
+        */
+        assert( p->pLimit==0 );      /* Not allowed on leftward elements */
+        assert( p->pOffset==0 );     /* Not allowed on leftward elements */
+        unionTab = dest.iSDParm;
+      }else{
+        /* We will need to create our own temporary table to hold the
+        ** intermediate results.
+        */
+        unionTab = pParse->nTab++;
+        assert( p->pOrderBy==0 );
+        addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, unionTab, 0);
+        assert( p->addrOpenEphm[0] == -1 );
+        p->addrOpenEphm[0] = addr;
+        findRightmost(p)->selFlags |= SF_UsesEphemeral;
+        assert( p->pEList );
+      }
+
+      /* Code the SELECT statements to our left
+      */
+      assert( !pPrior->pOrderBy );
+      sqlite3SelectDestInit(&uniondest, priorOp, unionTab);
+      explainSetInteger(iSub1, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, pPrior, &uniondest);
+      if( rc ){
+        goto multi_select_end;
+      }
+
+      /* Code the current SELECT statement
+      */
+      if( p->op==TK_EXCEPT ){
+        op = SRT_Except;
+      }else{
+        assert( p->op==TK_UNION );
+        op = SRT_Union;
+      }
+      p->pPrior = 0;
+      pLimit = p->pLimit;
+      p->pLimit = 0;
+      pOffset = p->pOffset;
+      p->pOffset = 0;
+      uniondest.eDest = op;
+      explainSetInteger(iSub2, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, p, &uniondest);
+      testcase( rc!=SQLITE_OK );
+      /* Query flattening in sqlite3Select() might refill p->pOrderBy.
+      ** Be sure to delete p->pOrderBy, therefore, to avoid a memory leak. */
+      sqlite3ExprListDelete(db, p->pOrderBy);
+      pDelete = p->pPrior;
+      p->pPrior = pPrior;
+      p->pOrderBy = 0;
+      if( p->op==TK_UNION ) p->nSelectRow += pPrior->nSelectRow;
+      sqlite3ExprDelete(db, p->pLimit);
+      p->pLimit = pLimit;
+      p->pOffset = pOffset;
+      p->iLimit = 0;
+      p->iOffset = 0;
+
+      /* Convert the data in the temporary table into whatever form
+      ** it is that we currently need.
+      */
+      assert( unionTab==dest.iSDParm || dest.eDest!=priorOp );
+      if( dest.eDest!=priorOp ){
+        int iCont, iBreak, iStart;
+        assert( p->pEList );
+        if( dest.eDest==SRT_Output ){
+          Select *pFirst = p;
+          while( pFirst->pPrior ) pFirst = pFirst->pPrior;
+          generateColumnNames(pParse, 0, pFirst->pEList);
+        }
+        iBreak = sqlite3VdbeMakeLabel(v);
+        iCont = sqlite3VdbeMakeLabel(v);
+        computeLimitRegisters(pParse, p, iBreak);
+        sqlite3VdbeAddOp2(v, OP_Rewind, unionTab, iBreak); VdbeCoverage(v);
+        iStart = sqlite3VdbeCurrentAddr(v);
+        selectInnerLoop(pParse, p, p->pEList, unionTab,
+                        0, 0, &dest, iCont, iBreak);
+        sqlite3VdbeResolveLabel(v, iCont);
+        sqlite3VdbeAddOp2(v, OP_Next, unionTab, iStart); VdbeCoverage(v);
+        sqlite3VdbeResolveLabel(v, iBreak);
+        sqlite3VdbeAddOp2(v, OP_Close, unionTab, 0);
+      }
+      break;
+    }
+    default: assert( p->op==TK_INTERSECT ); {
+      int tab1, tab2;
+      int iCont, iBreak, iStart;
+      Expr *pLimit, *pOffset;
+      int addr;
+      SelectDest intersectdest;
+      int r1;
+
+      /* INTERSECT is different from the others since it requires
+      ** two temporary tables.  Hence it has its own case.  Begin
+      ** by allocating the tables we will need.
+      */
+      tab1 = pParse->nTab++;
+      tab2 = pParse->nTab++;
+      assert( p->pOrderBy==0 );
+
+      addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, tab1, 0);
+      assert( p->addrOpenEphm[0] == -1 );
+      p->addrOpenEphm[0] = addr;
+      findRightmost(p)->selFlags |= SF_UsesEphemeral;
+      assert( p->pEList );
+
+      /* Code the SELECTs to our left into temporary table "tab1".
+      */
+      sqlite3SelectDestInit(&intersectdest, SRT_Union, tab1);
+      explainSetInteger(iSub1, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, pPrior, &intersectdest);
+      if( rc ){
+        goto multi_select_end;
+      }
+
+      /* Code the current SELECT into temporary table "tab2"
+      */
+      addr = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, tab2, 0);
+      assert( p->addrOpenEphm[1] == -1 );
+      p->addrOpenEphm[1] = addr;
+      p->pPrior = 0;
+      pLimit = p->pLimit;
+      p->pLimit = 0;
+      pOffset = p->pOffset;
+      p->pOffset = 0;
+      intersectdest.iSDParm = tab2;
+      explainSetInteger(iSub2, pParse->iNextSelectId);
+      rc = sqlite3Select(pParse, p, &intersectdest);
+      testcase( rc!=SQLITE_OK );
+      pDelete = p->pPrior;
+      p->pPrior = pPrior;
+      if( p->nSelectRow>pPrior->nSelectRow ) p->nSelectRow = pPrior->nSelectRow;
+      sqlite3ExprDelete(db, p->pLimit);
+      p->pLimit = pLimit;
+      p->pOffset = pOffset;
+
+      /* Generate code to take the intersection of the two temporary
+      ** tables.
+      */
+      assert( p->pEList );
+      if( dest.eDest==SRT_Output ){
+        Select *pFirst = p;
+        while( pFirst->pPrior ) pFirst = pFirst->pPrior;
+        generateColumnNames(pParse, 0, pFirst->pEList);
+      }
+      iBreak = sqlite3VdbeMakeLabel(v);
+      iCont = sqlite3VdbeMakeLabel(v);
+      computeLimitRegisters(pParse, p, iBreak);
+      sqlite3VdbeAddOp2(v, OP_Rewind, tab1, iBreak); VdbeCoverage(v);
+      r1 = sqlite3GetTempReg(pParse);
+      iStart = sqlite3VdbeAddOp2(v, OP_RowKey, tab1, r1);
+      sqlite3VdbeAddOp4Int(v, OP_NotFound, tab2, iCont, r1, 0); VdbeCoverage(v);
+      sqlite3ReleaseTempReg(pParse, r1);
+      selectInnerLoop(pParse, p, p->pEList, tab1,
+                      0, 0, &dest, iCont, iBreak);
+      sqlite3VdbeResolveLabel(v, iCont);
+      sqlite3VdbeAddOp2(v, OP_Next, tab1, iStart); VdbeCoverage(v);
+      sqlite3VdbeResolveLabel(v, iBreak);
+      sqlite3VdbeAddOp2(v, OP_Close, tab2, 0);
+      sqlite3VdbeAddOp2(v, OP_Close, tab1, 0);
+      break;
+    }
+  }
+
+  explainComposite(pParse, p->op, iSub1, iSub2, p->op!=TK_ALL);
+
+  /* Compute collating sequences used by 
+  ** temporary tables needed to implement the compound select.
+  ** Attach the KeyInfo structure to all temporary tables.
+  **
+  ** This section is run by the right-most SELECT statement only.
+  ** SELECT statements to the left always skip this part.  The right-most
+  ** SELECT might also skip this part if it has no ORDER BY clause and
+  ** no temp tables are required.
+  */
+  if( p->selFlags & SF_UsesEphemeral ){
+    int i;                        /* Loop counter */
+    KeyInfo *pKeyInfo;            /* Collating sequence for the result set */
+    Select *pLoop;                /* For looping through SELECT statements */
+    CollSeq **apColl;             /* For looping through pKeyInfo->aColl[] */
+    int nCol;                     /* Number of columns in result set */
+
+    assert( p->pNext==0 );
+    nCol = p->pEList->nExpr;
+    pKeyInfo = sqlite3KeyInfoAlloc(db, nCol, 1);
+    if( !pKeyInfo ){
+      rc = SQLITE_NOMEM;
+      goto multi_select_end;
+    }
+    for(i=0, apColl=pKeyInfo->aColl; i<nCol; i++, apColl++){
+      *apColl = multiSelectCollSeq(pParse, p, i);
+      if( 0==*apColl ){
+        *apColl = db->pDfltColl;
+      }
+    }
+
+    for(pLoop=p; pLoop; pLoop=pLoop->pPrior){
+      for(i=0; i<2; i++){
+        int addr = pLoop->addrOpenEphm[i];
+        if( addr<0 ){
+          /* If [0] is unused then [1] is also unused.  So we can
+          ** always safely abort as soon as the first unused slot is found */
+          assert( pLoop->addrOpenEphm[1]<0 );
+          break;
+        }
+        sqlite3VdbeChangeP2(v, addr, nCol);
+        sqlite3VdbeChangeP4(v, addr, (char*)sqlite3KeyInfoRef(pKeyInfo),
+                            P4_KEYINFO);
+        pLoop->addrOpenEphm[i] = -1;
+      }
+    }
+    sqlite3KeyInfoUnref(pKeyInfo);
+  }
+
+multi_select_end:
+  pDest->iSdst = dest.iSdst;
+  pDest->nSdst = dest.nSdst;
+  sqlite3SelectDelete(db, pDelete);
+  return rc;
+}
+#endif /* SQLITE_OMIT_COMPOUND_SELECT */
+
+/*
+** Code an output subroutine for a coroutine implementation of a
+** SELECT statment.
+**
+** The data to be output is contained in pIn->iSdst.  There are
+** pIn->nSdst columns to be output.  pDest is where the output should
+** be sent.
+**
+** regReturn is the number of the register holding the subroutine
+** return address.
+**
+** If regPrev>0 then it is the first register in a vector that
+** records the previous output.  mem[regPrev] is a flag that is false
+** if there has been no previous output.  If regPrev>0 then code is
+** generated to suppress duplicates.  pKeyInfo is used for comparing
+** keys.
+**
+** If the LIMIT found in p->iLimit is reached, jump immediately to
+** iBreak.
+*/
+static int generateOutputSubroutine(
+  Parse *pParse,          /* Parsing context */
+  Select *p,              /* The SELECT statement */
+  SelectDest *pIn,        /* Coroutine supplying data */
+  SelectDest *pDest,      /* Where to send the data */
+  int regReturn,          /* The return address register */
+  int regPrev,            /* Previous result register.  No uniqueness if 0 */
+  KeyInfo *pKeyInfo,      /* For comparing with previous entry */
+  int iBreak              /* Jump here if we hit the LIMIT */
+){
+  Vdbe *v = pParse->pVdbe;
+  int iContinue;
+  int addr;
+
+  addr = sqlite3VdbeCurrentAddr(v);
+  iContinue = sqlite3VdbeMakeLabel(v);
+
+  /* Suppress duplicates for UNION, EXCEPT, and INTERSECT 
+  */
+  if( regPrev ){
+    int j1, j2;
+    j1 = sqlite3VdbeAddOp1(v, OP_IfNot, regPrev); VdbeCoverage(v);
+    j2 = sqlite3VdbeAddOp4(v, OP_Compare, pIn->iSdst, regPrev+1, pIn->nSdst,
+                              (char*)sqlite3KeyInfoRef(pKeyInfo), P4_KEYINFO);
+    sqlite3VdbeAddOp3(v, OP_Jump, j2+2, iContinue, j2+2); VdbeCoverage(v);
+    sqlite3VdbeJumpHere(v, j1);
+    sqlite3VdbeAddOp3(v, OP_Copy, pIn->iSdst, regPrev+1, pIn->nSdst-1);
+    sqlite3VdbeAddOp2(v, OP_Integer, 1, regPrev);
+  }
+  if( pParse->db->mallocFailed ) return 0;
+
+  /* Suppress the first OFFSET entries if there is an OFFSET clause
+  */
+  codeOffset(v, p->iOffset, iContinue);
+
+  switch( pDest->eDest ){
+    /* Store the result as data using a unique key.
+    */
+    case SRT_Table:
+    case SRT_EphemTab: {
+      int r1 = sqlite3GetTempReg(pParse);
+      int r2 = sqlite3GetTempReg(pParse);
+      testcase( pDest->eDest==SRT_Table );
+      testcase( pDest->eDest==SRT_EphemTab );
+      sqlite3VdbeAddOp3(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1);
+      sqlite3VdbeAddOp2(v, OP_NewRowid, pDest->iSDParm, r2);
+      sqlite3VdbeAddOp3(v, OP_Insert, pDest->iSDParm, r1, r2);
+      sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
+      sqlite3ReleaseTempReg(pParse, r2);
+      sqlite3ReleaseTempReg(pParse, r1);
+      break;
+    }
+
+#ifndef SQLITE_OMIT_SUBQUERY
+    /* If we are creating a set for an "expr IN (SELECT ...)" construct,
+    ** then there should be a single item on the stack.  Write this
+    ** item into the set table with bogus data.
+    */
+    case SRT_Set: {
+      int r1;
+      assert( pIn->nSdst==1 );
+      pDest->affSdst = 
+         sqlite3CompareAffinity(p->pEList->a[0].pExpr, pDest->affSdst);
+      r1 = sqlite3GetTempReg(pParse);
+      sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, 1, r1, &pDest->affSdst,1);
+      sqlite3ExprCacheAffinityChange(pParse, pIn->iSdst, 1);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, pDest->iSDParm, r1);
+      sqlite3ReleaseTempReg(pParse, r1);
+      break;
+    }
+
+#if 0  /* Never occurs on an ORDER BY query */
+    /* If any row exist in the result set, record that fact and abort.
+    */
+    case SRT_Exists: {
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, pDest->iSDParm);
+      /* The LIMIT clause will terminate the loop for us */
+      break;
+    }
+#endif
+
+    /* If this is a scalar select that is part of an expression, then
+    ** store the results in the appropriate memory cell and break out
+    ** of the scan loop.
+    */
+    case SRT_Mem: {
+      assert( pIn->nSdst==1 );
+      sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSDParm, 1);
+      /* The LIMIT clause will jump out of the loop for us */
+      break;
+    }
+#endif /* #ifndef SQLITE_OMIT_SUBQUERY */
+
+    /* The results are stored in a sequence of registers
+    ** starting at pDest->iSdst.  Then the co-routine yields.
+    */
+    case SRT_Coroutine: {
+      if( pDest->iSdst==0 ){
+        pDest->iSdst = sqlite3GetTempRange(pParse, pIn->nSdst);
+        pDest->nSdst = pIn->nSdst;
+      }
+      sqlite3ExprCodeMove(pParse, pIn->iSdst, pDest->iSdst, pDest->nSdst);
+      sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
+      break;
+    }
+
+    /* If none of the above, then the result destination must be
+    ** SRT_Output.  This routine is never called with any other
+    ** destination other than the ones handled above or SRT_Output.
+    **
+    ** For SRT_Output, results are stored in a sequence of registers.  
+    ** Then the OP_ResultRow opcode is used to cause sqlite3_step() to
+    ** return the next row of result.
+    */
+    default: {
+      assert( pDest->eDest==SRT_Output );
+      sqlite3VdbeAddOp2(v, OP_ResultRow, pIn->iSdst, pIn->nSdst);
+      sqlite3ExprCacheAffinityChange(pParse, pIn->iSdst, pIn->nSdst);
+      break;
+    }
+  }
+
+  /* Jump to the end of the loop if the LIMIT is reached.
+  */
+  if( p->iLimit ){
+    sqlite3VdbeAddOp3(v, OP_IfZero, p->iLimit, iBreak, -1); VdbeCoverage(v);
+  }
+
+  /* Generate the subroutine return
+  */
+  sqlite3VdbeResolveLabel(v, iContinue);
+  sqlite3VdbeAddOp1(v, OP_Return, regReturn);
+
+  return addr;
+}
+
+/*
+** Alternative compound select code generator for cases when there
+** is an ORDER BY clause.
+**
+** We assume a query of the following form:
+**
+**      <selectA>  <operator>  <selectB>  ORDER BY <orderbylist>
+**
+** <operator> is one of UNION ALL, UNION, EXCEPT, or INTERSECT.  The idea
+** is to code both <selectA> and <selectB> with the ORDER BY clause as
+** co-routines.  Then run the co-routines in parallel and merge the results
+** into the output.  In addition to the two coroutines (called selectA and
+** selectB) there are 7 subroutines:
+**
+**    outA:    Move the output of the selectA coroutine into the output
+**             of the compound query.
+**
+**    outB:    Move the output of the selectB coroutine into the output
+**             of the compound query.  (Only generated for UNION and
+**             UNION ALL.  EXCEPT and INSERTSECT never output a row that
+**             appears only in B.)
+**
+**    AltB:    Called when there is data from both coroutines and A<B.
+**
+**    AeqB:    Called when there is data from both coroutines and A==B.
+**
+**    AgtB:    Called when there is data from both coroutines and A>B.
+**
+**    EofA:    Called when data is exhausted from selectA.
+**
+**    EofB:    Called when data is exhausted from selectB.
+**
+** The implementation of the latter five subroutines depend on which 
+** <operator> is used:
+**
+**
+**             UNION ALL         UNION            EXCEPT          INTERSECT
+**          -------------  -----------------  --------------  -----------------
+**   AltB:   outA, nextA      outA, nextA       outA, nextA         nextA
+**
+**   AeqB:   outA, nextA         nextA             nextA         outA, nextA
+**
+**   AgtB:   outB, nextB      outB, nextB          nextB            nextB
+**
+**   EofA:   outB, nextB      outB, nextB          halt             halt
+**
+**   EofB:   outA, nextA      outA, nextA       outA, nextA         halt
+**
+** In the AltB, AeqB, and AgtB subroutines, an EOF on A following nextA
+** causes an immediate jump to EofA and an EOF on B following nextB causes
+** an immediate jump to EofB.  Within EofA and EofB, and EOF on entry or
+** following nextX causes a jump to the end of the select processing.
+**
+** Duplicate removal in the UNION, EXCEPT, and INTERSECT cases is handled
+** within the output subroutine.  The regPrev register set holds the previously
+** output value.  A comparison is made against this value and the output
+** is skipped if the next results would be the same as the previous.
+**
+** The implementation plan is to implement the two coroutines and seven
+** subroutines first, then put the control logic at the bottom.  Like this:
+**
+**          goto Init
+**     coA: coroutine for left query (A)
+**     coB: coroutine for right query (B)
+**    outA: output one row of A
+**    outB: output one row of B (UNION and UNION ALL only)
+**    EofA: ...
+**    EofB: ...
+**    AltB: ...
+**    AeqB: ...
+**    AgtB: ...
+**    Init: initialize coroutine registers
+**          yield coA
+**          if eof(A) goto EofA
+**          yield coB
+**          if eof(B) goto EofB
+**    Cmpr: Compare A, B
+**          Jump AltB, AeqB, AgtB
+**     End: ...
+**
+** We call AltB, AeqB, AgtB, EofA, and EofB "subroutines" but they are not
+** actually called using Gosub and they do not Return.  EofA and EofB loop
+** until all data is exhausted then jump to the "end" labe.  AltB, AeqB,
+** and AgtB jump to either L2 or to one of EofA or EofB.
+*/
+#ifndef SQLITE_OMIT_COMPOUND_SELECT
+static int multiSelectOrderBy(
+  Parse *pParse,        /* Parsing context */
+  Select *p,            /* The right-most of SELECTs to be coded */
+  SelectDest *pDest     /* What to do with query results */
+){
+  int i, j;             /* Loop counters */
+  Select *pPrior;       /* Another SELECT immediately to our left */
+  Vdbe *v;              /* Generate code to this VDBE */
+  SelectDest destA;     /* Destination for coroutine A */
+  SelectDest destB;     /* Destination for coroutine B */
+  int regAddrA;         /* Address register for select-A coroutine */
+  int regAddrB;         /* Address register for select-B coroutine */
+  int addrSelectA;      /* Address of the select-A coroutine */
+  int addrSelectB;      /* Address of the select-B coroutine */
+  int regOutA;          /* Address register for the output-A subroutine */
+  int regOutB;          /* Address register for the output-B subroutine */
+  int addrOutA;         /* Address of the output-A subroutine */
+  int addrOutB = 0;     /* Address of the output-B subroutine */
+  int addrEofA;         /* Address of the select-A-exhausted subroutine */
+  int addrEofA_noB;     /* Alternate addrEofA if B is uninitialized */
+  int addrEofB;         /* Address of the select-B-exhausted subroutine */
+  int addrAltB;         /* Address of the A<B subroutine */
+  int addrAeqB;         /* Address of the A==B subroutine */
+  int addrAgtB;         /* Address of the A>B subroutine */
+  int regLimitA;        /* Limit register for select-A */
+  int regLimitB;        /* Limit register for select-A */
+  int regPrev;          /* A range of registers to hold previous output */
+  int savedLimit;       /* Saved value of p->iLimit */
+  int savedOffset;      /* Saved value of p->iOffset */
+  int labelCmpr;        /* Label for the start of the merge algorithm */
+  int labelEnd;         /* Label for the end of the overall SELECT stmt */
+  int j1;               /* Jump instructions that get retargetted */
+  int op;               /* One of TK_ALL, TK_UNION, TK_EXCEPT, TK_INTERSECT */
+  KeyInfo *pKeyDup = 0; /* Comparison information for duplicate removal */
+  KeyInfo *pKeyMerge;   /* Comparison information for merging rows */
+  sqlite3 *db;          /* Database connection */
+  ExprList *pOrderBy;   /* The ORDER BY clause */
+  int nOrderBy;         /* Number of terms in the ORDER BY clause */
+  int *aPermute;        /* Mapping from ORDER BY terms to result set columns */
+#ifndef SQLITE_OMIT_EXPLAIN
+  int iSub1;            /* EQP id of left-hand query */
+  int iSub2;            /* EQP id of right-hand query */
+#endif
+
+  assert( p->pOrderBy!=0 );
+  assert( pKeyDup==0 ); /* "Managed" code needs this.  Ticket #3382. */
+  db = pParse->db;
+  v = pParse->pVdbe;
+  assert( v!=0 );       /* Already thrown the error if VDBE alloc failed */
+  labelEnd = sqlite3VdbeMakeLabel(v);
+  labelCmpr = sqlite3VdbeMakeLabel(v);
+
+
+  /* Patch up the ORDER BY clause
+  */
+  op = p->op;  
+  pPrior = p->pPrior;
+  assert( pPrior->pOrderBy==0 );
+  pOrderBy = p->pOrderBy;
+  assert( pOrderBy );
+  nOrderBy = pOrderBy->nExpr;
+
+  /* For operators other than UNION ALL we have to make sure that
+  ** the ORDER BY clause covers every term of the result set.  Add
+  ** terms to the ORDER BY clause as necessary.
+  */
+  if( op!=TK_ALL ){
+    for(i=1; db->mallocFailed==0 && i<=p->pEList->nExpr; i++){
+      struct ExprList_item *pItem;
+      for(j=0, pItem=pOrderBy->a; j<nOrderBy; j++, pItem++){
+        assert( pItem->u.x.iOrderByCol>0 );
+        if( pItem->u.x.iOrderByCol==i ) break;
+      }
+      if( j==nOrderBy ){
+        Expr *pNew = sqlite3Expr(db, TK_INTEGER, 0);
+        if( pNew==0 ) return SQLITE_NOMEM;
+        pNew->flags |= EP_IntValue;
+        pNew->u.iValue = i;
+        pOrderBy = sqlite3ExprListAppend(pParse, pOrderBy, pNew);
+        if( pOrderBy ) pOrderBy->a[nOrderBy++].u.x.iOrderByCol = (u16)i;
+      }
+    }
+  }
+
+  /* Compute the comparison permutation and keyinfo that is used with
+  ** the permutation used to determine if the next
+  ** row of results comes from selectA or selectB.  Also add explicit
+  ** collations to the ORDER BY clause terms so that when the subqueries
+  ** to the right and the left are evaluated, they use the correct
+  ** collation.
+  */
+  aPermute = sqlite3DbMallocRaw(db, sizeof(int)*nOrderBy);
+  if( aPermute ){
+    struct ExprList_item *pItem;
+    for(i=0, pItem=pOrderBy->a; i<nOrderBy; i++, pItem++){
+      assert( pItem->u.x.iOrderByCol>0
+          && pItem->u.x.iOrderByCol<=p->pEList->nExpr );
+      aPermute[i] = pItem->u.x.iOrderByCol - 1;
+    }
+    pKeyMerge = multiSelectOrderByKeyInfo(pParse, p, 1);
+  }else{
+    pKeyMerge = 0;
+  }
+
+  /* Reattach the ORDER BY clause to the query.
+  */
+  p->pOrderBy = pOrderBy;
+  pPrior->pOrderBy = sqlite3ExprListDup(pParse->db, pOrderBy, 0);
+
+  /* Allocate a range of temporary registers and the KeyInfo needed
+  ** for the logic that removes duplicate result rows when the
+  ** operator is UNION, EXCEPT, or INTERSECT (but not UNION ALL).
+  */
+  if( op==TK_ALL ){
+    regPrev = 0;
+  }else{
+    int nExpr = p->pEList->nExpr;
+    assert( nOrderBy>=nExpr || db->mallocFailed );
+    regPrev = pParse->nMem+1;
+    pParse->nMem += nExpr+1;
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regPrev);
+    pKeyDup = sqlite3KeyInfoAlloc(db, nExpr, 1);
+    if( pKeyDup ){
+      assert( sqlite3KeyInfoIsWriteable(pKeyDup) );
+      for(i=0; i<nExpr; i++){
+        pKeyDup->aColl[i] = multiSelectCollSeq(pParse, p, i);
+        pKeyDup->aSortOrder[i] = 0;
+      }
+    }
+  }
+ 
+  /* Separate the left and the right query from one another
+  */
+  p->pPrior = 0;
+  pPrior->pNext = 0;
+  sqlite3ResolveOrderGroupBy(pParse, p, p->pOrderBy, "ORDER");
+  if( pPrior->pPrior==0 ){
+    sqlite3ResolveOrderGroupBy(pParse, pPrior, pPrior->pOrderBy, "ORDER");
+  }
+
+  /* Compute the limit registers */
+  computeLimitRegisters(pParse, p, labelEnd);
+  if( p->iLimit && op==TK_ALL ){
+    regLimitA = ++pParse->nMem;
+    regLimitB = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Copy, p->iOffset ? p->iOffset+1 : p->iLimit,
+                                  regLimitA);
+    sqlite3VdbeAddOp2(v, OP_Copy, regLimitA, regLimitB);
+  }else{
+    regLimitA = regLimitB = 0;
+  }
+  sqlite3ExprDelete(db, p->pLimit);
+  p->pLimit = 0;
+  sqlite3ExprDelete(db, p->pOffset);
+  p->pOffset = 0;
+
+  regAddrA = ++pParse->nMem;
+  regAddrB = ++pParse->nMem;
+  regOutA = ++pParse->nMem;
+  regOutB = ++pParse->nMem;
+  sqlite3SelectDestInit(&destA, SRT_Coroutine, regAddrA);
+  sqlite3SelectDestInit(&destB, SRT_Coroutine, regAddrB);
+
+  /* Generate a coroutine to evaluate the SELECT statement to the
+  ** left of the compound operator - the "A" select.
+  */
+  addrSelectA = sqlite3VdbeCurrentAddr(v) + 1;
+  j1 = sqlite3VdbeAddOp3(v, OP_InitCoroutine, regAddrA, 0, addrSelectA);
+  VdbeComment((v, "left SELECT"));
+  pPrior->iLimit = regLimitA;
+  explainSetInteger(iSub1, pParse->iNextSelectId);
+  sqlite3Select(pParse, pPrior, &destA);
+  sqlite3VdbeAddOp1(v, OP_EndCoroutine, regAddrA);
+  sqlite3VdbeJumpHere(v, j1);
+
+  /* Generate a coroutine to evaluate the SELECT statement on 
+  ** the right - the "B" select
+  */
+  addrSelectB = sqlite3VdbeCurrentAddr(v) + 1;
+  j1 = sqlite3VdbeAddOp3(v, OP_InitCoroutine, regAddrB, 0, addrSelectB);
+  VdbeComment((v, "right SELECT"));
+  savedLimit = p->iLimit;
+  savedOffset = p->iOffset;
+  p->iLimit = regLimitB;
+  p->iOffset = 0;  
+  explainSetInteger(iSub2, pParse->iNextSelectId);
+  sqlite3Select(pParse, p, &destB);
+  p->iLimit = savedLimit;
+  p->iOffset = savedOffset;
+  sqlite3VdbeAddOp1(v, OP_EndCoroutine, regAddrB);
+
+  /* Generate a subroutine that outputs the current row of the A
+  ** select as the next output row of the compound select.
+  */
+  VdbeNoopComment((v, "Output routine for A"));
+  addrOutA = generateOutputSubroutine(pParse,
+                 p, &destA, pDest, regOutA,
+                 regPrev, pKeyDup, labelEnd);
+  
+  /* Generate a subroutine that outputs the current row of the B
+  ** select as the next output row of the compound select.
+  */
+  if( op==TK_ALL || op==TK_UNION ){
+    VdbeNoopComment((v, "Output routine for B"));
+    addrOutB = generateOutputSubroutine(pParse,
+                 p, &destB, pDest, regOutB,
+                 regPrev, pKeyDup, labelEnd);
+  }
+  sqlite3KeyInfoUnref(pKeyDup);
+
+  /* Generate a subroutine to run when the results from select A
+  ** are exhausted and only data in select B remains.
+  */
+  if( op==TK_EXCEPT || op==TK_INTERSECT ){
+    addrEofA_noB = addrEofA = labelEnd;
+  }else{  
+    VdbeNoopComment((v, "eof-A subroutine"));
+    addrEofA = sqlite3VdbeAddOp2(v, OP_Gosub, regOutB, addrOutB);
+    addrEofA_noB = sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, labelEnd);
+                                     VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, addrEofA);
+    p->nSelectRow += pPrior->nSelectRow;
+  }
+
+  /* Generate a subroutine to run when the results from select B
+  ** are exhausted and only data in select A remains.
+  */
+  if( op==TK_INTERSECT ){
+    addrEofB = addrEofA;
+    if( p->nSelectRow > pPrior->nSelectRow ) p->nSelectRow = pPrior->nSelectRow;
+  }else{  
+    VdbeNoopComment((v, "eof-B subroutine"));
+    addrEofB = sqlite3VdbeAddOp2(v, OP_Gosub, regOutA, addrOutA);
+    sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, labelEnd); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, addrEofB);
+  }
+
+  /* Generate code to handle the case of A<B
+  */
+  VdbeNoopComment((v, "A-lt-B subroutine"));
+  addrAltB = sqlite3VdbeAddOp2(v, OP_Gosub, regOutA, addrOutA);
+  sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, addrEofA); VdbeCoverage(v);
+  sqlite3VdbeAddOp2(v, OP_Goto, 0, labelCmpr);
+
+  /* Generate code to handle the case of A==B
+  */
+  if( op==TK_ALL ){
+    addrAeqB = addrAltB;
+  }else if( op==TK_INTERSECT ){
+    addrAeqB = addrAltB;
+    addrAltB++;
+  }else{
+    VdbeNoopComment((v, "A-eq-B subroutine"));
+    addrAeqB =
+    sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, addrEofA); VdbeCoverage(v);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, labelCmpr);
+  }
+
+  /* Generate code to handle the case of A>B
+  */
+  VdbeNoopComment((v, "A-gt-B subroutine"));
+  addrAgtB = sqlite3VdbeCurrentAddr(v);
+  if( op==TK_ALL || op==TK_UNION ){
+    sqlite3VdbeAddOp2(v, OP_Gosub, regOutB, addrOutB);
+  }
+  sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, addrEofB); VdbeCoverage(v);
+  sqlite3VdbeAddOp2(v, OP_Goto, 0, labelCmpr);
+
+  /* This code runs once to initialize everything.
+  */
+  sqlite3VdbeJumpHere(v, j1);
+  sqlite3VdbeAddOp2(v, OP_Yield, regAddrA, addrEofA_noB); VdbeCoverage(v);
+  sqlite3VdbeAddOp2(v, OP_Yield, regAddrB, addrEofB); VdbeCoverage(v);
+
+  /* Implement the main merge loop
+  */
+  sqlite3VdbeResolveLabel(v, labelCmpr);
+  sqlite3VdbeAddOp4(v, OP_Permutation, 0, 0, 0, (char*)aPermute, P4_INTARRAY);
+  sqlite3VdbeAddOp4(v, OP_Compare, destA.iSdst, destB.iSdst, nOrderBy,
+                         (char*)pKeyMerge, P4_KEYINFO);
+  sqlite3VdbeChangeP5(v, OPFLAG_PERMUTE);
+  sqlite3VdbeAddOp3(v, OP_Jump, addrAltB, addrAeqB, addrAgtB); VdbeCoverage(v);
+
+  /* Jump to the this point in order to terminate the query.
+  */
+  sqlite3VdbeResolveLabel(v, labelEnd);
+
+  /* Set the number of output columns
+  */
+  if( pDest->eDest==SRT_Output ){
+    Select *pFirst = pPrior;
+    while( pFirst->pPrior ) pFirst = pFirst->pPrior;
+    generateColumnNames(pParse, 0, pFirst->pEList);
+  }
+
+  /* Reassembly the compound query so that it will be freed correctly
+  ** by the calling function */
+  if( p->pPrior ){
+    sqlite3SelectDelete(db, p->pPrior);
+  }
+  p->pPrior = pPrior;
+  pPrior->pNext = p;
+
+  /*** TBD:  Insert subroutine calls to close cursors on incomplete
+  **** subqueries ****/
+  explainComposite(pParse, p->op, iSub1, iSub2, 0);
+  return SQLITE_OK;
+}
+#endif
+
+#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
+/* Forward Declarations */
+static void substExprList(sqlite3*, ExprList*, int, ExprList*);
+static void substSelect(sqlite3*, Select *, int, ExprList *);
+
+/*
+** Scan through the expression pExpr.  Replace every reference to
+** a column in table number iTable with a copy of the iColumn-th
+** entry in pEList.  (But leave references to the ROWID column 
+** unchanged.)
+**
+** This routine is part of the flattening procedure.  A subquery
+** whose result set is defined by pEList appears as entry in the
+** FROM clause of a SELECT such that the VDBE cursor assigned to that
+** FORM clause entry is iTable.  This routine make the necessary 
+** changes to pExpr so that it refers directly to the source table
+** of the subquery rather the result set of the subquery.
+*/
+static Expr *substExpr(
+  sqlite3 *db,        /* Report malloc errors to this connection */
+  Expr *pExpr,        /* Expr in which substitution occurs */
+  int iTable,         /* Table to be substituted */
+  ExprList *pEList    /* Substitute expressions */
+){
+  if( pExpr==0 ) return 0;
+  if( pExpr->op==TK_COLUMN && pExpr->iTable==iTable ){
+    if( pExpr->iColumn<0 ){
+      pExpr->op = TK_NULL;
+    }else{
+      Expr *pNew;
+      assert( pEList!=0 && pExpr->iColumn<pEList->nExpr );
+      assert( pExpr->pLeft==0 && pExpr->pRight==0 );
+      pNew = sqlite3ExprDup(db, pEList->a[pExpr->iColumn].pExpr, 0);
+      sqlite3ExprDelete(db, pExpr);
+      pExpr = pNew;
+    }
+  }else{
+    pExpr->pLeft = substExpr(db, pExpr->pLeft, iTable, pEList);
+    pExpr->pRight = substExpr(db, pExpr->pRight, iTable, pEList);
+    if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+      substSelect(db, pExpr->x.pSelect, iTable, pEList);
+    }else{
+      substExprList(db, pExpr->x.pList, iTable, pEList);
+    }
+  }
+  return pExpr;
+}
+static void substExprList(
+  sqlite3 *db,         /* Report malloc errors here */
+  ExprList *pList,     /* List to scan and in which to make substitutes */
+  int iTable,          /* Table to be substituted */
+  ExprList *pEList     /* Substitute values */
+){
+  int i;
+  if( pList==0 ) return;
+  for(i=0; i<pList->nExpr; i++){
+    pList->a[i].pExpr = substExpr(db, pList->a[i].pExpr, iTable, pEList);
+  }
+}
+static void substSelect(
+  sqlite3 *db,         /* Report malloc errors here */
+  Select *p,           /* SELECT statement in which to make substitutions */
+  int iTable,          /* Table to be replaced */
+  ExprList *pEList     /* Substitute values */
+){
+  SrcList *pSrc;
+  struct SrcList_item *pItem;
+  int i;
+  if( !p ) return;
+  substExprList(db, p->pEList, iTable, pEList);
+  substExprList(db, p->pGroupBy, iTable, pEList);
+  substExprList(db, p->pOrderBy, iTable, pEList);
+  p->pHaving = substExpr(db, p->pHaving, iTable, pEList);
+  p->pWhere = substExpr(db, p->pWhere, iTable, pEList);
+  substSelect(db, p->pPrior, iTable, pEList);
+  pSrc = p->pSrc;
+  assert( pSrc );  /* Even for (SELECT 1) we have: pSrc!=0 but pSrc->nSrc==0 */
+  if( ALWAYS(pSrc) ){
+    for(i=pSrc->nSrc, pItem=pSrc->a; i>0; i--, pItem++){
+      substSelect(db, pItem->pSelect, iTable, pEList);
+    }
+  }
+}
+#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */
+
+#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
+/*
+** This routine attempts to flatten subqueries as a performance optimization.
+** This routine returns 1 if it makes changes and 0 if no flattening occurs.
+**
+** To understand the concept of flattening, consider the following
+** query:
+**
+**     SELECT a FROM (SELECT x+y AS a FROM t1 WHERE z<100) WHERE a>5
+**
+** The default way of implementing this query is to execute the
+** subquery first and store the results in a temporary table, then
+** run the outer query on that temporary table.  This requires two
+** passes over the data.  Furthermore, because the temporary table
+** has no indices, the WHERE clause on the outer query cannot be
+** optimized.
+**
+** This routine attempts to rewrite queries such as the above into
+** a single flat select, like this:
+**
+**     SELECT x+y AS a FROM t1 WHERE z<100 AND a>5
+**
+** The code generated for this simplification gives the same result
+** but only has to scan the data once.  And because indices might 
+** exist on the table t1, a complete scan of the data might be
+** avoided.
+**
+** Flattening is only attempted if all of the following are true:
+**
+**   (1)  The subquery and the outer query do not both use aggregates.
+**
+**   (2)  The subquery is not an aggregate or the outer query is not a join.
+**
+**   (3)  The subquery is not the right operand of a left outer join
+**        (Originally ticket #306.  Strengthened by ticket #3300)
+**
+**   (4)  The subquery is not DISTINCT.
+**
+**  (**)  At one point restrictions (4) and (5) defined a subset of DISTINCT
+**        sub-queries that were excluded from this optimization. Restriction 
+**        (4) has since been expanded to exclude all DISTINCT subqueries.
+**
+**   (6)  The subquery does not use aggregates or the outer query is not
+**        DISTINCT.
+**
+**   (7)  The subquery has a FROM clause.  TODO:  For subqueries without
+**        A FROM clause, consider adding a FROM close with the special
+**        table sqlite_once that consists of a single row containing a
+**        single NULL.
+**
+**   (8)  The subquery does not use LIMIT or the outer query is not a join.
+**
+**   (9)  The subquery does not use LIMIT or the outer query does not use
+**        aggregates.
+**
+**  (**)  Restriction (10) was removed from the code on 2005-02-05 but we
+**        accidently carried the comment forward until 2014-09-15.  Original
+**        text: "The subquery does not use aggregates or the outer query does not
+**        use LIMIT."
+**
+**  (11)  The subquery and the outer query do not both have ORDER BY clauses.
+**
+**  (**)  Not implemented.  Subsumed into restriction (3).  Was previously
+**        a separate restriction deriving from ticket #350.
+**
+**  (13)  The subquery and outer query do not both use LIMIT.
+**
+**  (14)  The subquery does not use OFFSET.
+**
+**  (15)  The outer query is not part of a compound select or the
+**        subquery does not have a LIMIT clause.
+**        (See ticket #2339 and ticket [02a8e81d44]).
+**
+**  (16)  The outer query is not an aggregate or the subquery does
+**        not contain ORDER BY.  (Ticket #2942)  This used to not matter
+**        until we introduced the group_concat() function.  
+**
+**  (17)  The sub-query is not a compound select, or it is a UNION ALL 
+**        compound clause made up entirely of non-aggregate queries, and 
+**        the parent query:
+**
+**          * is not itself part of a compound select,
+**          * is not an aggregate or DISTINCT query, and
+**          * is not a join
+**
+**        The parent and sub-query may contain WHERE clauses. Subject to
+**        rules (11), (13) and (14), they may also contain ORDER BY,
+**        LIMIT and OFFSET clauses.  The subquery cannot use any compound
+**        operator other than UNION ALL because all the other compound
+**        operators have an implied DISTINCT which is disallowed by
+**        restriction (4).
+**
+**        Also, each component of the sub-query must return the same number
+**        of result columns. This is actually a requirement for any compound
+**        SELECT statement, but all the code here does is make sure that no
+**        such (illegal) sub-query is flattened. The caller will detect the
+**        syntax error and return a detailed message.
+**
+**  (18)  If the sub-query is a compound select, then all terms of the
+**        ORDER by clause of the parent must be simple references to 
+**        columns of the sub-query.
+**
+**  (19)  The subquery does not use LIMIT or the outer query does not
+**        have a WHERE clause.
+**
+**  (20)  If the sub-query is a compound select, then it must not use
+**        an ORDER BY clause.  Ticket #3773.  We could relax this constraint
+**        somewhat by saying that the terms of the ORDER BY clause must
+**        appear as unmodified result columns in the outer query.  But we
+**        have other optimizations in mind to deal with that case.
+**
+**  (21)  The subquery does not use LIMIT or the outer query is not
+**        DISTINCT.  (See ticket [752e1646fc]).
+**
+**  (22)  The subquery is not a recursive CTE.
+**
+**  (23)  The parent is not a recursive CTE, or the sub-query is not a
+**        compound query. This restriction is because transforming the
+**        parent to a compound query confuses the code that handles
+**        recursive queries in multiSelect().
+**
+**  (24)  The subquery is not an aggregate that uses the built-in min() or 
+**        or max() functions.  (Without this restriction, a query like:
+**        "SELECT x FROM (SELECT max(y), x FROM t1)" would not necessarily
+**        return the value X for which Y was maximal.)
+**
+**
+** In this routine, the "p" parameter is a pointer to the outer query.
+** The subquery is p->pSrc->a[iFrom].  isAgg is true if the outer query
+** uses aggregates and subqueryIsAgg is true if the subquery uses aggregates.
+**
+** If flattening is not attempted, this routine is a no-op and returns 0.
+** If flattening is attempted this routine returns 1.
+**
+** All of the expression analysis must occur on both the outer query and
+** the subquery before this routine runs.
+*/
+static int flattenSubquery(
+  Parse *pParse,       /* Parsing context */
+  Select *p,           /* The parent or outer SELECT statement */
+  int iFrom,           /* Index in p->pSrc->a[] of the inner subquery */
+  int isAgg,           /* True if outer SELECT uses aggregate functions */
+  int subqueryIsAgg    /* True if the subquery uses aggregate functions */
+){
+  const char *zSavedAuthContext = pParse->zAuthContext;
+  Select *pParent;
+  Select *pSub;       /* The inner query or "subquery" */
+  Select *pSub1;      /* Pointer to the rightmost select in sub-query */
+  SrcList *pSrc;      /* The FROM clause of the outer query */
+  SrcList *pSubSrc;   /* The FROM clause of the subquery */
+  ExprList *pList;    /* The result set of the outer query */
+  int iParent;        /* VDBE cursor number of the pSub result set temp table */
+  int i;              /* Loop counter */
+  Expr *pWhere;                    /* The WHERE clause */
+  struct SrcList_item *pSubitem;   /* The subquery */
+  sqlite3 *db = pParse->db;
+
+  /* Check to see if flattening is permitted.  Return 0 if not.
+  */
+  assert( p!=0 );
+  assert( p->pPrior==0 );  /* Unable to flatten compound queries */
+  if( OptimizationDisabled(db, SQLITE_QueryFlattener) ) return 0;
+  pSrc = p->pSrc;
+  assert( pSrc && iFrom>=0 && iFrom<pSrc->nSrc );
+  pSubitem = &pSrc->a[iFrom];
+  iParent = pSubitem->iCursor;
+  pSub = pSubitem->pSelect;
+  assert( pSub!=0 );
+  if( isAgg && subqueryIsAgg ) return 0;                 /* Restriction (1)  */
+  if( subqueryIsAgg && pSrc->nSrc>1 ) return 0;          /* Restriction (2)  */
+  pSubSrc = pSub->pSrc;
+  assert( pSubSrc );
+  /* Prior to version 3.1.2, when LIMIT and OFFSET had to be simple constants,
+  ** not arbitrary expressions, we allowed some combining of LIMIT and OFFSET
+  ** because they could be computed at compile-time.  But when LIMIT and OFFSET
+  ** became arbitrary expressions, we were forced to add restrictions (13)
+  ** and (14). */
+  if( pSub->pLimit && p->pLimit ) return 0;              /* Restriction (13) */
+  if( pSub->pOffset ) return 0;                          /* Restriction (14) */
+  if( (p->selFlags & SF_Compound)!=0 && pSub->pLimit ){
+    return 0;                                            /* Restriction (15) */
+  }
+  if( pSubSrc->nSrc==0 ) return 0;                       /* Restriction (7)  */
+  if( pSub->selFlags & SF_Distinct ) return 0;           /* Restriction (5)  */
+  if( pSub->pLimit && (pSrc->nSrc>1 || isAgg) ){
+     return 0;         /* Restrictions (8)(9) */
+  }
+  if( (p->selFlags & SF_Distinct)!=0 && subqueryIsAgg ){
+     return 0;         /* Restriction (6)  */
+  }
+  if( p->pOrderBy && pSub->pOrderBy ){
+     return 0;                                           /* Restriction (11) */
+  }
+  if( isAgg && pSub->pOrderBy ) return 0;                /* Restriction (16) */
+  if( pSub->pLimit && p->pWhere ) return 0;              /* Restriction (19) */
+  if( pSub->pLimit && (p->selFlags & SF_Distinct)!=0 ){
+     return 0;         /* Restriction (21) */
+  }
+  testcase( pSub->selFlags & SF_Recursive );
+  testcase( pSub->selFlags & SF_MinMaxAgg );
+  if( pSub->selFlags & (SF_Recursive|SF_MinMaxAgg) ){
+    return 0; /* Restrictions (22) and (24) */
+  }
+  if( (p->selFlags & SF_Recursive) && pSub->pPrior ){
+    return 0; /* Restriction (23) */
+  }
+
+  /* OBSOLETE COMMENT 1:
+  ** Restriction 3:  If the subquery is a join, make sure the subquery is 
+  ** not used as the right operand of an outer join.  Examples of why this
+  ** is not allowed:
+  **
+  **         t1 LEFT OUTER JOIN (t2 JOIN t3)
+  **
+  ** If we flatten the above, we would get
+  **
+  **         (t1 LEFT OUTER JOIN t2) JOIN t3
+  **
+  ** which is not at all the same thing.
+  **
+  ** OBSOLETE COMMENT 2:
+  ** Restriction 12:  If the subquery is the right operand of a left outer
+  ** join, make sure the subquery has no WHERE clause.
+  ** An examples of why this is not allowed:
+  **
+  **         t1 LEFT OUTER JOIN (SELECT * FROM t2 WHERE t2.x>0)
+  **
+  ** If we flatten the above, we would get
+  **
+  **         (t1 LEFT OUTER JOIN t2) WHERE t2.x>0
+  **
+  ** But the t2.x>0 test will always fail on a NULL row of t2, which
+  ** effectively converts the OUTER JOIN into an INNER JOIN.
+  **
+  ** THIS OVERRIDES OBSOLETE COMMENTS 1 AND 2 ABOVE:
+  ** Ticket #3300 shows that flattening the right term of a LEFT JOIN
+  ** is fraught with danger.  Best to avoid the whole thing.  If the
+  ** subquery is the right term of a LEFT JOIN, then do not flatten.
+  */
+  if( (pSubitem->jointype & JT_OUTER)!=0 ){
+    return 0;
+  }
+
+  /* Restriction 17: If the sub-query is a compound SELECT, then it must
+  ** use only the UNION ALL operator. And none of the simple select queries
+  ** that make up the compound SELECT are allowed to be aggregate or distinct
+  ** queries.
+  */
+  if( pSub->pPrior ){
+    if( pSub->pOrderBy ){
+      return 0;  /* Restriction 20 */
+    }
+    if( isAgg || (p->selFlags & SF_Distinct)!=0 || pSrc->nSrc!=1 ){
+      return 0;
+    }
+    for(pSub1=pSub; pSub1; pSub1=pSub1->pPrior){
+      testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct );
+      testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate );
+      assert( pSub->pSrc!=0 );
+      if( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))!=0
+       || (pSub1->pPrior && pSub1->op!=TK_ALL) 
+       || pSub1->pSrc->nSrc<1
+       || pSub->pEList->nExpr!=pSub1->pEList->nExpr
+      ){
+        return 0;
+      }
+      testcase( pSub1->pSrc->nSrc>1 );
+    }
+
+    /* Restriction 18. */
+    if( p->pOrderBy ){
+      int ii;
+      for(ii=0; ii<p->pOrderBy->nExpr; ii++){
+        if( p->pOrderBy->a[ii].u.x.iOrderByCol==0 ) return 0;
+      }
+    }
+  }
+
+  /***** If we reach this point, flattening is permitted. *****/
+  SELECTTRACE(1,pParse,p,("flatten %s.%p from term %d\n",
+                   pSub->zSelName, pSub, iFrom));
+
+  /* Authorize the subquery */
+  pParse->zAuthContext = pSubitem->zName;
+  TESTONLY(i =) sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0);
+  testcase( i==SQLITE_DENY );
+  pParse->zAuthContext = zSavedAuthContext;
+
+  /* If the sub-query is a compound SELECT statement, then (by restrictions
+  ** 17 and 18 above) it must be a UNION ALL and the parent query must 
+  ** be of the form:
+  **
+  **     SELECT <expr-list> FROM (<sub-query>) <where-clause> 
+  **
+  ** followed by any ORDER BY, LIMIT and/or OFFSET clauses. This block
+  ** creates N-1 copies of the parent query without any ORDER BY, LIMIT or 
+  ** OFFSET clauses and joins them to the left-hand-side of the original
+  ** using UNION ALL operators. In this case N is the number of simple
+  ** select statements in the compound sub-query.
+  **
+  ** Example:
+  **
+  **     SELECT a+1 FROM (
+  **        SELECT x FROM tab
+  **        UNION ALL
+  **        SELECT y FROM tab
+  **        UNION ALL
+  **        SELECT abs(z*2) FROM tab2
+  **     ) WHERE a!=5 ORDER BY 1
+  **
+  ** Transformed into:
+  **
+  **     SELECT x+1 FROM tab WHERE x+1!=5
+  **     UNION ALL
+  **     SELECT y+1 FROM tab WHERE y+1!=5
+  **     UNION ALL
+  **     SELECT abs(z*2)+1 FROM tab2 WHERE abs(z*2)+1!=5
+  **     ORDER BY 1
+  **
+  ** We call this the "compound-subquery flattening".
+  */
+  for(pSub=pSub->pPrior; pSub; pSub=pSub->pPrior){
+    Select *pNew;
+    ExprList *pOrderBy = p->pOrderBy;
+    Expr *pLimit = p->pLimit;
+    Expr *pOffset = p->pOffset;
+    Select *pPrior = p->pPrior;
+    p->pOrderBy = 0;
+    p->pSrc = 0;
+    p->pPrior = 0;
+    p->pLimit = 0;
+    p->pOffset = 0;
+    pNew = sqlite3SelectDup(db, p, 0);
+    sqlite3SelectSetName(pNew, pSub->zSelName);
+    p->pOffset = pOffset;
+    p->pLimit = pLimit;
+    p->pOrderBy = pOrderBy;
+    p->pSrc = pSrc;
+    p->op = TK_ALL;
+    if( pNew==0 ){
+      p->pPrior = pPrior;
+    }else{
+      pNew->pPrior = pPrior;
+      if( pPrior ) pPrior->pNext = pNew;
+      pNew->pNext = p;
+      p->pPrior = pNew;
+      SELECTTRACE(2,pParse,p,
+         ("compound-subquery flattener creates %s.%p as peer\n",
+         pNew->zSelName, pNew));
+    }
+    if( db->mallocFailed ) return 1;
+  }
+
+  /* Begin flattening the iFrom-th entry of the FROM clause 
+  ** in the outer query.
+  */
+  pSub = pSub1 = pSubitem->pSelect;
+
+  /* Delete the transient table structure associated with the
+  ** subquery
+  */
+  sqlite3DbFree(db, pSubitem->zDatabase);
+  sqlite3DbFree(db, pSubitem->zName);
+  sqlite3DbFree(db, pSubitem->zAlias);
+  pSubitem->zDatabase = 0;
+  pSubitem->zName = 0;
+  pSubitem->zAlias = 0;
+  pSubitem->pSelect = 0;
+
+  /* Defer deleting the Table object associated with the
+  ** subquery until code generation is
+  ** complete, since there may still exist Expr.pTab entries that
+  ** refer to the subquery even after flattening.  Ticket #3346.
+  **
+  ** pSubitem->pTab is always non-NULL by test restrictions and tests above.
+  */
+  if( ALWAYS(pSubitem->pTab!=0) ){
+    Table *pTabToDel = pSubitem->pTab;
+    if( pTabToDel->nRef==1 ){
+      Parse *pToplevel = sqlite3ParseToplevel(pParse);
+      pTabToDel->pNextZombie = pToplevel->pZombieTab;
+      pToplevel->pZombieTab = pTabToDel;
+    }else{
+      pTabToDel->nRef--;
+    }
+    pSubitem->pTab = 0;
+  }
+
+  /* The following loop runs once for each term in a compound-subquery
+  ** flattening (as described above).  If we are doing a different kind
+  ** of flattening - a flattening other than a compound-subquery flattening -
+  ** then this loop only runs once.
+  **
+  ** This loop moves all of the FROM elements of the subquery into the
+  ** the FROM clause of the outer query.  Before doing this, remember
+  ** the cursor number for the original outer query FROM element in
+  ** iParent.  The iParent cursor will never be used.  Subsequent code
+  ** will scan expressions looking for iParent references and replace
+  ** those references with expressions that resolve to the subquery FROM
+  ** elements we are now copying in.
+  */
+  for(pParent=p; pParent; pParent=pParent->pPrior, pSub=pSub->pPrior){
+    int nSubSrc;
+    u8 jointype = 0;
+    pSubSrc = pSub->pSrc;     /* FROM clause of subquery */
+    nSubSrc = pSubSrc->nSrc;  /* Number of terms in subquery FROM clause */
+    pSrc = pParent->pSrc;     /* FROM clause of the outer query */
+
+    if( pSrc ){
+      assert( pParent==p );  /* First time through the loop */
+      jointype = pSubitem->jointype;
+    }else{
+      assert( pParent!=p );  /* 2nd and subsequent times through the loop */
+      pSrc = pParent->pSrc = sqlite3SrcListAppend(db, 0, 0, 0);
+      if( pSrc==0 ){
+        assert( db->mallocFailed );
+        break;
+      }
+    }
+
+    /* The subquery uses a single slot of the FROM clause of the outer
+    ** query.  If the subquery has more than one element in its FROM clause,
+    ** then expand the outer query to make space for it to hold all elements
+    ** of the subquery.
+    **
+    ** Example:
+    **
+    **    SELECT * FROM tabA, (SELECT * FROM sub1, sub2), tabB;
+    **
+    ** The outer query has 3 slots in its FROM clause.  One slot of the
+    ** outer query (the middle slot) is used by the subquery.  The next
+    ** block of code will expand the out query to 4 slots.  The middle
+    ** slot is expanded to two slots in order to make space for the
+    ** two elements in the FROM clause of the subquery.
+    */
+    if( nSubSrc>1 ){
+      pParent->pSrc = pSrc = sqlite3SrcListEnlarge(db, pSrc, nSubSrc-1,iFrom+1);
+      if( db->mallocFailed ){
+        break;
+      }
+    }
+
+    /* Transfer the FROM clause terms from the subquery into the
+    ** outer query.
+    */
+    for(i=0; i<nSubSrc; i++){
+      sqlite3IdListDelete(db, pSrc->a[i+iFrom].pUsing);
+      pSrc->a[i+iFrom] = pSubSrc->a[i];
+      memset(&pSubSrc->a[i], 0, sizeof(pSubSrc->a[i]));
+    }
+    pSrc->a[iFrom].jointype = jointype;
+  
+    /* Now begin substituting subquery result set expressions for 
+    ** references to the iParent in the outer query.
+    ** 
+    ** Example:
+    **
+    **   SELECT a+5, b*10 FROM (SELECT x*3 AS a, y+10 AS b FROM t1) WHERE a>b;
+    **   \                     \_____________ subquery __________/          /
+    **    \_____________________ outer query ______________________________/
+    **
+    ** We look at every expression in the outer query and every place we see
+    ** "a" we substitute "x*3" and every place we see "b" we substitute "y+10".
+    */
+    pList = pParent->pEList;
+    for(i=0; i<pList->nExpr; i++){
+      if( pList->a[i].zName==0 ){
+        char *zName = sqlite3DbStrDup(db, pList->a[i].zSpan);
+        sqlite3Dequote(zName);
+        pList->a[i].zName = zName;
+      }
+    }
+    substExprList(db, pParent->pEList, iParent, pSub->pEList);
+    if( isAgg ){
+      substExprList(db, pParent->pGroupBy, iParent, pSub->pEList);
+      pParent->pHaving = substExpr(db, pParent->pHaving, iParent, pSub->pEList);
+    }
+    if( pSub->pOrderBy ){
+      /* At this point, any non-zero iOrderByCol values indicate that the
+      ** ORDER BY column expression is identical to the iOrderByCol'th
+      ** expression returned by SELECT statement pSub. Since these values
+      ** do not necessarily correspond to columns in SELECT statement pParent,
+      ** zero them before transfering the ORDER BY clause.
+      **
+      ** Not doing this may cause an error if a subsequent call to this
+      ** function attempts to flatten a compound sub-query into pParent
+      ** (the only way this can happen is if the compound sub-query is
+      ** currently part of pSub->pSrc). See ticket [d11a6e908f].  */
+      ExprList *pOrderBy = pSub->pOrderBy;
+      for(i=0; i<pOrderBy->nExpr; i++){
+        pOrderBy->a[i].u.x.iOrderByCol = 0;
+      }
+      assert( pParent->pOrderBy==0 );
+      assert( pSub->pPrior==0 );
+      pParent->pOrderBy = pOrderBy;
+      pSub->pOrderBy = 0;
+    }else if( pParent->pOrderBy ){
+      substExprList(db, pParent->pOrderBy, iParent, pSub->pEList);
+    }
+    if( pSub->pWhere ){
+      pWhere = sqlite3ExprDup(db, pSub->pWhere, 0);
+    }else{
+      pWhere = 0;
+    }
+    if( subqueryIsAgg ){
+      assert( pParent->pHaving==0 );
+      pParent->pHaving = pParent->pWhere;
+      pParent->pWhere = pWhere;
+      pParent->pHaving = substExpr(db, pParent->pHaving, iParent, pSub->pEList);
+      pParent->pHaving = sqlite3ExprAnd(db, pParent->pHaving, 
+                                  sqlite3ExprDup(db, pSub->pHaving, 0));
+      assert( pParent->pGroupBy==0 );
+      pParent->pGroupBy = sqlite3ExprListDup(db, pSub->pGroupBy, 0);
+    }else{
+      pParent->pWhere = substExpr(db, pParent->pWhere, iParent, pSub->pEList);
+      pParent->pWhere = sqlite3ExprAnd(db, pParent->pWhere, pWhere);
+    }
+  
+    /* The flattened query is distinct if either the inner or the
+    ** outer query is distinct. 
+    */
+    pParent->selFlags |= pSub->selFlags & SF_Distinct;
+  
+    /*
+    ** SELECT ... FROM (SELECT ... LIMIT a OFFSET b) LIMIT x OFFSET y;
+    **
+    ** One is tempted to try to add a and b to combine the limits.  But this
+    ** does not work if either limit is negative.
+    */
+    if( pSub->pLimit ){
+      pParent->pLimit = pSub->pLimit;
+      pSub->pLimit = 0;
+    }
+  }
+
+  /* Finially, delete what is left of the subquery and return
+  ** success.
+  */
+  sqlite3SelectDelete(db, pSub1);
+
+#if SELECTTRACE_ENABLED
+  if( sqlite3SelectTrace & 0x100 ){
+    sqlite3DebugPrintf("After flattening:\n");
+    sqlite3TreeViewSelect(0, p, 0);
+  }
+#endif
+
+  return 1;
+}
+#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */
+
+/*
+** Based on the contents of the AggInfo structure indicated by the first
+** argument, this function checks if the following are true:
+**
+**    * the query contains just a single aggregate function,
+**    * the aggregate function is either min() or max(), and
+**    * the argument to the aggregate function is a column value.
+**
+** If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX
+** is returned as appropriate. Also, *ppMinMax is set to point to the 
+** list of arguments passed to the aggregate before returning.
+**
+** Or, if the conditions above are not met, *ppMinMax is set to 0 and
+** WHERE_ORDERBY_NORMAL is returned.
+*/
+static u8 minMaxQuery(AggInfo *pAggInfo, ExprList **ppMinMax){
+  int eRet = WHERE_ORDERBY_NORMAL;          /* Return value */
+
+  *ppMinMax = 0;
+  if( pAggInfo->nFunc==1 ){
+    Expr *pExpr = pAggInfo->aFunc[0].pExpr; /* Aggregate function */
+    ExprList *pEList = pExpr->x.pList;      /* Arguments to agg function */
+
+    assert( pExpr->op==TK_AGG_FUNCTION );
+    if( pEList && pEList->nExpr==1 && pEList->a[0].pExpr->op==TK_AGG_COLUMN ){
+      const char *zFunc = pExpr->u.zToken;
+      if( sqlite3StrICmp(zFunc, "min")==0 ){
+        eRet = WHERE_ORDERBY_MIN;
+        *ppMinMax = pEList;
+      }else if( sqlite3StrICmp(zFunc, "max")==0 ){
+        eRet = WHERE_ORDERBY_MAX;
+        *ppMinMax = pEList;
+      }
+    }
+  }
+
+  assert( *ppMinMax==0 || (*ppMinMax)->nExpr==1 );
+  return eRet;
+}
+
+/*
+** The select statement passed as the first argument is an aggregate query.
+** The second argument is the associated aggregate-info object. This 
+** function tests if the SELECT is of the form:
+**
+**   SELECT count(*) FROM <tbl>
+**
+** where table is a database table, not a sub-select or view. If the query
+** does match this pattern, then a pointer to the Table object representing
+** <tbl> is returned. Otherwise, 0 is returned.
+*/
+static Table *isSimpleCount(Select *p, AggInfo *pAggInfo){
+  Table *pTab;
+  Expr *pExpr;
+
+  assert( !p->pGroupBy );
+
+  if( p->pWhere || p->pEList->nExpr!=1 
+   || p->pSrc->nSrc!=1 || p->pSrc->a[0].pSelect
+  ){
+    return 0;
+  }
+  pTab = p->pSrc->a[0].pTab;
+  pExpr = p->pEList->a[0].pExpr;
+  assert( pTab && !pTab->pSelect && pExpr );
+
+  if( IsVirtual(pTab) ) return 0;
+  if( pExpr->op!=TK_AGG_FUNCTION ) return 0;
+  if( NEVER(pAggInfo->nFunc==0) ) return 0;
+  if( (pAggInfo->aFunc[0].pFunc->funcFlags&SQLITE_FUNC_COUNT)==0 ) return 0;
+  if( pExpr->flags&EP_Distinct ) return 0;
+
+  return pTab;
+}
+
+/*
+** If the source-list item passed as an argument was augmented with an
+** INDEXED BY clause, then try to locate the specified index. If there
+** was such a clause and the named index cannot be found, return 
+** SQLITE_ERROR and leave an error in pParse. Otherwise, populate 
+** pFrom->pIndex and return SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, struct SrcList_item *pFrom){
+  if( pFrom->pTab && pFrom->zIndex ){
+    Table *pTab = pFrom->pTab;
+    char *zIndex = pFrom->zIndex;
+    Index *pIdx;
+    for(pIdx=pTab->pIndex; 
+        pIdx && sqlite3StrICmp(pIdx->zName, zIndex); 
+        pIdx=pIdx->pNext
+    );
+    if( !pIdx ){
+      sqlite3ErrorMsg(pParse, "no such index: %s", zIndex, 0);
+      pParse->checkSchema = 1;
+      return SQLITE_ERROR;
+    }
+    pFrom->pIndex = pIdx;
+  }
+  return SQLITE_OK;
+}
+/*
+** Detect compound SELECT statements that use an ORDER BY clause with 
+** an alternative collating sequence.
+**
+**    SELECT ... FROM t1 EXCEPT SELECT ... FROM t2 ORDER BY .. COLLATE ...
+**
+** These are rewritten as a subquery:
+**
+**    SELECT * FROM (SELECT ... FROM t1 EXCEPT SELECT ... FROM t2)
+**     ORDER BY ... COLLATE ...
+**
+** This transformation is necessary because the multiSelectOrderBy() routine
+** above that generates the code for a compound SELECT with an ORDER BY clause
+** uses a merge algorithm that requires the same collating sequence on the
+** result columns as on the ORDER BY clause.  See ticket
+** http://www.sqlite.org/src/info/6709574d2a
+**
+** This transformation is only needed for EXCEPT, INTERSECT, and UNION.
+** The UNION ALL operator works fine with multiSelectOrderBy() even when
+** there are COLLATE terms in the ORDER BY.
+*/
+static int convertCompoundSelectToSubquery(Walker *pWalker, Select *p){
+  int i;
+  Select *pNew;
+  Select *pX;
+  sqlite3 *db;
+  struct ExprList_item *a;
+  SrcList *pNewSrc;
+  Parse *pParse;
+  Token dummy;
+
+  if( p->pPrior==0 ) return WRC_Continue;
+  if( p->pOrderBy==0 ) return WRC_Continue;
+  for(pX=p; pX && (pX->op==TK_ALL || pX->op==TK_SELECT); pX=pX->pPrior){}
+  if( pX==0 ) return WRC_Continue;
+  a = p->pOrderBy->a;
+  for(i=p->pOrderBy->nExpr-1; i>=0; i--){
+    if( a[i].pExpr->flags & EP_Collate ) break;
+  }
+  if( i<0 ) return WRC_Continue;
+
+  /* If we reach this point, that means the transformation is required. */
+
+  pParse = pWalker->pParse;
+  db = pParse->db;
+  pNew = sqlite3DbMallocZero(db, sizeof(*pNew) );
+  if( pNew==0 ) return WRC_Abort;
+  memset(&dummy, 0, sizeof(dummy));
+  pNewSrc = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&dummy,pNew,0,0);
+  if( pNewSrc==0 ) return WRC_Abort;
+  *pNew = *p;
+  p->pSrc = pNewSrc;
+  p->pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ALL, 0));
+  p->op = TK_SELECT;
+  p->pWhere = 0;
+  pNew->pGroupBy = 0;
+  pNew->pHaving = 0;
+  pNew->pOrderBy = 0;
+  p->pPrior = 0;
+  p->pNext = 0;
+  p->selFlags &= ~SF_Compound;
+  assert( pNew->pPrior!=0 );
+  pNew->pPrior->pNext = pNew;
+  pNew->pLimit = 0;
+  pNew->pOffset = 0;
+  return WRC_Continue;
+}
+
+#ifndef SQLITE_OMIT_CTE
+/*
+** Argument pWith (which may be NULL) points to a linked list of nested 
+** WITH contexts, from inner to outermost. If the table identified by 
+** FROM clause element pItem is really a common-table-expression (CTE) 
+** then return a pointer to the CTE definition for that table. Otherwise
+** return NULL.
+**
+** If a non-NULL value is returned, set *ppContext to point to the With
+** object that the returned CTE belongs to.
+*/
+static struct Cte *searchWith(
+  With *pWith,                    /* Current outermost WITH clause */
+  struct SrcList_item *pItem,     /* FROM clause element to resolve */
+  With **ppContext                /* OUT: WITH clause return value belongs to */
+){
+  const char *zName;
+  if( pItem->zDatabase==0 && (zName = pItem->zName)!=0 ){
+    With *p;
+    for(p=pWith; p; p=p->pOuter){
+      int i;
+      for(i=0; i<p->nCte; i++){
+        if( sqlite3StrICmp(zName, p->a[i].zName)==0 ){
+          *ppContext = p;
+          return &p->a[i];
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/* The code generator maintains a stack of active WITH clauses
+** with the inner-most WITH clause being at the top of the stack.
+**
+** This routine pushes the WITH clause passed as the second argument
+** onto the top of the stack. If argument bFree is true, then this
+** WITH clause will never be popped from the stack. In this case it
+** should be freed along with the Parse object. In other cases, when
+** bFree==0, the With object will be freed along with the SELECT 
+** statement with which it is associated.
+*/
+SQLITE_PRIVATE void sqlite3WithPush(Parse *pParse, With *pWith, u8 bFree){
+  assert( bFree==0 || pParse->pWith==0 );
+  if( pWith ){
+    pWith->pOuter = pParse->pWith;
+    pParse->pWith = pWith;
+    pParse->bFreeWith = bFree;
+  }
+}
+
+/*
+** This function checks if argument pFrom refers to a CTE declared by 
+** a WITH clause on the stack currently maintained by the parser. And,
+** if currently processing a CTE expression, if it is a recursive
+** reference to the current CTE.
+**
+** If pFrom falls into either of the two categories above, pFrom->pTab
+** and other fields are populated accordingly. The caller should check
+** (pFrom->pTab!=0) to determine whether or not a successful match
+** was found.
+**
+** Whether or not a match is found, SQLITE_OK is returned if no error
+** occurs. If an error does occur, an error message is stored in the
+** parser and some error code other than SQLITE_OK returned.
+*/
+static int withExpand(
+  Walker *pWalker, 
+  struct SrcList_item *pFrom
+){
+  Parse *pParse = pWalker->pParse;
+  sqlite3 *db = pParse->db;
+  struct Cte *pCte;               /* Matched CTE (or NULL if no match) */
+  With *pWith;                    /* WITH clause that pCte belongs to */
+
+  assert( pFrom->pTab==0 );
+
+  pCte = searchWith(pParse->pWith, pFrom, &pWith);
+  if( pCte ){
+    Table *pTab;
+    ExprList *pEList;
+    Select *pSel;
+    Select *pLeft;                /* Left-most SELECT statement */
+    int bMayRecursive;            /* True if compound joined by UNION [ALL] */
+    With *pSavedWith;             /* Initial value of pParse->pWith */
+
+    /* If pCte->zErr is non-NULL at this point, then this is an illegal
+    ** recursive reference to CTE pCte. Leave an error in pParse and return
+    ** early. If pCte->zErr is NULL, then this is not a recursive reference.
+    ** In this case, proceed.  */
+    if( pCte->zErr ){
+      sqlite3ErrorMsg(pParse, pCte->zErr, pCte->zName);
+      return SQLITE_ERROR;
+    }
+
+    assert( pFrom->pTab==0 );
+    pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table));
+    if( pTab==0 ) return WRC_Abort;
+    pTab->nRef = 1;
+    pTab->zName = sqlite3DbStrDup(db, pCte->zName);
+    pTab->iPKey = -1;
+    pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
+    pTab->tabFlags |= TF_Ephemeral;
+    pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0);
+    if( db->mallocFailed ) return SQLITE_NOMEM;
+    assert( pFrom->pSelect );
+
+    /* Check if this is a recursive CTE. */
+    pSel = pFrom->pSelect;
+    bMayRecursive = ( pSel->op==TK_ALL || pSel->op==TK_UNION );
+    if( bMayRecursive ){
+      int i;
+      SrcList *pSrc = pFrom->pSelect->pSrc;
+      for(i=0; i<pSrc->nSrc; i++){
+        struct SrcList_item *pItem = &pSrc->a[i];
+        if( pItem->zDatabase==0 
+         && pItem->zName!=0 
+         && 0==sqlite3StrICmp(pItem->zName, pCte->zName)
+          ){
+          pItem->pTab = pTab;
+          pItem->isRecursive = 1;
+          pTab->nRef++;
+          pSel->selFlags |= SF_Recursive;
+        }
+      }
+    }
+
+    /* Only one recursive reference is permitted. */ 
+    if( pTab->nRef>2 ){
+      sqlite3ErrorMsg(
+          pParse, "multiple references to recursive table: %s", pCte->zName
+      );
+      return SQLITE_ERROR;
+    }
+    assert( pTab->nRef==1 || ((pSel->selFlags&SF_Recursive) && pTab->nRef==2 ));
+
+    pCte->zErr = "circular reference: %s";
+    pSavedWith = pParse->pWith;
+    pParse->pWith = pWith;
+    sqlite3WalkSelect(pWalker, bMayRecursive ? pSel->pPrior : pSel);
+
+    for(pLeft=pSel; pLeft->pPrior; pLeft=pLeft->pPrior);
+    pEList = pLeft->pEList;
+    if( pCte->pCols ){
+      if( pEList->nExpr!=pCte->pCols->nExpr ){
+        sqlite3ErrorMsg(pParse, "table %s has %d values for %d columns",
+            pCte->zName, pEList->nExpr, pCte->pCols->nExpr
+        );
+        pParse->pWith = pSavedWith;
+        return SQLITE_ERROR;
+      }
+      pEList = pCte->pCols;
+    }
+
+    selectColumnsFromExprList(pParse, pEList, &pTab->nCol, &pTab->aCol);
+    if( bMayRecursive ){
+      if( pSel->selFlags & SF_Recursive ){
+        pCte->zErr = "multiple recursive references: %s";
+      }else{
+        pCte->zErr = "recursive reference in a subquery: %s";
+      }
+      sqlite3WalkSelect(pWalker, pSel);
+    }
+    pCte->zErr = 0;
+    pParse->pWith = pSavedWith;
+  }
+
+  return SQLITE_OK;
+}
+#endif
+
+#ifndef SQLITE_OMIT_CTE
+/*
+** If the SELECT passed as the second argument has an associated WITH 
+** clause, pop it from the stack stored as part of the Parse object.
+**
+** This function is used as the xSelectCallback2() callback by
+** sqlite3SelectExpand() when walking a SELECT tree to resolve table
+** names and other FROM clause elements. 
+*/
+static void selectPopWith(Walker *pWalker, Select *p){
+  Parse *pParse = pWalker->pParse;
+  With *pWith = findRightmost(p)->pWith;
+  if( pWith!=0 ){
+    assert( pParse->pWith==pWith );
+    pParse->pWith = pWith->pOuter;
+  }
+}
+#else
+#define selectPopWith 0
+#endif
+
+/*
+** This routine is a Walker callback for "expanding" a SELECT statement.
+** "Expanding" means to do the following:
+**
+**    (1)  Make sure VDBE cursor numbers have been assigned to every
+**         element of the FROM clause.
+**
+**    (2)  Fill in the pTabList->a[].pTab fields in the SrcList that 
+**         defines FROM clause.  When views appear in the FROM clause,
+**         fill pTabList->a[].pSelect with a copy of the SELECT statement
+**         that implements the view.  A copy is made of the view's SELECT
+**         statement so that we can freely modify or delete that statement
+**         without worrying about messing up the persistent representation
+**         of the view.
+**
+**    (3)  Add terms to the WHERE clause to accommodate the NATURAL keyword
+**         on joins and the ON and USING clause of joins.
+**
+**    (4)  Scan the list of columns in the result set (pEList) looking
+**         for instances of the "*" operator or the TABLE.* operator.
+**         If found, expand each "*" to be every column in every table
+**         and TABLE.* to be every column in TABLE.
+**
+*/
+static int selectExpander(Walker *pWalker, Select *p){
+  Parse *pParse = pWalker->pParse;
+  int i, j, k;
+  SrcList *pTabList;
+  ExprList *pEList;
+  struct SrcList_item *pFrom;
+  sqlite3 *db = pParse->db;
+  Expr *pE, *pRight, *pExpr;
+  u16 selFlags = p->selFlags;
+
+  p->selFlags |= SF_Expanded;
+  if( db->mallocFailed  ){
+    return WRC_Abort;
+  }
+  if( NEVER(p->pSrc==0) || (selFlags & SF_Expanded)!=0 ){
+    return WRC_Prune;
+  }
+  pTabList = p->pSrc;
+  pEList = p->pEList;
+  if( pWalker->xSelectCallback2==selectPopWith ){
+    sqlite3WithPush(pParse, findRightmost(p)->pWith, 0);
+  }
+
+  /* Make sure cursor numbers have been assigned to all entries in
+  ** the FROM clause of the SELECT statement.
+  */
+  sqlite3SrcListAssignCursors(pParse, pTabList);
+
+  /* Look up every table named in the FROM clause of the select.  If
+  ** an entry of the FROM clause is a subquery instead of a table or view,
+  ** then create a transient table structure to describe the subquery.
+  */
+  for(i=0, pFrom=pTabList->a; i<pTabList->nSrc; i++, pFrom++){
+    Table *pTab;
+    assert( pFrom->isRecursive==0 || pFrom->pTab );
+    if( pFrom->isRecursive ) continue;
+    if( pFrom->pTab!=0 ){
+      /* This statement has already been prepared.  There is no need
+      ** to go further. */
+      assert( i==0 );
+#ifndef SQLITE_OMIT_CTE
+      selectPopWith(pWalker, p);
+#endif
+      return WRC_Prune;
+    }
+#ifndef SQLITE_OMIT_CTE
+    if( withExpand(pWalker, pFrom) ) return WRC_Abort;
+    if( pFrom->pTab ) {} else
+#endif
+    if( pFrom->zName==0 ){
+#ifndef SQLITE_OMIT_SUBQUERY
+      Select *pSel = pFrom->pSelect;
+      /* A sub-query in the FROM clause of a SELECT */
+      assert( pSel!=0 );
+      assert( pFrom->pTab==0 );
+      sqlite3WalkSelect(pWalker, pSel);
+      pFrom->pTab = pTab = sqlite3DbMallocZero(db, sizeof(Table));
+      if( pTab==0 ) return WRC_Abort;
+      pTab->nRef = 1;
+      pTab->zName = sqlite3MPrintf(db, "sqlite_sq_%p", (void*)pTab);
+      while( pSel->pPrior ){ pSel = pSel->pPrior; }
+      selectColumnsFromExprList(pParse, pSel->pEList, &pTab->nCol, &pTab->aCol);
+      pTab->iPKey = -1;
+      pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) );
+      pTab->tabFlags |= TF_Ephemeral;
+#endif
+    }else{
+      /* An ordinary table or view name in the FROM clause */
+      assert( pFrom->pTab==0 );
+      pFrom->pTab = pTab = sqlite3LocateTableItem(pParse, 0, pFrom);
+      if( pTab==0 ) return WRC_Abort;
+      if( pTab->nRef==0xffff ){
+        sqlite3ErrorMsg(pParse, "too many references to \"%s\": max 65535",
+           pTab->zName);
+        pFrom->pTab = 0;
+        return WRC_Abort;
+      }
+      pTab->nRef++;
+#if !defined(SQLITE_OMIT_VIEW) || !defined (SQLITE_OMIT_VIRTUALTABLE)
+      if( pTab->pSelect || IsVirtual(pTab) ){
+        /* We reach here if the named table is a really a view */
+        if( sqlite3ViewGetColumnNames(pParse, pTab) ) return WRC_Abort;
+        assert( pFrom->pSelect==0 );
+        pFrom->pSelect = sqlite3SelectDup(db, pTab->pSelect, 0);
+        sqlite3SelectSetName(pFrom->pSelect, pTab->zName);
+        sqlite3WalkSelect(pWalker, pFrom->pSelect);
+      }
+#endif
+    }
+
+    /* Locate the index named by the INDEXED BY clause, if any. */
+    if( sqlite3IndexedByLookup(pParse, pFrom) ){
+      return WRC_Abort;
+    }
+  }
+
+  /* Process NATURAL keywords, and ON and USING clauses of joins.
+  */
+  if( db->mallocFailed || sqliteProcessJoin(pParse, p) ){
+    return WRC_Abort;
+  }
+
+  /* For every "*" that occurs in the column list, insert the names of
+  ** all columns in all tables.  And for every TABLE.* insert the names
+  ** of all columns in TABLE.  The parser inserted a special expression
+  ** with the TK_ALL operator for each "*" that it found in the column list.
+  ** The following code just has to locate the TK_ALL expressions and expand
+  ** each one to the list of all columns in all tables.
+  **
+  ** The first loop just checks to see if there are any "*" operators
+  ** that need expanding.
+  */
+  for(k=0; k<pEList->nExpr; k++){
+    pE = pEList->a[k].pExpr;
+    if( pE->op==TK_ALL ) break;
+    assert( pE->op!=TK_DOT || pE->pRight!=0 );
+    assert( pE->op!=TK_DOT || (pE->pLeft!=0 && pE->pLeft->op==TK_ID) );
+    if( pE->op==TK_DOT && pE->pRight->op==TK_ALL ) break;
+  }
+  if( k<pEList->nExpr ){
+    /*
+    ** If we get here it means the result set contains one or more "*"
+    ** operators that need to be expanded.  Loop through each expression
+    ** in the result set and expand them one by one.
+    */
+    struct ExprList_item *a = pEList->a;
+    ExprList *pNew = 0;
+    int flags = pParse->db->flags;
+    int longNames = (flags & SQLITE_FullColNames)!=0
+                      && (flags & SQLITE_ShortColNames)==0;
+
+    /* When processing FROM-clause subqueries, it is always the case
+    ** that full_column_names=OFF and short_column_names=ON.  The
+    ** sqlite3ResultSetOfSelect() routine makes it so. */
+    assert( (p->selFlags & SF_NestedFrom)==0
+          || ((flags & SQLITE_FullColNames)==0 &&
+              (flags & SQLITE_ShortColNames)!=0) );
+
+    for(k=0; k<pEList->nExpr; k++){
+      pE = a[k].pExpr;
+      pRight = pE->pRight;
+      assert( pE->op!=TK_DOT || pRight!=0 );
+      if( pE->op!=TK_ALL && (pE->op!=TK_DOT || pRight->op!=TK_ALL) ){
+        /* This particular expression does not need to be expanded.
+        */
+        pNew = sqlite3ExprListAppend(pParse, pNew, a[k].pExpr);
+        if( pNew ){
+          pNew->a[pNew->nExpr-1].zName = a[k].zName;
+          pNew->a[pNew->nExpr-1].zSpan = a[k].zSpan;
+          a[k].zName = 0;
+          a[k].zSpan = 0;
+        }
+        a[k].pExpr = 0;
+      }else{
+        /* This expression is a "*" or a "TABLE.*" and needs to be
+        ** expanded. */
+        int tableSeen = 0;      /* Set to 1 when TABLE matches */
+        char *zTName = 0;       /* text of name of TABLE */
+        if( pE->op==TK_DOT ){
+          assert( pE->pLeft!=0 );
+          assert( !ExprHasProperty(pE->pLeft, EP_IntValue) );
+          zTName = pE->pLeft->u.zToken;
+        }
+        for(i=0, pFrom=pTabList->a; i<pTabList->nSrc; i++, pFrom++){
+          Table *pTab = pFrom->pTab;
+          Select *pSub = pFrom->pSelect;
+          char *zTabName = pFrom->zAlias;
+          const char *zSchemaName = 0;
+          int iDb;
+          if( zTabName==0 ){
+            zTabName = pTab->zName;
+          }
+          if( db->mallocFailed ) break;
+          if( pSub==0 || (pSub->selFlags & SF_NestedFrom)==0 ){
+            pSub = 0;
+            if( zTName && sqlite3StrICmp(zTName, zTabName)!=0 ){
+              continue;
+            }
+            iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+            zSchemaName = iDb>=0 ? db->aDb[iDb].zName : "*";
+          }
+          for(j=0; j<pTab->nCol; j++){
+            char *zName = pTab->aCol[j].zName;
+            char *zColname;  /* The computed column name */
+            char *zToFree;   /* Malloced string that needs to be freed */
+            Token sColname;  /* Computed column name as a token */
+
+            assert( zName );
+            if( zTName && pSub
+             && sqlite3MatchSpanName(pSub->pEList->a[j].zSpan, 0, zTName, 0)==0
+            ){
+              continue;
+            }
+
+            /* If a column is marked as 'hidden' (currently only possible
+            ** for virtual tables), do not include it in the expanded
+            ** result-set list.
+            */
+            if( IsHiddenColumn(&pTab->aCol[j]) ){
+              assert(IsVirtual(pTab));
+              continue;
+            }
+            tableSeen = 1;
+
+            if( i>0 && zTName==0 ){
+              if( (pFrom->jointype & JT_NATURAL)!=0
+                && tableAndColumnIndex(pTabList, i, zName, 0, 0)
+              ){
+                /* In a NATURAL join, omit the join columns from the 
+                ** table to the right of the join */
+                continue;
+              }
+              if( sqlite3IdListIndex(pFrom->pUsing, zName)>=0 ){
+                /* In a join with a USING clause, omit columns in the
+                ** using clause from the table on the right. */
+                continue;
+              }
+            }
+            pRight = sqlite3Expr(db, TK_ID, zName);
+            zColname = zName;
+            zToFree = 0;
+            if( longNames || pTabList->nSrc>1 ){
+              Expr *pLeft;
+              pLeft = sqlite3Expr(db, TK_ID, zTabName);
+              pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight, 0);
+              if( zSchemaName ){
+                pLeft = sqlite3Expr(db, TK_ID, zSchemaName);
+                pExpr = sqlite3PExpr(pParse, TK_DOT, pLeft, pExpr, 0);
+              }
+              if( longNames ){
+                zColname = sqlite3MPrintf(db, "%s.%s", zTabName, zName);
+                zToFree = zColname;
+              }
+            }else{
+              pExpr = pRight;
+            }
+            pNew = sqlite3ExprListAppend(pParse, pNew, pExpr);
+            sColname.z = zColname;
+            sColname.n = sqlite3Strlen30(zColname);
+            sqlite3ExprListSetName(pParse, pNew, &sColname, 0);
+            if( pNew && (p->selFlags & SF_NestedFrom)!=0 ){
+              struct ExprList_item *pX = &pNew->a[pNew->nExpr-1];
+              if( pSub ){
+                pX->zSpan = sqlite3DbStrDup(db, pSub->pEList->a[j].zSpan);
+                testcase( pX->zSpan==0 );
+              }else{
+                pX->zSpan = sqlite3MPrintf(db, "%s.%s.%s",
+                                           zSchemaName, zTabName, zColname);
+                testcase( pX->zSpan==0 );
+              }
+              pX->bSpanIsTab = 1;
+            }
+            sqlite3DbFree(db, zToFree);
+          }
+        }
+        if( !tableSeen ){
+          if( zTName ){
+            sqlite3ErrorMsg(pParse, "no such table: %s", zTName);
+          }else{
+            sqlite3ErrorMsg(pParse, "no tables specified");
+          }
+        }
+      }
+    }
+    sqlite3ExprListDelete(db, pEList);
+    p->pEList = pNew;
+  }
+#if SQLITE_MAX_COLUMN
+  if( p->pEList && p->pEList->nExpr>db->aLimit[SQLITE_LIMIT_COLUMN] ){
+    sqlite3ErrorMsg(pParse, "too many columns in result set");
+  }
+#endif
+  return WRC_Continue;
+}
+
+/*
+** No-op routine for the parse-tree walker.
+**
+** When this routine is the Walker.xExprCallback then expression trees
+** are walked without any actions being taken at each node.  Presumably,
+** when this routine is used for Walker.xExprCallback then 
+** Walker.xSelectCallback is set to do something useful for every 
+** subquery in the parser tree.
+*/
+static int exprWalkNoop(Walker *NotUsed, Expr *NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return WRC_Continue;
+}
+
+/*
+** This routine "expands" a SELECT statement and all of its subqueries.
+** For additional information on what it means to "expand" a SELECT
+** statement, see the comment on the selectExpand worker callback above.
+**
+** Expanding a SELECT statement is the first step in processing a
+** SELECT statement.  The SELECT statement must be expanded before
+** name resolution is performed.
+**
+** If anything goes wrong, an error message is written into pParse.
+** The calling function can detect the problem by looking at pParse->nErr
+** and/or pParse->db->mallocFailed.
+*/
+static void sqlite3SelectExpand(Parse *pParse, Select *pSelect){
+  Walker w;
+  memset(&w, 0, sizeof(w));
+  w.xExprCallback = exprWalkNoop;
+  w.pParse = pParse;
+  if( pParse->hasCompound ){
+    w.xSelectCallback = convertCompoundSelectToSubquery;
+    sqlite3WalkSelect(&w, pSelect);
+  }
+  w.xSelectCallback = selectExpander;
+  if( (pSelect->selFlags & SF_AllValues)==0 ){
+    w.xSelectCallback2 = selectPopWith;
+  }
+  sqlite3WalkSelect(&w, pSelect);
+}
+
+
+#ifndef SQLITE_OMIT_SUBQUERY
+/*
+** This is a Walker.xSelectCallback callback for the sqlite3SelectTypeInfo()
+** interface.
+**
+** For each FROM-clause subquery, add Column.zType and Column.zColl
+** information to the Table structure that represents the result set
+** of that subquery.
+**
+** The Table structure that represents the result set was constructed
+** by selectExpander() but the type and collation information was omitted
+** at that point because identifiers had not yet been resolved.  This
+** routine is called after identifier resolution.
+*/
+static void selectAddSubqueryTypeInfo(Walker *pWalker, Select *p){
+  Parse *pParse;
+  int i;
+  SrcList *pTabList;
+  struct SrcList_item *pFrom;
+
+  assert( p->selFlags & SF_Resolved );
+  if( (p->selFlags & SF_HasTypeInfo)==0 ){
+    p->selFlags |= SF_HasTypeInfo;
+    pParse = pWalker->pParse;
+    pTabList = p->pSrc;
+    for(i=0, pFrom=pTabList->a; i<pTabList->nSrc; i++, pFrom++){
+      Table *pTab = pFrom->pTab;
+      if( ALWAYS(pTab!=0) && (pTab->tabFlags & TF_Ephemeral)!=0 ){
+        /* A sub-query in the FROM clause of a SELECT */
+        Select *pSel = pFrom->pSelect;
+        if( pSel ){
+          while( pSel->pPrior ) pSel = pSel->pPrior;
+          selectAddColumnTypeAndCollation(pParse, pTab, pSel);
+        }
+      }
+    }
+  }
+}
+#endif
+
+
+/*
+** This routine adds datatype and collating sequence information to
+** the Table structures of all FROM-clause subqueries in a
+** SELECT statement.
+**
+** Use this routine after name resolution.
+*/
+static void sqlite3SelectAddTypeInfo(Parse *pParse, Select *pSelect){
+#ifndef SQLITE_OMIT_SUBQUERY
+  Walker w;
+  memset(&w, 0, sizeof(w));
+  w.xSelectCallback2 = selectAddSubqueryTypeInfo;
+  w.xExprCallback = exprWalkNoop;
+  w.pParse = pParse;
+  sqlite3WalkSelect(&w, pSelect);
+#endif
+}
+
+
+/*
+** This routine sets up a SELECT statement for processing.  The
+** following is accomplished:
+**
+**     *  VDBE Cursor numbers are assigned to all FROM-clause terms.
+**     *  Ephemeral Table objects are created for all FROM-clause subqueries.
+**     *  ON and USING clauses are shifted into WHERE statements
+**     *  Wildcards "*" and "TABLE.*" in result sets are expanded.
+**     *  Identifiers in expression are matched to tables.
+**
+** This routine acts recursively on all subqueries within the SELECT.
+*/
+SQLITE_PRIVATE void sqlite3SelectPrep(
+  Parse *pParse,         /* The parser context */
+  Select *p,             /* The SELECT statement being coded. */
+  NameContext *pOuterNC  /* Name context for container */
+){
+  sqlite3 *db;
+  if( NEVER(p==0) ) return;
+  db = pParse->db;
+  if( db->mallocFailed ) return;
+  if( p->selFlags & SF_HasTypeInfo ) return;
+  sqlite3SelectExpand(pParse, p);
+  if( pParse->nErr || db->mallocFailed ) return;
+  sqlite3ResolveSelectNames(pParse, p, pOuterNC);
+  if( pParse->nErr || db->mallocFailed ) return;
+  sqlite3SelectAddTypeInfo(pParse, p);
+}
+
+/*
+** Reset the aggregate accumulator.
+**
+** The aggregate accumulator is a set of memory cells that hold
+** intermediate results while calculating an aggregate.  This
+** routine generates code that stores NULLs in all of those memory
+** cells.
+*/
+static void resetAccumulator(Parse *pParse, AggInfo *pAggInfo){
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  struct AggInfo_func *pFunc;
+  int nReg = pAggInfo->nFunc + pAggInfo->nColumn;
+  if( nReg==0 ) return;
+#ifdef SQLITE_DEBUG
+  /* Verify that all AggInfo registers are within the range specified by
+  ** AggInfo.mnReg..AggInfo.mxReg */
+  assert( nReg==pAggInfo->mxReg-pAggInfo->mnReg+1 );
+  for(i=0; i<pAggInfo->nColumn; i++){
+    assert( pAggInfo->aCol[i].iMem>=pAggInfo->mnReg
+         && pAggInfo->aCol[i].iMem<=pAggInfo->mxReg );
+  }
+  for(i=0; i<pAggInfo->nFunc; i++){
+    assert( pAggInfo->aFunc[i].iMem>=pAggInfo->mnReg
+         && pAggInfo->aFunc[i].iMem<=pAggInfo->mxReg );
+  }
+#endif
+  sqlite3VdbeAddOp3(v, OP_Null, 0, pAggInfo->mnReg, pAggInfo->mxReg);
+  for(pFunc=pAggInfo->aFunc, i=0; i<pAggInfo->nFunc; i++, pFunc++){
+    if( pFunc->iDistinct>=0 ){
+      Expr *pE = pFunc->pExpr;
+      assert( !ExprHasProperty(pE, EP_xIsSelect) );
+      if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){
+        sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one "
+           "argument");
+        pFunc->iDistinct = -1;
+      }else{
+        KeyInfo *pKeyInfo = keyInfoFromExprList(pParse, pE->x.pList, 0, 0);
+        sqlite3VdbeAddOp4(v, OP_OpenEphemeral, pFunc->iDistinct, 0, 0,
+                          (char*)pKeyInfo, P4_KEYINFO);
+      }
+    }
+  }
+}
+
+/*
+** Invoke the OP_AggFinalize opcode for every aggregate function
+** in the AggInfo structure.
+*/
+static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  struct AggInfo_func *pF;
+  for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){
+    ExprList *pList = pF->pExpr->x.pList;
+    assert( !ExprHasProperty(pF->pExpr, EP_xIsSelect) );
+    sqlite3VdbeAddOp4(v, OP_AggFinal, pF->iMem, pList ? pList->nExpr : 0, 0,
+                      (void*)pF->pFunc, P4_FUNCDEF);
+  }
+}
+
+/*
+** Update the accumulator memory cells for an aggregate based on
+** the current cursor position.
+*/
+static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  int regHit = 0;
+  int addrHitTest = 0;
+  struct AggInfo_func *pF;
+  struct AggInfo_col *pC;
+
+  pAggInfo->directMode = 1;
+  for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){
+    int nArg;
+    int addrNext = 0;
+    int regAgg;
+    ExprList *pList = pF->pExpr->x.pList;
+    assert( !ExprHasProperty(pF->pExpr, EP_xIsSelect) );
+    if( pList ){
+      nArg = pList->nExpr;
+      regAgg = sqlite3GetTempRange(pParse, nArg);
+      sqlite3ExprCodeExprList(pParse, pList, regAgg, SQLITE_ECEL_DUP);
+    }else{
+      nArg = 0;
+      regAgg = 0;
+    }
+    if( pF->iDistinct>=0 ){
+      addrNext = sqlite3VdbeMakeLabel(v);
+      assert( nArg==1 );
+      codeDistinct(pParse, pF->iDistinct, addrNext, 1, regAgg);
+    }
+    if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
+      CollSeq *pColl = 0;
+      struct ExprList_item *pItem;
+      int j;
+      assert( pList!=0 );  /* pList!=0 if pF->pFunc has NEEDCOLL */
+      for(j=0, pItem=pList->a; !pColl && j<nArg; j++, pItem++){
+        pColl = sqlite3ExprCollSeq(pParse, pItem->pExpr);
+      }
+      if( !pColl ){
+        pColl = pParse->db->pDfltColl;
+      }
+      if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem;
+      sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ);
+    }
+    sqlite3VdbeAddOp4(v, OP_AggStep, 0, regAgg, pF->iMem,
+                      (void*)pF->pFunc, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, (u8)nArg);
+    sqlite3ExprCacheAffinityChange(pParse, regAgg, nArg);
+    sqlite3ReleaseTempRange(pParse, regAgg, nArg);
+    if( addrNext ){
+      sqlite3VdbeResolveLabel(v, addrNext);
+      sqlite3ExprCacheClear(pParse);
+    }
+  }
+
+  /* Before populating the accumulator registers, clear the column cache.
+  ** Otherwise, if any of the required column values are already present 
+  ** in registers, sqlite3ExprCode() may use OP_SCopy to copy the value
+  ** to pC->iMem. But by the time the value is used, the original register
+  ** may have been used, invalidating the underlying buffer holding the
+  ** text or blob value. See ticket [883034dcb5].
+  **
+  ** Another solution would be to change the OP_SCopy used to copy cached
+  ** values to an OP_Copy.
+  */
+  if( regHit ){
+    addrHitTest = sqlite3VdbeAddOp1(v, OP_If, regHit); VdbeCoverage(v);
+  }
+  sqlite3ExprCacheClear(pParse);
+  for(i=0, pC=pAggInfo->aCol; i<pAggInfo->nAccumulator; i++, pC++){
+    sqlite3ExprCode(pParse, pC->pExpr, pC->iMem);
+  }
+  pAggInfo->directMode = 0;
+  sqlite3ExprCacheClear(pParse);
+  if( addrHitTest ){
+    sqlite3VdbeJumpHere(v, addrHitTest);
+  }
+}
+
+/*
+** Add a single OP_Explain instruction to the VDBE to explain a simple
+** count(*) query ("SELECT count(*) FROM pTab").
+*/
+#ifndef SQLITE_OMIT_EXPLAIN
+static void explainSimpleCount(
+  Parse *pParse,                  /* Parse context */
+  Table *pTab,                    /* Table being queried */
+  Index *pIdx                     /* Index used to optimize scan, or NULL */
+){
+  if( pParse->explain==2 ){
+    int bCover = (pIdx!=0 && (HasRowid(pTab) || !IsPrimaryKeyIndex(pIdx)));
+    char *zEqp = sqlite3MPrintf(pParse->db, "SCAN TABLE %s%s%s",
+        pTab->zName,
+        bCover ? " USING COVERING INDEX " : "",
+        bCover ? pIdx->zName : ""
+    );
+    sqlite3VdbeAddOp4(
+        pParse->pVdbe, OP_Explain, pParse->iSelectId, 0, 0, zEqp, P4_DYNAMIC
+    );
+  }
+}
+#else
+# define explainSimpleCount(a,b,c)
+#endif
+
+/*
+** Generate code for the SELECT statement given in the p argument.  
+**
+** The results are returned according to the SelectDest structure.
+** See comments in sqliteInt.h for further information.
+**
+** This routine returns the number of errors.  If any errors are
+** encountered, then an appropriate error message is left in
+** pParse->zErrMsg.
+**
+** This routine does NOT free the Select structure passed in.  The
+** calling function needs to do that.
+*/
+SQLITE_PRIVATE int sqlite3Select(
+  Parse *pParse,         /* The parser context */
+  Select *p,             /* The SELECT statement being coded. */
+  SelectDest *pDest      /* What to do with the query results */
+){
+  int i, j;              /* Loop counters */
+  WhereInfo *pWInfo;     /* Return from sqlite3WhereBegin() */
+  Vdbe *v;               /* The virtual machine under construction */
+  int isAgg;             /* True for select lists like "count(*)" */
+  ExprList *pEList;      /* List of columns to extract. */
+  SrcList *pTabList;     /* List of tables to select from */
+  Expr *pWhere;          /* The WHERE clause.  May be NULL */
+  ExprList *pGroupBy;    /* The GROUP BY clause.  May be NULL */
+  Expr *pHaving;         /* The HAVING clause.  May be NULL */
+  int rc = 1;            /* Value to return from this function */
+  DistinctCtx sDistinct; /* Info on how to code the DISTINCT keyword */
+  SortCtx sSort;         /* Info on how to code the ORDER BY clause */
+  AggInfo sAggInfo;      /* Information used by aggregate queries */
+  int iEnd;              /* Address of the end of the query */
+  sqlite3 *db;           /* The database connection */
+
+#ifndef SQLITE_OMIT_EXPLAIN
+  int iRestoreSelectId = pParse->iSelectId;
+  pParse->iSelectId = pParse->iNextSelectId++;
+#endif
+
+  db = pParse->db;
+  if( p==0 || db->mallocFailed || pParse->nErr ){
+    return 1;
+  }
+  if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1;
+  memset(&sAggInfo, 0, sizeof(sAggInfo));
+#if SELECTTRACE_ENABLED
+  pParse->nSelectIndent++;
+  SELECTTRACE(1,pParse,p, ("begin processing:\n"));
+  if( sqlite3SelectTrace & 0x100 ){
+    sqlite3TreeViewSelect(0, p, 0);
+  }
+#endif
+
+  assert( p->pOrderBy==0 || pDest->eDest!=SRT_DistFifo );
+  assert( p->pOrderBy==0 || pDest->eDest!=SRT_Fifo );
+  assert( p->pOrderBy==0 || pDest->eDest!=SRT_DistQueue );
+  assert( p->pOrderBy==0 || pDest->eDest!=SRT_Queue );
+  if( IgnorableOrderby(pDest) ){
+    assert(pDest->eDest==SRT_Exists || pDest->eDest==SRT_Union || 
+           pDest->eDest==SRT_Except || pDest->eDest==SRT_Discard ||
+           pDest->eDest==SRT_Queue  || pDest->eDest==SRT_DistFifo ||
+           pDest->eDest==SRT_DistQueue || pDest->eDest==SRT_Fifo);
+    /* If ORDER BY makes no difference in the output then neither does
+    ** DISTINCT so it can be removed too. */
+    sqlite3ExprListDelete(db, p->pOrderBy);
+    p->pOrderBy = 0;
+    p->selFlags &= ~SF_Distinct;
+  }
+  sqlite3SelectPrep(pParse, p, 0);
+  memset(&sSort, 0, sizeof(sSort));
+  sSort.pOrderBy = p->pOrderBy;
+  pTabList = p->pSrc;
+  pEList = p->pEList;
+  if( pParse->nErr || db->mallocFailed ){
+    goto select_end;
+  }
+  isAgg = (p->selFlags & SF_Aggregate)!=0;
+  assert( pEList!=0 );
+
+  /* Begin generating code.
+  */
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ) goto select_end;
+
+  /* If writing to memory or generating a set
+  ** only a single column may be output.
+  */
+#ifndef SQLITE_OMIT_SUBQUERY
+  if( checkForMultiColumnSelectError(pParse, pDest, pEList->nExpr) ){
+    goto select_end;
+  }
+#endif
+
+  /* Generate code for all sub-queries in the FROM clause
+  */
+#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW)
+  for(i=0; !p->pPrior && i<pTabList->nSrc; i++){
+    struct SrcList_item *pItem = &pTabList->a[i];
+    SelectDest dest;
+    Select *pSub = pItem->pSelect;
+    int isAggSub;
+
+    if( pSub==0 ) continue;
+
+    /* Sometimes the code for a subquery will be generated more than
+    ** once, if the subquery is part of the WHERE clause in a LEFT JOIN,
+    ** for example.  In that case, do not regenerate the code to manifest
+    ** a view or the co-routine to implement a view.  The first instance
+    ** is sufficient, though the subroutine to manifest the view does need
+    ** to be invoked again. */
+    if( pItem->addrFillSub ){
+      if( pItem->viaCoroutine==0 ){
+        sqlite3VdbeAddOp2(v, OP_Gosub, pItem->regReturn, pItem->addrFillSub);
+      }
+      continue;
+    }
+
+    /* Increment Parse.nHeight by the height of the largest expression
+    ** tree referred to by this, the parent select. The child select
+    ** may contain expression trees of at most
+    ** (SQLITE_MAX_EXPR_DEPTH-Parse.nHeight) height. This is a bit
+    ** more conservative than necessary, but much easier than enforcing
+    ** an exact limit.
+    */
+    pParse->nHeight += sqlite3SelectExprHeight(p);
+
+    isAggSub = (pSub->selFlags & SF_Aggregate)!=0;
+    if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){
+      /* This subquery can be absorbed into its parent. */
+      if( isAggSub ){
+        isAgg = 1;
+        p->selFlags |= SF_Aggregate;
+      }
+      i = -1;
+    }else if( pTabList->nSrc==1
+           && OptimizationEnabled(db, SQLITE_SubqCoroutine)
+    ){
+      /* Implement a co-routine that will return a single row of the result
+      ** set on each invocation.
+      */
+      int addrTop = sqlite3VdbeCurrentAddr(v)+1;
+      pItem->regReturn = ++pParse->nMem;
+      sqlite3VdbeAddOp3(v, OP_InitCoroutine, pItem->regReturn, 0, addrTop);
+      VdbeComment((v, "%s", pItem->pTab->zName));
+      pItem->addrFillSub = addrTop;
+      sqlite3SelectDestInit(&dest, SRT_Coroutine, pItem->regReturn);
+      explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
+      sqlite3Select(pParse, pSub, &dest);
+      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
+      pItem->viaCoroutine = 1;
+      pItem->regResult = dest.iSdst;
+      sqlite3VdbeAddOp1(v, OP_EndCoroutine, pItem->regReturn);
+      sqlite3VdbeJumpHere(v, addrTop-1);
+      sqlite3ClearTempRegCache(pParse);
+    }else{
+      /* Generate a subroutine that will fill an ephemeral table with
+      ** the content of this subquery.  pItem->addrFillSub will point
+      ** to the address of the generated subroutine.  pItem->regReturn
+      ** is a register allocated to hold the subroutine return address
+      */
+      int topAddr;
+      int onceAddr = 0;
+      int retAddr;
+      assert( pItem->addrFillSub==0 );
+      pItem->regReturn = ++pParse->nMem;
+      topAddr = sqlite3VdbeAddOp2(v, OP_Integer, 0, pItem->regReturn);
+      pItem->addrFillSub = topAddr+1;
+      if( pItem->isCorrelated==0 ){
+        /* If the subquery is not correlated and if we are not inside of
+        ** a trigger, then we only need to compute the value of the subquery
+        ** once. */
+        onceAddr = sqlite3CodeOnce(pParse); VdbeCoverage(v);
+        VdbeComment((v, "materialize \"%s\"", pItem->pTab->zName));
+      }else{
+        VdbeNoopComment((v, "materialize \"%s\"", pItem->pTab->zName));
+      }
+      sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor);
+      explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
+      sqlite3Select(pParse, pSub, &dest);
+      pItem->pTab->nRowLogEst = sqlite3LogEst(pSub->nSelectRow);
+      if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr);
+      retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn);
+      VdbeComment((v, "end %s", pItem->pTab->zName));
+      sqlite3VdbeChangeP1(v, topAddr, retAddr);
+      sqlite3ClearTempRegCache(pParse);
+    }
+    if( /*pParse->nErr ||*/ db->mallocFailed ){
+      goto select_end;
+    }
+    pParse->nHeight -= sqlite3SelectExprHeight(p);
+    pTabList = p->pSrc;
+    if( !IgnorableOrderby(pDest) ){
+      sSort.pOrderBy = p->pOrderBy;
+    }
+  }
+  pEList = p->pEList;
+#endif
+  pWhere = p->pWhere;
+  pGroupBy = p->pGroupBy;
+  pHaving = p->pHaving;
+  sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0;
+
+#ifndef SQLITE_OMIT_COMPOUND_SELECT
+  /* If there is are a sequence of queries, do the earlier ones first.
+  */
+  if( p->pPrior ){
+    rc = multiSelect(pParse, p, pDest);
+    explainSetInteger(pParse->iSelectId, iRestoreSelectId);
+#if SELECTTRACE_ENABLED
+    SELECTTRACE(1,pParse,p,("end compound-select processing\n"));
+    pParse->nSelectIndent--;
+#endif
+    return rc;
+  }
+#endif
+
+  /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and 
+  ** if the select-list is the same as the ORDER BY list, then this query
+  ** can be rewritten as a GROUP BY. In other words, this:
+  **
+  **     SELECT DISTINCT xyz FROM ... ORDER BY xyz
+  **
+  ** is transformed to:
+  **
+  **     SELECT xyz FROM ... GROUP BY xyz ORDER BY xyz
+  **
+  ** The second form is preferred as a single index (or temp-table) may be 
+  ** used for both the ORDER BY and DISTINCT processing. As originally 
+  ** written the query must use a temp-table for at least one of the ORDER 
+  ** BY and DISTINCT, and an index or separate temp-table for the other.
+  */
+  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct 
+   && sqlite3ExprListCompare(sSort.pOrderBy, p->pEList, -1)==0
+  ){
+    p->selFlags &= ~SF_Distinct;
+    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
+    pGroupBy = p->pGroupBy;
+    /* Notice that even thought SF_Distinct has been cleared from p->selFlags,
+    ** the sDistinct.isTnct is still set.  Hence, isTnct represents the
+    ** original setting of the SF_Distinct flag, not the current setting */
+    assert( sDistinct.isTnct );
+  }
+
+  /* If there is an ORDER BY clause, then this sorting
+  ** index might end up being unused if the data can be 
+  ** extracted in pre-sorted order.  If that is the case, then the
+  ** OP_OpenEphemeral instruction will be changed to an OP_Noop once
+  ** we figure out that the sorting index is not needed.  The addrSortIndex
+  ** variable is used to facilitate that change.
+  */
+  if( sSort.pOrderBy ){
+    KeyInfo *pKeyInfo;
+    pKeyInfo = keyInfoFromExprList(pParse, sSort.pOrderBy, 0, pEList->nExpr);
+    sSort.iECursor = pParse->nTab++;
+    sSort.addrSortIndex =
+      sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
+          sSort.iECursor, sSort.pOrderBy->nExpr+1+pEList->nExpr, 0,
+          (char*)pKeyInfo, P4_KEYINFO
+      );
+  }else{
+    sSort.addrSortIndex = -1;
+  }
+
+  /* If the output is destined for a temporary table, open that table.
+  */
+  if( pDest->eDest==SRT_EphemTab ){
+    sqlite3VdbeAddOp2(v, OP_OpenEphemeral, pDest->iSDParm, pEList->nExpr);
+  }
+
+  /* Set the limiter.
+  */
+  iEnd = sqlite3VdbeMakeLabel(v);
+  p->nSelectRow = LARGEST_INT64;
+  computeLimitRegisters(pParse, p, iEnd);
+  if( p->iLimit==0 && sSort.addrSortIndex>=0 ){
+    sqlite3VdbeGetOp(v, sSort.addrSortIndex)->opcode = OP_SorterOpen;
+    sSort.sortFlags |= SORTFLAG_UseSorter;
+  }
+
+  /* Open a virtual index to use for the distinct set.
+  */
+  if( p->selFlags & SF_Distinct ){
+    sDistinct.tabTnct = pParse->nTab++;
+    sDistinct.addrTnct = sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
+                                sDistinct.tabTnct, 0, 0,
+                                (char*)keyInfoFromExprList(pParse, p->pEList,0,0),
+                                P4_KEYINFO);
+    sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
+    sDistinct.eTnctType = WHERE_DISTINCT_UNORDERED;
+  }else{
+    sDistinct.eTnctType = WHERE_DISTINCT_NOOP;
+  }
+
+  if( !isAgg && pGroupBy==0 ){
+    /* No aggregate functions and no GROUP BY clause */
+    u16 wctrlFlags = (sDistinct.isTnct ? WHERE_WANT_DISTINCT : 0);
+
+    /* Begin the database scan. */
+    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, sSort.pOrderBy,
+                               p->pEList, wctrlFlags, 0);
+    if( pWInfo==0 ) goto select_end;
+    if( sqlite3WhereOutputRowCount(pWInfo) < p->nSelectRow ){
+      p->nSelectRow = sqlite3WhereOutputRowCount(pWInfo);
+    }
+    if( sDistinct.isTnct && sqlite3WhereIsDistinct(pWInfo) ){
+      sDistinct.eTnctType = sqlite3WhereIsDistinct(pWInfo);
+    }
+    if( sSort.pOrderBy ){
+      sSort.nOBSat = sqlite3WhereIsOrdered(pWInfo);
+      if( sSort.nOBSat==sSort.pOrderBy->nExpr ){
+        sSort.pOrderBy = 0;
+      }
+    }
+
+    /* If sorting index that was created by a prior OP_OpenEphemeral 
+    ** instruction ended up not being needed, then change the OP_OpenEphemeral
+    ** into an OP_Noop.
+    */
+    if( sSort.addrSortIndex>=0 && sSort.pOrderBy==0 ){
+      sqlite3VdbeChangeToNoop(v, sSort.addrSortIndex);
+    }
+
+    /* Use the standard inner loop. */
+    selectInnerLoop(pParse, p, pEList, -1, &sSort, &sDistinct, pDest,
+                    sqlite3WhereContinueLabel(pWInfo),
+                    sqlite3WhereBreakLabel(pWInfo));
+
+    /* End the database scan loop.
+    */
+    sqlite3WhereEnd(pWInfo);
+  }else{
+    /* This case when there exist aggregate functions or a GROUP BY clause
+    ** or both */
+    NameContext sNC;    /* Name context for processing aggregate information */
+    int iAMem;          /* First Mem address for storing current GROUP BY */
+    int iBMem;          /* First Mem address for previous GROUP BY */
+    int iUseFlag;       /* Mem address holding flag indicating that at least
+                        ** one row of the input to the aggregator has been
+                        ** processed */
+    int iAbortFlag;     /* Mem address which causes query abort if positive */
+    int groupBySort;    /* Rows come from source in GROUP BY order */
+    int addrEnd;        /* End of processing for this SELECT */
+    int sortPTab = 0;   /* Pseudotable used to decode sorting results */
+    int sortOut = 0;    /* Output register from the sorter */
+    int orderByGrp = 0; /* True if the GROUP BY and ORDER BY are the same */
+
+    /* Remove any and all aliases between the result set and the
+    ** GROUP BY clause.
+    */
+    if( pGroupBy ){
+      int k;                        /* Loop counter */
+      struct ExprList_item *pItem;  /* For looping over expression in a list */
+
+      for(k=p->pEList->nExpr, pItem=p->pEList->a; k>0; k--, pItem++){
+        pItem->u.x.iAlias = 0;
+      }
+      for(k=pGroupBy->nExpr, pItem=pGroupBy->a; k>0; k--, pItem++){
+        pItem->u.x.iAlias = 0;
+      }
+      if( p->nSelectRow>100 ) p->nSelectRow = 100;
+    }else{
+      p->nSelectRow = 1;
+    }
+
+
+    /* If there is both a GROUP BY and an ORDER BY clause and they are
+    ** identical, then it may be possible to disable the ORDER BY clause 
+    ** on the grounds that the GROUP BY will cause elements to come out 
+    ** in the correct order. It also may not - the GROUP BY may use a
+    ** database index that causes rows to be grouped together as required
+    ** but not actually sorted. Either way, record the fact that the
+    ** ORDER BY and GROUP BY clauses are the same by setting the orderByGrp
+    ** variable.  */
+    if( sqlite3ExprListCompare(pGroupBy, sSort.pOrderBy, -1)==0 ){
+      orderByGrp = 1;
+    }
+ 
+    /* Create a label to jump to when we want to abort the query */
+    addrEnd = sqlite3VdbeMakeLabel(v);
+
+    /* Convert TK_COLUMN nodes into TK_AGG_COLUMN and make entries in
+    ** sAggInfo for all TK_AGG_FUNCTION nodes in expressions of the
+    ** SELECT statement.
+    */
+    memset(&sNC, 0, sizeof(sNC));
+    sNC.pParse = pParse;
+    sNC.pSrcList = pTabList;
+    sNC.pAggInfo = &sAggInfo;
+    sAggInfo.mnReg = pParse->nMem+1;
+    sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0;
+    sAggInfo.pGroupBy = pGroupBy;
+    sqlite3ExprAnalyzeAggList(&sNC, pEList);
+    sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy);
+    if( pHaving ){
+      sqlite3ExprAnalyzeAggregates(&sNC, pHaving);
+    }
+    sAggInfo.nAccumulator = sAggInfo.nColumn;
+    for(i=0; i<sAggInfo.nFunc; i++){
+      assert( !ExprHasProperty(sAggInfo.aFunc[i].pExpr, EP_xIsSelect) );
+      sNC.ncFlags |= NC_InAggFunc;
+      sqlite3ExprAnalyzeAggList(&sNC, sAggInfo.aFunc[i].pExpr->x.pList);
+      sNC.ncFlags &= ~NC_InAggFunc;
+    }
+    sAggInfo.mxReg = pParse->nMem;
+    if( db->mallocFailed ) goto select_end;
+
+    /* Processing for aggregates with GROUP BY is very different and
+    ** much more complex than aggregates without a GROUP BY.
+    */
+    if( pGroupBy ){
+      KeyInfo *pKeyInfo;  /* Keying information for the group by clause */
+      int j1;             /* A-vs-B comparision jump */
+      int addrOutputRow;  /* Start of subroutine that outputs a result row */
+      int regOutputRow;   /* Return address register for output subroutine */
+      int addrSetAbort;   /* Set the abort flag and return */
+      int addrTopOfLoop;  /* Top of the input loop */
+      int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */
+      int addrReset;      /* Subroutine for resetting the accumulator */
+      int regReset;       /* Return address register for reset subroutine */
+
+      /* If there is a GROUP BY clause we might need a sorting index to
+      ** implement it.  Allocate that sorting index now.  If it turns out
+      ** that we do not need it after all, the OP_SorterOpen instruction
+      ** will be converted into a Noop.  
+      */
+      sAggInfo.sortingIdx = pParse->nTab++;
+      pKeyInfo = keyInfoFromExprList(pParse, pGroupBy, 0, sAggInfo.nColumn);
+      addrSortingIdx = sqlite3VdbeAddOp4(v, OP_SorterOpen, 
+          sAggInfo.sortingIdx, sAggInfo.nSortingColumn, 
+          0, (char*)pKeyInfo, P4_KEYINFO);
+
+      /* Initialize memory locations used by GROUP BY aggregate processing
+      */
+      iUseFlag = ++pParse->nMem;
+      iAbortFlag = ++pParse->nMem;
+      regOutputRow = ++pParse->nMem;
+      addrOutputRow = sqlite3VdbeMakeLabel(v);
+      regReset = ++pParse->nMem;
+      addrReset = sqlite3VdbeMakeLabel(v);
+      iAMem = pParse->nMem + 1;
+      pParse->nMem += pGroupBy->nExpr;
+      iBMem = pParse->nMem + 1;
+      pParse->nMem += pGroupBy->nExpr;
+      sqlite3VdbeAddOp2(v, OP_Integer, 0, iAbortFlag);
+      VdbeComment((v, "clear abort flag"));
+      sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
+      VdbeComment((v, "indicate accumulator empty"));
+      sqlite3VdbeAddOp3(v, OP_Null, 0, iAMem, iAMem+pGroupBy->nExpr-1);
+
+      /* Begin a loop that will extract all source rows in GROUP BY order.
+      ** This might involve two separate loops with an OP_Sort in between, or
+      ** it might be a single loop that uses an index to extract information
+      ** in the right order to begin with.
+      */
+      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
+      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, 0,
+          WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0), 0
+      );
+      if( pWInfo==0 ) goto select_end;
+      if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){
+        /* The optimizer is able to deliver rows in group by order so
+        ** we do not have to sort.  The OP_OpenEphemeral table will be
+        ** cancelled later because we still need to use the pKeyInfo
+        */
+        groupBySort = 0;
+      }else{
+        /* Rows are coming out in undetermined order.  We have to push
+        ** each row into a sorting index, terminate the first loop,
+        ** then loop over the sorting index in order to get the output
+        ** in sorted order
+        */
+        int regBase;
+        int regRecord;
+        int nCol;
+        int nGroupBy;
+
+        explainTempTable(pParse, 
+            (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ?
+                    "DISTINCT" : "GROUP BY");
+
+        groupBySort = 1;
+        nGroupBy = pGroupBy->nExpr;
+        nCol = nGroupBy;
+        j = nGroupBy;
+        for(i=0; i<sAggInfo.nColumn; i++){
+          if( sAggInfo.aCol[i].iSorterColumn>=j ){
+            nCol++;
+            j++;
+          }
+        }
+        regBase = sqlite3GetTempRange(pParse, nCol);
+        sqlite3ExprCacheClear(pParse);
+        sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0);
+        j = nGroupBy;
+        for(i=0; i<sAggInfo.nColumn; i++){
+          struct AggInfo_col *pCol = &sAggInfo.aCol[i];
+          if( pCol->iSorterColumn>=j ){
+            int r1 = j + regBase;
+            int r2;
+
+            r2 = sqlite3ExprCodeGetColumn(pParse, 
+                               pCol->pTab, pCol->iColumn, pCol->iTable, r1, 0);
+            if( r1!=r2 ){
+              sqlite3VdbeAddOp2(v, OP_SCopy, r2, r1);
+            }
+            j++;
+          }
+        }
+        regRecord = sqlite3GetTempReg(pParse);
+        sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord);
+        sqlite3VdbeAddOp2(v, OP_SorterInsert, sAggInfo.sortingIdx, regRecord);
+        sqlite3ReleaseTempReg(pParse, regRecord);
+        sqlite3ReleaseTempRange(pParse, regBase, nCol);
+        sqlite3WhereEnd(pWInfo);
+        sAggInfo.sortingIdxPTab = sortPTab = pParse->nTab++;
+        sortOut = sqlite3GetTempReg(pParse);
+        sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol);
+        sqlite3VdbeAddOp2(v, OP_SorterSort, sAggInfo.sortingIdx, addrEnd);
+        VdbeComment((v, "GROUP BY sort")); VdbeCoverage(v);
+        sAggInfo.useSortingIdx = 1;
+        sqlite3ExprCacheClear(pParse);
+
+      }
+
+      /* If the index or temporary table used by the GROUP BY sort
+      ** will naturally deliver rows in the order required by the ORDER BY
+      ** clause, cancel the ephemeral table open coded earlier.
+      **
+      ** This is an optimization - the correct answer should result regardless.
+      ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER to 
+      ** disable this optimization for testing purposes.  */
+      if( orderByGrp && OptimizationEnabled(db, SQLITE_GroupByOrder) 
+       && (groupBySort || sqlite3WhereIsSorted(pWInfo))
+      ){
+        sSort.pOrderBy = 0;
+        sqlite3VdbeChangeToNoop(v, sSort.addrSortIndex);
+      }
+
+      /* Evaluate the current GROUP BY terms and store in b0, b1, b2...
+      ** (b0 is memory location iBMem+0, b1 is iBMem+1, and so forth)
+      ** Then compare the current GROUP BY terms against the GROUP BY terms
+      ** from the previous row currently stored in a0, a1, a2...
+      */
+      addrTopOfLoop = sqlite3VdbeCurrentAddr(v);
+      sqlite3ExprCacheClear(pParse);
+      if( groupBySort ){
+        sqlite3VdbeAddOp3(v, OP_SorterData, sAggInfo.sortingIdx, sortOut,sortPTab);
+      }
+      for(j=0; j<pGroupBy->nExpr; j++){
+        if( groupBySort ){
+          sqlite3VdbeAddOp3(v, OP_Column, sortPTab, j, iBMem+j);
+        }else{
+          sAggInfo.directMode = 1;
+          sqlite3ExprCode(pParse, pGroupBy->a[j].pExpr, iBMem+j);
+        }
+      }
+      sqlite3VdbeAddOp4(v, OP_Compare, iAMem, iBMem, pGroupBy->nExpr,
+                          (char*)sqlite3KeyInfoRef(pKeyInfo), P4_KEYINFO);
+      j1 = sqlite3VdbeCurrentAddr(v);
+      sqlite3VdbeAddOp3(v, OP_Jump, j1+1, 0, j1+1); VdbeCoverage(v);
+
+      /* Generate code that runs whenever the GROUP BY changes.
+      ** Changes in the GROUP BY are detected by the previous code
+      ** block.  If there were no changes, this block is skipped.
+      **
+      ** This code copies current group by terms in b0,b1,b2,...
+      ** over to a0,a1,a2.  It then calls the output subroutine
+      ** and resets the aggregate accumulator registers in preparation
+      ** for the next GROUP BY batch.
+      */
+      sqlite3ExprCodeMove(pParse, iBMem, iAMem, pGroupBy->nExpr);
+      sqlite3VdbeAddOp2(v, OP_Gosub, regOutputRow, addrOutputRow);
+      VdbeComment((v, "output one row"));
+      sqlite3VdbeAddOp2(v, OP_IfPos, iAbortFlag, addrEnd); VdbeCoverage(v);
+      VdbeComment((v, "check abort flag"));
+      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
+      VdbeComment((v, "reset accumulator"));
+
+      /* Update the aggregate accumulators based on the content of
+      ** the current row
+      */
+      sqlite3VdbeJumpHere(v, j1);
+      updateAccumulator(pParse, &sAggInfo);
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag);
+      VdbeComment((v, "indicate data in accumulator"));
+
+      /* End of the loop
+      */
+      if( groupBySort ){
+        sqlite3VdbeAddOp2(v, OP_SorterNext, sAggInfo.sortingIdx, addrTopOfLoop);
+        VdbeCoverage(v);
+      }else{
+        sqlite3WhereEnd(pWInfo);
+        sqlite3VdbeChangeToNoop(v, addrSortingIdx);
+      }
+
+      /* Output the final row of result
+      */
+      sqlite3VdbeAddOp2(v, OP_Gosub, regOutputRow, addrOutputRow);
+      VdbeComment((v, "output final row"));
+
+      /* Jump over the subroutines
+      */
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, addrEnd);
+
+      /* Generate a subroutine that outputs a single row of the result
+      ** set.  This subroutine first looks at the iUseFlag.  If iUseFlag
+      ** is less than or equal to zero, the subroutine is a no-op.  If
+      ** the processing calls for the query to abort, this subroutine
+      ** increments the iAbortFlag memory location before returning in
+      ** order to signal the caller to abort.
+      */
+      addrSetAbort = sqlite3VdbeCurrentAddr(v);
+      sqlite3VdbeAddOp2(v, OP_Integer, 1, iAbortFlag);
+      VdbeComment((v, "set abort flag"));
+      sqlite3VdbeAddOp1(v, OP_Return, regOutputRow);
+      sqlite3VdbeResolveLabel(v, addrOutputRow);
+      addrOutputRow = sqlite3VdbeCurrentAddr(v);
+      sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); VdbeCoverage(v);
+      VdbeComment((v, "Groupby result generator entry point"));
+      sqlite3VdbeAddOp1(v, OP_Return, regOutputRow);
+      finalizeAggFunctions(pParse, &sAggInfo);
+      sqlite3ExprIfFalse(pParse, pHaving, addrOutputRow+1, SQLITE_JUMPIFNULL);
+      selectInnerLoop(pParse, p, p->pEList, -1, &sSort,
+                      &sDistinct, pDest,
+                      addrOutputRow+1, addrSetAbort);
+      sqlite3VdbeAddOp1(v, OP_Return, regOutputRow);
+      VdbeComment((v, "end groupby result generator"));
+
+      /* Generate a subroutine that will reset the group-by accumulator
+      */
+      sqlite3VdbeResolveLabel(v, addrReset);
+      resetAccumulator(pParse, &sAggInfo);
+      sqlite3VdbeAddOp1(v, OP_Return, regReset);
+     
+    } /* endif pGroupBy.  Begin aggregate queries without GROUP BY: */
+    else {
+      ExprList *pDel = 0;
+#ifndef SQLITE_OMIT_BTREECOUNT
+      Table *pTab;
+      if( (pTab = isSimpleCount(p, &sAggInfo))!=0 ){
+        /* If isSimpleCount() returns a pointer to a Table structure, then
+        ** the SQL statement is of the form:
+        **
+        **   SELECT count(*) FROM <tbl>
+        **
+        ** where the Table structure returned represents table <tbl>.
+        **
+        ** This statement is so common that it is optimized specially. The
+        ** OP_Count instruction is executed either on the intkey table that
+        ** contains the data for table <tbl> or on one of its indexes. It
+        ** is better to execute the op on an index, as indexes are almost
+        ** always spread across less pages than their corresponding tables.
+        */
+        const int iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+        const int iCsr = pParse->nTab++;     /* Cursor to scan b-tree */
+        Index *pIdx;                         /* Iterator variable */
+        KeyInfo *pKeyInfo = 0;               /* Keyinfo for scanned index */
+        Index *pBest = 0;                    /* Best index found so far */
+        int iRoot = pTab->tnum;              /* Root page of scanned b-tree */
+
+        sqlite3CodeVerifySchema(pParse, iDb);
+        sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
+
+        /* Search for the index that has the lowest scan cost.
+        **
+        ** (2011-04-15) Do not do a full scan of an unordered index.
+        **
+        ** (2013-10-03) Do not count the entries in a partial index.
+        **
+        ** In practice the KeyInfo structure will not be used. It is only 
+        ** passed to keep OP_OpenRead happy.
+        */
+        if( !HasRowid(pTab) ) pBest = sqlite3PrimaryKeyIndex(pTab);
+        for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+          if( pIdx->bUnordered==0
+           && pIdx->szIdxRow<pTab->szTabRow
+           && pIdx->pPartIdxWhere==0
+           && (!pBest || pIdx->szIdxRow<pBest->szIdxRow)
+          ){
+            pBest = pIdx;
+          }
+        }
+        if( pBest ){
+          iRoot = pBest->tnum;
+          pKeyInfo = sqlite3KeyInfoOfIndex(pParse, pBest);
+        }
+
+        /* Open a read-only cursor, execute the OP_Count, close the cursor. */
+        sqlite3VdbeAddOp4Int(v, OP_OpenRead, iCsr, iRoot, iDb, 1);
+        if( pKeyInfo ){
+          sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO);
+        }
+        sqlite3VdbeAddOp2(v, OP_Count, iCsr, sAggInfo.aFunc[0].iMem);
+        sqlite3VdbeAddOp1(v, OP_Close, iCsr);
+        explainSimpleCount(pParse, pTab, pBest);
+      }else
+#endif /* SQLITE_OMIT_BTREECOUNT */
+      {
+        /* Check if the query is of one of the following forms:
+        **
+        **   SELECT min(x) FROM ...
+        **   SELECT max(x) FROM ...
+        **
+        ** If it is, then ask the code in where.c to attempt to sort results
+        ** as if there was an "ORDER ON x" or "ORDER ON x DESC" clause. 
+        ** If where.c is able to produce results sorted in this order, then
+        ** add vdbe code to break out of the processing loop after the 
+        ** first iteration (since the first iteration of the loop is 
+        ** guaranteed to operate on the row with the minimum or maximum 
+        ** value of x, the only row required).
+        **
+        ** A special flag must be passed to sqlite3WhereBegin() to slightly
+        ** modify behavior as follows:
+        **
+        **   + If the query is a "SELECT min(x)", then the loop coded by
+        **     where.c should not iterate over any values with a NULL value
+        **     for x.
+        **
+        **   + The optimizer code in where.c (the thing that decides which
+        **     index or indices to use) should place a different priority on 
+        **     satisfying the 'ORDER BY' clause than it does in other cases.
+        **     Refer to code and comments in where.c for details.
+        */
+        ExprList *pMinMax = 0;
+        u8 flag = WHERE_ORDERBY_NORMAL;
+        
+        assert( p->pGroupBy==0 );
+        assert( flag==0 );
+        if( p->pHaving==0 ){
+          flag = minMaxQuery(&sAggInfo, &pMinMax);
+        }
+        assert( flag==0 || (pMinMax!=0 && pMinMax->nExpr==1) );
+
+        if( flag ){
+          pMinMax = sqlite3ExprListDup(db, pMinMax, 0);
+          pDel = pMinMax;
+          if( pMinMax && !db->mallocFailed ){
+            pMinMax->a[0].sortOrder = flag!=WHERE_ORDERBY_MIN ?1:0;
+            pMinMax->a[0].pExpr->op = TK_COLUMN;
+          }
+        }
+  
+        /* This case runs if the aggregate has no GROUP BY clause.  The
+        ** processing is much simpler since there is only a single row
+        ** of output.
+        */
+        resetAccumulator(pParse, &sAggInfo);
+        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMax,0,flag,0);
+        if( pWInfo==0 ){
+          sqlite3ExprListDelete(db, pDel);
+          goto select_end;
+        }
+        updateAccumulator(pParse, &sAggInfo);
+        assert( pMinMax==0 || pMinMax->nExpr==1 );
+        if( sqlite3WhereIsOrdered(pWInfo)>0 ){
+          sqlite3VdbeAddOp2(v, OP_Goto, 0, sqlite3WhereBreakLabel(pWInfo));
+          VdbeComment((v, "%s() by index",
+                (flag==WHERE_ORDERBY_MIN?"min":"max")));
+        }
+        sqlite3WhereEnd(pWInfo);
+        finalizeAggFunctions(pParse, &sAggInfo);
+      }
+
+      sSort.pOrderBy = 0;
+      sqlite3ExprIfFalse(pParse, pHaving, addrEnd, SQLITE_JUMPIFNULL);
+      selectInnerLoop(pParse, p, p->pEList, -1, 0, 0, 
+                      pDest, addrEnd, addrEnd);
+      sqlite3ExprListDelete(db, pDel);
+    }
+    sqlite3VdbeResolveLabel(v, addrEnd);
+    
+  } /* endif aggregate query */
+
+  if( sDistinct.eTnctType==WHERE_DISTINCT_UNORDERED ){
+    explainTempTable(pParse, "DISTINCT");
+  }
+
+  /* If there is an ORDER BY clause, then we need to sort the results
+  ** and send them to the callback one by one.
+  */
+  if( sSort.pOrderBy ){
+    explainTempTable(pParse, sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY");
+    generateSortTail(pParse, p, &sSort, pEList->nExpr, pDest);
+  }
+
+  /* Jump here to skip this query
+  */
+  sqlite3VdbeResolveLabel(v, iEnd);
+
+  /* The SELECT was successfully coded.   Set the return code to 0
+  ** to indicate no errors.
+  */
+  rc = 0;
+
+  /* Control jumps to here if an error is encountered above, or upon
+  ** successful coding of the SELECT.
+  */
+select_end:
+  explainSetInteger(pParse->iSelectId, iRestoreSelectId);
+
+  /* Identify column names if results of the SELECT are to be output.
+  */
+  if( rc==SQLITE_OK && pDest->eDest==SRT_Output ){
+    generateColumnNames(pParse, pTabList, pEList);
+  }
+
+  sqlite3DbFree(db, sAggInfo.aCol);
+  sqlite3DbFree(db, sAggInfo.aFunc);
+#if SELECTTRACE_ENABLED
+  SELECTTRACE(1,pParse,p,("end processing\n"));
+  pParse->nSelectIndent--;
+#endif
+  return rc;
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** Generate a human-readable description of a the Select object.
+*/
+SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){
+  int n = 0;
+  pView = sqlite3TreeViewPush(pView, moreToFollow);
+  sqlite3TreeViewLine(pView, "SELECT%s%s",
+    ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""),
+    ((p->selFlags & SF_Aggregate) ? " agg_flag" : "")
+  );
+  if( p->pSrc && p->pSrc->nSrc ) n++;
+  if( p->pWhere ) n++;
+  if( p->pGroupBy ) n++;
+  if( p->pHaving ) n++;
+  if( p->pOrderBy ) n++;
+  if( p->pLimit ) n++;
+  if( p->pOffset ) n++;
+  if( p->pPrior ) n++;
+  sqlite3TreeViewExprList(pView, p->pEList, (n--)>0, "result-set");
+  if( p->pSrc && p->pSrc->nSrc ){
+    int i;
+    pView = sqlite3TreeViewPush(pView, (n--)>0);
+    sqlite3TreeViewLine(pView, "FROM");
+    for(i=0; i<p->pSrc->nSrc; i++){
+      struct SrcList_item *pItem = &p->pSrc->a[i];
+      StrAccum x;
+      char zLine[100];
+      sqlite3StrAccumInit(&x, zLine, sizeof(zLine), 0);
+      sqlite3XPrintf(&x, 0, "{%d,*}", pItem->iCursor);
+      if( pItem->zDatabase ){
+        sqlite3XPrintf(&x, 0, " %s.%s", pItem->zDatabase, pItem->zName);
+      }else if( pItem->zName ){
+        sqlite3XPrintf(&x, 0, " %s", pItem->zName);
+      }
+      if( pItem->pTab ){
+        sqlite3XPrintf(&x, 0, " tabname=%Q", pItem->pTab->zName);
+      }
+      if( pItem->zAlias ){
+        sqlite3XPrintf(&x, 0, " (AS %s)", pItem->zAlias);
+      }
+      if( pItem->jointype & JT_LEFT ){
+        sqlite3XPrintf(&x, 0, " LEFT-JOIN");
+      }
+      sqlite3StrAccumFinish(&x);
+      sqlite3TreeViewItem(pView, zLine, i<p->pSrc->nSrc-1); 
+      if( pItem->pSelect ){
+        sqlite3TreeViewSelect(pView, pItem->pSelect, 0);
+      }
+      sqlite3TreeViewPop(pView);
+    }
+    sqlite3TreeViewPop(pView);
+  }
+  if( p->pWhere ){
+    sqlite3TreeViewItem(pView, "WHERE", (n--)>0);
+    sqlite3TreeViewExpr(pView, p->pWhere, 0);
+    sqlite3TreeViewPop(pView);
+  }
+  if( p->pGroupBy ){
+    sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY");
+  }
+  if( p->pHaving ){
+    sqlite3TreeViewItem(pView, "HAVING", (n--)>0);
+    sqlite3TreeViewExpr(pView, p->pHaving, 0);
+    sqlite3TreeViewPop(pView);
+  }
+  if( p->pOrderBy ){
+    sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY");
+  }
+  if( p->pLimit ){
+    sqlite3TreeViewItem(pView, "LIMIT", (n--)>0);
+    sqlite3TreeViewExpr(pView, p->pLimit, 0);
+    sqlite3TreeViewPop(pView);
+  }
+  if( p->pOffset ){
+    sqlite3TreeViewItem(pView, "OFFSET", (n--)>0);
+    sqlite3TreeViewExpr(pView, p->pOffset, 0);
+    sqlite3TreeViewPop(pView);
+  }
+  if( p->pPrior ){
+    const char *zOp = "UNION";
+    switch( p->op ){
+      case TK_ALL:         zOp = "UNION ALL";  break;
+      case TK_INTERSECT:   zOp = "INTERSECT";  break;
+      case TK_EXCEPT:      zOp = "EXCEPT";     break;
+    }
+    sqlite3TreeViewItem(pView, zOp, (n--)>0);
+    sqlite3TreeViewSelect(pView, p->pPrior, 0);
+    sqlite3TreeViewPop(pView);
+  }
+  sqlite3TreeViewPop(pView);
+}
+#endif /* SQLITE_DEBUG */
+
+/************** End of select.c **********************************************/
+/************** Begin file table.c *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the sqlite3_get_table() and sqlite3_free_table()
+** interface routines.  These are just wrappers around the main
+** interface routine of sqlite3_exec().
+**
+** These routines are in a separate files so that they will not be linked
+** if they are not used.
+*/
+/* #include <stdlib.h> */
+/* #include <string.h> */
+
+#ifndef SQLITE_OMIT_GET_TABLE
+
+/*
+** This structure is used to pass data from sqlite3_get_table() through
+** to the callback function is uses to build the result.
+*/
+typedef struct TabResult {
+  char **azResult;   /* Accumulated output */
+  char *zErrMsg;     /* Error message text, if an error occurs */
+  u32 nAlloc;        /* Slots allocated for azResult[] */
+  u32 nRow;          /* Number of rows in the result */
+  u32 nColumn;       /* Number of columns in the result */
+  u32 nData;         /* Slots used in azResult[].  (nRow+1)*nColumn */
+  int rc;            /* Return code from sqlite3_exec() */
+} TabResult;
+
+/*
+** This routine is called once for each row in the result table.  Its job
+** is to fill in the TabResult structure appropriately, allocating new
+** memory as necessary.
+*/
+static int sqlite3_get_table_cb(void *pArg, int nCol, char **argv, char **colv){
+  TabResult *p = (TabResult*)pArg;  /* Result accumulator */
+  int need;                         /* Slots needed in p->azResult[] */
+  int i;                            /* Loop counter */
+  char *z;                          /* A single column of result */
+
+  /* Make sure there is enough space in p->azResult to hold everything
+  ** we need to remember from this invocation of the callback.
+  */
+  if( p->nRow==0 && argv!=0 ){
+    need = nCol*2;
+  }else{
+    need = nCol;
+  }
+  if( p->nData + need > p->nAlloc ){
+    char **azNew;
+    p->nAlloc = p->nAlloc*2 + need;
+    azNew = sqlite3_realloc64( p->azResult, sizeof(char*)*p->nAlloc );
+    if( azNew==0 ) goto malloc_failed;
+    p->azResult = azNew;
+  }
+
+  /* If this is the first row, then generate an extra row containing
+  ** the names of all columns.
+  */
+  if( p->nRow==0 ){
+    p->nColumn = nCol;
+    for(i=0; i<nCol; i++){
+      z = sqlite3_mprintf("%s", colv[i]);
+      if( z==0 ) goto malloc_failed;
+      p->azResult[p->nData++] = z;
+    }
+  }else if( (int)p->nColumn!=nCol ){
+    sqlite3_free(p->zErrMsg);
+    p->zErrMsg = sqlite3_mprintf(
+       "sqlite3_get_table() called with two or more incompatible queries"
+    );
+    p->rc = SQLITE_ERROR;
+    return 1;
+  }
+
+  /* Copy over the row data
+  */
+  if( argv!=0 ){
+    for(i=0; i<nCol; i++){
+      if( argv[i]==0 ){
+        z = 0;
+      }else{
+        int n = sqlite3Strlen30(argv[i])+1;
+        z = sqlite3_malloc( n );
+        if( z==0 ) goto malloc_failed;
+        memcpy(z, argv[i], n);
+      }
+      p->azResult[p->nData++] = z;
+    }
+    p->nRow++;
+  }
+  return 0;
+
+malloc_failed:
+  p->rc = SQLITE_NOMEM;
+  return 1;
+}
+
+/*
+** Query the database.  But instead of invoking a callback for each row,
+** malloc() for space to hold the result and return the entire results
+** at the conclusion of the call.
+**
+** The result that is written to ***pazResult is held in memory obtained
+** from malloc().  But the caller cannot free this memory directly.  
+** Instead, the entire table should be passed to sqlite3_free_table() when
+** the calling procedure is finished using it.
+*/
+SQLITE_API int sqlite3_get_table(
+  sqlite3 *db,                /* The database on which the SQL executes */
+  const char *zSql,           /* The SQL to be executed */
+  char ***pazResult,          /* Write the result table here */
+  int *pnRow,                 /* Write the number of rows in the result here */
+  int *pnColumn,              /* Write the number of columns of result here */
+  char **pzErrMsg             /* Write error messages here */
+){
+  int rc;
+  TabResult res;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || pazResult==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *pazResult = 0;
+  if( pnColumn ) *pnColumn = 0;
+  if( pnRow ) *pnRow = 0;
+  if( pzErrMsg ) *pzErrMsg = 0;
+  res.zErrMsg = 0;
+  res.nRow = 0;
+  res.nColumn = 0;
+  res.nData = 1;
+  res.nAlloc = 20;
+  res.rc = SQLITE_OK;
+  res.azResult = sqlite3_malloc(sizeof(char*)*res.nAlloc );
+  if( res.azResult==0 ){
+     db->errCode = SQLITE_NOMEM;
+     return SQLITE_NOMEM;
+  }
+  res.azResult[0] = 0;
+  rc = sqlite3_exec(db, zSql, sqlite3_get_table_cb, &res, pzErrMsg);
+  assert( sizeof(res.azResult[0])>= sizeof(res.nData) );
+  res.azResult[0] = SQLITE_INT_TO_PTR(res.nData);
+  if( (rc&0xff)==SQLITE_ABORT ){
+    sqlite3_free_table(&res.azResult[1]);
+    if( res.zErrMsg ){
+      if( pzErrMsg ){
+        sqlite3_free(*pzErrMsg);
+        *pzErrMsg = sqlite3_mprintf("%s",res.zErrMsg);
+      }
+      sqlite3_free(res.zErrMsg);
+    }
+    db->errCode = res.rc;  /* Assume 32-bit assignment is atomic */
+    return res.rc;
+  }
+  sqlite3_free(res.zErrMsg);
+  if( rc!=SQLITE_OK ){
+    sqlite3_free_table(&res.azResult[1]);
+    return rc;
+  }
+  if( res.nAlloc>res.nData ){
+    char **azNew;
+    azNew = sqlite3_realloc( res.azResult, sizeof(char*)*res.nData );
+    if( azNew==0 ){
+      sqlite3_free_table(&res.azResult[1]);
+      db->errCode = SQLITE_NOMEM;
+      return SQLITE_NOMEM;
+    }
+    res.azResult = azNew;
+  }
+  *pazResult = &res.azResult[1];
+  if( pnColumn ) *pnColumn = res.nColumn;
+  if( pnRow ) *pnRow = res.nRow;
+  return rc;
+}
+
+/*
+** This routine frees the space the sqlite3_get_table() malloced.
+*/
+SQLITE_API void sqlite3_free_table(
+  char **azResult            /* Result returned from sqlite3_get_table() */
+){
+  if( azResult ){
+    int i, n;
+    azResult--;
+    assert( azResult!=0 );
+    n = SQLITE_PTR_TO_INT(azResult[0]);
+    for(i=1; i<n; i++){ if( azResult[i] ) sqlite3_free(azResult[i]); }
+    sqlite3_free(azResult);
+  }
+}
+
+#endif /* SQLITE_OMIT_GET_TABLE */
+
+/************** End of table.c ***********************************************/
+/************** Begin file trigger.c *****************************************/
+/*
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains the implementation for TRIGGERs
+*/
+
+#ifndef SQLITE_OMIT_TRIGGER
+/*
+** Delete a linked list of TriggerStep structures.
+*/
+SQLITE_PRIVATE void sqlite3DeleteTriggerStep(sqlite3 *db, TriggerStep *pTriggerStep){
+  while( pTriggerStep ){
+    TriggerStep * pTmp = pTriggerStep;
+    pTriggerStep = pTriggerStep->pNext;
+
+    sqlite3ExprDelete(db, pTmp->pWhere);
+    sqlite3ExprListDelete(db, pTmp->pExprList);
+    sqlite3SelectDelete(db, pTmp->pSelect);
+    sqlite3IdListDelete(db, pTmp->pIdList);
+
+    sqlite3DbFree(db, pTmp);
+  }
+}
+
+/*
+** Given table pTab, return a list of all the triggers attached to 
+** the table. The list is connected by Trigger.pNext pointers.
+**
+** All of the triggers on pTab that are in the same database as pTab
+** are already attached to pTab->pTrigger.  But there might be additional
+** triggers on pTab in the TEMP schema.  This routine prepends all
+** TEMP triggers on pTab to the beginning of the pTab->pTrigger list
+** and returns the combined list.
+**
+** To state it another way:  This routine returns a list of all triggers
+** that fire off of pTab.  The list will include any TEMP triggers on
+** pTab as well as the triggers lised in pTab->pTrigger.
+*/
+SQLITE_PRIVATE Trigger *sqlite3TriggerList(Parse *pParse, Table *pTab){
+  Schema * const pTmpSchema = pParse->db->aDb[1].pSchema;
+  Trigger *pList = 0;                  /* List of triggers to return */
+
+  if( pParse->disableTriggers ){
+    return 0;
+  }
+
+  if( pTmpSchema!=pTab->pSchema ){
+    HashElem *p;
+    assert( sqlite3SchemaMutexHeld(pParse->db, 0, pTmpSchema) );
+    for(p=sqliteHashFirst(&pTmpSchema->trigHash); p; p=sqliteHashNext(p)){
+      Trigger *pTrig = (Trigger *)sqliteHashData(p);
+      if( pTrig->pTabSchema==pTab->pSchema
+       && 0==sqlite3StrICmp(pTrig->table, pTab->zName) 
+      ){
+        pTrig->pNext = (pList ? pList : pTab->pTrigger);
+        pList = pTrig;
+      }
+    }
+  }
+
+  return (pList ? pList : pTab->pTrigger);
+}
+
+/*
+** This is called by the parser when it sees a CREATE TRIGGER statement
+** up to the point of the BEGIN before the trigger actions.  A Trigger
+** structure is generated based on the information available and stored
+** in pParse->pNewTrigger.  After the trigger actions have been parsed, the
+** sqlite3FinishTrigger() function is called to complete the trigger
+** construction process.
+*/
+SQLITE_PRIVATE void sqlite3BeginTrigger(
+  Parse *pParse,      /* The parse context of the CREATE TRIGGER statement */
+  Token *pName1,      /* The name of the trigger */
+  Token *pName2,      /* The name of the trigger */
+  int tr_tm,          /* One of TK_BEFORE, TK_AFTER, TK_INSTEAD */
+  int op,             /* One of TK_INSERT, TK_UPDATE, TK_DELETE */
+  IdList *pColumns,   /* column list if this is an UPDATE OF trigger */
+  SrcList *pTableName,/* The name of the table/view the trigger applies to */
+  Expr *pWhen,        /* WHEN clause */
+  int isTemp,         /* True if the TEMPORARY keyword is present */
+  int noErr           /* Suppress errors if the trigger already exists */
+){
+  Trigger *pTrigger = 0;  /* The new trigger */
+  Table *pTab;            /* Table that the trigger fires off of */
+  char *zName = 0;        /* Name of the trigger */
+  sqlite3 *db = pParse->db;  /* The database connection */
+  int iDb;                /* The database to store the trigger in */
+  Token *pName;           /* The unqualified db name */
+  DbFixer sFix;           /* State vector for the DB fixer */
+  int iTabDb;             /* Index of the database holding pTab */
+
+  assert( pName1!=0 );   /* pName1->z might be NULL, but not pName1 itself */
+  assert( pName2!=0 );
+  assert( op==TK_INSERT || op==TK_UPDATE || op==TK_DELETE );
+  assert( op>0 && op<0xff );
+  if( isTemp ){
+    /* If TEMP was specified, then the trigger name may not be qualified. */
+    if( pName2->n>0 ){
+      sqlite3ErrorMsg(pParse, "temporary trigger may not have qualified name");
+      goto trigger_cleanup;
+    }
+    iDb = 1;
+    pName = pName1;
+  }else{
+    /* Figure out the db that the trigger will be created in */
+    iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName);
+    if( iDb<0 ){
+      goto trigger_cleanup;
+    }
+  }
+  if( !pTableName || db->mallocFailed ){
+    goto trigger_cleanup;
+  }
+
+  /* A long-standing parser bug is that this syntax was allowed:
+  **
+  **    CREATE TRIGGER attached.demo AFTER INSERT ON attached.tab ....
+  **                                                 ^^^^^^^^
+  **
+  ** To maintain backwards compatibility, ignore the database
+  ** name on pTableName if we are reparsing out of SQLITE_MASTER.
+  */
+  if( db->init.busy && iDb!=1 ){
+    sqlite3DbFree(db, pTableName->a[0].zDatabase);
+    pTableName->a[0].zDatabase = 0;
+  }
+
+  /* If the trigger name was unqualified, and the table is a temp table,
+  ** then set iDb to 1 to create the trigger in the temporary database.
+  ** If sqlite3SrcListLookup() returns 0, indicating the table does not
+  ** exist, the error is caught by the block below.
+  */
+  pTab = sqlite3SrcListLookup(pParse, pTableName);
+  if( db->init.busy==0 && pName2->n==0 && pTab
+        && pTab->pSchema==db->aDb[1].pSchema ){
+    iDb = 1;
+  }
+
+  /* Ensure the table name matches database name and that the table exists */
+  if( db->mallocFailed ) goto trigger_cleanup;
+  assert( pTableName->nSrc==1 );
+  sqlite3FixInit(&sFix, pParse, iDb, "trigger", pName);
+  if( sqlite3FixSrcList(&sFix, pTableName) ){
+    goto trigger_cleanup;
+  }
+  pTab = sqlite3SrcListLookup(pParse, pTableName);
+  if( !pTab ){
+    /* The table does not exist. */
+    if( db->init.iDb==1 ){
+      /* Ticket #3810.
+      ** Normally, whenever a table is dropped, all associated triggers are
+      ** dropped too.  But if a TEMP trigger is created on a non-TEMP table
+      ** and the table is dropped by a different database connection, the
+      ** trigger is not visible to the database connection that does the
+      ** drop so the trigger cannot be dropped.  This results in an
+      ** "orphaned trigger" - a trigger whose associated table is missing.
+      */
+      db->init.orphanTrigger = 1;
+    }
+    goto trigger_cleanup;
+  }
+  if( IsVirtual(pTab) ){
+    sqlite3ErrorMsg(pParse, "cannot create triggers on virtual tables");
+    goto trigger_cleanup;
+  }
+
+  /* Check that the trigger name is not reserved and that no trigger of the
+  ** specified name exists */
+  zName = sqlite3NameFromToken(db, pName);
+  if( !zName || SQLITE_OK!=sqlite3CheckObjectName(pParse, zName) ){
+    goto trigger_cleanup;
+  }
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  if( sqlite3HashFind(&(db->aDb[iDb].pSchema->trigHash),zName) ){
+    if( !noErr ){
+      sqlite3ErrorMsg(pParse, "trigger %T already exists", pName);
+    }else{
+      assert( !db->init.busy );
+      sqlite3CodeVerifySchema(pParse, iDb);
+    }
+    goto trigger_cleanup;
+  }
+
+  /* Do not create a trigger on a system table */
+  if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 ){
+    sqlite3ErrorMsg(pParse, "cannot create trigger on system table");
+    pParse->nErr++;
+    goto trigger_cleanup;
+  }
+
+  /* INSTEAD of triggers are only for views and views only support INSTEAD
+  ** of triggers.
+  */
+  if( pTab->pSelect && tr_tm!=TK_INSTEAD ){
+    sqlite3ErrorMsg(pParse, "cannot create %s trigger on view: %S", 
+        (tr_tm == TK_BEFORE)?"BEFORE":"AFTER", pTableName, 0);
+    goto trigger_cleanup;
+  }
+  if( !pTab->pSelect && tr_tm==TK_INSTEAD ){
+    sqlite3ErrorMsg(pParse, "cannot create INSTEAD OF"
+        " trigger on table: %S", pTableName, 0);
+    goto trigger_cleanup;
+  }
+  iTabDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  {
+    int code = SQLITE_CREATE_TRIGGER;
+    const char *zDb = db->aDb[iTabDb].zName;
+    const char *zDbTrig = isTemp ? db->aDb[1].zName : zDb;
+    if( iTabDb==1 || isTemp ) code = SQLITE_CREATE_TEMP_TRIGGER;
+    if( sqlite3AuthCheck(pParse, code, zName, pTab->zName, zDbTrig) ){
+      goto trigger_cleanup;
+    }
+    if( sqlite3AuthCheck(pParse, SQLITE_INSERT, SCHEMA_TABLE(iTabDb),0,zDb)){
+      goto trigger_cleanup;
+    }
+  }
+#endif
+
+  /* INSTEAD OF triggers can only appear on views and BEFORE triggers
+  ** cannot appear on views.  So we might as well translate every
+  ** INSTEAD OF trigger into a BEFORE trigger.  It simplifies code
+  ** elsewhere.
+  */
+  if (tr_tm == TK_INSTEAD){
+    tr_tm = TK_BEFORE;
+  }
+
+  /* Build the Trigger object */
+  pTrigger = (Trigger*)sqlite3DbMallocZero(db, sizeof(Trigger));
+  if( pTrigger==0 ) goto trigger_cleanup;
+  pTrigger->zName = zName;
+  zName = 0;
+  pTrigger->table = sqlite3DbStrDup(db, pTableName->a[0].zName);
+  pTrigger->pSchema = db->aDb[iDb].pSchema;
+  pTrigger->pTabSchema = pTab->pSchema;
+  pTrigger->op = (u8)op;
+  pTrigger->tr_tm = tr_tm==TK_BEFORE ? TRIGGER_BEFORE : TRIGGER_AFTER;
+  pTrigger->pWhen = sqlite3ExprDup(db, pWhen, EXPRDUP_REDUCE);
+  pTrigger->pColumns = sqlite3IdListDup(db, pColumns);
+  assert( pParse->pNewTrigger==0 );
+  pParse->pNewTrigger = pTrigger;
+
+trigger_cleanup:
+  sqlite3DbFree(db, zName);
+  sqlite3SrcListDelete(db, pTableName);
+  sqlite3IdListDelete(db, pColumns);
+  sqlite3ExprDelete(db, pWhen);
+  if( !pParse->pNewTrigger ){
+    sqlite3DeleteTrigger(db, pTrigger);
+  }else{
+    assert( pParse->pNewTrigger==pTrigger );
+  }
+}
+
+/*
+** This routine is called after all of the trigger actions have been parsed
+** in order to complete the process of building the trigger.
+*/
+SQLITE_PRIVATE void sqlite3FinishTrigger(
+  Parse *pParse,          /* Parser context */
+  TriggerStep *pStepList, /* The triggered program */
+  Token *pAll             /* Token that describes the complete CREATE TRIGGER */
+){
+  Trigger *pTrig = pParse->pNewTrigger;   /* Trigger being finished */
+  char *zName;                            /* Name of trigger */
+  sqlite3 *db = pParse->db;               /* The database */
+  DbFixer sFix;                           /* Fixer object */
+  int iDb;                                /* Database containing the trigger */
+  Token nameToken;                        /* Trigger name for error reporting */
+
+  pParse->pNewTrigger = 0;
+  if( NEVER(pParse->nErr) || !pTrig ) goto triggerfinish_cleanup;
+  zName = pTrig->zName;
+  iDb = sqlite3SchemaToIndex(pParse->db, pTrig->pSchema);
+  pTrig->step_list = pStepList;
+  while( pStepList ){
+    pStepList->pTrig = pTrig;
+    pStepList = pStepList->pNext;
+  }
+  nameToken.z = pTrig->zName;
+  nameToken.n = sqlite3Strlen30(nameToken.z);
+  sqlite3FixInit(&sFix, pParse, iDb, "trigger", &nameToken);
+  if( sqlite3FixTriggerStep(&sFix, pTrig->step_list) 
+   || sqlite3FixExpr(&sFix, pTrig->pWhen) 
+  ){
+    goto triggerfinish_cleanup;
+  }
+
+  /* if we are not initializing,
+  ** build the sqlite_master entry
+  */
+  if( !db->init.busy ){
+    Vdbe *v;
+    char *z;
+
+    /* Make an entry in the sqlite_master table */
+    v = sqlite3GetVdbe(pParse);
+    if( v==0 ) goto triggerfinish_cleanup;
+    sqlite3BeginWriteOperation(pParse, 0, iDb);
+    z = sqlite3DbStrNDup(db, (char*)pAll->z, pAll->n);
+    sqlite3NestedParse(pParse,
+       "INSERT INTO %Q.%s VALUES('trigger',%Q,%Q,0,'CREATE TRIGGER %q')",
+       db->aDb[iDb].zName, SCHEMA_TABLE(iDb), zName,
+       pTrig->table, z);
+    sqlite3DbFree(db, z);
+    sqlite3ChangeCookie(pParse, iDb);
+    sqlite3VdbeAddParseSchemaOp(v, iDb,
+        sqlite3MPrintf(db, "type='trigger' AND name='%q'", zName));
+  }
+
+  if( db->init.busy ){
+    Trigger *pLink = pTrig;
+    Hash *pHash = &db->aDb[iDb].pSchema->trigHash;
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    pTrig = sqlite3HashInsert(pHash, zName, pTrig);
+    if( pTrig ){
+      db->mallocFailed = 1;
+    }else if( pLink->pSchema==pLink->pTabSchema ){
+      Table *pTab;
+      pTab = sqlite3HashFind(&pLink->pTabSchema->tblHash, pLink->table);
+      assert( pTab!=0 );
+      pLink->pNext = pTab->pTrigger;
+      pTab->pTrigger = pLink;
+    }
+  }
+
+triggerfinish_cleanup:
+  sqlite3DeleteTrigger(db, pTrig);
+  assert( !pParse->pNewTrigger );
+  sqlite3DeleteTriggerStep(db, pStepList);
+}
+
+/*
+** Turn a SELECT statement (that the pSelect parameter points to) into
+** a trigger step.  Return a pointer to a TriggerStep structure.
+**
+** The parser calls this routine when it finds a SELECT statement in
+** body of a TRIGGER.  
+*/
+SQLITE_PRIVATE TriggerStep *sqlite3TriggerSelectStep(sqlite3 *db, Select *pSelect){
+  TriggerStep *pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep));
+  if( pTriggerStep==0 ) {
+    sqlite3SelectDelete(db, pSelect);
+    return 0;
+  }
+  pTriggerStep->op = TK_SELECT;
+  pTriggerStep->pSelect = pSelect;
+  pTriggerStep->orconf = OE_Default;
+  return pTriggerStep;
+}
+
+/*
+** Allocate space to hold a new trigger step.  The allocated space
+** holds both the TriggerStep object and the TriggerStep.target.z string.
+**
+** If an OOM error occurs, NULL is returned and db->mallocFailed is set.
+*/
+static TriggerStep *triggerStepAllocate(
+  sqlite3 *db,                /* Database connection */
+  u8 op,                      /* Trigger opcode */
+  Token *pName                /* The target name */
+){
+  TriggerStep *pTriggerStep;
+
+  pTriggerStep = sqlite3DbMallocZero(db, sizeof(TriggerStep) + pName->n);
+  if( pTriggerStep ){
+    char *z = (char*)&pTriggerStep[1];
+    memcpy(z, pName->z, pName->n);
+    pTriggerStep->target.z = z;
+    pTriggerStep->target.n = pName->n;
+    pTriggerStep->op = op;
+  }
+  return pTriggerStep;
+}
+
+/*
+** Build a trigger step out of an INSERT statement.  Return a pointer
+** to the new trigger step.
+**
+** The parser calls this routine when it sees an INSERT inside the
+** body of a trigger.
+*/
+SQLITE_PRIVATE TriggerStep *sqlite3TriggerInsertStep(
+  sqlite3 *db,        /* The database connection */
+  Token *pTableName,  /* Name of the table into which we insert */
+  IdList *pColumn,    /* List of columns in pTableName to insert into */
+  Select *pSelect,    /* A SELECT statement that supplies values */
+  u8 orconf           /* The conflict algorithm (OE_Abort, OE_Replace, etc.) */
+){
+  TriggerStep *pTriggerStep;
+
+  assert(pSelect != 0 || db->mallocFailed);
+
+  pTriggerStep = triggerStepAllocate(db, TK_INSERT, pTableName);
+  if( pTriggerStep ){
+    pTriggerStep->pSelect = sqlite3SelectDup(db, pSelect, EXPRDUP_REDUCE);
+    pTriggerStep->pIdList = pColumn;
+    pTriggerStep->orconf = orconf;
+  }else{
+    sqlite3IdListDelete(db, pColumn);
+  }
+  sqlite3SelectDelete(db, pSelect);
+
+  return pTriggerStep;
+}
+
+/*
+** Construct a trigger step that implements an UPDATE statement and return
+** a pointer to that trigger step.  The parser calls this routine when it
+** sees an UPDATE statement inside the body of a CREATE TRIGGER.
+*/
+SQLITE_PRIVATE TriggerStep *sqlite3TriggerUpdateStep(
+  sqlite3 *db,         /* The database connection */
+  Token *pTableName,   /* Name of the table to be updated */
+  ExprList *pEList,    /* The SET clause: list of column and new values */
+  Expr *pWhere,        /* The WHERE clause */
+  u8 orconf            /* The conflict algorithm. (OE_Abort, OE_Ignore, etc) */
+){
+  TriggerStep *pTriggerStep;
+
+  pTriggerStep = triggerStepAllocate(db, TK_UPDATE, pTableName);
+  if( pTriggerStep ){
+    pTriggerStep->pExprList = sqlite3ExprListDup(db, pEList, EXPRDUP_REDUCE);
+    pTriggerStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE);
+    pTriggerStep->orconf = orconf;
+  }
+  sqlite3ExprListDelete(db, pEList);
+  sqlite3ExprDelete(db, pWhere);
+  return pTriggerStep;
+}
+
+/*
+** Construct a trigger step that implements a DELETE statement and return
+** a pointer to that trigger step.  The parser calls this routine when it
+** sees a DELETE statement inside the body of a CREATE TRIGGER.
+*/
+SQLITE_PRIVATE TriggerStep *sqlite3TriggerDeleteStep(
+  sqlite3 *db,            /* Database connection */
+  Token *pTableName,      /* The table from which rows are deleted */
+  Expr *pWhere            /* The WHERE clause */
+){
+  TriggerStep *pTriggerStep;
+
+  pTriggerStep = triggerStepAllocate(db, TK_DELETE, pTableName);
+  if( pTriggerStep ){
+    pTriggerStep->pWhere = sqlite3ExprDup(db, pWhere, EXPRDUP_REDUCE);
+    pTriggerStep->orconf = OE_Default;
+  }
+  sqlite3ExprDelete(db, pWhere);
+  return pTriggerStep;
+}
+
+/* 
+** Recursively delete a Trigger structure
+*/
+SQLITE_PRIVATE void sqlite3DeleteTrigger(sqlite3 *db, Trigger *pTrigger){
+  if( pTrigger==0 ) return;
+  sqlite3DeleteTriggerStep(db, pTrigger->step_list);
+  sqlite3DbFree(db, pTrigger->zName);
+  sqlite3DbFree(db, pTrigger->table);
+  sqlite3ExprDelete(db, pTrigger->pWhen);
+  sqlite3IdListDelete(db, pTrigger->pColumns);
+  sqlite3DbFree(db, pTrigger);
+}
+
+/*
+** This function is called to drop a trigger from the database schema. 
+**
+** This may be called directly from the parser and therefore identifies
+** the trigger by name.  The sqlite3DropTriggerPtr() routine does the
+** same job as this routine except it takes a pointer to the trigger
+** instead of the trigger name.
+**/
+SQLITE_PRIVATE void sqlite3DropTrigger(Parse *pParse, SrcList *pName, int noErr){
+  Trigger *pTrigger = 0;
+  int i;
+  const char *zDb;
+  const char *zName;
+  sqlite3 *db = pParse->db;
+
+  if( db->mallocFailed ) goto drop_trigger_cleanup;
+  if( SQLITE_OK!=sqlite3ReadSchema(pParse) ){
+    goto drop_trigger_cleanup;
+  }
+
+  assert( pName->nSrc==1 );
+  zDb = pName->a[0].zDatabase;
+  zName = pName->a[0].zName;
+  assert( zDb!=0 || sqlite3BtreeHoldsAllMutexes(db) );
+  for(i=OMIT_TEMPDB; i<db->nDb; i++){
+    int j = (i<2) ? i^1 : i;  /* Search TEMP before MAIN */
+    if( zDb && sqlite3StrICmp(db->aDb[j].zName, zDb) ) continue;
+    assert( sqlite3SchemaMutexHeld(db, j, 0) );
+    pTrigger = sqlite3HashFind(&(db->aDb[j].pSchema->trigHash), zName);
+    if( pTrigger ) break;
+  }
+  if( !pTrigger ){
+    if( !noErr ){
+      sqlite3ErrorMsg(pParse, "no such trigger: %S", pName, 0);
+    }else{
+      sqlite3CodeVerifyNamedSchema(pParse, zDb);
+    }
+    pParse->checkSchema = 1;
+    goto drop_trigger_cleanup;
+  }
+  sqlite3DropTriggerPtr(pParse, pTrigger);
+
+drop_trigger_cleanup:
+  sqlite3SrcListDelete(db, pName);
+}
+
+/*
+** Return a pointer to the Table structure for the table that a trigger
+** is set on.
+*/
+static Table *tableOfTrigger(Trigger *pTrigger){
+  return sqlite3HashFind(&pTrigger->pTabSchema->tblHash, pTrigger->table);
+}
+
+
+/*
+** Drop a trigger given a pointer to that trigger. 
+*/
+SQLITE_PRIVATE void sqlite3DropTriggerPtr(Parse *pParse, Trigger *pTrigger){
+  Table   *pTable;
+  Vdbe *v;
+  sqlite3 *db = pParse->db;
+  int iDb;
+
+  iDb = sqlite3SchemaToIndex(pParse->db, pTrigger->pSchema);
+  assert( iDb>=0 && iDb<db->nDb );
+  pTable = tableOfTrigger(pTrigger);
+  assert( pTable );
+  assert( pTable->pSchema==pTrigger->pSchema || iDb==1 );
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  {
+    int code = SQLITE_DROP_TRIGGER;
+    const char *zDb = db->aDb[iDb].zName;
+    const char *zTab = SCHEMA_TABLE(iDb);
+    if( iDb==1 ) code = SQLITE_DROP_TEMP_TRIGGER;
+    if( sqlite3AuthCheck(pParse, code, pTrigger->zName, pTable->zName, zDb) ||
+      sqlite3AuthCheck(pParse, SQLITE_DELETE, zTab, 0, zDb) ){
+      return;
+    }
+  }
+#endif
+
+  /* Generate code to destroy the database record of the trigger.
+  */
+  assert( pTable!=0 );
+  if( (v = sqlite3GetVdbe(pParse))!=0 ){
+    int base;
+    static const int iLn = VDBE_OFFSET_LINENO(2);
+    static const VdbeOpList dropTrigger[] = {
+      { OP_Rewind,     0, ADDR(9),  0},
+      { OP_String8,    0, 1,        0}, /* 1 */
+      { OP_Column,     0, 1,        2},
+      { OP_Ne,         2, ADDR(8),  1},
+      { OP_String8,    0, 1,        0}, /* 4: "trigger" */
+      { OP_Column,     0, 0,        2},
+      { OP_Ne,         2, ADDR(8),  1},
+      { OP_Delete,     0, 0,        0},
+      { OP_Next,       0, ADDR(1),  0}, /* 8 */
+    };
+
+    sqlite3BeginWriteOperation(pParse, 0, iDb);
+    sqlite3OpenMasterTable(pParse, iDb);
+    base = sqlite3VdbeAddOpList(v,  ArraySize(dropTrigger), dropTrigger, iLn);
+    sqlite3VdbeChangeP4(v, base+1, pTrigger->zName, P4_TRANSIENT);
+    sqlite3VdbeChangeP4(v, base+4, "trigger", P4_STATIC);
+    sqlite3ChangeCookie(pParse, iDb);
+    sqlite3VdbeAddOp2(v, OP_Close, 0, 0);
+    sqlite3VdbeAddOp4(v, OP_DropTrigger, iDb, 0, 0, pTrigger->zName, 0);
+    if( pParse->nMem<3 ){
+      pParse->nMem = 3;
+    }
+  }
+}
+
+/*
+** Remove a trigger from the hash tables of the sqlite* pointer.
+*/
+SQLITE_PRIVATE void sqlite3UnlinkAndDeleteTrigger(sqlite3 *db, int iDb, const char *zName){
+  Trigger *pTrigger;
+  Hash *pHash;
+
+  assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+  pHash = &(db->aDb[iDb].pSchema->trigHash);
+  pTrigger = sqlite3HashInsert(pHash, zName, 0);
+  if( ALWAYS(pTrigger) ){
+    if( pTrigger->pSchema==pTrigger->pTabSchema ){
+      Table *pTab = tableOfTrigger(pTrigger);
+      Trigger **pp;
+      for(pp=&pTab->pTrigger; *pp!=pTrigger; pp=&((*pp)->pNext));
+      *pp = (*pp)->pNext;
+    }
+    sqlite3DeleteTrigger(db, pTrigger);
+    db->flags |= SQLITE_InternChanges;
+  }
+}
+
+/*
+** pEList is the SET clause of an UPDATE statement.  Each entry
+** in pEList is of the format <id>=<expr>.  If any of the entries
+** in pEList have an <id> which matches an identifier in pIdList,
+** then return TRUE.  If pIdList==NULL, then it is considered a
+** wildcard that matches anything.  Likewise if pEList==NULL then
+** it matches anything so always return true.  Return false only
+** if there is no match.
+*/
+static int checkColumnOverlap(IdList *pIdList, ExprList *pEList){
+  int e;
+  if( pIdList==0 || NEVER(pEList==0) ) return 1;
+  for(e=0; e<pEList->nExpr; e++){
+    if( sqlite3IdListIndex(pIdList, pEList->a[e].zName)>=0 ) return 1;
+  }
+  return 0; 
+}
+
+/*
+** Return a list of all triggers on table pTab if there exists at least
+** one trigger that must be fired when an operation of type 'op' is 
+** performed on the table, and, if that operation is an UPDATE, if at
+** least one of the columns in pChanges is being modified.
+*/
+SQLITE_PRIVATE Trigger *sqlite3TriggersExist(
+  Parse *pParse,          /* Parse context */
+  Table *pTab,            /* The table the contains the triggers */
+  int op,                 /* one of TK_DELETE, TK_INSERT, TK_UPDATE */
+  ExprList *pChanges,     /* Columns that change in an UPDATE statement */
+  int *pMask              /* OUT: Mask of TRIGGER_BEFORE|TRIGGER_AFTER */
+){
+  int mask = 0;
+  Trigger *pList = 0;
+  Trigger *p;
+
+  if( (pParse->db->flags & SQLITE_EnableTrigger)!=0 ){
+    pList = sqlite3TriggerList(pParse, pTab);
+  }
+  assert( pList==0 || IsVirtual(pTab)==0 );
+  for(p=pList; p; p=p->pNext){
+    if( p->op==op && checkColumnOverlap(p->pColumns, pChanges) ){
+      mask |= p->tr_tm;
+    }
+  }
+  if( pMask ){
+    *pMask = mask;
+  }
+  return (mask ? pList : 0);
+}
+
+/*
+** Convert the pStep->target token into a SrcList and return a pointer
+** to that SrcList.
+**
+** This routine adds a specific database name, if needed, to the target when
+** forming the SrcList.  This prevents a trigger in one database from
+** referring to a target in another database.  An exception is when the
+** trigger is in TEMP in which case it can refer to any other database it
+** wants.
+*/
+static SrcList *targetSrcList(
+  Parse *pParse,       /* The parsing context */
+  TriggerStep *pStep   /* The trigger containing the target token */
+){
+  int iDb;             /* Index of the database to use */
+  SrcList *pSrc;       /* SrcList to be returned */
+
+  pSrc = sqlite3SrcListAppend(pParse->db, 0, &pStep->target, 0);
+  if( pSrc ){
+    assert( pSrc->nSrc>0 );
+    assert( pSrc->a!=0 );
+    iDb = sqlite3SchemaToIndex(pParse->db, pStep->pTrig->pSchema);
+    if( iDb==0 || iDb>=2 ){
+      sqlite3 *db = pParse->db;
+      assert( iDb<pParse->db->nDb );
+      pSrc->a[pSrc->nSrc-1].zDatabase = sqlite3DbStrDup(db, db->aDb[iDb].zName);
+    }
+  }
+  return pSrc;
+}
+
+/*
+** Generate VDBE code for the statements inside the body of a single 
+** trigger.
+*/
+static int codeTriggerProgram(
+  Parse *pParse,            /* The parser context */
+  TriggerStep *pStepList,   /* List of statements inside the trigger body */
+  int orconf                /* Conflict algorithm. (OE_Abort, etc) */  
+){
+  TriggerStep *pStep;
+  Vdbe *v = pParse->pVdbe;
+  sqlite3 *db = pParse->db;
+
+  assert( pParse->pTriggerTab && pParse->pToplevel );
+  assert( pStepList );
+  assert( v!=0 );
+  for(pStep=pStepList; pStep; pStep=pStep->pNext){
+    /* Figure out the ON CONFLICT policy that will be used for this step
+    ** of the trigger program. If the statement that caused this trigger
+    ** to fire had an explicit ON CONFLICT, then use it. Otherwise, use
+    ** the ON CONFLICT policy that was specified as part of the trigger
+    ** step statement. Example:
+    **
+    **   CREATE TRIGGER AFTER INSERT ON t1 BEGIN;
+    **     INSERT OR REPLACE INTO t2 VALUES(new.a, new.b);
+    **   END;
+    **
+    **   INSERT INTO t1 ... ;            -- insert into t2 uses REPLACE policy
+    **   INSERT OR IGNORE INTO t1 ... ;  -- insert into t2 uses IGNORE policy
+    */
+    pParse->eOrconf = (orconf==OE_Default)?pStep->orconf:(u8)orconf;
+    assert( pParse->okConstFactor==0 );
+
+    switch( pStep->op ){
+      case TK_UPDATE: {
+        sqlite3Update(pParse, 
+          targetSrcList(pParse, pStep),
+          sqlite3ExprListDup(db, pStep->pExprList, 0), 
+          sqlite3ExprDup(db, pStep->pWhere, 0), 
+          pParse->eOrconf
+        );
+        break;
+      }
+      case TK_INSERT: {
+        sqlite3Insert(pParse, 
+          targetSrcList(pParse, pStep),
+          sqlite3SelectDup(db, pStep->pSelect, 0), 
+          sqlite3IdListDup(db, pStep->pIdList), 
+          pParse->eOrconf
+        );
+        break;
+      }
+      case TK_DELETE: {
+        sqlite3DeleteFrom(pParse, 
+          targetSrcList(pParse, pStep),
+          sqlite3ExprDup(db, pStep->pWhere, 0)
+        );
+        break;
+      }
+      default: assert( pStep->op==TK_SELECT ); {
+        SelectDest sDest;
+        Select *pSelect = sqlite3SelectDup(db, pStep->pSelect, 0);
+        sqlite3SelectDestInit(&sDest, SRT_Discard, 0);
+        sqlite3Select(pParse, pSelect, &sDest);
+        sqlite3SelectDelete(db, pSelect);
+        break;
+      }
+    } 
+    if( pStep->op!=TK_SELECT ){
+      sqlite3VdbeAddOp0(v, OP_ResetCount);
+    }
+  }
+
+  return 0;
+}
+
+#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
+/*
+** This function is used to add VdbeComment() annotations to a VDBE
+** program. It is not used in production code, only for debugging.
+*/
+static const char *onErrorText(int onError){
+  switch( onError ){
+    case OE_Abort:    return "abort";
+    case OE_Rollback: return "rollback";
+    case OE_Fail:     return "fail";
+    case OE_Replace:  return "replace";
+    case OE_Ignore:   return "ignore";
+    case OE_Default:  return "default";
+  }
+  return "n/a";
+}
+#endif
+
+/*
+** Parse context structure pFrom has just been used to create a sub-vdbe
+** (trigger program). If an error has occurred, transfer error information
+** from pFrom to pTo.
+*/
+static void transferParseError(Parse *pTo, Parse *pFrom){
+  assert( pFrom->zErrMsg==0 || pFrom->nErr );
+  assert( pTo->zErrMsg==0 || pTo->nErr );
+  if( pTo->nErr==0 ){
+    pTo->zErrMsg = pFrom->zErrMsg;
+    pTo->nErr = pFrom->nErr;
+  }else{
+    sqlite3DbFree(pFrom->db, pFrom->zErrMsg);
+  }
+}
+
+/*
+** Create and populate a new TriggerPrg object with a sub-program 
+** implementing trigger pTrigger with ON CONFLICT policy orconf.
+*/
+static TriggerPrg *codeRowTrigger(
+  Parse *pParse,       /* Current parse context */
+  Trigger *pTrigger,   /* Trigger to code */
+  Table *pTab,         /* The table pTrigger is attached to */
+  int orconf           /* ON CONFLICT policy to code trigger program with */
+){
+  Parse *pTop = sqlite3ParseToplevel(pParse);
+  sqlite3 *db = pParse->db;   /* Database handle */
+  TriggerPrg *pPrg;           /* Value to return */
+  Expr *pWhen = 0;            /* Duplicate of trigger WHEN expression */
+  Vdbe *v;                    /* Temporary VM */
+  NameContext sNC;            /* Name context for sub-vdbe */
+  SubProgram *pProgram = 0;   /* Sub-vdbe for trigger program */
+  Parse *pSubParse;           /* Parse context for sub-vdbe */
+  int iEndTrigger = 0;        /* Label to jump to if WHEN is false */
+
+  assert( pTrigger->zName==0 || pTab==tableOfTrigger(pTrigger) );
+  assert( pTop->pVdbe );
+
+  /* Allocate the TriggerPrg and SubProgram objects. To ensure that they
+  ** are freed if an error occurs, link them into the Parse.pTriggerPrg 
+  ** list of the top-level Parse object sooner rather than later.  */
+  pPrg = sqlite3DbMallocZero(db, sizeof(TriggerPrg));
+  if( !pPrg ) return 0;
+  pPrg->pNext = pTop->pTriggerPrg;
+  pTop->pTriggerPrg = pPrg;
+  pPrg->pProgram = pProgram = sqlite3DbMallocZero(db, sizeof(SubProgram));
+  if( !pProgram ) return 0;
+  sqlite3VdbeLinkSubProgram(pTop->pVdbe, pProgram);
+  pPrg->pTrigger = pTrigger;
+  pPrg->orconf = orconf;
+  pPrg->aColmask[0] = 0xffffffff;
+  pPrg->aColmask[1] = 0xffffffff;
+
+  /* Allocate and populate a new Parse context to use for coding the 
+  ** trigger sub-program.  */
+  pSubParse = sqlite3StackAllocZero(db, sizeof(Parse));
+  if( !pSubParse ) return 0;
+  memset(&sNC, 0, sizeof(sNC));
+  sNC.pParse = pSubParse;
+  pSubParse->db = db;
+  pSubParse->pTriggerTab = pTab;
+  pSubParse->pToplevel = pTop;
+  pSubParse->zAuthContext = pTrigger->zName;
+  pSubParse->eTriggerOp = pTrigger->op;
+  pSubParse->nQueryLoop = pParse->nQueryLoop;
+
+  v = sqlite3GetVdbe(pSubParse);
+  if( v ){
+    VdbeComment((v, "Start: %s.%s (%s %s%s%s ON %s)", 
+      pTrigger->zName, onErrorText(orconf),
+      (pTrigger->tr_tm==TRIGGER_BEFORE ? "BEFORE" : "AFTER"),
+        (pTrigger->op==TK_UPDATE ? "UPDATE" : ""),
+        (pTrigger->op==TK_INSERT ? "INSERT" : ""),
+        (pTrigger->op==TK_DELETE ? "DELETE" : ""),
+      pTab->zName
+    ));
+#ifndef SQLITE_OMIT_TRACE
+    sqlite3VdbeChangeP4(v, -1, 
+      sqlite3MPrintf(db, "-- TRIGGER %s", pTrigger->zName), P4_DYNAMIC
+    );
+#endif
+
+    /* If one was specified, code the WHEN clause. If it evaluates to false
+    ** (or NULL) the sub-vdbe is immediately halted by jumping to the 
+    ** OP_Halt inserted at the end of the program.  */
+    if( pTrigger->pWhen ){
+      pWhen = sqlite3ExprDup(db, pTrigger->pWhen, 0);
+      if( SQLITE_OK==sqlite3ResolveExprNames(&sNC, pWhen) 
+       && db->mallocFailed==0 
+      ){
+        iEndTrigger = sqlite3VdbeMakeLabel(v);
+        sqlite3ExprIfFalse(pSubParse, pWhen, iEndTrigger, SQLITE_JUMPIFNULL);
+      }
+      sqlite3ExprDelete(db, pWhen);
+    }
+
+    /* Code the trigger program into the sub-vdbe. */
+    codeTriggerProgram(pSubParse, pTrigger->step_list, orconf);
+
+    /* Insert an OP_Halt at the end of the sub-program. */
+    if( iEndTrigger ){
+      sqlite3VdbeResolveLabel(v, iEndTrigger);
+    }
+    sqlite3VdbeAddOp0(v, OP_Halt);
+    VdbeComment((v, "End: %s.%s", pTrigger->zName, onErrorText(orconf)));
+
+    transferParseError(pParse, pSubParse);
+    if( db->mallocFailed==0 ){
+      pProgram->aOp = sqlite3VdbeTakeOpArray(v, &pProgram->nOp, &pTop->nMaxArg);
+    }
+    pProgram->nMem = pSubParse->nMem;
+    pProgram->nCsr = pSubParse->nTab;
+    pProgram->nOnce = pSubParse->nOnce;
+    pProgram->token = (void *)pTrigger;
+    pPrg->aColmask[0] = pSubParse->oldmask;
+    pPrg->aColmask[1] = pSubParse->newmask;
+    sqlite3VdbeDelete(v);
+  }
+
+  assert( !pSubParse->pAinc       && !pSubParse->pZombieTab );
+  assert( !pSubParse->pTriggerPrg && !pSubParse->nMaxArg );
+  sqlite3ParserReset(pSubParse);
+  sqlite3StackFree(db, pSubParse);
+
+  return pPrg;
+}
+    
+/*
+** Return a pointer to a TriggerPrg object containing the sub-program for
+** trigger pTrigger with default ON CONFLICT algorithm orconf. If no such
+** TriggerPrg object exists, a new object is allocated and populated before
+** being returned.
+*/
+static TriggerPrg *getRowTrigger(
+  Parse *pParse,       /* Current parse context */
+  Trigger *pTrigger,   /* Trigger to code */
+  Table *pTab,         /* The table trigger pTrigger is attached to */
+  int orconf           /* ON CONFLICT algorithm. */
+){
+  Parse *pRoot = sqlite3ParseToplevel(pParse);
+  TriggerPrg *pPrg;
+
+  assert( pTrigger->zName==0 || pTab==tableOfTrigger(pTrigger) );
+
+  /* It may be that this trigger has already been coded (or is in the
+  ** process of being coded). If this is the case, then an entry with
+  ** a matching TriggerPrg.pTrigger field will be present somewhere
+  ** in the Parse.pTriggerPrg list. Search for such an entry.  */
+  for(pPrg=pRoot->pTriggerPrg; 
+      pPrg && (pPrg->pTrigger!=pTrigger || pPrg->orconf!=orconf); 
+      pPrg=pPrg->pNext
+  );
+
+  /* If an existing TriggerPrg could not be located, create a new one. */
+  if( !pPrg ){
+    pPrg = codeRowTrigger(pParse, pTrigger, pTab, orconf);
+  }
+
+  return pPrg;
+}
+
+/*
+** Generate code for the trigger program associated with trigger p on 
+** table pTab. The reg, orconf and ignoreJump parameters passed to this
+** function are the same as those described in the header function for
+** sqlite3CodeRowTrigger()
+*/
+SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect(
+  Parse *pParse,       /* Parse context */
+  Trigger *p,          /* Trigger to code */
+  Table *pTab,         /* The table to code triggers from */
+  int reg,             /* Reg array containing OLD.* and NEW.* values */
+  int orconf,          /* ON CONFLICT policy */
+  int ignoreJump       /* Instruction to jump to for RAISE(IGNORE) */
+){
+  Vdbe *v = sqlite3GetVdbe(pParse); /* Main VM */
+  TriggerPrg *pPrg;
+  pPrg = getRowTrigger(pParse, p, pTab, orconf);
+  assert( pPrg || pParse->nErr || pParse->db->mallocFailed );
+
+  /* Code the OP_Program opcode in the parent VDBE. P4 of the OP_Program 
+  ** is a pointer to the sub-vdbe containing the trigger program.  */
+  if( pPrg ){
+    int bRecursive = (p->zName && 0==(pParse->db->flags&SQLITE_RecTriggers));
+
+    sqlite3VdbeAddOp3(v, OP_Program, reg, ignoreJump, ++pParse->nMem);
+    sqlite3VdbeChangeP4(v, -1, (const char *)pPrg->pProgram, P4_SUBPROGRAM);
+    VdbeComment(
+        (v, "Call: %s.%s", (p->zName?p->zName:"fkey"), onErrorText(orconf)));
+
+    /* Set the P5 operand of the OP_Program instruction to non-zero if
+    ** recursive invocation of this trigger program is disallowed. Recursive
+    ** invocation is disallowed if (a) the sub-program is really a trigger,
+    ** not a foreign key action, and (b) the flag to enable recursive triggers
+    ** is clear.  */
+    sqlite3VdbeChangeP5(v, (u8)bRecursive);
+  }
+}
+
+/*
+** This is called to code the required FOR EACH ROW triggers for an operation
+** on table pTab. The operation to code triggers for (INSERT, UPDATE or DELETE)
+** is given by the op parameter. The tr_tm parameter determines whether the
+** BEFORE or AFTER triggers are coded. If the operation is an UPDATE, then
+** parameter pChanges is passed the list of columns being modified.
+**
+** If there are no triggers that fire at the specified time for the specified
+** operation on pTab, this function is a no-op.
+**
+** The reg argument is the address of the first in an array of registers 
+** that contain the values substituted for the new.* and old.* references
+** in the trigger program. If N is the number of columns in table pTab
+** (a copy of pTab->nCol), then registers are populated as follows:
+**
+**   Register       Contains
+**   ------------------------------------------------------
+**   reg+0          OLD.rowid
+**   reg+1          OLD.* value of left-most column of pTab
+**   ...            ...
+**   reg+N          OLD.* value of right-most column of pTab
+**   reg+N+1        NEW.rowid
+**   reg+N+2        OLD.* value of left-most column of pTab
+**   ...            ...
+**   reg+N+N+1      NEW.* value of right-most column of pTab
+**
+** For ON DELETE triggers, the registers containing the NEW.* values will
+** never be accessed by the trigger program, so they are not allocated or 
+** populated by the caller (there is no data to populate them with anyway). 
+** Similarly, for ON INSERT triggers the values stored in the OLD.* registers
+** are never accessed, and so are not allocated by the caller. So, for an
+** ON INSERT trigger, the value passed to this function as parameter reg
+** is not a readable register, although registers (reg+N) through 
+** (reg+N+N+1) are.
+**
+** Parameter orconf is the default conflict resolution algorithm for the
+** trigger program to use (REPLACE, IGNORE etc.). Parameter ignoreJump
+** is the instruction that control should jump to if a trigger program
+** raises an IGNORE exception.
+*/
+SQLITE_PRIVATE void sqlite3CodeRowTrigger(
+  Parse *pParse,       /* Parse context */
+  Trigger *pTrigger,   /* List of triggers on table pTab */
+  int op,              /* One of TK_UPDATE, TK_INSERT, TK_DELETE */
+  ExprList *pChanges,  /* Changes list for any UPDATE OF triggers */
+  int tr_tm,           /* One of TRIGGER_BEFORE, TRIGGER_AFTER */
+  Table *pTab,         /* The table to code triggers from */
+  int reg,             /* The first in an array of registers (see above) */
+  int orconf,          /* ON CONFLICT policy */
+  int ignoreJump       /* Instruction to jump to for RAISE(IGNORE) */
+){
+  Trigger *p;          /* Used to iterate through pTrigger list */
+
+  assert( op==TK_UPDATE || op==TK_INSERT || op==TK_DELETE );
+  assert( tr_tm==TRIGGER_BEFORE || tr_tm==TRIGGER_AFTER );
+  assert( (op==TK_UPDATE)==(pChanges!=0) );
+
+  for(p=pTrigger; p; p=p->pNext){
+
+    /* Sanity checking:  The schema for the trigger and for the table are
+    ** always defined.  The trigger must be in the same schema as the table
+    ** or else it must be a TEMP trigger. */
+    assert( p->pSchema!=0 );
+    assert( p->pTabSchema!=0 );
+    assert( p->pSchema==p->pTabSchema 
+         || p->pSchema==pParse->db->aDb[1].pSchema );
+
+    /* Determine whether we should code this trigger */
+    if( p->op==op 
+     && p->tr_tm==tr_tm 
+     && checkColumnOverlap(p->pColumns, pChanges)
+    ){
+      sqlite3CodeRowTriggerDirect(pParse, p, pTab, reg, orconf, ignoreJump);
+    }
+  }
+}
+
+/*
+** Triggers may access values stored in the old.* or new.* pseudo-table. 
+** This function returns a 32-bit bitmask indicating which columns of the 
+** old.* or new.* tables actually are used by triggers. This information 
+** may be used by the caller, for example, to avoid having to load the entire
+** old.* record into memory when executing an UPDATE or DELETE command.
+**
+** Bit 0 of the returned mask is set if the left-most column of the
+** table may be accessed using an [old|new].<col> reference. Bit 1 is set if
+** the second leftmost column value is required, and so on. If there
+** are more than 32 columns in the table, and at least one of the columns
+** with an index greater than 32 may be accessed, 0xffffffff is returned.
+**
+** It is not possible to determine if the old.rowid or new.rowid column is 
+** accessed by triggers. The caller must always assume that it is.
+**
+** Parameter isNew must be either 1 or 0. If it is 0, then the mask returned
+** applies to the old.* table. If 1, the new.* table.
+**
+** Parameter tr_tm must be a mask with one or both of the TRIGGER_BEFORE
+** and TRIGGER_AFTER bits set. Values accessed by BEFORE triggers are only
+** included in the returned mask if the TRIGGER_BEFORE bit is set in the
+** tr_tm parameter. Similarly, values accessed by AFTER triggers are only
+** included in the returned mask if the TRIGGER_AFTER bit is set in tr_tm.
+*/
+SQLITE_PRIVATE u32 sqlite3TriggerColmask(
+  Parse *pParse,       /* Parse context */
+  Trigger *pTrigger,   /* List of triggers on table pTab */
+  ExprList *pChanges,  /* Changes list for any UPDATE OF triggers */
+  int isNew,           /* 1 for new.* ref mask, 0 for old.* ref mask */
+  int tr_tm,           /* Mask of TRIGGER_BEFORE|TRIGGER_AFTER */
+  Table *pTab,         /* The table to code triggers from */
+  int orconf           /* Default ON CONFLICT policy for trigger steps */
+){
+  const int op = pChanges ? TK_UPDATE : TK_DELETE;
+  u32 mask = 0;
+  Trigger *p;
+
+  assert( isNew==1 || isNew==0 );
+  for(p=pTrigger; p; p=p->pNext){
+    if( p->op==op && (tr_tm&p->tr_tm)
+     && checkColumnOverlap(p->pColumns,pChanges)
+    ){
+      TriggerPrg *pPrg;
+      pPrg = getRowTrigger(pParse, p, pTab, orconf);
+      if( pPrg ){
+        mask |= pPrg->aColmask[isNew];
+      }
+    }
+  }
+
+  return mask;
+}
+
+#endif /* !defined(SQLITE_OMIT_TRIGGER) */
+
+/************** End of trigger.c *********************************************/
+/************** Begin file update.c ******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains C code routines that are called by the parser
+** to handle UPDATE statements.
+*/
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/* Forward declaration */
+static void updateVirtualTable(
+  Parse *pParse,       /* The parsing context */
+  SrcList *pSrc,       /* The virtual table to be modified */
+  Table *pTab,         /* The virtual table */
+  ExprList *pChanges,  /* The columns to change in the UPDATE statement */
+  Expr *pRowidExpr,    /* Expression used to recompute the rowid */
+  int *aXRef,          /* Mapping from columns of pTab to entries in pChanges */
+  Expr *pWhere,        /* WHERE clause of the UPDATE statement */
+  int onError          /* ON CONFLICT strategy */
+);
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/*
+** The most recently coded instruction was an OP_Column to retrieve the
+** i-th column of table pTab. This routine sets the P4 parameter of the 
+** OP_Column to the default value, if any.
+**
+** The default value of a column is specified by a DEFAULT clause in the 
+** column definition. This was either supplied by the user when the table
+** was created, or added later to the table definition by an ALTER TABLE
+** command. If the latter, then the row-records in the table btree on disk
+** may not contain a value for the column and the default value, taken
+** from the P4 parameter of the OP_Column instruction, is returned instead.
+** If the former, then all row-records are guaranteed to include a value
+** for the column and the P4 value is not required.
+**
+** Column definitions created by an ALTER TABLE command may only have 
+** literal default values specified: a number, null or a string. (If a more
+** complicated default expression value was provided, it is evaluated 
+** when the ALTER TABLE is executed and one of the literal values written
+** into the sqlite_master table.)
+**
+** Therefore, the P4 parameter is only required if the default value for
+** the column is a literal number, string or null. The sqlite3ValueFromExpr()
+** function is capable of transforming these types of expressions into
+** sqlite3_value objects.
+**
+** If parameter iReg is not negative, code an OP_RealAffinity instruction
+** on register iReg. This is used when an equivalent integer value is 
+** stored in place of an 8-byte floating point value in order to save 
+** space.
+*/
+SQLITE_PRIVATE void sqlite3ColumnDefault(Vdbe *v, Table *pTab, int i, int iReg){
+  assert( pTab!=0 );
+  if( !pTab->pSelect ){
+    sqlite3_value *pValue = 0;
+    u8 enc = ENC(sqlite3VdbeDb(v));
+    Column *pCol = &pTab->aCol[i];
+    VdbeComment((v, "%s.%s", pTab->zName, pCol->zName));
+    assert( i<pTab->nCol );
+    sqlite3ValueFromExpr(sqlite3VdbeDb(v), pCol->pDflt, enc, 
+                         pCol->affinity, &pValue);
+    if( pValue ){
+      sqlite3VdbeChangeP4(v, -1, (const char *)pValue, P4_MEM);
+    }
+#ifndef SQLITE_OMIT_FLOATING_POINT
+    if( pTab->aCol[i].affinity==SQLITE_AFF_REAL ){
+      sqlite3VdbeAddOp1(v, OP_RealAffinity, iReg);
+    }
+#endif
+  }
+}
+
+/*
+** Process an UPDATE statement.
+**
+**   UPDATE OR IGNORE table_wxyz SET a=b, c=d WHERE e<5 AND f NOT NULL;
+**          \_______/ \________/     \______/       \________________/
+*            onError   pTabList      pChanges             pWhere
+*/
+SQLITE_PRIVATE void sqlite3Update(
+  Parse *pParse,         /* The parser context */
+  SrcList *pTabList,     /* The table in which we should change things */
+  ExprList *pChanges,    /* Things to be changed */
+  Expr *pWhere,          /* The WHERE clause.  May be null */
+  int onError            /* How to handle constraint errors */
+){
+  int i, j;              /* Loop counters */
+  Table *pTab;           /* The table to be updated */
+  int addrTop = 0;       /* VDBE instruction address of the start of the loop */
+  WhereInfo *pWInfo;     /* Information about the WHERE clause */
+  Vdbe *v;               /* The virtual database engine */
+  Index *pIdx;           /* For looping over indices */
+  Index *pPk;            /* The PRIMARY KEY index for WITHOUT ROWID tables */
+  int nIdx;              /* Number of indices that need updating */
+  int iBaseCur;          /* Base cursor number */
+  int iDataCur;          /* Cursor for the canonical data btree */
+  int iIdxCur;           /* Cursor for the first index */
+  sqlite3 *db;           /* The database structure */
+  int *aRegIdx = 0;      /* One register assigned to each index to be updated */
+  int *aXRef = 0;        /* aXRef[i] is the index in pChanges->a[] of the
+                         ** an expression for the i-th column of the table.
+                         ** aXRef[i]==-1 if the i-th column is not changed. */
+  u8 *aToOpen;           /* 1 for tables and indices to be opened */
+  u8 chngPk;             /* PRIMARY KEY changed in a WITHOUT ROWID table */
+  u8 chngRowid;          /* Rowid changed in a normal table */
+  u8 chngKey;            /* Either chngPk or chngRowid */
+  Expr *pRowidExpr = 0;  /* Expression defining the new record number */
+  AuthContext sContext;  /* The authorization context */
+  NameContext sNC;       /* The name-context to resolve expressions in */
+  int iDb;               /* Database containing the table being updated */
+  int okOnePass;         /* True for one-pass algorithm without the FIFO */
+  int hasFK;             /* True if foreign key processing is required */
+  int labelBreak;        /* Jump here to break out of UPDATE loop */
+  int labelContinue;     /* Jump here to continue next step of UPDATE loop */
+
+#ifndef SQLITE_OMIT_TRIGGER
+  int isView;            /* True when updating a view (INSTEAD OF trigger) */
+  Trigger *pTrigger;     /* List of triggers on pTab, if required */
+  int tmask;             /* Mask of TRIGGER_BEFORE|TRIGGER_AFTER */
+#endif
+  int newmask;           /* Mask of NEW.* columns accessed by BEFORE triggers */
+  int iEph = 0;          /* Ephemeral table holding all primary key values */
+  int nKey = 0;          /* Number of elements in regKey for WITHOUT ROWID */
+  int aiCurOnePass[2];   /* The write cursors opened by WHERE_ONEPASS */
+
+  /* Register Allocations */
+  int regRowCount = 0;   /* A count of rows changed */
+  int regOldRowid;       /* The old rowid */
+  int regNewRowid;       /* The new rowid */
+  int regNew;            /* Content of the NEW.* table in triggers */
+  int regOld = 0;        /* Content of OLD.* table in triggers */
+  int regRowSet = 0;     /* Rowset of rows to be updated */
+  int regKey = 0;        /* composite PRIMARY KEY value */
+
+  memset(&sContext, 0, sizeof(sContext));
+  db = pParse->db;
+  if( pParse->nErr || db->mallocFailed ){
+    goto update_cleanup;
+  }
+  assert( pTabList->nSrc==1 );
+
+  /* Locate the table which we want to update. 
+  */
+  pTab = sqlite3SrcListLookup(pParse, pTabList);
+  if( pTab==0 ) goto update_cleanup;
+  iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
+
+  /* Figure out if we have any triggers and if the table being
+  ** updated is a view.
+  */
+#ifndef SQLITE_OMIT_TRIGGER
+  pTrigger = sqlite3TriggersExist(pParse, pTab, TK_UPDATE, pChanges, &tmask);
+  isView = pTab->pSelect!=0;
+  assert( pTrigger || tmask==0 );
+#else
+# define pTrigger 0
+# define isView 0
+# define tmask 0
+#endif
+#ifdef SQLITE_OMIT_VIEW
+# undef isView
+# define isView 0
+#endif
+
+  if( sqlite3ViewGetColumnNames(pParse, pTab) ){
+    goto update_cleanup;
+  }
+  if( sqlite3IsReadOnly(pParse, pTab, tmask) ){
+    goto update_cleanup;
+  }
+
+  /* Allocate a cursors for the main database table and for all indices.
+  ** The index cursors might not be used, but if they are used they
+  ** need to occur right after the database cursor.  So go ahead and
+  ** allocate enough space, just in case.
+  */
+  pTabList->a[0].iCursor = iBaseCur = iDataCur = pParse->nTab++;
+  iIdxCur = iDataCur+1;
+  pPk = HasRowid(pTab) ? 0 : sqlite3PrimaryKeyIndex(pTab);
+  for(nIdx=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, nIdx++){
+    if( IsPrimaryKeyIndex(pIdx) && pPk!=0 ){
+      iDataCur = pParse->nTab;
+      pTabList->a[0].iCursor = iDataCur;
+    }
+    pParse->nTab++;
+  }
+
+  /* Allocate space for aXRef[], aRegIdx[], and aToOpen[].  
+  ** Initialize aXRef[] and aToOpen[] to their default values.
+  */
+  aXRef = sqlite3DbMallocRaw(db, sizeof(int) * (pTab->nCol+nIdx) + nIdx+2 );
+  if( aXRef==0 ) goto update_cleanup;
+  aRegIdx = aXRef+pTab->nCol;
+  aToOpen = (u8*)(aRegIdx+nIdx);
+  memset(aToOpen, 1, nIdx+1);
+  aToOpen[nIdx+1] = 0;
+  for(i=0; i<pTab->nCol; i++) aXRef[i] = -1;
+
+  /* Initialize the name-context */
+  memset(&sNC, 0, sizeof(sNC));
+  sNC.pParse = pParse;
+  sNC.pSrcList = pTabList;
+
+  /* Resolve the column names in all the expressions of the
+  ** of the UPDATE statement.  Also find the column index
+  ** for each column to be updated in the pChanges array.  For each
+  ** column to be updated, make sure we have authorization to change
+  ** that column.
+  */
+  chngRowid = chngPk = 0;
+  for(i=0; i<pChanges->nExpr; i++){
+    if( sqlite3ResolveExprNames(&sNC, pChanges->a[i].pExpr) ){
+      goto update_cleanup;
+    }
+    for(j=0; j<pTab->nCol; j++){
+      if( sqlite3StrICmp(pTab->aCol[j].zName, pChanges->a[i].zName)==0 ){
+        if( j==pTab->iPKey ){
+          chngRowid = 1;
+          pRowidExpr = pChanges->a[i].pExpr;
+        }else if( pPk && (pTab->aCol[j].colFlags & COLFLAG_PRIMKEY)!=0 ){
+          chngPk = 1;
+        }
+        aXRef[j] = i;
+        break;
+      }
+    }
+    if( j>=pTab->nCol ){
+      if( pPk==0 && sqlite3IsRowid(pChanges->a[i].zName) ){
+        j = -1;
+        chngRowid = 1;
+        pRowidExpr = pChanges->a[i].pExpr;
+      }else{
+        sqlite3ErrorMsg(pParse, "no such column: %s", pChanges->a[i].zName);
+        pParse->checkSchema = 1;
+        goto update_cleanup;
+      }
+    }
+#ifndef SQLITE_OMIT_AUTHORIZATION
+    {
+      int rc;
+      rc = sqlite3AuthCheck(pParse, SQLITE_UPDATE, pTab->zName,
+                            j<0 ? "ROWID" : pTab->aCol[j].zName,
+                            db->aDb[iDb].zName);
+      if( rc==SQLITE_DENY ){
+        goto update_cleanup;
+      }else if( rc==SQLITE_IGNORE ){
+        aXRef[j] = -1;
+      }
+    }
+#endif
+  }
+  assert( (chngRowid & chngPk)==0 );
+  assert( chngRowid==0 || chngRowid==1 );
+  assert( chngPk==0 || chngPk==1 );
+  chngKey = chngRowid + chngPk;
+
+  /* The SET expressions are not actually used inside the WHERE loop.
+  ** So reset the colUsed mask
+  */
+  pTabList->a[0].colUsed = 0;
+
+  hasFK = sqlite3FkRequired(pParse, pTab, aXRef, chngKey);
+
+  /* There is one entry in the aRegIdx[] array for each index on the table
+  ** being updated.  Fill in aRegIdx[] with a register number that will hold
+  ** the key for accessing each index.  
+  */
+  for(j=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, j++){
+    int reg;
+    if( chngKey || hasFK || pIdx->pPartIdxWhere || pIdx==pPk ){
+      reg = ++pParse->nMem;
+    }else{
+      reg = 0;
+      for(i=0; i<pIdx->nKeyCol; i++){
+        if( aXRef[pIdx->aiColumn[i]]>=0 ){
+          reg = ++pParse->nMem;
+          break;
+        }
+      }
+    }
+    if( reg==0 ) aToOpen[j+1] = 0;
+    aRegIdx[j] = reg;
+  }
+
+  /* Begin generating code. */
+  v = sqlite3GetVdbe(pParse);
+  if( v==0 ) goto update_cleanup;
+  if( pParse->nested==0 ) sqlite3VdbeCountChanges(v);
+  sqlite3BeginWriteOperation(pParse, 1, iDb);
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  /* Virtual tables must be handled separately */
+  if( IsVirtual(pTab) ){
+    updateVirtualTable(pParse, pTabList, pTab, pChanges, pRowidExpr, aXRef,
+                       pWhere, onError);
+    pWhere = 0;
+    pTabList = 0;
+    goto update_cleanup;
+  }
+#endif
+
+  /* Allocate required registers. */
+  regRowSet = ++pParse->nMem;
+  regOldRowid = regNewRowid = ++pParse->nMem;
+  if( chngPk || pTrigger || hasFK ){
+    regOld = pParse->nMem + 1;
+    pParse->nMem += pTab->nCol;
+  }
+  if( chngKey || pTrigger || hasFK ){
+    regNewRowid = ++pParse->nMem;
+  }
+  regNew = pParse->nMem + 1;
+  pParse->nMem += pTab->nCol;
+
+  /* Start the view context. */
+  if( isView ){
+    sqlite3AuthContextPush(pParse, &sContext, pTab->zName);
+  }
+
+  /* If we are trying to update a view, realize that view into
+  ** an ephemeral table.
+  */
+#if !defined(SQLITE_OMIT_VIEW) && !defined(SQLITE_OMIT_TRIGGER)
+  if( isView ){
+    sqlite3MaterializeView(pParse, pTab, pWhere, iDataCur);
+  }
+#endif
+
+  /* Resolve the column names in all the expressions in the
+  ** WHERE clause.
+  */
+  if( sqlite3ResolveExprNames(&sNC, pWhere) ){
+    goto update_cleanup;
+  }
+
+  /* Begin the database scan
+  */
+  if( HasRowid(pTab) ){
+    sqlite3VdbeAddOp3(v, OP_Null, 0, regRowSet, regOldRowid);
+    pWInfo = sqlite3WhereBegin(
+        pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED, iIdxCur
+    );
+    if( pWInfo==0 ) goto update_cleanup;
+    okOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass);
+  
+    /* Remember the rowid of every item to be updated.
+    */
+    sqlite3VdbeAddOp2(v, OP_Rowid, iDataCur, regOldRowid);
+    if( !okOnePass ){
+      sqlite3VdbeAddOp2(v, OP_RowSetAdd, regRowSet, regOldRowid);
+    }
+  
+    /* End the database scan loop.
+    */
+    sqlite3WhereEnd(pWInfo);
+  }else{
+    int iPk;         /* First of nPk memory cells holding PRIMARY KEY value */
+    i16 nPk;         /* Number of components of the PRIMARY KEY */
+    int addrOpen;    /* Address of the OpenEphemeral instruction */
+
+    assert( pPk!=0 );
+    nPk = pPk->nKeyCol;
+    iPk = pParse->nMem+1;
+    pParse->nMem += nPk;
+    regKey = ++pParse->nMem;
+    iEph = pParse->nTab++;
+    sqlite3VdbeAddOp2(v, OP_Null, 0, iPk);
+    addrOpen = sqlite3VdbeAddOp2(v, OP_OpenEphemeral, iEph, nPk);
+    sqlite3VdbeSetP4KeyInfo(pParse, pPk);
+    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, 0, 0, 
+                               WHERE_ONEPASS_DESIRED, iIdxCur);
+    if( pWInfo==0 ) goto update_cleanup;
+    okOnePass = sqlite3WhereOkOnePass(pWInfo, aiCurOnePass);
+    for(i=0; i<nPk; i++){
+      sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, pPk->aiColumn[i],
+                                      iPk+i);
+    }
+    if( okOnePass ){
+      sqlite3VdbeChangeToNoop(v, addrOpen);
+      nKey = nPk;
+      regKey = iPk;
+    }else{
+      sqlite3VdbeAddOp4(v, OP_MakeRecord, iPk, nPk, regKey,
+                        sqlite3IndexAffinityStr(v, pPk), nPk);
+      sqlite3VdbeAddOp2(v, OP_IdxInsert, iEph, regKey);
+    }
+    sqlite3WhereEnd(pWInfo);
+  }
+
+  /* Initialize the count of updated rows
+  */
+  if( (db->flags & SQLITE_CountRows) && !pParse->pTriggerTab ){
+    regRowCount = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regRowCount);
+  }
+
+  labelBreak = sqlite3VdbeMakeLabel(v);
+  if( !isView ){
+    /* 
+    ** Open every index that needs updating.  Note that if any
+    ** index could potentially invoke a REPLACE conflict resolution 
+    ** action, then we need to open all indices because we might need
+    ** to be deleting some records.
+    */
+    if( onError==OE_Replace ){
+      memset(aToOpen, 1, nIdx+1);
+    }else{
+      for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+        if( pIdx->onError==OE_Replace ){
+          memset(aToOpen, 1, nIdx+1);
+          break;
+        }
+      }
+    }
+    if( okOnePass ){
+      if( aiCurOnePass[0]>=0 ) aToOpen[aiCurOnePass[0]-iBaseCur] = 0;
+      if( aiCurOnePass[1]>=0 ) aToOpen[aiCurOnePass[1]-iBaseCur] = 0;
+    }
+    sqlite3OpenTableAndIndices(pParse, pTab, OP_OpenWrite, iBaseCur, aToOpen,
+                               0, 0);
+  }
+
+  /* Top of the update loop */
+  if( okOnePass ){
+    if( aToOpen[iDataCur-iBaseCur] && !isView ){
+      assert( pPk );
+      sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, labelBreak, regKey, nKey);
+      VdbeCoverageNeverTaken(v);
+    }
+    labelContinue = labelBreak;
+    sqlite3VdbeAddOp2(v, OP_IsNull, pPk ? regKey : regOldRowid, labelBreak);
+    VdbeCoverageIf(v, pPk==0);
+    VdbeCoverageIf(v, pPk!=0);
+  }else if( pPk ){
+    labelContinue = sqlite3VdbeMakeLabel(v);
+    sqlite3VdbeAddOp2(v, OP_Rewind, iEph, labelBreak); VdbeCoverage(v);
+    addrTop = sqlite3VdbeAddOp2(v, OP_RowKey, iEph, regKey);
+    sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, labelContinue, regKey, 0);
+    VdbeCoverage(v);
+  }else{
+    labelContinue = sqlite3VdbeAddOp3(v, OP_RowSetRead, regRowSet, labelBreak,
+                             regOldRowid);
+    VdbeCoverage(v);
+    sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, labelContinue, regOldRowid);
+    VdbeCoverage(v);
+  }
+
+  /* If the record number will change, set register regNewRowid to
+  ** contain the new value. If the record number is not being modified,
+  ** then regNewRowid is the same register as regOldRowid, which is
+  ** already populated.  */
+  assert( chngKey || pTrigger || hasFK || regOldRowid==regNewRowid );
+  if( chngRowid ){
+    sqlite3ExprCode(pParse, pRowidExpr, regNewRowid);
+    sqlite3VdbeAddOp1(v, OP_MustBeInt, regNewRowid); VdbeCoverage(v);
+  }
+
+  /* Compute the old pre-UPDATE content of the row being changed, if that
+  ** information is needed */
+  if( chngPk || hasFK || pTrigger ){
+    u32 oldmask = (hasFK ? sqlite3FkOldmask(pParse, pTab) : 0);
+    oldmask |= sqlite3TriggerColmask(pParse, 
+        pTrigger, pChanges, 0, TRIGGER_BEFORE|TRIGGER_AFTER, pTab, onError
+    );
+    for(i=0; i<pTab->nCol; i++){
+      if( oldmask==0xffffffff
+       || (i<32 && (oldmask & MASKBIT32(i))!=0)
+       || (pTab->aCol[i].colFlags & COLFLAG_PRIMKEY)!=0
+      ){
+        testcase(  oldmask!=0xffffffff && i==31 );
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, regOld+i);
+      }else{
+        sqlite3VdbeAddOp2(v, OP_Null, 0, regOld+i);
+      }
+    }
+    if( chngRowid==0 && pPk==0 ){
+      sqlite3VdbeAddOp2(v, OP_Copy, regOldRowid, regNewRowid);
+    }
+  }
+
+  /* Populate the array of registers beginning at regNew with the new
+  ** row data. This array is used to check constants, create the new
+  ** table and index records, and as the values for any new.* references
+  ** made by triggers.
+  **
+  ** If there are one or more BEFORE triggers, then do not populate the
+  ** registers associated with columns that are (a) not modified by
+  ** this UPDATE statement and (b) not accessed by new.* references. The
+  ** values for registers not modified by the UPDATE must be reloaded from 
+  ** the database after the BEFORE triggers are fired anyway (as the trigger 
+  ** may have modified them). So not loading those that are not going to
+  ** be used eliminates some redundant opcodes.
+  */
+  newmask = sqlite3TriggerColmask(
+      pParse, pTrigger, pChanges, 1, TRIGGER_BEFORE, pTab, onError
+  );
+  /*sqlite3VdbeAddOp3(v, OP_Null, 0, regNew, regNew+pTab->nCol-1);*/
+  for(i=0; i<pTab->nCol; i++){
+    if( i==pTab->iPKey ){
+      sqlite3VdbeAddOp2(v, OP_Null, 0, regNew+i);
+    }else{
+      j = aXRef[i];
+      if( j>=0 ){
+        sqlite3ExprCode(pParse, pChanges->a[j].pExpr, regNew+i);
+      }else if( 0==(tmask&TRIGGER_BEFORE) || i>31 || (newmask & MASKBIT32(i)) ){
+        /* This branch loads the value of a column that will not be changed 
+        ** into a register. This is done if there are no BEFORE triggers, or
+        ** if there are one or more BEFORE triggers that use this value via
+        ** a new.* reference in a trigger program.
+        */
+        testcase( i==31 );
+        testcase( i==32 );
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, regNew+i);
+      }else{
+        sqlite3VdbeAddOp2(v, OP_Null, 0, regNew+i);
+      }
+    }
+  }
+
+  /* Fire any BEFORE UPDATE triggers. This happens before constraints are
+  ** verified. One could argue that this is wrong.
+  */
+  if( tmask&TRIGGER_BEFORE ){
+    sqlite3TableAffinity(v, pTab, regNew);
+    sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, 
+        TRIGGER_BEFORE, pTab, regOldRowid, onError, labelContinue);
+
+    /* The row-trigger may have deleted the row being updated. In this
+    ** case, jump to the next row. No updates or AFTER triggers are 
+    ** required. This behavior - what happens when the row being updated
+    ** is deleted or renamed by a BEFORE trigger - is left undefined in the
+    ** documentation.
+    */
+    if( pPk ){
+      sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, labelContinue,regKey,nKey);
+      VdbeCoverage(v);
+    }else{
+      sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, labelContinue, regOldRowid);
+      VdbeCoverage(v);
+    }
+
+    /* If it did not delete it, the row-trigger may still have modified 
+    ** some of the columns of the row being updated. Load the values for 
+    ** all columns not modified by the update statement into their 
+    ** registers in case this has happened.
+    */
+    for(i=0; i<pTab->nCol; i++){
+      if( aXRef[i]<0 && i!=pTab->iPKey ){
+        sqlite3ExprCodeGetColumnOfTable(v, pTab, iDataCur, i, regNew+i);
+      }
+    }
+  }
+
+  if( !isView ){
+    int j1 = 0;           /* Address of jump instruction */
+    int bReplace = 0;     /* True if REPLACE conflict resolution might happen */
+
+    /* Do constraint checks. */
+    assert( regOldRowid>0 );
+    sqlite3GenerateConstraintChecks(pParse, pTab, aRegIdx, iDataCur, iIdxCur,
+        regNewRowid, regOldRowid, chngKey, onError, labelContinue, &bReplace);
+
+    /* Do FK constraint checks. */
+    if( hasFK ){
+      sqlite3FkCheck(pParse, pTab, regOldRowid, 0, aXRef, chngKey);
+    }
+
+    /* Delete the index entries associated with the current record.  */
+    if( bReplace || chngKey ){
+      if( pPk ){
+        j1 = sqlite3VdbeAddOp4Int(v, OP_NotFound, iDataCur, 0, regKey, nKey);
+      }else{
+        j1 = sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, 0, regOldRowid);
+      }
+      VdbeCoverageNeverTaken(v);
+    }
+    sqlite3GenerateRowIndexDelete(pParse, pTab, iDataCur, iIdxCur, aRegIdx);
+  
+    /* If changing the record number, delete the old record.  */
+    if( hasFK || chngKey || pPk!=0 ){
+      sqlite3VdbeAddOp2(v, OP_Delete, iDataCur, 0);
+    }
+    if( bReplace || chngKey ){
+      sqlite3VdbeJumpHere(v, j1);
+    }
+
+    if( hasFK ){
+      sqlite3FkCheck(pParse, pTab, 0, regNewRowid, aXRef, chngKey);
+    }
+  
+    /* Insert the new index entries and the new record. */
+    sqlite3CompleteInsertion(pParse, pTab, iDataCur, iIdxCur,
+                             regNewRowid, aRegIdx, 1, 0, 0);
+
+    /* Do any ON CASCADE, SET NULL or SET DEFAULT operations required to
+    ** handle rows (possibly in other tables) that refer via a foreign key
+    ** to the row just updated. */ 
+    if( hasFK ){
+      sqlite3FkActions(pParse, pTab, pChanges, regOldRowid, aXRef, chngKey);
+    }
+  }
+
+  /* Increment the row counter 
+  */
+  if( (db->flags & SQLITE_CountRows) && !pParse->pTriggerTab){
+    sqlite3VdbeAddOp2(v, OP_AddImm, regRowCount, 1);
+  }
+
+  sqlite3CodeRowTrigger(pParse, pTrigger, TK_UPDATE, pChanges, 
+      TRIGGER_AFTER, pTab, regOldRowid, onError, labelContinue);
+
+  /* Repeat the above with the next record to be updated, until
+  ** all record selected by the WHERE clause have been updated.
+  */
+  if( okOnePass ){
+    /* Nothing to do at end-of-loop for a single-pass */
+  }else if( pPk ){
+    sqlite3VdbeResolveLabel(v, labelContinue);
+    sqlite3VdbeAddOp2(v, OP_Next, iEph, addrTop); VdbeCoverage(v);
+  }else{
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, labelContinue);
+  }
+  sqlite3VdbeResolveLabel(v, labelBreak);
+
+  /* Close all tables */
+  for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){
+    assert( aRegIdx );
+    if( aToOpen[i+1] ){
+      sqlite3VdbeAddOp2(v, OP_Close, iIdxCur+i, 0);
+    }
+  }
+  if( iDataCur<iIdxCur ) sqlite3VdbeAddOp2(v, OP_Close, iDataCur, 0);
+
+  /* Update the sqlite_sequence table by storing the content of the
+  ** maximum rowid counter values recorded while inserting into
+  ** autoincrement tables.
+  */
+  if( pParse->nested==0 && pParse->pTriggerTab==0 ){
+    sqlite3AutoincrementEnd(pParse);
+  }
+
+  /*
+  ** Return the number of rows that were changed. If this routine is 
+  ** generating code because of a call to sqlite3NestedParse(), do not
+  ** invoke the callback function.
+  */
+  if( (db->flags&SQLITE_CountRows) && !pParse->pTriggerTab && !pParse->nested ){
+    sqlite3VdbeAddOp2(v, OP_ResultRow, regRowCount, 1);
+    sqlite3VdbeSetNumCols(v, 1);
+    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "rows updated", SQLITE_STATIC);
+  }
+
+update_cleanup:
+  sqlite3AuthContextPop(&sContext);
+  sqlite3DbFree(db, aXRef); /* Also frees aRegIdx[] and aToOpen[] */
+  sqlite3SrcListDelete(db, pTabList);
+  sqlite3ExprListDelete(db, pChanges);
+  sqlite3ExprDelete(db, pWhere);
+  return;
+}
+/* Make sure "isView" and other macros defined above are undefined. Otherwise
+** they may interfere with compilation of other functions in this file
+** (or in another file, if this file becomes part of the amalgamation).  */
+#ifdef isView
+ #undef isView
+#endif
+#ifdef pTrigger
+ #undef pTrigger
+#endif
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Generate code for an UPDATE of a virtual table.
+**
+** The strategy is that we create an ephemeral table that contains
+** for each row to be changed:
+**
+**   (A)  The original rowid of that row.
+**   (B)  The revised rowid for the row. (note1)
+**   (C)  The content of every column in the row.
+**
+** Then we loop over this ephemeral table and for each row in
+** the ephemeral table call VUpdate.
+**
+** When finished, drop the ephemeral table.
+**
+** (note1) Actually, if we know in advance that (A) is always the same
+** as (B) we only store (A), then duplicate (A) when pulling
+** it out of the ephemeral table before calling VUpdate.
+*/
+static void updateVirtualTable(
+  Parse *pParse,       /* The parsing context */
+  SrcList *pSrc,       /* The virtual table to be modified */
+  Table *pTab,         /* The virtual table */
+  ExprList *pChanges,  /* The columns to change in the UPDATE statement */
+  Expr *pRowid,        /* Expression used to recompute the rowid */
+  int *aXRef,          /* Mapping from columns of pTab to entries in pChanges */
+  Expr *pWhere,        /* WHERE clause of the UPDATE statement */
+  int onError          /* ON CONFLICT strategy */
+){
+  Vdbe *v = pParse->pVdbe;  /* Virtual machine under construction */
+  ExprList *pEList = 0;     /* The result set of the SELECT statement */
+  Select *pSelect = 0;      /* The SELECT statement */
+  Expr *pExpr;              /* Temporary expression */
+  int ephemTab;             /* Table holding the result of the SELECT */
+  int i;                    /* Loop counter */
+  int addr;                 /* Address of top of loop */
+  int iReg;                 /* First register in set passed to OP_VUpdate */
+  sqlite3 *db = pParse->db; /* Database connection */
+  const char *pVTab = (const char*)sqlite3GetVTable(db, pTab);
+  SelectDest dest;
+
+  /* Construct the SELECT statement that will find the new values for
+  ** all updated rows. 
+  */
+  pEList = sqlite3ExprListAppend(pParse, 0, sqlite3Expr(db, TK_ID, "_rowid_"));
+  if( pRowid ){
+    pEList = sqlite3ExprListAppend(pParse, pEList,
+                                   sqlite3ExprDup(db, pRowid, 0));
+  }
+  assert( pTab->iPKey<0 );
+  for(i=0; i<pTab->nCol; i++){
+    if( aXRef[i]>=0 ){
+      pExpr = sqlite3ExprDup(db, pChanges->a[aXRef[i]].pExpr, 0);
+    }else{
+      pExpr = sqlite3Expr(db, TK_ID, pTab->aCol[i].zName);
+    }
+    pEList = sqlite3ExprListAppend(pParse, pEList, pExpr);
+  }
+  pSelect = sqlite3SelectNew(pParse, pEList, pSrc, pWhere, 0, 0, 0, 0, 0, 0);
+  
+  /* Create the ephemeral table into which the update results will
+  ** be stored.
+  */
+  assert( v );
+  ephemTab = pParse->nTab++;
+  sqlite3VdbeAddOp2(v, OP_OpenEphemeral, ephemTab, pTab->nCol+1+(pRowid!=0));
+  sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
+
+  /* fill the ephemeral table 
+  */
+  sqlite3SelectDestInit(&dest, SRT_Table, ephemTab);
+  sqlite3Select(pParse, pSelect, &dest);
+
+  /* Generate code to scan the ephemeral table and call VUpdate. */
+  iReg = ++pParse->nMem;
+  pParse->nMem += pTab->nCol+1;
+  addr = sqlite3VdbeAddOp2(v, OP_Rewind, ephemTab, 0); VdbeCoverage(v);
+  sqlite3VdbeAddOp3(v, OP_Column,  ephemTab, 0, iReg);
+  sqlite3VdbeAddOp3(v, OP_Column, ephemTab, (pRowid?1:0), iReg+1);
+  for(i=0; i<pTab->nCol; i++){
+    sqlite3VdbeAddOp3(v, OP_Column, ephemTab, i+1+(pRowid!=0), iReg+2+i);
+  }
+  sqlite3VtabMakeWritable(pParse, pTab);
+  sqlite3VdbeAddOp4(v, OP_VUpdate, 0, pTab->nCol+2, iReg, pVTab, P4_VTAB);
+  sqlite3VdbeChangeP5(v, onError==OE_Default ? OE_Abort : onError);
+  sqlite3MayAbort(pParse);
+  sqlite3VdbeAddOp2(v, OP_Next, ephemTab, addr+1); VdbeCoverage(v);
+  sqlite3VdbeJumpHere(v, addr);
+  sqlite3VdbeAddOp2(v, OP_Close, ephemTab, 0);
+
+  /* Cleanup */
+  sqlite3SelectDelete(db, pSelect);  
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/************** End of update.c **********************************************/
+/************** Begin file vacuum.c ******************************************/
+/*
+** 2003 April 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to implement the VACUUM command.
+**
+** Most of the code in this file may be omitted by defining the
+** SQLITE_OMIT_VACUUM macro.
+*/
+
+#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH)
+/*
+** Finalize a prepared statement.  If there was an error, store the
+** text of the error message in *pzErrMsg.  Return the result code.
+*/
+static int vacuumFinalize(sqlite3 *db, sqlite3_stmt *pStmt, char **pzErrMsg){
+  int rc;
+  rc = sqlite3VdbeFinalize((Vdbe*)pStmt);
+  if( rc ){
+    sqlite3SetString(pzErrMsg, db, sqlite3_errmsg(db));
+  }
+  return rc;
+}
+
+/*
+** Execute zSql on database db. Return an error code.
+*/
+static int execSql(sqlite3 *db, char **pzErrMsg, const char *zSql){
+  sqlite3_stmt *pStmt;
+  VVA_ONLY( int rc; )
+  if( !zSql ){
+    return SQLITE_NOMEM;
+  }
+  if( SQLITE_OK!=sqlite3_prepare(db, zSql, -1, &pStmt, 0) ){
+    sqlite3SetString(pzErrMsg, db, sqlite3_errmsg(db));
+    return sqlite3_errcode(db);
+  }
+  VVA_ONLY( rc = ) sqlite3_step(pStmt);
+  assert( rc!=SQLITE_ROW || (db->flags&SQLITE_CountRows) );
+  return vacuumFinalize(db, pStmt, pzErrMsg);
+}
+
+/*
+** Execute zSql on database db. The statement returns exactly
+** one column. Execute this as SQL on the same database.
+*/
+static int execExecSql(sqlite3 *db, char **pzErrMsg, const char *zSql){
+  sqlite3_stmt *pStmt;
+  int rc;
+
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  while( SQLITE_ROW==sqlite3_step(pStmt) ){
+    rc = execSql(db, pzErrMsg, (char*)sqlite3_column_text(pStmt, 0));
+    if( rc!=SQLITE_OK ){
+      vacuumFinalize(db, pStmt, pzErrMsg);
+      return rc;
+    }
+  }
+
+  return vacuumFinalize(db, pStmt, pzErrMsg);
+}
+
+/*
+** The VACUUM command is used to clean up the database,
+** collapse free space, etc.  It is modelled after the VACUUM command
+** in PostgreSQL.  The VACUUM command works as follows:
+**
+**   (1)  Create a new transient database file
+**   (2)  Copy all content from the database being vacuumed into
+**        the new transient database file
+**   (3)  Copy content from the transient database back into the
+**        original database.
+**
+** The transient database requires temporary disk space approximately
+** equal to the size of the original database.  The copy operation of
+** step (3) requires additional temporary disk space approximately equal
+** to the size of the original database for the rollback journal.
+** Hence, temporary disk space that is approximately 2x the size of the
+** original database is required.  Every page of the database is written
+** approximately 3 times:  Once for step (2) and twice for step (3).
+** Two writes per page are required in step (3) because the original
+** database content must be written into the rollback journal prior to
+** overwriting the database with the vacuumed content.
+**
+** Only 1x temporary space and only 1x writes would be required if
+** the copy of step (3) were replaced by deleting the original database
+** and renaming the transient database as the original.  But that will
+** not work if other processes are attached to the original database.
+** And a power loss in between deleting the original and renaming the
+** transient would cause the database file to appear to be deleted
+** following reboot.
+*/
+SQLITE_PRIVATE void sqlite3Vacuum(Parse *pParse){
+  Vdbe *v = sqlite3GetVdbe(pParse);
+  if( v ){
+    sqlite3VdbeAddOp2(v, OP_Vacuum, 0, 0);
+    sqlite3VdbeUsesBtree(v, 0);
+  }
+  return;
+}
+
+/*
+** This routine implements the OP_Vacuum opcode of the VDBE.
+*/
+SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){
+  int rc = SQLITE_OK;     /* Return code from service routines */
+  Btree *pMain;           /* The database being vacuumed */
+  Btree *pTemp;           /* The temporary database we vacuum into */
+  char *zSql = 0;         /* SQL statements */
+  int saved_flags;        /* Saved value of the db->flags */
+  int saved_nChange;      /* Saved value of db->nChange */
+  int saved_nTotalChange; /* Saved value of db->nTotalChange */
+  void (*saved_xTrace)(void*,const char*);  /* Saved db->xTrace */
+  Db *pDb = 0;            /* Database to detach at end of vacuum */
+  int isMemDb;            /* True if vacuuming a :memory: database */
+  int nRes;               /* Bytes of reserved space at the end of each page */
+  int nDb;                /* Number of attached databases */
+
+  if( !db->autoCommit ){
+    sqlite3SetString(pzErrMsg, db, "cannot VACUUM from within a transaction");
+    return SQLITE_ERROR;
+  }
+  if( db->nVdbeActive>1 ){
+    sqlite3SetString(pzErrMsg, db,"cannot VACUUM - SQL statements in progress");
+    return SQLITE_ERROR;
+  }
+
+  /* Save the current value of the database flags so that it can be 
+  ** restored before returning. Then set the writable-schema flag, and
+  ** disable CHECK and foreign key constraints.  */
+  saved_flags = db->flags;
+  saved_nChange = db->nChange;
+  saved_nTotalChange = db->nTotalChange;
+  saved_xTrace = db->xTrace;
+  db->flags |= SQLITE_WriteSchema | SQLITE_IgnoreChecks | SQLITE_PreferBuiltin;
+  db->flags &= ~(SQLITE_ForeignKeys | SQLITE_ReverseOrder);
+  db->xTrace = 0;
+
+  pMain = db->aDb[0].pBt;
+  isMemDb = sqlite3PagerIsMemdb(sqlite3BtreePager(pMain));
+
+  /* Attach the temporary database as 'vacuum_db'. The synchronous pragma
+  ** can be set to 'off' for this file, as it is not recovered if a crash
+  ** occurs anyway. The integrity of the database is maintained by a
+  ** (possibly synchronous) transaction opened on the main database before
+  ** sqlite3BtreeCopyFile() is called.
+  **
+  ** An optimisation would be to use a non-journaled pager.
+  ** (Later:) I tried setting "PRAGMA vacuum_db.journal_mode=OFF" but
+  ** that actually made the VACUUM run slower.  Very little journalling
+  ** actually occurs when doing a vacuum since the vacuum_db is initially
+  ** empty.  Only the journal header is written.  Apparently it takes more
+  ** time to parse and run the PRAGMA to turn journalling off than it does
+  ** to write the journal header file.
+  */
+  nDb = db->nDb;
+  if( sqlite3TempInMemory(db) ){
+    zSql = "ATTACH ':memory:' AS vacuum_db;";
+  }else{
+    zSql = "ATTACH '' AS vacuum_db;";
+  }
+  rc = execSql(db, pzErrMsg, zSql);
+  if( db->nDb>nDb ){
+    pDb = &db->aDb[db->nDb-1];
+    assert( strcmp(pDb->zName,"vacuum_db")==0 );
+  }
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+  pTemp = db->aDb[db->nDb-1].pBt;
+
+  /* The call to execSql() to attach the temp database has left the file
+  ** locked (as there was more than one active statement when the transaction
+  ** to read the schema was concluded. Unlock it here so that this doesn't
+  ** cause problems for the call to BtreeSetPageSize() below.  */
+  sqlite3BtreeCommit(pTemp);
+
+  nRes = sqlite3BtreeGetReserve(pMain);
+
+  /* A VACUUM cannot change the pagesize of an encrypted database. */
+#ifdef SQLITE_HAS_CODEC
+  if( db->nextPagesize ){
+    extern void sqlite3CodecGetKey(sqlite3*, int, void**, int*);
+    int nKey;
+    char *zKey;
+    sqlite3CodecGetKey(db, 0, (void**)&zKey, &nKey);
+    if( nKey ) db->nextPagesize = 0;
+  }
+#endif
+
+  rc = execSql(db, pzErrMsg, "PRAGMA vacuum_db.synchronous=OFF");
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+
+  /* Begin a transaction and take an exclusive lock on the main database
+  ** file. This is done before the sqlite3BtreeGetPageSize(pMain) call below,
+  ** to ensure that we do not try to change the page-size on a WAL database.
+  */
+  rc = execSql(db, pzErrMsg, "BEGIN;");
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+  rc = sqlite3BtreeBeginTrans(pMain, 2);
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+
+  /* Do not attempt to change the page size for a WAL database */
+  if( sqlite3PagerGetJournalMode(sqlite3BtreePager(pMain))
+                                               ==PAGER_JOURNALMODE_WAL ){
+    db->nextPagesize = 0;
+  }
+
+  if( sqlite3BtreeSetPageSize(pTemp, sqlite3BtreeGetPageSize(pMain), nRes, 0)
+   || (!isMemDb && sqlite3BtreeSetPageSize(pTemp, db->nextPagesize, nRes, 0))
+   || NEVER(db->mallocFailed)
+  ){
+    rc = SQLITE_NOMEM;
+    goto end_of_vacuum;
+  }
+
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  sqlite3BtreeSetAutoVacuum(pTemp, db->nextAutovac>=0 ? db->nextAutovac :
+                                           sqlite3BtreeGetAutoVacuum(pMain));
+#endif
+
+  /* Query the schema of the main database. Create a mirror schema
+  ** in the temporary database.
+  */
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'CREATE TABLE vacuum_db.' || substr(sql,14) "
+      "  FROM sqlite_master WHERE type='table' AND name!='sqlite_sequence'"
+      "   AND coalesce(rootpage,1)>0"
+  );
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'CREATE INDEX vacuum_db.' || substr(sql,14)"
+      "  FROM sqlite_master WHERE sql LIKE 'CREATE INDEX %' ");
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'CREATE UNIQUE INDEX vacuum_db.' || substr(sql,21) "
+      "  FROM sqlite_master WHERE sql LIKE 'CREATE UNIQUE INDEX %'");
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+
+  /* Loop through the tables in the main database. For each, do
+  ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy
+  ** the contents to the temporary database.
+  */
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'INSERT INTO vacuum_db.' || quote(name) "
+      "|| ' SELECT * FROM main.' || quote(name) || ';'"
+      "FROM main.sqlite_master "
+      "WHERE type = 'table' AND name!='sqlite_sequence' "
+      "  AND coalesce(rootpage,1)>0"
+  );
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+
+  /* Copy over the sequence table
+  */
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'DELETE FROM vacuum_db.' || quote(name) || ';' "
+      "FROM vacuum_db.sqlite_master WHERE name='sqlite_sequence' "
+  );
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+  rc = execExecSql(db, pzErrMsg,
+      "SELECT 'INSERT INTO vacuum_db.' || quote(name) "
+      "|| ' SELECT * FROM main.' || quote(name) || ';' "
+      "FROM vacuum_db.sqlite_master WHERE name=='sqlite_sequence';"
+  );
+  if( rc!=SQLITE_OK ) goto end_of_vacuum;
+
+
+  /* Copy the triggers, views, and virtual tables from the main database
+  ** over to the temporary database.  None of these objects has any
+  ** associated storage, so all we have to do is copy their entries
+  ** from the SQLITE_MASTER table.
+  */
+  rc = execSql(db, pzErrMsg,
+      "INSERT INTO vacuum_db.sqlite_master "
+      "  SELECT type, name, tbl_name, rootpage, sql"
+      "    FROM main.sqlite_master"
+      "   WHERE type='view' OR type='trigger'"
+      "      OR (type='table' AND rootpage=0)"
+  );
+  if( rc ) goto end_of_vacuum;
+
+  /* At this point, there is a write transaction open on both the 
+  ** vacuum database and the main database. Assuming no error occurs,
+  ** both transactions are closed by this block - the main database
+  ** transaction by sqlite3BtreeCopyFile() and the other by an explicit
+  ** call to sqlite3BtreeCommit().
+  */
+  {
+    u32 meta;
+    int i;
+
+    /* This array determines which meta meta values are preserved in the
+    ** vacuum.  Even entries are the meta value number and odd entries
+    ** are an increment to apply to the meta value after the vacuum.
+    ** The increment is used to increase the schema cookie so that other
+    ** connections to the same database will know to reread the schema.
+    */
+    static const unsigned char aCopy[] = {
+       BTREE_SCHEMA_VERSION,     1,  /* Add one to the old schema cookie */
+       BTREE_DEFAULT_CACHE_SIZE, 0,  /* Preserve the default page cache size */
+       BTREE_TEXT_ENCODING,      0,  /* Preserve the text encoding */
+       BTREE_USER_VERSION,       0,  /* Preserve the user version */
+       BTREE_APPLICATION_ID,     0,  /* Preserve the application id */
+    };
+
+    assert( 1==sqlite3BtreeIsInTrans(pTemp) );
+    assert( 1==sqlite3BtreeIsInTrans(pMain) );
+
+    /* Copy Btree meta values */
+    for(i=0; i<ArraySize(aCopy); i+=2){
+      /* GetMeta() and UpdateMeta() cannot fail in this context because
+      ** we already have page 1 loaded into cache and marked dirty. */
+      sqlite3BtreeGetMeta(pMain, aCopy[i], &meta);
+      rc = sqlite3BtreeUpdateMeta(pTemp, aCopy[i], meta+aCopy[i+1]);
+      if( NEVER(rc!=SQLITE_OK) ) goto end_of_vacuum;
+    }
+
+    rc = sqlite3BtreeCopyFile(pMain, pTemp);
+    if( rc!=SQLITE_OK ) goto end_of_vacuum;
+    rc = sqlite3BtreeCommit(pTemp);
+    if( rc!=SQLITE_OK ) goto end_of_vacuum;
+#ifndef SQLITE_OMIT_AUTOVACUUM
+    sqlite3BtreeSetAutoVacuum(pMain, sqlite3BtreeGetAutoVacuum(pTemp));
+#endif
+  }
+
+  assert( rc==SQLITE_OK );
+  rc = sqlite3BtreeSetPageSize(pMain, sqlite3BtreeGetPageSize(pTemp), nRes,1);
+
+end_of_vacuum:
+  /* Restore the original value of db->flags */
+  db->flags = saved_flags;
+  db->nChange = saved_nChange;
+  db->nTotalChange = saved_nTotalChange;
+  db->xTrace = saved_xTrace;
+  sqlite3BtreeSetPageSize(pMain, -1, -1, 1);
+
+  /* Currently there is an SQL level transaction open on the vacuum
+  ** database. No locks are held on any other files (since the main file
+  ** was committed at the btree level). So it safe to end the transaction
+  ** by manually setting the autoCommit flag to true and detaching the
+  ** vacuum database. The vacuum_db journal file is deleted when the pager
+  ** is closed by the DETACH.
+  */
+  db->autoCommit = 1;
+
+  if( pDb ){
+    sqlite3BtreeClose(pDb->pBt);
+    pDb->pBt = 0;
+    pDb->pSchema = 0;
+  }
+
+  /* This both clears the schemas and reduces the size of the db->aDb[]
+  ** array. */ 
+  sqlite3ResetAllSchemasOfConnection(db);
+
+  return rc;
+}
+
+#endif  /* SQLITE_OMIT_VACUUM && SQLITE_OMIT_ATTACH */
+
+/************** End of vacuum.c **********************************************/
+/************** Begin file vtab.c ********************************************/
+/*
+** 2006 June 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code used to help implement virtual tables.
+*/
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+
+/*
+** Before a virtual table xCreate() or xConnect() method is invoked, the
+** sqlite3.pVtabCtx member variable is set to point to an instance of
+** this struct allocated on the stack. It is used by the implementation of 
+** the sqlite3_declare_vtab() and sqlite3_vtab_config() APIs, both of which
+** are invoked only from within xCreate and xConnect methods.
+*/
+struct VtabCtx {
+  VTable *pVTable;    /* The virtual table being constructed */
+  Table *pTab;        /* The Table object to which the virtual table belongs */
+};
+
+/*
+** The actual function that does the work of creating a new module.
+** This function implements the sqlite3_create_module() and
+** sqlite3_create_module_v2() interfaces.
+*/
+static int createModule(
+  sqlite3 *db,                    /* Database in which module is registered */
+  const char *zName,              /* Name assigned to this module */
+  const sqlite3_module *pModule,  /* The definition of the module */
+  void *pAux,                     /* Context pointer for xCreate/xConnect */
+  void (*xDestroy)(void *)        /* Module destructor function */
+){
+  int rc = SQLITE_OK;
+  int nName;
+
+  sqlite3_mutex_enter(db->mutex);
+  nName = sqlite3Strlen30(zName);
+  if( sqlite3HashFind(&db->aModule, zName) ){
+    rc = SQLITE_MISUSE_BKPT;
+  }else{
+    Module *pMod;
+    pMod = (Module *)sqlite3DbMallocRaw(db, sizeof(Module) + nName + 1);
+    if( pMod ){
+      Module *pDel;
+      char *zCopy = (char *)(&pMod[1]);
+      memcpy(zCopy, zName, nName+1);
+      pMod->zName = zCopy;
+      pMod->pModule = pModule;
+      pMod->pAux = pAux;
+      pMod->xDestroy = xDestroy;
+      pDel = (Module *)sqlite3HashInsert(&db->aModule,zCopy,(void*)pMod);
+      assert( pDel==0 || pDel==pMod );
+      if( pDel ){
+        db->mallocFailed = 1;
+        sqlite3DbFree(db, pDel);
+      }
+    }
+  }
+  rc = sqlite3ApiExit(db, rc);
+  if( rc!=SQLITE_OK && xDestroy ) xDestroy(pAux);
+
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+
+/*
+** External API function used to create a new virtual-table module.
+*/
+SQLITE_API int sqlite3_create_module(
+  sqlite3 *db,                    /* Database in which module is registered */
+  const char *zName,              /* Name assigned to this module */
+  const sqlite3_module *pModule,  /* The definition of the module */
+  void *pAux                      /* Context pointer for xCreate/xConnect */
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  return createModule(db, zName, pModule, pAux, 0);
+}
+
+/*
+** External API function used to create a new virtual-table module.
+*/
+SQLITE_API int sqlite3_create_module_v2(
+  sqlite3 *db,                    /* Database in which module is registered */
+  const char *zName,              /* Name assigned to this module */
+  const sqlite3_module *pModule,  /* The definition of the module */
+  void *pAux,                     /* Context pointer for xCreate/xConnect */
+  void (*xDestroy)(void *)        /* Module destructor function */
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  return createModule(db, zName, pModule, pAux, xDestroy);
+}
+
+/*
+** Lock the virtual table so that it cannot be disconnected.
+** Locks nest.  Every lock should have a corresponding unlock.
+** If an unlock is omitted, resources leaks will occur.  
+**
+** If a disconnect is attempted while a virtual table is locked,
+** the disconnect is deferred until all locks have been removed.
+*/
+SQLITE_PRIVATE void sqlite3VtabLock(VTable *pVTab){
+  pVTab->nRef++;
+}
+
+
+/*
+** pTab is a pointer to a Table structure representing a virtual-table.
+** Return a pointer to the VTable object used by connection db to access 
+** this virtual-table, if one has been created, or NULL otherwise.
+*/
+SQLITE_PRIVATE VTable *sqlite3GetVTable(sqlite3 *db, Table *pTab){
+  VTable *pVtab;
+  assert( IsVirtual(pTab) );
+  for(pVtab=pTab->pVTable; pVtab && pVtab->db!=db; pVtab=pVtab->pNext);
+  return pVtab;
+}
+
+/*
+** Decrement the ref-count on a virtual table object. When the ref-count
+** reaches zero, call the xDisconnect() method to delete the object.
+*/
+SQLITE_PRIVATE void sqlite3VtabUnlock(VTable *pVTab){
+  sqlite3 *db = pVTab->db;
+
+  assert( db );
+  assert( pVTab->nRef>0 );
+  assert( db->magic==SQLITE_MAGIC_OPEN || db->magic==SQLITE_MAGIC_ZOMBIE );
+
+  pVTab->nRef--;
+  if( pVTab->nRef==0 ){
+    sqlite3_vtab *p = pVTab->pVtab;
+    if( p ){
+      p->pModule->xDisconnect(p);
+    }
+    sqlite3DbFree(db, pVTab);
+  }
+}
+
+/*
+** Table p is a virtual table. This function moves all elements in the
+** p->pVTable list to the sqlite3.pDisconnect lists of their associated
+** database connections to be disconnected at the next opportunity. 
+** Except, if argument db is not NULL, then the entry associated with
+** connection db is left in the p->pVTable list.
+*/
+static VTable *vtabDisconnectAll(sqlite3 *db, Table *p){
+  VTable *pRet = 0;
+  VTable *pVTable = p->pVTable;
+  p->pVTable = 0;
+
+  /* Assert that the mutex (if any) associated with the BtShared database 
+  ** that contains table p is held by the caller. See header comments 
+  ** above function sqlite3VtabUnlockList() for an explanation of why
+  ** this makes it safe to access the sqlite3.pDisconnect list of any
+  ** database connection that may have an entry in the p->pVTable list.
+  */
+  assert( db==0 || sqlite3SchemaMutexHeld(db, 0, p->pSchema) );
+
+  while( pVTable ){
+    sqlite3 *db2 = pVTable->db;
+    VTable *pNext = pVTable->pNext;
+    assert( db2 );
+    if( db2==db ){
+      pRet = pVTable;
+      p->pVTable = pRet;
+      pRet->pNext = 0;
+    }else{
+      pVTable->pNext = db2->pDisconnect;
+      db2->pDisconnect = pVTable;
+    }
+    pVTable = pNext;
+  }
+
+  assert( !db || pRet );
+  return pRet;
+}
+
+/*
+** Table *p is a virtual table. This function removes the VTable object
+** for table *p associated with database connection db from the linked
+** list in p->pVTab. It also decrements the VTable ref count. This is
+** used when closing database connection db to free all of its VTable
+** objects without disturbing the rest of the Schema object (which may
+** be being used by other shared-cache connections).
+*/
+SQLITE_PRIVATE void sqlite3VtabDisconnect(sqlite3 *db, Table *p){
+  VTable **ppVTab;
+
+  assert( IsVirtual(p) );
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  assert( sqlite3_mutex_held(db->mutex) );
+
+  for(ppVTab=&p->pVTable; *ppVTab; ppVTab=&(*ppVTab)->pNext){
+    if( (*ppVTab)->db==db  ){
+      VTable *pVTab = *ppVTab;
+      *ppVTab = pVTab->pNext;
+      sqlite3VtabUnlock(pVTab);
+      break;
+    }
+  }
+}
+
+
+/*
+** Disconnect all the virtual table objects in the sqlite3.pDisconnect list.
+**
+** This function may only be called when the mutexes associated with all
+** shared b-tree databases opened using connection db are held by the 
+** caller. This is done to protect the sqlite3.pDisconnect list. The
+** sqlite3.pDisconnect list is accessed only as follows:
+**
+**   1) By this function. In this case, all BtShared mutexes and the mutex
+**      associated with the database handle itself must be held.
+**
+**   2) By function vtabDisconnectAll(), when it adds a VTable entry to
+**      the sqlite3.pDisconnect list. In this case either the BtShared mutex
+**      associated with the database the virtual table is stored in is held
+**      or, if the virtual table is stored in a non-sharable database, then
+**      the database handle mutex is held.
+**
+** As a result, a sqlite3.pDisconnect cannot be accessed simultaneously 
+** by multiple threads. It is thread-safe.
+*/
+SQLITE_PRIVATE void sqlite3VtabUnlockList(sqlite3 *db){
+  VTable *p = db->pDisconnect;
+  db->pDisconnect = 0;
+
+  assert( sqlite3BtreeHoldsAllMutexes(db) );
+  assert( sqlite3_mutex_held(db->mutex) );
+
+  if( p ){
+    sqlite3ExpirePreparedStatements(db);
+    do {
+      VTable *pNext = p->pNext;
+      sqlite3VtabUnlock(p);
+      p = pNext;
+    }while( p );
+  }
+}
+
+/*
+** Clear any and all virtual-table information from the Table record.
+** This routine is called, for example, just before deleting the Table
+** record.
+**
+** Since it is a virtual-table, the Table structure contains a pointer
+** to the head of a linked list of VTable structures. Each VTable 
+** structure is associated with a single sqlite3* user of the schema.
+** The reference count of the VTable structure associated with database 
+** connection db is decremented immediately (which may lead to the 
+** structure being xDisconnected and free). Any other VTable structures
+** in the list are moved to the sqlite3.pDisconnect list of the associated 
+** database connection.
+*/
+SQLITE_PRIVATE void sqlite3VtabClear(sqlite3 *db, Table *p){
+  if( !db || db->pnBytesFreed==0 ) vtabDisconnectAll(0, p);
+  if( p->azModuleArg ){
+    int i;
+    for(i=0; i<p->nModuleArg; i++){
+      if( i!=1 ) sqlite3DbFree(db, p->azModuleArg[i]);
+    }
+    sqlite3DbFree(db, p->azModuleArg);
+  }
+}
+
+/*
+** Add a new module argument to pTable->azModuleArg[].
+** The string is not copied - the pointer is stored.  The
+** string will be freed automatically when the table is
+** deleted.
+*/
+static void addModuleArgument(sqlite3 *db, Table *pTable, char *zArg){
+  int i = pTable->nModuleArg++;
+  int nBytes = sizeof(char *)*(1+pTable->nModuleArg);
+  char **azModuleArg;
+  azModuleArg = sqlite3DbRealloc(db, pTable->azModuleArg, nBytes);
+  if( azModuleArg==0 ){
+    int j;
+    for(j=0; j<i; j++){
+      sqlite3DbFree(db, pTable->azModuleArg[j]);
+    }
+    sqlite3DbFree(db, zArg);
+    sqlite3DbFree(db, pTable->azModuleArg);
+    pTable->nModuleArg = 0;
+  }else{
+    azModuleArg[i] = zArg;
+    azModuleArg[i+1] = 0;
+  }
+  pTable->azModuleArg = azModuleArg;
+}
+
+/*
+** The parser calls this routine when it first sees a CREATE VIRTUAL TABLE
+** statement.  The module name has been parsed, but the optional list
+** of parameters that follow the module name are still pending.
+*/
+SQLITE_PRIVATE void sqlite3VtabBeginParse(
+  Parse *pParse,        /* Parsing context */
+  Token *pName1,        /* Name of new table, or database name */
+  Token *pName2,        /* Name of new table or NULL */
+  Token *pModuleName,   /* Name of the module for the virtual table */
+  int ifNotExists       /* No error if the table already exists */
+){
+  int iDb;              /* The database the table is being created in */
+  Table *pTable;        /* The new virtual table */
+  sqlite3 *db;          /* Database connection */
+
+  sqlite3StartTable(pParse, pName1, pName2, 0, 0, 1, ifNotExists);
+  pTable = pParse->pNewTable;
+  if( pTable==0 ) return;
+  assert( 0==pTable->pIndex );
+
+  db = pParse->db;
+  iDb = sqlite3SchemaToIndex(db, pTable->pSchema);
+  assert( iDb>=0 );
+
+  pTable->tabFlags |= TF_Virtual;
+  pTable->nModuleArg = 0;
+  addModuleArgument(db, pTable, sqlite3NameFromToken(db, pModuleName));
+  addModuleArgument(db, pTable, 0);
+  addModuleArgument(db, pTable, sqlite3DbStrDup(db, pTable->zName));
+  assert( (pParse->sNameToken.z==pName2->z && pName2->z!=0)
+       || (pParse->sNameToken.z==pName1->z && pName2->z==0)
+  );
+  pParse->sNameToken.n = (int)(
+      &pModuleName->z[pModuleName->n] - pParse->sNameToken.z
+  );
+
+#ifndef SQLITE_OMIT_AUTHORIZATION
+  /* Creating a virtual table invokes the authorization callback twice.
+  ** The first invocation, to obtain permission to INSERT a row into the
+  ** sqlite_master table, has already been made by sqlite3StartTable().
+  ** The second call, to obtain permission to create the table, is made now.
+  */
+  if( pTable->azModuleArg ){
+    sqlite3AuthCheck(pParse, SQLITE_CREATE_VTABLE, pTable->zName, 
+            pTable->azModuleArg[0], pParse->db->aDb[iDb].zName);
+  }
+#endif
+}
+
+/*
+** This routine takes the module argument that has been accumulating
+** in pParse->zArg[] and appends it to the list of arguments on the
+** virtual table currently under construction in pParse->pTable.
+*/
+static void addArgumentToVtab(Parse *pParse){
+  if( pParse->sArg.z && pParse->pNewTable ){
+    const char *z = (const char*)pParse->sArg.z;
+    int n = pParse->sArg.n;
+    sqlite3 *db = pParse->db;
+    addModuleArgument(db, pParse->pNewTable, sqlite3DbStrNDup(db, z, n));
+  }
+}
+
+/*
+** The parser calls this routine after the CREATE VIRTUAL TABLE statement
+** has been completely parsed.
+*/
+SQLITE_PRIVATE void sqlite3VtabFinishParse(Parse *pParse, Token *pEnd){
+  Table *pTab = pParse->pNewTable;  /* The table being constructed */
+  sqlite3 *db = pParse->db;         /* The database connection */
+
+  if( pTab==0 ) return;
+  addArgumentToVtab(pParse);
+  pParse->sArg.z = 0;
+  if( pTab->nModuleArg<1 ) return;
+  
+  /* If the CREATE VIRTUAL TABLE statement is being entered for the
+  ** first time (in other words if the virtual table is actually being
+  ** created now instead of just being read out of sqlite_master) then
+  ** do additional initialization work and store the statement text
+  ** in the sqlite_master table.
+  */
+  if( !db->init.busy ){
+    char *zStmt;
+    char *zWhere;
+    int iDb;
+    Vdbe *v;
+
+    /* Compute the complete text of the CREATE VIRTUAL TABLE statement */
+    if( pEnd ){
+      pParse->sNameToken.n = (int)(pEnd->z - pParse->sNameToken.z) + pEnd->n;
+    }
+    zStmt = sqlite3MPrintf(db, "CREATE VIRTUAL TABLE %T", &pParse->sNameToken);
+
+    /* A slot for the record has already been allocated in the 
+    ** SQLITE_MASTER table.  We just need to update that slot with all
+    ** the information we've collected.  
+    **
+    ** The VM register number pParse->regRowid holds the rowid of an
+    ** entry in the sqlite_master table tht was created for this vtab
+    ** by sqlite3StartTable().
+    */
+    iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+    sqlite3NestedParse(pParse,
+      "UPDATE %Q.%s "
+         "SET type='table', name=%Q, tbl_name=%Q, rootpage=0, sql=%Q "
+       "WHERE rowid=#%d",
+      db->aDb[iDb].zName, SCHEMA_TABLE(iDb),
+      pTab->zName,
+      pTab->zName,
+      zStmt,
+      pParse->regRowid
+    );
+    sqlite3DbFree(db, zStmt);
+    v = sqlite3GetVdbe(pParse);
+    sqlite3ChangeCookie(pParse, iDb);
+
+    sqlite3VdbeAddOp2(v, OP_Expire, 0, 0);
+    zWhere = sqlite3MPrintf(db, "name='%q' AND type='table'", pTab->zName);
+    sqlite3VdbeAddParseSchemaOp(v, iDb, zWhere);
+    sqlite3VdbeAddOp4(v, OP_VCreate, iDb, 0, 0, 
+                         pTab->zName, sqlite3Strlen30(pTab->zName) + 1);
+  }
+
+  /* If we are rereading the sqlite_master table create the in-memory
+  ** record of the table. The xConnect() method is not called until
+  ** the first time the virtual table is used in an SQL statement. This
+  ** allows a schema that contains virtual tables to be loaded before
+  ** the required virtual table implementations are registered.  */
+  else {
+    Table *pOld;
+    Schema *pSchema = pTab->pSchema;
+    const char *zName = pTab->zName;
+    assert( sqlite3SchemaMutexHeld(db, 0, pSchema) );
+    pOld = sqlite3HashInsert(&pSchema->tblHash, zName, pTab);
+    if( pOld ){
+      db->mallocFailed = 1;
+      assert( pTab==pOld );  /* Malloc must have failed inside HashInsert() */
+      return;
+    }
+    pParse->pNewTable = 0;
+  }
+}
+
+/*
+** The parser calls this routine when it sees the first token
+** of an argument to the module name in a CREATE VIRTUAL TABLE statement.
+*/
+SQLITE_PRIVATE void sqlite3VtabArgInit(Parse *pParse){
+  addArgumentToVtab(pParse);
+  pParse->sArg.z = 0;
+  pParse->sArg.n = 0;
+}
+
+/*
+** The parser calls this routine for each token after the first token
+** in an argument to the module name in a CREATE VIRTUAL TABLE statement.
+*/
+SQLITE_PRIVATE void sqlite3VtabArgExtend(Parse *pParse, Token *p){
+  Token *pArg = &pParse->sArg;
+  if( pArg->z==0 ){
+    pArg->z = p->z;
+    pArg->n = p->n;
+  }else{
+    assert(pArg->z < p->z);
+    pArg->n = (int)(&p->z[p->n] - pArg->z);
+  }
+}
+
+/*
+** Invoke a virtual table constructor (either xCreate or xConnect). The
+** pointer to the function to invoke is passed as the fourth parameter
+** to this procedure.
+*/
+static int vtabCallConstructor(
+  sqlite3 *db, 
+  Table *pTab,
+  Module *pMod,
+  int (*xConstruct)(sqlite3*,void*,int,const char*const*,sqlite3_vtab**,char**),
+  char **pzErr
+){
+  VtabCtx sCtx, *pPriorCtx;
+  VTable *pVTable;
+  int rc;
+  const char *const*azArg = (const char *const*)pTab->azModuleArg;
+  int nArg = pTab->nModuleArg;
+  char *zErr = 0;
+  char *zModuleName = sqlite3MPrintf(db, "%s", pTab->zName);
+  int iDb;
+
+  if( !zModuleName ){
+    return SQLITE_NOMEM;
+  }
+
+  pVTable = sqlite3DbMallocZero(db, sizeof(VTable));
+  if( !pVTable ){
+    sqlite3DbFree(db, zModuleName);
+    return SQLITE_NOMEM;
+  }
+  pVTable->db = db;
+  pVTable->pMod = pMod;
+
+  iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+  pTab->azModuleArg[1] = db->aDb[iDb].zName;
+
+  /* Invoke the virtual table constructor */
+  assert( &db->pVtabCtx );
+  assert( xConstruct );
+  sCtx.pTab = pTab;
+  sCtx.pVTable = pVTable;
+  pPriorCtx = db->pVtabCtx;
+  db->pVtabCtx = &sCtx;
+  rc = xConstruct(db, pMod->pAux, nArg, azArg, &pVTable->pVtab, &zErr);
+  db->pVtabCtx = pPriorCtx;
+  if( rc==SQLITE_NOMEM ) db->mallocFailed = 1;
+
+  if( SQLITE_OK!=rc ){
+    if( zErr==0 ){
+      *pzErr = sqlite3MPrintf(db, "vtable constructor failed: %s", zModuleName);
+    }else {
+      *pzErr = sqlite3MPrintf(db, "%s", zErr);
+      sqlite3_free(zErr);
+    }
+    sqlite3DbFree(db, pVTable);
+  }else if( ALWAYS(pVTable->pVtab) ){
+    /* Justification of ALWAYS():  A correct vtab constructor must allocate
+    ** the sqlite3_vtab object if successful.  */
+    memset(pVTable->pVtab, 0, sizeof(pVTable->pVtab[0]));
+    pVTable->pVtab->pModule = pMod->pModule;
+    pVTable->nRef = 1;
+    if( sCtx.pTab ){
+      const char *zFormat = "vtable constructor did not declare schema: %s";
+      *pzErr = sqlite3MPrintf(db, zFormat, pTab->zName);
+      sqlite3VtabUnlock(pVTable);
+      rc = SQLITE_ERROR;
+    }else{
+      int iCol;
+      /* If everything went according to plan, link the new VTable structure
+      ** into the linked list headed by pTab->pVTable. Then loop through the 
+      ** columns of the table to see if any of them contain the token "hidden".
+      ** If so, set the Column COLFLAG_HIDDEN flag and remove the token from
+      ** the type string.  */
+      pVTable->pNext = pTab->pVTable;
+      pTab->pVTable = pVTable;
+
+      for(iCol=0; iCol<pTab->nCol; iCol++){
+        char *zType = pTab->aCol[iCol].zType;
+        int nType;
+        int i = 0;
+        if( !zType ) continue;
+        nType = sqlite3Strlen30(zType);
+        if( sqlite3StrNICmp("hidden", zType, 6)||(zType[6] && zType[6]!=' ') ){
+          for(i=0; i<nType; i++){
+            if( (0==sqlite3StrNICmp(" hidden", &zType[i], 7))
+             && (zType[i+7]=='\0' || zType[i+7]==' ')
+            ){
+              i++;
+              break;
+            }
+          }
+        }
+        if( i<nType ){
+          int j;
+          int nDel = 6 + (zType[i+6] ? 1 : 0);
+          for(j=i; (j+nDel)<=nType; j++){
+            zType[j] = zType[j+nDel];
+          }
+          if( zType[i]=='\0' && i>0 ){
+            assert(zType[i-1]==' ');
+            zType[i-1] = '\0';
+          }
+          pTab->aCol[iCol].colFlags |= COLFLAG_HIDDEN;
+        }
+      }
+    }
+  }
+
+  sqlite3DbFree(db, zModuleName);
+  return rc;
+}
+
+/*
+** This function is invoked by the parser to call the xConnect() method
+** of the virtual table pTab. If an error occurs, an error code is returned 
+** and an error left in pParse.
+**
+** This call is a no-op if table pTab is not a virtual table.
+*/
+SQLITE_PRIVATE int sqlite3VtabCallConnect(Parse *pParse, Table *pTab){
+  sqlite3 *db = pParse->db;
+  const char *zMod;
+  Module *pMod;
+  int rc;
+
+  assert( pTab );
+  if( (pTab->tabFlags & TF_Virtual)==0 || sqlite3GetVTable(db, pTab) ){
+    return SQLITE_OK;
+  }
+
+  /* Locate the required virtual table module */
+  zMod = pTab->azModuleArg[0];
+  pMod = (Module*)sqlite3HashFind(&db->aModule, zMod);
+
+  if( !pMod ){
+    const char *zModule = pTab->azModuleArg[0];
+    sqlite3ErrorMsg(pParse, "no such module: %s", zModule);
+    rc = SQLITE_ERROR;
+  }else{
+    char *zErr = 0;
+    rc = vtabCallConstructor(db, pTab, pMod, pMod->pModule->xConnect, &zErr);
+    if( rc!=SQLITE_OK ){
+      sqlite3ErrorMsg(pParse, "%s", zErr);
+    }
+    sqlite3DbFree(db, zErr);
+  }
+
+  return rc;
+}
+/*
+** Grow the db->aVTrans[] array so that there is room for at least one
+** more v-table. Return SQLITE_NOMEM if a malloc fails, or SQLITE_OK otherwise.
+*/
+static int growVTrans(sqlite3 *db){
+  const int ARRAY_INCR = 5;
+
+  /* Grow the sqlite3.aVTrans array if required */
+  if( (db->nVTrans%ARRAY_INCR)==0 ){
+    VTable **aVTrans;
+    int nBytes = sizeof(sqlite3_vtab *) * (db->nVTrans + ARRAY_INCR);
+    aVTrans = sqlite3DbRealloc(db, (void *)db->aVTrans, nBytes);
+    if( !aVTrans ){
+      return SQLITE_NOMEM;
+    }
+    memset(&aVTrans[db->nVTrans], 0, sizeof(sqlite3_vtab *)*ARRAY_INCR);
+    db->aVTrans = aVTrans;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Add the virtual table pVTab to the array sqlite3.aVTrans[]. Space should
+** have already been reserved using growVTrans().
+*/
+static void addToVTrans(sqlite3 *db, VTable *pVTab){
+  /* Add pVtab to the end of sqlite3.aVTrans */
+  db->aVTrans[db->nVTrans++] = pVTab;
+  sqlite3VtabLock(pVTab);
+}
+
+/*
+** This function is invoked by the vdbe to call the xCreate method
+** of the virtual table named zTab in database iDb. 
+**
+** If an error occurs, *pzErr is set to point an an English language
+** description of the error and an SQLITE_XXX error code is returned.
+** In this case the caller must call sqlite3DbFree(db, ) on *pzErr.
+*/
+SQLITE_PRIVATE int sqlite3VtabCallCreate(sqlite3 *db, int iDb, const char *zTab, char **pzErr){
+  int rc = SQLITE_OK;
+  Table *pTab;
+  Module *pMod;
+  const char *zMod;
+
+  pTab = sqlite3FindTable(db, zTab, db->aDb[iDb].zName);
+  assert( pTab && (pTab->tabFlags & TF_Virtual)!=0 && !pTab->pVTable );
+
+  /* Locate the required virtual table module */
+  zMod = pTab->azModuleArg[0];
+  pMod = (Module*)sqlite3HashFind(&db->aModule, zMod);
+
+  /* If the module has been registered and includes a Create method, 
+  ** invoke it now. If the module has not been registered, return an 
+  ** error. Otherwise, do nothing.
+  */
+  if( !pMod ){
+    *pzErr = sqlite3MPrintf(db, "no such module: %s", zMod);
+    rc = SQLITE_ERROR;
+  }else{
+    rc = vtabCallConstructor(db, pTab, pMod, pMod->pModule->xCreate, pzErr);
+  }
+
+  /* Justification of ALWAYS():  The xConstructor method is required to
+  ** create a valid sqlite3_vtab if it returns SQLITE_OK. */
+  if( rc==SQLITE_OK && ALWAYS(sqlite3GetVTable(db, pTab)) ){
+    rc = growVTrans(db);
+    if( rc==SQLITE_OK ){
+      addToVTrans(db, sqlite3GetVTable(db, pTab));
+    }
+  }
+
+  return rc;
+}
+
+/*
+** This function is used to set the schema of a virtual table.  It is only
+** valid to call this function from within the xCreate() or xConnect() of a
+** virtual table module.
+*/
+SQLITE_API int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){
+  Parse *pParse;
+
+  int rc = SQLITE_OK;
+  Table *pTab;
+  char *zErr = 0;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  if( !db->pVtabCtx || !(pTab = db->pVtabCtx->pTab) ){
+    sqlite3Error(db, SQLITE_MISUSE);
+    sqlite3_mutex_leave(db->mutex);
+    return SQLITE_MISUSE_BKPT;
+  }
+  assert( (pTab->tabFlags & TF_Virtual)!=0 );
+
+  pParse = sqlite3StackAllocZero(db, sizeof(*pParse));
+  if( pParse==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    pParse->declareVtab = 1;
+    pParse->db = db;
+    pParse->nQueryLoop = 1;
+  
+    if( SQLITE_OK==sqlite3RunParser(pParse, zCreateTable, &zErr) 
+     && pParse->pNewTable
+     && !db->mallocFailed
+     && !pParse->pNewTable->pSelect
+     && (pParse->pNewTable->tabFlags & TF_Virtual)==0
+    ){
+      if( !pTab->aCol ){
+        pTab->aCol = pParse->pNewTable->aCol;
+        pTab->nCol = pParse->pNewTable->nCol;
+        pParse->pNewTable->nCol = 0;
+        pParse->pNewTable->aCol = 0;
+      }
+      db->pVtabCtx->pTab = 0;
+    }else{
+      sqlite3ErrorWithMsg(db, SQLITE_ERROR, (zErr ? "%s" : 0), zErr);
+      sqlite3DbFree(db, zErr);
+      rc = SQLITE_ERROR;
+    }
+    pParse->declareVtab = 0;
+  
+    if( pParse->pVdbe ){
+      sqlite3VdbeFinalize(pParse->pVdbe);
+    }
+    sqlite3DeleteTable(db, pParse->pNewTable);
+    sqlite3ParserReset(pParse);
+    sqlite3StackFree(db, pParse);
+  }
+
+  assert( (rc&0xff)==rc );
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** This function is invoked by the vdbe to call the xDestroy method
+** of the virtual table named zTab in database iDb. This occurs
+** when a DROP TABLE is mentioned.
+**
+** This call is a no-op if zTab is not a virtual table.
+*/
+SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3 *db, int iDb, const char *zTab){
+  int rc = SQLITE_OK;
+  Table *pTab;
+
+  pTab = sqlite3FindTable(db, zTab, db->aDb[iDb].zName);
+  if( ALWAYS(pTab!=0 && pTab->pVTable!=0) ){
+    VTable *p = vtabDisconnectAll(db, pTab);
+
+    assert( rc==SQLITE_OK );
+    rc = p->pMod->pModule->xDestroy(p->pVtab);
+
+    /* Remove the sqlite3_vtab* from the aVTrans[] array, if applicable */
+    if( rc==SQLITE_OK ){
+      assert( pTab->pVTable==p && p->pNext==0 );
+      p->pVtab = 0;
+      pTab->pVTable = 0;
+      sqlite3VtabUnlock(p);
+    }
+  }
+
+  return rc;
+}
+
+/*
+** This function invokes either the xRollback or xCommit method
+** of each of the virtual tables in the sqlite3.aVTrans array. The method
+** called is identified by the second argument, "offset", which is
+** the offset of the method to call in the sqlite3_module structure.
+**
+** The array is cleared after invoking the callbacks. 
+*/
+static void callFinaliser(sqlite3 *db, int offset){
+  int i;
+  if( db->aVTrans ){
+    for(i=0; i<db->nVTrans; i++){
+      VTable *pVTab = db->aVTrans[i];
+      sqlite3_vtab *p = pVTab->pVtab;
+      if( p ){
+        int (*x)(sqlite3_vtab *);
+        x = *(int (**)(sqlite3_vtab *))((char *)p->pModule + offset);
+        if( x ) x(p);
+      }
+      pVTab->iSavepoint = 0;
+      sqlite3VtabUnlock(pVTab);
+    }
+    sqlite3DbFree(db, db->aVTrans);
+    db->nVTrans = 0;
+    db->aVTrans = 0;
+  }
+}
+
+/*
+** Invoke the xSync method of all virtual tables in the sqlite3.aVTrans
+** array. Return the error code for the first error that occurs, or
+** SQLITE_OK if all xSync operations are successful.
+**
+** If an error message is available, leave it in p->zErrMsg.
+*/
+SQLITE_PRIVATE int sqlite3VtabSync(sqlite3 *db, Vdbe *p){
+  int i;
+  int rc = SQLITE_OK;
+  VTable **aVTrans = db->aVTrans;
+
+  db->aVTrans = 0;
+  for(i=0; rc==SQLITE_OK && i<db->nVTrans; i++){
+    int (*x)(sqlite3_vtab *);
+    sqlite3_vtab *pVtab = aVTrans[i]->pVtab;
+    if( pVtab && (x = pVtab->pModule->xSync)!=0 ){
+      rc = x(pVtab);
+      sqlite3VtabImportErrmsg(p, pVtab);
+    }
+  }
+  db->aVTrans = aVTrans;
+  return rc;
+}
+
+/*
+** Invoke the xRollback method of all virtual tables in the 
+** sqlite3.aVTrans array. Then clear the array itself.
+*/
+SQLITE_PRIVATE int sqlite3VtabRollback(sqlite3 *db){
+  callFinaliser(db, offsetof(sqlite3_module,xRollback));
+  return SQLITE_OK;
+}
+
+/*
+** Invoke the xCommit method of all virtual tables in the 
+** sqlite3.aVTrans array. Then clear the array itself.
+*/
+SQLITE_PRIVATE int sqlite3VtabCommit(sqlite3 *db){
+  callFinaliser(db, offsetof(sqlite3_module,xCommit));
+  return SQLITE_OK;
+}
+
+/*
+** If the virtual table pVtab supports the transaction interface
+** (xBegin/xRollback/xCommit and optionally xSync) and a transaction is
+** not currently open, invoke the xBegin method now.
+**
+** If the xBegin call is successful, place the sqlite3_vtab pointer
+** in the sqlite3.aVTrans array.
+*/
+SQLITE_PRIVATE int sqlite3VtabBegin(sqlite3 *db, VTable *pVTab){
+  int rc = SQLITE_OK;
+  const sqlite3_module *pModule;
+
+  /* Special case: If db->aVTrans is NULL and db->nVTrans is greater
+  ** than zero, then this function is being called from within a
+  ** virtual module xSync() callback. It is illegal to write to 
+  ** virtual module tables in this case, so return SQLITE_LOCKED.
+  */
+  if( sqlite3VtabInSync(db) ){
+    return SQLITE_LOCKED;
+  }
+  if( !pVTab ){
+    return SQLITE_OK;
+  } 
+  pModule = pVTab->pVtab->pModule;
+
+  if( pModule->xBegin ){
+    int i;
+
+    /* If pVtab is already in the aVTrans array, return early */
+    for(i=0; i<db->nVTrans; i++){
+      if( db->aVTrans[i]==pVTab ){
+        return SQLITE_OK;
+      }
+    }
+
+    /* Invoke the xBegin method. If successful, add the vtab to the 
+    ** sqlite3.aVTrans[] array. */
+    rc = growVTrans(db);
+    if( rc==SQLITE_OK ){
+      rc = pModule->xBegin(pVTab->pVtab);
+      if( rc==SQLITE_OK ){
+        addToVTrans(db, pVTab);
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Invoke either the xSavepoint, xRollbackTo or xRelease method of all
+** virtual tables that currently have an open transaction. Pass iSavepoint
+** as the second argument to the virtual table method invoked.
+**
+** If op is SAVEPOINT_BEGIN, the xSavepoint method is invoked. If it is
+** SAVEPOINT_ROLLBACK, the xRollbackTo method. Otherwise, if op is 
+** SAVEPOINT_RELEASE, then the xRelease method of each virtual table with
+** an open transaction is invoked.
+**
+** If any virtual table method returns an error code other than SQLITE_OK, 
+** processing is abandoned and the error returned to the caller of this
+** function immediately. If all calls to virtual table methods are successful,
+** SQLITE_OK is returned.
+*/
+SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){
+  int rc = SQLITE_OK;
+
+  assert( op==SAVEPOINT_RELEASE||op==SAVEPOINT_ROLLBACK||op==SAVEPOINT_BEGIN );
+  assert( iSavepoint>=0 );
+  if( db->aVTrans ){
+    int i;
+    for(i=0; rc==SQLITE_OK && i<db->nVTrans; i++){
+      VTable *pVTab = db->aVTrans[i];
+      const sqlite3_module *pMod = pVTab->pMod->pModule;
+      if( pVTab->pVtab && pMod->iVersion>=2 ){
+        int (*xMethod)(sqlite3_vtab *, int);
+        switch( op ){
+          case SAVEPOINT_BEGIN:
+            xMethod = pMod->xSavepoint;
+            pVTab->iSavepoint = iSavepoint+1;
+            break;
+          case SAVEPOINT_ROLLBACK:
+            xMethod = pMod->xRollbackTo;
+            break;
+          default:
+            xMethod = pMod->xRelease;
+            break;
+        }
+        if( xMethod && pVTab->iSavepoint>iSavepoint ){
+          rc = xMethod(pVTab->pVtab, iSavepoint);
+        }
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** The first parameter (pDef) is a function implementation.  The
+** second parameter (pExpr) is the first argument to this function.
+** If pExpr is a column in a virtual table, then let the virtual
+** table implementation have an opportunity to overload the function.
+**
+** This routine is used to allow virtual table implementations to
+** overload MATCH, LIKE, GLOB, and REGEXP operators.
+**
+** Return either the pDef argument (indicating no change) or a 
+** new FuncDef structure that is marked as ephemeral using the
+** SQLITE_FUNC_EPHEM flag.
+*/
+SQLITE_PRIVATE FuncDef *sqlite3VtabOverloadFunction(
+  sqlite3 *db,    /* Database connection for reporting malloc problems */
+  FuncDef *pDef,  /* Function to possibly overload */
+  int nArg,       /* Number of arguments to the function */
+  Expr *pExpr     /* First argument to the function */
+){
+  Table *pTab;
+  sqlite3_vtab *pVtab;
+  sqlite3_module *pMod;
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**) = 0;
+  void *pArg = 0;
+  FuncDef *pNew;
+  int rc = 0;
+  char *zLowerName;
+  unsigned char *z;
+
+
+  /* Check to see the left operand is a column in a virtual table */
+  if( NEVER(pExpr==0) ) return pDef;
+  if( pExpr->op!=TK_COLUMN ) return pDef;
+  pTab = pExpr->pTab;
+  if( NEVER(pTab==0) ) return pDef;
+  if( (pTab->tabFlags & TF_Virtual)==0 ) return pDef;
+  pVtab = sqlite3GetVTable(db, pTab)->pVtab;
+  assert( pVtab!=0 );
+  assert( pVtab->pModule!=0 );
+  pMod = (sqlite3_module *)pVtab->pModule;
+  if( pMod->xFindFunction==0 ) return pDef;
+ 
+  /* Call the xFindFunction method on the virtual table implementation
+  ** to see if the implementation wants to overload this function 
+  */
+  zLowerName = sqlite3DbStrDup(db, pDef->zName);
+  if( zLowerName ){
+    for(z=(unsigned char*)zLowerName; *z; z++){
+      *z = sqlite3UpperToLower[*z];
+    }
+    rc = pMod->xFindFunction(pVtab, nArg, zLowerName, &xFunc, &pArg);
+    sqlite3DbFree(db, zLowerName);
+  }
+  if( rc==0 ){
+    return pDef;
+  }
+
+  /* Create a new ephemeral function definition for the overloaded
+  ** function */
+  pNew = sqlite3DbMallocZero(db, sizeof(*pNew)
+                             + sqlite3Strlen30(pDef->zName) + 1);
+  if( pNew==0 ){
+    return pDef;
+  }
+  *pNew = *pDef;
+  pNew->zName = (char *)&pNew[1];
+  memcpy(pNew->zName, pDef->zName, sqlite3Strlen30(pDef->zName)+1);
+  pNew->xFunc = xFunc;
+  pNew->pUserData = pArg;
+  pNew->funcFlags |= SQLITE_FUNC_EPHEM;
+  return pNew;
+}
+
+/*
+** Make sure virtual table pTab is contained in the pParse->apVirtualLock[]
+** array so that an OP_VBegin will get generated for it.  Add pTab to the
+** array if it is missing.  If pTab is already in the array, this routine
+** is a no-op.
+*/
+SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse *pParse, Table *pTab){
+  Parse *pToplevel = sqlite3ParseToplevel(pParse);
+  int i, n;
+  Table **apVtabLock;
+
+  assert( IsVirtual(pTab) );
+  for(i=0; i<pToplevel->nVtabLock; i++){
+    if( pTab==pToplevel->apVtabLock[i] ) return;
+  }
+  n = (pToplevel->nVtabLock+1)*sizeof(pToplevel->apVtabLock[0]);
+  apVtabLock = sqlite3_realloc(pToplevel->apVtabLock, n);
+  if( apVtabLock ){
+    pToplevel->apVtabLock = apVtabLock;
+    pToplevel->apVtabLock[pToplevel->nVtabLock++] = pTab;
+  }else{
+    pToplevel->db->mallocFailed = 1;
+  }
+}
+
+/*
+** Return the ON CONFLICT resolution mode in effect for the virtual
+** table update operation currently in progress.
+**
+** The results of this routine are undefined unless it is called from
+** within an xUpdate method.
+*/
+SQLITE_API int sqlite3_vtab_on_conflict(sqlite3 *db){
+  static const unsigned char aMap[] = { 
+    SQLITE_ROLLBACK, SQLITE_ABORT, SQLITE_FAIL, SQLITE_IGNORE, SQLITE_REPLACE 
+  };
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  assert( OE_Rollback==1 && OE_Abort==2 && OE_Fail==3 );
+  assert( OE_Ignore==4 && OE_Replace==5 );
+  assert( db->vtabOnConflict>=1 && db->vtabOnConflict<=5 );
+  return (int)aMap[db->vtabOnConflict-1];
+}
+
+/*
+** Call from within the xCreate() or xConnect() methods to provide 
+** the SQLite core with additional information about the behavior
+** of the virtual table being implemented.
+*/
+SQLITE_API int sqlite3_vtab_config(sqlite3 *db, int op, ...){
+  va_list ap;
+  int rc = SQLITE_OK;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  va_start(ap, op);
+  switch( op ){
+    case SQLITE_VTAB_CONSTRAINT_SUPPORT: {
+      VtabCtx *p = db->pVtabCtx;
+      if( !p ){
+        rc = SQLITE_MISUSE_BKPT;
+      }else{
+        assert( p->pTab==0 || (p->pTab->tabFlags & TF_Virtual)!=0 );
+        p->pVTable->bConstraint = (u8)va_arg(ap, int);
+      }
+      break;
+    }
+    default:
+      rc = SQLITE_MISUSE_BKPT;
+      break;
+  }
+  va_end(ap);
+
+  if( rc!=SQLITE_OK ) sqlite3Error(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/************** End of vtab.c ************************************************/
+/************** Begin file where.c *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This module contains C code that generates VDBE code used to process
+** the WHERE clause of SQL statements.  This module is responsible for
+** generating the code that loops through a table looking for applicable
+** rows.  Indices are selected and used to speed the search when doing
+** so is applicable.  Because this module is responsible for selecting
+** indices, you might also think of this module as the "query optimizer".
+*/
+/************** Include whereInt.h in the middle of where.c ******************/
+/************** Begin file whereInt.h ****************************************/
+/*
+** 2013-11-12
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains structure and macro definitions for the query
+** planner logic in "where.c".  These definitions are broken out into
+** a separate source file for easier editing.
+*/
+
+/*
+** Trace output macros
+*/
+#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
+/***/ int sqlite3WhereTrace = 0;
+#endif
+#if defined(SQLITE_DEBUG) \
+    && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_WHERETRACE))
+# define WHERETRACE(K,X)  if(sqlite3WhereTrace&(K)) sqlite3DebugPrintf X
+# define WHERETRACE_ENABLED 1
+#else
+# define WHERETRACE(K,X)
+#endif
+
+/* Forward references
+*/
+typedef struct WhereClause WhereClause;
+typedef struct WhereMaskSet WhereMaskSet;
+typedef struct WhereOrInfo WhereOrInfo;
+typedef struct WhereAndInfo WhereAndInfo;
+typedef struct WhereLevel WhereLevel;
+typedef struct WhereLoop WhereLoop;
+typedef struct WherePath WherePath;
+typedef struct WhereTerm WhereTerm;
+typedef struct WhereLoopBuilder WhereLoopBuilder;
+typedef struct WhereScan WhereScan;
+typedef struct WhereOrCost WhereOrCost;
+typedef struct WhereOrSet WhereOrSet;
+
+/*
+** This object contains information needed to implement a single nested
+** loop in WHERE clause.
+**
+** Contrast this object with WhereLoop.  This object describes the
+** implementation of the loop.  WhereLoop describes the algorithm.
+** This object contains a pointer to the WhereLoop algorithm as one of
+** its elements.
+**
+** The WhereInfo object contains a single instance of this object for
+** each term in the FROM clause (which is to say, for each of the
+** nested loops as implemented).  The order of WhereLevel objects determines
+** the loop nested order, with WhereInfo.a[0] being the outer loop and
+** WhereInfo.a[WhereInfo.nLevel-1] being the inner loop.
+*/
+struct WhereLevel {
+  int iLeftJoin;        /* Memory cell used to implement LEFT OUTER JOIN */
+  int iTabCur;          /* The VDBE cursor used to access the table */
+  int iIdxCur;          /* The VDBE cursor used to access pIdx */
+  int addrBrk;          /* Jump here to break out of the loop */
+  int addrNxt;          /* Jump here to start the next IN combination */
+  int addrSkip;         /* Jump here for next iteration of skip-scan */
+  int addrCont;         /* Jump here to continue with the next loop cycle */
+  int addrFirst;        /* First instruction of interior of the loop */
+  int addrBody;         /* Beginning of the body of this loop */
+  u8 iFrom;             /* Which entry in the FROM clause */
+  u8 op, p3, p5;        /* Opcode, P3 & P5 of the opcode that ends the loop */
+  int p1, p2;           /* Operands of the opcode used to ends the loop */
+  union {               /* Information that depends on pWLoop->wsFlags */
+    struct {
+      int nIn;              /* Number of entries in aInLoop[] */
+      struct InLoop {
+        int iCur;              /* The VDBE cursor used by this IN operator */
+        int addrInTop;         /* Top of the IN loop */
+        u8 eEndLoopOp;         /* IN Loop terminator. OP_Next or OP_Prev */
+      } *aInLoop;           /* Information about each nested IN operator */
+    } in;                 /* Used when pWLoop->wsFlags&WHERE_IN_ABLE */
+    Index *pCovidx;       /* Possible covering index for WHERE_MULTI_OR */
+  } u;
+  struct WhereLoop *pWLoop;  /* The selected WhereLoop object */
+  Bitmask notReady;          /* FROM entries not usable at this level */
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  int addrVisit;        /* Address at which row is visited */
+#endif
+};
+
+/*
+** Each instance of this object represents an algorithm for evaluating one
+** term of a join.  Every term of the FROM clause will have at least
+** one corresponding WhereLoop object (unless INDEXED BY constraints
+** prevent a query solution - which is an error) and many terms of the
+** FROM clause will have multiple WhereLoop objects, each describing a
+** potential way of implementing that FROM-clause term, together with
+** dependencies and cost estimates for using the chosen algorithm.
+**
+** Query planning consists of building up a collection of these WhereLoop
+** objects, then computing a particular sequence of WhereLoop objects, with
+** one WhereLoop object per FROM clause term, that satisfy all dependencies
+** and that minimize the overall cost.
+*/
+struct WhereLoop {
+  Bitmask prereq;       /* Bitmask of other loops that must run first */
+  Bitmask maskSelf;     /* Bitmask identifying table iTab */
+#ifdef SQLITE_DEBUG
+  char cId;             /* Symbolic ID of this loop for debugging use */
+#endif
+  u8 iTab;              /* Position in FROM clause of table for this loop */
+  u8 iSortIdx;          /* Sorting index number.  0==None */
+  LogEst rSetup;        /* One-time setup cost (ex: create transient index) */
+  LogEst rRun;          /* Cost of running each loop */
+  LogEst nOut;          /* Estimated number of output rows */
+  union {
+    struct {               /* Information for internal btree tables */
+      u16 nEq;               /* Number of equality constraints */
+      Index *pIndex;         /* Index used, or NULL */
+    } btree;
+    struct {               /* Information for virtual tables */
+      int idxNum;            /* Index number */
+      u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
+      i8 isOrdered;          /* True if satisfies ORDER BY */
+      u16 omitMask;          /* Terms that may be omitted */
+      char *idxStr;          /* Index identifier string */
+    } vtab;
+  } u;
+  u32 wsFlags;          /* WHERE_* flags describing the plan */
+  u16 nLTerm;           /* Number of entries in aLTerm[] */
+  u16 nSkip;            /* Number of NULL aLTerm[] entries */
+  /**** whereLoopXfer() copies fields above ***********************/
+# define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot)
+  u16 nLSlot;           /* Number of slots allocated for aLTerm[] */
+  WhereTerm **aLTerm;   /* WhereTerms used */
+  WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */
+  WhereTerm *aLTermSpace[3];  /* Initial aLTerm[] space */
+};
+
+/* This object holds the prerequisites and the cost of running a
+** subquery on one operand of an OR operator in the WHERE clause.
+** See WhereOrSet for additional information 
+*/
+struct WhereOrCost {
+  Bitmask prereq;     /* Prerequisites */
+  LogEst rRun;        /* Cost of running this subquery */
+  LogEst nOut;        /* Number of outputs for this subquery */
+};
+
+/* The WhereOrSet object holds a set of possible WhereOrCosts that
+** correspond to the subquery(s) of OR-clause processing.  Only the
+** best N_OR_COST elements are retained.
+*/
+#define N_OR_COST 3
+struct WhereOrSet {
+  u16 n;                      /* Number of valid a[] entries */
+  WhereOrCost a[N_OR_COST];   /* Set of best costs */
+};
+
+
+/* Forward declaration of methods */
+static int whereLoopResize(sqlite3*, WhereLoop*, int);
+
+/*
+** Each instance of this object holds a sequence of WhereLoop objects
+** that implement some or all of a query plan.
+**
+** Think of each WhereLoop object as a node in a graph with arcs
+** showing dependencies and costs for travelling between nodes.  (That is
+** not a completely accurate description because WhereLoop costs are a
+** vector, not a scalar, and because dependencies are many-to-one, not
+** one-to-one as are graph nodes.  But it is a useful visualization aid.)
+** Then a WherePath object is a path through the graph that visits some
+** or all of the WhereLoop objects once.
+**
+** The "solver" works by creating the N best WherePath objects of length
+** 1.  Then using those as a basis to compute the N best WherePath objects
+** of length 2.  And so forth until the length of WherePaths equals the
+** number of nodes in the FROM clause.  The best (lowest cost) WherePath
+** at the end is the chosen query plan.
+*/
+struct WherePath {
+  Bitmask maskLoop;     /* Bitmask of all WhereLoop objects in this path */
+  Bitmask revLoop;      /* aLoop[]s that should be reversed for ORDER BY */
+  LogEst nRow;          /* Estimated number of rows generated by this path */
+  LogEst rCost;         /* Total cost of this path */
+  LogEst rUnsorted;     /* Total cost of this path ignoring sorting costs */
+  i8 isOrdered;         /* No. of ORDER BY terms satisfied. -1 for unknown */
+  WhereLoop **aLoop;    /* Array of WhereLoop objects implementing this path */
+};
+
+/*
+** The query generator uses an array of instances of this structure to
+** help it analyze the subexpressions of the WHERE clause.  Each WHERE
+** clause subexpression is separated from the others by AND operators,
+** usually, or sometimes subexpressions separated by OR.
+**
+** All WhereTerms are collected into a single WhereClause structure.  
+** The following identity holds:
+**
+**        WhereTerm.pWC->a[WhereTerm.idx] == WhereTerm
+**
+** When a term is of the form:
+**
+**              X <op> <expr>
+**
+** where X is a column name and <op> is one of certain operators,
+** then WhereTerm.leftCursor and WhereTerm.u.leftColumn record the
+** cursor number and column number for X.  WhereTerm.eOperator records
+** the <op> using a bitmask encoding defined by WO_xxx below.  The
+** use of a bitmask encoding for the operator allows us to search
+** quickly for terms that match any of several different operators.
+**
+** A WhereTerm might also be two or more subterms connected by OR:
+**
+**         (t1.X <op> <expr>) OR (t1.Y <op> <expr>) OR ....
+**
+** In this second case, wtFlag has the TERM_ORINFO bit set and eOperator==WO_OR
+** and the WhereTerm.u.pOrInfo field points to auxiliary information that
+** is collected about the OR clause.
+**
+** If a term in the WHERE clause does not match either of the two previous
+** categories, then eOperator==0.  The WhereTerm.pExpr field is still set
+** to the original subexpression content and wtFlags is set up appropriately
+** but no other fields in the WhereTerm object are meaningful.
+**
+** When eOperator!=0, prereqRight and prereqAll record sets of cursor numbers,
+** but they do so indirectly.  A single WhereMaskSet structure translates
+** cursor number into bits and the translated bit is stored in the prereq
+** fields.  The translation is used in order to maximize the number of
+** bits that will fit in a Bitmask.  The VDBE cursor numbers might be
+** spread out over the non-negative integers.  For example, the cursor
+** numbers might be 3, 8, 9, 10, 20, 23, 41, and 45.  The WhereMaskSet
+** translates these sparse cursor numbers into consecutive integers
+** beginning with 0 in order to make the best possible use of the available
+** bits in the Bitmask.  So, in the example above, the cursor numbers
+** would be mapped into integers 0 through 7.
+**
+** The number of terms in a join is limited by the number of bits
+** in prereqRight and prereqAll.  The default is 64 bits, hence SQLite
+** is only able to process joins with 64 or fewer tables.
+*/
+struct WhereTerm {
+  Expr *pExpr;            /* Pointer to the subexpression that is this term */
+  int iParent;            /* Disable pWC->a[iParent] when this term disabled */
+  int leftCursor;         /* Cursor number of X in "X <op> <expr>" */
+  union {
+    int leftColumn;         /* Column number of X in "X <op> <expr>" */
+    WhereOrInfo *pOrInfo;   /* Extra information if (eOperator & WO_OR)!=0 */
+    WhereAndInfo *pAndInfo; /* Extra information if (eOperator& WO_AND)!=0 */
+  } u;
+  LogEst truthProb;       /* Probability of truth for this expression */
+  u16 eOperator;          /* A WO_xx value describing <op> */
+  u8 wtFlags;             /* TERM_xxx bit flags.  See below */
+  u8 nChild;              /* Number of children that must disable us */
+  WhereClause *pWC;       /* The clause this term is part of */
+  Bitmask prereqRight;    /* Bitmask of tables used by pExpr->pRight */
+  Bitmask prereqAll;      /* Bitmask of tables referenced by pExpr */
+};
+
+/*
+** Allowed values of WhereTerm.wtFlags
+*/
+#define TERM_DYNAMIC    0x01   /* Need to call sqlite3ExprDelete(db, pExpr) */
+#define TERM_VIRTUAL    0x02   /* Added by the optimizer.  Do not code */
+#define TERM_CODED      0x04   /* This term is already coded */
+#define TERM_COPIED     0x08   /* Has a child */
+#define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
+#define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
+#define TERM_OR_OK      0x40   /* Used during OR-clause processing */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+#  define TERM_VNULL    0x80   /* Manufactured x>NULL or x<=NULL term */
+#else
+#  define TERM_VNULL    0x00   /* Disabled if not using stat3 */
+#endif
+
+/*
+** An instance of the WhereScan object is used as an iterator for locating
+** terms in the WHERE clause that are useful to the query planner.
+*/
+struct WhereScan {
+  WhereClause *pOrigWC;      /* Original, innermost WhereClause */
+  WhereClause *pWC;          /* WhereClause currently being scanned */
+  char *zCollName;           /* Required collating sequence, if not NULL */
+  char idxaff;               /* Must match this affinity, if zCollName!=NULL */
+  unsigned char nEquiv;      /* Number of entries in aEquiv[] */
+  unsigned char iEquiv;      /* Next unused slot in aEquiv[] */
+  u32 opMask;                /* Acceptable operators */
+  int k;                     /* Resume scanning at this->pWC->a[this->k] */
+  int aEquiv[22];            /* Cursor,Column pairs for equivalence classes */
+};
+
+/*
+** An instance of the following structure holds all information about a
+** WHERE clause.  Mostly this is a container for one or more WhereTerms.
+**
+** Explanation of pOuter:  For a WHERE clause of the form
+**
+**           a AND ((b AND c) OR (d AND e)) AND f
+**
+** There are separate WhereClause objects for the whole clause and for
+** the subclauses "(b AND c)" and "(d AND e)".  The pOuter field of the
+** subclauses points to the WhereClause object for the whole clause.
+*/
+struct WhereClause {
+  WhereInfo *pWInfo;       /* WHERE clause processing context */
+  WhereClause *pOuter;     /* Outer conjunction */
+  u8 op;                   /* Split operator.  TK_AND or TK_OR */
+  int nTerm;               /* Number of terms */
+  int nSlot;               /* Number of entries in a[] */
+  WhereTerm *a;            /* Each a[] describes a term of the WHERE cluase */
+#if defined(SQLITE_SMALL_STACK)
+  WhereTerm aStatic[1];    /* Initial static space for a[] */
+#else
+  WhereTerm aStatic[8];    /* Initial static space for a[] */
+#endif
+};
+
+/*
+** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to
+** a dynamically allocated instance of the following structure.
+*/
+struct WhereOrInfo {
+  WhereClause wc;          /* Decomposition into subterms */
+  Bitmask indexable;       /* Bitmask of all indexable tables in the clause */
+};
+
+/*
+** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to
+** a dynamically allocated instance of the following structure.
+*/
+struct WhereAndInfo {
+  WhereClause wc;          /* The subexpression broken out */
+};
+
+/*
+** An instance of the following structure keeps track of a mapping
+** between VDBE cursor numbers and bits of the bitmasks in WhereTerm.
+**
+** The VDBE cursor numbers are small integers contained in 
+** SrcList_item.iCursor and Expr.iTable fields.  For any given WHERE 
+** clause, the cursor numbers might not begin with 0 and they might
+** contain gaps in the numbering sequence.  But we want to make maximum
+** use of the bits in our bitmasks.  This structure provides a mapping
+** from the sparse cursor numbers into consecutive integers beginning
+** with 0.
+**
+** If WhereMaskSet.ix[A]==B it means that The A-th bit of a Bitmask
+** corresponds VDBE cursor number B.  The A-th bit of a bitmask is 1<<A.
+**
+** For example, if the WHERE clause expression used these VDBE
+** cursors:  4, 5, 8, 29, 57, 73.  Then the  WhereMaskSet structure
+** would map those cursor numbers into bits 0 through 5.
+**
+** Note that the mapping is not necessarily ordered.  In the example
+** above, the mapping might go like this:  4->3, 5->1, 8->2, 29->0,
+** 57->5, 73->4.  Or one of 719 other combinations might be used. It
+** does not really matter.  What is important is that sparse cursor
+** numbers all get mapped into bit numbers that begin with 0 and contain
+** no gaps.
+*/
+struct WhereMaskSet {
+  int n;                        /* Number of assigned cursor values */
+  int ix[BMS];                  /* Cursor assigned to each bit */
+};
+
+/*
+** This object is a convenience wrapper holding all information needed
+** to construct WhereLoop objects for a particular query.
+*/
+struct WhereLoopBuilder {
+  WhereInfo *pWInfo;        /* Information about this WHERE */
+  WhereClause *pWC;         /* WHERE clause terms */
+  ExprList *pOrderBy;       /* ORDER BY clause */
+  WhereLoop *pNew;          /* Template WhereLoop */
+  WhereOrSet *pOrSet;       /* Record best loops here, if not NULL */
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  UnpackedRecord *pRec;     /* Probe for stat4 (if required) */
+  int nRecValid;            /* Number of valid fields currently in pRec */
+#endif
+};
+
+/*
+** The WHERE clause processing routine has two halves.  The
+** first part does the start of the WHERE loop and the second
+** half does the tail of the WHERE loop.  An instance of
+** this structure is returned by the first half and passed
+** into the second half to give some continuity.
+**
+** An instance of this object holds the complete state of the query
+** planner.
+*/
+struct WhereInfo {
+  Parse *pParse;            /* Parsing and code generating context */
+  SrcList *pTabList;        /* List of tables in the join */
+  ExprList *pOrderBy;       /* The ORDER BY clause or NULL */
+  ExprList *pResultSet;     /* Result set. DISTINCT operates on these */
+  WhereLoop *pLoops;        /* List of all WhereLoop objects */
+  Bitmask revMask;          /* Mask of ORDER BY terms that need reversing */
+  LogEst nRowOut;           /* Estimated number of output rows */
+  u16 wctrlFlags;           /* Flags originally passed to sqlite3WhereBegin() */
+  i8 nOBSat;                /* Number of ORDER BY terms satisfied by indices */
+  u8 sorted;                /* True if really sorted (not just grouped) */
+  u8 okOnePass;             /* Ok to use one-pass algorithm for UPDATE/DELETE */
+  u8 untestedTerms;         /* Not all WHERE terms resolved by outer loop */
+  u8 eDistinct;             /* One of the WHERE_DISTINCT_* values below */
+  u8 nLevel;                /* Number of nested loop */
+  int iTop;                 /* The very beginning of the WHERE loop */
+  int iContinue;            /* Jump here to continue with next record */
+  int iBreak;               /* Jump here to break out of the loop */
+  int savedNQueryLoop;      /* pParse->nQueryLoop outside the WHERE loop */
+  int aiCurOnePass[2];      /* OP_OpenWrite cursors for the ONEPASS opt */
+  WhereMaskSet sMaskSet;    /* Map cursor numbers to bitmasks */
+  WhereClause sWC;          /* Decomposition of the WHERE clause */
+  WhereLevel a[1];          /* Information about each nest loop in WHERE */
+};
+
+/*
+** Bitmasks for the operators on WhereTerm objects.  These are all
+** operators that are of interest to the query planner.  An
+** OR-ed combination of these values can be used when searching for
+** particular WhereTerms within a WhereClause.
+*/
+#define WO_IN     0x001
+#define WO_EQ     0x002
+#define WO_LT     (WO_EQ<<(TK_LT-TK_EQ))
+#define WO_LE     (WO_EQ<<(TK_LE-TK_EQ))
+#define WO_GT     (WO_EQ<<(TK_GT-TK_EQ))
+#define WO_GE     (WO_EQ<<(TK_GE-TK_EQ))
+#define WO_MATCH  0x040
+#define WO_ISNULL 0x080
+#define WO_OR     0x100       /* Two or more OR-connected terms */
+#define WO_AND    0x200       /* Two or more AND-connected terms */
+#define WO_EQUIV  0x400       /* Of the form A==B, both columns */
+#define WO_NOOP   0x800       /* This term does not restrict search space */
+
+#define WO_ALL    0xfff       /* Mask of all possible WO_* values */
+#define WO_SINGLE 0x0ff       /* Mask of all non-compound WO_* values */
+
+/*
+** These are definitions of bits in the WhereLoop.wsFlags field.
+** The particular combination of bits in each WhereLoop help to
+** determine the algorithm that WhereLoop represents.
+*/
+#define WHERE_COLUMN_EQ    0x00000001  /* x=EXPR */
+#define WHERE_COLUMN_RANGE 0x00000002  /* x<EXPR and/or x>EXPR */
+#define WHERE_COLUMN_IN    0x00000004  /* x IN (...) */
+#define WHERE_COLUMN_NULL  0x00000008  /* x IS NULL */
+#define WHERE_CONSTRAINT   0x0000000f  /* Any of the WHERE_COLUMN_xxx values */
+#define WHERE_TOP_LIMIT    0x00000010  /* x<EXPR or x<=EXPR constraint */
+#define WHERE_BTM_LIMIT    0x00000020  /* x>EXPR or x>=EXPR constraint */
+#define WHERE_BOTH_LIMIT   0x00000030  /* Both x>EXPR and x<EXPR */
+#define WHERE_IDX_ONLY     0x00000040  /* Use index only - omit table */
+#define WHERE_IPK          0x00000100  /* x is the INTEGER PRIMARY KEY */
+#define WHERE_INDEXED      0x00000200  /* WhereLoop.u.btree.pIndex is valid */
+#define WHERE_VIRTUALTABLE 0x00000400  /* WhereLoop.u.vtab is valid */
+#define WHERE_IN_ABLE      0x00000800  /* Able to support an IN operator */
+#define WHERE_ONEROW       0x00001000  /* Selects no more than one row */
+#define WHERE_MULTI_OR     0x00002000  /* OR using multiple indices */
+#define WHERE_AUTO_INDEX   0x00004000  /* Uses an ephemeral index */
+#define WHERE_SKIPSCAN     0x00008000  /* Uses the skip-scan algorithm */
+#define WHERE_UNQ_WANTED   0x00010000  /* WHERE_ONEROW would have been helpful*/
+#define WHERE_PARTIALIDX   0x00020000  /* The automatic index is partial */
+
+/************** End of whereInt.h ********************************************/
+/************** Continuing where we left off in where.c **********************/
+
+/*
+** Return the estimated number of output rows from a WHERE clause
+*/
+SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){
+  return sqlite3LogEstToInt(pWInfo->nRowOut);
+}
+
+/*
+** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this
+** WHERE clause returns outputs for DISTINCT processing.
+*/
+SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){
+  return pWInfo->eDistinct;
+}
+
+/*
+** Return TRUE if the WHERE clause returns rows in ORDER BY order.
+** Return FALSE if the output needs to be sorted.
+*/
+SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){
+  return pWInfo->nOBSat;
+}
+
+/*
+** Return the VDBE address or label to jump to in order to continue
+** immediately with the next row of a WHERE clause.
+*/
+SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){
+  assert( pWInfo->iContinue!=0 );
+  return pWInfo->iContinue;
+}
+
+/*
+** Return the VDBE address or label to jump to in order to break
+** out of a WHERE loop.
+*/
+SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){
+  return pWInfo->iBreak;
+}
+
+/*
+** Return TRUE if an UPDATE or DELETE statement can operate directly on
+** the rowids returned by a WHERE clause.  Return FALSE if doing an
+** UPDATE or DELETE might change subsequent WHERE clause results.
+**
+** If the ONEPASS optimization is used (if this routine returns true)
+** then also write the indices of open cursors used by ONEPASS
+** into aiCur[0] and aiCur[1].  iaCur[0] gets the cursor of the data
+** table and iaCur[1] gets the cursor used by an auxiliary index.
+** Either value may be -1, indicating that cursor is not used.
+** Any cursors returned will have been opened for writing.
+**
+** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is
+** unable to use the ONEPASS optimization.
+*/
+SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){
+  memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2);
+  return pWInfo->okOnePass;
+}
+
+/*
+** Move the content of pSrc into pDest
+*/
+static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){
+  pDest->n = pSrc->n;
+  memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0]));
+}
+
+/*
+** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet.
+**
+** The new entry might overwrite an existing entry, or it might be
+** appended, or it might be discarded.  Do whatever is the right thing
+** so that pSet keeps the N_OR_COST best entries seen so far.
+*/
+static int whereOrInsert(
+  WhereOrSet *pSet,      /* The WhereOrSet to be updated */
+  Bitmask prereq,        /* Prerequisites of the new entry */
+  LogEst rRun,           /* Run-cost of the new entry */
+  LogEst nOut            /* Number of outputs for the new entry */
+){
+  u16 i;
+  WhereOrCost *p;
+  for(i=pSet->n, p=pSet->a; i>0; i--, p++){
+    if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){
+      goto whereOrInsert_done;
+    }
+    if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){
+      return 0;
+    }
+  }
+  if( pSet->n<N_OR_COST ){
+    p = &pSet->a[pSet->n++];
+    p->nOut = nOut;
+  }else{
+    p = pSet->a;
+    for(i=1; i<pSet->n; i++){
+      if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i;
+    }
+    if( p->rRun<=rRun ) return 0;
+  }
+whereOrInsert_done:
+  p->prereq = prereq;
+  p->rRun = rRun;
+  if( p->nOut>nOut ) p->nOut = nOut;
+  return 1;
+}
+
+/*
+** Initialize a preallocated WhereClause structure.
+*/
+static void whereClauseInit(
+  WhereClause *pWC,        /* The WhereClause to be initialized */
+  WhereInfo *pWInfo        /* The WHERE processing context */
+){
+  pWC->pWInfo = pWInfo;
+  pWC->pOuter = 0;
+  pWC->nTerm = 0;
+  pWC->nSlot = ArraySize(pWC->aStatic);
+  pWC->a = pWC->aStatic;
+}
+
+/* Forward reference */
+static void whereClauseClear(WhereClause*);
+
+/*
+** Deallocate all memory associated with a WhereOrInfo object.
+*/
+static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){
+  whereClauseClear(&p->wc);
+  sqlite3DbFree(db, p);
+}
+
+/*
+** Deallocate all memory associated with a WhereAndInfo object.
+*/
+static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){
+  whereClauseClear(&p->wc);
+  sqlite3DbFree(db, p);
+}
+
+/*
+** Deallocate a WhereClause structure.  The WhereClause structure
+** itself is not freed.  This routine is the inverse of whereClauseInit().
+*/
+static void whereClauseClear(WhereClause *pWC){
+  int i;
+  WhereTerm *a;
+  sqlite3 *db = pWC->pWInfo->pParse->db;
+  for(i=pWC->nTerm-1, a=pWC->a; i>=0; i--, a++){
+    if( a->wtFlags & TERM_DYNAMIC ){
+      sqlite3ExprDelete(db, a->pExpr);
+    }
+    if( a->wtFlags & TERM_ORINFO ){
+      whereOrInfoDelete(db, a->u.pOrInfo);
+    }else if( a->wtFlags & TERM_ANDINFO ){
+      whereAndInfoDelete(db, a->u.pAndInfo);
+    }
+  }
+  if( pWC->a!=pWC->aStatic ){
+    sqlite3DbFree(db, pWC->a);
+  }
+}
+
+/*
+** Add a single new WhereTerm entry to the WhereClause object pWC.
+** The new WhereTerm object is constructed from Expr p and with wtFlags.
+** The index in pWC->a[] of the new WhereTerm is returned on success.
+** 0 is returned if the new WhereTerm could not be added due to a memory
+** allocation error.  The memory allocation failure will be recorded in
+** the db->mallocFailed flag so that higher-level functions can detect it.
+**
+** This routine will increase the size of the pWC->a[] array as necessary.
+**
+** If the wtFlags argument includes TERM_DYNAMIC, then responsibility
+** for freeing the expression p is assumed by the WhereClause object pWC.
+** This is true even if this routine fails to allocate a new WhereTerm.
+**
+** WARNING:  This routine might reallocate the space used to store
+** WhereTerms.  All pointers to WhereTerms should be invalidated after
+** calling this routine.  Such pointers may be reinitialized by referencing
+** the pWC->a[] array.
+*/
+static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){
+  WhereTerm *pTerm;
+  int idx;
+  testcase( wtFlags & TERM_VIRTUAL );
+  if( pWC->nTerm>=pWC->nSlot ){
+    WhereTerm *pOld = pWC->a;
+    sqlite3 *db = pWC->pWInfo->pParse->db;
+    pWC->a = sqlite3DbMallocRaw(db, sizeof(pWC->a[0])*pWC->nSlot*2 );
+    if( pWC->a==0 ){
+      if( wtFlags & TERM_DYNAMIC ){
+        sqlite3ExprDelete(db, p);
+      }
+      pWC->a = pOld;
+      return 0;
+    }
+    memcpy(pWC->a, pOld, sizeof(pWC->a[0])*pWC->nTerm);
+    if( pOld!=pWC->aStatic ){
+      sqlite3DbFree(db, pOld);
+    }
+    pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]);
+    memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm));
+  }
+  pTerm = &pWC->a[idx = pWC->nTerm++];
+  if( p && ExprHasProperty(p, EP_Unlikely) ){
+    pTerm->truthProb = sqlite3LogEst(p->iTable) - 270;
+  }else{
+    pTerm->truthProb = 1;
+  }
+  pTerm->pExpr = sqlite3ExprSkipCollate(p);
+  pTerm->wtFlags = wtFlags;
+  pTerm->pWC = pWC;
+  pTerm->iParent = -1;
+  return idx;
+}
+
+/*
+** This routine identifies subexpressions in the WHERE clause where
+** each subexpression is separated by the AND operator or some other
+** operator specified in the op parameter.  The WhereClause structure
+** is filled with pointers to subexpressions.  For example:
+**
+**    WHERE  a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22)
+**           \________/     \_______________/     \________________/
+**            slot[0]            slot[1]               slot[2]
+**
+** The original WHERE clause in pExpr is unaltered.  All this routine
+** does is make slot[] entries point to substructure within pExpr.
+**
+** In the previous sentence and in the diagram, "slot[]" refers to
+** the WhereClause.a[] array.  The slot[] array grows as needed to contain
+** all terms of the WHERE clause.
+*/
+static void whereSplit(WhereClause *pWC, Expr *pExpr, u8 op){
+  pWC->op = op;
+  if( pExpr==0 ) return;
+  if( pExpr->op!=op ){
+    whereClauseInsert(pWC, pExpr, 0);
+  }else{
+    whereSplit(pWC, pExpr->pLeft, op);
+    whereSplit(pWC, pExpr->pRight, op);
+  }
+}
+
+/*
+** Initialize a WhereMaskSet object
+*/
+#define initMaskSet(P)  (P)->n=0
+
+/*
+** Return the bitmask for the given cursor number.  Return 0 if
+** iCursor is not in the set.
+*/
+static Bitmask getMask(WhereMaskSet *pMaskSet, int iCursor){
+  int i;
+  assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 );
+  for(i=0; i<pMaskSet->n; i++){
+    if( pMaskSet->ix[i]==iCursor ){
+      return MASKBIT(i);
+    }
+  }
+  return 0;
+}
+
+/*
+** Create a new mask for cursor iCursor.
+**
+** There is one cursor per table in the FROM clause.  The number of
+** tables in the FROM clause is limited by a test early in the
+** sqlite3WhereBegin() routine.  So we know that the pMaskSet->ix[]
+** array will never overflow.
+*/
+static void createMask(WhereMaskSet *pMaskSet, int iCursor){
+  assert( pMaskSet->n < ArraySize(pMaskSet->ix) );
+  pMaskSet->ix[pMaskSet->n++] = iCursor;
+}
+
+/*
+** These routines walk (recursively) an expression tree and generate
+** a bitmask indicating which tables are used in that expression
+** tree.
+*/
+static Bitmask exprListTableUsage(WhereMaskSet*, ExprList*);
+static Bitmask exprSelectTableUsage(WhereMaskSet*, Select*);
+static Bitmask exprTableUsage(WhereMaskSet *pMaskSet, Expr *p){
+  Bitmask mask = 0;
+  if( p==0 ) return 0;
+  if( p->op==TK_COLUMN ){
+    mask = getMask(pMaskSet, p->iTable);
+    return mask;
+  }
+  mask = exprTableUsage(pMaskSet, p->pRight);
+  mask |= exprTableUsage(pMaskSet, p->pLeft);
+  if( ExprHasProperty(p, EP_xIsSelect) ){
+    mask |= exprSelectTableUsage(pMaskSet, p->x.pSelect);
+  }else{
+    mask |= exprListTableUsage(pMaskSet, p->x.pList);
+  }
+  return mask;
+}
+static Bitmask exprListTableUsage(WhereMaskSet *pMaskSet, ExprList *pList){
+  int i;
+  Bitmask mask = 0;
+  if( pList ){
+    for(i=0; i<pList->nExpr; i++){
+      mask |= exprTableUsage(pMaskSet, pList->a[i].pExpr);
+    }
+  }
+  return mask;
+}
+static Bitmask exprSelectTableUsage(WhereMaskSet *pMaskSet, Select *pS){
+  Bitmask mask = 0;
+  while( pS ){
+    SrcList *pSrc = pS->pSrc;
+    mask |= exprListTableUsage(pMaskSet, pS->pEList);
+    mask |= exprListTableUsage(pMaskSet, pS->pGroupBy);
+    mask |= exprListTableUsage(pMaskSet, pS->pOrderBy);
+    mask |= exprTableUsage(pMaskSet, pS->pWhere);
+    mask |= exprTableUsage(pMaskSet, pS->pHaving);
+    if( ALWAYS(pSrc!=0) ){
+      int i;
+      for(i=0; i<pSrc->nSrc; i++){
+        mask |= exprSelectTableUsage(pMaskSet, pSrc->a[i].pSelect);
+        mask |= exprTableUsage(pMaskSet, pSrc->a[i].pOn);
+      }
+    }
+    pS = pS->pPrior;
+  }
+  return mask;
+}
+
+/*
+** Return TRUE if the given operator is one of the operators that is
+** allowed for an indexable WHERE clause term.  The allowed operators are
+** "=", "<", ">", "<=", ">=", "IN", and "IS NULL"
+*/
+static int allowedOp(int op){
+  assert( TK_GT>TK_EQ && TK_GT<TK_GE );
+  assert( TK_LT>TK_EQ && TK_LT<TK_GE );
+  assert( TK_LE>TK_EQ && TK_LE<TK_GE );
+  assert( TK_GE==TK_EQ+4 );
+  return op==TK_IN || (op>=TK_EQ && op<=TK_GE) || op==TK_ISNULL;
+}
+
+/*
+** Commute a comparison operator.  Expressions of the form "X op Y"
+** are converted into "Y op X".
+**
+** If left/right precedence rules come into play when determining the
+** collating sequence, then COLLATE operators are adjusted to ensure
+** that the collating sequence does not change.  For example:
+** "Y collate NOCASE op X" becomes "X op Y" because any collation sequence on
+** the left hand side of a comparison overrides any collation sequence 
+** attached to the right. For the same reason the EP_Collate flag
+** is not commuted.
+*/
+static void exprCommute(Parse *pParse, Expr *pExpr){
+  u16 expRight = (pExpr->pRight->flags & EP_Collate);
+  u16 expLeft = (pExpr->pLeft->flags & EP_Collate);
+  assert( allowedOp(pExpr->op) && pExpr->op!=TK_IN );
+  if( expRight==expLeft ){
+    /* Either X and Y both have COLLATE operator or neither do */
+    if( expRight ){
+      /* Both X and Y have COLLATE operators.  Make sure X is always
+      ** used by clearing the EP_Collate flag from Y. */
+      pExpr->pRight->flags &= ~EP_Collate;
+    }else if( sqlite3ExprCollSeq(pParse, pExpr->pLeft)!=0 ){
+      /* Neither X nor Y have COLLATE operators, but X has a non-default
+      ** collating sequence.  So add the EP_Collate marker on X to cause
+      ** it to be searched first. */
+      pExpr->pLeft->flags |= EP_Collate;
+    }
+  }
+  SWAP(Expr*,pExpr->pRight,pExpr->pLeft);
+  if( pExpr->op>=TK_GT ){
+    assert( TK_LT==TK_GT+2 );
+    assert( TK_GE==TK_LE+2 );
+    assert( TK_GT>TK_EQ );
+    assert( TK_GT<TK_LE );
+    assert( pExpr->op>=TK_GT && pExpr->op<=TK_GE );
+    pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT;
+  }
+}
+
+/*
+** Translate from TK_xx operator to WO_xx bitmask.
+*/
+static u16 operatorMask(int op){
+  u16 c;
+  assert( allowedOp(op) );
+  if( op==TK_IN ){
+    c = WO_IN;
+  }else if( op==TK_ISNULL ){
+    c = WO_ISNULL;
+  }else{
+    assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff );
+    c = (u16)(WO_EQ<<(op-TK_EQ));
+  }
+  assert( op!=TK_ISNULL || c==WO_ISNULL );
+  assert( op!=TK_IN || c==WO_IN );
+  assert( op!=TK_EQ || c==WO_EQ );
+  assert( op!=TK_LT || c==WO_LT );
+  assert( op!=TK_LE || c==WO_LE );
+  assert( op!=TK_GT || c==WO_GT );
+  assert( op!=TK_GE || c==WO_GE );
+  return c;
+}
+
+/*
+** Advance to the next WhereTerm that matches according to the criteria
+** established when the pScan object was initialized by whereScanInit().
+** Return NULL if there are no more matching WhereTerms.
+*/
+static WhereTerm *whereScanNext(WhereScan *pScan){
+  int iCur;            /* The cursor on the LHS of the term */
+  int iColumn;         /* The column on the LHS of the term.  -1 for IPK */
+  Expr *pX;            /* An expression being tested */
+  WhereClause *pWC;    /* Shorthand for pScan->pWC */
+  WhereTerm *pTerm;    /* The term being tested */
+  int k = pScan->k;    /* Where to start scanning */
+
+  while( pScan->iEquiv<=pScan->nEquiv ){
+    iCur = pScan->aEquiv[pScan->iEquiv-2];
+    iColumn = pScan->aEquiv[pScan->iEquiv-1];
+    while( (pWC = pScan->pWC)!=0 ){
+      for(pTerm=pWC->a+k; k<pWC->nTerm; k++, pTerm++){
+        if( pTerm->leftCursor==iCur
+         && pTerm->u.leftColumn==iColumn
+         && (pScan->iEquiv<=2 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin))
+        ){
+          if( (pTerm->eOperator & WO_EQUIV)!=0
+           && pScan->nEquiv<ArraySize(pScan->aEquiv)
+          ){
+            int j;
+            pX = sqlite3ExprSkipCollate(pTerm->pExpr->pRight);
+            assert( pX->op==TK_COLUMN );
+            for(j=0; j<pScan->nEquiv; j+=2){
+              if( pScan->aEquiv[j]==pX->iTable
+               && pScan->aEquiv[j+1]==pX->iColumn ){
+                  break;
+              }
+            }
+            if( j==pScan->nEquiv ){
+              pScan->aEquiv[j] = pX->iTable;
+              pScan->aEquiv[j+1] = pX->iColumn;
+              pScan->nEquiv += 2;
+            }
+          }
+          if( (pTerm->eOperator & pScan->opMask)!=0 ){
+            /* Verify the affinity and collating sequence match */
+            if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){
+              CollSeq *pColl;
+              Parse *pParse = pWC->pWInfo->pParse;
+              pX = pTerm->pExpr;
+              if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){
+                continue;
+              }
+              assert(pX->pLeft);
+              pColl = sqlite3BinaryCompareCollSeq(pParse,
+                                                  pX->pLeft, pX->pRight);
+              if( pColl==0 ) pColl = pParse->db->pDfltColl;
+              if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){
+                continue;
+              }
+            }
+            if( (pTerm->eOperator & WO_EQ)!=0
+             && (pX = pTerm->pExpr->pRight)->op==TK_COLUMN
+             && pX->iTable==pScan->aEquiv[0]
+             && pX->iColumn==pScan->aEquiv[1]
+            ){
+              continue;
+            }
+            pScan->k = k+1;
+            return pTerm;
+          }
+        }
+      }
+      pScan->pWC = pScan->pWC->pOuter;
+      k = 0;
+    }
+    pScan->pWC = pScan->pOrigWC;
+    k = 0;
+    pScan->iEquiv += 2;
+  }
+  return 0;
+}
+
+/*
+** Initialize a WHERE clause scanner object.  Return a pointer to the
+** first match.  Return NULL if there are no matches.
+**
+** The scanner will be searching the WHERE clause pWC.  It will look
+** for terms of the form "X <op> <expr>" where X is column iColumn of table
+** iCur.  The <op> must be one of the operators described by opMask.
+**
+** If the search is for X and the WHERE clause contains terms of the
+** form X=Y then this routine might also return terms of the form
+** "Y <op> <expr>".  The number of levels of transitivity is limited,
+** but is enough to handle most commonly occurring SQL statements.
+**
+** If X is not the INTEGER PRIMARY KEY then X must be compatible with
+** index pIdx.
+*/
+static WhereTerm *whereScanInit(
+  WhereScan *pScan,       /* The WhereScan object being initialized */
+  WhereClause *pWC,       /* The WHERE clause to be scanned */
+  int iCur,               /* Cursor to scan for */
+  int iColumn,            /* Column to scan for */
+  u32 opMask,             /* Operator(s) to scan for */
+  Index *pIdx             /* Must be compatible with this index */
+){
+  int j;
+
+  /* memset(pScan, 0, sizeof(*pScan)); */
+  pScan->pOrigWC = pWC;
+  pScan->pWC = pWC;
+  if( pIdx && iColumn>=0 ){
+    pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity;
+    for(j=0; pIdx->aiColumn[j]!=iColumn; j++){
+      if( NEVER(j>pIdx->nColumn) ) return 0;
+    }
+    pScan->zCollName = pIdx->azColl[j];
+  }else{
+    pScan->idxaff = 0;
+    pScan->zCollName = 0;
+  }
+  pScan->opMask = opMask;
+  pScan->k = 0;
+  pScan->aEquiv[0] = iCur;
+  pScan->aEquiv[1] = iColumn;
+  pScan->nEquiv = 2;
+  pScan->iEquiv = 2;
+  return whereScanNext(pScan);
+}
+
+/*
+** Search for a term in the WHERE clause that is of the form "X <op> <expr>"
+** where X is a reference to the iColumn of table iCur and <op> is one of
+** the WO_xx operator codes specified by the op parameter.
+** Return a pointer to the term.  Return 0 if not found.
+**
+** The term returned might by Y=<expr> if there is another constraint in
+** the WHERE clause that specifies that X=Y.  Any such constraints will be
+** identified by the WO_EQUIV bit in the pTerm->eOperator field.  The
+** aEquiv[] array holds X and all its equivalents, with each SQL variable
+** taking up two slots in aEquiv[].  The first slot is for the cursor number
+** and the second is for the column number.  There are 22 slots in aEquiv[]
+** so that means we can look for X plus up to 10 other equivalent values.
+** Hence a search for X will return <expr> if X=A1 and A1=A2 and A2=A3
+** and ... and A9=A10 and A10=<expr>.
+**
+** If there are multiple terms in the WHERE clause of the form "X <op> <expr>"
+** then try for the one with no dependencies on <expr> - in other words where
+** <expr> is a constant expression of some kind.  Only return entries of
+** the form "X <op> Y" where Y is a column in another table if no terms of
+** the form "X <op> <const-expr>" exist.   If no terms with a constant RHS
+** exist, try to return a term that does not use WO_EQUIV.
+*/
+static WhereTerm *findTerm(
+  WhereClause *pWC,     /* The WHERE clause to be searched */
+  int iCur,             /* Cursor number of LHS */
+  int iColumn,          /* Column number of LHS */
+  Bitmask notReady,     /* RHS must not overlap with this mask */
+  u32 op,               /* Mask of WO_xx values describing operator */
+  Index *pIdx           /* Must be compatible with this index, if not NULL */
+){
+  WhereTerm *pResult = 0;
+  WhereTerm *p;
+  WhereScan scan;
+
+  p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx);
+  while( p ){
+    if( (p->prereqRight & notReady)==0 ){
+      if( p->prereqRight==0 && (p->eOperator&WO_EQ)!=0 ){
+        return p;
+      }
+      if( pResult==0 ) pResult = p;
+    }
+    p = whereScanNext(&scan);
+  }
+  return pResult;
+}
+
+/* Forward reference */
+static void exprAnalyze(SrcList*, WhereClause*, int);
+
+/*
+** Call exprAnalyze on all terms in a WHERE clause.  
+*/
+static void exprAnalyzeAll(
+  SrcList *pTabList,       /* the FROM clause */
+  WhereClause *pWC         /* the WHERE clause to be analyzed */
+){
+  int i;
+  for(i=pWC->nTerm-1; i>=0; i--){
+    exprAnalyze(pTabList, pWC, i);
+  }
+}
+
+#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
+/*
+** Check to see if the given expression is a LIKE or GLOB operator that
+** can be optimized using inequality constraints.  Return TRUE if it is
+** so and false if not.
+**
+** In order for the operator to be optimizible, the RHS must be a string
+** literal that does not begin with a wildcard.  
+*/
+static int isLikeOrGlob(
+  Parse *pParse,    /* Parsing and code generating context */
+  Expr *pExpr,      /* Test this expression */
+  Expr **ppPrefix,  /* Pointer to TK_STRING expression with pattern prefix */
+  int *pisComplete, /* True if the only wildcard is % in the last character */
+  int *pnoCase      /* True if uppercase is equivalent to lowercase */
+){
+  const char *z = 0;         /* String on RHS of LIKE operator */
+  Expr *pRight, *pLeft;      /* Right and left size of LIKE operator */
+  ExprList *pList;           /* List of operands to the LIKE operator */
+  int c;                     /* One character in z[] */
+  int cnt;                   /* Number of non-wildcard prefix characters */
+  char wc[3];                /* Wildcard characters */
+  sqlite3 *db = pParse->db;  /* Database connection */
+  sqlite3_value *pVal = 0;
+  int op;                    /* Opcode of pRight */
+
+  if( !sqlite3IsLikeFunction(db, pExpr, pnoCase, wc) ){
+    return 0;
+  }
+#ifdef SQLITE_EBCDIC
+  if( *pnoCase ) return 0;
+#endif
+  pList = pExpr->x.pList;
+  pLeft = pList->a[1].pExpr;
+  if( pLeft->op!=TK_COLUMN 
+   || sqlite3ExprAffinity(pLeft)!=SQLITE_AFF_TEXT 
+   || IsVirtual(pLeft->pTab)
+  ){
+    /* IMP: R-02065-49465 The left-hand side of the LIKE or GLOB operator must
+    ** be the name of an indexed column with TEXT affinity. */
+    return 0;
+  }
+  assert( pLeft->iColumn!=(-1) ); /* Because IPK never has AFF_TEXT */
+
+  pRight = sqlite3ExprSkipCollate(pList->a[0].pExpr);
+  op = pRight->op;
+  if( op==TK_VARIABLE ){
+    Vdbe *pReprepare = pParse->pReprepare;
+    int iCol = pRight->iColumn;
+    pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_NONE);
+    if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){
+      z = (char *)sqlite3_value_text(pVal);
+    }
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol);
+    assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER );
+  }else if( op==TK_STRING ){
+    z = pRight->u.zToken;
+  }
+  if( z ){
+    cnt = 0;
+    while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){
+      cnt++;
+    }
+    if( cnt!=0 && 255!=(u8)z[cnt-1] ){
+      Expr *pPrefix;
+      *pisComplete = c==wc[0] && z[cnt+1]==0;
+      pPrefix = sqlite3Expr(db, TK_STRING, z);
+      if( pPrefix ) pPrefix->u.zToken[cnt] = 0;
+      *ppPrefix = pPrefix;
+      if( op==TK_VARIABLE ){
+        Vdbe *v = pParse->pVdbe;
+        sqlite3VdbeSetVarmask(v, pRight->iColumn);
+        if( *pisComplete && pRight->u.zToken[1] ){
+          /* If the rhs of the LIKE expression is a variable, and the current
+          ** value of the variable means there is no need to invoke the LIKE
+          ** function, then no OP_Variable will be added to the program.
+          ** This causes problems for the sqlite3_bind_parameter_name()
+          ** API. To work around them, add a dummy OP_Variable here.
+          */ 
+          int r1 = sqlite3GetTempReg(pParse);
+          sqlite3ExprCodeTarget(pParse, pRight, r1);
+          sqlite3VdbeChangeP3(v, sqlite3VdbeCurrentAddr(v)-1, 0);
+          sqlite3ReleaseTempReg(pParse, r1);
+        }
+      }
+    }else{
+      z = 0;
+    }
+  }
+
+  sqlite3ValueFree(pVal);
+  return (z!=0);
+}
+#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */
+
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Check to see if the given expression is of the form
+**
+**         column MATCH expr
+**
+** If it is then return TRUE.  If not, return FALSE.
+*/
+static int isMatchOfColumn(
+  Expr *pExpr      /* Test this expression */
+){
+  ExprList *pList;
+
+  if( pExpr->op!=TK_FUNCTION ){
+    return 0;
+  }
+  if( sqlite3StrICmp(pExpr->u.zToken,"match")!=0 ){
+    return 0;
+  }
+  pList = pExpr->x.pList;
+  if( pList->nExpr!=2 ){
+    return 0;
+  }
+  if( pList->a[1].pExpr->op != TK_COLUMN ){
+    return 0;
+  }
+  return 1;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/*
+** If the pBase expression originated in the ON or USING clause of
+** a join, then transfer the appropriate markings over to derived.
+*/
+static void transferJoinMarkings(Expr *pDerived, Expr *pBase){
+  if( pDerived ){
+    pDerived->flags |= pBase->flags & EP_FromJoin;
+    pDerived->iRightJoinTable = pBase->iRightJoinTable;
+  }
+}
+
+/*
+** Mark term iChild as being a child of term iParent
+*/
+static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){
+  pWC->a[iChild].iParent = iParent;
+  pWC->a[iChild].truthProb = pWC->a[iParent].truthProb;
+  pWC->a[iParent].nChild++;
+}
+
+#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY)
+/*
+** Analyze a term that consists of two or more OR-connected
+** subterms.  So in:
+**
+**     ... WHERE  (a=5) AND (b=7 OR c=9 OR d=13) AND (d=13)
+**                          ^^^^^^^^^^^^^^^^^^^^
+**
+** This routine analyzes terms such as the middle term in the above example.
+** A WhereOrTerm object is computed and attached to the term under
+** analysis, regardless of the outcome of the analysis.  Hence:
+**
+**     WhereTerm.wtFlags   |=  TERM_ORINFO
+**     WhereTerm.u.pOrInfo  =  a dynamically allocated WhereOrTerm object
+**
+** The term being analyzed must have two or more of OR-connected subterms.
+** A single subterm might be a set of AND-connected sub-subterms.
+** Examples of terms under analysis:
+**
+**     (A)     t1.x=t2.y OR t1.x=t2.z OR t1.y=15 OR t1.z=t3.a+5
+**     (B)     x=expr1 OR expr2=x OR x=expr3
+**     (C)     t1.x=t2.y OR (t1.x=t2.z AND t1.y=15)
+**     (D)     x=expr1 OR (y>11 AND y<22 AND z LIKE '*hello*')
+**     (E)     (p.a=1 AND q.b=2 AND r.c=3) OR (p.x=4 AND q.y=5 AND r.z=6)
+**
+** CASE 1:
+**
+** If all subterms are of the form T.C=expr for some single column of C and
+** a single table T (as shown in example B above) then create a new virtual
+** term that is an equivalent IN expression.  In other words, if the term
+** being analyzed is:
+**
+**      x = expr1  OR  expr2 = x  OR  x = expr3
+**
+** then create a new virtual term like this:
+**
+**      x IN (expr1,expr2,expr3)
+**
+** CASE 2:
+**
+** If all subterms are indexable by a single table T, then set
+**
+**     WhereTerm.eOperator              =  WO_OR
+**     WhereTerm.u.pOrInfo->indexable  |=  the cursor number for table T
+**
+** A subterm is "indexable" if it is of the form
+** "T.C <op> <expr>" where C is any column of table T and 
+** <op> is one of "=", "<", "<=", ">", ">=", "IS NULL", or "IN".
+** A subterm is also indexable if it is an AND of two or more
+** subsubterms at least one of which is indexable.  Indexable AND 
+** subterms have their eOperator set to WO_AND and they have
+** u.pAndInfo set to a dynamically allocated WhereAndTerm object.
+**
+** From another point of view, "indexable" means that the subterm could
+** potentially be used with an index if an appropriate index exists.
+** This analysis does not consider whether or not the index exists; that
+** is decided elsewhere.  This analysis only looks at whether subterms
+** appropriate for indexing exist.
+**
+** All examples A through E above satisfy case 2.  But if a term
+** also satisfies case 1 (such as B) we know that the optimizer will
+** always prefer case 1, so in that case we pretend that case 2 is not
+** satisfied.
+**
+** It might be the case that multiple tables are indexable.  For example,
+** (E) above is indexable on tables P, Q, and R.
+**
+** Terms that satisfy case 2 are candidates for lookup by using
+** separate indices to find rowids for each subterm and composing
+** the union of all rowids using a RowSet object.  This is similar
+** to "bitmap indices" in other database engines.
+**
+** OTHERWISE:
+**
+** If neither case 1 nor case 2 apply, then leave the eOperator set to
+** zero.  This term is not useful for search.
+*/
+static void exprAnalyzeOrTerm(
+  SrcList *pSrc,            /* the FROM clause */
+  WhereClause *pWC,         /* the complete WHERE clause */
+  int idxTerm               /* Index of the OR-term to be analyzed */
+){
+  WhereInfo *pWInfo = pWC->pWInfo;        /* WHERE clause processing context */
+  Parse *pParse = pWInfo->pParse;         /* Parser context */
+  sqlite3 *db = pParse->db;               /* Database connection */
+  WhereTerm *pTerm = &pWC->a[idxTerm];    /* The term to be analyzed */
+  Expr *pExpr = pTerm->pExpr;             /* The expression of the term */
+  int i;                                  /* Loop counters */
+  WhereClause *pOrWc;       /* Breakup of pTerm into subterms */
+  WhereTerm *pOrTerm;       /* A Sub-term within the pOrWc */
+  WhereOrInfo *pOrInfo;     /* Additional information associated with pTerm */
+  Bitmask chngToIN;         /* Tables that might satisfy case 1 */
+  Bitmask indexable;        /* Tables that are indexable, satisfying case 2 */
+
+  /*
+  ** Break the OR clause into its separate subterms.  The subterms are
+  ** stored in a WhereClause structure containing within the WhereOrInfo
+  ** object that is attached to the original OR clause term.
+  */
+  assert( (pTerm->wtFlags & (TERM_DYNAMIC|TERM_ORINFO|TERM_ANDINFO))==0 );
+  assert( pExpr->op==TK_OR );
+  pTerm->u.pOrInfo = pOrInfo = sqlite3DbMallocZero(db, sizeof(*pOrInfo));
+  if( pOrInfo==0 ) return;
+  pTerm->wtFlags |= TERM_ORINFO;
+  pOrWc = &pOrInfo->wc;
+  whereClauseInit(pOrWc, pWInfo);
+  whereSplit(pOrWc, pExpr, TK_OR);
+  exprAnalyzeAll(pSrc, pOrWc);
+  if( db->mallocFailed ) return;
+  assert( pOrWc->nTerm>=2 );
+
+  /*
+  ** Compute the set of tables that might satisfy cases 1 or 2.
+  */
+  indexable = ~(Bitmask)0;
+  chngToIN = ~(Bitmask)0;
+  for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){
+    if( (pOrTerm->eOperator & WO_SINGLE)==0 ){
+      WhereAndInfo *pAndInfo;
+      assert( (pOrTerm->wtFlags & (TERM_ANDINFO|TERM_ORINFO))==0 );
+      chngToIN = 0;
+      pAndInfo = sqlite3DbMallocRaw(db, sizeof(*pAndInfo));
+      if( pAndInfo ){
+        WhereClause *pAndWC;
+        WhereTerm *pAndTerm;
+        int j;
+        Bitmask b = 0;
+        pOrTerm->u.pAndInfo = pAndInfo;
+        pOrTerm->wtFlags |= TERM_ANDINFO;
+        pOrTerm->eOperator = WO_AND;
+        pAndWC = &pAndInfo->wc;
+        whereClauseInit(pAndWC, pWC->pWInfo);
+        whereSplit(pAndWC, pOrTerm->pExpr, TK_AND);
+        exprAnalyzeAll(pSrc, pAndWC);
+        pAndWC->pOuter = pWC;
+        testcase( db->mallocFailed );
+        if( !db->mallocFailed ){
+          for(j=0, pAndTerm=pAndWC->a; j<pAndWC->nTerm; j++, pAndTerm++){
+            assert( pAndTerm->pExpr );
+            if( allowedOp(pAndTerm->pExpr->op) ){
+              b |= getMask(&pWInfo->sMaskSet, pAndTerm->leftCursor);
+            }
+          }
+        }
+        indexable &= b;
+      }
+    }else if( pOrTerm->wtFlags & TERM_COPIED ){
+      /* Skip this term for now.  We revisit it when we process the
+      ** corresponding TERM_VIRTUAL term */
+    }else{
+      Bitmask b;
+      b = getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor);
+      if( pOrTerm->wtFlags & TERM_VIRTUAL ){
+        WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent];
+        b |= getMask(&pWInfo->sMaskSet, pOther->leftCursor);
+      }
+      indexable &= b;
+      if( (pOrTerm->eOperator & WO_EQ)==0 ){
+        chngToIN = 0;
+      }else{
+        chngToIN &= b;
+      }
+    }
+  }
+
+  /*
+  ** Record the set of tables that satisfy case 2.  The set might be
+  ** empty.
+  */
+  pOrInfo->indexable = indexable;
+  pTerm->eOperator = indexable==0 ? 0 : WO_OR;
+
+  /*
+  ** chngToIN holds a set of tables that *might* satisfy case 1.  But
+  ** we have to do some additional checking to see if case 1 really
+  ** is satisfied.
+  **
+  ** chngToIN will hold either 0, 1, or 2 bits.  The 0-bit case means
+  ** that there is no possibility of transforming the OR clause into an
+  ** IN operator because one or more terms in the OR clause contain
+  ** something other than == on a column in the single table.  The 1-bit
+  ** case means that every term of the OR clause is of the form
+  ** "table.column=expr" for some single table.  The one bit that is set
+  ** will correspond to the common table.  We still need to check to make
+  ** sure the same column is used on all terms.  The 2-bit case is when
+  ** the all terms are of the form "table1.column=table2.column".  It
+  ** might be possible to form an IN operator with either table1.column
+  ** or table2.column as the LHS if either is common to every term of
+  ** the OR clause.
+  **
+  ** Note that terms of the form "table.column1=table.column2" (the
+  ** same table on both sizes of the ==) cannot be optimized.
+  */
+  if( chngToIN ){
+    int okToChngToIN = 0;     /* True if the conversion to IN is valid */
+    int iColumn = -1;         /* Column index on lhs of IN operator */
+    int iCursor = -1;         /* Table cursor common to all terms */
+    int j = 0;                /* Loop counter */
+
+    /* Search for a table and column that appears on one side or the
+    ** other of the == operator in every subterm.  That table and column
+    ** will be recorded in iCursor and iColumn.  There might not be any
+    ** such table and column.  Set okToChngToIN if an appropriate table
+    ** and column is found but leave okToChngToIN false if not found.
+    */
+    for(j=0; j<2 && !okToChngToIN; j++){
+      pOrTerm = pOrWc->a;
+      for(i=pOrWc->nTerm-1; i>=0; i--, pOrTerm++){
+        assert( pOrTerm->eOperator & WO_EQ );
+        pOrTerm->wtFlags &= ~TERM_OR_OK;
+        if( pOrTerm->leftCursor==iCursor ){
+          /* This is the 2-bit case and we are on the second iteration and
+          ** current term is from the first iteration.  So skip this term. */
+          assert( j==1 );
+          continue;
+        }
+        if( (chngToIN & getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor))==0 ){
+          /* This term must be of the form t1.a==t2.b where t2 is in the
+          ** chngToIN set but t1 is not.  This term will be either preceded
+          ** or follwed by an inverted copy (t2.b==t1.a).  Skip this term 
+          ** and use its inversion. */
+          testcase( pOrTerm->wtFlags & TERM_COPIED );
+          testcase( pOrTerm->wtFlags & TERM_VIRTUAL );
+          assert( pOrTerm->wtFlags & (TERM_COPIED|TERM_VIRTUAL) );
+          continue;
+        }
+        iColumn = pOrTerm->u.leftColumn;
+        iCursor = pOrTerm->leftCursor;
+        break;
+      }
+      if( i<0 ){
+        /* No candidate table+column was found.  This can only occur
+        ** on the second iteration */
+        assert( j==1 );
+        assert( IsPowerOfTwo(chngToIN) );
+        assert( chngToIN==getMask(&pWInfo->sMaskSet, iCursor) );
+        break;
+      }
+      testcase( j==1 );
+
+      /* We have found a candidate table and column.  Check to see if that
+      ** table and column is common to every term in the OR clause */
+      okToChngToIN = 1;
+      for(; i>=0 && okToChngToIN; i--, pOrTerm++){
+        assert( pOrTerm->eOperator & WO_EQ );
+        if( pOrTerm->leftCursor!=iCursor ){
+          pOrTerm->wtFlags &= ~TERM_OR_OK;
+        }else if( pOrTerm->u.leftColumn!=iColumn ){
+          okToChngToIN = 0;
+        }else{
+          int affLeft, affRight;
+          /* If the right-hand side is also a column, then the affinities
+          ** of both right and left sides must be such that no type
+          ** conversions are required on the right.  (Ticket #2249)
+          */
+          affRight = sqlite3ExprAffinity(pOrTerm->pExpr->pRight);
+          affLeft = sqlite3ExprAffinity(pOrTerm->pExpr->pLeft);
+          if( affRight!=0 && affRight!=affLeft ){
+            okToChngToIN = 0;
+          }else{
+            pOrTerm->wtFlags |= TERM_OR_OK;
+          }
+        }
+      }
+    }
+
+    /* At this point, okToChngToIN is true if original pTerm satisfies
+    ** case 1.  In that case, construct a new virtual term that is 
+    ** pTerm converted into an IN operator.
+    */
+    if( okToChngToIN ){
+      Expr *pDup;            /* A transient duplicate expression */
+      ExprList *pList = 0;   /* The RHS of the IN operator */
+      Expr *pLeft = 0;       /* The LHS of the IN operator */
+      Expr *pNew;            /* The complete IN operator */
+
+      for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0; i--, pOrTerm++){
+        if( (pOrTerm->wtFlags & TERM_OR_OK)==0 ) continue;
+        assert( pOrTerm->eOperator & WO_EQ );
+        assert( pOrTerm->leftCursor==iCursor );
+        assert( pOrTerm->u.leftColumn==iColumn );
+        pDup = sqlite3ExprDup(db, pOrTerm->pExpr->pRight, 0);
+        pList = sqlite3ExprListAppend(pWInfo->pParse, pList, pDup);
+        pLeft = pOrTerm->pExpr->pLeft;
+      }
+      assert( pLeft!=0 );
+      pDup = sqlite3ExprDup(db, pLeft, 0);
+      pNew = sqlite3PExpr(pParse, TK_IN, pDup, 0, 0);
+      if( pNew ){
+        int idxNew;
+        transferJoinMarkings(pNew, pExpr);
+        assert( !ExprHasProperty(pNew, EP_xIsSelect) );
+        pNew->x.pList = pList;
+        idxNew = whereClauseInsert(pWC, pNew, TERM_VIRTUAL|TERM_DYNAMIC);
+        testcase( idxNew==0 );
+        exprAnalyze(pSrc, pWC, idxNew);
+        pTerm = &pWC->a[idxTerm];
+        markTermAsChild(pWC, idxNew, idxTerm);
+      }else{
+        sqlite3ExprListDelete(db, pList);
+      }
+      pTerm->eOperator = WO_NOOP;  /* case 1 trumps case 2 */
+    }
+  }
+}
+#endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */
+
+/*
+** The input to this routine is an WhereTerm structure with only the
+** "pExpr" field filled in.  The job of this routine is to analyze the
+** subexpression and populate all the other fields of the WhereTerm
+** structure.
+**
+** If the expression is of the form "<expr> <op> X" it gets commuted
+** to the standard form of "X <op> <expr>".
+**
+** If the expression is of the form "X <op> Y" where both X and Y are
+** columns, then the original expression is unchanged and a new virtual
+** term of the form "Y <op> X" is added to the WHERE clause and
+** analyzed separately.  The original term is marked with TERM_COPIED
+** and the new term is marked with TERM_DYNAMIC (because it's pExpr
+** needs to be freed with the WhereClause) and TERM_VIRTUAL (because it
+** is a commuted copy of a prior term.)  The original term has nChild=1
+** and the copy has idxParent set to the index of the original term.
+*/
+static void exprAnalyze(
+  SrcList *pSrc,            /* the FROM clause */
+  WhereClause *pWC,         /* the WHERE clause */
+  int idxTerm               /* Index of the term to be analyzed */
+){
+  WhereInfo *pWInfo = pWC->pWInfo; /* WHERE clause processing context */
+  WhereTerm *pTerm;                /* The term to be analyzed */
+  WhereMaskSet *pMaskSet;          /* Set of table index masks */
+  Expr *pExpr;                     /* The expression to be analyzed */
+  Bitmask prereqLeft;              /* Prerequesites of the pExpr->pLeft */
+  Bitmask prereqAll;               /* Prerequesites of pExpr */
+  Bitmask extraRight = 0;          /* Extra dependencies on LEFT JOIN */
+  Expr *pStr1 = 0;                 /* RHS of LIKE/GLOB operator */
+  int isComplete = 0;              /* RHS of LIKE/GLOB ends with wildcard */
+  int noCase = 0;                  /* LIKE/GLOB distinguishes case */
+  int op;                          /* Top-level operator.  pExpr->op */
+  Parse *pParse = pWInfo->pParse;  /* Parsing context */
+  sqlite3 *db = pParse->db;        /* Database connection */
+
+  if( db->mallocFailed ){
+    return;
+  }
+  pTerm = &pWC->a[idxTerm];
+  pMaskSet = &pWInfo->sMaskSet;
+  pExpr = pTerm->pExpr;
+  assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE );
+  prereqLeft = exprTableUsage(pMaskSet, pExpr->pLeft);
+  op = pExpr->op;
+  if( op==TK_IN ){
+    assert( pExpr->pRight==0 );
+    if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+      pTerm->prereqRight = exprSelectTableUsage(pMaskSet, pExpr->x.pSelect);
+    }else{
+      pTerm->prereqRight = exprListTableUsage(pMaskSet, pExpr->x.pList);
+    }
+  }else if( op==TK_ISNULL ){
+    pTerm->prereqRight = 0;
+  }else{
+    pTerm->prereqRight = exprTableUsage(pMaskSet, pExpr->pRight);
+  }
+  prereqAll = exprTableUsage(pMaskSet, pExpr);
+  if( ExprHasProperty(pExpr, EP_FromJoin) ){
+    Bitmask x = getMask(pMaskSet, pExpr->iRightJoinTable);
+    prereqAll |= x;
+    extraRight = x-1;  /* ON clause terms may not be used with an index
+                       ** on left table of a LEFT JOIN.  Ticket #3015 */
+  }
+  pTerm->prereqAll = prereqAll;
+  pTerm->leftCursor = -1;
+  pTerm->iParent = -1;
+  pTerm->eOperator = 0;
+  if( allowedOp(op) ){
+    Expr *pLeft = sqlite3ExprSkipCollate(pExpr->pLeft);
+    Expr *pRight = sqlite3ExprSkipCollate(pExpr->pRight);
+    u16 opMask = (pTerm->prereqRight & prereqLeft)==0 ? WO_ALL : WO_EQUIV;
+    if( pLeft->op==TK_COLUMN ){
+      pTerm->leftCursor = pLeft->iTable;
+      pTerm->u.leftColumn = pLeft->iColumn;
+      pTerm->eOperator = operatorMask(op) & opMask;
+    }
+    if( pRight && pRight->op==TK_COLUMN ){
+      WhereTerm *pNew;
+      Expr *pDup;
+      u16 eExtraOp = 0;        /* Extra bits for pNew->eOperator */
+      if( pTerm->leftCursor>=0 ){
+        int idxNew;
+        pDup = sqlite3ExprDup(db, pExpr, 0);
+        if( db->mallocFailed ){
+          sqlite3ExprDelete(db, pDup);
+          return;
+        }
+        idxNew = whereClauseInsert(pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC);
+        if( idxNew==0 ) return;
+        pNew = &pWC->a[idxNew];
+        markTermAsChild(pWC, idxNew, idxTerm);
+        pTerm = &pWC->a[idxTerm];
+        pTerm->wtFlags |= TERM_COPIED;
+        if( pExpr->op==TK_EQ
+         && !ExprHasProperty(pExpr, EP_FromJoin)
+         && OptimizationEnabled(db, SQLITE_Transitive)
+        ){
+          pTerm->eOperator |= WO_EQUIV;
+          eExtraOp = WO_EQUIV;
+        }
+      }else{
+        pDup = pExpr;
+        pNew = pTerm;
+      }
+      exprCommute(pParse, pDup);
+      pLeft = sqlite3ExprSkipCollate(pDup->pLeft);
+      pNew->leftCursor = pLeft->iTable;
+      pNew->u.leftColumn = pLeft->iColumn;
+      testcase( (prereqLeft | extraRight) != prereqLeft );
+      pNew->prereqRight = prereqLeft | extraRight;
+      pNew->prereqAll = prereqAll;
+      pNew->eOperator = (operatorMask(pDup->op) + eExtraOp) & opMask;
+    }
+  }
+
+#ifndef SQLITE_OMIT_BETWEEN_OPTIMIZATION
+  /* If a term is the BETWEEN operator, create two new virtual terms
+  ** that define the range that the BETWEEN implements.  For example:
+  **
+  **      a BETWEEN b AND c
+  **
+  ** is converted into:
+  **
+  **      (a BETWEEN b AND c) AND (a>=b) AND (a<=c)
+  **
+  ** The two new terms are added onto the end of the WhereClause object.
+  ** The new terms are "dynamic" and are children of the original BETWEEN
+  ** term.  That means that if the BETWEEN term is coded, the children are
+  ** skipped.  Or, if the children are satisfied by an index, the original
+  ** BETWEEN term is skipped.
+  */
+  else if( pExpr->op==TK_BETWEEN && pWC->op==TK_AND ){
+    ExprList *pList = pExpr->x.pList;
+    int i;
+    static const u8 ops[] = {TK_GE, TK_LE};
+    assert( pList!=0 );
+    assert( pList->nExpr==2 );
+    for(i=0; i<2; i++){
+      Expr *pNewExpr;
+      int idxNew;
+      pNewExpr = sqlite3PExpr(pParse, ops[i], 
+                             sqlite3ExprDup(db, pExpr->pLeft, 0),
+                             sqlite3ExprDup(db, pList->a[i].pExpr, 0), 0);
+      transferJoinMarkings(pNewExpr, pExpr);
+      idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC);
+      testcase( idxNew==0 );
+      exprAnalyze(pSrc, pWC, idxNew);
+      pTerm = &pWC->a[idxTerm];
+      markTermAsChild(pWC, idxNew, idxTerm);
+    }
+  }
+#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */
+
+#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY)
+  /* Analyze a term that is composed of two or more subterms connected by
+  ** an OR operator.
+  */
+  else if( pExpr->op==TK_OR ){
+    assert( pWC->op==TK_AND );
+    exprAnalyzeOrTerm(pSrc, pWC, idxTerm);
+    pTerm = &pWC->a[idxTerm];
+  }
+#endif /* SQLITE_OMIT_OR_OPTIMIZATION */
+
+#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
+  /* Add constraints to reduce the search space on a LIKE or GLOB
+  ** operator.
+  **
+  ** A like pattern of the form "x LIKE 'abc%'" is changed into constraints
+  **
+  **          x>='abc' AND x<'abd' AND x LIKE 'abc%'
+  **
+  ** The last character of the prefix "abc" is incremented to form the
+  ** termination condition "abd".
+  */
+  if( pWC->op==TK_AND 
+   && isLikeOrGlob(pParse, pExpr, &pStr1, &isComplete, &noCase)
+  ){
+    Expr *pLeft;       /* LHS of LIKE/GLOB operator */
+    Expr *pStr2;       /* Copy of pStr1 - RHS of LIKE/GLOB operator */
+    Expr *pNewExpr1;
+    Expr *pNewExpr2;
+    int idxNew1;
+    int idxNew2;
+    Token sCollSeqName;  /* Name of collating sequence */
+
+    pLeft = pExpr->x.pList->a[1].pExpr;
+    pStr2 = sqlite3ExprDup(db, pStr1, 0);
+    if( !db->mallocFailed ){
+      u8 c, *pC;       /* Last character before the first wildcard */
+      pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1];
+      c = *pC;
+      if( noCase ){
+        /* The point is to increment the last character before the first
+        ** wildcard.  But if we increment '@', that will push it into the
+        ** alphabetic range where case conversions will mess up the 
+        ** inequality.  To avoid this, make sure to also run the full
+        ** LIKE on all candidate expressions by clearing the isComplete flag
+        */
+        if( c=='A'-1 ) isComplete = 0;
+        c = sqlite3UpperToLower[c];
+      }
+      *pC = c + 1;
+    }
+    sCollSeqName.z = noCase ? "NOCASE" : "BINARY";
+    sCollSeqName.n = 6;
+    pNewExpr1 = sqlite3ExprDup(db, pLeft, 0);
+    pNewExpr1 = sqlite3PExpr(pParse, TK_GE, 
+           sqlite3ExprAddCollateToken(pParse,pNewExpr1,&sCollSeqName),
+           pStr1, 0);
+    transferJoinMarkings(pNewExpr1, pExpr);
+    idxNew1 = whereClauseInsert(pWC, pNewExpr1, TERM_VIRTUAL|TERM_DYNAMIC);
+    testcase( idxNew1==0 );
+    exprAnalyze(pSrc, pWC, idxNew1);
+    pNewExpr2 = sqlite3ExprDup(db, pLeft, 0);
+    pNewExpr2 = sqlite3PExpr(pParse, TK_LT,
+           sqlite3ExprAddCollateToken(pParse,pNewExpr2,&sCollSeqName),
+           pStr2, 0);
+    transferJoinMarkings(pNewExpr2, pExpr);
+    idxNew2 = whereClauseInsert(pWC, pNewExpr2, TERM_VIRTUAL|TERM_DYNAMIC);
+    testcase( idxNew2==0 );
+    exprAnalyze(pSrc, pWC, idxNew2);
+    pTerm = &pWC->a[idxTerm];
+    if( isComplete ){
+      markTermAsChild(pWC, idxNew1, idxTerm);
+      markTermAsChild(pWC, idxNew2, idxTerm);
+    }
+  }
+#endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  /* Add a WO_MATCH auxiliary term to the constraint set if the
+  ** current expression is of the form:  column MATCH expr.
+  ** This information is used by the xBestIndex methods of
+  ** virtual tables.  The native query optimizer does not attempt
+  ** to do anything with MATCH functions.
+  */
+  if( isMatchOfColumn(pExpr) ){
+    int idxNew;
+    Expr *pRight, *pLeft;
+    WhereTerm *pNewTerm;
+    Bitmask prereqColumn, prereqExpr;
+
+    pRight = pExpr->x.pList->a[0].pExpr;
+    pLeft = pExpr->x.pList->a[1].pExpr;
+    prereqExpr = exprTableUsage(pMaskSet, pRight);
+    prereqColumn = exprTableUsage(pMaskSet, pLeft);
+    if( (prereqExpr & prereqColumn)==0 ){
+      Expr *pNewExpr;
+      pNewExpr = sqlite3PExpr(pParse, TK_MATCH, 
+                              0, sqlite3ExprDup(db, pRight, 0), 0);
+      idxNew = whereClauseInsert(pWC, pNewExpr, TERM_VIRTUAL|TERM_DYNAMIC);
+      testcase( idxNew==0 );
+      pNewTerm = &pWC->a[idxNew];
+      pNewTerm->prereqRight = prereqExpr;
+      pNewTerm->leftCursor = pLeft->iTable;
+      pNewTerm->u.leftColumn = pLeft->iColumn;
+      pNewTerm->eOperator = WO_MATCH;
+      markTermAsChild(pWC, idxNew, idxTerm);
+      pTerm = &pWC->a[idxTerm];
+      pTerm->wtFlags |= TERM_COPIED;
+      pNewTerm->prereqAll = pTerm->prereqAll;
+    }
+  }
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  /* When sqlite_stat3 histogram data is available an operator of the
+  ** form "x IS NOT NULL" can sometimes be evaluated more efficiently
+  ** as "x>NULL" if x is not an INTEGER PRIMARY KEY.  So construct a
+  ** virtual term of that form.
+  **
+  ** Note that the virtual term must be tagged with TERM_VNULL.  This
+  ** TERM_VNULL tag will suppress the not-null check at the beginning
+  ** of the loop.  Without the TERM_VNULL flag, the not-null check at
+  ** the start of the loop will prevent any results from being returned.
+  */
+  if( pExpr->op==TK_NOTNULL
+   && pExpr->pLeft->op==TK_COLUMN
+   && pExpr->pLeft->iColumn>=0
+   && OptimizationEnabled(db, SQLITE_Stat34)
+  ){
+    Expr *pNewExpr;
+    Expr *pLeft = pExpr->pLeft;
+    int idxNew;
+    WhereTerm *pNewTerm;
+
+    pNewExpr = sqlite3PExpr(pParse, TK_GT,
+                            sqlite3ExprDup(db, pLeft, 0),
+                            sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0);
+
+    idxNew = whereClauseInsert(pWC, pNewExpr,
+                              TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL);
+    if( idxNew ){
+      pNewTerm = &pWC->a[idxNew];
+      pNewTerm->prereqRight = 0;
+      pNewTerm->leftCursor = pLeft->iTable;
+      pNewTerm->u.leftColumn = pLeft->iColumn;
+      pNewTerm->eOperator = WO_GT;
+      markTermAsChild(pWC, idxNew, idxTerm);
+      pTerm = &pWC->a[idxTerm];
+      pTerm->wtFlags |= TERM_COPIED;
+      pNewTerm->prereqAll = pTerm->prereqAll;
+    }
+  }
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+  /* Prevent ON clause terms of a LEFT JOIN from being used to drive
+  ** an index for tables to the left of the join.
+  */
+  pTerm->prereqRight |= extraRight;
+}
+
+/*
+** This function searches pList for an entry that matches the iCol-th column
+** of index pIdx.
+**
+** If such an expression is found, its index in pList->a[] is returned. If
+** no expression is found, -1 is returned.
+*/
+static int findIndexCol(
+  Parse *pParse,                  /* Parse context */
+  ExprList *pList,                /* Expression list to search */
+  int iBase,                      /* Cursor for table associated with pIdx */
+  Index *pIdx,                    /* Index to match column of */
+  int iCol                        /* Column of index to match */
+){
+  int i;
+  const char *zColl = pIdx->azColl[iCol];
+
+  for(i=0; i<pList->nExpr; i++){
+    Expr *p = sqlite3ExprSkipCollate(pList->a[i].pExpr);
+    if( p->op==TK_COLUMN
+     && p->iColumn==pIdx->aiColumn[iCol]
+     && p->iTable==iBase
+    ){
+      CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[i].pExpr);
+      if( ALWAYS(pColl) && 0==sqlite3StrICmp(pColl->zName, zColl) ){
+        return i;
+      }
+    }
+  }
+
+  return -1;
+}
+
+/*
+** Return true if the DISTINCT expression-list passed as the third argument
+** is redundant.
+**
+** A DISTINCT list is redundant if the database contains some subset of
+** columns that are unique and non-null.
+*/
+static int isDistinctRedundant(
+  Parse *pParse,            /* Parsing context */
+  SrcList *pTabList,        /* The FROM clause */
+  WhereClause *pWC,         /* The WHERE clause */
+  ExprList *pDistinct       /* The result set that needs to be DISTINCT */
+){
+  Table *pTab;
+  Index *pIdx;
+  int i;                          
+  int iBase;
+
+  /* If there is more than one table or sub-select in the FROM clause of
+  ** this query, then it will not be possible to show that the DISTINCT 
+  ** clause is redundant. */
+  if( pTabList->nSrc!=1 ) return 0;
+  iBase = pTabList->a[0].iCursor;
+  pTab = pTabList->a[0].pTab;
+
+  /* If any of the expressions is an IPK column on table iBase, then return 
+  ** true. Note: The (p->iTable==iBase) part of this test may be false if the
+  ** current SELECT is a correlated sub-query.
+  */
+  for(i=0; i<pDistinct->nExpr; i++){
+    Expr *p = sqlite3ExprSkipCollate(pDistinct->a[i].pExpr);
+    if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1;
+  }
+
+  /* Loop through all indices on the table, checking each to see if it makes
+  ** the DISTINCT qualifier redundant. It does so if:
+  **
+  **   1. The index is itself UNIQUE, and
+  **
+  **   2. All of the columns in the index are either part of the pDistinct
+  **      list, or else the WHERE clause contains a term of the form "col=X",
+  **      where X is a constant value. The collation sequences of the
+  **      comparison and select-list expressions must match those of the index.
+  **
+  **   3. All of those index columns for which the WHERE clause does not
+  **      contain a "col=X" term are subject to a NOT NULL constraint.
+  */
+  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+    if( !IsUniqueIndex(pIdx) ) continue;
+    for(i=0; i<pIdx->nKeyCol; i++){
+      i16 iCol = pIdx->aiColumn[i];
+      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
+        int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i);
+        if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){
+          break;
+        }
+      }
+    }
+    if( i==pIdx->nKeyCol ){
+      /* This index implies that the DISTINCT qualifier is redundant. */
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+
+/*
+** Estimate the logarithm of the input value to base 2.
+*/
+static LogEst estLog(LogEst N){
+  return N<=10 ? 0 : sqlite3LogEst(N) - 33;
+}
+
+/*
+** Two routines for printing the content of an sqlite3_index_info
+** structure.  Used for testing and debugging only.  If neither
+** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines
+** are no-ops.
+*/
+#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED)
+static void TRACE_IDX_INPUTS(sqlite3_index_info *p){
+  int i;
+  if( !sqlite3WhereTrace ) return;
+  for(i=0; i<p->nConstraint; i++){
+    sqlite3DebugPrintf("  constraint[%d]: col=%d termid=%d op=%d usabled=%d\n",
+       i,
+       p->aConstraint[i].iColumn,
+       p->aConstraint[i].iTermOffset,
+       p->aConstraint[i].op,
+       p->aConstraint[i].usable);
+  }
+  for(i=0; i<p->nOrderBy; i++){
+    sqlite3DebugPrintf("  orderby[%d]: col=%d desc=%d\n",
+       i,
+       p->aOrderBy[i].iColumn,
+       p->aOrderBy[i].desc);
+  }
+}
+static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){
+  int i;
+  if( !sqlite3WhereTrace ) return;
+  for(i=0; i<p->nConstraint; i++){
+    sqlite3DebugPrintf("  usage[%d]: argvIdx=%d omit=%d\n",
+       i,
+       p->aConstraintUsage[i].argvIndex,
+       p->aConstraintUsage[i].omit);
+  }
+  sqlite3DebugPrintf("  idxNum=%d\n", p->idxNum);
+  sqlite3DebugPrintf("  idxStr=%s\n", p->idxStr);
+  sqlite3DebugPrintf("  orderByConsumed=%d\n", p->orderByConsumed);
+  sqlite3DebugPrintf("  estimatedCost=%g\n", p->estimatedCost);
+  sqlite3DebugPrintf("  estimatedRows=%lld\n", p->estimatedRows);
+}
+#else
+#define TRACE_IDX_INPUTS(A)
+#define TRACE_IDX_OUTPUTS(A)
+#endif
+
+#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
+/*
+** Return TRUE if the WHERE clause term pTerm is of a form where it
+** could be used with an index to access pSrc, assuming an appropriate
+** index existed.
+*/
+static int termCanDriveIndex(
+  WhereTerm *pTerm,              /* WHERE clause term to check */
+  struct SrcList_item *pSrc,     /* Table we are trying to access */
+  Bitmask notReady               /* Tables in outer loops of the join */
+){
+  char aff;
+  if( pTerm->leftCursor!=pSrc->iCursor ) return 0;
+  if( (pTerm->eOperator & WO_EQ)==0 ) return 0;
+  if( (pTerm->prereqRight & notReady)!=0 ) return 0;
+  if( pTerm->u.leftColumn<0 ) return 0;
+  aff = pSrc->pTab->aCol[pTerm->u.leftColumn].affinity;
+  if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0;
+  return 1;
+}
+#endif
+
+
+#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
+/*
+** Generate code to construct the Index object for an automatic index
+** and to set up the WhereLevel object pLevel so that the code generator
+** makes use of the automatic index.
+*/
+static void constructAutomaticIndex(
+  Parse *pParse,              /* The parsing context */
+  WhereClause *pWC,           /* The WHERE clause */
+  struct SrcList_item *pSrc,  /* The FROM clause term to get the next index */
+  Bitmask notReady,           /* Mask of cursors that are not available */
+  WhereLevel *pLevel          /* Write new index here */
+){
+  int nKeyCol;                /* Number of columns in the constructed index */
+  WhereTerm *pTerm;           /* A single term of the WHERE clause */
+  WhereTerm *pWCEnd;          /* End of pWC->a[] */
+  Index *pIdx;                /* Object describing the transient index */
+  Vdbe *v;                    /* Prepared statement under construction */
+  int addrInit;               /* Address of the initialization bypass jump */
+  Table *pTable;              /* The table being indexed */
+  int addrTop;                /* Top of the index fill loop */
+  int regRecord;              /* Register holding an index record */
+  int n;                      /* Column counter */
+  int i;                      /* Loop counter */
+  int mxBitCol;               /* Maximum column in pSrc->colUsed */
+  CollSeq *pColl;             /* Collating sequence to on a column */
+  WhereLoop *pLoop;           /* The Loop object */
+  char *zNotUsed;             /* Extra space on the end of pIdx */
+  Bitmask idxCols;            /* Bitmap of columns used for indexing */
+  Bitmask extraCols;          /* Bitmap of additional columns */
+  u8 sentWarning = 0;         /* True if a warnning has been issued */
+  Expr *pPartial = 0;         /* Partial Index Expression */
+  int iContinue = 0;          /* Jump here to skip excluded rows */
+
+  /* Generate code to skip over the creation and initialization of the
+  ** transient index on 2nd and subsequent iterations of the loop. */
+  v = pParse->pVdbe;
+  assert( v!=0 );
+  addrInit = sqlite3CodeOnce(pParse); VdbeCoverage(v);
+
+  /* Count the number of columns that will be added to the index
+  ** and used to match WHERE clause constraints */
+  nKeyCol = 0;
+  pTable = pSrc->pTab;
+  pWCEnd = &pWC->a[pWC->nTerm];
+  pLoop = pLevel->pWLoop;
+  idxCols = 0;
+  for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){
+    if( pLoop->prereq==0
+     && (pTerm->wtFlags & TERM_VIRTUAL)==0
+     && sqlite3ExprIsTableConstant(pTerm->pExpr, pSrc->iCursor) ){
+      pPartial = sqlite3ExprAnd(pParse->db, pPartial,
+                                sqlite3ExprDup(pParse->db, pTerm->pExpr, 0));
+    }
+    if( termCanDriveIndex(pTerm, pSrc, notReady) ){
+      int iCol = pTerm->u.leftColumn;
+      Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol);
+      testcase( iCol==BMS );
+      testcase( iCol==BMS-1 );
+      if( !sentWarning ){
+        sqlite3_log(SQLITE_WARNING_AUTOINDEX,
+            "automatic index on %s(%s)", pTable->zName,
+            pTable->aCol[iCol].zName);
+        sentWarning = 1;
+      }
+      if( (idxCols & cMask)==0 ){
+        if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){
+          goto end_auto_index_create;
+        }
+        pLoop->aLTerm[nKeyCol++] = pTerm;
+        idxCols |= cMask;
+      }
+    }
+  }
+  assert( nKeyCol>0 );
+  pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol;
+  pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED
+                     | WHERE_AUTO_INDEX;
+
+  /* Count the number of additional columns needed to create a
+  ** covering index.  A "covering index" is an index that contains all
+  ** columns that are needed by the query.  With a covering index, the
+  ** original table never needs to be accessed.  Automatic indices must
+  ** be a covering index because the index will not be updated if the
+  ** original table changes and the index and table cannot both be used
+  ** if they go out of sync.
+  */
+  extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1));
+  mxBitCol = MIN(BMS-1,pTable->nCol);
+  testcase( pTable->nCol==BMS-1 );
+  testcase( pTable->nCol==BMS-2 );
+  for(i=0; i<mxBitCol; i++){
+    if( extraCols & MASKBIT(i) ) nKeyCol++;
+  }
+  if( pSrc->colUsed & MASKBIT(BMS-1) ){
+    nKeyCol += pTable->nCol - BMS + 1;
+  }
+
+  /* Construct the Index object to describe this index */
+  pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed);
+  if( pIdx==0 ) goto end_auto_index_create;
+  pLoop->u.btree.pIndex = pIdx;
+  pIdx->zName = "auto-index";
+  pIdx->pTable = pTable;
+  n = 0;
+  idxCols = 0;
+  for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){
+    if( termCanDriveIndex(pTerm, pSrc, notReady) ){
+      int iCol = pTerm->u.leftColumn;
+      Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol);
+      testcase( iCol==BMS-1 );
+      testcase( iCol==BMS );
+      if( (idxCols & cMask)==0 ){
+        Expr *pX = pTerm->pExpr;
+        idxCols |= cMask;
+        pIdx->aiColumn[n] = pTerm->u.leftColumn;
+        pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
+        pIdx->azColl[n] = ALWAYS(pColl) ? pColl->zName : "BINARY";
+        n++;
+      }
+    }
+  }
+  assert( (u32)n==pLoop->u.btree.nEq );
+
+  /* Add additional columns needed to make the automatic index into
+  ** a covering index */
+  for(i=0; i<mxBitCol; i++){
+    if( extraCols & MASKBIT(i) ){
+      pIdx->aiColumn[n] = i;
+      pIdx->azColl[n] = "BINARY";
+      n++;
+    }
+  }
+  if( pSrc->colUsed & MASKBIT(BMS-1) ){
+    for(i=BMS-1; i<pTable->nCol; i++){
+      pIdx->aiColumn[n] = i;
+      pIdx->azColl[n] = "BINARY";
+      n++;
+    }
+  }
+  assert( n==nKeyCol );
+  pIdx->aiColumn[n] = -1;
+  pIdx->azColl[n] = "BINARY";
+
+  /* Create the automatic index */
+  assert( pLevel->iIdxCur>=0 );
+  pLevel->iIdxCur = pParse->nTab++;
+  sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1);
+  sqlite3VdbeSetP4KeyInfo(pParse, pIdx);
+  VdbeComment((v, "for %s", pTable->zName));
+
+  /* Fill the automatic index with content */
+  sqlite3ExprCachePush(pParse);
+  addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v);
+  if( pPartial ){
+    iContinue = sqlite3VdbeMakeLabel(v);
+    sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL);
+    pLoop->wsFlags |= WHERE_PARTIALIDX;
+  }
+  regRecord = sqlite3GetTempReg(pParse);
+  sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0);
+  sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord);
+  sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+  if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue);
+  sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v);
+  sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX);
+  sqlite3VdbeJumpHere(v, addrTop);
+  sqlite3ReleaseTempReg(pParse, regRecord);
+  sqlite3ExprCachePop(pParse);
+  
+  /* Jump here when skipping the initialization */
+  sqlite3VdbeJumpHere(v, addrInit);
+
+end_auto_index_create:
+  sqlite3ExprDelete(pParse->db, pPartial);
+}
+#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Allocate and populate an sqlite3_index_info structure. It is the 
+** responsibility of the caller to eventually release the structure
+** by passing the pointer returned by this function to sqlite3_free().
+*/
+static sqlite3_index_info *allocateIndexInfo(
+  Parse *pParse,
+  WhereClause *pWC,
+  struct SrcList_item *pSrc,
+  ExprList *pOrderBy
+){
+  int i, j;
+  int nTerm;
+  struct sqlite3_index_constraint *pIdxCons;
+  struct sqlite3_index_orderby *pIdxOrderBy;
+  struct sqlite3_index_constraint_usage *pUsage;
+  WhereTerm *pTerm;
+  int nOrderBy;
+  sqlite3_index_info *pIdxInfo;
+
+  /* Count the number of possible WHERE clause constraints referring
+  ** to this virtual table */
+  for(i=nTerm=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){
+    if( pTerm->leftCursor != pSrc->iCursor ) continue;
+    assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) );
+    testcase( pTerm->eOperator & WO_IN );
+    testcase( pTerm->eOperator & WO_ISNULL );
+    testcase( pTerm->eOperator & WO_ALL );
+    if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue;
+    if( pTerm->wtFlags & TERM_VNULL ) continue;
+    nTerm++;
+  }
+
+  /* If the ORDER BY clause contains only columns in the current 
+  ** virtual table then allocate space for the aOrderBy part of
+  ** the sqlite3_index_info structure.
+  */
+  nOrderBy = 0;
+  if( pOrderBy ){
+    int n = pOrderBy->nExpr;
+    for(i=0; i<n; i++){
+      Expr *pExpr = pOrderBy->a[i].pExpr;
+      if( pExpr->op!=TK_COLUMN || pExpr->iTable!=pSrc->iCursor ) break;
+    }
+    if( i==n){
+      nOrderBy = n;
+    }
+  }
+
+  /* Allocate the sqlite3_index_info structure
+  */
+  pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo)
+                           + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm
+                           + sizeof(*pIdxOrderBy)*nOrderBy );
+  if( pIdxInfo==0 ){
+    sqlite3ErrorMsg(pParse, "out of memory");
+    return 0;
+  }
+
+  /* Initialize the structure.  The sqlite3_index_info structure contains
+  ** many fields that are declared "const" to prevent xBestIndex from
+  ** changing them.  We have to do some funky casting in order to
+  ** initialize those fields.
+  */
+  pIdxCons = (struct sqlite3_index_constraint*)&pIdxInfo[1];
+  pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm];
+  pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy];
+  *(int*)&pIdxInfo->nConstraint = nTerm;
+  *(int*)&pIdxInfo->nOrderBy = nOrderBy;
+  *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons;
+  *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy;
+  *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage =
+                                                                   pUsage;
+
+  for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){
+    u8 op;
+    if( pTerm->leftCursor != pSrc->iCursor ) continue;
+    assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) );
+    testcase( pTerm->eOperator & WO_IN );
+    testcase( pTerm->eOperator & WO_ISNULL );
+    testcase( pTerm->eOperator & WO_ALL );
+    if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue;
+    if( pTerm->wtFlags & TERM_VNULL ) continue;
+    pIdxCons[j].iColumn = pTerm->u.leftColumn;
+    pIdxCons[j].iTermOffset = i;
+    op = (u8)pTerm->eOperator & WO_ALL;
+    if( op==WO_IN ) op = WO_EQ;
+    pIdxCons[j].op = op;
+    /* The direct assignment in the previous line is possible only because
+    ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical.  The
+    ** following asserts verify this fact. */
+    assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ );
+    assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT );
+    assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE );
+    assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT );
+    assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE );
+    assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH );
+    assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) );
+    j++;
+  }
+  for(i=0; i<nOrderBy; i++){
+    Expr *pExpr = pOrderBy->a[i].pExpr;
+    pIdxOrderBy[i].iColumn = pExpr->iColumn;
+    pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder;
+  }
+
+  return pIdxInfo;
+}
+
+/*
+** The table object reference passed as the second argument to this function
+** must represent a virtual table. This function invokes the xBestIndex()
+** method of the virtual table with the sqlite3_index_info object that
+** comes in as the 3rd argument to this function.
+**
+** If an error occurs, pParse is populated with an error message and a
+** non-zero value is returned. Otherwise, 0 is returned and the output
+** part of the sqlite3_index_info structure is left populated.
+**
+** Whether or not an error is returned, it is the responsibility of the
+** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates
+** that this is required.
+*/
+static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){
+  sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab;
+  int i;
+  int rc;
+
+  TRACE_IDX_INPUTS(p);
+  rc = pVtab->pModule->xBestIndex(pVtab, p);
+  TRACE_IDX_OUTPUTS(p);
+
+  if( rc!=SQLITE_OK ){
+    if( rc==SQLITE_NOMEM ){
+      pParse->db->mallocFailed = 1;
+    }else if( !pVtab->zErrMsg ){
+      sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc));
+    }else{
+      sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg);
+    }
+  }
+  sqlite3_free(pVtab->zErrMsg);
+  pVtab->zErrMsg = 0;
+
+  for(i=0; i<p->nConstraint; i++){
+    if( !p->aConstraint[i].usable && p->aConstraintUsage[i].argvIndex>0 ){
+      sqlite3ErrorMsg(pParse, 
+          "table %s: xBestIndex returned an invalid plan", pTab->zName);
+    }
+  }
+
+  return pParse->nErr;
+}
+#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** Estimate the location of a particular key among all keys in an
+** index.  Store the results in aStat as follows:
+**
+**    aStat[0]      Est. number of rows less than pVal
+**    aStat[1]      Est. number of rows equal to pVal
+**
+** Return the index of the sample that is the smallest sample that
+** is greater than or equal to pRec.
+*/
+static int whereKeyStats(
+  Parse *pParse,              /* Database connection */
+  Index *pIdx,                /* Index to consider domain of */
+  UnpackedRecord *pRec,       /* Vector of values to consider */
+  int roundUp,                /* Round up if true.  Round down if false */
+  tRowcnt *aStat              /* OUT: stats written here */
+){
+  IndexSample *aSample = pIdx->aSample;
+  int iCol;                   /* Index of required stats in anEq[] etc. */
+  int iMin = 0;               /* Smallest sample not yet tested */
+  int i = pIdx->nSample;      /* Smallest sample larger than or equal to pRec */
+  int iTest;                  /* Next sample to test */
+  int res;                    /* Result of comparison operation */
+
+#ifndef SQLITE_DEBUG
+  UNUSED_PARAMETER( pParse );
+#endif
+  assert( pRec!=0 );
+  iCol = pRec->nField - 1;
+  assert( pIdx->nSample>0 );
+  assert( pRec->nField>0 && iCol<pIdx->nSampleCol );
+  do{
+    iTest = (iMin+i)/2;
+    res = sqlite3VdbeRecordCompare(aSample[iTest].n, aSample[iTest].p, pRec);
+    if( res<0 ){
+      iMin = iTest+1;
+    }else{
+      i = iTest;
+    }
+  }while( res && iMin<i );
+
+#ifdef SQLITE_DEBUG
+  /* The following assert statements check that the binary search code
+  ** above found the right answer. This block serves no purpose other
+  ** than to invoke the asserts.  */
+  if( res==0 ){
+    /* If (res==0) is true, then sample $i must be equal to pRec */
+    assert( i<pIdx->nSample );
+    assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)
+         || pParse->db->mallocFailed );
+  }else{
+    /* Otherwise, pRec must be smaller than sample $i and larger than
+    ** sample ($i-1).  */
+    assert( i==pIdx->nSample 
+         || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0
+         || pParse->db->mallocFailed );
+    assert( i==0
+         || sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0
+         || pParse->db->mallocFailed );
+  }
+#endif /* ifdef SQLITE_DEBUG */
+
+  /* At this point, aSample[i] is the first sample that is greater than
+  ** or equal to pVal.  Or if i==pIdx->nSample, then all samples are less
+  ** than pVal.  If aSample[i]==pVal, then res==0.
+  */
+  if( res==0 ){
+    aStat[0] = aSample[i].anLt[iCol];
+    aStat[1] = aSample[i].anEq[iCol];
+  }else{
+    tRowcnt iLower, iUpper, iGap;
+    if( i==0 ){
+      iLower = 0;
+      iUpper = aSample[0].anLt[iCol];
+    }else{
+      i64 nRow0 = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]);
+      iUpper = i>=pIdx->nSample ? nRow0 : aSample[i].anLt[iCol];
+      iLower = aSample[i-1].anEq[iCol] + aSample[i-1].anLt[iCol];
+    }
+    aStat[1] = pIdx->aAvgEq[iCol];
+    if( iLower>=iUpper ){
+      iGap = 0;
+    }else{
+      iGap = iUpper - iLower;
+    }
+    if( roundUp ){
+      iGap = (iGap*2)/3;
+    }else{
+      iGap = iGap/3;
+    }
+    aStat[0] = iLower + iGap;
+  }
+  return i;
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+/*
+** If it is not NULL, pTerm is a term that provides an upper or lower
+** bound on a range scan. Without considering pTerm, it is estimated 
+** that the scan will visit nNew rows. This function returns the number
+** estimated to be visited after taking pTerm into account.
+**
+** If the user explicitly specified a likelihood() value for this term,
+** then the return value is the likelihood multiplied by the number of
+** input rows. Otherwise, this function assumes that an "IS NOT NULL" term
+** has a likelihood of 0.50, and any other term a likelihood of 0.25.
+*/
+static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){
+  LogEst nRet = nNew;
+  if( pTerm ){
+    if( pTerm->truthProb<=0 ){
+      nRet += pTerm->truthProb;
+    }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){
+      nRet -= 20;        assert( 20==sqlite3LogEst(4) );
+    }
+  }
+  return nRet;
+}
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/* 
+** This function is called to estimate the number of rows visited by a
+** range-scan on a skip-scan index. For example:
+**
+**   CREATE INDEX i1 ON t1(a, b, c);
+**   SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?;
+**
+** Value pLoop->nOut is currently set to the estimated number of rows 
+** visited for scanning (a=? AND b=?). This function reduces that estimate 
+** by some factor to account for the (c BETWEEN ? AND ?) expression based
+** on the stat4 data for the index. this scan will be peformed multiple 
+** times (once for each (a,b) combination that matches a=?) is dealt with 
+** by the caller.
+**
+** It does this by scanning through all stat4 samples, comparing values
+** extracted from pLower and pUpper with the corresponding column in each
+** sample. If L and U are the number of samples found to be less than or
+** equal to the values extracted from pLower and pUpper respectively, and
+** N is the total number of samples, the pLoop->nOut value is adjusted
+** as follows:
+**
+**   nOut = nOut * ( min(U - L, 1) / N )
+**
+** If pLower is NULL, or a value cannot be extracted from the term, L is
+** set to zero. If pUpper is NULL, or a value cannot be extracted from it,
+** U is set to N.
+**
+** Normally, this function sets *pbDone to 1 before returning. However,
+** if no value can be extracted from either pLower or pUpper (and so the
+** estimate of the number of rows delivered remains unchanged), *pbDone
+** is left as is.
+**
+** If an error occurs, an SQLite error code is returned. Otherwise, 
+** SQLITE_OK.
+*/
+static int whereRangeSkipScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
+  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
+  WhereLoop *pLoop,    /* Update the .nOut value of this loop */
+  int *pbDone          /* Set to true if at least one expr. value extracted */
+){
+  Index *p = pLoop->u.btree.pIndex;
+  int nEq = pLoop->u.btree.nEq;
+  sqlite3 *db = pParse->db;
+  int nLower = -1;
+  int nUpper = p->nSample+1;
+  int rc = SQLITE_OK;
+  int iCol = p->aiColumn[nEq];
+  u8 aff = iCol>=0 ? p->pTable->aCol[iCol].affinity : SQLITE_AFF_INTEGER;
+  CollSeq *pColl;
+  
+  sqlite3_value *p1 = 0;          /* Value extracted from pLower */
+  sqlite3_value *p2 = 0;          /* Value extracted from pUpper */
+  sqlite3_value *pVal = 0;        /* Value extracted from record */
+
+  pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]);
+  if( pLower ){
+    rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1);
+    nLower = 0;
+  }
+  if( pUpper && rc==SQLITE_OK ){
+    rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2);
+    nUpper = p2 ? 0 : p->nSample;
+  }
+
+  if( p1 || p2 ){
+    int i;
+    int nDiff;
+    for(i=0; rc==SQLITE_OK && i<p->nSample; i++){
+      rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal);
+      if( rc==SQLITE_OK && p1 ){
+        int res = sqlite3MemCompare(p1, pVal, pColl);
+        if( res>=0 ) nLower++;
+      }
+      if( rc==SQLITE_OK && p2 ){
+        int res = sqlite3MemCompare(p2, pVal, pColl);
+        if( res>=0 ) nUpper++;
+      }
+    }
+    nDiff = (nUpper - nLower);
+    if( nDiff<=0 ) nDiff = 1;
+
+    /* If there is both an upper and lower bound specified, and the 
+    ** comparisons indicate that they are close together, use the fallback
+    ** method (assume that the scan visits 1/64 of the rows) for estimating
+    ** the number of rows visited. Otherwise, estimate the number of rows
+    ** using the method described in the header comment for this function. */
+    if( nDiff!=1 || pUpper==0 || pLower==0 ){
+      int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff));
+      pLoop->nOut -= nAdjust;
+      *pbDone = 1;
+      WHERETRACE(0x10, ("range skip-scan regions: %u..%u  adjust=%d est=%d\n",
+                           nLower, nUpper, nAdjust*-1, pLoop->nOut));
+    }
+
+  }else{
+    assert( *pbDone==0 );
+  }
+
+  sqlite3ValueFree(p1);
+  sqlite3ValueFree(p2);
+  sqlite3ValueFree(pVal);
+
+  return rc;
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+/*
+** This function is used to estimate the number of rows that will be visited
+** by scanning an index for a range of values. The range may have an upper
+** bound, a lower bound, or both. The WHERE clause terms that set the upper
+** and lower bounds are represented by pLower and pUpper respectively. For
+** example, assuming that index p is on t1(a):
+**
+**   ... FROM t1 WHERE a > ? AND a < ? ...
+**                    |_____|   |_____|
+**                       |         |
+**                     pLower    pUpper
+**
+** If either of the upper or lower bound is not present, then NULL is passed in
+** place of the corresponding WhereTerm.
+**
+** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index
+** column subject to the range constraint. Or, equivalently, the number of
+** equality constraints optimized by the proposed index scan. For example,
+** assuming index p is on t1(a, b), and the SQL query is:
+**
+**   ... FROM t1 WHERE a = ? AND b > ? AND b < ? ...
+**
+** then nEq is set to 1 (as the range restricted column, b, is the second 
+** left-most column of the index). Or, if the query is:
+**
+**   ... FROM t1 WHERE a > ? AND a < ? ...
+**
+** then nEq is set to 0.
+**
+** When this function is called, *pnOut is set to the sqlite3LogEst() of the
+** number of rows that the index scan is expected to visit without 
+** considering the range constraints. If nEq is 0, then *pnOut is the number of 
+** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced)
+** to account for the range constraints pLower and pUpper.
+** 
+** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be
+** used, a single range inequality reduces the search space by a factor of 4. 
+** and a pair of constraints (x>? AND x<?) reduces the expected number of
+** rows visited by a factor of 64.
+*/
+static int whereRangeScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  WhereLoopBuilder *pBuilder,
+  WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
+  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
+  WhereLoop *pLoop     /* Modify the .nOut and maybe .rRun fields */
+){
+  int rc = SQLITE_OK;
+  int nOut = pLoop->nOut;
+  LogEst nNew;
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+  Index *p = pLoop->u.btree.pIndex;
+  int nEq = pLoop->u.btree.nEq;
+
+  if( p->nSample>0 && nEq<p->nSampleCol ){
+    if( nEq==pBuilder->nRecValid ){
+      UnpackedRecord *pRec = pBuilder->pRec;
+      tRowcnt a[2];
+      u8 aff;
+
+      /* Variable iLower will be set to the estimate of the number of rows in 
+      ** the index that are less than the lower bound of the range query. The
+      ** lower bound being the concatenation of $P and $L, where $P is the
+      ** key-prefix formed by the nEq values matched against the nEq left-most
+      ** columns of the index, and $L is the value in pLower.
+      **
+      ** Or, if pLower is NULL or $L cannot be extracted from it (because it
+      ** is not a simple variable or literal value), the lower bound of the
+      ** range is $P. Due to a quirk in the way whereKeyStats() works, even
+      ** if $L is available, whereKeyStats() is called for both ($P) and 
+      ** ($P:$L) and the larger of the two returned values is used.
+      **
+      ** Similarly, iUpper is to be set to the estimate of the number of rows
+      ** less than the upper bound of the range query. Where the upper bound
+      ** is either ($P) or ($P:$U). Again, even if $U is available, both values
+      ** of iUpper are requested of whereKeyStats() and the smaller used.
+      **
+      ** The number of rows between the two bounds is then just iUpper-iLower.
+      */
+      tRowcnt iLower;     /* Rows less than the lower bound */
+      tRowcnt iUpper;     /* Rows less than the upper bound */
+      int iLwrIdx = -2;   /* aSample[] for the lower bound */
+      int iUprIdx = -1;   /* aSample[] for the upper bound */
+
+      if( pRec ){
+        testcase( pRec->nField!=pBuilder->nRecValid );
+        pRec->nField = pBuilder->nRecValid;
+      }
+      if( nEq==p->nKeyCol ){
+        aff = SQLITE_AFF_INTEGER;
+      }else{
+        aff = p->pTable->aCol[p->aiColumn[nEq]].affinity;
+      }
+      /* Determine iLower and iUpper using ($P) only. */
+      if( nEq==0 ){
+        iLower = 0;
+        iUpper = p->nRowEst0;
+      }else{
+        /* Note: this call could be optimized away - since the same values must 
+        ** have been requested when testing key $P in whereEqualScanEst().  */
+        whereKeyStats(pParse, p, pRec, 0, a);
+        iLower = a[0];
+        iUpper = a[0] + a[1];
+      }
+
+      assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 );
+      assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 );
+      assert( p->aSortOrder!=0 );
+      if( p->aSortOrder[nEq] ){
+        /* The roles of pLower and pUpper are swapped for a DESC index */
+        SWAP(WhereTerm*, pLower, pUpper);
+      }
+
+      /* If possible, improve on the iLower estimate using ($P:$L). */
+      if( pLower ){
+        int bOk;                    /* True if value is extracted from pExpr */
+        Expr *pExpr = pLower->pExpr->pRight;
+        rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk);
+        if( rc==SQLITE_OK && bOk ){
+          tRowcnt iNew;
+          iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a);
+          iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0);
+          if( iNew>iLower ) iLower = iNew;
+          nOut--;
+          pLower = 0;
+        }
+      }
+
+      /* If possible, improve on the iUpper estimate using ($P:$U). */
+      if( pUpper ){
+        int bOk;                    /* True if value is extracted from pExpr */
+        Expr *pExpr = pUpper->pExpr->pRight;
+        rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk);
+        if( rc==SQLITE_OK && bOk ){
+          tRowcnt iNew;
+          iUprIdx = whereKeyStats(pParse, p, pRec, 1, a);
+          iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0);
+          if( iNew<iUpper ) iUpper = iNew;
+          nOut--;
+          pUpper = 0;
+        }
+      }
+
+      pBuilder->pRec = pRec;
+      if( rc==SQLITE_OK ){
+        if( iUpper>iLower ){
+          nNew = sqlite3LogEst(iUpper - iLower);
+          /* TUNING:  If both iUpper and iLower are derived from the same
+          ** sample, then assume they are 4x more selective.  This brings
+          ** the estimated selectivity more in line with what it would be
+          ** if estimated without the use of STAT3/4 tables. */
+          if( iLwrIdx==iUprIdx ) nNew -= 20;  assert( 20==sqlite3LogEst(4) );
+        }else{
+          nNew = 10;        assert( 10==sqlite3LogEst(2) );
+        }
+        if( nNew<nOut ){
+          nOut = nNew;
+        }
+        WHERETRACE(0x10, ("STAT4 range scan: %u..%u  est=%d\n",
+                           (u32)iLower, (u32)iUpper, nOut));
+      }
+    }else{
+      int bDone = 0;
+      rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone);
+      if( bDone ) return rc;
+    }
+  }
+#else
+  UNUSED_PARAMETER(pParse);
+  UNUSED_PARAMETER(pBuilder);
+  assert( pLower || pUpper );
+#endif
+  assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 );
+  nNew = whereRangeAdjust(pLower, nOut);
+  nNew = whereRangeAdjust(pUpper, nNew);
+
+  /* TUNING: If there is both an upper and lower limit and neither limit
+  ** has an application-defined likelihood(), assume the range is
+  ** reduced by an additional 75%. This means that, by default, an open-ended
+  ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the
+  ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to
+  ** match 1/64 of the index. */ 
+  if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){
+    nNew -= 20;
+  }
+
+  nOut -= (pLower!=0) + (pUpper!=0);
+  if( nNew<10 ) nNew = 10;
+  if( nNew<nOut ) nOut = nNew;
+#if defined(WHERETRACE_ENABLED)
+  if( pLoop->nOut>nOut ){
+    WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n",
+                    pLoop->nOut, nOut));
+  }
+#endif
+  pLoop->nOut = (LogEst)nOut;
+  return rc;
+}
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** Estimate the number of rows that will be returned based on
+** an equality constraint x=VALUE and where that VALUE occurs in
+** the histogram data.  This only works when x is the left-most
+** column of an index and sqlite_stat3 histogram data is available
+** for that index.  When pExpr==NULL that means the constraint is
+** "x IS NULL" instead of "x=VALUE".
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+static int whereEqualScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  WhereLoopBuilder *pBuilder,
+  Expr *pExpr,         /* Expression for VALUE in the x=VALUE constraint */
+  tRowcnt *pnRow       /* Write the revised row estimate here */
+){
+  Index *p = pBuilder->pNew->u.btree.pIndex;
+  int nEq = pBuilder->pNew->u.btree.nEq;
+  UnpackedRecord *pRec = pBuilder->pRec;
+  u8 aff;                   /* Column affinity */
+  int rc;                   /* Subfunction return code */
+  tRowcnt a[2];             /* Statistics */
+  int bOk;
+
+  assert( nEq>=1 );
+  assert( nEq<=p->nColumn );
+  assert( p->aSample!=0 );
+  assert( p->nSample>0 );
+  assert( pBuilder->nRecValid<nEq );
+
+  /* If values are not available for all fields of the index to the left
+  ** of this one, no estimate can be made. Return SQLITE_NOTFOUND. */
+  if( pBuilder->nRecValid<(nEq-1) ){
+    return SQLITE_NOTFOUND;
+  }
+
+  /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue()
+  ** below would return the same value.  */
+  if( nEq>=p->nColumn ){
+    *pnRow = 1;
+    return SQLITE_OK;
+  }
+
+  aff = p->pTable->aCol[p->aiColumn[nEq-1]].affinity;
+  rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq-1, &bOk);
+  pBuilder->pRec = pRec;
+  if( rc!=SQLITE_OK ) return rc;
+  if( bOk==0 ) return SQLITE_NOTFOUND;
+  pBuilder->nRecValid = nEq;
+
+  whereKeyStats(pParse, p, pRec, 0, a);
+  WHERETRACE(0x10,("equality scan regions: %d\n", (int)a[1]));
+  *pnRow = a[1];
+  
+  return rc;
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+/*
+** Estimate the number of rows that will be returned based on
+** an IN constraint where the right-hand side of the IN operator
+** is a list of values.  Example:
+**
+**        WHERE x IN (1,2,3,4)
+**
+** Write the estimated row count into *pnRow and return SQLITE_OK. 
+** If unable to make an estimate, leave *pnRow unchanged and return
+** non-zero.
+**
+** This routine can fail if it is unable to load a collating sequence
+** required for string comparison, or if unable to allocate memory
+** for a UTF conversion required for comparison.  The error is stored
+** in the pParse structure.
+*/
+static int whereInScanEst(
+  Parse *pParse,       /* Parsing & code generating context */
+  WhereLoopBuilder *pBuilder,
+  ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
+  tRowcnt *pnRow       /* Write the revised row estimate here */
+){
+  Index *p = pBuilder->pNew->u.btree.pIndex;
+  i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]);
+  int nRecValid = pBuilder->nRecValid;
+  int rc = SQLITE_OK;     /* Subfunction return code */
+  tRowcnt nEst;           /* Number of rows for a single term */
+  tRowcnt nRowEst = 0;    /* New estimate of the number of rows */
+  int i;                  /* Loop counter */
+
+  assert( p->aSample!=0 );
+  for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){
+    nEst = nRow0;
+    rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst);
+    nRowEst += nEst;
+    pBuilder->nRecValid = nRecValid;
+  }
+
+  if( rc==SQLITE_OK ){
+    if( nRowEst > nRow0 ) nRowEst = nRow0;
+    *pnRow = nRowEst;
+    WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst));
+  }
+  assert( pBuilder->nRecValid==nRecValid );
+  return rc;
+}
+#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
+
+/*
+** Disable a term in the WHERE clause.  Except, do not disable the term
+** if it controls a LEFT OUTER JOIN and it did not originate in the ON
+** or USING clause of that join.
+**
+** Consider the term t2.z='ok' in the following queries:
+**
+**   (1)  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok'
+**   (2)  SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok'
+**   (3)  SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok'
+**
+** The t2.z='ok' is disabled in the in (2) because it originates
+** in the ON clause.  The term is disabled in (3) because it is not part
+** of a LEFT OUTER JOIN.  In (1), the term is not disabled.
+**
+** Disabling a term causes that term to not be tested in the inner loop
+** of the join.  Disabling is an optimization.  When terms are satisfied
+** by indices, we disable them to prevent redundant tests in the inner
+** loop.  We would get the correct results if nothing were ever disabled,
+** but joins might run a little slower.  The trick is to disable as much
+** as we can without disabling too much.  If we disabled in (1), we'd get
+** the wrong answer.  See ticket #813.
+*/
+static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){
+  if( pTerm
+      && (pTerm->wtFlags & TERM_CODED)==0
+      && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin))
+      && (pLevel->notReady & pTerm->prereqAll)==0
+  ){
+    pTerm->wtFlags |= TERM_CODED;
+    if( pTerm->iParent>=0 ){
+      WhereTerm *pOther = &pTerm->pWC->a[pTerm->iParent];
+      if( (--pOther->nChild)==0 ){
+        disableTerm(pLevel, pOther);
+      }
+    }
+  }
+}
+
+/*
+** Code an OP_Affinity opcode to apply the column affinity string zAff
+** to the n registers starting at base. 
+**
+** As an optimization, SQLITE_AFF_NONE entries (which are no-ops) at the
+** beginning and end of zAff are ignored.  If all entries in zAff are
+** SQLITE_AFF_NONE, then no code gets generated.
+**
+** This routine makes its own copy of zAff so that the caller is free
+** to modify zAff after this routine returns.
+*/
+static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){
+  Vdbe *v = pParse->pVdbe;
+  if( zAff==0 ){
+    assert( pParse->db->mallocFailed );
+    return;
+  }
+  assert( v!=0 );
+
+  /* Adjust base and n to skip over SQLITE_AFF_NONE entries at the beginning
+  ** and end of the affinity string.
+  */
+  while( n>0 && zAff[0]==SQLITE_AFF_NONE ){
+    n--;
+    base++;
+    zAff++;
+  }
+  while( n>1 && zAff[n-1]==SQLITE_AFF_NONE ){
+    n--;
+  }
+
+  /* Code the OP_Affinity opcode if there is anything left to do. */
+  if( n>0 ){
+    sqlite3VdbeAddOp2(v, OP_Affinity, base, n);
+    sqlite3VdbeChangeP4(v, -1, zAff, n);
+    sqlite3ExprCacheAffinityChange(pParse, base, n);
+  }
+}
+
+
+/*
+** Generate code for a single equality term of the WHERE clause.  An equality
+** term can be either X=expr or X IN (...).   pTerm is the term to be 
+** coded.
+**
+** The current value for the constraint is left in register iReg.
+**
+** For a constraint of the form X=expr, the expression is evaluated and its
+** result is left on the stack.  For constraints of the form X IN (...)
+** this routine sets up a loop that will iterate over all values of X.
+*/
+static int codeEqualityTerm(
+  Parse *pParse,      /* The parsing context */
+  WhereTerm *pTerm,   /* The term of the WHERE clause to be coded */
+  WhereLevel *pLevel, /* The level of the FROM clause we are working on */
+  int iEq,            /* Index of the equality term within this level */
+  int bRev,           /* True for reverse-order IN operations */
+  int iTarget         /* Attempt to leave results in this register */
+){
+  Expr *pX = pTerm->pExpr;
+  Vdbe *v = pParse->pVdbe;
+  int iReg;                  /* Register holding results */
+
+  assert( iTarget>0 );
+  if( pX->op==TK_EQ ){
+    iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget);
+  }else if( pX->op==TK_ISNULL ){
+    iReg = iTarget;
+    sqlite3VdbeAddOp2(v, OP_Null, 0, iReg);
+#ifndef SQLITE_OMIT_SUBQUERY
+  }else{
+    int eType;
+    int iTab;
+    struct InLoop *pIn;
+    WhereLoop *pLoop = pLevel->pWLoop;
+
+    if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0
+      && pLoop->u.btree.pIndex!=0
+      && pLoop->u.btree.pIndex->aSortOrder[iEq]
+    ){
+      testcase( iEq==0 );
+      testcase( bRev );
+      bRev = !bRev;
+    }
+    assert( pX->op==TK_IN );
+    iReg = iTarget;
+    eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0);
+    if( eType==IN_INDEX_INDEX_DESC ){
+      testcase( bRev );
+      bRev = !bRev;
+    }
+    iTab = pX->iTable;
+    sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0);
+    VdbeCoverageIf(v, bRev);
+    VdbeCoverageIf(v, !bRev);
+    assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 );
+    pLoop->wsFlags |= WHERE_IN_ABLE;
+    if( pLevel->u.in.nIn==0 ){
+      pLevel->addrNxt = sqlite3VdbeMakeLabel(v);
+    }
+    pLevel->u.in.nIn++;
+    pLevel->u.in.aInLoop =
+       sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop,
+                              sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn);
+    pIn = pLevel->u.in.aInLoop;
+    if( pIn ){
+      pIn += pLevel->u.in.nIn - 1;
+      pIn->iCur = iTab;
+      if( eType==IN_INDEX_ROWID ){
+        pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg);
+      }else{
+        pIn->addrInTop = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg);
+      }
+      pIn->eEndLoopOp = bRev ? OP_PrevIfOpen : OP_NextIfOpen;
+      sqlite3VdbeAddOp1(v, OP_IsNull, iReg); VdbeCoverage(v);
+    }else{
+      pLevel->u.in.nIn = 0;
+    }
+#endif
+  }
+  disableTerm(pLevel, pTerm);
+  return iReg;
+}
+
+/*
+** Generate code that will evaluate all == and IN constraints for an
+** index scan.
+**
+** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c).
+** Suppose the WHERE clause is this:  a==5 AND b IN (1,2,3) AND c>5 AND c<10
+** The index has as many as three equality constraints, but in this
+** example, the third "c" value is an inequality.  So only two 
+** constraints are coded.  This routine will generate code to evaluate
+** a==5 and b IN (1,2,3).  The current values for a and b will be stored
+** in consecutive registers and the index of the first register is returned.
+**
+** In the example above nEq==2.  But this subroutine works for any value
+** of nEq including 0.  If nEq==0, this routine is nearly a no-op.
+** The only thing it does is allocate the pLevel->iMem memory cell and
+** compute the affinity string.
+**
+** The nExtraReg parameter is 0 or 1.  It is 0 if all WHERE clause constraints
+** are == or IN and are covered by the nEq.  nExtraReg is 1 if there is
+** an inequality constraint (such as the "c>=5 AND c<10" in the example) that
+** occurs after the nEq quality constraints.
+**
+** This routine allocates a range of nEq+nExtraReg memory cells and returns
+** the index of the first memory cell in that range. The code that
+** calls this routine will use that memory range to store keys for
+** start and termination conditions of the loop.
+** key value of the loop.  If one or more IN operators appear, then
+** this routine allocates an additional nEq memory cells for internal
+** use.
+**
+** Before returning, *pzAff is set to point to a buffer containing a
+** copy of the column affinity string of the index allocated using
+** sqlite3DbMalloc(). Except, entries in the copy of the string associated
+** with equality constraints that use NONE affinity are set to
+** SQLITE_AFF_NONE. This is to deal with SQL such as the following:
+**
+**   CREATE TABLE t1(a TEXT PRIMARY KEY, b);
+**   SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b;
+**
+** In the example above, the index on t1(a) has TEXT affinity. But since
+** the right hand side of the equality constraint (t2.b) has NONE affinity,
+** no conversion should be attempted before using a t2.b value as part of
+** a key to search the index. Hence the first byte in the returned affinity
+** string in this example would be set to SQLITE_AFF_NONE.
+*/
+static int codeAllEqualityTerms(
+  Parse *pParse,        /* Parsing context */
+  WhereLevel *pLevel,   /* Which nested loop of the FROM we are coding */
+  int bRev,             /* Reverse the order of IN operators */
+  int nExtraReg,        /* Number of extra registers to allocate */
+  char **pzAff          /* OUT: Set to point to affinity string */
+){
+  u16 nEq;                      /* The number of == or IN constraints to code */
+  u16 nSkip;                    /* Number of left-most columns to skip */
+  Vdbe *v = pParse->pVdbe;      /* The vm under construction */
+  Index *pIdx;                  /* The index being used for this loop */
+  WhereTerm *pTerm;             /* A single constraint term */
+  WhereLoop *pLoop;             /* The WhereLoop object */
+  int j;                        /* Loop counter */
+  int regBase;                  /* Base register */
+  int nReg;                     /* Number of registers to allocate */
+  char *zAff;                   /* Affinity string to return */
+
+  /* This module is only called on query plans that use an index. */
+  pLoop = pLevel->pWLoop;
+  assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 );
+  nEq = pLoop->u.btree.nEq;
+  nSkip = pLoop->nSkip;
+  pIdx = pLoop->u.btree.pIndex;
+  assert( pIdx!=0 );
+
+  /* Figure out how many memory cells we will need then allocate them.
+  */
+  regBase = pParse->nMem + 1;
+  nReg = pLoop->u.btree.nEq + nExtraReg;
+  pParse->nMem += nReg;
+
+  zAff = sqlite3DbStrDup(pParse->db, sqlite3IndexAffinityStr(v, pIdx));
+  if( !zAff ){
+    pParse->db->mallocFailed = 1;
+  }
+
+  if( nSkip ){
+    int iIdxCur = pLevel->iIdxCur;
+    sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur);
+    VdbeCoverageIf(v, bRev==0);
+    VdbeCoverageIf(v, bRev!=0);
+    VdbeComment((v, "begin skip-scan on %s", pIdx->zName));
+    j = sqlite3VdbeAddOp0(v, OP_Goto);
+    pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT),
+                            iIdxCur, 0, regBase, nSkip);
+    VdbeCoverageIf(v, bRev==0);
+    VdbeCoverageIf(v, bRev!=0);
+    sqlite3VdbeJumpHere(v, j);
+    for(j=0; j<nSkip; j++){
+      sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, j, regBase+j);
+      assert( pIdx->aiColumn[j]>=0 );
+      VdbeComment((v, "%s", pIdx->pTable->aCol[pIdx->aiColumn[j]].zName));
+    }
+  }    
+
+  /* Evaluate the equality constraints
+  */
+  assert( zAff==0 || (int)strlen(zAff)>=nEq );
+  for(j=nSkip; j<nEq; j++){
+    int r1;
+    pTerm = pLoop->aLTerm[j];
+    assert( pTerm!=0 );
+    /* The following testcase is true for indices with redundant columns. 
+    ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */
+    testcase( (pTerm->wtFlags & TERM_CODED)!=0 );
+    testcase( pTerm->wtFlags & TERM_VIRTUAL );
+    r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j);
+    if( r1!=regBase+j ){
+      if( nReg==1 ){
+        sqlite3ReleaseTempReg(pParse, regBase);
+        regBase = r1;
+      }else{
+        sqlite3VdbeAddOp2(v, OP_SCopy, r1, regBase+j);
+      }
+    }
+    testcase( pTerm->eOperator & WO_ISNULL );
+    testcase( pTerm->eOperator & WO_IN );
+    if( (pTerm->eOperator & (WO_ISNULL|WO_IN))==0 ){
+      Expr *pRight = pTerm->pExpr->pRight;
+      if( sqlite3ExprCanBeNull(pRight) ){
+        sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk);
+        VdbeCoverage(v);
+      }
+      if( zAff ){
+        if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_NONE ){
+          zAff[j] = SQLITE_AFF_NONE;
+        }
+        if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){
+          zAff[j] = SQLITE_AFF_NONE;
+        }
+      }
+    }
+  }
+  *pzAff = zAff;
+  return regBase;
+}
+
+#ifndef SQLITE_OMIT_EXPLAIN
+/*
+** This routine is a helper for explainIndexRange() below
+**
+** pStr holds the text of an expression that we are building up one term
+** at a time.  This routine adds a new term to the end of the expression.
+** Terms are separated by AND so add the "AND" text for second and subsequent
+** terms only.
+*/
+static void explainAppendTerm(
+  StrAccum *pStr,             /* The text expression being built */
+  int iTerm,                  /* Index of this term.  First is zero */
+  const char *zColumn,        /* Name of the column */
+  const char *zOp             /* Name of the operator */
+){
+  if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5);
+  sqlite3StrAccumAppendAll(pStr, zColumn);
+  sqlite3StrAccumAppend(pStr, zOp, 1);
+  sqlite3StrAccumAppend(pStr, "?", 1);
+}
+
+/*
+** Argument pLevel describes a strategy for scanning table pTab. This 
+** function appends text to pStr that describes the subset of table
+** rows scanned by the strategy in the form of an SQL expression.
+**
+** For example, if the query:
+**
+**   SELECT * FROM t1 WHERE a=1 AND b>2;
+**
+** is run and there is an index on (a, b), then this function returns a
+** string similar to:
+**
+**   "a=? AND b>?"
+*/
+static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){
+  Index *pIndex = pLoop->u.btree.pIndex;
+  u16 nEq = pLoop->u.btree.nEq;
+  u16 nSkip = pLoop->nSkip;
+  int i, j;
+  Column *aCol = pTab->aCol;
+  i16 *aiColumn = pIndex->aiColumn;
+
+  if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return;
+  sqlite3StrAccumAppend(pStr, " (", 2);
+  for(i=0; i<nEq; i++){
+    char *z = aiColumn[i] < 0 ? "rowid" : aCol[aiColumn[i]].zName;
+    if( i>=nSkip ){
+      explainAppendTerm(pStr, i, z, "=");
+    }else{
+      if( i ) sqlite3StrAccumAppend(pStr, " AND ", 5);
+      sqlite3XPrintf(pStr, 0, "ANY(%s)", z);
+    }
+  }
+
+  j = i;
+  if( pLoop->wsFlags&WHERE_BTM_LIMIT ){
+    char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName;
+    explainAppendTerm(pStr, i++, z, ">");
+  }
+  if( pLoop->wsFlags&WHERE_TOP_LIMIT ){
+    char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName;
+    explainAppendTerm(pStr, i, z, "<");
+  }
+  sqlite3StrAccumAppend(pStr, ")", 1);
+}
+
+/*
+** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN
+** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was
+** defined at compile-time. If it is not a no-op, a single OP_Explain opcode 
+** is added to the output to describe the table scan strategy in pLevel.
+**
+** If an OP_Explain opcode is added to the VM, its address is returned.
+** Otherwise, if no OP_Explain is coded, zero is returned.
+*/
+static int explainOneScan(
+  Parse *pParse,                  /* Parse context */
+  SrcList *pTabList,              /* Table list this loop refers to */
+  WhereLevel *pLevel,             /* Scan to write OP_Explain opcode for */
+  int iLevel,                     /* Value for "level" column of output */
+  int iFrom,                      /* Value for "from" column of output */
+  u16 wctrlFlags                  /* Flags passed to sqlite3WhereBegin() */
+){
+  int ret = 0;
+#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS)
+  if( pParse->explain==2 )
+#endif
+  {
+    struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom];
+    Vdbe *v = pParse->pVdbe;      /* VM being constructed */
+    sqlite3 *db = pParse->db;     /* Database handle */
+    int iId = pParse->iSelectId;  /* Select id (left-most output column) */
+    int isSearch;                 /* True for a SEARCH. False for SCAN. */
+    WhereLoop *pLoop;             /* The controlling WhereLoop object */
+    u32 flags;                    /* Flags that describe this loop */
+    char *zMsg;                   /* Text to add to EQP output */
+    StrAccum str;                 /* EQP output string */
+    char zBuf[100];               /* Initial space for EQP output string */
+
+    pLoop = pLevel->pWLoop;
+    flags = pLoop->wsFlags;
+    if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0;
+
+    isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0
+            || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0))
+            || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX));
+
+    sqlite3StrAccumInit(&str, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH);
+    str.db = db;
+    sqlite3StrAccumAppendAll(&str, isSearch ? "SEARCH" : "SCAN");
+    if( pItem->pSelect ){
+      sqlite3XPrintf(&str, 0, " SUBQUERY %d", pItem->iSelectId);
+    }else{
+      sqlite3XPrintf(&str, 0, " TABLE %s", pItem->zName);
+    }
+
+    if( pItem->zAlias ){
+      sqlite3XPrintf(&str, 0, " AS %s", pItem->zAlias);
+    }
+    if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){
+      const char *zFmt = 0;
+      Index *pIdx;
+
+      assert( pLoop->u.btree.pIndex!=0 );
+      pIdx = pLoop->u.btree.pIndex;
+      assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) );
+      if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){
+        if( isSearch ){
+          zFmt = "PRIMARY KEY";
+        }
+      }else if( flags & WHERE_PARTIALIDX ){
+        zFmt = "AUTOMATIC PARTIAL COVERING INDEX";
+      }else if( flags & WHERE_AUTO_INDEX ){
+        zFmt = "AUTOMATIC COVERING INDEX";
+      }else if( flags & WHERE_IDX_ONLY ){
+        zFmt = "COVERING INDEX %s";
+      }else{
+        zFmt = "INDEX %s";
+      }
+      if( zFmt ){
+        sqlite3StrAccumAppend(&str, " USING ", 7);
+        sqlite3XPrintf(&str, 0, zFmt, pIdx->zName);
+        explainIndexRange(&str, pLoop, pItem->pTab);
+      }
+    }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){
+      const char *zRange;
+      if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){
+        zRange = "(rowid=?)";
+      }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){
+        zRange = "(rowid>? AND rowid<?)";
+      }else if( flags&WHERE_BTM_LIMIT ){
+        zRange = "(rowid>?)";
+      }else{
+        assert( flags&WHERE_TOP_LIMIT);
+        zRange = "(rowid<?)";
+      }
+      sqlite3StrAccumAppendAll(&str, " USING INTEGER PRIMARY KEY ");
+      sqlite3StrAccumAppendAll(&str, zRange);
+    }
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    else if( (flags & WHERE_VIRTUALTABLE)!=0 ){
+      sqlite3XPrintf(&str, 0, " VIRTUAL TABLE INDEX %d:%s",
+                  pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr);
+    }
+#endif
+#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS
+    if( pLoop->nOut>=10 ){
+      sqlite3XPrintf(&str, 0, " (~%llu rows)", sqlite3LogEstToInt(pLoop->nOut));
+    }else{
+      sqlite3StrAccumAppend(&str, " (~1 row)", 9);
+    }
+#endif
+    zMsg = sqlite3StrAccumFinish(&str);
+    ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC);
+  }
+  return ret;
+}
+#else
+# define explainOneScan(u,v,w,x,y,z) 0
+#endif /* SQLITE_OMIT_EXPLAIN */
+
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+/*
+** Configure the VM passed as the first argument with an
+** sqlite3_stmt_scanstatus() entry corresponding to the scan used to 
+** implement level pLvl. Argument pSrclist is a pointer to the FROM 
+** clause that the scan reads data from.
+**
+** If argument addrExplain is not 0, it must be the address of an 
+** OP_Explain instruction that describes the same loop.
+*/
+static void addScanStatus(
+  Vdbe *v,                        /* Vdbe to add scanstatus entry to */
+  SrcList *pSrclist,              /* FROM clause pLvl reads data from */
+  WhereLevel *pLvl,               /* Level to add scanstatus() entry for */
+  int addrExplain                 /* Address of OP_Explain (or 0) */
+){
+  const char *zObj = 0;
+  WhereLoop *pLoop = pLvl->pWLoop;
+  if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0  &&  pLoop->u.btree.pIndex!=0 ){
+    zObj = pLoop->u.btree.pIndex->zName;
+  }else{
+    zObj = pSrclist->a[pLvl->iFrom].zName;
+  }
+  sqlite3VdbeScanStatus(
+      v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj
+  );
+}
+#else
+# define addScanStatus(a, b, c, d) ((void)d)
+#endif
+
+
+
+/*
+** Generate code for the start of the iLevel-th loop in the WHERE clause
+** implementation described by pWInfo.
+*/
+static Bitmask codeOneLoopStart(
+  WhereInfo *pWInfo,   /* Complete information about the WHERE clause */
+  int iLevel,          /* Which level of pWInfo->a[] should be coded */
+  Bitmask notReady     /* Which tables are currently available */
+){
+  int j, k;            /* Loop counters */
+  int iCur;            /* The VDBE cursor for the table */
+  int addrNxt;         /* Where to jump to continue with the next IN case */
+  int omitTable;       /* True if we use the index only */
+  int bRev;            /* True if we need to scan in reverse order */
+  WhereLevel *pLevel;  /* The where level to be coded */
+  WhereLoop *pLoop;    /* The WhereLoop object being coded */
+  WhereClause *pWC;    /* Decomposition of the entire WHERE clause */
+  WhereTerm *pTerm;               /* A WHERE clause term */
+  Parse *pParse;                  /* Parsing context */
+  sqlite3 *db;                    /* Database connection */
+  Vdbe *v;                        /* The prepared stmt under constructions */
+  struct SrcList_item *pTabItem;  /* FROM clause term being coded */
+  int addrBrk;                    /* Jump here to break out of the loop */
+  int addrCont;                   /* Jump here to continue with next cycle */
+  int iRowidReg = 0;        /* Rowid is stored in this register, if not zero */
+  int iReleaseReg = 0;      /* Temp register to free before returning */
+
+  pParse = pWInfo->pParse;
+  v = pParse->pVdbe;
+  pWC = &pWInfo->sWC;
+  db = pParse->db;
+  pLevel = &pWInfo->a[iLevel];
+  pLoop = pLevel->pWLoop;
+  pTabItem = &pWInfo->pTabList->a[pLevel->iFrom];
+  iCur = pTabItem->iCursor;
+  pLevel->notReady = notReady & ~getMask(&pWInfo->sMaskSet, iCur);
+  bRev = (pWInfo->revMask>>iLevel)&1;
+  omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 
+           && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0;
+  VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName));
+
+  /* Create labels for the "break" and "continue" instructions
+  ** for the current loop.  Jump to addrBrk to break out of a loop.
+  ** Jump to cont to go immediately to the next iteration of the
+  ** loop.
+  **
+  ** When there is an IN operator, we also have a "addrNxt" label that
+  ** means to continue with the next IN value combination.  When
+  ** there are no IN operators in the constraints, the "addrNxt" label
+  ** is the same as "addrBrk".
+  */
+  addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(v);
+  addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(v);
+
+  /* If this is the right table of a LEFT OUTER JOIN, allocate and
+  ** initialize a memory cell that records if this table matches any
+  ** row of the left table of the join.
+  */
+  if( pLevel->iFrom>0 && (pTabItem[0].jointype & JT_LEFT)!=0 ){
+    pLevel->iLeftJoin = ++pParse->nMem;
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin);
+    VdbeComment((v, "init LEFT JOIN no-match flag"));
+  }
+
+  /* Special case of a FROM clause subquery implemented as a co-routine */
+  if( pTabItem->viaCoroutine ){
+    int regYield = pTabItem->regReturn;
+    sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub);
+    pLevel->p2 =  sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk);
+    VdbeCoverage(v);
+    VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName));
+    pLevel->op = OP_Goto;
+  }else
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if(  (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){
+    /* Case 1:  The table is a virtual-table.  Use the VFilter and VNext
+    **          to access the data.
+    */
+    int iReg;   /* P3 Value for OP_VFilter */
+    int addrNotFound;
+    int nConstraint = pLoop->nLTerm;
+
+    sqlite3ExprCachePush(pParse);
+    iReg = sqlite3GetTempRange(pParse, nConstraint+2);
+    addrNotFound = pLevel->addrBrk;
+    for(j=0; j<nConstraint; j++){
+      int iTarget = iReg+j+2;
+      pTerm = pLoop->aLTerm[j];
+      if( pTerm==0 ) continue;
+      if( pTerm->eOperator & WO_IN ){
+        codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget);
+        addrNotFound = pLevel->addrNxt;
+      }else{
+        sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget);
+      }
+    }
+    sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg);
+    sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1);
+    sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg,
+                      pLoop->u.vtab.idxStr,
+                      pLoop->u.vtab.needFree ? P4_MPRINTF : P4_STATIC);
+    VdbeCoverage(v);
+    pLoop->u.vtab.needFree = 0;
+    for(j=0; j<nConstraint && j<16; j++){
+      if( (pLoop->u.vtab.omitMask>>j)&1 ){
+        disableTerm(pLevel, pLoop->aLTerm[j]);
+      }
+    }
+    pLevel->op = OP_VNext;
+    pLevel->p1 = iCur;
+    pLevel->p2 = sqlite3VdbeCurrentAddr(v);
+    sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2);
+    sqlite3ExprCachePop(pParse);
+  }else
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+  if( (pLoop->wsFlags & WHERE_IPK)!=0
+   && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0
+  ){
+    /* Case 2:  We can directly reference a single row using an
+    **          equality comparison against the ROWID field.  Or
+    **          we reference multiple rows using a "rowid IN (...)"
+    **          construct.
+    */
+    assert( pLoop->u.btree.nEq==1 );
+    pTerm = pLoop->aLTerm[0];
+    assert( pTerm!=0 );
+    assert( pTerm->pExpr!=0 );
+    assert( omitTable==0 );
+    testcase( pTerm->wtFlags & TERM_VIRTUAL );
+    iReleaseReg = ++pParse->nMem;
+    iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg);
+    if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg);
+    addrNxt = pLevel->addrNxt;
+    sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); VdbeCoverage(v);
+    sqlite3VdbeAddOp3(v, OP_NotExists, iCur, addrNxt, iRowidReg);
+    VdbeCoverage(v);
+    sqlite3ExprCacheAffinityChange(pParse, iRowidReg, 1);
+    sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg);
+    VdbeComment((v, "pk"));
+    pLevel->op = OP_Noop;
+  }else if( (pLoop->wsFlags & WHERE_IPK)!=0
+         && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0
+  ){
+    /* Case 3:  We have an inequality comparison against the ROWID field.
+    */
+    int testOp = OP_Noop;
+    int start;
+    int memEndValue = 0;
+    WhereTerm *pStart, *pEnd;
+
+    assert( omitTable==0 );
+    j = 0;
+    pStart = pEnd = 0;
+    if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++];
+    if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++];
+    assert( pStart!=0 || pEnd!=0 );
+    if( bRev ){
+      pTerm = pStart;
+      pStart = pEnd;
+      pEnd = pTerm;
+    }
+    if( pStart ){
+      Expr *pX;             /* The expression that defines the start bound */
+      int r1, rTemp;        /* Registers for holding the start boundary */
+
+      /* The following constant maps TK_xx codes into corresponding 
+      ** seek opcodes.  It depends on a particular ordering of TK_xx
+      */
+      const u8 aMoveOp[] = {
+           /* TK_GT */  OP_SeekGT,
+           /* TK_LE */  OP_SeekLE,
+           /* TK_LT */  OP_SeekLT,
+           /* TK_GE */  OP_SeekGE
+      };
+      assert( TK_LE==TK_GT+1 );      /* Make sure the ordering.. */
+      assert( TK_LT==TK_GT+2 );      /*  ... of the TK_xx values... */
+      assert( TK_GE==TK_GT+3 );      /*  ... is correcct. */
+
+      assert( (pStart->wtFlags & TERM_VNULL)==0 );
+      testcase( pStart->wtFlags & TERM_VIRTUAL );
+      pX = pStart->pExpr;
+      assert( pX!=0 );
+      testcase( pStart->leftCursor!=iCur ); /* transitive constraints */
+      r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp);
+      sqlite3VdbeAddOp3(v, aMoveOp[pX->op-TK_GT], iCur, addrBrk, r1);
+      VdbeComment((v, "pk"));
+      VdbeCoverageIf(v, pX->op==TK_GT);
+      VdbeCoverageIf(v, pX->op==TK_LE);
+      VdbeCoverageIf(v, pX->op==TK_LT);
+      VdbeCoverageIf(v, pX->op==TK_GE);
+      sqlite3ExprCacheAffinityChange(pParse, r1, 1);
+      sqlite3ReleaseTempReg(pParse, rTemp);
+      disableTerm(pLevel, pStart);
+    }else{
+      sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrBrk);
+      VdbeCoverageIf(v, bRev==0);
+      VdbeCoverageIf(v, bRev!=0);
+    }
+    if( pEnd ){
+      Expr *pX;
+      pX = pEnd->pExpr;
+      assert( pX!=0 );
+      assert( (pEnd->wtFlags & TERM_VNULL)==0 );
+      testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */
+      testcase( pEnd->wtFlags & TERM_VIRTUAL );
+      memEndValue = ++pParse->nMem;
+      sqlite3ExprCode(pParse, pX->pRight, memEndValue);
+      if( pX->op==TK_LT || pX->op==TK_GT ){
+        testOp = bRev ? OP_Le : OP_Ge;
+      }else{
+        testOp = bRev ? OP_Lt : OP_Gt;
+      }
+      disableTerm(pLevel, pEnd);
+    }
+    start = sqlite3VdbeCurrentAddr(v);
+    pLevel->op = bRev ? OP_Prev : OP_Next;
+    pLevel->p1 = iCur;
+    pLevel->p2 = start;
+    assert( pLevel->p5==0 );
+    if( testOp!=OP_Noop ){
+      iRowidReg = ++pParse->nMem;
+      sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg);
+      sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg);
+      sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg);
+      VdbeCoverageIf(v, testOp==OP_Le);
+      VdbeCoverageIf(v, testOp==OP_Lt);
+      VdbeCoverageIf(v, testOp==OP_Ge);
+      VdbeCoverageIf(v, testOp==OP_Gt);
+      sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL);
+    }
+  }else if( pLoop->wsFlags & WHERE_INDEXED ){
+    /* Case 4: A scan using an index.
+    **
+    **         The WHERE clause may contain zero or more equality 
+    **         terms ("==" or "IN" operators) that refer to the N
+    **         left-most columns of the index. It may also contain
+    **         inequality constraints (>, <, >= or <=) on the indexed
+    **         column that immediately follows the N equalities. Only 
+    **         the right-most column can be an inequality - the rest must
+    **         use the "==" and "IN" operators. For example, if the 
+    **         index is on (x,y,z), then the following clauses are all 
+    **         optimized:
+    **
+    **            x=5
+    **            x=5 AND y=10
+    **            x=5 AND y<10
+    **            x=5 AND y>5 AND y<10
+    **            x=5 AND y=5 AND z<=10
+    **
+    **         The z<10 term of the following cannot be used, only
+    **         the x=5 term:
+    **
+    **            x=5 AND z<10
+    **
+    **         N may be zero if there are inequality constraints.
+    **         If there are no inequality constraints, then N is at
+    **         least one.
+    **
+    **         This case is also used when there are no WHERE clause
+    **         constraints but an index is selected anyway, in order
+    **         to force the output order to conform to an ORDER BY.
+    */  
+    static const u8 aStartOp[] = {
+      0,
+      0,
+      OP_Rewind,           /* 2: (!start_constraints && startEq &&  !bRev) */
+      OP_Last,             /* 3: (!start_constraints && startEq &&   bRev) */
+      OP_SeekGT,           /* 4: (start_constraints  && !startEq && !bRev) */
+      OP_SeekLT,           /* 5: (start_constraints  && !startEq &&  bRev) */
+      OP_SeekGE,           /* 6: (start_constraints  &&  startEq && !bRev) */
+      OP_SeekLE            /* 7: (start_constraints  &&  startEq &&  bRev) */
+    };
+    static const u8 aEndOp[] = {
+      OP_IdxGE,            /* 0: (end_constraints && !bRev && !endEq) */
+      OP_IdxGT,            /* 1: (end_constraints && !bRev &&  endEq) */
+      OP_IdxLE,            /* 2: (end_constraints &&  bRev && !endEq) */
+      OP_IdxLT,            /* 3: (end_constraints &&  bRev &&  endEq) */
+    };
+    u16 nEq = pLoop->u.btree.nEq;     /* Number of == or IN terms */
+    int regBase;                 /* Base register holding constraint values */
+    WhereTerm *pRangeStart = 0;  /* Inequality constraint at range start */
+    WhereTerm *pRangeEnd = 0;    /* Inequality constraint at range end */
+    int startEq;                 /* True if range start uses ==, >= or <= */
+    int endEq;                   /* True if range end uses ==, >= or <= */
+    int start_constraints;       /* Start of range is constrained */
+    int nConstraint;             /* Number of constraint terms */
+    Index *pIdx;                 /* The index we will be using */
+    int iIdxCur;                 /* The VDBE cursor for the index */
+    int nExtraReg = 0;           /* Number of extra registers needed */
+    int op;                      /* Instruction opcode */
+    char *zStartAff;             /* Affinity for start of range constraint */
+    char cEndAff = 0;            /* Affinity for end of range constraint */
+    u8 bSeekPastNull = 0;        /* True to seek past initial nulls */
+    u8 bStopAtNull = 0;          /* Add condition to terminate at NULLs */
+
+    pIdx = pLoop->u.btree.pIndex;
+    iIdxCur = pLevel->iIdxCur;
+    assert( nEq>=pLoop->nSkip );
+
+    /* If this loop satisfies a sort order (pOrderBy) request that 
+    ** was passed to this function to implement a "SELECT min(x) ..." 
+    ** query, then the caller will only allow the loop to run for
+    ** a single iteration. This means that the first row returned
+    ** should not have a NULL value stored in 'x'. If column 'x' is
+    ** the first one after the nEq equality constraints in the index,
+    ** this requires some special handling.
+    */
+    assert( pWInfo->pOrderBy==0
+         || pWInfo->pOrderBy->nExpr==1
+         || (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 );
+    if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0
+     && pWInfo->nOBSat>0
+     && (pIdx->nKeyCol>nEq)
+    ){
+      assert( pLoop->nSkip==0 );
+      bSeekPastNull = 1;
+      nExtraReg = 1;
+    }
+
+    /* Find any inequality constraint terms for the start and end 
+    ** of the range. 
+    */
+    j = nEq;
+    if( pLoop->wsFlags & WHERE_BTM_LIMIT ){
+      pRangeStart = pLoop->aLTerm[j++];
+      nExtraReg = 1;
+    }
+    if( pLoop->wsFlags & WHERE_TOP_LIMIT ){
+      pRangeEnd = pLoop->aLTerm[j++];
+      nExtraReg = 1;
+      if( pRangeStart==0
+       && (j = pIdx->aiColumn[nEq])>=0 
+       && pIdx->pTable->aCol[j].notNull==0
+      ){
+        bSeekPastNull = 1;
+      }
+    }
+    assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 );
+
+    /* Generate code to evaluate all constraint terms using == or IN
+    ** and store the values of those terms in an array of registers
+    ** starting at regBase.
+    */
+    regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff);
+    assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq );
+    if( zStartAff ) cEndAff = zStartAff[nEq];
+    addrNxt = pLevel->addrNxt;
+
+    /* If we are doing a reverse order scan on an ascending index, or
+    ** a forward order scan on a descending index, interchange the 
+    ** start and end terms (pRangeStart and pRangeEnd).
+    */
+    if( (nEq<pIdx->nKeyCol && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC))
+     || (bRev && pIdx->nKeyCol==nEq)
+    ){
+      SWAP(WhereTerm *, pRangeEnd, pRangeStart);
+      SWAP(u8, bSeekPastNull, bStopAtNull);
+    }
+
+    testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 );
+    testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 );
+    testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 );
+    testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 );
+    startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE);
+    endEq =   !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE);
+    start_constraints = pRangeStart || nEq>0;
+
+    /* Seek the index cursor to the start of the range. */
+    nConstraint = nEq;
+    if( pRangeStart ){
+      Expr *pRight = pRangeStart->pExpr->pRight;
+      sqlite3ExprCode(pParse, pRight, regBase+nEq);
+      if( (pRangeStart->wtFlags & TERM_VNULL)==0
+       && sqlite3ExprCanBeNull(pRight)
+      ){
+        sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt);
+        VdbeCoverage(v);
+      }
+      if( zStartAff ){
+        if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){
+          /* Since the comparison is to be performed with no conversions
+          ** applied to the operands, set the affinity to apply to pRight to 
+          ** SQLITE_AFF_NONE.  */
+          zStartAff[nEq] = SQLITE_AFF_NONE;
+        }
+        if( sqlite3ExprNeedsNoAffinityChange(pRight, zStartAff[nEq]) ){
+          zStartAff[nEq] = SQLITE_AFF_NONE;
+        }
+      }  
+      nConstraint++;
+      testcase( pRangeStart->wtFlags & TERM_VIRTUAL );
+    }else if( bSeekPastNull ){
+      sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq);
+      nConstraint++;
+      startEq = 0;
+      start_constraints = 1;
+    }
+    codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff);
+    op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev];
+    assert( op!=0 );
+    sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint);
+    VdbeCoverage(v);
+    VdbeCoverageIf(v, op==OP_Rewind);  testcase( op==OP_Rewind );
+    VdbeCoverageIf(v, op==OP_Last);    testcase( op==OP_Last );
+    VdbeCoverageIf(v, op==OP_SeekGT);  testcase( op==OP_SeekGT );
+    VdbeCoverageIf(v, op==OP_SeekGE);  testcase( op==OP_SeekGE );
+    VdbeCoverageIf(v, op==OP_SeekLE);  testcase( op==OP_SeekLE );
+    VdbeCoverageIf(v, op==OP_SeekLT);  testcase( op==OP_SeekLT );
+
+    /* Load the value for the inequality constraint at the end of the
+    ** range (if any).
+    */
+    nConstraint = nEq;
+    if( pRangeEnd ){
+      Expr *pRight = pRangeEnd->pExpr->pRight;
+      sqlite3ExprCacheRemove(pParse, regBase+nEq, 1);
+      sqlite3ExprCode(pParse, pRight, regBase+nEq);
+      if( (pRangeEnd->wtFlags & TERM_VNULL)==0
+       && sqlite3ExprCanBeNull(pRight)
+      ){
+        sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt);
+        VdbeCoverage(v);
+      }
+      if( sqlite3CompareAffinity(pRight, cEndAff)!=SQLITE_AFF_NONE
+       && !sqlite3ExprNeedsNoAffinityChange(pRight, cEndAff)
+      ){
+        codeApplyAffinity(pParse, regBase+nEq, 1, &cEndAff);
+      }
+      nConstraint++;
+      testcase( pRangeEnd->wtFlags & TERM_VIRTUAL );
+    }else if( bStopAtNull ){
+      sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq);
+      endEq = 0;
+      nConstraint++;
+    }
+    sqlite3DbFree(db, zStartAff);
+
+    /* Top of the loop body */
+    pLevel->p2 = sqlite3VdbeCurrentAddr(v);
+
+    /* Check if the index cursor is past the end of the range. */
+    if( nConstraint ){
+      op = aEndOp[bRev*2 + endEq];
+      sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint);
+      testcase( op==OP_IdxGT );  VdbeCoverageIf(v, op==OP_IdxGT );
+      testcase( op==OP_IdxGE );  VdbeCoverageIf(v, op==OP_IdxGE );
+      testcase( op==OP_IdxLT );  VdbeCoverageIf(v, op==OP_IdxLT );
+      testcase( op==OP_IdxLE );  VdbeCoverageIf(v, op==OP_IdxLE );
+    }
+
+    /* Seek the table cursor, if required */
+    disableTerm(pLevel, pRangeStart);
+    disableTerm(pLevel, pRangeEnd);
+    if( omitTable ){
+      /* pIdx is a covering index.  No need to access the main table. */
+    }else if( HasRowid(pIdx->pTable) ){
+      iRowidReg = ++pParse->nMem;
+      sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg);
+      sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg);
+      sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg);  /* Deferred seek */
+    }else if( iCur!=iIdxCur ){
+      Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable);
+      iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol);
+      for(j=0; j<pPk->nKeyCol; j++){
+        k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]);
+        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j);
+      }
+      sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont,
+                           iRowidReg, pPk->nKeyCol); VdbeCoverage(v);
+    }
+
+    /* Record the instruction used to terminate the loop. Disable 
+    ** WHERE clause terms made redundant by the index range scan.
+    */
+    if( pLoop->wsFlags & WHERE_ONEROW ){
+      pLevel->op = OP_Noop;
+    }else if( bRev ){
+      pLevel->op = OP_Prev;
+    }else{
+      pLevel->op = OP_Next;
+    }
+    pLevel->p1 = iIdxCur;
+    pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0;
+    if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){
+      pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP;
+    }else{
+      assert( pLevel->p5==0 );
+    }
+  }else
+
+#ifndef SQLITE_OMIT_OR_OPTIMIZATION
+  if( pLoop->wsFlags & WHERE_MULTI_OR ){
+    /* Case 5:  Two or more separately indexed terms connected by OR
+    **
+    ** Example:
+    **
+    **   CREATE TABLE t1(a,b,c,d);
+    **   CREATE INDEX i1 ON t1(a);
+    **   CREATE INDEX i2 ON t1(b);
+    **   CREATE INDEX i3 ON t1(c);
+    **
+    **   SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13)
+    **
+    ** In the example, there are three indexed terms connected by OR.
+    ** The top of the loop looks like this:
+    **
+    **          Null       1                # Zero the rowset in reg 1
+    **
+    ** Then, for each indexed term, the following. The arguments to
+    ** RowSetTest are such that the rowid of the current row is inserted
+    ** into the RowSet. If it is already present, control skips the
+    ** Gosub opcode and jumps straight to the code generated by WhereEnd().
+    **
+    **        sqlite3WhereBegin(<term>)
+    **          RowSetTest                  # Insert rowid into rowset
+    **          Gosub      2 A
+    **        sqlite3WhereEnd()
+    **
+    ** Following the above, code to terminate the loop. Label A, the target
+    ** of the Gosub above, jumps to the instruction right after the Goto.
+    **
+    **          Null       1                # Zero the rowset in reg 1
+    **          Goto       B                # The loop is finished.
+    **
+    **       A: <loop body>                 # Return data, whatever.
+    **
+    **          Return     2                # Jump back to the Gosub
+    **
+    **       B: <after the loop>
+    **
+    ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then
+    ** use an ephemeral index instead of a RowSet to record the primary
+    ** keys of the rows we have already seen.
+    **
+    */
+    WhereClause *pOrWc;    /* The OR-clause broken out into subterms */
+    SrcList *pOrTab;       /* Shortened table list or OR-clause generation */
+    Index *pCov = 0;             /* Potential covering index (or NULL) */
+    int iCovCur = pParse->nTab++;  /* Cursor used for index scans (if any) */
+
+    int regReturn = ++pParse->nMem;           /* Register used with OP_Gosub */
+    int regRowset = 0;                        /* Register for RowSet object */
+    int regRowid = 0;                         /* Register holding rowid */
+    int iLoopBody = sqlite3VdbeMakeLabel(v);  /* Start of loop body */
+    int iRetInit;                             /* Address of regReturn init */
+    int untestedTerms = 0;             /* Some terms not completely tested */
+    int ii;                            /* Loop counter */
+    u16 wctrlFlags;                    /* Flags for sub-WHERE clause */
+    Expr *pAndExpr = 0;                /* An ".. AND (...)" expression */
+    Table *pTab = pTabItem->pTab;
+   
+    pTerm = pLoop->aLTerm[0];
+    assert( pTerm!=0 );
+    assert( pTerm->eOperator & WO_OR );
+    assert( (pTerm->wtFlags & TERM_ORINFO)!=0 );
+    pOrWc = &pTerm->u.pOrInfo->wc;
+    pLevel->op = OP_Return;
+    pLevel->p1 = regReturn;
+
+    /* Set up a new SrcList in pOrTab containing the table being scanned
+    ** by this loop in the a[0] slot and all notReady tables in a[1..] slots.
+    ** This becomes the SrcList in the recursive call to sqlite3WhereBegin().
+    */
+    if( pWInfo->nLevel>1 ){
+      int nNotReady;                 /* The number of notReady tables */
+      struct SrcList_item *origSrc;     /* Original list of tables */
+      nNotReady = pWInfo->nLevel - iLevel - 1;
+      pOrTab = sqlite3StackAllocRaw(db,
+                            sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0]));
+      if( pOrTab==0 ) return notReady;
+      pOrTab->nAlloc = (u8)(nNotReady + 1);
+      pOrTab->nSrc = pOrTab->nAlloc;
+      memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem));
+      origSrc = pWInfo->pTabList->a;
+      for(k=1; k<=nNotReady; k++){
+        memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k]));
+      }
+    }else{
+      pOrTab = pWInfo->pTabList;
+    }
+
+    /* Initialize the rowset register to contain NULL. An SQL NULL is 
+    ** equivalent to an empty rowset.  Or, create an ephemeral index
+    ** capable of holding primary keys in the case of a WITHOUT ROWID.
+    **
+    ** Also initialize regReturn to contain the address of the instruction 
+    ** immediately following the OP_Return at the bottom of the loop. This
+    ** is required in a few obscure LEFT JOIN cases where control jumps
+    ** over the top of the loop into the body of it. In this case the 
+    ** correct response for the end-of-loop code (the OP_Return) is to 
+    ** fall through to the next instruction, just as an OP_Next does if
+    ** called on an uninitialized cursor.
+    */
+    if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){
+      if( HasRowid(pTab) ){
+        regRowset = ++pParse->nMem;
+        sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset);
+      }else{
+        Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+        regRowset = pParse->nTab++;
+        sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol);
+        sqlite3VdbeSetP4KeyInfo(pParse, pPk);
+      }
+      regRowid = ++pParse->nMem;
+    }
+    iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn);
+
+    /* If the original WHERE clause is z of the form:  (x1 OR x2 OR ...) AND y
+    ** Then for every term xN, evaluate as the subexpression: xN AND z
+    ** That way, terms in y that are factored into the disjunction will
+    ** be picked up by the recursive calls to sqlite3WhereBegin() below.
+    **
+    ** Actually, each subexpression is converted to "xN AND w" where w is
+    ** the "interesting" terms of z - terms that did not originate in the
+    ** ON or USING clause of a LEFT JOIN, and terms that are usable as 
+    ** indices.
+    **
+    ** This optimization also only applies if the (x1 OR x2 OR ...) term
+    ** is not contained in the ON clause of a LEFT JOIN.
+    ** See ticket http://www.sqlite.org/src/info/f2369304e4
+    */
+    if( pWC->nTerm>1 ){
+      int iTerm;
+      for(iTerm=0; iTerm<pWC->nTerm; iTerm++){
+        Expr *pExpr = pWC->a[iTerm].pExpr;
+        if( &pWC->a[iTerm] == pTerm ) continue;
+        if( ExprHasProperty(pExpr, EP_FromJoin) ) continue;
+        if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue;
+        if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue;
+        testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO );
+        pExpr = sqlite3ExprDup(db, pExpr, 0);
+        pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr);
+      }
+      if( pAndExpr ){
+        pAndExpr = sqlite3PExpr(pParse, TK_AND, 0, pAndExpr, 0);
+      }
+    }
+
+    /* Run a separate WHERE clause for each term of the OR clause.  After
+    ** eliminating duplicates from other WHERE clauses, the action for each
+    ** sub-WHERE clause is to to invoke the main loop body as a subroutine.
+    */
+    wctrlFlags =  WHERE_OMIT_OPEN_CLOSE
+                | WHERE_FORCE_TABLE
+                | WHERE_ONETABLE_ONLY;
+    for(ii=0; ii<pOrWc->nTerm; ii++){
+      WhereTerm *pOrTerm = &pOrWc->a[ii];
+      if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){
+        WhereInfo *pSubWInfo;           /* Info for single OR-term scan */
+        Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */
+        int j1 = 0;                     /* Address of jump operation */
+        if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){
+          pAndExpr->pLeft = pOrExpr;
+          pOrExpr = pAndExpr;
+        }
+        /* Loop through table entries that match term pOrTerm. */
+        WHERETRACE(0xffff, ("Subplan for OR-clause:\n"));
+        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0,
+                                      wctrlFlags, iCovCur);
+        assert( pSubWInfo || pParse->nErr || db->mallocFailed );
+        if( pSubWInfo ){
+          WhereLoop *pSubLoop;
+          int addrExplain = explainOneScan(
+              pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0
+          );
+          addScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain);
+
+          /* This is the sub-WHERE clause body.  First skip over
+          ** duplicate rows from prior sub-WHERE clauses, and record the
+          ** rowid (or PRIMARY KEY) for the current row so that the same
+          ** row will be skipped in subsequent sub-WHERE clauses.
+          */
+          if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){
+            int r;
+            int iSet = ((ii==pOrWc->nTerm-1)?-1:ii);
+            if( HasRowid(pTab) ){
+              r = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, regRowid, 0);
+              j1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, r,iSet);
+              VdbeCoverage(v);
+            }else{
+              Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+              int nPk = pPk->nKeyCol;
+              int iPk;
+
+              /* Read the PK into an array of temp registers. */
+              r = sqlite3GetTempRange(pParse, nPk);
+              for(iPk=0; iPk<nPk; iPk++){
+                int iCol = pPk->aiColumn[iPk];
+                sqlite3ExprCodeGetColumn(pParse, pTab, iCol, iCur, r+iPk, 0);
+              }
+
+              /* Check if the temp table already contains this key. If so,
+              ** the row has already been included in the result set and
+              ** can be ignored (by jumping past the Gosub below). Otherwise,
+              ** insert the key into the temp table and proceed with processing
+              ** the row.
+              **
+              ** Use some of the same optimizations as OP_RowSetTest: If iSet
+              ** is zero, assume that the key cannot already be present in
+              ** the temp table. And if iSet is -1, assume that there is no 
+              ** need to insert the key into the temp table, as it will never 
+              ** be tested for.  */ 
+              if( iSet ){
+                j1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk);
+                VdbeCoverage(v);
+              }
+              if( iSet>=0 ){
+                sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid);
+                sqlite3VdbeAddOp3(v, OP_IdxInsert, regRowset, regRowid, 0);
+                if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+              }
+
+              /* Release the array of temp registers */
+              sqlite3ReleaseTempRange(pParse, r, nPk);
+            }
+          }
+
+          /* Invoke the main loop body as a subroutine */
+          sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody);
+
+          /* Jump here (skipping the main loop body subroutine) if the
+          ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */
+          if( j1 ) sqlite3VdbeJumpHere(v, j1);
+
+          /* The pSubWInfo->untestedTerms flag means that this OR term
+          ** contained one or more AND term from a notReady table.  The
+          ** terms from the notReady table could not be tested and will
+          ** need to be tested later.
+          */
+          if( pSubWInfo->untestedTerms ) untestedTerms = 1;
+
+          /* If all of the OR-connected terms are optimized using the same
+          ** index, and the index is opened using the same cursor number
+          ** by each call to sqlite3WhereBegin() made by this loop, it may
+          ** be possible to use that index as a covering index.
+          **
+          ** If the call to sqlite3WhereBegin() above resulted in a scan that
+          ** uses an index, and this is either the first OR-connected term
+          ** processed or the index is the same as that used by all previous
+          ** terms, set pCov to the candidate covering index. Otherwise, set 
+          ** pCov to NULL to indicate that no candidate covering index will 
+          ** be available.
+          */
+          pSubLoop = pSubWInfo->a[0].pWLoop;
+          assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 );
+          if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0
+           && (ii==0 || pSubLoop->u.btree.pIndex==pCov)
+           && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex))
+          ){
+            assert( pSubWInfo->a[0].iIdxCur==iCovCur );
+            pCov = pSubLoop->u.btree.pIndex;
+            wctrlFlags |= WHERE_REOPEN_IDX;
+          }else{
+            pCov = 0;
+          }
+
+          /* Finish the loop through table entries that match term pOrTerm. */
+          sqlite3WhereEnd(pSubWInfo);
+        }
+      }
+    }
+    pLevel->u.pCovidx = pCov;
+    if( pCov ) pLevel->iIdxCur = iCovCur;
+    if( pAndExpr ){
+      pAndExpr->pLeft = 0;
+      sqlite3ExprDelete(db, pAndExpr);
+    }
+    sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v));
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk);
+    sqlite3VdbeResolveLabel(v, iLoopBody);
+
+    if( pWInfo->nLevel>1 ) sqlite3StackFree(db, pOrTab);
+    if( !untestedTerms ) disableTerm(pLevel, pTerm);
+  }else
+#endif /* SQLITE_OMIT_OR_OPTIMIZATION */
+
+  {
+    /* Case 6:  There is no usable index.  We must do a complete
+    **          scan of the entire table.
+    */
+    static const u8 aStep[] = { OP_Next, OP_Prev };
+    static const u8 aStart[] = { OP_Rewind, OP_Last };
+    assert( bRev==0 || bRev==1 );
+    if( pTabItem->isRecursive ){
+      /* Tables marked isRecursive have only a single row that is stored in
+      ** a pseudo-cursor.  No need to Rewind or Next such cursors. */
+      pLevel->op = OP_Noop;
+    }else{
+      pLevel->op = aStep[bRev];
+      pLevel->p1 = iCur;
+      pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk);
+      VdbeCoverageIf(v, bRev==0);
+      VdbeCoverageIf(v, bRev!=0);
+      pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP;
+    }
+  }
+
+#ifdef SQLITE_ENABLE_STMT_SCANSTATUS
+  pLevel->addrVisit = sqlite3VdbeCurrentAddr(v);
+#endif
+
+  /* Insert code to test every subexpression that can be completely
+  ** computed using the current set of tables.
+  */
+  for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){
+    Expr *pE;
+    testcase( pTerm->wtFlags & TERM_VIRTUAL );
+    testcase( pTerm->wtFlags & TERM_CODED );
+    if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue;
+    if( (pTerm->prereqAll & pLevel->notReady)!=0 ){
+      testcase( pWInfo->untestedTerms==0
+               && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 );
+      pWInfo->untestedTerms = 1;
+      continue;
+    }
+    pE = pTerm->pExpr;
+    assert( pE!=0 );
+    if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){
+      continue;
+    }
+    sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL);
+    pTerm->wtFlags |= TERM_CODED;
+  }
+
+  /* Insert code to test for implied constraints based on transitivity
+  ** of the "==" operator.
+  **
+  ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123"
+  ** and we are coding the t1 loop and the t2 loop has not yet coded,
+  ** then we cannot use the "t1.a=t2.b" constraint, but we can code
+  ** the implied "t1.a=123" constraint.
+  */
+  for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){
+    Expr *pE, *pEAlt;
+    WhereTerm *pAlt;
+    if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue;
+    if( pTerm->eOperator!=(WO_EQUIV|WO_EQ) ) continue;
+    if( pTerm->leftCursor!=iCur ) continue;
+    if( pLevel->iLeftJoin ) continue;
+    pE = pTerm->pExpr;
+    assert( !ExprHasProperty(pE, EP_FromJoin) );
+    assert( (pTerm->prereqRight & pLevel->notReady)!=0 );
+    pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0);
+    if( pAlt==0 ) continue;
+    if( pAlt->wtFlags & (TERM_CODED) ) continue;
+    testcase( pAlt->eOperator & WO_EQ );
+    testcase( pAlt->eOperator & WO_IN );
+    VdbeModuleComment((v, "begin transitive constraint"));
+    pEAlt = sqlite3StackAllocRaw(db, sizeof(*pEAlt));
+    if( pEAlt ){
+      *pEAlt = *pAlt->pExpr;
+      pEAlt->pLeft = pE->pLeft;
+      sqlite3ExprIfFalse(pParse, pEAlt, addrCont, SQLITE_JUMPIFNULL);
+      sqlite3StackFree(db, pEAlt);
+    }
+  }
+
+  /* For a LEFT OUTER JOIN, generate code that will record the fact that
+  ** at least one row of the right table has matched the left table.  
+  */
+  if( pLevel->iLeftJoin ){
+    pLevel->addrFirst = sqlite3VdbeCurrentAddr(v);
+    sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin);
+    VdbeComment((v, "record LEFT JOIN hit"));
+    sqlite3ExprCacheClear(pParse);
+    for(pTerm=pWC->a, j=0; j<pWC->nTerm; j++, pTerm++){
+      testcase( pTerm->wtFlags & TERM_VIRTUAL );
+      testcase( pTerm->wtFlags & TERM_CODED );
+      if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue;
+      if( (pTerm->prereqAll & pLevel->notReady)!=0 ){
+        assert( pWInfo->untestedTerms );
+        continue;
+      }
+      assert( pTerm->pExpr );
+      sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL);
+      pTerm->wtFlags |= TERM_CODED;
+    }
+  }
+
+  return pLevel->notReady;
+}
+
+#ifdef WHERETRACE_ENABLED
+/*
+** Print the content of a WhereTerm object
+*/
+static void whereTermPrint(WhereTerm *pTerm, int iTerm){
+  if( pTerm==0 ){
+    sqlite3DebugPrintf("TERM-%-3d NULL\n", iTerm);
+  }else{
+    char zType[4];
+    memcpy(zType, "...", 4);
+    if( pTerm->wtFlags & TERM_VIRTUAL ) zType[0] = 'V';
+    if( pTerm->eOperator & WO_EQUIV  ) zType[1] = 'E';
+    if( ExprHasProperty(pTerm->pExpr, EP_FromJoin) ) zType[2] = 'L';
+    sqlite3DebugPrintf("TERM-%-3d %p %s cursor=%-3d prob=%-3d op=0x%03x\n",
+                       iTerm, pTerm, zType, pTerm->leftCursor, pTerm->truthProb,
+                       pTerm->eOperator);
+    sqlite3TreeViewExpr(0, pTerm->pExpr, 0);
+  }
+}
+#endif
+
+#ifdef WHERETRACE_ENABLED
+/*
+** Print a WhereLoop object for debugging purposes
+*/
+static void whereLoopPrint(WhereLoop *p, WhereClause *pWC){
+  WhereInfo *pWInfo = pWC->pWInfo;
+  int nb = 1+(pWInfo->pTabList->nSrc+7)/8;
+  struct SrcList_item *pItem = pWInfo->pTabList->a + p->iTab;
+  Table *pTab = pItem->pTab;
+  sqlite3DebugPrintf("%c%2d.%0*llx.%0*llx", p->cId,
+                     p->iTab, nb, p->maskSelf, nb, p->prereq);
+  sqlite3DebugPrintf(" %12s",
+                     pItem->zAlias ? pItem->zAlias : pTab->zName);
+  if( (p->wsFlags & WHERE_VIRTUALTABLE)==0 ){
+    const char *zName;
+    if( p->u.btree.pIndex && (zName = p->u.btree.pIndex->zName)!=0 ){
+      if( strncmp(zName, "sqlite_autoindex_", 17)==0 ){
+        int i = sqlite3Strlen30(zName) - 1;
+        while( zName[i]!='_' ) i--;
+        zName += i;
+      }
+      sqlite3DebugPrintf(".%-16s %2d", zName, p->u.btree.nEq);
+    }else{
+      sqlite3DebugPrintf("%20s","");
+    }
+  }else{
+    char *z;
+    if( p->u.vtab.idxStr ){
+      z = sqlite3_mprintf("(%d,\"%s\",%x)",
+                p->u.vtab.idxNum, p->u.vtab.idxStr, p->u.vtab.omitMask);
+    }else{
+      z = sqlite3_mprintf("(%d,%x)", p->u.vtab.idxNum, p->u.vtab.omitMask);
+    }
+    sqlite3DebugPrintf(" %-19s", z);
+    sqlite3_free(z);
+  }
+  if( p->wsFlags & WHERE_SKIPSCAN ){
+    sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->nSkip);
+  }else{
+    sqlite3DebugPrintf(" f %05x N %d", p->wsFlags, p->nLTerm);
+  }
+  sqlite3DebugPrintf(" cost %d,%d,%d\n", p->rSetup, p->rRun, p->nOut);
+  if( p->nLTerm && (sqlite3WhereTrace & 0x100)!=0 ){
+    int i;
+    for(i=0; i<p->nLTerm; i++){
+      whereTermPrint(p->aLTerm[i], i);
+    }
+  }
+}
+#endif
+
+/*
+** Convert bulk memory into a valid WhereLoop that can be passed
+** to whereLoopClear harmlessly.
+*/
+static void whereLoopInit(WhereLoop *p){
+  p->aLTerm = p->aLTermSpace;
+  p->nLTerm = 0;
+  p->nLSlot = ArraySize(p->aLTermSpace);
+  p->wsFlags = 0;
+}
+
+/*
+** Clear the WhereLoop.u union.  Leave WhereLoop.pLTerm intact.
+*/
+static void whereLoopClearUnion(sqlite3 *db, WhereLoop *p){
+  if( p->wsFlags & (WHERE_VIRTUALTABLE|WHERE_AUTO_INDEX) ){
+    if( (p->wsFlags & WHERE_VIRTUALTABLE)!=0 && p->u.vtab.needFree ){
+      sqlite3_free(p->u.vtab.idxStr);
+      p->u.vtab.needFree = 0;
+      p->u.vtab.idxStr = 0;
+    }else if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 && p->u.btree.pIndex!=0 ){
+      sqlite3DbFree(db, p->u.btree.pIndex->zColAff);
+      sqlite3DbFree(db, p->u.btree.pIndex);
+      p->u.btree.pIndex = 0;
+    }
+  }
+}
+
+/*
+** Deallocate internal memory used by a WhereLoop object
+*/
+static void whereLoopClear(sqlite3 *db, WhereLoop *p){
+  if( p->aLTerm!=p->aLTermSpace ) sqlite3DbFree(db, p->aLTerm);
+  whereLoopClearUnion(db, p);
+  whereLoopInit(p);
+}
+
+/*
+** Increase the memory allocation for pLoop->aLTerm[] to be at least n.
+*/
+static int whereLoopResize(sqlite3 *db, WhereLoop *p, int n){
+  WhereTerm **paNew;
+  if( p->nLSlot>=n ) return SQLITE_OK;
+  n = (n+7)&~7;
+  paNew = sqlite3DbMallocRaw(db, sizeof(p->aLTerm[0])*n);
+  if( paNew==0 ) return SQLITE_NOMEM;
+  memcpy(paNew, p->aLTerm, sizeof(p->aLTerm[0])*p->nLSlot);
+  if( p->aLTerm!=p->aLTermSpace ) sqlite3DbFree(db, p->aLTerm);
+  p->aLTerm = paNew;
+  p->nLSlot = n;
+  return SQLITE_OK;
+}
+
+/*
+** Transfer content from the second pLoop into the first.
+*/
+static int whereLoopXfer(sqlite3 *db, WhereLoop *pTo, WhereLoop *pFrom){
+  whereLoopClearUnion(db, pTo);
+  if( whereLoopResize(db, pTo, pFrom->nLTerm) ){
+    memset(&pTo->u, 0, sizeof(pTo->u));
+    return SQLITE_NOMEM;
+  }
+  memcpy(pTo, pFrom, WHERE_LOOP_XFER_SZ);
+  memcpy(pTo->aLTerm, pFrom->aLTerm, pTo->nLTerm*sizeof(pTo->aLTerm[0]));
+  if( pFrom->wsFlags & WHERE_VIRTUALTABLE ){
+    pFrom->u.vtab.needFree = 0;
+  }else if( (pFrom->wsFlags & WHERE_AUTO_INDEX)!=0 ){
+    pFrom->u.btree.pIndex = 0;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Delete a WhereLoop object
+*/
+static void whereLoopDelete(sqlite3 *db, WhereLoop *p){
+  whereLoopClear(db, p);
+  sqlite3DbFree(db, p);
+}
+
+/*
+** Free a WhereInfo structure
+*/
+static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){
+  if( ALWAYS(pWInfo) ){
+    whereClauseClear(&pWInfo->sWC);
+    while( pWInfo->pLoops ){
+      WhereLoop *p = pWInfo->pLoops;
+      pWInfo->pLoops = p->pNextLoop;
+      whereLoopDelete(db, p);
+    }
+    sqlite3DbFree(db, pWInfo);
+  }
+}
+
+/*
+** Return TRUE if all of the following are true:
+**
+**   (1)  X has the same or lower cost that Y
+**   (2)  X is a proper subset of Y
+**   (3)  X skips at least as many columns as Y
+**
+** By "proper subset" we mean that X uses fewer WHERE clause terms
+** than Y and that every WHERE clause term used by X is also used
+** by Y.
+**
+** If X is a proper subset of Y then Y is a better choice and ought
+** to have a lower cost.  This routine returns TRUE when that cost 
+** relationship is inverted and needs to be adjusted.  The third rule
+** was added because if X uses skip-scan less than Y it still might
+** deserve a lower cost even if it is a proper subset of Y.
+*/
+static int whereLoopCheaperProperSubset(
+  const WhereLoop *pX,       /* First WhereLoop to compare */
+  const WhereLoop *pY        /* Compare against this WhereLoop */
+){
+  int i, j;
+  if( pX->nLTerm-pX->nSkip >= pY->nLTerm-pY->nSkip ){
+    return 0; /* X is not a subset of Y */
+  }
+  if( pY->nSkip > pX->nSkip ) return 0;
+  if( pX->rRun >= pY->rRun ){
+    if( pX->rRun > pY->rRun ) return 0;    /* X costs more than Y */
+    if( pX->nOut > pY->nOut ) return 0;    /* X costs more than Y */
+  }
+  for(i=pX->nLTerm-1; i>=0; i--){
+    if( pX->aLTerm[i]==0 ) continue;
+    for(j=pY->nLTerm-1; j>=0; j--){
+      if( pY->aLTerm[j]==pX->aLTerm[i] ) break;
+    }
+    if( j<0 ) return 0;  /* X not a subset of Y since term X[i] not used by Y */
+  }
+  return 1;  /* All conditions meet */
+}
+
+/*
+** Try to adjust the cost of WhereLoop pTemplate upwards or downwards so
+** that:
+**
+**   (1) pTemplate costs less than any other WhereLoops that are a proper
+**       subset of pTemplate
+**
+**   (2) pTemplate costs more than any other WhereLoops for which pTemplate
+**       is a proper subset.
+**
+** To say "WhereLoop X is a proper subset of Y" means that X uses fewer
+** WHERE clause terms than Y and that every WHERE clause term used by X is
+** also used by Y.
+*/
+static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){
+  if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return;
+  for(; p; p=p->pNextLoop){
+    if( p->iTab!=pTemplate->iTab ) continue;
+    if( (p->wsFlags & WHERE_INDEXED)==0 ) continue;
+    if( whereLoopCheaperProperSubset(p, pTemplate) ){
+      /* Adjust pTemplate cost downward so that it is cheaper than its 
+      ** subset p. */
+      WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n",
+                       pTemplate->rRun, pTemplate->nOut, p->rRun, p->nOut-1));
+      pTemplate->rRun = p->rRun;
+      pTemplate->nOut = p->nOut - 1;
+    }else if( whereLoopCheaperProperSubset(pTemplate, p) ){
+      /* Adjust pTemplate cost upward so that it is costlier than p since
+      ** pTemplate is a proper subset of p */
+      WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n",
+                       pTemplate->rRun, pTemplate->nOut, p->rRun, p->nOut+1));
+      pTemplate->rRun = p->rRun;
+      pTemplate->nOut = p->nOut + 1;
+    }
+  }
+}
+
+/*
+** Search the list of WhereLoops in *ppPrev looking for one that can be
+** supplanted by pTemplate.
+**
+** Return NULL if the WhereLoop list contains an entry that can supplant
+** pTemplate, in other words if pTemplate does not belong on the list.
+**
+** If pX is a WhereLoop that pTemplate can supplant, then return the
+** link that points to pX.
+**
+** If pTemplate cannot supplant any existing element of the list but needs
+** to be added to the list, then return a pointer to the tail of the list.
+*/
+static WhereLoop **whereLoopFindLesser(
+  WhereLoop **ppPrev,
+  const WhereLoop *pTemplate
+){
+  WhereLoop *p;
+  for(p=(*ppPrev); p; ppPrev=&p->pNextLoop, p=*ppPrev){
+    if( p->iTab!=pTemplate->iTab || p->iSortIdx!=pTemplate->iSortIdx ){
+      /* If either the iTab or iSortIdx values for two WhereLoop are different
+      ** then those WhereLoops need to be considered separately.  Neither is
+      ** a candidate to replace the other. */
+      continue;
+    }
+    /* In the current implementation, the rSetup value is either zero
+    ** or the cost of building an automatic index (NlogN) and the NlogN
+    ** is the same for compatible WhereLoops. */
+    assert( p->rSetup==0 || pTemplate->rSetup==0 
+                 || p->rSetup==pTemplate->rSetup );
+
+    /* whereLoopAddBtree() always generates and inserts the automatic index
+    ** case first.  Hence compatible candidate WhereLoops never have a larger
+    ** rSetup. Call this SETUP-INVARIANT */
+    assert( p->rSetup>=pTemplate->rSetup );
+
+    /* Any loop using an appliation-defined index (or PRIMARY KEY or
+    ** UNIQUE constraint) with one or more == constraints is better
+    ** than an automatic index. Unless it is a skip-scan. */
+    if( (p->wsFlags & WHERE_AUTO_INDEX)!=0
+     && (pTemplate->nSkip)==0
+     && (pTemplate->wsFlags & WHERE_INDEXED)!=0
+     && (pTemplate->wsFlags & WHERE_COLUMN_EQ)!=0
+     && (p->prereq & pTemplate->prereq)==pTemplate->prereq
+    ){
+      break;
+    }
+
+    /* If existing WhereLoop p is better than pTemplate, pTemplate can be
+    ** discarded.  WhereLoop p is better if:
+    **   (1)  p has no more dependencies than pTemplate, and
+    **   (2)  p has an equal or lower cost than pTemplate
+    */
+    if( (p->prereq & pTemplate->prereq)==p->prereq    /* (1)  */
+     && p->rSetup<=pTemplate->rSetup                  /* (2a) */
+     && p->rRun<=pTemplate->rRun                      /* (2b) */
+     && p->nOut<=pTemplate->nOut                      /* (2c) */
+    ){
+      return 0;  /* Discard pTemplate */
+    }
+
+    /* If pTemplate is always better than p, then cause p to be overwritten
+    ** with pTemplate.  pTemplate is better than p if:
+    **   (1)  pTemplate has no more dependences than p, and
+    **   (2)  pTemplate has an equal or lower cost than p.
+    */
+    if( (p->prereq & pTemplate->prereq)==pTemplate->prereq   /* (1)  */
+     && p->rRun>=pTemplate->rRun                             /* (2a) */
+     && p->nOut>=pTemplate->nOut                             /* (2b) */
+    ){
+      assert( p->rSetup>=pTemplate->rSetup ); /* SETUP-INVARIANT above */
+      break;   /* Cause p to be overwritten by pTemplate */
+    }
+  }
+  return ppPrev;
+}
+
+/*
+** Insert or replace a WhereLoop entry using the template supplied.
+**
+** An existing WhereLoop entry might be overwritten if the new template
+** is better and has fewer dependencies.  Or the template will be ignored
+** and no insert will occur if an existing WhereLoop is faster and has
+** fewer dependencies than the template.  Otherwise a new WhereLoop is
+** added based on the template.
+**
+** If pBuilder->pOrSet is not NULL then we care about only the
+** prerequisites and rRun and nOut costs of the N best loops.  That
+** information is gathered in the pBuilder->pOrSet object.  This special
+** processing mode is used only for OR clause processing.
+**
+** When accumulating multiple loops (when pBuilder->pOrSet is NULL) we
+** still might overwrite similar loops with the new template if the
+** new template is better.  Loops may be overwritten if the following 
+** conditions are met:
+**
+**    (1)  They have the same iTab.
+**    (2)  They have the same iSortIdx.
+**    (3)  The template has same or fewer dependencies than the current loop
+**    (4)  The template has the same or lower cost than the current loop
+*/
+static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){
+  WhereLoop **ppPrev, *p;
+  WhereInfo *pWInfo = pBuilder->pWInfo;
+  sqlite3 *db = pWInfo->pParse->db;
+
+  /* If pBuilder->pOrSet is defined, then only keep track of the costs
+  ** and prereqs.
+  */
+  if( pBuilder->pOrSet!=0 ){
+#if WHERETRACE_ENABLED
+    u16 n = pBuilder->pOrSet->n;
+    int x =
+#endif
+    whereOrInsert(pBuilder->pOrSet, pTemplate->prereq, pTemplate->rRun,
+                                    pTemplate->nOut);
+#if WHERETRACE_ENABLED /* 0x8 */
+    if( sqlite3WhereTrace & 0x8 ){
+      sqlite3DebugPrintf(x?"   or-%d:  ":"   or-X:  ", n);
+      whereLoopPrint(pTemplate, pBuilder->pWC);
+    }
+#endif
+    return SQLITE_OK;
+  }
+
+  /* Look for an existing WhereLoop to replace with pTemplate
+  */
+  whereLoopAdjustCost(pWInfo->pLoops, pTemplate);
+  ppPrev = whereLoopFindLesser(&pWInfo->pLoops, pTemplate);
+
+  if( ppPrev==0 ){
+    /* There already exists a WhereLoop on the list that is better
+    ** than pTemplate, so just ignore pTemplate */
+#if WHERETRACE_ENABLED /* 0x8 */
+    if( sqlite3WhereTrace & 0x8 ){
+      sqlite3DebugPrintf("   skip: ");
+      whereLoopPrint(pTemplate, pBuilder->pWC);
+    }
+#endif
+    return SQLITE_OK;  
+  }else{
+    p = *ppPrev;
+  }
+
+  /* If we reach this point it means that either p[] should be overwritten
+  ** with pTemplate[] if p[] exists, or if p==NULL then allocate a new
+  ** WhereLoop and insert it.
+  */
+#if WHERETRACE_ENABLED /* 0x8 */
+  if( sqlite3WhereTrace & 0x8 ){
+    if( p!=0 ){
+      sqlite3DebugPrintf("replace: ");
+      whereLoopPrint(p, pBuilder->pWC);
+    }
+    sqlite3DebugPrintf("    add: ");
+    whereLoopPrint(pTemplate, pBuilder->pWC);
+  }
+#endif
+  if( p==0 ){
+    /* Allocate a new WhereLoop to add to the end of the list */
+    *ppPrev = p = sqlite3DbMallocRaw(db, sizeof(WhereLoop));
+    if( p==0 ) return SQLITE_NOMEM;
+    whereLoopInit(p);
+    p->pNextLoop = 0;
+  }else{
+    /* We will be overwriting WhereLoop p[].  But before we do, first
+    ** go through the rest of the list and delete any other entries besides
+    ** p[] that are also supplated by pTemplate */
+    WhereLoop **ppTail = &p->pNextLoop;
+    WhereLoop *pToDel;
+    while( *ppTail ){
+      ppTail = whereLoopFindLesser(ppTail, pTemplate);
+      if( ppTail==0 ) break;
+      pToDel = *ppTail;
+      if( pToDel==0 ) break;
+      *ppTail = pToDel->pNextLoop;
+#if WHERETRACE_ENABLED /* 0x8 */
+      if( sqlite3WhereTrace & 0x8 ){
+        sqlite3DebugPrintf(" delete: ");
+        whereLoopPrint(pToDel, pBuilder->pWC);
+      }
+#endif
+      whereLoopDelete(db, pToDel);
+    }
+  }
+  whereLoopXfer(db, p, pTemplate);
+  if( (p->wsFlags & WHERE_VIRTUALTABLE)==0 ){
+    Index *pIndex = p->u.btree.pIndex;
+    if( pIndex && pIndex->tnum==0 ){
+      p->u.btree.pIndex = 0;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Adjust the WhereLoop.nOut value downward to account for terms of the
+** WHERE clause that reference the loop but which are not used by an
+** index.
+*
+** For every WHERE clause term that is not used by the index
+** and which has a truth probability assigned by one of the likelihood(),
+** likely(), or unlikely() SQL functions, reduce the estimated number
+** of output rows by the probability specified.
+**
+** TUNING:  For every WHERE clause term that is not used by the index
+** and which does not have an assigned truth probability, heuristics
+** described below are used to try to estimate the truth probability.
+** TODO --> Perhaps this is something that could be improved by better
+** table statistics.
+**
+** Heuristic 1:  Estimate the truth probability as 93.75%.  The 93.75%
+** value corresponds to -1 in LogEst notation, so this means decrement
+** the WhereLoop.nOut field for every such WHERE clause term.
+**
+** Heuristic 2:  If there exists one or more WHERE clause terms of the
+** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the
+** final output row estimate is no greater than 1/4 of the total number
+** of rows in the table.  In other words, assume that x==EXPR will filter
+** out at least 3 out of 4 rows.  If EXPR is -1 or 0 or 1, then maybe the
+** "x" column is boolean or else -1 or 0 or 1 is a common default value
+** on the "x" column and so in that case only cap the output row estimate
+** at 1/2 instead of 1/4.
+*/
+static void whereLoopOutputAdjust(
+  WhereClause *pWC,      /* The WHERE clause */
+  WhereLoop *pLoop,      /* The loop to adjust downward */
+  LogEst nRow            /* Number of rows in the entire table */
+){
+  WhereTerm *pTerm, *pX;
+  Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf);
+  int i, j, k;
+  LogEst iReduce = 0;    /* pLoop->nOut should not exceed nRow-iReduce */
+
+  assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 );
+  for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){
+    if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break;
+    if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue;
+    if( (pTerm->prereqAll & notAllowed)!=0 ) continue;
+    for(j=pLoop->nLTerm-1; j>=0; j--){
+      pX = pLoop->aLTerm[j];
+      if( pX==0 ) continue;
+      if( pX==pTerm ) break;
+      if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
+    }
+    if( j<0 ){
+      if( pTerm->truthProb<=0 ){
+        /* If a truth probability is specified using the likelihood() hints,
+        ** then use the probability provided by the application. */
+        pLoop->nOut += pTerm->truthProb;
+      }else{
+        /* In the absence of explicit truth probabilities, use heuristics to
+        ** guess a reasonable truth probability. */
+        pLoop->nOut--;
+        if( pTerm->eOperator&WO_EQ ){
+          Expr *pRight = pTerm->pExpr->pRight;
+          if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){
+            k = 10;
+          }else{
+            k = 20;
+          }
+          if( iReduce<k ) iReduce = k;
+        }
+      }
+    }
+  }
+  if( pLoop->nOut > nRow-iReduce )  pLoop->nOut = nRow - iReduce;
+}
+
+/*
+** Adjust the cost C by the costMult facter T.  This only occurs if
+** compiled with -DSQLITE_ENABLE_COSTMULT
+*/
+#ifdef SQLITE_ENABLE_COSTMULT
+# define ApplyCostMultiplier(C,T)  C += T
+#else
+# define ApplyCostMultiplier(C,T)
+#endif
+
+/*
+** We have so far matched pBuilder->pNew->u.btree.nEq terms of the 
+** index pIndex. Try to match one more.
+**
+** When this function is called, pBuilder->pNew->nOut contains the 
+** number of rows expected to be visited by filtering using the nEq 
+** terms only. If it is modified, this value is restored before this 
+** function returns.
+**
+** If pProbe->tnum==0, that means pIndex is a fake index used for the
+** INTEGER PRIMARY KEY.
+*/
+static int whereLoopAddBtreeIndex(
+  WhereLoopBuilder *pBuilder,     /* The WhereLoop factory */
+  struct SrcList_item *pSrc,      /* FROM clause term being analyzed */
+  Index *pProbe,                  /* An index on pSrc */
+  LogEst nInMul                   /* log(Number of iterations due to IN) */
+){
+  WhereInfo *pWInfo = pBuilder->pWInfo;  /* WHERE analyse context */
+  Parse *pParse = pWInfo->pParse;        /* Parsing context */
+  sqlite3 *db = pParse->db;       /* Database connection malloc context */
+  WhereLoop *pNew;                /* Template WhereLoop under construction */
+  WhereTerm *pTerm;               /* A WhereTerm under consideration */
+  int opMask;                     /* Valid operators for constraints */
+  WhereScan scan;                 /* Iterator for WHERE terms */
+  Bitmask saved_prereq;           /* Original value of pNew->prereq */
+  u16 saved_nLTerm;               /* Original value of pNew->nLTerm */
+  u16 saved_nEq;                  /* Original value of pNew->u.btree.nEq */
+  u16 saved_nSkip;                /* Original value of pNew->nSkip */
+  u32 saved_wsFlags;              /* Original value of pNew->wsFlags */
+  LogEst saved_nOut;              /* Original value of pNew->nOut */
+  int iCol;                       /* Index of the column in the table */
+  int rc = SQLITE_OK;             /* Return code */
+  LogEst rSize;                   /* Number of rows in the table */
+  LogEst rLogSize;                /* Logarithm of table size */
+  WhereTerm *pTop = 0, *pBtm = 0; /* Top and bottom range constraints */
+
+  pNew = pBuilder->pNew;
+  if( db->mallocFailed ) return SQLITE_NOMEM;
+
+  assert( (pNew->wsFlags & WHERE_VIRTUALTABLE)==0 );
+  assert( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 );
+  if( pNew->wsFlags & WHERE_BTM_LIMIT ){
+    opMask = WO_LT|WO_LE;
+  }else if( pProbe->tnum<=0 || (pSrc->jointype & JT_LEFT)!=0 ){
+    opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE;
+  }else{
+    opMask = WO_EQ|WO_IN|WO_ISNULL|WO_GT|WO_GE|WO_LT|WO_LE;
+  }
+  if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE);
+
+  assert( pNew->u.btree.nEq<pProbe->nColumn );
+  iCol = pProbe->aiColumn[pNew->u.btree.nEq];
+
+  pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol,
+                        opMask, pProbe);
+  saved_nEq = pNew->u.btree.nEq;
+  saved_nSkip = pNew->nSkip;
+  saved_nLTerm = pNew->nLTerm;
+  saved_wsFlags = pNew->wsFlags;
+  saved_prereq = pNew->prereq;
+  saved_nOut = pNew->nOut;
+  pNew->rSetup = 0;
+  rSize = pProbe->aiRowLogEst[0];
+  rLogSize = estLog(rSize);
+  for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){
+    u16 eOp = pTerm->eOperator;   /* Shorthand for pTerm->eOperator */
+    LogEst rCostIdx;
+    LogEst nOutUnadjusted;        /* nOut before IN() and WHERE adjustments */
+    int nIn = 0;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    int nRecValid = pBuilder->nRecValid;
+#endif
+    if( (eOp==WO_ISNULL || (pTerm->wtFlags&TERM_VNULL)!=0)
+     && (iCol<0 || pSrc->pTab->aCol[iCol].notNull)
+    ){
+      continue; /* ignore IS [NOT] NULL constraints on NOT NULL columns */
+    }
+    if( pTerm->prereqRight & pNew->maskSelf ) continue;
+
+    pNew->wsFlags = saved_wsFlags;
+    pNew->u.btree.nEq = saved_nEq;
+    pNew->nLTerm = saved_nLTerm;
+    if( whereLoopResize(db, pNew, pNew->nLTerm+1) ) break; /* OOM */
+    pNew->aLTerm[pNew->nLTerm++] = pTerm;
+    pNew->prereq = (saved_prereq | pTerm->prereqRight) & ~pNew->maskSelf;
+
+    assert( nInMul==0
+        || (pNew->wsFlags & WHERE_COLUMN_NULL)!=0 
+        || (pNew->wsFlags & WHERE_COLUMN_IN)!=0 
+        || (pNew->wsFlags & WHERE_SKIPSCAN)!=0 
+    );
+
+    if( eOp & WO_IN ){
+      Expr *pExpr = pTerm->pExpr;
+      pNew->wsFlags |= WHERE_COLUMN_IN;
+      if( ExprHasProperty(pExpr, EP_xIsSelect) ){
+        /* "x IN (SELECT ...)":  TUNING: the SELECT returns 25 rows */
+        nIn = 46;  assert( 46==sqlite3LogEst(25) );
+      }else if( ALWAYS(pExpr->x.pList && pExpr->x.pList->nExpr) ){
+        /* "x IN (value, value, ...)" */
+        nIn = sqlite3LogEst(pExpr->x.pList->nExpr);
+      }
+      assert( nIn>0 );  /* RHS always has 2 or more terms...  The parser
+                        ** changes "x IN (?)" into "x=?". */
+
+    }else if( eOp & (WO_EQ) ){
+      pNew->wsFlags |= WHERE_COLUMN_EQ;
+      if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){
+        if( iCol>=0 && !IsUniqueIndex(pProbe) ){
+          pNew->wsFlags |= WHERE_UNQ_WANTED;
+        }else{
+          pNew->wsFlags |= WHERE_ONEROW;
+        }
+      }
+    }else if( eOp & WO_ISNULL ){
+      pNew->wsFlags |= WHERE_COLUMN_NULL;
+    }else if( eOp & (WO_GT|WO_GE) ){
+      testcase( eOp & WO_GT );
+      testcase( eOp & WO_GE );
+      pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_BTM_LIMIT;
+      pBtm = pTerm;
+      pTop = 0;
+    }else{
+      assert( eOp & (WO_LT|WO_LE) );
+      testcase( eOp & WO_LT );
+      testcase( eOp & WO_LE );
+      pNew->wsFlags |= WHERE_COLUMN_RANGE|WHERE_TOP_LIMIT;
+      pTop = pTerm;
+      pBtm = (pNew->wsFlags & WHERE_BTM_LIMIT)!=0 ?
+                     pNew->aLTerm[pNew->nLTerm-2] : 0;
+    }
+
+    /* At this point pNew->nOut is set to the number of rows expected to
+    ** be visited by the index scan before considering term pTerm, or the
+    ** values of nIn and nInMul. In other words, assuming that all 
+    ** "x IN(...)" terms are replaced with "x = ?". This block updates
+    ** the value of pNew->nOut to account for pTerm (but not nIn/nInMul).  */
+    assert( pNew->nOut==saved_nOut );
+    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
+      /* Adjust nOut using stat3/stat4 data. Or, if there is no stat3/stat4
+      ** data, using some other estimate.  */
+      whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew);
+    }else{
+      int nEq = ++pNew->u.btree.nEq;
+      assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) );
+
+      assert( pNew->nOut==saved_nOut );
+      if( pTerm->truthProb<=0 && iCol>=0 ){
+        assert( (eOp & WO_IN) || nIn==0 );
+        testcase( eOp & WO_IN );
+        pNew->nOut += pTerm->truthProb;
+        pNew->nOut -= nIn;
+      }else{
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+        tRowcnt nOut = 0;
+        if( nInMul==0 
+         && pProbe->nSample 
+         && pNew->u.btree.nEq<=pProbe->nSampleCol
+         && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect))
+        ){
+          Expr *pExpr = pTerm->pExpr;
+          if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){
+            testcase( eOp & WO_EQ );
+            testcase( eOp & WO_ISNULL );
+            rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut);
+          }else{
+            rc = whereInScanEst(pParse, pBuilder, pExpr->x.pList, &nOut);
+          }
+          if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK;
+          if( rc!=SQLITE_OK ) break;          /* Jump out of the pTerm loop */
+          if( nOut ){
+            pNew->nOut = sqlite3LogEst(nOut);
+            if( pNew->nOut>saved_nOut ) pNew->nOut = saved_nOut;
+            pNew->nOut -= nIn;
+          }
+        }
+        if( nOut==0 )
+#endif
+        {
+          pNew->nOut += (pProbe->aiRowLogEst[nEq] - pProbe->aiRowLogEst[nEq-1]);
+          if( eOp & WO_ISNULL ){
+            /* TUNING: If there is no likelihood() value, assume that a 
+            ** "col IS NULL" expression matches twice as many rows 
+            ** as (col=?). */
+            pNew->nOut += 10;
+          }
+        }
+      }
+    }
+
+    /* Set rCostIdx to the cost of visiting selected rows in index. Add
+    ** it to pNew->rRun, which is currently set to the cost of the index
+    ** seek only. Then, if this is a non-covering index, add the cost of
+    ** visiting the rows in the main table.  */
+    rCostIdx = pNew->nOut + 1 + (15*pProbe->szIdxRow)/pSrc->pTab->szTabRow;
+    pNew->rRun = sqlite3LogEstAdd(rLogSize, rCostIdx);
+    if( (pNew->wsFlags & (WHERE_IDX_ONLY|WHERE_IPK))==0 ){
+      pNew->rRun = sqlite3LogEstAdd(pNew->rRun, pNew->nOut + 16);
+    }
+    ApplyCostMultiplier(pNew->rRun, pProbe->pTable->costMult);
+
+    nOutUnadjusted = pNew->nOut;
+    pNew->rRun += nInMul + nIn;
+    pNew->nOut += nInMul + nIn;
+    whereLoopOutputAdjust(pBuilder->pWC, pNew, rSize);
+    rc = whereLoopInsert(pBuilder, pNew);
+
+    if( pNew->wsFlags & WHERE_COLUMN_RANGE ){
+      pNew->nOut = saved_nOut;
+    }else{
+      pNew->nOut = nOutUnadjusted;
+    }
+
+    if( (pNew->wsFlags & WHERE_TOP_LIMIT)==0
+     && pNew->u.btree.nEq<pProbe->nColumn
+    ){
+      whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nInMul+nIn);
+    }
+    pNew->nOut = saved_nOut;
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    pBuilder->nRecValid = nRecValid;
+#endif
+  }
+  pNew->prereq = saved_prereq;
+  pNew->u.btree.nEq = saved_nEq;
+  pNew->nSkip = saved_nSkip;
+  pNew->wsFlags = saved_wsFlags;
+  pNew->nOut = saved_nOut;
+  pNew->nLTerm = saved_nLTerm;
+
+  /* Consider using a skip-scan if there are no WHERE clause constraints
+  ** available for the left-most terms of the index, and if the average
+  ** number of repeats in the left-most terms is at least 18. 
+  **
+  ** The magic number 18 is selected on the basis that scanning 17 rows
+  ** is almost always quicker than an index seek (even though if the index
+  ** contains fewer than 2^17 rows we assume otherwise in other parts of
+  ** the code). And, even if it is not, it should not be too much slower. 
+  ** On the other hand, the extra seeks could end up being significantly
+  ** more expensive.  */
+  assert( 42==sqlite3LogEst(18) );
+  if( saved_nEq==saved_nSkip
+   && saved_nEq+1<pProbe->nKeyCol
+   && pProbe->noSkipScan==0
+   && pProbe->aiRowLogEst[saved_nEq+1]>=42  /* TUNING: Minimum for skip-scan */
+   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
+  ){
+    LogEst nIter;
+    pNew->u.btree.nEq++;
+    pNew->nSkip++;
+    pNew->aLTerm[pNew->nLTerm++] = 0;
+    pNew->wsFlags |= WHERE_SKIPSCAN;
+    nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
+    pNew->nOut -= nIter;
+    /* TUNING:  Because uncertainties in the estimates for skip-scan queries,
+    ** add a 1.375 fudge factor to make skip-scan slightly less likely. */
+    nIter += 5;
+    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
+    pNew->nOut = saved_nOut;
+    pNew->u.btree.nEq = saved_nEq;
+    pNew->nSkip = saved_nSkip;
+    pNew->wsFlags = saved_wsFlags;
+  }
+
+  return rc;
+}
+
+/*
+** Return True if it is possible that pIndex might be useful in
+** implementing the ORDER BY clause in pBuilder.
+**
+** Return False if pBuilder does not contain an ORDER BY clause or
+** if there is no way for pIndex to be useful in implementing that
+** ORDER BY clause.
+*/
+static int indexMightHelpWithOrderBy(
+  WhereLoopBuilder *pBuilder,
+  Index *pIndex,
+  int iCursor
+){
+  ExprList *pOB;
+  int ii, jj;
+
+  if( pIndex->bUnordered ) return 0;
+  if( (pOB = pBuilder->pWInfo->pOrderBy)==0 ) return 0;
+  for(ii=0; ii<pOB->nExpr; ii++){
+    Expr *pExpr = sqlite3ExprSkipCollate(pOB->a[ii].pExpr);
+    if( pExpr->op!=TK_COLUMN ) return 0;
+    if( pExpr->iTable==iCursor ){
+      if( pExpr->iColumn<0 ) return 1;
+      for(jj=0; jj<pIndex->nKeyCol; jj++){
+        if( pExpr->iColumn==pIndex->aiColumn[jj] ) return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+** Return a bitmask where 1s indicate that the corresponding column of
+** the table is used by an index.  Only the first 63 columns are considered.
+*/
+static Bitmask columnsInIndex(Index *pIdx){
+  Bitmask m = 0;
+  int j;
+  for(j=pIdx->nColumn-1; j>=0; j--){
+    int x = pIdx->aiColumn[j];
+    if( x>=0 ){
+      testcase( x==BMS-1 );
+      testcase( x==BMS-2 );
+      if( x<BMS-1 ) m |= MASKBIT(x);
+    }
+  }
+  return m;
+}
+
+/* Check to see if a partial index with pPartIndexWhere can be used
+** in the current query.  Return true if it can be and false if not.
+*/
+static int whereUsablePartialIndex(int iTab, WhereClause *pWC, Expr *pWhere){
+  int i;
+  WhereTerm *pTerm;
+  for(i=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){
+    if( sqlite3ExprImpliesExpr(pTerm->pExpr, pWhere, iTab) ) return 1;
+  }
+  return 0;
+}
+
+/*
+** Add all WhereLoop objects for a single table of the join where the table
+** is idenfied by pBuilder->pNew->iTab.  That table is guaranteed to be
+** a b-tree table, not a virtual table.
+**
+** The costs (WhereLoop.rRun) of the b-tree loops added by this function
+** are calculated as follows:
+**
+** For a full scan, assuming the table (or index) contains nRow rows:
+**
+**     cost = nRow * 3.0                    // full-table scan
+**     cost = nRow * K                      // scan of covering index
+**     cost = nRow * (K+3.0)                // scan of non-covering index
+**
+** where K is a value between 1.1 and 3.0 set based on the relative 
+** estimated average size of the index and table records.
+**
+** For an index scan, where nVisit is the number of index rows visited
+** by the scan, and nSeek is the number of seek operations required on 
+** the index b-tree:
+**
+**     cost = nSeek * (log(nRow) + K * nVisit)          // covering index
+**     cost = nSeek * (log(nRow) + (K+3.0) * nVisit)    // non-covering index
+**
+** Normally, nSeek is 1. nSeek values greater than 1 come about if the 
+** WHERE clause includes "x IN (....)" terms used in place of "x=?". Or when 
+** implicit "x IN (SELECT x FROM tbl)" terms are added for skip-scans.
+**
+** The estimated values (nRow, nVisit, nSeek) often contain a large amount
+** of uncertainty.  For this reason, scoring is designed to pick plans that
+** "do the least harm" if the estimates are inaccurate.  For example, a
+** log(nRow) factor is omitted from a non-covering index scan in order to
+** bias the scoring in favor of using an index, since the worst-case
+** performance of using an index is far better than the worst-case performance
+** of a full table scan.
+*/
+static int whereLoopAddBtree(
+  WhereLoopBuilder *pBuilder, /* WHERE clause information */
+  Bitmask mExtra              /* Extra prerequesites for using this table */
+){
+  WhereInfo *pWInfo;          /* WHERE analysis context */
+  Index *pProbe;              /* An index we are evaluating */
+  Index sPk;                  /* A fake index object for the primary key */
+  LogEst aiRowEstPk[2];       /* The aiRowLogEst[] value for the sPk index */
+  i16 aiColumnPk = -1;        /* The aColumn[] value for the sPk index */
+  SrcList *pTabList;          /* The FROM clause */
+  struct SrcList_item *pSrc;  /* The FROM clause btree term to add */
+  WhereLoop *pNew;            /* Template WhereLoop object */
+  int rc = SQLITE_OK;         /* Return code */
+  int iSortIdx = 1;           /* Index number */
+  int b;                      /* A boolean value */
+  LogEst rSize;               /* number of rows in the table */
+  LogEst rLogSize;            /* Logarithm of the number of rows in the table */
+  WhereClause *pWC;           /* The parsed WHERE clause */
+  Table *pTab;                /* Table being queried */
+  
+  pNew = pBuilder->pNew;
+  pWInfo = pBuilder->pWInfo;
+  pTabList = pWInfo->pTabList;
+  pSrc = pTabList->a + pNew->iTab;
+  pTab = pSrc->pTab;
+  pWC = pBuilder->pWC;
+  assert( !IsVirtual(pSrc->pTab) );
+
+  if( pSrc->pIndex ){
+    /* An INDEXED BY clause specifies a particular index to use */
+    pProbe = pSrc->pIndex;
+  }else if( !HasRowid(pTab) ){
+    pProbe = pTab->pIndex;
+  }else{
+    /* There is no INDEXED BY clause.  Create a fake Index object in local
+    ** variable sPk to represent the rowid primary key index.  Make this
+    ** fake index the first in a chain of Index objects with all of the real
+    ** indices to follow */
+    Index *pFirst;                  /* First of real indices on the table */
+    memset(&sPk, 0, sizeof(Index));
+    sPk.nKeyCol = 1;
+    sPk.nColumn = 1;
+    sPk.aiColumn = &aiColumnPk;
+    sPk.aiRowLogEst = aiRowEstPk;
+    sPk.onError = OE_Replace;
+    sPk.pTable = pTab;
+    sPk.szIdxRow = pTab->szTabRow;
+    aiRowEstPk[0] = pTab->nRowLogEst;
+    aiRowEstPk[1] = 0;
+    pFirst = pSrc->pTab->pIndex;
+    if( pSrc->notIndexed==0 ){
+      /* The real indices of the table are only considered if the
+      ** NOT INDEXED qualifier is omitted from the FROM clause */
+      sPk.pNext = pFirst;
+    }
+    pProbe = &sPk;
+  }
+  rSize = pTab->nRowLogEst;
+  rLogSize = estLog(rSize);
+
+#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
+  /* Automatic indexes */
+  if( !pBuilder->pOrSet
+   && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0
+   && pSrc->pIndex==0
+   && !pSrc->viaCoroutine
+   && !pSrc->notIndexed
+   && HasRowid(pTab)
+   && !pSrc->isCorrelated
+   && !pSrc->isRecursive
+  ){
+    /* Generate auto-index WhereLoops */
+    WhereTerm *pTerm;
+    WhereTerm *pWCEnd = pWC->a + pWC->nTerm;
+    for(pTerm=pWC->a; rc==SQLITE_OK && pTerm<pWCEnd; pTerm++){
+      if( pTerm->prereqRight & pNew->maskSelf ) continue;
+      if( termCanDriveIndex(pTerm, pSrc, 0) ){
+        pNew->u.btree.nEq = 1;
+        pNew->nSkip = 0;
+        pNew->u.btree.pIndex = 0;
+        pNew->nLTerm = 1;
+        pNew->aLTerm[0] = pTerm;
+        /* TUNING: One-time cost for computing the automatic index is
+        ** estimated to be X*N*log2(N) where N is the number of rows in
+        ** the table being indexed and where X is 7 (LogEst=28) for normal
+        ** tables or 1.375 (LogEst=4) for views and subqueries.  The value
+        ** of X is smaller for views and subqueries so that the query planner
+        ** will be more aggressive about generating automatic indexes for
+        ** those objects, since there is no opportunity to add schema
+        ** indexes on subqueries and views. */
+        pNew->rSetup = rLogSize + rSize + 4;
+        if( pTab->pSelect==0 && (pTab->tabFlags & TF_Ephemeral)==0 ){
+          pNew->rSetup += 24;
+        }
+        ApplyCostMultiplier(pNew->rSetup, pTab->costMult);
+        /* TUNING: Each index lookup yields 20 rows in the table.  This
+        ** is more than the usual guess of 10 rows, since we have no way
+        ** of knowing how selective the index will ultimately be.  It would
+        ** not be unreasonable to make this value much larger. */
+        pNew->nOut = 43;  assert( 43==sqlite3LogEst(20) );
+        pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut);
+        pNew->wsFlags = WHERE_AUTO_INDEX;
+        pNew->prereq = mExtra | pTerm->prereqRight;
+        rc = whereLoopInsert(pBuilder, pNew);
+      }
+    }
+  }
+#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */
+
+  /* Loop over all indices
+  */
+  for(; rc==SQLITE_OK && pProbe; pProbe=pProbe->pNext, iSortIdx++){
+    if( pProbe->pPartIdxWhere!=0
+     && !whereUsablePartialIndex(pSrc->iCursor, pWC, pProbe->pPartIdxWhere) ){
+      testcase( pNew->iTab!=pSrc->iCursor );  /* See ticket [98d973b8f5] */
+      continue;  /* Partial index inappropriate for this query */
+    }
+    rSize = pProbe->aiRowLogEst[0];
+    pNew->u.btree.nEq = 0;
+    pNew->nSkip = 0;
+    pNew->nLTerm = 0;
+    pNew->iSortIdx = 0;
+    pNew->rSetup = 0;
+    pNew->prereq = mExtra;
+    pNew->nOut = rSize;
+    pNew->u.btree.pIndex = pProbe;
+    b = indexMightHelpWithOrderBy(pBuilder, pProbe, pSrc->iCursor);
+    /* The ONEPASS_DESIRED flags never occurs together with ORDER BY */
+    assert( (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || b==0 );
+    if( pProbe->tnum<=0 ){
+      /* Integer primary key index */
+      pNew->wsFlags = WHERE_IPK;
+
+      /* Full table scan */
+      pNew->iSortIdx = b ? iSortIdx : 0;
+      /* TUNING: Cost of full table scan is (N*3.0). */
+      pNew->rRun = rSize + 16;
+      ApplyCostMultiplier(pNew->rRun, pTab->costMult);
+      whereLoopOutputAdjust(pWC, pNew, rSize);
+      rc = whereLoopInsert(pBuilder, pNew);
+      pNew->nOut = rSize;
+      if( rc ) break;
+    }else{
+      Bitmask m;
+      if( pProbe->isCovering ){
+        pNew->wsFlags = WHERE_IDX_ONLY | WHERE_INDEXED;
+        m = 0;
+      }else{
+        m = pSrc->colUsed & ~columnsInIndex(pProbe);
+        pNew->wsFlags = (m==0) ? (WHERE_IDX_ONLY|WHERE_INDEXED) : WHERE_INDEXED;
+      }
+
+      /* Full scan via index */
+      if( b
+       || !HasRowid(pTab)
+       || ( m==0
+         && pProbe->bUnordered==0
+         && (pProbe->szIdxRow<pTab->szTabRow)
+         && (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0
+         && sqlite3GlobalConfig.bUseCis
+         && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan)
+          )
+      ){
+        pNew->iSortIdx = b ? iSortIdx : 0;
+
+        /* The cost of visiting the index rows is N*K, where K is
+        ** between 1.1 and 3.0, depending on the relative sizes of the
+        ** index and table rows. If this is a non-covering index scan,
+        ** also add the cost of visiting table rows (N*3.0).  */
+        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
+        if( m!=0 ){
+          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
+        }
+        ApplyCostMultiplier(pNew->rRun, pTab->costMult);
+        whereLoopOutputAdjust(pWC, pNew, rSize);
+        rc = whereLoopInsert(pBuilder, pNew);
+        pNew->nOut = rSize;
+        if( rc ) break;
+      }
+    }
+
+    rc = whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, 0);
+#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
+    sqlite3Stat4ProbeFree(pBuilder->pRec);
+    pBuilder->nRecValid = 0;
+    pBuilder->pRec = 0;
+#endif
+
+    /* If there was an INDEXED BY clause, then only that one index is
+    ** considered. */
+    if( pSrc->pIndex ) break;
+  }
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+/*
+** Add all WhereLoop objects for a table of the join identified by
+** pBuilder->pNew->iTab.  That table is guaranteed to be a virtual table.
+*/
+static int whereLoopAddVirtual(
+  WhereLoopBuilder *pBuilder,  /* WHERE clause information */
+  Bitmask mExtra
+){
+  WhereInfo *pWInfo;           /* WHERE analysis context */
+  Parse *pParse;               /* The parsing context */
+  WhereClause *pWC;            /* The WHERE clause */
+  struct SrcList_item *pSrc;   /* The FROM clause term to search */
+  Table *pTab;
+  sqlite3 *db;
+  sqlite3_index_info *pIdxInfo;
+  struct sqlite3_index_constraint *pIdxCons;
+  struct sqlite3_index_constraint_usage *pUsage;
+  WhereTerm *pTerm;
+  int i, j;
+  int iTerm, mxTerm;
+  int nConstraint;
+  int seenIn = 0;              /* True if an IN operator is seen */
+  int seenVar = 0;             /* True if a non-constant constraint is seen */
+  int iPhase;                  /* 0: const w/o IN, 1: const, 2: no IN,  2: IN */
+  WhereLoop *pNew;
+  int rc = SQLITE_OK;
+
+  pWInfo = pBuilder->pWInfo;
+  pParse = pWInfo->pParse;
+  db = pParse->db;
+  pWC = pBuilder->pWC;
+  pNew = pBuilder->pNew;
+  pSrc = &pWInfo->pTabList->a[pNew->iTab];
+  pTab = pSrc->pTab;
+  assert( IsVirtual(pTab) );
+  pIdxInfo = allocateIndexInfo(pParse, pWC, pSrc, pBuilder->pOrderBy);
+  if( pIdxInfo==0 ) return SQLITE_NOMEM;
+  pNew->prereq = 0;
+  pNew->rSetup = 0;
+  pNew->wsFlags = WHERE_VIRTUALTABLE;
+  pNew->nLTerm = 0;
+  pNew->u.vtab.needFree = 0;
+  pUsage = pIdxInfo->aConstraintUsage;
+  nConstraint = pIdxInfo->nConstraint;
+  if( whereLoopResize(db, pNew, nConstraint) ){
+    sqlite3DbFree(db, pIdxInfo);
+    return SQLITE_NOMEM;
+  }
+
+  for(iPhase=0; iPhase<=3; iPhase++){
+    if( !seenIn && (iPhase&1)!=0 ){
+      iPhase++;
+      if( iPhase>3 ) break;
+    }
+    if( !seenVar && iPhase>1 ) break;
+    pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint;
+    for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){
+      j = pIdxCons->iTermOffset;
+      pTerm = &pWC->a[j];
+      switch( iPhase ){
+        case 0:    /* Constants without IN operator */
+          pIdxCons->usable = 0;
+          if( (pTerm->eOperator & WO_IN)!=0 ){
+            seenIn = 1;
+          }
+          if( pTerm->prereqRight!=0 ){
+            seenVar = 1;
+          }else if( (pTerm->eOperator & WO_IN)==0 ){
+            pIdxCons->usable = 1;
+          }
+          break;
+        case 1:    /* Constants with IN operators */
+          assert( seenIn );
+          pIdxCons->usable = (pTerm->prereqRight==0);
+          break;
+        case 2:    /* Variables without IN */
+          assert( seenVar );
+          pIdxCons->usable = (pTerm->eOperator & WO_IN)==0;
+          break;
+        default:   /* Variables with IN */
+          assert( seenVar && seenIn );
+          pIdxCons->usable = 1;
+          break;
+      }
+    }
+    memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint);
+    if( pIdxInfo->needToFreeIdxStr ) sqlite3_free(pIdxInfo->idxStr);
+    pIdxInfo->idxStr = 0;
+    pIdxInfo->idxNum = 0;
+    pIdxInfo->needToFreeIdxStr = 0;
+    pIdxInfo->orderByConsumed = 0;
+    pIdxInfo->estimatedCost = SQLITE_BIG_DBL / (double)2;
+    pIdxInfo->estimatedRows = 25;
+    rc = vtabBestIndex(pParse, pTab, pIdxInfo);
+    if( rc ) goto whereLoopAddVtab_exit;
+    pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint;
+    pNew->prereq = mExtra;
+    mxTerm = -1;
+    assert( pNew->nLSlot>=nConstraint );
+    for(i=0; i<nConstraint; i++) pNew->aLTerm[i] = 0;
+    pNew->u.vtab.omitMask = 0;
+    for(i=0; i<nConstraint; i++, pIdxCons++){
+      if( (iTerm = pUsage[i].argvIndex - 1)>=0 ){
+        j = pIdxCons->iTermOffset;
+        if( iTerm>=nConstraint
+         || j<0
+         || j>=pWC->nTerm
+         || pNew->aLTerm[iTerm]!=0
+        ){
+          rc = SQLITE_ERROR;
+          sqlite3ErrorMsg(pParse, "%s.xBestIndex() malfunction", pTab->zName);
+          goto whereLoopAddVtab_exit;
+        }
+        testcase( iTerm==nConstraint-1 );
+        testcase( j==0 );
+        testcase( j==pWC->nTerm-1 );
+        pTerm = &pWC->a[j];
+        pNew->prereq |= pTerm->prereqRight;
+        assert( iTerm<pNew->nLSlot );
+        pNew->aLTerm[iTerm] = pTerm;
+        if( iTerm>mxTerm ) mxTerm = iTerm;
+        testcase( iTerm==15 );
+        testcase( iTerm==16 );
+        if( iTerm<16 && pUsage[i].omit ) pNew->u.vtab.omitMask |= 1<<iTerm;
+        if( (pTerm->eOperator & WO_IN)!=0 ){
+          if( pUsage[i].omit==0 ){
+            /* Do not attempt to use an IN constraint if the virtual table
+            ** says that the equivalent EQ constraint cannot be safely omitted.
+            ** If we do attempt to use such a constraint, some rows might be
+            ** repeated in the output. */
+            break;
+          }
+          /* A virtual table that is constrained by an IN clause may not
+          ** consume the ORDER BY clause because (1) the order of IN terms
+          ** is not necessarily related to the order of output terms and
+          ** (2) Multiple outputs from a single IN value will not merge
+          ** together.  */
+          pIdxInfo->orderByConsumed = 0;
+        }
+      }
+    }
+    if( i>=nConstraint ){
+      pNew->nLTerm = mxTerm+1;
+      assert( pNew->nLTerm<=pNew->nLSlot );
+      pNew->u.vtab.idxNum = pIdxInfo->idxNum;
+      pNew->u.vtab.needFree = pIdxInfo->needToFreeIdxStr;
+      pIdxInfo->needToFreeIdxStr = 0;
+      pNew->u.vtab.idxStr = pIdxInfo->idxStr;
+      pNew->u.vtab.isOrdered = (i8)(pIdxInfo->orderByConsumed ?
+                                      pIdxInfo->nOrderBy : 0);
+      pNew->rSetup = 0;
+      pNew->rRun = sqlite3LogEstFromDouble(pIdxInfo->estimatedCost);
+      pNew->nOut = sqlite3LogEst(pIdxInfo->estimatedRows);
+      whereLoopInsert(pBuilder, pNew);
+      if( pNew->u.vtab.needFree ){
+        sqlite3_free(pNew->u.vtab.idxStr);
+        pNew->u.vtab.needFree = 0;
+      }
+    }
+  }  
+
+whereLoopAddVtab_exit:
+  if( pIdxInfo->needToFreeIdxStr ) sqlite3_free(pIdxInfo->idxStr);
+  sqlite3DbFree(db, pIdxInfo);
+  return rc;
+}
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+/*
+** Add WhereLoop entries to handle OR terms.  This works for either
+** btrees or virtual tables.
+*/
+static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){
+  WhereInfo *pWInfo = pBuilder->pWInfo;
+  WhereClause *pWC;
+  WhereLoop *pNew;
+  WhereTerm *pTerm, *pWCEnd;
+  int rc = SQLITE_OK;
+  int iCur;
+  WhereClause tempWC;
+  WhereLoopBuilder sSubBuild;
+  WhereOrSet sSum, sCur;
+  struct SrcList_item *pItem;
+  
+  pWC = pBuilder->pWC;
+  pWCEnd = pWC->a + pWC->nTerm;
+  pNew = pBuilder->pNew;
+  memset(&sSum, 0, sizeof(sSum));
+  pItem = pWInfo->pTabList->a + pNew->iTab;
+  iCur = pItem->iCursor;
+
+  for(pTerm=pWC->a; pTerm<pWCEnd && rc==SQLITE_OK; pTerm++){
+    if( (pTerm->eOperator & WO_OR)!=0
+     && (pTerm->u.pOrInfo->indexable & pNew->maskSelf)!=0 
+    ){
+      WhereClause * const pOrWC = &pTerm->u.pOrInfo->wc;
+      WhereTerm * const pOrWCEnd = &pOrWC->a[pOrWC->nTerm];
+      WhereTerm *pOrTerm;
+      int once = 1;
+      int i, j;
+    
+      sSubBuild = *pBuilder;
+      sSubBuild.pOrderBy = 0;
+      sSubBuild.pOrSet = &sCur;
+
+      WHERETRACE(0x200, ("Begin processing OR-clause %p\n", pTerm));
+      for(pOrTerm=pOrWC->a; pOrTerm<pOrWCEnd; pOrTerm++){
+        if( (pOrTerm->eOperator & WO_AND)!=0 ){
+          sSubBuild.pWC = &pOrTerm->u.pAndInfo->wc;
+        }else if( pOrTerm->leftCursor==iCur ){
+          tempWC.pWInfo = pWC->pWInfo;
+          tempWC.pOuter = pWC;
+          tempWC.op = TK_AND;
+          tempWC.nTerm = 1;
+          tempWC.a = pOrTerm;
+          sSubBuild.pWC = &tempWC;
+        }else{
+          continue;
+        }
+        sCur.n = 0;
+#ifdef WHERETRACE_ENABLED
+        WHERETRACE(0x200, ("OR-term %d of %p has %d subterms:\n", 
+                   (int)(pOrTerm-pOrWC->a), pTerm, sSubBuild.pWC->nTerm));
+        if( sqlite3WhereTrace & 0x400 ){
+          for(i=0; i<sSubBuild.pWC->nTerm; i++){
+            whereTermPrint(&sSubBuild.pWC->a[i], i);
+          }
+        }
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+        if( IsVirtual(pItem->pTab) ){
+          rc = whereLoopAddVirtual(&sSubBuild, mExtra);
+        }else
+#endif
+        {
+          rc = whereLoopAddBtree(&sSubBuild, mExtra);
+        }
+        if( rc==SQLITE_OK ){
+          rc = whereLoopAddOr(&sSubBuild, mExtra);
+        }
+        assert( rc==SQLITE_OK || sCur.n==0 );
+        if( sCur.n==0 ){
+          sSum.n = 0;
+          break;
+        }else if( once ){
+          whereOrMove(&sSum, &sCur);
+          once = 0;
+        }else{
+          WhereOrSet sPrev;
+          whereOrMove(&sPrev, &sSum);
+          sSum.n = 0;
+          for(i=0; i<sPrev.n; i++){
+            for(j=0; j<sCur.n; j++){
+              whereOrInsert(&sSum, sPrev.a[i].prereq | sCur.a[j].prereq,
+                            sqlite3LogEstAdd(sPrev.a[i].rRun, sCur.a[j].rRun),
+                            sqlite3LogEstAdd(sPrev.a[i].nOut, sCur.a[j].nOut));
+            }
+          }
+        }
+      }
+      pNew->nLTerm = 1;
+      pNew->aLTerm[0] = pTerm;
+      pNew->wsFlags = WHERE_MULTI_OR;
+      pNew->rSetup = 0;
+      pNew->iSortIdx = 0;
+      memset(&pNew->u, 0, sizeof(pNew->u));
+      for(i=0; rc==SQLITE_OK && i<sSum.n; i++){
+        /* TUNING: Currently sSum.a[i].rRun is set to the sum of the costs
+        ** of all sub-scans required by the OR-scan. However, due to rounding
+        ** errors, it may be that the cost of the OR-scan is equal to its
+        ** most expensive sub-scan. Add the smallest possible penalty 
+        ** (equivalent to multiplying the cost by 1.07) to ensure that 
+        ** this does not happen. Otherwise, for WHERE clauses such as the
+        ** following where there is an index on "y":
+        **
+        **     WHERE likelihood(x=?, 0.99) OR y=?
+        **
+        ** the planner may elect to "OR" together a full-table scan and an
+        ** index lookup. And other similarly odd results.  */
+        pNew->rRun = sSum.a[i].rRun + 1;
+        pNew->nOut = sSum.a[i].nOut;
+        pNew->prereq = sSum.a[i].prereq;
+        rc = whereLoopInsert(pBuilder, pNew);
+      }
+      WHERETRACE(0x200, ("End processing OR-clause %p\n", pTerm));
+    }
+  }
+  return rc;
+}
+
+/*
+** Add all WhereLoop objects for all tables 
+*/
+static int whereLoopAddAll(WhereLoopBuilder *pBuilder){
+  WhereInfo *pWInfo = pBuilder->pWInfo;
+  Bitmask mExtra = 0;
+  Bitmask mPrior = 0;
+  int iTab;
+  SrcList *pTabList = pWInfo->pTabList;
+  struct SrcList_item *pItem;
+  sqlite3 *db = pWInfo->pParse->db;
+  int nTabList = pWInfo->nLevel;
+  int rc = SQLITE_OK;
+  u8 priorJoinType = 0;
+  WhereLoop *pNew;
+
+  /* Loop over the tables in the join, from left to right */
+  pNew = pBuilder->pNew;
+  whereLoopInit(pNew);
+  for(iTab=0, pItem=pTabList->a; iTab<nTabList; iTab++, pItem++){
+    pNew->iTab = iTab;
+    pNew->maskSelf = getMask(&pWInfo->sMaskSet, pItem->iCursor);
+    if( ((pItem->jointype|priorJoinType) & (JT_LEFT|JT_CROSS))!=0 ){
+      mExtra = mPrior;
+    }
+    priorJoinType = pItem->jointype;
+    if( IsVirtual(pItem->pTab) ){
+      rc = whereLoopAddVirtual(pBuilder, mExtra);
+    }else{
+      rc = whereLoopAddBtree(pBuilder, mExtra);
+    }
+    if( rc==SQLITE_OK ){
+      rc = whereLoopAddOr(pBuilder, mExtra);
+    }
+    mPrior |= pNew->maskSelf;
+    if( rc || db->mallocFailed ) break;
+  }
+  whereLoopClear(db, pNew);
+  return rc;
+}
+
+/*
+** Examine a WherePath (with the addition of the extra WhereLoop of the 5th
+** parameters) to see if it outputs rows in the requested ORDER BY
+** (or GROUP BY) without requiring a separate sort operation.  Return N:
+** 
+**   N>0:   N terms of the ORDER BY clause are satisfied
+**   N==0:  No terms of the ORDER BY clause are satisfied
+**   N<0:   Unknown yet how many terms of ORDER BY might be satisfied.   
+**
+** Note that processing for WHERE_GROUPBY and WHERE_DISTINCTBY is not as
+** strict.  With GROUP BY and DISTINCT the only requirement is that
+** equivalent rows appear immediately adjacent to one another.  GROUP BY
+** and DISTINCT do not require rows to appear in any particular order as long
+** as equivalent rows are grouped together.  Thus for GROUP BY and DISTINCT
+** the pOrderBy terms can be matched in any order.  With ORDER BY, the 
+** pOrderBy terms must be matched in strict left-to-right order.
+*/
+static i8 wherePathSatisfiesOrderBy(
+  WhereInfo *pWInfo,    /* The WHERE clause */
+  ExprList *pOrderBy,   /* ORDER BY or GROUP BY or DISTINCT clause to check */
+  WherePath *pPath,     /* The WherePath to check */
+  u16 wctrlFlags,       /* Might contain WHERE_GROUPBY or WHERE_DISTINCTBY */
+  u16 nLoop,            /* Number of entries in pPath->aLoop[] */
+  WhereLoop *pLast,     /* Add this WhereLoop to the end of pPath->aLoop[] */
+  Bitmask *pRevMask     /* OUT: Mask of WhereLoops to run in reverse order */
+){
+  u8 revSet;            /* True if rev is known */
+  u8 rev;               /* Composite sort order */
+  u8 revIdx;            /* Index sort order */
+  u8 isOrderDistinct;   /* All prior WhereLoops are order-distinct */
+  u8 distinctColumns;   /* True if the loop has UNIQUE NOT NULL columns */
+  u8 isMatch;           /* iColumn matches a term of the ORDER BY clause */
+  u16 nKeyCol;          /* Number of key columns in pIndex */
+  u16 nColumn;          /* Total number of ordered columns in the index */
+  u16 nOrderBy;         /* Number terms in the ORDER BY clause */
+  int iLoop;            /* Index of WhereLoop in pPath being processed */
+  int i, j;             /* Loop counters */
+  int iCur;             /* Cursor number for current WhereLoop */
+  int iColumn;          /* A column number within table iCur */
+  WhereLoop *pLoop = 0; /* Current WhereLoop being processed. */
+  WhereTerm *pTerm;     /* A single term of the WHERE clause */
+  Expr *pOBExpr;        /* An expression from the ORDER BY clause */
+  CollSeq *pColl;       /* COLLATE function from an ORDER BY clause term */
+  Index *pIndex;        /* The index associated with pLoop */
+  sqlite3 *db = pWInfo->pParse->db;  /* Database connection */
+  Bitmask obSat = 0;    /* Mask of ORDER BY terms satisfied so far */
+  Bitmask obDone;       /* Mask of all ORDER BY terms */
+  Bitmask orderDistinctMask;  /* Mask of all well-ordered loops */
+  Bitmask ready;              /* Mask of inner loops */
+
+  /*
+  ** We say the WhereLoop is "one-row" if it generates no more than one
+  ** row of output.  A WhereLoop is one-row if all of the following are true:
+  **  (a) All index columns match with WHERE_COLUMN_EQ.
+  **  (b) The index is unique
+  ** Any WhereLoop with an WHERE_COLUMN_EQ constraint on the rowid is one-row.
+  ** Every one-row WhereLoop will have the WHERE_ONEROW bit set in wsFlags.
+  **
+  ** We say the WhereLoop is "order-distinct" if the set of columns from
+  ** that WhereLoop that are in the ORDER BY clause are different for every
+  ** row of the WhereLoop.  Every one-row WhereLoop is automatically
+  ** order-distinct.   A WhereLoop that has no columns in the ORDER BY clause
+  ** is not order-distinct. To be order-distinct is not quite the same as being
+  ** UNIQUE since a UNIQUE column or index can have multiple rows that 
+  ** are NULL and NULL values are equivalent for the purpose of order-distinct.
+  ** To be order-distinct, the columns must be UNIQUE and NOT NULL.
+  **
+  ** The rowid for a table is always UNIQUE and NOT NULL so whenever the
+  ** rowid appears in the ORDER BY clause, the corresponding WhereLoop is
+  ** automatically order-distinct.
+  */
+
+  assert( pOrderBy!=0 );
+  if( nLoop && OptimizationDisabled(db, SQLITE_OrderByIdxJoin) ) return 0;
+
+  nOrderBy = pOrderBy->nExpr;
+  testcase( nOrderBy==BMS-1 );
+  if( nOrderBy>BMS-1 ) return 0;  /* Cannot optimize overly large ORDER BYs */
+  isOrderDistinct = 1;
+  obDone = MASKBIT(nOrderBy)-1;
+  orderDistinctMask = 0;
+  ready = 0;
+  for(iLoop=0; isOrderDistinct && obSat<obDone && iLoop<=nLoop; iLoop++){
+    if( iLoop>0 ) ready |= pLoop->maskSelf;
+    pLoop = iLoop<nLoop ? pPath->aLoop[iLoop] : pLast;
+    if( pLoop->wsFlags & WHERE_VIRTUALTABLE ){
+      if( pLoop->u.vtab.isOrdered ) obSat = obDone;
+      break;
+    }
+    iCur = pWInfo->pTabList->a[pLoop->iTab].iCursor;
+
+    /* Mark off any ORDER BY term X that is a column in the table of
+    ** the current loop for which there is term in the WHERE
+    ** clause of the form X IS NULL or X=? that reference only outer
+    ** loops.
+    */
+    for(i=0; i<nOrderBy; i++){
+      if( MASKBIT(i) & obSat ) continue;
+      pOBExpr = sqlite3ExprSkipCollate(pOrderBy->a[i].pExpr);
+      if( pOBExpr->op!=TK_COLUMN ) continue;
+      if( pOBExpr->iTable!=iCur ) continue;
+      pTerm = findTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn,
+                       ~ready, WO_EQ|WO_ISNULL, 0);
+      if( pTerm==0 ) continue;
+      if( (pTerm->eOperator&WO_EQ)!=0 && pOBExpr->iColumn>=0 ){
+        const char *z1, *z2;
+        pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr);
+        if( !pColl ) pColl = db->pDfltColl;
+        z1 = pColl->zName;
+        pColl = sqlite3ExprCollSeq(pWInfo->pParse, pTerm->pExpr);
+        if( !pColl ) pColl = db->pDfltColl;
+        z2 = pColl->zName;
+        if( sqlite3StrICmp(z1, z2)!=0 ) continue;
+      }
+      obSat |= MASKBIT(i);
+    }
+
+    if( (pLoop->wsFlags & WHERE_ONEROW)==0 ){
+      if( pLoop->wsFlags & WHERE_IPK ){
+        pIndex = 0;
+        nKeyCol = 0;
+        nColumn = 1;
+      }else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered ){
+        return 0;
+      }else{
+        nKeyCol = pIndex->nKeyCol;
+        nColumn = pIndex->nColumn;
+        assert( nColumn==nKeyCol+1 || !HasRowid(pIndex->pTable) );
+        assert( pIndex->aiColumn[nColumn-1]==(-1) || !HasRowid(pIndex->pTable));
+        isOrderDistinct = IsUniqueIndex(pIndex);
+      }
+
+      /* Loop through all columns of the index and deal with the ones
+      ** that are not constrained by == or IN.
+      */
+      rev = revSet = 0;
+      distinctColumns = 0;
+      for(j=0; j<nColumn; j++){
+        u8 bOnce;   /* True to run the ORDER BY search loop */
+
+        /* Skip over == and IS NULL terms */
+        if( j<pLoop->u.btree.nEq
+         && pLoop->nSkip==0
+         && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0
+        ){
+          if( i & WO_ISNULL ){
+            testcase( isOrderDistinct );
+            isOrderDistinct = 0;
+          }
+          continue;  
+        }
+
+        /* Get the column number in the table (iColumn) and sort order
+        ** (revIdx) for the j-th column of the index.
+        */
+        if( pIndex ){
+          iColumn = pIndex->aiColumn[j];
+          revIdx = pIndex->aSortOrder[j];
+          if( iColumn==pIndex->pTable->iPKey ) iColumn = -1;
+        }else{
+          iColumn = -1;
+          revIdx = 0;
+        }
+
+        /* An unconstrained column that might be NULL means that this
+        ** WhereLoop is not well-ordered
+        */
+        if( isOrderDistinct
+         && iColumn>=0
+         && j>=pLoop->u.btree.nEq
+         && pIndex->pTable->aCol[iColumn].notNull==0
+        ){
+          isOrderDistinct = 0;
+        }
+
+        /* Find the ORDER BY term that corresponds to the j-th column
+        ** of the index and mark that ORDER BY term off 
+        */
+        bOnce = 1;
+        isMatch = 0;
+        for(i=0; bOnce && i<nOrderBy; i++){
+          if( MASKBIT(i) & obSat ) continue;
+          pOBExpr = sqlite3ExprSkipCollate(pOrderBy->a[i].pExpr);
+          testcase( wctrlFlags & WHERE_GROUPBY );
+          testcase( wctrlFlags & WHERE_DISTINCTBY );
+          if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0;
+          if( pOBExpr->op!=TK_COLUMN ) continue;
+          if( pOBExpr->iTable!=iCur ) continue;
+          if( pOBExpr->iColumn!=iColumn ) continue;
+          if( iColumn>=0 ){
+            pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr);
+            if( !pColl ) pColl = db->pDfltColl;
+            if( sqlite3StrICmp(pColl->zName, pIndex->azColl[j])!=0 ) continue;
+          }
+          isMatch = 1;
+          break;
+        }
+        if( isMatch && (wctrlFlags & WHERE_GROUPBY)==0 ){
+          /* Make sure the sort order is compatible in an ORDER BY clause.
+          ** Sort order is irrelevant for a GROUP BY clause. */
+          if( revSet ){
+            if( (rev ^ revIdx)!=pOrderBy->a[i].sortOrder ) isMatch = 0;
+          }else{
+            rev = revIdx ^ pOrderBy->a[i].sortOrder;
+            if( rev ) *pRevMask |= MASKBIT(iLoop);
+            revSet = 1;
+          }
+        }
+        if( isMatch ){
+          if( iColumn<0 ){
+            testcase( distinctColumns==0 );
+            distinctColumns = 1;
+          }
+          obSat |= MASKBIT(i);
+        }else{
+          /* No match found */
+          if( j==0 || j<nKeyCol ){
+            testcase( isOrderDistinct!=0 );
+            isOrderDistinct = 0;
+          }
+          break;
+        }
+      } /* end Loop over all index columns */
+      if( distinctColumns ){
+        testcase( isOrderDistinct==0 );
+        isOrderDistinct = 1;
+      }
+    } /* end-if not one-row */
+
+    /* Mark off any other ORDER BY terms that reference pLoop */
+    if( isOrderDistinct ){
+      orderDistinctMask |= pLoop->maskSelf;
+      for(i=0; i<nOrderBy; i++){
+        Expr *p;
+        Bitmask mTerm;
+        if( MASKBIT(i) & obSat ) continue;
+        p = pOrderBy->a[i].pExpr;
+        mTerm = exprTableUsage(&pWInfo->sMaskSet,p);
+        if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue;
+        if( (mTerm&~orderDistinctMask)==0 ){
+          obSat |= MASKBIT(i);
+        }
+      }
+    }
+  } /* End the loop over all WhereLoops from outer-most down to inner-most */
+  if( obSat==obDone ) return (i8)nOrderBy;
+  if( !isOrderDistinct ){
+    for(i=nOrderBy-1; i>0; i--){
+      Bitmask m = MASKBIT(i) - 1;
+      if( (obSat&m)==m ) return i;
+    }
+    return 0;
+  }
+  return -1;
+}
+
+
+/*
+** If the WHERE_GROUPBY flag is set in the mask passed to sqlite3WhereBegin(),
+** the planner assumes that the specified pOrderBy list is actually a GROUP
+** BY clause - and so any order that groups rows as required satisfies the
+** request.
+**
+** Normally, in this case it is not possible for the caller to determine
+** whether or not the rows are really being delivered in sorted order, or
+** just in some other order that provides the required grouping. However,
+** if the WHERE_SORTBYGROUP flag is also passed to sqlite3WhereBegin(), then
+** this function may be called on the returned WhereInfo object. It returns
+** true if the rows really will be sorted in the specified order, or false
+** otherwise.
+**
+** For example, assuming:
+**
+**   CREATE INDEX i1 ON t1(x, Y);
+**
+** then
+**
+**   SELECT * FROM t1 GROUP BY x,y ORDER BY x,y;   -- IsSorted()==1
+**   SELECT * FROM t1 GROUP BY y,x ORDER BY y,x;   -- IsSorted()==0
+*/
+SQLITE_PRIVATE int sqlite3WhereIsSorted(WhereInfo *pWInfo){
+  assert( pWInfo->wctrlFlags & WHERE_GROUPBY );
+  assert( pWInfo->wctrlFlags & WHERE_SORTBYGROUP );
+  return pWInfo->sorted;
+}
+
+#ifdef WHERETRACE_ENABLED
+/* For debugging use only: */
+static const char *wherePathName(WherePath *pPath, int nLoop, WhereLoop *pLast){
+  static char zName[65];
+  int i;
+  for(i=0; i<nLoop; i++){ zName[i] = pPath->aLoop[i]->cId; }
+  if( pLast ) zName[i++] = pLast->cId;
+  zName[i] = 0;
+  return zName;
+}
+#endif
+
+/*
+** Return the cost of sorting nRow rows, assuming that the keys have 
+** nOrderby columns and that the first nSorted columns are already in
+** order.
+*/
+static LogEst whereSortingCost(
+  WhereInfo *pWInfo,
+  LogEst nRow,
+  int nOrderBy,
+  int nSorted
+){
+  /* TUNING: Estimated cost of a full external sort, where N is 
+  ** the number of rows to sort is:
+  **
+  **   cost = (3.0 * N * log(N)).
+  ** 
+  ** Or, if the order-by clause has X terms but only the last Y 
+  ** terms are out of order, then block-sorting will reduce the 
+  ** sorting cost to:
+  **
+  **   cost = (3.0 * N * log(N)) * (Y/X)
+  **
+  ** The (Y/X) term is implemented using stack variable rScale
+  ** below.  */
+  LogEst rScale, rSortCost;
+  assert( nOrderBy>0 && 66==sqlite3LogEst(100) );
+  rScale = sqlite3LogEst((nOrderBy-nSorted)*100/nOrderBy) - 66;
+  rSortCost = nRow + estLog(nRow) + rScale + 16;
+
+  /* TUNING: The cost of implementing DISTINCT using a B-TREE is
+  ** similar but with a larger constant of proportionality. 
+  ** Multiply by an additional factor of 3.0.  */
+  if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
+    rSortCost += 16;
+  }
+
+  return rSortCost;
+}
+
+/*
+** Given the list of WhereLoop objects at pWInfo->pLoops, this routine
+** attempts to find the lowest cost path that visits each WhereLoop
+** once.  This path is then loaded into the pWInfo->a[].pWLoop fields.
+**
+** Assume that the total number of output rows that will need to be sorted
+** will be nRowEst (in the 10*log2 representation).  Or, ignore sorting
+** costs if nRowEst==0.
+**
+** Return SQLITE_OK on success or SQLITE_NOMEM of a memory allocation
+** error occurs.
+*/
+static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){
+  int mxChoice;             /* Maximum number of simultaneous paths tracked */
+  int nLoop;                /* Number of terms in the join */
+  Parse *pParse;            /* Parsing context */
+  sqlite3 *db;              /* The database connection */
+  int iLoop;                /* Loop counter over the terms of the join */
+  int ii, jj;               /* Loop counters */
+  int mxI = 0;              /* Index of next entry to replace */
+  int nOrderBy;             /* Number of ORDER BY clause terms */
+  LogEst mxCost = 0;        /* Maximum cost of a set of paths */
+  LogEst mxUnsorted = 0;    /* Maximum unsorted cost of a set of path */
+  int nTo, nFrom;           /* Number of valid entries in aTo[] and aFrom[] */
+  WherePath *aFrom;         /* All nFrom paths at the previous level */
+  WherePath *aTo;           /* The nTo best paths at the current level */
+  WherePath *pFrom;         /* An element of aFrom[] that we are working on */
+  WherePath *pTo;           /* An element of aTo[] that we are working on */
+  WhereLoop *pWLoop;        /* One of the WhereLoop objects */
+  WhereLoop **pX;           /* Used to divy up the pSpace memory */
+  LogEst *aSortCost = 0;    /* Sorting and partial sorting costs */
+  char *pSpace;             /* Temporary memory used by this routine */
+  int nSpace;               /* Bytes of space allocated at pSpace */
+
+  pParse = pWInfo->pParse;
+  db = pParse->db;
+  nLoop = pWInfo->nLevel;
+  /* TUNING: For simple queries, only the best path is tracked.
+  ** For 2-way joins, the 5 best paths are followed.
+  ** For joins of 3 or more tables, track the 10 best paths */
+  mxChoice = (nLoop<=1) ? 1 : (nLoop==2 ? 5 : 10);
+  assert( nLoop<=pWInfo->pTabList->nSrc );
+  WHERETRACE(0x002, ("---- begin solver.  (nRowEst=%d)\n", nRowEst));
+
+  /* If nRowEst is zero and there is an ORDER BY clause, ignore it. In this
+  ** case the purpose of this call is to estimate the number of rows returned
+  ** by the overall query. Once this estimate has been obtained, the caller
+  ** will invoke this function a second time, passing the estimate as the
+  ** nRowEst parameter.  */
+  if( pWInfo->pOrderBy==0 || nRowEst==0 ){
+    nOrderBy = 0;
+  }else{
+    nOrderBy = pWInfo->pOrderBy->nExpr;
+  }
+
+  /* Allocate and initialize space for aTo, aFrom and aSortCost[] */
+  nSpace = (sizeof(WherePath)+sizeof(WhereLoop*)*nLoop)*mxChoice*2;
+  nSpace += sizeof(LogEst) * nOrderBy;
+  pSpace = sqlite3DbMallocRaw(db, nSpace);
+  if( pSpace==0 ) return SQLITE_NOMEM;
+  aTo = (WherePath*)pSpace;
+  aFrom = aTo+mxChoice;
+  memset(aFrom, 0, sizeof(aFrom[0]));
+  pX = (WhereLoop**)(aFrom+mxChoice);
+  for(ii=mxChoice*2, pFrom=aTo; ii>0; ii--, pFrom++, pX += nLoop){
+    pFrom->aLoop = pX;
+  }
+  if( nOrderBy ){
+    /* If there is an ORDER BY clause and it is not being ignored, set up
+    ** space for the aSortCost[] array. Each element of the aSortCost array
+    ** is either zero - meaning it has not yet been initialized - or the
+    ** cost of sorting nRowEst rows of data where the first X terms of
+    ** the ORDER BY clause are already in order, where X is the array 
+    ** index.  */
+    aSortCost = (LogEst*)pX;
+    memset(aSortCost, 0, sizeof(LogEst) * nOrderBy);
+  }
+  assert( aSortCost==0 || &pSpace[nSpace]==(char*)&aSortCost[nOrderBy] );
+  assert( aSortCost!=0 || &pSpace[nSpace]==(char*)pX );
+
+  /* Seed the search with a single WherePath containing zero WhereLoops.
+  **
+  ** TUNING: Do not let the number of iterations go above 25.  If the cost
+  ** of computing an automatic index is not paid back within the first 25
+  ** rows, then do not use the automatic index. */
+  aFrom[0].nRow = MIN(pParse->nQueryLoop, 46);  assert( 46==sqlite3LogEst(25) );
+  nFrom = 1;
+  assert( aFrom[0].isOrdered==0 );
+  if( nOrderBy ){
+    /* If nLoop is zero, then there are no FROM terms in the query. Since
+    ** in this case the query may return a maximum of one row, the results
+    ** are already in the requested order. Set isOrdered to nOrderBy to
+    ** indicate this. Or, if nLoop is greater than zero, set isOrdered to
+    ** -1, indicating that the result set may or may not be ordered, 
+    ** depending on the loops added to the current plan.  */
+    aFrom[0].isOrdered = nLoop>0 ? -1 : nOrderBy;
+  }
+
+  /* Compute successively longer WherePaths using the previous generation
+  ** of WherePaths as the basis for the next.  Keep track of the mxChoice
+  ** best paths at each generation */
+  for(iLoop=0; iLoop<nLoop; iLoop++){
+    nTo = 0;
+    for(ii=0, pFrom=aFrom; ii<nFrom; ii++, pFrom++){
+      for(pWLoop=pWInfo->pLoops; pWLoop; pWLoop=pWLoop->pNextLoop){
+        LogEst nOut;                      /* Rows visited by (pFrom+pWLoop) */
+        LogEst rCost;                     /* Cost of path (pFrom+pWLoop) */
+        LogEst rUnsorted;                 /* Unsorted cost of (pFrom+pWLoop) */
+        i8 isOrdered = pFrom->isOrdered;  /* isOrdered for (pFrom+pWLoop) */
+        Bitmask maskNew;                  /* Mask of src visited by (..) */
+        Bitmask revMask = 0;              /* Mask of rev-order loops for (..) */
+
+        if( (pWLoop->prereq & ~pFrom->maskLoop)!=0 ) continue;
+        if( (pWLoop->maskSelf & pFrom->maskLoop)!=0 ) continue;
+        /* At this point, pWLoop is a candidate to be the next loop. 
+        ** Compute its cost */
+        rUnsorted = sqlite3LogEstAdd(pWLoop->rSetup,pWLoop->rRun + pFrom->nRow);
+        rUnsorted = sqlite3LogEstAdd(rUnsorted, pFrom->rUnsorted);
+        nOut = pFrom->nRow + pWLoop->nOut;
+        maskNew = pFrom->maskLoop | pWLoop->maskSelf;
+        if( isOrdered<0 ){
+          isOrdered = wherePathSatisfiesOrderBy(pWInfo,
+                       pWInfo->pOrderBy, pFrom, pWInfo->wctrlFlags,
+                       iLoop, pWLoop, &revMask);
+        }else{
+          revMask = pFrom->revLoop;
+        }
+        if( isOrdered>=0 && isOrdered<nOrderBy ){
+          if( aSortCost[isOrdered]==0 ){
+            aSortCost[isOrdered] = whereSortingCost(
+                pWInfo, nRowEst, nOrderBy, isOrdered
+            );
+          }
+          rCost = sqlite3LogEstAdd(rUnsorted, aSortCost[isOrdered]);
+
+          WHERETRACE(0x002,
+              ("---- sort cost=%-3d (%d/%d) increases cost %3d to %-3d\n",
+               aSortCost[isOrdered], (nOrderBy-isOrdered), nOrderBy, 
+               rUnsorted, rCost));
+        }else{
+          rCost = rUnsorted;
+        }
+
+        /* Check to see if pWLoop should be added to the set of
+        ** mxChoice best-so-far paths.
+        **
+        ** First look for an existing path among best-so-far paths
+        ** that covers the same set of loops and has the same isOrdered
+        ** setting as the current path candidate.
+        **
+        ** The term "((pTo->isOrdered^isOrdered)&0x80)==0" is equivalent
+        ** to (pTo->isOrdered==(-1))==(isOrdered==(-1))" for the range
+        ** of legal values for isOrdered, -1..64.
+        */
+        for(jj=0, pTo=aTo; jj<nTo; jj++, pTo++){
+          if( pTo->maskLoop==maskNew
+           && ((pTo->isOrdered^isOrdered)&0x80)==0
+          ){
+            testcase( jj==nTo-1 );
+            break;
+          }
+        }
+        if( jj>=nTo ){
+          /* None of the existing best-so-far paths match the candidate. */
+          if( nTo>=mxChoice
+           && (rCost>mxCost || (rCost==mxCost && rUnsorted>=mxUnsorted))
+          ){
+            /* The current candidate is no better than any of the mxChoice
+            ** paths currently in the best-so-far buffer.  So discard
+            ** this candidate as not viable. */
+#ifdef WHERETRACE_ENABLED /* 0x4 */
+            if( sqlite3WhereTrace&0x4 ){
+              sqlite3DebugPrintf("Skip   %s cost=%-3d,%3d order=%c\n",
+                  wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
+                  isOrdered>=0 ? isOrdered+'0' : '?');
+            }
+#endif
+            continue;
+          }
+          /* If we reach this points it means that the new candidate path
+          ** needs to be added to the set of best-so-far paths. */
+          if( nTo<mxChoice ){
+            /* Increase the size of the aTo set by one */
+            jj = nTo++;
+          }else{
+            /* New path replaces the prior worst to keep count below mxChoice */
+            jj = mxI;
+          }
+          pTo = &aTo[jj];
+#ifdef WHERETRACE_ENABLED /* 0x4 */
+          if( sqlite3WhereTrace&0x4 ){
+            sqlite3DebugPrintf("New    %s cost=%-3d,%3d order=%c\n",
+                wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
+                isOrdered>=0 ? isOrdered+'0' : '?');
+          }
+#endif
+        }else{
+          /* Control reaches here if best-so-far path pTo=aTo[jj] covers the
+          ** same set of loops and has the sam isOrdered setting as the
+          ** candidate path.  Check to see if the candidate should replace
+          ** pTo or if the candidate should be skipped */
+          if( pTo->rCost<rCost || (pTo->rCost==rCost && pTo->nRow<=nOut) ){
+#ifdef WHERETRACE_ENABLED /* 0x4 */
+            if( sqlite3WhereTrace&0x4 ){
+              sqlite3DebugPrintf(
+                  "Skip   %s cost=%-3d,%3d order=%c",
+                  wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
+                  isOrdered>=0 ? isOrdered+'0' : '?');
+              sqlite3DebugPrintf("   vs %s cost=%-3d,%d order=%c\n",
+                  wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
+                  pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
+            }
+#endif
+            /* Discard the candidate path from further consideration */
+            testcase( pTo->rCost==rCost );
+            continue;
+          }
+          testcase( pTo->rCost==rCost+1 );
+          /* Control reaches here if the candidate path is better than the
+          ** pTo path.  Replace pTo with the candidate. */
+#ifdef WHERETRACE_ENABLED /* 0x4 */
+          if( sqlite3WhereTrace&0x4 ){
+            sqlite3DebugPrintf(
+                "Update %s cost=%-3d,%3d order=%c",
+                wherePathName(pFrom, iLoop, pWLoop), rCost, nOut,
+                isOrdered>=0 ? isOrdered+'0' : '?');
+            sqlite3DebugPrintf("  was %s cost=%-3d,%3d order=%c\n",
+                wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
+                pTo->isOrdered>=0 ? pTo->isOrdered+'0' : '?');
+          }
+#endif
+        }
+        /* pWLoop is a winner.  Add it to the set of best so far */
+        pTo->maskLoop = pFrom->maskLoop | pWLoop->maskSelf;
+        pTo->revLoop = revMask;
+        pTo->nRow = nOut;
+        pTo->rCost = rCost;
+        pTo->rUnsorted = rUnsorted;
+        pTo->isOrdered = isOrdered;
+        memcpy(pTo->aLoop, pFrom->aLoop, sizeof(WhereLoop*)*iLoop);
+        pTo->aLoop[iLoop] = pWLoop;
+        if( nTo>=mxChoice ){
+          mxI = 0;
+          mxCost = aTo[0].rCost;
+          mxUnsorted = aTo[0].nRow;
+          for(jj=1, pTo=&aTo[1]; jj<mxChoice; jj++, pTo++){
+            if( pTo->rCost>mxCost 
+             || (pTo->rCost==mxCost && pTo->rUnsorted>mxUnsorted) 
+            ){
+              mxCost = pTo->rCost;
+              mxUnsorted = pTo->rUnsorted;
+              mxI = jj;
+            }
+          }
+        }
+      }
+    }
+
+#ifdef WHERETRACE_ENABLED  /* >=2 */
+    if( sqlite3WhereTrace & 0x02 ){
+      sqlite3DebugPrintf("---- after round %d ----\n", iLoop);
+      for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){
+        sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c",
+           wherePathName(pTo, iLoop+1, 0), pTo->rCost, pTo->nRow,
+           pTo->isOrdered>=0 ? (pTo->isOrdered+'0') : '?');
+        if( pTo->isOrdered>0 ){
+          sqlite3DebugPrintf(" rev=0x%llx\n", pTo->revLoop);
+        }else{
+          sqlite3DebugPrintf("\n");
+        }
+      }
+    }
+#endif
+
+    /* Swap the roles of aFrom and aTo for the next generation */
+    pFrom = aTo;
+    aTo = aFrom;
+    aFrom = pFrom;
+    nFrom = nTo;
+  }
+
+  if( nFrom==0 ){
+    sqlite3ErrorMsg(pParse, "no query solution");
+    sqlite3DbFree(db, pSpace);
+    return SQLITE_ERROR;
+  }
+  
+  /* Find the lowest cost path.  pFrom will be left pointing to that path */
+  pFrom = aFrom;
+  for(ii=1; ii<nFrom; ii++){
+    if( pFrom->rCost>aFrom[ii].rCost ) pFrom = &aFrom[ii];
+  }
+  assert( pWInfo->nLevel==nLoop );
+  /* Load the lowest cost path into pWInfo */
+  for(iLoop=0; iLoop<nLoop; iLoop++){
+    WhereLevel *pLevel = pWInfo->a + iLoop;
+    pLevel->pWLoop = pWLoop = pFrom->aLoop[iLoop];
+    pLevel->iFrom = pWLoop->iTab;
+    pLevel->iTabCur = pWInfo->pTabList->a[pLevel->iFrom].iCursor;
+  }
+  if( (pWInfo->wctrlFlags & WHERE_WANT_DISTINCT)!=0
+   && (pWInfo->wctrlFlags & WHERE_DISTINCTBY)==0
+   && pWInfo->eDistinct==WHERE_DISTINCT_NOOP
+   && nRowEst
+  ){
+    Bitmask notUsed;
+    int rc = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pResultSet, pFrom,
+                 WHERE_DISTINCTBY, nLoop-1, pFrom->aLoop[nLoop-1], &notUsed);
+    if( rc==pWInfo->pResultSet->nExpr ){
+      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
+    }
+  }
+  if( pWInfo->pOrderBy ){
+    if( pWInfo->wctrlFlags & WHERE_DISTINCTBY ){
+      if( pFrom->isOrdered==pWInfo->pOrderBy->nExpr ){
+        pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
+      }
+    }else{
+      pWInfo->nOBSat = pFrom->isOrdered;
+      if( pWInfo->nOBSat<0 ) pWInfo->nOBSat = 0;
+      pWInfo->revMask = pFrom->revLoop;
+    }
+    if( (pWInfo->wctrlFlags & WHERE_SORTBYGROUP)
+        && pWInfo->nOBSat==pWInfo->pOrderBy->nExpr
+    ){
+      Bitmask revMask = 0;
+      int nOrder = wherePathSatisfiesOrderBy(pWInfo, pWInfo->pOrderBy, 
+          pFrom, 0, nLoop-1, pFrom->aLoop[nLoop-1], &revMask
+      );
+      assert( pWInfo->sorted==0 );
+      if( nOrder==pWInfo->pOrderBy->nExpr ){
+        pWInfo->sorted = 1;
+        pWInfo->revMask = revMask;
+      }
+    }
+  }
+
+
+  pWInfo->nRowOut = pFrom->nRow;
+
+  /* Free temporary memory and return success */
+  sqlite3DbFree(db, pSpace);
+  return SQLITE_OK;
+}
+
+/*
+** Most queries use only a single table (they are not joins) and have
+** simple == constraints against indexed fields.  This routine attempts
+** to plan those simple cases using much less ceremony than the
+** general-purpose query planner, and thereby yield faster sqlite3_prepare()
+** times for the common case.
+**
+** Return non-zero on success, if this query can be handled by this
+** no-frills query planner.  Return zero if this query needs the 
+** general-purpose query planner.
+*/
+static int whereShortCut(WhereLoopBuilder *pBuilder){
+  WhereInfo *pWInfo;
+  struct SrcList_item *pItem;
+  WhereClause *pWC;
+  WhereTerm *pTerm;
+  WhereLoop *pLoop;
+  int iCur;
+  int j;
+  Table *pTab;
+  Index *pIdx;
+  
+  pWInfo = pBuilder->pWInfo;
+  if( pWInfo->wctrlFlags & WHERE_FORCE_TABLE ) return 0;
+  assert( pWInfo->pTabList->nSrc>=1 );
+  pItem = pWInfo->pTabList->a;
+  pTab = pItem->pTab;
+  if( IsVirtual(pTab) ) return 0;
+  if( pItem->zIndex ) return 0;
+  iCur = pItem->iCursor;
+  pWC = &pWInfo->sWC;
+  pLoop = pBuilder->pNew;
+  pLoop->wsFlags = 0;
+  pLoop->nSkip = 0;
+  pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0);
+  if( pTerm ){
+    pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW;
+    pLoop->aLTerm[0] = pTerm;
+    pLoop->nLTerm = 1;
+    pLoop->u.btree.nEq = 1;
+    /* TUNING: Cost of a rowid lookup is 10 */
+    pLoop->rRun = 33;  /* 33==sqlite3LogEst(10) */
+  }else{
+    for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+      assert( pLoop->aLTermSpace==pLoop->aLTerm );
+      if( !IsUniqueIndex(pIdx)
+       || pIdx->pPartIdxWhere!=0 
+       || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) 
+      ) continue;
+      for(j=0; j<pIdx->nKeyCol; j++){
+        pTerm = findTerm(pWC, iCur, pIdx->aiColumn[j], 0, WO_EQ, pIdx);
+        if( pTerm==0 ) break;
+        pLoop->aLTerm[j] = pTerm;
+      }
+      if( j!=pIdx->nKeyCol ) continue;
+      pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_ONEROW|WHERE_INDEXED;
+      if( pIdx->isCovering || (pItem->colUsed & ~columnsInIndex(pIdx))==0 ){
+        pLoop->wsFlags |= WHERE_IDX_ONLY;
+      }
+      pLoop->nLTerm = j;
+      pLoop->u.btree.nEq = j;
+      pLoop->u.btree.pIndex = pIdx;
+      /* TUNING: Cost of a unique index lookup is 15 */
+      pLoop->rRun = 39;  /* 39==sqlite3LogEst(15) */
+      break;
+    }
+  }
+  if( pLoop->wsFlags ){
+    pLoop->nOut = (LogEst)1;
+    pWInfo->a[0].pWLoop = pLoop;
+    pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur);
+    pWInfo->a[0].iTabCur = iCur;
+    pWInfo->nRowOut = 1;
+    if( pWInfo->pOrderBy ) pWInfo->nOBSat =  pWInfo->pOrderBy->nExpr;
+    if( pWInfo->wctrlFlags & WHERE_WANT_DISTINCT ){
+      pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
+    }
+#ifdef SQLITE_DEBUG
+    pLoop->cId = '0';
+#endif
+    return 1;
+  }
+  return 0;
+}
+
+/*
+** Generate the beginning of the loop used for WHERE clause processing.
+** The return value is a pointer to an opaque structure that contains
+** information needed to terminate the loop.  Later, the calling routine
+** should invoke sqlite3WhereEnd() with the return value of this function
+** in order to complete the WHERE clause processing.
+**
+** If an error occurs, this routine returns NULL.
+**
+** The basic idea is to do a nested loop, one loop for each table in
+** the FROM clause of a select.  (INSERT and UPDATE statements are the
+** same as a SELECT with only a single table in the FROM clause.)  For
+** example, if the SQL is this:
+**
+**       SELECT * FROM t1, t2, t3 WHERE ...;
+**
+** Then the code generated is conceptually like the following:
+**
+**      foreach row1 in t1 do       \    Code generated
+**        foreach row2 in t2 do      |-- by sqlite3WhereBegin()
+**          foreach row3 in t3 do   /
+**            ...
+**          end                     \    Code generated
+**        end                        |-- by sqlite3WhereEnd()
+**      end                         /
+**
+** Note that the loops might not be nested in the order in which they
+** appear in the FROM clause if a different order is better able to make
+** use of indices.  Note also that when the IN operator appears in
+** the WHERE clause, it might result in additional nested loops for
+** scanning through all values on the right-hand side of the IN.
+**
+** There are Btree cursors associated with each table.  t1 uses cursor
+** number pTabList->a[0].iCursor.  t2 uses the cursor pTabList->a[1].iCursor.
+** And so forth.  This routine generates code to open those VDBE cursors
+** and sqlite3WhereEnd() generates the code to close them.
+**
+** The code that sqlite3WhereBegin() generates leaves the cursors named
+** in pTabList pointing at their appropriate entries.  The [...] code
+** can use OP_Column and OP_Rowid opcodes on these cursors to extract
+** data from the various tables of the loop.
+**
+** If the WHERE clause is empty, the foreach loops must each scan their
+** entire tables.  Thus a three-way join is an O(N^3) operation.  But if
+** the tables have indices and there are terms in the WHERE clause that
+** refer to those indices, a complete table scan can be avoided and the
+** code will run much faster.  Most of the work of this routine is checking
+** to see if there are indices that can be used to speed up the loop.
+**
+** Terms of the WHERE clause are also used to limit which rows actually
+** make it to the "..." in the middle of the loop.  After each "foreach",
+** terms of the WHERE clause that use only terms in that loop and outer
+** loops are evaluated and if false a jump is made around all subsequent
+** inner loops (or around the "..." if the test occurs within the inner-
+** most loop)
+**
+** OUTER JOINS
+**
+** An outer join of tables t1 and t2 is conceptally coded as follows:
+**
+**    foreach row1 in t1 do
+**      flag = 0
+**      foreach row2 in t2 do
+**        start:
+**          ...
+**          flag = 1
+**      end
+**      if flag==0 then
+**        move the row2 cursor to a null row
+**        goto start
+**      fi
+**    end
+**
+** ORDER BY CLAUSE PROCESSING
+**
+** pOrderBy is a pointer to the ORDER BY clause (or the GROUP BY clause
+** if the WHERE_GROUPBY flag is set in wctrlFlags) of a SELECT statement
+** if there is one.  If there is no ORDER BY clause or if this routine
+** is called from an UPDATE or DELETE statement, then pOrderBy is NULL.
+**
+** The iIdxCur parameter is the cursor number of an index.  If 
+** WHERE_ONETABLE_ONLY is set, iIdxCur is the cursor number of an index
+** to use for OR clause processing.  The WHERE clause should use this
+** specific cursor.  If WHERE_ONEPASS_DESIRED is set, then iIdxCur is
+** the first cursor in an array of cursors for all indices.  iIdxCur should
+** be used to compute the appropriate cursor depending on which index is
+** used.
+*/
+SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
+  Parse *pParse,        /* The parser context */
+  SrcList *pTabList,    /* FROM clause: A list of all tables to be scanned */
+  Expr *pWhere,         /* The WHERE clause */
+  ExprList *pOrderBy,   /* An ORDER BY (or GROUP BY) clause, or NULL */
+  ExprList *pResultSet, /* Result set of the query */
+  u16 wctrlFlags,       /* One of the WHERE_* flags defined in sqliteInt.h */
+  int iIdxCur           /* If WHERE_ONETABLE_ONLY is set, index cursor number */
+){
+  int nByteWInfo;            /* Num. bytes allocated for WhereInfo struct */
+  int nTabList;              /* Number of elements in pTabList */
+  WhereInfo *pWInfo;         /* Will become the return value of this function */
+  Vdbe *v = pParse->pVdbe;   /* The virtual database engine */
+  Bitmask notReady;          /* Cursors that are not yet positioned */
+  WhereLoopBuilder sWLB;     /* The WhereLoop builder */
+  WhereMaskSet *pMaskSet;    /* The expression mask set */
+  WhereLevel *pLevel;        /* A single level in pWInfo->a[] */
+  WhereLoop *pLoop;          /* Pointer to a single WhereLoop object */
+  int ii;                    /* Loop counter */
+  sqlite3 *db;               /* Database connection */
+  int rc;                    /* Return code */
+
+
+  /* Variable initialization */
+  db = pParse->db;
+  memset(&sWLB, 0, sizeof(sWLB));
+
+  /* An ORDER/GROUP BY clause of more than 63 terms cannot be optimized */
+  testcase( pOrderBy && pOrderBy->nExpr==BMS-1 );
+  if( pOrderBy && pOrderBy->nExpr>=BMS ) pOrderBy = 0;
+  sWLB.pOrderBy = pOrderBy;
+
+  /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via
+  ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */
+  if( OptimizationDisabled(db, SQLITE_DistinctOpt) ){
+    wctrlFlags &= ~WHERE_WANT_DISTINCT;
+  }
+
+  /* The number of tables in the FROM clause is limited by the number of
+  ** bits in a Bitmask 
+  */
+  testcase( pTabList->nSrc==BMS );
+  if( pTabList->nSrc>BMS ){
+    sqlite3ErrorMsg(pParse, "at most %d tables in a join", BMS);
+    return 0;
+  }
+
+  /* This function normally generates a nested loop for all tables in 
+  ** pTabList.  But if the WHERE_ONETABLE_ONLY flag is set, then we should
+  ** only generate code for the first table in pTabList and assume that
+  ** any cursors associated with subsequent tables are uninitialized.
+  */
+  nTabList = (wctrlFlags & WHERE_ONETABLE_ONLY) ? 1 : pTabList->nSrc;
+
+  /* Allocate and initialize the WhereInfo structure that will become the
+  ** return value. A single allocation is used to store the WhereInfo
+  ** struct, the contents of WhereInfo.a[], the WhereClause structure
+  ** and the WhereMaskSet structure. Since WhereClause contains an 8-byte
+  ** field (type Bitmask) it must be aligned on an 8-byte boundary on
+  ** some architectures. Hence the ROUND8() below.
+  */
+  nByteWInfo = ROUND8(sizeof(WhereInfo)+(nTabList-1)*sizeof(WhereLevel));
+  pWInfo = sqlite3DbMallocZero(db, nByteWInfo + sizeof(WhereLoop));
+  if( db->mallocFailed ){
+    sqlite3DbFree(db, pWInfo);
+    pWInfo = 0;
+    goto whereBeginError;
+  }
+  pWInfo->aiCurOnePass[0] = pWInfo->aiCurOnePass[1] = -1;
+  pWInfo->nLevel = nTabList;
+  pWInfo->pParse = pParse;
+  pWInfo->pTabList = pTabList;
+  pWInfo->pOrderBy = pOrderBy;
+  pWInfo->pResultSet = pResultSet;
+  pWInfo->iBreak = pWInfo->iContinue = sqlite3VdbeMakeLabel(v);
+  pWInfo->wctrlFlags = wctrlFlags;
+  pWInfo->savedNQueryLoop = pParse->nQueryLoop;
+  pMaskSet = &pWInfo->sMaskSet;
+  sWLB.pWInfo = pWInfo;
+  sWLB.pWC = &pWInfo->sWC;
+  sWLB.pNew = (WhereLoop*)(((char*)pWInfo)+nByteWInfo);
+  assert( EIGHT_BYTE_ALIGNMENT(sWLB.pNew) );
+  whereLoopInit(sWLB.pNew);
+#ifdef SQLITE_DEBUG
+  sWLB.pNew->cId = '*';
+#endif
+
+  /* Split the WHERE clause into separate subexpressions where each
+  ** subexpression is separated by an AND operator.
+  */
+  initMaskSet(pMaskSet);
+  whereClauseInit(&pWInfo->sWC, pWInfo);
+  whereSplit(&pWInfo->sWC, pWhere, TK_AND);
+    
+  /* Special case: a WHERE clause that is constant.  Evaluate the
+  ** expression and either jump over all of the code or fall thru.
+  */
+  for(ii=0; ii<sWLB.pWC->nTerm; ii++){
+    if( nTabList==0 || sqlite3ExprIsConstantNotJoin(sWLB.pWC->a[ii].pExpr) ){
+      sqlite3ExprIfFalse(pParse, sWLB.pWC->a[ii].pExpr, pWInfo->iBreak,
+                         SQLITE_JUMPIFNULL);
+      sWLB.pWC->a[ii].wtFlags |= TERM_CODED;
+    }
+  }
+
+  /* Special case: No FROM clause
+  */
+  if( nTabList==0 ){
+    if( pOrderBy ) pWInfo->nOBSat = pOrderBy->nExpr;
+    if( wctrlFlags & WHERE_WANT_DISTINCT ){
+      pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
+    }
+  }
+
+  /* Assign a bit from the bitmask to every term in the FROM clause.
+  **
+  ** When assigning bitmask values to FROM clause cursors, it must be
+  ** the case that if X is the bitmask for the N-th FROM clause term then
+  ** the bitmask for all FROM clause terms to the left of the N-th term
+  ** is (X-1).   An expression from the ON clause of a LEFT JOIN can use
+  ** its Expr.iRightJoinTable value to find the bitmask of the right table
+  ** of the join.  Subtracting one from the right table bitmask gives a
+  ** bitmask for all tables to the left of the join.  Knowing the bitmask
+  ** for all tables to the left of a left join is important.  Ticket #3015.
+  **
+  ** Note that bitmasks are created for all pTabList->nSrc tables in
+  ** pTabList, not just the first nTabList tables.  nTabList is normally
+  ** equal to pTabList->nSrc but might be shortened to 1 if the
+  ** WHERE_ONETABLE_ONLY flag is set.
+  */
+  for(ii=0; ii<pTabList->nSrc; ii++){
+    createMask(pMaskSet, pTabList->a[ii].iCursor);
+  }
+#ifndef NDEBUG
+  {
+    Bitmask toTheLeft = 0;
+    for(ii=0; ii<pTabList->nSrc; ii++){
+      Bitmask m = getMask(pMaskSet, pTabList->a[ii].iCursor);
+      assert( (m-1)==toTheLeft );
+      toTheLeft |= m;
+    }
+  }
+#endif
+
+  /* Analyze all of the subexpressions.  Note that exprAnalyze() might
+  ** add new virtual terms onto the end of the WHERE clause.  We do not
+  ** want to analyze these virtual terms, so start analyzing at the end
+  ** and work forward so that the added virtual terms are never processed.
+  */
+  exprAnalyzeAll(pTabList, &pWInfo->sWC);
+  if( db->mallocFailed ){
+    goto whereBeginError;
+  }
+
+  if( wctrlFlags & WHERE_WANT_DISTINCT ){
+    if( isDistinctRedundant(pParse, pTabList, &pWInfo->sWC, pResultSet) ){
+      /* The DISTINCT marking is pointless.  Ignore it. */
+      pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
+    }else if( pOrderBy==0 ){
+      /* Try to ORDER BY the result set to make distinct processing easier */
+      pWInfo->wctrlFlags |= WHERE_DISTINCTBY;
+      pWInfo->pOrderBy = pResultSet;
+    }
+  }
+
+  /* Construct the WhereLoop objects */
+  WHERETRACE(0xffff,("*** Optimizer Start ***\n"));
+#if defined(WHERETRACE_ENABLED)
+  /* Display all terms of the WHERE clause */
+  if( sqlite3WhereTrace & 0x100 ){
+    int i;
+    for(i=0; i<sWLB.pWC->nTerm; i++){
+      whereTermPrint(&sWLB.pWC->a[i], i);
+    }
+  }
+#endif
+
+  if( nTabList!=1 || whereShortCut(&sWLB)==0 ){
+    rc = whereLoopAddAll(&sWLB);
+    if( rc ) goto whereBeginError;
+  
+    /* Display all of the WhereLoop objects if wheretrace is enabled */
+#ifdef WHERETRACE_ENABLED /* !=0 */
+    if( sqlite3WhereTrace ){
+      WhereLoop *p;
+      int i;
+      static char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz"
+                                       "ABCDEFGHIJKLMNOPQRSTUVWYXZ";
+      for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){
+        p->cId = zLabel[i%sizeof(zLabel)];
+        whereLoopPrint(p, sWLB.pWC);
+      }
+    }
+#endif
+  
+    wherePathSolver(pWInfo, 0);
+    if( db->mallocFailed ) goto whereBeginError;
+    if( pWInfo->pOrderBy ){
+       wherePathSolver(pWInfo, pWInfo->nRowOut+1);
+       if( db->mallocFailed ) goto whereBeginError;
+    }
+  }
+  if( pWInfo->pOrderBy==0 && (db->flags & SQLITE_ReverseOrder)!=0 ){
+     pWInfo->revMask = (Bitmask)(-1);
+  }
+  if( pParse->nErr || NEVER(db->mallocFailed) ){
+    goto whereBeginError;
+  }
+#ifdef WHERETRACE_ENABLED /* !=0 */
+  if( sqlite3WhereTrace ){
+    int ii;
+    sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut);
+    if( pWInfo->nOBSat>0 ){
+      sqlite3DebugPrintf(" ORDERBY=%d,0x%llx", pWInfo->nOBSat, pWInfo->revMask);
+    }
+    switch( pWInfo->eDistinct ){
+      case WHERE_DISTINCT_UNIQUE: {
+        sqlite3DebugPrintf("  DISTINCT=unique");
+        break;
+      }
+      case WHERE_DISTINCT_ORDERED: {
+        sqlite3DebugPrintf("  DISTINCT=ordered");
+        break;
+      }
+      case WHERE_DISTINCT_UNORDERED: {
+        sqlite3DebugPrintf("  DISTINCT=unordered");
+        break;
+      }
+    }
+    sqlite3DebugPrintf("\n");
+    for(ii=0; ii<pWInfo->nLevel; ii++){
+      whereLoopPrint(pWInfo->a[ii].pWLoop, sWLB.pWC);
+    }
+  }
+#endif
+  /* Attempt to omit tables from the join that do not effect the result */
+  if( pWInfo->nLevel>=2
+   && pResultSet!=0
+   && OptimizationEnabled(db, SQLITE_OmitNoopJoin)
+  ){
+    Bitmask tabUsed = exprListTableUsage(pMaskSet, pResultSet);
+    if( sWLB.pOrderBy ) tabUsed |= exprListTableUsage(pMaskSet, sWLB.pOrderBy);
+    while( pWInfo->nLevel>=2 ){
+      WhereTerm *pTerm, *pEnd;
+      pLoop = pWInfo->a[pWInfo->nLevel-1].pWLoop;
+      if( (pWInfo->pTabList->a[pLoop->iTab].jointype & JT_LEFT)==0 ) break;
+      if( (wctrlFlags & WHERE_WANT_DISTINCT)==0
+       && (pLoop->wsFlags & WHERE_ONEROW)==0
+      ){
+        break;
+      }
+      if( (tabUsed & pLoop->maskSelf)!=0 ) break;
+      pEnd = sWLB.pWC->a + sWLB.pWC->nTerm;
+      for(pTerm=sWLB.pWC->a; pTerm<pEnd; pTerm++){
+        if( (pTerm->prereqAll & pLoop->maskSelf)!=0
+         && !ExprHasProperty(pTerm->pExpr, EP_FromJoin)
+        ){
+          break;
+        }
+      }
+      if( pTerm<pEnd ) break;
+      WHERETRACE(0xffff, ("-> drop loop %c not used\n", pLoop->cId));
+      pWInfo->nLevel--;
+      nTabList--;
+    }
+  }
+  WHERETRACE(0xffff,("*** Optimizer Finished ***\n"));
+  pWInfo->pParse->nQueryLoop += pWInfo->nRowOut;
+
+  /* If the caller is an UPDATE or DELETE statement that is requesting
+  ** to use a one-pass algorithm, determine if this is appropriate.
+  ** The one-pass algorithm only works if the WHERE clause constrains
+  ** the statement to update a single row.
+  */
+  assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 );
+  if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 
+   && (pWInfo->a[0].pWLoop->wsFlags & WHERE_ONEROW)!=0 ){
+    pWInfo->okOnePass = 1;
+    if( HasRowid(pTabList->a[0].pTab) ){
+      pWInfo->a[0].pWLoop->wsFlags &= ~WHERE_IDX_ONLY;
+    }
+  }
+
+  /* Open all tables in the pTabList and any indices selected for
+  ** searching those tables.
+  */
+  notReady = ~(Bitmask)0;
+  for(ii=0, pLevel=pWInfo->a; ii<nTabList; ii++, pLevel++){
+    Table *pTab;     /* Table to open */
+    int iDb;         /* Index of database containing table/index */
+    struct SrcList_item *pTabItem;
+
+    pTabItem = &pTabList->a[pLevel->iFrom];
+    pTab = pTabItem->pTab;
+    iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
+    pLoop = pLevel->pWLoop;
+    if( (pTab->tabFlags & TF_Ephemeral)!=0 || pTab->pSelect ){
+      /* Do nothing */
+    }else
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+    if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){
+      const char *pVTab = (const char *)sqlite3GetVTable(db, pTab);
+      int iCur = pTabItem->iCursor;
+      sqlite3VdbeAddOp4(v, OP_VOpen, iCur, 0, 0, pVTab, P4_VTAB);
+    }else if( IsVirtual(pTab) ){
+      /* noop */
+    }else
+#endif
+    if( (pLoop->wsFlags & WHERE_IDX_ONLY)==0
+         && (wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0 ){
+      int op = OP_OpenRead;
+      if( pWInfo->okOnePass ){
+        op = OP_OpenWrite;
+        pWInfo->aiCurOnePass[0] = pTabItem->iCursor;
+      };
+      sqlite3OpenTable(pParse, pTabItem->iCursor, iDb, pTab, op);
+      assert( pTabItem->iCursor==pLevel->iTabCur );
+      testcase( !pWInfo->okOnePass && pTab->nCol==BMS-1 );
+      testcase( !pWInfo->okOnePass && pTab->nCol==BMS );
+      if( !pWInfo->okOnePass && pTab->nCol<BMS && HasRowid(pTab) ){
+        Bitmask b = pTabItem->colUsed;
+        int n = 0;
+        for(; b; b=b>>1, n++){}
+        sqlite3VdbeChangeP4(v, sqlite3VdbeCurrentAddr(v)-1, 
+                            SQLITE_INT_TO_PTR(n), P4_INT32);
+        assert( n<=pTab->nCol );
+      }
+    }else{
+      sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName);
+    }
+    if( pLoop->wsFlags & WHERE_INDEXED ){
+      Index *pIx = pLoop->u.btree.pIndex;
+      int iIndexCur;
+      int op = OP_OpenRead;
+      /* iIdxCur is always set if to a positive value if ONEPASS is possible */
+      assert( iIdxCur!=0 || (pWInfo->wctrlFlags & WHERE_ONEPASS_DESIRED)==0 );
+      if( !HasRowid(pTab) && IsPrimaryKeyIndex(pIx)
+       && (wctrlFlags & WHERE_ONETABLE_ONLY)!=0
+      ){
+        /* This is one term of an OR-optimization using the PRIMARY KEY of a
+        ** WITHOUT ROWID table.  No need for a separate index */
+        iIndexCur = pLevel->iTabCur;
+        op = 0;
+      }else if( pWInfo->okOnePass ){
+        Index *pJ = pTabItem->pTab->pIndex;
+        iIndexCur = iIdxCur;
+        assert( wctrlFlags & WHERE_ONEPASS_DESIRED );
+        while( ALWAYS(pJ) && pJ!=pIx ){
+          iIndexCur++;
+          pJ = pJ->pNext;
+        }
+        op = OP_OpenWrite;
+        pWInfo->aiCurOnePass[1] = iIndexCur;
+      }else if( iIdxCur && (wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ){
+        iIndexCur = iIdxCur;
+        if( wctrlFlags & WHERE_REOPEN_IDX ) op = OP_ReopenIdx;
+      }else{
+        iIndexCur = pParse->nTab++;
+      }
+      pLevel->iIdxCur = iIndexCur;
+      assert( pIx->pSchema==pTab->pSchema );
+      assert( iIndexCur>=0 );
+      if( op ){
+        sqlite3VdbeAddOp3(v, op, iIndexCur, pIx->tnum, iDb);
+        sqlite3VdbeSetP4KeyInfo(pParse, pIx);
+        VdbeComment((v, "%s", pIx->zName));
+      }
+    }
+    if( iDb>=0 ) sqlite3CodeVerifySchema(pParse, iDb);
+    notReady &= ~getMask(&pWInfo->sMaskSet, pTabItem->iCursor);
+  }
+  pWInfo->iTop = sqlite3VdbeCurrentAddr(v);
+  if( db->mallocFailed ) goto whereBeginError;
+
+  /* Generate the code to do the search.  Each iteration of the for
+  ** loop below generates code for a single nested loop of the VM
+  ** program.
+  */
+  notReady = ~(Bitmask)0;
+  for(ii=0; ii<nTabList; ii++){
+    int addrExplain;
+    int wsFlags;
+    pLevel = &pWInfo->a[ii];
+    wsFlags = pLevel->pWLoop->wsFlags;
+#ifndef SQLITE_OMIT_AUTOMATIC_INDEX
+    if( (pLevel->pWLoop->wsFlags & WHERE_AUTO_INDEX)!=0 ){
+      constructAutomaticIndex(pParse, &pWInfo->sWC,
+                &pTabList->a[pLevel->iFrom], notReady, pLevel);
+      if( db->mallocFailed ) goto whereBeginError;
+    }
+#endif
+    addrExplain = explainOneScan(
+        pParse, pTabList, pLevel, ii, pLevel->iFrom, wctrlFlags
+    );
+    pLevel->addrBody = sqlite3VdbeCurrentAddr(v);
+    notReady = codeOneLoopStart(pWInfo, ii, notReady);
+    pWInfo->iContinue = pLevel->addrCont;
+    if( (wsFlags&WHERE_MULTI_OR)==0 && (wctrlFlags&WHERE_ONETABLE_ONLY)==0 ){
+      addScanStatus(v, pTabList, pLevel, addrExplain);
+    }
+  }
+
+  /* Done. */
+  VdbeModuleComment((v, "Begin WHERE-core"));
+  return pWInfo;
+
+  /* Jump here if malloc fails */
+whereBeginError:
+  if( pWInfo ){
+    pParse->nQueryLoop = pWInfo->savedNQueryLoop;
+    whereInfoFree(db, pWInfo);
+  }
+  return 0;
+}
+
+/*
+** Generate the end of the WHERE loop.  See comments on 
+** sqlite3WhereBegin() for additional information.
+*/
+SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){
+  Parse *pParse = pWInfo->pParse;
+  Vdbe *v = pParse->pVdbe;
+  int i;
+  WhereLevel *pLevel;
+  WhereLoop *pLoop;
+  SrcList *pTabList = pWInfo->pTabList;
+  sqlite3 *db = pParse->db;
+
+  /* Generate loop termination code.
+  */
+  VdbeModuleComment((v, "End WHERE-core"));
+  sqlite3ExprCacheClear(pParse);
+  for(i=pWInfo->nLevel-1; i>=0; i--){
+    int addr;
+    pLevel = &pWInfo->a[i];
+    pLoop = pLevel->pWLoop;
+    sqlite3VdbeResolveLabel(v, pLevel->addrCont);
+    if( pLevel->op!=OP_Noop ){
+      sqlite3VdbeAddOp3(v, pLevel->op, pLevel->p1, pLevel->p2, pLevel->p3);
+      sqlite3VdbeChangeP5(v, pLevel->p5);
+      VdbeCoverage(v);
+      VdbeCoverageIf(v, pLevel->op==OP_Next);
+      VdbeCoverageIf(v, pLevel->op==OP_Prev);
+      VdbeCoverageIf(v, pLevel->op==OP_VNext);
+    }
+    if( pLoop->wsFlags & WHERE_IN_ABLE && pLevel->u.in.nIn>0 ){
+      struct InLoop *pIn;
+      int j;
+      sqlite3VdbeResolveLabel(v, pLevel->addrNxt);
+      for(j=pLevel->u.in.nIn, pIn=&pLevel->u.in.aInLoop[j-1]; j>0; j--, pIn--){
+        sqlite3VdbeJumpHere(v, pIn->addrInTop+1);
+        sqlite3VdbeAddOp2(v, pIn->eEndLoopOp, pIn->iCur, pIn->addrInTop);
+        VdbeCoverage(v);
+        VdbeCoverageIf(v, pIn->eEndLoopOp==OP_PrevIfOpen);
+        VdbeCoverageIf(v, pIn->eEndLoopOp==OP_NextIfOpen);
+        sqlite3VdbeJumpHere(v, pIn->addrInTop-1);
+      }
+      sqlite3DbFree(db, pLevel->u.in.aInLoop);
+    }
+    sqlite3VdbeResolveLabel(v, pLevel->addrBrk);
+    if( pLevel->addrSkip ){
+      sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrSkip);
+      VdbeComment((v, "next skip-scan on %s", pLoop->u.btree.pIndex->zName));
+      sqlite3VdbeJumpHere(v, pLevel->addrSkip);
+      sqlite3VdbeJumpHere(v, pLevel->addrSkip-2);
+    }
+    if( pLevel->iLeftJoin ){
+      addr = sqlite3VdbeAddOp1(v, OP_IfPos, pLevel->iLeftJoin); VdbeCoverage(v);
+      assert( (pLoop->wsFlags & WHERE_IDX_ONLY)==0
+           || (pLoop->wsFlags & WHERE_INDEXED)!=0 );
+      if( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 ){
+        sqlite3VdbeAddOp1(v, OP_NullRow, pTabList->a[i].iCursor);
+      }
+      if( pLoop->wsFlags & WHERE_INDEXED ){
+        sqlite3VdbeAddOp1(v, OP_NullRow, pLevel->iIdxCur);
+      }
+      if( pLevel->op==OP_Return ){
+        sqlite3VdbeAddOp2(v, OP_Gosub, pLevel->p1, pLevel->addrFirst);
+      }else{
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrFirst);
+      }
+      sqlite3VdbeJumpHere(v, addr);
+    }
+    VdbeModuleComment((v, "End WHERE-loop%d: %s", i,
+                     pWInfo->pTabList->a[pLevel->iFrom].pTab->zName));
+  }
+
+  /* The "break" point is here, just past the end of the outer loop.
+  ** Set it.
+  */
+  sqlite3VdbeResolveLabel(v, pWInfo->iBreak);
+
+  assert( pWInfo->nLevel<=pTabList->nSrc );
+  for(i=0, pLevel=pWInfo->a; i<pWInfo->nLevel; i++, pLevel++){
+    int k, last;
+    VdbeOp *pOp;
+    Index *pIdx = 0;
+    struct SrcList_item *pTabItem = &pTabList->a[pLevel->iFrom];
+    Table *pTab = pTabItem->pTab;
+    assert( pTab!=0 );
+    pLoop = pLevel->pWLoop;
+
+    /* For a co-routine, change all OP_Column references to the table of
+    ** the co-routine into OP_SCopy of result contained in a register.
+    ** OP_Rowid becomes OP_Null.
+    */
+    if( pTabItem->viaCoroutine && !db->mallocFailed ){
+      last = sqlite3VdbeCurrentAddr(v);
+      k = pLevel->addrBody;
+      pOp = sqlite3VdbeGetOp(v, k);
+      for(; k<last; k++, pOp++){
+        if( pOp->p1!=pLevel->iTabCur ) continue;
+        if( pOp->opcode==OP_Column ){
+          pOp->opcode = OP_Copy;
+          pOp->p1 = pOp->p2 + pTabItem->regResult;
+          pOp->p2 = pOp->p3;
+          pOp->p3 = 0;
+        }else if( pOp->opcode==OP_Rowid ){
+          pOp->opcode = OP_Null;
+          pOp->p1 = 0;
+          pOp->p3 = 0;
+        }
+      }
+      continue;
+    }
+
+    /* Close all of the cursors that were opened by sqlite3WhereBegin.
+    ** Except, do not close cursors that will be reused by the OR optimization
+    ** (WHERE_OMIT_OPEN_CLOSE).  And do not close the OP_OpenWrite cursors
+    ** created for the ONEPASS optimization.
+    */
+    if( (pTab->tabFlags & TF_Ephemeral)==0
+     && pTab->pSelect==0
+     && (pWInfo->wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0
+    ){
+      int ws = pLoop->wsFlags;
+      if( !pWInfo->okOnePass && (ws & WHERE_IDX_ONLY)==0 ){
+        sqlite3VdbeAddOp1(v, OP_Close, pTabItem->iCursor);
+      }
+      if( (ws & WHERE_INDEXED)!=0
+       && (ws & (WHERE_IPK|WHERE_AUTO_INDEX))==0 
+       && pLevel->iIdxCur!=pWInfo->aiCurOnePass[1]
+      ){
+        sqlite3VdbeAddOp1(v, OP_Close, pLevel->iIdxCur);
+      }
+    }
+
+    /* If this scan uses an index, make VDBE code substitutions to read data
+    ** from the index instead of from the table where possible.  In some cases
+    ** this optimization prevents the table from ever being read, which can
+    ** yield a significant performance boost.
+    ** 
+    ** Calls to the code generator in between sqlite3WhereBegin and
+    ** sqlite3WhereEnd will have created code that references the table
+    ** directly.  This loop scans all that code looking for opcodes
+    ** that reference the table and converts them into opcodes that
+    ** reference the index.
+    */
+    if( pLoop->wsFlags & (WHERE_INDEXED|WHERE_IDX_ONLY) ){
+      pIdx = pLoop->u.btree.pIndex;
+    }else if( pLoop->wsFlags & WHERE_MULTI_OR ){
+      pIdx = pLevel->u.pCovidx;
+    }
+    if( pIdx && !db->mallocFailed ){
+      last = sqlite3VdbeCurrentAddr(v);
+      k = pLevel->addrBody;
+      pOp = sqlite3VdbeGetOp(v, k);
+      for(; k<last; k++, pOp++){
+        if( pOp->p1!=pLevel->iTabCur ) continue;
+        if( pOp->opcode==OP_Column ){
+          int x = pOp->p2;
+          assert( pIdx->pTable==pTab );
+          if( !HasRowid(pTab) ){
+            Index *pPk = sqlite3PrimaryKeyIndex(pTab);
+            x = pPk->aiColumn[x];
+          }
+          x = sqlite3ColumnOfIndex(pIdx, x);
+          if( x>=0 ){
+            pOp->p2 = x;
+            pOp->p1 = pLevel->iIdxCur;
+          }
+          assert( (pLoop->wsFlags & WHERE_IDX_ONLY)==0 || x>=0 );
+        }else if( pOp->opcode==OP_Rowid ){
+          pOp->p1 = pLevel->iIdxCur;
+          pOp->opcode = OP_IdxRowid;
+        }
+      }
+    }
+  }
+
+  /* Final cleanup
+  */
+  pParse->nQueryLoop = pWInfo->savedNQueryLoop;
+  whereInfoFree(db, pWInfo);
+  return;
+}
+
+/************** End of where.c ***********************************************/
+/************** Begin file parse.c *******************************************/
+/* Driver template for the LEMON parser generator.
+** The author disclaims copyright to this source code.
+**
+** This version of "lempar.c" is modified, slightly, for use by SQLite.
+** The only modifications are the addition of a couple of NEVER()
+** macros to disable tests that are needed in the case of a general
+** LALR(1) grammar but which are always false in the
+** specific grammar used by SQLite.
+*/
+/* First off, code is included that follows the "include" declaration
+** in the input grammar file. */
+/* #include <stdio.h> */
+
+
+/*
+** Disable all error recovery processing in the parser push-down
+** automaton.
+*/
+#define YYNOERRORRECOVERY 1
+
+/*
+** Make yytestcase() the same as testcase()
+*/
+#define yytestcase(X) testcase(X)
+
+/*
+** An instance of this structure holds information about the
+** LIMIT clause of a SELECT statement.
+*/
+struct LimitVal {
+  Expr *pLimit;    /* The LIMIT expression.  NULL if there is no limit */
+  Expr *pOffset;   /* The OFFSET expression.  NULL if there is none */
+};
+
+/*
+** An instance of this structure is used to store the LIKE,
+** GLOB, NOT LIKE, and NOT GLOB operators.
+*/
+struct LikeOp {
+  Token eOperator;  /* "like" or "glob" or "regexp" */
+  int bNot;         /* True if the NOT keyword is present */
+};
+
+/*
+** An instance of the following structure describes the event of a
+** TRIGGER.  "a" is the event type, one of TK_UPDATE, TK_INSERT,
+** TK_DELETE, or TK_INSTEAD.  If the event is of the form
+**
+**      UPDATE ON (a,b,c)
+**
+** Then the "b" IdList records the list "a,b,c".
+*/
+struct TrigEvent { int a; IdList * b; };
+
+/*
+** An instance of this structure holds the ATTACH key and the key type.
+*/
+struct AttachKey { int type;  Token key; };
+
+
+  /* This is a utility routine used to set the ExprSpan.zStart and
+  ** ExprSpan.zEnd values of pOut so that the span covers the complete
+  ** range of text beginning with pStart and going to the end of pEnd.
+  */
+  static void spanSet(ExprSpan *pOut, Token *pStart, Token *pEnd){
+    pOut->zStart = pStart->z;
+    pOut->zEnd = &pEnd->z[pEnd->n];
+  }
+
+  /* Construct a new Expr object from a single identifier.  Use the
+  ** new Expr to populate pOut.  Set the span of pOut to be the identifier
+  ** that created the expression.
+  */
+  static void spanExpr(ExprSpan *pOut, Parse *pParse, int op, Token *pValue){
+    pOut->pExpr = sqlite3PExpr(pParse, op, 0, 0, pValue);
+    pOut->zStart = pValue->z;
+    pOut->zEnd = &pValue->z[pValue->n];
+  }
+
+  /* This routine constructs a binary expression node out of two ExprSpan
+  ** objects and uses the result to populate a new ExprSpan object.
+  */
+  static void spanBinaryExpr(
+    ExprSpan *pOut,     /* Write the result here */
+    Parse *pParse,      /* The parsing context.  Errors accumulate here */
+    int op,             /* The binary operation */
+    ExprSpan *pLeft,    /* The left operand */
+    ExprSpan *pRight    /* The right operand */
+  ){
+    pOut->pExpr = sqlite3PExpr(pParse, op, pLeft->pExpr, pRight->pExpr, 0);
+    pOut->zStart = pLeft->zStart;
+    pOut->zEnd = pRight->zEnd;
+  }
+
+  /* Construct an expression node for a unary postfix operator
+  */
+  static void spanUnaryPostfix(
+    ExprSpan *pOut,        /* Write the new expression node here */
+    Parse *pParse,         /* Parsing context to record errors */
+    int op,                /* The operator */
+    ExprSpan *pOperand,    /* The operand */
+    Token *pPostOp         /* The operand token for setting the span */
+  ){
+    pOut->pExpr = sqlite3PExpr(pParse, op, pOperand->pExpr, 0, 0);
+    pOut->zStart = pOperand->zStart;
+    pOut->zEnd = &pPostOp->z[pPostOp->n];
+  }                           
+
+  /* A routine to convert a binary TK_IS or TK_ISNOT expression into a
+  ** unary TK_ISNULL or TK_NOTNULL expression. */
+  static void binaryToUnaryIfNull(Parse *pParse, Expr *pY, Expr *pA, int op){
+    sqlite3 *db = pParse->db;
+    if( pY && pA && pY->op==TK_NULL ){
+      pA->op = (u8)op;
+      sqlite3ExprDelete(db, pA->pRight);
+      pA->pRight = 0;
+    }
+  }
+
+  /* Construct an expression node for a unary prefix operator
+  */
+  static void spanUnaryPrefix(
+    ExprSpan *pOut,        /* Write the new expression node here */
+    Parse *pParse,         /* Parsing context to record errors */
+    int op,                /* The operator */
+    ExprSpan *pOperand,    /* The operand */
+    Token *pPreOp         /* The operand token for setting the span */
+  ){
+    pOut->pExpr = sqlite3PExpr(pParse, op, pOperand->pExpr, 0, 0);
+    pOut->zStart = pPreOp->z;
+    pOut->zEnd = pOperand->zEnd;
+  }
+/* Next is all token values, in a form suitable for use by makeheaders.
+** This section will be null unless lemon is run with the -m switch.
+*/
+/* 
+** These constants (all generated automatically by the parser generator)
+** specify the various kinds of tokens (terminals) that the parser
+** understands. 
+**
+** Each symbol here is a terminal symbol in the grammar.
+*/
+/* Make sure the INTERFACE macro is defined.
+*/
+#ifndef INTERFACE
+# define INTERFACE 1
+#endif
+/* The next thing included is series of defines which control
+** various aspects of the generated parser.
+**    YYCODETYPE         is the data type used for storing terminal
+**                       and nonterminal numbers.  "unsigned char" is
+**                       used if there are fewer than 250 terminals
+**                       and nonterminals.  "int" is used otherwise.
+**    YYNOCODE           is a number of type YYCODETYPE which corresponds
+**                       to no legal terminal or nonterminal number.  This
+**                       number is used to fill in empty slots of the hash 
+**                       table.
+**    YYFALLBACK         If defined, this indicates that one or more tokens
+**                       have fall-back values which should be used if the
+**                       original value of the token will not parse.
+**    YYACTIONTYPE       is the data type used for storing terminal
+**                       and nonterminal numbers.  "unsigned char" is
+**                       used if there are fewer than 250 rules and
+**                       states combined.  "int" is used otherwise.
+**    sqlite3ParserTOKENTYPE     is the data type used for minor tokens given 
+**                       directly to the parser from the tokenizer.
+**    YYMINORTYPE        is the data type used for all minor tokens.
+**                       This is typically a union of many types, one of
+**                       which is sqlite3ParserTOKENTYPE.  The entry in the union
+**                       for base tokens is called "yy0".
+**    YYSTACKDEPTH       is the maximum depth of the parser's stack.  If
+**                       zero the stack is dynamically sized using realloc()
+**    sqlite3ParserARG_SDECL     A static variable declaration for the %extra_argument
+**    sqlite3ParserARG_PDECL     A parameter declaration for the %extra_argument
+**    sqlite3ParserARG_STORE     Code to store %extra_argument into yypParser
+**    sqlite3ParserARG_FETCH     Code to extract %extra_argument from yypParser
+**    YYNSTATE           the combined number of states.
+**    YYNRULE            the number of rules in the grammar
+**    YYERRORSYMBOL      is the code number of the error symbol.  If not
+**                       defined, then do no error processing.
+*/
+#define YYCODETYPE unsigned char
+#define YYNOCODE 254
+#define YYACTIONTYPE unsigned short int
+#define YYWILDCARD 70
+#define sqlite3ParserTOKENTYPE Token
+typedef union {
+  int yyinit;
+  sqlite3ParserTOKENTYPE yy0;
+  Select* yy3;
+  ExprList* yy14;
+  With* yy59;
+  SrcList* yy65;
+  struct LikeOp yy96;
+  Expr* yy132;
+  u8 yy186;
+  int yy328;
+  ExprSpan yy346;
+  struct TrigEvent yy378;
+  u16 yy381;
+  IdList* yy408;
+  struct {int value; int mask;} yy429;
+  TriggerStep* yy473;
+  struct LimitVal yy476;
+} YYMINORTYPE;
+#ifndef YYSTACKDEPTH
+#define YYSTACKDEPTH 100
+#endif
+#define sqlite3ParserARG_SDECL Parse *pParse;
+#define sqlite3ParserARG_PDECL ,Parse *pParse
+#define sqlite3ParserARG_FETCH Parse *pParse = yypParser->pParse
+#define sqlite3ParserARG_STORE yypParser->pParse = pParse
+#define YYNSTATE 642
+#define YYNRULE 327
+#define YYFALLBACK 1
+#define YY_NO_ACTION      (YYNSTATE+YYNRULE+2)
+#define YY_ACCEPT_ACTION  (YYNSTATE+YYNRULE+1)
+#define YY_ERROR_ACTION   (YYNSTATE+YYNRULE)
+
+/* The yyzerominor constant is used to initialize instances of
+** YYMINORTYPE objects to zero. */
+static const YYMINORTYPE yyzerominor = { 0 };
+
+/* Define the yytestcase() macro to be a no-op if is not already defined
+** otherwise.
+**
+** Applications can choose to define yytestcase() in the %include section
+** to a macro that can assist in verifying code coverage.  For production
+** code the yytestcase() macro should be turned off.  But it is useful
+** for testing.
+*/
+#ifndef yytestcase
+# define yytestcase(X)
+#endif
+
+
+/* Next are the tables used to determine what action to take based on the
+** current state and lookahead token.  These tables are used to implement
+** functions that take a state number and lookahead value and return an
+** action integer.  
+**
+** Suppose the action integer is N.  Then the action is determined as
+** follows
+**
+**   0 <= N < YYNSTATE                  Shift N.  That is, push the lookahead
+**                                      token onto the stack and goto state N.
+**
+**   YYNSTATE <= N < YYNSTATE+YYNRULE   Reduce by rule N-YYNSTATE.
+**
+**   N == YYNSTATE+YYNRULE              A syntax error has occurred.
+**
+**   N == YYNSTATE+YYNRULE+1            The parser accepts its input.
+**
+**   N == YYNSTATE+YYNRULE+2            No such action.  Denotes unused
+**                                      slots in the yy_action[] table.
+**
+** The action table is constructed as a single large table named yy_action[].
+** Given state S and lookahead X, the action is computed as
+**
+**      yy_action[ yy_shift_ofst[S] + X ]
+**
+** If the index value yy_shift_ofst[S]+X is out of range or if the value
+** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S]
+** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table
+** and that yy_default[S] should be used instead.  
+**
+** The formula above is for computing the action when the lookahead is
+** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
+** a reduce action) then the yy_reduce_ofst[] array is used in place of
+** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of
+** YY_SHIFT_USE_DFLT.
+**
+** The following are the tables generated in this section:
+**
+**  yy_action[]        A single table containing all actions.
+**  yy_lookahead[]     A table containing the lookahead for each entry in
+**                     yy_action.  Used to detect hash collisions.
+**  yy_shift_ofst[]    For each state, the offset into yy_action for
+**                     shifting terminals.
+**  yy_reduce_ofst[]   For each state, the offset into yy_action for
+**                     shifting non-terminals after a reduce.
+**  yy_default[]       Default action for each state.
+*/
+#define YY_ACTTAB_COUNT (1497)
+static const YYACTIONTYPE yy_action[] = {
+ /*     0 */   306,  212,  432,  955,  639,  191,  955,  295,  559,   88,
+ /*    10 */    88,   88,   88,   81,   86,   86,   86,   86,   85,   85,
+ /*    20 */    84,   84,   84,   83,  330,  185,  184,  183,  635,  635,
+ /*    30 */   292,  606,  606,   88,   88,   88,   88,  683,   86,   86,
+ /*    40 */    86,   86,   85,   85,   84,   84,   84,   83,  330,   16,
+ /*    50 */   436,  597,   89,   90,   80,  600,  599,  601,  601,   87,
+ /*    60 */    87,   88,   88,   88,   88,  684,   86,   86,   86,   86,
+ /*    70 */    85,   85,   84,   84,   84,   83,  330,  306,  559,   84,
+ /*    80 */    84,   84,   83,  330,   65,   86,   86,   86,   86,   85,
+ /*    90 */    85,   84,   84,   84,   83,  330,  635,  635,  634,  633,
+ /*   100 */   182,  682,  550,  379,  376,  375,   17,  322,  606,  606,
+ /*   110 */   371,  198,  479,   91,  374,   82,   79,  165,   85,   85,
+ /*   120 */    84,   84,   84,   83,  330,  598,  635,  635,  107,   89,
+ /*   130 */    90,   80,  600,  599,  601,  601,   87,   87,   88,   88,
+ /*   140 */    88,   88,  186,   86,   86,   86,   86,   85,   85,   84,
+ /*   150 */    84,   84,   83,  330,  306,  594,  594,  142,  328,  327,
+ /*   160 */   484,  249,  344,  238,  635,  635,  634,  633,  585,  448,
+ /*   170 */   526,  525,  229,  388,    1,  394,  450,  584,  449,  635,
+ /*   180 */   635,  635,  635,  319,  395,  606,  606,  199,  157,  273,
+ /*   190 */   382,  268,  381,  187,  635,  635,  634,  633,  311,  555,
+ /*   200 */   266,  593,  593,  266,  347,  588,   89,   90,   80,  600,
+ /*   210 */   599,  601,  601,   87,   87,   88,   88,   88,   88,  478,
+ /*   220 */    86,   86,   86,   86,   85,   85,   84,   84,   84,   83,
+ /*   230 */   330,  306,  272,  536,  634,  633,  146,  610,  197,  310,
+ /*   240 */   575,  182,  482,  271,  379,  376,  375,  506,   21,  634,
+ /*   250 */   633,  634,  633,  635,  635,  374,  611,  574,  548,  440,
+ /*   260 */   111,  563,  606,  606,  634,  633,  324,  479,  608,  608,
+ /*   270 */   608,  300,  435,  573,  119,  407,  210,  162,  562,  883,
+ /*   280 */   592,  592,  306,   89,   90,   80,  600,  599,  601,  601,
+ /*   290 */    87,   87,   88,   88,   88,   88,  506,   86,   86,   86,
+ /*   300 */    86,   85,   85,   84,   84,   84,   83,  330,  620,  111,
+ /*   310 */   635,  635,  361,  606,  606,  358,  249,  349,  248,  433,
+ /*   320 */   243,  479,  586,  634,  633,  195,  611,   93,  119,  221,
+ /*   330 */   575,  497,  534,  534,   89,   90,   80,  600,  599,  601,
+ /*   340 */   601,   87,   87,   88,   88,   88,   88,  574,   86,   86,
+ /*   350 */    86,   86,   85,   85,   84,   84,   84,   83,  330,  306,
+ /*   360 */    77,  429,  638,  573,  589,  530,  240,  230,  242,  105,
+ /*   370 */   249,  349,  248,  515,  588,  208,  460,  529,  564,  173,
+ /*   380 */   634,  633,  970,  144,  430,    2,  424,  228,  380,  557,
+ /*   390 */   606,  606,  190,  153,  159,  158,  514,   51,  632,  631,
+ /*   400 */   630,   71,  536,  432,  954,  196,  610,  954,  614,   45,
+ /*   410 */    18,   89,   90,   80,  600,  599,  601,  601,   87,   87,
+ /*   420 */    88,   88,   88,   88,  261,   86,   86,   86,   86,   85,
+ /*   430 */    85,   84,   84,   84,   83,  330,  306,  608,  608,  608,
+ /*   440 */   542,  424,  402,  385,  241,  506,  451,  320,  211,  543,
+ /*   450 */   164,  436,  386,  293,  451,  587,  108,  496,  111,  334,
+ /*   460 */   391,  591,  424,  614,   27,  452,  453,  606,  606,   72,
+ /*   470 */   257,   70,  259,  452,  339,  342,  564,  582,   68,  415,
+ /*   480 */   469,  328,  327,   62,  614,   45,  110,  393,   89,   90,
+ /*   490 */    80,  600,  599,  601,  601,   87,   87,   88,   88,   88,
+ /*   500 */    88,  152,   86,   86,   86,   86,   85,   85,   84,   84,
+ /*   510 */    84,   83,  330,  306,  110,  499,  520,  538,  402,  389,
+ /*   520 */   424,  110,  566,  500,  593,  593,  454,   82,   79,  165,
+ /*   530 */   424,  591,  384,  564,  340,  615,  188,  162,  424,  350,
+ /*   540 */   616,  424,  614,   44,  606,  606,  445,  582,  300,  434,
+ /*   550 */   151,   19,  614,    9,  568,  580,  348,  615,  469,  567,
+ /*   560 */   614,   26,  616,  614,   45,   89,   90,   80,  600,  599,
+ /*   570 */   601,  601,   87,   87,   88,   88,   88,   88,  411,   86,
+ /*   580 */    86,   86,   86,   85,   85,   84,   84,   84,   83,  330,
+ /*   590 */   306,  579,  110,  578,  521,  282,  433,  398,  400,  255,
+ /*   600 */   486,   82,   79,  165,  487,  164,   82,   79,  165,  488,
+ /*   610 */   488,  364,  387,  424,  544,  544,  509,  350,  362,  155,
+ /*   620 */   191,  606,  606,  559,  642,  640,  333,   82,   79,  165,
+ /*   630 */   305,  564,  507,  312,  357,  614,   45,  329,  596,  595,
+ /*   640 */   194,  337,   89,   90,   80,  600,  599,  601,  601,   87,
+ /*   650 */    87,   88,   88,   88,   88,  424,   86,   86,   86,   86,
+ /*   660 */    85,   85,   84,   84,   84,   83,  330,  306,   20,  323,
+ /*   670 */   150,  263,  211,  543,  421,  596,  595,  614,   22,  424,
+ /*   680 */   193,  424,  284,  424,  391,  424,  509,  424,  577,  424,
+ /*   690 */   186,  335,  424,  559,  424,  313,  120,  546,  606,  606,
+ /*   700 */    67,  614,   47,  614,   50,  614,   48,  614,  100,  614,
+ /*   710 */    99,  614,  101,  576,  614,  102,  614,  109,  326,   89,
+ /*   720 */    90,   80,  600,  599,  601,  601,   87,   87,   88,   88,
+ /*   730 */    88,   88,  424,   86,   86,   86,   86,   85,   85,   84,
+ /*   740 */    84,   84,   83,  330,  306,  424,  311,  424,  585,   54,
+ /*   750 */   424,  516,  517,  590,  614,  112,  424,  584,  424,  572,
+ /*   760 */   424,  195,  424,  571,  424,   67,  424,  614,   94,  614,
+ /*   770 */    98,  424,  614,   97,  264,  606,  606,  195,  614,   46,
+ /*   780 */   614,   96,  614,   30,  614,   49,  614,  115,  614,  114,
+ /*   790 */   418,  229,  388,  614,  113,  306,   89,   90,   80,  600,
+ /*   800 */   599,  601,  601,   87,   87,   88,   88,   88,   88,  424,
+ /*   810 */    86,   86,   86,   86,   85,   85,   84,   84,   84,   83,
+ /*   820 */   330,  119,  424,  590,  110,  372,  606,  606,  195,   53,
+ /*   830 */   250,  614,   29,  195,  472,  438,  729,  190,  302,  498,
+ /*   840 */    14,  523,  641,    2,  614,   43,  306,   89,   90,   80,
+ /*   850 */   600,  599,  601,  601,   87,   87,   88,   88,   88,   88,
+ /*   860 */   424,   86,   86,   86,   86,   85,   85,   84,   84,   84,
+ /*   870 */    83,  330,  424,  613,  964,  964,  354,  606,  606,  420,
+ /*   880 */   312,   64,  614,   42,  391,  355,  283,  437,  301,  255,
+ /*   890 */   414,  410,  495,  492,  614,   28,  471,  306,   89,   90,
+ /*   900 */    80,  600,  599,  601,  601,   87,   87,   88,   88,   88,
+ /*   910 */    88,  424,   86,   86,   86,   86,   85,   85,   84,   84,
+ /*   920 */    84,   83,  330,  424,  110,  110,  110,  110,  606,  606,
+ /*   930 */   110,  254,   13,  614,   41,  532,  531,  283,  481,  531,
+ /*   940 */   457,  284,  119,  561,  356,  614,   40,  284,  306,   89,
+ /*   950 */    78,   80,  600,  599,  601,  601,   87,   87,   88,   88,
+ /*   960 */    88,   88,  424,   86,   86,   86,   86,   85,   85,   84,
+ /*   970 */    84,   84,   83,  330,  110,  424,  341,  220,  555,  606,
+ /*   980 */   606,  351,  555,  318,  614,   95,  413,  255,   83,  330,
+ /*   990 */   284,  284,  255,  640,  333,  356,  255,  614,   39,  306,
+ /*  1000 */   356,   90,   80,  600,  599,  601,  601,   87,   87,   88,
+ /*  1010 */    88,   88,   88,  424,   86,   86,   86,   86,   85,   85,
+ /*  1020 */    84,   84,   84,   83,  330,  424,  317,  316,  141,  465,
+ /*  1030 */   606,  606,  219,  619,  463,  614,   10,  417,  462,  255,
+ /*  1040 */   189,  510,  553,  351,  207,  363,  161,  614,   38,  315,
+ /*  1050 */   218,  255,  255,   80,  600,  599,  601,  601,   87,   87,
+ /*  1060 */    88,   88,   88,   88,  424,   86,   86,   86,   86,   85,
+ /*  1070 */    85,   84,   84,   84,   83,  330,   76,  419,  255,    3,
+ /*  1080 */   878,  461,  424,  247,  331,  331,  614,   37,  217,   76,
+ /*  1090 */   419,  390,    3,  216,  215,  422,    4,  331,  331,  424,
+ /*  1100 */   547,   12,  424,  545,  614,   36,  424,  541,  422,  424,
+ /*  1110 */   540,  424,  214,  424,  408,  424,  539,  403,  605,  605,
+ /*  1120 */   237,  614,   25,  119,  614,   24,  588,  408,  614,   45,
+ /*  1130 */   118,  614,   35,  614,   34,  614,   33,  614,   23,  588,
+ /*  1140 */    60,  223,  603,  602,  513,  378,   73,   74,  140,  139,
+ /*  1150 */   424,  110,  265,   75,  426,  425,   59,  424,  610,   73,
+ /*  1160 */    74,  549,  402,  404,  424,  373,   75,  426,  425,  604,
+ /*  1170 */   138,  610,  614,   11,  392,   76,  419,  181,    3,  614,
+ /*  1180 */    32,  271,  369,  331,  331,  493,  614,   31,  149,  608,
+ /*  1190 */   608,  608,  607,   15,  422,  365,  614,    8,  137,  489,
+ /*  1200 */   136,  190,  608,  608,  608,  607,   15,  485,  176,  135,
+ /*  1210 */     7,  252,  477,  408,  174,  133,  175,  474,   57,   56,
+ /*  1220 */   132,  130,  119,   76,  419,  588,    3,  468,  245,  464,
+ /*  1230 */   171,  331,  331,  125,  123,  456,  447,  122,  446,  104,
+ /*  1240 */   336,  231,  422,  166,  154,   73,   74,  332,  116,  431,
+ /*  1250 */   121,  309,   75,  426,  425,  222,  106,  610,  308,  637,
+ /*  1260 */   204,  408,  629,  627,  628,    6,  200,  428,  427,  290,
+ /*  1270 */   203,  622,  201,  588,   62,   63,  289,   66,  419,  399,
+ /*  1280 */     3,  401,  288,   92,  143,  331,  331,  287,  608,  608,
+ /*  1290 */   608,  607,   15,   73,   74,  227,  422,  325,   69,  416,
+ /*  1300 */    75,  426,  425,  612,  412,  610,  192,   61,  569,  209,
+ /*  1310 */   396,  226,  278,  225,  383,  408,  527,  558,  276,  533,
+ /*  1320 */   552,  528,  321,  523,  370,  508,  180,  588,  494,  179,
+ /*  1330 */   366,  117,  253,  269,  522,  503,  608,  608,  608,  607,
+ /*  1340 */    15,  551,  502,   58,  274,  524,  178,   73,   74,  304,
+ /*  1350 */   501,  368,  303,  206,   75,  426,  425,  491,  360,  610,
+ /*  1360 */   213,  177,  483,  131,  345,  298,  297,  296,  202,  294,
+ /*  1370 */   480,  490,  466,  134,  172,  129,  444,  346,  470,  128,
+ /*  1380 */   314,  459,  103,  127,  126,  148,  124,  167,  443,  235,
+ /*  1390 */   608,  608,  608,  607,   15,  442,  439,  623,  234,  299,
+ /*  1400 */   145,  583,  291,  377,  581,  160,  119,  156,  270,  636,
+ /*  1410 */   971,  169,  279,  626,  520,  625,  473,  624,  170,  621,
+ /*  1420 */   618,  119,  168,   55,  409,  423,  537,  609,  286,  285,
+ /*  1430 */   405,  570,  560,  556,    5,   52,  458,  554,  147,  267,
+ /*  1440 */   519,  504,  518,  406,  262,  239,  260,  512,  343,  511,
+ /*  1450 */   258,  353,  565,  256,  224,  251,  359,  277,  275,  476,
+ /*  1460 */   475,  246,  352,  244,  467,  455,  236,  233,  232,  307,
+ /*  1470 */   441,  281,  205,  163,  397,  280,  535,  505,  330,  617,
+ /*  1480 */   971,  971,  971,  971,  367,  971,  971,  971,  971,  971,
+ /*  1490 */   971,  971,  971,  971,  971,  971,  338,
+};
+static const YYCODETYPE yy_lookahead[] = {
+ /*     0 */    19,   22,   22,   23,    1,   24,   26,   15,   27,   80,
+ /*    10 */    81,   82,   83,   84,   85,   86,   87,   88,   89,   90,
+ /*    20 */    91,   92,   93,   94,   95,  108,  109,  110,   27,   28,
+ /*    30 */    23,   50,   51,   80,   81,   82,   83,  122,   85,   86,
+ /*    40 */    87,   88,   89,   90,   91,   92,   93,   94,   95,   22,
+ /*    50 */    70,   23,   71,   72,   73,   74,   75,   76,   77,   78,
+ /*    60 */    79,   80,   81,   82,   83,  122,   85,   86,   87,   88,
+ /*    70 */    89,   90,   91,   92,   93,   94,   95,   19,   97,   91,
+ /*    80 */    92,   93,   94,   95,   26,   85,   86,   87,   88,   89,
+ /*    90 */    90,   91,   92,   93,   94,   95,   27,   28,   97,   98,
+ /*   100 */    99,  122,  211,  102,  103,  104,   79,   19,   50,   51,
+ /*   110 */    19,  122,   59,   55,  113,  224,  225,  226,   89,   90,
+ /*   120 */    91,   92,   93,   94,   95,   23,   27,   28,   26,   71,
+ /*   130 */    72,   73,   74,   75,   76,   77,   78,   79,   80,   81,
+ /*   140 */    82,   83,   51,   85,   86,   87,   88,   89,   90,   91,
+ /*   150 */    92,   93,   94,   95,   19,  132,  133,   58,   89,   90,
+ /*   160 */    21,  108,  109,  110,   27,   28,   97,   98,   33,  100,
+ /*   170 */     7,    8,  119,  120,   22,   19,  107,   42,  109,   27,
+ /*   180 */    28,   27,   28,   95,   28,   50,   51,   99,  100,  101,
+ /*   190 */   102,  103,  104,  105,   27,   28,   97,   98,  107,  152,
+ /*   200 */   112,  132,  133,  112,   65,   69,   71,   72,   73,   74,
+ /*   210 */    75,   76,   77,   78,   79,   80,   81,   82,   83,   11,
+ /*   220 */    85,   86,   87,   88,   89,   90,   91,   92,   93,   94,
+ /*   230 */    95,   19,  101,   97,   97,   98,   24,  101,  122,  157,
+ /*   240 */    12,   99,  103,  112,  102,  103,  104,  152,   22,   97,
+ /*   250 */    98,   97,   98,   27,   28,  113,   27,   29,   91,  164,
+ /*   260 */   165,  124,   50,   51,   97,   98,  219,   59,  132,  133,
+ /*   270 */   134,   22,   23,   45,   66,   47,  212,  213,  124,  140,
+ /*   280 */   132,  133,   19,   71,   72,   73,   74,   75,   76,   77,
+ /*   290 */    78,   79,   80,   81,   82,   83,  152,   85,   86,   87,
+ /*   300 */    88,   89,   90,   91,   92,   93,   94,   95,  164,  165,
+ /*   310 */    27,   28,  230,   50,   51,  233,  108,  109,  110,   70,
+ /*   320 */    16,   59,   23,   97,   98,   26,   97,   22,   66,  185,
+ /*   330 */    12,  187,   27,   28,   71,   72,   73,   74,   75,   76,
+ /*   340 */    77,   78,   79,   80,   81,   82,   83,   29,   85,   86,
+ /*   350 */    87,   88,   89,   90,   91,   92,   93,   94,   95,   19,
+ /*   360 */    22,  148,  149,   45,   23,   47,   62,  154,   64,  156,
+ /*   370 */   108,  109,  110,   37,   69,   23,  163,   59,   26,   26,
+ /*   380 */    97,   98,  144,  145,  146,  147,  152,  200,   52,   23,
+ /*   390 */    50,   51,   26,   22,   89,   90,   60,  210,    7,    8,
+ /*   400 */     9,  138,   97,   22,   23,   26,  101,   26,  174,  175,
+ /*   410 */   197,   71,   72,   73,   74,   75,   76,   77,   78,   79,
+ /*   420 */    80,   81,   82,   83,   16,   85,   86,   87,   88,   89,
+ /*   430 */    90,   91,   92,   93,   94,   95,   19,  132,  133,  134,
+ /*   440 */    23,  152,  208,  209,  140,  152,  152,  111,  195,  196,
+ /*   450 */    98,   70,  163,  160,  152,   23,   22,  164,  165,  246,
+ /*   460 */   207,   27,  152,  174,  175,  171,  172,   50,   51,  137,
+ /*   470 */    62,  139,   64,  171,  172,  222,  124,   27,  138,   24,
+ /*   480 */   163,   89,   90,  130,  174,  175,  197,  163,   71,   72,
+ /*   490 */    73,   74,   75,   76,   77,   78,   79,   80,   81,   82,
+ /*   500 */    83,   22,   85,   86,   87,   88,   89,   90,   91,   92,
+ /*   510 */    93,   94,   95,   19,  197,  181,  182,   23,  208,  209,
+ /*   520 */   152,  197,   26,  189,  132,  133,  232,  224,  225,  226,
+ /*   530 */   152,   97,   91,   26,  232,  116,  212,  213,  152,  222,
+ /*   540 */   121,  152,  174,  175,   50,   51,  243,   97,   22,   23,
+ /*   550 */    22,  234,  174,  175,  177,   23,  239,  116,  163,  177,
+ /*   560 */   174,  175,  121,  174,  175,   71,   72,   73,   74,   75,
+ /*   570 */    76,   77,   78,   79,   80,   81,   82,   83,   24,   85,
+ /*   580 */    86,   87,   88,   89,   90,   91,   92,   93,   94,   95,
+ /*   590 */    19,   23,  197,   11,   23,  227,   70,  208,  220,  152,
+ /*   600 */    31,  224,  225,  226,   35,   98,  224,  225,  226,  108,
+ /*   610 */   109,  110,  115,  152,  117,  118,   27,  222,   49,  123,
+ /*   620 */    24,   50,   51,   27,    0,    1,    2,  224,  225,  226,
+ /*   630 */   166,  124,  168,  169,  239,  174,  175,  170,  171,  172,
+ /*   640 */    22,  194,   71,   72,   73,   74,   75,   76,   77,   78,
+ /*   650 */    79,   80,   81,   82,   83,  152,   85,   86,   87,   88,
+ /*   660 */    89,   90,   91,   92,   93,   94,   95,   19,   22,  208,
+ /*   670 */    24,   23,  195,  196,  170,  171,  172,  174,  175,  152,
+ /*   680 */    26,  152,  152,  152,  207,  152,   97,  152,   23,  152,
+ /*   690 */    51,  244,  152,   97,  152,  247,  248,   23,   50,   51,
+ /*   700 */    26,  174,  175,  174,  175,  174,  175,  174,  175,  174,
+ /*   710 */   175,  174,  175,   23,  174,  175,  174,  175,  188,   71,
+ /*   720 */    72,   73,   74,   75,   76,   77,   78,   79,   80,   81,
+ /*   730 */    82,   83,  152,   85,   86,   87,   88,   89,   90,   91,
+ /*   740 */    92,   93,   94,   95,   19,  152,  107,  152,   33,   24,
+ /*   750 */   152,  100,  101,   27,  174,  175,  152,   42,  152,   23,
+ /*   760 */   152,   26,  152,   23,  152,   26,  152,  174,  175,  174,
+ /*   770 */   175,  152,  174,  175,   23,   50,   51,   26,  174,  175,
+ /*   780 */   174,  175,  174,  175,  174,  175,  174,  175,  174,  175,
+ /*   790 */   163,  119,  120,  174,  175,   19,   71,   72,   73,   74,
+ /*   800 */    75,   76,   77,   78,   79,   80,   81,   82,   83,  152,
+ /*   810 */    85,   86,   87,   88,   89,   90,   91,   92,   93,   94,
+ /*   820 */    95,   66,  152,   97,  197,   23,   50,   51,   26,   53,
+ /*   830 */    23,  174,  175,   26,   23,   23,   23,   26,   26,   26,
+ /*   840 */    36,  106,  146,  147,  174,  175,   19,   71,   72,   73,
+ /*   850 */    74,   75,   76,   77,   78,   79,   80,   81,   82,   83,
+ /*   860 */   152,   85,   86,   87,   88,   89,   90,   91,   92,   93,
+ /*   870 */    94,   95,  152,  196,  119,  120,   19,   50,   51,  168,
+ /*   880 */   169,   26,  174,  175,  207,   28,  152,  249,  250,  152,
+ /*   890 */   163,  163,  163,  163,  174,  175,  163,   19,   71,   72,
+ /*   900 */    73,   74,   75,   76,   77,   78,   79,   80,   81,   82,
+ /*   910 */    83,  152,   85,   86,   87,   88,   89,   90,   91,   92,
+ /*   920 */    93,   94,   95,  152,  197,  197,  197,  197,   50,   51,
+ /*   930 */   197,  194,   36,  174,  175,  191,  192,  152,  191,  192,
+ /*   940 */   163,  152,   66,  124,  152,  174,  175,  152,   19,   71,
+ /*   950 */    72,   73,   74,   75,   76,   77,   78,   79,   80,   81,
+ /*   960 */    82,   83,  152,   85,   86,   87,   88,   89,   90,   91,
+ /*   970 */    92,   93,   94,   95,  197,  152,  100,  188,  152,   50,
+ /*   980 */    51,  152,  152,  188,  174,  175,  252,  152,   94,   95,
+ /*   990 */   152,  152,  152,    1,    2,  152,  152,  174,  175,   19,
+ /*  1000 */   152,   72,   73,   74,   75,   76,   77,   78,   79,   80,
+ /*  1010 */    81,   82,   83,  152,   85,   86,   87,   88,   89,   90,
+ /*  1020 */    91,   92,   93,   94,   95,  152,  188,  188,   22,  194,
+ /*  1030 */    50,   51,  240,  173,  194,  174,  175,  252,  194,  152,
+ /*  1040 */    36,  181,   28,  152,   23,  219,  122,  174,  175,  219,
+ /*  1050 */   221,  152,  152,   73,   74,   75,   76,   77,   78,   79,
+ /*  1060 */    80,   81,   82,   83,  152,   85,   86,   87,   88,   89,
+ /*  1070 */    90,   91,   92,   93,   94,   95,   19,   20,  152,   22,
+ /*  1080 */    23,  194,  152,  240,   27,   28,  174,  175,  240,   19,
+ /*  1090 */    20,   26,   22,  194,  194,   38,   22,   27,   28,  152,
+ /*  1100 */    23,   22,  152,  116,  174,  175,  152,   23,   38,  152,
+ /*  1110 */    23,  152,  221,  152,   57,  152,   23,  163,   50,   51,
+ /*  1120 */   194,  174,  175,   66,  174,  175,   69,   57,  174,  175,
+ /*  1130 */    40,  174,  175,  174,  175,  174,  175,  174,  175,   69,
+ /*  1140 */    22,   53,   74,   75,   30,   53,   89,   90,   22,   22,
+ /*  1150 */   152,  197,   23,   96,   97,   98,   22,  152,  101,   89,
+ /*  1160 */    90,   91,  208,  209,  152,   53,   96,   97,   98,  101,
+ /*  1170 */    22,  101,  174,  175,  152,   19,   20,  105,   22,  174,
+ /*  1180 */   175,  112,   19,   27,   28,   20,  174,  175,   24,  132,
+ /*  1190 */   133,  134,  135,  136,   38,   44,  174,  175,  107,   61,
+ /*  1200 */    54,   26,  132,  133,  134,  135,  136,   54,  107,   22,
+ /*  1210 */     5,  140,    1,   57,   36,  111,  122,   28,   79,   79,
+ /*  1220 */   131,  123,   66,   19,   20,   69,   22,    1,   16,   20,
+ /*  1230 */   125,   27,   28,  123,  111,  120,   23,  131,   23,   16,
+ /*  1240 */    68,  142,   38,   15,   22,   89,   90,    3,  167,    4,
+ /*  1250 */   248,  251,   96,   97,   98,  180,  180,  101,  251,  151,
+ /*  1260 */     6,   57,  151,   13,  151,   26,   25,  151,  161,  202,
+ /*  1270 */   153,  162,  153,   69,  130,  128,  203,   19,   20,  127,
+ /*  1280 */    22,  126,  204,  129,   22,   27,   28,  205,  132,  133,
+ /*  1290 */   134,  135,  136,   89,   90,  231,   38,   95,  137,  179,
+ /*  1300 */    96,   97,   98,  206,  179,  101,  122,  107,  159,  159,
+ /*  1310 */   125,  231,  216,  228,  107,   57,  184,  217,  216,  176,
+ /*  1320 */   217,  176,   48,  106,   18,  184,  158,   69,  159,  158,
+ /*  1330 */    46,   71,  237,  176,  176,  176,  132,  133,  134,  135,
+ /*  1340 */   136,  217,  176,  137,  216,  178,  158,   89,   90,  179,
+ /*  1350 */   176,  159,  179,  159,   96,   97,   98,  159,  159,  101,
+ /*  1360 */     5,  158,  202,   22,   18,   10,   11,   12,   13,   14,
+ /*  1370 */   190,  238,   17,  190,  158,  193,   41,  159,  202,  193,
+ /*  1380 */   159,  202,  245,  193,  193,  223,  190,   32,  159,   34,
+ /*  1390 */   132,  133,  134,  135,  136,  159,   39,  155,   43,  150,
+ /*  1400 */   223,  177,  201,  178,  177,  186,   66,  199,  177,  152,
+ /*  1410 */   253,   56,  215,  152,  182,  152,  202,  152,   63,  152,
+ /*  1420 */   152,   66,   67,  242,  229,  152,  174,  152,  152,  152,
+ /*  1430 */   152,  152,  152,  152,  199,  242,  202,  152,  198,  152,
+ /*  1440 */   152,  152,  183,  192,  152,  215,  152,  183,  215,  183,
+ /*  1450 */   152,  241,  214,  152,  211,  152,  152,  211,  211,  152,
+ /*  1460 */   152,  241,  152,  152,  152,  152,  152,  152,  152,  114,
+ /*  1470 */   152,  152,  235,  152,  152,  152,  174,  187,   95,  174,
+ /*  1480 */   253,  253,  253,  253,  236,  253,  253,  253,  253,  253,
+ /*  1490 */   253,  253,  253,  253,  253,  253,  141,
+};
+#define YY_SHIFT_USE_DFLT (-86)
+#define YY_SHIFT_COUNT (429)
+#define YY_SHIFT_MIN   (-85)
+#define YY_SHIFT_MAX   (1383)
+static const short yy_shift_ofst[] = {
+ /*     0 */   992, 1057, 1355, 1156, 1204, 1204,    1,  262,  -19,  135,
+ /*    10 */   135,  776, 1204, 1204, 1204, 1204,   69,   69,   53,  208,
+ /*    20 */   283,  755,   58,  725,  648,  571,  494,  417,  340,  263,
+ /*    30 */   212,  827,  827,  827,  827,  827,  827,  827,  827,  827,
+ /*    40 */   827,  827,  827,  827,  827,  827,  878,  827,  929,  980,
+ /*    50 */   980, 1070, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204,
+ /*    60 */  1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204,
+ /*    70 */  1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204,
+ /*    80 */  1258, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204, 1204,
+ /*    90 */  1204, 1204, 1204, 1204,  -71,  -47,  -47,  -47,  -47,  -47,
+ /*   100 */     0,   29,  -12,  283,  283,  139,   91,  392,  392,  894,
+ /*   110 */   672,  726, 1383,  -86,  -86,  -86,   88,  318,  318,   99,
+ /*   120 */   381,  -20,  283,  283,  283,  283,  283,  283,  283,  283,
+ /*   130 */   283,  283,  283,  283,  283,  283,  283,  283,  283,  283,
+ /*   140 */   283,  283,  283,  283,  624,  876,  726,  672, 1340, 1340,
+ /*   150 */  1340, 1340, 1340, 1340,  -86,  -86,  -86,  305,  136,  136,
+ /*   160 */   142,  167,  226,  154,  137,  152,  283,  283,  283,  283,
+ /*   170 */   283,  283,  283,  283,  283,  283,  283,  283,  283,  283,
+ /*   180 */   283,  283,  283,  336,  336,  336,  283,  283,  352,  283,
+ /*   190 */   283,  283,  283,  283,  228,  283,  283,  283,  283,  283,
+ /*   200 */   283,  283,  283,  283,  283,  501,  569,  596,  596,  596,
+ /*   210 */   507,  497,  441,  391,  353,  156,  156,  857,  353,  857,
+ /*   220 */   735,  813,  639,  715,  156,  332,  715,  715,  496,  419,
+ /*   230 */   646, 1357, 1184, 1184, 1335, 1335, 1184, 1341, 1260, 1144,
+ /*   240 */  1346, 1346, 1346, 1346, 1184, 1306, 1144, 1341, 1260, 1260,
+ /*   250 */  1144, 1184, 1306, 1206, 1284, 1184, 1184, 1306, 1184, 1306,
+ /*   260 */  1184, 1306, 1262, 1207, 1207, 1207, 1274, 1262, 1207, 1217,
+ /*   270 */  1207, 1274, 1207, 1207, 1185, 1200, 1185, 1200, 1185, 1200,
+ /*   280 */  1184, 1184, 1161, 1262, 1202, 1202, 1262, 1154, 1155, 1147,
+ /*   290 */  1152, 1144, 1241, 1239, 1250, 1250, 1254, 1254, 1254, 1254,
+ /*   300 */   -86,  -86,  -86,  -86,  -86,  -86, 1068,  304,  526,  249,
+ /*   310 */   408,  -83,  434,  812,   27,  811,  807,  802,  751,  589,
+ /*   320 */   651,  163,  131,  674,  366,  450,  299,  148,   23,  102,
+ /*   330 */   229,  -21, 1245, 1244, 1222, 1099, 1228, 1172, 1223, 1215,
+ /*   340 */  1213, 1115, 1106, 1123, 1110, 1209, 1105, 1212, 1226, 1098,
+ /*   350 */  1089, 1140, 1139, 1104, 1189, 1178, 1094, 1211, 1205, 1187,
+ /*   360 */  1101, 1071, 1153, 1175, 1146, 1138, 1151, 1091, 1164, 1165,
+ /*   370 */  1163, 1069, 1072, 1148, 1112, 1134, 1127, 1129, 1126, 1092,
+ /*   380 */  1114, 1118, 1088, 1090, 1093, 1087, 1084,  987, 1079, 1077,
+ /*   390 */  1074, 1065,  924, 1021, 1014, 1004, 1006,  819,  739,  896,
+ /*   400 */   855,  804,  739,  740,  736,  690,  654,  665,  618,  582,
+ /*   410 */   568,  528,  554,  379,  532,  479,  455,  379,  432,  371,
+ /*   420 */   341,   28,  338,  116,  -11,  -57,  -85,    7,   -8,    3,
+};
+#define YY_REDUCE_USE_DFLT (-110)
+#define YY_REDUCE_COUNT (305)
+#define YY_REDUCE_MIN   (-109)
+#define YY_REDUCE_MAX   (1323)
+static const short yy_reduce_ofst[] = {
+ /*     0 */   238,  954,  213,  289,  310,  234,  144,  317, -109,  382,
+ /*    10 */   377,  303,  461,  389,  378,  368,  302,  294,  253,  395,
+ /*    20 */   293,  324,  403,  403,  403,  403,  403,  403,  403,  403,
+ /*    30 */   403,  403,  403,  403,  403,  403,  403,  403,  403,  403,
+ /*    40 */   403,  403,  403,  403,  403,  403,  403,  403,  403,  403,
+ /*    50 */   403, 1022, 1012, 1005,  998,  963,  961,  959,  957,  950,
+ /*    60 */   947,  930,  912,  873,  861,  823,  810,  771,  759,  720,
+ /*    70 */   708,  670,  657,  619,  614,  612,  610,  608,  606,  604,
+ /*    80 */   598,  595,  593,  580,  542,  540,  537,  535,  533,  531,
+ /*    90 */   529,  527,  503,  386,  403,  403,  403,  403,  403,  403,
+ /*   100 */   403,  403,  403,   95,  447,   82,  334,  504,  467,  403,
+ /*   110 */   477,  464,  403,  403,  403,  403,  860,  747,  744,  785,
+ /*   120 */   638,  638,  926,  891,  900,  899,  887,  844,  840,  835,
+ /*   130 */   848,  830,  843,  829,  792,  839,  826,  737,  838,  795,
+ /*   140 */   789,   47,  734,  530,  696,  777,  711,  677,  733,  730,
+ /*   150 */   729,  728,  727,  627,  448,   64,  187, 1305, 1302, 1252,
+ /*   160 */  1290, 1273, 1323, 1322, 1321, 1319, 1318, 1316, 1315, 1314,
+ /*   170 */  1313, 1312, 1311, 1310, 1308, 1307, 1304, 1303, 1301, 1298,
+ /*   180 */  1294, 1292, 1289, 1266, 1264, 1259, 1288, 1287, 1238, 1285,
+ /*   190 */  1281, 1280, 1279, 1278, 1251, 1277, 1276, 1275, 1273, 1268,
+ /*   200 */  1267, 1265, 1263, 1261, 1257, 1248, 1237, 1247, 1246, 1243,
+ /*   210 */  1238, 1240, 1235, 1249, 1234, 1233, 1230, 1220, 1214, 1210,
+ /*   220 */  1225, 1219, 1232, 1231, 1197, 1195, 1227, 1224, 1201, 1208,
+ /*   230 */  1242, 1137, 1236, 1229, 1193, 1181, 1221, 1177, 1196, 1179,
+ /*   240 */  1191, 1190, 1186, 1182, 1218, 1216, 1176, 1162, 1183, 1180,
+ /*   250 */  1160, 1199, 1203, 1133, 1095, 1198, 1194, 1188, 1192, 1171,
+ /*   260 */  1169, 1168, 1173, 1174, 1166, 1159, 1141, 1170, 1158, 1167,
+ /*   270 */  1157, 1132, 1145, 1143, 1124, 1128, 1103, 1102, 1100, 1096,
+ /*   280 */  1150, 1149, 1085, 1125, 1080, 1064, 1120, 1097, 1082, 1078,
+ /*   290 */  1073, 1067, 1109, 1107, 1119, 1117, 1116, 1113, 1111, 1108,
+ /*   300 */  1007, 1000, 1002, 1076, 1075, 1081,
+};
+static const YYACTIONTYPE yy_default[] = {
+ /*     0 */   647,  964,  964,  964,  878,  878,  969,  964,  774,  802,
+ /*    10 */   802,  938,  969,  969,  969,  876,  969,  969,  969,  964,
+ /*    20 */   969,  778,  808,  969,  969,  969,  969,  969,  969,  969,
+ /*    30 */   969,  937,  939,  816,  815,  918,  789,  813,  806,  810,
+ /*    40 */   879,  872,  873,  871,  875,  880,  969,  809,  841,  856,
+ /*    50 */   840,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*    60 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*    70 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*    80 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*    90 */   969,  969,  969,  969,  850,  855,  862,  854,  851,  843,
+ /*   100 */   842,  844,  845,  969,  969,  673,  739,  969,  969,  846,
+ /*   110 */   969,  685,  847,  859,  858,  857,  680,  969,  969,  969,
+ /*   120 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   130 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   140 */   969,  969,  969,  969,  647,  964,  969,  969,  964,  964,
+ /*   150 */   964,  964,  964,  964,  956,  778,  768,  969,  969,  969,
+ /*   160 */   969,  969,  969,  969,  969,  969,  969,  944,  942,  969,
+ /*   170 */   891,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   180 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   190 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   200 */   969,  969,  969,  969,  653,  969,  911,  774,  774,  774,
+ /*   210 */   776,  754,  766,  655,  812,  791,  791,  923,  812,  923,
+ /*   220 */   710,  733,  707,  802,  791,  874,  802,  802,  775,  766,
+ /*   230 */   969,  949,  782,  782,  941,  941,  782,  821,  743,  812,
+ /*   240 */   750,  750,  750,  750,  782,  670,  812,  821,  743,  743,
+ /*   250 */   812,  782,  670,  917,  915,  782,  782,  670,  782,  670,
+ /*   260 */   782,  670,  884,  741,  741,  741,  725,  884,  741,  710,
+ /*   270 */   741,  725,  741,  741,  795,  790,  795,  790,  795,  790,
+ /*   280 */   782,  782,  969,  884,  888,  888,  884,  807,  796,  805,
+ /*   290 */   803,  812,  676,  728,  663,  663,  652,  652,  652,  652,
+ /*   300 */   961,  961,  956,  712,  712,  695,  969,  969,  969,  969,
+ /*   310 */   969,  969,  687,  969,  893,  969,  969,  969,  969,  969,
+ /*   320 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   330 */   969,  828,  969,  648,  951,  969,  969,  948,  969,  969,
+ /*   340 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   350 */   969,  969,  969,  969,  969,  969,  921,  969,  969,  969,
+ /*   360 */   969,  969,  969,  914,  913,  969,  969,  969,  969,  969,
+ /*   370 */   969,  969,  969,  969,  969,  969,  969,  969,  969,  969,
+ /*   380 */   969,  969,  969,  969,  969,  969,  969,  757,  969,  969,
+ /*   390 */   969,  761,  969,  969,  969,  969,  969,  969,  804,  969,
+ /*   400 */   797,  969,  877,  969,  969,  969,  969,  969,  969,  969,
+ /*   410 */   969,  969,  969,  966,  969,  969,  969,  965,  969,  969,
+ /*   420 */   969,  969,  969,  830,  969,  829,  833,  969,  661,  969,
+ /*   430 */   644,  649,  960,  963,  962,  959,  958,  957,  952,  950,
+ /*   440 */   947,  946,  945,  943,  940,  936,  897,  895,  902,  901,
+ /*   450 */   900,  899,  898,  896,  894,  892,  818,  817,  814,  811,
+ /*   460 */   753,  935,  890,  752,  749,  748,  669,  953,  920,  929,
+ /*   470 */   928,  927,  822,  926,  925,  924,  922,  919,  906,  820,
+ /*   480 */   819,  744,  882,  881,  672,  910,  909,  908,  912,  916,
+ /*   490 */   907,  784,  751,  671,  668,  675,  679,  731,  732,  740,
+ /*   500 */   738,  737,  736,  735,  734,  730,  681,  686,  724,  709,
+ /*   510 */   708,  717,  716,  722,  721,  720,  719,  718,  715,  714,
+ /*   520 */   713,  706,  705,  711,  704,  727,  726,  723,  703,  747,
+ /*   530 */   746,  745,  742,  702,  701,  700,  833,  699,  698,  838,
+ /*   540 */   837,  866,  826,  755,  759,  758,  762,  763,  771,  770,
+ /*   550 */   769,  780,  781,  793,  792,  824,  823,  794,  779,  773,
+ /*   560 */   772,  788,  787,  786,  785,  777,  767,  799,  798,  868,
+ /*   570 */   783,  867,  865,  934,  933,  932,  931,  930,  870,  967,
+ /*   580 */   968,  887,  889,  886,  801,  800,  885,  869,  839,  836,
+ /*   590 */   690,  691,  905,  904,  903,  693,  692,  689,  688,  863,
+ /*   600 */   860,  852,  864,  861,  853,  849,  848,  834,  832,  831,
+ /*   610 */   827,  835,  760,  756,  825,  765,  764,  697,  696,  694,
+ /*   620 */   678,  677,  674,  667,  665,  664,  666,  662,  660,  659,
+ /*   630 */   658,  657,  656,  684,  683,  682,  654,  651,  650,  646,
+ /*   640 */   645,  643,
+};
+
+/* The next table maps tokens into fallback tokens.  If a construct
+** like the following:
+** 
+**      %fallback ID X Y Z.
+**
+** appears in the grammar, then ID becomes a fallback token for X, Y,
+** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
+** but it does not parse, the type of the token is changed to ID and
+** the parse is retried before an error is thrown.
+*/
+#ifdef YYFALLBACK
+static const YYCODETYPE yyFallback[] = {
+    0,  /*          $ => nothing */
+    0,  /*       SEMI => nothing */
+   27,  /*    EXPLAIN => ID */
+   27,  /*      QUERY => ID */
+   27,  /*       PLAN => ID */
+   27,  /*      BEGIN => ID */
+    0,  /* TRANSACTION => nothing */
+   27,  /*   DEFERRED => ID */
+   27,  /*  IMMEDIATE => ID */
+   27,  /*  EXCLUSIVE => ID */
+    0,  /*     COMMIT => nothing */
+   27,  /*        END => ID */
+   27,  /*   ROLLBACK => ID */
+   27,  /*  SAVEPOINT => ID */
+   27,  /*    RELEASE => ID */
+    0,  /*         TO => nothing */
+    0,  /*      TABLE => nothing */
+    0,  /*     CREATE => nothing */
+   27,  /*         IF => ID */
+    0,  /*        NOT => nothing */
+    0,  /*     EXISTS => nothing */
+   27,  /*       TEMP => ID */
+    0,  /*         LP => nothing */
+    0,  /*         RP => nothing */
+    0,  /*         AS => nothing */
+   27,  /*    WITHOUT => ID */
+    0,  /*      COMMA => nothing */
+    0,  /*         ID => nothing */
+    0,  /*    INDEXED => nothing */
+   27,  /*      ABORT => ID */
+   27,  /*     ACTION => ID */
+   27,  /*      AFTER => ID */
+   27,  /*    ANALYZE => ID */
+   27,  /*        ASC => ID */
+   27,  /*     ATTACH => ID */
+   27,  /*     BEFORE => ID */
+   27,  /*         BY => ID */
+   27,  /*    CASCADE => ID */
+   27,  /*       CAST => ID */
+   27,  /*   COLUMNKW => ID */
+   27,  /*   CONFLICT => ID */
+   27,  /*   DATABASE => ID */
+   27,  /*       DESC => ID */
+   27,  /*     DETACH => ID */
+   27,  /*       EACH => ID */
+   27,  /*       FAIL => ID */
+   27,  /*        FOR => ID */
+   27,  /*     IGNORE => ID */
+   27,  /*  INITIALLY => ID */
+   27,  /*    INSTEAD => ID */
+   27,  /*    LIKE_KW => ID */
+   27,  /*      MATCH => ID */
+   27,  /*         NO => ID */
+   27,  /*        KEY => ID */
+   27,  /*         OF => ID */
+   27,  /*     OFFSET => ID */
+   27,  /*     PRAGMA => ID */
+   27,  /*      RAISE => ID */
+   27,  /*  RECURSIVE => ID */
+   27,  /*    REPLACE => ID */
+   27,  /*   RESTRICT => ID */
+   27,  /*        ROW => ID */
+   27,  /*    TRIGGER => ID */
+   27,  /*     VACUUM => ID */
+   27,  /*       VIEW => ID */
+   27,  /*    VIRTUAL => ID */
+   27,  /*       WITH => ID */
+   27,  /*    REINDEX => ID */
+   27,  /*     RENAME => ID */
+   27,  /*   CTIME_KW => ID */
+};
+#endif /* YYFALLBACK */
+
+/* The following structure represents a single element of the
+** parser's stack.  Information stored includes:
+**
+**   +  The state number for the parser at this level of the stack.
+**
+**   +  The value of the token stored at this level of the stack.
+**      (In other words, the "major" token.)
+**
+**   +  The semantic value stored at this level of the stack.  This is
+**      the information used by the action routines in the grammar.
+**      It is sometimes called the "minor" token.
+*/
+struct yyStackEntry {
+  YYACTIONTYPE stateno;  /* The state-number */
+  YYCODETYPE major;      /* The major token value.  This is the code
+                         ** number for the token at this stack level */
+  YYMINORTYPE minor;     /* The user-supplied minor token value.  This
+                         ** is the value of the token  */
+};
+typedef struct yyStackEntry yyStackEntry;
+
+/* The state of the parser is completely contained in an instance of
+** the following structure */
+struct yyParser {
+  int yyidx;                    /* Index of top element in stack */
+#ifdef YYTRACKMAXSTACKDEPTH
+  int yyidxMax;                 /* Maximum value of yyidx */
+#endif
+  int yyerrcnt;                 /* Shifts left before out of the error */
+  sqlite3ParserARG_SDECL                /* A place to hold %extra_argument */
+#if YYSTACKDEPTH<=0
+  int yystksz;                  /* Current side of the stack */
+  yyStackEntry *yystack;        /* The parser's stack */
+#else
+  yyStackEntry yystack[YYSTACKDEPTH];  /* The parser's stack */
+#endif
+};
+typedef struct yyParser yyParser;
+
+#ifndef NDEBUG
+/* #include <stdio.h> */
+static FILE *yyTraceFILE = 0;
+static char *yyTracePrompt = 0;
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* 
+** Turn parser tracing on by giving a stream to which to write the trace
+** and a prompt to preface each trace message.  Tracing is turned off
+** by making either argument NULL 
+**
+** Inputs:
+** <ul>
+** <li> A FILE* to which trace output should be written.
+**      If NULL, then tracing is turned off.
+** <li> A prefix string written at the beginning of every
+**      line of trace output.  If NULL, then tracing is
+**      turned off.
+** </ul>
+**
+** Outputs:
+** None.
+*/
+SQLITE_PRIVATE void sqlite3ParserTrace(FILE *TraceFILE, char *zTracePrompt){
+  yyTraceFILE = TraceFILE;
+  yyTracePrompt = zTracePrompt;
+  if( yyTraceFILE==0 ) yyTracePrompt = 0;
+  else if( yyTracePrompt==0 ) yyTraceFILE = 0;
+}
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* For tracing shifts, the names of all terminals and nonterminals
+** are required.  The following table supplies these names */
+static const char *const yyTokenName[] = { 
+  "$",             "SEMI",          "EXPLAIN",       "QUERY",       
+  "PLAN",          "BEGIN",         "TRANSACTION",   "DEFERRED",    
+  "IMMEDIATE",     "EXCLUSIVE",     "COMMIT",        "END",         
+  "ROLLBACK",      "SAVEPOINT",     "RELEASE",       "TO",          
+  "TABLE",         "CREATE",        "IF",            "NOT",         
+  "EXISTS",        "TEMP",          "LP",            "RP",          
+  "AS",            "WITHOUT",       "COMMA",         "ID",          
+  "INDEXED",       "ABORT",         "ACTION",        "AFTER",       
+  "ANALYZE",       "ASC",           "ATTACH",        "BEFORE",      
+  "BY",            "CASCADE",       "CAST",          "COLUMNKW",    
+  "CONFLICT",      "DATABASE",      "DESC",          "DETACH",      
+  "EACH",          "FAIL",          "FOR",           "IGNORE",      
+  "INITIALLY",     "INSTEAD",       "LIKE_KW",       "MATCH",       
+  "NO",            "KEY",           "OF",            "OFFSET",      
+  "PRAGMA",        "RAISE",         "RECURSIVE",     "REPLACE",     
+  "RESTRICT",      "ROW",           "TRIGGER",       "VACUUM",      
+  "VIEW",          "VIRTUAL",       "WITH",          "REINDEX",     
+  "RENAME",        "CTIME_KW",      "ANY",           "OR",          
+  "AND",           "IS",            "BETWEEN",       "IN",          
+  "ISNULL",        "NOTNULL",       "NE",            "EQ",          
+  "GT",            "LE",            "LT",            "GE",          
+  "ESCAPE",        "BITAND",        "BITOR",         "LSHIFT",      
+  "RSHIFT",        "PLUS",          "MINUS",         "STAR",        
+  "SLASH",         "REM",           "CONCAT",        "COLLATE",     
+  "BITNOT",        "STRING",        "JOIN_KW",       "CONSTRAINT",  
+  "DEFAULT",       "NULL",          "PRIMARY",       "UNIQUE",      
+  "CHECK",         "REFERENCES",    "AUTOINCR",      "ON",          
+  "INSERT",        "DELETE",        "UPDATE",        "SET",         
+  "DEFERRABLE",    "FOREIGN",       "DROP",          "UNION",       
+  "ALL",           "EXCEPT",        "INTERSECT",     "SELECT",      
+  "VALUES",        "DISTINCT",      "DOT",           "FROM",        
+  "JOIN",          "USING",         "ORDER",         "GROUP",       
+  "HAVING",        "LIMIT",         "WHERE",         "INTO",        
+  "INTEGER",       "FLOAT",         "BLOB",          "VARIABLE",    
+  "CASE",          "WHEN",          "THEN",          "ELSE",        
+  "INDEX",         "ALTER",         "ADD",           "error",       
+  "input",         "cmdlist",       "ecmd",          "explain",     
+  "cmdx",          "cmd",           "transtype",     "trans_opt",   
+  "nm",            "savepoint_opt",  "create_table",  "create_table_args",
+  "createkw",      "temp",          "ifnotexists",   "dbnm",        
+  "columnlist",    "conslist_opt",  "table_options",  "select",      
+  "column",        "columnid",      "type",          "carglist",    
+  "typetoken",     "typename",      "signed",        "plus_num",    
+  "minus_num",     "ccons",         "term",          "expr",        
+  "onconf",        "sortorder",     "autoinc",       "idxlist_opt", 
+  "refargs",       "defer_subclause",  "refarg",        "refact",      
+  "init_deferred_pred_opt",  "conslist",      "tconscomma",    "tcons",       
+  "idxlist",       "defer_subclause_opt",  "orconf",        "resolvetype", 
+  "raisetype",     "ifexists",      "fullname",      "selectnowith",
+  "oneselect",     "with",          "multiselect_op",  "distinct",    
+  "selcollist",    "from",          "where_opt",     "groupby_opt", 
+  "having_opt",    "orderby_opt",   "limit_opt",     "values",      
+  "nexprlist",     "exprlist",      "sclp",          "as",          
+  "seltablist",    "stl_prefix",    "joinop",        "indexed_opt", 
+  "on_opt",        "using_opt",     "joinop2",       "idlist",      
+  "sortlist",      "setlist",       "insert_cmd",    "inscollist_opt",
+  "likeop",        "between_op",    "in_op",         "case_operand",
+  "case_exprlist",  "case_else",     "uniqueflag",    "collate",     
+  "nmnum",         "trigger_decl",  "trigger_cmd_list",  "trigger_time",
+  "trigger_event",  "foreach_clause",  "when_clause",   "trigger_cmd", 
+  "trnm",          "tridxby",       "database_kw_opt",  "key_opt",     
+  "add_column_fullname",  "kwcolumn_opt",  "create_vtab",   "vtabarglist", 
+  "vtabarg",       "vtabargtoken",  "lp",            "anylist",     
+  "wqlist",      
+};
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* For tracing reduce actions, the names of all rules are required.
+*/
+static const char *const yyRuleName[] = {
+ /*   0 */ "input ::= cmdlist",
+ /*   1 */ "cmdlist ::= cmdlist ecmd",
+ /*   2 */ "cmdlist ::= ecmd",
+ /*   3 */ "ecmd ::= SEMI",
+ /*   4 */ "ecmd ::= explain cmdx SEMI",
+ /*   5 */ "explain ::=",
+ /*   6 */ "explain ::= EXPLAIN",
+ /*   7 */ "explain ::= EXPLAIN QUERY PLAN",
+ /*   8 */ "cmdx ::= cmd",
+ /*   9 */ "cmd ::= BEGIN transtype trans_opt",
+ /*  10 */ "trans_opt ::=",
+ /*  11 */ "trans_opt ::= TRANSACTION",
+ /*  12 */ "trans_opt ::= TRANSACTION nm",
+ /*  13 */ "transtype ::=",
+ /*  14 */ "transtype ::= DEFERRED",
+ /*  15 */ "transtype ::= IMMEDIATE",
+ /*  16 */ "transtype ::= EXCLUSIVE",
+ /*  17 */ "cmd ::= COMMIT trans_opt",
+ /*  18 */ "cmd ::= END trans_opt",
+ /*  19 */ "cmd ::= ROLLBACK trans_opt",
+ /*  20 */ "savepoint_opt ::= SAVEPOINT",
+ /*  21 */ "savepoint_opt ::=",
+ /*  22 */ "cmd ::= SAVEPOINT nm",
+ /*  23 */ "cmd ::= RELEASE savepoint_opt nm",
+ /*  24 */ "cmd ::= ROLLBACK trans_opt TO savepoint_opt nm",
+ /*  25 */ "cmd ::= create_table create_table_args",
+ /*  26 */ "create_table ::= createkw temp TABLE ifnotexists nm dbnm",
+ /*  27 */ "createkw ::= CREATE",
+ /*  28 */ "ifnotexists ::=",
+ /*  29 */ "ifnotexists ::= IF NOT EXISTS",
+ /*  30 */ "temp ::= TEMP",
+ /*  31 */ "temp ::=",
+ /*  32 */ "create_table_args ::= LP columnlist conslist_opt RP table_options",
+ /*  33 */ "create_table_args ::= AS select",
+ /*  34 */ "table_options ::=",
+ /*  35 */ "table_options ::= WITHOUT nm",
+ /*  36 */ "columnlist ::= columnlist COMMA column",
+ /*  37 */ "columnlist ::= column",
+ /*  38 */ "column ::= columnid type carglist",
+ /*  39 */ "columnid ::= nm",
+ /*  40 */ "nm ::= ID|INDEXED",
+ /*  41 */ "nm ::= STRING",
+ /*  42 */ "nm ::= JOIN_KW",
+ /*  43 */ "type ::=",
+ /*  44 */ "type ::= typetoken",
+ /*  45 */ "typetoken ::= typename",
+ /*  46 */ "typetoken ::= typename LP signed RP",
+ /*  47 */ "typetoken ::= typename LP signed COMMA signed RP",
+ /*  48 */ "typename ::= ID|STRING",
+ /*  49 */ "typename ::= typename ID|STRING",
+ /*  50 */ "signed ::= plus_num",
+ /*  51 */ "signed ::= minus_num",
+ /*  52 */ "carglist ::= carglist ccons",
+ /*  53 */ "carglist ::=",
+ /*  54 */ "ccons ::= CONSTRAINT nm",
+ /*  55 */ "ccons ::= DEFAULT term",
+ /*  56 */ "ccons ::= DEFAULT LP expr RP",
+ /*  57 */ "ccons ::= DEFAULT PLUS term",
+ /*  58 */ "ccons ::= DEFAULT MINUS term",
+ /*  59 */ "ccons ::= DEFAULT ID|INDEXED",
+ /*  60 */ "ccons ::= NULL onconf",
+ /*  61 */ "ccons ::= NOT NULL onconf",
+ /*  62 */ "ccons ::= PRIMARY KEY sortorder onconf autoinc",
+ /*  63 */ "ccons ::= UNIQUE onconf",
+ /*  64 */ "ccons ::= CHECK LP expr RP",
+ /*  65 */ "ccons ::= REFERENCES nm idxlist_opt refargs",
+ /*  66 */ "ccons ::= defer_subclause",
+ /*  67 */ "ccons ::= COLLATE ID|STRING",
+ /*  68 */ "autoinc ::=",
+ /*  69 */ "autoinc ::= AUTOINCR",
+ /*  70 */ "refargs ::=",
+ /*  71 */ "refargs ::= refargs refarg",
+ /*  72 */ "refarg ::= MATCH nm",
+ /*  73 */ "refarg ::= ON INSERT refact",
+ /*  74 */ "refarg ::= ON DELETE refact",
+ /*  75 */ "refarg ::= ON UPDATE refact",
+ /*  76 */ "refact ::= SET NULL",
+ /*  77 */ "refact ::= SET DEFAULT",
+ /*  78 */ "refact ::= CASCADE",
+ /*  79 */ "refact ::= RESTRICT",
+ /*  80 */ "refact ::= NO ACTION",
+ /*  81 */ "defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt",
+ /*  82 */ "defer_subclause ::= DEFERRABLE init_deferred_pred_opt",
+ /*  83 */ "init_deferred_pred_opt ::=",
+ /*  84 */ "init_deferred_pred_opt ::= INITIALLY DEFERRED",
+ /*  85 */ "init_deferred_pred_opt ::= INITIALLY IMMEDIATE",
+ /*  86 */ "conslist_opt ::=",
+ /*  87 */ "conslist_opt ::= COMMA conslist",
+ /*  88 */ "conslist ::= conslist tconscomma tcons",
+ /*  89 */ "conslist ::= tcons",
+ /*  90 */ "tconscomma ::= COMMA",
+ /*  91 */ "tconscomma ::=",
+ /*  92 */ "tcons ::= CONSTRAINT nm",
+ /*  93 */ "tcons ::= PRIMARY KEY LP idxlist autoinc RP onconf",
+ /*  94 */ "tcons ::= UNIQUE LP idxlist RP onconf",
+ /*  95 */ "tcons ::= CHECK LP expr RP onconf",
+ /*  96 */ "tcons ::= FOREIGN KEY LP idxlist RP REFERENCES nm idxlist_opt refargs defer_subclause_opt",
+ /*  97 */ "defer_subclause_opt ::=",
+ /*  98 */ "defer_subclause_opt ::= defer_subclause",
+ /*  99 */ "onconf ::=",
+ /* 100 */ "onconf ::= ON CONFLICT resolvetype",
+ /* 101 */ "orconf ::=",
+ /* 102 */ "orconf ::= OR resolvetype",
+ /* 103 */ "resolvetype ::= raisetype",
+ /* 104 */ "resolvetype ::= IGNORE",
+ /* 105 */ "resolvetype ::= REPLACE",
+ /* 106 */ "cmd ::= DROP TABLE ifexists fullname",
+ /* 107 */ "ifexists ::= IF EXISTS",
+ /* 108 */ "ifexists ::=",
+ /* 109 */ "cmd ::= createkw temp VIEW ifnotexists nm dbnm AS select",
+ /* 110 */ "cmd ::= DROP VIEW ifexists fullname",
+ /* 111 */ "cmd ::= select",
+ /* 112 */ "select ::= with selectnowith",
+ /* 113 */ "selectnowith ::= oneselect",
+ /* 114 */ "selectnowith ::= selectnowith multiselect_op oneselect",
+ /* 115 */ "multiselect_op ::= UNION",
+ /* 116 */ "multiselect_op ::= UNION ALL",
+ /* 117 */ "multiselect_op ::= EXCEPT|INTERSECT",
+ /* 118 */ "oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt",
+ /* 119 */ "oneselect ::= values",
+ /* 120 */ "values ::= VALUES LP nexprlist RP",
+ /* 121 */ "values ::= values COMMA LP exprlist RP",
+ /* 122 */ "distinct ::= DISTINCT",
+ /* 123 */ "distinct ::= ALL",
+ /* 124 */ "distinct ::=",
+ /* 125 */ "sclp ::= selcollist COMMA",
+ /* 126 */ "sclp ::=",
+ /* 127 */ "selcollist ::= sclp expr as",
+ /* 128 */ "selcollist ::= sclp STAR",
+ /* 129 */ "selcollist ::= sclp nm DOT STAR",
+ /* 130 */ "as ::= AS nm",
+ /* 131 */ "as ::= ID|STRING",
+ /* 132 */ "as ::=",
+ /* 133 */ "from ::=",
+ /* 134 */ "from ::= FROM seltablist",
+ /* 135 */ "stl_prefix ::= seltablist joinop",
+ /* 136 */ "stl_prefix ::=",
+ /* 137 */ "seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt",
+ /* 138 */ "seltablist ::= stl_prefix LP select RP as on_opt using_opt",
+ /* 139 */ "seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt",
+ /* 140 */ "dbnm ::=",
+ /* 141 */ "dbnm ::= DOT nm",
+ /* 142 */ "fullname ::= nm dbnm",
+ /* 143 */ "joinop ::= COMMA|JOIN",
+ /* 144 */ "joinop ::= JOIN_KW JOIN",
+ /* 145 */ "joinop ::= JOIN_KW nm JOIN",
+ /* 146 */ "joinop ::= JOIN_KW nm nm JOIN",
+ /* 147 */ "on_opt ::= ON expr",
+ /* 148 */ "on_opt ::=",
+ /* 149 */ "indexed_opt ::=",
+ /* 150 */ "indexed_opt ::= INDEXED BY nm",
+ /* 151 */ "indexed_opt ::= NOT INDEXED",
+ /* 152 */ "using_opt ::= USING LP idlist RP",
+ /* 153 */ "using_opt ::=",
+ /* 154 */ "orderby_opt ::=",
+ /* 155 */ "orderby_opt ::= ORDER BY sortlist",
+ /* 156 */ "sortlist ::= sortlist COMMA expr sortorder",
+ /* 157 */ "sortlist ::= expr sortorder",
+ /* 158 */ "sortorder ::= ASC",
+ /* 159 */ "sortorder ::= DESC",
+ /* 160 */ "sortorder ::=",
+ /* 161 */ "groupby_opt ::=",
+ /* 162 */ "groupby_opt ::= GROUP BY nexprlist",
+ /* 163 */ "having_opt ::=",
+ /* 164 */ "having_opt ::= HAVING expr",
+ /* 165 */ "limit_opt ::=",
+ /* 166 */ "limit_opt ::= LIMIT expr",
+ /* 167 */ "limit_opt ::= LIMIT expr OFFSET expr",
+ /* 168 */ "limit_opt ::= LIMIT expr COMMA expr",
+ /* 169 */ "cmd ::= with DELETE FROM fullname indexed_opt where_opt",
+ /* 170 */ "where_opt ::=",
+ /* 171 */ "where_opt ::= WHERE expr",
+ /* 172 */ "cmd ::= with UPDATE orconf fullname indexed_opt SET setlist where_opt",
+ /* 173 */ "setlist ::= setlist COMMA nm EQ expr",
+ /* 174 */ "setlist ::= nm EQ expr",
+ /* 175 */ "cmd ::= with insert_cmd INTO fullname inscollist_opt select",
+ /* 176 */ "cmd ::= with insert_cmd INTO fullname inscollist_opt DEFAULT VALUES",
+ /* 177 */ "insert_cmd ::= INSERT orconf",
+ /* 178 */ "insert_cmd ::= REPLACE",
+ /* 179 */ "inscollist_opt ::=",
+ /* 180 */ "inscollist_opt ::= LP idlist RP",
+ /* 181 */ "idlist ::= idlist COMMA nm",
+ /* 182 */ "idlist ::= nm",
+ /* 183 */ "expr ::= term",
+ /* 184 */ "expr ::= LP expr RP",
+ /* 185 */ "term ::= NULL",
+ /* 186 */ "expr ::= ID|INDEXED",
+ /* 187 */ "expr ::= JOIN_KW",
+ /* 188 */ "expr ::= nm DOT nm",
+ /* 189 */ "expr ::= nm DOT nm DOT nm",
+ /* 190 */ "term ::= INTEGER|FLOAT|BLOB",
+ /* 191 */ "term ::= STRING",
+ /* 192 */ "expr ::= VARIABLE",
+ /* 193 */ "expr ::= expr COLLATE ID|STRING",
+ /* 194 */ "expr ::= CAST LP expr AS typetoken RP",
+ /* 195 */ "expr ::= ID|INDEXED LP distinct exprlist RP",
+ /* 196 */ "expr ::= ID|INDEXED LP STAR RP",
+ /* 197 */ "term ::= CTIME_KW",
+ /* 198 */ "expr ::= expr AND expr",
+ /* 199 */ "expr ::= expr OR expr",
+ /* 200 */ "expr ::= expr LT|GT|GE|LE expr",
+ /* 201 */ "expr ::= expr EQ|NE expr",
+ /* 202 */ "expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr",
+ /* 203 */ "expr ::= expr PLUS|MINUS expr",
+ /* 204 */ "expr ::= expr STAR|SLASH|REM expr",
+ /* 205 */ "expr ::= expr CONCAT expr",
+ /* 206 */ "likeop ::= LIKE_KW|MATCH",
+ /* 207 */ "likeop ::= NOT LIKE_KW|MATCH",
+ /* 208 */ "expr ::= expr likeop expr",
+ /* 209 */ "expr ::= expr likeop expr ESCAPE expr",
+ /* 210 */ "expr ::= expr ISNULL|NOTNULL",
+ /* 211 */ "expr ::= expr NOT NULL",
+ /* 212 */ "expr ::= expr IS expr",
+ /* 213 */ "expr ::= expr IS NOT expr",
+ /* 214 */ "expr ::= NOT expr",
+ /* 215 */ "expr ::= BITNOT expr",
+ /* 216 */ "expr ::= MINUS expr",
+ /* 217 */ "expr ::= PLUS expr",
+ /* 218 */ "between_op ::= BETWEEN",
+ /* 219 */ "between_op ::= NOT BETWEEN",
+ /* 220 */ "expr ::= expr between_op expr AND expr",
+ /* 221 */ "in_op ::= IN",
+ /* 222 */ "in_op ::= NOT IN",
+ /* 223 */ "expr ::= expr in_op LP exprlist RP",
+ /* 224 */ "expr ::= LP select RP",
+ /* 225 */ "expr ::= expr in_op LP select RP",
+ /* 226 */ "expr ::= expr in_op nm dbnm",
+ /* 227 */ "expr ::= EXISTS LP select RP",
+ /* 228 */ "expr ::= CASE case_operand case_exprlist case_else END",
+ /* 229 */ "case_exprlist ::= case_exprlist WHEN expr THEN expr",
+ /* 230 */ "case_exprlist ::= WHEN expr THEN expr",
+ /* 231 */ "case_else ::= ELSE expr",
+ /* 232 */ "case_else ::=",
+ /* 233 */ "case_operand ::= expr",
+ /* 234 */ "case_operand ::=",
+ /* 235 */ "exprlist ::= nexprlist",
+ /* 236 */ "exprlist ::=",
+ /* 237 */ "nexprlist ::= nexprlist COMMA expr",
+ /* 238 */ "nexprlist ::= expr",
+ /* 239 */ "cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP idxlist RP where_opt",
+ /* 240 */ "uniqueflag ::= UNIQUE",
+ /* 241 */ "uniqueflag ::=",
+ /* 242 */ "idxlist_opt ::=",
+ /* 243 */ "idxlist_opt ::= LP idxlist RP",
+ /* 244 */ "idxlist ::= idxlist COMMA nm collate sortorder",
+ /* 245 */ "idxlist ::= nm collate sortorder",
+ /* 246 */ "collate ::=",
+ /* 247 */ "collate ::= COLLATE ID|STRING",
+ /* 248 */ "cmd ::= DROP INDEX ifexists fullname",
+ /* 249 */ "cmd ::= VACUUM",
+ /* 250 */ "cmd ::= VACUUM nm",
+ /* 251 */ "cmd ::= PRAGMA nm dbnm",
+ /* 252 */ "cmd ::= PRAGMA nm dbnm EQ nmnum",
+ /* 253 */ "cmd ::= PRAGMA nm dbnm LP nmnum RP",
+ /* 254 */ "cmd ::= PRAGMA nm dbnm EQ minus_num",
+ /* 255 */ "cmd ::= PRAGMA nm dbnm LP minus_num RP",
+ /* 256 */ "nmnum ::= plus_num",
+ /* 257 */ "nmnum ::= nm",
+ /* 258 */ "nmnum ::= ON",
+ /* 259 */ "nmnum ::= DELETE",
+ /* 260 */ "nmnum ::= DEFAULT",
+ /* 261 */ "plus_num ::= PLUS INTEGER|FLOAT",
+ /* 262 */ "plus_num ::= INTEGER|FLOAT",
+ /* 263 */ "minus_num ::= MINUS INTEGER|FLOAT",
+ /* 264 */ "cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END",
+ /* 265 */ "trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause",
+ /* 266 */ "trigger_time ::= BEFORE",
+ /* 267 */ "trigger_time ::= AFTER",
+ /* 268 */ "trigger_time ::= INSTEAD OF",
+ /* 269 */ "trigger_time ::=",
+ /* 270 */ "trigger_event ::= DELETE|INSERT",
+ /* 271 */ "trigger_event ::= UPDATE",
+ /* 272 */ "trigger_event ::= UPDATE OF idlist",
+ /* 273 */ "foreach_clause ::=",
+ /* 274 */ "foreach_clause ::= FOR EACH ROW",
+ /* 275 */ "when_clause ::=",
+ /* 276 */ "when_clause ::= WHEN expr",
+ /* 277 */ "trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI",
+ /* 278 */ "trigger_cmd_list ::= trigger_cmd SEMI",
+ /* 279 */ "trnm ::= nm",
+ /* 280 */ "trnm ::= nm DOT nm",
+ /* 281 */ "tridxby ::=",
+ /* 282 */ "tridxby ::= INDEXED BY nm",
+ /* 283 */ "tridxby ::= NOT INDEXED",
+ /* 284 */ "trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist where_opt",
+ /* 285 */ "trigger_cmd ::= insert_cmd INTO trnm inscollist_opt select",
+ /* 286 */ "trigger_cmd ::= DELETE FROM trnm tridxby where_opt",
+ /* 287 */ "trigger_cmd ::= select",
+ /* 288 */ "expr ::= RAISE LP IGNORE RP",
+ /* 289 */ "expr ::= RAISE LP raisetype COMMA nm RP",
+ /* 290 */ "raisetype ::= ROLLBACK",
+ /* 291 */ "raisetype ::= ABORT",
+ /* 292 */ "raisetype ::= FAIL",
+ /* 293 */ "cmd ::= DROP TRIGGER ifexists fullname",
+ /* 294 */ "cmd ::= ATTACH database_kw_opt expr AS expr key_opt",
+ /* 295 */ "cmd ::= DETACH database_kw_opt expr",
+ /* 296 */ "key_opt ::=",
+ /* 297 */ "key_opt ::= KEY expr",
+ /* 298 */ "database_kw_opt ::= DATABASE",
+ /* 299 */ "database_kw_opt ::=",
+ /* 300 */ "cmd ::= REINDEX",
+ /* 301 */ "cmd ::= REINDEX nm dbnm",
+ /* 302 */ "cmd ::= ANALYZE",
+ /* 303 */ "cmd ::= ANALYZE nm dbnm",
+ /* 304 */ "cmd ::= ALTER TABLE fullname RENAME TO nm",
+ /* 305 */ "cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt column",
+ /* 306 */ "add_column_fullname ::= fullname",
+ /* 307 */ "kwcolumn_opt ::=",
+ /* 308 */ "kwcolumn_opt ::= COLUMNKW",
+ /* 309 */ "cmd ::= create_vtab",
+ /* 310 */ "cmd ::= create_vtab LP vtabarglist RP",
+ /* 311 */ "create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm",
+ /* 312 */ "vtabarglist ::= vtabarg",
+ /* 313 */ "vtabarglist ::= vtabarglist COMMA vtabarg",
+ /* 314 */ "vtabarg ::=",
+ /* 315 */ "vtabarg ::= vtabarg vtabargtoken",
+ /* 316 */ "vtabargtoken ::= ANY",
+ /* 317 */ "vtabargtoken ::= lp anylist RP",
+ /* 318 */ "lp ::= LP",
+ /* 319 */ "anylist ::=",
+ /* 320 */ "anylist ::= anylist LP anylist RP",
+ /* 321 */ "anylist ::= anylist ANY",
+ /* 322 */ "with ::=",
+ /* 323 */ "with ::= WITH wqlist",
+ /* 324 */ "with ::= WITH RECURSIVE wqlist",
+ /* 325 */ "wqlist ::= nm idxlist_opt AS LP select RP",
+ /* 326 */ "wqlist ::= wqlist COMMA nm idxlist_opt AS LP select RP",
+};
+#endif /* NDEBUG */
+
+
+#if YYSTACKDEPTH<=0
+/*
+** Try to increase the size of the parser stack.
+*/
+static void yyGrowStack(yyParser *p){
+  int newSize;
+  yyStackEntry *pNew;
+
+  newSize = p->yystksz*2 + 100;
+  pNew = realloc(p->yystack, newSize*sizeof(pNew[0]));
+  if( pNew ){
+    p->yystack = pNew;
+    p->yystksz = newSize;
+#ifndef NDEBUG
+    if( yyTraceFILE ){
+      fprintf(yyTraceFILE,"%sStack grows to %d entries!\n",
+              yyTracePrompt, p->yystksz);
+    }
+#endif
+  }
+}
+#endif
+
+/* 
+** This function allocates a new parser.
+** The only argument is a pointer to a function which works like
+** malloc.
+**
+** Inputs:
+** A pointer to the function used to allocate memory.
+**
+** Outputs:
+** A pointer to a parser.  This pointer is used in subsequent calls
+** to sqlite3Parser and sqlite3ParserFree.
+*/
+SQLITE_PRIVATE void *sqlite3ParserAlloc(void *(*mallocProc)(u64)){
+  yyParser *pParser;
+  pParser = (yyParser*)(*mallocProc)( (u64)sizeof(yyParser) );
+  if( pParser ){
+    pParser->yyidx = -1;
+#ifdef YYTRACKMAXSTACKDEPTH
+    pParser->yyidxMax = 0;
+#endif
+#if YYSTACKDEPTH<=0
+    pParser->yystack = NULL;
+    pParser->yystksz = 0;
+    yyGrowStack(pParser);
+#endif
+  }
+  return pParser;
+}
+
+/* The following function deletes the value associated with a
+** symbol.  The symbol can be either a terminal or nonterminal.
+** "yymajor" is the symbol code, and "yypminor" is a pointer to
+** the value.
+*/
+static void yy_destructor(
+  yyParser *yypParser,    /* The parser */
+  YYCODETYPE yymajor,     /* Type code for object to destroy */
+  YYMINORTYPE *yypminor   /* The object to be destroyed */
+){
+  sqlite3ParserARG_FETCH;
+  switch( yymajor ){
+    /* Here is inserted the actions which take place when a
+    ** terminal or non-terminal is destroyed.  This can happen
+    ** when the symbol is popped from the stack during a
+    ** reduce or during error processing or when a parser is 
+    ** being destroyed before it is finished parsing.
+    **
+    ** Note: during a reduce, the only symbols destroyed are those
+    ** which appear on the RHS of the rule, but which are not used
+    ** inside the C code.
+    */
+    case 163: /* select */
+    case 195: /* selectnowith */
+    case 196: /* oneselect */
+    case 207: /* values */
+{
+sqlite3SelectDelete(pParse->db, (yypminor->yy3));
+}
+      break;
+    case 174: /* term */
+    case 175: /* expr */
+{
+sqlite3ExprDelete(pParse->db, (yypminor->yy346).pExpr);
+}
+      break;
+    case 179: /* idxlist_opt */
+    case 188: /* idxlist */
+    case 200: /* selcollist */
+    case 203: /* groupby_opt */
+    case 205: /* orderby_opt */
+    case 208: /* nexprlist */
+    case 209: /* exprlist */
+    case 210: /* sclp */
+    case 220: /* sortlist */
+    case 221: /* setlist */
+    case 228: /* case_exprlist */
+{
+sqlite3ExprListDelete(pParse->db, (yypminor->yy14));
+}
+      break;
+    case 194: /* fullname */
+    case 201: /* from */
+    case 212: /* seltablist */
+    case 213: /* stl_prefix */
+{
+sqlite3SrcListDelete(pParse->db, (yypminor->yy65));
+}
+      break;
+    case 197: /* with */
+    case 252: /* wqlist */
+{
+sqlite3WithDelete(pParse->db, (yypminor->yy59));
+}
+      break;
+    case 202: /* where_opt */
+    case 204: /* having_opt */
+    case 216: /* on_opt */
+    case 227: /* case_operand */
+    case 229: /* case_else */
+    case 238: /* when_clause */
+    case 243: /* key_opt */
+{
+sqlite3ExprDelete(pParse->db, (yypminor->yy132));
+}
+      break;
+    case 217: /* using_opt */
+    case 219: /* idlist */
+    case 223: /* inscollist_opt */
+{
+sqlite3IdListDelete(pParse->db, (yypminor->yy408));
+}
+      break;
+    case 234: /* trigger_cmd_list */
+    case 239: /* trigger_cmd */
+{
+sqlite3DeleteTriggerStep(pParse->db, (yypminor->yy473));
+}
+      break;
+    case 236: /* trigger_event */
+{
+sqlite3IdListDelete(pParse->db, (yypminor->yy378).b);
+}
+      break;
+    default:  break;   /* If no destructor action specified: do nothing */
+  }
+}
+
+/*
+** Pop the parser's stack once.
+**
+** If there is a destructor routine associated with the token which
+** is popped from the stack, then call it.
+**
+** Return the major token number for the symbol popped.
+*/
+static int yy_pop_parser_stack(yyParser *pParser){
+  YYCODETYPE yymajor;
+  yyStackEntry *yytos = &pParser->yystack[pParser->yyidx];
+
+  /* There is no mechanism by which the parser stack can be popped below
+  ** empty in SQLite.  */
+  if( NEVER(pParser->yyidx<0) ) return 0;
+#ifndef NDEBUG
+  if( yyTraceFILE && pParser->yyidx>=0 ){
+    fprintf(yyTraceFILE,"%sPopping %s\n",
+      yyTracePrompt,
+      yyTokenName[yytos->major]);
+  }
+#endif
+  yymajor = yytos->major;
+  yy_destructor(pParser, yymajor, &yytos->minor);
+  pParser->yyidx--;
+  return yymajor;
+}
+
+/* 
+** Deallocate and destroy a parser.  Destructors are all called for
+** all stack elements before shutting the parser down.
+**
+** Inputs:
+** <ul>
+** <li>  A pointer to the parser.  This should be a pointer
+**       obtained from sqlite3ParserAlloc.
+** <li>  A pointer to a function used to reclaim memory obtained
+**       from malloc.
+** </ul>
+*/
+SQLITE_PRIVATE void sqlite3ParserFree(
+  void *p,                    /* The parser to be deleted */
+  void (*freeProc)(void*)     /* Function used to reclaim memory */
+){
+  yyParser *pParser = (yyParser*)p;
+  /* In SQLite, we never try to destroy a parser that was not successfully
+  ** created in the first place. */
+  if( NEVER(pParser==0) ) return;
+  while( pParser->yyidx>=0 ) yy_pop_parser_stack(pParser);
+#if YYSTACKDEPTH<=0
+  free(pParser->yystack);
+#endif
+  (*freeProc)((void*)pParser);
+}
+
+/*
+** Return the peak depth of the stack for a parser.
+*/
+#ifdef YYTRACKMAXSTACKDEPTH
+SQLITE_PRIVATE int sqlite3ParserStackPeak(void *p){
+  yyParser *pParser = (yyParser*)p;
+  return pParser->yyidxMax;
+}
+#endif
+
+/*
+** Find the appropriate action for a parser given the terminal
+** look-ahead token iLookAhead.
+**
+** If the look-ahead token is YYNOCODE, then check to see if the action is
+** independent of the look-ahead.  If it is, return the action, otherwise
+** return YY_NO_ACTION.
+*/
+static int yy_find_shift_action(
+  yyParser *pParser,        /* The parser */
+  YYCODETYPE iLookAhead     /* The look-ahead token */
+){
+  int i;
+  int stateno = pParser->yystack[pParser->yyidx].stateno;
+ 
+  if( stateno>YY_SHIFT_COUNT
+   || (i = yy_shift_ofst[stateno])==YY_SHIFT_USE_DFLT ){
+    return yy_default[stateno];
+  }
+  assert( iLookAhead!=YYNOCODE );
+  i += iLookAhead;
+  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
+    if( iLookAhead>0 ){
+#ifdef YYFALLBACK
+      YYCODETYPE iFallback;            /* Fallback token */
+      if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
+             && (iFallback = yyFallback[iLookAhead])!=0 ){
+#ifndef NDEBUG
+        if( yyTraceFILE ){
+          fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n",
+             yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]);
+        }
+#endif
+        return yy_find_shift_action(pParser, iFallback);
+      }
+#endif
+#ifdef YYWILDCARD
+      {
+        int j = i - iLookAhead + YYWILDCARD;
+        if( 
+#if YY_SHIFT_MIN+YYWILDCARD<0
+          j>=0 &&
+#endif
+#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT
+          j<YY_ACTTAB_COUNT &&
+#endif
+          yy_lookahead[j]==YYWILDCARD
+        ){
+#ifndef NDEBUG
+          if( yyTraceFILE ){
+            fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n",
+               yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[YYWILDCARD]);
+          }
+#endif /* NDEBUG */
+          return yy_action[j];
+        }
+      }
+#endif /* YYWILDCARD */
+    }
+    return yy_default[stateno];
+  }else{
+    return yy_action[i];
+  }
+}
+
+/*
+** Find the appropriate action for a parser given the non-terminal
+** look-ahead token iLookAhead.
+**
+** If the look-ahead token is YYNOCODE, then check to see if the action is
+** independent of the look-ahead.  If it is, return the action, otherwise
+** return YY_NO_ACTION.
+*/
+static int yy_find_reduce_action(
+  int stateno,              /* Current state number */
+  YYCODETYPE iLookAhead     /* The look-ahead token */
+){
+  int i;
+#ifdef YYERRORSYMBOL
+  if( stateno>YY_REDUCE_COUNT ){
+    return yy_default[stateno];
+  }
+#else
+  assert( stateno<=YY_REDUCE_COUNT );
+#endif
+  i = yy_reduce_ofst[stateno];
+  assert( i!=YY_REDUCE_USE_DFLT );
+  assert( iLookAhead!=YYNOCODE );
+  i += iLookAhead;
+#ifdef YYERRORSYMBOL
+  if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
+    return yy_default[stateno];
+  }
+#else
+  assert( i>=0 && i<YY_ACTTAB_COUNT );
+  assert( yy_lookahead[i]==iLookAhead );
+#endif
+  return yy_action[i];
+}
+
+/*
+** The following routine is called if the stack overflows.
+*/
+static void yyStackOverflow(yyParser *yypParser, YYMINORTYPE *yypMinor){
+   sqlite3ParserARG_FETCH;
+   yypParser->yyidx--;
+#ifndef NDEBUG
+   if( yyTraceFILE ){
+     fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt);
+   }
+#endif
+   while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser);
+   /* Here code is inserted which will execute if the parser
+   ** stack every overflows */
+
+  UNUSED_PARAMETER(yypMinor); /* Silence some compiler warnings */
+  sqlite3ErrorMsg(pParse, "parser stack overflow");
+   sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument var */
+}
+
+/*
+** Perform a shift action.
+*/
+static void yy_shift(
+  yyParser *yypParser,          /* The parser to be shifted */
+  int yyNewState,               /* The new state to shift in */
+  int yyMajor,                  /* The major token to shift in */
+  YYMINORTYPE *yypMinor         /* Pointer to the minor token to shift in */
+){
+  yyStackEntry *yytos;
+  yypParser->yyidx++;
+#ifdef YYTRACKMAXSTACKDEPTH
+  if( yypParser->yyidx>yypParser->yyidxMax ){
+    yypParser->yyidxMax = yypParser->yyidx;
+  }
+#endif
+#if YYSTACKDEPTH>0 
+  if( yypParser->yyidx>=YYSTACKDEPTH ){
+    yyStackOverflow(yypParser, yypMinor);
+    return;
+  }
+#else
+  if( yypParser->yyidx>=yypParser->yystksz ){
+    yyGrowStack(yypParser);
+    if( yypParser->yyidx>=yypParser->yystksz ){
+      yyStackOverflow(yypParser, yypMinor);
+      return;
+    }
+  }
+#endif
+  yytos = &yypParser->yystack[yypParser->yyidx];
+  yytos->stateno = (YYACTIONTYPE)yyNewState;
+  yytos->major = (YYCODETYPE)yyMajor;
+  yytos->minor = *yypMinor;
+#ifndef NDEBUG
+  if( yyTraceFILE && yypParser->yyidx>0 ){
+    int i;
+    fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState);
+    fprintf(yyTraceFILE,"%sStack:",yyTracePrompt);
+    for(i=1; i<=yypParser->yyidx; i++)
+      fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]);
+    fprintf(yyTraceFILE,"\n");
+  }
+#endif
+}
+
+/* The following table contains information about every rule that
+** is used during the reduce.
+*/
+static const struct {
+  YYCODETYPE lhs;         /* Symbol on the left-hand side of the rule */
+  unsigned char nrhs;     /* Number of right-hand side symbols in the rule */
+} yyRuleInfo[] = {
+  { 144, 1 },
+  { 145, 2 },
+  { 145, 1 },
+  { 146, 1 },
+  { 146, 3 },
+  { 147, 0 },
+  { 147, 1 },
+  { 147, 3 },
+  { 148, 1 },
+  { 149, 3 },
+  { 151, 0 },
+  { 151, 1 },
+  { 151, 2 },
+  { 150, 0 },
+  { 150, 1 },
+  { 150, 1 },
+  { 150, 1 },
+  { 149, 2 },
+  { 149, 2 },
+  { 149, 2 },
+  { 153, 1 },
+  { 153, 0 },
+  { 149, 2 },
+  { 149, 3 },
+  { 149, 5 },
+  { 149, 2 },
+  { 154, 6 },
+  { 156, 1 },
+  { 158, 0 },
+  { 158, 3 },
+  { 157, 1 },
+  { 157, 0 },
+  { 155, 5 },
+  { 155, 2 },
+  { 162, 0 },
+  { 162, 2 },
+  { 160, 3 },
+  { 160, 1 },
+  { 164, 3 },
+  { 165, 1 },
+  { 152, 1 },
+  { 152, 1 },
+  { 152, 1 },
+  { 166, 0 },
+  { 166, 1 },
+  { 168, 1 },
+  { 168, 4 },
+  { 168, 6 },
+  { 169, 1 },
+  { 169, 2 },
+  { 170, 1 },
+  { 170, 1 },
+  { 167, 2 },
+  { 167, 0 },
+  { 173, 2 },
+  { 173, 2 },
+  { 173, 4 },
+  { 173, 3 },
+  { 173, 3 },
+  { 173, 2 },
+  { 173, 2 },
+  { 173, 3 },
+  { 173, 5 },
+  { 173, 2 },
+  { 173, 4 },
+  { 173, 4 },
+  { 173, 1 },
+  { 173, 2 },
+  { 178, 0 },
+  { 178, 1 },
+  { 180, 0 },
+  { 180, 2 },
+  { 182, 2 },
+  { 182, 3 },
+  { 182, 3 },
+  { 182, 3 },
+  { 183, 2 },
+  { 183, 2 },
+  { 183, 1 },
+  { 183, 1 },
+  { 183, 2 },
+  { 181, 3 },
+  { 181, 2 },
+  { 184, 0 },
+  { 184, 2 },
+  { 184, 2 },
+  { 161, 0 },
+  { 161, 2 },
+  { 185, 3 },
+  { 185, 1 },
+  { 186, 1 },
+  { 186, 0 },
+  { 187, 2 },
+  { 187, 7 },
+  { 187, 5 },
+  { 187, 5 },
+  { 187, 10 },
+  { 189, 0 },
+  { 189, 1 },
+  { 176, 0 },
+  { 176, 3 },
+  { 190, 0 },
+  { 190, 2 },
+  { 191, 1 },
+  { 191, 1 },
+  { 191, 1 },
+  { 149, 4 },
+  { 193, 2 },
+  { 193, 0 },
+  { 149, 8 },
+  { 149, 4 },
+  { 149, 1 },
+  { 163, 2 },
+  { 195, 1 },
+  { 195, 3 },
+  { 198, 1 },
+  { 198, 2 },
+  { 198, 1 },
+  { 196, 9 },
+  { 196, 1 },
+  { 207, 4 },
+  { 207, 5 },
+  { 199, 1 },
+  { 199, 1 },
+  { 199, 0 },
+  { 210, 2 },
+  { 210, 0 },
+  { 200, 3 },
+  { 200, 2 },
+  { 200, 4 },
+  { 211, 2 },
+  { 211, 1 },
+  { 211, 0 },
+  { 201, 0 },
+  { 201, 2 },
+  { 213, 2 },
+  { 213, 0 },
+  { 212, 7 },
+  { 212, 7 },
+  { 212, 7 },
+  { 159, 0 },
+  { 159, 2 },
+  { 194, 2 },
+  { 214, 1 },
+  { 214, 2 },
+  { 214, 3 },
+  { 214, 4 },
+  { 216, 2 },
+  { 216, 0 },
+  { 215, 0 },
+  { 215, 3 },
+  { 215, 2 },
+  { 217, 4 },
+  { 217, 0 },
+  { 205, 0 },
+  { 205, 3 },
+  { 220, 4 },
+  { 220, 2 },
+  { 177, 1 },
+  { 177, 1 },
+  { 177, 0 },
+  { 203, 0 },
+  { 203, 3 },
+  { 204, 0 },
+  { 204, 2 },
+  { 206, 0 },
+  { 206, 2 },
+  { 206, 4 },
+  { 206, 4 },
+  { 149, 6 },
+  { 202, 0 },
+  { 202, 2 },
+  { 149, 8 },
+  { 221, 5 },
+  { 221, 3 },
+  { 149, 6 },
+  { 149, 7 },
+  { 222, 2 },
+  { 222, 1 },
+  { 223, 0 },
+  { 223, 3 },
+  { 219, 3 },
+  { 219, 1 },
+  { 175, 1 },
+  { 175, 3 },
+  { 174, 1 },
+  { 175, 1 },
+  { 175, 1 },
+  { 175, 3 },
+  { 175, 5 },
+  { 174, 1 },
+  { 174, 1 },
+  { 175, 1 },
+  { 175, 3 },
+  { 175, 6 },
+  { 175, 5 },
+  { 175, 4 },
+  { 174, 1 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 3 },
+  { 224, 1 },
+  { 224, 2 },
+  { 175, 3 },
+  { 175, 5 },
+  { 175, 2 },
+  { 175, 3 },
+  { 175, 3 },
+  { 175, 4 },
+  { 175, 2 },
+  { 175, 2 },
+  { 175, 2 },
+  { 175, 2 },
+  { 225, 1 },
+  { 225, 2 },
+  { 175, 5 },
+  { 226, 1 },
+  { 226, 2 },
+  { 175, 5 },
+  { 175, 3 },
+  { 175, 5 },
+  { 175, 4 },
+  { 175, 4 },
+  { 175, 5 },
+  { 228, 5 },
+  { 228, 4 },
+  { 229, 2 },
+  { 229, 0 },
+  { 227, 1 },
+  { 227, 0 },
+  { 209, 1 },
+  { 209, 0 },
+  { 208, 3 },
+  { 208, 1 },
+  { 149, 12 },
+  { 230, 1 },
+  { 230, 0 },
+  { 179, 0 },
+  { 179, 3 },
+  { 188, 5 },
+  { 188, 3 },
+  { 231, 0 },
+  { 231, 2 },
+  { 149, 4 },
+  { 149, 1 },
+  { 149, 2 },
+  { 149, 3 },
+  { 149, 5 },
+  { 149, 6 },
+  { 149, 5 },
+  { 149, 6 },
+  { 232, 1 },
+  { 232, 1 },
+  { 232, 1 },
+  { 232, 1 },
+  { 232, 1 },
+  { 171, 2 },
+  { 171, 1 },
+  { 172, 2 },
+  { 149, 5 },
+  { 233, 11 },
+  { 235, 1 },
+  { 235, 1 },
+  { 235, 2 },
+  { 235, 0 },
+  { 236, 1 },
+  { 236, 1 },
+  { 236, 3 },
+  { 237, 0 },
+  { 237, 3 },
+  { 238, 0 },
+  { 238, 2 },
+  { 234, 3 },
+  { 234, 2 },
+  { 240, 1 },
+  { 240, 3 },
+  { 241, 0 },
+  { 241, 3 },
+  { 241, 2 },
+  { 239, 7 },
+  { 239, 5 },
+  { 239, 5 },
+  { 239, 1 },
+  { 175, 4 },
+  { 175, 6 },
+  { 192, 1 },
+  { 192, 1 },
+  { 192, 1 },
+  { 149, 4 },
+  { 149, 6 },
+  { 149, 3 },
+  { 243, 0 },
+  { 243, 2 },
+  { 242, 1 },
+  { 242, 0 },
+  { 149, 1 },
+  { 149, 3 },
+  { 149, 1 },
+  { 149, 3 },
+  { 149, 6 },
+  { 149, 6 },
+  { 244, 1 },
+  { 245, 0 },
+  { 245, 1 },
+  { 149, 1 },
+  { 149, 4 },
+  { 246, 8 },
+  { 247, 1 },
+  { 247, 3 },
+  { 248, 0 },
+  { 248, 2 },
+  { 249, 1 },
+  { 249, 3 },
+  { 250, 1 },
+  { 251, 0 },
+  { 251, 4 },
+  { 251, 2 },
+  { 197, 0 },
+  { 197, 2 },
+  { 197, 3 },
+  { 252, 6 },
+  { 252, 8 },
+};
+
+static void yy_accept(yyParser*);  /* Forward Declaration */
+
+/*
+** Perform a reduce action and the shift that must immediately
+** follow the reduce.
+*/
+static void yy_reduce(
+  yyParser *yypParser,         /* The parser */
+  int yyruleno                 /* Number of the rule by which to reduce */
+){
+  int yygoto;                     /* The next state */
+  int yyact;                      /* The next action */
+  YYMINORTYPE yygotominor;        /* The LHS of the rule reduced */
+  yyStackEntry *yymsp;            /* The top of the parser's stack */
+  int yysize;                     /* Amount to pop the stack */
+  sqlite3ParserARG_FETCH;
+  yymsp = &yypParser->yystack[yypParser->yyidx];
+#ifndef NDEBUG
+  if( yyTraceFILE && yyruleno>=0 
+        && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){
+    fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt,
+      yyRuleName[yyruleno]);
+  }
+#endif /* NDEBUG */
+
+  /* Silence complaints from purify about yygotominor being uninitialized
+  ** in some cases when it is copied into the stack after the following
+  ** switch.  yygotominor is uninitialized when a rule reduces that does
+  ** not set the value of its left-hand side nonterminal.  Leaving the
+  ** value of the nonterminal uninitialized is utterly harmless as long
+  ** as the value is never used.  So really the only thing this code
+  ** accomplishes is to quieten purify.  
+  **
+  ** 2007-01-16:  The wireshark project (www.wireshark.org) reports that
+  ** without this code, their parser segfaults.  I'm not sure what there
+  ** parser is doing to make this happen.  This is the second bug report
+  ** from wireshark this week.  Clearly they are stressing Lemon in ways
+  ** that it has not been previously stressed...  (SQLite ticket #2172)
+  */
+  /*memset(&yygotominor, 0, sizeof(yygotominor));*/
+  yygotominor = yyzerominor;
+
+
+  switch( yyruleno ){
+  /* Beginning here are the reduction cases.  A typical example
+  ** follows:
+  **   case 0:
+  **  #line <lineno> <grammarfile>
+  **     { ... }           // User supplied code
+  **  #line <lineno> <thisfile>
+  **     break;
+  */
+      case 5: /* explain ::= */
+{ sqlite3BeginParse(pParse, 0); }
+        break;
+      case 6: /* explain ::= EXPLAIN */
+{ sqlite3BeginParse(pParse, 1); }
+        break;
+      case 7: /* explain ::= EXPLAIN QUERY PLAN */
+{ sqlite3BeginParse(pParse, 2); }
+        break;
+      case 8: /* cmdx ::= cmd */
+{ sqlite3FinishCoding(pParse); }
+        break;
+      case 9: /* cmd ::= BEGIN transtype trans_opt */
+{sqlite3BeginTransaction(pParse, yymsp[-1].minor.yy328);}
+        break;
+      case 13: /* transtype ::= */
+{yygotominor.yy328 = TK_DEFERRED;}
+        break;
+      case 14: /* transtype ::= DEFERRED */
+      case 15: /* transtype ::= IMMEDIATE */ yytestcase(yyruleno==15);
+      case 16: /* transtype ::= EXCLUSIVE */ yytestcase(yyruleno==16);
+      case 115: /* multiselect_op ::= UNION */ yytestcase(yyruleno==115);
+      case 117: /* multiselect_op ::= EXCEPT|INTERSECT */ yytestcase(yyruleno==117);
+{yygotominor.yy328 = yymsp[0].major;}
+        break;
+      case 17: /* cmd ::= COMMIT trans_opt */
+      case 18: /* cmd ::= END trans_opt */ yytestcase(yyruleno==18);
+{sqlite3CommitTransaction(pParse);}
+        break;
+      case 19: /* cmd ::= ROLLBACK trans_opt */
+{sqlite3RollbackTransaction(pParse);}
+        break;
+      case 22: /* cmd ::= SAVEPOINT nm */
+{
+  sqlite3Savepoint(pParse, SAVEPOINT_BEGIN, &yymsp[0].minor.yy0);
+}
+        break;
+      case 23: /* cmd ::= RELEASE savepoint_opt nm */
+{
+  sqlite3Savepoint(pParse, SAVEPOINT_RELEASE, &yymsp[0].minor.yy0);
+}
+        break;
+      case 24: /* cmd ::= ROLLBACK trans_opt TO savepoint_opt nm */
+{
+  sqlite3Savepoint(pParse, SAVEPOINT_ROLLBACK, &yymsp[0].minor.yy0);
+}
+        break;
+      case 26: /* create_table ::= createkw temp TABLE ifnotexists nm dbnm */
+{
+   sqlite3StartTable(pParse,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0,yymsp[-4].minor.yy328,0,0,yymsp[-2].minor.yy328);
+}
+        break;
+      case 27: /* createkw ::= CREATE */
+{
+  pParse->db->lookaside.bEnabled = 0;
+  yygotominor.yy0 = yymsp[0].minor.yy0;
+}
+        break;
+      case 28: /* ifnotexists ::= */
+      case 31: /* temp ::= */ yytestcase(yyruleno==31);
+      case 68: /* autoinc ::= */ yytestcase(yyruleno==68);
+      case 81: /* defer_subclause ::= NOT DEFERRABLE init_deferred_pred_opt */ yytestcase(yyruleno==81);
+      case 83: /* init_deferred_pred_opt ::= */ yytestcase(yyruleno==83);
+      case 85: /* init_deferred_pred_opt ::= INITIALLY IMMEDIATE */ yytestcase(yyruleno==85);
+      case 97: /* defer_subclause_opt ::= */ yytestcase(yyruleno==97);
+      case 108: /* ifexists ::= */ yytestcase(yyruleno==108);
+      case 218: /* between_op ::= BETWEEN */ yytestcase(yyruleno==218);
+      case 221: /* in_op ::= IN */ yytestcase(yyruleno==221);
+{yygotominor.yy328 = 0;}
+        break;
+      case 29: /* ifnotexists ::= IF NOT EXISTS */
+      case 30: /* temp ::= TEMP */ yytestcase(yyruleno==30);
+      case 69: /* autoinc ::= AUTOINCR */ yytestcase(yyruleno==69);
+      case 84: /* init_deferred_pred_opt ::= INITIALLY DEFERRED */ yytestcase(yyruleno==84);
+      case 107: /* ifexists ::= IF EXISTS */ yytestcase(yyruleno==107);
+      case 219: /* between_op ::= NOT BETWEEN */ yytestcase(yyruleno==219);
+      case 222: /* in_op ::= NOT IN */ yytestcase(yyruleno==222);
+{yygotominor.yy328 = 1;}
+        break;
+      case 32: /* create_table_args ::= LP columnlist conslist_opt RP table_options */
+{
+  sqlite3EndTable(pParse,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0,yymsp[0].minor.yy186,0);
+}
+        break;
+      case 33: /* create_table_args ::= AS select */
+{
+  sqlite3EndTable(pParse,0,0,0,yymsp[0].minor.yy3);
+  sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy3);
+}
+        break;
+      case 34: /* table_options ::= */
+{yygotominor.yy186 = 0;}
+        break;
+      case 35: /* table_options ::= WITHOUT nm */
+{
+  if( yymsp[0].minor.yy0.n==5 && sqlite3_strnicmp(yymsp[0].minor.yy0.z,"rowid",5)==0 ){
+    yygotominor.yy186 = TF_WithoutRowid;
+  }else{
+    yygotominor.yy186 = 0;
+    sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z);
+  }
+}
+        break;
+      case 38: /* column ::= columnid type carglist */
+{
+  yygotominor.yy0.z = yymsp[-2].minor.yy0.z;
+  yygotominor.yy0.n = (int)(pParse->sLastToken.z-yymsp[-2].minor.yy0.z) + pParse->sLastToken.n;
+}
+        break;
+      case 39: /* columnid ::= nm */
+{
+  sqlite3AddColumn(pParse,&yymsp[0].minor.yy0);
+  yygotominor.yy0 = yymsp[0].minor.yy0;
+  pParse->constraintName.n = 0;
+}
+        break;
+      case 40: /* nm ::= ID|INDEXED */
+      case 41: /* nm ::= STRING */ yytestcase(yyruleno==41);
+      case 42: /* nm ::= JOIN_KW */ yytestcase(yyruleno==42);
+      case 45: /* typetoken ::= typename */ yytestcase(yyruleno==45);
+      case 48: /* typename ::= ID|STRING */ yytestcase(yyruleno==48);
+      case 130: /* as ::= AS nm */ yytestcase(yyruleno==130);
+      case 131: /* as ::= ID|STRING */ yytestcase(yyruleno==131);
+      case 141: /* dbnm ::= DOT nm */ yytestcase(yyruleno==141);
+      case 150: /* indexed_opt ::= INDEXED BY nm */ yytestcase(yyruleno==150);
+      case 247: /* collate ::= COLLATE ID|STRING */ yytestcase(yyruleno==247);
+      case 256: /* nmnum ::= plus_num */ yytestcase(yyruleno==256);
+      case 257: /* nmnum ::= nm */ yytestcase(yyruleno==257);
+      case 258: /* nmnum ::= ON */ yytestcase(yyruleno==258);
+      case 259: /* nmnum ::= DELETE */ yytestcase(yyruleno==259);
+      case 260: /* nmnum ::= DEFAULT */ yytestcase(yyruleno==260);
+      case 261: /* plus_num ::= PLUS INTEGER|FLOAT */ yytestcase(yyruleno==261);
+      case 262: /* plus_num ::= INTEGER|FLOAT */ yytestcase(yyruleno==262);
+      case 263: /* minus_num ::= MINUS INTEGER|FLOAT */ yytestcase(yyruleno==263);
+      case 279: /* trnm ::= nm */ yytestcase(yyruleno==279);
+{yygotominor.yy0 = yymsp[0].minor.yy0;}
+        break;
+      case 44: /* type ::= typetoken */
+{sqlite3AddColumnType(pParse,&yymsp[0].minor.yy0);}
+        break;
+      case 46: /* typetoken ::= typename LP signed RP */
+{
+  yygotominor.yy0.z = yymsp[-3].minor.yy0.z;
+  yygotominor.yy0.n = (int)(&yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n] - yymsp[-3].minor.yy0.z);
+}
+        break;
+      case 47: /* typetoken ::= typename LP signed COMMA signed RP */
+{
+  yygotominor.yy0.z = yymsp[-5].minor.yy0.z;
+  yygotominor.yy0.n = (int)(&yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n] - yymsp[-5].minor.yy0.z);
+}
+        break;
+      case 49: /* typename ::= typename ID|STRING */
+{yygotominor.yy0.z=yymsp[-1].minor.yy0.z; yygotominor.yy0.n=yymsp[0].minor.yy0.n+(int)(yymsp[0].minor.yy0.z-yymsp[-1].minor.yy0.z);}
+        break;
+      case 54: /* ccons ::= CONSTRAINT nm */
+      case 92: /* tcons ::= CONSTRAINT nm */ yytestcase(yyruleno==92);
+{pParse->constraintName = yymsp[0].minor.yy0;}
+        break;
+      case 55: /* ccons ::= DEFAULT term */
+      case 57: /* ccons ::= DEFAULT PLUS term */ yytestcase(yyruleno==57);
+{sqlite3AddDefaultValue(pParse,&yymsp[0].minor.yy346);}
+        break;
+      case 56: /* ccons ::= DEFAULT LP expr RP */
+{sqlite3AddDefaultValue(pParse,&yymsp[-1].minor.yy346);}
+        break;
+      case 58: /* ccons ::= DEFAULT MINUS term */
+{
+  ExprSpan v;
+  v.pExpr = sqlite3PExpr(pParse, TK_UMINUS, yymsp[0].minor.yy346.pExpr, 0, 0);
+  v.zStart = yymsp[-1].minor.yy0.z;
+  v.zEnd = yymsp[0].minor.yy346.zEnd;
+  sqlite3AddDefaultValue(pParse,&v);
+}
+        break;
+      case 59: /* ccons ::= DEFAULT ID|INDEXED */
+{
+  ExprSpan v;
+  spanExpr(&v, pParse, TK_STRING, &yymsp[0].minor.yy0);
+  sqlite3AddDefaultValue(pParse,&v);
+}
+        break;
+      case 61: /* ccons ::= NOT NULL onconf */
+{sqlite3AddNotNull(pParse, yymsp[0].minor.yy328);}
+        break;
+      case 62: /* ccons ::= PRIMARY KEY sortorder onconf autoinc */
+{sqlite3AddPrimaryKey(pParse,0,yymsp[-1].minor.yy328,yymsp[0].minor.yy328,yymsp[-2].minor.yy328);}
+        break;
+      case 63: /* ccons ::= UNIQUE onconf */
+{sqlite3CreateIndex(pParse,0,0,0,0,yymsp[0].minor.yy328,0,0,0,0);}
+        break;
+      case 64: /* ccons ::= CHECK LP expr RP */
+{sqlite3AddCheckConstraint(pParse,yymsp[-1].minor.yy346.pExpr);}
+        break;
+      case 65: /* ccons ::= REFERENCES nm idxlist_opt refargs */
+{sqlite3CreateForeignKey(pParse,0,&yymsp[-2].minor.yy0,yymsp[-1].minor.yy14,yymsp[0].minor.yy328);}
+        break;
+      case 66: /* ccons ::= defer_subclause */
+{sqlite3DeferForeignKey(pParse,yymsp[0].minor.yy328);}
+        break;
+      case 67: /* ccons ::= COLLATE ID|STRING */
+{sqlite3AddCollateType(pParse, &yymsp[0].minor.yy0);}
+        break;
+      case 70: /* refargs ::= */
+{ yygotominor.yy328 = OE_None*0x0101; /* EV: R-19803-45884 */}
+        break;
+      case 71: /* refargs ::= refargs refarg */
+{ yygotominor.yy328 = (yymsp[-1].minor.yy328 & ~yymsp[0].minor.yy429.mask) | yymsp[0].minor.yy429.value; }
+        break;
+      case 72: /* refarg ::= MATCH nm */
+      case 73: /* refarg ::= ON INSERT refact */ yytestcase(yyruleno==73);
+{ yygotominor.yy429.value = 0;     yygotominor.yy429.mask = 0x000000; }
+        break;
+      case 74: /* refarg ::= ON DELETE refact */
+{ yygotominor.yy429.value = yymsp[0].minor.yy328;     yygotominor.yy429.mask = 0x0000ff; }
+        break;
+      case 75: /* refarg ::= ON UPDATE refact */
+{ yygotominor.yy429.value = yymsp[0].minor.yy328<<8;  yygotominor.yy429.mask = 0x00ff00; }
+        break;
+      case 76: /* refact ::= SET NULL */
+{ yygotominor.yy328 = OE_SetNull;  /* EV: R-33326-45252 */}
+        break;
+      case 77: /* refact ::= SET DEFAULT */
+{ yygotominor.yy328 = OE_SetDflt;  /* EV: R-33326-45252 */}
+        break;
+      case 78: /* refact ::= CASCADE */
+{ yygotominor.yy328 = OE_Cascade;  /* EV: R-33326-45252 */}
+        break;
+      case 79: /* refact ::= RESTRICT */
+{ yygotominor.yy328 = OE_Restrict; /* EV: R-33326-45252 */}
+        break;
+      case 80: /* refact ::= NO ACTION */
+{ yygotominor.yy328 = OE_None;     /* EV: R-33326-45252 */}
+        break;
+      case 82: /* defer_subclause ::= DEFERRABLE init_deferred_pred_opt */
+      case 98: /* defer_subclause_opt ::= defer_subclause */ yytestcase(yyruleno==98);
+      case 100: /* onconf ::= ON CONFLICT resolvetype */ yytestcase(yyruleno==100);
+      case 103: /* resolvetype ::= raisetype */ yytestcase(yyruleno==103);
+{yygotominor.yy328 = yymsp[0].minor.yy328;}
+        break;
+      case 86: /* conslist_opt ::= */
+{yygotominor.yy0.n = 0; yygotominor.yy0.z = 0;}
+        break;
+      case 87: /* conslist_opt ::= COMMA conslist */
+{yygotominor.yy0 = yymsp[-1].minor.yy0;}
+        break;
+      case 90: /* tconscomma ::= COMMA */
+{pParse->constraintName.n = 0;}
+        break;
+      case 93: /* tcons ::= PRIMARY KEY LP idxlist autoinc RP onconf */
+{sqlite3AddPrimaryKey(pParse,yymsp[-3].minor.yy14,yymsp[0].minor.yy328,yymsp[-2].minor.yy328,0);}
+        break;
+      case 94: /* tcons ::= UNIQUE LP idxlist RP onconf */
+{sqlite3CreateIndex(pParse,0,0,0,yymsp[-2].minor.yy14,yymsp[0].minor.yy328,0,0,0,0);}
+        break;
+      case 95: /* tcons ::= CHECK LP expr RP onconf */
+{sqlite3AddCheckConstraint(pParse,yymsp[-2].minor.yy346.pExpr);}
+        break;
+      case 96: /* tcons ::= FOREIGN KEY LP idxlist RP REFERENCES nm idxlist_opt refargs defer_subclause_opt */
+{
+    sqlite3CreateForeignKey(pParse, yymsp[-6].minor.yy14, &yymsp[-3].minor.yy0, yymsp[-2].minor.yy14, yymsp[-1].minor.yy328);
+    sqlite3DeferForeignKey(pParse, yymsp[0].minor.yy328);
+}
+        break;
+      case 99: /* onconf ::= */
+{yygotominor.yy328 = OE_Default;}
+        break;
+      case 101: /* orconf ::= */
+{yygotominor.yy186 = OE_Default;}
+        break;
+      case 102: /* orconf ::= OR resolvetype */
+{yygotominor.yy186 = (u8)yymsp[0].minor.yy328;}
+        break;
+      case 104: /* resolvetype ::= IGNORE */
+{yygotominor.yy328 = OE_Ignore;}
+        break;
+      case 105: /* resolvetype ::= REPLACE */
+{yygotominor.yy328 = OE_Replace;}
+        break;
+      case 106: /* cmd ::= DROP TABLE ifexists fullname */
+{
+  sqlite3DropTable(pParse, yymsp[0].minor.yy65, 0, yymsp[-1].minor.yy328);
+}
+        break;
+      case 109: /* cmd ::= createkw temp VIEW ifnotexists nm dbnm AS select */
+{
+  sqlite3CreateView(pParse, &yymsp[-7].minor.yy0, &yymsp[-3].minor.yy0, &yymsp[-2].minor.yy0, yymsp[0].minor.yy3, yymsp[-6].minor.yy328, yymsp[-4].minor.yy328);
+}
+        break;
+      case 110: /* cmd ::= DROP VIEW ifexists fullname */
+{
+  sqlite3DropTable(pParse, yymsp[0].minor.yy65, 1, yymsp[-1].minor.yy328);
+}
+        break;
+      case 111: /* cmd ::= select */
+{
+  SelectDest dest = {SRT_Output, 0, 0, 0, 0, 0};
+  sqlite3Select(pParse, yymsp[0].minor.yy3, &dest);
+  sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy3);
+}
+        break;
+      case 112: /* select ::= with selectnowith */
+{
+  Select *p = yymsp[0].minor.yy3, *pNext, *pLoop;
+  if( p ){
+    int cnt = 0, mxSelect;
+    p->pWith = yymsp[-1].minor.yy59;
+    if( p->pPrior ){
+      u16 allValues = SF_Values;
+      pNext = 0;
+      for(pLoop=p; pLoop; pNext=pLoop, pLoop=pLoop->pPrior, cnt++){
+        pLoop->pNext = pNext;
+        pLoop->selFlags |= SF_Compound;
+        allValues &= pLoop->selFlags;
+      }
+      if( allValues ){
+        p->selFlags |= SF_AllValues;
+      }else if(
+        (mxSelect = pParse->db->aLimit[SQLITE_LIMIT_COMPOUND_SELECT])>0
+        && cnt>mxSelect
+      ){
+        sqlite3ErrorMsg(pParse, "too many terms in compound SELECT");
+      }
+    }
+  }else{
+    sqlite3WithDelete(pParse->db, yymsp[-1].minor.yy59);
+  }
+  yygotominor.yy3 = p;
+}
+        break;
+      case 113: /* selectnowith ::= oneselect */
+      case 119: /* oneselect ::= values */ yytestcase(yyruleno==119);
+{yygotominor.yy3 = yymsp[0].minor.yy3;}
+        break;
+      case 114: /* selectnowith ::= selectnowith multiselect_op oneselect */
+{
+  Select *pRhs = yymsp[0].minor.yy3;
+  if( pRhs && pRhs->pPrior ){
+    SrcList *pFrom;
+    Token x;
+    x.n = 0;
+    pFrom = sqlite3SrcListAppendFromTerm(pParse,0,0,0,&x,pRhs,0,0);
+    pRhs = sqlite3SelectNew(pParse,0,pFrom,0,0,0,0,0,0,0);
+  }
+  if( pRhs ){
+    pRhs->op = (u8)yymsp[-1].minor.yy328;
+    pRhs->pPrior = yymsp[-2].minor.yy3;
+    if( yymsp[-1].minor.yy328!=TK_ALL ) pParse->hasCompound = 1;
+  }else{
+    sqlite3SelectDelete(pParse->db, yymsp[-2].minor.yy3);
+  }
+  yygotominor.yy3 = pRhs;
+}
+        break;
+      case 116: /* multiselect_op ::= UNION ALL */
+{yygotominor.yy328 = TK_ALL;}
+        break;
+      case 118: /* oneselect ::= SELECT distinct selcollist from where_opt groupby_opt having_opt orderby_opt limit_opt */
+{
+  yygotominor.yy3 = sqlite3SelectNew(pParse,yymsp[-6].minor.yy14,yymsp[-5].minor.yy65,yymsp[-4].minor.yy132,yymsp[-3].minor.yy14,yymsp[-2].minor.yy132,yymsp[-1].minor.yy14,yymsp[-7].minor.yy381,yymsp[0].minor.yy476.pLimit,yymsp[0].minor.yy476.pOffset);
+#if SELECTTRACE_ENABLED
+  /* Populate the Select.zSelName[] string that is used to help with
+  ** query planner debugging, to differentiate between multiple Select
+  ** objects in a complex query.
+  **
+  ** If the SELECT keyword is immediately followed by a C-style comment
+  ** then extract the first few alphanumeric characters from within that
+  ** comment to be the zSelName value.  Otherwise, the label is #N where
+  ** is an integer that is incremented with each SELECT statement seen.
+  */
+  if( yygotominor.yy3!=0 ){
+    const char *z = yymsp[-8].minor.yy0.z+6;
+    int i;
+    sqlite3_snprintf(sizeof(yygotominor.yy3->zSelName), yygotominor.yy3->zSelName, "#%d",
+                     ++pParse->nSelect);
+    while( z[0]==' ' ) z++;
+    if( z[0]=='/' && z[1]=='*' ){
+      z += 2;
+      while( z[0]==' ' ) z++;
+      for(i=0; sqlite3Isalnum(z[i]); i++){}
+      sqlite3_snprintf(sizeof(yygotominor.yy3->zSelName), yygotominor.yy3->zSelName, "%.*s", i, z);
+    }
+  }
+#endif /* SELECTRACE_ENABLED */
+}
+        break;
+      case 120: /* values ::= VALUES LP nexprlist RP */
+{
+  yygotominor.yy3 = sqlite3SelectNew(pParse,yymsp[-1].minor.yy14,0,0,0,0,0,SF_Values,0,0);
+}
+        break;
+      case 121: /* values ::= values COMMA LP exprlist RP */
+{
+  Select *pRight = sqlite3SelectNew(pParse,yymsp[-1].minor.yy14,0,0,0,0,0,SF_Values,0,0);
+  if( pRight ){
+    pRight->op = TK_ALL;
+    pRight->pPrior = yymsp[-4].minor.yy3;
+    yygotominor.yy3 = pRight;
+  }else{
+    yygotominor.yy3 = yymsp[-4].minor.yy3;
+  }
+}
+        break;
+      case 122: /* distinct ::= DISTINCT */
+{yygotominor.yy381 = SF_Distinct;}
+        break;
+      case 123: /* distinct ::= ALL */
+      case 124: /* distinct ::= */ yytestcase(yyruleno==124);
+{yygotominor.yy381 = 0;}
+        break;
+      case 125: /* sclp ::= selcollist COMMA */
+      case 243: /* idxlist_opt ::= LP idxlist RP */ yytestcase(yyruleno==243);
+{yygotominor.yy14 = yymsp[-1].minor.yy14;}
+        break;
+      case 126: /* sclp ::= */
+      case 154: /* orderby_opt ::= */ yytestcase(yyruleno==154);
+      case 161: /* groupby_opt ::= */ yytestcase(yyruleno==161);
+      case 236: /* exprlist ::= */ yytestcase(yyruleno==236);
+      case 242: /* idxlist_opt ::= */ yytestcase(yyruleno==242);
+{yygotominor.yy14 = 0;}
+        break;
+      case 127: /* selcollist ::= sclp expr as */
+{
+   yygotominor.yy14 = sqlite3ExprListAppend(pParse, yymsp[-2].minor.yy14, yymsp[-1].minor.yy346.pExpr);
+   if( yymsp[0].minor.yy0.n>0 ) sqlite3ExprListSetName(pParse, yygotominor.yy14, &yymsp[0].minor.yy0, 1);
+   sqlite3ExprListSetSpan(pParse,yygotominor.yy14,&yymsp[-1].minor.yy346);
+}
+        break;
+      case 128: /* selcollist ::= sclp STAR */
+{
+  Expr *p = sqlite3Expr(pParse->db, TK_ALL, 0);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse, yymsp[-1].minor.yy14, p);
+}
+        break;
+      case 129: /* selcollist ::= sclp nm DOT STAR */
+{
+  Expr *pRight = sqlite3PExpr(pParse, TK_ALL, 0, 0, &yymsp[0].minor.yy0);
+  Expr *pLeft = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[-2].minor.yy0);
+  Expr *pDot = sqlite3PExpr(pParse, TK_DOT, pLeft, pRight, 0);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yymsp[-3].minor.yy14, pDot);
+}
+        break;
+      case 132: /* as ::= */
+{yygotominor.yy0.n = 0;}
+        break;
+      case 133: /* from ::= */
+{yygotominor.yy65 = sqlite3DbMallocZero(pParse->db, sizeof(*yygotominor.yy65));}
+        break;
+      case 134: /* from ::= FROM seltablist */
+{
+  yygotominor.yy65 = yymsp[0].minor.yy65;
+  sqlite3SrcListShiftJoinType(yygotominor.yy65);
+}
+        break;
+      case 135: /* stl_prefix ::= seltablist joinop */
+{
+   yygotominor.yy65 = yymsp[-1].minor.yy65;
+   if( ALWAYS(yygotominor.yy65 && yygotominor.yy65->nSrc>0) ) yygotominor.yy65->a[yygotominor.yy65->nSrc-1].jointype = (u8)yymsp[0].minor.yy328;
+}
+        break;
+      case 136: /* stl_prefix ::= */
+{yygotominor.yy65 = 0;}
+        break;
+      case 137: /* seltablist ::= stl_prefix nm dbnm as indexed_opt on_opt using_opt */
+{
+  yygotominor.yy65 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy65,&yymsp[-5].minor.yy0,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,0,yymsp[-1].minor.yy132,yymsp[0].minor.yy408);
+  sqlite3SrcListIndexedBy(pParse, yygotominor.yy65, &yymsp[-2].minor.yy0);
+}
+        break;
+      case 138: /* seltablist ::= stl_prefix LP select RP as on_opt using_opt */
+{
+    yygotominor.yy65 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy65,0,0,&yymsp[-2].minor.yy0,yymsp[-4].minor.yy3,yymsp[-1].minor.yy132,yymsp[0].minor.yy408);
+  }
+        break;
+      case 139: /* seltablist ::= stl_prefix LP seltablist RP as on_opt using_opt */
+{
+    if( yymsp[-6].minor.yy65==0 && yymsp[-2].minor.yy0.n==0 && yymsp[-1].minor.yy132==0 && yymsp[0].minor.yy408==0 ){
+      yygotominor.yy65 = yymsp[-4].minor.yy65;
+    }else if( yymsp[-4].minor.yy65->nSrc==1 ){
+      yygotominor.yy65 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy65,0,0,&yymsp[-2].minor.yy0,0,yymsp[-1].minor.yy132,yymsp[0].minor.yy408);
+      if( yygotominor.yy65 ){
+        struct SrcList_item *pNew = &yygotominor.yy65->a[yygotominor.yy65->nSrc-1];
+        struct SrcList_item *pOld = yymsp[-4].minor.yy65->a;
+        pNew->zName = pOld->zName;
+        pNew->zDatabase = pOld->zDatabase;
+        pNew->pSelect = pOld->pSelect;
+        pOld->zName = pOld->zDatabase = 0;
+        pOld->pSelect = 0;
+      }
+      sqlite3SrcListDelete(pParse->db, yymsp[-4].minor.yy65);
+    }else{
+      Select *pSubquery;
+      sqlite3SrcListShiftJoinType(yymsp[-4].minor.yy65);
+      pSubquery = sqlite3SelectNew(pParse,0,yymsp[-4].minor.yy65,0,0,0,0,SF_NestedFrom,0,0);
+      yygotominor.yy65 = sqlite3SrcListAppendFromTerm(pParse,yymsp[-6].minor.yy65,0,0,&yymsp[-2].minor.yy0,pSubquery,yymsp[-1].minor.yy132,yymsp[0].minor.yy408);
+    }
+  }
+        break;
+      case 140: /* dbnm ::= */
+      case 149: /* indexed_opt ::= */ yytestcase(yyruleno==149);
+{yygotominor.yy0.z=0; yygotominor.yy0.n=0;}
+        break;
+      case 142: /* fullname ::= nm dbnm */
+{yygotominor.yy65 = sqlite3SrcListAppend(pParse->db,0,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0);}
+        break;
+      case 143: /* joinop ::= COMMA|JOIN */
+{ yygotominor.yy328 = JT_INNER; }
+        break;
+      case 144: /* joinop ::= JOIN_KW JOIN */
+{ yygotominor.yy328 = sqlite3JoinType(pParse,&yymsp[-1].minor.yy0,0,0); }
+        break;
+      case 145: /* joinop ::= JOIN_KW nm JOIN */
+{ yygotominor.yy328 = sqlite3JoinType(pParse,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0,0); }
+        break;
+      case 146: /* joinop ::= JOIN_KW nm nm JOIN */
+{ yygotominor.yy328 = sqlite3JoinType(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[-1].minor.yy0); }
+        break;
+      case 147: /* on_opt ::= ON expr */
+      case 164: /* having_opt ::= HAVING expr */ yytestcase(yyruleno==164);
+      case 171: /* where_opt ::= WHERE expr */ yytestcase(yyruleno==171);
+      case 231: /* case_else ::= ELSE expr */ yytestcase(yyruleno==231);
+      case 233: /* case_operand ::= expr */ yytestcase(yyruleno==233);
+{yygotominor.yy132 = yymsp[0].minor.yy346.pExpr;}
+        break;
+      case 148: /* on_opt ::= */
+      case 163: /* having_opt ::= */ yytestcase(yyruleno==163);
+      case 170: /* where_opt ::= */ yytestcase(yyruleno==170);
+      case 232: /* case_else ::= */ yytestcase(yyruleno==232);
+      case 234: /* case_operand ::= */ yytestcase(yyruleno==234);
+{yygotominor.yy132 = 0;}
+        break;
+      case 151: /* indexed_opt ::= NOT INDEXED */
+{yygotominor.yy0.z=0; yygotominor.yy0.n=1;}
+        break;
+      case 152: /* using_opt ::= USING LP idlist RP */
+      case 180: /* inscollist_opt ::= LP idlist RP */ yytestcase(yyruleno==180);
+{yygotominor.yy408 = yymsp[-1].minor.yy408;}
+        break;
+      case 153: /* using_opt ::= */
+      case 179: /* inscollist_opt ::= */ yytestcase(yyruleno==179);
+{yygotominor.yy408 = 0;}
+        break;
+      case 155: /* orderby_opt ::= ORDER BY sortlist */
+      case 162: /* groupby_opt ::= GROUP BY nexprlist */ yytestcase(yyruleno==162);
+      case 235: /* exprlist ::= nexprlist */ yytestcase(yyruleno==235);
+{yygotominor.yy14 = yymsp[0].minor.yy14;}
+        break;
+      case 156: /* sortlist ::= sortlist COMMA expr sortorder */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yymsp[-3].minor.yy14,yymsp[-1].minor.yy346.pExpr);
+  if( yygotominor.yy14 ) yygotominor.yy14->a[yygotominor.yy14->nExpr-1].sortOrder = (u8)yymsp[0].minor.yy328;
+}
+        break;
+      case 157: /* sortlist ::= expr sortorder */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,0,yymsp[-1].minor.yy346.pExpr);
+  if( yygotominor.yy14 && ALWAYS(yygotominor.yy14->a) ) yygotominor.yy14->a[0].sortOrder = (u8)yymsp[0].minor.yy328;
+}
+        break;
+      case 158: /* sortorder ::= ASC */
+      case 160: /* sortorder ::= */ yytestcase(yyruleno==160);
+{yygotominor.yy328 = SQLITE_SO_ASC;}
+        break;
+      case 159: /* sortorder ::= DESC */
+{yygotominor.yy328 = SQLITE_SO_DESC;}
+        break;
+      case 165: /* limit_opt ::= */
+{yygotominor.yy476.pLimit = 0; yygotominor.yy476.pOffset = 0;}
+        break;
+      case 166: /* limit_opt ::= LIMIT expr */
+{yygotominor.yy476.pLimit = yymsp[0].minor.yy346.pExpr; yygotominor.yy476.pOffset = 0;}
+        break;
+      case 167: /* limit_opt ::= LIMIT expr OFFSET expr */
+{yygotominor.yy476.pLimit = yymsp[-2].minor.yy346.pExpr; yygotominor.yy476.pOffset = yymsp[0].minor.yy346.pExpr;}
+        break;
+      case 168: /* limit_opt ::= LIMIT expr COMMA expr */
+{yygotominor.yy476.pOffset = yymsp[-2].minor.yy346.pExpr; yygotominor.yy476.pLimit = yymsp[0].minor.yy346.pExpr;}
+        break;
+      case 169: /* cmd ::= with DELETE FROM fullname indexed_opt where_opt */
+{
+  sqlite3WithPush(pParse, yymsp[-5].minor.yy59, 1);
+  sqlite3SrcListIndexedBy(pParse, yymsp[-2].minor.yy65, &yymsp[-1].minor.yy0);
+  sqlite3DeleteFrom(pParse,yymsp[-2].minor.yy65,yymsp[0].minor.yy132);
+}
+        break;
+      case 172: /* cmd ::= with UPDATE orconf fullname indexed_opt SET setlist where_opt */
+{
+  sqlite3WithPush(pParse, yymsp[-7].minor.yy59, 1);
+  sqlite3SrcListIndexedBy(pParse, yymsp[-4].minor.yy65, &yymsp[-3].minor.yy0);
+  sqlite3ExprListCheckLength(pParse,yymsp[-1].minor.yy14,"set list"); 
+  sqlite3Update(pParse,yymsp[-4].minor.yy65,yymsp[-1].minor.yy14,yymsp[0].minor.yy132,yymsp[-5].minor.yy186);
+}
+        break;
+      case 173: /* setlist ::= setlist COMMA nm EQ expr */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse, yymsp[-4].minor.yy14, yymsp[0].minor.yy346.pExpr);
+  sqlite3ExprListSetName(pParse, yygotominor.yy14, &yymsp[-2].minor.yy0, 1);
+}
+        break;
+      case 174: /* setlist ::= nm EQ expr */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse, 0, yymsp[0].minor.yy346.pExpr);
+  sqlite3ExprListSetName(pParse, yygotominor.yy14, &yymsp[-2].minor.yy0, 1);
+}
+        break;
+      case 175: /* cmd ::= with insert_cmd INTO fullname inscollist_opt select */
+{
+  sqlite3WithPush(pParse, yymsp[-5].minor.yy59, 1);
+  sqlite3Insert(pParse, yymsp[-2].minor.yy65, yymsp[0].minor.yy3, yymsp[-1].minor.yy408, yymsp[-4].minor.yy186);
+}
+        break;
+      case 176: /* cmd ::= with insert_cmd INTO fullname inscollist_opt DEFAULT VALUES */
+{
+  sqlite3WithPush(pParse, yymsp[-6].minor.yy59, 1);
+  sqlite3Insert(pParse, yymsp[-3].minor.yy65, 0, yymsp[-2].minor.yy408, yymsp[-5].minor.yy186);
+}
+        break;
+      case 177: /* insert_cmd ::= INSERT orconf */
+{yygotominor.yy186 = yymsp[0].minor.yy186;}
+        break;
+      case 178: /* insert_cmd ::= REPLACE */
+{yygotominor.yy186 = OE_Replace;}
+        break;
+      case 181: /* idlist ::= idlist COMMA nm */
+{yygotominor.yy408 = sqlite3IdListAppend(pParse->db,yymsp[-2].minor.yy408,&yymsp[0].minor.yy0);}
+        break;
+      case 182: /* idlist ::= nm */
+{yygotominor.yy408 = sqlite3IdListAppend(pParse->db,0,&yymsp[0].minor.yy0);}
+        break;
+      case 183: /* expr ::= term */
+{yygotominor.yy346 = yymsp[0].minor.yy346;}
+        break;
+      case 184: /* expr ::= LP expr RP */
+{yygotominor.yy346.pExpr = yymsp[-1].minor.yy346.pExpr; spanSet(&yygotominor.yy346,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0);}
+        break;
+      case 185: /* term ::= NULL */
+      case 190: /* term ::= INTEGER|FLOAT|BLOB */ yytestcase(yyruleno==190);
+      case 191: /* term ::= STRING */ yytestcase(yyruleno==191);
+{spanExpr(&yygotominor.yy346, pParse, yymsp[0].major, &yymsp[0].minor.yy0);}
+        break;
+      case 186: /* expr ::= ID|INDEXED */
+      case 187: /* expr ::= JOIN_KW */ yytestcase(yyruleno==187);
+{spanExpr(&yygotominor.yy346, pParse, TK_ID, &yymsp[0].minor.yy0);}
+        break;
+      case 188: /* expr ::= nm DOT nm */
+{
+  Expr *temp1 = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[-2].minor.yy0);
+  Expr *temp2 = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[0].minor.yy0);
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_DOT, temp1, temp2, 0);
+  spanSet(&yygotominor.yy346,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0);
+}
+        break;
+      case 189: /* expr ::= nm DOT nm DOT nm */
+{
+  Expr *temp1 = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[-4].minor.yy0);
+  Expr *temp2 = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[-2].minor.yy0);
+  Expr *temp3 = sqlite3PExpr(pParse, TK_ID, 0, 0, &yymsp[0].minor.yy0);
+  Expr *temp4 = sqlite3PExpr(pParse, TK_DOT, temp2, temp3, 0);
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_DOT, temp1, temp4, 0);
+  spanSet(&yygotominor.yy346,&yymsp[-4].minor.yy0,&yymsp[0].minor.yy0);
+}
+        break;
+      case 192: /* expr ::= VARIABLE */
+{
+  if( yymsp[0].minor.yy0.n>=2 && yymsp[0].minor.yy0.z[0]=='#' && sqlite3Isdigit(yymsp[0].minor.yy0.z[1]) ){
+    /* When doing a nested parse, one can include terms in an expression
+    ** that look like this:   #1 #2 ...  These terms refer to registers
+    ** in the virtual machine.  #N is the N-th register. */
+    if( pParse->nested==0 ){
+      sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &yymsp[0].minor.yy0);
+      yygotominor.yy346.pExpr = 0;
+    }else{
+      yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_REGISTER, 0, 0, &yymsp[0].minor.yy0);
+      if( yygotominor.yy346.pExpr ) sqlite3GetInt32(&yymsp[0].minor.yy0.z[1], &yygotominor.yy346.pExpr->iTable);
+    }
+  }else{
+    spanExpr(&yygotominor.yy346, pParse, TK_VARIABLE, &yymsp[0].minor.yy0);
+    sqlite3ExprAssignVarNumber(pParse, yygotominor.yy346.pExpr);
+  }
+  spanSet(&yygotominor.yy346, &yymsp[0].minor.yy0, &yymsp[0].minor.yy0);
+}
+        break;
+      case 193: /* expr ::= expr COLLATE ID|STRING */
+{
+  yygotominor.yy346.pExpr = sqlite3ExprAddCollateToken(pParse, yymsp[-2].minor.yy346.pExpr, &yymsp[0].minor.yy0);
+  yygotominor.yy346.zStart = yymsp[-2].minor.yy346.zStart;
+  yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+}
+        break;
+      case 194: /* expr ::= CAST LP expr AS typetoken RP */
+{
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_CAST, yymsp[-3].minor.yy346.pExpr, 0, &yymsp[-1].minor.yy0);
+  spanSet(&yygotominor.yy346,&yymsp[-5].minor.yy0,&yymsp[0].minor.yy0);
+}
+        break;
+      case 195: /* expr ::= ID|INDEXED LP distinct exprlist RP */
+{
+  if( yymsp[-1].minor.yy14 && yymsp[-1].minor.yy14->nExpr>pParse->db->aLimit[SQLITE_LIMIT_FUNCTION_ARG] ){
+    sqlite3ErrorMsg(pParse, "too many arguments on function %T", &yymsp[-4].minor.yy0);
+  }
+  yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, yymsp[-1].minor.yy14, &yymsp[-4].minor.yy0);
+  spanSet(&yygotominor.yy346,&yymsp[-4].minor.yy0,&yymsp[0].minor.yy0);
+  if( yymsp[-2].minor.yy381 && yygotominor.yy346.pExpr ){
+    yygotominor.yy346.pExpr->flags |= EP_Distinct;
+  }
+}
+        break;
+      case 196: /* expr ::= ID|INDEXED LP STAR RP */
+{
+  yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, 0, &yymsp[-3].minor.yy0);
+  spanSet(&yygotominor.yy346,&yymsp[-3].minor.yy0,&yymsp[0].minor.yy0);
+}
+        break;
+      case 197: /* term ::= CTIME_KW */
+{
+  yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, 0, &yymsp[0].minor.yy0);
+  spanSet(&yygotominor.yy346, &yymsp[0].minor.yy0, &yymsp[0].minor.yy0);
+}
+        break;
+      case 198: /* expr ::= expr AND expr */
+      case 199: /* expr ::= expr OR expr */ yytestcase(yyruleno==199);
+      case 200: /* expr ::= expr LT|GT|GE|LE expr */ yytestcase(yyruleno==200);
+      case 201: /* expr ::= expr EQ|NE expr */ yytestcase(yyruleno==201);
+      case 202: /* expr ::= expr BITAND|BITOR|LSHIFT|RSHIFT expr */ yytestcase(yyruleno==202);
+      case 203: /* expr ::= expr PLUS|MINUS expr */ yytestcase(yyruleno==203);
+      case 204: /* expr ::= expr STAR|SLASH|REM expr */ yytestcase(yyruleno==204);
+      case 205: /* expr ::= expr CONCAT expr */ yytestcase(yyruleno==205);
+{spanBinaryExpr(&yygotominor.yy346,pParse,yymsp[-1].major,&yymsp[-2].minor.yy346,&yymsp[0].minor.yy346);}
+        break;
+      case 206: /* likeop ::= LIKE_KW|MATCH */
+{yygotominor.yy96.eOperator = yymsp[0].minor.yy0; yygotominor.yy96.bNot = 0;}
+        break;
+      case 207: /* likeop ::= NOT LIKE_KW|MATCH */
+{yygotominor.yy96.eOperator = yymsp[0].minor.yy0; yygotominor.yy96.bNot = 1;}
+        break;
+      case 208: /* expr ::= expr likeop expr */
+{
+  ExprList *pList;
+  pList = sqlite3ExprListAppend(pParse,0, yymsp[0].minor.yy346.pExpr);
+  pList = sqlite3ExprListAppend(pParse,pList, yymsp[-2].minor.yy346.pExpr);
+  yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, pList, &yymsp[-1].minor.yy96.eOperator);
+  if( yymsp[-1].minor.yy96.bNot ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+  yygotominor.yy346.zStart = yymsp[-2].minor.yy346.zStart;
+  yygotominor.yy346.zEnd = yymsp[0].minor.yy346.zEnd;
+  if( yygotominor.yy346.pExpr ) yygotominor.yy346.pExpr->flags |= EP_InfixFunc;
+}
+        break;
+      case 209: /* expr ::= expr likeop expr ESCAPE expr */
+{
+  ExprList *pList;
+  pList = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy346.pExpr);
+  pList = sqlite3ExprListAppend(pParse,pList, yymsp[-4].minor.yy346.pExpr);
+  pList = sqlite3ExprListAppend(pParse,pList, yymsp[0].minor.yy346.pExpr);
+  yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, pList, &yymsp[-3].minor.yy96.eOperator);
+  if( yymsp[-3].minor.yy96.bNot ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+  yygotominor.yy346.zStart = yymsp[-4].minor.yy346.zStart;
+  yygotominor.yy346.zEnd = yymsp[0].minor.yy346.zEnd;
+  if( yygotominor.yy346.pExpr ) yygotominor.yy346.pExpr->flags |= EP_InfixFunc;
+}
+        break;
+      case 210: /* expr ::= expr ISNULL|NOTNULL */
+{spanUnaryPostfix(&yygotominor.yy346,pParse,yymsp[0].major,&yymsp[-1].minor.yy346,&yymsp[0].minor.yy0);}
+        break;
+      case 211: /* expr ::= expr NOT NULL */
+{spanUnaryPostfix(&yygotominor.yy346,pParse,TK_NOTNULL,&yymsp[-2].minor.yy346,&yymsp[0].minor.yy0);}
+        break;
+      case 212: /* expr ::= expr IS expr */
+{
+  spanBinaryExpr(&yygotominor.yy346,pParse,TK_IS,&yymsp[-2].minor.yy346,&yymsp[0].minor.yy346);
+  binaryToUnaryIfNull(pParse, yymsp[0].minor.yy346.pExpr, yygotominor.yy346.pExpr, TK_ISNULL);
+}
+        break;
+      case 213: /* expr ::= expr IS NOT expr */
+{
+  spanBinaryExpr(&yygotominor.yy346,pParse,TK_ISNOT,&yymsp[-3].minor.yy346,&yymsp[0].minor.yy346);
+  binaryToUnaryIfNull(pParse, yymsp[0].minor.yy346.pExpr, yygotominor.yy346.pExpr, TK_NOTNULL);
+}
+        break;
+      case 214: /* expr ::= NOT expr */
+      case 215: /* expr ::= BITNOT expr */ yytestcase(yyruleno==215);
+{spanUnaryPrefix(&yygotominor.yy346,pParse,yymsp[-1].major,&yymsp[0].minor.yy346,&yymsp[-1].minor.yy0);}
+        break;
+      case 216: /* expr ::= MINUS expr */
+{spanUnaryPrefix(&yygotominor.yy346,pParse,TK_UMINUS,&yymsp[0].minor.yy346,&yymsp[-1].minor.yy0);}
+        break;
+      case 217: /* expr ::= PLUS expr */
+{spanUnaryPrefix(&yygotominor.yy346,pParse,TK_UPLUS,&yymsp[0].minor.yy346,&yymsp[-1].minor.yy0);}
+        break;
+      case 220: /* expr ::= expr between_op expr AND expr */
+{
+  ExprList *pList = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy346.pExpr);
+  pList = sqlite3ExprListAppend(pParse,pList, yymsp[0].minor.yy346.pExpr);
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_BETWEEN, yymsp[-4].minor.yy346.pExpr, 0, 0);
+  if( yygotominor.yy346.pExpr ){
+    yygotominor.yy346.pExpr->x.pList = pList;
+  }else{
+    sqlite3ExprListDelete(pParse->db, pList);
+  } 
+  if( yymsp[-3].minor.yy328 ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+  yygotominor.yy346.zStart = yymsp[-4].minor.yy346.zStart;
+  yygotominor.yy346.zEnd = yymsp[0].minor.yy346.zEnd;
+}
+        break;
+      case 223: /* expr ::= expr in_op LP exprlist RP */
+{
+    if( yymsp[-1].minor.yy14==0 ){
+      /* Expressions of the form
+      **
+      **      expr1 IN ()
+      **      expr1 NOT IN ()
+      **
+      ** simplify to constants 0 (false) and 1 (true), respectively,
+      ** regardless of the value of expr1.
+      */
+      yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_INTEGER, 0, 0, &sqlite3IntTokens[yymsp[-3].minor.yy328]);
+      sqlite3ExprDelete(pParse->db, yymsp[-4].minor.yy346.pExpr);
+    }else if( yymsp[-1].minor.yy14->nExpr==1 ){
+      /* Expressions of the form:
+      **
+      **      expr1 IN (?1)
+      **      expr1 NOT IN (?2)
+      **
+      ** with exactly one value on the RHS can be simplified to something
+      ** like this:
+      **
+      **      expr1 == ?1
+      **      expr1 <> ?2
+      **
+      ** But, the RHS of the == or <> is marked with the EP_Generic flag
+      ** so that it may not contribute to the computation of comparison
+      ** affinity or the collating sequence to use for comparison.  Otherwise,
+      ** the semantics would be subtly different from IN or NOT IN.
+      */
+      Expr *pRHS = yymsp[-1].minor.yy14->a[0].pExpr;
+      yymsp[-1].minor.yy14->a[0].pExpr = 0;
+      sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy14);
+      /* pRHS cannot be NULL because a malloc error would have been detected
+      ** before now and control would have never reached this point */
+      if( ALWAYS(pRHS) ){
+        pRHS->flags &= ~EP_Collate;
+        pRHS->flags |= EP_Generic;
+      }
+      yygotominor.yy346.pExpr = sqlite3PExpr(pParse, yymsp[-3].minor.yy328 ? TK_NE : TK_EQ, yymsp[-4].minor.yy346.pExpr, pRHS, 0);
+    }else{
+      yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy346.pExpr, 0, 0);
+      if( yygotominor.yy346.pExpr ){
+        yygotominor.yy346.pExpr->x.pList = yymsp[-1].minor.yy14;
+        sqlite3ExprSetHeight(pParse, yygotominor.yy346.pExpr);
+      }else{
+        sqlite3ExprListDelete(pParse->db, yymsp[-1].minor.yy14);
+      }
+      if( yymsp[-3].minor.yy328 ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+    }
+    yygotominor.yy346.zStart = yymsp[-4].minor.yy346.zStart;
+    yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+  }
+        break;
+      case 224: /* expr ::= LP select RP */
+{
+    yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_SELECT, 0, 0, 0);
+    if( yygotominor.yy346.pExpr ){
+      yygotominor.yy346.pExpr->x.pSelect = yymsp[-1].minor.yy3;
+      ExprSetProperty(yygotominor.yy346.pExpr, EP_xIsSelect);
+      sqlite3ExprSetHeight(pParse, yygotominor.yy346.pExpr);
+    }else{
+      sqlite3SelectDelete(pParse->db, yymsp[-1].minor.yy3);
+    }
+    yygotominor.yy346.zStart = yymsp[-2].minor.yy0.z;
+    yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+  }
+        break;
+      case 225: /* expr ::= expr in_op LP select RP */
+{
+    yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_IN, yymsp[-4].minor.yy346.pExpr, 0, 0);
+    if( yygotominor.yy346.pExpr ){
+      yygotominor.yy346.pExpr->x.pSelect = yymsp[-1].minor.yy3;
+      ExprSetProperty(yygotominor.yy346.pExpr, EP_xIsSelect);
+      sqlite3ExprSetHeight(pParse, yygotominor.yy346.pExpr);
+    }else{
+      sqlite3SelectDelete(pParse->db, yymsp[-1].minor.yy3);
+    }
+    if( yymsp[-3].minor.yy328 ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+    yygotominor.yy346.zStart = yymsp[-4].minor.yy346.zStart;
+    yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+  }
+        break;
+      case 226: /* expr ::= expr in_op nm dbnm */
+{
+    SrcList *pSrc = sqlite3SrcListAppend(pParse->db, 0,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0);
+    yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_IN, yymsp[-3].minor.yy346.pExpr, 0, 0);
+    if( yygotominor.yy346.pExpr ){
+      yygotominor.yy346.pExpr->x.pSelect = sqlite3SelectNew(pParse, 0,pSrc,0,0,0,0,0,0,0);
+      ExprSetProperty(yygotominor.yy346.pExpr, EP_xIsSelect);
+      sqlite3ExprSetHeight(pParse, yygotominor.yy346.pExpr);
+    }else{
+      sqlite3SrcListDelete(pParse->db, pSrc);
+    }
+    if( yymsp[-2].minor.yy328 ) yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_NOT, yygotominor.yy346.pExpr, 0, 0);
+    yygotominor.yy346.zStart = yymsp[-3].minor.yy346.zStart;
+    yygotominor.yy346.zEnd = yymsp[0].minor.yy0.z ? &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n] : &yymsp[-1].minor.yy0.z[yymsp[-1].minor.yy0.n];
+  }
+        break;
+      case 227: /* expr ::= EXISTS LP select RP */
+{
+    Expr *p = yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_EXISTS, 0, 0, 0);
+    if( p ){
+      p->x.pSelect = yymsp[-1].minor.yy3;
+      ExprSetProperty(p, EP_xIsSelect);
+      sqlite3ExprSetHeight(pParse, p);
+    }else{
+      sqlite3SelectDelete(pParse->db, yymsp[-1].minor.yy3);
+    }
+    yygotominor.yy346.zStart = yymsp[-3].minor.yy0.z;
+    yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+  }
+        break;
+      case 228: /* expr ::= CASE case_operand case_exprlist case_else END */
+{
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_CASE, yymsp[-3].minor.yy132, 0, 0);
+  if( yygotominor.yy346.pExpr ){
+    yygotominor.yy346.pExpr->x.pList = yymsp[-1].minor.yy132 ? sqlite3ExprListAppend(pParse,yymsp[-2].minor.yy14,yymsp[-1].minor.yy132) : yymsp[-2].minor.yy14;
+    sqlite3ExprSetHeight(pParse, yygotominor.yy346.pExpr);
+  }else{
+    sqlite3ExprListDelete(pParse->db, yymsp[-2].minor.yy14);
+    sqlite3ExprDelete(pParse->db, yymsp[-1].minor.yy132);
+  }
+  yygotominor.yy346.zStart = yymsp[-4].minor.yy0.z;
+  yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+}
+        break;
+      case 229: /* case_exprlist ::= case_exprlist WHEN expr THEN expr */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy14, yymsp[-2].minor.yy346.pExpr);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yygotominor.yy14, yymsp[0].minor.yy346.pExpr);
+}
+        break;
+      case 230: /* case_exprlist ::= WHEN expr THEN expr */
+{
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,0, yymsp[-2].minor.yy346.pExpr);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yygotominor.yy14, yymsp[0].minor.yy346.pExpr);
+}
+        break;
+      case 237: /* nexprlist ::= nexprlist COMMA expr */
+{yygotominor.yy14 = sqlite3ExprListAppend(pParse,yymsp[-2].minor.yy14,yymsp[0].minor.yy346.pExpr);}
+        break;
+      case 238: /* nexprlist ::= expr */
+{yygotominor.yy14 = sqlite3ExprListAppend(pParse,0,yymsp[0].minor.yy346.pExpr);}
+        break;
+      case 239: /* cmd ::= createkw uniqueflag INDEX ifnotexists nm dbnm ON nm LP idxlist RP where_opt */
+{
+  sqlite3CreateIndex(pParse, &yymsp[-7].minor.yy0, &yymsp[-6].minor.yy0, 
+                     sqlite3SrcListAppend(pParse->db,0,&yymsp[-4].minor.yy0,0), yymsp[-2].minor.yy14, yymsp[-10].minor.yy328,
+                      &yymsp[-11].minor.yy0, yymsp[0].minor.yy132, SQLITE_SO_ASC, yymsp[-8].minor.yy328);
+}
+        break;
+      case 240: /* uniqueflag ::= UNIQUE */
+      case 291: /* raisetype ::= ABORT */ yytestcase(yyruleno==291);
+{yygotominor.yy328 = OE_Abort;}
+        break;
+      case 241: /* uniqueflag ::= */
+{yygotominor.yy328 = OE_None;}
+        break;
+      case 244: /* idxlist ::= idxlist COMMA nm collate sortorder */
+{
+  Expr *p = sqlite3ExprAddCollateToken(pParse, 0, &yymsp[-1].minor.yy0);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,yymsp[-4].minor.yy14, p);
+  sqlite3ExprListSetName(pParse,yygotominor.yy14,&yymsp[-2].minor.yy0,1);
+  sqlite3ExprListCheckLength(pParse, yygotominor.yy14, "index");
+  if( yygotominor.yy14 ) yygotominor.yy14->a[yygotominor.yy14->nExpr-1].sortOrder = (u8)yymsp[0].minor.yy328;
+}
+        break;
+      case 245: /* idxlist ::= nm collate sortorder */
+{
+  Expr *p = sqlite3ExprAddCollateToken(pParse, 0, &yymsp[-1].minor.yy0);
+  yygotominor.yy14 = sqlite3ExprListAppend(pParse,0, p);
+  sqlite3ExprListSetName(pParse, yygotominor.yy14, &yymsp[-2].minor.yy0, 1);
+  sqlite3ExprListCheckLength(pParse, yygotominor.yy14, "index");
+  if( yygotominor.yy14 ) yygotominor.yy14->a[yygotominor.yy14->nExpr-1].sortOrder = (u8)yymsp[0].minor.yy328;
+}
+        break;
+      case 246: /* collate ::= */
+{yygotominor.yy0.z = 0; yygotominor.yy0.n = 0;}
+        break;
+      case 248: /* cmd ::= DROP INDEX ifexists fullname */
+{sqlite3DropIndex(pParse, yymsp[0].minor.yy65, yymsp[-1].minor.yy328);}
+        break;
+      case 249: /* cmd ::= VACUUM */
+      case 250: /* cmd ::= VACUUM nm */ yytestcase(yyruleno==250);
+{sqlite3Vacuum(pParse);}
+        break;
+      case 251: /* cmd ::= PRAGMA nm dbnm */
+{sqlite3Pragma(pParse,&yymsp[-1].minor.yy0,&yymsp[0].minor.yy0,0,0);}
+        break;
+      case 252: /* cmd ::= PRAGMA nm dbnm EQ nmnum */
+{sqlite3Pragma(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0,0);}
+        break;
+      case 253: /* cmd ::= PRAGMA nm dbnm LP nmnum RP */
+{sqlite3Pragma(pParse,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,&yymsp[-1].minor.yy0,0);}
+        break;
+      case 254: /* cmd ::= PRAGMA nm dbnm EQ minus_num */
+{sqlite3Pragma(pParse,&yymsp[-3].minor.yy0,&yymsp[-2].minor.yy0,&yymsp[0].minor.yy0,1);}
+        break;
+      case 255: /* cmd ::= PRAGMA nm dbnm LP minus_num RP */
+{sqlite3Pragma(pParse,&yymsp[-4].minor.yy0,&yymsp[-3].minor.yy0,&yymsp[-1].minor.yy0,1);}
+        break;
+      case 264: /* cmd ::= createkw trigger_decl BEGIN trigger_cmd_list END */
+{
+  Token all;
+  all.z = yymsp[-3].minor.yy0.z;
+  all.n = (int)(yymsp[0].minor.yy0.z - yymsp[-3].minor.yy0.z) + yymsp[0].minor.yy0.n;
+  sqlite3FinishTrigger(pParse, yymsp[-1].minor.yy473, &all);
+}
+        break;
+      case 265: /* trigger_decl ::= temp TRIGGER ifnotexists nm dbnm trigger_time trigger_event ON fullname foreach_clause when_clause */
+{
+  sqlite3BeginTrigger(pParse, &yymsp[-7].minor.yy0, &yymsp[-6].minor.yy0, yymsp[-5].minor.yy328, yymsp[-4].minor.yy378.a, yymsp[-4].minor.yy378.b, yymsp[-2].minor.yy65, yymsp[0].minor.yy132, yymsp[-10].minor.yy328, yymsp[-8].minor.yy328);
+  yygotominor.yy0 = (yymsp[-6].minor.yy0.n==0?yymsp[-7].minor.yy0:yymsp[-6].minor.yy0);
+}
+        break;
+      case 266: /* trigger_time ::= BEFORE */
+      case 269: /* trigger_time ::= */ yytestcase(yyruleno==269);
+{ yygotominor.yy328 = TK_BEFORE; }
+        break;
+      case 267: /* trigger_time ::= AFTER */
+{ yygotominor.yy328 = TK_AFTER;  }
+        break;
+      case 268: /* trigger_time ::= INSTEAD OF */
+{ yygotominor.yy328 = TK_INSTEAD;}
+        break;
+      case 270: /* trigger_event ::= DELETE|INSERT */
+      case 271: /* trigger_event ::= UPDATE */ yytestcase(yyruleno==271);
+{yygotominor.yy378.a = yymsp[0].major; yygotominor.yy378.b = 0;}
+        break;
+      case 272: /* trigger_event ::= UPDATE OF idlist */
+{yygotominor.yy378.a = TK_UPDATE; yygotominor.yy378.b = yymsp[0].minor.yy408;}
+        break;
+      case 275: /* when_clause ::= */
+      case 296: /* key_opt ::= */ yytestcase(yyruleno==296);
+{ yygotominor.yy132 = 0; }
+        break;
+      case 276: /* when_clause ::= WHEN expr */
+      case 297: /* key_opt ::= KEY expr */ yytestcase(yyruleno==297);
+{ yygotominor.yy132 = yymsp[0].minor.yy346.pExpr; }
+        break;
+      case 277: /* trigger_cmd_list ::= trigger_cmd_list trigger_cmd SEMI */
+{
+  assert( yymsp[-2].minor.yy473!=0 );
+  yymsp[-2].minor.yy473->pLast->pNext = yymsp[-1].minor.yy473;
+  yymsp[-2].minor.yy473->pLast = yymsp[-1].minor.yy473;
+  yygotominor.yy473 = yymsp[-2].minor.yy473;
+}
+        break;
+      case 278: /* trigger_cmd_list ::= trigger_cmd SEMI */
+{ 
+  assert( yymsp[-1].minor.yy473!=0 );
+  yymsp[-1].minor.yy473->pLast = yymsp[-1].minor.yy473;
+  yygotominor.yy473 = yymsp[-1].minor.yy473;
+}
+        break;
+      case 280: /* trnm ::= nm DOT nm */
+{
+  yygotominor.yy0 = yymsp[0].minor.yy0;
+  sqlite3ErrorMsg(pParse, 
+        "qualified table names are not allowed on INSERT, UPDATE, and DELETE "
+        "statements within triggers");
+}
+        break;
+      case 282: /* tridxby ::= INDEXED BY nm */
+{
+  sqlite3ErrorMsg(pParse,
+        "the INDEXED BY clause is not allowed on UPDATE or DELETE statements "
+        "within triggers");
+}
+        break;
+      case 283: /* tridxby ::= NOT INDEXED */
+{
+  sqlite3ErrorMsg(pParse,
+        "the NOT INDEXED clause is not allowed on UPDATE or DELETE statements "
+        "within triggers");
+}
+        break;
+      case 284: /* trigger_cmd ::= UPDATE orconf trnm tridxby SET setlist where_opt */
+{ yygotominor.yy473 = sqlite3TriggerUpdateStep(pParse->db, &yymsp[-4].minor.yy0, yymsp[-1].minor.yy14, yymsp[0].minor.yy132, yymsp[-5].minor.yy186); }
+        break;
+      case 285: /* trigger_cmd ::= insert_cmd INTO trnm inscollist_opt select */
+{yygotominor.yy473 = sqlite3TriggerInsertStep(pParse->db, &yymsp[-2].minor.yy0, yymsp[-1].minor.yy408, yymsp[0].minor.yy3, yymsp[-4].minor.yy186);}
+        break;
+      case 286: /* trigger_cmd ::= DELETE FROM trnm tridxby where_opt */
+{yygotominor.yy473 = sqlite3TriggerDeleteStep(pParse->db, &yymsp[-2].minor.yy0, yymsp[0].minor.yy132);}
+        break;
+      case 287: /* trigger_cmd ::= select */
+{yygotominor.yy473 = sqlite3TriggerSelectStep(pParse->db, yymsp[0].minor.yy3); }
+        break;
+      case 288: /* expr ::= RAISE LP IGNORE RP */
+{
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_RAISE, 0, 0, 0); 
+  if( yygotominor.yy346.pExpr ){
+    yygotominor.yy346.pExpr->affinity = OE_Ignore;
+  }
+  yygotominor.yy346.zStart = yymsp[-3].minor.yy0.z;
+  yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+}
+        break;
+      case 289: /* expr ::= RAISE LP raisetype COMMA nm RP */
+{
+  yygotominor.yy346.pExpr = sqlite3PExpr(pParse, TK_RAISE, 0, 0, &yymsp[-1].minor.yy0); 
+  if( yygotominor.yy346.pExpr ) {
+    yygotominor.yy346.pExpr->affinity = (char)yymsp[-3].minor.yy328;
+  }
+  yygotominor.yy346.zStart = yymsp[-5].minor.yy0.z;
+  yygotominor.yy346.zEnd = &yymsp[0].minor.yy0.z[yymsp[0].minor.yy0.n];
+}
+        break;
+      case 290: /* raisetype ::= ROLLBACK */
+{yygotominor.yy328 = OE_Rollback;}
+        break;
+      case 292: /* raisetype ::= FAIL */
+{yygotominor.yy328 = OE_Fail;}
+        break;
+      case 293: /* cmd ::= DROP TRIGGER ifexists fullname */
+{
+  sqlite3DropTrigger(pParse,yymsp[0].minor.yy65,yymsp[-1].minor.yy328);
+}
+        break;
+      case 294: /* cmd ::= ATTACH database_kw_opt expr AS expr key_opt */
+{
+  sqlite3Attach(pParse, yymsp[-3].minor.yy346.pExpr, yymsp[-1].minor.yy346.pExpr, yymsp[0].minor.yy132);
+}
+        break;
+      case 295: /* cmd ::= DETACH database_kw_opt expr */
+{
+  sqlite3Detach(pParse, yymsp[0].minor.yy346.pExpr);
+}
+        break;
+      case 300: /* cmd ::= REINDEX */
+{sqlite3Reindex(pParse, 0, 0);}
+        break;
+      case 301: /* cmd ::= REINDEX nm dbnm */
+{sqlite3Reindex(pParse, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0);}
+        break;
+      case 302: /* cmd ::= ANALYZE */
+{sqlite3Analyze(pParse, 0, 0);}
+        break;
+      case 303: /* cmd ::= ANALYZE nm dbnm */
+{sqlite3Analyze(pParse, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0);}
+        break;
+      case 304: /* cmd ::= ALTER TABLE fullname RENAME TO nm */
+{
+  sqlite3AlterRenameTable(pParse,yymsp[-3].minor.yy65,&yymsp[0].minor.yy0);
+}
+        break;
+      case 305: /* cmd ::= ALTER TABLE add_column_fullname ADD kwcolumn_opt column */
+{
+  sqlite3AlterFinishAddColumn(pParse, &yymsp[0].minor.yy0);
+}
+        break;
+      case 306: /* add_column_fullname ::= fullname */
+{
+  pParse->db->lookaside.bEnabled = 0;
+  sqlite3AlterBeginAddColumn(pParse, yymsp[0].minor.yy65);
+}
+        break;
+      case 309: /* cmd ::= create_vtab */
+{sqlite3VtabFinishParse(pParse,0);}
+        break;
+      case 310: /* cmd ::= create_vtab LP vtabarglist RP */
+{sqlite3VtabFinishParse(pParse,&yymsp[0].minor.yy0);}
+        break;
+      case 311: /* create_vtab ::= createkw VIRTUAL TABLE ifnotexists nm dbnm USING nm */
+{
+    sqlite3VtabBeginParse(pParse, &yymsp[-3].minor.yy0, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, yymsp[-4].minor.yy328);
+}
+        break;
+      case 314: /* vtabarg ::= */
+{sqlite3VtabArgInit(pParse);}
+        break;
+      case 316: /* vtabargtoken ::= ANY */
+      case 317: /* vtabargtoken ::= lp anylist RP */ yytestcase(yyruleno==317);
+      case 318: /* lp ::= LP */ yytestcase(yyruleno==318);
+{sqlite3VtabArgExtend(pParse,&yymsp[0].minor.yy0);}
+        break;
+      case 322: /* with ::= */
+{yygotominor.yy59 = 0;}
+        break;
+      case 323: /* with ::= WITH wqlist */
+      case 324: /* with ::= WITH RECURSIVE wqlist */ yytestcase(yyruleno==324);
+{ yygotominor.yy59 = yymsp[0].minor.yy59; }
+        break;
+      case 325: /* wqlist ::= nm idxlist_opt AS LP select RP */
+{
+  yygotominor.yy59 = sqlite3WithAdd(pParse, 0, &yymsp[-5].minor.yy0, yymsp[-4].minor.yy14, yymsp[-1].minor.yy3);
+}
+        break;
+      case 326: /* wqlist ::= wqlist COMMA nm idxlist_opt AS LP select RP */
+{
+  yygotominor.yy59 = sqlite3WithAdd(pParse, yymsp[-7].minor.yy59, &yymsp[-5].minor.yy0, yymsp[-4].minor.yy14, yymsp[-1].minor.yy3);
+}
+        break;
+      default:
+      /* (0) input ::= cmdlist */ yytestcase(yyruleno==0);
+      /* (1) cmdlist ::= cmdlist ecmd */ yytestcase(yyruleno==1);
+      /* (2) cmdlist ::= ecmd */ yytestcase(yyruleno==2);
+      /* (3) ecmd ::= SEMI */ yytestcase(yyruleno==3);
+      /* (4) ecmd ::= explain cmdx SEMI */ yytestcase(yyruleno==4);
+      /* (10) trans_opt ::= */ yytestcase(yyruleno==10);
+      /* (11) trans_opt ::= TRANSACTION */ yytestcase(yyruleno==11);
+      /* (12) trans_opt ::= TRANSACTION nm */ yytestcase(yyruleno==12);
+      /* (20) savepoint_opt ::= SAVEPOINT */ yytestcase(yyruleno==20);
+      /* (21) savepoint_opt ::= */ yytestcase(yyruleno==21);
+      /* (25) cmd ::= create_table create_table_args */ yytestcase(yyruleno==25);
+      /* (36) columnlist ::= columnlist COMMA column */ yytestcase(yyruleno==36);
+      /* (37) columnlist ::= column */ yytestcase(yyruleno==37);
+      /* (43) type ::= */ yytestcase(yyruleno==43);
+      /* (50) signed ::= plus_num */ yytestcase(yyruleno==50);
+      /* (51) signed ::= minus_num */ yytestcase(yyruleno==51);
+      /* (52) carglist ::= carglist ccons */ yytestcase(yyruleno==52);
+      /* (53) carglist ::= */ yytestcase(yyruleno==53);
+      /* (60) ccons ::= NULL onconf */ yytestcase(yyruleno==60);
+      /* (88) conslist ::= conslist tconscomma tcons */ yytestcase(yyruleno==88);
+      /* (89) conslist ::= tcons */ yytestcase(yyruleno==89);
+      /* (91) tconscomma ::= */ yytestcase(yyruleno==91);
+      /* (273) foreach_clause ::= */ yytestcase(yyruleno==273);
+      /* (274) foreach_clause ::= FOR EACH ROW */ yytestcase(yyruleno==274);
+      /* (281) tridxby ::= */ yytestcase(yyruleno==281);
+      /* (298) database_kw_opt ::= DATABASE */ yytestcase(yyruleno==298);
+      /* (299) database_kw_opt ::= */ yytestcase(yyruleno==299);
+      /* (307) kwcolumn_opt ::= */ yytestcase(yyruleno==307);
+      /* (308) kwcolumn_opt ::= COLUMNKW */ yytestcase(yyruleno==308);
+      /* (312) vtabarglist ::= vtabarg */ yytestcase(yyruleno==312);
+      /* (313) vtabarglist ::= vtabarglist COMMA vtabarg */ yytestcase(yyruleno==313);
+      /* (315) vtabarg ::= vtabarg vtabargtoken */ yytestcase(yyruleno==315);
+      /* (319) anylist ::= */ yytestcase(yyruleno==319);
+      /* (320) anylist ::= anylist LP anylist RP */ yytestcase(yyruleno==320);
+      /* (321) anylist ::= anylist ANY */ yytestcase(yyruleno==321);
+        break;
+  };
+  assert( yyruleno>=0 && yyruleno<sizeof(yyRuleInfo)/sizeof(yyRuleInfo[0]) );
+  yygoto = yyRuleInfo[yyruleno].lhs;
+  yysize = yyRuleInfo[yyruleno].nrhs;
+  yypParser->yyidx -= yysize;
+  yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto);
+  if( yyact < YYNSTATE ){
+#ifdef NDEBUG
+    /* If we are not debugging and the reduce action popped at least
+    ** one element off the stack, then we can push the new element back
+    ** onto the stack here, and skip the stack overflow test in yy_shift().
+    ** That gives a significant speed improvement. */
+    if( yysize ){
+      yypParser->yyidx++;
+      yymsp -= yysize-1;
+      yymsp->stateno = (YYACTIONTYPE)yyact;
+      yymsp->major = (YYCODETYPE)yygoto;
+      yymsp->minor = yygotominor;
+    }else
+#endif
+    {
+      yy_shift(yypParser,yyact,yygoto,&yygotominor);
+    }
+  }else{
+    assert( yyact == YYNSTATE + YYNRULE + 1 );
+    yy_accept(yypParser);
+  }
+}
+
+/*
+** The following code executes when the parse fails
+*/
+#ifndef YYNOERRORRECOVERY
+static void yy_parse_failed(
+  yyParser *yypParser           /* The parser */
+){
+  sqlite3ParserARG_FETCH;
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt);
+  }
+#endif
+  while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser);
+  /* Here code is inserted which will be executed whenever the
+  ** parser fails */
+  sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+#endif /* YYNOERRORRECOVERY */
+
+/*
+** The following code executes when a syntax error first occurs.
+*/
+static void yy_syntax_error(
+  yyParser *yypParser,           /* The parser */
+  int yymajor,                   /* The major type of the error token */
+  YYMINORTYPE yyminor            /* The minor type of the error token */
+){
+  sqlite3ParserARG_FETCH;
+#define TOKEN (yyminor.yy0)
+
+  UNUSED_PARAMETER(yymajor);  /* Silence some compiler warnings */
+  assert( TOKEN.z[0] );  /* The tokenizer always gives us a token */
+  sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN);
+  sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+
+/*
+** The following is executed when the parser accepts
+*/
+static void yy_accept(
+  yyParser *yypParser           /* The parser */
+){
+  sqlite3ParserARG_FETCH;
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt);
+  }
+#endif
+  while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser);
+  /* Here code is inserted which will be executed whenever the
+  ** parser accepts */
+  sqlite3ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+
+/* The main parser program.
+** The first argument is a pointer to a structure obtained from
+** "sqlite3ParserAlloc" which describes the current state of the parser.
+** The second argument is the major token number.  The third is
+** the minor token.  The fourth optional argument is whatever the
+** user wants (and specified in the grammar) and is available for
+** use by the action routines.
+**
+** Inputs:
+** <ul>
+** <li> A pointer to the parser (an opaque structure.)
+** <li> The major token number.
+** <li> The minor token number.
+** <li> An option argument of a grammar-specified type.
+** </ul>
+**
+** Outputs:
+** None.
+*/
+SQLITE_PRIVATE void sqlite3Parser(
+  void *yyp,                   /* The parser */
+  int yymajor,                 /* The major token code number */
+  sqlite3ParserTOKENTYPE yyminor       /* The value for the token */
+  sqlite3ParserARG_PDECL               /* Optional %extra_argument parameter */
+){
+  YYMINORTYPE yyminorunion;
+  int yyact;            /* The parser action. */
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
+  int yyendofinput;     /* True if we are at the end of input */
+#endif
+#ifdef YYERRORSYMBOL
+  int yyerrorhit = 0;   /* True if yymajor has invoked an error */
+#endif
+  yyParser *yypParser;  /* The parser */
+
+  /* (re)initialize the parser, if necessary */
+  yypParser = (yyParser*)yyp;
+  if( yypParser->yyidx<0 ){
+#if YYSTACKDEPTH<=0
+    if( yypParser->yystksz <=0 ){
+      /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/
+      yyminorunion = yyzerominor;
+      yyStackOverflow(yypParser, &yyminorunion);
+      return;
+    }
+#endif
+    yypParser->yyidx = 0;
+    yypParser->yyerrcnt = -1;
+    yypParser->yystack[0].stateno = 0;
+    yypParser->yystack[0].major = 0;
+  }
+  yyminorunion.yy0 = yyminor;
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
+  yyendofinput = (yymajor==0);
+#endif
+  sqlite3ParserARG_STORE;
+
+#ifndef NDEBUG
+  if( yyTraceFILE ){
+    fprintf(yyTraceFILE,"%sInput %s\n",yyTracePrompt,yyTokenName[yymajor]);
+  }
+#endif
+
+  do{
+    yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor);
+    if( yyact<YYNSTATE ){
+      yy_shift(yypParser,yyact,yymajor,&yyminorunion);
+      yypParser->yyerrcnt--;
+      yymajor = YYNOCODE;
+    }else if( yyact < YYNSTATE + YYNRULE ){
+      yy_reduce(yypParser,yyact-YYNSTATE);
+    }else{
+      assert( yyact == YY_ERROR_ACTION );
+#ifdef YYERRORSYMBOL
+      int yymx;
+#endif
+#ifndef NDEBUG
+      if( yyTraceFILE ){
+        fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt);
+      }
+#endif
+#ifdef YYERRORSYMBOL
+      /* A syntax error has occurred.
+      ** The response to an error depends upon whether or not the
+      ** grammar defines an error token "ERROR".  
+      **
+      ** This is what we do if the grammar does define ERROR:
+      **
+      **  * Call the %syntax_error function.
+      **
+      **  * Begin popping the stack until we enter a state where
+      **    it is legal to shift the error symbol, then shift
+      **    the error symbol.
+      **
+      **  * Set the error count to three.
+      **
+      **  * Begin accepting and shifting new tokens.  No new error
+      **    processing will occur until three tokens have been
+      **    shifted successfully.
+      **
+      */
+      if( yypParser->yyerrcnt<0 ){
+        yy_syntax_error(yypParser,yymajor,yyminorunion);
+      }
+      yymx = yypParser->yystack[yypParser->yyidx].major;
+      if( yymx==YYERRORSYMBOL || yyerrorhit ){
+#ifndef NDEBUG
+        if( yyTraceFILE ){
+          fprintf(yyTraceFILE,"%sDiscard input token %s\n",
+             yyTracePrompt,yyTokenName[yymajor]);
+        }
+#endif
+        yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion);
+        yymajor = YYNOCODE;
+      }else{
+         while(
+          yypParser->yyidx >= 0 &&
+          yymx != YYERRORSYMBOL &&
+          (yyact = yy_find_reduce_action(
+                        yypParser->yystack[yypParser->yyidx].stateno,
+                        YYERRORSYMBOL)) >= YYNSTATE
+        ){
+          yy_pop_parser_stack(yypParser);
+        }
+        if( yypParser->yyidx < 0 || yymajor==0 ){
+          yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+          yy_parse_failed(yypParser);
+          yymajor = YYNOCODE;
+        }else if( yymx!=YYERRORSYMBOL ){
+          YYMINORTYPE u2;
+          u2.YYERRSYMDT = 0;
+          yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2);
+        }
+      }
+      yypParser->yyerrcnt = 3;
+      yyerrorhit = 1;
+#elif defined(YYNOERRORRECOVERY)
+      /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to
+      ** do any kind of error recovery.  Instead, simply invoke the syntax
+      ** error routine and continue going as if nothing had happened.
+      **
+      ** Applications can set this macro (for example inside %include) if
+      ** they intend to abandon the parse upon the first syntax error seen.
+      */
+      yy_syntax_error(yypParser,yymajor,yyminorunion);
+      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+      yymajor = YYNOCODE;
+      
+#else  /* YYERRORSYMBOL is not defined */
+      /* This is what we do if the grammar does not define ERROR:
+      **
+      **  * Report an error message, and throw away the input token.
+      **
+      **  * If the input token is $, then fail the parse.
+      **
+      ** As before, subsequent error messages are suppressed until
+      ** three input tokens have been successfully shifted.
+      */
+      if( yypParser->yyerrcnt<=0 ){
+        yy_syntax_error(yypParser,yymajor,yyminorunion);
+      }
+      yypParser->yyerrcnt = 3;
+      yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion);
+      if( yyendofinput ){
+        yy_parse_failed(yypParser);
+      }
+      yymajor = YYNOCODE;
+#endif
+    }
+  }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 );
+  return;
+}
+
+/************** End of parse.c ***********************************************/
+/************** Begin file tokenize.c ****************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** An tokenizer for SQL
+**
+** This file contains C code that splits an SQL input string up into
+** individual tokens and sends those tokens one-by-one over to the
+** parser for analysis.
+*/
+/* #include <stdlib.h> */
+
+/*
+** The charMap() macro maps alphabetic characters into their
+** lower-case ASCII equivalent.  On ASCII machines, this is just
+** an upper-to-lower case map.  On EBCDIC machines we also need
+** to adjust the encoding.  Only alphabetic characters and underscores
+** need to be translated.
+*/
+#ifdef SQLITE_ASCII
+# define charMap(X) sqlite3UpperToLower[(unsigned char)X]
+#endif
+#ifdef SQLITE_EBCDIC
+# define charMap(X) ebcdicToAscii[(unsigned char)X]
+const unsigned char ebcdicToAscii[] = {
+/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
+   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
+   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
+   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
+   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
+   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
+   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
+};
+#endif
+
+/*
+** The sqlite3KeywordCode function looks up an identifier to determine if
+** it is a keyword.  If it is a keyword, the token code of that keyword is 
+** returned.  If the input is not a keyword, TK_ID is returned.
+**
+** The implementation of this routine was generated by a program,
+** mkkeywordhash.h, located in the tool subdirectory of the distribution.
+** The output of the mkkeywordhash.c program is written into a file
+** named keywordhash.h and then included into this source file by
+** the #include below.
+*/
+/************** Include keywordhash.h in the middle of tokenize.c ************/
+/************** Begin file keywordhash.h *************************************/
+/***** This file contains automatically generated code ******
+**
+** The code in this file has been automatically generated by
+**
+**   sqlite/tool/mkkeywordhash.c
+**
+** The code in this file implements a function that determines whether
+** or not a given identifier is really an SQL keyword.  The same thing
+** might be implemented more directly using a hand-written hash table.
+** But by using this automatically generated code, the size of the code
+** is substantially reduced.  This is important for embedded applications
+** on platforms with limited memory.
+*/
+/* Hash score: 182 */
+static int keywordCode(const char *z, int n){
+  /* zText[] encodes 834 bytes of keywords in 554 bytes */
+  /*   REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT       */
+  /*   ABLEFTHENDEFERRABLELSEXCEPTRANSACTIONATURALTERAISEXCLUSIVE         */
+  /*   XISTSAVEPOINTERSECTRIGGEREFERENCESCONSTRAINTOFFSETEMPORARY         */
+  /*   UNIQUERYWITHOUTERELEASEATTACHAVINGROUPDATEBEGINNERECURSIVE         */
+  /*   BETWEENOTNULLIKECASCADELETECASECOLLATECREATECURRENT_DATEDETACH     */
+  /*   IMMEDIATEJOINSERTMATCHPLANALYZEPRAGMABORTVALUESVIRTUALIMITWHEN     */
+  /*   WHERENAMEAFTEREPLACEANDEFAULTAUTOINCREMENTCASTCOLUMNCOMMIT         */
+  /*   CONFLICTCROSSCURRENT_TIMESTAMPRIMARYDEFERREDISTINCTDROPFAIL        */
+  /*   FROMFULLGLOBYIFISNULLORDERESTRICTRIGHTROLLBACKROWUNIONUSING        */
+  /*   VACUUMVIEWINITIALLY                                                */
+  static const char zText[553] = {
+    'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H',
+    'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G',
+    'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A',
+    'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F',
+    'E','R','R','A','B','L','E','L','S','E','X','C','E','P','T','R','A','N',
+    'S','A','C','T','I','O','N','A','T','U','R','A','L','T','E','R','A','I',
+    'S','E','X','C','L','U','S','I','V','E','X','I','S','T','S','A','V','E',
+    'P','O','I','N','T','E','R','S','E','C','T','R','I','G','G','E','R','E',
+    'F','E','R','E','N','C','E','S','C','O','N','S','T','R','A','I','N','T',
+    'O','F','F','S','E','T','E','M','P','O','R','A','R','Y','U','N','I','Q',
+    'U','E','R','Y','W','I','T','H','O','U','T','E','R','E','L','E','A','S',
+    'E','A','T','T','A','C','H','A','V','I','N','G','R','O','U','P','D','A',
+    'T','E','B','E','G','I','N','N','E','R','E','C','U','R','S','I','V','E',
+    'B','E','T','W','E','E','N','O','T','N','U','L','L','I','K','E','C','A',
+    'S','C','A','D','E','L','E','T','E','C','A','S','E','C','O','L','L','A',
+    'T','E','C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A',
+    'T','E','D','E','T','A','C','H','I','M','M','E','D','I','A','T','E','J',
+    'O','I','N','S','E','R','T','M','A','T','C','H','P','L','A','N','A','L',
+    'Y','Z','E','P','R','A','G','M','A','B','O','R','T','V','A','L','U','E',
+    'S','V','I','R','T','U','A','L','I','M','I','T','W','H','E','N','W','H',
+    'E','R','E','N','A','M','E','A','F','T','E','R','E','P','L','A','C','E',
+    'A','N','D','E','F','A','U','L','T','A','U','T','O','I','N','C','R','E',
+    'M','E','N','T','C','A','S','T','C','O','L','U','M','N','C','O','M','M',
+    'I','T','C','O','N','F','L','I','C','T','C','R','O','S','S','C','U','R',
+    'R','E','N','T','_','T','I','M','E','S','T','A','M','P','R','I','M','A',
+    'R','Y','D','E','F','E','R','R','E','D','I','S','T','I','N','C','T','D',
+    'R','O','P','F','A','I','L','F','R','O','M','F','U','L','L','G','L','O',
+    'B','Y','I','F','I','S','N','U','L','L','O','R','D','E','R','E','S','T',
+    'R','I','C','T','R','I','G','H','T','R','O','L','L','B','A','C','K','R',
+    'O','W','U','N','I','O','N','U','S','I','N','G','V','A','C','U','U','M',
+    'V','I','E','W','I','N','I','T','I','A','L','L','Y',
+  };
+  static const unsigned char aHash[127] = {
+      76, 105, 117,  74,   0,  45,   0,   0,  82,   0,  77,   0,   0,
+      42,  12,  78,  15,   0, 116,  85,  54, 112,   0,  19,   0,   0,
+     121,   0, 119, 115,   0,  22,  93,   0,   9,   0,   0,  70,  71,
+       0,  69,   6,   0,  48,  90, 102,   0, 118, 101,   0,   0,  44,
+       0, 103,  24,   0,  17,   0, 122,  53,  23,   0,   5, 110,  25,
+      96,   0,   0, 124, 106,  60, 123,  57,  28,  55,   0,  91,   0,
+     100,  26,   0,  99,   0,   0,   0,  95,  92,  97,  88, 109,  14,
+      39, 108,   0,  81,   0,  18,  89, 111,  32,   0, 120,  80, 113,
+      62,  46,  84,   0,   0,  94,  40,  59, 114,   0,  36,   0,   0,
+      29,   0,  86,  63,  64,   0,  20,  61,   0,  56,
+  };
+  static const unsigned char aNext[124] = {
+       0,   0,   0,   0,   4,   0,   0,   0,   0,   0,   0,   0,   0,
+       0,   2,   0,   0,   0,   0,   0,   0,  13,   0,   0,   0,   0,
+       0,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+       0,   0,   0,   0,  33,   0,  21,   0,   0,   0,   0,   0,  50,
+       0,  43,   3,  47,   0,   0,   0,   0,  30,   0,  58,   0,  38,
+       0,   0,   0,   1,  66,   0,   0,  67,   0,  41,   0,   0,   0,
+       0,   0,   0,  49,  65,   0,   0,   0,   0,  31,  52,  16,  34,
+      10,   0,   0,   0,   0,   0,   0,   0,  11,  72,  79,   0,   8,
+       0, 104,  98,   0, 107,   0,  87,   0,  75,  51,   0,  27,  37,
+      73,  83,   0,  35,  68,   0,   0,
+  };
+  static const unsigned char aLen[124] = {
+       7,   7,   5,   4,   6,   4,   5,   3,   6,   7,   3,   6,   6,
+       7,   7,   3,   8,   2,   6,   5,   4,   4,   3,  10,   4,   6,
+      11,   6,   2,   7,   5,   5,   9,   6,   9,   9,   7,  10,  10,
+       4,   6,   2,   3,   9,   4,   2,   6,   5,   7,   4,   5,   7,
+       6,   6,   5,   6,   5,   5,   9,   7,   7,   3,   2,   4,   4,
+       7,   3,   6,   4,   7,   6,  12,   6,   9,   4,   6,   5,   4,
+       7,   6,   5,   6,   7,   5,   4,   5,   6,   5,   7,   3,   7,
+      13,   2,   2,   4,   6,   6,   8,   5,  17,  12,   7,   8,   8,
+       2,   4,   4,   4,   4,   4,   2,   2,   6,   5,   8,   5,   8,
+       3,   5,   5,   6,   4,   9,   3,
+  };
+  static const unsigned short int aOffset[124] = {
+       0,   2,   2,   8,   9,  14,  16,  20,  23,  25,  25,  29,  33,
+      36,  41,  46,  48,  53,  54,  59,  62,  65,  67,  69,  78,  81,
+      86,  91,  95,  96, 101, 105, 109, 117, 122, 128, 136, 142, 152,
+     159, 162, 162, 165, 167, 167, 171, 176, 179, 184, 184, 188, 192,
+     199, 204, 209, 212, 218, 221, 225, 234, 240, 240, 240, 243, 246,
+     250, 251, 255, 261, 265, 272, 278, 290, 296, 305, 307, 313, 318,
+     320, 327, 332, 337, 343, 349, 354, 358, 361, 367, 371, 378, 380,
+     387, 389, 391, 400, 404, 410, 416, 424, 429, 429, 445, 452, 459,
+     460, 467, 471, 475, 479, 483, 486, 488, 490, 496, 500, 508, 513,
+     521, 524, 529, 534, 540, 544, 549,
+  };
+  static const unsigned char aCode[124] = {
+    TK_REINDEX,    TK_INDEXED,    TK_INDEX,      TK_DESC,       TK_ESCAPE,     
+    TK_EACH,       TK_CHECK,      TK_KEY,        TK_BEFORE,     TK_FOREIGN,    
+    TK_FOR,        TK_IGNORE,     TK_LIKE_KW,    TK_EXPLAIN,    TK_INSTEAD,    
+    TK_ADD,        TK_DATABASE,   TK_AS,         TK_SELECT,     TK_TABLE,      
+    TK_JOIN_KW,    TK_THEN,       TK_END,        TK_DEFERRABLE, TK_ELSE,       
+    TK_EXCEPT,     TK_TRANSACTION,TK_ACTION,     TK_ON,         TK_JOIN_KW,    
+    TK_ALTER,      TK_RAISE,      TK_EXCLUSIVE,  TK_EXISTS,     TK_SAVEPOINT,  
+    TK_INTERSECT,  TK_TRIGGER,    TK_REFERENCES, TK_CONSTRAINT, TK_INTO,       
+    TK_OFFSET,     TK_OF,         TK_SET,        TK_TEMP,       TK_TEMP,       
+    TK_OR,         TK_UNIQUE,     TK_QUERY,      TK_WITHOUT,    TK_WITH,       
+    TK_JOIN_KW,    TK_RELEASE,    TK_ATTACH,     TK_HAVING,     TK_GROUP,      
+    TK_UPDATE,     TK_BEGIN,      TK_JOIN_KW,    TK_RECURSIVE,  TK_BETWEEN,    
+    TK_NOTNULL,    TK_NOT,        TK_NO,         TK_NULL,       TK_LIKE_KW,    
+    TK_CASCADE,    TK_ASC,        TK_DELETE,     TK_CASE,       TK_COLLATE,    
+    TK_CREATE,     TK_CTIME_KW,   TK_DETACH,     TK_IMMEDIATE,  TK_JOIN,       
+    TK_INSERT,     TK_MATCH,      TK_PLAN,       TK_ANALYZE,    TK_PRAGMA,     
+    TK_ABORT,      TK_VALUES,     TK_VIRTUAL,    TK_LIMIT,      TK_WHEN,       
+    TK_WHERE,      TK_RENAME,     TK_AFTER,      TK_REPLACE,    TK_AND,        
+    TK_DEFAULT,    TK_AUTOINCR,   TK_TO,         TK_IN,         TK_CAST,       
+    TK_COLUMNKW,   TK_COMMIT,     TK_CONFLICT,   TK_JOIN_KW,    TK_CTIME_KW,   
+    TK_CTIME_KW,   TK_PRIMARY,    TK_DEFERRED,   TK_DISTINCT,   TK_IS,         
+    TK_DROP,       TK_FAIL,       TK_FROM,       TK_JOIN_KW,    TK_LIKE_KW,    
+    TK_BY,         TK_IF,         TK_ISNULL,     TK_ORDER,      TK_RESTRICT,   
+    TK_JOIN_KW,    TK_ROLLBACK,   TK_ROW,        TK_UNION,      TK_USING,      
+    TK_VACUUM,     TK_VIEW,       TK_INITIALLY,  TK_ALL,        
+  };
+  int h, i;
+  if( n<2 ) return TK_ID;
+  h = ((charMap(z[0])*4) ^
+      (charMap(z[n-1])*3) ^
+      n) % 127;
+  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
+    if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
+      testcase( i==0 ); /* REINDEX */
+      testcase( i==1 ); /* INDEXED */
+      testcase( i==2 ); /* INDEX */
+      testcase( i==3 ); /* DESC */
+      testcase( i==4 ); /* ESCAPE */
+      testcase( i==5 ); /* EACH */
+      testcase( i==6 ); /* CHECK */
+      testcase( i==7 ); /* KEY */
+      testcase( i==8 ); /* BEFORE */
+      testcase( i==9 ); /* FOREIGN */
+      testcase( i==10 ); /* FOR */
+      testcase( i==11 ); /* IGNORE */
+      testcase( i==12 ); /* REGEXP */
+      testcase( i==13 ); /* EXPLAIN */
+      testcase( i==14 ); /* INSTEAD */
+      testcase( i==15 ); /* ADD */
+      testcase( i==16 ); /* DATABASE */
+      testcase( i==17 ); /* AS */
+      testcase( i==18 ); /* SELECT */
+      testcase( i==19 ); /* TABLE */
+      testcase( i==20 ); /* LEFT */
+      testcase( i==21 ); /* THEN */
+      testcase( i==22 ); /* END */
+      testcase( i==23 ); /* DEFERRABLE */
+      testcase( i==24 ); /* ELSE */
+      testcase( i==25 ); /* EXCEPT */
+      testcase( i==26 ); /* TRANSACTION */
+      testcase( i==27 ); /* ACTION */
+      testcase( i==28 ); /* ON */
+      testcase( i==29 ); /* NATURAL */
+      testcase( i==30 ); /* ALTER */
+      testcase( i==31 ); /* RAISE */
+      testcase( i==32 ); /* EXCLUSIVE */
+      testcase( i==33 ); /* EXISTS */
+      testcase( i==34 ); /* SAVEPOINT */
+      testcase( i==35 ); /* INTERSECT */
+      testcase( i==36 ); /* TRIGGER */
+      testcase( i==37 ); /* REFERENCES */
+      testcase( i==38 ); /* CONSTRAINT */
+      testcase( i==39 ); /* INTO */
+      testcase( i==40 ); /* OFFSET */
+      testcase( i==41 ); /* OF */
+      testcase( i==42 ); /* SET */
+      testcase( i==43 ); /* TEMPORARY */
+      testcase( i==44 ); /* TEMP */
+      testcase( i==45 ); /* OR */
+      testcase( i==46 ); /* UNIQUE */
+      testcase( i==47 ); /* QUERY */
+      testcase( i==48 ); /* WITHOUT */
+      testcase( i==49 ); /* WITH */
+      testcase( i==50 ); /* OUTER */
+      testcase( i==51 ); /* RELEASE */
+      testcase( i==52 ); /* ATTACH */
+      testcase( i==53 ); /* HAVING */
+      testcase( i==54 ); /* GROUP */
+      testcase( i==55 ); /* UPDATE */
+      testcase( i==56 ); /* BEGIN */
+      testcase( i==57 ); /* INNER */
+      testcase( i==58 ); /* RECURSIVE */
+      testcase( i==59 ); /* BETWEEN */
+      testcase( i==60 ); /* NOTNULL */
+      testcase( i==61 ); /* NOT */
+      testcase( i==62 ); /* NO */
+      testcase( i==63 ); /* NULL */
+      testcase( i==64 ); /* LIKE */
+      testcase( i==65 ); /* CASCADE */
+      testcase( i==66 ); /* ASC */
+      testcase( i==67 ); /* DELETE */
+      testcase( i==68 ); /* CASE */
+      testcase( i==69 ); /* COLLATE */
+      testcase( i==70 ); /* CREATE */
+      testcase( i==71 ); /* CURRENT_DATE */
+      testcase( i==72 ); /* DETACH */
+      testcase( i==73 ); /* IMMEDIATE */
+      testcase( i==74 ); /* JOIN */
+      testcase( i==75 ); /* INSERT */
+      testcase( i==76 ); /* MATCH */
+      testcase( i==77 ); /* PLAN */
+      testcase( i==78 ); /* ANALYZE */
+      testcase( i==79 ); /* PRAGMA */
+      testcase( i==80 ); /* ABORT */
+      testcase( i==81 ); /* VALUES */
+      testcase( i==82 ); /* VIRTUAL */
+      testcase( i==83 ); /* LIMIT */
+      testcase( i==84 ); /* WHEN */
+      testcase( i==85 ); /* WHERE */
+      testcase( i==86 ); /* RENAME */
+      testcase( i==87 ); /* AFTER */
+      testcase( i==88 ); /* REPLACE */
+      testcase( i==89 ); /* AND */
+      testcase( i==90 ); /* DEFAULT */
+      testcase( i==91 ); /* AUTOINCREMENT */
+      testcase( i==92 ); /* TO */
+      testcase( i==93 ); /* IN */
+      testcase( i==94 ); /* CAST */
+      testcase( i==95 ); /* COLUMN */
+      testcase( i==96 ); /* COMMIT */
+      testcase( i==97 ); /* CONFLICT */
+      testcase( i==98 ); /* CROSS */
+      testcase( i==99 ); /* CURRENT_TIMESTAMP */
+      testcase( i==100 ); /* CURRENT_TIME */
+      testcase( i==101 ); /* PRIMARY */
+      testcase( i==102 ); /* DEFERRED */
+      testcase( i==103 ); /* DISTINCT */
+      testcase( i==104 ); /* IS */
+      testcase( i==105 ); /* DROP */
+      testcase( i==106 ); /* FAIL */
+      testcase( i==107 ); /* FROM */
+      testcase( i==108 ); /* FULL */
+      testcase( i==109 ); /* GLOB */
+      testcase( i==110 ); /* BY */
+      testcase( i==111 ); /* IF */
+      testcase( i==112 ); /* ISNULL */
+      testcase( i==113 ); /* ORDER */
+      testcase( i==114 ); /* RESTRICT */
+      testcase( i==115 ); /* RIGHT */
+      testcase( i==116 ); /* ROLLBACK */
+      testcase( i==117 ); /* ROW */
+      testcase( i==118 ); /* UNION */
+      testcase( i==119 ); /* USING */
+      testcase( i==120 ); /* VACUUM */
+      testcase( i==121 ); /* VIEW */
+      testcase( i==122 ); /* INITIALLY */
+      testcase( i==123 ); /* ALL */
+      return aCode[i];
+    }
+  }
+  return TK_ID;
+}
+SQLITE_PRIVATE int sqlite3KeywordCode(const unsigned char *z, int n){
+  return keywordCode((char*)z, n);
+}
+#define SQLITE_N_KEYWORD 124
+
+/************** End of keywordhash.h *****************************************/
+/************** Continuing where we left off in tokenize.c *******************/
+
+
+/*
+** If X is a character that can be used in an identifier then
+** IdChar(X) will be true.  Otherwise it is false.
+**
+** For ASCII, any character with the high-order bit set is
+** allowed in an identifier.  For 7-bit characters, 
+** sqlite3IsIdChar[X] must be 1.
+**
+** For EBCDIC, the rules are more complex but have the same
+** end result.
+**
+** Ticket #1066.  the SQL standard does not allow '$' in the
+** middle of identifiers.  But many SQL implementations do. 
+** SQLite will allow '$' in identifiers for compatibility.
+** But the feature is undocumented.
+*/
+#ifdef SQLITE_ASCII
+#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
+#endif
+#ifdef SQLITE_EBCDIC
+SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
+    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
+    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
+};
+#define IdChar(C)  (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
+#endif
+SQLITE_PRIVATE int sqlite3IsIdChar(u8 c){ return IdChar(c); }
+
+
+/*
+** Return the length of the token that begins at z[0]. 
+** Store the token type in *tokenType before returning.
+*/
+SQLITE_PRIVATE int sqlite3GetToken(const unsigned char *z, int *tokenType){
+  int i, c;
+  switch( *z ){
+    case ' ': case '\t': case '\n': case '\f': case '\r': {
+      testcase( z[0]==' ' );
+      testcase( z[0]=='\t' );
+      testcase( z[0]=='\n' );
+      testcase( z[0]=='\f' );
+      testcase( z[0]=='\r' );
+      for(i=1; sqlite3Isspace(z[i]); i++){}
+      *tokenType = TK_SPACE;
+      return i;
+    }
+    case '-': {
+      if( z[1]=='-' ){
+        for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
+        *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
+        return i;
+      }
+      *tokenType = TK_MINUS;
+      return 1;
+    }
+    case '(': {
+      *tokenType = TK_LP;
+      return 1;
+    }
+    case ')': {
+      *tokenType = TK_RP;
+      return 1;
+    }
+    case ';': {
+      *tokenType = TK_SEMI;
+      return 1;
+    }
+    case '+': {
+      *tokenType = TK_PLUS;
+      return 1;
+    }
+    case '*': {
+      *tokenType = TK_STAR;
+      return 1;
+    }
+    case '/': {
+      if( z[1]!='*' || z[2]==0 ){
+        *tokenType = TK_SLASH;
+        return 1;
+      }
+      for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
+      if( c ) i++;
+      *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
+      return i;
+    }
+    case '%': {
+      *tokenType = TK_REM;
+      return 1;
+    }
+    case '=': {
+      *tokenType = TK_EQ;
+      return 1 + (z[1]=='=');
+    }
+    case '<': {
+      if( (c=z[1])=='=' ){
+        *tokenType = TK_LE;
+        return 2;
+      }else if( c=='>' ){
+        *tokenType = TK_NE;
+        return 2;
+      }else if( c=='<' ){
+        *tokenType = TK_LSHIFT;
+        return 2;
+      }else{
+        *tokenType = TK_LT;
+        return 1;
+      }
+    }
+    case '>': {
+      if( (c=z[1])=='=' ){
+        *tokenType = TK_GE;
+        return 2;
+      }else if( c=='>' ){
+        *tokenType = TK_RSHIFT;
+        return 2;
+      }else{
+        *tokenType = TK_GT;
+        return 1;
+      }
+    }
+    case '!': {
+      if( z[1]!='=' ){
+        *tokenType = TK_ILLEGAL;
+        return 2;
+      }else{
+        *tokenType = TK_NE;
+        return 2;
+      }
+    }
+    case '|': {
+      if( z[1]!='|' ){
+        *tokenType = TK_BITOR;
+        return 1;
+      }else{
+        *tokenType = TK_CONCAT;
+        return 2;
+      }
+    }
+    case ',': {
+      *tokenType = TK_COMMA;
+      return 1;
+    }
+    case '&': {
+      *tokenType = TK_BITAND;
+      return 1;
+    }
+    case '~': {
+      *tokenType = TK_BITNOT;
+      return 1;
+    }
+    case '`':
+    case '\'':
+    case '"': {
+      int delim = z[0];
+      testcase( delim=='`' );
+      testcase( delim=='\'' );
+      testcase( delim=='"' );
+      for(i=1; (c=z[i])!=0; i++){
+        if( c==delim ){
+          if( z[i+1]==delim ){
+            i++;
+          }else{
+            break;
+          }
+        }
+      }
+      if( c=='\'' ){
+        *tokenType = TK_STRING;
+        return i+1;
+      }else if( c!=0 ){
+        *tokenType = TK_ID;
+        return i+1;
+      }else{
+        *tokenType = TK_ILLEGAL;
+        return i;
+      }
+    }
+    case '.': {
+#ifndef SQLITE_OMIT_FLOATING_POINT
+      if( !sqlite3Isdigit(z[1]) )
+#endif
+      {
+        *tokenType = TK_DOT;
+        return 1;
+      }
+      /* If the next character is a digit, this is a floating point
+      ** number that begins with ".".  Fall thru into the next case */
+    }
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9': {
+      testcase( z[0]=='0' );  testcase( z[0]=='1' );  testcase( z[0]=='2' );
+      testcase( z[0]=='3' );  testcase( z[0]=='4' );  testcase( z[0]=='5' );
+      testcase( z[0]=='6' );  testcase( z[0]=='7' );  testcase( z[0]=='8' );
+      testcase( z[0]=='9' );
+      *tokenType = TK_INTEGER;
+#ifndef SQLITE_OMIT_HEX_INTEGER
+      if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){
+        for(i=3; sqlite3Isxdigit(z[i]); i++){}
+        return i;
+      }
+#endif
+      for(i=0; sqlite3Isdigit(z[i]); i++){}
+#ifndef SQLITE_OMIT_FLOATING_POINT
+      if( z[i]=='.' ){
+        i++;
+        while( sqlite3Isdigit(z[i]) ){ i++; }
+        *tokenType = TK_FLOAT;
+      }
+      if( (z[i]=='e' || z[i]=='E') &&
+           ( sqlite3Isdigit(z[i+1]) 
+            || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
+           )
+      ){
+        i += 2;
+        while( sqlite3Isdigit(z[i]) ){ i++; }
+        *tokenType = TK_FLOAT;
+      }
+#endif
+      while( IdChar(z[i]) ){
+        *tokenType = TK_ILLEGAL;
+        i++;
+      }
+      return i;
+    }
+    case '[': {
+      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
+      *tokenType = c==']' ? TK_ID : TK_ILLEGAL;
+      return i;
+    }
+    case '?': {
+      *tokenType = TK_VARIABLE;
+      for(i=1; sqlite3Isdigit(z[i]); i++){}
+      return i;
+    }
+#ifndef SQLITE_OMIT_TCL_VARIABLE
+    case '$':
+#endif
+    case '@':  /* For compatibility with MS SQL Server */
+    case '#':
+    case ':': {
+      int n = 0;
+      testcase( z[0]=='$' );  testcase( z[0]=='@' );
+      testcase( z[0]==':' );  testcase( z[0]=='#' );
+      *tokenType = TK_VARIABLE;
+      for(i=1; (c=z[i])!=0; i++){
+        if( IdChar(c) ){
+          n++;
+#ifndef SQLITE_OMIT_TCL_VARIABLE
+        }else if( c=='(' && n>0 ){
+          do{
+            i++;
+          }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
+          if( c==')' ){
+            i++;
+          }else{
+            *tokenType = TK_ILLEGAL;
+          }
+          break;
+        }else if( c==':' && z[i+1]==':' ){
+          i++;
+#endif
+        }else{
+          break;
+        }
+      }
+      if( n==0 ) *tokenType = TK_ILLEGAL;
+      return i;
+    }
+#ifndef SQLITE_OMIT_BLOB_LITERAL
+    case 'x': case 'X': {
+      testcase( z[0]=='x' ); testcase( z[0]=='X' );
+      if( z[1]=='\'' ){
+        *tokenType = TK_BLOB;
+        for(i=2; sqlite3Isxdigit(z[i]); i++){}
+        if( z[i]!='\'' || i%2 ){
+          *tokenType = TK_ILLEGAL;
+          while( z[i] && z[i]!='\'' ){ i++; }
+        }
+        if( z[i] ) i++;
+        return i;
+      }
+      /* Otherwise fall through to the next case */
+    }
+#endif
+    default: {
+      if( !IdChar(*z) ){
+        break;
+      }
+      for(i=1; IdChar(z[i]); i++){}
+      *tokenType = keywordCode((char*)z, i);
+      return i;
+    }
+  }
+  *tokenType = TK_ILLEGAL;
+  return 1;
+}
+
+/*
+** Run the parser on the given SQL string.  The parser structure is
+** passed in.  An SQLITE_ status code is returned.  If an error occurs
+** then an and attempt is made to write an error message into 
+** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
+** error message.
+*/
+SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
+  int nErr = 0;                   /* Number of errors encountered */
+  int i;                          /* Loop counter */
+  void *pEngine;                  /* The LEMON-generated LALR(1) parser */
+  int tokenType;                  /* type of the next token */
+  int lastTokenParsed = -1;       /* type of the previous token */
+  u8 enableLookaside;             /* Saved value of db->lookaside.bEnabled */
+  sqlite3 *db = pParse->db;       /* The database connection */
+  int mxSqlLen;                   /* Max length of an SQL string */
+
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( zSql==0 || pzErrMsg==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
+  if( db->nVdbeActive==0 ){
+    db->u1.isInterrupted = 0;
+  }
+  pParse->rc = SQLITE_OK;
+  pParse->zTail = zSql;
+  i = 0;
+  assert( pzErrMsg!=0 );
+  pEngine = sqlite3ParserAlloc(sqlite3Malloc);
+  if( pEngine==0 ){
+    db->mallocFailed = 1;
+    return SQLITE_NOMEM;
+  }
+  assert( pParse->pNewTable==0 );
+  assert( pParse->pNewTrigger==0 );
+  assert( pParse->nVar==0 );
+  assert( pParse->nzVar==0 );
+  assert( pParse->azVar==0 );
+  enableLookaside = db->lookaside.bEnabled;
+  if( db->lookaside.pStart ) db->lookaside.bEnabled = 1;
+  while( !db->mallocFailed && zSql[i]!=0 ){
+    assert( i>=0 );
+    pParse->sLastToken.z = &zSql[i];
+    pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType);
+    i += pParse->sLastToken.n;
+    if( i>mxSqlLen ){
+      pParse->rc = SQLITE_TOOBIG;
+      break;
+    }
+    switch( tokenType ){
+      case TK_SPACE: {
+        if( db->u1.isInterrupted ){
+          sqlite3ErrorMsg(pParse, "interrupt");
+          pParse->rc = SQLITE_INTERRUPT;
+          goto abort_parse;
+        }
+        break;
+      }
+      case TK_ILLEGAL: {
+        sqlite3DbFree(db, *pzErrMsg);
+        *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
+                        &pParse->sLastToken);
+        nErr++;
+        goto abort_parse;
+      }
+      case TK_SEMI: {
+        pParse->zTail = &zSql[i];
+        /* Fall thru into the default case */
+      }
+      default: {
+        sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
+        lastTokenParsed = tokenType;
+        if( pParse->rc!=SQLITE_OK ){
+          goto abort_parse;
+        }
+        break;
+      }
+    }
+  }
+abort_parse:
+  if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
+    if( lastTokenParsed!=TK_SEMI ){
+      sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
+      pParse->zTail = &zSql[i];
+    }
+    sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
+  }
+#ifdef YYTRACKMAXSTACKDEPTH
+  sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK,
+      sqlite3ParserStackPeak(pEngine)
+  );
+#endif /* YYDEBUG */
+  sqlite3ParserFree(pEngine, sqlite3_free);
+  db->lookaside.bEnabled = enableLookaside;
+  if( db->mallocFailed ){
+    pParse->rc = SQLITE_NOMEM;
+  }
+  if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
+    sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc));
+  }
+  assert( pzErrMsg!=0 );
+  if( pParse->zErrMsg ){
+    *pzErrMsg = pParse->zErrMsg;
+    sqlite3_log(pParse->rc, "%s", *pzErrMsg);
+    pParse->zErrMsg = 0;
+    nErr++;
+  }
+  if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
+    sqlite3VdbeDelete(pParse->pVdbe);
+    pParse->pVdbe = 0;
+  }
+#ifndef SQLITE_OMIT_SHARED_CACHE
+  if( pParse->nested==0 ){
+    sqlite3DbFree(db, pParse->aTableLock);
+    pParse->aTableLock = 0;
+    pParse->nTableLock = 0;
+  }
+#endif
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  sqlite3_free(pParse->apVtabLock);
+#endif
+
+  if( !IN_DECLARE_VTAB ){
+    /* If the pParse->declareVtab flag is set, do not delete any table 
+    ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
+    ** will take responsibility for freeing the Table structure.
+    */
+    sqlite3DeleteTable(db, pParse->pNewTable);
+  }
+
+  if( pParse->bFreeWith ) sqlite3WithDelete(db, pParse->pWith);
+  sqlite3DeleteTrigger(db, pParse->pNewTrigger);
+  for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]);
+  sqlite3DbFree(db, pParse->azVar);
+  while( pParse->pAinc ){
+    AutoincInfo *p = pParse->pAinc;
+    pParse->pAinc = p->pNext;
+    sqlite3DbFree(db, p);
+  }
+  while( pParse->pZombieTab ){
+    Table *p = pParse->pZombieTab;
+    pParse->pZombieTab = p->pNextZombie;
+    sqlite3DeleteTable(db, p);
+  }
+  if( nErr>0 && pParse->rc==SQLITE_OK ){
+    pParse->rc = SQLITE_ERROR;
+  }
+  return nErr;
+}
+
+/************** End of tokenize.c ********************************************/
+/************** Begin file complete.c ****************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** An tokenizer for SQL
+**
+** This file contains C code that implements the sqlite3_complete() API.
+** This code used to be part of the tokenizer.c source file.  But by
+** separating it out, the code will be automatically omitted from
+** static links that do not use it.
+*/
+#ifndef SQLITE_OMIT_COMPLETE
+
+/*
+** This is defined in tokenize.c.  We just have to import the definition.
+*/
+#ifndef SQLITE_AMALGAMATION
+#ifdef SQLITE_ASCII
+#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
+#endif
+#ifdef SQLITE_EBCDIC
+SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[];
+#define IdChar(C)  (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
+#endif
+#endif /* SQLITE_AMALGAMATION */
+
+
+/*
+** Token types used by the sqlite3_complete() routine.  See the header
+** comments on that procedure for additional information.
+*/
+#define tkSEMI    0
+#define tkWS      1
+#define tkOTHER   2
+#ifndef SQLITE_OMIT_TRIGGER
+#define tkEXPLAIN 3
+#define tkCREATE  4
+#define tkTEMP    5
+#define tkTRIGGER 6
+#define tkEND     7
+#endif
+
+/*
+** Return TRUE if the given SQL string ends in a semicolon.
+**
+** Special handling is require for CREATE TRIGGER statements.
+** Whenever the CREATE TRIGGER keywords are seen, the statement
+** must end with ";END;".
+**
+** This implementation uses a state machine with 8 states:
+**
+**   (0) INVALID   We have not yet seen a non-whitespace character.
+**
+**   (1) START     At the beginning or end of an SQL statement.  This routine
+**                 returns 1 if it ends in the START state and 0 if it ends
+**                 in any other state.
+**
+**   (2) NORMAL    We are in the middle of statement which ends with a single
+**                 semicolon.
+**
+**   (3) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of 
+**                 a statement.
+**
+**   (4) CREATE    The keyword CREATE has been seen at the beginning of a
+**                 statement, possibly preceded by EXPLAIN and/or followed by
+**                 TEMP or TEMPORARY
+**
+**   (5) TRIGGER   We are in the middle of a trigger definition that must be
+**                 ended by a semicolon, the keyword END, and another semicolon.
+**
+**   (6) SEMI      We've seen the first semicolon in the ";END;" that occurs at
+**                 the end of a trigger definition.
+**
+**   (7) END       We've seen the ";END" of the ";END;" that occurs at the end
+**                 of a trigger definition.
+**
+** Transitions between states above are determined by tokens extracted
+** from the input.  The following tokens are significant:
+**
+**   (0) tkSEMI      A semicolon.
+**   (1) tkWS        Whitespace.
+**   (2) tkOTHER     Any other SQL token.
+**   (3) tkEXPLAIN   The "explain" keyword.
+**   (4) tkCREATE    The "create" keyword.
+**   (5) tkTEMP      The "temp" or "temporary" keyword.
+**   (6) tkTRIGGER   The "trigger" keyword.
+**   (7) tkEND       The "end" keyword.
+**
+** Whitespace never causes a state transition and is always ignored.
+** This means that a SQL string of all whitespace is invalid.
+**
+** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed
+** to recognize the end of a trigger can be omitted.  All we have to do
+** is look for a semicolon that is not part of an string or comment.
+*/
+SQLITE_API int sqlite3_complete(const char *zSql){
+  u8 state = 0;   /* Current state, using numbers defined in header comment */
+  u8 token;       /* Value of the next token */
+
+#ifndef SQLITE_OMIT_TRIGGER
+  /* A complex statement machine used to detect the end of a CREATE TRIGGER
+  ** statement.  This is the normal case.
+  */
+  static const u8 trans[8][8] = {
+                     /* Token:                                                */
+     /* State:       **  SEMI  WS  OTHER  EXPLAIN  CREATE  TEMP  TRIGGER  END */
+     /* 0 INVALID: */ {    1,  0,     2,       3,      4,    2,       2,   2, },
+     /* 1   START: */ {    1,  1,     2,       3,      4,    2,       2,   2, },
+     /* 2  NORMAL: */ {    1,  2,     2,       2,      2,    2,       2,   2, },
+     /* 3 EXPLAIN: */ {    1,  3,     3,       2,      4,    2,       2,   2, },
+     /* 4  CREATE: */ {    1,  4,     2,       2,      2,    4,       5,   2, },
+     /* 5 TRIGGER: */ {    6,  5,     5,       5,      5,    5,       5,   5, },
+     /* 6    SEMI: */ {    6,  6,     5,       5,      5,    5,       5,   7, },
+     /* 7     END: */ {    1,  7,     5,       5,      5,    5,       5,   5, },
+  };
+#else
+  /* If triggers are not supported by this compile then the statement machine
+  ** used to detect the end of a statement is much simpler
+  */
+  static const u8 trans[3][3] = {
+                     /* Token:           */
+     /* State:       **  SEMI  WS  OTHER */
+     /* 0 INVALID: */ {    1,  0,     2, },
+     /* 1   START: */ {    1,  1,     2, },
+     /* 2  NORMAL: */ {    1,  2,     2, },
+  };
+#endif /* SQLITE_OMIT_TRIGGER */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( zSql==0 ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+
+  while( *zSql ){
+    switch( *zSql ){
+      case ';': {  /* A semicolon */
+        token = tkSEMI;
+        break;
+      }
+      case ' ':
+      case '\r':
+      case '\t':
+      case '\n':
+      case '\f': {  /* White space is ignored */
+        token = tkWS;
+        break;
+      }
+      case '/': {   /* C-style comments */
+        if( zSql[1]!='*' ){
+          token = tkOTHER;
+          break;
+        }
+        zSql += 2;
+        while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
+        if( zSql[0]==0 ) return 0;
+        zSql++;
+        token = tkWS;
+        break;
+      }
+      case '-': {   /* SQL-style comments from "--" to end of line */
+        if( zSql[1]!='-' ){
+          token = tkOTHER;
+          break;
+        }
+        while( *zSql && *zSql!='\n' ){ zSql++; }
+        if( *zSql==0 ) return state==1;
+        token = tkWS;
+        break;
+      }
+      case '[': {   /* Microsoft-style identifiers in [...] */
+        zSql++;
+        while( *zSql && *zSql!=']' ){ zSql++; }
+        if( *zSql==0 ) return 0;
+        token = tkOTHER;
+        break;
+      }
+      case '`':     /* Grave-accent quoted symbols used by MySQL */
+      case '"':     /* single- and double-quoted strings */
+      case '\'': {
+        int c = *zSql;
+        zSql++;
+        while( *zSql && *zSql!=c ){ zSql++; }
+        if( *zSql==0 ) return 0;
+        token = tkOTHER;
+        break;
+      }
+      default: {
+#ifdef SQLITE_EBCDIC
+        unsigned char c;
+#endif
+        if( IdChar((u8)*zSql) ){
+          /* Keywords and unquoted identifiers */
+          int nId;
+          for(nId=1; IdChar(zSql[nId]); nId++){}
+#ifdef SQLITE_OMIT_TRIGGER
+          token = tkOTHER;
+#else
+          switch( *zSql ){
+            case 'c': case 'C': {
+              if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){
+                token = tkCREATE;
+              }else{
+                token = tkOTHER;
+              }
+              break;
+            }
+            case 't': case 'T': {
+              if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){
+                token = tkTRIGGER;
+              }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){
+                token = tkTEMP;
+              }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){
+                token = tkTEMP;
+              }else{
+                token = tkOTHER;
+              }
+              break;
+            }
+            case 'e':  case 'E': {
+              if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){
+                token = tkEND;
+              }else
+#ifndef SQLITE_OMIT_EXPLAIN
+              if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){
+                token = tkEXPLAIN;
+              }else
+#endif
+              {
+                token = tkOTHER;
+              }
+              break;
+            }
+            default: {
+              token = tkOTHER;
+              break;
+            }
+          }
+#endif /* SQLITE_OMIT_TRIGGER */
+          zSql += nId-1;
+        }else{
+          /* Operators and special symbols */
+          token = tkOTHER;
+        }
+        break;
+      }
+    }
+    state = trans[state][token];
+    zSql++;
+  }
+  return state==1;
+}
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** This routine is the same as the sqlite3_complete() routine described
+** above, except that the parameter is required to be UTF-16 encoded, not
+** UTF-8.
+*/
+SQLITE_API int sqlite3_complete16(const void *zSql){
+  sqlite3_value *pVal;
+  char const *zSql8;
+  int rc = SQLITE_NOMEM;
+
+#ifndef SQLITE_OMIT_AUTOINIT
+  rc = sqlite3_initialize();
+  if( rc ) return rc;
+#endif
+  pVal = sqlite3ValueNew(0);
+  sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+  zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8);
+  if( zSql8 ){
+    rc = sqlite3_complete(zSql8);
+  }else{
+    rc = SQLITE_NOMEM;
+  }
+  sqlite3ValueFree(pVal);
+  return sqlite3ApiExit(0, rc);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+#endif /* SQLITE_OMIT_COMPLETE */
+
+/************** End of complete.c ********************************************/
+/************** Begin file main.c ********************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Main file for the SQLite library.  The routines in this file
+** implement the programmer interface to the library.  Routines in
+** other files are for internal use by SQLite and should not be
+** accessed by users of the library.
+*/
+
+#ifdef SQLITE_ENABLE_FTS3
+/************** Include fts3.h in the middle of main.c ***********************/
+/************** Begin file fts3.h ********************************************/
+/*
+** 2006 Oct 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This header file is used by programs that want to link against the
+** FTS3 library.  All it does is declare the sqlite3Fts3Init() interface.
+*/
+
+#if 0
+extern "C" {
+#endif  /* __cplusplus */
+
+SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db);
+
+#if 0
+}  /* extern "C" */
+#endif  /* __cplusplus */
+
+/************** End of fts3.h ************************************************/
+/************** Continuing where we left off in main.c ***********************/
+#endif
+#ifdef SQLITE_ENABLE_RTREE
+/************** Include rtree.h in the middle of main.c **********************/
+/************** Begin file rtree.h *******************************************/
+/*
+** 2008 May 26
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This header file is used by programs that want to link against the
+** RTREE library.  All it does is declare the sqlite3RtreeInit() interface.
+*/
+
+#if 0
+extern "C" {
+#endif  /* __cplusplus */
+
+SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db);
+
+#if 0
+}  /* extern "C" */
+#endif  /* __cplusplus */
+
+/************** End of rtree.h ***********************************************/
+/************** Continuing where we left off in main.c ***********************/
+#endif
+#ifdef SQLITE_ENABLE_ICU
+/************** Include sqliteicu.h in the middle of main.c ******************/
+/************** Begin file sqliteicu.h ***************************************/
+/*
+** 2008 May 26
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This header file is used by programs that want to link against the
+** ICU extension.  All it does is declare the sqlite3IcuInit() interface.
+*/
+
+#if 0
+extern "C" {
+#endif  /* __cplusplus */
+
+SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db);
+
+#if 0
+}  /* extern "C" */
+#endif  /* __cplusplus */
+
+
+/************** End of sqliteicu.h *******************************************/
+/************** Continuing where we left off in main.c ***********************/
+#endif
+
+#ifndef SQLITE_AMALGAMATION
+/* IMPLEMENTATION-OF: R-46656-45156 The sqlite3_version[] string constant
+** contains the text of SQLITE_VERSION macro. 
+*/
+SQLITE_API const char sqlite3_version[] = SQLITE_VERSION;
+#endif
+
+/* IMPLEMENTATION-OF: R-53536-42575 The sqlite3_libversion() function returns
+** a pointer to the to the sqlite3_version[] string constant. 
+*/
+SQLITE_API const char *sqlite3_libversion(void){ return sqlite3_version; }
+
+/* IMPLEMENTATION-OF: R-63124-39300 The sqlite3_sourceid() function returns a
+** pointer to a string constant whose value is the same as the
+** SQLITE_SOURCE_ID C preprocessor macro. 
+*/
+SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; }
+
+/* IMPLEMENTATION-OF: R-35210-63508 The sqlite3_libversion_number() function
+** returns an integer equal to SQLITE_VERSION_NUMBER.
+*/
+SQLITE_API int sqlite3_libversion_number(void){ return SQLITE_VERSION_NUMBER; }
+
+/* IMPLEMENTATION-OF: R-20790-14025 The sqlite3_threadsafe() function returns
+** zero if and only if SQLite was compiled with mutexing code omitted due to
+** the SQLITE_THREADSAFE compile-time option being set to 0.
+*/
+SQLITE_API int sqlite3_threadsafe(void){ return SQLITE_THREADSAFE; }
+
+#if !defined(SQLITE_OMIT_TRACE) && defined(SQLITE_ENABLE_IOTRACE)
+/*
+** If the following function pointer is not NULL and if
+** SQLITE_ENABLE_IOTRACE is enabled, then messages describing
+** I/O active are written using this function.  These messages
+** are intended for debugging activity only.
+*/
+/* not-private */ void (*sqlite3IoTrace)(const char*, ...) = 0;
+#endif
+
+/*
+** If the following global variable points to a string which is the
+** name of a directory, then that directory will be used to store
+** temporary files.
+**
+** See also the "PRAGMA temp_store_directory" SQL command.
+*/
+SQLITE_API char *sqlite3_temp_directory = 0;
+
+/*
+** If the following global variable points to a string which is the
+** name of a directory, then that directory will be used to store
+** all database files specified with a relative pathname.
+**
+** See also the "PRAGMA data_store_directory" SQL command.
+*/
+SQLITE_API char *sqlite3_data_directory = 0;
+
+/*
+** Initialize SQLite.  
+**
+** This routine must be called to initialize the memory allocation,
+** VFS, and mutex subsystems prior to doing any serious work with
+** SQLite.  But as long as you do not compile with SQLITE_OMIT_AUTOINIT
+** this routine will be called automatically by key routines such as
+** sqlite3_open().  
+**
+** This routine is a no-op except on its very first call for the process,
+** or for the first call after a call to sqlite3_shutdown.
+**
+** The first thread to call this routine runs the initialization to
+** completion.  If subsequent threads call this routine before the first
+** thread has finished the initialization process, then the subsequent
+** threads must block until the first thread finishes with the initialization.
+**
+** The first thread might call this routine recursively.  Recursive
+** calls to this routine should not block, of course.  Otherwise the
+** initialization process would never complete.
+**
+** Let X be the first thread to enter this routine.  Let Y be some other
+** thread.  Then while the initial invocation of this routine by X is
+** incomplete, it is required that:
+**
+**    *  Calls to this routine from Y must block until the outer-most
+**       call by X completes.
+**
+**    *  Recursive calls to this routine from thread X return immediately
+**       without blocking.
+*/
+SQLITE_API int sqlite3_initialize(void){
+  MUTEX_LOGIC( sqlite3_mutex *pMaster; )       /* The main static mutex */
+  int rc;                                      /* Result code */
+#ifdef SQLITE_EXTRA_INIT
+  int bRunExtraInit = 0;                       /* Extra initialization needed */
+#endif
+
+#ifdef SQLITE_OMIT_WSD
+  rc = sqlite3_wsd_init(4096, 24);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+#endif
+
+  /* If SQLite is already completely initialized, then this call
+  ** to sqlite3_initialize() should be a no-op.  But the initialization
+  ** must be complete.  So isInit must not be set until the very end
+  ** of this routine.
+  */
+  if( sqlite3GlobalConfig.isInit ) return SQLITE_OK;
+
+  /* Make sure the mutex subsystem is initialized.  If unable to 
+  ** initialize the mutex subsystem, return early with the error.
+  ** If the system is so sick that we are unable to allocate a mutex,
+  ** there is not much SQLite is going to be able to do.
+  **
+  ** The mutex subsystem must take care of serializing its own
+  ** initialization.
+  */
+  rc = sqlite3MutexInit();
+  if( rc ) return rc;
+
+  /* Initialize the malloc() system and the recursive pInitMutex mutex.
+  ** This operation is protected by the STATIC_MASTER mutex.  Note that
+  ** MutexAlloc() is called for a static mutex prior to initializing the
+  ** malloc subsystem - this implies that the allocation of a static
+  ** mutex must not require support from the malloc subsystem.
+  */
+  MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
+  sqlite3_mutex_enter(pMaster);
+  sqlite3GlobalConfig.isMutexInit = 1;
+  if( !sqlite3GlobalConfig.isMallocInit ){
+    rc = sqlite3MallocInit();
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3GlobalConfig.isMallocInit = 1;
+    if( !sqlite3GlobalConfig.pInitMutex ){
+      sqlite3GlobalConfig.pInitMutex =
+           sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE);
+      if( sqlite3GlobalConfig.bCoreMutex && !sqlite3GlobalConfig.pInitMutex ){
+        rc = SQLITE_NOMEM;
+      }
+    }
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3GlobalConfig.nRefInitMutex++;
+  }
+  sqlite3_mutex_leave(pMaster);
+
+  /* If rc is not SQLITE_OK at this point, then either the malloc
+  ** subsystem could not be initialized or the system failed to allocate
+  ** the pInitMutex mutex. Return an error in either case.  */
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* Do the rest of the initialization under the recursive mutex so
+  ** that we will be able to handle recursive calls into
+  ** sqlite3_initialize().  The recursive calls normally come through
+  ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other
+  ** recursive calls might also be possible.
+  **
+  ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls
+  ** to the xInit method, so the xInit method need not be threadsafe.
+  **
+  ** The following mutex is what serializes access to the appdef pcache xInit
+  ** methods.  The sqlite3_pcache_methods.xInit() all is embedded in the
+  ** call to sqlite3PcacheInitialize().
+  */
+  sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex);
+  if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){
+    FuncDefHash *pHash = &GLOBAL(FuncDefHash, sqlite3GlobalFunctions);
+    sqlite3GlobalConfig.inProgress = 1;
+    memset(pHash, 0, sizeof(sqlite3GlobalFunctions));
+    sqlite3RegisterGlobalFunctions();
+    if( sqlite3GlobalConfig.isPCacheInit==0 ){
+      rc = sqlite3PcacheInitialize();
+    }
+    if( rc==SQLITE_OK ){
+      sqlite3GlobalConfig.isPCacheInit = 1;
+      rc = sqlite3OsInit();
+    }
+    if( rc==SQLITE_OK ){
+      sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, 
+          sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage);
+      sqlite3GlobalConfig.isInit = 1;
+#ifdef SQLITE_EXTRA_INIT
+      bRunExtraInit = 1;
+#endif
+    }
+    sqlite3GlobalConfig.inProgress = 0;
+  }
+  sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex);
+
+  /* Go back under the static mutex and clean up the recursive
+  ** mutex to prevent a resource leak.
+  */
+  sqlite3_mutex_enter(pMaster);
+  sqlite3GlobalConfig.nRefInitMutex--;
+  if( sqlite3GlobalConfig.nRefInitMutex<=0 ){
+    assert( sqlite3GlobalConfig.nRefInitMutex==0 );
+    sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex);
+    sqlite3GlobalConfig.pInitMutex = 0;
+  }
+  sqlite3_mutex_leave(pMaster);
+
+  /* The following is just a sanity check to make sure SQLite has
+  ** been compiled correctly.  It is important to run this code, but
+  ** we don't want to run it too often and soak up CPU cycles for no
+  ** reason.  So we run it once during initialization.
+  */
+#ifndef NDEBUG
+#ifndef SQLITE_OMIT_FLOATING_POINT
+  /* This section of code's only "output" is via assert() statements. */
+  if ( rc==SQLITE_OK ){
+    u64 x = (((u64)1)<<63)-1;
+    double y;
+    assert(sizeof(x)==8);
+    assert(sizeof(x)==sizeof(y));
+    memcpy(&y, &x, 8);
+    assert( sqlite3IsNaN(y) );
+  }
+#endif
+#endif
+
+  /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT
+  ** compile-time option.
+  */
+#ifdef SQLITE_EXTRA_INIT
+  if( bRunExtraInit ){
+    int SQLITE_EXTRA_INIT(const char*);
+    rc = SQLITE_EXTRA_INIT(0);
+  }
+#endif
+
+  return rc;
+}
+
+/*
+** Undo the effects of sqlite3_initialize().  Must not be called while
+** there are outstanding database connections or memory allocations or
+** while any part of SQLite is otherwise in use in any thread.  This
+** routine is not threadsafe.  But it is safe to invoke this routine
+** on when SQLite is already shut down.  If SQLite is already shut down
+** when this routine is invoked, then this routine is a harmless no-op.
+*/
+SQLITE_API int sqlite3_shutdown(void){
+#ifdef SQLITE_OMIT_WSD
+  int rc = sqlite3_wsd_init(4096, 24);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+#endif
+
+  if( sqlite3GlobalConfig.isInit ){
+#ifdef SQLITE_EXTRA_SHUTDOWN
+    void SQLITE_EXTRA_SHUTDOWN(void);
+    SQLITE_EXTRA_SHUTDOWN();
+#endif
+    sqlite3_os_end();
+    sqlite3_reset_auto_extension();
+    sqlite3GlobalConfig.isInit = 0;
+  }
+  if( sqlite3GlobalConfig.isPCacheInit ){
+    sqlite3PcacheShutdown();
+    sqlite3GlobalConfig.isPCacheInit = 0;
+  }
+  if( sqlite3GlobalConfig.isMallocInit ){
+    sqlite3MallocEnd();
+    sqlite3GlobalConfig.isMallocInit = 0;
+
+#ifndef SQLITE_OMIT_SHUTDOWN_DIRECTORIES
+    /* The heap subsystem has now been shutdown and these values are supposed
+    ** to be NULL or point to memory that was obtained from sqlite3_malloc(),
+    ** which would rely on that heap subsystem; therefore, make sure these
+    ** values cannot refer to heap memory that was just invalidated when the
+    ** heap subsystem was shutdown.  This is only done if the current call to
+    ** this function resulted in the heap subsystem actually being shutdown.
+    */
+    sqlite3_data_directory = 0;
+    sqlite3_temp_directory = 0;
+#endif
+  }
+  if( sqlite3GlobalConfig.isMutexInit ){
+    sqlite3MutexEnd();
+    sqlite3GlobalConfig.isMutexInit = 0;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** This API allows applications to modify the global configuration of
+** the SQLite library at run-time.
+**
+** This routine should only be called when there are no outstanding
+** database connections or memory allocations.  This routine is not
+** threadsafe.  Failure to heed these warnings can lead to unpredictable
+** behavior.
+*/
+SQLITE_API int sqlite3_config(int op, ...){
+  va_list ap;
+  int rc = SQLITE_OK;
+
+  /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while
+  ** the SQLite library is in use. */
+  if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT;
+
+  va_start(ap, op);
+  switch( op ){
+
+    /* Mutex configuration options are only available in a threadsafe
+    ** compile.
+    */
+#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0  /* IMP: R-54466-46756 */
+    case SQLITE_CONFIG_SINGLETHREAD: {
+      /* Disable all mutexing */
+      sqlite3GlobalConfig.bCoreMutex = 0;
+      sqlite3GlobalConfig.bFullMutex = 0;
+      break;
+    }
+#endif
+#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */
+    case SQLITE_CONFIG_MULTITHREAD: {
+      /* Disable mutexing of database connections */
+      /* Enable mutexing of core data structures */
+      sqlite3GlobalConfig.bCoreMutex = 1;
+      sqlite3GlobalConfig.bFullMutex = 0;
+      break;
+    }
+#endif
+#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */
+    case SQLITE_CONFIG_SERIALIZED: {
+      /* Enable all mutexing */
+      sqlite3GlobalConfig.bCoreMutex = 1;
+      sqlite3GlobalConfig.bFullMutex = 1;
+      break;
+    }
+#endif
+#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */
+    case SQLITE_CONFIG_MUTEX: {
+      /* Specify an alternative mutex implementation */
+      sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*);
+      break;
+    }
+#endif
+#if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */
+    case SQLITE_CONFIG_GETMUTEX: {
+      /* Retrieve the current mutex implementation */
+      *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex;
+      break;
+    }
+#endif
+
+    case SQLITE_CONFIG_MALLOC: {
+      /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a
+      ** single argument which is a pointer to an instance of the
+      ** sqlite3_mem_methods structure. The argument specifies alternative
+      ** low-level memory allocation routines to be used in place of the memory
+      ** allocation routines built into SQLite. */
+      sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*);
+      break;
+    }
+    case SQLITE_CONFIG_GETMALLOC: {
+      /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a
+      ** single argument which is a pointer to an instance of the
+      ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is
+      ** filled with the currently defined memory allocation routines. */
+      if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault();
+      *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m;
+      break;
+    }
+    case SQLITE_CONFIG_MEMSTATUS: {
+      /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes
+      ** single argument of type int, interpreted as a boolean, which enables
+      ** or disables the collection of memory allocation statistics. */
+      sqlite3GlobalConfig.bMemstat = va_arg(ap, int);
+      break;
+    }
+    case SQLITE_CONFIG_SCRATCH: {
+      /* EVIDENCE-OF: R-08404-60887 There are three arguments to
+      ** SQLITE_CONFIG_SCRATCH: A pointer an 8-byte aligned memory buffer from
+      ** which the scratch allocations will be drawn, the size of each scratch
+      ** allocation (sz), and the maximum number of scratch allocations (N). */
+      sqlite3GlobalConfig.pScratch = va_arg(ap, void*);
+      sqlite3GlobalConfig.szScratch = va_arg(ap, int);
+      sqlite3GlobalConfig.nScratch = va_arg(ap, int);
+      break;
+    }
+    case SQLITE_CONFIG_PAGECACHE: {
+      /* EVIDENCE-OF: R-31408-40510 There are three arguments to
+      ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory, the size
+      ** of each page buffer (sz), and the number of pages (N). */
+      sqlite3GlobalConfig.pPage = va_arg(ap, void*);
+      sqlite3GlobalConfig.szPage = va_arg(ap, int);
+      sqlite3GlobalConfig.nPage = va_arg(ap, int);
+      break;
+    }
+    case SQLITE_CONFIG_PCACHE_HDRSZ: {
+      /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes
+      ** a single parameter which is a pointer to an integer and writes into
+      ** that integer the number of extra bytes per page required for each page
+      ** in SQLITE_CONFIG_PAGECACHE. */
+      *va_arg(ap, int*) = 
+          sqlite3HeaderSizeBtree() +
+          sqlite3HeaderSizePcache() +
+          sqlite3HeaderSizePcache1();
+      break;
+    }
+
+    case SQLITE_CONFIG_PCACHE: {
+      /* no-op */
+      break;
+    }
+    case SQLITE_CONFIG_GETPCACHE: {
+      /* now an error */
+      rc = SQLITE_ERROR;
+      break;
+    }
+
+    case SQLITE_CONFIG_PCACHE2: {
+      /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a
+      ** single argument which is a pointer to an sqlite3_pcache_methods2
+      ** object. This object specifies the interface to a custom page cache
+      ** implementation. */
+      sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*);
+      break;
+    }
+    case SQLITE_CONFIG_GETPCACHE2: {
+      /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a
+      ** single argument which is a pointer to an sqlite3_pcache_methods2
+      ** object. SQLite copies of the current page cache implementation into
+      ** that object. */
+      if( sqlite3GlobalConfig.pcache2.xInit==0 ){
+        sqlite3PCacheSetDefault();
+      }
+      *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2;
+      break;
+    }
+
+/* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only
+** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or
+** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */
+#if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5)
+    case SQLITE_CONFIG_HEAP: {
+      /* EVIDENCE-OF: R-19854-42126 There are three arguments to
+      ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the
+      ** number of bytes in the memory buffer, and the minimum allocation size. */
+      sqlite3GlobalConfig.pHeap = va_arg(ap, void*);
+      sqlite3GlobalConfig.nHeap = va_arg(ap, int);
+      sqlite3GlobalConfig.mnReq = va_arg(ap, int);
+
+      if( sqlite3GlobalConfig.mnReq<1 ){
+        sqlite3GlobalConfig.mnReq = 1;
+      }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){
+        /* cap min request size at 2^12 */
+        sqlite3GlobalConfig.mnReq = (1<<12);
+      }
+
+      if( sqlite3GlobalConfig.pHeap==0 ){
+        /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer)
+        ** is NULL, then SQLite reverts to using its default memory allocator
+        ** (the system malloc() implementation), undoing any prior invocation of
+        ** SQLITE_CONFIG_MALLOC.
+        **
+        ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to
+        ** revert to its default implementation when sqlite3_initialize() is run
+        */
+        memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m));
+      }else{
+        /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the
+        ** alternative memory allocator is engaged to handle all of SQLites
+        ** memory allocation needs. */
+#ifdef SQLITE_ENABLE_MEMSYS3
+        sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3();
+#endif
+#ifdef SQLITE_ENABLE_MEMSYS5
+        sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5();
+#endif
+      }
+      break;
+    }
+#endif
+
+    case SQLITE_CONFIG_LOOKASIDE: {
+      sqlite3GlobalConfig.szLookaside = va_arg(ap, int);
+      sqlite3GlobalConfig.nLookaside = va_arg(ap, int);
+      break;
+    }
+    
+    /* Record a pointer to the logger function and its first argument.
+    ** The default is NULL.  Logging is disabled if the function pointer is
+    ** NULL.
+    */
+    case SQLITE_CONFIG_LOG: {
+      /* MSVC is picky about pulling func ptrs from va lists.
+      ** http://support.microsoft.com/kb/47961
+      ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*));
+      */
+      typedef void(*LOGFUNC_t)(void*,int,const char*);
+      sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t);
+      sqlite3GlobalConfig.pLogArg = va_arg(ap, void*);
+      break;
+    }
+
+    /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames
+    ** can be changed at start-time using the
+    ** sqlite3_config(SQLITE_CONFIG_URI,1) or
+    ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls.
+    */
+    case SQLITE_CONFIG_URI: {
+      /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single
+      ** argument of type int. If non-zero, then URI handling is globally
+      ** enabled. If the parameter is zero, then URI handling is globally
+      ** disabled. */
+      sqlite3GlobalConfig.bOpenUri = va_arg(ap, int);
+      break;
+    }
+
+    case SQLITE_CONFIG_COVERING_INDEX_SCAN: {
+      /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN
+      ** option takes a single integer argument which is interpreted as a
+      ** boolean in order to enable or disable the use of covering indices for
+      ** full table scans in the query optimizer. */
+      sqlite3GlobalConfig.bUseCis = va_arg(ap, int);
+      break;
+    }
+
+#ifdef SQLITE_ENABLE_SQLLOG
+    case SQLITE_CONFIG_SQLLOG: {
+      typedef void(*SQLLOGFUNC_t)(void*, sqlite3*, const char*, int);
+      sqlite3GlobalConfig.xSqllog = va_arg(ap, SQLLOGFUNC_t);
+      sqlite3GlobalConfig.pSqllogArg = va_arg(ap, void *);
+      break;
+    }
+#endif
+
+    case SQLITE_CONFIG_MMAP_SIZE: {
+      /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit
+      ** integer (sqlite3_int64) values that are the default mmap size limit
+      ** (the default setting for PRAGMA mmap_size) and the maximum allowed
+      ** mmap size limit. */
+      sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64);
+      sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64);
+      /* EVIDENCE-OF: R-53367-43190 If either argument to this option is
+      ** negative, then that argument is changed to its compile-time default.
+      **
+      ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be
+      ** silently truncated if necessary so that it does not exceed the
+      ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE
+      ** compile-time option.
+      */
+      if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ) mxMmap = SQLITE_MAX_MMAP_SIZE;
+      if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE;
+      if( szMmap>mxMmap) szMmap = mxMmap;
+      sqlite3GlobalConfig.mxMmap = mxMmap;
+      sqlite3GlobalConfig.szMmap = szMmap;
+      break;
+    }
+
+#if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */
+    case SQLITE_CONFIG_WIN32_HEAPSIZE: {
+      /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit
+      ** unsigned integer value that specifies the maximum size of the created
+      ** heap. */
+      sqlite3GlobalConfig.nHeap = va_arg(ap, int);
+      break;
+    }
+#endif
+
+    case SQLITE_CONFIG_PMASZ: {
+      sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int);
+      break;
+    }
+
+    default: {
+      rc = SQLITE_ERROR;
+      break;
+    }
+  }
+  va_end(ap);
+  return rc;
+}
+
+/*
+** Set up the lookaside buffers for a database connection.
+** Return SQLITE_OK on success.  
+** If lookaside is already active, return SQLITE_BUSY.
+**
+** The sz parameter is the number of bytes in each lookaside slot.
+** The cnt parameter is the number of slots.  If pStart is NULL the
+** space for the lookaside memory is obtained from sqlite3_malloc().
+** If pStart is not NULL then it is sz*cnt bytes of memory to use for
+** the lookaside memory.
+*/
+static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){
+  void *pStart;
+  if( db->lookaside.nOut ){
+    return SQLITE_BUSY;
+  }
+  /* Free any existing lookaside buffer for this handle before
+  ** allocating a new one so we don't have to have space for 
+  ** both at the same time.
+  */
+  if( db->lookaside.bMalloced ){
+    sqlite3_free(db->lookaside.pStart);
+  }
+  /* The size of a lookaside slot after ROUNDDOWN8 needs to be larger
+  ** than a pointer to be useful.
+  */
+  sz = ROUNDDOWN8(sz);  /* IMP: R-33038-09382 */
+  if( sz<=(int)sizeof(LookasideSlot*) ) sz = 0;
+  if( cnt<0 ) cnt = 0;
+  if( sz==0 || cnt==0 ){
+    sz = 0;
+    pStart = 0;
+  }else if( pBuf==0 ){
+    sqlite3BeginBenignMalloc();
+    pStart = sqlite3Malloc( sz*cnt );  /* IMP: R-61949-35727 */
+    sqlite3EndBenignMalloc();
+    if( pStart ) cnt = sqlite3MallocSize(pStart)/sz;
+  }else{
+    pStart = pBuf;
+  }
+  db->lookaside.pStart = pStart;
+  db->lookaside.pFree = 0;
+  db->lookaside.sz = (u16)sz;
+  if( pStart ){
+    int i;
+    LookasideSlot *p;
+    assert( sz > (int)sizeof(LookasideSlot*) );
+    p = (LookasideSlot*)pStart;
+    for(i=cnt-1; i>=0; i--){
+      p->pNext = db->lookaside.pFree;
+      db->lookaside.pFree = p;
+      p = (LookasideSlot*)&((u8*)p)[sz];
+    }
+    db->lookaside.pEnd = p;
+    db->lookaside.bEnabled = 1;
+    db->lookaside.bMalloced = pBuf==0 ?1:0;
+  }else{
+    db->lookaside.pStart = db;
+    db->lookaside.pEnd = db;
+    db->lookaside.bEnabled = 0;
+    db->lookaside.bMalloced = 0;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Return the mutex associated with a database connection.
+*/
+SQLITE_API sqlite3_mutex *sqlite3_db_mutex(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return db->mutex;
+}
+
+/*
+** Free up as much memory as we can from the given database
+** connection.
+*/
+SQLITE_API int sqlite3_db_release_memory(sqlite3 *db){
+  int i;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  sqlite3BtreeEnterAll(db);
+  for(i=0; i<db->nDb; i++){
+    Btree *pBt = db->aDb[i].pBt;
+    if( pBt ){
+      Pager *pPager = sqlite3BtreePager(pBt);
+      sqlite3PagerShrink(pPager);
+    }
+  }
+  sqlite3BtreeLeaveAll(db);
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+/*
+** Configuration settings for an individual database connection
+*/
+SQLITE_API int sqlite3_db_config(sqlite3 *db, int op, ...){
+  va_list ap;
+  int rc;
+  va_start(ap, op);
+  switch( op ){
+    case SQLITE_DBCONFIG_LOOKASIDE: {
+      void *pBuf = va_arg(ap, void*); /* IMP: R-26835-10964 */
+      int sz = va_arg(ap, int);       /* IMP: R-47871-25994 */
+      int cnt = va_arg(ap, int);      /* IMP: R-04460-53386 */
+      rc = setupLookaside(db, pBuf, sz, cnt);
+      break;
+    }
+    default: {
+      static const struct {
+        int op;      /* The opcode */
+        u32 mask;    /* Mask of the bit in sqlite3.flags to set/clear */
+      } aFlagOp[] = {
+        { SQLITE_DBCONFIG_ENABLE_FKEY,    SQLITE_ForeignKeys    },
+        { SQLITE_DBCONFIG_ENABLE_TRIGGER, SQLITE_EnableTrigger  },
+      };
+      unsigned int i;
+      rc = SQLITE_ERROR; /* IMP: R-42790-23372 */
+      for(i=0; i<ArraySize(aFlagOp); i++){
+        if( aFlagOp[i].op==op ){
+          int onoff = va_arg(ap, int);
+          int *pRes = va_arg(ap, int*);
+          int oldFlags = db->flags;
+          if( onoff>0 ){
+            db->flags |= aFlagOp[i].mask;
+          }else if( onoff==0 ){
+            db->flags &= ~aFlagOp[i].mask;
+          }
+          if( oldFlags!=db->flags ){
+            sqlite3ExpirePreparedStatements(db);
+          }
+          if( pRes ){
+            *pRes = (db->flags & aFlagOp[i].mask)!=0;
+          }
+          rc = SQLITE_OK;
+          break;
+        }
+      }
+      break;
+    }
+  }
+  va_end(ap);
+  return rc;
+}
+
+
+/*
+** Return true if the buffer z[0..n-1] contains all spaces.
+*/
+static int allSpaces(const char *z, int n){
+  while( n>0 && z[n-1]==' ' ){ n--; }
+  return n==0;
+}
+
+/*
+** This is the default collating function named "BINARY" which is always
+** available.
+**
+** If the padFlag argument is not NULL then space padding at the end
+** of strings is ignored.  This implements the RTRIM collation.
+*/
+static int binCollFunc(
+  void *padFlag,
+  int nKey1, const void *pKey1,
+  int nKey2, const void *pKey2
+){
+  int rc, n;
+  n = nKey1<nKey2 ? nKey1 : nKey2;
+  /* EVIDENCE-OF: R-65033-28449 The built-in BINARY collation compares
+  ** strings byte by byte using the memcmp() function from the standard C
+  ** library. */
+  rc = memcmp(pKey1, pKey2, n);
+  if( rc==0 ){
+    if( padFlag
+     && allSpaces(((char*)pKey1)+n, nKey1-n)
+     && allSpaces(((char*)pKey2)+n, nKey2-n)
+    ){
+      /* EVIDENCE-OF: R-31624-24737 RTRIM is like BINARY except that extra
+      ** spaces at the end of either string do not change the result. In other
+      ** words, strings will compare equal to one another as long as they
+      ** differ only in the number of spaces at the end.
+      */
+    }else{
+      rc = nKey1 - nKey2;
+    }
+  }
+  return rc;
+}
+
+/*
+** Another built-in collating sequence: NOCASE. 
+**
+** This collating sequence is intended to be used for "case independent
+** comparison". SQLite's knowledge of upper and lower case equivalents
+** extends only to the 26 characters used in the English language.
+**
+** At the moment there is only a UTF-8 implementation.
+*/
+static int nocaseCollatingFunc(
+  void *NotUsed,
+  int nKey1, const void *pKey1,
+  int nKey2, const void *pKey2
+){
+  int r = sqlite3StrNICmp(
+      (const char *)pKey1, (const char *)pKey2, (nKey1<nKey2)?nKey1:nKey2);
+  UNUSED_PARAMETER(NotUsed);
+  if( 0==r ){
+    r = nKey1-nKey2;
+  }
+  return r;
+}
+
+/*
+** Return the ROWID of the most recent insert
+*/
+SQLITE_API sqlite_int64 sqlite3_last_insert_rowid(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return db->lastRowid;
+}
+
+/*
+** Return the number of changes in the most recent call to sqlite3_exec().
+*/
+SQLITE_API int sqlite3_changes(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return db->nChange;
+}
+
+/*
+** Return the number of changes since the database handle was opened.
+*/
+SQLITE_API int sqlite3_total_changes(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return db->nTotalChange;
+}
+
+/*
+** Close all open savepoints. This function only manipulates fields of the
+** database handle object, it does not close any savepoints that may be open
+** at the b-tree/pager level.
+*/
+SQLITE_PRIVATE void sqlite3CloseSavepoints(sqlite3 *db){
+  while( db->pSavepoint ){
+    Savepoint *pTmp = db->pSavepoint;
+    db->pSavepoint = pTmp->pNext;
+    sqlite3DbFree(db, pTmp);
+  }
+  db->nSavepoint = 0;
+  db->nStatement = 0;
+  db->isTransactionSavepoint = 0;
+}
+
+/*
+** Invoke the destructor function associated with FuncDef p, if any. Except,
+** if this is not the last copy of the function, do not invoke it. Multiple
+** copies of a single function are created when create_function() is called
+** with SQLITE_ANY as the encoding.
+*/
+static void functionDestroy(sqlite3 *db, FuncDef *p){
+  FuncDestructor *pDestructor = p->pDestructor;
+  if( pDestructor ){
+    pDestructor->nRef--;
+    if( pDestructor->nRef==0 ){
+      pDestructor->xDestroy(pDestructor->pUserData);
+      sqlite3DbFree(db, pDestructor);
+    }
+  }
+}
+
+/*
+** Disconnect all sqlite3_vtab objects that belong to database connection
+** db. This is called when db is being closed.
+*/
+static void disconnectAllVtab(sqlite3 *db){
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  int i;
+  sqlite3BtreeEnterAll(db);
+  for(i=0; i<db->nDb; i++){
+    Schema *pSchema = db->aDb[i].pSchema;
+    if( db->aDb[i].pSchema ){
+      HashElem *p;
+      for(p=sqliteHashFirst(&pSchema->tblHash); p; p=sqliteHashNext(p)){
+        Table *pTab = (Table *)sqliteHashData(p);
+        if( IsVirtual(pTab) ) sqlite3VtabDisconnect(db, pTab);
+      }
+    }
+  }
+  sqlite3VtabUnlockList(db);
+  sqlite3BtreeLeaveAll(db);
+#else
+  UNUSED_PARAMETER(db);
+#endif
+}
+
+/*
+** Return TRUE if database connection db has unfinalized prepared
+** statements or unfinished sqlite3_backup objects.  
+*/
+static int connectionIsBusy(sqlite3 *db){
+  int j;
+  assert( sqlite3_mutex_held(db->mutex) );
+  if( db->pVdbe ) return 1;
+  for(j=0; j<db->nDb; j++){
+    Btree *pBt = db->aDb[j].pBt;
+    if( pBt && sqlite3BtreeIsInBackup(pBt) ) return 1;
+  }
+  return 0;
+}
+
+/*
+** Close an existing SQLite database
+*/
+static int sqlite3Close(sqlite3 *db, int forceZombie){
+  if( !db ){
+    /* EVIDENCE-OF: R-63257-11740 Calling sqlite3_close() or
+    ** sqlite3_close_v2() with a NULL pointer argument is a harmless no-op. */
+    return SQLITE_OK;
+  }
+  if( !sqlite3SafetyCheckSickOrOk(db) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  sqlite3_mutex_enter(db->mutex);
+
+  /* Force xDisconnect calls on all virtual tables */
+  disconnectAllVtab(db);
+
+  /* If a transaction is open, the disconnectAllVtab() call above
+  ** will not have called the xDisconnect() method on any virtual
+  ** tables in the db->aVTrans[] array. The following sqlite3VtabRollback()
+  ** call will do so. We need to do this before the check for active
+  ** SQL statements below, as the v-table implementation may be storing
+  ** some prepared statements internally.
+  */
+  sqlite3VtabRollback(db);
+
+  /* Legacy behavior (sqlite3_close() behavior) is to return
+  ** SQLITE_BUSY if the connection can not be closed immediately.
+  */
+  if( !forceZombie && connectionIsBusy(db) ){
+    sqlite3ErrorWithMsg(db, SQLITE_BUSY, "unable to close due to unfinalized "
+       "statements or unfinished backups");
+    sqlite3_mutex_leave(db->mutex);
+    return SQLITE_BUSY;
+  }
+
+#ifdef SQLITE_ENABLE_SQLLOG
+  if( sqlite3GlobalConfig.xSqllog ){
+    /* Closing the handle. Fourth parameter is passed the value 2. */
+    sqlite3GlobalConfig.xSqllog(sqlite3GlobalConfig.pSqllogArg, db, 0, 2);
+  }
+#endif
+
+  /* Convert the connection into a zombie and then close it.
+  */
+  db->magic = SQLITE_MAGIC_ZOMBIE;
+  sqlite3LeaveMutexAndCloseZombie(db);
+  return SQLITE_OK;
+}
+
+/*
+** Two variations on the public interface for closing a database
+** connection. The sqlite3_close() version returns SQLITE_BUSY and
+** leaves the connection option if there are unfinalized prepared
+** statements or unfinished sqlite3_backups.  The sqlite3_close_v2()
+** version forces the connection to become a zombie if there are
+** unclosed resources, and arranges for deallocation when the last
+** prepare statement or sqlite3_backup closes.
+*/
+SQLITE_API int sqlite3_close(sqlite3 *db){ return sqlite3Close(db,0); }
+SQLITE_API int sqlite3_close_v2(sqlite3 *db){ return sqlite3Close(db,1); }
+
+
+/*
+** Close the mutex on database connection db.
+**
+** Furthermore, if database connection db is a zombie (meaning that there
+** has been a prior call to sqlite3_close(db) or sqlite3_close_v2(db)) and
+** every sqlite3_stmt has now been finalized and every sqlite3_backup has
+** finished, then free all resources.
+*/
+SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){
+  HashElem *i;                    /* Hash table iterator */
+  int j;
+
+  /* If there are outstanding sqlite3_stmt or sqlite3_backup objects
+  ** or if the connection has not yet been closed by sqlite3_close_v2(),
+  ** then just leave the mutex and return.
+  */
+  if( db->magic!=SQLITE_MAGIC_ZOMBIE || connectionIsBusy(db) ){
+    sqlite3_mutex_leave(db->mutex);
+    return;
+  }
+
+  /* If we reach this point, it means that the database connection has
+  ** closed all sqlite3_stmt and sqlite3_backup objects and has been
+  ** passed to sqlite3_close (meaning that it is a zombie).  Therefore,
+  ** go ahead and free all resources.
+  */
+
+  /* If a transaction is open, roll it back. This also ensures that if
+  ** any database schemas have been modified by an uncommitted transaction
+  ** they are reset. And that the required b-tree mutex is held to make
+  ** the pager rollback and schema reset an atomic operation. */
+  sqlite3RollbackAll(db, SQLITE_OK);
+
+  /* Free any outstanding Savepoint structures. */
+  sqlite3CloseSavepoints(db);
+
+  /* Close all database connections */
+  for(j=0; j<db->nDb; j++){
+    struct Db *pDb = &db->aDb[j];
+    if( pDb->pBt ){
+      sqlite3BtreeClose(pDb->pBt);
+      pDb->pBt = 0;
+      if( j!=1 ){
+        pDb->pSchema = 0;
+      }
+    }
+  }
+  /* Clear the TEMP schema separately and last */
+  if( db->aDb[1].pSchema ){
+    sqlite3SchemaClear(db->aDb[1].pSchema);
+  }
+  sqlite3VtabUnlockList(db);
+
+  /* Free up the array of auxiliary databases */
+  sqlite3CollapseDatabaseArray(db);
+  assert( db->nDb<=2 );
+  assert( db->aDb==db->aDbStatic );
+
+  /* Tell the code in notify.c that the connection no longer holds any
+  ** locks and does not require any further unlock-notify callbacks.
+  */
+  sqlite3ConnectionClosed(db);
+
+  for(j=0; j<ArraySize(db->aFunc.a); j++){
+    FuncDef *pNext, *pHash, *p;
+    for(p=db->aFunc.a[j]; p; p=pHash){
+      pHash = p->pHash;
+      while( p ){
+        functionDestroy(db, p);
+        pNext = p->pNext;
+        sqlite3DbFree(db, p);
+        p = pNext;
+      }
+    }
+  }
+  for(i=sqliteHashFirst(&db->aCollSeq); i; i=sqliteHashNext(i)){
+    CollSeq *pColl = (CollSeq *)sqliteHashData(i);
+    /* Invoke any destructors registered for collation sequence user data. */
+    for(j=0; j<3; j++){
+      if( pColl[j].xDel ){
+        pColl[j].xDel(pColl[j].pUser);
+      }
+    }
+    sqlite3DbFree(db, pColl);
+  }
+  sqlite3HashClear(&db->aCollSeq);
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
+    Module *pMod = (Module *)sqliteHashData(i);
+    if( pMod->xDestroy ){
+      pMod->xDestroy(pMod->pAux);
+    }
+    sqlite3DbFree(db, pMod);
+  }
+  sqlite3HashClear(&db->aModule);
+#endif
+
+  sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. */
+  sqlite3ValueFree(db->pErr);
+  sqlite3CloseExtensions(db);
+#if SQLITE_USER_AUTHENTICATION
+  sqlite3_free(db->auth.zAuthUser);
+  sqlite3_free(db->auth.zAuthPW);
+#endif
+
+  db->magic = SQLITE_MAGIC_ERROR;
+
+  /* The temp-database schema is allocated differently from the other schema
+  ** objects (using sqliteMalloc() directly, instead of sqlite3BtreeSchema()).
+  ** So it needs to be freed here. Todo: Why not roll the temp schema into
+  ** the same sqliteMalloc() as the one that allocates the database 
+  ** structure?
+  */
+  sqlite3DbFree(db, db->aDb[1].pSchema);
+  sqlite3_mutex_leave(db->mutex);
+  db->magic = SQLITE_MAGIC_CLOSED;
+  sqlite3_mutex_free(db->mutex);
+  assert( db->lookaside.nOut==0 );  /* Fails on a lookaside memory leak */
+  if( db->lookaside.bMalloced ){
+    sqlite3_free(db->lookaside.pStart);
+  }
+  sqlite3_free(db);
+}
+
+/*
+** Rollback all database files.  If tripCode is not SQLITE_OK, then
+** any write cursors are invalidated ("tripped" - as in "tripping a circuit
+** breaker") and made to return tripCode if there are any further
+** attempts to use that cursor.  Read cursors remain open and valid
+** but are "saved" in case the table pages are moved around.
+*/
+SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3 *db, int tripCode){
+  int i;
+  int inTrans = 0;
+  int schemaChange;
+  assert( sqlite3_mutex_held(db->mutex) );
+  sqlite3BeginBenignMalloc();
+
+  /* Obtain all b-tree mutexes before making any calls to BtreeRollback(). 
+  ** This is important in case the transaction being rolled back has
+  ** modified the database schema. If the b-tree mutexes are not taken
+  ** here, then another shared-cache connection might sneak in between
+  ** the database rollback and schema reset, which can cause false
+  ** corruption reports in some cases.  */
+  sqlite3BtreeEnterAll(db);
+  schemaChange = (db->flags & SQLITE_InternChanges)!=0 && db->init.busy==0;
+
+  for(i=0; i<db->nDb; i++){
+    Btree *p = db->aDb[i].pBt;
+    if( p ){
+      if( sqlite3BtreeIsInTrans(p) ){
+        inTrans = 1;
+      }
+      sqlite3BtreeRollback(p, tripCode, !schemaChange);
+    }
+  }
+  sqlite3VtabRollback(db);
+  sqlite3EndBenignMalloc();
+
+  if( (db->flags&SQLITE_InternChanges)!=0 && db->init.busy==0 ){
+    sqlite3ExpirePreparedStatements(db);
+    sqlite3ResetAllSchemasOfConnection(db);
+  }
+  sqlite3BtreeLeaveAll(db);
+
+  /* Any deferred constraint violations have now been resolved. */
+  db->nDeferredCons = 0;
+  db->nDeferredImmCons = 0;
+  db->flags &= ~SQLITE_DeferFKs;
+
+  /* If one has been configured, invoke the rollback-hook callback */
+  if( db->xRollbackCallback && (inTrans || !db->autoCommit) ){
+    db->xRollbackCallback(db->pRollbackArg);
+  }
+}
+
+/*
+** Return a static string containing the name corresponding to the error code
+** specified in the argument.
+*/
+#if (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) || defined(SQLITE_TEST)
+SQLITE_PRIVATE const char *sqlite3ErrName(int rc){
+  const char *zName = 0;
+  int i, origRc = rc;
+  for(i=0; i<2 && zName==0; i++, rc &= 0xff){
+    switch( rc ){
+      case SQLITE_OK:                 zName = "SQLITE_OK";                break;
+      case SQLITE_ERROR:              zName = "SQLITE_ERROR";             break;
+      case SQLITE_INTERNAL:           zName = "SQLITE_INTERNAL";          break;
+      case SQLITE_PERM:               zName = "SQLITE_PERM";              break;
+      case SQLITE_ABORT:              zName = "SQLITE_ABORT";             break;
+      case SQLITE_ABORT_ROLLBACK:     zName = "SQLITE_ABORT_ROLLBACK";    break;
+      case SQLITE_BUSY:               zName = "SQLITE_BUSY";              break;
+      case SQLITE_BUSY_RECOVERY:      zName = "SQLITE_BUSY_RECOVERY";     break;
+      case SQLITE_BUSY_SNAPSHOT:      zName = "SQLITE_BUSY_SNAPSHOT";     break;
+      case SQLITE_LOCKED:             zName = "SQLITE_LOCKED";            break;
+      case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break;
+      case SQLITE_NOMEM:              zName = "SQLITE_NOMEM";             break;
+      case SQLITE_READONLY:           zName = "SQLITE_READONLY";          break;
+      case SQLITE_READONLY_RECOVERY:  zName = "SQLITE_READONLY_RECOVERY"; break;
+      case SQLITE_READONLY_CANTLOCK:  zName = "SQLITE_READONLY_CANTLOCK"; break;
+      case SQLITE_READONLY_ROLLBACK:  zName = "SQLITE_READONLY_ROLLBACK"; break;
+      case SQLITE_READONLY_DBMOVED:   zName = "SQLITE_READONLY_DBMOVED";  break;
+      case SQLITE_INTERRUPT:          zName = "SQLITE_INTERRUPT";         break;
+      case SQLITE_IOERR:              zName = "SQLITE_IOERR";             break;
+      case SQLITE_IOERR_READ:         zName = "SQLITE_IOERR_READ";        break;
+      case SQLITE_IOERR_SHORT_READ:   zName = "SQLITE_IOERR_SHORT_READ";  break;
+      case SQLITE_IOERR_WRITE:        zName = "SQLITE_IOERR_WRITE";       break;
+      case SQLITE_IOERR_FSYNC:        zName = "SQLITE_IOERR_FSYNC";       break;
+      case SQLITE_IOERR_DIR_FSYNC:    zName = "SQLITE_IOERR_DIR_FSYNC";   break;
+      case SQLITE_IOERR_TRUNCATE:     zName = "SQLITE_IOERR_TRUNCATE";    break;
+      case SQLITE_IOERR_FSTAT:        zName = "SQLITE_IOERR_FSTAT";       break;
+      case SQLITE_IOERR_UNLOCK:       zName = "SQLITE_IOERR_UNLOCK";      break;
+      case SQLITE_IOERR_RDLOCK:       zName = "SQLITE_IOERR_RDLOCK";      break;
+      case SQLITE_IOERR_DELETE:       zName = "SQLITE_IOERR_DELETE";      break;
+      case SQLITE_IOERR_NOMEM:        zName = "SQLITE_IOERR_NOMEM";       break;
+      case SQLITE_IOERR_ACCESS:       zName = "SQLITE_IOERR_ACCESS";      break;
+      case SQLITE_IOERR_CHECKRESERVEDLOCK:
+                                zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break;
+      case SQLITE_IOERR_LOCK:         zName = "SQLITE_IOERR_LOCK";        break;
+      case SQLITE_IOERR_CLOSE:        zName = "SQLITE_IOERR_CLOSE";       break;
+      case SQLITE_IOERR_DIR_CLOSE:    zName = "SQLITE_IOERR_DIR_CLOSE";   break;
+      case SQLITE_IOERR_SHMOPEN:      zName = "SQLITE_IOERR_SHMOPEN";     break;
+      case SQLITE_IOERR_SHMSIZE:      zName = "SQLITE_IOERR_SHMSIZE";     break;
+      case SQLITE_IOERR_SHMLOCK:      zName = "SQLITE_IOERR_SHMLOCK";     break;
+      case SQLITE_IOERR_SHMMAP:       zName = "SQLITE_IOERR_SHMMAP";      break;
+      case SQLITE_IOERR_SEEK:         zName = "SQLITE_IOERR_SEEK";        break;
+      case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break;
+      case SQLITE_IOERR_MMAP:         zName = "SQLITE_IOERR_MMAP";        break;
+      case SQLITE_IOERR_GETTEMPPATH:  zName = "SQLITE_IOERR_GETTEMPPATH"; break;
+      case SQLITE_IOERR_CONVPATH:     zName = "SQLITE_IOERR_CONVPATH";    break;
+      case SQLITE_CORRUPT:            zName = "SQLITE_CORRUPT";           break;
+      case SQLITE_CORRUPT_VTAB:       zName = "SQLITE_CORRUPT_VTAB";      break;
+      case SQLITE_NOTFOUND:           zName = "SQLITE_NOTFOUND";          break;
+      case SQLITE_FULL:               zName = "SQLITE_FULL";              break;
+      case SQLITE_CANTOPEN:           zName = "SQLITE_CANTOPEN";          break;
+      case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break;
+      case SQLITE_CANTOPEN_ISDIR:     zName = "SQLITE_CANTOPEN_ISDIR";    break;
+      case SQLITE_CANTOPEN_FULLPATH:  zName = "SQLITE_CANTOPEN_FULLPATH"; break;
+      case SQLITE_CANTOPEN_CONVPATH:  zName = "SQLITE_CANTOPEN_CONVPATH"; break;
+      case SQLITE_PROTOCOL:           zName = "SQLITE_PROTOCOL";          break;
+      case SQLITE_EMPTY:              zName = "SQLITE_EMPTY";             break;
+      case SQLITE_SCHEMA:             zName = "SQLITE_SCHEMA";            break;
+      case SQLITE_TOOBIG:             zName = "SQLITE_TOOBIG";            break;
+      case SQLITE_CONSTRAINT:         zName = "SQLITE_CONSTRAINT";        break;
+      case SQLITE_CONSTRAINT_UNIQUE:  zName = "SQLITE_CONSTRAINT_UNIQUE"; break;
+      case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break;
+      case SQLITE_CONSTRAINT_FOREIGNKEY:
+                                zName = "SQLITE_CONSTRAINT_FOREIGNKEY";   break;
+      case SQLITE_CONSTRAINT_CHECK:   zName = "SQLITE_CONSTRAINT_CHECK";  break;
+      case SQLITE_CONSTRAINT_PRIMARYKEY:
+                                zName = "SQLITE_CONSTRAINT_PRIMARYKEY";   break;
+      case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break;
+      case SQLITE_CONSTRAINT_COMMITHOOK:
+                                zName = "SQLITE_CONSTRAINT_COMMITHOOK";   break;
+      case SQLITE_CONSTRAINT_VTAB:    zName = "SQLITE_CONSTRAINT_VTAB";   break;
+      case SQLITE_CONSTRAINT_FUNCTION:
+                                zName = "SQLITE_CONSTRAINT_FUNCTION";     break;
+      case SQLITE_CONSTRAINT_ROWID:   zName = "SQLITE_CONSTRAINT_ROWID";  break;
+      case SQLITE_MISMATCH:           zName = "SQLITE_MISMATCH";          break;
+      case SQLITE_MISUSE:             zName = "SQLITE_MISUSE";            break;
+      case SQLITE_NOLFS:              zName = "SQLITE_NOLFS";             break;
+      case SQLITE_AUTH:               zName = "SQLITE_AUTH";              break;
+      case SQLITE_FORMAT:             zName = "SQLITE_FORMAT";            break;
+      case SQLITE_RANGE:              zName = "SQLITE_RANGE";             break;
+      case SQLITE_NOTADB:             zName = "SQLITE_NOTADB";            break;
+      case SQLITE_ROW:                zName = "SQLITE_ROW";               break;
+      case SQLITE_NOTICE:             zName = "SQLITE_NOTICE";            break;
+      case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break;
+      case SQLITE_NOTICE_RECOVER_ROLLBACK:
+                                zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break;
+      case SQLITE_WARNING:            zName = "SQLITE_WARNING";           break;
+      case SQLITE_WARNING_AUTOINDEX:  zName = "SQLITE_WARNING_AUTOINDEX"; break;
+      case SQLITE_DONE:               zName = "SQLITE_DONE";              break;
+    }
+  }
+  if( zName==0 ){
+    static char zBuf[50];
+    sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc);
+    zName = zBuf;
+  }
+  return zName;
+}
+#endif
+
+/*
+** Return a static string that describes the kind of error specified in the
+** argument.
+*/
+SQLITE_PRIVATE const char *sqlite3ErrStr(int rc){
+  static const char* const aMsg[] = {
+    /* SQLITE_OK          */ "not an error",
+    /* SQLITE_ERROR       */ "SQL logic error or missing database",
+    /* SQLITE_INTERNAL    */ 0,
+    /* SQLITE_PERM        */ "access permission denied",
+    /* SQLITE_ABORT       */ "callback requested query abort",
+    /* SQLITE_BUSY        */ "database is locked",
+    /* SQLITE_LOCKED      */ "database table is locked",
+    /* SQLITE_NOMEM       */ "out of memory",
+    /* SQLITE_READONLY    */ "attempt to write a readonly database",
+    /* SQLITE_INTERRUPT   */ "interrupted",
+    /* SQLITE_IOERR       */ "disk I/O error",
+    /* SQLITE_CORRUPT     */ "database disk image is malformed",
+    /* SQLITE_NOTFOUND    */ "unknown operation",
+    /* SQLITE_FULL        */ "database or disk is full",
+    /* SQLITE_CANTOPEN    */ "unable to open database file",
+    /* SQLITE_PROTOCOL    */ "locking protocol",
+    /* SQLITE_EMPTY       */ "table contains no data",
+    /* SQLITE_SCHEMA      */ "database schema has changed",
+    /* SQLITE_TOOBIG      */ "string or blob too big",
+    /* SQLITE_CONSTRAINT  */ "constraint failed",
+    /* SQLITE_MISMATCH    */ "datatype mismatch",
+    /* SQLITE_MISUSE      */ "library routine called out of sequence",
+    /* SQLITE_NOLFS       */ "large file support is disabled",
+    /* SQLITE_AUTH        */ "authorization denied",
+    /* SQLITE_FORMAT      */ "auxiliary database format error",
+    /* SQLITE_RANGE       */ "bind or column index out of range",
+    /* SQLITE_NOTADB      */ "file is encrypted or is not a database",
+  };
+  const char *zErr = "unknown error";
+  switch( rc ){
+    case SQLITE_ABORT_ROLLBACK: {
+      zErr = "abort due to ROLLBACK";
+      break;
+    }
+    default: {
+      rc &= 0xff;
+      if( ALWAYS(rc>=0) && rc<ArraySize(aMsg) && aMsg[rc]!=0 ){
+        zErr = aMsg[rc];
+      }
+      break;
+    }
+  }
+  return zErr;
+}
+
+/*
+** This routine implements a busy callback that sleeps and tries
+** again until a timeout value is reached.  The timeout value is
+** an integer number of milliseconds passed in as the first
+** argument.
+*/
+static int sqliteDefaultBusyCallback(
+ void *ptr,               /* Database connection */
+ int count                /* Number of times table has been busy */
+){
+#if SQLITE_OS_WIN || HAVE_USLEEP
+  static const u8 delays[] =
+     { 1, 2, 5, 10, 15, 20, 25, 25,  25,  50,  50, 100 };
+  static const u8 totals[] =
+     { 0, 1, 3,  8, 18, 33, 53, 78, 103, 128, 178, 228 };
+# define NDELAY ArraySize(delays)
+  sqlite3 *db = (sqlite3 *)ptr;
+  int timeout = db->busyTimeout;
+  int delay, prior;
+
+  assert( count>=0 );
+  if( count < NDELAY ){
+    delay = delays[count];
+    prior = totals[count];
+  }else{
+    delay = delays[NDELAY-1];
+    prior = totals[NDELAY-1] + delay*(count-(NDELAY-1));
+  }
+  if( prior + delay > timeout ){
+    delay = timeout - prior;
+    if( delay<=0 ) return 0;
+  }
+  sqlite3OsSleep(db->pVfs, delay*1000);
+  return 1;
+#else
+  sqlite3 *db = (sqlite3 *)ptr;
+  int timeout = ((sqlite3 *)ptr)->busyTimeout;
+  if( (count+1)*1000 > timeout ){
+    return 0;
+  }
+  sqlite3OsSleep(db->pVfs, 1000000);
+  return 1;
+#endif
+}
+
+/*
+** Invoke the given busy handler.
+**
+** This routine is called when an operation failed with a lock.
+** If this routine returns non-zero, the lock is retried.  If it
+** returns 0, the operation aborts with an SQLITE_BUSY error.
+*/
+SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){
+  int rc;
+  if( NEVER(p==0) || p->xFunc==0 || p->nBusy<0 ) return 0;
+  rc = p->xFunc(p->pArg, p->nBusy);
+  if( rc==0 ){
+    p->nBusy = -1;
+  }else{
+    p->nBusy++;
+  }
+  return rc; 
+}
+
+/*
+** This routine sets the busy callback for an Sqlite database to the
+** given callback function with the given argument.
+*/
+SQLITE_API int sqlite3_busy_handler(
+  sqlite3 *db,
+  int (*xBusy)(void*,int),
+  void *pArg
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  db->busyHandler.xFunc = xBusy;
+  db->busyHandler.pArg = pArg;
+  db->busyHandler.nBusy = 0;
+  db->busyTimeout = 0;
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_PROGRESS_CALLBACK
+/*
+** This routine sets the progress callback for an Sqlite database to the
+** given callback function with the given argument. The progress callback will
+** be invoked every nOps opcodes.
+*/
+SQLITE_API void sqlite3_progress_handler(
+  sqlite3 *db, 
+  int nOps,
+  int (*xProgress)(void*), 
+  void *pArg
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  if( nOps>0 ){
+    db->xProgress = xProgress;
+    db->nProgressOps = (unsigned)nOps;
+    db->pProgressArg = pArg;
+  }else{
+    db->xProgress = 0;
+    db->nProgressOps = 0;
+    db->pProgressArg = 0;
+  }
+  sqlite3_mutex_leave(db->mutex);
+}
+#endif
+
+
+/*
+** This routine installs a default busy handler that waits for the
+** specified number of milliseconds before returning 0.
+*/
+SQLITE_API int sqlite3_busy_timeout(sqlite3 *db, int ms){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  if( ms>0 ){
+    sqlite3_busy_handler(db, sqliteDefaultBusyCallback, (void*)db);
+    db->busyTimeout = ms;
+  }else{
+    sqlite3_busy_handler(db, 0, 0);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Cause any pending operation to stop at its earliest opportunity.
+*/
+SQLITE_API void sqlite3_interrupt(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return;
+  }
+#endif
+  db->u1.isInterrupted = 1;
+}
+
+
+/*
+** This function is exactly the same as sqlite3_create_function(), except
+** that it is designed to be called by internal code. The difference is
+** that if a malloc() fails in sqlite3_create_function(), an error code
+** is returned and the mallocFailed flag cleared. 
+*/
+SQLITE_PRIVATE int sqlite3CreateFunc(
+  sqlite3 *db,
+  const char *zFunctionName,
+  int nArg,
+  int enc,
+  void *pUserData,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value **),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value **),
+  void (*xFinal)(sqlite3_context*),
+  FuncDestructor *pDestructor
+){
+  FuncDef *p;
+  int nName;
+  int extraFlags;
+
+  assert( sqlite3_mutex_held(db->mutex) );
+  if( zFunctionName==0 ||
+      (xFunc && (xFinal || xStep)) || 
+      (!xFunc && (xFinal && !xStep)) ||
+      (!xFunc && (!xFinal && xStep)) ||
+      (nArg<-1 || nArg>SQLITE_MAX_FUNCTION_ARG) ||
+      (255<(nName = sqlite3Strlen30( zFunctionName))) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+
+  assert( SQLITE_FUNC_CONSTANT==SQLITE_DETERMINISTIC );
+  extraFlags = enc &  SQLITE_DETERMINISTIC;
+  enc &= (SQLITE_FUNC_ENCMASK|SQLITE_ANY);
+  
+#ifndef SQLITE_OMIT_UTF16
+  /* If SQLITE_UTF16 is specified as the encoding type, transform this
+  ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the
+  ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally.
+  **
+  ** If SQLITE_ANY is specified, add three versions of the function
+  ** to the hash table.
+  */
+  if( enc==SQLITE_UTF16 ){
+    enc = SQLITE_UTF16NATIVE;
+  }else if( enc==SQLITE_ANY ){
+    int rc;
+    rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF8|extraFlags,
+         pUserData, xFunc, xStep, xFinal, pDestructor);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3CreateFunc(db, zFunctionName, nArg, SQLITE_UTF16LE|extraFlags,
+          pUserData, xFunc, xStep, xFinal, pDestructor);
+    }
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    enc = SQLITE_UTF16BE;
+  }
+#else
+  enc = SQLITE_UTF8;
+#endif
+  
+  /* Check if an existing function is being overridden or deleted. If so,
+  ** and there are active VMs, then return SQLITE_BUSY. If a function
+  ** is being overridden/deleted but there are no active VMs, allow the
+  ** operation to continue but invalidate all precompiled statements.
+  */
+  p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 0);
+  if( p && (p->funcFlags & SQLITE_FUNC_ENCMASK)==enc && p->nArg==nArg ){
+    if( db->nVdbeActive ){
+      sqlite3ErrorWithMsg(db, SQLITE_BUSY, 
+        "unable to delete/modify user-function due to active statements");
+      assert( !db->mallocFailed );
+      return SQLITE_BUSY;
+    }else{
+      sqlite3ExpirePreparedStatements(db);
+    }
+  }
+
+  p = sqlite3FindFunction(db, zFunctionName, nName, nArg, (u8)enc, 1);
+  assert(p || db->mallocFailed);
+  if( !p ){
+    return SQLITE_NOMEM;
+  }
+
+  /* If an older version of the function with a configured destructor is
+  ** being replaced invoke the destructor function here. */
+  functionDestroy(db, p);
+
+  if( pDestructor ){
+    pDestructor->nRef++;
+  }
+  p->pDestructor = pDestructor;
+  p->funcFlags = (p->funcFlags & SQLITE_FUNC_ENCMASK) | extraFlags;
+  testcase( p->funcFlags & SQLITE_DETERMINISTIC );
+  p->xFunc = xFunc;
+  p->xStep = xStep;
+  p->xFinalize = xFinal;
+  p->pUserData = pUserData;
+  p->nArg = (u16)nArg;
+  return SQLITE_OK;
+}
+
+/*
+** Create new user functions.
+*/
+SQLITE_API int sqlite3_create_function(
+  sqlite3 *db,
+  const char *zFunc,
+  int nArg,
+  int enc,
+  void *p,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value **),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value **),
+  void (*xFinal)(sqlite3_context*)
+){
+  return sqlite3_create_function_v2(db, zFunc, nArg, enc, p, xFunc, xStep,
+                                    xFinal, 0);
+}
+
+SQLITE_API int sqlite3_create_function_v2(
+  sqlite3 *db,
+  const char *zFunc,
+  int nArg,
+  int enc,
+  void *p,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value **),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value **),
+  void (*xFinal)(sqlite3_context*),
+  void (*xDestroy)(void *)
+){
+  int rc = SQLITE_ERROR;
+  FuncDestructor *pArg = 0;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  if( xDestroy ){
+    pArg = (FuncDestructor *)sqlite3DbMallocZero(db, sizeof(FuncDestructor));
+    if( !pArg ){
+      xDestroy(p);
+      goto out;
+    }
+    pArg->xDestroy = xDestroy;
+    pArg->pUserData = p;
+  }
+  rc = sqlite3CreateFunc(db, zFunc, nArg, enc, p, xFunc, xStep, xFinal, pArg);
+  if( pArg && pArg->nRef==0 ){
+    assert( rc!=SQLITE_OK );
+    xDestroy(p);
+    sqlite3DbFree(db, pArg);
+  }
+
+ out:
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_UTF16
+SQLITE_API int sqlite3_create_function16(
+  sqlite3 *db,
+  const void *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *p,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*)
+){
+  int rc;
+  char *zFunc8;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zFunctionName==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  assert( !db->mallocFailed );
+  zFunc8 = sqlite3Utf16to8(db, zFunctionName, -1, SQLITE_UTF16NATIVE);
+  rc = sqlite3CreateFunc(db, zFunc8, nArg, eTextRep, p, xFunc, xStep, xFinal,0);
+  sqlite3DbFree(db, zFunc8);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+#endif
+
+
+/*
+** Declare that a function has been overloaded by a virtual table.
+**
+** If the function already exists as a regular global function, then
+** this routine is a no-op.  If the function does not exist, then create
+** a new one that always throws a run-time error.  
+**
+** When virtual tables intend to provide an overloaded function, they
+** should call this routine to make sure the global function exists.
+** A global function must exist in order for name resolution to work
+** properly.
+*/
+SQLITE_API int sqlite3_overload_function(
+  sqlite3 *db,
+  const char *zName,
+  int nArg
+){
+  int nName = sqlite3Strlen30(zName);
+  int rc = SQLITE_OK;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 || nArg<-2 ){
+    return SQLITE_MISUSE_BKPT;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  if( sqlite3FindFunction(db, zName, nName, nArg, SQLITE_UTF8, 0)==0 ){
+    rc = sqlite3CreateFunc(db, zName, nArg, SQLITE_UTF8,
+                           0, sqlite3InvalidFunction, 0, 0, 0);
+  }
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_TRACE
+/*
+** Register a trace function.  The pArg from the previously registered trace
+** is returned.  
+**
+** A NULL trace function means that no tracing is executes.  A non-NULL
+** trace is a pointer to a function that is invoked at the start of each
+** SQL statement.
+*/
+SQLITE_API void *sqlite3_trace(sqlite3 *db, void (*xTrace)(void*,const char*), void *pArg){
+  void *pOld;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pOld = db->pTraceArg;
+  db->xTrace = xTrace;
+  db->pTraceArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pOld;
+}
+/*
+** Register a profile function.  The pArg from the previously registered 
+** profile function is returned.  
+**
+** A NULL profile function means that no profiling is executes.  A non-NULL
+** profile is a pointer to a function that is invoked at the conclusion of
+** each SQL statement that is run.
+*/
+SQLITE_API void *sqlite3_profile(
+  sqlite3 *db,
+  void (*xProfile)(void*,const char*,sqlite_uint64),
+  void *pArg
+){
+  void *pOld;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pOld = db->pProfileArg;
+  db->xProfile = xProfile;
+  db->pProfileArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pOld;
+}
+#endif /* SQLITE_OMIT_TRACE */
+
+/*
+** Register a function to be invoked when a transaction commits.
+** If the invoked function returns non-zero, then the commit becomes a
+** rollback.
+*/
+SQLITE_API void *sqlite3_commit_hook(
+  sqlite3 *db,              /* Attach the hook to this database */
+  int (*xCallback)(void*),  /* Function to invoke on each commit */
+  void *pArg                /* Argument to the function */
+){
+  void *pOld;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pOld = db->pCommitArg;
+  db->xCommitCallback = xCallback;
+  db->pCommitArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pOld;
+}
+
+/*
+** Register a callback to be invoked each time a row is updated,
+** inserted or deleted using this database connection.
+*/
+SQLITE_API void *sqlite3_update_hook(
+  sqlite3 *db,              /* Attach the hook to this database */
+  void (*xCallback)(void*,int,char const *,char const *,sqlite_int64),
+  void *pArg                /* Argument to the function */
+){
+  void *pRet;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pRet = db->pUpdateArg;
+  db->xUpdateCallback = xCallback;
+  db->pUpdateArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pRet;
+}
+
+/*
+** Register a callback to be invoked each time a transaction is rolled
+** back by this database connection.
+*/
+SQLITE_API void *sqlite3_rollback_hook(
+  sqlite3 *db,              /* Attach the hook to this database */
+  void (*xCallback)(void*), /* Callback function */
+  void *pArg                /* Argument to the function */
+){
+  void *pRet;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pRet = db->pRollbackArg;
+  db->xRollbackCallback = xCallback;
+  db->pRollbackArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pRet;
+}
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** The sqlite3_wal_hook() callback registered by sqlite3_wal_autocheckpoint().
+** Invoke sqlite3_wal_checkpoint if the number of frames in the log file
+** is greater than sqlite3.pWalArg cast to an integer (the value configured by
+** wal_autocheckpoint()).
+*/ 
+SQLITE_PRIVATE int sqlite3WalDefaultHook(
+  void *pClientData,     /* Argument */
+  sqlite3 *db,           /* Connection */
+  const char *zDb,       /* Database */
+  int nFrame             /* Size of WAL */
+){
+  if( nFrame>=SQLITE_PTR_TO_INT(pClientData) ){
+    sqlite3BeginBenignMalloc();
+    sqlite3_wal_checkpoint(db, zDb);
+    sqlite3EndBenignMalloc();
+  }
+  return SQLITE_OK;
+}
+#endif /* SQLITE_OMIT_WAL */
+
+/*
+** Configure an sqlite3_wal_hook() callback to automatically checkpoint
+** a database after committing a transaction if there are nFrame or
+** more frames in the log file. Passing zero or a negative value as the
+** nFrame parameter disables automatic checkpoints entirely.
+**
+** The callback registered by this function replaces any existing callback
+** registered using sqlite3_wal_hook(). Likewise, registering a callback
+** using sqlite3_wal_hook() disables the automatic checkpoint mechanism
+** configured by this function.
+*/
+SQLITE_API int sqlite3_wal_autocheckpoint(sqlite3 *db, int nFrame){
+#ifdef SQLITE_OMIT_WAL
+  UNUSED_PARAMETER(db);
+  UNUSED_PARAMETER(nFrame);
+#else
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  if( nFrame>0 ){
+    sqlite3_wal_hook(db, sqlite3WalDefaultHook, SQLITE_INT_TO_PTR(nFrame));
+  }else{
+    sqlite3_wal_hook(db, 0, 0);
+  }
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** Register a callback to be invoked each time a transaction is written
+** into the write-ahead-log by this database connection.
+*/
+SQLITE_API void *sqlite3_wal_hook(
+  sqlite3 *db,                    /* Attach the hook to this db handle */
+  int(*xCallback)(void *, sqlite3*, const char*, int),
+  void *pArg                      /* First argument passed to xCallback() */
+){
+#ifndef SQLITE_OMIT_WAL
+  void *pRet;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pRet = db->pWalArg;
+  db->xWalCallback = xCallback;
+  db->pWalArg = pArg;
+  sqlite3_mutex_leave(db->mutex);
+  return pRet;
+#else
+  return 0;
+#endif
+}
+
+/*
+** Checkpoint database zDb.
+*/
+SQLITE_API int sqlite3_wal_checkpoint_v2(
+  sqlite3 *db,                    /* Database handle */
+  const char *zDb,                /* Name of attached database (or NULL) */
+  int eMode,                      /* SQLITE_CHECKPOINT_* value */
+  int *pnLog,                     /* OUT: Size of WAL log in frames */
+  int *pnCkpt                     /* OUT: Total number of frames checkpointed */
+){
+#ifdef SQLITE_OMIT_WAL
+  return SQLITE_OK;
+#else
+  int rc;                         /* Return code */
+  int iDb = SQLITE_MAX_ATTACHED;  /* sqlite3.aDb[] index of db to checkpoint */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+
+  /* Initialize the output variables to -1 in case an error occurs. */
+  if( pnLog ) *pnLog = -1;
+  if( pnCkpt ) *pnCkpt = -1;
+
+  assert( SQLITE_CHECKPOINT_PASSIVE==0 );
+  assert( SQLITE_CHECKPOINT_FULL==1 );
+  assert( SQLITE_CHECKPOINT_RESTART==2 );
+  assert( SQLITE_CHECKPOINT_TRUNCATE==3 );
+  if( eMode<SQLITE_CHECKPOINT_PASSIVE || eMode>SQLITE_CHECKPOINT_TRUNCATE ){
+    /* EVIDENCE-OF: R-03996-12088 The M parameter must be a valid checkpoint
+    ** mode: */
+    return SQLITE_MISUSE;
+  }
+
+  sqlite3_mutex_enter(db->mutex);
+  if( zDb && zDb[0] ){
+    iDb = sqlite3FindDbName(db, zDb);
+  }
+  if( iDb<0 ){
+    rc = SQLITE_ERROR;
+    sqlite3ErrorWithMsg(db, SQLITE_ERROR, "unknown database: %s", zDb);
+  }else{
+    rc = sqlite3Checkpoint(db, iDb, eMode, pnLog, pnCkpt);
+    sqlite3Error(db, rc);
+  }
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+#endif
+}
+
+
+/*
+** Checkpoint database zDb. If zDb is NULL, or if the buffer zDb points
+** to contains a zero-length string, all attached databases are 
+** checkpointed.
+*/
+SQLITE_API int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb){
+  /* EVIDENCE-OF: R-41613-20553 The sqlite3_wal_checkpoint(D,X) is equivalent to
+  ** sqlite3_wal_checkpoint_v2(D,X,SQLITE_CHECKPOINT_PASSIVE,0,0). */
+  return sqlite3_wal_checkpoint_v2(db,zDb,SQLITE_CHECKPOINT_PASSIVE,0,0);
+}
+
+#ifndef SQLITE_OMIT_WAL
+/*
+** Run a checkpoint on database iDb. This is a no-op if database iDb is
+** not currently open in WAL mode.
+**
+** If a transaction is open on the database being checkpointed, this 
+** function returns SQLITE_LOCKED and a checkpoint is not attempted. If 
+** an error occurs while running the checkpoint, an SQLite error code is 
+** returned (i.e. SQLITE_IOERR). Otherwise, SQLITE_OK.
+**
+** The mutex on database handle db should be held by the caller. The mutex
+** associated with the specific b-tree being checkpointed is taken by
+** this function while the checkpoint is running.
+**
+** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are
+** checkpointed. If an error is encountered it is returned immediately -
+** no attempt is made to checkpoint any remaining databases.
+**
+** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
+*/
+SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3 *db, int iDb, int eMode, int *pnLog, int *pnCkpt){
+  int rc = SQLITE_OK;             /* Return code */
+  int i;                          /* Used to iterate through attached dbs */
+  int bBusy = 0;                  /* True if SQLITE_BUSY has been encountered */
+
+  assert( sqlite3_mutex_held(db->mutex) );
+  assert( !pnLog || *pnLog==-1 );
+  assert( !pnCkpt || *pnCkpt==-1 );
+
+  for(i=0; i<db->nDb && rc==SQLITE_OK; i++){
+    if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){
+      rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt, eMode, pnLog, pnCkpt);
+      pnLog = 0;
+      pnCkpt = 0;
+      if( rc==SQLITE_BUSY ){
+        bBusy = 1;
+        rc = SQLITE_OK;
+      }
+    }
+  }
+
+  return (rc==SQLITE_OK && bBusy) ? SQLITE_BUSY : rc;
+}
+#endif /* SQLITE_OMIT_WAL */
+
+/*
+** This function returns true if main-memory should be used instead of
+** a temporary file for transient pager files and statement journals.
+** The value returned depends on the value of db->temp_store (runtime
+** parameter) and the compile time value of SQLITE_TEMP_STORE. The
+** following table describes the relationship between these two values
+** and this functions return value.
+**
+**   SQLITE_TEMP_STORE     db->temp_store     Location of temporary database
+**   -----------------     --------------     ------------------------------
+**   0                     any                file      (return 0)
+**   1                     1                  file      (return 0)
+**   1                     2                  memory    (return 1)
+**   1                     0                  file      (return 0)
+**   2                     1                  file      (return 0)
+**   2                     2                  memory    (return 1)
+**   2                     0                  memory    (return 1)
+**   3                     any                memory    (return 1)
+*/
+SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3 *db){
+#if SQLITE_TEMP_STORE==1
+  return ( db->temp_store==2 );
+#endif
+#if SQLITE_TEMP_STORE==2
+  return ( db->temp_store!=1 );
+#endif
+#if SQLITE_TEMP_STORE==3
+  return 1;
+#endif
+#if SQLITE_TEMP_STORE<1 || SQLITE_TEMP_STORE>3
+  return 0;
+#endif
+}
+
+/*
+** Return UTF-8 encoded English language explanation of the most recent
+** error.
+*/
+SQLITE_API const char *sqlite3_errmsg(sqlite3 *db){
+  const char *z;
+  if( !db ){
+    return sqlite3ErrStr(SQLITE_NOMEM);
+  }
+  if( !sqlite3SafetyCheckSickOrOk(db) ){
+    return sqlite3ErrStr(SQLITE_MISUSE_BKPT);
+  }
+  sqlite3_mutex_enter(db->mutex);
+  if( db->mallocFailed ){
+    z = sqlite3ErrStr(SQLITE_NOMEM);
+  }else{
+    testcase( db->pErr==0 );
+    z = (char*)sqlite3_value_text(db->pErr);
+    assert( !db->mallocFailed );
+    if( z==0 ){
+      z = sqlite3ErrStr(db->errCode);
+    }
+  }
+  sqlite3_mutex_leave(db->mutex);
+  return z;
+}
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Return UTF-16 encoded English language explanation of the most recent
+** error.
+*/
+SQLITE_API const void *sqlite3_errmsg16(sqlite3 *db){
+  static const u16 outOfMem[] = {
+    'o', 'u', 't', ' ', 'o', 'f', ' ', 'm', 'e', 'm', 'o', 'r', 'y', 0
+  };
+  static const u16 misuse[] = {
+    'l', 'i', 'b', 'r', 'a', 'r', 'y', ' ', 
+    'r', 'o', 'u', 't', 'i', 'n', 'e', ' ', 
+    'c', 'a', 'l', 'l', 'e', 'd', ' ', 
+    'o', 'u', 't', ' ', 
+    'o', 'f', ' ', 
+    's', 'e', 'q', 'u', 'e', 'n', 'c', 'e', 0
+  };
+
+  const void *z;
+  if( !db ){
+    return (void *)outOfMem;
+  }
+  if( !sqlite3SafetyCheckSickOrOk(db) ){
+    return (void *)misuse;
+  }
+  sqlite3_mutex_enter(db->mutex);
+  if( db->mallocFailed ){
+    z = (void *)outOfMem;
+  }else{
+    z = sqlite3_value_text16(db->pErr);
+    if( z==0 ){
+      sqlite3ErrorWithMsg(db, db->errCode, sqlite3ErrStr(db->errCode));
+      z = sqlite3_value_text16(db->pErr);
+    }
+    /* A malloc() may have failed within the call to sqlite3_value_text16()
+    ** above. If this is the case, then the db->mallocFailed flag needs to
+    ** be cleared before returning. Do this directly, instead of via
+    ** sqlite3ApiExit(), to avoid setting the database handle error message.
+    */
+    db->mallocFailed = 0;
+  }
+  sqlite3_mutex_leave(db->mutex);
+  return z;
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** Return the most recent error code generated by an SQLite routine. If NULL is
+** passed to this function, we assume a malloc() failed during sqlite3_open().
+*/
+SQLITE_API int sqlite3_errcode(sqlite3 *db){
+  if( db && !sqlite3SafetyCheckSickOrOk(db) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  if( !db || db->mallocFailed ){
+    return SQLITE_NOMEM;
+  }
+  return db->errCode & db->errMask;
+}
+SQLITE_API int sqlite3_extended_errcode(sqlite3 *db){
+  if( db && !sqlite3SafetyCheckSickOrOk(db) ){
+    return SQLITE_MISUSE_BKPT;
+  }
+  if( !db || db->mallocFailed ){
+    return SQLITE_NOMEM;
+  }
+  return db->errCode;
+}
+
+/*
+** Return a string that describes the kind of error specified in the
+** argument.  For now, this simply calls the internal sqlite3ErrStr()
+** function.
+*/
+SQLITE_API const char *sqlite3_errstr(int rc){
+  return sqlite3ErrStr(rc);
+}
+
+/*
+** Create a new collating function for database "db".  The name is zName
+** and the encoding is enc.
+*/
+static int createCollation(
+  sqlite3* db,
+  const char *zName, 
+  u8 enc,
+  void* pCtx,
+  int(*xCompare)(void*,int,const void*,int,const void*),
+  void(*xDel)(void*)
+){
+  CollSeq *pColl;
+  int enc2;
+  
+  assert( sqlite3_mutex_held(db->mutex) );
+
+  /* If SQLITE_UTF16 is specified as the encoding type, transform this
+  ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the
+  ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally.
+  */
+  enc2 = enc;
+  testcase( enc2==SQLITE_UTF16 );
+  testcase( enc2==SQLITE_UTF16_ALIGNED );
+  if( enc2==SQLITE_UTF16 || enc2==SQLITE_UTF16_ALIGNED ){
+    enc2 = SQLITE_UTF16NATIVE;
+  }
+  if( enc2<SQLITE_UTF8 || enc2>SQLITE_UTF16BE ){
+    return SQLITE_MISUSE_BKPT;
+  }
+
+  /* Check if this call is removing or replacing an existing collation 
+  ** sequence. If so, and there are active VMs, return busy. If there
+  ** are no active VMs, invalidate any pre-compiled statements.
+  */
+  pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 0);
+  if( pColl && pColl->xCmp ){
+    if( db->nVdbeActive ){
+      sqlite3ErrorWithMsg(db, SQLITE_BUSY, 
+        "unable to delete/modify collation sequence due to active statements");
+      return SQLITE_BUSY;
+    }
+    sqlite3ExpirePreparedStatements(db);
+
+    /* If collation sequence pColl was created directly by a call to
+    ** sqlite3_create_collation, and not generated by synthCollSeq(),
+    ** then any copies made by synthCollSeq() need to be invalidated.
+    ** Also, collation destructor - CollSeq.xDel() - function may need
+    ** to be called.
+    */ 
+    if( (pColl->enc & ~SQLITE_UTF16_ALIGNED)==enc2 ){
+      CollSeq *aColl = sqlite3HashFind(&db->aCollSeq, zName);
+      int j;
+      for(j=0; j<3; j++){
+        CollSeq *p = &aColl[j];
+        if( p->enc==pColl->enc ){
+          if( p->xDel ){
+            p->xDel(p->pUser);
+          }
+          p->xCmp = 0;
+        }
+      }
+    }
+  }
+
+  pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, 1);
+  if( pColl==0 ) return SQLITE_NOMEM;
+  pColl->xCmp = xCompare;
+  pColl->pUser = pCtx;
+  pColl->xDel = xDel;
+  pColl->enc = (u8)(enc2 | (enc & SQLITE_UTF16_ALIGNED));
+  sqlite3Error(db, SQLITE_OK);
+  return SQLITE_OK;
+}
+
+
+/*
+** This array defines hard upper bounds on limit values.  The
+** initializer must be kept in sync with the SQLITE_LIMIT_*
+** #defines in sqlite3.h.
+*/
+static const int aHardLimit[] = {
+  SQLITE_MAX_LENGTH,
+  SQLITE_MAX_SQL_LENGTH,
+  SQLITE_MAX_COLUMN,
+  SQLITE_MAX_EXPR_DEPTH,
+  SQLITE_MAX_COMPOUND_SELECT,
+  SQLITE_MAX_VDBE_OP,
+  SQLITE_MAX_FUNCTION_ARG,
+  SQLITE_MAX_ATTACHED,
+  SQLITE_MAX_LIKE_PATTERN_LENGTH,
+  SQLITE_MAX_VARIABLE_NUMBER,      /* IMP: R-38091-32352 */
+  SQLITE_MAX_TRIGGER_DEPTH,
+  SQLITE_MAX_WORKER_THREADS,
+};
+
+/*
+** Make sure the hard limits are set to reasonable values
+*/
+#if SQLITE_MAX_LENGTH<100
+# error SQLITE_MAX_LENGTH must be at least 100
+#endif
+#if SQLITE_MAX_SQL_LENGTH<100
+# error SQLITE_MAX_SQL_LENGTH must be at least 100
+#endif
+#if SQLITE_MAX_SQL_LENGTH>SQLITE_MAX_LENGTH
+# error SQLITE_MAX_SQL_LENGTH must not be greater than SQLITE_MAX_LENGTH
+#endif
+#if SQLITE_MAX_COMPOUND_SELECT<2
+# error SQLITE_MAX_COMPOUND_SELECT must be at least 2
+#endif
+#if SQLITE_MAX_VDBE_OP<40
+# error SQLITE_MAX_VDBE_OP must be at least 40
+#endif
+#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>1000
+# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 1000
+#endif
+#if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125
+# error SQLITE_MAX_ATTACHED must be between 0 and 125
+#endif
+#if SQLITE_MAX_LIKE_PATTERN_LENGTH<1
+# error SQLITE_MAX_LIKE_PATTERN_LENGTH must be at least 1
+#endif
+#if SQLITE_MAX_COLUMN>32767
+# error SQLITE_MAX_COLUMN must not exceed 32767
+#endif
+#if SQLITE_MAX_TRIGGER_DEPTH<1
+# error SQLITE_MAX_TRIGGER_DEPTH must be at least 1
+#endif
+#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50
+# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50
+#endif
+
+
+/*
+** Change the value of a limit.  Report the old value.
+** If an invalid limit index is supplied, report -1.
+** Make no changes but still report the old value if the
+** new limit is negative.
+**
+** A new lower limit does not shrink existing constructs.
+** It merely prevents new constructs that exceed the limit
+** from forming.
+*/
+SQLITE_API int sqlite3_limit(sqlite3 *db, int limitId, int newLimit){
+  int oldLimit;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return -1;
+  }
+#endif
+
+  /* EVIDENCE-OF: R-30189-54097 For each limit category SQLITE_LIMIT_NAME
+  ** there is a hard upper bound set at compile-time by a C preprocessor
+  ** macro called SQLITE_MAX_NAME. (The "_LIMIT_" in the name is changed to
+  ** "_MAX_".)
+  */
+  assert( aHardLimit[SQLITE_LIMIT_LENGTH]==SQLITE_MAX_LENGTH );
+  assert( aHardLimit[SQLITE_LIMIT_SQL_LENGTH]==SQLITE_MAX_SQL_LENGTH );
+  assert( aHardLimit[SQLITE_LIMIT_COLUMN]==SQLITE_MAX_COLUMN );
+  assert( aHardLimit[SQLITE_LIMIT_EXPR_DEPTH]==SQLITE_MAX_EXPR_DEPTH );
+  assert( aHardLimit[SQLITE_LIMIT_COMPOUND_SELECT]==SQLITE_MAX_COMPOUND_SELECT);
+  assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP );
+  assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG );
+  assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED );
+  assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]==
+                                               SQLITE_MAX_LIKE_PATTERN_LENGTH );
+  assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER);
+  assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH );
+  assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS );
+  assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) );
+
+
+  if( limitId<0 || limitId>=SQLITE_N_LIMIT ){
+    return -1;
+  }
+  oldLimit = db->aLimit[limitId];
+  if( newLimit>=0 ){                   /* IMP: R-52476-28732 */
+    if( newLimit>aHardLimit[limitId] ){
+      newLimit = aHardLimit[limitId];  /* IMP: R-51463-25634 */
+    }
+    db->aLimit[limitId] = newLimit;
+  }
+  return oldLimit;                     /* IMP: R-53341-35419 */
+}
+
+/*
+** This function is used to parse both URIs and non-URI filenames passed by the
+** user to API functions sqlite3_open() or sqlite3_open_v2(), and for database
+** URIs specified as part of ATTACH statements.
+**
+** The first argument to this function is the name of the VFS to use (or
+** a NULL to signify the default VFS) if the URI does not contain a "vfs=xxx"
+** query parameter. The second argument contains the URI (or non-URI filename)
+** itself. When this function is called the *pFlags variable should contain
+** the default flags to open the database handle with. The value stored in
+** *pFlags may be updated before returning if the URI filename contains 
+** "cache=xxx" or "mode=xxx" query parameters.
+**
+** If successful, SQLITE_OK is returned. In this case *ppVfs is set to point to
+** the VFS that should be used to open the database file. *pzFile is set to
+** point to a buffer containing the name of the file to open. It is the 
+** responsibility of the caller to eventually call sqlite3_free() to release
+** this buffer.
+**
+** If an error occurs, then an SQLite error code is returned and *pzErrMsg
+** may be set to point to a buffer containing an English language error 
+** message. It is the responsibility of the caller to eventually release
+** this buffer by calling sqlite3_free().
+*/
+SQLITE_PRIVATE int sqlite3ParseUri(
+  const char *zDefaultVfs,        /* VFS to use if no "vfs=xxx" query option */
+  const char *zUri,               /* Nul-terminated URI to parse */
+  unsigned int *pFlags,           /* IN/OUT: SQLITE_OPEN_XXX flags */
+  sqlite3_vfs **ppVfs,            /* OUT: VFS to use */ 
+  char **pzFile,                  /* OUT: Filename component of URI */
+  char **pzErrMsg                 /* OUT: Error message (if rc!=SQLITE_OK) */
+){
+  int rc = SQLITE_OK;
+  unsigned int flags = *pFlags;
+  const char *zVfs = zDefaultVfs;
+  char *zFile;
+  char c;
+  int nUri = sqlite3Strlen30(zUri);
+
+  assert( *pzErrMsg==0 );
+
+  if( ((flags & SQLITE_OPEN_URI)             /* IMP: R-48725-32206 */
+            || sqlite3GlobalConfig.bOpenUri) /* IMP: R-51689-46548 */
+   && nUri>=5 && memcmp(zUri, "file:", 5)==0 /* IMP: R-57884-37496 */
+  ){
+    char *zOpt;
+    int eState;                   /* Parser state when parsing URI */
+    int iIn;                      /* Input character index */
+    int iOut = 0;                 /* Output character index */
+    int nByte = nUri+2;           /* Bytes of space to allocate */
+
+    /* Make sure the SQLITE_OPEN_URI flag is set to indicate to the VFS xOpen 
+    ** method that there may be extra parameters following the file-name.  */
+    flags |= SQLITE_OPEN_URI;
+
+    for(iIn=0; iIn<nUri; iIn++) nByte += (zUri[iIn]=='&');
+    zFile = sqlite3_malloc(nByte);
+    if( !zFile ) return SQLITE_NOMEM;
+
+    iIn = 5;
+#ifndef SQLITE_ALLOW_URI_AUTHORITY
+    /* Discard the scheme and authority segments of the URI. */
+    if( zUri[5]=='/' && zUri[6]=='/' ){
+      iIn = 7;
+      while( zUri[iIn] && zUri[iIn]!='/' ) iIn++;
+      if( iIn!=7 && (iIn!=16 || memcmp("localhost", &zUri[7], 9)) ){
+        *pzErrMsg = sqlite3_mprintf("invalid uri authority: %.*s", 
+            iIn-7, &zUri[7]);
+        rc = SQLITE_ERROR;
+        goto parse_uri_out;
+      }
+    }
+#endif
+
+    /* Copy the filename and any query parameters into the zFile buffer. 
+    ** Decode %HH escape codes along the way. 
+    **
+    ** Within this loop, variable eState may be set to 0, 1 or 2, depending
+    ** on the parsing context. As follows:
+    **
+    **   0: Parsing file-name.
+    **   1: Parsing name section of a name=value query parameter.
+    **   2: Parsing value section of a name=value query parameter.
+    */
+    eState = 0;
+    while( (c = zUri[iIn])!=0 && c!='#' ){
+      iIn++;
+      if( c=='%' 
+       && sqlite3Isxdigit(zUri[iIn]) 
+       && sqlite3Isxdigit(zUri[iIn+1]) 
+      ){
+        int octet = (sqlite3HexToInt(zUri[iIn++]) << 4);
+        octet += sqlite3HexToInt(zUri[iIn++]);
+
+        assert( octet>=0 && octet<256 );
+        if( octet==0 ){
+          /* This branch is taken when "%00" appears within the URI. In this
+          ** case we ignore all text in the remainder of the path, name or
+          ** value currently being parsed. So ignore the current character
+          ** and skip to the next "?", "=" or "&", as appropriate. */
+          while( (c = zUri[iIn])!=0 && c!='#' 
+              && (eState!=0 || c!='?')
+              && (eState!=1 || (c!='=' && c!='&'))
+              && (eState!=2 || c!='&')
+          ){
+            iIn++;
+          }
+          continue;
+        }
+        c = octet;
+      }else if( eState==1 && (c=='&' || c=='=') ){
+        if( zFile[iOut-1]==0 ){
+          /* An empty option name. Ignore this option altogether. */
+          while( zUri[iIn] && zUri[iIn]!='#' && zUri[iIn-1]!='&' ) iIn++;
+          continue;
+        }
+        if( c=='&' ){
+          zFile[iOut++] = '\0';
+        }else{
+          eState = 2;
+        }
+        c = 0;
+      }else if( (eState==0 && c=='?') || (eState==2 && c=='&') ){
+        c = 0;
+        eState = 1;
+      }
+      zFile[iOut++] = c;
+    }
+    if( eState==1 ) zFile[iOut++] = '\0';
+    zFile[iOut++] = '\0';
+    zFile[iOut++] = '\0';
+
+    /* Check if there were any options specified that should be interpreted 
+    ** here. Options that are interpreted here include "vfs" and those that
+    ** correspond to flags that may be passed to the sqlite3_open_v2()
+    ** method. */
+    zOpt = &zFile[sqlite3Strlen30(zFile)+1];
+    while( zOpt[0] ){
+      int nOpt = sqlite3Strlen30(zOpt);
+      char *zVal = &zOpt[nOpt+1];
+      int nVal = sqlite3Strlen30(zVal);
+
+      if( nOpt==3 && memcmp("vfs", zOpt, 3)==0 ){
+        zVfs = zVal;
+      }else{
+        struct OpenMode {
+          const char *z;
+          int mode;
+        } *aMode = 0;
+        char *zModeType = 0;
+        int mask = 0;
+        int limit = 0;
+
+        if( nOpt==5 && memcmp("cache", zOpt, 5)==0 ){
+          static struct OpenMode aCacheMode[] = {
+            { "shared",  SQLITE_OPEN_SHAREDCACHE },
+            { "private", SQLITE_OPEN_PRIVATECACHE },
+            { 0, 0 }
+          };
+
+          mask = SQLITE_OPEN_SHAREDCACHE|SQLITE_OPEN_PRIVATECACHE;
+          aMode = aCacheMode;
+          limit = mask;
+          zModeType = "cache";
+        }
+        if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){
+          static struct OpenMode aOpenMode[] = {
+            { "ro",  SQLITE_OPEN_READONLY },
+            { "rw",  SQLITE_OPEN_READWRITE }, 
+            { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE },
+            { "memory", SQLITE_OPEN_MEMORY },
+            { 0, 0 }
+          };
+
+          mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE
+                   | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY;
+          aMode = aOpenMode;
+          limit = mask & flags;
+          zModeType = "access";
+        }
+
+        if( aMode ){
+          int i;
+          int mode = 0;
+          for(i=0; aMode[i].z; i++){
+            const char *z = aMode[i].z;
+            if( nVal==sqlite3Strlen30(z) && 0==memcmp(zVal, z, nVal) ){
+              mode = aMode[i].mode;
+              break;
+            }
+          }
+          if( mode==0 ){
+            *pzErrMsg = sqlite3_mprintf("no such %s mode: %s", zModeType, zVal);
+            rc = SQLITE_ERROR;
+            goto parse_uri_out;
+          }
+          if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){
+            *pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s",
+                                        zModeType, zVal);
+            rc = SQLITE_PERM;
+            goto parse_uri_out;
+          }
+          flags = (flags & ~mask) | mode;
+        }
+      }
+
+      zOpt = &zVal[nVal+1];
+    }
+
+  }else{
+    zFile = sqlite3_malloc(nUri+2);
+    if( !zFile ) return SQLITE_NOMEM;
+    memcpy(zFile, zUri, nUri);
+    zFile[nUri] = '\0';
+    zFile[nUri+1] = '\0';
+    flags &= ~SQLITE_OPEN_URI;
+  }
+
+  *ppVfs = sqlite3_vfs_find(zVfs);
+  if( *ppVfs==0 ){
+    *pzErrMsg = sqlite3_mprintf("no such vfs: %s", zVfs);
+    rc = SQLITE_ERROR;
+  }
+ parse_uri_out:
+  if( rc!=SQLITE_OK ){
+    sqlite3_free(zFile);
+    zFile = 0;
+  }
+  *pFlags = flags;
+  *pzFile = zFile;
+  return rc;
+}
+
+
+/*
+** This routine does the work of opening a database on behalf of
+** sqlite3_open() and sqlite3_open16(). The database filename "zFilename"  
+** is UTF-8 encoded.
+*/
+static int openDatabase(
+  const char *zFilename, /* Database filename UTF-8 encoded */
+  sqlite3 **ppDb,        /* OUT: Returned database handle */
+  unsigned int flags,    /* Operational flags */
+  const char *zVfs       /* Name of the VFS to use */
+){
+  sqlite3 *db;                    /* Store allocated handle here */
+  int rc;                         /* Return code */
+  int isThreadsafe;               /* True for threadsafe connections */
+  char *zOpen = 0;                /* Filename argument to pass to BtreeOpen() */
+  char *zErrMsg = 0;              /* Error message from sqlite3ParseUri() */
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( ppDb==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *ppDb = 0;
+#ifndef SQLITE_OMIT_AUTOINIT
+  rc = sqlite3_initialize();
+  if( rc ) return rc;
+#endif
+
+  /* Only allow sensible combinations of bits in the flags argument.  
+  ** Throw an error if any non-sense combination is used.  If we
+  ** do not block illegal combinations here, it could trigger
+  ** assert() statements in deeper layers.  Sensible combinations
+  ** are:
+  **
+  **  1:  SQLITE_OPEN_READONLY
+  **  2:  SQLITE_OPEN_READWRITE
+  **  6:  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE
+  */
+  assert( SQLITE_OPEN_READONLY  == 0x01 );
+  assert( SQLITE_OPEN_READWRITE == 0x02 );
+  assert( SQLITE_OPEN_CREATE    == 0x04 );
+  testcase( (1<<(flags&7))==0x02 ); /* READONLY */
+  testcase( (1<<(flags&7))==0x04 ); /* READWRITE */
+  testcase( (1<<(flags&7))==0x40 ); /* READWRITE | CREATE */
+  if( ((1<<(flags&7)) & 0x46)==0 ){
+    return SQLITE_MISUSE_BKPT;  /* IMP: R-65497-44594 */
+  }
+
+  if( sqlite3GlobalConfig.bCoreMutex==0 ){
+    isThreadsafe = 0;
+  }else if( flags & SQLITE_OPEN_NOMUTEX ){
+    isThreadsafe = 0;
+  }else if( flags & SQLITE_OPEN_FULLMUTEX ){
+    isThreadsafe = 1;
+  }else{
+    isThreadsafe = sqlite3GlobalConfig.bFullMutex;
+  }
+  if( flags & SQLITE_OPEN_PRIVATECACHE ){
+    flags &= ~SQLITE_OPEN_SHAREDCACHE;
+  }else if( sqlite3GlobalConfig.sharedCacheEnabled ){
+    flags |= SQLITE_OPEN_SHAREDCACHE;
+  }
+
+  /* Remove harmful bits from the flags parameter
+  **
+  ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX flags were
+  ** dealt with in the previous code block.  Besides these, the only
+  ** valid input flags for sqlite3_open_v2() are SQLITE_OPEN_READONLY,
+  ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE,
+  ** SQLITE_OPEN_PRIVATECACHE, and some reserved bits.  Silently mask
+  ** off all other flags.
+  */
+  flags &=  ~( SQLITE_OPEN_DELETEONCLOSE |
+               SQLITE_OPEN_EXCLUSIVE |
+               SQLITE_OPEN_MAIN_DB |
+               SQLITE_OPEN_TEMP_DB | 
+               SQLITE_OPEN_TRANSIENT_DB | 
+               SQLITE_OPEN_MAIN_JOURNAL | 
+               SQLITE_OPEN_TEMP_JOURNAL | 
+               SQLITE_OPEN_SUBJOURNAL | 
+               SQLITE_OPEN_MASTER_JOURNAL |
+               SQLITE_OPEN_NOMUTEX |
+               SQLITE_OPEN_FULLMUTEX |
+               SQLITE_OPEN_WAL
+             );
+
+  /* Allocate the sqlite data structure */
+  db = sqlite3MallocZero( sizeof(sqlite3) );
+  if( db==0 ) goto opendb_out;
+  if( isThreadsafe ){
+    db->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE);
+    if( db->mutex==0 ){
+      sqlite3_free(db);
+      db = 0;
+      goto opendb_out;
+    }
+  }
+  sqlite3_mutex_enter(db->mutex);
+  db->errMask = 0xff;
+  db->nDb = 2;
+  db->magic = SQLITE_MAGIC_BUSY;
+  db->aDb = db->aDbStatic;
+
+  assert( sizeof(db->aLimit)==sizeof(aHardLimit) );
+  memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit));
+  db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS;
+  db->autoCommit = 1;
+  db->nextAutovac = -1;
+  db->szMmap = sqlite3GlobalConfig.szMmap;
+  db->nextPagesize = 0;
+  db->nMaxSorterMmap = 0x7FFFFFFF;
+  db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill
+#if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX
+                 | SQLITE_AutoIndex
+#endif
+#if SQLITE_DEFAULT_FILE_FORMAT<4
+                 | SQLITE_LegacyFileFmt
+#endif
+#ifdef SQLITE_ENABLE_LOAD_EXTENSION
+                 | SQLITE_LoadExtension
+#endif
+#if SQLITE_DEFAULT_RECURSIVE_TRIGGERS
+                 | SQLITE_RecTriggers
+#endif
+#if defined(SQLITE_DEFAULT_FOREIGN_KEYS) && SQLITE_DEFAULT_FOREIGN_KEYS
+                 | SQLITE_ForeignKeys
+#endif
+#if defined(SQLITE_REVERSE_UNORDERED_SELECTS)
+                 | SQLITE_ReverseOrder
+#endif
+      ;
+  sqlite3HashInit(&db->aCollSeq);
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  sqlite3HashInit(&db->aModule);
+#endif
+
+  /* Add the default collation sequence BINARY. BINARY works for both UTF-8
+  ** and UTF-16, so add a version for each to avoid any unnecessary
+  ** conversions. The only error that can occur here is a malloc() failure.
+  **
+  ** EVIDENCE-OF: R-52786-44878 SQLite defines three built-in collating
+  ** functions:
+  */
+  createCollation(db, "BINARY", SQLITE_UTF8, 0, binCollFunc, 0);
+  createCollation(db, "BINARY", SQLITE_UTF16BE, 0, binCollFunc, 0);
+  createCollation(db, "BINARY", SQLITE_UTF16LE, 0, binCollFunc, 0);
+  createCollation(db, "NOCASE", SQLITE_UTF8, 0, nocaseCollatingFunc, 0);
+  createCollation(db, "RTRIM", SQLITE_UTF8, (void*)1, binCollFunc, 0);
+  if( db->mallocFailed ){
+    goto opendb_out;
+  }
+  /* EVIDENCE-OF: R-08308-17224 The default collating function for all
+  ** strings is BINARY. 
+  */
+  db->pDfltColl = sqlite3FindCollSeq(db, SQLITE_UTF8, "BINARY", 0);
+  assert( db->pDfltColl!=0 );
+
+  /* Parse the filename/URI argument. */
+  db->openFlags = flags;
+  rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg);
+  if( rc!=SQLITE_OK ){
+    if( rc==SQLITE_NOMEM ) db->mallocFailed = 1;
+    sqlite3ErrorWithMsg(db, rc, zErrMsg ? "%s" : 0, zErrMsg);
+    sqlite3_free(zErrMsg);
+    goto opendb_out;
+  }
+
+  /* Open the backend database driver */
+  rc = sqlite3BtreeOpen(db->pVfs, zOpen, db, &db->aDb[0].pBt, 0,
+                        flags | SQLITE_OPEN_MAIN_DB);
+  if( rc!=SQLITE_OK ){
+    if( rc==SQLITE_IOERR_NOMEM ){
+      rc = SQLITE_NOMEM;
+    }
+    sqlite3Error(db, rc);
+    goto opendb_out;
+  }
+  sqlite3BtreeEnter(db->aDb[0].pBt);
+  db->aDb[0].pSchema = sqlite3SchemaGet(db, db->aDb[0].pBt);
+  if( !db->mallocFailed ) ENC(db) = SCHEMA_ENC(db);
+  sqlite3BtreeLeave(db->aDb[0].pBt);
+  db->aDb[1].pSchema = sqlite3SchemaGet(db, 0);
+
+  /* The default safety_level for the main database is 'full'; for the temp
+  ** database it is 'NONE'. This matches the pager layer defaults.  
+  */
+  db->aDb[0].zName = "main";
+  db->aDb[0].safety_level = 3;
+  db->aDb[1].zName = "temp";
+  db->aDb[1].safety_level = 1;
+
+  db->magic = SQLITE_MAGIC_OPEN;
+  if( db->mallocFailed ){
+    goto opendb_out;
+  }
+
+  /* Register all built-in functions, but do not attempt to read the
+  ** database schema yet. This is delayed until the first time the database
+  ** is accessed.
+  */
+  sqlite3Error(db, SQLITE_OK);
+  sqlite3RegisterBuiltinFunctions(db);
+
+  /* Load automatic extensions - extensions that have been registered
+  ** using the sqlite3_automatic_extension() API.
+  */
+  rc = sqlite3_errcode(db);
+  if( rc==SQLITE_OK ){
+    sqlite3AutoLoadExtensions(db);
+    rc = sqlite3_errcode(db);
+    if( rc!=SQLITE_OK ){
+      goto opendb_out;
+    }
+  }
+
+#ifdef SQLITE_ENABLE_FTS1
+  if( !db->mallocFailed ){
+    extern int sqlite3Fts1Init(sqlite3*);
+    rc = sqlite3Fts1Init(db);
+  }
+#endif
+
+#ifdef SQLITE_ENABLE_FTS2
+  if( !db->mallocFailed && rc==SQLITE_OK ){
+    extern int sqlite3Fts2Init(sqlite3*);
+    rc = sqlite3Fts2Init(db);
+  }
+#endif
+
+#ifdef SQLITE_ENABLE_FTS3
+  if( !db->mallocFailed && rc==SQLITE_OK ){
+    rc = sqlite3Fts3Init(db);
+  }
+#endif
+
+#ifdef SQLITE_ENABLE_ICU
+  if( !db->mallocFailed && rc==SQLITE_OK ){
+    rc = sqlite3IcuInit(db);
+  }
+#endif
+
+#ifdef SQLITE_ENABLE_RTREE
+  if( !db->mallocFailed && rc==SQLITE_OK){
+    rc = sqlite3RtreeInit(db);
+  }
+#endif
+
+  /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
+  ** mode.  -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking
+  ** mode.  Doing nothing at all also makes NORMAL the default.
+  */
+#ifdef SQLITE_DEFAULT_LOCKING_MODE
+  db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE;
+  sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt),
+                          SQLITE_DEFAULT_LOCKING_MODE);
+#endif
+
+  if( rc ) sqlite3Error(db, rc);
+
+  /* Enable the lookaside-malloc subsystem */
+  setupLookaside(db, 0, sqlite3GlobalConfig.szLookaside,
+                        sqlite3GlobalConfig.nLookaside);
+
+  sqlite3_wal_autocheckpoint(db, SQLITE_DEFAULT_WAL_AUTOCHECKPOINT);
+
+opendb_out:
+  sqlite3_free(zOpen);
+  if( db ){
+    assert( db->mutex!=0 || isThreadsafe==0 || sqlite3GlobalConfig.bFullMutex==0 );
+    sqlite3_mutex_leave(db->mutex);
+  }
+  rc = sqlite3_errcode(db);
+  assert( db!=0 || rc==SQLITE_NOMEM );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_close(db);
+    db = 0;
+  }else if( rc!=SQLITE_OK ){
+    db->magic = SQLITE_MAGIC_SICK;
+  }
+  *ppDb = db;
+#ifdef SQLITE_ENABLE_SQLLOG
+  if( sqlite3GlobalConfig.xSqllog ){
+    /* Opening a db handle. Fourth parameter is passed 0. */
+    void *pArg = sqlite3GlobalConfig.pSqllogArg;
+    sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0);
+  }
+#endif
+  return sqlite3ApiExit(0, rc);
+}
+
+/*
+** Open a new database handle.
+*/
+SQLITE_API int sqlite3_open(
+  const char *zFilename, 
+  sqlite3 **ppDb 
+){
+  return openDatabase(zFilename, ppDb,
+                      SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0);
+}
+SQLITE_API int sqlite3_open_v2(
+  const char *filename,   /* Database filename (UTF-8) */
+  sqlite3 **ppDb,         /* OUT: SQLite db handle */
+  int flags,              /* Flags */
+  const char *zVfs        /* Name of VFS module to use */
+){
+  return openDatabase(filename, ppDb, (unsigned int)flags, zVfs);
+}
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Open a new database handle.
+*/
+SQLITE_API int sqlite3_open16(
+  const void *zFilename, 
+  sqlite3 **ppDb
+){
+  char const *zFilename8;   /* zFilename encoded in UTF-8 instead of UTF-16 */
+  sqlite3_value *pVal;
+  int rc;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( ppDb==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  *ppDb = 0;
+#ifndef SQLITE_OMIT_AUTOINIT
+  rc = sqlite3_initialize();
+  if( rc ) return rc;
+#endif
+  if( zFilename==0 ) zFilename = "\000\000";
+  pVal = sqlite3ValueNew(0);
+  sqlite3ValueSetStr(pVal, -1, zFilename, SQLITE_UTF16NATIVE, SQLITE_STATIC);
+  zFilename8 = sqlite3ValueText(pVal, SQLITE_UTF8);
+  if( zFilename8 ){
+    rc = openDatabase(zFilename8, ppDb,
+                      SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0);
+    assert( *ppDb || rc==SQLITE_NOMEM );
+    if( rc==SQLITE_OK && !DbHasProperty(*ppDb, 0, DB_SchemaLoaded) ){
+      SCHEMA_ENC(*ppDb) = ENC(*ppDb) = SQLITE_UTF16NATIVE;
+    }
+  }else{
+    rc = SQLITE_NOMEM;
+  }
+  sqlite3ValueFree(pVal);
+
+  return sqlite3ApiExit(0, rc);
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** Register a new collation sequence with the database handle db.
+*/
+SQLITE_API int sqlite3_create_collation(
+  sqlite3* db, 
+  const char *zName, 
+  int enc, 
+  void* pCtx,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+){
+  return sqlite3_create_collation_v2(db, zName, enc, pCtx, xCompare, 0);
+}
+
+/*
+** Register a new collation sequence with the database handle db.
+*/
+SQLITE_API int sqlite3_create_collation_v2(
+  sqlite3* db, 
+  const char *zName, 
+  int enc, 
+  void* pCtx,
+  int(*xCompare)(void*,int,const void*,int,const void*),
+  void(*xDel)(void*)
+){
+  int rc;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  assert( !db->mallocFailed );
+  rc = createCollation(db, zName, (u8)enc, pCtx, xCompare, xDel);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Register a new collation sequence with the database handle db.
+*/
+SQLITE_API int sqlite3_create_collation16(
+  sqlite3* db, 
+  const void *zName,
+  int enc, 
+  void* pCtx,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+){
+  int rc = SQLITE_OK;
+  char *zName8;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) || zName==0 ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  assert( !db->mallocFailed );
+  zName8 = sqlite3Utf16to8(db, zName, -1, SQLITE_UTF16NATIVE);
+  if( zName8 ){
+    rc = createCollation(db, zName8, (u8)enc, pCtx, xCompare, 0);
+    sqlite3DbFree(db, zName8);
+  }
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+/*
+** Register a collation sequence factory callback with the database handle
+** db. Replace any previously installed collation sequence factory.
+*/
+SQLITE_API int sqlite3_collation_needed(
+  sqlite3 *db, 
+  void *pCollNeededArg, 
+  void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*)
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  db->xCollNeeded = xCollNeeded;
+  db->xCollNeeded16 = 0;
+  db->pCollNeededArg = pCollNeededArg;
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+#ifndef SQLITE_OMIT_UTF16
+/*
+** Register a collation sequence factory callback with the database handle
+** db. Replace any previously installed collation sequence factory.
+*/
+SQLITE_API int sqlite3_collation_needed16(
+  sqlite3 *db, 
+  void *pCollNeededArg, 
+  void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*)
+){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  db->xCollNeeded = 0;
+  db->xCollNeeded16 = xCollNeeded16;
+  db->pCollNeededArg = pCollNeededArg;
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+#endif /* SQLITE_OMIT_UTF16 */
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** This function is now an anachronism. It used to be used to recover from a
+** malloc() failure, but SQLite now does this automatically.
+*/
+SQLITE_API int sqlite3_global_recover(void){
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** Test to see whether or not the database connection is in autocommit
+** mode.  Return TRUE if it is and FALSE if not.  Autocommit mode is on
+** by default.  Autocommit is disabled by a BEGIN statement and reenabled
+** by the next COMMIT or ROLLBACK.
+*/
+SQLITE_API int sqlite3_get_autocommit(sqlite3 *db){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  return db->autoCommit;
+}
+
+/*
+** The following routines are substitutes for constants SQLITE_CORRUPT,
+** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_IOERR and possibly other error
+** constants.  They serve two purposes:
+**
+**   1.  Serve as a convenient place to set a breakpoint in a debugger
+**       to detect when version error conditions occurs.
+**
+**   2.  Invoke sqlite3_log() to provide the source code location where
+**       a low-level error is first detected.
+*/
+SQLITE_PRIVATE int sqlite3CorruptError(int lineno){
+  testcase( sqlite3GlobalConfig.xLog!=0 );
+  sqlite3_log(SQLITE_CORRUPT,
+              "database corruption at line %d of [%.10s]",
+              lineno, 20+sqlite3_sourceid());
+  return SQLITE_CORRUPT;
+}
+SQLITE_PRIVATE int sqlite3MisuseError(int lineno){
+  testcase( sqlite3GlobalConfig.xLog!=0 );
+  sqlite3_log(SQLITE_MISUSE, 
+              "misuse at line %d of [%.10s]",
+              lineno, 20+sqlite3_sourceid());
+  return SQLITE_MISUSE;
+}
+SQLITE_PRIVATE int sqlite3CantopenError(int lineno){
+  testcase( sqlite3GlobalConfig.xLog!=0 );
+  sqlite3_log(SQLITE_CANTOPEN, 
+              "cannot open file at line %d of [%.10s]",
+              lineno, 20+sqlite3_sourceid());
+  return SQLITE_CANTOPEN;
+}
+
+
+#ifndef SQLITE_OMIT_DEPRECATED
+/*
+** This is a convenience routine that makes sure that all thread-specific
+** data for this thread has been deallocated.
+**
+** SQLite no longer uses thread-specific data so this routine is now a
+** no-op.  It is retained for historical compatibility.
+*/
+SQLITE_API void sqlite3_thread_cleanup(void){
+}
+#endif
+
+/*
+** Return meta information about a specific column of a database table.
+** See comment in sqlite3.h (sqlite.h.in) for details.
+*/
+SQLITE_API int sqlite3_table_column_metadata(
+  sqlite3 *db,                /* Connection handle */
+  const char *zDbName,        /* Database name or NULL */
+  const char *zTableName,     /* Table name */
+  const char *zColumnName,    /* Column name */
+  char const **pzDataType,    /* OUTPUT: Declared data type */
+  char const **pzCollSeq,     /* OUTPUT: Collation sequence name */
+  int *pNotNull,              /* OUTPUT: True if NOT NULL constraint exists */
+  int *pPrimaryKey,           /* OUTPUT: True if column part of PK */
+  int *pAutoinc               /* OUTPUT: True if column is auto-increment */
+){
+  int rc;
+  char *zErrMsg = 0;
+  Table *pTab = 0;
+  Column *pCol = 0;
+  int iCol = 0;
+
+  char const *zDataType = 0;
+  char const *zCollSeq = 0;
+  int notnull = 0;
+  int primarykey = 0;
+  int autoinc = 0;
+
+  /* Ensure the database schema has been loaded */
+  sqlite3_mutex_enter(db->mutex);
+  sqlite3BtreeEnterAll(db);
+  rc = sqlite3Init(db, &zErrMsg);
+  if( SQLITE_OK!=rc ){
+    goto error_out;
+  }
+
+  /* Locate the table in question */
+  pTab = sqlite3FindTable(db, zTableName, zDbName);
+  if( !pTab || pTab->pSelect ){
+    pTab = 0;
+    goto error_out;
+  }
+
+  /* Find the column for which info is requested */
+  if( zColumnName==0 ){
+    /* Query for existance of table only */
+  }else{
+    for(iCol=0; iCol<pTab->nCol; iCol++){
+      pCol = &pTab->aCol[iCol];
+      if( 0==sqlite3StrICmp(pCol->zName, zColumnName) ){
+        break;
+      }
+    }
+    if( iCol==pTab->nCol ){
+      if( HasRowid(pTab) && sqlite3IsRowid(zColumnName) ){
+        iCol = pTab->iPKey;
+        pCol = iCol>=0 ? &pTab->aCol[iCol] : 0;
+      }else{
+        pTab = 0;
+        goto error_out;
+      }
+    }
+  }
+
+  /* The following block stores the meta information that will be returned
+  ** to the caller in local variables zDataType, zCollSeq, notnull, primarykey
+  ** and autoinc. At this point there are two possibilities:
+  ** 
+  **     1. The specified column name was rowid", "oid" or "_rowid_" 
+  **        and there is no explicitly declared IPK column. 
+  **
+  **     2. The table is not a view and the column name identified an 
+  **        explicitly declared column. Copy meta information from *pCol.
+  */ 
+  if( pCol ){
+    zDataType = pCol->zType;
+    zCollSeq = pCol->zColl;
+    notnull = pCol->notNull!=0;
+    primarykey  = (pCol->colFlags & COLFLAG_PRIMKEY)!=0;
+    autoinc = pTab->iPKey==iCol && (pTab->tabFlags & TF_Autoincrement)!=0;
+  }else{
+    zDataType = "INTEGER";
+    primarykey = 1;
+  }
+  if( !zCollSeq ){
+    zCollSeq = "BINARY";
+  }
+
+error_out:
+  sqlite3BtreeLeaveAll(db);
+
+  /* Whether the function call succeeded or failed, set the output parameters
+  ** to whatever their local counterparts contain. If an error did occur,
+  ** this has the effect of zeroing all output parameters.
+  */
+  if( pzDataType ) *pzDataType = zDataType;
+  if( pzCollSeq ) *pzCollSeq = zCollSeq;
+  if( pNotNull ) *pNotNull = notnull;
+  if( pPrimaryKey ) *pPrimaryKey = primarykey;
+  if( pAutoinc ) *pAutoinc = autoinc;
+
+  if( SQLITE_OK==rc && !pTab ){
+    sqlite3DbFree(db, zErrMsg);
+    zErrMsg = sqlite3MPrintf(db, "no such table column: %s.%s", zTableName,
+        zColumnName);
+    rc = SQLITE_ERROR;
+  }
+  sqlite3ErrorWithMsg(db, rc, (zErrMsg?"%s":0), zErrMsg);
+  sqlite3DbFree(db, zErrMsg);
+  rc = sqlite3ApiExit(db, rc);
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** Sleep for a little while.  Return the amount of time slept.
+*/
+SQLITE_API int sqlite3_sleep(int ms){
+  sqlite3_vfs *pVfs;
+  int rc;
+  pVfs = sqlite3_vfs_find(0);
+  if( pVfs==0 ) return 0;
+
+  /* This function works in milliseconds, but the underlying OsSleep() 
+  ** API uses microseconds. Hence the 1000's.
+  */
+  rc = (sqlite3OsSleep(pVfs, 1000*ms)/1000);
+  return rc;
+}
+
+/*
+** Enable or disable the extended result codes.
+*/
+SQLITE_API int sqlite3_extended_result_codes(sqlite3 *db, int onoff){
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  db->errMask = onoff ? 0xffffffff : 0xff;
+  sqlite3_mutex_leave(db->mutex);
+  return SQLITE_OK;
+}
+
+/*
+** Invoke the xFileControl method on a particular database.
+*/
+SQLITE_API int sqlite3_file_control(sqlite3 *db, const char *zDbName, int op, void *pArg){
+  int rc = SQLITE_ERROR;
+  Btree *pBtree;
+
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ) return SQLITE_MISUSE_BKPT;
+#endif
+  sqlite3_mutex_enter(db->mutex);
+  pBtree = sqlite3DbNameToBtree(db, zDbName);
+  if( pBtree ){
+    Pager *pPager;
+    sqlite3_file *fd;
+    sqlite3BtreeEnter(pBtree);
+    pPager = sqlite3BtreePager(pBtree);
+    assert( pPager!=0 );
+    fd = sqlite3PagerFile(pPager);
+    assert( fd!=0 );
+    if( op==SQLITE_FCNTL_FILE_POINTER ){
+      *(sqlite3_file**)pArg = fd;
+      rc = SQLITE_OK;
+    }else if( fd->pMethods ){
+      rc = sqlite3OsFileControl(fd, op, pArg);
+    }else{
+      rc = SQLITE_NOTFOUND;
+    }
+    sqlite3BtreeLeave(pBtree);
+  }
+  sqlite3_mutex_leave(db->mutex);
+  return rc;   
+}
+
+/*
+** Interface to the testing logic.
+*/
+SQLITE_API int sqlite3_test_control(int op, ...){
+  int rc = 0;
+#ifndef SQLITE_OMIT_BUILTIN_TEST
+  va_list ap;
+  va_start(ap, op);
+  switch( op ){
+
+    /*
+    ** Save the current state of the PRNG.
+    */
+    case SQLITE_TESTCTRL_PRNG_SAVE: {
+      sqlite3PrngSaveState();
+      break;
+    }
+
+    /*
+    ** Restore the state of the PRNG to the last state saved using
+    ** PRNG_SAVE.  If PRNG_SAVE has never before been called, then
+    ** this verb acts like PRNG_RESET.
+    */
+    case SQLITE_TESTCTRL_PRNG_RESTORE: {
+      sqlite3PrngRestoreState();
+      break;
+    }
+
+    /*
+    ** Reset the PRNG back to its uninitialized state.  The next call
+    ** to sqlite3_randomness() will reseed the PRNG using a single call
+    ** to the xRandomness method of the default VFS.
+    */
+    case SQLITE_TESTCTRL_PRNG_RESET: {
+      sqlite3_randomness(0,0);
+      break;
+    }
+
+    /*
+    **  sqlite3_test_control(BITVEC_TEST, size, program)
+    **
+    ** Run a test against a Bitvec object of size.  The program argument
+    ** is an array of integers that defines the test.  Return -1 on a
+    ** memory allocation error, 0 on success, or non-zero for an error.
+    ** See the sqlite3BitvecBuiltinTest() for additional information.
+    */
+    case SQLITE_TESTCTRL_BITVEC_TEST: {
+      int sz = va_arg(ap, int);
+      int *aProg = va_arg(ap, int*);
+      rc = sqlite3BitvecBuiltinTest(sz, aProg);
+      break;
+    }
+
+    /*
+    **  sqlite3_test_control(FAULT_INSTALL, xCallback)
+    **
+    ** Arrange to invoke xCallback() whenever sqlite3FaultSim() is called,
+    ** if xCallback is not NULL.
+    **
+    ** As a test of the fault simulator mechanism itself, sqlite3FaultSim(0)
+    ** is called immediately after installing the new callback and the return
+    ** value from sqlite3FaultSim(0) becomes the return from
+    ** sqlite3_test_control().
+    */
+    case SQLITE_TESTCTRL_FAULT_INSTALL: {
+      /* MSVC is picky about pulling func ptrs from va lists.
+      ** http://support.microsoft.com/kb/47961
+      ** sqlite3GlobalConfig.xTestCallback = va_arg(ap, int(*)(int));
+      */
+      typedef int(*TESTCALLBACKFUNC_t)(int);
+      sqlite3GlobalConfig.xTestCallback = va_arg(ap, TESTCALLBACKFUNC_t);
+      rc = sqlite3FaultSim(0);
+      break;
+    }
+
+    /*
+    **  sqlite3_test_control(BENIGN_MALLOC_HOOKS, xBegin, xEnd)
+    **
+    ** Register hooks to call to indicate which malloc() failures 
+    ** are benign.
+    */
+    case SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS: {
+      typedef void (*void_function)(void);
+      void_function xBenignBegin;
+      void_function xBenignEnd;
+      xBenignBegin = va_arg(ap, void_function);
+      xBenignEnd = va_arg(ap, void_function);
+      sqlite3BenignMallocHooks(xBenignBegin, xBenignEnd);
+      break;
+    }
+
+    /*
+    **  sqlite3_test_control(SQLITE_TESTCTRL_PENDING_BYTE, unsigned int X)
+    **
+    ** Set the PENDING byte to the value in the argument, if X>0.
+    ** Make no changes if X==0.  Return the value of the pending byte
+    ** as it existing before this routine was called.
+    **
+    ** IMPORTANT:  Changing the PENDING byte from 0x40000000 results in
+    ** an incompatible database file format.  Changing the PENDING byte
+    ** while any database connection is open results in undefined and
+    ** deleterious behavior.
+    */
+    case SQLITE_TESTCTRL_PENDING_BYTE: {
+      rc = PENDING_BYTE;
+#ifndef SQLITE_OMIT_WSD
+      {
+        unsigned int newVal = va_arg(ap, unsigned int);
+        if( newVal ) sqlite3PendingByte = newVal;
+      }
+#endif
+      break;
+    }
+
+    /*
+    **  sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, int X)
+    **
+    ** This action provides a run-time test to see whether or not
+    ** assert() was enabled at compile-time.  If X is true and assert()
+    ** is enabled, then the return value is true.  If X is true and
+    ** assert() is disabled, then the return value is zero.  If X is
+    ** false and assert() is enabled, then the assertion fires and the
+    ** process aborts.  If X is false and assert() is disabled, then the
+    ** return value is zero.
+    */
+    case SQLITE_TESTCTRL_ASSERT: {
+      volatile int x = 0;
+      assert( (x = va_arg(ap,int))!=0 );
+      rc = x;
+      break;
+    }
+
+
+    /*
+    **  sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, int X)
+    **
+    ** This action provides a run-time test to see how the ALWAYS and
+    ** NEVER macros were defined at compile-time.
+    **
+    ** The return value is ALWAYS(X).  
+    **
+    ** The recommended test is X==2.  If the return value is 2, that means
+    ** ALWAYS() and NEVER() are both no-op pass-through macros, which is the
+    ** default setting.  If the return value is 1, then ALWAYS() is either
+    ** hard-coded to true or else it asserts if its argument is false.
+    ** The first behavior (hard-coded to true) is the case if
+    ** SQLITE_TESTCTRL_ASSERT shows that assert() is disabled and the second
+    ** behavior (assert if the argument to ALWAYS() is false) is the case if
+    ** SQLITE_TESTCTRL_ASSERT shows that assert() is enabled.
+    **
+    ** The run-time test procedure might look something like this:
+    **
+    **    if( sqlite3_test_control(SQLITE_TESTCTRL_ALWAYS, 2)==2 ){
+    **      // ALWAYS() and NEVER() are no-op pass-through macros
+    **    }else if( sqlite3_test_control(SQLITE_TESTCTRL_ASSERT, 1) ){
+    **      // ALWAYS(x) asserts that x is true. NEVER(x) asserts x is false.
+    **    }else{
+    **      // ALWAYS(x) is a constant 1.  NEVER(x) is a constant 0.
+    **    }
+    */
+    case SQLITE_TESTCTRL_ALWAYS: {
+      int x = va_arg(ap,int);
+      rc = ALWAYS(x);
+      break;
+    }
+
+    /*
+    **   sqlite3_test_control(SQLITE_TESTCTRL_BYTEORDER);
+    **
+    ** The integer returned reveals the byte-order of the computer on which
+    ** SQLite is running:
+    **
+    **       1     big-endian,    determined at run-time
+    **      10     little-endian, determined at run-time
+    **  432101     big-endian,    determined at compile-time
+    **  123410     little-endian, determined at compile-time
+    */ 
+    case SQLITE_TESTCTRL_BYTEORDER: {
+      rc = SQLITE_BYTEORDER*100 + SQLITE_LITTLEENDIAN*10 + SQLITE_BIGENDIAN;
+      break;
+    }
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_RESERVE, sqlite3 *db, int N)
+    **
+    ** Set the nReserve size to N for the main database on the database
+    ** connection db.
+    */
+    case SQLITE_TESTCTRL_RESERVE: {
+      sqlite3 *db = va_arg(ap, sqlite3*);
+      int x = va_arg(ap,int);
+      sqlite3_mutex_enter(db->mutex);
+      sqlite3BtreeSetPageSize(db->aDb[0].pBt, 0, x, 0);
+      sqlite3_mutex_leave(db->mutex);
+      break;
+    }
+
+    /*  sqlite3_test_control(SQLITE_TESTCTRL_OPTIMIZATIONS, sqlite3 *db, int N)
+    **
+    ** Enable or disable various optimizations for testing purposes.  The 
+    ** argument N is a bitmask of optimizations to be disabled.  For normal
+    ** operation N should be 0.  The idea is that a test program (like the
+    ** SQL Logic Test or SLT test module) can run the same SQL multiple times
+    ** with various optimizations disabled to verify that the same answer
+    ** is obtained in every case.
+    */
+    case SQLITE_TESTCTRL_OPTIMIZATIONS: {
+      sqlite3 *db = va_arg(ap, sqlite3*);
+      db->dbOptFlags = (u16)(va_arg(ap, int) & 0xffff);
+      break;
+    }
+
+#ifdef SQLITE_N_KEYWORD
+    /* sqlite3_test_control(SQLITE_TESTCTRL_ISKEYWORD, const char *zWord)
+    **
+    ** If zWord is a keyword recognized by the parser, then return the
+    ** number of keywords.  Or if zWord is not a keyword, return 0.
+    ** 
+    ** This test feature is only available in the amalgamation since
+    ** the SQLITE_N_KEYWORD macro is not defined in this file if SQLite
+    ** is built using separate source files.
+    */
+    case SQLITE_TESTCTRL_ISKEYWORD: {
+      const char *zWord = va_arg(ap, const char*);
+      int n = sqlite3Strlen30(zWord);
+      rc = (sqlite3KeywordCode((u8*)zWord, n)!=TK_ID) ? SQLITE_N_KEYWORD : 0;
+      break;
+    }
+#endif 
+
+    /* sqlite3_test_control(SQLITE_TESTCTRL_SCRATCHMALLOC, sz, &pNew, pFree);
+    **
+    ** Pass pFree into sqlite3ScratchFree(). 
+    ** If sz>0 then allocate a scratch buffer into pNew.  
+    */
+    case SQLITE_TESTCTRL_SCRATCHMALLOC: {
+      void *pFree, **ppNew;
+      int sz;
+      sz = va_arg(ap, int);
+      ppNew = va_arg(ap, void**);
+      pFree = va_arg(ap, void*);
+      if( sz ) *ppNew = sqlite3ScratchMalloc(sz);
+      sqlite3ScratchFree(pFree);
+      break;
+    }
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, int onoff);
+    **
+    ** If parameter onoff is non-zero, configure the wrappers so that all
+    ** subsequent calls to localtime() and variants fail. If onoff is zero,
+    ** undo this setting.
+    */
+    case SQLITE_TESTCTRL_LOCALTIME_FAULT: {
+      sqlite3GlobalConfig.bLocaltimeFault = va_arg(ap, int);
+      break;
+    }
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_NEVER_CORRUPT, int);
+    **
+    ** Set or clear a flag that indicates that the database file is always well-
+    ** formed and never corrupt.  This flag is clear by default, indicating that
+    ** database files might have arbitrary corruption.  Setting the flag during
+    ** testing causes certain assert() statements in the code to be activated
+    ** that demonstrat invariants on well-formed database files.
+    */
+    case SQLITE_TESTCTRL_NEVER_CORRUPT: {
+      sqlite3GlobalConfig.neverCorrupt = va_arg(ap, int);
+      break;
+    }
+
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_VDBE_COVERAGE, xCallback, ptr);
+    **
+    ** Set the VDBE coverage callback function to xCallback with context 
+    ** pointer ptr.
+    */
+    case SQLITE_TESTCTRL_VDBE_COVERAGE: {
+#ifdef SQLITE_VDBE_COVERAGE
+      typedef void (*branch_callback)(void*,int,u8,u8);
+      sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback);
+      sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*);
+#endif
+      break;
+    }
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, nMax); */
+    case SQLITE_TESTCTRL_SORTER_MMAP: {
+      sqlite3 *db = va_arg(ap, sqlite3*);
+      db->nMaxSorterMmap = va_arg(ap, int);
+      break;
+    }
+
+    /*   sqlite3_test_control(SQLITE_TESTCTRL_ISINIT);
+    **
+    ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if
+    ** not.
+    */
+    case SQLITE_TESTCTRL_ISINIT: {
+      if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR;
+      break;
+    }
+  }
+  va_end(ap);
+#endif /* SQLITE_OMIT_BUILTIN_TEST */
+  return rc;
+}
+
+/*
+** This is a utility routine, useful to VFS implementations, that checks
+** to see if a database file was a URI that contained a specific query 
+** parameter, and if so obtains the value of the query parameter.
+**
+** The zFilename argument is the filename pointer passed into the xOpen()
+** method of a VFS implementation.  The zParam argument is the name of the
+** query parameter we seek.  This routine returns the value of the zParam
+** parameter if it exists.  If the parameter does not exist, this routine
+** returns a NULL pointer.
+*/
+SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam){
+  if( zFilename==0 || zParam==0 ) return 0;
+  zFilename += sqlite3Strlen30(zFilename) + 1;
+  while( zFilename[0] ){
+    int x = strcmp(zFilename, zParam);
+    zFilename += sqlite3Strlen30(zFilename) + 1;
+    if( x==0 ) return zFilename;
+    zFilename += sqlite3Strlen30(zFilename) + 1;
+  }
+  return 0;
+}
+
+/*
+** Return a boolean value for a query parameter.
+*/
+SQLITE_API int sqlite3_uri_boolean(const char *zFilename, const char *zParam, int bDflt){
+  const char *z = sqlite3_uri_parameter(zFilename, zParam);
+  bDflt = bDflt!=0;
+  return z ? sqlite3GetBoolean(z, bDflt) : bDflt;
+}
+
+/*
+** Return a 64-bit integer value for a query parameter.
+*/
+SQLITE_API sqlite3_int64 sqlite3_uri_int64(
+  const char *zFilename,    /* Filename as passed to xOpen */
+  const char *zParam,       /* URI parameter sought */
+  sqlite3_int64 bDflt       /* return if parameter is missing */
+){
+  const char *z = sqlite3_uri_parameter(zFilename, zParam);
+  sqlite3_int64 v;
+  if( z && sqlite3DecOrHexToI64(z, &v)==SQLITE_OK ){
+    bDflt = v;
+  }
+  return bDflt;
+}
+
+/*
+** Return the Btree pointer identified by zDbName.  Return NULL if not found.
+*/
+SQLITE_PRIVATE Btree *sqlite3DbNameToBtree(sqlite3 *db, const char *zDbName){
+  int i;
+  for(i=0; i<db->nDb; i++){
+    if( db->aDb[i].pBt
+     && (zDbName==0 || sqlite3StrICmp(zDbName, db->aDb[i].zName)==0)
+    ){
+      return db->aDb[i].pBt;
+    }
+  }
+  return 0;
+}
+
+/*
+** Return the filename of the database associated with a database
+** connection.
+*/
+SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName){
+  Btree *pBt;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return 0;
+  }
+#endif
+  pBt = sqlite3DbNameToBtree(db, zDbName);
+  return pBt ? sqlite3BtreeGetFilename(pBt) : 0;
+}
+
+/*
+** Return 1 if database is read-only or 0 if read/write.  Return -1 if
+** no such database exists.
+*/
+SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName){
+  Btree *pBt;
+#ifdef SQLITE_ENABLE_API_ARMOR
+  if( !sqlite3SafetyCheckOk(db) ){
+    (void)SQLITE_MISUSE_BKPT;
+    return -1;
+  }
+#endif
+  pBt = sqlite3DbNameToBtree(db, zDbName);
+  return pBt ? sqlite3BtreeIsReadonly(pBt) : -1;
+}
+
+/************** End of main.c ************************************************/
+/************** Begin file notify.c ******************************************/
+/*
+** 2009 March 3
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** This file contains the implementation of the sqlite3_unlock_notify()
+** API method and its associated functionality.
+*/
+
+/* Omit this entire file if SQLITE_ENABLE_UNLOCK_NOTIFY is not defined. */
+#ifdef SQLITE_ENABLE_UNLOCK_NOTIFY
+
+/*
+** Public interfaces:
+**
+**   sqlite3ConnectionBlocked()
+**   sqlite3ConnectionUnlocked()
+**   sqlite3ConnectionClosed()
+**   sqlite3_unlock_notify()
+*/
+
+#define assertMutexHeld() \
+  assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) )
+
+/*
+** Head of a linked list of all sqlite3 objects created by this process
+** for which either sqlite3.pBlockingConnection or sqlite3.pUnlockConnection
+** is not NULL. This variable may only accessed while the STATIC_MASTER
+** mutex is held.
+*/
+static sqlite3 *SQLITE_WSD sqlite3BlockedList = 0;
+
+#ifndef NDEBUG
+/*
+** This function is a complex assert() that verifies the following 
+** properties of the blocked connections list:
+**
+**   1) Each entry in the list has a non-NULL value for either 
+**      pUnlockConnection or pBlockingConnection, or both.
+**
+**   2) All entries in the list that share a common value for 
+**      xUnlockNotify are grouped together.
+**
+**   3) If the argument db is not NULL, then none of the entries in the
+**      blocked connections list have pUnlockConnection or pBlockingConnection
+**      set to db. This is used when closing connection db.
+*/
+static void checkListProperties(sqlite3 *db){
+  sqlite3 *p;
+  for(p=sqlite3BlockedList; p; p=p->pNextBlocked){
+    int seen = 0;
+    sqlite3 *p2;
+
+    /* Verify property (1) */
+    assert( p->pUnlockConnection || p->pBlockingConnection );
+
+    /* Verify property (2) */
+    for(p2=sqlite3BlockedList; p2!=p; p2=p2->pNextBlocked){
+      if( p2->xUnlockNotify==p->xUnlockNotify ) seen = 1;
+      assert( p2->xUnlockNotify==p->xUnlockNotify || !seen );
+      assert( db==0 || p->pUnlockConnection!=db );
+      assert( db==0 || p->pBlockingConnection!=db );
+    }
+  }
+}
+#else
+# define checkListProperties(x)
+#endif
+
+/*
+** Remove connection db from the blocked connections list. If connection
+** db is not currently a part of the list, this function is a no-op.
+*/
+static void removeFromBlockedList(sqlite3 *db){
+  sqlite3 **pp;
+  assertMutexHeld();
+  for(pp=&sqlite3BlockedList; *pp; pp = &(*pp)->pNextBlocked){
+    if( *pp==db ){
+      *pp = (*pp)->pNextBlocked;
+      break;
+    }
+  }
+}
+
+/*
+** Add connection db to the blocked connections list. It is assumed
+** that it is not already a part of the list.
+*/
+static void addToBlockedList(sqlite3 *db){
+  sqlite3 **pp;
+  assertMutexHeld();
+  for(
+    pp=&sqlite3BlockedList; 
+    *pp && (*pp)->xUnlockNotify!=db->xUnlockNotify; 
+    pp=&(*pp)->pNextBlocked
+  );
+  db->pNextBlocked = *pp;
+  *pp = db;
+}
+
+/*
+** Obtain the STATIC_MASTER mutex.
+*/
+static void enterMutex(void){
+  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+  checkListProperties(0);
+}
+
+/*
+** Release the STATIC_MASTER mutex.
+*/
+static void leaveMutex(void){
+  assertMutexHeld();
+  checkListProperties(0);
+  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+}
+
+/*
+** Register an unlock-notify callback.
+**
+** This is called after connection "db" has attempted some operation
+** but has received an SQLITE_LOCKED error because another connection
+** (call it pOther) in the same process was busy using the same shared
+** cache.  pOther is found by looking at db->pBlockingConnection.
+**
+** If there is no blocking connection, the callback is invoked immediately,
+** before this routine returns.
+**
+** If pOther is already blocked on db, then report SQLITE_LOCKED, to indicate
+** a deadlock.
+**
+** Otherwise, make arrangements to invoke xNotify when pOther drops
+** its locks.
+**
+** Each call to this routine overrides any prior callbacks registered
+** on the same "db".  If xNotify==0 then any prior callbacks are immediately
+** cancelled.
+*/
+SQLITE_API int sqlite3_unlock_notify(
+  sqlite3 *db,
+  void (*xNotify)(void **, int),
+  void *pArg
+){
+  int rc = SQLITE_OK;
+
+  sqlite3_mutex_enter(db->mutex);
+  enterMutex();
+
+  if( xNotify==0 ){
+    removeFromBlockedList(db);
+    db->pBlockingConnection = 0;
+    db->pUnlockConnection = 0;
+    db->xUnlockNotify = 0;
+    db->pUnlockArg = 0;
+  }else if( 0==db->pBlockingConnection ){
+    /* The blocking transaction has been concluded. Or there never was a 
+    ** blocking transaction. In either case, invoke the notify callback
+    ** immediately. 
+    */
+    xNotify(&pArg, 1);
+  }else{
+    sqlite3 *p;
+
+    for(p=db->pBlockingConnection; p && p!=db; p=p->pUnlockConnection){}
+    if( p ){
+      rc = SQLITE_LOCKED;              /* Deadlock detected. */
+    }else{
+      db->pUnlockConnection = db->pBlockingConnection;
+      db->xUnlockNotify = xNotify;
+      db->pUnlockArg = pArg;
+      removeFromBlockedList(db);
+      addToBlockedList(db);
+    }
+  }
+
+  leaveMutex();
+  assert( !db->mallocFailed );
+  sqlite3ErrorWithMsg(db, rc, (rc?"database is deadlocked":0));
+  sqlite3_mutex_leave(db->mutex);
+  return rc;
+}
+
+/*
+** This function is called while stepping or preparing a statement 
+** associated with connection db. The operation will return SQLITE_LOCKED
+** to the user because it requires a lock that will not be available
+** until connection pBlocker concludes its current transaction.
+*/
+SQLITE_PRIVATE void sqlite3ConnectionBlocked(sqlite3 *db, sqlite3 *pBlocker){
+  enterMutex();
+  if( db->pBlockingConnection==0 && db->pUnlockConnection==0 ){
+    addToBlockedList(db);
+  }
+  db->pBlockingConnection = pBlocker;
+  leaveMutex();
+}
+
+/*
+** This function is called when
+** the transaction opened by database db has just finished. Locks held 
+** by database connection db have been released.
+**
+** This function loops through each entry in the blocked connections
+** list and does the following:
+**
+**   1) If the sqlite3.pBlockingConnection member of a list entry is
+**      set to db, then set pBlockingConnection=0.
+**
+**   2) If the sqlite3.pUnlockConnection member of a list entry is
+**      set to db, then invoke the configured unlock-notify callback and
+**      set pUnlockConnection=0.
+**
+**   3) If the two steps above mean that pBlockingConnection==0 and
+**      pUnlockConnection==0, remove the entry from the blocked connections
+**      list.
+*/
+SQLITE_PRIVATE void sqlite3ConnectionUnlocked(sqlite3 *db){
+  void (*xUnlockNotify)(void **, int) = 0; /* Unlock-notify cb to invoke */
+  int nArg = 0;                            /* Number of entries in aArg[] */
+  sqlite3 **pp;                            /* Iterator variable */
+  void **aArg;               /* Arguments to the unlock callback */
+  void **aDyn = 0;           /* Dynamically allocated space for aArg[] */
+  void *aStatic[16];         /* Starter space for aArg[].  No malloc required */
+
+  aArg = aStatic;
+  enterMutex();         /* Enter STATIC_MASTER mutex */
+
+  /* This loop runs once for each entry in the blocked-connections list. */
+  for(pp=&sqlite3BlockedList; *pp; /* no-op */ ){
+    sqlite3 *p = *pp;
+
+    /* Step 1. */
+    if( p->pBlockingConnection==db ){
+      p->pBlockingConnection = 0;
+    }
+
+    /* Step 2. */
+    if( p->pUnlockConnection==db ){
+      assert( p->xUnlockNotify );
+      if( p->xUnlockNotify!=xUnlockNotify && nArg!=0 ){
+        xUnlockNotify(aArg, nArg);
+        nArg = 0;
+      }
+
+      sqlite3BeginBenignMalloc();
+      assert( aArg==aDyn || (aDyn==0 && aArg==aStatic) );
+      assert( nArg<=(int)ArraySize(aStatic) || aArg==aDyn );
+      if( (!aDyn && nArg==(int)ArraySize(aStatic))
+       || (aDyn && nArg==(int)(sqlite3MallocSize(aDyn)/sizeof(void*)))
+      ){
+        /* The aArg[] array needs to grow. */
+        void **pNew = (void **)sqlite3Malloc(nArg*sizeof(void *)*2);
+        if( pNew ){
+          memcpy(pNew, aArg, nArg*sizeof(void *));
+          sqlite3_free(aDyn);
+          aDyn = aArg = pNew;
+        }else{
+          /* This occurs when the array of context pointers that need to
+          ** be passed to the unlock-notify callback is larger than the
+          ** aStatic[] array allocated on the stack and the attempt to 
+          ** allocate a larger array from the heap has failed.
+          **
+          ** This is a difficult situation to handle. Returning an error
+          ** code to the caller is insufficient, as even if an error code
+          ** is returned the transaction on connection db will still be
+          ** closed and the unlock-notify callbacks on blocked connections
+          ** will go unissued. This might cause the application to wait
+          ** indefinitely for an unlock-notify callback that will never 
+          ** arrive.
+          **
+          ** Instead, invoke the unlock-notify callback with the context
+          ** array already accumulated. We can then clear the array and
+          ** begin accumulating any further context pointers without 
+          ** requiring any dynamic allocation. This is sub-optimal because
+          ** it means that instead of one callback with a large array of
+          ** context pointers the application will receive two or more
+          ** callbacks with smaller arrays of context pointers, which will
+          ** reduce the applications ability to prioritize multiple 
+          ** connections. But it is the best that can be done under the
+          ** circumstances.
+          */
+          xUnlockNotify(aArg, nArg);
+          nArg = 0;
+        }
+      }
+      sqlite3EndBenignMalloc();
+
+      aArg[nArg++] = p->pUnlockArg;
+      xUnlockNotify = p->xUnlockNotify;
+      p->pUnlockConnection = 0;
+      p->xUnlockNotify = 0;
+      p->pUnlockArg = 0;
+    }
+
+    /* Step 3. */
+    if( p->pBlockingConnection==0 && p->pUnlockConnection==0 ){
+      /* Remove connection p from the blocked connections list. */
+      *pp = p->pNextBlocked;
+      p->pNextBlocked = 0;
+    }else{
+      pp = &p->pNextBlocked;
+    }
+  }
+
+  if( nArg!=0 ){
+    xUnlockNotify(aArg, nArg);
+  }
+  sqlite3_free(aDyn);
+  leaveMutex();         /* Leave STATIC_MASTER mutex */
+}
+
+/*
+** This is called when the database connection passed as an argument is 
+** being closed. The connection is removed from the blocked list.
+*/
+SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){
+  sqlite3ConnectionUnlocked(db);
+  enterMutex();
+  removeFromBlockedList(db);
+  checkListProperties(db);
+  leaveMutex();
+}
+#endif
+
+/************** End of notify.c **********************************************/
+/************** Begin file fts3.c ********************************************/
+/*
+** 2006 Oct 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is an SQLite module implementing full-text search.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+
+/* The full-text index is stored in a series of b+tree (-like)
+** structures called segments which map terms to doclists.  The
+** structures are like b+trees in layout, but are constructed from the
+** bottom up in optimal fashion and are not updatable.  Since trees
+** are built from the bottom up, things will be described from the
+** bottom up.
+**
+**
+**** Varints ****
+** The basic unit of encoding is a variable-length integer called a
+** varint.  We encode variable-length integers in little-endian order
+** using seven bits * per byte as follows:
+**
+** KEY:
+**         A = 0xxxxxxx    7 bits of data and one flag bit
+**         B = 1xxxxxxx    7 bits of data and one flag bit
+**
+**  7 bits - A
+** 14 bits - BA
+** 21 bits - BBA
+** and so on.
+**
+** This is similar in concept to how sqlite encodes "varints" but
+** the encoding is not the same.  SQLite varints are big-endian
+** are are limited to 9 bytes in length whereas FTS3 varints are
+** little-endian and can be up to 10 bytes in length (in theory).
+**
+** Example encodings:
+**
+**     1:    0x01
+**   127:    0x7f
+**   128:    0x81 0x00
+**
+**
+**** Document lists ****
+** A doclist (document list) holds a docid-sorted list of hits for a
+** given term.  Doclists hold docids and associated token positions.
+** A docid is the unique integer identifier for a single document.
+** A position is the index of a word within the document.  The first 
+** word of the document has a position of 0.
+**
+** FTS3 used to optionally store character offsets using a compile-time
+** option.  But that functionality is no longer supported.
+**
+** A doclist is stored like this:
+**
+** array {
+**   varint docid;          (delta from previous doclist)
+**   array {                (position list for column 0)
+**     varint position;     (2 more than the delta from previous position)
+**   }
+**   array {
+**     varint POS_COLUMN;   (marks start of position list for new column)
+**     varint column;       (index of new column)
+**     array {
+**       varint position;   (2 more than the delta from previous position)
+**     }
+**   }
+**   varint POS_END;        (marks end of positions for this document.
+** }
+**
+** Here, array { X } means zero or more occurrences of X, adjacent in
+** memory.  A "position" is an index of a token in the token stream
+** generated by the tokenizer. Note that POS_END and POS_COLUMN occur 
+** in the same logical place as the position element, and act as sentinals
+** ending a position list array.  POS_END is 0.  POS_COLUMN is 1.
+** The positions numbers are not stored literally but rather as two more
+** than the difference from the prior position, or the just the position plus
+** 2 for the first position.  Example:
+**
+**   label:       A B C D E  F  G H   I  J K
+**   value:     123 5 9 1 1 14 35 0 234 72 0
+**
+** The 123 value is the first docid.  For column zero in this document
+** there are two matches at positions 3 and 10 (5-2 and 9-2+3).  The 1
+** at D signals the start of a new column; the 1 at E indicates that the
+** new column is column number 1.  There are two positions at 12 and 45
+** (14-2 and 35-2+12).  The 0 at H indicate the end-of-document.  The
+** 234 at I is the delta to next docid (357).  It has one position 70
+** (72-2) and then terminates with the 0 at K.
+**
+** A "position-list" is the list of positions for multiple columns for
+** a single docid.  A "column-list" is the set of positions for a single
+** column.  Hence, a position-list consists of one or more column-lists,
+** a document record consists of a docid followed by a position-list and
+** a doclist consists of one or more document records.
+**
+** A bare doclist omits the position information, becoming an 
+** array of varint-encoded docids.
+**
+**** Segment leaf nodes ****
+** Segment leaf nodes store terms and doclists, ordered by term.  Leaf
+** nodes are written using LeafWriter, and read using LeafReader (to
+** iterate through a single leaf node's data) and LeavesReader (to
+** iterate through a segment's entire leaf layer).  Leaf nodes have
+** the format:
+**
+** varint iHeight;             (height from leaf level, always 0)
+** varint nTerm;               (length of first term)
+** char pTerm[nTerm];          (content of first term)
+** varint nDoclist;            (length of term's associated doclist)
+** char pDoclist[nDoclist];    (content of doclist)
+** array {
+**                             (further terms are delta-encoded)
+**   varint nPrefix;           (length of prefix shared with previous term)
+**   varint nSuffix;           (length of unshared suffix)
+**   char pTermSuffix[nSuffix];(unshared suffix of next term)
+**   varint nDoclist;          (length of term's associated doclist)
+**   char pDoclist[nDoclist];  (content of doclist)
+** }
+**
+** Here, array { X } means zero or more occurrences of X, adjacent in
+** memory.
+**
+** Leaf nodes are broken into blocks which are stored contiguously in
+** the %_segments table in sorted order.  This means that when the end
+** of a node is reached, the next term is in the node with the next
+** greater node id.
+**
+** New data is spilled to a new leaf node when the current node
+** exceeds LEAF_MAX bytes (default 2048).  New data which itself is
+** larger than STANDALONE_MIN (default 1024) is placed in a standalone
+** node (a leaf node with a single term and doclist).  The goal of
+** these settings is to pack together groups of small doclists while
+** making it efficient to directly access large doclists.  The
+** assumption is that large doclists represent terms which are more
+** likely to be query targets.
+**
+** TODO(shess) It may be useful for blocking decisions to be more
+** dynamic.  For instance, it may make more sense to have a 2.5k leaf
+** node rather than splitting into 2k and .5k nodes.  My intuition is
+** that this might extend through 2x or 4x the pagesize.
+**
+**
+**** Segment interior nodes ****
+** Segment interior nodes store blockids for subtree nodes and terms
+** to describe what data is stored by the each subtree.  Interior
+** nodes are written using InteriorWriter, and read using
+** InteriorReader.  InteriorWriters are created as needed when
+** SegmentWriter creates new leaf nodes, or when an interior node
+** itself grows too big and must be split.  The format of interior
+** nodes:
+**
+** varint iHeight;           (height from leaf level, always >0)
+** varint iBlockid;          (block id of node's leftmost subtree)
+** optional {
+**   varint nTerm;           (length of first term)
+**   char pTerm[nTerm];      (content of first term)
+**   array {
+**                                (further terms are delta-encoded)
+**     varint nPrefix;            (length of shared prefix with previous term)
+**     varint nSuffix;            (length of unshared suffix)
+**     char pTermSuffix[nSuffix]; (unshared suffix of next term)
+**   }
+** }
+**
+** Here, optional { X } means an optional element, while array { X }
+** means zero or more occurrences of X, adjacent in memory.
+**
+** An interior node encodes n terms separating n+1 subtrees.  The
+** subtree blocks are contiguous, so only the first subtree's blockid
+** is encoded.  The subtree at iBlockid will contain all terms less
+** than the first term encoded (or all terms if no term is encoded).
+** Otherwise, for terms greater than or equal to pTerm[i] but less
+** than pTerm[i+1], the subtree for that term will be rooted at
+** iBlockid+i.  Interior nodes only store enough term data to
+** distinguish adjacent children (if the rightmost term of the left
+** child is "something", and the leftmost term of the right child is
+** "wicked", only "w" is stored).
+**
+** New data is spilled to a new interior node at the same height when
+** the current node exceeds INTERIOR_MAX bytes (default 2048).
+** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
+** interior nodes and making the tree too skinny.  The interior nodes
+** at a given height are naturally tracked by interior nodes at
+** height+1, and so on.
+**
+**
+**** Segment directory ****
+** The segment directory in table %_segdir stores meta-information for
+** merging and deleting segments, and also the root node of the
+** segment's tree.
+**
+** The root node is the top node of the segment's tree after encoding
+** the entire segment, restricted to ROOT_MAX bytes (default 1024).
+** This could be either a leaf node or an interior node.  If the top
+** node requires more than ROOT_MAX bytes, it is flushed to %_segments
+** and a new root interior node is generated (which should always fit
+** within ROOT_MAX because it only needs space for 2 varints, the
+** height and the blockid of the previous root).
+**
+** The meta-information in the segment directory is:
+**   level               - segment level (see below)
+**   idx                 - index within level
+**                       - (level,idx uniquely identify a segment)
+**   start_block         - first leaf node
+**   leaves_end_block    - last leaf node
+**   end_block           - last block (including interior nodes)
+**   root                - contents of root node
+**
+** If the root node is a leaf node, then start_block,
+** leaves_end_block, and end_block are all 0.
+**
+**
+**** Segment merging ****
+** To amortize update costs, segments are grouped into levels and
+** merged in batches.  Each increase in level represents exponentially
+** more documents.
+**
+** New documents (actually, document updates) are tokenized and
+** written individually (using LeafWriter) to a level 0 segment, with
+** incrementing idx.  When idx reaches MERGE_COUNT (default 16), all
+** level 0 segments are merged into a single level 1 segment.  Level 1
+** is populated like level 0, and eventually MERGE_COUNT level 1
+** segments are merged to a single level 2 segment (representing
+** MERGE_COUNT^2 updates), and so on.
+**
+** A segment merge traverses all segments at a given level in
+** parallel, performing a straightforward sorted merge.  Since segment
+** leaf nodes are written in to the %_segments table in order, this
+** merge traverses the underlying sqlite disk structures efficiently.
+** After the merge, all segment blocks from the merged level are
+** deleted.
+**
+** MERGE_COUNT controls how often we merge segments.  16 seems to be
+** somewhat of a sweet spot for insertion performance.  32 and 64 show
+** very similar performance numbers to 16 on insertion, though they're
+** a tiny bit slower (perhaps due to more overhead in merge-time
+** sorting).  8 is about 20% slower than 16, 4 about 50% slower than
+** 16, 2 about 66% slower than 16.
+**
+** At query time, high MERGE_COUNT increases the number of segments
+** which need to be scanned and merged.  For instance, with 100k docs
+** inserted:
+**
+**    MERGE_COUNT   segments
+**       16           25
+**        8           12
+**        4           10
+**        2            6
+**
+** This appears to have only a moderate impact on queries for very
+** frequent terms (which are somewhat dominated by segment merge
+** costs), and infrequent and non-existent terms still seem to be fast
+** even with many segments.
+**
+** TODO(shess) That said, it would be nice to have a better query-side
+** argument for MERGE_COUNT of 16.  Also, it is possible/likely that
+** optimizations to things like doclist merging will swing the sweet
+** spot around.
+**
+**
+**
+**** Handling of deletions and updates ****
+** Since we're using a segmented structure, with no docid-oriented
+** index into the term index, we clearly cannot simply update the term
+** index when a document is deleted or updated.  For deletions, we
+** write an empty doclist (varint(docid) varint(POS_END)), for updates
+** we simply write the new doclist.  Segment merges overwrite older
+** data for a particular docid with newer data, so deletes or updates
+** will eventually overtake the earlier data and knock it out.  The
+** query logic likewise merges doclists so that newer data knocks out
+** older data.
+*/
+
+/************** Include fts3Int.h in the middle of fts3.c ********************/
+/************** Begin file fts3Int.h *****************************************/
+/*
+** 2009 Nov 12
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+#ifndef _FTSINT_H
+#define _FTSINT_H
+
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
+# define NDEBUG 1
+#endif
+
+/*
+** FTS4 is really an extension for FTS3.  It is enabled using the
+** SQLITE_ENABLE_FTS3 macro.  But to avoid confusion we also all
+** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3.
+*/
+#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
+# define SQLITE_ENABLE_FTS3
+#endif
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* If not building as part of the core, include sqlite3ext.h. */
+#ifndef SQLITE_CORE
+SQLITE_EXTENSION_INIT3
+#endif
+
+/************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/
+/************** Begin file fts3_tokenizer.h **********************************/
+/*
+** 2006 July 10
+**
+** The author disclaims copyright to this source code.
+**
+*************************************************************************
+** Defines the interface to tokenizers used by fulltext-search.  There
+** are three basic components:
+**
+** sqlite3_tokenizer_module is a singleton defining the tokenizer
+** interface functions.  This is essentially the class structure for
+** tokenizers.
+**
+** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
+** including customization information defined at creation time.
+**
+** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
+** tokens from a particular input.
+*/
+#ifndef _FTS3_TOKENIZER_H_
+#define _FTS3_TOKENIZER_H_
+
+/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
+** If tokenizers are to be allowed to call sqlite3_*() functions, then
+** we will need a way to register the API consistently.
+*/
+
+/*
+** Structures used by the tokenizer interface. When a new tokenizer
+** implementation is registered, the caller provides a pointer to
+** an sqlite3_tokenizer_module containing pointers to the callback
+** functions that make up an implementation.
+**
+** When an fts3 table is created, it passes any arguments passed to
+** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
+** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
+** implementation. The xCreate() function in turn returns an 
+** sqlite3_tokenizer structure representing the specific tokenizer to
+** be used for the fts3 table (customized by the tokenizer clause arguments).
+**
+** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
+** method is called. It returns an sqlite3_tokenizer_cursor object
+** that may be used to tokenize a specific input buffer based on
+** the tokenization rules supplied by a specific sqlite3_tokenizer
+** object.
+*/
+typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
+typedef struct sqlite3_tokenizer sqlite3_tokenizer;
+typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
+
+struct sqlite3_tokenizer_module {
+
+  /*
+  ** Structure version. Should always be set to 0 or 1.
+  */
+  int iVersion;
+
+  /*
+  ** Create a new tokenizer. The values in the argv[] array are the
+  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
+  ** TABLE statement that created the fts3 table. For example, if
+  ** the following SQL is executed:
+  **
+  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
+  **
+  ** then argc is set to 2, and the argv[] array contains pointers
+  ** to the strings "arg1" and "arg2".
+  **
+  ** This method should return either SQLITE_OK (0), or an SQLite error 
+  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
+  ** to point at the newly created tokenizer structure. The generic
+  ** sqlite3_tokenizer.pModule variable should not be initialized by
+  ** this callback. The caller will do so.
+  */
+  int (*xCreate)(
+    int argc,                           /* Size of argv array */
+    const char *const*argv,             /* Tokenizer argument strings */
+    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
+  );
+
+  /*
+  ** Destroy an existing tokenizer. The fts3 module calls this method
+  ** exactly once for each successful call to xCreate().
+  */
+  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
+
+  /*
+  ** Create a tokenizer cursor to tokenize an input buffer. The caller
+  ** is responsible for ensuring that the input buffer remains valid
+  ** until the cursor is closed (using the xClose() method). 
+  */
+  int (*xOpen)(
+    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
+    const char *pInput, int nBytes,      /* Input buffer */
+    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
+  );
+
+  /*
+  ** Destroy an existing tokenizer cursor. The fts3 module calls this 
+  ** method exactly once for each successful call to xOpen().
+  */
+  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
+
+  /*
+  ** Retrieve the next token from the tokenizer cursor pCursor. This
+  ** method should either return SQLITE_OK and set the values of the
+  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
+  ** the end of the buffer has been reached, or an SQLite error code.
+  **
+  ** *ppToken should be set to point at a buffer containing the 
+  ** normalized version of the token (i.e. after any case-folding and/or
+  ** stemming has been performed). *pnBytes should be set to the length
+  ** of this buffer in bytes. The input text that generated the token is
+  ** identified by the byte offsets returned in *piStartOffset and
+  ** *piEndOffset. *piStartOffset should be set to the index of the first
+  ** byte of the token in the input buffer. *piEndOffset should be set
+  ** to the index of the first byte just past the end of the token in
+  ** the input buffer.
+  **
+  ** The buffer *ppToken is set to point at is managed by the tokenizer
+  ** implementation. It is only required to be valid until the next call
+  ** to xNext() or xClose(). 
+  */
+  /* TODO(shess) current implementation requires pInput to be
+  ** nul-terminated.  This should either be fixed, or pInput/nBytes
+  ** should be converted to zInput.
+  */
+  int (*xNext)(
+    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
+    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
+    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
+    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
+    int *piPosition      /* OUT: Number of tokens returned before this one */
+  );
+
+  /***********************************************************************
+  ** Methods below this point are only available if iVersion>=1.
+  */
+
+  /* 
+  ** Configure the language id of a tokenizer cursor.
+  */
+  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
+};
+
+struct sqlite3_tokenizer {
+  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
+  /* Tokenizer implementations will typically add additional fields */
+};
+
+struct sqlite3_tokenizer_cursor {
+  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
+  /* Tokenizer implementations will typically add additional fields */
+};
+
+int fts3_global_term_cnt(int iTerm, int iCol);
+int fts3_term_cnt(int iTerm, int iCol);
+
+
+#endif /* _FTS3_TOKENIZER_H_ */
+
+/************** End of fts3_tokenizer.h **************************************/
+/************** Continuing where we left off in fts3Int.h ********************/
+/************** Include fts3_hash.h in the middle of fts3Int.h ***************/
+/************** Begin file fts3_hash.h ***************************************/
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the header file for the generic hash-table implementation
+** used in SQLite.  We've modified it slightly to serve as a standalone
+** hash table implementation for the full-text indexing module.
+**
+*/
+#ifndef _FTS3_HASH_H_
+#define _FTS3_HASH_H_
+
+/* Forward declarations of structures. */
+typedef struct Fts3Hash Fts3Hash;
+typedef struct Fts3HashElem Fts3HashElem;
+
+/* A complete hash table is an instance of the following structure.
+** The internals of this structure are intended to be opaque -- client
+** code should not attempt to access or modify the fields of this structure
+** directly.  Change this structure only by using the routines below.
+** However, many of the "procedures" and "functions" for modifying and
+** accessing this structure are really macros, so we can't really make
+** this structure opaque.
+*/
+struct Fts3Hash {
+  char keyClass;          /* HASH_INT, _POINTER, _STRING, _BINARY */
+  char copyKey;           /* True if copy of key made on insert */
+  int count;              /* Number of entries in this table */
+  Fts3HashElem *first;    /* The first element of the array */
+  int htsize;             /* Number of buckets in the hash table */
+  struct _fts3ht {        /* the hash table */
+    int count;               /* Number of entries with this hash */
+    Fts3HashElem *chain;     /* Pointer to first entry with this hash */
+  } *ht;
+};
+
+/* Each element in the hash table is an instance of the following 
+** structure.  All elements are stored on a single doubly-linked list.
+**
+** Again, this structure is intended to be opaque, but it can't really
+** be opaque because it is used by macros.
+*/
+struct Fts3HashElem {
+  Fts3HashElem *next, *prev; /* Next and previous elements in the table */
+  void *data;                /* Data associated with this element */
+  void *pKey; int nKey;      /* Key associated with this element */
+};
+
+/*
+** There are 2 different modes of operation for a hash table:
+**
+**   FTS3_HASH_STRING        pKey points to a string that is nKey bytes long
+**                           (including the null-terminator, if any).  Case
+**                           is respected in comparisons.
+**
+**   FTS3_HASH_BINARY        pKey points to binary data nKey bytes long. 
+**                           memcmp() is used to compare keys.
+**
+** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.  
+*/
+#define FTS3_HASH_STRING    1
+#define FTS3_HASH_BINARY    2
+
+/*
+** Access routines.  To delete, insert a NULL pointer.
+*/
+SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
+SQLITE_PRIVATE void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
+SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
+SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash*);
+SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
+
+/*
+** Shorthand for the functions above
+*/
+#define fts3HashInit     sqlite3Fts3HashInit
+#define fts3HashInsert   sqlite3Fts3HashInsert
+#define fts3HashFind     sqlite3Fts3HashFind
+#define fts3HashClear    sqlite3Fts3HashClear
+#define fts3HashFindElem sqlite3Fts3HashFindElem
+
+/*
+** Macros for looping over all elements of a hash table.  The idiom is
+** like this:
+**
+**   Fts3Hash h;
+**   Fts3HashElem *p;
+**   ...
+**   for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
+**     SomeStructure *pData = fts3HashData(p);
+**     // do something with pData
+**   }
+*/
+#define fts3HashFirst(H)  ((H)->first)
+#define fts3HashNext(E)   ((E)->next)
+#define fts3HashData(E)   ((E)->data)
+#define fts3HashKey(E)    ((E)->pKey)
+#define fts3HashKeysize(E) ((E)->nKey)
+
+/*
+** Number of entries in a hash table
+*/
+#define fts3HashCount(H)  ((H)->count)
+
+#endif /* _FTS3_HASH_H_ */
+
+/************** End of fts3_hash.h *******************************************/
+/************** Continuing where we left off in fts3Int.h ********************/
+
+/*
+** This constant determines the maximum depth of an FTS expression tree
+** that the library will create and use. FTS uses recursion to perform 
+** various operations on the query tree, so the disadvantage of a large
+** limit is that it may allow very large queries to use large amounts
+** of stack space (perhaps causing a stack overflow).
+*/
+#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
+# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
+#endif
+
+
+/*
+** This constant controls how often segments are merged. Once there are
+** FTS3_MERGE_COUNT segments of level N, they are merged into a single
+** segment of level N+1.
+*/
+#define FTS3_MERGE_COUNT 16
+
+/*
+** This is the maximum amount of data (in bytes) to store in the 
+** Fts3Table.pendingTerms hash table. Normally, the hash table is
+** populated as documents are inserted/updated/deleted in a transaction
+** and used to create a new segment when the transaction is committed.
+** However if this limit is reached midway through a transaction, a new 
+** segment is created and the hash table cleared immediately.
+*/
+#define FTS3_MAX_PENDING_DATA (1*1024*1024)
+
+/*
+** Macro to return the number of elements in an array. SQLite has a
+** similar macro called ArraySize(). Use a different name to avoid
+** a collision when building an amalgamation with built-in FTS3.
+*/
+#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
+
+
+#ifndef MIN
+# define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+#ifndef MAX
+# define MAX(x,y) ((x)>(y)?(x):(y))
+#endif
+
+/*
+** Maximum length of a varint encoded integer. The varint format is different
+** from that used by SQLite, so the maximum length is 10, not 9.
+*/
+#define FTS3_VARINT_MAX 10
+
+/*
+** FTS4 virtual tables may maintain multiple indexes - one index of all terms
+** in the document set and zero or more prefix indexes. All indexes are stored
+** as one or more b+-trees in the %_segments and %_segdir tables. 
+**
+** It is possible to determine which index a b+-tree belongs to based on the
+** value stored in the "%_segdir.level" column. Given this value L, the index
+** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with
+** level values between 0 and 1023 (inclusive) belong to index 0, all levels
+** between 1024 and 2047 to index 1, and so on.
+**
+** It is considered impossible for an index to use more than 1024 levels. In 
+** theory though this may happen, but only after at least 
+** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
+*/
+#define FTS3_SEGDIR_MAXLEVEL      1024
+#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
+
+/*
+** The testcase() macro is only used by the amalgamation.  If undefined,
+** make it a no-op.
+*/
+#ifndef testcase
+# define testcase(X)
+#endif
+
+/*
+** Terminator values for position-lists and column-lists.
+*/
+#define POS_COLUMN  (1)     /* Column-list terminator */
+#define POS_END     (0)     /* Position-list terminator */ 
+
+/*
+** This section provides definitions to allow the
+** FTS3 extension to be compiled outside of the 
+** amalgamation.
+*/
+#ifndef SQLITE_AMALGAMATION
+/*
+** Macros indicating that conditional expressions are always true or
+** false.
+*/
+#ifdef SQLITE_COVERAGE_TEST
+# define ALWAYS(x) (1)
+# define NEVER(X)  (0)
+#else
+# define ALWAYS(x) (x)
+# define NEVER(x)  (x)
+#endif
+
+/*
+** Internal types used by SQLite.
+*/
+typedef unsigned char u8;         /* 1-byte (or larger) unsigned integer */
+typedef short int i16;            /* 2-byte (or larger) signed integer */
+typedef unsigned int u32;         /* 4-byte unsigned integer */
+typedef sqlite3_uint64 u64;       /* 8-byte unsigned integer */
+typedef sqlite3_int64 i64;        /* 8-byte signed integer */
+
+/*
+** Macro used to suppress compiler warnings for unused parameters.
+*/
+#define UNUSED_PARAMETER(x) (void)(x)
+
+/*
+** Activate assert() only if SQLITE_TEST is enabled.
+*/
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
+# define NDEBUG 1
+#endif
+
+/*
+** The TESTONLY macro is used to enclose variable declarations or
+** other bits of code that are needed to support the arguments
+** within testcase() and assert() macros.
+*/
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
+# define TESTONLY(X)  X
+#else
+# define TESTONLY(X)
+#endif
+
+#endif /* SQLITE_AMALGAMATION */
+
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE int sqlite3Fts3Corrupt(void);
+# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
+#else
+# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
+#endif
+
+typedef struct Fts3Table Fts3Table;
+typedef struct Fts3Cursor Fts3Cursor;
+typedef struct Fts3Expr Fts3Expr;
+typedef struct Fts3Phrase Fts3Phrase;
+typedef struct Fts3PhraseToken Fts3PhraseToken;
+
+typedef struct Fts3Doclist Fts3Doclist;
+typedef struct Fts3SegFilter Fts3SegFilter;
+typedef struct Fts3DeferredToken Fts3DeferredToken;
+typedef struct Fts3SegReader Fts3SegReader;
+typedef struct Fts3MultiSegReader Fts3MultiSegReader;
+
+/*
+** A connection to a fulltext index is an instance of the following
+** structure. The xCreate and xConnect methods create an instance
+** of this structure and xDestroy and xDisconnect free that instance.
+** All other methods receive a pointer to the structure as one of their
+** arguments.
+*/
+struct Fts3Table {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  sqlite3 *db;                    /* The database connection */
+  const char *zDb;                /* logical database name */
+  const char *zName;              /* virtual table name */
+  int nColumn;                    /* number of named columns in virtual table */
+  char **azColumn;                /* column names.  malloced */
+  u8 *abNotindexed;               /* True for 'notindexed' columns */
+  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
+  char *zContentTbl;              /* content=xxx option, or NULL */
+  char *zLanguageid;              /* languageid=xxx option, or NULL */
+  int nAutoincrmerge;             /* Value configured by 'automerge' */
+  u32 nLeafAdd;                   /* Number of leaf blocks added this trans */
+
+  /* Precompiled statements used by the implementation. Each of these 
+  ** statements is run and reset within a single virtual table API call. 
+  */
+  sqlite3_stmt *aStmt[40];
+
+  char *zReadExprlist;
+  char *zWriteExprlist;
+
+  int nNodeSize;                  /* Soft limit for node size */
+  u8 bFts4;                       /* True for FTS4, false for FTS3 */
+  u8 bHasStat;                    /* True if %_stat table exists (2==unknown) */
+  u8 bHasDocsize;                 /* True if %_docsize table exists */
+  u8 bDescIdx;                    /* True if doclists are in reverse order */
+  u8 bIgnoreSavepoint;            /* True to ignore xSavepoint invocations */
+  int nPgsz;                      /* Page size for host database */
+  char *zSegmentsTbl;             /* Name of %_segments table */
+  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */
+
+  /* 
+  ** The following array of hash tables is used to buffer pending index 
+  ** updates during transactions. All pending updates buffered at any one
+  ** time must share a common language-id (see the FTS4 langid= feature).
+  ** The current language id is stored in variable iPrevLangid.
+  **
+  ** A single FTS4 table may have multiple full-text indexes. For each index
+  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
+  ** terms that appear in the document set. Each subsequent index in aIndex[]
+  ** is an index of prefixes of a specific length.
+  **
+  ** Variable nPendingData contains an estimate the memory consumed by the 
+  ** pending data structures, including hash table overhead, but not including
+  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, all hash
+  ** tables are flushed to disk. Variable iPrevDocid is the docid of the most 
+  ** recently inserted record.
+  */
+  int nIndex;                     /* Size of aIndex[] */
+  struct Fts3Index {
+    int nPrefix;                  /* Prefix length (0 for main terms index) */
+    Fts3Hash hPending;            /* Pending terms table for this index */
+  } *aIndex;
+  int nMaxPendingData;            /* Max pending data before flush to disk */
+  int nPendingData;               /* Current bytes of pending data */
+  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
+  int iPrevLangid;                /* Langid of recently inserted document */
+
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
+  /* State variables used for validating that the transaction control
+  ** methods of the virtual table are called at appropriate times.  These
+  ** values do not contribute to FTS functionality; they are used for
+  ** verifying the operation of the SQLite core.
+  */
+  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
+  int mxSavepoint;       /* Largest valid xSavepoint integer */
+#endif
+
+#ifdef SQLITE_TEST
+  /* True to disable the incremental doclist optimization. This is controled
+  ** by special insert command 'test-no-incr-doclist'.  */
+  int bNoIncrDoclist;
+#endif
+};
+
+/*
+** When the core wants to read from the virtual table, it creates a
+** virtual table cursor (an instance of the following structure) using
+** the xOpen method. Cursors are destroyed using the xClose method.
+*/
+struct Fts3Cursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  i16 eSearch;                    /* Search strategy (see below) */
+  u8 isEof;                       /* True if at End Of Results */
+  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
+  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
+  Fts3Expr *pExpr;                /* Parsed MATCH query string */
+  int iLangid;                    /* Language being queried for */
+  int nPhrase;                    /* Number of matchable phrases in query */
+  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
+  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
+  char *pNextId;                  /* Pointer into the body of aDoclist */
+  char *aDoclist;                 /* List of docids for full-text queries */
+  int nDoclist;                   /* Size of buffer at aDoclist */
+  u8 bDesc;                       /* True to sort in descending order */
+  int eEvalmode;                  /* An FTS3_EVAL_XX constant */
+  int nRowAvg;                    /* Average size of database rows, in pages */
+  sqlite3_int64 nDoc;             /* Documents in table */
+  i64 iMinDocid;                  /* Minimum docid to return */
+  i64 iMaxDocid;                  /* Maximum docid to return */
+  int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */
+  u32 *aMatchinfo;                /* Information about most recent match */
+  int nMatchinfo;                 /* Number of elements in aMatchinfo[] */
+  char *zMatchinfo;               /* Matchinfo specification */
+};
+
+#define FTS3_EVAL_FILTER    0
+#define FTS3_EVAL_NEXT      1
+#define FTS3_EVAL_MATCHINFO 2
+
+/*
+** The Fts3Cursor.eSearch member is always set to one of the following.
+** Actualy, Fts3Cursor.eSearch can be greater than or equal to
+** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
+** of the column to be searched.  For example, in
+**
+**     CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
+**     SELECT docid FROM ex1 WHERE b MATCH 'one two three';
+** 
+** Because the LHS of the MATCH operator is 2nd column "b",
+** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1.  (+0 for a,
+** +1 for b, +2 for c, +3 for d.)  If the LHS of MATCH were "ex1" 
+** indicating that all columns should be searched,
+** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
+*/
+#define FTS3_FULLSCAN_SEARCH 0    /* Linear scan of %_content table */
+#define FTS3_DOCID_SEARCH    1    /* Lookup by rowid on %_content table */
+#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */
+
+/*
+** The lower 16-bits of the sqlite3_index_info.idxNum value set by
+** the xBestIndex() method contains the Fts3Cursor.eSearch value described
+** above. The upper 16-bits contain a combination of the following
+** bits, used to describe extra constraints on full-text searches.
+*/
+#define FTS3_HAVE_LANGID    0x00010000      /* languageid=? */
+#define FTS3_HAVE_DOCID_GE  0x00020000      /* docid>=? */
+#define FTS3_HAVE_DOCID_LE  0x00040000      /* docid<=? */
+
+struct Fts3Doclist {
+  char *aAll;                    /* Array containing doclist (or NULL) */
+  int nAll;                      /* Size of a[] in bytes */
+  char *pNextDocid;              /* Pointer to next docid */
+
+  sqlite3_int64 iDocid;          /* Current docid (if pList!=0) */
+  int bFreeList;                 /* True if pList should be sqlite3_free()d */
+  char *pList;                   /* Pointer to position list following iDocid */
+  int nList;                     /* Length of position list */
+};
+
+/*
+** A "phrase" is a sequence of one or more tokens that must match in
+** sequence.  A single token is the base case and the most common case.
+** For a sequence of tokens contained in double-quotes (i.e. "one two three")
+** nToken will be the number of tokens in the string.
+*/
+struct Fts3PhraseToken {
+  char *z;                        /* Text of the token */
+  int n;                          /* Number of bytes in buffer z */
+  int isPrefix;                   /* True if token ends with a "*" character */
+  int bFirst;                     /* True if token must appear at position 0 */
+
+  /* Variables above this point are populated when the expression is
+  ** parsed (by code in fts3_expr.c). Below this point the variables are
+  ** used when evaluating the expression. */
+  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
+  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
+};
+
+struct Fts3Phrase {
+  /* Cache of doclist for this phrase. */
+  Fts3Doclist doclist;
+  int bIncr;                 /* True if doclist is loaded incrementally */
+  int iDoclistToken;
+
+  /* Variables below this point are populated by fts3_expr.c when parsing 
+  ** a MATCH expression. Everything above is part of the evaluation phase. 
+  */
+  int nToken;                /* Number of tokens in the phrase */
+  int iColumn;               /* Index of column this phrase must match */
+  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
+};
+
+/*
+** A tree of these objects forms the RHS of a MATCH operator.
+**
+** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist 
+** points to a malloced buffer, size nDoclist bytes, containing the results 
+** of this phrase query in FTS3 doclist format. As usual, the initial 
+** "Length" field found in doclists stored on disk is omitted from this 
+** buffer.
+**
+** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
+** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
+** where nCol is the number of columns in the queried FTS table. The array
+** is populated as follows:
+**
+**   aMI[iCol*3 + 0] = Undefined
+**   aMI[iCol*3 + 1] = Number of occurrences
+**   aMI[iCol*3 + 2] = Number of rows containing at least one instance
+**
+** The aMI array is allocated using sqlite3_malloc(). It should be freed 
+** when the expression node is.
+*/
+struct Fts3Expr {
+  int eType;                 /* One of the FTSQUERY_XXX values defined below */
+  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
+  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
+  Fts3Expr *pLeft;           /* Left operand */
+  Fts3Expr *pRight;          /* Right operand */
+  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
+
+  /* The following are used by the fts3_eval.c module. */
+  sqlite3_int64 iDocid;      /* Current docid */
+  u8 bEof;                   /* True this expression is at EOF already */
+  u8 bStart;                 /* True if iDocid is valid */
+  u8 bDeferred;              /* True if this expression is entirely deferred */
+
+  u32 *aMI;
+};
+
+/*
+** Candidate values for Fts3Query.eType. Note that the order of the first
+** four values is in order of precedence when parsing expressions. For 
+** example, the following:
+**
+**   "a OR b AND c NOT d NEAR e"
+**
+** is equivalent to:
+**
+**   "a OR (b AND (c NOT (d NEAR e)))"
+*/
+#define FTSQUERY_NEAR   1
+#define FTSQUERY_NOT    2
+#define FTSQUERY_AND    3
+#define FTSQUERY_OR     4
+#define FTSQUERY_PHRASE 5
+
+
+/* fts3_write.c */
+SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
+SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *);
+SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *);
+SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *);
+SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
+  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
+SQLITE_PRIVATE int sqlite3Fts3SegReaderPending(
+  Fts3Table*,int,const char*,int,int,Fts3SegReader**);
+SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *);
+SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
+SQLITE_PRIVATE int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
+
+SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
+SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
+
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
+SQLITE_PRIVATE int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
+SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
+SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
+SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
+#else
+# define sqlite3Fts3FreeDeferredTokens(x)
+# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
+# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
+# define sqlite3Fts3FreeDeferredDoclists(x)
+# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
+#endif
+
+SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *);
+SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *, int *);
+
+/* Special values interpreted by sqlite3SegReaderCursor() */
+#define FTS3_SEGCURSOR_PENDING        -1
+#define FTS3_SEGCURSOR_ALL            -2
+
+SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
+SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
+SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
+
+SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(Fts3Table *, 
+    int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
+
+/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
+#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
+#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
+#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
+#define FTS3_SEGMENT_PREFIX        0x00000008
+#define FTS3_SEGMENT_SCAN          0x00000010
+#define FTS3_SEGMENT_FIRST         0x00000020
+
+/* Type passed as 4th argument to SegmentReaderIterate() */
+struct Fts3SegFilter {
+  const char *zTerm;
+  int nTerm;
+  int iCol;
+  int flags;
+};
+
+struct Fts3MultiSegReader {
+  /* Used internally by sqlite3Fts3SegReaderXXX() calls */
+  Fts3SegReader **apSegment;      /* Array of Fts3SegReader objects */
+  int nSegment;                   /* Size of apSegment array */
+  int nAdvance;                   /* How many seg-readers to advance */
+  Fts3SegFilter *pFilter;         /* Pointer to filter object */
+  char *aBuffer;                  /* Buffer to merge doclists in */
+  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
+
+  int iColFilter;                 /* If >=0, filter for this column */
+  int bRestart;
+
+  /* Used by fts3.c only. */
+  int nCost;                      /* Cost of running iterator */
+  int bLookup;                    /* True if a lookup of a single entry. */
+
+  /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
+  char *zTerm;                    /* Pointer to term buffer */
+  int nTerm;                      /* Size of zTerm in bytes */
+  char *aDoclist;                 /* Pointer to doclist buffer */
+  int nDoclist;                   /* Size of aDoclist[] in bytes */
+};
+
+SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
+
+#define fts3GetVarint32(p, piVal) (                                           \
+  (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \
+)
+
+/* fts3.c */
+SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *, sqlite3_int64);
+SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
+SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *);
+SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64);
+SQLITE_PRIVATE void sqlite3Fts3Dequote(char *);
+SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
+SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
+SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
+SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
+
+/* fts3_tokenizer.c */
+SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *);
+SQLITE_PRIVATE int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
+SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 
+    sqlite3_tokenizer **, char **
+);
+SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char);
+
+/* fts3_snippet.c */
+SQLITE_PRIVATE void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
+SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
+  const char *, const char *, int, int
+);
+SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
+
+/* fts3_expr.c */
+SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
+  char **, int, int, int, const char *, int, Fts3Expr **, char **
+);
+SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *);
+#ifdef SQLITE_TEST
+SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
+SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db);
+#endif
+
+SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
+  sqlite3_tokenizer_cursor **
+);
+
+/* fts3_aux.c */
+SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db);
+
+SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
+
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart(
+    Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext(
+    Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
+SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); 
+SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
+
+/* fts3_tokenize_vtab.c */
+SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
+
+/* fts3_unicode2.c (functions generated by parsing unicode text files) */
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int, int);
+SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int);
+SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int);
+#endif
+
+#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
+#endif /* _FTSINT_H */
+
+/************** End of fts3Int.h *********************************************/
+/************** Continuing where we left off in fts3.c ***********************/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
+# define SQLITE_CORE 1
+#endif
+
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stddef.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
+/* #include <stdarg.h> */
+
+#ifndef SQLITE_CORE 
+  SQLITE_EXTENSION_INIT1
+#endif
+
+static int fts3EvalNext(Fts3Cursor *pCsr);
+static int fts3EvalStart(Fts3Cursor *pCsr);
+static int fts3TermSegReaderCursor(
+    Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
+
+/* 
+** Write a 64-bit variable-length integer to memory starting at p[0].
+** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
+** The number of bytes written is returned.
+*/
+SQLITE_PRIVATE int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
+  unsigned char *q = (unsigned char *) p;
+  sqlite_uint64 vu = v;
+  do{
+    *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
+    vu >>= 7;
+  }while( vu!=0 );
+  q[-1] &= 0x7f;  /* turn off high bit in final byte */
+  assert( q - (unsigned char *)p <= FTS3_VARINT_MAX );
+  return (int) (q - (unsigned char *)p);
+}
+
+#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \
+  v = (v & mask1) | ( (*ptr++) << shift );                    \
+  if( (v & mask2)==0 ){ var = v; return ret; }
+#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \
+  v = (*ptr++);                                               \
+  if( (v & mask2)==0 ){ var = v; return ret; }
+
+/* 
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read, or 0 on error.
+** The value is stored in *v.
+*/
+SQLITE_PRIVATE int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
+  const char *pStart = p;
+  u32 a;
+  u64 b;
+  int shift;
+
+  GETVARINT_INIT(a, p, 0,  0x00,     0x80, *v, 1);
+  GETVARINT_STEP(a, p, 7,  0x7F,     0x4000, *v, 2);
+  GETVARINT_STEP(a, p, 14, 0x3FFF,   0x200000, *v, 3);
+  GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4);
+  b = (a & 0x0FFFFFFF );
+
+  for(shift=28; shift<=63; shift+=7){
+    u64 c = *p++;
+    b += (c&0x7F) << shift;
+    if( (c & 0x80)==0 ) break;
+  }
+  *v = b;
+  return (int)(p - pStart);
+}
+
+/*
+** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a
+** 32-bit integer before it is returned.
+*/
+SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *p, int *pi){
+  u32 a;
+
+#ifndef fts3GetVarint32
+  GETVARINT_INIT(a, p, 0,  0x00,     0x80, *pi, 1);
+#else
+  a = (*p++);
+  assert( a & 0x80 );
+#endif
+
+  GETVARINT_STEP(a, p, 7,  0x7F,     0x4000, *pi, 2);
+  GETVARINT_STEP(a, p, 14, 0x3FFF,   0x200000, *pi, 3);
+  GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4);
+  a = (a & 0x0FFFFFFF );
+  *pi = (int)(a | ((u32)(*p & 0x0F) << 28));
+  return 5;
+}
+
+/*
+** Return the number of bytes required to encode v as a varint
+*/
+SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64 v){
+  int i = 0;
+  do{
+    i++;
+    v >>= 7;
+  }while( v!=0 );
+  return i;
+}
+
+/*
+** Convert an SQL-style quoted string into a normal string by removing
+** the quote characters.  The conversion is done in-place.  If the
+** input does not begin with a quote character, then this routine
+** is a no-op.
+**
+** Examples:
+**
+**     "abc"   becomes   abc
+**     'xyz'   becomes   xyz
+**     [pqr]   becomes   pqr
+**     `mno`   becomes   mno
+**
+*/
+SQLITE_PRIVATE void sqlite3Fts3Dequote(char *z){
+  char quote;                     /* Quote character (if any ) */
+
+  quote = z[0];
+  if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
+    int iIn = 1;                  /* Index of next byte to read from input */
+    int iOut = 0;                 /* Index of next byte to write to output */
+
+    /* If the first byte was a '[', then the close-quote character is a ']' */
+    if( quote=='[' ) quote = ']';  
+
+    while( ALWAYS(z[iIn]) ){
+      if( z[iIn]==quote ){
+        if( z[iIn+1]!=quote ) break;
+        z[iOut++] = quote;
+        iIn += 2;
+      }else{
+        z[iOut++] = z[iIn++];
+      }
+    }
+    z[iOut] = '\0';
+  }
+}
+
+/*
+** Read a single varint from the doclist at *pp and advance *pp to point
+** to the first byte past the end of the varint.  Add the value of the varint
+** to *pVal.
+*/
+static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
+  sqlite3_int64 iVal;
+  *pp += sqlite3Fts3GetVarint(*pp, &iVal);
+  *pVal += iVal;
+}
+
+/*
+** When this function is called, *pp points to the first byte following a
+** varint that is part of a doclist (or position-list, or any other list
+** of varints). This function moves *pp to point to the start of that varint,
+** and sets *pVal by the varint value.
+**
+** Argument pStart points to the first byte of the doclist that the
+** varint is part of.
+*/
+static void fts3GetReverseVarint(
+  char **pp, 
+  char *pStart, 
+  sqlite3_int64 *pVal
+){
+  sqlite3_int64 iVal;
+  char *p;
+
+  /* Pointer p now points at the first byte past the varint we are 
+  ** interested in. So, unless the doclist is corrupt, the 0x80 bit is
+  ** clear on character p[-1]. */
+  for(p = (*pp)-2; p>=pStart && *p&0x80; p--);
+  p++;
+  *pp = p;
+
+  sqlite3Fts3GetVarint(p, &iVal);
+  *pVal = iVal;
+}
+
+/*
+** The xDisconnect() virtual table method.
+*/
+static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3Table *p = (Fts3Table *)pVtab;
+  int i;
+
+  assert( p->nPendingData==0 );
+  assert( p->pSegments==0 );
+
+  /* Free any prepared statements held */
+  for(i=0; i<SizeofArray(p->aStmt); i++){
+    sqlite3_finalize(p->aStmt[i]);
+  }
+  sqlite3_free(p->zSegmentsTbl);
+  sqlite3_free(p->zReadExprlist);
+  sqlite3_free(p->zWriteExprlist);
+  sqlite3_free(p->zContentTbl);
+  sqlite3_free(p->zLanguageid);
+
+  /* Invoke the tokenizer destructor to free the tokenizer. */
+  p->pTokenizer->pModule->xDestroy(p->pTokenizer);
+
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Construct one or more SQL statements from the format string given
+** and then evaluate those statements. The success code is written
+** into *pRc.
+**
+** If *pRc is initially non-zero then this routine is a no-op.
+*/
+static void fts3DbExec(
+  int *pRc,              /* Success code */
+  sqlite3 *db,           /* Database in which to run SQL */
+  const char *zFormat,   /* Format string for SQL */
+  ...                    /* Arguments to the format string */
+){
+  va_list ap;
+  char *zSql;
+  if( *pRc ) return;
+  va_start(ap, zFormat);
+  zSql = sqlite3_vmprintf(zFormat, ap);
+  va_end(ap);
+  if( zSql==0 ){
+    *pRc = SQLITE_NOMEM;
+  }else{
+    *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
+    sqlite3_free(zSql);
+  }
+}
+
+/*
+** The xDestroy() virtual table method.
+*/
+static int fts3DestroyMethod(sqlite3_vtab *pVtab){
+  Fts3Table *p = (Fts3Table *)pVtab;
+  int rc = SQLITE_OK;              /* Return code */
+  const char *zDb = p->zDb;        /* Name of database (e.g. "main", "temp") */
+  sqlite3 *db = p->db;             /* Database handle */
+
+  /* Drop the shadow tables */
+  if( p->zContentTbl==0 ){
+    fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
+  }
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
+
+  /* If everything has worked, invoke fts3DisconnectMethod() to free the
+  ** memory associated with the Fts3Table structure and return SQLITE_OK.
+  ** Otherwise, return an SQLite error code.
+  */
+  return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc);
+}
+
+
+/*
+** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table
+** passed as the first argument. This is done as part of the xConnect()
+** and xCreate() methods.
+**
+** If *pRc is non-zero when this function is called, it is a no-op. 
+** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
+** before returning.
+*/
+static void fts3DeclareVtab(int *pRc, Fts3Table *p){
+  if( *pRc==SQLITE_OK ){
+    int i;                        /* Iterator variable */
+    int rc;                       /* Return code */
+    char *zSql;                   /* SQL statement passed to declare_vtab() */
+    char *zCols;                  /* List of user defined columns */
+    const char *zLanguageid;
+
+    zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
+    sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
+
+    /* Create a list of user columns for the virtual table */
+    zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
+    for(i=1; zCols && i<p->nColumn; i++){
+      zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
+    }
+
+    /* Create the whole "CREATE TABLE" statement to pass to SQLite */
+    zSql = sqlite3_mprintf(
+        "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", 
+        zCols, p->zName, zLanguageid
+    );
+    if( !zCols || !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_declare_vtab(p->db, zSql);
+    }
+
+    sqlite3_free(zSql);
+    sqlite3_free(zCols);
+    *pRc = rc;
+  }
+}
+
+/*
+** Create the %_stat table if it does not already exist.
+*/
+SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
+  fts3DbExec(pRc, p->db, 
+      "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
+          "(id INTEGER PRIMARY KEY, value BLOB);",
+      p->zDb, p->zName
+  );
+  if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
+}
+
+/*
+** Create the backing store tables (%_content, %_segments and %_segdir)
+** required by the FTS3 table passed as the only argument. This is done
+** as part of the vtab xCreate() method.
+**
+** If the p->bHasDocsize boolean is true (indicating that this is an
+** FTS4 table, not an FTS3 table) then also create the %_docsize and
+** %_stat tables required by FTS4.
+*/
+static int fts3CreateTables(Fts3Table *p){
+  int rc = SQLITE_OK;             /* Return code */
+  int i;                          /* Iterator variable */
+  sqlite3 *db = p->db;            /* The database connection */
+
+  if( p->zContentTbl==0 ){
+    const char *zLanguageid = p->zLanguageid;
+    char *zContentCols;           /* Columns of %_content table */
+
+    /* Create a list of user columns for the content table */
+    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
+    for(i=0; zContentCols && i<p->nColumn; i++){
+      char *z = p->azColumn[i];
+      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
+    }
+    if( zLanguageid && zContentCols ){
+      zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
+    }
+    if( zContentCols==0 ) rc = SQLITE_NOMEM;
+  
+    /* Create the content table */
+    fts3DbExec(&rc, db, 
+       "CREATE TABLE %Q.'%q_content'(%s)",
+       p->zDb, p->zName, zContentCols
+    );
+    sqlite3_free(zContentCols);
+  }
+
+  /* Create other tables */
+  fts3DbExec(&rc, db, 
+      "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
+      p->zDb, p->zName
+  );
+  fts3DbExec(&rc, db, 
+      "CREATE TABLE %Q.'%q_segdir'("
+        "level INTEGER,"
+        "idx INTEGER,"
+        "start_block INTEGER,"
+        "leaves_end_block INTEGER,"
+        "end_block INTEGER,"
+        "root BLOB,"
+        "PRIMARY KEY(level, idx)"
+      ");",
+      p->zDb, p->zName
+  );
+  if( p->bHasDocsize ){
+    fts3DbExec(&rc, db, 
+        "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
+        p->zDb, p->zName
+    );
+  }
+  assert( p->bHasStat==p->bFts4 );
+  if( p->bHasStat ){
+    sqlite3Fts3CreateStatTable(&rc, p);
+  }
+  return rc;
+}
+
+/*
+** Store the current database page-size in bytes in p->nPgsz.
+**
+** If *pRc is non-zero when this function is called, it is a no-op. 
+** Otherwise, if an error occurs, an SQLite error code is stored in *pRc
+** before returning.
+*/
+static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
+  if( *pRc==SQLITE_OK ){
+    int rc;                       /* Return code */
+    char *zSql;                   /* SQL text "PRAGMA %Q.page_size" */
+    sqlite3_stmt *pStmt;          /* Compiled "PRAGMA %Q.page_size" statement */
+  
+    zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
+    if( !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
+      if( rc==SQLITE_OK ){
+        sqlite3_step(pStmt);
+        p->nPgsz = sqlite3_column_int(pStmt, 0);
+        rc = sqlite3_finalize(pStmt);
+      }else if( rc==SQLITE_AUTH ){
+        p->nPgsz = 1024;
+        rc = SQLITE_OK;
+      }
+    }
+    assert( p->nPgsz>0 || rc!=SQLITE_OK );
+    sqlite3_free(zSql);
+    *pRc = rc;
+  }
+}
+
+/*
+** "Special" FTS4 arguments are column specifications of the following form:
+**
+**   <key> = <value>
+**
+** There may not be whitespace surrounding the "=" character. The <value> 
+** term may be quoted, but the <key> may not.
+*/
+static int fts3IsSpecialColumn(
+  const char *z, 
+  int *pnKey,
+  char **pzValue
+){
+  char *zValue;
+  const char *zCsr = z;
+
+  while( *zCsr!='=' ){
+    if( *zCsr=='\0' ) return 0;
+    zCsr++;
+  }
+
+  *pnKey = (int)(zCsr-z);
+  zValue = sqlite3_mprintf("%s", &zCsr[1]);
+  if( zValue ){
+    sqlite3Fts3Dequote(zValue);
+  }
+  *pzValue = zValue;
+  return 1;
+}
+
+/*
+** Append the output of a printf() style formatting to an existing string.
+*/
+static void fts3Appendf(
+  int *pRc,                       /* IN/OUT: Error code */
+  char **pz,                      /* IN/OUT: Pointer to string buffer */
+  const char *zFormat,            /* Printf format string to append */
+  ...                             /* Arguments for printf format string */
+){
+  if( *pRc==SQLITE_OK ){
+    va_list ap;
+    char *z;
+    va_start(ap, zFormat);
+    z = sqlite3_vmprintf(zFormat, ap);
+    va_end(ap);
+    if( z && *pz ){
+      char *z2 = sqlite3_mprintf("%s%s", *pz, z);
+      sqlite3_free(z);
+      z = z2;
+    }
+    if( z==0 ) *pRc = SQLITE_NOMEM;
+    sqlite3_free(*pz);
+    *pz = z;
+  }
+}
+
+/*
+** Return a copy of input string zInput enclosed in double-quotes (") and
+** with all double quote characters escaped. For example:
+**
+**     fts3QuoteId("un \"zip\"")   ->    "un \"\"zip\"\""
+**
+** The pointer returned points to memory obtained from sqlite3_malloc(). It
+** is the callers responsibility to call sqlite3_free() to release this
+** memory.
+*/
+static char *fts3QuoteId(char const *zInput){
+  int nRet;
+  char *zRet;
+  nRet = 2 + (int)strlen(zInput)*2 + 1;
+  zRet = sqlite3_malloc(nRet);
+  if( zRet ){
+    int i;
+    char *z = zRet;
+    *(z++) = '"';
+    for(i=0; zInput[i]; i++){
+      if( zInput[i]=='"' ) *(z++) = '"';
+      *(z++) = zInput[i];
+    }
+    *(z++) = '"';
+    *(z++) = '\0';
+  }
+  return zRet;
+}
+
+/*
+** Return a list of comma separated SQL expressions and a FROM clause that 
+** could be used in a SELECT statement such as the following:
+**
+**     SELECT <list of expressions> FROM %_content AS x ...
+**
+** to return the docid, followed by each column of text data in order
+** from left to write. If parameter zFunc is not NULL, then instead of
+** being returned directly each column of text data is passed to an SQL
+** function named zFunc first. For example, if zFunc is "unzip" and the
+** table has the three user-defined columns "a", "b", and "c", the following
+** string is returned:
+**
+**     "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
+**
+** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
+** is the responsibility of the caller to eventually free it.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
+** a NULL pointer is returned). Otherwise, if an OOM error is encountered
+** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
+** no error occurs, *pRc is left unmodified.
+*/
+static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
+  char *zRet = 0;
+  char *zFree = 0;
+  char *zFunction;
+  int i;
+
+  if( p->zContentTbl==0 ){
+    if( !zFunc ){
+      zFunction = "";
+    }else{
+      zFree = zFunction = fts3QuoteId(zFunc);
+    }
+    fts3Appendf(pRc, &zRet, "docid");
+    for(i=0; i<p->nColumn; i++){
+      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
+    }
+    if( p->zLanguageid ){
+      fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
+    }
+    sqlite3_free(zFree);
+  }else{
+    fts3Appendf(pRc, &zRet, "rowid");
+    for(i=0; i<p->nColumn; i++){
+      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
+    }
+    if( p->zLanguageid ){
+      fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
+    }
+  }
+  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
+      p->zDb,
+      (p->zContentTbl ? p->zContentTbl : p->zName),
+      (p->zContentTbl ? "" : "_content")
+  );
+  return zRet;
+}
+
+/*
+** Return a list of N comma separated question marks, where N is the number
+** of columns in the %_content table (one for the docid plus one for each
+** user-defined text column).
+**
+** If argument zFunc is not NULL, then all but the first question mark
+** is preceded by zFunc and an open bracket, and followed by a closed
+** bracket. For example, if zFunc is "zip" and the FTS3 table has three 
+** user-defined text columns, the following string is returned:
+**
+**     "?, zip(?), zip(?), zip(?)"
+**
+** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
+** is the responsibility of the caller to eventually free it.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
+** a NULL pointer is returned). Otherwise, if an OOM error is encountered
+** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If
+** no error occurs, *pRc is left unmodified.
+*/
+static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
+  char *zRet = 0;
+  char *zFree = 0;
+  char *zFunction;
+  int i;
+
+  if( !zFunc ){
+    zFunction = "";
+  }else{
+    zFree = zFunction = fts3QuoteId(zFunc);
+  }
+  fts3Appendf(pRc, &zRet, "?");
+  for(i=0; i<p->nColumn; i++){
+    fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
+  }
+  if( p->zLanguageid ){
+    fts3Appendf(pRc, &zRet, ", ?");
+  }
+  sqlite3_free(zFree);
+  return zRet;
+}
+
+/*
+** This function interprets the string at (*pp) as a non-negative integer
+** value. It reads the integer and sets *pnOut to the value read, then 
+** sets *pp to point to the byte immediately following the last byte of
+** the integer value.
+**
+** Only decimal digits ('0'..'9') may be part of an integer value. 
+**
+** If *pp does not being with a decimal digit SQLITE_ERROR is returned and
+** the output value undefined. Otherwise SQLITE_OK is returned.
+**
+** This function is used when parsing the "prefix=" FTS4 parameter.
+*/
+static int fts3GobbleInt(const char **pp, int *pnOut){
+  const char *p;                  /* Iterator pointer */
+  int nInt = 0;                   /* Output value */
+
+  for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
+    nInt = nInt * 10 + (p[0] - '0');
+  }
+  if( p==*pp ) return SQLITE_ERROR;
+  *pnOut = nInt;
+  *pp = p;
+  return SQLITE_OK;
+}
+
+/*
+** This function is called to allocate an array of Fts3Index structures
+** representing the indexes maintained by the current FTS table. FTS tables
+** always maintain the main "terms" index, but may also maintain one or
+** more "prefix" indexes, depending on the value of the "prefix=" parameter
+** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
+**
+** Argument zParam is passed the value of the "prefix=" option if one was
+** specified, or NULL otherwise.
+**
+** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
+** the allocated array. *pnIndex is set to the number of elements in the
+** array. If an error does occur, an SQLite error code is returned.
+**
+** Regardless of whether or not an error is returned, it is the responsibility
+** of the caller to call sqlite3_free() on the output array to free it.
+*/
+static int fts3PrefixParameter(
+  const char *zParam,             /* ABC in prefix=ABC parameter to parse */
+  int *pnIndex,                   /* OUT: size of *apIndex[] array */
+  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
+){
+  struct Fts3Index *aIndex;       /* Allocated array */
+  int nIndex = 1;                 /* Number of entries in array */
+
+  if( zParam && zParam[0] ){
+    const char *p;
+    nIndex++;
+    for(p=zParam; *p; p++){
+      if( *p==',' ) nIndex++;
+    }
+  }
+
+  aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
+  *apIndex = aIndex;
+  *pnIndex = nIndex;
+  if( !aIndex ){
+    return SQLITE_NOMEM;
+  }
+
+  memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
+  if( zParam ){
+    const char *p = zParam;
+    int i;
+    for(i=1; i<nIndex; i++){
+      int nPrefix;
+      if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR;
+      aIndex[i].nPrefix = nPrefix;
+      p++;
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** This function is called when initializing an FTS4 table that uses the
+** content=xxx option. It determines the number of and names of the columns
+** of the new FTS4 table.
+**
+** The third argument passed to this function is the value passed to the
+** config=xxx option (i.e. "xxx"). This function queries the database for
+** a table of that name. If found, the output variables are populated
+** as follows:
+**
+**   *pnCol:   Set to the number of columns table xxx has,
+**
+**   *pnStr:   Set to the total amount of space required to store a copy
+**             of each columns name, including the nul-terminator.
+**
+**   *pazCol:  Set to point to an array of *pnCol strings. Each string is
+**             the name of the corresponding column in table xxx. The array
+**             and its contents are allocated using a single allocation. It
+**             is the responsibility of the caller to free this allocation
+**             by eventually passing the *pazCol value to sqlite3_free().
+**
+** If the table cannot be found, an error code is returned and the output
+** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
+** returned (and the output variables are undefined).
+*/
+static int fts3ContentColumns(
+  sqlite3 *db,                    /* Database handle */
+  const char *zDb,                /* Name of db (i.e. "main", "temp" etc.) */
+  const char *zTbl,               /* Name of content table */
+  const char ***pazCol,           /* OUT: Malloc'd array of column names */
+  int *pnCol,                     /* OUT: Size of array *pazCol */
+  int *pnStr                      /* OUT: Bytes of string content */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  char *zSql;                     /* "SELECT *" statement on zTbl */  
+  sqlite3_stmt *pStmt = 0;        /* Compiled version of zSql */
+
+  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
+  if( !zSql ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  }
+  sqlite3_free(zSql);
+
+  if( rc==SQLITE_OK ){
+    const char **azCol;           /* Output array */
+    int nStr = 0;                 /* Size of all column names (incl. 0x00) */
+    int nCol;                     /* Number of table columns */
+    int i;                        /* Used to iterate through columns */
+
+    /* Loop through the returned columns. Set nStr to the number of bytes of
+    ** space required to store a copy of each column name, including the
+    ** nul-terminator byte.  */
+    nCol = sqlite3_column_count(pStmt);
+    for(i=0; i<nCol; i++){
+      const char *zCol = sqlite3_column_name(pStmt, i);
+      nStr += (int)strlen(zCol) + 1;
+    }
+
+    /* Allocate and populate the array to return. */
+    azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
+    if( azCol==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      char *p = (char *)&azCol[nCol];
+      for(i=0; i<nCol; i++){
+        const char *zCol = sqlite3_column_name(pStmt, i);
+        int n = (int)strlen(zCol)+1;
+        memcpy(p, zCol, n);
+        azCol[i] = p;
+        p += n;
+      }
+    }
+    sqlite3_finalize(pStmt);
+
+    /* Set the output variables. */
+    *pnCol = nCol;
+    *pnStr = nStr;
+    *pazCol = azCol;
+  }
+
+  return rc;
+}
+
+/*
+** This function is the implementation of both the xConnect and xCreate
+** methods of the FTS3 virtual table.
+**
+** The argv[] array contains the following:
+**
+**   argv[0]   -> module name  ("fts3" or "fts4")
+**   argv[1]   -> database name
+**   argv[2]   -> table name
+**   argv[...] -> "column name" and other module argument fields.
+*/
+static int fts3InitVtab(
+  int isCreate,                   /* True for xCreate, false for xConnect */
+  sqlite3 *db,                    /* The SQLite database connection */
+  void *pAux,                     /* Hash table containing tokenizers */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
+  char **pzErr                    /* Write any error message here */
+){
+  Fts3Hash *pHash = (Fts3Hash *)pAux;
+  Fts3Table *p = 0;               /* Pointer to allocated vtab */
+  int rc = SQLITE_OK;             /* Return code */
+  int i;                          /* Iterator variable */
+  int nByte;                      /* Size of allocation used for *p */
+  int iCol;                       /* Column index */
+  int nString = 0;                /* Bytes required to hold all column names */
+  int nCol = 0;                   /* Number of columns in the FTS table */
+  char *zCsr;                     /* Space for holding column names */
+  int nDb;                        /* Bytes required to hold database name */
+  int nName;                      /* Bytes required to hold table name */
+  int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
+  const char **aCol;              /* Array of column names */
+  sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
+
+  int nIndex;                     /* Size of aIndex[] array */
+  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
+
+  /* The results of parsing supported FTS4 key=value options: */
+  int bNoDocsize = 0;             /* True to omit %_docsize table */
+  int bDescIdx = 0;               /* True to store descending indexes */
+  char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
+  char *zCompress = 0;            /* compress=? parameter (or NULL) */
+  char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
+  char *zContent = 0;             /* content=? parameter (or NULL) */
+  char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */
+  char **azNotindexed = 0;        /* The set of notindexed= columns */
+  int nNotindexed = 0;            /* Size of azNotindexed[] array */
+
+  assert( strlen(argv[0])==4 );
+  assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
+       || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
+  );
+
+  nDb = (int)strlen(argv[1]) + 1;
+  nName = (int)strlen(argv[2]) + 1;
+
+  nByte = sizeof(const char *) * (argc-2);
+  aCol = (const char **)sqlite3_malloc(nByte);
+  if( aCol ){
+    memset((void*)aCol, 0, nByte);
+    azNotindexed = (char **)sqlite3_malloc(nByte);
+  }
+  if( azNotindexed ){
+    memset(azNotindexed, 0, nByte);
+  }
+  if( !aCol || !azNotindexed ){
+    rc = SQLITE_NOMEM;
+    goto fts3_init_out;
+  }
+
+  /* Loop through all of the arguments passed by the user to the FTS3/4
+  ** module (i.e. all the column names and special arguments). This loop
+  ** does the following:
+  **
+  **   + Figures out the number of columns the FTSX table will have, and
+  **     the number of bytes of space that must be allocated to store copies
+  **     of the column names.
+  **
+  **   + If there is a tokenizer specification included in the arguments,
+  **     initializes the tokenizer pTokenizer.
+  */
+  for(i=3; rc==SQLITE_OK && i<argc; i++){
+    char const *z = argv[i];
+    int nKey;
+    char *zVal;
+
+    /* Check if this is a tokenizer specification */
+    if( !pTokenizer 
+     && strlen(z)>8
+     && 0==sqlite3_strnicmp(z, "tokenize", 8) 
+     && 0==sqlite3Fts3IsIdChar(z[8])
+    ){
+      rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
+    }
+
+    /* Check if it is an FTS4 special argument. */
+    else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
+      struct Fts4Option {
+        const char *zOpt;
+        int nOpt;
+      } aFts4Opt[] = {
+        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
+        { "prefix",      6 },     /* 1 -> PREFIX */
+        { "compress",    8 },     /* 2 -> COMPRESS */
+        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
+        { "order",       5 },     /* 4 -> ORDER */
+        { "content",     7 },     /* 5 -> CONTENT */
+        { "languageid", 10 },     /* 6 -> LANGUAGEID */
+        { "notindexed", 10 }      /* 7 -> NOTINDEXED */
+      };
+
+      int iOpt;
+      if( !zVal ){
+        rc = SQLITE_NOMEM;
+      }else{
+        for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
+          struct Fts4Option *pOp = &aFts4Opt[iOpt];
+          if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
+            break;
+          }
+        }
+        if( iOpt==SizeofArray(aFts4Opt) ){
+          *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
+          rc = SQLITE_ERROR;
+        }else{
+          switch( iOpt ){
+            case 0:               /* MATCHINFO */
+              if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
+                *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
+                rc = SQLITE_ERROR;
+              }
+              bNoDocsize = 1;
+              break;
+
+            case 1:               /* PREFIX */
+              sqlite3_free(zPrefix);
+              zPrefix = zVal;
+              zVal = 0;
+              break;
+
+            case 2:               /* COMPRESS */
+              sqlite3_free(zCompress);
+              zCompress = zVal;
+              zVal = 0;
+              break;
+
+            case 3:               /* UNCOMPRESS */
+              sqlite3_free(zUncompress);
+              zUncompress = zVal;
+              zVal = 0;
+              break;
+
+            case 4:               /* ORDER */
+              if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) 
+               && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) 
+              ){
+                *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
+                rc = SQLITE_ERROR;
+              }
+              bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
+              break;
+
+            case 5:              /* CONTENT */
+              sqlite3_free(zContent);
+              zContent = zVal;
+              zVal = 0;
+              break;
+
+            case 6:              /* LANGUAGEID */
+              assert( iOpt==6 );
+              sqlite3_free(zLanguageid);
+              zLanguageid = zVal;
+              zVal = 0;
+              break;
+
+            case 7:              /* NOTINDEXED */
+              azNotindexed[nNotindexed++] = zVal;
+              zVal = 0;
+              break;
+          }
+        }
+        sqlite3_free(zVal);
+      }
+    }
+
+    /* Otherwise, the argument is a column name. */
+    else {
+      nString += (int)(strlen(z) + 1);
+      aCol[nCol++] = z;
+    }
+  }
+
+  /* If a content=xxx option was specified, the following:
+  **
+  **   1. Ignore any compress= and uncompress= options.
+  **
+  **   2. If no column names were specified as part of the CREATE VIRTUAL
+  **      TABLE statement, use all columns from the content table.
+  */
+  if( rc==SQLITE_OK && zContent ){
+    sqlite3_free(zCompress); 
+    sqlite3_free(zUncompress); 
+    zCompress = 0;
+    zUncompress = 0;
+    if( nCol==0 ){
+      sqlite3_free((void*)aCol); 
+      aCol = 0;
+      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
+
+      /* If a languageid= option was specified, remove the language id
+      ** column from the aCol[] array. */ 
+      if( rc==SQLITE_OK && zLanguageid ){
+        int j;
+        for(j=0; j<nCol; j++){
+          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
+            int k;
+            for(k=j; k<nCol; k++) aCol[k] = aCol[k+1];
+            nCol--;
+            break;
+          }
+        }
+      }
+    }
+  }
+  if( rc!=SQLITE_OK ) goto fts3_init_out;
+
+  if( nCol==0 ){
+    assert( nString==0 );
+    aCol[0] = "content";
+    nString = 8;
+    nCol = 1;
+  }
+
+  if( pTokenizer==0 ){
+    rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
+    if( rc!=SQLITE_OK ) goto fts3_init_out;
+  }
+  assert( pTokenizer );
+
+  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
+  if( rc==SQLITE_ERROR ){
+    assert( zPrefix );
+    *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
+  }
+  if( rc!=SQLITE_OK ) goto fts3_init_out;
+
+  /* Allocate and populate the Fts3Table structure. */
+  nByte = sizeof(Fts3Table) +                  /* Fts3Table */
+          nCol * sizeof(char *) +              /* azColumn */
+          nIndex * sizeof(struct Fts3Index) +  /* aIndex */
+          nCol * sizeof(u8) +                  /* abNotindexed */
+          nName +                              /* zName */
+          nDb +                                /* zDb */
+          nString;                             /* Space for azColumn strings */
+  p = (Fts3Table*)sqlite3_malloc(nByte);
+  if( p==0 ){
+    rc = SQLITE_NOMEM;
+    goto fts3_init_out;
+  }
+  memset(p, 0, nByte);
+  p->db = db;
+  p->nColumn = nCol;
+  p->nPendingData = 0;
+  p->azColumn = (char **)&p[1];
+  p->pTokenizer = pTokenizer;
+  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
+  p->bHasDocsize = (isFts4 && bNoDocsize==0);
+  p->bHasStat = isFts4;
+  p->bFts4 = isFts4;
+  p->bDescIdx = bDescIdx;
+  p->nAutoincrmerge = 0xff;   /* 0xff means setting unknown */
+  p->zContentTbl = zContent;
+  p->zLanguageid = zLanguageid;
+  zContent = 0;
+  zLanguageid = 0;
+  TESTONLY( p->inTransaction = -1 );
+  TESTONLY( p->mxSavepoint = -1 );
+
+  p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
+  memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
+  p->nIndex = nIndex;
+  for(i=0; i<nIndex; i++){
+    fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
+  }
+  p->abNotindexed = (u8 *)&p->aIndex[nIndex];
+
+  /* Fill in the zName and zDb fields of the vtab structure. */
+  zCsr = (char *)&p->abNotindexed[nCol];
+  p->zName = zCsr;
+  memcpy(zCsr, argv[2], nName);
+  zCsr += nName;
+  p->zDb = zCsr;
+  memcpy(zCsr, argv[1], nDb);
+  zCsr += nDb;
+
+  /* Fill in the azColumn array */
+  for(iCol=0; iCol<nCol; iCol++){
+    char *z; 
+    int n = 0;
+    z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
+    memcpy(zCsr, z, n);
+    zCsr[n] = '\0';
+    sqlite3Fts3Dequote(zCsr);
+    p->azColumn[iCol] = zCsr;
+    zCsr += n+1;
+    assert( zCsr <= &((char *)p)[nByte] );
+  }
+
+  /* Fill in the abNotindexed array */
+  for(iCol=0; iCol<nCol; iCol++){
+    int n = (int)strlen(p->azColumn[iCol]);
+    for(i=0; i<nNotindexed; i++){
+      char *zNot = azNotindexed[i];
+      if( zNot && n==(int)strlen(zNot)
+       && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n) 
+      ){
+        p->abNotindexed[iCol] = 1;
+        sqlite3_free(zNot);
+        azNotindexed[i] = 0;
+      }
+    }
+  }
+  for(i=0; i<nNotindexed; i++){
+    if( azNotindexed[i] ){
+      *pzErr = sqlite3_mprintf("no such column: %s", azNotindexed[i]);
+      rc = SQLITE_ERROR;
+    }
+  }
+
+  if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){
+    char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
+    rc = SQLITE_ERROR;
+    *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
+  }
+  p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
+  p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
+  if( rc!=SQLITE_OK ) goto fts3_init_out;
+
+  /* If this is an xCreate call, create the underlying tables in the 
+  ** database. TODO: For xConnect(), it could verify that said tables exist.
+  */
+  if( isCreate ){
+    rc = fts3CreateTables(p);
+  }
+
+  /* Check to see if a legacy fts3 table has been "upgraded" by the
+  ** addition of a %_stat table so that it can use incremental merge.
+  */
+  if( !isFts4 && !isCreate ){
+    p->bHasStat = 2;
+  }
+
+  /* Figure out the page-size for the database. This is required in order to
+  ** estimate the cost of loading large doclists from the database.  */
+  fts3DatabasePageSize(&rc, p);
+  p->nNodeSize = p->nPgsz-35;
+
+  /* Declare the table schema to SQLite. */
+  fts3DeclareVtab(&rc, p);
+
+fts3_init_out:
+  sqlite3_free(zPrefix);
+  sqlite3_free(aIndex);
+  sqlite3_free(zCompress);
+  sqlite3_free(zUncompress);
+  sqlite3_free(zContent);
+  sqlite3_free(zLanguageid);
+  for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]);
+  sqlite3_free((void *)aCol);
+  sqlite3_free((void *)azNotindexed);
+  if( rc!=SQLITE_OK ){
+    if( p ){
+      fts3DisconnectMethod((sqlite3_vtab *)p);
+    }else if( pTokenizer ){
+      pTokenizer->pModule->xDestroy(pTokenizer);
+    }
+  }else{
+    assert( p->pSegments==0 );
+    *ppVTab = &p->base;
+  }
+  return rc;
+}
+
+/*
+** The xConnect() and xCreate() methods for the virtual table. All the
+** work is done in function fts3InitVtab().
+*/
+static int fts3ConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pAux,                     /* Pointer to tokenizer hash table */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
+}
+static int fts3CreateMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pAux,                     /* Pointer to tokenizer hash table */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
+}
+
+/*
+** Set the pIdxInfo->estimatedRows variable to nRow. Unless this
+** extension is currently being used by a version of SQLite too old to
+** support estimatedRows. In that case this function is a no-op.
+*/
+static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
+#if SQLITE_VERSION_NUMBER>=3008002
+  if( sqlite3_libversion_number()>=3008002 ){
+    pIdxInfo->estimatedRows = nRow;
+  }
+#endif
+}
+
+/* 
+** Implementation of the xBestIndex method for FTS3 tables. There
+** are three possible strategies, in order of preference:
+**
+**   1. Direct lookup by rowid or docid. 
+**   2. Full-text search using a MATCH operator on a non-docid column.
+**   3. Linear scan of %_content table.
+*/
+static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
+  Fts3Table *p = (Fts3Table *)pVTab;
+  int i;                          /* Iterator variable */
+  int iCons = -1;                 /* Index of constraint to use */
+
+  int iLangidCons = -1;           /* Index of langid=x constraint, if present */
+  int iDocidGe = -1;              /* Index of docid>=x constraint, if present */
+  int iDocidLe = -1;              /* Index of docid<=x constraint, if present */
+  int iIdx;
+
+  /* By default use a full table scan. This is an expensive option,
+  ** so search through the constraints to see if a more efficient 
+  ** strategy is possible.
+  */
+  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
+  pInfo->estimatedCost = 5000000;
+  for(i=0; i<pInfo->nConstraint; i++){
+    int bDocid;                 /* True if this constraint is on docid */
+    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
+    if( pCons->usable==0 ){
+      if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
+        /* There exists an unusable MATCH constraint. This means that if
+        ** the planner does elect to use the results of this call as part
+        ** of the overall query plan the user will see an "unable to use
+        ** function MATCH in the requested context" error. To discourage
+        ** this, return a very high cost here.  */
+        pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
+        pInfo->estimatedCost = 1e50;
+        fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
+        return SQLITE_OK;
+      }
+      continue;
+    }
+
+    bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
+
+    /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
+    if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){
+      pInfo->idxNum = FTS3_DOCID_SEARCH;
+      pInfo->estimatedCost = 1.0;
+      iCons = i;
+    }
+
+    /* A MATCH constraint. Use a full-text search.
+    **
+    ** If there is more than one MATCH constraint available, use the first
+    ** one encountered. If there is both a MATCH constraint and a direct
+    ** rowid/docid lookup, prefer the MATCH strategy. This is done even 
+    ** though the rowid/docid lookup is faster than a MATCH query, selecting
+    ** it would lead to an "unable to use function MATCH in the requested 
+    ** context" error.
+    */
+    if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH 
+     && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
+    ){
+      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
+      pInfo->estimatedCost = 2.0;
+      iCons = i;
+    }
+
+    /* Equality constraint on the langid column */
+    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
+     && pCons->iColumn==p->nColumn + 2
+    ){
+      iLangidCons = i;
+    }
+
+    if( bDocid ){
+      switch( pCons->op ){
+        case SQLITE_INDEX_CONSTRAINT_GE:
+        case SQLITE_INDEX_CONSTRAINT_GT:
+          iDocidGe = i;
+          break;
+
+        case SQLITE_INDEX_CONSTRAINT_LE:
+        case SQLITE_INDEX_CONSTRAINT_LT:
+          iDocidLe = i;
+          break;
+      }
+    }
+  }
+
+  iIdx = 1;
+  if( iCons>=0 ){
+    pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
+    pInfo->aConstraintUsage[iCons].omit = 1;
+  } 
+  if( iLangidCons>=0 ){
+    pInfo->idxNum |= FTS3_HAVE_LANGID;
+    pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
+  } 
+  if( iDocidGe>=0 ){
+    pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
+    pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
+  } 
+  if( iDocidLe>=0 ){
+    pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
+    pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
+  } 
+
+  /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
+  ** docid) order. Both ascending and descending are possible. 
+  */
+  if( pInfo->nOrderBy==1 ){
+    struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
+    if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
+      if( pOrder->desc ){
+        pInfo->idxStr = "DESC";
+      }else{
+        pInfo->idxStr = "ASC";
+      }
+      pInfo->orderByConsumed = 1;
+    }
+  }
+
+  assert( p->pSegments==0 );
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of xOpen method.
+*/
+static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  sqlite3_vtab_cursor *pCsr;               /* Allocated cursor */
+
+  UNUSED_PARAMETER(pVTab);
+
+  /* Allocate a buffer large enough for an Fts3Cursor structure. If the
+  ** allocation succeeds, zero it and return SQLITE_OK. Otherwise, 
+  ** if the allocation fails, return SQLITE_NOMEM.
+  */
+  *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
+  if( !pCsr ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(Fts3Cursor));
+  return SQLITE_OK;
+}
+
+/*
+** Close the cursor.  For additional information see the documentation
+** on the xClose method of the virtual table interface.
+*/
+static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
+  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
+  sqlite3_finalize(pCsr->pStmt);
+  sqlite3Fts3ExprFree(pCsr->pExpr);
+  sqlite3Fts3FreeDeferredTokens(pCsr);
+  sqlite3_free(pCsr->aDoclist);
+  sqlite3_free(pCsr->aMatchinfo);
+  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
+** compose and prepare an SQL statement of the form:
+**
+**    "SELECT <columns> FROM %_content WHERE rowid = ?"
+**
+** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
+** it. If an error occurs, return an SQLite error code.
+**
+** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
+*/
+static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
+  int rc = SQLITE_OK;
+  if( pCsr->pStmt==0 ){
+    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+    char *zSql;
+    zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
+    if( !zSql ) return SQLITE_NOMEM;
+    rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+    sqlite3_free(zSql);
+  }
+  *ppStmt = pCsr->pStmt;
+  return rc;
+}
+
+/*
+** Position the pCsr->pStmt statement so that it is on the row
+** of the %_content table that contains the last match.  Return
+** SQLITE_OK on success.  
+*/
+static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
+  int rc = SQLITE_OK;
+  if( pCsr->isRequireSeek ){
+    sqlite3_stmt *pStmt = 0;
+
+    rc = fts3CursorSeekStmt(pCsr, &pStmt);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
+      pCsr->isRequireSeek = 0;
+      if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
+        return SQLITE_OK;
+      }else{
+        rc = sqlite3_reset(pCsr->pStmt);
+        if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
+          /* If no row was found and no error has occurred, then the %_content
+          ** table is missing a row that is present in the full-text index.
+          ** The data structures are corrupt.  */
+          rc = FTS_CORRUPT_VTAB;
+          pCsr->isEof = 1;
+        }
+      }
+    }
+  }
+
+  if( rc!=SQLITE_OK && pContext ){
+    sqlite3_result_error_code(pContext, rc);
+  }
+  return rc;
+}
+
+/*
+** This function is used to process a single interior node when searching
+** a b-tree for a term or term prefix. The node data is passed to this 
+** function via the zNode/nNode parameters. The term to search for is
+** passed in zTerm/nTerm.
+**
+** If piFirst is not NULL, then this function sets *piFirst to the blockid
+** of the child node that heads the sub-tree that may contain the term.
+**
+** If piLast is not NULL, then *piLast is set to the right-most child node
+** that heads a sub-tree that may contain a term for which zTerm/nTerm is
+** a prefix.
+**
+** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
+*/
+static int fts3ScanInteriorNode(
+  const char *zTerm,              /* Term to select leaves for */
+  int nTerm,                      /* Size of term zTerm in bytes */
+  const char *zNode,              /* Buffer containing segment interior node */
+  int nNode,                      /* Size of buffer at zNode */
+  sqlite3_int64 *piFirst,         /* OUT: Selected child node */
+  sqlite3_int64 *piLast           /* OUT: Selected child node */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  const char *zCsr = zNode;       /* Cursor to iterate through node */
+  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
+  char *zBuffer = 0;              /* Buffer to load terms into */
+  int nAlloc = 0;                 /* Size of allocated buffer */
+  int isFirstTerm = 1;            /* True when processing first term on page */
+  sqlite3_int64 iChild;           /* Block id of child node to descend to */
+
+  /* Skip over the 'height' varint that occurs at the start of every 
+  ** interior node. Then load the blockid of the left-child of the b-tree
+  ** node into variable iChild.  
+  **
+  ** Even if the data structure on disk is corrupted, this (reading two
+  ** varints from the buffer) does not risk an overread. If zNode is a
+  ** root node, then the buffer comes from a SELECT statement. SQLite does
+  ** not make this guarantee explicitly, but in practice there are always
+  ** either more than 20 bytes of allocated space following the nNode bytes of
+  ** contents, or two zero bytes. Or, if the node is read from the %_segments
+  ** table, then there are always 20 bytes of zeroed padding following the
+  ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
+  */
+  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
+  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
+  if( zCsr>zEnd ){
+    return FTS_CORRUPT_VTAB;
+  }
+  
+  while( zCsr<zEnd && (piFirst || piLast) ){
+    int cmp;                      /* memcmp() result */
+    int nSuffix;                  /* Size of term suffix */
+    int nPrefix = 0;              /* Size of term prefix */
+    int nBuffer;                  /* Total term size */
+  
+    /* Load the next term on the node into zBuffer. Use realloc() to expand
+    ** the size of zBuffer if required.  */
+    if( !isFirstTerm ){
+      zCsr += fts3GetVarint32(zCsr, &nPrefix);
+    }
+    isFirstTerm = 0;
+    zCsr += fts3GetVarint32(zCsr, &nSuffix);
+    
+    if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
+      rc = FTS_CORRUPT_VTAB;
+      goto finish_scan;
+    }
+    if( nPrefix+nSuffix>nAlloc ){
+      char *zNew;
+      nAlloc = (nPrefix+nSuffix) * 2;
+      zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
+      if( !zNew ){
+        rc = SQLITE_NOMEM;
+        goto finish_scan;
+      }
+      zBuffer = zNew;
+    }
+    assert( zBuffer );
+    memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
+    nBuffer = nPrefix + nSuffix;
+    zCsr += nSuffix;
+
+    /* Compare the term we are searching for with the term just loaded from
+    ** the interior node. If the specified term is greater than or equal
+    ** to the term from the interior node, then all terms on the sub-tree 
+    ** headed by node iChild are smaller than zTerm. No need to search 
+    ** iChild.
+    **
+    ** If the interior node term is larger than the specified term, then
+    ** the tree headed by iChild may contain the specified term.
+    */
+    cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
+    if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
+      *piFirst = iChild;
+      piFirst = 0;
+    }
+
+    if( piLast && cmp<0 ){
+      *piLast = iChild;
+      piLast = 0;
+    }
+
+    iChild++;
+  };
+
+  if( piFirst ) *piFirst = iChild;
+  if( piLast ) *piLast = iChild;
+
+ finish_scan:
+  sqlite3_free(zBuffer);
+  return rc;
+}
+
+
+/*
+** The buffer pointed to by argument zNode (size nNode bytes) contains an
+** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
+** contains a term. This function searches the sub-tree headed by the zNode
+** node for the range of leaf nodes that may contain the specified term
+** or terms for which the specified term is a prefix.
+**
+** If piLeaf is not NULL, then *piLeaf is set to the blockid of the 
+** left-most leaf node in the tree that may contain the specified term.
+** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
+** right-most leaf node that may contain a term for which the specified
+** term is a prefix.
+**
+** It is possible that the range of returned leaf nodes does not contain 
+** the specified term or any terms for which it is a prefix. However, if the 
+** segment does contain any such terms, they are stored within the identified
+** range. Because this function only inspects interior segment nodes (and
+** never loads leaf nodes into memory), it is not possible to be sure.
+**
+** If an error occurs, an error code other than SQLITE_OK is returned.
+*/ 
+static int fts3SelectLeaf(
+  Fts3Table *p,                   /* Virtual table handle */
+  const char *zTerm,              /* Term to select leaves for */
+  int nTerm,                      /* Size of term zTerm in bytes */
+  const char *zNode,              /* Buffer containing segment interior node */
+  int nNode,                      /* Size of buffer at zNode */
+  sqlite3_int64 *piLeaf,          /* Selected leaf node */
+  sqlite3_int64 *piLeaf2          /* Selected leaf node */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  int iHeight;                    /* Height of this node in tree */
+
+  assert( piLeaf || piLeaf2 );
+
+  fts3GetVarint32(zNode, &iHeight);
+  rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
+  assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
+
+  if( rc==SQLITE_OK && iHeight>1 ){
+    char *zBlob = 0;              /* Blob read from %_segments table */
+    int nBlob = 0;                /* Size of zBlob in bytes */
+
+    if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
+      rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
+      if( rc==SQLITE_OK ){
+        rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0);
+      }
+      sqlite3_free(zBlob);
+      piLeaf = 0;
+      zBlob = 0;
+    }
+
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0);
+    }
+    if( rc==SQLITE_OK ){
+      rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2);
+    }
+    sqlite3_free(zBlob);
+  }
+
+  return rc;
+}
+
+/*
+** This function is used to create delta-encoded serialized lists of FTS3 
+** varints. Each call to this function appends a single varint to a list.
+*/
+static void fts3PutDeltaVarint(
+  char **pp,                      /* IN/OUT: Output pointer */
+  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
+  sqlite3_int64 iVal              /* Write this value to the list */
+){
+  assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) );
+  *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev);
+  *piPrev = iVal;
+}
+
+/*
+** When this function is called, *ppPoslist is assumed to point to the 
+** start of a position-list. After it returns, *ppPoslist points to the
+** first byte after the position-list.
+**
+** A position list is list of positions (delta encoded) and columns for 
+** a single document record of a doclist.  So, in other words, this
+** routine advances *ppPoslist so that it points to the next docid in
+** the doclist, or to the first byte past the end of the doclist.
+**
+** If pp is not NULL, then the contents of the position list are copied
+** to *pp. *pp is set to point to the first byte past the last byte copied
+** before this function returns.
+*/
+static void fts3PoslistCopy(char **pp, char **ppPoslist){
+  char *pEnd = *ppPoslist;
+  char c = 0;
+
+  /* The end of a position list is marked by a zero encoded as an FTS3 
+  ** varint. A single POS_END (0) byte. Except, if the 0 byte is preceded by
+  ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail
+  ** of some other, multi-byte, value.
+  **
+  ** The following while-loop moves pEnd to point to the first byte that is not 
+  ** immediately preceded by a byte with the 0x80 bit set. Then increments
+  ** pEnd once more so that it points to the byte immediately following the
+  ** last byte in the position-list.
+  */
+  while( *pEnd | c ){
+    c = *pEnd++ & 0x80;
+    testcase( c!=0 && (*pEnd)==0 );
+  }
+  pEnd++;  /* Advance past the POS_END terminator byte */
+
+  if( pp ){
+    int n = (int)(pEnd - *ppPoslist);
+    char *p = *pp;
+    memcpy(p, *ppPoslist, n);
+    p += n;
+    *pp = p;
+  }
+  *ppPoslist = pEnd;
+}
+
+/*
+** When this function is called, *ppPoslist is assumed to point to the 
+** start of a column-list. After it returns, *ppPoslist points to the
+** to the terminator (POS_COLUMN or POS_END) byte of the column-list.
+**
+** A column-list is list of delta-encoded positions for a single column
+** within a single document within a doclist.
+**
+** The column-list is terminated either by a POS_COLUMN varint (1) or
+** a POS_END varint (0).  This routine leaves *ppPoslist pointing to
+** the POS_COLUMN or POS_END that terminates the column-list.
+**
+** If pp is not NULL, then the contents of the column-list are copied
+** to *pp. *pp is set to point to the first byte past the last byte copied
+** before this function returns.  The POS_COLUMN or POS_END terminator
+** is not copied into *pp.
+*/
+static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
+  char *pEnd = *ppPoslist;
+  char c = 0;
+
+  /* A column-list is terminated by either a 0x01 or 0x00 byte that is
+  ** not part of a multi-byte varint.
+  */
+  while( 0xFE & (*pEnd | c) ){
+    c = *pEnd++ & 0x80;
+    testcase( c!=0 && ((*pEnd)&0xfe)==0 );
+  }
+  if( pp ){
+    int n = (int)(pEnd - *ppPoslist);
+    char *p = *pp;
+    memcpy(p, *ppPoslist, n);
+    p += n;
+    *pp = p;
+  }
+  *ppPoslist = pEnd;
+}
+
+/*
+** Value used to signify the end of an position-list. This is safe because
+** it is not possible to have a document with 2^31 terms.
+*/
+#define POSITION_LIST_END 0x7fffffff
+
+/*
+** This function is used to help parse position-lists. When this function is
+** called, *pp may point to the start of the next varint in the position-list
+** being parsed, or it may point to 1 byte past the end of the position-list
+** (in which case **pp will be a terminator bytes POS_END (0) or
+** (1)).
+**
+** If *pp points past the end of the current position-list, set *pi to 
+** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
+** increment the current value of *pi by the value read, and set *pp to
+** point to the next value before returning.
+**
+** Before calling this routine *pi must be initialized to the value of
+** the previous position, or zero if we are reading the first position
+** in the position-list.  Because positions are delta-encoded, the value
+** of the previous position is needed in order to compute the value of
+** the next position.
+*/
+static void fts3ReadNextPos(
+  char **pp,                    /* IN/OUT: Pointer into position-list buffer */
+  sqlite3_int64 *pi             /* IN/OUT: Value read from position-list */
+){
+  if( (**pp)&0xFE ){
+    fts3GetDeltaVarint(pp, pi);
+    *pi -= 2;
+  }else{
+    *pi = POSITION_LIST_END;
+  }
+}
+
+/*
+** If parameter iCol is not 0, write an POS_COLUMN (1) byte followed by
+** the value of iCol encoded as a varint to *pp.   This will start a new
+** column list.
+**
+** Set *pp to point to the byte just after the last byte written before 
+** returning (do not modify it if iCol==0). Return the total number of bytes
+** written (0 if iCol==0).
+*/
+static int fts3PutColNumber(char **pp, int iCol){
+  int n = 0;                      /* Number of bytes written */
+  if( iCol ){
+    char *p = *pp;                /* Output pointer */
+    n = 1 + sqlite3Fts3PutVarint(&p[1], iCol);
+    *p = 0x01;
+    *pp = &p[n];
+  }
+  return n;
+}
+
+/*
+** Compute the union of two position lists.  The output written
+** into *pp contains all positions of both *pp1 and *pp2 in sorted
+** order and with any duplicates removed.  All pointers are
+** updated appropriately.   The caller is responsible for insuring
+** that there is enough space in *pp to hold the complete output.
+*/
+static void fts3PoslistMerge(
+  char **pp,                      /* Output buffer */
+  char **pp1,                     /* Left input list */
+  char **pp2                      /* Right input list */
+){
+  char *p = *pp;
+  char *p1 = *pp1;
+  char *p2 = *pp2;
+
+  while( *p1 || *p2 ){
+    int iCol1;         /* The current column index in pp1 */
+    int iCol2;         /* The current column index in pp2 */
+
+    if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1);
+    else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
+    else iCol1 = 0;
+
+    if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2);
+    else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
+    else iCol2 = 0;
+
+    if( iCol1==iCol2 ){
+      sqlite3_int64 i1 = 0;       /* Last position from pp1 */
+      sqlite3_int64 i2 = 0;       /* Last position from pp2 */
+      sqlite3_int64 iPrev = 0;
+      int n = fts3PutColNumber(&p, iCol1);
+      p1 += n;
+      p2 += n;
+
+      /* At this point, both p1 and p2 point to the start of column-lists
+      ** for the same column (the column with index iCol1 and iCol2).
+      ** A column-list is a list of non-negative delta-encoded varints, each 
+      ** incremented by 2 before being stored. Each list is terminated by a
+      ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
+      ** and writes the results to buffer p. p is left pointing to the byte
+      ** after the list written. No terminator (POS_END or POS_COLUMN) is
+      ** written to the output.
+      */
+      fts3GetDeltaVarint(&p1, &i1);
+      fts3GetDeltaVarint(&p2, &i2);
+      do {
+        fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2); 
+        iPrev -= 2;
+        if( i1==i2 ){
+          fts3ReadNextPos(&p1, &i1);
+          fts3ReadNextPos(&p2, &i2);
+        }else if( i1<i2 ){
+          fts3ReadNextPos(&p1, &i1);
+        }else{
+          fts3ReadNextPos(&p2, &i2);
+        }
+      }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
+    }else if( iCol1<iCol2 ){
+      p1 += fts3PutColNumber(&p, iCol1);
+      fts3ColumnlistCopy(&p, &p1);
+    }else{
+      p2 += fts3PutColNumber(&p, iCol2);
+      fts3ColumnlistCopy(&p, &p2);
+    }
+  }
+
+  *p++ = POS_END;
+  *pp = p;
+  *pp1 = p1 + 1;
+  *pp2 = p2 + 1;
+}
+
+/*
+** This function is used to merge two position lists into one. When it is
+** called, *pp1 and *pp2 must both point to position lists. A position-list is
+** the part of a doclist that follows each document id. For example, if a row
+** contains:
+**
+**     'a b c'|'x y z'|'a b b a'
+**
+** Then the position list for this row for token 'b' would consist of:
+**
+**     0x02 0x01 0x02 0x03 0x03 0x00
+**
+** When this function returns, both *pp1 and *pp2 are left pointing to the
+** byte following the 0x00 terminator of their respective position lists.
+**
+** If isSaveLeft is 0, an entry is added to the output position list for 
+** each position in *pp2 for which there exists one or more positions in
+** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
+** when the *pp1 token appears before the *pp2 token, but not more than nToken
+** slots before it.
+**
+** e.g. nToken==1 searches for adjacent positions.
+*/
+static int fts3PoslistPhraseMerge(
+  char **pp,                      /* IN/OUT: Preallocated output buffer */
+  int nToken,                     /* Maximum difference in token positions */
+  int isSaveLeft,                 /* Save the left position */
+  int isExact,                    /* If *pp1 is exactly nTokens before *pp2 */
+  char **pp1,                     /* IN/OUT: Left input list */
+  char **pp2                      /* IN/OUT: Right input list */
+){
+  char *p = *pp;
+  char *p1 = *pp1;
+  char *p2 = *pp2;
+  int iCol1 = 0;
+  int iCol2 = 0;
+
+  /* Never set both isSaveLeft and isExact for the same invocation. */
+  assert( isSaveLeft==0 || isExact==0 );
+
+  assert( p!=0 && *p1!=0 && *p2!=0 );
+  if( *p1==POS_COLUMN ){ 
+    p1++;
+    p1 += fts3GetVarint32(p1, &iCol1);
+  }
+  if( *p2==POS_COLUMN ){ 
+    p2++;
+    p2 += fts3GetVarint32(p2, &iCol2);
+  }
+
+  while( 1 ){
+    if( iCol1==iCol2 ){
+      char *pSave = p;
+      sqlite3_int64 iPrev = 0;
+      sqlite3_int64 iPos1 = 0;
+      sqlite3_int64 iPos2 = 0;
+
+      if( iCol1 ){
+        *p++ = POS_COLUMN;
+        p += sqlite3Fts3PutVarint(p, iCol1);
+      }
+
+      assert( *p1!=POS_END && *p1!=POS_COLUMN );
+      assert( *p2!=POS_END && *p2!=POS_COLUMN );
+      fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
+      fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
+
+      while( 1 ){
+        if( iPos2==iPos1+nToken 
+         || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) 
+        ){
+          sqlite3_int64 iSave;
+          iSave = isSaveLeft ? iPos1 : iPos2;
+          fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
+          pSave = 0;
+          assert( p );
+        }
+        if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
+          if( (*p2&0xFE)==0 ) break;
+          fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
+        }else{
+          if( (*p1&0xFE)==0 ) break;
+          fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
+        }
+      }
+
+      if( pSave ){
+        assert( pp && p );
+        p = pSave;
+      }
+
+      fts3ColumnlistCopy(0, &p1);
+      fts3ColumnlistCopy(0, &p2);
+      assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
+      if( 0==*p1 || 0==*p2 ) break;
+
+      p1++;
+      p1 += fts3GetVarint32(p1, &iCol1);
+      p2++;
+      p2 += fts3GetVarint32(p2, &iCol2);
+    }
+
+    /* Advance pointer p1 or p2 (whichever corresponds to the smaller of
+    ** iCol1 and iCol2) so that it points to either the 0x00 that marks the
+    ** end of the position list, or the 0x01 that precedes the next 
+    ** column-number in the position list. 
+    */
+    else if( iCol1<iCol2 ){
+      fts3ColumnlistCopy(0, &p1);
+      if( 0==*p1 ) break;
+      p1++;
+      p1 += fts3GetVarint32(p1, &iCol1);
+    }else{
+      fts3ColumnlistCopy(0, &p2);
+      if( 0==*p2 ) break;
+      p2++;
+      p2 += fts3GetVarint32(p2, &iCol2);
+    }
+  }
+
+  fts3PoslistCopy(0, &p2);
+  fts3PoslistCopy(0, &p1);
+  *pp1 = p1;
+  *pp2 = p2;
+  if( *pp==p ){
+    return 0;
+  }
+  *p++ = 0x00;
+  *pp = p;
+  return 1;
+}
+
+/*
+** Merge two position-lists as required by the NEAR operator. The argument
+** position lists correspond to the left and right phrases of an expression 
+** like:
+**
+**     "phrase 1" NEAR "phrase number 2"
+**
+** Position list *pp1 corresponds to the left-hand side of the NEAR 
+** expression and *pp2 to the right. As usual, the indexes in the position 
+** lists are the offsets of the last token in each phrase (tokens "1" and "2" 
+** in the example above).
+**
+** The output position list - written to *pp - is a copy of *pp2 with those
+** entries that are not sufficiently NEAR entries in *pp1 removed.
+*/
+static int fts3PoslistNearMerge(
+  char **pp,                      /* Output buffer */
+  char *aTmp,                     /* Temporary buffer space */
+  int nRight,                     /* Maximum difference in token positions */
+  int nLeft,                      /* Maximum difference in token positions */
+  char **pp1,                     /* IN/OUT: Left input list */
+  char **pp2                      /* IN/OUT: Right input list */
+){
+  char *p1 = *pp1;
+  char *p2 = *pp2;
+
+  char *pTmp1 = aTmp;
+  char *pTmp2;
+  char *aTmp2;
+  int res = 1;
+
+  fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
+  aTmp2 = pTmp2 = pTmp1;
+  *pp1 = p1;
+  *pp2 = p2;
+  fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
+  if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
+    fts3PoslistMerge(pp, &aTmp, &aTmp2);
+  }else if( pTmp1!=aTmp ){
+    fts3PoslistCopy(pp, &aTmp);
+  }else if( pTmp2!=aTmp2 ){
+    fts3PoslistCopy(pp, &aTmp2);
+  }else{
+    res = 0;
+  }
+
+  return res;
+}
+
+/* 
+** An instance of this function is used to merge together the (potentially
+** large number of) doclists for each term that matches a prefix query.
+** See function fts3TermSelectMerge() for details.
+*/
+typedef struct TermSelect TermSelect;
+struct TermSelect {
+  char *aaOutput[16];             /* Malloc'd output buffers */
+  int anOutput[16];               /* Size each output buffer in bytes */
+};
+
+/*
+** This function is used to read a single varint from a buffer. Parameter
+** pEnd points 1 byte past the end of the buffer. When this function is
+** called, if *pp points to pEnd or greater, then the end of the buffer
+** has been reached. In this case *pp is set to 0 and the function returns.
+**
+** If *pp does not point to or past pEnd, then a single varint is read
+** from *pp. *pp is then set to point 1 byte past the end of the read varint.
+**
+** If bDescIdx is false, the value read is added to *pVal before returning.
+** If it is true, the value read is subtracted from *pVal before this 
+** function returns.
+*/
+static void fts3GetDeltaVarint3(
+  char **pp,                      /* IN/OUT: Point to read varint from */
+  char *pEnd,                     /* End of buffer */
+  int bDescIdx,                   /* True if docids are descending */
+  sqlite3_int64 *pVal             /* IN/OUT: Integer value */
+){
+  if( *pp>=pEnd ){
+    *pp = 0;
+  }else{
+    sqlite3_int64 iVal;
+    *pp += sqlite3Fts3GetVarint(*pp, &iVal);
+    if( bDescIdx ){
+      *pVal -= iVal;
+    }else{
+      *pVal += iVal;
+    }
+  }
+}
+
+/*
+** This function is used to write a single varint to a buffer. The varint
+** is written to *pp. Before returning, *pp is set to point 1 byte past the
+** end of the value written.
+**
+** If *pbFirst is zero when this function is called, the value written to
+** the buffer is that of parameter iVal. 
+**
+** If *pbFirst is non-zero when this function is called, then the value 
+** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
+** (if bDescIdx is non-zero).
+**
+** Before returning, this function always sets *pbFirst to 1 and *piPrev
+** to the value of parameter iVal.
+*/
+static void fts3PutDeltaVarint3(
+  char **pp,                      /* IN/OUT: Output pointer */
+  int bDescIdx,                   /* True for descending docids */
+  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
+  int *pbFirst,                   /* IN/OUT: True after first int written */
+  sqlite3_int64 iVal              /* Write this value to the list */
+){
+  sqlite3_int64 iWrite;
+  if( bDescIdx==0 || *pbFirst==0 ){
+    iWrite = iVal - *piPrev;
+  }else{
+    iWrite = *piPrev - iVal;
+  }
+  assert( *pbFirst || *piPrev==0 );
+  assert( *pbFirst==0 || iWrite>0 );
+  *pp += sqlite3Fts3PutVarint(*pp, iWrite);
+  *piPrev = iVal;
+  *pbFirst = 1;
+}
+
+
+/*
+** This macro is used by various functions that merge doclists. The two
+** arguments are 64-bit docid values. If the value of the stack variable
+** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). 
+** Otherwise, (i2-i1).
+**
+** Using this makes it easier to write code that can merge doclists that are
+** sorted in either ascending or descending order.
+*/
+#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
+
+/*
+** This function does an "OR" merge of two doclists (output contains all
+** positions contained in either argument doclist). If the docids in the 
+** input doclists are sorted in ascending order, parameter bDescDoclist
+** should be false. If they are sorted in ascending order, it should be
+** passed a non-zero value.
+**
+** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
+** containing the output doclist and SQLITE_OK is returned. In this case
+** *pnOut is set to the number of bytes in the output doclist.
+**
+** If an error occurs, an SQLite error code is returned. The output values
+** are undefined in this case.
+*/
+static int fts3DoclistOrMerge(
+  int bDescDoclist,               /* True if arguments are desc */
+  char *a1, int n1,               /* First doclist */
+  char *a2, int n2,               /* Second doclist */
+  char **paOut, int *pnOut        /* OUT: Malloc'd doclist */
+){
+  sqlite3_int64 i1 = 0;
+  sqlite3_int64 i2 = 0;
+  sqlite3_int64 iPrev = 0;
+  char *pEnd1 = &a1[n1];
+  char *pEnd2 = &a2[n2];
+  char *p1 = a1;
+  char *p2 = a2;
+  char *p;
+  char *aOut;
+  int bFirstOut = 0;
+
+  *paOut = 0;
+  *pnOut = 0;
+
+  /* Allocate space for the output. Both the input and output doclists
+  ** are delta encoded. If they are in ascending order (bDescDoclist==0),
+  ** then the first docid in each list is simply encoded as a varint. For
+  ** each subsequent docid, the varint stored is the difference between the
+  ** current and previous docid (a positive number - since the list is in
+  ** ascending order).
+  **
+  ** The first docid written to the output is therefore encoded using the 
+  ** same number of bytes as it is in whichever of the input lists it is
+  ** read from. And each subsequent docid read from the same input list 
+  ** consumes either the same or less bytes as it did in the input (since
+  ** the difference between it and the previous value in the output must
+  ** be a positive value less than or equal to the delta value read from 
+  ** the input list). The same argument applies to all but the first docid
+  ** read from the 'other' list. And to the contents of all position lists
+  ** that will be copied and merged from the input to the output.
+  **
+  ** However, if the first docid copied to the output is a negative number,
+  ** then the encoding of the first docid from the 'other' input list may
+  ** be larger in the output than it was in the input (since the delta value
+  ** may be a larger positive integer than the actual docid).
+  **
+  ** The space required to store the output is therefore the sum of the
+  ** sizes of the two inputs, plus enough space for exactly one of the input
+  ** docids to grow. 
+  **
+  ** A symetric argument may be made if the doclists are in descending 
+  ** order.
+  */
+  aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
+  if( !aOut ) return SQLITE_NOMEM;
+
+  p = aOut;
+  fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
+  fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
+  while( p1 || p2 ){
+    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
+
+    if( p2 && p1 && iDiff==0 ){
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
+      fts3PoslistMerge(&p, &p1, &p2);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
+    }else if( !p2 || (p1 && iDiff<0) ){
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
+      fts3PoslistCopy(&p, &p1);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+    }else{
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
+      fts3PoslistCopy(&p, &p2);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
+    }
+  }
+
+  *paOut = aOut;
+  *pnOut = (int)(p-aOut);
+  assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
+  return SQLITE_OK;
+}
+
+/*
+** This function does a "phrase" merge of two doclists. In a phrase merge,
+** the output contains a copy of each position from the right-hand input
+** doclist for which there is a position in the left-hand input doclist
+** exactly nDist tokens before it.
+**
+** If the docids in the input doclists are sorted in ascending order,
+** parameter bDescDoclist should be false. If they are sorted in ascending 
+** order, it should be passed a non-zero value.
+**
+** The right-hand input doclist is overwritten by this function.
+*/
+static void fts3DoclistPhraseMerge(
+  int bDescDoclist,               /* True if arguments are desc */
+  int nDist,                      /* Distance from left to right (1=adjacent) */
+  char *aLeft, int nLeft,         /* Left doclist */
+  char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
+){
+  sqlite3_int64 i1 = 0;
+  sqlite3_int64 i2 = 0;
+  sqlite3_int64 iPrev = 0;
+  char *pEnd1 = &aLeft[nLeft];
+  char *pEnd2 = &aRight[*pnRight];
+  char *p1 = aLeft;
+  char *p2 = aRight;
+  char *p;
+  int bFirstOut = 0;
+  char *aOut = aRight;
+
+  assert( nDist>0 );
+
+  p = aOut;
+  fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
+  fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
+
+  while( p1 && p2 ){
+    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
+    if( iDiff==0 ){
+      char *pSave = p;
+      sqlite3_int64 iPrevSave = iPrev;
+      int bFirstOutSave = bFirstOut;
+
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
+      if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
+        p = pSave;
+        iPrev = iPrevSave;
+        bFirstOut = bFirstOutSave;
+      }
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
+    }else if( iDiff<0 ){
+      fts3PoslistCopy(0, &p1);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+    }else{
+      fts3PoslistCopy(0, &p2);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
+    }
+  }
+
+  *pnRight = (int)(p - aOut);
+}
+
+/*
+** Argument pList points to a position list nList bytes in size. This
+** function checks to see if the position list contains any entries for
+** a token in position 0 (of any column). If so, it writes argument iDelta
+** to the output buffer pOut, followed by a position list consisting only
+** of the entries from pList at position 0, and terminated by an 0x00 byte.
+** The value returned is the number of bytes written to pOut (if any).
+*/
+SQLITE_PRIVATE int sqlite3Fts3FirstFilter(
+  sqlite3_int64 iDelta,           /* Varint that may be written to pOut */
+  char *pList,                    /* Position list (no 0x00 term) */
+  int nList,                      /* Size of pList in bytes */
+  char *pOut                      /* Write output here */
+){
+  int nOut = 0;
+  int bWritten = 0;               /* True once iDelta has been written */
+  char *p = pList;
+  char *pEnd = &pList[nList];
+
+  if( *p!=0x01 ){
+    if( *p==0x02 ){
+      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+      pOut[nOut++] = 0x02;
+      bWritten = 1;
+    }
+    fts3ColumnlistCopy(0, &p);
+  }
+
+  while( p<pEnd && *p==0x01 ){
+    sqlite3_int64 iCol;
+    p++;
+    p += sqlite3Fts3GetVarint(p, &iCol);
+    if( *p==0x02 ){
+      if( bWritten==0 ){
+        nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+        bWritten = 1;
+      }
+      pOut[nOut++] = 0x01;
+      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
+      pOut[nOut++] = 0x02;
+    }
+    fts3ColumnlistCopy(0, &p);
+  }
+  if( bWritten ){
+    pOut[nOut++] = 0x00;
+  }
+
+  return nOut;
+}
+
+
+/*
+** Merge all doclists in the TermSelect.aaOutput[] array into a single
+** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
+** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
+**
+** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
+** the responsibility of the caller to free any doclists left in the
+** TermSelect.aaOutput[] array.
+*/
+static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
+  char *aOut = 0;
+  int nOut = 0;
+  int i;
+
+  /* Loop through the doclists in the aaOutput[] array. Merge them all
+  ** into a single doclist.
+  */
+  for(i=0; i<SizeofArray(pTS->aaOutput); i++){
+    if( pTS->aaOutput[i] ){
+      if( !aOut ){
+        aOut = pTS->aaOutput[i];
+        nOut = pTS->anOutput[i];
+        pTS->aaOutput[i] = 0;
+      }else{
+        int nNew;
+        char *aNew;
+
+        int rc = fts3DoclistOrMerge(p->bDescIdx, 
+            pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
+        );
+        if( rc!=SQLITE_OK ){
+          sqlite3_free(aOut);
+          return rc;
+        }
+
+        sqlite3_free(pTS->aaOutput[i]);
+        sqlite3_free(aOut);
+        pTS->aaOutput[i] = 0;
+        aOut = aNew;
+        nOut = nNew;
+      }
+    }
+  }
+
+  pTS->aaOutput[0] = aOut;
+  pTS->anOutput[0] = nOut;
+  return SQLITE_OK;
+}
+
+/*
+** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
+** as the first argument. The merge is an "OR" merge (see function
+** fts3DoclistOrMerge() for details).
+**
+** This function is called with the doclist for each term that matches
+** a queried prefix. It merges all these doclists into one, the doclist
+** for the specified prefix. Since there can be a very large number of
+** doclists to merge, the merging is done pair-wise using the TermSelect
+** object.
+**
+** This function returns SQLITE_OK if the merge is successful, or an
+** SQLite error code (SQLITE_NOMEM) if an error occurs.
+*/
+static int fts3TermSelectMerge(
+  Fts3Table *p,                   /* FTS table handle */
+  TermSelect *pTS,                /* TermSelect object to merge into */
+  char *aDoclist,                 /* Pointer to doclist */
+  int nDoclist                    /* Size of aDoclist in bytes */
+){
+  if( pTS->aaOutput[0]==0 ){
+    /* If this is the first term selected, copy the doclist to the output
+    ** buffer using memcpy(). */
+    pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
+    pTS->anOutput[0] = nDoclist;
+    if( pTS->aaOutput[0] ){
+      memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
+    }else{
+      return SQLITE_NOMEM;
+    }
+  }else{
+    char *aMerge = aDoclist;
+    int nMerge = nDoclist;
+    int iOut;
+
+    for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
+      if( pTS->aaOutput[iOut]==0 ){
+        assert( iOut>0 );
+        pTS->aaOutput[iOut] = aMerge;
+        pTS->anOutput[iOut] = nMerge;
+        break;
+      }else{
+        char *aNew;
+        int nNew;
+
+        int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, 
+            pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
+        );
+        if( rc!=SQLITE_OK ){
+          if( aMerge!=aDoclist ) sqlite3_free(aMerge);
+          return rc;
+        }
+
+        if( aMerge!=aDoclist ) sqlite3_free(aMerge);
+        sqlite3_free(pTS->aaOutput[iOut]);
+        pTS->aaOutput[iOut] = 0;
+  
+        aMerge = aNew;
+        nMerge = nNew;
+        if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
+          pTS->aaOutput[iOut] = aMerge;
+          pTS->anOutput[iOut] = nMerge;
+        }
+      }
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
+*/
+static int fts3SegReaderCursorAppend(
+  Fts3MultiSegReader *pCsr, 
+  Fts3SegReader *pNew
+){
+  if( (pCsr->nSegment%16)==0 ){
+    Fts3SegReader **apNew;
+    int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
+    apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
+    if( !apNew ){
+      sqlite3Fts3SegReaderFree(pNew);
+      return SQLITE_NOMEM;
+    }
+    pCsr->apSegment = apNew;
+  }
+  pCsr->apSegment[pCsr->nSegment++] = pNew;
+  return SQLITE_OK;
+}
+
+/*
+** Add seg-reader objects to the Fts3MultiSegReader object passed as the
+** 8th argument.
+**
+** This function returns SQLITE_OK if successful, or an SQLite error code
+** otherwise.
+*/
+static int fts3SegReaderCursor(
+  Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language id */
+  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
+  int iLevel,                     /* Level of segments to scan */
+  const char *zTerm,              /* Term to query for */
+  int nTerm,                      /* Size of zTerm in bytes */
+  int isPrefix,                   /* True for a prefix search */
+  int isScan,                     /* True to scan from zTerm to EOF */
+  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
+){
+  int rc = SQLITE_OK;             /* Error code */
+  sqlite3_stmt *pStmt = 0;        /* Statement to iterate through segments */
+  int rc2;                        /* Result of sqlite3_reset() */
+
+  /* If iLevel is less than 0 and this is not a scan, include a seg-reader 
+  ** for the pending-terms. If this is a scan, then this call must be being
+  ** made by an fts4aux module, not an FTS table. In this case calling
+  ** Fts3SegReaderPending might segfault, as the data structures used by 
+  ** fts4aux are not completely populated. So it's easiest to filter these
+  ** calls out here.  */
+  if( iLevel<0 && p->aIndex ){
+    Fts3SegReader *pSeg = 0;
+    rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg);
+    if( rc==SQLITE_OK && pSeg ){
+      rc = fts3SegReaderCursorAppend(pCsr, pSeg);
+    }
+  }
+
+  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
+    }
+
+    while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
+      Fts3SegReader *pSeg = 0;
+
+      /* Read the values returned by the SELECT into local variables. */
+      sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
+      sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
+      sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
+      int nRoot = sqlite3_column_bytes(pStmt, 4);
+      char const *zRoot = sqlite3_column_blob(pStmt, 4);
+
+      /* If zTerm is not NULL, and this segment is not stored entirely on its
+      ** root node, the range of leaves scanned can be reduced. Do this. */
+      if( iStartBlock && zTerm ){
+        sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
+        rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
+        if( rc!=SQLITE_OK ) goto finished;
+        if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
+      }
+ 
+      rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, 
+          (isPrefix==0 && isScan==0),
+          iStartBlock, iLeavesEndBlock, 
+          iEndBlock, zRoot, nRoot, &pSeg
+      );
+      if( rc!=SQLITE_OK ) goto finished;
+      rc = fts3SegReaderCursorAppend(pCsr, pSeg);
+    }
+  }
+
+ finished:
+  rc2 = sqlite3_reset(pStmt);
+  if( rc==SQLITE_DONE ) rc = rc2;
+
+  return rc;
+}
+
+/*
+** Set up a cursor object for iterating through a full-text index or a 
+** single level therein.
+*/
+SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(
+  Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language-id to search */
+  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
+  int iLevel,                     /* Level of segments to scan */
+  const char *zTerm,              /* Term to query for */
+  int nTerm,                      /* Size of zTerm in bytes */
+  int isPrefix,                   /* True for a prefix search */
+  int isScan,                     /* True to scan from zTerm to EOF */
+  Fts3MultiSegReader *pCsr       /* Cursor object to populate */
+){
+  assert( iIndex>=0 && iIndex<p->nIndex );
+  assert( iLevel==FTS3_SEGCURSOR_ALL
+      ||  iLevel==FTS3_SEGCURSOR_PENDING 
+      ||  iLevel>=0
+  );
+  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
+  assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
+  assert( isPrefix==0 || isScan==0 );
+
+  memset(pCsr, 0, sizeof(Fts3MultiSegReader));
+  return fts3SegReaderCursor(
+      p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
+  );
+}
+
+/*
+** In addition to its current configuration, have the Fts3MultiSegReader
+** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
+static int fts3SegReaderCursorAddZero(
+  Fts3Table *p,                   /* FTS virtual table handle */
+  int iLangid,
+  const char *zTerm,              /* Term to scan doclist of */
+  int nTerm,                      /* Number of bytes in zTerm */
+  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
+){
+  return fts3SegReaderCursor(p, 
+      iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
+  );
+}
+
+/*
+** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
+** if isPrefix is true, to scan the doclist for all terms for which 
+** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
+** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
+** an SQLite error code.
+**
+** It is the responsibility of the caller to free this object by eventually
+** passing it to fts3SegReaderCursorFree() 
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+** Output parameter *ppSegcsr is set to 0 if an error occurs.
+*/
+static int fts3TermSegReaderCursor(
+  Fts3Cursor *pCsr,               /* Virtual table cursor handle */
+  const char *zTerm,              /* Term to query for */
+  int nTerm,                      /* Size of zTerm in bytes */
+  int isPrefix,                   /* True for a prefix search */
+  Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
+){
+  Fts3MultiSegReader *pSegcsr;    /* Object to allocate and return */
+  int rc = SQLITE_NOMEM;          /* Return code */
+
+  pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
+  if( pSegcsr ){
+    int i;
+    int bFound = 0;               /* True once an index has been found */
+    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+
+    if( isPrefix ){
+      for(i=1; bFound==0 && i<p->nIndex; i++){
+        if( p->aIndex[i].nPrefix==nTerm ){
+          bFound = 1;
+          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
+          );
+          pSegcsr->bLookup = 1;
+        }
+      }
+
+      for(i=1; bFound==0 && i<p->nIndex; i++){
+        if( p->aIndex[i].nPrefix==nTerm+1 ){
+          bFound = 1;
+          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
+          );
+          if( rc==SQLITE_OK ){
+            rc = fts3SegReaderCursorAddZero(
+                p, pCsr->iLangid, zTerm, nTerm, pSegcsr
+            );
+          }
+        }
+      }
+    }
+
+    if( bFound==0 ){
+      rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
+          0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
+      );
+      pSegcsr->bLookup = !isPrefix;
+    }
+  }
+
+  *ppSegcsr = pSegcsr;
+  return rc;
+}
+
+/*
+** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
+*/
+static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
+  sqlite3Fts3SegReaderFinish(pSegcsr);
+  sqlite3_free(pSegcsr);
+}
+
+/*
+** This function retrieves the doclist for the specified term (or term
+** prefix) from the database.
+*/
+static int fts3TermSelect(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3PhraseToken *pTok,          /* Token to query for */
+  int iColumn,                    /* Column to query (or -ve for all columns) */
+  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
+  char **ppOut                    /* OUT: Malloced result buffer */
+){
+  int rc;                         /* Return code */
+  Fts3MultiSegReader *pSegcsr;    /* Seg-reader cursor for this term */
+  TermSelect tsc;                 /* Object for pair-wise doclist merging */
+  Fts3SegFilter filter;           /* Segment term filter configuration */
+
+  pSegcsr = pTok->pSegcsr;
+  memset(&tsc, 0, sizeof(TermSelect));
+
+  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
+        | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
+        | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
+        | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
+  filter.iCol = iColumn;
+  filter.zTerm = pTok->z;
+  filter.nTerm = pTok->n;
+
+  rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
+  while( SQLITE_OK==rc
+      && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) 
+  ){
+    rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = fts3TermSelectFinishMerge(p, &tsc);
+  }
+  if( rc==SQLITE_OK ){
+    *ppOut = tsc.aaOutput[0];
+    *pnOut = tsc.anOutput[0];
+  }else{
+    int i;
+    for(i=0; i<SizeofArray(tsc.aaOutput); i++){
+      sqlite3_free(tsc.aaOutput[i]);
+    }
+  }
+
+  fts3SegReaderCursorFree(pSegcsr);
+  pTok->pSegcsr = 0;
+  return rc;
+}
+
+/*
+** This function counts the total number of docids in the doclist stored
+** in buffer aList[], size nList bytes.
+**
+** If the isPoslist argument is true, then it is assumed that the doclist
+** contains a position-list following each docid. Otherwise, it is assumed
+** that the doclist is simply a list of docids stored as delta encoded 
+** varints.
+*/
+static int fts3DoclistCountDocids(char *aList, int nList){
+  int nDoc = 0;                   /* Return value */
+  if( aList ){
+    char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
+    char *p = aList;              /* Cursor */
+    while( p<aEnd ){
+      nDoc++;
+      while( (*p++)&0x80 );     /* Skip docid varint */
+      fts3PoslistCopy(0, &p);   /* Skip over position list */
+    }
+  }
+
+  return nDoc;
+}
+
+/*
+** Advance the cursor to the next row in the %_content table that
+** matches the search criteria.  For a MATCH search, this will be
+** the next row that matches. For a full-table scan, this will be
+** simply the next row in the %_content table.  For a docid lookup,
+** this routine simply sets the EOF flag.
+**
+** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned
+** even if we reach end-of-file.  The fts3EofMethod() will be called
+** subsequently to determine whether or not an EOF was hit.
+*/
+static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
+  int rc;
+  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
+  if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){
+    if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
+      pCsr->isEof = 1;
+      rc = sqlite3_reset(pCsr->pStmt);
+    }else{
+      pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
+      rc = SQLITE_OK;
+    }
+  }else{
+    rc = fts3EvalNext((Fts3Cursor *)pCursor);
+  }
+  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
+  return rc;
+}
+
+/*
+** The following are copied from sqliteInt.h.
+**
+** Constants for the largest and smallest possible 64-bit signed integers.
+** These macros are designed to work correctly on both 32-bit and 64-bit
+** compilers.
+*/
+#ifndef SQLITE_AMALGAMATION
+# define LARGEST_INT64  (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
+# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
+#endif
+
+/*
+** If the numeric type of argument pVal is "integer", then return it
+** converted to a 64-bit signed integer. Otherwise, return a copy of
+** the second parameter, iDefault.
+*/
+static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
+  if( pVal ){
+    int eType = sqlite3_value_numeric_type(pVal);
+    if( eType==SQLITE_INTEGER ){
+      return sqlite3_value_int64(pVal);
+    }
+  }
+  return iDefault;
+}
+
+/*
+** This is the xFilter interface for the virtual table.  See
+** the virtual table xFilter method documentation for additional
+** information.
+**
+** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
+** the %_content table.
+**
+** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
+** in the %_content table.
+**
+** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index.  The
+** column on the left-hand side of the MATCH operator is column
+** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed.  argv[0] is the right-hand
+** side of the MATCH operator.
+*/
+static int fts3FilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  int rc = SQLITE_OK;
+  char *zSql;                     /* SQL statement used to access %_content */
+  int eSearch;
+  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
+  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
+
+  sqlite3_value *pCons = 0;       /* The MATCH or rowid constraint, if any */
+  sqlite3_value *pLangid = 0;     /* The "langid = ?" constraint, if any */
+  sqlite3_value *pDocidGe = 0;    /* The "docid >= ?" constraint, if any */
+  sqlite3_value *pDocidLe = 0;    /* The "docid <= ?" constraint, if any */
+  int iIdx;
+
+  UNUSED_PARAMETER(idxStr);
+  UNUSED_PARAMETER(nVal);
+
+  eSearch = (idxNum & 0x0000FFFF);
+  assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
+  assert( p->pSegments==0 );
+
+  /* Collect arguments into local variables */
+  iIdx = 0;
+  if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
+  if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
+  if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
+  if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
+  assert( iIdx==nVal );
+
+  /* In case the cursor has been used before, clear it now. */
+  sqlite3_finalize(pCsr->pStmt);
+  sqlite3_free(pCsr->aDoclist);
+  sqlite3_free(pCsr->aMatchinfo);
+  sqlite3Fts3ExprFree(pCsr->pExpr);
+  memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
+
+  /* Set the lower and upper bounds on docids to return */
+  pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
+  pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
+
+  if( idxStr ){
+    pCsr->bDesc = (idxStr[0]=='D');
+  }else{
+    pCsr->bDesc = p->bDescIdx;
+  }
+  pCsr->eSearch = (i16)eSearch;
+
+  if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
+    int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
+    const char *zQuery = (const char *)sqlite3_value_text(pCons);
+
+    if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
+      return SQLITE_NOMEM;
+    }
+
+    pCsr->iLangid = 0;
+    if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
+
+    assert( p->base.zErrMsg==0 );
+    rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
+        p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, 
+        &p->base.zErrMsg
+    );
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+
+    rc = fts3EvalStart(pCsr);
+    sqlite3Fts3SegmentsClose(p);
+    if( rc!=SQLITE_OK ) return rc;
+    pCsr->pNextId = pCsr->aDoclist;
+    pCsr->iPrevId = 0;
+  }
+
+  /* Compile a SELECT statement for this cursor. For a full-table-scan, the
+  ** statement loops through all rows of the %_content table. For a
+  ** full-text query or docid lookup, the statement retrieves a single
+  ** row by docid.
+  */
+  if( eSearch==FTS3_FULLSCAN_SEARCH ){
+    zSql = sqlite3_mprintf(
+        "SELECT %s ORDER BY rowid %s",
+        p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
+    );
+    if( zSql ){
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+      sqlite3_free(zSql);
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+  }else if( eSearch==FTS3_DOCID_SEARCH ){
+    rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
+    }
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  return fts3NextMethod(pCursor);
+}
+
+/* 
+** This is the xEof method of the virtual table. SQLite calls this 
+** routine to find out if it has reached the end of a result set.
+*/
+static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
+  return ((Fts3Cursor *)pCursor)->isEof;
+}
+
+/* 
+** This is the xRowid method. The SQLite core calls this routine to
+** retrieve the rowid for the current row of the result set. fts3
+** exposes %_content.docid as the rowid for the virtual table. The
+** rowid should be written to *pRowid.
+*/
+static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
+  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
+  *pRowid = pCsr->iPrevId;
+  return SQLITE_OK;
+}
+
+/* 
+** This is the xColumn method, called by SQLite to request a value from
+** the row that the supplied cursor currently points to.
+**
+** If:
+**
+**   (iCol <  p->nColumn)   -> The value of the iCol'th user column.
+**   (iCol == p->nColumn)   -> Magic column with the same name as the table.
+**   (iCol == p->nColumn+1) -> Docid column
+**   (iCol == p->nColumn+2) -> Langid column
+*/
+static int fts3ColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  int rc = SQLITE_OK;             /* Return Code */
+  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
+  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
+
+  /* The column value supplied by SQLite must be in range. */
+  assert( iCol>=0 && iCol<=p->nColumn+2 );
+
+  if( iCol==p->nColumn+1 ){
+    /* This call is a request for the "docid" column. Since "docid" is an 
+    ** alias for "rowid", use the xRowid() method to obtain the value.
+    */
+    sqlite3_result_int64(pCtx, pCsr->iPrevId);
+  }else if( iCol==p->nColumn ){
+    /* The extra column whose name is the same as the table.
+    ** Return a blob which is a pointer to the cursor.  */
+    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+  }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
+    sqlite3_result_int64(pCtx, pCsr->iLangid);
+  }else{
+    /* The requested column is either a user column (one that contains 
+    ** indexed data), or the language-id column.  */
+    rc = fts3CursorSeek(0, pCsr);
+
+    if( rc==SQLITE_OK ){
+      if( iCol==p->nColumn+2 ){
+        int iLangid = 0;
+        if( p->zLanguageid ){
+          iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
+        }
+        sqlite3_result_int(pCtx, iLangid);
+      }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
+        sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+      }
+    }
+  }
+
+  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
+  return rc;
+}
+
+/* 
+** This function is the implementation of the xUpdate callback used by 
+** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
+** inserted, updated or deleted.
+*/
+static int fts3UpdateMethod(
+  sqlite3_vtab *pVtab,            /* Virtual table handle */
+  int nArg,                       /* Size of argument array */
+  sqlite3_value **apVal,          /* Array of arguments */
+  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
+){
+  return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
+}
+
+/*
+** Implementation of xSync() method. Flush the contents of the pending-terms
+** hash-table to the database.
+*/
+static int fts3SyncMethod(sqlite3_vtab *pVtab){
+
+  /* Following an incremental-merge operation, assuming that the input
+  ** segments are not completely consumed (the usual case), they are updated
+  ** in place to remove the entries that have already been merged. This
+  ** involves updating the leaf block that contains the smallest unmerged
+  ** entry and each block (if any) between the leaf and the root node. So
+  ** if the height of the input segment b-trees is N, and input segments
+  ** are merged eight at a time, updating the input segments at the end
+  ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
+  ** small - often between 0 and 2. So the overhead of the incremental
+  ** merge is somewhere between 8 and 24 blocks. To avoid this overhead
+  ** dwarfing the actual productive work accomplished, the incremental merge
+  ** is only attempted if it will write at least 64 leaf blocks. Hence
+  ** nMinMerge.
+  **
+  ** Of course, updating the input segments also involves deleting a bunch
+  ** of blocks from the segments table. But this is not considered overhead
+  ** as it would also be required by a crisis-merge that used the same input 
+  ** segments.
+  */
+  const u32 nMinMerge = 64;       /* Minimum amount of incr-merge work to do */
+
+  Fts3Table *p = (Fts3Table*)pVtab;
+  int rc = sqlite3Fts3PendingTermsFlush(p);
+
+  if( rc==SQLITE_OK 
+   && p->nLeafAdd>(nMinMerge/16) 
+   && p->nAutoincrmerge && p->nAutoincrmerge!=0xff
+  ){
+    int mxLevel = 0;              /* Maximum relative level value in db */
+    int A;                        /* Incr-merge parameter A */
+
+    rc = sqlite3Fts3MaxLevel(p, &mxLevel);
+    assert( rc==SQLITE_OK || mxLevel==0 );
+    A = p->nLeafAdd * mxLevel;
+    A += (A/2);
+    if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);
+  }
+  sqlite3Fts3SegmentsClose(p);
+  return rc;
+}
+
+/*
+** If it is currently unknown whether or not the FTS table has an %_stat
+** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat
+** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code
+** if an error occurs.
+*/
+static int fts3SetHasStat(Fts3Table *p){
+  int rc = SQLITE_OK;
+  if( p->bHasStat==2 ){
+    const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'";
+    char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName);
+    if( zSql ){
+      sqlite3_stmt *pStmt = 0;
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+      if( rc==SQLITE_OK ){
+        int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW);
+        rc = sqlite3_finalize(pStmt);
+        if( rc==SQLITE_OK ) p->bHasStat = bHasStat;
+      }
+      sqlite3_free(zSql);
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+  }
+  return rc;
+}
+
+/*
+** Implementation of xBegin() method. 
+*/
+static int fts3BeginMethod(sqlite3_vtab *pVtab){
+  Fts3Table *p = (Fts3Table*)pVtab;
+  UNUSED_PARAMETER(pVtab);
+  assert( p->pSegments==0 );
+  assert( p->nPendingData==0 );
+  assert( p->inTransaction!=1 );
+  TESTONLY( p->inTransaction = 1 );
+  TESTONLY( p->mxSavepoint = -1; );
+  p->nLeafAdd = 0;
+  return fts3SetHasStat(p);
+}
+
+/*
+** Implementation of xCommit() method. This is a no-op. The contents of
+** the pending-terms hash-table have already been flushed into the database
+** by fts3SyncMethod().
+*/
+static int fts3CommitMethod(sqlite3_vtab *pVtab){
+  TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
+  UNUSED_PARAMETER(pVtab);
+  assert( p->nPendingData==0 );
+  assert( p->inTransaction!=0 );
+  assert( p->pSegments==0 );
+  TESTONLY( p->inTransaction = 0 );
+  TESTONLY( p->mxSavepoint = -1; );
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of xRollback(). Discard the contents of the pending-terms
+** hash-table. Any changes made to the database are reverted by SQLite.
+*/
+static int fts3RollbackMethod(sqlite3_vtab *pVtab){
+  Fts3Table *p = (Fts3Table*)pVtab;
+  sqlite3Fts3PendingTermsClear(p);
+  assert( p->inTransaction!=0 );
+  TESTONLY( p->inTransaction = 0 );
+  TESTONLY( p->mxSavepoint = -1; );
+  return SQLITE_OK;
+}
+
+/*
+** When called, *ppPoslist must point to the byte immediately following the
+** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
+** moves *ppPoslist so that it instead points to the first byte of the
+** same position list.
+*/
+static void fts3ReversePoslist(char *pStart, char **ppPoslist){
+  char *p = &(*ppPoslist)[-2];
+  char c = 0;
+
+  while( p>pStart && (c=*p--)==0 );
+  while( p>pStart && (*p & 0x80) | c ){ 
+    c = *p--; 
+  }
+  if( p>pStart ){ p = &p[2]; }
+  while( *p++&0x80 );
+  *ppPoslist = p;
+}
+
+/*
+** Helper function used by the implementation of the overloaded snippet(),
+** offsets() and optimize() SQL functions.
+**
+** If the value passed as the third argument is a blob of size
+** sizeof(Fts3Cursor*), then the blob contents are copied to the 
+** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
+** message is written to context pContext and SQLITE_ERROR returned. The
+** string passed via zFunc is used as part of the error message.
+*/
+static int fts3FunctionArg(
+  sqlite3_context *pContext,      /* SQL function call context */
+  const char *zFunc,              /* Function name */
+  sqlite3_value *pVal,            /* argv[0] passed to function */
+  Fts3Cursor **ppCsr              /* OUT: Store cursor handle here */
+){
+  Fts3Cursor *pRet;
+  if( sqlite3_value_type(pVal)!=SQLITE_BLOB 
+   || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *)
+  ){
+    char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
+    sqlite3_result_error(pContext, zErr, -1);
+    sqlite3_free(zErr);
+    return SQLITE_ERROR;
+  }
+  memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
+  *ppCsr = pRet;
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of the snippet() function for FTS3
+*/
+static void fts3SnippetFunc(
+  sqlite3_context *pContext,      /* SQLite function call context */
+  int nVal,                       /* Size of apVal[] array */
+  sqlite3_value **apVal           /* Array of arguments */
+){
+  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
+  const char *zStart = "<b>";
+  const char *zEnd = "</b>";
+  const char *zEllipsis = "<b>...</b>";
+  int iCol = -1;
+  int nToken = 15;                /* Default number of tokens in snippet */
+
+  /* There must be at least one argument passed to this function (otherwise
+  ** the non-overloaded version would have been called instead of this one).
+  */
+  assert( nVal>=1 );
+
+  if( nVal>6 ){
+    sqlite3_result_error(pContext, 
+        "wrong number of arguments to function snippet()", -1);
+    return;
+  }
+  if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
+
+  switch( nVal ){
+    case 6: nToken = sqlite3_value_int(apVal[5]);
+    case 5: iCol = sqlite3_value_int(apVal[4]);
+    case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
+    case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
+    case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
+  }
+  if( !zEllipsis || !zEnd || !zStart ){
+    sqlite3_result_error_nomem(pContext);
+  }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
+    sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
+  }
+}
+
+/*
+** Implementation of the offsets() function for FTS3
+*/
+static void fts3OffsetsFunc(
+  sqlite3_context *pContext,      /* SQLite function call context */
+  int nVal,                       /* Size of argument array */
+  sqlite3_value **apVal           /* Array of arguments */
+){
+  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
+
+  UNUSED_PARAMETER(nVal);
+
+  assert( nVal==1 );
+  if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
+  assert( pCsr );
+  if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
+    sqlite3Fts3Offsets(pContext, pCsr);
+  }
+}
+
+/* 
+** Implementation of the special optimize() function for FTS3. This 
+** function merges all segments in the database to a single segment.
+** Example usage is:
+**
+**   SELECT optimize(t) FROM t LIMIT 1;
+**
+** where 't' is the name of an FTS3 table.
+*/
+static void fts3OptimizeFunc(
+  sqlite3_context *pContext,      /* SQLite function call context */
+  int nVal,                       /* Size of argument array */
+  sqlite3_value **apVal           /* Array of arguments */
+){
+  int rc;                         /* Return code */
+  Fts3Table *p;                   /* Virtual table handle */
+  Fts3Cursor *pCursor;            /* Cursor handle passed through apVal[0] */
+
+  UNUSED_PARAMETER(nVal);
+
+  assert( nVal==1 );
+  if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
+  p = (Fts3Table *)pCursor->base.pVtab;
+  assert( p );
+
+  rc = sqlite3Fts3Optimize(p);
+
+  switch( rc ){
+    case SQLITE_OK:
+      sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
+      break;
+    case SQLITE_DONE:
+      sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
+      break;
+    default:
+      sqlite3_result_error_code(pContext, rc);
+      break;
+  }
+}
+
+/*
+** Implementation of the matchinfo() function for FTS3
+*/
+static void fts3MatchinfoFunc(
+  sqlite3_context *pContext,      /* SQLite function call context */
+  int nVal,                       /* Size of argument array */
+  sqlite3_value **apVal           /* Array of arguments */
+){
+  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
+  assert( nVal==1 || nVal==2 );
+  if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
+    const char *zArg = 0;
+    if( nVal>1 ){
+      zArg = (const char *)sqlite3_value_text(apVal[1]);
+    }
+    sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
+  }
+}
+
+/*
+** This routine implements the xFindFunction method for the FTS3
+** virtual table.
+*/
+static int fts3FindFunctionMethod(
+  sqlite3_vtab *pVtab,            /* Virtual table handle */
+  int nArg,                       /* Number of SQL function arguments */
+  const char *zName,              /* Name of SQL function */
+  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
+  void **ppArg                    /* Unused */
+){
+  struct Overloaded {
+    const char *zName;
+    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
+  } aOverload[] = {
+    { "snippet", fts3SnippetFunc },
+    { "offsets", fts3OffsetsFunc },
+    { "optimize", fts3OptimizeFunc },
+    { "matchinfo", fts3MatchinfoFunc },
+  };
+  int i;                          /* Iterator variable */
+
+  UNUSED_PARAMETER(pVtab);
+  UNUSED_PARAMETER(nArg);
+  UNUSED_PARAMETER(ppArg);
+
+  for(i=0; i<SizeofArray(aOverload); i++){
+    if( strcmp(zName, aOverload[i].zName)==0 ){
+      *pxFunc = aOverload[i].xFunc;
+      return 1;
+    }
+  }
+
+  /* No function of the specified name was found. Return 0. */
+  return 0;
+}
+
+/*
+** Implementation of FTS3 xRename method. Rename an fts3 table.
+*/
+static int fts3RenameMethod(
+  sqlite3_vtab *pVtab,            /* Virtual table handle */
+  const char *zName               /* New name of table */
+){
+  Fts3Table *p = (Fts3Table *)pVtab;
+  sqlite3 *db = p->db;            /* Database connection */
+  int rc;                         /* Return Code */
+
+  /* At this point it must be known if the %_stat table exists or not.
+  ** So bHasStat may not be 2.  */
+  rc = fts3SetHasStat(p);
+  
+  /* As it happens, the pending terms table is always empty here. This is
+  ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction 
+  ** always opens a savepoint transaction. And the xSavepoint() method 
+  ** flushes the pending terms table. But leave the (no-op) call to
+  ** PendingTermsFlush() in in case that changes.
+  */
+  assert( p->nPendingData==0 );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts3PendingTermsFlush(p);
+  }
+
+  if( p->zContentTbl==0 ){
+    fts3DbExec(&rc, db,
+      "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';",
+      p->zDb, p->zName, zName
+    );
+  }
+
+  if( p->bHasDocsize ){
+    fts3DbExec(&rc, db,
+      "ALTER TABLE %Q.'%q_docsize'  RENAME TO '%q_docsize';",
+      p->zDb, p->zName, zName
+    );
+  }
+  if( p->bHasStat ){
+    fts3DbExec(&rc, db,
+      "ALTER TABLE %Q.'%q_stat'  RENAME TO '%q_stat';",
+      p->zDb, p->zName, zName
+    );
+  }
+  fts3DbExec(&rc, db,
+    "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
+    p->zDb, p->zName, zName
+  );
+  fts3DbExec(&rc, db,
+    "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';",
+    p->zDb, p->zName, zName
+  );
+  return rc;
+}
+
+/*
+** The xSavepoint() method.
+**
+** Flush the contents of the pending-terms table to disk.
+*/
+static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  int rc = SQLITE_OK;
+  UNUSED_PARAMETER(iSavepoint);
+  assert( ((Fts3Table *)pVtab)->inTransaction );
+  assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
+  TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
+  if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){
+    rc = fts3SyncMethod(pVtab);
+  }
+  return rc;
+}
+
+/*
+** The xRelease() method.
+**
+** This is a no-op.
+*/
+static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
+  UNUSED_PARAMETER(iSavepoint);
+  UNUSED_PARAMETER(pVtab);
+  assert( p->inTransaction );
+  assert( p->mxSavepoint >= iSavepoint );
+  TESTONLY( p->mxSavepoint = iSavepoint-1 );
+  return SQLITE_OK;
+}
+
+/*
+** The xRollbackTo() method.
+**
+** Discard the contents of the pending terms table.
+*/
+static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  Fts3Table *p = (Fts3Table*)pVtab;
+  UNUSED_PARAMETER(iSavepoint);
+  assert( p->inTransaction );
+  assert( p->mxSavepoint >= iSavepoint );
+  TESTONLY( p->mxSavepoint = iSavepoint );
+  sqlite3Fts3PendingTermsClear(p);
+  return SQLITE_OK;
+}
+
+static const sqlite3_module fts3Module = {
+  /* iVersion      */ 2,
+  /* xCreate       */ fts3CreateMethod,
+  /* xConnect      */ fts3ConnectMethod,
+  /* xBestIndex    */ fts3BestIndexMethod,
+  /* xDisconnect   */ fts3DisconnectMethod,
+  /* xDestroy      */ fts3DestroyMethod,
+  /* xOpen         */ fts3OpenMethod,
+  /* xClose        */ fts3CloseMethod,
+  /* xFilter       */ fts3FilterMethod,
+  /* xNext         */ fts3NextMethod,
+  /* xEof          */ fts3EofMethod,
+  /* xColumn       */ fts3ColumnMethod,
+  /* xRowid        */ fts3RowidMethod,
+  /* xUpdate       */ fts3UpdateMethod,
+  /* xBegin        */ fts3BeginMethod,
+  /* xSync         */ fts3SyncMethod,
+  /* xCommit       */ fts3CommitMethod,
+  /* xRollback     */ fts3RollbackMethod,
+  /* xFindFunction */ fts3FindFunctionMethod,
+  /* xRename */       fts3RenameMethod,
+  /* xSavepoint    */ fts3SavepointMethod,
+  /* xRelease      */ fts3ReleaseMethod,
+  /* xRollbackTo   */ fts3RollbackToMethod,
+};
+
+/*
+** This function is registered as the module destructor (called when an
+** FTS3 enabled database connection is closed). It frees the memory
+** allocated for the tokenizer hash table.
+*/
+static void hashDestroy(void *p){
+  Fts3Hash *pHash = (Fts3Hash *)p;
+  sqlite3Fts3HashClear(pHash);
+  sqlite3_free(pHash);
+}
+
+/*
+** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are 
+** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c
+** respectively. The following three forward declarations are for functions
+** declared in these files used to retrieve the respective implementations.
+**
+** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
+** to by the argument to point to the "simple" tokenizer implementation.
+** And so on.
+*/
+SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
+#endif
+#ifdef SQLITE_ENABLE_ICU
+SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+#endif
+
+/*
+** Initialize the fts3 extension. If this extension is built as part
+** of the sqlite library, then this function is called directly by
+** SQLite. If fts3 is built as a dynamically loadable extension, this
+** function is called by the sqlite3_extension_init() entry point.
+*/
+SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
+  int rc = SQLITE_OK;
+  Fts3Hash *pHash = 0;
+  const sqlite3_tokenizer_module *pSimple = 0;
+  const sqlite3_tokenizer_module *pPorter = 0;
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+  const sqlite3_tokenizer_module *pUnicode = 0;
+#endif
+
+#ifdef SQLITE_ENABLE_ICU
+  const sqlite3_tokenizer_module *pIcu = 0;
+  sqlite3Fts3IcuTokenizerModule(&pIcu);
+#endif
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+  sqlite3Fts3UnicodeTokenizer(&pUnicode);
+#endif
+
+#ifdef SQLITE_TEST
+  rc = sqlite3Fts3InitTerm(db);
+  if( rc!=SQLITE_OK ) return rc;
+#endif
+
+  rc = sqlite3Fts3InitAux(db);
+  if( rc!=SQLITE_OK ) return rc;
+
+  sqlite3Fts3SimpleTokenizerModule(&pSimple);
+  sqlite3Fts3PorterTokenizerModule(&pPorter);
+
+  /* Allocate and initialize the hash-table used to store tokenizers. */
+  pHash = sqlite3_malloc(sizeof(Fts3Hash));
+  if( !pHash ){
+    rc = SQLITE_NOMEM;
+  }else{
+    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
+  }
+
+  /* Load the built-in tokenizers into the hash table */
+  if( rc==SQLITE_OK ){
+    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
+     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+     || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) 
+#endif
+#ifdef SQLITE_ENABLE_ICU
+     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
+#endif
+    ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+
+#ifdef SQLITE_TEST
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts3ExprInitTestInterface(db);
+  }
+#endif
+
+  /* Create the virtual table wrapper around the hash-table and overload 
+  ** the two scalar functions. If this is successful, register the
+  ** module with sqlite.
+  */
+  if( SQLITE_OK==rc 
+   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
+   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
+  ){
+    rc = sqlite3_create_module_v2(
+        db, "fts3", &fts3Module, (void *)pHash, hashDestroy
+    );
+    if( rc==SQLITE_OK ){
+      rc = sqlite3_create_module_v2(
+          db, "fts4", &fts3Module, (void *)pHash, 0
+      );
+    }
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts3InitTok(db, (void *)pHash);
+    }
+    return rc;
+  }
+
+
+  /* An error has occurred. Delete the hash table and return the error code. */
+  assert( rc!=SQLITE_OK );
+  if( pHash ){
+    sqlite3Fts3HashClear(pHash);
+    sqlite3_free(pHash);
+  }
+  return rc;
+}
+
+/*
+** Allocate an Fts3MultiSegReader for each token in the expression headed
+** by pExpr. 
+**
+** An Fts3SegReader object is a cursor that can seek or scan a range of
+** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
+** Fts3SegReader objects internally to provide an interface to seek or scan
+** within the union of all segments of a b-tree. Hence the name.
+**
+** If the allocated Fts3MultiSegReader just seeks to a single entry in a
+** segment b-tree (if the term is not a prefix or it is a prefix for which
+** there exists prefix b-tree of the right length) then it may be traversed
+** and merged incrementally. Otherwise, it has to be merged into an in-memory 
+** doclist and then traversed.
+*/
+static void fts3EvalAllocateReaders(
+  Fts3Cursor *pCsr,               /* FTS cursor handle */
+  Fts3Expr *pExpr,                /* Allocate readers for this expression */
+  int *pnToken,                   /* OUT: Total number of tokens in phrase. */
+  int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  if( pExpr && SQLITE_OK==*pRc ){
+    if( pExpr->eType==FTSQUERY_PHRASE ){
+      int i;
+      int nToken = pExpr->pPhrase->nToken;
+      *pnToken += nToken;
+      for(i=0; i<nToken; i++){
+        Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
+        int rc = fts3TermSegReaderCursor(pCsr, 
+            pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
+        );
+        if( rc!=SQLITE_OK ){
+          *pRc = rc;
+          return;
+        }
+      }
+      assert( pExpr->pPhrase->iDoclistToken==0 );
+      pExpr->pPhrase->iDoclistToken = -1;
+    }else{
+      *pnOr += (pExpr->eType==FTSQUERY_OR);
+      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
+      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
+    }
+  }
+}
+
+/*
+** Arguments pList/nList contain the doclist for token iToken of phrase p.
+** It is merged into the main doclist stored in p->doclist.aAll/nAll.
+**
+** This function assumes that pList points to a buffer allocated using
+** sqlite3_malloc(). This function takes responsibility for eventually
+** freeing the buffer.
+*/
+static void fts3EvalPhraseMergeToken(
+  Fts3Table *pTab,                /* FTS Table pointer */
+  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
+  int iToken,                     /* Token pList/nList corresponds to */
+  char *pList,                    /* Pointer to doclist */
+  int nList                       /* Number of bytes in pList */
+){
+  assert( iToken!=p->iDoclistToken );
+
+  if( pList==0 ){
+    sqlite3_free(p->doclist.aAll);
+    p->doclist.aAll = 0;
+    p->doclist.nAll = 0;
+  }
+
+  else if( p->iDoclistToken<0 ){
+    p->doclist.aAll = pList;
+    p->doclist.nAll = nList;
+  }
+
+  else if( p->doclist.aAll==0 ){
+    sqlite3_free(pList);
+  }
+
+  else {
+    char *pLeft;
+    char *pRight;
+    int nLeft;
+    int nRight;
+    int nDiff;
+
+    if( p->iDoclistToken<iToken ){
+      pLeft = p->doclist.aAll;
+      nLeft = p->doclist.nAll;
+      pRight = pList;
+      nRight = nList;
+      nDiff = iToken - p->iDoclistToken;
+    }else{
+      pRight = p->doclist.aAll;
+      nRight = p->doclist.nAll;
+      pLeft = pList;
+      nLeft = nList;
+      nDiff = p->iDoclistToken - iToken;
+    }
+
+    fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
+    sqlite3_free(pLeft);
+    p->doclist.aAll = pRight;
+    p->doclist.nAll = nRight;
+  }
+
+  if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
+}
+
+/*
+** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
+** does not take deferred tokens into account.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
+static int fts3EvalPhraseLoad(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Phrase *p                   /* Phrase object */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int iToken;
+  int rc = SQLITE_OK;
+
+  for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
+    Fts3PhraseToken *pToken = &p->aToken[iToken];
+    assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
+
+    if( pToken->pSegcsr ){
+      int nThis = 0;
+      char *pThis = 0;
+      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
+      if( rc==SQLITE_OK ){
+        fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
+      }
+    }
+    assert( pToken->pSegcsr==0 );
+  }
+
+  return rc;
+}
+
+/*
+** This function is called on each phrase after the position lists for
+** any deferred tokens have been loaded into memory. It updates the phrases
+** current position list to include only those positions that are really
+** instances of the phrase (after considering deferred tokens). If this
+** means that the phrase does not appear in the current row, doclist.pList
+** and doclist.nList are both zeroed.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
+static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
+  int iToken;                     /* Used to iterate through phrase tokens */
+  char *aPoslist = 0;             /* Position list for deferred tokens */
+  int nPoslist = 0;               /* Number of bytes in aPoslist */
+  int iPrev = -1;                 /* Token number of previous deferred token */
+
+  assert( pPhrase->doclist.bFreeList==0 );
+
+  for(iToken=0; iToken<pPhrase->nToken; iToken++){
+    Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
+    Fts3DeferredToken *pDeferred = pToken->pDeferred;
+
+    if( pDeferred ){
+      char *pList;
+      int nList;
+      int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
+      if( rc!=SQLITE_OK ) return rc;
+
+      if( pList==0 ){
+        sqlite3_free(aPoslist);
+        pPhrase->doclist.pList = 0;
+        pPhrase->doclist.nList = 0;
+        return SQLITE_OK;
+
+      }else if( aPoslist==0 ){
+        aPoslist = pList;
+        nPoslist = nList;
+
+      }else{
+        char *aOut = pList;
+        char *p1 = aPoslist;
+        char *p2 = aOut;
+
+        assert( iPrev>=0 );
+        fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
+        sqlite3_free(aPoslist);
+        aPoslist = pList;
+        nPoslist = (int)(aOut - aPoslist);
+        if( nPoslist==0 ){
+          sqlite3_free(aPoslist);
+          pPhrase->doclist.pList = 0;
+          pPhrase->doclist.nList = 0;
+          return SQLITE_OK;
+        }
+      }
+      iPrev = iToken;
+    }
+  }
+
+  if( iPrev>=0 ){
+    int nMaxUndeferred = pPhrase->iDoclistToken;
+    if( nMaxUndeferred<0 ){
+      pPhrase->doclist.pList = aPoslist;
+      pPhrase->doclist.nList = nPoslist;
+      pPhrase->doclist.iDocid = pCsr->iPrevId;
+      pPhrase->doclist.bFreeList = 1;
+    }else{
+      int nDistance;
+      char *p1;
+      char *p2;
+      char *aOut;
+
+      if( nMaxUndeferred>iPrev ){
+        p1 = aPoslist;
+        p2 = pPhrase->doclist.pList;
+        nDistance = nMaxUndeferred - iPrev;
+      }else{
+        p1 = pPhrase->doclist.pList;
+        p2 = aPoslist;
+        nDistance = iPrev - nMaxUndeferred;
+      }
+
+      aOut = (char *)sqlite3_malloc(nPoslist+8);
+      if( !aOut ){
+        sqlite3_free(aPoslist);
+        return SQLITE_NOMEM;
+      }
+      
+      pPhrase->doclist.pList = aOut;
+      if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
+        pPhrase->doclist.bFreeList = 1;
+        pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList);
+      }else{
+        sqlite3_free(aOut);
+        pPhrase->doclist.pList = 0;
+        pPhrase->doclist.nList = 0;
+      }
+      sqlite3_free(aPoslist);
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Maximum number of tokens a phrase may have to be considered for the
+** incremental doclists strategy.
+*/
+#define MAX_INCR_PHRASE_TOKENS 4
+
+/*
+** This function is called for each Fts3Phrase in a full-text query 
+** expression to initialize the mechanism for returning rows. Once this
+** function has been called successfully on an Fts3Phrase, it may be
+** used with fts3EvalPhraseNext() to iterate through the matching docids.
+**
+** If parameter bOptOk is true, then the phrase may (or may not) use the
+** incremental loading strategy. Otherwise, the entire doclist is loaded into
+** memory within this call.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
+static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc = SQLITE_OK;             /* Error code */
+  int i;
+
+  /* Determine if doclists may be loaded from disk incrementally. This is
+  ** possible if the bOptOk argument is true, the FTS doclists will be
+  ** scanned in forward order, and the phrase consists of 
+  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first"
+  ** tokens or prefix tokens that cannot use a prefix-index.  */
+  int bHaveIncr = 0;
+  int bIncrOk = (bOptOk 
+   && pCsr->bDesc==pTab->bDescIdx 
+   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
+   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
+#ifdef SQLITE_TEST
+   && pTab->bNoIncrDoclist==0
+#endif
+  );
+  for(i=0; bIncrOk==1 && i<p->nToken; i++){
+    Fts3PhraseToken *pToken = &p->aToken[i];
+    if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){
+      bIncrOk = 0;
+    }
+    if( pToken->pSegcsr ) bHaveIncr = 1;
+  }
+
+  if( bIncrOk && bHaveIncr ){
+    /* Use the incremental approach. */
+    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
+    for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
+      Fts3PhraseToken *pToken = &p->aToken[i];
+      Fts3MultiSegReader *pSegcsr = pToken->pSegcsr;
+      if( pSegcsr ){
+        rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n);
+      }
+    }
+    p->bIncr = 1;
+  }else{
+    /* Load the full doclist for the phrase into memory. */
+    rc = fts3EvalPhraseLoad(pCsr, p);
+    p->bIncr = 0;
+  }
+
+  assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
+  return rc;
+}
+
+/*
+** This function is used to iterate backwards (from the end to start) 
+** through doclists. It is used by this module to iterate through phrase
+** doclists in reverse and by the fts3_write.c module to iterate through
+** pending-terms lists when writing to databases with "order=desc".
+**
+** The doclist may be sorted in ascending (parameter bDescIdx==0) or 
+** descending (parameter bDescIdx==1) order of docid. Regardless, this
+** function iterates from the end of the doclist to the beginning.
+*/
+SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(
+  int bDescIdx,                   /* True if the doclist is desc */
+  char *aDoclist,                 /* Pointer to entire doclist */
+  int nDoclist,                   /* Length of aDoclist in bytes */
+  char **ppIter,                  /* IN/OUT: Iterator pointer */
+  sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
+  int *pnList,                    /* OUT: List length pointer */
+  u8 *pbEof                       /* OUT: End-of-file flag */
+){
+  char *p = *ppIter;
+
+  assert( nDoclist>0 );
+  assert( *pbEof==0 );
+  assert( p || *piDocid==0 );
+  assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
+
+  if( p==0 ){
+    sqlite3_int64 iDocid = 0;
+    char *pNext = 0;
+    char *pDocid = aDoclist;
+    char *pEnd = &aDoclist[nDoclist];
+    int iMul = 1;
+
+    while( pDocid<pEnd ){
+      sqlite3_int64 iDelta;
+      pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
+      iDocid += (iMul * iDelta);
+      pNext = pDocid;
+      fts3PoslistCopy(0, &pDocid);
+      while( pDocid<pEnd && *pDocid==0 ) pDocid++;
+      iMul = (bDescIdx ? -1 : 1);
+    }
+
+    *pnList = (int)(pEnd - pNext);
+    *ppIter = pNext;
+    *piDocid = iDocid;
+  }else{
+    int iMul = (bDescIdx ? -1 : 1);
+    sqlite3_int64 iDelta;
+    fts3GetReverseVarint(&p, aDoclist, &iDelta);
+    *piDocid -= (iMul * iDelta);
+
+    if( p==aDoclist ){
+      *pbEof = 1;
+    }else{
+      char *pSave = p;
+      fts3ReversePoslist(aDoclist, &p);
+      *pnList = (int)(pSave - p);
+    }
+    *ppIter = p;
+  }
+}
+
+/*
+** Iterate forwards through a doclist.
+*/
+SQLITE_PRIVATE void sqlite3Fts3DoclistNext(
+  int bDescIdx,                   /* True if the doclist is desc */
+  char *aDoclist,                 /* Pointer to entire doclist */
+  int nDoclist,                   /* Length of aDoclist in bytes */
+  char **ppIter,                  /* IN/OUT: Iterator pointer */
+  sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
+  u8 *pbEof                       /* OUT: End-of-file flag */
+){
+  char *p = *ppIter;
+
+  assert( nDoclist>0 );
+  assert( *pbEof==0 );
+  assert( p || *piDocid==0 );
+  assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) );
+
+  if( p==0 ){
+    p = aDoclist;
+    p += sqlite3Fts3GetVarint(p, piDocid);
+  }else{
+    fts3PoslistCopy(0, &p);
+    if( p>=&aDoclist[nDoclist] ){
+      *pbEof = 1;
+    }else{
+      sqlite3_int64 iVar;
+      p += sqlite3Fts3GetVarint(p, &iVar);
+      *piDocid += ((bDescIdx ? -1 : 1) * iVar);
+    }
+  }
+
+  *ppIter = p;
+}
+
+/*
+** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof
+** to true if EOF is reached.
+*/
+static void fts3EvalDlPhraseNext(
+  Fts3Table *pTab,
+  Fts3Doclist *pDL,
+  u8 *pbEof
+){
+  char *pIter;                            /* Used to iterate through aAll */
+  char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */
+ 
+  if( pDL->pNextDocid ){
+    pIter = pDL->pNextDocid;
+  }else{
+    pIter = pDL->aAll;
+  }
+
+  if( pIter>=pEnd ){
+    /* We have already reached the end of this doclist. EOF. */
+    *pbEof = 1;
+  }else{
+    sqlite3_int64 iDelta;
+    pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
+    if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
+      pDL->iDocid += iDelta;
+    }else{
+      pDL->iDocid -= iDelta;
+    }
+    pDL->pList = pIter;
+    fts3PoslistCopy(0, &pIter);
+    pDL->nList = (int)(pIter - pDL->pList);
+
+    /* pIter now points just past the 0x00 that terminates the position-
+    ** list for document pDL->iDocid. However, if this position-list was
+    ** edited in place by fts3EvalNearTrim(), then pIter may not actually
+    ** point to the start of the next docid value. The following line deals
+    ** with this case by advancing pIter past the zero-padding added by
+    ** fts3EvalNearTrim().  */
+    while( pIter<pEnd && *pIter==0 ) pIter++;
+
+    pDL->pNextDocid = pIter;
+    assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
+    *pbEof = 0;
+  }
+}
+
+/*
+** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
+*/
+typedef struct TokenDoclist TokenDoclist;
+struct TokenDoclist {
+  int bIgnore;
+  sqlite3_int64 iDocid;
+  char *pList;
+  int nList;
+};
+
+/*
+** Token pToken is an incrementally loaded token that is part of a 
+** multi-token phrase. Advance it to the next matching document in the
+** database and populate output variable *p with the details of the new
+** entry. Or, if the iterator has reached EOF, set *pbEof to true.
+**
+** If an error occurs, return an SQLite error code. Otherwise, return 
+** SQLITE_OK.
+*/
+static int incrPhraseTokenNext(
+  Fts3Table *pTab,                /* Virtual table handle */
+  Fts3Phrase *pPhrase,            /* Phrase to advance token of */
+  int iToken,                     /* Specific token to advance */
+  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
+  u8 *pbEof                       /* OUT: True if iterator is at EOF */
+){
+  int rc = SQLITE_OK;
+
+  if( pPhrase->iDoclistToken==iToken ){
+    assert( p->bIgnore==0 );
+    assert( pPhrase->aToken[iToken].pSegcsr==0 );
+    fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof);
+    p->pList = pPhrase->doclist.pList;
+    p->nList = pPhrase->doclist.nList;
+    p->iDocid = pPhrase->doclist.iDocid;
+  }else{
+    Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
+    assert( pToken->pDeferred==0 );
+    assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 );
+    if( pToken->pSegcsr ){
+      assert( p->bIgnore==0 );
+      rc = sqlite3Fts3MsrIncrNext(
+          pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
+      );
+      if( p->pList==0 ) *pbEof = 1;
+    }else{
+      p->bIgnore = 1;
+    }
+  }
+
+  return rc;
+}
+
+
+/*
+** The phrase iterator passed as the second argument:
+**
+**   * features at least one token that uses an incremental doclist, and 
+**
+**   * does not contain any deferred tokens.
+**
+** Advance it to the next matching documnent in the database and populate
+** the Fts3Doclist.pList and nList fields. 
+**
+** If there is no "next" entry and no error occurs, then *pbEof is set to
+** 1 before returning. Otherwise, if no error occurs and the iterator is
+** successfully advanced, *pbEof is set to 0.
+**
+** If an error occurs, return an SQLite error code. Otherwise, return 
+** SQLITE_OK.
+*/
+static int fts3EvalIncrPhraseNext(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
+  u8 *pbEof                       /* OUT: Set to 1 if EOF */
+){
+  int rc = SQLITE_OK;
+  Fts3Doclist *pDL = &p->doclist;
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  u8 bEof = 0;
+
+  /* This is only called if it is guaranteed that the phrase has at least
+  ** one incremental token. In which case the bIncr flag is set. */
+  assert( p->bIncr==1 );
+
+  if( p->nToken==1 && p->bIncr ){
+    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
+        &pDL->iDocid, &pDL->pList, &pDL->nList
+    );
+    if( pDL->pList==0 ) bEof = 1;
+  }else{
+    int bDescDoclist = pCsr->bDesc;
+    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
+
+    memset(a, 0, sizeof(a));
+    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
+    assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );
+
+    while( bEof==0 ){
+      int bMaxSet = 0;
+      sqlite3_int64 iMax = 0;     /* Largest docid for all iterators */
+      int i;                      /* Used to iterate through tokens */
+
+      /* Advance the iterator for each token in the phrase once. */
+      for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
+        rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+        if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
+          iMax = a[i].iDocid;
+          bMaxSet = 1;
+        }
+      }
+      assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) );
+      assert( rc!=SQLITE_OK || bMaxSet );
+
+      /* Keep advancing iterators until they all point to the same document */
+      for(i=0; i<p->nToken; i++){
+        while( rc==SQLITE_OK && bEof==0 
+            && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 
+        ){
+          rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+          if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
+            iMax = a[i].iDocid;
+            i = 0;
+          }
+        }
+      }
+
+      /* Check if the current entries really are a phrase match */
+      if( bEof==0 ){
+        int nList = 0;
+        int nByte = a[p->nToken-1].nList;
+        char *aDoclist = sqlite3_malloc(nByte+1);
+        if( !aDoclist ) return SQLITE_NOMEM;
+        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
+
+        for(i=0; i<(p->nToken-1); i++){
+          if( a[i].bIgnore==0 ){
+            char *pL = a[i].pList;
+            char *pR = aDoclist;
+            char *pOut = aDoclist;
+            int nDist = p->nToken-1-i;
+            int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
+            if( res==0 ) break;
+            nList = (int)(pOut - aDoclist);
+          }
+        }
+        if( i==(p->nToken-1) ){
+          pDL->iDocid = iMax;
+          pDL->pList = aDoclist;
+          pDL->nList = nList;
+          pDL->bFreeList = 1;
+          break;
+        }
+        sqlite3_free(aDoclist);
+      }
+    }
+  }
+
+  *pbEof = bEof;
+  return rc;
+}
+
+/*
+** Attempt to move the phrase iterator to point to the next matching docid. 
+** If an error occurs, return an SQLite error code. Otherwise, return 
+** SQLITE_OK.
+**
+** If there is no "next" entry and no error occurs, then *pbEof is set to
+** 1 before returning. Otherwise, if no error occurs and the iterator is
+** successfully advanced, *pbEof is set to 0.
+*/
+static int fts3EvalPhraseNext(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
+  u8 *pbEof                       /* OUT: Set to 1 if EOF */
+){
+  int rc = SQLITE_OK;
+  Fts3Doclist *pDL = &p->doclist;
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+
+  if( p->bIncr ){
+    rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
+  }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
+    sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, 
+        &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
+    );
+    pDL->pList = pDL->pNextDocid;
+  }else{
+    fts3EvalDlPhraseNext(pTab, pDL, pbEof);
+  }
+
+  return rc;
+}
+
+/*
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
+** expression. Also the Fts3Expr.bDeferred variable is set to true for any
+** expressions for which all descendent tokens are deferred.
+**
+** If parameter bOptOk is zero, then it is guaranteed that the
+** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
+** each phrase in the expression (subject to deferred token processing).
+** Or, if bOptOk is non-zero, then one or more tokens within the expression
+** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
+**
+** If an error occurs within this function, *pRc is set to an SQLite error
+** code before returning.
+*/
+static void fts3EvalStartReaders(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  if( pExpr && SQLITE_OK==*pRc ){
+    if( pExpr->eType==FTSQUERY_PHRASE ){
+      int i;
+      int nToken = pExpr->pPhrase->nToken;
+      for(i=0; i<nToken; i++){
+        if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
+      }
+      pExpr->bDeferred = (i==nToken);
+      *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
+    }else{
+      fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
+      fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
+      pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
+    }
+  }
+}
+
+/*
+** An array of the following structures is assembled as part of the process
+** of selecting tokens to defer before the query starts executing (as part
+** of the xFilter() method). There is one element in the array for each
+** token in the FTS expression.
+**
+** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
+** to phrases that are connected only by AND and NEAR operators (not OR or
+** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
+** separately. The root of a tokens AND/NEAR cluster is stored in 
+** Fts3TokenAndCost.pRoot.
+*/
+typedef struct Fts3TokenAndCost Fts3TokenAndCost;
+struct Fts3TokenAndCost {
+  Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
+  int iToken;                     /* Position of token in phrase */
+  Fts3PhraseToken *pToken;        /* The token itself */
+  Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
+  int nOvfl;                      /* Number of overflow pages to load doclist */
+  int iCol;                       /* The column the token must match */
+};
+
+/*
+** This function is used to populate an allocated Fts3TokenAndCost array.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, if an error occurs during execution, *pRc is set to an
+** SQLite error code.
+*/
+static void fts3EvalTokenCosts(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
+  Fts3Expr *pExpr,                /* Expression to consider */
+  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
+  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  if( *pRc==SQLITE_OK ){
+    if( pExpr->eType==FTSQUERY_PHRASE ){
+      Fts3Phrase *pPhrase = pExpr->pPhrase;
+      int i;
+      for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
+        Fts3TokenAndCost *pTC = (*ppTC)++;
+        pTC->pPhrase = pPhrase;
+        pTC->iToken = i;
+        pTC->pRoot = pRoot;
+        pTC->pToken = &pPhrase->aToken[i];
+        pTC->iCol = pPhrase->iColumn;
+        *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
+      }
+    }else if( pExpr->eType!=FTSQUERY_NOT ){
+      assert( pExpr->eType==FTSQUERY_OR
+           || pExpr->eType==FTSQUERY_AND
+           || pExpr->eType==FTSQUERY_NEAR
+      );
+      assert( pExpr->pLeft && pExpr->pRight );
+      if( pExpr->eType==FTSQUERY_OR ){
+        pRoot = pExpr->pLeft;
+        **ppOr = pRoot;
+        (*ppOr)++;
+      }
+      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
+      if( pExpr->eType==FTSQUERY_OR ){
+        pRoot = pExpr->pRight;
+        **ppOr = pRoot;
+        (*ppOr)++;
+      }
+      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
+    }
+  }
+}
+
+/*
+** Determine the average document (row) size in pages. If successful,
+** write this value to *pnPage and return SQLITE_OK. Otherwise, return
+** an SQLite error code.
+**
+** The average document size in pages is calculated by first calculating 
+** determining the average size in bytes, B. If B is less than the amount
+** of data that will fit on a single leaf page of an intkey table in
+** this database, then the average docsize is 1. Otherwise, it is 1 plus
+** the number of overflow pages consumed by a record B bytes in size.
+*/
+static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
+  if( pCsr->nRowAvg==0 ){
+    /* The average document size, which is required to calculate the cost
+    ** of each doclist, has not yet been determined. Read the required 
+    ** data from the %_stat table to calculate it.
+    **
+    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
+    ** varints, where nCol is the number of columns in the FTS3 table.
+    ** The first varint is the number of documents currently stored in
+    ** the table. The following nCol varints contain the total amount of
+    ** data stored in all rows of each column of the table, from left
+    ** to right.
+    */
+    int rc;
+    Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
+    sqlite3_stmt *pStmt;
+    sqlite3_int64 nDoc = 0;
+    sqlite3_int64 nByte = 0;
+    const char *pEnd;
+    const char *a;
+
+    rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
+    if( rc!=SQLITE_OK ) return rc;
+    a = sqlite3_column_blob(pStmt, 0);
+    assert( a );
+
+    pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
+    a += sqlite3Fts3GetVarint(a, &nDoc);
+    while( a<pEnd ){
+      a += sqlite3Fts3GetVarint(a, &nByte);
+    }
+    if( nDoc==0 || nByte==0 ){
+      sqlite3_reset(pStmt);
+      return FTS_CORRUPT_VTAB;
+    }
+
+    pCsr->nDoc = nDoc;
+    pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
+    assert( pCsr->nRowAvg>0 ); 
+    rc = sqlite3_reset(pStmt);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  *pnPage = pCsr->nRowAvg;
+  return SQLITE_OK;
+}
+
+/*
+** This function is called to select the tokens (if any) that will be 
+** deferred. The array aTC[] has already been populated when this is
+** called.
+**
+** This function is called once for each AND/NEAR cluster in the 
+** expression. Each invocation determines which tokens to defer within
+** the cluster with root node pRoot. See comments above the definition
+** of struct Fts3TokenAndCost for more details.
+**
+** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
+** called on each token to defer. Otherwise, an SQLite error code is
+** returned.
+*/
+static int fts3EvalSelectDeferred(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pRoot,                /* Consider tokens with this root node */
+  Fts3TokenAndCost *aTC,          /* Array of expression tokens and costs */
+  int nTC                         /* Number of entries in aTC[] */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int nDocSize = 0;               /* Number of pages per doc loaded */
+  int rc = SQLITE_OK;             /* Return code */
+  int ii;                         /* Iterator variable for various purposes */
+  int nOvfl = 0;                  /* Total overflow pages used by doclists */
+  int nToken = 0;                 /* Total number of tokens in cluster */
+
+  int nMinEst = 0;                /* The minimum count for any phrase so far. */
+  int nLoad4 = 1;                 /* (Phrases that will be loaded)^4. */
+
+  /* Tokens are never deferred for FTS tables created using the content=xxx
+  ** option. The reason being that it is not guaranteed that the content
+  ** table actually contains the same data as the index. To prevent this from
+  ** causing any problems, the deferred token optimization is completely
+  ** disabled for content=xxx tables. */
+  if( pTab->zContentTbl ){
+    return SQLITE_OK;
+  }
+
+  /* Count the tokens in this AND/NEAR cluster. If none of the doclists
+  ** associated with the tokens spill onto overflow pages, or if there is
+  ** only 1 token, exit early. No tokens to defer in this case. */
+  for(ii=0; ii<nTC; ii++){
+    if( aTC[ii].pRoot==pRoot ){
+      nOvfl += aTC[ii].nOvfl;
+      nToken++;
+    }
+  }
+  if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
+
+  /* Obtain the average docsize (in pages). */
+  rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
+  assert( rc!=SQLITE_OK || nDocSize>0 );
+
+
+  /* Iterate through all tokens in this AND/NEAR cluster, in ascending order 
+  ** of the number of overflow pages that will be loaded by the pager layer 
+  ** to retrieve the entire doclist for the token from the full-text index.
+  ** Load the doclists for tokens that are either:
+  **
+  **   a. The cheapest token in the entire query (i.e. the one visited by the
+  **      first iteration of this loop), or
+  **
+  **   b. Part of a multi-token phrase.
+  **
+  ** After each token doclist is loaded, merge it with the others from the
+  ** same phrase and count the number of documents that the merged doclist
+  ** contains. Set variable "nMinEst" to the smallest number of documents in 
+  ** any phrase doclist for which 1 or more token doclists have been loaded.
+  ** Let nOther be the number of other phrases for which it is certain that
+  ** one or more tokens will not be deferred.
+  **
+  ** Then, for each token, defer it if loading the doclist would result in
+  ** loading N or more overflow pages into memory, where N is computed as:
+  **
+  **    (nMinEst + 4^nOther - 1) / (4^nOther)
+  */
+  for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
+    int iTC;                      /* Used to iterate through aTC[] array. */
+    Fts3TokenAndCost *pTC = 0;    /* Set to cheapest remaining token. */
+
+    /* Set pTC to point to the cheapest remaining token. */
+    for(iTC=0; iTC<nTC; iTC++){
+      if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot 
+       && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) 
+      ){
+        pTC = &aTC[iTC];
+      }
+    }
+    assert( pTC );
+
+    if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
+      /* The number of overflow pages to load for this (and therefore all
+      ** subsequent) tokens is greater than the estimated number of pages 
+      ** that will be loaded if all subsequent tokens are deferred.
+      */
+      Fts3PhraseToken *pToken = pTC->pToken;
+      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
+      fts3SegReaderCursorFree(pToken->pSegcsr);
+      pToken->pSegcsr = 0;
+    }else{
+      /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
+      ** for-loop. Except, limit the value to 2^24 to prevent it from 
+      ** overflowing the 32-bit integer it is stored in. */
+      if( ii<12 ) nLoad4 = nLoad4*4;
+
+      if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
+        /* Either this is the cheapest token in the entire query, or it is
+        ** part of a multi-token phrase. Either way, the entire doclist will
+        ** (eventually) be loaded into memory. It may as well be now. */
+        Fts3PhraseToken *pToken = pTC->pToken;
+        int nList = 0;
+        char *pList = 0;
+        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
+        assert( rc==SQLITE_OK || pList==0 );
+        if( rc==SQLITE_OK ){
+          int nCount;
+          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
+          nCount = fts3DoclistCountDocids(
+              pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
+          );
+          if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
+        }
+      }
+    }
+    pTC->pToken = 0;
+  }
+
+  return rc;
+}
+
+/*
+** This function is called from within the xFilter method. It initializes
+** the full-text query currently stored in pCsr->pExpr. To iterate through
+** the results of a query, the caller does:
+**
+**    fts3EvalStart(pCsr);
+**    while( 1 ){
+**      fts3EvalNext(pCsr);
+**      if( pCsr->bEof ) break;
+**      ... return row pCsr->iPrevId to the caller ...
+**    }
+*/
+static int fts3EvalStart(Fts3Cursor *pCsr){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc = SQLITE_OK;
+  int nToken = 0;
+  int nOr = 0;
+
+  /* Allocate a MultiSegReader for each token in the expression. */
+  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
+
+  /* Determine which, if any, tokens in the expression should be deferred. */
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+  if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
+    Fts3TokenAndCost *aTC;
+    Fts3Expr **apOr;
+    aTC = (Fts3TokenAndCost *)sqlite3_malloc(
+        sizeof(Fts3TokenAndCost) * nToken
+      + sizeof(Fts3Expr *) * nOr * 2
+    );
+    apOr = (Fts3Expr **)&aTC[nToken];
+
+    if( !aTC ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int ii;
+      Fts3TokenAndCost *pTC = aTC;
+      Fts3Expr **ppOr = apOr;
+
+      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
+      nToken = (int)(pTC-aTC);
+      nOr = (int)(ppOr-apOr);
+
+      if( rc==SQLITE_OK ){
+        rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
+        for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
+          rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
+        }
+      }
+
+      sqlite3_free(aTC);
+    }
+  }
+#endif
+
+  fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
+  return rc;
+}
+
+/*
+** Invalidate the current position list for phrase pPhrase.
+*/
+static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
+  if( pPhrase->doclist.bFreeList ){
+    sqlite3_free(pPhrase->doclist.pList);
+  }
+  pPhrase->doclist.pList = 0;
+  pPhrase->doclist.nList = 0;
+  pPhrase->doclist.bFreeList = 0;
+}
+
+/*
+** This function is called to edit the position list associated with
+** the phrase object passed as the fifth argument according to a NEAR
+** condition. For example:
+**
+**     abc NEAR/5 "def ghi"
+**
+** Parameter nNear is passed the NEAR distance of the expression (5 in
+** the example above). When this function is called, *paPoslist points to
+** the position list, and *pnToken is the number of phrase tokens in, the
+** phrase on the other side of the NEAR operator to pPhrase. For example,
+** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
+** the position list associated with phrase "abc".
+**
+** All positions in the pPhrase position list that are not sufficiently
+** close to a position in the *paPoslist position list are removed. If this
+** leaves 0 positions, zero is returned. Otherwise, non-zero.
+**
+** Before returning, *paPoslist is set to point to the position lsit 
+** associated with pPhrase. And *pnToken is set to the number of tokens in
+** pPhrase.
+*/
+static int fts3EvalNearTrim(
+  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
+  char *aTmp,                     /* Temporary space to use */
+  char **paPoslist,               /* IN/OUT: Position list */
+  int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
+  Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
+){
+  int nParam1 = nNear + pPhrase->nToken;
+  int nParam2 = nNear + *pnToken;
+  int nNew;
+  char *p2; 
+  char *pOut; 
+  int res;
+
+  assert( pPhrase->doclist.pList );
+
+  p2 = pOut = pPhrase->doclist.pList;
+  res = fts3PoslistNearMerge(
+    &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
+  );
+  if( res ){
+    nNew = (int)(pOut - pPhrase->doclist.pList) - 1;
+    assert( pPhrase->doclist.pList[nNew]=='\0' );
+    assert( nNew<=pPhrase->doclist.nList && nNew>0 );
+    memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
+    pPhrase->doclist.nList = nNew;
+    *paPoslist = pPhrase->doclist.pList;
+    *pnToken = pPhrase->nToken;
+  }
+
+  return res;
+}
+
+/*
+** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
+** Otherwise, it advances the expression passed as the second argument to
+** point to the next matching row in the database. Expressions iterate through
+** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
+** or descending if it is non-zero.
+**
+** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
+** successful, the following variables in pExpr are set:
+**
+**   Fts3Expr.bEof                (non-zero if EOF - there is no next row)
+**   Fts3Expr.iDocid              (valid if bEof==0. The docid of the next row)
+**
+** If the expression is of type FTSQUERY_PHRASE, and the expression is not
+** at EOF, then the following variables are populated with the position list
+** for the phrase for the visited row:
+**
+**   FTs3Expr.pPhrase->doclist.nList        (length of pList in bytes)
+**   FTs3Expr.pPhrase->doclist.pList        (pointer to position list)
+**
+** It says above that this function advances the expression to the next
+** matching row. This is usually true, but there are the following exceptions:
+**
+**   1. Deferred tokens are not taken into account. If a phrase consists
+**      entirely of deferred tokens, it is assumed to match every row in
+**      the db. In this case the position-list is not populated at all. 
+**
+**      Or, if a phrase contains one or more deferred tokens and one or
+**      more non-deferred tokens, then the expression is advanced to the 
+**      next possible match, considering only non-deferred tokens. In other
+**      words, if the phrase is "A B C", and "B" is deferred, the expression
+**      is advanced to the next row that contains an instance of "A * C", 
+**      where "*" may match any single token. The position list in this case
+**      is populated as for "A * C" before returning.
+**
+**   2. NEAR is treated as AND. If the expression is "x NEAR y", it is 
+**      advanced to point to the next row that matches "x AND y".
+** 
+** See fts3EvalTestDeferredAndNear() for details on testing if a row is
+** really a match, taking into account deferred tokens and NEAR operators.
+*/
+static void fts3EvalNextRow(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pExpr,                /* Expr. to advance to next matching row */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  if( *pRc==SQLITE_OK ){
+    int bDescDoclist = pCsr->bDesc;         /* Used by DOCID_CMP() macro */
+    assert( pExpr->bEof==0 );
+    pExpr->bStart = 1;
+
+    switch( pExpr->eType ){
+      case FTSQUERY_NEAR:
+      case FTSQUERY_AND: {
+        Fts3Expr *pLeft = pExpr->pLeft;
+        Fts3Expr *pRight = pExpr->pRight;
+        assert( !pLeft->bDeferred || !pRight->bDeferred );
+
+        if( pLeft->bDeferred ){
+          /* LHS is entirely deferred. So we assume it matches every row.
+          ** Advance the RHS iterator to find the next row visited. */
+          fts3EvalNextRow(pCsr, pRight, pRc);
+          pExpr->iDocid = pRight->iDocid;
+          pExpr->bEof = pRight->bEof;
+        }else if( pRight->bDeferred ){
+          /* RHS is entirely deferred. So we assume it matches every row.
+          ** Advance the LHS iterator to find the next row visited. */
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+          pExpr->iDocid = pLeft->iDocid;
+          pExpr->bEof = pLeft->bEof;
+        }else{
+          /* Neither the RHS or LHS are deferred. */
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
+          while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
+            sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
+            if( iDiff==0 ) break;
+            if( iDiff<0 ){
+              fts3EvalNextRow(pCsr, pLeft, pRc);
+            }else{
+              fts3EvalNextRow(pCsr, pRight, pRc);
+            }
+          }
+          pExpr->iDocid = pLeft->iDocid;
+          pExpr->bEof = (pLeft->bEof || pRight->bEof);
+        }
+        break;
+      }
+  
+      case FTSQUERY_OR: {
+        Fts3Expr *pLeft = pExpr->pLeft;
+        Fts3Expr *pRight = pExpr->pRight;
+        sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
+
+        assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
+        assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
+
+        if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+        }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
+          fts3EvalNextRow(pCsr, pRight, pRc);
+        }else{
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
+        }
+
+        pExpr->bEof = (pLeft->bEof && pRight->bEof);
+        iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
+        if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
+          pExpr->iDocid = pLeft->iDocid;
+        }else{
+          pExpr->iDocid = pRight->iDocid;
+        }
+
+        break;
+      }
+
+      case FTSQUERY_NOT: {
+        Fts3Expr *pLeft = pExpr->pLeft;
+        Fts3Expr *pRight = pExpr->pRight;
+
+        if( pRight->bStart==0 ){
+          fts3EvalNextRow(pCsr, pRight, pRc);
+          assert( *pRc!=SQLITE_OK || pRight->bStart );
+        }
+
+        fts3EvalNextRow(pCsr, pLeft, pRc);
+        if( pLeft->bEof==0 ){
+          while( !*pRc 
+              && !pRight->bEof 
+              && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
+          ){
+            fts3EvalNextRow(pCsr, pRight, pRc);
+          }
+        }
+        pExpr->iDocid = pLeft->iDocid;
+        pExpr->bEof = pLeft->bEof;
+        break;
+      }
+
+      default: {
+        Fts3Phrase *pPhrase = pExpr->pPhrase;
+        fts3EvalInvalidatePoslist(pPhrase);
+        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
+        pExpr->iDocid = pPhrase->doclist.iDocid;
+        break;
+      }
+    }
+  }
+}
+
+/*
+** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
+** cluster, then this function returns 1 immediately.
+**
+** Otherwise, it checks if the current row really does match the NEAR 
+** expression, using the data currently stored in the position lists 
+** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. 
+**
+** If the current row is a match, the position list associated with each
+** phrase in the NEAR expression is edited in place to contain only those
+** phrase instances sufficiently close to their peers to satisfy all NEAR
+** constraints. In this case it returns 1. If the NEAR expression does not 
+** match the current row, 0 is returned. The position lists may or may not
+** be edited if 0 is returned.
+*/
+static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
+  int res = 1;
+
+  /* The following block runs if pExpr is the root of a NEAR query.
+  ** For example, the query:
+  **
+  **         "w" NEAR "x" NEAR "y" NEAR "z"
+  **
+  ** which is represented in tree form as:
+  **
+  **                               |
+  **                          +--NEAR--+      <-- root of NEAR query
+  **                          |        |
+  **                     +--NEAR--+   "z"
+  **                     |        |
+  **                +--NEAR--+   "y"
+  **                |        |
+  **               "w"      "x"
+  **
+  ** The right-hand child of a NEAR node is always a phrase. The 
+  ** left-hand child may be either a phrase or a NEAR node. There are
+  ** no exceptions to this - it's the way the parser in fts3_expr.c works.
+  */
+  if( *pRc==SQLITE_OK 
+   && pExpr->eType==FTSQUERY_NEAR 
+   && pExpr->bEof==0
+   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
+  ){
+    Fts3Expr *p; 
+    int nTmp = 0;                 /* Bytes of temp space */
+    char *aTmp;                   /* Temp space for PoslistNearMerge() */
+
+    /* Allocate temporary working space. */
+    for(p=pExpr; p->pLeft; p=p->pLeft){
+      nTmp += p->pRight->pPhrase->doclist.nList;
+    }
+    nTmp += p->pPhrase->doclist.nList;
+    if( nTmp==0 ){
+      res = 0;
+    }else{
+      aTmp = sqlite3_malloc(nTmp*2);
+      if( !aTmp ){
+        *pRc = SQLITE_NOMEM;
+        res = 0;
+      }else{
+        char *aPoslist = p->pPhrase->doclist.pList;
+        int nToken = p->pPhrase->nToken;
+
+        for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
+          Fts3Phrase *pPhrase = p->pRight->pPhrase;
+          int nNear = p->nNear;
+          res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+        }
+
+        aPoslist = pExpr->pRight->pPhrase->doclist.pList;
+        nToken = pExpr->pRight->pPhrase->nToken;
+        for(p=pExpr->pLeft; p && res; p=p->pLeft){
+          int nNear;
+          Fts3Phrase *pPhrase;
+          assert( p->pParent && p->pParent->pLeft==p );
+          nNear = p->pParent->nNear;
+          pPhrase = (
+              p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
+              );
+          res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+        }
+      }
+
+      sqlite3_free(aTmp);
+    }
+  }
+
+  return res;
+}
+
+/*
+** This function is a helper function for fts3EvalTestDeferredAndNear().
+** Assuming no error occurs or has occurred, It returns non-zero if the
+** expression passed as the second argument matches the row that pCsr 
+** currently points to, or zero if it does not.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** If an error occurs during execution of this function, *pRc is set to 
+** the appropriate SQLite error code. In this case the returned value is 
+** undefined.
+*/
+static int fts3EvalTestExpr(
+  Fts3Cursor *pCsr,               /* FTS cursor handle */
+  Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  int bHit = 1;                   /* Return value */
+  if( *pRc==SQLITE_OK ){
+    switch( pExpr->eType ){
+      case FTSQUERY_NEAR:
+      case FTSQUERY_AND:
+        bHit = (
+            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+         && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
+         && fts3EvalNearTest(pExpr, pRc)
+        );
+
+        /* If the NEAR expression does not match any rows, zero the doclist for 
+        ** all phrases involved in the NEAR. This is because the snippet(),
+        ** offsets() and matchinfo() functions are not supposed to recognize 
+        ** any instances of phrases that are part of unmatched NEAR queries. 
+        ** For example if this expression:
+        **
+        **    ... MATCH 'a OR (b NEAR c)'
+        **
+        ** is matched against a row containing:
+        **
+        **        'a b d e'
+        **
+        ** then any snippet() should ony highlight the "a" term, not the "b"
+        ** (as "b" is part of a non-matching NEAR clause).
+        */
+        if( bHit==0 
+         && pExpr->eType==FTSQUERY_NEAR 
+         && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
+        ){
+          Fts3Expr *p;
+          for(p=pExpr; p->pPhrase==0; p=p->pLeft){
+            if( p->pRight->iDocid==pCsr->iPrevId ){
+              fts3EvalInvalidatePoslist(p->pRight->pPhrase);
+            }
+          }
+          if( p->iDocid==pCsr->iPrevId ){
+            fts3EvalInvalidatePoslist(p->pPhrase);
+          }
+        }
+
+        break;
+
+      case FTSQUERY_OR: {
+        int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
+        int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
+        bHit = bHit1 || bHit2;
+        break;
+      }
+
+      case FTSQUERY_NOT:
+        bHit = (
+            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+         && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
+        );
+        break;
+
+      default: {
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+        if( pCsr->pDeferred 
+         && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
+        ){
+          Fts3Phrase *pPhrase = pExpr->pPhrase;
+          assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
+          if( pExpr->bDeferred ){
+            fts3EvalInvalidatePoslist(pPhrase);
+          }
+          *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
+          bHit = (pPhrase->doclist.pList!=0);
+          pExpr->iDocid = pCsr->iPrevId;
+        }else
+#endif
+        {
+          bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
+        }
+        break;
+      }
+    }
+  }
+  return bHit;
+}
+
+/*
+** This function is called as the second part of each xNext operation when
+** iterating through the results of a full-text query. At this point the
+** cursor points to a row that matches the query expression, with the
+** following caveats:
+**
+**   * Up until this point, "NEAR" operators in the expression have been
+**     treated as "AND".
+**
+**   * Deferred tokens have not yet been considered.
+**
+** If *pRc is not SQLITE_OK when this function is called, it immediately
+** returns 0. Otherwise, it tests whether or not after considering NEAR
+** operators and deferred tokens the current row is still a match for the
+** expression. It returns 1 if both of the following are true:
+**
+**   1. *pRc is SQLITE_OK when this function returns, and
+**
+**   2. After scanning the current FTS table row for the deferred tokens,
+**      it is determined that the row does *not* match the query.
+**
+** Or, if no error occurs and it seems the current row does match the FTS
+** query, return 0.
+*/
+static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
+  int rc = *pRc;
+  int bMiss = 0;
+  if( rc==SQLITE_OK ){
+
+    /* If there are one or more deferred tokens, load the current row into
+    ** memory and scan it to determine the position list for each deferred
+    ** token. Then, see if this row is really a match, considering deferred
+    ** tokens and NEAR operators (neither of which were taken into account
+    ** earlier, by fts3EvalNextRow()). 
+    */
+    if( pCsr->pDeferred ){
+      rc = fts3CursorSeek(0, pCsr);
+      if( rc==SQLITE_OK ){
+        rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
+      }
+    }
+    bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
+
+    /* Free the position-lists accumulated for each deferred token above. */
+    sqlite3Fts3FreeDeferredDoclists(pCsr);
+    *pRc = rc;
+  }
+  return (rc==SQLITE_OK && bMiss);
+}
+
+/*
+** Advance to the next document that matches the FTS expression in
+** Fts3Cursor.pExpr.
+*/
+static int fts3EvalNext(Fts3Cursor *pCsr){
+  int rc = SQLITE_OK;             /* Return Code */
+  Fts3Expr *pExpr = pCsr->pExpr;
+  assert( pCsr->isEof==0 );
+  if( pExpr==0 ){
+    pCsr->isEof = 1;
+  }else{
+    do {
+      if( pCsr->isRequireSeek==0 ){
+        sqlite3_reset(pCsr->pStmt);
+      }
+      assert( sqlite3_data_count(pCsr->pStmt)==0 );
+      fts3EvalNextRow(pCsr, pExpr, &rc);
+      pCsr->isEof = pExpr->bEof;
+      pCsr->isRequireSeek = 1;
+      pCsr->isMatchinfoNeeded = 1;
+      pCsr->iPrevId = pExpr->iDocid;
+    }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
+  }
+
+  /* Check if the cursor is past the end of the docid range specified
+  ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag.  */
+  if( rc==SQLITE_OK && (
+        (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid)
+     || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid)
+  )){
+    pCsr->isEof = 1;
+  }
+
+  return rc;
+}
+
+/*
+** Restart interation for expression pExpr so that the next call to
+** fts3EvalNext() visits the first row. Do not allow incremental 
+** loading or merging of phrase doclists for this iteration.
+**
+** If *pRc is other than SQLITE_OK when this function is called, it is
+** a no-op. If an error occurs within this function, *pRc is set to an
+** SQLite error code before returning.
+*/
+static void fts3EvalRestart(
+  Fts3Cursor *pCsr,
+  Fts3Expr *pExpr,
+  int *pRc
+){
+  if( pExpr && *pRc==SQLITE_OK ){
+    Fts3Phrase *pPhrase = pExpr->pPhrase;
+
+    if( pPhrase ){
+      fts3EvalInvalidatePoslist(pPhrase);
+      if( pPhrase->bIncr ){
+        int i;
+        for(i=0; i<pPhrase->nToken; i++){
+          Fts3PhraseToken *pToken = &pPhrase->aToken[i];
+          assert( pToken->pDeferred==0 );
+          if( pToken->pSegcsr ){
+            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
+          }
+        }
+        *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
+      }
+      pPhrase->doclist.pNextDocid = 0;
+      pPhrase->doclist.iDocid = 0;
+    }
+
+    pExpr->iDocid = 0;
+    pExpr->bEof = 0;
+    pExpr->bStart = 0;
+
+    fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
+    fts3EvalRestart(pCsr, pExpr->pRight, pRc);
+  }
+}
+
+/*
+** After allocating the Fts3Expr.aMI[] array for each phrase in the 
+** expression rooted at pExpr, the cursor iterates through all rows matched
+** by pExpr, calling this function for each row. This function increments
+** the values in Fts3Expr.aMI[] according to the position-list currently
+** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase 
+** expression nodes.
+*/
+static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
+  if( pExpr ){
+    Fts3Phrase *pPhrase = pExpr->pPhrase;
+    if( pPhrase && pPhrase->doclist.pList ){
+      int iCol = 0;
+      char *p = pPhrase->doclist.pList;
+
+      assert( *p );
+      while( 1 ){
+        u8 c = 0;
+        int iCnt = 0;
+        while( 0xFE & (*p | c) ){
+          if( (c&0x80)==0 ) iCnt++;
+          c = *p++ & 0x80;
+        }
+
+        /* aMI[iCol*3 + 1] = Number of occurrences
+        ** aMI[iCol*3 + 2] = Number of rows containing at least one instance
+        */
+        pExpr->aMI[iCol*3 + 1] += iCnt;
+        pExpr->aMI[iCol*3 + 2] += (iCnt>0);
+        if( *p==0x00 ) break;
+        p++;
+        p += fts3GetVarint32(p, &iCol);
+      }
+    }
+
+    fts3EvalUpdateCounts(pExpr->pLeft);
+    fts3EvalUpdateCounts(pExpr->pRight);
+  }
+}
+
+/*
+** Expression pExpr must be of type FTSQUERY_PHRASE.
+**
+** If it is not already allocated and populated, this function allocates and
+** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
+** of a NEAR expression, then it also allocates and populates the same array
+** for all other phrases that are part of the NEAR expression.
+**
+** SQLITE_OK is returned if the aMI[] array is successfully allocated and
+** populated. Otherwise, if an error occurs, an SQLite error code is returned.
+*/
+static int fts3EvalGatherStats(
+  Fts3Cursor *pCsr,               /* Cursor object */
+  Fts3Expr *pExpr                 /* FTSQUERY_PHRASE expression */
+){
+  int rc = SQLITE_OK;             /* Return code */
+
+  assert( pExpr->eType==FTSQUERY_PHRASE );
+  if( pExpr->aMI==0 ){
+    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+    Fts3Expr *pRoot;                /* Root of NEAR expression */
+    Fts3Expr *p;                    /* Iterator used for several purposes */
+
+    sqlite3_int64 iPrevId = pCsr->iPrevId;
+    sqlite3_int64 iDocid;
+    u8 bEof;
+
+    /* Find the root of the NEAR expression */
+    pRoot = pExpr;
+    while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
+      pRoot = pRoot->pParent;
+    }
+    iDocid = pRoot->iDocid;
+    bEof = pRoot->bEof;
+    assert( pRoot->bStart );
+
+    /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */
+    for(p=pRoot; p; p=p->pLeft){
+      Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
+      assert( pE->aMI==0 );
+      pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
+      if( !pE->aMI ) return SQLITE_NOMEM;
+      memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
+    }
+
+    fts3EvalRestart(pCsr, pRoot, &rc);
+
+    while( pCsr->isEof==0 && rc==SQLITE_OK ){
+
+      do {
+        /* Ensure the %_content statement is reset. */
+        if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
+        assert( sqlite3_data_count(pCsr->pStmt)==0 );
+
+        /* Advance to the next document */
+        fts3EvalNextRow(pCsr, pRoot, &rc);
+        pCsr->isEof = pRoot->bEof;
+        pCsr->isRequireSeek = 1;
+        pCsr->isMatchinfoNeeded = 1;
+        pCsr->iPrevId = pRoot->iDocid;
+      }while( pCsr->isEof==0 
+           && pRoot->eType==FTSQUERY_NEAR 
+           && fts3EvalTestDeferredAndNear(pCsr, &rc) 
+      );
+
+      if( rc==SQLITE_OK && pCsr->isEof==0 ){
+        fts3EvalUpdateCounts(pRoot);
+      }
+    }
+
+    pCsr->isEof = 0;
+    pCsr->iPrevId = iPrevId;
+
+    if( bEof ){
+      pRoot->bEof = bEof;
+    }else{
+      /* Caution: pRoot may iterate through docids in ascending or descending
+      ** order. For this reason, even though it seems more defensive, the 
+      ** do loop can not be written:
+      **
+      **   do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
+      */
+      fts3EvalRestart(pCsr, pRoot, &rc);
+      do {
+        fts3EvalNextRow(pCsr, pRoot, &rc);
+        assert( pRoot->bEof==0 );
+      }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
+      fts3EvalTestDeferredAndNear(pCsr, &rc);
+    }
+  }
+  return rc;
+}
+
+/*
+** This function is used by the matchinfo() module to query a phrase 
+** expression node for the following information:
+**
+**   1. The total number of occurrences of the phrase in each column of 
+**      the FTS table (considering all rows), and
+**
+**   2. For each column, the number of rows in the table for which the
+**      column contains at least one instance of the phrase.
+**
+** If no error occurs, SQLITE_OK is returned and the values for each column
+** written into the array aiOut as follows:
+**
+**   aiOut[iCol*3 + 1] = Number of occurrences
+**   aiOut[iCol*3 + 2] = Number of rows containing at least one instance
+**
+** Caveats:
+**
+**   * If a phrase consists entirely of deferred tokens, then all output 
+**     values are set to the number of documents in the table. In other
+**     words we assume that very common tokens occur exactly once in each 
+**     column of each row of the table.
+**
+**   * If a phrase contains some deferred tokens (and some non-deferred 
+**     tokens), count the potential occurrence identified by considering
+**     the non-deferred tokens instead of actual phrase occurrences.
+**
+**   * If the phrase is part of a NEAR expression, then only phrase instances
+**     that meet the NEAR constraint are included in the counts.
+*/
+SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(
+  Fts3Cursor *pCsr,               /* FTS cursor handle */
+  Fts3Expr *pExpr,                /* Phrase expression */
+  u32 *aiOut                      /* Array to write results into (see above) */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc = SQLITE_OK;
+  int iCol;
+
+  if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
+    assert( pCsr->nDoc>0 );
+    for(iCol=0; iCol<pTab->nColumn; iCol++){
+      aiOut[iCol*3 + 1] = (u32)pCsr->nDoc;
+      aiOut[iCol*3 + 2] = (u32)pCsr->nDoc;
+    }
+  }else{
+    rc = fts3EvalGatherStats(pCsr, pExpr);
+    if( rc==SQLITE_OK ){
+      assert( pExpr->aMI );
+      for(iCol=0; iCol<pTab->nColumn; iCol++){
+        aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
+        aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
+** The expression pExpr passed as the second argument to this function
+** must be of type FTSQUERY_PHRASE. 
+**
+** The returned value is either NULL or a pointer to a buffer containing
+** a position-list indicating the occurrences of the phrase in column iCol
+** of the current row. 
+**
+** More specifically, the returned buffer contains 1 varint for each 
+** occurrence of the phrase in the column, stored using the normal (delta+2) 
+** compression and is terminated by either an 0x01 or 0x00 byte. For example,
+** if the requested column contains "a b X c d X X" and the position-list
+** for 'X' is requested, the buffer returned may contain:
+**
+**     0x04 0x05 0x03 0x01   or   0x04 0x05 0x03 0x00
+**
+** This function works regardless of whether or not the phrase is deferred,
+** incremental, or neither.
+*/
+SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist(
+  Fts3Cursor *pCsr,               /* FTS3 cursor object */
+  Fts3Expr *pExpr,                /* Phrase to return doclist for */
+  int iCol,                       /* Column to return position list for */
+  char **ppOut                    /* OUT: Pointer to position list */
+){
+  Fts3Phrase *pPhrase = pExpr->pPhrase;
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  char *pIter;
+  int iThis;
+  sqlite3_int64 iDocid;
+
+  /* If this phrase is applies specifically to some column other than 
+  ** column iCol, return a NULL pointer.  */
+  *ppOut = 0;
+  assert( iCol>=0 && iCol<pTab->nColumn );
+  if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
+    return SQLITE_OK;
+  }
+
+  iDocid = pExpr->iDocid;
+  pIter = pPhrase->doclist.pList;
+  if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
+    int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */
+    int iMul;                     /* +1 if csr dir matches index dir, else -1 */
+    int bOr = 0;
+    u8 bEof = 0;
+    u8 bTreeEof = 0;
+    Fts3Expr *p;                  /* Used to iterate from pExpr to root */
+    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */
+
+    /* Check if this phrase descends from an OR expression node. If not, 
+    ** return NULL. Otherwise, the entry that corresponds to docid 
+    ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the
+    ** tree that the node is part of has been marked as EOF, but the node
+    ** itself is not EOF, then it may point to an earlier entry. */
+    pNear = pExpr;
+    for(p=pExpr->pParent; p; p=p->pParent){
+      if( p->eType==FTSQUERY_OR ) bOr = 1;
+      if( p->eType==FTSQUERY_NEAR ) pNear = p;
+      if( p->bEof ) bTreeEof = 1;
+    }
+    if( bOr==0 ) return SQLITE_OK;
+
+    /* This is the descendent of an OR node. In this case we cannot use
+    ** an incremental phrase. Load the entire doclist for the phrase
+    ** into memory in this case.  */
+    if( pPhrase->bIncr ){
+      int rc = SQLITE_OK;
+      int bEofSave = pExpr->bEof;
+      fts3EvalRestart(pCsr, pExpr, &rc);
+      while( rc==SQLITE_OK && !pExpr->bEof ){
+        fts3EvalNextRow(pCsr, pExpr, &rc);
+        if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
+      }
+      pIter = pPhrase->doclist.pList;
+      assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
+      if( rc!=SQLITE_OK ) return rc;
+    }
+    
+    iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1);
+    while( bTreeEof==1 
+        && pNear->bEof==0
+        && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0
+    ){
+      int rc = SQLITE_OK;
+      fts3EvalNextRow(pCsr, pExpr, &rc);
+      if( rc!=SQLITE_OK ) return rc;
+      iDocid = pExpr->iDocid;
+      pIter = pPhrase->doclist.pList;
+    }
+
+    bEof = (pPhrase->doclist.nAll==0);
+    assert( bDescDoclist==0 || bDescDoclist==1 );
+    assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
+
+    if( bEof==0 ){
+      if( pCsr->bDesc==bDescDoclist ){
+        int dummy;
+        if( pNear->bEof ){
+          /* This expression is already at EOF. So position it to point to the
+          ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable
+          ** iDocid is already set for this entry, so all that is required is
+          ** to set pIter to point to the first byte of the last position-list
+          ** in the doclist. 
+          **
+          ** It would also be correct to set pIter and iDocid to zero. In
+          ** this case, the first call to sqltie3Fts4DoclistPrev() below
+          ** would also move the iterator to point to the last entry in the 
+          ** doclist. However, this is expensive, as to do so it has to 
+          ** iterate through the entire doclist from start to finish (since
+          ** it does not know the docid for the last entry).  */
+          pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1];
+          fts3ReversePoslist(pPhrase->doclist.aAll, &pIter);
+        }
+        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
+          sqlite3Fts3DoclistPrev(
+              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
+              &pIter, &iDocid, &dummy, &bEof
+          );
+        }
+      }else{
+        if( pNear->bEof ){
+          pIter = 0;
+          iDocid = 0;
+        }
+        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
+          sqlite3Fts3DoclistNext(
+              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, 
+              &pIter, &iDocid, &bEof
+          );
+        }
+      }
+    }
+
+    if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
+  }
+  if( pIter==0 ) return SQLITE_OK;
+
+  if( *pIter==0x01 ){
+    pIter++;
+    pIter += fts3GetVarint32(pIter, &iThis);
+  }else{
+    iThis = 0;
+  }
+  while( iThis<iCol ){
+    fts3ColumnlistCopy(0, &pIter);
+    if( *pIter==0x00 ) return 0;
+    pIter++;
+    pIter += fts3GetVarint32(pIter, &iThis);
+  }
+
+  *ppOut = ((iCol==iThis)?pIter:0);
+  return SQLITE_OK;
+}
+
+/*
+** Free all components of the Fts3Phrase structure that were allocated by
+** the eval module. Specifically, this means to free:
+**
+**   * the contents of pPhrase->doclist, and
+**   * any Fts3MultiSegReader objects held by phrase tokens.
+*/
+SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
+  if( pPhrase ){
+    int i;
+    sqlite3_free(pPhrase->doclist.aAll);
+    fts3EvalInvalidatePoslist(pPhrase);
+    memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
+    for(i=0; i<pPhrase->nToken; i++){
+      fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
+      pPhrase->aToken[i].pSegcsr = 0;
+    }
+  }
+}
+
+
+/*
+** Return SQLITE_CORRUPT_VTAB.
+*/
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE int sqlite3Fts3Corrupt(){
+  return SQLITE_CORRUPT_VTAB;
+}
+#endif
+
+#if !SQLITE_CORE
+/*
+** Initialize API pointer table, if required.
+*/
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+SQLITE_API int sqlite3_fts3_init(
+  sqlite3 *db, 
+  char **pzErrMsg,
+  const sqlite3_api_routines *pApi
+){
+  SQLITE_EXTENSION_INIT2(pApi)
+  return sqlite3Fts3Init(db);
+}
+#endif
+
+#endif
+
+/************** End of fts3.c ************************************************/
+/************** Begin file fts3_aux.c ****************************************/
+/*
+** 2011 Jan 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <string.h> */
+/* #include <assert.h> */
+
+typedef struct Fts3auxTable Fts3auxTable;
+typedef struct Fts3auxCursor Fts3auxCursor;
+
+struct Fts3auxTable {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  Fts3Table *pFts3Tab;
+};
+
+struct Fts3auxCursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  Fts3MultiSegReader csr;        /* Must be right after "base" */
+  Fts3SegFilter filter;
+  char *zStop;
+  int nStop;                      /* Byte-length of string zStop */
+  int iLangid;                    /* Language id to query */
+  int isEof;                      /* True if cursor is at EOF */
+  sqlite3_int64 iRowid;           /* Current rowid */
+
+  int iCol;                       /* Current value of 'col' column */
+  int nStat;                      /* Size of aStat[] array */
+  struct Fts3auxColstats {
+    sqlite3_int64 nDoc;           /* 'documents' values for current csr row */
+    sqlite3_int64 nOcc;           /* 'occurrences' values for current csr row */
+  } *aStat;
+};
+
+/*
+** Schema of the terms table.
+*/
+#define FTS3_AUX_SCHEMA \
+  "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
+
+/*
+** This function does all the work for both the xConnect and xCreate methods.
+** These tables have no persistent representation of their own, so xConnect
+** and xCreate are identical operations.
+*/
+static int fts3auxConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pUnused,                  /* Unused */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  char const *zDb;                /* Name of database (e.g. "main") */
+  char const *zFts3;              /* Name of fts3 table */
+  int nDb;                        /* Result of strlen(zDb) */
+  int nFts3;                      /* Result of strlen(zFts3) */
+  int nByte;                      /* Bytes of space to allocate here */
+  int rc;                         /* value returned by declare_vtab() */
+  Fts3auxTable *p;                /* Virtual table object to return */
+
+  UNUSED_PARAMETER(pUnused);
+
+  /* The user should invoke this in one of two forms:
+  **
+  **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
+  **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
+  */
+  if( argc!=4 && argc!=5 ) goto bad_args;
+
+  zDb = argv[1]; 
+  nDb = (int)strlen(zDb);
+  if( argc==5 ){
+    if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
+      zDb = argv[3]; 
+      nDb = (int)strlen(zDb);
+      zFts3 = argv[4];
+    }else{
+      goto bad_args;
+    }
+  }else{
+    zFts3 = argv[3];
+  }
+  nFts3 = (int)strlen(zFts3);
+
+  rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
+  p = (Fts3auxTable *)sqlite3_malloc(nByte);
+  if( !p ) return SQLITE_NOMEM;
+  memset(p, 0, nByte);
+
+  p->pFts3Tab = (Fts3Table *)&p[1];
+  p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
+  p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
+  p->pFts3Tab->db = db;
+  p->pFts3Tab->nIndex = 1;
+
+  memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
+  memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
+  sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
+
+  *ppVtab = (sqlite3_vtab *)p;
+  return SQLITE_OK;
+
+ bad_args:
+  *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor");
+  return SQLITE_ERROR;
+}
+
+/*
+** This function does the work for both the xDisconnect and xDestroy methods.
+** These tables have no persistent representation of their own, so xDisconnect
+** and xDestroy are identical operations.
+*/
+static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3auxTable *p = (Fts3auxTable *)pVtab;
+  Fts3Table *pFts3 = p->pFts3Tab;
+  int i;
+
+  /* Free any prepared statements held */
+  for(i=0; i<SizeofArray(pFts3->aStmt); i++){
+    sqlite3_finalize(pFts3->aStmt[i]);
+  }
+  sqlite3_free(pFts3->zSegmentsTbl);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+#define FTS4AUX_EQ_CONSTRAINT 1
+#define FTS4AUX_GE_CONSTRAINT 2
+#define FTS4AUX_LE_CONSTRAINT 4
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+*/
+static int fts3auxBestIndexMethod(
+  sqlite3_vtab *pVTab, 
+  sqlite3_index_info *pInfo
+){
+  int i;
+  int iEq = -1;
+  int iGe = -1;
+  int iLe = -1;
+  int iLangid = -1;
+  int iNext = 1;                  /* Next free argvIndex value */
+
+  UNUSED_PARAMETER(pVTab);
+
+  /* This vtab delivers always results in "ORDER BY term ASC" order. */
+  if( pInfo->nOrderBy==1 
+   && pInfo->aOrderBy[0].iColumn==0 
+   && pInfo->aOrderBy[0].desc==0
+  ){
+    pInfo->orderByConsumed = 1;
+  }
+
+  /* Search for equality and range constraints on the "term" column. 
+  ** And equality constraints on the hidden "languageid" column. */
+  for(i=0; i<pInfo->nConstraint; i++){
+    if( pInfo->aConstraint[i].usable ){
+      int op = pInfo->aConstraint[i].op;
+      int iCol = pInfo->aConstraint[i].iColumn;
+
+      if( iCol==0 ){
+        if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
+        if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
+        if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
+        if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
+        if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
+      }
+      if( iCol==4 ){
+        if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
+      }
+    }
+  }
+
+  if( iEq>=0 ){
+    pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
+    pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
+    pInfo->estimatedCost = 5;
+  }else{
+    pInfo->idxNum = 0;
+    pInfo->estimatedCost = 20000;
+    if( iGe>=0 ){
+      pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
+      pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
+      pInfo->estimatedCost /= 2;
+    }
+    if( iLe>=0 ){
+      pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
+      pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
+      pInfo->estimatedCost /= 2;
+    }
+  }
+  if( iLangid>=0 ){
+    pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
+    pInfo->estimatedCost--;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor.
+*/
+static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3auxCursor *pCsr;            /* Pointer to cursor object to return */
+
+  UNUSED_PARAMETER(pVTab);
+
+  pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
+  if( !pCsr ) return SQLITE_NOMEM;
+  memset(pCsr, 0, sizeof(Fts3auxCursor));
+
+  *ppCsr = (sqlite3_vtab_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** xClose - Close a cursor.
+*/
+static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+
+  sqlite3Fts3SegmentsClose(pFts3);
+  sqlite3Fts3SegReaderFinish(&pCsr->csr);
+  sqlite3_free((void *)pCsr->filter.zTerm);
+  sqlite3_free(pCsr->zStop);
+  sqlite3_free(pCsr->aStat);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
+  if( nSize>pCsr->nStat ){
+    struct Fts3auxColstats *aNew;
+    aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat, 
+        sizeof(struct Fts3auxColstats) * nSize
+    );
+    if( aNew==0 ) return SQLITE_NOMEM;
+    memset(&aNew[pCsr->nStat], 0, 
+        sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
+    );
+    pCsr->aStat = aNew;
+    pCsr->nStat = nSize;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+*/
+static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
+  int rc;
+
+  /* Increment our pretend rowid value. */
+  pCsr->iRowid++;
+
+  for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
+    if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
+  }
+
+  rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
+  if( rc==SQLITE_ROW ){
+    int i = 0;
+    int nDoclist = pCsr->csr.nDoclist;
+    char *aDoclist = pCsr->csr.aDoclist;
+    int iCol;
+
+    int eState = 0;
+
+    if( pCsr->zStop ){
+      int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
+      int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
+      if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
+        pCsr->isEof = 1;
+        return SQLITE_OK;
+      }
+    }
+
+    if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
+    memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
+    iCol = 0;
+
+    while( i<nDoclist ){
+      sqlite3_int64 v = 0;
+
+      i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
+      switch( eState ){
+        /* State 0. In this state the integer just read was a docid. */
+        case 0:
+          pCsr->aStat[0].nDoc++;
+          eState = 1;
+          iCol = 0;
+          break;
+
+        /* State 1. In this state we are expecting either a 1, indicating
+        ** that the following integer will be a column number, or the
+        ** start of a position list for column 0.  
+        ** 
+        ** The only difference between state 1 and state 2 is that if the
+        ** integer encountered in state 1 is not 0 or 1, then we need to
+        ** increment the column 0 "nDoc" count for this term.
+        */
+        case 1:
+          assert( iCol==0 );
+          if( v>1 ){
+            pCsr->aStat[1].nDoc++;
+          }
+          eState = 2;
+          /* fall through */
+
+        case 2:
+          if( v==0 ){       /* 0x00. Next integer will be a docid. */
+            eState = 0;
+          }else if( v==1 ){ /* 0x01. Next integer will be a column number. */
+            eState = 3;
+          }else{            /* 2 or greater. A position. */
+            pCsr->aStat[iCol+1].nOcc++;
+            pCsr->aStat[0].nOcc++;
+          }
+          break;
+
+        /* State 3. The integer just read is a column number. */
+        default: assert( eState==3 );
+          iCol = (int)v;
+          if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
+          pCsr->aStat[iCol+1].nDoc++;
+          eState = 2;
+          break;
+      }
+    }
+
+    pCsr->iCol = 0;
+    rc = SQLITE_OK;
+  }else{
+    pCsr->isEof = 1;
+  }
+  return rc;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+*/
+static int fts3auxFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
+  int rc;
+  int isScan = 0;
+  int iLangVal = 0;               /* Language id to query */
+
+  int iEq = -1;                   /* Index of term=? value in apVal */
+  int iGe = -1;                   /* Index of term>=? value in apVal */
+  int iLe = -1;                   /* Index of term<=? value in apVal */
+  int iLangid = -1;               /* Index of languageid=? value in apVal */
+  int iNext = 0;
+
+  UNUSED_PARAMETER(nVal);
+  UNUSED_PARAMETER(idxStr);
+
+  assert( idxStr==0 );
+  assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
+       || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
+       || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
+  );
+
+  if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
+    iEq = iNext++;
+  }else{
+    isScan = 1;
+    if( idxNum & FTS4AUX_GE_CONSTRAINT ){
+      iGe = iNext++;
+    }
+    if( idxNum & FTS4AUX_LE_CONSTRAINT ){
+      iLe = iNext++;
+    }
+  }
+  if( iNext<nVal ){
+    iLangid = iNext++;
+  }
+
+  /* In case this cursor is being reused, close and zero it. */
+  testcase(pCsr->filter.zTerm);
+  sqlite3Fts3SegReaderFinish(&pCsr->csr);
+  sqlite3_free((void *)pCsr->filter.zTerm);
+  sqlite3_free(pCsr->aStat);
+  memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
+
+  pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
+  if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
+
+  if( iEq>=0 || iGe>=0 ){
+    const unsigned char *zStr = sqlite3_value_text(apVal[0]);
+    assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
+    if( zStr ){
+      pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
+      pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);
+      if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
+    }
+  }
+
+  if( iLe>=0 ){
+    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
+    pCsr->nStop = sqlite3_value_bytes(apVal[iLe]);
+    if( pCsr->zStop==0 ) return SQLITE_NOMEM;
+  }
+  
+  if( iLangid>=0 ){
+    iLangVal = sqlite3_value_int(apVal[iLangid]);
+
+    /* If the user specified a negative value for the languageid, use zero
+    ** instead. This works, as the "languageid=?" constraint will also
+    ** be tested by the VDBE layer. The test will always be false (since
+    ** this module will not return a row with a negative languageid), and
+    ** so the overall query will return zero rows.  */
+    if( iLangVal<0 ) iLangVal = 0;
+  }
+  pCsr->iLangid = iLangVal;
+
+  rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
+      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
+  );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
+  }
+
+  if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
+  return rc;
+}
+
+/*
+** xEof - Return true if the cursor is at EOF, or false otherwise.
+*/
+static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  return pCsr->isEof;
+}
+
+/*
+** xColumn - Return a column value.
+*/
+static int fts3auxColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
+
+  assert( p->isEof==0 );
+  switch( iCol ){
+    case 0: /* term */
+      sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
+      break;
+
+    case 1: /* col */
+      if( p->iCol ){
+        sqlite3_result_int(pCtx, p->iCol-1);
+      }else{
+        sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
+      }
+      break;
+
+    case 2: /* documents */
+      sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
+      break;
+
+    case 3: /* occurrences */
+      sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
+      break;
+
+    default: /* languageid */
+      assert( iCol==4 );
+      sqlite3_result_int(pCtx, p->iLangid);
+      break;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Return the current rowid for the cursor.
+*/
+static int fts3auxRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  *pRowid = pCsr->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Register the fts3aux module with database connection db. Return SQLITE_OK
+** if successful or an error code if sqlite3_create_module() fails.
+*/
+SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){
+  static const sqlite3_module fts3aux_module = {
+     0,                           /* iVersion      */
+     fts3auxConnectMethod,        /* xCreate       */
+     fts3auxConnectMethod,        /* xConnect      */
+     fts3auxBestIndexMethod,      /* xBestIndex    */
+     fts3auxDisconnectMethod,     /* xDisconnect   */
+     fts3auxDisconnectMethod,     /* xDestroy      */
+     fts3auxOpenMethod,           /* xOpen         */
+     fts3auxCloseMethod,          /* xClose        */
+     fts3auxFilterMethod,         /* xFilter       */
+     fts3auxNextMethod,           /* xNext         */
+     fts3auxEofMethod,            /* xEof          */
+     fts3auxColumnMethod,         /* xColumn       */
+     fts3auxRowidMethod,          /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+  int rc;                         /* Return code */
+
+  rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
+  return rc;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_aux.c ********************************************/
+/************** Begin file fts3_expr.c ***************************************/
+/*
+** 2008 Nov 28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This module contains code that implements a parser for fts3 query strings
+** (the right-hand argument to the MATCH operator). Because the supported 
+** syntax is relatively simple, the whole tokenizer/parser system is
+** hand-coded. 
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/*
+** By default, this module parses the legacy syntax that has been 
+** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
+** is defined, then it uses the new syntax. The differences between
+** the new and the old syntaxes are:
+**
+**  a) The new syntax supports parenthesis. The old does not.
+**
+**  b) The new syntax supports the AND and NOT operators. The old does not.
+**
+**  c) The old syntax supports the "-" token qualifier. This is not 
+**     supported by the new syntax (it is replaced by the NOT operator).
+**
+**  d) When using the old syntax, the OR operator has a greater precedence
+**     than an implicit AND. When using the new, both implicity and explicit
+**     AND operators have a higher precedence than OR.
+**
+** If compiled with SQLITE_TEST defined, then this module exports the
+** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
+** to zero causes the module to use the old syntax. If it is set to 
+** non-zero the new syntax is activated. This is so both syntaxes can
+** be tested using a single build of testfixture.
+**
+** The following describes the syntax supported by the fts3 MATCH
+** operator in a similar format to that used by the lemon parser
+** generator. This module does not use actually lemon, it uses a
+** custom parser.
+**
+**   query ::= andexpr (OR andexpr)*.
+**
+**   andexpr ::= notexpr (AND? notexpr)*.
+**
+**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
+**   notexpr ::= LP query RP.
+**
+**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
+**
+**   distance_opt ::= .
+**   distance_opt ::= / INTEGER.
+**
+**   phrase ::= TOKEN.
+**   phrase ::= COLUMN:TOKEN.
+**   phrase ::= "TOKEN TOKEN TOKEN...".
+*/
+
+#ifdef SQLITE_TEST
+SQLITE_API int sqlite3_fts3_enable_parentheses = 0;
+#else
+# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS 
+#  define sqlite3_fts3_enable_parentheses 1
+# else
+#  define sqlite3_fts3_enable_parentheses 0
+# endif
+#endif
+
+/*
+** Default span for NEAR operators.
+*/
+#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
+
+/* #include <string.h> */
+/* #include <assert.h> */
+
+/*
+** isNot:
+**   This variable is used by function getNextNode(). When getNextNode() is
+**   called, it sets ParseContext.isNot to true if the 'next node' is a 
+**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
+**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
+**   zero.
+*/
+typedef struct ParseContext ParseContext;
+struct ParseContext {
+  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
+  int iLangid;                        /* Language id used with tokenizer */
+  const char **azCol;                 /* Array of column names for fts3 table */
+  int bFts4;                          /* True to allow FTS4-only syntax */
+  int nCol;                           /* Number of entries in azCol[] */
+  int iDefaultCol;                    /* Default column to query */
+  int isNot;                          /* True if getNextNode() sees a unary - */
+  sqlite3_context *pCtx;              /* Write error message here */
+  int nNest;                          /* Number of nested brackets */
+};
+
+/*
+** This function is equivalent to the standard isspace() function. 
+**
+** The standard isspace() can be awkward to use safely, because although it
+** is defined to accept an argument of type int, its behavior when passed
+** an integer that falls outside of the range of the unsigned char type
+** is undefined (and sometimes, "undefined" means segfault). This wrapper
+** is defined to accept an argument of type char, and always returns 0 for
+** any values that fall outside of the range of the unsigned char type (i.e.
+** negative values).
+*/
+static int fts3isspace(char c){
+  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
+}
+
+/*
+** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
+** zero the memory before returning a pointer to it. If unsuccessful, 
+** return NULL.
+*/
+static void *fts3MallocZero(int nByte){
+  void *pRet = sqlite3_malloc(nByte);
+  if( pRet ) memset(pRet, 0, nByte);
+  return pRet;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(
+  sqlite3_tokenizer *pTokenizer,
+  int iLangid,
+  const char *z,
+  int n,
+  sqlite3_tokenizer_cursor **ppCsr
+){
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  sqlite3_tokenizer_cursor *pCsr = 0;
+  int rc;
+
+  rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
+  assert( rc==SQLITE_OK || pCsr==0 );
+  if( rc==SQLITE_OK ){
+    pCsr->pTokenizer = pTokenizer;
+    if( pModule->iVersion>=1 ){
+      rc = pModule->xLanguageid(pCsr, iLangid);
+      if( rc!=SQLITE_OK ){
+        pModule->xClose(pCsr);
+        pCsr = 0;
+      }
+    }
+  }
+  *ppCsr = pCsr;
+  return rc;
+}
+
+/*
+** Function getNextNode(), which is called by fts3ExprParse(), may itself
+** call fts3ExprParse(). So this forward declaration is required.
+*/
+static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
+
+/*
+** Extract the next token from buffer z (length n) using the tokenizer
+** and other information (column names etc.) in pParse. Create an Fts3Expr
+** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
+** single token and set *ppExpr to point to it. If the end of the buffer is
+** reached before a token is found, set *ppExpr to zero. It is the
+** responsibility of the caller to eventually deallocate the allocated 
+** Fts3Expr structure (if any) by passing it to sqlite3_free().
+**
+** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
+** fails.
+*/
+static int getNextToken(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  int iCol,                               /* Value for Fts3Phrase.iColumn */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  sqlite3_tokenizer_cursor *pCursor;
+  Fts3Expr *pRet = 0;
+  int i = 0;
+
+  /* Set variable i to the maximum number of bytes of input to tokenize. */
+  for(i=0; i<n; i++){
+    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
+    if( z[i]=='"' ) break;
+  }
+
+  *pnConsumed = i;
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
+  if( rc==SQLITE_OK ){
+    const char *zToken;
+    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
+    int nByte;                               /* total space to allocate */
+
+    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
+    if( rc==SQLITE_OK ){
+      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
+      pRet = (Fts3Expr *)fts3MallocZero(nByte);
+      if( !pRet ){
+        rc = SQLITE_NOMEM;
+      }else{
+        pRet->eType = FTSQUERY_PHRASE;
+        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
+        pRet->pPhrase->nToken = 1;
+        pRet->pPhrase->iColumn = iCol;
+        pRet->pPhrase->aToken[0].n = nToken;
+        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
+        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
+
+        if( iEnd<n && z[iEnd]=='*' ){
+          pRet->pPhrase->aToken[0].isPrefix = 1;
+          iEnd++;
+        }
+
+        while( 1 ){
+          if( !sqlite3_fts3_enable_parentheses 
+           && iStart>0 && z[iStart-1]=='-' 
+          ){
+            pParse->isNot = 1;
+            iStart--;
+          }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
+            pRet->pPhrase->aToken[0].bFirst = 1;
+            iStart--;
+          }else{
+            break;
+          }
+        }
+
+      }
+      *pnConsumed = iEnd;
+    }else if( i && rc==SQLITE_DONE ){
+      rc = SQLITE_OK;
+    }
+
+    pModule->xClose(pCursor);
+  }
+  
+  *ppExpr = pRet;
+  return rc;
+}
+
+
+/*
+** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
+** then free the old allocation.
+*/
+static void *fts3ReallocOrFree(void *pOrig, int nNew){
+  void *pRet = sqlite3_realloc(pOrig, nNew);
+  if( !pRet ){
+    sqlite3_free(pOrig);
+  }
+  return pRet;
+}
+
+/*
+** Buffer zInput, length nInput, contains the contents of a quoted string
+** that appeared as part of an fts3 query expression. Neither quote character
+** is included in the buffer. This function attempts to tokenize the entire
+** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE 
+** containing the results.
+**
+** If successful, SQLITE_OK is returned and *ppExpr set to point at the
+** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
+** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
+** to 0.
+*/
+static int getNextString(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *zInput, int nInput,         /* Input string */
+  Fts3Expr **ppExpr                       /* OUT: expression */
+){
+  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  int rc;
+  Fts3Expr *p = 0;
+  sqlite3_tokenizer_cursor *pCursor = 0;
+  char *zTemp = 0;
+  int nTemp = 0;
+
+  const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
+  int nToken = 0;
+
+  /* The final Fts3Expr data structure, including the Fts3Phrase,
+  ** Fts3PhraseToken structures token buffers are all stored as a single 
+  ** allocation so that the expression can be freed with a single call to
+  ** sqlite3_free(). Setting this up requires a two pass approach.
+  **
+  ** The first pass, in the block below, uses a tokenizer cursor to iterate
+  ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
+  ** to assemble data in two dynamic buffers:
+  **
+  **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
+  **             structure, followed by the array of Fts3PhraseToken 
+  **             structures. This pass only populates the Fts3PhraseToken array.
+  **
+  **   Buffer zTemp: Contains copies of all tokens.
+  **
+  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
+  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
+  ** structures.
+  */
+  rc = sqlite3Fts3OpenTokenizer(
+      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
+  if( rc==SQLITE_OK ){
+    int ii;
+    for(ii=0; rc==SQLITE_OK; ii++){
+      const char *zByte;
+      int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
+      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
+      if( rc==SQLITE_OK ){
+        Fts3PhraseToken *pToken;
+
+        p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
+        if( !p ) goto no_mem;
+
+        zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
+        if( !zTemp ) goto no_mem;
+
+        assert( nToken==ii );
+        pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
+        memset(pToken, 0, sizeof(Fts3PhraseToken));
+
+        memcpy(&zTemp[nTemp], zByte, nByte);
+        nTemp += nByte;
+
+        pToken->n = nByte;
+        pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
+        pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
+        nToken = ii+1;
+      }
+    }
+
+    pModule->xClose(pCursor);
+    pCursor = 0;
+  }
+
+  if( rc==SQLITE_DONE ){
+    int jj;
+    char *zBuf = 0;
+
+    p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
+    if( !p ) goto no_mem;
+    memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
+    p->eType = FTSQUERY_PHRASE;
+    p->pPhrase = (Fts3Phrase *)&p[1];
+    p->pPhrase->iColumn = pParse->iDefaultCol;
+    p->pPhrase->nToken = nToken;
+
+    zBuf = (char *)&p->pPhrase->aToken[nToken];
+    if( zTemp ){
+      memcpy(zBuf, zTemp, nTemp);
+      sqlite3_free(zTemp);
+    }else{
+      assert( nTemp==0 );
+    }
+
+    for(jj=0; jj<p->pPhrase->nToken; jj++){
+      p->pPhrase->aToken[jj].z = zBuf;
+      zBuf += p->pPhrase->aToken[jj].n;
+    }
+    rc = SQLITE_OK;
+  }
+
+  *ppExpr = p;
+  return rc;
+no_mem:
+
+  if( pCursor ){
+    pModule->xClose(pCursor);
+  }
+  sqlite3_free(zTemp);
+  sqlite3_free(p);
+  *ppExpr = 0;
+  return SQLITE_NOMEM;
+}
+
+/*
+** The output variable *ppExpr is populated with an allocated Fts3Expr 
+** structure, or set to 0 if the end of the input buffer is reached.
+**
+** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
+** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
+** If SQLITE_ERROR is returned, pContext is populated with an error message.
+*/
+static int getNextNode(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Input string */
+  Fts3Expr **ppExpr,                      /* OUT: expression */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  static const struct Fts3Keyword {
+    char *z;                              /* Keyword text */
+    unsigned char n;                      /* Length of the keyword */
+    unsigned char parenOnly;              /* Only valid in paren mode */
+    unsigned char eType;                  /* Keyword code */
+  } aKeyword[] = {
+    { "OR" ,  2, 0, FTSQUERY_OR   },
+    { "AND",  3, 1, FTSQUERY_AND  },
+    { "NOT",  3, 1, FTSQUERY_NOT  },
+    { "NEAR", 4, 0, FTSQUERY_NEAR }
+  };
+  int ii;
+  int iCol;
+  int iColLen;
+  int rc;
+  Fts3Expr *pRet = 0;
+
+  const char *zInput = z;
+  int nInput = n;
+
+  pParse->isNot = 0;
+
+  /* Skip over any whitespace before checking for a keyword, an open or
+  ** close bracket, or a quoted string. 
+  */
+  while( nInput>0 && fts3isspace(*zInput) ){
+    nInput--;
+    zInput++;
+  }
+  if( nInput==0 ){
+    return SQLITE_DONE;
+  }
+
+  /* See if we are dealing with a keyword. */
+  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
+    const struct Fts3Keyword *pKey = &aKeyword[ii];
+
+    if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
+      continue;
+    }
+
+    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
+      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
+      int nKey = pKey->n;
+      char cNext;
+
+      /* If this is a "NEAR" keyword, check for an explicit nearness. */
+      if( pKey->eType==FTSQUERY_NEAR ){
+        assert( nKey==4 );
+        if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
+          nNear = 0;
+          for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
+            nNear = nNear * 10 + (zInput[nKey] - '0');
+          }
+        }
+      }
+
+      /* At this point this is probably a keyword. But for that to be true,
+      ** the next byte must contain either whitespace, an open or close
+      ** parenthesis, a quote character, or EOF. 
+      */
+      cNext = zInput[nKey];
+      if( fts3isspace(cNext) 
+       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
+      ){
+        pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
+        if( !pRet ){
+          return SQLITE_NOMEM;
+        }
+        pRet->eType = pKey->eType;
+        pRet->nNear = nNear;
+        *ppExpr = pRet;
+        *pnConsumed = (int)((zInput - z) + nKey);
+        return SQLITE_OK;
+      }
+
+      /* Turns out that wasn't a keyword after all. This happens if the
+      ** user has supplied a token such as "ORacle". Continue.
+      */
+    }
+  }
+
+  /* See if we are dealing with a quoted phrase. If this is the case, then
+  ** search for the closing quote and pass the whole string to getNextString()
+  ** for processing. This is easy to do, as fts3 has no syntax for escaping
+  ** a quote character embedded in a string.
+  */
+  if( *zInput=='"' ){
+    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
+    *pnConsumed = (int)((zInput - z) + ii + 1);
+    if( ii==nInput ){
+      return SQLITE_ERROR;
+    }
+    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
+  }
+
+  if( sqlite3_fts3_enable_parentheses ){
+    if( *zInput=='(' ){
+      int nConsumed = 0;
+      pParse->nNest++;
+      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
+      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
+      *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
+      return rc;
+    }else if( *zInput==')' ){
+      pParse->nNest--;
+      *pnConsumed = (int)((zInput - z) + 1);
+      *ppExpr = 0;
+      return SQLITE_DONE;
+    }
+  }
+
+  /* If control flows to this point, this must be a regular token, or 
+  ** the end of the input. Read a regular token using the sqlite3_tokenizer
+  ** interface. Before doing so, figure out if there is an explicit
+  ** column specifier for the token. 
+  **
+  ** TODO: Strangely, it is not possible to associate a column specifier
+  ** with a quoted phrase, only with a single token. Not sure if this was
+  ** an implementation artifact or an intentional decision when fts3 was
+  ** first implemented. Whichever it was, this module duplicates the 
+  ** limitation.
+  */
+  iCol = pParse->iDefaultCol;
+  iColLen = 0;
+  for(ii=0; ii<pParse->nCol; ii++){
+    const char *zStr = pParse->azCol[ii];
+    int nStr = (int)strlen(zStr);
+    if( nInput>nStr && zInput[nStr]==':' 
+     && sqlite3_strnicmp(zStr, zInput, nStr)==0 
+    ){
+      iCol = ii;
+      iColLen = (int)((zInput - z) + nStr + 1);
+      break;
+    }
+  }
+  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
+  *pnConsumed += iColLen;
+  return rc;
+}
+
+/*
+** The argument is an Fts3Expr structure for a binary operator (any type
+** except an FTSQUERY_PHRASE). Return an integer value representing the
+** precedence of the operator. Lower values have a higher precedence (i.e.
+** group more tightly). For example, in the C language, the == operator
+** groups more tightly than ||, and would therefore have a higher precedence.
+**
+** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
+** is defined), the order of the operators in precedence from highest to
+** lowest is:
+**
+**   NEAR
+**   NOT
+**   AND (including implicit ANDs)
+**   OR
+**
+** Note that when using the old query syntax, the OR operator has a higher
+** precedence than the AND operator.
+*/
+static int opPrecedence(Fts3Expr *p){
+  assert( p->eType!=FTSQUERY_PHRASE );
+  if( sqlite3_fts3_enable_parentheses ){
+    return p->eType;
+  }else if( p->eType==FTSQUERY_NEAR ){
+    return 1;
+  }else if( p->eType==FTSQUERY_OR ){
+    return 2;
+  }
+  assert( p->eType==FTSQUERY_AND );
+  return 3;
+}
+
+/*
+** Argument ppHead contains a pointer to the current head of a query 
+** expression tree being parsed. pPrev is the expression node most recently
+** inserted into the tree. This function adds pNew, which is always a binary
+** operator node, into the expression tree based on the relative precedence
+** of pNew and the existing nodes of the tree. This may result in the head
+** of the tree changing, in which case *ppHead is set to the new root node.
+*/
+static void insertBinaryOperator(
+  Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
+  Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
+  Fts3Expr *pNew           /* New binary node to insert into expression tree */
+){
+  Fts3Expr *pSplit = pPrev;
+  while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
+    pSplit = pSplit->pParent;
+  }
+
+  if( pSplit->pParent ){
+    assert( pSplit->pParent->pRight==pSplit );
+    pSplit->pParent->pRight = pNew;
+    pNew->pParent = pSplit->pParent;
+  }else{
+    *ppHead = pNew;
+  }
+  pNew->pLeft = pSplit;
+  pSplit->pParent = pNew;
+}
+
+/*
+** Parse the fts3 query expression found in buffer z, length n. This function
+** returns either when the end of the buffer is reached or an unmatched 
+** closing bracket - ')' - is encountered.
+**
+** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
+** parsed form of the expression and *pnConsumed is set to the number of
+** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
+** (out of memory error) or SQLITE_ERROR (parse error) is returned.
+*/
+static int fts3ExprParse(
+  ParseContext *pParse,                   /* fts3 query parse context */
+  const char *z, int n,                   /* Text of MATCH query */
+  Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
+  int *pnConsumed                         /* OUT: Number of bytes consumed */
+){
+  Fts3Expr *pRet = 0;
+  Fts3Expr *pPrev = 0;
+  Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
+  int nIn = n;
+  const char *zIn = z;
+  int rc = SQLITE_OK;
+  int isRequirePhrase = 1;
+
+  while( rc==SQLITE_OK ){
+    Fts3Expr *p = 0;
+    int nByte = 0;
+
+    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
+    assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
+    if( rc==SQLITE_OK ){
+      if( p ){
+        int isPhrase;
+
+        if( !sqlite3_fts3_enable_parentheses 
+            && p->eType==FTSQUERY_PHRASE && pParse->isNot 
+        ){
+          /* Create an implicit NOT operator. */
+          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
+          if( !pNot ){
+            sqlite3Fts3ExprFree(p);
+            rc = SQLITE_NOMEM;
+            goto exprparse_out;
+          }
+          pNot->eType = FTSQUERY_NOT;
+          pNot->pRight = p;
+          p->pParent = pNot;
+          if( pNotBranch ){
+            pNot->pLeft = pNotBranch;
+            pNotBranch->pParent = pNot;
+          }
+          pNotBranch = pNot;
+          p = pPrev;
+        }else{
+          int eType = p->eType;
+          isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
+
+          /* The isRequirePhrase variable is set to true if a phrase or
+          ** an expression contained in parenthesis is required. If a
+          ** binary operator (AND, OR, NOT or NEAR) is encounted when
+          ** isRequirePhrase is set, this is a syntax error.
+          */
+          if( !isPhrase && isRequirePhrase ){
+            sqlite3Fts3ExprFree(p);
+            rc = SQLITE_ERROR;
+            goto exprparse_out;
+          }
+
+          if( isPhrase && !isRequirePhrase ){
+            /* Insert an implicit AND operator. */
+            Fts3Expr *pAnd;
+            assert( pRet && pPrev );
+            pAnd = fts3MallocZero(sizeof(Fts3Expr));
+            if( !pAnd ){
+              sqlite3Fts3ExprFree(p);
+              rc = SQLITE_NOMEM;
+              goto exprparse_out;
+            }
+            pAnd->eType = FTSQUERY_AND;
+            insertBinaryOperator(&pRet, pPrev, pAnd);
+            pPrev = pAnd;
+          }
+
+          /* This test catches attempts to make either operand of a NEAR
+           ** operator something other than a phrase. For example, either of
+           ** the following:
+           **
+           **    (bracketed expression) NEAR phrase
+           **    phrase NEAR (bracketed expression)
+           **
+           ** Return an error in either case.
+           */
+          if( pPrev && (
+            (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
+         || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
+          )){
+            sqlite3Fts3ExprFree(p);
+            rc = SQLITE_ERROR;
+            goto exprparse_out;
+          }
+
+          if( isPhrase ){
+            if( pRet ){
+              assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
+              pPrev->pRight = p;
+              p->pParent = pPrev;
+            }else{
+              pRet = p;
+            }
+          }else{
+            insertBinaryOperator(&pRet, pPrev, p);
+          }
+          isRequirePhrase = !isPhrase;
+        }
+        pPrev = p;
+      }
+      assert( nByte>0 );
+    }
+    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
+    nIn -= nByte;
+    zIn += nByte;
+  }
+
+  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
+    rc = SQLITE_ERROR;
+  }
+
+  if( rc==SQLITE_DONE ){
+    rc = SQLITE_OK;
+    if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
+      if( !pRet ){
+        rc = SQLITE_ERROR;
+      }else{
+        Fts3Expr *pIter = pNotBranch;
+        while( pIter->pLeft ){
+          pIter = pIter->pLeft;
+        }
+        pIter->pLeft = pRet;
+        pRet->pParent = pIter;
+        pRet = pNotBranch;
+      }
+    }
+  }
+  *pnConsumed = n - nIn;
+
+exprparse_out:
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(pRet);
+    sqlite3Fts3ExprFree(pNotBranch);
+    pRet = 0;
+  }
+  *ppExpr = pRet;
+  return rc;
+}
+
+/*
+** Return SQLITE_ERROR if the maximum depth of the expression tree passed 
+** as the only argument is more than nMaxDepth.
+*/
+static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
+  int rc = SQLITE_OK;
+  if( p ){
+    if( nMaxDepth<0 ){ 
+      rc = SQLITE_TOOBIG;
+    }else{
+      rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
+      if( rc==SQLITE_OK ){
+        rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** This function attempts to transform the expression tree at (*pp) to
+** an equivalent but more balanced form. The tree is modified in place.
+** If successful, SQLITE_OK is returned and (*pp) set to point to the 
+** new root expression node. 
+**
+** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
+**
+** Otherwise, if an error occurs, an SQLite error code is returned and 
+** expression (*pp) freed.
+*/
+static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
+  int rc = SQLITE_OK;             /* Return code */
+  Fts3Expr *pRoot = *pp;          /* Initial root node */
+  Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. */
+  int eType = pRoot->eType;       /* Type of node in this tree */
+
+  if( nMaxDepth==0 ){
+    rc = SQLITE_ERROR;
+  }
+
+  if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
+    Fts3Expr **apLeaf;
+    apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
+    if( 0==apLeaf ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
+    }
+
+    if( rc==SQLITE_OK ){
+      int i;
+      Fts3Expr *p;
+
+      /* Set $p to point to the left-most leaf in the tree of eType nodes. */
+      for(p=pRoot; p->eType==eType; p=p->pLeft){
+        assert( p->pParent==0 || p->pParent->pLeft==p );
+        assert( p->pLeft && p->pRight );
+      }
+
+      /* This loop runs once for each leaf in the tree of eType nodes. */
+      while( 1 ){
+        int iLvl;
+        Fts3Expr *pParent = p->pParent;     /* Current parent of p */
+
+        assert( pParent==0 || pParent->pLeft==p );
+        p->pParent = 0;
+        if( pParent ){
+          pParent->pLeft = 0;
+        }else{
+          pRoot = 0;
+        }
+        rc = fts3ExprBalance(&p, nMaxDepth-1);
+        if( rc!=SQLITE_OK ) break;
+
+        for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
+          if( apLeaf[iLvl]==0 ){
+            apLeaf[iLvl] = p;
+            p = 0;
+          }else{
+            assert( pFree );
+            pFree->pLeft = apLeaf[iLvl];
+            pFree->pRight = p;
+            pFree->pLeft->pParent = pFree;
+            pFree->pRight->pParent = pFree;
+
+            p = pFree;
+            pFree = pFree->pParent;
+            p->pParent = 0;
+            apLeaf[iLvl] = 0;
+          }
+        }
+        if( p ){
+          sqlite3Fts3ExprFree(p);
+          rc = SQLITE_TOOBIG;
+          break;
+        }
+
+        /* If that was the last leaf node, break out of the loop */
+        if( pParent==0 ) break;
+
+        /* Set $p to point to the next leaf in the tree of eType nodes */
+        for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
+
+        /* Remove pParent from the original tree. */
+        assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
+        pParent->pRight->pParent = pParent->pParent;
+        if( pParent->pParent ){
+          pParent->pParent->pLeft = pParent->pRight;
+        }else{
+          assert( pParent==pRoot );
+          pRoot = pParent->pRight;
+        }
+
+        /* Link pParent into the free node list. It will be used as an
+        ** internal node of the new tree.  */
+        pParent->pParent = pFree;
+        pFree = pParent;
+      }
+
+      if( rc==SQLITE_OK ){
+        p = 0;
+        for(i=0; i<nMaxDepth; i++){
+          if( apLeaf[i] ){
+            if( p==0 ){
+              p = apLeaf[i];
+              p->pParent = 0;
+            }else{
+              assert( pFree!=0 );
+              pFree->pRight = p;
+              pFree->pLeft = apLeaf[i];
+              pFree->pLeft->pParent = pFree;
+              pFree->pRight->pParent = pFree;
+
+              p = pFree;
+              pFree = pFree->pParent;
+              p->pParent = 0;
+            }
+          }
+        }
+        pRoot = p;
+      }else{
+        /* An error occurred. Delete the contents of the apLeaf[] array 
+        ** and pFree list. Everything else is cleaned up by the call to
+        ** sqlite3Fts3ExprFree(pRoot) below.  */
+        Fts3Expr *pDel;
+        for(i=0; i<nMaxDepth; i++){
+          sqlite3Fts3ExprFree(apLeaf[i]);
+        }
+        while( (pDel=pFree)!=0 ){
+          pFree = pDel->pParent;
+          sqlite3_free(pDel);
+        }
+      }
+
+      assert( pFree==0 );
+      sqlite3_free( apLeaf );
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(pRoot);
+    pRoot = 0;
+  }
+  *pp = pRoot;
+  return rc;
+}
+
+/*
+** This function is similar to sqlite3Fts3ExprParse(), with the following
+** differences:
+**
+**   1. It does not do expression rebalancing.
+**   2. It does not check that the expression does not exceed the 
+**      maximum allowable depth.
+**   3. Even if it fails, *ppExpr may still be set to point to an 
+**      expression tree. It should be deleted using sqlite3Fts3ExprFree()
+**      in this case.
+*/
+static int fts3ExprParseUnbalanced(
+  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  int iLangid,                        /* Language id for tokenizer */
+  char **azCol,                       /* Array of column names for fts3 table */
+  int bFts4,                          /* True to allow FTS4-only syntax */
+  int nCol,                           /* Number of entries in azCol[] */
+  int iDefaultCol,                    /* Default column to query */
+  const char *z, int n,               /* Text of MATCH query */
+  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
+){
+  int nParsed;
+  int rc;
+  ParseContext sParse;
+
+  memset(&sParse, 0, sizeof(ParseContext));
+  sParse.pTokenizer = pTokenizer;
+  sParse.iLangid = iLangid;
+  sParse.azCol = (const char **)azCol;
+  sParse.nCol = nCol;
+  sParse.iDefaultCol = iDefaultCol;
+  sParse.bFts4 = bFts4;
+  if( z==0 ){
+    *ppExpr = 0;
+    return SQLITE_OK;
+  }
+  if( n<0 ){
+    n = (int)strlen(z);
+  }
+  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+  assert( rc==SQLITE_OK || *ppExpr==0 );
+
+  /* Check for mismatched parenthesis */
+  if( rc==SQLITE_OK && sParse.nNest ){
+    rc = SQLITE_ERROR;
+  }
+  
+  return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right. 
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+*/
+SQLITE_PRIVATE int sqlite3Fts3ExprParse(
+  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
+  int iLangid,                        /* Language id for tokenizer */
+  char **azCol,                       /* Array of column names for fts3 table */
+  int bFts4,                          /* True to allow FTS4-only syntax */
+  int nCol,                           /* Number of entries in azCol[] */
+  int iDefaultCol,                    /* Default column to query */
+  const char *z, int n,               /* Text of MATCH query */
+  Fts3Expr **ppExpr,                  /* OUT: Parsed query structure */
+  char **pzErr                        /* OUT: Error message (sqlite3_malloc) */
+){
+  int rc = fts3ExprParseUnbalanced(
+      pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
+  );
+  
+  /* Rebalance the expression. And check that its depth does not exceed
+  ** SQLITE_FTS3_MAX_EXPR_DEPTH.  */
+  if( rc==SQLITE_OK && *ppExpr ){
+    rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
+    if( rc==SQLITE_OK ){
+      rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts3ExprFree(*ppExpr);
+    *ppExpr = 0;
+    if( rc==SQLITE_TOOBIG ){
+      *pzErr = sqlite3_mprintf(
+          "FTS expression tree is too large (maximum depth %d)", 
+          SQLITE_FTS3_MAX_EXPR_DEPTH
+      );
+      rc = SQLITE_ERROR;
+    }else if( rc==SQLITE_ERROR ){
+      *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Free a single node of an expression tree.
+*/
+static void fts3FreeExprNode(Fts3Expr *p){
+  assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
+  sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
+  sqlite3_free(p->aMI);
+  sqlite3_free(p);
+}
+
+/*
+** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
+**
+** This function would be simpler if it recursively called itself. But
+** that would mean passing a sufficiently large expression to ExprParse()
+** could cause a stack overflow.
+*/
+SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){
+  Fts3Expr *p;
+  assert( pDel==0 || pDel->pParent==0 );
+  for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
+    assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
+  }
+  while( p ){
+    Fts3Expr *pParent = p->pParent;
+    fts3FreeExprNode(p);
+    if( pParent && p==pParent->pLeft && pParent->pRight ){
+      p = pParent->pRight;
+      while( p && (p->pLeft || p->pRight) ){
+        assert( p==p->pParent->pRight || p==p->pParent->pLeft );
+        p = (p->pLeft ? p->pLeft : p->pRight);
+      }
+    }else{
+      p = pParent;
+    }
+  }
+}
+
+/****************************************************************************
+*****************************************************************************
+** Everything after this point is just test code.
+*/
+
+#ifdef SQLITE_TEST
+
+/* #include <stdio.h> */
+
+/*
+** Function to query the hash-table of tokenizers (see README.tokenizers).
+*/
+static int queryTestTokenizer(
+  sqlite3 *db, 
+  const char *zName,  
+  const sqlite3_tokenizer_module **pp
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts3_tokenizer(?)";
+
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
+      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+  }
+
+  return sqlite3_finalize(pStmt);
+}
+
+/*
+** Return a pointer to a buffer containing a text representation of the
+** expression passed as the first argument. The buffer is obtained from
+** sqlite3_malloc(). It is the responsibility of the caller to use 
+** sqlite3_free() to release the memory. If an OOM condition is encountered,
+** NULL is returned.
+**
+** If the second argument is not NULL, then its contents are prepended to 
+** the returned expression text and then freed using sqlite3_free().
+*/
+static char *exprToString(Fts3Expr *pExpr, char *zBuf){
+  if( pExpr==0 ){
+    return sqlite3_mprintf("");
+  }
+  switch( pExpr->eType ){
+    case FTSQUERY_PHRASE: {
+      Fts3Phrase *pPhrase = pExpr->pPhrase;
+      int i;
+      zBuf = sqlite3_mprintf(
+          "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
+      for(i=0; zBuf && i<pPhrase->nToken; i++){
+        zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 
+            pPhrase->aToken[i].n, pPhrase->aToken[i].z,
+            (pPhrase->aToken[i].isPrefix?"+":"")
+        );
+      }
+      return zBuf;
+    }
+
+    case FTSQUERY_NEAR:
+      zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
+      break;
+    case FTSQUERY_NOT:
+      zBuf = sqlite3_mprintf("%zNOT ", zBuf);
+      break;
+    case FTSQUERY_AND:
+      zBuf = sqlite3_mprintf("%zAND ", zBuf);
+      break;
+    case FTSQUERY_OR:
+      zBuf = sqlite3_mprintf("%zOR ", zBuf);
+      break;
+  }
+
+  if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
+  if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
+  if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
+
+  if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
+  if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
+
+  return zBuf;
+}
+
+/*
+** This is the implementation of a scalar SQL function used to test the 
+** expression parser. It should be called as follows:
+**
+**   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
+**
+** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
+** to parse the query expression (see README.tokenizers). The second argument
+** is the query expression to parse. Each subsequent argument is the name
+** of a column of the fts3 table that the query expression may refer to.
+** For example:
+**
+**   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
+*/
+static void fts3ExprTest(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  sqlite3_tokenizer_module const *pModule = 0;
+  sqlite3_tokenizer *pTokenizer = 0;
+  int rc;
+  char **azCol = 0;
+  const char *zExpr;
+  int nExpr;
+  int nCol;
+  int ii;
+  Fts3Expr *pExpr;
+  char *zBuf = 0;
+  sqlite3 *db = sqlite3_context_db_handle(context);
+
+  if( argc<3 ){
+    sqlite3_result_error(context, 
+        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
+    );
+    return;
+  }
+
+  rc = queryTestTokenizer(db,
+                          (const char *)sqlite3_value_text(argv[0]), &pModule);
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }else if( !pModule ){
+    sqlite3_result_error(context, "No such tokenizer module", -1);
+    goto exprtest_out;
+  }
+
+  rc = pModule->xCreate(0, 0, &pTokenizer);
+  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
+  if( rc==SQLITE_NOMEM ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  pTokenizer->pModule = pModule;
+
+  zExpr = (const char *)sqlite3_value_text(argv[1]);
+  nExpr = sqlite3_value_bytes(argv[1]);
+  nCol = argc-2;
+  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
+  if( !azCol ){
+    sqlite3_result_error_nomem(context);
+    goto exprtest_out;
+  }
+  for(ii=0; ii<nCol; ii++){
+    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
+  }
+
+  if( sqlite3_user_data(context) ){
+    char *zDummy = 0;
+    rc = sqlite3Fts3ExprParse(
+        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
+    );
+    assert( rc==SQLITE_OK || pExpr==0 );
+    sqlite3_free(zDummy);
+  }else{
+    rc = fts3ExprParseUnbalanced(
+        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+    );
+  }
+
+  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
+    sqlite3Fts3ExprFree(pExpr);
+    sqlite3_result_error(context, "Error parsing expression", -1);
+  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
+    sqlite3_result_error_nomem(context);
+  }else{
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+    sqlite3_free(zBuf);
+  }
+
+  sqlite3Fts3ExprFree(pExpr);
+
+exprtest_out:
+  if( pModule && pTokenizer ){
+    rc = pModule->xDestroy(pTokenizer);
+  }
+  sqlite3_free(azCol);
+}
+
+/*
+** Register the query expression parser test function fts3_exprtest() 
+** with database connection db. 
+*/
+SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
+  int rc = sqlite3_create_function(
+      db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
+  );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", 
+        -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
+    );
+  }
+  return rc;
+}
+
+#endif
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_expr.c *******************************************/
+/************** Begin file fts3_hash.c ***************************************/
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the implementation of generic hash-tables used in SQLite.
+** We've modified it slightly to serve as a standalone hash table
+** implementation for the full-text indexing module.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <string.h> */
+
+
+/*
+** Malloc and Free functions
+*/
+static void *fts3HashMalloc(int n){
+  void *p = sqlite3_malloc(n);
+  if( p ){
+    memset(p, 0, n);
+  }
+  return p;
+}
+static void fts3HashFree(void *p){
+  sqlite3_free(p);
+}
+
+/* Turn bulk memory into a hash table object by initializing the
+** fields of the Hash structure.
+**
+** "pNew" is a pointer to the hash table that is to be initialized.
+** keyClass is one of the constants 
+** FTS3_HASH_BINARY or FTS3_HASH_STRING.  The value of keyClass 
+** determines what kind of key the hash table will use.  "copyKey" is
+** true if the hash table should make its own private copy of keys and
+** false if it should just use the supplied pointer.
+*/
+SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
+  assert( pNew!=0 );
+  assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
+  pNew->keyClass = keyClass;
+  pNew->copyKey = copyKey;
+  pNew->first = 0;
+  pNew->count = 0;
+  pNew->htsize = 0;
+  pNew->ht = 0;
+}
+
+/* Remove all entries from a hash table.  Reclaim all memory.
+** Call this routine to delete a hash table or to reset a hash table
+** to the empty state.
+*/
+SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){
+  Fts3HashElem *elem;         /* For looping over all elements of the table */
+
+  assert( pH!=0 );
+  elem = pH->first;
+  pH->first = 0;
+  fts3HashFree(pH->ht);
+  pH->ht = 0;
+  pH->htsize = 0;
+  while( elem ){
+    Fts3HashElem *next_elem = elem->next;
+    if( pH->copyKey && elem->pKey ){
+      fts3HashFree(elem->pKey);
+    }
+    fts3HashFree(elem);
+    elem = next_elem;
+  }
+  pH->count = 0;
+}
+
+/*
+** Hash and comparison functions when the mode is FTS3_HASH_STRING
+*/
+static int fts3StrHash(const void *pKey, int nKey){
+  const char *z = (const char *)pKey;
+  unsigned h = 0;
+  if( nKey<=0 ) nKey = (int) strlen(z);
+  while( nKey > 0  ){
+    h = (h<<3) ^ h ^ *z++;
+    nKey--;
+  }
+  return (int)(h & 0x7fffffff);
+}
+static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
+  if( n1!=n2 ) return 1;
+  return strncmp((const char*)pKey1,(const char*)pKey2,n1);
+}
+
+/*
+** Hash and comparison functions when the mode is FTS3_HASH_BINARY
+*/
+static int fts3BinHash(const void *pKey, int nKey){
+  int h = 0;
+  const char *z = (const char *)pKey;
+  while( nKey-- > 0 ){
+    h = (h<<3) ^ h ^ *(z++);
+  }
+  return h & 0x7fffffff;
+}
+static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
+  if( n1!=n2 ) return 1;
+  return memcmp(pKey1,pKey2,n1);
+}
+
+/*
+** Return a pointer to the appropriate hash function given the key class.
+**
+** The C syntax in this function definition may be unfamilar to some 
+** programmers, so we provide the following additional explanation:
+**
+** The name of the function is "ftsHashFunction".  The function takes a
+** single parameter "keyClass".  The return value of ftsHashFunction()
+** is a pointer to another function.  Specifically, the return value
+** of ftsHashFunction() is a pointer to a function that takes two parameters
+** with types "const void*" and "int" and returns an "int".
+*/
+static int (*ftsHashFunction(int keyClass))(const void*,int){
+  if( keyClass==FTS3_HASH_STRING ){
+    return &fts3StrHash;
+  }else{
+    assert( keyClass==FTS3_HASH_BINARY );
+    return &fts3BinHash;
+  }
+}
+
+/*
+** Return a pointer to the appropriate hash function given the key class.
+**
+** For help in interpreted the obscure C code in the function definition,
+** see the header comment on the previous function.
+*/
+static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
+  if( keyClass==FTS3_HASH_STRING ){
+    return &fts3StrCompare;
+  }else{
+    assert( keyClass==FTS3_HASH_BINARY );
+    return &fts3BinCompare;
+  }
+}
+
+/* Link an element into the hash table
+*/
+static void fts3HashInsertElement(
+  Fts3Hash *pH,            /* The complete hash table */
+  struct _fts3ht *pEntry,  /* The entry into which pNew is inserted */
+  Fts3HashElem *pNew       /* The element to be inserted */
+){
+  Fts3HashElem *pHead;     /* First element already in pEntry */
+  pHead = pEntry->chain;
+  if( pHead ){
+    pNew->next = pHead;
+    pNew->prev = pHead->prev;
+    if( pHead->prev ){ pHead->prev->next = pNew; }
+    else             { pH->first = pNew; }
+    pHead->prev = pNew;
+  }else{
+    pNew->next = pH->first;
+    if( pH->first ){ pH->first->prev = pNew; }
+    pNew->prev = 0;
+    pH->first = pNew;
+  }
+  pEntry->count++;
+  pEntry->chain = pNew;
+}
+
+
+/* Resize the hash table so that it cantains "new_size" buckets.
+** "new_size" must be a power of 2.  The hash table might fail 
+** to resize if sqliteMalloc() fails.
+**
+** Return non-zero if a memory allocation error occurs.
+*/
+static int fts3Rehash(Fts3Hash *pH, int new_size){
+  struct _fts3ht *new_ht;          /* The new hash table */
+  Fts3HashElem *elem, *next_elem;  /* For looping over existing elements */
+  int (*xHash)(const void*,int);   /* The hash function */
+
+  assert( (new_size & (new_size-1))==0 );
+  new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
+  if( new_ht==0 ) return 1;
+  fts3HashFree(pH->ht);
+  pH->ht = new_ht;
+  pH->htsize = new_size;
+  xHash = ftsHashFunction(pH->keyClass);
+  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
+    int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
+    next_elem = elem->next;
+    fts3HashInsertElement(pH, &new_ht[h], elem);
+  }
+  return 0;
+}
+
+/* This function (for internal use only) locates an element in an
+** hash table that matches the given key.  The hash for this key has
+** already been computed and is passed as the 4th parameter.
+*/
+static Fts3HashElem *fts3FindElementByHash(
+  const Fts3Hash *pH, /* The pH to be searched */
+  const void *pKey,   /* The key we are searching for */
+  int nKey,
+  int h               /* The hash for this key. */
+){
+  Fts3HashElem *elem;            /* Used to loop thru the element list */
+  int count;                     /* Number of elements left to test */
+  int (*xCompare)(const void*,int,const void*,int);  /* comparison function */
+
+  if( pH->ht ){
+    struct _fts3ht *pEntry = &pH->ht[h];
+    elem = pEntry->chain;
+    count = pEntry->count;
+    xCompare = ftsCompareFunction(pH->keyClass);
+    while( count-- && elem ){
+      if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ 
+        return elem;
+      }
+      elem = elem->next;
+    }
+  }
+  return 0;
+}
+
+/* Remove a single entry from the hash table given a pointer to that
+** element and a hash on the element's key.
+*/
+static void fts3RemoveElementByHash(
+  Fts3Hash *pH,         /* The pH containing "elem" */
+  Fts3HashElem* elem,   /* The element to be removed from the pH */
+  int h                 /* Hash value for the element */
+){
+  struct _fts3ht *pEntry;
+  if( elem->prev ){
+    elem->prev->next = elem->next; 
+  }else{
+    pH->first = elem->next;
+  }
+  if( elem->next ){
+    elem->next->prev = elem->prev;
+  }
+  pEntry = &pH->ht[h];
+  if( pEntry->chain==elem ){
+    pEntry->chain = elem->next;
+  }
+  pEntry->count--;
+  if( pEntry->count<=0 ){
+    pEntry->chain = 0;
+  }
+  if( pH->copyKey && elem->pKey ){
+    fts3HashFree(elem->pKey);
+  }
+  fts3HashFree( elem );
+  pH->count--;
+  if( pH->count<=0 ){
+    assert( pH->first==0 );
+    assert( pH->count==0 );
+    fts3HashClear(pH);
+  }
+}
+
+SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem(
+  const Fts3Hash *pH, 
+  const void *pKey, 
+  int nKey
+){
+  int h;                          /* A hash on key */
+  int (*xHash)(const void*,int);  /* The hash function */
+
+  if( pH==0 || pH->ht==0 ) return 0;
+  xHash = ftsHashFunction(pH->keyClass);
+  assert( xHash!=0 );
+  h = (*xHash)(pKey,nKey);
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
+}
+
+/* 
+** Attempt to locate an element of the hash table pH with a key
+** that matches pKey,nKey.  Return the data for this element if it is
+** found, or NULL if there is no match.
+*/
+SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
+  Fts3HashElem *pElem;            /* The element that matches key (if any) */
+
+  pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
+  return pElem ? pElem->data : 0;
+}
+
+/* Insert an element into the hash table pH.  The key is pKey,nKey
+** and the data is "data".
+**
+** If no element exists with a matching key, then a new
+** element is created.  A copy of the key is made if the copyKey
+** flag is set.  NULL is returned.
+**
+** If another element already exists with the same key, then the
+** new data replaces the old data and the old data is returned.
+** The key is not copied in this instance.  If a malloc fails, then
+** the new data is returned and the hash table is unchanged.
+**
+** If the "data" parameter to this function is NULL, then the
+** element corresponding to "key" is removed from the hash table.
+*/
+SQLITE_PRIVATE void *sqlite3Fts3HashInsert(
+  Fts3Hash *pH,        /* The hash table to insert into */
+  const void *pKey,    /* The key */
+  int nKey,            /* Number of bytes in the key */
+  void *data           /* The data */
+){
+  int hraw;                 /* Raw hash value of the key */
+  int h;                    /* the hash of the key modulo hash table size */
+  Fts3HashElem *elem;       /* Used to loop thru the element list */
+  Fts3HashElem *new_elem;   /* New element added to the pH */
+  int (*xHash)(const void*,int);  /* The hash function */
+
+  assert( pH!=0 );
+  xHash = ftsHashFunction(pH->keyClass);
+  assert( xHash!=0 );
+  hraw = (*xHash)(pKey, nKey);
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  h = hraw & (pH->htsize-1);
+  elem = fts3FindElementByHash(pH,pKey,nKey,h);
+  if( elem ){
+    void *old_data = elem->data;
+    if( data==0 ){
+      fts3RemoveElementByHash(pH,elem,h);
+    }else{
+      elem->data = data;
+    }
+    return old_data;
+  }
+  if( data==0 ) return 0;
+  if( (pH->htsize==0 && fts3Rehash(pH,8))
+   || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
+  ){
+    pH->count = 0;
+    return data;
+  }
+  assert( pH->htsize>0 );
+  new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
+  if( new_elem==0 ) return data;
+  if( pH->copyKey && pKey!=0 ){
+    new_elem->pKey = fts3HashMalloc( nKey );
+    if( new_elem->pKey==0 ){
+      fts3HashFree(new_elem);
+      return data;
+    }
+    memcpy((void*)new_elem->pKey, pKey, nKey);
+  }else{
+    new_elem->pKey = (void*)pKey;
+  }
+  new_elem->nKey = nKey;
+  pH->count++;
+  assert( pH->htsize>0 );
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  h = hraw & (pH->htsize-1);
+  fts3HashInsertElement(pH, &pH->ht[h], new_elem);
+  new_elem->data = data;
+  return 0;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_hash.c *******************************************/
+/************** Begin file fts3_porter.c *************************************/
+/*
+** 2006 September 30
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Implementation of the full-text-search tokenizer that implements
+** a Porter stemmer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
+
+
+/*
+** Class derived from sqlite3_tokenizer
+*/
+typedef struct porter_tokenizer {
+  sqlite3_tokenizer base;      /* Base class */
+} porter_tokenizer;
+
+/*
+** Class derived from sqlite3_tokenizer_cursor
+*/
+typedef struct porter_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;
+  const char *zInput;          /* input we are tokenizing */
+  int nInput;                  /* size of the input */
+  int iOffset;                 /* current position in zInput */
+  int iToken;                  /* index of next token to be returned */
+  char *zToken;                /* storage for current token */
+  int nAllocated;              /* space allocated to zToken buffer */
+} porter_tokenizer_cursor;
+
+
+/*
+** Create a new tokenizer instance.
+*/
+static int porterCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  porter_tokenizer *t;
+
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
+  if( t==NULL ) return SQLITE_NOMEM;
+  memset(t, 0, sizeof(*t));
+  *ppTokenizer = &t->base;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int porterDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free(pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is zInput[0..nInput-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+*/
+static int porterOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput, int nInput,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  porter_tokenizer_cursor *c;
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
+  if( c==NULL ) return SQLITE_NOMEM;
+
+  c->zInput = zInput;
+  if( zInput==0 ){
+    c->nInput = 0;
+  }else if( nInput<0 ){
+    c->nInput = (int)strlen(zInput);
+  }else{
+    c->nInput = nInput;
+  }
+  c->iOffset = 0;                 /* start tokenizing at the beginning */
+  c->iToken = 0;
+  c->zToken = NULL;               /* no space allocated, yet. */
+  c->nAllocated = 0;
+
+  *ppCursor = &c->base;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** porterOpen() above.
+*/
+static int porterClose(sqlite3_tokenizer_cursor *pCursor){
+  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+  sqlite3_free(c->zToken);
+  sqlite3_free(c);
+  return SQLITE_OK;
+}
+/*
+** Vowel or consonant
+*/
+static const char cType[] = {
+   0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
+   1, 1, 1, 2, 1
+};
+
+/*
+** isConsonant() and isVowel() determine if their first character in
+** the string they point to is a consonant or a vowel, according
+** to Porter ruls.  
+**
+** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
+** 'Y' is a consonant unless it follows another consonant,
+** in which case it is a vowel.
+**
+** In these routine, the letters are in reverse order.  So the 'y' rule
+** is that 'y' is a consonant unless it is followed by another
+** consonent.
+*/
+static int isVowel(const char*);
+static int isConsonant(const char *z){
+  int j;
+  char x = *z;
+  if( x==0 ) return 0;
+  assert( x>='a' && x<='z' );
+  j = cType[x-'a'];
+  if( j<2 ) return j;
+  return z[1]==0 || isVowel(z + 1);
+}
+static int isVowel(const char *z){
+  int j;
+  char x = *z;
+  if( x==0 ) return 0;
+  assert( x>='a' && x<='z' );
+  j = cType[x-'a'];
+  if( j<2 ) return 1-j;
+  return isConsonant(z + 1);
+}
+
+/*
+** Let any sequence of one or more vowels be represented by V and let
+** C be sequence of one or more consonants.  Then every word can be
+** represented as:
+**
+**           [C] (VC){m} [V]
+**
+** In prose:  A word is an optional consonant followed by zero or
+** vowel-consonant pairs followed by an optional vowel.  "m" is the
+** number of vowel consonant pairs.  This routine computes the value
+** of m for the first i bytes of a word.
+**
+** Return true if the m-value for z is 1 or more.  In other words,
+** return true if z contains at least one vowel that is followed
+** by a consonant.
+**
+** In this routine z[] is in reverse order.  So we are really looking
+** for an instance of a consonant followed by a vowel.
+*/
+static int m_gt_0(const char *z){
+  while( isVowel(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isConsonant(z) ){ z++; }
+  return *z!=0;
+}
+
+/* Like mgt0 above except we are looking for a value of m which is
+** exactly 1
+*/
+static int m_eq_1(const char *z){
+  while( isVowel(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isConsonant(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isVowel(z) ){ z++; }
+  if( *z==0 ) return 1;
+  while( isConsonant(z) ){ z++; }
+  return *z==0;
+}
+
+/* Like mgt0 above except we are looking for a value of m>1 instead
+** or m>0
+*/
+static int m_gt_1(const char *z){
+  while( isVowel(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isConsonant(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isVowel(z) ){ z++; }
+  if( *z==0 ) return 0;
+  while( isConsonant(z) ){ z++; }
+  return *z!=0;
+}
+
+/*
+** Return TRUE if there is a vowel anywhere within z[0..n-1]
+*/
+static int hasVowel(const char *z){
+  while( isConsonant(z) ){ z++; }
+  return *z!=0;
+}
+
+/*
+** Return TRUE if the word ends in a double consonant.
+**
+** The text is reversed here. So we are really looking at
+** the first two characters of z[].
+*/
+static int doubleConsonant(const char *z){
+  return isConsonant(z) && z[0]==z[1];
+}
+
+/*
+** Return TRUE if the word ends with three letters which
+** are consonant-vowel-consonent and where the final consonant
+** is not 'w', 'x', or 'y'.
+**
+** The word is reversed here.  So we are really checking the
+** first three letters and the first one cannot be in [wxy].
+*/
+static int star_oh(const char *z){
+  return
+    isConsonant(z) &&
+    z[0]!='w' && z[0]!='x' && z[0]!='y' &&
+    isVowel(z+1) &&
+    isConsonant(z+2);
+}
+
+/*
+** If the word ends with zFrom and xCond() is true for the stem
+** of the word that preceeds the zFrom ending, then change the 
+** ending to zTo.
+**
+** The input word *pz and zFrom are both in reverse order.  zTo
+** is in normal order. 
+**
+** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
+** match.  Not that TRUE is returned even if xCond() fails and
+** no substitution occurs.
+*/
+static int stem(
+  char **pz,             /* The word being stemmed (Reversed) */
+  const char *zFrom,     /* If the ending matches this... (Reversed) */
+  const char *zTo,       /* ... change the ending to this (not reversed) */
+  int (*xCond)(const char*)   /* Condition that must be true */
+){
+  char *z = *pz;
+  while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
+  if( *zFrom!=0 ) return 0;
+  if( xCond && !xCond(z) ) return 1;
+  while( *zTo ){
+    *(--z) = *(zTo++);
+  }
+  *pz = z;
+  return 1;
+}
+
+/*
+** This is the fallback stemmer used when the porter stemmer is
+** inappropriate.  The input word is copied into the output with
+** US-ASCII case folding.  If the input word is too long (more
+** than 20 bytes if it contains no digits or more than 6 bytes if
+** it contains digits) then word is truncated to 20 or 6 bytes
+** by taking 10 or 3 bytes from the beginning and end.
+*/
+static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int i, mx, j;
+  int hasDigit = 0;
+  for(i=0; i<nIn; i++){
+    char c = zIn[i];
+    if( c>='A' && c<='Z' ){
+      zOut[i] = c - 'A' + 'a';
+    }else{
+      if( c>='0' && c<='9' ) hasDigit = 1;
+      zOut[i] = c;
+    }
+  }
+  mx = hasDigit ? 3 : 10;
+  if( nIn>mx*2 ){
+    for(j=mx, i=nIn-mx; i<nIn; i++, j++){
+      zOut[j] = zOut[i];
+    }
+    i = j;
+  }
+  zOut[i] = 0;
+  *pnOut = i;
+}
+
+
+/*
+** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
+** zOut is at least big enough to hold nIn bytes.  Write the actual
+** size of the output word (exclusive of the '\0' terminator) into *pnOut.
+**
+** Any upper-case characters in the US-ASCII character set ([A-Z])
+** are converted to lower case.  Upper-case UTF characters are
+** unchanged.
+**
+** Words that are longer than about 20 bytes are stemmed by retaining
+** a few bytes from the beginning and the end of the word.  If the
+** word contains digits, 3 bytes are taken from the beginning and
+** 3 bytes from the end.  For long words without digits, 10 bytes
+** are taken from each end.  US-ASCII case folding still applies.
+** 
+** If the input word contains not digits but does characters not 
+** in [a-zA-Z] then no stemming is attempted and this routine just 
+** copies the input into the input into the output with US-ASCII
+** case folding.
+**
+** Stemming never increases the length of the word.  So there is
+** no chance of overflowing the zOut buffer.
+*/
+static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int i, j;
+  char zReverse[28];
+  char *z, *z2;
+  if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
+    /* The word is too big or too small for the porter stemmer.
+    ** Fallback to the copy stemmer */
+    copy_stemmer(zIn, nIn, zOut, pnOut);
+    return;
+  }
+  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
+    char c = zIn[i];
+    if( c>='A' && c<='Z' ){
+      zReverse[j] = c + 'a' - 'A';
+    }else if( c>='a' && c<='z' ){
+      zReverse[j] = c;
+    }else{
+      /* The use of a character not in [a-zA-Z] means that we fallback
+      ** to the copy stemmer */
+      copy_stemmer(zIn, nIn, zOut, pnOut);
+      return;
+    }
+  }
+  memset(&zReverse[sizeof(zReverse)-5], 0, 5);
+  z = &zReverse[j+1];
+
+
+  /* Step 1a */
+  if( z[0]=='s' ){
+    if(
+     !stem(&z, "sess", "ss", 0) &&
+     !stem(&z, "sei", "i", 0)  &&
+     !stem(&z, "ss", "ss", 0)
+    ){
+      z++;
+    }
+  }
+
+  /* Step 1b */  
+  z2 = z;
+  if( stem(&z, "dee", "ee", m_gt_0) ){
+    /* Do nothing.  The work was all in the test */
+  }else if( 
+     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
+      && z!=z2
+  ){
+     if( stem(&z, "ta", "ate", 0) ||
+         stem(&z, "lb", "ble", 0) ||
+         stem(&z, "zi", "ize", 0) ){
+       /* Do nothing.  The work was all in the test */
+     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
+       z++;
+     }else if( m_eq_1(z) && star_oh(z) ){
+       *(--z) = 'e';
+     }
+  }
+
+  /* Step 1c */
+  if( z[0]=='y' && hasVowel(z+1) ){
+    z[0] = 'i';
+  }
+
+  /* Step 2 */
+  switch( z[1] ){
+   case 'a':
+     if( !stem(&z, "lanoita", "ate", m_gt_0) ){
+       stem(&z, "lanoit", "tion", m_gt_0);
+     }
+     break;
+   case 'c':
+     if( !stem(&z, "icne", "ence", m_gt_0) ){
+       stem(&z, "icna", "ance", m_gt_0);
+     }
+     break;
+   case 'e':
+     stem(&z, "rezi", "ize", m_gt_0);
+     break;
+   case 'g':
+     stem(&z, "igol", "log", m_gt_0);
+     break;
+   case 'l':
+     if( !stem(&z, "ilb", "ble", m_gt_0) 
+      && !stem(&z, "illa", "al", m_gt_0)
+      && !stem(&z, "iltne", "ent", m_gt_0)
+      && !stem(&z, "ile", "e", m_gt_0)
+     ){
+       stem(&z, "ilsuo", "ous", m_gt_0);
+     }
+     break;
+   case 'o':
+     if( !stem(&z, "noitazi", "ize", m_gt_0)
+      && !stem(&z, "noita", "ate", m_gt_0)
+     ){
+       stem(&z, "rota", "ate", m_gt_0);
+     }
+     break;
+   case 's':
+     if( !stem(&z, "msila", "al", m_gt_0)
+      && !stem(&z, "ssenevi", "ive", m_gt_0)
+      && !stem(&z, "ssenluf", "ful", m_gt_0)
+     ){
+       stem(&z, "ssensuo", "ous", m_gt_0);
+     }
+     break;
+   case 't':
+     if( !stem(&z, "itila", "al", m_gt_0)
+      && !stem(&z, "itivi", "ive", m_gt_0)
+     ){
+       stem(&z, "itilib", "ble", m_gt_0);
+     }
+     break;
+  }
+
+  /* Step 3 */
+  switch( z[0] ){
+   case 'e':
+     if( !stem(&z, "etaci", "ic", m_gt_0)
+      && !stem(&z, "evita", "", m_gt_0)
+     ){
+       stem(&z, "ezila", "al", m_gt_0);
+     }
+     break;
+   case 'i':
+     stem(&z, "itici", "ic", m_gt_0);
+     break;
+   case 'l':
+     if( !stem(&z, "laci", "ic", m_gt_0) ){
+       stem(&z, "luf", "", m_gt_0);
+     }
+     break;
+   case 's':
+     stem(&z, "ssen", "", m_gt_0);
+     break;
+  }
+
+  /* Step 4 */
+  switch( z[1] ){
+   case 'a':
+     if( z[0]=='l' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'c':
+     if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e')  && m_gt_1(z+4)  ){
+       z += 4;
+     }
+     break;
+   case 'e':
+     if( z[0]=='r' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'i':
+     if( z[0]=='c' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'l':
+     if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
+       z += 4;
+     }
+     break;
+   case 'n':
+     if( z[0]=='t' ){
+       if( z[2]=='a' ){
+         if( m_gt_1(z+3) ){
+           z += 3;
+         }
+       }else if( z[2]=='e' ){
+         if( !stem(&z, "tneme", "", m_gt_1)
+          && !stem(&z, "tnem", "", m_gt_1)
+         ){
+           stem(&z, "tne", "", m_gt_1);
+         }
+       }
+     }
+     break;
+   case 'o':
+     if( z[0]=='u' ){
+       if( m_gt_1(z+2) ){
+         z += 2;
+       }
+     }else if( z[3]=='s' || z[3]=='t' ){
+       stem(&z, "noi", "", m_gt_1);
+     }
+     break;
+   case 's':
+     if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 't':
+     if( !stem(&z, "eta", "", m_gt_1) ){
+       stem(&z, "iti", "", m_gt_1);
+     }
+     break;
+   case 'u':
+     if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 'v':
+   case 'z':
+     if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+  }
+
+  /* Step 5a */
+  if( z[0]=='e' ){
+    if( m_gt_1(z+1) ){
+      z++;
+    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
+      z++;
+    }
+  }
+
+  /* Step 5b */
+  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
+    z++;
+  }
+
+  /* z[] is now the stemmed word in reverse order.  Flip it back
+  ** around into forward order and return.
+  */
+  *pnOut = i = (int)strlen(z);
+  zOut[i] = 0;
+  while( *z ){
+    zOut[--i] = *(z++);
+  }
+}
+
+/*
+** Characters that can be part of a token.  We assume any character
+** whose value is greater than 0x80 (any UTF character) can be
+** part of a token.  In other words, delimiters all must have
+** values of 0x7f or lower.
+*/
+static const char porterIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+};
+#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to porterOpen().
+*/
+static int porterNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
+  const char **pzToken,               /* OUT: *pzToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+  const char *z = c->zInput;
+
+  while( c->iOffset<c->nInput ){
+    int iStartOffset, ch;
+
+    /* Scan past delimiter characters */
+    while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+    while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int n = c->iOffset-iStartOffset;
+      if( n>c->nAllocated ){
+        char *pNew;
+        c->nAllocated = n+20;
+        pNew = sqlite3_realloc(c->zToken, c->nAllocated);
+        if( !pNew ) return SQLITE_NOMEM;
+        c->zToken = pNew;
+      }
+      porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
+      *pzToken = c->zToken;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the porter-stemmer tokenizer
+*/
+static const sqlite3_tokenizer_module porterTokenizerModule = {
+  0,
+  porterCreate,
+  porterDestroy,
+  porterOpen,
+  porterClose,
+  porterNext,
+  0
+};
+
+/*
+** Allocate a new porter tokenizer.  Return a pointer to the new
+** tokenizer in *ppModule
+*/
+SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  *ppModule = &porterTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_porter.c *****************************************/
+/************** Begin file fts3_tokenizer.c **********************************/
+/*
+** 2007 June 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is part of an SQLite module implementing full-text search.
+** This particular file implements the generic tokenizer interface.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <assert.h> */
+/* #include <string.h> */
+
+/*
+** Implementation of the SQL scalar function for accessing the underlying 
+** hash table. This function may be called as follows:
+**
+**   SELECT <function-name>(<key-name>);
+**   SELECT <function-name>(<key-name>, <pointer>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
+**
+** If the <pointer> argument is specified, it must be a blob value
+** containing a pointer to be stored as the hash data corresponding
+** to the string <key-name>. If <pointer> is not specified, then
+** the string <key-name> must already exist in the has table. Otherwise,
+** an error is returned.
+**
+** Whether or not the <pointer> argument is specified, the value returned
+** is a blob containing the pointer stored as the hash data corresponding
+** to string <key-name> (after the hash-table is updated, if applicable).
+*/
+static void scalarFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Fts3Hash *pHash;
+  void *pPtr = 0;
+  const unsigned char *zName;
+  int nName;
+
+  assert( argc==1 || argc==2 );
+
+  pHash = (Fts3Hash *)sqlite3_user_data(context);
+
+  zName = sqlite3_value_text(argv[0]);
+  nName = sqlite3_value_bytes(argv[0])+1;
+
+  if( argc==2 ){
+    void *pOld;
+    int n = sqlite3_value_bytes(argv[1]);
+    if( n!=sizeof(pPtr) ){
+      sqlite3_result_error(context, "argument type mismatch", -1);
+      return;
+    }
+    pPtr = *(void **)sqlite3_value_blob(argv[1]);
+    pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
+    if( pOld==pPtr ){
+      sqlite3_result_error(context, "out of memory", -1);
+      return;
+    }
+  }else{
+    pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
+    if( !pPtr ){
+      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
+      sqlite3_result_error(context, zErr, -1);
+      sqlite3_free(zErr);
+      return;
+    }
+  }
+
+  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
+}
+
+SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){
+  static const char isFtsIdChar[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
+      0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+  };
+  return (c&0x80 || isFtsIdChar[(int)(c)]);
+}
+
+SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
+  const char *z1;
+  const char *z2 = 0;
+
+  /* Find the start of the next token. */
+  z1 = zStr;
+  while( z2==0 ){
+    char c = *z1;
+    switch( c ){
+      case '\0': return 0;        /* No more tokens here */
+      case '\'':
+      case '"':
+      case '`': {
+        z2 = z1;
+        while( *++z2 && (*z2!=c || *++z2==c) );
+        break;
+      }
+      case '[':
+        z2 = &z1[1];
+        while( *z2 && z2[0]!=']' ) z2++;
+        if( *z2 ) z2++;
+        break;
+
+      default:
+        if( sqlite3Fts3IsIdChar(*z1) ){
+          z2 = &z1[1];
+          while( sqlite3Fts3IsIdChar(*z2) ) z2++;
+        }else{
+          z1++;
+        }
+    }
+  }
+
+  *pn = (int)(z2-z1);
+  return z1;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(
+  Fts3Hash *pHash,                /* Tokenizer hash table */
+  const char *zArg,               /* Tokenizer name */
+  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
+  char **pzErr                    /* OUT: Set to malloced error message */
+){
+  int rc;
+  char *z = (char *)zArg;
+  int n = 0;
+  char *zCopy;
+  char *zEnd;                     /* Pointer to nul-term of zCopy */
+  sqlite3_tokenizer_module *m;
+
+  zCopy = sqlite3_mprintf("%s", zArg);
+  if( !zCopy ) return SQLITE_NOMEM;
+  zEnd = &zCopy[strlen(zCopy)];
+
+  z = (char *)sqlite3Fts3NextToken(zCopy, &n);
+  z[n] = '\0';
+  sqlite3Fts3Dequote(z);
+
+  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
+  if( !m ){
+    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
+    rc = SQLITE_ERROR;
+  }else{
+    char const **aArg = 0;
+    int iArg = 0;
+    z = &z[n+1];
+    while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
+      int nNew = sizeof(char *)*(iArg+1);
+      char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
+      if( !aNew ){
+        sqlite3_free(zCopy);
+        sqlite3_free((void *)aArg);
+        return SQLITE_NOMEM;
+      }
+      aArg = aNew;
+      aArg[iArg++] = z;
+      z[n] = '\0';
+      sqlite3Fts3Dequote(z);
+      z = &z[n+1];
+    }
+    rc = m->xCreate(iArg, aArg, ppTok);
+    assert( rc!=SQLITE_OK || *ppTok );
+    if( rc!=SQLITE_OK ){
+      *pzErr = sqlite3_mprintf("unknown tokenizer");
+    }else{
+      (*ppTok)->pModule = m; 
+    }
+    sqlite3_free((void *)aArg);
+  }
+
+  sqlite3_free(zCopy);
+  return rc;
+}
+
+
+#ifdef SQLITE_TEST
+
+#include <tcl.h>
+/* #include <string.h> */
+
+/*
+** Implementation of a special SQL scalar function for testing tokenizers 
+** designed to be used in concert with the Tcl testing framework. This
+** function must be called with two or more arguments:
+**
+**   SELECT <function-name>(<key-name>, ..., <input-string>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
+** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
+**
+** The return value is a string that may be interpreted as a Tcl
+** list. For each token in the <input-string>, three elements are
+** added to the returned list. The first is the token position, the 
+** second is the token text (folded, stemmed, etc.) and the third is the
+** substring of <input-string> associated with the token. For example, 
+** using the built-in "simple" tokenizer:
+**
+**   SELECT fts_tokenizer_test('simple', 'I don't see how');
+**
+** will return the string:
+**
+**   "{0 i I 1 dont don't 2 see see 3 how how}"
+**   
+*/
+static void testFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Fts3Hash *pHash;
+  sqlite3_tokenizer_module *p;
+  sqlite3_tokenizer *pTokenizer = 0;
+  sqlite3_tokenizer_cursor *pCsr = 0;
+
+  const char *zErr = 0;
+
+  const char *zName;
+  int nName;
+  const char *zInput;
+  int nInput;
+
+  const char *azArg[64];
+
+  const char *zToken;
+  int nToken = 0;
+  int iStart = 0;
+  int iEnd = 0;
+  int iPos = 0;
+  int i;
+
+  Tcl_Obj *pRet;
+
+  if( argc<2 ){
+    sqlite3_result_error(context, "insufficient arguments", -1);
+    return;
+  }
+
+  nName = sqlite3_value_bytes(argv[0]);
+  zName = (const char *)sqlite3_value_text(argv[0]);
+  nInput = sqlite3_value_bytes(argv[argc-1]);
+  zInput = (const char *)sqlite3_value_text(argv[argc-1]);
+
+  pHash = (Fts3Hash *)sqlite3_user_data(context);
+  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
+
+  if( !p ){
+    char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
+    sqlite3_result_error(context, zErr, -1);
+    sqlite3_free(zErr);
+    return;
+  }
+
+  pRet = Tcl_NewObj();
+  Tcl_IncrRefCount(pRet);
+
+  for(i=1; i<argc-1; i++){
+    azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
+  }
+
+  if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
+    zErr = "error in xCreate()";
+    goto finish;
+  }
+  pTokenizer->pModule = p;
+  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
+    zErr = "error in xOpen()";
+    goto finish;
+  }
+
+  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+    zToken = &zInput[iStart];
+    nToken = iEnd-iStart;
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+  }
+
+  if( SQLITE_OK!=p->xClose(pCsr) ){
+    zErr = "error in xClose()";
+    goto finish;
+  }
+  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
+    zErr = "error in xDestroy()";
+    goto finish;
+  }
+
+finish:
+  if( zErr ){
+    sqlite3_result_error(context, zErr, -1);
+  }else{
+    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
+  }
+  Tcl_DecrRefCount(pRet);
+}
+
+static
+int registerTokenizer(
+  sqlite3 *db, 
+  char *zName, 
+  const sqlite3_tokenizer_module *p
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
+
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
+  sqlite3_step(pStmt);
+
+  return sqlite3_finalize(pStmt);
+}
+
+static
+int queryTokenizer(
+  sqlite3 *db, 
+  char *zName,  
+  const sqlite3_tokenizer_module **pp
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts3_tokenizer(?)";
+
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
+      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+  }
+
+  return sqlite3_finalize(pStmt);
+}
+
+SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+
+/*
+** Implementation of the scalar function fts3_tokenizer_internal_test().
+** This function is used for testing only, it is not included in the
+** build unless SQLITE_TEST is defined.
+**
+** The purpose of this is to test that the fts3_tokenizer() function
+** can be used as designed by the C-code in the queryTokenizer and
+** registerTokenizer() functions above. These two functions are repeated
+** in the README.tokenizer file as an example, so it is important to
+** test them.
+**
+** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
+** function with no arguments. An assert() will fail if a problem is
+** detected. i.e.:
+**
+**     SELECT fts3_tokenizer_internal_test();
+**
+*/
+static void intTestFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int rc;
+  const sqlite3_tokenizer_module *p1;
+  const sqlite3_tokenizer_module *p2;
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
+
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  /* Test the query function */
+  sqlite3Fts3SimpleTokenizerModule(&p1);
+  rc = queryTokenizer(db, "simple", &p2);
+  assert( rc==SQLITE_OK );
+  assert( p1==p2 );
+  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
+  assert( rc==SQLITE_ERROR );
+  assert( p2==0 );
+  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
+
+  /* Test the storage function */
+  rc = registerTokenizer(db, "nosuchtokenizer", p1);
+  assert( rc==SQLITE_OK );
+  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
+  assert( rc==SQLITE_OK );
+  assert( p2==p1 );
+
+  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
+}
+
+#endif
+
+/*
+** Set up SQL objects in database db used to access the contents of
+** the hash table pointed to by argument pHash. The hash table must
+** been initialized to use string keys, and to take a private copy 
+** of the key when a value is inserted. i.e. by a call similar to:
+**
+**    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
+**
+** This function adds a scalar function (see header comment above
+** scalarFunc() in this file for details) and, if ENABLE_TABLE is
+** defined at compilation time, a temporary virtual table (see header 
+** comment above struct HashTableVtab) to the database schema. Both 
+** provide read/write access to the contents of *pHash.
+**
+** The third argument to this function, zName, is used as the name
+** of both the scalar and, if created, the virtual table.
+*/
+SQLITE_PRIVATE int sqlite3Fts3InitHashTable(
+  sqlite3 *db, 
+  Fts3Hash *pHash, 
+  const char *zName
+){
+  int rc = SQLITE_OK;
+  void *p = (void *)pHash;
+  const int any = SQLITE_ANY;
+
+#ifdef SQLITE_TEST
+  char *zTest = 0;
+  char *zTest2 = 0;
+  void *pdb = (void *)db;
+  zTest = sqlite3_mprintf("%s_test", zName);
+  zTest2 = sqlite3_mprintf("%s_internal_test", zName);
+  if( !zTest || !zTest2 ){
+    rc = SQLITE_NOMEM;
+  }
+#endif
+
+  if( SQLITE_OK==rc ){
+    rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0);
+  }
+  if( SQLITE_OK==rc ){
+    rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0);
+  }
+#ifdef SQLITE_TEST
+  if( SQLITE_OK==rc ){
+    rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
+  }
+  if( SQLITE_OK==rc ){
+    rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
+  }
+#endif
+
+#ifdef SQLITE_TEST
+  sqlite3_free(zTest);
+  sqlite3_free(zTest2);
+#endif
+
+  return rc;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_tokenizer.c **************************************/
+/************** Begin file fts3_tokenizer1.c *********************************/
+/*
+** 2006 Oct 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Implementation of the "simple" full-text-search tokenizer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
+
+
+typedef struct simple_tokenizer {
+  sqlite3_tokenizer base;
+  char delim[128];             /* flag ASCII delimiters */
+} simple_tokenizer;
+
+typedef struct simple_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;
+  const char *pInput;          /* input we are tokenizing */
+  int nBytes;                  /* size of the input */
+  int iOffset;                 /* current position in pInput */
+  int iToken;                  /* index of next token to be returned */
+  char *pToken;                /* storage for current token */
+  int nTokenAllocated;         /* space allocated to zToken buffer */
+} simple_tokenizer_cursor;
+
+
+static int simpleDelim(simple_tokenizer *t, unsigned char c){
+  return c<0x80 && t->delim[c];
+}
+static int fts3_isalnum(int x){
+  return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');
+}
+
+/*
+** Create a new tokenizer instance.
+*/
+static int simpleCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  simple_tokenizer *t;
+
+  t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
+  if( t==NULL ) return SQLITE_NOMEM;
+  memset(t, 0, sizeof(*t));
+
+  /* TODO(shess) Delimiters need to remain the same from run to run,
+  ** else we need to reindex.  One solution would be a meta-table to
+  ** track such information in the database, then we'd only want this
+  ** information on the initial create.
+  */
+  if( argc>1 ){
+    int i, n = (int)strlen(argv[1]);
+    for(i=0; i<n; i++){
+      unsigned char ch = argv[1][i];
+      /* We explicitly don't support UTF-8 delimiters for now. */
+      if( ch>=0x80 ){
+        sqlite3_free(t);
+        return SQLITE_ERROR;
+      }
+      t->delim[ch] = 1;
+    }
+  } else {
+    /* Mark non-alphanumeric ASCII characters as delimiters */
+    int i;
+    for(i=1; i<0x80; i++){
+      t->delim[i] = !fts3_isalnum(i) ? -1 : 0;
+    }
+  }
+
+  *ppTokenizer = &t->base;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free(pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is pInput[0..nBytes-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+*/
+static int simpleOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *pInput, int nBytes,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  simple_tokenizer_cursor *c;
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
+  if( c==NULL ) return SQLITE_NOMEM;
+
+  c->pInput = pInput;
+  if( pInput==0 ){
+    c->nBytes = 0;
+  }else if( nBytes<0 ){
+    c->nBytes = (int)strlen(pInput);
+  }else{
+    c->nBytes = nBytes;
+  }
+  c->iOffset = 0;                 /* start tokenizing at the beginning */
+  c->iToken = 0;
+  c->pToken = NULL;               /* no space allocated, yet. */
+  c->nTokenAllocated = 0;
+
+  *ppCursor = &c->base;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** simpleOpen() above.
+*/
+static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
+  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
+  sqlite3_free(c->pToken);
+  sqlite3_free(c);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to simpleOpen().
+*/
+static int simpleNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
+  simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
+  unsigned char *p = (unsigned char *)c->pInput;
+
+  while( c->iOffset<c->nBytes ){
+    int iStartOffset;
+
+    /* Scan past delimiter characters */
+    while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+    while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int i, n = c->iOffset-iStartOffset;
+      if( n>c->nTokenAllocated ){
+        char *pNew;
+        c->nTokenAllocated = n+20;
+        pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
+        if( !pNew ) return SQLITE_NOMEM;
+        c->pToken = pNew;
+      }
+      for(i=0; i<n; i++){
+        /* TODO(shess) This needs expansion to handle UTF-8
+        ** case-insensitivity.
+        */
+        unsigned char ch = p[iStartOffset+i];
+        c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch);
+      }
+      *ppToken = c->pToken;
+      *pnBytes = n;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the simple tokenizer
+*/
+static const sqlite3_tokenizer_module simpleTokenizerModule = {
+  0,
+  simpleCreate,
+  simpleDestroy,
+  simpleOpen,
+  simpleClose,
+  simpleNext,
+  0,
+};
+
+/*
+** Allocate a new simple tokenizer.  Return a pointer to the new
+** tokenizer in *ppModule
+*/
+SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  *ppModule = &simpleTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_tokenizer1.c *************************************/
+/************** Begin file fts3_tokenize_vtab.c ******************************/
+/*
+** 2013 Apr 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code for the "fts3tokenize" virtual table module.
+** An fts3tokenize virtual table is created as follows:
+**
+**   CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
+**       <tokenizer-name>, <arg-1>, ...
+**   );
+**
+** The table created has the following schema:
+**
+**   CREATE TABLE <tbl>(input, token, start, end, position)
+**
+** When queried, the query must include a WHERE clause of type:
+**
+**   input = <string>
+**
+** The virtual table module tokenizes this <string>, using the FTS3 
+** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE 
+** statement and returns one row for each token in the result. With
+** fields set as follows:
+**
+**   input:   Always set to a copy of <string>
+**   token:   A token from the input.
+**   start:   Byte offset of the token within the input <string>.
+**   end:     Byte offset of the byte immediately following the end of the
+**            token within the input string.
+**   pos:     Token offset of token within input.
+**
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <string.h> */
+/* #include <assert.h> */
+
+typedef struct Fts3tokTable Fts3tokTable;
+typedef struct Fts3tokCursor Fts3tokCursor;
+
+/*
+** Virtual table structure.
+*/
+struct Fts3tokTable {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  const sqlite3_tokenizer_module *pMod;
+  sqlite3_tokenizer *pTok;
+};
+
+/*
+** Virtual table cursor structure.
+*/
+struct Fts3tokCursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  char *zInput;                   /* Input string */
+  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
+  int iRowid;                     /* Current 'rowid' value */
+  const char *zToken;             /* Current 'token' value */
+  int nToken;                     /* Size of zToken in bytes */
+  int iStart;                     /* Current 'start' value */
+  int iEnd;                       /* Current 'end' value */
+  int iPos;                       /* Current 'pos' value */
+};
+
+/*
+** Query FTS for the tokenizer implementation named zName.
+*/
+static int fts3tokQueryTokenizer(
+  Fts3Hash *pHash,
+  const char *zName,
+  const sqlite3_tokenizer_module **pp,
+  char **pzErr
+){
+  sqlite3_tokenizer_module *p;
+  int nName = (int)strlen(zName);
+
+  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
+  if( !p ){
+    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
+    return SQLITE_ERROR;
+  }
+
+  *pp = p;
+  return SQLITE_OK;
+}
+
+/*
+** The second argument, argv[], is an array of pointers to nul-terminated
+** strings. This function makes a copy of the array and strings into a 
+** single block of memory. It then dequotes any of the strings that appear
+** to be quoted.
+**
+** If successful, output parameter *pazDequote is set to point at the
+** array of dequoted strings and SQLITE_OK is returned. The caller is
+** responsible for eventually calling sqlite3_free() to free the array
+** in this case. Or, if an error occurs, an SQLite error code is returned.
+** The final value of *pazDequote is undefined in this case.
+*/
+static int fts3tokDequoteArray(
+  int argc,                       /* Number of elements in argv[] */
+  const char * const *argv,       /* Input array */
+  char ***pazDequote              /* Output array */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  if( argc==0 ){
+    *pazDequote = 0;
+  }else{
+    int i;
+    int nByte = 0;
+    char **azDequote;
+
+    for(i=0; i<argc; i++){
+      nByte += (int)(strlen(argv[i]) + 1);
+    }
+
+    *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte);
+    if( azDequote==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      char *pSpace = (char *)&azDequote[argc];
+      for(i=0; i<argc; i++){
+        int n = (int)strlen(argv[i]);
+        azDequote[i] = pSpace;
+        memcpy(pSpace, argv[i], n+1);
+        sqlite3Fts3Dequote(pSpace);
+        pSpace += (n+1);
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Schema of the tokenizer table.
+*/
+#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
+
+/*
+** This function does all the work for both the xConnect and xCreate methods.
+** These tables have no persistent representation of their own, so xConnect
+** and xCreate are identical operations.
+**
+**   argv[0]: module name
+**   argv[1]: database name 
+**   argv[2]: table name
+**   argv[3]: first argument (tokenizer name)
+*/
+static int fts3tokConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pHash,                    /* Hash table of tokenizers */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  Fts3tokTable *pTab = 0;
+  const sqlite3_tokenizer_module *pMod = 0;
+  sqlite3_tokenizer *pTok = 0;
+  int rc;
+  char **azDequote = 0;
+  int nDequote;
+
+  rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nDequote = argc-3;
+  rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
+
+  if( rc==SQLITE_OK ){
+    const char *zModule;
+    if( nDequote<1 ){
+      zModule = "simple";
+    }else{
+      zModule = azDequote[0];
+    }
+    rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
+  }
+
+  assert( (rc==SQLITE_OK)==(pMod!=0) );
+  if( rc==SQLITE_OK ){
+    const char * const *azArg = (const char * const *)&azDequote[1];
+    rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
+  }
+
+  if( rc==SQLITE_OK ){
+    pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
+    if( pTab==0 ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    memset(pTab, 0, sizeof(Fts3tokTable));
+    pTab->pMod = pMod;
+    pTab->pTok = pTok;
+    *ppVtab = &pTab->base;
+  }else{
+    if( pTok ){
+      pMod->xDestroy(pTok);
+    }
+  }
+
+  sqlite3_free(azDequote);
+  return rc;
+}
+
+/*
+** This function does the work for both the xDisconnect and xDestroy methods.
+** These tables have no persistent representation of their own, so xDisconnect
+** and xDestroy are identical operations.
+*/
+static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3tokTable *pTab = (Fts3tokTable *)pVtab;
+
+  pTab->pMod->xDestroy(pTab->pTok);
+  sqlite3_free(pTab);
+  return SQLITE_OK;
+}
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+*/
+static int fts3tokBestIndexMethod(
+  sqlite3_vtab *pVTab, 
+  sqlite3_index_info *pInfo
+){
+  int i;
+  UNUSED_PARAMETER(pVTab);
+
+  for(i=0; i<pInfo->nConstraint; i++){
+    if( pInfo->aConstraint[i].usable 
+     && pInfo->aConstraint[i].iColumn==0 
+     && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ 
+    ){
+      pInfo->idxNum = 1;
+      pInfo->aConstraintUsage[i].argvIndex = 1;
+      pInfo->aConstraintUsage[i].omit = 1;
+      pInfo->estimatedCost = 1;
+      return SQLITE_OK;
+    }
+  }
+
+  pInfo->idxNum = 0;
+  assert( pInfo->estimatedCost>1000000.0 );
+
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor.
+*/
+static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3tokCursor *pCsr;
+  UNUSED_PARAMETER(pVTab);
+
+  pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
+  if( pCsr==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(Fts3tokCursor));
+
+  *ppCsr = (sqlite3_vtab_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** Reset the tokenizer cursor passed as the only argument. As if it had
+** just been returned by fts3tokOpenMethod().
+*/
+static void fts3tokResetCursor(Fts3tokCursor *pCsr){
+  if( pCsr->pCsr ){
+    Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
+    pTab->pMod->xClose(pCsr->pCsr);
+    pCsr->pCsr = 0;
+  }
+  sqlite3_free(pCsr->zInput);
+  pCsr->zInput = 0;
+  pCsr->zToken = 0;
+  pCsr->nToken = 0;
+  pCsr->iStart = 0;
+  pCsr->iEnd = 0;
+  pCsr->iPos = 0;
+  pCsr->iRowid = 0;
+}
+
+/*
+** xClose - Close a cursor.
+*/
+static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+
+  fts3tokResetCursor(pCsr);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+*/
+static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
+  int rc;                         /* Return code */
+
+  pCsr->iRowid++;
+  rc = pTab->pMod->xNext(pCsr->pCsr,
+      &pCsr->zToken, &pCsr->nToken,
+      &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
+  );
+
+  if( rc!=SQLITE_OK ){
+    fts3tokResetCursor(pCsr);
+    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+  }
+
+  return rc;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+*/
+static int fts3tokFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  int rc = SQLITE_ERROR;
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
+  UNUSED_PARAMETER(idxStr);
+  UNUSED_PARAMETER(nVal);
+
+  fts3tokResetCursor(pCsr);
+  if( idxNum==1 ){
+    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
+    int nByte = sqlite3_value_bytes(apVal[0]);
+    pCsr->zInput = sqlite3_malloc(nByte+1);
+    if( pCsr->zInput==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memcpy(pCsr->zInput, zByte, nByte);
+      pCsr->zInput[nByte] = 0;
+      rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
+      if( rc==SQLITE_OK ){
+        pCsr->pCsr->pTokenizer = pTab->pTok;
+      }
+    }
+  }
+
+  if( rc!=SQLITE_OK ) return rc;
+  return fts3tokNextMethod(pCursor);
+}
+
+/*
+** xEof - Return true if the cursor is at EOF, or false otherwise.
+*/
+static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  return (pCsr->zToken==0);
+}
+
+/*
+** xColumn - Return a column value.
+*/
+static int fts3tokColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+
+  /* CREATE TABLE x(input, token, start, end, position) */
+  switch( iCol ){
+    case 0:
+      sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
+      break;
+    case 1:
+      sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
+      break;
+    case 2:
+      sqlite3_result_int(pCtx, pCsr->iStart);
+      break;
+    case 3:
+      sqlite3_result_int(pCtx, pCsr->iEnd);
+      break;
+    default:
+      assert( iCol==4 );
+      sqlite3_result_int(pCtx, pCsr->iPos);
+      break;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Return the current rowid for the cursor.
+*/
+static int fts3tokRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  *pRowid = (sqlite3_int64)pCsr->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Register the fts3tok module with database connection db. Return SQLITE_OK
+** if successful or an error code if sqlite3_create_module() fails.
+*/
+SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
+  static const sqlite3_module fts3tok_module = {
+     0,                           /* iVersion      */
+     fts3tokConnectMethod,        /* xCreate       */
+     fts3tokConnectMethod,        /* xConnect      */
+     fts3tokBestIndexMethod,      /* xBestIndex    */
+     fts3tokDisconnectMethod,     /* xDisconnect   */
+     fts3tokDisconnectMethod,     /* xDestroy      */
+     fts3tokOpenMethod,           /* xOpen         */
+     fts3tokCloseMethod,          /* xClose        */
+     fts3tokFilterMethod,         /* xFilter       */
+     fts3tokNextMethod,           /* xNext         */
+     fts3tokEofMethod,            /* xEof          */
+     fts3tokColumnMethod,         /* xColumn       */
+     fts3tokRowidMethod,          /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+  int rc;                         /* Return code */
+
+  rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash);
+  return rc;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_tokenize_vtab.c **********************************/
+/************** Begin file fts3_write.c **************************************/
+/*
+** 2009 Oct 23
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file is part of the SQLite FTS3 extension module. Specifically,
+** this file contains code to insert, update and delete rows from FTS3
+** tables. It also contains code to merge FTS3 b-tree segments. Some
+** of the sub-routines used to merge segments are also used by the query 
+** code in fts3.c.
+*/
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <string.h> */
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+
+
+#define FTS_MAX_APPENDABLE_HEIGHT 16
+
+/*
+** When full-text index nodes are loaded from disk, the buffer that they
+** are loaded into has the following number of bytes of padding at the end 
+** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer
+** of 920 bytes is allocated for it.
+**
+** This means that if we have a pointer into a buffer containing node data,
+** it is always safe to read up to two varints from it without risking an
+** overread, even if the node data is corrupted.
+*/
+#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
+
+/*
+** Under certain circumstances, b-tree nodes (doclists) can be loaded into
+** memory incrementally instead of all at once. This can be a big performance
+** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext()
+** method before retrieving all query results (as may happen, for example,
+** if a query has a LIMIT clause).
+**
+** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD 
+** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes.
+** The code is written so that the hard lower-limit for each of these values 
+** is 1. Clearly such small values would be inefficient, but can be useful 
+** for testing purposes.
+**
+** If this module is built with SQLITE_TEST defined, these constants may
+** be overridden at runtime for testing purposes. File fts3_test.c contains
+** a Tcl interface to read and write the values.
+*/
+#ifdef SQLITE_TEST
+int test_fts3_node_chunksize = (4*1024);
+int test_fts3_node_chunk_threshold = (4*1024)*4;
+# define FTS3_NODE_CHUNKSIZE       test_fts3_node_chunksize
+# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
+#else
+# define FTS3_NODE_CHUNKSIZE (4*1024) 
+# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
+#endif
+
+/*
+** The two values that may be meaningfully bound to the :1 parameter in
+** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
+*/
+#define FTS_STAT_DOCTOTAL      0
+#define FTS_STAT_INCRMERGEHINT 1
+#define FTS_STAT_AUTOINCRMERGE 2
+
+/*
+** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic
+** and incremental merge operation that takes place. This is used for 
+** debugging FTS only, it should not usually be turned on in production
+** systems.
+*/
+#ifdef FTS3_LOG_MERGES
+static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
+  sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel);
+}
+#else
+#define fts3LogMerge(x, y)
+#endif
+
+
+typedef struct PendingList PendingList;
+typedef struct SegmentNode SegmentNode;
+typedef struct SegmentWriter SegmentWriter;
+
+/*
+** An instance of the following data structure is used to build doclists
+** incrementally. See function fts3PendingListAppend() for details.
+*/
+struct PendingList {
+  int nData;
+  char *aData;
+  int nSpace;
+  sqlite3_int64 iLastDocid;
+  sqlite3_int64 iLastCol;
+  sqlite3_int64 iLastPos;
+};
+
+
+/*
+** Each cursor has a (possibly empty) linked list of the following objects.
+*/
+struct Fts3DeferredToken {
+  Fts3PhraseToken *pToken;        /* Pointer to corresponding expr token */
+  int iCol;                       /* Column token must occur in */
+  Fts3DeferredToken *pNext;       /* Next in list of deferred tokens */
+  PendingList *pList;             /* Doclist is assembled here */
+};
+
+/*
+** An instance of this structure is used to iterate through the terms on
+** a contiguous set of segment b-tree leaf nodes. Although the details of
+** this structure are only manipulated by code in this file, opaque handles
+** of type Fts3SegReader* are also used by code in fts3.c to iterate through
+** terms when querying the full-text index. See functions:
+**
+**   sqlite3Fts3SegReaderNew()
+**   sqlite3Fts3SegReaderFree()
+**   sqlite3Fts3SegReaderIterate()
+**
+** Methods used to manipulate Fts3SegReader structures:
+**
+**   fts3SegReaderNext()
+**   fts3SegReaderFirstDocid()
+**   fts3SegReaderNextDocid()
+*/
+struct Fts3SegReader {
+  int iIdx;                       /* Index within level, or 0x7FFFFFFF for PT */
+  u8 bLookup;                     /* True for a lookup only */
+  u8 rootOnly;                    /* True for a root-only reader */
+
+  sqlite3_int64 iStartBlock;      /* Rowid of first leaf block to traverse */
+  sqlite3_int64 iLeafEndBlock;    /* Rowid of final leaf block to traverse */
+  sqlite3_int64 iEndBlock;        /* Rowid of final block in segment (or 0) */
+  sqlite3_int64 iCurrentBlock;    /* Current leaf block (or 0) */
+
+  char *aNode;                    /* Pointer to node data (or NULL) */
+  int nNode;                      /* Size of buffer at aNode (or 0) */
+  int nPopulate;                  /* If >0, bytes of buffer aNode[] loaded */
+  sqlite3_blob *pBlob;            /* If not NULL, blob handle to read node */
+
+  Fts3HashElem **ppNextElem;
+
+  /* Variables set by fts3SegReaderNext(). These may be read directly
+  ** by the caller. They are valid from the time SegmentReaderNew() returns
+  ** until SegmentReaderNext() returns something other than SQLITE_OK
+  ** (i.e. SQLITE_DONE).
+  */
+  int nTerm;                      /* Number of bytes in current term */
+  char *zTerm;                    /* Pointer to current term */
+  int nTermAlloc;                 /* Allocated size of zTerm buffer */
+  char *aDoclist;                 /* Pointer to doclist of current entry */
+  int nDoclist;                   /* Size of doclist in current entry */
+
+  /* The following variables are used by fts3SegReaderNextDocid() to iterate 
+  ** through the current doclist (aDoclist/nDoclist).
+  */
+  char *pOffsetList;
+  int nOffsetList;                /* For descending pending seg-readers only */
+  sqlite3_int64 iDocid;
+};
+
+#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
+#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0)
+
+/*
+** An instance of this structure is used to create a segment b-tree in the
+** database. The internal details of this type are only accessed by the
+** following functions:
+**
+**   fts3SegWriterAdd()
+**   fts3SegWriterFlush()
+**   fts3SegWriterFree()
+*/
+struct SegmentWriter {
+  SegmentNode *pTree;             /* Pointer to interior tree structure */
+  sqlite3_int64 iFirst;           /* First slot in %_segments written */
+  sqlite3_int64 iFree;            /* Next free slot in %_segments */
+  char *zTerm;                    /* Pointer to previous term buffer */
+  int nTerm;                      /* Number of bytes in zTerm */
+  int nMalloc;                    /* Size of malloc'd buffer at zMalloc */
+  char *zMalloc;                  /* Malloc'd space (possibly) used for zTerm */
+  int nSize;                      /* Size of allocation at aData */
+  int nData;                      /* Bytes of data in aData */
+  char *aData;                    /* Pointer to block from malloc() */
+  i64 nLeafData;                  /* Number of bytes of leaf data written */
+};
+
+/*
+** Type SegmentNode is used by the following three functions to create
+** the interior part of the segment b+-tree structures (everything except
+** the leaf nodes). These functions and type are only ever used by code
+** within the fts3SegWriterXXX() family of functions described above.
+**
+**   fts3NodeAddTerm()
+**   fts3NodeWrite()
+**   fts3NodeFree()
+**
+** When a b+tree is written to the database (either as a result of a merge
+** or the pending-terms table being flushed), leaves are written into the 
+** database file as soon as they are completely populated. The interior of
+** the tree is assembled in memory and written out only once all leaves have
+** been populated and stored. This is Ok, as the b+-tree fanout is usually
+** very large, meaning that the interior of the tree consumes relatively 
+** little memory.
+*/
+struct SegmentNode {
+  SegmentNode *pParent;           /* Parent node (or NULL for root node) */
+  SegmentNode *pRight;            /* Pointer to right-sibling */
+  SegmentNode *pLeftmost;         /* Pointer to left-most node of this depth */
+  int nEntry;                     /* Number of terms written to node so far */
+  char *zTerm;                    /* Pointer to previous term buffer */
+  int nTerm;                      /* Number of bytes in zTerm */
+  int nMalloc;                    /* Size of malloc'd buffer at zMalloc */
+  char *zMalloc;                  /* Malloc'd space (possibly) used for zTerm */
+  int nData;                      /* Bytes of valid data so far */
+  char *aData;                    /* Node data */
+};
+
+/*
+** Valid values for the second argument to fts3SqlStmt().
+*/
+#define SQL_DELETE_CONTENT             0
+#define SQL_IS_EMPTY                   1
+#define SQL_DELETE_ALL_CONTENT         2 
+#define SQL_DELETE_ALL_SEGMENTS        3
+#define SQL_DELETE_ALL_SEGDIR          4
+#define SQL_DELETE_ALL_DOCSIZE         5
+#define SQL_DELETE_ALL_STAT            6
+#define SQL_SELECT_CONTENT_BY_ROWID    7
+#define SQL_NEXT_SEGMENT_INDEX         8
+#define SQL_INSERT_SEGMENTS            9
+#define SQL_NEXT_SEGMENTS_ID          10
+#define SQL_INSERT_SEGDIR             11
+#define SQL_SELECT_LEVEL              12
+#define SQL_SELECT_LEVEL_RANGE        13
+#define SQL_SELECT_LEVEL_COUNT        14
+#define SQL_SELECT_SEGDIR_MAX_LEVEL   15
+#define SQL_DELETE_SEGDIR_LEVEL       16
+#define SQL_DELETE_SEGMENTS_RANGE     17
+#define SQL_CONTENT_INSERT            18
+#define SQL_DELETE_DOCSIZE            19
+#define SQL_REPLACE_DOCSIZE           20
+#define SQL_SELECT_DOCSIZE            21
+#define SQL_SELECT_STAT               22
+#define SQL_REPLACE_STAT              23
+
+#define SQL_SELECT_ALL_PREFIX_LEVEL   24
+#define SQL_DELETE_ALL_TERMS_SEGDIR   25
+#define SQL_DELETE_SEGDIR_RANGE       26
+#define SQL_SELECT_ALL_LANGID         27
+#define SQL_FIND_MERGE_LEVEL          28
+#define SQL_MAX_LEAF_NODE_ESTIMATE    29
+#define SQL_DELETE_SEGDIR_ENTRY       30
+#define SQL_SHIFT_SEGDIR_ENTRY        31
+#define SQL_SELECT_SEGDIR             32
+#define SQL_CHOMP_SEGDIR              33
+#define SQL_SEGMENT_IS_APPENDABLE     34
+#define SQL_SELECT_INDEXES            35
+#define SQL_SELECT_MXLEVEL            36
+
+#define SQL_SELECT_LEVEL_RANGE2       37
+#define SQL_UPDATE_LEVEL_IDX          38
+#define SQL_UPDATE_LEVEL              39
+
+/*
+** This function is used to obtain an SQLite prepared statement handle
+** for the statement identified by the second argument. If successful,
+** *pp is set to the requested statement handle and SQLITE_OK returned.
+** Otherwise, an SQLite error code is returned and *pp is set to 0.
+**
+** If argument apVal is not NULL, then it must point to an array with
+** at least as many entries as the requested statement has bound 
+** parameters. The values are bound to the statements parameters before
+** returning.
+*/
+static int fts3SqlStmt(
+  Fts3Table *p,                   /* Virtual table handle */
+  int eStmt,                      /* One of the SQL_XXX constants above */
+  sqlite3_stmt **pp,              /* OUT: Statement handle */
+  sqlite3_value **apVal           /* Values to bind to statement */
+){
+  const char *azSql[] = {
+/* 0  */  "DELETE FROM %Q.'%q_content' WHERE rowid = ?",
+/* 1  */  "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)",
+/* 2  */  "DELETE FROM %Q.'%q_content'",
+/* 3  */  "DELETE FROM %Q.'%q_segments'",
+/* 4  */  "DELETE FROM %Q.'%q_segdir'",
+/* 5  */  "DELETE FROM %Q.'%q_docsize'",
+/* 6  */  "DELETE FROM %Q.'%q_stat'",
+/* 7  */  "SELECT %s WHERE rowid=?",
+/* 8  */  "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
+/* 9  */  "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
+/* 10 */  "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
+/* 11 */  "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
+
+          /* Return segments in order from oldest to newest.*/ 
+/* 12 */  "SELECT idx, start_block, leaves_end_block, end_block, root "
+            "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
+/* 13 */  "SELECT idx, start_block, leaves_end_block, end_block, root "
+            "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?"
+            "ORDER BY level DESC, idx ASC",
+
+/* 14 */  "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
+/* 15 */  "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
+
+/* 16 */  "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
+/* 17 */  "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
+/* 18 */  "INSERT INTO %Q.'%q_content' VALUES(%s)",
+/* 19 */  "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
+/* 20 */  "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
+/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
+/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=?",
+/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(?,?)",
+/* 24 */  "",
+/* 25 */  "",
+
+/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
+/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
+
+/* This statement is used to determine which level to read the input from
+** when performing an incremental merge. It returns the absolute level number
+** of the oldest level in the db that contains at least ? segments. Or,
+** if no level in the FTS index contains more than ? segments, the statement
+** returns zero rows.  */
+/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
+         "  ORDER BY (level %% 1024) ASC LIMIT 1",
+
+/* Estimate the upper limit on the number of leaf nodes in a new segment
+** created by merging the oldest :2 segments from absolute level :1. See 
+** function sqlite3Fts3Incrmerge() for details.  */
+/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
+         "  FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
+
+/* SQL_DELETE_SEGDIR_ENTRY
+**   Delete the %_segdir entry on absolute level :1 with index :2.  */
+/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
+
+/* SQL_SHIFT_SEGDIR_ENTRY
+**   Modify the idx value for the segment with idx=:3 on absolute level :2
+**   to :1.  */
+/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?",
+
+/* SQL_SELECT_SEGDIR
+**   Read a single entry from the %_segdir table. The entry from absolute 
+**   level :1 with index value :2.  */
+/* 32 */  "SELECT idx, start_block, leaves_end_block, end_block, root "
+            "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
+
+/* SQL_CHOMP_SEGDIR
+**   Update the start_block (:1) and root (:2) fields of the %_segdir
+**   entry located on absolute level :3 with index :4.  */
+/* 33 */  "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?"
+            "WHERE level = ? AND idx = ?",
+
+/* SQL_SEGMENT_IS_APPENDABLE
+**   Return a single row if the segment with end_block=? is appendable. Or
+**   no rows otherwise.  */
+/* 34 */  "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL",
+
+/* SQL_SELECT_INDEXES
+**   Return the list of valid segment indexes for absolute level ?  */
+/* 35 */  "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC",
+
+/* SQL_SELECT_MXLEVEL
+**   Return the largest relative level in the FTS index or indexes.  */
+/* 36 */  "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'",
+
+          /* Return segments in order from oldest to newest.*/ 
+/* 37 */  "SELECT level, idx, end_block "
+            "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? "
+            "ORDER BY level DESC, idx ASC",
+
+          /* Update statements used while promoting segments */
+/* 38 */  "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? "
+            "WHERE level=? AND idx=?",
+/* 39 */  "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1"
+
+  };
+  int rc = SQLITE_OK;
+  sqlite3_stmt *pStmt;
+
+  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
+  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
+  
+  pStmt = p->aStmt[eStmt];
+  if( !pStmt ){
+    char *zSql;
+    if( eStmt==SQL_CONTENT_INSERT ){
+      zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist);
+    }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
+      zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist);
+    }else{
+      zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
+    }
+    if( !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL);
+      sqlite3_free(zSql);
+      assert( rc==SQLITE_OK || pStmt==0 );
+      p->aStmt[eStmt] = pStmt;
+    }
+  }
+  if( apVal ){
+    int i;
+    int nParam = sqlite3_bind_parameter_count(pStmt);
+    for(i=0; rc==SQLITE_OK && i<nParam; i++){
+      rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
+    }
+  }
+  *pp = pStmt;
+  return rc;
+}
+
+
+static int fts3SelectDocsize(
+  Fts3Table *pTab,                /* FTS3 table handle */
+  sqlite3_int64 iDocid,           /* Docid to bind for SQL_SELECT_DOCSIZE */
+  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
+){
+  sqlite3_stmt *pStmt = 0;        /* Statement requested from fts3SqlStmt() */
+  int rc;                         /* Return code */
+
+  rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pStmt, 1, iDocid);
+    rc = sqlite3_step(pStmt);
+    if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
+      rc = sqlite3_reset(pStmt);
+      if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
+      pStmt = 0;
+    }else{
+      rc = SQLITE_OK;
+    }
+  }
+
+  *ppStmt = pStmt;
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal(
+  Fts3Table *pTab,                /* Fts3 table handle */
+  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
+){
+  sqlite3_stmt *pStmt = 0;
+  int rc;
+  rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
+    if( sqlite3_step(pStmt)!=SQLITE_ROW
+     || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB
+    ){
+      rc = sqlite3_reset(pStmt);
+      if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
+      pStmt = 0;
+    }
+  }
+  *ppStmt = pStmt;
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3SelectDocsize(
+  Fts3Table *pTab,                /* Fts3 table handle */
+  sqlite3_int64 iDocid,           /* Docid to read size data for */
+  sqlite3_stmt **ppStmt           /* OUT: Statement handle */
+){
+  return fts3SelectDocsize(pTab, iDocid, ppStmt);
+}
+
+/*
+** Similar to fts3SqlStmt(). Except, after binding the parameters in
+** array apVal[] to the SQL statement identified by eStmt, the statement
+** is executed.
+**
+** Returns SQLITE_OK if the statement is successfully executed, or an
+** SQLite error code otherwise.
+*/
+static void fts3SqlExec(
+  int *pRC,                /* Result code */
+  Fts3Table *p,            /* The FTS3 table */
+  int eStmt,               /* Index of statement to evaluate */
+  sqlite3_value **apVal    /* Parameters to bind */
+){
+  sqlite3_stmt *pStmt;
+  int rc;
+  if( *pRC ) return;
+  rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); 
+  if( rc==SQLITE_OK ){
+    sqlite3_step(pStmt);
+    rc = sqlite3_reset(pStmt);
+  }
+  *pRC = rc;
+}
+
+
+/*
+** This function ensures that the caller has obtained an exclusive 
+** shared-cache table-lock on the %_segdir table. This is required before 
+** writing data to the fts3 table. If this lock is not acquired first, then
+** the caller may end up attempting to take this lock as part of committing
+** a transaction, causing SQLite to return SQLITE_LOCKED or 
+** LOCKED_SHAREDCACHEto a COMMIT command.
+**
+** It is best to avoid this because if FTS3 returns any error when 
+** committing a transaction, the whole transaction will be rolled back. 
+** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. 
+** It can still happen if the user locks the underlying tables directly 
+** instead of accessing them via FTS.
+*/
+static int fts3Writelock(Fts3Table *p){
+  int rc = SQLITE_OK;
+  
+  if( p->nPendingData==0 ){
+    sqlite3_stmt *pStmt;
+    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_null(pStmt, 1);
+      sqlite3_step(pStmt);
+      rc = sqlite3_reset(pStmt);
+    }
+  }
+
+  return rc;
+}
+
+/*
+** FTS maintains a separate indexes for each language-id (a 32-bit integer).
+** Within each language id, a separate index is maintained to store the
+** document terms, and each configured prefix size (configured the FTS 
+** "prefix=" option). And each index consists of multiple levels ("relative
+** levels").
+**
+** All three of these values (the language id, the specific index and the
+** level within the index) are encoded in 64-bit integer values stored
+** in the %_segdir table on disk. This function is used to convert three
+** separate component values into the single 64-bit integer value that
+** can be used to query the %_segdir table.
+**
+** Specifically, each language-id/index combination is allocated 1024 
+** 64-bit integer level values ("absolute levels"). The main terms index
+** for language-id 0 is allocate values 0-1023. The first prefix index
+** (if any) for language-id 0 is allocated values 1024-2047. And so on.
+** Language 1 indexes are allocated immediately following language 0.
+**
+** So, for a system with nPrefix prefix indexes configured, the block of
+** absolute levels that corresponds to language-id iLangid and index 
+** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
+*/
+static sqlite3_int64 getAbsoluteLevel(
+  Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language id */
+  int iIndex,                     /* Index in p->aIndex[] */
+  int iLevel                      /* Level of segments */
+){
+  sqlite3_int64 iBase;            /* First absolute level for iLangid/iIndex */
+  assert( iLangid>=0 );
+  assert( p->nIndex>0 );
+  assert( iIndex>=0 && iIndex<p->nIndex );
+
+  iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
+  return iBase + iLevel;
+}
+
+/*
+** Set *ppStmt to a statement handle that may be used to iterate through
+** all rows in the %_segdir table, from oldest to newest. If successful,
+** return SQLITE_OK. If an error occurs while preparing the statement, 
+** return an SQLite error code.
+**
+** There is only ever one instance of this SQL statement compiled for
+** each FTS3 table.
+**
+** The statement returns the following columns from the %_segdir table:
+**
+**   0: idx
+**   1: start_block
+**   2: leaves_end_block
+**   3: end_block
+**   4: root
+*/
+SQLITE_PRIVATE int sqlite3Fts3AllSegdirs(
+  Fts3Table *p,                   /* FTS3 table */
+  int iLangid,                    /* Language being queried */
+  int iIndex,                     /* Index for p->aIndex[] */
+  int iLevel,                     /* Level to select (relative level) */
+  sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
+){
+  int rc;
+  sqlite3_stmt *pStmt = 0;
+
+  assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
+  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
+  assert( iIndex>=0 && iIndex<p->nIndex );
+
+  if( iLevel<0 ){
+    /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
+    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
+    if( rc==SQLITE_OK ){ 
+      sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+      sqlite3_bind_int64(pStmt, 2, 
+          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+      );
+    }
+  }else{
+    /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
+    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
+    if( rc==SQLITE_OK ){ 
+      sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
+    }
+  }
+  *ppStmt = pStmt;
+  return rc;
+}
+
+
+/*
+** Append a single varint to a PendingList buffer. SQLITE_OK is returned
+** if successful, or an SQLite error code otherwise.
+**
+** This function also serves to allocate the PendingList structure itself.
+** For example, to create a new PendingList structure containing two
+** varints:
+**
+**   PendingList *p = 0;
+**   fts3PendingListAppendVarint(&p, 1);
+**   fts3PendingListAppendVarint(&p, 2);
+*/
+static int fts3PendingListAppendVarint(
+  PendingList **pp,               /* IN/OUT: Pointer to PendingList struct */
+  sqlite3_int64 i                 /* Value to append to data */
+){
+  PendingList *p = *pp;
+
+  /* Allocate or grow the PendingList as required. */
+  if( !p ){
+    p = sqlite3_malloc(sizeof(*p) + 100);
+    if( !p ){
+      return SQLITE_NOMEM;
+    }
+    p->nSpace = 100;
+    p->aData = (char *)&p[1];
+    p->nData = 0;
+  }
+  else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){
+    int nNew = p->nSpace * 2;
+    p = sqlite3_realloc(p, sizeof(*p) + nNew);
+    if( !p ){
+      sqlite3_free(*pp);
+      *pp = 0;
+      return SQLITE_NOMEM;
+    }
+    p->nSpace = nNew;
+    p->aData = (char *)&p[1];
+  }
+
+  /* Append the new serialized varint to the end of the list. */
+  p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i);
+  p->aData[p->nData] = '\0';
+  *pp = p;
+  return SQLITE_OK;
+}
+
+/*
+** Add a docid/column/position entry to a PendingList structure. Non-zero
+** is returned if the structure is sqlite3_realloced as part of adding
+** the entry. Otherwise, zero.
+**
+** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
+** Zero is always returned in this case. Otherwise, if no OOM error occurs,
+** it is set to SQLITE_OK.
+*/
+static int fts3PendingListAppend(
+  PendingList **pp,               /* IN/OUT: PendingList structure */
+  sqlite3_int64 iDocid,           /* Docid for entry to add */
+  sqlite3_int64 iCol,             /* Column for entry to add */
+  sqlite3_int64 iPos,             /* Position of term for entry to add */
+  int *pRc                        /* OUT: Return code */
+){
+  PendingList *p = *pp;
+  int rc = SQLITE_OK;
+
+  assert( !p || p->iLastDocid<=iDocid );
+
+  if( !p || p->iLastDocid!=iDocid ){
+    sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0);
+    if( p ){
+      assert( p->nData<p->nSpace );
+      assert( p->aData[p->nData]==0 );
+      p->nData++;
+    }
+    if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){
+      goto pendinglistappend_out;
+    }
+    p->iLastCol = -1;
+    p->iLastPos = 0;
+    p->iLastDocid = iDocid;
+  }
+  if( iCol>0 && p->iLastCol!=iCol ){
+    if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1))
+     || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol))
+    ){
+      goto pendinglistappend_out;
+    }
+    p->iLastCol = iCol;
+    p->iLastPos = 0;
+  }
+  if( iCol>=0 ){
+    assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) );
+    rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos);
+    if( rc==SQLITE_OK ){
+      p->iLastPos = iPos;
+    }
+  }
+
+ pendinglistappend_out:
+  *pRc = rc;
+  if( p!=*pp ){
+    *pp = p;
+    return 1;
+  }
+  return 0;
+}
+
+/*
+** Free a PendingList object allocated by fts3PendingListAppend().
+*/
+static void fts3PendingListDelete(PendingList *pList){
+  sqlite3_free(pList);
+}
+
+/*
+** Add an entry to one of the pending-terms hash tables.
+*/
+static int fts3PendingTermsAddOne(
+  Fts3Table *p,
+  int iCol,
+  int iPos,
+  Fts3Hash *pHash,                /* Pending terms hash table to add entry to */
+  const char *zToken,
+  int nToken
+){
+  PendingList *pList;
+  int rc = SQLITE_OK;
+
+  pList = (PendingList *)fts3HashFind(pHash, zToken, nToken);
+  if( pList ){
+    p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
+  }
+  if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
+    if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){
+      /* Malloc failed while inserting the new entry. This can only 
+      ** happen if there was no previous entry for this token.
+      */
+      assert( 0==fts3HashFind(pHash, zToken, nToken) );
+      sqlite3_free(pList);
+      rc = SQLITE_NOMEM;
+    }
+  }
+  if( rc==SQLITE_OK ){
+    p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
+  }
+  return rc;
+}
+
+/*
+** Tokenize the nul-terminated string zText and add all tokens to the
+** pending-terms hash-table. The docid used is that currently stored in
+** p->iPrevDocid, and the column is specified by argument iCol.
+**
+** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
+*/
+static int fts3PendingTermsAdd(
+  Fts3Table *p,                   /* Table into which text will be inserted */
+  int iLangid,                    /* Language id to use */
+  const char *zText,              /* Text of document to be inserted */
+  int iCol,                       /* Column into which text is being inserted */
+  u32 *pnWord                     /* IN/OUT: Incr. by number tokens inserted */
+){
+  int rc;
+  int iStart = 0;
+  int iEnd = 0;
+  int iPos = 0;
+  int nWord = 0;
+
+  char const *zToken;
+  int nToken = 0;
+
+  sqlite3_tokenizer *pTokenizer = p->pTokenizer;
+  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+  sqlite3_tokenizer_cursor *pCsr;
+  int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
+      const char**,int*,int*,int*,int*);
+
+  assert( pTokenizer && pModule );
+
+  /* If the user has inserted a NULL value, this function may be called with
+  ** zText==0. In this case, add zero token entries to the hash table and 
+  ** return early. */
+  if( zText==0 ){
+    *pnWord = 0;
+    return SQLITE_OK;
+  }
+
+  rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  xNext = pModule->xNext;
+  while( SQLITE_OK==rc
+      && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
+  ){
+    int i;
+    if( iPos>=nWord ) nWord = iPos+1;
+
+    /* Positions cannot be negative; we use -1 as a terminator internally.
+    ** Tokens must have a non-zero length.
+    */
+    if( iPos<0 || !zToken || nToken<=0 ){
+      rc = SQLITE_ERROR;
+      break;
+    }
+
+    /* Add the term to the terms index */
+    rc = fts3PendingTermsAddOne(
+        p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken
+    );
+    
+    /* Add the term to each of the prefix indexes that it is not too 
+    ** short for. */
+    for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){
+      struct Fts3Index *pIndex = &p->aIndex[i];
+      if( nToken<pIndex->nPrefix ) continue;
+      rc = fts3PendingTermsAddOne(
+          p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix
+      );
+    }
+  }
+
+  pModule->xClose(pCsr);
+  *pnWord += nWord;
+  return (rc==SQLITE_DONE ? SQLITE_OK : rc);
+}
+
+/* 
+** Calling this function indicates that subsequent calls to 
+** fts3PendingTermsAdd() are to add term/position-list pairs for the
+** contents of the document with docid iDocid.
+*/
+static int fts3PendingTermsDocid(
+  Fts3Table *p,                   /* Full-text table handle */
+  int iLangid,                    /* Language id of row being written */
+  sqlite_int64 iDocid             /* Docid of row being written */
+){
+  assert( iLangid>=0 );
+
+  /* TODO(shess) Explore whether partially flushing the buffer on
+  ** forced-flush would provide better performance.  I suspect that if
+  ** we ordered the doclists by size and flushed the largest until the
+  ** buffer was half empty, that would let the less frequent terms
+  ** generate longer doclists.
+  */
+  if( iDocid<=p->iPrevDocid 
+   || p->iPrevLangid!=iLangid
+   || p->nPendingData>p->nMaxPendingData 
+  ){
+    int rc = sqlite3Fts3PendingTermsFlush(p);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  p->iPrevDocid = iDocid;
+  p->iPrevLangid = iLangid;
+  return SQLITE_OK;
+}
+
+/*
+** Discard the contents of the pending-terms hash tables. 
+*/
+SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){
+  int i;
+  for(i=0; i<p->nIndex; i++){
+    Fts3HashElem *pElem;
+    Fts3Hash *pHash = &p->aIndex[i].hPending;
+    for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){
+      PendingList *pList = (PendingList *)fts3HashData(pElem);
+      fts3PendingListDelete(pList);
+    }
+    fts3HashClear(pHash);
+  }
+  p->nPendingData = 0;
+}
+
+/*
+** This function is called by the xUpdate() method as part of an INSERT
+** operation. It adds entries for each term in the new record to the
+** pendingTerms hash table.
+**
+** Argument apVal is the same as the similarly named argument passed to
+** fts3InsertData(). Parameter iDocid is the docid of the new row.
+*/
+static int fts3InsertTerms(
+  Fts3Table *p, 
+  int iLangid, 
+  sqlite3_value **apVal, 
+  u32 *aSz
+){
+  int i;                          /* Iterator variable */
+  for(i=2; i<p->nColumn+2; i++){
+    int iCol = i-2;
+    if( p->abNotindexed[iCol]==0 ){
+      const char *zText = (const char *)sqlite3_value_text(apVal[i]);
+      int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+      aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** This function is called by the xUpdate() method for an INSERT operation.
+** The apVal parameter is passed a copy of the apVal argument passed by
+** SQLite to the xUpdate() method. i.e:
+**
+**   apVal[0]                Not used for INSERT.
+**   apVal[1]                rowid
+**   apVal[2]                Left-most user-defined column
+**   ...
+**   apVal[p->nColumn+1]     Right-most user-defined column
+**   apVal[p->nColumn+2]     Hidden column with same name as table
+**   apVal[p->nColumn+3]     Hidden "docid" column (alias for rowid)
+**   apVal[p->nColumn+4]     Hidden languageid column
+*/
+static int fts3InsertData(
+  Fts3Table *p,                   /* Full-text table */
+  sqlite3_value **apVal,          /* Array of values to insert */
+  sqlite3_int64 *piDocid          /* OUT: Docid for row just inserted */
+){
+  int rc;                         /* Return code */
+  sqlite3_stmt *pContentInsert;   /* INSERT INTO %_content VALUES(...) */
+
+  if( p->zContentTbl ){
+    sqlite3_value *pRowid = apVal[p->nColumn+3];
+    if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
+      pRowid = apVal[1];
+    }
+    if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
+      return SQLITE_CONSTRAINT;
+    }
+    *piDocid = sqlite3_value_int64(pRowid);
+    return SQLITE_OK;
+  }
+
+  /* Locate the statement handle used to insert data into the %_content
+  ** table. The SQL for this statement is:
+  **
+  **   INSERT INTO %_content VALUES(?, ?, ?, ...)
+  **
+  ** The statement features N '?' variables, where N is the number of user
+  ** defined columns in the FTS3 table, plus one for the docid field.
+  */
+  rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
+  if( rc==SQLITE_OK && p->zLanguageid ){
+    rc = sqlite3_bind_int(
+        pContentInsert, p->nColumn+2, 
+        sqlite3_value_int(apVal[p->nColumn+4])
+    );
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* There is a quirk here. The users INSERT statement may have specified
+  ** a value for the "rowid" field, for the "docid" field, or for both.
+  ** Which is a problem, since "rowid" and "docid" are aliases for the
+  ** same value. For example:
+  **
+  **   INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
+  **
+  ** In FTS3, this is an error. It is an error to specify non-NULL values
+  ** for both docid and some other rowid alias.
+  */
+  if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
+    if( SQLITE_NULL==sqlite3_value_type(apVal[0])
+     && SQLITE_NULL!=sqlite3_value_type(apVal[1])
+    ){
+      /* A rowid/docid conflict. */
+      return SQLITE_ERROR;
+    }
+    rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  /* Execute the statement to insert the record. Set *piDocid to the 
+  ** new docid value. 
+  */
+  sqlite3_step(pContentInsert);
+  rc = sqlite3_reset(pContentInsert);
+
+  *piDocid = sqlite3_last_insert_rowid(p->db);
+  return rc;
+}
+
+
+
+/*
+** Remove all data from the FTS3 table. Clear the hash table containing
+** pending terms.
+*/
+static int fts3DeleteAll(Fts3Table *p, int bContent){
+  int rc = SQLITE_OK;             /* Return code */
+
+  /* Discard the contents of the pending-terms hash table. */
+  sqlite3Fts3PendingTermsClear(p);
+
+  /* Delete everything from the shadow tables. Except, leave %_content as
+  ** is if bContent is false.  */
+  assert( p->zContentTbl==0 || bContent==0 );
+  if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
+  fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
+  fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
+  if( p->bHasDocsize ){
+    fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
+  }
+  if( p->bHasStat ){
+    fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
+  }
+  return rc;
+}
+
+/*
+**
+*/
+static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
+  int iLangid = 0;
+  if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
+  return iLangid;
+}
+
+/*
+** The first element in the apVal[] array is assumed to contain the docid
+** (an integer) of a row about to be deleted. Remove all terms from the
+** full-text index.
+*/
+static void fts3DeleteTerms( 
+  int *pRC,               /* Result code */
+  Fts3Table *p,           /* The FTS table to delete from */
+  sqlite3_value *pRowid,  /* The docid to be deleted */
+  u32 *aSz,               /* Sizes of deleted document written here */
+  int *pbFound            /* OUT: Set to true if row really does exist */
+){
+  int rc;
+  sqlite3_stmt *pSelect;
+
+  assert( *pbFound==0 );
+  if( *pRC ) return;
+  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
+  if( rc==SQLITE_OK ){
+    if( SQLITE_ROW==sqlite3_step(pSelect) ){
+      int i;
+      int iLangid = langidFromSelect(p, pSelect);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
+      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
+        int iCol = i-1;
+        if( p->abNotindexed[iCol]==0 ){
+          const char *zText = (const char *)sqlite3_column_text(pSelect, i);
+          rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
+          aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
+        }
+      }
+      if( rc!=SQLITE_OK ){
+        sqlite3_reset(pSelect);
+        *pRC = rc;
+        return;
+      }
+      *pbFound = 1;
+    }
+    rc = sqlite3_reset(pSelect);
+  }else{
+    sqlite3_reset(pSelect);
+  }
+  *pRC = rc;
+}
+
+/*
+** Forward declaration to account for the circular dependency between
+** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
+*/
+static int fts3SegmentMerge(Fts3Table *, int, int, int);
+
+/* 
+** This function allocates a new level iLevel index in the segdir table.
+** Usually, indexes are allocated within a level sequentially starting
+** with 0, so the allocated index is one greater than the value returned
+** by:
+**
+**   SELECT max(idx) FROM %_segdir WHERE level = :iLevel
+**
+** However, if there are already FTS3_MERGE_COUNT indexes at the requested
+** level, they are merged into a single level (iLevel+1) segment and the 
+** allocated index is 0.
+**
+** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
+** returned. Otherwise, an SQLite error code is returned.
+*/
+static int fts3AllocateSegdirIdx(
+  Fts3Table *p, 
+  int iLangid,                    /* Language id */
+  int iIndex,                     /* Index for p->aIndex */
+  int iLevel, 
+  int *piIdx
+){
+  int rc;                         /* Return Code */
+  sqlite3_stmt *pNextIdx;         /* Query for next idx at level iLevel */
+  int iNext = 0;                  /* Result of query pNextIdx */
+
+  assert( iLangid>=0 );
+  assert( p->nIndex>=1 );
+
+  /* Set variable iNext to the next available segdir index at level iLevel. */
+  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(
+        pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+    );
+    if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
+      iNext = sqlite3_column_int(pNextIdx, 0);
+    }
+    rc = sqlite3_reset(pNextIdx);
+  }
+
+  if( rc==SQLITE_OK ){
+    /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
+    ** full, merge all segments in level iLevel into a single iLevel+1
+    ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
+    ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
+    */
+    if( iNext>=FTS3_MERGE_COUNT ){
+      fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
+      rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
+      *piIdx = 0;
+    }else{
+      *piIdx = iNext;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** The %_segments table is declared as follows:
+**
+**   CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
+**
+** This function reads data from a single row of the %_segments table. The
+** specific row is identified by the iBlockid parameter. If paBlob is not
+** NULL, then a buffer is allocated using sqlite3_malloc() and populated
+** with the contents of the blob stored in the "block" column of the 
+** identified table row is. Whether or not paBlob is NULL, *pnBlob is set
+** to the size of the blob in bytes before returning.
+**
+** If an error occurs, or the table does not contain the specified row,
+** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If
+** paBlob is non-NULL, then it is the responsibility of the caller to
+** eventually free the returned buffer.
+**
+** This function may leave an open sqlite3_blob* handle in the
+** Fts3Table.pSegments variable. This handle is reused by subsequent calls
+** to this function. The handle may be closed by calling the
+** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy
+** performance improvement, but the blob handle should always be closed
+** before control is returned to the user (to prevent a lock being held
+** on the database file for longer than necessary). Thus, any virtual table
+** method (xFilter etc.) that may directly or indirectly call this function
+** must call sqlite3Fts3SegmentsClose() before returning.
+*/
+SQLITE_PRIVATE int sqlite3Fts3ReadBlock(
+  Fts3Table *p,                   /* FTS3 table handle */
+  sqlite3_int64 iBlockid,         /* Access the row with blockid=$iBlockid */
+  char **paBlob,                  /* OUT: Blob data in malloc'd buffer */
+  int *pnBlob,                    /* OUT: Size of blob data */
+  int *pnLoad                     /* OUT: Bytes actually loaded */
+){
+  int rc;                         /* Return code */
+
+  /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
+  assert( pnBlob );
+
+  if( p->pSegments ){
+    rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
+  }else{
+    if( 0==p->zSegmentsTbl ){
+      p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName);
+      if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM;
+    }
+    rc = sqlite3_blob_open(
+       p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
+    );
+  }
+
+  if( rc==SQLITE_OK ){
+    int nByte = sqlite3_blob_bytes(p->pSegments);
+    *pnBlob = nByte;
+    if( paBlob ){
+      char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
+      if( !aByte ){
+        rc = SQLITE_NOMEM;
+      }else{
+        if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){
+          nByte = FTS3_NODE_CHUNKSIZE;
+          *pnLoad = nByte;
+        }
+        rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
+        memset(&aByte[nByte], 0, FTS3_NODE_PADDING);
+        if( rc!=SQLITE_OK ){
+          sqlite3_free(aByte);
+          aByte = 0;
+        }
+      }
+      *paBlob = aByte;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Close the blob handle at p->pSegments, if it is open. See comments above
+** the sqlite3Fts3ReadBlock() function for details.
+*/
+SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){
+  sqlite3_blob_close(p->pSegments);
+  p->pSegments = 0;
+}
+    
+static int fts3SegReaderIncrRead(Fts3SegReader *pReader){
+  int nRead;                      /* Number of bytes to read */
+  int rc;                         /* Return code */
+
+  nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE);
+  rc = sqlite3_blob_read(
+      pReader->pBlob, 
+      &pReader->aNode[pReader->nPopulate],
+      nRead,
+      pReader->nPopulate
+  );
+
+  if( rc==SQLITE_OK ){
+    pReader->nPopulate += nRead;
+    memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING);
+    if( pReader->nPopulate==pReader->nNode ){
+      sqlite3_blob_close(pReader->pBlob);
+      pReader->pBlob = 0;
+      pReader->nPopulate = 0;
+    }
+  }
+  return rc;
+}
+
+static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){
+  int rc = SQLITE_OK;
+  assert( !pReader->pBlob 
+       || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode])
+  );
+  while( pReader->pBlob && rc==SQLITE_OK 
+     &&  (pFrom - pReader->aNode + nByte)>pReader->nPopulate
+  ){
+    rc = fts3SegReaderIncrRead(pReader);
+  }
+  return rc;
+}
+
+/*
+** Set an Fts3SegReader cursor to point at EOF.
+*/
+static void fts3SegReaderSetEof(Fts3SegReader *pSeg){
+  if( !fts3SegReaderIsRootOnly(pSeg) ){
+    sqlite3_free(pSeg->aNode);
+    sqlite3_blob_close(pSeg->pBlob);
+    pSeg->pBlob = 0;
+  }
+  pSeg->aNode = 0;
+}
+
+/*
+** Move the iterator passed as the first argument to the next term in the
+** segment. If successful, SQLITE_OK is returned. If there is no next term,
+** SQLITE_DONE. Otherwise, an SQLite error code.
+*/
+static int fts3SegReaderNext(
+  Fts3Table *p, 
+  Fts3SegReader *pReader,
+  int bIncr
+){
+  int rc;                         /* Return code of various sub-routines */
+  char *pNext;                    /* Cursor variable */
+  int nPrefix;                    /* Number of bytes in term prefix */
+  int nSuffix;                    /* Number of bytes in term suffix */
+
+  if( !pReader->aDoclist ){
+    pNext = pReader->aNode;
+  }else{
+    pNext = &pReader->aDoclist[pReader->nDoclist];
+  }
+
+  if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
+
+    if( fts3SegReaderIsPending(pReader) ){
+      Fts3HashElem *pElem = *(pReader->ppNextElem);
+      if( pElem==0 ){
+        pReader->aNode = 0;
+      }else{
+        PendingList *pList = (PendingList *)fts3HashData(pElem);
+        pReader->zTerm = (char *)fts3HashKey(pElem);
+        pReader->nTerm = fts3HashKeysize(pElem);
+        pReader->nNode = pReader->nDoclist = pList->nData + 1;
+        pReader->aNode = pReader->aDoclist = pList->aData;
+        pReader->ppNextElem++;
+        assert( pReader->aNode );
+      }
+      return SQLITE_OK;
+    }
+
+    fts3SegReaderSetEof(pReader);
+
+    /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf 
+    ** blocks have already been traversed.  */
+    assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock );
+    if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){
+      return SQLITE_OK;
+    }
+
+    rc = sqlite3Fts3ReadBlock(
+        p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, 
+        (bIncr ? &pReader->nPopulate : 0)
+    );
+    if( rc!=SQLITE_OK ) return rc;
+    assert( pReader->pBlob==0 );
+    if( bIncr && pReader->nPopulate<pReader->nNode ){
+      pReader->pBlob = p->pSegments;
+      p->pSegments = 0;
+    }
+    pNext = pReader->aNode;
+  }
+
+  assert( !fts3SegReaderIsPending(pReader) );
+
+  rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2);
+  if( rc!=SQLITE_OK ) return rc;
+  
+  /* Because of the FTS3_NODE_PADDING bytes of padding, the following is 
+  ** safe (no risk of overread) even if the node data is corrupted. */
+  pNext += fts3GetVarint32(pNext, &nPrefix);
+  pNext += fts3GetVarint32(pNext, &nSuffix);
+  if( nPrefix<0 || nSuffix<=0 
+   || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] 
+  ){
+    return FTS_CORRUPT_VTAB;
+  }
+
+  if( nPrefix+nSuffix>pReader->nTermAlloc ){
+    int nNew = (nPrefix+nSuffix)*2;
+    char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
+    if( !zNew ){
+      return SQLITE_NOMEM;
+    }
+    pReader->zTerm = zNew;
+    pReader->nTermAlloc = nNew;
+  }
+
+  rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX);
+  if( rc!=SQLITE_OK ) return rc;
+
+  memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
+  pReader->nTerm = nPrefix+nSuffix;
+  pNext += nSuffix;
+  pNext += fts3GetVarint32(pNext, &pReader->nDoclist);
+  pReader->aDoclist = pNext;
+  pReader->pOffsetList = 0;
+
+  /* Check that the doclist does not appear to extend past the end of the
+  ** b-tree node. And that the final byte of the doclist is 0x00. If either 
+  ** of these statements is untrue, then the data structure is corrupt.
+  */
+  if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 
+   || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1])
+  ){
+    return FTS_CORRUPT_VTAB;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Set the SegReader to point to the first docid in the doclist associated
+** with the current term.
+*/
+static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){
+  int rc = SQLITE_OK;
+  assert( pReader->aDoclist );
+  assert( !pReader->pOffsetList );
+  if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){
+    u8 bEof = 0;
+    pReader->iDocid = 0;
+    pReader->nOffsetList = 0;
+    sqlite3Fts3DoclistPrev(0,
+        pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, 
+        &pReader->iDocid, &pReader->nOffsetList, &bEof
+    );
+  }else{
+    rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX);
+    if( rc==SQLITE_OK ){
+      int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid);
+      pReader->pOffsetList = &pReader->aDoclist[n];
+    }
+  }
+  return rc;
+}
+
+/*
+** Advance the SegReader to point to the next docid in the doclist
+** associated with the current term.
+** 
+** If arguments ppOffsetList and pnOffsetList are not NULL, then 
+** *ppOffsetList is set to point to the first column-offset list
+** in the doclist entry (i.e. immediately past the docid varint).
+** *pnOffsetList is set to the length of the set of column-offset
+** lists, not including the nul-terminator byte. For example:
+*/
+static int fts3SegReaderNextDocid(
+  Fts3Table *pTab,
+  Fts3SegReader *pReader,         /* Reader to advance to next docid */
+  char **ppOffsetList,            /* OUT: Pointer to current position-list */
+  int *pnOffsetList               /* OUT: Length of *ppOffsetList in bytes */
+){
+  int rc = SQLITE_OK;
+  char *p = pReader->pOffsetList;
+  char c = 0;
+
+  assert( p );
+
+  if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){
+    /* A pending-terms seg-reader for an FTS4 table that uses order=desc.
+    ** Pending-terms doclists are always built up in ascending order, so
+    ** we have to iterate through them backwards here. */
+    u8 bEof = 0;
+    if( ppOffsetList ){
+      *ppOffsetList = pReader->pOffsetList;
+      *pnOffsetList = pReader->nOffsetList - 1;
+    }
+    sqlite3Fts3DoclistPrev(0,
+        pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid,
+        &pReader->nOffsetList, &bEof
+    );
+    if( bEof ){
+      pReader->pOffsetList = 0;
+    }else{
+      pReader->pOffsetList = p;
+    }
+  }else{
+    char *pEnd = &pReader->aDoclist[pReader->nDoclist];
+
+    /* Pointer p currently points at the first byte of an offset list. The
+    ** following block advances it to point one byte past the end of
+    ** the same offset list. */
+    while( 1 ){
+  
+      /* The following line of code (and the "p++" below the while() loop) is
+      ** normally all that is required to move pointer p to the desired 
+      ** position. The exception is if this node is being loaded from disk
+      ** incrementally and pointer "p" now points to the first byte past
+      ** the populated part of pReader->aNode[].
+      */
+      while( *p | c ) c = *p++ & 0x80;
+      assert( *p==0 );
+  
+      if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break;
+      rc = fts3SegReaderIncrRead(pReader);
+      if( rc!=SQLITE_OK ) return rc;
+    }
+    p++;
+  
+    /* If required, populate the output variables with a pointer to and the
+    ** size of the previous offset-list.
+    */
+    if( ppOffsetList ){
+      *ppOffsetList = pReader->pOffsetList;
+      *pnOffsetList = (int)(p - pReader->pOffsetList - 1);
+    }
+
+    /* List may have been edited in place by fts3EvalNearTrim() */
+    while( p<pEnd && *p==0 ) p++;
+  
+    /* If there are no more entries in the doclist, set pOffsetList to
+    ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and
+    ** Fts3SegReader.pOffsetList to point to the next offset list before
+    ** returning.
+    */
+    if( p>=pEnd ){
+      pReader->pOffsetList = 0;
+    }else{
+      rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX);
+      if( rc==SQLITE_OK ){
+        sqlite3_int64 iDelta;
+        pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta);
+        if( pTab->bDescIdx ){
+          pReader->iDocid -= iDelta;
+        }else{
+          pReader->iDocid += iDelta;
+        }
+      }
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+
+SQLITE_PRIVATE int sqlite3Fts3MsrOvfl(
+  Fts3Cursor *pCsr, 
+  Fts3MultiSegReader *pMsr,
+  int *pnOvfl
+){
+  Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
+  int nOvfl = 0;
+  int ii;
+  int rc = SQLITE_OK;
+  int pgsz = p->nPgsz;
+
+  assert( p->bFts4 );
+  assert( pgsz>0 );
+
+  for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){
+    Fts3SegReader *pReader = pMsr->apSegment[ii];
+    if( !fts3SegReaderIsPending(pReader) 
+     && !fts3SegReaderIsRootOnly(pReader) 
+    ){
+      sqlite3_int64 jj;
+      for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){
+        int nBlob;
+        rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0);
+        if( rc!=SQLITE_OK ) break;
+        if( (nBlob+35)>pgsz ){
+          nOvfl += (nBlob + 34)/pgsz;
+        }
+      }
+    }
+  }
+  *pnOvfl = nOvfl;
+  return rc;
+}
+
+/*
+** Free all allocations associated with the iterator passed as the 
+** second argument.
+*/
+SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){
+  if( pReader && !fts3SegReaderIsPending(pReader) ){
+    sqlite3_free(pReader->zTerm);
+    if( !fts3SegReaderIsRootOnly(pReader) ){
+      sqlite3_free(pReader->aNode);
+      sqlite3_blob_close(pReader->pBlob);
+    }
+  }
+  sqlite3_free(pReader);
+}
+
+/*
+** Allocate a new SegReader object.
+*/
+SQLITE_PRIVATE int sqlite3Fts3SegReaderNew(
+  int iAge,                       /* Segment "age". */
+  int bLookup,                    /* True for a lookup only */
+  sqlite3_int64 iStartLeaf,       /* First leaf to traverse */
+  sqlite3_int64 iEndLeaf,         /* Final leaf to traverse */
+  sqlite3_int64 iEndBlock,        /* Final block of segment */
+  const char *zRoot,              /* Buffer containing root node */
+  int nRoot,                      /* Size of buffer containing root node */
+  Fts3SegReader **ppReader        /* OUT: Allocated Fts3SegReader */
+){
+  Fts3SegReader *pReader;         /* Newly allocated SegReader object */
+  int nExtra = 0;                 /* Bytes to allocate segment root node */
+
+  assert( iStartLeaf<=iEndLeaf );
+  if( iStartLeaf==0 ){
+    nExtra = nRoot + FTS3_NODE_PADDING;
+  }
+
+  pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
+  if( !pReader ){
+    return SQLITE_NOMEM;
+  }
+  memset(pReader, 0, sizeof(Fts3SegReader));
+  pReader->iIdx = iAge;
+  pReader->bLookup = bLookup!=0;
+  pReader->iStartBlock = iStartLeaf;
+  pReader->iLeafEndBlock = iEndLeaf;
+  pReader->iEndBlock = iEndBlock;
+
+  if( nExtra ){
+    /* The entire segment is stored in the root node. */
+    pReader->aNode = (char *)&pReader[1];
+    pReader->rootOnly = 1;
+    pReader->nNode = nRoot;
+    memcpy(pReader->aNode, zRoot, nRoot);
+    memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING);
+  }else{
+    pReader->iCurrentBlock = iStartLeaf-1;
+  }
+  *ppReader = pReader;
+  return SQLITE_OK;
+}
+
+/*
+** This is a comparison function used as a qsort() callback when sorting
+** an array of pending terms by term. This occurs as part of flushing
+** the contents of the pending-terms hash table to the database.
+*/
+static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
+  char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
+  char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
+  int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
+  int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);
+
+  int n = (n1<n2 ? n1 : n2);
+  int c = memcmp(z1, z2, n);
+  if( c==0 ){
+    c = n1 - n2;
+  }
+  return c;
+}
+
+/*
+** This function is used to allocate an Fts3SegReader that iterates through
+** a subset of the terms stored in the Fts3Table.pendingTerms array.
+**
+** If the isPrefixIter parameter is zero, then the returned SegReader iterates
+** through each term in the pending-terms table. Or, if isPrefixIter is
+** non-zero, it iterates through each term and its prefixes. For example, if
+** the pending terms hash table contains the terms "sqlite", "mysql" and
+** "firebird", then the iterator visits the following 'terms' (in the order
+** shown):
+**
+**   f fi fir fire fireb firebi firebir firebird
+**   m my mys mysq mysql
+**   s sq sql sqli sqlit sqlite
+**
+** Whereas if isPrefixIter is zero, the terms visited are:
+**
+**   firebird mysql sqlite
+*/
+SQLITE_PRIVATE int sqlite3Fts3SegReaderPending(
+  Fts3Table *p,                   /* Virtual table handle */
+  int iIndex,                     /* Index for p->aIndex */
+  const char *zTerm,              /* Term to search for */
+  int nTerm,                      /* Size of buffer zTerm */
+  int bPrefix,                    /* True for a prefix iterator */
+  Fts3SegReader **ppReader        /* OUT: SegReader for pending-terms */
+){
+  Fts3SegReader *pReader = 0;     /* Fts3SegReader object to return */
+  Fts3HashElem *pE;               /* Iterator variable */
+  Fts3HashElem **aElem = 0;       /* Array of term hash entries to scan */
+  int nElem = 0;                  /* Size of array at aElem */
+  int rc = SQLITE_OK;             /* Return Code */
+  Fts3Hash *pHash;
+
+  pHash = &p->aIndex[iIndex].hPending;
+  if( bPrefix ){
+    int nAlloc = 0;               /* Size of allocated array at aElem */
+
+    for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
+      char *zKey = (char *)fts3HashKey(pE);
+      int nKey = fts3HashKeysize(pE);
+      if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
+        if( nElem==nAlloc ){
+          Fts3HashElem **aElem2;
+          nAlloc += 16;
+          aElem2 = (Fts3HashElem **)sqlite3_realloc(
+              aElem, nAlloc*sizeof(Fts3HashElem *)
+          );
+          if( !aElem2 ){
+            rc = SQLITE_NOMEM;
+            nElem = 0;
+            break;
+          }
+          aElem = aElem2;
+        }
+
+        aElem[nElem++] = pE;
+      }
+    }
+
+    /* If more than one term matches the prefix, sort the Fts3HashElem
+    ** objects in term order using qsort(). This uses the same comparison
+    ** callback as is used when flushing terms to disk.
+    */
+    if( nElem>1 ){
+      qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
+    }
+
+  }else{
+    /* The query is a simple term lookup that matches at most one term in
+    ** the index. All that is required is a straight hash-lookup. 
+    **
+    ** Because the stack address of pE may be accessed via the aElem pointer
+    ** below, the "Fts3HashElem *pE" must be declared so that it is valid
+    ** within this entire function, not just this "else{...}" block.
+    */
+    pE = fts3HashFindElem(pHash, zTerm, nTerm);
+    if( pE ){
+      aElem = &pE;
+      nElem = 1;
+    }
+  }
+
+  if( nElem>0 ){
+    int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *);
+    pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
+    if( !pReader ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(pReader, 0, nByte);
+      pReader->iIdx = 0x7FFFFFFF;
+      pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
+      memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
+    }
+  }
+
+  if( bPrefix ){
+    sqlite3_free(aElem);
+  }
+  *ppReader = pReader;
+  return rc;
+}
+
+/*
+** Compare the entries pointed to by two Fts3SegReader structures. 
+** Comparison is as follows:
+**
+**   1) EOF is greater than not EOF.
+**
+**   2) The current terms (if any) are compared using memcmp(). If one
+**      term is a prefix of another, the longer term is considered the
+**      larger.
+**
+**   3) By segment age. An older segment is considered larger.
+*/
+static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
+  int rc;
+  if( pLhs->aNode && pRhs->aNode ){
+    int rc2 = pLhs->nTerm - pRhs->nTerm;
+    if( rc2<0 ){
+      rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm);
+    }else{
+      rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm);
+    }
+    if( rc==0 ){
+      rc = rc2;
+    }
+  }else{
+    rc = (pLhs->aNode==0) - (pRhs->aNode==0);
+  }
+  if( rc==0 ){
+    rc = pRhs->iIdx - pLhs->iIdx;
+  }
+  assert( rc!=0 );
+  return rc;
+}
+
+/*
+** A different comparison function for SegReader structures. In this
+** version, it is assumed that each SegReader points to an entry in
+** a doclist for identical terms. Comparison is made as follows:
+**
+**   1) EOF (end of doclist in this case) is greater than not EOF.
+**
+**   2) By current docid.
+**
+**   3) By segment age. An older segment is considered larger.
+*/
+static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
+  int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
+  if( rc==0 ){
+    if( pLhs->iDocid==pRhs->iDocid ){
+      rc = pRhs->iIdx - pLhs->iIdx;
+    }else{
+      rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
+    }
+  }
+  assert( pLhs->aNode && pRhs->aNode );
+  return rc;
+}
+static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
+  int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
+  if( rc==0 ){
+    if( pLhs->iDocid==pRhs->iDocid ){
+      rc = pRhs->iIdx - pLhs->iIdx;
+    }else{
+      rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1;
+    }
+  }
+  assert( pLhs->aNode && pRhs->aNode );
+  return rc;
+}
+
+/*
+** Compare the term that the Fts3SegReader object passed as the first argument
+** points to with the term specified by arguments zTerm and nTerm. 
+**
+** If the pSeg iterator is already at EOF, return 0. Otherwise, return
+** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
+** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
+*/
+static int fts3SegReaderTermCmp(
+  Fts3SegReader *pSeg,            /* Segment reader object */
+  const char *zTerm,              /* Term to compare to */
+  int nTerm                       /* Size of term zTerm in bytes */
+){
+  int res = 0;
+  if( pSeg->aNode ){
+    if( pSeg->nTerm>nTerm ){
+      res = memcmp(pSeg->zTerm, zTerm, nTerm);
+    }else{
+      res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
+    }
+    if( res==0 ){
+      res = pSeg->nTerm-nTerm;
+    }
+  }
+  return res;
+}
+
+/*
+** Argument apSegment is an array of nSegment elements. It is known that
+** the final (nSegment-nSuspect) members are already in sorted order
+** (according to the comparison function provided). This function shuffles
+** the array around until all entries are in sorted order.
+*/
+static void fts3SegReaderSort(
+  Fts3SegReader **apSegment,                     /* Array to sort entries of */
+  int nSegment,                                  /* Size of apSegment array */
+  int nSuspect,                                  /* Unsorted entry count */
+  int (*xCmp)(Fts3SegReader *, Fts3SegReader *)  /* Comparison function */
+){
+  int i;                          /* Iterator variable */
+
+  assert( nSuspect<=nSegment );
+
+  if( nSuspect==nSegment ) nSuspect--;
+  for(i=nSuspect-1; i>=0; i--){
+    int j;
+    for(j=i; j<(nSegment-1); j++){
+      Fts3SegReader *pTmp;
+      if( xCmp(apSegment[j], apSegment[j+1])<0 ) break;
+      pTmp = apSegment[j+1];
+      apSegment[j+1] = apSegment[j];
+      apSegment[j] = pTmp;
+    }
+  }
+
+#ifndef NDEBUG
+  /* Check that the list really is sorted now. */
+  for(i=0; i<(nSuspect-1); i++){
+    assert( xCmp(apSegment[i], apSegment[i+1])<0 );
+  }
+#endif
+}
+
+/* 
+** Insert a record into the %_segments table.
+*/
+static int fts3WriteSegment(
+  Fts3Table *p,                   /* Virtual table handle */
+  sqlite3_int64 iBlock,           /* Block id for new block */
+  char *z,                        /* Pointer to buffer containing block data */
+  int n                           /* Size of buffer z in bytes */
+){
+  sqlite3_stmt *pStmt;
+  int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pStmt, 1, iBlock);
+    sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC);
+    sqlite3_step(pStmt);
+    rc = sqlite3_reset(pStmt);
+  }
+  return rc;
+}
+
+/*
+** Find the largest relative level number in the table. If successful, set
+** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs,
+** set *pnMax to zero and return an SQLite error code.
+*/
+SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){
+  int rc;
+  int mxLevel = 0;
+  sqlite3_stmt *pStmt = 0;
+
+  rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    if( SQLITE_ROW==sqlite3_step(pStmt) ){
+      mxLevel = sqlite3_column_int(pStmt, 0);
+    }
+    rc = sqlite3_reset(pStmt);
+  }
+  *pnMax = mxLevel;
+  return rc;
+}
+
+/* 
+** Insert a record into the %_segdir table.
+*/
+static int fts3WriteSegdir(
+  Fts3Table *p,                   /* Virtual table handle */
+  sqlite3_int64 iLevel,           /* Value for "level" field (absolute level) */
+  int iIdx,                       /* Value for "idx" field */
+  sqlite3_int64 iStartBlock,      /* Value for "start_block" field */
+  sqlite3_int64 iLeafEndBlock,    /* Value for "leaves_end_block" field */
+  sqlite3_int64 iEndBlock,        /* Value for "end_block" field */
+  sqlite3_int64 nLeafData,        /* Bytes of leaf data in segment */
+  char *zRoot,                    /* Blob value for "root" field */
+  int nRoot                       /* Number of bytes in buffer zRoot */
+){
+  sqlite3_stmt *pStmt;
+  int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pStmt, 1, iLevel);
+    sqlite3_bind_int(pStmt, 2, iIdx);
+    sqlite3_bind_int64(pStmt, 3, iStartBlock);
+    sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
+    if( nLeafData==0 ){
+      sqlite3_bind_int64(pStmt, 5, iEndBlock);
+    }else{
+      char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData);
+      if( !zEnd ) return SQLITE_NOMEM;
+      sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free);
+    }
+    sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
+    sqlite3_step(pStmt);
+    rc = sqlite3_reset(pStmt);
+  }
+  return rc;
+}
+
+/*
+** Return the size of the common prefix (if any) shared by zPrev and
+** zNext, in bytes. For example, 
+**
+**   fts3PrefixCompress("abc", 3, "abcdef", 6)   // returns 3
+**   fts3PrefixCompress("abX", 3, "abcdef", 6)   // returns 2
+**   fts3PrefixCompress("abX", 3, "Xbcdef", 6)   // returns 0
+*/
+static int fts3PrefixCompress(
+  const char *zPrev,              /* Buffer containing previous term */
+  int nPrev,                      /* Size of buffer zPrev in bytes */
+  const char *zNext,              /* Buffer containing next term */
+  int nNext                       /* Size of buffer zNext in bytes */
+){
+  int n;
+  UNUSED_PARAMETER(nNext);
+  for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++);
+  return n;
+}
+
+/*
+** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
+** (according to memcmp) than the previous term.
+*/
+static int fts3NodeAddTerm(
+  Fts3Table *p,                   /* Virtual table handle */
+  SegmentNode **ppTree,           /* IN/OUT: SegmentNode handle */ 
+  int isCopyTerm,                 /* True if zTerm/nTerm is transient */
+  const char *zTerm,              /* Pointer to buffer containing term */
+  int nTerm                       /* Size of term in bytes */
+){
+  SegmentNode *pTree = *ppTree;
+  int rc;
+  SegmentNode *pNew;
+
+  /* First try to append the term to the current node. Return early if 
+  ** this is possible.
+  */
+  if( pTree ){
+    int nData = pTree->nData;     /* Current size of node in bytes */
+    int nReq = nData;             /* Required space after adding zTerm */
+    int nPrefix;                  /* Number of bytes of prefix compression */
+    int nSuffix;                  /* Suffix length */
+
+    nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
+    nSuffix = nTerm-nPrefix;
+
+    nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
+    if( nReq<=p->nNodeSize || !pTree->zTerm ){
+
+      if( nReq>p->nNodeSize ){
+        /* An unusual case: this is the first term to be added to the node
+        ** and the static node buffer (p->nNodeSize bytes) is not large
+        ** enough. Use a separately malloced buffer instead This wastes
+        ** p->nNodeSize bytes, but since this scenario only comes about when
+        ** the database contain two terms that share a prefix of almost 2KB, 
+        ** this is not expected to be a serious problem. 
+        */
+        assert( pTree->aData==(char *)&pTree[1] );
+        pTree->aData = (char *)sqlite3_malloc(nReq);
+        if( !pTree->aData ){
+          return SQLITE_NOMEM;
+        }
+      }
+
+      if( pTree->zTerm ){
+        /* There is no prefix-length field for first term in a node */
+        nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
+      }
+
+      nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
+      memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
+      pTree->nData = nData + nSuffix;
+      pTree->nEntry++;
+
+      if( isCopyTerm ){
+        if( pTree->nMalloc<nTerm ){
+          char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2);
+          if( !zNew ){
+            return SQLITE_NOMEM;
+          }
+          pTree->nMalloc = nTerm*2;
+          pTree->zMalloc = zNew;
+        }
+        pTree->zTerm = pTree->zMalloc;
+        memcpy(pTree->zTerm, zTerm, nTerm);
+        pTree->nTerm = nTerm;
+      }else{
+        pTree->zTerm = (char *)zTerm;
+        pTree->nTerm = nTerm;
+      }
+      return SQLITE_OK;
+    }
+  }
+
+  /* If control flows to here, it was not possible to append zTerm to the
+  ** current node. Create a new node (a right-sibling of the current node).
+  ** If this is the first node in the tree, the term is added to it.
+  **
+  ** Otherwise, the term is not added to the new node, it is left empty for
+  ** now. Instead, the term is inserted into the parent of pTree. If pTree 
+  ** has no parent, one is created here.
+  */
+  pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
+  if( !pNew ){
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(SegmentNode));
+  pNew->nData = 1 + FTS3_VARINT_MAX;
+  pNew->aData = (char *)&pNew[1];
+
+  if( pTree ){
+    SegmentNode *pParent = pTree->pParent;
+    rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
+    if( pTree->pParent==0 ){
+      pTree->pParent = pParent;
+    }
+    pTree->pRight = pNew;
+    pNew->pLeftmost = pTree->pLeftmost;
+    pNew->pParent = pParent;
+    pNew->zMalloc = pTree->zMalloc;
+    pNew->nMalloc = pTree->nMalloc;
+    pTree->zMalloc = 0;
+  }else{
+    pNew->pLeftmost = pNew;
+    rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); 
+  }
+
+  *ppTree = pNew;
+  return rc;
+}
+
+/*
+** Helper function for fts3NodeWrite().
+*/
+static int fts3TreeFinishNode(
+  SegmentNode *pTree, 
+  int iHeight, 
+  sqlite3_int64 iLeftChild
+){
+  int nStart;
+  assert( iHeight>=1 && iHeight<128 );
+  nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild);
+  pTree->aData[nStart] = (char)iHeight;
+  sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild);
+  return nStart;
+}
+
+/*
+** Write the buffer for the segment node pTree and all of its peers to the
+** database. Then call this function recursively to write the parent of 
+** pTree and its peers to the database. 
+**
+** Except, if pTree is a root node, do not write it to the database. Instead,
+** set output variables *paRoot and *pnRoot to contain the root node.
+**
+** If successful, SQLITE_OK is returned and output variable *piLast is
+** set to the largest blockid written to the database (or zero if no
+** blocks were written to the db). Otherwise, an SQLite error code is 
+** returned.
+*/
+static int fts3NodeWrite(
+  Fts3Table *p,                   /* Virtual table handle */
+  SegmentNode *pTree,             /* SegmentNode handle */
+  int iHeight,                    /* Height of this node in tree */
+  sqlite3_int64 iLeaf,            /* Block id of first leaf node */
+  sqlite3_int64 iFree,            /* Block id of next free slot in %_segments */
+  sqlite3_int64 *piLast,          /* OUT: Block id of last entry written */
+  char **paRoot,                  /* OUT: Data for root node */
+  int *pnRoot                     /* OUT: Size of root node in bytes */
+){
+  int rc = SQLITE_OK;
+
+  if( !pTree->pParent ){
+    /* Root node of the tree. */
+    int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf);
+    *piLast = iFree-1;
+    *pnRoot = pTree->nData - nStart;
+    *paRoot = &pTree->aData[nStart];
+  }else{
+    SegmentNode *pIter;
+    sqlite3_int64 iNextFree = iFree;
+    sqlite3_int64 iNextLeaf = iLeaf;
+    for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){
+      int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf);
+      int nWrite = pIter->nData - nStart;
+  
+      rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite);
+      iNextFree++;
+      iNextLeaf += (pIter->nEntry+1);
+    }
+    if( rc==SQLITE_OK ){
+      assert( iNextLeaf==iFree );
+      rc = fts3NodeWrite(
+          p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot
+      );
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Free all memory allocations associated with the tree pTree.
+*/
+static void fts3NodeFree(SegmentNode *pTree){
+  if( pTree ){
+    SegmentNode *p = pTree->pLeftmost;
+    fts3NodeFree(p->pParent);
+    while( p ){
+      SegmentNode *pRight = p->pRight;
+      if( p->aData!=(char *)&p[1] ){
+        sqlite3_free(p->aData);
+      }
+      assert( pRight==0 || p->zMalloc==0 );
+      sqlite3_free(p->zMalloc);
+      sqlite3_free(p);
+      p = pRight;
+    }
+  }
+}
+
+/*
+** Add a term to the segment being constructed by the SegmentWriter object
+** *ppWriter. When adding the first term to a segment, *ppWriter should
+** be passed NULL. This function will allocate a new SegmentWriter object
+** and return it via the input/output variable *ppWriter in this case.
+**
+** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
+*/
+static int fts3SegWriterAdd(
+  Fts3Table *p,                   /* Virtual table handle */
+  SegmentWriter **ppWriter,       /* IN/OUT: SegmentWriter handle */ 
+  int isCopyTerm,                 /* True if buffer zTerm must be copied */
+  const char *zTerm,              /* Pointer to buffer containing term */
+  int nTerm,                      /* Size of term in bytes */
+  const char *aDoclist,           /* Pointer to buffer containing doclist */
+  int nDoclist                    /* Size of doclist in bytes */
+){
+  int nPrefix;                    /* Size of term prefix in bytes */
+  int nSuffix;                    /* Size of term suffix in bytes */
+  int nReq;                       /* Number of bytes required on leaf page */
+  int nData;
+  SegmentWriter *pWriter = *ppWriter;
+
+  if( !pWriter ){
+    int rc;
+    sqlite3_stmt *pStmt;
+
+    /* Allocate the SegmentWriter structure */
+    pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
+    if( !pWriter ) return SQLITE_NOMEM;
+    memset(pWriter, 0, sizeof(SegmentWriter));
+    *ppWriter = pWriter;
+
+    /* Allocate a buffer in which to accumulate data */
+    pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
+    if( !pWriter->aData ) return SQLITE_NOMEM;
+    pWriter->nSize = p->nNodeSize;
+
+    /* Find the next free blockid in the %_segments table */
+    rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
+    if( rc!=SQLITE_OK ) return rc;
+    if( SQLITE_ROW==sqlite3_step(pStmt) ){
+      pWriter->iFree = sqlite3_column_int64(pStmt, 0);
+      pWriter->iFirst = pWriter->iFree;
+    }
+    rc = sqlite3_reset(pStmt);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  nData = pWriter->nData;
+
+  nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
+  nSuffix = nTerm-nPrefix;
+
+  /* Figure out how many bytes are required by this new entry */
+  nReq = sqlite3Fts3VarintLen(nPrefix) +    /* varint containing prefix size */
+    sqlite3Fts3VarintLen(nSuffix) +         /* varint containing suffix size */
+    nSuffix +                               /* Term suffix */
+    sqlite3Fts3VarintLen(nDoclist) +        /* Size of doclist */
+    nDoclist;                               /* Doclist data */
+
+  if( nData>0 && nData+nReq>p->nNodeSize ){
+    int rc;
+
+    /* The current leaf node is full. Write it out to the database. */
+    rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
+    if( rc!=SQLITE_OK ) return rc;
+    p->nLeafAdd++;
+
+    /* Add the current term to the interior node tree. The term added to
+    ** the interior tree must:
+    **
+    **   a) be greater than the largest term on the leaf node just written
+    **      to the database (still available in pWriter->zTerm), and
+    **
+    **   b) be less than or equal to the term about to be added to the new
+    **      leaf node (zTerm/nTerm).
+    **
+    ** In other words, it must be the prefix of zTerm 1 byte longer than
+    ** the common prefix (if any) of zTerm and pWriter->zTerm.
+    */
+    assert( nPrefix<nTerm );
+    rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
+    if( rc!=SQLITE_OK ) return rc;
+
+    nData = 0;
+    pWriter->nTerm = 0;
+
+    nPrefix = 0;
+    nSuffix = nTerm;
+    nReq = 1 +                              /* varint containing prefix size */
+      sqlite3Fts3VarintLen(nTerm) +         /* varint containing suffix size */
+      nTerm +                               /* Term suffix */
+      sqlite3Fts3VarintLen(nDoclist) +      /* Size of doclist */
+      nDoclist;                             /* Doclist data */
+  }
+
+  /* Increase the total number of bytes written to account for the new entry. */
+  pWriter->nLeafData += nReq;
+
+  /* If the buffer currently allocated is too small for this entry, realloc
+  ** the buffer to make it large enough.
+  */
+  if( nReq>pWriter->nSize ){
+    char *aNew = sqlite3_realloc(pWriter->aData, nReq);
+    if( !aNew ) return SQLITE_NOMEM;
+    pWriter->aData = aNew;
+    pWriter->nSize = nReq;
+  }
+  assert( nData+nReq<=pWriter->nSize );
+
+  /* Append the prefix-compressed term and doclist to the buffer. */
+  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
+  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
+  memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
+  nData += nSuffix;
+  nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
+  memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
+  pWriter->nData = nData + nDoclist;
+
+  /* Save the current term so that it can be used to prefix-compress the next.
+  ** If the isCopyTerm parameter is true, then the buffer pointed to by
+  ** zTerm is transient, so take a copy of the term data. Otherwise, just
+  ** store a copy of the pointer.
+  */
+  if( isCopyTerm ){
+    if( nTerm>pWriter->nMalloc ){
+      char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2);
+      if( !zNew ){
+        return SQLITE_NOMEM;
+      }
+      pWriter->nMalloc = nTerm*2;
+      pWriter->zMalloc = zNew;
+      pWriter->zTerm = zNew;
+    }
+    assert( pWriter->zTerm==pWriter->zMalloc );
+    memcpy(pWriter->zTerm, zTerm, nTerm);
+  }else{
+    pWriter->zTerm = (char *)zTerm;
+  }
+  pWriter->nTerm = nTerm;
+
+  return SQLITE_OK;
+}
+
+/*
+** Flush all data associated with the SegmentWriter object pWriter to the
+** database. This function must be called after all terms have been added
+** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is
+** returned. Otherwise, an SQLite error code.
+*/
+static int fts3SegWriterFlush(
+  Fts3Table *p,                   /* Virtual table handle */
+  SegmentWriter *pWriter,         /* SegmentWriter to flush to the db */
+  sqlite3_int64 iLevel,           /* Value for 'level' column of %_segdir */
+  int iIdx                        /* Value for 'idx' column of %_segdir */
+){
+  int rc;                         /* Return code */
+  if( pWriter->pTree ){
+    sqlite3_int64 iLast = 0;      /* Largest block id written to database */
+    sqlite3_int64 iLastLeaf;      /* Largest leaf block id written to db */
+    char *zRoot = NULL;           /* Pointer to buffer containing root node */
+    int nRoot = 0;                /* Size of buffer zRoot */
+
+    iLastLeaf = pWriter->iFree;
+    rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData);
+    if( rc==SQLITE_OK ){
+      rc = fts3NodeWrite(p, pWriter->pTree, 1,
+          pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
+    }
+    if( rc==SQLITE_OK ){
+      rc = fts3WriteSegdir(p, iLevel, iIdx, 
+          pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot);
+    }
+  }else{
+    /* The entire tree fits on the root node. Write it to the segdir table. */
+    rc = fts3WriteSegdir(p, iLevel, iIdx, 
+        0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);
+  }
+  p->nLeafAdd++;
+  return rc;
+}
+
+/*
+** Release all memory held by the SegmentWriter object passed as the 
+** first argument.
+*/
+static void fts3SegWriterFree(SegmentWriter *pWriter){
+  if( pWriter ){
+    sqlite3_free(pWriter->aData);
+    sqlite3_free(pWriter->zMalloc);
+    fts3NodeFree(pWriter->pTree);
+    sqlite3_free(pWriter);
+  }
+}
+
+/*
+** The first value in the apVal[] array is assumed to contain an integer.
+** This function tests if there exist any documents with docid values that
+** are different from that integer. i.e. if deleting the document with docid
+** pRowid would mean the FTS3 table were empty.
+**
+** If successful, *pisEmpty is set to true if the table is empty except for
+** document pRowid, or false otherwise, and SQLITE_OK is returned. If an
+** error occurs, an SQLite error code is returned.
+*/
+static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
+  sqlite3_stmt *pStmt;
+  int rc;
+  if( p->zContentTbl ){
+    /* If using the content=xxx option, assume the table is never empty */
+    *pisEmpty = 0;
+    rc = SQLITE_OK;
+  }else{
+    rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
+    if( rc==SQLITE_OK ){
+      if( SQLITE_ROW==sqlite3_step(pStmt) ){
+        *pisEmpty = sqlite3_column_int(pStmt, 0);
+      }
+      rc = sqlite3_reset(pStmt);
+    }
+  }
+  return rc;
+}
+
+/*
+** Set *pnMax to the largest segment level in the database for the index
+** iIndex.
+**
+** Segment levels are stored in the 'level' column of the %_segdir table.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if not.
+*/
+static int fts3SegmentMaxLevel(
+  Fts3Table *p, 
+  int iLangid,
+  int iIndex, 
+  sqlite3_int64 *pnMax
+){
+  sqlite3_stmt *pStmt;
+  int rc;
+  assert( iIndex>=0 && iIndex<p->nIndex );
+
+  /* Set pStmt to the compiled version of:
+  **
+  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
+  **
+  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
+  */
+  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
+  if( rc!=SQLITE_OK ) return rc;
+  sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+  sqlite3_bind_int64(pStmt, 2, 
+      getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+  );
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    *pnMax = sqlite3_column_int64(pStmt, 0);
+  }
+  return sqlite3_reset(pStmt);
+}
+
+/*
+** iAbsLevel is an absolute level that may be assumed to exist within
+** the database. This function checks if it is the largest level number
+** within its index. Assuming no error occurs, *pbMax is set to 1 if
+** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK
+** is returned. If an error occurs, an error code is returned and the
+** final value of *pbMax is undefined.
+*/
+static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){
+
+  /* Set pStmt to the compiled version of:
+  **
+  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
+  **
+  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
+  */
+  sqlite3_stmt *pStmt;
+  int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
+  if( rc!=SQLITE_OK ) return rc;
+  sqlite3_bind_int64(pStmt, 1, iAbsLevel+1);
+  sqlite3_bind_int64(pStmt, 2, 
+      ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL
+  );
+
+  *pbMax = 0;
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL;
+  }
+  return sqlite3_reset(pStmt);
+}
+
+/*
+** Delete all entries in the %_segments table associated with the segment
+** opened with seg-reader pSeg. This function does not affect the contents
+** of the %_segdir table.
+*/
+static int fts3DeleteSegment(
+  Fts3Table *p,                   /* FTS table handle */
+  Fts3SegReader *pSeg             /* Segment to delete */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  if( pSeg->iStartBlock ){
+    sqlite3_stmt *pDelete;        /* SQL statement to delete rows */
+    rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock);
+      sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock);
+      sqlite3_step(pDelete);
+      rc = sqlite3_reset(pDelete);
+    }
+  }
+  return rc;
+}
+
+/*
+** This function is used after merging multiple segments into a single large
+** segment to delete the old, now redundant, segment b-trees. Specifically,
+** it:
+** 
+**   1) Deletes all %_segments entries for the segments associated with 
+**      each of the SegReader objects in the array passed as the third 
+**      argument, and
+**
+**   2) deletes all %_segdir entries with level iLevel, or all %_segdir
+**      entries regardless of level if (iLevel<0).
+**
+** SQLITE_OK is returned if successful, otherwise an SQLite error code.
+*/
+static int fts3DeleteSegdir(
+  Fts3Table *p,                   /* Virtual table handle */
+  int iLangid,                    /* Language id */
+  int iIndex,                     /* Index for p->aIndex */
+  int iLevel,                     /* Level of %_segdir entries to delete */
+  Fts3SegReader **apSegment,      /* Array of SegReader objects */
+  int nReader                     /* Size of array apSegment */
+){
+  int rc = SQLITE_OK;             /* Return Code */
+  int i;                          /* Iterator variable */
+  sqlite3_stmt *pDelete = 0;      /* SQL statement to delete rows */
+
+  for(i=0; rc==SQLITE_OK && i<nReader; i++){
+    rc = fts3DeleteSegment(p, apSegment[i]);
+  }
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
+  if( iLevel==FTS3_SEGCURSOR_ALL ){
+    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+      sqlite3_bind_int64(pDelete, 2, 
+          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+      );
+    }
+  }else{
+    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(
+          pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+      );
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    sqlite3_step(pDelete);
+    rc = sqlite3_reset(pDelete);
+  }
+
+  return rc;
+}
+
+/*
+** When this function is called, buffer *ppList (size *pnList bytes) contains 
+** a position list that may (or may not) feature multiple columns. This
+** function adjusts the pointer *ppList and the length *pnList so that they
+** identify the subset of the position list that corresponds to column iCol.
+**
+** If there are no entries in the input position list for column iCol, then
+** *pnList is set to zero before returning.
+**
+** If parameter bZero is non-zero, then any part of the input list following
+** the end of the output list is zeroed before returning.
+*/
+static void fts3ColumnFilter(
+  int iCol,                       /* Column to filter on */
+  int bZero,                      /* Zero out anything following *ppList */
+  char **ppList,                  /* IN/OUT: Pointer to position list */
+  int *pnList                     /* IN/OUT: Size of buffer *ppList in bytes */
+){
+  char *pList = *ppList;
+  int nList = *pnList;
+  char *pEnd = &pList[nList];
+  int iCurrent = 0;
+  char *p = pList;
+
+  assert( iCol>=0 );
+  while( 1 ){
+    char c = 0;
+    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
+  
+    if( iCol==iCurrent ){
+      nList = (int)(p - pList);
+      break;
+    }
+
+    nList -= (int)(p - pList);
+    pList = p;
+    if( nList==0 ){
+      break;
+    }
+    p = &pList[1];
+    p += fts3GetVarint32(p, &iCurrent);
+  }
+
+  if( bZero && &pList[nList]!=pEnd ){
+    memset(&pList[nList], 0, pEnd - &pList[nList]);
+  }
+  *ppList = pList;
+  *pnList = nList;
+}
+
+/*
+** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any
+** existing data). Grow the buffer if required.
+**
+** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered
+** trying to resize the buffer, return SQLITE_NOMEM.
+*/
+static int fts3MsrBufferData(
+  Fts3MultiSegReader *pMsr,       /* Multi-segment-reader handle */
+  char *pList,
+  int nList
+){
+  if( nList>pMsr->nBuffer ){
+    char *pNew;
+    pMsr->nBuffer = nList*2;
+    pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer);
+    if( !pNew ) return SQLITE_NOMEM;
+    pMsr->aBuffer = pNew;
+  }
+
+  memcpy(pMsr->aBuffer, pList, nList);
+  return SQLITE_OK;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3MultiSegReader *pMsr,       /* Multi-segment-reader handle */
+  sqlite3_int64 *piDocid,         /* OUT: Docid value */
+  char **paPoslist,               /* OUT: Pointer to position list */
+  int *pnPoslist                  /* OUT: Size of position list in bytes */
+){
+  int nMerge = pMsr->nAdvance;
+  Fts3SegReader **apSegment = pMsr->apSegment;
+  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
+    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
+  );
+
+  if( nMerge==0 ){
+    *paPoslist = 0;
+    return SQLITE_OK;
+  }
+
+  while( 1 ){
+    Fts3SegReader *pSeg;
+    pSeg = pMsr->apSegment[0];
+
+    if( pSeg->pOffsetList==0 ){
+      *paPoslist = 0;
+      break;
+    }else{
+      int rc;
+      char *pList;
+      int nList;
+      int j;
+      sqlite3_int64 iDocid = apSegment[0]->iDocid;
+
+      rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
+      j = 1;
+      while( rc==SQLITE_OK 
+        && j<nMerge
+        && apSegment[j]->pOffsetList
+        && apSegment[j]->iDocid==iDocid
+      ){
+        rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
+        j++;
+      }
+      if( rc!=SQLITE_OK ) return rc;
+      fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp);
+
+      if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){
+        rc = fts3MsrBufferData(pMsr, pList, nList+1);
+        if( rc!=SQLITE_OK ) return rc;
+        assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 );
+        pList = pMsr->aBuffer;
+      }
+
+      if( pMsr->iColFilter>=0 ){
+        fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList);
+      }
+
+      if( nList>0 ){
+        *paPoslist = pList;
+        *piDocid = iDocid;
+        *pnPoslist = nList;
+        break;
+      }
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+static int fts3SegReaderStart(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3MultiSegReader *pCsr,       /* Cursor object */
+  const char *zTerm,              /* Term searched for (or NULL) */
+  int nTerm                       /* Length of zTerm in bytes */
+){
+  int i;
+  int nSeg = pCsr->nSegment;
+
+  /* If the Fts3SegFilter defines a specific term (or term prefix) to search 
+  ** for, then advance each segment iterator until it points to a term of
+  ** equal or greater value than the specified term. This prevents many
+  ** unnecessary merge/sort operations for the case where single segment
+  ** b-tree leaf nodes contain more than one term.
+  */
+  for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){
+    int res = 0;
+    Fts3SegReader *pSeg = pCsr->apSegment[i];
+    do {
+      int rc = fts3SegReaderNext(p, pSeg, 0);
+      if( rc!=SQLITE_OK ) return rc;
+    }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 );
+
+    if( pSeg->bLookup && res!=0 ){
+      fts3SegReaderSetEof(pSeg);
+    }
+  }
+  fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp);
+
+  return SQLITE_OK;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3SegReaderStart(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3MultiSegReader *pCsr,       /* Cursor object */
+  Fts3SegFilter *pFilter          /* Restrictions on range of iteration */
+){
+  pCsr->pFilter = pFilter;
+  return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm);
+}
+
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3MultiSegReader *pCsr,       /* Cursor object */
+  int iCol,                       /* Column to match on. */
+  const char *zTerm,              /* Term to iterate through a doclist for */
+  int nTerm                       /* Number of bytes in zTerm */
+){
+  int i;
+  int rc;
+  int nSegment = pCsr->nSegment;
+  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
+    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
+  );
+
+  assert( pCsr->pFilter==0 );
+  assert( zTerm && nTerm>0 );
+
+  /* Advance each segment iterator until it points to the term zTerm/nTerm. */
+  rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Determine how many of the segments actually point to zTerm/nTerm. */
+  for(i=0; i<nSegment; i++){
+    Fts3SegReader *pSeg = pCsr->apSegment[i];
+    if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){
+      break;
+    }
+  }
+  pCsr->nAdvance = i;
+
+  /* Advance each of the segments to point to the first docid. */
+  for(i=0; i<pCsr->nAdvance; i++){
+    rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  fts3SegReaderSort(pCsr->apSegment, i, i, xCmp);
+
+  assert( iCol<0 || iCol<p->nColumn );
+  pCsr->iColFilter = iCol;
+
+  return SQLITE_OK;
+}
+
+/*
+** This function is called on a MultiSegReader that has been started using
+** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also
+** have been made. Calling this function puts the MultiSegReader in such
+** a state that if the next two calls are:
+**
+**   sqlite3Fts3SegReaderStart()
+**   sqlite3Fts3SegReaderStep()
+**
+** then the entire doclist for the term is available in 
+** MultiSegReader.aDoclist/nDoclist.
+*/
+SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){
+  int i;                          /* Used to iterate through segment-readers */
+
+  assert( pCsr->zTerm==0 );
+  assert( pCsr->nTerm==0 );
+  assert( pCsr->aDoclist==0 );
+  assert( pCsr->nDoclist==0 );
+
+  pCsr->nAdvance = 0;
+  pCsr->bRestart = 1;
+  for(i=0; i<pCsr->nSegment; i++){
+    pCsr->apSegment[i]->pOffsetList = 0;
+    pCsr->apSegment[i]->nOffsetList = 0;
+    pCsr->apSegment[i]->iDocid = 0;
+  }
+
+  return SQLITE_OK;
+}
+
+
+SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(
+  Fts3Table *p,                   /* Virtual table handle */
+  Fts3MultiSegReader *pCsr        /* Cursor object */
+){
+  int rc = SQLITE_OK;
+
+  int isIgnoreEmpty =  (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
+  int isRequirePos =   (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
+  int isColFilter =    (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
+  int isPrefix =       (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
+  int isScan =         (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
+  int isFirst =        (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);
+
+  Fts3SegReader **apSegment = pCsr->apSegment;
+  int nSegment = pCsr->nSegment;
+  Fts3SegFilter *pFilter = pCsr->pFilter;
+  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
+    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
+  );
+
+  if( pCsr->nSegment==0 ) return SQLITE_OK;
+
+  do {
+    int nMerge;
+    int i;
+  
+    /* Advance the first pCsr->nAdvance entries in the apSegment[] array
+    ** forward. Then sort the list in order of current term again.  
+    */
+    for(i=0; i<pCsr->nAdvance; i++){
+      Fts3SegReader *pSeg = apSegment[i];
+      if( pSeg->bLookup ){
+        fts3SegReaderSetEof(pSeg);
+      }else{
+        rc = fts3SegReaderNext(p, pSeg, 0);
+      }
+      if( rc!=SQLITE_OK ) return rc;
+    }
+    fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp);
+    pCsr->nAdvance = 0;
+
+    /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */
+    assert( rc==SQLITE_OK );
+    if( apSegment[0]->aNode==0 ) break;
+
+    pCsr->nTerm = apSegment[0]->nTerm;
+    pCsr->zTerm = apSegment[0]->zTerm;
+
+    /* If this is a prefix-search, and if the term that apSegment[0] points
+    ** to does not share a suffix with pFilter->zTerm/nTerm, then all 
+    ** required callbacks have been made. In this case exit early.
+    **
+    ** Similarly, if this is a search for an exact match, and the first term
+    ** of segment apSegment[0] is not a match, exit early.
+    */
+    if( pFilter->zTerm && !isScan ){
+      if( pCsr->nTerm<pFilter->nTerm 
+       || (!isPrefix && pCsr->nTerm>pFilter->nTerm)
+       || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) 
+      ){
+        break;
+      }
+    }
+
+    nMerge = 1;
+    while( nMerge<nSegment 
+        && apSegment[nMerge]->aNode
+        && apSegment[nMerge]->nTerm==pCsr->nTerm 
+        && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm)
+    ){
+      nMerge++;
+    }
+
+    assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
+    if( nMerge==1 
+     && !isIgnoreEmpty 
+     && !isFirst 
+     && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
+    ){
+      pCsr->nDoclist = apSegment[0]->nDoclist;
+      if( fts3SegReaderIsPending(apSegment[0]) ){
+        rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist);
+        pCsr->aDoclist = pCsr->aBuffer;
+      }else{
+        pCsr->aDoclist = apSegment[0]->aDoclist;
+      }
+      if( rc==SQLITE_OK ) rc = SQLITE_ROW;
+    }else{
+      int nDoclist = 0;           /* Size of doclist */
+      sqlite3_int64 iPrev = 0;    /* Previous docid stored in doclist */
+
+      /* The current term of the first nMerge entries in the array
+      ** of Fts3SegReader objects is the same. The doclists must be merged
+      ** and a single term returned with the merged doclist.
+      */
+      for(i=0; i<nMerge; i++){
+        fts3SegReaderFirstDocid(p, apSegment[i]);
+      }
+      fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp);
+      while( apSegment[0]->pOffsetList ){
+        int j;                    /* Number of segments that share a docid */
+        char *pList = 0;
+        int nList = 0;
+        int nByte;
+        sqlite3_int64 iDocid = apSegment[0]->iDocid;
+        fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
+        j = 1;
+        while( j<nMerge
+            && apSegment[j]->pOffsetList
+            && apSegment[j]->iDocid==iDocid
+        ){
+          fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
+          j++;
+        }
+
+        if( isColFilter ){
+          fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList);
+        }
+
+        if( !isIgnoreEmpty || nList>0 ){
+
+          /* Calculate the 'docid' delta value to write into the merged 
+          ** doclist. */
+          sqlite3_int64 iDelta;
+          if( p->bDescIdx && nDoclist>0 ){
+            iDelta = iPrev - iDocid;
+          }else{
+            iDelta = iDocid - iPrev;
+          }
+          assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) );
+          assert( nDoclist>0 || iDelta==iDocid );
+
+          nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0);
+          if( nDoclist+nByte>pCsr->nBuffer ){
+            char *aNew;
+            pCsr->nBuffer = (nDoclist+nByte)*2;
+            aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer);
+            if( !aNew ){
+              return SQLITE_NOMEM;
+            }
+            pCsr->aBuffer = aNew;
+          }
+
+          if( isFirst ){
+            char *a = &pCsr->aBuffer[nDoclist];
+            int nWrite;
+           
+            nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
+            if( nWrite ){
+              iPrev = iDocid;
+              nDoclist += nWrite;
+            }
+          }else{
+            nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
+            iPrev = iDocid;
+            if( isRequirePos ){
+              memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
+              nDoclist += nList;
+              pCsr->aBuffer[nDoclist++] = '\0';
+            }
+          }
+        }
+
+        fts3SegReaderSort(apSegment, nMerge, j, xCmp);
+      }
+      if( nDoclist>0 ){
+        pCsr->aDoclist = pCsr->aBuffer;
+        pCsr->nDoclist = nDoclist;
+        rc = SQLITE_ROW;
+      }
+    }
+    pCsr->nAdvance = nMerge;
+  }while( rc==SQLITE_OK );
+
+  return rc;
+}
+
+
+SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish(
+  Fts3MultiSegReader *pCsr       /* Cursor object */
+){
+  if( pCsr ){
+    int i;
+    for(i=0; i<pCsr->nSegment; i++){
+      sqlite3Fts3SegReaderFree(pCsr->apSegment[i]);
+    }
+    sqlite3_free(pCsr->apSegment);
+    sqlite3_free(pCsr->aBuffer);
+
+    pCsr->nSegment = 0;
+    pCsr->apSegment = 0;
+    pCsr->aBuffer = 0;
+  }
+}
+
+/*
+** Decode the "end_block" field, selected by column iCol of the SELECT 
+** statement passed as the first argument. 
+**
+** The "end_block" field may contain either an integer, or a text field
+** containing the text representation of two non-negative integers separated 
+** by one or more space (0x20) characters. In the first case, set *piEndBlock 
+** to the integer value and *pnByte to zero before returning. In the second, 
+** set *piEndBlock to the first value and *pnByte to the second.
+*/
+static void fts3ReadEndBlockField(
+  sqlite3_stmt *pStmt, 
+  int iCol, 
+  i64 *piEndBlock,
+  i64 *pnByte
+){
+  const unsigned char *zText = sqlite3_column_text(pStmt, iCol);
+  if( zText ){
+    int i;
+    int iMul = 1;
+    i64 iVal = 0;
+    for(i=0; zText[i]>='0' && zText[i]<='9'; i++){
+      iVal = iVal*10 + (zText[i] - '0');
+    }
+    *piEndBlock = iVal;
+    while( zText[i]==' ' ) i++;
+    iVal = 0;
+    if( zText[i]=='-' ){
+      i++;
+      iMul = -1;
+    }
+    for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){
+      iVal = iVal*10 + (zText[i] - '0');
+    }
+    *pnByte = (iVal * (i64)iMul);
+  }
+}
+
+
+/*
+** A segment of size nByte bytes has just been written to absolute level
+** iAbsLevel. Promote any segments that should be promoted as a result.
+*/
+static int fts3PromoteSegments(
+  Fts3Table *p,                   /* FTS table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level just updated */
+  sqlite3_int64 nByte             /* Size of new segment at iAbsLevel */
+){
+  int rc = SQLITE_OK;
+  sqlite3_stmt *pRange;
+
+  rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0);
+
+  if( rc==SQLITE_OK ){
+    int bOk = 0;
+    i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1;
+    i64 nLimit = (nByte*3)/2;
+
+    /* Loop through all entries in the %_segdir table corresponding to 
+    ** segments in this index on levels greater than iAbsLevel. If there is
+    ** at least one such segment, and it is possible to determine that all 
+    ** such segments are smaller than nLimit bytes in size, they will be 
+    ** promoted to level iAbsLevel.  */
+    sqlite3_bind_int64(pRange, 1, iAbsLevel+1);
+    sqlite3_bind_int64(pRange, 2, iLast);
+    while( SQLITE_ROW==sqlite3_step(pRange) ){
+      i64 nSize = 0, dummy;
+      fts3ReadEndBlockField(pRange, 2, &dummy, &nSize);
+      if( nSize<=0 || nSize>nLimit ){
+        /* If nSize==0, then the %_segdir.end_block field does not not 
+        ** contain a size value. This happens if it was written by an
+        ** old version of FTS. In this case it is not possible to determine
+        ** the size of the segment, and so segment promotion does not
+        ** take place.  */
+        bOk = 0;
+        break;
+      }
+      bOk = 1;
+    }
+    rc = sqlite3_reset(pRange);
+
+    if( bOk ){
+      int iIdx = 0;
+      sqlite3_stmt *pUpdate1 = 0;
+      sqlite3_stmt *pUpdate2 = 0;
+
+      if( rc==SQLITE_OK ){
+        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0);
+      }
+      if( rc==SQLITE_OK ){
+        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0);
+      }
+
+      if( rc==SQLITE_OK ){
+
+        /* Loop through all %_segdir entries for segments in this index with
+        ** levels equal to or greater than iAbsLevel. As each entry is visited,
+        ** updated it to set (level = -1) and (idx = N), where N is 0 for the
+        ** oldest segment in the range, 1 for the next oldest, and so on.
+        **
+        ** In other words, move all segments being promoted to level -1,
+        ** setting the "idx" fields as appropriate to keep them in the same
+        ** order. The contents of level -1 (which is never used, except
+        ** transiently here), will be moved back to level iAbsLevel below.  */
+        sqlite3_bind_int64(pRange, 1, iAbsLevel);
+        while( SQLITE_ROW==sqlite3_step(pRange) ){
+          sqlite3_bind_int(pUpdate1, 1, iIdx++);
+          sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0));
+          sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1));
+          sqlite3_step(pUpdate1);
+          rc = sqlite3_reset(pUpdate1);
+          if( rc!=SQLITE_OK ){
+            sqlite3_reset(pRange);
+            break;
+          }
+        }
+      }
+      if( rc==SQLITE_OK ){
+        rc = sqlite3_reset(pRange);
+      }
+
+      /* Move level -1 to level iAbsLevel */
+      if( rc==SQLITE_OK ){
+        sqlite3_bind_int64(pUpdate2, 1, iAbsLevel);
+        sqlite3_step(pUpdate2);
+        rc = sqlite3_reset(pUpdate2);
+      }
+    }
+  }
+
+
+  return rc;
+}
+
+/*
+** Merge all level iLevel segments in the database into a single 
+** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
+** single segment with a level equal to the numerically largest level 
+** currently present in the database.
+**
+** If this function is called with iLevel<0, but there is only one
+** segment in the database, SQLITE_DONE is returned immediately. 
+** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, 
+** an SQLite error code is returned.
+*/
+static int fts3SegmentMerge(
+  Fts3Table *p, 
+  int iLangid,                    /* Language id to merge */
+  int iIndex,                     /* Index in p->aIndex[] to merge */
+  int iLevel                      /* Level to merge */
+){
+  int rc;                         /* Return code */
+  int iIdx = 0;                   /* Index of new segment */
+  sqlite3_int64 iNewLevel = 0;    /* Level/index to create new segment at */
+  SegmentWriter *pWriter = 0;     /* Used to write the new, merged, segment */
+  Fts3SegFilter filter;           /* Segment term filter condition */
+  Fts3MultiSegReader csr;         /* Cursor to iterate through level(s) */
+  int bIgnoreEmpty = 0;           /* True to ignore empty segments */
+  i64 iMaxLevel = 0;              /* Max level number for this index/langid */
+
+  assert( iLevel==FTS3_SEGCURSOR_ALL
+       || iLevel==FTS3_SEGCURSOR_PENDING
+       || iLevel>=0
+  );
+  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
+  assert( iIndex>=0 && iIndex<p->nIndex );
+
+  rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
+  if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
+
+  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
+    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
+    if( rc!=SQLITE_OK ) goto finished;
+  }
+
+  if( iLevel==FTS3_SEGCURSOR_ALL ){
+    /* This call is to merge all segments in the database to a single
+    ** segment. The level of the new segment is equal to the numerically
+    ** greatest segment level currently present in the database for this
+    ** index. The idx of the new segment is always 0.  */
+    if( csr.nSegment==1 ){
+      rc = SQLITE_DONE;
+      goto finished;
+    }
+    iNewLevel = iMaxLevel;
+    bIgnoreEmpty = 1;
+
+  }else{
+    /* This call is to merge all segments at level iLevel. find the next
+    ** available segment index at level iLevel+1. The call to
+    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
+    ** a single iLevel+2 segment if necessary.  */
+    assert( FTS3_SEGCURSOR_PENDING==-1 );
+    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
+    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+    bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
+  }
+  if( rc!=SQLITE_OK ) goto finished;
+
+  assert( csr.nSegment>0 );
+  assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
+  assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
+
+  memset(&filter, 0, sizeof(Fts3SegFilter));
+  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
+  filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
+
+  rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
+  while( SQLITE_OK==rc ){
+    rc = sqlite3Fts3SegReaderStep(p, &csr);
+    if( rc!=SQLITE_ROW ) break;
+    rc = fts3SegWriterAdd(p, &pWriter, 1, 
+        csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
+  }
+  if( rc!=SQLITE_OK ) goto finished;
+  assert( pWriter || bIgnoreEmpty );
+
+  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
+    rc = fts3DeleteSegdir(
+        p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
+    );
+    if( rc!=SQLITE_OK ) goto finished;
+  }
+  if( pWriter ){
+    rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
+    if( rc==SQLITE_OK ){
+      if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
+        rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
+      }
+    }
+  }
+
+ finished:
+  fts3SegWriterFree(pWriter);
+  sqlite3Fts3SegReaderFinish(&csr);
+  return rc;
+}
+
+
+/* 
+** Flush the contents of pendingTerms to level 0 segments. 
+*/
+SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
+  int rc = SQLITE_OK;
+  int i;
+        
+  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
+    rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
+    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+  }
+  sqlite3Fts3PendingTermsClear(p);
+
+  /* Determine the auto-incr-merge setting if unknown.  If enabled,
+  ** estimate the number of leaf blocks of content to be written
+  */
+  if( rc==SQLITE_OK && p->bHasStat
+   && p->nAutoincrmerge==0xff && p->nLeafAdd>0
+  ){
+    sqlite3_stmt *pStmt = 0;
+    rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
+      rc = sqlite3_step(pStmt);
+      if( rc==SQLITE_ROW ){
+        p->nAutoincrmerge = sqlite3_column_int(pStmt, 0);
+        if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8;
+      }else if( rc==SQLITE_DONE ){
+        p->nAutoincrmerge = 0;
+      }
+      rc = sqlite3_reset(pStmt);
+    }
+  }
+  return rc;
+}
+
+/*
+** Encode N integers as varints into a blob.
+*/
+static void fts3EncodeIntArray(
+  int N,             /* The number of integers to encode */
+  u32 *a,            /* The integer values */
+  char *zBuf,        /* Write the BLOB here */
+  int *pNBuf         /* Write number of bytes if zBuf[] used here */
+){
+  int i, j;
+  for(i=j=0; i<N; i++){
+    j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]);
+  }
+  *pNBuf = j;
+}
+
+/*
+** Decode a blob of varints into N integers
+*/
+static void fts3DecodeIntArray(
+  int N,             /* The number of integers to decode */
+  u32 *a,            /* Write the integer values */
+  const char *zBuf,  /* The BLOB containing the varints */
+  int nBuf           /* size of the BLOB */
+){
+  int i, j;
+  UNUSED_PARAMETER(nBuf);
+  for(i=j=0; i<N; i++){
+    sqlite3_int64 x;
+    j += sqlite3Fts3GetVarint(&zBuf[j], &x);
+    assert(j<=nBuf);
+    a[i] = (u32)(x & 0xffffffff);
+  }
+}
+
+/*
+** Insert the sizes (in tokens) for each column of the document
+** with docid equal to p->iPrevDocid.  The sizes are encoded as
+** a blob of varints.
+*/
+static void fts3InsertDocsize(
+  int *pRC,                       /* Result code */
+  Fts3Table *p,                   /* Table into which to insert */
+  u32 *aSz                        /* Sizes of each column, in tokens */
+){
+  char *pBlob;             /* The BLOB encoding of the document size */
+  int nBlob;               /* Number of bytes in the BLOB */
+  sqlite3_stmt *pStmt;     /* Statement used to insert the encoding */
+  int rc;                  /* Result code from subfunctions */
+
+  if( *pRC ) return;
+  pBlob = sqlite3_malloc( 10*p->nColumn );
+  if( pBlob==0 ){
+    *pRC = SQLITE_NOMEM;
+    return;
+  }
+  fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob);
+  rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0);
+  if( rc ){
+    sqlite3_free(pBlob);
+    *pRC = rc;
+    return;
+  }
+  sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
+  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
+  sqlite3_step(pStmt);
+  *pRC = sqlite3_reset(pStmt);
+}
+
+/*
+** Record 0 of the %_stat table contains a blob consisting of N varints,
+** where N is the number of user defined columns in the fts3 table plus
+** two. If nCol is the number of user defined columns, then values of the 
+** varints are set as follows:
+**
+**   Varint 0:       Total number of rows in the table.
+**
+**   Varint 1..nCol: For each column, the total number of tokens stored in
+**                   the column for all rows of the table.
+**
+**   Varint 1+nCol:  The total size, in bytes, of all text values in all
+**                   columns of all rows of the table.
+**
+*/
+static void fts3UpdateDocTotals(
+  int *pRC,                       /* The result code */
+  Fts3Table *p,                   /* Table being updated */
+  u32 *aSzIns,                    /* Size increases */
+  u32 *aSzDel,                    /* Size decreases */
+  int nChng                       /* Change in the number of documents */
+){
+  char *pBlob;             /* Storage for BLOB written into %_stat */
+  int nBlob;               /* Size of BLOB written into %_stat */
+  u32 *a;                  /* Array of integers that becomes the BLOB */
+  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
+  int i;                   /* Loop counter */
+  int rc;                  /* Result code from subfunctions */
+
+  const int nStat = p->nColumn+2;
+
+  if( *pRC ) return;
+  a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
+  if( a==0 ){
+    *pRC = SQLITE_NOMEM;
+    return;
+  }
+  pBlob = (char*)&a[nStat];
+  rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
+  if( rc ){
+    sqlite3_free(a);
+    *pRC = rc;
+    return;
+  }
+  sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
+  if( sqlite3_step(pStmt)==SQLITE_ROW ){
+    fts3DecodeIntArray(nStat, a,
+         sqlite3_column_blob(pStmt, 0),
+         sqlite3_column_bytes(pStmt, 0));
+  }else{
+    memset(a, 0, sizeof(u32)*(nStat) );
+  }
+  rc = sqlite3_reset(pStmt);
+  if( rc!=SQLITE_OK ){
+    sqlite3_free(a);
+    *pRC = rc;
+    return;
+  }
+  if( nChng<0 && a[0]<(u32)(-nChng) ){
+    a[0] = 0;
+  }else{
+    a[0] += nChng;
+  }
+  for(i=0; i<p->nColumn+1; i++){
+    u32 x = a[i+1];
+    if( x+aSzIns[i] < aSzDel[i] ){
+      x = 0;
+    }else{
+      x = x + aSzIns[i] - aSzDel[i];
+    }
+    a[i+1] = x;
+  }
+  fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
+  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
+  if( rc ){
+    sqlite3_free(a);
+    *pRC = rc;
+    return;
+  }
+  sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
+  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
+  sqlite3_step(pStmt);
+  *pRC = sqlite3_reset(pStmt);
+  sqlite3_free(a);
+}
+
+/*
+** Merge the entire database so that there is one segment for each 
+** iIndex/iLangid combination.
+*/
+static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
+  int bSeenDone = 0;
+  int rc;
+  sqlite3_stmt *pAllLangid = 0;
+
+  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
+    while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
+      int i;
+      int iLangid = sqlite3_column_int(pAllLangid, 0);
+      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
+        rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
+        if( rc==SQLITE_DONE ){
+          bSeenDone = 1;
+          rc = SQLITE_OK;
+        }
+      }
+    }
+    rc2 = sqlite3_reset(pAllLangid);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  sqlite3Fts3SegmentsClose(p);
+  sqlite3Fts3PendingTermsClear(p);
+
+  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
+}
+
+/*
+** This function is called when the user executes the following statement:
+**
+**     INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
+**
+** The entire FTS index is discarded and rebuilt. If the table is one 
+** created using the content=xxx option, then the new index is based on
+** the current contents of the xxx table. Otherwise, it is rebuilt based
+** on the contents of the %_content table.
+*/
+static int fts3DoRebuild(Fts3Table *p){
+  int rc;                         /* Return Code */
+
+  rc = fts3DeleteAll(p, 0);
+  if( rc==SQLITE_OK ){
+    u32 *aSz = 0;
+    u32 *aSzIns = 0;
+    u32 *aSzDel = 0;
+    sqlite3_stmt *pStmt = 0;
+    int nEntry = 0;
+
+    /* Compose and prepare an SQL statement to loop through the content table */
+    char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
+    if( !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+      sqlite3_free(zSql);
+    }
+
+    if( rc==SQLITE_OK ){
+      int nByte = sizeof(u32) * (p->nColumn+1)*3;
+      aSz = (u32 *)sqlite3_malloc(nByte);
+      if( aSz==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memset(aSz, 0, nByte);
+        aSzIns = &aSz[p->nColumn+1];
+        aSzDel = &aSzIns[p->nColumn+1];
+      }
+    }
+
+    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+      int iCol;
+      int iLangid = langidFromSelect(p, pStmt);
+      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
+      memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
+      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
+        if( p->abNotindexed[iCol]==0 ){
+          const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
+          rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
+          aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
+        }
+      }
+      if( p->bHasDocsize ){
+        fts3InsertDocsize(&rc, p, aSz);
+      }
+      if( rc!=SQLITE_OK ){
+        sqlite3_finalize(pStmt);
+        pStmt = 0;
+      }else{
+        nEntry++;
+        for(iCol=0; iCol<=p->nColumn; iCol++){
+          aSzIns[iCol] += aSz[iCol];
+        }
+      }
+    }
+    if( p->bFts4 ){
+      fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);
+    }
+    sqlite3_free(aSz);
+
+    if( pStmt ){
+      int rc2 = sqlite3_finalize(pStmt);
+      if( rc==SQLITE_OK ){
+        rc = rc2;
+      }
+    }
+  }
+
+  return rc;
+}
+
+
+/*
+** This function opens a cursor used to read the input data for an 
+** incremental merge operation. Specifically, it opens a cursor to scan
+** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute 
+** level iAbsLevel.
+*/
+static int fts3IncrmergeCsr(
+  Fts3Table *p,                   /* FTS3 table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level to open */
+  int nSeg,                       /* Number of segments to merge */
+  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
+){
+  int rc;                         /* Return Code */
+  sqlite3_stmt *pStmt = 0;        /* Statement used to read %_segdir entry */  
+  int nByte;                      /* Bytes allocated at pCsr->apSegment[] */
+
+  /* Allocate space for the Fts3MultiSegReader.aCsr[] array */
+  memset(pCsr, 0, sizeof(*pCsr));
+  nByte = sizeof(Fts3SegReader *) * nSeg;
+  pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
+
+  if( pCsr->apSegment==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    memset(pCsr->apSegment, 0, nByte);
+    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
+  }
+  if( rc==SQLITE_OK ){
+    int i;
+    int rc2;
+    sqlite3_bind_int64(pStmt, 1, iAbsLevel);
+    assert( pCsr->nSegment==0 );
+    for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){
+      rc = sqlite3Fts3SegReaderNew(i, 0,
+          sqlite3_column_int64(pStmt, 1),        /* segdir.start_block */
+          sqlite3_column_int64(pStmt, 2),        /* segdir.leaves_end_block */
+          sqlite3_column_int64(pStmt, 3),        /* segdir.end_block */
+          sqlite3_column_blob(pStmt, 4),         /* segdir.root */
+          sqlite3_column_bytes(pStmt, 4),        /* segdir.root */
+          &pCsr->apSegment[i]
+      );
+      pCsr->nSegment++;
+    }
+    rc2 = sqlite3_reset(pStmt);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+typedef struct IncrmergeWriter IncrmergeWriter;
+typedef struct NodeWriter NodeWriter;
+typedef struct Blob Blob;
+typedef struct NodeReader NodeReader;
+
+/*
+** An instance of the following structure is used as a dynamic buffer
+** to build up nodes or other blobs of data in.
+**
+** The function blobGrowBuffer() is used to extend the allocation.
+*/
+struct Blob {
+  char *a;                        /* Pointer to allocation */
+  int n;                          /* Number of valid bytes of data in a[] */
+  int nAlloc;                     /* Allocated size of a[] (nAlloc>=n) */
+};
+
+/*
+** This structure is used to build up buffers containing segment b-tree 
+** nodes (blocks).
+*/
+struct NodeWriter {
+  sqlite3_int64 iBlock;           /* Current block id */
+  Blob key;                       /* Last key written to the current block */
+  Blob block;                     /* Current block image */
+};
+
+/*
+** An object of this type contains the state required to create or append
+** to an appendable b-tree segment.
+*/
+struct IncrmergeWriter {
+  int nLeafEst;                   /* Space allocated for leaf blocks */
+  int nWork;                      /* Number of leaf pages flushed */
+  sqlite3_int64 iAbsLevel;        /* Absolute level of input segments */
+  int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */
+  sqlite3_int64 iStart;           /* Block number of first allocated block */
+  sqlite3_int64 iEnd;             /* Block number of last allocated block */
+  sqlite3_int64 nLeafData;        /* Bytes of leaf page data so far */
+  u8 bNoLeafData;                 /* If true, store 0 for segment size */
+  NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
+};
+
+/*
+** An object of the following type is used to read data from a single
+** FTS segment node. See the following functions:
+**
+**     nodeReaderInit()
+**     nodeReaderNext()
+**     nodeReaderRelease()
+*/
+struct NodeReader {
+  const char *aNode;
+  int nNode;
+  int iOff;                       /* Current offset within aNode[] */
+
+  /* Output variables. Containing the current node entry. */
+  sqlite3_int64 iChild;           /* Pointer to child node */
+  Blob term;                      /* Current term */
+  const char *aDoclist;           /* Pointer to doclist */
+  int nDoclist;                   /* Size of doclist in bytes */
+};
+
+/*
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, if the allocation at pBlob->a is not already at least nMin
+** bytes in size, extend (realloc) it to be so.
+**
+** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a
+** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc
+** to reflect the new size of the pBlob->a[] buffer.
+*/
+static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
+  if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){
+    int nAlloc = nMin;
+    char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc);
+    if( a ){
+      pBlob->nAlloc = nAlloc;
+      pBlob->a = a;
+    }else{
+      *pRc = SQLITE_NOMEM;
+    }
+  }
+}
+
+/*
+** Attempt to advance the node-reader object passed as the first argument to
+** the next entry on the node. 
+**
+** Return an error code if an error occurs (SQLITE_NOMEM is possible). 
+** Otherwise return SQLITE_OK. If there is no next entry on the node
+** (e.g. because the current entry is the last) set NodeReader->aNode to
+** NULL to indicate EOF. Otherwise, populate the NodeReader structure output 
+** variables for the new entry.
+*/
+static int nodeReaderNext(NodeReader *p){
+  int bFirst = (p->term.n==0);    /* True for first term on the node */
+  int nPrefix = 0;                /* Bytes to copy from previous term */
+  int nSuffix = 0;                /* Bytes to append to the prefix */
+  int rc = SQLITE_OK;             /* Return code */
+
+  assert( p->aNode );
+  if( p->iChild && bFirst==0 ) p->iChild++;
+  if( p->iOff>=p->nNode ){
+    /* EOF */
+    p->aNode = 0;
+  }else{
+    if( bFirst==0 ){
+      p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
+    }
+    p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
+
+    blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);
+    if( rc==SQLITE_OK ){
+      memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix);
+      p->term.n = nPrefix+nSuffix;
+      p->iOff += nSuffix;
+      if( p->iChild==0 ){
+        p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
+        p->aDoclist = &p->aNode[p->iOff];
+        p->iOff += p->nDoclist;
+      }
+    }
+  }
+
+  assert( p->iOff<=p->nNode );
+
+  return rc;
+}
+
+/*
+** Release all dynamic resources held by node-reader object *p.
+*/
+static void nodeReaderRelease(NodeReader *p){
+  sqlite3_free(p->term.a);
+}
+
+/*
+** Initialize a node-reader object to read the node in buffer aNode/nNode.
+**
+** If successful, SQLITE_OK is returned and the NodeReader object set to 
+** point to the first entry on the node (if any). Otherwise, an SQLite
+** error code is returned.
+*/
+static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
+  memset(p, 0, sizeof(NodeReader));
+  p->aNode = aNode;
+  p->nNode = nNode;
+
+  /* Figure out if this is a leaf or an internal node. */
+  if( p->aNode[0] ){
+    /* An internal node. */
+    p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild);
+  }else{
+    p->iOff = 1;
+  }
+
+  return nodeReaderNext(p);
+}
+
+/*
+** This function is called while writing an FTS segment each time a leaf o
+** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
+** to be greater than the largest key on the node just written, but smaller
+** than or equal to the first key that will be written to the next leaf
+** node.
+**
+** The block id of the leaf node just written to disk may be found in
+** (pWriter->aNodeWriter[0].iBlock) when this function is called.
+*/
+static int fts3IncrmergePush(
+  Fts3Table *p,                   /* Fts3 table handle */
+  IncrmergeWriter *pWriter,       /* Writer object */
+  const char *zTerm,              /* Term to write to internal node */
+  int nTerm                       /* Bytes at zTerm */
+){
+  sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock;
+  int iLayer;
+
+  assert( nTerm>0 );
+  for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
+    sqlite3_int64 iNextPtr = 0;
+    NodeWriter *pNode = &pWriter->aNodeWriter[iLayer];
+    int rc = SQLITE_OK;
+    int nPrefix;
+    int nSuffix;
+    int nSpace;
+
+    /* Figure out how much space the key will consume if it is written to
+    ** the current node of layer iLayer. Due to the prefix compression, 
+    ** the space required changes depending on which node the key is to
+    ** be added to.  */
+    nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm);
+    nSuffix = nTerm - nPrefix;
+    nSpace  = sqlite3Fts3VarintLen(nPrefix);
+    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+
+    if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ 
+      /* If the current node of layer iLayer contains zero keys, or if adding
+      ** the key to it will not cause it to grow to larger than nNodeSize 
+      ** bytes in size, write the key here.  */
+
+      Blob *pBlk = &pNode->block;
+      if( pBlk->n==0 ){
+        blobGrowBuffer(pBlk, p->nNodeSize, &rc);
+        if( rc==SQLITE_OK ){
+          pBlk->a[0] = (char)iLayer;
+          pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
+        }
+      }
+      blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
+      blobGrowBuffer(&pNode->key, nTerm, &rc);
+
+      if( rc==SQLITE_OK ){
+        if( pNode->key.n ){
+          pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
+        }
+        pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
+        memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
+        pBlk->n += nSuffix;
+
+        memcpy(pNode->key.a, zTerm, nTerm);
+        pNode->key.n = nTerm;
+      }
+    }else{
+      /* Otherwise, flush the current node of layer iLayer to disk.
+      ** Then allocate a new, empty sibling node. The key will be written
+      ** into the parent of this node. */
+      rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
+
+      assert( pNode->block.nAlloc>=p->nNodeSize );
+      pNode->block.a[0] = (char)iLayer;
+      pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1);
+
+      iNextPtr = pNode->iBlock;
+      pNode->iBlock++;
+      pNode->key.n = 0;
+    }
+
+    if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
+    iPtr = iNextPtr;
+  }
+
+  assert( 0 );
+  return 0;
+}
+
+/*
+** Append a term and (optionally) doclist to the FTS segment node currently
+** stored in blob *pNode. The node need not contain any terms, but the
+** header must be written before this function is called.
+**
+** A node header is a single 0x00 byte for a leaf node, or a height varint
+** followed by the left-hand-child varint for an internal node.
+**
+** The term to be appended is passed via arguments zTerm/nTerm. For a 
+** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal
+** node, both aDoclist and nDoclist must be passed 0.
+**
+** If the size of the value in blob pPrev is zero, then this is the first
+** term written to the node. Otherwise, pPrev contains a copy of the 
+** previous term. Before this function returns, it is updated to contain a
+** copy of zTerm/nTerm.
+**
+** It is assumed that the buffer associated with pNode is already large
+** enough to accommodate the new entry. The buffer associated with pPrev
+** is extended by this function if requrired.
+**
+** If an error (i.e. OOM condition) occurs, an SQLite error code is
+** returned. Otherwise, SQLITE_OK.
+*/
+static int fts3AppendToNode(
+  Blob *pNode,                    /* Current node image to append to */
+  Blob *pPrev,                    /* Buffer containing previous term written */
+  const char *zTerm,              /* New term to write */
+  int nTerm,                      /* Size of zTerm in bytes */
+  const char *aDoclist,           /* Doclist (or NULL) to write */
+  int nDoclist                    /* Size of aDoclist in bytes */ 
+){
+  int rc = SQLITE_OK;             /* Return code */
+  int bFirst = (pPrev->n==0);     /* True if this is the first term written */
+  int nPrefix;                    /* Size of term prefix in bytes */
+  int nSuffix;                    /* Size of term suffix in bytes */
+
+  /* Node must have already been started. There must be a doclist for a
+  ** leaf node, and there must not be a doclist for an internal node.  */
+  assert( pNode->n>0 );
+  assert( (pNode->a[0]=='\0')==(aDoclist!=0) );
+
+  blobGrowBuffer(pPrev, nTerm, &rc);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
+  nSuffix = nTerm - nPrefix;
+  memcpy(pPrev->a, zTerm, nTerm);
+  pPrev->n = nTerm;
+
+  if( bFirst==0 ){
+    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
+  }
+  pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
+  memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
+  pNode->n += nSuffix;
+
+  if( aDoclist ){
+    pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
+    memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
+    pNode->n += nDoclist;
+  }
+
+  assert( pNode->n<=pNode->nAlloc );
+
+  return SQLITE_OK;
+}
+
+/*
+** Append the current term and doclist pointed to by cursor pCsr to the
+** appendable b-tree segment opened for writing by pWriter.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise.
+*/
+static int fts3IncrmergeAppend(
+  Fts3Table *p,                   /* Fts3 table handle */
+  IncrmergeWriter *pWriter,       /* Writer object */
+  Fts3MultiSegReader *pCsr        /* Cursor containing term and doclist */
+){
+  const char *zTerm = pCsr->zTerm;
+  int nTerm = pCsr->nTerm;
+  const char *aDoclist = pCsr->aDoclist;
+  int nDoclist = pCsr->nDoclist;
+  int rc = SQLITE_OK;           /* Return code */
+  int nSpace;                   /* Total space in bytes required on leaf */
+  int nPrefix;                  /* Size of prefix shared with previous term */
+  int nSuffix;                  /* Size of suffix (nTerm - nPrefix) */
+  NodeWriter *pLeaf;            /* Object used to write leaf nodes */
+
+  pLeaf = &pWriter->aNodeWriter[0];
+  nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm);
+  nSuffix = nTerm - nPrefix;
+
+  nSpace  = sqlite3Fts3VarintLen(nPrefix);
+  nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+  nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
+
+  /* If the current block is not empty, and if adding this term/doclist
+  ** to the current block would make it larger than Fts3Table.nNodeSize
+  ** bytes, write this block out to the database. */
+  if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){
+    rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n);
+    pWriter->nWork++;
+
+    /* Add the current term to the parent node. The term added to the 
+    ** parent must:
+    **
+    **   a) be greater than the largest term on the leaf node just written
+    **      to the database (still available in pLeaf->key), and
+    **
+    **   b) be less than or equal to the term about to be added to the new
+    **      leaf node (zTerm/nTerm).
+    **
+    ** In other words, it must be the prefix of zTerm 1 byte longer than
+    ** the common prefix (if any) of zTerm and pWriter->zTerm.
+    */
+    if( rc==SQLITE_OK ){
+      rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
+    }
+
+    /* Advance to the next output block */
+    pLeaf->iBlock++;
+    pLeaf->key.n = 0;
+    pLeaf->block.n = 0;
+
+    nSuffix = nTerm;
+    nSpace  = 1;
+    nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+    nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
+  }
+
+  pWriter->nLeafData += nSpace;
+  blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
+  if( rc==SQLITE_OK ){
+    if( pLeaf->block.n==0 ){
+      pLeaf->block.n = 1;
+      pLeaf->block.a[0] = '\0';
+    }
+    rc = fts3AppendToNode(
+        &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist
+    );
+  }
+
+  return rc;
+}
+
+/*
+** This function is called to release all dynamic resources held by the
+** merge-writer object pWriter, and if no error has occurred, to flush
+** all outstanding node buffers held by pWriter to disk.
+**
+** If *pRc is not SQLITE_OK when this function is called, then no attempt
+** is made to write any data to disk. Instead, this function serves only
+** to release outstanding resources.
+**
+** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while
+** flushing buffers to disk, *pRc is set to an SQLite error code before
+** returning.
+*/
+static void fts3IncrmergeRelease(
+  Fts3Table *p,                   /* FTS3 table handle */
+  IncrmergeWriter *pWriter,       /* Merge-writer object */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  int i;                          /* Used to iterate through non-root layers */
+  int iRoot;                      /* Index of root in pWriter->aNodeWriter */
+  NodeWriter *pRoot;              /* NodeWriter for root node */
+  int rc = *pRc;                  /* Error code */
+
+  /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment 
+  ** root node. If the segment fits entirely on a single leaf node, iRoot
+  ** will be set to 0. If the root node is the parent of the leaves, iRoot
+  ** will be 1. And so on.  */
+  for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
+    NodeWriter *pNode = &pWriter->aNodeWriter[iRoot];
+    if( pNode->block.n>0 ) break;
+    assert( *pRc || pNode->block.nAlloc==0 );
+    assert( *pRc || pNode->key.nAlloc==0 );
+    sqlite3_free(pNode->block.a);
+    sqlite3_free(pNode->key.a);
+  }
+
+  /* Empty output segment. This is a no-op. */
+  if( iRoot<0 ) return;
+
+  /* The entire output segment fits on a single node. Normally, this means
+  ** the node would be stored as a blob in the "root" column of the %_segdir
+  ** table. However, this is not permitted in this case. The problem is that 
+  ** space has already been reserved in the %_segments table, and so the 
+  ** start_block and end_block fields of the %_segdir table must be populated. 
+  ** And, by design or by accident, released versions of FTS cannot handle 
+  ** segments that fit entirely on the root node with start_block!=0.
+  **
+  ** Instead, create a synthetic root node that contains nothing but a 
+  ** pointer to the single content node. So that the segment consists of a
+  ** single leaf and a single interior (root) node.
+  **
+  ** Todo: Better might be to defer allocating space in the %_segments 
+  ** table until we are sure it is needed.
+  */
+  if( iRoot==0 ){
+    Blob *pBlock = &pWriter->aNodeWriter[1].block;
+    blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
+    if( rc==SQLITE_OK ){
+      pBlock->a[0] = 0x01;
+      pBlock->n = 1 + sqlite3Fts3PutVarint(
+          &pBlock->a[1], pWriter->aNodeWriter[0].iBlock
+      );
+    }
+    iRoot = 1;
+  }
+  pRoot = &pWriter->aNodeWriter[iRoot];
+
+  /* Flush all currently outstanding nodes to disk. */
+  for(i=0; i<iRoot; i++){
+    NodeWriter *pNode = &pWriter->aNodeWriter[i];
+    if( pNode->block.n>0 && rc==SQLITE_OK ){
+      rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
+    }
+    sqlite3_free(pNode->block.a);
+    sqlite3_free(pNode->key.a);
+  }
+
+  /* Write the %_segdir record. */
+  if( rc==SQLITE_OK ){
+    rc = fts3WriteSegdir(p, 
+        pWriter->iAbsLevel+1,               /* level */
+        pWriter->iIdx,                      /* idx */
+        pWriter->iStart,                    /* start_block */
+        pWriter->aNodeWriter[0].iBlock,     /* leaves_end_block */
+        pWriter->iEnd,                      /* end_block */
+        (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0),   /* end_block */
+        pRoot->block.a, pRoot->block.n      /* root */
+    );
+  }
+  sqlite3_free(pRoot->block.a);
+  sqlite3_free(pRoot->key.a);
+
+  *pRc = rc;
+}
+
+/*
+** Compare the term in buffer zLhs (size in bytes nLhs) with that in
+** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
+** the other, it is considered to be smaller than the other.
+**
+** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
+** if it is greater.
+*/
+static int fts3TermCmp(
+  const char *zLhs, int nLhs,     /* LHS of comparison */
+  const char *zRhs, int nRhs      /* RHS of comparison */
+){
+  int nCmp = MIN(nLhs, nRhs);
+  int res;
+
+  res = memcmp(zLhs, zRhs, nCmp);
+  if( res==0 ) res = nLhs - nRhs;
+
+  return res;
+}
+
+
+/*
+** Query to see if the entry in the %_segments table with blockid iEnd is 
+** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before
+** returning. Otherwise, set *pbRes to 0. 
+**
+** Or, if an error occurs while querying the database, return an SQLite 
+** error code. The final value of *pbRes is undefined in this case.
+**
+** This is used to test if a segment is an "appendable" segment. If it
+** is, then a NULL entry has been inserted into the %_segments table
+** with blockid %_segdir.end_block.
+*/
+static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){
+  int bRes = 0;                   /* Result to set *pbRes to */
+  sqlite3_stmt *pCheck = 0;       /* Statement to query database with */
+  int rc;                         /* Return code */
+
+  rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pCheck, 1, iEnd);
+    if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
+    rc = sqlite3_reset(pCheck);
+  }
+  
+  *pbRes = bRes;
+  return rc;
+}
+
+/*
+** This function is called when initializing an incremental-merge operation.
+** It checks if the existing segment with index value iIdx at absolute level 
+** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the
+** merge-writer object *pWriter is initialized to write to it.
+**
+** An existing segment can be appended to by an incremental merge if:
+**
+**   * It was initially created as an appendable segment (with all required
+**     space pre-allocated), and
+**
+**   * The first key read from the input (arguments zKey and nKey) is 
+**     greater than the largest key currently stored in the potential
+**     output segment.
+*/
+static int fts3IncrmergeLoad(
+  Fts3Table *p,                   /* Fts3 table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
+  int iIdx,                       /* Index of candidate output segment */
+  const char *zKey,               /* First key to write */
+  int nKey,                       /* Number of bytes in nKey */
+  IncrmergeWriter *pWriter        /* Populate this object */
+){
+  int rc;                         /* Return code */
+  sqlite3_stmt *pSelect = 0;      /* SELECT to read %_segdir entry */
+
+  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_int64 iStart = 0;     /* Value of %_segdir.start_block */
+    sqlite3_int64 iLeafEnd = 0;   /* Value of %_segdir.leaves_end_block */
+    sqlite3_int64 iEnd = 0;       /* Value of %_segdir.end_block */
+    const char *aRoot = 0;        /* Pointer to %_segdir.root buffer */
+    int nRoot = 0;                /* Size of aRoot[] in bytes */
+    int rc2;                      /* Return code from sqlite3_reset() */
+    int bAppendable = 0;          /* Set to true if segment is appendable */
+
+    /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */
+    sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
+    sqlite3_bind_int(pSelect, 2, iIdx);
+    if( sqlite3_step(pSelect)==SQLITE_ROW ){
+      iStart = sqlite3_column_int64(pSelect, 1);
+      iLeafEnd = sqlite3_column_int64(pSelect, 2);
+      fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData);
+      if( pWriter->nLeafData<0 ){
+        pWriter->nLeafData = pWriter->nLeafData * -1;
+      }
+      pWriter->bNoLeafData = (pWriter->nLeafData==0);
+      nRoot = sqlite3_column_bytes(pSelect, 4);
+      aRoot = sqlite3_column_blob(pSelect, 4);
+    }else{
+      return sqlite3_reset(pSelect);
+    }
+
+    /* Check for the zero-length marker in the %_segments table */
+    rc = fts3IsAppendable(p, iEnd, &bAppendable);
+
+    /* Check that zKey/nKey is larger than the largest key the candidate */
+    if( rc==SQLITE_OK && bAppendable ){
+      char *aLeaf = 0;
+      int nLeaf = 0;
+
+      rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0);
+      if( rc==SQLITE_OK ){
+        NodeReader reader;
+        for(rc = nodeReaderInit(&reader, aLeaf, nLeaf);
+            rc==SQLITE_OK && reader.aNode;
+            rc = nodeReaderNext(&reader)
+        ){
+          assert( reader.aNode );
+        }
+        if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){
+          bAppendable = 0;
+        }
+        nodeReaderRelease(&reader);
+      }
+      sqlite3_free(aLeaf);
+    }
+
+    if( rc==SQLITE_OK && bAppendable ){
+      /* It is possible to append to this segment. Set up the IncrmergeWriter
+      ** object to do so.  */
+      int i;
+      int nHeight = (int)aRoot[0];
+      NodeWriter *pNode;
+
+      pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT;
+      pWriter->iStart = iStart;
+      pWriter->iEnd = iEnd;
+      pWriter->iAbsLevel = iAbsLevel;
+      pWriter->iIdx = iIdx;
+
+      for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
+        pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
+      }
+
+      pNode = &pWriter->aNodeWriter[nHeight];
+      pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
+      blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc);
+      if( rc==SQLITE_OK ){
+        memcpy(pNode->block.a, aRoot, nRoot);
+        pNode->block.n = nRoot;
+      }
+
+      for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
+        NodeReader reader;
+        pNode = &pWriter->aNodeWriter[i];
+
+        rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n);
+        while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
+        blobGrowBuffer(&pNode->key, reader.term.n, &rc);
+        if( rc==SQLITE_OK ){
+          memcpy(pNode->key.a, reader.term.a, reader.term.n);
+          pNode->key.n = reader.term.n;
+          if( i>0 ){
+            char *aBlock = 0;
+            int nBlock = 0;
+            pNode = &pWriter->aNodeWriter[i-1];
+            pNode->iBlock = reader.iChild;
+            rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
+            blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc);
+            if( rc==SQLITE_OK ){
+              memcpy(pNode->block.a, aBlock, nBlock);
+              pNode->block.n = nBlock;
+            }
+            sqlite3_free(aBlock);
+          }
+        }
+        nodeReaderRelease(&reader);
+      }
+    }
+
+    rc2 = sqlite3_reset(pSelect);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+/*
+** Determine the largest segment index value that exists within absolute
+** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus
+** one before returning SQLITE_OK. Or, if there are no segments at all 
+** within level iAbsLevel, set *piIdx to zero.
+**
+** If an error occurs, return an SQLite error code. The final value of
+** *piIdx is undefined in this case.
+*/
+static int fts3IncrmergeOutputIdx( 
+  Fts3Table *p,                   /* FTS Table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute index of input segments */
+  int *piIdx                      /* OUT: Next free index at iAbsLevel+1 */
+){
+  int rc;
+  sqlite3_stmt *pOutputIdx = 0;   /* SQL used to find output index */
+
+  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
+    sqlite3_step(pOutputIdx);
+    *piIdx = sqlite3_column_int(pOutputIdx, 0);
+    rc = sqlite3_reset(pOutputIdx);
+  }
+
+  return rc;
+}
+
+/* 
+** Allocate an appendable output segment on absolute level iAbsLevel+1
+** with idx value iIdx.
+**
+** In the %_segdir table, a segment is defined by the values in three
+** columns:
+**
+**     start_block
+**     leaves_end_block
+**     end_block
+**
+** When an appendable segment is allocated, it is estimated that the
+** maximum number of leaf blocks that may be required is the sum of the
+** number of leaf blocks consumed by the input segments, plus the number
+** of input segments, multiplied by two. This value is stored in stack 
+** variable nLeafEst.
+**
+** A total of 16*nLeafEst blocks are allocated when an appendable segment
+** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous
+** array of leaf nodes starts at the first block allocated. The array
+** of interior nodes that are parents of the leaf nodes start at block
+** (start_block + (1 + end_block - start_block) / 16). And so on.
+**
+** In the actual code below, the value "16" is replaced with the 
+** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
+*/
+static int fts3IncrmergeWriter( 
+  Fts3Table *p,                   /* Fts3 table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level of input segments */
+  int iIdx,                       /* Index of new output segment */
+  Fts3MultiSegReader *pCsr,       /* Cursor that data will be read from */
+  IncrmergeWriter *pWriter        /* Populate this object */
+){
+  int rc;                         /* Return Code */
+  int i;                          /* Iterator variable */
+  int nLeafEst = 0;               /* Blocks allocated for leaf nodes */
+  sqlite3_stmt *pLeafEst = 0;     /* SQL used to determine nLeafEst */
+  sqlite3_stmt *pFirstBlock = 0;  /* SQL used to determine first block */
+
+  /* Calculate nLeafEst. */
+  rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pLeafEst, 1, iAbsLevel);
+    sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment);
+    if( SQLITE_ROW==sqlite3_step(pLeafEst) ){
+      nLeafEst = sqlite3_column_int(pLeafEst, 0);
+    }
+    rc = sqlite3_reset(pLeafEst);
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Calculate the first block to use in the output segment */
+  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0);
+  if( rc==SQLITE_OK ){
+    if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){
+      pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0);
+      pWriter->iEnd = pWriter->iStart - 1;
+      pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT;
+    }
+    rc = sqlite3_reset(pFirstBlock);
+  }
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Insert the marker in the %_segments table to make sure nobody tries
+  ** to steal the space just allocated. This is also used to identify 
+  ** appendable segments.  */
+  rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  pWriter->iAbsLevel = iAbsLevel;
+  pWriter->nLeafEst = nLeafEst;
+  pWriter->iIdx = iIdx;
+
+  /* Set up the array of NodeWriter objects */
+  for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
+    pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Remove an entry from the %_segdir table. This involves running the 
+** following two statements:
+**
+**   DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
+**   UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx
+**
+** The DELETE statement removes the specific %_segdir level. The UPDATE 
+** statement ensures that the remaining segments have contiguously allocated
+** idx values.
+*/
+static int fts3RemoveSegdirEntry(
+  Fts3Table *p,                   /* FTS3 table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level to delete from */
+  int iIdx                        /* Index of %_segdir entry to delete */
+){
+  int rc;                         /* Return code */
+  sqlite3_stmt *pDelete = 0;      /* DELETE statement */
+
+  rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pDelete, 1, iAbsLevel);
+    sqlite3_bind_int(pDelete, 2, iIdx);
+    sqlite3_step(pDelete);
+    rc = sqlite3_reset(pDelete);
+  }
+
+  return rc;
+}
+
+/*
+** One or more segments have just been removed from absolute level iAbsLevel.
+** Update the 'idx' values of the remaining segments in the level so that
+** the idx values are a contiguous sequence starting from 0.
+*/
+static int fts3RepackSegdirLevel(
+  Fts3Table *p,                   /* FTS3 table handle */
+  sqlite3_int64 iAbsLevel         /* Absolute level to repack */
+){
+  int rc;                         /* Return code */
+  int *aIdx = 0;                  /* Array of remaining idx values */
+  int nIdx = 0;                   /* Valid entries in aIdx[] */
+  int nAlloc = 0;                 /* Allocated size of aIdx[] */
+  int i;                          /* Iterator variable */
+  sqlite3_stmt *pSelect = 0;      /* Select statement to read idx values */
+  sqlite3_stmt *pUpdate = 0;      /* Update statement to modify idx values */
+
+  rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int64(pSelect, 1, iAbsLevel);
+    while( SQLITE_ROW==sqlite3_step(pSelect) ){
+      if( nIdx>=nAlloc ){
+        int *aNew;
+        nAlloc += 16;
+        aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int));
+        if( !aNew ){
+          rc = SQLITE_NOMEM;
+          break;
+        }
+        aIdx = aNew;
+      }
+      aIdx[nIdx++] = sqlite3_column_int(pSelect, 0);
+    }
+    rc2 = sqlite3_reset(pSelect);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0);
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pUpdate, 2, iAbsLevel);
+  }
+
+  assert( p->bIgnoreSavepoint==0 );
+  p->bIgnoreSavepoint = 1;
+  for(i=0; rc==SQLITE_OK && i<nIdx; i++){
+    if( aIdx[i]!=i ){
+      sqlite3_bind_int(pUpdate, 3, aIdx[i]);
+      sqlite3_bind_int(pUpdate, 1, i);
+      sqlite3_step(pUpdate);
+      rc = sqlite3_reset(pUpdate);
+    }
+  }
+  p->bIgnoreSavepoint = 0;
+
+  sqlite3_free(aIdx);
+  return rc;
+}
+
+static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
+  pNode->a[0] = (char)iHeight;
+  if( iChild ){
+    assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
+    pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
+  }else{
+    assert( pNode->nAlloc>=1 );
+    pNode->n = 1;
+  }
+}
+
+/*
+** The first two arguments are a pointer to and the size of a segment b-tree
+** node. The node may be a leaf or an internal node.
+**
+** This function creates a new node image in blob object *pNew by copying
+** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes)
+** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode.
+*/
+static int fts3TruncateNode(
+  const char *aNode,              /* Current node image */
+  int nNode,                      /* Size of aNode in bytes */
+  Blob *pNew,                     /* OUT: Write new node image here */
+  const char *zTerm,              /* Omit all terms smaller than this */
+  int nTerm,                      /* Size of zTerm in bytes */
+  sqlite3_int64 *piBlock          /* OUT: Block number in next layer down */
+){
+  NodeReader reader;              /* Reader object */
+  Blob prev = {0, 0, 0};          /* Previous term written to new node */
+  int rc = SQLITE_OK;             /* Return code */
+  int bLeaf = aNode[0]=='\0';     /* True for a leaf node */
+
+  /* Allocate required output space */
+  blobGrowBuffer(pNew, nNode, &rc);
+  if( rc!=SQLITE_OK ) return rc;
+  pNew->n = 0;
+
+  /* Populate new node buffer */
+  for(rc = nodeReaderInit(&reader, aNode, nNode); 
+      rc==SQLITE_OK && reader.aNode; 
+      rc = nodeReaderNext(&reader)
+  ){
+    if( pNew->n==0 ){
+      int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
+      if( res<0 || (bLeaf==0 && res==0) ) continue;
+      fts3StartNode(pNew, (int)aNode[0], reader.iChild);
+      *piBlock = reader.iChild;
+    }
+    rc = fts3AppendToNode(
+        pNew, &prev, reader.term.a, reader.term.n,
+        reader.aDoclist, reader.nDoclist
+    );
+    if( rc!=SQLITE_OK ) break;
+  }
+  if( pNew->n==0 ){
+    fts3StartNode(pNew, (int)aNode[0], reader.iChild);
+    *piBlock = reader.iChild;
+  }
+  assert( pNew->n<=pNew->nAlloc );
+
+  nodeReaderRelease(&reader);
+  sqlite3_free(prev.a);
+  return rc;
+}
+
+/*
+** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute 
+** level iAbsLevel. This may involve deleting entries from the %_segments
+** table, and modifying existing entries in both the %_segments and %_segdir
+** tables.
+**
+** SQLITE_OK is returned if the segment is updated successfully. Or an
+** SQLite error code otherwise.
+*/
+static int fts3TruncateSegment(
+  Fts3Table *p,                   /* FTS3 table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level of segment to modify */
+  int iIdx,                       /* Index within level of segment to modify */
+  const char *zTerm,              /* Remove terms smaller than this */
+  int nTerm                      /* Number of bytes in buffer zTerm */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  Blob root = {0,0,0};            /* New root page image */
+  Blob block = {0,0,0};           /* Buffer used for any other block */
+  sqlite3_int64 iBlock = 0;       /* Block id */
+  sqlite3_int64 iNewStart = 0;    /* New value for iStartBlock */
+  sqlite3_int64 iOldStart = 0;    /* Old value for iStartBlock */
+  sqlite3_stmt *pFetch = 0;       /* Statement used to fetch segdir */
+
+  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;                      /* sqlite3_reset() return code */
+    sqlite3_bind_int64(pFetch, 1, iAbsLevel);
+    sqlite3_bind_int(pFetch, 2, iIdx);
+    if( SQLITE_ROW==sqlite3_step(pFetch) ){
+      const char *aRoot = sqlite3_column_blob(pFetch, 4);
+      int nRoot = sqlite3_column_bytes(pFetch, 4);
+      iOldStart = sqlite3_column_int64(pFetch, 1);
+      rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock);
+    }
+    rc2 = sqlite3_reset(pFetch);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  while( rc==SQLITE_OK && iBlock ){
+    char *aBlock = 0;
+    int nBlock = 0;
+    iNewStart = iBlock;
+
+    rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0);
+    if( rc==SQLITE_OK ){
+      rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock);
+    }
+    if( rc==SQLITE_OK ){
+      rc = fts3WriteSegment(p, iNewStart, block.a, block.n);
+    }
+    sqlite3_free(aBlock);
+  }
+
+  /* Variable iNewStart now contains the first valid leaf node. */
+  if( rc==SQLITE_OK && iNewStart ){
+    sqlite3_stmt *pDel = 0;
+    rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pDel, 1, iOldStart);
+      sqlite3_bind_int64(pDel, 2, iNewStart-1);
+      sqlite3_step(pDel);
+      rc = sqlite3_reset(pDel);
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    sqlite3_stmt *pChomp = 0;
+    rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pChomp, 1, iNewStart);
+      sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC);
+      sqlite3_bind_int64(pChomp, 3, iAbsLevel);
+      sqlite3_bind_int(pChomp, 4, iIdx);
+      sqlite3_step(pChomp);
+      rc = sqlite3_reset(pChomp);
+    }
+  }
+
+  sqlite3_free(root.a);
+  sqlite3_free(block.a);
+  return rc;
+}
+
+/*
+** This function is called after an incrmental-merge operation has run to
+** merge (or partially merge) two or more segments from absolute level
+** iAbsLevel.
+**
+** Each input segment is either removed from the db completely (if all of
+** its data was copied to the output segment by the incrmerge operation)
+** or modified in place so that it no longer contains those entries that
+** have been duplicated in the output segment.
+*/
+static int fts3IncrmergeChomp(
+  Fts3Table *p,                   /* FTS table handle */
+  sqlite3_int64 iAbsLevel,        /* Absolute level containing segments */
+  Fts3MultiSegReader *pCsr,       /* Chomp all segments opened by this cursor */
+  int *pnRem                      /* Number of segments not deleted */
+){
+  int i;
+  int nRem = 0;
+  int rc = SQLITE_OK;
+
+  for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
+    Fts3SegReader *pSeg = 0;
+    int j;
+
+    /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding
+    ** somewhere in the pCsr->apSegment[] array.  */
+    for(j=0; ALWAYS(j<pCsr->nSegment); j++){
+      pSeg = pCsr->apSegment[j];
+      if( pSeg->iIdx==i ) break;
+    }
+    assert( j<pCsr->nSegment && pSeg->iIdx==i );
+
+    if( pSeg->aNode==0 ){
+      /* Seg-reader is at EOF. Remove the entire input segment. */
+      rc = fts3DeleteSegment(p, pSeg);
+      if( rc==SQLITE_OK ){
+        rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx);
+      }
+      *pnRem = 0;
+    }else{
+      /* The incremental merge did not copy all the data from this 
+      ** segment to the upper level. The segment is modified in place
+      ** so that it contains no keys smaller than zTerm/nTerm. */ 
+      const char *zTerm = pSeg->zTerm;
+      int nTerm = pSeg->nTerm;
+      rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm);
+      nRem++;
+    }
+  }
+
+  if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){
+    rc = fts3RepackSegdirLevel(p, iAbsLevel);
+  }
+
+  *pnRem = nRem;
+  return rc;
+}
+
+/*
+** Store an incr-merge hint in the database.
+*/
+static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){
+  sqlite3_stmt *pReplace = 0;
+  int rc;                         /* Return code */
+
+  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT);
+    sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC);
+    sqlite3_step(pReplace);
+    rc = sqlite3_reset(pReplace);
+  }
+
+  return rc;
+}
+
+/*
+** Load an incr-merge hint from the database. The incr-merge hint, if one 
+** exists, is stored in the rowid==1 row of the %_stat table.
+**
+** If successful, populate blob *pHint with the value read from the %_stat
+** table and return SQLITE_OK. Otherwise, if an error occurs, return an
+** SQLite error code.
+*/
+static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){
+  sqlite3_stmt *pSelect = 0;
+  int rc;
+
+  pHint->n = 0;
+  rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT);
+    if( SQLITE_ROW==sqlite3_step(pSelect) ){
+      const char *aHint = sqlite3_column_blob(pSelect, 0);
+      int nHint = sqlite3_column_bytes(pSelect, 0);
+      if( aHint ){
+        blobGrowBuffer(pHint, nHint, &rc);
+        if( rc==SQLITE_OK ){
+          memcpy(pHint->a, aHint, nHint);
+          pHint->n = nHint;
+        }
+      }
+    }
+    rc2 = sqlite3_reset(pSelect);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+/*
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, append an entry to the hint stored in blob *pHint. Each entry
+** consists of two varints, the absolute level number of the input segments 
+** and the number of input segments.
+**
+** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs,
+** set *pRc to an SQLite error code before returning.
+*/
+static void fts3IncrmergeHintPush(
+  Blob *pHint,                    /* Hint blob to append to */
+  i64 iAbsLevel,                  /* First varint to store in hint */
+  int nInput,                     /* Second varint to store in hint */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc);
+  if( *pRc==SQLITE_OK ){
+    pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel);
+    pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput);
+  }
+}
+
+/*
+** Read the last entry (most recently pushed) from the hint blob *pHint
+** and then remove the entry. Write the two values read to *piAbsLevel and 
+** *pnInput before returning.
+**
+** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does
+** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB.
+*/
+static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
+  const int nHint = pHint->n;
+  int i;
+
+  i = pHint->n-2;
+  while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
+  while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
+
+  pHint->n = i;
+  i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
+  i += fts3GetVarint32(&pHint->a[i], pnInput);
+  if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
+
+  return SQLITE_OK;
+}
+
+
+/*
+** Attempt an incremental merge that writes nMerge leaf blocks.
+**
+** Incremental merges happen nMin segments at a time. The segments 
+** to be merged are the nMin oldest segments (the ones with the smallest 
+** values for the _segdir.idx field) in the highest level that contains 
+** at least nMin segments. Multiple merges might occur in an attempt to 
+** write the quota of nMerge leaf blocks.
+*/
+SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
+  int rc;                         /* Return code */
+  int nRem = nMerge;              /* Number of leaf pages yet to  be written */
+  Fts3MultiSegReader *pCsr;       /* Cursor used to read input data */
+  Fts3SegFilter *pFilter;         /* Filter used with cursor pCsr */
+  IncrmergeWriter *pWriter;       /* Writer object */
+  int nSeg = 0;                   /* Number of input segments */
+  sqlite3_int64 iAbsLevel = 0;    /* Absolute level number to work on */
+  Blob hint = {0, 0, 0};          /* Hint read from %_stat table */
+  int bDirtyHint = 0;             /* True if blob 'hint' has been modified */
+
+  /* Allocate space for the cursor, filter and writer objects */
+  const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter);
+  pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc);
+  if( !pWriter ) return SQLITE_NOMEM;
+  pFilter = (Fts3SegFilter *)&pWriter[1];
+  pCsr = (Fts3MultiSegReader *)&pFilter[1];
+
+  rc = fts3IncrmergeHintLoad(p, &hint);
+  while( rc==SQLITE_OK && nRem>0 ){
+    const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
+    sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
+    int bUseHint = 0;             /* True if attempting to append */
+    int iIdx = 0;                 /* Largest idx in level (iAbsLevel+1) */
+
+    /* Search the %_segdir table for the absolute level with the smallest
+    ** relative level number that contains at least nMin segments, if any.
+    ** If one is found, set iAbsLevel to the absolute level number and
+    ** nSeg to nMin. If no level with at least nMin segments can be found, 
+    ** set nSeg to -1.
+    */
+    rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
+    sqlite3_bind_int(pFindLevel, 1, nMin);
+    if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
+      iAbsLevel = sqlite3_column_int64(pFindLevel, 0);
+      nSeg = nMin;
+    }else{
+      nSeg = -1;
+    }
+    rc = sqlite3_reset(pFindLevel);
+
+    /* If the hint read from the %_stat table is not empty, check if the
+    ** last entry in it specifies a relative level smaller than or equal
+    ** to the level identified by the block above (if any). If so, this 
+    ** iteration of the loop will work on merging at the hinted level.
+    */
+    if( rc==SQLITE_OK && hint.n ){
+      int nHint = hint.n;
+      sqlite3_int64 iHintAbsLevel = 0;      /* Hint level */
+      int nHintSeg = 0;                     /* Hint number of segments */
+
+      rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg);
+      if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){
+        iAbsLevel = iHintAbsLevel;
+        nSeg = nHintSeg;
+        bUseHint = 1;
+        bDirtyHint = 1;
+      }else{
+        /* This undoes the effect of the HintPop() above - so that no entry
+        ** is removed from the hint blob.  */
+        hint.n = nHint;
+      }
+    }
+
+    /* If nSeg is less that zero, then there is no level with at least
+    ** nMin segments and no hint in the %_stat table. No work to do.
+    ** Exit early in this case.  */
+    if( nSeg<0 ) break;
+
+    /* Open a cursor to iterate through the contents of the oldest nSeg 
+    ** indexes of absolute level iAbsLevel. If this cursor is opened using 
+    ** the 'hint' parameters, it is possible that there are less than nSeg
+    ** segments available in level iAbsLevel. In this case, no work is
+    ** done on iAbsLevel - fall through to the next iteration of the loop 
+    ** to start work on some other level.  */
+    memset(pWriter, 0, nAlloc);
+    pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
+
+    if( rc==SQLITE_OK ){
+      rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
+      assert( bUseHint==1 || bUseHint==0 );
+      if( iIdx==0 || (bUseHint && iIdx==1) ){
+        int bIgnore = 0;
+        rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore);
+        if( bIgnore ){
+          pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY;
+        }
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
+    }
+    if( SQLITE_OK==rc && pCsr->nSegment==nSeg
+     && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
+     && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
+    ){
+      if( bUseHint && iIdx>0 ){
+        const char *zKey = pCsr->zTerm;
+        int nKey = pCsr->nTerm;
+        rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
+      }else{
+        rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
+      }
+
+      if( rc==SQLITE_OK && pWriter->nLeafEst ){
+        fts3LogMerge(nSeg, iAbsLevel);
+        do {
+          rc = fts3IncrmergeAppend(p, pWriter, pCsr);
+          if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr);
+          if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK;
+        }while( rc==SQLITE_ROW );
+
+        /* Update or delete the input segments */
+        if( rc==SQLITE_OK ){
+          nRem -= (1 + pWriter->nWork);
+          rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg);
+          if( nSeg!=0 ){
+            bDirtyHint = 1;
+            fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc);
+          }
+        }
+      }
+
+      if( nSeg!=0 ){
+        pWriter->nLeafData = pWriter->nLeafData * -1;
+      }
+      fts3IncrmergeRelease(p, pWriter, &rc);
+      if( nSeg==0 && pWriter->bNoLeafData==0 ){
+        fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData);
+      }
+    }
+
+    sqlite3Fts3SegReaderFinish(pCsr);
+  }
+
+  /* Write the hint values into the %_stat table for the next incr-merger */
+  if( bDirtyHint && rc==SQLITE_OK ){
+    rc = fts3IncrmergeHintStore(p, &hint);
+  }
+
+  sqlite3_free(pWriter);
+  sqlite3_free(hint.a);
+  return rc;
+}
+
+/*
+** Convert the text beginning at *pz into an integer and return
+** its value.  Advance *pz to point to the first character past
+** the integer.
+*/
+static int fts3Getint(const char **pz){
+  const char *z = *pz;
+  int i = 0;
+  while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0';
+  *pz = z;
+  return i;
+}
+
+/*
+** Process statements of the form:
+**
+**    INSERT INTO table(table) VALUES('merge=A,B');
+**
+** A and B are integers that decode to be the number of leaf pages
+** written for the merge, and the minimum number of segments on a level
+** before it will be selected for a merge, respectively.
+*/
+static int fts3DoIncrmerge(
+  Fts3Table *p,                   /* FTS3 table handle */
+  const char *zParam              /* Nul-terminated string containing "A,B" */
+){
+  int rc;
+  int nMin = (FTS3_MERGE_COUNT / 2);
+  int nMerge = 0;
+  const char *z = zParam;
+
+  /* Read the first integer value */
+  nMerge = fts3Getint(&z);
+
+  /* If the first integer value is followed by a ',',  read the second
+  ** integer value. */
+  if( z[0]==',' && z[1]!='\0' ){
+    z++;
+    nMin = fts3Getint(&z);
+  }
+
+  if( z[0]!='\0' || nMin<2 ){
+    rc = SQLITE_ERROR;
+  }else{
+    rc = SQLITE_OK;
+    if( !p->bHasStat ){
+      assert( p->bFts4==0 );
+      sqlite3Fts3CreateStatTable(&rc, p);
+    }
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
+    }
+    sqlite3Fts3SegmentsClose(p);
+  }
+  return rc;
+}
+
+/*
+** Process statements of the form:
+**
+**    INSERT INTO table(table) VALUES('automerge=X');
+**
+** where X is an integer.  X==0 means to turn automerge off.  X!=0 means
+** turn it on.  The setting is persistent.
+*/
+static int fts3DoAutoincrmerge(
+  Fts3Table *p,                   /* FTS3 table handle */
+  const char *zParam              /* Nul-terminated string containing boolean */
+){
+  int rc = SQLITE_OK;
+  sqlite3_stmt *pStmt = 0;
+  p->nAutoincrmerge = fts3Getint(&zParam);
+  if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){
+    p->nAutoincrmerge = 8;
+  }
+  if( !p->bHasStat ){
+    assert( p->bFts4==0 );
+    sqlite3Fts3CreateStatTable(&rc, p);
+    if( rc ) return rc;
+  }
+  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
+  if( rc ) return rc;
+  sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
+  sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);
+  sqlite3_step(pStmt);
+  rc = sqlite3_reset(pStmt);
+  return rc;
+}
+
+/*
+** Return a 64-bit checksum for the FTS index entry specified by the
+** arguments to this function.
+*/
+static u64 fts3ChecksumEntry(
+  const char *zTerm,              /* Pointer to buffer containing term */
+  int nTerm,                      /* Size of zTerm in bytes */
+  int iLangid,                    /* Language id for current row */
+  int iIndex,                     /* Index (0..Fts3Table.nIndex-1) */
+  i64 iDocid,                     /* Docid for current row. */
+  int iCol,                       /* Column number */
+  int iPos                        /* Position */
+){
+  int i;
+  u64 ret = (u64)iDocid;
+
+  ret += (ret<<3) + iLangid;
+  ret += (ret<<3) + iIndex;
+  ret += (ret<<3) + iCol;
+  ret += (ret<<3) + iPos;
+  for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i];
+
+  return ret;
+}
+
+/*
+** Return a checksum of all entries in the FTS index that correspond to
+** language id iLangid. The checksum is calculated by XORing the checksums
+** of each individual entry (see fts3ChecksumEntry()) together.
+**
+** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
+** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
+** return value is undefined in this case.
+*/
+static u64 fts3ChecksumIndex(
+  Fts3Table *p,                   /* FTS3 table handle */
+  int iLangid,                    /* Language id to return cksum for */
+  int iIndex,                     /* Index to cksum (0..p->nIndex-1) */
+  int *pRc                        /* OUT: Return code */
+){
+  Fts3SegFilter filter;
+  Fts3MultiSegReader csr;
+  int rc;
+  u64 cksum = 0;
+
+  assert( *pRc==SQLITE_OK );
+
+  memset(&filter, 0, sizeof(filter));
+  memset(&csr, 0, sizeof(csr));
+  filter.flags =  FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
+  filter.flags |= FTS3_SEGMENT_SCAN;
+
+  rc = sqlite3Fts3SegReaderCursor(
+      p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
+  );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
+  }
+
+  if( rc==SQLITE_OK ){
+    while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
+      char *pCsr = csr.aDoclist;
+      char *pEnd = &pCsr[csr.nDoclist];
+
+      i64 iDocid = 0;
+      i64 iCol = 0;
+      i64 iPos = 0;
+
+      pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
+      while( pCsr<pEnd ){
+        i64 iVal = 0;
+        pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
+        if( pCsr<pEnd ){
+          if( iVal==0 || iVal==1 ){
+            iCol = 0;
+            iPos = 0;
+            if( iVal ){
+              pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
+            }else{
+              pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
+              iDocid += iVal;
+            }
+          }else{
+            iPos += (iVal - 2);
+            cksum = cksum ^ fts3ChecksumEntry(
+                csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
+                (int)iCol, (int)iPos
+            );
+          }
+        }
+      }
+    }
+  }
+  sqlite3Fts3SegReaderFinish(&csr);
+
+  *pRc = rc;
+  return cksum;
+}
+
+/*
+** Check if the contents of the FTS index match the current contents of the
+** content table. If no error occurs and the contents do match, set *pbOk
+** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
+** to false before returning.
+**
+** If an error occurs (e.g. an OOM or IO error), return an SQLite error 
+** code. The final value of *pbOk is undefined in this case.
+*/
+static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
+  int rc = SQLITE_OK;             /* Return code */
+  u64 cksum1 = 0;                 /* Checksum based on FTS index contents */
+  u64 cksum2 = 0;                 /* Checksum based on %_content contents */
+  sqlite3_stmt *pAllLangid = 0;   /* Statement to return all language-ids */
+
+  /* This block calculates the checksum according to the FTS index. */
+  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
+    while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
+      int iLangid = sqlite3_column_int(pAllLangid, 0);
+      int i;
+      for(i=0; i<p->nIndex; i++){
+        cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
+      }
+    }
+    rc2 = sqlite3_reset(pAllLangid);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  /* This block calculates the checksum according to the %_content table */
+  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+  if( rc==SQLITE_OK ){
+    sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
+    sqlite3_stmt *pStmt = 0;
+    char *zSql;
+   
+    zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
+    if( !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+      sqlite3_free(zSql);
+    }
+
+    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+      i64 iDocid = sqlite3_column_int64(pStmt, 0);
+      int iLang = langidFromSelect(p, pStmt);
+      int iCol;
+
+      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
+        if( p->abNotindexed[iCol]==0 ){
+          const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
+          int nText = sqlite3_column_bytes(pStmt, iCol+1);
+          sqlite3_tokenizer_cursor *pT = 0;
+
+          rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT);
+          while( rc==SQLITE_OK ){
+            char const *zToken;       /* Buffer containing token */
+            int nToken = 0;           /* Number of bytes in token */
+            int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+            int iPos = 0;             /* Position of token in zText */
+
+            rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+            if( rc==SQLITE_OK ){
+              int i;
+              cksum2 = cksum2 ^ fts3ChecksumEntry(
+                  zToken, nToken, iLang, 0, iDocid, iCol, iPos
+              );
+              for(i=1; i<p->nIndex; i++){
+                if( p->aIndex[i].nPrefix<=nToken ){
+                  cksum2 = cksum2 ^ fts3ChecksumEntry(
+                      zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
+                  );
+                }
+              }
+            }
+          }
+          if( pT ) pModule->xClose(pT);
+          if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+        }
+      }
+    }
+
+    sqlite3_finalize(pStmt);
+  }
+
+  *pbOk = (cksum1==cksum2);
+  return rc;
+}
+
+/*
+** Run the integrity-check. If no error occurs and the current contents of
+** the FTS index are correct, return SQLITE_OK. Or, if the contents of the
+** FTS index are incorrect, return SQLITE_CORRUPT_VTAB.
+**
+** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite 
+** error code.
+**
+** The integrity-check works as follows. For each token and indexed token
+** prefix in the document set, a 64-bit checksum is calculated (by code
+** in fts3ChecksumEntry()) based on the following:
+**
+**     + The index number (0 for the main index, 1 for the first prefix
+**       index etc.),
+**     + The token (or token prefix) text itself, 
+**     + The language-id of the row it appears in,
+**     + The docid of the row it appears in,
+**     + The column it appears in, and
+**     + The tokens position within that column.
+**
+** The checksums for all entries in the index are XORed together to create
+** a single checksum for the entire index.
+**
+** The integrity-check code calculates the same checksum in two ways:
+**
+**     1. By scanning the contents of the FTS index, and 
+**     2. By scanning and tokenizing the content table.
+**
+** If the two checksums are identical, the integrity-check is deemed to have
+** passed.
+*/
+static int fts3DoIntegrityCheck(
+  Fts3Table *p                    /* FTS3 table handle */
+){
+  int rc;
+  int bOk = 0;
+  rc = fts3IntegrityCheck(p, &bOk);
+  if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB;
+  return rc;
+}
+
+/*
+** Handle a 'special' INSERT of the form:
+**
+**   "INSERT INTO tbl(tbl) VALUES(<expr>)"
+**
+** Argument pVal contains the result of <expr>. Currently the only 
+** meaningful value to insert is the text 'optimize'.
+*/
+static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
+  int rc;                         /* Return Code */
+  const char *zVal = (const char *)sqlite3_value_text(pVal);
+  int nVal = sqlite3_value_bytes(pVal);
+
+  if( !zVal ){
+    return SQLITE_NOMEM;
+  }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
+    rc = fts3DoOptimize(p, 0);
+  }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
+    rc = fts3DoRebuild(p);
+  }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){
+    rc = fts3DoIntegrityCheck(p);
+  }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
+    rc = fts3DoIncrmerge(p, &zVal[6]);
+  }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){
+    rc = fts3DoAutoincrmerge(p, &zVal[10]);
+#ifdef SQLITE_TEST
+  }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
+    p->nNodeSize = atoi(&zVal[9]);
+    rc = SQLITE_OK;
+  }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
+    p->nMaxPendingData = atoi(&zVal[11]);
+    rc = SQLITE_OK;
+  }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){
+    p->bNoIncrDoclist = atoi(&zVal[21]);
+    rc = SQLITE_OK;
+#endif
+  }else{
+    rc = SQLITE_ERROR;
+  }
+
+  return rc;
+}
+
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+/*
+** Delete all cached deferred doclists. Deferred doclists are cached
+** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
+*/
+SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
+  Fts3DeferredToken *pDef;
+  for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
+    fts3PendingListDelete(pDef->pList);
+    pDef->pList = 0;
+  }
+}
+
+/*
+** Free all entries in the pCsr->pDeffered list. Entries are added to 
+** this list using sqlite3Fts3DeferToken().
+*/
+SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
+  Fts3DeferredToken *pDef;
+  Fts3DeferredToken *pNext;
+  for(pDef=pCsr->pDeferred; pDef; pDef=pNext){
+    pNext = pDef->pNext;
+    fts3PendingListDelete(pDef->pList);
+    sqlite3_free(pDef);
+  }
+  pCsr->pDeferred = 0;
+}
+
+/*
+** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
+** based on the row that pCsr currently points to.
+**
+** A deferred-doclist is like any other doclist with position information
+** included, except that it only contains entries for a single row of the
+** table, not for all rows.
+*/
+SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
+  int rc = SQLITE_OK;             /* Return code */
+  if( pCsr->pDeferred ){
+    int i;                        /* Used to iterate through table columns */
+    sqlite3_int64 iDocid;         /* Docid of the row pCsr points to */
+    Fts3DeferredToken *pDef;      /* Used to iterate through deferred tokens */
+  
+    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+    sqlite3_tokenizer *pT = p->pTokenizer;
+    sqlite3_tokenizer_module const *pModule = pT->pModule;
+   
+    assert( pCsr->isRequireSeek==0 );
+    iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
+  
+    for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
+      if( p->abNotindexed[i]==0 ){
+        const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
+        sqlite3_tokenizer_cursor *pTC = 0;
+
+        rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
+        while( rc==SQLITE_OK ){
+          char const *zToken;       /* Buffer containing token */
+          int nToken = 0;           /* Number of bytes in token */
+          int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+          int iPos = 0;             /* Position of token in zText */
+
+          rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+          for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
+            Fts3PhraseToken *pPT = pDef->pToken;
+            if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
+                && (pPT->bFirst==0 || iPos==0)
+                && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
+                && (0==memcmp(zToken, pPT->z, pPT->n))
+              ){
+              fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
+            }
+          }
+        }
+        if( pTC ) pModule->xClose(pTC);
+        if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+      }
+    }
+
+    for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
+      if( pDef->pList ){
+        rc = fts3PendingListAppendVarint(&pDef->pList, 0);
+      }
+    }
+  }
+
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(
+  Fts3DeferredToken *p, 
+  char **ppData, 
+  int *pnData
+){
+  char *pRet;
+  int nSkip;
+  sqlite3_int64 dummy;
+
+  *ppData = 0;
+  *pnData = 0;
+
+  if( p->pList==0 ){
+    return SQLITE_OK;
+  }
+
+  pRet = (char *)sqlite3_malloc(p->pList->nData);
+  if( !pRet ) return SQLITE_NOMEM;
+
+  nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy);
+  *pnData = p->pList->nData - nSkip;
+  *ppData = pRet;
+  
+  memcpy(pRet, &p->pList->aData[nSkip], *pnData);
+  return SQLITE_OK;
+}
+
+/*
+** Add an entry for token pToken to the pCsr->pDeferred list.
+*/
+SQLITE_PRIVATE int sqlite3Fts3DeferToken(
+  Fts3Cursor *pCsr,               /* Fts3 table cursor */
+  Fts3PhraseToken *pToken,        /* Token to defer */
+  int iCol                        /* Column that token must appear in (or -1) */
+){
+  Fts3DeferredToken *pDeferred;
+  pDeferred = sqlite3_malloc(sizeof(*pDeferred));
+  if( !pDeferred ){
+    return SQLITE_NOMEM;
+  }
+  memset(pDeferred, 0, sizeof(*pDeferred));
+  pDeferred->pToken = pToken;
+  pDeferred->pNext = pCsr->pDeferred; 
+  pDeferred->iCol = iCol;
+  pCsr->pDeferred = pDeferred;
+
+  assert( pToken->pDeferred==0 );
+  pToken->pDeferred = pDeferred;
+
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** SQLite value pRowid contains the rowid of a row that may or may not be
+** present in the FTS3 table. If it is, delete it and adjust the contents
+** of subsiduary data structures accordingly.
+*/
+static int fts3DeleteByRowid(
+  Fts3Table *p, 
+  sqlite3_value *pRowid, 
+  int *pnChng,                    /* IN/OUT: Decrement if row is deleted */
+  u32 *aSzDel
+){
+  int rc = SQLITE_OK;             /* Return code */
+  int bFound = 0;                 /* True if *pRowid really is in the table */
+
+  fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
+  if( bFound && rc==SQLITE_OK ){
+    int isEmpty = 0;              /* Deleting *pRowid leaves the table empty */
+    rc = fts3IsEmpty(p, pRowid, &isEmpty);
+    if( rc==SQLITE_OK ){
+      if( isEmpty ){
+        /* Deleting this row means the whole table is empty. In this case
+        ** delete the contents of all three tables and throw away any
+        ** data in the pendingTerms hash table.  */
+        rc = fts3DeleteAll(p, 1);
+        *pnChng = 0;
+        memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
+      }else{
+        *pnChng = *pnChng - 1;
+        if( p->zContentTbl==0 ){
+          fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
+        }
+        if( p->bHasDocsize ){
+          fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
+        }
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
+** This function does the work for the xUpdate method of FTS3 virtual
+** tables. The schema of the virtual table being:
+**
+**     CREATE TABLE <table name>( 
+**       <user columns>,
+**       <table name> HIDDEN, 
+**       docid HIDDEN, 
+**       <langid> HIDDEN
+**     );
+**
+** 
+*/
+SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
+  sqlite3_vtab *pVtab,            /* FTS3 vtab object */
+  int nArg,                       /* Size of argument array */
+  sqlite3_value **apVal,          /* Array of arguments */
+  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
+){
+  Fts3Table *p = (Fts3Table *)pVtab;
+  int rc = SQLITE_OK;             /* Return Code */
+  int isRemove = 0;               /* True for an UPDATE or DELETE */
+  u32 *aSzIns = 0;                /* Sizes of inserted documents */
+  u32 *aSzDel = 0;                /* Sizes of deleted documents */
+  int nChng = 0;                  /* Net change in number of documents */
+  int bInsertDone = 0;
+
+  /* At this point it must be known if the %_stat table exists or not.
+  ** So bHasStat may not be 2.  */
+  assert( p->bHasStat==0 || p->bHasStat==1 );
+
+  assert( p->pSegments==0 );
+  assert( 
+      nArg==1                     /* DELETE operations */
+   || nArg==(2 + p->nColumn + 3)  /* INSERT or UPDATE operations */
+  );
+
+  /* Check for a "special" INSERT operation. One of the form:
+  **
+  **   INSERT INTO xyz(xyz) VALUES('command');
+  */
+  if( nArg>1 
+   && sqlite3_value_type(apVal[0])==SQLITE_NULL 
+   && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL 
+  ){
+    rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
+    goto update_out;
+  }
+
+  if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
+    rc = SQLITE_CONSTRAINT;
+    goto update_out;
+  }
+
+  /* Allocate space to hold the change in document sizes */
+  aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
+  if( aSzDel==0 ){
+    rc = SQLITE_NOMEM;
+    goto update_out;
+  }
+  aSzIns = &aSzDel[p->nColumn+1];
+  memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
+
+  rc = fts3Writelock(p);
+  if( rc!=SQLITE_OK ) goto update_out;
+
+  /* If this is an INSERT operation, or an UPDATE that modifies the rowid
+  ** value, then this operation requires constraint handling.
+  **
+  ** If the on-conflict mode is REPLACE, this means that the existing row
+  ** should be deleted from the database before inserting the new row. Or,
+  ** if the on-conflict mode is other than REPLACE, then this method must
+  ** detect the conflict and return SQLITE_CONSTRAINT before beginning to
+  ** modify the database file.
+  */
+  if( nArg>1 && p->zContentTbl==0 ){
+    /* Find the value object that holds the new rowid value. */
+    sqlite3_value *pNewRowid = apVal[3+p->nColumn];
+    if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
+      pNewRowid = apVal[1];
+    }
+
+    if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( 
+        sqlite3_value_type(apVal[0])==SQLITE_NULL
+     || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid)
+    )){
+      /* The new rowid is not NULL (in this case the rowid will be
+      ** automatically assigned and there is no chance of a conflict), and 
+      ** the statement is either an INSERT or an UPDATE that modifies the
+      ** rowid column. So if the conflict mode is REPLACE, then delete any
+      ** existing row with rowid=pNewRowid. 
+      **
+      ** Or, if the conflict mode is not REPLACE, insert the new record into 
+      ** the %_content table. If we hit the duplicate rowid constraint (or any
+      ** other error) while doing so, return immediately.
+      **
+      ** This branch may also run if pNewRowid contains a value that cannot
+      ** be losslessly converted to an integer. In this case, the eventual 
+      ** call to fts3InsertData() (either just below or further on in this
+      ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is 
+      ** invoked, it will delete zero rows (since no row will have
+      ** docid=$pNewRowid if $pNewRowid is not an integer value).
+      */
+      if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){
+        rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel);
+      }else{
+        rc = fts3InsertData(p, apVal, pRowid);
+        bInsertDone = 1;
+      }
+    }
+  }
+  if( rc!=SQLITE_OK ){
+    goto update_out;
+  }
+
+  /* If this is a DELETE or UPDATE operation, remove the old record. */
+  if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
+    assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
+    rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
+    isRemove = 1;
+  }
+  
+  /* If this is an INSERT or UPDATE operation, insert the new record. */
+  if( nArg>1 && rc==SQLITE_OK ){
+    int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
+    if( bInsertDone==0 ){
+      rc = fts3InsertData(p, apVal, pRowid);
+      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
+        rc = FTS_CORRUPT_VTAB;
+      }
+    }
+    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
+      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
+    }
+    if( rc==SQLITE_OK ){
+      assert( p->iPrevDocid==*pRowid );
+      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
+    }
+    if( p->bHasDocsize ){
+      fts3InsertDocsize(&rc, p, aSzIns);
+    }
+    nChng++;
+  }
+
+  if( p->bFts4 ){
+    fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
+  }
+
+ update_out:
+  sqlite3_free(aSzDel);
+  sqlite3Fts3SegmentsClose(p);
+  return rc;
+}
+
+/* 
+** Flush any data in the pending-terms hash table to disk. If successful,
+** merge all segments in the database (including the new segment, if 
+** there was any data to flush) into a single segment. 
+*/
+SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){
+  int rc;
+  rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
+  if( rc==SQLITE_OK ){
+    rc = fts3DoOptimize(p, 1);
+    if( rc==SQLITE_OK || rc==SQLITE_DONE ){
+      int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
+      if( rc2!=SQLITE_OK ) rc = rc2;
+    }else{
+      sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
+      sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
+    }
+  }
+  sqlite3Fts3SegmentsClose(p);
+  return rc;
+}
+
+#endif
+
+/************** End of fts3_write.c ******************************************/
+/************** Begin file fts3_snippet.c ************************************/
+/*
+** 2009 Oct 23
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <string.h> */
+/* #include <assert.h> */
+
+/*
+** Characters that may appear in the second argument to matchinfo().
+*/
+#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
+#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
+#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
+#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
+#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
+#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
+#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
+
+/*
+** The default value for the second argument to matchinfo(). 
+*/
+#define FTS3_MATCHINFO_DEFAULT   "pcx"
+
+
+/*
+** Used as an fts3ExprIterate() context when loading phrase doclists to
+** Fts3Expr.aDoclist[]/nDoclist.
+*/
+typedef struct LoadDoclistCtx LoadDoclistCtx;
+struct LoadDoclistCtx {
+  Fts3Cursor *pCsr;               /* FTS3 Cursor */
+  int nPhrase;                    /* Number of phrases seen so far */
+  int nToken;                     /* Number of tokens seen so far */
+};
+
+/*
+** The following types are used as part of the implementation of the 
+** fts3BestSnippet() routine.
+*/
+typedef struct SnippetIter SnippetIter;
+typedef struct SnippetPhrase SnippetPhrase;
+typedef struct SnippetFragment SnippetFragment;
+
+struct SnippetIter {
+  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
+  int iCol;                       /* Extract snippet from this column */
+  int nSnippet;                   /* Requested snippet length (in tokens) */
+  int nPhrase;                    /* Number of phrases in query */
+  SnippetPhrase *aPhrase;         /* Array of size nPhrase */
+  int iCurrent;                   /* First token of current snippet */
+};
+
+struct SnippetPhrase {
+  int nToken;                     /* Number of tokens in phrase */
+  char *pList;                    /* Pointer to start of phrase position list */
+  int iHead;                      /* Next value in position list */
+  char *pHead;                    /* Position list data following iHead */
+  int iTail;                      /* Next value in trailing position list */
+  char *pTail;                    /* Position list data following iTail */
+};
+
+struct SnippetFragment {
+  int iCol;                       /* Column snippet is extracted from */
+  int iPos;                       /* Index of first token in snippet */
+  u64 covered;                    /* Mask of query phrases covered */
+  u64 hlmask;                     /* Mask of snippet terms to highlight */
+};
+
+/*
+** This type is used as an fts3ExprIterate() context object while 
+** accumulating the data returned by the matchinfo() function.
+*/
+typedef struct MatchInfo MatchInfo;
+struct MatchInfo {
+  Fts3Cursor *pCursor;            /* FTS3 Cursor */
+  int nCol;                       /* Number of columns in table */
+  int nPhrase;                    /* Number of matchable phrases in query */
+  sqlite3_int64 nDoc;             /* Number of docs in database */
+  u32 *aMatchinfo;                /* Pre-allocated buffer */
+};
+
+
+
+/*
+** The snippet() and offsets() functions both return text values. An instance
+** of the following structure is used to accumulate those values while the
+** functions are running. See fts3StringAppend() for details.
+*/
+typedef struct StrBuffer StrBuffer;
+struct StrBuffer {
+  char *z;                        /* Pointer to buffer containing string */
+  int n;                          /* Length of z in bytes (excl. nul-term) */
+  int nAlloc;                     /* Allocated size of buffer z in bytes */
+};
+
+
+/*
+** This function is used to help iterate through a position-list. A position
+** list is a list of unique integers, sorted from smallest to largest. Each
+** element of the list is represented by an FTS3 varint that takes the value
+** of the difference between the current element and the previous one plus
+** two. For example, to store the position-list:
+**
+**     4 9 113
+**
+** the three varints:
+**
+**     6 7 106
+**
+** are encoded.
+**
+** When this function is called, *pp points to the start of an element of
+** the list. *piPos contains the value of the previous entry in the list.
+** After it returns, *piPos contains the value of the next element of the
+** list and *pp is advanced to the following varint.
+*/
+static void fts3GetDeltaPosition(char **pp, int *piPos){
+  int iVal;
+  *pp += fts3GetVarint32(*pp, &iVal);
+  *piPos += (iVal-2);
+}
+
+/*
+** Helper function for fts3ExprIterate() (see below).
+*/
+static int fts3ExprIterate2(
+  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
+  int *piPhrase,                  /* Pointer to phrase counter */
+  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
+  void *pCtx                      /* Second argument to pass to callback */
+){
+  int rc;                         /* Return code */
+  int eType = pExpr->eType;       /* Type of expression node pExpr */
+
+  if( eType!=FTSQUERY_PHRASE ){
+    assert( pExpr->pLeft && pExpr->pRight );
+    rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
+    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
+      rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
+    }
+  }else{
+    rc = x(pExpr, *piPhrase, pCtx);
+    (*piPhrase)++;
+  }
+  return rc;
+}
+
+/*
+** Iterate through all phrase nodes in an FTS3 query, except those that
+** are part of a sub-tree that is the right-hand-side of a NOT operator.
+** For each phrase node found, the supplied callback function is invoked.
+**
+** If the callback function returns anything other than SQLITE_OK, 
+** the iteration is abandoned and the error code returned immediately.
+** Otherwise, SQLITE_OK is returned after a callback has been made for
+** all eligible phrase nodes.
+*/
+static int fts3ExprIterate(
+  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
+  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
+  void *pCtx                      /* Second argument to pass to callback */
+){
+  int iPhrase = 0;                /* Variable used as the phrase counter */
+  return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
+}
+
+/*
+** This is an fts3ExprIterate() callback used while loading the doclists
+** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
+** fts3ExprLoadDoclists().
+*/
+static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
+  int rc = SQLITE_OK;
+  Fts3Phrase *pPhrase = pExpr->pPhrase;
+  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
+
+  UNUSED_PARAMETER(iPhrase);
+
+  p->nPhrase++;
+  p->nToken += pPhrase->nToken;
+
+  return rc;
+}
+
+/*
+** Load the doclists for each phrase in the query associated with FTS3 cursor
+** pCsr. 
+**
+** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable 
+** phrases in the expression (all phrases except those directly or 
+** indirectly descended from the right-hand-side of a NOT operator). If 
+** pnToken is not NULL, then it is set to the number of tokens in all
+** matchable phrases of the expression.
+*/
+static int fts3ExprLoadDoclists(
+  Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
+  int *pnPhrase,                  /* OUT: Number of phrases in query */
+  int *pnToken                    /* OUT: Number of tokens in query */
+){
+  int rc;                         /* Return Code */
+  LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
+  sCtx.pCsr = pCsr;
+  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
+  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
+  if( pnToken ) *pnToken = sCtx.nToken;
+  return rc;
+}
+
+static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
+  (*(int *)ctx)++;
+  UNUSED_PARAMETER(pExpr);
+  UNUSED_PARAMETER(iPhrase);
+  return SQLITE_OK;
+}
+static int fts3ExprPhraseCount(Fts3Expr *pExpr){
+  int nPhrase = 0;
+  (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
+  return nPhrase;
+}
+
+/*
+** Advance the position list iterator specified by the first two 
+** arguments so that it points to the first element with a value greater
+** than or equal to parameter iNext.
+*/
+static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
+  char *pIter = *ppIter;
+  if( pIter ){
+    int iIter = *piIter;
+
+    while( iIter<iNext ){
+      if( 0==(*pIter & 0xFE) ){
+        iIter = -1;
+        pIter = 0;
+        break;
+      }
+      fts3GetDeltaPosition(&pIter, &iIter);
+    }
+
+    *piIter = iIter;
+    *ppIter = pIter;
+  }
+}
+
+/*
+** Advance the snippet iterator to the next candidate snippet.
+*/
+static int fts3SnippetNextCandidate(SnippetIter *pIter){
+  int i;                          /* Loop counter */
+
+  if( pIter->iCurrent<0 ){
+    /* The SnippetIter object has just been initialized. The first snippet
+    ** candidate always starts at offset 0 (even if this candidate has a
+    ** score of 0.0).
+    */
+    pIter->iCurrent = 0;
+
+    /* Advance the 'head' iterator of each phrase to the first offset that
+    ** is greater than or equal to (iNext+nSnippet).
+    */
+    for(i=0; i<pIter->nPhrase; i++){
+      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
+      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
+    }
+  }else{
+    int iStart;
+    int iEnd = 0x7FFFFFFF;
+
+    for(i=0; i<pIter->nPhrase; i++){
+      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
+      if( pPhrase->pHead && pPhrase->iHead<iEnd ){
+        iEnd = pPhrase->iHead;
+      }
+    }
+    if( iEnd==0x7FFFFFFF ){
+      return 1;
+    }
+
+    pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
+    for(i=0; i<pIter->nPhrase; i++){
+      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
+      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
+      fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
+    }
+  }
+
+  return 0;
+}
+
+/*
+** Retrieve information about the current candidate snippet of snippet 
+** iterator pIter.
+*/
+static void fts3SnippetDetails(
+  SnippetIter *pIter,             /* Snippet iterator */
+  u64 mCovered,                   /* Bitmask of phrases already covered */
+  int *piToken,                   /* OUT: First token of proposed snippet */
+  int *piScore,                   /* OUT: "Score" for this snippet */
+  u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
+  u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
+){
+  int iStart = pIter->iCurrent;   /* First token of snippet */
+  int iScore = 0;                 /* Score of this snippet */
+  int i;                          /* Loop counter */
+  u64 mCover = 0;                 /* Mask of phrases covered by this snippet */
+  u64 mHighlight = 0;             /* Mask of tokens to highlight in snippet */
+
+  for(i=0; i<pIter->nPhrase; i++){
+    SnippetPhrase *pPhrase = &pIter->aPhrase[i];
+    if( pPhrase->pTail ){
+      char *pCsr = pPhrase->pTail;
+      int iCsr = pPhrase->iTail;
+
+      while( iCsr<(iStart+pIter->nSnippet) ){
+        int j;
+        u64 mPhrase = (u64)1 << i;
+        u64 mPos = (u64)1 << (iCsr - iStart);
+        assert( iCsr>=iStart );
+        if( (mCover|mCovered)&mPhrase ){
+          iScore++;
+        }else{
+          iScore += 1000;
+        }
+        mCover |= mPhrase;
+
+        for(j=0; j<pPhrase->nToken; j++){
+          mHighlight |= (mPos>>j);
+        }
+
+        if( 0==(*pCsr & 0x0FE) ) break;
+        fts3GetDeltaPosition(&pCsr, &iCsr);
+      }
+    }
+  }
+
+  /* Set the output variables before returning. */
+  *piToken = iStart;
+  *piScore = iScore;
+  *pmCover = mCover;
+  *pmHighlight = mHighlight;
+}
+
+/*
+** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
+** Each invocation populates an element of the SnippetIter.aPhrase[] array.
+*/
+static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
+  SnippetIter *p = (SnippetIter *)ctx;
+  SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
+  char *pCsr;
+  int rc;
+
+  pPhrase->nToken = pExpr->pPhrase->nToken;
+  rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
+  assert( rc==SQLITE_OK || pCsr==0 );
+  if( pCsr ){
+    int iFirst = 0;
+    pPhrase->pList = pCsr;
+    fts3GetDeltaPosition(&pCsr, &iFirst);
+    assert( iFirst>=0 );
+    pPhrase->pHead = pCsr;
+    pPhrase->pTail = pCsr;
+    pPhrase->iHead = iFirst;
+    pPhrase->iTail = iFirst;
+  }else{
+    assert( rc!=SQLITE_OK || (
+       pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 
+    ));
+  }
+
+  return rc;
+}
+
+/*
+** Select the fragment of text consisting of nFragment contiguous tokens 
+** from column iCol that represent the "best" snippet. The best snippet
+** is the snippet with the highest score, where scores are calculated
+** by adding:
+**
+**   (a) +1 point for each occurrence of a matchable phrase in the snippet.
+**
+**   (b) +1000 points for the first occurrence of each matchable phrase in 
+**       the snippet for which the corresponding mCovered bit is not set.
+**
+** The selected snippet parameters are stored in structure *pFragment before
+** returning. The score of the selected snippet is stored in *piScore
+** before returning.
+*/
+static int fts3BestSnippet(
+  int nSnippet,                   /* Desired snippet length */
+  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
+  int iCol,                       /* Index of column to create snippet from */
+  u64 mCovered,                   /* Mask of phrases already covered */
+  u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
+  SnippetFragment *pFragment,     /* OUT: Best snippet found */
+  int *piScore                    /* OUT: Score of snippet pFragment */
+){
+  int rc;                         /* Return Code */
+  int nList;                      /* Number of phrases in expression */
+  SnippetIter sIter;              /* Iterates through snippet candidates */
+  int nByte;                      /* Number of bytes of space to allocate */
+  int iBestScore = -1;            /* Best snippet score found so far */
+  int i;                          /* Loop counter */
+
+  memset(&sIter, 0, sizeof(sIter));
+
+  /* Iterate through the phrases in the expression to count them. The same
+  ** callback makes sure the doclists are loaded for each phrase.
+  */
+  rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* Now that it is known how many phrases there are, allocate and zero
+  ** the required space using malloc().
+  */
+  nByte = sizeof(SnippetPhrase) * nList;
+  sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
+  if( !sIter.aPhrase ){
+    return SQLITE_NOMEM;
+  }
+  memset(sIter.aPhrase, 0, nByte);
+
+  /* Initialize the contents of the SnippetIter object. Then iterate through
+  ** the set of phrases in the expression to populate the aPhrase[] array.
+  */
+  sIter.pCsr = pCsr;
+  sIter.iCol = iCol;
+  sIter.nSnippet = nSnippet;
+  sIter.nPhrase = nList;
+  sIter.iCurrent = -1;
+  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
+
+  /* Set the *pmSeen output variable. */
+  for(i=0; i<nList; i++){
+    if( sIter.aPhrase[i].pHead ){
+      *pmSeen |= (u64)1 << i;
+    }
+  }
+
+  /* Loop through all candidate snippets. Store the best snippet in 
+  ** *pFragment. Store its associated 'score' in iBestScore.
+  */
+  pFragment->iCol = iCol;
+  while( !fts3SnippetNextCandidate(&sIter) ){
+    int iPos;
+    int iScore;
+    u64 mCover;
+    u64 mHighlight;
+    fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
+    assert( iScore>=0 );
+    if( iScore>iBestScore ){
+      pFragment->iPos = iPos;
+      pFragment->hlmask = mHighlight;
+      pFragment->covered = mCover;
+      iBestScore = iScore;
+    }
+  }
+
+  sqlite3_free(sIter.aPhrase);
+  *piScore = iBestScore;
+  return SQLITE_OK;
+}
+
+
+/*
+** Append a string to the string-buffer passed as the first argument.
+**
+** If nAppend is negative, then the length of the string zAppend is
+** determined using strlen().
+*/
+static int fts3StringAppend(
+  StrBuffer *pStr,                /* Buffer to append to */
+  const char *zAppend,            /* Pointer to data to append to buffer */
+  int nAppend                     /* Size of zAppend in bytes (or -1) */
+){
+  if( nAppend<0 ){
+    nAppend = (int)strlen(zAppend);
+  }
+
+  /* If there is insufficient space allocated at StrBuffer.z, use realloc()
+  ** to grow the buffer until so that it is big enough to accomadate the
+  ** appended data.
+  */
+  if( pStr->n+nAppend+1>=pStr->nAlloc ){
+    int nAlloc = pStr->nAlloc+nAppend+100;
+    char *zNew = sqlite3_realloc(pStr->z, nAlloc);
+    if( !zNew ){
+      return SQLITE_NOMEM;
+    }
+    pStr->z = zNew;
+    pStr->nAlloc = nAlloc;
+  }
+  assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
+
+  /* Append the data to the string buffer. */
+  memcpy(&pStr->z[pStr->n], zAppend, nAppend);
+  pStr->n += nAppend;
+  pStr->z[pStr->n] = '\0';
+
+  return SQLITE_OK;
+}
+
+/*
+** The fts3BestSnippet() function often selects snippets that end with a
+** query term. That is, the final term of the snippet is always a term
+** that requires highlighting. For example, if 'X' is a highlighted term
+** and '.' is a non-highlighted term, BestSnippet() may select:
+**
+**     ........X.....X
+**
+** This function "shifts" the beginning of the snippet forward in the 
+** document so that there are approximately the same number of 
+** non-highlighted terms to the right of the final highlighted term as there
+** are to the left of the first highlighted term. For example, to this:
+**
+**     ....X.....X....
+**
+** This is done as part of extracting the snippet text, not when selecting
+** the snippet. Snippet selection is done based on doclists only, so there
+** is no way for fts3BestSnippet() to know whether or not the document 
+** actually contains terms that follow the final highlighted term. 
+*/
+static int fts3SnippetShift(
+  Fts3Table *pTab,                /* FTS3 table snippet comes from */
+  int iLangid,                    /* Language id to use in tokenizing */
+  int nSnippet,                   /* Number of tokens desired for snippet */
+  const char *zDoc,               /* Document text to extract snippet from */
+  int nDoc,                       /* Size of buffer zDoc in bytes */
+  int *piPos,                     /* IN/OUT: First token of snippet */
+  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
+){
+  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
+
+  if( hlmask ){
+    int nLeft;                    /* Tokens to the left of first highlight */
+    int nRight;                   /* Tokens to the right of last highlight */
+    int nDesired;                 /* Ideal number of tokens to shift forward */
+
+    for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
+    for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
+    nDesired = (nLeft-nRight)/2;
+
+    /* Ideally, the start of the snippet should be pushed forward in the
+    ** document nDesired tokens. This block checks if there are actually
+    ** nDesired tokens to the right of the snippet. If so, *piPos and
+    ** *pHlMask are updated to shift the snippet nDesired tokens to the
+    ** right. Otherwise, the snippet is shifted by the number of tokens
+    ** available.
+    */
+    if( nDesired>0 ){
+      int nShift;                 /* Number of tokens to shift snippet by */
+      int iCurrent = 0;           /* Token counter */
+      int rc;                     /* Return Code */
+      sqlite3_tokenizer_module *pMod;
+      sqlite3_tokenizer_cursor *pC;
+      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
+
+      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
+      ** or more tokens in zDoc/nDoc.
+      */
+      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
+      if( rc!=SQLITE_OK ){
+        return rc;
+      }
+      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
+        const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
+        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
+      }
+      pMod->xClose(pC);
+      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
+
+      nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
+      assert( nShift<=nDesired );
+      if( nShift>0 ){
+        *piPos += nShift;
+        *pHlmask = hlmask >> nShift;
+      }
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Extract the snippet text for fragment pFragment from cursor pCsr and
+** append it to string buffer pOut.
+*/
+static int fts3SnippetText(
+  Fts3Cursor *pCsr,               /* FTS3 Cursor */
+  SnippetFragment *pFragment,     /* Snippet to extract */
+  int iFragment,                  /* Fragment number */
+  int isLast,                     /* True for final fragment in snippet */
+  int nSnippet,                   /* Number of tokens in extracted snippet */
+  const char *zOpen,              /* String inserted before highlighted term */
+  const char *zClose,             /* String inserted after highlighted term */
+  const char *zEllipsis,          /* String inserted between snippets */
+  StrBuffer *pOut                 /* Write output here */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc;                         /* Return code */
+  const char *zDoc;               /* Document text to extract snippet from */
+  int nDoc;                       /* Size of zDoc in bytes */
+  int iCurrent = 0;               /* Current token number of document */
+  int iEnd = 0;                   /* Byte offset of end of current token */
+  int isShiftDone = 0;            /* True after snippet is shifted */
+  int iPos = pFragment->iPos;     /* First token of snippet */
+  u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
+  int iCol = pFragment->iCol+1;   /* Query column to extract text from */
+  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
+  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
+  
+  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
+  if( zDoc==0 ){
+    if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
+      return SQLITE_NOMEM;
+    }
+    return SQLITE_OK;
+  }
+  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
+
+  /* Open a token cursor on the document. */
+  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
+  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  while( rc==SQLITE_OK ){
+    const char *ZDUMMY;           /* Dummy argument used with tokenizer */
+    int DUMMY1 = -1;              /* Dummy argument used with tokenizer */
+    int iBegin = 0;               /* Offset in zDoc of start of token */
+    int iFin = 0;                 /* Offset in zDoc of end of token */
+    int isHighlight = 0;          /* True for highlighted terms */
+
+    /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
+    ** in the FTS code the variable that the third argument to xNext points to
+    ** is initialized to zero before the first (*but not necessarily
+    ** subsequent*) call to xNext(). This is done for a particular application
+    ** that needs to know whether or not the tokenizer is being used for
+    ** snippet generation or for some other purpose.
+    **
+    ** Extreme care is required when writing code to depend on this
+    ** initialization. It is not a documented part of the tokenizer interface.
+    ** If a tokenizer is used directly by any code outside of FTS, this
+    ** convention might not be respected.  */
+    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
+    if( rc!=SQLITE_OK ){
+      if( rc==SQLITE_DONE ){
+        /* Special case - the last token of the snippet is also the last token
+        ** of the column. Append any punctuation that occurred between the end
+        ** of the previous token and the end of the document to the output. 
+        ** Then break out of the loop. */
+        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
+      }
+      break;
+    }
+    if( iCurrent<iPos ){ continue; }
+
+    if( !isShiftDone ){
+      int n = nDoc - iBegin;
+      rc = fts3SnippetShift(
+          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
+      );
+      isShiftDone = 1;
+
+      /* Now that the shift has been done, check if the initial "..." are
+      ** required. They are required if (a) this is not the first fragment,
+      ** or (b) this fragment does not begin at position 0 of its column. 
+      */
+      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
+        rc = fts3StringAppend(pOut, zEllipsis, -1);
+      }
+      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
+    }
+
+    if( iCurrent>=(iPos+nSnippet) ){
+      if( isLast ){
+        rc = fts3StringAppend(pOut, zEllipsis, -1);
+      }
+      break;
+    }
+
+    /* Set isHighlight to true if this term should be highlighted. */
+    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
+
+    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
+    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
+    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
+    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
+
+    iEnd = iFin;
+  }
+
+  pMod->xClose(pC);
+  return rc;
+}
+
+
+/*
+** This function is used to count the entries in a column-list (a 
+** delta-encoded list of term offsets within a single column of a single 
+** row). When this function is called, *ppCollist should point to the
+** beginning of the first varint in the column-list (the varint that
+** contains the position of the first matching term in the column data).
+** Before returning, *ppCollist is set to point to the first byte after
+** the last varint in the column-list (either the 0x00 signifying the end
+** of the position-list, or the 0x01 that precedes the column number of
+** the next column in the position-list).
+**
+** The number of elements in the column-list is returned.
+*/
+static int fts3ColumnlistCount(char **ppCollist){
+  char *pEnd = *ppCollist;
+  char c = 0;
+  int nEntry = 0;
+
+  /* A column-list is terminated by either a 0x01 or 0x00. */
+  while( 0xFE & (*pEnd | c) ){
+    c = *pEnd++ & 0x80;
+    if( !c ) nEntry++;
+  }
+
+  *ppCollist = pEnd;
+  return nEntry;
+}
+
+/*
+** fts3ExprIterate() callback used to collect the "global" matchinfo stats
+** for a single query. 
+**
+** fts3ExprIterate() callback to load the 'global' elements of a
+** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements 
+** of the matchinfo array that are constant for all rows returned by the 
+** current query.
+**
+** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
+** function populates Matchinfo.aMatchinfo[] as follows:
+**
+**   for(iCol=0; iCol<nCol; iCol++){
+**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
+**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
+**   }
+**
+** where X is the number of matches for phrase iPhrase is column iCol of all
+** rows of the table. Y is the number of rows for which column iCol contains
+** at least one instance of phrase iPhrase.
+**
+** If the phrase pExpr consists entirely of deferred tokens, then all X and
+** Y values are set to nDoc, where nDoc is the number of documents in the 
+** file system. This is done because the full-text index doclist is required
+** to calculate these values properly, and the full-text index doclist is
+** not available for deferred tokens.
+*/
+static int fts3ExprGlobalHitsCb(
+  Fts3Expr *pExpr,                /* Phrase expression node */
+  int iPhrase,                    /* Phrase number (numbered from zero) */
+  void *pCtx                      /* Pointer to MatchInfo structure */
+){
+  MatchInfo *p = (MatchInfo *)pCtx;
+  return sqlite3Fts3EvalPhraseStats(
+      p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
+  );
+}
+
+/*
+** fts3ExprIterate() callback used to collect the "local" part of the
+** FTS3_MATCHINFO_HITS array. The local stats are those elements of the 
+** array that are different for each row returned by the query.
+*/
+static int fts3ExprLocalHitsCb(
+  Fts3Expr *pExpr,                /* Phrase expression node */
+  int iPhrase,                    /* Phrase number */
+  void *pCtx                      /* Pointer to MatchInfo structure */
+){
+  int rc = SQLITE_OK;
+  MatchInfo *p = (MatchInfo *)pCtx;
+  int iStart = iPhrase * p->nCol * 3;
+  int i;
+
+  for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
+    char *pCsr;
+    rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
+    if( pCsr ){
+      p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
+    }else{
+      p->aMatchinfo[iStart+i*3] = 0;
+    }
+  }
+
+  return rc;
+}
+
+static int fts3MatchinfoCheck(
+  Fts3Table *pTab, 
+  char cArg,
+  char **pzErr
+){
+  if( (cArg==FTS3_MATCHINFO_NPHRASE)
+   || (cArg==FTS3_MATCHINFO_NCOL)
+   || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
+   || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
+   || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
+   || (cArg==FTS3_MATCHINFO_LCS)
+   || (cArg==FTS3_MATCHINFO_HITS)
+  ){
+    return SQLITE_OK;
+  }
+  *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
+  return SQLITE_ERROR;
+}
+
+static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
+  int nVal;                       /* Number of integers output by cArg */
+
+  switch( cArg ){
+    case FTS3_MATCHINFO_NDOC:
+    case FTS3_MATCHINFO_NPHRASE: 
+    case FTS3_MATCHINFO_NCOL: 
+      nVal = 1;
+      break;
+
+    case FTS3_MATCHINFO_AVGLENGTH:
+    case FTS3_MATCHINFO_LENGTH:
+    case FTS3_MATCHINFO_LCS:
+      nVal = pInfo->nCol;
+      break;
+
+    default:
+      assert( cArg==FTS3_MATCHINFO_HITS );
+      nVal = pInfo->nCol * pInfo->nPhrase * 3;
+      break;
+  }
+
+  return nVal;
+}
+
+static int fts3MatchinfoSelectDoctotal(
+  Fts3Table *pTab,
+  sqlite3_stmt **ppStmt,
+  sqlite3_int64 *pnDoc,
+  const char **paLen
+){
+  sqlite3_stmt *pStmt;
+  const char *a;
+  sqlite3_int64 nDoc;
+
+  if( !*ppStmt ){
+    int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  pStmt = *ppStmt;
+  assert( sqlite3_data_count(pStmt)==1 );
+
+  a = sqlite3_column_blob(pStmt, 0);
+  a += sqlite3Fts3GetVarint(a, &nDoc);
+  if( nDoc==0 ) return FTS_CORRUPT_VTAB;
+  *pnDoc = (u32)nDoc;
+
+  if( paLen ) *paLen = a;
+  return SQLITE_OK;
+}
+
+/*
+** An instance of the following structure is used to store state while 
+** iterating through a multi-column position-list corresponding to the
+** hits for a single phrase on a single row in order to calculate the
+** values for a matchinfo() FTS3_MATCHINFO_LCS request.
+*/
+typedef struct LcsIterator LcsIterator;
+struct LcsIterator {
+  Fts3Expr *pExpr;                /* Pointer to phrase expression */
+  int iPosOffset;                 /* Tokens count up to end of this phrase */
+  char *pRead;                    /* Cursor used to iterate through aDoclist */
+  int iPos;                       /* Current position */
+};
+
+/* 
+** If LcsIterator.iCol is set to the following value, the iterator has
+** finished iterating through all offsets for all columns.
+*/
+#define LCS_ITERATOR_FINISHED 0x7FFFFFFF;
+
+static int fts3MatchinfoLcsCb(
+  Fts3Expr *pExpr,                /* Phrase expression node */
+  int iPhrase,                    /* Phrase number (numbered from zero) */
+  void *pCtx                      /* Pointer to MatchInfo structure */
+){
+  LcsIterator *aIter = (LcsIterator *)pCtx;
+  aIter[iPhrase].pExpr = pExpr;
+  return SQLITE_OK;
+}
+
+/*
+** Advance the iterator passed as an argument to the next position. Return
+** 1 if the iterator is at EOF or if it now points to the start of the
+** position list for the next column.
+*/
+static int fts3LcsIteratorAdvance(LcsIterator *pIter){
+  char *pRead = pIter->pRead;
+  sqlite3_int64 iRead;
+  int rc = 0;
+
+  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
+  if( iRead==0 || iRead==1 ){
+    pRead = 0;
+    rc = 1;
+  }else{
+    pIter->iPos += (int)(iRead-2);
+  }
+
+  pIter->pRead = pRead;
+  return rc;
+}
+  
+/*
+** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. 
+**
+** If the call is successful, the longest-common-substring lengths for each
+** column are written into the first nCol elements of the pInfo->aMatchinfo[] 
+** array before returning. SQLITE_OK is returned in this case.
+**
+** Otherwise, if an error occurs, an SQLite error code is returned and the
+** data written to the first nCol elements of pInfo->aMatchinfo[] is 
+** undefined.
+*/
+static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
+  LcsIterator *aIter;
+  int i;
+  int iCol;
+  int nToken = 0;
+
+  /* Allocate and populate the array of LcsIterator objects. The array
+  ** contains one element for each matchable phrase in the query.
+  **/
+  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
+  if( !aIter ) return SQLITE_NOMEM;
+  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
+  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
+
+  for(i=0; i<pInfo->nPhrase; i++){
+    LcsIterator *pIter = &aIter[i];
+    nToken -= pIter->pExpr->pPhrase->nToken;
+    pIter->iPosOffset = nToken;
+  }
+
+  for(iCol=0; iCol<pInfo->nCol; iCol++){
+    int nLcs = 0;                 /* LCS value for this column */
+    int nLive = 0;                /* Number of iterators in aIter not at EOF */
+
+    for(i=0; i<pInfo->nPhrase; i++){
+      int rc;
+      LcsIterator *pIt = &aIter[i];
+      rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
+      if( rc!=SQLITE_OK ) return rc;
+      if( pIt->pRead ){
+        pIt->iPos = pIt->iPosOffset;
+        fts3LcsIteratorAdvance(&aIter[i]);
+        nLive++;
+      }
+    }
+
+    while( nLive>0 ){
+      LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
+      int nThisLcs = 0;           /* LCS for the current iterator positions */
+
+      for(i=0; i<pInfo->nPhrase; i++){
+        LcsIterator *pIter = &aIter[i];
+        if( pIter->pRead==0 ){
+          /* This iterator is already at EOF for this column. */
+          nThisLcs = 0;
+        }else{
+          if( pAdv==0 || pIter->iPos<pAdv->iPos ){
+            pAdv = pIter;
+          }
+          if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
+            nThisLcs++;
+          }else{
+            nThisLcs = 1;
+          }
+          if( nThisLcs>nLcs ) nLcs = nThisLcs;
+        }
+      }
+      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
+    }
+
+    pInfo->aMatchinfo[iCol] = nLcs;
+  }
+
+  sqlite3_free(aIter);
+  return SQLITE_OK;
+}
+
+/*
+** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
+** be returned by the matchinfo() function. Argument zArg contains the 
+** format string passed as the second argument to matchinfo (or the
+** default value "pcx" if no second argument was specified). The format
+** string has already been validated and the pInfo->aMatchinfo[] array
+** is guaranteed to be large enough for the output.
+**
+** If bGlobal is true, then populate all fields of the matchinfo() output.
+** If it is false, then assume that those fields that do not change between
+** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
+** have already been populated.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if an error 
+** occurs. If a value other than SQLITE_OK is returned, the state the
+** pInfo->aMatchinfo[] buffer is left in is undefined.
+*/
+static int fts3MatchinfoValues(
+  Fts3Cursor *pCsr,               /* FTS3 cursor object */
+  int bGlobal,                    /* True to grab the global stats */
+  MatchInfo *pInfo,               /* Matchinfo context object */
+  const char *zArg                /* Matchinfo format string */
+){
+  int rc = SQLITE_OK;
+  int i;
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  sqlite3_stmt *pSelect = 0;
+
+  for(i=0; rc==SQLITE_OK && zArg[i]; i++){
+
+    switch( zArg[i] ){
+      case FTS3_MATCHINFO_NPHRASE:
+        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
+        break;
+
+      case FTS3_MATCHINFO_NCOL:
+        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
+        break;
+        
+      case FTS3_MATCHINFO_NDOC:
+        if( bGlobal ){
+          sqlite3_int64 nDoc = 0;
+          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
+          pInfo->aMatchinfo[0] = (u32)nDoc;
+        }
+        break;
+
+      case FTS3_MATCHINFO_AVGLENGTH: 
+        if( bGlobal ){
+          sqlite3_int64 nDoc;     /* Number of rows in table */
+          const char *a;          /* Aggregate column length array */
+
+          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
+          if( rc==SQLITE_OK ){
+            int iCol;
+            for(iCol=0; iCol<pInfo->nCol; iCol++){
+              u32 iVal;
+              sqlite3_int64 nToken;
+              a += sqlite3Fts3GetVarint(a, &nToken);
+              iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
+              pInfo->aMatchinfo[iCol] = iVal;
+            }
+          }
+        }
+        break;
+
+      case FTS3_MATCHINFO_LENGTH: {
+        sqlite3_stmt *pSelectDocsize = 0;
+        rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
+        if( rc==SQLITE_OK ){
+          int iCol;
+          const char *a = sqlite3_column_blob(pSelectDocsize, 0);
+          for(iCol=0; iCol<pInfo->nCol; iCol++){
+            sqlite3_int64 nToken;
+            a += sqlite3Fts3GetVarint(a, &nToken);
+            pInfo->aMatchinfo[iCol] = (u32)nToken;
+          }
+        }
+        sqlite3_reset(pSelectDocsize);
+        break;
+      }
+
+      case FTS3_MATCHINFO_LCS:
+        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
+        if( rc==SQLITE_OK ){
+          rc = fts3MatchinfoLcs(pCsr, pInfo);
+        }
+        break;
+
+      default: {
+        Fts3Expr *pExpr;
+        assert( zArg[i]==FTS3_MATCHINFO_HITS );
+        pExpr = pCsr->pExpr;
+        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
+        if( rc!=SQLITE_OK ) break;
+        if( bGlobal ){
+          if( pCsr->pDeferred ){
+            rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
+            if( rc!=SQLITE_OK ) break;
+          }
+          rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
+          if( rc!=SQLITE_OK ) break;
+        }
+        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
+        break;
+      }
+    }
+
+    pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
+  }
+
+  sqlite3_reset(pSelect);
+  return rc;
+}
+
+
+/*
+** Populate pCsr->aMatchinfo[] with data for the current row. The 
+** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
+*/
+static int fts3GetMatchinfo(
+  Fts3Cursor *pCsr,               /* FTS3 Cursor object */
+  const char *zArg                /* Second argument to matchinfo() function */
+){
+  MatchInfo sInfo;
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc = SQLITE_OK;
+  int bGlobal = 0;                /* Collect 'global' stats as well as local */
+
+  memset(&sInfo, 0, sizeof(MatchInfo));
+  sInfo.pCursor = pCsr;
+  sInfo.nCol = pTab->nColumn;
+
+  /* If there is cached matchinfo() data, but the format string for the 
+  ** cache does not match the format string for this request, discard 
+  ** the cached data. */
+  if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
+    assert( pCsr->aMatchinfo );
+    sqlite3_free(pCsr->aMatchinfo);
+    pCsr->zMatchinfo = 0;
+    pCsr->aMatchinfo = 0;
+  }
+
+  /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
+  ** matchinfo function has been called for this query. In this case 
+  ** allocate the array used to accumulate the matchinfo data and
+  ** initialize those elements that are constant for every row.
+  */
+  if( pCsr->aMatchinfo==0 ){
+    int nMatchinfo = 0;           /* Number of u32 elements in match-info */
+    int nArg;                     /* Bytes in zArg */
+    int i;                        /* Used to iterate through zArg */
+
+    /* Determine the number of phrases in the query */
+    pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
+    sInfo.nPhrase = pCsr->nPhrase;
+
+    /* Determine the number of integers in the buffer returned by this call. */
+    for(i=0; zArg[i]; i++){
+      nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
+    }
+
+    /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
+    nArg = (int)strlen(zArg);
+    pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
+    if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
+
+    pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
+    pCsr->nMatchinfo = nMatchinfo;
+    memcpy(pCsr->zMatchinfo, zArg, nArg+1);
+    memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
+    pCsr->isMatchinfoNeeded = 1;
+    bGlobal = 1;
+  }
+
+  sInfo.aMatchinfo = pCsr->aMatchinfo;
+  sInfo.nPhrase = pCsr->nPhrase;
+  if( pCsr->isMatchinfoNeeded ){
+    rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
+    pCsr->isMatchinfoNeeded = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Implementation of snippet() function.
+*/
+SQLITE_PRIVATE void sqlite3Fts3Snippet(
+  sqlite3_context *pCtx,          /* SQLite function call context */
+  Fts3Cursor *pCsr,               /* Cursor object */
+  const char *zStart,             /* Snippet start text - "<b>" */
+  const char *zEnd,               /* Snippet end text - "</b>" */
+  const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
+  int iCol,                       /* Extract snippet from this column */
+  int nToken                      /* Approximate number of tokens in snippet */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc = SQLITE_OK;
+  int i;
+  StrBuffer res = {0, 0, 0};
+
+  /* The returned text includes up to four fragments of text extracted from
+  ** the data in the current row. The first iteration of the for(...) loop
+  ** below attempts to locate a single fragment of text nToken tokens in 
+  ** size that contains at least one instance of all phrases in the query
+  ** expression that appear in the current row. If such a fragment of text
+  ** cannot be found, the second iteration of the loop attempts to locate
+  ** a pair of fragments, and so on.
+  */
+  int nSnippet = 0;               /* Number of fragments in this snippet */
+  SnippetFragment aSnippet[4];    /* Maximum of 4 fragments per snippet */
+  int nFToken = -1;               /* Number of tokens in each fragment */
+
+  if( !pCsr->pExpr ){
+    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
+    return;
+  }
+
+  for(nSnippet=1; 1; nSnippet++){
+
+    int iSnip;                    /* Loop counter 0..nSnippet-1 */
+    u64 mCovered = 0;             /* Bitmask of phrases covered by snippet */
+    u64 mSeen = 0;                /* Bitmask of phrases seen by BestSnippet() */
+
+    if( nToken>=0 ){
+      nFToken = (nToken+nSnippet-1) / nSnippet;
+    }else{
+      nFToken = -1 * nToken;
+    }
+
+    for(iSnip=0; iSnip<nSnippet; iSnip++){
+      int iBestScore = -1;        /* Best score of columns checked so far */
+      int iRead;                  /* Used to iterate through columns */
+      SnippetFragment *pFragment = &aSnippet[iSnip];
+
+      memset(pFragment, 0, sizeof(*pFragment));
+
+      /* Loop through all columns of the table being considered for snippets.
+      ** If the iCol argument to this function was negative, this means all
+      ** columns of the FTS3 table. Otherwise, only column iCol is considered.
+      */
+      for(iRead=0; iRead<pTab->nColumn; iRead++){
+        SnippetFragment sF = {0, 0, 0, 0};
+        int iS;
+        if( iCol>=0 && iRead!=iCol ) continue;
+
+        /* Find the best snippet of nFToken tokens in column iRead. */
+        rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
+        if( rc!=SQLITE_OK ){
+          goto snippet_out;
+        }
+        if( iS>iBestScore ){
+          *pFragment = sF;
+          iBestScore = iS;
+        }
+      }
+
+      mCovered |= pFragment->covered;
+    }
+
+    /* If all query phrases seen by fts3BestSnippet() are present in at least
+    ** one of the nSnippet snippet fragments, break out of the loop.
+    */
+    assert( (mCovered&mSeen)==mCovered );
+    if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
+  }
+
+  assert( nFToken>0 );
+
+  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
+    rc = fts3SnippetText(pCsr, &aSnippet[i], 
+        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
+    );
+  }
+
+ snippet_out:
+  sqlite3Fts3SegmentsClose(pTab);
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx, rc);
+    sqlite3_free(res.z);
+  }else{
+    sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
+  }
+}
+
+
+typedef struct TermOffset TermOffset;
+typedef struct TermOffsetCtx TermOffsetCtx;
+
+struct TermOffset {
+  char *pList;                    /* Position-list */
+  int iPos;                       /* Position just read from pList */
+  int iOff;                       /* Offset of this term from read positions */
+};
+
+struct TermOffsetCtx {
+  Fts3Cursor *pCsr;
+  int iCol;                       /* Column of table to populate aTerm for */
+  int iTerm;
+  sqlite3_int64 iDocid;
+  TermOffset *aTerm;
+};
+
+/*
+** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
+*/
+static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
+  TermOffsetCtx *p = (TermOffsetCtx *)ctx;
+  int nTerm;                      /* Number of tokens in phrase */
+  int iTerm;                      /* For looping through nTerm phrase terms */
+  char *pList;                    /* Pointer to position list for phrase */
+  int iPos = 0;                   /* First position in position-list */
+  int rc;
+
+  UNUSED_PARAMETER(iPhrase);
+  rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
+  nTerm = pExpr->pPhrase->nToken;
+  if( pList ){
+    fts3GetDeltaPosition(&pList, &iPos);
+    assert( iPos>=0 );
+  }
+
+  for(iTerm=0; iTerm<nTerm; iTerm++){
+    TermOffset *pT = &p->aTerm[p->iTerm++];
+    pT->iOff = nTerm-iTerm-1;
+    pT->pList = pList;
+    pT->iPos = iPos;
+  }
+
+  return rc;
+}
+
+/*
+** Implementation of offsets() function.
+*/
+SQLITE_PRIVATE void sqlite3Fts3Offsets(
+  sqlite3_context *pCtx,          /* SQLite function call context */
+  Fts3Cursor *pCsr                /* Cursor object */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
+  int rc;                         /* Return Code */
+  int nToken;                     /* Number of tokens in query */
+  int iCol;                       /* Column currently being processed */
+  StrBuffer res = {0, 0, 0};      /* Result string */
+  TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */
+
+  if( !pCsr->pExpr ){
+    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
+    return;
+  }
+
+  memset(&sCtx, 0, sizeof(sCtx));
+  assert( pCsr->isRequireSeek==0 );
+
+  /* Count the number of terms in the query */
+  rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
+  if( rc!=SQLITE_OK ) goto offsets_out;
+
+  /* Allocate the array of TermOffset iterators. */
+  sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
+  if( 0==sCtx.aTerm ){
+    rc = SQLITE_NOMEM;
+    goto offsets_out;
+  }
+  sCtx.iDocid = pCsr->iPrevId;
+  sCtx.pCsr = pCsr;
+
+  /* Loop through the table columns, appending offset information to 
+  ** string-buffer res for each column.
+  */
+  for(iCol=0; iCol<pTab->nColumn; iCol++){
+    sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
+    const char *ZDUMMY;           /* Dummy argument used with xNext() */
+    int NDUMMY = 0;               /* Dummy argument used with xNext() */
+    int iStart = 0;
+    int iEnd = 0;
+    int iCurrent = 0;
+    const char *zDoc;
+    int nDoc;
+
+    /* Initialize the contents of sCtx.aTerm[] for column iCol. There is 
+    ** no way that this operation can fail, so the return code from
+    ** fts3ExprIterate() can be discarded.
+    */
+    sCtx.iCol = iCol;
+    sCtx.iTerm = 0;
+    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
+
+    /* Retreive the text stored in column iCol. If an SQL NULL is stored 
+    ** in column iCol, jump immediately to the next iteration of the loop.
+    ** If an OOM occurs while retrieving the data (this can happen if SQLite
+    ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM 
+    ** to the caller. 
+    */
+    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
+    nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
+    if( zDoc==0 ){
+      if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
+        continue;
+      }
+      rc = SQLITE_NOMEM;
+      goto offsets_out;
+    }
+
+    /* Initialize a tokenizer iterator to iterate through column iCol. */
+    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
+        zDoc, nDoc, &pC
+    );
+    if( rc!=SQLITE_OK ) goto offsets_out;
+
+    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
+    while( rc==SQLITE_OK ){
+      int i;                      /* Used to loop through terms */
+      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
+      TermOffset *pTerm = 0;      /* TermOffset associated with next token */
+
+      for(i=0; i<nToken; i++){
+        TermOffset *pT = &sCtx.aTerm[i];
+        if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
+          iMinPos = pT->iPos-pT->iOff;
+          pTerm = pT;
+        }
+      }
+
+      if( !pTerm ){
+        /* All offsets for this column have been gathered. */
+        rc = SQLITE_DONE;
+      }else{
+        assert( iCurrent<=iMinPos );
+        if( 0==(0xFE&*pTerm->pList) ){
+          pTerm->pList = 0;
+        }else{
+          fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
+        }
+        while( rc==SQLITE_OK && iCurrent<iMinPos ){
+          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
+        }
+        if( rc==SQLITE_OK ){
+          char aBuffer[64];
+          sqlite3_snprintf(sizeof(aBuffer), aBuffer, 
+              "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
+          );
+          rc = fts3StringAppend(&res, aBuffer, -1);
+        }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
+          rc = FTS_CORRUPT_VTAB;
+        }
+      }
+    }
+    if( rc==SQLITE_DONE ){
+      rc = SQLITE_OK;
+    }
+
+    pMod->xClose(pC);
+    if( rc!=SQLITE_OK ) goto offsets_out;
+  }
+
+ offsets_out:
+  sqlite3_free(sCtx.aTerm);
+  assert( rc!=SQLITE_DONE );
+  sqlite3Fts3SegmentsClose(pTab);
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx,  rc);
+    sqlite3_free(res.z);
+  }else{
+    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
+  }
+  return;
+}
+
+/*
+** Implementation of matchinfo() function.
+*/
+SQLITE_PRIVATE void sqlite3Fts3Matchinfo(
+  sqlite3_context *pContext,      /* Function call context */
+  Fts3Cursor *pCsr,               /* FTS3 table cursor */
+  const char *zArg                /* Second arg to matchinfo() function */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  int rc;
+  int i;
+  const char *zFormat;
+
+  if( zArg ){
+    for(i=0; zArg[i]; i++){
+      char *zErr = 0;
+      if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
+        sqlite3_result_error(pContext, zErr, -1);
+        sqlite3_free(zErr);
+        return;
+      }
+    }
+    zFormat = zArg;
+  }else{
+    zFormat = FTS3_MATCHINFO_DEFAULT;
+  }
+
+  if( !pCsr->pExpr ){
+    sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
+    return;
+  }
+
+  /* Retrieve matchinfo() data. */
+  rc = fts3GetMatchinfo(pCsr, zFormat);
+  sqlite3Fts3SegmentsClose(pTab);
+
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pContext, rc);
+  }else{
+    int n = pCsr->nMatchinfo * sizeof(u32);
+    sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
+  }
+}
+
+#endif
+
+/************** End of fts3_snippet.c ****************************************/
+/************** Begin file fts3_unicode.c ************************************/
+/*
+** 2012 May 24
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Implementation of the "unicode" full-text-search tokenizer.
+*/
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
+
+
+/*
+** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
+** from the sqlite3 source file utf.c. If this file is compiled as part
+** of the amalgamation, they are not required.
+*/
+#ifndef SQLITE_AMALGAMATION
+
+static const unsigned char sqlite3Utf8Trans1[] = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+#define READ_UTF8(zIn, zTerm, c)                           \
+  c = *(zIn++);                                            \
+  if( c>=0xc0 ){                                           \
+    c = sqlite3Utf8Trans1[c-0xc0];                         \
+    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
+      c = (c<<6) + (0x3f & *(zIn++));                      \
+    }                                                      \
+    if( c<0x80                                             \
+        || (c&0xFFFFF800)==0xD800                          \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
+  }
+
+#define WRITE_UTF8(zOut, c) {                          \
+  if( c<0x00080 ){                                     \
+    *zOut++ = (u8)(c&0xFF);                            \
+  }                                                    \
+  else if( c<0x00800 ){                                \
+    *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);                \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+  else if( c<0x10000 ){                                \
+    *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);               \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }else{                                               \
+    *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);             \
+    *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);             \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+}
+
+#endif /* ifndef SQLITE_AMALGAMATION */
+
+typedef struct unicode_tokenizer unicode_tokenizer;
+typedef struct unicode_cursor unicode_cursor;
+
+struct unicode_tokenizer {
+  sqlite3_tokenizer base;
+  int bRemoveDiacritic;
+  int nException;
+  int *aiException;
+};
+
+struct unicode_cursor {
+  sqlite3_tokenizer_cursor base;
+  const unsigned char *aInput;    /* Input text being tokenized */
+  int nInput;                     /* Size of aInput[] in bytes */
+  int iOff;                       /* Current offset within aInput[] */
+  int iToken;                     /* Index of next token to be returned */
+  char *zToken;                   /* storage for current token */
+  int nAlloc;                     /* space allocated at zToken */
+};
+
+
+/*
+** Destroy a tokenizer allocated by unicodeCreate().
+*/
+static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
+  if( pTokenizer ){
+    unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer;
+    sqlite3_free(p->aiException);
+    sqlite3_free(p);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
+** statement has specified that the tokenizer for this table shall consider
+** all characters in string zIn/nIn to be separators (if bAlnum==0) or
+** token characters (if bAlnum==1).
+**
+** For each codepoint in the zIn/nIn string, this function checks if the
+** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
+** If so, no action is taken. Otherwise, the codepoint is added to the 
+** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
+** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
+** codepoints in the aiException[] array.
+**
+** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
+** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
+** It is not possible to change the behavior of the tokenizer with respect
+** to these codepoints.
+*/
+static int unicodeAddExceptions(
+  unicode_tokenizer *p,           /* Tokenizer to add exceptions to */
+  int bAlnum,                     /* Replace Isalnum() return value with this */
+  const char *zIn,                /* Array of characters to make exceptions */
+  int nIn                         /* Length of z in bytes */
+){
+  const unsigned char *z = (const unsigned char *)zIn;
+  const unsigned char *zTerm = &z[nIn];
+  int iCode;
+  int nEntry = 0;
+
+  assert( bAlnum==0 || bAlnum==1 );
+
+  while( z<zTerm ){
+    READ_UTF8(z, zTerm, iCode);
+    assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+    if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum 
+     && sqlite3FtsUnicodeIsdiacritic(iCode)==0 
+    ){
+      nEntry++;
+    }
+  }
+
+  if( nEntry ){
+    int *aNew;                    /* New aiException[] array */
+    int nNew;                     /* Number of valid entries in array aNew[] */
+
+    aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int));
+    if( aNew==0 ) return SQLITE_NOMEM;
+    nNew = p->nException;
+
+    z = (const unsigned char *)zIn;
+    while( z<zTerm ){
+      READ_UTF8(z, zTerm, iCode);
+      if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum 
+       && sqlite3FtsUnicodeIsdiacritic(iCode)==0
+      ){
+        int i, j;
+        for(i=0; i<nNew && aNew[i]<iCode; i++);
+        for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
+        aNew[i] = iCode;
+        nNew++;
+      }
+    }
+    p->aiException = aNew;
+    p->nException = nNew;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Return true if the p->aiException[] array contains the value iCode.
+*/
+static int unicodeIsException(unicode_tokenizer *p, int iCode){
+  if( p->nException>0 ){
+    int *a = p->aiException;
+    int iLo = 0;
+    int iHi = p->nException-1;
+
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      if( iCode==a[iTest] ){
+        return 1;
+      }else if( iCode>a[iTest] ){
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+** Return true if, for the purposes of tokenization, codepoint iCode is
+** considered a token character (not a separator).
+*/
+static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
+  assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+  return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode);
+}
+
+/*
+** Create a new tokenizer instance.
+*/
+static int unicodeCreate(
+  int nArg,                       /* Size of array argv[] */
+  const char * const *azArg,      /* Tokenizer creation arguments */
+  sqlite3_tokenizer **pp          /* OUT: New tokenizer handle */
+){
+  unicode_tokenizer *pNew;        /* New tokenizer object */
+  int i;
+  int rc = SQLITE_OK;
+
+  pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
+  if( pNew==NULL ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(unicode_tokenizer));
+  pNew->bRemoveDiacritic = 1;
+
+  for(i=0; rc==SQLITE_OK && i<nArg; i++){
+    const char *z = azArg[i];
+    int n = (int)strlen(z);
+
+    if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
+      pNew->bRemoveDiacritic = 1;
+    }
+    else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
+      pNew->bRemoveDiacritic = 0;
+    }
+    else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
+      rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
+    }
+    else if( n>=11 && memcmp("separators=", z, 11)==0 ){
+      rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
+    }
+    else{
+      /* Unrecognized argument */
+      rc  = SQLITE_ERROR;
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    unicodeDestroy((sqlite3_tokenizer *)pNew);
+    pNew = 0;
+  }
+  *pp = (sqlite3_tokenizer *)pNew;
+  return rc;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is pInput[0..nBytes-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+*/
+static int unicodeOpen(
+  sqlite3_tokenizer *p,           /* The tokenizer */
+  const char *aInput,             /* Input string */
+  int nInput,                     /* Size of string aInput in bytes */
+  sqlite3_tokenizer_cursor **pp   /* OUT: New cursor object */
+){
+  unicode_cursor *pCsr;
+
+  pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
+  if( pCsr==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(unicode_cursor));
+
+  pCsr->aInput = (const unsigned char *)aInput;
+  if( aInput==0 ){
+    pCsr->nInput = 0;
+  }else if( nInput<0 ){
+    pCsr->nInput = (int)strlen(aInput);
+  }else{
+    pCsr->nInput = nInput;
+  }
+
+  *pp = &pCsr->base;
+  UNUSED_PARAMETER(p);
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** simpleOpen() above.
+*/
+static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
+  unicode_cursor *pCsr = (unicode_cursor *) pCursor;
+  sqlite3_free(pCsr->zToken);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to simpleOpen().
+*/
+static int unicodeNext(
+  sqlite3_tokenizer_cursor *pC,   /* Cursor returned by simpleOpen */
+  const char **paToken,           /* OUT: Token text */
+  int *pnToken,                   /* OUT: Number of bytes at *paToken */
+  int *piStart,                   /* OUT: Starting offset of token */
+  int *piEnd,                     /* OUT: Ending offset of token */
+  int *piPos                      /* OUT: Position integer of token */
+){
+  unicode_cursor *pCsr = (unicode_cursor *)pC;
+  unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
+  int iCode = 0;
+  char *zOut;
+  const unsigned char *z = &pCsr->aInput[pCsr->iOff];
+  const unsigned char *zStart = z;
+  const unsigned char *zEnd;
+  const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
+
+  /* Scan past any delimiter characters before the start of the next token.
+  ** Return SQLITE_DONE early if this takes us all the way to the end of 
+  ** the input.  */
+  while( z<zTerm ){
+    READ_UTF8(z, zTerm, iCode);
+    if( unicodeIsAlnum(p, iCode) ) break;
+    zStart = z;
+  }
+  if( zStart>=zTerm ) return SQLITE_DONE;
+
+  zOut = pCsr->zToken;
+  do {
+    int iOut;
+
+    /* Grow the output buffer if required. */
+    if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
+      char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
+      if( !zNew ) return SQLITE_NOMEM;
+      zOut = &zNew[zOut - pCsr->zToken];
+      pCsr->zToken = zNew;
+      pCsr->nAlloc += 64;
+    }
+
+    /* Write the folded case of the last character read to the output */
+    zEnd = z;
+    iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
+    if( iOut ){
+      WRITE_UTF8(zOut, iOut);
+    }
+
+    /* If the cursor is not at EOF, read the next character */
+    if( z>=zTerm ) break;
+    READ_UTF8(z, zTerm, iCode);
+  }while( unicodeIsAlnum(p, iCode) 
+       || sqlite3FtsUnicodeIsdiacritic(iCode)
+  );
+
+  /* Set the output variables and return. */
+  pCsr->iOff = (int)(z - pCsr->aInput);
+  *paToken = pCsr->zToken;
+  *pnToken = (int)(zOut - pCsr->zToken);
+  *piStart = (int)(zStart - pCsr->aInput);
+  *piEnd = (int)(zEnd - pCsr->aInput);
+  *piPos = pCsr->iToken++;
+  return SQLITE_OK;
+}
+
+/*
+** Set *ppModule to a pointer to the sqlite3_tokenizer_module 
+** structure for the unicode tokenizer.
+*/
+SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
+  static const sqlite3_tokenizer_module module = {
+    0,
+    unicodeCreate,
+    unicodeDestroy,
+    unicodeOpen,
+    unicodeClose,
+    unicodeNext,
+    0,
+  };
+  *ppModule = &module;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */
+
+/************** End of fts3_unicode.c ****************************************/
+/************** Begin file fts3_unicode2.c ***********************************/
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
+
+/* #include <assert.h> */
+
+/*
+** Return true if the argument corresponds to a unicode codepoint
+** classified as either a letter or a number. Otherwise false.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){
+  /* Each unsigned integer in the following array corresponds to a contiguous
+  ** range of unicode codepoints that are not either letters or numbers (i.e.
+  ** codepoints for which this function should return 0).
+  **
+  ** The most significant 22 bits in each 32-bit value contain the first 
+  ** codepoint in the range. The least significant 10 bits are used to store
+  ** the size of the range (always at least 1). In other words, the value 
+  ** ((C<<22) + N) represents a range of N codepoints starting with codepoint 
+  ** C. It is not possible to represent a range larger than 1023 codepoints 
+  ** using this format.
+  */
+  static const unsigned int aEntry[] = {
+    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
+    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
+    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
+    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
+    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
+    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
+    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
+    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
+    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
+    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
+    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
+    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
+    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
+    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
+    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
+    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
+    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
+    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
+    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
+    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
+    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
+    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
+    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
+    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
+    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
+    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
+    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
+    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
+    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
+    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
+    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
+    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
+    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
+    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
+    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
+    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
+    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
+    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
+    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
+    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
+    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
+    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
+    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
+    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
+    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
+    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
+    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
+    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
+    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
+    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
+    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
+    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
+    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
+    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
+    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
+    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
+    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
+    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
+    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
+    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
+    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
+    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
+    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
+    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
+    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
+    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
+    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
+    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
+    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
+    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
+    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
+    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
+    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
+    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
+    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
+    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
+    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
+    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
+    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
+    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
+    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
+    0x380400F0,
+  };
+  static const unsigned int aAscii[4] = {
+    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+  };
+
+  if( c<128 ){
+    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
+  }else if( c<(1<<22) ){
+    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+    int iRes = 0;
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      if( key >= aEntry[iTest] ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( aEntry[0]<key );
+    assert( key>=aEntry[iRes] );
+    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+  }
+  return 1;
+}
+
+
+/*
+** If the argument is a codepoint corresponding to a lowercase letter
+** in the ASCII range with a diacritic added, return the codepoint
+** of the ASCII letter only. For example, if passed 235 - "LATIN
+** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
+** E"). The resuls of passing a codepoint that corresponds to an
+** uppercase letter are undefined.
+*/
+static int remove_diacritic(int c){
+  unsigned short aDia[] = {
+        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
+     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
+     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
+     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
+     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
+     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
+     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 
+     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
+    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 
+    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
+    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
+    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
+    62924, 63050, 63082, 63274, 63390, 
+  };
+  char aChar[] = {
+    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
+    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
+    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
+    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
+    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
+    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
+    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
+    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
+    'e',  'i',  'o',  'u',  'y',  
+  };
+
+  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+  int iRes = 0;
+  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+  int iLo = 0;
+  while( iHi>=iLo ){
+    int iTest = (iHi + iLo) / 2;
+    if( key >= aDia[iTest] ){
+      iRes = iTest;
+      iLo = iTest+1;
+    }else{
+      iHi = iTest-1;
+    }
+  }
+  assert( key>=aDia[iRes] );
+  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
+}
+
+
+/*
+** Return true if the argument interpreted as a unicode codepoint
+** is a diacritical modifier character.
+*/
+SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){
+  unsigned int mask0 = 0x08029FDF;
+  unsigned int mask1 = 0x000361F8;
+  if( c<768 || c>817 ) return 0;
+  return (c < 768+32) ?
+      (mask0 & (1 << (c-768))) :
+      (mask1 & (1 << (c-768-32)));
+}
+
+
+/*
+** Interpret the argument as a unicode codepoint. If the codepoint
+** is an upper case character that has a lower case equivalent,
+** return the codepoint corresponding to the lower case version.
+** Otherwise, return a copy of the argument.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
+  /* Each entry in the following array defines a rule for folding a range
+  ** of codepoints to lower case. The rule applies to a range of nRange
+  ** codepoints starting at codepoint iCode.
+  **
+  ** If the least significant bit in flags is clear, then the rule applies
+  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+  ** need to be folded). Or, if it is set, then the rule only applies to
+  ** every second codepoint in the range, starting with codepoint C.
+  **
+  ** The 7 most significant bits in flags are an index into the aiOff[]
+  ** array. If a specific codepoint C does require folding, then its lower
+  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+  **
+  ** The contents of this array are generated by parsing the CaseFolding.txt
+  ** file distributed as part of the "Unicode Character Database". See
+  ** http://www.unicode.org for details.
+  */
+  static const struct TableEntry {
+    unsigned short iCode;
+    unsigned char flags;
+    unsigned char nRange;
+  } aEntry[] = {
+    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
+    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
+    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
+    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
+    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
+    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
+    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
+    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
+    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
+    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
+    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
+    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
+    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
+    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
+    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
+    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
+    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
+    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
+    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
+    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
+    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
+    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
+    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
+    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
+    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
+    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
+    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
+    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
+    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
+    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
+    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
+    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
+    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
+    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
+    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
+    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
+    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
+    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
+    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
+    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
+    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
+    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
+    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
+    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
+    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
+    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
+    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
+    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
+    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
+    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
+    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
+    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
+    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
+    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
+    {65313, 14, 26},       
+  };
+  static const unsigned short aiOff[] = {
+   1,     2,     8,     15,    16,    26,    28,    32,    
+   37,    38,    40,    48,    63,    64,    69,    71,    
+   79,    80,    116,   202,   203,   205,   206,   207,   
+   209,   210,   211,   213,   214,   217,   218,   219,   
+   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
+   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
+   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
+   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
+   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
+   65514, 65521, 65527, 65528, 65529, 
+  };
+
+  int ret = c;
+
+  assert( c>=0 );
+  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+  if( c<128 ){
+    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+  }else if( c<65536 ){
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    int iRes = -1;
+
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      int cmp = (c - aEntry[iTest].iCode);
+      if( cmp>=0 ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( iRes<0 || c>=aEntry[iRes].iCode );
+
+    if( iRes>=0 ){
+      const struct TableEntry *p = &aEntry[iRes];
+      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+        assert( ret>0 );
+      }
+    }
+
+    if( bRemoveDiacritic ) ret = remove_diacritic(ret);
+  }
+  
+  else if( c>=66560 && c<66600 ){
+    ret = c + 40;
+  }
+
+  return ret;
+}
+#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
+#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
+
+/************** End of fts3_unicode2.c ***************************************/
+/************** Begin file rtree.c *******************************************/
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code for implementations of the r-tree and r*-tree
+** algorithms packaged as an SQLite virtual table module.
+*/
+
+/*
+** Database Format of R-Tree Tables
+** --------------------------------
+**
+** The data structure for a single virtual r-tree table is stored in three 
+** native SQLite tables declared as follows. In each case, the '%' character
+** in the table name is replaced with the user-supplied name of the r-tree
+** table.
+**
+**   CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB)
+**   CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER)
+**   CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER)
+**
+** The data for each node of the r-tree structure is stored in the %_node
+** table. For each node that is not the root node of the r-tree, there is
+** an entry in the %_parent table associating the node with its parent.
+** And for each row of data in the table, there is an entry in the %_rowid
+** table that maps from the entries rowid to the id of the node that it
+** is stored on.
+**
+** The root node of an r-tree always exists, even if the r-tree table is
+** empty. The nodeno of the root node is always 1. All other nodes in the
+** table must be the same size as the root node. The content of each node
+** is formatted as follows:
+**
+**   1. If the node is the root node (node 1), then the first 2 bytes
+**      of the node contain the tree depth as a big-endian integer.
+**      For non-root nodes, the first 2 bytes are left unused.
+**
+**   2. The next 2 bytes contain the number of entries currently 
+**      stored in the node.
+**
+**   3. The remainder of the node contains the node entries. Each entry
+**      consists of a single 8-byte integer followed by an even number
+**      of 4-byte coordinates. For leaf nodes the integer is the rowid
+**      of a record. For internal nodes it is the node number of a
+**      child page.
+*/
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE)
+
+#ifndef SQLITE_CORE
+  SQLITE_EXTENSION_INIT1
+#else
+#endif
+
+/* #include <string.h> */
+/* #include <assert.h> */
+/* #include <stdio.h> */
+
+#ifndef SQLITE_AMALGAMATION
+#include "sqlite3rtree.h"
+typedef sqlite3_int64 i64;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+#endif
+
+/*  The following macro is used to suppress compiler warnings.
+*/
+#ifndef UNUSED_PARAMETER
+# define UNUSED_PARAMETER(x) (void)(x)
+#endif
+
+typedef struct Rtree Rtree;
+typedef struct RtreeCursor RtreeCursor;
+typedef struct RtreeNode RtreeNode;
+typedef struct RtreeCell RtreeCell;
+typedef struct RtreeConstraint RtreeConstraint;
+typedef struct RtreeMatchArg RtreeMatchArg;
+typedef struct RtreeGeomCallback RtreeGeomCallback;
+typedef union RtreeCoord RtreeCoord;
+typedef struct RtreeSearchPoint RtreeSearchPoint;
+
+/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */
+#define RTREE_MAX_DIMENSIONS 5
+
+/* Size of hash table Rtree.aHash. This hash table is not expected to
+** ever contain very many entries, so a fixed number of buckets is 
+** used.
+*/
+#define HASHSIZE 97
+
+/* The xBestIndex method of this virtual table requires an estimate of
+** the number of rows in the virtual table to calculate the costs of
+** various strategies. If possible, this estimate is loaded from the
+** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum).
+** Otherwise, if no sqlite_stat1 entry is available, use 
+** RTREE_DEFAULT_ROWEST.
+*/
+#define RTREE_DEFAULT_ROWEST 1048576
+#define RTREE_MIN_ROWEST         100
+
+/* 
+** An rtree virtual-table object.
+*/
+struct Rtree {
+  sqlite3_vtab base;          /* Base class.  Must be first */
+  sqlite3 *db;                /* Host database connection */
+  int iNodeSize;              /* Size in bytes of each node in the node table */
+  u8 nDim;                    /* Number of dimensions */
+  u8 eCoordType;              /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */
+  u8 nBytesPerCell;           /* Bytes consumed per cell */
+  int iDepth;                 /* Current depth of the r-tree structure */
+  char *zDb;                  /* Name of database containing r-tree table */
+  char *zName;                /* Name of r-tree table */ 
+  int nBusy;                  /* Current number of users of this structure */
+  i64 nRowEst;                /* Estimated number of rows in this table */
+
+  /* List of nodes removed during a CondenseTree operation. List is
+  ** linked together via the pointer normally used for hash chains -
+  ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree 
+  ** headed by the node (leaf nodes have RtreeNode.iNode==0).
+  */
+  RtreeNode *pDeleted;
+  int iReinsertHeight;        /* Height of sub-trees Reinsert() has run on */
+
+  /* Statements to read/write/delete a record from xxx_node */
+  sqlite3_stmt *pReadNode;
+  sqlite3_stmt *pWriteNode;
+  sqlite3_stmt *pDeleteNode;
+
+  /* Statements to read/write/delete a record from xxx_rowid */
+  sqlite3_stmt *pReadRowid;
+  sqlite3_stmt *pWriteRowid;
+  sqlite3_stmt *pDeleteRowid;
+
+  /* Statements to read/write/delete a record from xxx_parent */
+  sqlite3_stmt *pReadParent;
+  sqlite3_stmt *pWriteParent;
+  sqlite3_stmt *pDeleteParent;
+
+  RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ 
+};
+
+/* Possible values for Rtree.eCoordType: */
+#define RTREE_COORD_REAL32 0
+#define RTREE_COORD_INT32  1
+
+/*
+** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will
+** only deal with integer coordinates.  No floating point operations
+** will be done.
+*/
+#ifdef SQLITE_RTREE_INT_ONLY
+  typedef sqlite3_int64 RtreeDValue;       /* High accuracy coordinate */
+  typedef int RtreeValue;                  /* Low accuracy coordinate */
+# define RTREE_ZERO 0
+#else
+  typedef double RtreeDValue;              /* High accuracy coordinate */
+  typedef float RtreeValue;                /* Low accuracy coordinate */
+# define RTREE_ZERO 0.0
+#endif
+
+/*
+** When doing a search of an r-tree, instances of the following structure
+** record intermediate results from the tree walk.
+**
+** The id is always a node-id.  For iLevel>=1 the id is the node-id of
+** the node that the RtreeSearchPoint represents.  When iLevel==0, however,
+** the id is of the parent node and the cell that RtreeSearchPoint
+** represents is the iCell-th entry in the parent node.
+*/
+struct RtreeSearchPoint {
+  RtreeDValue rScore;    /* The score for this node.  Smallest goes first. */
+  sqlite3_int64 id;      /* Node ID */
+  u8 iLevel;             /* 0=entries.  1=leaf node.  2+ for higher */
+  u8 eWithin;            /* PARTLY_WITHIN or FULLY_WITHIN */
+  u8 iCell;              /* Cell index within the node */
+};
+
+/*
+** The minimum number of cells allowed for a node is a third of the 
+** maximum. In Gutman's notation:
+**
+**     m = M/3
+**
+** If an R*-tree "Reinsert" operation is required, the same number of
+** cells are removed from the overfull node and reinserted into the tree.
+*/
+#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3)
+#define RTREE_REINSERT(p) RTREE_MINCELLS(p)
+#define RTREE_MAXCELLS 51
+
+/*
+** The smallest possible node-size is (512-64)==448 bytes. And the largest
+** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates).
+** Therefore all non-root nodes must contain at least 3 entries. Since 
+** 2^40 is greater than 2^64, an r-tree structure always has a depth of
+** 40 or less.
+*/
+#define RTREE_MAX_DEPTH 40
+
+
+/*
+** Number of entries in the cursor RtreeNode cache.  The first entry is
+** used to cache the RtreeNode for RtreeCursor.sPoint.  The remaining
+** entries cache the RtreeNode for the first elements of the priority queue.
+*/
+#define RTREE_CACHE_SZ  5
+
+/* 
+** An rtree cursor object.
+*/
+struct RtreeCursor {
+  sqlite3_vtab_cursor base;         /* Base class.  Must be first */
+  u8 atEOF;                         /* True if at end of search */
+  u8 bPoint;                        /* True if sPoint is valid */
+  int iStrategy;                    /* Copy of idxNum search parameter */
+  int nConstraint;                  /* Number of entries in aConstraint */
+  RtreeConstraint *aConstraint;     /* Search constraints. */
+  int nPointAlloc;                  /* Number of slots allocated for aPoint[] */
+  int nPoint;                       /* Number of slots used in aPoint[] */
+  int mxLevel;                      /* iLevel value for root of the tree */
+  RtreeSearchPoint *aPoint;         /* Priority queue for search points */
+  RtreeSearchPoint sPoint;          /* Cached next search point */
+  RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */
+  u32 anQueue[RTREE_MAX_DEPTH+1];   /* Number of queued entries by iLevel */
+};
+
+/* Return the Rtree of a RtreeCursor */
+#define RTREE_OF_CURSOR(X)   ((Rtree*)((X)->base.pVtab))
+
+/*
+** A coordinate can be either a floating point number or a integer.  All
+** coordinates within a single R-Tree are always of the same time.
+*/
+union RtreeCoord {
+  RtreeValue f;      /* Floating point value */
+  int i;             /* Integer value */
+  u32 u;             /* Unsigned for byte-order conversions */
+};
+
+/*
+** The argument is an RtreeCoord. Return the value stored within the RtreeCoord
+** formatted as a RtreeDValue (double or int64). This macro assumes that local
+** variable pRtree points to the Rtree structure associated with the
+** RtreeCoord.
+*/
+#ifdef SQLITE_RTREE_INT_ONLY
+# define DCOORD(coord) ((RtreeDValue)coord.i)
+#else
+# define DCOORD(coord) (                           \
+    (pRtree->eCoordType==RTREE_COORD_REAL32) ?      \
+      ((double)coord.f) :                           \
+      ((double)coord.i)                             \
+  )
+#endif
+
+/*
+** A search constraint.
+*/
+struct RtreeConstraint {
+  int iCoord;                     /* Index of constrained coordinate */
+  int op;                         /* Constraining operation */
+  union {
+    RtreeDValue rValue;             /* Constraint value. */
+    int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*);
+    int (*xQueryFunc)(sqlite3_rtree_query_info*);
+  } u;
+  sqlite3_rtree_query_info *pInfo;  /* xGeom and xQueryFunc argument */
+};
+
+/* Possible values for RtreeConstraint.op */
+#define RTREE_EQ    0x41  /* A */
+#define RTREE_LE    0x42  /* B */
+#define RTREE_LT    0x43  /* C */
+#define RTREE_GE    0x44  /* D */
+#define RTREE_GT    0x45  /* E */
+#define RTREE_MATCH 0x46  /* F: Old-style sqlite3_rtree_geometry_callback() */
+#define RTREE_QUERY 0x47  /* G: New-style sqlite3_rtree_query_callback() */
+
+
+/* 
+** An rtree structure node.
+*/
+struct RtreeNode {
+  RtreeNode *pParent;         /* Parent node */
+  i64 iNode;                  /* The node number */
+  int nRef;                   /* Number of references to this node */
+  int isDirty;                /* True if the node needs to be written to disk */
+  u8 *zData;                  /* Content of the node, as should be on disk */
+  RtreeNode *pNext;           /* Next node in this hash collision chain */
+};
+
+/* Return the number of cells in a node  */
+#define NCELL(pNode) readInt16(&(pNode)->zData[2])
+
+/* 
+** A single cell from a node, deserialized
+*/
+struct RtreeCell {
+  i64 iRowid;                                 /* Node or entry ID */
+  RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2];  /* Bounding box coordinates */
+};
+
+
+/*
+** This object becomes the sqlite3_user_data() for the SQL functions
+** that are created by sqlite3_rtree_geometry_callback() and
+** sqlite3_rtree_query_callback() and which appear on the right of MATCH
+** operators in order to constrain a search.
+**
+** xGeom and xQueryFunc are the callback functions.  Exactly one of 
+** xGeom and xQueryFunc fields is non-NULL, depending on whether the
+** SQL function was created using sqlite3_rtree_geometry_callback() or
+** sqlite3_rtree_query_callback().
+** 
+** This object is deleted automatically by the destructor mechanism in
+** sqlite3_create_function_v2().
+*/
+struct RtreeGeomCallback {
+  int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*);
+  int (*xQueryFunc)(sqlite3_rtree_query_info*);
+  void (*xDestructor)(void*);
+  void *pContext;
+};
+
+
+/*
+** Value for the first field of every RtreeMatchArg object. The MATCH
+** operator tests that the first field of a blob operand matches this
+** value to avoid operating on invalid blobs (which could cause a segfault).
+*/
+#define RTREE_GEOMETRY_MAGIC 0x891245AB
+
+/*
+** An instance of this structure (in the form of a BLOB) is returned by
+** the SQL functions that sqlite3_rtree_geometry_callback() and
+** sqlite3_rtree_query_callback() create, and is read as the right-hand
+** operand to the MATCH operator of an R-Tree.
+*/
+struct RtreeMatchArg {
+  u32 magic;                  /* Always RTREE_GEOMETRY_MAGIC */
+  RtreeGeomCallback cb;       /* Info about the callback functions */
+  int nParam;                 /* Number of parameters to the SQL function */
+  RtreeDValue aParam[1];      /* Values for parameters to the SQL function */
+};
+
+#ifndef MAX
+# define MAX(x,y) ((x) < (y) ? (y) : (x))
+#endif
+#ifndef MIN
+# define MIN(x,y) ((x) > (y) ? (y) : (x))
+#endif
+
+/*
+** Functions to deserialize a 16 bit integer, 32 bit real number and
+** 64 bit integer. The deserialized value is returned.
+*/
+static int readInt16(u8 *p){
+  return (p[0]<<8) + p[1];
+}
+static void readCoord(u8 *p, RtreeCoord *pCoord){
+  pCoord->u = (
+    (((u32)p[0]) << 24) + 
+    (((u32)p[1]) << 16) + 
+    (((u32)p[2]) <<  8) + 
+    (((u32)p[3]) <<  0)
+  );
+}
+static i64 readInt64(u8 *p){
+  return (
+    (((i64)p[0]) << 56) + 
+    (((i64)p[1]) << 48) + 
+    (((i64)p[2]) << 40) + 
+    (((i64)p[3]) << 32) + 
+    (((i64)p[4]) << 24) + 
+    (((i64)p[5]) << 16) + 
+    (((i64)p[6]) <<  8) + 
+    (((i64)p[7]) <<  0)
+  );
+}
+
+/*
+** Functions to serialize a 16 bit integer, 32 bit real number and
+** 64 bit integer. The value returned is the number of bytes written
+** to the argument buffer (always 2, 4 and 8 respectively).
+*/
+static int writeInt16(u8 *p, int i){
+  p[0] = (i>> 8)&0xFF;
+  p[1] = (i>> 0)&0xFF;
+  return 2;
+}
+static int writeCoord(u8 *p, RtreeCoord *pCoord){
+  u32 i;
+  assert( sizeof(RtreeCoord)==4 );
+  assert( sizeof(u32)==4 );
+  i = pCoord->u;
+  p[0] = (i>>24)&0xFF;
+  p[1] = (i>>16)&0xFF;
+  p[2] = (i>> 8)&0xFF;
+  p[3] = (i>> 0)&0xFF;
+  return 4;
+}
+static int writeInt64(u8 *p, i64 i){
+  p[0] = (i>>56)&0xFF;
+  p[1] = (i>>48)&0xFF;
+  p[2] = (i>>40)&0xFF;
+  p[3] = (i>>32)&0xFF;
+  p[4] = (i>>24)&0xFF;
+  p[5] = (i>>16)&0xFF;
+  p[6] = (i>> 8)&0xFF;
+  p[7] = (i>> 0)&0xFF;
+  return 8;
+}
+
+/*
+** Increment the reference count of node p.
+*/
+static void nodeReference(RtreeNode *p){
+  if( p ){
+    p->nRef++;
+  }
+}
+
+/*
+** Clear the content of node p (set all bytes to 0x00).
+*/
+static void nodeZero(Rtree *pRtree, RtreeNode *p){
+  memset(&p->zData[2], 0, pRtree->iNodeSize-2);
+  p->isDirty = 1;
+}
+
+/*
+** Given a node number iNode, return the corresponding key to use
+** in the Rtree.aHash table.
+*/
+static int nodeHash(i64 iNode){
+  return iNode % HASHSIZE;
+}
+
+/*
+** Search the node hash table for node iNode. If found, return a pointer
+** to it. Otherwise, return 0.
+*/
+static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){
+  RtreeNode *p;
+  for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext);
+  return p;
+}
+
+/*
+** Add node pNode to the node hash table.
+*/
+static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){
+  int iHash;
+  assert( pNode->pNext==0 );
+  iHash = nodeHash(pNode->iNode);
+  pNode->pNext = pRtree->aHash[iHash];
+  pRtree->aHash[iHash] = pNode;
+}
+
+/*
+** Remove node pNode from the node hash table.
+*/
+static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){
+  RtreeNode **pp;
+  if( pNode->iNode!=0 ){
+    pp = &pRtree->aHash[nodeHash(pNode->iNode)];
+    for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); }
+    *pp = pNode->pNext;
+    pNode->pNext = 0;
+  }
+}
+
+/*
+** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0),
+** indicating that node has not yet been assigned a node number. It is
+** assigned a node number when nodeWrite() is called to write the
+** node contents out to the database.
+*/
+static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){
+  RtreeNode *pNode;
+  pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize);
+  if( pNode ){
+    memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize);
+    pNode->zData = (u8 *)&pNode[1];
+    pNode->nRef = 1;
+    pNode->pParent = pParent;
+    pNode->isDirty = 1;
+    nodeReference(pParent);
+  }
+  return pNode;
+}
+
+/*
+** Obtain a reference to an r-tree node.
+*/
+static int nodeAcquire(
+  Rtree *pRtree,             /* R-tree structure */
+  i64 iNode,                 /* Node number to load */
+  RtreeNode *pParent,        /* Either the parent node or NULL */
+  RtreeNode **ppNode         /* OUT: Acquired node */
+){
+  int rc;
+  int rc2 = SQLITE_OK;
+  RtreeNode *pNode;
+
+  /* Check if the requested node is already in the hash table. If so,
+  ** increase its reference count and return it.
+  */
+  if( (pNode = nodeHashLookup(pRtree, iNode)) ){
+    assert( !pParent || !pNode->pParent || pNode->pParent==pParent );
+    if( pParent && !pNode->pParent ){
+      nodeReference(pParent);
+      pNode->pParent = pParent;
+    }
+    pNode->nRef++;
+    *ppNode = pNode;
+    return SQLITE_OK;
+  }
+
+  sqlite3_bind_int64(pRtree->pReadNode, 1, iNode);
+  rc = sqlite3_step(pRtree->pReadNode);
+  if( rc==SQLITE_ROW ){
+    const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0);
+    if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){
+      pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize);
+      if( !pNode ){
+        rc2 = SQLITE_NOMEM;
+      }else{
+        pNode->pParent = pParent;
+        pNode->zData = (u8 *)&pNode[1];
+        pNode->nRef = 1;
+        pNode->iNode = iNode;
+        pNode->isDirty = 0;
+        pNode->pNext = 0;
+        memcpy(pNode->zData, zBlob, pRtree->iNodeSize);
+        nodeReference(pParent);
+      }
+    }
+  }
+  rc = sqlite3_reset(pRtree->pReadNode);
+  if( rc==SQLITE_OK ) rc = rc2;
+
+  /* If the root node was just loaded, set pRtree->iDepth to the height
+  ** of the r-tree structure. A height of zero means all data is stored on
+  ** the root node. A height of one means the children of the root node
+  ** are the leaves, and so on. If the depth as specified on the root node
+  ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt.
+  */
+  if( pNode && iNode==1 ){
+    pRtree->iDepth = readInt16(pNode->zData);
+    if( pRtree->iDepth>RTREE_MAX_DEPTH ){
+      rc = SQLITE_CORRUPT_VTAB;
+    }
+  }
+
+  /* If no error has occurred so far, check if the "number of entries"
+  ** field on the node is too large. If so, set the return code to 
+  ** SQLITE_CORRUPT_VTAB.
+  */
+  if( pNode && rc==SQLITE_OK ){
+    if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){
+      rc = SQLITE_CORRUPT_VTAB;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    if( pNode!=0 ){
+      nodeHashInsert(pRtree, pNode);
+    }else{
+      rc = SQLITE_CORRUPT_VTAB;
+    }
+    *ppNode = pNode;
+  }else{
+    sqlite3_free(pNode);
+    *ppNode = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Overwrite cell iCell of node pNode with the contents of pCell.
+*/
+static void nodeOverwriteCell(
+  Rtree *pRtree,             /* The overall R-Tree */
+  RtreeNode *pNode,          /* The node into which the cell is to be written */
+  RtreeCell *pCell,          /* The cell to write */
+  int iCell                  /* Index into pNode into which pCell is written */
+){
+  int ii;
+  u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell];
+  p += writeInt64(p, pCell->iRowid);
+  for(ii=0; ii<(pRtree->nDim*2); ii++){
+    p += writeCoord(p, &pCell->aCoord[ii]);
+  }
+  pNode->isDirty = 1;
+}
+
+/*
+** Remove the cell with index iCell from node pNode.
+*/
+static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){
+  u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell];
+  u8 *pSrc = &pDst[pRtree->nBytesPerCell];
+  int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell;
+  memmove(pDst, pSrc, nByte);
+  writeInt16(&pNode->zData[2], NCELL(pNode)-1);
+  pNode->isDirty = 1;
+}
+
+/*
+** Insert the contents of cell pCell into node pNode. If the insert
+** is successful, return SQLITE_OK.
+**
+** If there is not enough free space in pNode, return SQLITE_FULL.
+*/
+static int nodeInsertCell(
+  Rtree *pRtree,                /* The overall R-Tree */
+  RtreeNode *pNode,             /* Write new cell into this node */
+  RtreeCell *pCell              /* The cell to be inserted */
+){
+  int nCell;                    /* Current number of cells in pNode */
+  int nMaxCell;                 /* Maximum number of cells for pNode */
+
+  nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell;
+  nCell = NCELL(pNode);
+
+  assert( nCell<=nMaxCell );
+  if( nCell<nMaxCell ){
+    nodeOverwriteCell(pRtree, pNode, pCell, nCell);
+    writeInt16(&pNode->zData[2], nCell+1);
+    pNode->isDirty = 1;
+  }
+
+  return (nCell==nMaxCell);
+}
+
+/*
+** If the node is dirty, write it out to the database.
+*/
+static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){
+  int rc = SQLITE_OK;
+  if( pNode->isDirty ){
+    sqlite3_stmt *p = pRtree->pWriteNode;
+    if( pNode->iNode ){
+      sqlite3_bind_int64(p, 1, pNode->iNode);
+    }else{
+      sqlite3_bind_null(p, 1);
+    }
+    sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC);
+    sqlite3_step(p);
+    pNode->isDirty = 0;
+    rc = sqlite3_reset(p);
+    if( pNode->iNode==0 && rc==SQLITE_OK ){
+      pNode->iNode = sqlite3_last_insert_rowid(pRtree->db);
+      nodeHashInsert(pRtree, pNode);
+    }
+  }
+  return rc;
+}
+
+/*
+** Release a reference to a node. If the node is dirty and the reference
+** count drops to zero, the node data is written to the database.
+*/
+static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){
+  int rc = SQLITE_OK;
+  if( pNode ){
+    assert( pNode->nRef>0 );
+    pNode->nRef--;
+    if( pNode->nRef==0 ){
+      if( pNode->iNode==1 ){
+        pRtree->iDepth = -1;
+      }
+      if( pNode->pParent ){
+        rc = nodeRelease(pRtree, pNode->pParent);
+      }
+      if( rc==SQLITE_OK ){
+        rc = nodeWrite(pRtree, pNode);
+      }
+      nodeHashDelete(pRtree, pNode);
+      sqlite3_free(pNode);
+    }
+  }
+  return rc;
+}
+
+/*
+** Return the 64-bit integer value associated with cell iCell of
+** node pNode. If pNode is a leaf node, this is a rowid. If it is
+** an internal node, then the 64-bit integer is a child page number.
+*/
+static i64 nodeGetRowid(
+  Rtree *pRtree,       /* The overall R-Tree */
+  RtreeNode *pNode,    /* The node from which to extract the ID */
+  int iCell            /* The cell index from which to extract the ID */
+){
+  assert( iCell<NCELL(pNode) );
+  return readInt64(&pNode->zData[4 + pRtree->nBytesPerCell*iCell]);
+}
+
+/*
+** Return coordinate iCoord from cell iCell in node pNode.
+*/
+static void nodeGetCoord(
+  Rtree *pRtree,               /* The overall R-Tree */
+  RtreeNode *pNode,            /* The node from which to extract a coordinate */
+  int iCell,                   /* The index of the cell within the node */
+  int iCoord,                  /* Which coordinate to extract */
+  RtreeCoord *pCoord           /* OUT: Space to write result to */
+){
+  readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord);
+}
+
+/*
+** Deserialize cell iCell of node pNode. Populate the structure pointed
+** to by pCell with the results.
+*/
+static void nodeGetCell(
+  Rtree *pRtree,               /* The overall R-Tree */
+  RtreeNode *pNode,            /* The node containing the cell to be read */
+  int iCell,                   /* Index of the cell within the node */
+  RtreeCell *pCell             /* OUT: Write the cell contents here */
+){
+  u8 *pData;
+  RtreeCoord *pCoord;
+  int ii;
+  pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell);
+  pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell);
+  pCoord = pCell->aCoord;
+  for(ii=0; ii<pRtree->nDim*2; ii++){
+    readCoord(&pData[ii*4], &pCoord[ii]);
+  }
+}
+
+
+/* Forward declaration for the function that does the work of
+** the virtual table module xCreate() and xConnect() methods.
+*/
+static int rtreeInit(
+  sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int
+);
+
+/* 
+** Rtree virtual table module xCreate method.
+*/
+static int rtreeCreate(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1);
+}
+
+/* 
+** Rtree virtual table module xConnect method.
+*/
+static int rtreeConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0);
+}
+
+/*
+** Increment the r-tree reference count.
+*/
+static void rtreeReference(Rtree *pRtree){
+  pRtree->nBusy++;
+}
+
+/*
+** Decrement the r-tree reference count. When the reference count reaches
+** zero the structure is deleted.
+*/
+static void rtreeRelease(Rtree *pRtree){
+  pRtree->nBusy--;
+  if( pRtree->nBusy==0 ){
+    sqlite3_finalize(pRtree->pReadNode);
+    sqlite3_finalize(pRtree->pWriteNode);
+    sqlite3_finalize(pRtree->pDeleteNode);
+    sqlite3_finalize(pRtree->pReadRowid);
+    sqlite3_finalize(pRtree->pWriteRowid);
+    sqlite3_finalize(pRtree->pDeleteRowid);
+    sqlite3_finalize(pRtree->pReadParent);
+    sqlite3_finalize(pRtree->pWriteParent);
+    sqlite3_finalize(pRtree->pDeleteParent);
+    sqlite3_free(pRtree);
+  }
+}
+
+/* 
+** Rtree virtual table module xDisconnect method.
+*/
+static int rtreeDisconnect(sqlite3_vtab *pVtab){
+  rtreeRelease((Rtree *)pVtab);
+  return SQLITE_OK;
+}
+
+/* 
+** Rtree virtual table module xDestroy method.
+*/
+static int rtreeDestroy(sqlite3_vtab *pVtab){
+  Rtree *pRtree = (Rtree *)pVtab;
+  int rc;
+  char *zCreate = sqlite3_mprintf(
+    "DROP TABLE '%q'.'%q_node';"
+    "DROP TABLE '%q'.'%q_rowid';"
+    "DROP TABLE '%q'.'%q_parent';",
+    pRtree->zDb, pRtree->zName, 
+    pRtree->zDb, pRtree->zName,
+    pRtree->zDb, pRtree->zName
+  );
+  if( !zCreate ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0);
+    sqlite3_free(zCreate);
+  }
+  if( rc==SQLITE_OK ){
+    rtreeRelease(pRtree);
+  }
+
+  return rc;
+}
+
+/* 
+** Rtree virtual table module xOpen method.
+*/
+static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
+  int rc = SQLITE_NOMEM;
+  RtreeCursor *pCsr;
+
+  pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor));
+  if( pCsr ){
+    memset(pCsr, 0, sizeof(RtreeCursor));
+    pCsr->base.pVtab = pVTab;
+    rc = SQLITE_OK;
+  }
+  *ppCursor = (sqlite3_vtab_cursor *)pCsr;
+
+  return rc;
+}
+
+
+/*
+** Free the RtreeCursor.aConstraint[] array and its contents.
+*/
+static void freeCursorConstraints(RtreeCursor *pCsr){
+  if( pCsr->aConstraint ){
+    int i;                        /* Used to iterate through constraint array */
+    for(i=0; i<pCsr->nConstraint; i++){
+      sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo;
+      if( pInfo ){
+        if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser);
+        sqlite3_free(pInfo);
+      }
+    }
+    sqlite3_free(pCsr->aConstraint);
+    pCsr->aConstraint = 0;
+  }
+}
+
+/* 
+** Rtree virtual table module xClose method.
+*/
+static int rtreeClose(sqlite3_vtab_cursor *cur){
+  Rtree *pRtree = (Rtree *)(cur->pVtab);
+  int ii;
+  RtreeCursor *pCsr = (RtreeCursor *)cur;
+  freeCursorConstraints(pCsr);
+  sqlite3_free(pCsr->aPoint);
+  for(ii=0; ii<RTREE_CACHE_SZ; ii++) nodeRelease(pRtree, pCsr->aNode[ii]);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Rtree virtual table module xEof method.
+**
+** Return non-zero if the cursor does not currently point to a valid 
+** record (i.e if the scan has finished), or zero otherwise.
+*/
+static int rtreeEof(sqlite3_vtab_cursor *cur){
+  RtreeCursor *pCsr = (RtreeCursor *)cur;
+  return pCsr->atEOF;
+}
+
+/*
+** Convert raw bits from the on-disk RTree record into a coordinate value.
+** The on-disk format is big-endian and needs to be converted for little-
+** endian platforms.  The on-disk record stores integer coordinates if
+** eInt is true and it stores 32-bit floating point records if eInt is
+** false.  a[] is the four bytes of the on-disk record to be decoded.
+** Store the results in "r".
+**
+** There are three versions of this macro, one each for little-endian and
+** big-endian processors and a third generic implementation.  The endian-
+** specific implementations are much faster and are preferred if the
+** processor endianness is known at compile-time.  The SQLITE_BYTEORDER
+** macro is part of sqliteInt.h and hence the endian-specific
+** implementation will only be used if this module is compiled as part
+** of the amalgamation.
+*/
+#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234
+#define RTREE_DECODE_COORD(eInt, a, r) {                        \
+    RtreeCoord c;    /* Coordinate decoded */                   \
+    memcpy(&c.u,a,4);                                           \
+    c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)|                   \
+          ((c.u&0xff)<<24)|((c.u&0xff00)<<8);                   \
+    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
+}
+#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321
+#define RTREE_DECODE_COORD(eInt, a, r) {                        \
+    RtreeCoord c;    /* Coordinate decoded */                   \
+    memcpy(&c.u,a,4);                                           \
+    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
+}
+#else
+#define RTREE_DECODE_COORD(eInt, a, r) {                        \
+    RtreeCoord c;    /* Coordinate decoded */                   \
+    c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16)                     \
+           +((u32)a[2]<<8) + a[3];                              \
+    r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \
+}
+#endif
+
+/*
+** Check the RTree node or entry given by pCellData and p against the MATCH
+** constraint pConstraint.  
+*/
+static int rtreeCallbackConstraint(
+  RtreeConstraint *pConstraint,  /* The constraint to test */
+  int eInt,                      /* True if RTree holding integer coordinates */
+  u8 *pCellData,                 /* Raw cell content */
+  RtreeSearchPoint *pSearch,     /* Container of this cell */
+  sqlite3_rtree_dbl *prScore,    /* OUT: score for the cell */
+  int *peWithin                  /* OUT: visibility of the cell */
+){
+  int i;                                                /* Loop counter */
+  sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */
+  int nCoord = pInfo->nCoord;                           /* No. of coordinates */
+  int rc;                                             /* Callback return code */
+  sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2];   /* Decoded coordinates */
+
+  assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY );
+  assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 );
+
+  if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){
+    pInfo->iRowid = readInt64(pCellData);
+  }
+  pCellData += 8;
+  for(i=0; i<nCoord; i++, pCellData += 4){
+    RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]);
+  }
+  if( pConstraint->op==RTREE_MATCH ){
+    rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo,
+                              nCoord, aCoord, &i);
+    if( i==0 ) *peWithin = NOT_WITHIN;
+    *prScore = RTREE_ZERO;
+  }else{
+    pInfo->aCoord = aCoord;
+    pInfo->iLevel = pSearch->iLevel - 1;
+    pInfo->rScore = pInfo->rParentScore = pSearch->rScore;
+    pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin;
+    rc = pConstraint->u.xQueryFunc(pInfo);
+    if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin;
+    if( pInfo->rScore<*prScore || *prScore<RTREE_ZERO ){
+      *prScore = pInfo->rScore;
+    }
+  }
+  return rc;
+}
+
+/* 
+** Check the internal RTree node given by pCellData against constraint p.
+** If this constraint cannot be satisfied by any child within the node,
+** set *peWithin to NOT_WITHIN.
+*/
+static void rtreeNonleafConstraint(
+  RtreeConstraint *p,        /* The constraint to test */
+  int eInt,                  /* True if RTree holds integer coordinates */
+  u8 *pCellData,             /* Raw cell content as appears on disk */
+  int *peWithin              /* Adjust downward, as appropriate */
+){
+  sqlite3_rtree_dbl val;     /* Coordinate value convert to a double */
+
+  /* p->iCoord might point to either a lower or upper bound coordinate
+  ** in a coordinate pair.  But make pCellData point to the lower bound.
+  */
+  pCellData += 8 + 4*(p->iCoord&0xfe);
+
+  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
+      || p->op==RTREE_GT || p->op==RTREE_EQ );
+  switch( p->op ){
+    case RTREE_LE:
+    case RTREE_LT:
+    case RTREE_EQ:
+      RTREE_DECODE_COORD(eInt, pCellData, val);
+      /* val now holds the lower bound of the coordinate pair */
+      if( p->u.rValue>=val ) return;
+      if( p->op!=RTREE_EQ ) break;  /* RTREE_LE and RTREE_LT end here */
+      /* Fall through for the RTREE_EQ case */
+
+    default: /* RTREE_GT or RTREE_GE,  or fallthrough of RTREE_EQ */
+      pCellData += 4;
+      RTREE_DECODE_COORD(eInt, pCellData, val);
+      /* val now holds the upper bound of the coordinate pair */
+      if( p->u.rValue<=val ) return;
+  }
+  *peWithin = NOT_WITHIN;
+}
+
+/*
+** Check the leaf RTree cell given by pCellData against constraint p.
+** If this constraint is not satisfied, set *peWithin to NOT_WITHIN.
+** If the constraint is satisfied, leave *peWithin unchanged.
+**
+** The constraint is of the form:  xN op $val
+**
+** The op is given by p->op.  The xN is p->iCoord-th coordinate in
+** pCellData.  $val is given by p->u.rValue.
+*/
+static void rtreeLeafConstraint(
+  RtreeConstraint *p,        /* The constraint to test */
+  int eInt,                  /* True if RTree holds integer coordinates */
+  u8 *pCellData,             /* Raw cell content as appears on disk */
+  int *peWithin              /* Adjust downward, as appropriate */
+){
+  RtreeDValue xN;      /* Coordinate value converted to a double */
+
+  assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE 
+      || p->op==RTREE_GT || p->op==RTREE_EQ );
+  pCellData += 8 + p->iCoord*4;
+  RTREE_DECODE_COORD(eInt, pCellData, xN);
+  switch( p->op ){
+    case RTREE_LE: if( xN <= p->u.rValue ) return;  break;
+    case RTREE_LT: if( xN <  p->u.rValue ) return;  break;
+    case RTREE_GE: if( xN >= p->u.rValue ) return;  break;
+    case RTREE_GT: if( xN >  p->u.rValue ) return;  break;
+    default:       if( xN == p->u.rValue ) return;  break;
+  }
+  *peWithin = NOT_WITHIN;
+}
+
+/*
+** One of the cells in node pNode is guaranteed to have a 64-bit 
+** integer value equal to iRowid. Return the index of this cell.
+*/
+static int nodeRowidIndex(
+  Rtree *pRtree, 
+  RtreeNode *pNode, 
+  i64 iRowid,
+  int *piIndex
+){
+  int ii;
+  int nCell = NCELL(pNode);
+  assert( nCell<200 );
+  for(ii=0; ii<nCell; ii++){
+    if( nodeGetRowid(pRtree, pNode, ii)==iRowid ){
+      *piIndex = ii;
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_CORRUPT_VTAB;
+}
+
+/*
+** Return the index of the cell containing a pointer to node pNode
+** in its parent. If pNode is the root node, return -1.
+*/
+static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){
+  RtreeNode *pParent = pNode->pParent;
+  if( pParent ){
+    return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex);
+  }
+  *piIndex = -1;
+  return SQLITE_OK;
+}
+
+/*
+** Compare two search points.  Return negative, zero, or positive if the first
+** is less than, equal to, or greater than the second.
+**
+** The rScore is the primary key.  Smaller rScore values come first.
+** If the rScore is a tie, then use iLevel as the tie breaker with smaller
+** iLevel values coming first.  In this way, if rScore is the same for all
+** SearchPoints, then iLevel becomes the deciding factor and the result
+** is a depth-first search, which is the desired default behavior.
+*/
+static int rtreeSearchPointCompare(
+  const RtreeSearchPoint *pA,
+  const RtreeSearchPoint *pB
+){
+  if( pA->rScore<pB->rScore ) return -1;
+  if( pA->rScore>pB->rScore ) return +1;
+  if( pA->iLevel<pB->iLevel ) return -1;
+  if( pA->iLevel>pB->iLevel ) return +1;
+  return 0;
+}
+
+/*
+** Interchange to search points in a cursor.
+*/
+static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){
+  RtreeSearchPoint t = p->aPoint[i];
+  assert( i<j );
+  p->aPoint[i] = p->aPoint[j];
+  p->aPoint[j] = t;
+  i++; j++;
+  if( i<RTREE_CACHE_SZ ){
+    if( j>=RTREE_CACHE_SZ ){
+      nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]);
+      p->aNode[i] = 0;
+    }else{
+      RtreeNode *pTemp = p->aNode[i];
+      p->aNode[i] = p->aNode[j];
+      p->aNode[j] = pTemp;
+    }
+  }
+}
+
+/*
+** Return the search point with the lowest current score.
+*/
+static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){
+  return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0;
+}
+
+/*
+** Get the RtreeNode for the search point with the lowest score.
+*/
+static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){
+  sqlite3_int64 id;
+  int ii = 1 - pCur->bPoint;
+  assert( ii==0 || ii==1 );
+  assert( pCur->bPoint || pCur->nPoint );
+  if( pCur->aNode[ii]==0 ){
+    assert( pRC!=0 );
+    id = ii ? pCur->aPoint[0].id : pCur->sPoint.id;
+    *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]);
+  }
+  return pCur->aNode[ii];
+}
+
+/*
+** Push a new element onto the priority queue
+*/
+static RtreeSearchPoint *rtreeEnqueue(
+  RtreeCursor *pCur,    /* The cursor */
+  RtreeDValue rScore,   /* Score for the new search point */
+  u8 iLevel             /* Level for the new search point */
+){
+  int i, j;
+  RtreeSearchPoint *pNew;
+  if( pCur->nPoint>=pCur->nPointAlloc ){
+    int nNew = pCur->nPointAlloc*2 + 8;
+    pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0]));
+    if( pNew==0 ) return 0;
+    pCur->aPoint = pNew;
+    pCur->nPointAlloc = nNew;
+  }
+  i = pCur->nPoint++;
+  pNew = pCur->aPoint + i;
+  pNew->rScore = rScore;
+  pNew->iLevel = iLevel;
+  assert( iLevel<=RTREE_MAX_DEPTH );
+  while( i>0 ){
+    RtreeSearchPoint *pParent;
+    j = (i-1)/2;
+    pParent = pCur->aPoint + j;
+    if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break;
+    rtreeSearchPointSwap(pCur, j, i);
+    i = j;
+    pNew = pParent;
+  }
+  return pNew;
+}
+
+/*
+** Allocate a new RtreeSearchPoint and return a pointer to it.  Return
+** NULL if malloc fails.
+*/
+static RtreeSearchPoint *rtreeSearchPointNew(
+  RtreeCursor *pCur,    /* The cursor */
+  RtreeDValue rScore,   /* Score for the new search point */
+  u8 iLevel             /* Level for the new search point */
+){
+  RtreeSearchPoint *pNew, *pFirst;
+  pFirst = rtreeSearchPointFirst(pCur);
+  pCur->anQueue[iLevel]++;
+  if( pFirst==0
+   || pFirst->rScore>rScore 
+   || (pFirst->rScore==rScore && pFirst->iLevel>iLevel)
+  ){
+    if( pCur->bPoint ){
+      int ii;
+      pNew = rtreeEnqueue(pCur, rScore, iLevel);
+      if( pNew==0 ) return 0;
+      ii = (int)(pNew - pCur->aPoint) + 1;
+      if( ii<RTREE_CACHE_SZ ){
+        assert( pCur->aNode[ii]==0 );
+        pCur->aNode[ii] = pCur->aNode[0];
+       }else{
+        nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]);
+      }
+      pCur->aNode[0] = 0;
+      *pNew = pCur->sPoint;
+    }
+    pCur->sPoint.rScore = rScore;
+    pCur->sPoint.iLevel = iLevel;
+    pCur->bPoint = 1;
+    return &pCur->sPoint;
+  }else{
+    return rtreeEnqueue(pCur, rScore, iLevel);
+  }
+}
+
+#if 0
+/* Tracing routines for the RtreeSearchPoint queue */
+static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){
+  if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); }
+  printf(" %d.%05lld.%02d %g %d",
+    p->iLevel, p->id, p->iCell, p->rScore, p->eWithin
+  );
+  idx++;
+  if( idx<RTREE_CACHE_SZ ){
+    printf(" %p\n", pCur->aNode[idx]);
+  }else{
+    printf("\n");
+  }
+}
+static void traceQueue(RtreeCursor *pCur, const char *zPrefix){
+  int ii;
+  printf("=== %9s ", zPrefix);
+  if( pCur->bPoint ){
+    tracePoint(&pCur->sPoint, -1, pCur);
+  }
+  for(ii=0; ii<pCur->nPoint; ii++){
+    if( ii>0 || pCur->bPoint ) printf("              ");
+    tracePoint(&pCur->aPoint[ii], ii, pCur);
+  }
+}
+# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B)
+#else
+# define RTREE_QUEUE_TRACE(A,B)   /* no-op */
+#endif
+
+/* Remove the search point with the lowest current score.
+*/
+static void rtreeSearchPointPop(RtreeCursor *p){
+  int i, j, k, n;
+  i = 1 - p->bPoint;
+  assert( i==0 || i==1 );
+  if( p->aNode[i] ){
+    nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]);
+    p->aNode[i] = 0;
+  }
+  if( p->bPoint ){
+    p->anQueue[p->sPoint.iLevel]--;
+    p->bPoint = 0;
+  }else if( p->nPoint ){
+    p->anQueue[p->aPoint[0].iLevel]--;
+    n = --p->nPoint;
+    p->aPoint[0] = p->aPoint[n];
+    if( n<RTREE_CACHE_SZ-1 ){
+      p->aNode[1] = p->aNode[n+1];
+      p->aNode[n+1] = 0;
+    }
+    i = 0;
+    while( (j = i*2+1)<n ){
+      k = j+1;
+      if( k<n && rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[j])<0 ){
+        if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){
+          rtreeSearchPointSwap(p, i, k);
+          i = k;
+        }else{
+          break;
+        }
+      }else{
+        if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){
+          rtreeSearchPointSwap(p, i, j);
+          i = j;
+        }else{
+          break;
+        }
+      }
+    }
+  }
+}
+
+
+/*
+** Continue the search on cursor pCur until the front of the queue
+** contains an entry suitable for returning as a result-set row,
+** or until the RtreeSearchPoint queue is empty, indicating that the
+** query has completed.
+*/
+static int rtreeStepToLeaf(RtreeCursor *pCur){
+  RtreeSearchPoint *p;
+  Rtree *pRtree = RTREE_OF_CURSOR(pCur);
+  RtreeNode *pNode;
+  int eWithin;
+  int rc = SQLITE_OK;
+  int nCell;
+  int nConstraint = pCur->nConstraint;
+  int ii;
+  int eInt;
+  RtreeSearchPoint x;
+
+  eInt = pRtree->eCoordType==RTREE_COORD_INT32;
+  while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){
+    pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc);
+    if( rc ) return rc;
+    nCell = NCELL(pNode);
+    assert( nCell<200 );
+    while( p->iCell<nCell ){
+      sqlite3_rtree_dbl rScore = (sqlite3_rtree_dbl)-1;
+      u8 *pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell);
+      eWithin = FULLY_WITHIN;
+      for(ii=0; ii<nConstraint; ii++){
+        RtreeConstraint *pConstraint = pCur->aConstraint + ii;
+        if( pConstraint->op>=RTREE_MATCH ){
+          rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p,
+                                       &rScore, &eWithin);
+          if( rc ) return rc;
+        }else if( p->iLevel==1 ){
+          rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin);
+        }else{
+          rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin);
+        }
+        if( eWithin==NOT_WITHIN ) break;
+      }
+      p->iCell++;
+      if( eWithin==NOT_WITHIN ) continue;
+      x.iLevel = p->iLevel - 1;
+      if( x.iLevel ){
+        x.id = readInt64(pCellData);
+        x.iCell = 0;
+      }else{
+        x.id = p->id;
+        x.iCell = p->iCell - 1;
+      }
+      if( p->iCell>=nCell ){
+        RTREE_QUEUE_TRACE(pCur, "POP-S:");
+        rtreeSearchPointPop(pCur);
+      }
+      if( rScore<RTREE_ZERO ) rScore = RTREE_ZERO;
+      p = rtreeSearchPointNew(pCur, rScore, x.iLevel);
+      if( p==0 ) return SQLITE_NOMEM;
+      p->eWithin = eWithin;
+      p->id = x.id;
+      p->iCell = x.iCell;
+      RTREE_QUEUE_TRACE(pCur, "PUSH-S:");
+      break;
+    }
+    if( p->iCell>=nCell ){
+      RTREE_QUEUE_TRACE(pCur, "POP-Se:");
+      rtreeSearchPointPop(pCur);
+    }
+  }
+  pCur->atEOF = p==0;
+  return SQLITE_OK;
+}
+
+/* 
+** Rtree virtual table module xNext method.
+*/
+static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){
+  RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor;
+  int rc = SQLITE_OK;
+
+  /* Move to the next entry that matches the configured constraints. */
+  RTREE_QUEUE_TRACE(pCsr, "POP-Nx:");
+  rtreeSearchPointPop(pCsr);
+  rc = rtreeStepToLeaf(pCsr);
+  return rc;
+}
+
+/* 
+** Rtree virtual table module xRowid method.
+*/
+static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){
+  RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor;
+  RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr);
+  int rc = SQLITE_OK;
+  RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc);
+  if( rc==SQLITE_OK && p ){
+    *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell);
+  }
+  return rc;
+}
+
+/* 
+** Rtree virtual table module xColumn method.
+*/
+static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){
+  Rtree *pRtree = (Rtree *)cur->pVtab;
+  RtreeCursor *pCsr = (RtreeCursor *)cur;
+  RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr);
+  RtreeCoord c;
+  int rc = SQLITE_OK;
+  RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc);
+
+  if( rc ) return rc;
+  if( p==0 ) return SQLITE_OK;
+  if( i==0 ){
+    sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell));
+  }else{
+    if( rc ) return rc;
+    nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c);
+#ifndef SQLITE_RTREE_INT_ONLY
+    if( pRtree->eCoordType==RTREE_COORD_REAL32 ){
+      sqlite3_result_double(ctx, c.f);
+    }else
+#endif
+    {
+      assert( pRtree->eCoordType==RTREE_COORD_INT32 );
+      sqlite3_result_int(ctx, c.i);
+    }
+  }
+  return SQLITE_OK;
+}
+
+/* 
+** Use nodeAcquire() to obtain the leaf node containing the record with 
+** rowid iRowid. If successful, set *ppLeaf to point to the node and
+** return SQLITE_OK. If there is no such record in the table, set
+** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf
+** to zero and return an SQLite error code.
+*/
+static int findLeafNode(
+  Rtree *pRtree,              /* RTree to search */
+  i64 iRowid,                 /* The rowid searching for */
+  RtreeNode **ppLeaf,         /* Write the node here */
+  sqlite3_int64 *piNode       /* Write the node-id here */
+){
+  int rc;
+  *ppLeaf = 0;
+  sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid);
+  if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){
+    i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0);
+    if( piNode ) *piNode = iNode;
+    rc = nodeAcquire(pRtree, iNode, 0, ppLeaf);
+    sqlite3_reset(pRtree->pReadRowid);
+  }else{
+    rc = sqlite3_reset(pRtree->pReadRowid);
+  }
+  return rc;
+}
+
+/*
+** This function is called to configure the RtreeConstraint object passed
+** as the second argument for a MATCH constraint. The value passed as the
+** first argument to this function is the right-hand operand to the MATCH
+** operator.
+*/
+static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){
+  RtreeMatchArg *pBlob;              /* BLOB returned by geometry function */
+  sqlite3_rtree_query_info *pInfo;   /* Callback information */
+  int nBlob;                         /* Size of the geometry function blob */
+  int nExpected;                     /* Expected size of the BLOB */
+
+  /* Check that value is actually a blob. */
+  if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR;
+
+  /* Check that the blob is roughly the right size. */
+  nBlob = sqlite3_value_bytes(pValue);
+  if( nBlob<(int)sizeof(RtreeMatchArg) 
+   || ((nBlob-sizeof(RtreeMatchArg))%sizeof(RtreeDValue))!=0
+  ){
+    return SQLITE_ERROR;
+  }
+
+  pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob );
+  if( !pInfo ) return SQLITE_NOMEM;
+  memset(pInfo, 0, sizeof(*pInfo));
+  pBlob = (RtreeMatchArg*)&pInfo[1];
+
+  memcpy(pBlob, sqlite3_value_blob(pValue), nBlob);
+  nExpected = (int)(sizeof(RtreeMatchArg) +
+                    (pBlob->nParam-1)*sizeof(RtreeDValue));
+  if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){
+    sqlite3_free(pInfo);
+    return SQLITE_ERROR;
+  }
+  pInfo->pContext = pBlob->cb.pContext;
+  pInfo->nParam = pBlob->nParam;
+  pInfo->aParam = pBlob->aParam;
+
+  if( pBlob->cb.xGeom ){
+    pCons->u.xGeom = pBlob->cb.xGeom;
+  }else{
+    pCons->op = RTREE_QUERY;
+    pCons->u.xQueryFunc = pBlob->cb.xQueryFunc;
+  }
+  pCons->pInfo = pInfo;
+  return SQLITE_OK;
+}
+
+/* 
+** Rtree virtual table module xFilter method.
+*/
+static int rtreeFilter(
+  sqlite3_vtab_cursor *pVtabCursor, 
+  int idxNum, const char *idxStr,
+  int argc, sqlite3_value **argv
+){
+  Rtree *pRtree = (Rtree *)pVtabCursor->pVtab;
+  RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor;
+  RtreeNode *pRoot = 0;
+  int ii;
+  int rc = SQLITE_OK;
+  int iCell = 0;
+
+  rtreeReference(pRtree);
+
+  /* Reset the cursor to the same state as rtreeOpen() leaves it in. */
+  freeCursorConstraints(pCsr);
+  sqlite3_free(pCsr->aPoint);
+  memset(pCsr, 0, sizeof(RtreeCursor));
+  pCsr->base.pVtab = (sqlite3_vtab*)pRtree;
+
+  pCsr->iStrategy = idxNum;
+  if( idxNum==1 ){
+    /* Special case - lookup by rowid. */
+    RtreeNode *pLeaf;        /* Leaf on which the required cell resides */
+    RtreeSearchPoint *p;     /* Search point for the the leaf */
+    i64 iRowid = sqlite3_value_int64(argv[0]);
+    i64 iNode = 0;
+    rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode);
+    if( rc==SQLITE_OK && pLeaf!=0 ){
+      p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0);
+      assert( p!=0 );  /* Always returns pCsr->sPoint */
+      pCsr->aNode[0] = pLeaf;
+      p->id = iNode;
+      p->eWithin = PARTLY_WITHIN;
+      rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell);
+      p->iCell = iCell;
+      RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:");
+    }else{
+      pCsr->atEOF = 1;
+    }
+  }else{
+    /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array 
+    ** with the configured constraints. 
+    */
+    rc = nodeAcquire(pRtree, 1, 0, &pRoot);
+    if( rc==SQLITE_OK && argc>0 ){
+      pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc);
+      pCsr->nConstraint = argc;
+      if( !pCsr->aConstraint ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc);
+        memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1));
+        assert( (idxStr==0 && argc==0)
+                || (idxStr && (int)strlen(idxStr)==argc*2) );
+        for(ii=0; ii<argc; ii++){
+          RtreeConstraint *p = &pCsr->aConstraint[ii];
+          p->op = idxStr[ii*2];
+          p->iCoord = idxStr[ii*2+1]-'0';
+          if( p->op>=RTREE_MATCH ){
+            /* A MATCH operator. The right-hand-side must be a blob that
+            ** can be cast into an RtreeMatchArg object. One created using
+            ** an sqlite3_rtree_geometry_callback() SQL user function.
+            */
+            rc = deserializeGeometry(argv[ii], p);
+            if( rc!=SQLITE_OK ){
+              break;
+            }
+            p->pInfo->nCoord = pRtree->nDim*2;
+            p->pInfo->anQueue = pCsr->anQueue;
+            p->pInfo->mxLevel = pRtree->iDepth + 1;
+          }else{
+#ifdef SQLITE_RTREE_INT_ONLY
+            p->u.rValue = sqlite3_value_int64(argv[ii]);
+#else
+            p->u.rValue = sqlite3_value_double(argv[ii]);
+#endif
+          }
+        }
+      }
+    }
+    if( rc==SQLITE_OK ){
+      RtreeSearchPoint *pNew;
+      pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1);
+      if( pNew==0 ) return SQLITE_NOMEM;
+      pNew->id = 1;
+      pNew->iCell = 0;
+      pNew->eWithin = PARTLY_WITHIN;
+      assert( pCsr->bPoint==1 );
+      pCsr->aNode[0] = pRoot;
+      pRoot = 0;
+      RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:");
+      rc = rtreeStepToLeaf(pCsr);
+    }
+  }
+
+  nodeRelease(pRtree, pRoot);
+  rtreeRelease(pRtree);
+  return rc;
+}
+
+/*
+** Set the pIdxInfo->estimatedRows variable to nRow. Unless this
+** extension is currently being used by a version of SQLite too old to
+** support estimatedRows. In that case this function is a no-op.
+*/
+static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
+#if SQLITE_VERSION_NUMBER>=3008002
+  if( sqlite3_libversion_number()>=3008002 ){
+    pIdxInfo->estimatedRows = nRow;
+  }
+#endif
+}
+
+/*
+** Rtree virtual table module xBestIndex method. There are three
+** table scan strategies to choose from (in order from most to 
+** least desirable):
+**
+**   idxNum     idxStr        Strategy
+**   ------------------------------------------------
+**     1        Unused        Direct lookup by rowid.
+**     2        See below     R-tree query or full-table scan.
+**   ------------------------------------------------
+**
+** If strategy 1 is used, then idxStr is not meaningful. If strategy
+** 2 is used, idxStr is formatted to contain 2 bytes for each 
+** constraint used. The first two bytes of idxStr correspond to 
+** the constraint in sqlite3_index_info.aConstraintUsage[] with
+** (argvIndex==1) etc.
+**
+** The first of each pair of bytes in idxStr identifies the constraint
+** operator as follows:
+**
+**   Operator    Byte Value
+**   ----------------------
+**      =        0x41 ('A')
+**     <=        0x42 ('B')
+**      <        0x43 ('C')
+**     >=        0x44 ('D')
+**      >        0x45 ('E')
+**   MATCH       0x46 ('F')
+**   ----------------------
+**
+** The second of each pair of bytes identifies the coordinate column
+** to which the constraint applies. The leftmost coordinate column
+** is 'a', the second from the left 'b' etc.
+*/
+static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
+  Rtree *pRtree = (Rtree*)tab;
+  int rc = SQLITE_OK;
+  int ii;
+  i64 nRow;                       /* Estimated rows returned by this scan */
+
+  int iIdx = 0;
+  char zIdxStr[RTREE_MAX_DIMENSIONS*8+1];
+  memset(zIdxStr, 0, sizeof(zIdxStr));
+
+  assert( pIdxInfo->idxStr==0 );
+  for(ii=0; ii<pIdxInfo->nConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){
+    struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii];
+
+    if( p->usable && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){
+      /* We have an equality constraint on the rowid. Use strategy 1. */
+      int jj;
+      for(jj=0; jj<ii; jj++){
+        pIdxInfo->aConstraintUsage[jj].argvIndex = 0;
+        pIdxInfo->aConstraintUsage[jj].omit = 0;
+      }
+      pIdxInfo->idxNum = 1;
+      pIdxInfo->aConstraintUsage[ii].argvIndex = 1;
+      pIdxInfo->aConstraintUsage[jj].omit = 1;
+
+      /* This strategy involves a two rowid lookups on an B-Tree structures
+      ** and then a linear search of an R-Tree node. This should be 
+      ** considered almost as quick as a direct rowid lookup (for which 
+      ** sqlite uses an internal cost of 0.0). It is expected to return
+      ** a single row.
+      */ 
+      pIdxInfo->estimatedCost = 30.0;
+      setEstimatedRows(pIdxInfo, 1);
+      return SQLITE_OK;
+    }
+
+    if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){
+      u8 op;
+      switch( p->op ){
+        case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break;
+        case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break;
+        case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break;
+        case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break;
+        case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break;
+        default:
+          assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH );
+          op = RTREE_MATCH; 
+          break;
+      }
+      zIdxStr[iIdx++] = op;
+      zIdxStr[iIdx++] = p->iColumn - 1 + '0';
+      pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2);
+      pIdxInfo->aConstraintUsage[ii].omit = 1;
+    }
+  }
+
+  pIdxInfo->idxNum = 2;
+  pIdxInfo->needToFreeIdxStr = 1;
+  if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){
+    return SQLITE_NOMEM;
+  }
+
+  nRow = pRtree->nRowEst / (iIdx + 1);
+  pIdxInfo->estimatedCost = (double)6.0 * (double)nRow;
+  setEstimatedRows(pIdxInfo, nRow);
+
+  return rc;
+}
+
+/*
+** Return the N-dimensional volumn of the cell stored in *p.
+*/
+static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){
+  RtreeDValue area = (RtreeDValue)1;
+  int ii;
+  for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+    area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])));
+  }
+  return area;
+}
+
+/*
+** Return the margin length of cell p. The margin length is the sum
+** of the objects size in each dimension.
+*/
+static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){
+  RtreeDValue margin = (RtreeDValue)0;
+  int ii;
+  for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+    margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]));
+  }
+  return margin;
+}
+
+/*
+** Store the union of cells p1 and p2 in p1.
+*/
+static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){
+  int ii;
+  if( pRtree->eCoordType==RTREE_COORD_REAL32 ){
+    for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+      p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f);
+      p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f);
+    }
+  }else{
+    for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+      p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i);
+      p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i);
+    }
+  }
+}
+
+/*
+** Return true if the area covered by p2 is a subset of the area covered
+** by p1. False otherwise.
+*/
+static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){
+  int ii;
+  int isInt = (pRtree->eCoordType==RTREE_COORD_INT32);
+  for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+    RtreeCoord *a1 = &p1->aCoord[ii];
+    RtreeCoord *a2 = &p2->aCoord[ii];
+    if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f)) 
+     || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i)) 
+    ){
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/*
+** Return the amount cell p would grow by if it were unioned with pCell.
+*/
+static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){
+  RtreeDValue area;
+  RtreeCell cell;
+  memcpy(&cell, p, sizeof(RtreeCell));
+  area = cellArea(pRtree, &cell);
+  cellUnion(pRtree, &cell, pCell);
+  return (cellArea(pRtree, &cell)-area);
+}
+
+static RtreeDValue cellOverlap(
+  Rtree *pRtree, 
+  RtreeCell *p, 
+  RtreeCell *aCell, 
+  int nCell
+){
+  int ii;
+  RtreeDValue overlap = RTREE_ZERO;
+  for(ii=0; ii<nCell; ii++){
+    int jj;
+    RtreeDValue o = (RtreeDValue)1;
+    for(jj=0; jj<(pRtree->nDim*2); jj+=2){
+      RtreeDValue x1, x2;
+      x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj]));
+      x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1]));
+      if( x2<x1 ){
+        o = (RtreeDValue)0;
+        break;
+      }else{
+        o = o * (x2-x1);
+      }
+    }
+    overlap += o;
+  }
+  return overlap;
+}
+
+
+/*
+** This function implements the ChooseLeaf algorithm from Gutman[84].
+** ChooseSubTree in r*tree terminology.
+*/
+static int ChooseLeaf(
+  Rtree *pRtree,               /* Rtree table */
+  RtreeCell *pCell,            /* Cell to insert into rtree */
+  int iHeight,                 /* Height of sub-tree rooted at pCell */
+  RtreeNode **ppLeaf           /* OUT: Selected leaf page */
+){
+  int rc;
+  int ii;
+  RtreeNode *pNode;
+  rc = nodeAcquire(pRtree, 1, 0, &pNode);
+
+  for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){
+    int iCell;
+    sqlite3_int64 iBest = 0;
+
+    RtreeDValue fMinGrowth = RTREE_ZERO;
+    RtreeDValue fMinArea = RTREE_ZERO;
+
+    int nCell = NCELL(pNode);
+    RtreeCell cell;
+    RtreeNode *pChild;
+
+    RtreeCell *aCell = 0;
+
+    /* Select the child node which will be enlarged the least if pCell
+    ** is inserted into it. Resolve ties by choosing the entry with
+    ** the smallest area.
+    */
+    for(iCell=0; iCell<nCell; iCell++){
+      int bBest = 0;
+      RtreeDValue growth;
+      RtreeDValue area;
+      nodeGetCell(pRtree, pNode, iCell, &cell);
+      growth = cellGrowth(pRtree, &cell, pCell);
+      area = cellArea(pRtree, &cell);
+      if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){
+        bBest = 1;
+      }
+      if( bBest ){
+        fMinGrowth = growth;
+        fMinArea = area;
+        iBest = cell.iRowid;
+      }
+    }
+
+    sqlite3_free(aCell);
+    rc = nodeAcquire(pRtree, iBest, pNode, &pChild);
+    nodeRelease(pRtree, pNode);
+    pNode = pChild;
+  }
+
+  *ppLeaf = pNode;
+  return rc;
+}
+
+/*
+** A cell with the same content as pCell has just been inserted into
+** the node pNode. This function updates the bounding box cells in
+** all ancestor elements.
+*/
+static int AdjustTree(
+  Rtree *pRtree,                    /* Rtree table */
+  RtreeNode *pNode,                 /* Adjust ancestry of this node. */
+  RtreeCell *pCell                  /* This cell was just inserted */
+){
+  RtreeNode *p = pNode;
+  while( p->pParent ){
+    RtreeNode *pParent = p->pParent;
+    RtreeCell cell;
+    int iCell;
+
+    if( nodeParentIndex(pRtree, p, &iCell) ){
+      return SQLITE_CORRUPT_VTAB;
+    }
+
+    nodeGetCell(pRtree, pParent, iCell, &cell);
+    if( !cellContains(pRtree, &cell, pCell) ){
+      cellUnion(pRtree, &cell, pCell);
+      nodeOverwriteCell(pRtree, pParent, &cell, iCell);
+    }
+ 
+    p = pParent;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Write mapping (iRowid->iNode) to the <rtree>_rowid table.
+*/
+static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){
+  sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid);
+  sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode);
+  sqlite3_step(pRtree->pWriteRowid);
+  return sqlite3_reset(pRtree->pWriteRowid);
+}
+
+/*
+** Write mapping (iNode->iPar) to the <rtree>_parent table.
+*/
+static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){
+  sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode);
+  sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar);
+  sqlite3_step(pRtree->pWriteParent);
+  return sqlite3_reset(pRtree->pWriteParent);
+}
+
+static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int);
+
+
+/*
+** Arguments aIdx, aDistance and aSpare all point to arrays of size
+** nIdx. The aIdx array contains the set of integers from 0 to 
+** (nIdx-1) in no particular order. This function sorts the values
+** in aIdx according to the indexed values in aDistance. For
+** example, assuming the inputs:
+**
+**   aIdx      = { 0,   1,   2,   3 }
+**   aDistance = { 5.0, 2.0, 7.0, 6.0 }
+**
+** this function sets the aIdx array to contain:
+**
+**   aIdx      = { 0,   1,   2,   3 }
+**
+** The aSpare array is used as temporary working space by the
+** sorting algorithm.
+*/
+static void SortByDistance(
+  int *aIdx, 
+  int nIdx, 
+  RtreeDValue *aDistance, 
+  int *aSpare
+){
+  if( nIdx>1 ){
+    int iLeft = 0;
+    int iRight = 0;
+
+    int nLeft = nIdx/2;
+    int nRight = nIdx-nLeft;
+    int *aLeft = aIdx;
+    int *aRight = &aIdx[nLeft];
+
+    SortByDistance(aLeft, nLeft, aDistance, aSpare);
+    SortByDistance(aRight, nRight, aDistance, aSpare);
+
+    memcpy(aSpare, aLeft, sizeof(int)*nLeft);
+    aLeft = aSpare;
+
+    while( iLeft<nLeft || iRight<nRight ){
+      if( iLeft==nLeft ){
+        aIdx[iLeft+iRight] = aRight[iRight];
+        iRight++;
+      }else if( iRight==nRight ){
+        aIdx[iLeft+iRight] = aLeft[iLeft];
+        iLeft++;
+      }else{
+        RtreeDValue fLeft = aDistance[aLeft[iLeft]];
+        RtreeDValue fRight = aDistance[aRight[iRight]];
+        if( fLeft<fRight ){
+          aIdx[iLeft+iRight] = aLeft[iLeft];
+          iLeft++;
+        }else{
+          aIdx[iLeft+iRight] = aRight[iRight];
+          iRight++;
+        }
+      }
+    }
+
+#if 0
+    /* Check that the sort worked */
+    {
+      int jj;
+      for(jj=1; jj<nIdx; jj++){
+        RtreeDValue left = aDistance[aIdx[jj-1]];
+        RtreeDValue right = aDistance[aIdx[jj]];
+        assert( left<=right );
+      }
+    }
+#endif
+  }
+}
+
+/*
+** Arguments aIdx, aCell and aSpare all point to arrays of size
+** nIdx. The aIdx array contains the set of integers from 0 to 
+** (nIdx-1) in no particular order. This function sorts the values
+** in aIdx according to dimension iDim of the cells in aCell. The
+** minimum value of dimension iDim is considered first, the
+** maximum used to break ties.
+**
+** The aSpare array is used as temporary working space by the
+** sorting algorithm.
+*/
+static void SortByDimension(
+  Rtree *pRtree,
+  int *aIdx, 
+  int nIdx, 
+  int iDim, 
+  RtreeCell *aCell, 
+  int *aSpare
+){
+  if( nIdx>1 ){
+
+    int iLeft = 0;
+    int iRight = 0;
+
+    int nLeft = nIdx/2;
+    int nRight = nIdx-nLeft;
+    int *aLeft = aIdx;
+    int *aRight = &aIdx[nLeft];
+
+    SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare);
+    SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare);
+
+    memcpy(aSpare, aLeft, sizeof(int)*nLeft);
+    aLeft = aSpare;
+    while( iLeft<nLeft || iRight<nRight ){
+      RtreeDValue xleft1 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2]);
+      RtreeDValue xleft2 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2+1]);
+      RtreeDValue xright1 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2]);
+      RtreeDValue xright2 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2+1]);
+      if( (iLeft!=nLeft) && ((iRight==nRight)
+       || (xleft1<xright1)
+       || (xleft1==xright1 && xleft2<xright2)
+      )){
+        aIdx[iLeft+iRight] = aLeft[iLeft];
+        iLeft++;
+      }else{
+        aIdx[iLeft+iRight] = aRight[iRight];
+        iRight++;
+      }
+    }
+
+#if 0
+    /* Check that the sort worked */
+    {
+      int jj;
+      for(jj=1; jj<nIdx; jj++){
+        RtreeDValue xleft1 = aCell[aIdx[jj-1]].aCoord[iDim*2];
+        RtreeDValue xleft2 = aCell[aIdx[jj-1]].aCoord[iDim*2+1];
+        RtreeDValue xright1 = aCell[aIdx[jj]].aCoord[iDim*2];
+        RtreeDValue xright2 = aCell[aIdx[jj]].aCoord[iDim*2+1];
+        assert( xleft1<=xright1 && (xleft1<xright1 || xleft2<=xright2) );
+      }
+    }
+#endif
+  }
+}
+
+/*
+** Implementation of the R*-tree variant of SplitNode from Beckman[1990].
+*/
+static int splitNodeStartree(
+  Rtree *pRtree,
+  RtreeCell *aCell,
+  int nCell,
+  RtreeNode *pLeft,
+  RtreeNode *pRight,
+  RtreeCell *pBboxLeft,
+  RtreeCell *pBboxRight
+){
+  int **aaSorted;
+  int *aSpare;
+  int ii;
+
+  int iBestDim = 0;
+  int iBestSplit = 0;
+  RtreeDValue fBestMargin = RTREE_ZERO;
+
+  int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int));
+
+  aaSorted = (int **)sqlite3_malloc(nByte);
+  if( !aaSorted ){
+    return SQLITE_NOMEM;
+  }
+
+  aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell];
+  memset(aaSorted, 0, nByte);
+  for(ii=0; ii<pRtree->nDim; ii++){
+    int jj;
+    aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell];
+    for(jj=0; jj<nCell; jj++){
+      aaSorted[ii][jj] = jj;
+    }
+    SortByDimension(pRtree, aaSorted[ii], nCell, ii, aCell, aSpare);
+  }
+
+  for(ii=0; ii<pRtree->nDim; ii++){
+    RtreeDValue margin = RTREE_ZERO;
+    RtreeDValue fBestOverlap = RTREE_ZERO;
+    RtreeDValue fBestArea = RTREE_ZERO;
+    int iBestLeft = 0;
+    int nLeft;
+
+    for(
+      nLeft=RTREE_MINCELLS(pRtree); 
+      nLeft<=(nCell-RTREE_MINCELLS(pRtree)); 
+      nLeft++
+    ){
+      RtreeCell left;
+      RtreeCell right;
+      int kk;
+      RtreeDValue overlap;
+      RtreeDValue area;
+
+      memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell));
+      memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell));
+      for(kk=1; kk<(nCell-1); kk++){
+        if( kk<nLeft ){
+          cellUnion(pRtree, &left, &aCell[aaSorted[ii][kk]]);
+        }else{
+          cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]);
+        }
+      }
+      margin += cellMargin(pRtree, &left);
+      margin += cellMargin(pRtree, &right);
+      overlap = cellOverlap(pRtree, &left, &right, 1);
+      area = cellArea(pRtree, &left) + cellArea(pRtree, &right);
+      if( (nLeft==RTREE_MINCELLS(pRtree))
+       || (overlap<fBestOverlap)
+       || (overlap==fBestOverlap && area<fBestArea)
+      ){
+        iBestLeft = nLeft;
+        fBestOverlap = overlap;
+        fBestArea = area;
+      }
+    }
+
+    if( ii==0 || margin<fBestMargin ){
+      iBestDim = ii;
+      fBestMargin = margin;
+      iBestSplit = iBestLeft;
+    }
+  }
+
+  memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell));
+  memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell));
+  for(ii=0; ii<nCell; ii++){
+    RtreeNode *pTarget = (ii<iBestSplit)?pLeft:pRight;
+    RtreeCell *pBbox = (ii<iBestSplit)?pBboxLeft:pBboxRight;
+    RtreeCell *pCell = &aCell[aaSorted[iBestDim][ii]];
+    nodeInsertCell(pRtree, pTarget, pCell);
+    cellUnion(pRtree, pBbox, pCell);
+  }
+
+  sqlite3_free(aaSorted);
+  return SQLITE_OK;
+}
+
+
+static int updateMapping(
+  Rtree *pRtree, 
+  i64 iRowid, 
+  RtreeNode *pNode, 
+  int iHeight
+){
+  int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64);
+  xSetMapping = ((iHeight==0)?rowidWrite:parentWrite);
+  if( iHeight>0 ){
+    RtreeNode *pChild = nodeHashLookup(pRtree, iRowid);
+    if( pChild ){
+      nodeRelease(pRtree, pChild->pParent);
+      nodeReference(pNode);
+      pChild->pParent = pNode;
+    }
+  }
+  return xSetMapping(pRtree, iRowid, pNode->iNode);
+}
+
+static int SplitNode(
+  Rtree *pRtree,
+  RtreeNode *pNode,
+  RtreeCell *pCell,
+  int iHeight
+){
+  int i;
+  int newCellIsRight = 0;
+
+  int rc = SQLITE_OK;
+  int nCell = NCELL(pNode);
+  RtreeCell *aCell;
+  int *aiUsed;
+
+  RtreeNode *pLeft = 0;
+  RtreeNode *pRight = 0;
+
+  RtreeCell leftbbox;
+  RtreeCell rightbbox;
+
+  /* Allocate an array and populate it with a copy of pCell and 
+  ** all cells from node pLeft. Then zero the original node.
+  */
+  aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1));
+  if( !aCell ){
+    rc = SQLITE_NOMEM;
+    goto splitnode_out;
+  }
+  aiUsed = (int *)&aCell[nCell+1];
+  memset(aiUsed, 0, sizeof(int)*(nCell+1));
+  for(i=0; i<nCell; i++){
+    nodeGetCell(pRtree, pNode, i, &aCell[i]);
+  }
+  nodeZero(pRtree, pNode);
+  memcpy(&aCell[nCell], pCell, sizeof(RtreeCell));
+  nCell++;
+
+  if( pNode->iNode==1 ){
+    pRight = nodeNew(pRtree, pNode);
+    pLeft = nodeNew(pRtree, pNode);
+    pRtree->iDepth++;
+    pNode->isDirty = 1;
+    writeInt16(pNode->zData, pRtree->iDepth);
+  }else{
+    pLeft = pNode;
+    pRight = nodeNew(pRtree, pLeft->pParent);
+    nodeReference(pLeft);
+  }
+
+  if( !pLeft || !pRight ){
+    rc = SQLITE_NOMEM;
+    goto splitnode_out;
+  }
+
+  memset(pLeft->zData, 0, pRtree->iNodeSize);
+  memset(pRight->zData, 0, pRtree->iNodeSize);
+
+  rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight,
+                         &leftbbox, &rightbbox);
+  if( rc!=SQLITE_OK ){
+    goto splitnode_out;
+  }
+
+  /* Ensure both child nodes have node numbers assigned to them by calling
+  ** nodeWrite(). Node pRight always needs a node number, as it was created
+  ** by nodeNew() above. But node pLeft sometimes already has a node number.
+  ** In this case avoid the all to nodeWrite().
+  */
+  if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight))
+   || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft)))
+  ){
+    goto splitnode_out;
+  }
+
+  rightbbox.iRowid = pRight->iNode;
+  leftbbox.iRowid = pLeft->iNode;
+
+  if( pNode->iNode==1 ){
+    rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1);
+    if( rc!=SQLITE_OK ){
+      goto splitnode_out;
+    }
+  }else{
+    RtreeNode *pParent = pLeft->pParent;
+    int iCell;
+    rc = nodeParentIndex(pRtree, pLeft, &iCell);
+    if( rc==SQLITE_OK ){
+      nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell);
+      rc = AdjustTree(pRtree, pParent, &leftbbox);
+    }
+    if( rc!=SQLITE_OK ){
+      goto splitnode_out;
+    }
+  }
+  if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){
+    goto splitnode_out;
+  }
+
+  for(i=0; i<NCELL(pRight); i++){
+    i64 iRowid = nodeGetRowid(pRtree, pRight, i);
+    rc = updateMapping(pRtree, iRowid, pRight, iHeight);
+    if( iRowid==pCell->iRowid ){
+      newCellIsRight = 1;
+    }
+    if( rc!=SQLITE_OK ){
+      goto splitnode_out;
+    }
+  }
+  if( pNode->iNode==1 ){
+    for(i=0; i<NCELL(pLeft); i++){
+      i64 iRowid = nodeGetRowid(pRtree, pLeft, i);
+      rc = updateMapping(pRtree, iRowid, pLeft, iHeight);
+      if( rc!=SQLITE_OK ){
+        goto splitnode_out;
+      }
+    }
+  }else if( newCellIsRight==0 ){
+    rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight);
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = nodeRelease(pRtree, pRight);
+    pRight = 0;
+  }
+  if( rc==SQLITE_OK ){
+    rc = nodeRelease(pRtree, pLeft);
+    pLeft = 0;
+  }
+
+splitnode_out:
+  nodeRelease(pRtree, pRight);
+  nodeRelease(pRtree, pLeft);
+  sqlite3_free(aCell);
+  return rc;
+}
+
+/*
+** If node pLeaf is not the root of the r-tree and its pParent pointer is 
+** still NULL, load all ancestor nodes of pLeaf into memory and populate
+** the pLeaf->pParent chain all the way up to the root node.
+**
+** This operation is required when a row is deleted (or updated - an update
+** is implemented as a delete followed by an insert). SQLite provides the
+** rowid of the row to delete, which can be used to find the leaf on which
+** the entry resides (argument pLeaf). Once the leaf is located, this 
+** function is called to determine its ancestry.
+*/
+static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){
+  int rc = SQLITE_OK;
+  RtreeNode *pChild = pLeaf;
+  while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){
+    int rc2 = SQLITE_OK;          /* sqlite3_reset() return code */
+    sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode);
+    rc = sqlite3_step(pRtree->pReadParent);
+    if( rc==SQLITE_ROW ){
+      RtreeNode *pTest;           /* Used to test for reference loops */
+      i64 iNode;                  /* Node number of parent node */
+
+      /* Before setting pChild->pParent, test that we are not creating a
+      ** loop of references (as we would if, say, pChild==pParent). We don't
+      ** want to do this as it leads to a memory leak when trying to delete
+      ** the referenced counted node structures.
+      */
+      iNode = sqlite3_column_int64(pRtree->pReadParent, 0);
+      for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent);
+      if( !pTest ){
+        rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent);
+      }
+    }
+    rc = sqlite3_reset(pRtree->pReadParent);
+    if( rc==SQLITE_OK ) rc = rc2;
+    if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB;
+    pChild = pChild->pParent;
+  }
+  return rc;
+}
+
+static int deleteCell(Rtree *, RtreeNode *, int, int);
+
+static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){
+  int rc;
+  int rc2;
+  RtreeNode *pParent = 0;
+  int iCell;
+
+  assert( pNode->nRef==1 );
+
+  /* Remove the entry in the parent cell. */
+  rc = nodeParentIndex(pRtree, pNode, &iCell);
+  if( rc==SQLITE_OK ){
+    pParent = pNode->pParent;
+    pNode->pParent = 0;
+    rc = deleteCell(pRtree, pParent, iCell, iHeight+1);
+  }
+  rc2 = nodeRelease(pRtree, pParent);
+  if( rc==SQLITE_OK ){
+    rc = rc2;
+  }
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* Remove the xxx_node entry. */
+  sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode);
+  sqlite3_step(pRtree->pDeleteNode);
+  if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){
+    return rc;
+  }
+
+  /* Remove the xxx_parent entry. */
+  sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode);
+  sqlite3_step(pRtree->pDeleteParent);
+  if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){
+    return rc;
+  }
+  
+  /* Remove the node from the in-memory hash table and link it into
+  ** the Rtree.pDeleted list. Its contents will be re-inserted later on.
+  */
+  nodeHashDelete(pRtree, pNode);
+  pNode->iNode = iHeight;
+  pNode->pNext = pRtree->pDeleted;
+  pNode->nRef++;
+  pRtree->pDeleted = pNode;
+
+  return SQLITE_OK;
+}
+
+static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){
+  RtreeNode *pParent = pNode->pParent;
+  int rc = SQLITE_OK; 
+  if( pParent ){
+    int ii; 
+    int nCell = NCELL(pNode);
+    RtreeCell box;                            /* Bounding box for pNode */
+    nodeGetCell(pRtree, pNode, 0, &box);
+    for(ii=1; ii<nCell; ii++){
+      RtreeCell cell;
+      nodeGetCell(pRtree, pNode, ii, &cell);
+      cellUnion(pRtree, &box, &cell);
+    }
+    box.iRowid = pNode->iNode;
+    rc = nodeParentIndex(pRtree, pNode, &ii);
+    if( rc==SQLITE_OK ){
+      nodeOverwriteCell(pRtree, pParent, &box, ii);
+      rc = fixBoundingBox(pRtree, pParent);
+    }
+  }
+  return rc;
+}
+
+/*
+** Delete the cell at index iCell of node pNode. After removing the
+** cell, adjust the r-tree data structure if required.
+*/
+static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){
+  RtreeNode *pParent;
+  int rc;
+
+  if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){
+    return rc;
+  }
+
+  /* Remove the cell from the node. This call just moves bytes around
+  ** the in-memory node image, so it cannot fail.
+  */
+  nodeDeleteCell(pRtree, pNode, iCell);
+
+  /* If the node is not the tree root and now has less than the minimum
+  ** number of cells, remove it from the tree. Otherwise, update the
+  ** cell in the parent node so that it tightly contains the updated
+  ** node.
+  */
+  pParent = pNode->pParent;
+  assert( pParent || pNode->iNode==1 );
+  if( pParent ){
+    if( NCELL(pNode)<RTREE_MINCELLS(pRtree) ){
+      rc = removeNode(pRtree, pNode, iHeight);
+    }else{
+      rc = fixBoundingBox(pRtree, pNode);
+    }
+  }
+
+  return rc;
+}
+
+static int Reinsert(
+  Rtree *pRtree, 
+  RtreeNode *pNode, 
+  RtreeCell *pCell, 
+  int iHeight
+){
+  int *aOrder;
+  int *aSpare;
+  RtreeCell *aCell;
+  RtreeDValue *aDistance;
+  int nCell;
+  RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS];
+  int iDim;
+  int ii;
+  int rc = SQLITE_OK;
+  int n;
+
+  memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS);
+
+  nCell = NCELL(pNode)+1;
+  n = (nCell+1)&(~1);
+
+  /* Allocate the buffers used by this operation. The allocation is
+  ** relinquished before this function returns.
+  */
+  aCell = (RtreeCell *)sqlite3_malloc(n * (
+    sizeof(RtreeCell)     +         /* aCell array */
+    sizeof(int)           +         /* aOrder array */
+    sizeof(int)           +         /* aSpare array */
+    sizeof(RtreeDValue)             /* aDistance array */
+  ));
+  if( !aCell ){
+    return SQLITE_NOMEM;
+  }
+  aOrder    = (int *)&aCell[n];
+  aSpare    = (int *)&aOrder[n];
+  aDistance = (RtreeDValue *)&aSpare[n];
+
+  for(ii=0; ii<nCell; ii++){
+    if( ii==(nCell-1) ){
+      memcpy(&aCell[ii], pCell, sizeof(RtreeCell));
+    }else{
+      nodeGetCell(pRtree, pNode, ii, &aCell[ii]);
+    }
+    aOrder[ii] = ii;
+    for(iDim=0; iDim<pRtree->nDim; iDim++){
+      aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]);
+      aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]);
+    }
+  }
+  for(iDim=0; iDim<pRtree->nDim; iDim++){
+    aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2));
+  }
+
+  for(ii=0; ii<nCell; ii++){
+    aDistance[ii] = RTREE_ZERO;
+    for(iDim=0; iDim<pRtree->nDim; iDim++){
+      RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - 
+                               DCOORD(aCell[ii].aCoord[iDim*2]));
+      aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]);
+    }
+  }
+
+  SortByDistance(aOrder, nCell, aDistance, aSpare);
+  nodeZero(pRtree, pNode);
+
+  for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){
+    RtreeCell *p = &aCell[aOrder[ii]];
+    nodeInsertCell(pRtree, pNode, p);
+    if( p->iRowid==pCell->iRowid ){
+      if( iHeight==0 ){
+        rc = rowidWrite(pRtree, p->iRowid, pNode->iNode);
+      }else{
+        rc = parentWrite(pRtree, p->iRowid, pNode->iNode);
+      }
+    }
+  }
+  if( rc==SQLITE_OK ){
+    rc = fixBoundingBox(pRtree, pNode);
+  }
+  for(; rc==SQLITE_OK && ii<nCell; ii++){
+    /* Find a node to store this cell in. pNode->iNode currently contains
+    ** the height of the sub-tree headed by the cell.
+    */
+    RtreeNode *pInsert;
+    RtreeCell *p = &aCell[aOrder[ii]];
+    rc = ChooseLeaf(pRtree, p, iHeight, &pInsert);
+    if( rc==SQLITE_OK ){
+      int rc2;
+      rc = rtreeInsertCell(pRtree, pInsert, p, iHeight);
+      rc2 = nodeRelease(pRtree, pInsert);
+      if( rc==SQLITE_OK ){
+        rc = rc2;
+      }
+    }
+  }
+
+  sqlite3_free(aCell);
+  return rc;
+}
+
+/*
+** Insert cell pCell into node pNode. Node pNode is the head of a 
+** subtree iHeight high (leaf nodes have iHeight==0).
+*/
+static int rtreeInsertCell(
+  Rtree *pRtree,
+  RtreeNode *pNode,
+  RtreeCell *pCell,
+  int iHeight
+){
+  int rc = SQLITE_OK;
+  if( iHeight>0 ){
+    RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid);
+    if( pChild ){
+      nodeRelease(pRtree, pChild->pParent);
+      nodeReference(pNode);
+      pChild->pParent = pNode;
+    }
+  }
+  if( nodeInsertCell(pRtree, pNode, pCell) ){
+    if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){
+      rc = SplitNode(pRtree, pNode, pCell, iHeight);
+    }else{
+      pRtree->iReinsertHeight = iHeight;
+      rc = Reinsert(pRtree, pNode, pCell, iHeight);
+    }
+  }else{
+    rc = AdjustTree(pRtree, pNode, pCell);
+    if( rc==SQLITE_OK ){
+      if( iHeight==0 ){
+        rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode);
+      }else{
+        rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode);
+      }
+    }
+  }
+  return rc;
+}
+
+static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){
+  int ii;
+  int rc = SQLITE_OK;
+  int nCell = NCELL(pNode);
+
+  for(ii=0; rc==SQLITE_OK && ii<nCell; ii++){
+    RtreeNode *pInsert;
+    RtreeCell cell;
+    nodeGetCell(pRtree, pNode, ii, &cell);
+
+    /* Find a node to store this cell in. pNode->iNode currently contains
+    ** the height of the sub-tree headed by the cell.
+    */
+    rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert);
+    if( rc==SQLITE_OK ){
+      int rc2;
+      rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode);
+      rc2 = nodeRelease(pRtree, pInsert);
+      if( rc==SQLITE_OK ){
+        rc = rc2;
+      }
+    }
+  }
+  return rc;
+}
+
+/*
+** Select a currently unused rowid for a new r-tree record.
+*/
+static int newRowid(Rtree *pRtree, i64 *piRowid){
+  int rc;
+  sqlite3_bind_null(pRtree->pWriteRowid, 1);
+  sqlite3_bind_null(pRtree->pWriteRowid, 2);
+  sqlite3_step(pRtree->pWriteRowid);
+  rc = sqlite3_reset(pRtree->pWriteRowid);
+  *piRowid = sqlite3_last_insert_rowid(pRtree->db);
+  return rc;
+}
+
+/*
+** Remove the entry with rowid=iDelete from the r-tree structure.
+*/
+static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){
+  int rc;                         /* Return code */
+  RtreeNode *pLeaf = 0;           /* Leaf node containing record iDelete */
+  int iCell;                      /* Index of iDelete cell in pLeaf */
+  RtreeNode *pRoot;               /* Root node of rtree structure */
+
+
+  /* Obtain a reference to the root node to initialize Rtree.iDepth */
+  rc = nodeAcquire(pRtree, 1, 0, &pRoot);
+
+  /* Obtain a reference to the leaf node that contains the entry 
+  ** about to be deleted. 
+  */
+  if( rc==SQLITE_OK ){
+    rc = findLeafNode(pRtree, iDelete, &pLeaf, 0);
+  }
+
+  /* Delete the cell in question from the leaf node. */
+  if( rc==SQLITE_OK ){
+    int rc2;
+    rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell);
+    if( rc==SQLITE_OK ){
+      rc = deleteCell(pRtree, pLeaf, iCell, 0);
+    }
+    rc2 = nodeRelease(pRtree, pLeaf);
+    if( rc==SQLITE_OK ){
+      rc = rc2;
+    }
+  }
+
+  /* Delete the corresponding entry in the <rtree>_rowid table. */
+  if( rc==SQLITE_OK ){
+    sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete);
+    sqlite3_step(pRtree->pDeleteRowid);
+    rc = sqlite3_reset(pRtree->pDeleteRowid);
+  }
+
+  /* Check if the root node now has exactly one child. If so, remove
+  ** it, schedule the contents of the child for reinsertion and 
+  ** reduce the tree height by one.
+  **
+  ** This is equivalent to copying the contents of the child into
+  ** the root node (the operation that Gutman's paper says to perform 
+  ** in this scenario).
+  */
+  if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){
+    int rc2;
+    RtreeNode *pChild;
+    i64 iChild = nodeGetRowid(pRtree, pRoot, 0);
+    rc = nodeAcquire(pRtree, iChild, pRoot, &pChild);
+    if( rc==SQLITE_OK ){
+      rc = removeNode(pRtree, pChild, pRtree->iDepth-1);
+    }
+    rc2 = nodeRelease(pRtree, pChild);
+    if( rc==SQLITE_OK ) rc = rc2;
+    if( rc==SQLITE_OK ){
+      pRtree->iDepth--;
+      writeInt16(pRoot->zData, pRtree->iDepth);
+      pRoot->isDirty = 1;
+    }
+  }
+
+  /* Re-insert the contents of any underfull nodes removed from the tree. */
+  for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){
+    if( rc==SQLITE_OK ){
+      rc = reinsertNodeContent(pRtree, pLeaf);
+    }
+    pRtree->pDeleted = pLeaf->pNext;
+    sqlite3_free(pLeaf);
+  }
+
+  /* Release the reference to the root node. */
+  if( rc==SQLITE_OK ){
+    rc = nodeRelease(pRtree, pRoot);
+  }else{
+    nodeRelease(pRtree, pRoot);
+  }
+
+  return rc;
+}
+
+/*
+** Rounding constants for float->double conversion.
+*/
+#define RNDTOWARDS  (1.0 - 1.0/8388608.0)  /* Round towards zero */
+#define RNDAWAY     (1.0 + 1.0/8388608.0)  /* Round away from zero */
+
+#if !defined(SQLITE_RTREE_INT_ONLY)
+/*
+** Convert an sqlite3_value into an RtreeValue (presumably a float)
+** while taking care to round toward negative or positive, respectively.
+*/
+static RtreeValue rtreeValueDown(sqlite3_value *v){
+  double d = sqlite3_value_double(v);
+  float f = (float)d;
+  if( f>d ){
+    f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS));
+  }
+  return f;
+}
+static RtreeValue rtreeValueUp(sqlite3_value *v){
+  double d = sqlite3_value_double(v);
+  float f = (float)d;
+  if( f<d ){
+    f = (float)(d*(d<0 ? RNDTOWARDS : RNDAWAY));
+  }
+  return f;
+}
+#endif /* !defined(SQLITE_RTREE_INT_ONLY) */
+
+
+/*
+** The xUpdate method for rtree module virtual tables.
+*/
+static int rtreeUpdate(
+  sqlite3_vtab *pVtab, 
+  int nData, 
+  sqlite3_value **azData, 
+  sqlite_int64 *pRowid
+){
+  Rtree *pRtree = (Rtree *)pVtab;
+  int rc = SQLITE_OK;
+  RtreeCell cell;                 /* New cell to insert if nData>1 */
+  int bHaveRowid = 0;             /* Set to 1 after new rowid is determined */
+
+  rtreeReference(pRtree);
+  assert(nData>=1);
+
+  cell.iRowid = 0;  /* Used only to suppress a compiler warning */
+
+  /* Constraint handling. A write operation on an r-tree table may return
+  ** SQLITE_CONSTRAINT for two reasons:
+  **
+  **   1. A duplicate rowid value, or
+  **   2. The supplied data violates the "x2>=x1" constraint.
+  **
+  ** In the first case, if the conflict-handling mode is REPLACE, then
+  ** the conflicting row can be removed before proceeding. In the second
+  ** case, SQLITE_CONSTRAINT must be returned regardless of the
+  ** conflict-handling mode specified by the user.
+  */
+  if( nData>1 ){
+    int ii;
+
+    /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. */
+    assert( nData==(pRtree->nDim*2 + 3) );
+#ifndef SQLITE_RTREE_INT_ONLY
+    if( pRtree->eCoordType==RTREE_COORD_REAL32 ){
+      for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+        cell.aCoord[ii].f = rtreeValueDown(azData[ii+3]);
+        cell.aCoord[ii+1].f = rtreeValueUp(azData[ii+4]);
+        if( cell.aCoord[ii].f>cell.aCoord[ii+1].f ){
+          rc = SQLITE_CONSTRAINT;
+          goto constraint;
+        }
+      }
+    }else
+#endif
+    {
+      for(ii=0; ii<(pRtree->nDim*2); ii+=2){
+        cell.aCoord[ii].i = sqlite3_value_int(azData[ii+3]);
+        cell.aCoord[ii+1].i = sqlite3_value_int(azData[ii+4]);
+        if( cell.aCoord[ii].i>cell.aCoord[ii+1].i ){
+          rc = SQLITE_CONSTRAINT;
+          goto constraint;
+        }
+      }
+    }
+
+    /* If a rowid value was supplied, check if it is already present in 
+    ** the table. If so, the constraint has failed. */
+    if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){
+      cell.iRowid = sqlite3_value_int64(azData[2]);
+      if( sqlite3_value_type(azData[0])==SQLITE_NULL
+       || sqlite3_value_int64(azData[0])!=cell.iRowid
+      ){
+        int steprc;
+        sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid);
+        steprc = sqlite3_step(pRtree->pReadRowid);
+        rc = sqlite3_reset(pRtree->pReadRowid);
+        if( SQLITE_ROW==steprc ){
+          if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){
+            rc = rtreeDeleteRowid(pRtree, cell.iRowid);
+          }else{
+            rc = SQLITE_CONSTRAINT;
+            goto constraint;
+          }
+        }
+      }
+      bHaveRowid = 1;
+    }
+  }
+
+  /* If azData[0] is not an SQL NULL value, it is the rowid of a
+  ** record to delete from the r-tree table. The following block does
+  ** just that.
+  */
+  if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){
+    rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0]));
+  }
+
+  /* If the azData[] array contains more than one element, elements
+  ** (azData[2]..azData[argc-1]) contain a new record to insert into
+  ** the r-tree structure.
+  */
+  if( rc==SQLITE_OK && nData>1 ){
+    /* Insert the new record into the r-tree */
+    RtreeNode *pLeaf = 0;
+
+    /* Figure out the rowid of the new row. */
+    if( bHaveRowid==0 ){
+      rc = newRowid(pRtree, &cell.iRowid);
+    }
+    *pRowid = cell.iRowid;
+
+    if( rc==SQLITE_OK ){
+      rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf);
+    }
+    if( rc==SQLITE_OK ){
+      int rc2;
+      pRtree->iReinsertHeight = -1;
+      rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0);
+      rc2 = nodeRelease(pRtree, pLeaf);
+      if( rc==SQLITE_OK ){
+        rc = rc2;
+      }
+    }
+  }
+
+constraint:
+  rtreeRelease(pRtree);
+  return rc;
+}
+
+/*
+** The xRename method for rtree module virtual tables.
+*/
+static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){
+  Rtree *pRtree = (Rtree *)pVtab;
+  int rc = SQLITE_NOMEM;
+  char *zSql = sqlite3_mprintf(
+    "ALTER TABLE %Q.'%q_node'   RENAME TO \"%w_node\";"
+    "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";"
+    "ALTER TABLE %Q.'%q_rowid'  RENAME TO \"%w_rowid\";"
+    , pRtree->zDb, pRtree->zName, zNewName 
+    , pRtree->zDb, pRtree->zName, zNewName 
+    , pRtree->zDb, pRtree->zName, zNewName
+  );
+  if( zSql ){
+    rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0);
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** This function populates the pRtree->nRowEst variable with an estimate
+** of the number of rows in the virtual table. If possible, this is based
+** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST.
+*/
+static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){
+  const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'";
+  char *zSql;
+  sqlite3_stmt *p;
+  int rc;
+  i64 nRow = 0;
+
+  zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName);
+  if( zSql==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0);
+    if( rc==SQLITE_OK ){
+      if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0);
+      rc = sqlite3_finalize(p);
+    }else if( rc!=SQLITE_NOMEM ){
+      rc = SQLITE_OK;
+    }
+
+    if( rc==SQLITE_OK ){
+      if( nRow==0 ){
+        pRtree->nRowEst = RTREE_DEFAULT_ROWEST;
+      }else{
+        pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST);
+      }
+    }
+    sqlite3_free(zSql);
+  }
+
+  return rc;
+}
+
+static sqlite3_module rtreeModule = {
+  0,                          /* iVersion */
+  rtreeCreate,                /* xCreate - create a table */
+  rtreeConnect,               /* xConnect - connect to an existing table */
+  rtreeBestIndex,             /* xBestIndex - Determine search strategy */
+  rtreeDisconnect,            /* xDisconnect - Disconnect from a table */
+  rtreeDestroy,               /* xDestroy - Drop a table */
+  rtreeOpen,                  /* xOpen - open a cursor */
+  rtreeClose,                 /* xClose - close a cursor */
+  rtreeFilter,                /* xFilter - configure scan constraints */
+  rtreeNext,                  /* xNext - advance a cursor */
+  rtreeEof,                   /* xEof */
+  rtreeColumn,                /* xColumn - read data */
+  rtreeRowid,                 /* xRowid - read data */
+  rtreeUpdate,                /* xUpdate - write data */
+  0,                          /* xBegin - begin transaction */
+  0,                          /* xSync - sync transaction */
+  0,                          /* xCommit - commit transaction */
+  0,                          /* xRollback - rollback transaction */
+  0,                          /* xFindFunction - function overloading */
+  rtreeRename,                /* xRename - rename the table */
+  0,                          /* xSavepoint */
+  0,                          /* xRelease */
+  0                           /* xRollbackTo */
+};
+
+static int rtreeSqlInit(
+  Rtree *pRtree, 
+  sqlite3 *db, 
+  const char *zDb, 
+  const char *zPrefix, 
+  int isCreate
+){
+  int rc = SQLITE_OK;
+
+  #define N_STATEMENT 9
+  static const char *azSql[N_STATEMENT] = {
+    /* Read and write the xxx_node table */
+    "SELECT data FROM '%q'.'%q_node' WHERE nodeno = :1",
+    "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)",
+    "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1",
+
+    /* Read and write the xxx_rowid table */
+    "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1",
+    "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)",
+    "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1",
+
+    /* Read and write the xxx_parent table */
+    "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1",
+    "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)",
+    "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1"
+  };
+  sqlite3_stmt **appStmt[N_STATEMENT];
+  int i;
+
+  pRtree->db = db;
+
+  if( isCreate ){
+    char *zCreate = sqlite3_mprintf(
+"CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);"
+"CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);"
+"CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY,"
+                                  " parentnode INTEGER);"
+"INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))",
+      zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize
+    );
+    if( !zCreate ){
+      return SQLITE_NOMEM;
+    }
+    rc = sqlite3_exec(db, zCreate, 0, 0, 0);
+    sqlite3_free(zCreate);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+  }
+
+  appStmt[0] = &pRtree->pReadNode;
+  appStmt[1] = &pRtree->pWriteNode;
+  appStmt[2] = &pRtree->pDeleteNode;
+  appStmt[3] = &pRtree->pReadRowid;
+  appStmt[4] = &pRtree->pWriteRowid;
+  appStmt[5] = &pRtree->pDeleteRowid;
+  appStmt[6] = &pRtree->pReadParent;
+  appStmt[7] = &pRtree->pWriteParent;
+  appStmt[8] = &pRtree->pDeleteParent;
+
+  rc = rtreeQueryStat1(db, pRtree);
+  for(i=0; i<N_STATEMENT && rc==SQLITE_OK; i++){
+    char *zSql = sqlite3_mprintf(azSql[i], zDb, zPrefix);
+    if( zSql ){
+      rc = sqlite3_prepare_v2(db, zSql, -1, appStmt[i], 0); 
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+    sqlite3_free(zSql);
+  }
+
+  return rc;
+}
+
+/*
+** The second argument to this function contains the text of an SQL statement
+** that returns a single integer value. The statement is compiled and executed
+** using database connection db. If successful, the integer value returned
+** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error
+** code is returned and the value of *piVal after returning is not defined.
+*/
+static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){
+  int rc = SQLITE_NOMEM;
+  if( zSql ){
+    sqlite3_stmt *pStmt = 0;
+    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      if( SQLITE_ROW==sqlite3_step(pStmt) ){
+        *piVal = sqlite3_column_int(pStmt, 0);
+      }
+      rc = sqlite3_finalize(pStmt);
+    }
+  }
+  return rc;
+}
+
+/*
+** This function is called from within the xConnect() or xCreate() method to
+** determine the node-size used by the rtree table being created or connected
+** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned.
+** Otherwise, an SQLite error code is returned.
+**
+** If this function is being called as part of an xConnect(), then the rtree
+** table already exists. In this case the node-size is determined by inspecting
+** the root node of the tree.
+**
+** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. 
+** This ensures that each node is stored on a single database page. If the 
+** database page-size is so large that more than RTREE_MAXCELLS entries 
+** would fit in a single node, use a smaller node-size.
+*/
+static int getNodeSize(
+  sqlite3 *db,                    /* Database handle */
+  Rtree *pRtree,                  /* Rtree handle */
+  int isCreate,                   /* True for xCreate, false for xConnect */
+  char **pzErr                    /* OUT: Error message, if any */
+){
+  int rc;
+  char *zSql;
+  if( isCreate ){
+    int iPageSize = 0;
+    zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb);
+    rc = getIntFromStmt(db, zSql, &iPageSize);
+    if( rc==SQLITE_OK ){
+      pRtree->iNodeSize = iPageSize-64;
+      if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){
+        pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS;
+      }
+    }else{
+      *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));
+    }
+  }else{
+    zSql = sqlite3_mprintf(
+        "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1",
+        pRtree->zDb, pRtree->zName
+    );
+    rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize);
+    if( rc!=SQLITE_OK ){
+      *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));
+    }
+  }
+
+  sqlite3_free(zSql);
+  return rc;
+}
+
+/* 
+** This function is the implementation of both the xConnect and xCreate
+** methods of the r-tree virtual table.
+**
+**   argv[0]   -> module name
+**   argv[1]   -> database name
+**   argv[2]   -> table name
+**   argv[...] -> column names...
+*/
+static int rtreeInit(
+  sqlite3 *db,                        /* Database connection */
+  void *pAux,                         /* One of the RTREE_COORD_* constants */
+  int argc, const char *const*argv,   /* Parameters to CREATE TABLE statement */
+  sqlite3_vtab **ppVtab,              /* OUT: New virtual table */
+  char **pzErr,                       /* OUT: Error message, if any */
+  int isCreate                        /* True for xCreate, false for xConnect */
+){
+  int rc = SQLITE_OK;
+  Rtree *pRtree;
+  int nDb;              /* Length of string argv[1] */
+  int nName;            /* Length of string argv[2] */
+  int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32);
+
+  const char *aErrMsg[] = {
+    0,                                                    /* 0 */
+    "Wrong number of columns for an rtree table",         /* 1 */
+    "Too few columns for an rtree table",                 /* 2 */
+    "Too many columns for an rtree table"                 /* 3 */
+  };
+
+  int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2;
+  if( aErrMsg[iErr] ){
+    *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]);
+    return SQLITE_ERROR;
+  }
+
+  sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
+
+  /* Allocate the sqlite3_vtab structure */
+  nDb = (int)strlen(argv[1]);
+  nName = (int)strlen(argv[2]);
+  pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2);
+  if( !pRtree ){
+    return SQLITE_NOMEM;
+  }
+  memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2);
+  pRtree->nBusy = 1;
+  pRtree->base.pModule = &rtreeModule;
+  pRtree->zDb = (char *)&pRtree[1];
+  pRtree->zName = &pRtree->zDb[nDb+1];
+  pRtree->nDim = (argc-4)/2;
+  pRtree->nBytesPerCell = 8 + pRtree->nDim*4*2;
+  pRtree->eCoordType = eCoordType;
+  memcpy(pRtree->zDb, argv[1], nDb);
+  memcpy(pRtree->zName, argv[2], nName);
+
+  /* Figure out the node size to use. */
+  rc = getNodeSize(db, pRtree, isCreate, pzErr);
+
+  /* Create/Connect to the underlying relational database schema. If
+  ** that is successful, call sqlite3_declare_vtab() to configure
+  ** the r-tree table schema.
+  */
+  if( rc==SQLITE_OK ){
+    if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){
+      *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));
+    }else{
+      char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]);
+      char *zTmp;
+      int ii;
+      for(ii=4; zSql && ii<argc; ii++){
+        zTmp = zSql;
+        zSql = sqlite3_mprintf("%s, %s", zTmp, argv[ii]);
+        sqlite3_free(zTmp);
+      }
+      if( zSql ){
+        zTmp = zSql;
+        zSql = sqlite3_mprintf("%s);", zTmp);
+        sqlite3_free(zTmp);
+      }
+      if( !zSql ){
+        rc = SQLITE_NOMEM;
+      }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){
+        *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));
+      }
+      sqlite3_free(zSql);
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    *ppVtab = (sqlite3_vtab *)pRtree;
+  }else{
+    assert( *ppVtab==0 );
+    assert( pRtree->nBusy==1 );
+    rtreeRelease(pRtree);
+  }
+  return rc;
+}
+
+
+/*
+** Implementation of a scalar function that decodes r-tree nodes to
+** human readable strings. This can be used for debugging and analysis.
+**
+** The scalar function takes two arguments: (1) the number of dimensions
+** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing
+** an r-tree node.  For a two-dimensional r-tree structure called "rt", to
+** deserialize all nodes, a statement like:
+**
+**   SELECT rtreenode(2, data) FROM rt_node;
+**
+** The human readable string takes the form of a Tcl list with one
+** entry for each cell in the r-tree node. Each entry is itself a
+** list, containing the 8-byte rowid/pageno followed by the 
+** <num-dimension>*2 coordinates.
+*/
+static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){
+  char *zText = 0;
+  RtreeNode node;
+  Rtree tree;
+  int ii;
+
+  UNUSED_PARAMETER(nArg);
+  memset(&node, 0, sizeof(RtreeNode));
+  memset(&tree, 0, sizeof(Rtree));
+  tree.nDim = sqlite3_value_int(apArg[0]);
+  tree.nBytesPerCell = 8 + 8 * tree.nDim;
+  node.zData = (u8 *)sqlite3_value_blob(apArg[1]);
+
+  for(ii=0; ii<NCELL(&node); ii++){
+    char zCell[512];
+    int nCell = 0;
+    RtreeCell cell;
+    int jj;
+
+    nodeGetCell(&tree, &node, ii, &cell);
+    sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid);
+    nCell = (int)strlen(zCell);
+    for(jj=0; jj<tree.nDim*2; jj++){
+#ifndef SQLITE_RTREE_INT_ONLY
+      sqlite3_snprintf(512-nCell,&zCell[nCell], " %g",
+                       (double)cell.aCoord[jj].f);
+#else
+      sqlite3_snprintf(512-nCell,&zCell[nCell], " %d",
+                       cell.aCoord[jj].i);
+#endif
+      nCell = (int)strlen(zCell);
+    }
+
+    if( zText ){
+      char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell);
+      sqlite3_free(zText);
+      zText = zTextNew;
+    }else{
+      zText = sqlite3_mprintf("{%s}", zCell);
+    }
+  }
+  
+  sqlite3_result_text(ctx, zText, -1, sqlite3_free);
+}
+
+/* This routine implements an SQL function that returns the "depth" parameter
+** from the front of a blob that is an r-tree node.  For example:
+**
+**     SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1;
+**
+** The depth value is 0 for all nodes other than the root node, and the root
+** node always has nodeno=1, so the example above is the primary use for this
+** routine.  This routine is intended for testing and analysis only.
+*/
+static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){
+  UNUSED_PARAMETER(nArg);
+  if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB 
+   || sqlite3_value_bytes(apArg[0])<2
+  ){
+    sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); 
+  }else{
+    u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]);
+    sqlite3_result_int(ctx, readInt16(zBlob));
+  }
+}
+
+/*
+** Register the r-tree module with database handle db. This creates the
+** virtual table module "rtree" and the debugging/analysis scalar 
+** function "rtreenode".
+*/
+SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){
+  const int utf8 = SQLITE_UTF8;
+  int rc;
+
+  rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0);
+  }
+  if( rc==SQLITE_OK ){
+#ifdef SQLITE_RTREE_INT_ONLY
+    void *c = (void *)RTREE_COORD_INT32;
+#else
+    void *c = (void *)RTREE_COORD_REAL32;
+#endif
+    rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0);
+  }
+  if( rc==SQLITE_OK ){
+    void *c = (void *)RTREE_COORD_INT32;
+    rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0);
+  }
+
+  return rc;
+}
+
+/*
+** This routine deletes the RtreeGeomCallback object that was attached
+** one of the SQL functions create by sqlite3_rtree_geometry_callback()
+** or sqlite3_rtree_query_callback().  In other words, this routine is the
+** destructor for an RtreeGeomCallback objecct.  This routine is called when
+** the corresponding SQL function is deleted.
+*/
+static void rtreeFreeCallback(void *p){
+  RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p;
+  if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext);
+  sqlite3_free(p);
+}
+
+/*
+** Each call to sqlite3_rtree_geometry_callback() or
+** sqlite3_rtree_query_callback() creates an ordinary SQLite
+** scalar function that is implemented by this routine.
+**
+** All this function does is construct an RtreeMatchArg object that
+** contains the geometry-checking callback routines and a list of
+** parameters to this function, then return that RtreeMatchArg object
+** as a BLOB.
+**
+** The R-Tree MATCH operator will read the returned BLOB, deserialize
+** the RtreeMatchArg object, and use the RtreeMatchArg object to figure
+** out which elements of the R-Tree should be returned by the query.
+*/
+static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){
+  RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx);
+  RtreeMatchArg *pBlob;
+  int nBlob;
+
+  nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue);
+  pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob);
+  if( !pBlob ){
+    sqlite3_result_error_nomem(ctx);
+  }else{
+    int i;
+    pBlob->magic = RTREE_GEOMETRY_MAGIC;
+    pBlob->cb = pGeomCtx[0];
+    pBlob->nParam = nArg;
+    for(i=0; i<nArg; i++){
+#ifdef SQLITE_RTREE_INT_ONLY
+      pBlob->aParam[i] = sqlite3_value_int64(aArg[i]);
+#else
+      pBlob->aParam[i] = sqlite3_value_double(aArg[i]);
+#endif
+    }
+    sqlite3_result_blob(ctx, pBlob, nBlob, sqlite3_free);
+  }
+}
+
+/*
+** Register a new geometry function for use with the r-tree MATCH operator.
+*/
+SQLITE_API int sqlite3_rtree_geometry_callback(
+  sqlite3 *db,                  /* Register SQL function on this connection */
+  const char *zGeom,            /* Name of the new SQL function */
+  int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */
+  void *pContext                /* Extra data associated with the callback */
+){
+  RtreeGeomCallback *pGeomCtx;      /* Context object for new user-function */
+
+  /* Allocate and populate the context object. */
+  pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback));
+  if( !pGeomCtx ) return SQLITE_NOMEM;
+  pGeomCtx->xGeom = xGeom;
+  pGeomCtx->xQueryFunc = 0;
+  pGeomCtx->xDestructor = 0;
+  pGeomCtx->pContext = pContext;
+  return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, 
+      (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback
+  );
+}
+
+/*
+** Register a new 2nd-generation geometry function for use with the
+** r-tree MATCH operator.
+*/
+SQLITE_API int sqlite3_rtree_query_callback(
+  sqlite3 *db,                 /* Register SQL function on this connection */
+  const char *zQueryFunc,      /* Name of new SQL function */
+  int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */
+  void *pContext,              /* Extra data passed into the callback */
+  void (*xDestructor)(void*)   /* Destructor for the extra data */
+){
+  RtreeGeomCallback *pGeomCtx;      /* Context object for new user-function */
+
+  /* Allocate and populate the context object. */
+  pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback));
+  if( !pGeomCtx ) return SQLITE_NOMEM;
+  pGeomCtx->xGeom = 0;
+  pGeomCtx->xQueryFunc = xQueryFunc;
+  pGeomCtx->xDestructor = xDestructor;
+  pGeomCtx->pContext = pContext;
+  return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, 
+      (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback
+  );
+}
+
+#if !SQLITE_CORE
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+SQLITE_API int sqlite3_rtree_init(
+  sqlite3 *db,
+  char **pzErrMsg,
+  const sqlite3_api_routines *pApi
+){
+  SQLITE_EXTENSION_INIT2(pApi)
+  return sqlite3RtreeInit(db);
+}
+#endif
+
+#endif
+
+/************** End of rtree.c ***********************************************/
+/************** Begin file icu.c *********************************************/
+/*
+** 2007 May 6
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $
+**
+** This file implements an integration between the ICU library 
+** ("International Components for Unicode", an open-source library 
+** for handling unicode data) and SQLite. The integration uses 
+** ICU to provide the following to SQLite:
+**
+**   * An implementation of the SQL regexp() function (and hence REGEXP
+**     operator) using the ICU uregex_XX() APIs.
+**
+**   * Implementations of the SQL scalar upper() and lower() functions
+**     for case mapping.
+**
+**   * Integration of ICU and SQLite collation sequences.
+**
+**   * An implementation of the LIKE operator that uses ICU to 
+**     provide case-independent matching.
+*/
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
+
+/* Include ICU headers */
+#include <unicode/utypes.h>
+#include <unicode/uregex.h>
+#include <unicode/ustring.h>
+#include <unicode/ucol.h>
+
+/* #include <assert.h> */
+
+#ifndef SQLITE_CORE
+  SQLITE_EXTENSION_INIT1
+#else
+#endif
+
+/*
+** Maximum length (in bytes) of the pattern in a LIKE or GLOB
+** operator.
+*/
+#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
+# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000
+#endif
+
+/*
+** Version of sqlite3_free() that is always a function, never a macro.
+*/
+static void xFree(void *p){
+  sqlite3_free(p);
+}
+
+/*
+** Compare two UTF-8 strings for equality where the first string is
+** a "LIKE" expression. Return true (1) if they are the same and 
+** false (0) if they are different.
+*/
+static int icuLikeCompare(
+  const uint8_t *zPattern,   /* LIKE pattern */
+  const uint8_t *zString,    /* The UTF-8 string to compare against */
+  const UChar32 uEsc         /* The escape character */
+){
+  static const int MATCH_ONE = (UChar32)'_';
+  static const int MATCH_ALL = (UChar32)'%';
+
+  int iPattern = 0;       /* Current byte index in zPattern */
+  int iString = 0;        /* Current byte index in zString */
+
+  int prevEscape = 0;     /* True if the previous character was uEsc */
+
+  while( zPattern[iPattern]!=0 ){
+
+    /* Read (and consume) the next character from the input pattern. */
+    UChar32 uPattern;
+    U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
+    assert(uPattern!=0);
+
+    /* There are now 4 possibilities:
+    **
+    **     1. uPattern is an unescaped match-all character "%",
+    **     2. uPattern is an unescaped match-one character "_",
+    **     3. uPattern is an unescaped escape character, or
+    **     4. uPattern is to be handled as an ordinary character
+    */
+    if( !prevEscape && uPattern==MATCH_ALL ){
+      /* Case 1. */
+      uint8_t c;
+
+      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
+      ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
+      ** test string.
+      */
+      while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
+        if( c==MATCH_ONE ){
+          if( zString[iString]==0 ) return 0;
+          U8_FWD_1_UNSAFE(zString, iString);
+        }
+        iPattern++;
+      }
+
+      if( zPattern[iPattern]==0 ) return 1;
+
+      while( zString[iString] ){
+        if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
+          return 1;
+        }
+        U8_FWD_1_UNSAFE(zString, iString);
+      }
+      return 0;
+
+    }else if( !prevEscape && uPattern==MATCH_ONE ){
+      /* Case 2. */
+      if( zString[iString]==0 ) return 0;
+      U8_FWD_1_UNSAFE(zString, iString);
+
+    }else if( !prevEscape && uPattern==uEsc){
+      /* Case 3. */
+      prevEscape = 1;
+
+    }else{
+      /* Case 4. */
+      UChar32 uString;
+      U8_NEXT_UNSAFE(zString, iString, uString);
+      uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
+      uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
+      if( uString!=uPattern ){
+        return 0;
+      }
+      prevEscape = 0;
+    }
+  }
+
+  return zString[iString]==0;
+}
+
+/*
+** Implementation of the like() SQL function.  This function implements
+** the build-in LIKE operator.  The first argument to the function is the
+** pattern and the second argument is the string.  So, the SQL statements:
+**
+**       A LIKE B
+**
+** is implemented as like(B, A). If there is an escape character E, 
+**
+**       A LIKE B ESCAPE E
+**
+** is mapped to like(B, A, E).
+*/
+static void icuLikeFunc(
+  sqlite3_context *context, 
+  int argc, 
+  sqlite3_value **argv
+){
+  const unsigned char *zA = sqlite3_value_text(argv[0]);
+  const unsigned char *zB = sqlite3_value_text(argv[1]);
+  UChar32 uEsc = 0;
+
+  /* Limit the length of the LIKE or GLOB pattern to avoid problems
+  ** of deep recursion and N*N behavior in patternCompare().
+  */
+  if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){
+    sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1);
+    return;
+  }
+
+
+  if( argc==3 ){
+    /* The escape character string must consist of a single UTF-8 character.
+    ** Otherwise, return an error.
+    */
+    int nE= sqlite3_value_bytes(argv[2]);
+    const unsigned char *zE = sqlite3_value_text(argv[2]);
+    int i = 0;
+    if( zE==0 ) return;
+    U8_NEXT(zE, i, nE, uEsc);
+    if( i!=nE){
+      sqlite3_result_error(context, 
+          "ESCAPE expression must be a single character", -1);
+      return;
+    }
+  }
+
+  if( zA && zB ){
+    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
+  }
+}
+
+/*
+** This function is called when an ICU function called from within
+** the implementation of an SQL scalar function returns an error.
+**
+** The scalar function context passed as the first argument is 
+** loaded with an error message based on the following two args.
+*/
+static void icuFunctionError(
+  sqlite3_context *pCtx,       /* SQLite scalar function context */
+  const char *zName,           /* Name of ICU function that failed */
+  UErrorCode e                 /* Error code returned by ICU function */
+){
+  char zBuf[128];
+  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
+  zBuf[127] = '\0';
+  sqlite3_result_error(pCtx, zBuf, -1);
+}
+
+/*
+** Function to delete compiled regexp objects. Registered as
+** a destructor function with sqlite3_set_auxdata().
+*/
+static void icuRegexpDelete(void *p){
+  URegularExpression *pExpr = (URegularExpression *)p;
+  uregex_close(pExpr);
+}
+
+/*
+** Implementation of SQLite REGEXP operator. This scalar function takes
+** two arguments. The first is a regular expression pattern to compile
+** the second is a string to match against that pattern. If either 
+** argument is an SQL NULL, then NULL Is returned. Otherwise, the result
+** is 1 if the string matches the pattern, or 0 otherwise.
+**
+** SQLite maps the regexp() function to the regexp() operator such
+** that the following two are equivalent:
+**
+**     zString REGEXP zPattern
+**     regexp(zPattern, zString)
+**
+** Uses the following ICU regexp APIs:
+**
+**     uregex_open()
+**     uregex_matches()
+**     uregex_close()
+*/
+static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
+  UErrorCode status = U_ZERO_ERROR;
+  URegularExpression *pExpr;
+  UBool res;
+  const UChar *zString = sqlite3_value_text16(apArg[1]);
+
+  (void)nArg;  /* Unused parameter */
+
+  /* If the left hand side of the regexp operator is NULL, 
+  ** then the result is also NULL. 
+  */
+  if( !zString ){
+    return;
+  }
+
+  pExpr = sqlite3_get_auxdata(p, 0);
+  if( !pExpr ){
+    const UChar *zPattern = sqlite3_value_text16(apArg[0]);
+    if( !zPattern ){
+      return;
+    }
+    pExpr = uregex_open(zPattern, -1, 0, 0, &status);
+
+    if( U_SUCCESS(status) ){
+      sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
+    }else{
+      assert(!pExpr);
+      icuFunctionError(p, "uregex_open", status);
+      return;
+    }
+  }
+
+  /* Configure the text that the regular expression operates on. */
+  uregex_setText(pExpr, zString, -1, &status);
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "uregex_setText", status);
+    return;
+  }
+
+  /* Attempt the match */
+  res = uregex_matches(pExpr, 0, &status);
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "uregex_matches", status);
+    return;
+  }
+
+  /* Set the text that the regular expression operates on to a NULL
+  ** pointer. This is not really necessary, but it is tidier than 
+  ** leaving the regular expression object configured with an invalid
+  ** pointer after this function returns.
+  */
+  uregex_setText(pExpr, 0, 0, &status);
+
+  /* Return 1 or 0. */
+  sqlite3_result_int(p, res ? 1 : 0);
+}
+
+/*
+** Implementations of scalar functions for case mapping - upper() and 
+** lower(). Function upper() converts its input to upper-case (ABC).
+** Function lower() converts to lower-case (abc).
+**
+** ICU provides two types of case mapping, "general" case mapping and
+** "language specific". Refer to ICU documentation for the differences
+** between the two.
+**
+** To utilise "general" case mapping, the upper() or lower() scalar 
+** functions are invoked with one argument:
+**
+**     upper('ABC') -> 'abc'
+**     lower('abc') -> 'ABC'
+**
+** To access ICU "language specific" case mapping, upper() or lower()
+** should be invoked with two arguments. The second argument is the name
+** of the locale to use. Passing an empty string ("") or SQL NULL value
+** as the second argument is the same as invoking the 1 argument version
+** of upper() or lower().
+**
+**     lower('I', 'en_us') -> 'i'
+**     lower('I', 'tr_tr') -> 'ı' (small dotless i)
+**
+** http://www.icu-project.org/userguide/posix.html#case_mappings
+*/
+static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
+  const UChar *zInput;
+  UChar *zOutput;
+  int nInput;
+  int nOutput;
+
+  UErrorCode status = U_ZERO_ERROR;
+  const char *zLocale = 0;
+
+  assert(nArg==1 || nArg==2);
+  if( nArg==2 ){
+    zLocale = (const char *)sqlite3_value_text(apArg[1]);
+  }
+
+  zInput = sqlite3_value_text16(apArg[0]);
+  if( !zInput ){
+    return;
+  }
+  nInput = sqlite3_value_bytes16(apArg[0]);
+
+  nOutput = nInput * 2 + 2;
+  zOutput = sqlite3_malloc(nOutput);
+  if( !zOutput ){
+    return;
+  }
+
+  if( sqlite3_user_data(p) ){
+    u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
+  }else{
+    u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
+  }
+
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
+    return;
+  }
+
+  sqlite3_result_text16(p, zOutput, -1, xFree);
+}
+
+/*
+** Collation sequence destructor function. The pCtx argument points to
+** a UCollator structure previously allocated using ucol_open().
+*/
+static void icuCollationDel(void *pCtx){
+  UCollator *p = (UCollator *)pCtx;
+  ucol_close(p);
+}
+
+/*
+** Collation sequence comparison function. The pCtx argument points to
+** a UCollator structure previously allocated using ucol_open().
+*/
+static int icuCollationColl(
+  void *pCtx,
+  int nLeft,
+  const void *zLeft,
+  int nRight,
+  const void *zRight
+){
+  UCollationResult res;
+  UCollator *p = (UCollator *)pCtx;
+  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
+  switch( res ){
+    case UCOL_LESS:    return -1;
+    case UCOL_GREATER: return +1;
+    case UCOL_EQUAL:   return 0;
+  }
+  assert(!"Unexpected return value from ucol_strcoll()");
+  return 0;
+}
+
+/*
+** Implementation of the scalar function icu_load_collation().
+**
+** This scalar function is used to add ICU collation based collation 
+** types to an SQLite database connection. It is intended to be called
+** as follows:
+**
+**     SELECT icu_load_collation(<locale>, <collation-name>);
+**
+** Where <locale> is a string containing an ICU locale identifier (i.e.
+** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
+** collation sequence to create.
+*/
+static void icuLoadCollation(
+  sqlite3_context *p, 
+  int nArg, 
+  sqlite3_value **apArg
+){
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
+  UErrorCode status = U_ZERO_ERROR;
+  const char *zLocale;      /* Locale identifier - (eg. "jp_JP") */
+  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
+  UCollator *pUCollator;    /* ICU library collation object */
+  int rc;                   /* Return code from sqlite3_create_collation_x() */
+
+  assert(nArg==2);
+  zLocale = (const char *)sqlite3_value_text(apArg[0]);
+  zName = (const char *)sqlite3_value_text(apArg[1]);
+
+  if( !zLocale || !zName ){
+    return;
+  }
+
+  pUCollator = ucol_open(zLocale, &status);
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "ucol_open", status);
+    return;
+  }
+  assert(p);
+
+  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, 
+      icuCollationColl, icuCollationDel
+  );
+  if( rc!=SQLITE_OK ){
+    ucol_close(pUCollator);
+    sqlite3_result_error(p, "Error registering collation function", -1);
+  }
+}
+
+/*
+** Register the ICU extension functions with database db.
+*/
+SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){
+  struct IcuScalar {
+    const char *zName;                        /* Function name */
+    int nArg;                                 /* Number of arguments */
+    int enc;                                  /* Optimal text encoding */
+    void *pContext;                           /* sqlite3_user_data() context */
+    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
+  } scalars[] = {
+    {"regexp", 2, SQLITE_ANY,          0, icuRegexpFunc},
+
+    {"lower",  1, SQLITE_UTF16,        0, icuCaseFunc16},
+    {"lower",  2, SQLITE_UTF16,        0, icuCaseFunc16},
+    {"upper",  1, SQLITE_UTF16, (void*)1, icuCaseFunc16},
+    {"upper",  2, SQLITE_UTF16, (void*)1, icuCaseFunc16},
+
+    {"lower",  1, SQLITE_UTF8,         0, icuCaseFunc16},
+    {"lower",  2, SQLITE_UTF8,         0, icuCaseFunc16},
+    {"upper",  1, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
+    {"upper",  2, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
+
+    {"like",   2, SQLITE_UTF8,         0, icuLikeFunc},
+    {"like",   3, SQLITE_UTF8,         0, icuLikeFunc},
+
+    {"icu_load_collation",  2, SQLITE_UTF8, (void*)db, icuLoadCollation},
+  };
+
+  int rc = SQLITE_OK;
+  int i;
+
+  for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){
+    struct IcuScalar *p = &scalars[i];
+    rc = sqlite3_create_function(
+        db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0
+    );
+  }
+
+  return rc;
+}
+
+#if !SQLITE_CORE
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+SQLITE_API int sqlite3_icu_init(
+  sqlite3 *db, 
+  char **pzErrMsg,
+  const sqlite3_api_routines *pApi
+){
+  SQLITE_EXTENSION_INIT2(pApi)
+  return sqlite3IcuInit(db);
+}
+#endif
+
+#endif
+
+/************** End of icu.c *************************************************/
+/************** Begin file fts3_icu.c ****************************************/
+/*
+** 2007 June 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements a tokenizer for fts3 based on the ICU library.
+*/
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+#ifdef SQLITE_ENABLE_ICU
+
+/* #include <assert.h> */
+/* #include <string.h> */
+
+#include <unicode/ubrk.h>
+/* #include <unicode/ucol.h> */
+/* #include <unicode/ustring.h> */
+#include <unicode/utf16.h>
+
+typedef struct IcuTokenizer IcuTokenizer;
+typedef struct IcuCursor IcuCursor;
+
+struct IcuTokenizer {
+  sqlite3_tokenizer base;
+  char *zLocale;
+};
+
+struct IcuCursor {
+  sqlite3_tokenizer_cursor base;
+
+  UBreakIterator *pIter;      /* ICU break-iterator object */
+  int nChar;                  /* Number of UChar elements in pInput */
+  UChar *aChar;               /* Copy of input using utf-16 encoding */
+  int *aOffset;               /* Offsets of each character in utf-8 input */
+
+  int nBuffer;
+  char *zBuffer;
+
+  int iToken;
+};
+
+/*
+** Create a new tokenizer instance.
+*/
+static int icuCreate(
+  int argc,                            /* Number of entries in argv[] */
+  const char * const *argv,            /* Tokenizer creation arguments */
+  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
+){
+  IcuTokenizer *p;
+  int n = 0;
+
+  if( argc>0 ){
+    n = strlen(argv[0])+1;
+  }
+  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
+  if( !p ){
+    return SQLITE_NOMEM;
+  }
+  memset(p, 0, sizeof(IcuTokenizer));
+
+  if( n ){
+    p->zLocale = (char *)&p[1];
+    memcpy(p->zLocale, argv[0], n);
+  }
+
+  *ppTokenizer = (sqlite3_tokenizer *)p;
+
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int icuDestroy(sqlite3_tokenizer *pTokenizer){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is pInput[0..nBytes-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+*/
+static int icuOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput,                    /* Input string */
+  int nInput,                            /* Length of zInput in bytes */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  IcuCursor *pCsr;
+
+  const int32_t opt = U_FOLD_CASE_DEFAULT;
+  UErrorCode status = U_ZERO_ERROR;
+  int nChar;
+
+  UChar32 c;
+  int iInput = 0;
+  int iOut = 0;
+
+  *ppCursor = 0;
+
+  if( zInput==0 ){
+    nInput = 0;
+    zInput = "";
+  }else if( nInput<0 ){
+    nInput = strlen(zInput);
+  }
+  nChar = nInput+1;
+  pCsr = (IcuCursor *)sqlite3_malloc(
+      sizeof(IcuCursor) +                /* IcuCursor */
+      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
+      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
+  );
+  if( !pCsr ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(IcuCursor));
+  pCsr->aChar = (UChar *)&pCsr[1];
+  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
+
+  pCsr->aOffset[iOut] = iInput;
+  U8_NEXT(zInput, iInput, nInput, c); 
+  while( c>0 ){
+    int isError = 0;
+    c = u_foldCase(c, opt);
+    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
+    if( isError ){
+      sqlite3_free(pCsr);
+      return SQLITE_ERROR;
+    }
+    pCsr->aOffset[iOut] = iInput;
+
+    if( iInput<nInput ){
+      U8_NEXT(zInput, iInput, nInput, c);
+    }else{
+      c = 0;
+    }
+  }
+
+  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
+  if( !U_SUCCESS(status) ){
+    sqlite3_free(pCsr);
+    return SQLITE_ERROR;
+  }
+  pCsr->nChar = iOut;
+
+  ubrk_first(pCsr->pIter);
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to icuOpen().
+*/
+static int icuClose(sqlite3_tokenizer_cursor *pCursor){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+  ubrk_close(pCsr->pIter);
+  sqlite3_free(pCsr->zBuffer);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.
+*/
+static int icuNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+
+  int iStart = 0;
+  int iEnd = 0;
+  int nByte = 0;
+
+  while( iStart==iEnd ){
+    UChar32 c;
+
+    iStart = ubrk_current(pCsr->pIter);
+    iEnd = ubrk_next(pCsr->pIter);
+    if( iEnd==UBRK_DONE ){
+      return SQLITE_DONE;
+    }
+
+    while( iStart<iEnd ){
+      int iWhite = iStart;
+      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
+      if( u_isspace(c) ){
+        iStart = iWhite;
+      }else{
+        break;
+      }
+    }
+    assert(iStart<=iEnd);
+  }
+
+  do {
+    UErrorCode status = U_ZERO_ERROR;
+    if( nByte ){
+      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
+      if( !zNew ){
+        return SQLITE_NOMEM;
+      }
+      pCsr->zBuffer = zNew;
+      pCsr->nBuffer = nByte;
+    }
+
+    u_strToUTF8(
+        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
+        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
+        &status                                  /* Output success/failure */
+    );
+  } while( nByte>pCsr->nBuffer );
+
+  *ppToken = pCsr->zBuffer;
+  *pnBytes = nByte;
+  *piStartOffset = pCsr->aOffset[iStart];
+  *piEndOffset = pCsr->aOffset[iEnd];
+  *piPosition = pCsr->iToken++;
+
+  return SQLITE_OK;
+}
+
+/*
+** The set of routines that implement the simple tokenizer
+*/
+static const sqlite3_tokenizer_module icuTokenizerModule = {
+  0,                           /* iVersion */
+  icuCreate,                   /* xCreate  */
+  icuDestroy,                  /* xCreate  */
+  icuOpen,                     /* xOpen    */
+  icuClose,                    /* xClose   */
+  icuNext,                     /* xNext    */
+};
+
+/*
+** Set *ppModule to point at the implementation of the ICU tokenizer.
+*/
+SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  *ppModule = &icuTokenizerModule;
+}
+
+#endif /* defined(SQLITE_ENABLE_ICU) */
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+
+/************** End of fts3_icu.c ********************************************/
diff --git a/src/main/jni/sqlite/sqlite3.h b/src/main/jni/sqlite/sqlite3.h
new file mode 100644
index 000000000..07406477d
--- /dev/null
+++ b/src/main/jni/sqlite/sqlite3.h
@@ -0,0 +1,7715 @@
+/*
+** 2001 September 15
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This header file defines the interface that the SQLite library
+** presents to client programs.  If a C-function, structure, datatype,
+** or constant definition does not appear in this file, then it is
+** not a published API of SQLite, is subject to change without
+** notice, and should not be referenced by programs that use SQLite.
+**
+** Some of the definitions that are in this file are marked as
+** "experimental".  Experimental interfaces are normally new
+** features recently added to SQLite.  We do not anticipate changes
+** to experimental interfaces but reserve the right to make minor changes
+** if experience from use "in the wild" suggest such changes are prudent.
+**
+** The official C-language API documentation for SQLite is derived
+** from comments in this file.  This file is the authoritative source
+** on how SQLite interfaces are suppose to operate.
+**
+** The name of this file under configuration management is "sqlite.h.in".
+** The makefile makes some minor changes to this file (such as inserting
+** the version number) and changes its name to "sqlite3.h" as
+** part of the build process.
+*/
+#ifndef _SQLITE3_H_
+#define _SQLITE3_H_
+#include <stdarg.h>     /* Needed for the definition of va_list */
+
+/*
+** Make sure we can call this stuff from C++.
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+** Add the ability to override 'extern'
+*/
+#ifndef SQLITE_EXTERN
+# define SQLITE_EXTERN extern
+#endif
+
+#ifndef SQLITE_API
+# define SQLITE_API
+#endif
+
+
+/*
+** These no-op macros are used in front of interfaces to mark those
+** interfaces as either deprecated or experimental.  New applications
+** should not use deprecated interfaces - they are supported for backwards
+** compatibility only.  Application writers should be aware that
+** experimental interfaces are subject to change in point releases.
+**
+** These macros used to resolve to various kinds of compiler magic that
+** would generate warning messages when they were used.  But that
+** compiler magic ended up generating such a flurry of bug reports
+** that we have taken it all out and gone back to using simple
+** noop macros.
+*/
+#define SQLITE_DEPRECATED
+#define SQLITE_EXPERIMENTAL
+
+/*
+** Ensure these symbols were not defined by some previous header file.
+*/
+#ifdef SQLITE_VERSION
+# undef SQLITE_VERSION
+#endif
+#ifdef SQLITE_VERSION_NUMBER
+# undef SQLITE_VERSION_NUMBER
+#endif
+
+/*
+** CAPI3REF: Compile-Time Library Version Numbers
+**
+** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header
+** evaluates to a string literal that is the SQLite version in the
+** format "X.Y.Z" where X is the major version number (always 3 for
+** SQLite3) and Y is the minor version number and Z is the release number.)^
+** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer
+** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same
+** numbers used in [SQLITE_VERSION].)^
+** The SQLITE_VERSION_NUMBER for any given release of SQLite will also
+** be larger than the release from which it is derived.  Either Y will
+** be held constant and Z will be incremented or else Y will be incremented
+** and Z will be reset to zero.
+**
+** Since version 3.6.18, SQLite source code has been stored in the
+** <a href="http://www.fossil-scm.org/">Fossil configuration management
+** system</a>.  ^The SQLITE_SOURCE_ID macro evaluates to
+** a string which identifies a particular check-in of SQLite
+** within its configuration management system.  ^The SQLITE_SOURCE_ID
+** string contains the date and time of the check-in (UTC) and an SHA1
+** hash of the entire source tree.
+**
+** See also: [sqlite3_libversion()],
+** [sqlite3_libversion_number()], [sqlite3_sourceid()],
+** [sqlite_version()] and [sqlite_source_id()].
+*/
+#define SQLITE_VERSION        "3.8.8.1"
+#define SQLITE_VERSION_NUMBER 3008008
+#define SQLITE_SOURCE_ID      "2015-01-20 16:51:25 f73337e3e289915a76ca96e7a05a1a8d4e890d55"
+
+/*
+** CAPI3REF: Run-Time Library Version Numbers
+** KEYWORDS: sqlite3_version, sqlite3_sourceid
+**
+** These interfaces provide the same information as the [SQLITE_VERSION],
+** [SQLITE_VERSION_NUMBER], and [SQLITE_SOURCE_ID] C preprocessor macros
+** but are associated with the library instead of the header file.  ^(Cautious
+** programmers might include assert() statements in their application to
+** verify that values returned by these interfaces match the macros in
+** the header, and thus insure that the application is
+** compiled with matching library and header files.
+**
+** <blockquote><pre>
+** assert( sqlite3_libversion_number()==SQLITE_VERSION_NUMBER );
+** assert( strcmp(sqlite3_sourceid(),SQLITE_SOURCE_ID)==0 );
+** assert( strcmp(sqlite3_libversion(),SQLITE_VERSION)==0 );
+** </pre></blockquote>)^
+**
+** ^The sqlite3_version[] string constant contains the text of [SQLITE_VERSION]
+** macro.  ^The sqlite3_libversion() function returns a pointer to the
+** to the sqlite3_version[] string constant.  The sqlite3_libversion()
+** function is provided for use in DLLs since DLL users usually do not have
+** direct access to string constants within the DLL.  ^The
+** sqlite3_libversion_number() function returns an integer equal to
+** [SQLITE_VERSION_NUMBER].  ^The sqlite3_sourceid() function returns 
+** a pointer to a string constant whose value is the same as the 
+** [SQLITE_SOURCE_ID] C preprocessor macro.
+**
+** See also: [sqlite_version()] and [sqlite_source_id()].
+*/
+SQLITE_API SQLITE_EXTERN const char sqlite3_version[];
+SQLITE_API const char *sqlite3_libversion(void);
+SQLITE_API const char *sqlite3_sourceid(void);
+SQLITE_API int sqlite3_libversion_number(void);
+
+/*
+** CAPI3REF: Run-Time Library Compilation Options Diagnostics
+**
+** ^The sqlite3_compileoption_used() function returns 0 or 1 
+** indicating whether the specified option was defined at 
+** compile time.  ^The SQLITE_ prefix may be omitted from the 
+** option name passed to sqlite3_compileoption_used().  
+**
+** ^The sqlite3_compileoption_get() function allows iterating
+** over the list of options that were defined at compile time by
+** returning the N-th compile time option string.  ^If N is out of range,
+** sqlite3_compileoption_get() returns a NULL pointer.  ^The SQLITE_ 
+** prefix is omitted from any strings returned by 
+** sqlite3_compileoption_get().
+**
+** ^Support for the diagnostic functions sqlite3_compileoption_used()
+** and sqlite3_compileoption_get() may be omitted by specifying the 
+** [SQLITE_OMIT_COMPILEOPTION_DIAGS] option at compile time.
+**
+** See also: SQL functions [sqlite_compileoption_used()] and
+** [sqlite_compileoption_get()] and the [compile_options pragma].
+*/
+#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
+SQLITE_API int sqlite3_compileoption_used(const char *zOptName);
+SQLITE_API const char *sqlite3_compileoption_get(int N);
+#endif
+
+/*
+** CAPI3REF: Test To See If The Library Is Threadsafe
+**
+** ^The sqlite3_threadsafe() function returns zero if and only if
+** SQLite was compiled with mutexing code omitted due to the
+** [SQLITE_THREADSAFE] compile-time option being set to 0.
+**
+** SQLite can be compiled with or without mutexes.  When
+** the [SQLITE_THREADSAFE] C preprocessor macro is 1 or 2, mutexes
+** are enabled and SQLite is threadsafe.  When the
+** [SQLITE_THREADSAFE] macro is 0, 
+** the mutexes are omitted.  Without the mutexes, it is not safe
+** to use SQLite concurrently from more than one thread.
+**
+** Enabling mutexes incurs a measurable performance penalty.
+** So if speed is of utmost importance, it makes sense to disable
+** the mutexes.  But for maximum safety, mutexes should be enabled.
+** ^The default behavior is for mutexes to be enabled.
+**
+** This interface can be used by an application to make sure that the
+** version of SQLite that it is linking against was compiled with
+** the desired setting of the [SQLITE_THREADSAFE] macro.
+**
+** This interface only reports on the compile-time mutex setting
+** of the [SQLITE_THREADSAFE] flag.  If SQLite is compiled with
+** SQLITE_THREADSAFE=1 or =2 then mutexes are enabled by default but
+** can be fully or partially disabled using a call to [sqlite3_config()]
+** with the verbs [SQLITE_CONFIG_SINGLETHREAD], [SQLITE_CONFIG_MULTITHREAD],
+** or [SQLITE_CONFIG_SERIALIZED].  ^(The return value of the
+** sqlite3_threadsafe() function shows only the compile-time setting of
+** thread safety, not any run-time changes to that setting made by
+** sqlite3_config(). In other words, the return value from sqlite3_threadsafe()
+** is unchanged by calls to sqlite3_config().)^
+**
+** See the [threading mode] documentation for additional information.
+*/
+SQLITE_API int sqlite3_threadsafe(void);
+
+/*
+** CAPI3REF: Database Connection Handle
+** KEYWORDS: {database connection} {database connections}
+**
+** Each open SQLite database is represented by a pointer to an instance of
+** the opaque structure named "sqlite3".  It is useful to think of an sqlite3
+** pointer as an object.  The [sqlite3_open()], [sqlite3_open16()], and
+** [sqlite3_open_v2()] interfaces are its constructors, and [sqlite3_close()]
+** and [sqlite3_close_v2()] are its destructors.  There are many other
+** interfaces (such as
+** [sqlite3_prepare_v2()], [sqlite3_create_function()], and
+** [sqlite3_busy_timeout()] to name but three) that are methods on an
+** sqlite3 object.
+*/
+typedef struct sqlite3 sqlite3;
+
+/*
+** CAPI3REF: 64-Bit Integer Types
+** KEYWORDS: sqlite_int64 sqlite_uint64
+**
+** Because there is no cross-platform way to specify 64-bit integer types
+** SQLite includes typedefs for 64-bit signed and unsigned integers.
+**
+** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions.
+** The sqlite_int64 and sqlite_uint64 types are supported for backwards
+** compatibility only.
+**
+** ^The sqlite3_int64 and sqlite_int64 types can store integer values
+** between -9223372036854775808 and +9223372036854775807 inclusive.  ^The
+** sqlite3_uint64 and sqlite_uint64 types can store integer values 
+** between 0 and +18446744073709551615 inclusive.
+*/
+#ifdef SQLITE_INT64_TYPE
+  typedef SQLITE_INT64_TYPE sqlite_int64;
+  typedef unsigned SQLITE_INT64_TYPE sqlite_uint64;
+#elif defined(_MSC_VER) || defined(__BORLANDC__)
+  typedef __int64 sqlite_int64;
+  typedef unsigned __int64 sqlite_uint64;
+#else
+  typedef long long int sqlite_int64;
+  typedef unsigned long long int sqlite_uint64;
+#endif
+typedef sqlite_int64 sqlite3_int64;
+typedef sqlite_uint64 sqlite3_uint64;
+
+/*
+** If compiling for a processor that lacks floating point support,
+** substitute integer for floating-point.
+*/
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# define double sqlite3_int64
+#endif
+
+/*
+** CAPI3REF: Closing A Database Connection
+**
+** ^The sqlite3_close() and sqlite3_close_v2() routines are destructors
+** for the [sqlite3] object.
+** ^Calls to sqlite3_close() and sqlite3_close_v2() return [SQLITE_OK] if
+** the [sqlite3] object is successfully destroyed and all associated
+** resources are deallocated.
+**
+** ^If the database connection is associated with unfinalized prepared
+** statements or unfinished sqlite3_backup objects then sqlite3_close()
+** will leave the database connection open and return [SQLITE_BUSY].
+** ^If sqlite3_close_v2() is called with unfinalized prepared statements
+** and/or unfinished sqlite3_backups, then the database connection becomes
+** an unusable "zombie" which will automatically be deallocated when the
+** last prepared statement is finalized or the last sqlite3_backup is
+** finished.  The sqlite3_close_v2() interface is intended for use with
+** host languages that are garbage collected, and where the order in which
+** destructors are called is arbitrary.
+**
+** Applications should [sqlite3_finalize | finalize] all [prepared statements],
+** [sqlite3_blob_close | close] all [BLOB handles], and 
+** [sqlite3_backup_finish | finish] all [sqlite3_backup] objects associated
+** with the [sqlite3] object prior to attempting to close the object.  ^If
+** sqlite3_close_v2() is called on a [database connection] that still has
+** outstanding [prepared statements], [BLOB handles], and/or
+** [sqlite3_backup] objects then it returns [SQLITE_OK] and the deallocation
+** of resources is deferred until all [prepared statements], [BLOB handles],
+** and [sqlite3_backup] objects are also destroyed.
+**
+** ^If an [sqlite3] object is destroyed while a transaction is open,
+** the transaction is automatically rolled back.
+**
+** The C parameter to [sqlite3_close(C)] and [sqlite3_close_v2(C)]
+** must be either a NULL
+** pointer or an [sqlite3] object pointer obtained
+** from [sqlite3_open()], [sqlite3_open16()], or
+** [sqlite3_open_v2()], and not previously closed.
+** ^Calling sqlite3_close() or sqlite3_close_v2() with a NULL pointer
+** argument is a harmless no-op.
+*/
+SQLITE_API int sqlite3_close(sqlite3*);
+SQLITE_API int sqlite3_close_v2(sqlite3*);
+
+/*
+** The type for a callback function.
+** This is legacy and deprecated.  It is included for historical
+** compatibility and is not documented.
+*/
+typedef int (*sqlite3_callback)(void*,int,char**, char**);
+
+/*
+** CAPI3REF: One-Step Query Execution Interface
+**
+** The sqlite3_exec() interface is a convenience wrapper around
+** [sqlite3_prepare_v2()], [sqlite3_step()], and [sqlite3_finalize()],
+** that allows an application to run multiple statements of SQL
+** without having to use a lot of C code. 
+**
+** ^The sqlite3_exec() interface runs zero or more UTF-8 encoded,
+** semicolon-separate SQL statements passed into its 2nd argument,
+** in the context of the [database connection] passed in as its 1st
+** argument.  ^If the callback function of the 3rd argument to
+** sqlite3_exec() is not NULL, then it is invoked for each result row
+** coming out of the evaluated SQL statements.  ^The 4th argument to
+** sqlite3_exec() is relayed through to the 1st argument of each
+** callback invocation.  ^If the callback pointer to sqlite3_exec()
+** is NULL, then no callback is ever invoked and result rows are
+** ignored.
+**
+** ^If an error occurs while evaluating the SQL statements passed into
+** sqlite3_exec(), then execution of the current statement stops and
+** subsequent statements are skipped.  ^If the 5th parameter to sqlite3_exec()
+** is not NULL then any error message is written into memory obtained
+** from [sqlite3_malloc()] and passed back through the 5th parameter.
+** To avoid memory leaks, the application should invoke [sqlite3_free()]
+** on error message strings returned through the 5th parameter of
+** of sqlite3_exec() after the error message string is no longer needed.
+** ^If the 5th parameter to sqlite3_exec() is not NULL and no errors
+** occur, then sqlite3_exec() sets the pointer in its 5th parameter to
+** NULL before returning.
+**
+** ^If an sqlite3_exec() callback returns non-zero, the sqlite3_exec()
+** routine returns SQLITE_ABORT without invoking the callback again and
+** without running any subsequent SQL statements.
+**
+** ^The 2nd argument to the sqlite3_exec() callback function is the
+** number of columns in the result.  ^The 3rd argument to the sqlite3_exec()
+** callback is an array of pointers to strings obtained as if from
+** [sqlite3_column_text()], one for each column.  ^If an element of a
+** result row is NULL then the corresponding string pointer for the
+** sqlite3_exec() callback is a NULL pointer.  ^The 4th argument to the
+** sqlite3_exec() callback is an array of pointers to strings where each
+** entry represents the name of corresponding result column as obtained
+** from [sqlite3_column_name()].
+**
+** ^If the 2nd parameter to sqlite3_exec() is a NULL pointer, a pointer
+** to an empty string, or a pointer that contains only whitespace and/or 
+** SQL comments, then no SQL statements are evaluated and the database
+** is not changed.
+**
+** Restrictions:
+**
+** <ul>
+** <li> The application must insure that the 1st parameter to sqlite3_exec()
+**      is a valid and open [database connection].
+** <li> The application must not close the [database connection] specified by
+**      the 1st parameter to sqlite3_exec() while sqlite3_exec() is running.
+** <li> The application must not modify the SQL statement text passed into
+**      the 2nd parameter of sqlite3_exec() while sqlite3_exec() is running.
+** </ul>
+*/
+SQLITE_API int sqlite3_exec(
+  sqlite3*,                                  /* An open database */
+  const char *sql,                           /* SQL to be evaluated */
+  int (*callback)(void*,int,char**,char**),  /* Callback function */
+  void *,                                    /* 1st argument to callback */
+  char **errmsg                              /* Error msg written here */
+);
+
+/*
+** CAPI3REF: Result Codes
+** KEYWORDS: {result code definitions}
+**
+** Many SQLite functions return an integer result code from the set shown
+** here in order to indicate success or failure.
+**
+** New error codes may be added in future versions of SQLite.
+**
+** See also: [extended result code definitions]
+*/
+#define SQLITE_OK           0   /* Successful result */
+/* beginning-of-error-codes */
+#define SQLITE_ERROR        1   /* SQL error or missing database */
+#define SQLITE_INTERNAL     2   /* Internal logic error in SQLite */
+#define SQLITE_PERM         3   /* Access permission denied */
+#define SQLITE_ABORT        4   /* Callback routine requested an abort */
+#define SQLITE_BUSY         5   /* The database file is locked */
+#define SQLITE_LOCKED       6   /* A table in the database is locked */
+#define SQLITE_NOMEM        7   /* A malloc() failed */
+#define SQLITE_READONLY     8   /* Attempt to write a readonly database */
+#define SQLITE_INTERRUPT    9   /* Operation terminated by sqlite3_interrupt()*/
+#define SQLITE_IOERR       10   /* Some kind of disk I/O error occurred */
+#define SQLITE_CORRUPT     11   /* The database disk image is malformed */
+#define SQLITE_NOTFOUND    12   /* Unknown opcode in sqlite3_file_control() */
+#define SQLITE_FULL        13   /* Insertion failed because database is full */
+#define SQLITE_CANTOPEN    14   /* Unable to open the database file */
+#define SQLITE_PROTOCOL    15   /* Database lock protocol error */
+#define SQLITE_EMPTY       16   /* Database is empty */
+#define SQLITE_SCHEMA      17   /* The database schema changed */
+#define SQLITE_TOOBIG      18   /* String or BLOB exceeds size limit */
+#define SQLITE_CONSTRAINT  19   /* Abort due to constraint violation */
+#define SQLITE_MISMATCH    20   /* Data type mismatch */
+#define SQLITE_MISUSE      21   /* Library used incorrectly */
+#define SQLITE_NOLFS       22   /* Uses OS features not supported on host */
+#define SQLITE_AUTH        23   /* Authorization denied */
+#define SQLITE_FORMAT      24   /* Auxiliary database format error */
+#define SQLITE_RANGE       25   /* 2nd parameter to sqlite3_bind out of range */
+#define SQLITE_NOTADB      26   /* File opened that is not a database file */
+#define SQLITE_NOTICE      27   /* Notifications from sqlite3_log() */
+#define SQLITE_WARNING     28   /* Warnings from sqlite3_log() */
+#define SQLITE_ROW         100  /* sqlite3_step() has another row ready */
+#define SQLITE_DONE        101  /* sqlite3_step() has finished executing */
+/* end-of-error-codes */
+
+/*
+** CAPI3REF: Extended Result Codes
+** KEYWORDS: {extended result code definitions}
+**
+** In its default configuration, SQLite API routines return one of 30 integer
+** [result codes].  However, experience has shown that many of
+** these result codes are too coarse-grained.  They do not provide as
+** much information about problems as programmers might like.  In an effort to
+** address this, newer versions of SQLite (version 3.3.8 and later) include
+** support for additional result codes that provide more detailed information
+** about errors. These [extended result codes] are enabled or disabled
+** on a per database connection basis using the
+** [sqlite3_extended_result_codes()] API.  Or, the extended code for
+** the most recent error can be obtained using
+** [sqlite3_extended_errcode()].
+*/
+#define SQLITE_IOERR_READ              (SQLITE_IOERR | (1<<8))
+#define SQLITE_IOERR_SHORT_READ        (SQLITE_IOERR | (2<<8))
+#define SQLITE_IOERR_WRITE             (SQLITE_IOERR | (3<<8))
+#define SQLITE_IOERR_FSYNC             (SQLITE_IOERR | (4<<8))
+#define SQLITE_IOERR_DIR_FSYNC         (SQLITE_IOERR | (5<<8))
+#define SQLITE_IOERR_TRUNCATE          (SQLITE_IOERR | (6<<8))
+#define SQLITE_IOERR_FSTAT             (SQLITE_IOERR | (7<<8))
+#define SQLITE_IOERR_UNLOCK            (SQLITE_IOERR | (8<<8))
+#define SQLITE_IOERR_RDLOCK            (SQLITE_IOERR | (9<<8))
+#define SQLITE_IOERR_DELETE            (SQLITE_IOERR | (10<<8))
+#define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
+#define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
+#define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
+#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
+#define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
+#define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
+#define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
+#define SQLITE_IOERR_SHMOPEN           (SQLITE_IOERR | (18<<8))
+#define SQLITE_IOERR_SHMSIZE           (SQLITE_IOERR | (19<<8))
+#define SQLITE_IOERR_SHMLOCK           (SQLITE_IOERR | (20<<8))
+#define SQLITE_IOERR_SHMMAP            (SQLITE_IOERR | (21<<8))
+#define SQLITE_IOERR_SEEK              (SQLITE_IOERR | (22<<8))
+#define SQLITE_IOERR_DELETE_NOENT      (SQLITE_IOERR | (23<<8))
+#define SQLITE_IOERR_MMAP              (SQLITE_IOERR | (24<<8))
+#define SQLITE_IOERR_GETTEMPPATH       (SQLITE_IOERR | (25<<8))
+#define SQLITE_IOERR_CONVPATH          (SQLITE_IOERR | (26<<8))
+#define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
+#define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))
+#define SQLITE_BUSY_SNAPSHOT           (SQLITE_BUSY   |  (2<<8))
+#define SQLITE_CANTOPEN_NOTEMPDIR      (SQLITE_CANTOPEN | (1<<8))
+#define SQLITE_CANTOPEN_ISDIR          (SQLITE_CANTOPEN | (2<<8))
+#define SQLITE_CANTOPEN_FULLPATH       (SQLITE_CANTOPEN | (3<<8))
+#define SQLITE_CANTOPEN_CONVPATH       (SQLITE_CANTOPEN | (4<<8))
+#define SQLITE_CORRUPT_VTAB            (SQLITE_CORRUPT | (1<<8))
+#define SQLITE_READONLY_RECOVERY       (SQLITE_READONLY | (1<<8))
+#define SQLITE_READONLY_CANTLOCK       (SQLITE_READONLY | (2<<8))
+#define SQLITE_READONLY_ROLLBACK       (SQLITE_READONLY | (3<<8))
+#define SQLITE_READONLY_DBMOVED        (SQLITE_READONLY | (4<<8))
+#define SQLITE_ABORT_ROLLBACK          (SQLITE_ABORT | (2<<8))
+#define SQLITE_CONSTRAINT_CHECK        (SQLITE_CONSTRAINT | (1<<8))
+#define SQLITE_CONSTRAINT_COMMITHOOK   (SQLITE_CONSTRAINT | (2<<8))
+#define SQLITE_CONSTRAINT_FOREIGNKEY   (SQLITE_CONSTRAINT | (3<<8))
+#define SQLITE_CONSTRAINT_FUNCTION     (SQLITE_CONSTRAINT | (4<<8))
+#define SQLITE_CONSTRAINT_NOTNULL      (SQLITE_CONSTRAINT | (5<<8))
+#define SQLITE_CONSTRAINT_PRIMARYKEY   (SQLITE_CONSTRAINT | (6<<8))
+#define SQLITE_CONSTRAINT_TRIGGER      (SQLITE_CONSTRAINT | (7<<8))
+#define SQLITE_CONSTRAINT_UNIQUE       (SQLITE_CONSTRAINT | (8<<8))
+#define SQLITE_CONSTRAINT_VTAB         (SQLITE_CONSTRAINT | (9<<8))
+#define SQLITE_CONSTRAINT_ROWID        (SQLITE_CONSTRAINT |(10<<8))
+#define SQLITE_NOTICE_RECOVER_WAL      (SQLITE_NOTICE | (1<<8))
+#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8))
+#define SQLITE_WARNING_AUTOINDEX       (SQLITE_WARNING | (1<<8))
+#define SQLITE_AUTH_USER               (SQLITE_AUTH | (1<<8))
+
+/*
+** CAPI3REF: Flags For File Open Operations
+**
+** These bit values are intended for use in the
+** 3rd parameter to the [sqlite3_open_v2()] interface and
+** in the 4th parameter to the [sqlite3_vfs.xOpen] method.
+*/
+#define SQLITE_OPEN_READONLY         0x00000001  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_READWRITE        0x00000002  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_CREATE           0x00000004  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_DELETEONCLOSE    0x00000008  /* VFS only */
+#define SQLITE_OPEN_EXCLUSIVE        0x00000010  /* VFS only */
+#define SQLITE_OPEN_AUTOPROXY        0x00000020  /* VFS only */
+#define SQLITE_OPEN_URI              0x00000040  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_MEMORY           0x00000080  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_MAIN_DB          0x00000100  /* VFS only */
+#define SQLITE_OPEN_TEMP_DB          0x00000200  /* VFS only */
+#define SQLITE_OPEN_TRANSIENT_DB     0x00000400  /* VFS only */
+#define SQLITE_OPEN_MAIN_JOURNAL     0x00000800  /* VFS only */
+#define SQLITE_OPEN_TEMP_JOURNAL     0x00001000  /* VFS only */
+#define SQLITE_OPEN_SUBJOURNAL       0x00002000  /* VFS only */
+#define SQLITE_OPEN_MASTER_JOURNAL   0x00004000  /* VFS only */
+#define SQLITE_OPEN_NOMUTEX          0x00008000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_FULLMUTEX        0x00010000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_SHAREDCACHE      0x00020000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_PRIVATECACHE     0x00040000  /* Ok for sqlite3_open_v2() */
+#define SQLITE_OPEN_WAL              0x00080000  /* VFS only */
+
+/* Reserved:                         0x00F00000 */
+
+/*
+** CAPI3REF: Device Characteristics
+**
+** The xDeviceCharacteristics method of the [sqlite3_io_methods]
+** object returns an integer which is a vector of these
+** bit values expressing I/O characteristics of the mass storage
+** device that holds the file that the [sqlite3_io_methods]
+** refers to.
+**
+** The SQLITE_IOCAP_ATOMIC property means that all writes of
+** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
+** mean that writes of blocks that are nnn bytes in size and
+** are aligned to an address which is an integer multiple of
+** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
+** that when data is appended to a file, the data is appended
+** first then the size of the file is extended, never the other
+** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
+** information is written to disk in the same order as calls
+** to xWrite().  The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that
+** after reboot following a crash or power loss, the only bytes in a
+** file that were written at the application level might have changed
+** and that adjacent bytes, even bytes within the same sector are
+** guaranteed to be unchanged.  The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
+** flag indicate that a file cannot be deleted when open.  The
+** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on
+** read-only media and cannot be changed even by processes with
+** elevated privileges.
+*/
+#define SQLITE_IOCAP_ATOMIC                 0x00000001
+#define SQLITE_IOCAP_ATOMIC512              0x00000002
+#define SQLITE_IOCAP_ATOMIC1K               0x00000004
+#define SQLITE_IOCAP_ATOMIC2K               0x00000008
+#define SQLITE_IOCAP_ATOMIC4K               0x00000010
+#define SQLITE_IOCAP_ATOMIC8K               0x00000020
+#define SQLITE_IOCAP_ATOMIC16K              0x00000040
+#define SQLITE_IOCAP_ATOMIC32K              0x00000080
+#define SQLITE_IOCAP_ATOMIC64K              0x00000100
+#define SQLITE_IOCAP_SAFE_APPEND            0x00000200
+#define SQLITE_IOCAP_SEQUENTIAL             0x00000400
+#define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN  0x00000800
+#define SQLITE_IOCAP_POWERSAFE_OVERWRITE    0x00001000
+#define SQLITE_IOCAP_IMMUTABLE              0x00002000
+
+/*
+** CAPI3REF: File Locking Levels
+**
+** SQLite uses one of these integer values as the second
+** argument to calls it makes to the xLock() and xUnlock() methods
+** of an [sqlite3_io_methods] object.
+*/
+#define SQLITE_LOCK_NONE          0
+#define SQLITE_LOCK_SHARED        1
+#define SQLITE_LOCK_RESERVED      2
+#define SQLITE_LOCK_PENDING       3
+#define SQLITE_LOCK_EXCLUSIVE     4
+
+/*
+** CAPI3REF: Synchronization Type Flags
+**
+** When SQLite invokes the xSync() method of an
+** [sqlite3_io_methods] object it uses a combination of
+** these integer values as the second argument.
+**
+** When the SQLITE_SYNC_DATAONLY flag is used, it means that the
+** sync operation only needs to flush data to mass storage.  Inode
+** information need not be flushed. If the lower four bits of the flag
+** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics.
+** If the lower four bits equal SQLITE_SYNC_FULL, that means
+** to use Mac OS X style fullsync instead of fsync().
+**
+** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags
+** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL
+** settings.  The [synchronous pragma] determines when calls to the
+** xSync VFS method occur and applies uniformly across all platforms.
+** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how
+** energetic or rigorous or forceful the sync operations are and
+** only make a difference on Mac OSX for the default SQLite code.
+** (Third-party VFS implementations might also make the distinction
+** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the
+** operating systems natively supported by SQLite, only Mac OSX
+** cares about the difference.)
+*/
+#define SQLITE_SYNC_NORMAL        0x00002
+#define SQLITE_SYNC_FULL          0x00003
+#define SQLITE_SYNC_DATAONLY      0x00010
+
+/*
+** CAPI3REF: OS Interface Open File Handle
+**
+** An [sqlite3_file] object represents an open file in the 
+** [sqlite3_vfs | OS interface layer].  Individual OS interface
+** implementations will
+** want to subclass this object by appending additional fields
+** for their own use.  The pMethods entry is a pointer to an
+** [sqlite3_io_methods] object that defines methods for performing
+** I/O operations on the open file.
+*/
+typedef struct sqlite3_file sqlite3_file;
+struct sqlite3_file {
+  const struct sqlite3_io_methods *pMethods;  /* Methods for an open file */
+};
+
+/*
+** CAPI3REF: OS Interface File Virtual Methods Object
+**
+** Every file opened by the [sqlite3_vfs.xOpen] method populates an
+** [sqlite3_file] object (or, more commonly, a subclass of the
+** [sqlite3_file] object) with a pointer to an instance of this object.
+** This object defines the methods used to perform various operations
+** against the open file represented by the [sqlite3_file] object.
+**
+** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element 
+** to a non-NULL pointer, then the sqlite3_io_methods.xClose method
+** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed.  The
+** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen]
+** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element
+** to NULL.
+**
+** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or
+** [SQLITE_SYNC_FULL].  The first choice is the normal fsync().
+** The second choice is a Mac OS X style fullsync.  The [SQLITE_SYNC_DATAONLY]
+** flag may be ORed in to indicate that only the data of the file
+** and not its inode needs to be synced.
+**
+** The integer values to xLock() and xUnlock() are one of
+** <ul>
+** <li> [SQLITE_LOCK_NONE],
+** <li> [SQLITE_LOCK_SHARED],
+** <li> [SQLITE_LOCK_RESERVED],
+** <li> [SQLITE_LOCK_PENDING], or
+** <li> [SQLITE_LOCK_EXCLUSIVE].
+** </ul>
+** xLock() increases the lock. xUnlock() decreases the lock.
+** The xCheckReservedLock() method checks whether any database connection,
+** either in this process or in some other process, is holding a RESERVED,
+** PENDING, or EXCLUSIVE lock on the file.  It returns true
+** if such a lock exists and false otherwise.
+**
+** The xFileControl() method is a generic interface that allows custom
+** VFS implementations to directly control an open file using the
+** [sqlite3_file_control()] interface.  The second "op" argument is an
+** integer opcode.  The third argument is a generic pointer intended to
+** point to a structure that may contain arguments or space in which to
+** write return values.  Potential uses for xFileControl() might be
+** functions to enable blocking locks with timeouts, to change the
+** locking strategy (for example to use dot-file locks), to inquire
+** about the status of a lock, or to break stale locks.  The SQLite
+** core reserves all opcodes less than 100 for its own use.
+** A [file control opcodes | list of opcodes] less than 100 is available.
+** Applications that define a custom xFileControl method should use opcodes
+** greater than 100 to avoid conflicts.  VFS implementations should
+** return [SQLITE_NOTFOUND] for file control opcodes that they do not
+** recognize.
+**
+** The xSectorSize() method returns the sector size of the
+** device that underlies the file.  The sector size is the
+** minimum write that can be performed without disturbing
+** other bytes in the file.  The xDeviceCharacteristics()
+** method returns a bit vector describing behaviors of the
+** underlying device:
+**
+** <ul>
+** <li> [SQLITE_IOCAP_ATOMIC]
+** <li> [SQLITE_IOCAP_ATOMIC512]
+** <li> [SQLITE_IOCAP_ATOMIC1K]
+** <li> [SQLITE_IOCAP_ATOMIC2K]
+** <li> [SQLITE_IOCAP_ATOMIC4K]
+** <li> [SQLITE_IOCAP_ATOMIC8K]
+** <li> [SQLITE_IOCAP_ATOMIC16K]
+** <li> [SQLITE_IOCAP_ATOMIC32K]
+** <li> [SQLITE_IOCAP_ATOMIC64K]
+** <li> [SQLITE_IOCAP_SAFE_APPEND]
+** <li> [SQLITE_IOCAP_SEQUENTIAL]
+** </ul>
+**
+** The SQLITE_IOCAP_ATOMIC property means that all writes of
+** any size are atomic.  The SQLITE_IOCAP_ATOMICnnn values
+** mean that writes of blocks that are nnn bytes in size and
+** are aligned to an address which is an integer multiple of
+** nnn are atomic.  The SQLITE_IOCAP_SAFE_APPEND value means
+** that when data is appended to a file, the data is appended
+** first then the size of the file is extended, never the other
+** way around.  The SQLITE_IOCAP_SEQUENTIAL property means that
+** information is written to disk in the same order as calls
+** to xWrite().
+**
+** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill
+** in the unread portions of the buffer with zeros.  A VFS that
+** fails to zero-fill short reads might seem to work.  However,
+** failure to zero-fill short reads will eventually lead to
+** database corruption.
+*/
+typedef struct sqlite3_io_methods sqlite3_io_methods;
+struct sqlite3_io_methods {
+  int iVersion;
+  int (*xClose)(sqlite3_file*);
+  int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst);
+  int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst);
+  int (*xTruncate)(sqlite3_file*, sqlite3_int64 size);
+  int (*xSync)(sqlite3_file*, int flags);
+  int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize);
+  int (*xLock)(sqlite3_file*, int);
+  int (*xUnlock)(sqlite3_file*, int);
+  int (*xCheckReservedLock)(sqlite3_file*, int *pResOut);
+  int (*xFileControl)(sqlite3_file*, int op, void *pArg);
+  int (*xSectorSize)(sqlite3_file*);
+  int (*xDeviceCharacteristics)(sqlite3_file*);
+  /* Methods above are valid for version 1 */
+  int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**);
+  int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
+  void (*xShmBarrier)(sqlite3_file*);
+  int (*xShmUnmap)(sqlite3_file*, int deleteFlag);
+  /* Methods above are valid for version 2 */
+  int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp);
+  int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p);
+  /* Methods above are valid for version 3 */
+  /* Additional methods may be added in future releases */
+};
+
+/*
+** CAPI3REF: Standard File Control Opcodes
+** KEYWORDS: {file control opcodes} {file control opcode}
+**
+** These integer constants are opcodes for the xFileControl method
+** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()]
+** interface.
+**
+** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging.  This
+** opcode causes the xFileControl method to write the current state of
+** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED],
+** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE])
+** into an integer that the pArg argument points to. This capability
+** is used during testing and only needs to be supported when SQLITE_TEST
+** is defined.
+** <ul>
+** <li>[[SQLITE_FCNTL_SIZE_HINT]]
+** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS
+** layer a hint of how large the database file will grow to be during the
+** current transaction.  This hint is not guaranteed to be accurate but it
+** is often close.  The underlying VFS might choose to preallocate database
+** file space based on this hint in order to help writes to the database
+** file run faster.
+**
+** <li>[[SQLITE_FCNTL_CHUNK_SIZE]]
+** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS
+** extends and truncates the database file in chunks of a size specified
+** by the user. The fourth argument to [sqlite3_file_control()] should 
+** point to an integer (type int) containing the new chunk-size to use
+** for the nominated database. Allocating database file space in large
+** chunks (say 1MB at a time), may reduce file-system fragmentation and
+** improve performance on some systems.
+**
+** <li>[[SQLITE_FCNTL_FILE_POINTER]]
+** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer
+** to the [sqlite3_file] object associated with a particular database
+** connection.  See the [sqlite3_file_control()] documentation for
+** additional information.
+**
+** <li>[[SQLITE_FCNTL_SYNC_OMITTED]]
+** No longer in use.
+**
+** <li>[[SQLITE_FCNTL_SYNC]]
+** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and
+** sent to the VFS immediately before the xSync method is invoked on a
+** database file descriptor. Or, if the xSync method is not invoked 
+** because the user has configured SQLite with 
+** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place 
+** of the xSync method. In most cases, the pointer argument passed with
+** this file-control is NULL. However, if the database file is being synced
+** as part of a multi-database commit, the argument points to a nul-terminated
+** string containing the transactions master-journal file name. VFSes that 
+** do not need this signal should silently ignore this opcode. Applications 
+** should not call [sqlite3_file_control()] with this opcode as doing so may 
+** disrupt the operation of the specialized VFSes that do require it.  
+**
+** <li>[[SQLITE_FCNTL_COMMIT_PHASETWO]]
+** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite
+** and sent to the VFS after a transaction has been committed immediately
+** but before the database is unlocked. VFSes that do not need this signal
+** should silently ignore this opcode. Applications should not call
+** [sqlite3_file_control()] with this opcode as doing so may disrupt the 
+** operation of the specialized VFSes that do require it.  
+**
+** <li>[[SQLITE_FCNTL_WIN32_AV_RETRY]]
+** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic
+** retry counts and intervals for certain disk I/O operations for the
+** windows [VFS] in order to provide robustness in the presence of
+** anti-virus programs.  By default, the windows VFS will retry file read,
+** file write, and file delete operations up to 10 times, with a delay
+** of 25 milliseconds before the first retry and with the delay increasing
+** by an additional 25 milliseconds with each subsequent retry.  This
+** opcode allows these two values (10 retries and 25 milliseconds of delay)
+** to be adjusted.  The values are changed for all database connections
+** within the same process.  The argument is a pointer to an array of two
+** integers where the first integer i the new retry count and the second
+** integer is the delay.  If either integer is negative, then the setting
+** is not changed but instead the prior value of that setting is written
+** into the array entry, allowing the current retry settings to be
+** interrogated.  The zDbName parameter is ignored.
+**
+** <li>[[SQLITE_FCNTL_PERSIST_WAL]]
+** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the
+** persistent [WAL | Write Ahead Log] setting.  By default, the auxiliary
+** write ahead log and shared memory files used for transaction control
+** are automatically deleted when the latest connection to the database
+** closes.  Setting persistent WAL mode causes those files to persist after
+** close.  Persisting the files is useful when other processes that do not
+** have write permission on the directory containing the database file want
+** to read the database file, as the WAL and shared memory files must exist
+** in order for the database to be readable.  The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable persistent WAL mode or 1 to enable persistent
+** WAL mode.  If the integer is -1, then it is overwritten with the current
+** WAL persistence setting.
+**
+** <li>[[SQLITE_FCNTL_POWERSAFE_OVERWRITE]]
+** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the
+** persistent "powersafe-overwrite" or "PSOW" setting.  The PSOW setting
+** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the
+** xDeviceCharacteristics methods. The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage
+** mode.  If the integer is -1, then it is overwritten with the current
+** zero-damage mode setting.
+**
+** <li>[[SQLITE_FCNTL_OVERWRITE]]
+** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening
+** a write transaction to indicate that, unless it is rolled back for some
+** reason, the entire database file will be overwritten by the current 
+** transaction. This is used by VACUUM operations.
+**
+** <li>[[SQLITE_FCNTL_VFSNAME]]
+** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of
+** all [VFSes] in the VFS stack.  The names are of all VFS shims and the
+** final bottom-level VFS are written into memory obtained from 
+** [sqlite3_malloc()] and the result is stored in the char* variable
+** that the fourth parameter of [sqlite3_file_control()] points to.
+** The caller is responsible for freeing the memory when done.  As with
+** all file-control actions, there is no guarantee that this will actually
+** do anything.  Callers should initialize the char* variable to a NULL
+** pointer in case this file-control is not implemented.  This file-control
+** is intended for diagnostic use only.
+**
+** <li>[[SQLITE_FCNTL_PRAGMA]]
+** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] 
+** file control is sent to the open [sqlite3_file] object corresponding
+** to the database file to which the pragma statement refers. ^The argument
+** to the [SQLITE_FCNTL_PRAGMA] file control is an array of
+** pointers to strings (char**) in which the second element of the array
+** is the name of the pragma and the third element is the argument to the
+** pragma or NULL if the pragma has no argument.  ^The handler for an
+** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element
+** of the char** argument point to a string obtained from [sqlite3_mprintf()]
+** or the equivalent and that string will become the result of the pragma or
+** the error message if the pragma fails. ^If the
+** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal 
+** [PRAGMA] processing continues.  ^If the [SQLITE_FCNTL_PRAGMA]
+** file control returns [SQLITE_OK], then the parser assumes that the
+** VFS has handled the PRAGMA itself and the parser generates a no-op
+** prepared statement.  ^If the [SQLITE_FCNTL_PRAGMA] file control returns
+** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means
+** that the VFS encountered an error while handling the [PRAGMA] and the
+** compilation of the PRAGMA fails with an error.  ^The [SQLITE_FCNTL_PRAGMA]
+** file control occurs at the beginning of pragma statement analysis and so
+** it is able to override built-in [PRAGMA] statements.
+**
+** <li>[[SQLITE_FCNTL_BUSYHANDLER]]
+** ^The [SQLITE_FCNTL_BUSYHANDLER]
+** file-control may be invoked by SQLite on the database file handle
+** shortly after it is opened in order to provide a custom VFS with access
+** to the connections busy-handler callback. The argument is of type (void **)
+** - an array of two (void *) values. The first (void *) actually points
+** to a function of type (int (*)(void *)). In order to invoke the connections
+** busy-handler, this function should be invoked with the second (void *) in
+** the array as the only argument. If it returns non-zero, then the operation
+** should be retried. If it returns zero, the custom VFS should abandon the
+** current operation.
+**
+** <li>[[SQLITE_FCNTL_TEMPFILENAME]]
+** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control
+** to have SQLite generate a
+** temporary filename using the same algorithm that is followed to generate
+** temporary filenames for TEMP tables and other internal uses.  The
+** argument should be a char** which will be filled with the filename
+** written into memory obtained from [sqlite3_malloc()].  The caller should
+** invoke [sqlite3_free()] on the result to avoid a memory leak.
+**
+** <li>[[SQLITE_FCNTL_MMAP_SIZE]]
+** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the
+** maximum number of bytes that will be used for memory-mapped I/O.
+** The argument is a pointer to a value of type sqlite3_int64 that
+** is an advisory maximum number of bytes in the file to memory map.  The
+** pointer is overwritten with the old value.  The limit is not changed if
+** the value originally pointed to is negative, and so the current limit 
+** can be queried by passing in a pointer to a negative number.  This
+** file-control is used internally to implement [PRAGMA mmap_size].
+**
+** <li>[[SQLITE_FCNTL_TRACE]]
+** The [SQLITE_FCNTL_TRACE] file control provides advisory information
+** to the VFS about what the higher layers of the SQLite stack are doing.
+** This file control is used by some VFS activity tracing [shims].
+** The argument is a zero-terminated string.  Higher layers in the
+** SQLite stack may generate instances of this file control if
+** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled.
+**
+** <li>[[SQLITE_FCNTL_HAS_MOVED]]
+** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a
+** pointer to an integer and it writes a boolean into that integer depending
+** on whether or not the file has been renamed, moved, or deleted since it
+** was first opened.
+**
+** <li>[[SQLITE_FCNTL_WIN32_SET_HANDLE]]
+** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging.  This
+** opcode causes the xFileControl method to swap the file handle with the one
+** pointed to by the pArg argument.  This capability is used during testing
+** and only needs to be supported when SQLITE_TEST is defined.
+**
+** </ul>
+*/
+#define SQLITE_FCNTL_LOCKSTATE               1
+#define SQLITE_GET_LOCKPROXYFILE             2
+#define SQLITE_SET_LOCKPROXYFILE             3
+#define SQLITE_LAST_ERRNO                    4
+#define SQLITE_FCNTL_SIZE_HINT               5
+#define SQLITE_FCNTL_CHUNK_SIZE              6
+#define SQLITE_FCNTL_FILE_POINTER            7
+#define SQLITE_FCNTL_SYNC_OMITTED            8
+#define SQLITE_FCNTL_WIN32_AV_RETRY          9
+#define SQLITE_FCNTL_PERSIST_WAL            10
+#define SQLITE_FCNTL_OVERWRITE              11
+#define SQLITE_FCNTL_VFSNAME                12
+#define SQLITE_FCNTL_POWERSAFE_OVERWRITE    13
+#define SQLITE_FCNTL_PRAGMA                 14
+#define SQLITE_FCNTL_BUSYHANDLER            15
+#define SQLITE_FCNTL_TEMPFILENAME           16
+#define SQLITE_FCNTL_MMAP_SIZE              18
+#define SQLITE_FCNTL_TRACE                  19
+#define SQLITE_FCNTL_HAS_MOVED              20
+#define SQLITE_FCNTL_SYNC                   21
+#define SQLITE_FCNTL_COMMIT_PHASETWO        22
+#define SQLITE_FCNTL_WIN32_SET_HANDLE       23
+
+/*
+** CAPI3REF: Mutex Handle
+**
+** The mutex module within SQLite defines [sqlite3_mutex] to be an
+** abstract type for a mutex object.  The SQLite core never looks
+** at the internal representation of an [sqlite3_mutex].  It only
+** deals with pointers to the [sqlite3_mutex] object.
+**
+** Mutexes are created using [sqlite3_mutex_alloc()].
+*/
+typedef struct sqlite3_mutex sqlite3_mutex;
+
+/*
+** CAPI3REF: OS Interface Object
+**
+** An instance of the sqlite3_vfs object defines the interface between
+** the SQLite core and the underlying operating system.  The "vfs"
+** in the name of the object stands for "virtual file system".  See
+** the [VFS | VFS documentation] for further information.
+**
+** The value of the iVersion field is initially 1 but may be larger in
+** future versions of SQLite.  Additional fields may be appended to this
+** object when the iVersion value is increased.  Note that the structure
+** of the sqlite3_vfs object changes in the transaction between
+** SQLite version 3.5.9 and 3.6.0 and yet the iVersion field was not
+** modified.
+**
+** The szOsFile field is the size of the subclassed [sqlite3_file]
+** structure used by this VFS.  mxPathname is the maximum length of
+** a pathname in this VFS.
+**
+** Registered sqlite3_vfs objects are kept on a linked list formed by
+** the pNext pointer.  The [sqlite3_vfs_register()]
+** and [sqlite3_vfs_unregister()] interfaces manage this list
+** in a thread-safe way.  The [sqlite3_vfs_find()] interface
+** searches the list.  Neither the application code nor the VFS
+** implementation should use the pNext pointer.
+**
+** The pNext field is the only field in the sqlite3_vfs
+** structure that SQLite will ever modify.  SQLite will only access
+** or modify this field while holding a particular static mutex.
+** The application should never modify anything within the sqlite3_vfs
+** object once the object has been registered.
+**
+** The zName field holds the name of the VFS module.  The name must
+** be unique across all VFS modules.
+**
+** [[sqlite3_vfs.xOpen]]
+** ^SQLite guarantees that the zFilename parameter to xOpen
+** is either a NULL pointer or string obtained
+** from xFullPathname() with an optional suffix added.
+** ^If a suffix is added to the zFilename parameter, it will
+** consist of a single "-" character followed by no more than
+** 11 alphanumeric and/or "-" characters.
+** ^SQLite further guarantees that
+** the string will be valid and unchanged until xClose() is
+** called. Because of the previous sentence,
+** the [sqlite3_file] can safely store a pointer to the
+** filename if it needs to remember the filename for some reason.
+** If the zFilename parameter to xOpen is a NULL pointer then xOpen
+** must invent its own temporary name for the file.  ^Whenever the 
+** xFilename parameter is NULL it will also be the case that the
+** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE].
+**
+** The flags argument to xOpen() includes all bits set in
+** the flags argument to [sqlite3_open_v2()].  Or if [sqlite3_open()]
+** or [sqlite3_open16()] is used, then flags includes at least
+** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]. 
+** If xOpen() opens a file read-only then it sets *pOutFlags to
+** include [SQLITE_OPEN_READONLY].  Other bits in *pOutFlags may be set.
+**
+** ^(SQLite will also add one of the following flags to the xOpen()
+** call, depending on the object being opened:
+**
+** <ul>
+** <li>  [SQLITE_OPEN_MAIN_DB]
+** <li>  [SQLITE_OPEN_MAIN_JOURNAL]
+** <li>  [SQLITE_OPEN_TEMP_DB]
+** <li>  [SQLITE_OPEN_TEMP_JOURNAL]
+** <li>  [SQLITE_OPEN_TRANSIENT_DB]
+** <li>  [SQLITE_OPEN_SUBJOURNAL]
+** <li>  [SQLITE_OPEN_MASTER_JOURNAL]
+** <li>  [SQLITE_OPEN_WAL]
+** </ul>)^
+**
+** The file I/O implementation can use the object type flags to
+** change the way it deals with files.  For example, an application
+** that does not care about crash recovery or rollback might make
+** the open of a journal file a no-op.  Writes to this journal would
+** also be no-ops, and any attempt to read the journal would return
+** SQLITE_IOERR.  Or the implementation might recognize that a database
+** file will be doing page-aligned sector reads and writes in a random
+** order and set up its I/O subsystem accordingly.
+**
+** SQLite might also add one of the following flags to the xOpen method:
+**
+** <ul>
+** <li> [SQLITE_OPEN_DELETEONCLOSE]
+** <li> [SQLITE_OPEN_EXCLUSIVE]
+** </ul>
+**
+** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be
+** deleted when it is closed.  ^The [SQLITE_OPEN_DELETEONCLOSE]
+** will be set for TEMP databases and their journals, transient
+** databases, and subjournals.
+**
+** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction
+** with the [SQLITE_OPEN_CREATE] flag, which are both directly
+** analogous to the O_EXCL and O_CREAT flags of the POSIX open()
+** API.  The SQLITE_OPEN_EXCLUSIVE flag, when paired with the 
+** SQLITE_OPEN_CREATE, is used to indicate that file should always
+** be created, and that it is an error if it already exists.
+** It is <i>not</i> used to indicate the file should be opened 
+** for exclusive access.
+**
+** ^At least szOsFile bytes of memory are allocated by SQLite
+** to hold the  [sqlite3_file] structure passed as the third
+** argument to xOpen.  The xOpen method does not have to
+** allocate the structure; it should just fill it in.  Note that
+** the xOpen method must set the sqlite3_file.pMethods to either
+** a valid [sqlite3_io_methods] object or to NULL.  xOpen must do
+** this even if the open fails.  SQLite expects that the sqlite3_file.pMethods
+** element will be valid after xOpen returns regardless of the success
+** or failure of the xOpen call.
+**
+** [[sqlite3_vfs.xAccess]]
+** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS]
+** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to
+** test whether a file is readable and writable, or [SQLITE_ACCESS_READ]
+** to test whether a file is at least readable.   The file can be a
+** directory.
+**
+** ^SQLite will always allocate at least mxPathname+1 bytes for the
+** output buffer xFullPathname.  The exact size of the output buffer
+** is also passed as a parameter to both  methods. If the output buffer
+** is not large enough, [SQLITE_CANTOPEN] should be returned. Since this is
+** handled as a fatal error by SQLite, vfs implementations should endeavor
+** to prevent this by setting mxPathname to a sufficiently large value.
+**
+** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64()
+** interfaces are not strictly a part of the filesystem, but they are
+** included in the VFS structure for completeness.
+** The xRandomness() function attempts to return nBytes bytes
+** of good-quality randomness into zOut.  The return value is
+** the actual number of bytes of randomness obtained.
+** The xSleep() method causes the calling thread to sleep for at
+** least the number of microseconds given.  ^The xCurrentTime()
+** method returns a Julian Day Number for the current date and time as
+** a floating point value.
+** ^The xCurrentTimeInt64() method returns, as an integer, the Julian
+** Day Number multiplied by 86400000 (the number of milliseconds in 
+** a 24-hour day).  
+** ^SQLite will use the xCurrentTimeInt64() method to get the current
+** date and time if that method is available (if iVersion is 2 or 
+** greater and the function pointer is not NULL) and will fall back
+** to xCurrentTime() if xCurrentTimeInt64() is unavailable.
+**
+** ^The xSetSystemCall(), xGetSystemCall(), and xNestSystemCall() interfaces
+** are not used by the SQLite core.  These optional interfaces are provided
+** by some VFSes to facilitate testing of the VFS code. By overriding 
+** system calls with functions under its control, a test program can
+** simulate faults and error conditions that would otherwise be difficult
+** or impossible to induce.  The set of system calls that can be overridden
+** varies from one VFS to another, and from one version of the same VFS to the
+** next.  Applications that use these interfaces must be prepared for any
+** or all of these interfaces to be NULL or for their behavior to change
+** from one release to the next.  Applications must not attempt to access
+** any of these methods if the iVersion of the VFS is less than 3.
+*/
+typedef struct sqlite3_vfs sqlite3_vfs;
+typedef void (*sqlite3_syscall_ptr)(void);
+struct sqlite3_vfs {
+  int iVersion;            /* Structure version number (currently 3) */
+  int szOsFile;            /* Size of subclassed sqlite3_file */
+  int mxPathname;          /* Maximum file pathname length */
+  sqlite3_vfs *pNext;      /* Next registered VFS */
+  const char *zName;       /* Name of this virtual file system */
+  void *pAppData;          /* Pointer to application-specific data */
+  int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*,
+               int flags, int *pOutFlags);
+  int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir);
+  int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut);
+  int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut);
+  void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename);
+  void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg);
+  void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void);
+  void (*xDlClose)(sqlite3_vfs*, void*);
+  int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut);
+  int (*xSleep)(sqlite3_vfs*, int microseconds);
+  int (*xCurrentTime)(sqlite3_vfs*, double*);
+  int (*xGetLastError)(sqlite3_vfs*, int, char *);
+  /*
+  ** The methods above are in version 1 of the sqlite_vfs object
+  ** definition.  Those that follow are added in version 2 or later
+  */
+  int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*);
+  /*
+  ** The methods above are in versions 1 and 2 of the sqlite_vfs object.
+  ** Those below are for version 3 and greater.
+  */
+  int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr);
+  sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName);
+  const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName);
+  /*
+  ** The methods above are in versions 1 through 3 of the sqlite_vfs object.
+  ** New fields may be appended in figure versions.  The iVersion
+  ** value will increment whenever this happens. 
+  */
+};
+
+/*
+** CAPI3REF: Flags for the xAccess VFS method
+**
+** These integer constants can be used as the third parameter to
+** the xAccess method of an [sqlite3_vfs] object.  They determine
+** what kind of permissions the xAccess method is looking for.
+** With SQLITE_ACCESS_EXISTS, the xAccess method
+** simply checks whether the file exists.
+** With SQLITE_ACCESS_READWRITE, the xAccess method
+** checks whether the named directory is both readable and writable
+** (in other words, if files can be added, removed, and renamed within
+** the directory).
+** The SQLITE_ACCESS_READWRITE constant is currently used only by the
+** [temp_store_directory pragma], though this could change in a future
+** release of SQLite.
+** With SQLITE_ACCESS_READ, the xAccess method
+** checks whether the file is readable.  The SQLITE_ACCESS_READ constant is
+** currently unused, though it might be used in a future release of
+** SQLite.
+*/
+#define SQLITE_ACCESS_EXISTS    0
+#define SQLITE_ACCESS_READWRITE 1   /* Used by PRAGMA temp_store_directory */
+#define SQLITE_ACCESS_READ      2   /* Unused */
+
+/*
+** CAPI3REF: Flags for the xShmLock VFS method
+**
+** These integer constants define the various locking operations
+** allowed by the xShmLock method of [sqlite3_io_methods].  The
+** following are the only legal combinations of flags to the
+** xShmLock method:
+**
+** <ul>
+** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_SHARED
+** <li>  SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE
+** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED
+** <li>  SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE
+** </ul>
+**
+** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as
+** was given on the corresponding lock.  
+**
+** The xShmLock method can transition between unlocked and SHARED or
+** between unlocked and EXCLUSIVE.  It cannot transition between SHARED
+** and EXCLUSIVE.
+*/
+#define SQLITE_SHM_UNLOCK       1
+#define SQLITE_SHM_LOCK         2
+#define SQLITE_SHM_SHARED       4
+#define SQLITE_SHM_EXCLUSIVE    8
+
+/*
+** CAPI3REF: Maximum xShmLock index
+**
+** The xShmLock method on [sqlite3_io_methods] may use values
+** between 0 and this upper bound as its "offset" argument.
+** The SQLite core will never attempt to acquire or release a
+** lock outside of this range
+*/
+#define SQLITE_SHM_NLOCK        8
+
+
+/*
+** CAPI3REF: Initialize The SQLite Library
+**
+** ^The sqlite3_initialize() routine initializes the
+** SQLite library.  ^The sqlite3_shutdown() routine
+** deallocates any resources that were allocated by sqlite3_initialize().
+** These routines are designed to aid in process initialization and
+** shutdown on embedded systems.  Workstation applications using
+** SQLite normally do not need to invoke either of these routines.
+**
+** A call to sqlite3_initialize() is an "effective" call if it is
+** the first time sqlite3_initialize() is invoked during the lifetime of
+** the process, or if it is the first time sqlite3_initialize() is invoked
+** following a call to sqlite3_shutdown().  ^(Only an effective call
+** of sqlite3_initialize() does any initialization.  All other calls
+** are harmless no-ops.)^
+**
+** A call to sqlite3_shutdown() is an "effective" call if it is the first
+** call to sqlite3_shutdown() since the last sqlite3_initialize().  ^(Only
+** an effective call to sqlite3_shutdown() does any deinitialization.
+** All other valid calls to sqlite3_shutdown() are harmless no-ops.)^
+**
+** The sqlite3_initialize() interface is threadsafe, but sqlite3_shutdown()
+** is not.  The sqlite3_shutdown() interface must only be called from a
+** single thread.  All open [database connections] must be closed and all
+** other SQLite resources must be deallocated prior to invoking
+** sqlite3_shutdown().
+**
+** Among other things, ^sqlite3_initialize() will invoke
+** sqlite3_os_init().  Similarly, ^sqlite3_shutdown()
+** will invoke sqlite3_os_end().
+**
+** ^The sqlite3_initialize() routine returns [SQLITE_OK] on success.
+** ^If for some reason, sqlite3_initialize() is unable to initialize
+** the library (perhaps it is unable to allocate a needed resource such
+** as a mutex) it returns an [error code] other than [SQLITE_OK].
+**
+** ^The sqlite3_initialize() routine is called internally by many other
+** SQLite interfaces so that an application usually does not need to
+** invoke sqlite3_initialize() directly.  For example, [sqlite3_open()]
+** calls sqlite3_initialize() so the SQLite library will be automatically
+** initialized when [sqlite3_open()] is called if it has not be initialized
+** already.  ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT]
+** compile-time option, then the automatic calls to sqlite3_initialize()
+** are omitted and the application must call sqlite3_initialize() directly
+** prior to using any other SQLite interface.  For maximum portability,
+** it is recommended that applications always invoke sqlite3_initialize()
+** directly prior to using any other SQLite interface.  Future releases
+** of SQLite may require this.  In other words, the behavior exhibited
+** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the
+** default behavior in some future release of SQLite.
+**
+** The sqlite3_os_init() routine does operating-system specific
+** initialization of the SQLite library.  The sqlite3_os_end()
+** routine undoes the effect of sqlite3_os_init().  Typical tasks
+** performed by these routines include allocation or deallocation
+** of static resources, initialization of global variables,
+** setting up a default [sqlite3_vfs] module, or setting up
+** a default configuration using [sqlite3_config()].
+**
+** The application should never invoke either sqlite3_os_init()
+** or sqlite3_os_end() directly.  The application should only invoke
+** sqlite3_initialize() and sqlite3_shutdown().  The sqlite3_os_init()
+** interface is called automatically by sqlite3_initialize() and
+** sqlite3_os_end() is called by sqlite3_shutdown().  Appropriate
+** implementations for sqlite3_os_init() and sqlite3_os_end()
+** are built into SQLite when it is compiled for Unix, Windows, or OS/2.
+** When [custom builds | built for other platforms]
+** (using the [SQLITE_OS_OTHER=1] compile-time
+** option) the application must supply a suitable implementation for
+** sqlite3_os_init() and sqlite3_os_end().  An application-supplied
+** implementation of sqlite3_os_init() or sqlite3_os_end()
+** must return [SQLITE_OK] on success and some other [error code] upon
+** failure.
+*/
+SQLITE_API int sqlite3_initialize(void);
+SQLITE_API int sqlite3_shutdown(void);
+SQLITE_API int sqlite3_os_init(void);
+SQLITE_API int sqlite3_os_end(void);
+
+/*
+** CAPI3REF: Configuring The SQLite Library
+**
+** The sqlite3_config() interface is used to make global configuration
+** changes to SQLite in order to tune SQLite to the specific needs of
+** the application.  The default configuration is recommended for most
+** applications and so this routine is usually not necessary.  It is
+** provided to support rare applications with unusual needs.
+**
+** The sqlite3_config() interface is not threadsafe.  The application
+** must insure that no other SQLite interfaces are invoked by other
+** threads while sqlite3_config() is running.  Furthermore, sqlite3_config()
+** may only be invoked prior to library initialization using
+** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()].
+** ^If sqlite3_config() is called after [sqlite3_initialize()] and before
+** [sqlite3_shutdown()] then it will return SQLITE_MISUSE.
+** Note, however, that ^sqlite3_config() can be called as part of the
+** implementation of an application-defined [sqlite3_os_init()].
+**
+** The first argument to sqlite3_config() is an integer
+** [configuration option] that determines
+** what property of SQLite is to be configured.  Subsequent arguments
+** vary depending on the [configuration option]
+** in the first argument.
+**
+** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK].
+** ^If the option is unknown or SQLite is unable to set the option
+** then this routine returns a non-zero [error code].
+*/
+SQLITE_API int sqlite3_config(int, ...);
+
+/*
+** CAPI3REF: Configure database connections
+**
+** The sqlite3_db_config() interface is used to make configuration
+** changes to a [database connection].  The interface is similar to
+** [sqlite3_config()] except that the changes apply to a single
+** [database connection] (specified in the first argument).
+**
+** The second argument to sqlite3_db_config(D,V,...)  is the
+** [SQLITE_DBCONFIG_LOOKASIDE | configuration verb] - an integer code 
+** that indicates what aspect of the [database connection] is being configured.
+** Subsequent arguments vary depending on the configuration verb.
+**
+** ^Calls to sqlite3_db_config() return SQLITE_OK if and only if
+** the call is considered successful.
+*/
+SQLITE_API int sqlite3_db_config(sqlite3*, int op, ...);
+
+/*
+** CAPI3REF: Memory Allocation Routines
+**
+** An instance of this object defines the interface between SQLite
+** and low-level memory allocation routines.
+**
+** This object is used in only one place in the SQLite interface.
+** A pointer to an instance of this object is the argument to
+** [sqlite3_config()] when the configuration option is
+** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC].  
+** By creating an instance of this object
+** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC])
+** during configuration, an application can specify an alternative
+** memory allocation subsystem for SQLite to use for all of its
+** dynamic memory needs.
+**
+** Note that SQLite comes with several [built-in memory allocators]
+** that are perfectly adequate for the overwhelming majority of applications
+** and that this object is only useful to a tiny minority of applications
+** with specialized memory allocation requirements.  This object is
+** also used during testing of SQLite in order to specify an alternative
+** memory allocator that simulates memory out-of-memory conditions in
+** order to verify that SQLite recovers gracefully from such
+** conditions.
+**
+** The xMalloc, xRealloc, and xFree methods must work like the
+** malloc(), realloc() and free() functions from the standard C library.
+** ^SQLite guarantees that the second argument to
+** xRealloc is always a value returned by a prior call to xRoundup.
+**
+** xSize should return the allocated size of a memory allocation
+** previously obtained from xMalloc or xRealloc.  The allocated size
+** is always at least as big as the requested size but may be larger.
+**
+** The xRoundup method returns what would be the allocated size of
+** a memory allocation given a particular requested size.  Most memory
+** allocators round up memory allocations at least to the next multiple
+** of 8.  Some allocators round up to a larger multiple or to a power of 2.
+** Every memory allocation request coming in through [sqlite3_malloc()]
+** or [sqlite3_realloc()] first calls xRoundup.  If xRoundup returns 0, 
+** that causes the corresponding memory allocation to fail.
+**
+** The xInit method initializes the memory allocator.  For example,
+** it might allocate any require mutexes or initialize internal data
+** structures.  The xShutdown method is invoked (indirectly) by
+** [sqlite3_shutdown()] and should deallocate any resources acquired
+** by xInit.  The pAppData pointer is used as the only parameter to
+** xInit and xShutdown.
+**
+** SQLite holds the [SQLITE_MUTEX_STATIC_MASTER] mutex when it invokes
+** the xInit method, so the xInit method need not be threadsafe.  The
+** xShutdown method is only called from [sqlite3_shutdown()] so it does
+** not need to be threadsafe either.  For all other methods, SQLite
+** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the
+** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which
+** it is by default) and so the methods are automatically serialized.
+** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other
+** methods must be threadsafe or else make their own arrangements for
+** serialization.
+**
+** SQLite will never invoke xInit() more than once without an intervening
+** call to xShutdown().
+*/
+typedef struct sqlite3_mem_methods sqlite3_mem_methods;
+struct sqlite3_mem_methods {
+  void *(*xMalloc)(int);         /* Memory allocation function */
+  void (*xFree)(void*);          /* Free a prior allocation */
+  void *(*xRealloc)(void*,int);  /* Resize an allocation */
+  int (*xSize)(void*);           /* Return the size of an allocation */
+  int (*xRoundup)(int);          /* Round up request size to allocation size */
+  int (*xInit)(void*);           /* Initialize the memory allocator */
+  void (*xShutdown)(void*);      /* Deinitialize the memory allocator */
+  void *pAppData;                /* Argument to xInit() and xShutdown() */
+};
+
+/*
+** CAPI3REF: Configuration Options
+** KEYWORDS: {configuration option}
+**
+** These constants are the available integer configuration options that
+** can be passed as the first argument to the [sqlite3_config()] interface.
+**
+** New configuration options may be added in future releases of SQLite.
+** Existing configuration options might be discontinued.  Applications
+** should check the return code from [sqlite3_config()] to make sure that
+** the call worked.  The [sqlite3_config()] interface will return a
+** non-zero [error code] if a discontinued or unsupported configuration option
+** is invoked.
+**
+** <dl>
+** [[SQLITE_CONFIG_SINGLETHREAD]] <dt>SQLITE_CONFIG_SINGLETHREAD</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Single-thread.  In other words, it disables
+** all mutexing and puts SQLite into a mode where it can only be used
+** by a single thread.   ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to change the [threading mode] from its default
+** value of Single-thread and so [sqlite3_config()] will return 
+** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD
+** configuration option.</dd>
+**
+** [[SQLITE_CONFIG_MULTITHREAD]] <dt>SQLITE_CONFIG_MULTITHREAD</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Multi-thread.  In other words, it disables
+** mutexing on [database connection] and [prepared statement] objects.
+** The application is responsible for serializing access to
+** [database connections] and [prepared statements].  But other mutexes
+** are enabled so that SQLite will be safe to use in a multi-threaded
+** environment as long as no two threads attempt to use the same
+** [database connection] at the same time.  ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to set the Multi-thread [threading mode] and
+** [sqlite3_config()] will return [SQLITE_ERROR] if called with the
+** SQLITE_CONFIG_MULTITHREAD configuration option.</dd>
+**
+** [[SQLITE_CONFIG_SERIALIZED]] <dt>SQLITE_CONFIG_SERIALIZED</dt>
+** <dd>There are no arguments to this option.  ^This option sets the
+** [threading mode] to Serialized. In other words, this option enables
+** all mutexes including the recursive
+** mutexes on [database connection] and [prepared statement] objects.
+** In this mode (which is the default when SQLite is compiled with
+** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access
+** to [database connections] and [prepared statements] so that the
+** application is free to use the same [database connection] or the
+** same [prepared statement] in different threads at the same time.
+** ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** it is not possible to set the Serialized [threading mode] and
+** [sqlite3_config()] will return [SQLITE_ERROR] if called with the
+** SQLITE_CONFIG_SERIALIZED configuration option.</dd>
+**
+** [[SQLITE_CONFIG_MALLOC]] <dt>SQLITE_CONFIG_MALLOC</dt>
+** <dd> ^(The SQLITE_CONFIG_MALLOC option takes a single argument which is 
+** a pointer to an instance of the [sqlite3_mem_methods] structure.
+** The argument specifies
+** alternative low-level memory allocation routines to be used in place of
+** the memory allocation routines built into SQLite.)^ ^SQLite makes
+** its own private copy of the content of the [sqlite3_mem_methods] structure
+** before the [sqlite3_config()] call returns.</dd>
+**
+** [[SQLITE_CONFIG_GETMALLOC]] <dt>SQLITE_CONFIG_GETMALLOC</dt>
+** <dd> ^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which
+** is a pointer to an instance of the [sqlite3_mem_methods] structure.
+** The [sqlite3_mem_methods]
+** structure is filled with the currently defined memory allocation routines.)^
+** This option can be used to overload the default memory allocation
+** routines with a wrapper that simulations memory allocation failure or
+** tracks memory usage, for example. </dd>
+**
+** [[SQLITE_CONFIG_MEMSTATUS]] <dt>SQLITE_CONFIG_MEMSTATUS</dt>
+** <dd> ^The SQLITE_CONFIG_MEMSTATUS option takes single argument of type int,
+** interpreted as a boolean, which enables or disables the collection of
+** memory allocation statistics. ^(When memory allocation statistics are
+** disabled, the following SQLite interfaces become non-operational:
+**   <ul>
+**   <li> [sqlite3_memory_used()]
+**   <li> [sqlite3_memory_highwater()]
+**   <li> [sqlite3_soft_heap_limit64()]
+**   <li> [sqlite3_status()]
+**   </ul>)^
+** ^Memory allocation statistics are enabled by default unless SQLite is
+** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory
+** allocation statistics are disabled by default.
+** </dd>
+**
+** [[SQLITE_CONFIG_SCRATCH]] <dt>SQLITE_CONFIG_SCRATCH</dt>
+** <dd> ^The SQLITE_CONFIG_SCRATCH option specifies a static memory buffer
+** that SQLite can use for scratch memory.  ^(There are three arguments
+** to SQLITE_CONFIG_SCRATCH:  A pointer an 8-byte
+** aligned memory buffer from which the scratch allocations will be
+** drawn, the size of each scratch allocation (sz),
+** and the maximum number of scratch allocations (N).)^
+** The first argument must be a pointer to an 8-byte aligned buffer
+** of at least sz*N bytes of memory.
+** ^SQLite will not use more than one scratch buffers per thread.
+** ^SQLite will never request a scratch buffer that is more than 6
+** times the database page size.
+** ^If SQLite needs needs additional
+** scratch memory beyond what is provided by this configuration option, then 
+** [sqlite3_malloc()] will be used to obtain the memory needed.<p>
+** ^When the application provides any amount of scratch memory using
+** SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary large
+** [sqlite3_malloc|heap allocations].
+** This can help [Robson proof|prevent memory allocation failures] due to heap
+** fragmentation in low-memory embedded systems.
+** </dd>
+**
+** [[SQLITE_CONFIG_PAGECACHE]] <dt>SQLITE_CONFIG_PAGECACHE</dt>
+** <dd> ^The SQLITE_CONFIG_PAGECACHE option specifies a static memory buffer
+** that SQLite can use for the database page cache with the default page
+** cache implementation.  
+** This configuration should not be used if an application-define page
+** cache implementation is loaded using the [SQLITE_CONFIG_PCACHE2]
+** configuration option.
+** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to
+** 8-byte aligned
+** memory, the size of each page buffer (sz), and the number of pages (N).
+** The sz argument should be the size of the largest database page
+** (a power of two between 512 and 65536) plus some extra bytes for each
+** page header.  ^The number of extra bytes needed by the page header
+** can be determined using the [SQLITE_CONFIG_PCACHE_HDRSZ] option 
+** to [sqlite3_config()].
+** ^It is harmless, apart from the wasted memory,
+** for the sz parameter to be larger than necessary.  The first
+** argument should pointer to an 8-byte aligned block of memory that
+** is at least sz*N bytes of memory, otherwise subsequent behavior is
+** undefined.
+** ^SQLite will use the memory provided by the first argument to satisfy its
+** memory needs for the first N pages that it adds to cache.  ^If additional
+** page cache memory is needed beyond what is provided by this option, then
+** SQLite goes to [sqlite3_malloc()] for the additional storage space.</dd>
+**
+** [[SQLITE_CONFIG_HEAP]] <dt>SQLITE_CONFIG_HEAP</dt>
+** <dd> ^The SQLITE_CONFIG_HEAP option specifies a static memory buffer 
+** that SQLite will use for all of its dynamic memory allocation needs
+** beyond those provided for by [SQLITE_CONFIG_SCRATCH] and
+** [SQLITE_CONFIG_PAGECACHE].
+** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled
+** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns
+** [SQLITE_ERROR] if invoked otherwise.
+** ^There are three arguments to SQLITE_CONFIG_HEAP:
+** An 8-byte aligned pointer to the memory,
+** the number of bytes in the memory buffer, and the minimum allocation size.
+** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts
+** to using its default memory allocator (the system malloc() implementation),
+** undoing any prior invocation of [SQLITE_CONFIG_MALLOC].  ^If the
+** memory pointer is not NULL then the alternative memory
+** allocator is engaged to handle all of SQLites memory allocation needs.
+** The first pointer (the memory pointer) must be aligned to an 8-byte
+** boundary or subsequent behavior of SQLite will be undefined.
+** The minimum allocation size is capped at 2**12. Reasonable values
+** for the minimum allocation size are 2**5 through 2**8.</dd>
+**
+** [[SQLITE_CONFIG_MUTEX]] <dt>SQLITE_CONFIG_MUTEX</dt>
+** <dd> ^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a
+** pointer to an instance of the [sqlite3_mutex_methods] structure.
+** The argument specifies alternative low-level mutex routines to be used
+** in place the mutex routines built into SQLite.)^  ^SQLite makes a copy of
+** the content of the [sqlite3_mutex_methods] structure before the call to
+** [sqlite3_config()] returns. ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** the entire mutexing subsystem is omitted from the build and hence calls to
+** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will
+** return [SQLITE_ERROR].</dd>
+**
+** [[SQLITE_CONFIG_GETMUTEX]] <dt>SQLITE_CONFIG_GETMUTEX</dt>
+** <dd> ^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which
+** is a pointer to an instance of the [sqlite3_mutex_methods] structure.  The
+** [sqlite3_mutex_methods]
+** structure is filled with the currently defined mutex routines.)^
+** This option can be used to overload the default mutex allocation
+** routines with a wrapper used to track mutex usage for performance
+** profiling or testing, for example.   ^If SQLite is compiled with
+** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then
+** the entire mutexing subsystem is omitted from the build and hence calls to
+** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will
+** return [SQLITE_ERROR].</dd>
+**
+** [[SQLITE_CONFIG_LOOKASIDE]] <dt>SQLITE_CONFIG_LOOKASIDE</dt>
+** <dd> ^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine
+** the default size of lookaside memory on each [database connection].
+** The first argument is the
+** size of each lookaside buffer slot and the second is the number of
+** slots allocated to each database connection.)^  ^(SQLITE_CONFIG_LOOKASIDE
+** sets the <i>default</i> lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE]
+** option to [sqlite3_db_config()] can be used to change the lookaside
+** configuration on individual connections.)^ </dd>
+**
+** [[SQLITE_CONFIG_PCACHE2]] <dt>SQLITE_CONFIG_PCACHE2</dt>
+** <dd> ^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is 
+** a pointer to an [sqlite3_pcache_methods2] object.  This object specifies
+** the interface to a custom page cache implementation.)^
+** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.</dd>
+**
+** [[SQLITE_CONFIG_GETPCACHE2]] <dt>SQLITE_CONFIG_GETPCACHE2</dt>
+** <dd> ^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which
+** is a pointer to an [sqlite3_pcache_methods2] object.  SQLite copies of
+** the current page cache implementation into that object.)^ </dd>
+**
+** [[SQLITE_CONFIG_LOG]] <dt>SQLITE_CONFIG_LOG</dt>
+** <dd> The SQLITE_CONFIG_LOG option is used to configure the SQLite
+** global [error log].
+** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a
+** function with a call signature of void(*)(void*,int,const char*), 
+** and a pointer to void. ^If the function pointer is not NULL, it is
+** invoked by [sqlite3_log()] to process each logging event.  ^If the
+** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op.
+** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is
+** passed through as the first parameter to the application-defined logger
+** function whenever that function is invoked.  ^The second parameter to
+** the logger function is a copy of the first parameter to the corresponding
+** [sqlite3_log()] call and is intended to be a [result code] or an
+** [extended result code].  ^The third parameter passed to the logger is
+** log message after formatting via [sqlite3_snprintf()].
+** The SQLite logging interface is not reentrant; the logger function
+** supplied by the application must not invoke any SQLite interface.
+** In a multi-threaded application, the application-defined logger
+** function must be threadsafe. </dd>
+**
+** [[SQLITE_CONFIG_URI]] <dt>SQLITE_CONFIG_URI
+** <dd>^(The SQLITE_CONFIG_URI option takes a single argument of type int.
+** If non-zero, then URI handling is globally enabled. If the parameter is zero,
+** then URI handling is globally disabled.)^ ^If URI handling is globally
+** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()],
+** [sqlite3_open16()] or
+** specified as part of [ATTACH] commands are interpreted as URIs, regardless
+** of whether or not the [SQLITE_OPEN_URI] flag is set when the database
+** connection is opened. ^If it is globally disabled, filenames are
+** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the
+** database connection is opened. ^(By default, URI handling is globally
+** disabled. The default value may be changed by compiling with the
+** [SQLITE_USE_URI] symbol defined.)^
+**
+** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]] <dt>SQLITE_CONFIG_COVERING_INDEX_SCAN
+** <dd>^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer
+** argument which is interpreted as a boolean in order to enable or disable
+** the use of covering indices for full table scans in the query optimizer.
+** ^The default setting is determined
+** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on"
+** if that compile-time option is omitted.
+** The ability to disable the use of covering indices for full table scans
+** is because some incorrectly coded legacy applications might malfunction
+** when the optimization is enabled.  Providing the ability to
+** disable the optimization allows the older, buggy application code to work
+** without change even with newer versions of SQLite.
+**
+** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]]
+** <dt>SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE
+** <dd> These options are obsolete and should not be used by new code.
+** They are retained for backwards compatibility but are now no-ops.
+** </dd>
+**
+** [[SQLITE_CONFIG_SQLLOG]]
+** <dt>SQLITE_CONFIG_SQLLOG
+** <dd>This option is only available if sqlite is compiled with the
+** [SQLITE_ENABLE_SQLLOG] pre-processor macro defined. The first argument should
+** be a pointer to a function of type void(*)(void*,sqlite3*,const char*, int).
+** The second should be of type (void*). The callback is invoked by the library
+** in three separate circumstances, identified by the value passed as the
+** fourth parameter. If the fourth parameter is 0, then the database connection
+** passed as the second argument has just been opened. The third argument
+** points to a buffer containing the name of the main database file. If the
+** fourth parameter is 1, then the SQL statement that the third parameter
+** points to has just been executed. Or, if the fourth parameter is 2, then
+** the connection being passed as the second parameter is being closed. The
+** third parameter is passed NULL In this case.  An example of using this
+** configuration option can be seen in the "test_sqllog.c" source file in
+** the canonical SQLite source tree.</dd>
+**
+** [[SQLITE_CONFIG_MMAP_SIZE]]
+** <dt>SQLITE_CONFIG_MMAP_SIZE
+** <dd>^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values
+** that are the default mmap size limit (the default setting for
+** [PRAGMA mmap_size]) and the maximum allowed mmap size limit.
+** ^The default setting can be overridden by each database connection using
+** either the [PRAGMA mmap_size] command, or by using the
+** [SQLITE_FCNTL_MMAP_SIZE] file control.  ^(The maximum allowed mmap size
+** will be silently truncated if necessary so that it does not exceed the
+** compile-time maximum mmap size set by the
+** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^
+** ^If either argument to this option is negative, then that argument is
+** changed to its compile-time default.
+**
+** [[SQLITE_CONFIG_WIN32_HEAPSIZE]]
+** <dt>SQLITE_CONFIG_WIN32_HEAPSIZE
+** <dd>^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is
+** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro
+** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value
+** that specifies the maximum size of the created heap.
+** </dl>
+**
+** [[SQLITE_CONFIG_PCACHE_HDRSZ]]
+** <dt>SQLITE_CONFIG_PCACHE_HDRSZ
+** <dd>^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which
+** is a pointer to an integer and writes into that integer the number of extra
+** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE].
+** The amount of extra space required can change depending on the compiler,
+** target platform, and SQLite version.
+**
+** [[SQLITE_CONFIG_PMASZ]]
+** <dt>SQLITE_CONFIG_PMASZ
+** <dd>^The SQLITE_CONFIG_PMASZ option takes a single parameter which
+** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded
+** sorter to that integer.  The default minimum PMA Size is set by the
+** [SQLITE_SORTER_PMASZ] compile-time option.  New threads are launched
+** to help with sort operations when multithreaded sorting
+** is enabled (using the [PRAGMA threads] command) and the amount of content
+** to be sorted exceeds the page size times the minimum of the
+** [PRAGMA cache_size] setting and this value.
+** </dl>
+*/
+#define SQLITE_CONFIG_SINGLETHREAD  1  /* nil */
+#define SQLITE_CONFIG_MULTITHREAD   2  /* nil */
+#define SQLITE_CONFIG_SERIALIZED    3  /* nil */
+#define SQLITE_CONFIG_MALLOC        4  /* sqlite3_mem_methods* */
+#define SQLITE_CONFIG_GETMALLOC     5  /* sqlite3_mem_methods* */
+#define SQLITE_CONFIG_SCRATCH       6  /* void*, int sz, int N */
+#define SQLITE_CONFIG_PAGECACHE     7  /* void*, int sz, int N */
+#define SQLITE_CONFIG_HEAP          8  /* void*, int nByte, int min */
+#define SQLITE_CONFIG_MEMSTATUS     9  /* boolean */
+#define SQLITE_CONFIG_MUTEX        10  /* sqlite3_mutex_methods* */
+#define SQLITE_CONFIG_GETMUTEX     11  /* sqlite3_mutex_methods* */
+/* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ 
+#define SQLITE_CONFIG_LOOKASIDE    13  /* int int */
+#define SQLITE_CONFIG_PCACHE       14  /* no-op */
+#define SQLITE_CONFIG_GETPCACHE    15  /* no-op */
+#define SQLITE_CONFIG_LOG          16  /* xFunc, void* */
+#define SQLITE_CONFIG_URI          17  /* int */
+#define SQLITE_CONFIG_PCACHE2      18  /* sqlite3_pcache_methods2* */
+#define SQLITE_CONFIG_GETPCACHE2   19  /* sqlite3_pcache_methods2* */
+#define SQLITE_CONFIG_COVERING_INDEX_SCAN 20  /* int */
+#define SQLITE_CONFIG_SQLLOG       21  /* xSqllog, void* */
+#define SQLITE_CONFIG_MMAP_SIZE    22  /* sqlite3_int64, sqlite3_int64 */
+#define SQLITE_CONFIG_WIN32_HEAPSIZE      23  /* int nByte */
+#define SQLITE_CONFIG_PCACHE_HDRSZ        24  /* int *psz */
+#define SQLITE_CONFIG_PMASZ               25  /* unsigned int szPma */
+
+/*
+** CAPI3REF: Database Connection Configuration Options
+**
+** These constants are the available integer configuration options that
+** can be passed as the second argument to the [sqlite3_db_config()] interface.
+**
+** New configuration options may be added in future releases of SQLite.
+** Existing configuration options might be discontinued.  Applications
+** should check the return code from [sqlite3_db_config()] to make sure that
+** the call worked.  ^The [sqlite3_db_config()] interface will return a
+** non-zero [error code] if a discontinued or unsupported configuration option
+** is invoked.
+**
+** <dl>
+** <dt>SQLITE_DBCONFIG_LOOKASIDE</dt>
+** <dd> ^This option takes three additional arguments that determine the 
+** [lookaside memory allocator] configuration for the [database connection].
+** ^The first argument (the third parameter to [sqlite3_db_config()] is a
+** pointer to a memory buffer to use for lookaside memory.
+** ^The first argument after the SQLITE_DBCONFIG_LOOKASIDE verb
+** may be NULL in which case SQLite will allocate the
+** lookaside buffer itself using [sqlite3_malloc()]. ^The second argument is the
+** size of each lookaside buffer slot.  ^The third argument is the number of
+** slots.  The size of the buffer in the first argument must be greater than
+** or equal to the product of the second and third arguments.  The buffer
+** must be aligned to an 8-byte boundary.  ^If the second argument to
+** SQLITE_DBCONFIG_LOOKASIDE is not a multiple of 8, it is internally
+** rounded down to the next smaller multiple of 8.  ^(The lookaside memory
+** configuration for a database connection can only be changed when that
+** connection is not currently using lookaside memory, or in other words
+** when the "current value" returned by
+** [sqlite3_db_status](D,[SQLITE_CONFIG_LOOKASIDE],...) is zero.
+** Any attempt to change the lookaside memory configuration when lookaside
+** memory is in use leaves the configuration unchanged and returns 
+** [SQLITE_BUSY].)^</dd>
+**
+** <dt>SQLITE_DBCONFIG_ENABLE_FKEY</dt>
+** <dd> ^This option is used to enable or disable the enforcement of
+** [foreign key constraints].  There should be two additional arguments.
+** The first argument is an integer which is 0 to disable FK enforcement,
+** positive to enable FK enforcement or negative to leave FK enforcement
+** unchanged.  The second parameter is a pointer to an integer into which
+** is written 0 or 1 to indicate whether FK enforcement is off or on
+** following this call.  The second parameter may be a NULL pointer, in
+** which case the FK enforcement setting is not reported back. </dd>
+**
+** <dt>SQLITE_DBCONFIG_ENABLE_TRIGGER</dt>
+** <dd> ^This option is used to enable or disable [CREATE TRIGGER | triggers].
+** There should be two additional arguments.
+** The first argument is an integer which is 0 to disable triggers,
+** positive to enable triggers or negative to leave the setting unchanged.
+** The second parameter is a pointer to an integer into which
+** is written 0 or 1 to indicate whether triggers are disabled or enabled
+** following this call.  The second parameter may be a NULL pointer, in
+** which case the trigger setting is not reported back. </dd>
+**
+** </dl>
+*/
+#define SQLITE_DBCONFIG_LOOKASIDE       1001  /* void* int int */
+#define SQLITE_DBCONFIG_ENABLE_FKEY     1002  /* int int* */
+#define SQLITE_DBCONFIG_ENABLE_TRIGGER  1003  /* int int* */
+
+
+/*
+** CAPI3REF: Enable Or Disable Extended Result Codes
+**
+** ^The sqlite3_extended_result_codes() routine enables or disables the
+** [extended result codes] feature of SQLite. ^The extended result
+** codes are disabled by default for historical compatibility.
+*/
+SQLITE_API int sqlite3_extended_result_codes(sqlite3*, int onoff);
+
+/*
+** CAPI3REF: Last Insert Rowid
+**
+** ^Each entry in most SQLite tables (except for [WITHOUT ROWID] tables)
+** has a unique 64-bit signed
+** integer key called the [ROWID | "rowid"]. ^The rowid is always available
+** as an undeclared column named ROWID, OID, or _ROWID_ as long as those
+** names are not also used by explicitly declared columns. ^If
+** the table has a column of type [INTEGER PRIMARY KEY] then that column
+** is another alias for the rowid.
+**
+** ^The sqlite3_last_insert_rowid(D) interface returns the [rowid] of the 
+** most recent successful [INSERT] into a rowid table or [virtual table]
+** on database connection D.
+** ^Inserts into [WITHOUT ROWID] tables are not recorded.
+** ^If no successful [INSERT]s into rowid tables
+** have ever occurred on the database connection D, 
+** then sqlite3_last_insert_rowid(D) returns zero.
+**
+** ^(If an [INSERT] occurs within a trigger or within a [virtual table]
+** method, then this routine will return the [rowid] of the inserted
+** row as long as the trigger or virtual table method is running.
+** But once the trigger or virtual table method ends, the value returned 
+** by this routine reverts to what it was before the trigger or virtual
+** table method began.)^
+**
+** ^An [INSERT] that fails due to a constraint violation is not a
+** successful [INSERT] and does not change the value returned by this
+** routine.  ^Thus INSERT OR FAIL, INSERT OR IGNORE, INSERT OR ROLLBACK,
+** and INSERT OR ABORT make no changes to the return value of this
+** routine when their insertion fails.  ^(When INSERT OR REPLACE
+** encounters a constraint violation, it does not fail.  The
+** INSERT continues to completion after deleting rows that caused
+** the constraint problem so INSERT OR REPLACE will always change
+** the return value of this interface.)^
+**
+** ^For the purposes of this routine, an [INSERT] is considered to
+** be successful even if it is subsequently rolled back.
+**
+** This function is accessible to SQL statements via the
+** [last_insert_rowid() SQL function].
+**
+** If a separate thread performs a new [INSERT] on the same
+** database connection while the [sqlite3_last_insert_rowid()]
+** function is running and thus changes the last insert [rowid],
+** then the value returned by [sqlite3_last_insert_rowid()] is
+** unpredictable and might not equal either the old or the new
+** last insert [rowid].
+*/
+SQLITE_API sqlite3_int64 sqlite3_last_insert_rowid(sqlite3*);
+
+/*
+** CAPI3REF: Count The Number Of Rows Modified
+**
+** ^This function returns the number of rows modified, inserted or
+** deleted by the most recently completed INSERT, UPDATE or DELETE
+** statement on the database connection specified by the only parameter.
+** ^Executing any other type of SQL statement does not modify the value
+** returned by this function.
+**
+** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are
+** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], 
+** [foreign key actions] or [REPLACE] constraint resolution are not counted.
+** 
+** Changes to a view that are intercepted by 
+** [INSTEAD OF trigger | INSTEAD OF triggers] are not counted. ^The value 
+** returned by sqlite3_changes() immediately after an INSERT, UPDATE or 
+** DELETE statement run on a view is always zero. Only changes made to real 
+** tables are counted.
+**
+** Things are more complicated if the sqlite3_changes() function is
+** executed while a trigger program is running. This may happen if the
+** program uses the [changes() SQL function], or if some other callback
+** function invokes sqlite3_changes() directly. Essentially:
+** 
+** <ul>
+**   <li> ^(Before entering a trigger program the value returned by
+**        sqlite3_changes() function is saved. After the trigger program 
+**        has finished, the original value is restored.)^
+** 
+**   <li> ^(Within a trigger program each INSERT, UPDATE and DELETE 
+**        statement sets the value returned by sqlite3_changes() 
+**        upon completion as normal. Of course, this value will not include 
+**        any changes performed by sub-triggers, as the sqlite3_changes() 
+**        value will be saved and restored after each sub-trigger has run.)^
+** </ul>
+** 
+** ^This means that if the changes() SQL function (or similar) is used
+** by the first INSERT, UPDATE or DELETE statement within a trigger, it 
+** returns the value as set when the calling statement began executing.
+** ^If it is used by the second or subsequent such statement within a trigger 
+** program, the value returned reflects the number of rows modified by the 
+** previous INSERT, UPDATE or DELETE statement within the same trigger.
+**
+** See also the [sqlite3_total_changes()] interface, the
+** [count_changes pragma], and the [changes() SQL function].
+**
+** If a separate thread makes changes on the same database connection
+** while [sqlite3_changes()] is running then the value returned
+** is unpredictable and not meaningful.
+*/
+SQLITE_API int sqlite3_changes(sqlite3*);
+
+/*
+** CAPI3REF: Total Number Of Rows Modified
+**
+** ^This function returns the total number of rows inserted, modified or
+** deleted by all [INSERT], [UPDATE] or [DELETE] statements completed
+** since the database connection was opened, including those executed as
+** part of trigger programs. ^Executing any other type of SQL statement
+** does not affect the value returned by sqlite3_total_changes().
+** 
+** ^Changes made as part of [foreign key actions] are included in the
+** count, but those made as part of REPLACE constraint resolution are
+** not. ^Changes to a view that are intercepted by INSTEAD OF triggers 
+** are not counted.
+** 
+** See also the [sqlite3_changes()] interface, the
+** [count_changes pragma], and the [total_changes() SQL function].
+**
+** If a separate thread makes changes on the same database connection
+** while [sqlite3_total_changes()] is running then the value
+** returned is unpredictable and not meaningful.
+*/
+SQLITE_API int sqlite3_total_changes(sqlite3*);
+
+/*
+** CAPI3REF: Interrupt A Long-Running Query
+**
+** ^This function causes any pending database operation to abort and
+** return at its earliest opportunity. This routine is typically
+** called in response to a user action such as pressing "Cancel"
+** or Ctrl-C where the user wants a long query operation to halt
+** immediately.
+**
+** ^It is safe to call this routine from a thread different from the
+** thread that is currently running the database operation.  But it
+** is not safe to call this routine with a [database connection] that
+** is closed or might close before sqlite3_interrupt() returns.
+**
+** ^If an SQL operation is very nearly finished at the time when
+** sqlite3_interrupt() is called, then it might not have an opportunity
+** to be interrupted and might continue to completion.
+**
+** ^An SQL operation that is interrupted will return [SQLITE_INTERRUPT].
+** ^If the interrupted SQL operation is an INSERT, UPDATE, or DELETE
+** that is inside an explicit transaction, then the entire transaction
+** will be rolled back automatically.
+**
+** ^The sqlite3_interrupt(D) call is in effect until all currently running
+** SQL statements on [database connection] D complete.  ^Any new SQL statements
+** that are started after the sqlite3_interrupt() call and before the 
+** running statements reaches zero are interrupted as if they had been
+** running prior to the sqlite3_interrupt() call.  ^New SQL statements
+** that are started after the running statement count reaches zero are
+** not effected by the sqlite3_interrupt().
+** ^A call to sqlite3_interrupt(D) that occurs when there are no running
+** SQL statements is a no-op and has no effect on SQL statements
+** that are started after the sqlite3_interrupt() call returns.
+**
+** If the database connection closes while [sqlite3_interrupt()]
+** is running then bad things will likely happen.
+*/
+SQLITE_API void sqlite3_interrupt(sqlite3*);
+
+/*
+** CAPI3REF: Determine If An SQL Statement Is Complete
+**
+** These routines are useful during command-line input to determine if the
+** currently entered text seems to form a complete SQL statement or
+** if additional input is needed before sending the text into
+** SQLite for parsing.  ^These routines return 1 if the input string
+** appears to be a complete SQL statement.  ^A statement is judged to be
+** complete if it ends with a semicolon token and is not a prefix of a
+** well-formed CREATE TRIGGER statement.  ^Semicolons that are embedded within
+** string literals or quoted identifier names or comments are not
+** independent tokens (they are part of the token in which they are
+** embedded) and thus do not count as a statement terminator.  ^Whitespace
+** and comments that follow the final semicolon are ignored.
+**
+** ^These routines return 0 if the statement is incomplete.  ^If a
+** memory allocation fails, then SQLITE_NOMEM is returned.
+**
+** ^These routines do not parse the SQL statements thus
+** will not detect syntactically incorrect SQL.
+**
+** ^(If SQLite has not been initialized using [sqlite3_initialize()] prior 
+** to invoking sqlite3_complete16() then sqlite3_initialize() is invoked
+** automatically by sqlite3_complete16().  If that initialization fails,
+** then the return value from sqlite3_complete16() will be non-zero
+** regardless of whether or not the input SQL is complete.)^
+**
+** The input to [sqlite3_complete()] must be a zero-terminated
+** UTF-8 string.
+**
+** The input to [sqlite3_complete16()] must be a zero-terminated
+** UTF-16 string in native byte order.
+*/
+SQLITE_API int sqlite3_complete(const char *sql);
+SQLITE_API int sqlite3_complete16(const void *sql);
+
+/*
+** CAPI3REF: Register A Callback To Handle SQLITE_BUSY Errors
+** KEYWORDS: {busy-handler callback} {busy handler}
+**
+** ^The sqlite3_busy_handler(D,X,P) routine sets a callback function X
+** that might be invoked with argument P whenever
+** an attempt is made to access a database table associated with
+** [database connection] D when another thread
+** or process has the table locked.
+** The sqlite3_busy_handler() interface is used to implement
+** [sqlite3_busy_timeout()] and [PRAGMA busy_timeout].
+**
+** ^If the busy callback is NULL, then [SQLITE_BUSY]
+** is returned immediately upon encountering the lock.  ^If the busy callback
+** is not NULL, then the callback might be invoked with two arguments.
+**
+** ^The first argument to the busy handler is a copy of the void* pointer which
+** is the third argument to sqlite3_busy_handler().  ^The second argument to
+** the busy handler callback is the number of times that the busy handler has
+** been invoked previously for the same locking event.  ^If the
+** busy callback returns 0, then no additional attempts are made to
+** access the database and [SQLITE_BUSY] is returned
+** to the application.
+** ^If the callback returns non-zero, then another attempt
+** is made to access the database and the cycle repeats.
+**
+** The presence of a busy handler does not guarantee that it will be invoked
+** when there is lock contention. ^If SQLite determines that invoking the busy
+** handler could result in a deadlock, it will go ahead and return [SQLITE_BUSY]
+** to the application instead of invoking the 
+** busy handler.
+** Consider a scenario where one process is holding a read lock that
+** it is trying to promote to a reserved lock and
+** a second process is holding a reserved lock that it is trying
+** to promote to an exclusive lock.  The first process cannot proceed
+** because it is blocked by the second and the second process cannot
+** proceed because it is blocked by the first.  If both processes
+** invoke the busy handlers, neither will make any progress.  Therefore,
+** SQLite returns [SQLITE_BUSY] for the first process, hoping that this
+** will induce the first process to release its read lock and allow
+** the second process to proceed.
+**
+** ^The default busy callback is NULL.
+**
+** ^(There can only be a single busy handler defined for each
+** [database connection].  Setting a new busy handler clears any
+** previously set handler.)^  ^Note that calling [sqlite3_busy_timeout()]
+** or evaluating [PRAGMA busy_timeout=N] will change the
+** busy handler and thus clear any previously set busy handler.
+**
+** The busy callback should not take any actions which modify the
+** database connection that invoked the busy handler.  In other words,
+** the busy handler is not reentrant.  Any such actions
+** result in undefined behavior.
+** 
+** A busy handler must not close the database connection
+** or [prepared statement] that invoked the busy handler.
+*/
+SQLITE_API int sqlite3_busy_handler(sqlite3*, int(*)(void*,int), void*);
+
+/*
+** CAPI3REF: Set A Busy Timeout
+**
+** ^This routine sets a [sqlite3_busy_handler | busy handler] that sleeps
+** for a specified amount of time when a table is locked.  ^The handler
+** will sleep multiple times until at least "ms" milliseconds of sleeping
+** have accumulated.  ^After at least "ms" milliseconds of sleeping,
+** the handler returns 0 which causes [sqlite3_step()] to return
+** [SQLITE_BUSY].
+**
+** ^Calling this routine with an argument less than or equal to zero
+** turns off all busy handlers.
+**
+** ^(There can only be a single busy handler for a particular
+** [database connection] at any given moment.  If another busy handler
+** was defined  (using [sqlite3_busy_handler()]) prior to calling
+** this routine, that other busy handler is cleared.)^
+**
+** See also:  [PRAGMA busy_timeout]
+*/
+SQLITE_API int sqlite3_busy_timeout(sqlite3*, int ms);
+
+/*
+** CAPI3REF: Convenience Routines For Running Queries
+**
+** This is a legacy interface that is preserved for backwards compatibility.
+** Use of this interface is not recommended.
+**
+** Definition: A <b>result table</b> is memory data structure created by the
+** [sqlite3_get_table()] interface.  A result table records the
+** complete query results from one or more queries.
+**
+** The table conceptually has a number of rows and columns.  But
+** these numbers are not part of the result table itself.  These
+** numbers are obtained separately.  Let N be the number of rows
+** and M be the number of columns.
+**
+** A result table is an array of pointers to zero-terminated UTF-8 strings.
+** There are (N+1)*M elements in the array.  The first M pointers point
+** to zero-terminated strings that  contain the names of the columns.
+** The remaining entries all point to query results.  NULL values result
+** in NULL pointers.  All other values are in their UTF-8 zero-terminated
+** string representation as returned by [sqlite3_column_text()].
+**
+** A result table might consist of one or more memory allocations.
+** It is not safe to pass a result table directly to [sqlite3_free()].
+** A result table should be deallocated using [sqlite3_free_table()].
+**
+** ^(As an example of the result table format, suppose a query result
+** is as follows:
+**
+** <blockquote><pre>
+**        Name        | Age
+**        -----------------------
+**        Alice       | 43
+**        Bob         | 28
+**        Cindy       | 21
+** </pre></blockquote>
+**
+** There are two column (M==2) and three rows (N==3).  Thus the
+** result table has 8 entries.  Suppose the result table is stored
+** in an array names azResult.  Then azResult holds this content:
+**
+** <blockquote><pre>
+**        azResult&#91;0] = "Name";
+**        azResult&#91;1] = "Age";
+**        azResult&#91;2] = "Alice";
+**        azResult&#91;3] = "43";
+**        azResult&#91;4] = "Bob";
+**        azResult&#91;5] = "28";
+**        azResult&#91;6] = "Cindy";
+**        azResult&#91;7] = "21";
+** </pre></blockquote>)^
+**
+** ^The sqlite3_get_table() function evaluates one or more
+** semicolon-separated SQL statements in the zero-terminated UTF-8
+** string of its 2nd parameter and returns a result table to the
+** pointer given in its 3rd parameter.
+**
+** After the application has finished with the result from sqlite3_get_table(),
+** it must pass the result table pointer to sqlite3_free_table() in order to
+** release the memory that was malloced.  Because of the way the
+** [sqlite3_malloc()] happens within sqlite3_get_table(), the calling
+** function must not try to call [sqlite3_free()] directly.  Only
+** [sqlite3_free_table()] is able to release the memory properly and safely.
+**
+** The sqlite3_get_table() interface is implemented as a wrapper around
+** [sqlite3_exec()].  The sqlite3_get_table() routine does not have access
+** to any internal data structures of SQLite.  It uses only the public
+** interface defined here.  As a consequence, errors that occur in the
+** wrapper layer outside of the internal [sqlite3_exec()] call are not
+** reflected in subsequent calls to [sqlite3_errcode()] or
+** [sqlite3_errmsg()].
+*/
+SQLITE_API int sqlite3_get_table(
+  sqlite3 *db,          /* An open database */
+  const char *zSql,     /* SQL to be evaluated */
+  char ***pazResult,    /* Results of the query */
+  int *pnRow,           /* Number of result rows written here */
+  int *pnColumn,        /* Number of result columns written here */
+  char **pzErrmsg       /* Error msg written here */
+);
+SQLITE_API void sqlite3_free_table(char **result);
+
+/*
+** CAPI3REF: Formatted String Printing Functions
+**
+** These routines are work-alikes of the "printf()" family of functions
+** from the standard C library.
+**
+** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their
+** results into memory obtained from [sqlite3_malloc()].
+** The strings returned by these two routines should be
+** released by [sqlite3_free()].  ^Both routines return a
+** NULL pointer if [sqlite3_malloc()] is unable to allocate enough
+** memory to hold the resulting string.
+**
+** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from
+** the standard C library.  The result is written into the
+** buffer supplied as the second parameter whose size is given by
+** the first parameter. Note that the order of the
+** first two parameters is reversed from snprintf().)^  This is an
+** historical accident that cannot be fixed without breaking
+** backwards compatibility.  ^(Note also that sqlite3_snprintf()
+** returns a pointer to its buffer instead of the number of
+** characters actually written into the buffer.)^  We admit that
+** the number of characters written would be a more useful return
+** value but we cannot change the implementation of sqlite3_snprintf()
+** now without breaking compatibility.
+**
+** ^As long as the buffer size is greater than zero, sqlite3_snprintf()
+** guarantees that the buffer is always zero-terminated.  ^The first
+** parameter "n" is the total size of the buffer, including space for
+** the zero terminator.  So the longest string that can be completely
+** written will be n-1 characters.
+**
+** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf().
+**
+** These routines all implement some additional formatting
+** options that are useful for constructing SQL statements.
+** All of the usual printf() formatting options apply.  In addition, there
+** is are "%q", "%Q", and "%z" options.
+**
+** ^(The %q option works like %s in that it substitutes a nul-terminated
+** string from the argument list.  But %q also doubles every '\'' character.
+** %q is designed for use inside a string literal.)^  By doubling each '\''
+** character it escapes that character and allows it to be inserted into
+** the string.
+**
+** For example, assume the string variable zText contains text as follows:
+**
+** <blockquote><pre>
+**  char *zText = "It's a happy day!";
+** </pre></blockquote>
+**
+** One can use this text in an SQL statement as follows:
+**
+** <blockquote><pre>
+**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES('%q')", zText);
+**  sqlite3_exec(db, zSQL, 0, 0, 0);
+**  sqlite3_free(zSQL);
+** </pre></blockquote>
+**
+** Because the %q format string is used, the '\'' character in zText
+** is escaped and the SQL generated is as follows:
+**
+** <blockquote><pre>
+**  INSERT INTO table1 VALUES('It''s a happy day!')
+** </pre></blockquote>
+**
+** This is correct.  Had we used %s instead of %q, the generated SQL
+** would have looked like this:
+**
+** <blockquote><pre>
+**  INSERT INTO table1 VALUES('It's a happy day!');
+** </pre></blockquote>
+**
+** This second example is an SQL syntax error.  As a general rule you should
+** always use %q instead of %s when inserting text into a string literal.
+**
+** ^(The %Q option works like %q except it also adds single quotes around
+** the outside of the total string.  Additionally, if the parameter in the
+** argument list is a NULL pointer, %Q substitutes the text "NULL" (without
+** single quotes).)^  So, for example, one could say:
+**
+** <blockquote><pre>
+**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES(%Q)", zText);
+**  sqlite3_exec(db, zSQL, 0, 0, 0);
+**  sqlite3_free(zSQL);
+** </pre></blockquote>
+**
+** The code above will render a correct SQL statement in the zSQL
+** variable even if the zText variable is a NULL pointer.
+**
+** ^(The "%z" formatting option works like "%s" but with the
+** addition that after the string has been read and copied into
+** the result, [sqlite3_free()] is called on the input string.)^
+*/
+SQLITE_API char *sqlite3_mprintf(const char*,...);
+SQLITE_API char *sqlite3_vmprintf(const char*, va_list);
+SQLITE_API char *sqlite3_snprintf(int,char*,const char*, ...);
+SQLITE_API char *sqlite3_vsnprintf(int,char*,const char*, va_list);
+
+/*
+** CAPI3REF: Memory Allocation Subsystem
+**
+** The SQLite core uses these three routines for all of its own
+** internal memory allocation needs. "Core" in the previous sentence
+** does not include operating-system specific VFS implementation.  The
+** Windows VFS uses native malloc() and free() for some operations.
+**
+** ^The sqlite3_malloc() routine returns a pointer to a block
+** of memory at least N bytes in length, where N is the parameter.
+** ^If sqlite3_malloc() is unable to obtain sufficient free
+** memory, it returns a NULL pointer.  ^If the parameter N to
+** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns
+** a NULL pointer.
+**
+** ^The sqlite3_malloc64(N) routine works just like
+** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead
+** of a signed 32-bit integer.
+**
+** ^Calling sqlite3_free() with a pointer previously returned
+** by sqlite3_malloc() or sqlite3_realloc() releases that memory so
+** that it might be reused.  ^The sqlite3_free() routine is
+** a no-op if is called with a NULL pointer.  Passing a NULL pointer
+** to sqlite3_free() is harmless.  After being freed, memory
+** should neither be read nor written.  Even reading previously freed
+** memory might result in a segmentation fault or other severe error.
+** Memory corruption, a segmentation fault, or other severe error
+** might result if sqlite3_free() is called with a non-NULL pointer that
+** was not obtained from sqlite3_malloc() or sqlite3_realloc().
+**
+** ^The sqlite3_realloc(X,N) interface attempts to resize a
+** prior memory allocation X to be at least N bytes.
+** ^If the X parameter to sqlite3_realloc(X,N)
+** is a NULL pointer then its behavior is identical to calling
+** sqlite3_malloc(N).
+** ^If the N parameter to sqlite3_realloc(X,N) is zero or
+** negative then the behavior is exactly the same as calling
+** sqlite3_free(X).
+** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation
+** of at least N bytes in size or NULL if insufficient memory is available.
+** ^If M is the size of the prior allocation, then min(N,M) bytes
+** of the prior allocation are copied into the beginning of buffer returned
+** by sqlite3_realloc(X,N) and the prior allocation is freed.
+** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the
+** prior allocation is not freed.
+**
+** ^The sqlite3_realloc64(X,N) interfaces works the same as
+** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead
+** of a 32-bit signed integer.
+**
+** ^If X is a memory allocation previously obtained from sqlite3_malloc(),
+** sqlite3_malloc64(), sqlite3_realloc(), or sqlite3_realloc64(), then
+** sqlite3_msize(X) returns the size of that memory allocation in bytes.
+** ^The value returned by sqlite3_msize(X) might be larger than the number
+** of bytes requested when X was allocated.  ^If X is a NULL pointer then
+** sqlite3_msize(X) returns zero.  If X points to something that is not
+** the beginning of memory allocation, or if it points to a formerly
+** valid memory allocation that has now been freed, then the behavior
+** of sqlite3_msize(X) is undefined and possibly harmful.
+**
+** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(),
+** sqlite3_malloc64(), and sqlite3_realloc64()
+** is always aligned to at least an 8 byte boundary, or to a
+** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time
+** option is used.
+**
+** In SQLite version 3.5.0 and 3.5.1, it was possible to define
+** the SQLITE_OMIT_MEMORY_ALLOCATION which would cause the built-in
+** implementation of these routines to be omitted.  That capability
+** is no longer provided.  Only built-in memory allocators can be used.
+**
+** Prior to SQLite version 3.7.10, the Windows OS interface layer called
+** the system malloc() and free() directly when converting
+** filenames between the UTF-8 encoding used by SQLite
+** and whatever filename encoding is used by the particular Windows
+** installation.  Memory allocation errors were detected, but
+** they were reported back as [SQLITE_CANTOPEN] or
+** [SQLITE_IOERR] rather than [SQLITE_NOMEM].
+**
+** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()]
+** must be either NULL or else pointers obtained from a prior
+** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have
+** not yet been released.
+**
+** The application must not read or write any part of
+** a block of memory after it has been released using
+** [sqlite3_free()] or [sqlite3_realloc()].
+*/
+SQLITE_API void *sqlite3_malloc(int);
+SQLITE_API void *sqlite3_malloc64(sqlite3_uint64);
+SQLITE_API void *sqlite3_realloc(void*, int);
+SQLITE_API void *sqlite3_realloc64(void*, sqlite3_uint64);
+SQLITE_API void sqlite3_free(void*);
+SQLITE_API sqlite3_uint64 sqlite3_msize(void*);
+
+/*
+** CAPI3REF: Memory Allocator Statistics
+**
+** SQLite provides these two interfaces for reporting on the status
+** of the [sqlite3_malloc()], [sqlite3_free()], and [sqlite3_realloc()]
+** routines, which form the built-in memory allocation subsystem.
+**
+** ^The [sqlite3_memory_used()] routine returns the number of bytes
+** of memory currently outstanding (malloced but not freed).
+** ^The [sqlite3_memory_highwater()] routine returns the maximum
+** value of [sqlite3_memory_used()] since the high-water mark
+** was last reset.  ^The values returned by [sqlite3_memory_used()] and
+** [sqlite3_memory_highwater()] include any overhead
+** added by SQLite in its implementation of [sqlite3_malloc()],
+** but not overhead added by the any underlying system library
+** routines that [sqlite3_malloc()] may call.
+**
+** ^The memory high-water mark is reset to the current value of
+** [sqlite3_memory_used()] if and only if the parameter to
+** [sqlite3_memory_highwater()] is true.  ^The value returned
+** by [sqlite3_memory_highwater(1)] is the high-water mark
+** prior to the reset.
+*/
+SQLITE_API sqlite3_int64 sqlite3_memory_used(void);
+SQLITE_API sqlite3_int64 sqlite3_memory_highwater(int resetFlag);
+
+/*
+** CAPI3REF: Pseudo-Random Number Generator
+**
+** SQLite contains a high-quality pseudo-random number generator (PRNG) used to
+** select random [ROWID | ROWIDs] when inserting new records into a table that
+** already uses the largest possible [ROWID].  The PRNG is also used for
+** the build-in random() and randomblob() SQL functions.  This interface allows
+** applications to access the same PRNG for other purposes.
+**
+** ^A call to this routine stores N bytes of randomness into buffer P.
+** ^The P parameter can be a NULL pointer.
+**
+** ^If this routine has not been previously called or if the previous
+** call had N less than one or a NULL pointer for P, then the PRNG is
+** seeded using randomness obtained from the xRandomness method of
+** the default [sqlite3_vfs] object.
+** ^If the previous call to this routine had an N of 1 or more and a
+** non-NULL P then the pseudo-randomness is generated
+** internally and without recourse to the [sqlite3_vfs] xRandomness
+** method.
+*/
+SQLITE_API void sqlite3_randomness(int N, void *P);
+
+/*
+** CAPI3REF: Compile-Time Authorization Callbacks
+**
+** ^This routine registers an authorizer callback with a particular
+** [database connection], supplied in the first argument.
+** ^The authorizer callback is invoked as SQL statements are being compiled
+** by [sqlite3_prepare()] or its variants [sqlite3_prepare_v2()],
+** [sqlite3_prepare16()] and [sqlite3_prepare16_v2()].  ^At various
+** points during the compilation process, as logic is being created
+** to perform various actions, the authorizer callback is invoked to
+** see if those actions are allowed.  ^The authorizer callback should
+** return [SQLITE_OK] to allow the action, [SQLITE_IGNORE] to disallow the
+** specific action but allow the SQL statement to continue to be
+** compiled, or [SQLITE_DENY] to cause the entire SQL statement to be
+** rejected with an error.  ^If the authorizer callback returns
+** any value other than [SQLITE_IGNORE], [SQLITE_OK], or [SQLITE_DENY]
+** then the [sqlite3_prepare_v2()] or equivalent call that triggered
+** the authorizer will fail with an error message.
+**
+** When the callback returns [SQLITE_OK], that means the operation
+** requested is ok.  ^When the callback returns [SQLITE_DENY], the
+** [sqlite3_prepare_v2()] or equivalent call that triggered the
+** authorizer will fail with an error message explaining that
+** access is denied. 
+**
+** ^The first parameter to the authorizer callback is a copy of the third
+** parameter to the sqlite3_set_authorizer() interface. ^The second parameter
+** to the callback is an integer [SQLITE_COPY | action code] that specifies
+** the particular action to be authorized. ^The third through sixth parameters
+** to the callback are zero-terminated strings that contain additional
+** details about the action to be authorized.
+**
+** ^If the action code is [SQLITE_READ]
+** and the callback returns [SQLITE_IGNORE] then the
+** [prepared statement] statement is constructed to substitute
+** a NULL value in place of the table column that would have
+** been read if [SQLITE_OK] had been returned.  The [SQLITE_IGNORE]
+** return can be used to deny an untrusted user access to individual
+** columns of a table.
+** ^If the action code is [SQLITE_DELETE] and the callback returns
+** [SQLITE_IGNORE] then the [DELETE] operation proceeds but the
+** [truncate optimization] is disabled and all rows are deleted individually.
+**
+** An authorizer is used when [sqlite3_prepare | preparing]
+** SQL statements from an untrusted source, to ensure that the SQL statements
+** do not try to access data they are not allowed to see, or that they do not
+** try to execute malicious statements that damage the database.  For
+** example, an application may allow a user to enter arbitrary
+** SQL queries for evaluation by a database.  But the application does
+** not want the user to be able to make arbitrary changes to the
+** database.  An authorizer could then be put in place while the
+** user-entered SQL is being [sqlite3_prepare | prepared] that
+** disallows everything except [SELECT] statements.
+**
+** Applications that need to process SQL from untrusted sources
+** might also consider lowering resource limits using [sqlite3_limit()]
+** and limiting database size using the [max_page_count] [PRAGMA]
+** in addition to using an authorizer.
+**
+** ^(Only a single authorizer can be in place on a database connection
+** at a time.  Each call to sqlite3_set_authorizer overrides the
+** previous call.)^  ^Disable the authorizer by installing a NULL callback.
+** The authorizer is disabled by default.
+**
+** The authorizer callback must not do anything that will modify
+** the database connection that invoked the authorizer callback.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+** ^When [sqlite3_prepare_v2()] is used to prepare a statement, the
+** statement might be re-prepared during [sqlite3_step()] due to a 
+** schema change.  Hence, the application should ensure that the
+** correct authorizer callback remains in place during the [sqlite3_step()].
+**
+** ^Note that the authorizer callback is invoked only during
+** [sqlite3_prepare()] or its variants.  Authorization is not
+** performed during statement evaluation in [sqlite3_step()], unless
+** as stated in the previous paragraph, sqlite3_step() invokes
+** sqlite3_prepare_v2() to reprepare a statement after a schema change.
+*/
+SQLITE_API int sqlite3_set_authorizer(
+  sqlite3*,
+  int (*xAuth)(void*,int,const char*,const char*,const char*,const char*),
+  void *pUserData
+);
+
+/*
+** CAPI3REF: Authorizer Return Codes
+**
+** The [sqlite3_set_authorizer | authorizer callback function] must
+** return either [SQLITE_OK] or one of these two constants in order
+** to signal SQLite whether or not the action is permitted.  See the
+** [sqlite3_set_authorizer | authorizer documentation] for additional
+** information.
+**
+** Note that SQLITE_IGNORE is also used as a [conflict resolution mode]
+** returned from the [sqlite3_vtab_on_conflict()] interface.
+*/
+#define SQLITE_DENY   1   /* Abort the SQL statement with an error */
+#define SQLITE_IGNORE 2   /* Don't allow access, but don't generate an error */
+
+/*
+** CAPI3REF: Authorizer Action Codes
+**
+** The [sqlite3_set_authorizer()] interface registers a callback function
+** that is invoked to authorize certain SQL statement actions.  The
+** second parameter to the callback is an integer code that specifies
+** what action is being authorized.  These are the integer action codes that
+** the authorizer callback may be passed.
+**
+** These action code values signify what kind of operation is to be
+** authorized.  The 3rd and 4th parameters to the authorization
+** callback function will be parameters or NULL depending on which of these
+** codes is used as the second parameter.  ^(The 5th parameter to the
+** authorizer callback is the name of the database ("main", "temp",
+** etc.) if applicable.)^  ^The 6th parameter to the authorizer callback
+** is the name of the inner-most trigger or view that is responsible for
+** the access attempt or NULL if this access attempt is directly from
+** top-level SQL code.
+*/
+/******************************************* 3rd ************ 4th ***********/
+#define SQLITE_CREATE_INDEX          1   /* Index Name      Table Name      */
+#define SQLITE_CREATE_TABLE          2   /* Table Name      NULL            */
+#define SQLITE_CREATE_TEMP_INDEX     3   /* Index Name      Table Name      */
+#define SQLITE_CREATE_TEMP_TABLE     4   /* Table Name      NULL            */
+#define SQLITE_CREATE_TEMP_TRIGGER   5   /* Trigger Name    Table Name      */
+#define SQLITE_CREATE_TEMP_VIEW      6   /* View Name       NULL            */
+#define SQLITE_CREATE_TRIGGER        7   /* Trigger Name    Table Name      */
+#define SQLITE_CREATE_VIEW           8   /* View Name       NULL            */
+#define SQLITE_DELETE                9   /* Table Name      NULL            */
+#define SQLITE_DROP_INDEX           10   /* Index Name      Table Name      */
+#define SQLITE_DROP_TABLE           11   /* Table Name      NULL            */
+#define SQLITE_DROP_TEMP_INDEX      12   /* Index Name      Table Name      */
+#define SQLITE_DROP_TEMP_TABLE      13   /* Table Name      NULL            */
+#define SQLITE_DROP_TEMP_TRIGGER    14   /* Trigger Name    Table Name      */
+#define SQLITE_DROP_TEMP_VIEW       15   /* View Name       NULL            */
+#define SQLITE_DROP_TRIGGER         16   /* Trigger Name    Table Name      */
+#define SQLITE_DROP_VIEW            17   /* View Name       NULL            */
+#define SQLITE_INSERT               18   /* Table Name      NULL            */
+#define SQLITE_PRAGMA               19   /* Pragma Name     1st arg or NULL */
+#define SQLITE_READ                 20   /* Table Name      Column Name     */
+#define SQLITE_SELECT               21   /* NULL            NULL            */
+#define SQLITE_TRANSACTION          22   /* Operation       NULL            */
+#define SQLITE_UPDATE               23   /* Table Name      Column Name     */
+#define SQLITE_ATTACH               24   /* Filename        NULL            */
+#define SQLITE_DETACH               25   /* Database Name   NULL            */
+#define SQLITE_ALTER_TABLE          26   /* Database Name   Table Name      */
+#define SQLITE_REINDEX              27   /* Index Name      NULL            */
+#define SQLITE_ANALYZE              28   /* Table Name      NULL            */
+#define SQLITE_CREATE_VTABLE        29   /* Table Name      Module Name     */
+#define SQLITE_DROP_VTABLE          30   /* Table Name      Module Name     */
+#define SQLITE_FUNCTION             31   /* NULL            Function Name   */
+#define SQLITE_SAVEPOINT            32   /* Operation       Savepoint Name  */
+#define SQLITE_COPY                  0   /* No longer used */
+#define SQLITE_RECURSIVE            33   /* NULL            NULL            */
+
+/*
+** CAPI3REF: Tracing And Profiling Functions
+**
+** These routines register callback functions that can be used for
+** tracing and profiling the execution of SQL statements.
+**
+** ^The callback function registered by sqlite3_trace() is invoked at
+** various times when an SQL statement is being run by [sqlite3_step()].
+** ^The sqlite3_trace() callback is invoked with a UTF-8 rendering of the
+** SQL statement text as the statement first begins executing.
+** ^(Additional sqlite3_trace() callbacks might occur
+** as each triggered subprogram is entered.  The callbacks for triggers
+** contain a UTF-8 SQL comment that identifies the trigger.)^
+**
+** The [SQLITE_TRACE_SIZE_LIMIT] compile-time option can be used to limit
+** the length of [bound parameter] expansion in the output of sqlite3_trace().
+**
+** ^The callback function registered by sqlite3_profile() is invoked
+** as each SQL statement finishes.  ^The profile callback contains
+** the original statement text and an estimate of wall-clock time
+** of how long that statement took to run.  ^The profile callback
+** time is in units of nanoseconds, however the current implementation
+** is only capable of millisecond resolution so the six least significant
+** digits in the time are meaningless.  Future versions of SQLite
+** might provide greater resolution on the profiler callback.  The
+** sqlite3_profile() function is considered experimental and is
+** subject to change in future versions of SQLite.
+*/
+SQLITE_API void *sqlite3_trace(sqlite3*, void(*xTrace)(void*,const char*), void*);
+SQLITE_API SQLITE_EXPERIMENTAL void *sqlite3_profile(sqlite3*,
+   void(*xProfile)(void*,const char*,sqlite3_uint64), void*);
+
+/*
+** CAPI3REF: Query Progress Callbacks
+**
+** ^The sqlite3_progress_handler(D,N,X,P) interface causes the callback
+** function X to be invoked periodically during long running calls to
+** [sqlite3_exec()], [sqlite3_step()] and [sqlite3_get_table()] for
+** database connection D.  An example use for this
+** interface is to keep a GUI updated during a large query.
+**
+** ^The parameter P is passed through as the only parameter to the 
+** callback function X.  ^The parameter N is the approximate number of 
+** [virtual machine instructions] that are evaluated between successive
+** invocations of the callback X.  ^If N is less than one then the progress
+** handler is disabled.
+**
+** ^Only a single progress handler may be defined at one time per
+** [database connection]; setting a new progress handler cancels the
+** old one.  ^Setting parameter X to NULL disables the progress handler.
+** ^The progress handler is also disabled by setting N to a value less
+** than 1.
+**
+** ^If the progress callback returns non-zero, the operation is
+** interrupted.  This feature can be used to implement a
+** "Cancel" button on a GUI progress dialog box.
+**
+** The progress handler callback must not do anything that will modify
+** the database connection that invoked the progress handler.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+*/
+SQLITE_API void sqlite3_progress_handler(sqlite3*, int, int(*)(void*), void*);
+
+/*
+** CAPI3REF: Opening A New Database Connection
+**
+** ^These routines open an SQLite database file as specified by the 
+** filename argument. ^The filename argument is interpreted as UTF-8 for
+** sqlite3_open() and sqlite3_open_v2() and as UTF-16 in the native byte
+** order for sqlite3_open16(). ^(A [database connection] handle is usually
+** returned in *ppDb, even if an error occurs.  The only exception is that
+** if SQLite is unable to allocate memory to hold the [sqlite3] object,
+** a NULL will be written into *ppDb instead of a pointer to the [sqlite3]
+** object.)^ ^(If the database is opened (and/or created) successfully, then
+** [SQLITE_OK] is returned.  Otherwise an [error code] is returned.)^ ^The
+** [sqlite3_errmsg()] or [sqlite3_errmsg16()] routines can be used to obtain
+** an English language description of the error following a failure of any
+** of the sqlite3_open() routines.
+**
+** ^The default encoding will be UTF-8 for databases created using
+** sqlite3_open() or sqlite3_open_v2().  ^The default encoding for databases
+** created using sqlite3_open16() will be UTF-16 in the native byte order.
+**
+** Whether or not an error occurs when it is opened, resources
+** associated with the [database connection] handle should be released by
+** passing it to [sqlite3_close()] when it is no longer required.
+**
+** The sqlite3_open_v2() interface works like sqlite3_open()
+** except that it accepts two additional parameters for additional control
+** over the new database connection.  ^(The flags parameter to
+** sqlite3_open_v2() can take one of
+** the following three values, optionally combined with the 
+** [SQLITE_OPEN_NOMUTEX], [SQLITE_OPEN_FULLMUTEX], [SQLITE_OPEN_SHAREDCACHE],
+** [SQLITE_OPEN_PRIVATECACHE], and/or [SQLITE_OPEN_URI] flags:)^
+**
+** <dl>
+** ^(<dt>[SQLITE_OPEN_READONLY]</dt>
+** <dd>The database is opened in read-only mode.  If the database does not
+** already exist, an error is returned.</dd>)^
+**
+** ^(<dt>[SQLITE_OPEN_READWRITE]</dt>
+** <dd>The database is opened for reading and writing if possible, or reading
+** only if the file is write protected by the operating system.  In either
+** case the database must already exist, otherwise an error is returned.</dd>)^
+**
+** ^(<dt>[SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]</dt>
+** <dd>The database is opened for reading and writing, and is created if
+** it does not already exist. This is the behavior that is always used for
+** sqlite3_open() and sqlite3_open16().</dd>)^
+** </dl>
+**
+** If the 3rd parameter to sqlite3_open_v2() is not one of the
+** combinations shown above optionally combined with other
+** [SQLITE_OPEN_READONLY | SQLITE_OPEN_* bits]
+** then the behavior is undefined.
+**
+** ^If the [SQLITE_OPEN_NOMUTEX] flag is set, then the database connection
+** opens in the multi-thread [threading mode] as long as the single-thread
+** mode has not been set at compile-time or start-time.  ^If the
+** [SQLITE_OPEN_FULLMUTEX] flag is set then the database connection opens
+** in the serialized [threading mode] unless single-thread was
+** previously selected at compile-time or start-time.
+** ^The [SQLITE_OPEN_SHAREDCACHE] flag causes the database connection to be
+** eligible to use [shared cache mode], regardless of whether or not shared
+** cache is enabled using [sqlite3_enable_shared_cache()].  ^The
+** [SQLITE_OPEN_PRIVATECACHE] flag causes the database connection to not
+** participate in [shared cache mode] even if it is enabled.
+**
+** ^The fourth parameter to sqlite3_open_v2() is the name of the
+** [sqlite3_vfs] object that defines the operating system interface that
+** the new database connection should use.  ^If the fourth parameter is
+** a NULL pointer then the default [sqlite3_vfs] object is used.
+**
+** ^If the filename is ":memory:", then a private, temporary in-memory database
+** is created for the connection.  ^This in-memory database will vanish when
+** the database connection is closed.  Future versions of SQLite might
+** make use of additional special filenames that begin with the ":" character.
+** It is recommended that when a database filename actually does begin with
+** a ":" character you should prefix the filename with a pathname such as
+** "./" to avoid ambiguity.
+**
+** ^If the filename is an empty string, then a private, temporary
+** on-disk database will be created.  ^This private database will be
+** automatically deleted as soon as the database connection is closed.
+**
+** [[URI filenames in sqlite3_open()]] <h3>URI Filenames</h3>
+**
+** ^If [URI filename] interpretation is enabled, and the filename argument
+** begins with "file:", then the filename is interpreted as a URI. ^URI
+** filename interpretation is enabled if the [SQLITE_OPEN_URI] flag is
+** set in the fourth argument to sqlite3_open_v2(), or if it has
+** been enabled globally using the [SQLITE_CONFIG_URI] option with the
+** [sqlite3_config()] method or by the [SQLITE_USE_URI] compile-time option.
+** As of SQLite version 3.7.7, URI filename interpretation is turned off
+** by default, but future releases of SQLite might enable URI filename
+** interpretation by default.  See "[URI filenames]" for additional
+** information.
+**
+** URI filenames are parsed according to RFC 3986. ^If the URI contains an
+** authority, then it must be either an empty string or the string 
+** "localhost". ^If the authority is not an empty string or "localhost", an 
+** error is returned to the caller. ^The fragment component of a URI, if 
+** present, is ignored.
+**
+** ^SQLite uses the path component of the URI as the name of the disk file
+** which contains the database. ^If the path begins with a '/' character, 
+** then it is interpreted as an absolute path. ^If the path does not begin 
+** with a '/' (meaning that the authority section is omitted from the URI)
+** then the path is interpreted as a relative path. 
+** ^(On windows, the first component of an absolute path 
+** is a drive specification (e.g. "C:").)^
+**
+** [[core URI query parameters]]
+** The query component of a URI may contain parameters that are interpreted
+** either by SQLite itself, or by a [VFS | custom VFS implementation].
+** SQLite and its built-in [VFSes] interpret the
+** following query parameters:
+**
+** <ul>
+**   <li> <b>vfs</b>: ^The "vfs" parameter may be used to specify the name of
+**     a VFS object that provides the operating system interface that should
+**     be used to access the database file on disk. ^If this option is set to
+**     an empty string the default VFS object is used. ^Specifying an unknown
+**     VFS is an error. ^If sqlite3_open_v2() is used and the vfs option is
+**     present, then the VFS specified by the option takes precedence over
+**     the value passed as the fourth parameter to sqlite3_open_v2().
+**
+**   <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw",
+**     "rwc", or "memory". Attempting to set it to any other value is
+**     an error)^. 
+**     ^If "ro" is specified, then the database is opened for read-only 
+**     access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the 
+**     third argument to sqlite3_open_v2(). ^If the mode option is set to 
+**     "rw", then the database is opened for read-write (but not create) 
+**     access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had 
+**     been set. ^Value "rwc" is equivalent to setting both 
+**     SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE.  ^If the mode option is
+**     set to "memory" then a pure [in-memory database] that never reads
+**     or writes from disk is used. ^It is an error to specify a value for
+**     the mode parameter that is less restrictive than that specified by
+**     the flags passed in the third parameter to sqlite3_open_v2().
+**
+**   <li> <b>cache</b>: ^The cache parameter may be set to either "shared" or
+**     "private". ^Setting it to "shared" is equivalent to setting the
+**     SQLITE_OPEN_SHAREDCACHE bit in the flags argument passed to
+**     sqlite3_open_v2(). ^Setting the cache parameter to "private" is 
+**     equivalent to setting the SQLITE_OPEN_PRIVATECACHE bit.
+**     ^If sqlite3_open_v2() is used and the "cache" parameter is present in
+**     a URI filename, its value overrides any behavior requested by setting
+**     SQLITE_OPEN_PRIVATECACHE or SQLITE_OPEN_SHAREDCACHE flag.
+**
+**  <li> <b>psow</b>: ^The psow parameter indicates whether or not the
+**     [powersafe overwrite] property does or does not apply to the
+**     storage media on which the database file resides.
+**
+**  <li> <b>nolock</b>: ^The nolock parameter is a boolean query parameter
+**     which if set disables file locking in rollback journal modes.  This
+**     is useful for accessing a database on a filesystem that does not
+**     support locking.  Caution:  Database corruption might result if two
+**     or more processes write to the same database and any one of those
+**     processes uses nolock=1.
+**
+**  <li> <b>immutable</b>: ^The immutable parameter is a boolean query
+**     parameter that indicates that the database file is stored on
+**     read-only media.  ^When immutable is set, SQLite assumes that the
+**     database file cannot be changed, even by a process with higher
+**     privilege, and so the database is opened read-only and all locking
+**     and change detection is disabled.  Caution: Setting the immutable
+**     property on a database file that does in fact change can result
+**     in incorrect query results and/or [SQLITE_CORRUPT] errors.
+**     See also: [SQLITE_IOCAP_IMMUTABLE].
+**       
+** </ul>
+**
+** ^Specifying an unknown parameter in the query component of a URI is not an
+** error.  Future versions of SQLite might understand additional query
+** parameters.  See "[query parameters with special meaning to SQLite]" for
+** additional information.
+**
+** [[URI filename examples]] <h3>URI filename examples</h3>
+**
+** <table border="1" align=center cellpadding=5>
+** <tr><th> URI filenames <th> Results
+** <tr><td> file:data.db <td> 
+**          Open the file "data.db" in the current directory.
+** <tr><td> file:/home/fred/data.db<br>
+**          file:///home/fred/data.db <br> 
+**          file://localhost/home/fred/data.db <br> <td> 
+**          Open the database file "/home/fred/data.db".
+** <tr><td> file://darkstar/home/fred/data.db <td> 
+**          An error. "darkstar" is not a recognized authority.
+** <tr><td style="white-space:nowrap"> 
+**          file:///C:/Documents%20and%20Settings/fred/Desktop/data.db
+**     <td> Windows only: Open the file "data.db" on fred's desktop on drive
+**          C:. Note that the %20 escaping in this example is not strictly 
+**          necessary - space characters can be used literally
+**          in URI filenames.
+** <tr><td> file:data.db?mode=ro&cache=private <td> 
+**          Open file "data.db" in the current directory for read-only access.
+**          Regardless of whether or not shared-cache mode is enabled by
+**          default, use a private cache.
+** <tr><td> file:/home/fred/data.db?vfs=unix-dotfile <td>
+**          Open file "/home/fred/data.db". Use the special VFS "unix-dotfile"
+**          that uses dot-files in place of posix advisory locking.
+** <tr><td> file:data.db?mode=readonly <td> 
+**          An error. "readonly" is not a valid option for the "mode" parameter.
+** </table>
+**
+** ^URI hexadecimal escape sequences (%HH) are supported within the path and
+** query components of a URI. A hexadecimal escape sequence consists of a
+** percent sign - "%" - followed by exactly two hexadecimal digits 
+** specifying an octet value. ^Before the path or query components of a
+** URI filename are interpreted, they are encoded using UTF-8 and all 
+** hexadecimal escape sequences replaced by a single byte containing the
+** corresponding octet. If this process generates an invalid UTF-8 encoding,
+** the results are undefined.
+**
+** <b>Note to Windows users:</b>  The encoding used for the filename argument
+** of sqlite3_open() and sqlite3_open_v2() must be UTF-8, not whatever
+** codepage is currently defined.  Filenames containing international
+** characters must be converted to UTF-8 prior to passing them into
+** sqlite3_open() or sqlite3_open_v2().
+**
+** <b>Note to Windows Runtime users:</b>  The temporary directory must be set
+** prior to calling sqlite3_open() or sqlite3_open_v2().  Otherwise, various
+** features that require the use of temporary files may fail.
+**
+** See also: [sqlite3_temp_directory]
+*/
+SQLITE_API int sqlite3_open(
+  const char *filename,   /* Database filename (UTF-8) */
+  sqlite3 **ppDb          /* OUT: SQLite db handle */
+);
+SQLITE_API int sqlite3_open16(
+  const void *filename,   /* Database filename (UTF-16) */
+  sqlite3 **ppDb          /* OUT: SQLite db handle */
+);
+SQLITE_API int sqlite3_open_v2(
+  const char *filename,   /* Database filename (UTF-8) */
+  sqlite3 **ppDb,         /* OUT: SQLite db handle */
+  int flags,              /* Flags */
+  const char *zVfs        /* Name of VFS module to use */
+);
+
+/*
+** CAPI3REF: Obtain Values For URI Parameters
+**
+** These are utility routines, useful to VFS implementations, that check
+** to see if a database file was a URI that contained a specific query 
+** parameter, and if so obtains the value of that query parameter.
+**
+** If F is the database filename pointer passed into the xOpen() method of 
+** a VFS implementation when the flags parameter to xOpen() has one or 
+** more of the [SQLITE_OPEN_URI] or [SQLITE_OPEN_MAIN_DB] bits set and
+** P is the name of the query parameter, then
+** sqlite3_uri_parameter(F,P) returns the value of the P
+** parameter if it exists or a NULL pointer if P does not appear as a 
+** query parameter on F.  If P is a query parameter of F
+** has no explicit value, then sqlite3_uri_parameter(F,P) returns
+** a pointer to an empty string.
+**
+** The sqlite3_uri_boolean(F,P,B) routine assumes that P is a boolean
+** parameter and returns true (1) or false (0) according to the value
+** of P.  The sqlite3_uri_boolean(F,P,B) routine returns true (1) if the
+** value of query parameter P is one of "yes", "true", or "on" in any
+** case or if the value begins with a non-zero number.  The 
+** sqlite3_uri_boolean(F,P,B) routines returns false (0) if the value of
+** query parameter P is one of "no", "false", or "off" in any case or
+** if the value begins with a numeric zero.  If P is not a query
+** parameter on F or if the value of P is does not match any of the
+** above, then sqlite3_uri_boolean(F,P,B) returns (B!=0).
+**
+** The sqlite3_uri_int64(F,P,D) routine converts the value of P into a
+** 64-bit signed integer and returns that integer, or D if P does not
+** exist.  If the value of P is something other than an integer, then
+** zero is returned.
+** 
+** If F is a NULL pointer, then sqlite3_uri_parameter(F,P) returns NULL and
+** sqlite3_uri_boolean(F,P,B) returns B.  If F is not a NULL pointer and
+** is not a database file pathname pointer that SQLite passed into the xOpen
+** VFS method, then the behavior of this routine is undefined and probably
+** undesirable.
+*/
+SQLITE_API const char *sqlite3_uri_parameter(const char *zFilename, const char *zParam);
+SQLITE_API int sqlite3_uri_boolean(const char *zFile, const char *zParam, int bDefault);
+SQLITE_API sqlite3_int64 sqlite3_uri_int64(const char*, const char*, sqlite3_int64);
+
+
+/*
+** CAPI3REF: Error Codes And Messages
+**
+** ^The sqlite3_errcode() interface returns the numeric [result code] or
+** [extended result code] for the most recent failed sqlite3_* API call
+** associated with a [database connection]. If a prior API call failed
+** but the most recent API call succeeded, the return value from
+** sqlite3_errcode() is undefined.  ^The sqlite3_extended_errcode()
+** interface is the same except that it always returns the 
+** [extended result code] even when extended result codes are
+** disabled.
+**
+** ^The sqlite3_errmsg() and sqlite3_errmsg16() return English-language
+** text that describes the error, as either UTF-8 or UTF-16 respectively.
+** ^(Memory to hold the error message string is managed internally.
+** The application does not need to worry about freeing the result.
+** However, the error string might be overwritten or deallocated by
+** subsequent calls to other SQLite interface functions.)^
+**
+** ^The sqlite3_errstr() interface returns the English-language text
+** that describes the [result code], as UTF-8.
+** ^(Memory to hold the error message string is managed internally
+** and must not be freed by the application)^.
+**
+** When the serialized [threading mode] is in use, it might be the
+** case that a second error occurs on a separate thread in between
+** the time of the first error and the call to these interfaces.
+** When that happens, the second error will be reported since these
+** interfaces always report the most recent result.  To avoid
+** this, each thread can obtain exclusive use of the [database connection] D
+** by invoking [sqlite3_mutex_enter]([sqlite3_db_mutex](D)) before beginning
+** to use D and invoking [sqlite3_mutex_leave]([sqlite3_db_mutex](D)) after
+** all calls to the interfaces listed here are completed.
+**
+** If an interface fails with SQLITE_MISUSE, that means the interface
+** was invoked incorrectly by the application.  In that case, the
+** error code and message may or may not be set.
+*/
+SQLITE_API int sqlite3_errcode(sqlite3 *db);
+SQLITE_API int sqlite3_extended_errcode(sqlite3 *db);
+SQLITE_API const char *sqlite3_errmsg(sqlite3*);
+SQLITE_API const void *sqlite3_errmsg16(sqlite3*);
+SQLITE_API const char *sqlite3_errstr(int);
+
+/*
+** CAPI3REF: SQL Statement Object
+** KEYWORDS: {prepared statement} {prepared statements}
+**
+** An instance of this object represents a single SQL statement.
+** This object is variously known as a "prepared statement" or a
+** "compiled SQL statement" or simply as a "statement".
+**
+** The life of a statement object goes something like this:
+**
+** <ol>
+** <li> Create the object using [sqlite3_prepare_v2()] or a related
+**      function.
+** <li> Bind values to [host parameters] using the sqlite3_bind_*()
+**      interfaces.
+** <li> Run the SQL by calling [sqlite3_step()] one or more times.
+** <li> Reset the statement using [sqlite3_reset()] then go back
+**      to step 2.  Do this zero or more times.
+** <li> Destroy the object using [sqlite3_finalize()].
+** </ol>
+**
+** Refer to documentation on individual methods above for additional
+** information.
+*/
+typedef struct sqlite3_stmt sqlite3_stmt;
+
+/*
+** CAPI3REF: Run-time Limits
+**
+** ^(This interface allows the size of various constructs to be limited
+** on a connection by connection basis.  The first parameter is the
+** [database connection] whose limit is to be set or queried.  The
+** second parameter is one of the [limit categories] that define a
+** class of constructs to be size limited.  The third parameter is the
+** new limit for that construct.)^
+**
+** ^If the new limit is a negative number, the limit is unchanged.
+** ^(For each limit category SQLITE_LIMIT_<i>NAME</i> there is a 
+** [limits | hard upper bound]
+** set at compile-time by a C preprocessor macro called
+** [limits | SQLITE_MAX_<i>NAME</i>].
+** (The "_LIMIT_" in the name is changed to "_MAX_".))^
+** ^Attempts to increase a limit above its hard upper bound are
+** silently truncated to the hard upper bound.
+**
+** ^Regardless of whether or not the limit was changed, the 
+** [sqlite3_limit()] interface returns the prior value of the limit.
+** ^Hence, to find the current value of a limit without changing it,
+** simply invoke this interface with the third parameter set to -1.
+**
+** Run-time limits are intended for use in applications that manage
+** both their own internal database and also databases that are controlled
+** by untrusted external sources.  An example application might be a
+** web browser that has its own databases for storing history and
+** separate databases controlled by JavaScript applications downloaded
+** off the Internet.  The internal databases can be given the
+** large, default limits.  Databases managed by external sources can
+** be given much smaller limits designed to prevent a denial of service
+** attack.  Developers might also want to use the [sqlite3_set_authorizer()]
+** interface to further control untrusted SQL.  The size of the database
+** created by an untrusted script can be contained using the
+** [max_page_count] [PRAGMA].
+**
+** New run-time limit categories may be added in future releases.
+*/
+SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
+
+/*
+** CAPI3REF: Run-Time Limit Categories
+** KEYWORDS: {limit category} {*limit categories}
+**
+** These constants define various performance limits
+** that can be lowered at run-time using [sqlite3_limit()].
+** The synopsis of the meanings of the various limits is shown below.
+** Additional information is available at [limits | Limits in SQLite].
+**
+** <dl>
+** [[SQLITE_LIMIT_LENGTH]] ^(<dt>SQLITE_LIMIT_LENGTH</dt>
+** <dd>The maximum size of any string or BLOB or table row, in bytes.<dd>)^
+**
+** [[SQLITE_LIMIT_SQL_LENGTH]] ^(<dt>SQLITE_LIMIT_SQL_LENGTH</dt>
+** <dd>The maximum length of an SQL statement, in bytes.</dd>)^
+**
+** [[SQLITE_LIMIT_COLUMN]] ^(<dt>SQLITE_LIMIT_COLUMN</dt>
+** <dd>The maximum number of columns in a table definition or in the
+** result set of a [SELECT] or the maximum number of columns in an index
+** or in an ORDER BY or GROUP BY clause.</dd>)^
+**
+** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(<dt>SQLITE_LIMIT_EXPR_DEPTH</dt>
+** <dd>The maximum depth of the parse tree on any expression.</dd>)^
+**
+** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(<dt>SQLITE_LIMIT_COMPOUND_SELECT</dt>
+** <dd>The maximum number of terms in a compound SELECT statement.</dd>)^
+**
+** [[SQLITE_LIMIT_VDBE_OP]] ^(<dt>SQLITE_LIMIT_VDBE_OP</dt>
+** <dd>The maximum number of instructions in a virtual machine program
+** used to implement an SQL statement.  This limit is not currently
+** enforced, though that might be added in some future release of
+** SQLite.</dd>)^
+**
+** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(<dt>SQLITE_LIMIT_FUNCTION_ARG</dt>
+** <dd>The maximum number of arguments on a function.</dd>)^
+**
+** [[SQLITE_LIMIT_ATTACHED]] ^(<dt>SQLITE_LIMIT_ATTACHED</dt>
+** <dd>The maximum number of [ATTACH | attached databases].)^</dd>
+**
+** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]]
+** ^(<dt>SQLITE_LIMIT_LIKE_PATTERN_LENGTH</dt>
+** <dd>The maximum length of the pattern argument to the [LIKE] or
+** [GLOB] operators.</dd>)^
+**
+** [[SQLITE_LIMIT_VARIABLE_NUMBER]]
+** ^(<dt>SQLITE_LIMIT_VARIABLE_NUMBER</dt>
+** <dd>The maximum index number of any [parameter] in an SQL statement.)^
+**
+** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(<dt>SQLITE_LIMIT_TRIGGER_DEPTH</dt>
+** <dd>The maximum depth of recursion for triggers.</dd>)^
+**
+** [[SQLITE_LIMIT_WORKER_THREADS]] ^(<dt>SQLITE_LIMIT_WORKER_THREADS</dt>
+** <dd>The maximum number of auxiliary worker threads that a single
+** [prepared statement] may start.</dd>)^
+** </dl>
+*/
+#define SQLITE_LIMIT_LENGTH                    0
+#define SQLITE_LIMIT_SQL_LENGTH                1
+#define SQLITE_LIMIT_COLUMN                    2
+#define SQLITE_LIMIT_EXPR_DEPTH                3
+#define SQLITE_LIMIT_COMPOUND_SELECT           4
+#define SQLITE_LIMIT_VDBE_OP                   5
+#define SQLITE_LIMIT_FUNCTION_ARG              6
+#define SQLITE_LIMIT_ATTACHED                  7
+#define SQLITE_LIMIT_LIKE_PATTERN_LENGTH       8
+#define SQLITE_LIMIT_VARIABLE_NUMBER           9
+#define SQLITE_LIMIT_TRIGGER_DEPTH            10
+#define SQLITE_LIMIT_WORKER_THREADS           11
+
+/*
+** CAPI3REF: Compiling An SQL Statement
+** KEYWORDS: {SQL statement compiler}
+**
+** To execute an SQL query, it must first be compiled into a byte-code
+** program using one of these routines.
+**
+** The first argument, "db", is a [database connection] obtained from a
+** prior successful call to [sqlite3_open()], [sqlite3_open_v2()] or
+** [sqlite3_open16()].  The database connection must not have been closed.
+**
+** The second argument, "zSql", is the statement to be compiled, encoded
+** as either UTF-8 or UTF-16.  The sqlite3_prepare() and sqlite3_prepare_v2()
+** interfaces use UTF-8, and sqlite3_prepare16() and sqlite3_prepare16_v2()
+** use UTF-16.
+**
+** ^If the nByte argument is less than zero, then zSql is read up to the
+** first zero terminator. ^If nByte is non-negative, then it is the maximum
+** number of  bytes read from zSql.  ^When nByte is non-negative, the
+** zSql string ends at either the first '\000' or '\u0000' character or
+** the nByte-th byte, whichever comes first. If the caller knows
+** that the supplied string is nul-terminated, then there is a small
+** performance advantage to be gained by passing an nByte parameter that
+** is equal to the number of bytes in the input string <i>including</i>
+** the nul-terminator bytes as this saves SQLite from having to
+** make a copy of the input string.
+**
+** ^If pzTail is not NULL then *pzTail is made to point to the first byte
+** past the end of the first SQL statement in zSql.  These routines only
+** compile the first statement in zSql, so *pzTail is left pointing to
+** what remains uncompiled.
+**
+** ^*ppStmt is left pointing to a compiled [prepared statement] that can be
+** executed using [sqlite3_step()].  ^If there is an error, *ppStmt is set
+** to NULL.  ^If the input text contains no SQL (if the input is an empty
+** string or a comment) then *ppStmt is set to NULL.
+** The calling procedure is responsible for deleting the compiled
+** SQL statement using [sqlite3_finalize()] after it has finished with it.
+** ppStmt may not be NULL.
+**
+** ^On success, the sqlite3_prepare() family of routines return [SQLITE_OK];
+** otherwise an [error code] is returned.
+**
+** The sqlite3_prepare_v2() and sqlite3_prepare16_v2() interfaces are
+** recommended for all new programs. The two older interfaces are retained
+** for backwards compatibility, but their use is discouraged.
+** ^In the "v2" interfaces, the prepared statement
+** that is returned (the [sqlite3_stmt] object) contains a copy of the
+** original SQL text. This causes the [sqlite3_step()] interface to
+** behave differently in three ways:
+**
+** <ol>
+** <li>
+** ^If the database schema changes, instead of returning [SQLITE_SCHEMA] as it
+** always used to do, [sqlite3_step()] will automatically recompile the SQL
+** statement and try to run it again. As many as [SQLITE_MAX_SCHEMA_RETRY]
+** retries will occur before sqlite3_step() gives up and returns an error.
+** </li>
+**
+** <li>
+** ^When an error occurs, [sqlite3_step()] will return one of the detailed
+** [error codes] or [extended error codes].  ^The legacy behavior was that
+** [sqlite3_step()] would only return a generic [SQLITE_ERROR] result code
+** and the application would have to make a second call to [sqlite3_reset()]
+** in order to find the underlying cause of the problem. With the "v2" prepare
+** interfaces, the underlying reason for the error is returned immediately.
+** </li>
+**
+** <li>
+** ^If the specific value bound to [parameter | host parameter] in the 
+** WHERE clause might influence the choice of query plan for a statement,
+** then the statement will be automatically recompiled, as if there had been 
+** a schema change, on the first  [sqlite3_step()] call following any change
+** to the [sqlite3_bind_text | bindings] of that [parameter]. 
+** ^The specific value of WHERE-clause [parameter] might influence the 
+** choice of query plan if the parameter is the left-hand side of a [LIKE]
+** or [GLOB] operator or if the parameter is compared to an indexed column
+** and the [SQLITE_ENABLE_STAT3] compile-time option is enabled.
+** </li>
+** </ol>
+*/
+SQLITE_API int sqlite3_prepare(
+  sqlite3 *db,            /* Database handle */
+  const char *zSql,       /* SQL statement, UTF-8 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const char **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare_v2(
+  sqlite3 *db,            /* Database handle */
+  const char *zSql,       /* SQL statement, UTF-8 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const char **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare16(
+  sqlite3 *db,            /* Database handle */
+  const void *zSql,       /* SQL statement, UTF-16 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const void **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+SQLITE_API int sqlite3_prepare16_v2(
+  sqlite3 *db,            /* Database handle */
+  const void *zSql,       /* SQL statement, UTF-16 encoded */
+  int nByte,              /* Maximum length of zSql in bytes. */
+  sqlite3_stmt **ppStmt,  /* OUT: Statement handle */
+  const void **pzTail     /* OUT: Pointer to unused portion of zSql */
+);
+
+/*
+** CAPI3REF: Retrieving Statement SQL
+**
+** ^This interface can be used to retrieve a saved copy of the original
+** SQL text used to create a [prepared statement] if that statement was
+** compiled using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()].
+*/
+SQLITE_API const char *sqlite3_sql(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Determine If An SQL Statement Writes The Database
+**
+** ^The sqlite3_stmt_readonly(X) interface returns true (non-zero) if
+** and only if the [prepared statement] X makes no direct changes to
+** the content of the database file.
+**
+** Note that [application-defined SQL functions] or
+** [virtual tables] might change the database indirectly as a side effect.  
+** ^(For example, if an application defines a function "eval()" that 
+** calls [sqlite3_exec()], then the following SQL statement would
+** change the database file through side-effects:
+**
+** <blockquote><pre>
+**    SELECT eval('DELETE FROM t1') FROM t2;
+** </pre></blockquote>
+**
+** But because the [SELECT] statement does not change the database file
+** directly, sqlite3_stmt_readonly() would still return true.)^
+**
+** ^Transaction control statements such as [BEGIN], [COMMIT], [ROLLBACK],
+** [SAVEPOINT], and [RELEASE] cause sqlite3_stmt_readonly() to return true,
+** since the statements themselves do not actually modify the database but
+** rather they control the timing of when other statements modify the 
+** database.  ^The [ATTACH] and [DETACH] statements also cause
+** sqlite3_stmt_readonly() to return true since, while those statements
+** change the configuration of a database connection, they do not make 
+** changes to the content of the database files on disk.
+*/
+SQLITE_API int sqlite3_stmt_readonly(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Determine If A Prepared Statement Has Been Reset
+**
+** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the
+** [prepared statement] S has been stepped at least once using 
+** [sqlite3_step(S)] but has not run to completion and/or has not 
+** been reset using [sqlite3_reset(S)].  ^The sqlite3_stmt_busy(S)
+** interface returns false if S is a NULL pointer.  If S is not a 
+** NULL pointer and is not a pointer to a valid [prepared statement]
+** object, then the behavior is undefined and probably undesirable.
+**
+** This interface can be used in combination [sqlite3_next_stmt()]
+** to locate all prepared statements associated with a database 
+** connection that are in need of being reset.  This can be used,
+** for example, in diagnostic routines to search for prepared 
+** statements that are holding a transaction open.
+*/
+SQLITE_API int sqlite3_stmt_busy(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Dynamically Typed Value Object
+** KEYWORDS: {protected sqlite3_value} {unprotected sqlite3_value}
+**
+** SQLite uses the sqlite3_value object to represent all values
+** that can be stored in a database table. SQLite uses dynamic typing
+** for the values it stores.  ^Values stored in sqlite3_value objects
+** can be integers, floating point values, strings, BLOBs, or NULL.
+**
+** An sqlite3_value object may be either "protected" or "unprotected".
+** Some interfaces require a protected sqlite3_value.  Other interfaces
+** will accept either a protected or an unprotected sqlite3_value.
+** Every interface that accepts sqlite3_value arguments specifies
+** whether or not it requires a protected sqlite3_value.
+**
+** The terms "protected" and "unprotected" refer to whether or not
+** a mutex is held.  An internal mutex is held for a protected
+** sqlite3_value object but no mutex is held for an unprotected
+** sqlite3_value object.  If SQLite is compiled to be single-threaded
+** (with [SQLITE_THREADSAFE=0] and with [sqlite3_threadsafe()] returning 0)
+** or if SQLite is run in one of reduced mutex modes 
+** [SQLITE_CONFIG_SINGLETHREAD] or [SQLITE_CONFIG_MULTITHREAD]
+** then there is no distinction between protected and unprotected
+** sqlite3_value objects and they can be used interchangeably.  However,
+** for maximum code portability it is recommended that applications
+** still make the distinction between protected and unprotected
+** sqlite3_value objects even when not strictly required.
+**
+** ^The sqlite3_value objects that are passed as parameters into the
+** implementation of [application-defined SQL functions] are protected.
+** ^The sqlite3_value object returned by
+** [sqlite3_column_value()] is unprotected.
+** Unprotected sqlite3_value objects may only be used with
+** [sqlite3_result_value()] and [sqlite3_bind_value()].
+** The [sqlite3_value_blob | sqlite3_value_type()] family of
+** interfaces require protected sqlite3_value objects.
+*/
+typedef struct Mem sqlite3_value;
+
+/*
+** CAPI3REF: SQL Function Context Object
+**
+** The context in which an SQL function executes is stored in an
+** sqlite3_context object.  ^A pointer to an sqlite3_context object
+** is always first parameter to [application-defined SQL functions].
+** The application-defined SQL function implementation will pass this
+** pointer through into calls to [sqlite3_result_int | sqlite3_result()],
+** [sqlite3_aggregate_context()], [sqlite3_user_data()],
+** [sqlite3_context_db_handle()], [sqlite3_get_auxdata()],
+** and/or [sqlite3_set_auxdata()].
+*/
+typedef struct sqlite3_context sqlite3_context;
+
+/*
+** CAPI3REF: Binding Values To Prepared Statements
+** KEYWORDS: {host parameter} {host parameters} {host parameter name}
+** KEYWORDS: {SQL parameter} {SQL parameters} {parameter binding}
+**
+** ^(In the SQL statement text input to [sqlite3_prepare_v2()] and its variants,
+** literals may be replaced by a [parameter] that matches one of following
+** templates:
+**
+** <ul>
+** <li>  ?
+** <li>  ?NNN
+** <li>  :VVV
+** <li>  @VVV
+** <li>  $VVV
+** </ul>
+**
+** In the templates above, NNN represents an integer literal,
+** and VVV represents an alphanumeric identifier.)^  ^The values of these
+** parameters (also called "host parameter names" or "SQL parameters")
+** can be set using the sqlite3_bind_*() routines defined here.
+**
+** ^The first argument to the sqlite3_bind_*() routines is always
+** a pointer to the [sqlite3_stmt] object returned from
+** [sqlite3_prepare_v2()] or its variants.
+**
+** ^The second argument is the index of the SQL parameter to be set.
+** ^The leftmost SQL parameter has an index of 1.  ^When the same named
+** SQL parameter is used more than once, second and subsequent
+** occurrences have the same index as the first occurrence.
+** ^The index for named parameters can be looked up using the
+** [sqlite3_bind_parameter_index()] API if desired.  ^The index
+** for "?NNN" parameters is the value of NNN.
+** ^The NNN value must be between 1 and the [sqlite3_limit()]
+** parameter [SQLITE_LIMIT_VARIABLE_NUMBER] (default value: 999).
+**
+** ^The third argument is the value to bind to the parameter.
+** ^If the third parameter to sqlite3_bind_text() or sqlite3_bind_text16()
+** or sqlite3_bind_blob() is a NULL pointer then the fourth parameter
+** is ignored and the end result is the same as sqlite3_bind_null().
+**
+** ^(In those routines that have a fourth argument, its value is the
+** number of bytes in the parameter.  To be clear: the value is the
+** number of <u>bytes</u> in the value, not the number of characters.)^
+** ^If the fourth parameter to sqlite3_bind_text() or sqlite3_bind_text16()
+** is negative, then the length of the string is
+** the number of bytes up to the first zero terminator.
+** If the fourth parameter to sqlite3_bind_blob() is negative, then
+** the behavior is undefined.
+** If a non-negative fourth parameter is provided to sqlite3_bind_text()
+** or sqlite3_bind_text16() or sqlite3_bind_text64() then
+** that parameter must be the byte offset
+** where the NUL terminator would occur assuming the string were NUL
+** terminated.  If any NUL characters occur at byte offsets less than 
+** the value of the fourth parameter then the resulting string value will
+** contain embedded NULs.  The result of expressions involving strings
+** with embedded NULs is undefined.
+**
+** ^The fifth argument to the BLOB and string binding interfaces
+** is a destructor used to dispose of the BLOB or
+** string after SQLite has finished with it.  ^The destructor is called
+** to dispose of the BLOB or string even if the call to bind API fails.
+** ^If the fifth argument is
+** the special value [SQLITE_STATIC], then SQLite assumes that the
+** information is in static, unmanaged space and does not need to be freed.
+** ^If the fifth argument has the value [SQLITE_TRANSIENT], then
+** SQLite makes its own private copy of the data immediately, before
+** the sqlite3_bind_*() routine returns.
+**
+** ^The sixth argument to sqlite3_bind_text64() must be one of
+** [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE]
+** to specify the encoding of the text in the third parameter.  If
+** the sixth argument to sqlite3_bind_text64() is not one of the
+** allowed values shown above, or if the text encoding is different
+** from the encoding specified by the sixth parameter, then the behavior
+** is undefined.
+**
+** ^The sqlite3_bind_zeroblob() routine binds a BLOB of length N that
+** is filled with zeroes.  ^A zeroblob uses a fixed amount of memory
+** (just an integer to hold its size) while it is being processed.
+** Zeroblobs are intended to serve as placeholders for BLOBs whose
+** content is later written using
+** [sqlite3_blob_open | incremental BLOB I/O] routines.
+** ^A negative value for the zeroblob results in a zero-length BLOB.
+**
+** ^If any of the sqlite3_bind_*() routines are called with a NULL pointer
+** for the [prepared statement] or with a prepared statement for which
+** [sqlite3_step()] has been called more recently than [sqlite3_reset()],
+** then the call will return [SQLITE_MISUSE].  If any sqlite3_bind_()
+** routine is passed a [prepared statement] that has been finalized, the
+** result is undefined and probably harmful.
+**
+** ^Bindings are not cleared by the [sqlite3_reset()] routine.
+** ^Unbound parameters are interpreted as NULL.
+**
+** ^The sqlite3_bind_* routines return [SQLITE_OK] on success or an
+** [error code] if anything goes wrong.
+** ^[SQLITE_TOOBIG] might be returned if the size of a string or BLOB
+** exceeds limits imposed by [sqlite3_limit]([SQLITE_LIMIT_LENGTH]) or
+** [SQLITE_MAX_LENGTH].
+** ^[SQLITE_RANGE] is returned if the parameter
+** index is out of range.  ^[SQLITE_NOMEM] is returned if malloc() fails.
+**
+** See also: [sqlite3_bind_parameter_count()],
+** [sqlite3_bind_parameter_name()], and [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_blob(sqlite3_stmt*, int, const void*, int n, void(*)(void*));
+SQLITE_API int sqlite3_bind_blob64(sqlite3_stmt*, int, const void*, sqlite3_uint64,
+                        void(*)(void*));
+SQLITE_API int sqlite3_bind_double(sqlite3_stmt*, int, double);
+SQLITE_API int sqlite3_bind_int(sqlite3_stmt*, int, int);
+SQLITE_API int sqlite3_bind_int64(sqlite3_stmt*, int, sqlite3_int64);
+SQLITE_API int sqlite3_bind_null(sqlite3_stmt*, int);
+SQLITE_API int sqlite3_bind_text(sqlite3_stmt*,int,const char*,int,void(*)(void*));
+SQLITE_API int sqlite3_bind_text16(sqlite3_stmt*, int, const void*, int, void(*)(void*));
+SQLITE_API int sqlite3_bind_text64(sqlite3_stmt*, int, const char*, sqlite3_uint64,
+                         void(*)(void*), unsigned char encoding);
+SQLITE_API int sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*);
+SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n);
+
+/*
+** CAPI3REF: Number Of SQL Parameters
+**
+** ^This routine can be used to find the number of [SQL parameters]
+** in a [prepared statement].  SQL parameters are tokens of the
+** form "?", "?NNN", ":AAA", "$AAA", or "@AAA" that serve as
+** placeholders for values that are [sqlite3_bind_blob | bound]
+** to the parameters at a later time.
+**
+** ^(This routine actually returns the index of the largest (rightmost)
+** parameter. For all forms except ?NNN, this will correspond to the
+** number of unique parameters.  If parameters of the ?NNN form are used,
+** there may be gaps in the list.)^
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_name()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_parameter_count(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Name Of A Host Parameter
+**
+** ^The sqlite3_bind_parameter_name(P,N) interface returns
+** the name of the N-th [SQL parameter] in the [prepared statement] P.
+** ^(SQL parameters of the form "?NNN" or ":AAA" or "@AAA" or "$AAA"
+** have a name which is the string "?NNN" or ":AAA" or "@AAA" or "$AAA"
+** respectively.
+** In other words, the initial ":" or "$" or "@" or "?"
+** is included as part of the name.)^
+** ^Parameters of the form "?" without a following integer have no name
+** and are referred to as "nameless" or "anonymous parameters".
+**
+** ^The first host parameter has an index of 1, not 0.
+**
+** ^If the value N is out of range or if the N-th parameter is
+** nameless, then NULL is returned.  ^The returned string is
+** always in UTF-8 encoding even if the named parameter was
+** originally specified as UTF-16 in [sqlite3_prepare16()] or
+** [sqlite3_prepare16_v2()].
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_count()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API const char *sqlite3_bind_parameter_name(sqlite3_stmt*, int);
+
+/*
+** CAPI3REF: Index Of A Parameter With A Given Name
+**
+** ^Return the index of an SQL parameter given its name.  ^The
+** index value returned is suitable for use as the second
+** parameter to [sqlite3_bind_blob|sqlite3_bind()].  ^A zero
+** is returned if no matching parameter is found.  ^The parameter
+** name must be given in UTF-8 even if the original statement
+** was prepared from UTF-16 text using [sqlite3_prepare16_v2()].
+**
+** See also: [sqlite3_bind_blob|sqlite3_bind()],
+** [sqlite3_bind_parameter_count()], and
+** [sqlite3_bind_parameter_index()].
+*/
+SQLITE_API int sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName);
+
+/*
+** CAPI3REF: Reset All Bindings On A Prepared Statement
+**
+** ^Contrary to the intuition of many, [sqlite3_reset()] does not reset
+** the [sqlite3_bind_blob | bindings] on a [prepared statement].
+** ^Use this routine to reset all host parameters to NULL.
+*/
+SQLITE_API int sqlite3_clear_bindings(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Number Of Columns In A Result Set
+**
+** ^Return the number of columns in the result set returned by the
+** [prepared statement]. ^This routine returns 0 if pStmt is an SQL
+** statement that does not return data (for example an [UPDATE]).
+**
+** See also: [sqlite3_data_count()]
+*/
+SQLITE_API int sqlite3_column_count(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Column Names In A Result Set
+**
+** ^These routines return the name assigned to a particular column
+** in the result set of a [SELECT] statement.  ^The sqlite3_column_name()
+** interface returns a pointer to a zero-terminated UTF-8 string
+** and sqlite3_column_name16() returns a pointer to a zero-terminated
+** UTF-16 string.  ^The first parameter is the [prepared statement]
+** that implements the [SELECT] statement. ^The second parameter is the
+** column number.  ^The leftmost column is number 0.
+**
+** ^The returned string pointer is valid until either the [prepared statement]
+** is destroyed by [sqlite3_finalize()] or until the statement is automatically
+** reprepared by the first call to [sqlite3_step()] for a particular run
+** or until the next call to
+** sqlite3_column_name() or sqlite3_column_name16() on the same column.
+**
+** ^If sqlite3_malloc() fails during the processing of either routine
+** (for example during a conversion from UTF-8 to UTF-16) then a
+** NULL pointer is returned.
+**
+** ^The name of a result column is the value of the "AS" clause for
+** that column, if there is an AS clause.  If there is no AS clause
+** then the name of the column is unspecified and may change from
+** one release of SQLite to the next.
+*/
+SQLITE_API const char *sqlite3_column_name(sqlite3_stmt*, int N);
+SQLITE_API const void *sqlite3_column_name16(sqlite3_stmt*, int N);
+
+/*
+** CAPI3REF: Source Of Data In A Query Result
+**
+** ^These routines provide a means to determine the database, table, and
+** table column that is the origin of a particular result column in
+** [SELECT] statement.
+** ^The name of the database or table or column can be returned as
+** either a UTF-8 or UTF-16 string.  ^The _database_ routines return
+** the database name, the _table_ routines return the table name, and
+** the origin_ routines return the column name.
+** ^The returned string is valid until the [prepared statement] is destroyed
+** using [sqlite3_finalize()] or until the statement is automatically
+** reprepared by the first call to [sqlite3_step()] for a particular run
+** or until the same information is requested
+** again in a different encoding.
+**
+** ^The names returned are the original un-aliased names of the
+** database, table, and column.
+**
+** ^The first argument to these interfaces is a [prepared statement].
+** ^These functions return information about the Nth result column returned by
+** the statement, where N is the second function argument.
+** ^The left-most column is column 0 for these routines.
+**
+** ^If the Nth column returned by the statement is an expression or
+** subquery and is not a column value, then all of these functions return
+** NULL.  ^These routine might also return NULL if a memory allocation error
+** occurs.  ^Otherwise, they return the name of the attached database, table,
+** or column that query result column was extracted from.
+**
+** ^As with all other SQLite APIs, those whose names end with "16" return
+** UTF-16 encoded strings and the other functions return UTF-8.
+**
+** ^These APIs are only available if the library was compiled with the
+** [SQLITE_ENABLE_COLUMN_METADATA] C-preprocessor symbol.
+**
+** If two or more threads call one or more of these routines against the same
+** prepared statement and column at the same time then the results are
+** undefined.
+**
+** If two or more threads call one or more
+** [sqlite3_column_database_name | column metadata interfaces]
+** for the same [prepared statement] and result column
+** at the same time then the results are undefined.
+*/
+SQLITE_API const char *sqlite3_column_database_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_database_name16(sqlite3_stmt*,int);
+SQLITE_API const char *sqlite3_column_table_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_table_name16(sqlite3_stmt*,int);
+SQLITE_API const char *sqlite3_column_origin_name(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt*,int);
+
+/*
+** CAPI3REF: Declared Datatype Of A Query Result
+**
+** ^(The first parameter is a [prepared statement].
+** If this statement is a [SELECT] statement and the Nth column of the
+** returned result set of that [SELECT] is a table column (not an
+** expression or subquery) then the declared type of the table
+** column is returned.)^  ^If the Nth column of the result set is an
+** expression or subquery, then a NULL pointer is returned.
+** ^The returned string is always UTF-8 encoded.
+**
+** ^(For example, given the database schema:
+**
+** CREATE TABLE t1(c1 VARIANT);
+**
+** and the following statement to be compiled:
+**
+** SELECT c1 + 1, c1 FROM t1;
+**
+** this routine would return the string "VARIANT" for the second result
+** column (i==1), and a NULL pointer for the first result column (i==0).)^
+**
+** ^SQLite uses dynamic run-time typing.  ^So just because a column
+** is declared to contain a particular type does not mean that the
+** data stored in that column is of the declared type.  SQLite is
+** strongly typed, but the typing is dynamic not static.  ^Type
+** is associated with individual values, not with the containers
+** used to hold those values.
+*/
+SQLITE_API const char *sqlite3_column_decltype(sqlite3_stmt*,int);
+SQLITE_API const void *sqlite3_column_decltype16(sqlite3_stmt*,int);
+
+/*
+** CAPI3REF: Evaluate An SQL Statement
+**
+** After a [prepared statement] has been prepared using either
+** [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] or one of the legacy
+** interfaces [sqlite3_prepare()] or [sqlite3_prepare16()], this function
+** must be called one or more times to evaluate the statement.
+**
+** The details of the behavior of the sqlite3_step() interface depend
+** on whether the statement was prepared using the newer "v2" interface
+** [sqlite3_prepare_v2()] and [sqlite3_prepare16_v2()] or the older legacy
+** interface [sqlite3_prepare()] and [sqlite3_prepare16()].  The use of the
+** new "v2" interface is recommended for new applications but the legacy
+** interface will continue to be supported.
+**
+** ^In the legacy interface, the return value will be either [SQLITE_BUSY],
+** [SQLITE_DONE], [SQLITE_ROW], [SQLITE_ERROR], or [SQLITE_MISUSE].
+** ^With the "v2" interface, any of the other [result codes] or
+** [extended result codes] might be returned as well.
+**
+** ^[SQLITE_BUSY] means that the database engine was unable to acquire the
+** database locks it needs to do its job.  ^If the statement is a [COMMIT]
+** or occurs outside of an explicit transaction, then you can retry the
+** statement.  If the statement is not a [COMMIT] and occurs within an
+** explicit transaction then you should rollback the transaction before
+** continuing.
+**
+** ^[SQLITE_DONE] means that the statement has finished executing
+** successfully.  sqlite3_step() should not be called again on this virtual
+** machine without first calling [sqlite3_reset()] to reset the virtual
+** machine back to its initial state.
+**
+** ^If the SQL statement being executed returns any data, then [SQLITE_ROW]
+** is returned each time a new row of data is ready for processing by the
+** caller. The values may be accessed using the [column access functions].
+** sqlite3_step() is called again to retrieve the next row of data.
+**
+** ^[SQLITE_ERROR] means that a run-time error (such as a constraint
+** violation) has occurred.  sqlite3_step() should not be called again on
+** the VM. More information may be found by calling [sqlite3_errmsg()].
+** ^With the legacy interface, a more specific error code (for example,
+** [SQLITE_INTERRUPT], [SQLITE_SCHEMA], [SQLITE_CORRUPT], and so forth)
+** can be obtained by calling [sqlite3_reset()] on the
+** [prepared statement].  ^In the "v2" interface,
+** the more specific error code is returned directly by sqlite3_step().
+**
+** [SQLITE_MISUSE] means that the this routine was called inappropriately.
+** Perhaps it was called on a [prepared statement] that has
+** already been [sqlite3_finalize | finalized] or on one that had
+** previously returned [SQLITE_ERROR] or [SQLITE_DONE].  Or it could
+** be the case that the same database connection is being used by two or
+** more threads at the same moment in time.
+**
+** For all versions of SQLite up to and including 3.6.23.1, a call to
+** [sqlite3_reset()] was required after sqlite3_step() returned anything
+** other than [SQLITE_ROW] before any subsequent invocation of
+** sqlite3_step().  Failure to reset the prepared statement using 
+** [sqlite3_reset()] would result in an [SQLITE_MISUSE] return from
+** sqlite3_step().  But after version 3.6.23.1, sqlite3_step() began
+** calling [sqlite3_reset()] automatically in this circumstance rather
+** than returning [SQLITE_MISUSE].  This is not considered a compatibility
+** break because any application that ever receives an SQLITE_MISUSE error
+** is broken by definition.  The [SQLITE_OMIT_AUTORESET] compile-time option
+** can be used to restore the legacy behavior.
+**
+** <b>Goofy Interface Alert:</b> In the legacy interface, the sqlite3_step()
+** API always returns a generic error code, [SQLITE_ERROR], following any
+** error other than [SQLITE_BUSY] and [SQLITE_MISUSE].  You must call
+** [sqlite3_reset()] or [sqlite3_finalize()] in order to find one of the
+** specific [error codes] that better describes the error.
+** We admit that this is a goofy design.  The problem has been fixed
+** with the "v2" interface.  If you prepare all of your SQL statements
+** using either [sqlite3_prepare_v2()] or [sqlite3_prepare16_v2()] instead
+** of the legacy [sqlite3_prepare()] and [sqlite3_prepare16()] interfaces,
+** then the more specific [error codes] are returned directly
+** by sqlite3_step().  The use of the "v2" interface is recommended.
+*/
+SQLITE_API int sqlite3_step(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Number of columns in a result set
+**
+** ^The sqlite3_data_count(P) interface returns the number of columns in the
+** current row of the result set of [prepared statement] P.
+** ^If prepared statement P does not have results ready to return
+** (via calls to the [sqlite3_column_int | sqlite3_column_*()] of
+** interfaces) then sqlite3_data_count(P) returns 0.
+** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer.
+** ^The sqlite3_data_count(P) routine returns 0 if the previous call to
+** [sqlite3_step](P) returned [SQLITE_DONE].  ^The sqlite3_data_count(P)
+** will return non-zero if previous call to [sqlite3_step](P) returned
+** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum]
+** where it always returns zero since each step of that multi-step
+** pragma returns 0 columns of data.
+**
+** See also: [sqlite3_column_count()]
+*/
+SQLITE_API int sqlite3_data_count(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Fundamental Datatypes
+** KEYWORDS: SQLITE_TEXT
+**
+** ^(Every value in SQLite has one of five fundamental datatypes:
+**
+** <ul>
+** <li> 64-bit signed integer
+** <li> 64-bit IEEE floating point number
+** <li> string
+** <li> BLOB
+** <li> NULL
+** </ul>)^
+**
+** These constants are codes for each of those types.
+**
+** Note that the SQLITE_TEXT constant was also used in SQLite version 2
+** for a completely different meaning.  Software that links against both
+** SQLite version 2 and SQLite version 3 should use SQLITE3_TEXT, not
+** SQLITE_TEXT.
+*/
+#define SQLITE_INTEGER  1
+#define SQLITE_FLOAT    2
+#define SQLITE_BLOB     4
+#define SQLITE_NULL     5
+#ifdef SQLITE_TEXT
+# undef SQLITE_TEXT
+#else
+# define SQLITE_TEXT     3
+#endif
+#define SQLITE3_TEXT     3
+
+/*
+** CAPI3REF: Result Values From A Query
+** KEYWORDS: {column access functions}
+**
+** These routines form the "result set" interface.
+**
+** ^These routines return information about a single column of the current
+** result row of a query.  ^In every case the first argument is a pointer
+** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*]
+** that was returned from [sqlite3_prepare_v2()] or one of its variants)
+** and the second argument is the index of the column for which information
+** should be returned. ^The leftmost column of the result set has the index 0.
+** ^The number of columns in the result can be determined using
+** [sqlite3_column_count()].
+**
+** If the SQL statement does not currently point to a valid row, or if the
+** column index is out of range, the result is undefined.
+** These routines may only be called when the most recent call to
+** [sqlite3_step()] has returned [SQLITE_ROW] and neither
+** [sqlite3_reset()] nor [sqlite3_finalize()] have been called subsequently.
+** If any of these routines are called after [sqlite3_reset()] or
+** [sqlite3_finalize()] or after [sqlite3_step()] has returned
+** something other than [SQLITE_ROW], the results are undefined.
+** If [sqlite3_step()] or [sqlite3_reset()] or [sqlite3_finalize()]
+** are called from a different thread while any of these routines
+** are pending, then the results are undefined.
+**
+** ^The sqlite3_column_type() routine returns the
+** [SQLITE_INTEGER | datatype code] for the initial data type
+** of the result column.  ^The returned value is one of [SQLITE_INTEGER],
+** [SQLITE_FLOAT], [SQLITE_TEXT], [SQLITE_BLOB], or [SQLITE_NULL].  The value
+** returned by sqlite3_column_type() is only meaningful if no type
+** conversions have occurred as described below.  After a type conversion,
+** the value returned by sqlite3_column_type() is undefined.  Future
+** versions of SQLite may change the behavior of sqlite3_column_type()
+** following a type conversion.
+**
+** ^If the result is a BLOB or UTF-8 string then the sqlite3_column_bytes()
+** routine returns the number of bytes in that BLOB or string.
+** ^If the result is a UTF-16 string, then sqlite3_column_bytes() converts
+** the string to UTF-8 and then returns the number of bytes.
+** ^If the result is a numeric value then sqlite3_column_bytes() uses
+** [sqlite3_snprintf()] to convert that value to a UTF-8 string and returns
+** the number of bytes in that string.
+** ^If the result is NULL, then sqlite3_column_bytes() returns zero.
+**
+** ^If the result is a BLOB or UTF-16 string then the sqlite3_column_bytes16()
+** routine returns the number of bytes in that BLOB or string.
+** ^If the result is a UTF-8 string, then sqlite3_column_bytes16() converts
+** the string to UTF-16 and then returns the number of bytes.
+** ^If the result is a numeric value then sqlite3_column_bytes16() uses
+** [sqlite3_snprintf()] to convert that value to a UTF-16 string and returns
+** the number of bytes in that string.
+** ^If the result is NULL, then sqlite3_column_bytes16() returns zero.
+**
+** ^The values returned by [sqlite3_column_bytes()] and 
+** [sqlite3_column_bytes16()] do not include the zero terminators at the end
+** of the string.  ^For clarity: the values returned by
+** [sqlite3_column_bytes()] and [sqlite3_column_bytes16()] are the number of
+** bytes in the string, not the number of characters.
+**
+** ^Strings returned by sqlite3_column_text() and sqlite3_column_text16(),
+** even empty strings, are always zero-terminated.  ^The return
+** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer.
+**
+** ^The object returned by [sqlite3_column_value()] is an
+** [unprotected sqlite3_value] object.  An unprotected sqlite3_value object
+** may only be used with [sqlite3_bind_value()] and [sqlite3_result_value()].
+** If the [unprotected sqlite3_value] object returned by
+** [sqlite3_column_value()] is used in any other way, including calls
+** to routines like [sqlite3_value_int()], [sqlite3_value_text()],
+** or [sqlite3_value_bytes()], then the behavior is undefined.
+**
+** These routines attempt to convert the value where appropriate.  ^For
+** example, if the internal representation is FLOAT and a text result
+** is requested, [sqlite3_snprintf()] is used internally to perform the
+** conversion automatically.  ^(The following table details the conversions
+** that are applied:
+**
+** <blockquote>
+** <table border="1">
+** <tr><th> Internal<br>Type <th> Requested<br>Type <th>  Conversion
+**
+** <tr><td>  NULL    <td> INTEGER   <td> Result is 0
+** <tr><td>  NULL    <td>  FLOAT    <td> Result is 0.0
+** <tr><td>  NULL    <td>   TEXT    <td> Result is a NULL pointer
+** <tr><td>  NULL    <td>   BLOB    <td> Result is a NULL pointer
+** <tr><td> INTEGER  <td>  FLOAT    <td> Convert from integer to float
+** <tr><td> INTEGER  <td>   TEXT    <td> ASCII rendering of the integer
+** <tr><td> INTEGER  <td>   BLOB    <td> Same as INTEGER->TEXT
+** <tr><td>  FLOAT   <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  FLOAT   <td>   TEXT    <td> ASCII rendering of the float
+** <tr><td>  FLOAT   <td>   BLOB    <td> [CAST] to BLOB
+** <tr><td>  TEXT    <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  TEXT    <td>  FLOAT    <td> [CAST] to REAL
+** <tr><td>  TEXT    <td>   BLOB    <td> No change
+** <tr><td>  BLOB    <td> INTEGER   <td> [CAST] to INTEGER
+** <tr><td>  BLOB    <td>  FLOAT    <td> [CAST] to REAL
+** <tr><td>  BLOB    <td>   TEXT    <td> Add a zero terminator if needed
+** </table>
+** </blockquote>)^
+**
+** The table above makes reference to standard C library functions atoi()
+** and atof().  SQLite does not really use these functions.  It has its
+** own equivalent internal routines.  The atoi() and atof() names are
+** used in the table for brevity and because they are familiar to most
+** C programmers.
+**
+** Note that when type conversions occur, pointers returned by prior
+** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or
+** sqlite3_column_text16() may be invalidated.
+** Type conversions and pointer invalidations might occur
+** in the following cases:
+**
+** <ul>
+** <li> The initial content is a BLOB and sqlite3_column_text() or
+**      sqlite3_column_text16() is called.  A zero-terminator might
+**      need to be added to the string.</li>
+** <li> The initial content is UTF-8 text and sqlite3_column_bytes16() or
+**      sqlite3_column_text16() is called.  The content must be converted
+**      to UTF-16.</li>
+** <li> The initial content is UTF-16 text and sqlite3_column_bytes() or
+**      sqlite3_column_text() is called.  The content must be converted
+**      to UTF-8.</li>
+** </ul>
+**
+** ^Conversions between UTF-16be and UTF-16le are always done in place and do
+** not invalidate a prior pointer, though of course the content of the buffer
+** that the prior pointer references will have been modified.  Other kinds
+** of conversion are done in place when it is possible, but sometimes they
+** are not possible and in those cases prior pointers are invalidated.
+**
+** The safest and easiest to remember policy is to invoke these routines
+** in one of the following ways:
+**
+** <ul>
+**  <li>sqlite3_column_text() followed by sqlite3_column_bytes()</li>
+**  <li>sqlite3_column_blob() followed by sqlite3_column_bytes()</li>
+**  <li>sqlite3_column_text16() followed by sqlite3_column_bytes16()</li>
+** </ul>
+**
+** In other words, you should call sqlite3_column_text(),
+** sqlite3_column_blob(), or sqlite3_column_text16() first to force the result
+** into the desired format, then invoke sqlite3_column_bytes() or
+** sqlite3_column_bytes16() to find the size of the result.  Do not mix calls
+** to sqlite3_column_text() or sqlite3_column_blob() with calls to
+** sqlite3_column_bytes16(), and do not mix calls to sqlite3_column_text16()
+** with calls to sqlite3_column_bytes().
+**
+** ^The pointers returned are valid until a type conversion occurs as
+** described above, or until [sqlite3_step()] or [sqlite3_reset()] or
+** [sqlite3_finalize()] is called.  ^The memory space used to hold strings
+** and BLOBs is freed automatically.  Do <b>not</b> pass the pointers returned
+** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into
+** [sqlite3_free()].
+**
+** ^(If a memory allocation error occurs during the evaluation of any
+** of these routines, a default value is returned.  The default value
+** is either the integer 0, the floating point number 0.0, or a NULL
+** pointer.  Subsequent calls to [sqlite3_errcode()] will return
+** [SQLITE_NOMEM].)^
+*/
+SQLITE_API const void *sqlite3_column_blob(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_bytes(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_bytes16(sqlite3_stmt*, int iCol);
+SQLITE_API double sqlite3_column_double(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_int(sqlite3_stmt*, int iCol);
+SQLITE_API sqlite3_int64 sqlite3_column_int64(sqlite3_stmt*, int iCol);
+SQLITE_API const unsigned char *sqlite3_column_text(sqlite3_stmt*, int iCol);
+SQLITE_API const void *sqlite3_column_text16(sqlite3_stmt*, int iCol);
+SQLITE_API int sqlite3_column_type(sqlite3_stmt*, int iCol);
+SQLITE_API sqlite3_value *sqlite3_column_value(sqlite3_stmt*, int iCol);
+
+/*
+** CAPI3REF: Destroy A Prepared Statement Object
+**
+** ^The sqlite3_finalize() function is called to delete a [prepared statement].
+** ^If the most recent evaluation of the statement encountered no errors
+** or if the statement is never been evaluated, then sqlite3_finalize() returns
+** SQLITE_OK.  ^If the most recent evaluation of statement S failed, then
+** sqlite3_finalize(S) returns the appropriate [error code] or
+** [extended error code].
+**
+** ^The sqlite3_finalize(S) routine can be called at any point during
+** the life cycle of [prepared statement] S:
+** before statement S is ever evaluated, after
+** one or more calls to [sqlite3_reset()], or after any call
+** to [sqlite3_step()] regardless of whether or not the statement has
+** completed execution.
+**
+** ^Invoking sqlite3_finalize() on a NULL pointer is a harmless no-op.
+**
+** The application must finalize every [prepared statement] in order to avoid
+** resource leaks.  It is a grievous error for the application to try to use
+** a prepared statement after it has been finalized.  Any use of a prepared
+** statement after it has been finalized can result in undefined and
+** undesirable behavior such as segfaults and heap corruption.
+*/
+SQLITE_API int sqlite3_finalize(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Reset A Prepared Statement Object
+**
+** The sqlite3_reset() function is called to reset a [prepared statement]
+** object back to its initial state, ready to be re-executed.
+** ^Any SQL statement variables that had values bound to them using
+** the [sqlite3_bind_blob | sqlite3_bind_*() API] retain their values.
+** Use [sqlite3_clear_bindings()] to reset the bindings.
+**
+** ^The [sqlite3_reset(S)] interface resets the [prepared statement] S
+** back to the beginning of its program.
+**
+** ^If the most recent call to [sqlite3_step(S)] for the
+** [prepared statement] S returned [SQLITE_ROW] or [SQLITE_DONE],
+** or if [sqlite3_step(S)] has never before been called on S,
+** then [sqlite3_reset(S)] returns [SQLITE_OK].
+**
+** ^If the most recent call to [sqlite3_step(S)] for the
+** [prepared statement] S indicated an error, then
+** [sqlite3_reset(S)] returns an appropriate [error code].
+**
+** ^The [sqlite3_reset(S)] interface does not change the values
+** of any [sqlite3_bind_blob|bindings] on the [prepared statement] S.
+*/
+SQLITE_API int sqlite3_reset(sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Create Or Redefine SQL Functions
+** KEYWORDS: {function creation routines}
+** KEYWORDS: {application-defined SQL function}
+** KEYWORDS: {application-defined SQL functions}
+**
+** ^These functions (collectively known as "function creation routines")
+** are used to add SQL functions or aggregates or to redefine the behavior
+** of existing SQL functions or aggregates.  The only differences between
+** these routines are the text encoding expected for
+** the second parameter (the name of the function being created)
+** and the presence or absence of a destructor callback for
+** the application data pointer.
+**
+** ^The first parameter is the [database connection] to which the SQL
+** function is to be added.  ^If an application uses more than one database
+** connection then application-defined SQL functions must be added
+** to each database connection separately.
+**
+** ^The second parameter is the name of the SQL function to be created or
+** redefined.  ^The length of the name is limited to 255 bytes in a UTF-8
+** representation, exclusive of the zero-terminator.  ^Note that the name
+** length limit is in UTF-8 bytes, not characters nor UTF-16 bytes.  
+** ^Any attempt to create a function with a longer name
+** will result in [SQLITE_MISUSE] being returned.
+**
+** ^The third parameter (nArg)
+** is the number of arguments that the SQL function or
+** aggregate takes. ^If this parameter is -1, then the SQL function or
+** aggregate may take any number of arguments between 0 and the limit
+** set by [sqlite3_limit]([SQLITE_LIMIT_FUNCTION_ARG]).  If the third
+** parameter is less than -1 or greater than 127 then the behavior is
+** undefined.
+**
+** ^The fourth parameter, eTextRep, specifies what
+** [SQLITE_UTF8 | text encoding] this SQL function prefers for
+** its parameters.  The application should set this parameter to
+** [SQLITE_UTF16LE] if the function implementation invokes 
+** [sqlite3_value_text16le()] on an input, or [SQLITE_UTF16BE] if the
+** implementation invokes [sqlite3_value_text16be()] on an input, or
+** [SQLITE_UTF16] if [sqlite3_value_text16()] is used, or [SQLITE_UTF8]
+** otherwise.  ^The same SQL function may be registered multiple times using
+** different preferred text encodings, with different implementations for
+** each encoding.
+** ^When multiple implementations of the same function are available, SQLite
+** will pick the one that involves the least amount of data conversion.
+**
+** ^The fourth parameter may optionally be ORed with [SQLITE_DETERMINISTIC]
+** to signal that the function will always return the same result given
+** the same inputs within a single SQL statement.  Most SQL functions are
+** deterministic.  The built-in [random()] SQL function is an example of a
+** function that is not deterministic.  The SQLite query planner is able to
+** perform additional optimizations on deterministic functions, so use
+** of the [SQLITE_DETERMINISTIC] flag is recommended where possible.
+**
+** ^(The fifth parameter is an arbitrary pointer.  The implementation of the
+** function can gain access to this pointer using [sqlite3_user_data()].)^
+**
+** ^The sixth, seventh and eighth parameters, xFunc, xStep and xFinal, are
+** pointers to C-language functions that implement the SQL function or
+** aggregate. ^A scalar SQL function requires an implementation of the xFunc
+** callback only; NULL pointers must be passed as the xStep and xFinal
+** parameters. ^An aggregate SQL function requires an implementation of xStep
+** and xFinal and NULL pointer must be passed for xFunc. ^To delete an existing
+** SQL function or aggregate, pass NULL pointers for all three function
+** callbacks.
+**
+** ^(If the ninth parameter to sqlite3_create_function_v2() is not NULL,
+** then it is destructor for the application data pointer. 
+** The destructor is invoked when the function is deleted, either by being
+** overloaded or when the database connection closes.)^
+** ^The destructor is also invoked if the call to
+** sqlite3_create_function_v2() fails.
+** ^When the destructor callback of the tenth parameter is invoked, it
+** is passed a single argument which is a copy of the application data 
+** pointer which was the fifth parameter to sqlite3_create_function_v2().
+**
+** ^It is permitted to register multiple implementations of the same
+** functions with the same name but with either differing numbers of
+** arguments or differing preferred text encodings.  ^SQLite will use
+** the implementation that most closely matches the way in which the
+** SQL function is used.  ^A function implementation with a non-negative
+** nArg parameter is a better match than a function implementation with
+** a negative nArg.  ^A function where the preferred text encoding
+** matches the database encoding is a better
+** match than a function where the encoding is different.  
+** ^A function where the encoding difference is between UTF16le and UTF16be
+** is a closer match than a function where the encoding difference is
+** between UTF8 and UTF16.
+**
+** ^Built-in functions may be overloaded by new application-defined functions.
+**
+** ^An application-defined function is permitted to call other
+** SQLite interfaces.  However, such calls must not
+** close the database connection nor finalize or reset the prepared
+** statement in which the function is running.
+*/
+SQLITE_API int sqlite3_create_function(
+  sqlite3 *db,
+  const char *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*)
+);
+SQLITE_API int sqlite3_create_function16(
+  sqlite3 *db,
+  const void *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*)
+);
+SQLITE_API int sqlite3_create_function_v2(
+  sqlite3 *db,
+  const char *zFunctionName,
+  int nArg,
+  int eTextRep,
+  void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*),
+  void(*xDestroy)(void*)
+);
+
+/*
+** CAPI3REF: Text Encodings
+**
+** These constant define integer codes that represent the various
+** text encodings supported by SQLite.
+*/
+#define SQLITE_UTF8           1    /* IMP: R-37514-35566 */
+#define SQLITE_UTF16LE        2    /* IMP: R-03371-37637 */
+#define SQLITE_UTF16BE        3    /* IMP: R-51971-34154 */
+#define SQLITE_UTF16          4    /* Use native byte order */
+#define SQLITE_ANY            5    /* Deprecated */
+#define SQLITE_UTF16_ALIGNED  8    /* sqlite3_create_collation only */
+
+/*
+** CAPI3REF: Function Flags
+**
+** These constants may be ORed together with the 
+** [SQLITE_UTF8 | preferred text encoding] as the fourth argument
+** to [sqlite3_create_function()], [sqlite3_create_function16()], or
+** [sqlite3_create_function_v2()].
+*/
+#define SQLITE_DETERMINISTIC    0x800
+
+/*
+** CAPI3REF: Deprecated Functions
+** DEPRECATED
+**
+** These functions are [deprecated].  In order to maintain
+** backwards compatibility with older code, these functions continue 
+** to be supported.  However, new applications should avoid
+** the use of these functions.  To help encourage people to avoid
+** using these functions, we are not going to tell you what they do.
+*/
+#ifndef SQLITE_OMIT_DEPRECATED
+SQLITE_API SQLITE_DEPRECATED int sqlite3_aggregate_count(sqlite3_context*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_expired(sqlite3_stmt*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_transfer_bindings(sqlite3_stmt*, sqlite3_stmt*);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_global_recover(void);
+SQLITE_API SQLITE_DEPRECATED void sqlite3_thread_cleanup(void);
+SQLITE_API SQLITE_DEPRECATED int sqlite3_memory_alarm(void(*)(void*,sqlite3_int64,int),
+                      void*,sqlite3_int64);
+#endif
+
+/*
+** CAPI3REF: Obtaining SQL Function Parameter Values
+**
+** The C-language implementation of SQL functions and aggregates uses
+** this set of interface routines to access the parameter values on
+** the function or aggregate.
+**
+** The xFunc (for scalar functions) or xStep (for aggregates) parameters
+** to [sqlite3_create_function()] and [sqlite3_create_function16()]
+** define callbacks that implement the SQL functions and aggregates.
+** The 3rd parameter to these callbacks is an array of pointers to
+** [protected sqlite3_value] objects.  There is one [sqlite3_value] object for
+** each parameter to the SQL function.  These routines are used to
+** extract values from the [sqlite3_value] objects.
+**
+** These routines work only with [protected sqlite3_value] objects.
+** Any attempt to use these routines on an [unprotected sqlite3_value]
+** object results in undefined behavior.
+**
+** ^These routines work just like the corresponding [column access functions]
+** except that these routines take a single [protected sqlite3_value] object
+** pointer instead of a [sqlite3_stmt*] pointer and an integer column number.
+**
+** ^The sqlite3_value_text16() interface extracts a UTF-16 string
+** in the native byte-order of the host machine.  ^The
+** sqlite3_value_text16be() and sqlite3_value_text16le() interfaces
+** extract UTF-16 strings as big-endian and little-endian respectively.
+**
+** ^(The sqlite3_value_numeric_type() interface attempts to apply
+** numeric affinity to the value.  This means that an attempt is
+** made to convert the value to an integer or floating point.  If
+** such a conversion is possible without loss of information (in other
+** words, if the value is a string that looks like a number)
+** then the conversion is performed.  Otherwise no conversion occurs.
+** The [SQLITE_INTEGER | datatype] after conversion is returned.)^
+**
+** Please pay particular attention to the fact that the pointer returned
+** from [sqlite3_value_blob()], [sqlite3_value_text()], or
+** [sqlite3_value_text16()] can be invalidated by a subsequent call to
+** [sqlite3_value_bytes()], [sqlite3_value_bytes16()], [sqlite3_value_text()],
+** or [sqlite3_value_text16()].
+**
+** These routines must be called from the same thread as
+** the SQL function that supplied the [sqlite3_value*] parameters.
+*/
+SQLITE_API const void *sqlite3_value_blob(sqlite3_value*);
+SQLITE_API int sqlite3_value_bytes(sqlite3_value*);
+SQLITE_API int sqlite3_value_bytes16(sqlite3_value*);
+SQLITE_API double sqlite3_value_double(sqlite3_value*);
+SQLITE_API int sqlite3_value_int(sqlite3_value*);
+SQLITE_API sqlite3_int64 sqlite3_value_int64(sqlite3_value*);
+SQLITE_API const unsigned char *sqlite3_value_text(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16le(sqlite3_value*);
+SQLITE_API const void *sqlite3_value_text16be(sqlite3_value*);
+SQLITE_API int sqlite3_value_type(sqlite3_value*);
+SQLITE_API int sqlite3_value_numeric_type(sqlite3_value*);
+
+/*
+** CAPI3REF: Obtain Aggregate Function Context
+**
+** Implementations of aggregate SQL functions use this
+** routine to allocate memory for storing their state.
+**
+** ^The first time the sqlite3_aggregate_context(C,N) routine is called 
+** for a particular aggregate function, SQLite
+** allocates N of memory, zeroes out that memory, and returns a pointer
+** to the new memory. ^On second and subsequent calls to
+** sqlite3_aggregate_context() for the same aggregate function instance,
+** the same buffer is returned.  Sqlite3_aggregate_context() is normally
+** called once for each invocation of the xStep callback and then one
+** last time when the xFinal callback is invoked.  ^(When no rows match
+** an aggregate query, the xStep() callback of the aggregate function
+** implementation is never called and xFinal() is called exactly once.
+** In those cases, sqlite3_aggregate_context() might be called for the
+** first time from within xFinal().)^
+**
+** ^The sqlite3_aggregate_context(C,N) routine returns a NULL pointer 
+** when first called if N is less than or equal to zero or if a memory
+** allocate error occurs.
+**
+** ^(The amount of space allocated by sqlite3_aggregate_context(C,N) is
+** determined by the N parameter on first successful call.  Changing the
+** value of N in subsequent call to sqlite3_aggregate_context() within
+** the same aggregate function instance will not resize the memory
+** allocation.)^  Within the xFinal callback, it is customary to set
+** N=0 in calls to sqlite3_aggregate_context(C,N) so that no 
+** pointless memory allocations occur.
+**
+** ^SQLite automatically frees the memory allocated by 
+** sqlite3_aggregate_context() when the aggregate query concludes.
+**
+** The first parameter must be a copy of the
+** [sqlite3_context | SQL function context] that is the first parameter
+** to the xStep or xFinal callback routine that implements the aggregate
+** function.
+**
+** This routine must be called from the same thread in which
+** the aggregate SQL function is running.
+*/
+SQLITE_API void *sqlite3_aggregate_context(sqlite3_context*, int nBytes);
+
+/*
+** CAPI3REF: User Data For Functions
+**
+** ^The sqlite3_user_data() interface returns a copy of
+** the pointer that was the pUserData parameter (the 5th parameter)
+** of the [sqlite3_create_function()]
+** and [sqlite3_create_function16()] routines that originally
+** registered the application defined function.
+**
+** This routine must be called from the same thread in which
+** the application-defined function is running.
+*/
+SQLITE_API void *sqlite3_user_data(sqlite3_context*);
+
+/*
+** CAPI3REF: Database Connection For Functions
+**
+** ^The sqlite3_context_db_handle() interface returns a copy of
+** the pointer to the [database connection] (the 1st parameter)
+** of the [sqlite3_create_function()]
+** and [sqlite3_create_function16()] routines that originally
+** registered the application defined function.
+*/
+SQLITE_API sqlite3 *sqlite3_context_db_handle(sqlite3_context*);
+
+/*
+** CAPI3REF: Function Auxiliary Data
+**
+** These functions may be used by (non-aggregate) SQL functions to
+** associate metadata with argument values. If the same value is passed to
+** multiple invocations of the same SQL function during query execution, under
+** some circumstances the associated metadata may be preserved.  An example
+** of where this might be useful is in a regular-expression matching
+** function. The compiled version of the regular expression can be stored as
+** metadata associated with the pattern string.  
+** Then as long as the pattern string remains the same,
+** the compiled regular expression can be reused on multiple
+** invocations of the same function.
+**
+** ^The sqlite3_get_auxdata() interface returns a pointer to the metadata
+** associated by the sqlite3_set_auxdata() function with the Nth argument
+** value to the application-defined function. ^If there is no metadata
+** associated with the function argument, this sqlite3_get_auxdata() interface
+** returns a NULL pointer.
+**
+** ^The sqlite3_set_auxdata(C,N,P,X) interface saves P as metadata for the N-th
+** argument of the application-defined function.  ^Subsequent
+** calls to sqlite3_get_auxdata(C,N) return P from the most recent
+** sqlite3_set_auxdata(C,N,P,X) call if the metadata is still valid or
+** NULL if the metadata has been discarded.
+** ^After each call to sqlite3_set_auxdata(C,N,P,X) where X is not NULL,
+** SQLite will invoke the destructor function X with parameter P exactly
+** once, when the metadata is discarded.
+** SQLite is free to discard the metadata at any time, including: <ul>
+** <li> when the corresponding function parameter changes, or
+** <li> when [sqlite3_reset()] or [sqlite3_finalize()] is called for the
+**      SQL statement, or
+** <li> when sqlite3_set_auxdata() is invoked again on the same parameter, or
+** <li> during the original sqlite3_set_auxdata() call when a memory 
+**      allocation error occurs. </ul>)^
+**
+** Note the last bullet in particular.  The destructor X in 
+** sqlite3_set_auxdata(C,N,P,X) might be called immediately, before the
+** sqlite3_set_auxdata() interface even returns.  Hence sqlite3_set_auxdata()
+** should be called near the end of the function implementation and the
+** function implementation should not make any use of P after
+** sqlite3_set_auxdata() has been called.
+**
+** ^(In practice, metadata is preserved between function calls for
+** function parameters that are compile-time constants, including literal
+** values and [parameters] and expressions composed from the same.)^
+**
+** These routines must be called from the same thread in which
+** the SQL function is running.
+*/
+SQLITE_API void *sqlite3_get_auxdata(sqlite3_context*, int N);
+SQLITE_API void sqlite3_set_auxdata(sqlite3_context*, int N, void*, void (*)(void*));
+
+
+/*
+** CAPI3REF: Constants Defining Special Destructor Behavior
+**
+** These are special values for the destructor that is passed in as the
+** final argument to routines like [sqlite3_result_blob()].  ^If the destructor
+** argument is SQLITE_STATIC, it means that the content pointer is constant
+** and will never change.  It does not need to be destroyed.  ^The
+** SQLITE_TRANSIENT value means that the content will likely change in
+** the near future and that SQLite should make its own private copy of
+** the content before returning.
+**
+** The typedef is necessary to work around problems in certain
+** C++ compilers.
+*/
+typedef void (*sqlite3_destructor_type)(void*);
+#define SQLITE_STATIC      ((sqlite3_destructor_type)0)
+#define SQLITE_TRANSIENT   ((sqlite3_destructor_type)-1)
+
+/*
+** CAPI3REF: Setting The Result Of An SQL Function
+**
+** These routines are used by the xFunc or xFinal callbacks that
+** implement SQL functions and aggregates.  See
+** [sqlite3_create_function()] and [sqlite3_create_function16()]
+** for additional information.
+**
+** These functions work very much like the [parameter binding] family of
+** functions used to bind values to host parameters in prepared statements.
+** Refer to the [SQL parameter] documentation for additional information.
+**
+** ^The sqlite3_result_blob() interface sets the result from
+** an application-defined function to be the BLOB whose content is pointed
+** to by the second parameter and which is N bytes long where N is the
+** third parameter.
+**
+** ^The sqlite3_result_zeroblob() interfaces set the result of
+** the application-defined function to be a BLOB containing all zero
+** bytes and N bytes in size, where N is the value of the 2nd parameter.
+**
+** ^The sqlite3_result_double() interface sets the result from
+** an application-defined function to be a floating point value specified
+** by its 2nd argument.
+**
+** ^The sqlite3_result_error() and sqlite3_result_error16() functions
+** cause the implemented SQL function to throw an exception.
+** ^SQLite uses the string pointed to by the
+** 2nd parameter of sqlite3_result_error() or sqlite3_result_error16()
+** as the text of an error message.  ^SQLite interprets the error
+** message string from sqlite3_result_error() as UTF-8. ^SQLite
+** interprets the string from sqlite3_result_error16() as UTF-16 in native
+** byte order.  ^If the third parameter to sqlite3_result_error()
+** or sqlite3_result_error16() is negative then SQLite takes as the error
+** message all text up through the first zero character.
+** ^If the third parameter to sqlite3_result_error() or
+** sqlite3_result_error16() is non-negative then SQLite takes that many
+** bytes (not characters) from the 2nd parameter as the error message.
+** ^The sqlite3_result_error() and sqlite3_result_error16()
+** routines make a private copy of the error message text before
+** they return.  Hence, the calling function can deallocate or
+** modify the text after they return without harm.
+** ^The sqlite3_result_error_code() function changes the error code
+** returned by SQLite as a result of an error in a function.  ^By default,
+** the error code is SQLITE_ERROR.  ^A subsequent call to sqlite3_result_error()
+** or sqlite3_result_error16() resets the error code to SQLITE_ERROR.
+**
+** ^The sqlite3_result_error_toobig() interface causes SQLite to throw an
+** error indicating that a string or BLOB is too long to represent.
+**
+** ^The sqlite3_result_error_nomem() interface causes SQLite to throw an
+** error indicating that a memory allocation failed.
+**
+** ^The sqlite3_result_int() interface sets the return value
+** of the application-defined function to be the 32-bit signed integer
+** value given in the 2nd argument.
+** ^The sqlite3_result_int64() interface sets the return value
+** of the application-defined function to be the 64-bit signed integer
+** value given in the 2nd argument.
+**
+** ^The sqlite3_result_null() interface sets the return value
+** of the application-defined function to be NULL.
+**
+** ^The sqlite3_result_text(), sqlite3_result_text16(),
+** sqlite3_result_text16le(), and sqlite3_result_text16be() interfaces
+** set the return value of the application-defined function to be
+** a text string which is represented as UTF-8, UTF-16 native byte order,
+** UTF-16 little endian, or UTF-16 big endian, respectively.
+** ^The sqlite3_result_text64() interface sets the return value of an
+** application-defined function to be a text string in an encoding
+** specified by the fifth (and last) parameter, which must be one
+** of [SQLITE_UTF8], [SQLITE_UTF16], [SQLITE_UTF16BE], or [SQLITE_UTF16LE].
+** ^SQLite takes the text result from the application from
+** the 2nd parameter of the sqlite3_result_text* interfaces.
+** ^If the 3rd parameter to the sqlite3_result_text* interfaces
+** is negative, then SQLite takes result text from the 2nd parameter
+** through the first zero character.
+** ^If the 3rd parameter to the sqlite3_result_text* interfaces
+** is non-negative, then as many bytes (not characters) of the text
+** pointed to by the 2nd parameter are taken as the application-defined
+** function result.  If the 3rd parameter is non-negative, then it
+** must be the byte offset into the string where the NUL terminator would
+** appear if the string where NUL terminated.  If any NUL characters occur
+** in the string at a byte offset that is less than the value of the 3rd
+** parameter, then the resulting string will contain embedded NULs and the
+** result of expressions operating on strings with embedded NULs is undefined.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces
+** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that
+** function as the destructor on the text or BLOB result when it has
+** finished using that result.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces or to
+** sqlite3_result_blob is the special constant SQLITE_STATIC, then SQLite
+** assumes that the text or BLOB result is in constant space and does not
+** copy the content of the parameter nor call a destructor on the content
+** when it has finished using that result.
+** ^If the 4th parameter to the sqlite3_result_text* interfaces
+** or sqlite3_result_blob is the special constant SQLITE_TRANSIENT
+** then SQLite makes a copy of the result into space obtained from
+** from [sqlite3_malloc()] before it returns.
+**
+** ^The sqlite3_result_value() interface sets the result of
+** the application-defined function to be a copy the
+** [unprotected sqlite3_value] object specified by the 2nd parameter.  ^The
+** sqlite3_result_value() interface makes a copy of the [sqlite3_value]
+** so that the [sqlite3_value] specified in the parameter may change or
+** be deallocated after sqlite3_result_value() returns without harm.
+** ^A [protected sqlite3_value] object may always be used where an
+** [unprotected sqlite3_value] object is required, so either
+** kind of [sqlite3_value] object can be used with this interface.
+**
+** If these routines are called from within the different thread
+** than the one containing the application-defined function that received
+** the [sqlite3_context] pointer, the results are undefined.
+*/
+SQLITE_API void sqlite3_result_blob(sqlite3_context*, const void*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_blob64(sqlite3_context*,const void*,
+                           sqlite3_uint64,void(*)(void*));
+SQLITE_API void sqlite3_result_double(sqlite3_context*, double);
+SQLITE_API void sqlite3_result_error(sqlite3_context*, const char*, int);
+SQLITE_API void sqlite3_result_error16(sqlite3_context*, const void*, int);
+SQLITE_API void sqlite3_result_error_toobig(sqlite3_context*);
+SQLITE_API void sqlite3_result_error_nomem(sqlite3_context*);
+SQLITE_API void sqlite3_result_error_code(sqlite3_context*, int);
+SQLITE_API void sqlite3_result_int(sqlite3_context*, int);
+SQLITE_API void sqlite3_result_int64(sqlite3_context*, sqlite3_int64);
+SQLITE_API void sqlite3_result_null(sqlite3_context*);
+SQLITE_API void sqlite3_result_text(sqlite3_context*, const char*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_text64(sqlite3_context*, const char*,sqlite3_uint64,
+                           void(*)(void*), unsigned char encoding);
+SQLITE_API void sqlite3_result_text16(sqlite3_context*, const void*, int, void(*)(void*));
+SQLITE_API void sqlite3_result_text16le(sqlite3_context*, const void*, int,void(*)(void*));
+SQLITE_API void sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*));
+SQLITE_API void sqlite3_result_value(sqlite3_context*, sqlite3_value*);
+SQLITE_API void sqlite3_result_zeroblob(sqlite3_context*, int n);
+
+/*
+** CAPI3REF: Define New Collating Sequences
+**
+** ^These functions add, remove, or modify a [collation] associated
+** with the [database connection] specified as the first argument.
+**
+** ^The name of the collation is a UTF-8 string
+** for sqlite3_create_collation() and sqlite3_create_collation_v2()
+** and a UTF-16 string in native byte order for sqlite3_create_collation16().
+** ^Collation names that compare equal according to [sqlite3_strnicmp()] are
+** considered to be the same name.
+**
+** ^(The third argument (eTextRep) must be one of the constants:
+** <ul>
+** <li> [SQLITE_UTF8],
+** <li> [SQLITE_UTF16LE],
+** <li> [SQLITE_UTF16BE],
+** <li> [SQLITE_UTF16], or
+** <li> [SQLITE_UTF16_ALIGNED].
+** </ul>)^
+** ^The eTextRep argument determines the encoding of strings passed
+** to the collating function callback, xCallback.
+** ^The [SQLITE_UTF16] and [SQLITE_UTF16_ALIGNED] values for eTextRep
+** force strings to be UTF16 with native byte order.
+** ^The [SQLITE_UTF16_ALIGNED] value for eTextRep forces strings to begin
+** on an even byte address.
+**
+** ^The fourth argument, pArg, is an application data pointer that is passed
+** through as the first argument to the collating function callback.
+**
+** ^The fifth argument, xCallback, is a pointer to the collating function.
+** ^Multiple collating functions can be registered using the same name but
+** with different eTextRep parameters and SQLite will use whichever
+** function requires the least amount of data transformation.
+** ^If the xCallback argument is NULL then the collating function is
+** deleted.  ^When all collating functions having the same name are deleted,
+** that collation is no longer usable.
+**
+** ^The collating function callback is invoked with a copy of the pArg 
+** application data pointer and with two strings in the encoding specified
+** by the eTextRep argument.  The collating function must return an
+** integer that is negative, zero, or positive
+** if the first string is less than, equal to, or greater than the second,
+** respectively.  A collating function must always return the same answer
+** given the same inputs.  If two or more collating functions are registered
+** to the same collation name (using different eTextRep values) then all
+** must give an equivalent answer when invoked with equivalent strings.
+** The collating function must obey the following properties for all
+** strings A, B, and C:
+**
+** <ol>
+** <li> If A==B then B==A.
+** <li> If A==B and B==C then A==C.
+** <li> If A&lt;B THEN B&gt;A.
+** <li> If A&lt;B and B&lt;C then A&lt;C.
+** </ol>
+**
+** If a collating function fails any of the above constraints and that
+** collating function is  registered and used, then the behavior of SQLite
+** is undefined.
+**
+** ^The sqlite3_create_collation_v2() works like sqlite3_create_collation()
+** with the addition that the xDestroy callback is invoked on pArg when
+** the collating function is deleted.
+** ^Collating functions are deleted when they are overridden by later
+** calls to the collation creation functions or when the
+** [database connection] is closed using [sqlite3_close()].
+**
+** ^The xDestroy callback is <u>not</u> called if the 
+** sqlite3_create_collation_v2() function fails.  Applications that invoke
+** sqlite3_create_collation_v2() with a non-NULL xDestroy argument should 
+** check the return code and dispose of the application data pointer
+** themselves rather than expecting SQLite to deal with it for them.
+** This is different from every other SQLite interface.  The inconsistency 
+** is unfortunate but cannot be changed without breaking backwards 
+** compatibility.
+**
+** See also:  [sqlite3_collation_needed()] and [sqlite3_collation_needed16()].
+*/
+SQLITE_API int sqlite3_create_collation(
+  sqlite3*, 
+  const char *zName, 
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+);
+SQLITE_API int sqlite3_create_collation_v2(
+  sqlite3*, 
+  const char *zName, 
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*),
+  void(*xDestroy)(void*)
+);
+SQLITE_API int sqlite3_create_collation16(
+  sqlite3*, 
+  const void *zName,
+  int eTextRep, 
+  void *pArg,
+  int(*xCompare)(void*,int,const void*,int,const void*)
+);
+
+/*
+** CAPI3REF: Collation Needed Callbacks
+**
+** ^To avoid having to register all collation sequences before a database
+** can be used, a single callback function may be registered with the
+** [database connection] to be invoked whenever an undefined collation
+** sequence is required.
+**
+** ^If the function is registered using the sqlite3_collation_needed() API,
+** then it is passed the names of undefined collation sequences as strings
+** encoded in UTF-8. ^If sqlite3_collation_needed16() is used,
+** the names are passed as UTF-16 in machine native byte order.
+** ^A call to either function replaces the existing collation-needed callback.
+**
+** ^(When the callback is invoked, the first argument passed is a copy
+** of the second argument to sqlite3_collation_needed() or
+** sqlite3_collation_needed16().  The second argument is the database
+** connection.  The third argument is one of [SQLITE_UTF8], [SQLITE_UTF16BE],
+** or [SQLITE_UTF16LE], indicating the most desirable form of the collation
+** sequence function required.  The fourth parameter is the name of the
+** required collation sequence.)^
+**
+** The callback function should register the desired collation using
+** [sqlite3_create_collation()], [sqlite3_create_collation16()], or
+** [sqlite3_create_collation_v2()].
+*/
+SQLITE_API int sqlite3_collation_needed(
+  sqlite3*, 
+  void*, 
+  void(*)(void*,sqlite3*,int eTextRep,const char*)
+);
+SQLITE_API int sqlite3_collation_needed16(
+  sqlite3*, 
+  void*,
+  void(*)(void*,sqlite3*,int eTextRep,const void*)
+);
+
+#ifdef SQLITE_HAS_CODEC
+/*
+** Specify the key for an encrypted database.  This routine should be
+** called right after sqlite3_open().
+**
+** The code to implement this API is not available in the public release
+** of SQLite.
+*/
+SQLITE_API int sqlite3_key(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const void *pKey, int nKey     /* The key */
+);
+SQLITE_API int sqlite3_key_v2(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const char *zDbName,           /* Name of the database */
+  const void *pKey, int nKey     /* The key */
+);
+
+/*
+** Change the key on an open database.  If the current database is not
+** encrypted, this routine will encrypt it.  If pNew==0 or nNew==0, the
+** database is decrypted.
+**
+** The code to implement this API is not available in the public release
+** of SQLite.
+*/
+SQLITE_API int sqlite3_rekey(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const void *pKey, int nKey     /* The new key */
+);
+SQLITE_API int sqlite3_rekey_v2(
+  sqlite3 *db,                   /* Database to be rekeyed */
+  const char *zDbName,           /* Name of the database */
+  const void *pKey, int nKey     /* The new key */
+);
+
+/*
+** Specify the activation key for a SEE database.  Unless 
+** activated, none of the SEE routines will work.
+*/
+SQLITE_API void sqlite3_activate_see(
+  const char *zPassPhrase        /* Activation phrase */
+);
+#endif
+
+#ifdef SQLITE_ENABLE_CEROD
+/*
+** Specify the activation key for a CEROD database.  Unless 
+** activated, none of the CEROD routines will work.
+*/
+SQLITE_API void sqlite3_activate_cerod(
+  const char *zPassPhrase        /* Activation phrase */
+);
+#endif
+
+/*
+** CAPI3REF: Suspend Execution For A Short Time
+**
+** The sqlite3_sleep() function causes the current thread to suspend execution
+** for at least a number of milliseconds specified in its parameter.
+**
+** If the operating system does not support sleep requests with
+** millisecond time resolution, then the time will be rounded up to
+** the nearest second. The number of milliseconds of sleep actually
+** requested from the operating system is returned.
+**
+** ^SQLite implements this interface by calling the xSleep()
+** method of the default [sqlite3_vfs] object.  If the xSleep() method
+** of the default VFS is not implemented correctly, or not implemented at
+** all, then the behavior of sqlite3_sleep() may deviate from the description
+** in the previous paragraphs.
+*/
+SQLITE_API int sqlite3_sleep(int);
+
+/*
+** CAPI3REF: Name Of The Folder Holding Temporary Files
+**
+** ^(If this global variable is made to point to a string which is
+** the name of a folder (a.k.a. directory), then all temporary files
+** created by SQLite when using a built-in [sqlite3_vfs | VFS]
+** will be placed in that directory.)^  ^If this variable
+** is a NULL pointer, then SQLite performs a search for an appropriate
+** temporary file directory.
+**
+** Applications are strongly discouraged from using this global variable.
+** It is required to set a temporary folder on Windows Runtime (WinRT).
+** But for all other platforms, it is highly recommended that applications
+** neither read nor write this variable.  This global variable is a relic
+** that exists for backwards compatibility of legacy applications and should
+** be avoided in new projects.
+**
+** It is not safe to read or modify this variable in more than one
+** thread at a time.  It is not safe to read or modify this variable
+** if a [database connection] is being used at the same time in a separate
+** thread.
+** It is intended that this variable be set once
+** as part of process initialization and before any SQLite interface
+** routines have been called and that this variable remain unchanged
+** thereafter.
+**
+** ^The [temp_store_directory pragma] may modify this variable and cause
+** it to point to memory obtained from [sqlite3_malloc].  ^Furthermore,
+** the [temp_store_directory pragma] always assumes that any string
+** that this variable points to is held in memory obtained from 
+** [sqlite3_malloc] and the pragma may attempt to free that memory
+** using [sqlite3_free].
+** Hence, if this variable is modified directly, either it should be
+** made NULL or made to point to memory obtained from [sqlite3_malloc]
+** or else the use of the [temp_store_directory pragma] should be avoided.
+** Except when requested by the [temp_store_directory pragma], SQLite
+** does not free the memory that sqlite3_temp_directory points to.  If
+** the application wants that memory to be freed, it must do
+** so itself, taking care to only do so after all [database connection]
+** objects have been destroyed.
+**
+** <b>Note to Windows Runtime users:</b>  The temporary directory must be set
+** prior to calling [sqlite3_open] or [sqlite3_open_v2].  Otherwise, various
+** features that require the use of temporary files may fail.  Here is an
+** example of how to do this using C++ with the Windows Runtime:
+**
+** <blockquote><pre>
+** LPCWSTR zPath = Windows::Storage::ApplicationData::Current->
+** &nbsp;     TemporaryFolder->Path->Data();
+** char zPathBuf&#91;MAX_PATH + 1&#93;;
+** memset(zPathBuf, 0, sizeof(zPathBuf));
+** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf),
+** &nbsp;     NULL, NULL);
+** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf);
+** </pre></blockquote>
+*/
+SQLITE_API SQLITE_EXTERN char *sqlite3_temp_directory;
+
+/*
+** CAPI3REF: Name Of The Folder Holding Database Files
+**
+** ^(If this global variable is made to point to a string which is
+** the name of a folder (a.k.a. directory), then all database files
+** specified with a relative pathname and created or accessed by
+** SQLite when using a built-in windows [sqlite3_vfs | VFS] will be assumed
+** to be relative to that directory.)^ ^If this variable is a NULL
+** pointer, then SQLite assumes that all database files specified
+** with a relative pathname are relative to the current directory
+** for the process.  Only the windows VFS makes use of this global
+** variable; it is ignored by the unix VFS.
+**
+** Changing the value of this variable while a database connection is
+** open can result in a corrupt database.
+**
+** It is not safe to read or modify this variable in more than one
+** thread at a time.  It is not safe to read or modify this variable
+** if a [database connection] is being used at the same time in a separate
+** thread.
+** It is intended that this variable be set once
+** as part of process initialization and before any SQLite interface
+** routines have been called and that this variable remain unchanged
+** thereafter.
+**
+** ^The [data_store_directory pragma] may modify this variable and cause
+** it to point to memory obtained from [sqlite3_malloc].  ^Furthermore,
+** the [data_store_directory pragma] always assumes that any string
+** that this variable points to is held in memory obtained from 
+** [sqlite3_malloc] and the pragma may attempt to free that memory
+** using [sqlite3_free].
+** Hence, if this variable is modified directly, either it should be
+** made NULL or made to point to memory obtained from [sqlite3_malloc]
+** or else the use of the [data_store_directory pragma] should be avoided.
+*/
+SQLITE_API SQLITE_EXTERN char *sqlite3_data_directory;
+
+/*
+** CAPI3REF: Test For Auto-Commit Mode
+** KEYWORDS: {autocommit mode}
+**
+** ^The sqlite3_get_autocommit() interface returns non-zero or
+** zero if the given database connection is or is not in autocommit mode,
+** respectively.  ^Autocommit mode is on by default.
+** ^Autocommit mode is disabled by a [BEGIN] statement.
+** ^Autocommit mode is re-enabled by a [COMMIT] or [ROLLBACK].
+**
+** If certain kinds of errors occur on a statement within a multi-statement
+** transaction (errors including [SQLITE_FULL], [SQLITE_IOERR],
+** [SQLITE_NOMEM], [SQLITE_BUSY], and [SQLITE_INTERRUPT]) then the
+** transaction might be rolled back automatically.  The only way to
+** find out whether SQLite automatically rolled back the transaction after
+** an error is to use this function.
+**
+** If another thread changes the autocommit status of the database
+** connection while this routine is running, then the return value
+** is undefined.
+*/
+SQLITE_API int sqlite3_get_autocommit(sqlite3*);
+
+/*
+** CAPI3REF: Find The Database Handle Of A Prepared Statement
+**
+** ^The sqlite3_db_handle interface returns the [database connection] handle
+** to which a [prepared statement] belongs.  ^The [database connection]
+** returned by sqlite3_db_handle is the same [database connection]
+** that was the first argument
+** to the [sqlite3_prepare_v2()] call (or its variants) that was used to
+** create the statement in the first place.
+*/
+SQLITE_API sqlite3 *sqlite3_db_handle(sqlite3_stmt*);
+
+/*
+** CAPI3REF: Return The Filename For A Database Connection
+**
+** ^The sqlite3_db_filename(D,N) interface returns a pointer to a filename
+** associated with database N of connection D.  ^The main database file
+** has the name "main".  If there is no attached database N on the database
+** connection D, or if database N is a temporary or in-memory database, then
+** a NULL pointer is returned.
+**
+** ^The filename returned by this function is the output of the
+** xFullPathname method of the [VFS].  ^In other words, the filename
+** will be an absolute pathname, even if the filename used
+** to open the database originally was a URI or relative pathname.
+*/
+SQLITE_API const char *sqlite3_db_filename(sqlite3 *db, const char *zDbName);
+
+/*
+** CAPI3REF: Determine if a database is read-only
+**
+** ^The sqlite3_db_readonly(D,N) interface returns 1 if the database N
+** of connection D is read-only, 0 if it is read/write, or -1 if N is not
+** the name of a database on connection D.
+*/
+SQLITE_API int sqlite3_db_readonly(sqlite3 *db, const char *zDbName);
+
+/*
+** CAPI3REF: Find the next prepared statement
+**
+** ^This interface returns a pointer to the next [prepared statement] after
+** pStmt associated with the [database connection] pDb.  ^If pStmt is NULL
+** then this interface returns a pointer to the first prepared statement
+** associated with the database connection pDb.  ^If no prepared statement
+** satisfies the conditions of this routine, it returns NULL.
+**
+** The [database connection] pointer D in a call to
+** [sqlite3_next_stmt(D,S)] must refer to an open database
+** connection and in particular must not be a NULL pointer.
+*/
+SQLITE_API sqlite3_stmt *sqlite3_next_stmt(sqlite3 *pDb, sqlite3_stmt *pStmt);
+
+/*
+** CAPI3REF: Commit And Rollback Notification Callbacks
+**
+** ^The sqlite3_commit_hook() interface registers a callback
+** function to be invoked whenever a transaction is [COMMIT | committed].
+** ^Any callback set by a previous call to sqlite3_commit_hook()
+** for the same database connection is overridden.
+** ^The sqlite3_rollback_hook() interface registers a callback
+** function to be invoked whenever a transaction is [ROLLBACK | rolled back].
+** ^Any callback set by a previous call to sqlite3_rollback_hook()
+** for the same database connection is overridden.
+** ^The pArg argument is passed through to the callback.
+** ^If the callback on a commit hook function returns non-zero,
+** then the commit is converted into a rollback.
+**
+** ^The sqlite3_commit_hook(D,C,P) and sqlite3_rollback_hook(D,C,P) functions
+** return the P argument from the previous call of the same function
+** on the same [database connection] D, or NULL for
+** the first call for each function on D.
+**
+** The commit and rollback hook callbacks are not reentrant.
+** The callback implementation must not do anything that will modify
+** the database connection that invoked the callback.  Any actions
+** to modify the database connection must be deferred until after the
+** completion of the [sqlite3_step()] call that triggered the commit
+** or rollback hook in the first place.
+** Note that running any other SQL statements, including SELECT statements,
+** or merely calling [sqlite3_prepare_v2()] and [sqlite3_step()] will modify
+** the database connections for the meaning of "modify" in this paragraph.
+**
+** ^Registering a NULL function disables the callback.
+**
+** ^When the commit hook callback routine returns zero, the [COMMIT]
+** operation is allowed to continue normally.  ^If the commit hook
+** returns non-zero, then the [COMMIT] is converted into a [ROLLBACK].
+** ^The rollback hook is invoked on a rollback that results from a commit
+** hook returning non-zero, just as it would be with any other rollback.
+**
+** ^For the purposes of this API, a transaction is said to have been
+** rolled back if an explicit "ROLLBACK" statement is executed, or
+** an error or constraint causes an implicit rollback to occur.
+** ^The rollback callback is not invoked if a transaction is
+** automatically rolled back because the database connection is closed.
+**
+** See also the [sqlite3_update_hook()] interface.
+*/
+SQLITE_API void *sqlite3_commit_hook(sqlite3*, int(*)(void*), void*);
+SQLITE_API void *sqlite3_rollback_hook(sqlite3*, void(*)(void *), void*);
+
+/*
+** CAPI3REF: Data Change Notification Callbacks
+**
+** ^The sqlite3_update_hook() interface registers a callback function
+** with the [database connection] identified by the first argument
+** to be invoked whenever a row is updated, inserted or deleted in
+** a rowid table.
+** ^Any callback set by a previous call to this function
+** for the same database connection is overridden.
+**
+** ^The second argument is a pointer to the function to invoke when a
+** row is updated, inserted or deleted in a rowid table.
+** ^The first argument to the callback is a copy of the third argument
+** to sqlite3_update_hook().
+** ^The second callback argument is one of [SQLITE_INSERT], [SQLITE_DELETE],
+** or [SQLITE_UPDATE], depending on the operation that caused the callback
+** to be invoked.
+** ^The third and fourth arguments to the callback contain pointers to the
+** database and table name containing the affected row.
+** ^The final callback parameter is the [rowid] of the row.
+** ^In the case of an update, this is the [rowid] after the update takes place.
+**
+** ^(The update hook is not invoked when internal system tables are
+** modified (i.e. sqlite_master and sqlite_sequence).)^
+** ^The update hook is not invoked when [WITHOUT ROWID] tables are modified.
+**
+** ^In the current implementation, the update hook
+** is not invoked when duplication rows are deleted because of an
+** [ON CONFLICT | ON CONFLICT REPLACE] clause.  ^Nor is the update hook
+** invoked when rows are deleted using the [truncate optimization].
+** The exceptions defined in this paragraph might change in a future
+** release of SQLite.
+**
+** The update hook implementation must not do anything that will modify
+** the database connection that invoked the update hook.  Any actions
+** to modify the database connection must be deferred until after the
+** completion of the [sqlite3_step()] call that triggered the update hook.
+** Note that [sqlite3_prepare_v2()] and [sqlite3_step()] both modify their
+** database connections for the meaning of "modify" in this paragraph.
+**
+** ^The sqlite3_update_hook(D,C,P) function
+** returns the P argument from the previous call
+** on the same [database connection] D, or NULL for
+** the first call on D.
+**
+** See also the [sqlite3_commit_hook()] and [sqlite3_rollback_hook()]
+** interfaces.
+*/
+SQLITE_API void *sqlite3_update_hook(
+  sqlite3*, 
+  void(*)(void *,int ,char const *,char const *,sqlite3_int64),
+  void*
+);
+
+/*
+** CAPI3REF: Enable Or Disable Shared Pager Cache
+**
+** ^(This routine enables or disables the sharing of the database cache
+** and schema data structures between [database connection | connections]
+** to the same database. Sharing is enabled if the argument is true
+** and disabled if the argument is false.)^
+**
+** ^Cache sharing is enabled and disabled for an entire process.
+** This is a change as of SQLite version 3.5.0. In prior versions of SQLite,
+** sharing was enabled or disabled for each thread separately.
+**
+** ^(The cache sharing mode set by this interface effects all subsequent
+** calls to [sqlite3_open()], [sqlite3_open_v2()], and [sqlite3_open16()].
+** Existing database connections continue use the sharing mode
+** that was in effect at the time they were opened.)^
+**
+** ^(This routine returns [SQLITE_OK] if shared cache was enabled or disabled
+** successfully.  An [error code] is returned otherwise.)^
+**
+** ^Shared cache is disabled by default. But this might change in
+** future releases of SQLite.  Applications that care about shared
+** cache setting should set it explicitly.
+**
+** This interface is threadsafe on processors where writing a
+** 32-bit integer is atomic.
+**
+** See Also:  [SQLite Shared-Cache Mode]
+*/
+SQLITE_API int sqlite3_enable_shared_cache(int);
+
+/*
+** CAPI3REF: Attempt To Free Heap Memory
+**
+** ^The sqlite3_release_memory() interface attempts to free N bytes
+** of heap memory by deallocating non-essential memory allocations
+** held by the database library.   Memory used to cache database
+** pages to improve performance is an example of non-essential memory.
+** ^sqlite3_release_memory() returns the number of bytes actually freed,
+** which might be more or less than the amount requested.
+** ^The sqlite3_release_memory() routine is a no-op returning zero
+** if SQLite is not compiled with [SQLITE_ENABLE_MEMORY_MANAGEMENT].
+**
+** See also: [sqlite3_db_release_memory()]
+*/
+SQLITE_API int sqlite3_release_memory(int);
+
+/*
+** CAPI3REF: Free Memory Used By A Database Connection
+**
+** ^The sqlite3_db_release_memory(D) interface attempts to free as much heap
+** memory as possible from database connection D. Unlike the
+** [sqlite3_release_memory()] interface, this interface is in effect even
+** when the [SQLITE_ENABLE_MEMORY_MANAGEMENT] compile-time option is
+** omitted.
+**
+** See also: [sqlite3_release_memory()]
+*/
+SQLITE_API int sqlite3_db_release_memory(sqlite3*);
+
+/*
+** CAPI3REF: Impose A Limit On Heap Size
+**
+** ^The sqlite3_soft_heap_limit64() interface sets and/or queries the
+** soft limit on the amount of heap memory that may be allocated by SQLite.
+** ^SQLite strives to keep heap memory utilization below the soft heap
+** limit by reducing the number of pages held in the page cache
+** as heap memory usages approaches the limit.
+** ^The soft heap limit is "soft" because even though SQLite strives to stay
+** below the limit, it will exceed the limit rather than generate
+** an [SQLITE_NOMEM] error.  In other words, the soft heap limit 
+** is advisory only.
+**
+** ^The return value from sqlite3_soft_heap_limit64() is the size of
+** the soft heap limit prior to the call, or negative in the case of an
+** error.  ^If the argument N is negative
+** then no change is made to the soft heap limit.  Hence, the current
+** size of the soft heap limit can be determined by invoking
+** sqlite3_soft_heap_limit64() with a negative argument.
+**
+** ^If the argument N is zero then the soft heap limit is disabled.
+**
+** ^(The soft heap limit is not enforced in the current implementation
+** if one or more of following conditions are true:
+**
+** <ul>
+** <li> The soft heap limit is set to zero.
+** <li> Memory accounting is disabled using a combination of the
+**      [sqlite3_config]([SQLITE_CONFIG_MEMSTATUS],...) start-time option and
+**      the [SQLITE_DEFAULT_MEMSTATUS] compile-time option.
+** <li> An alternative page cache implementation is specified using
+**      [sqlite3_config]([SQLITE_CONFIG_PCACHE2],...).
+** <li> The page cache allocates from its own memory pool supplied
+**      by [sqlite3_config]([SQLITE_CONFIG_PAGECACHE],...) rather than
+**      from the heap.
+** </ul>)^
+**
+** Beginning with SQLite version 3.7.3, the soft heap limit is enforced
+** regardless of whether or not the [SQLITE_ENABLE_MEMORY_MANAGEMENT]
+** compile-time option is invoked.  With [SQLITE_ENABLE_MEMORY_MANAGEMENT],
+** the soft heap limit is enforced on every memory allocation.  Without
+** [SQLITE_ENABLE_MEMORY_MANAGEMENT], the soft heap limit is only enforced
+** when memory is allocated by the page cache.  Testing suggests that because
+** the page cache is the predominate memory user in SQLite, most
+** applications will achieve adequate soft heap limit enforcement without
+** the use of [SQLITE_ENABLE_MEMORY_MANAGEMENT].
+**
+** The circumstances under which SQLite will enforce the soft heap limit may
+** changes in future releases of SQLite.
+*/
+SQLITE_API sqlite3_int64 sqlite3_soft_heap_limit64(sqlite3_int64 N);
+
+/*
+** CAPI3REF: Deprecated Soft Heap Limit Interface
+** DEPRECATED
+**
+** This is a deprecated version of the [sqlite3_soft_heap_limit64()]
+** interface.  This routine is provided for historical compatibility
+** only.  All new applications should use the
+** [sqlite3_soft_heap_limit64()] interface rather than this one.
+*/
+SQLITE_API SQLITE_DEPRECATED void sqlite3_soft_heap_limit(int N);
+
+
+/*
+** CAPI3REF: Extract Metadata About A Column Of A Table
+**
+** ^(The sqlite3_table_column_metadata(X,D,T,C,....) routine returns
+** information about column C of table T in database D
+** on [database connection] X.)^  ^The sqlite3_table_column_metadata()
+** interface returns SQLITE_OK and fills in the non-NULL pointers in
+** the final five arguments with appropriate values if the specified
+** column exists.  ^The sqlite3_table_column_metadata() interface returns
+** SQLITE_ERROR and if the specified column does not exist.
+** ^If the column-name parameter to sqlite3_table_column_metadata() is a
+** NULL pointer, then this routine simply checks for the existance of the
+** table and returns SQLITE_OK if the table exists and SQLITE_ERROR if it
+** does not.
+**
+** ^The column is identified by the second, third and fourth parameters to
+** this function. ^(The second parameter is either the name of the database
+** (i.e. "main", "temp", or an attached database) containing the specified
+** table or NULL.)^ ^If it is NULL, then all attached databases are searched
+** for the table using the same algorithm used by the database engine to
+** resolve unqualified table references.
+**
+** ^The third and fourth parameters to this function are the table and column
+** name of the desired column, respectively.
+**
+** ^Metadata is returned by writing to the memory locations passed as the 5th
+** and subsequent parameters to this function. ^Any of these arguments may be
+** NULL, in which case the corresponding element of metadata is omitted.
+**
+** ^(<blockquote>
+** <table border="1">
+** <tr><th> Parameter <th> Output<br>Type <th>  Description
+**
+** <tr><td> 5th <td> const char* <td> Data type
+** <tr><td> 6th <td> const char* <td> Name of default collation sequence
+** <tr><td> 7th <td> int         <td> True if column has a NOT NULL constraint
+** <tr><td> 8th <td> int         <td> True if column is part of the PRIMARY KEY
+** <tr><td> 9th <td> int         <td> True if column is [AUTOINCREMENT]
+** </table>
+** </blockquote>)^
+**
+** ^The memory pointed to by the character pointers returned for the
+** declaration type and collation sequence is valid until the next
+** call to any SQLite API function.
+**
+** ^If the specified table is actually a view, an [error code] is returned.
+**
+** ^If the specified column is "rowid", "oid" or "_rowid_" and the table 
+** is not a [WITHOUT ROWID] table and an
+** [INTEGER PRIMARY KEY] column has been explicitly declared, then the output
+** parameters are set for the explicitly declared column. ^(If there is no
+** [INTEGER PRIMARY KEY] column, then the outputs
+** for the [rowid] are set as follows:
+**
+** <pre>
+**     data type: "INTEGER"
+**     collation sequence: "BINARY"
+**     not null: 0
+**     primary key: 1
+**     auto increment: 0
+** </pre>)^
+**
+** ^This function causes all database schemas to be read from disk and
+** parsed, if that has not already been done, and returns an error if
+** any errors are encountered while loading the schema.
+*/
+SQLITE_API int sqlite3_table_column_metadata(
+  sqlite3 *db,                /* Connection handle */
+  const char *zDbName,        /* Database name or NULL */
+  const char *zTableName,     /* Table name */
+  const char *zColumnName,    /* Column name */
+  char const **pzDataType,    /* OUTPUT: Declared data type */
+  char const **pzCollSeq,     /* OUTPUT: Collation sequence name */
+  int *pNotNull,              /* OUTPUT: True if NOT NULL constraint exists */
+  int *pPrimaryKey,           /* OUTPUT: True if column part of PK */
+  int *pAutoinc               /* OUTPUT: True if column is auto-increment */
+);
+
+/*
+** CAPI3REF: Load An Extension
+**
+** ^This interface loads an SQLite extension library from the named file.
+**
+** ^The sqlite3_load_extension() interface attempts to load an
+** [SQLite extension] library contained in the file zFile.  If
+** the file cannot be loaded directly, attempts are made to load
+** with various operating-system specific extensions added.
+** So for example, if "samplelib" cannot be loaded, then names like
+** "samplelib.so" or "samplelib.dylib" or "samplelib.dll" might
+** be tried also.
+**
+** ^The entry point is zProc.
+** ^(zProc may be 0, in which case SQLite will try to come up with an
+** entry point name on its own.  It first tries "sqlite3_extension_init".
+** If that does not work, it constructs a name "sqlite3_X_init" where the
+** X is consists of the lower-case equivalent of all ASCII alphabetic
+** characters in the filename from the last "/" to the first following
+** "." and omitting any initial "lib".)^
+** ^The sqlite3_load_extension() interface returns
+** [SQLITE_OK] on success and [SQLITE_ERROR] if something goes wrong.
+** ^If an error occurs and pzErrMsg is not 0, then the
+** [sqlite3_load_extension()] interface shall attempt to
+** fill *pzErrMsg with error message text stored in memory
+** obtained from [sqlite3_malloc()]. The calling function
+** should free this memory by calling [sqlite3_free()].
+**
+** ^Extension loading must be enabled using
+** [sqlite3_enable_load_extension()] prior to calling this API,
+** otherwise an error will be returned.
+**
+** See also the [load_extension() SQL function].
+*/
+SQLITE_API int sqlite3_load_extension(
+  sqlite3 *db,          /* Load the extension into this database connection */
+  const char *zFile,    /* Name of the shared library containing extension */
+  const char *zProc,    /* Entry point.  Derived from zFile if 0 */
+  char **pzErrMsg       /* Put error message here if not 0 */
+);
+
+/*
+** CAPI3REF: Enable Or Disable Extension Loading
+**
+** ^So as not to open security holes in older applications that are
+** unprepared to deal with [extension loading], and as a means of disabling
+** [extension loading] while evaluating user-entered SQL, the following API
+** is provided to turn the [sqlite3_load_extension()] mechanism on and off.
+**
+** ^Extension loading is off by default.
+** ^Call the sqlite3_enable_load_extension() routine with onoff==1
+** to turn extension loading on and call it with onoff==0 to turn
+** it back off again.
+*/
+SQLITE_API int sqlite3_enable_load_extension(sqlite3 *db, int onoff);
+
+/*
+** CAPI3REF: Automatically Load Statically Linked Extensions
+**
+** ^This interface causes the xEntryPoint() function to be invoked for
+** each new [database connection] that is created.  The idea here is that
+** xEntryPoint() is the entry point for a statically linked [SQLite extension]
+** that is to be automatically loaded into all new database connections.
+**
+** ^(Even though the function prototype shows that xEntryPoint() takes
+** no arguments and returns void, SQLite invokes xEntryPoint() with three
+** arguments and expects and integer result as if the signature of the
+** entry point where as follows:
+**
+** <blockquote><pre>
+** &nbsp;  int xEntryPoint(
+** &nbsp;    sqlite3 *db,
+** &nbsp;    const char **pzErrMsg,
+** &nbsp;    const struct sqlite3_api_routines *pThunk
+** &nbsp;  );
+** </pre></blockquote>)^
+**
+** If the xEntryPoint routine encounters an error, it should make *pzErrMsg
+** point to an appropriate error message (obtained from [sqlite3_mprintf()])
+** and return an appropriate [error code].  ^SQLite ensures that *pzErrMsg
+** is NULL before calling the xEntryPoint().  ^SQLite will invoke
+** [sqlite3_free()] on *pzErrMsg after xEntryPoint() returns.  ^If any
+** xEntryPoint() returns an error, the [sqlite3_open()], [sqlite3_open16()],
+** or [sqlite3_open_v2()] call that provoked the xEntryPoint() will fail.
+**
+** ^Calling sqlite3_auto_extension(X) with an entry point X that is already
+** on the list of automatic extensions is a harmless no-op. ^No entry point
+** will be called more than once for each database connection that is opened.
+**
+** See also: [sqlite3_reset_auto_extension()]
+** and [sqlite3_cancel_auto_extension()]
+*/
+SQLITE_API int sqlite3_auto_extension(void (*xEntryPoint)(void));
+
+/*
+** CAPI3REF: Cancel Automatic Extension Loading
+**
+** ^The [sqlite3_cancel_auto_extension(X)] interface unregisters the
+** initialization routine X that was registered using a prior call to
+** [sqlite3_auto_extension(X)].  ^The [sqlite3_cancel_auto_extension(X)]
+** routine returns 1 if initialization routine X was successfully 
+** unregistered and it returns 0 if X was not on the list of initialization
+** routines.
+*/
+SQLITE_API int sqlite3_cancel_auto_extension(void (*xEntryPoint)(void));
+
+/*
+** CAPI3REF: Reset Automatic Extension Loading
+**
+** ^This interface disables all automatic extensions previously
+** registered using [sqlite3_auto_extension()].
+*/
+SQLITE_API void sqlite3_reset_auto_extension(void);
+
+/*
+** The interface to the virtual-table mechanism is currently considered
+** to be experimental.  The interface might change in incompatible ways.
+** If this is a problem for you, do not use the interface at this time.
+**
+** When the virtual-table mechanism stabilizes, we will declare the
+** interface fixed, support it indefinitely, and remove this comment.
+*/
+
+/*
+** Structures used by the virtual table interface
+*/
+typedef struct sqlite3_vtab sqlite3_vtab;
+typedef struct sqlite3_index_info sqlite3_index_info;
+typedef struct sqlite3_vtab_cursor sqlite3_vtab_cursor;
+typedef struct sqlite3_module sqlite3_module;
+
+/*
+** CAPI3REF: Virtual Table Object
+** KEYWORDS: sqlite3_module {virtual table module}
+**
+** This structure, sometimes called a "virtual table module", 
+** defines the implementation of a [virtual tables].  
+** This structure consists mostly of methods for the module.
+**
+** ^A virtual table module is created by filling in a persistent
+** instance of this structure and passing a pointer to that instance
+** to [sqlite3_create_module()] or [sqlite3_create_module_v2()].
+** ^The registration remains valid until it is replaced by a different
+** module or until the [database connection] closes.  The content
+** of this structure must not change while it is registered with
+** any database connection.
+*/
+struct sqlite3_module {
+  int iVersion;
+  int (*xCreate)(sqlite3*, void *pAux,
+               int argc, const char *const*argv,
+               sqlite3_vtab **ppVTab, char**);
+  int (*xConnect)(sqlite3*, void *pAux,
+               int argc, const char *const*argv,
+               sqlite3_vtab **ppVTab, char**);
+  int (*xBestIndex)(sqlite3_vtab *pVTab, sqlite3_index_info*);
+  int (*xDisconnect)(sqlite3_vtab *pVTab);
+  int (*xDestroy)(sqlite3_vtab *pVTab);
+  int (*xOpen)(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor);
+  int (*xClose)(sqlite3_vtab_cursor*);
+  int (*xFilter)(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
+                int argc, sqlite3_value **argv);
+  int (*xNext)(sqlite3_vtab_cursor*);
+  int (*xEof)(sqlite3_vtab_cursor*);
+  int (*xColumn)(sqlite3_vtab_cursor*, sqlite3_context*, int);
+  int (*xRowid)(sqlite3_vtab_cursor*, sqlite3_int64 *pRowid);
+  int (*xUpdate)(sqlite3_vtab *, int, sqlite3_value **, sqlite3_int64 *);
+  int (*xBegin)(sqlite3_vtab *pVTab);
+  int (*xSync)(sqlite3_vtab *pVTab);
+  int (*xCommit)(sqlite3_vtab *pVTab);
+  int (*xRollback)(sqlite3_vtab *pVTab);
+  int (*xFindFunction)(sqlite3_vtab *pVtab, int nArg, const char *zName,
+                       void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
+                       void **ppArg);
+  int (*xRename)(sqlite3_vtab *pVtab, const char *zNew);
+  /* The methods above are in version 1 of the sqlite_module object. Those 
+  ** below are for version 2 and greater. */
+  int (*xSavepoint)(sqlite3_vtab *pVTab, int);
+  int (*xRelease)(sqlite3_vtab *pVTab, int);
+  int (*xRollbackTo)(sqlite3_vtab *pVTab, int);
+};
+
+/*
+** CAPI3REF: Virtual Table Indexing Information
+** KEYWORDS: sqlite3_index_info
+**
+** The sqlite3_index_info structure and its substructures is used as part
+** of the [virtual table] interface to
+** pass information into and receive the reply from the [xBestIndex]
+** method of a [virtual table module].  The fields under **Inputs** are the
+** inputs to xBestIndex and are read-only.  xBestIndex inserts its
+** results into the **Outputs** fields.
+**
+** ^(The aConstraint[] array records WHERE clause constraints of the form:
+**
+** <blockquote>column OP expr</blockquote>
+**
+** where OP is =, &lt;, &lt;=, &gt;, or &gt;=.)^  ^(The particular operator is
+** stored in aConstraint[].op using one of the
+** [SQLITE_INDEX_CONSTRAINT_EQ | SQLITE_INDEX_CONSTRAINT_ values].)^
+** ^(The index of the column is stored in
+** aConstraint[].iColumn.)^  ^(aConstraint[].usable is TRUE if the
+** expr on the right-hand side can be evaluated (and thus the constraint
+** is usable) and false if it cannot.)^
+**
+** ^The optimizer automatically inverts terms of the form "expr OP column"
+** and makes other simplifications to the WHERE clause in an attempt to
+** get as many WHERE clause terms into the form shown above as possible.
+** ^The aConstraint[] array only reports WHERE clause terms that are
+** relevant to the particular virtual table being queried.
+**
+** ^Information about the ORDER BY clause is stored in aOrderBy[].
+** ^Each term of aOrderBy records a column of the ORDER BY clause.
+**
+** The [xBestIndex] method must fill aConstraintUsage[] with information
+** about what parameters to pass to xFilter.  ^If argvIndex>0 then
+** the right-hand side of the corresponding aConstraint[] is evaluated
+** and becomes the argvIndex-th entry in argv.  ^(If aConstraintUsage[].omit
+** is true, then the constraint is assumed to be fully handled by the
+** virtual table and is not checked again by SQLite.)^
+**
+** ^The idxNum and idxPtr values are recorded and passed into the
+** [xFilter] method.
+** ^[sqlite3_free()] is used to free idxPtr if and only if
+** needToFreeIdxPtr is true.
+**
+** ^The orderByConsumed means that output from [xFilter]/[xNext] will occur in
+** the correct order to satisfy the ORDER BY clause so that no separate
+** sorting step is required.
+**
+** ^The estimatedCost value is an estimate of the cost of a particular
+** strategy. A cost of N indicates that the cost of the strategy is similar
+** to a linear scan of an SQLite table with N rows. A cost of log(N) 
+** indicates that the expense of the operation is similar to that of a
+** binary search on a unique indexed field of an SQLite table with N rows.
+**
+** ^The estimatedRows value is an estimate of the number of rows that
+** will be returned by the strategy.
+**
+** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
+** structure for SQLite version 3.8.2. If a virtual table extension is
+** used with an SQLite version earlier than 3.8.2, the results of attempting 
+** to read or write the estimatedRows field are undefined (but are likely 
+** to included crashing the application). The estimatedRows field should
+** therefore only be used if [sqlite3_libversion_number()] returns a
+** value greater than or equal to 3008002.
+*/
+struct sqlite3_index_info {
+  /* Inputs */
+  int nConstraint;           /* Number of entries in aConstraint */
+  struct sqlite3_index_constraint {
+     int iColumn;              /* Column on left-hand side of constraint */
+     unsigned char op;         /* Constraint operator */
+     unsigned char usable;     /* True if this constraint is usable */
+     int iTermOffset;          /* Used internally - xBestIndex should ignore */
+  } *aConstraint;            /* Table of WHERE clause constraints */
+  int nOrderBy;              /* Number of terms in the ORDER BY clause */
+  struct sqlite3_index_orderby {
+     int iColumn;              /* Column number */
+     unsigned char desc;       /* True for DESC.  False for ASC. */
+  } *aOrderBy;               /* The ORDER BY clause */
+  /* Outputs */
+  struct sqlite3_index_constraint_usage {
+    int argvIndex;           /* if >0, constraint is part of argv to xFilter */
+    unsigned char omit;      /* Do not code a test for this constraint */
+  } *aConstraintUsage;
+  int idxNum;                /* Number used to identify the index */
+  char *idxStr;              /* String, possibly obtained from sqlite3_malloc */
+  int needToFreeIdxStr;      /* Free idxStr using sqlite3_free() if true */
+  int orderByConsumed;       /* True if output is already ordered */
+  double estimatedCost;           /* Estimated cost of using this index */
+  /* Fields below are only available in SQLite 3.8.2 and later */
+  sqlite3_int64 estimatedRows;    /* Estimated number of rows returned */
+};
+
+/*
+** CAPI3REF: Virtual Table Constraint Operator Codes
+**
+** These macros defined the allowed values for the
+** [sqlite3_index_info].aConstraint[].op field.  Each value represents
+** an operator that is part of a constraint term in the wHERE clause of
+** a query that uses a [virtual table].
+*/
+#define SQLITE_INDEX_CONSTRAINT_EQ    2
+#define SQLITE_INDEX_CONSTRAINT_GT    4
+#define SQLITE_INDEX_CONSTRAINT_LE    8
+#define SQLITE_INDEX_CONSTRAINT_LT    16
+#define SQLITE_INDEX_CONSTRAINT_GE    32
+#define SQLITE_INDEX_CONSTRAINT_MATCH 64
+
+/*
+** CAPI3REF: Register A Virtual Table Implementation
+**
+** ^These routines are used to register a new [virtual table module] name.
+** ^Module names must be registered before
+** creating a new [virtual table] using the module and before using a
+** preexisting [virtual table] for the module.
+**
+** ^The module name is registered on the [database connection] specified
+** by the first parameter.  ^The name of the module is given by the 
+** second parameter.  ^The third parameter is a pointer to
+** the implementation of the [virtual table module].   ^The fourth
+** parameter is an arbitrary client data pointer that is passed through
+** into the [xCreate] and [xConnect] methods of the virtual table module
+** when a new virtual table is be being created or reinitialized.
+**
+** ^The sqlite3_create_module_v2() interface has a fifth parameter which
+** is a pointer to a destructor for the pClientData.  ^SQLite will
+** invoke the destructor function (if it is not NULL) when SQLite
+** no longer needs the pClientData pointer.  ^The destructor will also
+** be invoked if the call to sqlite3_create_module_v2() fails.
+** ^The sqlite3_create_module()
+** interface is equivalent to sqlite3_create_module_v2() with a NULL
+** destructor.
+*/
+SQLITE_API int sqlite3_create_module(
+  sqlite3 *db,               /* SQLite connection to register module with */
+  const char *zName,         /* Name of the module */
+  const sqlite3_module *p,   /* Methods for the module */
+  void *pClientData          /* Client data for xCreate/xConnect */
+);
+SQLITE_API int sqlite3_create_module_v2(
+  sqlite3 *db,               /* SQLite connection to register module with */
+  const char *zName,         /* Name of the module */
+  const sqlite3_module *p,   /* Methods for the module */
+  void *pClientData,         /* Client data for xCreate/xConnect */
+  void(*xDestroy)(void*)     /* Module destructor function */
+);
+
+/*
+** CAPI3REF: Virtual Table Instance Object
+** KEYWORDS: sqlite3_vtab
+**
+** Every [virtual table module] implementation uses a subclass
+** of this object to describe a particular instance
+** of the [virtual table].  Each subclass will
+** be tailored to the specific needs of the module implementation.
+** The purpose of this superclass is to define certain fields that are
+** common to all module implementations.
+**
+** ^Virtual tables methods can set an error message by assigning a
+** string obtained from [sqlite3_mprintf()] to zErrMsg.  The method should
+** take care that any prior string is freed by a call to [sqlite3_free()]
+** prior to assigning a new string to zErrMsg.  ^After the error message
+** is delivered up to the client application, the string will be automatically
+** freed by sqlite3_free() and the zErrMsg field will be zeroed.
+*/
+struct sqlite3_vtab {
+  const sqlite3_module *pModule;  /* The module for this virtual table */
+  int nRef;                       /* NO LONGER USED */
+  char *zErrMsg;                  /* Error message from sqlite3_mprintf() */
+  /* Virtual table implementations will typically add additional fields */
+};
+
+/*
+** CAPI3REF: Virtual Table Cursor Object
+** KEYWORDS: sqlite3_vtab_cursor {virtual table cursor}
+**
+** Every [virtual table module] implementation uses a subclass of the
+** following structure to describe cursors that point into the
+** [virtual table] and are used
+** to loop through the virtual table.  Cursors are created using the
+** [sqlite3_module.xOpen | xOpen] method of the module and are destroyed
+** by the [sqlite3_module.xClose | xClose] method.  Cursors are used
+** by the [xFilter], [xNext], [xEof], [xColumn], and [xRowid] methods
+** of the module.  Each module implementation will define
+** the content of a cursor structure to suit its own needs.
+**
+** This superclass exists in order to define fields of the cursor that
+** are common to all implementations.
+*/
+struct sqlite3_vtab_cursor {
+  sqlite3_vtab *pVtab;      /* Virtual table of this cursor */
+  /* Virtual table implementations will typically add additional fields */
+};
+
+/*
+** CAPI3REF: Declare The Schema Of A Virtual Table
+**
+** ^The [xCreate] and [xConnect] methods of a
+** [virtual table module] call this interface
+** to declare the format (the names and datatypes of the columns) of
+** the virtual tables they implement.
+*/
+SQLITE_API int sqlite3_declare_vtab(sqlite3*, const char *zSQL);
+
+/*
+** CAPI3REF: Overload A Function For A Virtual Table
+**
+** ^(Virtual tables can provide alternative implementations of functions
+** using the [xFindFunction] method of the [virtual table module].  
+** But global versions of those functions
+** must exist in order to be overloaded.)^
+**
+** ^(This API makes sure a global version of a function with a particular
+** name and number of parameters exists.  If no such function exists
+** before this API is called, a new function is created.)^  ^The implementation
+** of the new function always causes an exception to be thrown.  So
+** the new function is not good for anything by itself.  Its only
+** purpose is to be a placeholder function that can be overloaded
+** by a [virtual table].
+*/
+SQLITE_API int sqlite3_overload_function(sqlite3*, const char *zFuncName, int nArg);
+
+/*
+** The interface to the virtual-table mechanism defined above (back up
+** to a comment remarkably similar to this one) is currently considered
+** to be experimental.  The interface might change in incompatible ways.
+** If this is a problem for you, do not use the interface at this time.
+**
+** When the virtual-table mechanism stabilizes, we will declare the
+** interface fixed, support it indefinitely, and remove this comment.
+*/
+
+/*
+** CAPI3REF: A Handle To An Open BLOB
+** KEYWORDS: {BLOB handle} {BLOB handles}
+**
+** An instance of this object represents an open BLOB on which
+** [sqlite3_blob_open | incremental BLOB I/O] can be performed.
+** ^Objects of this type are created by [sqlite3_blob_open()]
+** and destroyed by [sqlite3_blob_close()].
+** ^The [sqlite3_blob_read()] and [sqlite3_blob_write()] interfaces
+** can be used to read or write small subsections of the BLOB.
+** ^The [sqlite3_blob_bytes()] interface returns the size of the BLOB in bytes.
+*/
+typedef struct sqlite3_blob sqlite3_blob;
+
+/*
+** CAPI3REF: Open A BLOB For Incremental I/O
+**
+** ^(This interfaces opens a [BLOB handle | handle] to the BLOB located
+** in row iRow, column zColumn, table zTable in database zDb;
+** in other words, the same BLOB that would be selected by:
+**
+** <pre>
+**     SELECT zColumn FROM zDb.zTable WHERE [rowid] = iRow;
+** </pre>)^
+**
+** ^(Parameter zDb is not the filename that contains the database, but 
+** rather the symbolic name of the database. For attached databases, this is
+** the name that appears after the AS keyword in the [ATTACH] statement.
+** For the main database file, the database name is "main". For TEMP
+** tables, the database name is "temp".)^
+**
+** ^If the flags parameter is non-zero, then the BLOB is opened for read
+** and write access. ^If the flags parameter is zero, the BLOB is opened for
+** read-only access.
+**
+** ^(On success, [SQLITE_OK] is returned and the new [BLOB handle] is stored
+** in *ppBlob. Otherwise an [error code] is returned and, unless the error
+** code is SQLITE_MISUSE, *ppBlob is set to NULL.)^ ^This means that, provided
+** the API is not misused, it is always safe to call [sqlite3_blob_close()] 
+** on *ppBlob after this function it returns.
+**
+** This function fails with SQLITE_ERROR if any of the following are true:
+** <ul>
+**   <li> ^(Database zDb does not exist)^, 
+**   <li> ^(Table zTable does not exist within database zDb)^, 
+**   <li> ^(Table zTable is a WITHOUT ROWID table)^, 
+**   <li> ^(Column zColumn does not exist)^,
+**   <li> ^(Row iRow is not present in the table)^,
+**   <li> ^(The specified column of row iRow contains a value that is not
+**         a TEXT or BLOB value)^,
+**   <li> ^(Column zColumn is part of an index, PRIMARY KEY or UNIQUE 
+**         constraint and the blob is being opened for read/write access)^,
+**   <li> ^([foreign key constraints | Foreign key constraints] are enabled, 
+**         column zColumn is part of a [child key] definition and the blob is
+**         being opened for read/write access)^.
+** </ul>
+**
+** ^Unless it returns SQLITE_MISUSE, this function sets the 
+** [database connection] error code and message accessible via 
+** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. 
+**
+**
+** ^(If the row that a BLOB handle points to is modified by an
+** [UPDATE], [DELETE], or by [ON CONFLICT] side-effects
+** then the BLOB handle is marked as "expired".
+** This is true if any column of the row is changed, even a column
+** other than the one the BLOB handle is open on.)^
+** ^Calls to [sqlite3_blob_read()] and [sqlite3_blob_write()] for
+** an expired BLOB handle fail with a return code of [SQLITE_ABORT].
+** ^(Changes written into a BLOB prior to the BLOB expiring are not
+** rolled back by the expiration of the BLOB.  Such changes will eventually
+** commit if the transaction continues to completion.)^
+**
+** ^Use the [sqlite3_blob_bytes()] interface to determine the size of
+** the opened blob.  ^The size of a blob may not be changed by this
+** interface.  Use the [UPDATE] SQL command to change the size of a
+** blob.
+**
+** ^The [sqlite3_bind_zeroblob()] and [sqlite3_result_zeroblob()] interfaces
+** and the built-in [zeroblob] SQL function may be used to create a 
+** zero-filled blob to read or write using the incremental-blob interface.
+**
+** To avoid a resource leak, every open [BLOB handle] should eventually
+** be released by a call to [sqlite3_blob_close()].
+*/
+SQLITE_API int sqlite3_blob_open(
+  sqlite3*,
+  const char *zDb,
+  const char *zTable,
+  const char *zColumn,
+  sqlite3_int64 iRow,
+  int flags,
+  sqlite3_blob **ppBlob
+);
+
+/*
+** CAPI3REF: Move a BLOB Handle to a New Row
+**
+** ^This function is used to move an existing blob handle so that it points
+** to a different row of the same database table. ^The new row is identified
+** by the rowid value passed as the second argument. Only the row can be
+** changed. ^The database, table and column on which the blob handle is open
+** remain the same. Moving an existing blob handle to a new row can be
+** faster than closing the existing handle and opening a new one.
+**
+** ^(The new row must meet the same criteria as for [sqlite3_blob_open()] -
+** it must exist and there must be either a blob or text value stored in
+** the nominated column.)^ ^If the new row is not present in the table, or if
+** it does not contain a blob or text value, or if another error occurs, an
+** SQLite error code is returned and the blob handle is considered aborted.
+** ^All subsequent calls to [sqlite3_blob_read()], [sqlite3_blob_write()] or
+** [sqlite3_blob_reopen()] on an aborted blob handle immediately return
+** SQLITE_ABORT. ^Calling [sqlite3_blob_bytes()] on an aborted blob handle
+** always returns zero.
+**
+** ^This function sets the database handle error code and message.
+*/
+SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64);
+
+/*
+** CAPI3REF: Close A BLOB Handle
+**
+** ^This function closes an open [BLOB handle]. ^(The BLOB handle is closed
+** unconditionally.  Even if this routine returns an error code, the 
+** handle is still closed.)^
+**
+** ^If the blob handle being closed was opened for read-write access, and if
+** the database is in auto-commit mode and there are no other open read-write
+** blob handles or active write statements, the current transaction is
+** committed. ^If an error occurs while committing the transaction, an error
+** code is returned and the transaction rolled back.
+**
+** Calling this function with an argument that is not a NULL pointer or an
+** open blob handle results in undefined behaviour. ^Calling this routine 
+** with a null pointer (such as would be returned by a failed call to 
+** [sqlite3_blob_open()]) is a harmless no-op. ^Otherwise, if this function
+** is passed a valid open blob handle, the values returned by the 
+** sqlite3_errcode() and sqlite3_errmsg() functions are set before returning.
+*/
+SQLITE_API int sqlite3_blob_close(sqlite3_blob *);
+
+/*
+** CAPI3REF: Return The Size Of An Open BLOB
+**
+** ^Returns the size in bytes of the BLOB accessible via the 
+** successfully opened [BLOB handle] in its only argument.  ^The
+** incremental blob I/O routines can only read or overwriting existing
+** blob content; they cannot change the size of a blob.
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+*/
+SQLITE_API int sqlite3_blob_bytes(sqlite3_blob *);
+
+/*
+** CAPI3REF: Read Data From A BLOB Incrementally
+**
+** ^(This function is used to read data from an open [BLOB handle] into a
+** caller-supplied buffer. N bytes of data are copied into buffer Z
+** from the open BLOB, starting at offset iOffset.)^
+**
+** ^If offset iOffset is less than N bytes from the end of the BLOB,
+** [SQLITE_ERROR] is returned and no data is read.  ^If N or iOffset is
+** less than zero, [SQLITE_ERROR] is returned and no data is read.
+** ^The size of the blob (and hence the maximum value of N+iOffset)
+** can be determined using the [sqlite3_blob_bytes()] interface.
+**
+** ^An attempt to read from an expired [BLOB handle] fails with an
+** error code of [SQLITE_ABORT].
+**
+** ^(On success, sqlite3_blob_read() returns SQLITE_OK.
+** Otherwise, an [error code] or an [extended error code] is returned.)^
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+**
+** See also: [sqlite3_blob_write()].
+*/
+SQLITE_API int sqlite3_blob_read(sqlite3_blob *, void *Z, int N, int iOffset);
+
+/*
+** CAPI3REF: Write Data Into A BLOB Incrementally
+**
+** ^(This function is used to write data into an open [BLOB handle] from a
+** caller-supplied buffer. N bytes of data are copied from the buffer Z
+** into the open BLOB, starting at offset iOffset.)^
+**
+** ^(On success, sqlite3_blob_write() returns SQLITE_OK.
+** Otherwise, an  [error code] or an [extended error code] is returned.)^
+** ^Unless SQLITE_MISUSE is returned, this function sets the 
+** [database connection] error code and message accessible via 
+** [sqlite3_errcode()] and [sqlite3_errmsg()] and related functions. 
+**
+** ^If the [BLOB handle] passed as the first argument was not opened for
+** writing (the flags parameter to [sqlite3_blob_open()] was zero),
+** this function returns [SQLITE_READONLY].
+**
+** This function may only modify the contents of the BLOB; it is
+** not possible to increase the size of a BLOB using this API.
+** ^If offset iOffset is less than N bytes from the end of the BLOB,
+** [SQLITE_ERROR] is returned and no data is written. The size of the 
+** BLOB (and hence the maximum value of N+iOffset) can be determined 
+** using the [sqlite3_blob_bytes()] interface. ^If N or iOffset are less 
+** than zero [SQLITE_ERROR] is returned and no data is written.
+**
+** ^An attempt to write to an expired [BLOB handle] fails with an
+** error code of [SQLITE_ABORT].  ^Writes to the BLOB that occurred
+** before the [BLOB handle] expired are not rolled back by the
+** expiration of the handle, though of course those changes might
+** have been overwritten by the statement that expired the BLOB handle
+** or by other independent statements.
+**
+** This routine only works on a [BLOB handle] which has been created
+** by a prior successful call to [sqlite3_blob_open()] and which has not
+** been closed by [sqlite3_blob_close()].  Passing any other pointer in
+** to this routine results in undefined and probably undesirable behavior.
+**
+** See also: [sqlite3_blob_read()].
+*/
+SQLITE_API int sqlite3_blob_write(sqlite3_blob *, const void *z, int n, int iOffset);
+
+/*
+** CAPI3REF: Virtual File System Objects
+**
+** A virtual filesystem (VFS) is an [sqlite3_vfs] object
+** that SQLite uses to interact
+** with the underlying operating system.  Most SQLite builds come with a
+** single default VFS that is appropriate for the host computer.
+** New VFSes can be registered and existing VFSes can be unregistered.
+** The following interfaces are provided.
+**
+** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name.
+** ^Names are case sensitive.
+** ^Names are zero-terminated UTF-8 strings.
+** ^If there is no match, a NULL pointer is returned.
+** ^If zVfsName is NULL then the default VFS is returned.
+**
+** ^New VFSes are registered with sqlite3_vfs_register().
+** ^Each new VFS becomes the default VFS if the makeDflt flag is set.
+** ^The same VFS can be registered multiple times without injury.
+** ^To make an existing VFS into the default VFS, register it again
+** with the makeDflt flag set.  If two different VFSes with the
+** same name are registered, the behavior is undefined.  If a
+** VFS is registered with a name that is NULL or an empty string,
+** then the behavior is undefined.
+**
+** ^Unregister a VFS with the sqlite3_vfs_unregister() interface.
+** ^(If the default VFS is unregistered, another VFS is chosen as
+** the default.  The choice for the new VFS is arbitrary.)^
+*/
+SQLITE_API sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName);
+SQLITE_API int sqlite3_vfs_register(sqlite3_vfs*, int makeDflt);
+SQLITE_API int sqlite3_vfs_unregister(sqlite3_vfs*);
+
+/*
+** CAPI3REF: Mutexes
+**
+** The SQLite core uses these routines for thread
+** synchronization. Though they are intended for internal
+** use by SQLite, code that links against SQLite is
+** permitted to use any of these routines.
+**
+** The SQLite source code contains multiple implementations
+** of these mutex routines.  An appropriate implementation
+** is selected automatically at compile-time.  The following
+** implementations are available in the SQLite core:
+**
+** <ul>
+** <li>   SQLITE_MUTEX_PTHREADS
+** <li>   SQLITE_MUTEX_W32
+** <li>   SQLITE_MUTEX_NOOP
+** </ul>
+**
+** The SQLITE_MUTEX_NOOP implementation is a set of routines
+** that does no real locking and is appropriate for use in
+** a single-threaded application.  The SQLITE_MUTEX_PTHREADS and
+** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix
+** and Windows.
+**
+** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor
+** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex
+** implementation is included with the library. In this case the
+** application must supply a custom mutex implementation using the
+** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function
+** before calling sqlite3_initialize() or any other public sqlite3_
+** function that calls sqlite3_initialize().
+**
+** ^The sqlite3_mutex_alloc() routine allocates a new
+** mutex and returns a pointer to it. ^The sqlite3_mutex_alloc()
+** routine returns NULL if it is unable to allocate the requested
+** mutex.  The argument to sqlite3_mutex_alloc() must one of these
+** integer constants:
+**
+** <ul>
+** <li>  SQLITE_MUTEX_FAST
+** <li>  SQLITE_MUTEX_RECURSIVE
+** <li>  SQLITE_MUTEX_STATIC_MASTER
+** <li>  SQLITE_MUTEX_STATIC_MEM
+** <li>  SQLITE_MUTEX_STATIC_OPEN
+** <li>  SQLITE_MUTEX_STATIC_PRNG
+** <li>  SQLITE_MUTEX_STATIC_LRU
+** <li>  SQLITE_MUTEX_STATIC_PMEM
+** <li>  SQLITE_MUTEX_STATIC_APP1
+** <li>  SQLITE_MUTEX_STATIC_APP2
+** <li>  SQLITE_MUTEX_STATIC_APP3
+** </ul>
+**
+** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE)
+** cause sqlite3_mutex_alloc() to create
+** a new mutex.  ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE
+** is used but not necessarily so when SQLITE_MUTEX_FAST is used.
+** The mutex implementation does not need to make a distinction
+** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does
+** not want to.  SQLite will only request a recursive mutex in
+** cases where it really needs one.  If a faster non-recursive mutex
+** implementation is available on the host platform, the mutex subsystem
+** might return such a mutex in response to SQLITE_MUTEX_FAST.
+**
+** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other
+** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return
+** a pointer to a static preexisting mutex.  ^Nine static mutexes are
+** used by the current version of SQLite.  Future versions of SQLite
+** may add additional static mutexes.  Static mutexes are for internal
+** use by SQLite only.  Applications that use SQLite mutexes should
+** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or
+** SQLITE_MUTEX_RECURSIVE.
+**
+** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST
+** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc()
+** returns a different mutex on every call.  ^For the static
+** mutex types, the same mutex is returned on every call that has
+** the same type number.
+**
+** ^The sqlite3_mutex_free() routine deallocates a previously
+** allocated dynamic mutex.  Attempting to deallocate a static
+** mutex results in undefined behavior.
+**
+** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt
+** to enter a mutex.  ^If another thread is already within the mutex,
+** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return
+** SQLITE_BUSY.  ^The sqlite3_mutex_try() interface returns [SQLITE_OK]
+** upon successful entry.  ^(Mutexes created using
+** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread.
+** In such cases, the
+** mutex must be exited an equal number of times before another thread
+** can enter.)^  If the same thread tries to enter any mutex other
+** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined.
+**
+** ^(Some systems (for example, Windows 95) do not support the operation
+** implemented by sqlite3_mutex_try().  On those systems, sqlite3_mutex_try()
+** will always return SQLITE_BUSY. The SQLite core only ever uses
+** sqlite3_mutex_try() as an optimization so this is acceptable 
+** behavior.)^
+**
+** ^The sqlite3_mutex_leave() routine exits a mutex that was
+** previously entered by the same thread.   The behavior
+** is undefined if the mutex is not currently entered by the
+** calling thread or is not currently allocated.
+**
+** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or
+** sqlite3_mutex_leave() is a NULL pointer, then all three routines
+** behave as no-ops.
+**
+** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()].
+*/
+SQLITE_API sqlite3_mutex *sqlite3_mutex_alloc(int);
+SQLITE_API void sqlite3_mutex_free(sqlite3_mutex*);
+SQLITE_API void sqlite3_mutex_enter(sqlite3_mutex*);
+SQLITE_API int sqlite3_mutex_try(sqlite3_mutex*);
+SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex*);
+
+/*
+** CAPI3REF: Mutex Methods Object
+**
+** An instance of this structure defines the low-level routines
+** used to allocate and use mutexes.
+**
+** Usually, the default mutex implementations provided by SQLite are
+** sufficient, however the application has the option of substituting a custom
+** implementation for specialized deployments or systems for which SQLite
+** does not provide a suitable implementation. In this case, the application
+** creates and populates an instance of this structure to pass
+** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option.
+** Additionally, an instance of this structure can be used as an
+** output variable when querying the system for the current mutex
+** implementation, using the [SQLITE_CONFIG_GETMUTEX] option.
+**
+** ^The xMutexInit method defined by this structure is invoked as
+** part of system initialization by the sqlite3_initialize() function.
+** ^The xMutexInit routine is called by SQLite exactly once for each
+** effective call to [sqlite3_initialize()].
+**
+** ^The xMutexEnd method defined by this structure is invoked as
+** part of system shutdown by the sqlite3_shutdown() function. The
+** implementation of this method is expected to release all outstanding
+** resources obtained by the mutex methods implementation, especially
+** those obtained by the xMutexInit method.  ^The xMutexEnd()
+** interface is invoked exactly once for each call to [sqlite3_shutdown()].
+**
+** ^(The remaining seven methods defined by this structure (xMutexAlloc,
+** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and
+** xMutexNotheld) implement the following interfaces (respectively):
+**
+** <ul>
+**   <li>  [sqlite3_mutex_alloc()] </li>
+**   <li>  [sqlite3_mutex_free()] </li>
+**   <li>  [sqlite3_mutex_enter()] </li>
+**   <li>  [sqlite3_mutex_try()] </li>
+**   <li>  [sqlite3_mutex_leave()] </li>
+**   <li>  [sqlite3_mutex_held()] </li>
+**   <li>  [sqlite3_mutex_notheld()] </li>
+** </ul>)^
+**
+** The only difference is that the public sqlite3_XXX functions enumerated
+** above silently ignore any invocations that pass a NULL pointer instead
+** of a valid mutex handle. The implementations of the methods defined
+** by this structure are not required to handle this case, the results
+** of passing a NULL pointer instead of a valid mutex handle are undefined
+** (i.e. it is acceptable to provide an implementation that segfaults if
+** it is passed a NULL pointer).
+**
+** The xMutexInit() method must be threadsafe.  It must be harmless to
+** invoke xMutexInit() multiple times within the same process and without
+** intervening calls to xMutexEnd().  Second and subsequent calls to
+** xMutexInit() must be no-ops.
+**
+** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()]
+** and its associates).  Similarly, xMutexAlloc() must not use SQLite memory
+** allocation for a static mutex.  ^However xMutexAlloc() may use SQLite
+** memory allocation for a fast or recursive mutex.
+**
+** ^SQLite will invoke the xMutexEnd() method when [sqlite3_shutdown()] is
+** called, but only if the prior call to xMutexInit returned SQLITE_OK.
+** If xMutexInit fails in any way, it is expected to clean up after itself
+** prior to returning.
+*/
+typedef struct sqlite3_mutex_methods sqlite3_mutex_methods;
+struct sqlite3_mutex_methods {
+  int (*xMutexInit)(void);
+  int (*xMutexEnd)(void);
+  sqlite3_mutex *(*xMutexAlloc)(int);
+  void (*xMutexFree)(sqlite3_mutex *);
+  void (*xMutexEnter)(sqlite3_mutex *);
+  int (*xMutexTry)(sqlite3_mutex *);
+  void (*xMutexLeave)(sqlite3_mutex *);
+  int (*xMutexHeld)(sqlite3_mutex *);
+  int (*xMutexNotheld)(sqlite3_mutex *);
+};
+
+/*
+** CAPI3REF: Mutex Verification Routines
+**
+** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routines
+** are intended for use inside assert() statements.  The SQLite core
+** never uses these routines except inside an assert() and applications
+** are advised to follow the lead of the core.  The SQLite core only
+** provides implementations for these routines when it is compiled
+** with the SQLITE_DEBUG flag.  External mutex implementations
+** are only required to provide these routines if SQLITE_DEBUG is
+** defined and if NDEBUG is not defined.
+**
+** These routines should return true if the mutex in their argument
+** is held or not held, respectively, by the calling thread.
+**
+** The implementation is not required to provide versions of these
+** routines that actually work. If the implementation does not provide working
+** versions of these routines, it should at least provide stubs that always
+** return true so that one does not get spurious assertion failures.
+**
+** If the argument to sqlite3_mutex_held() is a NULL pointer then
+** the routine should return 1.   This seems counter-intuitive since
+** clearly the mutex cannot be held if it does not exist.  But
+** the reason the mutex does not exist is because the build is not
+** using mutexes.  And we do not want the assert() containing the
+** call to sqlite3_mutex_held() to fail, so a non-zero return is
+** the appropriate thing to do.  The sqlite3_mutex_notheld()
+** interface should also return 1 when given a NULL pointer.
+*/
+#ifndef NDEBUG
+SQLITE_API int sqlite3_mutex_held(sqlite3_mutex*);
+SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex*);
+#endif
+
+/*
+** CAPI3REF: Mutex Types
+**
+** The [sqlite3_mutex_alloc()] interface takes a single argument
+** which is one of these integer constants.
+**
+** The set of static mutexes may change from one SQLite release to the
+** next.  Applications that override the built-in mutex logic must be
+** prepared to accommodate additional static mutexes.
+*/
+#define SQLITE_MUTEX_FAST             0
+#define SQLITE_MUTEX_RECURSIVE        1
+#define SQLITE_MUTEX_STATIC_MASTER    2
+#define SQLITE_MUTEX_STATIC_MEM       3  /* sqlite3_malloc() */
+#define SQLITE_MUTEX_STATIC_MEM2      4  /* NOT USED */
+#define SQLITE_MUTEX_STATIC_OPEN      4  /* sqlite3BtreeOpen() */
+#define SQLITE_MUTEX_STATIC_PRNG      5  /* sqlite3_random() */
+#define SQLITE_MUTEX_STATIC_LRU       6  /* lru page list */
+#define SQLITE_MUTEX_STATIC_LRU2      7  /* NOT USED */
+#define SQLITE_MUTEX_STATIC_PMEM      7  /* sqlite3PageMalloc() */
+#define SQLITE_MUTEX_STATIC_APP1      8  /* For use by application */
+#define SQLITE_MUTEX_STATIC_APP2      9  /* For use by application */
+#define SQLITE_MUTEX_STATIC_APP3     10  /* For use by application */
+
+/*
+** CAPI3REF: Retrieve the mutex for a database connection
+**
+** ^This interface returns a pointer the [sqlite3_mutex] object that 
+** serializes access to the [database connection] given in the argument
+** when the [threading mode] is Serialized.
+** ^If the [threading mode] is Single-thread or Multi-thread then this
+** routine returns a NULL pointer.
+*/
+SQLITE_API sqlite3_mutex *sqlite3_db_mutex(sqlite3*);
+
+/*
+** CAPI3REF: Low-Level Control Of Database Files
+**
+** ^The [sqlite3_file_control()] interface makes a direct call to the
+** xFileControl method for the [sqlite3_io_methods] object associated
+** with a particular database identified by the second argument. ^The
+** name of the database is "main" for the main database or "temp" for the
+** TEMP database, or the name that appears after the AS keyword for
+** databases that are added using the [ATTACH] SQL command.
+** ^A NULL pointer can be used in place of "main" to refer to the
+** main database file.
+** ^The third and fourth parameters to this routine
+** are passed directly through to the second and third parameters of
+** the xFileControl method.  ^The return value of the xFileControl
+** method becomes the return value of this routine.
+**
+** ^The SQLITE_FCNTL_FILE_POINTER value for the op parameter causes
+** a pointer to the underlying [sqlite3_file] object to be written into
+** the space pointed to by the 4th parameter.  ^The SQLITE_FCNTL_FILE_POINTER
+** case is a short-circuit path which does not actually invoke the
+** underlying sqlite3_io_methods.xFileControl method.
+**
+** ^If the second parameter (zDbName) does not match the name of any
+** open database file, then SQLITE_ERROR is returned.  ^This error
+** code is not remembered and will not be recalled by [sqlite3_errcode()]
+** or [sqlite3_errmsg()].  The underlying xFileControl method might
+** also return SQLITE_ERROR.  There is no way to distinguish between
+** an incorrect zDbName and an SQLITE_ERROR return from the underlying
+** xFileControl method.
+**
+** See also: [SQLITE_FCNTL_LOCKSTATE]
+*/
+SQLITE_API int sqlite3_file_control(sqlite3*, const char *zDbName, int op, void*);
+
+/*
+** CAPI3REF: Testing Interface
+**
+** ^The sqlite3_test_control() interface is used to read out internal
+** state of SQLite and to inject faults into SQLite for testing
+** purposes.  ^The first parameter is an operation code that determines
+** the number, meaning, and operation of all subsequent parameters.
+**
+** This interface is not for use by applications.  It exists solely
+** for verifying the correct operation of the SQLite library.  Depending
+** on how the SQLite library is compiled, this interface might not exist.
+**
+** The details of the operation codes, their meanings, the parameters
+** they take, and what they do are all subject to change without notice.
+** Unlike most of the SQLite API, this function is not guaranteed to
+** operate consistently from one release to the next.
+*/
+SQLITE_API int sqlite3_test_control(int op, ...);
+
+/*
+** CAPI3REF: Testing Interface Operation Codes
+**
+** These constants are the valid operation code parameters used
+** as the first argument to [sqlite3_test_control()].
+**
+** These parameters and their meanings are subject to change
+** without notice.  These values are for testing purposes only.
+** Applications should not use any of these parameters or the
+** [sqlite3_test_control()] interface.
+*/
+#define SQLITE_TESTCTRL_FIRST                    5
+#define SQLITE_TESTCTRL_PRNG_SAVE                5
+#define SQLITE_TESTCTRL_PRNG_RESTORE             6
+#define SQLITE_TESTCTRL_PRNG_RESET               7
+#define SQLITE_TESTCTRL_BITVEC_TEST              8
+#define SQLITE_TESTCTRL_FAULT_INSTALL            9
+#define SQLITE_TESTCTRL_BENIGN_MALLOC_HOOKS     10
+#define SQLITE_TESTCTRL_PENDING_BYTE            11
+#define SQLITE_TESTCTRL_ASSERT                  12
+#define SQLITE_TESTCTRL_ALWAYS                  13
+#define SQLITE_TESTCTRL_RESERVE                 14
+#define SQLITE_TESTCTRL_OPTIMIZATIONS           15
+#define SQLITE_TESTCTRL_ISKEYWORD               16
+#define SQLITE_TESTCTRL_SCRATCHMALLOC           17
+#define SQLITE_TESTCTRL_LOCALTIME_FAULT         18
+#define SQLITE_TESTCTRL_EXPLAIN_STMT            19  /* NOT USED */
+#define SQLITE_TESTCTRL_NEVER_CORRUPT           20
+#define SQLITE_TESTCTRL_VDBE_COVERAGE           21
+#define SQLITE_TESTCTRL_BYTEORDER               22
+#define SQLITE_TESTCTRL_ISINIT                  23
+#define SQLITE_TESTCTRL_SORTER_MMAP             24
+#define SQLITE_TESTCTRL_LAST                    24
+
+/*
+** CAPI3REF: SQLite Runtime Status
+**
+** ^This interface is used to retrieve runtime status information
+** about the performance of SQLite, and optionally to reset various
+** highwater marks.  ^The first argument is an integer code for
+** the specific parameter to measure.  ^(Recognized integer codes
+** are of the form [status parameters | SQLITE_STATUS_...].)^
+** ^The current value of the parameter is returned into *pCurrent.
+** ^The highest recorded value is returned in *pHighwater.  ^If the
+** resetFlag is true, then the highest record value is reset after
+** *pHighwater is written.  ^(Some parameters do not record the highest
+** value.  For those parameters
+** nothing is written into *pHighwater and the resetFlag is ignored.)^
+** ^(Other parameters record only the highwater mark and not the current
+** value.  For these latter parameters nothing is written into *pCurrent.)^
+**
+** ^The sqlite3_status() routine returns SQLITE_OK on success and a
+** non-zero [error code] on failure.
+**
+** This routine is threadsafe but is not atomic.  This routine can be
+** called while other threads are running the same or different SQLite
+** interfaces.  However the values returned in *pCurrent and
+** *pHighwater reflect the status of SQLite at different points in time
+** and it is possible that another thread might change the parameter
+** in between the times when *pCurrent and *pHighwater are written.
+**
+** See also: [sqlite3_db_status()]
+*/
+SQLITE_API int sqlite3_status(int op, int *pCurrent, int *pHighwater, int resetFlag);
+
+
+/*
+** CAPI3REF: Status Parameters
+** KEYWORDS: {status parameters}
+**
+** These integer constants designate various run-time status parameters
+** that can be returned by [sqlite3_status()].
+**
+** <dl>
+** [[SQLITE_STATUS_MEMORY_USED]] ^(<dt>SQLITE_STATUS_MEMORY_USED</dt>
+** <dd>This parameter is the current amount of memory checked out
+** using [sqlite3_malloc()], either directly or indirectly.  The
+** figure includes calls made to [sqlite3_malloc()] by the application
+** and internal memory usage by the SQLite library.  Scratch memory
+** controlled by [SQLITE_CONFIG_SCRATCH] and auxiliary page-cache
+** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in
+** this parameter.  The amount returned is the sum of the allocation
+** sizes as reported by the xSize method in [sqlite3_mem_methods].</dd>)^
+**
+** [[SQLITE_STATUS_MALLOC_SIZE]] ^(<dt>SQLITE_STATUS_MALLOC_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their
+** internal equivalents).  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_MALLOC_COUNT]] ^(<dt>SQLITE_STATUS_MALLOC_COUNT</dt>
+** <dd>This parameter records the number of separate memory allocations
+** currently checked out.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_USED]] ^(<dt>SQLITE_STATUS_PAGECACHE_USED</dt>
+** <dd>This parameter returns the number of pages used out of the
+** [pagecache memory allocator] that was configured using 
+** [SQLITE_CONFIG_PAGECACHE].  The
+** value returned is in pages, not in bytes.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]] 
+** ^(<dt>SQLITE_STATUS_PAGECACHE_OVERFLOW</dt>
+** <dd>This parameter returns the number of bytes of page cache
+** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE]
+** buffer and where forced to overflow to [sqlite3_malloc()].  The
+** returned value includes allocations that overflowed because they
+** where too large (they were larger than the "sz" parameter to
+** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because
+** no space was left in the page cache.</dd>)^
+**
+** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(<dt>SQLITE_STATUS_PAGECACHE_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [pagecache memory allocator].  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_USED]] ^(<dt>SQLITE_STATUS_SCRATCH_USED</dt>
+** <dd>This parameter returns the number of allocations used out of the
+** [scratch memory allocator] configured using
+** [SQLITE_CONFIG_SCRATCH].  The value returned is in allocations, not
+** in bytes.  Since a single thread may only have one scratch allocation
+** outstanding at time, this parameter also reports the number of threads
+** using scratch memory at the same time.</dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(<dt>SQLITE_STATUS_SCRATCH_OVERFLOW</dt>
+** <dd>This parameter returns the number of bytes of scratch memory
+** allocation which could not be satisfied by the [SQLITE_CONFIG_SCRATCH]
+** buffer and where forced to overflow to [sqlite3_malloc()].  The values
+** returned include overflows because the requested allocation was too
+** larger (that is, because the requested allocation was larger than the
+** "sz" parameter to [SQLITE_CONFIG_SCRATCH]) and because no scratch buffer
+** slots were available.
+** </dd>)^
+**
+** [[SQLITE_STATUS_SCRATCH_SIZE]] ^(<dt>SQLITE_STATUS_SCRATCH_SIZE</dt>
+** <dd>This parameter records the largest memory allocation request
+** handed to [scratch memory allocator].  Only the value returned in the
+** *pHighwater parameter to [sqlite3_status()] is of interest.  
+** The value written into the *pCurrent parameter is undefined.</dd>)^
+**
+** [[SQLITE_STATUS_PARSER_STACK]] ^(<dt>SQLITE_STATUS_PARSER_STACK</dt>
+** <dd>This parameter records the deepest parser stack.  It is only
+** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].</dd>)^
+** </dl>
+**
+** New status parameters may be added from time to time.
+*/
+#define SQLITE_STATUS_MEMORY_USED          0
+#define SQLITE_STATUS_PAGECACHE_USED       1
+#define SQLITE_STATUS_PAGECACHE_OVERFLOW   2
+#define SQLITE_STATUS_SCRATCH_USED         3
+#define SQLITE_STATUS_SCRATCH_OVERFLOW     4
+#define SQLITE_STATUS_MALLOC_SIZE          5
+#define SQLITE_STATUS_PARSER_STACK         6
+#define SQLITE_STATUS_PAGECACHE_SIZE       7
+#define SQLITE_STATUS_SCRATCH_SIZE         8
+#define SQLITE_STATUS_MALLOC_COUNT         9
+
+/*
+** CAPI3REF: Database Connection Status
+**
+** ^This interface is used to retrieve runtime status information 
+** about a single [database connection].  ^The first argument is the
+** database connection object to be interrogated.  ^The second argument
+** is an integer constant, taken from the set of
+** [SQLITE_DBSTATUS options], that
+** determines the parameter to interrogate.  The set of 
+** [SQLITE_DBSTATUS options] is likely
+** to grow in future releases of SQLite.
+**
+** ^The current value of the requested parameter is written into *pCur
+** and the highest instantaneous value is written into *pHiwtr.  ^If
+** the resetFlg is true, then the highest instantaneous value is
+** reset back down to the current value.
+**
+** ^The sqlite3_db_status() routine returns SQLITE_OK on success and a
+** non-zero [error code] on failure.
+**
+** See also: [sqlite3_status()] and [sqlite3_stmt_status()].
+*/
+SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int resetFlg);
+
+/*
+** CAPI3REF: Status Parameters for database connections
+** KEYWORDS: {SQLITE_DBSTATUS options}
+**
+** These constants are the available integer "verbs" that can be passed as
+** the second argument to the [sqlite3_db_status()] interface.
+**
+** New verbs may be added in future releases of SQLite. Existing verbs
+** might be discontinued. Applications should check the return code from
+** [sqlite3_db_status()] to make sure that the call worked.
+** The [sqlite3_db_status()] interface will return a non-zero error code
+** if a discontinued or unsupported verb is invoked.
+**
+** <dl>
+** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_USED</dt>
+** <dd>This parameter returns the number of lookaside memory slots currently
+** checked out.</dd>)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_HIT</dt>
+** <dd>This parameter returns the number malloc attempts that were 
+** satisfied using lookaside memory. Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]]
+** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE</dt>
+** <dd>This parameter returns the number malloc attempts that might have
+** been satisfied using lookaside memory but failed due to the amount of
+** memory requested being larger than the lookaside slot size.
+** Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]]
+** ^(<dt>SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL</dt>
+** <dd>This parameter returns the number malloc attempts that might have
+** been satisfied using lookaside memory but failed due to all lookaside
+** memory already being in use.
+** Only the high-water value is meaningful;
+** the current value is always zero.)^
+**
+** [[SQLITE_DBSTATUS_CACHE_USED]] ^(<dt>SQLITE_DBSTATUS_CACHE_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** memory used by all pager caches associated with the database connection.)^
+** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0.
+**
+** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(<dt>SQLITE_DBSTATUS_SCHEMA_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** memory used to store the schema for all databases associated
+** with the connection - main, temp, and any [ATTACH]-ed databases.)^ 
+** ^The full amount of memory used by the schemas is reported, even if the
+** schema memory is shared with other database connections due to
+** [shared cache mode] being enabled.
+** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0.
+**
+** [[SQLITE_DBSTATUS_STMT_USED]] ^(<dt>SQLITE_DBSTATUS_STMT_USED</dt>
+** <dd>This parameter returns the approximate number of bytes of heap
+** and lookaside memory used by all prepared statements associated with
+** the database connection.)^
+** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt>
+** <dd>This parameter returns the number of pager cache hits that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt>
+** <dd>This parameter returns the number of pager cache misses that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(<dt>SQLITE_DBSTATUS_CACHE_WRITE</dt>
+** <dd>This parameter returns the number of dirty cache entries that have
+** been written to disk. Specifically, the number of pages written to the
+** wal file in wal mode databases, or the number of pages written to the
+** database file in rollback mode databases. Any pages written as part of
+** transaction rollback or database recovery operations are not included.
+** If an IO or other error occurs while writing a page to disk, the effect
+** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The
+** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(<dt>SQLITE_DBSTATUS_DEFERRED_FKS</dt>
+** <dd>This parameter returns zero for the current value if and only if
+** all foreign key constraints (deferred or immediate) have been
+** resolved.)^  ^The highwater mark is always 0.
+** </dd>
+** </dl>
+*/
+#define SQLITE_DBSTATUS_LOOKASIDE_USED       0
+#define SQLITE_DBSTATUS_CACHE_USED           1
+#define SQLITE_DBSTATUS_SCHEMA_USED          2
+#define SQLITE_DBSTATUS_STMT_USED            3
+#define SQLITE_DBSTATUS_LOOKASIDE_HIT        4
+#define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE  5
+#define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL  6
+#define SQLITE_DBSTATUS_CACHE_HIT            7
+#define SQLITE_DBSTATUS_CACHE_MISS           8
+#define SQLITE_DBSTATUS_CACHE_WRITE          9
+#define SQLITE_DBSTATUS_DEFERRED_FKS        10
+#define SQLITE_DBSTATUS_MAX                 10   /* Largest defined DBSTATUS */
+
+
+/*
+** CAPI3REF: Prepared Statement Status
+**
+** ^(Each prepared statement maintains various
+** [SQLITE_STMTSTATUS counters] that measure the number
+** of times it has performed specific operations.)^  These counters can
+** be used to monitor the performance characteristics of the prepared
+** statements.  For example, if the number of table steps greatly exceeds
+** the number of table searches or result rows, that would tend to indicate
+** that the prepared statement is using a full table scan rather than
+** an index.  
+**
+** ^(This interface is used to retrieve and reset counter values from
+** a [prepared statement].  The first argument is the prepared statement
+** object to be interrogated.  The second argument
+** is an integer code for a specific [SQLITE_STMTSTATUS counter]
+** to be interrogated.)^
+** ^The current value of the requested counter is returned.
+** ^If the resetFlg is true, then the counter is reset to zero after this
+** interface call returns.
+**
+** See also: [sqlite3_status()] and [sqlite3_db_status()].
+*/
+SQLITE_API int sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg);
+
+/*
+** CAPI3REF: Status Parameters for prepared statements
+** KEYWORDS: {SQLITE_STMTSTATUS counter} {SQLITE_STMTSTATUS counters}
+**
+** These preprocessor macros define integer codes that name counter
+** values associated with the [sqlite3_stmt_status()] interface.
+** The meanings of the various counters are as follows:
+**
+** <dl>
+** [[SQLITE_STMTSTATUS_FULLSCAN_STEP]] <dt>SQLITE_STMTSTATUS_FULLSCAN_STEP</dt>
+** <dd>^This is the number of times that SQLite has stepped forward in
+** a table as part of a full table scan.  Large numbers for this counter
+** may indicate opportunities for performance improvement through 
+** careful use of indices.</dd>
+**
+** [[SQLITE_STMTSTATUS_SORT]] <dt>SQLITE_STMTSTATUS_SORT</dt>
+** <dd>^This is the number of sort operations that have occurred.
+** A non-zero value in this counter may indicate an opportunity to
+** improvement performance through careful use of indices.</dd>
+**
+** [[SQLITE_STMTSTATUS_AUTOINDEX]] <dt>SQLITE_STMTSTATUS_AUTOINDEX</dt>
+** <dd>^This is the number of rows inserted into transient indices that
+** were created automatically in order to help joins run faster.
+** A non-zero value in this counter may indicate an opportunity to
+** improvement performance by adding permanent indices that do not
+** need to be reinitialized each time the statement is run.</dd>
+**
+** [[SQLITE_STMTSTATUS_VM_STEP]] <dt>SQLITE_STMTSTATUS_VM_STEP</dt>
+** <dd>^This is the number of virtual machine operations executed
+** by the prepared statement if that number is less than or equal
+** to 2147483647.  The number of virtual machine operations can be 
+** used as a proxy for the total work done by the prepared statement.
+** If the number of virtual machine operations exceeds 2147483647
+** then the value returned by this statement status code is undefined.
+** </dd>
+** </dl>
+*/
+#define SQLITE_STMTSTATUS_FULLSCAN_STEP     1
+#define SQLITE_STMTSTATUS_SORT              2
+#define SQLITE_STMTSTATUS_AUTOINDEX         3
+#define SQLITE_STMTSTATUS_VM_STEP           4
+
+/*
+** CAPI3REF: Custom Page Cache Object
+**
+** The sqlite3_pcache type is opaque.  It is implemented by
+** the pluggable module.  The SQLite core has no knowledge of
+** its size or internal structure and never deals with the
+** sqlite3_pcache object except by holding and passing pointers
+** to the object.
+**
+** See [sqlite3_pcache_methods2] for additional information.
+*/
+typedef struct sqlite3_pcache sqlite3_pcache;
+
+/*
+** CAPI3REF: Custom Page Cache Object
+**
+** The sqlite3_pcache_page object represents a single page in the
+** page cache.  The page cache will allocate instances of this
+** object.  Various methods of the page cache use pointers to instances
+** of this object as parameters or as their return value.
+**
+** See [sqlite3_pcache_methods2] for additional information.
+*/
+typedef struct sqlite3_pcache_page sqlite3_pcache_page;
+struct sqlite3_pcache_page {
+  void *pBuf;        /* The content of the page */
+  void *pExtra;      /* Extra information associated with the page */
+};
+
+/*
+** CAPI3REF: Application Defined Page Cache.
+** KEYWORDS: {page cache}
+**
+** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can
+** register an alternative page cache implementation by passing in an 
+** instance of the sqlite3_pcache_methods2 structure.)^
+** In many applications, most of the heap memory allocated by 
+** SQLite is used for the page cache.
+** By implementing a 
+** custom page cache using this API, an application can better control
+** the amount of memory consumed by SQLite, the way in which 
+** that memory is allocated and released, and the policies used to 
+** determine exactly which parts of a database file are cached and for 
+** how long.
+**
+** The alternative page cache mechanism is an
+** extreme measure that is only needed by the most demanding applications.
+** The built-in page cache is recommended for most uses.
+**
+** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an
+** internal buffer by SQLite within the call to [sqlite3_config].  Hence
+** the application may discard the parameter after the call to
+** [sqlite3_config()] returns.)^
+**
+** [[the xInit() page cache method]]
+** ^(The xInit() method is called once for each effective 
+** call to [sqlite3_initialize()])^
+** (usually only once during the lifetime of the process). ^(The xInit()
+** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^
+** The intent of the xInit() method is to set up global data structures 
+** required by the custom page cache implementation. 
+** ^(If the xInit() method is NULL, then the 
+** built-in default page cache is used instead of the application defined
+** page cache.)^
+**
+** [[the xShutdown() page cache method]]
+** ^The xShutdown() method is called by [sqlite3_shutdown()].
+** It can be used to clean up 
+** any outstanding resources before process shutdown, if required.
+** ^The xShutdown() method may be NULL.
+**
+** ^SQLite automatically serializes calls to the xInit method,
+** so the xInit method need not be threadsafe.  ^The
+** xShutdown method is only called from [sqlite3_shutdown()] so it does
+** not need to be threadsafe either.  All other methods must be threadsafe
+** in multithreaded applications.
+**
+** ^SQLite will never invoke xInit() more than once without an intervening
+** call to xShutdown().
+**
+** [[the xCreate() page cache methods]]
+** ^SQLite invokes the xCreate() method to construct a new cache instance.
+** SQLite will typically create one cache instance for each open database file,
+** though this is not guaranteed. ^The
+** first parameter, szPage, is the size in bytes of the pages that must
+** be allocated by the cache.  ^szPage will always a power of two.  ^The
+** second parameter szExtra is a number of bytes of extra storage 
+** associated with each page cache entry.  ^The szExtra parameter will
+** a number less than 250.  SQLite will use the
+** extra szExtra bytes on each page to store metadata about the underlying
+** database page on disk.  The value passed into szExtra depends
+** on the SQLite version, the target platform, and how SQLite was compiled.
+** ^The third argument to xCreate(), bPurgeable, is true if the cache being
+** created will be used to cache database pages of a file stored on disk, or
+** false if it is used for an in-memory database. The cache implementation
+** does not have to do anything special based with the value of bPurgeable;
+** it is purely advisory.  ^On a cache where bPurgeable is false, SQLite will
+** never invoke xUnpin() except to deliberately delete a page.
+** ^In other words, calls to xUnpin() on a cache with bPurgeable set to
+** false will always have the "discard" flag set to true.  
+** ^Hence, a cache created with bPurgeable false will
+** never contain any unpinned pages.
+**
+** [[the xCachesize() page cache method]]
+** ^(The xCachesize() method may be called at any time by SQLite to set the
+** suggested maximum cache-size (number of pages stored by) the cache
+** instance passed as the first argument. This is the value configured using
+** the SQLite "[PRAGMA cache_size]" command.)^  As with the bPurgeable
+** parameter, the implementation is not required to do anything with this
+** value; it is advisory only.
+**
+** [[the xPagecount() page cache methods]]
+** The xPagecount() method must return the number of pages currently
+** stored in the cache, both pinned and unpinned.
+** 
+** [[the xFetch() page cache methods]]
+** The xFetch() method locates a page in the cache and returns a pointer to 
+** an sqlite3_pcache_page object associated with that page, or a NULL pointer.
+** The pBuf element of the returned sqlite3_pcache_page object will be a
+** pointer to a buffer of szPage bytes used to store the content of a 
+** single database page.  The pExtra element of sqlite3_pcache_page will be
+** a pointer to the szExtra bytes of extra storage that SQLite has requested
+** for each entry in the page cache.
+**
+** The page to be fetched is determined by the key. ^The minimum key value
+** is 1.  After it has been retrieved using xFetch, the page is considered
+** to be "pinned".
+**
+** If the requested page is already in the page cache, then the page cache
+** implementation must return a pointer to the page buffer with its content
+** intact.  If the requested page is not already in the cache, then the
+** cache implementation should use the value of the createFlag
+** parameter to help it determined what action to take:
+**
+** <table border=1 width=85% align=center>
+** <tr><th> createFlag <th> Behavior when page is not already in cache
+** <tr><td> 0 <td> Do not allocate a new page.  Return NULL.
+** <tr><td> 1 <td> Allocate a new page if it easy and convenient to do so.
+**                 Otherwise return NULL.
+** <tr><td> 2 <td> Make every effort to allocate a new page.  Only return
+**                 NULL if allocating a new page is effectively impossible.
+** </table>
+**
+** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1.  SQLite
+** will only use a createFlag of 2 after a prior call with a createFlag of 1
+** failed.)^  In between the to xFetch() calls, SQLite may
+** attempt to unpin one or more cache pages by spilling the content of
+** pinned pages to disk and synching the operating system disk cache.
+**
+** [[the xUnpin() page cache method]]
+** ^xUnpin() is called by SQLite with a pointer to a currently pinned page
+** as its second argument.  If the third parameter, discard, is non-zero,
+** then the page must be evicted from the cache.
+** ^If the discard parameter is
+** zero, then the page may be discarded or retained at the discretion of
+** page cache implementation. ^The page cache implementation
+** may choose to evict unpinned pages at any time.
+**
+** The cache must not perform any reference counting. A single 
+** call to xUnpin() unpins the page regardless of the number of prior calls 
+** to xFetch().
+**
+** [[the xRekey() page cache methods]]
+** The xRekey() method is used to change the key value associated with the
+** page passed as the second argument. If the cache
+** previously contains an entry associated with newKey, it must be
+** discarded. ^Any prior cache entry associated with newKey is guaranteed not
+** to be pinned.
+**
+** When SQLite calls the xTruncate() method, the cache must discard all
+** existing cache entries with page numbers (keys) greater than or equal
+** to the value of the iLimit parameter passed to xTruncate(). If any
+** of these pages are pinned, they are implicitly unpinned, meaning that
+** they can be safely discarded.
+**
+** [[the xDestroy() page cache method]]
+** ^The xDestroy() method is used to delete a cache allocated by xCreate().
+** All resources associated with the specified cache should be freed. ^After
+** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*]
+** handle invalid, and will not use it with any other sqlite3_pcache_methods2
+** functions.
+**
+** [[the xShrink() page cache method]]
+** ^SQLite invokes the xShrink() method when it wants the page cache to
+** free up as much of heap memory as possible.  The page cache implementation
+** is not obligated to free any memory, but well-behaved implementations should
+** do their best.
+*/
+typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2;
+struct sqlite3_pcache_methods2 {
+  int iVersion;
+  void *pArg;
+  int (*xInit)(void*);
+  void (*xShutdown)(void*);
+  sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable);
+  void (*xCachesize)(sqlite3_pcache*, int nCachesize);
+  int (*xPagecount)(sqlite3_pcache*);
+  sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
+  void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard);
+  void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, 
+      unsigned oldKey, unsigned newKey);
+  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
+  void (*xDestroy)(sqlite3_pcache*);
+  void (*xShrink)(sqlite3_pcache*);
+};
+
+/*
+** This is the obsolete pcache_methods object that has now been replaced
+** by sqlite3_pcache_methods2.  This object is not used by SQLite.  It is
+** retained in the header file for backwards compatibility only.
+*/
+typedef struct sqlite3_pcache_methods sqlite3_pcache_methods;
+struct sqlite3_pcache_methods {
+  void *pArg;
+  int (*xInit)(void*);
+  void (*xShutdown)(void*);
+  sqlite3_pcache *(*xCreate)(int szPage, int bPurgeable);
+  void (*xCachesize)(sqlite3_pcache*, int nCachesize);
+  int (*xPagecount)(sqlite3_pcache*);
+  void *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
+  void (*xUnpin)(sqlite3_pcache*, void*, int discard);
+  void (*xRekey)(sqlite3_pcache*, void*, unsigned oldKey, unsigned newKey);
+  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
+  void (*xDestroy)(sqlite3_pcache*);
+};
+
+
+/*
+** CAPI3REF: Online Backup Object
+**
+** The sqlite3_backup object records state information about an ongoing
+** online backup operation.  ^The sqlite3_backup object is created by
+** a call to [sqlite3_backup_init()] and is destroyed by a call to
+** [sqlite3_backup_finish()].
+**
+** See Also: [Using the SQLite Online Backup API]
+*/
+typedef struct sqlite3_backup sqlite3_backup;
+
+/*
+** CAPI3REF: Online Backup API.
+**
+** The backup API copies the content of one database into another.
+** It is useful either for creating backups of databases or
+** for copying in-memory databases to or from persistent files. 
+**
+** See Also: [Using the SQLite Online Backup API]
+**
+** ^SQLite holds a write transaction open on the destination database file
+** for the duration of the backup operation.
+** ^The source database is read-locked only while it is being read;
+** it is not locked continuously for the entire backup operation.
+** ^Thus, the backup may be performed on a live source database without
+** preventing other database connections from
+** reading or writing to the source database while the backup is underway.
+** 
+** ^(To perform a backup operation: 
+**   <ol>
+**     <li><b>sqlite3_backup_init()</b> is called once to initialize the
+**         backup, 
+**     <li><b>sqlite3_backup_step()</b> is called one or more times to transfer 
+**         the data between the two databases, and finally
+**     <li><b>sqlite3_backup_finish()</b> is called to release all resources 
+**         associated with the backup operation. 
+**   </ol>)^
+** There should be exactly one call to sqlite3_backup_finish() for each
+** successful call to sqlite3_backup_init().
+**
+** [[sqlite3_backup_init()]] <b>sqlite3_backup_init()</b>
+**
+** ^The D and N arguments to sqlite3_backup_init(D,N,S,M) are the 
+** [database connection] associated with the destination database 
+** and the database name, respectively.
+** ^The database name is "main" for the main database, "temp" for the
+** temporary database, or the name specified after the AS keyword in
+** an [ATTACH] statement for an attached database.
+** ^The S and M arguments passed to 
+** sqlite3_backup_init(D,N,S,M) identify the [database connection]
+** and database name of the source database, respectively.
+** ^The source and destination [database connections] (parameters S and D)
+** must be different or else sqlite3_backup_init(D,N,S,M) will fail with
+** an error.
+**
+** ^A call to sqlite3_backup_init() will fail, returning SQLITE_ERROR, if 
+** there is already a read or read-write transaction open on the 
+** destination database.
+**
+** ^If an error occurs within sqlite3_backup_init(D,N,S,M), then NULL is
+** returned and an error code and error message are stored in the
+** destination [database connection] D.
+** ^The error code and message for the failed call to sqlite3_backup_init()
+** can be retrieved using the [sqlite3_errcode()], [sqlite3_errmsg()], and/or
+** [sqlite3_errmsg16()] functions.
+** ^A successful call to sqlite3_backup_init() returns a pointer to an
+** [sqlite3_backup] object.
+** ^The [sqlite3_backup] object may be used with the sqlite3_backup_step() and
+** sqlite3_backup_finish() functions to perform the specified backup 
+** operation.
+**
+** [[sqlite3_backup_step()]] <b>sqlite3_backup_step()</b>
+**
+** ^Function sqlite3_backup_step(B,N) will copy up to N pages between 
+** the source and destination databases specified by [sqlite3_backup] object B.
+** ^If N is negative, all remaining source pages are copied. 
+** ^If sqlite3_backup_step(B,N) successfully copies N pages and there
+** are still more pages to be copied, then the function returns [SQLITE_OK].
+** ^If sqlite3_backup_step(B,N) successfully finishes copying all pages
+** from source to destination, then it returns [SQLITE_DONE].
+** ^If an error occurs while running sqlite3_backup_step(B,N),
+** then an [error code] is returned. ^As well as [SQLITE_OK] and
+** [SQLITE_DONE], a call to sqlite3_backup_step() may return [SQLITE_READONLY],
+** [SQLITE_NOMEM], [SQLITE_BUSY], [SQLITE_LOCKED], or an
+** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX] extended error code.
+**
+** ^(The sqlite3_backup_step() might return [SQLITE_READONLY] if
+** <ol>
+** <li> the destination database was opened read-only, or
+** <li> the destination database is using write-ahead-log journaling
+** and the destination and source page sizes differ, or
+** <li> the destination database is an in-memory database and the
+** destination and source page sizes differ.
+** </ol>)^
+**
+** ^If sqlite3_backup_step() cannot obtain a required file-system lock, then
+** the [sqlite3_busy_handler | busy-handler function]
+** is invoked (if one is specified). ^If the 
+** busy-handler returns non-zero before the lock is available, then 
+** [SQLITE_BUSY] is returned to the caller. ^In this case the call to
+** sqlite3_backup_step() can be retried later. ^If the source
+** [database connection]
+** is being used to write to the source database when sqlite3_backup_step()
+** is called, then [SQLITE_LOCKED] is returned immediately. ^Again, in this
+** case the call to sqlite3_backup_step() can be retried later on. ^(If
+** [SQLITE_IOERR_ACCESS | SQLITE_IOERR_XXX], [SQLITE_NOMEM], or
+** [SQLITE_READONLY] is returned, then 
+** there is no point in retrying the call to sqlite3_backup_step(). These 
+** errors are considered fatal.)^  The application must accept 
+** that the backup operation has failed and pass the backup operation handle 
+** to the sqlite3_backup_finish() to release associated resources.
+**
+** ^The first call to sqlite3_backup_step() obtains an exclusive lock
+** on the destination file. ^The exclusive lock is not released until either 
+** sqlite3_backup_finish() is called or the backup operation is complete 
+** and sqlite3_backup_step() returns [SQLITE_DONE].  ^Every call to
+** sqlite3_backup_step() obtains a [shared lock] on the source database that
+** lasts for the duration of the sqlite3_backup_step() call.
+** ^Because the source database is not locked between calls to
+** sqlite3_backup_step(), the source database may be modified mid-way
+** through the backup process.  ^If the source database is modified by an
+** external process or via a database connection other than the one being
+** used by the backup operation, then the backup will be automatically
+** restarted by the next call to sqlite3_backup_step(). ^If the source 
+** database is modified by the using the same database connection as is used
+** by the backup operation, then the backup database is automatically
+** updated at the same time.
+**
+** [[sqlite3_backup_finish()]] <b>sqlite3_backup_finish()</b>
+**
+** When sqlite3_backup_step() has returned [SQLITE_DONE], or when the 
+** application wishes to abandon the backup operation, the application
+** should destroy the [sqlite3_backup] by passing it to sqlite3_backup_finish().
+** ^The sqlite3_backup_finish() interfaces releases all
+** resources associated with the [sqlite3_backup] object. 
+** ^If sqlite3_backup_step() has not yet returned [SQLITE_DONE], then any
+** active write-transaction on the destination database is rolled back.
+** The [sqlite3_backup] object is invalid
+** and may not be used following a call to sqlite3_backup_finish().
+**
+** ^The value returned by sqlite3_backup_finish is [SQLITE_OK] if no
+** sqlite3_backup_step() errors occurred, regardless or whether or not
+** sqlite3_backup_step() completed.
+** ^If an out-of-memory condition or IO error occurred during any prior
+** sqlite3_backup_step() call on the same [sqlite3_backup] object, then
+** sqlite3_backup_finish() returns the corresponding [error code].
+**
+** ^A return of [SQLITE_BUSY] or [SQLITE_LOCKED] from sqlite3_backup_step()
+** is not a permanent error and does not affect the return value of
+** sqlite3_backup_finish().
+**
+** [[sqlite3_backup__remaining()]] [[sqlite3_backup_pagecount()]]
+** <b>sqlite3_backup_remaining() and sqlite3_backup_pagecount()</b>
+**
+** ^Each call to sqlite3_backup_step() sets two values inside
+** the [sqlite3_backup] object: the number of pages still to be backed
+** up and the total number of pages in the source database file.
+** The sqlite3_backup_remaining() and sqlite3_backup_pagecount() interfaces
+** retrieve these two values, respectively.
+**
+** ^The values returned by these functions are only updated by
+** sqlite3_backup_step(). ^If the source database is modified during a backup
+** operation, then the values are not updated to account for any extra
+** pages that need to be updated or the size of the source database file
+** changing.
+**
+** <b>Concurrent Usage of Database Handles</b>
+**
+** ^The source [database connection] may be used by the application for other
+** purposes while a backup operation is underway or being initialized.
+** ^If SQLite is compiled and configured to support threadsafe database
+** connections, then the source database connection may be used concurrently
+** from within other threads.
+**
+** However, the application must guarantee that the destination 
+** [database connection] is not passed to any other API (by any thread) after 
+** sqlite3_backup_init() is called and before the corresponding call to
+** sqlite3_backup_finish().  SQLite does not currently check to see
+** if the application incorrectly accesses the destination [database connection]
+** and so no error code is reported, but the operations may malfunction
+** nevertheless.  Use of the destination database connection while a
+** backup is in progress might also also cause a mutex deadlock.
+**
+** If running in [shared cache mode], the application must
+** guarantee that the shared cache used by the destination database
+** is not accessed while the backup is running. In practice this means
+** that the application must guarantee that the disk file being 
+** backed up to is not accessed by any connection within the process,
+** not just the specific connection that was passed to sqlite3_backup_init().
+**
+** The [sqlite3_backup] object itself is partially threadsafe. Multiple 
+** threads may safely make multiple concurrent calls to sqlite3_backup_step().
+** However, the sqlite3_backup_remaining() and sqlite3_backup_pagecount()
+** APIs are not strictly speaking threadsafe. If they are invoked at the
+** same time as another thread is invoking sqlite3_backup_step() it is
+** possible that they return invalid values.
+*/
+SQLITE_API sqlite3_backup *sqlite3_backup_init(
+  sqlite3 *pDest,                        /* Destination database handle */
+  const char *zDestName,                 /* Destination database name */
+  sqlite3 *pSource,                      /* Source database handle */
+  const char *zSourceName                /* Source database name */
+);
+SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage);
+SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p);
+SQLITE_API int sqlite3_backup_remaining(sqlite3_backup *p);
+SQLITE_API int sqlite3_backup_pagecount(sqlite3_backup *p);
+
+/*
+** CAPI3REF: Unlock Notification
+**
+** ^When running in shared-cache mode, a database operation may fail with
+** an [SQLITE_LOCKED] error if the required locks on the shared-cache or
+** individual tables within the shared-cache cannot be obtained. See
+** [SQLite Shared-Cache Mode] for a description of shared-cache locking. 
+** ^This API may be used to register a callback that SQLite will invoke 
+** when the connection currently holding the required lock relinquishes it.
+** ^This API is only available if the library was compiled with the
+** [SQLITE_ENABLE_UNLOCK_NOTIFY] C-preprocessor symbol defined.
+**
+** See Also: [Using the SQLite Unlock Notification Feature].
+**
+** ^Shared-cache locks are released when a database connection concludes
+** its current transaction, either by committing it or rolling it back. 
+**
+** ^When a connection (known as the blocked connection) fails to obtain a
+** shared-cache lock and SQLITE_LOCKED is returned to the caller, the
+** identity of the database connection (the blocking connection) that
+** has locked the required resource is stored internally. ^After an 
+** application receives an SQLITE_LOCKED error, it may call the
+** sqlite3_unlock_notify() method with the blocked connection handle as 
+** the first argument to register for a callback that will be invoked
+** when the blocking connections current transaction is concluded. ^The
+** callback is invoked from within the [sqlite3_step] or [sqlite3_close]
+** call that concludes the blocking connections transaction.
+**
+** ^(If sqlite3_unlock_notify() is called in a multi-threaded application,
+** there is a chance that the blocking connection will have already
+** concluded its transaction by the time sqlite3_unlock_notify() is invoked.
+** If this happens, then the specified callback is invoked immediately,
+** from within the call to sqlite3_unlock_notify().)^
+**
+** ^If the blocked connection is attempting to obtain a write-lock on a
+** shared-cache table, and more than one other connection currently holds
+** a read-lock on the same table, then SQLite arbitrarily selects one of 
+** the other connections to use as the blocking connection.
+**
+** ^(There may be at most one unlock-notify callback registered by a 
+** blocked connection. If sqlite3_unlock_notify() is called when the
+** blocked connection already has a registered unlock-notify callback,
+** then the new callback replaces the old.)^ ^If sqlite3_unlock_notify() is
+** called with a NULL pointer as its second argument, then any existing
+** unlock-notify callback is canceled. ^The blocked connections 
+** unlock-notify callback may also be canceled by closing the blocked
+** connection using [sqlite3_close()].
+**
+** The unlock-notify callback is not reentrant. If an application invokes
+** any sqlite3_xxx API functions from within an unlock-notify callback, a
+** crash or deadlock may be the result.
+**
+** ^Unless deadlock is detected (see below), sqlite3_unlock_notify() always
+** returns SQLITE_OK.
+**
+** <b>Callback Invocation Details</b>
+**
+** When an unlock-notify callback is registered, the application provides a 
+** single void* pointer that is passed to the callback when it is invoked.
+** However, the signature of the callback function allows SQLite to pass
+** it an array of void* context pointers. The first argument passed to
+** an unlock-notify callback is a pointer to an array of void* pointers,
+** and the second is the number of entries in the array.
+**
+** When a blocking connections transaction is concluded, there may be
+** more than one blocked connection that has registered for an unlock-notify
+** callback. ^If two or more such blocked connections have specified the
+** same callback function, then instead of invoking the callback function
+** multiple times, it is invoked once with the set of void* context pointers
+** specified by the blocked connections bundled together into an array.
+** This gives the application an opportunity to prioritize any actions 
+** related to the set of unblocked database connections.
+**
+** <b>Deadlock Detection</b>
+**
+** Assuming that after registering for an unlock-notify callback a 
+** database waits for the callback to be issued before taking any further
+** action (a reasonable assumption), then using this API may cause the
+** application to deadlock. For example, if connection X is waiting for
+** connection Y's transaction to be concluded, and similarly connection
+** Y is waiting on connection X's transaction, then neither connection
+** will proceed and the system may remain deadlocked indefinitely.
+**
+** To avoid this scenario, the sqlite3_unlock_notify() performs deadlock
+** detection. ^If a given call to sqlite3_unlock_notify() would put the
+** system in a deadlocked state, then SQLITE_LOCKED is returned and no
+** unlock-notify callback is registered. The system is said to be in
+** a deadlocked state if connection A has registered for an unlock-notify
+** callback on the conclusion of connection B's transaction, and connection
+** B has itself registered for an unlock-notify callback when connection
+** A's transaction is concluded. ^Indirect deadlock is also detected, so
+** the system is also considered to be deadlocked if connection B has
+** registered for an unlock-notify callback on the conclusion of connection
+** C's transaction, where connection C is waiting on connection A. ^Any
+** number of levels of indirection are allowed.
+**
+** <b>The "DROP TABLE" Exception</b>
+**
+** When a call to [sqlite3_step()] returns SQLITE_LOCKED, it is almost 
+** always appropriate to call sqlite3_unlock_notify(). There is however,
+** one exception. When executing a "DROP TABLE" or "DROP INDEX" statement,
+** SQLite checks if there are any currently executing SELECT statements
+** that belong to the same connection. If there are, SQLITE_LOCKED is
+** returned. In this case there is no "blocking connection", so invoking
+** sqlite3_unlock_notify() results in the unlock-notify callback being
+** invoked immediately. If the application then re-attempts the "DROP TABLE"
+** or "DROP INDEX" query, an infinite loop might be the result.
+**
+** One way around this problem is to check the extended error code returned
+** by an sqlite3_step() call. ^(If there is a blocking connection, then the
+** extended error code is set to SQLITE_LOCKED_SHAREDCACHE. Otherwise, in
+** the special "DROP TABLE/INDEX" case, the extended error code is just 
+** SQLITE_LOCKED.)^
+*/
+SQLITE_API int sqlite3_unlock_notify(
+  sqlite3 *pBlocked,                          /* Waiting connection */
+  void (*xNotify)(void **apArg, int nArg),    /* Callback function to invoke */
+  void *pNotifyArg                            /* Argument to pass to xNotify */
+);
+
+
+/*
+** CAPI3REF: String Comparison
+**
+** ^The [sqlite3_stricmp()] and [sqlite3_strnicmp()] APIs allow applications
+** and extensions to compare the contents of two buffers containing UTF-8
+** strings in a case-independent fashion, using the same definition of "case
+** independence" that SQLite uses internally when comparing identifiers.
+*/
+SQLITE_API int sqlite3_stricmp(const char *, const char *);
+SQLITE_API int sqlite3_strnicmp(const char *, const char *, int);
+
+/*
+** CAPI3REF: String Globbing
+*
+** ^The [sqlite3_strglob(P,X)] interface returns zero if string X matches
+** the glob pattern P, and it returns non-zero if string X does not match
+** the glob pattern P.  ^The definition of glob pattern matching used in
+** [sqlite3_strglob(P,X)] is the same as for the "X GLOB P" operator in the
+** SQL dialect used by SQLite.  ^The sqlite3_strglob(P,X) function is case
+** sensitive.
+**
+** Note that this routine returns zero on a match and non-zero if the strings
+** do not match, the same as [sqlite3_stricmp()] and [sqlite3_strnicmp()].
+*/
+SQLITE_API int sqlite3_strglob(const char *zGlob, const char *zStr);
+
+/*
+** CAPI3REF: Error Logging Interface
+**
+** ^The [sqlite3_log()] interface writes a message into the [error log]
+** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()].
+** ^If logging is enabled, the zFormat string and subsequent arguments are
+** used with [sqlite3_snprintf()] to generate the final output string.
+**
+** The sqlite3_log() interface is intended for use by extensions such as
+** virtual tables, collating functions, and SQL functions.  While there is
+** nothing to prevent an application from calling sqlite3_log(), doing so
+** is considered bad form.
+**
+** The zFormat string must not be NULL.
+**
+** To avoid deadlocks and other threading problems, the sqlite3_log() routine
+** will not use dynamically allocated memory.  The log message is stored in
+** a fixed-length buffer on the stack.  If the log message is longer than
+** a few hundred characters, it will be truncated to the length of the
+** buffer.
+*/
+SQLITE_API void sqlite3_log(int iErrCode, const char *zFormat, ...);
+
+/*
+** CAPI3REF: Write-Ahead Log Commit Hook
+**
+** ^The [sqlite3_wal_hook()] function is used to register a callback that
+** is invoked each time data is committed to a database in wal mode.
+**
+** ^(The callback is invoked by SQLite after the commit has taken place and 
+** the associated write-lock on the database released)^, so the implementation 
+** may read, write or [checkpoint] the database as required.
+**
+** ^The first parameter passed to the callback function when it is invoked
+** is a copy of the third parameter passed to sqlite3_wal_hook() when
+** registering the callback. ^The second is a copy of the database handle.
+** ^The third parameter is the name of the database that was written to -
+** either "main" or the name of an [ATTACH]-ed database. ^The fourth parameter
+** is the number of pages currently in the write-ahead log file,
+** including those that were just committed.
+**
+** The callback function should normally return [SQLITE_OK].  ^If an error
+** code is returned, that error will propagate back up through the
+** SQLite code base to cause the statement that provoked the callback
+** to report an error, though the commit will have still occurred. If the
+** callback returns [SQLITE_ROW] or [SQLITE_DONE], or if it returns a value
+** that does not correspond to any valid SQLite error code, the results
+** are undefined.
+**
+** A single database handle may have at most a single write-ahead log callback 
+** registered at one time. ^Calling [sqlite3_wal_hook()] replaces any
+** previously registered write-ahead log callback. ^Note that the
+** [sqlite3_wal_autocheckpoint()] interface and the
+** [wal_autocheckpoint pragma] both invoke [sqlite3_wal_hook()] and will
+** those overwrite any prior [sqlite3_wal_hook()] settings.
+*/
+SQLITE_API void *sqlite3_wal_hook(
+  sqlite3*, 
+  int(*)(void *,sqlite3*,const char*,int),
+  void*
+);
+
+/*
+** CAPI3REF: Configure an auto-checkpoint
+**
+** ^The [sqlite3_wal_autocheckpoint(D,N)] is a wrapper around
+** [sqlite3_wal_hook()] that causes any database on [database connection] D
+** to automatically [checkpoint]
+** after committing a transaction if there are N or
+** more frames in the [write-ahead log] file.  ^Passing zero or 
+** a negative value as the nFrame parameter disables automatic
+** checkpoints entirely.
+**
+** ^The callback registered by this function replaces any existing callback
+** registered using [sqlite3_wal_hook()].  ^Likewise, registering a callback
+** using [sqlite3_wal_hook()] disables the automatic checkpoint mechanism
+** configured by this function.
+**
+** ^The [wal_autocheckpoint pragma] can be used to invoke this interface
+** from SQL.
+**
+** ^Checkpoints initiated by this mechanism are
+** [sqlite3_wal_checkpoint_v2|PASSIVE].
+**
+** ^Every new [database connection] defaults to having the auto-checkpoint
+** enabled with a threshold of 1000 or [SQLITE_DEFAULT_WAL_AUTOCHECKPOINT]
+** pages.  The use of this interface
+** is only necessary if the default setting is found to be suboptimal
+** for a particular application.
+*/
+SQLITE_API int sqlite3_wal_autocheckpoint(sqlite3 *db, int N);
+
+/*
+** CAPI3REF: Checkpoint a database
+**
+** ^(The sqlite3_wal_checkpoint(D,X) is equivalent to
+** [sqlite3_wal_checkpoint_v2](D,X,[SQLITE_CHECKPOINT_PASSIVE],0,0).)^
+**
+** In brief, sqlite3_wal_checkpoint(D,X) causes the content in the 
+** [write-ahead log] for database X on [database connection] D to be
+** transferred into the database file and for the write-ahead log to
+** be reset.  See the [checkpointing] documentation for addition
+** information.
+**
+** This interface used to be the only way to cause a checkpoint to
+** occur.  But then the newer and more powerful [sqlite3_wal_checkpoint_v2()]
+** interface was added.  This interface is retained for backwards
+** compatibility and as a convenience for applications that need to manually
+** start a callback but which do not need the full power (and corresponding
+** complication) of [sqlite3_wal_checkpoint_v2()].
+*/
+SQLITE_API int sqlite3_wal_checkpoint(sqlite3 *db, const char *zDb);
+
+/*
+** CAPI3REF: Checkpoint a database
+**
+** ^(The sqlite3_wal_checkpoint_v2(D,X,M,L,C) interface runs a checkpoint
+** operation on database X of [database connection] D in mode M.  Status
+** information is written back into integers pointed to by L and C.)^
+** ^(The M parameter must be a valid [checkpoint mode]:)^
+**
+** <dl>
+** <dt>SQLITE_CHECKPOINT_PASSIVE<dd>
+**   ^Checkpoint as many frames as possible without waiting for any database 
+**   readers or writers to finish, then sync the database file if all frames 
+**   in the log were checkpointed. ^The [busy-handler callback]
+**   is never invoked in the SQLITE_CHECKPOINT_PASSIVE mode.  
+**   ^On the other hand, passive mode might leave the checkpoint unfinished
+**   if there are concurrent readers or writers.
+**
+** <dt>SQLITE_CHECKPOINT_FULL<dd>
+**   ^This mode blocks (it invokes the
+**   [sqlite3_busy_handler|busy-handler callback]) until there is no
+**   database writer and all readers are reading from the most recent database
+**   snapshot. ^It then checkpoints all frames in the log file and syncs the
+**   database file. ^This mode blocks new database writers while it is pending,
+**   but new database readers are allowed to continue unimpeded.
+**
+** <dt>SQLITE_CHECKPOINT_RESTART<dd>
+**   ^This mode works the same way as SQLITE_CHECKPOINT_FULL with the addition
+**   that after checkpointing the log file it blocks (calls the 
+**   [busy-handler callback])
+**   until all readers are reading from the database file only. ^This ensures 
+**   that the next writer will restart the log file from the beginning.
+**   ^Like SQLITE_CHECKPOINT_FULL, this mode blocks new
+**   database writer attempts while it is pending, but does not impede readers.
+**
+** <dt>SQLITE_CHECKPOINT_TRUNCATE<dd>
+**   ^This mode works the same way as SQLITE_CHECKPOINT_RESTART with the
+**   addition that it also truncates the log file to zero bytes just prior
+**   to a successful return.
+** </dl>
+**
+** ^If pnLog is not NULL, then *pnLog is set to the total number of frames in
+** the log file or to -1 if the checkpoint could not run because
+** of an error or because the database is not in [WAL mode]. ^If pnCkpt is not
+** NULL,then *pnCkpt is set to the total number of checkpointed frames in the
+** log file (including any that were already checkpointed before the function
+** was called) or to -1 if the checkpoint could not run due to an error or
+** because the database is not in WAL mode. ^Note that upon successful
+** completion of an SQLITE_CHECKPOINT_TRUNCATE, the log file will have been
+** truncated to zero bytes and so both *pnLog and *pnCkpt will be set to zero.
+**
+** ^All calls obtain an exclusive "checkpoint" lock on the database file. ^If
+** any other process is running a checkpoint operation at the same time, the 
+** lock cannot be obtained and SQLITE_BUSY is returned. ^Even if there is a 
+** busy-handler configured, it will not be invoked in this case.
+**
+** ^The SQLITE_CHECKPOINT_FULL, RESTART and TRUNCATE modes also obtain the 
+** exclusive "writer" lock on the database file. ^If the writer lock cannot be
+** obtained immediately, and a busy-handler is configured, it is invoked and
+** the writer lock retried until either the busy-handler returns 0 or the lock
+** is successfully obtained. ^The busy-handler is also invoked while waiting for
+** database readers as described above. ^If the busy-handler returns 0 before
+** the writer lock is obtained or while waiting for database readers, the
+** checkpoint operation proceeds from that point in the same way as 
+** SQLITE_CHECKPOINT_PASSIVE - checkpointing as many frames as possible 
+** without blocking any further. ^SQLITE_BUSY is returned in this case.
+**
+** ^If parameter zDb is NULL or points to a zero length string, then the
+** specified operation is attempted on all WAL databases [attached] to 
+** [database connection] db.  In this case the
+** values written to output parameters *pnLog and *pnCkpt are undefined. ^If 
+** an SQLITE_BUSY error is encountered when processing one or more of the 
+** attached WAL databases, the operation is still attempted on any remaining 
+** attached databases and SQLITE_BUSY is returned at the end. ^If any other 
+** error occurs while processing an attached database, processing is abandoned 
+** and the error code is returned to the caller immediately. ^If no error 
+** (SQLITE_BUSY or otherwise) is encountered while processing the attached 
+** databases, SQLITE_OK is returned.
+**
+** ^If database zDb is the name of an attached database that is not in WAL
+** mode, SQLITE_OK is returned and both *pnLog and *pnCkpt set to -1. ^If
+** zDb is not NULL (or a zero length string) and is not the name of any
+** attached database, SQLITE_ERROR is returned to the caller.
+**
+** ^Unless it returns SQLITE_MISUSE,
+** the sqlite3_wal_checkpoint_v2() interface
+** sets the error information that is queried by
+** [sqlite3_errcode()] and [sqlite3_errmsg()].
+**
+** ^The [PRAGMA wal_checkpoint] command can be used to invoke this interface
+** from SQL.
+*/
+SQLITE_API int sqlite3_wal_checkpoint_v2(
+  sqlite3 *db,                    /* Database handle */
+  const char *zDb,                /* Name of attached database (or NULL) */
+  int eMode,                      /* SQLITE_CHECKPOINT_* value */
+  int *pnLog,                     /* OUT: Size of WAL log in frames */
+  int *pnCkpt                     /* OUT: Total number of frames checkpointed */
+);
+
+/*
+** CAPI3REF: Checkpoint Mode Values
+** KEYWORDS: {checkpoint mode}
+**
+** These constants define all valid values for the "checkpoint mode" passed
+** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface.
+** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the
+** meaning of each of these checkpoint modes.
+*/
+#define SQLITE_CHECKPOINT_PASSIVE  0  /* Do as much as possible w/o blocking */
+#define SQLITE_CHECKPOINT_FULL     1  /* Wait for writers, then checkpoint */
+#define SQLITE_CHECKPOINT_RESTART  2  /* Like FULL but wait for for readers */
+#define SQLITE_CHECKPOINT_TRUNCATE 3  /* Like RESTART but also truncate WAL */
+
+/*
+** CAPI3REF: Virtual Table Interface Configuration
+**
+** This function may be called by either the [xConnect] or [xCreate] method
+** of a [virtual table] implementation to configure
+** various facets of the virtual table interface.
+**
+** If this interface is invoked outside the context of an xConnect or
+** xCreate virtual table method then the behavior is undefined.
+**
+** At present, there is only one option that may be configured using
+** this function. (See [SQLITE_VTAB_CONSTRAINT_SUPPORT].)  Further options
+** may be added in the future.
+*/
+SQLITE_API int sqlite3_vtab_config(sqlite3*, int op, ...);
+
+/*
+** CAPI3REF: Virtual Table Configuration Options
+**
+** These macros define the various options to the
+** [sqlite3_vtab_config()] interface that [virtual table] implementations
+** can use to customize and optimize their behavior.
+**
+** <dl>
+** <dt>SQLITE_VTAB_CONSTRAINT_SUPPORT
+** <dd>Calls of the form
+** [sqlite3_vtab_config](db,SQLITE_VTAB_CONSTRAINT_SUPPORT,X) are supported,
+** where X is an integer.  If X is zero, then the [virtual table] whose
+** [xCreate] or [xConnect] method invoked [sqlite3_vtab_config()] does not
+** support constraints.  In this configuration (which is the default) if
+** a call to the [xUpdate] method returns [SQLITE_CONSTRAINT], then the entire
+** statement is rolled back as if [ON CONFLICT | OR ABORT] had been
+** specified as part of the users SQL statement, regardless of the actual
+** ON CONFLICT mode specified.
+**
+** If X is non-zero, then the virtual table implementation guarantees
+** that if [xUpdate] returns [SQLITE_CONSTRAINT], it will do so before
+** any modifications to internal or persistent data structures have been made.
+** If the [ON CONFLICT] mode is ABORT, FAIL, IGNORE or ROLLBACK, SQLite 
+** is able to roll back a statement or database transaction, and abandon
+** or continue processing the current SQL statement as appropriate. 
+** If the ON CONFLICT mode is REPLACE and the [xUpdate] method returns
+** [SQLITE_CONSTRAINT], SQLite handles this as if the ON CONFLICT mode
+** had been ABORT.
+**
+** Virtual table implementations that are required to handle OR REPLACE
+** must do so within the [xUpdate] method. If a call to the 
+** [sqlite3_vtab_on_conflict()] function indicates that the current ON 
+** CONFLICT policy is REPLACE, the virtual table implementation should 
+** silently replace the appropriate rows within the xUpdate callback and
+** return SQLITE_OK. Or, if this is not possible, it may return
+** SQLITE_CONSTRAINT, in which case SQLite falls back to OR ABORT 
+** constraint handling.
+** </dl>
+*/
+#define SQLITE_VTAB_CONSTRAINT_SUPPORT 1
+
+/*
+** CAPI3REF: Determine The Virtual Table Conflict Policy
+**
+** This function may only be called from within a call to the [xUpdate] method
+** of a [virtual table] implementation for an INSERT or UPDATE operation. ^The
+** value returned is one of [SQLITE_ROLLBACK], [SQLITE_IGNORE], [SQLITE_FAIL],
+** [SQLITE_ABORT], or [SQLITE_REPLACE], according to the [ON CONFLICT] mode
+** of the SQL statement that triggered the call to the [xUpdate] method of the
+** [virtual table].
+*/
+SQLITE_API int sqlite3_vtab_on_conflict(sqlite3 *);
+
+/*
+** CAPI3REF: Conflict resolution modes
+** KEYWORDS: {conflict resolution mode}
+**
+** These constants are returned by [sqlite3_vtab_on_conflict()] to
+** inform a [virtual table] implementation what the [ON CONFLICT] mode
+** is for the SQL statement being evaluated.
+**
+** Note that the [SQLITE_IGNORE] constant is also used as a potential
+** return value from the [sqlite3_set_authorizer()] callback and that
+** [SQLITE_ABORT] is also a [result code].
+*/
+#define SQLITE_ROLLBACK 1
+/* #define SQLITE_IGNORE 2 // Also used by sqlite3_authorizer() callback */
+#define SQLITE_FAIL     3
+/* #define SQLITE_ABORT 4  // Also an error code */
+#define SQLITE_REPLACE  5
+
+/*
+** CAPI3REF: Prepared Statement Scan Status Opcodes
+** KEYWORDS: {scanstatus options}
+**
+** The following constants can be used for the T parameter to the
+** [sqlite3_stmt_scanstatus(S,X,T,V)] interface.  Each constant designates a
+** different metric for sqlite3_stmt_scanstatus() to return.
+**
+** When the value returned to V is a string, space to hold that string is
+** managed by the prepared statement S and will be automatically freed when
+** S is finalized.
+**
+** <dl>
+** [[SQLITE_SCANSTAT_NLOOP]] <dt>SQLITE_SCANSTAT_NLOOP</dt>
+** <dd>^The [sqlite3_int64] variable pointed to by the T parameter will be
+** set to the total number of times that the X-th loop has run.</dd>
+**
+** [[SQLITE_SCANSTAT_NVISIT]] <dt>SQLITE_SCANSTAT_NVISIT</dt>
+** <dd>^The [sqlite3_int64] variable pointed to by the T parameter will be set
+** to the total number of rows examined by all iterations of the X-th loop.</dd>
+**
+** [[SQLITE_SCANSTAT_EST]] <dt>SQLITE_SCANSTAT_EST</dt>
+** <dd>^The "double" variable pointed to by the T parameter will be set to the
+** query planner's estimate for the average number of rows output from each
+** iteration of the X-th loop.  If the query planner's estimates was accurate,
+** then this value will approximate the quotient NVISIT/NLOOP and the
+** product of this value for all prior loops with the same SELECTID will
+** be the NLOOP value for the current loop.
+**
+** [[SQLITE_SCANSTAT_NAME]] <dt>SQLITE_SCANSTAT_NAME</dt>
+** <dd>^The "const char *" variable pointed to by the T parameter will be set
+** to a zero-terminated UTF-8 string containing the name of the index or table
+** used for the X-th loop.
+**
+** [[SQLITE_SCANSTAT_EXPLAIN]] <dt>SQLITE_SCANSTAT_EXPLAIN</dt>
+** <dd>^The "const char *" variable pointed to by the T parameter will be set
+** to a zero-terminated UTF-8 string containing the [EXPLAIN QUERY PLAN]
+** description for the X-th loop.
+**
+** [[SQLITE_SCANSTAT_SELECTID]] <dt>SQLITE_SCANSTAT_SELECT</dt>
+** <dd>^The "int" variable pointed to by the T parameter will be set to the
+** "select-id" for the X-th loop.  The select-id identifies which query or
+** subquery the loop is part of.  The main query has a select-id of zero.
+** The select-id is the same value as is output in the first column
+** of an [EXPLAIN QUERY PLAN] query.
+** </dl>
+*/
+#define SQLITE_SCANSTAT_NLOOP    0
+#define SQLITE_SCANSTAT_NVISIT   1
+#define SQLITE_SCANSTAT_EST      2
+#define SQLITE_SCANSTAT_NAME     3
+#define SQLITE_SCANSTAT_EXPLAIN  4
+#define SQLITE_SCANSTAT_SELECTID 5
+
+/*
+** CAPI3REF: Prepared Statement Scan Status
+**
+** This interface returns information about the predicted and measured
+** performance for pStmt.  Advanced applications can use this
+** interface to compare the predicted and the measured performance and
+** issue warnings and/or rerun [ANALYZE] if discrepancies are found.
+**
+** Since this interface is expected to be rarely used, it is only
+** available if SQLite is compiled using the [SQLITE_ENABLE_STMT_SCANSTATUS]
+** compile-time option.
+**
+** The "iScanStatusOp" parameter determines which status information to return.
+** The "iScanStatusOp" must be one of the [scanstatus options] or the behavior
+** of this interface is undefined.
+** ^The requested measurement is written into a variable pointed to by
+** the "pOut" parameter.
+** Parameter "idx" identifies the specific loop to retrieve statistics for.
+** Loops are numbered starting from zero. ^If idx is out of range - less than
+** zero or greater than or equal to the total number of loops used to implement
+** the statement - a non-zero value is returned and the variable that pOut
+** points to is unchanged.
+**
+** ^Statistics might not be available for all loops in all statements. ^In cases
+** where there exist loops with no available statistics, this function behaves
+** as if the loop did not exist - it returns non-zero and leave the variable
+** that pOut points to unchanged.
+**
+** See also: [sqlite3_stmt_scanstatus_reset()]
+*/
+SQLITE_API SQLITE_EXPERIMENTAL int sqlite3_stmt_scanstatus(
+  sqlite3_stmt *pStmt,      /* Prepared statement for which info desired */
+  int idx,                  /* Index of loop to report on */
+  int iScanStatusOp,        /* Information desired.  SQLITE_SCANSTAT_* */
+  void *pOut                /* Result written here */
+);     
+
+/*
+** CAPI3REF: Zero Scan-Status Counters
+**
+** ^Zero all [sqlite3_stmt_scanstatus()] related event counters.
+**
+** This API is only available if the library is built with pre-processor
+** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined.
+*/
+SQLITE_API SQLITE_EXPERIMENTAL void sqlite3_stmt_scanstatus_reset(sqlite3_stmt*);
+
+
+/*
+** Undo the hack that converts floating point types to integer for
+** builds on processors without floating point support.
+*/
+#ifdef SQLITE_OMIT_FLOATING_POINT
+# undef double
+#endif
+
+#ifdef __cplusplus
+}  /* End of the 'extern "C"' block */
+#endif
+#endif /* _SQLITE3_H_ */
+
+/*
+** 2010 August 30
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+*/
+
+#ifndef _SQLITE3RTREE_H_
+#define _SQLITE3RTREE_H_
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct sqlite3_rtree_geometry sqlite3_rtree_geometry;
+typedef struct sqlite3_rtree_query_info sqlite3_rtree_query_info;
+
+/* The double-precision datatype used by RTree depends on the
+** SQLITE_RTREE_INT_ONLY compile-time option.
+*/
+#ifdef SQLITE_RTREE_INT_ONLY
+  typedef sqlite3_int64 sqlite3_rtree_dbl;
+#else
+  typedef double sqlite3_rtree_dbl;
+#endif
+
+/*
+** Register a geometry callback named zGeom that can be used as part of an
+** R-Tree geometry query as follows:
+**
+**   SELECT ... FROM <rtree> WHERE <rtree col> MATCH $zGeom(... params ...)
+*/
+SQLITE_API int sqlite3_rtree_geometry_callback(
+  sqlite3 *db,
+  const char *zGeom,
+  int (*xGeom)(sqlite3_rtree_geometry*, int, sqlite3_rtree_dbl*,int*),
+  void *pContext
+);
+
+
+/*
+** A pointer to a structure of the following type is passed as the first
+** argument to callbacks registered using rtree_geometry_callback().
+*/
+struct sqlite3_rtree_geometry {
+  void *pContext;                 /* Copy of pContext passed to s_r_g_c() */
+  int nParam;                     /* Size of array aParam[] */
+  sqlite3_rtree_dbl *aParam;      /* Parameters passed to SQL geom function */
+  void *pUser;                    /* Callback implementation user data */
+  void (*xDelUser)(void *);       /* Called by SQLite to clean up pUser */
+};
+
+/*
+** Register a 2nd-generation geometry callback named zScore that can be 
+** used as part of an R-Tree geometry query as follows:
+**
+**   SELECT ... FROM <rtree> WHERE <rtree col> MATCH $zQueryFunc(... params ...)
+*/
+SQLITE_API int sqlite3_rtree_query_callback(
+  sqlite3 *db,
+  const char *zQueryFunc,
+  int (*xQueryFunc)(sqlite3_rtree_query_info*),
+  void *pContext,
+  void (*xDestructor)(void*)
+);
+
+
+/*
+** A pointer to a structure of the following type is passed as the 
+** argument to scored geometry callback registered using
+** sqlite3_rtree_query_callback().
+**
+** Note that the first 5 fields of this structure are identical to
+** sqlite3_rtree_geometry.  This structure is a subclass of
+** sqlite3_rtree_geometry.
+*/
+struct sqlite3_rtree_query_info {
+  void *pContext;                   /* pContext from when function registered */
+  int nParam;                       /* Number of function parameters */
+  sqlite3_rtree_dbl *aParam;        /* value of function parameters */
+  void *pUser;                      /* callback can use this, if desired */
+  void (*xDelUser)(void*);          /* function to free pUser */
+  sqlite3_rtree_dbl *aCoord;        /* Coordinates of node or entry to check */
+  unsigned int *anQueue;            /* Number of pending entries in the queue */
+  int nCoord;                       /* Number of coordinates */
+  int iLevel;                       /* Level of current node or entry */
+  int mxLevel;                      /* The largest iLevel value in the tree */
+  sqlite3_int64 iRowid;             /* Rowid for current entry */
+  sqlite3_rtree_dbl rParentScore;   /* Score of parent node */
+  int eParentWithin;                /* Visibility of parent node */
+  int eWithin;                      /* OUT: Visiblity */
+  sqlite3_rtree_dbl rScore;         /* OUT: Write the score here */
+};
+
+/*
+** Allowed values for sqlite3_rtree_query.eWithin and .eParentWithin.
+*/
+#define NOT_WITHIN       0   /* Object completely outside of query region */
+#define PARTLY_WITHIN    1   /* Object partially overlaps query region */
+#define FULLY_WITHIN     2   /* Object fully contained within query region */
+
+
+#ifdef __cplusplus
+}  /* end of the 'extern "C"' block */
+#endif
+
+#endif  /* ifndef _SQLITE3RTREE_H_ */
+
diff --git a/src/main/jni/sqlite_cursor.c b/src/main/jni/sqlite_cursor.c
new file mode 100644
index 000000000..de3f7469d
--- /dev/null
+++ b/src/main/jni/sqlite_cursor.c
@@ -0,0 +1,79 @@
+#include "sqlite.h"
+
+int Java_org_telegram_SQLite_SQLiteCursor_columnType(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	return sqlite3_column_type(handle, columnIndex);
+}
+
+int Java_org_telegram_SQLite_SQLiteCursor_columnIsNull(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	int valType = sqlite3_column_type(handle, columnIndex);
+	return SQLITE_NULL == valType;
+}
+
+int Java_org_telegram_SQLite_SQLiteCursor_columnIntValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	int valType = sqlite3_column_type(handle, columnIndex);
+	if (SQLITE_NULL == valType) {
+		return 0;
+	}
+	return sqlite3_column_int(handle, columnIndex);
+}
+
+long long Java_org_telegram_SQLite_SQLiteCursor_columnLongValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	int valType = sqlite3_column_type(handle, columnIndex);
+	if (SQLITE_NULL == valType) {
+		return 0;
+	}
+	return sqlite3_column_int64(handle, columnIndex);
+}
+
+double Java_org_telegram_SQLite_SQLiteCursor_columnDoubleValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	int valType = sqlite3_column_type(handle, columnIndex);
+	if (SQLITE_NULL == valType) {
+		return 0;
+	}
+	return sqlite3_column_double(handle, columnIndex);
+}
+
+jstring Java_org_telegram_SQLite_SQLiteCursor_columnStringValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	const char *str = sqlite3_column_text(handle, columnIndex);
+	if (str != 0) {
+		return (*env)->NewStringUTF(env, str);
+	}
+	return 0;
+}
+
+jbyteArray Java_org_telegram_SQLite_SQLiteCursor_columnByteArrayValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+    sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	void *buf = sqlite3_column_blob(handle, columnIndex);
+	int length = sqlite3_column_bytes(handle, columnIndex);
+	if (buf != 0 && length > 0) {
+		jbyteArray result = (*env)->NewByteArray(env, length);
+        (*env)->SetByteArrayRegion(env, result, 0, length, buf);
+        return result;
+	}
+	return 0;
+}
+
+int Java_org_telegram_SQLite_SQLiteCursor_columnByteArrayLength(JNIEnv *env, jobject object, int statementHandle, int columnIndex) {
+	return sqlite3_column_bytes((sqlite3_stmt *)statementHandle, columnIndex);
+}
+
+int Java_org_telegram_SQLite_SQLiteCursor_columnByteBufferValue(JNIEnv *env, jobject object, int statementHandle, int columnIndex, jobject buffer) {
+    if (!buffer) {
+        return 0;
+    }
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+	void *buf = sqlite3_column_blob(handle, columnIndex);
+	int length = sqlite3_column_bytes(handle, columnIndex);
+	if (buf != 0 && length > 0) {
+        jbyte *byteBuff = (*env)->GetDirectBufferAddress(env, buffer);
+        memcpy(byteBuff, buf, length);
+        return length;
+	}
+	return 0;
+}
diff --git a/src/main/jni/sqlite_database.c b/src/main/jni/sqlite_database.c
new file mode 100644
index 000000000..a249cff43
--- /dev/null
+++ b/src/main/jni/sqlite_database.c
@@ -0,0 +1,42 @@
+#include "sqlite.h"
+
+void Java_org_telegram_SQLite_SQLiteDatabase_closedb(JNIEnv *env, jobject object, int sqliteHandle) {
+	sqlite3 *handle = (sqlite3 *)sqliteHandle;
+	int err = sqlite3_close(handle);
+	if (SQLITE_OK != err) {
+		throw_sqlite3_exception(env, handle, err);
+	}
+}
+
+void Java_org_telegram_SQLite_SQLiteDatabase_beginTransaction(JNIEnv *env, jobject object, int sqliteHandle) {
+    sqlite3 *handle = (sqlite3 *)sqliteHandle;
+    sqlite3_exec(handle, "BEGIN", 0, 0, 0);
+}
+
+void Java_org_telegram_SQLite_SQLiteDatabase_commitTransaction(JNIEnv *env, jobject object, int sqliteHandle) {
+    sqlite3 *handle = (sqlite3 *)sqliteHandle;
+    sqlite3_exec(handle, "COMMIT", 0, 0, 0);
+}
+
+int Java_org_telegram_SQLite_SQLiteDatabase_opendb(JNIEnv *env, jobject object, jstring fileName, jstring tempDir) {
+    char const *fileNameStr = (*env)->GetStringUTFChars(env, fileName, 0);
+    char const *tempDirStr = (*env)->GetStringUTFChars(env, tempDir, 0);
+    
+    if (sqlite3_temp_directory != 0) {
+        sqlite3_free(sqlite3_temp_directory);
+    }
+    sqlite3_temp_directory = sqlite3_mprintf("%s", tempDirStr);
+    
+    sqlite3 *handle = 0;
+    int err = sqlite3_open(fileNameStr, &handle);
+    if (SQLITE_OK != err) {
+    	throw_sqlite3_exception(env, handle, err);
+    }
+    if (fileNameStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, fileName, fileNameStr);
+    }
+    if (tempDirStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, tempDir, tempDirStr);
+    }
+    return (int)handle;
+}
diff --git a/src/main/jni/sqlite_statement.c b/src/main/jni/sqlite_statement.c
new file mode 100644
index 000000000..2fc4ef337
--- /dev/null
+++ b/src/main/jni/sqlite_statement.c
@@ -0,0 +1,125 @@
+#include "sqlite.h"
+
+jfieldID queryArgsCountField;
+
+jint sqliteOnJNILoad(JavaVM *vm, void *reserved, JNIEnv *env) {
+	jclass class = (*env)->FindClass(env, "org/telegram/SQLite/SQLitePreparedStatement");
+	queryArgsCountField = (*env)->GetFieldID(env, class, "queryArgsCount", "I");
+	return JNI_VERSION_1_6;
+}
+
+int Java_org_telegram_SQLite_SQLitePreparedStatement_step(JNIEnv* env, jobject object, int statementHandle) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+    
+    int errcode = sqlite3_step(handle);
+    if (errcode == SQLITE_ROW)  {
+        return 0;
+    } else if(errcode == SQLITE_DONE) {
+        return 1;
+    }  else if(errcode == SQLITE_BUSY) {
+        return -1;
+    }
+	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+}
+
+int Java_org_telegram_SQLite_SQLitePreparedStatement_prepare(JNIEnv *env, jobject object, int sqliteHandle, jstring sql) {
+	sqlite3* handle = (sqlite3 *)sqliteHandle;
+
+    char const *sqlStr = (*env)->GetStringUTFChars(env, sql, 0);
+
+    sqlite3_stmt *stmt_handle;
+
+    int errcode = sqlite3_prepare_v2(handle, sqlStr, -1, &stmt_handle, 0);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, handle, errcode);
+    } else {
+    	int argsCount = sqlite3_bind_parameter_count(stmt_handle);
+    	(*env)->SetIntField(env, object, queryArgsCountField, argsCount);
+    }
+
+    if (sqlStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, sql, sqlStr);
+    }
+
+    return (int)stmt_handle;
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_reset(JNIEnv *env, jobject object, int statementHandle) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+
+	int errcode = sqlite3_reset(handle);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_finalize(JNIEnv *env, jobject object, int statementHandle) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+
+	int errcode = sqlite3_finalize (handle);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindByteBuffer(JNIEnv *env, jobject object, int statementHandle, int index, jobject value, int length) {
+	sqlite3_stmt *handle = (sqlite3_stmt *)statementHandle;
+    jbyte *buf = (*env)->GetDirectBufferAddress(env, value);
+    
+	int errcode = sqlite3_bind_blob(handle, index, buf, length, SQLITE_STATIC);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindString(JNIEnv *env, jobject object, int statementHandle, int index, jstring value) {
+	sqlite3_stmt *handle = (sqlite3_stmt*)statementHandle;
+
+	char const *valueStr = (*env)->GetStringUTFChars(env, value, 0);
+
+	int errcode = sqlite3_bind_text(handle, index, valueStr, -1, SQLITE_TRANSIENT);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+
+	if (valueStr != 0) {
+        (*env)->ReleaseStringUTFChars(env, value, valueStr);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindInt(JNIEnv *env, jobject object, int statementHandle, int index, int value) {
+	sqlite3_stmt *handle = (sqlite3_stmt*)statementHandle;
+
+	int errcode = sqlite3_bind_int(handle, index, value);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindLong(JNIEnv *env, jobject object, int statementHandle, int index, long long value) {
+	sqlite3_stmt *handle = (sqlite3_stmt*)statementHandle;
+    
+	int errcode = sqlite3_bind_int64(handle, index, value);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindDouble(JNIEnv* env, jobject object, int statementHandle, int index, double value) {
+	sqlite3_stmt *handle = (sqlite3_stmt*)statementHandle;
+
+	int errcode = sqlite3_bind_double(handle, index, value);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
+void Java_org_telegram_SQLite_SQLitePreparedStatement_bindNull(JNIEnv* env, jobject object, int statementHandle, int index) {
+	sqlite3_stmt *handle = (sqlite3_stmt*)statementHandle;
+
+	int errcode = sqlite3_bind_null(handle, index);
+    if (SQLITE_OK != errcode) {
+    	throw_sqlite3_exception(env, sqlite3_db_handle(handle), errcode);
+    }
+}
+
diff --git a/src/main/jni/utils.c b/src/main/jni/utils.c
new file mode 100644
index 000000000..1321a8213
--- /dev/null
+++ b/src/main/jni/utils.c
@@ -0,0 +1,14 @@
+#include "utils.h"
+
+void throwException(JNIEnv *env, char *format, ...) {
+    jclass exClass = (*env)->FindClass(env, "java/lang/UnsupportedOperationException");
+    if (!exClass) {
+        return;
+	}
+    char dest[256];
+    va_list argptr;
+    va_start(argptr, format);
+    vsprintf(dest, format, argptr);
+    va_end(argptr);
+    (*env)->ThrowNew(env, exClass, dest);
+}
diff --git a/src/main/jni/utils.h b/src/main/jni/utils.h
new file mode 100644
index 000000000..f17ae9025
--- /dev/null
+++ b/src/main/jni/utils.h
@@ -0,0 +1,29 @@
+#ifndef log_h
+#define log_h
+
+#include <android/log.h>
+#include <jni.h>
+
+#define LOG_TAG "tmessages"
+#ifndef LOG_DISABLED
+#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
+#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
+#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+#define LOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, LOG_TAG, __VA_ARGS__)
+#else
+#define LOGI(...)
+#define LOGD(...)
+#define LOGE(...)
+#define LOGV(...)
+#endif
+
+#ifndef max
+#define max(x, y) ((x) > (y)) ? (x) : (y)
+#endif
+#ifndef min
+#define min(x, y) ((x) < (y)) ? (x) : (y)
+#endif
+
+void throwException(JNIEnv *env, char *format, ...);
+
+#endif
diff --git a/src/main/jni/video.c b/src/main/jni/video.c
new file mode 100644
index 000000000..2afca482e
--- /dev/null
+++ b/src/main/jni/video.c
@@ -0,0 +1,109 @@
+#include <jni.h>
+#include <libyuv.h>
+#include <utils.h>
+
+enum COLOR_FORMATTYPE {
+    COLOR_FormatMonochrome              = 1,
+    COLOR_Format8bitRGB332              = 2,
+    COLOR_Format12bitRGB444             = 3,
+    COLOR_Format16bitARGB4444           = 4,
+    COLOR_Format16bitARGB1555           = 5,
+    COLOR_Format16bitRGB565             = 6,
+    COLOR_Format16bitBGR565             = 7,
+    COLOR_Format18bitRGB666             = 8,
+    COLOR_Format18bitARGB1665           = 9,
+    COLOR_Format19bitARGB1666           = 10,
+    COLOR_Format24bitRGB888             = 11,
+    COLOR_Format24bitBGR888             = 12,
+    COLOR_Format24bitARGB1887           = 13,
+    COLOR_Format25bitARGB1888           = 14,
+    COLOR_Format32bitBGRA8888           = 15,
+    COLOR_Format32bitARGB8888           = 16,
+    COLOR_FormatYUV411Planar            = 17,
+    COLOR_FormatYUV411PackedPlanar      = 18,
+    COLOR_FormatYUV420Planar            = 19,
+    COLOR_FormatYUV420PackedPlanar      = 20,
+    COLOR_FormatYUV420SemiPlanar        = 21,
+    COLOR_FormatYUV422Planar            = 22,
+    COLOR_FormatYUV422PackedPlanar      = 23,
+    COLOR_FormatYUV422SemiPlanar        = 24,
+    COLOR_FormatYCbYCr                  = 25,
+    COLOR_FormatYCrYCb                  = 26,
+    COLOR_FormatCbYCrY                  = 27,
+    COLOR_FormatCrYCbY                  = 28,
+    COLOR_FormatYUV444Interleaved       = 29,
+    COLOR_FormatRawBayer8bit            = 30,
+    COLOR_FormatRawBayer10bit           = 31,
+    COLOR_FormatRawBayer8bitcompressed  = 32,
+    COLOR_FormatL2                      = 33,
+    COLOR_FormatL4                      = 34,
+    COLOR_FormatL8                      = 35,
+    COLOR_FormatL16                     = 36,
+    COLOR_FormatL24                     = 37,
+    COLOR_FormatL32                     = 38,
+    COLOR_FormatYUV420PackedSemiPlanar  = 39,
+    COLOR_FormatYUV422PackedSemiPlanar  = 40,
+    COLOR_Format18BitBGR666             = 41,
+    COLOR_Format24BitARGB6666           = 42,
+    COLOR_Format24BitABGR6666           = 43,
+
+    COLOR_TI_FormatYUV420PackedSemiPlanar = 0x7f000100,
+    COLOR_FormatSurface                   = 0x7F000789,
+    COLOR_QCOM_FormatYUV420SemiPlanar     = 0x7fa30c00
+};
+
+int isSemiPlanarYUV(int colorFormat) {
+    switch (colorFormat) {
+        case COLOR_FormatYUV420Planar:
+        case COLOR_FormatYUV420PackedPlanar:
+            return 0;
+        case COLOR_FormatYUV420SemiPlanar:
+        case COLOR_FormatYUV420PackedSemiPlanar:
+        case COLOR_TI_FormatYUV420PackedSemiPlanar:
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+JNIEXPORT int com_example_khalsa_video_MediaController_convertVideoFrame(JNIEnv *env, jclass class, jobject src, jobject dest, int destFormat, int width, int height, int padding, int swap) {
+    if (!src || !dest || !destFormat) {
+        return 0;
+    }
+    
+    jbyte *srcBuff = (*env)->GetDirectBufferAddress(env, src);
+    jbyte *destBuff = (*env)->GetDirectBufferAddress(env, dest);
+
+    int half_width = (width + 1) / 2;
+    int half_height = (height + 1) / 2;
+    
+    if (!isSemiPlanarYUV(destFormat)) {
+        if (!swap) {
+            ARGBToI420(srcBuff, width * 4,
+                       destBuff, width,
+                       destBuff + width * height + half_width * half_height + padding * 5 / 4, half_width,
+                       destBuff + width * height + padding, half_width,
+                       width, height);
+        } else {
+            ARGBToI420(srcBuff, width * 4,
+                       destBuff, width,
+                       destBuff + width * height + padding, half_width,
+                       destBuff + width * height + half_width * half_height + padding * 5 / 4, half_width,
+                       width, height);
+        }
+    } else {
+        if (!swap) {
+            ARGBToNV21(srcBuff, width * 4,
+                       destBuff, width,
+                       destBuff + width * height + padding, half_width * 2,
+                       width, height);
+        } else {
+            ARGBToNV12(srcBuff, width * 4,
+                       destBuff, width,
+                       destBuff + width * height + padding, half_width * 2,
+                       width, height);
+        }
+    }
+    
+    return 1;
+}
diff --git a/src/main/jniLibs/armeabi-v7a/libtmessages.7.so b/src/main/jniLibs/armeabi-v7a/libtmessages.7.so
new file mode 100644
index 000000000..c086f4a98
--- /dev/null
+++ b/src/main/jniLibs/armeabi-v7a/libtmessages.7.so
diff --git a/src/main/jniLibs/armeabi/libtmessages.7.so b/src/main/jniLibs/armeabi/libtmessages.7.so
new file mode 100644
index 000000000..42ad265ae
--- /dev/null
+++ b/src/main/jniLibs/armeabi/libtmessages.7.so
diff --git a/src/main/jniLibs/x86/libtmessages.7.so b/src/main/jniLibs/x86/libtmessages.7.so
new file mode 100644
index 000000000..aaa768eac
--- /dev/null
+++ b/src/main/jniLibs/x86/libtmessages.7.so
diff --git a/src/main/res/drawable-hdpi/ic_send_video_away.png b/src/main/res/drawable-hdpi/ic_send_video_away.png
new file mode 100644
index 000000000..ecaabe948
--- /dev/null
+++ b/src/main/res/drawable-hdpi/ic_send_video_away.png
diff --git a/src/main/res/drawable-hdpi/ic_send_video_dnd.png b/src/main/res/drawable-hdpi/ic_send_video_dnd.png
new file mode 100644
index 000000000..f2557b5bd
--- /dev/null
+++ b/src/main/res/drawable-hdpi/ic_send_video_dnd.png
diff --git a/src/main/res/drawable-hdpi/ic_send_video_offline.png b/src/main/res/drawable-hdpi/ic_send_video_offline.png
new file mode 100644
index 000000000..6d4518d5c
--- /dev/null
+++ b/src/main/res/drawable-hdpi/ic_send_video_offline.png
diff --git a/src/main/res/drawable-hdpi/ic_send_video_online.png b/src/main/res/drawable-hdpi/ic_send_video_online.png
new file mode 100644
index 000000000..31e864bba
--- /dev/null
+++ b/src/main/res/drawable-hdpi/ic_send_video_online.png
diff --git a/src/main/res/drawable-mdpi/ic_send_video_away.png b/src/main/res/drawable-mdpi/ic_send_video_away.png
new file mode 100644
index 000000000..f5d9fc7c9
--- /dev/null
+++ b/src/main/res/drawable-mdpi/ic_send_video_away.png
diff --git a/src/main/res/drawable-mdpi/ic_send_video_dnd.png b/src/main/res/drawable-mdpi/ic_send_video_dnd.png
new file mode 100644
index 000000000..c8ad525b1
--- /dev/null
+++ b/src/main/res/drawable-mdpi/ic_send_video_dnd.png
diff --git a/src/main/res/drawable-mdpi/ic_send_video_offline.png b/src/main/res/drawable-mdpi/ic_send_video_offline.png
new file mode 100644
index 000000000..ff06763c1
--- /dev/null
+++ b/src/main/res/drawable-mdpi/ic_send_video_offline.png
diff --git a/src/main/res/drawable-mdpi/ic_send_video_online.png b/src/main/res/drawable-mdpi/ic_send_video_online.png
new file mode 100644
index 000000000..2b8523469
--- /dev/null
+++ b/src/main/res/drawable-mdpi/ic_send_video_online.png
diff --git a/src/main/res/drawable-xhdpi/ic_send_video_away.png b/src/main/res/drawable-xhdpi/ic_send_video_away.png
new file mode 100644
index 000000000..d02216dee
--- /dev/null
+++ b/src/main/res/drawable-xhdpi/ic_send_video_away.png
diff --git a/src/main/res/drawable-xhdpi/ic_send_video_dnd.png b/src/main/res/drawable-xhdpi/ic_send_video_dnd.png
new file mode 100644
index 000000000..b37c1560a
--- /dev/null
+++ b/src/main/res/drawable-xhdpi/ic_send_video_dnd.png
diff --git a/src/main/res/drawable-xhdpi/ic_send_video_offline.png b/src/main/res/drawable-xhdpi/ic_send_video_offline.png
new file mode 100644
index 000000000..2dfe662da
--- /dev/null
+++ b/src/main/res/drawable-xhdpi/ic_send_video_offline.png
diff --git a/src/main/res/drawable-xhdpi/ic_send_video_online.png b/src/main/res/drawable-xhdpi/ic_send_video_online.png
new file mode 100644
index 000000000..c057ac686
--- /dev/null
+++ b/src/main/res/drawable-xhdpi/ic_send_video_online.png
diff --git a/src/main/res/drawable-xxhdpi/ic_send_video_away.png b/src/main/res/drawable-xxhdpi/ic_send_video_away.png
new file mode 100644
index 000000000..0bff31b38
--- /dev/null
+++ b/src/main/res/drawable-xxhdpi/ic_send_video_away.png
diff --git a/src/main/res/drawable-xxhdpi/ic_send_video_dnd.png b/src/main/res/drawable-xxhdpi/ic_send_video_dnd.png
new file mode 100644
index 000000000..392d71627
--- /dev/null
+++ b/src/main/res/drawable-xxhdpi/ic_send_video_dnd.png
diff --git a/src/main/res/drawable-xxhdpi/ic_send_video_offline.png b/src/main/res/drawable-xxhdpi/ic_send_video_offline.png
new file mode 100644
index 000000000..25de2e765
--- /dev/null
+++ b/src/main/res/drawable-xxhdpi/ic_send_video_offline.png
diff --git a/src/main/res/drawable-xxhdpi/ic_send_video_online.png b/src/main/res/drawable-xxhdpi/ic_send_video_online.png
new file mode 100644
index 000000000..f3cd06086
--- /dev/null
+++ b/src/main/res/drawable-xxhdpi/ic_send_video_online.png
diff --git a/src/main/res/drawable-xxxhdpi/ic_send_video_away.png b/src/main/res/drawable-xxxhdpi/ic_send_video_away.png
new file mode 100644
index 000000000..9fd450bdb
--- /dev/null
+++ b/src/main/res/drawable-xxxhdpi/ic_send_video_away.png
diff --git a/src/main/res/drawable-xxxhdpi/ic_send_video_dnd.png b/src/main/res/drawable-xxxhdpi/ic_send_video_dnd.png
new file mode 100644
index 000000000..02e2c7883
--- /dev/null
+++ b/src/main/res/drawable-xxxhdpi/ic_send_video_dnd.png
diff --git a/src/main/res/drawable-xxxhdpi/ic_send_video_offline.png b/src/main/res/drawable-xxxhdpi/ic_send_video_offline.png
new file mode 100644
index 000000000..7d803011c
--- /dev/null
+++ b/src/main/res/drawable-xxxhdpi/ic_send_video_offline.png
diff --git a/src/main/res/drawable-xxxhdpi/ic_send_video_online.png b/src/main/res/drawable-xxxhdpi/ic_send_video_online.png
new file mode 100644
index 000000000..8da325ba1
--- /dev/null
+++ b/src/main/res/drawable-xxxhdpi/ic_send_video_online.png
diff --git a/src/main/res/menu/attachment_choices.xml b/src/main/res/menu/attachment_choices.xml
index 6e8fc51d3..e2b0798ee 100644
--- a/src/main/res/menu/attachment_choices.xml
+++ b/src/main/res/menu/attachment_choices.xml
@@ -18,6 +18,10 @@
         android:title="@string/attach_choose_picture"/>
 
     <item
+        android:id="@+id/attach_choose_video"
+        android:title="@string/attach_choose_video"/>
+
+    <item
         android:id="@+id/attach_choose_file"
         android:title="@string/choose_file"/>
 
diff --git a/src/main/res/values/strings.xml b/src/main/res/values/strings.xml
index 93c287159..6ac1bae2b 100644
--- a/src/main/res/values/strings.xml
+++ b/src/main/res/values/strings.xml
@@ -690,4 +690,6 @@
 	<string name="me">Me</string>
 	<string name="contact_asks_for_presence_subscription">Contact asks for presence subscription</string>
 	<string name="allow">Allow</string>
+    <string name="attach_choose_video">Choose video</string>
+    <string name="preparing_video">Prepare video for transmission</string>
 </resources>
author	Christian Schneppe <christian@pix-art.de>	2016-08-26 23:48:48 +0200
committer	Christian Schneppe <christian@pix-art.de>	2016-08-28 21:33:19 +0200
commit	b3b3475e93a9b08f9e35edbf74673728b560ad3b (patch)
tree	fc72bfce668b358310061c0a94736a0bd14e8b5d
parent	1f7f535d37b844dbd87447e1872c270edbca1302 (diff)